166 lines
6.1 KiB
MLIR
166 lines
6.1 KiB
MLIR
|
// RUN: mlir-opt --convert-nvvm-to-llvm --split-input-file -verify-diagnostics %s
|
||
|
|
||
|
!mat64f32 = !llvm.struct<(f32, f32, f32, f32, f32, f32, f32)>
|
||
|
func.func @wgmma_f32_f16_f16(%descA : i64, %descB : i64) -> !mat64f32{
|
||
|
%result = llvm.mlir.undef : !mat64f32
|
||
|
// expected-error @+1 {{'nvvm.wgmma.mma_async' op results 64, however output struct has 7 elements}}
|
||
|
%res = nvvm.wgmma.mma_async %descA, %descB, %result,
|
||
|
#nvvm.shape<m = 64, n = 128, k = 16>,
|
||
|
D [<f32>, <zero>],
|
||
|
A [<f16>, #nvvm.wgmma_scale_in<neg>, <col>],
|
||
|
B [<f16>, #nvvm.wgmma_scale_in<neg>, <col>]
|
||
|
: !mat64f32 -> !mat64f32
|
||
|
return %res : !mat64f32
|
||
|
}
|
||
|
|
||
|
// -----
|
||
|
|
||
|
func.func @wgmma_f32_satfinite(%descA : i64, %descB : i64) {
|
||
|
%result = llvm.mlir.undef : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
|
||
|
// expected-error @+1 {{`satfinite` can be only used with s32 accumulator, however the current accumulator is f32}}
|
||
|
%res = nvvm.wgmma.mma_async %descA, %descB, %result,
|
||
|
#nvvm.shape<m = 64, n = 16, k = 16>,
|
||
|
D [<f32>, <zero>, <satfinite>],
|
||
|
A [<f16>, #nvvm.wgmma_scale_in<neg>, <col>],
|
||
|
B [<f16>, #nvvm.wgmma_scale_in<neg>, <col>]
|
||
|
: !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
|
||
|
-> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// -----
|
||
|
|
||
|
func.func @wgmma_f32_m32(%descA : i64, %descB : i64) {
|
||
|
%result = llvm.mlir.undef : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
|
||
|
// expected-error @+1 {{shape 'm' must be 64}}
|
||
|
%res = nvvm.wgmma.mma_async %descA, %descB, %result,
|
||
|
#nvvm.shape<m = 32, n = 16, k = 16>,
|
||
|
D [<f32>, <zero>],
|
||
|
A [<f16>, #nvvm.wgmma_scale_in<neg>, <col>],
|
||
|
B [<f16>, #nvvm.wgmma_scale_in<neg>, <col>]
|
||
|
: !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
|
||
|
-> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// -----
|
||
|
|
||
|
func.func @wgmma_f32_m32(%descA : i64, %descB : i64) {
|
||
|
%result = llvm.mlir.undef : !llvm.struct<(f32, f32, i32, f32, f32, f32, f32, f32)>
|
||
|
// expected-error @+1 {{op all elements in struct must be same type but there is 'i32'}}
|
||
|
%res = nvvm.wgmma.mma_async %descA, %descB, %result,
|
||
|
#nvvm.shape<m = 64, n = 16, k = 16>,
|
||
|
D [<f32>, <zero>],
|
||
|
A [<f16>, #nvvm.wgmma_scale_in<neg>, <col>],
|
||
|
B [<f16>, #nvvm.wgmma_scale_in<neg>, <col>]
|
||
|
: !llvm.struct<(f32, f32, i32, f32, f32, f32, f32, f32)>
|
||
|
-> !llvm.struct<(f32, f32, i32, f32, f32, f32, f32, f32)>
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// -----
|
||
|
|
||
|
func.func @wgmma_f32_m32(%descA : i64, %descB : i64) {
|
||
|
%result = llvm.mlir.undef : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
|
||
|
// expected-error @+1 {{op shape 'k' must be 16 for input type f16}}
|
||
|
%res = nvvm.wgmma.mma_async %descA, %descB, %result,
|
||
|
#nvvm.shape<m = 64, n = 16, k = 3>,
|
||
|
D [<f32>, <zero>],
|
||
|
A [<f16>, #nvvm.wgmma_scale_in<neg>, <col>],
|
||
|
B [<f16>, #nvvm.wgmma_scale_in<neg>, <col>]
|
||
|
: !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
|
||
|
-> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// -----
|
||
|
|
||
|
func.func @wgmma_transpose(%descA : i64, %descB : i64) {
|
||
|
%result = llvm.mlir.undef : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
|
||
|
// expected-error @+1 {{op given layouts layout_a = col and layout_b = col for input types tf32 and tf32 requires transpose. However, this is only supported for: f16 and bf16}}
|
||
|
%res = nvvm.wgmma.mma_async %descA, %descB, %result,
|
||
|
#nvvm.shape<m = 64, n = 16, k = 8>,
|
||
|
D [<f32>, <zero>],
|
||
|
A [<tf32>, #nvvm.wgmma_scale_in<neg>, <col>],
|
||
|
B [<tf32>, #nvvm.wgmma_scale_in<neg>, <col>]
|
||
|
: !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
|
||
|
-> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// -----
|
||
|
|
||
|
func.func @wgmma_transpose(%descA : i64, %descB : i64) {
|
||
|
%result = llvm.mlir.undef : !llvm.struct<(f16, f16, f16, f16)>
|
||
|
// expected-error @+1 {{'nvvm.wgmma.mma_async' op f16 += tf32 * tf32, it is not supported.}}
|
||
|
%res = nvvm.wgmma.mma_async %descA, %descB, %result,
|
||
|
#nvvm.shape<m = 64, n = 16, k = 8>,
|
||
|
D [<f16>, <zero>],
|
||
|
A [<tf32>, #nvvm.wgmma_scale_in<neg>, <col>],
|
||
|
B [<tf32>, #nvvm.wgmma_scale_in<neg>, <col>]
|
||
|
:!llvm.struct<(f16, f16, f16, f16)>
|
||
|
-> !llvm.struct<(f16, f16, f16, f16)>
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// -----
|
||
|
|
||
|
func.func @wgmma_f32_m32(%descA : i64, %descB : i64) {
|
||
|
%result = llvm.mlir.undef : !llvm.struct<(i32, i32, i32, i32)>
|
||
|
// expected-error @+1 {{input struct and result struct must be the same type}}
|
||
|
%res = nvvm.wgmma.mma_async %descA, %descB, %result,
|
||
|
#nvvm.shape<m = 64, n = 8, k = 16>,
|
||
|
D [<f16>, <zero>],
|
||
|
A [<f16>, #nvvm.wgmma_scale_in<neg>, <col>],
|
||
|
B [<f16>, #nvvm.wgmma_scale_in<neg>, <col>]
|
||
|
: !llvm.struct<(i32, i32, i32, i32)>
|
||
|
-> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// -----
|
||
|
|
||
|
func.func @wgmma_f32_m32(%descA : i64, %descB : i64) {
|
||
|
%result = llvm.mlir.undef : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
|
||
|
// expected-error @+1 {{op f32 += bf16 * f16, it is not supported}}
|
||
|
%res = nvvm.wgmma.mma_async %descA, %descB, %result,
|
||
|
#nvvm.shape<m = 64, n = 8, k = 16>,
|
||
|
D [<f32>, <zero>],
|
||
|
A [<bf16>, #nvvm.wgmma_scale_in<neg>, <col>],
|
||
|
B [<f16>, #nvvm.wgmma_scale_in<neg>, <col>]
|
||
|
: !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
|
||
|
-> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
|
||
|
return
|
||
|
}
|
||
|
// -----
|
||
|
|
||
|
func.func @set_max_register() {
|
||
|
// expected-error @+1 {{new register size must be in between 24 to 256}}
|
||
|
nvvm.setmaxregister decrease 8
|
||
|
func.return
|
||
|
}
|
||
|
|
||
|
// -----
|
||
|
|
||
|
func.func @set_max_register() {
|
||
|
// expected-error @+1 {{new register size must be multiple of 8}}
|
||
|
nvvm.setmaxregister decrease 51
|
||
|
func.return
|
||
|
}
|
||
|
|
||
|
// -----
|
||
|
|
||
|
func.func @fence_proxy() {
|
||
|
// expected-error @+1 {{op only async_shared fence can have space attribute}}
|
||
|
nvvm.fence.proxy { kind = #nvvm.proxy_kind<async>, space = #nvvm.shared_space<cluster>}
|
||
|
func.return
|
||
|
}
|
||
|
|
||
|
// -----
|
||
|
|
||
|
func.func @fence_proxy() {
|
||
|
// expected-error @+1 {{op async_shared fence requires space attribute}}
|
||
|
nvvm.fence.proxy { kind = #nvvm.proxy_kind<async.shared>}
|
||
|
func.return
|
||
|
}
|