// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s

// Conversion test: GPU sparse-linear-algebra ops lower to the matching
// mgpu* sparse-runtime wrapper calls. Each function below exercises one
// op group (SpMV, SpMM, SpGEMM, SDDMM, CSC/BSR creation) and checks that
// the expected runtime calls appear in order.
module attributes {gpu.container_module} {

  // SpMV path: COO matrix + dense vector -> mgpuSpMVBufferSize / mgpuSpMV.
  // CHECK-LABEL: func @matvec
  // CHECK: llvm.call @mgpuStreamCreate
  // CHECK: llvm.call @mgpuMemAlloc
  // CHECK: llvm.call @mgpuMemAlloc
  // CHECK: llvm.call @mgpuCreateCoo
  // CHECK: llvm.call @mgpuCreateDnVec
  // CHECK: llvm.call @mgpuSpMVBufferSize
  // CHECK: llvm.call @mgpuSpMV
  // CHECK: llvm.call @mgpuDestroySpMat
  // CHECK: llvm.call @mgpuDestroyDnVec
  // CHECK: llvm.call @mgpuStreamSynchronize
  // CHECK: llvm.call @mgpuStreamDestroy
  func.func @matvec(%arg0: index) {
    %token0 = gpu.wait async
    %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
    %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
    %spmat, %token4 = gpu.create_coo async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
    %dnvec, %token5 = gpu.create_dn_tensor async [%token4] %mem2, %arg0 : index into memref<?xf64>
    %bufferSz, %token6 = gpu.spmv_buffer_size async [%token5] %spmat, %dnvec, %dnvec into f64
    %token7 = gpu.spmv async [%token6] %spmat, %dnvec, %dnvec, %mem2 : memref<?xf64> into f64
    %token8 = gpu.destroy_sp_mat async [%token7] %spmat
    %token9 = gpu.destroy_dn_tensor async [%token8] %dnvec
    gpu.wait [%token9]
    return
  }

  // SpMM path: CSR matrix + dense matrix -> mgpuSpMMBufferSize / mgpuSpMM.
  // CHECK-LABEL: func @matmul
  // CHECK: llvm.call @mgpuStreamCreate
  // CHECK: llvm.call @mgpuMemAlloc
  // CHECK: llvm.call @mgpuMemAlloc
  // CHECK: llvm.call @mgpuCreateCsr
  // CHECK: llvm.call @mgpuCreateDnMat
  // CHECK: llvm.call @mgpuSpMMBufferSize
  // CHECK: llvm.call @mgpuSpMM
  // CHECK: llvm.call @mgpuDestroySpMat
  // CHECK: llvm.call @mgpuDestroyDnMat
  // CHECK: llvm.call @mgpuStreamSynchronize
  // CHECK: llvm.call @mgpuStreamDestroy
  func.func @matmul(%arg0: index) {
    %token0 = gpu.wait async
    %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
    %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
    %spmat, %token4 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
    %dnmat, %token5 = gpu.create_dn_tensor async [%token4] %mem2, %arg0, %arg0 : index, index into memref<?xf64>
    %bufferSz, %token6 = gpu.spmm_buffer_size async [%token5] %spmat, %dnmat, %dnmat : index into f64
    %token7 = gpu.spmm async [%token6] %spmat, %dnmat, %dnmat, %mem2 : memref<?xf64> into f64
    %token8 = gpu.destroy_sp_mat async [%token7] %spmat
    %token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
    gpu.wait [%token9]
    return
  }

  // SpGEMM path: three CSR matrices through the descriptor-based
  // work-estimation / compute / copy sequence.
  // CHECK-LABEL: func @spgemm
  // CHECK: llvm.call @mgpuStreamCreate
  // CHECK: llvm.call @mgpuMemAlloc
  // CHECK: llvm.call @mgpuMemAlloc
  // CHECK: llvm.call @mgpuCreateCsr
  // CHECK: llvm.call @mgpuCreateCsr
  // CHECK: llvm.call @mgpuCreateCsr
  // CHECK: llvm.call @mgpuSpGEMMCreateDescr
  // CHECK: llvm.call @mgpuSpGEMMWorkEstimation
  // CHECK: llvm.call @mgpuSpGEMMCompute
  // CHECK: llvm.call @mgpuSpMatGetSize
  // CHECK: llvm.call @mgpuSetCsrPointers
  // CHECK: llvm.call @mgpuSpGEMMCopy
  // CHECK: llvm.call @mgpuSpGEMMDestroyDescr
  // CHECK: llvm.call @mgpuDestroySpMat
  // CHECK: llvm.call @mgpuDestroySpMat
  // CHECK: llvm.call @mgpuDestroySpMat
  // CHECK: llvm.call @mgpuStreamSynchronize
  // CHECK: llvm.call @mgpuStreamDestroy
  func.func @spgemm(%arg0: index) {
    %token0 = gpu.wait async
    %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
    %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf32>
    %spmatA, %token3 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
    %spmatB, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
    %spmatC, %token5 = gpu.create_csr async [%token4] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
    %spgemmDesc, %token6 = gpu.spgemm_create_descr async [%token5]
    %alloc = memref.alloc() : memref<0xi8> // nullptr
    %c0 = arith.constant 0 : index
    %bufferSz1, %token7 = gpu.spgemm_work_estimation_or_compute async
                            [%token6]{WORK_ESTIMATION}
                            %spmatA, %spmatB, %spmatC,
                            %spgemmDesc, %c0, %alloc: f32 into memref<0xi8>
    %bufferSz2, %token8 = gpu.spgemm_work_estimation_or_compute async
                            [%token7]{COMPUTE}
                            %spmatA, %spmatB, %spmatC,
                            %spgemmDesc, %c0, %alloc: f32 into memref<0xi8>
    %rows, %cols, %nnz, %token9 = gpu.spmat_get_size async [%token8] %spmatC
    %token10 = gpu.set_csr_pointers async [%token8] %spmatC, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
    %token11 = gpu.spgemm_copy async [%token10] %spmatA, %spmatB, %spmatC, %spgemmDesc: f32
    %token12 = gpu.spgemm_destroy_descr async [%token11] %spgemmDesc
    %token13 = gpu.destroy_sp_mat async [%token12] %spmatA
    %token14 = gpu.destroy_sp_mat async [%token13] %spmatB
    %token15 = gpu.destroy_sp_mat async [%token14] %spmatC
    gpu.wait [%token15]
    return
  }

  // SDDMM path: dense x dense sampled into CSR -> mgpuSDDMMBufferSize / mgpuSDDMM.
  // CHECK-LABEL: func @sddmm
  // CHECK: llvm.call @mgpuStreamCreate
  // CHECK: llvm.call @mgpuMemAlloc
  // CHECK: llvm.call @mgpuMemAlloc
  // CHECK: llvm.call @mgpuCreateCsr
  // CHECK: llvm.call @mgpuCreateDnMat
  // CHECK: llvm.call @mgpuSDDMMBufferSize
  // CHECK: llvm.call @mgpuSDDMM
  // CHECK: llvm.call @mgpuDestroySpMat
  // CHECK: llvm.call @mgpuDestroyDnMat
  // CHECK: llvm.call @mgpuStreamSynchronize
  // CHECK: llvm.call @mgpuStreamDestroy
  func.func @sddmm(%arg0: index) {
    %token0 = gpu.wait async
    %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
    %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
    %spmat, %token4 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
    %dnmat, %token5 = gpu.create_dn_tensor async [%token4] %mem2, %arg0, %arg0 : index, index into memref<?xf64>
    %bufferSz, %token6 = gpu.sddmm_buffer_size async [%token5] %dnmat, %dnmat, %spmat into f64
    %token7 = gpu.sddmm async [%token6] %dnmat, %dnmat, %spmat, %mem2 : memref<?xf64> into f64
    %token8 = gpu.destroy_sp_mat async [%token7] %spmat
    %token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
    gpu.wait [%token9]
    return
  }

  // Creation-only path for the CSC and BSR sparse formats.
  // CHECK-LABEL: func @csc_and_bsr
  // CHECK: llvm.call @mgpuStreamCreate
  // CHECK: llvm.call @mgpuMemAlloc
  // CHECK: llvm.call @mgpuMemAlloc
  // CHECK: llvm.call @mgpuCreateCsc
  // CHECK: llvm.call @mgpuCreateBsr
  // CHECK: llvm.call @mgpuDestroySpMat
  // CHECK: llvm.call @mgpuDestroySpMat
  // CHECK: llvm.call @mgpuStreamSynchronize
  // CHECK: llvm.call @mgpuStreamDestroy
  func.func @csc_and_bsr(%arg0: index) {
    %token0 = gpu.wait async
    %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
    %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
    %csc, %token3 = gpu.create_csc async [%token2]
                      %arg0, %arg0, %arg0, %mem1, %mem1, %mem2
                      : memref<?xindex>, memref<?xindex>, memref<?xf64>
    %bsr, %token4 = gpu.create_bsr async [%token3]
                      %arg0, %arg0, %arg0, %arg0, %arg0, %mem1, %mem1, %mem2
                      : memref<?xindex>, memref<?xindex>, memref<?xf64>
    %token5 = gpu.destroy_sp_mat async [%token4] %csc
    %token6 = gpu.destroy_sp_mat async [%token5] %bsr
    gpu.wait [%token6]
    return
  }

}