// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" | FileCheck %s
// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=rocdl.hsaco" | FileCheck %s --check-prefix=ROCDL

// Verifies how the gpu-to-llvm pass lowers a gpu.launch_func operation into
// calls to the mgpu* runtime wrapper functions. The same input is processed
// twice: once with the binary annotation set to nvvm.cubin (default check
// prefix) and once with it set to rocdl.hsaco (second check prefix).

module attributes {gpu.container_module} {

  // Expected globals: one holding the null-terminated kernel name and one
  // holding the binary blob. The blob content is "CUBIN" in the first run and
  // "HSACO" in the second, i.e. the value of the attribute named by the
  // gpu-binary-annotation option.
  // CHECK: llvm.mlir.global internal constant @[[KERNEL_NAME:.*]]("kernel\00")
  // CHECK: llvm.mlir.global internal constant @[[GLOBAL:.*]]("CUBIN")
  // ROCDL: llvm.mlir.global internal constant @[[GLOBAL:.*]]("HSACO")

  // The device module carries both binary annotations so that a single input
  // file can serve both invocations of the pass.
  gpu.module @kernel_module attributes {
    nvvm.cubin = "CUBIN", rocdl.hsaco = "HSACO"
  } {
    // Six-argument kernel: an i32 followed by two f32 pointers and three i64
    // values. NOTE(review): the trailing five arguments look like the
    // expanded descriptor of the memref<?xf32> launch operand -- confirm
    // against the pass documentation.
    llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr<f32>,
                      %arg2: !llvm.ptr<f32>, %arg3: i64, %arg4: i64,
                      %arg5: i64) attributes {gpu.kernel} {
      llvm.return
    }
  }

  // Host function that launches the kernel with 8 for every grid and block
  // dimension, 256 as the dynamic shared memory size, and two kernel
  // operands (an i32 constant and the memref argument).
  func @foo(%buffer: memref<?xf32>) {
    %c8 = constant 8 : index
    %c32 = constant 32 : i32
    %c256 = constant 256 : i32
    gpu.launch_func @kernel_module::@kernel
        blocks in (%c8, %c8, %c8)
        threads in (%c8, %c8, %c8)
        dynamic_shared_memory_size %c256
        args(%c32 : i32, %buffer : memref<?xf32>)
    return
  }

  // The launch constants may be emitted in either order, then a pointer to
  // the first byte of the binary global is formed.
  // CHECK-DAG: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
  // CHECK-DAG: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
  // CHECK: [[ADDRESSOF:%.*]] = llvm.mlir.addressof @[[GLOBAL]]
  // CHECK: [[C0:%.*]] = llvm.mlir.constant(0 : index)
  // CHECK: [[BINARY:%.*]] = llvm.getelementptr [[ADDRESSOF]]{{\[}}[[C0]], [[C0]]]
  // CHECK-SAME: -> !llvm.ptr<i8>

  // The binary is loaded as a module and the kernel function is looked up in
  // it.
  // CHECK: [[MODULE:%.*]] = llvm.call @mgpuModuleLoad([[BINARY]])
  // CHECK: [[FUNC:%.*]] = llvm.call @mgpuModuleGetFunction([[MODULE]], {{.*}})

  // A stream is created for the launch.
  // CHECK: [[STREAM:%.*]] = llvm.call @mgpuStreamCreate

  // The parameter array has six slots, matching the six arguments of the
  // kernel signature above.
  // CHECK: [[NUM_PARAMS:%.*]] = llvm.mlir.constant(6 : i32) : i32
  // CHECK-NEXT: [[PARAMS:%.*]] = llvm.alloca [[NUM_PARAMS]] x !llvm.ptr<i8>

  // The extra-parameters operand is expected to be a null pointer.
  // CHECK: [[EXTRA_PARAMS:%.*]] = llvm.mlir.null : !llvm.ptr<ptr<i8>>

  // The launch call receives the function, the six size operands (all 8),
  // the shared memory size, the stream, and both parameter arrays; the
  // stream is then synchronized and destroyed and the module is unloaded.
  // CHECK: llvm.call @mgpuLaunchKernel([[FUNC]], [[C8]], [[C8]], [[C8]],
  // CHECK-SAME: [[C8]], [[C8]], [[C8]], [[C256]], [[STREAM]],
  // CHECK-SAME: [[PARAMS]], [[EXTRA_PARAMS]])
  // CHECK: llvm.call @mgpuStreamSynchronize
  // CHECK: llvm.call @mgpuStreamDestroy
  // CHECK: llvm.call @mgpuModuleUnload
}