// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" | FileCheck %s
// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=rocdl.hsaco" | FileCheck %s --check-prefix=ROCDL

// Verifies how --gpu-to-llvm lowers a gpu.launch_func op into calls to the
// mgpu* runtime wrappers (module load, function lookup, stream creation,
// kernel launch, stream synchronize/destroy, module unload).
//
// The same input is processed twice:
//   * with gpu-binary-annotation=nvvm.cubin, where the default-prefix
//     expectations match the full lowered call sequence;
//   * with gpu-binary-annotation=rocdl.hsaco, where the only expectation is
//     that the emitted global constant holds the "HSACO" payload.

module attributes {gpu.container_module} {

  // Globals expected in the output: one string constant with the
  // null-terminated kernel name and one with the binary blob taken from the
  // attribute selected by gpu-binary-annotation.
  // CHECK: llvm.mlir.global internal constant @[[KERNEL_NAME:.*]]("kernel\00")
  // CHECK: llvm.mlir.global internal constant @[[GLOBAL:.*]]("CUBIN")
  // ROCDL: llvm.mlir.global internal constant @[[GLOBAL:.*]]("HSACO")

  // Device module. It carries both binary annotations so that one input file
  // serves both invocations above; the string values are placeholders that
  // the expectations match verbatim.
  gpu.module @kernel_module attributes {
      nvvm.cubin = "CUBIN", rocdl.hsaco = "HSACO"
  } {
    // The kernel declares six parameters while the launch below passes two
    // operands (an i32 and a memref<?xf32>).
    // NOTE(review): presumably %arg1-%arg5 are the expanded fields of the
    // rank-1 memref descriptor (two pointers plus three i64 values) -
    // confirm against the memref-to-LLVM calling convention.
    llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr<f32>,
        %arg2: !llvm.ptr<f32>, %arg3: i64, %arg4: i64,
        %arg5: i64) attributes {gpu.kernel} {
      llvm.return
    }
  }

  // Host function under test: launches @kernel_module::@kernel on an 8x8x8
  // grid of 8x8x8 blocks with 256 as the dynamic shared memory size.
  func @foo(%buffer: memref<?xf32>) {
    %c8 = constant 8 : index
    %c32 = constant 32 : i32
    %c256 = constant 256 : i32
    gpu.launch_func @kernel_module::@kernel
        blocks in (%c8, %c8, %c8)
        threads in (%c8, %c8, %c8)
        dynamic_shared_memory_size %c256
        args(%c32 : i32, %buffer : memref<?xf32>)
    return
  }

  // Expected lowering of @foo. The two constants may be emitted in either
  // order; the binary blob is then addressed through a getelementptr with two
  // zero indices to obtain an i8 pointer.
  // CHECK-DAG: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
  // CHECK-DAG: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
  // CHECK: [[ADDRESSOF:%.*]] = llvm.mlir.addressof @[[GLOBAL]]
  // CHECK: [[C0:%.*]] = llvm.mlir.constant(0 : index)
  // CHECK: [[BINARY:%.*]] = llvm.getelementptr [[ADDRESSOF]]{{\[}}[[C0]], [[C0]]]
  // CHECK-SAME: -> !llvm.ptr<i8>

  // CHECK: [[MODULE:%.*]] = llvm.call @mgpuModuleLoad([[BINARY]])
  // CHECK: [[FUNC:%.*]] = llvm.call @mgpuModuleGetFunction([[MODULE]], {{.*}})

  // CHECK: [[STREAM:%.*]] = llvm.call @mgpuStreamCreate

  // The parameter array has one slot per kernel parameter (six here).
  // CHECK: [[NUM_PARAMS:%.*]] = llvm.mlir.constant(6 : i32) : i32
  // CHECK-NEXT: [[PARAMS:%.*]] = llvm.alloca [[NUM_PARAMS]] x !llvm.ptr<i8>

  // CHECK: [[EXTRA_PARAMS:%.*]] = llvm.mlir.null : !llvm.ptr<ptr<i8>>

  // Launch operands, in order: function, three grid sizes, three block sizes,
  // dynamic shared memory size, stream, parameter array, extra parameters.
  // CHECK: llvm.call @mgpuLaunchKernel([[FUNC]], [[C8]], [[C8]], [[C8]],
  // CHECK-SAME: [[C8]], [[C8]], [[C8]], [[C256]], [[STREAM]],
  // CHECK-SAME: [[PARAMS]], [[EXTRA_PARAMS]])
  // CHECK: llvm.call @mgpuStreamSynchronize
  // CHECK: llvm.call @mgpuStreamDestroy
  // CHECK: llvm.call @mgpuModuleUnload
}