; Verifies that the llvm.nvvm.mbarrier.* intrinsics are lowered to the
; matching PTX mbarrier instructions when compiling for sm_80. Each intrinsic
; that takes a barrier pointer is exercised twice: once through a plain
; pointer and once through the .shared variant that takes an addrspace(3)
; pointer. Both the nvptx and nvptx64 targets are run; address operands are
; expected in %r registers on nvptx and in %rd registers on nvptx64.
;
; The default check prefix is enabled in both runs so that the per-function
; label directives take effect and every instruction check is confined to the
; body of its own function. Each label ends in "(" so that it only matches
; the header of the intended function and not a longer name sharing the same
; leading characters.

; RUN: llc < %s -march=nvptx -mcpu=sm_80 | FileCheck %s -check-prefixes=CHECK,CHECK_PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck %s -check-prefixes=CHECK,CHECK_PTX64

; --- mbarrier.init ----------------------------------------------------------

declare void @llvm.nvvm.mbarrier.init(i64* %a, i32 %b)
declare void @llvm.nvvm.mbarrier.init.shared(i64 addrspace(3)* %a, i32 %b)

; CHECK-LABEL: barrierinit(
define void @barrierinit(i64* %a, i32 %b) {
; CHECK_PTX32: mbarrier.init.b64 [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.init.b64 [%rd{{[0-9]+}}], %r{{[0-9]+}};
  tail call void @llvm.nvvm.mbarrier.init(i64* %a, i32 %b)
  ret void
}

; CHECK-LABEL: barrierinitshared(
define void @barrierinitshared(i64 addrspace(3)* %a, i32 %b) {
; CHECK_PTX32: mbarrier.init.shared.b64 [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.init.shared.b64 [%rd{{[0-9]+}}], %r{{[0-9]+}};
  tail call void @llvm.nvvm.mbarrier.init.shared(i64 addrspace(3)* %a, i32 %b)
  ret void
}

; --- mbarrier.inval ---------------------------------------------------------

declare void @llvm.nvvm.mbarrier.inval(i64* %a)
declare void @llvm.nvvm.mbarrier.inval.shared(i64 addrspace(3)* %a)

; CHECK-LABEL: barrierinval(
define void @barrierinval(i64* %a) {
; CHECK_PTX32: mbarrier.inval.b64 [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.inval.b64 [%rd{{[0-9]+}}];
  tail call void @llvm.nvvm.mbarrier.inval(i64* %a)
  ret void
}

; CHECK-LABEL: barrierinvalshared(
define void @barrierinvalshared(i64 addrspace(3)* %a) {
; CHECK_PTX32: mbarrier.inval.shared.b64 [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.inval.shared.b64 [%rd{{[0-9]+}}];
  tail call void @llvm.nvvm.mbarrier.inval.shared(i64 addrspace(3)* %a)
  ret void
}

; --- mbarrier.arrive --------------------------------------------------------
; The i64 result of the arrive-style intrinsics is left unused on purpose; the
; checks only look at the emitted instruction and its operand registers.

declare i64 @llvm.nvvm.mbarrier.arrive(i64* %a)
declare i64 @llvm.nvvm.mbarrier.arrive.shared(i64 addrspace(3)* %a)

; CHECK-LABEL: barrierarrive(
define void @barrierarrive(i64* %a) {
; CHECK_PTX32: mbarrier.arrive.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.arrive.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive(i64* %a)
  ret void
}

; CHECK-LABEL: barrierarriveshared(
define void @barrierarriveshared(i64 addrspace(3)* %a) {
; CHECK_PTX32: mbarrier.arrive.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.arrive.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.shared(i64 addrspace(3)* %a)
  ret void
}

; --- mbarrier.arrive.noComplete ---------------------------------------------

declare i64 @llvm.nvvm.mbarrier.arrive.noComplete(i64* %a, i32 %b)
declare i64 @llvm.nvvm.mbarrier.arrive.noComplete.shared(i64 addrspace(3)* %a, i32 %b)

; CHECK-LABEL: barrierarrivenoComplete(
define void @barrierarrivenoComplete(i64* %a, i32 %b) {
; CHECK_PTX32: mbarrier.arrive.noComplete.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.arrive.noComplete.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.noComplete(i64* %a, i32 %b)
  ret void
}

; CHECK-LABEL: barrierarrivenoCompleteshared(
define void @barrierarrivenoCompleteshared(i64 addrspace(3)* %a, i32 %b) {
; CHECK_PTX32: mbarrier.arrive.noComplete.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.arrive.noComplete.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.noComplete.shared(i64 addrspace(3)* %a, i32 %b)
  ret void
}

; --- mbarrier.arrive.drop ---------------------------------------------------
; Note the spelling difference: the intrinsic is named "arrive.drop" while the
; expected PTX mnemonic is "arrive_drop".

declare i64 @llvm.nvvm.mbarrier.arrive.drop(i64* %a)
declare i64 @llvm.nvvm.mbarrier.arrive.drop.shared(i64 addrspace(3)* %a)

; CHECK-LABEL: barrierarrivedrop(
define void @barrierarrivedrop(i64* %a) {
; CHECK_PTX32: mbarrier.arrive_drop.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.arrive_drop.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop(i64* %a)
  ret void
}

; CHECK-LABEL: barrierarrivedropshared(
define void @barrierarrivedropshared(i64 addrspace(3)* %a) {
; CHECK_PTX32: mbarrier.arrive_drop.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.arrive_drop.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.shared(i64 addrspace(3)* %a)
  ret void
}

; --- mbarrier.arrive.drop.noComplete ----------------------------------------

declare i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete(i64* %a, i32 %b)
declare i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete.shared(i64 addrspace(3)* %a, i32 %b)

; CHECK-LABEL: barrierarrivedropnoComplete(
define void @barrierarrivedropnoComplete(i64* %a, i32 %b) {
; CHECK_PTX32: mbarrier.arrive_drop.noComplete.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.arrive_drop.noComplete.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete(i64* %a, i32 %b)
  ret void
}

; CHECK-LABEL: barrierarrivedropnoCompleteshared(
define void @barrierarrivedropnoCompleteshared(i64 addrspace(3)* %a, i32 %b) {
; CHECK_PTX32: mbarrier.arrive_drop.noComplete.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.arrive_drop.noComplete.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete.shared(i64 addrspace(3)* %a, i32 %b)
  ret void
}

; --- mbarrier.test.wait -----------------------------------------------------
; The i1 result is expected in a predicate (%p) register.

declare i1 @llvm.nvvm.mbarrier.test.wait(i64* %a, i64 %b)
declare i1 @llvm.nvvm.mbarrier.test.wait.shared(i64 addrspace(3)* %a, i64 %b)

; CHECK-LABEL: barriertestwait(
define void @barriertestwait(i64* %a, i64 %b) {
; CHECK_PTX32: mbarrier.test_wait.b64 %p{{[0-9]+}}, [%r{{[0-9]+}}], %rd{{[0-9]+}};
; CHECK_PTX64: mbarrier.test_wait.b64 %p{{[0-9]+}}, [%rd{{[0-9]+}}], %rd{{[0-9]+}};
  %ret = tail call i1 @llvm.nvvm.mbarrier.test.wait(i64* %a, i64 %b)
  ret void
}

; CHECK-LABEL: barriertestwaitshared(
define void @barriertestwaitshared(i64 addrspace(3)* %a, i64 %b) {
; CHECK_PTX32: mbarrier.test_wait.shared.b64 %p{{[0-9]+}}, [%r{{[0-9]+}}], %rd{{[0-9]+}};
; CHECK_PTX64: mbarrier.test_wait.shared.b64 %p{{[0-9]+}}, [%rd{{[0-9]+}}], %rd{{[0-9]+}};
  %ret = tail call i1 @llvm.nvvm.mbarrier.test.wait.shared(i64 addrspace(3)* %a, i64 %b)
  ret void
}

; --- mbarrier.pending.count -------------------------------------------------
; This intrinsic takes only an i64 value (no pointer), so the expected PTX is
; identical for both targets. The %a parameter of the test function is unused.

declare i32 @llvm.nvvm.mbarrier.pending.count(i64 %b)

; CHECK-LABEL: barrierpendingcount(
define i32 @barrierpendingcount(i64* %a, i64 %b) {
; CHECK_PTX32: mbarrier.pending_count.b64 %r{{[0-9]+}}, %rd{{[0-9]+}};
; CHECK_PTX64: mbarrier.pending_count.b64 %r{{[0-9]+}}, %rd{{[0-9]+}};
  %ret = tail call i32 @llvm.nvvm.mbarrier.pending.count(i64 %b)
  ret i32 %ret
}