; RUN: llc < %s -march=nvptx -mcpu=sm_80 | FileCheck %s -check-prefix=CHECK_PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck %s -check-prefix=CHECK_PTX64

; Declarations of the mbarrier.init intrinsics exercised below.
; Both take a pointer to an i64 object plus an i32 operand and return nothing;
; the ".shared" form takes its pointer in addrspace(3).
declare void @llvm.nvvm.mbarrier.init(i64* %a, i32 %b)
declare void @llvm.nvvm.mbarrier.init.shared(i64 addrspace(3)* %a, i32 %b)

; Lowering of llvm.nvvm.mbarrier.init on a generic pointer.
; Expected instruction: mbarrier.init.b64 [addr], count. The address register
; is %r<N> in the nvptx run and %rd<N> in the nvptx64 run; the count is %r<N>
; in both.
; The label directives use the prefixes enabled by the RUN lines and are
; anchored on "name(" so they cannot match inside a longer function name.
; CHECK_PTX32-LABEL: barrierinit(
; CHECK_PTX64-LABEL: barrierinit(
define void @barrierinit(i64* %a, i32 %b) {
; CHECK_PTX32: mbarrier.init.b64 [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.init.b64 [%rd{{[0-9]+}}], %r{{[0-9]+}};
  tail call void @llvm.nvvm.mbarrier.init(i64* %a, i32 %b)
  ret void
}

; Lowering of llvm.nvvm.mbarrier.init.shared on an addrspace(3) pointer.
; Expected instruction: mbarrier.init.shared.b64 [addr], count. The address
; register is %r<N> in the nvptx run and %rd<N> in the nvptx64 run.
; The label directives use the prefixes enabled by the RUN lines and are
; anchored on "name(" so each label matches exactly one function header.
; CHECK_PTX32-LABEL: barrierinitshared(
; CHECK_PTX64-LABEL: barrierinitshared(
define void @barrierinitshared(i64 addrspace(3)* %a, i32 %b) {
; CHECK_PTX32: mbarrier.init.shared.b64 [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.init.shared.b64 [%rd{{[0-9]+}}], %r{{[0-9]+}};
  tail call void @llvm.nvvm.mbarrier.init.shared(i64 addrspace(3)* %a, i32 %b)
  ret void
}

; Declarations of the mbarrier.inval intrinsics exercised below.
; Each takes only a pointer to an i64 object and returns nothing; the
; ".shared" form takes its pointer in addrspace(3).
declare void @llvm.nvvm.mbarrier.inval(i64* %a)
declare void @llvm.nvvm.mbarrier.inval.shared(i64 addrspace(3)* %a)

; Lowering of llvm.nvvm.mbarrier.inval on a generic pointer.
; Expected instruction: mbarrier.inval.b64 [addr], with the address in %r<N>
; for the nvptx run and %rd<N> for the nvptx64 run.
; The register-number pattern is [0-9]+ (any decimal register index), matching
; the other tests in this file, rather than a pattern limited to digits 0 and 1.
; The label directives use the prefixes enabled by the RUN lines and are
; anchored on "name(" so they cannot match inside a longer function name.
; CHECK_PTX32-LABEL: barrierinval(
; CHECK_PTX64-LABEL: barrierinval(
define void @barrierinval(i64* %a) {
; CHECK_PTX32: mbarrier.inval.b64 [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.inval.b64 [%rd{{[0-9]+}}];
  tail call void @llvm.nvvm.mbarrier.inval(i64* %a)
  ret void
}

; Lowering of llvm.nvvm.mbarrier.inval.shared on an addrspace(3) pointer.
; Expected instruction: mbarrier.inval.shared.b64 [addr], with the address in
; %r<N> for the nvptx run and %rd<N> for the nvptx64 run.
; The register-number pattern is [0-9]+ (any decimal register index), matching
; the other tests in this file, rather than a pattern limited to digits 0 and 1.
; The label directives use the prefixes enabled by the RUN lines and are
; anchored on "name(" so each label matches exactly one function header.
; CHECK_PTX32-LABEL: barrierinvalshared(
; CHECK_PTX64-LABEL: barrierinvalshared(
define void @barrierinvalshared(i64 addrspace(3)* %a) {
; CHECK_PTX32: mbarrier.inval.shared.b64 [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.inval.shared.b64 [%rd{{[0-9]+}}];
  tail call void @llvm.nvvm.mbarrier.inval.shared(i64 addrspace(3)* %a)
  ret void
}

; Declarations of the mbarrier.arrive intrinsics exercised below.
; Each takes a pointer to an i64 object and returns an i64; the ".shared"
; form takes its pointer in addrspace(3).
declare i64 @llvm.nvvm.mbarrier.arrive(i64* %a)
declare i64 @llvm.nvvm.mbarrier.arrive.shared(i64 addrspace(3)* %a)

; Lowering of llvm.nvvm.mbarrier.arrive on a generic pointer.
; Expected instruction: mbarrier.arrive.b64 dst, [addr]. The destination is
; %rd<N> in both runs; the address is %r<N> in the nvptx run and %rd<N> in the
; nvptx64 run. The returned i64 is not used by the function.
; The label directives use the prefixes enabled by the RUN lines and are
; anchored on "name(" so they cannot match inside a longer function name.
; CHECK_PTX32-LABEL: barrierarrive(
; CHECK_PTX64-LABEL: barrierarrive(
define void @barrierarrive(i64* %a) {
; CHECK_PTX32: mbarrier.arrive.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.arrive.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive(i64* %a)
  ret void
}

; Lowering of llvm.nvvm.mbarrier.arrive.shared on an addrspace(3) pointer.
; Expected instruction: mbarrier.arrive.shared.b64 dst, [addr]. The
; destination is %rd<N> in both runs; the address is %r<N> in the nvptx run
; and %rd<N> in the nvptx64 run. The returned i64 is not used by the function.
; The label directives use the prefixes enabled by the RUN lines and are
; anchored on "name(" so each label matches exactly one function header.
; CHECK_PTX32-LABEL: barrierarriveshared(
; CHECK_PTX64-LABEL: barrierarriveshared(
define void @barrierarriveshared(i64 addrspace(3)* %a) {
; CHECK_PTX32: mbarrier.arrive.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.arrive.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.shared(i64 addrspace(3)* %a)
  ret void
}

; Declarations of the mbarrier.arrive.noComplete intrinsics exercised below.
; Each takes a pointer to an i64 object plus an i32 operand and returns an
; i64; the ".shared" form takes its pointer in addrspace(3).
declare i64 @llvm.nvvm.mbarrier.arrive.noComplete(i64* %a, i32 %b)
declare i64 @llvm.nvvm.mbarrier.arrive.noComplete.shared(i64 addrspace(3)* %a, i32 %b)

; Lowering of llvm.nvvm.mbarrier.arrive.noComplete on a generic pointer.
; Expected instruction: mbarrier.arrive.noComplete.b64 dst, [addr], count.
; dst is %rd<N> and count is %r<N> in both runs; the address is %r<N> in the
; nvptx run and %rd<N> in the nvptx64 run. The returned i64 is not used.
; The label directives use the prefixes enabled by the RUN lines and are
; anchored on "name(" so they cannot match inside a longer function name.
; CHECK_PTX32-LABEL: barrierarrivenoComplete(
; CHECK_PTX64-LABEL: barrierarrivenoComplete(
define void @barrierarrivenoComplete(i64* %a, i32 %b) {
; CHECK_PTX32: mbarrier.arrive.noComplete.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.arrive.noComplete.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.noComplete(i64* %a, i32 %b)
  ret void
}

; Lowering of llvm.nvvm.mbarrier.arrive.noComplete.shared on an addrspace(3)
; pointer.
; Expected instruction: mbarrier.arrive.noComplete.shared.b64 dst, [addr],
; count. dst is %rd<N> and count is %r<N> in both runs; the address is %r<N>
; in the nvptx run and %rd<N> in the nvptx64 run. The returned i64 is not used.
; The label directives use the prefixes enabled by the RUN lines and are
; anchored on "name(" so each label matches exactly one function header.
; CHECK_PTX32-LABEL: barrierarrivenoCompleteshared(
; CHECK_PTX64-LABEL: barrierarrivenoCompleteshared(
define void @barrierarrivenoCompleteshared(i64 addrspace(3)* %a, i32 %b) {
; CHECK_PTX32: mbarrier.arrive.noComplete.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.arrive.noComplete.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.noComplete.shared(i64 addrspace(3)* %a, i32 %b)
  ret void
}

; Declarations of the mbarrier.arrive.drop intrinsics exercised below.
; Each takes a pointer to an i64 object and returns an i64; the ".shared"
; form takes its pointer in addrspace(3).
declare i64 @llvm.nvvm.mbarrier.arrive.drop(i64* %a)
declare i64 @llvm.nvvm.mbarrier.arrive.drop.shared(i64 addrspace(3)* %a)

; Lowering of llvm.nvvm.mbarrier.arrive.drop on a generic pointer.
; Expected instruction: mbarrier.arrive_drop.b64 dst, [addr] (the intrinsic's
; ".drop" is spelled "_drop" in the expected PTX). dst is %rd<N> in both runs;
; the address is %r<N> in the nvptx run and %rd<N> in the nvptx64 run.
; The returned i64 is not used by the function.
; The label directives use the prefixes enabled by the RUN lines and are
; anchored on "name(" so they cannot match inside a longer function name.
; CHECK_PTX32-LABEL: barrierarrivedrop(
; CHECK_PTX64-LABEL: barrierarrivedrop(
define void @barrierarrivedrop(i64* %a) {
; CHECK_PTX32: mbarrier.arrive_drop.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.arrive_drop.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop(i64* %a)
  ret void
}

; Lowering of llvm.nvvm.mbarrier.arrive.drop.shared on an addrspace(3) pointer.
; Expected instruction: mbarrier.arrive_drop.shared.b64 dst, [addr]. dst is
; %rd<N> in both runs; the address is %r<N> in the nvptx run and %rd<N> in the
; nvptx64 run. The returned i64 is not used by the function.
; The label directives use the prefixes enabled by the RUN lines and are
; anchored on "name(" so each label matches exactly one function header.
; CHECK_PTX32-LABEL: barrierarrivedropshared(
; CHECK_PTX64-LABEL: barrierarrivedropshared(
define void @barrierarrivedropshared(i64 addrspace(3)* %a) {
; CHECK_PTX32: mbarrier.arrive_drop.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.arrive_drop.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.shared(i64 addrspace(3)* %a)
  ret void
}

; Declarations of the mbarrier.arrive.drop.noComplete intrinsics exercised
; below. Each takes a pointer to an i64 object plus an i32 operand and returns
; an i64; the ".shared" form takes its pointer in addrspace(3).
declare i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete(i64* %a, i32 %b)
declare i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete.shared(i64 addrspace(3)* %a, i32 %b)

; Lowering of llvm.nvvm.mbarrier.arrive.drop.noComplete on a generic pointer.
; Expected instruction: mbarrier.arrive_drop.noComplete.b64 dst, [addr], count.
; dst is %rd<N> and count is %r<N> in both runs; the address is %r<N> in the
; nvptx run and %rd<N> in the nvptx64 run. The returned i64 is not used.
; The label directives use the prefixes enabled by the RUN lines and are
; anchored on "name(" so they cannot match inside a longer function name.
; CHECK_PTX32-LABEL: barrierarrivedropnoComplete(
; CHECK_PTX64-LABEL: barrierarrivedropnoComplete(
define void @barrierarrivedropnoComplete(i64* %a, i32 %b) {
; CHECK_PTX32: mbarrier.arrive_drop.noComplete.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.arrive_drop.noComplete.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete(i64* %a, i32 %b)
  ret void
}

; Lowering of llvm.nvvm.mbarrier.arrive.drop.noComplete.shared on an
; addrspace(3) pointer.
; Expected instruction: mbarrier.arrive_drop.noComplete.shared.b64 dst,
; [addr], count. dst is %rd<N> and count is %r<N> in both runs; the address is
; %r<N> in the nvptx run and %rd<N> in the nvptx64 run. The returned i64 is
; not used by the function.
; The label directives use the prefixes enabled by the RUN lines and are
; anchored on "name(" so each label matches exactly one function header.
; CHECK_PTX32-LABEL: barrierarrivedropnoCompleteshared(
; CHECK_PTX64-LABEL: barrierarrivedropnoCompleteshared(
define void @barrierarrivedropnoCompleteshared(i64 addrspace(3)* %a, i32 %b) {
; CHECK_PTX32: mbarrier.arrive_drop.noComplete.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.arrive_drop.noComplete.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete.shared(i64 addrspace(3)* %a, i32 %b)
  ret void
}

; Declarations of the mbarrier.test.wait intrinsics exercised below.
; Each takes a pointer to an i64 object plus an i64 operand and returns an i1;
; the ".shared" form takes its pointer in addrspace(3).
declare i1 @llvm.nvvm.mbarrier.test.wait(i64* %a, i64 %b)
declare i1 @llvm.nvvm.mbarrier.test.wait.shared(i64 addrspace(3)* %a, i64 %b)

; Lowering of llvm.nvvm.mbarrier.test.wait on a generic pointer.
; Expected instruction: mbarrier.test_wait.b64 pred, [addr], state (the
; intrinsic's ".test.wait" is spelled "test_wait" in the expected PTX).
; pred is %p<N> and state is %rd<N> in both runs; the address is %r<N> in the
; nvptx run and %rd<N> in the nvptx64 run. The returned i1 is not used.
; The label directives use the prefixes enabled by the RUN lines and are
; anchored on "name(" so they cannot match inside a longer function name.
; CHECK_PTX32-LABEL: barriertestwait(
; CHECK_PTX64-LABEL: barriertestwait(
define void @barriertestwait(i64* %a, i64 %b) {
; CHECK_PTX32: mbarrier.test_wait.b64 %p{{[0-9]+}}, [%r{{[0-9]+}}], %rd{{[0-9]+}};
; CHECK_PTX64: mbarrier.test_wait.b64 %p{{[0-9]+}}, [%rd{{[0-9]+}}], %rd{{[0-9]+}};
  %ret = tail call i1 @llvm.nvvm.mbarrier.test.wait(i64* %a, i64 %b)
  ret void
}

; Lowering of llvm.nvvm.mbarrier.test.wait.shared on an addrspace(3) pointer.
; Expected instruction: mbarrier.test_wait.shared.b64 pred, [addr], state.
; pred is %p<N> and state is %rd<N> in both runs; the address is %r<N> in the
; nvptx run and %rd<N> in the nvptx64 run. The returned i1 is not used.
; The label directives use the prefixes enabled by the RUN lines and are
; anchored on "name(" so each label matches exactly one function header.
; CHECK_PTX32-LABEL: barriertestwaitshared(
; CHECK_PTX64-LABEL: barriertestwaitshared(
define void @barriertestwaitshared(i64 addrspace(3)* %a, i64 %b) {
; CHECK_PTX32: mbarrier.test_wait.shared.b64 %p{{[0-9]+}}, [%r{{[0-9]+}}], %rd{{[0-9]+}};
; CHECK_PTX64: mbarrier.test_wait.shared.b64 %p{{[0-9]+}}, [%rd{{[0-9]+}}], %rd{{[0-9]+}};
  %ret = tail call i1 @llvm.nvvm.mbarrier.test.wait.shared(i64 addrspace(3)* %a, i64 %b)
  ret void
}

; Declaration of the mbarrier.pending.count intrinsic exercised below.
; It takes a single i64 value (no pointer operand) and returns an i32.
declare i32 @llvm.nvvm.mbarrier.pending.count(i64 %b)

; Lowering of llvm.nvvm.mbarrier.pending.count.
; Expected instruction: mbarrier.pending_count.b64 dst, state, with dst in
; %r<N> and state in %rd<N>. The expectation is identical for both runs because
; the intrinsic has no pointer operand. Parameter %a is not referenced by the
; body; the i32 result is returned to the caller.
; The label directives use the prefixes enabled by the RUN lines and are
; anchored on "name(" so they match only this function's header.
; CHECK_PTX32-LABEL: barrierpendingcount(
; CHECK_PTX64-LABEL: barrierpendingcount(
define i32 @barrierpendingcount(i64* %a, i64 %b) {
; CHECK_PTX32: mbarrier.pending_count.b64 %r{{[0-9]+}}, %rd{{[0-9]+}};
; CHECK_PTX64: mbarrier.pending_count.b64 %r{{[0-9]+}}, %rd{{[0-9]+}};
  %ret = tail call i32 @llvm.nvvm.mbarrier.pending.count(i64 %b)
  ret i32 %ret
}
