1 // REQUIRES: nvptx-registered-target
2 // REQUIRES: nvptx64-registered-target
3 // RUN: %clang_cc1 -triple nvptx-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
4 // RUN: %clang_cc1 -triple nvptx64-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
5 
6 int read_tid() {
7 
8 // CHECK: call i32 @llvm.ptx.read.tid.x()
9 // CHECK: call i32 @llvm.ptx.read.tid.y()
10 // CHECK: call i32 @llvm.ptx.read.tid.z()
11 // CHECK: call i32 @llvm.ptx.read.tid.w()
12 
13   int x = __builtin_ptx_read_tid_x();
14   int y = __builtin_ptx_read_tid_y();
15   int z = __builtin_ptx_read_tid_z();
16   int w = __builtin_ptx_read_tid_w();
17 
18   return x + y + z + w;
19 
20 }
21 
22 int read_ntid() {
23 
24 // CHECK: call i32 @llvm.ptx.read.ntid.x()
25 // CHECK: call i32 @llvm.ptx.read.ntid.y()
26 // CHECK: call i32 @llvm.ptx.read.ntid.z()
27 // CHECK: call i32 @llvm.ptx.read.ntid.w()
28 
29   int x = __builtin_ptx_read_ntid_x();
30   int y = __builtin_ptx_read_ntid_y();
31   int z = __builtin_ptx_read_ntid_z();
32   int w = __builtin_ptx_read_ntid_w();
33 
34   return x + y + z + w;
35 
36 }
37 
38 int read_ctaid() {
39 
40 // CHECK: call i32 @llvm.ptx.read.ctaid.x()
41 // CHECK: call i32 @llvm.ptx.read.ctaid.y()
42 // CHECK: call i32 @llvm.ptx.read.ctaid.z()
43 // CHECK: call i32 @llvm.ptx.read.ctaid.w()
44 
45   int x = __builtin_ptx_read_ctaid_x();
46   int y = __builtin_ptx_read_ctaid_y();
47   int z = __builtin_ptx_read_ctaid_z();
48   int w = __builtin_ptx_read_ctaid_w();
49 
50   return x + y + z + w;
51 
52 }
53 
54 int read_nctaid() {
55 
56 // CHECK: call i32 @llvm.ptx.read.nctaid.x()
57 // CHECK: call i32 @llvm.ptx.read.nctaid.y()
58 // CHECK: call i32 @llvm.ptx.read.nctaid.z()
59 // CHECK: call i32 @llvm.ptx.read.nctaid.w()
60 
61   int x = __builtin_ptx_read_nctaid_x();
62   int y = __builtin_ptx_read_nctaid_y();
63   int z = __builtin_ptx_read_nctaid_z();
64   int w = __builtin_ptx_read_nctaid_w();
65 
66   return x + y + z + w;
67 
68 }
69 
70 int read_ids() {
71 
72 // CHECK: call i32 @llvm.ptx.read.laneid()
73 // CHECK: call i32 @llvm.ptx.read.warpid()
74 // CHECK: call i32 @llvm.ptx.read.nwarpid()
75 // CHECK: call i32 @llvm.ptx.read.smid()
76 // CHECK: call i32 @llvm.ptx.read.nsmid()
77 // CHECK: call i32 @llvm.ptx.read.gridid()
78 
79   int a = __builtin_ptx_read_laneid();
80   int b = __builtin_ptx_read_warpid();
81   int c = __builtin_ptx_read_nwarpid();
82   int d = __builtin_ptx_read_smid();
83   int e = __builtin_ptx_read_nsmid();
84   int f = __builtin_ptx_read_gridid();
85 
86   return a + b + c + d + e + f;
87 
88 }
89 
90 int read_lanemasks() {
91 
92 // CHECK: call i32 @llvm.ptx.read.lanemask.eq()
93 // CHECK: call i32 @llvm.ptx.read.lanemask.le()
94 // CHECK: call i32 @llvm.ptx.read.lanemask.lt()
95 // CHECK: call i32 @llvm.ptx.read.lanemask.ge()
96 // CHECK: call i32 @llvm.ptx.read.lanemask.gt()
97 
98   int a = __builtin_ptx_read_lanemask_eq();
99   int b = __builtin_ptx_read_lanemask_le();
100   int c = __builtin_ptx_read_lanemask_lt();
101   int d = __builtin_ptx_read_lanemask_ge();
102   int e = __builtin_ptx_read_lanemask_gt();
103 
104   return a + b + c + d + e;
105 
106 }
107 
108 
109 long read_clocks() {
110 
111 // CHECK: call i32 @llvm.ptx.read.clock()
112 // CHECK: call i64 @llvm.ptx.read.clock64()
113 
114   int a = __builtin_ptx_read_clock();
115   long b = __builtin_ptx_read_clock64();
116 
117   return (long)a + b;
118 
119 }
120 
121 int read_pms() {
122 
123 // CHECK: call i32 @llvm.ptx.read.pm0()
124 // CHECK: call i32 @llvm.ptx.read.pm1()
125 // CHECK: call i32 @llvm.ptx.read.pm2()
126 // CHECK: call i32 @llvm.ptx.read.pm3()
127 
128   int a = __builtin_ptx_read_pm0();
129   int b = __builtin_ptx_read_pm1();
130   int c = __builtin_ptx_read_pm2();
131   int d = __builtin_ptx_read_pm3();
132 
133   return a + b + c + d;
134 
135 }
136 
137 void sync() {
138 
139 // CHECK: call void @llvm.ptx.bar.sync(i32 0)
140 
141   __builtin_ptx_bar_sync(0);
142 
143 }
144 
145 
146 // NVVM intrinsics
147 
148 // The idea is not to test all intrinsics, just that Clang is recognizing the
149 // builtins defined in BuiltinsNVPTX.def
150 void nvvm_math(float f1, float f2, double d1, double d2) {
151 // CHECK: call float @llvm.nvvm.fmax.f
152   float t1 = __nvvm_fmax_f(f1, f2);
153 // CHECK: call float @llvm.nvvm.fmin.f
154   float t2 = __nvvm_fmin_f(f1, f2);
155 // CHECK: call float @llvm.nvvm.sqrt.rn.f
156   float t3 = __nvvm_sqrt_rn_f(f1);
157 // CHECK: call float @llvm.nvvm.rcp.rn.f
158   float t4 = __nvvm_rcp_rn_f(f2);
159 
160 // CHECK: call double @llvm.nvvm.fmax.d
161   double td1 = __nvvm_fmax_d(d1, d2);
162 // CHECK: call double @llvm.nvvm.fmin.d
163   double td2 = __nvvm_fmin_d(d1, d2);
164 // CHECK: call double @llvm.nvvm.sqrt.rn.d
165   double td3 = __nvvm_sqrt_rn_d(d1);
166 // CHECK: call double @llvm.nvvm.rcp.rn.d
167   double td4 = __nvvm_rcp_rn_d(d2);
168 
169 // CHECK: call void @llvm.nvvm.membar.cta()
170   __nvvm_membar_cta();
171 // CHECK: call void @llvm.nvvm.membar.gl()
172   __nvvm_membar_gl();
173 // CHECK: call void @llvm.nvvm.membar.sys()
174   __nvvm_membar_sys();
175 // CHECK: call void @llvm.nvvm.barrier0()
176   __nvvm_bar0();
177 }
178