; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Test case: 32-bit unsigned division by a loop-invariant denominator.
; The kernel stores (%tmp udiv %arg1) to %arg[%tmp] for %tmp = 0 .. 1023.
; %arg1 is a kernel argument and is never redefined inside the loop, so the
; checks below require v_cvt_f32_u32 / v_rcp_iflag_f32 and the listed
; follow-up instructions to appear before the loop label, and forbid any
; v_rcp between the loop label and the loop back-branch.
; GCN-LABEL: {{^}}udiv32_invariant_denom:
; GCN:     v_cvt_f32_u32
; GCN:     v_rcp_iflag_f32
; GCN:     v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
; GCN:     v_cvt_u32_f32_e32
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_mul_lo_i32
; GCN-DAG: v_sub_i32_e32
; GCN-DAG: v_cmp_eq_u32_e64
; GCN-DAG: v_cndmask_b32_e64
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_add_i32_e32
; GCN-DAG: v_subrev_i32_e32
; GCN-DAG: v_cndmask_b32_e64
; GCN:     [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN:     s_cbranch_scc0 [[LOOP]]
; GCN:     s_endpgm
define amdgpu_kernel void @udiv32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  br label %bb3

bb2:                                              ; preds = %bb3
  ret void

bb3:                                              ; preds = %bb3, %bb
  ; %tmp is the loop counter: 0 on entry, %tmp7 on the back-edge.
  %tmp = phi i32 [ 0, %bb ], [ %tmp7, %bb3 ]
  ; Only the numerator varies per iteration; the divisor %arg1 is invariant.
  %tmp4 = udiv i32 %tmp, %arg1
  %tmp5 = zext i32 %tmp to i64
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp5
  store i32 %tmp4, i32 addrspace(1)* %tmp6, align 4
  %tmp7 = add nuw nsw i32 %tmp, 1
  ; Leave the loop once the incremented counter reaches 1024.
  %tmp8 = icmp eq i32 %tmp7, 1024
  br i1 %tmp8, label %bb2, label %bb3
}

; Test case: 32-bit unsigned remainder by a loop-invariant denominator.
; The kernel stores (%tmp urem %arg1) to %arg[%tmp] for %tmp = 0 .. 1023.
; %arg1 is a kernel argument and is never redefined inside the loop, so the
; checks below require v_cvt_f32_u32 / v_rcp_iflag_f32 and the listed
; follow-up instructions to appear before the loop label, and forbid any
; v_rcp between the loop label and the loop back-branch.
; GCN-LABEL: {{^}}urem32_invariant_denom:
; GCN:     v_cvt_f32_u32
; GCN:     v_rcp_iflag_f32
; GCN:     v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
; GCN:     v_cvt_u32_f32_e32
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_mul_lo_i32
; GCN-DAG: v_sub_i32_e32
; GCN-DAG: v_cmp_eq_u32_e64
; GCN-DAG: v_cndmask_b32_e64
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_add_i32_e32
; GCN-DAG: v_subrev_i32_e32
; GCN-DAG: v_cndmask_b32_e64
; GCN:     [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN:     s_cbranch_scc0 [[LOOP]]
; GCN:     s_endpgm
define amdgpu_kernel void @urem32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  br label %bb3

bb2:                                              ; preds = %bb3
  ret void

bb3:                                              ; preds = %bb3, %bb
  ; %tmp is the loop counter: 0 on entry, %tmp7 on the back-edge.
  %tmp = phi i32 [ 0, %bb ], [ %tmp7, %bb3 ]
  ; Only the numerator varies per iteration; the divisor %arg1 is invariant.
  %tmp4 = urem i32 %tmp, %arg1
  %tmp5 = zext i32 %tmp to i64
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp5
  store i32 %tmp4, i32 addrspace(1)* %tmp6, align 4
  %tmp7 = add nuw nsw i32 %tmp, 1
  ; Leave the loop once the incremented counter reaches 1024.
  %tmp8 = icmp eq i32 %tmp7, 1024
  br i1 %tmp8, label %bb2, label %bb3
}

; Test case: 32-bit signed division by a loop-invariant denominator.
; The kernel stores (%tmp sdiv %arg1) to %arg[%tmp] for %tmp = 0 .. 1023.
; %arg1 is a kernel argument and is never redefined inside the loop, so the
; checks below require v_cvt_f32_u32 / v_rcp_iflag_f32 and the listed
; follow-up instructions to appear before the loop label, and forbid any
; v_rcp between the loop label and the loop back-branch.
; GCN-LABEL: {{^}}sdiv32_invariant_denom:
; GCN:     v_cvt_f32_u32
; GCN:     v_rcp_iflag_f32
; GCN:     v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
; GCN:     v_cvt_u32_f32_e32
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_mul_lo_i32
; GCN-DAG: v_sub_i32_e32
; GCN-DAG: v_cmp_eq_u32_e64
; GCN-DAG: v_cndmask_b32_e64
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_add_i32_e32
; GCN-DAG: v_subrev_i32_e32
; GCN-DAG: v_cndmask_b32_e64
; GCN:     [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN:     s_cbranch_scc0 [[LOOP]]
; GCN:     s_endpgm
define amdgpu_kernel void @sdiv32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  br label %bb3

bb2:                                              ; preds = %bb3
  ret void

bb3:                                              ; preds = %bb3, %bb
  ; %tmp is the loop counter: 0 on entry, %tmp7 on the back-edge.
  %tmp = phi i32 [ 0, %bb ], [ %tmp7, %bb3 ]
  ; Only the numerator varies per iteration; the divisor %arg1 is invariant.
  %tmp4 = sdiv i32 %tmp, %arg1
  %tmp5 = zext i32 %tmp to i64
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp5
  store i32 %tmp4, i32 addrspace(1)* %tmp6, align 4
  %tmp7 = add nuw nsw i32 %tmp, 1
  ; Leave the loop once the incremented counter reaches 1024.
  %tmp8 = icmp eq i32 %tmp7, 1024
  br i1 %tmp8, label %bb2, label %bb3
}

; Test case: 32-bit signed remainder by a loop-invariant denominator.
; The kernel stores (%tmp srem %arg1) to %arg[%tmp] for %tmp = 0 .. 1023.
; %arg1 is a kernel argument and is never redefined inside the loop, so the
; checks below require v_cvt_f32_u32 / v_rcp_iflag_f32 and the listed
; follow-up instructions to appear before the loop label, and forbid any
; v_rcp between the loop label and the loop back-branch.
; GCN-LABEL: {{^}}srem32_invariant_denom:
; GCN:     v_cvt_f32_u32
; GCN:     v_rcp_iflag_f32
; GCN:     v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
; GCN:     v_cvt_u32_f32_e32
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_mul_lo_i32
; GCN-DAG: v_sub_i32_e32
; GCN-DAG: v_cmp_eq_u32_e64
; GCN-DAG: v_cndmask_b32_e64
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_add_i32_e32
; GCN-DAG: v_subrev_i32_e32
; GCN-DAG: v_cndmask_b32_e64
; GCN:     [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN:     s_cbranch_scc0 [[LOOP]]
; GCN:     s_endpgm
define amdgpu_kernel void @srem32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  br label %bb3

bb2:                                              ; preds = %bb3
  ret void

bb3:                                              ; preds = %bb3, %bb
  ; %tmp is the loop counter: 0 on entry, %tmp7 on the back-edge.
  %tmp = phi i32 [ 0, %bb ], [ %tmp7, %bb3 ]
  ; Only the numerator varies per iteration; the divisor %arg1 is invariant.
  %tmp4 = srem i32 %tmp, %arg1
  %tmp5 = zext i32 %tmp to i64
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp5
  store i32 %tmp4, i32 addrspace(1)* %tmp6, align 4
  %tmp7 = add nuw nsw i32 %tmp, 1
  ; Leave the loop once the incremented counter reaches 1024.
  %tmp8 = icmp eq i32 %tmp7, 1024
  br i1 %tmp8, label %bb2, label %bb3
}

; Test case: 16-bit unsigned division by a loop-invariant denominator.
; The kernel stores (%tmp udiv %arg1) to %arg[%tmp] for %tmp = 0 .. 1023,
; using an i16 counter and i16 elements. %arg1 is a kernel argument and is
; never redefined inside the loop, so the checks below require
; v_cvt_f32_u32 and v_rcp_iflag_f32 to appear before the loop label, and
; forbid any v_rcp between the loop label and the loop back-branch.
; GCN-LABEL: {{^}}udiv16_invariant_denom:
; GCN:     v_cvt_f32_u32
; GCN:     v_rcp_iflag_f32
; GCN:     [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN:     s_cbranch_scc0 [[LOOP]]
; GCN:     s_endpgm
define amdgpu_kernel void @udiv16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
bb:
  br label %bb3

bb2:                                              ; preds = %bb3
  ret void

bb3:                                              ; preds = %bb3, %bb
  ; %tmp is the loop counter: 0 on entry, %tmp7 on the back-edge.
  %tmp = phi i16 [ 0, %bb ], [ %tmp7, %bb3 ]
  ; Only the numerator varies per iteration; the divisor %arg1 is invariant.
  %tmp4 = udiv i16 %tmp, %arg1
  %tmp5 = zext i16 %tmp to i64
  %tmp6 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %tmp5
  store i16 %tmp4, i16 addrspace(1)* %tmp6, align 2
  %tmp7 = add nuw nsw i16 %tmp, 1
  ; Leave the loop once the incremented counter reaches 1024.
  %tmp8 = icmp eq i16 %tmp7, 1024
  br i1 %tmp8, label %bb2, label %bb3
}

; Test case: 16-bit unsigned remainder by a loop-invariant denominator.
; The kernel stores (%tmp urem %arg1) to %arg[%tmp] for %tmp = 0 .. 1023,
; using an i16 counter and i16 elements. %arg1 is a kernel argument and is
; never redefined inside the loop, so the checks below require
; v_cvt_f32_u32 and v_rcp_iflag_f32 to appear before the loop label, and
; forbid any v_rcp between the loop label and the loop back-branch.
; GCN-LABEL: {{^}}urem16_invariant_denom:
; GCN:     v_cvt_f32_u32
; GCN:     v_rcp_iflag_f32
; GCN:     [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN:     s_cbranch_scc0 [[LOOP]]
; GCN:     s_endpgm
define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
bb:
  br label %bb3

bb2:                                              ; preds = %bb3
  ret void

bb3:                                              ; preds = %bb3, %bb
  ; %tmp is the loop counter: 0 on entry, %tmp7 on the back-edge.
  %tmp = phi i16 [ 0, %bb ], [ %tmp7, %bb3 ]
  ; Only the numerator varies per iteration; the divisor %arg1 is invariant.
  %tmp4 = urem i16 %tmp, %arg1
  %tmp5 = zext i16 %tmp to i64
  %tmp6 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %tmp5
  store i16 %tmp4, i16 addrspace(1)* %tmp6, align 2
  %tmp7 = add nuw nsw i16 %tmp, 1
  ; Leave the loop once the incremented counter reaches 1024.
  %tmp8 = icmp eq i16 %tmp7, 1024
  br i1 %tmp8, label %bb2, label %bb3
}

; Test case: 16-bit signed division by a loop-invariant denominator.
; The kernel stores (%tmp sdiv %arg1) to %arg[%tmp] for %tmp = 0 .. 1023,
; using an i16 counter and i16 elements. %arg1 is a kernel argument and is
; never redefined inside the loop, so the checks below require
; s_sext_i32_i16, the v_and_b32 with 0x7fffffff, v_cvt_f32_i32 and
; v_rcp_iflag_f32 to appear (in any order) before the loop label, and
; forbid any v_rcp between the loop label and the loop back-branch.
; GCN-LABEL: {{^}}sdiv16_invariant_denom:
; GCN-DAG: s_sext_i32_i16
; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0x7fffffff
; GCN-DAG: v_cvt_f32_i32
; GCN-DAG: v_rcp_iflag_f32
; GCN:     [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN:     s_cbranch_scc0 [[LOOP]]
; GCN:     s_endpgm
define amdgpu_kernel void @sdiv16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
bb:
  br label %bb3

bb2:                                              ; preds = %bb3
  ret void

bb3:                                              ; preds = %bb3, %bb
  ; %tmp is the loop counter: 0 on entry, %tmp7 on the back-edge.
  %tmp = phi i16 [ 0, %bb ], [ %tmp7, %bb3 ]
  ; Only the numerator varies per iteration; the divisor %arg1 is invariant.
  %tmp4 = sdiv i16 %tmp, %arg1
  %tmp5 = zext i16 %tmp to i64
  %tmp6 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %tmp5
  store i16 %tmp4, i16 addrspace(1)* %tmp6, align 2
  %tmp7 = add nuw nsw i16 %tmp, 1
  ; Leave the loop once the incremented counter reaches 1024.
  %tmp8 = icmp eq i16 %tmp7, 1024
  br i1 %tmp8, label %bb2, label %bb3
}

; Test case: 16-bit signed remainder by a loop-invariant denominator.
; The kernel stores (%tmp srem %arg1) to %arg[%tmp] for %tmp = 0 .. 1023,
; using an i16 counter and i16 elements. %arg1 is a kernel argument and is
; never redefined inside the loop, so the checks below require
; s_sext_i32_i16, the v_and_b32 with 0x7fffffff, v_cvt_f32_i32 and
; v_rcp_iflag_f32 to appear (in any order) before the loop label, and
; forbid any v_rcp between the loop label and the loop back-branch.
; GCN-LABEL: {{^}}srem16_invariant_denom:
; GCN-DAG: s_sext_i32_i16
; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0x7fffffff
; GCN-DAG: v_cvt_f32_i32
; GCN-DAG: v_rcp_iflag_f32
; GCN:     [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN:     s_cbranch_scc0 [[LOOP]]
; GCN:     s_endpgm
define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
bb:
  br label %bb3

bb2:                                              ; preds = %bb3
  ret void

bb3:                                              ; preds = %bb3, %bb
  ; %tmp is the loop counter: 0 on entry, %tmp7 on the back-edge.
  %tmp = phi i16 [ 0, %bb ], [ %tmp7, %bb3 ]
  ; Only the numerator varies per iteration; the divisor %arg1 is invariant.
  %tmp4 = srem i16 %tmp, %arg1
  %tmp5 = zext i16 %tmp to i64
  %tmp6 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %tmp5
  store i16 %tmp4, i16 addrspace(1)* %tmp6, align 2
  %tmp7 = add nuw nsw i16 %tmp, 1
  ; Leave the loop once the incremented counter reaches 1024.
  %tmp8 = icmp eq i16 %tmp7, 1024
  br i1 %tmp8, label %bb2, label %bb3
}
