1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck -check-prefix=SI %s
3; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -amdgpu-codegenprepare %s | FileCheck -check-prefix=VI %s
4
5define amdgpu_kernel void @add_i3(i3 %a, i3 %b) {
6; SI-LABEL: @add_i3(
7; SI-NEXT:    [[R:%.*]] = add i3 [[A:%.*]], [[B:%.*]]
8; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
9; SI-NEXT:    ret void
10;
11; VI-LABEL: @add_i3(
12; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
13; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
14; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
15; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
16; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
17; VI-NEXT:    ret void
18;
19  %r = add i3 %a, %b
20  store volatile i3 %r, i3 addrspace(1)* undef
21  ret void
22}
23
24define amdgpu_kernel void @add_nsw_i3(i3 %a, i3 %b) {
25; SI-LABEL: @add_nsw_i3(
26; SI-NEXT:    [[R:%.*]] = add nsw i3 [[A:%.*]], [[B:%.*]]
27; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
28; SI-NEXT:    ret void
29;
30; VI-LABEL: @add_nsw_i3(
31; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
32; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
33; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
34; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
35; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
36; VI-NEXT:    ret void
37;
38  %r = add nsw i3 %a, %b
39  store volatile i3 %r, i3 addrspace(1)* undef
40  ret void
41}
42
43define amdgpu_kernel void @add_nuw_i3(i3 %a, i3 %b) {
44; SI-LABEL: @add_nuw_i3(
45; SI-NEXT:    [[R:%.*]] = add nuw i3 [[A:%.*]], [[B:%.*]]
46; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
47; SI-NEXT:    ret void
48;
49; VI-LABEL: @add_nuw_i3(
50; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
51; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
52; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
53; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
54; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
55; VI-NEXT:    ret void
56;
57  %r = add nuw i3 %a, %b
58  store volatile i3 %r, i3 addrspace(1)* undef
59  ret void
60}
61
62define amdgpu_kernel void @add_nuw_nsw_i3(i3 %a, i3 %b) {
63; SI-LABEL: @add_nuw_nsw_i3(
64; SI-NEXT:    [[R:%.*]] = add nuw nsw i3 [[A:%.*]], [[B:%.*]]
65; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
66; SI-NEXT:    ret void
67;
68; VI-LABEL: @add_nuw_nsw_i3(
69; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
70; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
71; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
72; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
73; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
74; VI-NEXT:    ret void
75;
76  %r = add nuw nsw i3 %a, %b
77  store volatile i3 %r, i3 addrspace(1)* undef
78  ret void
79}
80
81define amdgpu_kernel void @sub_i3(i3 %a, i3 %b) {
82; SI-LABEL: @sub_i3(
83; SI-NEXT:    [[R:%.*]] = sub i3 [[A:%.*]], [[B:%.*]]
84; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
85; SI-NEXT:    ret void
86;
87; VI-LABEL: @sub_i3(
88; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
89; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
90; VI-NEXT:    [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
91; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
92; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
93; VI-NEXT:    ret void
94;
95  %r = sub i3 %a, %b
96  store volatile i3 %r, i3 addrspace(1)* undef
97  ret void
98}
99
100define amdgpu_kernel void @sub_nsw_i3(i3 %a, i3 %b) {
101; SI-LABEL: @sub_nsw_i3(
102; SI-NEXT:    [[R:%.*]] = sub nsw i3 [[A:%.*]], [[B:%.*]]
103; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
104; SI-NEXT:    ret void
105;
106; VI-LABEL: @sub_nsw_i3(
107; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
108; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
109; VI-NEXT:    [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
110; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
111; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
112; VI-NEXT:    ret void
113;
114  %r = sub nsw i3 %a, %b
115  store volatile i3 %r, i3 addrspace(1)* undef
116  ret void
117}
118
119define amdgpu_kernel void @sub_nuw_i3(i3 %a, i3 %b) {
120; SI-LABEL: @sub_nuw_i3(
121; SI-NEXT:    [[R:%.*]] = sub nuw i3 [[A:%.*]], [[B:%.*]]
122; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
123; SI-NEXT:    ret void
124;
125; VI-LABEL: @sub_nuw_i3(
126; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
127; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
128; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
129; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
130; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
131; VI-NEXT:    ret void
132;
133  %r = sub nuw i3 %a, %b
134  store volatile i3 %r, i3 addrspace(1)* undef
135  ret void
136}
137
138define amdgpu_kernel void @sub_nuw_nsw_i3(i3 %a, i3 %b) {
139; SI-LABEL: @sub_nuw_nsw_i3(
140; SI-NEXT:    [[R:%.*]] = sub nuw nsw i3 [[A:%.*]], [[B:%.*]]
141; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
142; SI-NEXT:    ret void
143;
144; VI-LABEL: @sub_nuw_nsw_i3(
145; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
146; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
147; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
148; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
149; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
150; VI-NEXT:    ret void
151;
152  %r = sub nuw nsw i3 %a, %b
153  store volatile i3 %r, i3 addrspace(1)* undef
154  ret void
155}
156
157define amdgpu_kernel void @mul_i3(i3 %a, i3 %b) {
158; SI-LABEL: @mul_i3(
159; SI-NEXT:    [[R:%.*]] = mul i3 [[A:%.*]], [[B:%.*]]
160; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
161; SI-NEXT:    ret void
162;
163; VI-LABEL: @mul_i3(
164; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
165; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
166; VI-NEXT:    [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
167; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
168; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
169; VI-NEXT:    ret void
170;
171  %r = mul i3 %a, %b
172  store volatile i3 %r, i3 addrspace(1)* undef
173  ret void
174}
175
176define amdgpu_kernel void @mul_nsw_i3(i3 %a, i3 %b) {
177; SI-LABEL: @mul_nsw_i3(
178; SI-NEXT:    [[R:%.*]] = mul nsw i3 [[A:%.*]], [[B:%.*]]
179; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
180; SI-NEXT:    ret void
181;
182; VI-LABEL: @mul_nsw_i3(
183; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
184; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
185; VI-NEXT:    [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
186; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
187; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
188; VI-NEXT:    ret void
189;
190  %r = mul nsw i3 %a, %b
191  store volatile i3 %r, i3 addrspace(1)* undef
192  ret void
193}
194
195define amdgpu_kernel void @mul_nuw_i3(i3 %a, i3 %b) {
196; SI-LABEL: @mul_nuw_i3(
197; SI-NEXT:    [[R:%.*]] = mul nuw i3 [[A:%.*]], [[B:%.*]]
198; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
199; SI-NEXT:    ret void
200;
201; VI-LABEL: @mul_nuw_i3(
202; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
203; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
204; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
205; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
206; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
207; VI-NEXT:    ret void
208;
209  %r = mul nuw i3 %a, %b
210  store volatile i3 %r, i3 addrspace(1)* undef
211  ret void
212}
213
214define amdgpu_kernel void @mul_nuw_nsw_i3(i3 %a, i3 %b) {
215; SI-LABEL: @mul_nuw_nsw_i3(
216; SI-NEXT:    [[R:%.*]] = mul nuw nsw i3 [[A:%.*]], [[B:%.*]]
217; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
218; SI-NEXT:    ret void
219;
220; VI-LABEL: @mul_nuw_nsw_i3(
221; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
222; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
223; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
224; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
225; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
226; VI-NEXT:    ret void
227;
228  %r = mul nuw nsw i3 %a, %b
229  store volatile i3 %r, i3 addrspace(1)* undef
230  ret void
231}
232
233define amdgpu_kernel void @shl_i3(i3 %a, i3 %b) {
234; SI-LABEL: @shl_i3(
235; SI-NEXT:    [[R:%.*]] = shl i3 [[A:%.*]], [[B:%.*]]
236; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
237; SI-NEXT:    ret void
238;
239; VI-LABEL: @shl_i3(
240; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
241; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
242; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
243; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
244; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
245; VI-NEXT:    ret void
246;
247  %r = shl i3 %a, %b
248  store volatile i3 %r, i3 addrspace(1)* undef
249  ret void
250}
251
252define amdgpu_kernel void @shl_nsw_i3(i3 %a, i3 %b) {
253; SI-LABEL: @shl_nsw_i3(
254; SI-NEXT:    [[R:%.*]] = shl nsw i3 [[A:%.*]], [[B:%.*]]
255; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
256; SI-NEXT:    ret void
257;
258; VI-LABEL: @shl_nsw_i3(
259; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
260; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
261; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
262; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
263; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
264; VI-NEXT:    ret void
265;
266  %r = shl nsw i3 %a, %b
267  store volatile i3 %r, i3 addrspace(1)* undef
268  ret void
269}
270
271define amdgpu_kernel void @shl_nuw_i3(i3 %a, i3 %b) {
272; SI-LABEL: @shl_nuw_i3(
273; SI-NEXT:    [[R:%.*]] = shl nuw i3 [[A:%.*]], [[B:%.*]]
274; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
275; SI-NEXT:    ret void
276;
277; VI-LABEL: @shl_nuw_i3(
278; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
279; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
280; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
281; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
282; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
283; VI-NEXT:    ret void
284;
285  %r = shl nuw i3 %a, %b
286  store volatile i3 %r, i3 addrspace(1)* undef
287  ret void
288}
289
290define amdgpu_kernel void @shl_nuw_nsw_i3(i3 %a, i3 %b) {
291; SI-LABEL: @shl_nuw_nsw_i3(
292; SI-NEXT:    [[R:%.*]] = shl nuw nsw i3 [[A:%.*]], [[B:%.*]]
293; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
294; SI-NEXT:    ret void
295;
296; VI-LABEL: @shl_nuw_nsw_i3(
297; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
298; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
299; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
300; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
301; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
302; VI-NEXT:    ret void
303;
304  %r = shl nuw nsw i3 %a, %b
305  store volatile i3 %r, i3 addrspace(1)* undef
306  ret void
307}
308
309define amdgpu_kernel void @lshr_i3(i3 %a, i3 %b) {
310; SI-LABEL: @lshr_i3(
311; SI-NEXT:    [[R:%.*]] = lshr i3 [[A:%.*]], [[B:%.*]]
312; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
313; SI-NEXT:    ret void
314;
315; VI-LABEL: @lshr_i3(
316; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
317; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
318; VI-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP1]], [[TMP2]]
319; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
320; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
321; VI-NEXT:    ret void
322;
323  %r = lshr i3 %a, %b
324  store volatile i3 %r, i3 addrspace(1)* undef
325  ret void
326}
327
328define amdgpu_kernel void @lshr_exact_i3(i3 %a, i3 %b) {
329; SI-LABEL: @lshr_exact_i3(
330; SI-NEXT:    [[R:%.*]] = lshr exact i3 [[A:%.*]], [[B:%.*]]
331; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
332; SI-NEXT:    ret void
333;
334; VI-LABEL: @lshr_exact_i3(
335; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
336; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
337; VI-NEXT:    [[TMP3:%.*]] = lshr exact i32 [[TMP1]], [[TMP2]]
338; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
339; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
340; VI-NEXT:    ret void
341;
342  %r = lshr exact i3 %a, %b
343  store volatile i3 %r, i3 addrspace(1)* undef
344  ret void
345}
346
347define amdgpu_kernel void @ashr_i3(i3 %a, i3 %b) {
348; SI-LABEL: @ashr_i3(
349; SI-NEXT:    [[R:%.*]] = ashr i3 [[A:%.*]], [[B:%.*]]
350; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
351; SI-NEXT:    ret void
352;
353; VI-LABEL: @ashr_i3(
354; VI-NEXT:    [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
355; VI-NEXT:    [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
356; VI-NEXT:    [[TMP3:%.*]] = ashr i32 [[TMP1]], [[TMP2]]
357; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
358; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
359; VI-NEXT:    ret void
360;
361  %r = ashr i3 %a, %b
362  store volatile i3 %r, i3 addrspace(1)* undef
363  ret void
364}
365
366define amdgpu_kernel void @ashr_exact_i3(i3 %a, i3 %b) {
367; SI-LABEL: @ashr_exact_i3(
368; SI-NEXT:    [[R:%.*]] = ashr exact i3 [[A:%.*]], [[B:%.*]]
369; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
370; SI-NEXT:    ret void
371;
372; VI-LABEL: @ashr_exact_i3(
373; VI-NEXT:    [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
374; VI-NEXT:    [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
375; VI-NEXT:    [[TMP3:%.*]] = ashr exact i32 [[TMP1]], [[TMP2]]
376; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
377; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
378; VI-NEXT:    ret void
379;
380  %r = ashr exact i3 %a, %b
381  store volatile i3 %r, i3 addrspace(1)* undef
382  ret void
383}
384
385define amdgpu_kernel void @and_i3(i3 %a, i3 %b) {
386; SI-LABEL: @and_i3(
387; SI-NEXT:    [[R:%.*]] = and i3 [[A:%.*]], [[B:%.*]]
388; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
389; SI-NEXT:    ret void
390;
391; VI-LABEL: @and_i3(
392; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
393; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
394; VI-NEXT:    [[TMP3:%.*]] = and i32 [[TMP1]], [[TMP2]]
395; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
396; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
397; VI-NEXT:    ret void
398;
399  %r = and i3 %a, %b
400  store volatile i3 %r, i3 addrspace(1)* undef
401  ret void
402}
403
404define amdgpu_kernel void @or_i3(i3 %a, i3 %b) {
405; SI-LABEL: @or_i3(
406; SI-NEXT:    [[R:%.*]] = or i3 [[A:%.*]], [[B:%.*]]
407; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
408; SI-NEXT:    ret void
409;
410; VI-LABEL: @or_i3(
411; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
412; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
413; VI-NEXT:    [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]]
414; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
415; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
416; VI-NEXT:    ret void
417;
418  %r = or i3 %a, %b
419  store volatile i3 %r, i3 addrspace(1)* undef
420  ret void
421}
422
423define amdgpu_kernel void @xor_i3(i3 %a, i3 %b) {
424; SI-LABEL: @xor_i3(
425; SI-NEXT:    [[R:%.*]] = xor i3 [[A:%.*]], [[B:%.*]]
426; SI-NEXT:    store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
427; SI-NEXT:    ret void
428;
429; VI-LABEL: @xor_i3(
430; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
431; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
432; VI-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
433; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
434; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
435; VI-NEXT:    ret void
436;
437  %r = xor i3 %a, %b
438  store volatile i3 %r, i3 addrspace(1)* undef
439  ret void
440}
441
442define amdgpu_kernel void @select_eq_i3(i3 %a, i3 %b) {
443; SI-LABEL: @select_eq_i3(
444; SI-NEXT:    [[CMP:%.*]] = icmp eq i3 [[A:%.*]], [[B:%.*]]
445; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
446; SI-NEXT:    store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
447; SI-NEXT:    ret void
448;
449; VI-LABEL: @select_eq_i3(
450; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
451; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
452; VI-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP1]], [[TMP2]]
453; VI-NEXT:    [[TMP4:%.*]] = zext i3 [[A]] to i32
454; VI-NEXT:    [[TMP5:%.*]] = zext i3 [[B]] to i32
455; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
456; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
457; VI-NEXT:    store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
458; VI-NEXT:    ret void
459;
460  %cmp = icmp eq i3 %a, %b
461  %sel = select i1 %cmp, i3 %a, i3 %b
462  store volatile i3 %sel, i3 addrspace(1)* undef
463  ret void
464}
465
466define amdgpu_kernel void @select_ne_i3(i3 %a, i3 %b) {
467; SI-LABEL: @select_ne_i3(
468; SI-NEXT:    [[CMP:%.*]] = icmp ne i3 [[A:%.*]], [[B:%.*]]
469; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
470; SI-NEXT:    store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
471; SI-NEXT:    ret void
472;
473; VI-LABEL: @select_ne_i3(
474; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
475; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
476; VI-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
477; VI-NEXT:    [[TMP4:%.*]] = zext i3 [[A]] to i32
478; VI-NEXT:    [[TMP5:%.*]] = zext i3 [[B]] to i32
479; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
480; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
481; VI-NEXT:    store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
482; VI-NEXT:    ret void
483;
484  %cmp = icmp ne i3 %a, %b
485  %sel = select i1 %cmp, i3 %a, i3 %b
486  store volatile i3 %sel, i3 addrspace(1)* undef
487  ret void
488}
489
490define amdgpu_kernel void @select_ugt_i3(i3 %a, i3 %b) {
491; SI-LABEL: @select_ugt_i3(
492; SI-NEXT:    [[CMP:%.*]] = icmp ugt i3 [[A:%.*]], [[B:%.*]]
493; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
494; SI-NEXT:    store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
495; SI-NEXT:    ret void
496;
497; VI-LABEL: @select_ugt_i3(
498; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
499; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
500; VI-NEXT:    [[TMP3:%.*]] = icmp ugt i32 [[TMP1]], [[TMP2]]
501; VI-NEXT:    [[TMP4:%.*]] = zext i3 [[A]] to i32
502; VI-NEXT:    [[TMP5:%.*]] = zext i3 [[B]] to i32
503; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
504; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
505; VI-NEXT:    store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
506; VI-NEXT:    ret void
507;
508  %cmp = icmp ugt i3 %a, %b
509  %sel = select i1 %cmp, i3 %a, i3 %b
510  store volatile i3 %sel, i3 addrspace(1)* undef
511  ret void
512}
513
514define amdgpu_kernel void @select_uge_i3(i3 %a, i3 %b) {
515; SI-LABEL: @select_uge_i3(
516; SI-NEXT:    [[CMP:%.*]] = icmp uge i3 [[A:%.*]], [[B:%.*]]
517; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
518; SI-NEXT:    store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
519; SI-NEXT:    ret void
520;
521; VI-LABEL: @select_uge_i3(
522; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
523; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
524; VI-NEXT:    [[TMP3:%.*]] = icmp uge i32 [[TMP1]], [[TMP2]]
525; VI-NEXT:    [[TMP4:%.*]] = zext i3 [[A]] to i32
526; VI-NEXT:    [[TMP5:%.*]] = zext i3 [[B]] to i32
527; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
528; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
529; VI-NEXT:    store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
530; VI-NEXT:    ret void
531;
532  %cmp = icmp uge i3 %a, %b
533  %sel = select i1 %cmp, i3 %a, i3 %b
534  store volatile i3 %sel, i3 addrspace(1)* undef
535  ret void
536}
537
538define amdgpu_kernel void @select_ult_i3(i3 %a, i3 %b) {
539; SI-LABEL: @select_ult_i3(
540; SI-NEXT:    [[CMP:%.*]] = icmp ult i3 [[A:%.*]], [[B:%.*]]
541; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
542; SI-NEXT:    store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
543; SI-NEXT:    ret void
544;
545; VI-LABEL: @select_ult_i3(
546; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
547; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
548; VI-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[TMP1]], [[TMP2]]
549; VI-NEXT:    [[TMP4:%.*]] = zext i3 [[A]] to i32
550; VI-NEXT:    [[TMP5:%.*]] = zext i3 [[B]] to i32
551; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
552; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
553; VI-NEXT:    store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
554; VI-NEXT:    ret void
555;
556  %cmp = icmp ult i3 %a, %b
557  %sel = select i1 %cmp, i3 %a, i3 %b
558  store volatile i3 %sel, i3 addrspace(1)* undef
559  ret void
560}
561
562define amdgpu_kernel void @select_ule_i3(i3 %a, i3 %b) {
563; SI-LABEL: @select_ule_i3(
564; SI-NEXT:    [[CMP:%.*]] = icmp ule i3 [[A:%.*]], [[B:%.*]]
565; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
566; SI-NEXT:    store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
567; SI-NEXT:    ret void
568;
569; VI-LABEL: @select_ule_i3(
570; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
571; VI-NEXT:    [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
572; VI-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP1]], [[TMP2]]
573; VI-NEXT:    [[TMP4:%.*]] = zext i3 [[A]] to i32
574; VI-NEXT:    [[TMP5:%.*]] = zext i3 [[B]] to i32
575; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
576; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
577; VI-NEXT:    store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
578; VI-NEXT:    ret void
579;
580  %cmp = icmp ule i3 %a, %b
581  %sel = select i1 %cmp, i3 %a, i3 %b
582  store volatile i3 %sel, i3 addrspace(1)* undef
583  ret void
584}
585
586define amdgpu_kernel void @select_sgt_i3(i3 %a, i3 %b) {
587; SI-LABEL: @select_sgt_i3(
588; SI-NEXT:    [[CMP:%.*]] = icmp sgt i3 [[A:%.*]], [[B:%.*]]
589; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
590; SI-NEXT:    store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
591; SI-NEXT:    ret void
592;
593; VI-LABEL: @select_sgt_i3(
594; VI-NEXT:    [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
595; VI-NEXT:    [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
596; VI-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
597; VI-NEXT:    [[TMP4:%.*]] = sext i3 [[A]] to i32
598; VI-NEXT:    [[TMP5:%.*]] = sext i3 [[B]] to i32
599; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
600; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
601; VI-NEXT:    store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
602; VI-NEXT:    ret void
603;
604  %cmp = icmp sgt i3 %a, %b
605  %sel = select i1 %cmp, i3 %a, i3 %b
606  store volatile i3 %sel, i3 addrspace(1)* undef
607  ret void
608}
609
610define amdgpu_kernel void @select_sge_i3(i3 %a, i3 %b) {
611; SI-LABEL: @select_sge_i3(
612; SI-NEXT:    [[CMP:%.*]] = icmp sge i3 [[A:%.*]], [[B:%.*]]
613; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
614; SI-NEXT:    store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
615; SI-NEXT:    ret void
616;
617; VI-LABEL: @select_sge_i3(
618; VI-NEXT:    [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
619; VI-NEXT:    [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
620; VI-NEXT:    [[TMP3:%.*]] = icmp sge i32 [[TMP1]], [[TMP2]]
621; VI-NEXT:    [[TMP4:%.*]] = sext i3 [[A]] to i32
622; VI-NEXT:    [[TMP5:%.*]] = sext i3 [[B]] to i32
623; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
624; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
625; VI-NEXT:    store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
626; VI-NEXT:    ret void
627;
628  %cmp = icmp sge i3 %a, %b
629  %sel = select i1 %cmp, i3 %a, i3 %b
630  store volatile i3 %sel, i3 addrspace(1)* undef
631  ret void
632}
633
634define amdgpu_kernel void @select_slt_i3(i3 %a, i3 %b) {
635; SI-LABEL: @select_slt_i3(
636; SI-NEXT:    [[CMP:%.*]] = icmp slt i3 [[A:%.*]], [[B:%.*]]
637; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
638; SI-NEXT:    store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
639; SI-NEXT:    ret void
640;
641; VI-LABEL: @select_slt_i3(
642; VI-NEXT:    [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
643; VI-NEXT:    [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
644; VI-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP1]], [[TMP2]]
645; VI-NEXT:    [[TMP4:%.*]] = sext i3 [[A]] to i32
646; VI-NEXT:    [[TMP5:%.*]] = sext i3 [[B]] to i32
647; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
648; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
649; VI-NEXT:    store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
650; VI-NEXT:    ret void
651;
652  %cmp = icmp slt i3 %a, %b
653  %sel = select i1 %cmp, i3 %a, i3 %b
654  store volatile i3 %sel, i3 addrspace(1)* undef
655  ret void
656}
657
658define amdgpu_kernel void @select_sle_i3(i3 %a, i3 %b) {
659; SI-LABEL: @select_sle_i3(
660; SI-NEXT:    [[CMP:%.*]] = icmp sle i3 [[A:%.*]], [[B:%.*]]
661; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
662; SI-NEXT:    store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
663; SI-NEXT:    ret void
664;
665; VI-LABEL: @select_sle_i3(
666; VI-NEXT:    [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
667; VI-NEXT:    [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
668; VI-NEXT:    [[TMP3:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
669; VI-NEXT:    [[TMP4:%.*]] = sext i3 [[A]] to i32
670; VI-NEXT:    [[TMP5:%.*]] = sext i3 [[B]] to i32
671; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
672; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
673; VI-NEXT:    store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
674; VI-NEXT:    ret void
675;
676  %cmp = icmp sle i3 %a, %b
677  %sel = select i1 %cmp, i3 %a, i3 %b
678  store volatile i3 %sel, i3 addrspace(1)* undef
679  ret void
680}
681
682declare i3 @llvm.bitreverse.i3(i3)
683define amdgpu_kernel void @bitreverse_i3(i3 %a) {
684; SI-LABEL: @bitreverse_i3(
685; SI-NEXT:    [[BREV:%.*]] = call i3 @llvm.bitreverse.i3(i3 [[A:%.*]])
686; SI-NEXT:    store volatile i3 [[BREV]], i3 addrspace(1)* undef, align 1
687; SI-NEXT:    ret void
688;
689; VI-LABEL: @bitreverse_i3(
690; VI-NEXT:    [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
691; VI-NEXT:    [[TMP2:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[TMP1]])
692; VI-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP2]], 29
693; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
694; VI-NEXT:    store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
695; VI-NEXT:    ret void
696;
697  %brev = call i3 @llvm.bitreverse.i3(i3 %a)
698  store volatile i3 %brev, i3 addrspace(1)* undef
699  ret void
700}
701
702define amdgpu_kernel void @add_i16(i16 %a, i16 %b) {
703; SI-LABEL: @add_i16(
704; SI-NEXT:    [[R:%.*]] = add i16 [[A:%.*]], [[B:%.*]]
705; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
706; SI-NEXT:    ret void
707;
708; VI-LABEL: @add_i16(
709; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
710; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
711; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
712; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
713; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
714; VI-NEXT:    ret void
715;
716  %r = add i16 %a, %b
717  store volatile i16 %r, i16 addrspace(1)* undef
718  ret void
719}
720
721define amdgpu_kernel void @constant_add_i16() {
722; SI-LABEL: @constant_add_i16(
723; SI-NEXT:    [[R:%.*]] = add i16 1, 2
724; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
725; SI-NEXT:    ret void
726;
727; VI-LABEL: @constant_add_i16(
728; VI-NEXT:    store volatile i16 3, i16 addrspace(1)* undef, align 2
729; VI-NEXT:    ret void
730;
731  %r = add i16 1, 2
732  store volatile i16 %r, i16 addrspace(1)* undef
733  ret void
734}
735
736define amdgpu_kernel void @constant_add_nsw_i16() {
737; SI-LABEL: @constant_add_nsw_i16(
738; SI-NEXT:    [[R:%.*]] = add nsw i16 1, 2
739; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
740; SI-NEXT:    ret void
741;
742; VI-LABEL: @constant_add_nsw_i16(
743; VI-NEXT:    store volatile i16 3, i16 addrspace(1)* undef, align 2
744; VI-NEXT:    ret void
745;
746  %r = add nsw i16 1, 2
747  store volatile i16 %r, i16 addrspace(1)* undef
748  ret void
749}
750
751define amdgpu_kernel void @constant_add_nuw_i16() {
752; SI-LABEL: @constant_add_nuw_i16(
753; SI-NEXT:    [[R:%.*]] = add nsw i16 1, 2
754; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
755; SI-NEXT:    ret void
756;
757; VI-LABEL: @constant_add_nuw_i16(
758; VI-NEXT:    store volatile i16 3, i16 addrspace(1)* undef, align 2
759; VI-NEXT:    ret void
760;
761  %r = add nsw i16 1, 2
762  store volatile i16 %r, i16 addrspace(1)* undef
763  ret void
764}
765
766define amdgpu_kernel void @add_nsw_i16(i16 %a, i16 %b) {
767; SI-LABEL: @add_nsw_i16(
768; SI-NEXT:    [[R:%.*]] = add nsw i16 [[A:%.*]], [[B:%.*]]
769; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
770; SI-NEXT:    ret void
771;
772; VI-LABEL: @add_nsw_i16(
773; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
774; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
775; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
776; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
777; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
778; VI-NEXT:    ret void
779;
780  %r = add nsw i16 %a, %b
781  store volatile i16 %r, i16 addrspace(1)* undef
782  ret void
783}
784
785define amdgpu_kernel void @add_nuw_i16(i16 %a, i16 %b) {
786; SI-LABEL: @add_nuw_i16(
787; SI-NEXT:    [[R:%.*]] = add nuw i16 [[A:%.*]], [[B:%.*]]
788; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
789; SI-NEXT:    ret void
790;
791; VI-LABEL: @add_nuw_i16(
792; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
793; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
794; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
795; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
796; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
797; VI-NEXT:    ret void
798;
799  %r = add nuw i16 %a, %b
800  store volatile i16 %r, i16 addrspace(1)* undef
801  ret void
802}
803
804define amdgpu_kernel void @add_nuw_nsw_i16(i16 %a, i16 %b) {
805; SI-LABEL: @add_nuw_nsw_i16(
806; SI-NEXT:    [[R:%.*]] = add nuw nsw i16 [[A:%.*]], [[B:%.*]]
807; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
808; SI-NEXT:    ret void
809;
810; VI-LABEL: @add_nuw_nsw_i16(
811; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
812; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
813; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
814; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
815; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
816; VI-NEXT:    ret void
817;
818  %r = add nuw nsw i16 %a, %b
819  store volatile i16 %r, i16 addrspace(1)* undef
820  ret void
821}
822
823define amdgpu_kernel void @sub_i16(i16 %a, i16 %b) {
824; SI-LABEL: @sub_i16(
825; SI-NEXT:    [[R:%.*]] = sub i16 [[A:%.*]], [[B:%.*]]
826; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
827; SI-NEXT:    ret void
828;
829; VI-LABEL: @sub_i16(
830; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
831; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
832; VI-NEXT:    [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
833; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
834; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
835; VI-NEXT:    ret void
836;
837  %r = sub i16 %a, %b
838  store volatile i16 %r, i16 addrspace(1)* undef
839  ret void
840}
841
842define amdgpu_kernel void @sub_nsw_i16(i16 %a, i16 %b) {
843; SI-LABEL: @sub_nsw_i16(
844; SI-NEXT:    [[R:%.*]] = sub nsw i16 [[A:%.*]], [[B:%.*]]
845; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
846; SI-NEXT:    ret void
847;
848; VI-LABEL: @sub_nsw_i16(
849; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
850; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
851; VI-NEXT:    [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
852; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
853; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
854; VI-NEXT:    ret void
855;
856  %r = sub nsw i16 %a, %b
857  store volatile i16 %r, i16 addrspace(1)* undef
858  ret void
859}
860
861define amdgpu_kernel void @sub_nuw_i16(i16 %a, i16 %b) {
862; SI-LABEL: @sub_nuw_i16(
863; SI-NEXT:    [[R:%.*]] = sub nuw i16 [[A:%.*]], [[B:%.*]]
864; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
865; SI-NEXT:    ret void
866;
867; VI-LABEL: @sub_nuw_i16(
868; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
869; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
870; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
871; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
872; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
873; VI-NEXT:    ret void
874;
875  %r = sub nuw i16 %a, %b
876  store volatile i16 %r, i16 addrspace(1)* undef
877  ret void
878}
879
880define amdgpu_kernel void @sub_nuw_nsw_i16(i16 %a, i16 %b) {
881; SI-LABEL: @sub_nuw_nsw_i16(
882; SI-NEXT:    [[R:%.*]] = sub nuw nsw i16 [[A:%.*]], [[B:%.*]]
883; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
884; SI-NEXT:    ret void
885;
886; VI-LABEL: @sub_nuw_nsw_i16(
887; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
888; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
889; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
890; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
891; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
892; VI-NEXT:    ret void
893;
894  %r = sub nuw nsw i16 %a, %b
895  store volatile i16 %r, i16 addrspace(1)* undef
896  ret void
897}
898
899define amdgpu_kernel void @mul_i16(i16 %a, i16 %b) {
900; SI-LABEL: @mul_i16(
901; SI-NEXT:    [[R:%.*]] = mul i16 [[A:%.*]], [[B:%.*]]
902; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
903; SI-NEXT:    ret void
904;
905; VI-LABEL: @mul_i16(
906; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
907; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
908; VI-NEXT:    [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
909; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
910; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
911; VI-NEXT:    ret void
912;
913  %r = mul i16 %a, %b
914  store volatile i16 %r, i16 addrspace(1)* undef
915  ret void
916}
917
918define amdgpu_kernel void @mul_nsw_i16(i16 %a, i16 %b) {
919; SI-LABEL: @mul_nsw_i16(
920; SI-NEXT:    [[R:%.*]] = mul nsw i16 [[A:%.*]], [[B:%.*]]
921; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
922; SI-NEXT:    ret void
923;
924; VI-LABEL: @mul_nsw_i16(
925; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
926; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
927; VI-NEXT:    [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
928; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
929; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
930; VI-NEXT:    ret void
931;
932  %r = mul nsw i16 %a, %b
933  store volatile i16 %r, i16 addrspace(1)* undef
934  ret void
935}
936
937define amdgpu_kernel void @mul_nuw_i16(i16 %a, i16 %b) {
938; SI-LABEL: @mul_nuw_i16(
939; SI-NEXT:    [[R:%.*]] = mul nuw i16 [[A:%.*]], [[B:%.*]]
940; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
941; SI-NEXT:    ret void
942;
943; VI-LABEL: @mul_nuw_i16(
944; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
945; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
946; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
947; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
948; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
949; VI-NEXT:    ret void
950;
951  %r = mul nuw i16 %a, %b
952  store volatile i16 %r, i16 addrspace(1)* undef
953  ret void
954}
955
956define amdgpu_kernel void @mul_nuw_nsw_i16(i16 %a, i16 %b) {
957; SI-LABEL: @mul_nuw_nsw_i16(
958; SI-NEXT:    [[R:%.*]] = mul nuw nsw i16 [[A:%.*]], [[B:%.*]]
959; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
960; SI-NEXT:    ret void
961;
962; VI-LABEL: @mul_nuw_nsw_i16(
963; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
964; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
965; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
966; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
967; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
968; VI-NEXT:    ret void
969;
970  %r = mul nuw nsw i16 %a, %b
971  store volatile i16 %r, i16 addrspace(1)* undef
972  ret void
973}
974
975define amdgpu_kernel void @shl_i16(i16 %a, i16 %b) {
976; SI-LABEL: @shl_i16(
977; SI-NEXT:    [[R:%.*]] = shl i16 [[A:%.*]], [[B:%.*]]
978; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
979; SI-NEXT:    ret void
980;
981; VI-LABEL: @shl_i16(
982; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
983; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
984; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
985; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
986; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
987; VI-NEXT:    ret void
988;
989  %r = shl i16 %a, %b
990  store volatile i16 %r, i16 addrspace(1)* undef
991  ret void
992}
993
994define amdgpu_kernel void @shl_nsw_i16(i16 %a, i16 %b) {
995; SI-LABEL: @shl_nsw_i16(
996; SI-NEXT:    [[R:%.*]] = shl nsw i16 [[A:%.*]], [[B:%.*]]
997; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
998; SI-NEXT:    ret void
999;
1000; VI-LABEL: @shl_nsw_i16(
1001; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
1002; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
1003; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
1004; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
1005; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
1006; VI-NEXT:    ret void
1007;
1008  %r = shl nsw i16 %a, %b
1009  store volatile i16 %r, i16 addrspace(1)* undef
1010  ret void
1011}
1012
1013define amdgpu_kernel void @shl_nuw_i16(i16 %a, i16 %b) {
1014; SI-LABEL: @shl_nuw_i16(
1015; SI-NEXT:    [[R:%.*]] = shl nuw i16 [[A:%.*]], [[B:%.*]]
1016; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
1017; SI-NEXT:    ret void
1018;
1019; VI-LABEL: @shl_nuw_i16(
1020; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
1021; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
1022; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
1023; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
1024; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
1025; VI-NEXT:    ret void
1026;
1027  %r = shl nuw i16 %a, %b
1028  store volatile i16 %r, i16 addrspace(1)* undef
1029  ret void
1030}
1031
1032define amdgpu_kernel void @shl_nuw_nsw_i16(i16 %a, i16 %b) {
1033; SI-LABEL: @shl_nuw_nsw_i16(
1034; SI-NEXT:    [[R:%.*]] = shl nuw nsw i16 [[A:%.*]], [[B:%.*]]
1035; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
1036; SI-NEXT:    ret void
1037;
1038; VI-LABEL: @shl_nuw_nsw_i16(
1039; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
1040; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
1041; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
1042; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
1043; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
1044; VI-NEXT:    ret void
1045;
1046  %r = shl nuw nsw i16 %a, %b
1047  store volatile i16 %r, i16 addrspace(1)* undef
1048  ret void
1049}
1050
1051define amdgpu_kernel void @lshr_i16(i16 %a, i16 %b) {
1052; SI-LABEL: @lshr_i16(
1053; SI-NEXT:    [[R:%.*]] = lshr i16 [[A:%.*]], [[B:%.*]]
1054; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
1055; SI-NEXT:    ret void
1056;
1057; VI-LABEL: @lshr_i16(
1058; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
1059; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
1060; VI-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP1]], [[TMP2]]
1061; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
1062; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
1063; VI-NEXT:    ret void
1064;
1065  %r = lshr i16 %a, %b
1066  store volatile i16 %r, i16 addrspace(1)* undef
1067  ret void
1068}
1069
1070define amdgpu_kernel void @lshr_exact_i16(i16 %a, i16 %b) {
1071; SI-LABEL: @lshr_exact_i16(
1072; SI-NEXT:    [[R:%.*]] = lshr exact i16 [[A:%.*]], [[B:%.*]]
1073; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
1074; SI-NEXT:    ret void
1075;
1076; VI-LABEL: @lshr_exact_i16(
1077; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
1078; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
1079; VI-NEXT:    [[TMP3:%.*]] = lshr exact i32 [[TMP1]], [[TMP2]]
1080; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
1081; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
1082; VI-NEXT:    ret void
1083;
1084  %r = lshr exact i16 %a, %b
1085  store volatile i16 %r, i16 addrspace(1)* undef
1086  ret void
1087}
1088
1089define amdgpu_kernel void @ashr_i16(i16 %a, i16 %b) {
1090; SI-LABEL: @ashr_i16(
1091; SI-NEXT:    [[R:%.*]] = ashr i16 [[A:%.*]], [[B:%.*]]
1092; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
1093; SI-NEXT:    ret void
1094;
1095; VI-LABEL: @ashr_i16(
1096; VI-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
1097; VI-NEXT:    [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
1098; VI-NEXT:    [[TMP3:%.*]] = ashr i32 [[TMP1]], [[TMP2]]
1099; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
1100; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
1101; VI-NEXT:    ret void
1102;
1103  %r = ashr i16 %a, %b
1104  store volatile i16 %r, i16 addrspace(1)* undef
1105  ret void
1106}
1107
1108define amdgpu_kernel void @ashr_exact_i16(i16 %a, i16 %b) {
1109; SI-LABEL: @ashr_exact_i16(
1110; SI-NEXT:    [[R:%.*]] = ashr exact i16 [[A:%.*]], [[B:%.*]]
1111; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
1112; SI-NEXT:    ret void
1113;
1114; VI-LABEL: @ashr_exact_i16(
1115; VI-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
1116; VI-NEXT:    [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
1117; VI-NEXT:    [[TMP3:%.*]] = ashr exact i32 [[TMP1]], [[TMP2]]
1118; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
1119; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
1120; VI-NEXT:    ret void
1121;
1122  %r = ashr exact i16 %a, %b
1123  store volatile i16 %r, i16 addrspace(1)* undef
1124  ret void
1125}
1126
1127define amdgpu_kernel void @constant_lshr_exact_i16(i16 %a, i16 %b) {
1128; SI-LABEL: @constant_lshr_exact_i16(
1129; SI-NEXT:    [[R:%.*]] = lshr exact i16 4, 1
1130; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
1131; SI-NEXT:    ret void
1132;
1133; VI-LABEL: @constant_lshr_exact_i16(
1134; VI-NEXT:    store volatile i16 2, i16 addrspace(1)* undef, align 2
1135; VI-NEXT:    ret void
1136;
1137  %r = lshr exact i16 4, 1
1138  store volatile i16 %r, i16 addrspace(1)* undef
1139  ret void
1140}
1141
1142define amdgpu_kernel void @and_i16(i16 %a, i16 %b) {
1143; SI-LABEL: @and_i16(
1144; SI-NEXT:    [[R:%.*]] = and i16 [[A:%.*]], [[B:%.*]]
1145; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
1146; SI-NEXT:    ret void
1147;
1148; VI-LABEL: @and_i16(
1149; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
1150; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
1151; VI-NEXT:    [[TMP3:%.*]] = and i32 [[TMP1]], [[TMP2]]
1152; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
1153; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
1154; VI-NEXT:    ret void
1155;
1156  %r = and i16 %a, %b
1157  store volatile i16 %r, i16 addrspace(1)* undef
1158  ret void
1159}
1160
1161define amdgpu_kernel void @or_i16(i16 %a, i16 %b) {
1162; SI-LABEL: @or_i16(
1163; SI-NEXT:    [[R:%.*]] = or i16 [[A:%.*]], [[B:%.*]]
1164; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
1165; SI-NEXT:    ret void
1166;
1167; VI-LABEL: @or_i16(
1168; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
1169; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
1170; VI-NEXT:    [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]]
1171; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
1172; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
1173; VI-NEXT:    ret void
1174;
1175  %r = or i16 %a, %b
1176  store volatile i16 %r, i16 addrspace(1)* undef
1177  ret void
1178}
1179
1180define amdgpu_kernel void @xor_i16(i16 %a, i16 %b) {
1181; SI-LABEL: @xor_i16(
1182; SI-NEXT:    [[R:%.*]] = xor i16 [[A:%.*]], [[B:%.*]]
1183; SI-NEXT:    store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
1184; SI-NEXT:    ret void
1185;
1186; VI-LABEL: @xor_i16(
1187; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
1188; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
1189; VI-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
1190; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
1191; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
1192; VI-NEXT:    ret void
1193;
1194  %r = xor i16 %a, %b
1195  store volatile i16 %r, i16 addrspace(1)* undef
1196  ret void
1197}
1198
1199define amdgpu_kernel void @select_eq_i16(i16 %a, i16 %b) {
1200; SI-LABEL: @select_eq_i16(
1201; SI-NEXT:    [[CMP:%.*]] = icmp eq i16 [[A:%.*]], [[B:%.*]]
1202; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
1203; SI-NEXT:    store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
1204; SI-NEXT:    ret void
1205;
1206; VI-LABEL: @select_eq_i16(
1207; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
1208; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
1209; VI-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP1]], [[TMP2]]
1210; VI-NEXT:    [[TMP4:%.*]] = zext i16 [[A]] to i32
1211; VI-NEXT:    [[TMP5:%.*]] = zext i16 [[B]] to i32
1212; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
1213; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
1214; VI-NEXT:    store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
1215; VI-NEXT:    ret void
1216;
1217  %cmp = icmp eq i16 %a, %b
1218  %sel = select i1 %cmp, i16 %a, i16 %b
1219  store volatile i16 %sel, i16 addrspace(1)* undef
1220  ret void
1221}
1222
1223define amdgpu_kernel void @select_ne_i16(i16 %a, i16 %b) {
1224; SI-LABEL: @select_ne_i16(
1225; SI-NEXT:    [[CMP:%.*]] = icmp ne i16 [[A:%.*]], [[B:%.*]]
1226; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
1227; SI-NEXT:    store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
1228; SI-NEXT:    ret void
1229;
1230; VI-LABEL: @select_ne_i16(
1231; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
1232; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
1233; VI-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
1234; VI-NEXT:    [[TMP4:%.*]] = zext i16 [[A]] to i32
1235; VI-NEXT:    [[TMP5:%.*]] = zext i16 [[B]] to i32
1236; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
1237; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
1238; VI-NEXT:    store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
1239; VI-NEXT:    ret void
1240;
1241  %cmp = icmp ne i16 %a, %b
1242  %sel = select i1 %cmp, i16 %a, i16 %b
1243  store volatile i16 %sel, i16 addrspace(1)* undef
1244  ret void
1245}
1246
1247define amdgpu_kernel void @select_ugt_i16(i16 %a, i16 %b) {
1248; SI-LABEL: @select_ugt_i16(
1249; SI-NEXT:    [[CMP:%.*]] = icmp ugt i16 [[A:%.*]], [[B:%.*]]
1250; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
1251; SI-NEXT:    store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
1252; SI-NEXT:    ret void
1253;
1254; VI-LABEL: @select_ugt_i16(
1255; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
1256; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
1257; VI-NEXT:    [[TMP3:%.*]] = icmp ugt i32 [[TMP1]], [[TMP2]]
1258; VI-NEXT:    [[TMP4:%.*]] = zext i16 [[A]] to i32
1259; VI-NEXT:    [[TMP5:%.*]] = zext i16 [[B]] to i32
1260; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
1261; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
1262; VI-NEXT:    store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
1263; VI-NEXT:    ret void
1264;
1265  %cmp = icmp ugt i16 %a, %b
1266  %sel = select i1 %cmp, i16 %a, i16 %b
1267  store volatile i16 %sel, i16 addrspace(1)* undef
1268  ret void
1269}
1270
1271define amdgpu_kernel void @select_uge_i16(i16 %a, i16 %b) {
1272; SI-LABEL: @select_uge_i16(
1273; SI-NEXT:    [[CMP:%.*]] = icmp uge i16 [[A:%.*]], [[B:%.*]]
1274; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
1275; SI-NEXT:    store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
1276; SI-NEXT:    ret void
1277;
1278; VI-LABEL: @select_uge_i16(
1279; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
1280; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
1281; VI-NEXT:    [[TMP3:%.*]] = icmp uge i32 [[TMP1]], [[TMP2]]
1282; VI-NEXT:    [[TMP4:%.*]] = zext i16 [[A]] to i32
1283; VI-NEXT:    [[TMP5:%.*]] = zext i16 [[B]] to i32
1284; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
1285; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
1286; VI-NEXT:    store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
1287; VI-NEXT:    ret void
1288;
1289  %cmp = icmp uge i16 %a, %b
1290  %sel = select i1 %cmp, i16 %a, i16 %b
1291  store volatile i16 %sel, i16 addrspace(1)* undef
1292  ret void
1293}
1294
1295define amdgpu_kernel void @select_ult_i16(i16 %a, i16 %b) {
1296; SI-LABEL: @select_ult_i16(
1297; SI-NEXT:    [[CMP:%.*]] = icmp ult i16 [[A:%.*]], [[B:%.*]]
1298; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
1299; SI-NEXT:    store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
1300; SI-NEXT:    ret void
1301;
1302; VI-LABEL: @select_ult_i16(
1303; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
1304; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
1305; VI-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[TMP1]], [[TMP2]]
1306; VI-NEXT:    [[TMP4:%.*]] = zext i16 [[A]] to i32
1307; VI-NEXT:    [[TMP5:%.*]] = zext i16 [[B]] to i32
1308; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
1309; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
1310; VI-NEXT:    store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
1311; VI-NEXT:    ret void
1312;
1313  %cmp = icmp ult i16 %a, %b
1314  %sel = select i1 %cmp, i16 %a, i16 %b
1315  store volatile i16 %sel, i16 addrspace(1)* undef
1316  ret void
1317}
1318
1319define amdgpu_kernel void @select_ule_i16(i16 %a, i16 %b) {
1320; SI-LABEL: @select_ule_i16(
1321; SI-NEXT:    [[CMP:%.*]] = icmp ule i16 [[A:%.*]], [[B:%.*]]
1322; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
1323; SI-NEXT:    store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
1324; SI-NEXT:    ret void
1325;
1326; VI-LABEL: @select_ule_i16(
1327; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
1328; VI-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
1329; VI-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[TMP1]], [[TMP2]]
1330; VI-NEXT:    [[TMP4:%.*]] = zext i16 [[A]] to i32
1331; VI-NEXT:    [[TMP5:%.*]] = zext i16 [[B]] to i32
1332; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
1333; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
1334; VI-NEXT:    store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
1335; VI-NEXT:    ret void
1336;
1337  %cmp = icmp ule i16 %a, %b
1338  %sel = select i1 %cmp, i16 %a, i16 %b
1339  store volatile i16 %sel, i16 addrspace(1)* undef
1340  ret void
1341}
1342
1343define amdgpu_kernel void @select_sgt_i16(i16 %a, i16 %b) {
1344; SI-LABEL: @select_sgt_i16(
1345; SI-NEXT:    [[CMP:%.*]] = icmp sgt i16 [[A:%.*]], [[B:%.*]]
1346; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
1347; SI-NEXT:    store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
1348; SI-NEXT:    ret void
1349;
1350; VI-LABEL: @select_sgt_i16(
1351; VI-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
1352; VI-NEXT:    [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
1353; VI-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
1354; VI-NEXT:    [[TMP4:%.*]] = sext i16 [[A]] to i32
1355; VI-NEXT:    [[TMP5:%.*]] = sext i16 [[B]] to i32
1356; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
1357; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
1358; VI-NEXT:    store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
1359; VI-NEXT:    ret void
1360;
1361  %cmp = icmp sgt i16 %a, %b
1362  %sel = select i1 %cmp, i16 %a, i16 %b
1363  store volatile i16 %sel, i16 addrspace(1)* undef
1364  ret void
1365}
1366
1367define amdgpu_kernel void @select_sge_i16(i16 %a, i16 %b) {
1368; SI-LABEL: @select_sge_i16(
1369; SI-NEXT:    [[CMP:%.*]] = icmp sge i16 [[A:%.*]], [[B:%.*]]
1370; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
1371; SI-NEXT:    store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
1372; SI-NEXT:    ret void
1373;
1374; VI-LABEL: @select_sge_i16(
1375; VI-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
1376; VI-NEXT:    [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
1377; VI-NEXT:    [[TMP3:%.*]] = icmp sge i32 [[TMP1]], [[TMP2]]
1378; VI-NEXT:    [[TMP4:%.*]] = sext i16 [[A]] to i32
1379; VI-NEXT:    [[TMP5:%.*]] = sext i16 [[B]] to i32
1380; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
1381; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
1382; VI-NEXT:    store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
1383; VI-NEXT:    ret void
1384;
1385  %cmp = icmp sge i16 %a, %b
1386  %sel = select i1 %cmp, i16 %a, i16 %b
1387  store volatile i16 %sel, i16 addrspace(1)* undef
1388  ret void
1389}
1390
1391define amdgpu_kernel void @select_slt_i16(i16 %a, i16 %b) {
1392; SI-LABEL: @select_slt_i16(
1393; SI-NEXT:    [[CMP:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]]
1394; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
1395; SI-NEXT:    store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
1396; SI-NEXT:    ret void
1397;
1398; VI-LABEL: @select_slt_i16(
1399; VI-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
1400; VI-NEXT:    [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
1401; VI-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP1]], [[TMP2]]
1402; VI-NEXT:    [[TMP4:%.*]] = sext i16 [[A]] to i32
1403; VI-NEXT:    [[TMP5:%.*]] = sext i16 [[B]] to i32
1404; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
1405; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
1406; VI-NEXT:    store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
1407; VI-NEXT:    ret void
1408;
1409  %cmp = icmp slt i16 %a, %b
1410  %sel = select i1 %cmp, i16 %a, i16 %b
1411  store volatile i16 %sel, i16 addrspace(1)* undef
1412  ret void
1413}
1414
1415define amdgpu_kernel void @select_sle_i16(i16 %a, i16 %b) {
1416; SI-LABEL: @select_sle_i16(
1417; SI-NEXT:    [[CMP:%.*]] = icmp sle i16 [[A:%.*]], [[B:%.*]]
1418; SI-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
1419; SI-NEXT:    store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
1420; SI-NEXT:    ret void
1421;
1422; VI-LABEL: @select_sle_i16(
1423; VI-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
1424; VI-NEXT:    [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
1425; VI-NEXT:    [[TMP3:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
1426; VI-NEXT:    [[TMP4:%.*]] = sext i16 [[A]] to i32
1427; VI-NEXT:    [[TMP5:%.*]] = sext i16 [[B]] to i32
1428; VI-NEXT:    [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
1429; VI-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
1430; VI-NEXT:    store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
1431; VI-NEXT:    ret void
1432;
1433  %cmp = icmp sle i16 %a, %b
1434  %sel = select i1 %cmp, i16 %a, i16 %b
1435  store volatile i16 %sel, i16 addrspace(1)* undef
1436  ret void
1437}
1438
1439declare i16 @llvm.bitreverse.i16(i16)
1440
1441define amdgpu_kernel void @bitreverse_i16(i16 %a) {
1442; SI-LABEL: @bitreverse_i16(
1443; SI-NEXT:    [[BREV:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[A:%.*]])
1444; SI-NEXT:    store volatile i16 [[BREV]], i16 addrspace(1)* undef, align 2
1445; SI-NEXT:    ret void
1446;
1447; VI-LABEL: @bitreverse_i16(
1448; VI-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
1449; VI-NEXT:    [[TMP2:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[TMP1]])
1450; VI-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP2]], 16
1451; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
1452; VI-NEXT:    store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
1453; VI-NEXT:    ret void
1454;
1455  %brev = call i16 @llvm.bitreverse.i16(i16 %a)
1456  store volatile i16 %brev, i16 addrspace(1)* undef
1457  ret void
1458}
1459
1460define amdgpu_kernel void @add_3xi15(<3 x i15> %a, <3 x i15> %b) {
1461; SI-LABEL: @add_3xi15(
1462; SI-NEXT:    [[R:%.*]] = add <3 x i15> [[A:%.*]], [[B:%.*]]
1463; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1464; SI-NEXT:    ret void
1465;
1466; VI-LABEL: @add_3xi15(
1467; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1468; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1469; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
1470; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1471; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1472; VI-NEXT:    ret void
1473;
1474  %r = add <3 x i15> %a, %b
1475  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1476  ret void
1477}
1478
1479define amdgpu_kernel void @add_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1480; SI-LABEL: @add_nsw_3xi15(
1481; SI-NEXT:    [[R:%.*]] = add nsw <3 x i15> [[A:%.*]], [[B:%.*]]
1482; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1483; SI-NEXT:    ret void
1484;
1485; VI-LABEL: @add_nsw_3xi15(
1486; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1487; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1488; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
1489; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1490; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1491; VI-NEXT:    ret void
1492;
1493  %r = add nsw <3 x i15> %a, %b
1494  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1495  ret void
1496}
1497
1498define amdgpu_kernel void @add_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1499; SI-LABEL: @add_nuw_3xi15(
1500; SI-NEXT:    [[R:%.*]] = add nuw <3 x i15> [[A:%.*]], [[B:%.*]]
1501; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1502; SI-NEXT:    ret void
1503;
1504; VI-LABEL: @add_nuw_3xi15(
1505; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1506; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1507; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
1508; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1509; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1510; VI-NEXT:    ret void
1511;
1512  %r = add nuw <3 x i15> %a, %b
1513  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1514  ret void
1515}
1516
1517define amdgpu_kernel void @add_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1518; SI-LABEL: @add_nuw_nsw_3xi15(
1519; SI-NEXT:    [[R:%.*]] = add nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]]
1520; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1521; SI-NEXT:    ret void
1522;
1523; VI-LABEL: @add_nuw_nsw_3xi15(
1524; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1525; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1526; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
1527; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1528; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1529; VI-NEXT:    ret void
1530;
1531  %r = add nuw nsw <3 x i15> %a, %b
1532  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1533  ret void
1534}
1535
1536define amdgpu_kernel void @sub_3xi15(<3 x i15> %a, <3 x i15> %b) {
1537; SI-LABEL: @sub_3xi15(
1538; SI-NEXT:    [[R:%.*]] = sub <3 x i15> [[A:%.*]], [[B:%.*]]
1539; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1540; SI-NEXT:    ret void
1541;
1542; VI-LABEL: @sub_3xi15(
1543; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1544; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1545; VI-NEXT:    [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]]
1546; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1547; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1548; VI-NEXT:    ret void
1549;
1550  %r = sub <3 x i15> %a, %b
1551  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1552  ret void
1553}
1554
1555define amdgpu_kernel void @sub_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1556; SI-LABEL: @sub_nsw_3xi15(
1557; SI-NEXT:    [[R:%.*]] = sub nsw <3 x i15> [[A:%.*]], [[B:%.*]]
1558; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1559; SI-NEXT:    ret void
1560;
1561; VI-LABEL: @sub_nsw_3xi15(
1562; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1563; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1564; VI-NEXT:    [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]]
1565; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1566; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1567; VI-NEXT:    ret void
1568;
1569  %r = sub nsw <3 x i15> %a, %b
1570  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1571  ret void
1572}
1573
1574define amdgpu_kernel void @sub_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1575; SI-LABEL: @sub_nuw_3xi15(
1576; SI-NEXT:    [[R:%.*]] = sub nuw <3 x i15> [[A:%.*]], [[B:%.*]]
1577; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1578; SI-NEXT:    ret void
1579;
1580; VI-LABEL: @sub_nuw_3xi15(
1581; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1582; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1583; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
1584; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1585; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1586; VI-NEXT:    ret void
1587;
1588  %r = sub nuw <3 x i15> %a, %b
1589  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1590  ret void
1591}
1592
1593define amdgpu_kernel void @sub_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1594; SI-LABEL: @sub_nuw_nsw_3xi15(
1595; SI-NEXT:    [[R:%.*]] = sub nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]]
1596; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1597; SI-NEXT:    ret void
1598;
1599; VI-LABEL: @sub_nuw_nsw_3xi15(
1600; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1601; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1602; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
1603; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1604; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1605; VI-NEXT:    ret void
1606;
1607  %r = sub nuw nsw <3 x i15> %a, %b
1608  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1609  ret void
1610}
1611
1612define amdgpu_kernel void @mul_3xi15(<3 x i15> %a, <3 x i15> %b) {
1613; SI-LABEL: @mul_3xi15(
1614; SI-NEXT:    [[R:%.*]] = mul <3 x i15> [[A:%.*]], [[B:%.*]]
1615; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1616; SI-NEXT:    ret void
1617;
1618; VI-LABEL: @mul_3xi15(
1619; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1620; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1621; VI-NEXT:    [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]]
1622; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1623; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1624; VI-NEXT:    ret void
1625;
1626  %r = mul <3 x i15> %a, %b
1627  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1628  ret void
1629}
1630
1631define amdgpu_kernel void @mul_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1632; SI-LABEL: @mul_nsw_3xi15(
1633; SI-NEXT:    [[R:%.*]] = mul nsw <3 x i15> [[A:%.*]], [[B:%.*]]
1634; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1635; SI-NEXT:    ret void
1636;
1637; VI-LABEL: @mul_nsw_3xi15(
1638; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1639; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1640; VI-NEXT:    [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]]
1641; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1642; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1643; VI-NEXT:    ret void
1644;
1645  %r = mul nsw <3 x i15> %a, %b
1646  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1647  ret void
1648}
1649
1650define amdgpu_kernel void @mul_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1651; SI-LABEL: @mul_nuw_3xi15(
1652; SI-NEXT:    [[R:%.*]] = mul nuw <3 x i15> [[A:%.*]], [[B:%.*]]
1653; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1654; SI-NEXT:    ret void
1655;
1656; VI-LABEL: @mul_nuw_3xi15(
1657; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1658; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1659; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
1660; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1661; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1662; VI-NEXT:    ret void
1663;
1664  %r = mul nuw <3 x i15> %a, %b
1665  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1666  ret void
1667}
1668
1669define amdgpu_kernel void @mul_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1670; SI-LABEL: @mul_nuw_nsw_3xi15(
1671; SI-NEXT:    [[R:%.*]] = mul nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]]
1672; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1673; SI-NEXT:    ret void
1674;
1675; VI-LABEL: @mul_nuw_nsw_3xi15(
1676; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1677; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1678; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
1679; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1680; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1681; VI-NEXT:    ret void
1682;
1683  %r = mul nuw nsw <3 x i15> %a, %b
1684  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1685  ret void
1686}
1687
1688define amdgpu_kernel void @shl_3xi15(<3 x i15> %a, <3 x i15> %b) {
1689; SI-LABEL: @shl_3xi15(
1690; SI-NEXT:    [[R:%.*]] = shl <3 x i15> [[A:%.*]], [[B:%.*]]
1691; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1692; SI-NEXT:    ret void
1693;
1694; VI-LABEL: @shl_3xi15(
1695; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1696; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1697; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
1698; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1699; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1700; VI-NEXT:    ret void
1701;
1702  %r = shl <3 x i15> %a, %b
1703  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1704  ret void
1705}
1706
1707define amdgpu_kernel void @shl_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1708; SI-LABEL: @shl_nsw_3xi15(
1709; SI-NEXT:    [[R:%.*]] = shl nsw <3 x i15> [[A:%.*]], [[B:%.*]]
1710; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1711; SI-NEXT:    ret void
1712;
1713; VI-LABEL: @shl_nsw_3xi15(
1714; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1715; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1716; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
1717; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1718; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1719; VI-NEXT:    ret void
1720;
1721  %r = shl nsw <3 x i15> %a, %b
1722  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1723  ret void
1724}
1725
1726define amdgpu_kernel void @shl_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1727; SI-LABEL: @shl_nuw_3xi15(
1728; SI-NEXT:    [[R:%.*]] = shl nuw <3 x i15> [[A:%.*]], [[B:%.*]]
1729; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1730; SI-NEXT:    ret void
1731;
1732; VI-LABEL: @shl_nuw_3xi15(
1733; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1734; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1735; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
1736; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1737; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1738; VI-NEXT:    ret void
1739;
1740  %r = shl nuw <3 x i15> %a, %b
1741  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1742  ret void
1743}
1744
1745define amdgpu_kernel void @shl_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
1746; SI-LABEL: @shl_nuw_nsw_3xi15(
1747; SI-NEXT:    [[R:%.*]] = shl nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]]
1748; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1749; SI-NEXT:    ret void
1750;
1751; VI-LABEL: @shl_nuw_nsw_3xi15(
1752; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1753; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1754; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
1755; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1756; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1757; VI-NEXT:    ret void
1758;
1759  %r = shl nuw nsw <3 x i15> %a, %b
1760  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1761  ret void
1762}
1763
1764define amdgpu_kernel void @lshr_3xi15(<3 x i15> %a, <3 x i15> %b) {
1765; SI-LABEL: @lshr_3xi15(
1766; SI-NEXT:    [[R:%.*]] = lshr <3 x i15> [[A:%.*]], [[B:%.*]]
1767; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1768; SI-NEXT:    ret void
1769;
1770; VI-LABEL: @lshr_3xi15(
1771; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1772; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1773; VI-NEXT:    [[TMP3:%.*]] = lshr <3 x i32> [[TMP1]], [[TMP2]]
1774; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1775; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1776; VI-NEXT:    ret void
1777;
1778  %r = lshr <3 x i15> %a, %b
1779  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1780  ret void
1781}
1782
1783define amdgpu_kernel void @lshr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) {
1784; SI-LABEL: @lshr_exact_3xi15(
1785; SI-NEXT:    [[R:%.*]] = lshr exact <3 x i15> [[A:%.*]], [[B:%.*]]
1786; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1787; SI-NEXT:    ret void
1788;
1789; VI-LABEL: @lshr_exact_3xi15(
1790; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1791; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1792; VI-NEXT:    [[TMP3:%.*]] = lshr exact <3 x i32> [[TMP1]], [[TMP2]]
1793; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1794; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1795; VI-NEXT:    ret void
1796;
1797  %r = lshr exact <3 x i15> %a, %b
1798  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1799  ret void
1800}
1801
1802define amdgpu_kernel void @ashr_3xi15(<3 x i15> %a, <3 x i15> %b) {
1803; SI-LABEL: @ashr_3xi15(
1804; SI-NEXT:    [[R:%.*]] = ashr <3 x i15> [[A:%.*]], [[B:%.*]]
1805; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1806; SI-NEXT:    ret void
1807;
1808; VI-LABEL: @ashr_3xi15(
1809; VI-NEXT:    [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32>
1810; VI-NEXT:    [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
1811; VI-NEXT:    [[TMP3:%.*]] = ashr <3 x i32> [[TMP1]], [[TMP2]]
1812; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1813; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1814; VI-NEXT:    ret void
1815;
1816  %r = ashr <3 x i15> %a, %b
1817  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1818  ret void
1819}
1820
1821define amdgpu_kernel void @ashr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) {
1822; SI-LABEL: @ashr_exact_3xi15(
1823; SI-NEXT:    [[R:%.*]] = ashr exact <3 x i15> [[A:%.*]], [[B:%.*]]
1824; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1825; SI-NEXT:    ret void
1826;
1827; VI-LABEL: @ashr_exact_3xi15(
1828; VI-NEXT:    [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32>
1829; VI-NEXT:    [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
1830; VI-NEXT:    [[TMP3:%.*]] = ashr exact <3 x i32> [[TMP1]], [[TMP2]]
1831; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1832; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1833; VI-NEXT:    ret void
1834;
1835  %r = ashr exact <3 x i15> %a, %b
1836  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1837  ret void
1838}
1839
1840define amdgpu_kernel void @and_3xi15(<3 x i15> %a, <3 x i15> %b) {
1841; SI-LABEL: @and_3xi15(
1842; SI-NEXT:    [[R:%.*]] = and <3 x i15> [[A:%.*]], [[B:%.*]]
1843; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1844; SI-NEXT:    ret void
1845;
1846; VI-LABEL: @and_3xi15(
1847; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1848; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1849; VI-NEXT:    [[TMP3:%.*]] = and <3 x i32> [[TMP1]], [[TMP2]]
1850; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1851; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1852; VI-NEXT:    ret void
1853;
1854  %r = and <3 x i15> %a, %b
1855  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1856  ret void
1857}
1858
1859define amdgpu_kernel void @or_3xi15(<3 x i15> %a, <3 x i15> %b) {
1860; SI-LABEL: @or_3xi15(
1861; SI-NEXT:    [[R:%.*]] = or <3 x i15> [[A:%.*]], [[B:%.*]]
1862; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1863; SI-NEXT:    ret void
1864;
1865; VI-LABEL: @or_3xi15(
1866; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1867; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1868; VI-NEXT:    [[TMP3:%.*]] = or <3 x i32> [[TMP1]], [[TMP2]]
1869; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1870; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1871; VI-NEXT:    ret void
1872;
1873  %r = or <3 x i15> %a, %b
1874  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1875  ret void
1876}
1877
1878define amdgpu_kernel void @xor_3xi15(<3 x i15> %a, <3 x i15> %b) {
1879; SI-LABEL: @xor_3xi15(
1880; SI-NEXT:    [[R:%.*]] = xor <3 x i15> [[A:%.*]], [[B:%.*]]
1881; SI-NEXT:    store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
1882; SI-NEXT:    ret void
1883;
1884; VI-LABEL: @xor_3xi15(
1885; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1886; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1887; VI-NEXT:    [[TMP3:%.*]] = xor <3 x i32> [[TMP1]], [[TMP2]]
1888; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
1889; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
1890; VI-NEXT:    ret void
1891;
1892  %r = xor <3 x i15> %a, %b
1893  store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef
1894  ret void
1895}
1896
1897define amdgpu_kernel void @select_eq_3xi15(<3 x i15> %a, <3 x i15> %b) {
1898; SI-LABEL: @select_eq_3xi15(
1899; SI-NEXT:    [[CMP:%.*]] = icmp eq <3 x i15> [[A:%.*]], [[B:%.*]]
1900; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
1901; SI-NEXT:    store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
1902; SI-NEXT:    ret void
1903;
1904; VI-LABEL: @select_eq_3xi15(
1905; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1906; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1907; VI-NEXT:    [[TMP3:%.*]] = icmp eq <3 x i32> [[TMP1]], [[TMP2]]
1908; VI-NEXT:    [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32>
1909; VI-NEXT:    [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
1910; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
1911; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
1912; VI-NEXT:    store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
1913; VI-NEXT:    ret void
1914;
1915  %cmp = icmp eq <3 x i15> %a, %b
1916  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1917  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
1918  ret void
1919}
1920
1921define amdgpu_kernel void @select_ne_3xi15(<3 x i15> %a, <3 x i15> %b) {
1922; SI-LABEL: @select_ne_3xi15(
1923; SI-NEXT:    [[CMP:%.*]] = icmp ne <3 x i15> [[A:%.*]], [[B:%.*]]
1924; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
1925; SI-NEXT:    store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
1926; SI-NEXT:    ret void
1927;
1928; VI-LABEL: @select_ne_3xi15(
1929; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1930; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1931; VI-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP1]], [[TMP2]]
1932; VI-NEXT:    [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32>
1933; VI-NEXT:    [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
1934; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
1935; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
1936; VI-NEXT:    store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
1937; VI-NEXT:    ret void
1938;
1939  %cmp = icmp ne <3 x i15> %a, %b
1940  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1941  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
1942  ret void
1943}
1944
1945define amdgpu_kernel void @select_ugt_3xi15(<3 x i15> %a, <3 x i15> %b) {
1946; SI-LABEL: @select_ugt_3xi15(
1947; SI-NEXT:    [[CMP:%.*]] = icmp ugt <3 x i15> [[A:%.*]], [[B:%.*]]
1948; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
1949; SI-NEXT:    store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
1950; SI-NEXT:    ret void
1951;
1952; VI-LABEL: @select_ugt_3xi15(
1953; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1954; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1955; VI-NEXT:    [[TMP3:%.*]] = icmp ugt <3 x i32> [[TMP1]], [[TMP2]]
1956; VI-NEXT:    [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32>
1957; VI-NEXT:    [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
1958; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
1959; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
1960; VI-NEXT:    store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
1961; VI-NEXT:    ret void
1962;
1963  %cmp = icmp ugt <3 x i15> %a, %b
1964  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1965  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
1966  ret void
1967}
1968
1969define amdgpu_kernel void @select_uge_3xi15(<3 x i15> %a, <3 x i15> %b) {
1970; SI-LABEL: @select_uge_3xi15(
1971; SI-NEXT:    [[CMP:%.*]] = icmp uge <3 x i15> [[A:%.*]], [[B:%.*]]
1972; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
1973; SI-NEXT:    store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
1974; SI-NEXT:    ret void
1975;
1976; VI-LABEL: @select_uge_3xi15(
1977; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
1978; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
1979; VI-NEXT:    [[TMP3:%.*]] = icmp uge <3 x i32> [[TMP1]], [[TMP2]]
1980; VI-NEXT:    [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32>
1981; VI-NEXT:    [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
1982; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
1983; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
1984; VI-NEXT:    store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
1985; VI-NEXT:    ret void
1986;
1987  %cmp = icmp uge <3 x i15> %a, %b
1988  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
1989  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
1990  ret void
1991}
1992
1993define amdgpu_kernel void @select_ult_3xi15(<3 x i15> %a, <3 x i15> %b) {
1994; SI-LABEL: @select_ult_3xi15(
1995; SI-NEXT:    [[CMP:%.*]] = icmp ult <3 x i15> [[A:%.*]], [[B:%.*]]
1996; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
1997; SI-NEXT:    store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
1998; SI-NEXT:    ret void
1999;
2000; VI-LABEL: @select_ult_3xi15(
2001; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
2002; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
2003; VI-NEXT:    [[TMP3:%.*]] = icmp ult <3 x i32> [[TMP1]], [[TMP2]]
2004; VI-NEXT:    [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32>
2005; VI-NEXT:    [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
2006; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
2007; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
2008; VI-NEXT:    store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
2009; VI-NEXT:    ret void
2010;
2011  %cmp = icmp ult <3 x i15> %a, %b
2012  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
2013  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
2014  ret void
2015}
2016
2017define amdgpu_kernel void @select_ule_3xi15(<3 x i15> %a, <3 x i15> %b) {
2018; SI-LABEL: @select_ule_3xi15(
2019; SI-NEXT:    [[CMP:%.*]] = icmp ule <3 x i15> [[A:%.*]], [[B:%.*]]
2020; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
2021; SI-NEXT:    store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
2022; SI-NEXT:    ret void
2023;
2024; VI-LABEL: @select_ule_3xi15(
2025; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
2026; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
2027; VI-NEXT:    [[TMP3:%.*]] = icmp ule <3 x i32> [[TMP1]], [[TMP2]]
2028; VI-NEXT:    [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32>
2029; VI-NEXT:    [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
2030; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
2031; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
2032; VI-NEXT:    store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
2033; VI-NEXT:    ret void
2034;
2035  %cmp = icmp ule <3 x i15> %a, %b
2036  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
2037  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
2038  ret void
2039}
2040
2041define amdgpu_kernel void @select_sgt_3xi15(<3 x i15> %a, <3 x i15> %b) {
2042; SI-LABEL: @select_sgt_3xi15(
2043; SI-NEXT:    [[CMP:%.*]] = icmp sgt <3 x i15> [[A:%.*]], [[B:%.*]]
2044; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
2045; SI-NEXT:    store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
2046; SI-NEXT:    ret void
2047;
2048; VI-LABEL: @select_sgt_3xi15(
2049; VI-NEXT:    [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32>
2050; VI-NEXT:    [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
2051; VI-NEXT:    [[TMP3:%.*]] = icmp sgt <3 x i32> [[TMP1]], [[TMP2]]
2052; VI-NEXT:    [[TMP4:%.*]] = sext <3 x i15> [[A]] to <3 x i32>
2053; VI-NEXT:    [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32>
2054; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
2055; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
2056; VI-NEXT:    store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
2057; VI-NEXT:    ret void
2058;
2059  %cmp = icmp sgt <3 x i15> %a, %b
2060  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
2061  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
2062  ret void
2063}
2064
2065define amdgpu_kernel void @select_sge_3xi15(<3 x i15> %a, <3 x i15> %b) {
2066; SI-LABEL: @select_sge_3xi15(
2067; SI-NEXT:    [[CMP:%.*]] = icmp sge <3 x i15> [[A:%.*]], [[B:%.*]]
2068; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
2069; SI-NEXT:    store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
2070; SI-NEXT:    ret void
2071;
2072; VI-LABEL: @select_sge_3xi15(
2073; VI-NEXT:    [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32>
2074; VI-NEXT:    [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
2075; VI-NEXT:    [[TMP3:%.*]] = icmp sge <3 x i32> [[TMP1]], [[TMP2]]
2076; VI-NEXT:    [[TMP4:%.*]] = sext <3 x i15> [[A]] to <3 x i32>
2077; VI-NEXT:    [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32>
2078; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
2079; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
2080; VI-NEXT:    store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
2081; VI-NEXT:    ret void
2082;
2083  %cmp = icmp sge <3 x i15> %a, %b
2084  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
2085  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
2086  ret void
2087}
2088
2089define amdgpu_kernel void @select_slt_3xi15(<3 x i15> %a, <3 x i15> %b) {
2090; SI-LABEL: @select_slt_3xi15(
2091; SI-NEXT:    [[CMP:%.*]] = icmp slt <3 x i15> [[A:%.*]], [[B:%.*]]
2092; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
2093; SI-NEXT:    store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
2094; SI-NEXT:    ret void
2095;
2096; VI-LABEL: @select_slt_3xi15(
2097; VI-NEXT:    [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32>
2098; VI-NEXT:    [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
2099; VI-NEXT:    [[TMP3:%.*]] = icmp slt <3 x i32> [[TMP1]], [[TMP2]]
2100; VI-NEXT:    [[TMP4:%.*]] = sext <3 x i15> [[A]] to <3 x i32>
2101; VI-NEXT:    [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32>
2102; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
2103; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
2104; VI-NEXT:    store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
2105; VI-NEXT:    ret void
2106;
2107  %cmp = icmp slt <3 x i15> %a, %b
2108  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
2109  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
2110  ret void
2111}
2112
2113define amdgpu_kernel void @select_sle_3xi15(<3 x i15> %a, <3 x i15> %b) {
2114; SI-LABEL: @select_sle_3xi15(
2115; SI-NEXT:    [[CMP:%.*]] = icmp sle <3 x i15> [[A:%.*]], [[B:%.*]]
2116; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
2117; SI-NEXT:    store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
2118; SI-NEXT:    ret void
2119;
2120; VI-LABEL: @select_sle_3xi15(
2121; VI-NEXT:    [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32>
2122; VI-NEXT:    [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
2123; VI-NEXT:    [[TMP3:%.*]] = icmp sle <3 x i32> [[TMP1]], [[TMP2]]
2124; VI-NEXT:    [[TMP4:%.*]] = sext <3 x i15> [[A]] to <3 x i32>
2125; VI-NEXT:    [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32>
2126; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
2127; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
2128; VI-NEXT:    store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
2129; VI-NEXT:    ret void
2130;
2131  %cmp = icmp sle <3 x i15> %a, %b
2132  %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b
2133  store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef
2134  ret void
2135}
2136
2137declare <3 x i15> @llvm.bitreverse.v3i15(<3 x i15>)
2138define amdgpu_kernel void @bitreverse_3xi15(<3 x i15> %a) {
2139; SI-LABEL: @bitreverse_3xi15(
2140; SI-NEXT:    [[BREV:%.*]] = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> [[A:%.*]])
2141; SI-NEXT:    store volatile <3 x i15> [[BREV]], <3 x i15> addrspace(1)* undef, align 8
2142; SI-NEXT:    ret void
2143;
2144; VI-LABEL: @bitreverse_3xi15(
2145; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
2146; VI-NEXT:    [[TMP2:%.*]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> [[TMP1]])
2147; VI-NEXT:    [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], <i32 17, i32 17, i32 17>
2148; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
2149; VI-NEXT:    store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
2150; VI-NEXT:    ret void
2151;
2152  %brev = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> %a)
2153  store volatile <3 x i15> %brev, <3 x i15> addrspace(1)* undef
2154  ret void
2155}
2156
2157define amdgpu_kernel void @add_3xi16(<3 x i16> %a, <3 x i16> %b) {
2158; SI-LABEL: @add_3xi16(
2159; SI-NEXT:    [[R:%.*]] = add <3 x i16> [[A:%.*]], [[B:%.*]]
2160; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2161; SI-NEXT:    ret void
2162;
2163; VI-LABEL: @add_3xi16(
2164; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2165; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2166; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
2167; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2168; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2169; VI-NEXT:    ret void
2170;
2171  %r = add <3 x i16> %a, %b
2172  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2173  ret void
2174}
2175
2176define amdgpu_kernel void @add_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
2177; SI-LABEL: @add_nsw_3xi16(
2178; SI-NEXT:    [[R:%.*]] = add nsw <3 x i16> [[A:%.*]], [[B:%.*]]
2179; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2180; SI-NEXT:    ret void
2181;
2182; VI-LABEL: @add_nsw_3xi16(
2183; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2184; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2185; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
2186; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2187; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2188; VI-NEXT:    ret void
2189;
2190  %r = add nsw <3 x i16> %a, %b
2191  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2192  ret void
2193}
2194
2195define amdgpu_kernel void @add_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
2196; SI-LABEL: @add_nuw_3xi16(
2197; SI-NEXT:    [[R:%.*]] = add nuw <3 x i16> [[A:%.*]], [[B:%.*]]
2198; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2199; SI-NEXT:    ret void
2200;
2201; VI-LABEL: @add_nuw_3xi16(
2202; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2203; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2204; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
2205; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2206; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2207; VI-NEXT:    ret void
2208;
2209  %r = add nuw <3 x i16> %a, %b
2210  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2211  ret void
2212}
2213
2214define amdgpu_kernel void @add_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
2215; SI-LABEL: @add_nuw_nsw_3xi16(
2216; SI-NEXT:    [[R:%.*]] = add nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]]
2217; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2218; SI-NEXT:    ret void
2219;
2220; VI-LABEL: @add_nuw_nsw_3xi16(
2221; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2222; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2223; VI-NEXT:    [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
2224; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2225; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2226; VI-NEXT:    ret void
2227;
2228  %r = add nuw nsw <3 x i16> %a, %b
2229  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2230  ret void
2231}
2232
2233define amdgpu_kernel void @sub_3xi16(<3 x i16> %a, <3 x i16> %b) {
2234; SI-LABEL: @sub_3xi16(
2235; SI-NEXT:    [[R:%.*]] = sub <3 x i16> [[A:%.*]], [[B:%.*]]
2236; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2237; SI-NEXT:    ret void
2238;
2239; VI-LABEL: @sub_3xi16(
2240; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2241; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2242; VI-NEXT:    [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]]
2243; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2244; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2245; VI-NEXT:    ret void
2246;
2247  %r = sub <3 x i16> %a, %b
2248  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2249  ret void
2250}
2251
2252define amdgpu_kernel void @sub_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
2253; SI-LABEL: @sub_nsw_3xi16(
2254; SI-NEXT:    [[R:%.*]] = sub nsw <3 x i16> [[A:%.*]], [[B:%.*]]
2255; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2256; SI-NEXT:    ret void
2257;
2258; VI-LABEL: @sub_nsw_3xi16(
2259; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2260; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2261; VI-NEXT:    [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]]
2262; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2263; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2264; VI-NEXT:    ret void
2265;
2266  %r = sub nsw <3 x i16> %a, %b
2267  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2268  ret void
2269}
2270
2271define amdgpu_kernel void @sub_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
2272; SI-LABEL: @sub_nuw_3xi16(
2273; SI-NEXT:    [[R:%.*]] = sub nuw <3 x i16> [[A:%.*]], [[B:%.*]]
2274; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2275; SI-NEXT:    ret void
2276;
2277; VI-LABEL: @sub_nuw_3xi16(
2278; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2279; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2280; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
2281; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2282; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2283; VI-NEXT:    ret void
2284;
2285  %r = sub nuw <3 x i16> %a, %b
2286  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2287  ret void
2288}
2289
2290define amdgpu_kernel void @sub_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
2291; SI-LABEL: @sub_nuw_nsw_3xi16(
2292; SI-NEXT:    [[R:%.*]] = sub nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]]
2293; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2294; SI-NEXT:    ret void
2295;
2296; VI-LABEL: @sub_nuw_nsw_3xi16(
2297; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2298; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2299; VI-NEXT:    [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
2300; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2301; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2302; VI-NEXT:    ret void
2303;
2304  %r = sub nuw nsw <3 x i16> %a, %b
2305  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2306  ret void
2307}
2308
2309define amdgpu_kernel void @mul_3xi16(<3 x i16> %a, <3 x i16> %b) {
2310; SI-LABEL: @mul_3xi16(
2311; SI-NEXT:    [[R:%.*]] = mul <3 x i16> [[A:%.*]], [[B:%.*]]
2312; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2313; SI-NEXT:    ret void
2314;
2315; VI-LABEL: @mul_3xi16(
2316; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2317; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2318; VI-NEXT:    [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]]
2319; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2320; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2321; VI-NEXT:    ret void
2322;
2323  %r = mul <3 x i16> %a, %b
2324  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2325  ret void
2326}
2327
2328define amdgpu_kernel void @mul_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
2329; SI-LABEL: @mul_nsw_3xi16(
2330; SI-NEXT:    [[R:%.*]] = mul nsw <3 x i16> [[A:%.*]], [[B:%.*]]
2331; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2332; SI-NEXT:    ret void
2333;
2334; VI-LABEL: @mul_nsw_3xi16(
2335; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2336; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2337; VI-NEXT:    [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]]
2338; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2339; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2340; VI-NEXT:    ret void
2341;
2342  %r = mul nsw <3 x i16> %a, %b
2343  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2344  ret void
2345}
2346
2347define amdgpu_kernel void @mul_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
2348; SI-LABEL: @mul_nuw_3xi16(
2349; SI-NEXT:    [[R:%.*]] = mul nuw <3 x i16> [[A:%.*]], [[B:%.*]]
2350; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2351; SI-NEXT:    ret void
2352;
2353; VI-LABEL: @mul_nuw_3xi16(
2354; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2355; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2356; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
2357; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2358; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2359; VI-NEXT:    ret void
2360;
2361  %r = mul nuw <3 x i16> %a, %b
2362  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2363  ret void
2364}
2365
2366define amdgpu_kernel void @mul_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
2367; SI-LABEL: @mul_nuw_nsw_3xi16(
2368; SI-NEXT:    [[R:%.*]] = mul nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]]
2369; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2370; SI-NEXT:    ret void
2371;
2372; VI-LABEL: @mul_nuw_nsw_3xi16(
2373; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2374; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2375; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
2376; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2377; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2378; VI-NEXT:    ret void
2379;
2380  %r = mul nuw nsw <3 x i16> %a, %b
2381  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2382  ret void
2383}
2384
2385define amdgpu_kernel void @shl_3xi16(<3 x i16> %a, <3 x i16> %b) {
2386; SI-LABEL: @shl_3xi16(
2387; SI-NEXT:    [[R:%.*]] = shl <3 x i16> [[A:%.*]], [[B:%.*]]
2388; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2389; SI-NEXT:    ret void
2390;
2391; VI-LABEL: @shl_3xi16(
2392; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2393; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2394; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
2395; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2396; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2397; VI-NEXT:    ret void
2398;
2399  %r = shl <3 x i16> %a, %b
2400  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2401  ret void
2402}
2403
2404define amdgpu_kernel void @shl_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
2405; SI-LABEL: @shl_nsw_3xi16(
2406; SI-NEXT:    [[R:%.*]] = shl nsw <3 x i16> [[A:%.*]], [[B:%.*]]
2407; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2408; SI-NEXT:    ret void
2409;
2410; VI-LABEL: @shl_nsw_3xi16(
2411; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2412; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2413; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
2414; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2415; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2416; VI-NEXT:    ret void
2417;
2418  %r = shl nsw <3 x i16> %a, %b
2419  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2420  ret void
2421}
2422
2423define amdgpu_kernel void @shl_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
2424; SI-LABEL: @shl_nuw_3xi16(
2425; SI-NEXT:    [[R:%.*]] = shl nuw <3 x i16> [[A:%.*]], [[B:%.*]]
2426; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2427; SI-NEXT:    ret void
2428;
2429; VI-LABEL: @shl_nuw_3xi16(
2430; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2431; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2432; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
2433; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2434; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2435; VI-NEXT:    ret void
2436;
2437  %r = shl nuw <3 x i16> %a, %b
2438  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2439  ret void
2440}
2441
2442define amdgpu_kernel void @shl_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
2443; SI-LABEL: @shl_nuw_nsw_3xi16(
2444; SI-NEXT:    [[R:%.*]] = shl nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]]
2445; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2446; SI-NEXT:    ret void
2447;
2448; VI-LABEL: @shl_nuw_nsw_3xi16(
2449; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2450; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2451; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
2452; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2453; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2454; VI-NEXT:    ret void
2455;
2456  %r = shl nuw nsw <3 x i16> %a, %b
2457  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2458  ret void
2459}
2460
2461define amdgpu_kernel void @lshr_3xi16(<3 x i16> %a, <3 x i16> %b) {
2462; SI-LABEL: @lshr_3xi16(
2463; SI-NEXT:    [[R:%.*]] = lshr <3 x i16> [[A:%.*]], [[B:%.*]]
2464; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2465; SI-NEXT:    ret void
2466;
2467; VI-LABEL: @lshr_3xi16(
2468; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2469; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2470; VI-NEXT:    [[TMP3:%.*]] = lshr <3 x i32> [[TMP1]], [[TMP2]]
2471; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2472; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2473; VI-NEXT:    ret void
2474;
2475  %r = lshr <3 x i16> %a, %b
2476  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2477  ret void
2478}
2479
2480define amdgpu_kernel void @lshr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
2481; SI-LABEL: @lshr_exact_3xi16(
2482; SI-NEXT:    [[R:%.*]] = lshr exact <3 x i16> [[A:%.*]], [[B:%.*]]
2483; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2484; SI-NEXT:    ret void
2485;
2486; VI-LABEL: @lshr_exact_3xi16(
2487; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2488; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2489; VI-NEXT:    [[TMP3:%.*]] = lshr exact <3 x i32> [[TMP1]], [[TMP2]]
2490; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2491; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2492; VI-NEXT:    ret void
2493;
2494  %r = lshr exact <3 x i16> %a, %b
2495  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2496  ret void
2497}
2498
2499define amdgpu_kernel void @ashr_3xi16(<3 x i16> %a, <3 x i16> %b) {
2500; SI-LABEL: @ashr_3xi16(
2501; SI-NEXT:    [[R:%.*]] = ashr <3 x i16> [[A:%.*]], [[B:%.*]]
2502; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2503; SI-NEXT:    ret void
2504;
2505; VI-LABEL: @ashr_3xi16(
2506; VI-NEXT:    [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
2507; VI-NEXT:    [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
2508; VI-NEXT:    [[TMP3:%.*]] = ashr <3 x i32> [[TMP1]], [[TMP2]]
2509; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2510; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2511; VI-NEXT:    ret void
2512;
2513  %r = ashr <3 x i16> %a, %b
2514  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2515  ret void
2516}
2517
2518define amdgpu_kernel void @ashr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
2519; SI-LABEL: @ashr_exact_3xi16(
2520; SI-NEXT:    [[R:%.*]] = ashr exact <3 x i16> [[A:%.*]], [[B:%.*]]
2521; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2522; SI-NEXT:    ret void
2523;
2524; VI-LABEL: @ashr_exact_3xi16(
2525; VI-NEXT:    [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
2526; VI-NEXT:    [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
2527; VI-NEXT:    [[TMP3:%.*]] = ashr exact <3 x i32> [[TMP1]], [[TMP2]]
2528; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2529; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2530; VI-NEXT:    ret void
2531;
2532  %r = ashr exact <3 x i16> %a, %b
2533  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2534  ret void
2535}
2536
2537define amdgpu_kernel void @and_3xi16(<3 x i16> %a, <3 x i16> %b) {
2538; SI-LABEL: @and_3xi16(
2539; SI-NEXT:    [[R:%.*]] = and <3 x i16> [[A:%.*]], [[B:%.*]]
2540; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2541; SI-NEXT:    ret void
2542;
2543; VI-LABEL: @and_3xi16(
2544; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2545; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2546; VI-NEXT:    [[TMP3:%.*]] = and <3 x i32> [[TMP1]], [[TMP2]]
2547; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2548; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2549; VI-NEXT:    ret void
2550;
2551  %r = and <3 x i16> %a, %b
2552  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2553  ret void
2554}
2555
2556define amdgpu_kernel void @or_3xi16(<3 x i16> %a, <3 x i16> %b) {
2557; SI-LABEL: @or_3xi16(
2558; SI-NEXT:    [[R:%.*]] = or <3 x i16> [[A:%.*]], [[B:%.*]]
2559; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2560; SI-NEXT:    ret void
2561;
2562; VI-LABEL: @or_3xi16(
2563; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2564; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2565; VI-NEXT:    [[TMP3:%.*]] = or <3 x i32> [[TMP1]], [[TMP2]]
2566; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2567; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2568; VI-NEXT:    ret void
2569;
2570  %r = or <3 x i16> %a, %b
2571  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2572  ret void
2573}
2574
2575define amdgpu_kernel void @xor_3xi16(<3 x i16> %a, <3 x i16> %b) {
2576; SI-LABEL: @xor_3xi16(
2577; SI-NEXT:    [[R:%.*]] = xor <3 x i16> [[A:%.*]], [[B:%.*]]
2578; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
2579; SI-NEXT:    ret void
2580;
2581; VI-LABEL: @xor_3xi16(
2582; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2583; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2584; VI-NEXT:    [[TMP3:%.*]] = xor <3 x i32> [[TMP1]], [[TMP2]]
2585; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2586; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2587; VI-NEXT:    ret void
2588;
2589  %r = xor <3 x i16> %a, %b
2590  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
2591  ret void
2592}
2593
2594define amdgpu_kernel void @select_eq_3xi16(<3 x i16> %a, <3 x i16> %b) {
2595; SI-LABEL: @select_eq_3xi16(
2596; SI-NEXT:    [[CMP:%.*]] = icmp eq <3 x i16> [[A:%.*]], [[B:%.*]]
2597; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
2598; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
2599; SI-NEXT:    ret void
2600;
2601; VI-LABEL: @select_eq_3xi16(
2602; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2603; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2604; VI-NEXT:    [[TMP3:%.*]] = icmp eq <3 x i32> [[TMP1]], [[TMP2]]
2605; VI-NEXT:    [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
2606; VI-NEXT:    [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
2607; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
2608; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
2609; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
2610; VI-NEXT:    ret void
2611;
2612  %cmp = icmp eq <3 x i16> %a, %b
2613  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2614  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
2615  ret void
2616}
2617
2618define amdgpu_kernel void @select_ne_3xi16(<3 x i16> %a, <3 x i16> %b) {
2619; SI-LABEL: @select_ne_3xi16(
2620; SI-NEXT:    [[CMP:%.*]] = icmp ne <3 x i16> [[A:%.*]], [[B:%.*]]
2621; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
2622; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
2623; SI-NEXT:    ret void
2624;
2625; VI-LABEL: @select_ne_3xi16(
2626; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2627; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2628; VI-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP1]], [[TMP2]]
2629; VI-NEXT:    [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
2630; VI-NEXT:    [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
2631; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
2632; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
2633; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
2634; VI-NEXT:    ret void
2635;
2636  %cmp = icmp ne <3 x i16> %a, %b
2637  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2638  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
2639  ret void
2640}
2641
2642define amdgpu_kernel void @select_ugt_3xi16(<3 x i16> %a, <3 x i16> %b) {
2643; SI-LABEL: @select_ugt_3xi16(
2644; SI-NEXT:    [[CMP:%.*]] = icmp ugt <3 x i16> [[A:%.*]], [[B:%.*]]
2645; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
2646; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
2647; SI-NEXT:    ret void
2648;
2649; VI-LABEL: @select_ugt_3xi16(
2650; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2651; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2652; VI-NEXT:    [[TMP3:%.*]] = icmp ugt <3 x i32> [[TMP1]], [[TMP2]]
2653; VI-NEXT:    [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
2654; VI-NEXT:    [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
2655; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
2656; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
2657; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
2658; VI-NEXT:    ret void
2659;
2660  %cmp = icmp ugt <3 x i16> %a, %b
2661  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2662  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
2663  ret void
2664}
2665
2666define amdgpu_kernel void @select_uge_3xi16(<3 x i16> %a, <3 x i16> %b) {
2667; SI-LABEL: @select_uge_3xi16(
2668; SI-NEXT:    [[CMP:%.*]] = icmp uge <3 x i16> [[A:%.*]], [[B:%.*]]
2669; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
2670; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
2671; SI-NEXT:    ret void
2672;
2673; VI-LABEL: @select_uge_3xi16(
2674; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2675; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2676; VI-NEXT:    [[TMP3:%.*]] = icmp uge <3 x i32> [[TMP1]], [[TMP2]]
2677; VI-NEXT:    [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
2678; VI-NEXT:    [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
2679; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
2680; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
2681; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
2682; VI-NEXT:    ret void
2683;
2684  %cmp = icmp uge <3 x i16> %a, %b
2685  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2686  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
2687  ret void
2688}
2689
2690define amdgpu_kernel void @select_ult_3xi16(<3 x i16> %a, <3 x i16> %b) {
2691; SI-LABEL: @select_ult_3xi16(
2692; SI-NEXT:    [[CMP:%.*]] = icmp ult <3 x i16> [[A:%.*]], [[B:%.*]]
2693; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
2694; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
2695; SI-NEXT:    ret void
2696;
2697; VI-LABEL: @select_ult_3xi16(
2698; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2699; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2700; VI-NEXT:    [[TMP3:%.*]] = icmp ult <3 x i32> [[TMP1]], [[TMP2]]
2701; VI-NEXT:    [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
2702; VI-NEXT:    [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
2703; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
2704; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
2705; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
2706; VI-NEXT:    ret void
2707;
2708  %cmp = icmp ult <3 x i16> %a, %b
2709  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2710  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
2711  ret void
2712}
2713
2714define amdgpu_kernel void @select_ule_3xi16(<3 x i16> %a, <3 x i16> %b) {
2715; SI-LABEL: @select_ule_3xi16(
2716; SI-NEXT:    [[CMP:%.*]] = icmp ule <3 x i16> [[A:%.*]], [[B:%.*]]
2717; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
2718; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
2719; SI-NEXT:    ret void
2720;
2721; VI-LABEL: @select_ule_3xi16(
2722; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2723; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
2724; VI-NEXT:    [[TMP3:%.*]] = icmp ule <3 x i32> [[TMP1]], [[TMP2]]
2725; VI-NEXT:    [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
2726; VI-NEXT:    [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
2727; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
2728; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
2729; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
2730; VI-NEXT:    ret void
2731;
2732  %cmp = icmp ule <3 x i16> %a, %b
2733  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2734  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
2735  ret void
2736}
2737
2738define amdgpu_kernel void @select_sgt_3xi16(<3 x i16> %a, <3 x i16> %b) {
2739; SI-LABEL: @select_sgt_3xi16(
2740; SI-NEXT:    [[CMP:%.*]] = icmp sgt <3 x i16> [[A:%.*]], [[B:%.*]]
2741; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
2742; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
2743; SI-NEXT:    ret void
2744;
2745; VI-LABEL: @select_sgt_3xi16(
2746; VI-NEXT:    [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
2747; VI-NEXT:    [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
2748; VI-NEXT:    [[TMP3:%.*]] = icmp sgt <3 x i32> [[TMP1]], [[TMP2]]
2749; VI-NEXT:    [[TMP4:%.*]] = sext <3 x i16> [[A]] to <3 x i32>
2750; VI-NEXT:    [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
2751; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
2752; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
2753; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
2754; VI-NEXT:    ret void
2755;
2756  %cmp = icmp sgt <3 x i16> %a, %b
2757  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2758  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
2759  ret void
2760}
2761
2762define amdgpu_kernel void @select_sge_3xi16(<3 x i16> %a, <3 x i16> %b) {
2763; SI-LABEL: @select_sge_3xi16(
2764; SI-NEXT:    [[CMP:%.*]] = icmp sge <3 x i16> [[A:%.*]], [[B:%.*]]
2765; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
2766; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
2767; SI-NEXT:    ret void
2768;
2769; VI-LABEL: @select_sge_3xi16(
2770; VI-NEXT:    [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
2771; VI-NEXT:    [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
2772; VI-NEXT:    [[TMP3:%.*]] = icmp sge <3 x i32> [[TMP1]], [[TMP2]]
2773; VI-NEXT:    [[TMP4:%.*]] = sext <3 x i16> [[A]] to <3 x i32>
2774; VI-NEXT:    [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
2775; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
2776; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
2777; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
2778; VI-NEXT:    ret void
2779;
2780  %cmp = icmp sge <3 x i16> %a, %b
2781  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2782  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
2783  ret void
2784}
2785
2786define amdgpu_kernel void @select_slt_3xi16(<3 x i16> %a, <3 x i16> %b) {
2787; SI-LABEL: @select_slt_3xi16(
2788; SI-NEXT:    [[CMP:%.*]] = icmp slt <3 x i16> [[A:%.*]], [[B:%.*]]
2789; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
2790; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
2791; SI-NEXT:    ret void
2792;
2793; VI-LABEL: @select_slt_3xi16(
2794; VI-NEXT:    [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
2795; VI-NEXT:    [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
2796; VI-NEXT:    [[TMP3:%.*]] = icmp slt <3 x i32> [[TMP1]], [[TMP2]]
2797; VI-NEXT:    [[TMP4:%.*]] = sext <3 x i16> [[A]] to <3 x i32>
2798; VI-NEXT:    [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
2799; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
2800; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
2801; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
2802; VI-NEXT:    ret void
2803;
2804  %cmp = icmp slt <3 x i16> %a, %b
2805  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2806  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
2807  ret void
2808}
2809
2810define amdgpu_kernel void @select_sle_3xi16(<3 x i16> %a, <3 x i16> %b) {
2811; SI-LABEL: @select_sle_3xi16(
2812; SI-NEXT:    [[CMP:%.*]] = icmp sle <3 x i16> [[A:%.*]], [[B:%.*]]
2813; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
2814; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
2815; SI-NEXT:    ret void
2816;
2817; VI-LABEL: @select_sle_3xi16(
2818; VI-NEXT:    [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
2819; VI-NEXT:    [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
2820; VI-NEXT:    [[TMP3:%.*]] = icmp sle <3 x i32> [[TMP1]], [[TMP2]]
2821; VI-NEXT:    [[TMP4:%.*]] = sext <3 x i16> [[A]] to <3 x i32>
2822; VI-NEXT:    [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
2823; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
2824; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
2825; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
2826; VI-NEXT:    ret void
2827;
2828  %cmp = icmp sle <3 x i16> %a, %b
2829  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
2830  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
2831  ret void
2832}
2833
2834declare <3 x i16> @llvm.bitreverse.v3i16(<3 x i16>)
2835
2836define amdgpu_kernel void @bitreverse_3xi16(<3 x i16> %a) {
2837; SI-LABEL: @bitreverse_3xi16(
2838; SI-NEXT:    [[BREV:%.*]] = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> [[A:%.*]])
2839; SI-NEXT:    store volatile <3 x i16> [[BREV]], <3 x i16> addrspace(1)* undef, align 8
2840; SI-NEXT:    ret void
2841;
2842; VI-LABEL: @bitreverse_3xi16(
2843; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
2844; VI-NEXT:    [[TMP2:%.*]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> [[TMP1]])
2845; VI-NEXT:    [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], <i32 16, i32 16, i32 16>
2846; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
2847; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
2848; VI-NEXT:    ret void
2849;
2850  %brev = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> %a)
2851  store volatile <3 x i16> %brev, <3 x i16> addrspace(1)* undef
2852  ret void
2853}
2854