; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefixes=EG,FUNC %s

; Round-trips a single i1: loads it through the addrspace(3) pointer %in and
; stores it through %out.
; The SICIVI check prefix expects an `s_mov_b32 m0`; the GFX9 check prefix
; expects no m0 ahead of the byte read. On the amdgcn runs the read byte is
; expected to be ANDed with 1 and written back as a byte; the r600 run expects
; the equivalent LDS read / AND / LDS write sequence.
; FUNC-LABEL: {{^}}local_load_i1:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read_u8
; GCN: v_and_b32_e32 v{{[0-9]+}}, 1
; GCN: ds_write_b8

; EG: LDS_UBYTE_READ_RET
; EG: AND_INT
; EG: LDS_BYTE_WRITE
define amdgpu_kernel void @local_load_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
  %load = load i1, i1 addrspace(3)* %in
  store i1 %load, i1 addrspace(3)* %out
  ret void
}

; Loads a <2 x i1> through the addrspace(3) pointer %in and stores it unchanged
; through %out. Checks cover m0 only: an `s_mov_b32 m0` under the SICIVI check
; prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_load_v2i1:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_load_v2i1(<2 x i1> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
  %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
  store <2 x i1> %load, <2 x i1> addrspace(3)* %out
  ret void
}

; Loads a <3 x i1> through the addrspace(3) pointer %in and stores it unchanged
; through %out. Checks cover m0 only: an `s_mov_b32 m0` under the SICIVI check
; prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_load_v3i1:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_load_v3i1(<3 x i1> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
  %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
  store <3 x i1> %load, <3 x i1> addrspace(3)* %out
  ret void
}

; Loads a <4 x i1> through the addrspace(3) pointer %in and stores it unchanged
; through %out. Checks cover m0 only: an `s_mov_b32 m0` under the SICIVI check
; prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_load_v4i1:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_load_v4i1(<4 x i1> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
  %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
  store <4 x i1> %load, <4 x i1> addrspace(3)* %out
  ret void
}

; Loads an <8 x i1> through the addrspace(3) pointer %in and stores it unchanged
; through %out. Checks cover m0 only: an `s_mov_b32 m0` under the SICIVI check
; prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_load_v8i1:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_load_v8i1(<8 x i1> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
  %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
  store <8 x i1> %load, <8 x i1> addrspace(3)* %out
  ret void
}

; Loads a <16 x i1> through the addrspace(3) pointer %in and stores it unchanged
; through %out. Checks cover m0 only: an `s_mov_b32 m0` under the SICIVI check
; prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_load_v16i1:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_load_v16i1(<16 x i1> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
  %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
  store <16 x i1> %load, <16 x i1> addrspace(3)* %out
  ret void
}

; Loads a <32 x i1> through the addrspace(3) pointer %in and stores it unchanged
; through %out. Checks cover m0 only: an `s_mov_b32 m0` under the SICIVI check
; prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_load_v32i1:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_load_v32i1(<32 x i1> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
  %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
  store <32 x i1> %load, <32 x i1> addrspace(3)* %out
  ret void
}

; Loads a <64 x i1> through the addrspace(3) pointer %in and stores it unchanged
; through %out. Checks cover m0 only: an `s_mov_b32 m0` under the SICIVI check
; prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_load_v64i1:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_load_v64i1(<64 x i1> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
  %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
  store <64 x i1> %load, <64 x i1> addrspace(3)* %out
  ret void
}

; Zero-extends an i1 loaded through the addrspace(3) pointer %in to i32 and
; stores the result through %out.
; The SICIVI check prefix expects an `s_mov_b32 m0`; the GFX9 check prefix
; expects no m0 ahead of the byte read. The amdgcn runs expect a byte read
; followed by a 32-bit write. The r600 run expects the same unsigned-byte LDS
; read that the other scalar i1 tests in this file check for.
; FUNC-LABEL: {{^}}local_zextload_i1_to_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read_u8
; GCN: ds_write_b32

; EG: LDS_UBYTE_READ_RET
define amdgpu_kernel void @local_zextload_i1_to_i32(i32 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
  %a = load i1, i1 addrspace(3)* %in
  %ext = zext i1 %a to i32
  store i32 %ext, i32 addrspace(3)* %out
  ret void
}

; Sign-extends an i1 loaded through the addrspace(3) pointer %in to i32 and
; stores the result through %out.
; The SICIVI check prefix expects an `s_mov_b32 m0`; the GFX9 check prefix
; expects no m0 ahead of the byte read. The amdgcn runs expect a byte read, a
; signed bitfield extract at offset 0 with width 1, then a 32-bit write. The
; r600 run expects an unsigned-byte LDS read followed by BFE_INT.
; FUNC-LABEL: {{^}}local_sextload_i1_to_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read_u8
; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
; GCN: ds_write_b32

; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
define amdgpu_kernel void @local_sextload_i1_to_i32(i32 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
  %a = load i1, i1 addrspace(3)* %in
  %ext = sext i1 %a to i32
  store i32 %ext, i32 addrspace(3)* %out
  ret void
}

; Zero-extends a <1 x i1> loaded through the addrspace(3) pointer %in to
; <1 x i32> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_zextload_v1i1_to_v1i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_zextload_v1i1_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
  %load = load <1 x i1>, <1 x i1> addrspace(3)* %in
  %ext = zext <1 x i1> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
  ret void
}

; Sign-extends a <1 x i1> loaded through the addrspace(3) pointer %in to
; <1 x i32> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_sextload_v1i1_to_v1i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_sextload_v1i1_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
  %load = load <1 x i1>, <1 x i1> addrspace(3)* %in
  %ext = sext <1 x i1> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
  ret void
}

; Zero-extends a <2 x i1> loaded through the addrspace(3) pointer %in to
; <2 x i32> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_zextload_v2i1_to_v2i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_zextload_v2i1_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
  %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
  %ext = zext <2 x i1> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
  ret void
}

; Sign-extends a <2 x i1> loaded through the addrspace(3) pointer %in to
; <2 x i32> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_sextload_v2i1_to_v2i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_sextload_v2i1_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
  %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
  %ext = sext <2 x i1> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
  ret void
}

; Zero-extends a <3 x i1> loaded through the addrspace(3) pointer %in to
; <3 x i32> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_zextload_v3i1_to_v3i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_zextload_v3i1_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
  %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
  %ext = zext <3 x i1> %load to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
  ret void
}

; Sign-extends a <3 x i1> loaded through the addrspace(3) pointer %in to
; <3 x i32> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_sextload_v3i1_to_v3i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_sextload_v3i1_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
  %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
  %ext = sext <3 x i1> %load to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
  ret void
}

; Zero-extends a <4 x i1> loaded through the addrspace(3) pointer %in to
; <4 x i32> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_zextload_v4i1_to_v4i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_zextload_v4i1_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
  %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
  %ext = zext <4 x i1> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
  ret void
}

; Sign-extends a <4 x i1> loaded through the addrspace(3) pointer %in to
; <4 x i32> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_sextload_v4i1_to_v4i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_sextload_v4i1_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
  %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
  %ext = sext <4 x i1> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
  ret void
}

; Zero-extends an <8 x i1> loaded through the addrspace(3) pointer %in to
; <8 x i32> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_zextload_v8i1_to_v8i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_zextload_v8i1_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
  %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
  %ext = zext <8 x i1> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
  ret void
}

; Sign-extends an <8 x i1> loaded through the addrspace(3) pointer %in to
; <8 x i32> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_sextload_v8i1_to_v8i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_sextload_v8i1_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
  %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
  %ext = sext <8 x i1> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
  ret void
}

; Zero-extends a <16 x i1> loaded through the addrspace(3) pointer %in to
; <16 x i32> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_zextload_v16i1_to_v16i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_zextload_v16i1_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
  %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
  %ext = zext <16 x i1> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
  ret void
}

; Sign-extends a <16 x i1> loaded through the addrspace(3) pointer %in to
; <16 x i32> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_sextload_v16i1_to_v16i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_sextload_v16i1_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
  %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
  %ext = sext <16 x i1> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
  ret void
}

; Zero-extends a <32 x i1> loaded through the addrspace(3) pointer %in to
; <32 x i32> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_zextload_v32i1_to_v32i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_zextload_v32i1_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
  %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
  %ext = zext <32 x i1> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
  ret void
}

; Sign-extends a <32 x i1> loaded through the addrspace(3) pointer %in to
; <32 x i32> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_sextload_v32i1_to_v32i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_sextload_v32i1_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
  %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
  %ext = sext <32 x i1> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
  ret void
}

; Zero-extends a <64 x i1> loaded through the addrspace(3) pointer %in to
; <64 x i32> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_zextload_v64i1_to_v64i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_zextload_v64i1_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
  %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
  %ext = zext <64 x i1> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
  ret void
}

; Sign-extends a <64 x i1> loaded through the addrspace(3) pointer %in to
; <64 x i32> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_sextload_v64i1_to_v64i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_sextload_v64i1_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
  %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
  %ext = sext <64 x i1> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
  ret void
}

; Zero-extends an i1 loaded through the addrspace(3) pointer %in to i64 and
; stores the result through %out.
; The SICIVI check prefix expects an `s_mov_b32 m0`; the GFX9 check prefix
; expects no m0 ahead of the checks that follow it. The amdgcn runs expect a
; byte read and a move of constant 0 into a VGPR, in either order, followed by
; a 64-bit write.
; FUNC-LABEL: {{^}}local_zextload_i1_to_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: ds_read_u8 [[LOAD:v[0-9]+]],
; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
; GCN: ds_write_b64
define amdgpu_kernel void @local_zextload_i1_to_i64(i64 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
  %a = load i1, i1 addrspace(3)* %in
  %ext = zext i1 %a to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}

; Sign-extends an i1 loaded through the addrspace(3) pointer %in to i64 and
; stores the result through %out.
; The SICIVI check prefix expects an `s_mov_b32 m0`; the GFX9 check prefix
; expects no m0 ahead of the byte read. The amdgcn runs expect a byte read, a
; signed bitfield extract at offset 0 with width 1, an arithmetic shift right
; by 31 of that extract's result, then a 64-bit write.
; FUNC-LABEL: {{^}}local_sextload_i1_to_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read_u8 [[LOAD:v[0-9]+]],
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
; GCN: ds_write_b64
define amdgpu_kernel void @local_sextload_i1_to_i64(i64 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
  %a = load i1, i1 addrspace(3)* %in
  %ext = sext i1 %a to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}

; Zero-extends a <1 x i1> loaded through the addrspace(3) pointer %in to
; <1 x i64> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_zextload_v1i1_to_v1i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_zextload_v1i1_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
  %load = load <1 x i1>, <1 x i1> addrspace(3)* %in
  %ext = zext <1 x i1> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}

; Sign-extends a <1 x i1> loaded through the addrspace(3) pointer %in to
; <1 x i64> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_sextload_v1i1_to_v1i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_sextload_v1i1_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
  %load = load <1 x i1>, <1 x i1> addrspace(3)* %in
  %ext = sext <1 x i1> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}

; Zero-extends a <2 x i1> loaded through the addrspace(3) pointer %in to
; <2 x i64> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_zextload_v2i1_to_v2i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_zextload_v2i1_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
  %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
  %ext = zext <2 x i1> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}

; Sign-extends a <2 x i1> loaded through the addrspace(3) pointer %in to
; <2 x i64> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_sextload_v2i1_to_v2i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_sextload_v2i1_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
  %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
  %ext = sext <2 x i1> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}

; Zero-extends a <3 x i1> loaded through the addrspace(3) pointer %in to
; <3 x i64> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_zextload_v3i1_to_v3i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_zextload_v3i1_to_v3i64(<3 x i64> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
  %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
  %ext = zext <3 x i1> %load to <3 x i64>
  store <3 x i64> %ext, <3 x i64> addrspace(3)* %out
  ret void
}

; Sign-extends a <3 x i1> loaded through the addrspace(3) pointer %in to
; <3 x i64> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_sextload_v3i1_to_v3i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_sextload_v3i1_to_v3i64(<3 x i64> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
  %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
  %ext = sext <3 x i1> %load to <3 x i64>
  store <3 x i64> %ext, <3 x i64> addrspace(3)* %out
  ret void
}

; Zero-extends a <4 x i1> loaded through the addrspace(3) pointer %in to
; <4 x i64> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_zextload_v4i1_to_v4i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_zextload_v4i1_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
  %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
  %ext = zext <4 x i1> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}

; Sign-extends a <4 x i1> loaded through the addrspace(3) pointer %in to
; <4 x i64> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_sextload_v4i1_to_v4i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_sextload_v4i1_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
  %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
  %ext = sext <4 x i1> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}

; Zero-extends an <8 x i1> loaded through the addrspace(3) pointer %in to
; <8 x i64> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_zextload_v8i1_to_v8i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_zextload_v8i1_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
  %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
  %ext = zext <8 x i1> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}

; Sign-extends an <8 x i1> loaded through the addrspace(3) pointer %in to
; <8 x i64> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_sextload_v8i1_to_v8i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_sextload_v8i1_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
  %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
  %ext = sext <8 x i1> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}

; Zero-extends a <16 x i1> loaded through the addrspace(3) pointer %in to
; <16 x i64> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_zextload_v16i1_to_v16i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_zextload_v16i1_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
  %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
  %ext = zext <16 x i1> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
  ret void
}

; Sign-extends a <16 x i1> loaded through the addrspace(3) pointer %in to
; <16 x i64> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_sextload_v16i1_to_v16i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_sextload_v16i1_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
  %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
  %ext = sext <16 x i1> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
  ret void
}

; Zero-extends a <32 x i1> loaded through the addrspace(3) pointer %in to
; <32 x i64> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_zextload_v32i1_to_v32i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_zextload_v32i1_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
  %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
  %ext = zext <32 x i1> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
  ret void
}

; Sign-extends a <32 x i1> loaded through the addrspace(3) pointer %in to
; <32 x i64> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_sextload_v32i1_to_v32i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_sextload_v32i1_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
  %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
  %ext = sext <32 x i1> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
  ret void
}

; Zero-extends a <64 x i1> loaded through the addrspace(3) pointer %in to
; <64 x i64> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_zextload_v64i1_to_v64i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_zextload_v64i1_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
  %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
  %ext = zext <64 x i1> %load to <64 x i64>
  store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
  ret void
}

; Sign-extends a <64 x i1> loaded through the addrspace(3) pointer %in to
; <64 x i64> and stores it through %out. Checks cover m0 only: an `s_mov_b32 m0`
; under the SICIVI check prefix, and no m0 under the GFX9 check prefix.
; FUNC-LABEL: {{^}}local_sextload_v64i1_to_v64i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @local_sextload_v64i1_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
  %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
  %ext = sext <64 x i1> %load to <64 x i64>
  store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
  ret void
}

; Attribute group #0, referenced by the kernel definitions in this file;
; it carries only the nounwind function attribute.
attributes #0 = { nounwind }
