1; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG
2; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG
3; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI
4
5; EG-LABEL: {{^}}i8_arg:
6; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
7; SI-LABEL: {{^}}i8_arg:
8; SI: buffer_load_ubyte
9
10define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { ; %in is an i8 with neither zeroext nor signext
11entry:
12  %0 = zext i8 %in to i32 ; zero-extend the byte to i32 inside the function body
13  store i32 %0, i32 addrspace(1)* %out, align 4 ; write the widened value through %out (address space 1)
14  ret void
15}
16
17; EG-LABEL: {{^}}i8_zext_arg:
18; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
19; SI-LABEL: {{^}}i8_zext_arg:
20; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
21
22define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { ; %in is an i8 carrying the zeroext attribute
23entry:
24  %0 = zext i8 %in to i32 ; explicit zero-extension to i32, matching the parameter attribute
25  store i32 %0, i32 addrspace(1)* %out, align 4 ; write the widened value through %out (address space 1)
26  ret void
27}
28
29; EG-LABEL: {{^}}i8_sext_arg:
30; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
31; SI-LABEL: {{^}}i8_sext_arg:
32; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
33
34define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind { ; %in is an i8 carrying the signext attribute
35entry:
36  %0 = sext i8 %in to i32 ; explicit sign-extension to i32, matching the parameter attribute
37  store i32 %0, i32 addrspace(1)* %out, align 4 ; write the widened value through %out (address space 1)
38  ret void
39}
40
41; EG-LABEL: {{^}}i16_arg:
42; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
43; SI-LABEL: {{^}}i16_arg:
44; SI: buffer_load_ushort
45
46define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { ; %in is an i16 with neither zeroext nor signext
47entry:
48  %0 = zext i16 %in to i32 ; zero-extend the 16-bit value to i32 inside the function body
49  store i32 %0, i32 addrspace(1)* %out, align 4 ; write the widened value through %out (address space 1)
50  ret void
51}
52
53; EG-LABEL: {{^}}i16_zext_arg:
54; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
55; SI-LABEL: {{^}}i16_zext_arg:
56; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
57
58define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { ; %in is an i16 carrying the zeroext attribute
59entry:
60  %0 = zext i16 %in to i32 ; explicit zero-extension to i32, matching the parameter attribute
61  store i32 %0, i32 addrspace(1)* %out, align 4 ; write the widened value through %out (address space 1)
62  ret void
63}
64
65; EG-LABEL: {{^}}i16_sext_arg:
66; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
67; SI-LABEL: {{^}}i16_sext_arg:
68; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
69
70define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind { ; %in is an i16 carrying the signext attribute
71entry:
72  %0 = sext i16 %in to i32 ; explicit sign-extension to i32, matching the parameter attribute
73  store i32 %0, i32 addrspace(1)* %out, align 4 ; write the widened value through %out (address space 1)
74  ret void
75}
76
77; EG-LABEL: {{^}}i32_arg:
78; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
79; SI-LABEL: {{^}}i32_arg:
80; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
81define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind { ; %in is a plain i32, no conversion needed
82entry:
83  store i32 %in, i32 addrspace(1)* %out, align 4 ; copy the argument unchanged through %out (address space 1)
84  ret void
85}
86
87; EG-LABEL: {{^}}f32_arg:
88; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
89; SI-LABEL: {{^}}f32_arg:
90; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
91define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind { ; floating-point counterpart of the i32 case: %in is a float
92entry:
93  store float %in, float addrspace(1)* %out, align 4 ; copy the argument unchanged through %out (address space 1)
94  ret void
95}
96
97; EG-LABEL: {{^}}v2i8_arg:
98; EG: VTX_READ_8
99; EG: VTX_READ_8
100; SI-LABEL: {{^}}v2i8_arg:
101; SI: buffer_load_ubyte
102; SI: buffer_load_ubyte
103define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) { ; %in is a 2-element vector of i8 passed by value
104entry:
105  store <2 x i8> %in, <2 x i8> addrspace(1)* %out ; store the whole vector through %out; no explicit alignment is given
106  ret void
107}
108
109; EG-LABEL: {{^}}v2i16_arg:
110; EG: VTX_READ_16
111; EG: VTX_READ_16
112; SI-LABEL: {{^}}v2i16_arg:
113; SI-DAG: buffer_load_ushort
114; SI-DAG: buffer_load_ushort
115define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) { ; %in is a 2-element vector of i16 passed by value
116entry:
117  store <2 x i16> %in, <2 x i16> addrspace(1)* %out ; store the whole vector through %out; no explicit alignment is given
118  ret void
119}
120
121; EG-LABEL: {{^}}v2i32_arg:
122; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
123; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
124; SI-LABEL: {{^}}v2i32_arg:
125; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
126define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind { ; %in is a 2-element vector of i32 passed by value
127entry:
128  store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4 ; store the whole vector through %out with align 4
129  ret void
130}
131
132; EG-LABEL: {{^}}v2f32_arg:
133; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
134; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
135; SI-LABEL: {{^}}v2f32_arg:
136; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
137define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind { ; %in is a 2-element vector of float passed by value
138entry:
139  store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4 ; store the whole vector through %out with align 4
140  ret void
141}
142
143; EG-LABEL: {{^}}v3i8_arg:
144; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
145; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
146; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
147; SI-LABEL: {{^}}v3i8_arg:
148define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind { ; %in is a 3-element (non-power-of-two) vector of i8
149entry:
150  store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4 ; store the whole vector through %out with align 4
151  ret void
152}
153
154; EG-LABEL: {{^}}v3i16_arg:
155; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
156; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
157; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
158; SI-LABEL: {{^}}v3i16_arg:
159define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind { ; %in is a 3-element (non-power-of-two) vector of i16
160entry:
161  store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4 ; store the whole vector through %out with align 4
162  ret void
163}
164; EG-LABEL: {{^}}v3i32_arg:
165; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
166; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
167; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
168; SI-LABEL: {{^}}v3i32_arg:
169; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
170define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind { ; %in is a 3-element (non-power-of-two) vector of i32
171entry:
172  store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4 ; store the whole vector through %out with align 4
173  ret void
174}
175
176; EG-LABEL: {{^}}v3f32_arg:
177; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
178; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
179; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
180; SI-LABEL: {{^}}v3f32_arg:
181; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
182define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind { ; %in is a 3-element (non-power-of-two) vector of float
183entry:
184  store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4 ; store the whole vector through %out with align 4
185  ret void
186}
187
188; EG-LABEL: {{^}}v4i8_arg:
189; EG: VTX_READ_8
190; EG: VTX_READ_8
191; EG: VTX_READ_8
192; EG: VTX_READ_8
193; SI-LABEL: {{^}}v4i8_arg:
194; SI: buffer_load_ubyte
195; SI: buffer_load_ubyte
196; SI: buffer_load_ubyte
197; SI: buffer_load_ubyte
198define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) { ; %in is a 4-element vector of i8 passed by value
199entry:
200  store <4 x i8> %in, <4 x i8> addrspace(1)* %out ; store the whole vector through %out; no explicit alignment is given
201  ret void
202}
203
204; EG-LABEL: {{^}}v4i16_arg:
205; EG: VTX_READ_16
206; EG: VTX_READ_16
207; EG: VTX_READ_16
208; EG: VTX_READ_16
209; SI-LABEL: {{^}}v4i16_arg:
210; SI: buffer_load_ushort
211; SI: buffer_load_ushort
212; SI: buffer_load_ushort
213; SI: buffer_load_ushort
214define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) { ; %in is a 4-element vector of i16 passed by value
215entry:
216  store <4 x i16> %in, <4 x i16> addrspace(1)* %out ; store the whole vector through %out; no explicit alignment is given
217  ret void
218}
219
220; EG-LABEL: {{^}}v4i32_arg:
221; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
222; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
223; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
224; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
225; SI-LABEL: {{^}}v4i32_arg:
226; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
227define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind { ; %in is a 4-element vector of i32 passed by value
228entry:
229  store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4 ; store the whole vector through %out with align 4
230  ret void
231}
232
233; EG-LABEL: {{^}}v4f32_arg:
234; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
235; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
236; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
237; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
238; SI-LABEL: {{^}}v4f32_arg:
239; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
240define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind { ; %in is a 4-element vector of float passed by value
241entry:
242  store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4 ; store the whole vector through %out with align 4
243  ret void
244}
245
246; EG-LABEL: {{^}}v8i8_arg:
247; EG: VTX_READ_8
248; EG: VTX_READ_8
249; EG: VTX_READ_8
250; EG: VTX_READ_8
251; EG: VTX_READ_8
252; EG: VTX_READ_8
253; EG: VTX_READ_8
254; EG: VTX_READ_8
255; SI-LABEL: {{^}}v8i8_arg:
256; SI: buffer_load_ubyte
257; SI: buffer_load_ubyte
258; SI: buffer_load_ubyte
259; SI: buffer_load_ubyte
260; SI: buffer_load_ubyte
261; SI: buffer_load_ubyte
262; SI: buffer_load_ubyte
263define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) { ; %in is an 8-element vector of i8 passed by value
264entry:
265  store <8 x i8> %in, <8 x i8> addrspace(1)* %out ; store the whole vector through %out; no explicit alignment is given
266  ret void
267}
268
269; EG-LABEL: {{^}}v8i16_arg:
270; EG: VTX_READ_16
271; EG: VTX_READ_16
272; EG: VTX_READ_16
273; EG: VTX_READ_16
274; EG: VTX_READ_16
275; EG: VTX_READ_16
276; EG: VTX_READ_16
277; EG: VTX_READ_16
278; SI-LABEL: {{^}}v8i16_arg:
279; SI: buffer_load_ushort
280; SI: buffer_load_ushort
281; SI: buffer_load_ushort
282; SI: buffer_load_ushort
283; SI: buffer_load_ushort
284; SI: buffer_load_ushort
285; SI: buffer_load_ushort
286; SI: buffer_load_ushort
287define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) { ; %in is an 8-element vector of i16 passed by value
288entry:
289  store <8 x i16> %in, <8 x i16> addrspace(1)* %out ; store the whole vector through %out; no explicit alignment is given
290  ret void
291}
292
293; EG-LABEL: {{^}}v8i32_arg:
294; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
295; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
296; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
297; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
298; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
299; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
300; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
301; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
302; SI-LABEL: {{^}}v8i32_arg:
303; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
304define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind { ; %in is an 8-element vector of i32 passed by value
305entry:
306  store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4 ; store the whole vector through %out with align 4
307  ret void
308}
309
310; EG-LABEL: {{^}}v8f32_arg:
311; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
312; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
313; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
314; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
315; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
316; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
317; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
318; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
319; SI-LABEL: {{^}}v8f32_arg:
320; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
321define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind { ; %in is an 8-element vector of float passed by value
322entry:
323  store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4 ; store the whole vector through %out with align 4
324  ret void
325}
326
327; EG-LABEL: {{^}}v16i8_arg:
328; EG: VTX_READ_8
329; EG: VTX_READ_8
330; EG: VTX_READ_8
331; EG: VTX_READ_8
332; EG: VTX_READ_8
333; EG: VTX_READ_8
334; EG: VTX_READ_8
335; EG: VTX_READ_8
336; EG: VTX_READ_8
337; EG: VTX_READ_8
338; EG: VTX_READ_8
339; EG: VTX_READ_8
340; EG: VTX_READ_8
341; EG: VTX_READ_8
342; EG: VTX_READ_8
343; EG: VTX_READ_8
344; SI-LABEL: {{^}}v16i8_arg:
345; SI: buffer_load_ubyte
346; SI: buffer_load_ubyte
347; SI: buffer_load_ubyte
348; SI: buffer_load_ubyte
349; SI: buffer_load_ubyte
350; SI: buffer_load_ubyte
351; SI: buffer_load_ubyte
352; SI: buffer_load_ubyte
353; SI: buffer_load_ubyte
354; SI: buffer_load_ubyte
355; SI: buffer_load_ubyte
356; SI: buffer_load_ubyte
357; SI: buffer_load_ubyte
358; SI: buffer_load_ubyte
359; SI: buffer_load_ubyte
360; SI: buffer_load_ubyte
361define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) { ; %in is a 16-element vector of i8 passed by value
362entry:
363  store <16 x i8> %in, <16 x i8> addrspace(1)* %out ; store the whole vector through %out; no explicit alignment is given
364  ret void
365}
366
367; EG-LABEL: {{^}}v16i16_arg:
368; EG: VTX_READ_16
369; EG: VTX_READ_16
370; EG: VTX_READ_16
371; EG: VTX_READ_16
372; EG: VTX_READ_16
373; EG: VTX_READ_16
374; EG: VTX_READ_16
375; EG: VTX_READ_16
376; EG: VTX_READ_16
377; EG: VTX_READ_16
378; EG: VTX_READ_16
379; EG: VTX_READ_16
380; EG: VTX_READ_16
381; EG: VTX_READ_16
382; EG: VTX_READ_16
383; EG: VTX_READ_16
384; SI-LABEL: {{^}}v16i16_arg:
385; SI: buffer_load_ushort
386; SI: buffer_load_ushort
387; SI: buffer_load_ushort
388; SI: buffer_load_ushort
389; SI: buffer_load_ushort
390; SI: buffer_load_ushort
391; SI: buffer_load_ushort
392; SI: buffer_load_ushort
393; SI: buffer_load_ushort
394; SI: buffer_load_ushort
395; SI: buffer_load_ushort
396; SI: buffer_load_ushort
397; SI: buffer_load_ushort
398; SI: buffer_load_ushort
399; SI: buffer_load_ushort
400; SI: buffer_load_ushort
401define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) { ; %in is a 16-element vector of i16 passed by value
402entry:
403  store <16 x i16> %in, <16 x i16> addrspace(1)* %out ; store the whole vector through %out; no explicit alignment is given
404  ret void
405}
406
407; EG-LABEL: {{^}}v16i32_arg:
408; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
409; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
410; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
411; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
412; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
413; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
414; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
415; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
416; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
417; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
418; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
419; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
420; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
421; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
422; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
423; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
424; SI-LABEL: {{^}}v16i32_arg:
425; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
426define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind { ; %in is a 16-element vector of i32 passed by value
427entry:
428  store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4 ; store the whole vector through %out with align 4
429  ret void
430}
431
432; EG-LABEL: {{^}}v16f32_arg:
433; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
434; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
435; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
436; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
437; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
438; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
439; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
440; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
441; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
442; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
443; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
444; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
445; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
446; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
447; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
448; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
449; SI-LABEL: {{^}}v16f32_arg:
450; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
451define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind { ; %in is a 16-element vector of float passed by value
452entry:
453  store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4 ; store the whole vector through %out with align 4
454  ret void
455}
456
457; SI-LABEL: {{^}}kernel_arg_i64:
458; SI: s_load_dwordx2
459; SI: s_load_dwordx2
460; SI: buffer_store_dwordx2
461define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind { ; %a is a 64-bit integer passed by value
462  store i64 %a, i64 addrspace(1)* %out, align 8 ; copy the argument unchanged through %out with align 8
463  ret void
464}
465
466; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
467; XSI: s_load_dwordx2
468; XSI: s_load_dwordx2
469; XSI: buffer_store_dwordx2
470; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
471;   store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
472;   ret void
473; }
474