1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSA-VI %s
3; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=LEGACY-MESA-VI %s
4
5define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
6  ; HSA-VI-LABEL: name: i8_arg
7  ; HSA-VI: bb.1 (%ir-block.0):
8  ; HSA-VI:   liveins: $sgpr4_sgpr5
9  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
10  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
11  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
12  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
13  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
14  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
15  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
16  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
17  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
18  ; HSA-VI:   S_ENDPGM 0
19  ; LEGACY-MESA-VI-LABEL: name: i8_arg
20  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
21  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
22  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
23  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
24  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
25  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
26  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
27  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
28  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
29  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
30  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
31  ; LEGACY-MESA-VI:   S_ENDPGM 0
32  %ext = zext i8 %in to i32
33  store i32 %ext, i32 addrspace(1)* %out, align 4
34  ret void
35}
36
37define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
38  ; HSA-VI-LABEL: name: i8_zext_arg
39  ; HSA-VI: bb.1 (%ir-block.0):
40  ; HSA-VI:   liveins: $sgpr4_sgpr5
41  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
42  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
43  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
44  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
45  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
46  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
47  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
48  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
49  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
50  ; HSA-VI:   S_ENDPGM 0
51  ; LEGACY-MESA-VI-LABEL: name: i8_zext_arg
52  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
53  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
54  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
55  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
56  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
57  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
58  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
59  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
60  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
61  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
62  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
63  ; LEGACY-MESA-VI:   S_ENDPGM 0
64  %ext = zext i8 %in to i32
65  store i32 %ext, i32 addrspace(1)* %out, align 4
66  ret void
67}
68
69define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
70  ; HSA-VI-LABEL: name: i8_sext_arg
71  ; HSA-VI: bb.1 (%ir-block.0):
72  ; HSA-VI:   liveins: $sgpr4_sgpr5
73  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
74  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
75  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
76  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
77  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
78  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
79  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
80  ; HSA-VI:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8)
81  ; HSA-VI:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
82  ; HSA-VI:   S_ENDPGM 0
83  ; LEGACY-MESA-VI-LABEL: name: i8_sext_arg
84  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
85  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
86  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
87  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
88  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
89  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
90  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
91  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
92  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
93  ; LEGACY-MESA-VI:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8)
94  ; LEGACY-MESA-VI:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
95  ; LEGACY-MESA-VI:   S_ENDPGM 0
96  %ext = sext i8 %in to i32
97  store i32 %ext, i32 addrspace(1)* %out, align 4
98  ret void
99}
100
101define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
102  ; HSA-VI-LABEL: name: i16_arg
103  ; HSA-VI: bb.1 (%ir-block.0):
104  ; HSA-VI:   liveins: $sgpr4_sgpr5
105  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
106  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
107  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
108  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
109  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
110  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
111  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4)
112  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
113  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
114  ; HSA-VI:   S_ENDPGM 0
115  ; LEGACY-MESA-VI-LABEL: name: i16_arg
116  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
117  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
118  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
119  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
120  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
121  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
122  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
123  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
124  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4)
125  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
126  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
127  ; LEGACY-MESA-VI:   S_ENDPGM 0
128  %ext = zext i16 %in to i32
129  store i32 %ext, i32 addrspace(1)* %out, align 4
130  ret void
131}
132
133define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
134  ; HSA-VI-LABEL: name: i16_zext_arg
135  ; HSA-VI: bb.1 (%ir-block.0):
136  ; HSA-VI:   liveins: $sgpr4_sgpr5
137  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
138  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
139  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
140  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
141  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
142  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
143  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4)
144  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
145  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
146  ; HSA-VI:   S_ENDPGM 0
147  ; LEGACY-MESA-VI-LABEL: name: i16_zext_arg
148  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
149  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
150  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
151  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
152  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
153  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
154  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
155  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
156  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4)
157  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
158  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
159  ; LEGACY-MESA-VI:   S_ENDPGM 0
160  %ext = zext i16 %in to i32
161  store i32 %ext, i32 addrspace(1)* %out, align 4
162  ret void
163}
164
165define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
166  ; HSA-VI-LABEL: name: i16_sext_arg
167  ; HSA-VI: bb.1 (%ir-block.0):
168  ; HSA-VI:   liveins: $sgpr4_sgpr5
169  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
170  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
171  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
172  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
173  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
174  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
175  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4)
176  ; HSA-VI:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16)
177  ; HSA-VI:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
178  ; HSA-VI:   S_ENDPGM 0
179  ; LEGACY-MESA-VI-LABEL: name: i16_sext_arg
180  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
181  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
182  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
183  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
184  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
185  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
186  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
187  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
188  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4)
189  ; LEGACY-MESA-VI:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16)
190  ; LEGACY-MESA-VI:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
191  ; LEGACY-MESA-VI:   S_ENDPGM 0
192  %ext = sext i16 %in to i32
193  store i32 %ext, i32 addrspace(1)* %out, align 4
194  ret void
195}
196
197define amdgpu_kernel void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
198  ; HSA-VI-LABEL: name: i32_arg
199  ; HSA-VI: bb.1.entry:
200  ; HSA-VI:   liveins: $sgpr4_sgpr5
201  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
202  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
203  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
204  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
205  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
206  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
207  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
208  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
209  ; HSA-VI:   S_ENDPGM 0
210  ; LEGACY-MESA-VI-LABEL: name: i32_arg
211  ; LEGACY-MESA-VI: bb.1.entry:
212  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
213  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
214  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
215  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
216  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
217  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
218  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
219  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
220  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
221  ; LEGACY-MESA-VI:   S_ENDPGM 0
222entry:
223  store i32 %in, i32 addrspace(1)* %out, align 4
224  ret void
225}
226
227define amdgpu_kernel void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
228  ; HSA-VI-LABEL: name: f32_arg
229  ; HSA-VI: bb.1.entry:
230  ; HSA-VI:   liveins: $sgpr4_sgpr5
231  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
232  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
233  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
234  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
235  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
236  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
237  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
238  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
239  ; HSA-VI:   S_ENDPGM 0
240  ; LEGACY-MESA-VI-LABEL: name: f32_arg
241  ; LEGACY-MESA-VI: bb.1.entry:
242  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
243  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
244  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
245  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
246  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
247  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
248  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
249  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
250  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
251  ; LEGACY-MESA-VI:   S_ENDPGM 0
252entry:
253  store float %in, float addrspace(1)* %out, align 4
254  ret void
255}
256
257define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
258  ; HSA-VI-LABEL: name: v2i8_arg
259  ; HSA-VI: bb.1.entry:
260  ; HSA-VI:   liveins: $sgpr4_sgpr5
261  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
262  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
263  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
264  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
265  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
266  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
267  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 8, addrspace 4)
268  ; HSA-VI:   G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1)
269  ; HSA-VI:   S_ENDPGM 0
270  ; LEGACY-MESA-VI-LABEL: name: v2i8_arg
271  ; LEGACY-MESA-VI: bb.1.entry:
272  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
273  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
274  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
275  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
276  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
277  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
278  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
279  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 4, addrspace 4)
280  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1)
281  ; LEGACY-MESA-VI:   S_ENDPGM 0
282entry:
283  store <2 x i8> %in, <2 x i8> addrspace(1)* %out
284  ret void
285}
286
287define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
288  ; HSA-VI-LABEL: name: v2i16_arg
289  ; HSA-VI: bb.1.entry:
290  ; HSA-VI:   liveins: $sgpr4_sgpr5
291  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
292  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
293  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
294  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
295  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
296  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
297  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), align 8, addrspace 4)
298  ; HSA-VI:   G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1)
299  ; HSA-VI:   S_ENDPGM 0
300  ; LEGACY-MESA-VI-LABEL: name: v2i16_arg
301  ; LEGACY-MESA-VI: bb.1.entry:
302  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
303  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
304  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
305  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
306  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
307  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
308  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
309  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), addrspace 4)
310  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1)
311  ; LEGACY-MESA-VI:   S_ENDPGM 0
312entry:
313  store <2 x i16> %in, <2 x i16> addrspace(1)* %out
314  ret void
315}
316
317define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
318  ; HSA-VI-LABEL: name: v2i32_arg
319  ; HSA-VI: bb.1.entry:
320  ; HSA-VI:   liveins: $sgpr4_sgpr5
321  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
322  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
323  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
324  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
325  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
326  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
327  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4)
328  ; HSA-VI:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
329  ; HSA-VI:   S_ENDPGM 0
330  ; LEGACY-MESA-VI-LABEL: name: v2i32_arg
331  ; LEGACY-MESA-VI: bb.1.entry:
332  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
333  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
334  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
335  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
336  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
337  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
338  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
339  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
340  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
341  ; LEGACY-MESA-VI:   S_ENDPGM 0
342entry:
343  store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
344  ret void
345}
346
347define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
348  ; HSA-VI-LABEL: name: v2f32_arg
349  ; HSA-VI: bb.1.entry:
350  ; HSA-VI:   liveins: $sgpr4_sgpr5
351  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
352  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
353  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
354  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
355  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
356  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
357  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4)
358  ; HSA-VI:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
359  ; HSA-VI:   S_ENDPGM 0
360  ; LEGACY-MESA-VI-LABEL: name: v2f32_arg
361  ; LEGACY-MESA-VI: bb.1.entry:
362  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
363  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
364  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
365  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
366  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
367  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
368  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
369  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
370  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
371  ; LEGACY-MESA-VI:   S_ENDPGM 0
372entry:
373  store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
374  ret void
375}
376
377define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
378  ; HSA-VI-LABEL: name: v3i8_arg
379  ; HSA-VI: bb.1.entry:
380  ; HSA-VI:   liveins: $sgpr4_sgpr5
381  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
382  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
383  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
384  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
385  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
386  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
387  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 8, addrspace 4)
388  ; HSA-VI:   G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1)
389  ; HSA-VI:   S_ENDPGM 0
390  ; LEGACY-MESA-VI-LABEL: name: v3i8_arg
391  ; LEGACY-MESA-VI: bb.1.entry:
392  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
393  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
394  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
395  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
396  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
397  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
398  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
399  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 4, addrspace 4)
400  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1)
401  ; LEGACY-MESA-VI:   S_ENDPGM 0
402entry:
403  store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
404  ret void
405}
406
407define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
408  ; HSA-VI-LABEL: name: v3i16_arg
409  ; HSA-VI: bb.1.entry:
410  ; HSA-VI:   liveins: $sgpr4_sgpr5
411  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
412  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
413  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
414  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
415  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
416  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
417  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 8, addrspace 4)
418  ; HSA-VI:   G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1)
419  ; HSA-VI:   S_ENDPGM 0
420  ; LEGACY-MESA-VI-LABEL: name: v3i16_arg
421  ; LEGACY-MESA-VI: bb.1.entry:
422  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
423  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
424  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
425  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
426  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
427  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
428  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
429  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 4, addrspace 4)
430  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1)
431  ; LEGACY-MESA-VI:   S_ENDPGM 0
432entry:
433  store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
434  ret void
435}
436
437define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
438  ; HSA-VI-LABEL: name: v3i32_arg
439  ; HSA-VI: bb.1.entry:
440  ; HSA-VI:   liveins: $sgpr4_sgpr5
441  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
442  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
443  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
444  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
445  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
446  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
447  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4)
448  ; HSA-VI:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
449  ; HSA-VI:   S_ENDPGM 0
450  ; LEGACY-MESA-VI-LABEL: name: v3i32_arg
451  ; LEGACY-MESA-VI: bb.1.entry:
452  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
453  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
454  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
455  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
456  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
457  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
458  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
459  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4)
460  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
461  ; LEGACY-MESA-VI:   S_ENDPGM 0
462entry:
463  store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
464  ret void
465}
466
467define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
468  ; HSA-VI-LABEL: name: v3f32_arg
469  ; HSA-VI: bb.1.entry:
470  ; HSA-VI:   liveins: $sgpr4_sgpr5
471  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
472  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
473  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
474  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
475  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
476  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
477  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4)
478  ; HSA-VI:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
479  ; HSA-VI:   S_ENDPGM 0
480  ; LEGACY-MESA-VI-LABEL: name: v3f32_arg
481  ; LEGACY-MESA-VI: bb.1.entry:
482  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
483  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
484  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
485  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
486  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
487  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
488  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
489  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4)
490  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
491  ; LEGACY-MESA-VI:   S_ENDPGM 0
492entry:
493  store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
494  ret void
495}
496
497define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
498  ; HSA-VI-LABEL: name: v4i8_arg
499  ; HSA-VI: bb.1.entry:
500  ; HSA-VI:   liveins: $sgpr4_sgpr5
501  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
502  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
503  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
504  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
505  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
506  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
507  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), align 8, addrspace 4)
508  ; HSA-VI:   G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1)
509  ; HSA-VI:   S_ENDPGM 0
510  ; LEGACY-MESA-VI-LABEL: name: v4i8_arg
511  ; LEGACY-MESA-VI: bb.1.entry:
512  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
513  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
514  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
515  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
516  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
517  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
518  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
519  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), addrspace 4)
520  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1)
521  ; LEGACY-MESA-VI:   S_ENDPGM 0
522entry:
523  store <4 x i8> %in, <4 x i8> addrspace(1)* %out
524  ret void
525}
526
527define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
528  ; HSA-VI-LABEL: name: v4i16_arg
529  ; HSA-VI: bb.1.entry:
530  ; HSA-VI:   liveins: $sgpr4_sgpr5
531  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
532  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
533  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
534  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
535  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
536  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
537  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), addrspace 4)
538  ; HSA-VI:   G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1)
539  ; HSA-VI:   S_ENDPGM 0
540  ; LEGACY-MESA-VI-LABEL: name: v4i16_arg
541  ; LEGACY-MESA-VI: bb.1.entry:
542  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
543  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
544  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
545  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
546  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
547  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
548  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
549  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), align 4, addrspace 4)
550  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1)
551  ; LEGACY-MESA-VI:   S_ENDPGM 0
552entry:
553  store <4 x i16> %in, <4 x i16> addrspace(1)* %out
554  ret void
555}
556
557define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
558  ; HSA-VI-LABEL: name: v4i32_arg
559  ; HSA-VI: bb.1.entry:
560  ; HSA-VI:   liveins: $sgpr4_sgpr5
561  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
562  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
563  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
564  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
565  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
566  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
567  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4)
568  ; HSA-VI:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
569  ; HSA-VI:   S_ENDPGM 0
570  ; LEGACY-MESA-VI-LABEL: name: v4i32_arg
571  ; LEGACY-MESA-VI: bb.1.entry:
572  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
573  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
574  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
575  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
576  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
577  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
578  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
579  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4)
580  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
581  ; LEGACY-MESA-VI:   S_ENDPGM 0
582entry:
583  store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
584  ret void
585}
586
587define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
588  ; HSA-VI-LABEL: name: v4f32_arg
589  ; HSA-VI: bb.1.entry:
590  ; HSA-VI:   liveins: $sgpr4_sgpr5
591  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
592  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
593  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
594  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
595  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
596  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
597  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4)
598  ; HSA-VI:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
599  ; HSA-VI:   S_ENDPGM 0
600  ; LEGACY-MESA-VI-LABEL: name: v4f32_arg
601  ; LEGACY-MESA-VI: bb.1.entry:
602  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
603  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
604  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
605  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
606  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
607  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
608  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
609  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4)
610  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
611  ; LEGACY-MESA-VI:   S_ENDPGM 0
612entry:
613  store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
614  ret void
615}
616
617define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
618  ; HSA-VI-LABEL: name: v8i8_arg
619  ; HSA-VI: bb.1.entry:
620  ; HSA-VI:   liveins: $sgpr4_sgpr5
621  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
622  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
623  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
624  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
625  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
626  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
627  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), addrspace 4)
628  ; HSA-VI:   G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1)
629  ; HSA-VI:   S_ENDPGM 0
630  ; LEGACY-MESA-VI-LABEL: name: v8i8_arg
631  ; LEGACY-MESA-VI: bb.1.entry:
632  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
633  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
634  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
635  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
636  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
637  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
638  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
639  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), align 4, addrspace 4)
640  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1)
641  ; LEGACY-MESA-VI:   S_ENDPGM 0
642entry:
643  store <8 x i8> %in, <8 x i8> addrspace(1)* %out
644  ret void
645}
646
647define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
648  ; HSA-VI-LABEL: name: v8i16_arg
649  ; HSA-VI: bb.1.entry:
650  ; HSA-VI:   liveins: $sgpr4_sgpr5
651  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
652  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
653  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
654  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
655  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
656  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
657  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), addrspace 4)
658  ; HSA-VI:   G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1)
659  ; HSA-VI:   S_ENDPGM 0
660  ; LEGACY-MESA-VI-LABEL: name: v8i16_arg
661  ; LEGACY-MESA-VI: bb.1.entry:
662  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
663  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
664  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
665  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
666  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
667  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
668  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
669  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), align 4, addrspace 4)
670  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1)
671  ; LEGACY-MESA-VI:   S_ENDPGM 0
672entry:
673  store <8 x i16> %in, <8 x i16> addrspace(1)* %out
674  ret void
675}
676
677define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
678  ; HSA-VI-LABEL: name: v8i32_arg
679  ; HSA-VI: bb.1.entry:
680  ; HSA-VI:   liveins: $sgpr4_sgpr5
681  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
682  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
683  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
684  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
685  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
686  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
687  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4)
688  ; HSA-VI:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
689  ; HSA-VI:   S_ENDPGM 0
690  ; LEGACY-MESA-VI-LABEL: name: v8i32_arg
691  ; LEGACY-MESA-VI: bb.1.entry:
692  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
693  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
694  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
695  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
696  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
697  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
698  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
699  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4)
700  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
701  ; LEGACY-MESA-VI:   S_ENDPGM 0
702entry:
703  store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
704  ret void
705}
706
707define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
708  ; HSA-VI-LABEL: name: v8f32_arg
709  ; HSA-VI: bb.1.entry:
710  ; HSA-VI:   liveins: $sgpr4_sgpr5
711  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
712  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
713  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
714  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
715  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
716  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
717  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4)
718  ; HSA-VI:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
719  ; HSA-VI:   S_ENDPGM 0
720  ; LEGACY-MESA-VI-LABEL: name: v8f32_arg
721  ; LEGACY-MESA-VI: bb.1.entry:
722  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
723  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
724  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
725  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
726  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
727  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
728  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
729  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4)
730  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
731  ; LEGACY-MESA-VI:   S_ENDPGM 0
732entry:
733  store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
734  ret void
735}
736
737define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
738  ; HSA-VI-LABEL: name: v16i8_arg
739  ; HSA-VI: bb.1.entry:
740  ; HSA-VI:   liveins: $sgpr4_sgpr5
741  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
742  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
743  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
744  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
745  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
746  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
747  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), addrspace 4)
748  ; HSA-VI:   G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1)
749  ; HSA-VI:   S_ENDPGM 0
750  ; LEGACY-MESA-VI-LABEL: name: v16i8_arg
751  ; LEGACY-MESA-VI: bb.1.entry:
752  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
753  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
754  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
755  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
756  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
757  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
758  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
759  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), align 4, addrspace 4)
760  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1)
761  ; LEGACY-MESA-VI:   S_ENDPGM 0
762entry:
763  store <16 x i8> %in, <16 x i8> addrspace(1)* %out
764  ret void
765}
766
767define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
768  ; HSA-VI-LABEL: name: v16i16_arg
769  ; HSA-VI: bb.1.entry:
770  ; HSA-VI:   liveins: $sgpr4_sgpr5
771  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
772  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
773  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
774  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
775  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
776  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
777  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 16, addrspace 4)
778  ; HSA-VI:   G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1)
779  ; HSA-VI:   S_ENDPGM 0
780  ; LEGACY-MESA-VI-LABEL: name: v16i16_arg
781  ; LEGACY-MESA-VI: bb.1.entry:
782  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
783  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
784  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
785  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
786  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
787  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
788  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
789  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 4, addrspace 4)
790  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1)
791  ; LEGACY-MESA-VI:   S_ENDPGM 0
792entry:
793  store <16 x i16> %in, <16 x i16> addrspace(1)* %out
794  ret void
795}
796
797define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
798  ; HSA-VI-LABEL: name: v16i32_arg
799  ; HSA-VI: bb.1.entry:
800  ; HSA-VI:   liveins: $sgpr4_sgpr5
801  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
802  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
803  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
804  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
805  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
806  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
807  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4)
808  ; HSA-VI:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
809  ; HSA-VI:   S_ENDPGM 0
810  ; LEGACY-MESA-VI-LABEL: name: v16i32_arg
811  ; LEGACY-MESA-VI: bb.1.entry:
812  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
813  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
814  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
815  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
816  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
817  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
818  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
819  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4)
820  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
821  ; LEGACY-MESA-VI:   S_ENDPGM 0
822entry:
823  store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
824  ret void
825}
826
827define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
828  ; HSA-VI-LABEL: name: v16f32_arg
829  ; HSA-VI: bb.1.entry:
830  ; HSA-VI:   liveins: $sgpr4_sgpr5
831  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
832  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
833  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
834  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
835  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
836  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
837  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4)
838  ; HSA-VI:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
839  ; HSA-VI:   S_ENDPGM 0
840  ; LEGACY-MESA-VI-LABEL: name: v16f32_arg
841  ; LEGACY-MESA-VI: bb.1.entry:
842  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
843  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
844  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
845  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
846  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
847  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
848  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
849  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4)
850  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
851  ; LEGACY-MESA-VI:   S_ENDPGM 0
852entry:
853  store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
854  ret void
855}
856
857define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
858  ; HSA-VI-LABEL: name: kernel_arg_i64
859  ; HSA-VI: bb.1 (%ir-block.0):
860  ; HSA-VI:   liveins: $sgpr4_sgpr5
861  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
862  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
863  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
864  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
865  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
866  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
867  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
868  ; HSA-VI:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
869  ; HSA-VI:   S_ENDPGM 0
870  ; LEGACY-MESA-VI-LABEL: name: kernel_arg_i64
871  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
872  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
873  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
874  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
875  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
876  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
877  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
878  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
879  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
880  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
881  ; LEGACY-MESA-VI:   S_ENDPGM 0
882  store i64 %a, i64 addrspace(1)* %out, align 8
883  ret void
884}
885
886define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double  %in) {
887  ; HSA-VI-LABEL: name: f64_kernel_arg
888  ; HSA-VI: bb.1.entry:
889  ; HSA-VI:   liveins: $sgpr4_sgpr5
890  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
891  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
892  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
893  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
894  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
895  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
896  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
897  ; HSA-VI:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
898  ; HSA-VI:   S_ENDPGM 0
899  ; LEGACY-MESA-VI-LABEL: name: f64_kernel_arg
900  ; LEGACY-MESA-VI: bb.1.entry:
901  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
902  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
903  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
904  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
905  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
906  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
907  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
908  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
909  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
910  ; LEGACY-MESA-VI:   S_ENDPGM 0
911entry:
912  store double %in, double addrspace(1)* %out
913  ret void
914}
915
916define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
917  ; HSA-VI-LABEL: name: i1_arg
918  ; HSA-VI: bb.1 (%ir-block.0):
919  ; HSA-VI:   liveins: $sgpr4_sgpr5
920  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
921  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
922  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
923  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
924  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
925  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
926  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
927  ; HSA-VI:   G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1)
928  ; HSA-VI:   S_ENDPGM 0
929  ; LEGACY-MESA-VI-LABEL: name: i1_arg
930  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
931  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
932  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
933  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
934  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
935  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
936  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
937  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
938  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
939  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1)
940  ; LEGACY-MESA-VI:   S_ENDPGM 0
941  store i1 %x, i1 addrspace(1)* %out, align 1
942  ret void
943}
944
945define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
946  ; HSA-VI-LABEL: name: i1_arg_zext_i32
947  ; HSA-VI: bb.1 (%ir-block.0):
948  ; HSA-VI:   liveins: $sgpr4_sgpr5
949  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
950  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
951  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
952  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
953  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
954  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
955  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
956  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1)
957  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
958  ; HSA-VI:   S_ENDPGM 0
959  ; LEGACY-MESA-VI-LABEL: name: i1_arg_zext_i32
960  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
961  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
962  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
963  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
964  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
965  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
966  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
967  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
968  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
969  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1)
970  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
971  ; LEGACY-MESA-VI:   S_ENDPGM 0
972  %ext = zext i1 %x to i32
973  store i32 %ext, i32 addrspace(1)* %out, align 4
974  ret void
975}
976
977define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
978  ; HSA-VI-LABEL: name: i1_arg_zext_i64
979  ; HSA-VI: bb.1 (%ir-block.0):
980  ; HSA-VI:   liveins: $sgpr4_sgpr5
981  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
982  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
983  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
984  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
985  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
986  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
987  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
988  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1)
989  ; HSA-VI:   G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
990  ; HSA-VI:   S_ENDPGM 0
991  ; LEGACY-MESA-VI-LABEL: name: i1_arg_zext_i64
992  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
993  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
994  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
995  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
996  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
997  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
998  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
999  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1000  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
1001  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1)
1002  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
1003  ; LEGACY-MESA-VI:   S_ENDPGM 0
1004  %ext = zext i1 %x to i64
1005  store i64 %ext, i64 addrspace(1)* %out, align 8
1006  ret void
1007}
1008
1009define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
1010  ; HSA-VI-LABEL: name: i1_arg_sext_i32
1011  ; HSA-VI: bb.1 (%ir-block.0):
1012  ; HSA-VI:   liveins: $sgpr4_sgpr5
1013  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1014  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1015  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1016  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1017  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1018  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1019  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
1020  ; HSA-VI:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1)
1021  ; HSA-VI:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1022  ; HSA-VI:   S_ENDPGM 0
1023  ; LEGACY-MESA-VI-LABEL: name: i1_arg_sext_i32
1024  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1025  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1026  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1027  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1028  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1029  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1030  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1031  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1032  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
1033  ; LEGACY-MESA-VI:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1)
1034  ; LEGACY-MESA-VI:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1035  ; LEGACY-MESA-VI:   S_ENDPGM 0
1036  %ext = sext i1 %x to i32
1037  store i32 %ext, i32addrspace(1)* %out, align 4
1038  ret void
1039}
1040
1041define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
1042  ; HSA-VI-LABEL: name: i1_arg_sext_i64
1043  ; HSA-VI: bb.1 (%ir-block.0):
1044  ; HSA-VI:   liveins: $sgpr4_sgpr5
1045  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1046  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1047  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1048  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1049  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1050  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1051  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
1052  ; HSA-VI:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1)
1053  ; HSA-VI:   G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
1054  ; HSA-VI:   S_ENDPGM 0
1055  ; LEGACY-MESA-VI-LABEL: name: i1_arg_sext_i64
1056  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1057  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1058  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1059  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1060  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1061  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1062  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1063  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1064  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
1065  ; LEGACY-MESA-VI:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1)
1066  ; LEGACY-MESA-VI:   G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
1067  ; LEGACY-MESA-VI:   S_ENDPGM 0
1068  %ext = sext i1 %x to i64
1069  store i64 %ext, i64 addrspace(1)* %out, align 8
1070  ret void
1071}
1072
1073; 0-sized arguments do not add a slot to the argument register set, so
1074; waste an index in the virtual register array.
1075define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind {
1076  ; HSA-VI-LABEL: name: empty_struct_arg
1077  ; HSA-VI: bb.1 (%ir-block.0):
1078  ; HSA-VI:   liveins: $sgpr4_sgpr5
1079  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1080  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1081  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1082  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1083  ; HSA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1084  ; HSA-VI:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1085  ; HSA-VI:   S_ENDPGM 0
1086  ; LEGACY-MESA-VI-LABEL: name: empty_struct_arg
1087  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1088  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1089  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1090  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1091  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1092  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1093  ; LEGACY-MESA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1094  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1095  ; LEGACY-MESA-VI:   S_ENDPGM 0
1096  store i32 %arg1, i32 addrspace(1)* undef
1097  ret void
1098}
1099
1100define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind {
1101  ; HSA-VI-LABEL: name: empty_array_arg
1102  ; HSA-VI: bb.1 (%ir-block.0):
1103  ; HSA-VI:   liveins: $sgpr4_sgpr5
1104  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1105  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1106  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1107  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1108  ; HSA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1109  ; HSA-VI:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1110  ; HSA-VI:   S_ENDPGM 0
1111  ; LEGACY-MESA-VI-LABEL: name: empty_array_arg
1112  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1113  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1114  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1115  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1116  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1117  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1118  ; LEGACY-MESA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1119  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1120  ; LEGACY-MESA-VI:   S_ENDPGM 0
1121  store i32 %arg1, i32 addrspace(1)* undef
1122  ret void
1123}
1124
1125; The correct load offsets for these:
1126; load 4 from 0,
1127; load 8 from 8
1128; load 4 from 24
1129; load 8 from 32
1130
1131; With the SelectionDAG argument lowering, the alignments for the
1132; struct members is not properly considered, making these wrong.
1133define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad, {i32, i64} %arg1) {
1134  ; HSA-VI-LABEL: name: struct_argument_alignment
1135  ; HSA-VI: bb.1 (%ir-block.0):
1136  ; HSA-VI:   liveins: $sgpr4_sgpr5
1137  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1138  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1139  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1140  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1141  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1142  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1143  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
1144  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
1145  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1146  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4)
1147  ; HSA-VI:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
1148  ; HSA-VI:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1149  ; HSA-VI:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
1150  ; HSA-VI:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
1151  ; HSA-VI:   [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
1152  ; HSA-VI:   [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
1153  ; HSA-VI:   [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1154  ; HSA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1155  ; HSA-VI:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1156  ; HSA-VI:   G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
1157  ; HSA-VI:   G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
1158  ; HSA-VI:   G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
1159  ; HSA-VI:   G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
1160  ; HSA-VI:   G_STORE [[LOAD4]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
1161  ; HSA-VI:   S_ENDPGM 0
1162  ; LEGACY-MESA-VI-LABEL: name: struct_argument_alignment
1163  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1164  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1165  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1166  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1167  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1168  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1169  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1170  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1171  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
1172  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
1173  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1174  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
1175  ; LEGACY-MESA-VI:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60
1176  ; LEGACY-MESA-VI:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1177  ; LEGACY-MESA-VI:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1178  ; LEGACY-MESA-VI:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
1179  ; LEGACY-MESA-VI:   [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
1180  ; LEGACY-MESA-VI:   [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
1181  ; LEGACY-MESA-VI:   [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1182  ; LEGACY-MESA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1183  ; LEGACY-MESA-VI:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1184  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
1185  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
1186  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
1187  ; LEGACY-MESA-VI:   G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
1188  ; LEGACY-MESA-VI:   G_STORE [[LOAD4]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
1189  ; LEGACY-MESA-VI:   S_ENDPGM 0
1190  %val0 = extractvalue {i32, i64} %arg0, 0
1191  %val1 = extractvalue {i32, i64} %arg0, 1
1192  %val2 = extractvalue {i32, i64} %arg1, 0
1193  %val3 = extractvalue {i32, i64} %arg1, 1
1194  store volatile i32 %val0, i32 addrspace(1)* null
1195  store volatile i64 %val1, i64 addrspace(1)* null
1196  store volatile i8 %pad, i8 addrspace(1)* null
1197  store volatile i32 %val2, i32 addrspace(1)* null
1198  store volatile i64 %val3, i64 addrspace(1)* null
1199  ret void
1200}
1201
1202define amdgpu_kernel void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %arg0, i8 %pad, {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1) {
1203  ; HSA-VI-LABEL: name: pointer_in_struct_argument
1204  ; HSA-VI: bb.1 (%ir-block.0):
1205  ; HSA-VI:   liveins: $sgpr4_sgpr5
1206  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1207  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1208  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1209  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1210  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1211  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1212  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
1213  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
1214  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1215  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4)
1216  ; HSA-VI:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
1217  ; HSA-VI:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1218  ; HSA-VI:   [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
1219  ; HSA-VI:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
1220  ; HSA-VI:   [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
1221  ; HSA-VI:   [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
1222  ; HSA-VI:   [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1223  ; HSA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1224  ; HSA-VI:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1225  ; HSA-VI:   [[COPY3:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1226  ; HSA-VI:   G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
1227  ; HSA-VI:   G_STORE [[LOAD1]](p1), [[COPY1]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1)
1228  ; HSA-VI:   G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
1229  ; HSA-VI:   G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
1230  ; HSA-VI:   G_STORE [[LOAD4]](p1234), [[COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1)
1231  ; HSA-VI:   S_ENDPGM 0
1232  ; LEGACY-MESA-VI-LABEL: name: pointer_in_struct_argument
1233  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1234  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1235  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1236  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1237  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1238  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1239  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1240  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1241  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
1242  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
1243  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1244  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
1245  ; LEGACY-MESA-VI:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60
1246  ; LEGACY-MESA-VI:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1247  ; LEGACY-MESA-VI:   [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1248  ; LEGACY-MESA-VI:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
1249  ; LEGACY-MESA-VI:   [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
1250  ; LEGACY-MESA-VI:   [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
1251  ; LEGACY-MESA-VI:   [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1252  ; LEGACY-MESA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1253  ; LEGACY-MESA-VI:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1254  ; LEGACY-MESA-VI:   [[COPY3:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1255  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
1256  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](p1), [[COPY1]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1)
1257  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
1258  ; LEGACY-MESA-VI:   G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
1259  ; LEGACY-MESA-VI:   G_STORE [[LOAD4]](p1234), [[COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1)
1260  ; LEGACY-MESA-VI:   S_ENDPGM 0
1261  %val0 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 0
1262  %val1 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 1
1263  %val2 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 0
1264  %val3 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 1
1265  store volatile i8 addrspace(3)* %val0, i8 addrspace(3)* addrspace(1)* null
1266  store volatile i8 addrspace(1)* %val1, i8 addrspace(1)* addrspace(1)* null
1267  store volatile i8 %pad, i8 addrspace(1)* null
1268  store volatile i8 addrspace(3)* %val2, i8 addrspace(3)* addrspace(1)* null
1269  store volatile i8 addrspace(1234)* %val3, i8 addrspace(1234)* addrspace(1)* null
1270  ret void
1271}
1272
1273; No padding between i8 and next struct, but round up at end to 4 byte
1274; multiple.
1275define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) {
1276  ; HSA-VI-LABEL: name: packed_struct_argument_alignment
1277  ; HSA-VI: bb.1 (%ir-block.1):
1278  ; HSA-VI:   liveins: $sgpr4_sgpr5
1279  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1280  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1281  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1282  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1283  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
1284  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1285  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
1286  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 13
1287  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1288  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4)
1289  ; HSA-VI:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 17
1290  ; HSA-VI:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1291  ; HSA-VI:   [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4)
1292  ; HSA-VI:   [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1293  ; HSA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C4]](p1)
1294  ; HSA-VI:   G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
1295  ; HSA-VI:   G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
1296  ; HSA-VI:   G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
1297  ; HSA-VI:   G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
1298  ; HSA-VI:   S_ENDPGM 0
1299  ; LEGACY-MESA-VI-LABEL: name: packed_struct_argument_alignment
1300  ; LEGACY-MESA-VI: bb.1 (%ir-block.1):
1301  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1302  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1303  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1304  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1305  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1306  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
1307  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1308  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
1309  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 49
1310  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1311  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4)
1312  ; LEGACY-MESA-VI:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 53
1313  ; LEGACY-MESA-VI:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1314  ; LEGACY-MESA-VI:   [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4)
1315  ; LEGACY-MESA-VI:   [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1316  ; LEGACY-MESA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C4]](p1)
1317  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
1318  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
1319  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
1320  ; LEGACY-MESA-VI:   G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
1321  ; LEGACY-MESA-VI:   S_ENDPGM 0
1322  %val0 = extractvalue <{i32, i64}> %arg0, 0
1323  %val1 = extractvalue <{i32, i64}> %arg0, 1
1324  %val2 = extractvalue <{i32, i64}> %arg1, 0
1325  %val3 = extractvalue <{i32, i64}> %arg1, 1
1326  store volatile i32 %val0, i32 addrspace(1)* null
1327  store volatile i64 %val1, i64 addrspace(1)* null
1328  store volatile i32 %val2, i32 addrspace(1)* null
1329  store volatile i64 %val3, i64 addrspace(1)* null
1330  ret void
1331}
1332
1333define amdgpu_kernel void @unused_i32_arg(i32 addrspace(1)* nocapture %out, i32 %unused, i32 %in) nounwind {
1334  ; HSA-VI-LABEL: name: unused_i32_arg
1335  ; HSA-VI: bb.1.entry:
1336  ; HSA-VI:   liveins: $sgpr4_sgpr5
1337  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1338  ; HSA-VI:   S_ENDPGM 0
1339  ; LEGACY-MESA-VI-LABEL: name: unused_i32_arg
1340  ; LEGACY-MESA-VI: bb.1.entry:
1341  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1342  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1343  ; LEGACY-MESA-VI:   S_ENDPGM 0
1344entry:
1345  ret void
1346}
1347
1348; Byref pointers should only be treated as offsets from kernarg
1349define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %out, i8 addrspace(4)* byref(i8) %in.byref) {
1350  ; HSA-VI-LABEL: name: byref_constant_i8_arg
1351  ; HSA-VI: bb.1 (%ir-block.0):
1352  ; HSA-VI:   liveins: $sgpr4_sgpr5
1353  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1354  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1355  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1356  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1357  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1358  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1359  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s8) from %ir.in.byref, addrspace 4)
1360  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
1361  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1362  ; HSA-VI:   S_ENDPGM 0
1363  ; LEGACY-MESA-VI-LABEL: name: byref_constant_i8_arg
1364  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1365  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1366  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1367  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1368  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1369  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1370  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1371  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1372  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s8) from %ir.in.byref, addrspace 4)
1373  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
1374  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1375  ; LEGACY-MESA-VI:   S_ENDPGM 0
1376  %in = load i8, i8 addrspace(4)* %in.byref
1377  %ext = zext i8 %in to i32
1378  store i32 %ext, i32 addrspace(1)* %out, align 4
1379  ret void
1380}
1381
1382define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %out, i16 addrspace(4)* byref(i16) align 2 %in.byref) {
1383  ; HSA-VI-LABEL: name: byref_constant_i16_arg
1384  ; HSA-VI: bb.1 (%ir-block.0):
1385  ; HSA-VI:   liveins: $sgpr4_sgpr5
1386  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1387  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1388  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1389  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1390  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1391  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1392  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s16) from %ir.in.byref, addrspace 4)
1393  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
1394  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1395  ; HSA-VI:   S_ENDPGM 0
1396  ; LEGACY-MESA-VI-LABEL: name: byref_constant_i16_arg
1397  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1398  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1399  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1400  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1401  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1402  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1403  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1404  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1405  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s16) from %ir.in.byref, addrspace 4)
1406  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
1407  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1408  ; LEGACY-MESA-VI:   S_ENDPGM 0
1409  %in = load i16, i16 addrspace(4)* %in.byref
1410  %ext = zext i16 %in to i32
1411  store i32 %ext, i32 addrspace(1)* %out, align 4
1412  ret void
1413}
1414
1415define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align 4 %in.byref, i32 %after.offset) {
1416  ; HSA-VI-LABEL: name: byref_constant_i32_arg
1417  ; HSA-VI: bb.1 (%ir-block.0):
1418  ; HSA-VI:   liveins: $sgpr4_sgpr5
1419  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1420  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1421  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1422  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1423  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1424  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1425  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
1426  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1427  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1428  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
1429  ; HSA-VI:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1430  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1431  ; HSA-VI:   S_ENDPGM 0
1432  ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg
1433  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1434  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1435  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1436  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1437  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1438  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1439  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1440  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1441  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
1442  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1443  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1444  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
1445  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1446  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1447  ; LEGACY-MESA-VI:   S_ENDPGM 0
1448  %in = load i32, i32 addrspace(4)* %in.byref
1449  store volatile i32 %in, i32 addrspace(1)* %out, align 4
1450  store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4
1451  ret void
1452}
1453
1454define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> addrspace(4)* byref(<4 x i32>) align(16) %in.byref, i32 %after.offset) {
1455  ; HSA-VI-LABEL: name: byref_constant_v4i32_arg
1456  ; HSA-VI: bb.1 (%ir-block.0):
1457  ; HSA-VI:   liveins: $sgpr4_sgpr5
1458  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1459  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1460  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1461  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1462  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
1463  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1464  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
1465  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1466  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1467  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4)
1468  ; HSA-VI:   G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1)
1469  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.cast, addrspace 1)
1470  ; HSA-VI:   S_ENDPGM 0
1471  ; LEGACY-MESA-VI-LABEL: name: byref_constant_v4i32_arg
1472  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1473  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1474  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1475  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1476  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1477  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1478  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
1479  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1480  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
1481  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1482  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1483  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4)
1484  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1)
1485  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.cast, addrspace 1)
1486  ; LEGACY-MESA-VI:   S_ENDPGM 0
1487  %in = load <4 x i32>, <4 x i32> addrspace(4)* %in.byref
1488  store volatile <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
1489  %out.cast = bitcast <4 x i32> addrspace(1)* %out to i32 addrspace(1)*
1490  store volatile i32 %after.offset, i32 addrspace(1)* %out.cast, align 4
1491  ret void
1492}
1493
1494define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align(256) %in.byref, i32 %after.offset) {
1495  ; HSA-VI-LABEL: name: byref_align_constant_i32_arg
1496  ; HSA-VI: bb.1 (%ir-block.0):
1497  ; HSA-VI:   liveins: $sgpr4_sgpr5
1498  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1499  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1500  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1501  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1502  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
1503  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1504  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 260
1505  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1506  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1507  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
1508  ; HSA-VI:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1509  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1510  ; HSA-VI:   S_ENDPGM 0
1511  ; LEGACY-MESA-VI-LABEL: name: byref_align_constant_i32_arg
1512  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1513  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1514  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1515  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1516  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1517  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1518  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 292
1519  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1520  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 296
1521  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1522  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
1523  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
1524  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1525  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1526  ; LEGACY-MESA-VI:   S_ENDPGM 0
1527  %in = load i32, i32 addrspace(4)* %in.byref
1528  store volatile i32 %in, i32 addrspace(1)* %out, align 4
1529  store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4
1530  ret void
1531}
1532
1533define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace(1)* nocapture %out, i8, <16 x i32> addrspace(4)* byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) {
1534  ; HSA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg
1535  ; HSA-VI: bb.1 (%ir-block.1):
1536  ; HSA-VI:   liveins: $sgpr4_sgpr5
1537  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1538  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1539  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1540  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1541  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
1542  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1543  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
1544  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1545  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1546  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4)
1547  ; HSA-VI:   G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.cast.out, align 4, addrspace 1)
1548  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1549  ; HSA-VI:   S_ENDPGM 0
1550  ; LEGACY-MESA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg
1551  ; LEGACY-MESA-VI: bb.1 (%ir-block.1):
1552  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1553  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1554  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1555  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1556  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1557  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
1558  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1559  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 164
1560  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1561  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1562  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4)
1563  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.cast.out, align 4, addrspace 1)
1564  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1565  ; LEGACY-MESA-VI:   S_ENDPGM 0
1566  %in = load <16 x i32>, <16 x i32> addrspace(4)* %in.byref
1567  %cast.out = bitcast i32 addrspace(1)* %out to <16 x i32> addrspace(1)*
1568  store volatile <16 x i32> %in, <16 x i32> addrspace(1)* %cast.out, align 4
1569  store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4
1570  ret void
1571}
1572
1573; Also accept byref kernel arguments with other global address spaces.
1574define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* byref(i32) align(4) %in.byref) {
1575  ; HSA-VI-LABEL: name: byref_global_i32_arg
1576  ; HSA-VI: bb.1 (%ir-block.0):
1577  ; HSA-VI:   liveins: $sgpr4_sgpr5
1578  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1579  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1580  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1581  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1582  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1583  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1584  ; HSA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1585  ; HSA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[ADDRSPACE_CAST]](p1)
1586  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable load (s32) from %ir.1, addrspace 1)
1587  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1588  ; HSA-VI:   S_ENDPGM 0
1589  ; LEGACY-MESA-VI-LABEL: name: byref_global_i32_arg
1590  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1591  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1592  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1593  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1594  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1595  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1596  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1597  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1598  ; LEGACY-MESA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1599  ; LEGACY-MESA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[ADDRSPACE_CAST]](p1)
1600  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable load (s32) from %ir.1, addrspace 1)
1601  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1602  ; LEGACY-MESA-VI:   S_ENDPGM 0
1603  %in = load i32, i32 addrspace(1)* %in.byref
1604  store i32 %in, i32 addrspace(1)* %out, align 4
1605  ret void
1606}
1607
1608define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out, i32* byref(i32) align(4) %in.byref) {
1609  ; HSA-VI-LABEL: name: byref_flat_i32_arg
1610  ; HSA-VI: bb.1 (%ir-block.0):
1611  ; HSA-VI:   liveins: $sgpr4_sgpr5
1612  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1613  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1614  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1615  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1616  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1617  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1618  ; HSA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1619  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref)
1620  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1621  ; HSA-VI:   S_ENDPGM 0
1622  ; LEGACY-MESA-VI-LABEL: name: byref_flat_i32_arg
1623  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1624  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1625  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1626  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1627  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1628  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1629  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1630  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1631  ; LEGACY-MESA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1632  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref)
1633  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1634  ; LEGACY-MESA-VI:   S_ENDPGM 0
1635  %in = load i32, i32* %in.byref
1636  store i32 %in, i32 addrspace(1)* %out, align 4
1637  ret void
1638}
1639
1640define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(6)* byref(i32) align(4) %in.byref) {
1641  ; HSA-VI-LABEL: name: byref_constant_32bit_i32_arg
1642  ; HSA-VI: bb.1 (%ir-block.0):
1643  ; HSA-VI:   liveins: $sgpr4_sgpr5
1644  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1645  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1646  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1647  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1648  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1649  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1650  ; HSA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1651  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6)
1652  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1653  ; HSA-VI:   S_ENDPGM 0
1654  ; LEGACY-MESA-VI-LABEL: name: byref_constant_32bit_i32_arg
1655  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1656  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1657  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1658  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1659  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1660  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1661  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1662  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1663  ; LEGACY-MESA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1664  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6)
1665  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1666  ; LEGACY-MESA-VI:   S_ENDPGM 0
1667  %in = load i32, i32 addrspace(6)* %in.byref
1668  store i32 %in, i32 addrspace(1)* %out, align 4
1669  ret void
1670}
1671
1672define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(999)* byref(i32) align(4) %in.byref) {
1673  ; HSA-VI-LABEL: name: byref_unknown_as_i32_arg
1674  ; HSA-VI: bb.1 (%ir-block.0):
1675  ; HSA-VI:   liveins: $sgpr4_sgpr5
1676  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1677  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1678  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1679  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1680  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1681  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1682  ; HSA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1683  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999)
1684  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1685  ; HSA-VI:   S_ENDPGM 0
1686  ; LEGACY-MESA-VI-LABEL: name: byref_unknown_as_i32_arg
1687  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1688  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1689  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1690  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1691  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1692  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1693  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1694  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1695  ; LEGACY-MESA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1696  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999)
1697  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1698  ; LEGACY-MESA-VI:   S_ENDPGM 0
1699  %in = load i32, i32 addrspace(999)* %in.byref
1700  store i32 %in, i32 addrspace(1)* %out, align 4
1701  ret void
1702}
1703
1704; Invalid, but should not crash.
1705define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(3)* byref(i32) align(4) %in.byref) {
1706  ; HSA-VI-LABEL: name: byref_local_i32_arg
1707  ; HSA-VI: bb.1 (%ir-block.0):
1708  ; HSA-VI:   liveins: $sgpr4_sgpr5
1709  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1710  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1711  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1712  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1713  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1714  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1715  ; HSA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1716  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3)
1717  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1718  ; HSA-VI:   S_ENDPGM 0
1719  ; LEGACY-MESA-VI-LABEL: name: byref_local_i32_arg
1720  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1721  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1722  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1723  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1724  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1725  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1726  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1727  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1728  ; LEGACY-MESA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1729  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3)
1730  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1731  ; LEGACY-MESA-VI:   S_ENDPGM 0
1732  %in = load i32, i32 addrspace(3)* %in.byref
1733  store i32 %in, i32 addrspace(1)* %out, align 4
1734  ret void
1735}
1736
1737define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align(4) %in0.byref, i32 addrspace(4)* byref(i32) align(4) %in1.byref, i32 %after.offset) {
1738  ; HSA-VI-LABEL: name: multi_byref_constant_i32_arg
1739  ; HSA-VI: bb.1 (%ir-block.0):
1740  ; HSA-VI:   liveins: $sgpr4_sgpr5
1741  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1742  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1743  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1744  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1745  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1746  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1747  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
1748  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1749  ; HSA-VI:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
1750  ; HSA-VI:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1751  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1752  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in0.byref, addrspace 4)
1753  ; HSA-VI:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load (s32) from %ir.in1.byref, addrspace 4)
1754  ; HSA-VI:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1755  ; HSA-VI:   G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1756  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1757  ; HSA-VI:   S_ENDPGM 0
1758  ; LEGACY-MESA-VI-LABEL: name: multi_byref_constant_i32_arg
1759  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1760  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1761  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1762  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1763  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1764  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1765  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1766  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1767  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
1768  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1769  ; LEGACY-MESA-VI:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
1770  ; LEGACY-MESA-VI:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1771  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1772  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in0.byref, addrspace 4)
1773  ; LEGACY-MESA-VI:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load (s32) from %ir.in1.byref, addrspace 4)
1774  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1775  ; LEGACY-MESA-VI:   G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1776  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1777  ; LEGACY-MESA-VI:   S_ENDPGM 0
1778  %in0 = load i32, i32 addrspace(4)* %in0.byref
1779  %in1 = load i32, i32 addrspace(4)* %in1.byref
1780  store volatile i32 %in0, i32 addrspace(1)* %out, align 4
1781  store volatile i32 %in1, i32 addrspace(1)* %out, align 4
1782  store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4
1783  ret void
1784}
1785
1786define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byref(i32) align(4) %in.byref) {
1787  ; HSA-VI-LABEL: name: byref_constant_i32_arg_offset0
1788  ; HSA-VI: bb.1 (%ir-block.0):
1789  ; HSA-VI:   liveins: $sgpr4_sgpr5
1790  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1791  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1792  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1793  ; HSA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1794  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
1795  ; HSA-VI:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1796  ; HSA-VI:   S_ENDPGM 0
1797  ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg_offset0
1798  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1799  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1800  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1801  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1802  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1803  ; LEGACY-MESA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1804  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
1805  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1806  ; LEGACY-MESA-VI:   S_ENDPGM 0
1807  %in = load i32, i32 addrspace(4)* %in.byref
1808  store i32 %in, i32 addrspace(1)* undef, align 4
1809  ret void
1810}
1811
1812define amdgpu_kernel void @p3i8_arg(i8 addrspace(3)* %arg) nounwind {
1813  ; HSA-VI-LABEL: name: p3i8_arg
1814  ; HSA-VI: bb.1 (%ir-block.0):
1815  ; HSA-VI:   liveins: $sgpr4_sgpr5
1816  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1817  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1818  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1819  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p3), align 16, addrspace 4)
1820  ; HSA-VI:   [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
1821  ; HSA-VI:   G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3)
1822  ; HSA-VI:   S_ENDPGM 0
1823  ; LEGACY-MESA-VI-LABEL: name: p3i8_arg
1824  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1825  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1826  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1827  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1828  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1829  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p3), addrspace 4)
1830  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
1831  ; LEGACY-MESA-VI:   G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3)
1832  ; LEGACY-MESA-VI:   S_ENDPGM 0
1833  store i8 9, i8 addrspace(3)* %arg, align 4
1834  ret void
1835}
1836
1837define amdgpu_kernel void @p1i8_arg(i8 addrspace(1)* %arg) nounwind {
1838  ; HSA-VI-LABEL: name: p1i8_arg
1839  ; HSA-VI: bb.1 (%ir-block.0):
1840  ; HSA-VI:   liveins: $sgpr4_sgpr5
1841  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1842  ; HSA-VI:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
1843  ; HSA-VI:   [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0
1844  ; HSA-VI:   G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `i8 addrspace(3)* null`, addrspace 3)
1845  ; HSA-VI:   S_ENDPGM 0
1846  ; LEGACY-MESA-VI-LABEL: name: p1i8_arg
1847  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1848  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1849  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1850  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
1851  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0
1852  ; LEGACY-MESA-VI:   G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `i8 addrspace(3)* null`, addrspace 3)
1853  ; LEGACY-MESA-VI:   S_ENDPGM 0
1854  store i8 9, i8 addrspace(3)* null
1855  ret void
1856}
1857
1858define amdgpu_kernel void @v2p1i8_arg(<2 x i8 addrspace(1)*> %arg) nounwind {
1859  ; HSA-VI-LABEL: name: v2p1i8_arg
1860  ; HSA-VI: bb.1 (%ir-block.0):
1861  ; HSA-VI:   liveins: $sgpr4_sgpr5
1862  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1863  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1864  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1865  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), addrspace 4)
1866  ; HSA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1867  ; HSA-VI:   G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
1868  ; HSA-VI:   S_ENDPGM 0
1869  ; LEGACY-MESA-VI-LABEL: name: v2p1i8_arg
1870  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1871  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1872  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1873  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1874  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1875  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), align 4, addrspace 4)
1876  ; LEGACY-MESA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1877  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
1878  ; LEGACY-MESA-VI:   S_ENDPGM 0
1879  store <2 x i8 addrspace(1)*> %arg, <2 x i8 addrspace(1)*> addrspace(1)* undef
1880  ret void
1881}
1882
1883define amdgpu_kernel void @v2p3i8_arg(<2 x i8 addrspace(3)*> %arg) nounwind {
1884  ; HSA-VI-LABEL: name: v2p3i8_arg
1885  ; HSA-VI: bb.1 (%ir-block.0):
1886  ; HSA-VI:   liveins: $sgpr4_sgpr5
1887  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1888  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1889  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1890  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 16, addrspace 4)
1891  ; HSA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1892  ; HSA-VI:   G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1)
1893  ; HSA-VI:   S_ENDPGM 0
1894  ; LEGACY-MESA-VI-LABEL: name: v2p3i8_arg
1895  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1896  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1897  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1898  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1899  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1900  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 4, addrspace 4)
1901  ; LEGACY-MESA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1902  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1)
1903  ; LEGACY-MESA-VI:   S_ENDPGM 0
1904  store <2 x i8 addrspace(3)*> %arg, <2 x i8 addrspace(3)*> addrspace(1)* undef
1905  ret void
1906}
1907
1908define amdgpu_kernel void @v2p1i8_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg) nounwind {
1909  ; HSA-VI-LABEL: name: v2p1i8_in_struct_arg
1910  ; HSA-VI: bb.1 (%ir-block.0):
1911  ; HSA-VI:   liveins: $sgpr4_sgpr5
1912  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1913  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1914  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1915  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), addrspace 4)
1916  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
1917  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1918  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 16, addrspace 4)
1919  ; HSA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1920  ; HSA-VI:   G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1)
1921  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
1922  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
1923  ; HSA-VI:   G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1)
1924  ; HSA-VI:   S_ENDPGM 0
1925  ; LEGACY-MESA-VI-LABEL: name: v2p1i8_in_struct_arg
1926  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1927  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1928  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1929  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1930  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1931  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4)
1932  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
1933  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1934  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
1935  ; LEGACY-MESA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1936  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1)
1937  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
1938  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
1939  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1)
1940  ; LEGACY-MESA-VI:   S_ENDPGM 0
1941  store { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg, { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef
1942  ret void
1943}
1944