1; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
2
3; Most of these tests create a vector tuple, insert z5 into one of the elements,
4; and finally extract that element from the wide vector to return it.  Those
5; checks ensure that z5 is the value that is returned.
6
7;
8; Insert into two element tuples
9;
10
11; tuple:      { tuple2.res0, tuple2.res1 }
12; insert z5:  {     z5     , tuple2.res1 }
13; extract z5:       ^^
14define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
15                                                   <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
16                                                   <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
17  ; CHECK-LABEL: set_tuple2_nxv8i32_elt0:
18  ; CHECK-NEXT:  mov     z0.d, z5.d
19  ; CHECK-NEXT:  ret
  ; Build a two-element tuple from %z0 and %z1, overwrite element 0 with %z5,
  ; then read element 0 back and return it.  The expected output is a single
  ; move of z5 into z0 followed by the return.
20  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
21  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
22  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
23  ret <vscale x 4 x i32> %ext
24}
25
26; tuple:       { tuple2.res0, tuple2.res1 }
27; insert z5:   { tuple2.res0,     z5      }
28; extract z5:                     ^^
29define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
30                                                   <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
31                                                   <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
32  ; CHECK-LABEL: set_tuple2_nxv8i32_elt1:
33  ; CHECK-NEXT:  mov     z0.d, z5.d
34  ; CHECK-NEXT:  ret
  ; Build a two-element tuple from %z0 and %z1, overwrite element 1 with %z5,
  ; then read element 1 back and return it.  The expected output is a single
  ; move of z5 into z0 followed by the return.
35  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
36  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
37  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 1)
38  ret <vscale x 4 x i32> %ext
39}
40
41; This test checks the elements _not_ being set aren't changed.
42
43; tuple:       { tuple2.res0, tuple2.res1 }
44; insert z5:   { tuple2.res0,     z5      }
45; extract z0:         ^^
46define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1_ret_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
47                                                            <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
48                                                            <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
49  ; CHECK-LABEL: set_tuple2_nxv8i32_elt1_ret_elt0:
50  ; CHECK-NEXT:  ret
  ; Build a two-element tuple from %z0 and %z1, overwrite element 1 with %z5,
  ; but read back element 0, which the set did not touch.  The expected output
  ; is a bare return with no register move before it.
51  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
52  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
53  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
54  ret <vscale x 4 x i32> %ext
55}
56
57; Test extract of tuple passed into function
58define <vscale x 4 x i32> @get_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %tuple) #0 {
59  ; CHECK-LABEL: get_tuple2_nxv8i32_elt1:
60  ; CHECK-NEXT:  mov     z0.d, z1.d
61  ; CHECK-NEXT:  ret
  ; The two-element tuple arrives as a single wide argument; no create or set
  ; is involved.  Element 1 is read and returned, and the expected output moves
  ; z1 into z0.
62  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %tuple, i32 1)
63  ret <vscale x 4 x i32> %ext
64}
65
66;
67; Insert into three element tuples
68;
69
70; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
71; insert z5:   {     z5     , tuple3.res1, tuple3.res2 }
72; extract z5:        ^^
73define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
74                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
75                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
76  ; CHECK-LABEL: set_tuple3_nxv12i32_elt0:
77  ; CHECK-NEXT:  mov     z0.d, z5.d
78  ; CHECK-NEXT:  ret
  ; Build a three-element tuple from %z0, %z1 and %z2, overwrite element 0 with
  ; %z5, then read element 0 back and return it.  The expected output is a
  ; single move of z5 into z0 followed by the return.
79  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
80  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
81  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 0)
82  ret <vscale x 4 x i32> %ext
83}
84
85; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
86; insert z5:   { tuple3.res0,     z5     , tuple3.res2 }
87; extract z5:                     ^^
88define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
89                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
90                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
91  ; CHECK-LABEL: set_tuple3_nxv12i32_elt1:
92  ; CHECK-NEXT:  mov     z0.d, z5.d
93  ; CHECK-NEXT:  ret
  ; Build a three-element tuple from %z0, %z1 and %z2, overwrite element 1 with
  ; %z5, then read element 1 back and return it.  The expected output is a
  ; single move of z5 into z0 followed by the return.
94  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
95  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
96  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 1)
97  ret <vscale x 4 x i32> %ext
98}
99
100; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
101; insert z5:   { tuple3.res0, tuple3.res1,     z5      }
102; extract z5:                                  ^^
103define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
104                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
105                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
106  ; CHECK-LABEL: set_tuple3_nxv12i32_elt2:
107  ; CHECK-NEXT:  mov     z0.d, z5.d
108  ; CHECK-NEXT:  ret
  ; Build a three-element tuple from %z0, %z1 and %z2, overwrite element 2 with
  ; %z5, then read element 2 back and return it.  The expected output is a
  ; single move of z5 into z0 followed by the return.
109  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
110  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
111  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
112  ret <vscale x 4 x i32> %ext
113}
114
115; This test checks the elements _not_ being set aren't changed.
116
117; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
118; insert z5:   { tuple3.res0,     z5     , tuple3.res2 }
119; extract z2:                                  ^^
120define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
121                                                             <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
122                                                             <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
123  ; CHECK-LABEL: set_tuple3_nxv12i32_elt1_ret_elt2:
124  ; CHECK-NEXT:  mov     z0.d, z2.d
125  ; CHECK-NEXT:  ret
  ; Build a three-element tuple from %z0, %z1 and %z2, overwrite element 1 with
  ; %z5, but read back element 2, which the set did not touch.  The expected
  ; output moves z2 (not z5) into z0.
126  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
127  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
128  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
129  ret <vscale x 4 x i32> %ext
130}
131
132; Test extract of tuple passed into function
133define <vscale x 4 x i32> @get_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 12 x i32> %tuple) #0 {
134  ; CHECK-LABEL: get_tuple3_nxv12i32_elt2:
135  ; CHECK-NEXT:  mov     z0.d, z3.d
136  ; CHECK-NEXT:  ret
  ; The three-element tuple arrives as the second argument; the leading %z0
  ; argument is never used in the body.  Element 2 is read and returned, and
  ; the expected output moves z3 into z0.
  ; NOTE(review): presumably %z0 exists only to shift the tuple to z1-z3 so
  ; that element 2 lands in z3 -- confirm against the calling convention.
137  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %tuple, i32 2)
138  ret <vscale x 4 x i32> %ext
139}
140
141;
142; Insert into four element tuples
143;
144
145; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
146; insert z5:   {     z5     , tuple4.res1, tuple4.res2, tuple4.res3 }
147; extract z5:        ^^
148define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
149                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
150                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
151  ; CHECK-LABEL: set_tuple4_nxv16i32_elt0:
152  ; CHECK-NEXT:  mov     z0.d, z5.d
153  ; CHECK-NEXT:  ret
  ; Build a four-element tuple from %z0 through %z3, overwrite element 0 with
  ; %z5, then read element 0 back and return it.  The expected output is a
  ; single move of z5 into z0 followed by the return.
154  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
155  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
156  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 0)
157  ret <vscale x 4 x i32> %ext
158}
159
160; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
161; insert z5:   { tuple4.res0,     z5     , tuple4.res2, tuple4.res3 }
162; extract z5:                     ^^
163define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
164                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
165                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
166  ; CHECK-LABEL: set_tuple4_nxv16i32_elt1:
167  ; CHECK-NEXT:  mov     z0.d, z5.d
168  ; CHECK-NEXT:  ret
  ; Build a four-element tuple from %z0 through %z3, overwrite element 1 with
  ; %z5, then read element 1 back and return it.  The expected output is a
  ; single move of z5 into z0 followed by the return.
169  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
170  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
171  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 1)
172  ret <vscale x 4 x i32> %ext
173}
174
175; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
176; insert z5:   { tuple4.res0, tuple4.res1,     z5     , tuple4.res3 }
177; extract z5:                                  ^^
178define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
179                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
180                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
181  ; CHECK-LABEL: set_tuple4_nxv16i32_elt2:
182  ; CHECK-NEXT:  mov     z0.d, z5.d
183  ; CHECK-NEXT:  ret
  ; Build a four-element tuple from %z0 through %z3, overwrite element 2 with
  ; %z5, then read element 2 back and return it.  The expected output is a
  ; single move of z5 into z0 followed by the return.
184  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
185  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
186  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
187  ret <vscale x 4 x i32> %ext
188}
189
190; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
191; insert z5:   { tuple4.res0, tuple4.res1, tuple4.res2,     z5      }
192; extract z5:                                               ^^
193define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
194                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
195                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
196  ; CHECK-LABEL: set_tuple4_nxv16i32_elt3:
197  ; CHECK-NEXT:  mov     z0.d, z5.d
198  ; CHECK-NEXT:  ret
  ; Build a four-element tuple from %z0 through %z3, overwrite element 3 with
  ; %z5, then read element 3 back and return it.  The expected output is a
  ; single move of z5 into z0 followed by the return.
199  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
200  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
201  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 3)
202  ret <vscale x 4 x i32> %ext
203}
204
205; This test checks the elements _not_ being set aren't changed.
206
207; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
208; insert z5:   { tuple4.res0, tuple4.res1, tuple4.res2,     z5      }
209; extract z2:                                  ^^
210define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
211                                                             <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
212                                                             <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
213  ; CHECK-LABEL: set_tuple4_nxv16i32_elt3_ret_elt2:
214  ; CHECK-NEXT:  mov     z0.d, z2.d
215  ; CHECK-NEXT:  ret
  ; Build a four-element tuple from %z0 through %z3, overwrite element 3 with
  ; %z5, but read back element 2, which the set did not touch.  The expected
  ; output moves z2 (not z5) into z0.
216  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
217  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
218  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
219  ret <vscale x 4 x i32> %ext
220}
221
222; Test extract of tuple passed into function
223define <vscale x 4 x i32> @get_tuple4_nxv16i32_elt3(<vscale x 16 x i32> %tuple) #0 {
224  ; CHECK-LABEL: get_tuple4_nxv16i32_elt3:
225  ; CHECK-NEXT:  mov     z0.d, z3.d
226  ; CHECK-NEXT:  ret
  ; The four-element tuple arrives as a single wide argument; no create or set
  ; is involved.  Element 3 is read and returned, and the expected output moves
  ; z3 into z0.
227  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %tuple, i32 3)
228  ret <vscale x 4 x i32> %ext
229}
230
; Attribute group referenced as #0 by every function defined above: nounwind,
; with the "+sve" target feature enabled.
231attributes #0 = { nounwind "target-features"="+sve" }
232
; Two-element tuple intrinsics (<vscale x 8 x i32> built from <vscale x 4 x i32>
; parts): create, set one element by index, get one element by index.
233declare <vscale x 8 x i32>  @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
234declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32>, i32, <vscale x 4 x i32>)
235declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32>, i32)
236
; Three-element tuple intrinsics (<vscale x 12 x i32>).
237declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
238declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32>, i32, <vscale x 4 x i32>)
239declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32>, i32)
240
; Four-element tuple intrinsics (<vscale x 16 x i32>).
241declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
242declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32>, i32, <vscale x 4 x i32>)
243declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32>, i32)
244