; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

; Unpredicated VADCI: carry-in is fixed to 0; carry-out is taken from bit 29
; of the intrinsic's status result (FPSCR carry) and stored through %carry_out.
define arm_aapcs_vfpcc <4 x i32> @test_vadciq_s32(<4 x i32> %a, <4 x i32> %b, i32* %carry_out) {
; CHECK-LABEL: test_vadciq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadci.i32 q0, q0, q1
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
  %1 = extractvalue { <4 x i32>, i32 } %0, 1
  %2 = lshr i32 %1, 29
  %3 = and i32 %2, 1
  store i32 %3, i32* %carry_out, align 4
  %4 = extractvalue { <4 x i32>, i32 } %0, 0
  ret <4 x i32> %4
}

declare { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32>, <4 x i32>, i32)

; VADC with carry-in: *%carry is shifted up to bit 29 and passed as the
; intrinsic's carry operand; carry-out is read back from bit 29 and stored.
define arm_aapcs_vfpcc <4 x i32> @test_vadcq_u32(<4 x i32> %a, <4 x i32> %b, i32* %carry) {
; CHECK-LABEL: test_vadcq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldr r1, [r0]
; CHECK-NEXT:    lsls r1, r1, #29
; CHECK-NEXT:    vmsr fpscr_nzcvqc, r1
; CHECK-NEXT:    vadc.i32 q0, q0, q1
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load i32, i32* %carry, align 4
  %1 = shl i32 %0, 29
  %2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %1)
  %3 = extractvalue { <4 x i32>, i32 } %2, 1
  %4 = lshr i32 %3, 29
  %5 = and i32 %4, 1
  store i32 %5, i32* %carry, align 4
  %6 = extractvalue { <4 x i32>, i32 } %2, 0
  ret <4 x i32> %6
}

; Predicated VADCI: %p becomes the VPT predicate (vmsr p0 + vpst), carry-in is
; fixed to 0, and carry-out is extracted from bit 29 and stored.
define arm_aapcs_vfpcc <4 x i32> @test_vadciq_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32* %carry_out, i16 zeroext %p) {
; CHECK-LABEL: test_vadciq_m_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vadcit.i32 q0, q1, q2
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
  %3 = extractvalue { <4 x i32>, i32 } %2, 1
  %4 = lshr i32 %3, 29
  %5 = and i32 %4, 1
  store i32 %5, i32* %carry_out, align 4
  %6 = extractvalue { <4 x i32>, i32 } %2, 0
  ret <4 x i32> %6
}

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

declare { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i32>, i32, <4 x i1>)

; Predicated VADC with carry-in: carry is loaded, shifted to bit 29, written
; via vmsr before the predicated add; carry-out is read back and stored.
define arm_aapcs_vfpcc <4 x i32> @test_vadcq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32* %carry, i16 zeroext %p) {
; CHECK-LABEL: test_vadcq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldr r2, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    lsls r1, r2, #29
; CHECK-NEXT:    vmsr fpscr_nzcvqc, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vadct.i32 q0, q1, q2
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load i32, i32* %carry, align 4
  %1 = shl i32 %0, 29
  %2 = zext i16 %p to i32
  %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
  %4 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 %1, <4 x i1> %3)
  %5 = extractvalue { <4 x i32>, i32 } %4, 1
  %6 = lshr i32 %5, 29
  %7 = and i32 %6, 1
  store i32 %7, i32* %carry, align 4
  %8 = extractvalue { <4 x i32>, i32 } %4, 0
  ret <4 x i32> %8
}

declare { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32>, <4 x i32>, i32)

; Unpredicated VSBCI: borrow-in fixed to 0; borrow-out extracted from bit 29
; of the status result and stored through %carry_out.
define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_s32(<4 x i32> %a, <4 x i32> %b, i32* nocapture %carry_out) {
; CHECK-LABEL: test_vsbciq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsbci.i32 q0, q0, q1
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
  %1 = extractvalue { <4 x i32>, i32 } %0, 1
  %2 = lshr i32 %1, 29
  %3 = and i32 %2, 1
  store i32 %3, i32* %carry_out, align 4
  %4 = extractvalue { <4 x i32>, i32 } %0, 0
  ret <4 x i32> %4
}

; Unsigned variant of the VSBCI test above; IR and expected codegen are
; identical to the signed case.
define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_u32(<4 x i32> %a, <4 x i32> %b, i32* nocapture %carry_out) {
; CHECK-LABEL: test_vsbciq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsbci.i32 q0, q0, q1
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
  %1 = extractvalue { <4 x i32>, i32 } %0, 1
  %2 = lshr i32 %1, 29
  %3 = and i32 %2, 1
  store i32 %3, i32* %carry_out, align 4
  %4 = extractvalue { <4 x i32>, i32 } %0, 0
  ret <4 x i32> %4
}

; VSBC with borrow-in: *%carry is shifted up to bit 29 and passed as the
; intrinsic's carry operand; borrow-out is read back from bit 29 and stored.
define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_s32(<4 x i32> %a, <4 x i32> %b, i32* nocapture %carry) {
; CHECK-LABEL: test_vsbcq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldr r1, [r0]
; CHECK-NEXT:    lsls r1, r1, #29
; CHECK-NEXT:    vmsr fpscr_nzcvqc, r1
; CHECK-NEXT:    vsbc.i32 q0, q0, q1
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load i32, i32* %carry, align 4
  %1 = shl i32 %0, 29
  %2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %1)
  %3 = extractvalue { <4 x i32>, i32 } %2, 1
  %4 = lshr i32 %3, 29
  %5 = and i32 %4, 1
  store i32 %5, i32* %carry, align 4
  %6 = extractvalue { <4 x i32>, i32 } %2, 0
  ret <4 x i32> %6
}

; Unsigned variant of the VSBC borrow-in test above; IR and expected codegen
; are identical to the signed case.
define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_u32(<4 x i32> %a, <4 x i32> %b, i32* nocapture %carry) {
; CHECK-LABEL: test_vsbcq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldr r1, [r0]
; CHECK-NEXT:    lsls r1, r1, #29
; CHECK-NEXT:    vmsr fpscr_nzcvqc, r1
; CHECK-NEXT:    vsbc.i32 q0, q0, q1
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load i32, i32* %carry, align 4
  %1 = shl i32 %0, 29
  %2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %1)
  %3 = extractvalue { <4 x i32>, i32 } %2, 1
  %4 = lshr i32 %3, 29
  %5 = and i32 %4, 1
  store i32 %5, i32* %carry, align 4
  %6 = extractvalue { <4 x i32>, i32 } %2, 0
  ret <4 x i32> %6
}

declare { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i32>, i32, <4 x i1>)

; Predicated VSBCI: %p becomes the VPT predicate (vmsr p0 + vpst), borrow-in
; is fixed to 0, and borrow-out is extracted from bit 29 and stored.
define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32* nocapture %carry_out, i16 zeroext %p) {
; CHECK-LABEL: test_vsbciq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsbcit.i32 q0, q1, q2
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
  %3 = extractvalue { <4 x i32>, i32 } %2, 1
  %4 = lshr i32 %3, 29
  %5 = and i32 %4, 1
  store i32 %5, i32* %carry_out, align 4
  %6 = extractvalue { <4 x i32>, i32 } %2, 0
  ret <4 x i32> %6
}

; Unsigned variant of the predicated VSBCI test above; IR and expected codegen
; are identical to the signed case.
define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32* nocapture %carry_out, i16 zeroext %p) {
; CHECK-LABEL: test_vsbciq_m_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsbcit.i32 q0, q1, q2
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
  %3 = extractvalue { <4 x i32>, i32 } %2, 1
  %4 = lshr i32 %3, 29
  %5 = and i32 %4, 1
  store i32 %5, i32* %carry_out, align 4
  %6 = extractvalue { <4 x i32>, i32 } %2, 0
  ret <4 x i32> %6
}

; Predicated VSBC with borrow-in: borrow is loaded, shifted to bit 29, written
; via vmsr before the predicated subtract; borrow-out is read back and stored.
define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32* nocapture %carry, i16 zeroext %p) {
; CHECK-LABEL: test_vsbcq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldr r2, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    lsls r1, r2, #29
; CHECK-NEXT:    vmsr fpscr_nzcvqc, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsbct.i32 q0, q1, q2
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load i32, i32* %carry, align 4
  %1 = shl i32 %0, 29
  %2 = zext i16 %p to i32
  %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
  %4 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 %1, <4 x i1> %3)
  %5 = extractvalue { <4 x i32>, i32 } %4, 1
  %6 = lshr i32 %5, 29
  %7 = and i32 %6, 1
  store i32 %7, i32* %carry, align 4
  %8 = extractvalue { <4 x i32>, i32 } %4, 0
  ret <4 x i32> %8
}

; Unsigned variant of the predicated VSBC borrow-in test above; IR and
; expected codegen are identical to the signed case.
define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32* nocapture %carry, i16 zeroext %p) {
; CHECK-LABEL: test_vsbcq_m_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldr r2, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    lsls r1, r2, #29
; CHECK-NEXT:    vmsr fpscr_nzcvqc, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsbct.i32 q0, q1, q2
; CHECK-NEXT:    vmrs r1, fpscr_nzcvqc
; CHECK-NEXT:    ubfx r1, r1, #29, #1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load i32, i32* %carry, align 4
  %1 = shl i32 %0, 29
  %2 = zext i16 %p to i32
  %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
  %4 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 %1, <4 x i1> %3)
  %5 = extractvalue { <4 x i32>, i32 } %4, 1
  %6 = lshr i32 %5, 29
  %7 = and i32 %6, 1
  store i32 %7, i32* %carry, align 4
  %8 = extractvalue { <4 x i32>, i32 } %4, 0
  ret <4 x i32> %8
}
