1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
3
; Unpredicated bit-clear on <16 x i8>. The IR computes %a & ~%b, with the NOT
; written as an xor against an all-ones vector; the expected output is a single
; vbic instruction followed by the return.
define arm_aapcs_vfpcc <16 x i8> @test_vbicq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vbicq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vbic q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  ; ~%b
  %not.b = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ; ~%b & %a
  %result = and <16 x i8> %not.b, %a
  ret <16 x i8> %result
}
14
; Unpredicated bit-clear on <8 x i16>. Same shape as the i8 case: xor %b with
; all-ones, then and with %a; a single vbic is expected.
define arm_aapcs_vfpcc <8 x i16> @test_vbicq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vbicq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vbic q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  ; ~%b
  %not.b = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ; ~%b & %a
  %result = and <8 x i16> %not.b, %a
  ret <8 x i16> %result
}
25
; Unpredicated bit-clear on <4 x i32>. The inverted %b is and-ed with %a; a
; single vbic is expected.
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vbicq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vbic q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  ; ~%b
  %not.b = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; ~%b & %a
  %result = and <4 x i32> %not.b, %a
  ret <4 x i32> %result
}
36
; Unpredicated bit-clear on <4 x float>. Both float operands are reinterpreted
; as <4 x i32>, the and-with-inverted-%b is done on the integer view, and the
; result is bitcast back to float. The bitcasts are expected to leave no
; instructions of their own: only a vbic and the return are checked.
define arm_aapcs_vfpcc <4 x float> @test_vbicq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vbicq_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vbic q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  ; Integer views of the two float inputs.
  %a.bits = bitcast <4 x float> %a to <4 x i32>
  %b.bits = bitcast <4 x float> %b to <4 x i32>
  ; ~%b, then ~%b & %a, on the integer view.
  %not.b = xor <4 x i32> %b.bits, <i32 -1, i32 -1, i32 -1, i32 -1>
  %result.bits = and <4 x i32> %not.b, %a.bits
  %result = bitcast <4 x i32> %result.bits to <4 x float>
  ret <4 x float> %result
}
50
; Predicated bit-clear on <16 x i8> with an explicit %inactive operand. The
; 16-bit mask %p is zero-extended to i32, turned into a <16 x i1> predicate by
; pred.i2v, and passed to the predicated bic intrinsic together with %a, %b and
; %inactive (last operand). Expected output: move the mask into p0, open a VPT
; block with vpst, and emit vbict with q0 as destination.
define arm_aapcs_vfpcc <16 x i8> @test_vbicq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vbicq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  ; pred.i2v takes an i32, so widen the i16 mask first.
  %mask32 = zext i16 %p to i32
  %pred = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %mask32)
  %result = tail call <16 x i8> @llvm.arm.mve.bic.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %pred, <16 x i8> %inactive)
  ret <16 x i8> %result
}
64
65declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2
66
67declare <16 x i8> @llvm.arm.mve.bic.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2
68
; Predicated bit-clear on <8 x i16> with an explicit %inactive operand. The
; mask %p is widened to i32, converted to a <8 x i1> predicate, and passed to
; the predicated bic intrinsic with %inactive as the last operand. Expected
; output is vmsr / vpst / vbict writing q0.
define arm_aapcs_vfpcc <8 x i16> @test_vbicq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vbicq_m_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  ; pred.i2v takes an i32, so widen the i16 mask first.
  %mask32 = zext i16 %p to i32
  %pred = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %mask32)
  %result = tail call <8 x i16> @llvm.arm.mve.bic.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %pred, <8 x i16> %inactive)
  ret <8 x i16> %result
}
82
83declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2
84
85declare <8 x i16> @llvm.arm.mve.bic.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2
86
; Predicated bit-clear on <4 x i32> with an explicit %inactive operand. The
; mask %p is widened to i32, converted to a <4 x i1> predicate, and passed to
; the predicated bic intrinsic with %inactive as the last operand. Expected
; output is vmsr / vpst / vbict writing q0.
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vbicq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  ; pred.i2v takes an i32, so widen the i16 mask first.
  %mask32 = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask32)
  %result = tail call <4 x i32> @llvm.arm.mve.bic.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %pred, <4 x i32> %inactive)
  ret <4 x i32> %result
}
100
101declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2
102
103declare <4 x i32> @llvm.arm.mve.bic.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2
104
; Predicated bit-clear on <8 x half> with an explicit %inactive operand. All
; three half vectors are bitcast to <8 x i16> so the v8i16 predicated bic
; intrinsic can be used, and the integer result is bitcast back to half. The
; checks expect the same vmsr / vpst / vbict sequence as the integer tests,
; i.e. no extra instructions for the bitcasts.
define arm_aapcs_vfpcc <8 x half> @test_vbicq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vbicq_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  ; Integer views of the two data operands.
  %a.bits = bitcast <8 x half> %a to <8 x i16>
  %b.bits = bitcast <8 x half> %b to <8 x i16>
  ; Build the <8 x i1> predicate from the i16 mask (widened to i32 first).
  %mask32 = zext i16 %p to i32
  %pred = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %mask32)
  %inactive.bits = bitcast <8 x half> %inactive to <8 x i16>
  %result.bits = tail call <8 x i16> @llvm.arm.mve.bic.predicated.v8i16.v8i1(<8 x i16> %a.bits, <8 x i16> %b.bits, <8 x i1> %pred, <8 x i16> %inactive.bits)
  %result = bitcast <8 x i16> %result.bits to <8 x half>
  ret <8 x half> %result
}
122
; Predicated bit-clear on <16 x i8> without an %inactive argument: the last
; operand of the predicated bic intrinsic is undef. The checks expect
; vmsr / vpst / vbict with q0 used as both destination and first source.
define arm_aapcs_vfpcc <16 x i8> @test_vbicq_x_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vbicq_x_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  ; pred.i2v takes an i32, so widen the i16 mask first.
  %mask32 = zext i16 %p to i32
  %pred = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %mask32)
  %result = tail call <16 x i8> @llvm.arm.mve.bic.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %pred, <16 x i8> undef)
  ret <16 x i8> %result
}
136
; Predicated bit-clear on <8 x i16> without an %inactive argument: the last
; operand of the predicated bic intrinsic is undef. The checks expect
; vmsr / vpst / vbict with q0 used as both destination and first source.
define arm_aapcs_vfpcc <8 x i16> @test_vbicq_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vbicq_x_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  ; pred.i2v takes an i32, so widen the i16 mask first.
  %mask32 = zext i16 %p to i32
  %pred = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %mask32)
  %result = tail call <8 x i16> @llvm.arm.mve.bic.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %pred, <8 x i16> undef)
  ret <8 x i16> %result
}
150
; Predicated bit-clear on <4 x i32> without an %inactive argument: the last
; operand of the predicated bic intrinsic is undef. The checks expect
; vmsr / vpst / vbict with q0 used as both destination and first source.
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_x_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vbicq_x_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  ; pred.i2v takes an i32, so widen the i16 mask first.
  %mask32 = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask32)
  %result = tail call <4 x i32> @llvm.arm.mve.bic.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %pred, <4 x i32> undef)
  ret <4 x i32> %result
}
164
; Predicated bit-clear on <4 x float> without an %inactive argument. The float
; operands are bitcast to <4 x i32>, the v4i32 predicated bic intrinsic is
; called with undef as its last operand, and the result is bitcast back to
; float. The checks expect vmsr / vpst / vbict with q0 as both destination and
; first source.
; NOTE(review): despite the "_m_" in its name, this function has the same shape
; as the test_vbicq_x_* tests in this file (no %inactive parameter, undef as
; the last intrinsic operand). It was presumably meant to be named
; test_vbicq_x_f32; confirm before renaming.
define arm_aapcs_vfpcc <4 x float> @test_vbicq_m_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vbicq_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  ; Integer views of the two float inputs.
  %a.bits = bitcast <4 x float> %a to <4 x i32>
  %b.bits = bitcast <4 x float> %b to <4 x i32>
  ; Build the <4 x i1> predicate from the i16 mask (widened to i32 first).
  %mask32 = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %mask32)
  %result.bits = tail call <4 x i32> @llvm.arm.mve.bic.predicated.v4i32.v4i1(<4 x i32> %a.bits, <4 x i32> %b.bits, <4 x i1> %pred, <4 x i32> undef)
  %result = bitcast <4 x i32> %result.bits to <4 x float>
  ret <4 x float> %result
}
181
182