1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
3; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
4; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
5; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
6
7declare <2 x i8> @llvm.vp.sub.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
8
9define <2 x i8> @vsub_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) {
10; CHECK-LABEL: vsub_vv_v2i8:
11; CHECK:       # %bb.0:
12; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
13; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
14; CHECK-NEXT:    ret
15  %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
16  ret <2 x i8> %v
17}
18
19define <2 x i8> @vsub_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) {
20; CHECK-LABEL: vsub_vv_v2i8_unmasked:
21; CHECK:       # %bb.0:
22; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
23; CHECK-NEXT:    vsub.vv v8, v8, v9
24; CHECK-NEXT:    ret
25  %head = insertelement <2 x i1> undef, i1 true, i32 0
26  %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
27  %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
28  ret <2 x i8> %v
29}
30
31define <2 x i8> @vsub_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) {
32; CHECK-LABEL: vsub_vx_v2i8:
33; CHECK:       # %bb.0:
34; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu
35; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
36; CHECK-NEXT:    ret
37  %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0
38  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
39  %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
40  ret <2 x i8> %v
41}
42
43define <2 x i8> @vsub_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
44; CHECK-LABEL: vsub_vx_v2i8_unmasked:
45; CHECK:       # %bb.0:
46; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu
47; CHECK-NEXT:    vsub.vx v8, v8, a0
48; CHECK-NEXT:    ret
49  %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0
50  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
51  %head = insertelement <2 x i1> undef, i1 true, i32 0
52  %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
53  %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
54  ret <2 x i8> %v
55}
56
57declare <4 x i8> @llvm.vp.sub.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
58
59define <4 x i8> @vsub_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) {
60; CHECK-LABEL: vsub_vv_v4i8:
61; CHECK:       # %bb.0:
62; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
63; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
64; CHECK-NEXT:    ret
65  %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
66  ret <4 x i8> %v
67}
68
69define <4 x i8> @vsub_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) {
70; CHECK-LABEL: vsub_vv_v4i8_unmasked:
71; CHECK:       # %bb.0:
72; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
73; CHECK-NEXT:    vsub.vv v8, v8, v9
74; CHECK-NEXT:    ret
75  %head = insertelement <4 x i1> undef, i1 true, i32 0
76  %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
77  %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
78  ret <4 x i8> %v
79}
80
81define <4 x i8> @vsub_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
82; CHECK-LABEL: vsub_vx_v4i8:
83; CHECK:       # %bb.0:
84; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
85; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
86; CHECK-NEXT:    ret
87  %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0
88  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer
89  %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
90  ret <4 x i8> %v
91}
92
93define <4 x i8> @vsub_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
94; CHECK-LABEL: vsub_vx_v4i8_unmasked:
95; CHECK:       # %bb.0:
96; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
97; CHECK-NEXT:    vsub.vx v8, v8, a0
98; CHECK-NEXT:    ret
99  %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0
100  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer
101  %head = insertelement <4 x i1> undef, i1 true, i32 0
102  %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
103  %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
104  ret <4 x i8> %v
105}
106
107declare <8 x i8> @llvm.vp.sub.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
108
109define <8 x i8> @vsub_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) {
110; CHECK-LABEL: vsub_vv_v8i8:
111; CHECK:       # %bb.0:
112; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
113; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
114; CHECK-NEXT:    ret
115  %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
116  ret <8 x i8> %v
117}
118
119define <8 x i8> @vsub_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) {
120; CHECK-LABEL: vsub_vv_v8i8_unmasked:
121; CHECK:       # %bb.0:
122; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
123; CHECK-NEXT:    vsub.vv v8, v8, v9
124; CHECK-NEXT:    ret
125  %head = insertelement <8 x i1> undef, i1 true, i32 0
126  %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
127  %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
128  ret <8 x i8> %v
129}
130
131define <8 x i8> @vsub_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) {
132; CHECK-LABEL: vsub_vx_v8i8:
133; CHECK:       # %bb.0:
134; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
135; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
136; CHECK-NEXT:    ret
137  %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0
138  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer
139  %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
140  ret <8 x i8> %v
141}
142
143define <8 x i8> @vsub_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
144; CHECK-LABEL: vsub_vx_v8i8_unmasked:
145; CHECK:       # %bb.0:
146; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
147; CHECK-NEXT:    vsub.vx v8, v8, a0
148; CHECK-NEXT:    ret
149  %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0
150  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer
151  %head = insertelement <8 x i1> undef, i1 true, i32 0
152  %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
153  %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
154  ret <8 x i8> %v
155}
156
157declare <16 x i8> @llvm.vp.sub.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32)
158
159define <16 x i8> @vsub_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) {
160; CHECK-LABEL: vsub_vv_v16i8:
161; CHECK:       # %bb.0:
162; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
163; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
164; CHECK-NEXT:    ret
165  %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
166  ret <16 x i8> %v
167}
168
169define <16 x i8> @vsub_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) {
170; CHECK-LABEL: vsub_vv_v16i8_unmasked:
171; CHECK:       # %bb.0:
172; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
173; CHECK-NEXT:    vsub.vv v8, v8, v9
174; CHECK-NEXT:    ret
175  %head = insertelement <16 x i1> undef, i1 true, i32 0
176  %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
177  %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
178  ret <16 x i8> %v
179}
180
181define <16 x i8> @vsub_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) {
182; CHECK-LABEL: vsub_vx_v16i8:
183; CHECK:       # %bb.0:
184; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
185; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
186; CHECK-NEXT:    ret
187  %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0
188  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer
189  %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
190  ret <16 x i8> %v
191}
192
193define <16 x i8> @vsub_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) {
194; CHECK-LABEL: vsub_vx_v16i8_unmasked:
195; CHECK:       # %bb.0:
196; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
197; CHECK-NEXT:    vsub.vx v8, v8, a0
198; CHECK-NEXT:    ret
199  %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0
200  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer
201  %head = insertelement <16 x i1> undef, i1 true, i32 0
202  %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
203  %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
204  ret <16 x i8> %v
205}
206
207declare <2 x i16> @llvm.vp.sub.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
208
209define <2 x i16> @vsub_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) {
210; CHECK-LABEL: vsub_vv_v2i16:
211; CHECK:       # %bb.0:
212; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
213; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
214; CHECK-NEXT:    ret
215  %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
216  ret <2 x i16> %v
217}
218
219define <2 x i16> @vsub_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) {
220; CHECK-LABEL: vsub_vv_v2i16_unmasked:
221; CHECK:       # %bb.0:
222; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
223; CHECK-NEXT:    vsub.vv v8, v8, v9
224; CHECK-NEXT:    ret
225  %head = insertelement <2 x i1> undef, i1 true, i32 0
226  %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
227  %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
228  ret <2 x i16> %v
229}
230
231define <2 x i16> @vsub_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) {
232; CHECK-LABEL: vsub_vx_v2i16:
233; CHECK:       # %bb.0:
234; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
235; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
236; CHECK-NEXT:    ret
237  %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0
238  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer
239  %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
240  ret <2 x i16> %v
241}
242
243define <2 x i16> @vsub_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) {
244; CHECK-LABEL: vsub_vx_v2i16_unmasked:
245; CHECK:       # %bb.0:
246; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
247; CHECK-NEXT:    vsub.vx v8, v8, a0
248; CHECK-NEXT:    ret
249  %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0
250  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer
251  %head = insertelement <2 x i1> undef, i1 true, i32 0
252  %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
253  %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
254  ret <2 x i16> %v
255}
256
257declare <4 x i16> @llvm.vp.sub.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32)
258
259define <4 x i16> @vsub_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) {
260; CHECK-LABEL: vsub_vv_v4i16:
261; CHECK:       # %bb.0:
262; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
263; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
264; CHECK-NEXT:    ret
265  %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
266  ret <4 x i16> %v
267}
268
269define <4 x i16> @vsub_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) {
270; CHECK-LABEL: vsub_vv_v4i16_unmasked:
271; CHECK:       # %bb.0:
272; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
273; CHECK-NEXT:    vsub.vv v8, v8, v9
274; CHECK-NEXT:    ret
275  %head = insertelement <4 x i1> undef, i1 true, i32 0
276  %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
277  %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
278  ret <4 x i16> %v
279}
280
281define <4 x i16> @vsub_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) {
282; CHECK-LABEL: vsub_vx_v4i16:
283; CHECK:       # %bb.0:
284; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
285; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
286; CHECK-NEXT:    ret
287  %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0
288  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer
289  %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
290  ret <4 x i16> %v
291}
292
293define <4 x i16> @vsub_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) {
294; CHECK-LABEL: vsub_vx_v4i16_unmasked:
295; CHECK:       # %bb.0:
296; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
297; CHECK-NEXT:    vsub.vx v8, v8, a0
298; CHECK-NEXT:    ret
299  %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0
300  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer
301  %head = insertelement <4 x i1> undef, i1 true, i32 0
302  %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
303  %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
304  ret <4 x i16> %v
305}
306
307declare <8 x i16> @llvm.vp.sub.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32)
308
309define <8 x i16> @vsub_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) {
310; CHECK-LABEL: vsub_vv_v8i16:
311; CHECK:       # %bb.0:
312; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
313; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
314; CHECK-NEXT:    ret
315  %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
316  ret <8 x i16> %v
317}
318
319define <8 x i16> @vsub_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) {
320; CHECK-LABEL: vsub_vv_v8i16_unmasked:
321; CHECK:       # %bb.0:
322; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
323; CHECK-NEXT:    vsub.vv v8, v8, v9
324; CHECK-NEXT:    ret
325  %head = insertelement <8 x i1> undef, i1 true, i32 0
326  %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
327  %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
328  ret <8 x i16> %v
329}
330
331define <8 x i16> @vsub_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) {
332; CHECK-LABEL: vsub_vx_v8i16:
333; CHECK:       # %bb.0:
334; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
335; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
336; CHECK-NEXT:    ret
337  %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0
338  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer
339  %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
340  ret <8 x i16> %v
341}
342
343define <8 x i16> @vsub_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) {
344; CHECK-LABEL: vsub_vx_v8i16_unmasked:
345; CHECK:       # %bb.0:
346; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
347; CHECK-NEXT:    vsub.vx v8, v8, a0
348; CHECK-NEXT:    ret
349  %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0
350  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer
351  %head = insertelement <8 x i1> undef, i1 true, i32 0
352  %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
353  %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
354  ret <8 x i16> %v
355}
356
357declare <16 x i16> @llvm.vp.sub.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32)
358
359define <16 x i16> @vsub_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) {
360; CHECK-LABEL: vsub_vv_v16i16:
361; CHECK:       # %bb.0:
362; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
363; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
364; CHECK-NEXT:    ret
365  %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
366  ret <16 x i16> %v
367}
368
369define <16 x i16> @vsub_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) {
370; CHECK-LABEL: vsub_vv_v16i16_unmasked:
371; CHECK:       # %bb.0:
372; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
373; CHECK-NEXT:    vsub.vv v8, v8, v10
374; CHECK-NEXT:    ret
375  %head = insertelement <16 x i1> undef, i1 true, i32 0
376  %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
377  %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
378  ret <16 x i16> %v
379}
380
381define <16 x i16> @vsub_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) {
382; CHECK-LABEL: vsub_vx_v16i16:
383; CHECK:       # %bb.0:
384; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
385; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
386; CHECK-NEXT:    ret
387  %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0
388  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer
389  %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
390  ret <16 x i16> %v
391}
392
393define <16 x i16> @vsub_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) {
394; CHECK-LABEL: vsub_vx_v16i16_unmasked:
395; CHECK:       # %bb.0:
396; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
397; CHECK-NEXT:    vsub.vx v8, v8, a0
398; CHECK-NEXT:    ret
399  %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0
400  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer
401  %head = insertelement <16 x i1> undef, i1 true, i32 0
402  %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
403  %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
404  ret <16 x i16> %v
405}
406
407declare <2 x i32> @llvm.vp.sub.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
408
409define <2 x i32> @vsub_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) {
410; CHECK-LABEL: vsub_vv_v2i32:
411; CHECK:       # %bb.0:
412; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
413; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
414; CHECK-NEXT:    ret
415  %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
416  ret <2 x i32> %v
417}
418
419define <2 x i32> @vsub_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) {
420; CHECK-LABEL: vsub_vv_v2i32_unmasked:
421; CHECK:       # %bb.0:
422; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
423; CHECK-NEXT:    vsub.vv v8, v8, v9
424; CHECK-NEXT:    ret
425  %head = insertelement <2 x i1> undef, i1 true, i32 0
426  %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
427  %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
428  ret <2 x i32> %v
429}
430
431define <2 x i32> @vsub_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) {
432; CHECK-LABEL: vsub_vx_v2i32:
433; CHECK:       # %bb.0:
434; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
435; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
436; CHECK-NEXT:    ret
437  %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0
438  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer
439  %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
440  ret <2 x i32> %v
441}
442
443define <2 x i32> @vsub_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) {
444; CHECK-LABEL: vsub_vx_v2i32_unmasked:
445; CHECK:       # %bb.0:
446; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
447; CHECK-NEXT:    vsub.vx v8, v8, a0
448; CHECK-NEXT:    ret
449  %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0
450  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer
451  %head = insertelement <2 x i1> undef, i1 true, i32 0
452  %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
453  %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
454  ret <2 x i32> %v
455}
456
457declare <4 x i32> @llvm.vp.sub.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
458
459define <4 x i32> @vsub_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) {
460; CHECK-LABEL: vsub_vv_v4i32:
461; CHECK:       # %bb.0:
462; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
463; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
464; CHECK-NEXT:    ret
465  %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
466  ret <4 x i32> %v
467}
468
469define <4 x i32> @vsub_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) {
470; CHECK-LABEL: vsub_vv_v4i32_unmasked:
471; CHECK:       # %bb.0:
472; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
473; CHECK-NEXT:    vsub.vv v8, v8, v9
474; CHECK-NEXT:    ret
475  %head = insertelement <4 x i1> undef, i1 true, i32 0
476  %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
477  %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
478  ret <4 x i32> %v
479}
480
481define <4 x i32> @vsub_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) {
482; CHECK-LABEL: vsub_vx_v4i32:
483; CHECK:       # %bb.0:
484; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
485; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
486; CHECK-NEXT:    ret
487  %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0
488  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer
489  %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
490  ret <4 x i32> %v
491}
492
493define <4 x i32> @vsub_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) {
494; CHECK-LABEL: vsub_vx_v4i32_unmasked:
495; CHECK:       # %bb.0:
496; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
497; CHECK-NEXT:    vsub.vx v8, v8, a0
498; CHECK-NEXT:    ret
499  %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0
500  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer
501  %head = insertelement <4 x i1> undef, i1 true, i32 0
502  %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
503  %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
504  ret <4 x i32> %v
505}
506
507declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
508
509define <8 x i32> @vsub_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) {
510; CHECK-LABEL: vsub_vv_v8i32:
511; CHECK:       # %bb.0:
512; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
513; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
514; CHECK-NEXT:    ret
515  %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
516  ret <8 x i32> %v
517}
518
519define <8 x i32> @vsub_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) {
520; CHECK-LABEL: vsub_vv_v8i32_unmasked:
521; CHECK:       # %bb.0:
522; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
523; CHECK-NEXT:    vsub.vv v8, v8, v10
524; CHECK-NEXT:    ret
525  %head = insertelement <8 x i1> undef, i1 true, i32 0
526  %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
527  %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
528  ret <8 x i32> %v
529}
530
531define <8 x i32> @vsub_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
532; CHECK-LABEL: vsub_vx_v8i32:
533; CHECK:       # %bb.0:
534; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
535; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
536; CHECK-NEXT:    ret
537  %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0
538  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer
539  %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
540  ret <8 x i32> %v
541}
542
543define <8 x i32> @vsub_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) {
544; CHECK-LABEL: vsub_vx_v8i32_unmasked:
545; CHECK:       # %bb.0:
546; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
547; CHECK-NEXT:    vsub.vx v8, v8, a0
548; CHECK-NEXT:    ret
549  %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0
550  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer
551  %head = insertelement <8 x i1> undef, i1 true, i32 0
552  %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
553  %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
554  ret <8 x i32> %v
555}
556
557declare <16 x i32> @llvm.vp.sub.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32)
558
559define <16 x i32> @vsub_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) {
560; CHECK-LABEL: vsub_vv_v16i32:
561; CHECK:       # %bb.0:
562; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
563; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
564; CHECK-NEXT:    ret
565  %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
566  ret <16 x i32> %v
567}
568
569define <16 x i32> @vsub_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) {
570; CHECK-LABEL: vsub_vv_v16i32_unmasked:
571; CHECK:       # %bb.0:
572; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
573; CHECK-NEXT:    vsub.vv v8, v8, v12
574; CHECK-NEXT:    ret
575  %head = insertelement <16 x i1> undef, i1 true, i32 0
576  %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
577  %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
578  ret <16 x i32> %v
579}
580
581define <16 x i32> @vsub_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) {
582; CHECK-LABEL: vsub_vx_v16i32:
583; CHECK:       # %bb.0:
584; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
585; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
586; CHECK-NEXT:    ret
587  %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0
588  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer
589  %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
590  ret <16 x i32> %v
591}
592
593define <16 x i32> @vsub_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) {
594; CHECK-LABEL: vsub_vx_v16i32_unmasked:
595; CHECK:       # %bb.0:
596; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
597; CHECK-NEXT:    vsub.vx v8, v8, a0
598; CHECK-NEXT:    ret
599  %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0
600  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer
601  %head = insertelement <16 x i1> undef, i1 true, i32 0
602  %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
603  %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
604  ret <16 x i32> %v
605}
606
607declare <2 x i64> @llvm.vp.sub.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
608
609define <2 x i64> @vsub_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) {
610; CHECK-LABEL: vsub_vv_v2i64:
611; CHECK:       # %bb.0:
612; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
613; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
614; CHECK-NEXT:    ret
615  %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
616  ret <2 x i64> %v
617}
618
619define <2 x i64> @vsub_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) {
620; CHECK-LABEL: vsub_vv_v2i64_unmasked:
621; CHECK:       # %bb.0:
622; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
623; CHECK-NEXT:    vsub.vv v8, v8, v9
624; CHECK-NEXT:    ret
625  %head = insertelement <2 x i1> undef, i1 true, i32 0
626  %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
627  %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
628  ret <2 x i64> %v
629}
630
631define <2 x i64> @vsub_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) {
632; RV32-LABEL: vsub_vx_v2i64:
633; RV32:       # %bb.0:
634; RV32-NEXT:    addi sp, sp, -16
635; RV32-NEXT:    .cfi_def_cfa_offset 16
636; RV32-NEXT:    sw a1, 12(sp)
637; RV32-NEXT:    sw a0, 8(sp)
638; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
639; RV32-NEXT:    addi a0, sp, 8
640; RV32-NEXT:    vlse64.v v25, (a0), zero
641; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
642; RV32-NEXT:    vsub.vv v8, v8, v25, v0.t
643; RV32-NEXT:    addi sp, sp, 16
644; RV32-NEXT:    ret
645;
646; RV64-LABEL: vsub_vx_v2i64:
647; RV64:       # %bb.0:
648; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
649; RV64-NEXT:    vsub.vx v8, v8, a0, v0.t
650; RV64-NEXT:    ret
651  %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0
652  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer
653  %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
654  ret <2 x i64> %v
655}
656
657define <2 x i64> @vsub_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) {
658; RV32-LABEL: vsub_vx_v2i64_unmasked:
659; RV32:       # %bb.0:
660; RV32-NEXT:    addi sp, sp, -16
661; RV32-NEXT:    .cfi_def_cfa_offset 16
662; RV32-NEXT:    sw a1, 12(sp)
663; RV32-NEXT:    sw a0, 8(sp)
664; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
665; RV32-NEXT:    addi a0, sp, 8
666; RV32-NEXT:    vlse64.v v25, (a0), zero
667; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
668; RV32-NEXT:    vsub.vv v8, v8, v25
669; RV32-NEXT:    addi sp, sp, 16
670; RV32-NEXT:    ret
671;
672; RV64-LABEL: vsub_vx_v2i64_unmasked:
673; RV64:       # %bb.0:
674; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
675; RV64-NEXT:    vsub.vx v8, v8, a0
676; RV64-NEXT:    ret
677  %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0
678  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer
679  %head = insertelement <2 x i1> undef, i1 true, i32 0
680  %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
681  %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
682  ret <2 x i64> %v
683}
684
685declare <4 x i64> @llvm.vp.sub.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32)
686
687define <4 x i64> @vsub_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) {
688; CHECK-LABEL: vsub_vv_v4i64:
689; CHECK:       # %bb.0:
690; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
691; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
692; CHECK-NEXT:    ret
693  %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
694  ret <4 x i64> %v
695}
696
697define <4 x i64> @vsub_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) {
698; CHECK-LABEL: vsub_vv_v4i64_unmasked:
699; CHECK:       # %bb.0:
700; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
701; CHECK-NEXT:    vsub.vv v8, v8, v10
702; CHECK-NEXT:    ret
703  %head = insertelement <4 x i1> undef, i1 true, i32 0
704  %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
705  %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
706  ret <4 x i64> %v
707}
708
709define <4 x i64> @vsub_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) {
710; RV32-LABEL: vsub_vx_v4i64:
711; RV32:       # %bb.0:
712; RV32-NEXT:    addi sp, sp, -16
713; RV32-NEXT:    .cfi_def_cfa_offset 16
714; RV32-NEXT:    sw a1, 12(sp)
715; RV32-NEXT:    sw a0, 8(sp)
716; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
717; RV32-NEXT:    addi a0, sp, 8
718; RV32-NEXT:    vlse64.v v26, (a0), zero
719; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
720; RV32-NEXT:    vsub.vv v8, v8, v26, v0.t
721; RV32-NEXT:    addi sp, sp, 16
722; RV32-NEXT:    ret
723;
724; RV64-LABEL: vsub_vx_v4i64:
725; RV64:       # %bb.0:
726; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
727; RV64-NEXT:    vsub.vx v8, v8, a0, v0.t
728; RV64-NEXT:    ret
729  %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0
730  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer
731  %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
732  ret <4 x i64> %v
733}
734
735define <4 x i64> @vsub_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) {
736; RV32-LABEL: vsub_vx_v4i64_unmasked:
737; RV32:       # %bb.0:
738; RV32-NEXT:    addi sp, sp, -16
739; RV32-NEXT:    .cfi_def_cfa_offset 16
740; RV32-NEXT:    sw a1, 12(sp)
741; RV32-NEXT:    sw a0, 8(sp)
742; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
743; RV32-NEXT:    addi a0, sp, 8
744; RV32-NEXT:    vlse64.v v26, (a0), zero
745; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
746; RV32-NEXT:    vsub.vv v8, v8, v26
747; RV32-NEXT:    addi sp, sp, 16
748; RV32-NEXT:    ret
749;
750; RV64-LABEL: vsub_vx_v4i64_unmasked:
751; RV64:       # %bb.0:
752; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
753; RV64-NEXT:    vsub.vx v8, v8, a0
754; RV64-NEXT:    ret
755  %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0
756  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer
757  %head = insertelement <4 x i1> undef, i1 true, i32 0
758  %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
759  %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
760  ret <4 x i64> %v
761}
762
763declare <8 x i64> @llvm.vp.sub.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
764
765define <8 x i64> @vsub_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) {
766; CHECK-LABEL: vsub_vv_v8i64:
767; CHECK:       # %bb.0:
768; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
769; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
770; CHECK-NEXT:    ret
771  %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
772  ret <8 x i64> %v
773}
774
775define <8 x i64> @vsub_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) {
776; CHECK-LABEL: vsub_vv_v8i64_unmasked:
777; CHECK:       # %bb.0:
778; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
779; CHECK-NEXT:    vsub.vv v8, v8, v12
780; CHECK-NEXT:    ret
781  %head = insertelement <8 x i1> undef, i1 true, i32 0
782  %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
783  %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
784  ret <8 x i64> %v
785}
786
787define <8 x i64> @vsub_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) {
788; RV32-LABEL: vsub_vx_v8i64:
789; RV32:       # %bb.0:
790; RV32-NEXT:    addi sp, sp, -16
791; RV32-NEXT:    .cfi_def_cfa_offset 16
792; RV32-NEXT:    sw a1, 12(sp)
793; RV32-NEXT:    sw a0, 8(sp)
794; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
795; RV32-NEXT:    addi a0, sp, 8
796; RV32-NEXT:    vlse64.v v28, (a0), zero
797; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
798; RV32-NEXT:    vsub.vv v8, v8, v28, v0.t
799; RV32-NEXT:    addi sp, sp, 16
800; RV32-NEXT:    ret
801;
802; RV64-LABEL: vsub_vx_v8i64:
803; RV64:       # %bb.0:
804; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
805; RV64-NEXT:    vsub.vx v8, v8, a0, v0.t
806; RV64-NEXT:    ret
807  %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0
808  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer
809  %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
810  ret <8 x i64> %v
811}
812
813define <8 x i64> @vsub_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) {
814; RV32-LABEL: vsub_vx_v8i64_unmasked:
815; RV32:       # %bb.0:
816; RV32-NEXT:    addi sp, sp, -16
817; RV32-NEXT:    .cfi_def_cfa_offset 16
818; RV32-NEXT:    sw a1, 12(sp)
819; RV32-NEXT:    sw a0, 8(sp)
820; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
821; RV32-NEXT:    addi a0, sp, 8
822; RV32-NEXT:    vlse64.v v28, (a0), zero
823; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
824; RV32-NEXT:    vsub.vv v8, v8, v28
825; RV32-NEXT:    addi sp, sp, 16
826; RV32-NEXT:    ret
827;
828; RV64-LABEL: vsub_vx_v8i64_unmasked:
829; RV64:       # %bb.0:
830; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
831; RV64-NEXT:    vsub.vx v8, v8, a0
832; RV64-NEXT:    ret
833  %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0
834  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer
835  %head = insertelement <8 x i1> undef, i1 true, i32 0
836  %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
837  %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
838  ret <8 x i64> %v
839}
840
841declare <16 x i64> @llvm.vp.sub.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32)
842
843define <16 x i64> @vsub_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) {
844; CHECK-LABEL: vsub_vv_v16i64:
845; CHECK:       # %bb.0:
846; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
847; CHECK-NEXT:    vsub.vv v8, v8, v16, v0.t
848; CHECK-NEXT:    ret
849  %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
850  ret <16 x i64> %v
851}
852
853define <16 x i64> @vsub_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) {
854; CHECK-LABEL: vsub_vv_v16i64_unmasked:
855; CHECK:       # %bb.0:
856; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
857; CHECK-NEXT:    vsub.vv v8, v8, v16
858; CHECK-NEXT:    ret
859  %head = insertelement <16 x i1> undef, i1 true, i32 0
860  %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
861  %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
862  ret <16 x i64> %v
863}
864
865define <16 x i64> @vsub_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) {
866; RV32-LABEL: vsub_vx_v16i64:
867; RV32:       # %bb.0:
868; RV32-NEXT:    addi sp, sp, -16
869; RV32-NEXT:    .cfi_def_cfa_offset 16
870; RV32-NEXT:    sw a1, 12(sp)
871; RV32-NEXT:    sw a0, 8(sp)
872; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
873; RV32-NEXT:    addi a0, sp, 8
874; RV32-NEXT:    vlse64.v v16, (a0), zero
875; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
876; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
877; RV32-NEXT:    addi sp, sp, 16
878; RV32-NEXT:    ret
879;
880; RV64-LABEL: vsub_vx_v16i64:
881; RV64:       # %bb.0:
882; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
883; RV64-NEXT:    vsub.vx v8, v8, a0, v0.t
884; RV64-NEXT:    ret
885  %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0
886  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
887  %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
888  ret <16 x i64> %v
889}
890
891define <16 x i64> @vsub_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) {
892; RV32-LABEL: vsub_vx_v16i64_unmasked:
893; RV32:       # %bb.0:
894; RV32-NEXT:    addi sp, sp, -16
895; RV32-NEXT:    .cfi_def_cfa_offset 16
896; RV32-NEXT:    sw a1, 12(sp)
897; RV32-NEXT:    sw a0, 8(sp)
898; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
899; RV32-NEXT:    addi a0, sp, 8
900; RV32-NEXT:    vlse64.v v16, (a0), zero
901; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
902; RV32-NEXT:    vsub.vv v8, v8, v16
903; RV32-NEXT:    addi sp, sp, 16
904; RV32-NEXT:    ret
905;
906; RV64-LABEL: vsub_vx_v16i64_unmasked:
907; RV64:       # %bb.0:
908; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
909; RV64-NEXT:    vsub.vx v8, v8, a0
910; RV64-NEXT:    ret
911  %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0
912  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
913  %head = insertelement <16 x i1> undef, i1 true, i32 0
914  %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
915  %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
916  ret <16 x i64> %v
917}
918