; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s

;;; Test vector broadcast intrinsic instructions
;;;
;;; Note:
;;;   We test VBRD*rl, VBRD*il, VBRD*rl_v, VBRD*il_v, VBRD*rml_v, and
;;;   VBRD*iml_v instructions.

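;;; Suffix convention (an informal reading of the declarations below, not an
;;; official reference): "vsl" takes a scalar and a vector length, "vsvl" adds
;;; a pass-through vector whose elements are presumably preserved where the
;;; result is not written, and "vsmvl" inserts a mask register before the
;;; pass-through so only mask-enabled elements receive the broadcast value.
;;; For example, the unmasked double broadcast used throughout this file is:
;;;
;;;   %v = tail call fast <256 x double> @llvm.ve.vl.vbrdd.vsl(double %d, i32 256)
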
; Function Attrs: nounwind
define void @vbrdd_vsl(double %0, i8* %1) {
; CHECK-LABEL: vbrdd_vsl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s2, 256
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vbrd %v0, %s0
; CHECK-NEXT:    #APP
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vbrdd.vsl(double %0, i32 256)
  tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %3, i8* %1)
  ret void
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vbrdd.vsl(double, i32)

; Function Attrs: nounwind
define void @vbrdd_vsvl(double %0, i8* %1) {
; CHECK-LABEL: vbrdd_vsvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s2, 256
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vld %v0, 8, %s1
; CHECK-NEXT:    vbrd %v0, %s0
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %1, i32 256)
  %4 = tail call fast <256 x double> @llvm.ve.vl.vbrdd.vsvl(double %0, <256 x double> %3, i32 256)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %4, i64 8, i8* %1, i32 256)
  ret void
}

; Function Attrs: nounwind readonly
declare <256 x double> @llvm.ve.vl.vld.vssl(i64, i8*, i32)

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vbrdd.vsvl(double, <256 x double>, i32)

; Function Attrs: nounwind writeonly
declare void @llvm.ve.vl.vst.vssl(<256 x double>, i64, i8*, i32)

; Function Attrs: nounwind
define void @vbrdd_vsmvl(double %0, i8* %1) {
; CHECK-LABEL: vbrdd_vsmvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s2, 256
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vld %v0, 8, %s1
; CHECK-NEXT:    lea.sl %s3, 1138753536
; CHECK-NEXT:    fcmp.d %s4, %s0, %s3
; CHECK-NEXT:    fsub.d %s3, %s0, %s3
; CHECK-NEXT:    cvt.l.d.rz %s3, %s3
; CHECK-NEXT:    xor %s3, %s3, (1)1
; CHECK-NEXT:    cvt.l.d.rz %s5, %s0
; CHECK-NEXT:    cmov.d.lt %s3, %s5, %s4
; CHECK-NEXT:    lvm %vm1, 3, %s3
; CHECK-NEXT:    vbrd %v0, %s0, %vm1
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %1, i32 256)
  %4 = fptoui double %0 to i64
  %5 = tail call <256 x i1> @llvm.ve.vl.lvm.mmss(<256 x i1> undef, i64 3, i64 %4)
  %6 = tail call fast <256 x double> @llvm.ve.vl.vbrdd.vsmvl(double %0, <256 x i1> %5, <256 x double> %3, i32 256)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %6, i64 8, i8* %1, i32 256)
  ret void
}

; Function Attrs: nounwind readnone
declare <256 x i1> @llvm.ve.vl.lvm.mmss(<256 x i1>, i64, i64)

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vbrdd.vsmvl(double, <256 x i1>, <256 x double>, i32)

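;;; The i64 tests below exercise the same three forms; note that the CHECK
;;; lines expect the identical vbrd instruction as the double tests, since a
;;; full 64-bit broadcast is type-agnostic.
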
; Function Attrs: nounwind
define void @vbrdl_vsl(i64 %0, i8* %1) {
; CHECK-LABEL: vbrdl_vsl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s2, 256
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vbrd %v0, %s0
; CHECK-NEXT:    #APP
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vbrdl.vsl(i64 %0, i32 256)
  tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %3, i8* %1)
  ret void
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vbrdl.vsl(i64, i32)

; Function Attrs: nounwind
define void @vbrdl_vsvl(i64 %0, i8* %1) {
; CHECK-LABEL: vbrdl_vsvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s2, 256
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vld %v0, 8, %s1
; CHECK-NEXT:    vbrd %v0, %s0
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %1, i32 256)
  %4 = tail call fast <256 x double> @llvm.ve.vl.vbrdl.vsvl(i64 %0, <256 x double> %3, i32 256)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %4, i64 8, i8* %1, i32 256)
  ret void
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vbrdl.vsvl(i64, <256 x double>, i32)

; Function Attrs: nounwind
define void @vbrdl_vsmvl(i64 %0, i8* %1) {
; CHECK-LABEL: vbrdl_vsmvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s2, 256
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vld %v0, 8, %s1
; CHECK-NEXT:    lvm %vm1, 3, %s0
; CHECK-NEXT:    vbrd %v0, %s0, %vm1
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %1, i32 256)
  %4 = tail call <256 x i1> @llvm.ve.vl.lvm.mmss(<256 x i1> undef, i64 3, i64 %0)
  %5 = tail call fast <256 x double> @llvm.ve.vl.vbrdl.vsmvl(i64 %0, <256 x i1> %4, <256 x double> %3, i32 256)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %5, i64 8, i8* %1, i32 256)
  ret void
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vbrdl.vsmvl(i64, <256 x i1>, <256 x double>, i32)

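;;; Immediate forms: a scalar of 31 fits the instruction's immediate field, so
;;; the CHECK lines expect "vbrd %v0, 31" with no scalar register operand
;;; (presumably the VBRD*il/VBRD*iml_v encodings).
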
; Function Attrs: nounwind
define void @vbrdl_imm_vsl(i64 %0, i8* %1) {
; CHECK-LABEL: vbrdl_imm_vsl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vbrd %v0, 31
; CHECK-NEXT:    #APP
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vbrdl.vsl(i64 31, i32 256)
  tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %3, i8* %1)
  ret void
}

; Function Attrs: nounwind
define void @vbrdl_imm_vsvl(i64 %0, i8* %1) {
; CHECK-LABEL: vbrdl_imm_vsvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vld %v0, 8, %s1
; CHECK-NEXT:    vbrd %v0, 31
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %1, i32 256)
  %4 = tail call fast <256 x double> @llvm.ve.vl.vbrdl.vsvl(i64 31, <256 x double> %3, i32 256)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %4, i64 8, i8* %1, i32 256)
  ret void
}

; Function Attrs: nounwind
define void @vbrdl_imm_vsmvl(i64 %0, i8* %1) {
; CHECK-LABEL: vbrdl_imm_vsmvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s2, 256
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vld %v0, 8, %s1
; CHECK-NEXT:    lvm %vm1, 3, %s0
; CHECK-NEXT:    vbrd %v0, 31, %vm1
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %1, i32 256)
  %4 = tail call <256 x i1> @llvm.ve.vl.lvm.mmss(<256 x i1> undef, i64 3, i64 %0)
  %5 = tail call fast <256 x double> @llvm.ve.vl.vbrdl.vsmvl(i64 31, <256 x i1> %4, <256 x double> %3, i32 256)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %5, i64 8, i8* %1, i32 256)
  ret void
}

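;;; f32 broadcast: the CHECK lines expect vbrdu rather than vbrd, which
;;; broadcasts into the upper 32 bits of each 64-bit element, where VE keeps
;;; scalar float values.
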
; Function Attrs: nounwind
define void @vbrds_vsl(float %0, i8* %1) {
; CHECK-LABEL: vbrds_vsl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s2, 256
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vbrdu %v0, %s0
; CHECK-NEXT:    #APP
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vbrds.vsl(float %0, i32 256)
  tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %3, i8* %1)
  ret void
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vbrds.vsl(float, i32)

; Function Attrs: nounwind
define void @vbrds_vsvl(float %0, i8* %1) {
; CHECK-LABEL: vbrds_vsvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s2, 256
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vld %v0, 8, %s1
; CHECK-NEXT:    vbrdu %v0, %s0
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %1, i32 256)
  %4 = tail call fast <256 x double> @llvm.ve.vl.vbrds.vsvl(float %0, <256 x double> %3, i32 256)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %4, i64 8, i8* %1, i32 256)
  ret void
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vbrds.vsvl(float, <256 x double>, i32)

; Function Attrs: nounwind
define void @vbrds_vsmvl(float %0, i8* %1) {
; CHECK-LABEL: vbrds_vsmvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s2, 256
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vld %v0, 8, %s1
; CHECK-NEXT:    lea.sl %s3, 1593835520
; CHECK-NEXT:    fcmp.s %s4, %s0, %s3
; CHECK-NEXT:    fsub.s %s3, %s0, %s3
; CHECK-NEXT:    cvt.d.s %s3, %s3
; CHECK-NEXT:    cvt.l.d.rz %s3, %s3
; CHECK-NEXT:    xor %s3, %s3, (1)1
; CHECK-NEXT:    cvt.d.s %s5, %s0
; CHECK-NEXT:    cvt.l.d.rz %s5, %s5
; CHECK-NEXT:    cmov.s.lt %s3, %s5, %s4
; CHECK-NEXT:    lvm %vm1, 3, %s3
; CHECK-NEXT:    vbrdu %v0, %s0, %vm1
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %1, i32 256)
  %4 = fptoui float %0 to i64
  %5 = tail call <256 x i1> @llvm.ve.vl.lvm.mmss(<256 x i1> undef, i64 3, i64 %4)
  %6 = tail call fast <256 x double> @llvm.ve.vl.vbrds.vsmvl(float %0, <256 x i1> %5, <256 x double> %3, i32 256)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %6, i64 8, i8* %1, i32 256)
  ret void
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vbrds.vsmvl(float, <256 x i1>, <256 x double>, i32)

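;;; i32 broadcast: the scalar is first zero-extended ("and %s0, %s0, (32)0"
;;; keeps only the low 32 bits) and then broadcast with vbrdl, the lower-half
;;; counterpart of vbrdu.
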
; Function Attrs: nounwind
define void @vbrdw_vsl(i32 signext %0, i8* %1) {
; CHECK-LABEL: vbrdw_vsl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s0, %s0, (32)0
; CHECK-NEXT:    lea %s2, 256
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vbrdl %v0, %s0
; CHECK-NEXT:    #APP
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vbrdw.vsl(i32 %0, i32 256)
  tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %3, i8* %1)
  ret void
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vbrdw.vsl(i32, i32)

; Function Attrs: nounwind
define void @vbrdw_vsvl(i32 signext %0, i8* %1) {
; CHECK-LABEL: vbrdw_vsvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s2, 256
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vld %v0, 8, %s1
; CHECK-NEXT:    and %s0, %s0, (32)0
; CHECK-NEXT:    vbrdl %v0, %s0
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %1, i32 256)
  %4 = tail call fast <256 x double> @llvm.ve.vl.vbrdw.vsvl(i32 %0, <256 x double> %3, i32 256)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %4, i64 8, i8* %1, i32 256)
  ret void
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vbrdw.vsvl(i32, <256 x double>, i32)

; Function Attrs: nounwind
define void @vbrdw_vsmvl(i32 signext %0, i8* %1) {
; CHECK-LABEL: vbrdw_vsmvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s2, 256
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vld %v0, 8, %s1
; CHECK-NEXT:    and %s3, %s0, (32)0
; CHECK-NEXT:    lvm %vm1, 3, %s0
; CHECK-NEXT:    vbrdl %v0, %s3, %vm1
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %1, i32 256)
  %4 = sext i32 %0 to i64
  %5 = tail call <256 x i1> @llvm.ve.vl.lvm.mmss(<256 x i1> undef, i64 3, i64 %4)
  %6 = tail call fast <256 x double> @llvm.ve.vl.vbrdw.vsmvl(i32 %0, <256 x i1> %5, <256 x double> %3, i32 256)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %6, i64 8, i8* %1, i32 256)
  ret void
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vbrdw.vsmvl(i32, <256 x i1>, <256 x double>, i32)

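;;; As in the i64 immediate tests, an i32 scalar of 31 is folded into the
;;; instruction's immediate field, so no zero-extension of the argument is
;;; emitted.
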
; Function Attrs: nounwind
define void @vbrdw_imm_vsl(i32 signext %0, i8* %1) {
; CHECK-LABEL: vbrdw_imm_vsl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vbrdl %v0, 31
; CHECK-NEXT:    #APP
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vbrdw.vsl(i32 31, i32 256)
  tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %3, i8* %1)
  ret void
}

; Function Attrs: nounwind
define void @vbrdw_imm_vsvl(i32 signext %0, i8* %1) {
; CHECK-LABEL: vbrdw_imm_vsvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vld %v0, 8, %s1
; CHECK-NEXT:    vbrdl %v0, 31
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %1, i32 256)
  %4 = tail call fast <256 x double> @llvm.ve.vl.vbrdw.vsvl(i32 31, <256 x double> %3, i32 256)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %4, i64 8, i8* %1, i32 256)
  ret void
}

; Function Attrs: nounwind
define void @vbrdw_imm_vsmvl(i32 signext %0, i8* %1) {
; CHECK-LABEL: vbrdw_imm_vsmvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s2, 256
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vld %v0, 8, %s1
; CHECK-NEXT:    lvm %vm1, 3, %s0
; CHECK-NEXT:    vbrdl %v0, 31, %vm1
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %1, i32 256)
  %4 = sext i32 %0 to i64
  %5 = tail call <256 x i1> @llvm.ve.vl.lvm.mmss(<256 x i1> undef, i64 3, i64 %4)
  %6 = tail call fast <256 x double> @llvm.ve.vl.vbrdw.vsmvl(i32 31, <256 x i1> %5, <256 x double> %3, i32 256)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %6, i64 8, i8* %1, i32 256)
  ret void
}

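;;; Packed broadcast: pvbrd replicates both 32-bit halves of the 64-bit scalar
;;; across the packed vector. Its mask is <512 x i1>, which maps to a pair of
;;; mask registers (%vm2/%vm3 in the CHECK lines); each lvm.MMss call sets one
;;; 64-bit chunk of that 512-bit pair.
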
; Function Attrs: nounwind
define void @pvbrd_vsl(i64 %0, i8* %1) {
; CHECK-LABEL: pvbrd_vsl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s2, 256
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    pvbrd %v0, %s0
; CHECK-NEXT:    #APP
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.pvbrd.vsl(i64 %0, i32 256)
  tail call void asm sideeffect "vst ${0:v}, 8, $1", "v,r"(<256 x double> %3, i8* %1)
  ret void
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.pvbrd.vsl(i64, i32)

; Function Attrs: nounwind
define void @pvbrd_vsvl(i64 %0, i8* %1) {
; CHECK-LABEL: pvbrd_vsvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s2, 256
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vld %v0, 8, %s1
; CHECK-NEXT:    pvbrd %v0, %s0
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %1, i32 256)
  %4 = tail call fast <256 x double> @llvm.ve.vl.pvbrd.vsvl(i64 %0, <256 x double> %3, i32 256)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %4, i64 8, i8* %1, i32 256)
  ret void
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.pvbrd.vsvl(i64, <256 x double>, i32)

; Function Attrs: nounwind
define void @pvbrd_vsMvl(i64 %0, i8* %1) {
; CHECK-LABEL: pvbrd_vsMvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s2, 256
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vld %v0, 8, %s1
; CHECK-NEXT:    lvm %vm3, 1, %s0
; CHECK-NEXT:    lvm %vm2, 2, %s0
; CHECK-NEXT:    pvbrd %v0, %s0, %vm2
; CHECK-NEXT:    vst %v0, 8, %s1
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %1, i32 256)
  %4 = tail call <512 x i1> @llvm.ve.vl.lvm.MMss(<512 x i1> undef, i64 1, i64 %0)
  %5 = tail call <512 x i1> @llvm.ve.vl.lvm.MMss(<512 x i1> %4, i64 6, i64 %0)
  %6 = tail call fast <256 x double> @llvm.ve.vl.pvbrd.vsMvl(i64 %0, <512 x i1> %5, <256 x double> %3, i32 256)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %6, i64 8, i8* %1, i32 256)
  ret void
}

; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.ve.vl.lvm.MMss(<512 x i1>, i64, i64)

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.pvbrd.vsMvl(i64, <512 x i1>, <256 x double>, i32)