1; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
2
3;;; Test vector merge intrinsic instructions
4;;;
5;;; Note:
6;;;   We test VMRG*vvml, VMRG*vvml_v, VMRG*rvml, VMRG*rvml_v, VMRG*ivml, and
7;;;   VMRG*ivml_v instructions.
8
; Function Attrs: nounwind readnone
; VMRG vvml form: both merge inputs are vector registers, masked by %vm1.
; Expected codegen sets VL to 256 (lea/lvl) and emits a single vmrg.
define fastcc <256 x double> @vmrg_vvvml(<256 x double> %0, <256 x double> %1, <256 x i1> %2) {
; CHECK-LABEL: vmrg_vvvml:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vmrg %v0, %v0, %v1, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %4 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vvvml(<256 x double> %0, <256 x double> %1, <256 x i1> %2, i32 256)
  ret <256 x double> %4
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vmrg.vvvml(<256 x double>, <256 x double>, <256 x i1>, i32)
23
; Function Attrs: nounwind readnone
; VMRG vvml_v (pass-through) form: the intrinsic takes an extra pass-through
; vector (%3) and runs at VL 128.  Expected codegen merges into a scratch
; register at VL 128, then copies it to the result register with vor at VL 256.
define fastcc <256 x double> @vmrg_vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
; CHECK-LABEL: vmrg_vvvmvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 128
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vmrg %v2, %v0, %v1, %vm1
; CHECK-NEXT:    lea %s16, 256
; CHECK-NEXT:    lvl %s16
; CHECK-NEXT:    vor %v0, (0)1, %v2
; CHECK-NEXT:    b.l.t (, %s10)
  %5 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
  ret <256 x double> %5
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vmrg.vvvmvl(<256 x double>, <256 x double>, <256 x i1>, <256 x double>, i32)
41
; Function Attrs: nounwind readnone
; VMRG rvml form: the first merge operand is an i64 scalar, so the expected
; codegen uses the scalar register %s0 directly as the first vmrg operand.
define fastcc <256 x double> @vmrg_vsvml(i64 %0, <256 x double> %1, <256 x i1> %2) {
; CHECK-LABEL: vmrg_vsvml:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s1, 256
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vmrg %v0, %s0, %v0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %4 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vsvml(i64 %0, <256 x double> %1, <256 x i1> %2, i32 256)
  ret <256 x double> %4
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vmrg.vsvml(i64, <256 x double>, <256 x i1>, i32)
56
; Function Attrs: nounwind readnone
; VMRG rvml_v form: scalar first operand plus pass-through vector.  Expected
; codegen merges at VL 128 into %v1, then vor-copies to %v0 at VL 256.
define fastcc <256 x double> @vmrg_vsvmvl(i64 %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
; CHECK-LABEL: vmrg_vsvmvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s1, 128
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vmrg %v1, %s0, %v0, %vm1
; CHECK-NEXT:    lea %s16, 256
; CHECK-NEXT:    lvl %s16
; CHECK-NEXT:    vor %v0, (0)1, %v1
; CHECK-NEXT:    b.l.t (, %s10)
  %5 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vsvmvl(i64 %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
  ret <256 x double> %5
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vmrg.vsvmvl(i64, <256 x double>, <256 x i1>, <256 x double>, i32)
74
; Function Attrs: nounwind readnone
; VMRG ivml form: the scalar operand is the constant 8, which the expected
; codegen folds into the instruction as an immediate instead of a register.
define fastcc <256 x double> @vmrg_vsvml_imm(<256 x double> %0, <256 x i1> %1) {
; CHECK-LABEL: vmrg_vsvml_imm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vmrg %v0, 8, %v0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %3 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vsvml(i64 8, <256 x double> %0, <256 x i1> %1, i32 256)
  ret <256 x double> %3
}
86
; Function Attrs: nounwind readnone
; VMRG ivml_v form: immediate 8 plus pass-through vector.  Expected codegen
; merges at VL 128 into %v1, then vor-copies to the result at VL 256.
define fastcc <256 x double> @vmrg_vsvmvl_imm(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
; CHECK-LABEL: vmrg_vsvmvl_imm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 128
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vmrg %v1, 8, %v0, %vm1
; CHECK-NEXT:    lea %s16, 256
; CHECK-NEXT:    lvl %s16
; CHECK-NEXT:    vor %v0, (0)1, %v1
; CHECK-NEXT:    b.l.t (, %s10)
  %4 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vsvmvl(i64 8, <256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
  ret <256 x double> %4
}
101
; Function Attrs: nounwind readnone
; VMRG.W vvMl form: the <512 x i1> mask selects vmrg.w; the expected codegen
; uses %vm2 for the mask (presumably the even half of a mask-register pair
; %vm2/%vm3 — confirm against the VE register allocation for <512 x i1>).
define fastcc <256 x double> @vmrgw_vvvMl(<256 x double> %0, <256 x double> %1, <512 x i1> %2) {
; CHECK-LABEL: vmrgw_vvvMl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vmrg.w %v0, %v0, %v1, %vm2
; CHECK-NEXT:    b.l.t (, %s10)
  %4 = tail call fast <256 x double> @llvm.ve.vl.vmrgw.vvvMl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, i32 256)
  ret <256 x double> %4
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vmrgw.vvvMl(<256 x double>, <256 x double>, <512 x i1>, i32)
116
; Function Attrs: nounwind readnone
; VMRG.W vvMl_v form: <512 x i1> mask plus pass-through vector.  Expected
; codegen merges at VL 128 into %v2, then vor-copies to %v0 at VL 256.
define fastcc <256 x double> @vmrgw_vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3) {
; CHECK-LABEL: vmrgw_vvvMvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 128
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vmrg.w %v2, %v0, %v1, %vm2
; CHECK-NEXT:    lea %s16, 256
; CHECK-NEXT:    lvl %s16
; CHECK-NEXT:    vor %v0, (0)1, %v2
; CHECK-NEXT:    b.l.t (, %s10)
  %5 = tail call fast <256 x double> @llvm.ve.vl.vmrgw.vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3, i32 128)
  ret <256 x double> %5
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vmrgw.vvvMvl(<256 x double>, <256 x double>, <512 x i1>, <256 x double>, i32)
134
; Function Attrs: nounwind readnone
; VMRG.W rvMl form: i32 scalar first operand.  Expected codegen masks %s0
; with (32)0 (low 32 bits) to clear the sign-extended upper half before the
; scalar is used by vmrg.w.
define fastcc <256 x double> @vmrgw_vsvMl(i32 signext %0, <256 x double> %1, <512 x i1> %2) {
; CHECK-LABEL: vmrgw_vsvMl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s0, %s0, (32)0
; CHECK-NEXT:    lea %s1, 256
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vmrg.w %v0, %s0, %v0, %vm2
; CHECK-NEXT:    b.l.t (, %s10)
  %4 = tail call fast <256 x double> @llvm.ve.vl.vmrgw.vsvMl(i32 %0, <256 x double> %1, <512 x i1> %2, i32 256)
  ret <256 x double> %4
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vmrgw.vsvMl(i32, <256 x double>, <512 x i1>, i32)
150
; Function Attrs: nounwind readnone
; VMRG.W rvMl_v form: i32 scalar operand plus pass-through vector.  Expected
; codegen zero-masks %s0 with (32)0, merges at VL 128 into %v1, then
; vor-copies the result to %v0 at VL 256.
define fastcc <256 x double> @vmrgw_vsvMvl(i32 signext %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3) {
; CHECK-LABEL: vmrgw_vsvMvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s0, %s0, (32)0
; CHECK-NEXT:    lea %s1, 128
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vmrg.w %v1, %s0, %v0, %vm2
; CHECK-NEXT:    lea %s16, 256
; CHECK-NEXT:    lvl %s16
; CHECK-NEXT:    vor %v0, (0)1, %v1
; CHECK-NEXT:    b.l.t (, %s10)
  %5 = tail call fast <256 x double> @llvm.ve.vl.vmrgw.vsvMvl(i32 %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3, i32 128)
  ret <256 x double> %5
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vmrgw.vsvMvl(i32, <256 x double>, <512 x i1>, <256 x double>, i32)
169