; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

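; A shuffle that keeps each lane in place and only chooses its source vector
; should lower to a vmerge.vvm under a constant mask.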
define <4 x half> @shuffle_v4f16(<4 x half> %x, <4 x half> %y) {
; CHECK-LABEL: shuffle_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, zero, 11
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x half> %x, <4 x half> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x half> %s
}

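; The same lane-select lowering for v8f32 at LMUL=2; only the mask constant and
; the vtype change.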
define <8 x float> @shuffle_v8f32(<8 x float> %x, <8 x float> %y) {
; CHECK-LABEL: shuffle_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, zero, 236
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    ret
  %s = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 12, i32 5, i32 6, i32 7>
  ret <8 x float> %s
}

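; Selecting lanes between a constant splat (first operand) and a vector should
; use vfmerge.vfm, with the splat value held in a scalar FP register.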
define <4 x double> @shuffle_fv_v4f64(<4 x double> %x) {
; RV32-LABEL: shuffle_fv_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a0, zero, 9
; RV32-NEXT:    lui a1, %hi(.LCPI2_0)
; RV32-NEXT:    fld ft0, %lo(.LCPI2_0)(a1)
; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV32-NEXT:    vfmerge.vfm v8, v8, ft0, v0
; RV32-NEXT:    ret
;
; RV64-LABEL: shuffle_fv_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, %hi(.LCPI2_0)
; RV64-NEXT:    fld ft0, %lo(.LCPI2_0)(a0)
; RV64-NEXT:    addi a0, zero, 9
; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT:    vfmerge.vfm v8, v8, ft0, v0
; RV64-NEXT:    ret
  %s = shufflevector <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x double> %x, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x double> %s
}

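; As above, but with the constant splat as the second shuffle operand.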
define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) {
; RV32-LABEL: shuffle_vf_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a0, zero, 6
; RV32-NEXT:    lui a1, %hi(.LCPI3_0)
; RV32-NEXT:    fld ft0, %lo(.LCPI3_0)(a1)
; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV32-NEXT:    vfmerge.vfm v8, v8, ft0, v0
; RV32-NEXT:    ret
;
; RV64-LABEL: shuffle_vf_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, %hi(.LCPI3_0)
; RV64-NEXT:    fld ft0, %lo(.LCPI3_0)(a0)
; RV64-NEXT:    addi a0, zero, 6
; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT:    vfmerge.vfm v8, v8, ft0, v0
; RV64-NEXT:    ret
  %s = shufflevector <4 x double> %x, <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x double> %s
}

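; A single-source permutation needs a full gather: vrgather.vv on RV64, and
; vrgatherei16.vv with e16 indices on RV32 to avoid building i64 index vectors.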
define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) {
; RV32-LABEL: vrgather_permute_shuffle_vu_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a0, %hi(.LCPI4_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI4_0)
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT:    vle16.v v25, (a0)
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV32-NEXT:    vrgatherei16.vv v26, v8, v25
; RV32-NEXT:    vmv2r.v v8, v26
; RV32-NEXT:    ret
;
; RV64-LABEL: vrgather_permute_shuffle_vu_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, %hi(.LCPI4_0)
; RV64-NEXT:    addi a0, a0, %lo(.LCPI4_0)
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT:    vle64.v v28, (a0)
; RV64-NEXT:    vrgather.vv v26, v8, v28
; RV64-NEXT:    vmv2r.v v8, v26
; RV64-NEXT:    ret
  %s = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
  ret <4 x double> %s
}

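; The same permutation with the live source as the second operand and an undef
; first operand; expect identical codegen.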
define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) {
; RV32-LABEL: vrgather_permute_shuffle_uv_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a0, %hi(.LCPI5_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI5_0)
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT:    vle16.v v25, (a0)
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV32-NEXT:    vrgatherei16.vv v26, v8, v25
; RV32-NEXT:    vmv2r.v v8, v26
; RV32-NEXT:    ret
;
; RV64-LABEL: vrgather_permute_shuffle_uv_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, %hi(.LCPI5_0)
; RV64-NEXT:    addi a0, a0, %lo(.LCPI5_0)
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT:    vle64.v v28, (a0)
; RV64-NEXT:    vrgather.vv v26, v8, v28
; RV64-NEXT:    vmv2r.v v8, v26
; RV64-NEXT:    ret
  %s = shufflevector <4 x double> undef, <4 x double> %x, <4 x i32> <i32 5, i32 6, i32 4, i32 5>
  ret <4 x double> %s
}

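; A genuine two-source shuffle is lowered as a gather from the first source
; followed by a masked gather from the second into the same destination.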
define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) {
; RV32-LABEL: vrgather_shuffle_vv_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a0, %hi(.LCPI6_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI6_0)
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT:    vle16.v v25, (a0)
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV32-NEXT:    vrgatherei16.vv v26, v8, v25
; RV32-NEXT:    addi a0, zero, 8
; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT:    vmv.v.i v25, 1
; RV32-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
; RV32-NEXT:    vrgatherei16.vv v26, v10, v25, v0.t
; RV32-NEXT:    vmv2r.v v8, v26
; RV32-NEXT:    ret
;
; RV64-LABEL: vrgather_shuffle_vv_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, %hi(.LCPI6_0)
; RV64-NEXT:    addi a0, a0, %lo(.LCPI6_0)
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT:    vle64.v v28, (a0)
; RV64-NEXT:    vrgather.vv v26, v8, v28
; RV64-NEXT:    addi a0, zero, 8
; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT:    vmv.v.i v28, 1
; RV64-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
; RV64-NEXT:    vrgather.vv v26, v10, v28, v0.t
; RV64-NEXT:    vmv2r.v v8, v26
; RV64-NEXT:    ret
  %s = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 2, i32 0, i32 5>
  ret <4 x double> %s
}

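; Mixing a constant splat with gathered vector elements: the splat is
; materialized with a zero-stride vlse64.v and the vector lanes are gathered in
; under the mask, with indices computed by vid.v/vrsub.vi.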
define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) {
; RV32-LABEL: vrgather_shuffle_xv_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a0, zero, 12
; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT:    vid.v v25
; RV32-NEXT:    vrsub.vi v25, v25, 4
; RV32-NEXT:    lui a0, %hi(.LCPI7_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI7_0)
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV32-NEXT:    vlse64.v v26, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
; RV32-NEXT:    vrgatherei16.vv v26, v8, v25, v0.t
; RV32-NEXT:    vmv2r.v v8, v26
; RV32-NEXT:    ret
;
; RV64-LABEL: vrgather_shuffle_xv_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi a0, zero, 12
; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT:    lui a0, %hi(.LCPI7_0)
; RV64-NEXT:    addi a0, a0, %lo(.LCPI7_0)
; RV64-NEXT:    vlse64.v v26, (a0), zero
; RV64-NEXT:    vid.v v28
; RV64-NEXT:    vrsub.vi v28, v28, 4
; RV64-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
; RV64-NEXT:    vrgather.vv v26, v8, v28, v0.t
; RV64-NEXT:    vmv2r.v v8, v26
; RV64-NEXT:    ret
  %s = shufflevector <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x double> %x, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
  ret <4 x double> %s
}

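; As above, with the splat as the second operand; here the gather indices come
; from vid.v/vmul.vx.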
define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) {
; RV32-LABEL: vrgather_shuffle_vx_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT:    vid.v v25
; RV32-NEXT:    addi a0, zero, 3
; RV32-NEXT:    vmul.vx v25, v25, a0
; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    lui a0, %hi(.LCPI8_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI8_0)
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV32-NEXT:    vlse64.v v26, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
; RV32-NEXT:    vrgatherei16.vv v26, v8, v25, v0.t
; RV32-NEXT:    vmv2r.v v8, v26
; RV32-NEXT:    ret
;
; RV64-LABEL: vrgather_shuffle_vx_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT:    vid.v v26
; RV64-NEXT:    addi a0, zero, 3
; RV64-NEXT:    vmul.vx v28, v26, a0
; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    lui a0, %hi(.LCPI8_0)
; RV64-NEXT:    addi a0, a0, %lo(.LCPI8_0)
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT:    vlse64.v v26, (a0), zero
; RV64-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
; RV64-NEXT:    vrgather.vv v26, v8, v28, v0.t
; RV64-NEXT:    vmv2r.v v8, v26
; RV64-NEXT:    ret
  %s = shufflevector <4 x double> %x, <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
  ret <4 x double> %s
}