; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define <4 x half> @shuffle_v4f16(<4 x half> %x, <4 x half> %y) {
; CHECK-LABEL: shuffle_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, zero, 11
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x half> %x, <4 x half> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x half> %s
}

define <8 x float> @shuffle_v8f32(<8 x float> %x, <8 x float> %y) {
; CHECK-LABEL: shuffle_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, zero, 236
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    ret
  %s = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 12, i32 5, i32 6, i32 7>
  ret <8 x float> %s
}

define <4 x double> @shuffle_fv_v4f64(<4 x double> %x) {
; RV32-LABEL: shuffle_fv_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a0, zero, 9
; RV32-NEXT:    lui a1, %hi(.LCPI2_0)
; RV32-NEXT:    fld ft0, %lo(.LCPI2_0)(a1)
; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV32-NEXT:    vfmerge.vfm v8, v8, ft0, v0
; RV32-NEXT:    ret
;
; RV64-LABEL: shuffle_fv_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, %hi(.LCPI2_0)
; RV64-NEXT:    fld ft0, %lo(.LCPI2_0)(a0)
; RV64-NEXT:    addi a0, zero, 9
; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT:    vfmerge.vfm v8, v8, ft0, v0
; RV64-NEXT:    ret
  %s = shufflevector <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x double> %x, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x double> %s
}

define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) {
; RV32-LABEL: shuffle_vf_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a0, zero, 6
; RV32-NEXT:    lui a1, %hi(.LCPI3_0)
; RV32-NEXT:    fld ft0, %lo(.LCPI3_0)(a1)
; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV32-NEXT:    vfmerge.vfm v8, v8, ft0, v0
; RV32-NEXT:    ret
;
; RV64-LABEL: shuffle_vf_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, %hi(.LCPI3_0)
; RV64-NEXT:    fld ft0, %lo(.LCPI3_0)(a0)
; RV64-NEXT:    addi a0, zero, 6
; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT:    vfmerge.vfm v8, v8, ft0, v0
; RV64-NEXT:    ret
  %s = shufflevector <4 x double> %x, <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x double> %s
}

define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) {
; RV32-LABEL: vrgather_permute_shuffle_vu_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a0, %hi(.LCPI4_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI4_0)
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT:    vle16.v v25, (a0)
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV32-NEXT:    vrgatherei16.vv v26, v8, v25
; RV32-NEXT:    vmv2r.v v8, v26
; RV32-NEXT:    ret
;
; RV64-LABEL: vrgather_permute_shuffle_vu_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, %hi(.LCPI4_0)
; RV64-NEXT:    addi a0, a0, %lo(.LCPI4_0)
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT:    vle64.v v28, (a0)
; RV64-NEXT:    vrgather.vv v26, v8, v28
; RV64-NEXT:    vmv2r.v v8, v26
; RV64-NEXT:    ret
  %s = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
  ret <4 x double> %s
}

define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) {
; RV32-LABEL: vrgather_permute_shuffle_uv_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a0, %hi(.LCPI5_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI5_0)
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT:    vle16.v v25, (a0)
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV32-NEXT:    vrgatherei16.vv v26, v8, v25
; RV32-NEXT:    vmv2r.v v8, v26
; RV32-NEXT:    ret
;
; RV64-LABEL: vrgather_permute_shuffle_uv_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, %hi(.LCPI5_0)
; RV64-NEXT:    addi a0, a0, %lo(.LCPI5_0)
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT:    vle64.v v28, (a0)
; RV64-NEXT:    vrgather.vv v26, v8, v28
; RV64-NEXT:    vmv2r.v v8, v26
; RV64-NEXT:    ret
  %s = shufflevector <4 x double> undef, <4 x double> %x, <4 x i32> <i32 5, i32 6, i32 4, i32 5>
  ret <4 x double> %s
}

define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) {
; RV32-LABEL: vrgather_shuffle_vv_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a0, %hi(.LCPI6_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI6_0)
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT:    vle16.v v25, (a0)
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV32-NEXT:    vrgatherei16.vv v26, v8, v25
; RV32-NEXT:    addi a0, zero, 8
; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT:    vmv.v.i v25, 1
; RV32-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
; RV32-NEXT:    vrgatherei16.vv v26, v10, v25, v0.t
; RV32-NEXT:    vmv2r.v v8, v26
; RV32-NEXT:    ret
;
; RV64-LABEL: vrgather_shuffle_vv_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, %hi(.LCPI6_0)
; RV64-NEXT:    addi a0, a0, %lo(.LCPI6_0)
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT:    vle64.v v28, (a0)
; RV64-NEXT:    vrgather.vv v26, v8, v28
; RV64-NEXT:    addi a0, zero, 8
; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT:    vmv.v.i v28, 1
; RV64-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
; RV64-NEXT:    vrgather.vv v26, v10, v28, v0.t
; RV64-NEXT:    vmv2r.v v8, v26
; RV64-NEXT:    ret
  %s = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 2, i32 0, i32 5>
  ret <4 x double> %s
}

define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) {
; RV32-LABEL: vrgather_shuffle_xv_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a0, zero, 12
; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT:    vid.v v25
; RV32-NEXT:    vrsub.vi v25, v25, 4
; RV32-NEXT:    lui a0, %hi(.LCPI7_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI7_0)
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV32-NEXT:    vlse64.v v26, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
; RV32-NEXT:    vrgatherei16.vv v26, v8, v25, v0.t
; RV32-NEXT:    vmv2r.v v8, v26
; RV32-NEXT:    ret
;
; RV64-LABEL: vrgather_shuffle_xv_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi a0, zero, 12
; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT:    lui a0, %hi(.LCPI7_0)
; RV64-NEXT:    addi a0, a0, %lo(.LCPI7_0)
; RV64-NEXT:    vlse64.v v26, (a0), zero
; RV64-NEXT:    vid.v v28
; RV64-NEXT:    vrsub.vi v28, v28, 4
; RV64-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
; RV64-NEXT:    vrgather.vv v26, v8, v28, v0.t
; RV64-NEXT:    vmv2r.v v8, v26
; RV64-NEXT:    ret
  %s = shufflevector <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x double> %x, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
  ret <4 x double> %s
}

define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) {
; RV32-LABEL: vrgather_shuffle_vx_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT:    vid.v v25
; RV32-NEXT:    addi a0, zero, 3
; RV32-NEXT:    vmul.vx v25, v25, a0
; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    lui a0, %hi(.LCPI8_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI8_0)
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV32-NEXT:    vlse64.v v26, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
; RV32-NEXT:    vrgatherei16.vv v26, v8, v25, v0.t
; RV32-NEXT:    vmv2r.v v8, v26
; RV32-NEXT:    ret
;
; RV64-LABEL: vrgather_shuffle_vx_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT:    vid.v v26
; RV64-NEXT:    addi a0, zero, 3
; RV64-NEXT:    vmul.vx v28, v26, a0
; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    lui a0, %hi(.LCPI8_0)
; RV64-NEXT:    addi a0, a0, %lo(.LCPI8_0)
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT:    vlse64.v v26, (a0), zero
; RV64-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
; RV64-NEXT:    vrgather.vv v26, v8, v28, v0.t
; RV64-NEXT:    vmv2r.v v8, v26
; RV64-NEXT:    ret
  %s = shufflevector <4 x double> %x, <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
  ret <4 x double> %s
}