; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
; RUN:   -check-prefix=P9
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
; RUN:   -check-prefix=P8

; Load-and-splat selection tests. Every function below is compiled twice:
; once for pwr9 with the powerpc64 triple and once for pwr8 with the
; powerpc64le triple. The expected assembly for each configuration follows
; the function header.

; Scalar double at index 3 of %a, broadcast to both lanes of a <2 x double>
; and stored to %c.
define dso_local void @test(<2 x double>* nocapture %c, double* nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 24
; P9-NEXT:    lxvdsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 24
; P8-NEXT:    lxvdsx vs0, 0, r4
; P8-NEXT:    stxvd2x vs0, 0, r3
; P8-NEXT:    blr
entry:
  %elt.ptr = getelementptr inbounds double, double* %a, i64 3
  %elt = load double, double* %elt.ptr, align 8
  %ins = insertelement <2 x double> undef, double %elt, i32 0
  %splat = shufflevector <2 x double> %ins, <2 x double> undef, <2 x i32> zeroinitializer
  store <2 x double> %splat, <2 x double>* %c, align 16
  ret void
}

; Scalar float at index 3 of %a, broadcast to all four lanes of a
; <4 x float> and stored to %c.
define dso_local void @test2(<4 x float>* nocapture %c, float* nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test2:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 12
; P9-NEXT:    lxvwsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test2:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 12
; P8-NEXT:    lfiwzx f0, 0, r4
; P8-NEXT:    xxspltw v2, vs0, 1
; P8-NEXT:    stvx v2, 0, r3
; P8-NEXT:    blr
entry:
  %elt.ptr = getelementptr inbounds float, float* %a, i64 3
  %elt = load float, float* %elt.ptr, align 4
  %ins = insertelement <4 x float> undef, float %elt, i32 0
  %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
  store <4 x float> %splat, <4 x float>* %c, align 16
  ret void
}

; Scalar i32 at index 3 of %a, broadcast to all four lanes of a <4 x i32>
; and stored to %c.
define dso_local void @test3(<4 x i32>* nocapture %c, i32* nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test3:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 12
; P9-NEXT:    lxvwsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test3:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 12
; P8-NEXT:    lfiwzx f0, 0, r4
; P8-NEXT:    xxspltw v2, vs0, 1
; P8-NEXT:    stvx v2, 0, r3
; P8-NEXT:    blr
entry:
  %elt.ptr = getelementptr inbounds i32, i32* %a, i64 3
  %elt = load i32, i32* %elt.ptr, align 4
  %ins = insertelement <4 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  store <4 x i32> %splat, <4 x i32>* %c, align 16
  ret void
}

; Scalar i64 at index 3 of %a, broadcast to both lanes of a <2 x i64> and
; stored to %c.
define dso_local void @test4(<2 x i64>* nocapture %c, i64* nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test4:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 24
; P9-NEXT:    lxvdsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test4:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 24
; P8-NEXT:    lxvdsx vs0, 0, r4
; P8-NEXT:    stxvd2x vs0, 0, r3
; P8-NEXT:    blr
entry:
  %elt.ptr = getelementptr inbounds i64, i64* %a, i64 3
  %elt = load i64, i64* %elt.ptr, align 8
  %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  store <2 x i64> %splat, <2 x i64>* %c, align 16
  ret void
}

; Four bytes loaded through %s (as <4 x i8>) and repeated four times to
; fill a <16 x i8>. %t is unused.
define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) {
; P9-LABEL: unadjusted_lxvwsx:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvwsx:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lfiwzx f0, 0, r3
; P8-NEXT:    xxspltw v2, vs0, 1
; P8-NEXT:    blr
entry:
  %vec.ptr = bitcast i32* %s to <4 x i8>*
  %vec = load <4 x i8>, <4 x i8>* %vec.ptr, align 4
  %rep = shufflevector <4 x i8> %vec, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %rep
}

; Eight bytes loaded through %s (as <8 x i8>); bytes 4..7 are repeated four
; times to fill a <16 x i8>. %t is unused.
define <16 x i8> @adjusted_lxvwsx(i64* %s, i64* %t) {
; P9-LABEL: adjusted_lxvwsx:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 4
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lfdx f0, 0, r3
; P8-NEXT:    xxspltw v2, vs0, 0
; P8-NEXT:    blr
entry:
  %vec.ptr = bitcast i64* %s to <8 x i8>*
  %vec = load <8 x i8>, <8 x i8>* %vec.ptr, align 8
  %rep = shufflevector <8 x i8> %vec, <8 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %rep
}

; Full <16 x i8> load from %s; bytes 0..3 are repeated four times.
; %t is unused.
define <16 x i8> @unadjusted_lxvwsx_v16i8(<16 x i8>* %s, <16 x i8> %t) {
; P9-LABEL: unadjusted_lxvwsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvwsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    xxspltw v2, v2, 3
; P8-NEXT:    blr
entry:
  %vec = load <16 x i8>, <16 x i8>* %s, align 16
  %rep = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %rep
}

; Full <16 x i8> load from %s; bytes 4..7 are repeated four times.
; %t is unused.
define <16 x i8> @adjusted_lxvwsx_v16i8(<16 x i8>* %s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvwsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 4
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    xxspltw v2, v2, 2
; P8-NEXT:    blr
entry:
  %vec = load <16 x i8>, <16 x i8>* %s, align 16
  %rep = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %rep
}

; Full <16 x i8> load from %s; bytes 8..11 are repeated four times.
; %t is unused.
define <16 x i8> @adjusted_lxvwsx_v16i8_2(<16 x i8>* %s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvwsx_v16i8_2:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 8
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx_v16i8_2:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    xxspltw v2, v2, 1
; P8-NEXT:    blr
entry:
  %vec = load <16 x i8>, <16 x i8>* %s, align 16
  %rep = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>
  ret <16 x i8> %rep
}

; Full <16 x i8> load from %s; bytes 12..15 are repeated four times.
; %t is unused.
define <16 x i8> @adjusted_lxvwsx_v16i8_3(<16 x i8>* %s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvwsx_v16i8_3:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 12
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx_v16i8_3:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    xxspltw v2, v2, 0
; P8-NEXT:    blr
entry:
  %vec = load <16 x i8>, <16 x i8>* %s, align 16
  %rep = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %rep
}

; Eight bytes loaded through %s (as <8 x i8>) and repeated twice to fill a
; <16 x i8>. %t is unused.
define <16 x i8> @unadjusted_lxvdsx(i64* %s, i64* %t) {
; P9-LABEL: unadjusted_lxvdsx:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvdsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvdsx:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lxvdsx v2, 0, r3
; P8-NEXT:    blr
entry:
  %vec.ptr = bitcast i64* %s to <8 x i8>*
  %vec = load <8 x i8>, <8 x i8>* %vec.ptr, align 8
  %rep = shufflevector <8 x i8> %vec, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %rep
}

; Full <16 x i8> load from %s; bytes 0..7 are repeated twice.
; %t is unused.
define <16 x i8> @unadjusted_lxvdsx_v16i8(<16 x i8>* %s, <16 x i8> %t) {
; P9-LABEL: unadjusted_lxvdsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvdsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvdsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lxvdsx v2, 0, r3
; P8-NEXT:    blr
entry:
  %vec = load <16 x i8>, <16 x i8>* %s, align 16
  %rep = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %rep
}

; Full <16 x i8> load from %s; bytes 8..15 are repeated twice.
; %t is unused.
define <16 x i8> @adjusted_lxvdsx_v16i8(<16 x i8>* %s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvdsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 8
; P9-NEXT:    lxvdsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvdsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r3, r3, 8
; P8-NEXT:    lxvdsx v2, 0, r3
; P8-NEXT:    blr
entry:
  %vec = load <16 x i8>, <16 x i8>* %s, align 16
  %rep = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %rep
}