; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
; RUN:   -check-prefix=P9
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
; RUN:   -check-prefix=P8

; Each function below loads a value from memory and replicates it across a
; vector. The expected assembly records, per configuration, whether the load
; and the splat are emitted as a single load-and-splat instruction
; (lxvdsx for doublewords, lxvwsx for words) or as a load followed by a splat.
; The pwr9 invocation uses the powerpc64 triple; the pwr8 invocation uses the
; powerpc64le triple.

; Splat the double at %a[3] into a <2 x double> and store it to %c.
; Both configurations expect lxvdsx from the address %a + 24.
define dso_local void @test(<2 x double>* nocapture %c, double* nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 24
; P9-NEXT:    lxvdsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 24
; P8-NEXT:    lxvdsx vs0, 0, r4
; P8-NEXT:    stxvd2x vs0, 0, r3
; P8-NEXT:    blr
entry:
  %elt.addr = getelementptr inbounds double, double* %a, i64 3
  %elt = load double, double* %elt.addr, align 8
  %lane0 = insertelement <2 x double> undef, double %elt, i32 0
  %splat = shufflevector <2 x double> %lane0, <2 x double> undef, <2 x i32> zeroinitializer
  store <2 x double> %splat, <2 x double>* %c, align 16
  ret void
}

; Splat the float at %a[3] into a <4 x float> and store it to %c.
; pwr9 expects lxvwsx from %a + 12; pwr8 expects lfiwzx followed by
; xxpermdi and xxspltw.
define dso_local void @test2(<4 x float>* nocapture %c, float* nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test2:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 12
; P9-NEXT:    lxvwsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test2:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 12
; P8-NEXT:    lfiwzx f0, 0, r4
; P8-NEXT:    xxpermdi vs0, f0, f0, 2
; P8-NEXT:    xxspltw v2, vs0, 3
; P8-NEXT:    stvx v2, 0, r3
; P8-NEXT:    blr
entry:
  %elt.addr = getelementptr inbounds float, float* %a, i64 3
  %elt = load float, float* %elt.addr, align 4
  %lane0 = insertelement <4 x float> undef, float %elt, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  store <4 x float> %splat, <4 x float>* %c, align 16
  ret void
}

; Integer counterpart of @test2: splat the i32 at %a[3] into a <4 x i32>.
define dso_local void @test3(<4 x i32>* nocapture %c, i32* nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test3:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 12
; P9-NEXT:    lxvwsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test3:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 12
; P8-NEXT:    lfiwzx f0, 0, r4
; P8-NEXT:    xxpermdi vs0, f0, f0, 2
; P8-NEXT:    xxspltw v2, vs0, 3
; P8-NEXT:    stvx v2, 0, r3
; P8-NEXT:    blr
entry:
  %elt.addr = getelementptr inbounds i32, i32* %a, i64 3
  %elt = load i32, i32* %elt.addr, align 4
  %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  store <4 x i32> %splat, <4 x i32>* %c, align 16
  ret void
}

; Integer counterpart of @test: splat the i64 at %a[3] into a <2 x i64>.
define dso_local void @test4(<2 x i64>* nocapture %c, i64* nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test4:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 24
; P9-NEXT:    lxvdsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test4:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 24
; P8-NEXT:    lxvdsx vs0, 0, r4
; P8-NEXT:    stxvd2x vs0, 0, r3
; P8-NEXT:    blr
entry:
  %elt.addr = getelementptr inbounds i64, i64* %a, i64 3
  %elt = load i64, i64* %elt.addr, align 8
  %lane0 = insertelement <2 x i64> undef, i64 %elt, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> zeroinitializer
  store <2 x i64> %splat, <2 x i64>* %c, align 16
  ret void
}

; Load four bytes through %s and repeat bytes 0-3 four times.
; pwr9 expects lxvwsx directly on the incoming pointer (no address adjustment).
define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) {
; P9-LABEL: unadjusted_lxvwsx:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvwsx:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lfiwzx f0, 0, r3
; P8-NEXT:    xxpermdi vs0, f0, f0, 2
; P8-NEXT:    xxspltw v2, vs0, 3
; P8-NEXT:    blr
entry:
  %bytes.addr = bitcast i32* %s to <4 x i8>*
  %bytes = load <4 x i8>, <4 x i8>* %bytes.addr, align 4
  %splat = shufflevector <4 x i8> %bytes, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %splat
}

; Load eight bytes through %s and repeat bytes 4-7 four times.
; pwr9 expects the pointer to be advanced by 4 before lxvwsx.
define <16 x i8> @adjusted_lxvwsx(i64* %s, i64* %t) {
; P9-LABEL: adjusted_lxvwsx:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 4
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx:
; P8:       # %bb.0: # %entry
; P8-NEXT:    ld r3, 0(r3)
; P8-NEXT:    mtvsrd f0, r3
; P8-NEXT:    xxswapd v2, vs0
; P8-NEXT:    xxspltw v2, v2, 2
; P8-NEXT:    blr
entry:
  %bytes.addr = bitcast i64* %s to <8 x i8>*
  %bytes = load <8 x i8>, <8 x i8>* %bytes.addr, align 8
  %splat = shufflevector <8 x i8> %bytes, <8 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %splat
}

; Load a full <16 x i8> and repeat bytes 0-3 four times.
; pwr9 expects lxvwsx with no address adjustment.
define <16 x i8> @unadjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
; P9-LABEL: unadjusted_lxvwsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvwsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    xxspltw v2, v2, 3
; P8-NEXT:    blr
entry:
  %vec = load <16 x i8>, <16 x i8>* %s, align 16
  %splat = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %splat
}

; Load a full <16 x i8> and repeat bytes 4-7 four times.
; pwr9 expects the pointer to be advanced by 4 before lxvwsx.
define <16 x i8> @adjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvwsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 4
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    xxspltw v2, v2, 2
; P8-NEXT:    blr
entry:
  %vec = load <16 x i8>, <16 x i8>* %s, align 16
  %splat = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %splat
}

; Load a full <16 x i8> and repeat bytes 8-11 four times.
; pwr9 expects the pointer to be advanced by 8 before lxvwsx.
define <16 x i8> @adjusted_lxvwsx_v16i8_2(<16 x i8> *%s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvwsx_v16i8_2:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 8
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx_v16i8_2:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    xxspltw v2, v2, 1
; P8-NEXT:    blr
entry:
  %vec = load <16 x i8>, <16 x i8>* %s, align 16
  %splat = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>
  ret <16 x i8> %splat
}

; Load a full <16 x i8> and repeat bytes 12-15 four times.
; pwr9 expects the pointer to be advanced by 12 before lxvwsx.
define <16 x i8> @adjusted_lxvwsx_v16i8_3(<16 x i8> *%s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvwsx_v16i8_3:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 12
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx_v16i8_3:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    xxspltw v2, v2, 0
; P8-NEXT:    blr
entry:
  %vec = load <16 x i8>, <16 x i8>* %s, align 16
  %splat = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %splat
}

; Load eight bytes through %s and repeat bytes 0-7 twice.
; Both configurations expect lxvdsx directly on the incoming pointer.
define <16 x i8> @unadjusted_lxvdsx(i64* %s, i64* %t) {
; P9-LABEL: unadjusted_lxvdsx:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvdsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvdsx:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lxvdsx v2, 0, r3
; P8-NEXT:    blr
entry:
  %bytes.addr = bitcast i64* %s to <8 x i8>*
  %bytes = load <8 x i8>, <8 x i8>* %bytes.addr, align 8
  %splat = shufflevector <8 x i8> %bytes, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %splat
}

; Load a full <16 x i8> and repeat bytes 0-7 twice.
; Both configurations expect lxvdsx with no address adjustment.
define <16 x i8> @unadjusted_lxvdsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
; P9-LABEL: unadjusted_lxvdsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvdsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvdsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lxvdsx v2, 0, r3
; P8-NEXT:    blr
entry:
  %vec = load <16 x i8>, <16 x i8>* %s, align 16
  %splat = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %splat
}

; Load a full <16 x i8> and repeat bytes 8-15 twice.
; Both configurations expect the pointer to be advanced by 8 before lxvdsx.
define <16 x i8> @adjusted_lxvdsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvdsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 8
; P9-NEXT:    lxvdsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvdsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r3, r3, 8
; P8-NEXT:    lxvdsx v2, 0, r3
; P8-NEXT:    blr
entry:
  %vec = load <16 x i8>, <16 x i8>* %s, align 16
  %splat = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %splat
}