; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN:   -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN:   -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE

; Every function below loads a single scalar (i32 or float) from memory and
; inserts it into element 0 of a vector argument. The four RUN lines compile
; the file for pwr9 and pwr8, each with a powerpc64le and a powerpc64 triple,
; and the assertions record the assembly llc emits for each configuration.
; The assertions are autogenerated (see the NOTE on the first line).

; Insert the i32 loaded directly from %int32 into element 0 of %vec.
; Function Attrs: norecurse nounwind readonly
define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) {
; P9LE-LABEL: s2v_test1:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    lwz r3, 0(r3)
; P9LE-NEXT:    mtfprwz f0, r3
; P9LE-NEXT:    xxinsertw v2, vs0, 12
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test1:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    lwz r3, 0(r3)
; P9BE-NEXT:    mtfprwz f0, r3
; P9BE-NEXT:    xxinsertw v2, vs0, 0
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test1:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
; P8LE-NEXT:    lxsiwzx v4, 0, r3
; P8LE-NEXT:    addi r4, r4, .LCPI0_0@toc@l
; P8LE-NEXT:    lvx v3, 0, r4
; P8LE-NEXT:    vperm v2, v2, v4, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test1:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    lfiwzx f0, 0, r3
; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
; P8BE-NEXT:    xxsldwi vs0, v2, vs0, 1
; P8BE-NEXT:    xxsldwi v2, vs0, vs0, 3
; P8BE-NEXT:    blr
entry:
  %0 = load i32, i32* %int32, align 4
  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
  ret <4 x i32> %vecins
}

; Insert the i32 at constant index 1 from %int32 into element 0 of %vec.
; Function Attrs: norecurse nounwind readonly
define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec) {
; P9LE-LABEL: s2v_test2:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    lwz r3, 4(r3)
; P9LE-NEXT:    mtfprwz f0, r3
; P9LE-NEXT:    xxinsertw v2, vs0, 12
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test2:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    lwz r3, 4(r3)
; P9BE-NEXT:    mtfprwz f0, r3
; P9BE-NEXT:    xxinsertw v2, vs0, 0
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test2:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
; P8LE-NEXT:    addi r3, r3, 4
; P8LE-NEXT:    addi r4, r4, .LCPI1_0@toc@l
; P8LE-NEXT:    lxsiwzx v4, 0, r3
; P8LE-NEXT:    lvx v3, 0, r4
; P8LE-NEXT:    vperm v2, v2, v4, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test2:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    addi r3, r3, 4
; P8BE-NEXT:    lfiwzx f0, 0, r3
; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
; P8BE-NEXT:    xxsldwi vs0, v2, vs0, 1
; P8BE-NEXT:    xxsldwi v2, vs0, vs0, 3
; P8BE-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
  %0 = load i32, i32* %arrayidx, align 4
  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
  ret <4 x i32> %vecins
}

; Insert the i32 at the sign-extended runtime index %Idx from %int32 into
; element 0 of %vec.
; Function Attrs: norecurse nounwind readonly
define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32 signext %Idx) {
; P9LE-LABEL: s2v_test3:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    sldi r4, r7, 2
; P9LE-NEXT:    lwzx r3, r3, r4
; P9LE-NEXT:    mtfprwz f0, r3
; P9LE-NEXT:    xxinsertw v2, vs0, 12
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test3:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    sldi r4, r7, 2
; P9BE-NEXT:    lwzx r3, r3, r4
; P9BE-NEXT:    mtfprwz f0, r3
; P9BE-NEXT:    xxinsertw v2, vs0, 0
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test3:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
; P8LE-NEXT:    sldi r5, r7, 2
; P8LE-NEXT:    addi r4, r4, .LCPI2_0@toc@l
; P8LE-NEXT:    lxsiwzx v3, r3, r5
; P8LE-NEXT:    lvx v4, 0, r4
; P8LE-NEXT:    vperm v2, v2, v3, v4
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test3:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    sldi r4, r7, 2
; P8BE-NEXT:    lfiwzx f0, r3, r4
; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
; P8BE-NEXT:    xxsldwi vs0, v2, vs0, 1
; P8BE-NEXT:    xxsldwi v2, vs0, vs0, 3
; P8BE-NEXT:    blr
entry:
  %idxprom = sext i32 %Idx to i64
  %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom
  %0 = load i32, i32* %arrayidx, align 4
  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
  ret <4 x i32> %vecins
}

; Same IR body as s2v_test2: insert the i32 at constant index 1 from %int32
; into element 0 of %vec.
; Function Attrs: norecurse nounwind readonly
define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec) {
; P9LE-LABEL: s2v_test4:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    lwz r3, 4(r3)
; P9LE-NEXT:    mtfprwz f0, r3
; P9LE-NEXT:    xxinsertw v2, vs0, 12
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test4:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    lwz r3, 4(r3)
; P9BE-NEXT:    mtfprwz f0, r3
; P9BE-NEXT:    xxinsertw v2, vs0, 0
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test4:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
; P8LE-NEXT:    addi r3, r3, 4
; P8LE-NEXT:    addi r4, r4, .LCPI3_0@toc@l
; P8LE-NEXT:    lxsiwzx v4, 0, r3
; P8LE-NEXT:    lvx v3, 0, r4
; P8LE-NEXT:    vperm v2, v2, v4, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test4:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    addi r3, r3, 4
; P8BE-NEXT:    lfiwzx f0, 0, r3
; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
; P8BE-NEXT:    xxsldwi vs0, v2, vs0, 1
; P8BE-NEXT:    xxsldwi v2, vs0, vs0, 3
; P8BE-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
  %0 = load i32, i32* %arrayidx, align 4
  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
  ret <4 x i32> %vecins
}

; Like s2v_test1 but with the argument order swapped: the vector comes first
; and the pointer (%ptr1) second.
; Function Attrs: norecurse nounwind readonly
define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1) {
; P9LE-LABEL: s2v_test5:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    lwz r3, 0(r5)
; P9LE-NEXT:    mtfprwz f0, r3
; P9LE-NEXT:    xxinsertw v2, vs0, 12
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test5:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    lwz r3, 0(r5)
; P9BE-NEXT:    mtfprwz f0, r3
; P9BE-NEXT:    xxinsertw v2, vs0, 0
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test5:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
; P8LE-NEXT:    lxsiwzx v4, 0, r5
; P8LE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
; P8LE-NEXT:    lvx v3, 0, r3
; P8LE-NEXT:    vperm v2, v2, v4, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test5:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    lfiwzx f0, 0, r5
; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
; P8BE-NEXT:    xxsldwi vs0, v2, vs0, 1
; P8BE-NEXT:    xxsldwi v2, vs0, vs0, 3
; P8BE-NEXT:    blr
entry:
  %0 = load i32, i32* %ptr1, align 4
  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
  ret <4 x i32> %vecins
}

; Insert the float loaded directly from %f64 into element 0 of a
; <4 x float> vector. (Despite its name, %f64 is a float* parameter.)
; Function Attrs: norecurse nounwind readonly
define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec) {
; P9LE-LABEL: s2v_test_f1:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    lfs f0, 0(r3)
; P9LE-NEXT:    xscvdpspn vs0, f0
; P9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; P9LE-NEXT:    xxinsertw v2, vs0, 12
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test_f1:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    lfs f0, 0(r3)
; P9BE-NEXT:    xscvdpspn vs0, f0
; P9BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; P9BE-NEXT:    xxinsertw v2, vs0, 0
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test_f1:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
; P8LE-NEXT:    lxsiwzx v4, 0, r3
; P8LE-NEXT:    addi r4, r4, .LCPI5_0@toc@l
; P8LE-NEXT:    lvx v3, 0, r4
; P8LE-NEXT:    vperm v2, v2, v4, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test_f1:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    lfiwzx f0, 0, r3
; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
; P8BE-NEXT:    xxsldwi vs0, v2, vs0, 1
; P8BE-NEXT:    xxsldwi v2, vs0, vs0, 3
; P8BE-NEXT:    blr
entry:
  %0 = load float, float* %f64, align 4
  %vecins = insertelement <4 x float> %vec, float %0, i32 0
  ret <4 x float> %vecins
}

; Insert the float at constant index 1 from %f64 into element 0 of a
; <2 x float> vector. The load is annotated align 8.
; Function Attrs: norecurse nounwind readonly
define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec) {
; P9LE-LABEL: s2v_test_f2:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    addi r3, r3, 4
; P9LE-NEXT:    vmrglw v2, v2, v2
; P9LE-NEXT:    lxsiwzx v3, 0, r3
; P9LE-NEXT:    vmrghw v2, v2, v3
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test_f2:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    addi r3, r3, 4
; P9BE-NEXT:    xxspltw v2, v2, 1
; P9BE-NEXT:    lfiwzx f0, 0, r3
; P9BE-NEXT:    xxsldwi v3, f0, f0, 1
; P9BE-NEXT:    vmrghw v2, v3, v2
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test_f2:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    vmrglw v2, v2, v2
; P8LE-NEXT:    addi r3, r3, 4
; P8LE-NEXT:    lxsiwzx v3, 0, r3
; P8LE-NEXT:    vmrghw v2, v2, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test_f2:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    addi r3, r3, 4
; P8BE-NEXT:    xxspltw v2, v2, 1
; P8BE-NEXT:    lfiwzx f0, 0, r3
; P8BE-NEXT:    xxsldwi v3, f0, f0, 1
; P8BE-NEXT:    vmrghw v2, v3, v2
; P8BE-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds float, float* %f64, i64 1
  %0 = load float, float* %arrayidx, align 8
  %vecins = insertelement <2 x float> %vec, float %0, i32 0
  ret <2 x float> %vecins
}

; Insert the float at the sign-extended runtime index %Idx from %f64 into
; element 0 of a <2 x float> vector. The load is annotated align 8.
; Function Attrs: norecurse nounwind readonly
define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec, i32 signext %Idx) {
; P9LE-LABEL: s2v_test_f3:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    sldi r4, r7, 2
; P9LE-NEXT:    vmrglw v2, v2, v2
; P9LE-NEXT:    lxsiwzx v3, r3, r4
; P9LE-NEXT:    vmrghw v2, v2, v3
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test_f3:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    sldi r4, r7, 2
; P9BE-NEXT:    xxspltw v2, v2, 1
; P9BE-NEXT:    lfiwzx f0, r3, r4
; P9BE-NEXT:    xxsldwi v3, f0, f0, 1
; P9BE-NEXT:    vmrghw v2, v3, v2
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test_f3:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    vmrglw v2, v2, v2
; P8LE-NEXT:    sldi r4, r7, 2
; P8LE-NEXT:    lxsiwzx v3, r3, r4
; P8LE-NEXT:    vmrghw v2, v2, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test_f3:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    sldi r4, r7, 2
; P8BE-NEXT:    xxspltw v2, v2, 1
; P8BE-NEXT:    lfiwzx f0, r3, r4
; P8BE-NEXT:    xxsldwi v3, f0, f0, 1
; P8BE-NEXT:    vmrghw v2, v3, v2
; P8BE-NEXT:    blr
entry:
  %idxprom = sext i32 %Idx to i64
  %arrayidx = getelementptr inbounds float, float* %f64, i64 %idxprom
  %0 = load float, float* %arrayidx, align 8
  %vecins = insertelement <2 x float> %vec, float %0, i32 0
  ret <2 x float> %vecins
}

; Same IR body as s2v_test_f2: insert the float at constant index 1 from %f64
; into element 0 of a <2 x float> vector.
; Function Attrs: norecurse nounwind readonly
define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec) {
; P9LE-LABEL: s2v_test_f4:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    addi r3, r3, 4
; P9LE-NEXT:    vmrglw v2, v2, v2
; P9LE-NEXT:    lxsiwzx v3, 0, r3
; P9LE-NEXT:    vmrghw v2, v2, v3
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test_f4:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    addi r3, r3, 4
; P9BE-NEXT:    xxspltw v2, v2, 1
; P9BE-NEXT:    lfiwzx f0, 0, r3
; P9BE-NEXT:    xxsldwi v3, f0, f0, 1
; P9BE-NEXT:    vmrghw v2, v3, v2
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test_f4:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    vmrglw v2, v2, v2
; P8LE-NEXT:    addi r3, r3, 4
; P8LE-NEXT:    lxsiwzx v3, 0, r3
; P8LE-NEXT:    vmrghw v2, v2, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test_f4:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    addi r3, r3, 4
; P8BE-NEXT:    xxspltw v2, v2, 1
; P8BE-NEXT:    lfiwzx f0, 0, r3
; P8BE-NEXT:    xxsldwi v3, f0, f0, 1
; P8BE-NEXT:    vmrghw v2, v3, v2
; P8BE-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds float, float* %f64, i64 1
  %0 = load float, float* %arrayidx, align 8
  %vecins = insertelement <2 x float> %vec, float %0, i32 0
  ret <2 x float> %vecins
}

; Insert the float loaded directly from %ptr1 into element 0 of a
; <2 x float> vector; the vector is the first argument and the pointer the
; second. The load is annotated align 8.
; Function Attrs: norecurse nounwind readonly
define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr1) {
; P9LE-LABEL: s2v_test_f5:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    lxsiwzx v3, 0, r5
; P9LE-NEXT:    vmrglw v2, v2, v2
; P9LE-NEXT:    vmrghw v2, v2, v3
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test_f5:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    lfiwzx f0, 0, r5
; P9BE-NEXT:    xxspltw v2, v2, 1
; P9BE-NEXT:    xxsldwi v3, f0, f0, 1
; P9BE-NEXT:    vmrghw v2, v3, v2
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test_f5:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    vmrglw v2, v2, v2
; P8LE-NEXT:    lxsiwzx v3, 0, r5
; P8LE-NEXT:    vmrghw v2, v2, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test_f5:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    lfiwzx f0, 0, r5
; P8BE-NEXT:    xxspltw v2, v2, 1
; P8BE-NEXT:    xxsldwi v3, f0, f0, 1
; P8BE-NEXT:    vmrghw v2, v3, v2
; P8BE-NEXT:    blr
entry:
  %0 = load float, float* %ptr1, align 8
  %vecins = insertelement <2 x float> %vec, float %0, i32 0
  ret <2 x float> %vecins
}