; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs -disable-lsr \
; RUN:   -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 < %s | FileCheck %s
; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs -disable-lsr \
; RUN:   -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr10 < %s | FileCheck %s \
; RUN:   --check-prefix=CHECK-BE

; This test checks the PPCLoopInstrFormPrep pass supports the lxvp and stxvp
; intrinsics so we generate more dq-form instructions instead of x-forms.
; The checks below expect the paired loads in the loop to use immediate
; displacements (e.g. `lxvp vsp2, 0(r3)` / `lxvp vsp4, 32(r3)`) with a single
; pointer increment, on both little-endian (CHECK) and big-endian (CHECK-BE)
; targets.

%_elem_type_of_x = type <{ double }>
%_elem_type_of_y = type <{ double }>

define void @foo(i64* %.n, [0 x %_elem_type_of_x]* %.x, [0 x %_elem_type_of_y]* %.y, <2 x double>* %.sum) {
; CHECK-LABEL: foo:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    ld r5, 0(r3)
; CHECK-NEXT:    cmpdi r5, 1
; CHECK-NEXT:    bltlr cr0
; CHECK-NEXT:  # %bb.1: # %_loop_1_do_.lr.ph
; CHECK-NEXT:    addi r3, r4, 1
; CHECK-NEXT:    addi r4, r5, -1
; CHECK-NEXT:    lxv vs0, 0(r6)
; CHECK-NEXT:    rldicl r4, r4, 60, 4
; CHECK-NEXT:    addi r4, r4, 1
; CHECK-NEXT:    mtctr r4
; CHECK-NEXT:    .p2align 5
; CHECK-NEXT:  .LBB0_2: # %_loop_1_do_
; CHECK-NEXT:    #
; CHECK-NEXT:    lxvp vsp2, 0(r3)
; CHECK-NEXT:    lxvp vsp4, 32(r3)
; CHECK-NEXT:    addi r3, r3, 128
; CHECK-NEXT:    xvadddp vs0, vs0, vs3
; CHECK-NEXT:    xvadddp vs0, vs0, vs2
; CHECK-NEXT:    xvadddp vs0, vs0, vs5
; CHECK-NEXT:    xvadddp vs0, vs0, vs4
; CHECK-NEXT:    bdnz .LBB0_2
; CHECK-NEXT:  # %bb.3: # %_loop_1_loopHeader_._return_bb_crit_edge
; CHECK-NEXT:    stxv vs0, 0(r6)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: foo:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    ld r5, 0(r3)
; CHECK-BE-NEXT:    cmpdi r5, 1
; CHECK-BE-NEXT:    bltlr cr0
; CHECK-BE-NEXT:  # %bb.1: # %_loop_1_do_.lr.ph
; CHECK-BE-NEXT:    addi r3, r4, 1
; CHECK-BE-NEXT:    addi r4, r5, -1
; CHECK-BE-NEXT:    lxv vs0, 0(r6)
; CHECK-BE-NEXT:    rldicl r4, r4, 60, 4
; CHECK-BE-NEXT:    addi r4, r4, 1
; CHECK-BE-NEXT:    mtctr r4
; CHECK-BE-NEXT:    .p2align 5
; CHECK-BE-NEXT:  .LBB0_2: # %_loop_1_do_
; CHECK-BE-NEXT:    #
; CHECK-BE-NEXT:    lxvp vsp2, 0(r3)
; CHECK-BE-NEXT:    lxvp vsp4, 32(r3)
; CHECK-BE-NEXT:    addi r3, r3, 128
; CHECK-BE-NEXT:    xvadddp vs0, vs0, vs2
; CHECK-BE-NEXT:    xvadddp vs0, vs0, vs3
; CHECK-BE-NEXT:    xvadddp vs0, vs0, vs4
; CHECK-BE-NEXT:    xvadddp vs0, vs0, vs5
; CHECK-BE-NEXT:    bdnz .LBB0_2
; CHECK-BE-NEXT:  # %bb.3: # %_loop_1_loopHeader_._return_bb_crit_edge
; CHECK-BE-NEXT:    stxv vs0, 0(r6)
; CHECK-BE-NEXT:    blr
entry:
  %_val_n_2 = load i64, i64* %.n, align 8
  %_grt_tmp7 = icmp slt i64 %_val_n_2, 1
  br i1 %_grt_tmp7, label %_return_bb, label %_loop_1_do_.lr.ph

_loop_1_do_.lr.ph:                                ; preds = %entry
  %x_rvo_based_addr_5 = getelementptr inbounds [0 x %_elem_type_of_x], [0 x %_elem_type_of_x]* %.x, i64 0, i64 -1
  %.sum.promoted = load <2 x double>, <2 x double>* %.sum, align 16
  br label %_loop_1_do_

_loop_1_do_:                                      ; preds = %_loop_1_do_.lr.ph, %_loop_1_do_
  %_val_sum_9 = phi <2 x double> [ %.sum.promoted, %_loop_1_do_.lr.ph ], [ %_add_tmp49, %_loop_1_do_ ]
  %i.08 = phi i64 [ 1, %_loop_1_do_.lr.ph ], [ %_loop_1_update_loop_ix, %_loop_1_do_ ]
  %x_ix_dim_0_6 = getelementptr %_elem_type_of_x, %_elem_type_of_x* %x_rvo_based_addr_5, i64 %i.08
  %x_ix_dim_0_ = bitcast %_elem_type_of_x* %x_ix_dim_0_6 to i8*
  %0 = getelementptr i8, i8* %x_ix_dim_0_, i64 1
  %1 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %0)
  %2 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %1)
  %.fca.0.extract1 = extractvalue { <16 x i8>, <16 x i8> } %2, 0
  %.fca.1.extract2 = extractvalue { <16 x i8>, <16 x i8> } %2, 1
  %3 = getelementptr i8, i8* %x_ix_dim_0_, i64 33
  %4 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %3)
  %5 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %4)
  %.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %5, 0
  %.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %5, 1
  %6 = bitcast <16 x i8> %.fca.0.extract1 to <2 x double>
  %_add_tmp23 = fadd contract <2 x double> %_val_sum_9, %6
  %7 = bitcast <16 x i8> %.fca.1.extract2 to <2 x double>
  %_add_tmp32 = fadd contract <2 x double> %_add_tmp23, %7
  %8 = bitcast <16 x i8> %.fca.0.extract to <2 x double>
  %_add_tmp40 = fadd contract <2 x double> %_add_tmp32, %8
  %9 = bitcast <16 x i8> %.fca.1.extract to <2 x double>
  %_add_tmp49 = fadd contract <2 x double> %_add_tmp40, %9
  %_loop_1_update_loop_ix = add nuw nsw i64 %i.08, 16
  %_grt_tmp = icmp sgt i64 %_loop_1_update_loop_ix, %_val_n_2
  br i1 %_grt_tmp, label %_loop_1_loopHeader_._return_bb_crit_edge, label %_loop_1_do_

_loop_1_loopHeader_._return_bb_crit_edge:         ; preds = %_loop_1_do_
  store <2 x double> %_add_tmp49, <2 x double>* %.sum, align 16
  br label %_return_bb

_return_bb:                                       ; preds = %_loop_1_loopHeader_._return_bb_crit_edge, %entry
  ret void
}

declare <256 x i1> @llvm.ppc.vsx.lxvp(i8*)
declare { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1>)