; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"

define void @foo1(i16* %p, i16* %r) nounwind {
; CHECK-LABEL: foo1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lhz 3, 0(3)
; CHECK-NEXT:    sth 3, 0(4)
; CHECK-NEXT:    blr
;
; CHECK-VSX-LABEL: foo1:
; CHECK-VSX:       # %bb.0: # %entry
; CHECK-VSX-NEXT:    lhz 3, 0(3)
; CHECK-VSX-NEXT:    sth 3, 0(4)
; CHECK-VSX-NEXT:    blr
entry:
  %v = load i16, i16* %p, align 1
  store i16 %v, i16* %r, align 1
  ret void
}

define void @foo2(i32* %p, i32* %r) nounwind {
; CHECK-LABEL: foo2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lwz 3, 0(3)
; CHECK-NEXT:    stw 3, 0(4)
; CHECK-NEXT:    blr
;
; CHECK-VSX-LABEL: foo2:
; CHECK-VSX:       # %bb.0: # %entry
; CHECK-VSX-NEXT:    lwz 3, 0(3)
; CHECK-VSX-NEXT:    stw 3, 0(4)
; CHECK-VSX-NEXT:    blr
entry:
  %v = load i32, i32* %p, align 1
  store i32 %v, i32* %r, align 1
  ret void
}

define void @foo3(i64* %p, i64* %r) nounwind {
; CHECK-LABEL: foo3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    ld 3, 0(3)
; CHECK-NEXT:    std 3, 0(4)
; CHECK-NEXT:    blr
;
; CHECK-VSX-LABEL: foo3:
; CHECK-VSX:       # %bb.0: # %entry
; CHECK-VSX-NEXT:    ld 3, 0(3)
; CHECK-VSX-NEXT:    std 3, 0(4)
; CHECK-VSX-NEXT:    blr
entry:
  %v = load i64, i64* %p, align 1
  store i64 %v, i64* %r, align 1
  ret void
}

define void @foo4(float* %p, float* %r) nounwind {
; CHECK-LABEL: foo4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lfs 0, 0(3)
; CHECK-NEXT:    stfs 0, 0(4)
; CHECK-NEXT:    blr
;
; CHECK-VSX-LABEL: foo4:
; CHECK-VSX:       # %bb.0: # %entry
; CHECK-VSX-NEXT:    lfs 0, 0(3)
; CHECK-VSX-NEXT:    stfs 0, 0(4)
; CHECK-VSX-NEXT:    blr
entry:
  %v = load float, float* %p, align 1
  store float %v, float* %r, align 1
  ret void
}

define void @foo5(double* %p, double* %r) nounwind {
; CHECK-LABEL: foo5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lfd 0, 0(3)
; CHECK-NEXT:    stfd 0, 0(4)
; CHECK-NEXT:    blr
;
; CHECK-VSX-LABEL: foo5:
; CHECK-VSX:       # %bb.0: # %entry
; CHECK-VSX-NEXT:    lfdx 0, 0, 3
; CHECK-VSX-NEXT:    stfdx 0, 0, 4
; CHECK-VSX-NEXT:    blr
entry:
  %v = load double, double* %p, align 1
  store double %v, double* %r, align 1
  ret void
}

define void @foo6(<4 x float>* %p, <4 x float>* %r) nounwind {
; These loads and stores are legalized into aligned loads and stores
; using aligned stack slots.
; CHECK-LABEL: foo6:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 5, 15
; CHECK-NEXT:    lvsl 3, 0, 3
; CHECK-NEXT:    lvx 2, 3, 5
; CHECK-NEXT:    lvx 4, 0, 3
; CHECK-NEXT:    addi 3, 1, -16
; CHECK-NEXT:    vperm 2, 4, 2, 3
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    ld 3, -8(1)
; CHECK-NEXT:    std 3, 8(4)
; CHECK-NEXT:    ld 3, -16(1)
; CHECK-NEXT:    std 3, 0(4)
; CHECK-NEXT:    blr
;
; CHECK-VSX-LABEL: foo6:
; CHECK-VSX:       # %bb.0: # %entry
; CHECK-VSX-NEXT:    li 5, 15
; CHECK-VSX-NEXT:    lvsl 3, 0, 3
; CHECK-VSX-NEXT:    lvx 2, 3, 5
; CHECK-VSX-NEXT:    lvx 4, 0, 3
; CHECK-VSX-NEXT:    vperm 2, 4, 2, 3
; CHECK-VSX-NEXT:    stxvw4x 34, 0, 4
; CHECK-VSX-NEXT:    blr
; For VSX on P7, unaligned loads and stores are preferable to aligned
; stack slots, but lvsl/vperm is better still. (On P8 lxvw4x is preferable.)
; Using unaligned stxvw4x is preferable on both machines.
entry:
  %v = load <4 x float>, <4 x float>* %p, align 1
  store <4 x float> %v, <4 x float>* %r, align 1
  ret void
}