; Checks PowerPC64 code generation for loads and stores whose IR alignment is 1.
; The test runs llc twice: once with VSX disabled (checked with the default
; CHECK prefix) and once with VSX enabled (checked with the CHECK-VSX prefix).
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s
; NOTE(review): the layout string below declares 32-bit pointers (p:32:32:32)
; while both RUN lines use a powerpc64 triple -- confirm this is intended.
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
5
; foo1: copy an i16 from %p to %r, with both accesses marked align 1.
; Both the non-VSX (CHECK) and VSX (CHECK-VSX) runs expect an lhz followed by
; an sth inside this function.
define void @foo1(i16* %p, i16* %r) nounwind {
entry:
  %v = load i16* %p, align 1
  store i16 %v, i16* %r, align 1
  ret void

; The -LABEL form pins the instruction checks below to this function's output.
; CHECK-LABEL: @foo1
; CHECK: lhz
; CHECK: sth

; CHECK-VSX-LABEL: @foo1
; CHECK-VSX: lhz
; CHECK-VSX: sth
}
20
; foo2: copy an i32 from %p to %r, with both accesses marked align 1.
; Both the non-VSX (CHECK) and VSX (CHECK-VSX) runs expect an lwz followed by
; an stw inside this function.
define void @foo2(i32* %p, i32* %r) nounwind {
entry:
  %v = load i32* %p, align 1
  store i32 %v, i32* %r, align 1
  ret void

; The -LABEL form pins the instruction checks below to this function's output.
; CHECK-LABEL: @foo2
; CHECK: lwz
; CHECK: stw

; CHECK-VSX-LABEL: @foo2
; CHECK-VSX: lwz
; CHECK-VSX: stw
}
35
; foo3: copy an i64 from %p to %r, with both accesses marked align 1.
; Both the non-VSX (CHECK) and VSX (CHECK-VSX) runs expect an ld followed by
; an std inside this function.
define void @foo3(i64* %p, i64* %r) nounwind {
entry:
  %v = load i64* %p, align 1
  store i64 %v, i64* %r, align 1
  ret void

; The -LABEL form pins the instruction checks below to this function's output.
; CHECK-LABEL: @foo3
; CHECK: ld
; CHECK: std

; CHECK-VSX-LABEL: @foo3
; CHECK-VSX: ld
; CHECK-VSX: std
}
50
; foo4: copy a float from %p to %r, with both accesses marked align 1.
; Both the non-VSX (CHECK) and VSX (CHECK-VSX) runs expect an lfs followed by
; an stfs inside this function.
define void @foo4(float* %p, float* %r) nounwind {
entry:
  %v = load float* %p, align 1
  store float %v, float* %r, align 1
  ret void

; The -LABEL form pins the instruction checks below to this function's output.
; CHECK-LABEL: @foo4
; CHECK: lfs
; CHECK: stfs

; CHECK-VSX-LABEL: @foo4
; CHECK-VSX: lfs
; CHECK-VSX: stfs
}
65
; foo5: copy a double from %p to %r, with both accesses marked align 1.
; This is the first function in the file where the two runs differ: the
; non-VSX run (CHECK) expects lfd/stfd, while the VSX run (CHECK-VSX) expects
; lxsdx/stxsdx.
define void @foo5(double* %p, double* %r) nounwind {
entry:
  %v = load double* %p, align 1
  store double %v, double* %r, align 1
  ret void

; The -LABEL form pins the instruction checks below to this function's output.
; CHECK-LABEL: @foo5
; CHECK: lfd
; CHECK: stfd

; CHECK-VSX-LABEL: @foo5
; CHECK-VSX: lxsdx
; CHECK-VSX: stxsdx
}
80
; foo6: copy a <4 x float> vector from %p to %r, with both accesses marked
; align 1. The non-VSX and VSX runs expect different instruction sequences;
; see the comments next to each group of checks.
define void @foo6(<4 x float>* %p, <4 x float>* %r) nounwind {
entry:
  %v = load <4 x float>* %p, align 1
  store <4 x float> %v, <4 x float>* %r, align 1
  ret void

; Without VSX, these loads and stores are legalized into aligned loads and
; stores using aligned stack slots.
; The -LABEL form pins the instruction checks below to this function's output.
; CHECK-LABEL: @foo6
; CHECK-DAG: ld
; CHECK-DAG: ld
; CHECK-DAG: stdx
; CHECK: stdx

; For VSX on P7, unaligned loads and stores are preferable to aligned
; stack slots, but lvsl/vperm is better still.  (On P8 lxvw4x is preferable.)
; Using unaligned stxvw4x is preferable on both machines.
; CHECK-VSX-LABEL: @foo6
; CHECK-VSX-DAG: lvsl
; CHECK-VSX-DAG: lvx
; CHECK-VSX-DAG: lvx
; CHECK-VSX: vperm
; CHECK-VSX: stxvw4x
}
105
106