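; We currently estimate the cost of sext/zext/trunc v8(v16)i32 <-> v8(v16)i8
; instructions as expensive. If lowering is improved the cost model needs to
; change.
; Note: the RUN line below only enables the cost-model prefix, so the
; plain-prefix codegen expectations in this file are informational unless
; another RUN line consumes them.
; RUN: opt < %s  -cost-model -analyze -mtriple=arm-apple-ios6.0.0 -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST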
; Sign-extend an <8 x i8> vector loaded from %loadaddr to <8 x i32> and store
; the result to %storeaddr. The cost-model run expects the sext to be reported
; with a cost of 3.
%T0_5 = type <8 x i8>
%T1_5 = type <8 x i32>
; CHECK-LABEL: func_cvt5:
define void @func_cvt5(%T0_5* %loadaddr, %T1_5* %storeaddr) {
; CHECK: vmovl.s8
; CHECK: vmovl.s16
; CHECK: vmovl.s16
  %v0 = load %T0_5, %T0_5* %loadaddr
; COST: func_cvt5
; COST: cost of 3 {{.*}} sext
  %r = sext %T0_5 %v0 to %T1_5
  store %T1_5 %r, %T1_5* %storeaddr
  ret void
}
; Zero-extend an <8 x i8> vector loaded from %loadaddr to <8 x i32> and store
; the result to %storeaddr.
; We currently estimate the cost of this instruction as expensive (3). If
; lowering is improved the expected cost needs to change.
%TA0_5 = type <8 x i8>
%TA1_5 = type <8 x i32>
; CHECK-LABEL: func_cvt1:
define void @func_cvt1(%TA0_5* %loadaddr, %TA1_5* %storeaddr) {
; CHECK: vmovl.u8
; CHECK: vmovl.u16
; CHECK: vmovl.u16
  %v0 = load %TA0_5, %TA0_5* %loadaddr
; COST: func_cvt1
; COST: cost of 3 {{.*}} zext
  %r = zext %TA0_5 %v0 to %TA1_5
  store %TA1_5 %r, %TA1_5* %storeaddr
  ret void
}

; Truncate an <8 x i32> vector loaded from %loadaddr to <8 x i8> and store the
; result to %storeaddr. The cost-model run expects the trunc to be reported
; with a cost of 3.
%T0_51 = type <8 x i32>
%T1_51 = type <8 x i8>
; CHECK-LABEL: func_cvt51:
define void @func_cvt51(%T0_51* %loadaddr, %T1_51* %storeaddr) {
; CHECK: vmovn.i32
; CHECK: vmovn.i32
; CHECK: vmovn.i16
  %v0 = load %T0_51, %T0_51* %loadaddr
; COST: func_cvt51
; COST: cost of 3 {{.*}} trunc
  %r = trunc %T0_51 %v0 to %T1_51
  store %T1_51 %r, %T1_51* %storeaddr
  ret void
}

; Sign-extend a <16 x i8> vector loaded from %loadaddr to <16 x i32> and store
; the result to %storeaddr. The cost-model run expects the sext to be reported
; with a cost of 6.
%TT0_5 = type <16 x i8>
%TT1_5 = type <16 x i32>
; CHECK-LABEL: func_cvt52:
define void @func_cvt52(%TT0_5* %loadaddr, %TT1_5* %storeaddr) {
; CHECK: vmovl.s16
; CHECK: vmovl.s16
; CHECK: vmovl.s16
; CHECK: vmovl.s16
  %v0 = load %TT0_5, %TT0_5* %loadaddr
; COST: func_cvt52
; COST: cost of 6 {{.*}} sext
  %r = sext %TT0_5 %v0 to %TT1_5
  store %TT1_5 %r, %TT1_5* %storeaddr
  ret void
}
; Zero-extend a <16 x i8> vector loaded from %loadaddr to <16 x i32> and store
; the result to %storeaddr.
; We currently estimate the cost of this instruction as expensive (6). If
; lowering is improved the expected cost needs to change.
%TTA0_5 = type <16 x i8>
%TTA1_5 = type <16 x i32>
; CHECK-LABEL: func_cvt12:
define void @func_cvt12(%TTA0_5* %loadaddr, %TTA1_5* %storeaddr) {
; CHECK: vmovl.u16
; CHECK: vmovl.u16
; CHECK: vmovl.u16
; CHECK: vmovl.u16
  %v0 = load %TTA0_5, %TTA0_5* %loadaddr
; COST: func_cvt12
; COST: cost of 6 {{.*}} zext
  %r = zext %TTA0_5 %v0 to %TTA1_5
  store %TTA1_5 %r, %TTA1_5* %storeaddr
  ret void
}

; Truncate a <16 x i32> vector loaded from %loadaddr to <16 x i8> and store the
; result to %storeaddr. The cost-model run expects the trunc to be reported
; with a cost of 6.
%TT0_51 = type <16 x i32>
%TT1_51 = type <16 x i8>
; CHECK-LABEL: func_cvt512:
define void @func_cvt512(%TT0_51* %loadaddr, %TT1_51* %storeaddr) {
; CHECK: vmovn.i32
; CHECK: vmovn.i32
; CHECK: vmovn.i32
; CHECK: vmovn.i32
; CHECK: vmovn.i16
; CHECK: vmovn.i16
  %v0 = load %TT0_51, %TT0_51* %loadaddr
; COST: func_cvt512
; COST: cost of 6 {{.*}} trunc
  %r = trunc %TT0_51 %v0 to %TT1_51
  store %TT1_51 %r, %TT1_51* %storeaddr
  ret void
}

; Sign-extend a <4 x i16> vector loaded from %loadaddr to <4 x i64> and store
; the result to %storeaddr. The cost-model run expects the sext to be reported
; with a cost of 3.
; CHECK-LABEL: sext_v4i16_v4i64:
define void @sext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
; CHECK: vmovl.s32
; CHECK: vmovl.s32
  %v0 = load <4 x i16>, <4 x i16>* %loadaddr
; COST: sext_v4i16_v4i64
; COST: cost of 3 {{.*}} sext
  %r = sext <4 x i16> %v0 to <4 x i64>
  store <4 x i64> %r, <4 x i64>* %storeaddr
  ret void
}

; Zero-extend a <4 x i16> vector loaded from %loadaddr to <4 x i64> and store
; the result to %storeaddr. The cost-model run expects the zext to be reported
; with a cost of 3.
; CHECK-LABEL: zext_v4i16_v4i64:
define void @zext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
; CHECK: vmovl.u32
; CHECK: vmovl.u32
  %v0 = load <4 x i16>, <4 x i16>* %loadaddr
; COST: zext_v4i16_v4i64
; COST: cost of 3 {{.*}} zext
  %r = zext <4 x i16> %v0 to <4 x i64>
  store <4 x i64> %r, <4 x i64>* %storeaddr
  ret void
}

; Sign-extend an <8 x i16> vector loaded from %loadaddr to <8 x i64> and store
; the result to %storeaddr. The cost-model run expects the sext to be reported
; with a cost of 6.
; CHECK-LABEL: sext_v8i16_v8i64:
define void @sext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) {
; CHECK: vmovl.s32
; CHECK: vmovl.s32
; CHECK: vmovl.s32
; CHECK: vmovl.s32
  %v0 = load <8 x i16>, <8 x i16>* %loadaddr
; COST: sext_v8i16_v8i64
; COST: cost of 6 {{.*}} sext
  %r = sext <8 x i16> %v0 to <8 x i64>
  store <8 x i64> %r, <8 x i64>* %storeaddr
  ret void
}

; Zero-extend an <8 x i16> vector loaded from %loadaddr to <8 x i64> and store
; the result to %storeaddr. The cost-model run expects the zext to be reported
; with a cost of 6.
; CHECK-LABEL: zext_v8i16_v8i64:
define void @zext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) {
; CHECK: vmovl.u32
; CHECK: vmovl.u32
; CHECK: vmovl.u32
; CHECK: vmovl.u32
  %v0 = load <8 x i16>, <8 x i16>* %loadaddr
; COST: zext_v8i16_v8i64
; COST: cost of 6 {{.*}} zext
  %r = zext <8 x i16> %v0 to <8 x i64>
  store <8 x i64> %r, <8 x i64>* %storeaddr
  ret void
}