; Cost-model test for X86 cast instructions (zext, sext, trunc, sitofp, uitofp,
; fpext, fptrunc). The same IR is analyzed once per CPU below; every invocation
; enables the common check prefix plus one CPU-specific prefix.
; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512
; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2
; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; Casts between i1 and i32 for 4-element vectors, 8-element vectors and
; scalars. All expectations use the common prefix, so they are verified for
; every CPU listed in the run lines.
define i32 @add(i32 %arg) {
; CHECK-LABEL: for function 'add'
  ; -- Same size registers --
  ;CHECK: cost of 1 {{.*}} zext
  %A = zext <4 x i1> undef to <4 x i32>
  ;CHECK: cost of 2 {{.*}} sext
  %B = sext <4 x i1> undef to <4 x i32>
  ;CHECK: cost of 0 {{.*}} trunc
  %C = trunc <4 x i32> undef to <4 x i1>

  ; -- Different size registers --
  ; Negative checks: the patterns below must not occur in the output between
  ; the preceding positive match and the next one.
  ;CHECK-NOT: cost of 1 {{.*}} zext
  %D = zext <8 x i1> undef to <8 x i32>
  ;CHECK-NOT: cost of 2 {{.*}} sext
  %E = sext <8 x i1> undef to <8 x i32>
  ;CHECK-NOT: cost of 2 {{.*}} trunc
  %F = trunc <8 x i32> undef to <8 x i1>

  ; -- scalars --

  ;CHECK: cost of 1 {{.*}} zext
  %G = zext i1 undef to i32
  ;CHECK: cost of 0 {{.*}} trunc
  %H = trunc i32 undef to i1

  ;CHECK: cost of 0 {{.*}} ret
  ret i32 undef
}

; Integer extension and truncation costs across many vector shapes.
; Expectations are per CPU: the AVX2 and AVX prefixes cover most casts, while
; the AVX512 prefix covers the 512-bit-wide cases (%D1-%D5, %G, %G1).
define i32 @zext_sext(<8 x i1> %in) {
; CHECK-AVX2-LABEL: for function 'zext_sext'
; CHECK-AVX-LABEL: for function 'zext_sext'
; CHECK-AVX512-LABEL: for function 'zext_sext'
  ;CHECK-AVX2: cost of 3 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %Z = zext <8 x i1> %in to <8 x i32>
  ;CHECK-AVX2: cost of 3 {{.*}} sext
  ;CHECK-AVX: cost of 7 {{.*}} sext
  %S = sext <8 x i1> %in to <8 x i32>

  ; Extensions whose source fills a 128-bit vector and whose result fills a
  ; 256-bit vector.
  ;CHECK-AVX2: cost of 1 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %A1 = zext <16 x i8> undef to <16 x i16>
  ;CHECK-AVX2: cost of 1 {{.*}} sext
  ;CHECK-AVX: cost of 4 {{.*}} sext
  %A2 = sext <16 x i8> undef to <16 x i16>
  ;CHECK-AVX2: cost of 1 {{.*}} sext
  ;CHECK-AVX: cost of 4 {{.*}} sext
  %A = sext <8 x i16> undef to <8 x i32>
  ;CHECK-AVX2: cost of 1 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %B = zext <8 x i16> undef to <8 x i32>
  ;CHECK-AVX2: cost of 1 {{.*}} sext
  ;CHECK-AVX: cost of 4 {{.*}} sext
  %C = sext <4 x i32> undef to <4 x i64>

  ; Extensions from source vectors narrower than 128 bits.
  ;CHECK-AVX2: cost of 3 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
  ;CHECK-AVX2: cost of 3 {{.*}} sext
  ;CHECK-AVX: cost of 7 {{.*}} sext
  %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
  ;CHECK-AVX2: cost of 3 {{.*}} zext
  ;CHECK-AVX: cost of 3 {{.*}} zext
  %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
  ;CHECK-AVX2: cost of 3 {{.*}} sext
  ;CHECK-AVX: cost of 6 {{.*}} sext
  %C.v4i16.s = sext <4 x i16> undef to <4 x i64>

  ;CHECK-AVX2: cost of 3 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
  ;CHECK-AVX2: cost of 3 {{.*}} sext
  ;CHECK-AVX: cost of 6 {{.*}} sext
  %C.v4i8.s = sext <4 x i8> undef to <4 x i64>

  ;CHECK-AVX2: cost of 1 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %D = zext <4 x i32> undef to <4 x i64>

  ; 16-element extensions, checked only for the knl run. The value name is
  ; part of each pattern so a check cannot bind to a neighbouring instruction.
  ;CHECK-AVX512: cost of 3 {{.*}} %D1 = zext
  %D1 = zext <16 x i32> undef to <16 x i64>

  ;CHECK-AVX512: cost of 3 {{.*}} %D2 = sext
  %D2 = sext <16 x i32> undef to <16 x i64>

  ;CHECK-AVX512: cost of 1 {{.*}} %D3 = zext
  %D3 = zext <16 x i16> undef to <16 x i32>
  ;CHECK-AVX512: cost of 1 {{.*}} %D4 = zext
  %D4 = zext <16 x i8> undef to <16 x i32>
  ;CHECK-AVX512: cost of 2 {{.*}} %D5 = zext
  %D5 = zext <16 x i1> undef to <16 x i32>

  ; Truncations.
  ;CHECK-AVX2: cost of 2 {{.*}} trunc
  ;CHECK-AVX: cost of 4 {{.*}} trunc
  %E = trunc <4 x i64> undef to <4 x i32>
  ;CHECK-AVX2: cost of 2 {{.*}} trunc
  ;CHECK-AVX: cost of 5 {{.*}} trunc
  %F = trunc <8 x i32> undef to <8 x i16>
  ;CHECK-AVX2: cost of 4 {{.*}} trunc
  ;CHECK-AVX: cost of 4 {{.*}} trunc
  %F1 = trunc <16 x i16> undef to <16 x i8>
  ;CHECK-AVX2: cost of 2 {{.*}} trunc
  ;CHECK-AVX: cost of 4 {{.*}} trunc
  %F2 = trunc <8 x i32> undef to <8 x i8>
  ;CHECK-AVX2: cost of 2 {{.*}} trunc
  ;CHECK-AVX: cost of 4 {{.*}} trunc
  %F3 = trunc <4 x i64> undef to <4 x i8>

  ; The knl expectation for %G names the value with its leading '%', so it
  ; cannot match the %G1 line that follows.
  ;CHECK-AVX2: cost of 4 {{.*}} trunc
  ;CHECK-AVX: cost of 9 {{.*}} trunc
  ;CHECK-AVX512: cost of 1 {{.*}} %G = trunc
  %G = trunc <8 x i64> undef to <8 x i32>

  ;CHECK-AVX512: cost of 4 {{.*}} %G1 = trunc
  %G1 = trunc <16 x i64> undef to <16 x i32>

  ret i32 undef
}

; Extension of an 8-element i1 mask that arrives as a function argument
; (not undef) to <8 x i32>. Checked for the AVX2 and AVX runs only.
define i32 @masks8(<8 x i1> %in) {
; CHECK-AVX2-LABEL: for function 'masks8'
; CHECK-AVX-LABEL: for function 'masks8'

  ;CHECK-AVX2: cost of 3 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %Z = zext <8 x i1> %in to <8 x i32>
  ;CHECK-AVX2: cost of 3 {{.*}} sext
  ;CHECK-AVX: cost of 7 {{.*}} sext
  %S = sext <8 x i1> %in to <8 x i32>
  ret i32 undef
}

; Extension of a 4-element i1 mask that arrives as a function argument
; (not undef) to <4 x i64>. Checked for the AVX2 and AVX runs only.
define i32 @masks4(<4 x i1> %in) {
; CHECK-AVX2-LABEL: for function 'masks4'
; CHECK-AVX-LABEL: for function 'masks4'

  ;CHECK-AVX2: cost of 3 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %Z = zext <4 x i1> %in to <4 x i64>
  ;CHECK-AVX2: cost of 3 {{.*}} sext
  ;CHECK-AVX: cost of 6 {{.*}} sext
  %S = sext <4 x i1> %in to <4 x i64>
  ret i32 undef
}

; Signed integer to floating-point conversions of 4-element vectors, from
; i1/i8/i16/i32 sources to both float and double. The expectations use the
; common prefix, so the same costs are required on every CPU in the run lines.
define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
; CHECK-LABEL: for function 'sitofp4'
  ; CHECK: cost of 3 {{.*}} sitofp
  %A1 = sitofp <4 x i1> %a to <4 x float>
  ; CHECK: cost of 3 {{.*}} sitofp
  %A2 = sitofp <4 x i1> %a to <4 x double>

  ; CHECK: cost of 3 {{.*}} sitofp
  %B1 = sitofp <4 x i8> %b to <4 x float>
  ; CHECK: cost of 3 {{.*}} sitofp
  %B2 = sitofp <4 x i8> %b to <4 x double>

  ; CHECK: cost of 3 {{.*}} sitofp
  %C1 = sitofp <4 x i16> %c to <4 x float>
  ; CHECK: cost of 3 {{.*}} sitofp
  %C2 = sitofp <4 x i16> %c to <4 x double>

  ; CHECK: cost of 1 {{.*}} sitofp
  %D1 = sitofp <4 x i32> %d to <4 x float>
  ; CHECK: cost of 1 {{.*}} sitofp
  %D2 = sitofp <4 x i32> %d to <4 x double>
  ret void
}

; Signed integer to float conversions of 8-element vectors from i1/i8/i16/i32
; sources. The expectations use the common prefix, so the same costs are
; required on every CPU in the run lines.
define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
; CHECK-LABEL: for function 'sitofp8'
  ; CHECK: cost of 8 {{.*}} sitofp
  %A1 = sitofp <8 x i1> %a to <8 x float>

  ; CHECK: cost of 8 {{.*}} sitofp
  %B1 = sitofp <8 x i8> %b to <8 x float>

  ; CHECK: cost of 5 {{.*}} sitofp
  %C1 = sitofp <8 x i16> %c to <8 x float>

  ; CHECK: cost of 1 {{.*}} sitofp
  %D1 = sitofp <8 x i32> %d to <8 x float>
  ret void
}

; Unsigned integer to floating-point conversions of 4-element vectors, from
; i1/i8/i16/i32 sources to both float and double. The expectations use the
; common prefix, so the same costs are required on every CPU in the run lines.
define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
; CHECK-LABEL: for function 'uitofp4'
  ; CHECK: cost of 7 {{.*}} uitofp
  %A1 = uitofp <4 x i1> %a to <4 x float>
  ; CHECK: cost of 7 {{.*}} uitofp
  %A2 = uitofp <4 x i1> %a to <4 x double>

  ; CHECK: cost of 2 {{.*}} uitofp
  %B1 = uitofp <4 x i8> %b to <4 x float>
  ; CHECK: cost of 2 {{.*}} uitofp
  %B2 = uitofp <4 x i8> %b to <4 x double>

  ; CHECK: cost of 2 {{.*}} uitofp
  %C1 = uitofp <4 x i16> %c to <4 x float>
  ; CHECK: cost of 2 {{.*}} uitofp
  %C2 = uitofp <4 x i16> %c to <4 x double>

  ; CHECK: cost of 6 {{.*}} uitofp
  %D1 = uitofp <4 x i32> %d to <4 x float>
  ; CHECK: cost of 6 {{.*}} uitofp
  %D2 = uitofp <4 x i32> %d to <4 x double>
  ret void
}

; Unsigned integer to float conversions of 8-element vectors from
; i1/i8/i16/i32 sources. The first three use the common prefix; the i32 case
; has a separate expectation per CPU.
define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
; CHECK-LABEL: for function 'uitofp8'
  ; CHECK: cost of 6 {{.*}} uitofp
  %A1 = uitofp <8 x i1> %a to <8 x float>

  ; CHECK: cost of 5 {{.*}} uitofp
  %B1 = uitofp <8 x i8> %b to <8 x float>

  ; CHECK: cost of 5 {{.*}} uitofp
  %C1 = uitofp <8 x i16> %c to <8 x float>

  ; CHECK-AVX2: cost of 8 {{.*}} uitofp
  ; CHECK-AVX512: cost of 8 {{.*}} uitofp
  ; CHECK-AVX: cost of 9 {{.*}} uitofp
  %D1 = uitofp <8 x i32> %d to <8 x float>
  ret void
}

; Floating-point widening (fpext) and narrowing (fptrunc) between float and
; double vectors of 8 and 16 elements. Only the knl and core-avx2 runs carry
; expectations here; there are none for the corei7-avx prefix.
define void @fp_conv(<8 x float> %a, <16 x float>%b) {
;CHECK-LABEL: for function 'fp_conv'
  ; CHECK-AVX512: cost of 1 {{.*}} fpext
  %A1 = fpext <8 x float> %a to <8 x double>

  ; CHECK-AVX512: cost of 3 {{.*}} fpext
  %A2 = fpext <16 x float> %b to <16 x double>

  ; %A3 is the same cast as %A1; it is repeated with the value name in the
  ; pattern and an additional core-avx2 expectation.
  ; CHECK-AVX2:   cost of 3 {{.*}} %A3 = fpext
  ; CHECK-AVX512: cost of 1 {{.*}} %A3 = fpext
  %A3 = fpext <8 x float> %a to <8 x double>

  ; CHECK-AVX2:   cost of 3 {{.*}} %A4 = fptrunc
  ; CHECK-AVX512: cost of 1 {{.*}} %A4 = fptrunc
  %A4 = fptrunc <8 x double> undef to <8 x float>

  ; CHECK-AVX512: cost of 3 {{.*}} %A5 = fptrunc
  %A5 = fptrunc <16 x double> undef to <16 x float>
  ret void
}
