; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -mtriple=x86_64-- -cost-model -analyze -cost-kind=throughput   < %s | FileCheck %s --check-prefix=THRU
; RUN: opt -mtriple=x86_64-- -cost-model -analyze -cost-kind=latency      < %s | FileCheck %s --check-prefix=LATE
; RUN: opt -mtriple=x86_64-- -cost-model -analyze -cost-kind=code-size    < %s | FileCheck %s --check-prefix=SIZE
; RUN: opt -mtriple=x86_64-- -cost-model -analyze -cost-kind=size-latency < %s | FileCheck %s --check-prefix=SIZE_LATE

; Test a cross-section of intrinsics for various cost-kinds.
; Other test files may check for accuracy of a particular intrinsic
; across subtargets or types. This is just a sanity check using the
; default x86 target and a legal scalar type (i32/float) and/or an
; illegal vector type (16 x i32/float).

; Declarations of the intrinsics exercised by the test functions in this
; file. Most come as a scalar (i32/float) and a 16-element vector pair;
; gather/scatter and the reductions are vector-only, memcpy is scalar-only.
declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32)
declare {<16 x i32>, <16 x i1>} @llvm.umul.with.overflow.v16i32(<16 x i32>, <16 x i32>)

declare i32 @llvm.smax.i32(i32, i32)
declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>)

declare float @llvm.fmuladd.f32(float, float, float)
declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>)

declare float @llvm.log2.f32(float)
declare <16 x float> @llvm.log2.v16f32(<16 x float>)

declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata)

declare float @llvm.maximum.f32(float, float)
declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>)

declare i32 @llvm.cttz.i32(i32, i1)
declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1)

declare i32 @llvm.ctlz.i32(i32, i1)
declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1)

declare i32 @llvm.fshl.i32(i32, i32, i32)
declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)

declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>)
declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>)
declare float @llvm.vector.reduce.fmul.v16f32(float, <16 x float>)
declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>)

declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)

; Unsigned multiply-with-overflow: costs of the scalar i32 call (%s) and the
; <16 x i32> call (%v) under each of the four cost kinds. Results are unused;
; only the per-instruction cost report is checked.
define void @umul(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
; THRU-LABEL: 'umul'
; THRU-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
; THRU-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'umul'
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'umul'
; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'umul'
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %s = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
  %v = call {<16 x i32>, <16 x i1>} @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
  ret void
}

; Signed integer maximum: costs of the scalar i32 call (%s) and the
; <16 x i32> call (%v) under each of the four cost kinds.
define void @smax(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
; THRU-LABEL: 'smax'
; THRU-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
; THRU-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'smax'
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'smax'
; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'smax'
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
  ret void
}

; Floating-point multiply-add: costs of the scalar float call (%s) and the
; <16 x float> call (%v) under each of the four cost kinds.
define void @fmuladd(float %a, float %b, float %c, <16 x float> %va, <16 x float> %vb, <16 x float> %vc) {
; THRU-LABEL: 'fmuladd'
; THRU-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
; THRU-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'fmuladd'
; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'fmuladd'
; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'fmuladd'
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
  ret void
}

; Base-2 logarithm: costs of the scalar float call (%s) and the
; <16 x float> call (%v) under each of the four cost kinds.
define void @log2(float %a, <16 x float> %va) {
; THRU-LABEL: 'log2'
; THRU-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a)
; THRU-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'log2'
; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.log2.f32(float %a)
; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'log2'
; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.log2.f32(float %a)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'log2'
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %s = call float @llvm.log2.f32(float %a)
  %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
  ret void
}

; Constrained floating-point add: costs of the scalar float call (%s) and the
; <16 x float> call (%t) under each of the four cost kinds. Each call adds its
; operand to itself, with "round.dynamic" rounding and "fpexcept.ignore"
; exception behavior metadata.
define void @constrained_fadd(float %a, <16 x float> %va) {
; THRU-LABEL: 'constrained_fadd'
; THRU-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
; THRU-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'constrained_fadd'
; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'constrained_fadd'
; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
; SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'constrained_fadd'
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
  %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
  ret void
}

; llvm.maximum (floating-point maximum): costs of the scalar float call (%s)
; and the <16 x float> call (%v) under each of the four cost kinds.
define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) {
; THRU-LABEL: 'fmaximum'
; THRU-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
; THRU-NEXT:  Cost Model: Found an estimated cost of 196 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'fmaximum'
; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'fmaximum'
; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'fmaximum'
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 196 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %s = call float @llvm.maximum.f32(float %a, float %b)
  %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
  ret void
}

; Count trailing zeros: costs of the scalar i32 call (%s) and the
; <16 x i32> call (%v) under each of the four cost kinds. Both calls pass
; `i1 false` as the second (flag) argument.
define void @cttz(i32 %a, <16 x i32> %va) {
; THRU-LABEL: 'cttz'
; THRU-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
; THRU-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'cttz'
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'cttz'
; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'cttz'
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
  %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
  ret void
}

; Count leading zeros: costs of the scalar i32 call (%s) and the
; <16 x i32> call (%v) under each of the four cost kinds. Both calls pass
; `i1 true` as the second (flag) argument, unlike @cttz which passes false.
define void @ctlz(i32 %a, <16 x i32> %va) {
; THRU-LABEL: 'ctlz'
; THRU-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
; THRU-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'ctlz'
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'ctlz'
; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'ctlz'
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
  %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
  ret void
}

; Funnel shift left: costs of the scalar i32 call (%s) and the <16 x i32>
; call (%v) under each of the four cost kinds. All three operands are
; function arguments (no constant shift amount).
define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) {
; THRU-LABEL: 'fshl'
; THRU-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
; THRU-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'fshl'
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'fshl'
; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'fshl'
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
  %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
  ret void
}

; Masked gather of 16 floats: cost of a single call under each of the four
; cost kinds. %va supplies the pointers, the i32 argument is the constant 1,
; %vb is the <16 x i1> mask and %vc is the final <16 x float> operand.
define void @maskedgather(<16 x float*> %va, <16 x i1> %vb, <16 x float> %vc) {
; THRU-LABEL: 'maskedgather'
; THRU-NEXT:  Cost Model: Found an estimated cost of 87 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'maskedgather'
; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'maskedgather'
; SIZE-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'maskedgather'
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
  ret void
}

; Masked scatter of 16 floats: cost of a single call under each of the four
; cost kinds. %va holds the values, %vb the destination pointers, the i32
; argument is the constant 1 and %vc is the <16 x i1> mask.
define void @maskedscatter(<16 x float> %va, <16 x float*> %vb, <16 x i1> %vc) {
; THRU-LABEL: 'maskedscatter'
; THRU-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'maskedscatter'
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'maskedscatter'
; SIZE-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'maskedscatter'
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
  ret void
}

; Floating-point max reduction of a <16 x float> to a single float: cost of
; the call under each of the four cost kinds. The call carries no fast-math
; flags.
define void @reduce_fmax(<16 x float> %va) {
; THRU-LABEL: 'reduce_fmax'
; THRU-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'reduce_fmax'
; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'reduce_fmax'
; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'reduce_fmax'
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
  ret void
}

; Floating-point multiply reduction of a <16 x float> with start value 42.0:
; cost of the call under each of the four cost kinds. The call carries no
; fast-math flags (contrast with @reduce_fadd_fast).
define void @reduce_fmul(<16 x float> %va) {
; THRU-LABEL: 'reduce_fmul'
; THRU-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'reduce_fmul'
; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'reduce_fmul'
; SIZE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'reduce_fmul'
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %v = call float @llvm.vector.reduce.fmul.v16f32(float 42.0, <16 x float> %va)
  ret void
}

; Floating-point add reduction of a <16 x float> with start value 0.0 and the
; `fast` fast-math flag on the call: cost under each of the four cost kinds.
define void @reduce_fadd_fast(<16 x float> %va) {
; THRU-LABEL: 'reduce_fadd_fast'
; THRU-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'reduce_fadd_fast'
; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'reduce_fadd_fast'
; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'reduce_fadd_fast'
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> %va)
  ret void
}

; memcpy with a constant length: cost of the call under each of the four
; cost kinds. The length is the literal 32 and both pointers are `align 1`;
; the i1 argument is false. Note that the %c parameter is not used.
define void @memcpy(i8* %a, i8* %b, i32 %c) {
; THRU-LABEL: 'memcpy'
; THRU-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'memcpy'
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'memcpy'
; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'memcpy'
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
  ret void
}
