1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2; RUN: opt -mtriple=armv8.1m.main -mattr=+mve.fp -cost-model -analyze -cost-kind=throughput   < %s | FileCheck %s --check-prefix=THRU
3; RUN: opt -mtriple=armv8.1m.main -mattr=+mve.fp -cost-model -analyze -cost-kind=latency      < %s | FileCheck %s --check-prefix=LATE
4; RUN: opt -mtriple=armv8.1m.main -mattr=+mve.fp -cost-model -analyze -cost-kind=code-size    < %s | FileCheck %s --check-prefix=SIZE
5; RUN: opt -mtriple=armv8.1m.main -mattr=+mve.fp -cost-model -analyze -cost-kind=size-latency < %s | FileCheck %s --check-prefix=SIZE_LATE
6
7; Test a cross-section of intrinsics for various cost-kinds.
8; Other test files may check for accuracy of a particular intrinsic
9; across subtargets or types. This is just a sanity check using an
10; ARM target and a legal scalar type (i32/float) and/or an
11; illegal vector type (16 x i32/float).
12
; Declarations of the intrinsics called by the test functions in this file.
; Most come as a scalar (i32 or float) overload paired with a 16-element
; vector overload; the gather/scatter, vector-reduce and memcpy intrinsics
; are declared in a single form only.
13declare i32 @llvm.smax.i32(i32, i32)
14declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>)
15
16declare float @llvm.fmuladd.f32(float, float, float)
17declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>)
18
19declare float @llvm.log2.f32(float)
20declare <16 x float> @llvm.log2.v16f32(<16 x float>)
21
22declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
23declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata)
24
25declare float @llvm.maximum.f32(float, float)
26declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>)
27
28declare i32 @llvm.cttz.i32(i32, i1)
29declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1)
30
31declare i32 @llvm.ctlz.i32(i32, i1)
32declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1)
33
34declare i32 @llvm.fshl.i32(i32, i32, i32)
35declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
36
37declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
38declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>)
39declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>)
40
41declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
42
; Signed integer maximum: one scalar i32 call and one <16 x i32> call.
; The autogenerated assertions below record the cost printed for each
; instruction under the throughput, latency, code-size and size-latency
; cost kinds. Both call results are unused; only the reported costs matter.
43define void @smax(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
44; THRU-LABEL: 'smax'
45; THRU-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
46; THRU-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
47; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
48;
49; LATE-LABEL: 'smax'
50; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
51; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
52; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
53;
54; SIZE-LABEL: 'smax'
55; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
56; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
57; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
58;
59; SIZE_LATE-LABEL: 'smax'
60; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
61; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
62; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
63;
64  %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
65  %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
66  ret void
67}
68
; Floating-point multiply-add: one scalar float call and one <16 x float>
; call to llvm.fmuladd, each taking three operands. The autogenerated
; assertions below record the cost printed for each instruction under the
; four cost kinds exercised by this file.
69define void @fmuladd(float %a, float %b, float %c, <16 x float> %va, <16 x float> %vb, <16 x float> %vc) {
70; THRU-LABEL: 'fmuladd'
71; THRU-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
72; THRU-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
73; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
74;
75; LATE-LABEL: 'fmuladd'
76; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
77; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
78; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
79;
80; SIZE-LABEL: 'fmuladd'
81; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
82; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
83; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
84;
85; SIZE_LATE-LABEL: 'fmuladd'
86; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
87; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
88; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
89;
90  %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
91  %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
92  ret void
93}
94
; Base-2 logarithm: one scalar float call and one <16 x float> call to
; llvm.log2. The autogenerated assertions below record the cost printed for
; each instruction under the four cost kinds exercised by this file.
95define void @log2(float %a, <16 x float> %va) {
96; THRU-LABEL: 'log2'
97; THRU-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a)
98; THRU-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
99; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
100;
101; LATE-LABEL: 'log2'
102; LATE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %s = call float @llvm.log2.f32(float %a)
103; LATE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
104; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
105;
106; SIZE-LABEL: 'log2'
107; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.log2.f32(float %a)
108; SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
109; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
110;
111; SIZE_LATE-LABEL: 'log2'
112; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a)
113; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
114; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
115;
116  %s = call float @llvm.log2.f32(float %a)
117  %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
118  ret void
119}
120
; Constrained floating-point add: one scalar float call and one <16 x float>
; call to llvm.experimental.constrained.fadd. Each call passes the same value
; for both addends and uses the metadata operands "round.dynamic" and
; "fpexcept.ignore". Note that the vector result is named %t here rather than
; %v as in the neighbouring tests. The autogenerated assertions below record
; the cost printed for each instruction under the four cost kinds.
121define void @constrained_fadd(float %a, <16 x float> %va) {
122; THRU-LABEL: 'constrained_fadd'
123; THRU-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
124; THRU-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
125; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
126;
127; LATE-LABEL: 'constrained_fadd'
128; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
129; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
130; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
131;
132; SIZE-LABEL: 'constrained_fadd'
133; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
134; SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
135; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
136;
137; SIZE_LATE-LABEL: 'constrained_fadd'
138; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
139; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
140; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
141;
142  %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
143  %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
144  ret void
145}
146
; Floating-point maximum via llvm.maximum: one scalar float call and one
; <16 x float> call. The autogenerated assertions below record the cost
; printed for each instruction under the four cost kinds exercised by this
; file.
147define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) {
148; THRU-LABEL: 'fmaximum'
149; THRU-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
150; THRU-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
151; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
152;
153; LATE-LABEL: 'fmaximum'
154; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
155; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
156; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
157;
158; SIZE-LABEL: 'fmaximum'
159; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
160; SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
161; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
162;
163; SIZE_LATE-LABEL: 'fmaximum'
164; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
165; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
166; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
167;
168  %s = call float @llvm.maximum.f32(float %a, float %b)
169  %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
170  ret void
171}
172
; Count trailing zeros: one scalar i32 call and one <16 x i32> call to
; llvm.cttz, both passing i1 false as the second (flag) operand. The
; autogenerated assertions below record the cost printed for each instruction
; under the four cost kinds exercised by this file.
173define void @cttz(i32 %a, <16 x i32> %va) {
174; THRU-LABEL: 'cttz'
175; THRU-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
176; THRU-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
177; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
178;
179; LATE-LABEL: 'cttz'
180; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
181; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
182; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
183;
184; SIZE-LABEL: 'cttz'
185; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
186; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
187; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
188;
189; SIZE_LATE-LABEL: 'cttz'
190; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
191; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
192; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
193;
194  %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
195  %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
196  ret void
197}
198
; Count leading zeros: one scalar i32 call and one <16 x i32> call to
; llvm.ctlz, both passing i1 true as the second (flag) operand -- the
; opposite setting from the cttz test in this file. The autogenerated
; assertions below record the cost printed for each instruction under the
; four cost kinds.
199define void @ctlz(i32 %a, <16 x i32> %va) {
200; THRU-LABEL: 'ctlz'
201; THRU-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
202; THRU-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
203; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
204;
205; LATE-LABEL: 'ctlz'
206; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
207; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
208; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
209;
210; SIZE-LABEL: 'ctlz'
211; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
212; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
213; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
214;
215; SIZE_LATE-LABEL: 'ctlz'
216; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
217; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
218; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
219;
220  %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
221  %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
222  ret void
223}
224
; Funnel shift left: one scalar i32 call and one <16 x i32> call to
; llvm.fshl, each with three non-constant operands. The autogenerated
; assertions below record the cost printed for each instruction under the
; four cost kinds exercised by this file.
225define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) {
226; THRU-LABEL: 'fshl'
227; THRU-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
228; THRU-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
229; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
230;
231; LATE-LABEL: 'fshl'
232; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
233; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
234; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
235;
236; SIZE-LABEL: 'fshl'
237; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
238; SIZE-NEXT:  Cost Model: Found an estimated cost of 229 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
239; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
240;
241; SIZE_LATE-LABEL: 'fshl'
242; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
243; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 250 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
244; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
245;
246  %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
247  %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
248  ret void
249}
250
; Masked gather of <16 x float> through a vector of 16 float pointers, with
; i32 1 as the second operand, a <16 x i1> mask and a <16 x float> final
; operand. Vector form only; there is no scalar counterpart in this test.
; The autogenerated assertions below record the cost printed for each
; instruction under the four cost kinds exercised by this file.
251define void @maskedgather(<16 x float*> %va, <16 x i1> %vb, <16 x float> %vc) {
252; THRU-LABEL: 'maskedgather'
253; THRU-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
254; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
255;
256; LATE-LABEL: 'maskedgather'
257; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
258; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
259;
260; SIZE-LABEL: 'maskedgather'
261; SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
262; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
263;
264; SIZE_LATE-LABEL: 'maskedgather'
265; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
266; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
267;
268  %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
269  ret void
270}
271
; Masked scatter of a <16 x float> value through a vector of 16 float
; pointers, with i32 1 as the third operand and a <16 x i1> mask. Vector form
; only; the call returns void. The autogenerated assertions below record the
; cost printed for each instruction under the four cost kinds exercised by
; this file.
272define void @maskedscatter(<16 x float> %va, <16 x float*> %vb, <16 x i1> %vc) {
273; THRU-LABEL: 'maskedscatter'
274; THRU-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
275; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
276;
277; LATE-LABEL: 'maskedscatter'
278; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
279; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
280;
281; SIZE-LABEL: 'maskedscatter'
282; SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
283; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
284;
285; SIZE_LATE-LABEL: 'maskedscatter'
286; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
287; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
288;
289  call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
290  ret void
291}
292
; Floating-point maximum reduction: a single call to llvm.vector.reduce.fmax
; that takes a <16 x float> operand and produces a scalar float. The
; autogenerated assertions below record the cost printed for each instruction
; under the four cost kinds exercised by this file.
293define void @reduce_fmax(<16 x float> %va) {
294; THRU-LABEL: 'reduce_fmax'
295; THRU-NEXT:  Cost Model: Found an estimated cost of 133 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
296; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
297;
298; LATE-LABEL: 'reduce_fmax'
299; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
300; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
301;
302; SIZE-LABEL: 'reduce_fmax'
303; SIZE-NEXT:  Cost Model: Found an estimated cost of 122 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
304; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
305;
306; SIZE_LATE-LABEL: 'reduce_fmax'
307; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 131 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
308; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
309;
310  %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
311  ret void
312}
313
; Memory copy: a single llvm.memcpy call between two align-1 i8 pointers with
; a constant i32 32 as the length operand and i1 false as the last operand.
; The %c parameter is not referenced by the body. The autogenerated
; assertions below record the cost printed for each instruction under the
; four cost kinds exercised by this file.
314define void @memcpy(i8* %a, i8* %b, i32 %c) {
315; THRU-LABEL: 'memcpy'
316; THRU-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
317; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
318;
319; LATE-LABEL: 'memcpy'
320; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
321; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
322;
323; SIZE-LABEL: 'memcpy'
324; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
325; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
326;
327; SIZE_LATE-LABEL: 'memcpy'
328; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
329; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
330;
331  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
332  ret void
333}
334