1 /*
2 * Copyright (c) 2006-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17
18 /**
19 \file
20 \brief Structures to describe the x86 CPU type and CPU features
21 */
22
23 #include "gbldefs.h"
24 #include "global.h"
25 #include "error.h"
26 #include "x86.h"
27
28 X86TYPE mach;
29
30 #ifdef TARGET_WIN
31 #define DONT_GENERATE_AVX512 true /* a temporary restriction */
32 #else
33 #define DONT_GENERATE_AVX512 false
34 #endif
35
36 void
set_mach(X86TYPE * mach,int machtype)37 set_mach(X86TYPE *mach, int machtype)
38 {
39 int has_fma3 = 0;
40 int has_fma4 = 0;
41 memset(&mach->type, 0, sizeof(mach->type));
42 memset(&mach->feature, 0, sizeof(mach->feature));
43 mach->tpval = machtype;
44 /*
45 * -tp may not be appropriate for ARM; so machtype may refer to
46 * ARM attriubutes rather than a TP_ value.
47 */
48 #if !defined(TARGET_LLVM_ARM) /* { */
49 mach->cachesize = flg.x[32]; /* this applies to all 'machtype's */
50
51 switch (machtype) {
52 case TP_ZEN:
53 /* AMD Zen microarchitecture, e.g. EPYC and Ryzen processors.
54 */
55 mach->type[MACH_AMD_ZEN] = 1;
56 mach->feature[FEATURE_AVX2] = 1;
57 /* ...and fall through... */
58
59 case TP_PILEDRIVER:
60 /* AMD piledriver
61 */
62 mach->type[MACH_AMD_PILEDRIVER] = 1;
63 mach->feature[FEATURE_FMA3] = 1;
64 has_fma3 = 1;
65 mach->feature[FEATURE_LD_VMOVUPD] = 1; /* added on 14 Dec 2015 */
66 mach->feature[FEATURE_ST_VMOVUPD] = 1; /* " " " " */
67 mach->feature[FEATURE_ST_MOVUPD] = 1; /* " " " " */
68 /* ...and fall through... */
69
70 case TP_BULLDOZER:
71 /* AMD bulldozer
72 */
73 mach->type[MACH_AMD_BULLDOZER] = 1;
74 mach->feature[FEATURE_SSE41] = 1; /* JHM: added on 2 Feb 2017 */
75 mach->feature[FEATURE_SSE42] = 1; /* " " " " " */
76 mach->feature[FEATURE_AVX] = 1;
77 if (machtype == TP_PILEDRIVER || machtype == TP_BULLDOZER) {
78 mach->feature[FEATURE_SIMD128] = 1;
79 mach->feature[FEATURE_FMA4] = 1;
80 has_fma4 = 1;
81 mach->feature[FEATURE_XOP] = 1;
82 }
83 mach->feature[FEATURE_ALIGNLOOP8] = 1;
84 mach->feature[FEATURE_ALIGNJMP8] = 1;
85 /* ...and fall through... */
86
87 case TP_ISTANBUL:
88 /* AMD instanbul
89 */
90 mach->type[MACH_AMD_ISTANBUL] = 1;
91 /* ...and fall through... */
92
93 case TP_SHANGHAI:
94 /* AMD shanghai, like greyhound but with a larger cache.
95 */
96 mach->type[MACH_AMD_SHANGHAI] = 1;
97 mach->feature[FEATURE_MULTI_ACCUM] = 1;
98 if (mach->cachesize == 0)
99 mach->cachesize = (6 * 1024 * 1024);
100 /* ...and fall through... */
101
102 case TP_GH:
103 /* AMD greyhound
104 */
105 mach->type[MACH_AMD] = 1;
106 mach->type[MACH_AMD_HAMMER] = 1;
107 mach->type[MACH_AMD_GH] = 1;
108 mach->feature[FEATURE_SSE] = 1;
109 mach->feature[FEATURE_SSE2] = 1;
110 mach->feature[FEATURE_SSE3] = 1;
111 mach->feature[FEATURE_MISALIGNEDSSE] = 1;
112 mach->feature[FEATURE_LD_MOVUPD] = 1;
113 mach->feature[FEATURE_UNROLL_16] = 1;
114 mach->feature[FEATURE_DOUBLE_UNROLL] = 1;
115 mach->feature[FEATURE_PEEL_SHUFFLE] = 1;
116 mach->feature[FEATURE_PREFETCHNTA] = 1;
117 mach->feature[FEATURE_PDSHUF] = 1;
118 mach->feature[FEATURE_GHLIBS] = 1;
119 mach->feature[FEATURE_SSEMISALN] = 1;
120 mach->feature[FEATURE_DAZ] = 0; /* cf. 1 for Intel */
121 mach->feature[FEATURE_PREFER_MOVLPD] = 0;
122 mach->feature[FEATURE_USE_INC] = 1; /* cf. 0 for Intel */
123 mach->feature[FEATURE_USE_MOVAPD] = 1;
124 mach->feature[FEATURE_MERGE_DEPENDENT] = 1;
125 mach->feature[FEATURE_SCALAR_NONTEMP] = 1;
126 mach->feature[FEATURE_SSE4A] = 1;
127 mach->feature[FEATURE_SSEIMAX] = 1;
128 mach->feature[FEATURE_ABM] = 1;
129 if (machtype == TP_ISTANBUL ||
130 machtype == TP_SHANGHAI ||
131 machtype == TP_GH)
132 {
133 if (XBIT(135, 0x400000))
134 mach->feature[FEATURE_ALIGNLOOP32] = 1;
135 else
136 mach->feature[FEATURE_ALIGNLOOP16] = 1;
137 mach->feature[FEATURE_ALIGNJMP16] = 1;
138 }
139 if (mach->cachesize == 0)
140 mach->cachesize = (2 * 1024 * 1024);
141 break;
142
143 case TP_K8E:
144 /* AMD hammer
145 */
146 mach->feature[FEATURE_SSE3] = 1;
147 /* ...and fall through... */
148
149 case TP_K8:
150 /* AMD hammer
151 */
152 mach->type[MACH_AMD] = 1;
153 mach->type[MACH_AMD_HAMMER] = 1;
154 mach->feature[FEATURE_SSE] = 1;
155 mach->feature[FEATURE_SSE2] = 1;
156 mach->feature[FEATURE_DAZ] = 0; /* cf. 1 for Intel */
157 mach->feature[FEATURE_PREFER_MOVLPD] = 1;
158 mach->feature[FEATURE_USE_INC] = 1;
159 mach->feature[FEATURE_ALIGNLOOP16] = 1;
160 mach->feature[FEATURE_ALIGNJMP16] = 1;
161 if (mach->cachesize == 0)
162 mach->cachesize = (1024 * 1024);
163 break;
164
165 case TP_LARRABEE:
166 mach->type[MACH_INTEL] = 1;
167 mach->type[MACH_INTEL_PENTIUM4] = 1;
168 mach->type[MACH_INTEL_LARRABEE] = 1;
169 mach->feature[FEATURE_SSE] = 1;
170 mach->feature[FEATURE_SSE2] = 1;
171 mach->feature[FEATURE_SSE3] = 1;
172 mach->feature[FEATURE_SSE41] = 1;
173 mach->feature[FEATURE_SSE42] = 1;
174 mach->feature[FEATURE_USE_INC] = 0;
175 mach->feature[FEATURE_LD_MOVUPD] = 1;
176 mach->feature[FEATURE_USE_MOVAPD] = 1;
177 mach->feature[FEATURE_MNI] = 1;
178 mach->feature[FEATURE_DAZ] = 1;
179 mach->feature[FEATURE_SSEIMAX] = 1;
180 mach->feature[FEATURE_SSEPMAX] = 1;
181 mach->feature[FEATURE_LRBNI] = 1;
182 mach->feature[FEATURE_NOPREFETCH] = 1;
183 mach->feature[FEATURE_ALIGNLOOP8] = 1;
184 mach->feature[FEATURE_ALIGNJMP8] = 1;
185 if (mach->cachesize == 0)
186 mach->cachesize = 262144;
187 break;
188
189 case TP_SKYLAKE:
190 if (! DONT_GENERATE_AVX512) {
191 mach->type[MACH_INTEL_SKYLAKE] = 1;
192 mach->feature[FEATURE_AVX512VL] = 1;
193 }
194 /* ...and fall through... */
195
196 case TP_KNIGHTS_LANDING:
197 if (! DONT_GENERATE_AVX512) {
198 if (machtype == TP_KNIGHTS_LANDING) {
199 mach->type[MACH_INTEL_KNIGHTS_LANDING] = 1;
200 }
201 mach->feature[FEATURE_AVX512F] = 1;
202 }
203 /* ...and fall through... */
204
205 case TP_HASWELL:
206 mach->type[MACH_INTEL_HASWELL] = 1;
207 mach->feature[FEATURE_AVX2] = 1;
208 mach->feature[FEATURE_FMA3] = 1;
209 has_fma3 = 1;
210 mach->feature[FEATURE_LD_VMOVUPD] = 1;
211 mach->feature[FEATURE_ST_VMOVUPD] = 1;
212 /* ...and fall through... */
213
214 case TP_IVYBRIDGE:
215 case TP_SANDYBRIDGE:
216 mach->type[MACH_INTEL_SANDYBRIDGE] = 1;
217 mach->feature[FEATURE_AVX] = 1;
218 mach->feature[FEATURE_ST_MOVUPD] = 1;
219 mach->feature[FEATURE_MULTI_ACCUM] = 1;
220 /* ...and fall through... */
221
222 case TP_NEHALEM:
223 mach->type[MACH_INTEL_NEHALEM] = 1;
224 mach->feature[FEATURE_SSE42] = 1;
225 mach->feature[FEATURE_LD_MOVUPD] = 1;
226 mach->feature[FEATURE_SSEIMAX] = 1;
227 mach->feature[FEATURE_SSEPMAX] = 1;
228 if (mach->cachesize == 0)
229 mach->cachesize = (8 * 1024 * 1024);
230 /* ...and fall through... */
231
232 case TP_PENRYN:
233 mach->type[MACH_INTEL_PENRYN] = 1;
234 mach->feature[FEATURE_SSE41] = 1;
235 if (mach->cachesize == 0)
236 mach->cachesize = (6 * 1024 * 1024);
237 /* ...and fall through... */
238
239 case TP_CORE2:
240 mach->type[MACH_INTEL_CORE2] = 1;
241 mach->feature[FEATURE_SSE3] = 1;
242 mach->feature[FEATURE_MNI] = 1;
243 if (mach->cachesize == 0)
244 mach->cachesize = (4 * 1024 * 1024);
245 /* ...and fall through... */
246
247 case TP_P7:
248 /* Intel P7 Pentium IV
249 */
250 mach->type[MACH_INTEL] = 1;
251 mach->type[MACH_INTEL_PENTIUM4] = 1;
252 mach->feature[FEATURE_SSE] = 1;
253 mach->feature[FEATURE_SSE2] = 1;
254 mach->feature[FEATURE_USE_INC] = 0; /* cf. 1 for AMD */
255 mach->feature[FEATURE_USE_MOVAPD] = 1;
256 mach->feature[FEATURE_DAZ] = 1; /* cf. 0 for AMD */
257 mach->feature[FEATURE_ALIGNLOOP8] = 1;
258 mach->feature[FEATURE_ALIGNJMP8] = 1;
259 if (XBIT(80, 0x4000000)) {
260 mach->feature[FEATURE_SSE3] = 1;
261 }
262 if (machtype == TP_PENRYN || machtype == TP_CORE2 || machtype == TP_P7) {
263 mach->feature[FEATURE_NOPREFETCH] = 1;
264 }
265 if (mach->cachesize == 0)
266 mach->cachesize = (1024 * 1024);
267 break;
268
269 #ifdef TARGET_X8664
270 case TP_PY:
271 case TP_PX:
272 /* we know all x86-64 have at least SSE and SSE2 */
273 /* more or less the same as p7 without sse3, without -Mdaz */
274 mach->type[MACH_INTEL] = 1;
275 mach->type[MACH_INTEL_PENTIUM4] = 1;
276 mach->feature[FEATURE_SSE] = 1;
277 mach->feature[FEATURE_SSE2] = 1;
278 mach->feature[FEATURE_USE_INC] = 0;
279 mach->feature[FEATURE_USE_MOVAPD] = 1;
280 mach->feature[FEATURE_NOPREFETCH] = 1;
281 mach->feature[FEATURE_ALIGNLOOP8] = 1;
282 mach->feature[FEATURE_ALIGNJMP8] = 1;
283 if (mach->cachesize == 0)
284 mach->cachesize = (1024 * 1024);
285 break;
286 #else
287 case TP_PY: /* for 32-bit, treat like generic */
288 case TP_PX: /* " " " " " */
289 #endif
290 default:
291 mach->type[MACH_GENERIC] = 1;
292 mach->feature[FEATURE_SSE] = 1;
293 mach->feature[FEATURE_SSE2] = 1;
294 mach->feature[FEATURE_USE_INC] = 0;
295 mach->feature[FEATURE_NOPREFETCH] = 1;
296 mach->feature[FEATURE_ALIGNLOOP4] = 1;
297 mach->feature[FEATURE_ALIGNJMP8] = 1;
298 if (XBIT(129, 4))
299 mach->feature[FEATURE_DAZ] = 1;
300 mach->tpval = TP_PX;
301 if (mach->cachesize == 0)
302 mach->cachesize = 262144;
303 break;
304 } /* end switch (machtype) */
305 #endif /* !defined(TARGET_LLVM_ARM) } */
306
307 #if defined(TARGET_LLVM_ARM)
308 mach->feature[FEATURE_SCALAR_NEON] = 1;
309 mach->feature[FEATURE_NEON] = 1;
310 mach->feature[FEATURE_FMA] = 1;
311 has_fma3 = 1;
312 #elif defined(TARGET_LLVM_POWER)
313 mach->feature[FEATURE_SCALAR_VSX] = 1;
314 mach->feature[FEATURE_VSX] = 1;
315 mach->feature[FEATURE_FMA] = 1;
316 has_fma3 = 1;
317 #elif defined(X86_64)
318 /* new cg or 64-bit cg */
319 mach->feature[FEATURE_SCALAR_SSE] = 1;
320 #endif
321
322 /* override machine-specific settings of DAZ */
323 if (XBIT(129, 4))
324 mach->feature[FEATURE_DAZ] = 1;
325 else if (XBIT(129, 0x400))
326 mach->feature[FEATURE_DAZ] = 0;
327 if (XBIT(135, 0x20))
328 mach->feature[FEATURE_USE_MOVAPD] = 1;
329
330 /* -Mnoprefetch */
331 if (XBIT(39, 1))
332 mach->feature[FEATURE_NOPREFETCH] = 1;
333
334 /* -Mvect=simd:128 */
335 if (XBIT(56, 0x40))
336 mach->feature[FEATURE_SIMD128] = 1;
337 /* -Mvect=simd:256 or -Mvect=simd:512 */
338 if (XBIT(56, 0x100) || XBIT(56, 0x800))
339 mach->feature[FEATURE_SIMD128] = 0;
340
341 /* align 16 before loops */
342 if (XBIT(135, 0x2000) && !XBIT(135, 0x4000)) {
343 mach->feature[FEATURE_ALIGNLOOP4] = 0;
344 mach->feature[FEATURE_ALIGNLOOP8] = 0;
345 mach->feature[FEATURE_ALIGNLOOP16] = 1;
346 mach->feature[FEATURE_ALIGNLOOP32] = 0;
347 }
348 /* align 8 before loops */
349 if (XBIT(135, 0x2000) && XBIT(135, 0x4000)) {
350 mach->feature[FEATURE_ALIGNLOOP4] = 0;
351 mach->feature[FEATURE_ALIGNLOOP8] = 1;
352 mach->feature[FEATURE_ALIGNLOOP16] = 0;
353 mach->feature[FEATURE_ALIGNLOOP32] = 0;
354 }
355 /* no align before loops */
356 if (XBIT(135, 0x8000) || (XBIT(135, 0x4000) && !XBIT(135, 0x2000))) {
357 mach->feature[FEATURE_ALIGNLOOP4] = 0;
358 mach->feature[FEATURE_ALIGNLOOP8] = 0;
359 mach->feature[FEATURE_ALIGNLOOP16] = 0;
360 mach->feature[FEATURE_ALIGNLOOP32] = 0;
361 }
362 /* align 16 after jmp */
363 if (XBIT(135, 0x10000) && XBIT(135, 0x8000)) {
364 mach->feature[FEATURE_ALIGNJMP8] = 0;
365 mach->feature[FEATURE_ALIGNJMP16] = 1;
366 }
367 /* align 8 after jmp */
368 if (!XBIT(135, 0x10000) && XBIT(135, 0x8000)) {
369 mach->feature[FEATURE_ALIGNJMP8] = 1;
370 mach->feature[FEATURE_ALIGNJMP16] = 0;
371 }
372 /* no align after jmp */
373 if (XBIT(135, 0x10000) && !XBIT(135, 0x8000)) {
374 mach->feature[FEATURE_ALIGNJMP8] = 0;
375 mach->feature[FEATURE_ALIGNJMP16] = 0;
376 }
377
378 /* override feature settings */
379 if (XBIT(171, 1))
380 mach->feature[FEATURE_SCALAR_SSE] = 0;
381 else if (XBIT(172, 1))
382 mach->feature[FEATURE_SCALAR_SSE] = 1;
383 if (XBIT(171, 2))
384 mach->feature[FEATURE_SSE] = 0;
385 else if (XBIT(172, 2))
386 mach->feature[FEATURE_SSE] = 1;
387 if (XBIT(171, 4))
388 mach->feature[FEATURE_SSE2] = 0;
389 else if (XBIT(172, 4))
390 mach->feature[FEATURE_SSE2] = 1;
391 if (XBIT(171, 8))
392 mach->feature[FEATURE_SSE3] = 0;
393 else if (XBIT(172, 8))
394 mach->feature[FEATURE_SSE3] = 1;
395 if (XBIT(171, 0x10))
396 mach->feature[FEATURE_SSE41] = 0;
397 else if (XBIT(172, 0x10))
398 mach->feature[FEATURE_SSE41] = 1;
399 if (XBIT(171, 0x20))
400 mach->feature[FEATURE_SSE42] = 0;
401 else if (XBIT(172, 0x20))
402 mach->feature[FEATURE_SSE42] = 1;
403 if (XBIT(171, 0x40))
404 mach->feature[FEATURE_SSE4A] = 0;
405 else if (XBIT(172, 0x40))
406 mach->feature[FEATURE_SSE4A] = 1;
407 if (XBIT(171, 0x80))
408 mach->feature[FEATURE_SSE5] = 0;
409 else if (XBIT(172, 0x80))
410 mach->feature[FEATURE_SSE5] = 1;
411 if (XBIT(171, 0x100))
412 mach->feature[FEATURE_MNI] = 0;
413 else if (XBIT(172, 0x100))
414 mach->feature[FEATURE_MNI] = 1;
415 if (XBIT(171, 0x200))
416 mach->feature[FEATURE_DAZ] = 0;
417 else if (XBIT(172, 0x200))
418 mach->feature[FEATURE_DAZ] = 1;
419 if (XBIT(171, 0x400))
420 mach->feature[FEATURE_PREFER_MOVLPD] = 0;
421 else if (XBIT(172, 0x400))
422 mach->feature[FEATURE_PREFER_MOVLPD] = 1;
423 if (XBIT(171, 0x800))
424 mach->feature[FEATURE_USE_INC] = 0;
425 else if (XBIT(172, 0x800))
426 mach->feature[FEATURE_USE_INC] = 1;
427 if (XBIT(171, 0x1000))
428 mach->feature[FEATURE_USE_MOVAPD] = 0;
429 else if (XBIT(172, 0x1000))
430 mach->feature[FEATURE_USE_MOVAPD] = 1;
431 if (XBIT(171, 0x2000))
432 mach->feature[FEATURE_MERGE_DEPENDENT] = 0;
433 else if (XBIT(172, 0x2000))
434 mach->feature[FEATURE_MERGE_DEPENDENT] = 1;
435 if (XBIT(171, 0x4000))
436 mach->feature[FEATURE_SCALAR_NONTEMP] = 0;
437 else if (XBIT(172, 0x4000))
438 mach->feature[FEATURE_SCALAR_NONTEMP] = 1;
439 if (XBIT(171, 0x8000))
440 mach->feature[FEATURE_SSEIMAX] = 0;
441 else if (XBIT(172, 0x8000))
442 mach->feature[FEATURE_SSEIMAX] = 1;
443 if (XBIT(171, 0x10000))
444 mach->feature[FEATURE_MISALIGNEDSSE] = 0;
445 else if (XBIT(172, 0x10000))
446 mach->feature[FEATURE_MISALIGNEDSSE] = 1;
447 if (XBIT(171, 0x20000))
448 mach->feature[FEATURE_LD_MOVUPD] = 0;
449 else if (XBIT(172, 0x20000))
450 mach->feature[FEATURE_LD_MOVUPD] = 1;
451 if (XBIT(171, 0x40000))
452 mach->feature[FEATURE_ST_MOVUPD] = 0;
453 else if (XBIT(172, 0x40000))
454 mach->feature[FEATURE_ST_MOVUPD] = 1;
455 if (XBIT(171, 0x80000))
456 mach->feature[FEATURE_UNROLL_16] = 0;
457 else if (XBIT(172, 0x80000))
458 mach->feature[FEATURE_UNROLL_16] = 1;
459 if (XBIT(171, 0x100000))
460 mach->feature[FEATURE_DOUBLE_UNROLL] = 0;
461 else if (XBIT(172, 0x100000))
462 mach->feature[FEATURE_DOUBLE_UNROLL] = 1;
463 if (XBIT(171, 0x200000))
464 mach->feature[FEATURE_PEEL_SHUFFLE] = 0;
465 else if (XBIT(172, 0x200000))
466 mach->feature[FEATURE_PEEL_SHUFFLE] = 1;
467 if (XBIT(171, 0x400000))
468 mach->feature[FEATURE_PREFETCHNTA] = 0;
469 else if (XBIT(172, 0x400000))
470 mach->feature[FEATURE_PREFETCHNTA] = 1;
471 if (XBIT(171, 0x800000))
472 mach->feature[FEATURE_PDSHUF] = 0;
473 else if (XBIT(172, 0x800000))
474 mach->feature[FEATURE_PDSHUF] = 1;
475 if (XBIT(171, 0x1000000))
476 mach->feature[FEATURE_SSEPMAX] = 0;
477 else if (XBIT(172, 0x1000000))
478 mach->feature[FEATURE_SSEPMAX] = 1;
479 if (XBIT(171, 0x2000000))
480 mach->feature[FEATURE_GHLIBS] = 0;
481 else if (XBIT(172, 0x2000000))
482 mach->feature[FEATURE_GHLIBS] = 1;
483 if (XBIT(171, 0x4000000))
484 mach->feature[FEATURE_SSEMISALN] = 0;
485 else if (XBIT(172, 0x4000000))
486 mach->feature[FEATURE_SSEMISALN] = 1;
487 if (XBIT(171, 0x8000000))
488 mach->feature[FEATURE_ABM] = 0;
489 else if (XBIT(172, 0x8000000))
490 mach->feature[FEATURE_ABM] = 1;
491 if (XBIT(171, 0x10000000))
492 mach->feature[FEATURE_AVX] = 0;
493 else if (XBIT(172, 0x10000000))
494 mach->feature[FEATURE_AVX] = 1;
495 if (XBIT(171, 0x20000000))
496 mach->feature[FEATURE_LRBNI] = 0;
497 else if (XBIT(172, 0x20000000))
498 mach->feature[FEATURE_LRBNI] = 1;
499 if (has_fma4) {
500 if (XBIT(171, 0x40000000))
501 mach->feature[FEATURE_FMA4] = 0;
502 else if (XBIT(172, 0x40000000))
503 mach->feature[FEATURE_FMA4] = 1;
504 }
505 if (XBIT(171, 0x80000000))
506 mach->feature[FEATURE_XOP] = 0;
507 else if (XBIT(172, 0x80000000))
508 mach->feature[FEATURE_XOP] = 1;
509 if (has_fma3) {
510 if (XBIT(178, 0x01))
511 mach->feature[FEATURE_FMA3] = 0;
512 else if (XBIT(179, 0x01))
513 mach->feature[FEATURE_FMA3] = 1;
514 }
515 if (XBIT(178, 0x02))
516 mach->feature[FEATURE_MULTI_ACCUM] = 0;
517 else if (XBIT(179, 0x02))
518 mach->feature[FEATURE_MULTI_ACCUM] = 1;
519 if (XBIT(178, 0x04))
520 mach->feature[FEATURE_SIMD128] = 0;
521 else if (XBIT(179, 0x04))
522 mach->feature[FEATURE_SIMD128] = 1;
523 if (XBIT(178, 0x08))
524 mach->feature[FEATURE_NOPREFETCH] = 0;
525 else if (XBIT(179, 0x08))
526 mach->feature[FEATURE_NOPREFETCH] = 1;
527 if (XBIT(178, 0x10))
528 mach->feature[FEATURE_ALIGNLOOP4] = 0;
529 else if (XBIT(179, 0x10))
530 mach->feature[FEATURE_ALIGNLOOP4] = 1;
531 if (XBIT(178, 0x20))
532 mach->feature[FEATURE_ALIGNLOOP8] = 0;
533 else if (XBIT(179, 0x20))
534 mach->feature[FEATURE_ALIGNLOOP8] = 1;
535 if (XBIT(178, 0x40))
536 mach->feature[FEATURE_ALIGNLOOP16] = 0;
537 else if (XBIT(179, 0x40))
538 mach->feature[FEATURE_ALIGNLOOP16] = 1;
539 if (XBIT(178, 0x80))
540 mach->feature[FEATURE_ALIGNLOOP32] = 0;
541 else if (XBIT(179, 0x80))
542 mach->feature[FEATURE_ALIGNLOOP32] = 1;
543 if (XBIT(178, 0x100))
544 mach->feature[FEATURE_LD_VMOVUPD] = 0;
545 else if (XBIT(179, 0x100))
546 mach->feature[FEATURE_LD_VMOVUPD] = 1;
547 if (XBIT(178, 0x200))
548 mach->feature[FEATURE_ST_VMOVUPD] = 0;
549 else if (XBIT(179, 0x200))
550 mach->feature[FEATURE_ST_VMOVUPD] = 1;
551 if (XBIT(178, 0x400))
552 mach->feature[FEATURE_AVX2] = 0;
553 else if (XBIT(179, 0x400))
554 mach->feature[FEATURE_AVX2] = 1;
555 if (XBIT(178, 0x800))
556 mach->feature[FEATURE_AVX512F] = 0;
557 else if (XBIT(179, 0x800))
558 mach->feature[FEATURE_AVX512F] = 1;
559 if (XBIT(178, 0x2000))
560 mach->feature[FEATURE_AVX512VL] = 0;
561 else if (XBIT(179, 0x2000))
562 mach->feature[FEATURE_AVX512VL] = 1;
563
564 } /* set_mach */
565
566 /* take intersection of all mach-> features */
567 static X86TYPE mach_intersect;
568 void
init_mach_intersect()569 init_mach_intersect()
570 {
571 int i;
572 mach_intersect.tpval = 0;
573 /* take minimum of all cache sizes */
574 mach_intersect.cachesize = 0;
575 for (i = 0; i < MACH_NUMBER; ++i)
576 mach_intersect.type[i] = 1;
577 for (i = 0; i < FEATURE_NUMBER; ++i)
578 mach_intersect.feature[i] = 1;
579 } /* init_machintersect */
580
581 void
intersect_mach_intersect(X86TYPE * mach)582 intersect_mach_intersect(X86TYPE *mach)
583 {
584 int i;
585 if (mach_intersect.cachesize == 0 ||
586 (mach->cachesize && mach->cachesize > mach_intersect.cachesize))
587 mach_intersect.cachesize = mach->cachesize;
588 for (i = 0; i < MACH_NUMBER; ++i) {
589 if (!mach->type[i])
590 mach_intersect.type[i] = 0;
591 }
592 for (i = 0; i < FEATURE_NUMBER; ++i) {
593 if (!mach->feature[i])
594 mach_intersect.feature[i] = 0;
595 }
596 } /* intersect_mach_intersect */
597
598 void
copy_mach_intersect(X86TYPE * mach)599 copy_mach_intersect(X86TYPE *mach)
600 {
601 int i;
602 mach->cachesize = mach_intersect.cachesize;
603 for (i = 0; i < MACH_NUMBER; ++i)
604 mach->type[i] = mach_intersect.type[i];
605 for (i = 0; i < FEATURE_NUMBER; ++i)
606 mach->feature[i] = mach_intersect.feature[i];
607 } /* copy_mach_intersect */
608
609 int
machvalue(char * thistpname)610 machvalue(char *thistpname)
611 {
612 #ifdef TARGET_X8664
613 if (strcmp(thistpname, "amd64") == 0)
614 return TP_K8;
615 if (strcmp(thistpname, "amd64e") == 0)
616 return TP_K8E;
617 #endif
618 if (strcmp(thistpname, "athlon") == 0)
619 return TP_K8;
620 if (strcmp(thistpname, "bulldozer") == 0)
621 return TP_BULLDOZER;
622 if (strncmp(thistpname, "core2", 5) == 0)
623 return TP_CORE2;
624 if (strncmp(thistpname, "gh", 2) == 0)
625 return TP_GH;
626 if (strncmp(thistpname, "hammer", 6) == 0)
627 return TP_K8;
628 if (strncmp(thistpname, "haswell", 9) == 0)
629 return TP_HASWELL;
630 if (strncmp(thistpname, "istanbul", 8) == 0)
631 return TP_ISTANBUL;
632 if (strncmp(thistpname, "ivybridge", 9) == 0)
633 return TP_IVYBRIDGE;
634 if (strcmp(thistpname, "k8") == 0)
635 return TP_K8;
636 if (strncmp(thistpname, "k8", 2) == 0 &&
637 thistpname[strlen(thistpname) - 1] == 'e')
638 return TP_K8E;
639 if (strncmp(thistpname, "k8", 2) == 0)
640 return TP_K8;
641 if (strncmp(thistpname, "knl", 3) == 0)
642 return TP_KNIGHTS_LANDING;
643 if (strncmp(thistpname, "nehalem", 7) == 0)
644 return TP_NEHALEM;
645 if (strncmp(thistpname, "p7", 2) == 0)
646 return TP_P7;
647 if (strncmp(thistpname, "penryn", 6) == 0)
648 return TP_PENRYN;
649 if (strcmp(thistpname, "piledriver") == 0)
650 return TP_PILEDRIVER;
651 if (strncmp(thistpname, "px", 2) == 0)
652 return TP_PX;
653 if (strncmp(thistpname, "py", 2) == 0)
654 return TP_PY;
655 if (strncmp(thistpname, "sandybridge", 11) == 0)
656 return TP_SANDYBRIDGE;
657 if (strncmp(thistpname, "shanghai", 8) == 0)
658 return TP_SHANGHAI;
659 if (strncmp(thistpname, "skylake", 7) == 0)
660 return TP_SKYLAKE;
661 if (strncmp(thistpname, "zen", 3) == 0)
662 return TP_ZEN;
663 return 0;
664 } /* machvalue */
665
666 void
set_tp(char * thistpname)667 set_tp(char *thistpname)
668 {
669 if (flg.tpcount <= TPNVERSION) {
670 int n, i, j;
671 n = machvalue(thistpname);
672 if (n <= 0) {
673 interr("Unexpected value for -tp switch", 0, ERR_Fatal);
674 } else {
675 if (flg.tpcount == 0) {
676 flg.tpvalue[flg.tpcount] = n;
677 ++flg.tpcount;
678 } else {
679 if (n < flg.tpvalue[0])
680 flg.tpvalue[0] = n;
681 }
682 }
683 }
684 } /* set_tp */
685
/**
   \brief Placeholder with no effect; \p skip is ignored.
 */
void
check_tp(bool skip)
{
  (void)skip; /* intentionally unused */
} /* check_tp */
690
691 #if DEBUG
692 char *
sxtp(int tp)693 sxtp(int tp)
694 {
695 switch (tp) {
696 case TP_PY:
697 return "py";
698 case TP_PX:
699 return "px";
700 case TP_P5:
701 return "p5";
702 case TP_ATHLON:
703 return "athlon";
704 case TP_P6:
705 return "p6";
706 case TP_ATHLON_XP:
707 return "athlon_xp";
708 case TP_PIII:
709 return "piii";
710 case TP_K8:
711 return "k8";
712 case TP_P7:
713 return "p7";
714 case TP_K8E:
715 return "k8e";
716 case TP_PIV:
717 return "piv";
718 case TP_GH:
719 return "gh";
720 case TP_CORE2:
721 return "core2";
722 case TP_PENRYN:
723 return "penryn";
724 case TP_SHANGHAI:
725 return "shanghai";
726 case TP_ISTANBUL:
727 return "istanbul";
728 case TP_NEHALEM:
729 return "nehalem";
730 case TP_BULLDOZER:
731 return "bulldozer";
732 case TP_SANDYBRIDGE:
733 return "sandybridge";
734 case TP_IVYBRIDGE:
735 return "ivybridge";
736 case TP_HASWELL:
737 return "haswell";
738 case TP_LARRABEE:
739 return "larrabee";
740 case TP_PILEDRIVER:
741 return "piledriver";
742 case TP_KNIGHTS_LANDING:
743 return "knl";
744 case TP_SKYLAKE:
745 return "skylake";
746 case TP_ZEN:
747 return "zen";
748 default:
749 return "??";
750 }
751 } /* sxtp */
752
753 char *
sxtype(int m)754 sxtype(int m)
755 {
756 switch (m) {
757 case MACH_GENERIC:
758 return "mach_generic";
759 case MACH_INTEL:
760 return "mach_intel";
761 case MACH_INTEL_PENTIUM4:
762 return "mach_pentium4";
763 case MACH_INTEL_CORE2:
764 return "mach_core2";
765 case MACH_INTEL_PENRYN:
766 return "mach_penryn";
767 case MACH_INTEL_NEHALEM:
768 return "mach_nehalem";
769 case MACH_INTEL_SANDYBRIDGE:
770 return "mach_sandybridge";
771 case MACH_INTEL_HASWELL:
772 return "mach_haswell";
773 case MACH_INTEL_KNIGHTS_LANDING:
774 return "mach_knl";
775 case MACH_INTEL_SKYLAKE:
776 return "mach_skylake";
777 case MACH_INTEL_LARRABEE:
778 return "mach_larrabee";
779 case MACH_AMD:
780 return "mach_amd";
781 case MACH_AMD_ATHLON:
782 return "mach_athlon";
783 case MACH_AMD_ATHLON_XP:
784 return "mach_athlon_xp";
785 case MACH_AMD_HAMMER:
786 return "mach_hammer";
787 case MACH_AMD_GH:
788 return "mach_gh";
789 case MACH_AMD_SHANGHAI:
790 return "mach_shanghai";
791 case MACH_AMD_ISTANBUL:
792 return "mach_istanbul";
793 case MACH_AMD_BULLDOZER:
794 return "mach_bulldozer";
795 case MACH_AMD_PILEDRIVER:
796 return "mach_piledriver";
797 case MACH_AMD_ZEN:
798 return "mach_zen";
799 default:
800 return "??";
801 }
802 } /* sxtype */
803
804 char *
sxfeature(int f)805 sxfeature(int f)
806 {
807 switch (f) {
808 case FEATURE_SCALAR_SSE:
809 return "feature_scalar_sse";
810 case FEATURE_SSE:
811 return "feature_sse";
812 case FEATURE_SSE2:
813 return "feature_sse2";
814 case FEATURE_SSE3:
815 return "feature_sse3";
816 case FEATURE_SSE41:
817 return "feature_sse41";
818 case FEATURE_SSE42:
819 return "feature_sse42";
820 case FEATURE_SSE4A:
821 return "feature_sse4a";
822 case FEATURE_SSE5:
823 return "feature_sse5";
824 case FEATURE_MNI:
825 return "feature_mni";
826 case FEATURE_DAZ:
827 return "feature_daz";
828 case FEATURE_PREFER_MOVLPD:
829 return "feature_prever_movlpd";
830 case FEATURE_USE_INC:
831 return "feature_use_inc";
832 case FEATURE_USE_MOVAPD:
833 return "feature_use_movapd";
834 case FEATURE_MERGE_DEPENDENT:
835 return "feature_merge_dependent";
836 case FEATURE_SCALAR_NONTEMP:
837 return "feature_scalar_nontemp";
838 case FEATURE_SSEIMAX:
839 return "feature_sseimax";
840 case FEATURE_MISALIGNEDSSE:
841 return "feature_misalignedsse";
842 case FEATURE_LD_MOVUPD:
843 return "feature_ld_movupd";
844 case FEATURE_ST_MOVUPD:
845 return "feature_st_movupd";
846 case FEATURE_UNROLL_16:
847 return "feature_unroll_16";
848 case FEATURE_DOUBLE_UNROLL:
849 return "feature_double_unroll";
850 case FEATURE_PEEL_SHUFFLE:
851 return "feature_peel_shuffle";
852 case FEATURE_PREFETCHNTA:
853 return "feature_prefetchnta";
854 case FEATURE_PDSHUF:
855 return "feature_pdshuf";
856 case FEATURE_SSEPMAX:
857 return "feature_ssepmax";
858 case FEATURE_GHLIBS:
859 return "feature_ghlibs";
860 case FEATURE_SSEMISALN:
861 return "feature_ssemisaln";
862 case FEATURE_ABM:
863 return "feature_abm";
864 case FEATURE_AVX:
865 return "feature_avx";
866 case FEATURE_LRBNI:
867 return "feature_lrbni";
868 case FEATURE_FMA4:
869 return "feature_fma4";
870 case FEATURE_XOP:
871 return "feature_xop";
872 case FEATURE_FMA3:
873 return "feature_fma3";
874 case FEATURE_MULTI_ACCUM:
875 return "feature_multi_accum";
876 case FEATURE_SIMD128:
877 return "feature_simd128";
878 case FEATURE_NOPREFETCH:
879 return "feature_noprefetch";
880 case FEATURE_ALIGNLOOP4:
881 return "feature_alignloop4";
882 case FEATURE_ALIGNLOOP8:
883 return "feature_alignloop8";
884 case FEATURE_ALIGNLOOP16:
885 return "feature_alignloop16";
886 case FEATURE_ALIGNLOOP32:
887 return "feature_alignloop32";
888 case FEATURE_ALIGNJMP8:
889 return "feature_alignjmp8";
890 case FEATURE_ALIGNJMP16:
891 return "feature_alignjmp16";
892 case FEATURE_LD_VMOVUPD:
893 return "feature_ld_vmovupd";
894 case FEATURE_ST_VMOVUPD:
895 return "feature_st_vmovupd";
896 case FEATURE_AVX2:
897 return "feature_avx2";
898 case FEATURE_AVX512F:
899 return "feature_avx512f";
900 case FEATURE_AVX512VL:
901 return "feature_avx512vl";
902 default:
903 return "??";
904 }
905 } /* sxfeature */
906
907 void
_dumpmach(X86TYPE * mach)908 _dumpmach(X86TYPE *mach)
909 {
910 FILE *dfile;
911 int m, f;
912 dfile = gbl.dbgfil ? gbl.dbgfil : stderr;
913 fprintf(dfile, "%d=tpval=%s\n", mach->tpval, sxtp(mach->tpval));
914 for (m = 0; m < MACH_NUMBER; ++m) {
915 if (mach->type[m]) {
916 fprintf(dfile, "%d=type[%2d]=%s\n", mach->type[m], m, sxtype(m));
917 }
918 }
919
920 for (f = 0; f < FEATURE_NUMBER; ++f) {
921 if (mach->feature[f]) {
922 fprintf(dfile, "%d=feature[%2d]=%s\n", mach->feature[f], f, sxfeature(f));
923 }
924 }
925
926 fprintf(dfile, "%ld=cachesize\n", mach->cachesize);
927 } /* _dumpmach */
928
929 void
dumpmach()930 dumpmach()
931 {
932 _dumpmach(&mach);
933 } /* dumpmach */
934 #endif
935