1 /*
2  * Copyright (c) 2006-2019, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 
18 /**
19    \file
20    \brief Structures to describe the x86 CPU type and CPU features
21  */
22 
23 #include "gbldefs.h"
24 #include "global.h"
25 #include "error.h"
26 #include "x86.h"
27 
/* File-scope machine description; dumpmach() prints it in DEBUG builds. */
X86TYPE mach;

/* When true, set_mach() does not set the Skylake / Knights Landing machine
 * types or the AVX-512 feature bits while decoding those -tp values. */
#ifdef TARGET_WIN
#define DONT_GENERATE_AVX512  true    /* a temporary restriction */
#else
#define DONT_GENERATE_AVX512  false
#endif
35 
/**
   \brief Populate a machine description for one target processor.

   Clears \c mach->type and \c mach->feature, stores \p machtype in
   \c mach->tpval and then, unless TARGET_LLVM_ARM is defined, decodes
   \p machtype with a switch whose cases deliberately fall through from the
   newest processor of a family to the oldest, so a newer target accumulates
   every type/feature flag of its predecessors.  After the switch a series
   of XBIT tests overrides individual settings.

   \c mach->cachesize starts as \c flg.x[32]; when that is 0 the first case
   reached that tests for 0 supplies the default, so the most specific
   target wins.

   \param mach      description to fill in (this parameter shadows the
                    file-scope variable of the same name)
   \param machtype  a TP_ value; values without a case of their own are
                    treated as generic and recorded as TP_PX
 */
void
set_mach(X86TYPE *mach, int machtype)
{
  /* remember whether the target itself offers FMA3/FMA4: the override bits
   * for those two features are only honored when it does (see below) */
  int has_fma3 = 0;
  int has_fma4 = 0;
  memset(&mach->type, 0, sizeof(mach->type));
  memset(&mach->feature, 0, sizeof(mach->feature));
  mach->tpval = machtype;
/*
 *  -tp may not be appropriate for ARM; so machtype may refer to
 *  ARM attributes rather than a TP_ value.
 */
#if !defined(TARGET_LLVM_ARM)  /* { */
  mach->cachesize = flg.x[32]; /* this applies to all 'machtype's */

  switch (machtype) {
  /* ---- AMD chain: ZEN -> PILEDRIVER -> BULLDOZER -> ISTANBUL ->
   *      SHANGHAI -> GH, each case falling into the next ---- */
  case TP_ZEN:
    /* AMD Zen microarchitecture, e.g. EPYC and Ryzen processors.
     */
    mach->type[MACH_AMD_ZEN] = 1;
    mach->feature[FEATURE_AVX2] = 1;
    /* ...and fall through... */

  case TP_PILEDRIVER:
    /* AMD piledriver
     */
    mach->type[MACH_AMD_PILEDRIVER] = 1;
    mach->feature[FEATURE_FMA3] = 1;
    has_fma3 = 1;
    mach->feature[FEATURE_LD_VMOVUPD] = 1;    /* added on 14 Dec 2015 */
    mach->feature[FEATURE_ST_VMOVUPD] = 1;    /*   "    "    "    "   */
    mach->feature[FEATURE_ST_MOVUPD] = 1;     /*   "    "    "    "   */
    /* ...and fall through... */

  case TP_BULLDOZER:
    /* AMD bulldozer
     */
    mach->type[MACH_AMD_BULLDOZER] = 1;
    mach->feature[FEATURE_SSE41] = 1;    /* JHM: added on 2 Feb 2017 */
    mach->feature[FEATURE_SSE42] = 1;    /*  "    "    "    "    "   */
    mach->feature[FEATURE_AVX] = 1;
    /* SIMD128, FMA4 and XOP are set for piledriver/bulldozer only; a ZEN
     * target that fell through to here does not get them */
    if (machtype == TP_PILEDRIVER || machtype == TP_BULLDOZER) {
      mach->feature[FEATURE_SIMD128] = 1;
      mach->feature[FEATURE_FMA4] = 1;
      has_fma4 = 1;
      mach->feature[FEATURE_XOP] = 1;
    }
    mach->feature[FEATURE_ALIGNLOOP8] = 1;
    mach->feature[FEATURE_ALIGNJMP8] = 1;
    /* ...and fall through... */

  case TP_ISTANBUL:
    /* AMD istanbul
     */
    mach->type[MACH_AMD_ISTANBUL] = 1;
    /* ...and fall through... */

  case TP_SHANGHAI:
    /* AMD shanghai, like greyhound but with a larger cache.
     */
    mach->type[MACH_AMD_SHANGHAI] = 1;
    mach->feature[FEATURE_MULTI_ACCUM] = 1;
    if (mach->cachesize == 0)
      mach->cachesize = (6 * 1024 * 1024);
    /* ...and fall through... */

  case TP_GH:
    /* AMD greyhound
     */
    mach->type[MACH_AMD] = 1;
    mach->type[MACH_AMD_HAMMER] = 1;
    mach->type[MACH_AMD_GH] = 1;
    mach->feature[FEATURE_SSE] = 1;
    mach->feature[FEATURE_SSE2] = 1;
    mach->feature[FEATURE_SSE3] = 1;
    mach->feature[FEATURE_MISALIGNEDSSE] = 1;
    mach->feature[FEATURE_LD_MOVUPD] = 1;
    mach->feature[FEATURE_UNROLL_16] = 1;
    mach->feature[FEATURE_DOUBLE_UNROLL] = 1;
    mach->feature[FEATURE_PEEL_SHUFFLE] = 1;
    mach->feature[FEATURE_PREFETCHNTA] = 1;
    mach->feature[FEATURE_PDSHUF] = 1;
    mach->feature[FEATURE_GHLIBS] = 1;
    mach->feature[FEATURE_SSEMISALN] = 1;
    mach->feature[FEATURE_DAZ] = 0;        /* cf. 1 for Intel */
    mach->feature[FEATURE_PREFER_MOVLPD] = 0;
    mach->feature[FEATURE_USE_INC] = 1;    /* cf. 0 for Intel */
    mach->feature[FEATURE_USE_MOVAPD] = 1;
    mach->feature[FEATURE_MERGE_DEPENDENT] = 1;
    mach->feature[FEATURE_SCALAR_NONTEMP] = 1;
    mach->feature[FEATURE_SSE4A] = 1;
    mach->feature[FEATURE_SSEIMAX] = 1;
    mach->feature[FEATURE_ABM] = 1;
    /* the 16/32-byte alignment settings apply to the three oldest members
     * of this chain only; newer ones keep the ALIGN*8 flags set above */
    if (machtype == TP_ISTANBUL ||
        machtype == TP_SHANGHAI ||
        machtype == TP_GH)
    {
      if (XBIT(135, 0x400000))
        mach->feature[FEATURE_ALIGNLOOP32] = 1;
      else
        mach->feature[FEATURE_ALIGNLOOP16] = 1;
      mach->feature[FEATURE_ALIGNJMP16] = 1;
    }
    if (mach->cachesize == 0)
      mach->cachesize = (2 * 1024 * 1024);
    break;

  case TP_K8E:
    /* AMD hammer
     */
    mach->feature[FEATURE_SSE3] = 1;
    /* ...and fall through... */

  case TP_K8:
    /* AMD hammer
     */
    mach->type[MACH_AMD] = 1;
    mach->type[MACH_AMD_HAMMER] = 1;
    mach->feature[FEATURE_SSE] = 1;
    mach->feature[FEATURE_SSE2] = 1;
    mach->feature[FEATURE_DAZ] = 0;    /* cf. 1 for Intel */
    mach->feature[FEATURE_PREFER_MOVLPD] = 1;
    mach->feature[FEATURE_USE_INC] = 1;
    mach->feature[FEATURE_ALIGNLOOP16] = 1;
    mach->feature[FEATURE_ALIGNJMP16] = 1;
    if (mach->cachesize == 0)
      mach->cachesize = (1024 * 1024);
    break;

  /* Larrabee is a stand-alone case: nothing falls into it or out of it */
  case TP_LARRABEE:
    mach->type[MACH_INTEL] = 1;
    mach->type[MACH_INTEL_PENTIUM4] = 1;
    mach->type[MACH_INTEL_LARRABEE] = 1;
    mach->feature[FEATURE_SSE] = 1;
    mach->feature[FEATURE_SSE2] = 1;
    mach->feature[FEATURE_SSE3] = 1;
    mach->feature[FEATURE_SSE41] = 1;
    mach->feature[FEATURE_SSE42] = 1;
    mach->feature[FEATURE_USE_INC] = 0;
    mach->feature[FEATURE_LD_MOVUPD] = 1;
    mach->feature[FEATURE_USE_MOVAPD] = 1;
    mach->feature[FEATURE_MNI] = 1;
    mach->feature[FEATURE_DAZ] = 1;
    mach->feature[FEATURE_SSEIMAX] = 1;
    mach->feature[FEATURE_SSEPMAX] = 1;
    mach->feature[FEATURE_LRBNI] = 1;
    mach->feature[FEATURE_NOPREFETCH] = 1;
    mach->feature[FEATURE_ALIGNLOOP8] = 1;
    mach->feature[FEATURE_ALIGNJMP8] = 1;
    if (mach->cachesize == 0)
      mach->cachesize = 262144;
    break;

  /* ---- Intel chain: SKYLAKE -> KNIGHTS_LANDING -> HASWELL ->
   *      IVYBRIDGE/SANDYBRIDGE -> NEHALEM -> PENRYN -> CORE2 -> P7 ---- */
  case TP_SKYLAKE:
    if (! DONT_GENERATE_AVX512) {
      mach->type[MACH_INTEL_SKYLAKE] = 1;
      mach->feature[FEATURE_AVX512VL] = 1;
    }
    /* ...and fall through... */

  case TP_KNIGHTS_LANDING:
    if (! DONT_GENERATE_AVX512) {
      /* SKYLAKE also passes through here, so the KNL machine type is only
       * set when KNL was actually requested; AVX512F is set for both */
      if (machtype == TP_KNIGHTS_LANDING) {
        mach->type[MACH_INTEL_KNIGHTS_LANDING] = 1;
      }
      mach->feature[FEATURE_AVX512F] = 1;
    }
    /* ...and fall through... */

  case TP_HASWELL:
    mach->type[MACH_INTEL_HASWELL] = 1;
    mach->feature[FEATURE_AVX2] = 1;
    mach->feature[FEATURE_FMA3] = 1;
    has_fma3 = 1;
    mach->feature[FEATURE_LD_VMOVUPD] = 1;
    mach->feature[FEATURE_ST_VMOVUPD] = 1;
    /* ...and fall through... */

  case TP_IVYBRIDGE:
  case TP_SANDYBRIDGE:
    mach->type[MACH_INTEL_SANDYBRIDGE] = 1;
    mach->feature[FEATURE_AVX] = 1;
    mach->feature[FEATURE_ST_MOVUPD] = 1;
    mach->feature[FEATURE_MULTI_ACCUM] = 1;
    /* ...and fall through... */

  case TP_NEHALEM:
    mach->type[MACH_INTEL_NEHALEM] = 1;
    mach->feature[FEATURE_SSE42] = 1;
    mach->feature[FEATURE_LD_MOVUPD] = 1;
    mach->feature[FEATURE_SSEIMAX] = 1;
    mach->feature[FEATURE_SSEPMAX] = 1;
    if (mach->cachesize == 0)
      mach->cachesize = (8 * 1024 * 1024);
    /* ...and fall through... */

  case TP_PENRYN:
    mach->type[MACH_INTEL_PENRYN] = 1;
    mach->feature[FEATURE_SSE41] = 1;
    if (mach->cachesize == 0)
      mach->cachesize = (6 * 1024 * 1024);
    /* ...and fall through... */

  case TP_CORE2:
    mach->type[MACH_INTEL_CORE2] = 1;
    mach->feature[FEATURE_SSE3] = 1;
    mach->feature[FEATURE_MNI] = 1;
    if (mach->cachesize == 0)
      mach->cachesize = (4 * 1024 * 1024);
    /* ...and fall through... */

  case TP_P7:
    /* Intel P7 Pentium IV
     */
    mach->type[MACH_INTEL] = 1;
    mach->type[MACH_INTEL_PENTIUM4] = 1;
    mach->feature[FEATURE_SSE] = 1;
    mach->feature[FEATURE_SSE2] = 1;
    mach->feature[FEATURE_USE_INC] = 0;    /* cf. 1 for AMD */
    mach->feature[FEATURE_USE_MOVAPD] = 1;
    mach->feature[FEATURE_DAZ] = 1;        /* cf. 0 for AMD */
    mach->feature[FEATURE_ALIGNLOOP8] = 1;
    mach->feature[FEATURE_ALIGNJMP8] = 1;
    if (XBIT(80, 0x4000000)) {
      mach->feature[FEATURE_SSE3] = 1;
    }
    /* NOPREFETCH is set for the three oldest members of this chain only */
    if (machtype == TP_PENRYN || machtype == TP_CORE2 || machtype == TP_P7) {
      mach->feature[FEATURE_NOPREFETCH] = 1;
    }
    if (mach->cachesize == 0)
      mach->cachesize = (1024 * 1024);
    break;

#ifdef TARGET_X8664
  case TP_PY:
  case TP_PX:
    /* we know all x86-64 have at least SSE and SSE2 */
    /* more or less the same as p7 without sse3, without -Mdaz */
    mach->type[MACH_INTEL] = 1;
    mach->type[MACH_INTEL_PENTIUM4] = 1;
    mach->feature[FEATURE_SSE] = 1;
    mach->feature[FEATURE_SSE2] = 1;
    mach->feature[FEATURE_USE_INC] = 0;
    mach->feature[FEATURE_USE_MOVAPD] = 1;
    mach->feature[FEATURE_NOPREFETCH] = 1;
    mach->feature[FEATURE_ALIGNLOOP8] = 1;
    mach->feature[FEATURE_ALIGNJMP8] = 1;
    if (mach->cachesize == 0)
      mach->cachesize = (1024 * 1024);
    break;
#else
  case TP_PY: /* for 32-bit, treat like generic */
  case TP_PX: /*   "     "     "     "     "    */
#endif
  default:
    /* generic target: also reached by any machtype without its own case */
    mach->type[MACH_GENERIC] = 1;
    mach->feature[FEATURE_SSE] = 1;
    mach->feature[FEATURE_SSE2] = 1;
    mach->feature[FEATURE_USE_INC] = 0;
    mach->feature[FEATURE_NOPREFETCH] = 1;
    mach->feature[FEATURE_ALIGNLOOP4] = 1;
    mach->feature[FEATURE_ALIGNJMP8] = 1;
    if (XBIT(129, 4))
      mach->feature[FEATURE_DAZ] = 1;
    /* replaces the machtype stored in tpval at the top of the function */
    mach->tpval = TP_PX;
    if (mach->cachesize == 0)
      mach->cachesize = 262144;
    break;
  }    /* end switch (machtype) */
#endif /* !defined(TARGET_LLVM_ARM) } */

  /* Target-family features that do not depend on machtype.  On ARM and
   * POWER has_fma3 is set so that the FEATURE_FMA3 override further down is
   * honored, although the flag set here is FEATURE_FMA. */
  /* NOTE(review): the switch above tests TARGET_X8664 while this chain
   * tests X86_64 -- confirm both macros are intended. */
#if defined(TARGET_LLVM_ARM)
  mach->feature[FEATURE_SCALAR_NEON] = 1;
  mach->feature[FEATURE_NEON] = 1;
  mach->feature[FEATURE_FMA] = 1;
  has_fma3 = 1;
#elif defined(TARGET_LLVM_POWER)
  mach->feature[FEATURE_SCALAR_VSX] = 1;
  mach->feature[FEATURE_VSX] = 1;
  mach->feature[FEATURE_FMA] = 1;
  has_fma3 = 1;
#elif defined(X86_64)
  /* new cg or 64-bit cg */
  mach->feature[FEATURE_SCALAR_SSE] = 1;
#endif

  /* override machine-specific settings of DAZ */
  if (XBIT(129, 4))
    mach->feature[FEATURE_DAZ] = 1;
  else if (XBIT(129, 0x400))
    mach->feature[FEATURE_DAZ] = 0;
  if (XBIT(135, 0x20))
    mach->feature[FEATURE_USE_MOVAPD] = 1;

  /* -Mnoprefetch */
  if (XBIT(39, 1))
    mach->feature[FEATURE_NOPREFETCH] = 1;

  /* -Mvect=simd:128 */
  if (XBIT(56, 0x40))
    mach->feature[FEATURE_SIMD128] = 1;
  /* -Mvect=simd:256 or -Mvect=simd:512 */
  if (XBIT(56, 0x100) || XBIT(56, 0x800))
    mach->feature[FEATURE_SIMD128] = 0;

  /* Loop alignment, selected by XBIT(135) bits 0x2000 and 0x4000:
   *   0x2000 alone -> 16, both -> 8, 0x4000 alone -> none.
   * NOTE(review): the "no align" test below also fires on bit 0x8000, which
   * is one of the two bits used for jump alignment further down -- confirm
   * this overlap is intended. */
  /* align 16 before loops */
  if (XBIT(135, 0x2000) && !XBIT(135, 0x4000)) {
    mach->feature[FEATURE_ALIGNLOOP4] = 0;
    mach->feature[FEATURE_ALIGNLOOP8] = 0;
    mach->feature[FEATURE_ALIGNLOOP16] = 1;
    mach->feature[FEATURE_ALIGNLOOP32] = 0;
  }
  /* align 8 before loops */
  if (XBIT(135, 0x2000) && XBIT(135, 0x4000)) {
    mach->feature[FEATURE_ALIGNLOOP4] = 0;
    mach->feature[FEATURE_ALIGNLOOP8] = 1;
    mach->feature[FEATURE_ALIGNLOOP16] = 0;
    mach->feature[FEATURE_ALIGNLOOP32] = 0;
  }
  /* no align before loops */
  if (XBIT(135, 0x8000) || (XBIT(135, 0x4000) && !XBIT(135, 0x2000))) {
    mach->feature[FEATURE_ALIGNLOOP4] = 0;
    mach->feature[FEATURE_ALIGNLOOP8] = 0;
    mach->feature[FEATURE_ALIGNLOOP16] = 0;
    mach->feature[FEATURE_ALIGNLOOP32] = 0;
  }
  /* Jump alignment, selected by XBIT(135) bits 0x8000 and 0x10000:
   *   both -> 16, 0x8000 alone -> 8, 0x10000 alone -> none.
   * NOTE(review): this pairing differs from the loop case above, where the
   * lower bit alone selects 16 and both bits select 8 -- confirm. */
  /* align 16 after jmp */
  if (XBIT(135, 0x10000) && XBIT(135, 0x8000)) {
    mach->feature[FEATURE_ALIGNJMP8] = 0;
    mach->feature[FEATURE_ALIGNJMP16] = 1;
  }
  /* align 8 after jmp */
  if (!XBIT(135, 0x10000) && XBIT(135, 0x8000)) {
    mach->feature[FEATURE_ALIGNJMP8] = 1;
    mach->feature[FEATURE_ALIGNJMP16] = 0;
  }
  /* no align after jmp */
  if (XBIT(135, 0x10000) && !XBIT(135, 0x8000)) {
    mach->feature[FEATURE_ALIGNJMP8] = 0;
    mach->feature[FEATURE_ALIGNJMP16] = 0;
  }

  /* override feature settings */
  /* Pattern for every pair below: a bit tested with XBIT(171, ...) clears
   * the feature; otherwise the same bit tested with XBIT(172, ...) sets it.
   * When both are set, the clearing test wins because it comes first. */
  if (XBIT(171, 1))
    mach->feature[FEATURE_SCALAR_SSE] = 0;
  else if (XBIT(172, 1))
    mach->feature[FEATURE_SCALAR_SSE] = 1;
  if (XBIT(171, 2))
    mach->feature[FEATURE_SSE] = 0;
  else if (XBIT(172, 2))
    mach->feature[FEATURE_SSE] = 1;
  if (XBIT(171, 4))
    mach->feature[FEATURE_SSE2] = 0;
  else if (XBIT(172, 4))
    mach->feature[FEATURE_SSE2] = 1;
  if (XBIT(171, 8))
    mach->feature[FEATURE_SSE3] = 0;
  else if (XBIT(172, 8))
    mach->feature[FEATURE_SSE3] = 1;
  if (XBIT(171, 0x10))
    mach->feature[FEATURE_SSE41] = 0;
  else if (XBIT(172, 0x10))
    mach->feature[FEATURE_SSE41] = 1;
  if (XBIT(171, 0x20))
    mach->feature[FEATURE_SSE42] = 0;
  else if (XBIT(172, 0x20))
    mach->feature[FEATURE_SSE42] = 1;
  if (XBIT(171, 0x40))
    mach->feature[FEATURE_SSE4A] = 0;
  else if (XBIT(172, 0x40))
    mach->feature[FEATURE_SSE4A] = 1;
  if (XBIT(171, 0x80))
    mach->feature[FEATURE_SSE5] = 0;
  else if (XBIT(172, 0x80))
    mach->feature[FEATURE_SSE5] = 1;
  if (XBIT(171, 0x100))
    mach->feature[FEATURE_MNI] = 0;
  else if (XBIT(172, 0x100))
    mach->feature[FEATURE_MNI] = 1;
  if (XBIT(171, 0x200))
    mach->feature[FEATURE_DAZ] = 0;
  else if (XBIT(172, 0x200))
    mach->feature[FEATURE_DAZ] = 1;
  if (XBIT(171, 0x400))
    mach->feature[FEATURE_PREFER_MOVLPD] = 0;
  else if (XBIT(172, 0x400))
    mach->feature[FEATURE_PREFER_MOVLPD] = 1;
  if (XBIT(171, 0x800))
    mach->feature[FEATURE_USE_INC] = 0;
  else if (XBIT(172, 0x800))
    mach->feature[FEATURE_USE_INC] = 1;
  if (XBIT(171, 0x1000))
    mach->feature[FEATURE_USE_MOVAPD] = 0;
  else if (XBIT(172, 0x1000))
    mach->feature[FEATURE_USE_MOVAPD] = 1;
  if (XBIT(171, 0x2000))
    mach->feature[FEATURE_MERGE_DEPENDENT] = 0;
  else if (XBIT(172, 0x2000))
    mach->feature[FEATURE_MERGE_DEPENDENT] = 1;
  if (XBIT(171, 0x4000))
    mach->feature[FEATURE_SCALAR_NONTEMP] = 0;
  else if (XBIT(172, 0x4000))
    mach->feature[FEATURE_SCALAR_NONTEMP] = 1;
  if (XBIT(171, 0x8000))
    mach->feature[FEATURE_SSEIMAX] = 0;
  else if (XBIT(172, 0x8000))
    mach->feature[FEATURE_SSEIMAX] = 1;
  if (XBIT(171, 0x10000))
    mach->feature[FEATURE_MISALIGNEDSSE] = 0;
  else if (XBIT(172, 0x10000))
    mach->feature[FEATURE_MISALIGNEDSSE] = 1;
  if (XBIT(171, 0x20000))
    mach->feature[FEATURE_LD_MOVUPD] = 0;
  else if (XBIT(172, 0x20000))
    mach->feature[FEATURE_LD_MOVUPD] = 1;
  if (XBIT(171, 0x40000))
    mach->feature[FEATURE_ST_MOVUPD] = 0;
  else if (XBIT(172, 0x40000))
    mach->feature[FEATURE_ST_MOVUPD] = 1;
  if (XBIT(171, 0x80000))
    mach->feature[FEATURE_UNROLL_16] = 0;
  else if (XBIT(172, 0x80000))
    mach->feature[FEATURE_UNROLL_16] = 1;
  if (XBIT(171, 0x100000))
    mach->feature[FEATURE_DOUBLE_UNROLL] = 0;
  else if (XBIT(172, 0x100000))
    mach->feature[FEATURE_DOUBLE_UNROLL] = 1;
  if (XBIT(171, 0x200000))
    mach->feature[FEATURE_PEEL_SHUFFLE] = 0;
  else if (XBIT(172, 0x200000))
    mach->feature[FEATURE_PEEL_SHUFFLE] = 1;
  if (XBIT(171, 0x400000))
    mach->feature[FEATURE_PREFETCHNTA] = 0;
  else if (XBIT(172, 0x400000))
    mach->feature[FEATURE_PREFETCHNTA] = 1;
  if (XBIT(171, 0x800000))
    mach->feature[FEATURE_PDSHUF] = 0;
  else if (XBIT(172, 0x800000))
    mach->feature[FEATURE_PDSHUF] = 1;
  if (XBIT(171, 0x1000000))
    mach->feature[FEATURE_SSEPMAX] = 0;
  else if (XBIT(172, 0x1000000))
    mach->feature[FEATURE_SSEPMAX] = 1;
  if (XBIT(171, 0x2000000))
    mach->feature[FEATURE_GHLIBS] = 0;
  else if (XBIT(172, 0x2000000))
    mach->feature[FEATURE_GHLIBS] = 1;
  if (XBIT(171, 0x4000000))
    mach->feature[FEATURE_SSEMISALN] = 0;
  else if (XBIT(172, 0x4000000))
    mach->feature[FEATURE_SSEMISALN] = 1;
  if (XBIT(171, 0x8000000))
    mach->feature[FEATURE_ABM] = 0;
  else if (XBIT(172, 0x8000000))
    mach->feature[FEATURE_ABM] = 1;
  if (XBIT(171, 0x10000000))
    mach->feature[FEATURE_AVX] = 0;
  else if (XBIT(172, 0x10000000))
    mach->feature[FEATURE_AVX] = 1;
  if (XBIT(171, 0x20000000))
    mach->feature[FEATURE_LRBNI] = 0;
  else if (XBIT(172, 0x20000000))
    mach->feature[FEATURE_LRBNI] = 1;
  /* FMA4 can only be overridden when the selected target set it above */
  if (has_fma4) {
    if (XBIT(171, 0x40000000))
      mach->feature[FEATURE_FMA4] = 0;
    else if (XBIT(172, 0x40000000))
      mach->feature[FEATURE_FMA4] = 1;
  }
  if (XBIT(171, 0x80000000))
    mach->feature[FEATURE_XOP] = 0;
  else if (XBIT(172, 0x80000000))
    mach->feature[FEATURE_XOP] = 1;
  /* From here on the clear/set pair is XBIT(178, ...) / XBIT(179, ...).
   * As with FMA4, FMA3 can only be overridden when has_fma3 was set. */
  if (has_fma3) {
    if (XBIT(178, 0x01))
      mach->feature[FEATURE_FMA3] = 0;
    else if (XBIT(179, 0x01))
      mach->feature[FEATURE_FMA3] = 1;
  }
  if (XBIT(178, 0x02))
    mach->feature[FEATURE_MULTI_ACCUM] = 0;
  else if (XBIT(179, 0x02))
    mach->feature[FEATURE_MULTI_ACCUM] = 1;
  if (XBIT(178, 0x04))
    mach->feature[FEATURE_SIMD128] = 0;
  else if (XBIT(179, 0x04))
    mach->feature[FEATURE_SIMD128] = 1;
  if (XBIT(178, 0x08))
    mach->feature[FEATURE_NOPREFETCH] = 0;
  else if (XBIT(179, 0x08))
    mach->feature[FEATURE_NOPREFETCH] = 1;
  if (XBIT(178, 0x10))
    mach->feature[FEATURE_ALIGNLOOP4] = 0;
  else if (XBIT(179, 0x10))
    mach->feature[FEATURE_ALIGNLOOP4] = 1;
  if (XBIT(178, 0x20))
    mach->feature[FEATURE_ALIGNLOOP8] = 0;
  else if (XBIT(179, 0x20))
    mach->feature[FEATURE_ALIGNLOOP8] = 1;
  if (XBIT(178, 0x40))
    mach->feature[FEATURE_ALIGNLOOP16] = 0;
  else if (XBIT(179, 0x40))
    mach->feature[FEATURE_ALIGNLOOP16] = 1;
  if (XBIT(178, 0x80))
    mach->feature[FEATURE_ALIGNLOOP32] = 0;
  else if (XBIT(179, 0x80))
    mach->feature[FEATURE_ALIGNLOOP32] = 1;
  if (XBIT(178, 0x100))
    mach->feature[FEATURE_LD_VMOVUPD] = 0;
  else if (XBIT(179, 0x100))
    mach->feature[FEATURE_LD_VMOVUPD] = 1;
  if (XBIT(178, 0x200))
    mach->feature[FEATURE_ST_VMOVUPD] = 0;
  else if (XBIT(179, 0x200))
    mach->feature[FEATURE_ST_VMOVUPD] = 1;
  if (XBIT(178, 0x400))
    mach->feature[FEATURE_AVX2] = 0;
  else if (XBIT(179, 0x400))
    mach->feature[FEATURE_AVX2] = 1;
  if (XBIT(178, 0x800))
    mach->feature[FEATURE_AVX512F] = 0;
  else if (XBIT(179, 0x800))
    mach->feature[FEATURE_AVX512F] = 1;
  /* bit 0x1000 is not tested in this function */
  if (XBIT(178, 0x2000))
    mach->feature[FEATURE_AVX512VL] = 0;
  else if (XBIT(179, 0x2000))
    mach->feature[FEATURE_AVX512VL] = 1;

} /* set_mach */
565 
/* Running intersection of machine descriptions: reset by
 * init_mach_intersect(), narrowed by intersect_mach_intersect(), and read
 * back with copy_mach_intersect(). */
static X86TYPE mach_intersect;
568 void
init_mach_intersect()569 init_mach_intersect()
570 {
571   int i;
572   mach_intersect.tpval = 0;
573   /* take minimum of all cache sizes */
574   mach_intersect.cachesize = 0;
575   for (i = 0; i < MACH_NUMBER; ++i)
576     mach_intersect.type[i] = 1;
577   for (i = 0; i < FEATURE_NUMBER; ++i)
578     mach_intersect.feature[i] = 1;
579 } /* init_machintersect */
580 
581 void
intersect_mach_intersect(X86TYPE * mach)582 intersect_mach_intersect(X86TYPE *mach)
583 {
584   int i;
585   if (mach_intersect.cachesize == 0 ||
586       (mach->cachesize && mach->cachesize > mach_intersect.cachesize))
587     mach_intersect.cachesize = mach->cachesize;
588   for (i = 0; i < MACH_NUMBER; ++i) {
589     if (!mach->type[i])
590       mach_intersect.type[i] = 0;
591   }
592   for (i = 0; i < FEATURE_NUMBER; ++i) {
593     if (!mach->feature[i])
594       mach_intersect.feature[i] = 0;
595   }
596 } /* intersect_mach_intersect */
597 
598 void
copy_mach_intersect(X86TYPE * mach)599 copy_mach_intersect(X86TYPE *mach)
600 {
601   int i;
602   mach->cachesize = mach_intersect.cachesize;
603   for (i = 0; i < MACH_NUMBER; ++i)
604     mach->type[i] = mach_intersect.type[i];
605   for (i = 0; i < FEATURE_NUMBER; ++i)
606     mach->feature[i] = mach_intersect.feature[i];
607 } /* copy_mach_intersect */
608 
609 int
machvalue(char * thistpname)610 machvalue(char *thistpname)
611 {
612 #ifdef TARGET_X8664
613   if (strcmp(thistpname, "amd64") == 0)
614     return TP_K8;
615   if (strcmp(thistpname, "amd64e") == 0)
616     return TP_K8E;
617 #endif
618   if (strcmp(thistpname, "athlon") == 0)
619     return TP_K8;
620   if (strcmp(thistpname, "bulldozer") == 0)
621     return TP_BULLDOZER;
622   if (strncmp(thistpname, "core2", 5) == 0)
623     return TP_CORE2;
624   if (strncmp(thistpname, "gh", 2) == 0)
625     return TP_GH;
626   if (strncmp(thistpname, "hammer", 6) == 0)
627     return TP_K8;
628   if (strncmp(thistpname, "haswell", 9) == 0)
629     return TP_HASWELL;
630   if (strncmp(thistpname, "istanbul", 8) == 0)
631     return TP_ISTANBUL;
632   if (strncmp(thistpname, "ivybridge", 9) == 0)
633     return TP_IVYBRIDGE;
634   if (strcmp(thistpname, "k8") == 0)
635     return TP_K8;
636   if (strncmp(thistpname, "k8", 2) == 0 &&
637       thistpname[strlen(thistpname) - 1] == 'e')
638     return TP_K8E;
639   if (strncmp(thistpname, "k8", 2) == 0)
640     return TP_K8;
641   if (strncmp(thistpname, "knl", 3) == 0)
642     return TP_KNIGHTS_LANDING;
643   if (strncmp(thistpname, "nehalem", 7) == 0)
644     return TP_NEHALEM;
645   if (strncmp(thistpname, "p7", 2) == 0)
646     return TP_P7;
647   if (strncmp(thistpname, "penryn", 6) == 0)
648     return TP_PENRYN;
649   if (strcmp(thistpname, "piledriver") == 0)
650     return TP_PILEDRIVER;
651   if (strncmp(thistpname, "px", 2) == 0)
652     return TP_PX;
653   if (strncmp(thistpname, "py", 2) == 0)
654     return TP_PY;
655   if (strncmp(thistpname, "sandybridge", 11) == 0)
656     return TP_SANDYBRIDGE;
657   if (strncmp(thistpname, "shanghai", 8) == 0)
658     return TP_SHANGHAI;
659   if (strncmp(thistpname, "skylake", 7) == 0)
660     return TP_SKYLAKE;
661   if (strncmp(thistpname, "zen", 3) == 0)
662     return TP_ZEN;
663   return 0;
664 } /* machvalue */
665 
666 void
set_tp(char * thistpname)667 set_tp(char *thistpname)
668 {
669   if (flg.tpcount <= TPNVERSION) {
670     int n, i, j;
671     n = machvalue(thistpname);
672     if (n <= 0) {
673       interr("Unexpected value for -tp switch", 0, ERR_Fatal);
674     } else {
675       if (flg.tpcount == 0) {
676         flg.tpvalue[flg.tpcount] = n;
677         ++flg.tpcount;
678       } else {
679       if (n < flg.tpvalue[0])
680         flg.tpvalue[0] = n;
681       }
682     }
683   }
684 } /* set_tp */
685 
/**
   \brief Placeholder with no effect in this file.

   \param skip  not used
 */
void
check_tp(bool skip)
{
  (void)skip; /* intentionally unused */
} /* check_tp */
690 
691 #if DEBUG
692 char *
sxtp(int tp)693 sxtp(int tp)
694 {
695   switch (tp) {
696   case TP_PY:
697     return "py";
698   case TP_PX:
699     return "px";
700   case TP_P5:
701     return "p5";
702   case TP_ATHLON:
703     return "athlon";
704   case TP_P6:
705     return "p6";
706   case TP_ATHLON_XP:
707     return "athlon_xp";
708   case TP_PIII:
709     return "piii";
710   case TP_K8:
711     return "k8";
712   case TP_P7:
713     return "p7";
714   case TP_K8E:
715     return "k8e";
716   case TP_PIV:
717     return "piv";
718   case TP_GH:
719     return "gh";
720   case TP_CORE2:
721     return "core2";
722   case TP_PENRYN:
723     return "penryn";
724   case TP_SHANGHAI:
725     return "shanghai";
726   case TP_ISTANBUL:
727     return "istanbul";
728   case TP_NEHALEM:
729     return "nehalem";
730   case TP_BULLDOZER:
731     return "bulldozer";
732   case TP_SANDYBRIDGE:
733     return "sandybridge";
734   case TP_IVYBRIDGE:
735     return "ivybridge";
736   case TP_HASWELL:
737     return "haswell";
738   case TP_LARRABEE:
739     return "larrabee";
740   case TP_PILEDRIVER:
741     return "piledriver";
742   case TP_KNIGHTS_LANDING:
743     return "knl";
744   case TP_SKYLAKE:
745     return "skylake";
746   case TP_ZEN:
747     return "zen";
748   default:
749     return "??";
750   }
751 } /* sxtp */
752 
753 char *
sxtype(int m)754 sxtype(int m)
755 {
756   switch (m) {
757   case MACH_GENERIC:
758     return "mach_generic";
759   case MACH_INTEL:
760     return "mach_intel";
761   case MACH_INTEL_PENTIUM4:
762     return "mach_pentium4";
763   case MACH_INTEL_CORE2:
764     return "mach_core2";
765   case MACH_INTEL_PENRYN:
766     return "mach_penryn";
767   case MACH_INTEL_NEHALEM:
768     return "mach_nehalem";
769   case MACH_INTEL_SANDYBRIDGE:
770     return "mach_sandybridge";
771   case MACH_INTEL_HASWELL:
772     return "mach_haswell";
773   case MACH_INTEL_KNIGHTS_LANDING:
774     return "mach_knl";
775   case MACH_INTEL_SKYLAKE:
776     return "mach_skylake";
777   case MACH_INTEL_LARRABEE:
778     return "mach_larrabee";
779   case MACH_AMD:
780     return "mach_amd";
781   case MACH_AMD_ATHLON:
782     return "mach_athlon";
783   case MACH_AMD_ATHLON_XP:
784     return "mach_athlon_xp";
785   case MACH_AMD_HAMMER:
786     return "mach_hammer";
787   case MACH_AMD_GH:
788     return "mach_gh";
789   case MACH_AMD_SHANGHAI:
790     return "mach_shanghai";
791   case MACH_AMD_ISTANBUL:
792     return "mach_istanbul";
793   case MACH_AMD_BULLDOZER:
794     return "mach_bulldozer";
795   case MACH_AMD_PILEDRIVER:
796     return "mach_piledriver";
797   case MACH_AMD_ZEN:
798     return "mach_zen";
799   default:
800     return "??";
801   }
802 } /* sxtype */
803 
804 char *
sxfeature(int f)805 sxfeature(int f)
806 {
807   switch (f) {
808   case FEATURE_SCALAR_SSE:
809     return "feature_scalar_sse";
810   case FEATURE_SSE:
811     return "feature_sse";
812   case FEATURE_SSE2:
813     return "feature_sse2";
814   case FEATURE_SSE3:
815     return "feature_sse3";
816   case FEATURE_SSE41:
817     return "feature_sse41";
818   case FEATURE_SSE42:
819     return "feature_sse42";
820   case FEATURE_SSE4A:
821     return "feature_sse4a";
822   case FEATURE_SSE5:
823     return "feature_sse5";
824   case FEATURE_MNI:
825     return "feature_mni";
826   case FEATURE_DAZ:
827     return "feature_daz";
828   case FEATURE_PREFER_MOVLPD:
829     return "feature_prever_movlpd";
830   case FEATURE_USE_INC:
831     return "feature_use_inc";
832   case FEATURE_USE_MOVAPD:
833     return "feature_use_movapd";
834   case FEATURE_MERGE_DEPENDENT:
835     return "feature_merge_dependent";
836   case FEATURE_SCALAR_NONTEMP:
837     return "feature_scalar_nontemp";
838   case FEATURE_SSEIMAX:
839     return "feature_sseimax";
840   case FEATURE_MISALIGNEDSSE:
841     return "feature_misalignedsse";
842   case FEATURE_LD_MOVUPD:
843     return "feature_ld_movupd";
844   case FEATURE_ST_MOVUPD:
845     return "feature_st_movupd";
846   case FEATURE_UNROLL_16:
847     return "feature_unroll_16";
848   case FEATURE_DOUBLE_UNROLL:
849     return "feature_double_unroll";
850   case FEATURE_PEEL_SHUFFLE:
851     return "feature_peel_shuffle";
852   case FEATURE_PREFETCHNTA:
853     return "feature_prefetchnta";
854   case FEATURE_PDSHUF:
855     return "feature_pdshuf";
856   case FEATURE_SSEPMAX:
857     return "feature_ssepmax";
858   case FEATURE_GHLIBS:
859     return "feature_ghlibs";
860   case FEATURE_SSEMISALN:
861     return "feature_ssemisaln";
862   case FEATURE_ABM:
863     return "feature_abm";
864   case FEATURE_AVX:
865     return "feature_avx";
866   case FEATURE_LRBNI:
867     return "feature_lrbni";
868   case FEATURE_FMA4:
869     return "feature_fma4";
870   case FEATURE_XOP:
871     return "feature_xop";
872   case FEATURE_FMA3:
873     return "feature_fma3";
874   case FEATURE_MULTI_ACCUM:
875     return "feature_multi_accum";
876   case FEATURE_SIMD128:
877     return "feature_simd128";
878   case FEATURE_NOPREFETCH:
879     return "feature_noprefetch";
880   case FEATURE_ALIGNLOOP4:
881     return "feature_alignloop4";
882   case FEATURE_ALIGNLOOP8:
883     return "feature_alignloop8";
884   case FEATURE_ALIGNLOOP16:
885     return "feature_alignloop16";
886   case FEATURE_ALIGNLOOP32:
887     return "feature_alignloop32";
888   case FEATURE_ALIGNJMP8:
889     return "feature_alignjmp8";
890   case FEATURE_ALIGNJMP16:
891     return "feature_alignjmp16";
892   case FEATURE_LD_VMOVUPD:
893     return "feature_ld_vmovupd";
894   case FEATURE_ST_VMOVUPD:
895     return "feature_st_vmovupd";
896   case FEATURE_AVX2:
897     return "feature_avx2";
898   case FEATURE_AVX512F:
899     return "feature_avx512f";
900   case FEATURE_AVX512VL:
901     return "feature_avx512vl";
902   default:
903     return "??";
904   }
905 } /* sxfeature */
906 
907 void
_dumpmach(X86TYPE * mach)908 _dumpmach(X86TYPE *mach)
909 {
910   FILE *dfile;
911   int m, f;
912   dfile = gbl.dbgfil ? gbl.dbgfil : stderr;
913   fprintf(dfile, "%d=tpval=%s\n", mach->tpval, sxtp(mach->tpval));
914   for (m = 0; m < MACH_NUMBER; ++m) {
915     if (mach->type[m]) {
916       fprintf(dfile, "%d=type[%2d]=%s\n", mach->type[m], m, sxtype(m));
917     }
918   }
919 
920   for (f = 0; f < FEATURE_NUMBER; ++f) {
921     if (mach->feature[f]) {
922       fprintf(dfile, "%d=feature[%2d]=%s\n", mach->feature[f], f, sxfeature(f));
923     }
924   }
925 
926   fprintf(dfile, "%ld=cachesize\n", mach->cachesize);
927 } /* _dumpmach */
928 
929 void
dumpmach()930 dumpmach()
931 {
932   _dumpmach(&mach);
933 } /* dumpmach */
934 #endif
935