1 /*
2  * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  *
23  */
24 
25 #ifndef CPU_X86_VM_VERSION_X86_HPP
26 #define CPU_X86_VM_VERSION_X86_HPP
27 
28 #include "runtime/abstract_vm_version.hpp"
29 #include "runtime/globals_extension.hpp"
30 
31 class VM_Version : public Abstract_VM_Version {
32   friend class VMStructs;
33   friend class JVMCIVMStructs;
34 
35  public:
36   // cpuid result register layouts.  These are all unions of a uint32_t
37   // (in case anyone wants access to the register as a whole) and a bitfield.
38 
39   union StdCpuid1Eax {
40     uint32_t value;
41     struct {
42       uint32_t stepping   : 4,
43                model      : 4,
44                family     : 4,
45                proc_type  : 2,
46                           : 2,
47                ext_model  : 4,
48                ext_family : 8,
49                           : 4;
50     } bits;
51   };
52 
53   union StdCpuid1Ebx { // example, unused
54     uint32_t value;
55     struct {
56       uint32_t brand_id         : 8,
57                clflush_size     : 8,
58                threads_per_cpu  : 8,
59                apic_id          : 8;
60     } bits;
61   };
62 
63   union StdCpuid1Ecx {
64     uint32_t value;
65     struct {
66       uint32_t sse3     : 1,
67                clmul    : 1,
68                         : 1,
69                monitor  : 1,
70                         : 1,
71                vmx      : 1,
72                         : 1,
73                est      : 1,
74                         : 1,
75                ssse3    : 1,
76                cid      : 1,
77                         : 1,
78                fma      : 1,
79                cmpxchg16: 1,
80                         : 4,
81                dca      : 1,
82                sse4_1   : 1,
83                sse4_2   : 1,
84                         : 2,
85                popcnt   : 1,
86                         : 1,
87                aes      : 1,
88                         : 1,
89                osxsave  : 1,
90                avx      : 1,
91                         : 3;
92     } bits;
93   };
94 
95   union StdCpuid1Edx {
96     uint32_t value;
97     struct {
98       uint32_t          : 4,
99                tsc      : 1,
100                         : 3,
101                cmpxchg8 : 1,
102                         : 6,
103                cmov     : 1,
104                         : 3,
105                clflush  : 1,
106                         : 3,
107                mmx      : 1,
108                fxsr     : 1,
109                sse      : 1,
110                sse2     : 1,
111                         : 1,
112                ht       : 1,
113                         : 3;
114     } bits;
115   };
116 
117   union DcpCpuid4Eax {
118     uint32_t value;
119     struct {
120       uint32_t cache_type    : 5,
121                              : 21,
122                cores_per_cpu : 6;
123     } bits;
124   };
125 
126   union DcpCpuid4Ebx {
127     uint32_t value;
128     struct {
129       uint32_t L1_line_size  : 12,
130                partitions    : 10,
131                associativity : 10;
132     } bits;
133   };
134 
135   union TplCpuidBEbx {
136     uint32_t value;
137     struct {
138       uint32_t logical_cpus : 16,
139                             : 16;
140     } bits;
141   };
142 
143   union ExtCpuid1Ecx {
144     uint32_t value;
145     struct {
146       uint32_t LahfSahf     : 1,
147                CmpLegacy    : 1,
148                             : 3,
149                lzcnt_intel  : 1,
150                lzcnt        : 1,
151                sse4a        : 1,
152                misalignsse  : 1,
153                prefetchw    : 1,
154                             : 22;
155     } bits;
156   };
157 
158   union ExtCpuid1Edx {
159     uint32_t value;
160     struct {
161       uint32_t           : 22,
162                mmx_amd   : 1,
163                mmx       : 1,
164                fxsr      : 1,
165                          : 4,
166                long_mode : 1,
167                tdnow2    : 1,
168                tdnow     : 1;
169     } bits;
170   };
171 
172   union ExtCpuid5Ex {
173     uint32_t value;
174     struct {
175       uint32_t L1_line_size : 8,
176                L1_tag_lines : 8,
177                L1_assoc     : 8,
178                L1_size      : 8;
179     } bits;
180   };
181 
182   union ExtCpuid7Edx {
183     uint32_t value;
184     struct {
185       uint32_t               : 8,
186               tsc_invariance : 1,
187                              : 23;
188     } bits;
189   };
190 
191   union ExtCpuid8Ecx {
192     uint32_t value;
193     struct {
194       uint32_t cores_per_cpu : 8,
195                              : 24;
196     } bits;
197   };
198 
  // cpuid function 7 (structured extended features), EAX.  No individual
  // bits are decoded; only the whole register value is kept.
  union SefCpuid7Eax {
    uint32_t value;
  };
202 
203   union SefCpuid7Ebx {
204     uint32_t value;
205     struct {
206       uint32_t fsgsbase : 1,
207                         : 2,
208                    bmi1 : 1,
209                         : 1,
210                    avx2 : 1,
211                         : 2,
212                    bmi2 : 1,
213                    erms : 1,
214                         : 1,
215                     rtm : 1,
216                         : 4,
217                 avx512f : 1,
218                avx512dq : 1,
219                         : 1,
220                     adx : 1,
221                         : 6,
222                avx512pf : 1,
223                avx512er : 1,
224                avx512cd : 1,
225                     sha : 1,
226                avx512bw : 1,
227                avx512vl : 1;
228     } bits;
229   };
230 
231   union SefCpuid7Ecx {
232     uint32_t value;
233     struct {
234       uint32_t prefetchwt1 : 1,
235                avx512_vbmi : 1,
236                       umip : 1,
237                        pku : 1,
238                      ospke : 1,
239                            : 1,
240               avx512_vbmi2 : 1,
241                            : 1,
242                       gfni : 1,
243                       vaes : 1,
244          avx512_vpclmulqdq : 1,
245                avx512_vnni : 1,
246              avx512_bitalg : 1,
247                            : 1,
248           avx512_vpopcntdq : 1,
249                            : 17;
250     } bits;
251   };
252 
253   union SefCpuid7Edx {
254     uint32_t value;
255     struct {
256       uint32_t             : 2,
257              avx512_4vnniw : 1,
258              avx512_4fmaps : 1,
259                            : 28;
260     } bits;
261   };
262 
263   union ExtCpuid1EEbx {
264     uint32_t value;
265     struct {
266       uint32_t                  : 8,
267                threads_per_core : 8,
268                                 : 16;
269     } bits;
270   };
271 
272   union XemXcr0Eax {
273     uint32_t value;
274     struct {
275       uint32_t x87     : 1,
276                sse     : 1,
277                ymm     : 1,
278                bndregs : 1,
279                bndcsr  : 1,
280                opmask  : 1,
281                zmm512  : 1,
282                zmm32   : 1,
283                        : 24;
284     } bits;
285   };
286 
287 protected:
288   static int _cpu;
289   static int _model;
290   static int _stepping;
291 
292   static address   _cpuinfo_segv_addr; // address of instruction which causes SEGV
293   static address   _cpuinfo_cont_addr; // address of instruction after the one which causes SEGV
294 
295   enum Feature_Flag {
296     CPU_CX8      = (1 << 0), // next bits are from cpuid 1 (EDX)
297     CPU_CMOV     = (1 << 1),
298     CPU_FXSR     = (1 << 2),
299     CPU_HT       = (1 << 3),
300     CPU_MMX      = (1 << 4),
301     CPU_3DNOW_PREFETCH = (1 << 5), // Processor supports 3dnow prefetch and prefetchw instructions
302                                    // may not necessarily support other 3dnow instructions
303     CPU_SSE      = (1 << 6),
304     CPU_SSE2     = (1 << 7),
305     CPU_SSE3     = (1 << 8),  // SSE3 comes from cpuid 1 (ECX)
306     CPU_SSSE3    = (1 << 9),
307     CPU_SSE4A    = (1 << 10),
308     CPU_SSE4_1   = (1 << 11),
309     CPU_SSE4_2   = (1 << 12),
310     CPU_POPCNT   = (1 << 13),
311     CPU_LZCNT    = (1 << 14),
312     CPU_TSC      = (1 << 15),
313     CPU_TSCINV   = (1 << 16),
314     CPU_AVX      = (1 << 17),
315     CPU_AVX2     = (1 << 18),
316     CPU_AES      = (1 << 19),
317     CPU_ERMS     = (1 << 20), // enhanced 'rep movsb/stosb' instructions
318     CPU_CLMUL    = (1 << 21), // carryless multiply for CRC
319     CPU_BMI1     = (1 << 22),
320     CPU_BMI2     = (1 << 23),
321     CPU_RTM      = (1 << 24), // Restricted Transactional Memory instructions
322     CPU_ADX      = (1 << 25),
323     CPU_AVX512F  = (1 << 26), // AVX 512bit foundation instructions
324     CPU_AVX512DQ = (1 << 27),
325     CPU_AVX512PF = (1 << 28),
326     CPU_AVX512ER = (1 << 29),
327     CPU_AVX512CD = (1 << 30)
328     // Keeping sign bit 31 unassigned.
329   };
330 
// Feature bits 32..40 (enums are limited to 31 bit, so these are 64-bit macros).
#define CPU_AVX512BW          ((uint64_t)UCONST64(1) << 32) // bit 32
#define CPU_AVX512VL          ((uint64_t)UCONST64(1) << 33) // bit 33, EVEX instructions with smaller vector length
#define CPU_SHA               ((uint64_t)UCONST64(1) << 34) // bit 34, SHA instructions
#define CPU_FMA               ((uint64_t)UCONST64(1) << 35) // bit 35, FMA instructions
#define CPU_VZEROUPPER        ((uint64_t)UCONST64(1) << 36) // bit 36, Vzeroupper instruction
#define CPU_AVX512_VPOPCNTDQ  ((uint64_t)UCONST64(1) << 37) // bit 37, Vector popcount
#define CPU_AVX512_VPCLMULQDQ ((uint64_t)UCONST64(1) << 38) // bit 38, Vector carryless multiplication
#define CPU_VAES              ((uint64_t)UCONST64(1) << 39) // bit 39, Vector AES instructions
#define CPU_VNNI              ((uint64_t)UCONST64(1) << 40) // bit 40, Vector Neural Network Instructions
340 
341   enum Extended_Family {
342     // AMD
343     CPU_FAMILY_AMD_11H       = 0x11,
344     // ZX
345     CPU_FAMILY_ZX_CORE_F6    = 6,
346     CPU_FAMILY_ZX_CORE_F7    = 7,
347     // Intel
348     CPU_FAMILY_INTEL_CORE    = 6,
349     CPU_MODEL_NEHALEM        = 0x1e,
350     CPU_MODEL_NEHALEM_EP     = 0x1a,
351     CPU_MODEL_NEHALEM_EX     = 0x2e,
352     CPU_MODEL_WESTMERE       = 0x25,
353     CPU_MODEL_WESTMERE_EP    = 0x2c,
354     CPU_MODEL_WESTMERE_EX    = 0x2f,
355     CPU_MODEL_SANDYBRIDGE    = 0x2a,
356     CPU_MODEL_SANDYBRIDGE_EP = 0x2d,
357     CPU_MODEL_IVYBRIDGE_EP   = 0x3a,
358     CPU_MODEL_HASWELL_E3     = 0x3c,
359     CPU_MODEL_HASWELL_E7     = 0x3f,
360     CPU_MODEL_BROADWELL      = 0x3d,
361     CPU_MODEL_SKYLAKE        = 0x55
362   };
363 
364   // cpuid information block.  All info derived from executing cpuid with
365   // various function numbers is stored here.  Intel and AMD info is
366   // merged in this block: accessor methods disentangle it.
367   //
368   // The info block is laid out in subblocks of 4 dwords corresponding to
369   // eax, ebx, ecx and edx, whether or not they contain anything useful.
370   struct CpuidInfo {
371     // cpuid function 0
372     uint32_t std_max_function;
373     uint32_t std_vendor_name_0;
374     uint32_t std_vendor_name_1;
375     uint32_t std_vendor_name_2;
376 
377     // cpuid function 1
378     StdCpuid1Eax std_cpuid1_eax;
379     StdCpuid1Ebx std_cpuid1_ebx;
380     StdCpuid1Ecx std_cpuid1_ecx;
381     StdCpuid1Edx std_cpuid1_edx;
382 
383     // cpuid function 4 (deterministic cache parameters)
384     DcpCpuid4Eax dcp_cpuid4_eax;
385     DcpCpuid4Ebx dcp_cpuid4_ebx;
386     uint32_t     dcp_cpuid4_ecx; // unused currently
387     uint32_t     dcp_cpuid4_edx; // unused currently
388 
389     // cpuid function 7 (structured extended features)
390     SefCpuid7Eax sef_cpuid7_eax;
391     SefCpuid7Ebx sef_cpuid7_ebx;
392     SefCpuid7Ecx sef_cpuid7_ecx;
393     SefCpuid7Edx sef_cpuid7_edx;
394 
395     // cpuid function 0xB (processor topology)
396     // ecx = 0
397     uint32_t     tpl_cpuidB0_eax;
398     TplCpuidBEbx tpl_cpuidB0_ebx;
399     uint32_t     tpl_cpuidB0_ecx; // unused currently
400     uint32_t     tpl_cpuidB0_edx; // unused currently
401 
402     // ecx = 1
403     uint32_t     tpl_cpuidB1_eax;
404     TplCpuidBEbx tpl_cpuidB1_ebx;
405     uint32_t     tpl_cpuidB1_ecx; // unused currently
406     uint32_t     tpl_cpuidB1_edx; // unused currently
407 
408     // ecx = 2
409     uint32_t     tpl_cpuidB2_eax;
410     TplCpuidBEbx tpl_cpuidB2_ebx;
411     uint32_t     tpl_cpuidB2_ecx; // unused currently
412     uint32_t     tpl_cpuidB2_edx; // unused currently
413 
414     // cpuid function 0x80000000 // example, unused
415     uint32_t ext_max_function;
416     uint32_t ext_vendor_name_0;
417     uint32_t ext_vendor_name_1;
418     uint32_t ext_vendor_name_2;
419 
420     // cpuid function 0x80000001
421     uint32_t     ext_cpuid1_eax; // reserved
422     uint32_t     ext_cpuid1_ebx; // reserved
423     ExtCpuid1Ecx ext_cpuid1_ecx;
424     ExtCpuid1Edx ext_cpuid1_edx;
425 
426     // cpuid functions 0x80000002 thru 0x80000004: example, unused
427     uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3;
428     uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7;
429     uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11;
430 
431     // cpuid function 0x80000005 // AMD L1, Intel reserved
432     uint32_t     ext_cpuid5_eax; // unused currently
433     uint32_t     ext_cpuid5_ebx; // reserved
434     ExtCpuid5Ex  ext_cpuid5_ecx; // L1 data cache info (AMD)
435     ExtCpuid5Ex  ext_cpuid5_edx; // L1 instruction cache info (AMD)
436 
437     // cpuid function 0x80000007
438     uint32_t     ext_cpuid7_eax; // reserved
439     uint32_t     ext_cpuid7_ebx; // reserved
440     uint32_t     ext_cpuid7_ecx; // reserved
441     ExtCpuid7Edx ext_cpuid7_edx; // tscinv
442 
443     // cpuid function 0x80000008
444     uint32_t     ext_cpuid8_eax; // unused currently
445     uint32_t     ext_cpuid8_ebx; // reserved
446     ExtCpuid8Ecx ext_cpuid8_ecx;
447     uint32_t     ext_cpuid8_edx; // reserved
448 
449     // cpuid function 0x8000001E // AMD 17h
450     uint32_t      ext_cpuid1E_eax;
451     ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
452     uint32_t      ext_cpuid1E_ecx;
453     uint32_t      ext_cpuid1E_edx; // unused currently
454 
455     // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
456     XemXcr0Eax   xem_xcr0_eax;
457     uint32_t     xem_xcr0_edx; // reserved
458 
459     // Space to save ymm registers after signal handle
460     int          ymm_save[8*4]; // Save ymm0, ymm7, ymm8, ymm15
461 
462     // Space to save zmm registers after signal handle
463     int          zmm_save[16*4]; // Save zmm0, zmm7, zmm8, zmm31
464   };
465 
466   // The actual cpuid info block
467   static CpuidInfo _cpuid_info;
468 
469   // Extractors and predicates
extended_cpu_family()470   static uint32_t extended_cpu_family() {
471     uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family;
472     result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
473     return result;
474   }
475 
extended_cpu_model()476   static uint32_t extended_cpu_model() {
477     uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model;
478     result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
479     return result;
480   }
481 
cpu_stepping()482   static uint32_t cpu_stepping() {
483     uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping;
484     return result;
485   }
486 
logical_processor_count()487   static uint logical_processor_count() {
488     uint result = threads_per_core();
489     return result;
490   }
491 
feature_flags()492   static uint64_t feature_flags() {
493     uint64_t result = 0;
494     if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
495       result |= CPU_CX8;
496     if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
497       result |= CPU_CMOV;
498     if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
499         _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0))
500       result |= CPU_FXSR;
501     // HT flag is set for multi-core processors also.
502     if (threads_per_core() > 1)
503       result |= CPU_HT;
504     if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
505         _cpuid_info.ext_cpuid1_edx.bits.mmx != 0))
506       result |= CPU_MMX;
507     if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
508       result |= CPU_SSE;
509     if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
510       result |= CPU_SSE2;
511     if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
512       result |= CPU_SSE3;
513     if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
514       result |= CPU_SSSE3;
515     if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
516       result |= CPU_SSE4_1;
517     if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
518       result |= CPU_SSE4_2;
519     if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
520       result |= CPU_POPCNT;
521     if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
522         _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
523         _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
524         _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
525       result |= CPU_AVX;
526       result |= CPU_VZEROUPPER;
527       if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
528         result |= CPU_AVX2;
529       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 &&
530           _cpuid_info.xem_xcr0_eax.bits.opmask != 0 &&
531           _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 &&
532           _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
533         result |= CPU_AVX512F;
534         if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
535           result |= CPU_AVX512CD;
536         if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
537           result |= CPU_AVX512DQ;
538         if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
539           result |= CPU_AVX512PF;
540         if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
541           result |= CPU_AVX512ER;
542         if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
543           result |= CPU_AVX512BW;
544         if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
545           result |= CPU_AVX512VL;
546         if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
547           result |= CPU_AVX512_VPOPCNTDQ;
548         if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
549           result |= CPU_AVX512_VPCLMULQDQ;
550         if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
551           result |= CPU_VAES;
552         if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
553           result |= CPU_VNNI;
554       }
555     }
556     if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
557       result |= CPU_BMI1;
558     if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
559       result |= CPU_TSC;
560     if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
561       result |= CPU_TSCINV;
562     if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
563       result |= CPU_AES;
564     if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
565       result |= CPU_ERMS;
566     if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
567       result |= CPU_CLMUL;
568     if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
569       result |= CPU_RTM;
570     if (_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
571        result |= CPU_ADX;
572     if (_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
573       result |= CPU_BMI2;
574     if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
575       result |= CPU_SHA;
576     if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
577       result |= CPU_FMA;
578 
579     // AMD|Hygon features.
580     if (is_amd_family()) {
581       if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) ||
582           (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0))
583         result |= CPU_3DNOW_PREFETCH;
584       if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
585         result |= CPU_LZCNT;
586       if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
587         result |= CPU_SSE4A;
588     }
589     // Intel features.
590     if (is_intel()) {
591       if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
592         result |= CPU_LZCNT;
593       // for Intel, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
594       if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
595         result |= CPU_3DNOW_PREFETCH;
596       }
597     }
598 
599     // ZX features.
600     if (is_zx()) {
601       if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
602         result |= CPU_LZCNT;
603       // for ZX, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
604       if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
605         result |= CPU_3DNOW_PREFETCH;
606       }
607     }
608 
609     return result;
610   }
611 
os_supports_avx_vectors()612   static bool os_supports_avx_vectors() {
613     bool retVal = false;
614     int nreg = 2 LP64_ONLY(+2);
615     if (supports_evex()) {
616       // Verify that OS save/restore all bits of EVEX registers
617       // during signal processing.
618       retVal = true;
619       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
620         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
621           retVal = false;
622           break;
623         }
624       }
625     } else if (supports_avx()) {
626       // Verify that OS save/restore all bits of AVX registers
627       // during signal processing.
628       retVal = true;
629       for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
630         if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
631           retVal = false;
632           break;
633         }
634       }
635       // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
636       if (retVal == false) {
637         // Verify that OS save/restore all bits of EVEX registers
638         // during signal processing.
639         retVal = true;
640         for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
641           if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
642             retVal = false;
643             break;
644           }
645         }
646       }
647     }
648     return retVal;
649   }
650 
651   static void get_processor_features();
652 
653 public:
654   // Offsets for cpuid asm stub
std_cpuid0_offset()655   static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
std_cpuid1_offset()656   static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
dcp_cpuid4_offset()657   static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
sef_cpuid7_offset()658   static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); }
ext_cpuid1_offset()659   static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
ext_cpuid5_offset()660   static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
ext_cpuid7_offset()661   static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
ext_cpuid8_offset()662   static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
ext_cpuid1E_offset()663   static ByteSize ext_cpuid1E_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
tpl_cpuidB0_offset()664   static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
tpl_cpuidB1_offset()665   static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
tpl_cpuidB2_offset()666   static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
xem_xcr0_offset()667   static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
ymm_save_offset()668   static ByteSize ymm_save_offset() { return byte_offset_of(CpuidInfo, ymm_save); }
zmm_save_offset()669   static ByteSize zmm_save_offset() { return byte_offset_of(CpuidInfo, zmm_save); }
670 
671   // The value used to check ymm register after signal handle
ymm_test_value()672   static int ymm_test_value()    { return 0xCAFEBABE; }
673 
674   static void get_cpu_info_wrapper();
set_cpuinfo_segv_addr(address pc)675   static void set_cpuinfo_segv_addr(address pc) { _cpuinfo_segv_addr = pc; }
is_cpuinfo_segv_addr(address pc)676   static bool  is_cpuinfo_segv_addr(address pc) { return _cpuinfo_segv_addr == pc; }
set_cpuinfo_cont_addr(address pc)677   static void set_cpuinfo_cont_addr(address pc) { _cpuinfo_cont_addr = pc; }
cpuinfo_cont_addr()678   static address  cpuinfo_cont_addr()           { return _cpuinfo_cont_addr; }
679 
clean_cpuFeatures()680   static void clean_cpuFeatures()   { _features = 0; }
set_avx_cpuFeatures()681   static void set_avx_cpuFeatures() { _features = (CPU_SSE | CPU_SSE2 | CPU_AVX | CPU_VZEROUPPER ); }
set_evex_cpuFeatures()682   static void set_evex_cpuFeatures() { _features = (CPU_AVX512F | CPU_SSE | CPU_SSE2 | CPU_VZEROUPPER ); }
683 
684 
685   // Initialization
686   static void initialize();
687 
688   // Override Abstract_VM_Version implementation
689   static void print_platform_virtualization_info(outputStream*);
690 
691   // Override Abstract_VM_Version implementation
692   static bool use_biased_locking();
693 
  // Asserts
  // Checks that the family field of cpuid function 1 EAX in _cpuid_info is
  // non-zero, which this class treats as "cpuid data has been captured".
  // The vendor predicates (is_amd, is_hygon, is_intel, is_zx) call this first.
  static void assert_is_initialized() {
    assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
  }
698 
699   //
700   // Processor family:
701   //       3   -  386
702   //       4   -  486
703   //       5   -  Pentium
704   //       6   -  PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon,
705   //              Pentium M, Core Solo, Core Duo, Core2 Duo
706   //    family 6 model:   9,        13,       14,        15
707   //    0x0f   -  Pentium 4, Opteron
708   //
709   // Note: The cpu family should be used to select between
710   //       instruction sequences which are valid on all Intel
711   //       processors.  Use the feature test functions below to
712   //       determine whether a particular instruction is supported.
713   //
cpu_family()714   static int  cpu_family()        { return _cpu;}
is_P6()715   static bool is_P6()             { return cpu_family() >= 6; }
is_amd()716   static bool is_amd()            { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
is_hygon()717   static bool is_hygon()          { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x6F677948; } // 'ogyH'
is_amd_family()718   static bool is_amd_family()     { return is_amd() || is_hygon(); }
is_intel()719   static bool is_intel()          { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
is_zx()720   static bool is_zx()             { assert_is_initialized(); return (_cpuid_info.std_vendor_name_0 == 0x746e6543) || (_cpuid_info.std_vendor_name_0 == 0x68532020); } // 'tneC'||'hS  '
is_atom_family()721   static bool is_atom_family()    { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x36) || (extended_cpu_model() == 0x37) || (extended_cpu_model() == 0x4D))); } //Silvermont and Centerton
is_knights_family()722   static bool is_knights_family() { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x57) || (extended_cpu_model() == 0x85))); } // Xeon Phi 3200/5200/7200 and Future Xeon Phi
723 
supports_processor_topology()724   static bool supports_processor_topology() {
725     return (_cpuid_info.std_max_function >= 0xB) &&
726            // eax[4:0] | ebx[0:15] == 0 indicates invalid topology level.
727            // Some cpus have max cpuid >= 0xB but do not support processor topology.
728            (((_cpuid_info.tpl_cpuidB0_eax & 0x1f) | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus) != 0);
729   }
730 
cores_per_cpu()731   static uint cores_per_cpu()  {
732     uint result = 1;
733     if (is_intel()) {
734       bool supports_topology = supports_processor_topology();
735       if (supports_topology) {
736         result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
737                  _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
738       }
739       if (!supports_topology || result == 0) {
740         result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
741       }
742     } else if (is_amd_family()) {
743       result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
744     } else if (is_zx()) {
745       bool supports_topology = supports_processor_topology();
746       if (supports_topology) {
747         result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
748                  _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
749       }
750       if (!supports_topology || result == 0) {
751         result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
752       }
753     }
754     return result;
755   }
756 
threads_per_core()757   static uint threads_per_core()  {
758     uint result = 1;
759     if (is_intel() && supports_processor_topology()) {
760       result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
761     } else if (is_zx() && supports_processor_topology()) {
762       result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
763     } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
764       if (cpu_family() >= 0x17) {
765         result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
766       } else {
767         result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
768                  cores_per_cpu();
769       }
770     }
771     return (result == 0 ? 1 : result);
772   }
773 
L1_line_size()774   static intx L1_line_size()  {
775     intx result = 0;
776     if (is_intel()) {
777       result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
778     } else if (is_amd_family()) {
779       result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
780     } else if (is_zx()) {
781       result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
782     }
783     if (result < 32) // not defined ?
784       result = 32;   // 32 bytes by default on x86 and other x64
785     return result;
786   }
787 
prefetch_data_size()788   static intx prefetch_data_size()  {
789     return L1_line_size();
790   }
791 
792   //
793   // Feature identification
794   //
supports_cpuid()795   static bool supports_cpuid()    { return _features  != 0; }
supports_cmpxchg8()796   static bool supports_cmpxchg8() { return (_features & CPU_CX8) != 0; }
supports_cmov()797   static bool supports_cmov()     { return (_features & CPU_CMOV) != 0; }
supports_fxsr()798   static bool supports_fxsr()     { return (_features & CPU_FXSR) != 0; }
supports_ht()799   static bool supports_ht()       { return (_features & CPU_HT) != 0; }
supports_mmx()800   static bool supports_mmx()      { return (_features & CPU_MMX) != 0; }
supports_sse()801   static bool supports_sse()      { return (_features & CPU_SSE) != 0; }
supports_sse2()802   static bool supports_sse2()     { return (_features & CPU_SSE2) != 0; }
supports_sse3()803   static bool supports_sse3()     { return (_features & CPU_SSE3) != 0; }
supports_ssse3()804   static bool supports_ssse3()    { return (_features & CPU_SSSE3)!= 0; }
supports_sse4_1()805   static bool supports_sse4_1()   { return (_features & CPU_SSE4_1) != 0; }
supports_sse4_2()806   static bool supports_sse4_2()   { return (_features & CPU_SSE4_2) != 0; }
supports_popcnt()807   static bool supports_popcnt()   { return (_features & CPU_POPCNT) != 0; }
supports_avx()808   static bool supports_avx()      { return (_features & CPU_AVX) != 0; }
supports_avx2()809   static bool supports_avx2()     { return (_features & CPU_AVX2) != 0; }
supports_tsc()810   static bool supports_tsc()      { return (_features & CPU_TSC)    != 0; }
supports_aes()811   static bool supports_aes()      { return (_features & CPU_AES) != 0; }
supports_erms()812   static bool supports_erms()     { return (_features & CPU_ERMS) != 0; }
supports_clmul()813   static bool supports_clmul()    { return (_features & CPU_CLMUL) != 0; }
supports_rtm()814   static bool supports_rtm()      { return (_features & CPU_RTM) != 0; }
supports_bmi1()815   static bool supports_bmi1()     { return (_features & CPU_BMI1) != 0; }
supports_bmi2()816   static bool supports_bmi2()     { return (_features & CPU_BMI2) != 0; }
supports_adx()817   static bool supports_adx()      { return (_features & CPU_ADX) != 0; }
supports_evex()818   static bool supports_evex()     { return (_features & CPU_AVX512F) != 0; }
supports_avx512dq()819   static bool supports_avx512dq() { return (_features & CPU_AVX512DQ) != 0; }
supports_avx512pf()820   static bool supports_avx512pf() { return (_features & CPU_AVX512PF) != 0; }
supports_avx512er()821   static bool supports_avx512er() { return (_features & CPU_AVX512ER) != 0; }
supports_avx512cd()822   static bool supports_avx512cd() { return (_features & CPU_AVX512CD) != 0; }
supports_avx512bw()823   static bool supports_avx512bw() { return (_features & CPU_AVX512BW) != 0; }
supports_avx512vl()824   static bool supports_avx512vl() { return (_features & CPU_AVX512VL) != 0; }
supports_avx512vlbw()825   static bool supports_avx512vlbw() { return (supports_evex() && supports_avx512bw() && supports_avx512vl()); }
supports_avx512vldq()826   static bool supports_avx512vldq() { return (supports_evex() && supports_avx512dq() && supports_avx512vl()); }
supports_avx512vlbwdq()827   static bool supports_avx512vlbwdq() { return (supports_evex() && supports_avx512vl() &&
828                                                 supports_avx512bw() && supports_avx512dq()); }
supports_avx512novl()829   static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); }
supports_avx512nobw()830   static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
supports_avx256only()831   static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
supports_avxonly()832   static bool supports_avxonly()    { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
supports_sha()833   static bool supports_sha()        { return (_features & CPU_SHA) != 0; }
supports_fma()834   static bool supports_fma()        { return (_features & CPU_FMA) != 0 && supports_avx(); }
supports_vzeroupper()835   static bool supports_vzeroupper() { return (_features & CPU_VZEROUPPER) != 0; }
supports_vpopcntdq()836   static bool supports_vpopcntdq()  { return (_features & CPU_AVX512_VPOPCNTDQ) != 0; }
supports_avx512_vpclmulqdq()837   static bool supports_avx512_vpclmulqdq() { return (_features & CPU_AVX512_VPCLMULQDQ) != 0; }
supports_vaes()838   static bool supports_vaes()       { return (_features & CPU_VAES) != 0; }
supports_vnni()839   static bool supports_vnni()       { return (_features & CPU_VNNI) != 0; }
840 
841   // Intel features
is_intel_family_core()842   static bool is_intel_family_core() { return is_intel() &&
843                                        extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
844 
is_intel_skylake()845   static bool is_intel_skylake() { return is_intel_family_core() &&
846                                           extended_cpu_model() == CPU_MODEL_SKYLAKE; }
847 
is_intel_tsc_synched_at_init()848   static bool is_intel_tsc_synched_at_init()  {
849     if (is_intel_family_core()) {
850       uint32_t ext_model = extended_cpu_model();
851       if (ext_model == CPU_MODEL_NEHALEM_EP     ||
852           ext_model == CPU_MODEL_WESTMERE_EP    ||
853           ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
854           ext_model == CPU_MODEL_IVYBRIDGE_EP) {
855         // <= 2-socket invariant tsc support. EX versions are usually used
856         // in > 2-socket systems and likely don't synchronize tscs at
857         // initialization.
858         // Code that uses tsc values must be prepared for them to arbitrarily
859         // jump forward or backward.
860         return true;
861       }
862     }
863     return false;
864   }
865 
866   // AMD features
supports_3dnow_prefetch()867   static bool supports_3dnow_prefetch()    { return (_features & CPU_3DNOW_PREFETCH) != 0; }
supports_mmx_ext()868   static bool supports_mmx_ext()  { return is_amd_family() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; }
supports_lzcnt()869   static bool supports_lzcnt()    { return (_features & CPU_LZCNT) != 0; }
supports_sse4a()870   static bool supports_sse4a()    { return (_features & CPU_SSE4A) != 0; }
871 
is_amd_Barcelona()872   static bool is_amd_Barcelona()  { return is_amd() &&
873                                            extended_cpu_family() == CPU_FAMILY_AMD_11H; }
874 
875   // Intel and AMD newer cores support fast timestamps well
supports_tscinv_bit()876   static bool supports_tscinv_bit() {
877     return (_features & CPU_TSCINV) != 0;
878   }
supports_tscinv()879   static bool supports_tscinv() {
880     return supports_tscinv_bit() &&
881       ((is_amd_family() && !is_amd_Barcelona()) ||
882         is_intel_tsc_synched_at_init());
883   }
884 
885   // Intel Core and newer cpus have fast IDIV instruction (excluding Atom).
has_fast_idiv()886   static bool has_fast_idiv()     { return is_intel() && cpu_family() == 6 &&
887                                            supports_sse3() && _model != 0x1C; }
888 
supports_compare_and_exchange()889   static bool supports_compare_and_exchange() { return true; }
890 
allocate_prefetch_distance(bool use_watermark_prefetch)891   static intx allocate_prefetch_distance(bool use_watermark_prefetch) {
892     // Hardware prefetching (distance/size in bytes):
893     // Pentium 3 -  64 /  32
894     // Pentium 4 - 256 / 128
895     // Athlon    -  64 /  32 ????
896     // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
897     // Core      - 128 /  64
898     //
899     // Software prefetching (distance in bytes / instruction with best score):
900     // Pentium 3 - 128 / prefetchnta
901     // Pentium 4 - 512 / prefetchnta
902     // Athlon    - 128 / prefetchnta
903     // Opteron   - 256 / prefetchnta
904     // Core      - 256 / prefetchnta
905     // It will be used only when AllocatePrefetchStyle > 0
906 
907     if (is_amd_family()) { // AMD | Hygon
908       if (supports_sse2()) {
909         return 256; // Opteron
910       } else {
911         return 128; // Athlon
912       }
913     } else { // Intel
914       if (supports_sse3() && cpu_family() == 6) {
915         if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
916           return 192;
917         } else if (use_watermark_prefetch) { // watermark prefetching on Core
918 #ifdef _LP64
919           return 384;
920 #else
921           return 320;
922 #endif
923         }
924       }
925       if (supports_sse2()) {
926         if (cpu_family() == 6) {
927           return 256; // Pentium M, Core, Core2
928         } else {
929           return 512; // Pentium 4
930         }
931       } else {
932         return 128; // Pentium 3 (and all other old CPUs)
933       }
934     }
935   }
936 
937   // SSE2 and later processors implement a 'pause' instruction
938   // that can be used for efficient implementation of
939   // the intrinsic for java.lang.Thread.onSpinWait()
supports_on_spin_wait()940   static bool supports_on_spin_wait() { return supports_sse2(); }
941 
942   // x86_64 supports fast class initialization checks for static methods.
supports_fast_class_init_checks()943   static bool supports_fast_class_init_checks() {
944     return LP64_ONLY(true) NOT_LP64(false); // not implemented on x86_32
945   }
946 
947 #ifdef __APPLE__
  // Is the CPU running emulated (for example, macOS Rosetta running x86_64 code on M1 ARM (aarch64))?
949   static bool is_cpu_emulated();
950 #endif
951 
952   // support functions for virtualization detection
953  private:
954   static void check_virt_cpuid(uint32_t idx, uint32_t *regs);
955   static void check_virtualizations();
956 };
957 
958 #endif // CPU_X86_VM_VERSION_X86_HPP
959