1 /*
2  * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  *
23  */
24 
25 #include "precompiled.hpp"
26 #include "jvm.h"
27 #include "asm/macroAssembler.hpp"
28 #include "asm/macroAssembler.inline.hpp"
29 #include "logging/log.hpp"
30 #include "logging/logStream.hpp"
31 #include "memory/resourceArea.hpp"
32 #include "runtime/java.hpp"
33 #include "runtime/os.hpp"
34 #include "runtime/stubCodeGenerator.hpp"
35 #include "runtime/vm_version.hpp"
36 #include "utilities/virtualizationSupport.hpp"
37 
38 
39 int VM_Version::_cpu;
40 int VM_Version::_model;
41 int VM_Version::_stepping;
42 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
43 
44 // Address of instruction which causes SEGV
45 address VM_Version::_cpuinfo_segv_addr = 0;
46 // Address of instruction after the one which causes SEGV
47 address VM_Version::_cpuinfo_cont_addr = 0;
48 
49 static BufferBlob* stub_blob;
50 static const int stub_size = 1100;
51 
52 extern "C" {
53   typedef void (*get_cpu_info_stub_t)(void*);
54 }
55 static get_cpu_info_stub_t get_cpu_info_stub = NULL;
56 
57 
58 class VM_Version_StubGenerator: public StubCodeGenerator {
59  public:
60 
VM_Version_StubGenerator(CodeBuffer * c)61   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
62 
generate_get_cpu_info()63   address generate_get_cpu_info() {
64     // Flags to test CPU type.
65     const uint32_t HS_EFL_AC = 0x40000;
66     const uint32_t HS_EFL_ID = 0x200000;
67     // Values for when we don't have a CPUID instruction.
68     const int      CPU_FAMILY_SHIFT = 8;
69     const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
70     const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
71     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
72 
73     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
74     Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
75     Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
76 
77     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
78 #   define __ _masm->
79 
80     address start = __ pc();
81 
82     //
83     // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
84     //
85     // LP64: rcx and rdx are first and second argument registers on windows
86 
87     __ push(rbp);
88 #ifdef _LP64
89     __ mov(rbp, c_rarg0); // cpuid_info address
90 #else
91     __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
92 #endif
93     __ push(rbx);
94     __ push(rsi);
95     __ pushf();          // preserve rbx, and flags
96     __ pop(rax);
97     __ push(rax);
98     __ mov(rcx, rax);
99     //
100     // if we are unable to change the AC flag, we have a 386
101     //
102     __ xorl(rax, HS_EFL_AC);
103     __ push(rax);
104     __ popf();
105     __ pushf();
106     __ pop(rax);
107     __ cmpptr(rax, rcx);
108     __ jccb(Assembler::notEqual, detect_486);
109 
110     __ movl(rax, CPU_FAMILY_386);
111     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
112     __ jmp(done);
113 
114     //
115     // If we are unable to change the ID flag, we have a 486 which does
116     // not support the "cpuid" instruction.
117     //
118     __ bind(detect_486);
119     __ mov(rax, rcx);
120     __ xorl(rax, HS_EFL_ID);
121     __ push(rax);
122     __ popf();
123     __ pushf();
124     __ pop(rax);
125     __ cmpptr(rcx, rax);
126     __ jccb(Assembler::notEqual, detect_586);
127 
128     __ bind(cpu486);
129     __ movl(rax, CPU_FAMILY_486);
130     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
131     __ jmp(done);
132 
133     //
134     // At this point, we have a chip which supports the "cpuid" instruction
135     //
136     __ bind(detect_586);
137     __ xorl(rax, rax);
138     __ cpuid();
139     __ orl(rax, rax);
140     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
141                                         // value of at least 1, we give up and
142                                         // assume a 486
143     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
144     __ movl(Address(rsi, 0), rax);
145     __ movl(Address(rsi, 4), rbx);
146     __ movl(Address(rsi, 8), rcx);
147     __ movl(Address(rsi,12), rdx);
148 
149     __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
150     __ jccb(Assembler::belowEqual, std_cpuid4);
151 
152     //
153     // cpuid(0xB) Processor Topology
154     //
155     __ movl(rax, 0xb);
156     __ xorl(rcx, rcx);   // Threads level
157     __ cpuid();
158 
159     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
160     __ movl(Address(rsi, 0), rax);
161     __ movl(Address(rsi, 4), rbx);
162     __ movl(Address(rsi, 8), rcx);
163     __ movl(Address(rsi,12), rdx);
164 
165     __ movl(rax, 0xb);
166     __ movl(rcx, 1);     // Cores level
167     __ cpuid();
168     __ push(rax);
169     __ andl(rax, 0x1f);  // Determine if valid topology level
170     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
171     __ andl(rax, 0xffff);
172     __ pop(rax);
173     __ jccb(Assembler::equal, std_cpuid4);
174 
175     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
176     __ movl(Address(rsi, 0), rax);
177     __ movl(Address(rsi, 4), rbx);
178     __ movl(Address(rsi, 8), rcx);
179     __ movl(Address(rsi,12), rdx);
180 
181     __ movl(rax, 0xb);
182     __ movl(rcx, 2);     // Packages level
183     __ cpuid();
184     __ push(rax);
185     __ andl(rax, 0x1f);  // Determine if valid topology level
186     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
187     __ andl(rax, 0xffff);
188     __ pop(rax);
189     __ jccb(Assembler::equal, std_cpuid4);
190 
191     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
192     __ movl(Address(rsi, 0), rax);
193     __ movl(Address(rsi, 4), rbx);
194     __ movl(Address(rsi, 8), rcx);
195     __ movl(Address(rsi,12), rdx);
196 
197     //
198     // cpuid(0x4) Deterministic cache params
199     //
200     __ bind(std_cpuid4);
201     __ movl(rax, 4);
202     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
203     __ jccb(Assembler::greater, std_cpuid1);
204 
205     __ xorl(rcx, rcx);   // L1 cache
206     __ cpuid();
207     __ push(rax);
208     __ andl(rax, 0x1f);  // Determine if valid cache parameters used
209     __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
210     __ pop(rax);
211     __ jccb(Assembler::equal, std_cpuid1);
212 
213     __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
214     __ movl(Address(rsi, 0), rax);
215     __ movl(Address(rsi, 4), rbx);
216     __ movl(Address(rsi, 8), rcx);
217     __ movl(Address(rsi,12), rdx);
218 
219     //
220     // Standard cpuid(0x1)
221     //
222     __ bind(std_cpuid1);
223     __ movl(rax, 1);
224     __ cpuid();
225     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
226     __ movl(Address(rsi, 0), rax);
227     __ movl(Address(rsi, 4), rbx);
228     __ movl(Address(rsi, 8), rcx);
229     __ movl(Address(rsi,12), rdx);
230 
231     //
232     // Check if OS has enabled XGETBV instruction to access XCR0
233     // (OSXSAVE feature flag) and CPU supports AVX
234     //
235     __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
236     __ cmpl(rcx, 0x18000000);
237     __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
238 
239     //
240     // XCR0, XFEATURE_ENABLED_MASK register
241     //
242     __ xorl(rcx, rcx);   // zero for XCR0 register
243     __ xgetbv();
244     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
245     __ movl(Address(rsi, 0), rax);
246     __ movl(Address(rsi, 4), rdx);
247 
248     //
249     // cpuid(0x7) Structured Extended Features
250     //
251     __ bind(sef_cpuid);
252     __ movl(rax, 7);
253     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
254     __ jccb(Assembler::greater, ext_cpuid);
255 
256     __ xorl(rcx, rcx);
257     __ cpuid();
258     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
259     __ movl(Address(rsi, 0), rax);
260     __ movl(Address(rsi, 4), rbx);
261     __ movl(Address(rsi, 8), rcx);
262     __ movl(Address(rsi, 12), rdx);
263 
264     //
265     // Extended cpuid(0x80000000)
266     //
267     __ bind(ext_cpuid);
268     __ movl(rax, 0x80000000);
269     __ cpuid();
270     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
271     __ jcc(Assembler::belowEqual, done);
272     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
273     __ jcc(Assembler::belowEqual, ext_cpuid1);
274     __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
275     __ jccb(Assembler::belowEqual, ext_cpuid5);
276     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
277     __ jccb(Assembler::belowEqual, ext_cpuid7);
278     __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
279     __ jccb(Assembler::belowEqual, ext_cpuid8);
280     __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
281     __ jccb(Assembler::below, ext_cpuid8);
282     //
283     // Extended cpuid(0x8000001E)
284     //
285     __ movl(rax, 0x8000001E);
286     __ cpuid();
287     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
288     __ movl(Address(rsi, 0), rax);
289     __ movl(Address(rsi, 4), rbx);
290     __ movl(Address(rsi, 8), rcx);
291     __ movl(Address(rsi,12), rdx);
292 
293     //
294     // Extended cpuid(0x80000008)
295     //
296     __ bind(ext_cpuid8);
297     __ movl(rax, 0x80000008);
298     __ cpuid();
299     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
300     __ movl(Address(rsi, 0), rax);
301     __ movl(Address(rsi, 4), rbx);
302     __ movl(Address(rsi, 8), rcx);
303     __ movl(Address(rsi,12), rdx);
304 
305     //
306     // Extended cpuid(0x80000007)
307     //
308     __ bind(ext_cpuid7);
309     __ movl(rax, 0x80000007);
310     __ cpuid();
311     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
312     __ movl(Address(rsi, 0), rax);
313     __ movl(Address(rsi, 4), rbx);
314     __ movl(Address(rsi, 8), rcx);
315     __ movl(Address(rsi,12), rdx);
316 
317     //
318     // Extended cpuid(0x80000005)
319     //
320     __ bind(ext_cpuid5);
321     __ movl(rax, 0x80000005);
322     __ cpuid();
323     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
324     __ movl(Address(rsi, 0), rax);
325     __ movl(Address(rsi, 4), rbx);
326     __ movl(Address(rsi, 8), rcx);
327     __ movl(Address(rsi,12), rdx);
328 
329     //
330     // Extended cpuid(0x80000001)
331     //
332     __ bind(ext_cpuid1);
333     __ movl(rax, 0x80000001);
334     __ cpuid();
335     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
336     __ movl(Address(rsi, 0), rax);
337     __ movl(Address(rsi, 4), rbx);
338     __ movl(Address(rsi, 8), rcx);
339     __ movl(Address(rsi,12), rdx);
340 
341     //
342     // Check if OS has enabled XGETBV instruction to access XCR0
343     // (OSXSAVE feature flag) and CPU supports AVX
344     //
345     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
346     __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
347     __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
348     __ cmpl(rcx, 0x18000000);
349     __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
350 
351     __ movl(rax, 0x6);
352     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
353     __ cmpl(rax, 0x6);
354     __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
355 
356     // we need to bridge farther than imm8, so we use this island as a thunk
357     __ bind(done);
358     __ jmp(wrapup);
359 
360     __ bind(start_simd_check);
361     //
362     // Some OSs have a bug when upper 128/256bits of YMM/ZMM
363     // registers are not restored after a signal processing.
364     // Generate SEGV here (reference through NULL)
365     // and check upper YMM/ZMM bits after it.
366     //
367     intx saved_useavx = UseAVX;
368     intx saved_usesse = UseSSE;
369 
370     // If UseAVX is unitialized or is set by the user to include EVEX
371     if (use_evex) {
372       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
373       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
374       __ movl(rax, 0x10000);
375       __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
376       __ cmpl(rax, 0x10000);
377       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
378       // check _cpuid_info.xem_xcr0_eax.bits.opmask
379       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
380       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
381       __ movl(rax, 0xE0);
382       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
383       __ cmpl(rax, 0xE0);
384       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
385 
386       if (FLAG_IS_DEFAULT(UseAVX)) {
387         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
388         __ movl(rax, Address(rsi, 0));
389         __ cmpl(rax, 0x50654);              // If it is Skylake
390         __ jcc(Assembler::equal, legacy_setup);
391       }
392       // EVEX setup: run in lowest evex mode
393       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
394       UseAVX = 3;
395       UseSSE = 2;
396 #ifdef _WINDOWS
397       // xmm5-xmm15 are not preserved by caller on windows
398       // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
399       __ subptr(rsp, 64);
400       __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
401 #ifdef _LP64
402       __ subptr(rsp, 64);
403       __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
404       __ subptr(rsp, 64);
405       __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
406 #endif // _LP64
407 #endif // _WINDOWS
408 
409       // load value into all 64 bytes of zmm7 register
410       __ movl(rcx, VM_Version::ymm_test_value());
411       __ movdl(xmm0, rcx);
412       __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
413       __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
414 #ifdef _LP64
415       __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
416       __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
417 #endif
418       VM_Version::clean_cpuFeatures();
419       __ jmp(save_restore_except);
420     }
421 
422     __ bind(legacy_setup);
423     // AVX setup
424     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
425     UseAVX = 1;
426     UseSSE = 2;
427 #ifdef _WINDOWS
428     __ subptr(rsp, 32);
429     __ vmovdqu(Address(rsp, 0), xmm7);
430 #ifdef _LP64
431     __ subptr(rsp, 32);
432     __ vmovdqu(Address(rsp, 0), xmm8);
433     __ subptr(rsp, 32);
434     __ vmovdqu(Address(rsp, 0), xmm15);
435 #endif // _LP64
436 #endif // _WINDOWS
437 
438     // load value into all 32 bytes of ymm7 register
439     __ movl(rcx, VM_Version::ymm_test_value());
440 
441     __ movdl(xmm0, rcx);
442     __ pshufd(xmm0, xmm0, 0x00);
443     __ vinsertf128_high(xmm0, xmm0);
444     __ vmovdqu(xmm7, xmm0);
445 #ifdef _LP64
446     __ vmovdqu(xmm8, xmm0);
447     __ vmovdqu(xmm15, xmm0);
448 #endif
449     VM_Version::clean_cpuFeatures();
450 
451     __ bind(save_restore_except);
452     __ xorl(rsi, rsi);
453     VM_Version::set_cpuinfo_segv_addr(__ pc());
454     // Generate SEGV
455     __ movl(rax, Address(rsi, 0));
456 
457     VM_Version::set_cpuinfo_cont_addr(__ pc());
458     // Returns here after signal. Save xmm0 to check it later.
459 
460     // If UseAVX is unitialized or is set by the user to include EVEX
461     if (use_evex) {
462       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
463       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
464       __ movl(rax, 0x10000);
465       __ andl(rax, Address(rsi, 4));
466       __ cmpl(rax, 0x10000);
467       __ jcc(Assembler::notEqual, legacy_save_restore);
468       // check _cpuid_info.xem_xcr0_eax.bits.opmask
469       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
470       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
471       __ movl(rax, 0xE0);
472       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
473       __ cmpl(rax, 0xE0);
474       __ jcc(Assembler::notEqual, legacy_save_restore);
475 
476       if (FLAG_IS_DEFAULT(UseAVX)) {
477         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
478         __ movl(rax, Address(rsi, 0));
479         __ cmpl(rax, 0x50654);              // If it is Skylake
480         __ jcc(Assembler::equal, legacy_save_restore);
481       }
482       // EVEX check: run in lowest evex mode
483       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
484       UseAVX = 3;
485       UseSSE = 2;
486       __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
487       __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
488       __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
489 #ifdef _LP64
490       __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
491       __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
492 #endif
493 
494 #ifdef _WINDOWS
495 #ifdef _LP64
496       __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
497       __ addptr(rsp, 64);
498       __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
499       __ addptr(rsp, 64);
500 #endif // _LP64
501       __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
502       __ addptr(rsp, 64);
503 #endif // _WINDOWS
504       generate_vzeroupper(wrapup);
505       VM_Version::clean_cpuFeatures();
506       UseAVX = saved_useavx;
507       UseSSE = saved_usesse;
508       __ jmp(wrapup);
509    }
510 
511     __ bind(legacy_save_restore);
512     // AVX check
513     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
514     UseAVX = 1;
515     UseSSE = 2;
516     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
517     __ vmovdqu(Address(rsi, 0), xmm0);
518     __ vmovdqu(Address(rsi, 32), xmm7);
519 #ifdef _LP64
520     __ vmovdqu(Address(rsi, 64), xmm8);
521     __ vmovdqu(Address(rsi, 96), xmm15);
522 #endif
523 
524 #ifdef _WINDOWS
525 #ifdef _LP64
526     __ vmovdqu(xmm15, Address(rsp, 0));
527     __ addptr(rsp, 32);
528     __ vmovdqu(xmm8, Address(rsp, 0));
529     __ addptr(rsp, 32);
530 #endif // _LP64
531     __ vmovdqu(xmm7, Address(rsp, 0));
532     __ addptr(rsp, 32);
533 #endif // _WINDOWS
534     generate_vzeroupper(wrapup);
535     VM_Version::clean_cpuFeatures();
536     UseAVX = saved_useavx;
537     UseSSE = saved_usesse;
538 
539     __ bind(wrapup);
540     __ popf();
541     __ pop(rsi);
542     __ pop(rbx);
543     __ pop(rbp);
544     __ ret(0);
545 
546 #   undef __
547 
548     return start;
549   };
generate_vzeroupper(Label & L_wrapup)550   void generate_vzeroupper(Label& L_wrapup) {
551 #   define __ _masm->
552     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
553     __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
554     __ jcc(Assembler::notEqual, L_wrapup);
555     __ movl(rcx, 0x0FFF0FF0);
556     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
557     __ andl(rcx, Address(rsi, 0));
558     __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
559     __ jcc(Assembler::equal, L_wrapup);
560     __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
561     __ jcc(Assembler::equal, L_wrapup);
562     __ vzeroupper();
563 #   undef __
564   }
565 };
566 
get_processor_features()567 void VM_Version::get_processor_features() {
568 
569   _cpu = 4; // 486 by default
570   _model = 0;
571   _stepping = 0;
572   _features = 0;
573   _logical_processors_per_package = 1;
574   // i486 internal cache is both I&D and has a 16-byte line size
575   _L1_data_cache_line_size = 16;
576 
577   // Get raw processor info
578 
579   get_cpu_info_stub(&_cpuid_info);
580 
581   assert_is_initialized();
582   _cpu = extended_cpu_family();
583   _model = extended_cpu_model();
584   _stepping = cpu_stepping();
585 
586   if (cpu_family() > 4) { // it supports CPUID
587     _features = feature_flags();
588     // Logical processors are only available on P4s and above,
589     // and only if hyperthreading is available.
590     _logical_processors_per_package = logical_processor_count();
591     _L1_data_cache_line_size = L1_line_size();
592   }
593 
594   _supports_cx8 = supports_cmpxchg8();
595   // xchg and xadd instructions
596   _supports_atomic_getset4 = true;
597   _supports_atomic_getadd4 = true;
598   LP64_ONLY(_supports_atomic_getset8 = true);
599   LP64_ONLY(_supports_atomic_getadd8 = true);
600 
601 #ifdef _LP64
602   // OS should support SSE for x64 and hardware should support at least SSE2.
603   if (!VM_Version::supports_sse2()) {
604     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
605   }
606   // in 64 bit the use of SSE2 is the minimum
607   if (UseSSE < 2) UseSSE = 2;
608 #endif
609 
610 #ifdef AMD64
611   // flush_icache_stub have to be generated first.
612   // That is why Icache line size is hard coded in ICache class,
613   // see icache_x86.hpp. It is also the reason why we can't use
614   // clflush instruction in 32-bit VM since it could be running
615   // on CPU which does not support it.
616   //
617   // The only thing we can do is to verify that flushed
618   // ICache::line_size has correct value.
619   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
620   // clflush_size is size in quadwords (8 bytes).
621   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
622 #endif
623 
624   // If the OS doesn't support SSE, we can't use this feature even if the HW does
625   if (!os::supports_sse())
626     _features &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
627 
628   if (UseSSE < 4) {
629     _features &= ~CPU_SSE4_1;
630     _features &= ~CPU_SSE4_2;
631   }
632 
633   if (UseSSE < 3) {
634     _features &= ~CPU_SSE3;
635     _features &= ~CPU_SSSE3;
636     _features &= ~CPU_SSE4A;
637   }
638 
639   if (UseSSE < 2)
640     _features &= ~CPU_SSE2;
641 
642   if (UseSSE < 1)
643     _features &= ~CPU_SSE;
644 
645   //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
646   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
647     UseAVX = 0;
648   }
649 
650   // first try initial setting and detect what we can support
651   int use_avx_limit = 0;
652   if (UseAVX > 0) {
653     if (UseAVX > 2 && supports_evex()) {
654       use_avx_limit = 3;
655     } else if (UseAVX > 1 && supports_avx2()) {
656       use_avx_limit = 2;
657     } else if (UseAVX > 0 && supports_avx()) {
658       use_avx_limit = 1;
659     } else {
660       use_avx_limit = 0;
661     }
662   }
663   if (FLAG_IS_DEFAULT(UseAVX)) {
664     // Don't use AVX-512 on older Skylakes unless explicitly requested.
665     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
666       FLAG_SET_DEFAULT(UseAVX, 2);
667     } else {
668       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
669     }
670   }
671   if (UseAVX > use_avx_limit) {
672     warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", (int) UseAVX, use_avx_limit);
673     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
674   } else if (UseAVX < 0) {
675     warning("UseAVX=%d is not valid, setting it to UseAVX=0", (int) UseAVX);
676     FLAG_SET_DEFAULT(UseAVX, 0);
677   }
678 
679   if (UseAVX < 3) {
680     _features &= ~CPU_AVX512F;
681     _features &= ~CPU_AVX512DQ;
682     _features &= ~CPU_AVX512CD;
683     _features &= ~CPU_AVX512BW;
684     _features &= ~CPU_AVX512VL;
685     _features &= ~CPU_AVX512_VPOPCNTDQ;
686     _features &= ~CPU_AVX512_VPCLMULQDQ;
687     _features &= ~CPU_VAES;
688   }
689 
690   if (UseAVX < 2)
691     _features &= ~CPU_AVX2;
692 
693   if (UseAVX < 1) {
694     _features &= ~CPU_AVX;
695     _features &= ~CPU_VZEROUPPER;
696   }
697 
698   if (logical_processors_per_package() == 1) {
699     // HT processor could be installed on a system which doesn't support HT.
700     _features &= ~CPU_HT;
701   }
702 
703   if (is_intel()) { // Intel cpus specific settings
704     if (is_knights_family()) {
705       _features &= ~CPU_VZEROUPPER;
706     }
707   }
708 
709   char buf[256];
710   jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
711                cores_per_cpu(), threads_per_core(),
712                cpu_family(), _model, _stepping,
713                (supports_cmov() ? ", cmov" : ""),
714                (supports_cmpxchg8() ? ", cx8" : ""),
715                (supports_fxsr() ? ", fxsr" : ""),
716                (supports_mmx()  ? ", mmx"  : ""),
717                (supports_sse()  ? ", sse"  : ""),
718                (supports_sse2() ? ", sse2" : ""),
719                (supports_sse3() ? ", sse3" : ""),
720                (supports_ssse3()? ", ssse3": ""),
721                (supports_sse4_1() ? ", sse4.1" : ""),
722                (supports_sse4_2() ? ", sse4.2" : ""),
723                (supports_popcnt() ? ", popcnt" : ""),
724                (supports_avx()    ? ", avx" : ""),
725                (supports_avx2()   ? ", avx2" : ""),
726                (supports_aes()    ? ", aes" : ""),
727                (supports_clmul()  ? ", clmul" : ""),
728                (supports_erms()   ? ", erms" : ""),
729                (supports_rtm()    ? ", rtm" : ""),
730                (supports_mmx_ext() ? ", mmxext" : ""),
731                (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
732                (supports_lzcnt()   ? ", lzcnt": ""),
733                (supports_sse4a()   ? ", sse4a": ""),
734                (supports_ht() ? ", ht": ""),
735                (supports_tsc() ? ", tsc": ""),
736                (supports_tscinv_bit() ? ", tscinvbit": ""),
737                (supports_tscinv() ? ", tscinv": ""),
738                (supports_bmi1() ? ", bmi1" : ""),
739                (supports_bmi2() ? ", bmi2" : ""),
740                (supports_adx() ? ", adx" : ""),
741                (supports_evex() ? ", evex" : ""),
742                (supports_sha() ? ", sha" : ""),
743                (supports_fma() ? ", fma" : ""));
744   _features_string = os::strdup(buf);
745 
746   // UseSSE is set to the smaller of what hardware supports and what
747   // the command line requires.  I.e., you cannot set UseSSE to 2 on
748   // older Pentiums which do not support it.
749   int use_sse_limit = 0;
750   if (UseSSE > 0) {
751     if (UseSSE > 3 && supports_sse4_1()) {
752       use_sse_limit = 4;
753     } else if (UseSSE > 2 && supports_sse3()) {
754       use_sse_limit = 3;
755     } else if (UseSSE > 1 && supports_sse2()) {
756       use_sse_limit = 2;
757     } else if (UseSSE > 0 && supports_sse()) {
758       use_sse_limit = 1;
759     } else {
760       use_sse_limit = 0;
761     }
762   }
763   if (FLAG_IS_DEFAULT(UseSSE)) {
764     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
765   } else if (UseSSE > use_sse_limit) {
766     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", (int) UseSSE, use_sse_limit);
767     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
768   } else if (UseSSE < 0) {
769     warning("UseSSE=%d is not valid, setting it to UseSSE=0", (int) UseSSE);
770     FLAG_SET_DEFAULT(UseSSE, 0);
771   }
772 
773   // Use AES instructions if available.
774   if (supports_aes()) {
775     if (FLAG_IS_DEFAULT(UseAES)) {
776       FLAG_SET_DEFAULT(UseAES, true);
777     }
778     if (!UseAES) {
779       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
780         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
781       }
782       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
783     } else {
784       if (UseSSE > 2) {
785         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
786           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
787         }
788       } else {
789         // The AES intrinsic stubs require AES instruction support (of course)
790         // but also require sse3 mode or higher for instructions it use.
791         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
792           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
793         }
794         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
795       }
796 
797       // --AES-CTR begins--
798       if (!UseAESIntrinsics) {
799         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
800           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
801           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
802         }
803       } else {
804         if (supports_sse4_1()) {
805           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
806             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
807           }
808         } else {
809            // The AES-CTR intrinsic stubs require AES instruction support (of course)
810            // but also require sse4.1 mode or higher for instructions it use.
811           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
812              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
813            }
814            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
815         }
816       }
817       // --AES-CTR ends--
818     }
819   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
820     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
821       warning("AES instructions are not available on this CPU");
822       FLAG_SET_DEFAULT(UseAES, false);
823     }
824     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
825       warning("AES intrinsics are not available on this CPU");
826       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
827     }
828     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
829       warning("AES-CTR intrinsics are not available on this CPU");
830       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
831     }
832   }
833 
834   // Use CLMUL instructions if available.
835   if (supports_clmul()) {
836     if (FLAG_IS_DEFAULT(UseCLMUL)) {
837       UseCLMUL = true;
838     }
839   } else if (UseCLMUL) {
840     if (!FLAG_IS_DEFAULT(UseCLMUL))
841       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
842     FLAG_SET_DEFAULT(UseCLMUL, false);
843   }
844 
845   if (UseCLMUL && (UseSSE > 2)) {
846     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
847       UseCRC32Intrinsics = true;
848     }
849   } else if (UseCRC32Intrinsics) {
850     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
851       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
852     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
853   }
854 
855   if (supports_sse4_2() && supports_clmul()) {
856     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
857       UseCRC32CIntrinsics = true;
858     }
859   } else if (UseCRC32CIntrinsics) {
860     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
861       warning("CRC32C intrinsics are not available on this CPU");
862     }
863     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
864   }
865 
866   // GHASH/GCM intrinsics
867   if (UseCLMUL && (UseSSE > 2)) {
868     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
869       UseGHASHIntrinsics = true;
870     }
871   } else if (UseGHASHIntrinsics) {
872     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
873       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
874     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
875   }
876 
877   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
878   if ((UseAVX > 2) && supports_avx512vl() && supports_avx512bw()) {
879     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
880       UseBASE64Intrinsics = true;
881     }
882   } else if (UseBASE64Intrinsics) {
883      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
884       warning("Base64 intrinsic requires EVEX instructions on this CPU");
885     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
886   }
887 
888   if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
889     if (FLAG_IS_DEFAULT(UseFMA)) {
890       UseFMA = true;
891     }
892   } else if (UseFMA) {
893     warning("FMA instructions are not available on this CPU");
894     FLAG_SET_DEFAULT(UseFMA, false);
895   }
896 
897   if (supports_sha() LP64_ONLY(|| supports_avx2() && supports_bmi2())) {
898     if (FLAG_IS_DEFAULT(UseSHA)) {
899       UseSHA = true;
900     }
901   } else if (UseSHA) {
902     warning("SHA instructions are not available on this CPU");
903     FLAG_SET_DEFAULT(UseSHA, false);
904   }
905 
906   if (supports_sha() && supports_sse4_1() && UseSHA) {
907     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
908       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
909     }
910   } else if (UseSHA1Intrinsics) {
911     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
912     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
913   }
914 
915   if (supports_sse4_1() && UseSHA) {
916     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
917       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
918     }
919   } else if (UseSHA256Intrinsics) {
920     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
921     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
922   }
923 
924 #ifdef _LP64
925   // These are only supported on 64-bit
926   if (UseSHA && supports_avx2() && supports_bmi2()) {
927     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
928       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
929     }
930   } else
931 #endif
932   if (UseSHA512Intrinsics) {
933     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
934     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
935   }
936 
937   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
938     FLAG_SET_DEFAULT(UseSHA, false);
939   }
940 
941   if (UseAdler32Intrinsics) {
942     warning("Adler32Intrinsics not available on this CPU.");
943     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
944   }
945 
946   if (!supports_rtm() && UseRTMLocking) {
947     // Can't continue because UseRTMLocking affects UseBiasedLocking flag
948     // setting during arguments processing. See use_biased_locking().
949     // VM_Version_init() is executed after UseBiasedLocking is used
950     // in Thread::allocate().
951     vm_exit_during_initialization("RTM instructions are not available on this CPU");
952   }
953 
954 #if INCLUDE_RTM_OPT
955   if (UseRTMLocking) {
956     if (is_client_compilation_mode_vm()) {
957       // Only C2 does RTM locking optimization.
958       // Can't continue because UseRTMLocking affects UseBiasedLocking flag
959       // setting during arguments processing. See use_biased_locking().
960       vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
961     }
962     if (is_intel_family_core()) {
963       if ((_model == CPU_MODEL_HASWELL_E3) ||
964           (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
965           (_model == CPU_MODEL_BROADWELL  && _stepping < 4)) {
966         // currently a collision between SKL and HSW_E3
967         if (!UnlockExperimentalVMOptions && UseAVX < 3) {
968           vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this "
969                                         "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
970         } else {
971           warning("UseRTMLocking is only available as experimental option on this platform.");
972         }
973       }
974     }
975     if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
976       // RTM locking should be used only for applications with
977       // high lock contention. For now we do not use it by default.
978       vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
979     }
980   } else { // !UseRTMLocking
981     if (UseRTMForStackLocks) {
982       if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
983         warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
984       }
985       FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
986     }
987     if (UseRTMDeopt) {
988       FLAG_SET_DEFAULT(UseRTMDeopt, false);
989     }
990     if (PrintPreciseRTMLockingStatistics) {
991       FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
992     }
993   }
994 #else
995   if (UseRTMLocking) {
996     // Only C2 does RTM locking optimization.
997     // Can't continue because UseRTMLocking affects UseBiasedLocking flag
998     // setting during arguments processing. See use_biased_locking().
999     vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1000   }
1001 #endif
1002 
1003 #ifdef COMPILER2
1004   if (UseFPUForSpilling) {
1005     if (UseSSE < 2) {
1006       // Only supported with SSE2+
1007       FLAG_SET_DEFAULT(UseFPUForSpilling, false);
1008     }
1009   }
1010 #endif
1011 
1012 #if COMPILER2_OR_JVMCI
1013   int max_vector_size = 0;
1014   if (UseSSE < 2) {
1015     // Vectors (in XMM) are only supported with SSE2+
1016     // SSE is always 2 on x64.
1017     max_vector_size = 0;
1018   } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
1019     // 16 byte vectors (in XMM) are supported with SSE2+
1020     max_vector_size = 16;
1021   } else if (UseAVX == 1 || UseAVX == 2) {
1022     // 32 bytes vectors (in YMM) are only supported with AVX+
1023     max_vector_size = 32;
1024   } else if (UseAVX > 2) {
1025     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1026     max_vector_size = 64;
1027   }
1028 
1029 #ifdef _LP64
1030   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1031 #else
1032   int min_vector_size = 0;
1033 #endif
1034 
1035   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1036     if (MaxVectorSize < min_vector_size) {
1037       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1038       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1039     }
1040     if (MaxVectorSize > max_vector_size) {
1041       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1042       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1043     }
1044     if (!is_power_of_2(MaxVectorSize)) {
1045       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1046       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1047     }
1048   } else {
1049     // If default, use highest supported configuration
1050     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1051   }
1052 
1053 #if defined(COMPILER2) && defined(ASSERT)
1054   if (MaxVectorSize > 0) {
1055     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1056       tty->print_cr("State of YMM registers after signal handle:");
1057       int nreg = 2 LP64_ONLY(+2);
1058       const char* ymm_name[4] = {"0", "7", "8", "15"};
1059       for (int i = 0; i < nreg; i++) {
1060         tty->print("YMM%s:", ymm_name[i]);
1061         for (int j = 7; j >=0; j--) {
1062           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1063         }
1064         tty->cr();
1065       }
1066     }
1067   }
1068 #endif // COMPILER2 && ASSERT
1069 
1070   if (!FLAG_IS_DEFAULT(AVX3Threshold)) {
1071     if (!is_power_of_2(AVX3Threshold)) {
1072       warning("AVX3Threshold must be a power of 2");
1073       FLAG_SET_DEFAULT(AVX3Threshold, 4096);
1074     }
1075   }
1076 
1077 #ifdef _LP64
1078   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1079     UseMultiplyToLenIntrinsic = true;
1080   }
1081   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1082     UseSquareToLenIntrinsic = true;
1083   }
1084   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1085     UseMulAddIntrinsic = true;
1086   }
1087   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1088     UseMontgomeryMultiplyIntrinsic = true;
1089   }
1090   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1091     UseMontgomerySquareIntrinsic = true;
1092   }
1093 #else
1094   if (UseMultiplyToLenIntrinsic) {
1095     if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1096       warning("multiplyToLen intrinsic is not available in 32-bit VM");
1097     }
1098     FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
1099   }
1100   if (UseMontgomeryMultiplyIntrinsic) {
1101     if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1102       warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
1103     }
1104     FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
1105   }
1106   if (UseMontgomerySquareIntrinsic) {
1107     if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1108       warning("montgomerySquare intrinsic is not available in 32-bit VM");
1109     }
1110     FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
1111   }
1112   if (UseSquareToLenIntrinsic) {
1113     if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1114       warning("squareToLen intrinsic is not available in 32-bit VM");
1115     }
1116     FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
1117   }
1118   if (UseMulAddIntrinsic) {
1119     if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1120       warning("mulAdd intrinsic is not available in 32-bit VM");
1121     }
1122     FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
1123   }
1124 #endif // _LP64
1125 #endif // COMPILER2_OR_JVMCI
1126 
1127   // On new cpus instructions which update whole XMM register should be used
1128   // to prevent partial register stall due to dependencies on high half.
1129   //
1130   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1131   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1132   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1133   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1134 
1135 
1136   if (is_zx()) { // ZX cpus specific settings
1137     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1138       UseStoreImmI16 = false; // don't use it on ZX cpus
1139     }
1140     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1141       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1142         // Use it on all ZX cpus
1143         UseAddressNop = true;
1144       }
1145     }
1146     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1147       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1148     }
1149     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1150       if (supports_sse3()) {
1151         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1152       } else {
1153         UseXmmRegToRegMoveAll = false;
1154       }
1155     }
1156     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1157 #ifdef COMPILER2
1158       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1159         // For new ZX cpus do the next optimization:
1160         // don't align the beginning of a loop if there are enough instructions
1161         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1162         // in current fetch line (OptoLoopAlignment) or the padding
1163         // is big (> MaxLoopPad).
1164         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1165         // generated NOP instructions. 11 is the largest size of one
1166         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1167         MaxLoopPad = 11;
1168       }
1169 #endif // COMPILER2
1170       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1171         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1172       }
1173       if (supports_sse4_2()) { // new ZX cpus
1174         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1175           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1176         }
1177       }
1178       if (supports_sse4_2()) {
1179         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1180           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1181         }
1182       } else {
1183         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1184           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1185         }
1186         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1187       }
1188     }
1189 
1190     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1191       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1192     }
1193   }
1194 
1195   if (is_amd_family()) { // AMD cpus specific settings
1196     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1197       // Use it on new AMD cpus starting from Opteron.
1198       UseAddressNop = true;
1199     }
1200     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1201       // Use it on new AMD cpus starting from Opteron.
1202       UseNewLongLShift = true;
1203     }
1204     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1205       if (supports_sse4a()) {
1206         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1207       } else {
1208         UseXmmLoadAndClearUpper = false;
1209       }
1210     }
1211     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1212       if (supports_sse4a()) {
1213         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1214       } else {
1215         UseXmmRegToRegMoveAll = false;
1216       }
1217     }
1218     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1219       if (supports_sse4a()) {
1220         UseXmmI2F = true;
1221       } else {
1222         UseXmmI2F = false;
1223       }
1224     }
1225     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1226       if (supports_sse4a()) {
1227         UseXmmI2D = true;
1228       } else {
1229         UseXmmI2D = false;
1230       }
1231     }
1232     if (supports_sse4_2()) {
1233       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1234         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1235       }
1236     } else {
1237       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1238         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1239       }
1240       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1241     }
1242 
1243     // some defaults for AMD family 15h
1244     if (cpu_family() == 0x15) {
1245       // On family 15h processors default is no sw prefetch
1246       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1247         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1248       }
1249       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1250       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1251         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1252       }
1253       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1254       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1255         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1256       }
1257       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1258         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1259       }
1260     }
1261 
1262 #ifdef COMPILER2
1263     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1264       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1265       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1266     }
1267 #endif // COMPILER2
1268 
1269     // Some defaults for AMD family 17h || Hygon family 18h
1270     if (cpu_family() == 0x17 || cpu_family() == 0x18) {
1271       // On family 17h processors use XMM and UnalignedLoadStores for Array Copy
1272       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1273         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1274       }
1275       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1276         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1277       }
1278 #ifdef COMPILER2
1279       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1280         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1281       }
1282 #endif
1283     }
1284   }
1285 
1286   if (is_intel()) { // Intel cpus specific settings
1287     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1288       UseStoreImmI16 = false; // don't use it on Intel cpus
1289     }
1290     if (cpu_family() == 6 || cpu_family() == 15) {
1291       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1292         // Use it on all Intel cpus starting from PentiumPro
1293         UseAddressNop = true;
1294       }
1295     }
1296     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1297       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1298     }
1299     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1300       if (supports_sse3()) {
1301         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1302       } else {
1303         UseXmmRegToRegMoveAll = false;
1304       }
1305     }
1306     if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
1307 #ifdef COMPILER2
1308       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1309         // For new Intel cpus do the next optimization:
1310         // don't align the beginning of a loop if there are enough instructions
1311         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1312         // in current fetch line (OptoLoopAlignment) or the padding
1313         // is big (> MaxLoopPad).
1314         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1315         // generated NOP instructions. 11 is the largest size of one
1316         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1317         MaxLoopPad = 11;
1318       }
1319 #endif // COMPILER2
1320       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1321         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1322       }
1323       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1324         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1325           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1326         }
1327       }
1328       if (supports_sse4_2()) {
1329         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1330           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1331         }
1332       } else {
1333         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1334           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1335         }
1336         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1337       }
1338     }
1339     if (is_atom_family() || is_knights_family()) {
1340 #ifdef COMPILER2
1341       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1342         OptoScheduling = true;
1343       }
1344 #endif
1345       if (supports_sse4_2()) { // Silvermont
1346         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1347           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1348         }
1349       }
1350       if (FLAG_IS_DEFAULT(UseIncDec)) {
1351         FLAG_SET_DEFAULT(UseIncDec, false);
1352       }
1353     }
1354     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1355       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1356     }
1357   }
1358 
1359 #ifdef _LP64
1360   if (UseSSE42Intrinsics) {
1361     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1362       UseVectorizedMismatchIntrinsic = true;
1363     }
1364   } else if (UseVectorizedMismatchIntrinsic) {
1365     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1366       warning("vectorizedMismatch intrinsics are not available on this CPU");
1367     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1368   }
1369 #else
1370   if (UseVectorizedMismatchIntrinsic) {
1371     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1372       warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1373     }
1374     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1375   }
1376 #endif // _LP64
1377 
1378   // Use count leading zeros count instruction if available.
1379   if (supports_lzcnt()) {
1380     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1381       UseCountLeadingZerosInstruction = true;
1382     }
1383    } else if (UseCountLeadingZerosInstruction) {
1384     warning("lzcnt instruction is not available on this CPU");
1385     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1386   }
1387 
1388   // Use count trailing zeros instruction if available
1389   if (supports_bmi1()) {
1390     // tzcnt does not require VEX prefix
1391     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1392       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1393         // Don't use tzcnt if BMI1 is switched off on command line.
1394         UseCountTrailingZerosInstruction = false;
1395       } else {
1396         UseCountTrailingZerosInstruction = true;
1397       }
1398     }
1399   } else if (UseCountTrailingZerosInstruction) {
1400     warning("tzcnt instruction is not available on this CPU");
1401     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1402   }
1403 
1404   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1405   // VEX prefix is generated only when AVX > 0.
1406   if (supports_bmi1() && supports_avx()) {
1407     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1408       UseBMI1Instructions = true;
1409     }
1410   } else if (UseBMI1Instructions) {
1411     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1412     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1413   }
1414 
1415   if (supports_bmi2() && supports_avx()) {
1416     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1417       UseBMI2Instructions = true;
1418     }
1419   } else if (UseBMI2Instructions) {
1420     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1421     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1422   }
1423 
1424   // Use population count instruction if available.
1425   if (supports_popcnt()) {
1426     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1427       UsePopCountInstruction = true;
1428     }
1429   } else if (UsePopCountInstruction) {
1430     warning("POPCNT instruction is not available on this CPU");
1431     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1432   }
1433 
1434   // Use fast-string operations if available.
1435   if (supports_erms()) {
1436     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1437       UseFastStosb = true;
1438     }
1439   } else if (UseFastStosb) {
1440     warning("fast-string operations are not available on this CPU");
1441     FLAG_SET_DEFAULT(UseFastStosb, false);
1442   }
1443 
1444   // Use XMM/YMM MOVDQU instruction for Object Initialization
  if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) {
    if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
      UseXMMForObjInit = true;
    }
  } else if (UseXMMForObjInit) {
    warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
    FLAG_SET_DEFAULT(UseXMMForObjInit, false);
  }

#ifdef COMPILER2
  if (FLAG_IS_DEFAULT(AlignVector)) {
    // Modern processors allow misaligned memory operations for vectors.
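    // Require explicit vector alignment only when unaligned load/stores are not known to be fast on this CPU.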
    AlignVector = !UseUnalignedLoadStores;
  }
#endif // COMPILER2

  if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
    if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
    } else if (!supports_sse() && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  // Allocation prefetch settings
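  // AllocatePrefetchStepSize defaults to the data cache line size so that
  // successive prefetches touch distinct cache lines.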
  intx cache_line_size = prefetch_data_size();
  if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
      (cache_line_size > AllocatePrefetchStepSize)) {
    FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
  }

  if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
    assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
    if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
      warning("AllocatePrefetchDistance is set to 0 which disables prefetching. Ignoring AllocatePrefetchStyle flag.");
    }
    FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
  }

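  // AllocatePrefetchStyle == 2 gates allocation prefetching on a TLAB watermark,
  // so a different default prefetch distance applies for that style.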
  if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
    bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
    FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
  }

  if (is_intel() && cpu_family() == 6 && supports_sse3()) {
    if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
        supports_sse4_2() && supports_ht()) { // Nehalem based cpus
      FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
    }
#ifdef COMPILER2
    if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
    }
#endif
  }

  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
#ifdef COMPILER2
    if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
    }
#endif
  }

#ifdef _LP64
  // Prefetch settings

  // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
  // 50-warehouse SPECjbb runs on a 2-way 1.8GHz Opteron using a 4GB heap.
  // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.

  // gc copy/scan is disabled if prefetchw isn't supported, because
  // Prefetch::write emits an inlined prefetchw on Linux.
  // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
  // The prefetcht0 instruction used instead works on both amd64 and em64t.

  if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
    FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
  }
  if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
    FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
  }
  if (FLAG_IS_DEFAULT(PrefetchFieldsAhead)) {
    FLAG_SET_DEFAULT(PrefetchFieldsAhead, 1);
  }
#endif

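  // Pad @Contended fields/classes out to a full cache line to avoid false sharing.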
  if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
     (cache_line_size > ContendedPaddingWidth))
     ContendedPaddingWidth = cache_line_size;

  // This machine allows unaligned memory accesses
  if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
    FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
  }

#ifndef PRODUCT
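  // In non-product builds, log the settings chosen above when -Xlog:os+cpu=info is enabled.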
  if (log_is_enabled(Info, os, cpu)) {
    LogStream ls(Log(os, cpu)::info());
    outputStream* log = &ls;
    log->print_cr("Logical CPUs per core: %u",
                  logical_processors_per_package());
    log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
    log->print("UseSSE=%d", (int) UseSSE);
    if (UseAVX > 0) {
      log->print("  UseAVX=%d", (int) UseAVX);
    }
    if (UseAES) {
      log->print("  UseAES=1");
    }
#ifdef COMPILER2
    if (MaxVectorSize > 0) {
      log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
    }
#endif
    log->cr();
    log->print("Allocation");
    if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
      log->print_cr(": no prefetching");
    } else {
      log->print(" prefetching: ");
      if (UseSSE == 0 && supports_3dnow_prefetch()) {
        log->print("PREFETCHW");
      } else if (UseSSE >= 1) {
        if (AllocatePrefetchInstr == 0) {
          log->print("PREFETCHNTA");
        } else if (AllocatePrefetchInstr == 1) {
          log->print("PREFETCHT0");
        } else if (AllocatePrefetchInstr == 2) {
          log->print("PREFETCHT2");
        } else if (AllocatePrefetchInstr == 3) {
          log->print("PREFETCHW");
        }
      }
      if (AllocatePrefetchLines > 1) {
        log->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize);
      } else {
        log->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize);
      }
    }

    if (PrefetchCopyIntervalInBytes > 0) {
      log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
    }
    if (PrefetchScanIntervalInBytes > 0) {
      log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
    }
    if (PrefetchFieldsAhead > 0) {
      log->print_cr("PrefetchFieldsAhead %d", (int) PrefetchFieldsAhead);
    }
    if (ContendedPaddingWidth > 0) {
      log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
    }
  }
#endif // !PRODUCT
}

void VM_Version::print_platform_virtualization_info(outputStream* st) {
  VirtualizationType vrt = VM_Version::get_detected_virtualization();
  if (vrt == XenHVM) {
    st->print_cr("Xen hardware-assisted virtualization detected");
  } else if (vrt == KVM) {
    st->print_cr("KVM virtualization detected");
  } else if (vrt == VMWare) {
    st->print_cr("VMWare virtualization detected");
    VirtualizationSupport::print_virtualization_info(st);
  } else if (vrt == HyperV) {
    st->print_cr("HyperV virtualization detected");
  }
}

void VM_Version::check_virt_cpuid(uint32_t idx, uint32_t *regs) {
// TODO support 32 bit
#if defined(_LP64)
#if defined(_MSC_VER)
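  // MSVC has no inline assembly on x64, so assemble a tiny CPUID stub at
  // runtime and call it through a function pointer instead.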
  // Allocate space for the code
  const int code_size = 100;
  ResourceMark rm;
  CodeBuffer cb("detect_virt", code_size, 0);
  MacroAssembler* a = new MacroAssembler(&cb);
  address code = a->pc();
  void (*test)(uint32_t idx, uint32_t *regs) = (void(*)(uint32_t idx, uint32_t *regs))code;

  a->movq(r9, rbx); // save nonvolatile register

  // next line would not work on 32-bit
  a->movq(rax, c_rarg0 /* rcx */);
  a->movq(r8, c_rarg1 /* rdx */);
  a->cpuid();
  a->movl(Address(r8,  0), rax);
  a->movl(Address(r8,  4), rbx);
  a->movl(Address(r8,  8), rcx);
  a->movl(Address(r8, 12), rdx);

  a->movq(rbx, r9); // restore nonvolatile register
  a->ret(0);

  uint32_t *code_end = (uint32_t *)a->pc();
  a->flush();

  // execute code
  (*test)(idx, regs);
#elif defined(__GNUC__)
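  // Inline asm: the leaf index is passed in and returned through EAX ("+a"),
  // the pointer to the output buffer is passed in ESI/RSI ("S"), and
  // EBX/ECX/EDX plus memory are listed as clobbers for CPUID and the stores.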
  __asm__ volatile (
     "        cpuid;"
     "        mov %%eax,(%1);"
     "        mov %%ebx,4(%1);"
     "        mov %%ecx,8(%1);"
     "        mov %%edx,12(%1);"
     : "+a" (idx)
     : "S" (regs)
     : "ebx", "ecx", "edx", "memory" );
#endif
#endif
}


bool VM_Version::use_biased_locking() {
#if INCLUDE_RTM_OPT
  // RTM locking is most useful when there is high lock contention and
  // low data contention.  With high lock contention the lock is usually
  // inflated and biased locking is not suitable for that case.
  // RTM locking code requires that biased locking is off.
  // Note: we can't switch off UseBiasedLocking in get_processor_features()
  // because it is used by Thread::allocate() which is called before
  // VM_Version::initialize().
  if (UseRTMLocking && UseBiasedLocking) {
    if (FLAG_IS_DEFAULT(UseBiasedLocking)) {
      FLAG_SET_DEFAULT(UseBiasedLocking, false);
    } else {
      warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag.");
      UseBiasedLocking = false;
    }
  }
#endif
  return UseBiasedLocking;
}

// On Xen, the cpuid instruction returns
//  eax / registers[0]: Version of Xen
//  ebx / registers[1]: chars 'XenV'
//  ecx / registers[2]: chars 'MMXe'
//  edx / registers[3]: chars 'nVMM'
//
// On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
//  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
//  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
//  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
//
// More information:
// https://kb.vmware.com/s/article/1009458
//
void VM_Version::check_virtualizations() {
#if defined(_LP64)
  uint32_t registers[4];
  char signature[13];
  uint32_t base;
  signature[12] = '\0';
  memset((void*)registers, 0, 4*sizeof(uint32_t));

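  // Hypervisors report their vendor signature through CPUID leaves in the
  // 0x40000000 - 0x40010000 range; scan that range in 0x100 steps because the
  // signature leaf is not always at the base of the range.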
  for (base = 0x40000000; base < 0x40010000; base += 0x100) {
    check_virt_cpuid(base, registers);

    *(uint32_t *)(signature + 0) = registers[1];
    *(uint32_t *)(signature + 4) = registers[2];
    *(uint32_t *)(signature + 8) = registers[3];

    if (strncmp("VMwareVMware", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = VMWare;
      // check for extended metrics from guestlib
      VirtualizationSupport::initialize();
    }

    if (strncmp("Microsoft Hv", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = HyperV;
    }

    if (strncmp("KVMKVMKVM", signature, 9) == 0) {
      Abstract_VM_Version::_detected_virtualization = KVM;
    }

    if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = XenHVM;
    }
  }
#endif
}

void VM_Version::initialize() {
  ResourceMark rm;
  // Generating this stub must be the first use of the assembler.

  stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size);
  if (stub_blob == NULL) {
    vm_exit_during_initialization("Unable to allocate get_cpu_info_stub");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);
  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());

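  // get_processor_features() runs the stub above (via get_cpu_info_stub) to
  // fill in _cpuid_info and then derives the CPU feature settings from it.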
  get_processor_features();
  if (cpu_family() > 4) { // it supports CPUID
    check_virtualizations();
  }
}