/*
 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "jvm.h"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "runtime/java.hpp"
#include "runtime/os.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "vm_version_x86.hpp"


int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = 0;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = 0;

static BufferBlob* stub_blob;
static const int stub_size = 1100;
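// stub_size must be large enough to hold all the code emitted by
// generate_get_cpu_info() below.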

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
}
static get_cpu_info_stub_t get_cpu_info_stub = NULL;


class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
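    // Exercise EVEX (AVX-512) state below only if UseAVX is at its default
    // or the user explicitly requested AVX-512.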

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // save the flags (rbx and rsi are preserved above)
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
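    // The probes below toggle a bit in EFLAGS, write the value back with
    // popf, re-read it with pushf, and compare: if the bit cannot be
    // changed, the CPU predates the corresponding feature.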
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

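    // rax (from leaf 0) now holds the highest supported standard CPUID leaf.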
    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
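    // XGETBV returns XCR0 in edx:eax.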
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);

    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // Extended cpuid(0x80000000)
    //
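    // Leaf 0x80000000 returns the highest supported extended leaf in rax.
    // The checks below branch to the highest leaf this CPU supports; each
    // leaf block then falls through to the next lower one.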
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if OS has enabled SSE and YMM state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSes have a bug where the upper 128/256 bits of the YMM/ZMM
    // registers are not restored after signal handling.
    // Generate a SEGV here (through a NULL reference)
    // and check the upper YMM/ZMM bits afterwards.
    //
    intx saved_useavx = UseAVX;
    intx saved_usesse = UseSSE;
    // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(rax, 0x10000);
    __ andl(rax, Address(rsi, 4)); // cpuid7 ebx bit 16: avx512f
    __ cmpl(rax, 0x10000);
    __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
    // check _cpuid_info.xem_xcr0_eax.bits.opmask
    // check _cpuid_info.xem_xcr0_eax.bits.zmm512
    // check _cpuid_info.xem_xcr0_eax.bits.zmm32
    __ movl(rax, 0xE0);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
    __ cmpl(rax, 0xE0);
    __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _LP64
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
#ifdef _LP64
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _LP64
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
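    // rsi == NULL: the load below dereferences it to raise the SEGV.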
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(rax, 0x10000);
    __ andl(rax, Address(rsi, 4));
    __ cmpl(rax, 0x10000);
    __ jcc(Assembler::notEqual, legacy_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.opmask
    // check _cpuid_info.xem_xcr0_eax.bits.zmm512
    // check _cpuid_info.xem_xcr0_eax.bits.zmm32
    __ movl(rax, 0xE0);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
    __ cmpl(rax, 0xE0);
    __ jcc(Assembler::notEqual, legacy_save_restore);

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif

#ifdef _WINDOWS
#ifdef _LP64
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _LP64
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif

#ifdef _WINDOWS
#ifdef _LP64
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _LP64
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS
    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };

  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
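    // Emit vzeroupper to clear the upper AVX state, except on Intel
    // Knights-family (Xeon Phi) parts, where the vzeroupper instruction
    // itself is costly; the model checks below identify those CPUs.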
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    __ vzeroupper();
#   undef __
  }
};

void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _features = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  _L1_data_cache_line_size = 16;

  // Get raw processor info

  get_cpu_info_stub(&_cpuid_info);

  assert_is_initialized();
  _cpu = extended_cpu_family();
  _model = extended_cpu_model();
  _stepping = cpu_stepping();

  if (cpu_family() > 4) { // it supports CPUID
    _features = feature_flags();
    // Logical processors are only available on P4s and above,
    // and only if hyperthreading is available.
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  _supports_cx8 = supports_cmpxchg8();
  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  LP64_ONLY(_supports_atomic_getset8 = true);
  LP64_ONLY(_supports_atomic_getadd8 = true);

#ifdef _LP64
  // OS should support SSE for x64 and hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // in 64 bit the use of SSE2 is the minimum
  if (UseSSE < 2) UseSSE = 2;
#endif

#ifdef AMD64
  // flush_icache_stub has to be generated first.
  // That is why Icache line size is hard coded in ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use
  // clflush instruction in 32-bit VM since it could be running
  // on CPU which does not support it.
  //
  // The only thing we can do is to verify that flushed
  // ICache::line_size has correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is reported in quadwords (8-byte units); 8 means a 64-byte cache line.
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
#endif

  // If the OS doesn't support SSE, we can't use this feature even if the HW does
  if (!os::supports_sse())
    _features &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);

  if (UseSSE < 4) {
    _features &= ~CPU_SSE4_1;
    _features &= ~CPU_SSE4_2;
  }

  if (UseSSE < 3) {
    _features &= ~CPU_SSE3;
    _features &= ~CPU_SSSE3;
    _features &= ~CPU_SSE4A;
  }

  if (UseSSE < 2)
    _features &= ~CPU_SSE2;

  if (UseSSE < 1)
    _features &= ~CPU_SSE;

  // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
    UseAVX = 0;
  }

  // first try initial setting and detect what we can support
  int use_avx_limit = 0;
  if (UseAVX > 0) {
    if (UseAVX > 2 && supports_evex()) {
      use_avx_limit = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      use_avx_limit = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      use_avx_limit = 1;
    } else {
      use_avx_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseAVX)) {
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  } else if (UseAVX > use_avx_limit) {
    warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", (int) UseAVX, use_avx_limit);
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  } else if (UseAVX < 0) {
    warning("UseAVX=%d is not valid, setting it to UseAVX=0", (int) UseAVX);
    FLAG_SET_DEFAULT(UseAVX, 0);
  }

  if (UseAVX < 3) {
    _features &= ~CPU_AVX512F;
    _features &= ~CPU_AVX512DQ;
    _features &= ~CPU_AVX512CD;
    _features &= ~CPU_AVX512BW;
    _features &= ~CPU_AVX512VL;
    _features &= ~CPU_AVX512_VPOPCNTDQ;
    _features &= ~CPU_VPCLMULQDQ;
    _features &= ~CPU_VAES;
  }

  if (UseAVX < 2)
    _features &= ~CPU_AVX2;

  if (UseAVX < 1) {
    _features &= ~CPU_AVX;
    _features &= ~CPU_VZEROUPPER;
  }

  if (logical_processors_per_package() == 1) {
    // An HT-capable processor may be installed on a system where HT is unavailable.
    _features &= ~CPU_HT;
  }

  if (is_intel()) { // Intel cpus specific settings
    if (is_knights_family()) {
      _features &= ~CPU_VZEROUPPER;
    }
  }

  char buf[256];
  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
               cores_per_cpu(), threads_per_core(),
               cpu_family(), _model, _stepping,
               (supports_cmov() ? ", cmov" : ""),
               (supports_cmpxchg8() ? ", cx8" : ""),
               (supports_fxsr() ? ", fxsr" : ""),
               (supports_mmx() ? ", mmx" : ""),
               (supports_sse() ? ", sse" : ""),
               (supports_sse2() ? ", sse2" : ""),
               (supports_sse3() ? ", sse3" : ""),
               (supports_ssse3() ? ", ssse3" : ""),
               (supports_sse4_1() ? ", sse4.1" : ""),
               (supports_sse4_2() ? ", sse4.2" : ""),
               (supports_popcnt() ? ", popcnt" : ""),
               (supports_avx() ? ", avx" : ""),
               (supports_avx2() ? ", avx2" : ""),
               (supports_aes() ? ", aes" : ""),
               (supports_clmul() ? ", clmul" : ""),
               (supports_erms() ? ", erms" : ""),
               (supports_rtm() ? ", rtm" : ""),
               (supports_mmx_ext() ? ", mmxext" : ""),
               (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
               (supports_lzcnt() ? ", lzcnt" : ""),
               (supports_sse4a() ? ", sse4a" : ""),
               (supports_ht() ? ", ht" : ""),
               (supports_tsc() ? ", tsc" : ""),
               (supports_tscinv_bit() ? ", tscinvbit" : ""),
               (supports_tscinv() ? ", tscinv" : ""),
               (supports_bmi1() ? ", bmi1" : ""),
               (supports_bmi2() ? ", bmi2" : ""),
               (supports_adx() ? ", adx" : ""),
               (supports_evex() ? ", evex" : ""),
               (supports_sha() ? ", sha" : ""),
               (supports_fma() ? ", fma" : ""));
  _features_string = os::strdup(buf);

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires.  I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
  int use_sse_limit = 0;
  if (UseSSE > 0) {
    if (UseSSE > 3 && supports_sse4_1()) {
      use_sse_limit = 4;
    } else if (UseSSE > 2 && supports_sse3()) {
      use_sse_limit = 3;
    } else if (UseSSE > 1 && supports_sse2()) {
      use_sse_limit = 2;
    } else if (UseSSE > 0 && supports_sse()) {
      use_sse_limit = 1;
    } else {
      use_sse_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseSSE)) {
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE > use_sse_limit) {
    warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", (int) UseSSE, use_sse_limit);
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE < 0) {
    warning("UseSSE=%d is not valid, setting it to UseSSE=0", (int) UseSSE);
    FLAG_SET_DEFAULT(UseSSE, 0);
  }

  // Use AES instructions if available.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require SSE3 mode or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require SSE4.1 mode or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    }
  }

  // Use CLMUL instructions if available.
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 intrinsics require CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

  if (supports_sse4_2() && supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  } else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }

  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsics require CLMUL and SSE2 instructions, which are not available on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

  // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
  if ((UseAVX > 2) && supports_avx512vl() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      UseBASE64Intrinsics = true;
    }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
      warning("Base64 intrinsic requires EVEX instructions on this CPU");
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }

  if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
    if (FLAG_IS_DEFAULT(UseFMA)) {
      UseFMA = true;
    }
  } else if (UseFMA) {
    warning("FMA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseFMA, false);
  }

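  // Note: on 64-bit, the SHA-512 intrinsic is implemented with AVX2 and BMI2
  // instructions, so UseSHA may also default to true on CPUs that lack the
  // SHA extensions themselves (see the LP64_ONLY condition below).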
  if (supports_sha() LP64_ONLY(|| supports_avx2() && supports_bmi2())) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (supports_sha() && supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    }
  } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

#ifdef _LP64
  // These are only supported on 64-bit
  if (UseSHA && supports_avx2() && supports_bmi2()) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else
#endif
  if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (UseAdler32Intrinsics) {
    warning("Adler32Intrinsics not available on this CPU.");
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }

  if (!supports_rtm() && UseRTMLocking) {
    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
    // setting during arguments processing. See use_biased_locking().
    // VM_Version_init() is executed after UseBiasedLocking is used
    // in Thread::allocate().
    vm_exit_during_initialization("RTM instructions are not available on this CPU");
  }

#if INCLUDE_RTM_OPT
  if (UseRTMLocking) {
    if (is_client_compilation_mode_vm()) {
      // Only C2 does RTM locking optimization.
      // Can't continue because UseRTMLocking affects UseBiasedLocking flag
      // setting during arguments processing. See use_biased_locking().
      vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
    }
    if (is_intel_family_core()) {
      if ((_model == CPU_MODEL_HASWELL_E3) ||
          (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
          (_model == CPU_MODEL_BROADWELL  && _stepping < 4)) {
        // currently a collision between SKL and HSW_E3
        if (!UnlockExperimentalVMOptions && UseAVX < 3) {
          vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this "
                                        "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
        } else {
          warning("UseRTMLocking is only available as experimental option on this platform.");
        }
      }
    }
    if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
      // RTM locking should be used only for applications with
      // high lock contention. For now we do not use it by default.
      vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
    }
  } else { // !UseRTMLocking
    if (UseRTMForStackLocks) {
      if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
        warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
      }
      FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
    }
    if (UseRTMDeopt) {
      FLAG_SET_DEFAULT(UseRTMDeopt, false);
    }
    if (PrintPreciseRTMLockingStatistics) {
      FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
    }
  }
#else
  if (UseRTMLocking) {
    // Only C2 does RTM locking optimization.
    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
    // setting during arguments processing. See use_biased_locking().
    vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
  }
#endif

#ifdef COMPILER2
  if (UseFPUForSpilling) {
    if (UseSSE < 2) {
      // Only supported with SSE2+
      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
    }
  }
#endif

#if COMPILER2_OR_JVMCI
  int max_vector_size = 0;
  if (UseSSE < 2) {
    // Vectors (in XMM) are only supported with SSE2+
    // SSE is always 2 on x64.
    max_vector_size = 0;
  } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16 byte vectors (in XMM) are supported with SSE2+
    max_vector_size = 16;
  } else if (UseAVX == 1 || UseAVX == 2) {
    // 32 byte vectors (in YMM) are only supported with AVX+
    max_vector_size = 32;
  } else if (UseAVX > 2) {
    // 64 byte vectors (in ZMM) are only supported with AVX 3
    max_vector_size = 64;
  }

#ifdef _LP64
  int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
#else
  int min_vector_size = 0;
#endif

  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    if (MaxVectorSize < min_vector_size) {
      warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
    }
    if (MaxVectorSize > max_vector_size) {
      warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
  } else {
    // If default, use highest supported configuration
    FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
  }

#if defined(COMPILER2) && defined(ASSERT)
  if (MaxVectorSize > 0) {
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handling:");
      int nreg = 2 LP64_ONLY(+2);
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >= 0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
        }
        tty->cr();
      }
    }
  }
#endif // COMPILER2 && ASSERT

#ifdef _LP64
  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
    UseMulAddIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }
#else
  if (UseMultiplyToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
      warning("multiplyToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
  }
  if (UseMontgomeryMultiplyIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
      warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
  }
  if (UseMontgomerySquareIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
      warning("montgomerySquare intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
  }
  if (UseSquareToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
      warning("squareToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
  }
  if (UseMulAddIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
      warning("mulAdd intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
  }
#endif // _LP64
#endif // COMPILER2_OR_JVMCI

  // On new cpus instructions which update whole XMM register should be used
  // to prevent partial register stall due to dependencies on high half.
  //
  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
  // UseXmmRegToRegMoveAll == true    --> movaps(xmm, xmm), movapd(xmm, xmm).
  // UseXmmRegToRegMoveAll == false   --> movss(xmm, xmm),  movsd(xmm, xmm).


  if (is_zx()) { // ZX cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on ZX cpus
    }
    if ((cpu_family() == 6) || (cpu_family() == 7)) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all ZX cpus
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new ZX cpus do the following optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new ZX cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2
      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
      }
      if (supports_sse4_2()) { // new ZX cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }

    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  if (is_amd()) { // AMD cpus specific settings
    if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
      // Use it on new AMD cpus starting from Opteron.
      UseAddressNop = true;
    }
    if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
      // Use it on new AMD cpus starting from Opteron.
      UseNewLongLShift = true;
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      if (supports_sse4a()) {
        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
      } else {
        UseXmmLoadAndClearUpper = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse4a()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2F)) {
      if (supports_sse4a()) {
        UseXmmI2F = true;
      } else {
        UseXmmI2F = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2D)) {
      if (supports_sse4a()) {
        UseXmmI2D = true;
      } else {
        UseXmmI2D = false;
      }
    }
    if (supports_sse4_2()) {
      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
      }
    } else {
      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
    }

    // some defaults for AMD family 15h
    if (cpu_family() == 0x15) {
      // On family 15h processors default is no sw prefetch
      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
      }
      // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
      }
      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
    }

#ifdef COMPILER2
    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
      // Limit vectors size to 16 bytes on AMD cpus < 17h.
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
#endif // COMPILER2

    // Some defaults for AMD family 17h
    if (cpu_family() == 0x17) {
      // On family 17h processors use XMM and UnalignedLoadStores for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
#ifdef COMPILER2
      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
      }
#endif
    }
  }

  if (is_intel()) { // Intel cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on Intel cpus
    }
    if (cpu_family() == 6 || cpu_family() == 15) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all Intel cpus starting from PentiumPro
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new Intel cpus do the following optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new Intel cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2
      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
      }
      if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }
    if (is_atom_family() || is_knights_family()) {
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(OptoScheduling)) {
        OptoScheduling = true;
      }
#endif
      if (supports_sse4_2()) { // Silvermont
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (FLAG_IS_DEFAULT(UseIncDec)) {
        FLAG_SET_DEFAULT(UseIncDec, false);
      }
    }
    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

#ifdef _LP64
  if (UseSSE42Intrinsics) {
    if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      UseVectorizedMismatchIntrinsic = true;
    }
  } else if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
      warning("vectorizedMismatch intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
#else
  if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
#endif // _LP64

  // Use count leading zeros instruction if available.
  if (supports_lzcnt()) {
    if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
      UseCountLeadingZerosInstruction = true;
    }
  } else if (UseCountLeadingZerosInstruction) {
    warning("lzcnt instruction is not available on this CPU");
    FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
  }

  // Use count trailing zeros instruction if available
  if (supports_bmi1()) {
    // tzcnt does not require VEX prefix
    if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
      if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
        // Don't use tzcnt if BMI1 is switched off on command line.
        UseCountTrailingZerosInstruction = false;
      } else {
        UseCountTrailingZerosInstruction = true;
      }
    }
  } else if (UseCountTrailingZerosInstruction) {
    warning("tzcnt instruction is not available on this CPU");
    FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
  }

  // BMI instructions (except tzcnt) use an encoding with VEX prefix.
  // VEX prefix is generated only when AVX > 0.
  if (supports_bmi1() && supports_avx()) {
    if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
      UseBMI1Instructions = true;
    }
  } else if (UseBMI1Instructions) {
    warning("BMI1 instructions are not available on this CPU (AVX is also required)");
    FLAG_SET_DEFAULT(UseBMI1Instructions, false);
  }

  if (supports_bmi2() && supports_avx()) {
    if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
      UseBMI2Instructions = true;
    }
  } else if (UseBMI2Instructions) {
    warning("BMI2 instructions are not available on this CPU (AVX is also required)");
    FLAG_SET_DEFAULT(UseBMI2Instructions, false);
  }

  // Use population count instruction if available.
  if (supports_popcnt()) {
    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
      UsePopCountInstruction = true;
    }
  } else if (UsePopCountInstruction) {
    warning("POPCNT instruction is not available on this CPU");
    FLAG_SET_DEFAULT(UsePopCountInstruction, false);
  }

  // Use fast-string operations if available.
  if (supports_erms()) {
    if (FLAG_IS_DEFAULT(UseFastStosb)) {
      UseFastStosb = true;
    }
  } else if (UseFastStosb) {
    warning("fast-string operations are not available on this CPU");
    FLAG_SET_DEFAULT(UseFastStosb, false);
  }

  // Use XMM/YMM MOVDQU instruction for Object Initialization
  if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) {
    if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
      UseXMMForObjInit = true;
    }
  } else if (UseXMMForObjInit) {
    warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
    FLAG_SET_DEFAULT(UseXMMForObjInit, false);
  }

#ifdef COMPILER2
  if (FLAG_IS_DEFAULT(AlignVector)) {
    // Modern processors allow misaligned memory operations for vectors.
    AlignVector = !UseUnalignedLoadStores;
  }
#endif // COMPILER2

  if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
    if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
    } else if (!supports_sse() && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  // Allocation prefetch settings
  intx cache_line_size = prefetch_data_size();
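  // prefetch_data_size() reports a data cache line size, derived from the
  // CPUID information gathered by the stub above.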
  if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
      (cache_line_size > AllocatePrefetchStepSize)) {
    FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
  }

  if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
    assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
    if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
      warning("AllocatePrefetchDistance is set to 0 which disables prefetching. Ignoring AllocatePrefetchStyle flag.");
    }
    FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
  }

  if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
    bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
    FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
  }

  if (is_intel() && cpu_family() == 6 && supports_sse3()) {
    if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
        supports_sse4_2() && supports_ht()) { // Nehalem based cpus
      FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
    }
#ifdef COMPILER2
    if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
    }
#endif
  }

  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
#ifdef COMPILER2
    if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
    }
#endif
  }

#ifdef _LP64
  // Prefetch settings

  // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
  // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
  // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.

  // gc copy/scan is disabled if prefetchw isn't supported, because
  // Prefetch::write emits an inlined prefetchw on Linux.
  // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
  // The used prefetcht0 instruction works for both amd64 and em64t.

  if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
    FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
  }
  if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
    FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
  }
  if (FLAG_IS_DEFAULT(PrefetchFieldsAhead)) {
    FLAG_SET_DEFAULT(PrefetchFieldsAhead, 1);
  }
#endif

  if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
      (cache_line_size > ContendedPaddingWidth))
    ContendedPaddingWidth = cache_line_size;

  // This machine allows unaligned memory accesses
  if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
    FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
  }

#ifndef PRODUCT
  if (log_is_enabled(Info, os, cpu)) {
    LogStream ls(Log(os, cpu)::info());
    outputStream* log = &ls;
    log->print_cr("Logical CPUs per core: %u",
                  logical_processors_per_package());
    log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
    log->print("UseSSE=%d", (int) UseSSE);
    if (UseAVX > 0) {
      log->print("  UseAVX=%d", (int) UseAVX);
    }
    if (UseAES) {
      log->print("  UseAES=1");
    }
#ifdef COMPILER2
    if (MaxVectorSize > 0) {
      log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
    }
#endif
    log->cr();
    log->print("Allocation");
    if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
      log->print_cr(": no prefetching");
    } else {
      log->print(" prefetching: ");
      if (UseSSE == 0 && supports_3dnow_prefetch()) {
        log->print("PREFETCHW");
      } else if (UseSSE >= 1) {
        if (AllocatePrefetchInstr == 0) {
          log->print("PREFETCHNTA");
        } else if (AllocatePrefetchInstr == 1) {
          log->print("PREFETCHT0");
        } else if (AllocatePrefetchInstr == 2) {
          log->print("PREFETCHT2");
        } else if (AllocatePrefetchInstr == 3) {
          log->print("PREFETCHW");
        }
      }
      if (AllocatePrefetchLines > 1) {
        log->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize);
      } else {
        log->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize);
      }
    }

    if (PrefetchCopyIntervalInBytes > 0) {
      log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
    }
    if (PrefetchScanIntervalInBytes > 0) {
      log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
    }
    if (PrefetchFieldsAhead > 0) {
      log->print_cr("PrefetchFieldsAhead %d", (int) PrefetchFieldsAhead);
    }
    if (ContendedPaddingWidth > 0) {
      log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
    }
  }
#endif // !PRODUCT
}

bool VM_Version::use_biased_locking() {
#if INCLUDE_RTM_OPT
  // RTM locking is most useful when there is high lock contention and
  // low data contention. With high lock contention the lock is usually
  // inflated and biased locking is not suitable for that case.
  // RTM locking code requires that biased locking is off.
  // Note: we can't switch off UseBiasedLocking in get_processor_features()
  // because it is used by Thread::allocate() which is called before
  // VM_Version::initialize().
  if (UseRTMLocking && UseBiasedLocking) {
    if (FLAG_IS_DEFAULT(UseBiasedLocking)) {
      FLAG_SET_DEFAULT(UseBiasedLocking, false);
    } else {
      warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag.");
      UseBiasedLocking = false;
    }
  }
#endif
  return UseBiasedLocking;
}

void VM_Version::initialize() {
  ResourceMark rm;
  // Making this stub must be the FIRST use of the assembler.

  stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size);
  if (stub_blob == NULL) {
    vm_exit_during_initialization("Unable to allocate get_cpu_info_stub");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);
  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());

  get_processor_features();
}
