//
// Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding, VM register );
// (The definitions below pass five arguments; the fifth is a VMReg
//  expression, or VMRegImpl::Bad() for FPR0 and the FILL registers.)
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.
// NOTE(review): in the definitions below EBX, ESI and EDI are all SOC in the
// first (register save type) column and SOE only in the C convention column,
// so the last sentence above looks stale -- confirm.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick: FPR1 is really st(0) except in the midst
// of emission of assembly for a MachNode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example.
// Consequently FPRn below has encoding n but is bound to as_FloatRegister(n-1).
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag
// PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg_no_ebp and int_reg_with_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg_no_ebp and ncx_reg_with_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg_no_ebp and nabx_reg_with_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg_no_ebp and long_reg_with_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg_no_ebp and nadx_reg_with_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand

// Shorthand used by the emit helpers: "__ foo()" expands to "_masm. foo()",
// so a MacroAssembler named _masm must be in scope wherever it is used.
#define __ _masm.

// How to find the high register of a Long pair, given the low register.
// With the encodings defined above, every long pair in long_reg_with_ebp is
// (low, low+2): EAX(0)/EDX(2), ECX(1)/EBX(3), EBP(5)/EDI(7).
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.
They allow 264// fast versions of NegF/NegD and AbsF/AbsD. 265 266// Note: 'double' and 'long long' have 32-bits alignment on x86. 267static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) { 268 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address 269 // of 128-bits operands for SSE instructions. 270 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF))); 271 // Store the value to a 128-bits operand. 272 operand[0] = lo; 273 operand[1] = hi; 274 return operand; 275} 276 277// Buffer for 128-bits masks used by SSE instructions. 278static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment) 279 280// Static initialization during VM startup. 281static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF)); 282static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF)); 283static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000)); 284static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000)); 285 286// Offset hacking within calls. 287static int pre_call_resets_size() { 288 int size = 0; 289 Compile* C = Compile::current(); 290 if (C->in_24_bit_fp_mode()) { 291 size += 6; // fldcw 292 } 293 if (VM_Version::supports_vzeroupper()) { 294 size += 3; // vzeroupper 295 } 296 return size; 297} 298 299// !!!!! Special hack to get all type of calls to specify the byte offset 300// from the start of the call to the point where the return address 301// will point. 
302int MachCallStaticJavaNode::ret_addr_offset() { 303 return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points 304} 305 306int MachCallDynamicJavaNode::ret_addr_offset() { 307 return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points 308} 309 310static int sizeof_FFree_Float_Stack_All = -1; 311 312int MachCallRuntimeNode::ret_addr_offset() { 313 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already"); 314 return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All); 315} 316 317// Indicate if the safepoint node needs the polling page as an input. 318// Since x86 does have absolute addressing, it doesn't. 319bool SafePointNode::needs_polling_address_input() { 320 return SafepointMechanism::uses_thread_local_poll(); 321} 322 323// 324// Compute padding required for nodes which need alignment 325// 326 327// The address of the call instruction needs to be 4-byte aligned to 328// ensure that it does not span a cache line so that it can be patched. 329int CallStaticJavaDirectNode::compute_padding(int current_offset) const { 330 current_offset += pre_call_resets_size(); // skip fldcw, if any 331 current_offset += 1; // skip call opcode byte 332 return align_up(current_offset, alignment_required()) - current_offset; 333} 334 335// The address of the call instruction needs to be 4-byte aligned to 336// ensure that it does not span a cache line so that it can be patched. 
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  // Returns the padding needed so that the offset right after the call opcode
  // byte is a multiple of alignment_required().
  current_offset += pre_call_resets_size(); // skip pre-call resets (fldcw and/or vzeroupper), if any
  current_offset += 5; // skip MOV instruction
  current_offset += 1; // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Packs three fields into one byte as (f1 << 6) | (f2 << 3) | f3 and appends
// it to the instruction stream. Callers in this file use it both for ModRM
// bytes (mod, reg, r/m) and for SIB bytes (scale, index, base). The fields are
// not masked here; callers must pass values that fit their bit widths.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emits the single byte (f1 | f2).
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
// Emits the low 8 bits of 'code' as one byte.
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
// Records a relocation of type 'reloc' at insts_mark() + offset, then emits
// the opcode byte.
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
// Emits the low 8 bits of 'd8' as one byte.
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
// Emits 'd16' through emit_int16.
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
// Emits 'd32' through emit_int32.
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
// The relocation is attached at insts_mark(), i.e. at the marked start of the
// instruction, not at the position of the 32-bit value itself.
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
                    int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
// In ASSERT builds, an oop-type relocation whose value is neither 0 nor
// Universe::non_oop_word() is checked to be a valid oop that may be embedded
// in code.
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
                    int format) {
#ifdef ASSERT
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)) && (ScavengeRootsInCode || !Universe::heap()->is_scavengable(cast_to_oop(d32))), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits "opcode ModRM SIB disp" addressing [ESP + disp]. 'rm_field' goes into
// the reg/opcode-extension field of the ModRM byte. An 8-bit displacement
// (mod 01) is used when disp fits in [-128, 127], otherwise a 32-bit one
// (mod 10). A zero displacement is still encoded as an explicit disp8.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
    emit_d8 (cbuf, disp); // 8-bit displacement
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
    emit_d32(cbuf, disp); // 32-bit displacement
  }
}

// rRegI ereg, memory mem) %{ // emit_reg_mem
// Emits the ModRM byte, an optional SIB byte and an optional displacement for
// a register/memory operand. The opcode must already have been emitted.
//   reg_encoding - value for the reg field of the ModRM byte
//   base         - base register encoding; -1 is tested below as the
//                  "absolute address" flag
//   index, scale - index == 0x4 together with scale == 0 means "no index"
//   displace     - displacement; emitted as disp8 only if it fits and carries
//                  no relocation
//   disp_reloc   - relocation type for the displacement (relocInfo::none if none)
// NOTE(review): base == -1 is only tested on the 32-bit displacement path. An
// absolute address with no relocation and a displacement in [-128, 127] would
// take one of the earlier paths with base == -1 -- presumably callers never
// produce that; confirm.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
        else { // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
      }
    }
  }
  else { // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) { // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        // Both arms below emit the same two bytes (when base == 0x04 the
        // literal 0x04 equals base); the split is kept as in the original.
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32 (cbuf, displace);
        }
      }
    }
  }
}


// Emits a register-to-register move (opcode 0x8B with a mod=11 ModRM byte),
// or nothing at all when source and destination encodings are equal.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Rewrites the flags after a floating-point compare so that a NaN operand
// reads as 'less than'. Nothing is done when the parity flag is clear (the
// short jump skips straight to the end).
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  // (The mask also clears SF and AF in the low byte, which per the note
  // above are already zero on this path.)
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materializes a three-way compare result in 'dst' from the current flags:
// -1 if the parity flag is set or the result is 'below', otherwise 0 when
// equal and 1 when not equal (setb(notEqual) followed by zero-extension).
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

// The constant table is addressed absolutely on this platform, so the table
// base offset is always 0.
int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0; // absolute addressing, no offset
}

// No post-allocation expansion is used; postalloc_expand() must never run.
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

// The constant base node emits no code (see size() below, which returns 0).
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Prints a textual description of the method prolog (non-PRODUCT builds only).
// The real code is produced by MachPrologNode::emit() via verified_entry();
// this function only prints a description and tracks 'framesize' locally so
// that the printed offsets are consistent.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    // With a stack bang, EBP is pushed first, so one more word is already
    // accounted for before the frame is created.
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    // Without a stack bang, the whole frame is created at once and EBP is
    // stored into its top slot.
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


// Emits the method prolog through MacroAssembler::verified_entry(), records
// the code offset at which the frame is complete, and, if the compilation has
// a MachConstantBaseNode, sets the constant table base offset.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  // A bang size of 0 is passed when no stack bang is needed.
  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

// NOTE(review): the value returned is 0, so the "large enough number" remark
// below looks stale -- confirm.
int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Prints a textual description of the method epilog (non-PRODUCT builds only).
// NOTE(review): the poll is always printed as "TEST PollPage,EAX", although
// emit() below uses a different sequence when thread-local polling is on.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

// Emits the method epilog: optional vzeroupper, optional restore of the
// standard FPU control word, frame teardown (ADD ESP / POP EBP), optional
// reserved-stack check, and the return safepoint poll.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  // Pick the imm32 form (0x81) when the frame size does not fit in a signed
  // byte, otherwise the imm8 form (0x83); nothing is emitted for size 0.
  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // POP EBP (printed as "POPL EBP" by format())

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    if (SafepointMechanism::uses_thread_local_poll()) {
      // Thread-local poll: load the thread's polling page address into EBX
      // (used as a scratch register here) and read from it.
      Register pollReg = as_Register(EBX_enc);
      MacroAssembler masm(&cbuf);
      masm.get_thread(pollReg);
      masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
      masm.relocate(relocInfo::poll_return_type);
      masm.testl(rax, Address(pollReg, 0));
    } else {
      // Global poll: TEST EAX, [polling page] using an absolute disp32.
      cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
      emit_opcode(cbuf,0x85);
      emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
      emit_d32(cbuf, (intptr_t)os::get_polling_page());
    }
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

// NOTE(review): the value returned is 0, so the "large enough number" remark
// below looks stale -- confirm.
int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Register categories returned by rc_class().
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
// Classifies an OptoReg as invalid, stack slot, integer register, x87 float
// register or XMM register. x87 float registers are asserted not to show up
// when UseSSE >= 2.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg) ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emits, formats, or only sizes one [ESP + offset] access.
//   cbuf != NULL : emit 'opcode' followed by the ESP-relative operand
//   else !do_size: print the instruction to 'st' (non-PRODUCT builds only)
//   otherwise    : only compute the size
// Returns 'size' plus this instruction's length: 3 bytes (opcode, ModRM, SIB)
// plus 0, 1 or 4 displacement bytes.
// NOTE(review): the displacement size only checks offset <= 127, so it
// assumes 'offset' is never negative -- confirm against callers.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers. Extra opcode bits, limited syntax.
779static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load, 780 int offset, int reg_lo, int reg_hi, int size, outputStream* st ) { 781 int in_size_in_bits = Assembler::EVEX_32bit; 782 int evex_encoding = 0; 783 if (reg_lo+1 == reg_hi) { 784 in_size_in_bits = Assembler::EVEX_64bit; 785 evex_encoding = Assembler::VEX_W; 786 } 787 if (cbuf) { 788 MacroAssembler _masm(cbuf); 789 // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations, 790 // it maps more cases to single byte displacement 791 _masm.set_managed(); 792 if (reg_lo+1 == reg_hi) { // double move? 793 if (is_load) { 794 __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); 795 } else { 796 __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); 797 } 798 } else { 799 if (is_load) { 800 __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); 801 } else { 802 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); 803 } 804 } 805#ifndef PRODUCT 806 } else if (!do_size) { 807 if (size != 0) st->print("\n\t"); 808 if (reg_lo+1 == reg_hi) { // double move? 809 if (is_load) st->print("%s %s,[ESP + #%d]", 810 UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", 811 Matcher::regName[reg_lo], offset); 812 else st->print("MOVSD [ESP + #%d],%s", 813 offset, Matcher::regName[reg_lo]); 814 } else { 815 if (is_load) st->print("MOVSS %s,[ESP + #%d]", 816 Matcher::regName[reg_lo], offset); 817 else st->print("MOVSS [ESP + #%d],%s", 818 offset, Matcher::regName[reg_lo]); 819 } 820#endif 821 } 822 bool is_single_byte = false; 823 if ((UseAVX > 2) && (offset != 0)) { 824 is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding); 825 } 826 int offset_size = 0; 827 if (UseAVX > 2 ) { 828 offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 829 } else { 830 offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 
1 : 4); 831 } 832 size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX 833 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 834 return size+5+offset_size; 835} 836 837 838static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 839 int src_hi, int dst_hi, int size, outputStream* st ) { 840 if (cbuf) { 841 MacroAssembler _masm(cbuf); 842 // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way. 843 _masm.set_managed(); 844 if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? 845 __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]), 846 as_XMMRegister(Matcher::_regEncode[src_lo])); 847 } else { 848 __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]), 849 as_XMMRegister(Matcher::_regEncode[src_lo])); 850 } 851#ifndef PRODUCT 852 } else if (!do_size) { 853 if (size != 0) st->print("\n\t"); 854 if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers 855 if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? 856 st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 857 } else { 858 st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 859 } 860 } else { 861 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? 862 st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 863 } else { 864 st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 865 } 866 } 867#endif 868 } 869 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 870 // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes. 871 int sz = (UseAVX > 2) ? 
6 : 4; 872 if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) && 873 UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3; 874 return size + sz; 875} 876 877static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 878 int src_hi, int dst_hi, int size, outputStream* st ) { 879 // 32-bit 880 if (cbuf) { 881 MacroAssembler _masm(cbuf); 882 // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way. 883 _masm.set_managed(); 884 __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]), 885 as_Register(Matcher::_regEncode[src_lo])); 886#ifndef PRODUCT 887 } else if (!do_size) { 888 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); 889#endif 890 } 891 return (UseAVX> 2) ? 6 : 4; 892} 893 894 895static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 896 int src_hi, int dst_hi, int size, outputStream* st ) { 897 // 32-bit 898 if (cbuf) { 899 MacroAssembler _masm(cbuf); 900 // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way. 901 _masm.set_managed(); 902 __ movdl(as_Register(Matcher::_regEncode[dst_lo]), 903 as_XMMRegister(Matcher::_regEncode[src_lo])); 904#ifndef PRODUCT 905 } else if (!do_size) { 906 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); 907#endif 908 } 909 return (UseAVX> 2) ? 
6 : 4; 910} 911 912static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) { 913 if( cbuf ) { 914 emit_opcode(*cbuf, 0x8B ); 915 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] ); 916#ifndef PRODUCT 917 } else if( !do_size ) { 918 if( size != 0 ) st->print("\n\t"); 919 st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]); 920#endif 921 } 922 return size+2; 923} 924 925static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi, 926 int offset, int size, outputStream* st ) { 927 if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there 928 if( cbuf ) { 929 emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it) 930 emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] ); 931#ifndef PRODUCT 932 } else if( !do_size ) { 933 if( size != 0 ) st->print("\n\t"); 934 st->print("FLD %s",Matcher::regName[src_lo]); 935#endif 936 } 937 size += 2; 938 } 939 940 int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/; 941 const char *op_str; 942 int op; 943 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store? 944 op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D "; 945 op = 0xDD; 946 } else { // 32-bit store 947 op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S "; 948 op = 0xD9; 949 assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" ); 950 } 951 952 return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st); 953} 954 955// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad. 
956static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 957 int src_hi, int dst_hi, uint ireg, outputStream* st); 958 959static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 960 int stack_offset, int reg, uint ireg, outputStream* st); 961 962static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 963 int dst_offset, uint ireg, outputStream* st) { 964 int calc_size = 0; 965 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 966 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 967 switch (ireg) { 968 case Op_VecS: 969 calc_size = 3+src_offset_size + 3+dst_offset_size; 970 break; 971 case Op_VecD: { 972 calc_size = 3+src_offset_size + 3+dst_offset_size; 973 int tmp_src_offset = src_offset + 4; 974 int tmp_dst_offset = dst_offset + 4; 975 src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4); 976 dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 
1 : 4); 977 calc_size += 3+src_offset_size + 3+dst_offset_size; 978 break; 979 } 980 case Op_VecX: 981 case Op_VecY: 982 case Op_VecZ: 983 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 984 break; 985 default: 986 ShouldNotReachHere(); 987 } 988 if (cbuf) { 989 MacroAssembler _masm(cbuf); 990 int offset = __ offset(); 991 switch (ireg) { 992 case Op_VecS: 993 __ pushl(Address(rsp, src_offset)); 994 __ popl (Address(rsp, dst_offset)); 995 break; 996 case Op_VecD: 997 __ pushl(Address(rsp, src_offset)); 998 __ popl (Address(rsp, dst_offset)); 999 __ pushl(Address(rsp, src_offset+4)); 1000 __ popl (Address(rsp, dst_offset+4)); 1001 break; 1002 case Op_VecX: 1003 __ movdqu(Address(rsp, -16), xmm0); 1004 __ movdqu(xmm0, Address(rsp, src_offset)); 1005 __ movdqu(Address(rsp, dst_offset), xmm0); 1006 __ movdqu(xmm0, Address(rsp, -16)); 1007 break; 1008 case Op_VecY: 1009 __ vmovdqu(Address(rsp, -32), xmm0); 1010 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1011 __ vmovdqu(Address(rsp, dst_offset), xmm0); 1012 __ vmovdqu(xmm0, Address(rsp, -32)); 1013 break; 1014 case Op_VecZ: 1015 __ evmovdquq(Address(rsp, -64), xmm0, 2); 1016 __ evmovdquq(xmm0, Address(rsp, src_offset), 2); 1017 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); 1018 __ evmovdquq(xmm0, Address(rsp, -64), 2); 1019 break; 1020 default: 1021 ShouldNotReachHere(); 1022 } 1023 int size = __ offset() - offset; 1024 assert(size == calc_size, "incorrect size calculation"); 1025 return size; 1026#ifndef PRODUCT 1027 } else if (!do_size) { 1028 switch (ireg) { 1029 case Op_VecS: 1030 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1031 "popl [rsp + #%d]", 1032 src_offset, dst_offset); 1033 break; 1034 case Op_VecD: 1035 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1036 "popq [rsp + #%d]\n\t" 1037 "pushl [rsp + #%d]\n\t" 1038 "popq [rsp + #%d]", 1039 src_offset, dst_offset, src_offset+4, dst_offset+4); 1040 break; 1041 case Op_VecX: 1042 st->print("movdqu [rsp - #16], xmm0\t# 128-bit 
mem-mem spill\n\t" 1043 "movdqu xmm0, [rsp + #%d]\n\t" 1044 "movdqu [rsp + #%d], xmm0\n\t" 1045 "movdqu xmm0, [rsp - #16]", 1046 src_offset, dst_offset); 1047 break; 1048 case Op_VecY: 1049 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1050 "vmovdqu xmm0, [rsp + #%d]\n\t" 1051 "vmovdqu [rsp + #%d], xmm0\n\t" 1052 "vmovdqu xmm0, [rsp - #32]", 1053 src_offset, dst_offset); 1054 break; 1055 case Op_VecZ: 1056 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1057 "vmovdqu xmm0, [rsp + #%d]\n\t" 1058 "vmovdqu [rsp + #%d], xmm0\n\t" 1059 "vmovdqu xmm0, [rsp - #64]", 1060 src_offset, dst_offset); 1061 break; 1062 default: 1063 ShouldNotReachHere(); 1064 } 1065#endif 1066 } 1067 return calc_size; 1068} 1069 1070uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1071 // Get registers to move 1072 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1073 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1074 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1075 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1076 1077 enum RC src_second_rc = rc_class(src_second); 1078 enum RC src_first_rc = rc_class(src_first); 1079 enum RC dst_second_rc = rc_class(dst_second); 1080 enum RC dst_first_rc = rc_class(dst_first); 1081 1082 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1083 1084 // Generate spill code! 
1085 int size = 0; 1086 1087 if( src_first == dst_first && src_second == dst_second ) 1088 return size; // Self copy, no move 1089 1090 if (bottom_type()->isa_vect() != NULL) { 1091 uint ireg = ideal_reg(); 1092 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1093 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1094 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1095 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1096 // mem -> mem 1097 int src_offset = ra_->reg2offset(src_first); 1098 int dst_offset = ra_->reg2offset(dst_first); 1099 return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); 1100 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1101 return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); 1102 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1103 int stack_offset = ra_->reg2offset(dst_first); 1104 return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); 1105 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1106 int stack_offset = ra_->reg2offset(src_first); 1107 return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); 1108 } else { 1109 ShouldNotReachHere(); 1110 } 1111 } 1112 1113 // -------------------------------------- 1114 // Check for mem-mem move. push/pop to move. 
1115 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1116 if( src_second == dst_first ) { // overlapping stack copy ranges 1117 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1118 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1119 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1120 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1121 } 1122 // move low bits 1123 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1124 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1125 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1126 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1127 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1128 } 1129 return size; 1130 } 1131 1132 // -------------------------------------- 1133 // Check for integer reg-reg copy 1134 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1135 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1136 1137 // Check for integer store 1138 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1139 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1140 1141 // Check for integer load 1142 if( dst_first_rc == rc_int && src_first_rc == rc_stack ) 1143 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1144 1145 // Check for integer reg-xmm reg copy 1146 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1147 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1148 "no 64 bit integer-float reg moves" ); 1149 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1150 } 1151 // -------------------------------------- 1152 // Check for float reg-reg copy 1153 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1154 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1155 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1156 if( cbuf ) { 1157 1158 // Note the mucking with the register encode to compensate for the 0/1 1159 // indexing issue mentioned in a comment in the reg_def sections 1160 // for FPR registers many lines above here. 1161 1162 if( src_first != FPR1L_num ) { 1163 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1164 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1165 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1166 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1167 } else { 1168 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1169 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1170 } 1171#ifndef PRODUCT 1172 } else if( !do_size ) { 1173 if( size != 0 ) st->print("\n\t"); 1174 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1175 else st->print( "FST %s", Matcher::regName[dst_first]); 1176#endif 1177 } 1178 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1179 } 1180 1181 // Check for float store 1182 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1183 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1184 } 1185 1186 // Check for float load 1187 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1188 int offset = ra_->reg2offset(src_first); 1189 const char *op_str; 1190 int op; 1191 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1192 op_str = "FLD_D"; 1193 op = 0xDD; 1194 } else { // 32-bit load 1195 op_str = "FLD_S"; 1196 op = 0xD9; 1197 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1198 } 1199 if( cbuf ) { 1200 emit_opcode (*cbuf, op ); 1201 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1202 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1203 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1204#ifndef PRODUCT 1205 } else if( !do_size ) { 1206 if( size != 0 ) st->print("\n\t"); 1207 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1208#endif 1209 } 1210 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1211 return size + 3+offset_size+2; 1212 } 1213 1214 // Check for xmm reg-reg copy 1215 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1216 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1217 (src_first+1 == src_second && dst_first+1 == dst_second), 1218 "no non-adjacent float-moves" ); 1219 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1220 } 1221 1222 // Check for xmm reg-integer reg copy 1223 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1224 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1225 "no 64 bit float-integer reg moves" ); 1226 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1227 } 1228 1229 // Check for xmm store 1230 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1231 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); 1232 } 1233 1234 // Check for float xmm load 1235 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { 1236 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1237 } 1238 1239 // Copy from float reg to xmm reg 1240 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { 1241 // 
copy to the top of stack from floating point reg 1242 // and use LEA to preserve flags 1243 if( cbuf ) { 1244 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1245 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1246 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1247 emit_d8(*cbuf,0xF8); 1248#ifndef PRODUCT 1249 } else if( !do_size ) { 1250 if( size != 0 ) st->print("\n\t"); 1251 st->print("LEA ESP,[ESP-8]"); 1252#endif 1253 } 1254 size += 4; 1255 1256 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1257 1258 // Copy from the temp memory to the xmm reg. 1259 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1260 1261 if( cbuf ) { 1262 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1263 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1264 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1265 emit_d8(*cbuf,0x08); 1266#ifndef PRODUCT 1267 } else if( !do_size ) { 1268 if( size != 0 ) st->print("\n\t"); 1269 st->print("LEA ESP,[ESP+8]"); 1270#endif 1271 } 1272 size += 4; 1273 return size; 1274 } 1275 1276 assert( size > 0, "missed a case" ); 1277 1278 // -------------------------------------------------------------------- 1279 // Check for second bits still needing moving. 
1280 if( src_second == dst_second ) 1281 return size; // Self copy; no move 1282 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1283 1284 // Check for second word int-int move 1285 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1286 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1287 1288 // Check for second word integer store 1289 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1290 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1291 1292 // Check for second word integer load 1293 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1294 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1295 1296 1297 Unimplemented(); 1298 return 0; // Mute compiler 1299} 1300 1301#ifndef PRODUCT 1302void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1303 implementation( NULL, ra_, false, st ); 1304} 1305#endif 1306 1307void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1308 implementation( &cbuf, ra_, false, NULL ); 1309} 1310 1311uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1312 return MachNode::size(ra_); 1313} 1314 1315 1316//============================================================================= 1317#ifndef PRODUCT 1318void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1319 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1320 int reg = ra_->get_reg_first(this); 1321 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1322} 1323#endif 1324 1325void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1326 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1327 int reg = ra_->get_encode(this); 1328 if( offset >= 128 ) { 1329 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1330 emit_rm(cbuf, 0x2, reg, 0x04); 1331 emit_rm(cbuf, 0x0, 0x04, 
ESP_enc); 1332 emit_d32(cbuf, offset); 1333 } 1334 else { 1335 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1336 emit_rm(cbuf, 0x1, reg, 0x04); 1337 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1338 emit_d8(cbuf, offset); 1339 } 1340} 1341 1342uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1343 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1344 if( offset >= 128 ) { 1345 return 7; 1346 } 1347 else { 1348 return 4; 1349 } 1350} 1351 1352//============================================================================= 1353#ifndef PRODUCT 1354void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1355 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1356 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1357 st->print_cr("\tNOP"); 1358 st->print_cr("\tNOP"); 1359 if( !OptoBreakpoint ) 1360 st->print_cr("\tNOP"); 1361} 1362#endif 1363 1364void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1365 MacroAssembler masm(&cbuf); 1366#ifdef ASSERT 1367 uint insts_size = cbuf.insts_size(); 1368#endif 1369 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1370 masm.jump_cc(Assembler::notEqual, 1371 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1372 /* WARNING these NOPs are critical so that verified entry point is properly 1373 aligned for patching by NativeJump::patch_verified_entry() */ 1374 int nops_cnt = 2; 1375 if( !OptoBreakpoint ) // Leave space for int3 1376 nops_cnt += 1; 1377 masm.nop(nops_cnt); 1378 1379 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1380} 1381 1382uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1383 return OptoBreakpoint ? 
11 : 12; 1384} 1385 1386 1387//============================================================================= 1388 1389int Matcher::regnum_to_fpu_offset(int regnum) { 1390 return regnum - 32; // The FP registers are in the second chunk 1391} 1392 1393// This is UltraSparc specific, true just means we have fast l2f conversion 1394const bool Matcher::convL2FSupported(void) { 1395 return true; 1396} 1397 1398// Is this branch offset short enough that a short branch can be used? 1399// 1400// NOTE: If the platform does not provide any short branch variants, then 1401// this method should return false for offset 0. 1402bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1403 // The passed offset is relative to address of the branch. 1404 // On 86 a branch displacement is calculated relative to address 1405 // of a next instruction. 1406 offset -= br_size; 1407 1408 // the short version of jmpConUCF2 contains multiple branches, 1409 // making the reach slightly less 1410 if (rule == jmpConUCF2_rule) 1411 return (-126 <= offset && offset <= 125); 1412 return (-128 <= offset && offset <= 127); 1413} 1414 1415const bool Matcher::isSimpleConstant64(jlong value) { 1416 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 1417 return false; 1418} 1419 1420// The ecx parameter to rep stos for the ClearArray node is in dwords. 1421const bool Matcher::init_array_count_is_in_bytes = false; 1422 1423// Needs 2 CMOV's for longs. 1424const int Matcher::long_cmove_cost() { return 1; } 1425 1426// No CMOVF/CMOVD with SSE/SSE2 1427const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } 1428 1429// Does the CPU require late expand (see block.cpp for description of late expand)? 1430const bool Matcher::require_postalloc_expand = false; 1431 1432// Do we need to mask the count passed to shift instructions or does 1433// the cpu only look at the lower 5/6 bits anyway? 
1434const bool Matcher::need_masked_shift_count = false; 1435 1436bool Matcher::narrow_oop_use_complex_address() { 1437 ShouldNotCallThis(); 1438 return true; 1439} 1440 1441bool Matcher::narrow_klass_use_complex_address() { 1442 ShouldNotCallThis(); 1443 return true; 1444} 1445 1446bool Matcher::const_oop_prefer_decode() { 1447 ShouldNotCallThis(); 1448 return true; 1449} 1450 1451bool Matcher::const_klass_prefer_decode() { 1452 ShouldNotCallThis(); 1453 return true; 1454} 1455 1456// Is it better to copy float constants, or load them directly from memory? 1457// Intel can load a float constant from a direct address, requiring no 1458// extra registers. Most RISCs will have to materialize an address into a 1459// register first, so they would do better to copy the constant from stack. 1460const bool Matcher::rematerialize_float_constants = true; 1461 1462// If CPU can load and store mis-aligned doubles directly then no fixup is 1463// needed. Else we split the double into 2 integer pieces and move it 1464// piece-by-piece. Only happens when passing doubles into C code as the 1465// Java calling convention forces doubles to be aligned. 
1466const bool Matcher::misaligned_doubles_ok = true; 1467 1468 1469void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { 1470 // Get the memory operand from the node 1471 uint numopnds = node->num_opnds(); // Virtual call for number of operands 1472 uint skipped = node->oper_input_base(); // Sum of leaves skipped so far 1473 assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" ); 1474 uint opcnt = 1; // First operand 1475 uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand 1476 while( idx >= skipped+num_edges ) { 1477 skipped += num_edges; 1478 opcnt++; // Bump operand count 1479 assert( opcnt < numopnds, "Accessing non-existent operand" ); 1480 num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand 1481 } 1482 1483 MachOper *memory = node->_opnds[opcnt]; 1484 MachOper *new_memory = NULL; 1485 switch (memory->opcode()) { 1486 case DIRECT: 1487 case INDOFFSET32X: 1488 // No transformation necessary. 1489 return; 1490 case INDIRECT: 1491 new_memory = new indirect_win95_safeOper( ); 1492 break; 1493 case INDOFFSET8: 1494 new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0)); 1495 break; 1496 case INDOFFSET32: 1497 new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0)); 1498 break; 1499 case INDINDEXOFFSET: 1500 new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0)); 1501 break; 1502 case INDINDEXSCALE: 1503 new_memory = new indIndexScale_win95_safeOper(memory->scale()); 1504 break; 1505 case INDINDEXSCALEOFFSET: 1506 new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0)); 1507 break; 1508 case LOAD_LONG_INDIRECT: 1509 case LOAD_LONG_INDOFFSET32: 1510 // Does not use EBP as address register, use { EDX, EBX, EDI, ESI} 1511 return; 1512 default: 1513 assert(false, "unexpected memory operand in pd_implicit_null_fixup()"); 1514 return; 1515 } 1516 node->_opnds[opcnt] = new_memory; 1517} 1518 1519// Advertise 
here if the CPU requires explicit rounding operations 1520// to implement the UseStrictFP mode. 1521const bool Matcher::strict_fp_requires_explicit_rounding = true; 1522 1523// Are floats conerted to double when stored to stack during deoptimization? 1524// On x32 it is stored with convertion only when FPU is used for floats. 1525bool Matcher::float_in_double() { return (UseSSE == 0); } 1526 1527// Do ints take an entire long register or just half? 1528const bool Matcher::int_in_long = false; 1529 1530// Return whether or not this register is ever used as an argument. This 1531// function is used on startup to build the trampoline stubs in generateOptoStub. 1532// Registers not mentioned will be killed by the VM call in the trampoline, and 1533// arguments in those registers not be available to the callee. 1534bool Matcher::can_be_java_arg( int reg ) { 1535 if( reg == ECX_num || reg == EDX_num ) return true; 1536 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; 1537 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; 1538 return false; 1539} 1540 1541bool Matcher::is_spillable_arg( int reg ) { 1542 return can_be_java_arg(reg); 1543} 1544 1545bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { 1546 // Use hardware integer DIV instruction when 1547 // it is faster than a code which use multiply. 1548 // Only when constant divisor fits into 32 bit 1549 // (min_jint is excluded to get only correct 1550 // positive 32 bit values from negative). 
1551 return VM_Version::has_fast_idiv() && 1552 (divisor == (int)divisor && divisor != min_jint); 1553} 1554 1555// Register for DIVI projection of divmodI 1556RegMask Matcher::divI_proj_mask() { 1557 return EAX_REG_mask(); 1558} 1559 1560// Register for MODI projection of divmodI 1561RegMask Matcher::modI_proj_mask() { 1562 return EDX_REG_mask(); 1563} 1564 1565// Register for DIVL projection of divmodL 1566RegMask Matcher::divL_proj_mask() { 1567 ShouldNotReachHere(); 1568 return RegMask(); 1569} 1570 1571// Register for MODL projection of divmodL 1572RegMask Matcher::modL_proj_mask() { 1573 ShouldNotReachHere(); 1574 return RegMask(); 1575} 1576 1577const RegMask Matcher::method_handle_invoke_SP_save_mask() { 1578 return NO_REG_mask(); 1579} 1580 1581// Returns true if the high 32 bits of the value is known to be zero. 1582bool is_operand_hi32_zero(Node* n) { 1583 int opc = n->Opcode(); 1584 if (opc == Op_AndL) { 1585 Node* o2 = n->in(2); 1586 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1587 return true; 1588 } 1589 } 1590 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1591 return true; 1592 } 1593 return false; 1594} 1595 1596%} 1597 1598//----------ENCODING BLOCK----------------------------------------------------- 1599// This block specifies the encoding classes used by the compiler to output 1600// byte streams. Encoding classes generate functions which are called by 1601// Machine Instruction Nodes in order to generate the bit encoding of the 1602// instruction. Operands specify their base encoding interface with the 1603// interface keyword. There are currently supported four interfaces, 1604// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an 1605// operand to generate a function which returns its register number when 1606// queried. CONST_INTER causes an operand to generate a function which 1607// returns the value of the constant when queried. 
MEMORY_INTER causes an 1608// operand to generate four functions which return the Base Register, the 1609// Index Register, the Scale Value, and the Offset Value of the operand when 1610// queried. COND_INTER causes an operand to generate six functions which 1611// return the encoding code (ie - encoding bits for the instruction) 1612// associated with each basic boolean condition for a conditional instruction. 1613// Instructions specify two basic values for encoding. They use the 1614// ins_encode keyword to specify their encoding class (which must be one of 1615// the class names specified in the encoding block), and they use the 1616// opcode keyword to specify, in order, their primary, secondary, and 1617// tertiary opcode. Only the opcode sections which a particular instruction 1618// needs for encoding need to be specified. 1619encode %{ 1620 // Build emit functions for each basic byte or larger field in the intel 1621 // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1622 // code in the enc_class source block. Emit functions will live in the 1623 // main source block for now. 
In future, we can generalize this by 1624 // adding a syntax that specifies the sizes of fields in an order, 1625 // so that the adlc can build the emit functions automagically 1626 1627 // Emit primary opcode 1628 enc_class OpcP %{ 1629 emit_opcode(cbuf, $primary); 1630 %} 1631 1632 // Emit secondary opcode 1633 enc_class OpcS %{ 1634 emit_opcode(cbuf, $secondary); 1635 %} 1636 1637 // Emit opcode directly 1638 enc_class Opcode(immI d8) %{ 1639 emit_opcode(cbuf, $d8$$constant); 1640 %} 1641 1642 enc_class SizePrefix %{ 1643 emit_opcode(cbuf,0x66); 1644 %} 1645 1646 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 1647 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1648 %} 1649 1650 enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many) 1651 emit_opcode(cbuf,$opcode$$constant); 1652 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1653 %} 1654 1655 enc_class mov_r32_imm0( rRegI dst ) %{ 1656 emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32 1657 emit_d32 ( cbuf, 0x0 ); // imm32==0x0 1658 %} 1659 1660 enc_class cdq_enc %{ 1661 // Full implementation of Java idiv and irem; checks for 1662 // special case as described in JVM spec., p.243 & p.271. 
1663 // 1664 // normal case special case 1665 // 1666 // input : rax,: dividend min_int 1667 // reg: divisor -1 1668 // 1669 // output: rax,: quotient (= rax, idiv reg) min_int 1670 // rdx: remainder (= rax, irem reg) 0 1671 // 1672 // Code sequnce: 1673 // 1674 // 81 F8 00 00 00 80 cmp rax,80000000h 1675 // 0F 85 0B 00 00 00 jne normal_case 1676 // 33 D2 xor rdx,edx 1677 // 83 F9 FF cmp rcx,0FFh 1678 // 0F 84 03 00 00 00 je done 1679 // normal_case: 1680 // 99 cdq 1681 // F7 F9 idiv rax,ecx 1682 // done: 1683 // 1684 emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8); 1685 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); 1686 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h 1687 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85); 1688 emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00); 1689 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case 1690 emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx 1691 emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh 1692 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84); 1693 emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00); 1694 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done 1695 // normal_case: 1696 emit_opcode(cbuf,0x99); // cdq 1697 // idiv (note: must be emitted by the user of this rule) 1698 // normal: 1699 %} 1700 1701 // Dense encoding for older common ops 1702 enc_class Opc_plus(immI opcode, rRegI reg) %{ 1703 emit_opcode(cbuf, $opcode$$constant + $reg$$reg); 1704 %} 1705 1706 1707 // Opcde enc_class for 8/32 bit immediate instructions with sign-extension 1708 enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit 1709 // Check for 8-bit immediate, and set sign extend bit in opcode 1710 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1711 emit_opcode(cbuf, $primary | 0x02); 1712 } 1713 else { // If 32-bit immediate 1714 emit_opcode(cbuf, $primary); 1715 } 1716 %} 1717 1718 enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m 1719 // Emit primary opcode and 
set sign-extend bit 1720 // Check for 8-bit immediate, and set sign extend bit in opcode 1721 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1722 emit_opcode(cbuf, $primary | 0x02); } 1723 else { // If 32-bit immediate 1724 emit_opcode(cbuf, $primary); 1725 } 1726 // Emit r/m byte with secondary opcode, after primary opcode. 1727 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1728 %} 1729 1730 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1731 // Check for 8-bit immediate, and set sign extend bit in opcode 1732 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1733 $$$emit8$imm$$constant; 1734 } 1735 else { // If 32-bit immediate 1736 // Output immediate 1737 $$$emit32$imm$$constant; 1738 } 1739 %} 1740 1741 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1742 // Emit primary opcode and set sign-extend bit 1743 // Check for 8-bit immediate, and set sign extend bit in opcode 1744 int con = (int)$imm$$constant; // Throw away top bits 1745 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1746 // Emit r/m byte with secondary opcode, after primary opcode. 1747 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1748 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1749 else emit_d32(cbuf,con); 1750 %} 1751 1752 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1753 // Emit primary opcode and set sign-extend bit 1754 // Check for 8-bit immediate, and set sign extend bit in opcode 1755 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1756 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1757 // Emit r/m byte with tertiary opcode, after primary opcode. 
1758 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1759 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1760 else emit_d32(cbuf,con); 1761 %} 1762 1763 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1764 emit_cc(cbuf, $secondary, $dst$$reg ); 1765 %} 1766 1767 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1768 int destlo = $dst$$reg; 1769 int desthi = HIGH_FROM_LOW(destlo); 1770 // bswap lo 1771 emit_opcode(cbuf, 0x0F); 1772 emit_cc(cbuf, 0xC8, destlo); 1773 // bswap hi 1774 emit_opcode(cbuf, 0x0F); 1775 emit_cc(cbuf, 0xC8, desthi); 1776 // xchg lo and hi 1777 emit_opcode(cbuf, 0x87); 1778 emit_rm(cbuf, 0x3, destlo, desthi); 1779 %} 1780 1781 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1782 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1783 %} 1784 1785 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1786 $$$emit8$primary; 1787 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1788 %} 1789 1790 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1791 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1792 emit_d8(cbuf, op >> 8 ); 1793 emit_d8(cbuf, op & 255); 1794 %} 1795 1796 // emulate a CMOV with a conditional branch around a MOV 1797 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1798 // Invert sense of branch from sense of CMOV 1799 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1800 emit_d8( cbuf, $brOffs$$constant ); 1801 %} 1802 1803 enc_class enc_PartialSubtypeCheck( ) %{ 1804 Register Redi = as_Register(EDI_enc); // result register 1805 Register Reax = as_Register(EAX_enc); // super class 1806 Register Recx = as_Register(ECX_enc); // killed 1807 Register Resi = as_Register(ESI_enc); // sub class 1808 Label miss; 1809 1810 MacroAssembler _masm(&cbuf); 1811 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1812 NULL, &miss, 1813 /*set_cond_codes:*/ true); 1814 if ($primary) { 1815 __ xorptr(Redi, Redi); 1816 } 1817 __ bind(miss); 1818 %} 1819 1820 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1821 
MacroAssembler masm(&cbuf); 1822 int start = masm.offset(); 1823 if (UseSSE >= 2) { 1824 if (VerifyFPU) { 1825 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1826 } 1827 } else { 1828 // External c_calling_convention expects the FPU stack to be 'clean'. 1829 // Compiled code leaves it dirty. Do cleanup now. 1830 masm.empty_FPU_stack(); 1831 } 1832 if (sizeof_FFree_Float_Stack_All == -1) { 1833 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1834 } else { 1835 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1836 } 1837 %} 1838 1839 enc_class Verify_FPU_For_Leaf %{ 1840 if( VerifyFPU ) { 1841 MacroAssembler masm(&cbuf); 1842 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1843 } 1844 %} 1845 1846 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1847 // This is the instruction starting address for relocation info. 1848 cbuf.set_insts_mark(); 1849 $$$emit8$primary; 1850 // CALL directly to the runtime 1851 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1852 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1853 1854 if (UseSSE >= 2) { 1855 MacroAssembler _masm(&cbuf); 1856 BasicType rt = tf()->return_type(); 1857 1858 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1859 // A C runtime call where the return value is unused. In SSE2+ 1860 // mode the result needs to be removed from the FPU stack. It's 1861 // likely that this function call could be removed by the 1862 // optimizer if the C function is a pure function. 
1863 __ ffree(0); 1864 } else if (rt == T_FLOAT) { 1865 __ lea(rsp, Address(rsp, -4)); 1866 __ fstp_s(Address(rsp, 0)); 1867 __ movflt(xmm0, Address(rsp, 0)); 1868 __ lea(rsp, Address(rsp, 4)); 1869 } else if (rt == T_DOUBLE) { 1870 __ lea(rsp, Address(rsp, -8)); 1871 __ fstp_d(Address(rsp, 0)); 1872 __ movdbl(xmm0, Address(rsp, 0)); 1873 __ lea(rsp, Address(rsp, 8)); 1874 } 1875 } 1876 %} 1877 1878 enc_class pre_call_resets %{ 1879 // If method sets FPU control word restore it here 1880 debug_only(int off0 = cbuf.insts_size()); 1881 if (ra_->C->in_24_bit_fp_mode()) { 1882 MacroAssembler _masm(&cbuf); 1883 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1884 } 1885 // Clear upper bits of YMM registers when current compiled code uses 1886 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1887 MacroAssembler _masm(&cbuf); 1888 __ vzeroupper(); 1889 debug_only(int off1 = cbuf.insts_size()); 1890 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1891 %} 1892 1893 enc_class post_call_FPU %{ 1894 // If method sets FPU control word do it here also 1895 if (Compile::current()->in_24_bit_fp_mode()) { 1896 MacroAssembler masm(&cbuf); 1897 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1898 } 1899 %} 1900 1901 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1902 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1903 // who we intended to call. 1904 cbuf.set_insts_mark(); 1905 $$$emit8$primary; 1906 1907 if (!_method) { 1908 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1909 runtime_call_Relocation::spec(), 1910 RELOC_IMM32); 1911 } else { 1912 int method_index = resolved_method_index(cbuf); 1913 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1914 : static_call_Relocation::spec(method_index); 1915 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1916 rspec, RELOC_DISP32); 1917 // Emit stubs for static call. 1918 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1919 if (stub == NULL) { 1920 ciEnv::current()->record_failure("CodeCache is full"); 1921 return; 1922 } 1923 } 1924 %} 1925 1926 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1927 MacroAssembler _masm(&cbuf); 1928 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1929 %} 1930 1931 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1932 int disp = in_bytes(Method::from_compiled_offset()); 1933 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1934 1935 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1936 cbuf.set_insts_mark(); 1937 $$$emit8$primary; 1938 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1939 emit_d8(cbuf, disp); // Displacement 1940 1941 %} 1942 1943// Following encoding is no longer used, but may be restored if calling 1944// convention changes significantly. 1945// Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1946// 1947// enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1948// // int ic_reg = Matcher::inline_cache_reg(); 1949// // int ic_encode = Matcher::_regEncode[ic_reg]; 1950// // int imo_reg = Matcher::interpreter_method_oop_reg(); 1951// // int imo_encode = Matcher::_regEncode[imo_reg]; 1952// 1953// // // Interpreter expects method_oop in EBX, currently a callee-saved register, 1954// // // so we load it immediately before the call 1955// // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop 1956// // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1957// 1958// // xor rbp,ebp 1959// emit_opcode(cbuf, 0x33); 1960// emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1961// 1962// // CALL to interpreter. 
1963// cbuf.set_insts_mark(); 1964// $$$emit8$primary; 1965// emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1966// runtime_call_Relocation::spec(), RELOC_IMM32 ); 1967// %} 1968 1969 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1970 $$$emit8$primary; 1971 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1972 $$$emit8$shift$$constant; 1973 %} 1974 1975 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1976 // Load immediate does not have a zero or sign extended version 1977 // for 8-bit immediates 1978 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1979 $$$emit32$src$$constant; 1980 %} 1981 1982 enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate 1983 // Load immediate does not have a zero or sign extended version 1984 // for 8-bit immediates 1985 emit_opcode(cbuf, $primary + $dst$$reg); 1986 $$$emit32$src$$constant; 1987 %} 1988 1989 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1990 // Load immediate does not have a zero or sign extended version 1991 // for 8-bit immediates 1992 int dst_enc = $dst$$reg; 1993 int src_con = $src$$constant & 0x0FFFFFFFFL; 1994 if (src_con == 0) { 1995 // xor dst, dst 1996 emit_opcode(cbuf, 0x33); 1997 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1998 } else { 1999 emit_opcode(cbuf, $primary + dst_enc); 2000 emit_d32(cbuf, src_con); 2001 } 2002 %} 2003 2004 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 2005 // Load immediate does not have a zero or sign extended version 2006 // for 8-bit immediates 2007 int dst_enc = $dst$$reg + 2; 2008 int src_con = ((julong)($src$$constant)) >> 32; 2009 if (src_con == 0) { 2010 // xor dst, dst 2011 emit_opcode(cbuf, 0x33); 2012 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 2013 } else { 2014 emit_opcode(cbuf, $primary + dst_enc); 2015 emit_d32(cbuf, src_con); 2016 } 2017 %} 2018 2019 2020 // Encode a reg-reg copy. If it is useless, then empty encoding. 
2021 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 2022 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2023 %} 2024 2025 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 2026 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2027 %} 2028 2029 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 2030 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2031 %} 2032 2033 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 2034 $$$emit8$primary; 2035 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2036 %} 2037 2038 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 2039 $$$emit8$secondary; 2040 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2041 %} 2042 2043 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 2044 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2045 %} 2046 2047 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 2048 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2049 %} 2050 2051 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 2052 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg)); 2053 %} 2054 2055 enc_class Con32 (immI src) %{ // Con32(storeImmI) 2056 // Output immediate 2057 $$$emit32$src$$constant; 2058 %} 2059 2060 enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm 2061 // Output Float immediate bits 2062 jfloat jf = $src$$constant; 2063 int jf_as_bits = jint_cast( jf ); 2064 emit_d32(cbuf, jf_as_bits); 2065 %} 2066 2067 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 2068 // Output Float immediate bits 2069 jfloat jf = $src$$constant; 2070 int jf_as_bits = jint_cast( jf ); 2071 emit_d32(cbuf, jf_as_bits); 2072 %} 2073 2074 enc_class Con16 (immI src) %{ // Con16(storeImmI) 2075 // Output immediate 2076 $$$emit16$src$$constant; 2077 %} 2078 2079 enc_class Con_d32(immI src) %{ 2080 emit_d32(cbuf,$src$$constant); 2081 %} 2082 2083 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 2084 // Output immediate memory reference 2085 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 
2086 emit_d32(cbuf, 0x00); 2087 %} 2088 2089 enc_class lock_prefix( ) %{ 2090 if( os::is_MP() ) 2091 emit_opcode(cbuf,0xF0); // [Lock] 2092 %} 2093 2094 // Cmp-xchg long value. 2095 // Note: we need to swap rbx, and rcx before and after the 2096 // cmpxchg8 instruction because the instruction uses 2097 // rcx as the high order word of the new value to store but 2098 // our register encoding uses rbx,. 2099 enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{ 2100 2101 // XCHG rbx,ecx 2102 emit_opcode(cbuf,0x87); 2103 emit_opcode(cbuf,0xD9); 2104 // [Lock] 2105 if( os::is_MP() ) 2106 emit_opcode(cbuf,0xF0); 2107 // CMPXCHG8 [Eptr] 2108 emit_opcode(cbuf,0x0F); 2109 emit_opcode(cbuf,0xC7); 2110 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2111 // XCHG rbx,ecx 2112 emit_opcode(cbuf,0x87); 2113 emit_opcode(cbuf,0xD9); 2114 %} 2115 2116 enc_class enc_cmpxchg(eSIRegP mem_ptr) %{ 2117 // [Lock] 2118 if( os::is_MP() ) 2119 emit_opcode(cbuf,0xF0); 2120 2121 // CMPXCHG [Eptr] 2122 emit_opcode(cbuf,0x0F); 2123 emit_opcode(cbuf,0xB1); 2124 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2125 %} 2126 2127 enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{ 2128 // [Lock] 2129 if( os::is_MP() ) 2130 emit_opcode(cbuf,0xF0); 2131 2132 // CMPXCHGB [Eptr] 2133 emit_opcode(cbuf,0x0F); 2134 emit_opcode(cbuf,0xB0); 2135 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2136 %} 2137 2138 enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{ 2139 // [Lock] 2140 if( os::is_MP() ) 2141 emit_opcode(cbuf,0xF0); 2142 2143 // 16-bit mode 2144 emit_opcode(cbuf, 0x66); 2145 2146 // CMPXCHGW [Eptr] 2147 emit_opcode(cbuf,0x0F); 2148 emit_opcode(cbuf,0xB1); 2149 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2150 %} 2151 2152 enc_class enc_flags_ne_to_boolean( iRegI res ) %{ 2153 int res_encoding = $res$$reg; 2154 2155 // MOV res,0 2156 emit_opcode( cbuf, 0xB8 + res_encoding); 2157 emit_d32( cbuf, 0 ); 2158 // JNE,s fail 2159 emit_opcode(cbuf,0x75); 2160 emit_d8(cbuf, 5 ); 2161 // MOV res,1 2162 emit_opcode( cbuf, 0xB8 + res_encoding); 2163 emit_d32( cbuf, 1 
); 2164 // fail: 2165 %} 2166 2167 enc_class set_instruction_start( ) %{ 2168 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2169 %} 2170 2171 enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem 2172 int reg_encoding = $ereg$$reg; 2173 int base = $mem$$base; 2174 int index = $mem$$index; 2175 int scale = $mem$$scale; 2176 int displace = $mem$$disp; 2177 relocInfo::relocType disp_reloc = $mem->disp_reloc(); 2178 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2179 %} 2180 2181 enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem 2182 int reg_encoding = HIGH_FROM_LOW($ereg$$reg); // Hi register of pair, computed from lo 2183 int base = $mem$$base; 2184 int index = $mem$$index; 2185 int scale = $mem$$scale; 2186 int displace = $mem$$disp + 4; // Offset is 4 further in memory 2187 assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" ); 2188 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none); 2189 %} 2190 2191 enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{ 2192 int r1, r2; 2193 if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); } 2194 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); } 2195 emit_opcode(cbuf,0x0F); 2196 emit_opcode(cbuf,$tertiary); 2197 emit_rm(cbuf, 0x3, r1, r2); 2198 emit_d8(cbuf,$cnt$$constant); 2199 emit_d8(cbuf,$primary); 2200 emit_rm(cbuf, 0x3, $secondary, r1); 2201 emit_d8(cbuf,$cnt$$constant); 2202 %} 2203 2204 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ 2205 emit_opcode( cbuf, 0x8B ); // Move 2206 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 2207 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2208 emit_d8(cbuf,$primary); 2209 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 2210 emit_d8(cbuf,$cnt$$constant-32); 2211 } 2212 emit_d8(cbuf,$primary); 2213 emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg)); 2214 emit_d8(cbuf,31); 2215 %} 2216 2217 
// move_long_big_shift_clr: long shift by a constant count in 32..63 where
// the vacated half becomes zero.  Emits MOV r1,r2; a shift of r1 by
// (count - 32) when that is non-zero; then XOR r2,r2.  A secondary opcode
// of 0x5 selects r1 = lo / r2 = hi; any other value selects the reverse.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;  // offset of the requested half
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
2251 enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{ 2252 int rm_byte_opcode = $rm_opcode$$constant; 2253 int base = $mem$$base; 2254 int index = $mem$$index; 2255 int scale = $mem$$scale; 2256 int displace = $mem$$disp; 2257 assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" ); 2258 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none); 2259 %} 2260 2261 enc_class RMopc_Mem (immI rm_opcode, memory mem) %{ 2262 int rm_byte_opcode = $rm_opcode$$constant; 2263 int base = $mem$$base; 2264 int index = $mem$$index; 2265 int scale = $mem$$scale; 2266 int displace = $mem$$disp; 2267 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2268 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 2269 %} 2270 2271 enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea 2272 int reg_encoding = $dst$$reg; 2273 int base = $src0$$reg; // 0xFFFFFFFF indicates no base 2274 int index = 0x04; // 0x04 indicates no index 2275 int scale = 0x00; // 0x00 indicates no scale 2276 int displace = $src1$$constant; // 0x00 indicates no displacement 2277 relocInfo::relocType disp_reloc = relocInfo::none; 2278 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2279 %} 2280 2281 enc_class min_enc (rRegI dst, rRegI src) %{ // MIN 2282 // Compare dst,src 2283 emit_opcode(cbuf,0x3B); 2284 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2285 // jmp dst < src around move 2286 emit_opcode(cbuf,0x7C); 2287 emit_d8(cbuf,2); 2288 // move dst,src 2289 emit_opcode(cbuf,0x8B); 2290 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2291 %} 2292 2293 enc_class max_enc (rRegI dst, rRegI src) %{ // MAX 2294 // Compare dst,src 2295 emit_opcode(cbuf,0x3B); 2296 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2297 // jmp dst > src around move 2298 emit_opcode(cbuf,0x7F); 2299 emit_d8(cbuf,2); 2300 // move dst,src 2301 emit_opcode(cbuf,0x8B); 
2302 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2303 %} 2304 2305 enc_class enc_FPR_store(memory mem, regDPR src) %{ 2306 // If src is FPR1, we can just FST to store it. 2307 // Else we need to FLD it to FPR1, then FSTP to store/pop it. 2308 int reg_encoding = 0x2; // Just store 2309 int base = $mem$$base; 2310 int index = $mem$$index; 2311 int scale = $mem$$scale; 2312 int displace = $mem$$disp; 2313 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2314 if( $src$$reg != FPR1L_enc ) { 2315 reg_encoding = 0x3; // Store & pop 2316 emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it) 2317 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2318 } 2319 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2320 emit_opcode(cbuf,$primary); 2321 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2322 %} 2323 2324 enc_class neg_reg(rRegI dst) %{ 2325 // NEG $dst 2326 emit_opcode(cbuf,0xF7); 2327 emit_rm(cbuf, 0x3, 0x03, $dst$$reg ); 2328 %} 2329 2330 enc_class setLT_reg(eCXRegI dst) %{ 2331 // SETLT $dst 2332 emit_opcode(cbuf,0x0F); 2333 emit_opcode(cbuf,0x9C); 2334 emit_rm( cbuf, 0x3, 0x4, $dst$$reg ); 2335 %} 2336 2337 enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT 2338 int tmpReg = $tmp$$reg; 2339 2340 // SUB $p,$q 2341 emit_opcode(cbuf,0x2B); 2342 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg); 2343 // SBB $tmp,$tmp 2344 emit_opcode(cbuf,0x1B); 2345 emit_rm(cbuf, 0x3, tmpReg, tmpReg); 2346 // AND $tmp,$y 2347 emit_opcode(cbuf,0x23); 2348 emit_rm(cbuf, 0x3, tmpReg, $y$$reg); 2349 // ADD $p,$tmp 2350 emit_opcode(cbuf,0x03); 2351 emit_rm(cbuf, 0x3, $p$$reg, tmpReg); 2352 %} 2353 2354 enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{ 2355 // TEST shift,32 2356 emit_opcode(cbuf,0xF7); 2357 emit_rm(cbuf, 0x3, 0, ECX_enc); 2358 emit_d32(cbuf,0x20); 2359 // JEQ,s small 2360 emit_opcode(cbuf, 0x74); 2361 emit_d8(cbuf, 0x04); 2362 // MOV $dst.hi,$dst.lo 2363 
emit_opcode( cbuf, 0x8B ); 2364 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2365 // CLR $dst.lo 2366 emit_opcode(cbuf, 0x33); 2367 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg); 2368// small: 2369 // SHLD $dst.hi,$dst.lo,$shift 2370 emit_opcode(cbuf,0x0F); 2371 emit_opcode(cbuf,0xA5); 2372 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 2373 // SHL $dst.lo,$shift" 2374 emit_opcode(cbuf,0xD3); 2375 emit_rm(cbuf, 0x3, 0x4, $dst$$reg ); 2376 %} 2377 2378 enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{ 2379 // TEST shift,32 2380 emit_opcode(cbuf,0xF7); 2381 emit_rm(cbuf, 0x3, 0, ECX_enc); 2382 emit_d32(cbuf,0x20); 2383 // JEQ,s small 2384 emit_opcode(cbuf, 0x74); 2385 emit_d8(cbuf, 0x04); 2386 // MOV $dst.lo,$dst.hi 2387 emit_opcode( cbuf, 0x8B ); 2388 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 2389 // CLR $dst.hi 2390 emit_opcode(cbuf, 0x33); 2391 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg)); 2392// small: 2393 // SHRD $dst.lo,$dst.hi,$shift 2394 emit_opcode(cbuf,0x0F); 2395 emit_opcode(cbuf,0xAD); 2396 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 2397 // SHR $dst.hi,$shift" 2398 emit_opcode(cbuf,0xD3); 2399 emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) ); 2400 %} 2401 2402 enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{ 2403 // TEST shift,32 2404 emit_opcode(cbuf,0xF7); 2405 emit_rm(cbuf, 0x3, 0, ECX_enc); 2406 emit_d32(cbuf,0x20); 2407 // JEQ,s small 2408 emit_opcode(cbuf, 0x74); 2409 emit_d8(cbuf, 0x05); 2410 // MOV $dst.lo,$dst.hi 2411 emit_opcode( cbuf, 0x8B ); 2412 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 2413 // SAR $dst.hi,31 2414 emit_opcode(cbuf, 0xC1); 2415 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) ); 2416 emit_d8(cbuf, 0x1F ); 2417// small: 2418 // SHRD $dst.lo,$dst.hi,$shift 2419 emit_opcode(cbuf,0x0F); 2420 emit_opcode(cbuf,0xAD); 2421 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 2422 // SAR $dst.hi,$shift" 2423 
emit_opcode(cbuf,0xD3); 2424 emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) ); 2425 %} 2426 2427 2428 // ----------------- Encodings for floating point unit ----------------- 2429 // May leave result in FPU-TOS or FPU reg depending on opcodes 2430 enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV 2431 $$$emit8$primary; 2432 emit_rm(cbuf, 0x3, $secondary, $src$$reg ); 2433 %} 2434 2435 // Pop argument in FPR0 with FSTP ST(0) 2436 enc_class PopFPU() %{ 2437 emit_opcode( cbuf, 0xDD ); 2438 emit_d8( cbuf, 0xD8 ); 2439 %} 2440 2441 // !!!!! equivalent to Pop_Reg_F 2442 enc_class Pop_Reg_DPR( regDPR dst ) %{ 2443 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2444 emit_d8( cbuf, 0xD8+$dst$$reg ); 2445 %} 2446 2447 enc_class Push_Reg_DPR( regDPR dst ) %{ 2448 emit_opcode( cbuf, 0xD9 ); 2449 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) 2450 %} 2451 2452 enc_class strictfp_bias1( regDPR dst ) %{ 2453 emit_opcode( cbuf, 0xDB ); // FLD m80real 2454 emit_opcode( cbuf, 0x2D ); 2455 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() ); 2456 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2457 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2458 %} 2459 2460 enc_class strictfp_bias2( regDPR dst ) %{ 2461 emit_opcode( cbuf, 0xDB ); // FLD m80real 2462 emit_opcode( cbuf, 0x2D ); 2463 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() ); 2464 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2465 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2466 %} 2467 2468 // Special case for moving an integer register to a stack slot. 2469 enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2470 store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp ); 2471 %} 2472 2473 // Special case for moving a register to a stack slot. 
2474 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2475 // Opcode already emitted 2476 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2477 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2478 emit_d32(cbuf, $dst$$disp); // Displacement 2479 %} 2480 2481 // Push the integer in stackSlot 'src' onto FP-stack 2482 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2483 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2484 %} 2485 2486 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2487 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2488 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2489 %} 2490 2491 // Same as Pop_Mem_F except for opcode 2492 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2493 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2494 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2495 %} 2496 2497 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2498 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2499 emit_d8( cbuf, 0xD8+$dst$$reg ); 2500 %} 2501 2502 enc_class Push_Reg_FPR( regFPR dst ) %{ 2503 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2504 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2505 %} 2506 2507 // Push FPU's float to a stack-slot, and pop FPU-stack 2508 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2509 int pop = 0x02; 2510 if ($src$$reg != FPR1L_enc) { 2511 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2512 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2513 pop = 0x03; 2514 } 2515 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2516 %} 2517 2518 // Push FPU's double to a stack-slot, and pop FPU-stack 2519 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2520 int pop = 0x02; 2521 if ($src$$reg != FPR1L_enc) { 2522 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2523 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2524 pop = 0x03; 2525 } 2526 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2527 %} 2528 2529 // Push FPU's double to 
// a FPU-stack-slot, and pop FPU-stack
  // When 'src' is FPR1 no FLD is emitted, so the store is the non-popping
  // form and the ST(i) index is one lower (hence 0xD0 - 1).
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  // Set up the FPU stack for a remainder operation: push 'dst' on top and,
  // if 'src' is not already FPR1, exchange it into the slot just below.
  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Reserve 8 bytes of CPU stack and push two XMM doubles onto the FPU stack
  // through that scratch slot: src1 first, then src0 (src0 ends up on top).
  // The scratch slot is left allocated; Push_ResultD releases 8 bytes.
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Single-precision variant of Push_ModD_encoding using a 4-byte scratch slot.
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the FPU result through [rsp] into an XMM register, then release the
  // 8-byte scratch slot.
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Float variant of Push_ResultD; the number of bytes to release is supplied
  // by the caller as the constant operand 'd8'.
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  // Reserve 8 bytes of CPU stack and push an XMM double onto the FPU stack.
  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
__ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Reserve an 8-byte scratch slot on the CPU stack.
  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  // Release the 8-byte scratch slot.
  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Push an XMM double onto the FPU stack via [rsp].
  // NOTE(review): writes to [rsp] without adjusting rsp, so it presumably
  // relies on push_stack_temp_qword having reserved the slot -- confirm
  // against the instructs that use it.
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Undo the register exchange performed by Push_Reg_Mod_DPR (same
  // fincstp / FXCH / fdecstp sequence).  The result store itself is emitted
  // by a separate encoding.
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Copy the FPU status word into EFLAGS and emit a short JNP that skips the
  // next 5 bytes of code when the parity flag is clear.
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  // Floating-point remainder loop.  The sequence is 12 bytes long and the
  // trailing JP has a rel32 of -12 (0xFFFFFFF4), i.e. it branches back to the
  // FPREM at the top.
  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp  ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  // Move the FPU compare result into EFLAGS, mapping the case where status
  // bit 0x0400 is set (unordered) to "less than" by forcing the carry flag.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf,
0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    // (displacement 2 skips the 2-byte MOV AH,1 and lands on the SAHF)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    // (displacement 3 skips MOV AH,1 + SAHF and lands on the NOP)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

// Pseudo-assembly for CmpF_Result:
//     fnstsw_ax();
//     sahf();
//     movl(dst, nan_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);

// Values actually emitted by CmpF_Result (see the emit_d32 immediates):
//     less_result     = -1;
//     greater_result  =  1;
//     equal_result    =  0;
//     nan_result      = -1;

  // Materialize a three-way float compare result in 'dst'.  Each MOV r32,imm32
  // is 5 bytes and each short Jcc is 2 bytes; the hard-coded branch
  // displacements (0x13, 0x0C, 0x05) all target the end of the sequence.
  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
    emit_d8
( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}


  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  // (compares the high halves and only compares the low halves when the high
  // halves are equal)
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
// done:
  %}

  // Sign-extend a 32-bit int into a long register pair: copy src into both
  // halves, then arithmetic-shift the high half right by 31.
  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding  , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}

  // Load a long register pair onto the FPU stack: push hi then lo so the
  // 64-bit value sits at [ESP], FILD it, then release the 8 bytes.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add  SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}

  // Signed 32x32->64 multiply followed by an arithmetic right shift of the
  // high word by ($cnt - 32); no shift is emitted when $cnt == 32.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL   EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR    EDX,$cnt-32
int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  // (the 8 bytes pushed here stay on the CPU stack after the FILD)
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}

  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}

  // 64x64->64 multiply built from three 32-bit multiplies; 'tmp' accumulates
  // the two cross products that are added into the high word at the end.
  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL   EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp
    emit_opcode( cbuf, 0x03 );
emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2857 %} 2858 2859 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2860 // Basic idea: lo(result) = lo(src * y_lo) 2861 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2862 // IMUL $tmp,EDX,$src 2863 emit_opcode( cbuf, 0x6B ); 2864 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2865 emit_d8( cbuf, (int)$src$$constant ); 2866 // MOV EDX,$src 2867 emit_opcode(cbuf, 0xB8 + EDX_enc); 2868 emit_d32( cbuf, (int)$src$$constant ); 2869 // MUL EDX:EAX,EDX 2870 emit_opcode( cbuf, 0xF7 ); 2871 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2872 // ADD EDX,ESI 2873 emit_opcode( cbuf, 0x03 ); 2874 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2875 %} 2876 2877 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2878 // PUSH src1.hi 2879 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2880 // PUSH src1.lo 2881 emit_opcode(cbuf, 0x50+$src1$$reg ); 2882 // PUSH src2.hi 2883 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2884 // PUSH src2.lo 2885 emit_opcode(cbuf, 0x50+$src2$$reg ); 2886 // CALL directly to the runtime 2887 cbuf.set_insts_mark(); 2888 emit_opcode(cbuf,0xE8); // Call into runtime 2889 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2890 // Restore stack 2891 emit_opcode(cbuf, 0x83); // add SP, #framesize 2892 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2893 emit_d8(cbuf, 4*4); 2894 %} 2895 2896 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2897 // PUSH src1.hi 2898 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2899 // PUSH src1.lo 2900 emit_opcode(cbuf, 0x50+$src1$$reg ); 2901 // PUSH src2.hi 2902 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2903 // PUSH src2.lo 2904 emit_opcode(cbuf, 0x50+$src2$$reg ); 2905 // CALL directly to the runtime 2906 cbuf.set_insts_mark(); 2907 emit_opcode(cbuf,0xE8); // Call into runtime 2908 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, 
SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Set flags for (long == 0): OR the two halves together into 'tmp'.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  // Compare two longs for equality: compare the low halves and, only if they
  // are equal, compare the high halves (the JNE skips the 2-byte second CMP).
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}

  // Full long compare using CMP on the low halves and SBB on the high halves;
  // 'tmp' receives a copy of src1.hi so src1 itself is not modified.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo   ! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB   $tmp,$src2.hi        ! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}

  // Compare zero against a long (0 - src) using the same CMP/SBB scheme,
  // with 'tmp' cleared to supply the zero operand.
  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}

 // Sniff, sniff...
// smells like Gnu Superoptimizer
  // Negate a long register pair in place: NEG hi, NEG lo, then subtract the
  // borrow produced by the low-half NEG from the high half.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}

  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);    // POP EDX
  %}

  // Tail-jump (JMP rel32) to the rethrow stub, with a runtime-call relocation
  // on the 32-bit displacement.
  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
// Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    // 0x80000000 is the value the store leaves behind in the corner cases;
    // only then is the slow path taken.
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call (2-byte FLD + 5-byte CALL)
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  // Convert a double to a long.  Same scheme as DPR2I_encoding, but stores a
  // 64-bit integer and pops it into EDX:EAX.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes)
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ?
(int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted long into EDX:EAX; adjust CPU stack
    // The slow path is taken only when the result is exactly
    // 0x80000000:00000000 (EDX == 0x80000000 and EAX == 0).
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x5A);       // POP EDX
    emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // rdx
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);     // Size of slow_call (+4 for the TEST/JNE below)
    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // ModRM byte: EAX,EAX
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call (2-byte FLD + 5-byte CALL)
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
// (end of DPR2L_encoding)
  %}

  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */   (non-popping form)
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  // ST = (ST - src1) / src2
  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
      // FSUB   ST,$src1
      emit_opcode(cbuf, 0xD8);
      emit_opcode(cbuf, 0xE0 + $src1$$reg);

      // FDIV
      emit_opcode(cbuf, 0xD8);
      emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  // ST = (ST + src1) * src2
  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}


  // Like MulFAddF, but the multiply is the popping form that leaves the
  // product in src2.
  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long
  // (64-bit integer load from 'mem' onto the FPU stack, then a popping 64-bit
  // integer store into the destination stack slot)
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc =
$mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  // Safepoint Poll.  This polls the safepoint page, and causes an
  // exception if it is not readable. Unfortunately, it kills the condition code
  // in the process.
  // We currently use TESTL [spp],EDI
  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0

  enc_class Safepoint_Poll() %{
    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm (cbuf, 0x0, 0x7, 0x5);    // absolute disp32 addressing, reg field = EDI
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  %}
%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        | locks  |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by    +--------+
//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.
//         Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // (the slot count is rounded up to the stack alignment before subtracting 1)
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
// C calling convention: delegates to SharedRuntime::c_calling_convention and
// discards its return value.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  // The lo/hi tables are indexed by ideal_reg; the assert restricts it to
  // Op_RegI..Op_RegL.
  // NOTE(review): entries presumably follow the Op_Reg* enum order
  // (RegN, RegI, RegP, RegF, RegD, RegL) -- confirm against the opcode enum.
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // Same tables as c_return_value, but here floats are returned in XMM0
  // from UseSSE>=1 (c_return_value requires UseSSE>=2 for floats).
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size
                                // attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes (0..3)
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate that fits in a signed 8-bit field
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate that fits in a signed 16-bit field
operand immI16() %{
predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate in the range 1..31
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate in the range 32..63
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate equal to 1
operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate equal to 2
operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate equal to 3
operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
op_cost(0); 3513 3514 format %{ %} 3515 interface(CONST_INTER); 3516%} 3517 3518// Long Immediate zero 3519operand immL_M1() %{ 3520 predicate( n->get_long() == -1L ); 3521 match(ConL); 3522 op_cost(0); 3523 3524 format %{ %} 3525 interface(CONST_INTER); 3526%} 3527 3528// Long immediate from 0 to 127. 3529// Used for a shorter form of long mul by 10. 3530operand immL_127() %{ 3531 predicate((0 <= n->get_long()) && (n->get_long() <= 127)); 3532 match(ConL); 3533 op_cost(0); 3534 3535 format %{ %} 3536 interface(CONST_INTER); 3537%} 3538 3539// Long Immediate: low 32-bit mask 3540operand immL_32bits() %{ 3541 predicate(n->get_long() == 0xFFFFFFFFL); 3542 match(ConL); 3543 op_cost(0); 3544 3545 format %{ %} 3546 interface(CONST_INTER); 3547%} 3548 3549// Long Immediate: low 32-bit mask 3550operand immL32() %{ 3551 predicate(n->get_long() == (int)(n->get_long())); 3552 match(ConL); 3553 op_cost(20); 3554 3555 format %{ %} 3556 interface(CONST_INTER); 3557%} 3558 3559//Double Immediate zero 3560operand immDPR0() %{ 3561 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3562 // bug that generates code such that NaNs compare equal to 0.0 3563 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); 3564 match(ConD); 3565 3566 op_cost(5); 3567 format %{ %} 3568 interface(CONST_INTER); 3569%} 3570 3571// Double Immediate one 3572operand immDPR1() %{ 3573 predicate( UseSSE<=1 && n->getd() == 1.0 ); 3574 match(ConD); 3575 3576 op_cost(5); 3577 format %{ %} 3578 interface(CONST_INTER); 3579%} 3580 3581// Double Immediate 3582operand immDPR() %{ 3583 predicate(UseSSE<=1); 3584 match(ConD); 3585 3586 op_cost(5); 3587 format %{ %} 3588 interface(CONST_INTER); 3589%} 3590 3591operand immD() %{ 3592 predicate(UseSSE>=2); 3593 match(ConD); 3594 3595 op_cost(5); 3596 format %{ %} 3597 interface(CONST_INTER); 3598%} 3599 3600// Double Immediate zero 3601operand immD0() %{ 3602 // Do additional (and counter-intuitive) test against NaN to work 
around VC++ 3603 // bug that generates code such that NaNs compare equal to 0.0 AND do not 3604 // compare equal to -0.0. 3605 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); 3606 match(ConD); 3607 3608 format %{ %} 3609 interface(CONST_INTER); 3610%} 3611 3612// Float Immediate zero 3613operand immFPR0() %{ 3614 predicate(UseSSE == 0 && n->getf() == 0.0F); 3615 match(ConF); 3616 3617 op_cost(5); 3618 format %{ %} 3619 interface(CONST_INTER); 3620%} 3621 3622// Float Immediate one 3623operand immFPR1() %{ 3624 predicate(UseSSE == 0 && n->getf() == 1.0F); 3625 match(ConF); 3626 3627 op_cost(5); 3628 format %{ %} 3629 interface(CONST_INTER); 3630%} 3631 3632// Float Immediate 3633operand immFPR() %{ 3634 predicate( UseSSE == 0 ); 3635 match(ConF); 3636 3637 op_cost(5); 3638 format %{ %} 3639 interface(CONST_INTER); 3640%} 3641 3642// Float Immediate 3643operand immF() %{ 3644 predicate(UseSSE >= 1); 3645 match(ConF); 3646 3647 op_cost(5); 3648 format %{ %} 3649 interface(CONST_INTER); 3650%} 3651 3652// Float Immediate zero. 
// Zero and not -0.0
// (UseSSE >= 1; the predicate tests the jint_cast() of the value against 0)
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Shift-count constants 16 and 24
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Integer value pinned to EAX
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
// Integer value pinned to EBX
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Integer value pinned to ECX
operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

// Integer value pinned to EDX
operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

// Integer value pinned to EDI
operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register allocated from the nax_reg class
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register allocated from the nadx_reg class
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register allocated from the ncx_reg class
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
// Integer value pinned to ESI
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register allocated from the int_reg class
operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for
// implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register allocated from the nax_reg class
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register allocated from the nabx_reg class
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register allocated from the p_reg class
operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

// Pointer value pinned to ESI
operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Long Register
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

// Long value pinned to the EDX:EAX pair
operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

// Long value pinned to the EBX:ECX pair
operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// Flags register variant guarded by predicate(false).
// NOTE(review): presumably it is therefore only usable where an instruction
// names it explicitly -- confirm against the instructions that reference it.
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare
operand flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}

// Double register operands (UseSSE < 2)
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

// Double value pinned to the fp_dbl_reg0 class
operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// Double value pinned to the fp_dbl_reg1 class
operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

// Double value allocated from the fp_dbl_notreg0 class
operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand vlRegF() %{
  constraint(ALLOC_IN_RC(float_reg_vl));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand vlRegD() %{
  constraint(ALLOC_IN_RC(double_reg_vl));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}

// Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// (integer register base, pointer immediate offset)
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// //
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
// Pointer register restricted to the esi_reg class
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Memory forms accepted for load-long addressing
opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
// Stack slot holding a pointer
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding an int
operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a float
operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a double
operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a long
operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Conditional Branch
// Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
// (restricted by the predicate to lt, ge, le and gt tests)
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
// (restricted by the predicate to ne and eq tests)
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
// (the predicate excludes overflow / no_overflow tests)
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
// (less/greater and less_equal/greater_equal carry each other's encodings
// relative to cmpOp)
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares
// (less/greater and less_equal/greater_equal carry each other's encodings
// relative to cmpOpU)
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // Instruction size unit is 1 byte
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation using big decoder.
// Note that src is declared as a memory operand here, but no MEM unit is
// reserved.
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation using big decoder
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory (no register source operand)
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation that writes the flags register
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation that writes the flags register
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation that writes the flags register
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Four-instruction idiom that only reads p, q and y.
// NOTE(review): the previous header said "Conditional move reg-reg"; going by
// the name this is presumably the compare-less-than sequence -- confirm
// against the instructs that use it.
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;        // any mem
%}

// Conditional move reg-reg long
// NOTE(review): declared single_instruction but reserves 2 decoders, unlike
// the other _long classes which use instruction_count(2) -- confirm intent.
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg operation (dst is only read here)
pipe_class fpu_reg(regDPR dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg-reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any decoder; 4 times
    FPU    : S3(2);
%}

// Float reg-mem-reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg-reg operation
pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg-mem operation
pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float mem-mem operation (no FPU unit reserved)
pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}

// Float mem-mem-mem operation
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}

// Float mem-reg operation with a constant
pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg operation with a constant load
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR     : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr     : S1(read);
    BR     : S3;
%}

// Allocation idiom
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0     : S0(2);
    MEM    : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
    instruction_count(0);
%}

// Define the class for the Nop node
define %{
   MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided. These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively. The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
// Reverse the bytes of an int in place.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Reverse the bytes of a long held in a register pair: each half is
// byte-swapped and then the two halves are exchanged (see format).
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// Reverse the bytes of an unsigned short: swap all four bytes, then shift the
// result down 16 bits with a logical (zero-filling) shift.
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Reverse the bytes of a signed short: swap all four bytes, then shift the
// result down 16 bits with an arithmetic (sign-propagating) shift.
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

// Count leading zeros of an int with LZCNT; selected when
// UseCountLeadingZerosInstruction is set.
instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Count leading zeros of an int with BSR; selected when
// UseCountLeadingZerosInstruction is not set.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
      "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    // Zero flag set after BSR: substitute -1 so the fix-up below yields 32.
    __ movl(Rdst, -1);
    __ bind(skip);
    // result = (BitsPerInt - 1) - Rdst
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

// Count leading zeros of a long (register pair) with LZCNT: count the high
// word first and fall through to the low word only if the carry flag is set.
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    // Carry clear is taken to mean the high word was non-zero, so its count
    // is already the answer.
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Count leading zeros of a long (register pair) with BSR; selected when
// UseCountLeadingZerosInstruction is not set.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
      "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
      "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    // High word had a set bit: bias its index by 32 to get the 64-bit index.
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    // Both words zero: substitute -1 so the fix-up below yields 64.
    __ movl(Rdst, -1);
    __ bind(not_zero);
    // result = (BitsPerLong - 1) - Rdst
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros of an int with TZCNT; selected when
// UseCountTrailingZerosInstruction is set.
instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros of an int with BSF; selected when
// UseCountTrailingZerosInstruction is not set.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    // Zero flag set after BSF: the answer is the full width.
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros of a long (register pair) with TZCNT: count the low
// word first and fall through to the high word only if the carry flag is set.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    // Carry clear is taken to mean the low word was non-zero, so its count
    // is already the answer.
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros of a long (register pair) with BSF; selected when
// UseCountTrailingZerosInstruction is not set.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
      "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    // Both words zero: 32 here plus the 32 added below gives 64.
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

// Population count of an int register; selected when UsePopCountInstruction
// is set.
instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Population count of an int loaded directly from memory.
// NOTE(review): uses the register-only ialu_reg pipe class although the
// source operand is memory -- confirm whether ialu_reg_mem was intended.
instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Population count of a long (register pair): count each half and add.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Population count of a long loaded from memory: the two 32-bit halves are
// counted straight from $mem and $mem+4 and then added.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Format mnemonic convention used below: MOVSX8/MOVZX8 for the byte-sized
// movsbl/movzbl encodings, MOVSX/MOVZX for the 16-bit movswl/movzwl encodings.

// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 mask bits can matter after a zero-extending byte load.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
// The shift-left/shift-right-by-24 pair is folded into a single
// sign-extending byte load.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX8 $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// The shift-left/shift-right-by-24 pair is folded into a single
// sign-extending byte load.
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX8 $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 mask bits can matter after a zero-extending 16-bit load.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
// The shift-left/shift-right-by-24 pair is folded into a single
// sign-extending byte load.
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX8 $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
// The AND with 0xFF is folded into a single zero-extending byte load.
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX8 $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
// Only used when the load does not require atomic access (see predicate);
// the value is fetched as two separate 32-bit moves.
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
// x87 variant of the atomic long load (UseSSE<=1); result goes to a stack slot.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant of the atomic long load: one 64-bit move through an XMM temp
// into a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 variant of the atomic long load straight into a register pair: load
// 64 bits into an XMM temp, move out the low half, shift right by 32, then
// move out the high half.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double
// x87 variant (UseSSE<=1): load onto the FPU stack top, then pop into $dst.
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Double to XMM when UseXmmLoadAndClearUpper is off.
// NOTE(review): the encoding is the same movdbl call as loadD; presumably
// movdbl itself picks the instruction based on UseXmmLoadAndClearUpper, which
// would explain the different format mnemonic -- confirm in the assembler.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float
// x87 variant (UseSSE==0): load onto the FPU stack top, then pop into $dst.
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
// One rule per addressing-mode operand; all of them emit LEA (opcode 0x8D).
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst, mem) );
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst, mem) );
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst, mem) );
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst, mem) );
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst, mem) );
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero: XOR the register with itself, so the flags register
// is declared killed.
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);               /* + rd */
  ins_encode( OpcP, RegReg(dst, dst) );
  ins_pipe( ialu_reg );
%}

// Load pointer constant
instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);               /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load long constant as two immediate moves (low half, then high half)
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load long zero: XOR each half of the pair with itself
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst, dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
// Float constant: pushed from the constant table, then popped into $dst.
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
// Float constant 0.0 via FLDZ.
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
// Float constant 1.0 via FLD1.
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    // Float constant is read from the constant table into the XMM register.
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// Float 0.0 in an XMM register: XORPS of the register with itself.
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    XMMRegister zeroed = $dst$$XMMRegister;
    __ xorps(zeroed, zeroed);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
// Double constant: pushed from the constant table, then popped into $dst.
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
// Double constant 0.0 via FLDZ.
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
// Double constant 1.0 via FLD1.
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    // Double constant is read from the constant table into the XMM register.
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
// Double 0.0 in an XMM register: XORPD of the register with itself.
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    XMMRegister zeroed = $dst$$XMMRegister;
    __ xorpd(zeroed, zeroed);
  %}
  ins_pipe( pipe_slow );
%}

// Load int from stack slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Load long from stack slot: two 32-bit moves (low half, then high half)
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst, src), OpcS, RegMem_Hi(dst, src) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load pointer from stack slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Load float from stack slot through the x87 stack
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00, src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load double from stack slot through the x87 stack
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00, src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// The variant is selected by UseSSE and AllocatePrefetchInstr.

// No SSE and AllocatePrefetchInstr != 3: nothing is emitted.
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// AllocatePrefetchInstr == 3: PREFETCHW
instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// AllocatePrefetchInstr == 0 (with SSE): PREFETCHNTA
instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// AllocatePrefetchInstr == 1 (with SSE): PREFETCHT0
instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// AllocatePrefetchInstr == 2 (with SSE): PREFETCHT2
instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem(src, mem) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
// The secondary opcode 0x66 is emitted first (OpcS before OpcP), ahead of
// the 0x89 MOV.
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem(src, mem) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem(src, mem) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long (non-atomic): two 32-bit moves, low half then high half
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem(src, mem), OpcS, RegMem_Hi(src, mem) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer: only the low register of the pair is written
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    Register src_lo = $src$$Register;
    __ movl($mem$$Address, src_lo);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem(EAX, mem), enc_storeL_volatile(mem, src) );
  ins_pipe( fpu_reg_mem );
%}

// Atomic long store from a stack slot through an XMM temporary (UseSSE >= 2).
// The leading CMP probes the target address, as in storeL_volatile.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    XMMRegister scratch = $tmp$$XMMRegister;
    __ cmpl(rax, $mem$$Address);
    __ movdbl(scratch, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, scratch);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic long store from a register pair (UseSSE >= 2): the two halves are
// moved into XMM temporaries, interleaved with PUNPCKLDQ and written with a
// single 64-bit store.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    XMMRegister packed = $tmp$$XMMRegister;
    XMMRegister hi_xmm = $tmp2$$XMMRegister;
    Register    src_lo = $src$$Register;
    __ cmpl(rax, $mem$$Address);
    __ movdl(packed, src_lo);
    __ movdl(hi_xmm, HIGH_FROM_LOW(src_lo));
    __ punpckldq(packed, hi_xmm);
    __ movdbl($mem$$Address, packed);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem(src, mem) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00, mem), Con32(src) );
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate (only when UseStoreImmI16 is set)
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00, mem), Con16(src) );
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00, mem), Con32(src) );
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00, mem), Con8or32(src) );
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate (same encoding as storeImmB, but matches
// StoreCM)
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00, mem), Con8or32(src) );
  ins_pipe( ialu_mem_imm );
%}

// Store Double from the x87 stack (UseSSE <= 1)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem, src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86, so a RoundDouble feeding the store is
// folded into the same encoding as storeDPR.
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem, src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Register-to-register double move from a regD into a vlRegD operand
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Register-to-register double move from a vlRegD into a regD operand
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Register-to-register float move from a regF into a vlRegF operand
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Register-to-register float move from a vlRegF into a regF operand
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store Float from the x87 stack (UseSSE == 0)
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem, src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86, so a RoundFloat feeding the store is
// folded into the same encoding as storeFPR.
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem, src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86, so a ConvD2F of an x87 double feeding
// the store is folded into a single-precision store.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem, src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00, mem), Con32FPR_as_bits(src) );
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00, mem), Con32F_as_bits(src) );
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS(dst, src) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS(dst, src) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot: two 32-bit moves, low half then high half
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem(src, dst), OpcS, RegMem_Hi(src, dst) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors
// Every flavor except membar_volatile declares size(0) and an empty encoding.

// MemBarAcquire / LoadFence
instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// MemBarAcquireLock
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// MemBarRelease / StoreFence
instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// MemBarReleaseLock
instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// MemBarVolatile: the only flavor with a non-empty encoding here; it asks
// the macro assembler for a StoreLoad barrier and declares the flags killed.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// MemBarVolatile that Matcher::post_store_load_barrier(n) reports as
// redundant: nothing is emitted.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// MemBarStoreStore
instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// int -> pointer cast: both operands are EAX-class operands and the
// encoding is empty.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

// pointer -> int cast, encoded as a register copy
instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy(dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional int move for CPUs without CMOV: branch around a plain MOV.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label done;
    // The branch skips the move, so it tests the opposite of the CMOV
    // condition (low bit of the condition code flipped).
    const Assembler::Condition skip_cond = (Assembler::Condition)($cop$$cmpcode^1);
    __ jccb(skip_cond, done);
    __ movl($dst$$Register, $src$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Same as jmovI_reg, for unsigned comparisons
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label done;
    // The branch skips the move, so it tests the opposite of the CMOV
    // condition (low bit of the condition code flipped).
    const Assembler::Condition skip_cond = (Assembler::Condition)($cop$$cmpcode^1);
    __ jccb(skip_cond, done);
    __ movl($dst$$Register, $src$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Conditional int move using CMOV (signed comparison)
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg(dst, src) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional int move using CMOV (unsigned comparison)
instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg(dst, src) );
  ins_pipe( pipe_cmov_reg );
%}

// cmpOpUCF / eFlagsRegUCF form: expands to cmovI_regU
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional int move from memory using CMOV (signed comparison)
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem(dst, src) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional int move from memory using CMOV (unsigned comparison)
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem(dst, src) );
  ins_pipe( pipe_cmov_mem );
%}

// cmpOpUCF / eFlagsRegUCF form: expands to cmovI_memU
instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional pointer move using CMOV (signed comparison)
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg(dst, src) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note:  a CMoveP is generated for  stubs and native wrappers
//        regardless of whether we are on a P6, so we
//        emulate a cmov here
// There is no predicate on this rule; it carries a higher cost (300) than
// the CMOV-based rule (200).
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional pointer move using CMOV (unsigned comparison)
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg(dst, src) );
  ins_pipe( pipe_cmov_reg );
%}

// cmpOpUCF / eFlagsRegUCF form: expands to cmovP_regU
instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional x87 double move using FCMOV (unsigned flags only)
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop, src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional x87 float move using FCMOV (unsigned flags only)
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop, src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// The signed case is therefore encoded as a branch around an x87 move.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3);         /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch(cop, 0x4), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed-compare conditional move of an x87 float (UseSSE==0), printed as a
// conditional branch around a plain move.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
// Float in an XMM register: a short branch on the inverted condition skips
// the MOVSS when the move must not happen.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
// Double in an XMM register (needs UseSSE>=2): same branch-around scheme,
// using movdbl for the move.
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// UCF operand classes: no encoding of its own, expands into fcmovF_regU.
instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// UCF operand classes: no encoding of its own, expands into fcmovD_regU.
instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Conditional move of a long: one CMOV for the low half and one for the
// high half, both under the same condition.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags variant of cmovL_reg.
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// UCF operand classes: no encoding of its own, expands into cmovL_regU.
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
// Register += register. The flags register is declared killed.
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Register += immediate.
instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Register += 1 as a one-byte INC; only selectable when UseIncDec is set.
instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// dst = src0 + immediate via LEA. Unlike the ADD rules, no KILL of the
// flags register is declared and dst may differ from src0.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer flavour of the LEA rule above (matches AddP).
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Register += -1 as a one-byte DEC; only selectable when UseIncDec is set.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Pointer += int register.
instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer += immediate.
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Register += int loaded from memory (the LoadI is folded into the ADD).
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Memory += register: matches a load/add/store of the same address.
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Memory += 1 as INC.
// NOTE(review): unlike incI_eReg this rule has no UseIncDec predicate --
// confirm whether that is intentional.
instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// Memory += -1 as DEC.
// NOTE(review): unlike decI_eReg this rule has no UseIncDec predicate --
// confirm whether that is intentional.
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// CheckCastPP emits no machine code: size 0, empty encoding.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastPP has an empty encoding as well.
instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastII has an empty encoding and is costed at zero.
instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  // The result of this rule is the flags register; the expected old value
  // is pinned to EAX by the eAXRegP operand class.
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
         %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    // The LOCK prefix is emitted only when os::is_MP() is true.
    if (os::is_MP()) {
      __ lock();
    }
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// The compareAndSwap* rules below share one shape: each matches both the
// strong and the weak ideal node, declares the flags and oldval killed, and
// appends enc_flags_ne_to_boolean(res) to turn the outcome into 0/1 in res.

// 64-bit compare-and-swap; requires CMPXCHG8 support (supports_cx8()).
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer compare-and-swap.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte compare-and-swap.
instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Short (16-bit) compare-and-swap.
instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int compare-and-swap.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// The compareAndExchange* rules define oldval as their result (Set oldval),
// so only the flags are declared killed and no boolean is materialized.

// 64-bit compare-and-exchange; requires CMPXCHG8 support (supports_cx8()).
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer compare-and-exchange.
instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte compare-and-exchange.
instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Short (16-bit) compare-and-exchange.
instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Int compare-and-exchange.
instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte get-and-add whose result is not used: a plain ADDB of an immediate
// to memory (with LOCK when os::is_MP()) is enough, no XADD needed.
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  // Byte get-and-add: newval is both the addend and the result operand.
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB [$mem],$newval" %}
  ins_encode %{
    // LOCK prefix only when os::is_MP() is true.
    if (os::is_MP()) { __ lock(); }
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Short (16-bit) get-and-add whose result is not used: a plain add of an
// immediate to memory replaces the XADD. The listing uses the W size suffix
// to match the addw that is actually emitted.
instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDW [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Short (16-bit) get-and-add returning the previous value in newval.
instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDW [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Int get-and-add whose result is not used: plain ADDL of an immediate.
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Int get-and-add returning the previous value in newval.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
instruct xchgB( memory mem, xRegI newval) %{
  // Byte get-and-set. No flags operand is declared and, unlike the xadd
  // rules, no LOCK prefix is emitted by this encoding.
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Short (16-bit) get-and-set.
instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Int get-and-set.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Pointer get-and-set; uses the same xchgl as the int rule.
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
// Register -= register.
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Register -= immediate.
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Register -= int loaded from memory.
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Memory -= register: matches a load/sub/store of the same address.
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
// Matches the ideal shape AddP dst (SubI 0 src) and emits a single SUB.
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Negation, matched as 0 - dst.
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Loads only the low word of a long constant (operand class
// eADXRegL_low_only); it feeds the mulI_imm*_high rules below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
// The predicate restricts the rule to a ConL multiplier whose value lies in
// [min_jint, max_jint].
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// General form for shift counts of operand class immI_32_63: same predicate
// and encoding class as above, costed one unit higher for the extra SAR.
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned counterpart of mulI2L: both inputs are masked with a 32-bit mask
// constant (immL_32bits) before the multiply.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
// hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    // tmp = src.hi * EAX, then the widening multiply, then fold tmp into EDX.
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    // tmp = src.lo * EDX, then the widening multiply, then fold tmp into EDX.
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
// hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
// Basic idea: lo(result) = lo(src * EAX)
// hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// The listing shows a guard for EAX == 0x80000000 with ECX == -1 that
// bypasses the IDIV; EDX is declared killed.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// Done out of line: the listing pushes both operands and calls
// SharedRuntime::ldiv. ECX and EBX are declared killed.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
// The result is taken from EDX and EAX is declared killed. The encoding and
// size(26) are the same as divI_eReg; the listing here is abbreviated and
// presumably stands for the same guarded sequence (cdq_enc is defined
// elsewhere).
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// Done out of line, like divL_eReg, but calling SharedRuntime::lrem.
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
// Inline long division by a 32-bit constant using unsigned 32-bit DIVs on
// the absolute value of the constant; the sign is fixed up afterwards.
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    // 0, -1 and min_jint are excluded, so -con below cannot overflow.
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ?
con : -con; 8066 Label Lfast, Lpos, Ldone; 8067 8068 __ movl($tmp$$Register, pcon); 8069 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 8070 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 8071 8072 __ movl($tmp2$$Register, $dst$$Register); // save 8073 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 8074 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 8075 __ jccb(Assembler::lessEqual, Lpos); // result is positive 8076 8077 // Negative dividend. 8078 // convert value to positive to use unsigned division 8079 __ lneg($dst$$Register, $tmp2$$Register); 8080 __ divl($tmp$$Register); 8081 __ movl($dst$$Register, $tmp2$$Register); 8082 __ divl($tmp$$Register); 8083 // revert remainder back to negative 8084 __ negl(HIGH_FROM_LOW($dst$$Register)); 8085 __ jmpb(Ldone); 8086 8087 __ bind(Lpos); 8088 __ divl($tmp$$Register); 8089 __ movl($dst$$Register, $tmp2$$Register); 8090 8091 __ bind(Lfast); 8092 // fast path: src is positive 8093 __ divl($tmp$$Register); 8094 8095 __ bind(Ldone); 8096 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 8097 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 8098 8099 %} 8100 ins_pipe( pipe_slow ); 8101%} 8102 8103// Integer Shift Instructions 8104// Shift Left by one 8105instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8106 match(Set dst (LShiftI dst shift)); 8107 effect(KILL cr); 8108 8109 size(2); 8110 format %{ "SHL $dst,$shift" %} 8111 opcode(0xD1, 0x4); /* D1 /4 */ 8112 ins_encode( OpcP, RegOpc( dst ) ); 8113 ins_pipe( ialu_reg ); 8114%} 8115 8116// Shift Left by 8-bit immediate 8117instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8118 match(Set dst (LShiftI dst shift)); 8119 effect(KILL cr); 8120 8121 size(3); 8122 format %{ "SHL $dst,$shift" %} 8123 opcode(0xC1, 0x4); /* C1 /4 ib */ 8124 ins_encode( RegOpcImm( dst, shift) ); 8125 ins_pipe( ialu_reg ); 8126%} 8127 8128// Shift Left by variable 8129instruct salI_eReg_CL(rRegI dst, 
eCXRegI shift, eFlagsReg cr) %{ 8130 match(Set dst (LShiftI dst shift)); 8131 effect(KILL cr); 8132 8133 size(2); 8134 format %{ "SHL $dst,$shift" %} 8135 opcode(0xD3, 0x4); /* D3 /4 */ 8136 ins_encode( OpcP, RegOpc( dst ) ); 8137 ins_pipe( ialu_reg_reg ); 8138%} 8139 8140// Arithmetic shift right by one 8141instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8142 match(Set dst (RShiftI dst shift)); 8143 effect(KILL cr); 8144 8145 size(2); 8146 format %{ "SAR $dst,$shift" %} 8147 opcode(0xD1, 0x7); /* D1 /7 */ 8148 ins_encode( OpcP, RegOpc( dst ) ); 8149 ins_pipe( ialu_reg ); 8150%} 8151 8152// Arithmetic shift right by one 8153instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{ 8154 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 8155 effect(KILL cr); 8156 format %{ "SAR $dst,$shift" %} 8157 opcode(0xD1, 0x7); /* D1 /7 */ 8158 ins_encode( OpcP, RMopc_Mem(secondary,dst) ); 8159 ins_pipe( ialu_mem_imm ); 8160%} 8161 8162// Arithmetic Shift Right by 8-bit immediate 8163instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8164 match(Set dst (RShiftI dst shift)); 8165 effect(KILL cr); 8166 8167 size(3); 8168 format %{ "SAR $dst,$shift" %} 8169 opcode(0xC1, 0x7); /* C1 /7 ib */ 8170 ins_encode( RegOpcImm( dst, shift ) ); 8171 ins_pipe( ialu_mem_imm ); 8172%} 8173 8174// Arithmetic Shift Right by 8-bit immediate 8175instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 8176 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 8177 effect(KILL cr); 8178 8179 format %{ "SAR $dst,$shift" %} 8180 opcode(0xC1, 0x7); /* C1 /7 ib */ 8181 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); 8182 ins_pipe( ialu_mem_imm ); 8183%} 8184 8185// Arithmetic Shift Right by variable 8186instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8187 match(Set dst (RShiftI dst shift)); 8188 effect(KILL cr); 8189 8190 size(2); 8191 format %{ "SAR $dst,$shift" %} 8192 opcode(0xD3, 0x7); /* D3 /7 */ 8193 ins_encode( 
OpcP, RegOpc( dst ) ); 8194 ins_pipe( ialu_reg_reg ); 8195%} 8196 8197// Logical shift right by one 8198instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8199 match(Set dst (URShiftI dst shift)); 8200 effect(KILL cr); 8201 8202 size(2); 8203 format %{ "SHR $dst,$shift" %} 8204 opcode(0xD1, 0x5); /* D1 /5 */ 8205 ins_encode( OpcP, RegOpc( dst ) ); 8206 ins_pipe( ialu_reg ); 8207%} 8208 8209// Logical Shift Right by 8-bit immediate 8210instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8211 match(Set dst (URShiftI dst shift)); 8212 effect(KILL cr); 8213 8214 size(3); 8215 format %{ "SHR $dst,$shift" %} 8216 opcode(0xC1, 0x5); /* C1 /5 ib */ 8217 ins_encode( RegOpcImm( dst, shift) ); 8218 ins_pipe( ialu_reg ); 8219%} 8220 8221 8222// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. 8223// This idiom is used by the compiler for the i2b bytecode. 8224instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ 8225 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); 8226 8227 size(3); 8228 format %{ "MOVSX $dst,$src :8" %} 8229 ins_encode %{ 8230 __ movsbl($dst$$Register, $src$$Register); 8231 %} 8232 ins_pipe(ialu_reg_reg); 8233%} 8234 8235// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. 8236// This idiom is used by the compiler the i2s bytecode. 
8237instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{ 8238 match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); 8239 8240 size(3); 8241 format %{ "MOVSX $dst,$src :16" %} 8242 ins_encode %{ 8243 __ movswl($dst$$Register, $src$$Register); 8244 %} 8245 ins_pipe(ialu_reg_reg); 8246%} 8247 8248 8249// Logical Shift Right by variable 8250instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8251 match(Set dst (URShiftI dst shift)); 8252 effect(KILL cr); 8253 8254 size(2); 8255 format %{ "SHR $dst,$shift" %} 8256 opcode(0xD3, 0x5); /* D3 /5 */ 8257 ins_encode( OpcP, RegOpc( dst ) ); 8258 ins_pipe( ialu_reg_reg ); 8259%} 8260 8261 8262//----------Logical Instructions----------------------------------------------- 8263//----------Integer Logical Instructions--------------------------------------- 8264// And Instructions 8265// And Register with Register 8266instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8267 match(Set dst (AndI dst src)); 8268 effect(KILL cr); 8269 8270 size(2); 8271 format %{ "AND $dst,$src" %} 8272 opcode(0x23); 8273 ins_encode( OpcP, RegReg( dst, src) ); 8274 ins_pipe( ialu_reg_reg ); 8275%} 8276 8277// And Register with Immediate 8278instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8279 match(Set dst (AndI dst src)); 8280 effect(KILL cr); 8281 8282 format %{ "AND $dst,$src" %} 8283 opcode(0x81,0x04); /* Opcode 81 /4 */ 8284 // ins_encode( RegImm( dst, src) ); 8285 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8286 ins_pipe( ialu_reg ); 8287%} 8288 8289// And Register with Memory 8290instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8291 match(Set dst (AndI dst (LoadI src))); 8292 effect(KILL cr); 8293 8294 ins_cost(125); 8295 format %{ "AND $dst,$src" %} 8296 opcode(0x23); 8297 ins_encode( OpcP, RegMem( dst, src) ); 8298 ins_pipe( ialu_reg_mem ); 8299%} 8300 8301// And Memory with Register 8302instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8303 match(Set dst (StoreI dst (AndI 
(LoadI dst) src))); 8304 effect(KILL cr); 8305 8306 ins_cost(150); 8307 format %{ "AND $dst,$src" %} 8308 opcode(0x21); /* Opcode 21 /r */ 8309 ins_encode( OpcP, RegMem( src, dst ) ); 8310 ins_pipe( ialu_mem_reg ); 8311%} 8312 8313// And Memory with Immediate 8314instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8315 match(Set dst (StoreI dst (AndI (LoadI dst) src))); 8316 effect(KILL cr); 8317 8318 ins_cost(125); 8319 format %{ "AND $dst,$src" %} 8320 opcode(0x81, 0x4); /* Opcode 81 /4 id */ 8321 // ins_encode( MemImm( dst, src) ); 8322 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8323 ins_pipe( ialu_mem_imm ); 8324%} 8325 8326// BMI1 instructions 8327instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{ 8328 match(Set dst (AndI (XorI src1 minus_1) src2)); 8329 predicate(UseBMI1Instructions); 8330 effect(KILL cr); 8331 8332 format %{ "ANDNL $dst, $src1, $src2" %} 8333 8334 ins_encode %{ 8335 __ andnl($dst$$Register, $src1$$Register, $src2$$Register); 8336 %} 8337 ins_pipe(ialu_reg); 8338%} 8339 8340instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{ 8341 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) )); 8342 predicate(UseBMI1Instructions); 8343 effect(KILL cr); 8344 8345 ins_cost(125); 8346 format %{ "ANDNL $dst, $src1, $src2" %} 8347 8348 ins_encode %{ 8349 __ andnl($dst$$Register, $src1$$Register, $src2$$Address); 8350 %} 8351 ins_pipe(ialu_reg_mem); 8352%} 8353 8354instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{ 8355 match(Set dst (AndI (SubI imm_zero src) src)); 8356 predicate(UseBMI1Instructions); 8357 effect(KILL cr); 8358 8359 format %{ "BLSIL $dst, $src" %} 8360 8361 ins_encode %{ 8362 __ blsil($dst$$Register, $src$$Register); 8363 %} 8364 ins_pipe(ialu_reg); 8365%} 8366 8367instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{ 8368 match(Set dst (AndI (SubI 
imm_zero (LoadI src) ) (LoadI src) )); 8369 predicate(UseBMI1Instructions); 8370 effect(KILL cr); 8371 8372 ins_cost(125); 8373 format %{ "BLSIL $dst, $src" %} 8374 8375 ins_encode %{ 8376 __ blsil($dst$$Register, $src$$Address); 8377 %} 8378 ins_pipe(ialu_reg_mem); 8379%} 8380 8381instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8382%{ 8383 match(Set dst (XorI (AddI src minus_1) src)); 8384 predicate(UseBMI1Instructions); 8385 effect(KILL cr); 8386 8387 format %{ "BLSMSKL $dst, $src" %} 8388 8389 ins_encode %{ 8390 __ blsmskl($dst$$Register, $src$$Register); 8391 %} 8392 8393 ins_pipe(ialu_reg); 8394%} 8395 8396instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8397%{ 8398 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); 8399 predicate(UseBMI1Instructions); 8400 effect(KILL cr); 8401 8402 ins_cost(125); 8403 format %{ "BLSMSKL $dst, $src" %} 8404 8405 ins_encode %{ 8406 __ blsmskl($dst$$Register, $src$$Address); 8407 %} 8408 8409 ins_pipe(ialu_reg_mem); 8410%} 8411 8412instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8413%{ 8414 match(Set dst (AndI (AddI src minus_1) src) ); 8415 predicate(UseBMI1Instructions); 8416 effect(KILL cr); 8417 8418 format %{ "BLSRL $dst, $src" %} 8419 8420 ins_encode %{ 8421 __ blsrl($dst$$Register, $src$$Register); 8422 %} 8423 8424 ins_pipe(ialu_reg); 8425%} 8426 8427instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8428%{ 8429 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); 8430 predicate(UseBMI1Instructions); 8431 effect(KILL cr); 8432 8433 ins_cost(125); 8434 format %{ "BLSRL $dst, $src" %} 8435 8436 ins_encode %{ 8437 __ blsrl($dst$$Register, $src$$Address); 8438 %} 8439 8440 ins_pipe(ialu_reg_mem); 8441%} 8442 8443// Or Instructions 8444// Or Register with Register 8445instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8446 match(Set dst (OrI dst src)); 8447 effect(KILL cr); 8448 
8449 size(2); 8450 format %{ "OR $dst,$src" %} 8451 opcode(0x0B); 8452 ins_encode( OpcP, RegReg( dst, src) ); 8453 ins_pipe( ialu_reg_reg ); 8454%} 8455 8456instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8457 match(Set dst (OrI dst (CastP2X src))); 8458 effect(KILL cr); 8459 8460 size(2); 8461 format %{ "OR $dst,$src" %} 8462 opcode(0x0B); 8463 ins_encode( OpcP, RegReg( dst, src) ); 8464 ins_pipe( ialu_reg_reg ); 8465%} 8466 8467 8468// Or Register with Immediate 8469instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8470 match(Set dst (OrI dst src)); 8471 effect(KILL cr); 8472 8473 format %{ "OR $dst,$src" %} 8474 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8475 // ins_encode( RegImm( dst, src) ); 8476 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8477 ins_pipe( ialu_reg ); 8478%} 8479 8480// Or Register with Memory 8481instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8482 match(Set dst (OrI dst (LoadI src))); 8483 effect(KILL cr); 8484 8485 ins_cost(125); 8486 format %{ "OR $dst,$src" %} 8487 opcode(0x0B); 8488 ins_encode( OpcP, RegMem( dst, src) ); 8489 ins_pipe( ialu_reg_mem ); 8490%} 8491 8492// Or Memory with Register 8493instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8494 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8495 effect(KILL cr); 8496 8497 ins_cost(150); 8498 format %{ "OR $dst,$src" %} 8499 opcode(0x09); /* Opcode 09 /r */ 8500 ins_encode( OpcP, RegMem( src, dst ) ); 8501 ins_pipe( ialu_mem_reg ); 8502%} 8503 8504// Or Memory with Immediate 8505instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8506 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8507 effect(KILL cr); 8508 8509 ins_cost(125); 8510 format %{ "OR $dst,$src" %} 8511 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8512 // ins_encode( MemImm( dst, src) ); 8513 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8514 ins_pipe( ialu_mem_imm ); 8515%} 8516 8517// ROL/ROR 8518// ROL expand 8519instruct 
rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8520 effect(USE_DEF dst, USE shift, KILL cr); 8521 8522 format %{ "ROL $dst, $shift" %} 8523 opcode(0xD1, 0x0); /* Opcode D1 /0 */ 8524 ins_encode( OpcP, RegOpc( dst )); 8525 ins_pipe( ialu_reg ); 8526%} 8527 8528instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8529 effect(USE_DEF dst, USE shift, KILL cr); 8530 8531 format %{ "ROL $dst, $shift" %} 8532 opcode(0xC1, 0x0); /*Opcode /C1 /0 */ 8533 ins_encode( RegOpcImm(dst, shift) ); 8534 ins_pipe(ialu_reg); 8535%} 8536 8537instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8538 effect(USE_DEF dst, USE shift, KILL cr); 8539 8540 format %{ "ROL $dst, $shift" %} 8541 opcode(0xD3, 0x0); /* Opcode D3 /0 */ 8542 ins_encode(OpcP, RegOpc(dst)); 8543 ins_pipe( ialu_reg_reg ); 8544%} 8545// end of ROL expand 8546 8547// ROL 32bit by one once 8548instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{ 8549 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8550 8551 expand %{ 8552 rolI_eReg_imm1(dst, lshift, cr); 8553 %} 8554%} 8555 8556// ROL 32bit var by imm8 once 8557instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ 8558 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8559 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8560 8561 expand %{ 8562 rolI_eReg_imm8(dst, lshift, cr); 8563 %} 8564%} 8565 8566// ROL 32bit var by var once 8567instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8568 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); 8569 8570 expand %{ 8571 rolI_eReg_CL(dst, shift, cr); 8572 %} 8573%} 8574 8575// ROL 32bit var by var once 8576instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8577 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift)))); 8578 8579 expand %{ 8580 rolI_eReg_CL(dst, shift, cr); 
8581 %} 8582%} 8583 8584// ROR expand 8585instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8586 effect(USE_DEF dst, USE shift, KILL cr); 8587 8588 format %{ "ROR $dst, $shift" %} 8589 opcode(0xD1,0x1); /* Opcode D1 /1 */ 8590 ins_encode( OpcP, RegOpc( dst ) ); 8591 ins_pipe( ialu_reg ); 8592%} 8593 8594instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8595 effect (USE_DEF dst, USE shift, KILL cr); 8596 8597 format %{ "ROR $dst, $shift" %} 8598 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 8599 ins_encode( RegOpcImm(dst, shift) ); 8600 ins_pipe( ialu_reg ); 8601%} 8602 8603instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 8604 effect(USE_DEF dst, USE shift, KILL cr); 8605 8606 format %{ "ROR $dst, $shift" %} 8607 opcode(0xD3, 0x1); /* Opcode D3 /1 */ 8608 ins_encode(OpcP, RegOpc(dst)); 8609 ins_pipe( ialu_reg_reg ); 8610%} 8611// end of ROR expand 8612 8613// ROR right once 8614instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 8615 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8616 8617 expand %{ 8618 rorI_eReg_imm1(dst, rshift, cr); 8619 %} 8620%} 8621 8622// ROR 32bit by immI8 once 8623instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 8624 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8625 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8626 8627 expand %{ 8628 rorI_eReg_imm8(dst, rshift, cr); 8629 %} 8630%} 8631 8632// ROR 32bit var by var once 8633instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8634 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); 8635 8636 expand %{ 8637 rorI_eReg_CL(dst, shift, cr); 8638 %} 8639%} 8640 8641// ROR 32bit var by var once 8642instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8643 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 8644 
8645 expand %{ 8646 rorI_eReg_CL(dst, shift, cr); 8647 %} 8648%} 8649 8650// Xor Instructions 8651// Xor Register with Register 8652instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8653 match(Set dst (XorI dst src)); 8654 effect(KILL cr); 8655 8656 size(2); 8657 format %{ "XOR $dst,$src" %} 8658 opcode(0x33); 8659 ins_encode( OpcP, RegReg( dst, src) ); 8660 ins_pipe( ialu_reg_reg ); 8661%} 8662 8663// Xor Register with Immediate -1 8664instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ 8665 match(Set dst (XorI dst imm)); 8666 8667 size(2); 8668 format %{ "NOT $dst" %} 8669 ins_encode %{ 8670 __ notl($dst$$Register); 8671 %} 8672 ins_pipe( ialu_reg ); 8673%} 8674 8675// Xor Register with Immediate 8676instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8677 match(Set dst (XorI dst src)); 8678 effect(KILL cr); 8679 8680 format %{ "XOR $dst,$src" %} 8681 opcode(0x81,0x06); /* Opcode 81 /6 id */ 8682 // ins_encode( RegImm( dst, src) ); 8683 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8684 ins_pipe( ialu_reg ); 8685%} 8686 8687// Xor Register with Memory 8688instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8689 match(Set dst (XorI dst (LoadI src))); 8690 effect(KILL cr); 8691 8692 ins_cost(125); 8693 format %{ "XOR $dst,$src" %} 8694 opcode(0x33); 8695 ins_encode( OpcP, RegMem(dst, src) ); 8696 ins_pipe( ialu_reg_mem ); 8697%} 8698 8699// Xor Memory with Register 8700instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8701 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8702 effect(KILL cr); 8703 8704 ins_cost(150); 8705 format %{ "XOR $dst,$src" %} 8706 opcode(0x31); /* Opcode 31 /r */ 8707 ins_encode( OpcP, RegMem( src, dst ) ); 8708 ins_pipe( ialu_mem_reg ); 8709%} 8710 8711// Xor Memory with Immediate 8712instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8713 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8714 effect(KILL cr); 8715 8716 ins_cost(125); 8717 format %{ "XOR $dst,$src" %} 8718 
opcode(0x81,0x6); /* Opcode 81 /6 id */ 8719 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8720 ins_pipe( ialu_mem_imm ); 8721%} 8722 8723//----------Convert Int to Boolean--------------------------------------------- 8724 8725instruct movI_nocopy(rRegI dst, rRegI src) %{ 8726 effect( DEF dst, USE src ); 8727 format %{ "MOV $dst,$src" %} 8728 ins_encode( enc_Copy( dst, src) ); 8729 ins_pipe( ialu_reg_reg ); 8730%} 8731 8732instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8733 effect( USE_DEF dst, USE src, KILL cr ); 8734 8735 size(4); 8736 format %{ "NEG $dst\n\t" 8737 "ADC $dst,$src" %} 8738 ins_encode( neg_reg(dst), 8739 OpcRegReg(0x13,dst,src) ); 8740 ins_pipe( ialu_reg_reg_long ); 8741%} 8742 8743instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8744 match(Set dst (Conv2B src)); 8745 8746 expand %{ 8747 movI_nocopy(dst,src); 8748 ci2b(dst,src,cr); 8749 %} 8750%} 8751 8752instruct movP_nocopy(rRegI dst, eRegP src) %{ 8753 effect( DEF dst, USE src ); 8754 format %{ "MOV $dst,$src" %} 8755 ins_encode( enc_Copy( dst, src) ); 8756 ins_pipe( ialu_reg_reg ); 8757%} 8758 8759instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8760 effect( USE_DEF dst, USE src, KILL cr ); 8761 format %{ "NEG $dst\n\t" 8762 "ADC $dst,$src" %} 8763 ins_encode( neg_reg(dst), 8764 OpcRegReg(0x13,dst,src) ); 8765 ins_pipe( ialu_reg_reg_long ); 8766%} 8767 8768instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8769 match(Set dst (Conv2B src)); 8770 8771 expand %{ 8772 movP_nocopy(dst,src); 8773 cp2b(dst,src,cr); 8774 %} 8775%} 8776 8777instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{ 8778 match(Set dst (CmpLTMask p q)); 8779 effect(KILL cr); 8780 ins_cost(400); 8781 8782 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination 8783 format %{ "XOR $dst,$dst\n\t" 8784 "CMP $p,$q\n\t" 8785 "SETlt $dst\n\t" 8786 "NEG $dst" %} 8787 ins_encode %{ 8788 Register Rp = $p$$Register; 8789 Register Rq = $q$$Register; 
8790 Register Rd = $dst$$Register; 8791 Label done; 8792 __ xorl(Rd, Rd); 8793 __ cmpl(Rp, Rq); 8794 __ setb(Assembler::less, Rd); 8795 __ negl(Rd); 8796 %} 8797 8798 ins_pipe(pipe_slow); 8799%} 8800 8801instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{ 8802 match(Set dst (CmpLTMask dst zero)); 8803 effect(DEF dst, KILL cr); 8804 ins_cost(100); 8805 8806 format %{ "SAR $dst,31\t# cmpLTMask0" %} 8807 ins_encode %{ 8808 __ sarl($dst$$Register, 31); 8809 %} 8810 ins_pipe(ialu_reg); 8811%} 8812 8813/* better to save a register than avoid a branch */ 8814instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8815 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); 8816 effect(KILL cr); 8817 ins_cost(400); 8818 format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t" 8819 "JGE done\n\t" 8820 "ADD $p,$y\n" 8821 "done: " %} 8822 ins_encode %{ 8823 Register Rp = $p$$Register; 8824 Register Rq = $q$$Register; 8825 Register Ry = $y$$Register; 8826 Label done; 8827 __ subl(Rp, Rq); 8828 __ jccb(Assembler::greaterEqual, done); 8829 __ addl(Rp, Ry); 8830 __ bind(done); 8831 %} 8832 8833 ins_pipe(pipe_cmplt); 8834%} 8835 8836/* better to save a register than avoid a branch */ 8837instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8838 match(Set y (AndI (CmpLTMask p q) y)); 8839 effect(KILL cr); 8840 8841 ins_cost(300); 8842 8843 format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t" 8844 "JLT done\n\t" 8845 "XORL $y, $y\n" 8846 "done: " %} 8847 ins_encode %{ 8848 Register Rp = $p$$Register; 8849 Register Rq = $q$$Register; 8850 Register Ry = $y$$Register; 8851 Label done; 8852 __ cmpl(Rp, Rq); 8853 __ jccb(Assembler::less, done); 8854 __ xorl(Ry, Ry); 8855 __ bind(done); 8856 %} 8857 8858 ins_pipe(pipe_cmplt); 8859%} 8860 8861/* If I enable this, I encourage spilling in the inner loop of compress. 
8862instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ 8863 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); 8864*/ 8865//----------Overflow Math Instructions----------------------------------------- 8866 8867instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8868%{ 8869 match(Set cr (OverflowAddI op1 op2)); 8870 effect(DEF cr, USE_KILL op1, USE op2); 8871 8872 format %{ "ADD $op1, $op2\t# overflow check int" %} 8873 8874 ins_encode %{ 8875 __ addl($op1$$Register, $op2$$Register); 8876 %} 8877 ins_pipe(ialu_reg_reg); 8878%} 8879 8880instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2) 8881%{ 8882 match(Set cr (OverflowAddI op1 op2)); 8883 effect(DEF cr, USE_KILL op1, USE op2); 8884 8885 format %{ "ADD $op1, $op2\t# overflow check int" %} 8886 8887 ins_encode %{ 8888 __ addl($op1$$Register, $op2$$constant); 8889 %} 8890 ins_pipe(ialu_reg_reg); 8891%} 8892 8893instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2) 8894%{ 8895 match(Set cr (OverflowSubI op1 op2)); 8896 8897 format %{ "CMP $op1, $op2\t# overflow check int" %} 8898 ins_encode %{ 8899 __ cmpl($op1$$Register, $op2$$Register); 8900 %} 8901 ins_pipe(ialu_reg_reg); 8902%} 8903 8904instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2) 8905%{ 8906 match(Set cr (OverflowSubI op1 op2)); 8907 8908 format %{ "CMP $op1, $op2\t# overflow check int" %} 8909 ins_encode %{ 8910 __ cmpl($op1$$Register, $op2$$constant); 8911 %} 8912 ins_pipe(ialu_reg_reg); 8913%} 8914 8915instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2) 8916%{ 8917 match(Set cr (OverflowSubI zero op2)); 8918 effect(DEF cr, USE_KILL op2); 8919 8920 format %{ "NEG $op2\t# overflow check int" %} 8921 ins_encode %{ 8922 __ negl($op2$$Register); 8923 %} 8924 ins_pipe(ialu_reg_reg); 8925%} 8926 8927instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8928%{ 8929 match(Set cr (OverflowMulI op1 op2)); 8930 effect(DEF cr, USE_KILL 
op1, USE op2); 8931 8932 format %{ "IMUL $op1, $op2\t# overflow check int" %} 8933 ins_encode %{ 8934 __ imull($op1$$Register, $op2$$Register); 8935 %} 8936 ins_pipe(ialu_reg_reg_alu0); 8937%} 8938 8939instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) 8940%{ 8941 match(Set cr (OverflowMulI op1 op2)); 8942 effect(DEF cr, TEMP tmp, USE op1, USE op2); 8943 8944 format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %} 8945 ins_encode %{ 8946 __ imull($tmp$$Register, $op1$$Register, $op2$$constant); 8947 %} 8948 ins_pipe(ialu_reg_reg_alu0); 8949%} 8950 8951// Integer Absolute Instructions 8952instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr) 8953%{ 8954 match(Set dst (AbsI src)); 8955 effect(TEMP dst, TEMP tmp, KILL cr); 8956 format %{ "movl $tmp, $src\n\t" 8957 "sarl $tmp, 31\n\t" 8958 "movl $dst, $src\n\t" 8959 "xorl $dst, $tmp\n\t" 8960 "subl $dst, $tmp\n" 8961 %} 8962 ins_encode %{ 8963 __ movl($tmp$$Register, $src$$Register); 8964 __ sarl($tmp$$Register, 31); 8965 __ movl($dst$$Register, $src$$Register); 8966 __ xorl($dst$$Register, $tmp$$Register); 8967 __ subl($dst$$Register, $tmp$$Register); 8968 %} 8969 8970 ins_pipe(ialu_reg_reg); 8971%} 8972 8973//----------Long Instructions------------------------------------------------ 8974// Add Long Register with Register 8975instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8976 match(Set dst (AddL dst src)); 8977 effect(KILL cr); 8978 ins_cost(200); 8979 format %{ "ADD $dst.lo,$src.lo\n\t" 8980 "ADC $dst.hi,$src.hi" %} 8981 opcode(0x03, 0x13); 8982 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8983 ins_pipe( ialu_reg_reg_long ); 8984%} 8985 8986// Add Long Register with Immediate 8987instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8988 match(Set dst (AddL dst src)); 8989 effect(KILL cr); 8990 format %{ "ADD $dst.lo,$src.lo\n\t" 8991 "ADC $dst.hi,$src.hi" %} 8992 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */ 8993 ins_encode( Long_OpcSErm_Lo( 
dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8994 ins_pipe( ialu_reg_long ); 8995%} 8996 8997// Add Long Register with Memory 8998instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8999 match(Set dst (AddL dst (LoadL mem))); 9000 effect(KILL cr); 9001 ins_cost(125); 9002 format %{ "ADD $dst.lo,$mem\n\t" 9003 "ADC $dst.hi,$mem+4" %} 9004 opcode(0x03, 0x13); 9005 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9006 ins_pipe( ialu_reg_long_mem ); 9007%} 9008 9009// Subtract Long Register with Register. 9010instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9011 match(Set dst (SubL dst src)); 9012 effect(KILL cr); 9013 ins_cost(200); 9014 format %{ "SUB $dst.lo,$src.lo\n\t" 9015 "SBB $dst.hi,$src.hi" %} 9016 opcode(0x2B, 0x1B); 9017 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 9018 ins_pipe( ialu_reg_reg_long ); 9019%} 9020 9021// Subtract Long Register with Immediate 9022instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9023 match(Set dst (SubL dst src)); 9024 effect(KILL cr); 9025 format %{ "SUB $dst.lo,$src.lo\n\t" 9026 "SBB $dst.hi,$src.hi" %} 9027 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 9028 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9029 ins_pipe( ialu_reg_long ); 9030%} 9031 9032// Subtract Long Register with Memory 9033instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9034 match(Set dst (SubL dst (LoadL mem))); 9035 effect(KILL cr); 9036 ins_cost(125); 9037 format %{ "SUB $dst.lo,$mem\n\t" 9038 "SBB $dst.hi,$mem+4" %} 9039 opcode(0x2B, 0x1B); 9040 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9041 ins_pipe( ialu_reg_long_mem ); 9042%} 9043 9044instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 9045 match(Set dst (SubL zero dst)); 9046 effect(KILL cr); 9047 ins_cost(300); 9048 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 9049 ins_encode( neg_long(dst) ); 9050 ins_pipe( ialu_reg_reg_long ); 9051%} 
9052 9053// And Long Register with Register 9054instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9055 match(Set dst (AndL dst src)); 9056 effect(KILL cr); 9057 format %{ "AND $dst.lo,$src.lo\n\t" 9058 "AND $dst.hi,$src.hi" %} 9059 opcode(0x23,0x23); 9060 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9061 ins_pipe( ialu_reg_reg_long ); 9062%} 9063 9064// And Long Register with Immediate 9065instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9066 match(Set dst (AndL dst src)); 9067 effect(KILL cr); 9068 format %{ "AND $dst.lo,$src.lo\n\t" 9069 "AND $dst.hi,$src.hi" %} 9070 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 9071 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9072 ins_pipe( ialu_reg_long ); 9073%} 9074 9075// And Long Register with Memory 9076instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9077 match(Set dst (AndL dst (LoadL mem))); 9078 effect(KILL cr); 9079 ins_cost(125); 9080 format %{ "AND $dst.lo,$mem\n\t" 9081 "AND $dst.hi,$mem+4" %} 9082 opcode(0x23, 0x23); 9083 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9084 ins_pipe( ialu_reg_long_mem ); 9085%} 9086 9087// BMI1 instructions 9088instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 9089 match(Set dst (AndL (XorL src1 minus_1) src2)); 9090 predicate(UseBMI1Instructions); 9091 effect(KILL cr, TEMP dst); 9092 9093 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 9094 "ANDNL $dst.hi, $src1.hi, $src2.hi" 9095 %} 9096 9097 ins_encode %{ 9098 Register Rdst = $dst$$Register; 9099 Register Rsrc1 = $src1$$Register; 9100 Register Rsrc2 = $src2$$Register; 9101 __ andnl(Rdst, Rsrc1, Rsrc2); 9102 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 9103 %} 9104 ins_pipe(ialu_reg_reg_long); 9105%} 9106 9107instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ 9108 match(Set dst (AndL (XorL src1 
minus_1) (LoadL src2) )); 9109 predicate(UseBMI1Instructions); 9110 effect(KILL cr, TEMP dst); 9111 9112 ins_cost(125); 9113 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 9114 "ANDNL $dst.hi, $src1.hi, $src2+4" 9115 %} 9116 9117 ins_encode %{ 9118 Register Rdst = $dst$$Register; 9119 Register Rsrc1 = $src1$$Register; 9120 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 9121 9122 __ andnl(Rdst, Rsrc1, $src2$$Address); 9123 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); 9124 %} 9125 ins_pipe(ialu_reg_mem); 9126%}

// Long "isolate lowest set bit": dst = (0 - src) & src, register source.
// The high word is pre-set to 0 and BLSI runs on the low word; the high-word
// BLSI is skipped when the branch on notZero is taken.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label skip_hi;
    Register dst_lo = $dst$$Register;
    Register src_lo = $src$$Register;
    __ movl(HIGH_FROM_LOW(dst_lo), 0);
    __ blsil(dst_lo, src_lo);
    __ jccb(Assembler::notZero, skip_hi);
    __ blsil(HIGH_FROM_LOW(dst_lo), HIGH_FROM_LOW(src_lo));
    __ bind(skip_hi);
  %}
  ins_pipe(ialu_reg);
%}

9153instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 9154 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 9155 predicate(UseBMI1Instructions); 9156 effect(KILL cr, TEMP dst); 9157 9158 ins_cost(125); 9159 format %{ "MOVL $dst.hi, 0\n\t" 9160 "BLSIL $dst.lo, $src\n\t" 9161 "JNZ done\n\t" 9162 "BLSIL $dst.hi, $src+4\n" 9163 "done:" 9164 %} 9165 9166 ins_encode %{ 9167 Label done; 9168 Register Rdst = $dst$$Register; 9169 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9170 9171 __ movl(HIGH_FROM_LOW(Rdst), 0); 9172 __ blsil(Rdst, $src$$Address); 9173 __ jccb(Assembler::notZero, done); 9174 
__ blsil(HIGH_FROM_LOW(Rdst), src_hi); 9175 __ bind(done); 9176 %} 9177 ins_pipe(ialu_reg_mem); 9178%}

// Long "mask up to lowest set bit": dst = (src + -1) ^ src, register source.
// The high word is pre-set to 0 and BLSMSK runs on the low word; the high-word
// BLSMSK is skipped when the branch on carryClear is taken.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label skip_hi;
    Register dst_lo = $dst$$Register;
    Register src_lo = $src$$Register;
    __ movl(HIGH_FROM_LOW(dst_lo), 0);
    __ blsmskl(dst_lo, src_lo);
    __ jccb(Assembler::carryClear, skip_hi);
    __ blsmskl(HIGH_FROM_LOW(dst_lo), HIGH_FROM_LOW(src_lo));
    __ bind(skip_hi);
  %}

  ins_pipe(ialu_reg);
%}

// Memory-source form of the rule above; the high word is read from $src+4.
instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label skip_hi;
    Register dst_lo = $dst$$Register;
    // Same base/index/scale as $src, displacement advanced by 4 to reach the high word.
    Address mem_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(dst_lo), 0);
    __ blsmskl(dst_lo, $src$$Address);
    __ jccb(Assembler::carryClear, skip_hi);
    __ blsmskl(HIGH_FROM_LOW(dst_lo), mem_hi);
    __ bind(skip_hi);
  %}

  ins_pipe(ialu_reg_mem);
%}

9236instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9237%{ 9238 match(Set dst (AndL (AddL src minus_1) src) ); 9239 predicate(UseBMI1Instructions); 9240 effect(KILL cr, TEMP dst); 9241 9242 format %{ "MOVL $dst.hi, $src.hi\n\t" 9243 "BLSRL $dst.lo, $src.lo\n\t" 9244 "JNC done\n\t" 9245 "BLSRL $dst.hi, 
$src.hi\n" 9246 "done:" 9247 %} 9248 9249 ins_encode %{ 9250 Label done; 9251 Register Rdst = $dst$$Register; 9252 Register Rsrc = $src$$Register; 9253 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9254 __ blsrl(Rdst, Rsrc); 9255 __ jccb(Assembler::carryClear, done); 9256 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9257 __ bind(done); 9258 %} 9259 9260 ins_pipe(ialu_reg); 9261%}

// Long "reset lowest set bit": dst = (src + -1) & src, memory source.
// The high word is first copied from $src+4 and BLSR runs on the low word;
// the high-word BLSR is skipped when the branch on carryClear is taken.
instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label skip_hi;
    Register dst_lo = $dst$$Register;
    // Same base/index/scale as $src, displacement advanced by 4 to reach the high word.
    Address mem_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(dst_lo), mem_hi);
    __ blsrl(dst_lo, $src$$Address);
    __ jccb(Assembler::carryClear, skip_hi);
    __ blsrl(HIGH_FROM_LOW(dst_lo), mem_hi);
    __ bind(skip_hi);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Long bitwise OR: register pair |= register pair.
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Long bitwise OR: register pair |= 64-bit immediate.
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

9313// 
Or Long Register with Memory
// Long bitwise OR: register pair |= 64-bit value loaded from memory.
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Long bitwise XOR: register pair ^= register pair.
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Long XOR with the constant -1, emitted as NOT on each half.
// This rule declares no flags operand and no KILL effect.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Long bitwise XOR: register pair ^= 64-bit immediate.
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Long bitwise XOR: register pair ^= 64-bit value loaded from memory.
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
9372instruct shlL_eReg_1(eRegL dst, immI_1 cnt, 
eFlagsReg cr) %{ 9373 predicate(UseNewLongLShift); 9374 match(Set dst (LShiftL dst cnt)); 9375 effect(KILL cr); 9376 ins_cost(100); 9377 format %{ "ADD $dst.lo,$dst.lo\n\t" 9378 "ADC $dst.hi,$dst.hi" %} 9379 ins_encode %{ 9380 __ addl($dst$$Register,$dst$$Register); 9381 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9382 %} 9383 ins_pipe( ialu_reg_long ); 9384%}

// Long shift left by the constant 2 (only when UseNewLongLShift is set).
// Emitted as two ADD/ADC doubling steps on the register pair.
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  predicate(UseNewLongLShift);
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    // doubling step 1
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    // doubling step 2
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Long shift left by the constant 3 (only when UseNewLongLShift is set).
// Emitted as three ADD/ADC doubling steps on the register pair.
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  predicate(UseNewLongLShift);
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    // doubling step 1
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    // doubling step 2
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    // doubling step 3
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
9429instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9430 match(Set 
dst (LShiftL dst cnt)); 9431 effect(KILL cr); 9432 ins_cost(200); 9433 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9434 "SHL $dst.lo,$cnt" %} 9435 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9436 ins_encode( move_long_small_shift(dst,cnt) ); 9437 ins_pipe( ialu_reg_long ); 9438%}

// Long shift left by a constant in 32..63.
// Per the format: the low word moves into the high word, is shifted by cnt-32,
// and the low word is cleared.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Long shift left by a variable count held in ECX.
// Per the format: bit 5 of the count selects a word move first, then SHLD/SHL
// apply the count.
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Long logical (unsigned) shift right by a constant in 1..31.
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
9482instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9483 match(Set dst (URShiftL dst cnt)); 9484 effect(KILL cr); 9485 ins_cost(300); 9486 format %{ "MOV $dst.lo,$dst.hi\n" 9487 "\tSHR $dst.lo,$cnt-32\n" 9488 "\tXOR $dst.hi,$dst.hi" %} 9489 opcode(0xC1, 0x5); /* C1 /5 ib */ 9490 ins_encode( move_long_big_shift_clr(dst,cnt) ); 
9491 ins_pipe( ialu_reg_long ); 9492%}

// Long logical (unsigned) shift right by a variable count held in ECX.
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Long arithmetic (signed) shift right by a constant in 1..31.
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Long arithmetic (signed) shift right by a constant in 32..63.
// Per the format: the high word moves into the low word and is shifted by
// cnt-32; the high word is then shifted right arithmetically by 31.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Long arithmetic (signed) shift right by a variable count held in ECX.
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}


//----------Double Instructions------------------------------------------------
// Double Math

9555// Compare & 
branch

// P6 double compare on the x87 stack that writes EFLAGS directly (FUCOMIP).
// Requires CMOV support and UseSSE<=1. EAX is killed; per the format it is used
// to set CF through SAHF when a NaN was seen.
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  match(Set cr (CmpD src1 src2));
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Same compare producing eFlagsRegUCF; no NaN fix-up sequence and no EAX kill.
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  match(Set cr (CmpD src1 src2));
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Double compare without FUCOMIP: per the format, FCOMp, then the FPU status
// word goes through AX into EFLAGS (FNSTSW/SAHF); unordered is treated as less-than.
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  match(Set cr (CmpD src1 src2));
  predicate(UseSSE<=1);
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
9609instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9610 predicate(UseSSE<=1); 9611 match(Set dst (CmpD3 src1 zero)); 9612 effect(KILL cr, KILL rax); 9613 ins_cost(280); 9614 format %{ "FTSTD $dst,$src1" %} 9615 opcode(0xE4, 0xD9); 9616 ins_encode( Push_Reg_DPR(src1), 
9617 OpcS, OpcP, PopFPU, 9618 CmpF_Result(dst)); 9619 ins_pipe( pipe_slow ); 9620%}

// Three-way double compare on the x87 stack: dst receives -1, 0 or 1.
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  predicate(UseSSE<=1);
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Double compare in XMM registers setting EFLAGS (UseSSE>=2).
// UCOMISD is followed by the shared NaN fix-up emitted by emit_cmpfp_fixup.
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));
  predicate(UseSSE>=2);
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Same compare producing eFlagsRegUCF: bare UCOMISD, no fix-up.
instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));
  predicate(UseSSE>=2);
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Double compare of an XMM register against a double loaded from memory,
// setting EFLAGS with the NaN fix-up.
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  match(Set cr (CmpD src1 (LoadD src2)));
  predicate(UseSSE>=2);
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

9683instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9684 
predicate(UseSSE>=2); 9685 match(Set cr (CmpD src1 (LoadD src2))); 9686 ins_cost(100); 9687 format %{ "UCOMISD $src1,$src2" %} 9688 ins_encode %{ 9689 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9690 %} 9691 ins_pipe( pipe_slow ); 9692%}

// Three-way double compare in XMM registers: dst receives -1, 0 or 1.
// UCOMISD sets the flags; emit_cmpfp3 turns them into the integer result.
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  predicate(UseSSE>=2);
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Three-way double compare of an XMM register against a double loaded from memory.
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  predicate(UseSSE>=2);
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}


// x87 double subtract: dst = dst - src (src is pushed, then subtract-and-pop into dst).
instruct subDPR_reg(regDPR dst, regDPR src) %{
  match(Set dst (SubD dst src));
  predicate (UseSSE <=1);
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

9748instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9749 predicate (UseSSE <=1); 9750 match(Set dst (RoundDouble (SubD src1 src2))); 9751 ins_cost(250); 9752 9753 format %{ "FLD $src2\n\t" 9754 "DSUB ST,$src1\n\t" 9755 "FSTP_D $dst\t# D-round" %} 9756 
opcode(0xD8, 0x5); 9757 ins_encode( Push_Reg_DPR(src2), 9758 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9759 ins_pipe( fpu_mem_reg_reg ); 9760%}


// x87 double subtract with a memory operand: dst = dst - [src].
// The tertiary opcode (0xDD with /0) loads the double before the subtract-and-pop.
instruct subDPR_reg_mem(regDPR dst, memory src) %{
  match(Set dst (SubD dst (LoadD src)));
  predicate (UseSSE <=1);
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// x87 double absolute value; both operands are constrained to regDPR1.
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  match(Set dst (AbsD src));
  predicate (UseSSE<=1);
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// x87 double negate; both operands are constrained to regDPR1.
instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  match(Set dst (NegD src));
  predicate(UseSSE<=1);
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// x87 double add: dst = dst + src. Declared size is 4 bytes.
instruct addDPR_reg(regDPR dst, regDPR src) %{
  match(Set dst (AddD dst src));
  predicate(UseSSE<=1);
  ins_cost(150);
  size(4);
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// x87 double add whose result is rounded by storing to a double stack slot
// (matches RoundDouble of the sum).
instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  match(Set dst (RoundDouble (AddD src1 src2)));
  predicate(UseSSE<=1);
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DADD ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


9825instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9826 predicate(UseSSE<=1); 9827 match(Set dst (AddD dst (LoadD src))); 9828 
ins_cost(150); 9829 9830 format %{ "FLD $src\n\t" 9831 "DADDp $dst,ST" %} 9832 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9833 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9834 OpcP, RegOpc(dst) ); 9835 ins_pipe( fpu_reg_mem ); 9836%}

// Add-to-memory: [dst] = round([dst] + src), all on the x87 stack.
// The encoding is spelled out byte by byte: DD /0 (load), D8 + RegOpc (add),
// then DD /3 (store) after set_instruction_start.
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  predicate(UseSSE<=1);
  ins_cost(150);

  format %{ "FLD_D $dst\n\t"
            "DADD ST,$src\n\t"
            "FST_D $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}

// x87 double add of the constant 1.0, using FLD1 rather than a constant-table load.
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  match(Set dst (AddD dst con));
  predicate(UseSSE<=1);
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// x87 double add of a general constant loaded from the constant table.
// The predicate excludes the constants 0.0 and 1.0.
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  match(Set dst (AddD dst con));
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

9881instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9882 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9883 match(Set dst (RoundDouble (AddD src con))); 9884 ins_cost(200); 9885 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9886 "DADD ST,$src\n\t" 9887 "FSTP_D $dst\t# D-round" %} 9888 ins_encode %{ 9889 __ fld_d($constantaddress($con)); 9890 __ fadd($src$$reg); 9891 __ 
fstp_d(Address(rsp, $dst$$disp)); 9892 %} 9893 ins_pipe(fpu_mem_reg_con); 9894%}

// x87 double multiply: dst = dst * src.
instruct mulDPR_reg(regDPR dst, regDPR src) %{
  match(Set dst (MulD dst src));
  predicate(UseSSE<=1);
  ins_cost(150);
  format %{ "FLD $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP multiply. The argument is biased before the multiply and the
// result is biased back afterwards, to avoid double rounding of subnormals:
//
//   scale arg1 by multiplying arg1 by 2^(-15360)
//   load arg2
//   multiply scaled arg1 by arg2
//   rescale product by 2^(15360)
//
// Only applies when the method being compiled is strict.
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  match(Set dst (MulD dst src));
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  ins_cost(1);   // Select this instruction for all strict FP double multiplies

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "DMULp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// x87 double multiply by a general constant loaded from the constant table.
// The predicate excludes the constants 0.0 and 1.0.
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  match(Set dst (MulD dst con));
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}


9949instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9950 predicate( UseSSE<=1 ); 9951 match(Set dst (MulD dst (LoadD src))); 
9952 ins_cost(200); 9953 format %{ "FLD_D $src\n\t" 9954 "DMULp $dst,ST" %} 9955 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9956 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9957 OpcP, RegOpc(dst) ); 9958 ins_pipe( fpu_reg_mem ); 9959%}

//
// Cisc-alternate to reg-reg multiply: dst = src * [mem], with dst distinct from src.
//
// The memory operand is a double (the rule matches LoadD and the format prints
// FLD_D), so the load opcode must be DD /0 (FLD m64fp). The tertiary opcode was
// previously 0xD9, i.e. D9 /0 (FLD m32fp), which would read only 32 bits of the
// operand as a float. The other double reg-mem rules in this section
// (sub/add/mulDPR_reg_mem) use 0xDD for the same load.
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xDD); /* D8 C8+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}


// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
// Fused x87 sequence: src2 = (src0 * src1) + src2. The result goes back into
// src2, so the rule is two-address with respect to src2.
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  match(Set src2 (AddD (MulD src0 src1) src2));
  predicate( UseSSE<=1 );
  ins_cost(250);
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
// Fused x87 sequence: src2 = (src0 * src1) - src2, using a reversed
// subtract-and-pop (DSUBRp in the format, bytes DE E0+i in the encoding).
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  match(Set src2 (SubD (MulD src0 src1) src2));
  predicate( UseSSE<=1 );
  ins_cost(250);
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}


// x87 double divide: dst = dst / src.
instruct divDPR_reg(regDPR dst, regDPR src) %{
  match(Set dst (DivD dst src));
  predicate( UseSSE<=1 );
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
//   scale dividend by multiplying dividend by 2^(-15360)
//   load divisor
//   divide scaled dividend by divisor
//   rescale quotient by 2^(15360)
//
// The rule used to carry two predicate clauses: a bare `UseSSE<=1` and the
// strict-method predicate below. The strict-method predicate already includes
// `UseSSE<=1`, so only that clause is kept.
// NOTE(review): assumes ADLC keeps the later of two predicate clauses - confirm.
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  match(Set dst (DivD dst src));
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  ins_cost(1);   // same cost as the strict FP multiply rule

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// x87 double divide whose result is rounded by storing to a double stack slot.
// Excluded for strict methods by the predicate.
instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD $src1\n\t"
            "FDIV ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// x87 double remainder: dst = dst mod src.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

10081instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 10082 predicate(UseSSE>=2); 10083 match(Set dst (ModD src0 src1)); 10084 
effect(KILL rax, KILL cr); 10085 10086 format %{ "SUB ESP,8\t # DMOD\n" 10087 "\tMOVSD [ESP+0],$src1\n" 10088 "\tFLD_D [ESP+0]\n" 10089 "\tMOVSD [ESP+0],$src0\n" 10090 "\tFLD_D [ESP+0]\n" 10091 "loop:\tFPREM\n" 10092 "\tFWAIT\n" 10093 "\tFNSTSW AX\n" 10094 "\tSAHF\n" 10095 "\tJP loop\n" 10096 "\tFSTP_D [ESP+0]\n" 10097 "\tMOVSD $dst,[ESP+0]\n" 10098 "\tADD ESP,8\n" 10099 "\tFSTP ST0\t # Restore FPU Stack" 10100 %} 10101 ins_cost(250); 10102 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 10103 ins_pipe( pipe_slow ); 10104%}

// Arc tangent with x87 register operands (UseSSE<=1).
// Matches AtanD with dst as both input and result; opcode bytes are D9 F3.
instruct atanDPR_reg(regDPR dst, regDPR src) %{
  match(Set dst(AtanD dst src));
  predicate (UseSSE<=1);
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

// Arc tangent with XMM register operands (UseSSE>=2).
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  match(Set dst(AtanD dst src));
  predicate (UseSSE>=2);
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

// x87 double square root: dst = sqrt(src).
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  match(Set dst (SqrtD src));
  predicate (UseSSE<=1);
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}

//-------------Float Instructions-------------------------------
// Float Math

// Code for float compare:
//     fcompp();
//     fwait(); fnstsw_ax();
//     sahf();
//     movl(dst, unordered_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);
//   exit:

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Same compare as cmpFPR_cc_P6 but producing eFlagsRegUCF: no NaN fixup is
// encoded and EAX is not killed.
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// UCOMISS compare producing eFlagsRegUCF; unlike cmpF_cc, no
// emit_cmpfp_fixup() follows the compare.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand UCOMISS compare producing eFlagsRegUCF; no fixup is emitted.
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// AbsF with both operands constrained to regFPR1; the encoding is just the
// two opcode bytes, emitted secondary-first (D9 E1).
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// NegF with both operands constrained to regFPR1; the encoding is just the
// two opcode bytes, emitted secondary-first (D9 E0).
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// The listing mirrors the encoding below: $src2 is pushed, multiplied by
// $src1, and the result is popped into the register $dst (no single-precision
// store), matching the other register-destination multiply rules.
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit
// precision
// Both multiplicands come from memory: $src2 is loaded with the tertiary
// opcode (D9 /0), then multiplied by $src1 using the primary opcode with the
// secondary value as the ModRM opcode extension.
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
// ins_cost(95) is the only explicit cost among the MACRO rules in this group.
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  // NOTE(review): none of the encodings listed below reference OpcP, so this
  // opcode declaration appears to be unused here -- confirm.
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
// Matches ($src2 - $src1) / $src3; the subtract and divide are emitted
// together by subFPR_divFPR_encode after $src2 has been pushed.
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to
// obtain 24-bit precision
// ModF on the x87 register file with the result stored to a float stack slot.
// Reuses the double-precision helpers (Push_Reg_Mod_DPR / emitModDPR /
// Push_Result_Mod_DPR) for the remainder computation.
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// ModF for XMM floats (UseSSE>=1).  As the format listing shows, both
// operands are moved through a 4-byte stack temporary onto the x87 stack,
// the FPREM loop from emitModDPR() runs there, and the result is moved back
// to $dst; EAX and EFLAGS are killed.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  // RoundFloat: stores the x87 float register to a float stack slot.
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// RoundDouble: stores the x87 double register to a double stack slot.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 8-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 8-bit exponent
// UseSSE==1: the double lives on the x87 stack and the float result in an XMM
// register, so the value is stored as a single to a 4-byte stack temporary
// and reloaded with MOVSS.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    if ($src$$reg != FPR1L_enc) {
      // Source is not FPR1: push a copy, then store-and-pop it.
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      // Source is FPR1: store without popping.
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ConvF2D between x87 registers (UseSSE==0), encoded as a register-to-register
// move via Pop_Reg_Reg_DPR.
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// ConvF2D from an x87 float register to a double stack slot (UseSSE==1);
// expands to roundDouble_mem_reg.
instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// ConvF2D from an XMM float to an x87 double register (UseSSE==1): the float
// is spilled to a 4-byte stack temporary, loaded onto the x87 stack with
// FLD_S, and popped into $dst.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

// ConvF2D entirely in XMM registers (UseSSE>=2).
instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.
// If the double is a NAN, stuff a zero in instead.
// Fast path: CVTTSD2SI.  A result of 0x80000000 sends the value through the
// x87 stack to the d2i_wrapper stub.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: hand the original double to the stub on the x87 stack.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// ConvD2L on the x87 register file (UseSSE<=1); the result is produced in the
// eADXRegL pair.  Per the format listing, a result of EDX==0x80000000 with
// EAX==0 falls through to the d2l_wrapper stub.
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // Move the XMM double onto the x87 stack through an 8-byte stack temporary.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // Only EDX:EAX == 0x80000000:00000000 takes the slow path below.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: reload the original double for the d2l_wrapper stub.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a float to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned value down as an int, and reset the
// rounding mode to 'nearest'.
// The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: hand the original float to the stub on the x87 stack.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// ConvF2L on the x87 register file (UseSSE==0); result in the eADXRegL pair.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // 8 bytes are reserved: the float is spilled into the low 4 bytes and the
    // 64-bit FISTP result overwrites the whole slot.
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // Only EDX:EAX == 0x80000000:00000000 takes the slow path below.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: reload the original float for the d2l_wrapper stub.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// ConvI2D from an int stack slot into an x87 double register (UseSSE<=1).
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// ConvI2D from a general register with CVTSI2SD; selected when UseXmmI2D is off.
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ConvI2D with the int loaded straight from memory by CVTSI2SD.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// ConvI2D alternative selected when UseXmmI2D is on: MOVD into the XMM
// register followed by CVTDQ2PD on that same register.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// ConvI2D with the int loaded from memory by FILD (DB /0); only offered when
// 24-bit instruction selection is off.
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// x87 path (UseSSE==0) ConvI2F restricted by its predicate to inputs of the
// form (AndI x 255): input 1 of the matched node must be an AndI whose
// second input is the constant 255.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
// (UseSSE==0 and select_24_bit_instr() true; dst is a float stack slot and
// the format text stores it with FSTP_S).
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
// Memory-operand form of convI2FPR_SSF: the LoadI is folded into the FILD.
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
// (UseSSE==0 and select_24_bit_instr() false; the result stays in an x87
// register).
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
// Memory-operand form of convI2FPR_reg: the LoadI is folded into the FILD.
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
// Selected when UseSSE==1, or when UseSSE>=2 and UseXmmI2F is off.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Alternative ConvI2F for UseSSE>=2 with UseXmmI2F on: MOVD the int into the
// XMM register, then CVTDQ2PS in place.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// ConvI2L: per the format text, src is copied into both halves of dst and
// the high half is then arithmetically shifted right by 31.  The flags are
// declared KILL.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
// Matches (AndL (ConvI2L src) mask) for an immL_32bits mask: copy src into
// the low half and XOR the high half with itself (opcode 0x33).
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
// Matches (AndL src mask) for an immL_32bits mask; uses the same encoding
// list as convI2L_reg_zex.
// NOTE(review): the format text ends with a trailing "\n\t", unlike
// convI2L_reg_zex -- looks cosmetic only; confirm before changing.
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// ConvL2D for UseSSE<=1 with the result in a double stack slot.  Format text:
// push both halves of src, FILD the value from the stack, release the
// 8 bytes and store with FSTP_D.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// ConvL2D for UseSSE>=2.  Per the format text the pushed long still goes
// through the x87 stack (FILD_D / FSTP_D) and is then loaded into the XMM
// destination with MOVSD before the stack is released.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// ConvL2F for UseSSE>=1: same shape as convL2D_reg, but stored with FSTP_S
// and loaded with MOVSS.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// ConvL2F into a float stack slot through the x87 stack.  No predicate is
// given, so this rule is not restricted by UseSSE.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// ConvL2I: copy the low half of the long into dst.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// MoveF2I, float stack slot -> int register: a plain 32-bit load from
// [rsp + disp of src].
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// MoveF2I, x87 register -> int stack slot (UseSSE==0): FST_S per the format
// text.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// MoveF2I, XMM register -> int stack slot (UseSSE>=1): store with movflt to
// [rsp + disp of dst].
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// MoveF2I, XMM register -> int register (UseSSE>=2): single MOVD.  It has
// the lowest ins_cost (85) of the MoveF2I rules in this group.
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// MoveI2F, int register -> float stack slot: a plain 32-bit store to
// [rsp + disp of dst].
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


// MoveI2F, int stack slot -> x87 register (UseSSE==0): FLD m32real
// (opcode D9 /0) followed by FSTP into dst.
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// MoveI2F, int stack slot -> XMM register (UseSSE>=1): load with movflt from
// [rsp + disp of src].
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// MoveI2F, int register -> XMM register (UseSSE>=2): single MOVD.
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// MoveD2L, double stack slot -> long register pair: two MOV loads
// (opcode 0x8B for both), one for $src and one for $src+4 per the format
// text.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// MoveD2L, x87 register -> long stack slot (UseSSE<=1): FST_D per the format
// text.
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// MoveD2L, XMM register -> long stack slot (UseSSE>=2): store with movdbl to
// [rsp + disp of dst].
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// MoveD2L, XMM register -> long register pair (UseSSE>=2).  The first MOVD
// fills $dst$$Register; PSHUFLW with immediate 0x4e writes a shuffled copy
// of src into tmp, and a second MOVD from tmp fills HIGH_FROM_LOW(dst).
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// MoveL2D, long register pair -> double stack slot: two MOV stores
// (opcode 0x89 for both), one to $dst and one to $dst+4 per the format text.
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


// MoveL2D, long stack slot -> x87 register (UseSSE<=1): FLD m64real
// (opcode DD /0) followed by FSTP into dst.
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// MoveL2D, long stack slot -> XMM register, for UseSSE>=2 with
// UseXmmLoadAndClearUpper on: load with movdbl from [rsp + disp of src].
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Counterpart of MoveL2D_stack_reg_sse for UseXmmLoadAndClearUpper off.
// NOTE(review): the encoder call is the same movdbl(...) as in the other
// variant and the format tag still reads "MoveL2D_stack_reg_sse"; presumably
// movdbl itself selects MOVSD vs MOVLPD from the flag -- confirm in the
// macro assembler.
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// MoveL2D, long register pair -> XMM register (UseSSE>=2): MOVD
// $src$$Register into dst, MOVD HIGH_FROM_LOW(src) into tmp, then
// PUNPCKLDQ dst,tmp combines the two.  dst is declared TEMP (not DEF) here,
// as is tmp.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// =======================================================================
// fast clearing of an array
//
// ClearArray for nodes whose ClearArrayNode::is_large() is false.  The code
// itself comes from clear_mem(base, cnt, zero, tmp, false); the format
// template is descriptive text that follows the UseFastStosb /
// UseXMMForObjInit choices.  cnt and base are USE_KILL, zero and the flags
// are KILL, tmp is TEMP.
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV RDI,RAX\n\t"
       $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD 0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB 0x8,RCX\n\t"
       $$emit$$"JGE L_loop\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JL L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD 0x20,RAX\n\t"
       $$emit$$"SUB 0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JLE L_end\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ XMM0,(RAX)\n\t"
       $$emit$$"ADD 0x8,RAX\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"JGE L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}

// ClearArray for nodes whose ClearArrayNode::is_large() is true.  Same
// operands and effects as rep_stos; clear_mem is called with its last
// argument true, and the format template omits the short-array prologue.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
       $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD 0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB 0x8,RCX\n\t"
       $$emit$$"JGE L_loop\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JL L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD 0x20,RAX\n\t"
       $$emit$$"SUB 0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JLE L_end\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ XMM0,(RAX)\n\t"
       $$emit$$"ADD 0x8,RAX\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"JGE L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true);
  %}
  ins_pipe( pipe_slow );
%}

// StrComp for the LL encoding.  All four inputs are USE_KILL, tmp1 is TEMP
// and the flags are KILL; the work is delegated to string_compare(...) with
// StrIntrinsicNode::LL.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// StrComp for the UU encoding; identical to string_compareL apart from the
// encoding constant and the "char[]" format text.
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// StrComp for the LU encoding; operands are passed to string_compare in
// declaration order.
// NOTE(review): the format text says "byte[]" for this mixed encoding --
// informational only, confirm if it should be made more precise.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe( pipe_slow );
%}

// StrComp for the UL encoding.  Unlike the other three variants, the operand
// classes of str1/cnt1 and str2/cnt2 are exchanged (ESI/EDX vs EDI/ECX) and
// the arguments are handed to string_compare in swapped order
// (str2, str1, cnt2, cnt1).
// NOTE(review): the format text says "byte[]" for this mixed encoding --
// informational only.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
// StrEquals is implemented with arrays_equals(...); its first argument is
// false here (the AryEq rules pass true) and the trailing "char" argument is
// false.  tmp3 is a killed general register used as scratch.
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}

  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// LL encoding, requires UseSSE42Intrinsics.  The substring length is the
// immediate int_cnt2; cnt2 is only a killed scratch register here.  Lengths
// >= 16 use string_indexofC8, shorter ones use string_indexof.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// UU encoding, requires UseSSE42Intrinsics.  Same structure as
// string_indexof_conL, but the string_indexofC8 threshold is 8 elements.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// Constant-length StrIndexOf for the UL encoding; requires
// UseSSE42Intrinsics.  The substring length is the immediate int_cnt2 and
// cnt2 is only a killed scratch register.  Lengths >= 8 use
// string_indexofC8, shorter ones use string_indexof.
// NOTE(review): the format text prints "char[]" for this mixed encoding --
// informational only.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// StrIndexOf with a run-time substring length, LL encoding; requires
// UseSSE42Intrinsics.  cnt2 is a real (USE_KILL) input and string_indexof is
// called with (-1) in the constant-length position.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// StrIndexOf with a run-time substring length, UU encoding; otherwise the
// same as string_indexofL.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// StrIndexOf with a run-time substring length, UL encoding; otherwise the
// same as string_indexofL.
// NOTE(review): the format text prints "char[]" for this mixed encoding --
// informational only.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// StrIndexOfChar; requires UseSSE42Intrinsics.  str1, cnt1 and ch are
// USE_KILL; three XMM temporaries and the general register tmp are TEMP
// (tmp is TEMP here, not KILL as in the StrIndexOf rules).
instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
// AryEq for the LL encoding.  arrays_equals is called with its first
// argument true and the trailing "char" argument false; tmp3 is passed in
// the position where string_equals passes its cnt operand.
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// AryEq for the UU encoding: identical to array_equalsB except that the
// trailing "char" argument to arrays_equals is true.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// HasNegatives over (ary1, len), delegated to has_negatives(...).  No
// predicate is given.  ary1 and len are USE_KILL; tmp3 and the flags are
// KILL; the two XMM registers are TEMP.
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                      regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast char[] to byte[] compression
// StrCompressedCopy, delegated to char_array_compress(...).  src, dst and
// len are USE_KILL; four XMM registers are TEMP; tmp5 and the flags are
// KILL.
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
// StrInflatedCopy, delegated to byte_array_inflate(...).  The rule sets the
// Universe operand dummy rather than a value register.  Here the general
// scratch register tmp2 is TEMP rather than KILL.
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
// EncodeISOArray, delegated to encode_iso_array(...).  Operand list and
// effects have the same shape as string_compress.
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions

// CmpI of two int registers: CMP op1,op2 (opcode 3B /r), result in the
// flags register.
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// CmpI of an int register with an immediate: CMP op1,imm (opcode 81 /7);
// the immediate is emitted through Con8or32.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of compI_eReg
// The second operand is a LoadI folded into the CMP; ins_cost is 500.
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// CmpI of a register against the immI0 operand: encoded as TEST src,src
// (opcode 0x85) instead of a CMP with an immediate.
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (CmpI (AndI src con) zero): folded into a single TEST src,imm32
// (opcode F7 /0, immediate emitted through Con32).
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (CmpI (AndI src mem) zero): TEST src,mem (opcode 0x85).
// NOTE(review): unlike compI_eReg_mem, the match rule names the memory
// operand directly instead of wrapping it in (LoadI mem) -- confirm this is
// intended before relying on the rule being selected.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
// Register-register CmpU producing eFlagsRegU (opcode 3B /r).
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Register-immediate CmpU (opcode 81 /7); the immediate is emitted through
// Con8or32.
instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of compU_eReg
// Matches CmpU against a LoadI from memory, at ins_cost(500).
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Disabled: Cisc-spilled version of compU_eReg with the memory operand first.
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// CmpU of a register against the immI0 operand, emitted as TEST src,src.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
// Register-register CmpP producing eFlagsRegU (opcode 3B /r).
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// CmpP of a register against an immP constant (opcode 81 /7).
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of compP_eReg
// Matches CmpP against a LoadP from memory, at ins_cost(500).
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Disabled: Cisc-spilled version of compP_eReg with the memory operand first.
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
// Same match rule and encoding as compP_eReg_mem, but guarded by a predicate
// on the loaded value's relocation type (relocInfo::none) and without the
// ins_cost(500).
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// Tests a pointer loaded from memory against zero with TEST mem,0xFFFFFFFF
// (F7 /0 followed by a 32-bit all-ones constant).
// NOTE(review): the zero operand is declared immI0, whereas testP_reg uses
// immP0 for the same CmpP-against-zero shape - confirm whether this rule can
// actually match before changing it.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7); /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
// dst is both an input and the result of MinI; the flags are killed. The
// code is produced by the min_enc encoding class.
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
// dst is both an input and the result of MaxI; the flags are killed. The
// code is produced by the max_enc encoding class.
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
//
// limit is both an input and the result; limit_hi and tmp are TEMPs and the
// flags are killed. The stride is a compile-time constant and is asserted to
// be neither 1 nor -1. The sequence widens limit and init to 64 bits, forms
// (limit - init + stride -/+ 1), divides by |stride|, multiplies back by the
// signed stride and finally adds init (see the format string).
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
// Indirect jump through the constant table: the target address is read from
// [$constantaddress + switch_val * 1].
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
// Long form: declared size is 5 and the jmp is emitted with maybe_short=false.
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Long form of a signed-flags conditional branch (declared size 6).
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// CountedLoopEnd branch on signed flags; used only when the node does not
// have the vector mask set (the *_and_restoreMask rules cover that case).
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Same as jmpLoopEnd but for unsigned flags (cmpOpU / eFlagsRegU).
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Same as jmpLoopEndU but for cmpOpUCF / eFlagsRegUCF, at ins_cost(200).
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// CountedLoopEnd branch on signed flags for nodes that have the vector mask
// set. restorevectmask() is emitted directly after the long jcc, and the
// declared size (10) covers both.
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Unsigned-flags variant of jmpLoopEnd_and_restoreMask.
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// cmpOpUCF / eFlagsRegUCF variant of jmpLoopEnd_and_restoreMask, at
// ins_cost(300).
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Conditional branch on cmpOpUCF / eFlagsRegUCF; same encoding as jmpConU
// but at ins_cost(200).
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Conditional branch on cmpOpUCF2 that also consults the parity flag.
// Only notEqual and equal are handled; any other condition code reaches
// ShouldNotReachHere(). No size() is declared for this rule.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // notEqual: branch to the target on parity as well as on notEqual.
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // equal: parity skips the branch; otherwise branch on equal.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
       ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
// Value-producing form: result is set by PartialSubtypeCheck; rcx and the
// flags are killed. opcode(0x1) asks the shared enc_PartialSubtypeCheck
// encoding to also XOR the result register on a hit.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Flags-producing form: matches the check compared directly against zero, so
// only the flags are the result; rcx and the result register are killed.
// opcode(0x0) tells the shared encoding to skip the XOR.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0);  // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
// Short form of jmpDir: declared size 2, emitted with jmpb.
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of jmpCon: declared size 2, emitted with jccb.
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short CountedLoopEnd branch on signed flags. Unlike the long jmpLoopEnd
// rule, this one carries no has_vector_mask_set() predicate.
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short CountedLoopEnd branch on unsigned flags.
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short CountedLoopEnd branch for cmpOpUCF / eFlagsRegUCF.
instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
// Short form of jmpConU.
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short form of jmpConUCF.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short form of jmpConUCF2: two jccb instructions, declared size 4.
// Only notEqual and equal are handled; any other condition code reaches
// ShouldNotReachHere().
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // notEqual: branch to the target on parity as well as on notEqual.
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // equal: parity skips the branch; otherwise branch on equal.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
       ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Three-way long compare: dst starts at 0 and is decremented or incremented
// depending on the outcome. The high halves are compared signed (less /
// greater); only if they are equal are the low halves compared unsigned
// (below). The flags are killed.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Compare against zero: a TEST of the high half with itself.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Register-register form: CMP on the low halves, then SBB on a copy of
// src1.hi held in the TEMP register (see the format string).
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Compare against zero: same TEST of the high half as cmpL_zero_flags_LTGE,
// but producing flagsReg_ulong_LTGE.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Register-register form: same long_cmp_flags2 encoding as
// cmpL_reg_flags_LTGE, but producing flagsReg_ulong_LTGE.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
// Matches only when the Bool test is lt or ge, and expands to jmpCon.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Requires cmov support and an lt/ge Bool test. Emits two conditional moves,
// one for the low half and one for the high half of the register pair.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source form of cmovLL_reg_LTGE: the source is a LoadL, and the two
// halves are encoded with RegMem and RegMem_Hi.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Register-source form: a single CMOV of an int, conditioned on long-compare
// LT/GE flags.  Requires CMOV support and an lt or ge Bool test.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

// Memory-source form: the moved int comes from a LoadI.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}

// Compare 2 longs and CMOVE ptrs.
// Single CMOV of a pointer, conditioned on long-compare LT/GE flags.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

// Compare 2 longs and CMOVE doubles
// Chosen when UseSSE<=1; expands to fcmovDPR_regS.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  predicate(UseSSE<=1 &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// Chosen when UseSSE>=2; expands to fcmovD_regS.
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  predicate(UseSSE>=2 &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE floats
// Chosen when UseSSE==0; expands to fcmovFPR_regS.
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  predicate(UseSSE==0 &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE floats
// Chosen when UseSSE>=1; expands to fcmovF_regS.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  predicate(UseSSE>=1 &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp, flags, dst, src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// Compare against the immL0 operand: the format shows the low half copied to
// tmp and OR-ed with the high half.
instruct cmpL_zero_flags_EQNE(flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// Register/register form: per the format, the high halves are compared only
// when the low halves are equal.
instruct cmpL_reg_flags_EQNE(flagsReg_long_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  effect(USE labl);
  expand %{
    jmpCon(cmp, flags, labl);  // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
// Compare against the immL0 operand; uses the same long_cmp_flags0 encoding
// as the signed variant (MOV low half to tmp, OR with high half).
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
// Register/register form, encoded with long_cmp_flags1.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  effect(USE labl);
  expand %{
    jmpCon(cmp, flags, labl);  // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Register-source form.  Requires CMOV support and an eq or ne Bool test; the
// low and high halves are moved by two CMOVs that use the same condition.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg_Lo2(dst, src),
             enc_cmov(cmp), RegReg_Hi2(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}

// Memory-source form: the moved long comes from a LoadL; the halves are
// encoded with RegMem and RegMem_Hi respectively.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src),
             enc_cmov(cmp), RegMem_Hi(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}

// Compare 2 longs and CMOVE ints.
// Register-source form: a single CMOV of an int, conditioned on long-compare
// EQ/NE flags.  Requires CMOV support and an eq or ne Bool test.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

// Memory-source form: the moved int comes from a LoadI.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}

// Compare 2 longs and CMOVE ptrs.
// Single CMOV of a pointer, conditioned on long-compare EQ/NE flags.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

// Compare 2 longs and CMOVE doubles
// Chosen when UseSSE<=1; expands to fcmovDPR_regS.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  predicate(UseSSE<=1 &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// Chosen when UseSSE>=2; expands to fcmovD_regS.
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  predicate(UseSSE>=2 &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE floats
// Chosen when UseSSE==0; expands to fcmovFPR_regS.
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  predicate(UseSSE==0 &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE floats
// Chosen when UseSSE>=1; expands to fcmovF_regS.
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  predicate(UseSSE>=1 &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp, flags, dst, src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT(flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT(flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  // Note the argument order: src2 is passed first.
  ins_encode(long_cmp_flags2(src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
// The predicate admits only the gt and le Bool tests; the condition operand
// is a cmpOp_commute, matching the swapped-operand flag producers.
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  effect(USE labl);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);  // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  // Note the argument order: src2 is passed first.
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Unsigned long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
// The predicate admits only the gt and le Bool tests; the condition operand
// is a cmpOpU_commute.
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  effect(USE labl);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);  // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Register-source form.  Requires CMOV support and an le or gt Bool test; the
// low and high halves are moved by two CMOVs that use the same condition.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg_Lo2(dst, src),
             enc_cmov(cmp), RegReg_Hi2(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}

// Memory-source form: the moved long comes from a LoadL; the halves are
// encoded with RegMem and RegMem_Hi respectively.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src),
             enc_cmov(cmp), RegMem_Hi(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}

// Compare 2 longs and CMOVE ints.
// Register-source form: a single CMOV of an int, conditioned on long-compare
// LE/GT flags.  Requires CMOV support and an le or gt Bool test.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

// Memory-source form: the moved int comes from a LoadI.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}

// Compare 2 longs and CMOVE ptrs.
// Single CMOV of a pointer, conditioned on long-compare LE/GT flags.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

// Compare 2 longs and CMOVE doubles
// Chosen when UseSSE<=1; expands to fcmovDPR_regS.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  predicate(UseSSE<=1 &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// Chosen when UseSSE>=2; expands to fcmovD_regS.
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  predicate(UseSSE>=2 &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE floats
// Chosen when UseSSE==0; expands to fcmovFPR_regS.
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  predicate(UseSSE==0 &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp, flags, dst, src);
  %}
%}


// Compare 2 longs and CMOVE floats
// Chosen when UseSSE>=1; expands to fcmovF_regS.
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  predicate(UseSSE>=1 &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp, flags, dst, src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8);  // E8 cd
  // Encoding steps run in the order listed.
  ins_encode(pre_call_resets,
             Java_Static_Call(meth),
             call_epilog,
             post_call_FPU);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8);  // E8 cd
  // Encoding steps run in the order listed.
  ins_encode(pre_call_resets,
             Java_Dynamic_Call(meth),
             call_epilog,
             post_call_FPU);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8);  // E8 cd
  // Use FFREEs to clear entries in float stack
  ins_encode(pre_call_resets,
             FFree_Float_Stack_All,
             Java_To_Runtime(meth),
             post_call_FPU);
  ins_pipe(pipe_slow);
%}

// Call runtime without safepoint
// Same encoding sequence as CallRuntimeDirect plus a Verify_FPU_For_Leaf step
// between the call and post_call_FPU.
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8);  // E8 cd
  ins_encode(pre_call_resets,
             FFree_Float_Stack_All,
             Java_To_Runtime(meth),
             Verify_FPU_For_Leaf,
             post_call_FPU);
  ins_pipe(pipe_slow);
%}

// Leaf call for CallLeafNoFP: the encoding is only pre_call_resets followed
// by the runtime call, without the float-stack steps used by CallLeafDirect.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8);  // E8 cd
  ins_encode(pre_call_resets,
             Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe(pipe_jmp);
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop);
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4);  // Opcode FF /4
  ins_encode(OpcP, RegOpc(jump_target));
  ins_pipe(pipe_jmp);
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match(TailJump jump_target ex_oop);
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4);  // Opcode FF /4
  // enc_pop_rdx is emitted first, then the indirect jump.
  ins_encode(enc_pop_rdx,
             OpcP, RegOpc(jump_target));
  ins_pipe(pipe_jmp);
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
instruct CreateException(eAXRegP ex_oop) %{
  match(Set ex_oop (CreateEx));

  size(0);  // empty encoding below
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe(empty);
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe(pipe_jmp);
%}

// inlined locking and unlocking

// FastLock when the current compilation uses RTM.  Passes the two extra
// scratch registers (cx1, cx2), the RTM counters and the method's
// method_data to fast_lock, with the RTM flag set to true.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// FastLock when the current compilation does not use RTM.  The RTM-only
// arguments of fast_lock are passed as noreg / NULL / false.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

// FastUnlock; has no predicate and forwards the compilation's use_rtm()
// setting to fast_unlock.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
// Variant used when the safepoint mechanism polls the global page.
instruct safePoint_poll(eFlagsReg cr) %{
  predicate(SafepointMechanism::uses_global_page_poll());
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}

// Variant used when the safepoint mechanism uses a thread-local poll; the
// poll address is supplied in a register that is never EBP.
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  predicate(SafepointMechanism::uses_thread_local_poll());
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    // Remember where the instruction starts so its first byte can be checked.
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// The one active rule: a loadI (instruction 0) that follows a storeI
// (instruction 1), where the loaded register is the stored register and the
// memory operands are equal, is replaced by the storeI alone.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.