1//
2// Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4//
5// This code is free software; you can redistribute it and/or modify it
6// under the terms of the GNU General Public License version 2 only, as
7// published by the Free Software Foundation.
8//
9// This code is distributed in the hope that it will be useful, but WITHOUT
10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12// version 2 for more details (a copy is included in the LICENSE file that
13// accompanied this code).
14//
15// You should have received a copy of the GNU General Public License version
16// 2 along with this work; if not, write to the Free Software Foundation,
17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18//
19// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20// or visit www.oracle.com if you need additional information or have any
21// questions.
22//
23//
24
25// X86 Architecture Description File
26
27//----------REGISTER DEFINITION BLOCK------------------------------------------
28// This information is used by the matcher and the register allocator to
29// describe individual registers and classes of registers within the target
30// architecture.
31
32register %{
33//----------Architecture Description Register Definitions----------------------
34// General Registers
35// "reg_def"  name ( register save type, C convention save type,
36//                   ideal register type, encoding );
37// Register Save Types:
38//
39// NS  = No-Save:       The register allocator assumes that these registers
40//                      can be used without saving upon entry to the method, &
41//                      that they do not need to be saved at call sites.
42//
43// SOC = Save-On-Call:  The register allocator assumes that these registers
44//                      can be used without saving upon entry to the method,
45//                      but that they must be saved at call sites.
46//
47// SOE = Save-On-Entry: The register allocator assumes that these registers
48//                      must be saved before using them upon entry to the
49//                      method, but they do not need to be saved at call
50//                      sites.
51//
52// AS  = Always-Save:   The register allocator assumes that these registers
53//                      must be saved before using them upon entry to the
54//                      method, & that they must be saved at call sites.
55//
56// Ideal Register Type is used to determine how to save & restore a
57// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
59//
60// The encoding number is the actual bit-pattern placed into the opcodes.
61
62// General Registers
63// Previously set EBX, ESI, and EDI as save-on-entry for java code
64// Turn off SOE in java-code due to frequent use of uncommon-traps.
65// Now that allocator is better, turn on ESI and EDI as SOE registers.
66
67reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
68reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
69reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
70reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
71// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
72reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
73reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
74reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
75reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
76
77// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
78// allocator, and only shows up in the encodings.
79reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
80reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
81// Ok so here's the trick FPR1 is really st(0) except in the midst
82// of emission of assembly for a machnode. During the emission the fpu stack
83// is pushed making FPR1 == st(1) temporarily. However at any safepoint
84// the stack will not have this element so FPR1 == st(0) from the
85// oopMap viewpoint. This same weirdness with numbering causes
86// instruction encoding to have to play games with the register
87// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
88// where it does flt->flt moves to see an example
89//
90reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
91reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
92reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
93reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
94reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
95reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
96reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
97reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
98reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
99reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
100reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
101reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
102reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
103reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
104//
105// Empty fill registers, which are never used, but supply alignment to xmm regs
106//
107reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
108reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
109reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
110reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
111reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
112reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
113reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
114reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
115
116// Specify priority of register selection within phases of register
117// allocation.  Highest priority is first.  A useful heuristic is to
118// give registers a low priority when they are required by machine
119// instructions, like EAX and EDX.  Registers which are used as
120// pairs must fall on an even boundary (witness the FPR#L's in this list).
121// For the Intel integer registers, the equivalent Long pairs are
122// EDX:EAX, EBX:ECX, and EDI:EBP.
123alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
124                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
125                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
126                    FPR6L, FPR6H, FPR7L, FPR7H,
127                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
128
129
130//----------Architecture Description Register Classes--------------------------
131// Several register classes are automatically defined based upon information in
132// this architecture description.
133// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
134// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
135// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
136// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
137//
138// Class for no registers (empty set).
139reg_class no_reg();
140
141// Class for all registers
142reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
143// Class for all registers (excluding EBP)
144reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
145// Dynamic register class that selects at runtime between register classes
146// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
147// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
148reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
149
150// Class for general registers
151reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
152// Class for general registers (excluding EBP).
153// This register class can be used for implicit null checks on win95.
154// It is also safe for use by tailjumps (we don't want to allocate in ebp).
155// Used also if the PreserveFramePointer flag is true.
156reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
157// Dynamic register class that selects between int_reg and int_reg_no_ebp.
158reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
159
160// Class of "X" registers
161reg_class int_x_reg(EBX, ECX, EDX, EAX);
162
163// Class of registers that can appear in an address with no offset.
164// EBP and ESP require an extra instruction byte for zero offset.
165// Used in fast-unlock
166reg_class p_reg(EDX, EDI, ESI, EBX);
167
168// Class for general registers excluding ECX
169reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
170// Class for general registers excluding ECX (and EBP)
171reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
172// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
173reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
174
175// Class for general registers excluding EAX
176reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
177
178// Class for general registers excluding EAX and EBX.
179reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
180// Class for general registers excluding EAX and EBX (and EBP)
181reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
182// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
183reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
184
185// Class of EAX (for multiply and divide operations)
186reg_class eax_reg(EAX);
187
188// Class of EBX (for atomic add)
189reg_class ebx_reg(EBX);
190
191// Class of ECX (for shift and JCXZ operations and cmpLTMask)
192reg_class ecx_reg(ECX);
193
194// Class of EDX (for multiply and divide operations)
195reg_class edx_reg(EDX);
196
197// Class of EDI (for synchronization)
198reg_class edi_reg(EDI);
199
200// Class of ESI (for synchronization)
201reg_class esi_reg(ESI);
202
203// Singleton class for stack pointer
204reg_class sp_reg(ESP);
205
206// Singleton class for instruction pointer
207// reg_class ip_reg(EIP);
208
209// Class of integer register pairs
210reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
211// Class of integer register pairs (excluding EBP and EDI);
212reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
213// Dynamic register class that selects between long_reg and long_reg_no_ebp.
214reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
215
216// Class of integer register pairs that aligns with calling convention
217reg_class eadx_reg( EAX,EDX );
218reg_class ebcx_reg( ECX,EBX );
219
220// Not AX or DX, used in divides
221reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
222// Not AX or DX (and neither EBP), used in divides
223reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
224// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
225reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
226
227// Floating point registers.  Notice FPR0 is not a choice.
228// FPR0 is not ever allocated; we use clever encodings to fake
229// a 2-address instructions out of Intels FP stack.
230reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
231
232reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
233                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
234                      FPR7L,FPR7H );
235
236reg_class fp_flt_reg0( FPR1L );
237reg_class fp_dbl_reg0( FPR1L,FPR1H );
238reg_class fp_dbl_reg1( FPR2L,FPR2H );
239reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
240                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
241
242%}
243
244
245//----------SOURCE BLOCK-------------------------------------------------------
246// This is a block of C++ code which provides values, functions, and
247// definitions necessary in the rest of the architecture description
248source_hpp %{
249// Must be visible to the DFA in dfa_x86_32.cpp
250extern bool is_operand_hi32_zero(Node* n);
251%}
252
253source %{
254#define   RELOC_IMM32    Assembler::imm_operand
255#define   RELOC_DISP32   Assembler::disp32_operand
256
257#define __ _masm.
258
259// How to find the high register of a Long pair, given the low register
260#define   HIGH_FROM_LOW(x) ((x)+2)
261
262// These masks are used to provide 128-bit aligned bitmasks to the XMM
263// instructions, to allow sign-masking or sign-bit flipping.  They allow
264// fast versions of NegF/NegD and AbsF/AbsD.
265
266// Note: 'double' and 'long long' have 32-bits alignment on x86.
267static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
268  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
269  // of 128-bits operands for SSE instructions.
270  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
271  // Store the value to a 128-bits operand.
272  operand[0] = lo;
273  operand[1] = hi;
274  return operand;
275}
276
277// Buffer for 128-bits masks used by SSE instructions.
278static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
279
280// Static initialization during VM startup.
281static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
282static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
283static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
284static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
285
286// Offset hacking within calls.
287static int pre_call_resets_size() {
288  int size = 0;
289  Compile* C = Compile::current();
290  if (C->in_24_bit_fp_mode()) {
291    size += 6; // fldcw
292  }
293  if (VM_Version::supports_vzeroupper()) {
294    size += 3; // vzeroupper
295  }
296  return size;
297}
298
299// !!!!! Special hack to get all type of calls to specify the byte offset
300//       from the start of the call to the point where the return address
301//       will point.
302int MachCallStaticJavaNode::ret_addr_offset() {
303  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
304}
305
306int MachCallDynamicJavaNode::ret_addr_offset() {
307  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
308}
309
310static int sizeof_FFree_Float_Stack_All = -1;
311
312int MachCallRuntimeNode::ret_addr_offset() {
313  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
314  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
315}
316
317// Indicate if the safepoint node needs the polling page as an input.
318// Since x86 does have absolute addressing, it doesn't.
319bool SafePointNode::needs_polling_address_input() {
320  return SafepointMechanism::uses_thread_local_poll();
321}
322
323//
324// Compute padding required for nodes which need alignment
325//
326
327// The address of the call instruction needs to be 4-byte aligned to
328// ensure that it does not span a cache line so that it can be patched.
329int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
330  current_offset += pre_call_resets_size();  // skip fldcw, if any
331  current_offset += 1;      // skip call opcode byte
332  return align_up(current_offset, alignment_required()) - current_offset;
333}
334
335// The address of the call instruction needs to be 4-byte aligned to
336// ensure that it does not span a cache line so that it can be patched.
337int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
338  current_offset += pre_call_resets_size();  // skip fldcw, if any
339  current_offset += 5;      // skip MOV instruction
340  current_offset += 1;      // skip call opcode byte
341  return align_up(current_offset, alignment_required()) - current_offset;
342}
343
344// EMIT_RM()
345void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
346  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
347  cbuf.insts()->emit_int8(c);
348}
349
350// EMIT_CC()
351void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
352  unsigned char c = (unsigned char)( f1 | f2 );
353  cbuf.insts()->emit_int8(c);
354}
355
356// EMIT_OPCODE()
357void emit_opcode(CodeBuffer &cbuf, int code) {
358  cbuf.insts()->emit_int8((unsigned char) code);
359}
360
361// EMIT_OPCODE() w/ relocation information
362void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
363  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
364  emit_opcode(cbuf, code);
365}
366
367// EMIT_D8()
368void emit_d8(CodeBuffer &cbuf, int d8) {
369  cbuf.insts()->emit_int8((unsigned char) d8);
370}
371
372// EMIT_D16()
373void emit_d16(CodeBuffer &cbuf, int d16) {
374  cbuf.insts()->emit_int16(d16);
375}
376
377// EMIT_D32()
378void emit_d32(CodeBuffer &cbuf, int d32) {
379  cbuf.insts()->emit_int32(d32);
380}
381
382// emit 32 bit value and construct relocation entry from relocInfo::relocType
383void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
384        int format) {
385  cbuf.relocate(cbuf.insts_mark(), reloc, format);
386  cbuf.insts()->emit_int32(d32);
387}
388
389// emit 32 bit value and construct relocation entry from RelocationHolder
390void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
391        int format) {
392#ifdef ASSERT
393  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
394    assert(oopDesc::is_oop(cast_to_oop(d32)) && (ScavengeRootsInCode || !Universe::heap()->is_scavengable(cast_to_oop(d32))), "cannot embed scavengable oops in code");
395  }
396#endif
397  cbuf.relocate(cbuf.insts_mark(), rspec, format);
398  cbuf.insts()->emit_int32(d32);
399}
400
401// Access stack slot for load or store
402void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
403  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
404  if( -128 <= disp && disp <= 127 ) {
405    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
406    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
407    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
408  } else {
409    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
410    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
411    emit_d32(cbuf, disp);     // Displacement  // R/M byte
412  }
413}
414
415   // rRegI ereg, memory mem) %{    // emit_reg_mem
416void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
417  // There is no index & no scale, use form without SIB byte
418  if ((index == 0x4) &&
419      (scale == 0) && (base != ESP_enc)) {
420    // If no displacement, mode is 0x0; unless base is [EBP]
421    if ( (displace == 0) && (base != EBP_enc) ) {
422      emit_rm(cbuf, 0x0, reg_encoding, base);
423    }
424    else {                    // If 8-bit displacement, mode 0x1
425      if ((displace >= -128) && (displace <= 127)
426          && (disp_reloc == relocInfo::none) ) {
427        emit_rm(cbuf, 0x1, reg_encoding, base);
428        emit_d8(cbuf, displace);
429      }
430      else {                  // If 32-bit displacement
431        if (base == -1) { // Special flag for absolute address
432          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
433          // (manual lies; no SIB needed here)
434          if ( disp_reloc != relocInfo::none ) {
435            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
436          } else {
437            emit_d32      (cbuf, displace);
438          }
439        }
440        else {                // Normal base + offset
441          emit_rm(cbuf, 0x2, reg_encoding, base);
442          if ( disp_reloc != relocInfo::none ) {
443            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
444          } else {
445            emit_d32      (cbuf, displace);
446          }
447        }
448      }
449    }
450  }
451  else {                      // Else, encode with the SIB byte
452    // If no displacement, mode is 0x0; unless base is [EBP]
453    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
454      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
455      emit_rm(cbuf, scale, index, base);
456    }
457    else {                    // If 8-bit displacement, mode 0x1
458      if ((displace >= -128) && (displace <= 127)
459          && (disp_reloc == relocInfo::none) ) {
460        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
461        emit_rm(cbuf, scale, index, base);
462        emit_d8(cbuf, displace);
463      }
464      else {                  // If 32-bit displacement
465        if (base == 0x04 ) {
466          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
467          emit_rm(cbuf, scale, index, 0x04);
468        } else {
469          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
470          emit_rm(cbuf, scale, index, base);
471        }
472        if ( disp_reloc != relocInfo::none ) {
473          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
474        } else {
475          emit_d32      (cbuf, displace);
476        }
477      }
478    }
479  }
480}
481
482
483void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
484  if( dst_encoding == src_encoding ) {
485    // reg-reg copy, use an empty encoding
486  } else {
487    emit_opcode( cbuf, 0x8B );
488    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
489  }
490}
491
492void emit_cmpfp_fixup(MacroAssembler& _masm) {
493  Label exit;
494  __ jccb(Assembler::noParity, exit);
495  __ pushf();
496  //
497  // comiss/ucomiss instructions set ZF,PF,CF flags and
498  // zero OF,AF,SF for NaN values.
499  // Fixup flags by zeroing ZF,PF so that compare of NaN
500  // values returns 'less than' result (CF is set).
501  // Leave the rest of flags unchanged.
502  //
503  //    7 6 5 4 3 2 1 0
504  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
505  //    0 0 1 0 1 0 1 1   (0x2B)
506  //
507  __ andl(Address(rsp, 0), 0xffffff2b);
508  __ popf();
509  __ bind(exit);
510}
511
512void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
513  Label done;
514  __ movl(dst, -1);
515  __ jcc(Assembler::parity, done);
516  __ jcc(Assembler::below, done);
517  __ setb(Assembler::notEqual, dst);
518  __ movzbl(dst, dst);
519  __ bind(done);
520}
521
522
523//=============================================================================
524const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
525
526int Compile::ConstantTable::calculate_table_base_offset() const {
527  return 0;  // absolute addressing, no offset
528}
529
530bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
531void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
532  ShouldNotReachHere();
533}
534
535void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
536  // Empty encoding
537}
538
539uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
540  return 0;
541}
542
543#ifndef PRODUCT
544void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
545  st->print("# MachConstantBaseNode (empty encoding)");
546}
547#endif
548
549
550//=============================================================================
551#ifndef PRODUCT
552void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
553  Compile* C = ra_->C;
554
555  int framesize = C->frame_size_in_bytes();
556  int bangsize = C->bang_size_in_bytes();
557  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
558  // Remove wordSize for return addr which is already pushed.
559  framesize -= wordSize;
560
561  if (C->need_stack_bang(bangsize)) {
562    framesize -= wordSize;
563    st->print("# stack bang (%d bytes)", bangsize);
564    st->print("\n\t");
565    st->print("PUSH   EBP\t# Save EBP");
566    if (PreserveFramePointer) {
567      st->print("\n\t");
568      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
569    }
570    if (framesize) {
571      st->print("\n\t");
572      st->print("SUB    ESP, #%d\t# Create frame",framesize);
573    }
574  } else {
575    st->print("SUB    ESP, #%d\t# Create frame",framesize);
576    st->print("\n\t");
577    framesize -= wordSize;
578    st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
579    if (PreserveFramePointer) {
580      st->print("\n\t");
581      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
582      if (framesize > 0) {
583        st->print("\n\t");
584        st->print("ADD    EBP, #%d", framesize);
585      }
586    }
587  }
588
589  if (VerifyStackAtCalls) {
590    st->print("\n\t");
591    framesize -= wordSize;
592    st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
593  }
594
595  if( C->in_24_bit_fp_mode() ) {
596    st->print("\n\t");
597    st->print("FLDCW  \t# load 24 bit fpu control word");
598  }
599  if (UseSSE >= 2 && VerifyFPU) {
600    st->print("\n\t");
601    st->print("# verify FPU stack (must be clean on entry)");
602  }
603
604#ifdef ASSERT
605  if (VerifyStackAtCalls) {
606    st->print("\n\t");
607    st->print("# stack alignment check");
608  }
609#endif
610  st->cr();
611}
612#endif
613
614
615void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
616  Compile* C = ra_->C;
617  MacroAssembler _masm(&cbuf);
618
619  int framesize = C->frame_size_in_bytes();
620  int bangsize = C->bang_size_in_bytes();
621
622  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
623
624  C->set_frame_complete(cbuf.insts_size());
625
626  if (C->has_mach_constant_base_node()) {
627    // NOTE: We set the table base offset here because users might be
628    // emitted before MachConstantBaseNode.
629    Compile::ConstantTable& constant_table = C->constant_table();
630    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
631  }
632}
633
634uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
635  return MachNode::size(ra_); // too many variables; just compute it the hard way
636}
637
638int MachPrologNode::reloc() const {
639  return 0; // a large enough number
640}
641
642//=============================================================================
643#ifndef PRODUCT
644void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
645  Compile *C = ra_->C;
646  int framesize = C->frame_size_in_bytes();
647  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
648  // Remove two words for return addr and rbp,
649  framesize -= 2*wordSize;
650
651  if (C->max_vector_size() > 16) {
652    st->print("VZEROUPPER");
653    st->cr(); st->print("\t");
654  }
655  if (C->in_24_bit_fp_mode()) {
656    st->print("FLDCW  standard control word");
657    st->cr(); st->print("\t");
658  }
659  if (framesize) {
660    st->print("ADD    ESP,%d\t# Destroy frame",framesize);
661    st->cr(); st->print("\t");
662  }
663  st->print_cr("POPL   EBP"); st->print("\t");
664  if (do_polling() && C->is_method_compilation()) {
665    st->print("TEST   PollPage,EAX\t! Poll Safepoint");
666    st->cr(); st->print("\t");
667  }
668}
669#endif
670
671void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
672  Compile *C = ra_->C;
673  MacroAssembler _masm(&cbuf);
674
675  if (C->max_vector_size() > 16) {
676    // Clear upper bits of YMM registers when current compiled code uses
677    // wide vectors to avoid AVX <-> SSE transition penalty during call.
678    _masm.vzeroupper();
679  }
680  // If method set FPU control word, restore to standard control word
681  if (C->in_24_bit_fp_mode()) {
682    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
683  }
684
685  int framesize = C->frame_size_in_bytes();
686  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
687  // Remove two words for return addr and rbp,
688  framesize -= 2*wordSize;
689
690  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
691
692  if (framesize >= 128) {
693    emit_opcode(cbuf, 0x81); // add  SP, #framesize
694    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
695    emit_d32(cbuf, framesize);
696  } else if (framesize) {
697    emit_opcode(cbuf, 0x83); // add  SP, #framesize
698    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
699    emit_d8(cbuf, framesize);
700  }
701
702  emit_opcode(cbuf, 0x58 | EBP_enc);
703
704  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
705    __ reserved_stack_check();
706  }
707
708  if (do_polling() && C->is_method_compilation()) {
709    if (SafepointMechanism::uses_thread_local_poll()) {
710      Register pollReg = as_Register(EBX_enc);
711      MacroAssembler masm(&cbuf);
712      masm.get_thread(pollReg);
713      masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
714      masm.relocate(relocInfo::poll_return_type);
715      masm.testl(rax, Address(pollReg, 0));
716    } else {
717      cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
718      emit_opcode(cbuf,0x85);
719      emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
720      emit_d32(cbuf, (intptr_t)os::get_polling_page());
721    }
722  }
723}
724
725uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
726  return MachNode::size(ra_); // too many variables; just compute it
727                              // the hard way
728}
729
730int MachEpilogNode::reloc() const {
731  return 0; // a large enough number
732}
733
734const Pipeline * MachEpilogNode::pipeline() const {
735  return MachNode::pipeline_class();
736}
737
738int MachEpilogNode::safepoint_offset() const { return 0; }
739
740//=============================================================================
741
742enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
743static enum RC rc_class( OptoReg::Name reg ) {
744
745  if( !OptoReg::is_valid(reg)  ) return rc_bad;
746  if (OptoReg::is_stack(reg)) return rc_stack;
747
748  VMReg r = OptoReg::as_VMReg(reg);
749  if (r->is_Register()) return rc_int;
750  if (r->is_FloatRegister()) {
751    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
752    return rc_float;
753  }
754  assert(r->is_XMMRegister(), "must be");
755  return rc_xmm;
756}
757
758static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
759                        int opcode, const char *op_str, int size, outputStream* st ) {
760  if( cbuf ) {
761    emit_opcode  (*cbuf, opcode );
762    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
763#ifndef PRODUCT
764  } else if( !do_size ) {
765    if( size != 0 ) st->print("\n\t");
766    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
767      if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
768      else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
769    } else { // FLD, FST, PUSH, POP
770      st->print("%s [ESP + #%d]",op_str,offset);
771    }
772#endif
773  }
774  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
775  return size+3+offset_size;
776}
777
778// Helper for XMM registers.  Extra opcode bits, limited syntax.
779static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
780                         int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
781  int in_size_in_bits = Assembler::EVEX_32bit;
782  int evex_encoding = 0;
783  if (reg_lo+1 == reg_hi) {
784    in_size_in_bits = Assembler::EVEX_64bit;
785    evex_encoding = Assembler::VEX_W;
786  }
787  if (cbuf) {
788    MacroAssembler _masm(cbuf);
789    // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
790    //                          it maps more cases to single byte displacement
791    _masm.set_managed();
792    if (reg_lo+1 == reg_hi) { // double move?
793      if (is_load) {
794        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
795      } else {
796        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
797      }
798    } else {
799      if (is_load) {
800        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
801      } else {
802        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
803      }
804    }
805#ifndef PRODUCT
806  } else if (!do_size) {
807    if (size != 0) st->print("\n\t");
808    if (reg_lo+1 == reg_hi) { // double move?
809      if (is_load) st->print("%s %s,[ESP + #%d]",
810                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
811                              Matcher::regName[reg_lo], offset);
812      else         st->print("MOVSD  [ESP + #%d],%s",
813                              offset, Matcher::regName[reg_lo]);
814    } else {
815      if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
816                              Matcher::regName[reg_lo], offset);
817      else         st->print("MOVSS  [ESP + #%d],%s",
818                              offset, Matcher::regName[reg_lo]);
819    }
820#endif
821  }
822  bool is_single_byte = false;
823  if ((UseAVX > 2) && (offset != 0)) {
824    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
825  }
826  int offset_size = 0;
827  if (UseAVX > 2 ) {
828    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
829  } else {
830    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
831  }
832  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
833  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
834  return size+5+offset_size;
835}
836
837
838static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
839                            int src_hi, int dst_hi, int size, outputStream* st ) {
840  if (cbuf) {
841    MacroAssembler _masm(cbuf);
842    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
843    _masm.set_managed();
844    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
845      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
846                as_XMMRegister(Matcher::_regEncode[src_lo]));
847    } else {
848      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
849                as_XMMRegister(Matcher::_regEncode[src_lo]));
850    }
851#ifndef PRODUCT
852  } else if (!do_size) {
853    if (size != 0) st->print("\n\t");
854    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
855      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
856        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
857      } else {
858        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
859      }
860    } else {
861      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
862        st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
863      } else {
864        st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
865      }
866    }
867#endif
868  }
869  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
870  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
871  int sz = (UseAVX > 2) ? 6 : 4;
872  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
873      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
874  return size + sz;
875}
876
877static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
878                            int src_hi, int dst_hi, int size, outputStream* st ) {
879  // 32-bit
880  if (cbuf) {
881    MacroAssembler _masm(cbuf);
882    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
883    _masm.set_managed();
884    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
885             as_Register(Matcher::_regEncode[src_lo]));
886#ifndef PRODUCT
887  } else if (!do_size) {
888    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
889#endif
890  }
891  return (UseAVX> 2) ? 6 : 4;
892}
893
894
895static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
896                                 int src_hi, int dst_hi, int size, outputStream* st ) {
897  // 32-bit
898  if (cbuf) {
899    MacroAssembler _masm(cbuf);
900    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
901    _masm.set_managed();
902    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
903             as_XMMRegister(Matcher::_regEncode[src_lo]));
904#ifndef PRODUCT
905  } else if (!do_size) {
906    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
907#endif
908  }
909  return (UseAVX> 2) ? 6 : 4;
910}
911
912static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
913  if( cbuf ) {
914    emit_opcode(*cbuf, 0x8B );
915    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
916#ifndef PRODUCT
917  } else if( !do_size ) {
918    if( size != 0 ) st->print("\n\t");
919    st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
920#endif
921  }
922  return size+2;
923}
924
925static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
926                                 int offset, int size, outputStream* st ) {
927  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
928    if( cbuf ) {
929      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
930      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
931#ifndef PRODUCT
932    } else if( !do_size ) {
933      if( size != 0 ) st->print("\n\t");
934      st->print("FLD    %s",Matcher::regName[src_lo]);
935#endif
936    }
937    size += 2;
938  }
939
940  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
941  const char *op_str;
942  int op;
943  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
944    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
945    op = 0xDD;
946  } else {                   // 32-bit store
947    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
948    op = 0xD9;
949    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
950  }
951
952  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
953}
954
955// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
956static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
957                          int src_hi, int dst_hi, uint ireg, outputStream* st);
958
959static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
960                            int stack_offset, int reg, uint ireg, outputStream* st);
961
962static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
963                                     int dst_offset, uint ireg, outputStream* st) {
964  int calc_size = 0;
965  int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
966  int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
967  switch (ireg) {
968  case Op_VecS:
969    calc_size = 3+src_offset_size + 3+dst_offset_size;
970    break;
971  case Op_VecD: {
972    calc_size = 3+src_offset_size + 3+dst_offset_size;
973    int tmp_src_offset = src_offset + 4;
974    int tmp_dst_offset = dst_offset + 4;
975    src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
976    dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
977    calc_size += 3+src_offset_size + 3+dst_offset_size;
978    break;
979  }
980  case Op_VecX:
981  case Op_VecY:
982  case Op_VecZ:
983    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
984    break;
985  default:
986    ShouldNotReachHere();
987  }
988  if (cbuf) {
989    MacroAssembler _masm(cbuf);
990    int offset = __ offset();
991    switch (ireg) {
992    case Op_VecS:
993      __ pushl(Address(rsp, src_offset));
994      __ popl (Address(rsp, dst_offset));
995      break;
996    case Op_VecD:
997      __ pushl(Address(rsp, src_offset));
998      __ popl (Address(rsp, dst_offset));
999      __ pushl(Address(rsp, src_offset+4));
1000      __ popl (Address(rsp, dst_offset+4));
1001      break;
1002    case Op_VecX:
1003      __ movdqu(Address(rsp, -16), xmm0);
1004      __ movdqu(xmm0, Address(rsp, src_offset));
1005      __ movdqu(Address(rsp, dst_offset), xmm0);
1006      __ movdqu(xmm0, Address(rsp, -16));
1007      break;
1008    case Op_VecY:
1009      __ vmovdqu(Address(rsp, -32), xmm0);
1010      __ vmovdqu(xmm0, Address(rsp, src_offset));
1011      __ vmovdqu(Address(rsp, dst_offset), xmm0);
1012      __ vmovdqu(xmm0, Address(rsp, -32));
1013      break;
1014    case Op_VecZ:
1015      __ evmovdquq(Address(rsp, -64), xmm0, 2);
1016      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1017      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1018      __ evmovdquq(xmm0, Address(rsp, -64), 2);
1019      break;
1020    default:
1021      ShouldNotReachHere();
1022    }
1023    int size = __ offset() - offset;
1024    assert(size == calc_size, "incorrect size calculation");
1025    return size;
1026#ifndef PRODUCT
1027  } else if (!do_size) {
1028    switch (ireg) {
1029    case Op_VecS:
1030      st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1031                "popl    [rsp + #%d]",
1032                src_offset, dst_offset);
1033      break;
1034    case Op_VecD:
1035      st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1036                "popq    [rsp + #%d]\n\t"
1037                "pushl   [rsp + #%d]\n\t"
1038                "popq    [rsp + #%d]",
1039                src_offset, dst_offset, src_offset+4, dst_offset+4);
1040      break;
1041     case Op_VecX:
1042      st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1043                "movdqu  xmm0, [rsp + #%d]\n\t"
1044                "movdqu  [rsp + #%d], xmm0\n\t"
1045                "movdqu  xmm0, [rsp - #16]",
1046                src_offset, dst_offset);
1047      break;
1048    case Op_VecY:
1049      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1050                "vmovdqu xmm0, [rsp + #%d]\n\t"
1051                "vmovdqu [rsp + #%d], xmm0\n\t"
1052                "vmovdqu xmm0, [rsp - #32]",
1053                src_offset, dst_offset);
1054      break;
1055    case Op_VecZ:
1056      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1057                "vmovdqu xmm0, [rsp + #%d]\n\t"
1058                "vmovdqu [rsp + #%d], xmm0\n\t"
1059                "vmovdqu xmm0, [rsp - #64]",
1060                src_offset, dst_offset);
1061      break;
1062    default:
1063      ShouldNotReachHere();
1064    }
1065#endif
1066  }
1067  return calc_size;
1068}
1069
1070uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1071  // Get registers to move
1072  OptoReg::Name src_second = ra_->get_reg_second(in(1));
1073  OptoReg::Name src_first = ra_->get_reg_first(in(1));
1074  OptoReg::Name dst_second = ra_->get_reg_second(this );
1075  OptoReg::Name dst_first = ra_->get_reg_first(this );
1076
1077  enum RC src_second_rc = rc_class(src_second);
1078  enum RC src_first_rc = rc_class(src_first);
1079  enum RC dst_second_rc = rc_class(dst_second);
1080  enum RC dst_first_rc = rc_class(dst_first);
1081
1082  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1083
1084  // Generate spill code!
1085  int size = 0;
1086
1087  if( src_first == dst_first && src_second == dst_second )
1088    return size;            // Self copy, no move
1089
1090  if (bottom_type()->isa_vect() != NULL) {
1091    uint ireg = ideal_reg();
1092    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1093    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1094    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1095    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1096      // mem -> mem
1097      int src_offset = ra_->reg2offset(src_first);
1098      int dst_offset = ra_->reg2offset(dst_first);
1099      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1100    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1101      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1102    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1103      int stack_offset = ra_->reg2offset(dst_first);
1104      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1105    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1106      int stack_offset = ra_->reg2offset(src_first);
1107      return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1108    } else {
1109      ShouldNotReachHere();
1110    }
1111  }
1112
1113  // --------------------------------------
1114  // Check for mem-mem move.  push/pop to move.
1115  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1116    if( src_second == dst_first ) { // overlapping stack copy ranges
1117      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1118      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1119      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1120      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1121    }
1122    // move low bits
1123    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1124    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1125    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1126      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1127      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1128    }
1129    return size;
1130  }
1131
1132  // --------------------------------------
1133  // Check for integer reg-reg copy
1134  if( src_first_rc == rc_int && dst_first_rc == rc_int )
1135    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1136
1137  // Check for integer store
1138  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1139    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1140
1141  // Check for integer load
1142  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1143    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1144
1145  // Check for integer reg-xmm reg copy
1146  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1147    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1148            "no 64 bit integer-float reg moves" );
1149    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1150  }
1151  // --------------------------------------
1152  // Check for float reg-reg copy
1153  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1154    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1155            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1156    if( cbuf ) {
1157
1158      // Note the mucking with the register encode to compensate for the 0/1
1159      // indexing issue mentioned in a comment in the reg_def sections
1160      // for FPR registers many lines above here.
1161
1162      if( src_first != FPR1L_num ) {
1163        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1164        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1165        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1166        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1167     } else {
1168        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1169        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1170     }
1171#ifndef PRODUCT
1172    } else if( !do_size ) {
1173      if( size != 0 ) st->print("\n\t");
1174      if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1175      else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1176#endif
1177    }
1178    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1179  }
1180
1181  // Check for float store
1182  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1183    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1184  }
1185
1186  // Check for float load
1187  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1188    int offset = ra_->reg2offset(src_first);
1189    const char *op_str;
1190    int op;
1191    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1192      op_str = "FLD_D";
1193      op = 0xDD;
1194    } else {                   // 32-bit load
1195      op_str = "FLD_S";
1196      op = 0xD9;
1197      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1198    }
1199    if( cbuf ) {
1200      emit_opcode  (*cbuf, op );
1201      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1202      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1203      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1204#ifndef PRODUCT
1205    } else if( !do_size ) {
1206      if( size != 0 ) st->print("\n\t");
1207      st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1208#endif
1209    }
1210    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1211    return size + 3+offset_size+2;
1212  }
1213
1214  // Check for xmm reg-reg copy
1215  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1216    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1217            (src_first+1 == src_second && dst_first+1 == dst_second),
1218            "no non-adjacent float-moves" );
1219    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1220  }
1221
1222  // Check for xmm reg-integer reg copy
1223  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1224    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1225            "no 64 bit float-integer reg moves" );
1226    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1227  }
1228
1229  // Check for xmm store
1230  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1231    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1232  }
1233
1234  // Check for float xmm load
1235  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1236    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1237  }
1238
1239  // Copy from float reg to xmm reg
1240  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1241    // copy to the top of stack from floating point reg
1242    // and use LEA to preserve flags
1243    if( cbuf ) {
1244      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1245      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1246      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1247      emit_d8(*cbuf,0xF8);
1248#ifndef PRODUCT
1249    } else if( !do_size ) {
1250      if( size != 0 ) st->print("\n\t");
1251      st->print("LEA    ESP,[ESP-8]");
1252#endif
1253    }
1254    size += 4;
1255
1256    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1257
1258    // Copy from the temp memory to the xmm reg.
1259    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1260
1261    if( cbuf ) {
1262      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1263      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1264      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1265      emit_d8(*cbuf,0x08);
1266#ifndef PRODUCT
1267    } else if( !do_size ) {
1268      if( size != 0 ) st->print("\n\t");
1269      st->print("LEA    ESP,[ESP+8]");
1270#endif
1271    }
1272    size += 4;
1273    return size;
1274  }
1275
1276  assert( size > 0, "missed a case" );
1277
1278  // --------------------------------------------------------------------
1279  // Check for second bits still needing moving.
1280  if( src_second == dst_second )
1281    return size;               // Self copy; no move
1282  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1283
1284  // Check for second word int-int move
1285  if( src_second_rc == rc_int && dst_second_rc == rc_int )
1286    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1287
1288  // Check for second word integer store
1289  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1290    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1291
1292  // Check for second word integer load
1293  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1294    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1295
1296
1297  Unimplemented();
1298  return 0; // Mute compiler
1299}
1300
1301#ifndef PRODUCT
1302void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1303  implementation( NULL, ra_, false, st );
1304}
1305#endif
1306
1307void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1308  implementation( &cbuf, ra_, false, NULL );
1309}
1310
1311uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1312  return MachNode::size(ra_);
1313}
1314
1315
1316//=============================================================================
1317#ifndef PRODUCT
1318void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1319  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1320  int reg = ra_->get_reg_first(this);
1321  st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1322}
1323#endif
1324
1325void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1326  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1327  int reg = ra_->get_encode(this);
1328  if( offset >= 128 ) {
1329    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1330    emit_rm(cbuf, 0x2, reg, 0x04);
1331    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1332    emit_d32(cbuf, offset);
1333  }
1334  else {
1335    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1336    emit_rm(cbuf, 0x1, reg, 0x04);
1337    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1338    emit_d8(cbuf, offset);
1339  }
1340}
1341
1342uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1343  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1344  if( offset >= 128 ) {
1345    return 7;
1346  }
1347  else {
1348    return 4;
1349  }
1350}
1351
1352//=============================================================================
1353#ifndef PRODUCT
1354void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1355  st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1356  st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1357  st->print_cr("\tNOP");
1358  st->print_cr("\tNOP");
1359  if( !OptoBreakpoint )
1360    st->print_cr("\tNOP");
1361}
1362#endif
1363
1364void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1365  MacroAssembler masm(&cbuf);
1366#ifdef ASSERT
1367  uint insts_size = cbuf.insts_size();
1368#endif
1369  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1370  masm.jump_cc(Assembler::notEqual,
1371               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1372  /* WARNING these NOPs are critical so that verified entry point is properly
1373     aligned for patching by NativeJump::patch_verified_entry() */
1374  int nops_cnt = 2;
1375  if( !OptoBreakpoint ) // Leave space for int3
1376     nops_cnt += 1;
1377  masm.nop(nops_cnt);
1378
1379  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1380}
1381
1382uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1383  return OptoBreakpoint ? 11 : 12;
1384}
1385
1386
1387//=============================================================================
1388
1389int Matcher::regnum_to_fpu_offset(int regnum) {
1390  return regnum - 32; // The FP registers are in the second chunk
1391}
1392
1393// This is UltraSparc specific, true just means we have fast l2f conversion
1394const bool Matcher::convL2FSupported(void) {
1395  return true;
1396}
1397
1398// Is this branch offset short enough that a short branch can be used?
1399//
1400// NOTE: If the platform does not provide any short branch variants, then
1401//       this method should return false for offset 0.
1402bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1403  // The passed offset is relative to address of the branch.
1404  // On 86 a branch displacement is calculated relative to address
1405  // of a next instruction.
1406  offset -= br_size;
1407
1408  // the short version of jmpConUCF2 contains multiple branches,
1409  // making the reach slightly less
1410  if (rule == jmpConUCF2_rule)
1411    return (-126 <= offset && offset <= 125);
1412  return (-128 <= offset && offset <= 127);
1413}
1414
1415const bool Matcher::isSimpleConstant64(jlong value) {
1416  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1417  return false;
1418}
1419
1420// The ecx parameter to rep stos for the ClearArray node is in dwords.
1421const bool Matcher::init_array_count_is_in_bytes = false;
1422
1423// Needs 2 CMOV's for longs.
1424const int Matcher::long_cmove_cost() { return 1; }
1425
1426// No CMOVF/CMOVD with SSE/SSE2
1427const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1428
1429// Does the CPU require late expand (see block.cpp for description of late expand)?
1430const bool Matcher::require_postalloc_expand = false;
1431
1432// Do we need to mask the count passed to shift instructions or does
1433// the cpu only look at the lower 5/6 bits anyway?
1434const bool Matcher::need_masked_shift_count = false;
1435
1436bool Matcher::narrow_oop_use_complex_address() {
1437  ShouldNotCallThis();
1438  return true;
1439}
1440
1441bool Matcher::narrow_klass_use_complex_address() {
1442  ShouldNotCallThis();
1443  return true;
1444}
1445
1446bool Matcher::const_oop_prefer_decode() {
1447  ShouldNotCallThis();
1448  return true;
1449}
1450
1451bool Matcher::const_klass_prefer_decode() {
1452  ShouldNotCallThis();
1453  return true;
1454}
1455
1456// Is it better to copy float constants, or load them directly from memory?
1457// Intel can load a float constant from a direct address, requiring no
1458// extra registers.  Most RISCs will have to materialize an address into a
1459// register first, so they would do better to copy the constant from stack.
1460const bool Matcher::rematerialize_float_constants = true;
1461
1462// If CPU can load and store mis-aligned doubles directly then no fixup is
1463// needed.  Else we split the double into 2 integer pieces and move it
1464// piece-by-piece.  Only happens when passing doubles into C code as the
1465// Java calling convention forces doubles to be aligned.
1466const bool Matcher::misaligned_doubles_ok = true;
1467
1468
1469void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1470  // Get the memory operand from the node
1471  uint numopnds = node->num_opnds();        // Virtual call for number of operands
1472  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1473  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1474  uint opcnt     = 1;                 // First operand
1475  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1476  while( idx >= skipped+num_edges ) {
1477    skipped += num_edges;
1478    opcnt++;                          // Bump operand count
1479    assert( opcnt < numopnds, "Accessing non-existent operand" );
1480    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1481  }
1482
1483  MachOper *memory = node->_opnds[opcnt];
1484  MachOper *new_memory = NULL;
1485  switch (memory->opcode()) {
1486  case DIRECT:
1487  case INDOFFSET32X:
1488    // No transformation necessary.
1489    return;
1490  case INDIRECT:
1491    new_memory = new indirect_win95_safeOper( );
1492    break;
1493  case INDOFFSET8:
1494    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1495    break;
1496  case INDOFFSET32:
1497    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1498    break;
1499  case INDINDEXOFFSET:
1500    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1501    break;
1502  case INDINDEXSCALE:
1503    new_memory = new indIndexScale_win95_safeOper(memory->scale());
1504    break;
1505  case INDINDEXSCALEOFFSET:
1506    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1507    break;
1508  case LOAD_LONG_INDIRECT:
1509  case LOAD_LONG_INDOFFSET32:
1510    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
1511    return;
1512  default:
1513    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1514    return;
1515  }
1516  node->_opnds[opcnt] = new_memory;
1517}
1518
1519// Advertise here if the CPU requires explicit rounding operations
1520// to implement the UseStrictFP mode.
1521const bool Matcher::strict_fp_requires_explicit_rounding = true;
1522
1523// Are floats conerted to double when stored to stack during deoptimization?
1524// On x32 it is stored with convertion only when FPU is used for floats.
1525bool Matcher::float_in_double() { return (UseSSE == 0); }
1526
1527// Do ints take an entire long register or just half?
1528const bool Matcher::int_in_long = false;
1529
1530// Return whether or not this register is ever used as an argument.  This
1531// function is used on startup to build the trampoline stubs in generateOptoStub.
1532// Registers not mentioned will be killed by the VM call in the trampoline, and
1533// arguments in those registers not be available to the callee.
1534bool Matcher::can_be_java_arg( int reg ) {
1535  if(  reg == ECX_num   || reg == EDX_num   ) return true;
1536  if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1537  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1538  return false;
1539}
1540
1541bool Matcher::is_spillable_arg( int reg ) {
1542  return can_be_java_arg(reg);
1543}
1544
1545bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1546  // Use hardware integer DIV instruction when
1547  // it is faster than a code which use multiply.
1548  // Only when constant divisor fits into 32 bit
1549  // (min_jint is excluded to get only correct
1550  // positive 32 bit values from negative).
1551  return VM_Version::has_fast_idiv() &&
1552         (divisor == (int)divisor && divisor != min_jint);
1553}
1554
1555// Register for DIVI projection of divmodI
1556RegMask Matcher::divI_proj_mask() {
1557  return EAX_REG_mask();
1558}
1559
1560// Register for MODI projection of divmodI
1561RegMask Matcher::modI_proj_mask() {
1562  return EDX_REG_mask();
1563}
1564
1565// Register for DIVL projection of divmodL
1566RegMask Matcher::divL_proj_mask() {
1567  ShouldNotReachHere();
1568  return RegMask();
1569}
1570
1571// Register for MODL projection of divmodL
1572RegMask Matcher::modL_proj_mask() {
1573  ShouldNotReachHere();
1574  return RegMask();
1575}
1576
1577const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1578  return NO_REG_mask();
1579}
1580
1581// Returns true if the high 32 bits of the value is known to be zero.
1582bool is_operand_hi32_zero(Node* n) {
1583  int opc = n->Opcode();
1584  if (opc == Op_AndL) {
1585    Node* o2 = n->in(2);
1586    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1587      return true;
1588    }
1589  }
1590  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1591    return true;
1592  }
1593  return false;
1594}
1595
1596%}
1597
1598//----------ENCODING BLOCK-----------------------------------------------------
1599// This block specifies the encoding classes used by the compiler to output
1600// byte streams.  Encoding classes generate functions which are called by
1601// Machine Instruction Nodes in order to generate the bit encoding of the
1602// instruction.  Operands specify their base encoding interface with the
1603// interface keyword.  There are currently supported four interfaces,
1604// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1605// operand to generate a function which returns its register number when
1606// queried.   CONST_INTER causes an operand to generate a function which
1607// returns the value of the constant when queried.  MEMORY_INTER causes an
1608// operand to generate four functions which return the Base Register, the
1609// Index Register, the Scale Value, and the Offset Value of the operand when
1610// queried.  COND_INTER causes an operand to generate six functions which
1611// return the encoding code (ie - encoding bits for the instruction)
1612// associated with each basic boolean condition for a conditional instruction.
1613// Instructions specify two basic values for encoding.  They use the
1614// ins_encode keyword to specify their encoding class (which must be one of
1615// the class names specified in the encoding block), and they use the
1616// opcode keyword to specify, in order, their primary, secondary, and
1617// tertiary opcode.  Only the opcode sections which a particular instruction
1618// needs for encoding need to be specified.
1619encode %{
1620  // Build emit functions for each basic byte or larger field in the intel
1621  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1622  // code in the enc_class source block.  Emit functions will live in the
1623  // main source block for now.  In future, we can generalize this by
1624  // adding a syntax that specifies the sizes of fields in an order,
1625  // so that the adlc can build the emit functions automagically
1626
1627  // Emit primary opcode
1628  enc_class OpcP %{
1629    emit_opcode(cbuf, $primary);
1630  %}
1631
1632  // Emit secondary opcode
1633  enc_class OpcS %{
1634    emit_opcode(cbuf, $secondary);
1635  %}
1636
1637  // Emit opcode directly
1638  enc_class Opcode(immI d8) %{
1639    emit_opcode(cbuf, $d8$$constant);
1640  %}
1641
1642  enc_class SizePrefix %{
1643    emit_opcode(cbuf,0x66);
1644  %}
1645
1646  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1647    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1648  %}
1649
1650  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1651    emit_opcode(cbuf,$opcode$$constant);
1652    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1653  %}
1654
1655  enc_class mov_r32_imm0( rRegI dst ) %{
1656    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1657    emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1658  %}
1659
1660  enc_class cdq_enc %{
1661    // Full implementation of Java idiv and irem; checks for
1662    // special case as described in JVM spec., p.243 & p.271.
1663    //
1664    //         normal case                           special case
1665    //
1666    // input : rax,: dividend                         min_int
1667    //         reg: divisor                          -1
1668    //
1669    // output: rax,: quotient  (= rax, idiv reg)       min_int
1670    //         rdx: remainder (= rax, irem reg)       0
1671    //
1672    //  Code sequnce:
1673    //
1674    //  81 F8 00 00 00 80    cmp         rax,80000000h
1675    //  0F 85 0B 00 00 00    jne         normal_case
1676    //  33 D2                xor         rdx,edx
1677    //  83 F9 FF             cmp         rcx,0FFh
1678    //  0F 84 03 00 00 00    je          done
1679    //                  normal_case:
1680    //  99                   cdq
1681    //  F7 F9                idiv        rax,ecx
1682    //                  done:
1683    //
1684    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1685    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1686    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1687    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1688    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1689    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1690    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1691    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1692    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1693    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1694    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1695    // normal_case:
1696    emit_opcode(cbuf,0x99);                                         // cdq
1697    // idiv (note: must be emitted by the user of this rule)
1698    // normal:
1699  %}
1700
1701  // Dense encoding for older common ops
1702  enc_class Opc_plus(immI opcode, rRegI reg) %{
1703    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1704  %}
1705
1706
1707  // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
1708  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1709    // Check for 8-bit immediate, and set sign extend bit in opcode
1710    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1711      emit_opcode(cbuf, $primary | 0x02);
1712    }
1713    else {                          // If 32-bit immediate
1714      emit_opcode(cbuf, $primary);
1715    }
1716  %}
1717
1718  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1719    // Emit primary opcode and set sign-extend bit
1720    // Check for 8-bit immediate, and set sign extend bit in opcode
1721    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1722      emit_opcode(cbuf, $primary | 0x02);    }
1723    else {                          // If 32-bit immediate
1724      emit_opcode(cbuf, $primary);
1725    }
1726    // Emit r/m byte with secondary opcode, after primary opcode.
1727    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1728  %}
1729
1730  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1731    // Check for 8-bit immediate, and set sign extend bit in opcode
1732    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1733      $$$emit8$imm$$constant;
1734    }
1735    else {                          // If 32-bit immediate
1736      // Output immediate
1737      $$$emit32$imm$$constant;
1738    }
1739  %}
1740
1741  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1742    // Emit primary opcode and set sign-extend bit
1743    // Check for 8-bit immediate, and set sign extend bit in opcode
1744    int con = (int)$imm$$constant; // Throw away top bits
1745    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1746    // Emit r/m byte with secondary opcode, after primary opcode.
1747    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1748    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1749    else                               emit_d32(cbuf,con);
1750  %}
1751
1752  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1753    // Emit primary opcode and set sign-extend bit
1754    // Check for 8-bit immediate, and set sign extend bit in opcode
1755    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1756    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1757    // Emit r/m byte with tertiary opcode, after primary opcode.
1758    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1759    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1760    else                               emit_d32(cbuf,con);
1761  %}
1762
1763  enc_class OpcSReg (rRegI dst) %{    // BSWAP
1764    emit_cc(cbuf, $secondary, $dst$$reg );
1765  %}
1766
1767  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1768    int destlo = $dst$$reg;
1769    int desthi = HIGH_FROM_LOW(destlo);
1770    // bswap lo
1771    emit_opcode(cbuf, 0x0F);
1772    emit_cc(cbuf, 0xC8, destlo);
1773    // bswap hi
1774    emit_opcode(cbuf, 0x0F);
1775    emit_cc(cbuf, 0xC8, desthi);
1776    // xchg lo and hi
1777    emit_opcode(cbuf, 0x87);
1778    emit_rm(cbuf, 0x3, destlo, desthi);
1779  %}
1780
1781  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1782    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1783  %}
1784
1785  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1786    $$$emit8$primary;
1787    emit_cc(cbuf, $secondary, $cop$$cmpcode);
1788  %}
1789
1790  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1791    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1792    emit_d8(cbuf, op >> 8 );
1793    emit_d8(cbuf, op & 255);
1794  %}
1795
1796  // emulate a CMOV with a conditional branch around a MOV
1797  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1798    // Invert sense of branch from sense of CMOV
1799    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1800    emit_d8( cbuf, $brOffs$$constant );
1801  %}
1802
1803  enc_class enc_PartialSubtypeCheck( ) %{
1804    Register Redi = as_Register(EDI_enc); // result register
1805    Register Reax = as_Register(EAX_enc); // super class
1806    Register Recx = as_Register(ECX_enc); // killed
1807    Register Resi = as_Register(ESI_enc); // sub class
1808    Label miss;
1809
1810    MacroAssembler _masm(&cbuf);
1811    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1812                                     NULL, &miss,
1813                                     /*set_cond_codes:*/ true);
1814    if ($primary) {
1815      __ xorptr(Redi, Redi);
1816    }
1817    __ bind(miss);
1818  %}
1819
1820  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1821    MacroAssembler masm(&cbuf);
1822    int start = masm.offset();
1823    if (UseSSE >= 2) {
1824      if (VerifyFPU) {
1825        masm.verify_FPU(0, "must be empty in SSE2+ mode");
1826      }
1827    } else {
1828      // External c_calling_convention expects the FPU stack to be 'clean'.
1829      // Compiled code leaves it dirty.  Do cleanup now.
1830      masm.empty_FPU_stack();
1831    }
1832    if (sizeof_FFree_Float_Stack_All == -1) {
1833      sizeof_FFree_Float_Stack_All = masm.offset() - start;
1834    } else {
1835      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1836    }
1837  %}
1838
1839  enc_class Verify_FPU_For_Leaf %{
1840    if( VerifyFPU ) {
1841      MacroAssembler masm(&cbuf);
1842      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1843    }
1844  %}
1845
1846  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1847    // This is the instruction starting address for relocation info.
1848    cbuf.set_insts_mark();
1849    $$$emit8$primary;
1850    // CALL directly to the runtime
1851    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1852                runtime_call_Relocation::spec(), RELOC_IMM32 );
1853
1854    if (UseSSE >= 2) {
1855      MacroAssembler _masm(&cbuf);
1856      BasicType rt = tf()->return_type();
1857
1858      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1859        // A C runtime call where the return value is unused.  In SSE2+
1860        // mode the result needs to be removed from the FPU stack.  It's
1861        // likely that this function call could be removed by the
1862        // optimizer if the C function is a pure function.
1863        __ ffree(0);
1864      } else if (rt == T_FLOAT) {
1865        __ lea(rsp, Address(rsp, -4));
1866        __ fstp_s(Address(rsp, 0));
1867        __ movflt(xmm0, Address(rsp, 0));
1868        __ lea(rsp, Address(rsp,  4));
1869      } else if (rt == T_DOUBLE) {
1870        __ lea(rsp, Address(rsp, -8));
1871        __ fstp_d(Address(rsp, 0));
1872        __ movdbl(xmm0, Address(rsp, 0));
1873        __ lea(rsp, Address(rsp,  8));
1874      }
1875    }
1876  %}
1877
1878  enc_class pre_call_resets %{
1879    // If method sets FPU control word restore it here
1880    debug_only(int off0 = cbuf.insts_size());
1881    if (ra_->C->in_24_bit_fp_mode()) {
1882      MacroAssembler _masm(&cbuf);
1883      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1884    }
1885    // Clear upper bits of YMM registers when current compiled code uses
1886    // wide vectors to avoid AVX <-> SSE transition penalty during call.
1887    MacroAssembler _masm(&cbuf);
1888    __ vzeroupper();
1889    debug_only(int off1 = cbuf.insts_size());
1890    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1891  %}
1892
1893  enc_class post_call_FPU %{
1894    // If method sets FPU control word do it here also
1895    if (Compile::current()->in_24_bit_fp_mode()) {
1896      MacroAssembler masm(&cbuf);
1897      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1898    }
1899  %}
1900
1901  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1902    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1903    // who we intended to call.
1904    cbuf.set_insts_mark();
1905    $$$emit8$primary;
1906
1907    if (!_method) {
1908      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1909                     runtime_call_Relocation::spec(),
1910                     RELOC_IMM32);
1911    } else {
1912      int method_index = resolved_method_index(cbuf);
1913      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1914                                                  : static_call_Relocation::spec(method_index);
1915      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1916                     rspec, RELOC_DISP32);
1917      // Emit stubs for static call.
1918      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1919      if (stub == NULL) {
1920        ciEnv::current()->record_failure("CodeCache is full");
1921        return;
1922      }
1923    }
1924  %}
1925
1926  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1927    MacroAssembler _masm(&cbuf);
1928    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1929  %}
1930
1931  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1932    int disp = in_bytes(Method::from_compiled_offset());
1933    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1934
1935    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1936    cbuf.set_insts_mark();
1937    $$$emit8$primary;
1938    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1939    emit_d8(cbuf, disp);             // Displacement
1940
1941  %}
1942
1943//   Following encoding is no longer used, but may be restored if calling
1944//   convention changes significantly.
1945//   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1946//
1947//   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1948//     // int ic_reg     = Matcher::inline_cache_reg();
1949//     // int ic_encode  = Matcher::_regEncode[ic_reg];
1950//     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1951//     // int imo_encode = Matcher::_regEncode[imo_reg];
1952//
1953//     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1954//     // // so we load it immediately before the call
1955//     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1956//     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1957//
1958//     // xor rbp,ebp
1959//     emit_opcode(cbuf, 0x33);
1960//     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1961//
1962//     // CALL to interpreter.
1963//     cbuf.set_insts_mark();
1964//     $$$emit8$primary;
1965//     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1966//                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1967//   %}
1968
1969  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1970    $$$emit8$primary;
1971    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1972    $$$emit8$shift$$constant;
1973  %}
1974
1975  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1976    // Load immediate does not have a zero or sign extended version
1977    // for 8-bit immediates
1978    emit_opcode(cbuf, 0xB8 + $dst$$reg);
1979    $$$emit32$src$$constant;
1980  %}
1981
1982  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1983    // Load immediate does not have a zero or sign extended version
1984    // for 8-bit immediates
1985    emit_opcode(cbuf, $primary + $dst$$reg);
1986    $$$emit32$src$$constant;
1987  %}
1988
1989  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1990    // Load immediate does not have a zero or sign extended version
1991    // for 8-bit immediates
1992    int dst_enc = $dst$$reg;
1993    int src_con = $src$$constant & 0x0FFFFFFFFL;
1994    if (src_con == 0) {
1995      // xor dst, dst
1996      emit_opcode(cbuf, 0x33);
1997      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1998    } else {
1999      emit_opcode(cbuf, $primary + dst_enc);
2000      emit_d32(cbuf, src_con);
2001    }
2002  %}
2003
2004  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2005    // Load immediate does not have a zero or sign extended version
2006    // for 8-bit immediates
2007    int dst_enc = $dst$$reg + 2;
2008    int src_con = ((julong)($src$$constant)) >> 32;
2009    if (src_con == 0) {
2010      // xor dst, dst
2011      emit_opcode(cbuf, 0x33);
2012      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2013    } else {
2014      emit_opcode(cbuf, $primary + dst_enc);
2015      emit_d32(cbuf, src_con);
2016    }
2017  %}
2018
2019
2020  // Encode a reg-reg copy.  If it is useless, then empty encoding.
2021  enc_class enc_Copy( rRegI dst, rRegI src ) %{
2022    encode_Copy( cbuf, $dst$$reg, $src$$reg );
2023  %}
2024
2025  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2026    encode_Copy( cbuf, $dst$$reg, $src$$reg );
2027  %}
2028
2029  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2030    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2031  %}
2032
2033  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2034    $$$emit8$primary;
2035    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2036  %}
2037
2038  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2039    $$$emit8$secondary;
2040    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2041  %}
2042
2043  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2044    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2045  %}
2046
2047  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2048    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2049  %}
2050
2051  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2052    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2053  %}
2054
2055  enc_class Con32 (immI src) %{    // Con32(storeImmI)
2056    // Output immediate
2057    $$$emit32$src$$constant;
2058  %}
2059
2060  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2061    // Output Float immediate bits
2062    jfloat jf = $src$$constant;
2063    int    jf_as_bits = jint_cast( jf );
2064    emit_d32(cbuf, jf_as_bits);
2065  %}
2066
2067  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2068    // Output Float immediate bits
2069    jfloat jf = $src$$constant;
2070    int    jf_as_bits = jint_cast( jf );
2071    emit_d32(cbuf, jf_as_bits);
2072  %}
2073
2074  enc_class Con16 (immI src) %{    // Con16(storeImmI)
2075    // Output immediate
2076    $$$emit16$src$$constant;
2077  %}
2078
2079  enc_class Con_d32(immI src) %{
2080    emit_d32(cbuf,$src$$constant);
2081  %}
2082
2083  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2084    // Output immediate memory reference
2085    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2086    emit_d32(cbuf, 0x00);
2087  %}
2088
2089  enc_class lock_prefix( ) %{
2090    emit_opcode(cbuf,0xF0);         // [Lock]
2091  %}
2092
2093  // Cmp-xchg long value.
2094  // Note: we need to swap rbx, and rcx before and after the
2095  //       cmpxchg8 instruction because the instruction uses
2096  //       rcx as the high order word of the new value to store but
2097  //       our register encoding uses rbx,.
2098  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2099
2100    // XCHG  rbx,ecx
2101    emit_opcode(cbuf,0x87);
2102    emit_opcode(cbuf,0xD9);
2103    // [Lock]
2104    emit_opcode(cbuf,0xF0);
2105    // CMPXCHG8 [Eptr]
2106    emit_opcode(cbuf,0x0F);
2107    emit_opcode(cbuf,0xC7);
2108    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2109    // XCHG  rbx,ecx
2110    emit_opcode(cbuf,0x87);
2111    emit_opcode(cbuf,0xD9);
2112  %}
2113
2114  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2115    // [Lock]
2116    emit_opcode(cbuf,0xF0);
2117
2118    // CMPXCHG [Eptr]
2119    emit_opcode(cbuf,0x0F);
2120    emit_opcode(cbuf,0xB1);
2121    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2122  %}
2123
2124  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
2125    // [Lock]
2126    emit_opcode(cbuf,0xF0);
2127
2128    // CMPXCHGB [Eptr]
2129    emit_opcode(cbuf,0x0F);
2130    emit_opcode(cbuf,0xB0);
2131    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2132  %}
2133
2134  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
2135    // [Lock]
2136    emit_opcode(cbuf,0xF0);
2137
2138    // 16-bit mode
2139    emit_opcode(cbuf, 0x66);
2140
2141    // CMPXCHGW [Eptr]
2142    emit_opcode(cbuf,0x0F);
2143    emit_opcode(cbuf,0xB1);
2144    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2145  %}
2146
2147  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2148    int res_encoding = $res$$reg;
2149
2150    // MOV  res,0
2151    emit_opcode( cbuf, 0xB8 + res_encoding);
2152    emit_d32( cbuf, 0 );
2153    // JNE,s  fail
2154    emit_opcode(cbuf,0x75);
2155    emit_d8(cbuf, 5 );
2156    // MOV  res,1
2157    emit_opcode( cbuf, 0xB8 + res_encoding);
2158    emit_d32( cbuf, 1 );
2159    // fail:
2160  %}
2161
2162  enc_class set_instruction_start( ) %{
2163    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2164  %}
2165
2166  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2167    int reg_encoding = $ereg$$reg;
2168    int base  = $mem$$base;
2169    int index = $mem$$index;
2170    int scale = $mem$$scale;
2171    int displace = $mem$$disp;
2172    relocInfo::relocType disp_reloc = $mem->disp_reloc();
2173    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2174  %}
2175
2176  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2177    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2178    int base  = $mem$$base;
2179    int index = $mem$$index;
2180    int scale = $mem$$scale;
2181    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2182    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2183    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2184  %}
2185
2186  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2187    int r1, r2;
2188    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2189    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2190    emit_opcode(cbuf,0x0F);
2191    emit_opcode(cbuf,$tertiary);
2192    emit_rm(cbuf, 0x3, r1, r2);
2193    emit_d8(cbuf,$cnt$$constant);
2194    emit_d8(cbuf,$primary);
2195    emit_rm(cbuf, 0x3, $secondary, r1);
2196    emit_d8(cbuf,$cnt$$constant);
2197  %}
2198
2199  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2200    emit_opcode( cbuf, 0x8B ); // Move
2201    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2202    if( $cnt$$constant > 32 ) { // Shift, if not by zero
2203      emit_d8(cbuf,$primary);
2204      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2205      emit_d8(cbuf,$cnt$$constant-32);
2206    }
2207    emit_d8(cbuf,$primary);
2208    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2209    emit_d8(cbuf,31);
2210  %}
2211
2212  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2213    int r1, r2;
2214    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2215    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2216
2217    emit_opcode( cbuf, 0x8B ); // Move r1,r2
2218    emit_rm(cbuf, 0x3, r1, r2);
2219    if( $cnt$$constant > 32 ) { // Shift, if not by zero
2220      emit_opcode(cbuf,$primary);
2221      emit_rm(cbuf, 0x3, $secondary, r1);
2222      emit_d8(cbuf,$cnt$$constant-32);
2223    }
2224    emit_opcode(cbuf,0x33);  // XOR r2,r2
2225    emit_rm(cbuf, 0x3, r2, r2);
2226  %}
2227
2228  // Clone of RegMem but accepts an extra parameter to access each
2229  // half of a double in memory; it never needs relocation info.
2230  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2231    emit_opcode(cbuf,$opcode$$constant);
2232    int reg_encoding = $rm_reg$$reg;
2233    int base     = $mem$$base;
2234    int index    = $mem$$index;
2235    int scale    = $mem$$scale;
2236    int displace = $mem$$disp + $disp_for_half$$constant;
2237    relocInfo::relocType disp_reloc = relocInfo::none;
2238    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2239  %}
2240
2241  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2242  //
2243  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2244  // and it never needs relocation information.
2245  // Frequently used to move data between FPU's Stack Top and memory.
2246  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2247    int rm_byte_opcode = $rm_opcode$$constant;
2248    int base     = $mem$$base;
2249    int index    = $mem$$index;
2250    int scale    = $mem$$scale;
2251    int displace = $mem$$disp;
2252    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2253    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2254  %}
2255
2256  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2257    int rm_byte_opcode = $rm_opcode$$constant;
2258    int base     = $mem$$base;
2259    int index    = $mem$$index;
2260    int scale    = $mem$$scale;
2261    int displace = $mem$$disp;
2262    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2263    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2264  %}
2265
2266  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2267    int reg_encoding = $dst$$reg;
2268    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2269    int index        = 0x04;            // 0x04 indicates no index
2270    int scale        = 0x00;            // 0x00 indicates no scale
2271    int displace     = $src1$$constant; // 0x00 indicates no displacement
2272    relocInfo::relocType disp_reloc = relocInfo::none;
2273    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2274  %}
2275
2276  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2277    // Compare dst,src
2278    emit_opcode(cbuf,0x3B);
2279    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2280    // jmp dst < src around move
2281    emit_opcode(cbuf,0x7C);
2282    emit_d8(cbuf,2);
2283    // move dst,src
2284    emit_opcode(cbuf,0x8B);
2285    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2286  %}
2287
2288  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2289    // Compare dst,src
2290    emit_opcode(cbuf,0x3B);
2291    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2292    // jmp dst > src around move
2293    emit_opcode(cbuf,0x7F);
2294    emit_d8(cbuf,2);
2295    // move dst,src
2296    emit_opcode(cbuf,0x8B);
2297    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2298  %}
2299
2300  enc_class enc_FPR_store(memory mem, regDPR src) %{
2301    // If src is FPR1, we can just FST to store it.
2302    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2303    int reg_encoding = 0x2; // Just store
2304    int base  = $mem$$base;
2305    int index = $mem$$index;
2306    int scale = $mem$$scale;
2307    int displace = $mem$$disp;
2308    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2309    if( $src$$reg != FPR1L_enc ) {
2310      reg_encoding = 0x3;  // Store & pop
2311      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2312      emit_d8( cbuf, 0xC0-1+$src$$reg );
2313    }
2314    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2315    emit_opcode(cbuf,$primary);
2316    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2317  %}
2318
2319  enc_class neg_reg(rRegI dst) %{
2320    // NEG $dst
2321    emit_opcode(cbuf,0xF7);
2322    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2323  %}
2324
2325  enc_class setLT_reg(eCXRegI dst) %{
2326    // SETLT $dst
2327    emit_opcode(cbuf,0x0F);
2328    emit_opcode(cbuf,0x9C);
2329    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2330  %}
2331
2332  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2333    int tmpReg = $tmp$$reg;
2334
2335    // SUB $p,$q
2336    emit_opcode(cbuf,0x2B);
2337    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2338    // SBB $tmp,$tmp
2339    emit_opcode(cbuf,0x1B);
2340    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2341    // AND $tmp,$y
2342    emit_opcode(cbuf,0x23);
2343    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2344    // ADD $p,$tmp
2345    emit_opcode(cbuf,0x03);
2346    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2347  %}
2348
2349  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2350    // TEST shift,32
2351    emit_opcode(cbuf,0xF7);
2352    emit_rm(cbuf, 0x3, 0, ECX_enc);
2353    emit_d32(cbuf,0x20);
2354    // JEQ,s small
2355    emit_opcode(cbuf, 0x74);
2356    emit_d8(cbuf, 0x04);
2357    // MOV    $dst.hi,$dst.lo
2358    emit_opcode( cbuf, 0x8B );
2359    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2360    // CLR    $dst.lo
2361    emit_opcode(cbuf, 0x33);
2362    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2363// small:
2364    // SHLD   $dst.hi,$dst.lo,$shift
2365    emit_opcode(cbuf,0x0F);
2366    emit_opcode(cbuf,0xA5);
2367    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2368    // SHL    $dst.lo,$shift"
2369    emit_opcode(cbuf,0xD3);
2370    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2371  %}
2372
2373  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2374    // TEST shift,32
2375    emit_opcode(cbuf,0xF7);
2376    emit_rm(cbuf, 0x3, 0, ECX_enc);
2377    emit_d32(cbuf,0x20);
2378    // JEQ,s small
2379    emit_opcode(cbuf, 0x74);
2380    emit_d8(cbuf, 0x04);
2381    // MOV    $dst.lo,$dst.hi
2382    emit_opcode( cbuf, 0x8B );
2383    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2384    // CLR    $dst.hi
2385    emit_opcode(cbuf, 0x33);
2386    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2387// small:
2388    // SHRD   $dst.lo,$dst.hi,$shift
2389    emit_opcode(cbuf,0x0F);
2390    emit_opcode(cbuf,0xAD);
2391    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2392    // SHR    $dst.hi,$shift"
2393    emit_opcode(cbuf,0xD3);
2394    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2395  %}
2396
2397  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2398    // TEST shift,32
2399    emit_opcode(cbuf,0xF7);
2400    emit_rm(cbuf, 0x3, 0, ECX_enc);
2401    emit_d32(cbuf,0x20);
2402    // JEQ,s small
2403    emit_opcode(cbuf, 0x74);
2404    emit_d8(cbuf, 0x05);
2405    // MOV    $dst.lo,$dst.hi
2406    emit_opcode( cbuf, 0x8B );
2407    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2408    // SAR    $dst.hi,31
2409    emit_opcode(cbuf, 0xC1);
2410    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2411    emit_d8(cbuf, 0x1F );
2412// small:
2413    // SHRD   $dst.lo,$dst.hi,$shift
2414    emit_opcode(cbuf,0x0F);
2415    emit_opcode(cbuf,0xAD);
2416    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2417    // SAR    $dst.hi,$shift"
2418    emit_opcode(cbuf,0xD3);
2419    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2420  %}
2421
2422
2423  // ----------------- Encodings for floating point unit -----------------
2424  // May leave result in FPU-TOS or FPU reg depending on opcodes
2425  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2426    $$$emit8$primary;
2427    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2428  %}
2429
2430  // Pop argument in FPR0 with FSTP ST(0)
2431  enc_class PopFPU() %{
2432    emit_opcode( cbuf, 0xDD );
2433    emit_d8( cbuf, 0xD8 );
2434  %}
2435
2436  // !!!!! equivalent to Pop_Reg_F
2437  enc_class Pop_Reg_DPR( regDPR dst ) %{
2438    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2439    emit_d8( cbuf, 0xD8+$dst$$reg );
2440  %}
2441
2442  enc_class Push_Reg_DPR( regDPR dst ) %{
2443    emit_opcode( cbuf, 0xD9 );
2444    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2445  %}
2446
2447  enc_class strictfp_bias1( regDPR dst ) %{
2448    emit_opcode( cbuf, 0xDB );           // FLD m80real
2449    emit_opcode( cbuf, 0x2D );
2450    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2451    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2452    emit_opcode( cbuf, 0xC8+$dst$$reg );
2453  %}
2454
2455  enc_class strictfp_bias2( regDPR dst ) %{
2456    emit_opcode( cbuf, 0xDB );           // FLD m80real
2457    emit_opcode( cbuf, 0x2D );
2458    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2459    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2460    emit_opcode( cbuf, 0xC8+$dst$$reg );
2461  %}
2462
2463  // Special case for moving an integer register to a stack slot.
2464  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2465    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2466  %}
2467
2468  // Special case for moving a register to a stack slot.
2469  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2470    // Opcode already emitted
2471    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2472    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2473    emit_d32(cbuf, $dst$$disp);   // Displacement
2474  %}
2475
2476  // Push the integer in stackSlot 'src' onto FP-stack
2477  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2478    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2479  %}
2480
2481  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2482  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2483    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2484  %}
2485
2486  // Same as Pop_Mem_F except for opcode
2487  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2488  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2489    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2490  %}
2491
2492  enc_class Pop_Reg_FPR( regFPR dst ) %{
2493    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2494    emit_d8( cbuf, 0xD8+$dst$$reg );
2495  %}
2496
2497  enc_class Push_Reg_FPR( regFPR dst ) %{
2498    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2499    emit_d8( cbuf, 0xC0-1+$dst$$reg );
2500  %}
2501
2502  // Push FPU's float to a stack-slot, and pop FPU-stack
2503  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2504    int pop = 0x02;
2505    if ($src$$reg != FPR1L_enc) {
2506      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2507      emit_d8( cbuf, 0xC0-1+$src$$reg );
2508      pop = 0x03;
2509    }
2510    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2511  %}
2512
2513  // Push FPU's double to a stack-slot, and pop FPU-stack
2514  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2515    int pop = 0x02;
2516    if ($src$$reg != FPR1L_enc) {
2517      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2518      emit_d8( cbuf, 0xC0-1+$src$$reg );
2519      pop = 0x03;
2520    }
2521    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2522  %}
2523
2524  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2525  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2526    int pop = 0xD0 - 1; // -1 since we skip FLD
2527    if ($src$$reg != FPR1L_enc) {
2528      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2529      emit_d8( cbuf, 0xC0-1+$src$$reg );
2530      pop = 0xD8;
2531    }
2532    emit_opcode( cbuf, 0xDD );
2533    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2534  %}
2535
2536
2537  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2538    // load dst in FPR0
2539    emit_opcode( cbuf, 0xD9 );
2540    emit_d8( cbuf, 0xC0-1+$dst$$reg );
2541    if ($src$$reg != FPR1L_enc) {
2542      // fincstp
2543      emit_opcode (cbuf, 0xD9);
2544      emit_opcode (cbuf, 0xF7);
2545      // swap src with FPR1:
2546      // FXCH FPR1 with src
2547      emit_opcode(cbuf, 0xD9);
2548      emit_d8(cbuf, 0xC8-1+$src$$reg );
2549      // fdecstp
2550      emit_opcode (cbuf, 0xD9);
2551      emit_opcode (cbuf, 0xF6);
2552    }
2553  %}
2554
2555  enc_class Push_ModD_encoding(regD src0, regD src1) %{
2556    MacroAssembler _masm(&cbuf);
2557    __ subptr(rsp, 8);
2558    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2559    __ fld_d(Address(rsp, 0));
2560    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2561    __ fld_d(Address(rsp, 0));
2562  %}
2563
2564  enc_class Push_ModF_encoding(regF src0, regF src1) %{
2565    MacroAssembler _masm(&cbuf);
2566    __ subptr(rsp, 4);
2567    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2568    __ fld_s(Address(rsp, 0));
2569    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2570    __ fld_s(Address(rsp, 0));
2571  %}
2572
2573  enc_class Push_ResultD(regD dst) %{
2574    MacroAssembler _masm(&cbuf);
2575    __ fstp_d(Address(rsp, 0));
2576    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2577    __ addptr(rsp, 8);
2578  %}
2579
2580  enc_class Push_ResultF(regF dst, immI d8) %{
2581    MacroAssembler _masm(&cbuf);
2582    __ fstp_s(Address(rsp, 0));
2583    __ movflt($dst$$XMMRegister, Address(rsp, 0));
2584    __ addptr(rsp, $d8$$constant);
2585  %}
2586
2587  enc_class Push_SrcD(regD src) %{
2588    MacroAssembler _masm(&cbuf);
2589    __ subptr(rsp, 8);
2590    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2591    __ fld_d(Address(rsp, 0));
2592  %}
2593
2594  enc_class push_stack_temp_qword() %{
2595    MacroAssembler _masm(&cbuf);
2596    __ subptr(rsp, 8);
2597  %}
2598
2599  enc_class pop_stack_temp_qword() %{
2600    MacroAssembler _masm(&cbuf);
2601    __ addptr(rsp, 8);
2602  %}
2603
2604  enc_class push_xmm_to_fpr1(regD src) %{
2605    MacroAssembler _masm(&cbuf);
2606    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2607    __ fld_d(Address(rsp, 0));
2608  %}
2609
2610  enc_class Push_Result_Mod_DPR( regDPR src) %{
2611    if ($src$$reg != FPR1L_enc) {
2612      // fincstp
2613      emit_opcode (cbuf, 0xD9);
2614      emit_opcode (cbuf, 0xF7);
2615      // FXCH FPR1 with src
2616      emit_opcode(cbuf, 0xD9);
2617      emit_d8(cbuf, 0xC8-1+$src$$reg );
2618      // fdecstp
2619      emit_opcode (cbuf, 0xD9);
2620      emit_opcode (cbuf, 0xF6);
2621    }
2622    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2623    // // FSTP   FPR$dst$$reg
2624    // emit_opcode( cbuf, 0xDD );
2625    // emit_d8( cbuf, 0xD8+$dst$$reg );
2626  %}
2627
2628  enc_class fnstsw_sahf_skip_parity() %{
2629    // fnstsw ax
2630    emit_opcode( cbuf, 0xDF );
2631    emit_opcode( cbuf, 0xE0 );
2632    // sahf
2633    emit_opcode( cbuf, 0x9E );
2634    // jnp  ::skip
2635    emit_opcode( cbuf, 0x7B );
2636    emit_opcode( cbuf, 0x05 );
2637  %}
2638
2639  enc_class emitModDPR() %{
2640    // fprem must be iterative
2641    // :: loop
2642    // fprem
2643    emit_opcode( cbuf, 0xD9 );
2644    emit_opcode( cbuf, 0xF8 );
2645    // wait
2646    emit_opcode( cbuf, 0x9b );
2647    // fnstsw ax
2648    emit_opcode( cbuf, 0xDF );
2649    emit_opcode( cbuf, 0xE0 );
2650    // sahf
2651    emit_opcode( cbuf, 0x9E );
2652    // jp  ::loop
2653    emit_opcode( cbuf, 0x0F );
2654    emit_opcode( cbuf, 0x8A );
2655    emit_opcode( cbuf, 0xF4 );
2656    emit_opcode( cbuf, 0xFF );
2657    emit_opcode( cbuf, 0xFF );
2658    emit_opcode( cbuf, 0xFF );
2659  %}
2660
2661  enc_class fpu_flags() %{
2662    // fnstsw_ax
2663    emit_opcode( cbuf, 0xDF);
2664    emit_opcode( cbuf, 0xE0);
2665    // test ax,0x0400
2666    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2667    emit_opcode( cbuf, 0xA9 );
2668    emit_d16   ( cbuf, 0x0400 );
2669    // // // This sequence works, but stalls for 12-16 cycles on PPro
2670    // // test rax,0x0400
2671    // emit_opcode( cbuf, 0xA9 );
2672    // emit_d32   ( cbuf, 0x00000400 );
2673    //
2674    // jz exit (no unordered comparison)
2675    emit_opcode( cbuf, 0x74 );
2676    emit_d8    ( cbuf, 0x02 );
2677    // mov ah,1 - treat as LT case (set carry flag)
2678    emit_opcode( cbuf, 0xB4 );
2679    emit_d8    ( cbuf, 0x01 );
2680    // sahf
2681    emit_opcode( cbuf, 0x9E);
2682  %}
2683
2684  enc_class cmpF_P6_fixup() %{
2685    // Fixup the integer flags in case comparison involved a NaN
2686    //
2687    // JNP exit (no unordered comparison, P-flag is set by NaN)
2688    emit_opcode( cbuf, 0x7B );
2689    emit_d8    ( cbuf, 0x03 );
2690    // MOV AH,1 - treat as LT case (set carry flag)
2691    emit_opcode( cbuf, 0xB4 );
2692    emit_d8    ( cbuf, 0x01 );
2693    // SAHF
2694    emit_opcode( cbuf, 0x9E);
2695    // NOP     // target for branch to avoid branch to branch
2696    emit_opcode( cbuf, 0x90);
2697  %}
2698
2699//     fnstsw_ax();
2700//     sahf();
2701//     movl(dst, nan_result);
2702//     jcc(Assembler::parity, exit);
2703//     movl(dst, less_result);
2704//     jcc(Assembler::below, exit);
2705//     movl(dst, equal_result);
2706//     jcc(Assembler::equal, exit);
2707//     movl(dst, greater_result);
2708
2709// less_result     =  1;
2710// greater_result  = -1;
2711// equal_result    = 0;
2712// nan_result      = -1;
2713
2714  enc_class CmpF_Result(rRegI dst) %{
2715    // fnstsw_ax();
2716    emit_opcode( cbuf, 0xDF);
2717    emit_opcode( cbuf, 0xE0);
2718    // sahf
2719    emit_opcode( cbuf, 0x9E);
2720    // movl(dst, nan_result);
2721    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2722    emit_d32( cbuf, -1 );
2723    // jcc(Assembler::parity, exit);
2724    emit_opcode( cbuf, 0x7A );
2725    emit_d8    ( cbuf, 0x13 );
2726    // movl(dst, less_result);
2727    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2728    emit_d32( cbuf, -1 );
2729    // jcc(Assembler::below, exit);
2730    emit_opcode( cbuf, 0x72 );
2731    emit_d8    ( cbuf, 0x0C );
2732    // movl(dst, equal_result);
2733    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2734    emit_d32( cbuf, 0 );
2735    // jcc(Assembler::equal, exit);
2736    emit_opcode( cbuf, 0x74 );
2737    emit_d8    ( cbuf, 0x05 );
2738    // movl(dst, greater_result);
2739    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2740    emit_d32( cbuf, 1 );
2741  %}
2742
2743
2744  // Compare the longs and set flags
2745  // BROKEN!  Do Not use as-is
2746  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2747    // CMP    $src1.hi,$src2.hi
2748    emit_opcode( cbuf, 0x3B );
2749    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2750    // JNE,s  done
2751    emit_opcode(cbuf,0x75);
2752    emit_d8(cbuf, 2 );
2753    // CMP    $src1.lo,$src2.lo
2754    emit_opcode( cbuf, 0x3B );
2755    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2756// done:
2757  %}
2758
2759  enc_class convert_int_long( regL dst, rRegI src ) %{
2760    // mov $dst.lo,$src
2761    int dst_encoding = $dst$$reg;
2762    int src_encoding = $src$$reg;
2763    encode_Copy( cbuf, dst_encoding  , src_encoding );
2764    // mov $dst.hi,$src
2765    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2766    // sar $dst.hi,31
2767    emit_opcode( cbuf, 0xC1 );
2768    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2769    emit_d8(cbuf, 0x1F );
2770  %}
2771
2772  enc_class convert_long_double( eRegL src ) %{
2773    // push $src.hi
2774    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2775    // push $src.lo
2776    emit_opcode(cbuf, 0x50+$src$$reg  );
2777    // fild 64-bits at [SP]
2778    emit_opcode(cbuf,0xdf);
2779    emit_d8(cbuf, 0x6C);
2780    emit_d8(cbuf, 0x24);
2781    emit_d8(cbuf, 0x00);
2782    // pop stack
2783    emit_opcode(cbuf, 0x83); // add  SP, #8
2784    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2785    emit_d8(cbuf, 0x8);
2786  %}
2787
2788  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2789    // IMUL   EDX:EAX,$src1
2790    emit_opcode( cbuf, 0xF7 );
2791    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2792    // SAR    EDX,$cnt-32
2793    int shift_count = ((int)$cnt$$constant) - 32;
2794    if (shift_count > 0) {
2795      emit_opcode(cbuf, 0xC1);
2796      emit_rm(cbuf, 0x3, 7, $dst$$reg );
2797      emit_d8(cbuf, shift_count);
2798    }
2799  %}
2800
2801  // this version doesn't have add sp, 8
2802  enc_class convert_long_double2( eRegL src ) %{
2803    // push $src.hi
2804    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2805    // push $src.lo
2806    emit_opcode(cbuf, 0x50+$src$$reg  );
2807    // fild 64-bits at [SP]
2808    emit_opcode(cbuf,0xdf);
2809    emit_d8(cbuf, 0x6C);
2810    emit_d8(cbuf, 0x24);
2811    emit_d8(cbuf, 0x00);
2812  %}
2813
2814  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2815    // Basic idea: long = (long)int * (long)int
2816    // IMUL EDX:EAX, src
2817    emit_opcode( cbuf, 0xF7 );
2818    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2819  %}
2820
2821  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2822    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2823    // MUL EDX:EAX, src
2824    emit_opcode( cbuf, 0xF7 );
2825    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2826  %}
2827
2828  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2829    // Basic idea: lo(result) = lo(x_lo * y_lo)
2830    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2831    // MOV    $tmp,$src.lo
2832    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2833    // IMUL   $tmp,EDX
2834    emit_opcode( cbuf, 0x0F );
2835    emit_opcode( cbuf, 0xAF );
2836    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2837    // MOV    EDX,$src.hi
2838    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2839    // IMUL   EDX,EAX
2840    emit_opcode( cbuf, 0x0F );
2841    emit_opcode( cbuf, 0xAF );
2842    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2843    // ADD    $tmp,EDX
2844    emit_opcode( cbuf, 0x03 );
2845    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2846    // MUL   EDX:EAX,$src.lo
2847    emit_opcode( cbuf, 0xF7 );
2848    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2849    // ADD    EDX,ESI
2850    emit_opcode( cbuf, 0x03 );
2851    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2852  %}
2853
2854  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2855    // Basic idea: lo(result) = lo(src * y_lo)
2856    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2857    // IMUL   $tmp,EDX,$src
2858    emit_opcode( cbuf, 0x6B );
2859    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2860    emit_d8( cbuf, (int)$src$$constant );
2861    // MOV    EDX,$src
2862    emit_opcode(cbuf, 0xB8 + EDX_enc);
2863    emit_d32( cbuf, (int)$src$$constant );
2864    // MUL   EDX:EAX,EDX
2865    emit_opcode( cbuf, 0xF7 );
2866    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2867    // ADD    EDX,ESI
2868    emit_opcode( cbuf, 0x03 );
2869    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2870  %}
2871
2872  enc_class long_div( eRegL src1, eRegL src2 ) %{
2873    // PUSH src1.hi
2874    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2875    // PUSH src1.lo
2876    emit_opcode(cbuf,               0x50+$src1$$reg  );
2877    // PUSH src2.hi
2878    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2879    // PUSH src2.lo
2880    emit_opcode(cbuf,               0x50+$src2$$reg  );
2881    // CALL directly to the runtime
2882    cbuf.set_insts_mark();
2883    emit_opcode(cbuf,0xE8);       // Call into runtime
2884    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2885    // Restore stack
2886    emit_opcode(cbuf, 0x83); // add  SP, #framesize
2887    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2888    emit_d8(cbuf, 4*4);
2889  %}
2890
2891  enc_class long_mod( eRegL src1, eRegL src2 ) %{
2892    // PUSH src1.hi
2893    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2894    // PUSH src1.lo
2895    emit_opcode(cbuf,               0x50+$src1$$reg  );
2896    // PUSH src2.hi
2897    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2898    // PUSH src2.lo
2899    emit_opcode(cbuf,               0x50+$src2$$reg  );
2900    // CALL directly to the runtime
2901    cbuf.set_insts_mark();
2902    emit_opcode(cbuf,0xE8);       // Call into runtime
2903    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2904    // Restore stack
2905    emit_opcode(cbuf, 0x83); // add  SP, #framesize
2906    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2907    emit_d8(cbuf, 4*4);
2908  %}
2909
2910  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2911    // MOV   $tmp,$src.lo
2912    emit_opcode(cbuf, 0x8B);
2913    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2914    // OR    $tmp,$src.hi
2915    emit_opcode(cbuf, 0x0B);
2916    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2917  %}
2918
2919  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2920    // CMP    $src1.lo,$src2.lo
2921    emit_opcode( cbuf, 0x3B );
2922    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2923    // JNE,s  skip
2924    emit_cc(cbuf, 0x70, 0x5);
2925    emit_d8(cbuf,2);
2926    // CMP    $src1.hi,$src2.hi
2927    emit_opcode( cbuf, 0x3B );
2928    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2929  %}
2930
2931  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2932    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2933    emit_opcode( cbuf, 0x3B );
2934    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2935    // MOV    $tmp,$src1.hi
2936    emit_opcode( cbuf, 0x8B );
2937    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2938    // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2939    emit_opcode( cbuf, 0x1B );
2940    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2941  %}
2942
2943  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2944    // XOR    $tmp,$tmp
2945    emit_opcode(cbuf,0x33);  // XOR
2946    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2947    // CMP    $tmp,$src.lo
2948    emit_opcode( cbuf, 0x3B );
2949    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2950    // SBB    $tmp,$src.hi
2951    emit_opcode( cbuf, 0x1B );
2952    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2953  %}
2954
2955 // Sniff, sniff... smells like Gnu Superoptimizer
2956  enc_class neg_long( eRegL dst ) %{
2957    emit_opcode(cbuf,0xF7);    // NEG hi
2958    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2959    emit_opcode(cbuf,0xF7);    // NEG lo
2960    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2961    emit_opcode(cbuf,0x83);    // SBB hi,0
2962    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2963    emit_d8    (cbuf,0 );
2964  %}
2965
2966  enc_class enc_pop_rdx() %{
2967    emit_opcode(cbuf,0x5A);
2968  %}
2969
2970  enc_class enc_rethrow() %{
2971    cbuf.set_insts_mark();
2972    emit_opcode(cbuf, 0xE9);        // jmp    entry
2973    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2974                   runtime_call_Relocation::spec(), RELOC_IMM32 );
2975  %}
2976
2977
2978  // Convert a double to an int.  Java semantics require we do complex
2979  // manglelations in the corner cases.  So we set the rounding mode to
2980  // 'zero', store the darned double down as an int, and reset the
2981  // rounding mode to 'nearest'.  The hardware throws an exception which
2982  // patches up the correct value directly to the stack.
2983  enc_class DPR2I_encoding( regDPR src ) %{
2984    // Flip to round-to-zero mode.  We attempted to allow invalid-op
2985    // exceptions here, so that a NAN or other corner-case value will
2986    // thrown an exception (but normal values get converted at full speed).
2987    // However, I2C adapters and other float-stack manglers leave pending
2988    // invalid-op exceptions hanging.  We would have to clear them before
2989    // enabling them and that is more expensive than just testing for the
2990    // invalid value Intel stores down in the corner cases.
2991    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2992    emit_opcode(cbuf,0x2D);
2993    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2994    // Allocate a word
2995    emit_opcode(cbuf,0x83);            // SUB ESP,4
2996    emit_opcode(cbuf,0xEC);
2997    emit_d8(cbuf,0x04);
2998    // Encoding assumes a double has been pushed into FPR0.
2999    // Store down the double as an int, popping the FPU stack
3000    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
3001    emit_opcode(cbuf,0x1C);
3002    emit_d8(cbuf,0x24);
3003    // Restore the rounding mode; mask the exception
3004    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3005    emit_opcode(cbuf,0x2D);
3006    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3007        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3008        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3009
3010    // Load the converted int; adjust CPU stack
3011    emit_opcode(cbuf,0x58);       // POP EAX
3012    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3013    emit_d32   (cbuf,0x80000000); //         0x80000000
3014    emit_opcode(cbuf,0x75);       // JNE around_slow_call
3015    emit_d8    (cbuf,0x07);       // Size of slow_call
3016    // Push src onto stack slow-path
3017    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3018    emit_d8    (cbuf,0xC0-1+$src$$reg );
3019    // CALL directly to the runtime
3020    cbuf.set_insts_mark();
3021    emit_opcode(cbuf,0xE8);       // Call into runtime
3022    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3023    // Carry on here...
3024  %}
3025
3026  enc_class DPR2L_encoding( regDPR src ) %{
3027    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3028    emit_opcode(cbuf,0x2D);
3029    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3030    // Allocate a word
3031    emit_opcode(cbuf,0x83);            // SUB ESP,8
3032    emit_opcode(cbuf,0xEC);
3033    emit_d8(cbuf,0x08);
3034    // Encoding assumes a double has been pushed into FPR0.
3035    // Store down the double as a long, popping the FPU stack
3036    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3037    emit_opcode(cbuf,0x3C);
3038    emit_d8(cbuf,0x24);
3039    // Restore the rounding mode; mask the exception
3040    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3041    emit_opcode(cbuf,0x2D);
3042    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3043        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3044        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3045
3046    // Load the converted int; adjust CPU stack
3047    emit_opcode(cbuf,0x58);       // POP EAX
3048    emit_opcode(cbuf,0x5A);       // POP EDX
3049    emit_opcode(cbuf,0x81);       // CMP EDX,imm
3050    emit_d8    (cbuf,0xFA);       // rdx
3051    emit_d32   (cbuf,0x80000000); //         0x80000000
3052    emit_opcode(cbuf,0x75);       // JNE around_slow_call
3053    emit_d8    (cbuf,0x07+4);     // Size of slow_call
3054    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3055    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3056    emit_opcode(cbuf,0x75);       // JNE around_slow_call
3057    emit_d8    (cbuf,0x07);       // Size of slow_call
3058    // Push src onto stack slow-path
3059    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3060    emit_d8    (cbuf,0xC0-1+$src$$reg );
3061    // CALL directly to the runtime
3062    cbuf.set_insts_mark();
3063    emit_opcode(cbuf,0xE8);       // Call into runtime
3064    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3065    // Carry on here...
3066  %}
3067
3068  enc_class FMul_ST_reg( eRegFPR src1 ) %{
3069    // Operand was loaded from memory into fp ST (stack top)
3070    // FMUL   ST,$src  /* D8 C8+i */
3071    emit_opcode(cbuf, 0xD8);
3072    emit_opcode(cbuf, 0xC8 + $src1$$reg);
3073  %}
3074
3075  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3076    // FADDP  ST,src2  /* D8 C0+i */
3077    emit_opcode(cbuf, 0xD8);
3078    emit_opcode(cbuf, 0xC0 + $src2$$reg);
3079    //could use FADDP  src2,fpST  /* DE C0+i */
3080  %}
3081
3082  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3083    // FADDP  src2,ST  /* DE C0+i */
3084    emit_opcode(cbuf, 0xDE);
3085    emit_opcode(cbuf, 0xC0 + $src2$$reg);
3086  %}
3087
3088  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3089    // Operand has been loaded into fp ST (stack top)
3090      // FSUB   ST,$src1
3091      emit_opcode(cbuf, 0xD8);
3092      emit_opcode(cbuf, 0xE0 + $src1$$reg);
3093
3094      // FDIV
3095      emit_opcode(cbuf, 0xD8);
3096      emit_opcode(cbuf, 0xF0 + $src2$$reg);
3097  %}
3098
3099  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3100    // Operand was loaded from memory into fp ST (stack top)
3101    // FADD   ST,$src  /* D8 C0+i */
3102    emit_opcode(cbuf, 0xD8);
3103    emit_opcode(cbuf, 0xC0 + $src1$$reg);
3104
3105    // FMUL  ST,src2  /* D8 C*+i */
3106    emit_opcode(cbuf, 0xD8);
3107    emit_opcode(cbuf, 0xC8 + $src2$$reg);
3108  %}
3109
3110
3111  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3112    // Operand was loaded from memory into fp ST (stack top)
3113    // FADD   ST,$src  /* D8 C0+i */
3114    emit_opcode(cbuf, 0xD8);
3115    emit_opcode(cbuf, 0xC0 + $src1$$reg);
3116
3117    // FMULP  src2,ST  /* DE C8+i */
3118    emit_opcode(cbuf, 0xDE);
3119    emit_opcode(cbuf, 0xC8 + $src2$$reg);
3120  %}
3121
3122  // Atomically load the volatile long
3123  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3124    emit_opcode(cbuf,0xDF);
3125    int rm_byte_opcode = 0x05;
3126    int base     = $mem$$base;
3127    int index    = $mem$$index;
3128    int scale    = $mem$$scale;
3129    int displace = $mem$$disp;
3130    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3131    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3132    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3133  %}
3134
3135  // Volatile Store Long.  Must be atomic, so move it into
3136  // the FP TOS and then do a 64-bit FIST.  Has to probe the
3137  // target address before the store (for null-ptr checks)
3138  // so the memory operand is used twice in the encoding.
3139  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3140    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3141    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3142    emit_opcode(cbuf,0xDF);
3143    int rm_byte_opcode = 0x07;
3144    int base     = $mem$$base;
3145    int index    = $mem$$index;
3146    int scale    = $mem$$scale;
3147    int displace = $mem$$disp;
3148    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3149    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3150  %}
3151
3152  // Safepoint Poll.  This polls the safepoint page, and causes an
3153  // exception if it is not readable. Unfortunately, it kills the condition code
3154  // in the process
3155  // We current use TESTL [spp],EDI
3156  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3157
3158  enc_class Safepoint_Poll() %{
3159    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3160    emit_opcode(cbuf,0x85);
3161    emit_rm (cbuf, 0x0, 0x7, 0x5);
3162    emit_d32(cbuf, (intptr_t)os::get_polling_page());
3163  %}
3164%}
3165
3166
3167//----------FRAME--------------------------------------------------------------
3168// Definition of frame structure and management information.
3169//
3170//  S T A C K   L A Y O U T    Allocators stack-slot number
3171//                             |   (to get allocators register number
3172//  G  Owned by    |        |  v    add OptoReg::stack0())
3173//  r   CALLER     |        |
3174//  o     |        +--------+      pad to even-align allocators stack-slot
3175//  w     V        |  pad0  |        numbers; owned by CALLER
3176//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3177//  h     ^        |   in   |  5
3178//        |        |  args  |  4   Holes in incoming args owned by SELF
3179//  |     |        |        |  3
3180//  |     |        +--------+
3181//  V     |        | old out|      Empty on Intel, window on Sparc
3182//        |    old |preserve|      Must be even aligned.
3183//        |     SP-+--------+----> Matcher::_old_SP, even aligned
3184//        |        |   in   |  3   area for Intel ret address
3185//     Owned by    |preserve|      Empty on Sparc.
3186//       SELF      +--------+
3187//        |        |  pad2  |  2   pad to align old SP
3188//        |        +--------+  1
3189//        |        | locks  |  0
3190//        |        +--------+----> OptoReg::stack0(), even aligned
3191//        |        |  pad1  | 11   pad to align new SP
3192//        |        +--------+
3193//        |        |        | 10
3194//        |        | spills |  9   spills
3195//        V        |        |  8   (pad0 slot for callee)
3196//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3197//        ^        |  out   |  7
3198//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3199//     Owned by    +--------+
3200//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3201//        |    new |preserve|      Must be even-aligned.
3202//        |     SP-+--------+----> Matcher::_new_SP, even aligned
3203//        |        |        |
3204//
3205// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3206//         known from SELF's arguments and the Java calling convention.
3207//         Region 6-7 is determined per call site.
3208// Note 2: If the calling convention leaves holes in the incoming argument
3209//         area, those holes are owned by SELF.  Holes in the outgoing area
3210//         are owned by the CALLEE.  Holes should not be nessecary in the
3211//         incoming area, as the Java calling convention is completely under
3212//         the control of the AD file.  Doubles can be sorted and packed to
3213//         avoid holes.  Holes in the outgoing arguments may be nessecary for
3214//         varargs C calling conventions.
3215// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3216//         even aligned with pad0 as needed.
3217//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3218//         region 6-11 is even aligned; it may be padded out more so that
3219//         the region from SP to FP meets the minimum stack alignment.
3220
3221frame %{
3222  // What direction does stack grow in (assumed to be same for C & Java)
3223  stack_direction(TOWARDS_LOW);
3224
3225  // These three registers define part of the calling convention
3226  // between compiled code and the interpreter.
3227  inline_cache_reg(EAX);                // Inline Cache Register
3228  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3229
3230  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3231  cisc_spilling_operand_name(indOffset32);
3232
3233  // Number of stack slots consumed by locking an object
3234  sync_stack_slots(1);
3235
3236  // Compiled code's Frame Pointer
3237  frame_pointer(ESP);
3238  // Interpreter stores its frame pointer in a register which is
3239  // stored to the stack by I2CAdaptors.
3240  // I2CAdaptors convert from interpreted java to compiled java.
3241  interpreter_frame_pointer(EBP);
3242
3243  // Stack alignment requirement
3244  // Alignment size in bytes (128-bit -> 16 bytes)
3245  stack_alignment(StackAlignmentInBytes);
3246
3247  // Number of stack slots between incoming argument block and the start of
3248  // a new frame.  The PROLOG must add this many slots to the stack.  The
3249  // EPILOG must remove this many slots.  Intel needs one slot for
3250  // return address and one for rbp, (must save rbp)
3251  in_preserve_stack_slots(2+VerifyStackAtCalls);
3252
3253  // Number of outgoing stack slots killed above the out_preserve_stack_slots
3254  // for calls to C.  Supports the var-args backing area for register parms.
3255  varargs_C_out_slots_killed(0);
3256
3257  // The after-PROLOG location of the return address.  Location of
3258  // return address specifies a type (REG or STACK) and a number
3259  // representing the register number (i.e. - use a register name) or
3260  // stack slot.
3261  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3262  // Otherwise, it is above the locks and verification slot and alignment word
3263  return_addr(STACK - 1 +
3264              align_up((Compile::current()->in_preserve_stack_slots() +
3265                        Compile::current()->fixed_slots()),
3266                       stack_alignment_in_slots()));
3267
3268  // Body of function which returns an integer array locating
3269  // arguments either in registers or in stack slots.  Passed an array
3270  // of ideal registers called "sig" and a "length" count.  Stack-slot
3271  // offsets are based on outgoing arguments, i.e. a CALLER setting up
3272  // arguments for a CALLEE.  Incoming stack arguments are
3273  // automatically biased by the preserve_stack_slots field above.
3274  calling_convention %{
3275    // No difference between ingoing/outgoing just pass false
3276    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3277  %}
3278
3279
3280  // Body of function which returns an integer array locating
3281  // arguments either in registers or in stack slots.  Passed an array
3282  // of ideal registers called "sig" and a "length" count.  Stack-slot
3283  // offsets are based on outgoing arguments, i.e. a CALLER setting up
3284  // arguments for a CALLEE.  Incoming stack arguments are
3285  // automatically biased by the preserve_stack_slots field above.
3286  c_calling_convention %{
3287    // This is obviously always outgoing
3288    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3289  %}
3290
3291  // Location of C & interpreter return values
3292  c_return_value %{
3293    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3294    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3295    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3296
3297    // in SSE2+ mode we want to keep the FPU stack clean so pretend
3298    // that C functions return float and double results in XMM0.
3299    if( ideal_reg == Op_RegD && UseSSE>=2 )
3300      return OptoRegPair(XMM0b_num,XMM0_num);
3301    if( ideal_reg == Op_RegF && UseSSE>=2 )
3302      return OptoRegPair(OptoReg::Bad,XMM0_num);
3303
3304    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3305  %}
3306
3307  // Location of return values
3308  return_value %{
3309    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3310    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3311    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3312    if( ideal_reg == Op_RegD && UseSSE>=2 )
3313      return OptoRegPair(XMM0b_num,XMM0_num);
3314    if( ideal_reg == Op_RegF && UseSSE>=1 )
3315      return OptoRegPair(OptoReg::Bad,XMM0_num);
3316    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3317  %}
3318
3319%}
3320
3321//----------ATTRIBUTES---------------------------------------------------------
3322//----------Operand Attributes-------------------------------------------------
3323op_attrib op_cost(0);        // Required cost attribute
3324
3325//----------Instruction Attributes---------------------------------------------
3326ins_attrib ins_cost(100);       // Required cost attribute
3327ins_attrib ins_size(8);         // Required size attribute (in bits)
3328ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3329                                // non-matching short branch variant of some
3330                                                            // long branch?
3331ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3332                                // specifies the alignment that some part of the instruction (not
3333                                // necessarily the start) requires.  If > 1, a compute_padding()
3334                                // function must be provided for the instruction
3335
3336//----------OPERANDS-----------------------------------------------------------
3337// Operand definitions must precede instruction definitions for correct parsing
3338// in the ADLC because operands constitute user defined types which are used in
3339// instruction definitions.
3340
3341//----------Simple Operands----------------------------------------------------
3342// Immediate Operands
3343// Integer Immediate
3344operand immI() %{
3345  match(ConI);
3346
3347  op_cost(10);
3348  format %{ %}
3349  interface(CONST_INTER);
3350%}
3351
3352// Constant for test vs zero
3353operand immI0() %{
3354  predicate(n->get_int() == 0);
3355  match(ConI);
3356
3357  op_cost(0);
3358  format %{ %}
3359  interface(CONST_INTER);
3360%}
3361
3362// Constant for increment
3363operand immI1() %{
3364  predicate(n->get_int() == 1);
3365  match(ConI);
3366
3367  op_cost(0);
3368  format %{ %}
3369  interface(CONST_INTER);
3370%}
3371
3372// Constant for decrement
3373operand immI_M1() %{
3374  predicate(n->get_int() == -1);
3375  match(ConI);
3376
3377  op_cost(0);
3378  format %{ %}
3379  interface(CONST_INTER);
3380%}
3381
3382// Valid scale values for addressing modes
3383operand immI2() %{
3384  predicate(0 <= n->get_int() && (n->get_int() <= 3));
3385  match(ConI);
3386
3387  format %{ %}
3388  interface(CONST_INTER);
3389%}
3390
3391operand immI8() %{
3392  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3393  match(ConI);
3394
3395  op_cost(5);
3396  format %{ %}
3397  interface(CONST_INTER);
3398%}
3399
3400operand immI16() %{
3401  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3402  match(ConI);
3403
3404  op_cost(10);
3405  format %{ %}
3406  interface(CONST_INTER);
3407%}
3408
3409// Int Immediate non-negative
3410operand immU31()
3411%{
3412  predicate(n->get_int() >= 0);
3413  match(ConI);
3414
3415  op_cost(0);
3416  format %{ %}
3417  interface(CONST_INTER);
3418%}
3419
3420// Constant for long shifts
3421operand immI_32() %{
3422  predicate( n->get_int() == 32 );
3423  match(ConI);
3424
3425  op_cost(0);
3426  format %{ %}
3427  interface(CONST_INTER);
3428%}
3429
3430operand immI_1_31() %{
3431  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3432  match(ConI);
3433
3434  op_cost(0);
3435  format %{ %}
3436  interface(CONST_INTER);
3437%}
3438
3439operand immI_32_63() %{
3440  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3441  match(ConI);
3442  op_cost(0);
3443
3444  format %{ %}
3445  interface(CONST_INTER);
3446%}
3447
3448operand immI_1() %{
3449  predicate( n->get_int() == 1 );
3450  match(ConI);
3451
3452  op_cost(0);
3453  format %{ %}
3454  interface(CONST_INTER);
3455%}
3456
3457operand immI_2() %{
3458  predicate( n->get_int() == 2 );
3459  match(ConI);
3460
3461  op_cost(0);
3462  format %{ %}
3463  interface(CONST_INTER);
3464%}
3465
3466operand immI_3() %{
3467  predicate( n->get_int() == 3 );
3468  match(ConI);
3469
3470  op_cost(0);
3471  format %{ %}
3472  interface(CONST_INTER);
3473%}
3474
3475// Pointer Immediate
3476operand immP() %{
3477  match(ConP);
3478
3479  op_cost(10);
3480  format %{ %}
3481  interface(CONST_INTER);
3482%}
3483
3484// NULL Pointer Immediate
3485operand immP0() %{
3486  predicate( n->get_ptr() == 0 );
3487  match(ConP);
3488  op_cost(0);
3489
3490  format %{ %}
3491  interface(CONST_INTER);
3492%}
3493
3494// Long Immediate
3495operand immL() %{
3496  match(ConL);
3497
3498  op_cost(20);
3499  format %{ %}
3500  interface(CONST_INTER);
3501%}
3502
3503// Long Immediate zero
3504operand immL0() %{
3505  predicate( n->get_long() == 0L );
3506  match(ConL);
3507  op_cost(0);
3508
3509  format %{ %}
3510  interface(CONST_INTER);
3511%}
3512
3513// Long Immediate zero
3514operand immL_M1() %{
3515  predicate( n->get_long() == -1L );
3516  match(ConL);
3517  op_cost(0);
3518
3519  format %{ %}
3520  interface(CONST_INTER);
3521%}
3522
3523// Long immediate from 0 to 127.
3524// Used for a shorter form of long mul by 10.
3525operand immL_127() %{
3526  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3527  match(ConL);
3528  op_cost(0);
3529
3530  format %{ %}
3531  interface(CONST_INTER);
3532%}
3533
3534// Long Immediate: low 32-bit mask
3535operand immL_32bits() %{
3536  predicate(n->get_long() == 0xFFFFFFFFL);
3537  match(ConL);
3538  op_cost(0);
3539
3540  format %{ %}
3541  interface(CONST_INTER);
3542%}
3543
3544// Long Immediate: low 32-bit mask
3545operand immL32() %{
3546  predicate(n->get_long() == (int)(n->get_long()));
3547  match(ConL);
3548  op_cost(20);
3549
3550  format %{ %}
3551  interface(CONST_INTER);
3552%}
3553
3554//Double Immediate zero
3555operand immDPR0() %{
3556  // Do additional (and counter-intuitive) test against NaN to work around VC++
3557  // bug that generates code such that NaNs compare equal to 0.0
3558  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3559  match(ConD);
3560
3561  op_cost(5);
3562  format %{ %}
3563  interface(CONST_INTER);
3564%}
3565
3566// Double Immediate one
3567operand immDPR1() %{
3568  predicate( UseSSE<=1 && n->getd() == 1.0 );
3569  match(ConD);
3570
3571  op_cost(5);
3572  format %{ %}
3573  interface(CONST_INTER);
3574%}
3575
3576// Double Immediate
3577operand immDPR() %{
3578  predicate(UseSSE<=1);
3579  match(ConD);
3580
3581  op_cost(5);
3582  format %{ %}
3583  interface(CONST_INTER);
3584%}
3585
3586operand immD() %{
3587  predicate(UseSSE>=2);
3588  match(ConD);
3589
3590  op_cost(5);
3591  format %{ %}
3592  interface(CONST_INTER);
3593%}
3594
3595// Double Immediate zero
3596operand immD0() %{
3597  // Do additional (and counter-intuitive) test against NaN to work around VC++
3598  // bug that generates code such that NaNs compare equal to 0.0 AND do not
3599  // compare equal to -0.0.
3600  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3601  match(ConD);
3602
3603  format %{ %}
3604  interface(CONST_INTER);
3605%}
3606
3607// Float Immediate zero
3608operand immFPR0() %{
3609  predicate(UseSSE == 0 && n->getf() == 0.0F);
3610  match(ConF);
3611
3612  op_cost(5);
3613  format %{ %}
3614  interface(CONST_INTER);
3615%}
3616
3617// Float Immediate one
3618operand immFPR1() %{
3619  predicate(UseSSE == 0 && n->getf() == 1.0F);
3620  match(ConF);
3621
3622  op_cost(5);
3623  format %{ %}
3624  interface(CONST_INTER);
3625%}
3626
3627// Float Immediate
3628operand immFPR() %{
3629  predicate( UseSSE == 0 );
3630  match(ConF);
3631
3632  op_cost(5);
3633  format %{ %}
3634  interface(CONST_INTER);
3635%}
3636
3637// Float Immediate
3638operand immF() %{
3639  predicate(UseSSE >= 1);
3640  match(ConF);
3641
3642  op_cost(5);
3643  format %{ %}
3644  interface(CONST_INTER);
3645%}
3646
3647// Float Immediate zero.  Zero and not -0.0
3648operand immF0() %{
3649  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3650  match(ConF);
3651
3652  op_cost(5);
3653  format %{ %}
3654  interface(CONST_INTER);
3655%}
3656
3657// Immediates for special shifts (sign extend)
3658
3659// Constants for increment
3660operand immI_16() %{
3661  predicate( n->get_int() == 16 );
3662  match(ConI);
3663
3664  format %{ %}
3665  interface(CONST_INTER);
3666%}
3667
3668operand immI_24() %{
3669  predicate( n->get_int() == 24 );
3670  match(ConI);
3671
3672  format %{ %}
3673  interface(CONST_INTER);
3674%}
3675
3676// Constant for byte-wide masking
3677operand immI_255() %{
3678  predicate( n->get_int() == 255 );
3679  match(ConI);
3680
3681  format %{ %}
3682  interface(CONST_INTER);
3683%}
3684
3685// Constant for short-wide masking
3686operand immI_65535() %{
3687  predicate(n->get_int() == 65535);
3688  match(ConI);
3689
3690  format %{ %}
3691  interface(CONST_INTER);
3692%}
3693
3694// Register Operands
3695// Integer Register
3696operand rRegI() %{
3697  constraint(ALLOC_IN_RC(int_reg));
3698  match(RegI);
3699  match(xRegI);
3700  match(eAXRegI);
3701  match(eBXRegI);
3702  match(eCXRegI);
3703  match(eDXRegI);
3704  match(eDIRegI);
3705  match(eSIRegI);
3706
3707  format %{ %}
3708  interface(REG_INTER);
3709%}
3710
3711// Subset of Integer Register
3712operand xRegI(rRegI reg) %{
3713  constraint(ALLOC_IN_RC(int_x_reg));
3714  match(reg);
3715  match(eAXRegI);
3716  match(eBXRegI);
3717  match(eCXRegI);
3718  match(eDXRegI);
3719
3720  format %{ %}
3721  interface(REG_INTER);
3722%}
3723
3724// Special Registers
3725operand eAXRegI(xRegI reg) %{
3726  constraint(ALLOC_IN_RC(eax_reg));
3727  match(reg);
3728  match(rRegI);
3729
3730  format %{ "EAX" %}
3731  interface(REG_INTER);
3732%}
3733
3734// Special Registers
3735operand eBXRegI(xRegI reg) %{
3736  constraint(ALLOC_IN_RC(ebx_reg));
3737  match(reg);
3738  match(rRegI);
3739
3740  format %{ "EBX" %}
3741  interface(REG_INTER);
3742%}
3743
3744operand eCXRegI(xRegI reg) %{
3745  constraint(ALLOC_IN_RC(ecx_reg));
3746  match(reg);
3747  match(rRegI);
3748
3749  format %{ "ECX" %}
3750  interface(REG_INTER);
3751%}
3752
3753operand eDXRegI(xRegI reg) %{
3754  constraint(ALLOC_IN_RC(edx_reg));
3755  match(reg);
3756  match(rRegI);
3757
3758  format %{ "EDX" %}
3759  interface(REG_INTER);
3760%}
3761
3762operand eDIRegI(xRegI reg) %{
3763  constraint(ALLOC_IN_RC(edi_reg));
3764  match(reg);
3765  match(rRegI);
3766
3767  format %{ "EDI" %}
3768  interface(REG_INTER);
3769%}
3770
3771operand naxRegI() %{
3772  constraint(ALLOC_IN_RC(nax_reg));
3773  match(RegI);
3774  match(eCXRegI);
3775  match(eDXRegI);
3776  match(eSIRegI);
3777  match(eDIRegI);
3778
3779  format %{ %}
3780  interface(REG_INTER);
3781%}
3782
3783operand nadxRegI() %{
3784  constraint(ALLOC_IN_RC(nadx_reg));
3785  match(RegI);
3786  match(eBXRegI);
3787  match(eCXRegI);
3788  match(eSIRegI);
3789  match(eDIRegI);
3790
3791  format %{ %}
3792  interface(REG_INTER);
3793%}
3794
3795operand ncxRegI() %{
3796  constraint(ALLOC_IN_RC(ncx_reg));
3797  match(RegI);
3798  match(eAXRegI);
3799  match(eDXRegI);
3800  match(eSIRegI);
3801  match(eDIRegI);
3802
3803  format %{ %}
3804  interface(REG_INTER);
3805%}
3806
3807// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3808// //
3809operand eSIRegI(xRegI reg) %{
3810   constraint(ALLOC_IN_RC(esi_reg));
3811   match(reg);
3812   match(rRegI);
3813
3814   format %{ "ESI" %}
3815   interface(REG_INTER);
3816%}
3817
3818// Pointer Register
3819operand anyRegP() %{
3820  constraint(ALLOC_IN_RC(any_reg));
3821  match(RegP);
3822  match(eAXRegP);
3823  match(eBXRegP);
3824  match(eCXRegP);
3825  match(eDIRegP);
3826  match(eRegP);
3827
3828  format %{ %}
3829  interface(REG_INTER);
3830%}
3831
3832operand eRegP() %{
3833  constraint(ALLOC_IN_RC(int_reg));
3834  match(RegP);
3835  match(eAXRegP);
3836  match(eBXRegP);
3837  match(eCXRegP);
3838  match(eDIRegP);
3839
3840  format %{ %}
3841  interface(REG_INTER);
3842%}
3843
3844// On windows95, EBP is not safe to use for implicit null tests.
3845operand eRegP_no_EBP() %{
3846  constraint(ALLOC_IN_RC(int_reg_no_ebp));
3847  match(RegP);
3848  match(eAXRegP);
3849  match(eBXRegP);
3850  match(eCXRegP);
3851  match(eDIRegP);
3852
3853  op_cost(100);
3854  format %{ %}
3855  interface(REG_INTER);
3856%}
3857
3858operand naxRegP() %{
3859  constraint(ALLOC_IN_RC(nax_reg));
3860  match(RegP);
3861  match(eBXRegP);
3862  match(eDXRegP);
3863  match(eCXRegP);
3864  match(eSIRegP);
3865  match(eDIRegP);
3866
3867  format %{ %}
3868  interface(REG_INTER);
3869%}
3870
3871operand nabxRegP() %{
3872  constraint(ALLOC_IN_RC(nabx_reg));
3873  match(RegP);
3874  match(eCXRegP);
3875  match(eDXRegP);
3876  match(eSIRegP);
3877  match(eDIRegP);
3878
3879  format %{ %}
3880  interface(REG_INTER);
3881%}
3882
3883operand pRegP() %{
3884  constraint(ALLOC_IN_RC(p_reg));
3885  match(RegP);
3886  match(eBXRegP);
3887  match(eDXRegP);
3888  match(eSIRegP);
3889  match(eDIRegP);
3890
3891  format %{ %}
3892  interface(REG_INTER);
3893%}
3894
3895// Special Registers
3896// Return a pointer value
3897operand eAXRegP(eRegP reg) %{
3898  constraint(ALLOC_IN_RC(eax_reg));
3899  match(reg);
3900  format %{ "EAX" %}
3901  interface(REG_INTER);
3902%}
3903
3904// Used in AtomicAdd
3905operand eBXRegP(eRegP reg) %{
3906  constraint(ALLOC_IN_RC(ebx_reg));
3907  match(reg);
3908  format %{ "EBX" %}
3909  interface(REG_INTER);
3910%}
3911
3912// Tail-call (interprocedural jump) to interpreter
3913operand eCXRegP(eRegP reg) %{
3914  constraint(ALLOC_IN_RC(ecx_reg));
3915  match(reg);
3916  format %{ "ECX" %}
3917  interface(REG_INTER);
3918%}
3919
3920operand eSIRegP(eRegP reg) %{
3921  constraint(ALLOC_IN_RC(esi_reg));
3922  match(reg);
3923  format %{ "ESI" %}
3924  interface(REG_INTER);
3925%}
3926
3927// Used in rep stosw
3928operand eDIRegP(eRegP reg) %{
3929  constraint(ALLOC_IN_RC(edi_reg));
3930  match(reg);
3931  format %{ "EDI" %}
3932  interface(REG_INTER);
3933%}
3934
3935operand eRegL() %{
3936  constraint(ALLOC_IN_RC(long_reg));
3937  match(RegL);
3938  match(eADXRegL);
3939
3940  format %{ %}
3941  interface(REG_INTER);
3942%}
3943
3944operand eADXRegL( eRegL reg ) %{
3945  constraint(ALLOC_IN_RC(eadx_reg));
3946  match(reg);
3947
3948  format %{ "EDX:EAX" %}
3949  interface(REG_INTER);
3950%}
3951
3952operand eBCXRegL( eRegL reg ) %{
3953  constraint(ALLOC_IN_RC(ebcx_reg));
3954  match(reg);
3955
3956  format %{ "EBX:ECX" %}
3957  interface(REG_INTER);
3958%}
3959
3960// Special case for integer high multiply
3961operand eADXRegL_low_only() %{
3962  constraint(ALLOC_IN_RC(eadx_reg));
3963  match(RegL);
3964
3965  format %{ "EAX" %}
3966  interface(REG_INTER);
3967%}
3968
3969// Flags register, used as output of compare instructions
3970operand eFlagsReg() %{
3971  constraint(ALLOC_IN_RC(int_flags));
3972  match(RegFlags);
3973
3974  format %{ "EFLAGS" %}
3975  interface(REG_INTER);
3976%}
3977
3978// Flags register, used as output of FLOATING POINT compare instructions
3979operand eFlagsRegU() %{
3980  constraint(ALLOC_IN_RC(int_flags));
3981  match(RegFlags);
3982
3983  format %{ "EFLAGS_U" %}
3984  interface(REG_INTER);
3985%}
3986
3987operand eFlagsRegUCF() %{
3988  constraint(ALLOC_IN_RC(int_flags));
3989  match(RegFlags);
3990  predicate(false);
3991
3992  format %{ "EFLAGS_U_CF" %}
3993  interface(REG_INTER);
3994%}
3995
3996// Condition Code Register used by long compare
3997operand flagsReg_long_LTGE() %{
3998  constraint(ALLOC_IN_RC(int_flags));
3999  match(RegFlags);
4000  format %{ "FLAGS_LTGE" %}
4001  interface(REG_INTER);
4002%}
4003operand flagsReg_long_EQNE() %{
4004  constraint(ALLOC_IN_RC(int_flags));
4005  match(RegFlags);
4006  format %{ "FLAGS_EQNE" %}
4007  interface(REG_INTER);
4008%}
4009operand flagsReg_long_LEGT() %{
4010  constraint(ALLOC_IN_RC(int_flags));
4011  match(RegFlags);
4012  format %{ "FLAGS_LEGT" %}
4013  interface(REG_INTER);
4014%}
4015
4016// Condition Code Register used by unsigned long compare
4017operand flagsReg_ulong_LTGE() %{
4018  constraint(ALLOC_IN_RC(int_flags));
4019  match(RegFlags);
4020  format %{ "FLAGS_U_LTGE" %}
4021  interface(REG_INTER);
4022%}
4023operand flagsReg_ulong_EQNE() %{
4024  constraint(ALLOC_IN_RC(int_flags));
4025  match(RegFlags);
4026  format %{ "FLAGS_U_EQNE" %}
4027  interface(REG_INTER);
4028%}
4029operand flagsReg_ulong_LEGT() %{
4030  constraint(ALLOC_IN_RC(int_flags));
4031  match(RegFlags);
4032  format %{ "FLAGS_U_LEGT" %}
4033  interface(REG_INTER);
4034%}
4035
4036// Float register operands
4037operand regDPR() %{
4038  predicate( UseSSE < 2 );
4039  constraint(ALLOC_IN_RC(fp_dbl_reg));
4040  match(RegD);
4041  match(regDPR1);
4042  match(regDPR2);
4043  format %{ %}
4044  interface(REG_INTER);
4045%}
4046
4047operand regDPR1(regDPR reg) %{
4048  predicate( UseSSE < 2 );
4049  constraint(ALLOC_IN_RC(fp_dbl_reg0));
4050  match(reg);
4051  format %{ "FPR1" %}
4052  interface(REG_INTER);
4053%}
4054
4055operand regDPR2(regDPR reg) %{
4056  predicate( UseSSE < 2 );
4057  constraint(ALLOC_IN_RC(fp_dbl_reg1));
4058  match(reg);
4059  format %{ "FPR2" %}
4060  interface(REG_INTER);
4061%}
4062
4063operand regnotDPR1(regDPR reg) %{
4064  predicate( UseSSE < 2 );
4065  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4066  match(reg);
4067  format %{ %}
4068  interface(REG_INTER);
4069%}
4070
4071// Float register operands
4072operand regFPR() %{
4073  predicate( UseSSE < 2 );
4074  constraint(ALLOC_IN_RC(fp_flt_reg));
4075  match(RegF);
4076  match(regFPR1);
4077  format %{ %}
4078  interface(REG_INTER);
4079%}
4080
4081// Float register operands
4082operand regFPR1(regFPR reg) %{
4083  predicate( UseSSE < 2 );
4084  constraint(ALLOC_IN_RC(fp_flt_reg0));
4085  match(reg);
4086  format %{ "FPR1" %}
4087  interface(REG_INTER);
4088%}
4089
4090// XMM Float register operands
4091operand regF() %{
4092  predicate( UseSSE>=1 );
4093  constraint(ALLOC_IN_RC(float_reg_legacy));
4094  match(RegF);
4095  format %{ %}
4096  interface(REG_INTER);
4097%}
4098
4099// Float register operands
4100operand vlRegF() %{
4101   constraint(ALLOC_IN_RC(float_reg_vl));
4102   match(RegF);
4103
4104   format %{ %}
4105   interface(REG_INTER);
4106%}
4107
4108// XMM Double register operands
4109operand regD() %{
4110  predicate( UseSSE>=2 );
4111  constraint(ALLOC_IN_RC(double_reg_legacy));
4112  match(RegD);
4113  format %{ %}
4114  interface(REG_INTER);
4115%}
4116
4117// Double register operands
4118operand vlRegD() %{
4119   constraint(ALLOC_IN_RC(double_reg_vl));
4120   match(RegD);
4121
4122   format %{ %}
4123   interface(REG_INTER);
4124%}
4125
4126// Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
4127// runtime code generation via reg_class_dynamic.
4128operand vecS() %{
4129  constraint(ALLOC_IN_RC(vectors_reg_legacy));
4130  match(VecS);
4131
4132  format %{ %}
4133  interface(REG_INTER);
4134%}
4135
4136operand legVecS() %{
4137  constraint(ALLOC_IN_RC(vectors_reg_legacy));
4138  match(VecS);
4139
4140  format %{ %}
4141  interface(REG_INTER);
4142%}
4143
4144operand vecD() %{
4145  constraint(ALLOC_IN_RC(vectord_reg_legacy));
4146  match(VecD);
4147
4148  format %{ %}
4149  interface(REG_INTER);
4150%}
4151
4152operand legVecD() %{
4153  constraint(ALLOC_IN_RC(vectord_reg_legacy));
4154  match(VecD);
4155
4156  format %{ %}
4157  interface(REG_INTER);
4158%}
4159
4160operand vecX() %{
4161  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4162  match(VecX);
4163
4164  format %{ %}
4165  interface(REG_INTER);
4166%}
4167
4168operand legVecX() %{
4169  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4170  match(VecX);
4171
4172  format %{ %}
4173  interface(REG_INTER);
4174%}
4175
4176operand vecY() %{
4177  constraint(ALLOC_IN_RC(vectory_reg_legacy));
4178  match(VecY);
4179
4180  format %{ %}
4181  interface(REG_INTER);
4182%}
4183
4184operand legVecY() %{
4185  constraint(ALLOC_IN_RC(vectory_reg_legacy));
4186  match(VecY);
4187
4188  format %{ %}
4189  interface(REG_INTER);
4190%}
4191
4192//----------Memory Operands----------------------------------------------------
4193// Direct Memory Operand
4194operand direct(immP addr) %{
4195  match(addr);
4196
4197  format %{ "[$addr]" %}
4198  interface(MEMORY_INTER) %{
4199    base(0xFFFFFFFF);
4200    index(0x4);
4201    scale(0x0);
4202    disp($addr);
4203  %}
4204%}
4205
4206// Indirect Memory Operand
4207operand indirect(eRegP reg) %{
4208  constraint(ALLOC_IN_RC(int_reg));
4209  match(reg);
4210
4211  format %{ "[$reg]" %}
4212  interface(MEMORY_INTER) %{
4213    base($reg);
4214    index(0x4);
4215    scale(0x0);
4216    disp(0x0);
4217  %}
4218%}
4219
4220// Indirect Memory Plus Short Offset Operand
4221operand indOffset8(eRegP reg, immI8 off) %{
4222  match(AddP reg off);
4223
4224  format %{ "[$reg + $off]" %}
4225  interface(MEMORY_INTER) %{
4226    base($reg);
4227    index(0x4);
4228    scale(0x0);
4229    disp($off);
4230  %}
4231%}
4232
4233// Indirect Memory Plus Long Offset Operand
4234operand indOffset32(eRegP reg, immI off) %{
4235  match(AddP reg off);
4236
4237  format %{ "[$reg + $off]" %}
4238  interface(MEMORY_INTER) %{
4239    base($reg);
4240    index(0x4);
4241    scale(0x0);
4242    disp($off);
4243  %}
4244%}
4245
4246// Indirect Memory Plus Long Offset Operand
4247operand indOffset32X(rRegI reg, immP off) %{
4248  match(AddP off reg);
4249
4250  format %{ "[$reg + $off]" %}
4251  interface(MEMORY_INTER) %{
4252    base($reg);
4253    index(0x4);
4254    scale(0x0);
4255    disp($off);
4256  %}
4257%}
4258
4259// Indirect Memory Plus Index Register Plus Offset Operand
4260operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4261  match(AddP (AddP reg ireg) off);
4262
4263  op_cost(10);
4264  format %{"[$reg + $off + $ireg]" %}
4265  interface(MEMORY_INTER) %{
4266    base($reg);
4267    index($ireg);
4268    scale(0x0);
4269    disp($off);
4270  %}
4271%}
4272
4273// Indirect Memory Plus Index Register Plus Offset Operand
4274operand indIndex(eRegP reg, rRegI ireg) %{
4275  match(AddP reg ireg);
4276
4277  op_cost(10);
4278  format %{"[$reg + $ireg]" %}
4279  interface(MEMORY_INTER) %{
4280    base($reg);
4281    index($ireg);
4282    scale(0x0);
4283    disp(0x0);
4284  %}
4285%}
4286
4287// // -------------------------------------------------------------------------
4288// // 486 architecture doesn't support "scale * index + offset" with out a base
4289// // -------------------------------------------------------------------------
4290// // Scaled Memory Operands
4291// // Indirect Memory Times Scale Plus Offset Operand
4292// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4293//   match(AddP off (LShiftI ireg scale));
4294//
4295//   op_cost(10);
4296//   format %{"[$off + $ireg << $scale]" %}
4297//   interface(MEMORY_INTER) %{
4298//     base(0x4);
4299//     index($ireg);
4300//     scale($scale);
4301//     disp($off);
4302//   %}
4303// %}
4304
4305// Indirect Memory Times Scale Plus Index Register
4306operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4307  match(AddP reg (LShiftI ireg scale));
4308
4309  op_cost(10);
4310  format %{"[$reg + $ireg << $scale]" %}
4311  interface(MEMORY_INTER) %{
4312    base($reg);
4313    index($ireg);
4314    scale($scale);
4315    disp(0x0);
4316  %}
4317%}
4318
4319// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4320operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4321  match(AddP (AddP reg (LShiftI ireg scale)) off);
4322
4323  op_cost(10);
4324  format %{"[$reg + $off + $ireg << $scale]" %}
4325  interface(MEMORY_INTER) %{
4326    base($reg);
4327    index($ireg);
4328    scale($scale);
4329    disp($off);
4330  %}
4331%}
4332
4333//----------Load Long Memory Operands------------------------------------------
4334// The load-long idiom will use it's address expression again after loading
4335// the first word of the long.  If the load-long destination overlaps with
4336// registers used in the addressing expression, the 2nd half will be loaded
4337// from a clobbered address.  Fix this by requiring that load-long use
4338// address registers that do not overlap with the load-long target.
4339
4340// load-long support
4341operand load_long_RegP() %{
4342  constraint(ALLOC_IN_RC(esi_reg));
4343  match(RegP);
4344  match(eSIRegP);
4345  op_cost(100);
4346  format %{  %}
4347  interface(REG_INTER);
4348%}
4349
4350// Indirect Memory Operand Long
4351operand load_long_indirect(load_long_RegP reg) %{
4352  constraint(ALLOC_IN_RC(esi_reg));
4353  match(reg);
4354
4355  format %{ "[$reg]" %}
4356  interface(MEMORY_INTER) %{
4357    base($reg);
4358    index(0x4);
4359    scale(0x0);
4360    disp(0x0);
4361  %}
4362%}
4363
4364// Indirect Memory Plus Long Offset Operand
4365operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4366  match(AddP reg off);
4367
4368  format %{ "[$reg + $off]" %}
4369  interface(MEMORY_INTER) %{
4370    base($reg);
4371    index(0x4);
4372    scale(0x0);
4373    disp($off);
4374  %}
4375%}
4376
4377opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4378
4379
4380//----------Special Memory Operands--------------------------------------------
4381// Stack Slot Operand - This operand is used for loading and storing temporary
4382//                      values on the stack where a match requires a value to
4383//                      flow through memory.
4384operand stackSlotP(sRegP reg) %{
4385  constraint(ALLOC_IN_RC(stack_slots));
4386  // No match rule because this operand is only generated in matching
4387  format %{ "[$reg]" %}
4388  interface(MEMORY_INTER) %{
4389    base(0x4);   // ESP
4390    index(0x4);  // No Index
4391    scale(0x0);  // No Scale
4392    disp($reg);  // Stack Offset
4393  %}
4394%}
4395
4396operand stackSlotI(sRegI reg) %{
4397  constraint(ALLOC_IN_RC(stack_slots));
4398  // No match rule because this operand is only generated in matching
4399  format %{ "[$reg]" %}
4400  interface(MEMORY_INTER) %{
4401    base(0x4);   // ESP
4402    index(0x4);  // No Index
4403    scale(0x0);  // No Scale
4404    disp($reg);  // Stack Offset
4405  %}
4406%}
4407
4408operand stackSlotF(sRegF reg) %{
4409  constraint(ALLOC_IN_RC(stack_slots));
4410  // No match rule because this operand is only generated in matching
4411  format %{ "[$reg]" %}
4412  interface(MEMORY_INTER) %{
4413    base(0x4);   // ESP
4414    index(0x4);  // No Index
4415    scale(0x0);  // No Scale
4416    disp($reg);  // Stack Offset
4417  %}
4418%}
4419
4420operand stackSlotD(sRegD reg) %{
4421  constraint(ALLOC_IN_RC(stack_slots));
4422  // No match rule because this operand is only generated in matching
4423  format %{ "[$reg]" %}
4424  interface(MEMORY_INTER) %{
4425    base(0x4);   // ESP
4426    index(0x4);  // No Index
4427    scale(0x0);  // No Scale
4428    disp($reg);  // Stack Offset
4429  %}
4430%}
4431
4432operand stackSlotL(sRegL reg) %{
4433  constraint(ALLOC_IN_RC(stack_slots));
4434  // No match rule because this operand is only generated in matching
4435  format %{ "[$reg]" %}
4436  interface(MEMORY_INTER) %{
4437    base(0x4);   // ESP
4438    index(0x4);  // No Index
4439    scale(0x0);  // No Scale
4440    disp($reg);  // Stack Offset
4441  %}
4442%}
4443
4444//----------Memory Operands - Win95 Implicit Null Variants----------------
4445// Indirect Memory Operand
4446operand indirect_win95_safe(eRegP_no_EBP reg)
4447%{
4448  constraint(ALLOC_IN_RC(int_reg));
4449  match(reg);
4450
4451  op_cost(100);
4452  format %{ "[$reg]" %}
4453  interface(MEMORY_INTER) %{
4454    base($reg);
4455    index(0x4);
4456    scale(0x0);
4457    disp(0x0);
4458  %}
4459%}
4460
4461// Indirect Memory Plus Short Offset Operand
4462operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4463%{
4464  match(AddP reg off);
4465
4466  op_cost(100);
4467  format %{ "[$reg + $off]" %}
4468  interface(MEMORY_INTER) %{
4469    base($reg);
4470    index(0x4);
4471    scale(0x0);
4472    disp($off);
4473  %}
4474%}
4475
4476// Indirect Memory Plus Long Offset Operand
4477operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4478%{
4479  match(AddP reg off);
4480
4481  op_cost(100);
4482  format %{ "[$reg + $off]" %}
4483  interface(MEMORY_INTER) %{
4484    base($reg);
4485    index(0x4);
4486    scale(0x0);
4487    disp($off);
4488  %}
4489%}
4490
4491// Indirect Memory Plus Index Register Plus Offset Operand
4492operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4493%{
4494  match(AddP (AddP reg ireg) off);
4495
4496  op_cost(100);
4497  format %{"[$reg + $off + $ireg]" %}
4498  interface(MEMORY_INTER) %{
4499    base($reg);
4500    index($ireg);
4501    scale(0x0);
4502    disp($off);
4503  %}
4504%}
4505
4506// Indirect Memory Times Scale Plus Index Register
4507operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4508%{
4509  match(AddP reg (LShiftI ireg scale));
4510
4511  op_cost(100);
4512  format %{"[$reg + $ireg << $scale]" %}
4513  interface(MEMORY_INTER) %{
4514    base($reg);
4515    index($ireg);
4516    scale($scale);
4517    disp(0x0);
4518  %}
4519%}
4520
4521// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4522operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4523%{
4524  match(AddP (AddP reg (LShiftI ireg scale)) off);
4525
4526  op_cost(100);
4527  format %{"[$reg + $off + $ireg << $scale]" %}
4528  interface(MEMORY_INTER) %{
4529    base($reg);
4530    index($ireg);
4531    scale($scale);
4532    disp($off);
4533  %}
4534%}
4535
4536//----------Conditional Branch Operands----------------------------------------
4537// Comparison Op  - This is the operation of the comparison, and is limited to
4538//                  the following set of codes:
4539//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4540//
4541// Other attributes of the comparison, such as unsignedness, are specified
4542// by the comparison instruction that sets a condition code flags register.
4543// That result is represented by a flags operand whose subtype is appropriate
4544// to the unsignedness (etc.) of the comparison.
4545//
4546// Later, the instruction which matches both the Comparison Op (a Bool) and
4547// the flags (produced by the Cmp) specifies the coding of the comparison op
4548// by matching a specific subtype of Bool operand below, such as cmpOpU.
4549
4550// Comparision Code
4551operand cmpOp() %{
4552  match(Bool);
4553
4554  format %{ "" %}
4555  interface(COND_INTER) %{
4556    equal(0x4, "e");
4557    not_equal(0x5, "ne");
4558    less(0xC, "l");
4559    greater_equal(0xD, "ge");
4560    less_equal(0xE, "le");
4561    greater(0xF, "g");
4562    overflow(0x0, "o");
4563    no_overflow(0x1, "no");
4564  %}
4565%}
4566
4567// Comparison Code, unsigned compare.  Used by FP also, with
4568// C2 (unordered) turned into GT or LT already.  The other bits
4569// C0 and C3 are turned into Carry & Zero flags.
4570operand cmpOpU() %{
4571  match(Bool);
4572
4573  format %{ "" %}
4574  interface(COND_INTER) %{
4575    equal(0x4, "e");
4576    not_equal(0x5, "ne");
4577    less(0x2, "b");
4578    greater_equal(0x3, "nb");
4579    less_equal(0x6, "be");
4580    greater(0x7, "nbe");
4581    overflow(0x0, "o");
4582    no_overflow(0x1, "no");
4583  %}
4584%}
4585
4586// Floating comparisons that don't require any fixup for the unordered case
4587operand cmpOpUCF() %{
4588  match(Bool);
4589  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4590            n->as_Bool()->_test._test == BoolTest::ge ||
4591            n->as_Bool()->_test._test == BoolTest::le ||
4592            n->as_Bool()->_test._test == BoolTest::gt);
4593  format %{ "" %}
4594  interface(COND_INTER) %{
4595    equal(0x4, "e");
4596    not_equal(0x5, "ne");
4597    less(0x2, "b");
4598    greater_equal(0x3, "nb");
4599    less_equal(0x6, "be");
4600    greater(0x7, "nbe");
4601    overflow(0x0, "o");
4602    no_overflow(0x1, "no");
4603  %}
4604%}
4605
4606
4607// Floating comparisons that can be fixed up with extra conditional jumps
4608operand cmpOpUCF2() %{
4609  match(Bool);
4610  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4611            n->as_Bool()->_test._test == BoolTest::eq);
4612  format %{ "" %}
4613  interface(COND_INTER) %{
4614    equal(0x4, "e");
4615    not_equal(0x5, "ne");
4616    less(0x2, "b");
4617    greater_equal(0x3, "nb");
4618    less_equal(0x6, "be");
4619    greater(0x7, "nbe");
4620    overflow(0x0, "o");
4621    no_overflow(0x1, "no");
4622  %}
4623%}
4624
4625// Comparison Code for FP conditional move
4626operand cmpOp_fcmov() %{
4627  match(Bool);
4628
4629  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4630            n->as_Bool()->_test._test != BoolTest::no_overflow);
4631  format %{ "" %}
4632  interface(COND_INTER) %{
4633    equal        (0x0C8);
4634    not_equal    (0x1C8);
4635    less         (0x0C0);
4636    greater_equal(0x1C0);
4637    less_equal   (0x0D0);
4638    greater      (0x1D0);
4639    overflow(0x0, "o"); // not really supported by the instruction
4640    no_overflow(0x1, "no"); // not really supported by the instruction
4641  %}
4642%}
4643
4644// Comparison Code used in long compares
4645operand cmpOp_commute() %{
4646  match(Bool);
4647
4648  format %{ "" %}
4649  interface(COND_INTER) %{
4650    equal(0x4, "e");
4651    not_equal(0x5, "ne");
4652    less(0xF, "g");
4653    greater_equal(0xE, "le");
4654    less_equal(0xD, "ge");
4655    greater(0xC, "l");
4656    overflow(0x0, "o");
4657    no_overflow(0x1, "no");
4658  %}
4659%}
4660
4661// Comparison Code used in unsigned long compares
4662operand cmpOpU_commute() %{
4663  match(Bool);
4664
4665  format %{ "" %}
4666  interface(COND_INTER) %{
4667    equal(0x4, "e");
4668    not_equal(0x5, "ne");
4669    less(0x7, "nbe");
4670    greater_equal(0x6, "be");
4671    less_equal(0x3, "nb");
4672    greater(0x2, "b");
4673    overflow(0x0, "o");
4674    no_overflow(0x1, "no");
4675  %}
4676%}
4677
4678//----------OPERAND CLASSES----------------------------------------------------
4679// Operand Classes are groups of operands that are used as to simplify
4680// instruction definitions by not requiring the AD writer to specify separate
4681// instructions for every form of operand when the instruction accepts
4682// multiple operand types with the same basic encoding and format.  The classic
4683// case of this is memory operands.
4684
4685opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4686               indIndex, indIndexScale, indIndexScaleOffset);
4687
4688// Long memory operations are encoded in 2 instructions and a +4 offset.
4689// This means some kind of offset is always required and you cannot use
4690// an oop as the offset (done when working on static globals).
4691opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4692                    indIndex, indIndexScale, indIndexScaleOffset);
4693
4694
4695//----------PIPELINE-----------------------------------------------------------
4696// Rules which define the behavior of the target architectures pipeline.
4697pipeline %{
4698
4699//----------ATTRIBUTES---------------------------------------------------------
4700attributes %{
4701  variable_size_instructions;        // Fixed size instructions
4702  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4703  instruction_unit_size = 1;         // An instruction is 1 bytes long
4704  instruction_fetch_unit_size = 16;  // The processor fetches one line
4705  instruction_fetch_units = 1;       // of 16 bytes
4706
4707  // List of nop instructions
4708  nops( MachNop );
4709%}
4710
4711//----------RESOURCES----------------------------------------------------------
4712// Resources are the functional units available to the machine
4713
4714// Generic P2/P3 pipeline
4715// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4716// 3 instructions decoded per cycle.
4717// 2 load/store ops per cycle, 1 branch, 1 FPU,
4718// 2 ALU op, only ALU0 handles mul/div instructions.
4719resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4720           MS0, MS1, MEM = MS0 | MS1,
4721           BR, FPU,
4722           ALU0, ALU1, ALU = ALU0 | ALU1 );
4723
4724//----------PIPELINE DESCRIPTION-----------------------------------------------
4725// Pipeline Description specifies the stages in the machine's pipeline
4726
4727// Generic P2/P3 pipeline
4728pipe_desc(S0, S1, S2, S3, S4, S5);
4729
4730//----------PIPELINE CLASSES---------------------------------------------------
4731// Pipeline Classes describe the stages in which input and output are
4732// referenced by the hardware pipeline.
4733
4734// Naming convention: ialu or fpu
4735// Then: _reg
4736// Then: _reg if there is a 2nd register
4737// Then: _long if it's a pair of instructions implementing a long
4738// Then: _fat if it requires the big decoder
4739//   Or: _mem if it requires the big decoder and a memory unit.
4740
4741// Integer ALU reg operation
4742pipe_class ialu_reg(rRegI dst) %{
4743    single_instruction;
4744    dst    : S4(write);
4745    dst    : S3(read);
4746    DECODE : S0;        // any decoder
4747    ALU    : S3;        // any alu
4748%}
4749
4750// Long ALU reg operation
4751pipe_class ialu_reg_long(eRegL dst) %{
4752    instruction_count(2);
4753    dst    : S4(write);
4754    dst    : S3(read);
4755    DECODE : S0(2);     // any 2 decoders
4756    ALU    : S3(2);     // both alus
4757%}
4758
4759// Integer ALU reg operation using big decoder
4760pipe_class ialu_reg_fat(rRegI dst) %{
4761    single_instruction;
4762    dst    : S4(write);
4763    dst    : S3(read);
4764    D0     : S0;        // big decoder only
4765    ALU    : S3;        // any alu
4766%}
4767
4768// Long ALU reg operation using big decoder
4769pipe_class ialu_reg_long_fat(eRegL dst) %{
4770    instruction_count(2);
4771    dst    : S4(write);
4772    dst    : S3(read);
4773    D0     : S0(2);     // big decoder only; twice
4774    ALU    : S3(2);     // any 2 alus
4775%}
4776
4777// Integer ALU reg-reg operation
4778pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4779    single_instruction;
4780    dst    : S4(write);
4781    src    : S3(read);
4782    DECODE : S0;        // any decoder
4783    ALU    : S3;        // any alu
4784%}
4785
4786// Long ALU reg-reg operation
4787pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4788    instruction_count(2);
4789    dst    : S4(write);
4790    src    : S3(read);
4791    DECODE : S0(2);     // any 2 decoders
4792    ALU    : S3(2);     // both alus
4793%}
4794
4795// Integer ALU reg-reg operation
4796pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4797    single_instruction;
4798    dst    : S4(write);
4799    src    : S3(read);
4800    D0     : S0;        // big decoder only
4801    ALU    : S3;        // any alu
4802%}
4803
4804// Long ALU reg-reg operation
4805pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4806    instruction_count(2);
4807    dst    : S4(write);
4808    src    : S3(read);
4809    D0     : S0(2);     // big decoder only; twice
4810    ALU    : S3(2);     // both alus
4811%}
4812
4813// Integer ALU reg-mem operation
4814pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4815    single_instruction;
4816    dst    : S5(write);
4817    mem    : S3(read);
4818    D0     : S0;        // big decoder only
4819    ALU    : S4;        // any alu
4820    MEM    : S3;        // any mem
4821%}
4822
4823// Long ALU reg-mem operation
4824pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4825    instruction_count(2);
4826    dst    : S5(write);
4827    mem    : S3(read);
4828    D0     : S0(2);     // big decoder only; twice
4829    ALU    : S4(2);     // any 2 alus
4830    MEM    : S3(2);     // both mems
4831%}
4832
4833// Integer mem operation (prefetch)
4834pipe_class ialu_mem(memory mem)
4835%{
4836    single_instruction;
4837    mem    : S3(read);
4838    D0     : S0;        // big decoder only
4839    MEM    : S3;        // any mem
4840%}
4841
4842// Integer Store to Memory
4843pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4844    single_instruction;
4845    mem    : S3(read);
4846    src    : S5(read);
4847    D0     : S0;        // big decoder only
4848    ALU    : S4;        // any alu
4849    MEM    : S3;
4850%}
4851
4852// Long Store to Memory
4853pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4854    instruction_count(2);
4855    mem    : S3(read);
4856    src    : S5(read);
4857    D0     : S0(2);     // big decoder only; twice
4858    ALU    : S4(2);     // any 2 alus
4859    MEM    : S3(2);     // Both mems
4860%}
4861
4862// Integer Store to Memory
4863pipe_class ialu_mem_imm(memory mem) %{
4864    single_instruction;
4865    mem    : S3(read);
4866    D0     : S0;        // big decoder only
4867    ALU    : S4;        // any alu
4868    MEM    : S3;
4869%}
4870
4871// Integer ALU0 reg-reg operation
4872pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4873    single_instruction;
4874    dst    : S4(write);
4875    src    : S3(read);
4876    D0     : S0;        // Big decoder only
4877    ALU0   : S3;        // only alu0
4878%}
4879
4880// Integer ALU0 reg-mem operation
4881pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4882    single_instruction;
4883    dst    : S5(write);
4884    mem    : S3(read);
4885    D0     : S0;        // big decoder only
4886    ALU0   : S4;        // ALU0 only
4887    MEM    : S3;        // any mem
4888%}
4889
4890// Integer ALU reg-reg operation
4891pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4892    single_instruction;
4893    cr     : S4(write);
4894    src1   : S3(read);
4895    src2   : S3(read);
4896    DECODE : S0;        // any decoder
4897    ALU    : S3;        // any alu
4898%}
4899
4900// Integer ALU reg-imm operation
4901pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4902    single_instruction;
4903    cr     : S4(write);
4904    src1   : S3(read);
4905    DECODE : S0;        // any decoder
4906    ALU    : S3;        // any alu
4907%}
4908
4909// Integer ALU reg-mem operation
4910pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4911    single_instruction;
4912    cr     : S4(write);
4913    src1   : S3(read);
4914    src2   : S3(read);
4915    D0     : S0;        // big decoder only
4916    ALU    : S4;        // any alu
4917    MEM    : S3;
4918%}
4919
4920// Conditional move reg-reg
4921pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4922    instruction_count(4);
4923    y      : S4(read);
4924    q      : S3(read);
4925    p      : S3(read);
4926    DECODE : S0(4);     // any decoder
4927%}
4928
4929// Conditional move reg-reg
4930pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4931    single_instruction;
4932    dst    : S4(write);
4933    src    : S3(read);
4934    cr     : S3(read);
4935    DECODE : S0;        // any decoder
4936%}
4937
4938// Conditional move reg-mem
4939pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4940    single_instruction;
4941    dst    : S4(write);
4942    src    : S3(read);
4943    cr     : S3(read);
4944    DECODE : S0;        // any decoder
4945    MEM    : S3;
4946%}
4947
4948// Conditional move reg-reg long
4949pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4950    single_instruction;
4951    dst    : S4(write);
4952    src    : S3(read);
4953    cr     : S3(read);
4954    DECODE : S0(2);     // any 2 decoders
4955%}
4956
4957// Conditional move double reg-reg
4958pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4959    single_instruction;
4960    dst    : S4(write);
4961    src    : S3(read);
4962    cr     : S3(read);
4963    DECODE : S0;        // any decoder
4964%}
4965
4966// Float reg-reg operation
4967pipe_class fpu_reg(regDPR dst) %{
4968    instruction_count(2);
4969    dst    : S3(read);
4970    DECODE : S0(2);     // any 2 decoders
4971    FPU    : S3;
4972%}
4973
4974// Float reg-reg operation
4975pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4976    instruction_count(2);
4977    dst    : S4(write);
4978    src    : S3(read);
4979    DECODE : S0(2);     // any 2 decoders
4980    FPU    : S3;
4981%}
4982
4983// Float reg-reg operation
4984pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4985    instruction_count(3);
4986    dst    : S4(write);
4987    src1   : S3(read);
4988    src2   : S3(read);
4989    DECODE : S0(3);     // any 3 decoders
4990    FPU    : S3(2);
4991%}
4992
4993// Float reg-reg operation
4994pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4995    instruction_count(4);
4996    dst    : S4(write);
4997    src1   : S3(read);
4998    src2   : S3(read);
4999    src3   : S3(read);
5000    DECODE : S0(4);     // any 3 decoders
5001    FPU    : S3(2);
5002%}
5003
5004// Float reg-reg operation
5005pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
5006    instruction_count(4);
5007    dst    : S4(write);
5008    src1   : S3(read);
5009    src2   : S3(read);
5010    src3   : S3(read);
5011    DECODE : S1(3);     // any 3 decoders
5012    D0     : S0;        // Big decoder only
5013    FPU    : S3(2);
5014    MEM    : S3;
5015%}
5016
5017// Float reg-mem operation
5018pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
5019    instruction_count(2);
5020    dst    : S5(write);
5021    mem    : S3(read);
5022    D0     : S0;        // big decoder only
5023    DECODE : S1;        // any decoder for FPU POP
5024    FPU    : S4;
5025    MEM    : S3;        // any mem
5026%}
5027
5028// Float reg-mem operation
5029pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
5030    instruction_count(3);
5031    dst    : S5(write);
5032    src1   : S3(read);
5033    mem    : S3(read);
5034    D0     : S0;        // big decoder only
5035    DECODE : S1(2);     // any decoder for FPU POP
5036    FPU    : S4;
5037    MEM    : S3;        // any mem
5038%}
5039
5040// Float mem-reg operation
5041pipe_class fpu_mem_reg(memory mem, regDPR src) %{
5042    instruction_count(2);
5043    src    : S5(read);
5044    mem    : S3(read);
5045    DECODE : S0;        // any decoder for FPU PUSH
5046    D0     : S1;        // big decoder only
5047    FPU    : S4;
5048    MEM    : S3;        // any mem
5049%}
5050
5051pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
5052    instruction_count(3);
5053    src1   : S3(read);
5054    src2   : S3(read);
5055    mem    : S3(read);
5056    DECODE : S0(2);     // any decoder for FPU PUSH
5057    D0     : S1;        // big decoder only
5058    FPU    : S4;
5059    MEM    : S3;        // any mem
5060%}
5061
5062pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
5063    instruction_count(3);
5064    src1   : S3(read);
5065    src2   : S3(read);
5066    mem    : S4(read);
5067    DECODE : S0;        // any decoder for FPU PUSH
5068    D0     : S0(2);     // big decoder only
5069    FPU    : S4;
5070    MEM    : S3(2);     // any mem
5071%}
5072
5073pipe_class fpu_mem_mem(memory dst, memory src1) %{
5074    instruction_count(2);
5075    src1   : S3(read);
5076    dst    : S4(read);
5077    D0     : S0(2);     // big decoder only
5078    MEM    : S3(2);     // any mem
5079%}
5080
5081pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
5082    instruction_count(3);
5083    src1   : S3(read);
5084    src2   : S3(read);
5085    dst    : S4(read);
5086    D0     : S0(3);     // big decoder only
5087    FPU    : S4;
5088    MEM    : S3(3);     // any mem
5089%}
5090
5091pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5092    instruction_count(3);
5093    src1   : S4(read);
5094    mem    : S4(read);
5095    DECODE : S0;        // any decoder for FPU PUSH
5096    D0     : S0(2);     // big decoder only
5097    FPU    : S4;
5098    MEM    : S3(2);     // any mem
5099%}
5100
5101// Float load constant
5102pipe_class fpu_reg_con(regDPR dst) %{
5103    instruction_count(2);
5104    dst    : S5(write);
5105    D0     : S0;        // big decoder only for the load
5106    DECODE : S1;        // any decoder for FPU POP
5107    FPU    : S4;
5108    MEM    : S3;        // any mem
5109%}
5110
5111// Float load constant
5112pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5113    instruction_count(3);
5114    dst    : S5(write);
5115    src    : S3(read);
5116    D0     : S0;        // big decoder only for the load
5117    DECODE : S1(2);     // any decoder for FPU POP
5118    FPU    : S4;
5119    MEM    : S3;        // any mem
5120%}
5121
5122// UnConditional branch
5123pipe_class pipe_jmp( label labl ) %{
5124    single_instruction;
5125    BR   : S3;
5126%}
5127
5128// Conditional branch
5129pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5130    single_instruction;
5131    cr    : S1(read);
5132    BR    : S3;
5133%}
5134
5135// Allocation idiom
5136pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5137    instruction_count(1); force_serialization;
5138    fixed_latency(6);
5139    heap_ptr : S3(read);
5140    DECODE   : S0(3);
5141    D0       : S2;
5142    MEM      : S3;
5143    ALU      : S3(2);
5144    dst      : S5(write);
5145    BR       : S5;
5146%}
5147
5148// Generic big/slow expanded idiom
5149pipe_class pipe_slow(  ) %{
5150    instruction_count(10); multiple_bundles; force_serialization;
5151    fixed_latency(100);
5152    D0  : S0(2);
5153    MEM : S3(2);
5154%}
5155
5156// The real do-nothing guy
5157pipe_class empty( ) %{
5158    instruction_count(0);
5159%}
5160
5161// Define the class for the Nop node
5162define %{
5163   MachNop = empty;
5164%}
5165
5166%}
5167
5168//----------INSTRUCTIONS-------------------------------------------------------
5169//
5170// match      -- States which machine-independent subtree may be replaced
5171//               by this instruction.
5172// ins_cost   -- The estimated cost of this instruction is used by instruction
5173//               selection to identify a minimum cost tree of machine
5174//               instructions that matches a tree of machine-independent
5175//               instructions.
5176// format     -- A string providing the disassembly for this instruction.
5177//               The value of an instruction's operand may be inserted
5178//               by referring to it with a '$' prefix.
5179// opcode     -- Three instruction opcodes may be provided.  These are referred
5180//               to within an encode class as $primary, $secondary, and $tertiary
5181//               respectively.  The primary opcode is commonly used to
5182//               indicate the type of machine instruction, while secondary
5183//               and tertiary are often used for prefix options or addressing
5184//               modes.
5185// ins_encode -- A list of encode classes with parameters. The encode class
5186//               name must have been defined in an 'enc_class' specification
5187//               in the encode section of the architecture description.
5188
5189//----------BSWAP-Instruction--------------------------------------------------
5190instruct bytes_reverse_int(rRegI dst) %{
5191  match(Set dst (ReverseBytesI dst));
5192
5193  format %{ "BSWAP  $dst" %}
5194  opcode(0x0F, 0xC8);
5195  ins_encode( OpcP, OpcSReg(dst) );
5196  ins_pipe( ialu_reg );
5197%}
5198
5199instruct bytes_reverse_long(eRegL dst) %{
5200  match(Set dst (ReverseBytesL dst));
5201
5202  format %{ "BSWAP  $dst.lo\n\t"
5203            "BSWAP  $dst.hi\n\t"
5204            "XCHG   $dst.lo $dst.hi" %}
5205
5206  ins_cost(125);
5207  ins_encode( bswap_long_bytes(dst) );
5208  ins_pipe( ialu_reg_reg);
5209%}
5210
5211instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5212  match(Set dst (ReverseBytesUS dst));
5213  effect(KILL cr);
5214
5215  format %{ "BSWAP  $dst\n\t"
5216            "SHR    $dst,16\n\t" %}
5217  ins_encode %{
5218    __ bswapl($dst$$Register);
5219    __ shrl($dst$$Register, 16);
5220  %}
5221  ins_pipe( ialu_reg );
5222%}
5223
5224instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5225  match(Set dst (ReverseBytesS dst));
5226  effect(KILL cr);
5227
5228  format %{ "BSWAP  $dst\n\t"
5229            "SAR    $dst,16\n\t" %}
5230  ins_encode %{
5231    __ bswapl($dst$$Register);
5232    __ sarl($dst$$Register, 16);
5233  %}
5234  ins_pipe( ialu_reg );
5235%}
5236
5237
5238//---------- Zeros Count Instructions ------------------------------------------
5239
5240instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5241  predicate(UseCountLeadingZerosInstruction);
5242  match(Set dst (CountLeadingZerosI src));
5243  effect(KILL cr);
5244
5245  format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5246  ins_encode %{
5247    __ lzcntl($dst$$Register, $src$$Register);
5248  %}
5249  ins_pipe(ialu_reg);
5250%}
5251
5252instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5253  predicate(!UseCountLeadingZerosInstruction);
5254  match(Set dst (CountLeadingZerosI src));
5255  effect(KILL cr);
5256
5257  format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5258            "JNZ    skip\n\t"
5259            "MOV    $dst, -1\n"
5260      "skip:\n\t"
5261            "NEG    $dst\n\t"
5262            "ADD    $dst, 31" %}
5263  ins_encode %{
5264    Register Rdst = $dst$$Register;
5265    Register Rsrc = $src$$Register;
5266    Label skip;
5267    __ bsrl(Rdst, Rsrc);
5268    __ jccb(Assembler::notZero, skip);
5269    __ movl(Rdst, -1);
5270    __ bind(skip);
5271    __ negl(Rdst);
5272    __ addl(Rdst, BitsPerInt - 1);
5273  %}
5274  ins_pipe(ialu_reg);
5275%}
5276
5277instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5278  predicate(UseCountLeadingZerosInstruction);
5279  match(Set dst (CountLeadingZerosL src));
5280  effect(TEMP dst, KILL cr);
5281
5282  format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5283            "JNC    done\n\t"
5284            "LZCNT  $dst, $src.lo\n\t"
5285            "ADD    $dst, 32\n"
5286      "done:" %}
5287  ins_encode %{
5288    Register Rdst = $dst$$Register;
5289    Register Rsrc = $src$$Register;
5290    Label done;
5291    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5292    __ jccb(Assembler::carryClear, done);
5293    __ lzcntl(Rdst, Rsrc);
5294    __ addl(Rdst, BitsPerInt);
5295    __ bind(done);
5296  %}
5297  ins_pipe(ialu_reg);
5298%}
5299
5300instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5301  predicate(!UseCountLeadingZerosInstruction);
5302  match(Set dst (CountLeadingZerosL src));
5303  effect(TEMP dst, KILL cr);
5304
5305  format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5306            "JZ     msw_is_zero\n\t"
5307            "ADD    $dst, 32\n\t"
5308            "JMP    not_zero\n"
5309      "msw_is_zero:\n\t"
5310            "BSR    $dst, $src.lo\n\t"
5311            "JNZ    not_zero\n\t"
5312            "MOV    $dst, -1\n"
5313      "not_zero:\n\t"
5314            "NEG    $dst\n\t"
5315            "ADD    $dst, 63\n" %}
5316 ins_encode %{
5317    Register Rdst = $dst$$Register;
5318    Register Rsrc = $src$$Register;
5319    Label msw_is_zero;
5320    Label not_zero;
5321    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5322    __ jccb(Assembler::zero, msw_is_zero);
5323    __ addl(Rdst, BitsPerInt);
5324    __ jmpb(not_zero);
5325    __ bind(msw_is_zero);
5326    __ bsrl(Rdst, Rsrc);
5327    __ jccb(Assembler::notZero, not_zero);
5328    __ movl(Rdst, -1);
5329    __ bind(not_zero);
5330    __ negl(Rdst);
5331    __ addl(Rdst, BitsPerLong - 1);
5332  %}
5333  ins_pipe(ialu_reg);
5334%}
5335
5336instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5337  predicate(UseCountTrailingZerosInstruction);
5338  match(Set dst (CountTrailingZerosI src));
5339  effect(KILL cr);
5340
5341  format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5342  ins_encode %{
5343    __ tzcntl($dst$$Register, $src$$Register);
5344  %}
5345  ins_pipe(ialu_reg);
5346%}
5347
5348instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5349  predicate(!UseCountTrailingZerosInstruction);
5350  match(Set dst (CountTrailingZerosI src));
5351  effect(KILL cr);
5352
5353  format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5354            "JNZ    done\n\t"
5355            "MOV    $dst, 32\n"
5356      "done:" %}
5357  ins_encode %{
5358    Register Rdst = $dst$$Register;
5359    Label done;
5360    __ bsfl(Rdst, $src$$Register);
5361    __ jccb(Assembler::notZero, done);
5362    __ movl(Rdst, BitsPerInt);
5363    __ bind(done);
5364  %}
5365  ins_pipe(ialu_reg);
5366%}
5367
5368instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5369  predicate(UseCountTrailingZerosInstruction);
5370  match(Set dst (CountTrailingZerosL src));
5371  effect(TEMP dst, KILL cr);
5372
5373  format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5374            "JNC    done\n\t"
5375            "TZCNT  $dst, $src.hi\n\t"
5376            "ADD    $dst, 32\n"
5377            "done:" %}
5378  ins_encode %{
5379    Register Rdst = $dst$$Register;
5380    Register Rsrc = $src$$Register;
5381    Label done;
5382    __ tzcntl(Rdst, Rsrc);
5383    __ jccb(Assembler::carryClear, done);
5384    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5385    __ addl(Rdst, BitsPerInt);
5386    __ bind(done);
5387  %}
5388  ins_pipe(ialu_reg);
5389%}
5390
5391instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5392  predicate(!UseCountTrailingZerosInstruction);
5393  match(Set dst (CountTrailingZerosL src));
5394  effect(TEMP dst, KILL cr);
5395
5396  format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5397            "JNZ    done\n\t"
5398            "BSF    $dst, $src.hi\n\t"
5399            "JNZ    msw_not_zero\n\t"
5400            "MOV    $dst, 32\n"
5401      "msw_not_zero:\n\t"
5402            "ADD    $dst, 32\n"
5403      "done:" %}
5404  ins_encode %{
5405    Register Rdst = $dst$$Register;
5406    Register Rsrc = $src$$Register;
5407    Label msw_not_zero;
5408    Label done;
5409    __ bsfl(Rdst, Rsrc);
5410    __ jccb(Assembler::notZero, done);
5411    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5412    __ jccb(Assembler::notZero, msw_not_zero);
5413    __ movl(Rdst, BitsPerInt);
5414    __ bind(msw_not_zero);
5415    __ addl(Rdst, BitsPerInt);
5416    __ bind(done);
5417  %}
5418  ins_pipe(ialu_reg);
5419%}
5420
5421
5422//---------- Population Count Instructions -------------------------------------
5423
5424instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5425  predicate(UsePopCountInstruction);
5426  match(Set dst (PopCountI src));
5427  effect(KILL cr);
5428
5429  format %{ "POPCNT $dst, $src" %}
5430  ins_encode %{
5431    __ popcntl($dst$$Register, $src$$Register);
5432  %}
5433  ins_pipe(ialu_reg);
5434%}
5435
5436instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5437  predicate(UsePopCountInstruction);
5438  match(Set dst (PopCountI (LoadI mem)));
5439  effect(KILL cr);
5440
5441  format %{ "POPCNT $dst, $mem" %}
5442  ins_encode %{
5443    __ popcntl($dst$$Register, $mem$$Address);
5444  %}
5445  ins_pipe(ialu_reg);
5446%}
5447
5448// Note: Long.bitCount(long) returns an int.
5449instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5450  predicate(UsePopCountInstruction);
5451  match(Set dst (PopCountL src));
5452  effect(KILL cr, TEMP tmp, TEMP dst);
5453
5454  format %{ "POPCNT $dst, $src.lo\n\t"
5455            "POPCNT $tmp, $src.hi\n\t"
5456            "ADD    $dst, $tmp" %}
5457  ins_encode %{
5458    __ popcntl($dst$$Register, $src$$Register);
5459    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5460    __ addl($dst$$Register, $tmp$$Register);
5461  %}
5462  ins_pipe(ialu_reg);
5463%}
5464
5465// Note: Long.bitCount(long) returns an int.
5466instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5467  predicate(UsePopCountInstruction);
5468  match(Set dst (PopCountL (LoadL mem)));
5469  effect(KILL cr, TEMP tmp, TEMP dst);
5470
5471  format %{ "POPCNT $dst, $mem\n\t"
5472            "POPCNT $tmp, $mem+4\n\t"
5473            "ADD    $dst, $tmp" %}
5474  ins_encode %{
5475    //__ popcntl($dst$$Register, $mem$$Address$$first);
5476    //__ popcntl($tmp$$Register, $mem$$Address$$second);
5477    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5478    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5479    __ addl($dst$$Register, $tmp$$Register);
5480  %}
5481  ins_pipe(ialu_reg);
5482%}
5483
5484
5485//----------Load/Store/Move Instructions---------------------------------------
5486//----------Load Instructions--------------------------------------------------
5487// Load Byte (8bit signed)
5488instruct loadB(xRegI dst, memory mem) %{
5489  match(Set dst (LoadB mem));
5490
5491  ins_cost(125);
5492  format %{ "MOVSX8 $dst,$mem\t# byte" %}
5493
5494  ins_encode %{
5495    __ movsbl($dst$$Register, $mem$$Address);
5496  %}
5497
5498  ins_pipe(ialu_reg_mem);
5499%}
5500
5501// Load Byte (8bit signed) into Long Register
5502instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5503  match(Set dst (ConvI2L (LoadB mem)));
5504  effect(KILL cr);
5505
5506  ins_cost(375);
5507  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5508            "MOV    $dst.hi,$dst.lo\n\t"
5509            "SAR    $dst.hi,7" %}
5510
5511  ins_encode %{
5512    __ movsbl($dst$$Register, $mem$$Address);
5513    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5514    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
5515  %}
5516
5517  ins_pipe(ialu_reg_mem);
5518%}
5519
5520// Load Unsigned Byte (8bit UNsigned)
5521instruct loadUB(xRegI dst, memory mem) %{
5522  match(Set dst (LoadUB mem));
5523
5524  ins_cost(125);
5525  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5526
5527  ins_encode %{
5528    __ movzbl($dst$$Register, $mem$$Address);
5529  %}
5530
5531  ins_pipe(ialu_reg_mem);
5532%}
5533
5534// Load Unsigned Byte (8 bit UNsigned) into Long Register
5535instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5536  match(Set dst (ConvI2L (LoadUB mem)));
5537  effect(KILL cr);
5538
5539  ins_cost(250);
5540  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5541            "XOR    $dst.hi,$dst.hi" %}
5542
5543  ins_encode %{
5544    Register Rdst = $dst$$Register;
5545    __ movzbl(Rdst, $mem$$Address);
5546    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5547  %}
5548
5549  ins_pipe(ialu_reg_mem);
5550%}
5551
5552// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5553instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5554  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5555  effect(KILL cr);
5556
5557  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5558            "XOR    $dst.hi,$dst.hi\n\t"
5559            "AND    $dst.lo,right_n_bits($mask, 8)" %}
5560  ins_encode %{
5561    Register Rdst = $dst$$Register;
5562    __ movzbl(Rdst, $mem$$Address);
5563    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5564    __ andl(Rdst, $mask$$constant & right_n_bits(8));
5565  %}
5566  ins_pipe(ialu_reg_mem);
5567%}
5568
5569// Load Short (16bit signed)
5570instruct loadS(rRegI dst, memory mem) %{
5571  match(Set dst (LoadS mem));
5572
5573  ins_cost(125);
5574  format %{ "MOVSX  $dst,$mem\t# short" %}
5575
5576  ins_encode %{
5577    __ movswl($dst$$Register, $mem$$Address);
5578  %}
5579
5580  ins_pipe(ialu_reg_mem);
5581%}
5582
5583// Load Short (16 bit signed) to Byte (8 bit signed)
5584instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5585  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5586
5587  ins_cost(125);
5588  format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5589  ins_encode %{
5590    __ movsbl($dst$$Register, $mem$$Address);
5591  %}
5592  ins_pipe(ialu_reg_mem);
5593%}
5594
5595// Load Short (16bit signed) into Long Register
5596instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5597  match(Set dst (ConvI2L (LoadS mem)));
5598  effect(KILL cr);
5599
5600  ins_cost(375);
5601  format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5602            "MOV    $dst.hi,$dst.lo\n\t"
5603            "SAR    $dst.hi,15" %}
5604
5605  ins_encode %{
5606    __ movswl($dst$$Register, $mem$$Address);
5607    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5608    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
5609  %}
5610
5611  ins_pipe(ialu_reg_mem);
5612%}
5613
5614// Load Unsigned Short/Char (16bit unsigned)
5615instruct loadUS(rRegI dst, memory mem) %{
5616  match(Set dst (LoadUS mem));
5617
5618  ins_cost(125);
5619  format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5620
5621  ins_encode %{
5622    __ movzwl($dst$$Register, $mem$$Address);
5623  %}
5624
5625  ins_pipe(ialu_reg_mem);
5626%}
5627
5628// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5629instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5630  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5631
5632  ins_cost(125);
5633  format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5634  ins_encode %{
5635    __ movsbl($dst$$Register, $mem$$Address);
5636  %}
5637  ins_pipe(ialu_reg_mem);
5638%}
5639
5640// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5641instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5642  match(Set dst (ConvI2L (LoadUS mem)));
5643  effect(KILL cr);
5644
5645  ins_cost(250);
5646  format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5647            "XOR    $dst.hi,$dst.hi" %}
5648
5649  ins_encode %{
5650    __ movzwl($dst$$Register, $mem$$Address);
5651    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5652  %}
5653
5654  ins_pipe(ialu_reg_mem);
5655%}
5656
5657// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5658instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5659  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5660  effect(KILL cr);
5661
5662  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5663            "XOR    $dst.hi,$dst.hi" %}
5664  ins_encode %{
5665    Register Rdst = $dst$$Register;
5666    __ movzbl(Rdst, $mem$$Address);
5667    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5668  %}
5669  ins_pipe(ialu_reg_mem);
5670%}
5671
5672// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5673instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5674  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5675  effect(KILL cr);
5676
5677  format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5678            "XOR    $dst.hi,$dst.hi\n\t"
5679            "AND    $dst.lo,right_n_bits($mask, 16)" %}
5680  ins_encode %{
5681    Register Rdst = $dst$$Register;
5682    __ movzwl(Rdst, $mem$$Address);
5683    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5684    __ andl(Rdst, $mask$$constant & right_n_bits(16));
5685  %}
5686  ins_pipe(ialu_reg_mem);
5687%}
5688
5689// Load Integer
5690instruct loadI(rRegI dst, memory mem) %{
5691  match(Set dst (LoadI mem));
5692
5693  ins_cost(125);
5694  format %{ "MOV    $dst,$mem\t# int" %}
5695
5696  ins_encode %{
5697    __ movl($dst$$Register, $mem$$Address);
5698  %}
5699
5700  ins_pipe(ialu_reg_mem);
5701%}
5702
5703// Load Integer (32 bit signed) to Byte (8 bit signed)
5704instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5705  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5706
5707  ins_cost(125);
5708  format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5709  ins_encode %{
5710    __ movsbl($dst$$Register, $mem$$Address);
5711  %}
5712  ins_pipe(ialu_reg_mem);
5713%}
5714
5715// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5716instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5717  match(Set dst (AndI (LoadI mem) mask));
5718
5719  ins_cost(125);
5720  format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5721  ins_encode %{
5722    __ movzbl($dst$$Register, $mem$$Address);
5723  %}
5724  ins_pipe(ialu_reg_mem);
5725%}
5726
5727// Load Integer (32 bit signed) to Short (16 bit signed)
5728instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5729  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5730
5731  ins_cost(125);
5732  format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5733  ins_encode %{
5734    __ movswl($dst$$Register, $mem$$Address);
5735  %}
5736  ins_pipe(ialu_reg_mem);
5737%}
5738
5739// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5740instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5741  match(Set dst (AndI (LoadI mem) mask));
5742
5743  ins_cost(125);
5744  format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5745  ins_encode %{
5746    __ movzwl($dst$$Register, $mem$$Address);
5747  %}
5748  ins_pipe(ialu_reg_mem);
5749%}
5750
5751// Load Integer into Long Register
5752instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5753  match(Set dst (ConvI2L (LoadI mem)));
5754  effect(KILL cr);
5755
5756  ins_cost(375);
5757  format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5758            "MOV    $dst.hi,$dst.lo\n\t"
5759            "SAR    $dst.hi,31" %}
5760
5761  ins_encode %{
5762    __ movl($dst$$Register, $mem$$Address);
5763    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5764    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5765  %}
5766
5767  ins_pipe(ialu_reg_mem);
5768%}
5769
5770// Load Integer with mask 0xFF into Long Register
5771instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5772  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5773  effect(KILL cr);
5774
5775  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5776            "XOR    $dst.hi,$dst.hi" %}
5777  ins_encode %{
5778    Register Rdst = $dst$$Register;
5779    __ movzbl(Rdst, $mem$$Address);
5780    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5781  %}
5782  ins_pipe(ialu_reg_mem);
5783%}
5784
5785// Load Integer with mask 0xFFFF into Long Register
5786instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5787  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5788  effect(KILL cr);
5789
5790  format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5791            "XOR    $dst.hi,$dst.hi" %}
5792  ins_encode %{
5793    Register Rdst = $dst$$Register;
5794    __ movzwl(Rdst, $mem$$Address);
5795    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5796  %}
5797  ins_pipe(ialu_reg_mem);
5798%}
5799
5800// Load Integer with 31-bit mask into Long Register
5801instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5802  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5803  effect(KILL cr);
5804
5805  format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5806            "XOR    $dst.hi,$dst.hi\n\t"
5807            "AND    $dst.lo,$mask" %}
5808  ins_encode %{
5809    Register Rdst = $dst$$Register;
5810    __ movl(Rdst, $mem$$Address);
5811    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5812    __ andl(Rdst, $mask$$constant);
5813  %}
5814  ins_pipe(ialu_reg_mem);
5815%}
5816
5817// Load Unsigned Integer into Long Register
5818instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5819  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5820  effect(KILL cr);
5821
5822  ins_cost(250);
5823  format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5824            "XOR    $dst.hi,$dst.hi" %}
5825
5826  ins_encode %{
5827    __ movl($dst$$Register, $mem$$Address);
5828    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5829  %}
5830
5831  ins_pipe(ialu_reg_mem);
5832%}
5833
5834// Load Long.  Cannot clobber address while loading, so restrict address
5835// register to ESI
5836instruct loadL(eRegL dst, load_long_memory mem) %{
5837  predicate(!((LoadLNode*)n)->require_atomic_access());
5838  match(Set dst (LoadL mem));
5839
5840  ins_cost(250);
5841  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5842            "MOV    $dst.hi,$mem+4" %}
5843
5844  ins_encode %{
5845    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5846    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5847    __ movl($dst$$Register, Amemlo);
5848    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5849  %}
5850
5851  ins_pipe(ialu_reg_long_mem);
5852%}
5853
5854// Volatile Load Long.  Must be atomic, so do 64-bit FILD
5855// then store it down to the stack and reload on the int
5856// side.
5857instruct loadL_volatile(stackSlotL dst, memory mem) %{
5858  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5859  match(Set dst (LoadL mem));
5860
5861  ins_cost(200);
5862  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5863            "FISTp  $dst" %}
5864  ins_encode(enc_loadL_volatile(mem,dst));
5865  ins_pipe( fpu_reg_mem );
5866%}
5867
5868instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5869  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5870  match(Set dst (LoadL mem));
5871  effect(TEMP tmp);
5872  ins_cost(180);
5873  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5874            "MOVSD  $dst,$tmp" %}
5875  ins_encode %{
5876    __ movdbl($tmp$$XMMRegister, $mem$$Address);
5877    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5878  %}
5879  ins_pipe( pipe_slow );
5880%}
5881
5882instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5883  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5884  match(Set dst (LoadL mem));
5885  effect(TEMP tmp);
5886  ins_cost(160);
5887  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5888            "MOVD   $dst.lo,$tmp\n\t"
5889            "PSRLQ  $tmp,32\n\t"
5890            "MOVD   $dst.hi,$tmp" %}
5891  ins_encode %{
5892    __ movdbl($tmp$$XMMRegister, $mem$$Address);
5893    __ movdl($dst$$Register, $tmp$$XMMRegister);
5894    __ psrlq($tmp$$XMMRegister, 32);
5895    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5896  %}
5897  ins_pipe( pipe_slow );
5898%}
5899
5900// Load Range
5901instruct loadRange(rRegI dst, memory mem) %{
5902  match(Set dst (LoadRange mem));
5903
5904  ins_cost(125);
5905  format %{ "MOV    $dst,$mem" %}
5906  opcode(0x8B);
5907  ins_encode( OpcP, RegMem(dst,mem));
5908  ins_pipe( ialu_reg_mem );
5909%}
5910
5911
5912// Load Pointer
5913instruct loadP(eRegP dst, memory mem) %{
5914  match(Set dst (LoadP mem));
5915
5916  ins_cost(125);
5917  format %{ "MOV    $dst,$mem" %}
5918  opcode(0x8B);
5919  ins_encode( OpcP, RegMem(dst,mem));
5920  ins_pipe( ialu_reg_mem );
5921%}
5922
5923// Load Klass Pointer
5924instruct loadKlass(eRegP dst, memory mem) %{
5925  match(Set dst (LoadKlass mem));
5926
5927  ins_cost(125);
5928  format %{ "MOV    $dst,$mem" %}
5929  opcode(0x8B);
5930  ins_encode( OpcP, RegMem(dst,mem));
5931  ins_pipe( ialu_reg_mem );
5932%}
5933
5934// Load Double
5935instruct loadDPR(regDPR dst, memory mem) %{
5936  predicate(UseSSE<=1);
5937  match(Set dst (LoadD mem));
5938
5939  ins_cost(150);
5940  format %{ "FLD_D  ST,$mem\n\t"
5941            "FSTP   $dst" %}
5942  opcode(0xDD);               /* DD /0 */
5943  ins_encode( OpcP, RMopc_Mem(0x00,mem),
5944              Pop_Reg_DPR(dst) );
5945  ins_pipe( fpu_reg_mem );
5946%}
5947
5948// Load Double to XMM
5949instruct loadD(regD dst, memory mem) %{
5950  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5951  match(Set dst (LoadD mem));
5952  ins_cost(145);
5953  format %{ "MOVSD  $dst,$mem" %}
5954  ins_encode %{
5955    __ movdbl ($dst$$XMMRegister, $mem$$Address);
5956  %}
5957  ins_pipe( pipe_slow );
5958%}
5959
5960instruct loadD_partial(regD dst, memory mem) %{
5961  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5962  match(Set dst (LoadD mem));
5963  ins_cost(145);
5964  format %{ "MOVLPD $dst,$mem" %}
5965  ins_encode %{
5966    __ movdbl ($dst$$XMMRegister, $mem$$Address);
5967  %}
5968  ins_pipe( pipe_slow );
5969%}
5970
5971// Load to XMM register (single-precision floating point)
5972// MOVSS instruction
5973instruct loadF(regF dst, memory mem) %{
5974  predicate(UseSSE>=1);
5975  match(Set dst (LoadF mem));
5976  ins_cost(145);
5977  format %{ "MOVSS  $dst,$mem" %}
5978  ins_encode %{
5979    __ movflt ($dst$$XMMRegister, $mem$$Address);
5980  %}
5981  ins_pipe( pipe_slow );
5982%}
5983
5984// Load Float
5985instruct loadFPR(regFPR dst, memory mem) %{
5986  predicate(UseSSE==0);
5987  match(Set dst (LoadF mem));
5988
5989  ins_cost(150);
5990  format %{ "FLD_S  ST,$mem\n\t"
5991            "FSTP   $dst" %}
5992  opcode(0xD9);               /* D9 /0 */
5993  ins_encode( OpcP, RMopc_Mem(0x00,mem),
5994              Pop_Reg_FPR(dst) );
5995  ins_pipe( fpu_reg_mem );
5996%}
5997
5998// Load Effective Address
5999instruct leaP8(eRegP dst, indOffset8 mem) %{
6000  match(Set dst mem);
6001
6002  ins_cost(110);
6003  format %{ "LEA    $dst,$mem" %}
6004  opcode(0x8D);
6005  ins_encode( OpcP, RegMem(dst,mem));
6006  ins_pipe( ialu_reg_reg_fat );
6007%}
6008
6009instruct leaP32(eRegP dst, indOffset32 mem) %{
6010  match(Set dst mem);
6011
6012  ins_cost(110);
6013  format %{ "LEA    $dst,$mem" %}
6014  opcode(0x8D);
6015  ins_encode( OpcP, RegMem(dst,mem));
6016  ins_pipe( ialu_reg_reg_fat );
6017%}
6018
6019instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
6020  match(Set dst mem);
6021
6022  ins_cost(110);
6023  format %{ "LEA    $dst,$mem" %}
6024  opcode(0x8D);
6025  ins_encode( OpcP, RegMem(dst,mem));
6026  ins_pipe( ialu_reg_reg_fat );
6027%}
6028
6029instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
6030  match(Set dst mem);
6031
6032  ins_cost(110);
6033  format %{ "LEA    $dst,$mem" %}
6034  opcode(0x8D);
6035  ins_encode( OpcP, RegMem(dst,mem));
6036  ins_pipe( ialu_reg_reg_fat );
6037%}
6038
6039instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
6040  match(Set dst mem);
6041
6042  ins_cost(110);
6043  format %{ "LEA    $dst,$mem" %}
6044  opcode(0x8D);
6045  ins_encode( OpcP, RegMem(dst,mem));
6046  ins_pipe( ialu_reg_reg_fat );
6047%}
6048
6049// Load Constant
6050instruct loadConI(rRegI dst, immI src) %{
6051  match(Set dst src);
6052
6053  format %{ "MOV    $dst,$src" %}
6054  ins_encode( LdImmI(dst, src) );
6055  ins_pipe( ialu_reg_fat );
6056%}
6057
6058// Load Constant zero
6059instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
6060  match(Set dst src);
6061  effect(KILL cr);
6062
6063  ins_cost(50);
6064  format %{ "XOR    $dst,$dst" %}
6065  opcode(0x33);  /* + rd */
6066  ins_encode( OpcP, RegReg( dst, dst ) );
6067  ins_pipe( ialu_reg );
6068%}
6069
6070instruct loadConP(eRegP dst, immP src) %{
6071  match(Set dst src);
6072
6073  format %{ "MOV    $dst,$src" %}
6074  opcode(0xB8);  /* + rd */
6075  ins_encode( LdImmP(dst, src) );
6076  ins_pipe( ialu_reg_fat );
6077%}
6078
6079instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
6080  match(Set dst src);
6081  effect(KILL cr);
6082  ins_cost(200);
6083  format %{ "MOV    $dst.lo,$src.lo\n\t"
6084            "MOV    $dst.hi,$src.hi" %}
6085  opcode(0xB8);
6086  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
6087  ins_pipe( ialu_reg_long_fat );
6088%}
6089
6090instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
6091  match(Set dst src);
6092  effect(KILL cr);
6093  ins_cost(150);
6094  format %{ "XOR    $dst.lo,$dst.lo\n\t"
6095            "XOR    $dst.hi,$dst.hi" %}
6096  opcode(0x33,0x33);
6097  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6098  ins_pipe( ialu_reg_long );
6099%}
6100
6101// The instruction usage is guarded by predicate in operand immFPR().
6102instruct loadConFPR(regFPR dst, immFPR con) %{
6103  match(Set dst con);
6104  ins_cost(125);
6105  format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6106            "FSTP   $dst" %}
6107  ins_encode %{
6108    __ fld_s($constantaddress($con));
6109    __ fstp_d($dst$$reg);
6110  %}
6111  ins_pipe(fpu_reg_con);
6112%}
6113
6114// The instruction usage is guarded by predicate in operand immFPR0().
6115instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6116  match(Set dst con);
6117  ins_cost(125);
6118  format %{ "FLDZ   ST\n\t"
6119            "FSTP   $dst" %}
6120  ins_encode %{
6121    __ fldz();
6122    __ fstp_d($dst$$reg);
6123  %}
6124  ins_pipe(fpu_reg_con);
6125%}
6126
6127// The instruction usage is guarded by predicate in operand immFPR1().
6128instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6129  match(Set dst con);
6130  ins_cost(125);
6131  format %{ "FLD1   ST\n\t"
6132            "FSTP   $dst" %}
6133  ins_encode %{
6134    __ fld1();
6135    __ fstp_d($dst$$reg);
6136  %}
6137  ins_pipe(fpu_reg_con);
6138%}
6139
6140// The instruction usage is guarded by predicate in operand immF().
6141instruct loadConF(regF dst, immF con) %{
6142  match(Set dst con);
6143  ins_cost(125);
6144  format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6145  ins_encode %{
6146    __ movflt($dst$$XMMRegister, $constantaddress($con));
6147  %}
6148  ins_pipe(pipe_slow);
6149%}
6150
6151// The instruction usage is guarded by predicate in operand immF0().
6152instruct loadConF0(regF dst, immF0 src) %{
6153  match(Set dst src);
6154  ins_cost(100);
6155  format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6156  ins_encode %{
6157    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6158  %}
6159  ins_pipe(pipe_slow);
6160%}
6161
6162// The instruction usage is guarded by predicate in operand immDPR().
6163instruct loadConDPR(regDPR dst, immDPR con) %{
6164  match(Set dst con);
6165  ins_cost(125);
6166
6167  format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6168            "FSTP   $dst" %}
6169  ins_encode %{
6170    __ fld_d($constantaddress($con));
6171    __ fstp_d($dst$$reg);
6172  %}
6173  ins_pipe(fpu_reg_con);
6174%}
6175
6176// The instruction usage is guarded by predicate in operand immDPR0().
6177instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6178  match(Set dst con);
6179  ins_cost(125);
6180
6181  format %{ "FLDZ   ST\n\t"
6182            "FSTP   $dst" %}
6183  ins_encode %{
6184    __ fldz();
6185    __ fstp_d($dst$$reg);
6186  %}
6187  ins_pipe(fpu_reg_con);
6188%}
6189
6190// The instruction usage is guarded by predicate in operand immDPR1().
6191instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6192  match(Set dst con);
6193  ins_cost(125);
6194
6195  format %{ "FLD1   ST\n\t"
6196            "FSTP   $dst" %}
6197  ins_encode %{
6198    __ fld1();
6199    __ fstp_d($dst$$reg);
6200  %}
6201  ins_pipe(fpu_reg_con);
6202%}
6203
6204// The instruction usage is guarded by predicate in operand immD().
6205instruct loadConD(regD dst, immD con) %{
6206  match(Set dst con);
6207  ins_cost(125);
6208  format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6209  ins_encode %{
6210    __ movdbl($dst$$XMMRegister, $constantaddress($con));
6211  %}
6212  ins_pipe(pipe_slow);
6213%}
6214
6215// The instruction usage is guarded by predicate in operand immD0().
6216instruct loadConD0(regD dst, immD0 src) %{
6217  match(Set dst src);
6218  ins_cost(100);
6219  format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6220  ins_encode %{
6221    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6222  %}
6223  ins_pipe( pipe_slow );
6224%}
6225
6226// Load Stack Slot
6227instruct loadSSI(rRegI dst, stackSlotI src) %{
6228  match(Set dst src);
6229  ins_cost(125);
6230
6231  format %{ "MOV    $dst,$src" %}
6232  opcode(0x8B);
6233  ins_encode( OpcP, RegMem(dst,src));
6234  ins_pipe( ialu_reg_mem );
6235%}
6236
6237instruct loadSSL(eRegL dst, stackSlotL src) %{
6238  match(Set dst src);
6239
6240  ins_cost(200);
6241  format %{ "MOV    $dst,$src.lo\n\t"
6242            "MOV    $dst+4,$src.hi" %}
6243  opcode(0x8B, 0x8B);
6244  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6245  ins_pipe( ialu_mem_long_reg );
6246%}
6247
6248// Load Stack Slot
6249instruct loadSSP(eRegP dst, stackSlotP src) %{
6250  match(Set dst src);
6251  ins_cost(125);
6252
6253  format %{ "MOV    $dst,$src" %}
6254  opcode(0x8B);
6255  ins_encode( OpcP, RegMem(dst,src));
6256  ins_pipe( ialu_reg_mem );
6257%}
6258
6259// Load Stack Slot
6260instruct loadSSF(regFPR dst, stackSlotF src) %{
6261  match(Set dst src);
6262  ins_cost(125);
6263
6264  format %{ "FLD_S  $src\n\t"
6265            "FSTP   $dst" %}
6266  opcode(0xD9);               /* D9 /0, FLD m32real */
6267  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6268              Pop_Reg_FPR(dst) );
6269  ins_pipe( fpu_reg_mem );
6270%}
6271
6272// Load Stack Slot
6273instruct loadSSD(regDPR dst, stackSlotD src) %{
6274  match(Set dst src);
6275  ins_cost(125);
6276
6277  format %{ "FLD_D  $src\n\t"
6278            "FSTP   $dst" %}
6279  opcode(0xDD);               /* DD /0, FLD m64real */
6280  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6281              Pop_Reg_DPR(dst) );
6282  ins_pipe( fpu_reg_mem );
6283%}
6284
6285// Prefetch instructions for allocation.
6286// Must be safe to execute with invalid address (cannot fault).
6287
6288instruct prefetchAlloc0( memory mem ) %{
6289  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6290  match(PrefetchAllocation mem);
6291  ins_cost(0);
6292  size(0);
6293  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6294  ins_encode();
6295  ins_pipe(empty);
6296%}
6297
6298instruct prefetchAlloc( memory mem ) %{
6299  predicate(AllocatePrefetchInstr==3);
6300  match( PrefetchAllocation mem );
6301  ins_cost(100);
6302
6303  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6304  ins_encode %{
6305    __ prefetchw($mem$$Address);
6306  %}
6307  ins_pipe(ialu_mem);
6308%}
6309
6310instruct prefetchAllocNTA( memory mem ) %{
6311  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6312  match(PrefetchAllocation mem);
6313  ins_cost(100);
6314
6315  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6316  ins_encode %{
6317    __ prefetchnta($mem$$Address);
6318  %}
6319  ins_pipe(ialu_mem);
6320%}
6321
6322instruct prefetchAllocT0( memory mem ) %{
6323  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6324  match(PrefetchAllocation mem);
6325  ins_cost(100);
6326
6327  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6328  ins_encode %{
6329    __ prefetcht0($mem$$Address);
6330  %}
6331  ins_pipe(ialu_mem);
6332%}
6333
6334instruct prefetchAllocT2( memory mem ) %{
6335  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6336  match(PrefetchAllocation mem);
6337  ins_cost(100);
6338
6339  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6340  ins_encode %{
6341    __ prefetcht2($mem$$Address);
6342  %}
6343  ins_pipe(ialu_mem);
6344%}
6345
6346//----------Store Instructions-------------------------------------------------
6347
6348// Store Byte
6349instruct storeB(memory mem, xRegI src) %{
6350  match(Set mem (StoreB mem src));
6351
6352  ins_cost(125);
6353  format %{ "MOV8   $mem,$src" %}
6354  opcode(0x88);
6355  ins_encode( OpcP, RegMem( src, mem ) );
6356  ins_pipe( ialu_mem_reg );
6357%}
6358
6359// Store Char/Short
6360instruct storeC(memory mem, rRegI src) %{
6361  match(Set mem (StoreC mem src));
6362
6363  ins_cost(125);
6364  format %{ "MOV16  $mem,$src" %}
6365  opcode(0x89, 0x66);
6366  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6367  ins_pipe( ialu_mem_reg );
6368%}
6369
6370// Store Integer
6371instruct storeI(memory mem, rRegI src) %{
6372  match(Set mem (StoreI mem src));
6373
6374  ins_cost(125);
6375  format %{ "MOV    $mem,$src" %}
6376  opcode(0x89);
6377  ins_encode( OpcP, RegMem( src, mem ) );
6378  ins_pipe( ialu_mem_reg );
6379%}
6380
6381// Store Long
6382instruct storeL(long_memory mem, eRegL src) %{
6383  predicate(!((StoreLNode*)n)->require_atomic_access());
6384  match(Set mem (StoreL mem src));
6385
6386  ins_cost(200);
6387  format %{ "MOV    $mem,$src.lo\n\t"
6388            "MOV    $mem+4,$src.hi" %}
6389  opcode(0x89, 0x89);
6390  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6391  ins_pipe( ialu_mem_long_reg );
6392%}
6393
6394// Store Long to Integer
6395instruct storeL2I(memory mem, eRegL src) %{
6396  match(Set mem (StoreI mem (ConvL2I src)));
6397
6398  format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6399  ins_encode %{
6400    __ movl($mem$$Address, $src$$Register);
6401  %}
6402  ins_pipe(ialu_mem_reg);
6403%}
6404
6405// Volatile Store Long.  Must be atomic, so move it into
6406// the FP TOS and then do a 64-bit FIST.  Has to probe the
6407// target address before the store (for null-ptr checks)
6408// so the memory operand is used twice in the encoding.
6409instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6410  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6411  match(Set mem (StoreL mem src));
6412  effect( KILL cr );
6413  ins_cost(400);
6414  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6415            "FILD   $src\n\t"
6416            "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6417  opcode(0x3B);
6418  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6419  ins_pipe( fpu_reg_mem );
6420%}
6421
6422instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6423  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6424  match(Set mem (StoreL mem src));
6425  effect( TEMP tmp, KILL cr );
6426  ins_cost(380);
6427  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6428            "MOVSD  $tmp,$src\n\t"
6429            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6430  ins_encode %{
6431    __ cmpl(rax, $mem$$Address);
6432    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6433    __ movdbl($mem$$Address, $tmp$$XMMRegister);
6434  %}
6435  ins_pipe( pipe_slow );
6436%}
6437
6438instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6439  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6440  match(Set mem (StoreL mem src));
6441  effect( TEMP tmp2 , TEMP tmp, KILL cr );
6442  ins_cost(360);
6443  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6444            "MOVD   $tmp,$src.lo\n\t"
6445            "MOVD   $tmp2,$src.hi\n\t"
6446            "PUNPCKLDQ $tmp,$tmp2\n\t"
6447            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6448  ins_encode %{
6449    __ cmpl(rax, $mem$$Address);
6450    __ movdl($tmp$$XMMRegister, $src$$Register);
6451    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6452    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6453    __ movdbl($mem$$Address, $tmp$$XMMRegister);
6454  %}
6455  ins_pipe( pipe_slow );
6456%}
6457
6458// Store Pointer; for storing unknown oops and raw pointers
6459instruct storeP(memory mem, anyRegP src) %{
6460  match(Set mem (StoreP mem src));
6461
6462  ins_cost(125);
6463  format %{ "MOV    $mem,$src" %}
6464  opcode(0x89);
6465  ins_encode( OpcP, RegMem( src, mem ) );
6466  ins_pipe( ialu_mem_reg );
6467%}
6468
6469// Store Integer Immediate
6470instruct storeImmI(memory mem, immI src) %{
6471  match(Set mem (StoreI mem src));
6472
6473  ins_cost(150);
6474  format %{ "MOV    $mem,$src" %}
6475  opcode(0xC7);               /* C7 /0 */
6476  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6477  ins_pipe( ialu_mem_imm );
6478%}
6479
6480// Store Short/Char Immediate
6481instruct storeImmI16(memory mem, immI16 src) %{
6482  predicate(UseStoreImmI16);
6483  match(Set mem (StoreC mem src));
6484
6485  ins_cost(150);
6486  format %{ "MOV16  $mem,$src" %}
6487  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6488  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6489  ins_pipe( ialu_mem_imm );
6490%}
6491
6492// Store Pointer Immediate; null pointers or constant oops that do not
6493// need card-mark barriers.
6494instruct storeImmP(memory mem, immP src) %{
6495  match(Set mem (StoreP mem src));
6496
6497  ins_cost(150);
6498  format %{ "MOV    $mem,$src" %}
6499  opcode(0xC7);               /* C7 /0 */
6500  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6501  ins_pipe( ialu_mem_imm );
6502%}
6503
6504// Store Byte Immediate
6505instruct storeImmB(memory mem, immI8 src) %{
6506  match(Set mem (StoreB mem src));
6507
6508  ins_cost(150);
6509  format %{ "MOV8   $mem,$src" %}
6510  opcode(0xC6);               /* C6 /0 */
6511  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6512  ins_pipe( ialu_mem_imm );
6513%}
6514
6515// Store CMS card-mark Immediate
6516instruct storeImmCM(memory mem, immI8 src) %{
6517  match(Set mem (StoreCM mem src));
6518
6519  ins_cost(150);
6520  format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6521  opcode(0xC6);               /* C6 /0 */
6522  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6523  ins_pipe( ialu_mem_imm );
6524%}
6525
6526// Store Double
6527instruct storeDPR( memory mem, regDPR1 src) %{
6528  predicate(UseSSE<=1);
6529  match(Set mem (StoreD mem src));
6530
6531  ins_cost(100);
6532  format %{ "FST_D  $mem,$src" %}
6533  opcode(0xDD);       /* DD /2 */
6534  ins_encode( enc_FPR_store(mem,src) );
6535  ins_pipe( fpu_mem_reg );
6536%}
6537
6538// Store double does rounding on x86
6539instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6540  predicate(UseSSE<=1);
6541  match(Set mem (StoreD mem (RoundDouble src)));
6542
6543  ins_cost(100);
6544  format %{ "FST_D  $mem,$src\t# round" %}
6545  opcode(0xDD);       /* DD /2 */
6546  ins_encode( enc_FPR_store(mem,src) );
6547  ins_pipe( fpu_mem_reg );
6548%}
6549
6550// Store XMM register to memory (double-precision floating points)
6551// MOVSD instruction
6552instruct storeD(memory mem, regD src) %{
6553  predicate(UseSSE>=2);
6554  match(Set mem (StoreD mem src));
6555  ins_cost(95);
6556  format %{ "MOVSD  $mem,$src" %}
6557  ins_encode %{
6558    __ movdbl($mem$$Address, $src$$XMMRegister);
6559  %}
6560  ins_pipe( pipe_slow );
6561%}
6562
6563// Load Double
6564instruct MoveD2VL(vlRegD dst, regD src) %{
6565  match(Set dst src);
6566  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6567  ins_encode %{
6568    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6569  %}
6570  ins_pipe( fpu_reg_reg );
6571%}
6572
6573// Load Double
6574instruct MoveVL2D(regD dst, vlRegD src) %{
6575  match(Set dst src);
6576  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6577  ins_encode %{
6578    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6579  %}
6580  ins_pipe( fpu_reg_reg );
6581%}
6582
6583// Store XMM register to memory (single-precision floating point)
6584// MOVSS instruction
6585instruct storeF(memory mem, regF src) %{
6586  predicate(UseSSE>=1);
6587  match(Set mem (StoreF mem src));
6588  ins_cost(95);
6589  format %{ "MOVSS  $mem,$src" %}
6590  ins_encode %{
6591    __ movflt($mem$$Address, $src$$XMMRegister);
6592  %}
6593  ins_pipe( pipe_slow );
6594%}
6595
6596// Load Float
6597instruct MoveF2VL(vlRegF dst, regF src) %{
6598  match(Set dst src);
6599  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6600  ins_encode %{
6601    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6602  %}
6603  ins_pipe( fpu_reg_reg );
6604%}
6605
6606// Load Float
6607instruct MoveVL2F(regF dst, vlRegF src) %{
6608  match(Set dst src);
6609  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6610  ins_encode %{
6611    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6612  %}
6613  ins_pipe( fpu_reg_reg );
6614%}
6615
6616// Store Float
6617instruct storeFPR( memory mem, regFPR1 src) %{
6618  predicate(UseSSE==0);
6619  match(Set mem (StoreF mem src));
6620
6621  ins_cost(100);
6622  format %{ "FST_S  $mem,$src" %}
6623  opcode(0xD9);       /* D9 /2 */
6624  ins_encode( enc_FPR_store(mem,src) );
6625  ins_pipe( fpu_mem_reg );
6626%}
6627
6628// Store Float does rounding on x86
6629instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6630  predicate(UseSSE==0);
6631  match(Set mem (StoreF mem (RoundFloat src)));
6632
6633  ins_cost(100);
6634  format %{ "FST_S  $mem,$src\t# round" %}
6635  opcode(0xD9);       /* D9 /2 */
6636  ins_encode( enc_FPR_store(mem,src) );
6637  ins_pipe( fpu_mem_reg );
6638%}
6639
6640// Store Float does rounding on x86
6641instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6642  predicate(UseSSE<=1);
6643  match(Set mem (StoreF mem (ConvD2F src)));
6644
6645  ins_cost(100);
6646  format %{ "FST_S  $mem,$src\t# D-round" %}
6647  opcode(0xD9);       /* D9 /2 */
6648  ins_encode( enc_FPR_store(mem,src) );
6649  ins_pipe( fpu_mem_reg );
6650%}
6651
6652// Store immediate Float value (it is faster than store from FPU register)
6653// The instruction usage is guarded by predicate in operand immFPR().
6654instruct storeFPR_imm( memory mem, immFPR src) %{
6655  match(Set mem (StoreF mem src));
6656
6657  ins_cost(50);
6658  format %{ "MOV    $mem,$src\t# store float" %}
6659  opcode(0xC7);               /* C7 /0 */
6660  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6661  ins_pipe( ialu_mem_imm );
6662%}
6663
6664// Store immediate Float value (it is faster than store from XMM register)
6665// The instruction usage is guarded by predicate in operand immF().
6666instruct storeF_imm( memory mem, immF src) %{
6667  match(Set mem (StoreF mem src));
6668
6669  ins_cost(50);
6670  format %{ "MOV    $mem,$src\t# store float" %}
6671  opcode(0xC7);               /* C7 /0 */
6672  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6673  ins_pipe( ialu_mem_imm );
6674%}
6675
6676// Store Integer to stack slot
6677instruct storeSSI(stackSlotI dst, rRegI src) %{
6678  match(Set dst src);
6679
6680  ins_cost(100);
6681  format %{ "MOV    $dst,$src" %}
6682  opcode(0x89);
6683  ins_encode( OpcPRegSS( dst, src ) );
6684  ins_pipe( ialu_mem_reg );
6685%}
6686
6687// Store Integer to stack slot
6688instruct storeSSP(stackSlotP dst, eRegP src) %{
6689  match(Set dst src);
6690
6691  ins_cost(100);
6692  format %{ "MOV    $dst,$src" %}
6693  opcode(0x89);
6694  ins_encode( OpcPRegSS( dst, src ) );
6695  ins_pipe( ialu_mem_reg );
6696%}
6697
6698// Store Long to stack slot
6699instruct storeSSL(stackSlotL dst, eRegL src) %{
6700  match(Set dst src);
6701
6702  ins_cost(200);
6703  format %{ "MOV    $dst,$src.lo\n\t"
6704            "MOV    $dst+4,$src.hi" %}
6705  opcode(0x89, 0x89);
6706  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6707  ins_pipe( ialu_mem_long_reg );
6708%}
6709
6710//----------MemBar Instructions-----------------------------------------------
6711// Memory barrier flavors
6712
6713instruct membar_acquire() %{
6714  match(MemBarAcquire);
6715  match(LoadFence);
6716  ins_cost(400);
6717
6718  size(0);
6719  format %{ "MEMBAR-acquire ! (empty encoding)" %}
6720  ins_encode();
6721  ins_pipe(empty);
6722%}
6723
6724instruct membar_acquire_lock() %{
6725  match(MemBarAcquireLock);
6726  ins_cost(0);
6727
6728  size(0);
6729  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6730  ins_encode( );
6731  ins_pipe(empty);
6732%}
6733
6734instruct membar_release() %{
6735  match(MemBarRelease);
6736  match(StoreFence);
6737  ins_cost(400);
6738
6739  size(0);
6740  format %{ "MEMBAR-release ! (empty encoding)" %}
6741  ins_encode( );
6742  ins_pipe(empty);
6743%}
6744
6745instruct membar_release_lock() %{
6746  match(MemBarReleaseLock);
6747  ins_cost(0);
6748
6749  size(0);
6750  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6751  ins_encode( );
6752  ins_pipe(empty);
6753%}
6754
6755instruct membar_volatile(eFlagsReg cr) %{
6756  match(MemBarVolatile);
6757  effect(KILL cr);
6758  ins_cost(400);
6759
6760  format %{
6761    $$template
6762    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6763  %}
6764  ins_encode %{
6765    __ membar(Assembler::StoreLoad);
6766  %}
6767  ins_pipe(pipe_slow);
6768%}
6769
6770instruct unnecessary_membar_volatile() %{
6771  match(MemBarVolatile);
6772  predicate(Matcher::post_store_load_barrier(n));
6773  ins_cost(0);
6774
6775  size(0);
6776  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6777  ins_encode( );
6778  ins_pipe(empty);
6779%}
6780
6781instruct membar_storestore() %{
6782  match(MemBarStoreStore);
6783  ins_cost(0);
6784
6785  size(0);
6786  format %{ "MEMBAR-storestore (empty encoding)" %}
6787  ins_encode( );
6788  ins_pipe(empty);
6789%}
6790
6791//----------Move Instructions--------------------------------------------------
6792instruct castX2P(eAXRegP dst, eAXRegI src) %{
6793  match(Set dst (CastX2P src));
6794  format %{ "# X2P  $dst, $src" %}
6795  ins_encode( /*empty encoding*/ );
6796  ins_cost(0);
6797  ins_pipe(empty);
6798%}
6799
6800instruct castP2X(rRegI dst, eRegP src ) %{
6801  match(Set dst (CastP2X src));
6802  ins_cost(50);
6803  format %{ "MOV    $dst, $src\t# CastP2X" %}
6804  ins_encode( enc_Copy( dst, src) );
6805  ins_pipe( ialu_reg_reg );
6806%}
6807
6808//----------Conditional Move---------------------------------------------------
6809// Conditional move
6810instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6811  predicate(!VM_Version::supports_cmov() );
6812  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6813  ins_cost(200);
6814  format %{ "J$cop,us skip\t# signed cmove\n\t"
6815            "MOV    $dst,$src\n"
6816      "skip:" %}
6817  ins_encode %{
6818    Label Lskip;
6819    // Invert sense of branch from sense of CMOV
6820    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6821    __ movl($dst$$Register, $src$$Register);
6822    __ bind(Lskip);
6823  %}
6824  ins_pipe( pipe_cmov_reg );
6825%}
6826
6827instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6828  predicate(!VM_Version::supports_cmov() );
6829  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6830  ins_cost(200);
6831  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6832            "MOV    $dst,$src\n"
6833      "skip:" %}
6834  ins_encode %{
6835    Label Lskip;
6836    // Invert sense of branch from sense of CMOV
6837    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6838    __ movl($dst$$Register, $src$$Register);
6839    __ bind(Lskip);
6840  %}
6841  ins_pipe( pipe_cmov_reg );
6842%}
6843
6844instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6845  predicate(VM_Version::supports_cmov() );
6846  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6847  ins_cost(200);
6848  format %{ "CMOV$cop $dst,$src" %}
6849  opcode(0x0F,0x40);
6850  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6851  ins_pipe( pipe_cmov_reg );
6852%}
6853
6854instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6855  predicate(VM_Version::supports_cmov() );
6856  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6857  ins_cost(200);
6858  format %{ "CMOV$cop $dst,$src" %}
6859  opcode(0x0F,0x40);
6860  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6861  ins_pipe( pipe_cmov_reg );
6862%}
6863
6864instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6865  predicate(VM_Version::supports_cmov() );
6866  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6867  ins_cost(200);
6868  expand %{
6869    cmovI_regU(cop, cr, dst, src);
6870  %}
6871%}
6872
6873// Conditional move
6874instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6875  predicate(VM_Version::supports_cmov() );
6876  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6877  ins_cost(250);
6878  format %{ "CMOV$cop $dst,$src" %}
6879  opcode(0x0F,0x40);
6880  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6881  ins_pipe( pipe_cmov_mem );
6882%}
6883
6884// Conditional move
6885instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6886  predicate(VM_Version::supports_cmov() );
6887  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6888  ins_cost(250);
6889  format %{ "CMOV$cop $dst,$src" %}
6890  opcode(0x0F,0x40);
6891  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6892  ins_pipe( pipe_cmov_mem );
6893%}
6894
6895instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6896  predicate(VM_Version::supports_cmov() );
6897  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6898  ins_cost(250);
6899  expand %{
6900    cmovI_memU(cop, cr, dst, src);
6901  %}
6902%}
6903
6904// Conditional move
6905instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6906  predicate(VM_Version::supports_cmov() );
6907  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6908  ins_cost(200);
6909  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6910  opcode(0x0F,0x40);
6911  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6912  ins_pipe( pipe_cmov_reg );
6913%}
6914
6915// Conditional move (non-P6 version)
6916// Note:  a CMoveP is generated for  stubs and native wrappers
6917//        regardless of whether we are on a P6, so we
6918//        emulate a cmov here
6919instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6920  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6921  ins_cost(300);
6922  format %{ "Jn$cop   skip\n\t"
6923          "MOV    $dst,$src\t# pointer\n"
6924      "skip:" %}
6925  opcode(0x8b);
6926  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6927  ins_pipe( pipe_cmov_reg );
6928%}
6929
6930// Conditional move
6931instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6932  predicate(VM_Version::supports_cmov() );
6933  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6934  ins_cost(200);
6935  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6936  opcode(0x0F,0x40);
6937  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6938  ins_pipe( pipe_cmov_reg );
6939%}
6940
6941instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6942  predicate(VM_Version::supports_cmov() );
6943  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6944  ins_cost(200);
6945  expand %{
6946    cmovP_regU(cop, cr, dst, src);
6947  %}
6948%}
6949
6950// DISABLED: Requires the ADLC to emit a bottom_type call that
6951// correctly meets the two pointer arguments; one is an incoming
6952// register but the other is a memory operand.  ALSO appears to
6953// be buggy with implicit null checks.
6954//
6955//// Conditional move
6956//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6957//  predicate(VM_Version::supports_cmov() );
6958//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6959//  ins_cost(250);
6960//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6961//  opcode(0x0F,0x40);
6962//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6963//  ins_pipe( pipe_cmov_mem );
6964//%}
6965//
6966//// Conditional move
6967//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6968//  predicate(VM_Version::supports_cmov() );
6969//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6970//  ins_cost(250);
6971//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6972//  opcode(0x0F,0x40);
6973//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6974//  ins_pipe( pipe_cmov_mem );
6975//%}
6976
6977// Conditional move
6978instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6979  predicate(UseSSE<=1);
6980  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6981  ins_cost(200);
6982  format %{ "FCMOV$cop $dst,$src\t# double" %}
6983  opcode(0xDA);
6984  ins_encode( enc_cmov_dpr(cop,src) );
6985  ins_pipe( pipe_cmovDPR_reg );
6986%}
6987
6988// Conditional move
6989instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6990  predicate(UseSSE==0);
6991  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6992  ins_cost(200);
6993  format %{ "FCMOV$cop $dst,$src\t# float" %}
6994  opcode(0xDA);
6995  ins_encode( enc_cmov_dpr(cop,src) );
6996  ins_pipe( pipe_cmovDPR_reg );
6997%}
6998
6999// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7000instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
7001  predicate(UseSSE<=1);
7002  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7003  ins_cost(200);
7004  format %{ "Jn$cop   skip\n\t"
7005            "MOV    $dst,$src\t# double\n"
7006      "skip:" %}
7007  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
7008  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
7009  ins_pipe( pipe_cmovDPR_reg );
7010%}
7011
7012// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7013instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
7014  predicate(UseSSE==0);
7015  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7016  ins_cost(200);
7017  format %{ "Jn$cop    skip\n\t"
7018            "MOV    $dst,$src\t# float\n"
7019      "skip:" %}
7020  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
7021  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
7022  ins_pipe( pipe_cmovDPR_reg );
7023%}
7024
7025// No CMOVE with SSE/SSE2
7026instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
7027  predicate (UseSSE>=1);
7028  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7029  ins_cost(200);
7030  format %{ "Jn$cop   skip\n\t"
7031            "MOVSS  $dst,$src\t# float\n"
7032      "skip:" %}
7033  ins_encode %{
7034    Label skip;
7035    // Invert sense of branch from sense of CMOV
7036    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7037    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7038    __ bind(skip);
7039  %}
7040  ins_pipe( pipe_slow );
7041%}
7042
7043// No CMOVE with SSE/SSE2
7044instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
7045  predicate (UseSSE>=2);
7046  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7047  ins_cost(200);
7048  format %{ "Jn$cop   skip\n\t"
7049            "MOVSD  $dst,$src\t# float\n"
7050      "skip:" %}
7051  ins_encode %{
7052    Label skip;
7053    // Invert sense of branch from sense of CMOV
7054    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7055    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7056    __ bind(skip);
7057  %}
7058  ins_pipe( pipe_slow );
7059%}
7060
7061// unsigned version
7062instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
7063  predicate (UseSSE>=1);
7064  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7065  ins_cost(200);
7066  format %{ "Jn$cop   skip\n\t"
7067            "MOVSS  $dst,$src\t# float\n"
7068      "skip:" %}
7069  ins_encode %{
7070    Label skip;
7071    // Invert sense of branch from sense of CMOV
7072    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7073    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7074    __ bind(skip);
7075  %}
7076  ins_pipe( pipe_slow );
7077%}
7078
7079instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
7080  predicate (UseSSE>=1);
7081  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7082  ins_cost(200);
7083  expand %{
7084    fcmovF_regU(cop, cr, dst, src);
7085  %}
7086%}
7087
7088// unsigned version
7089instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
7090  predicate (UseSSE>=2);
7091  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7092  ins_cost(200);
7093  format %{ "Jn$cop   skip\n\t"
7094            "MOVSD  $dst,$src\t# float\n"
7095      "skip:" %}
7096  ins_encode %{
7097    Label skip;
7098    // Invert sense of branch from sense of CMOV
7099    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7100    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7101    __ bind(skip);
7102  %}
7103  ins_pipe( pipe_slow );
7104%}
7105
7106instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
7107  predicate (UseSSE>=2);
7108  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7109  ins_cost(200);
7110  expand %{
7111    fcmovD_regU(cop, cr, dst, src);
7112  %}
7113%}
7114
7115instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7116  predicate(VM_Version::supports_cmov() );
7117  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7118  ins_cost(200);
7119  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7120            "CMOV$cop $dst.hi,$src.hi" %}
7121  opcode(0x0F,0x40);
7122  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7123  ins_pipe( pipe_cmov_reg_long );
7124%}
7125
7126instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7127  predicate(VM_Version::supports_cmov() );
7128  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7129  ins_cost(200);
7130  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7131            "CMOV$cop $dst.hi,$src.hi" %}
7132  opcode(0x0F,0x40);
7133  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7134  ins_pipe( pipe_cmov_reg_long );
7135%}
7136
7137instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
7138  predicate(VM_Version::supports_cmov() );
7139  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7140  ins_cost(200);
7141  expand %{
7142    cmovL_regU(cop, cr, dst, src);
7143  %}
7144%}
7145
7146//----------Arithmetic Instructions--------------------------------------------
7147//----------Addition Instructions----------------------------------------------
7148
7149// Integer Addition Instructions
7150instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7151  match(Set dst (AddI dst src));
7152  effect(KILL cr);
7153
7154  size(2);
7155  format %{ "ADD    $dst,$src" %}
7156  opcode(0x03);
7157  ins_encode( OpcP, RegReg( dst, src) );
7158  ins_pipe( ialu_reg_reg );
7159%}
7160
7161instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7162  match(Set dst (AddI dst src));
7163  effect(KILL cr);
7164
7165  format %{ "ADD    $dst,$src" %}
7166  opcode(0x81, 0x00); /* /0 id */
7167  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7168  ins_pipe( ialu_reg );
7169%}
7170
7171instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7172  predicate(UseIncDec);
7173  match(Set dst (AddI dst src));
7174  effect(KILL cr);
7175
7176  size(1);
7177  format %{ "INC    $dst" %}
7178  opcode(0x40); /*  */
7179  ins_encode( Opc_plus( primary, dst ) );
7180  ins_pipe( ialu_reg );
7181%}
7182
7183instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7184  match(Set dst (AddI src0 src1));
7185  ins_cost(110);
7186
7187  format %{ "LEA    $dst,[$src0 + $src1]" %}
7188  opcode(0x8D); /* 0x8D /r */
7189  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7190  ins_pipe( ialu_reg_reg );
7191%}
7192
7193instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7194  match(Set dst (AddP src0 src1));
7195  ins_cost(110);
7196
7197  format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7198  opcode(0x8D); /* 0x8D /r */
7199  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7200  ins_pipe( ialu_reg_reg );
7201%}
7202
7203instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7204  predicate(UseIncDec);
7205  match(Set dst (AddI dst src));
7206  effect(KILL cr);
7207
7208  size(1);
7209  format %{ "DEC    $dst" %}
7210  opcode(0x48); /*  */
7211  ins_encode( Opc_plus( primary, dst ) );
7212  ins_pipe( ialu_reg );
7213%}
7214
7215instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7216  match(Set dst (AddP dst src));
7217  effect(KILL cr);
7218
7219  size(2);
7220  format %{ "ADD    $dst,$src" %}
7221  opcode(0x03);
7222  ins_encode( OpcP, RegReg( dst, src) );
7223  ins_pipe( ialu_reg_reg );
7224%}
7225
7226instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7227  match(Set dst (AddP dst src));
7228  effect(KILL cr);
7229
7230  format %{ "ADD    $dst,$src" %}
7231  opcode(0x81,0x00); /* Opcode 81 /0 id */
7232  // ins_encode( RegImm( dst, src) );
7233  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7234  ins_pipe( ialu_reg );
7235%}
7236
7237instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7238  match(Set dst (AddI dst (LoadI src)));
7239  effect(KILL cr);
7240
7241  ins_cost(125);
7242  format %{ "ADD    $dst,$src" %}
7243  opcode(0x03);
7244  ins_encode( OpcP, RegMem( dst, src) );
7245  ins_pipe( ialu_reg_mem );
7246%}
7247
7248instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7249  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7250  effect(KILL cr);
7251
7252  ins_cost(150);
7253  format %{ "ADD    $dst,$src" %}
7254  opcode(0x01);  /* Opcode 01 /r */
7255  ins_encode( OpcP, RegMem( src, dst ) );
7256  ins_pipe( ialu_mem_reg );
7257%}
7258
7259// Add Memory with Immediate
7260instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7261  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7262  effect(KILL cr);
7263
7264  ins_cost(125);
7265  format %{ "ADD    $dst,$src" %}
7266  opcode(0x81);               /* Opcode 81 /0 id */
7267  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7268  ins_pipe( ialu_mem_imm );
7269%}
7270
7271instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7272  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7273  effect(KILL cr);
7274
7275  ins_cost(125);
7276  format %{ "INC    $dst" %}
7277  opcode(0xFF);               /* Opcode FF /0 */
7278  ins_encode( OpcP, RMopc_Mem(0x00,dst));
7279  ins_pipe( ialu_mem_imm );
7280%}
7281
7282instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7283  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7284  effect(KILL cr);
7285
7286  ins_cost(125);
7287  format %{ "DEC    $dst" %}
7288  opcode(0xFF);               /* Opcode FF /1 */
7289  ins_encode( OpcP, RMopc_Mem(0x01,dst));
7290  ins_pipe( ialu_mem_imm );
7291%}
7292
7293
7294instruct checkCastPP( eRegP dst ) %{
7295  match(Set dst (CheckCastPP dst));
7296
7297  size(0);
7298  format %{ "#checkcastPP of $dst" %}
7299  ins_encode( /*empty encoding*/ );
7300  ins_pipe( empty );
7301%}
7302
7303instruct castPP( eRegP dst ) %{
7304  match(Set dst (CastPP dst));
7305  format %{ "#castPP of $dst" %}
7306  ins_encode( /*empty encoding*/ );
7307  ins_pipe( empty );
7308%}
7309
7310instruct castII( rRegI dst ) %{
7311  match(Set dst (CastII dst));
7312  format %{ "#castII of $dst" %}
7313  ins_encode( /*empty encoding*/ );
7314  ins_cost(0);
7315  ins_pipe( empty );
7316%}
7317
7318
7319// Load-locked - same as a regular pointer load when used with compare-swap
7320instruct loadPLocked(eRegP dst, memory mem) %{
7321  match(Set dst (LoadPLocked mem));
7322
7323  ins_cost(125);
7324  format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7325  opcode(0x8B);
7326  ins_encode( OpcP, RegMem(dst,mem));
7327  ins_pipe( ialu_reg_mem );
7328%}
7329
7330// Conditional-store of the updated heap-top.
7331// Used during allocation of the shared heap.
7332// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7333instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7334  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7335  // EAX is killed if there is contention, but then it's also unused.
7336  // In the common case of no contention, EAX holds the new oop address.
7337  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7338  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7339  ins_pipe( pipe_cmpxchg );
7340%}
7341
7342// Conditional-store of an int value.
7343// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7344instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7345  match(Set cr (StoreIConditional mem (Binary oldval newval)));
7346  effect(KILL oldval);
7347  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7348  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7349  ins_pipe( pipe_cmpxchg );
7350%}
7351
7352// Conditional-store of a long value.
7353// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7354instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7355  match(Set cr (StoreLConditional mem (Binary oldval newval)));
7356  effect(KILL oldval);
7357  format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7358            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7359            "XCHG   EBX,ECX"
7360  %}
7361  ins_encode %{
7362    // Note: we need to swap rbx, and rcx before and after the
7363    //       cmpxchg8 instruction because the instruction uses
7364    //       rcx as the high order word of the new value to store but
7365    //       our register encoding uses rbx.
7366    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7367    __ lock();
7368    __ cmpxchg8($mem$$Address);
7369    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7370  %}
7371  ins_pipe( pipe_cmpxchg );
7372%}
7373
7374// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7375
7376instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7377  predicate(VM_Version::supports_cx8());
7378  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7379  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7380  effect(KILL cr, KILL oldval);
7381  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7382            "MOV    $res,0\n\t"
7383            "JNE,s  fail\n\t"
7384            "MOV    $res,1\n"
7385          "fail:" %}
7386  ins_encode( enc_cmpxchg8(mem_ptr),
7387              enc_flags_ne_to_boolean(res) );
7388  ins_pipe( pipe_cmpxchg );
7389%}
7390
7391instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7392  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7393  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7394  effect(KILL cr, KILL oldval);
7395  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7396            "MOV    $res,0\n\t"
7397            "JNE,s  fail\n\t"
7398            "MOV    $res,1\n"
7399          "fail:" %}
7400  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7401  ins_pipe( pipe_cmpxchg );
7402%}
7403
7404instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7405  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7406  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
7407  effect(KILL cr, KILL oldval);
7408  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7409            "MOV    $res,0\n\t"
7410            "JNE,s  fail\n\t"
7411            "MOV    $res,1\n"
7412          "fail:" %}
7413  ins_encode( enc_cmpxchgb(mem_ptr),
7414              enc_flags_ne_to_boolean(res) );
7415  ins_pipe( pipe_cmpxchg );
7416%}
7417
7418instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7419  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7420  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
7421  effect(KILL cr, KILL oldval);
7422  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7423            "MOV    $res,0\n\t"
7424            "JNE,s  fail\n\t"
7425            "MOV    $res,1\n"
7426          "fail:" %}
7427  ins_encode( enc_cmpxchgw(mem_ptr),
7428              enc_flags_ne_to_boolean(res) );
7429  ins_pipe( pipe_cmpxchg );
7430%}
7431
7432instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7433  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7434  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7435  effect(KILL cr, KILL oldval);
7436  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7437            "MOV    $res,0\n\t"
7438            "JNE,s  fail\n\t"
7439            "MOV    $res,1\n"
7440          "fail:" %}
7441  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7442  ins_pipe( pipe_cmpxchg );
7443%}
7444
7445instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7446  predicate(VM_Version::supports_cx8());
7447  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7448  effect(KILL cr);
7449  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7450  ins_encode( enc_cmpxchg8(mem_ptr) );
7451  ins_pipe( pipe_cmpxchg );
7452%}
7453
7454instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7455  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7456  effect(KILL cr);
7457  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7458  ins_encode( enc_cmpxchg(mem_ptr) );
7459  ins_pipe( pipe_cmpxchg );
7460%}
7461
7462instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7463  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
7464  effect(KILL cr);
7465  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7466  ins_encode( enc_cmpxchgb(mem_ptr) );
7467  ins_pipe( pipe_cmpxchg );
7468%}
7469
7470instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7471  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7472  effect(KILL cr);
7473  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7474  ins_encode( enc_cmpxchgw(mem_ptr) );
7475  ins_pipe( pipe_cmpxchg );
7476%}
7477
7478instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7479  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7480  effect(KILL cr);
7481  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7482  ins_encode( enc_cmpxchg(mem_ptr) );
7483  ins_pipe( pipe_cmpxchg );
7484%}
7485
7486instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7487  predicate(n->as_LoadStore()->result_not_used());
7488  match(Set dummy (GetAndAddB mem add));
7489  effect(KILL cr);
7490  format %{ "ADDB  [$mem],$add" %}
7491  ins_encode %{
7492    __ lock();
7493    __ addb($mem$$Address, $add$$constant);
7494  %}
7495  ins_pipe( pipe_cmpxchg );
7496%}
7497
7498// Important to match to xRegI: only 8-bit regs.
7499instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
7500  match(Set newval (GetAndAddB mem newval));
7501  effect(KILL cr);
7502  format %{ "XADDB  [$mem],$newval" %}
7503  ins_encode %{
7504    __ lock();
7505    __ xaddb($mem$$Address, $newval$$Register);
7506  %}
7507  ins_pipe( pipe_cmpxchg );
7508%}
7509
7510instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7511  predicate(n->as_LoadStore()->result_not_used());
7512  match(Set dummy (GetAndAddS mem add));
7513  effect(KILL cr);
7514  format %{ "ADDS  [$mem],$add" %}
7515  ins_encode %{
7516    __ lock();
7517    __ addw($mem$$Address, $add$$constant);
7518  %}
7519  ins_pipe( pipe_cmpxchg );
7520%}
7521
7522instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
7523  match(Set newval (GetAndAddS mem newval));
7524  effect(KILL cr);
7525  format %{ "XADDS  [$mem],$newval" %}
7526  ins_encode %{
7527    __ lock();
7528    __ xaddw($mem$$Address, $newval$$Register);
7529  %}
7530  ins_pipe( pipe_cmpxchg );
7531%}
7532
7533instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7534  predicate(n->as_LoadStore()->result_not_used());
7535  match(Set dummy (GetAndAddI mem add));
7536  effect(KILL cr);
7537  format %{ "ADDL  [$mem],$add" %}
7538  ins_encode %{
7539    __ lock();
7540    __ addl($mem$$Address, $add$$constant);
7541  %}
7542  ins_pipe( pipe_cmpxchg );
7543%}
7544
7545instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7546  match(Set newval (GetAndAddI mem newval));
7547  effect(KILL cr);
7548  format %{ "XADDL  [$mem],$newval" %}
7549  ins_encode %{
7550    __ lock();
7551    __ xaddl($mem$$Address, $newval$$Register);
7552  %}
7553  ins_pipe( pipe_cmpxchg );
7554%}
7555
7556// Important to match to xRegI: only 8-bit regs.
7557instruct xchgB( memory mem, xRegI newval) %{
7558  match(Set newval (GetAndSetB mem newval));
7559  format %{ "XCHGB  $newval,[$mem]" %}
7560  ins_encode %{
7561    __ xchgb($newval$$Register, $mem$$Address);
7562  %}
7563  ins_pipe( pipe_cmpxchg );
7564%}
7565
7566instruct xchgS( memory mem, rRegI newval) %{
7567  match(Set newval (GetAndSetS mem newval));
7568  format %{ "XCHGW  $newval,[$mem]" %}
7569  ins_encode %{
7570    __ xchgw($newval$$Register, $mem$$Address);
7571  %}
7572  ins_pipe( pipe_cmpxchg );
7573%}
7574
7575instruct xchgI( memory mem, rRegI newval) %{
7576  match(Set newval (GetAndSetI mem newval));
7577  format %{ "XCHGL  $newval,[$mem]" %}
7578  ins_encode %{
7579    __ xchgl($newval$$Register, $mem$$Address);
7580  %}
7581  ins_pipe( pipe_cmpxchg );
7582%}
7583
7584instruct xchgP( memory mem, pRegP newval) %{
7585  match(Set newval (GetAndSetP mem newval));
7586  format %{ "XCHGL  $newval,[$mem]" %}
7587  ins_encode %{
7588    __ xchgl($newval$$Register, $mem$$Address);
7589  %}
7590  ins_pipe( pipe_cmpxchg );
7591%}
7592
7593//----------Subtraction Instructions-------------------------------------------
7594
7595// Integer Subtraction Instructions
7596instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7597  match(Set dst (SubI dst src));
7598  effect(KILL cr);
7599
7600  size(2);
7601  format %{ "SUB    $dst,$src" %}
7602  opcode(0x2B);
7603  ins_encode( OpcP, RegReg( dst, src) );
7604  ins_pipe( ialu_reg_reg );
7605%}
7606
7607instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7608  match(Set dst (SubI dst src));
7609  effect(KILL cr);
7610
7611  format %{ "SUB    $dst,$src" %}
7612  opcode(0x81,0x05);  /* Opcode 81 /5 */
7613  // ins_encode( RegImm( dst, src) );
7614  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7615  ins_pipe( ialu_reg );
7616%}
7617
7618instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7619  match(Set dst (SubI dst (LoadI src)));
7620  effect(KILL cr);
7621
7622  ins_cost(125);
7623  format %{ "SUB    $dst,$src" %}
7624  opcode(0x2B);
7625  ins_encode( OpcP, RegMem( dst, src) );
7626  ins_pipe( ialu_reg_mem );
7627%}
7628
7629instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7630  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7631  effect(KILL cr);
7632
7633  ins_cost(150);
7634  format %{ "SUB    $dst,$src" %}
7635  opcode(0x29);  /* Opcode 29 /r */
7636  ins_encode( OpcP, RegMem( src, dst ) );
7637  ins_pipe( ialu_mem_reg );
7638%}
7639
7640// Subtract from a pointer
7641instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7642  match(Set dst (AddP dst (SubI zero src)));
7643  effect(KILL cr);
7644
7645  size(2);
7646  format %{ "SUB    $dst,$src" %}
7647  opcode(0x2B);
7648  ins_encode( OpcP, RegReg( dst, src) );
7649  ins_pipe( ialu_reg_reg );
7650%}
7651
7652instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7653  match(Set dst (SubI zero dst));
7654  effect(KILL cr);
7655
7656  size(2);
7657  format %{ "NEG    $dst" %}
7658  opcode(0xF7,0x03);  // Opcode F7 /3
7659  ins_encode( OpcP, RegOpc( dst ) );
7660  ins_pipe( ialu_reg );
7661%}
7662
7663//----------Multiplication/Division Instructions-------------------------------
7664// Integer Multiplication Instructions
7665// Multiply Register
7666instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7667  match(Set dst (MulI dst src));
7668  effect(KILL cr);
7669
7670  size(3);
7671  ins_cost(300);
7672  format %{ "IMUL   $dst,$src" %}
7673  opcode(0xAF, 0x0F);
7674  ins_encode( OpcS, OpcP, RegReg( dst, src) );
7675  ins_pipe( ialu_reg_reg_alu0 );
7676%}
7677
7678// Multiply 32-bit Immediate
7679instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7680  match(Set dst (MulI src imm));
7681  effect(KILL cr);
7682
7683  ins_cost(300);
7684  format %{ "IMUL   $dst,$src,$imm" %}
7685  opcode(0x69);  /* 69 /r id */
7686  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7687  ins_pipe( ialu_reg_reg_alu0 );
7688%}
7689
7690instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7691  match(Set dst src);
7692  effect(KILL cr);
7693
7694  // Note that this is artificially increased to make it more expensive than loadConL
7695  ins_cost(250);
7696  format %{ "MOV    EAX,$src\t// low word only" %}
7697  opcode(0xB8);
7698  ins_encode( LdImmL_Lo(dst, src) );
7699  ins_pipe( ialu_reg_fat );
7700%}
7701
7702// Multiply by 32-bit Immediate, taking the shifted high order results
7703//  (special case for shift by 32)
7704instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7705  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7706  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7707             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7708             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7709  effect(USE src1, KILL cr);
7710
7711  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7712  ins_cost(0*100 + 1*400 - 150);
7713  format %{ "IMUL   EDX:EAX,$src1" %}
7714  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7715  ins_pipe( pipe_slow );
7716%}
7717
7718// Multiply by 32-bit Immediate, taking the shifted high order results
7719instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7720  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7721  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7722             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7723             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7724  effect(USE src1, KILL cr);
7725
7726  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7727  ins_cost(1*100 + 1*400 - 150);
7728  format %{ "IMUL   EDX:EAX,$src1\n\t"
7729            "SAR    EDX,$cnt-32" %}
7730  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7731  ins_pipe( pipe_slow );
7732%}
7733
7734// Multiply Memory 32-bit Immediate
7735instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7736  match(Set dst (MulI (LoadI src) imm));
7737  effect(KILL cr);
7738
7739  ins_cost(300);
7740  format %{ "IMUL   $dst,$src,$imm" %}
7741  opcode(0x69);  /* 69 /r id */
7742  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7743  ins_pipe( ialu_reg_mem_alu0 );
7744%}
7745
7746// Multiply Memory
7747instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7748  match(Set dst (MulI dst (LoadI src)));
7749  effect(KILL cr);
7750
7751  ins_cost(350);
7752  format %{ "IMUL   $dst,$src" %}
7753  opcode(0xAF, 0x0F);
7754  ins_encode( OpcS, OpcP, RegMem( dst, src) );
7755  ins_pipe( ialu_reg_mem_alu0 );
7756%}
7757
7758instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
7759%{
7760  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
7761  effect(KILL cr, KILL src2);
7762
7763  expand %{ mulI_eReg(dst, src1, cr);
7764           mulI_eReg(src2, src3, cr);
7765           addI_eReg(dst, src2, cr); %}
7766%}
7767
7768// Multiply Register Int to Long
7769instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7770  // Basic Idea: long = (long)int * (long)int
7771  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7772  effect(DEF dst, USE src, USE src1, KILL flags);
7773
7774  ins_cost(300);
7775  format %{ "IMUL   $dst,$src1" %}
7776
7777  ins_encode( long_int_multiply( dst, src1 ) );
7778  ins_pipe( ialu_reg_reg_alu0 );
7779%}
7780
7781instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7782  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7783  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7784  effect(KILL flags);
7785
7786  ins_cost(300);
7787  format %{ "MUL    $dst,$src1" %}
7788
7789  ins_encode( long_uint_multiply(dst, src1) );
7790  ins_pipe( ialu_reg_reg_alu0 );
7791%}
7792
7793// Multiply Register Long
7794instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7795  match(Set dst (MulL dst src));
7796  effect(KILL cr, TEMP tmp);
7797  ins_cost(4*100+3*400);
7798// Basic idea: lo(result) = lo(x_lo * y_lo)
7799//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7800  format %{ "MOV    $tmp,$src.lo\n\t"
7801            "IMUL   $tmp,EDX\n\t"
7802            "MOV    EDX,$src.hi\n\t"
7803            "IMUL   EDX,EAX\n\t"
7804            "ADD    $tmp,EDX\n\t"
7805            "MUL    EDX:EAX,$src.lo\n\t"
7806            "ADD    EDX,$tmp" %}
7807  ins_encode( long_multiply( dst, src, tmp ) );
7808  ins_pipe( pipe_slow );
7809%}
7810
7811// Multiply Register Long where the left operand's high 32 bits are zero
7812instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7813  predicate(is_operand_hi32_zero(n->in(1)));
7814  match(Set dst (MulL dst src));
7815  effect(KILL cr, TEMP tmp);
7816  ins_cost(2*100+2*400);
7817// Basic idea: lo(result) = lo(x_lo * y_lo)
7818//             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7819  format %{ "MOV    $tmp,$src.hi\n\t"
7820            "IMUL   $tmp,EAX\n\t"
7821            "MUL    EDX:EAX,$src.lo\n\t"
7822            "ADD    EDX,$tmp" %}
7823  ins_encode %{
7824    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7825    __ imull($tmp$$Register, rax);
7826    __ mull($src$$Register);
7827    __ addl(rdx, $tmp$$Register);
7828  %}
7829  ins_pipe( pipe_slow );
7830%}
7831
7832// Multiply Register Long where the right operand's high 32 bits are zero
7833instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7834  predicate(is_operand_hi32_zero(n->in(2)));
7835  match(Set dst (MulL dst src));
7836  effect(KILL cr, TEMP tmp);
7837  ins_cost(2*100+2*400);
7838// Basic idea: lo(result) = lo(x_lo * y_lo)
7839//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7840  format %{ "MOV    $tmp,$src.lo\n\t"
7841            "IMUL   $tmp,EDX\n\t"
7842            "MUL    EDX:EAX,$src.lo\n\t"
7843            "ADD    EDX,$tmp" %}
7844  ins_encode %{
7845    __ movl($tmp$$Register, $src$$Register);
7846    __ imull($tmp$$Register, rdx);
7847    __ mull($src$$Register);
7848    __ addl(rdx, $tmp$$Register);
7849  %}
7850  ins_pipe( pipe_slow );
7851%}
7852
7853// Multiply Register Long where the left and the right operands' high 32 bits are zero
7854instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7855  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7856  match(Set dst (MulL dst src));
7857  effect(KILL cr);
7858  ins_cost(1*400);
7859// Basic idea: lo(result) = lo(x_lo * y_lo)
7860//             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7861  format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7862  ins_encode %{
7863    __ mull($src$$Register);
7864  %}
7865  ins_pipe( pipe_slow );
7866%}
7867
7868// Multiply Register Long by small constant
7869instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7870  match(Set dst (MulL dst src));
7871  effect(KILL cr, TEMP tmp);
7872  ins_cost(2*100+2*400);
7873  size(12);
7874// Basic idea: lo(result) = lo(src * EAX)
7875//             hi(result) = hi(src * EAX) + lo(src * EDX)
7876  format %{ "IMUL   $tmp,EDX,$src\n\t"
7877            "MOV    EDX,$src\n\t"
7878            "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7879            "ADD    EDX,$tmp" %}
7880  ins_encode( long_multiply_con( dst, src, tmp ) );
7881  ins_pipe( pipe_slow );
7882%}
7883
7884// Integer DIV with Register
7885instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7886  match(Set rax (DivI rax div));
7887  effect(KILL rdx, KILL cr);
7888  size(26);
7889  ins_cost(30*100+10*100);
7890  format %{ "CMP    EAX,0x80000000\n\t"
7891            "JNE,s  normal\n\t"
7892            "XOR    EDX,EDX\n\t"
7893            "CMP    ECX,-1\n\t"
7894            "JE,s   done\n"
7895    "normal: CDQ\n\t"
7896            "IDIV   $div\n\t"
7897    "done:"        %}
7898  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7899  ins_encode( cdq_enc, OpcP, RegOpc(div) );
7900  ins_pipe( ialu_reg_reg_alu0 );
7901%}
7902
7903// Divide Register Long
7904instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7905  match(Set dst (DivL src1 src2));
7906  effect( KILL cr, KILL cx, KILL bx );
7907  ins_cost(10000);
7908  format %{ "PUSH   $src1.hi\n\t"
7909            "PUSH   $src1.lo\n\t"
7910            "PUSH   $src2.hi\n\t"
7911            "PUSH   $src2.lo\n\t"
7912            "CALL   SharedRuntime::ldiv\n\t"
7913            "ADD    ESP,16" %}
7914  ins_encode( long_div(src1,src2) );
7915  ins_pipe( pipe_slow );
7916%}
7917
7918// Integer DIVMOD with Register, both quotient and mod results
7919instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7920  match(DivModI rax div);
7921  effect(KILL cr);
7922  size(26);
7923  ins_cost(30*100+10*100);
7924  format %{ "CMP    EAX,0x80000000\n\t"
7925            "JNE,s  normal\n\t"
7926            "XOR    EDX,EDX\n\t"
7927            "CMP    ECX,-1\n\t"
7928            "JE,s   done\n"
7929    "normal: CDQ\n\t"
7930            "IDIV   $div\n\t"
7931    "done:"        %}
7932  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7933  ins_encode( cdq_enc, OpcP, RegOpc(div) );
7934  ins_pipe( pipe_slow );
7935%}
7936
7937// Integer MOD with Register
7938instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7939  match(Set rdx (ModI rax div));
7940  effect(KILL rax, KILL cr);
7941
7942  size(26);
7943  ins_cost(300);
7944  format %{ "CDQ\n\t"
7945            "IDIV   $div" %}
7946  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7947  ins_encode( cdq_enc, OpcP, RegOpc(div) );
7948  ins_pipe( ialu_reg_reg_alu0 );
7949%}
7950
7951// Remainder Register Long
7952instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7953  match(Set dst (ModL src1 src2));
7954  effect( KILL cr, KILL cx, KILL bx );
7955  ins_cost(10000);
7956  format %{ "PUSH   $src1.hi\n\t"
7957            "PUSH   $src1.lo\n\t"
7958            "PUSH   $src2.hi\n\t"
7959            "PUSH   $src2.lo\n\t"
7960            "CALL   SharedRuntime::lrem\n\t"
7961            "ADD    ESP,16" %}
7962  ins_encode( long_mod(src1,src2) );
7963  ins_pipe( pipe_slow );
7964%}
7965
7966// Divide Register Long (no special case since divisor != -1)
7967instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7968  match(Set dst (DivL dst imm));
7969  effect( TEMP tmp, TEMP tmp2, KILL cr );
7970  ins_cost(1000);
7971  format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7972            "XOR    $tmp2,$tmp2\n\t"
7973            "CMP    $tmp,EDX\n\t"
7974            "JA,s   fast\n\t"
7975            "MOV    $tmp2,EAX\n\t"
7976            "MOV    EAX,EDX\n\t"
7977            "MOV    EDX,0\n\t"
7978            "JLE,s  pos\n\t"
7979            "LNEG   EAX : $tmp2\n\t"
7980            "DIV    $tmp # unsigned division\n\t"
7981            "XCHG   EAX,$tmp2\n\t"
7982            "DIV    $tmp\n\t"
7983            "LNEG   $tmp2 : EAX\n\t"
7984            "JMP,s  done\n"
7985    "pos:\n\t"
7986            "DIV    $tmp\n\t"
7987            "XCHG   EAX,$tmp2\n"
7988    "fast:\n\t"
7989            "DIV    $tmp\n"
7990    "done:\n\t"
7991            "MOV    EDX,$tmp2\n\t"
7992            "NEG    EDX:EAX # if $imm < 0" %}
7993  ins_encode %{
7994    int con = (int)$imm$$constant;
7995    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7996    int pcon = (con > 0) ? con : -con;
7997    Label Lfast, Lpos, Ldone;
7998
7999    __ movl($tmp$$Register, pcon);
8000    __ xorl($tmp2$$Register,$tmp2$$Register);
8001    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8002    __ jccb(Assembler::above, Lfast); // result fits into 32 bit
8003
8004    __ movl($tmp2$$Register, $dst$$Register); // save
8005    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8006    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8007    __ jccb(Assembler::lessEqual, Lpos); // result is positive
8008
8009    // Negative dividend.
8010    // convert value to positive to use unsigned division
8011    __ lneg($dst$$Register, $tmp2$$Register);
8012    __ divl($tmp$$Register);
8013    __ xchgl($dst$$Register, $tmp2$$Register);
8014    __ divl($tmp$$Register);
8015    // revert result back to negative
8016    __ lneg($tmp2$$Register, $dst$$Register);
8017    __ jmpb(Ldone);
8018
8019    __ bind(Lpos);
8020    __ divl($tmp$$Register); // Use unsigned division
8021    __ xchgl($dst$$Register, $tmp2$$Register);
8022    // Fallthrow for final divide, tmp2 has 32 bit hi result
8023
8024    __ bind(Lfast);
8025    // fast path: src is positive
8026    __ divl($tmp$$Register); // Use unsigned division
8027
8028    __ bind(Ldone);
8029    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
8030    if (con < 0) {
8031      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
8032    }
8033  %}
8034  ins_pipe( pipe_slow );
8035%}
8036
8037// Remainder Register Long (remainder fit into 32 bits)
8038instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
8039  match(Set dst (ModL dst imm));
8040  effect( TEMP tmp, TEMP tmp2, KILL cr );
8041  ins_cost(1000);
8042  format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
8043            "CMP    $tmp,EDX\n\t"
8044            "JA,s   fast\n\t"
8045            "MOV    $tmp2,EAX\n\t"
8046            "MOV    EAX,EDX\n\t"
8047            "MOV    EDX,0\n\t"
8048            "JLE,s  pos\n\t"
8049            "LNEG   EAX : $tmp2\n\t"
8050            "DIV    $tmp # unsigned division\n\t"
8051            "MOV    EAX,$tmp2\n\t"
8052            "DIV    $tmp\n\t"
8053            "NEG    EDX\n\t"
8054            "JMP,s  done\n"
8055    "pos:\n\t"
8056            "DIV    $tmp\n\t"
8057            "MOV    EAX,$tmp2\n"
8058    "fast:\n\t"
8059            "DIV    $tmp\n"
8060    "done:\n\t"
8061            "MOV    EAX,EDX\n\t"
8062            "SAR    EDX,31\n\t" %}
8063  ins_encode %{
8064    int con = (int)$imm$$constant;
8065    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8066    int pcon = (con > 0) ? con : -con;
8067    Label  Lfast, Lpos, Ldone;
8068
8069    __ movl($tmp$$Register, pcon);
8070    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8071    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
8072
8073    __ movl($tmp2$$Register, $dst$$Register); // save
8074    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8075    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8076    __ jccb(Assembler::lessEqual, Lpos); // result is positive
8077
8078    // Negative dividend.
8079    // convert value to positive to use unsigned division
8080    __ lneg($dst$$Register, $tmp2$$Register);
8081    __ divl($tmp$$Register);
8082    __ movl($dst$$Register, $tmp2$$Register);
8083    __ divl($tmp$$Register);
8084    // revert remainder back to negative
8085    __ negl(HIGH_FROM_LOW($dst$$Register));
8086    __ jmpb(Ldone);
8087
8088    __ bind(Lpos);
8089    __ divl($tmp$$Register);
8090    __ movl($dst$$Register, $tmp2$$Register);
8091
8092    __ bind(Lfast);
8093    // fast path: src is positive
8094    __ divl($tmp$$Register);
8095
8096    __ bind(Ldone);
8097    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8098    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8099
8100  %}
8101  ins_pipe( pipe_slow );
8102%}
8103
8104// Integer Shift Instructions
8105// Shift Left by one
8106instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8107  match(Set dst (LShiftI dst shift));
8108  effect(KILL cr);
8109
8110  size(2);
8111  format %{ "SHL    $dst,$shift" %}
8112  opcode(0xD1, 0x4);  /* D1 /4 */
8113  ins_encode( OpcP, RegOpc( dst ) );
8114  ins_pipe( ialu_reg );
8115%}
8116
8117// Shift Left by 8-bit immediate
8118instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8119  match(Set dst (LShiftI dst shift));
8120  effect(KILL cr);
8121
8122  size(3);
8123  format %{ "SHL    $dst,$shift" %}
8124  opcode(0xC1, 0x4);  /* C1 /4 ib */
8125  ins_encode( RegOpcImm( dst, shift) );
8126  ins_pipe( ialu_reg );
8127%}
8128
8129// Shift Left by variable
8130instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8131  match(Set dst (LShiftI dst shift));
8132  effect(KILL cr);
8133
8134  size(2);
8135  format %{ "SHL    $dst,$shift" %}
8136  opcode(0xD3, 0x4);  /* D3 /4 */
8137  ins_encode( OpcP, RegOpc( dst ) );
8138  ins_pipe( ialu_reg_reg );
8139%}
8140
8141// Arithmetic shift right by one
8142instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8143  match(Set dst (RShiftI dst shift));
8144  effect(KILL cr);
8145
8146  size(2);
8147  format %{ "SAR    $dst,$shift" %}
8148  opcode(0xD1, 0x7);  /* D1 /7 */
8149  ins_encode( OpcP, RegOpc( dst ) );
8150  ins_pipe( ialu_reg );
8151%}
8152
8153// Arithmetic shift right by one
8154instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
8155  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8156  effect(KILL cr);
8157  format %{ "SAR    $dst,$shift" %}
8158  opcode(0xD1, 0x7);  /* D1 /7 */
8159  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
8160  ins_pipe( ialu_mem_imm );
8161%}
8162
8163// Arithmetic Shift Right by 8-bit immediate
8164instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8165  match(Set dst (RShiftI dst shift));
8166  effect(KILL cr);
8167
8168  size(3);
8169  format %{ "SAR    $dst,$shift" %}
8170  opcode(0xC1, 0x7);  /* C1 /7 ib */
8171  ins_encode( RegOpcImm( dst, shift ) );
8172  ins_pipe( ialu_mem_imm );
8173%}
8174
8175// Arithmetic Shift Right by 8-bit immediate
8176instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
8177  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8178  effect(KILL cr);
8179
8180  format %{ "SAR    $dst,$shift" %}
8181  opcode(0xC1, 0x7);  /* C1 /7 ib */
8182  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
8183  ins_pipe( ialu_mem_imm );
8184%}
8185
8186// Arithmetic Shift Right by variable
8187instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8188  match(Set dst (RShiftI dst shift));
8189  effect(KILL cr);
8190
8191  size(2);
8192  format %{ "SAR    $dst,$shift" %}
8193  opcode(0xD3, 0x7);  /* D3 /7 */
8194  ins_encode( OpcP, RegOpc( dst ) );
8195  ins_pipe( ialu_reg_reg );
8196%}
8197
8198// Logical shift right by one
8199instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8200  match(Set dst (URShiftI dst shift));
8201  effect(KILL cr);
8202
8203  size(2);
8204  format %{ "SHR    $dst,$shift" %}
8205  opcode(0xD1, 0x5);  /* D1 /5 */
8206  ins_encode( OpcP, RegOpc( dst ) );
8207  ins_pipe( ialu_reg );
8208%}
8209
8210// Logical Shift Right by 8-bit immediate
8211instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8212  match(Set dst (URShiftI dst shift));
8213  effect(KILL cr);
8214
8215  size(3);
8216  format %{ "SHR    $dst,$shift" %}
8217  opcode(0xC1, 0x5);  /* C1 /5 ib */
8218  ins_encode( RegOpcImm( dst, shift) );
8219  ins_pipe( ialu_reg );
8220%}
8221
8222
8223// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
8224// This idiom is used by the compiler for the i2b bytecode.
8225instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
8226  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8227
8228  size(3);
8229  format %{ "MOVSX  $dst,$src :8" %}
8230  ins_encode %{
8231    __ movsbl($dst$$Register, $src$$Register);
8232  %}
8233  ins_pipe(ialu_reg_reg);
8234%}
8235
8236// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
8237// This idiom is used by the compiler the i2s bytecode.
8238instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
8239  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8240
8241  size(3);
8242  format %{ "MOVSX  $dst,$src :16" %}
8243  ins_encode %{
8244    __ movswl($dst$$Register, $src$$Register);
8245  %}
8246  ins_pipe(ialu_reg_reg);
8247%}
8248
8249
8250// Logical Shift Right by variable
8251instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8252  match(Set dst (URShiftI dst shift));
8253  effect(KILL cr);
8254
8255  size(2);
8256  format %{ "SHR    $dst,$shift" %}
8257  opcode(0xD3, 0x5);  /* D3 /5 */
8258  ins_encode( OpcP, RegOpc( dst ) );
8259  ins_pipe( ialu_reg_reg );
8260%}
8261
8262
8263//----------Logical Instructions-----------------------------------------------
8264//----------Integer Logical Instructions---------------------------------------
8265// And Instructions
8266// And Register with Register
8267instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8268  match(Set dst (AndI dst src));
8269  effect(KILL cr);
8270
8271  size(2);
8272  format %{ "AND    $dst,$src" %}
8273  opcode(0x23);
8274  ins_encode( OpcP, RegReg( dst, src) );
8275  ins_pipe( ialu_reg_reg );
8276%}
8277
8278// And Register with Immediate
8279instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8280  match(Set dst (AndI dst src));
8281  effect(KILL cr);
8282
8283  format %{ "AND    $dst,$src" %}
8284  opcode(0x81,0x04);  /* Opcode 81 /4 */
8285  // ins_encode( RegImm( dst, src) );
8286  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8287  ins_pipe( ialu_reg );
8288%}
8289
8290// And Register with Memory
8291instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8292  match(Set dst (AndI dst (LoadI src)));
8293  effect(KILL cr);
8294
8295  ins_cost(125);
8296  format %{ "AND    $dst,$src" %}
8297  opcode(0x23);
8298  ins_encode( OpcP, RegMem( dst, src) );
8299  ins_pipe( ialu_reg_mem );
8300%}
8301
8302// And Memory with Register
8303instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8304  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8305  effect(KILL cr);
8306
8307  ins_cost(150);
8308  format %{ "AND    $dst,$src" %}
8309  opcode(0x21);  /* Opcode 21 /r */
8310  ins_encode( OpcP, RegMem( src, dst ) );
8311  ins_pipe( ialu_mem_reg );
8312%}
8313
8314// And Memory with Immediate
8315instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8316  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8317  effect(KILL cr);
8318
8319  ins_cost(125);
8320  format %{ "AND    $dst,$src" %}
8321  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8322  // ins_encode( MemImm( dst, src) );
8323  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8324  ins_pipe( ialu_mem_imm );
8325%}
8326
8327// BMI1 instructions
8328instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8329  match(Set dst (AndI (XorI src1 minus_1) src2));
8330  predicate(UseBMI1Instructions);
8331  effect(KILL cr);
8332
8333  format %{ "ANDNL  $dst, $src1, $src2" %}
8334
8335  ins_encode %{
8336    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8337  %}
8338  ins_pipe(ialu_reg);
8339%}
8340
8341instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8342  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8343  predicate(UseBMI1Instructions);
8344  effect(KILL cr);
8345
8346  ins_cost(125);
8347  format %{ "ANDNL  $dst, $src1, $src2" %}
8348
8349  ins_encode %{
8350    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8351  %}
8352  ins_pipe(ialu_reg_mem);
8353%}
8354
8355instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8356  match(Set dst (AndI (SubI imm_zero src) src));
8357  predicate(UseBMI1Instructions);
8358  effect(KILL cr);
8359
8360  format %{ "BLSIL  $dst, $src" %}
8361
8362  ins_encode %{
8363    __ blsil($dst$$Register, $src$$Register);
8364  %}
8365  ins_pipe(ialu_reg);
8366%}
8367
8368instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8369  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8370  predicate(UseBMI1Instructions);
8371  effect(KILL cr);
8372
8373  ins_cost(125);
8374  format %{ "BLSIL  $dst, $src" %}
8375
8376  ins_encode %{
8377    __ blsil($dst$$Register, $src$$Address);
8378  %}
8379  ins_pipe(ialu_reg_mem);
8380%}
8381
8382instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8383%{
8384  match(Set dst (XorI (AddI src minus_1) src));
8385  predicate(UseBMI1Instructions);
8386  effect(KILL cr);
8387
8388  format %{ "BLSMSKL $dst, $src" %}
8389
8390  ins_encode %{
8391    __ blsmskl($dst$$Register, $src$$Register);
8392  %}
8393
8394  ins_pipe(ialu_reg);
8395%}
8396
8397instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8398%{
8399  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8400  predicate(UseBMI1Instructions);
8401  effect(KILL cr);
8402
8403  ins_cost(125);
8404  format %{ "BLSMSKL $dst, $src" %}
8405
8406  ins_encode %{
8407    __ blsmskl($dst$$Register, $src$$Address);
8408  %}
8409
8410  ins_pipe(ialu_reg_mem);
8411%}
8412
8413instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8414%{
8415  match(Set dst (AndI (AddI src minus_1) src) );
8416  predicate(UseBMI1Instructions);
8417  effect(KILL cr);
8418
8419  format %{ "BLSRL  $dst, $src" %}
8420
8421  ins_encode %{
8422    __ blsrl($dst$$Register, $src$$Register);
8423  %}
8424
8425  ins_pipe(ialu_reg);
8426%}
8427
8428instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8429%{
8430  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8431  predicate(UseBMI1Instructions);
8432  effect(KILL cr);
8433
8434  ins_cost(125);
8435  format %{ "BLSRL  $dst, $src" %}
8436
8437  ins_encode %{
8438    __ blsrl($dst$$Register, $src$$Address);
8439  %}
8440
8441  ins_pipe(ialu_reg_mem);
8442%}
8443
8444// Or Instructions
8445// Or Register with Register
8446instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8447  match(Set dst (OrI dst src));
8448  effect(KILL cr);
8449
8450  size(2);
8451  format %{ "OR     $dst,$src" %}
8452  opcode(0x0B);
8453  ins_encode( OpcP, RegReg( dst, src) );
8454  ins_pipe( ialu_reg_reg );
8455%}
8456
8457instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8458  match(Set dst (OrI dst (CastP2X src)));
8459  effect(KILL cr);
8460
8461  size(2);
8462  format %{ "OR     $dst,$src" %}
8463  opcode(0x0B);
8464  ins_encode( OpcP, RegReg( dst, src) );
8465  ins_pipe( ialu_reg_reg );
8466%}
8467
8468
8469// Or Register with Immediate
8470instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8471  match(Set dst (OrI dst src));
8472  effect(KILL cr);
8473
8474  format %{ "OR     $dst,$src" %}
8475  opcode(0x81,0x01);  /* Opcode 81 /1 id */
8476  // ins_encode( RegImm( dst, src) );
8477  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8478  ins_pipe( ialu_reg );
8479%}
8480
8481// Or Register with Memory
8482instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8483  match(Set dst (OrI dst (LoadI src)));
8484  effect(KILL cr);
8485
8486  ins_cost(125);
8487  format %{ "OR     $dst,$src" %}
8488  opcode(0x0B);
8489  ins_encode( OpcP, RegMem( dst, src) );
8490  ins_pipe( ialu_reg_mem );
8491%}
8492
8493// Or Memory with Register
8494instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8495  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8496  effect(KILL cr);
8497
8498  ins_cost(150);
8499  format %{ "OR     $dst,$src" %}
8500  opcode(0x09);  /* Opcode 09 /r */
8501  ins_encode( OpcP, RegMem( src, dst ) );
8502  ins_pipe( ialu_mem_reg );
8503%}
8504
8505// Or Memory with Immediate
8506instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8507  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8508  effect(KILL cr);
8509
8510  ins_cost(125);
8511  format %{ "OR     $dst,$src" %}
8512  opcode(0x81,0x1);  /* Opcode 81 /1 id */
8513  // ins_encode( MemImm( dst, src) );
8514  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8515  ins_pipe( ialu_mem_imm );
8516%}
8517
8518// ROL/ROR
8519// ROL expand
8520instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8521  effect(USE_DEF dst, USE shift, KILL cr);
8522
8523  format %{ "ROL    $dst, $shift" %}
8524  opcode(0xD1, 0x0); /* Opcode D1 /0 */
8525  ins_encode( OpcP, RegOpc( dst ));
8526  ins_pipe( ialu_reg );
8527%}
8528
8529instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8530  effect(USE_DEF dst, USE shift, KILL cr);
8531
8532  format %{ "ROL    $dst, $shift" %}
8533  opcode(0xC1, 0x0); /*Opcode /C1  /0  */
8534  ins_encode( RegOpcImm(dst, shift) );
8535  ins_pipe(ialu_reg);
8536%}
8537
8538instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8539  effect(USE_DEF dst, USE shift, KILL cr);
8540
8541  format %{ "ROL    $dst, $shift" %}
8542  opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8543  ins_encode(OpcP, RegOpc(dst));
8544  ins_pipe( ialu_reg_reg );
8545%}
8546// end of ROL expand
8547
8548// ROL 32bit by one once
8549instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8550  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8551
8552  expand %{
8553    rolI_eReg_imm1(dst, lshift, cr);
8554  %}
8555%}
8556
8557// ROL 32bit var by imm8 once
8558instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8559  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8560  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8561
8562  expand %{
8563    rolI_eReg_imm8(dst, lshift, cr);
8564  %}
8565%}
8566
8567// ROL 32bit var by var once
8568instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8569  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8570
8571  expand %{
8572    rolI_eReg_CL(dst, shift, cr);
8573  %}
8574%}
8575
8576// ROL 32bit var by var once
8577instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8578  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8579
8580  expand %{
8581    rolI_eReg_CL(dst, shift, cr);
8582  %}
8583%}
8584
8585// ROR expand
8586instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8587  effect(USE_DEF dst, USE shift, KILL cr);
8588
8589  format %{ "ROR    $dst, $shift" %}
8590  opcode(0xD1,0x1);  /* Opcode D1 /1 */
8591  ins_encode( OpcP, RegOpc( dst ) );
8592  ins_pipe( ialu_reg );
8593%}
8594
8595instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8596  effect (USE_DEF dst, USE shift, KILL cr);
8597
8598  format %{ "ROR    $dst, $shift" %}
8599  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
8600  ins_encode( RegOpcImm(dst, shift) );
8601  ins_pipe( ialu_reg );
8602%}
8603
8604instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8605  effect(USE_DEF dst, USE shift, KILL cr);
8606
8607  format %{ "ROR    $dst, $shift" %}
8608  opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8609  ins_encode(OpcP, RegOpc(dst));
8610  ins_pipe( ialu_reg_reg );
8611%}
8612// end of ROR expand
8613
8614// ROR right once
8615instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8616  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8617
8618  expand %{
8619    rorI_eReg_imm1(dst, rshift, cr);
8620  %}
8621%}
8622
8623// ROR 32bit by immI8 once
8624instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8625  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8626  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8627
8628  expand %{
8629    rorI_eReg_imm8(dst, rshift, cr);
8630  %}
8631%}
8632
8633// ROR 32bit var by var once
8634instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8635  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8636
8637  expand %{
8638    rorI_eReg_CL(dst, shift, cr);
8639  %}
8640%}
8641
8642// ROR 32bit var by var once
8643instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8644  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8645
8646  expand %{
8647    rorI_eReg_CL(dst, shift, cr);
8648  %}
8649%}
8650
8651// Xor Instructions
8652// Xor Register with Register
8653instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8654  match(Set dst (XorI dst src));
8655  effect(KILL cr);
8656
8657  size(2);
8658  format %{ "XOR    $dst,$src" %}
8659  opcode(0x33);
8660  ins_encode( OpcP, RegReg( dst, src) );
8661  ins_pipe( ialu_reg_reg );
8662%}
8663
8664// Xor Register with Immediate -1
8665instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8666  match(Set dst (XorI dst imm));
8667
8668  size(2);
8669  format %{ "NOT    $dst" %}
8670  ins_encode %{
8671     __ notl($dst$$Register);
8672  %}
8673  ins_pipe( ialu_reg );
8674%}
8675
8676// Xor Register with Immediate
8677instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8678  match(Set dst (XorI dst src));
8679  effect(KILL cr);
8680
8681  format %{ "XOR    $dst,$src" %}
8682  opcode(0x81,0x06);  /* Opcode 81 /6 id */
8683  // ins_encode( RegImm( dst, src) );
8684  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8685  ins_pipe( ialu_reg );
8686%}
8687
8688// Xor Register with Memory
8689instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8690  match(Set dst (XorI dst (LoadI src)));
8691  effect(KILL cr);
8692
8693  ins_cost(125);
8694  format %{ "XOR    $dst,$src" %}
8695  opcode(0x33);
8696  ins_encode( OpcP, RegMem(dst, src) );
8697  ins_pipe( ialu_reg_mem );
8698%}
8699
8700// Xor Memory with Register
8701instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8702  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8703  effect(KILL cr);
8704
8705  ins_cost(150);
8706  format %{ "XOR    $dst,$src" %}
8707  opcode(0x31);  /* Opcode 31 /r */
8708  ins_encode( OpcP, RegMem( src, dst ) );
8709  ins_pipe( ialu_mem_reg );
8710%}
8711
8712// Xor Memory with Immediate
8713instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8714  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8715  effect(KILL cr);
8716
8717  ins_cost(125);
8718  format %{ "XOR    $dst,$src" %}
8719  opcode(0x81,0x6);  /* Opcode 81 /6 id */
8720  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8721  ins_pipe( ialu_mem_imm );
8722%}
8723
8724//----------Convert Int to Boolean---------------------------------------------
8725
8726instruct movI_nocopy(rRegI dst, rRegI src) %{
8727  effect( DEF dst, USE src );
8728  format %{ "MOV    $dst,$src" %}
8729  ins_encode( enc_Copy( dst, src) );
8730  ins_pipe( ialu_reg_reg );
8731%}
8732
8733instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8734  effect( USE_DEF dst, USE src, KILL cr );
8735
8736  size(4);
8737  format %{ "NEG    $dst\n\t"
8738            "ADC    $dst,$src" %}
8739  ins_encode( neg_reg(dst),
8740              OpcRegReg(0x13,dst,src) );
8741  ins_pipe( ialu_reg_reg_long );
8742%}
8743
8744instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8745  match(Set dst (Conv2B src));
8746
8747  expand %{
8748    movI_nocopy(dst,src);
8749    ci2b(dst,src,cr);
8750  %}
8751%}
8752
8753instruct movP_nocopy(rRegI dst, eRegP src) %{
8754  effect( DEF dst, USE src );
8755  format %{ "MOV    $dst,$src" %}
8756  ins_encode( enc_Copy( dst, src) );
8757  ins_pipe( ialu_reg_reg );
8758%}
8759
8760instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8761  effect( USE_DEF dst, USE src, KILL cr );
8762  format %{ "NEG    $dst\n\t"
8763            "ADC    $dst,$src" %}
8764  ins_encode( neg_reg(dst),
8765              OpcRegReg(0x13,dst,src) );
8766  ins_pipe( ialu_reg_reg_long );
8767%}
8768
8769instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8770  match(Set dst (Conv2B src));
8771
8772  expand %{
8773    movP_nocopy(dst,src);
8774    cp2b(dst,src,cr);
8775  %}
8776%}
8777
8778instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8779  match(Set dst (CmpLTMask p q));
8780  effect(KILL cr);
8781  ins_cost(400);
8782
8783  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
8784  format %{ "XOR    $dst,$dst\n\t"
8785            "CMP    $p,$q\n\t"
8786            "SETlt  $dst\n\t"
8787            "NEG    $dst" %}
8788  ins_encode %{
8789    Register Rp = $p$$Register;
8790    Register Rq = $q$$Register;
8791    Register Rd = $dst$$Register;
8792    Label done;
8793    __ xorl(Rd, Rd);
8794    __ cmpl(Rp, Rq);
8795    __ setb(Assembler::less, Rd);
8796    __ negl(Rd);
8797  %}
8798
8799  ins_pipe(pipe_slow);
8800%}
8801
8802instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8803  match(Set dst (CmpLTMask dst zero));
8804  effect(DEF dst, KILL cr);
8805  ins_cost(100);
8806
8807  format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8808  ins_encode %{
8809  __ sarl($dst$$Register, 31);
8810  %}
8811  ins_pipe(ialu_reg);
8812%}
8813
8814/* better to save a register than avoid a branch */
8815instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8816  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8817  effect(KILL cr);
8818  ins_cost(400);
8819  format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8820            "JGE    done\n\t"
8821            "ADD    $p,$y\n"
8822            "done:  " %}
8823  ins_encode %{
8824    Register Rp = $p$$Register;
8825    Register Rq = $q$$Register;
8826    Register Ry = $y$$Register;
8827    Label done;
8828    __ subl(Rp, Rq);
8829    __ jccb(Assembler::greaterEqual, done);
8830    __ addl(Rp, Ry);
8831    __ bind(done);
8832  %}
8833
8834  ins_pipe(pipe_cmplt);
8835%}
8836
8837/* better to save a register than avoid a branch */
8838instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8839  match(Set y (AndI (CmpLTMask p q) y));
8840  effect(KILL cr);
8841
8842  ins_cost(300);
8843
8844  format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8845            "JLT      done\n\t"
8846            "XORL     $y, $y\n"
8847            "done:  " %}
8848  ins_encode %{
8849    Register Rp = $p$$Register;
8850    Register Rq = $q$$Register;
8851    Register Ry = $y$$Register;
8852    Label done;
8853    __ cmpl(Rp, Rq);
8854    __ jccb(Assembler::less, done);
8855    __ xorl(Ry, Ry);
8856    __ bind(done);
8857  %}
8858
8859  ins_pipe(pipe_cmplt);
8860%}
8861
8862/* If I enable this, I encourage spilling in the inner loop of compress.
8863instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8864  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8865*/
8866//----------Overflow Math Instructions-----------------------------------------
8867
8868instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8869%{
8870  match(Set cr (OverflowAddI op1 op2));
8871  effect(DEF cr, USE_KILL op1, USE op2);
8872
8873  format %{ "ADD    $op1, $op2\t# overflow check int" %}
8874
8875  ins_encode %{
8876    __ addl($op1$$Register, $op2$$Register);
8877  %}
8878  ins_pipe(ialu_reg_reg);
8879%}
8880
8881instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8882%{
8883  match(Set cr (OverflowAddI op1 op2));
8884  effect(DEF cr, USE_KILL op1, USE op2);
8885
8886  format %{ "ADD    $op1, $op2\t# overflow check int" %}
8887
8888  ins_encode %{
8889    __ addl($op1$$Register, $op2$$constant);
8890  %}
8891  ins_pipe(ialu_reg_reg);
8892%}
8893
8894instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8895%{
8896  match(Set cr (OverflowSubI op1 op2));
8897
8898  format %{ "CMP    $op1, $op2\t# overflow check int" %}
8899  ins_encode %{
8900    __ cmpl($op1$$Register, $op2$$Register);
8901  %}
8902  ins_pipe(ialu_reg_reg);
8903%}
8904
8905instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8906%{
8907  match(Set cr (OverflowSubI op1 op2));
8908
8909  format %{ "CMP    $op1, $op2\t# overflow check int" %}
8910  ins_encode %{
8911    __ cmpl($op1$$Register, $op2$$constant);
8912  %}
8913  ins_pipe(ialu_reg_reg);
8914%}
8915
8916instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8917%{
8918  match(Set cr (OverflowSubI zero op2));
8919  effect(DEF cr, USE_KILL op2);
8920
8921  format %{ "NEG    $op2\t# overflow check int" %}
8922  ins_encode %{
8923    __ negl($op2$$Register);
8924  %}
8925  ins_pipe(ialu_reg_reg);
8926%}
8927
8928instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8929%{
8930  match(Set cr (OverflowMulI op1 op2));
8931  effect(DEF cr, USE_KILL op1, USE op2);
8932
8933  format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8934  ins_encode %{
8935    __ imull($op1$$Register, $op2$$Register);
8936  %}
8937  ins_pipe(ialu_reg_reg_alu0);
8938%}
8939
8940instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8941%{
8942  match(Set cr (OverflowMulI op1 op2));
8943  effect(DEF cr, TEMP tmp, USE op1, USE op2);
8944
8945  format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8946  ins_encode %{
8947    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8948  %}
8949  ins_pipe(ialu_reg_reg_alu0);
8950%}
8951
8952//----------Long Instructions------------------------------------------------
8953// Add Long Register with Register
8954instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8955  match(Set dst (AddL dst src));
8956  effect(KILL cr);
8957  ins_cost(200);
8958  format %{ "ADD    $dst.lo,$src.lo\n\t"
8959            "ADC    $dst.hi,$src.hi" %}
8960  opcode(0x03, 0x13);
8961  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8962  ins_pipe( ialu_reg_reg_long );
8963%}
8964
8965// Add Long Register with Immediate
8966instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8967  match(Set dst (AddL dst src));
8968  effect(KILL cr);
8969  format %{ "ADD    $dst.lo,$src.lo\n\t"
8970            "ADC    $dst.hi,$src.hi" %}
8971  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8972  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8973  ins_pipe( ialu_reg_long );
8974%}
8975
8976// Add Long Register with Memory
8977instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8978  match(Set dst (AddL dst (LoadL mem)));
8979  effect(KILL cr);
8980  ins_cost(125);
8981  format %{ "ADD    $dst.lo,$mem\n\t"
8982            "ADC    $dst.hi,$mem+4" %}
8983  opcode(0x03, 0x13);
8984  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8985  ins_pipe( ialu_reg_long_mem );
8986%}
8987
8988// Subtract Long Register with Register.
8989instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8990  match(Set dst (SubL dst src));
8991  effect(KILL cr);
8992  ins_cost(200);
8993  format %{ "SUB    $dst.lo,$src.lo\n\t"
8994            "SBB    $dst.hi,$src.hi" %}
8995  opcode(0x2B, 0x1B);
8996  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8997  ins_pipe( ialu_reg_reg_long );
8998%}
8999
9000// Subtract Long Register with Immediate
9001instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9002  match(Set dst (SubL dst src));
9003  effect(KILL cr);
9004  format %{ "SUB    $dst.lo,$src.lo\n\t"
9005            "SBB    $dst.hi,$src.hi" %}
9006  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
9007  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9008  ins_pipe( ialu_reg_long );
9009%}
9010
9011// Subtract Long Register with Memory
9012instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9013  match(Set dst (SubL dst (LoadL mem)));
9014  effect(KILL cr);
9015  ins_cost(125);
9016  format %{ "SUB    $dst.lo,$mem\n\t"
9017            "SBB    $dst.hi,$mem+4" %}
9018  opcode(0x2B, 0x1B);
9019  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9020  ins_pipe( ialu_reg_long_mem );
9021%}
9022
9023instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
9024  match(Set dst (SubL zero dst));
9025  effect(KILL cr);
9026  ins_cost(300);
9027  format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
9028  ins_encode( neg_long(dst) );
9029  ins_pipe( ialu_reg_reg_long );
9030%}
9031
9032// And Long Register with Register
9033instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9034  match(Set dst (AndL dst src));
9035  effect(KILL cr);
9036  format %{ "AND    $dst.lo,$src.lo\n\t"
9037            "AND    $dst.hi,$src.hi" %}
9038  opcode(0x23,0x23);
9039  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9040  ins_pipe( ialu_reg_reg_long );
9041%}
9042
9043// And Long Register with Immediate
9044instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9045  match(Set dst (AndL dst src));
9046  effect(KILL cr);
9047  format %{ "AND    $dst.lo,$src.lo\n\t"
9048            "AND    $dst.hi,$src.hi" %}
9049  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
9050  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9051  ins_pipe( ialu_reg_long );
9052%}
9053
9054// And Long Register with Memory
9055instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9056  match(Set dst (AndL dst (LoadL mem)));
9057  effect(KILL cr);
9058  ins_cost(125);
9059  format %{ "AND    $dst.lo,$mem\n\t"
9060            "AND    $dst.hi,$mem+4" %}
9061  opcode(0x23, 0x23);
9062  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9063  ins_pipe( ialu_reg_long_mem );
9064%}
9065
9066// BMI1 instructions
9067instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
9068  match(Set dst (AndL (XorL src1 minus_1) src2));
9069  predicate(UseBMI1Instructions);
9070  effect(KILL cr, TEMP dst);
9071
9072  format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
9073            "ANDNL  $dst.hi, $src1.hi, $src2.hi"
9074         %}
9075
9076  ins_encode %{
9077    Register Rdst = $dst$$Register;
9078    Register Rsrc1 = $src1$$Register;
9079    Register Rsrc2 = $src2$$Register;
9080    __ andnl(Rdst, Rsrc1, Rsrc2);
9081    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
9082  %}
9083  ins_pipe(ialu_reg_reg_long);
9084%}
9085
9086instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
9087  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
9088  predicate(UseBMI1Instructions);
9089  effect(KILL cr, TEMP dst);
9090
9091  ins_cost(125);
9092  format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
9093            "ANDNL  $dst.hi, $src1.hi, $src2+4"
9094         %}
9095
9096  ins_encode %{
9097    Register Rdst = $dst$$Register;
9098    Register Rsrc1 = $src1$$Register;
9099    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
9100
9101    __ andnl(Rdst, Rsrc1, $src2$$Address);
9102    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
9103  %}
9104  ins_pipe(ialu_reg_mem);
9105%}
9106
9107instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
9108  match(Set dst (AndL (SubL imm_zero src) src));
9109  predicate(UseBMI1Instructions);
9110  effect(KILL cr, TEMP dst);
9111
9112  format %{ "MOVL   $dst.hi, 0\n\t"
9113            "BLSIL  $dst.lo, $src.lo\n\t"
9114            "JNZ    done\n\t"
9115            "BLSIL  $dst.hi, $src.hi\n"
9116            "done:"
9117         %}
9118
9119  ins_encode %{
9120    Label done;
9121    Register Rdst = $dst$$Register;
9122    Register Rsrc = $src$$Register;
9123    __ movl(HIGH_FROM_LOW(Rdst), 0);
9124    __ blsil(Rdst, Rsrc);
9125    __ jccb(Assembler::notZero, done);
9126    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9127    __ bind(done);
9128  %}
9129  ins_pipe(ialu_reg);
9130%}
9131
9132instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
9133  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
9134  predicate(UseBMI1Instructions);
9135  effect(KILL cr, TEMP dst);
9136
9137  ins_cost(125);
9138  format %{ "MOVL   $dst.hi, 0\n\t"
9139            "BLSIL  $dst.lo, $src\n\t"
9140            "JNZ    done\n\t"
9141            "BLSIL  $dst.hi, $src+4\n"
9142            "done:"
9143         %}
9144
9145  ins_encode %{
9146    Label done;
9147    Register Rdst = $dst$$Register;
9148    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9149
9150    __ movl(HIGH_FROM_LOW(Rdst), 0);
9151    __ blsil(Rdst, $src$$Address);
9152    __ jccb(Assembler::notZero, done);
9153    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
9154    __ bind(done);
9155  %}
9156  ins_pipe(ialu_reg_mem);
9157%}
9158
9159instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9160%{
9161  match(Set dst (XorL (AddL src minus_1) src));
9162  predicate(UseBMI1Instructions);
9163  effect(KILL cr, TEMP dst);
9164
9165  format %{ "MOVL    $dst.hi, 0\n\t"
9166            "BLSMSKL $dst.lo, $src.lo\n\t"
9167            "JNC     done\n\t"
9168            "BLSMSKL $dst.hi, $src.hi\n"
9169            "done:"
9170         %}
9171
9172  ins_encode %{
9173    Label done;
9174    Register Rdst = $dst$$Register;
9175    Register Rsrc = $src$$Register;
9176    __ movl(HIGH_FROM_LOW(Rdst), 0);
9177    __ blsmskl(Rdst, Rsrc);
9178    __ jccb(Assembler::carryClear, done);
9179    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9180    __ bind(done);
9181  %}
9182
9183  ins_pipe(ialu_reg);
9184%}
9185
9186instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9187%{
9188  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
9189  predicate(UseBMI1Instructions);
9190  effect(KILL cr, TEMP dst);
9191
9192  ins_cost(125);
9193  format %{ "MOVL    $dst.hi, 0\n\t"
9194            "BLSMSKL $dst.lo, $src\n\t"
9195            "JNC     done\n\t"
9196            "BLSMSKL $dst.hi, $src+4\n"
9197            "done:"
9198         %}
9199
9200  ins_encode %{
9201    Label done;
9202    Register Rdst = $dst$$Register;
9203    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9204
9205    __ movl(HIGH_FROM_LOW(Rdst), 0);
9206    __ blsmskl(Rdst, $src$$Address);
9207    __ jccb(Assembler::carryClear, done);
9208    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
9209    __ bind(done);
9210  %}
9211
9212  ins_pipe(ialu_reg_mem);
9213%}
9214
9215instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9216%{
9217  match(Set dst (AndL (AddL src minus_1) src) );
9218  predicate(UseBMI1Instructions);
9219  effect(KILL cr, TEMP dst);
9220
9221  format %{ "MOVL   $dst.hi, $src.hi\n\t"
9222            "BLSRL  $dst.lo, $src.lo\n\t"
9223            "JNC    done\n\t"
9224            "BLSRL  $dst.hi, $src.hi\n"
9225            "done:"
9226  %}
9227
9228  ins_encode %{
9229    Label done;
9230    Register Rdst = $dst$$Register;
9231    Register Rsrc = $src$$Register;
9232    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9233    __ blsrl(Rdst, Rsrc);
9234    __ jccb(Assembler::carryClear, done);
9235    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9236    __ bind(done);
9237  %}
9238
9239  ins_pipe(ialu_reg);
9240%}
9241
9242instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9243%{
9244  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9245  predicate(UseBMI1Instructions);
9246  effect(KILL cr, TEMP dst);
9247
9248  ins_cost(125);
9249  format %{ "MOVL   $dst.hi, $src+4\n\t"
9250            "BLSRL  $dst.lo, $src\n\t"
9251            "JNC    done\n\t"
9252            "BLSRL  $dst.hi, $src+4\n"
9253            "done:"
9254  %}
9255
9256  ins_encode %{
9257    Label done;
9258    Register Rdst = $dst$$Register;
9259    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9260    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9261    __ blsrl(Rdst, $src$$Address);
9262    __ jccb(Assembler::carryClear, done);
9263    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9264    __ bind(done);
9265  %}
9266
9267  ins_pipe(ialu_reg_mem);
9268%}
9269
9270// Or Long Register with Register
9271instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9272  match(Set dst (OrL dst src));
9273  effect(KILL cr);
9274  format %{ "OR     $dst.lo,$src.lo\n\t"
9275            "OR     $dst.hi,$src.hi" %}
9276  opcode(0x0B,0x0B);
9277  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9278  ins_pipe( ialu_reg_reg_long );
9279%}
9280
9281// Or Long Register with Immediate
9282instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9283  match(Set dst (OrL dst src));
9284  effect(KILL cr);
9285  format %{ "OR     $dst.lo,$src.lo\n\t"
9286            "OR     $dst.hi,$src.hi" %}
9287  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9288  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9289  ins_pipe( ialu_reg_long );
9290%}
9291
9292// Or Long Register with Memory
9293instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9294  match(Set dst (OrL dst (LoadL mem)));
9295  effect(KILL cr);
9296  ins_cost(125);
9297  format %{ "OR     $dst.lo,$mem\n\t"
9298            "OR     $dst.hi,$mem+4" %}
9299  opcode(0x0B,0x0B);
9300  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9301  ins_pipe( ialu_reg_long_mem );
9302%}
9303
9304// Xor Long Register with Register
9305instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9306  match(Set dst (XorL dst src));
9307  effect(KILL cr);
9308  format %{ "XOR    $dst.lo,$src.lo\n\t"
9309            "XOR    $dst.hi,$src.hi" %}
9310  opcode(0x33,0x33);
9311  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9312  ins_pipe( ialu_reg_reg_long );
9313%}
9314
9315// Xor Long Register with Immediate -1
9316instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9317  match(Set dst (XorL dst imm));
9318  format %{ "NOT    $dst.lo\n\t"
9319            "NOT    $dst.hi" %}
9320  ins_encode %{
9321     __ notl($dst$$Register);
9322     __ notl(HIGH_FROM_LOW($dst$$Register));
9323  %}
9324  ins_pipe( ialu_reg_long );
9325%}
9326
9327// Xor Long Register with Immediate
9328instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9329  match(Set dst (XorL dst src));
9330  effect(KILL cr);
9331  format %{ "XOR    $dst.lo,$src.lo\n\t"
9332            "XOR    $dst.hi,$src.hi" %}
9333  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9334  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9335  ins_pipe( ialu_reg_long );
9336%}
9337
9338// Xor Long Register with Memory
9339instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9340  match(Set dst (XorL dst (LoadL mem)));
9341  effect(KILL cr);
9342  ins_cost(125);
9343  format %{ "XOR    $dst.lo,$mem\n\t"
9344            "XOR    $dst.hi,$mem+4" %}
9345  opcode(0x33,0x33);
9346  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9347  ins_pipe( ialu_reg_long_mem );
9348%}
9349
9350// Shift Left Long by 1
9351instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9352  predicate(UseNewLongLShift);
9353  match(Set dst (LShiftL dst cnt));
9354  effect(KILL cr);
9355  ins_cost(100);
9356  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9357            "ADC    $dst.hi,$dst.hi" %}
9358  ins_encode %{
9359    __ addl($dst$$Register,$dst$$Register);
9360    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9361  %}
9362  ins_pipe( ialu_reg_long );
9363%}
9364
9365// Shift Left Long by 2
9366instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9367  predicate(UseNewLongLShift);
9368  match(Set dst (LShiftL dst cnt));
9369  effect(KILL cr);
9370  ins_cost(100);
9371  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9372            "ADC    $dst.hi,$dst.hi\n\t"
9373            "ADD    $dst.lo,$dst.lo\n\t"
9374            "ADC    $dst.hi,$dst.hi" %}
9375  ins_encode %{
9376    __ addl($dst$$Register,$dst$$Register);
9377    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9378    __ addl($dst$$Register,$dst$$Register);
9379    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9380  %}
9381  ins_pipe( ialu_reg_long );
9382%}
9383
9384// Shift Left Long by 3
9385instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9386  predicate(UseNewLongLShift);
9387  match(Set dst (LShiftL dst cnt));
9388  effect(KILL cr);
9389  ins_cost(100);
9390  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9391            "ADC    $dst.hi,$dst.hi\n\t"
9392            "ADD    $dst.lo,$dst.lo\n\t"
9393            "ADC    $dst.hi,$dst.hi\n\t"
9394            "ADD    $dst.lo,$dst.lo\n\t"
9395            "ADC    $dst.hi,$dst.hi" %}
9396  ins_encode %{
9397    __ addl($dst$$Register,$dst$$Register);
9398    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9399    __ addl($dst$$Register,$dst$$Register);
9400    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9401    __ addl($dst$$Register,$dst$$Register);
9402    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9403  %}
9404  ins_pipe( ialu_reg_long );
9405%}
9406
9407// Shift Left Long by 1-31
9408instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9409  match(Set dst (LShiftL dst cnt));
9410  effect(KILL cr);
9411  ins_cost(200);
9412  format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9413            "SHL    $dst.lo,$cnt" %}
9414  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9415  ins_encode( move_long_small_shift(dst,cnt) );
9416  ins_pipe( ialu_reg_long );
9417%}
9418
9419// Shift Left Long by 32-63
9420instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9421  match(Set dst (LShiftL dst cnt));
9422  effect(KILL cr);
9423  ins_cost(300);
9424  format %{ "MOV    $dst.hi,$dst.lo\n"
9425          "\tSHL    $dst.hi,$cnt-32\n"
9426          "\tXOR    $dst.lo,$dst.lo" %}
9427  opcode(0xC1, 0x4);  /* C1 /4 ib */
9428  ins_encode( move_long_big_shift_clr(dst,cnt) );
9429  ins_pipe( ialu_reg_long );
9430%}
9431
9432// Shift Left Long by variable
9433instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9434  match(Set dst (LShiftL dst shift));
9435  effect(KILL cr);
9436  ins_cost(500+200);
9437  size(17);
9438  format %{ "TEST   $shift,32\n\t"
9439            "JEQ,s  small\n\t"
9440            "MOV    $dst.hi,$dst.lo\n\t"
9441            "XOR    $dst.lo,$dst.lo\n"
9442    "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9443            "SHL    $dst.lo,$shift" %}
9444  ins_encode( shift_left_long( dst, shift ) );
9445  ins_pipe( pipe_slow );
9446%}
9447
9448// Shift Right Long by 1-31
9449instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9450  match(Set dst (URShiftL dst cnt));
9451  effect(KILL cr);
9452  ins_cost(200);
9453  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9454            "SHR    $dst.hi,$cnt" %}
9455  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9456  ins_encode( move_long_small_shift(dst,cnt) );
9457  ins_pipe( ialu_reg_long );
9458%}
9459
9460// Shift Right Long by 32-63
9461instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9462  match(Set dst (URShiftL dst cnt));
9463  effect(KILL cr);
9464  ins_cost(300);
9465  format %{ "MOV    $dst.lo,$dst.hi\n"
9466          "\tSHR    $dst.lo,$cnt-32\n"
9467          "\tXOR    $dst.hi,$dst.hi" %}
9468  opcode(0xC1, 0x5);  /* C1 /5 ib */
9469  ins_encode( move_long_big_shift_clr(dst,cnt) );
9470  ins_pipe( ialu_reg_long );
9471%}
9472
9473// Shift Right Long by variable
9474instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9475  match(Set dst (URShiftL dst shift));
9476  effect(KILL cr);
9477  ins_cost(600);
9478  size(17);
9479  format %{ "TEST   $shift,32\n\t"
9480            "JEQ,s  small\n\t"
9481            "MOV    $dst.lo,$dst.hi\n\t"
9482            "XOR    $dst.hi,$dst.hi\n"
9483    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9484            "SHR    $dst.hi,$shift" %}
9485  ins_encode( shift_right_long( dst, shift ) );
9486  ins_pipe( pipe_slow );
9487%}
9488
9489// Shift Right Long by 1-31
9490instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9491  match(Set dst (RShiftL dst cnt));
9492  effect(KILL cr);
9493  ins_cost(200);
9494  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9495            "SAR    $dst.hi,$cnt" %}
9496  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9497  ins_encode( move_long_small_shift(dst,cnt) );
9498  ins_pipe( ialu_reg_long );
9499%}
9500
9501// Shift Right Long by 32-63
9502instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9503  match(Set dst (RShiftL dst cnt));
9504  effect(KILL cr);
9505  ins_cost(300);
9506  format %{ "MOV    $dst.lo,$dst.hi\n"
9507          "\tSAR    $dst.lo,$cnt-32\n"
9508          "\tSAR    $dst.hi,31" %}
9509  opcode(0xC1, 0x7);  /* C1 /7 ib */
9510  ins_encode( move_long_big_shift_sign(dst,cnt) );
9511  ins_pipe( ialu_reg_long );
9512%}
9513
9514// Shift Right arithmetic Long by variable
9515instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9516  match(Set dst (RShiftL dst shift));
9517  effect(KILL cr);
9518  ins_cost(600);
9519  size(18);
9520  format %{ "TEST   $shift,32\n\t"
9521            "JEQ,s  small\n\t"
9522            "MOV    $dst.lo,$dst.hi\n\t"
9523            "SAR    $dst.hi,31\n"
9524    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9525            "SAR    $dst.hi,$shift" %}
9526  ins_encode( shift_right_arith_long( dst, shift ) );
9527  ins_pipe( pipe_slow );
9528%}
9529
9530
9531//----------Double Instructions------------------------------------------------
9532// Double Math
9533
9534// Compare & branch
9535
9536// P6 version of float compare, sets condition codes in EFLAGS
9537instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9538  predicate(VM_Version::supports_cmov() && UseSSE <=1);
9539  match(Set cr (CmpD src1 src2));
9540  effect(KILL rax);
9541  ins_cost(150);
9542  format %{ "FLD    $src1\n\t"
9543            "FUCOMIP ST,$src2  // P6 instruction\n\t"
9544            "JNP    exit\n\t"
9545            "MOV    ah,1       // saw a NaN, set CF\n\t"
9546            "SAHF\n"
9547     "exit:\tNOP               // avoid branch to branch" %}
9548  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9549  ins_encode( Push_Reg_DPR(src1),
9550              OpcP, RegOpc(src2),
9551              cmpF_P6_fixup );
9552  ins_pipe( pipe_slow );
9553%}
9554
9555instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9556  predicate(VM_Version::supports_cmov() && UseSSE <=1);
9557  match(Set cr (CmpD src1 src2));
9558  ins_cost(150);
9559  format %{ "FLD    $src1\n\t"
9560            "FUCOMIP ST,$src2  // P6 instruction" %}
9561  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9562  ins_encode( Push_Reg_DPR(src1),
9563              OpcP, RegOpc(src2));
9564  ins_pipe( pipe_slow );
9565%}
9566
9567// Compare & branch
9568instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9569  predicate(UseSSE<=1);
9570  match(Set cr (CmpD src1 src2));
9571  effect(KILL rax);
9572  ins_cost(200);
9573  format %{ "FLD    $src1\n\t"
9574            "FCOMp  $src2\n\t"
9575            "FNSTSW AX\n\t"
9576            "TEST   AX,0x400\n\t"
9577            "JZ,s   flags\n\t"
9578            "MOV    AH,1\t# unordered treat as LT\n"
9579    "flags:\tSAHF" %}
9580  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9581  ins_encode( Push_Reg_DPR(src1),
9582              OpcP, RegOpc(src2),
9583              fpu_flags);
9584  ins_pipe( pipe_slow );
9585%}
9586
9587// Compare vs zero into -1,0,1
9588instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9589  predicate(UseSSE<=1);
9590  match(Set dst (CmpD3 src1 zero));
9591  effect(KILL cr, KILL rax);
9592  ins_cost(280);
9593  format %{ "FTSTD  $dst,$src1" %}
9594  opcode(0xE4, 0xD9);
9595  ins_encode( Push_Reg_DPR(src1),
9596              OpcS, OpcP, PopFPU,
9597              CmpF_Result(dst));
9598  ins_pipe( pipe_slow );
9599%}
9600
9601// Compare into -1,0,1
9602instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9603  predicate(UseSSE<=1);
9604  match(Set dst (CmpD3 src1 src2));
9605  effect(KILL cr, KILL rax);
9606  ins_cost(300);
9607  format %{ "FCMPD  $dst,$src1,$src2" %}
9608  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9609  ins_encode( Push_Reg_DPR(src1),
9610              OpcP, RegOpc(src2),
9611              CmpF_Result(dst));
9612  ins_pipe( pipe_slow );
9613%}
9614
9615// float compare and set condition codes in EFLAGS by XMM regs
9616instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9617  predicate(UseSSE>=2);
9618  match(Set cr (CmpD src1 src2));
9619  ins_cost(145);
9620  format %{ "UCOMISD $src1,$src2\n\t"
9621            "JNP,s   exit\n\t"
9622            "PUSHF\t# saw NaN, set CF\n\t"
9623            "AND     [rsp], #0xffffff2b\n\t"
9624            "POPF\n"
9625    "exit:" %}
9626  ins_encode %{
9627    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9628    emit_cmpfp_fixup(_masm);
9629  %}
9630  ins_pipe( pipe_slow );
9631%}
9632
9633instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9634  predicate(UseSSE>=2);
9635  match(Set cr (CmpD src1 src2));
9636  ins_cost(100);
9637  format %{ "UCOMISD $src1,$src2" %}
9638  ins_encode %{
9639    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9640  %}
9641  ins_pipe( pipe_slow );
9642%}
9643
9644// float compare and set condition codes in EFLAGS by XMM regs
9645instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9646  predicate(UseSSE>=2);
9647  match(Set cr (CmpD src1 (LoadD src2)));
9648  ins_cost(145);
9649  format %{ "UCOMISD $src1,$src2\n\t"
9650            "JNP,s   exit\n\t"
9651            "PUSHF\t# saw NaN, set CF\n\t"
9652            "AND     [rsp], #0xffffff2b\n\t"
9653            "POPF\n"
9654    "exit:" %}
9655  ins_encode %{
9656    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9657    emit_cmpfp_fixup(_masm);
9658  %}
9659  ins_pipe( pipe_slow );
9660%}
9661
9662instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9663  predicate(UseSSE>=2);
9664  match(Set cr (CmpD src1 (LoadD src2)));
9665  ins_cost(100);
9666  format %{ "UCOMISD $src1,$src2" %}
9667  ins_encode %{
9668    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9669  %}
9670  ins_pipe( pipe_slow );
9671%}
9672
9673// Compare into -1,0,1 in XMM
9674instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9675  predicate(UseSSE>=2);
9676  match(Set dst (CmpD3 src1 src2));
9677  effect(KILL cr);
9678  ins_cost(255);
9679  format %{ "UCOMISD $src1, $src2\n\t"
9680            "MOV     $dst, #-1\n\t"
9681            "JP,s    done\n\t"
9682            "JB,s    done\n\t"
9683            "SETNE   $dst\n\t"
9684            "MOVZB   $dst, $dst\n"
9685    "done:" %}
9686  ins_encode %{
9687    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9688    emit_cmpfp3(_masm, $dst$$Register);
9689  %}
9690  ins_pipe( pipe_slow );
9691%}
9692
9693// Compare into -1,0,1 in XMM and memory
9694instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9695  predicate(UseSSE>=2);
9696  match(Set dst (CmpD3 src1 (LoadD src2)));
9697  effect(KILL cr);
9698  ins_cost(275);
9699  format %{ "UCOMISD $src1, $src2\n\t"
9700            "MOV     $dst, #-1\n\t"
9701            "JP,s    done\n\t"
9702            "JB,s    done\n\t"
9703            "SETNE   $dst\n\t"
9704            "MOVZB   $dst, $dst\n"
9705    "done:" %}
9706  ins_encode %{
9707    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9708    emit_cmpfp3(_masm, $dst$$Register);
9709  %}
9710  ins_pipe( pipe_slow );
9711%}
9712
9713
9714instruct subDPR_reg(regDPR dst, regDPR src) %{
9715  predicate (UseSSE <=1);
9716  match(Set dst (SubD dst src));
9717
9718  format %{ "FLD    $src\n\t"
9719            "DSUBp  $dst,ST" %}
9720  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9721  ins_cost(150);
9722  ins_encode( Push_Reg_DPR(src),
9723              OpcP, RegOpc(dst) );
9724  ins_pipe( fpu_reg_reg );
9725%}
9726
9727instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9728  predicate (UseSSE <=1);
9729  match(Set dst (RoundDouble (SubD src1 src2)));
9730  ins_cost(250);
9731
9732  format %{ "FLD    $src2\n\t"
9733            "DSUB   ST,$src1\n\t"
9734            "FSTP_D $dst\t# D-round" %}
9735  opcode(0xD8, 0x5);
9736  ins_encode( Push_Reg_DPR(src2),
9737              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9738  ins_pipe( fpu_mem_reg_reg );
9739%}
9740
9741
9742instruct subDPR_reg_mem(regDPR dst, memory src) %{
9743  predicate (UseSSE <=1);
9744  match(Set dst (SubD dst (LoadD src)));
9745  ins_cost(150);
9746
9747  format %{ "FLD    $src\n\t"
9748            "DSUBp  $dst,ST" %}
9749  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9750  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9751              OpcP, RegOpc(dst) );
9752  ins_pipe( fpu_reg_mem );
9753%}
9754
9755instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9756  predicate (UseSSE<=1);
9757  match(Set dst (AbsD src));
9758  ins_cost(100);
9759  format %{ "FABS" %}
9760  opcode(0xE1, 0xD9);
9761  ins_encode( OpcS, OpcP );
9762  ins_pipe( fpu_reg_reg );
9763%}
9764
9765instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9766  predicate(UseSSE<=1);
9767  match(Set dst (NegD src));
9768  ins_cost(100);
9769  format %{ "FCHS" %}
9770  opcode(0xE0, 0xD9);
9771  ins_encode( OpcS, OpcP );
9772  ins_pipe( fpu_reg_reg );
9773%}
9774
9775instruct addDPR_reg(regDPR dst, regDPR src) %{
9776  predicate(UseSSE<=1);
9777  match(Set dst (AddD dst src));
9778  format %{ "FLD    $src\n\t"
9779            "DADD   $dst,ST" %}
9780  size(4);
9781  ins_cost(150);
9782  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9783  ins_encode( Push_Reg_DPR(src),
9784              OpcP, RegOpc(dst) );
9785  ins_pipe( fpu_reg_reg );
9786%}
9787
9788
9789instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9790  predicate(UseSSE<=1);
9791  match(Set dst (RoundDouble (AddD src1 src2)));
9792  ins_cost(250);
9793
9794  format %{ "FLD    $src2\n\t"
9795            "DADD   ST,$src1\n\t"
9796            "FSTP_D $dst\t# D-round" %}
9797  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9798  ins_encode( Push_Reg_DPR(src2),
9799              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9800  ins_pipe( fpu_mem_reg_reg );
9801%}
9802
9803
9804instruct addDPR_reg_mem(regDPR dst, memory src) %{
9805  predicate(UseSSE<=1);
9806  match(Set dst (AddD dst (LoadD src)));
9807  ins_cost(150);
9808
9809  format %{ "FLD    $src\n\t"
9810            "DADDp  $dst,ST" %}
9811  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9812  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9813              OpcP, RegOpc(dst) );
9814  ins_pipe( fpu_reg_mem );
9815%}
9816
9817// add-to-memory
9818instruct addDPR_mem_reg(memory dst, regDPR src) %{
9819  predicate(UseSSE<=1);
9820  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9821  ins_cost(150);
9822
9823  format %{ "FLD_D  $dst\n\t"
9824            "DADD   ST,$src\n\t"
9825            "FST_D  $dst" %}
9826  opcode(0xDD, 0x0);
9827  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9828              Opcode(0xD8), RegOpc(src),
9829              set_instruction_start,
9830              Opcode(0xDD), RMopc_Mem(0x03,dst) );
9831  ins_pipe( fpu_reg_mem );
9832%}
9833
9834instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9835  predicate(UseSSE<=1);
9836  match(Set dst (AddD dst con));
9837  ins_cost(125);
9838  format %{ "FLD1\n\t"
9839            "DADDp  $dst,ST" %}
9840  ins_encode %{
9841    __ fld1();
9842    __ faddp($dst$$reg);
9843  %}
9844  ins_pipe(fpu_reg);
9845%}
9846
9847instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9848  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9849  match(Set dst (AddD dst con));
9850  ins_cost(200);
9851  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9852            "DADDp  $dst,ST" %}
9853  ins_encode %{
9854    __ fld_d($constantaddress($con));
9855    __ faddp($dst$$reg);
9856  %}
9857  ins_pipe(fpu_reg_mem);
9858%}
9859
9860instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9861  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9862  match(Set dst (RoundDouble (AddD src con)));
9863  ins_cost(200);
9864  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9865            "DADD   ST,$src\n\t"
9866            "FSTP_D $dst\t# D-round" %}
9867  ins_encode %{
9868    __ fld_d($constantaddress($con));
9869    __ fadd($src$$reg);
9870    __ fstp_d(Address(rsp, $dst$$disp));
9871  %}
9872  ins_pipe(fpu_mem_reg_con);
9873%}
9874
9875instruct mulDPR_reg(regDPR dst, regDPR src) %{
9876  predicate(UseSSE<=1);
9877  match(Set dst (MulD dst src));
9878  format %{ "FLD    $src\n\t"
9879            "DMULp  $dst,ST" %}
9880  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9881  ins_cost(150);
9882  ins_encode( Push_Reg_DPR(src),
9883              OpcP, RegOpc(dst) );
9884  ins_pipe( fpu_reg_reg );
9885%}
9886
9887// Strict FP instruction biases argument before multiply then
9888// biases result to avoid double rounding of subnormals.
9889//
9890// scale arg1 by multiplying arg1 by 2^(-15360)
9891// load arg2
9892// multiply scaled arg1 by arg2
9893// rescale product by 2^(15360)
9894//
9895instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9896  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9897  match(Set dst (MulD dst src));
9898  ins_cost(1);   // Select this instruction for all strict FP double multiplies
9899
9900  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9901            "DMULp  $dst,ST\n\t"
9902            "FLD    $src\n\t"
9903            "DMULp  $dst,ST\n\t"
9904            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9905            "DMULp  $dst,ST\n\t" %}
9906  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9907  ins_encode( strictfp_bias1(dst),
9908              Push_Reg_DPR(src),
9909              OpcP, RegOpc(dst),
9910              strictfp_bias2(dst) );
9911  ins_pipe( fpu_reg_reg );
9912%}
9913
9914instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9915  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9916  match(Set dst (MulD dst con));
9917  ins_cost(200);
9918  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9919            "DMULp  $dst,ST" %}
9920  ins_encode %{
9921    __ fld_d($constantaddress($con));
9922    __ fmulp($dst$$reg);
9923  %}
9924  ins_pipe(fpu_reg_mem);
9925%}
9926
9927
9928instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9929  predicate( UseSSE<=1 );
9930  match(Set dst (MulD dst (LoadD src)));
9931  ins_cost(200);
9932  format %{ "FLD_D  $src\n\t"
9933            "DMULp  $dst,ST" %}
9934  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9935  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9936              OpcP, RegOpc(dst) );
9937  ins_pipe( fpu_reg_mem );
9938%}
9939
9940//
9941// Cisc-alternate to reg-reg multiply
9942instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9943  predicate( UseSSE<=1 );
9944  match(Set dst (MulD src (LoadD mem)));
9945  ins_cost(250);
9946  format %{ "FLD_D  $mem\n\t"
9947            "DMUL   ST,$src\n\t"
9948            "FSTP_D $dst" %}
9949  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9950  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9951              OpcReg_FPR(src),
9952              Pop_Reg_DPR(dst) );
9953  ins_pipe( fpu_reg_reg_mem );
9954%}
9955
9956
9957// MACRO3 -- addDPR a mulDPR
9958// This instruction is a '2-address' instruction in that the result goes
9959// back to src2.  This eliminates a move from the macro; possibly the
9960// register allocator will have to add it back (and maybe not).
9961instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9962  predicate( UseSSE<=1 );
9963  match(Set src2 (AddD (MulD src0 src1) src2));
9964  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9965            "DMUL   ST,$src1\n\t"
9966            "DADDp  $src2,ST" %}
9967  ins_cost(250);
9968  opcode(0xDD); /* LoadD DD /0 */
9969  ins_encode( Push_Reg_FPR(src0),
9970              FMul_ST_reg(src1),
9971              FAddP_reg_ST(src2) );
9972  ins_pipe( fpu_reg_reg_reg );
9973%}
9974
9975
9976// MACRO3 -- subDPR a mulDPR
9977instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9978  predicate( UseSSE<=1 );
9979  match(Set src2 (SubD (MulD src0 src1) src2));
9980  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9981            "DMUL   ST,$src1\n\t"
9982            "DSUBRp $src2,ST" %}
9983  ins_cost(250);
9984  ins_encode( Push_Reg_FPR(src0),
9985              FMul_ST_reg(src1),
9986              Opcode(0xDE), Opc_plus(0xE0,src2));
9987  ins_pipe( fpu_reg_reg_reg );
9988%}
9989
9990
9991instruct divDPR_reg(regDPR dst, regDPR src) %{
9992  predicate( UseSSE<=1 );
9993  match(Set dst (DivD dst src));
9994
9995  format %{ "FLD    $src\n\t"
9996            "FDIVp  $dst,ST" %}
9997  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9998  ins_cost(150);
9999  ins_encode( Push_Reg_DPR(src),
10000              OpcP, RegOpc(dst) );
10001  ins_pipe( fpu_reg_reg );
10002%}
10003
10004// Strict FP instruction biases argument before division then
10005// biases result, to avoid double rounding of subnormals.
10006//
10007// scale dividend by multiplying dividend by 2^(-15360)
10008// load divisor
10009// divide scaled dividend by divisor
10010// rescale quotient by 2^(15360)
10011//
10012instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
10013  predicate (UseSSE<=1);
10014  match(Set dst (DivD dst src));
10015  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
10016  ins_cost(01);
10017
10018  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
10019            "DMULp  $dst,ST\n\t"
10020            "FLD    $src\n\t"
10021            "FDIVp  $dst,ST\n\t"
10022            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
10023            "DMULp  $dst,ST\n\t" %}
10024  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10025  ins_encode( strictfp_bias1(dst),
10026              Push_Reg_DPR(src),
10027              OpcP, RegOpc(dst),
10028              strictfp_bias2(dst) );
10029  ins_pipe( fpu_reg_reg );
10030%}
10031
10032instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
10033  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
10034  match(Set dst (RoundDouble (DivD src1 src2)));
10035
10036  format %{ "FLD    $src1\n\t"
10037            "FDIV   ST,$src2\n\t"
10038            "FSTP_D $dst\t# D-round" %}
10039  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10040  ins_encode( Push_Reg_DPR(src1),
10041              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
10042  ins_pipe( fpu_mem_reg_reg );
10043%}
10044
10045
10046instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
10047  predicate(UseSSE<=1);
10048  match(Set dst (ModD dst src));
10049  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10050
10051  format %{ "DMOD   $dst,$src" %}
10052  ins_cost(250);
10053  ins_encode(Push_Reg_Mod_DPR(dst, src),
10054              emitModDPR(),
10055              Push_Result_Mod_DPR(src),
10056              Pop_Reg_DPR(dst));
10057  ins_pipe( pipe_slow );
10058%}
10059
10060instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
10061  predicate(UseSSE>=2);
10062  match(Set dst (ModD src0 src1));
10063  effect(KILL rax, KILL cr);
10064
10065  format %{ "SUB    ESP,8\t # DMOD\n"
10066          "\tMOVSD  [ESP+0],$src1\n"
10067          "\tFLD_D  [ESP+0]\n"
10068          "\tMOVSD  [ESP+0],$src0\n"
10069          "\tFLD_D  [ESP+0]\n"
10070     "loop:\tFPREM\n"
10071          "\tFWAIT\n"
10072          "\tFNSTSW AX\n"
10073          "\tSAHF\n"
10074          "\tJP     loop\n"
10075          "\tFSTP_D [ESP+0]\n"
10076          "\tMOVSD  $dst,[ESP+0]\n"
10077          "\tADD    ESP,8\n"
10078          "\tFSTP   ST0\t # Restore FPU Stack"
10079    %}
10080  ins_cost(250);
10081  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
10082  ins_pipe( pipe_slow );
10083%}
10084
10085instruct atanDPR_reg(regDPR dst, regDPR src) %{
10086  predicate (UseSSE<=1);
10087  match(Set dst(AtanD dst src));
10088  format %{ "DATA   $dst,$src" %}
10089  opcode(0xD9, 0xF3);
10090  ins_encode( Push_Reg_DPR(src),
10091              OpcP, OpcS, RegOpc(dst) );
10092  ins_pipe( pipe_slow );
10093%}
10094
10095instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10096  predicate (UseSSE>=2);
10097  match(Set dst(AtanD dst src));
10098  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10099  format %{ "DATA   $dst,$src" %}
10100  opcode(0xD9, 0xF3);
10101  ins_encode( Push_SrcD(src),
10102              OpcP, OpcS, Push_ResultD(dst) );
10103  ins_pipe( pipe_slow );
10104%}
10105
10106instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10107  predicate (UseSSE<=1);
10108  match(Set dst (SqrtD src));
10109  format %{ "DSQRT  $dst,$src" %}
10110  opcode(0xFA, 0xD9);
10111  ins_encode( Push_Reg_DPR(src),
10112              OpcS, OpcP, Pop_Reg_DPR(dst) );
10113  ins_pipe( pipe_slow );
10114%}
10115
10116//-------------Float Instructions-------------------------------
10117// Float Math
10118
10119// Code for float compare:
10120//     fcompp();
10121//     fwait(); fnstsw_ax();
10122//     sahf();
10123//     movl(dst, unordered_result);
10124//     jcc(Assembler::parity, exit);
10125//     movl(dst, less_result);
10126//     jcc(Assembler::below, exit);
10127//     movl(dst, equal_result);
10128//     jcc(Assembler::equal, exit);
10129//     movl(dst, greater_result);
10130//   exit:
10131
10132// P6 version of float compare, sets condition codes in EFLAGS
10133instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10134  predicate(VM_Version::supports_cmov() && UseSSE == 0);
10135  match(Set cr (CmpF src1 src2));
10136  effect(KILL rax);
10137  ins_cost(150);
10138  format %{ "FLD    $src1\n\t"
10139            "FUCOMIP ST,$src2  // P6 instruction\n\t"
10140            "JNP    exit\n\t"
10141            "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10142            "SAHF\n"
10143     "exit:\tNOP               // avoid branch to branch" %}
10144  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10145  ins_encode( Push_Reg_DPR(src1),
10146              OpcP, RegOpc(src2),
10147              cmpF_P6_fixup );
10148  ins_pipe( pipe_slow );
10149%}
10150
10151instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10152  predicate(VM_Version::supports_cmov() && UseSSE == 0);
10153  match(Set cr (CmpF src1 src2));
10154  ins_cost(100);
10155  format %{ "FLD    $src1\n\t"
10156            "FUCOMIP ST,$src2  // P6 instruction" %}
10157  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10158  ins_encode( Push_Reg_DPR(src1),
10159              OpcP, RegOpc(src2));
10160  ins_pipe( pipe_slow );
10161%}
10162
10163
10164// Compare & branch
10165instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10166  predicate(UseSSE == 0);
10167  match(Set cr (CmpF src1 src2));
10168  effect(KILL rax);
10169  ins_cost(200);
10170  format %{ "FLD    $src1\n\t"
10171            "FCOMp  $src2\n\t"
10172            "FNSTSW AX\n\t"
10173            "TEST   AX,0x400\n\t"
10174            "JZ,s   flags\n\t"
10175            "MOV    AH,1\t# unordered treat as LT\n"
10176    "flags:\tSAHF" %}
10177  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10178  ins_encode( Push_Reg_DPR(src1),
10179              OpcP, RegOpc(src2),
10180              fpu_flags);
10181  ins_pipe( pipe_slow );
10182%}
10183
10184// Compare vs zero into -1,0,1
10185instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10186  predicate(UseSSE == 0);
10187  match(Set dst (CmpF3 src1 zero));
10188  effect(KILL cr, KILL rax);
10189  ins_cost(280);
10190  format %{ "FTSTF  $dst,$src1" %}
10191  opcode(0xE4, 0xD9);
10192  ins_encode( Push_Reg_DPR(src1),
10193              OpcS, OpcP, PopFPU,
10194              CmpF_Result(dst));
10195  ins_pipe( pipe_slow );
10196%}
10197
10198// Compare into -1,0,1
10199instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10200  predicate(UseSSE == 0);
10201  match(Set dst (CmpF3 src1 src2));
10202  effect(KILL cr, KILL rax);
10203  ins_cost(300);
10204  format %{ "FCMPF  $dst,$src1,$src2" %}
10205  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10206  ins_encode( Push_Reg_DPR(src1),
10207              OpcP, RegOpc(src2),
10208              CmpF_Result(dst));
10209  ins_pipe( pipe_slow );
10210%}
10211
10212// float compare and set condition codes in EFLAGS by XMM regs
10213instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10214  predicate(UseSSE>=1);
10215  match(Set cr (CmpF src1 src2));
10216  ins_cost(145);
10217  format %{ "UCOMISS $src1,$src2\n\t"
10218            "JNP,s   exit\n\t"
10219            "PUSHF\t# saw NaN, set CF\n\t"
10220            "AND     [rsp], #0xffffff2b\n\t"
10221            "POPF\n"
10222    "exit:" %}
10223  ins_encode %{
10224    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10225    emit_cmpfp_fixup(_masm);
10226  %}
10227  ins_pipe( pipe_slow );
10228%}
10229
10230instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10231  predicate(UseSSE>=1);
10232  match(Set cr (CmpF src1 src2));
10233  ins_cost(100);
10234  format %{ "UCOMISS $src1,$src2" %}
10235  ins_encode %{
10236    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10237  %}
10238  ins_pipe( pipe_slow );
10239%}
10240
10241// float compare and set condition codes in EFLAGS by XMM regs
10242instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10243  predicate(UseSSE>=1);
10244  match(Set cr (CmpF src1 (LoadF src2)));
10245  ins_cost(165);
10246  format %{ "UCOMISS $src1,$src2\n\t"
10247            "JNP,s   exit\n\t"
10248            "PUSHF\t# saw NaN, set CF\n\t"
10249            "AND     [rsp], #0xffffff2b\n\t"
10250            "POPF\n"
10251    "exit:" %}
10252  ins_encode %{
10253    __ ucomiss($src1$$XMMRegister, $src2$$Address);
10254    emit_cmpfp_fixup(_masm);
10255  %}
10256  ins_pipe( pipe_slow );
10257%}
10258
10259instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10260  predicate(UseSSE>=1);
10261  match(Set cr (CmpF src1 (LoadF src2)));
10262  ins_cost(100);
10263  format %{ "UCOMISS $src1,$src2" %}
10264  ins_encode %{
10265    __ ucomiss($src1$$XMMRegister, $src2$$Address);
10266  %}
10267  ins_pipe( pipe_slow );
10268%}
10269
10270// Compare into -1,0,1 in XMM
10271instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10272  predicate(UseSSE>=1);
10273  match(Set dst (CmpF3 src1 src2));
10274  effect(KILL cr);
10275  ins_cost(255);
10276  format %{ "UCOMISS $src1, $src2\n\t"
10277            "MOV     $dst, #-1\n\t"
10278            "JP,s    done\n\t"
10279            "JB,s    done\n\t"
10280            "SETNE   $dst\n\t"
10281            "MOVZB   $dst, $dst\n"
10282    "done:" %}
10283  ins_encode %{
10284    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10285    emit_cmpfp3(_masm, $dst$$Register);
10286  %}
10287  ins_pipe( pipe_slow );
10288%}
10289
10290// Compare into -1,0,1 in XMM and memory
10291instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10292  predicate(UseSSE>=1);
10293  match(Set dst (CmpF3 src1 (LoadF src2)));
10294  effect(KILL cr);
10295  ins_cost(275);
10296  format %{ "UCOMISS $src1, $src2\n\t"
10297            "MOV     $dst, #-1\n\t"
10298            "JP,s    done\n\t"
10299            "JB,s    done\n\t"
10300            "SETNE   $dst\n\t"
10301            "MOVZB   $dst, $dst\n"
10302    "done:" %}
10303  ins_encode %{
10304    __ ucomiss($src1$$XMMRegister, $src2$$Address);
10305    emit_cmpfp3(_masm, $dst$$Register);
10306  %}
10307  ins_pipe( pipe_slow );
10308%}
10309
10310// Spill to obtain 24-bit precision
10311instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10312  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10313  match(Set dst (SubF src1 src2));
10314
10315  format %{ "FSUB   $dst,$src1 - $src2" %}
10316  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10317  ins_encode( Push_Reg_FPR(src1),
10318              OpcReg_FPR(src2),
10319              Pop_Mem_FPR(dst) );
10320  ins_pipe( fpu_mem_reg_reg );
10321%}
10322//
10323// This instruction does not round to 24-bits
10324instruct subFPR_reg(regFPR dst, regFPR src) %{
10325  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10326  match(Set dst (SubF dst src));
10327
10328  format %{ "FSUB   $dst,$src" %}
10329  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10330  ins_encode( Push_Reg_FPR(src),
10331              OpcP, RegOpc(dst) );
10332  ins_pipe( fpu_reg_reg );
10333%}
10334
10335// Spill to obtain 24-bit precision
10336instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10337  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10338  match(Set dst (AddF src1 src2));
10339
10340  format %{ "FADD   $dst,$src1,$src2" %}
10341  opcode(0xD8, 0x0); /* D8 C0+i */
10342  ins_encode( Push_Reg_FPR(src2),
10343              OpcReg_FPR(src1),
10344              Pop_Mem_FPR(dst) );
10345  ins_pipe( fpu_mem_reg_reg );
10346%}
10347//
10348// This instruction does not round to 24-bits
10349instruct addFPR_reg(regFPR dst, regFPR src) %{
10350  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10351  match(Set dst (AddF dst src));
10352
10353  format %{ "FLD    $src\n\t"
10354            "FADDp  $dst,ST" %}
10355  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10356  ins_encode( Push_Reg_FPR(src),
10357              OpcP, RegOpc(dst) );
10358  ins_pipe( fpu_reg_reg );
10359%}
10360
10361instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10362  predicate(UseSSE==0);
10363  match(Set dst (AbsF src));
10364  ins_cost(100);
10365  format %{ "FABS" %}
10366  opcode(0xE1, 0xD9);
10367  ins_encode( OpcS, OpcP );
10368  ins_pipe( fpu_reg_reg );
10369%}
10370
10371instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10372  predicate(UseSSE==0);
10373  match(Set dst (NegF src));
10374  ins_cost(100);
10375  format %{ "FCHS" %}
10376  opcode(0xE0, 0xD9);
10377  ins_encode( OpcS, OpcP );
10378  ins_pipe( fpu_reg_reg );
10379%}
10380
10381// Cisc-alternate to addFPR_reg
10382// Spill to obtain 24-bit precision
10383instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10384  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10385  match(Set dst (AddF src1 (LoadF src2)));
10386
10387  format %{ "FLD    $src2\n\t"
10388            "FADD   ST,$src1\n\t"
10389            "FSTP_S $dst" %}
10390  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10391  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10392              OpcReg_FPR(src1),
10393              Pop_Mem_FPR(dst) );
10394  ins_pipe( fpu_mem_reg_mem );
10395%}
10396//
10397// Cisc-alternate to addFPR_reg
10398// This instruction does not round to 24-bits
10399instruct addFPR_reg_mem(regFPR dst, memory src) %{
10400  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10401  match(Set dst (AddF dst (LoadF src)));
10402
10403  format %{ "FADD   $dst,$src" %}
10404  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10405  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10406              OpcP, RegOpc(dst) );
10407  ins_pipe( fpu_reg_mem );
10408%}
10409
10410// // Following two instructions for _222_mpegaudio
10411// Spill to obtain 24-bit precision
10412instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10413  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10414  match(Set dst (AddF src1 src2));
10415
10416  format %{ "FADD   $dst,$src1,$src2" %}
10417  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10418  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10419              OpcReg_FPR(src2),
10420              Pop_Mem_FPR(dst) );
10421  ins_pipe( fpu_mem_reg_mem );
10422%}
10423
10424// Cisc-spill variant
10425// Spill to obtain 24-bit precision
10426instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10427  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10428  match(Set dst (AddF src1 (LoadF src2)));
10429
10430  format %{ "FADD   $dst,$src1,$src2 cisc" %}
10431  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10432  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10433              set_instruction_start,
10434              OpcP, RMopc_Mem(secondary,src1),
10435              Pop_Mem_FPR(dst) );
10436  ins_pipe( fpu_mem_mem_mem );
10437%}
10438
10439// Spill to obtain 24-bit precision
10440instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10441  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10442  match(Set dst (AddF src1 src2));
10443
10444  format %{ "FADD   $dst,$src1,$src2" %}
10445  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10446  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10447              set_instruction_start,
10448              OpcP, RMopc_Mem(secondary,src1),
10449              Pop_Mem_FPR(dst) );
10450  ins_pipe( fpu_mem_mem_mem );
10451%}
10452
10453
10454// Spill to obtain 24-bit precision
10455instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10456  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10457  match(Set dst (AddF src con));
10458  format %{ "FLD    $src\n\t"
10459            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10460            "FSTP_S $dst"  %}
10461  ins_encode %{
10462    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10463    __ fadd_s($constantaddress($con));
10464    __ fstp_s(Address(rsp, $dst$$disp));
10465  %}
10466  ins_pipe(fpu_mem_reg_con);
10467%}
10468//
10469// This instruction does not round to 24-bits
10470instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10471  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10472  match(Set dst (AddF src con));
10473  format %{ "FLD    $src\n\t"
10474            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10475            "FSTP   $dst"  %}
10476  ins_encode %{
10477    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10478    __ fadd_s($constantaddress($con));
10479    __ fstp_d($dst$$reg);
10480  %}
10481  ins_pipe(fpu_reg_reg_con);
10482%}
10483
10484// Spill to obtain 24-bit precision
10485instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10486  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10487  match(Set dst (MulF src1 src2));
10488
10489  format %{ "FLD    $src1\n\t"
10490            "FMUL   $src2\n\t"
10491            "FSTP_S $dst"  %}
10492  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10493  ins_encode( Push_Reg_FPR(src1),
10494              OpcReg_FPR(src2),
10495              Pop_Mem_FPR(dst) );
10496  ins_pipe( fpu_mem_reg_reg );
10497%}
10498//
10499// This instruction does not round to 24-bits
10500instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10501  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10502  match(Set dst (MulF src1 src2));
10503
10504  format %{ "FLD    $src1\n\t"
10505            "FMUL   $src2\n\t"
10506            "FSTP_S $dst"  %}
10507  opcode(0xD8, 0x1); /* D8 C8+i */
10508  ins_encode( Push_Reg_FPR(src2),
10509              OpcReg_FPR(src1),
10510              Pop_Reg_FPR(dst) );
10511  ins_pipe( fpu_reg_reg_reg );
10512%}
10513
10514
10515// Spill to obtain 24-bit precision
10516// Cisc-alternate to reg-reg multiply
10517instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10518  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10519  match(Set dst (MulF src1 (LoadF src2)));
10520
10521  format %{ "FLD_S  $src2\n\t"
10522            "FMUL   $src1\n\t"
10523            "FSTP_S $dst"  %}
10524  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
10525  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10526              OpcReg_FPR(src1),
10527              Pop_Mem_FPR(dst) );
10528  ins_pipe( fpu_mem_reg_mem );
10529%}
10530//
10531// This instruction does not round to 24-bits
10532// Cisc-alternate to reg-reg multiply
10533instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10534  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10535  match(Set dst (MulF src1 (LoadF src2)));
10536
10537  format %{ "FMUL   $dst,$src1,$src2" %}
10538  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10539  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10540              OpcReg_FPR(src1),
10541              Pop_Reg_FPR(dst) );
10542  ins_pipe( fpu_reg_reg_mem );
10543%}
10544
10545// Spill to obtain 24-bit precision
10546instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10547  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10548  match(Set dst (MulF src1 src2));
10549
10550  format %{ "FMUL   $dst,$src1,$src2" %}
10551  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10552  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10553              set_instruction_start,
10554              OpcP, RMopc_Mem(secondary,src1),
10555              Pop_Mem_FPR(dst) );
10556  ins_pipe( fpu_mem_mem_mem );
10557%}
10558
10559// Spill to obtain 24-bit precision
10560instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10561  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10562  match(Set dst (MulF src con));
10563
10564  format %{ "FLD    $src\n\t"
10565            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10566            "FSTP_S $dst"  %}
10567  ins_encode %{
10568    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10569    __ fmul_s($constantaddress($con));
10570    __ fstp_s(Address(rsp, $dst$$disp));
10571  %}
10572  ins_pipe(fpu_mem_reg_con);
10573%}
10574//
10575// This instruction does not round to 24-bits
10576instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10577  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10578  match(Set dst (MulF src con));
10579
10580  format %{ "FLD    $src\n\t"
10581            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10582            "FSTP   $dst"  %}
10583  ins_encode %{
10584    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10585    __ fmul_s($constantaddress($con));
10586    __ fstp_d($dst$$reg);
10587  %}
10588  ins_pipe(fpu_reg_reg_con);
10589%}
10590
10591
10592//
10593// MACRO1 -- subsume unshared load into mulFPR
10594// This instruction does not round to 24-bits
10595instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10596  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10597  match(Set dst (MulF (LoadF mem1) src));
10598
10599  format %{ "FLD    $mem1    ===MACRO1===\n\t"
10600            "FMUL   ST,$src\n\t"
10601            "FSTP   $dst" %}
10602  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10603  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10604              OpcReg_FPR(src),
10605              Pop_Reg_FPR(dst) );
10606  ins_pipe( fpu_reg_reg_mem );
10607%}
10608//
10609// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10610// This instruction does not round to 24-bits
10611instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10612  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10613  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10614  ins_cost(95);
10615
10616  format %{ "FLD    $mem1     ===MACRO2===\n\t"
10617            "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10618            "FADD   ST,$src2\n\t"
10619            "FSTP   $dst" %}
10620  opcode(0xD9); /* LoadF D9 /0 */
10621  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10622              FMul_ST_reg(src1),
10623              FAdd_ST_reg(src2),
10624              Pop_Reg_FPR(dst) );
10625  ins_pipe( fpu_reg_mem_reg_reg );
10626%}
10627
10628// MACRO3 -- addFPR a mulFPR
10629// This instruction does not round to 24-bits.  It is a '2-address'
10630// instruction in that the result goes back to src2.  This eliminates
10631// a move from the macro; possibly the register allocator will have
10632// to add it back (and maybe not).
10633instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10634  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10635  match(Set src2 (AddF (MulF src0 src1) src2));
10636
10637  format %{ "FLD    $src0     ===MACRO3===\n\t"
10638            "FMUL   ST,$src1\n\t"
10639            "FADDP  $src2,ST" %}
10640  opcode(0xD9); /* LoadF D9 /0 */
10641  ins_encode( Push_Reg_FPR(src0),
10642              FMul_ST_reg(src1),
10643              FAddP_reg_ST(src2) );
10644  ins_pipe( fpu_reg_reg_reg );
10645%}
10646
10647// MACRO4 -- divFPR subFPR
10648// This instruction does not round to 24-bits
10649instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10650  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10651  match(Set dst (DivF (SubF src2 src1) src3));
10652
10653  format %{ "FLD    $src2   ===MACRO4===\n\t"
10654            "FSUB   ST,$src1\n\t"
10655            "FDIV   ST,$src3\n\t"
10656            "FSTP  $dst" %}
10657  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10658  ins_encode( Push_Reg_FPR(src2),
10659              subFPR_divFPR_encode(src1,src3),
10660              Pop_Reg_FPR(dst) );
10661  ins_pipe( fpu_reg_reg_reg_reg );
10662%}
10663
10664// Spill to obtain 24-bit precision
10665instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10666  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10667  match(Set dst (DivF src1 src2));
10668
10669  format %{ "FDIV   $dst,$src1,$src2" %}
10670  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10671  ins_encode( Push_Reg_FPR(src1),
10672              OpcReg_FPR(src2),
10673              Pop_Mem_FPR(dst) );
10674  ins_pipe( fpu_mem_reg_reg );
10675%}
10676//
10677// This instruction does not round to 24-bits
10678instruct divFPR_reg(regFPR dst, regFPR src) %{
10679  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10680  match(Set dst (DivF dst src));
10681
10682  format %{ "FDIV   $dst,$src" %}
10683  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10684  ins_encode( Push_Reg_FPR(src),
10685              OpcP, RegOpc(dst) );
10686  ins_pipe( fpu_reg_reg );
10687%}
10688
10689
10690// Spill to obtain 24-bit precision
10691instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10692  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10693  match(Set dst (ModF src1 src2));
10694  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10695
10696  format %{ "FMOD   $dst,$src1,$src2" %}
10697  ins_encode( Push_Reg_Mod_DPR(src1, src2),
10698              emitModDPR(),
10699              Push_Result_Mod_DPR(src2),
10700              Pop_Mem_FPR(dst));
10701  ins_pipe( pipe_slow );
10702%}
10703//
10704// This instruction does not round to 24-bits
10705instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10706  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10707  match(Set dst (ModF dst src));
10708  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10709
10710  format %{ "FMOD   $dst,$src" %}
10711  ins_encode(Push_Reg_Mod_DPR(dst, src),
10712              emitModDPR(),
10713              Push_Result_Mod_DPR(src),
10714              Pop_Reg_FPR(dst));
10715  ins_pipe( pipe_slow );
10716%}
10717
10718instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10719  predicate(UseSSE>=1);
10720  match(Set dst (ModF src0 src1));
10721  effect(KILL rax, KILL cr);
10722  format %{ "SUB    ESP,4\t # FMOD\n"
10723          "\tMOVSS  [ESP+0],$src1\n"
10724          "\tFLD_S  [ESP+0]\n"
10725          "\tMOVSS  [ESP+0],$src0\n"
10726          "\tFLD_S  [ESP+0]\n"
10727     "loop:\tFPREM\n"
10728          "\tFWAIT\n"
10729          "\tFNSTSW AX\n"
10730          "\tSAHF\n"
10731          "\tJP     loop\n"
10732          "\tFSTP_S [ESP+0]\n"
10733          "\tMOVSS  $dst,[ESP+0]\n"
10734          "\tADD    ESP,4\n"
10735          "\tFSTP   ST0\t # Restore FPU Stack"
10736    %}
10737  ins_cost(250);
10738  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10739  ins_pipe( pipe_slow );
10740%}
10741
10742
10743//----------Arithmetic Conversion Instructions---------------------------------
10744// The conversions operations are all Alpha sorted.  Please keep it that way!
10745
10746instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10747  predicate(UseSSE==0);
10748  match(Set dst (RoundFloat src));
10749  ins_cost(125);
10750  format %{ "FST_S  $dst,$src\t# F-round" %}
10751  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10752  ins_pipe( fpu_mem_reg );
10753%}
10754
10755instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10756  predicate(UseSSE<=1);
10757  match(Set dst (RoundDouble src));
10758  ins_cost(125);
10759  format %{ "FST_D  $dst,$src\t# D-round" %}
10760  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10761  ins_pipe( fpu_mem_reg );
10762%}
10763
10764// Force rounding to 24-bit precision and 6-bit exponent
10765instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10766  predicate(UseSSE==0);
10767  match(Set dst (ConvD2F src));
10768  format %{ "FST_S  $dst,$src\t# F-round" %}
10769  expand %{
10770    roundFloat_mem_reg(dst,src);
10771  %}
10772%}
10773
10774// Force rounding to 24-bit precision and 6-bit exponent
10775instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10776  predicate(UseSSE==1);
10777  match(Set dst (ConvD2F src));
10778  effect( KILL cr );
10779  format %{ "SUB    ESP,4\n\t"
10780            "FST_S  [ESP],$src\t# F-round\n\t"
10781            "MOVSS  $dst,[ESP]\n\t"
10782            "ADD ESP,4" %}
10783  ins_encode %{
10784    __ subptr(rsp, 4);
10785    if ($src$$reg != FPR1L_enc) {
10786      __ fld_s($src$$reg-1);
10787      __ fstp_s(Address(rsp, 0));
10788    } else {
10789      __ fst_s(Address(rsp, 0));
10790    }
10791    __ movflt($dst$$XMMRegister, Address(rsp, 0));
10792    __ addptr(rsp, 4);
10793  %}
10794  ins_pipe( pipe_slow );
10795%}
10796
10797// Force rounding double precision to single precision
10798instruct convD2F_reg(regF dst, regD src) %{
10799  predicate(UseSSE>=2);
10800  match(Set dst (ConvD2F src));
10801  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10802  ins_encode %{
10803    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10804  %}
10805  ins_pipe( pipe_slow );
10806%}
10807
10808instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10809  predicate(UseSSE==0);
10810  match(Set dst (ConvF2D src));
10811  format %{ "FST_S  $dst,$src\t# D-round" %}
10812  ins_encode( Pop_Reg_Reg_DPR(dst, src));
10813  ins_pipe( fpu_reg_reg );
10814%}
10815
10816instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10817  predicate(UseSSE==1);
10818  match(Set dst (ConvF2D src));
10819  format %{ "FST_D  $dst,$src\t# D-round" %}
10820  expand %{
10821    roundDouble_mem_reg(dst,src);
10822  %}
10823%}
10824
10825instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10826  predicate(UseSSE==1);
10827  match(Set dst (ConvF2D src));
10828  effect( KILL cr );
10829  format %{ "SUB    ESP,4\n\t"
10830            "MOVSS  [ESP] $src\n\t"
10831            "FLD_S  [ESP]\n\t"
10832            "ADD    ESP,4\n\t"
10833            "FSTP   $dst\t# D-round" %}
10834  ins_encode %{
10835    __ subptr(rsp, 4);
10836    __ movflt(Address(rsp, 0), $src$$XMMRegister);
10837    __ fld_s(Address(rsp, 0));
10838    __ addptr(rsp, 4);
10839    __ fstp_d($dst$$reg);
10840  %}
10841  ins_pipe( pipe_slow );
10842%}
10843
10844instruct convF2D_reg(regD dst, regF src) %{
10845  predicate(UseSSE>=2);
10846  match(Set dst (ConvF2D src));
10847  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10848  ins_encode %{
10849    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10850  %}
10851  ins_pipe( pipe_slow );
10852%}
10853
10854// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10855instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10856  predicate(UseSSE<=1);
10857  match(Set dst (ConvD2I src));
10858  effect( KILL tmp, KILL cr );
10859  format %{ "FLD    $src\t# Convert double to int \n\t"
10860            "FLDCW  trunc mode\n\t"
10861            "SUB    ESP,4\n\t"
10862            "FISTp  [ESP + #0]\n\t"
10863            "FLDCW  std/24-bit mode\n\t"
10864            "POP    EAX\n\t"
10865            "CMP    EAX,0x80000000\n\t"
10866            "JNE,s  fast\n\t"
10867            "FLD_D  $src\n\t"
10868            "CALL   d2i_wrapper\n"
10869      "fast:" %}
10870  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10871  ins_pipe( pipe_slow );
10872%}
10873
10874// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10875instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10876  predicate(UseSSE>=2);
10877  match(Set dst (ConvD2I src));
10878  effect( KILL tmp, KILL cr );
10879  format %{ "CVTTSD2SI $dst, $src\n\t"
10880            "CMP    $dst,0x80000000\n\t"
10881            "JNE,s  fast\n\t"
10882            "SUB    ESP, 8\n\t"
10883            "MOVSD  [ESP], $src\n\t"
10884            "FLD_D  [ESP]\n\t"
10885            "ADD    ESP, 8\n\t"
10886            "CALL   d2i_wrapper\n"
10887      "fast:" %}
10888  ins_encode %{
10889    Label fast;
10890    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10891    __ cmpl($dst$$Register, 0x80000000);
10892    __ jccb(Assembler::notEqual, fast);
10893    __ subptr(rsp, 8);
10894    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10895    __ fld_d(Address(rsp, 0));
10896    __ addptr(rsp, 8);
10897    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10898    __ bind(fast);
10899  %}
10900  ins_pipe( pipe_slow );
10901%}
10902
10903instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10904  predicate(UseSSE<=1);
10905  match(Set dst (ConvD2L src));
10906  effect( KILL cr );
10907  format %{ "FLD    $src\t# Convert double to long\n\t"
10908            "FLDCW  trunc mode\n\t"
10909            "SUB    ESP,8\n\t"
10910            "FISTp  [ESP + #0]\n\t"
10911            "FLDCW  std/24-bit mode\n\t"
10912            "POP    EAX\n\t"
10913            "POP    EDX\n\t"
10914            "CMP    EDX,0x80000000\n\t"
10915            "JNE,s  fast\n\t"
10916            "TEST   EAX,EAX\n\t"
10917            "JNE,s  fast\n\t"
10918            "FLD    $src\n\t"
10919            "CALL   d2l_wrapper\n"
10920      "fast:" %}
10921  ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10922  ins_pipe( pipe_slow );
10923%}
10924
10925// XMM lacks a float/double->long conversion, so use the old FPU stack.
10926instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10927  predicate (UseSSE>=2);
10928  match(Set dst (ConvD2L src));
10929  effect( KILL cr );
10930  format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10931            "MOVSD  [ESP],$src\n\t"
10932            "FLD_D  [ESP]\n\t"
10933            "FLDCW  trunc mode\n\t"
10934            "FISTp  [ESP + #0]\n\t"
10935            "FLDCW  std/24-bit mode\n\t"
10936            "POP    EAX\n\t"
10937            "POP    EDX\n\t"
10938            "CMP    EDX,0x80000000\n\t"
10939            "JNE,s  fast\n\t"
10940            "TEST   EAX,EAX\n\t"
10941            "JNE,s  fast\n\t"
10942            "SUB    ESP,8\n\t"
10943            "MOVSD  [ESP],$src\n\t"
10944            "FLD_D  [ESP]\n\t"
10945            "ADD    ESP,8\n\t"
10946            "CALL   d2l_wrapper\n"
10947      "fast:" %}
10948  ins_encode %{
10949    Label fast;
10950    __ subptr(rsp, 8);
10951    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10952    __ fld_d(Address(rsp, 0));
10953    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10954    __ fistp_d(Address(rsp, 0));
10955    // Restore the rounding mode, mask the exception
10956    if (Compile::current()->in_24_bit_fp_mode()) {
10957      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10958    } else {
10959      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10960    }
10961    // Load the converted long, adjust CPU stack
10962    __ pop(rax);
10963    __ pop(rdx);
10964    __ cmpl(rdx, 0x80000000);
10965    __ jccb(Assembler::notEqual, fast);
10966    __ testl(rax, rax);
10967    __ jccb(Assembler::notEqual, fast);
10968    __ subptr(rsp, 8);
10969    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10970    __ fld_d(Address(rsp, 0));
10971    __ addptr(rsp, 8);
10972    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10973    __ bind(fast);
10974  %}
10975  ins_pipe( pipe_slow );
10976%}
10977
10978// Convert a double to an int.  Java semantics require we do complex
10979// manglations in the corner cases.  So we set the rounding mode to
10980// 'zero', store the darned double down as an int, and reset the
10981// rounding mode to 'nearest'.  The hardware stores a flag value down
10982// if we would overflow or converted a NAN; we check for this and
10983// and go the slow path if needed.
10984instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10985  predicate(UseSSE==0);
10986  match(Set dst (ConvF2I src));
10987  effect( KILL tmp, KILL cr );
10988  format %{ "FLD    $src\t# Convert float to int \n\t"
10989            "FLDCW  trunc mode\n\t"
10990            "SUB    ESP,4\n\t"
10991            "FISTp  [ESP + #0]\n\t"
10992            "FLDCW  std/24-bit mode\n\t"
10993            "POP    EAX\n\t"
10994            "CMP    EAX,0x80000000\n\t"
10995            "JNE,s  fast\n\t"
10996            "FLD    $src\n\t"
10997            "CALL   d2i_wrapper\n"
10998      "fast:" %}
10999  // DPR2I_encoding works for FPR2I
11000  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
11001  ins_pipe( pipe_slow );
11002%}
11003
11004// Convert a float in xmm to an int reg.
11005instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11006  predicate(UseSSE>=1);
11007  match(Set dst (ConvF2I src));
11008  effect( KILL tmp, KILL cr );
11009  format %{ "CVTTSS2SI $dst, $src\n\t"
11010            "CMP    $dst,0x80000000\n\t"
11011            "JNE,s  fast\n\t"
11012            "SUB    ESP, 4\n\t"
11013            "MOVSS  [ESP], $src\n\t"
11014            "FLD    [ESP]\n\t"
11015            "ADD    ESP, 4\n\t"
11016            "CALL   d2i_wrapper\n"
11017      "fast:" %}
11018  ins_encode %{
11019    Label fast;
11020    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
11021    __ cmpl($dst$$Register, 0x80000000);
11022    __ jccb(Assembler::notEqual, fast);
11023    __ subptr(rsp, 4);
11024    __ movflt(Address(rsp, 0), $src$$XMMRegister);
11025    __ fld_s(Address(rsp, 0));
11026    __ addptr(rsp, 4);
11027    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11028    __ bind(fast);
11029  %}
11030  ins_pipe( pipe_slow );
11031%}
11032
11033instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
11034  predicate(UseSSE==0);
11035  match(Set dst (ConvF2L src));
11036  effect( KILL cr );
11037  format %{ "FLD    $src\t# Convert float to long\n\t"
11038            "FLDCW  trunc mode\n\t"
11039            "SUB    ESP,8\n\t"
11040            "FISTp  [ESP + #0]\n\t"
11041            "FLDCW  std/24-bit mode\n\t"
11042            "POP    EAX\n\t"
11043            "POP    EDX\n\t"
11044            "CMP    EDX,0x80000000\n\t"
11045            "JNE,s  fast\n\t"
11046            "TEST   EAX,EAX\n\t"
11047            "JNE,s  fast\n\t"
11048            "FLD    $src\n\t"
11049            "CALL   d2l_wrapper\n"
11050      "fast:" %}
11051  // DPR2L_encoding works for FPR2L
11052  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
11053  ins_pipe( pipe_slow );
11054%}
11055
11056// XMM lacks a float/double->long conversion, so use the old FPU stack.
11057instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11058  predicate (UseSSE>=1);
11059  match(Set dst (ConvF2L src));
11060  effect( KILL cr );
11061  format %{ "SUB    ESP,8\t# Convert float to long\n\t"
11062            "MOVSS  [ESP],$src\n\t"
11063            "FLD_S  [ESP]\n\t"
11064            "FLDCW  trunc mode\n\t"
11065            "FISTp  [ESP + #0]\n\t"
11066            "FLDCW  std/24-bit mode\n\t"
11067            "POP    EAX\n\t"
11068            "POP    EDX\n\t"
11069            "CMP    EDX,0x80000000\n\t"
11070            "JNE,s  fast\n\t"
11071            "TEST   EAX,EAX\n\t"
11072            "JNE,s  fast\n\t"
11073            "SUB    ESP,4\t# Convert float to long\n\t"
11074            "MOVSS  [ESP],$src\n\t"
11075            "FLD_S  [ESP]\n\t"
11076            "ADD    ESP,4\n\t"
11077            "CALL   d2l_wrapper\n"
11078      "fast:" %}
11079  ins_encode %{
11080    Label fast;
11081    __ subptr(rsp, 8);
11082    __ movflt(Address(rsp, 0), $src$$XMMRegister);
11083    __ fld_s(Address(rsp, 0));
11084    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11085    __ fistp_d(Address(rsp, 0));
11086    // Restore the rounding mode, mask the exception
11087    if (Compile::current()->in_24_bit_fp_mode()) {
11088      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11089    } else {
11090      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11091    }
11092    // Load the converted long, adjust CPU stack
11093    __ pop(rax);
11094    __ pop(rdx);
11095    __ cmpl(rdx, 0x80000000);
11096    __ jccb(Assembler::notEqual, fast);
11097    __ testl(rax, rax);
11098    __ jccb(Assembler::notEqual, fast);
11099    __ subptr(rsp, 4);
11100    __ movflt(Address(rsp, 0), $src$$XMMRegister);
11101    __ fld_s(Address(rsp, 0));
11102    __ addptr(rsp, 4);
11103    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11104    __ bind(fast);
11105  %}
11106  ins_pipe( pipe_slow );
11107%}
11108
11109instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11110  predicate( UseSSE<=1 );
11111  match(Set dst (ConvI2D src));
11112  format %{ "FILD   $src\n\t"
11113            "FSTP   $dst" %}
11114  opcode(0xDB, 0x0);  /* DB /0 */
11115  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11116  ins_pipe( fpu_reg_mem );
11117%}
11118
11119instruct convI2D_reg(regD dst, rRegI src) %{
11120  predicate( UseSSE>=2 && !UseXmmI2D );
11121  match(Set dst (ConvI2D src));
11122  format %{ "CVTSI2SD $dst,$src" %}
11123  ins_encode %{
11124    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11125  %}
11126  ins_pipe( pipe_slow );
11127%}
11128
11129instruct convI2D_mem(regD dst, memory mem) %{
11130  predicate( UseSSE>=2 );
11131  match(Set dst (ConvI2D (LoadI mem)));
11132  format %{ "CVTSI2SD $dst,$mem" %}
11133  ins_encode %{
11134    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11135  %}
11136  ins_pipe( pipe_slow );
11137%}
11138
11139instruct convXI2D_reg(regD dst, rRegI src)
11140%{
11141  predicate( UseSSE>=2 && UseXmmI2D );
11142  match(Set dst (ConvI2D src));
11143
11144  format %{ "MOVD  $dst,$src\n\t"
11145            "CVTDQ2PD $dst,$dst\t# i2d" %}
11146  ins_encode %{
11147    __ movdl($dst$$XMMRegister, $src$$Register);
11148    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11149  %}
11150  ins_pipe(pipe_slow); // XXX
11151%}
11152
11153instruct convI2DPR_mem(regDPR dst, memory mem) %{
11154  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11155  match(Set dst (ConvI2D (LoadI mem)));
11156  format %{ "FILD   $mem\n\t"
11157            "FSTP   $dst" %}
11158  opcode(0xDB);      /* DB /0 */
11159  ins_encode( OpcP, RMopc_Mem(0x00,mem),
11160              Pop_Reg_DPR(dst));
11161  ins_pipe( fpu_reg_mem );
11162%}
11163
11164// Convert a byte to a float; no rounding step needed.
11165instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11166  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11167  match(Set dst (ConvI2F src));
11168  format %{ "FILD   $src\n\t"
11169            "FSTP   $dst" %}
11170
11171  opcode(0xDB, 0x0);  /* DB /0 */
11172  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11173  ins_pipe( fpu_reg_mem );
11174%}
11175
11176// In 24-bit mode, force exponent rounding by storing back out
11177instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11178  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11179  match(Set dst (ConvI2F src));
11180  ins_cost(200);
11181  format %{ "FILD   $src\n\t"
11182            "FSTP_S $dst" %}
11183  opcode(0xDB, 0x0);  /* DB /0 */
11184  ins_encode( Push_Mem_I(src),
11185              Pop_Mem_FPR(dst));
11186  ins_pipe( fpu_mem_mem );
11187%}
11188
11189// In 24-bit mode, force exponent rounding by storing back out
11190instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11191  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11192  match(Set dst (ConvI2F (LoadI mem)));
11193  ins_cost(200);
11194  format %{ "FILD   $mem\n\t"
11195            "FSTP_S $dst" %}
11196  opcode(0xDB);  /* DB /0 */
11197  ins_encode( OpcP, RMopc_Mem(0x00,mem),
11198              Pop_Mem_FPR(dst));
11199  ins_pipe( fpu_mem_mem );
11200%}
11201
11202// This instruction does not round to 24-bits
11203instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11204  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11205  match(Set dst (ConvI2F src));
11206  format %{ "FILD   $src\n\t"
11207            "FSTP   $dst" %}
11208  opcode(0xDB, 0x0);  /* DB /0 */
11209  ins_encode( Push_Mem_I(src),
11210              Pop_Reg_FPR(dst));
11211  ins_pipe( fpu_reg_mem );
11212%}
11213
11214// This instruction does not round to 24-bits
11215instruct convI2FPR_mem(regFPR dst, memory mem) %{
11216  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11217  match(Set dst (ConvI2F (LoadI mem)));
11218  format %{ "FILD   $mem\n\t"
11219            "FSTP   $dst" %}
11220  opcode(0xDB);      /* DB /0 */
11221  ins_encode( OpcP, RMopc_Mem(0x00,mem),
11222              Pop_Reg_FPR(dst));
11223  ins_pipe( fpu_reg_mem );
11224%}
11225
11226// Convert an int to a float in xmm; no rounding step needed.
11227instruct convI2F_reg(regF dst, rRegI src) %{
11228  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11229  match(Set dst (ConvI2F src));
11230  format %{ "CVTSI2SS $dst, $src" %}
11231  ins_encode %{
11232    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11233  %}
11234  ins_pipe( pipe_slow );
11235%}
11236
11237 instruct convXI2F_reg(regF dst, rRegI src)
11238%{
11239  predicate( UseSSE>=2 && UseXmmI2F );
11240  match(Set dst (ConvI2F src));
11241
11242  format %{ "MOVD  $dst,$src\n\t"
11243            "CVTDQ2PS $dst,$dst\t# i2f" %}
11244  ins_encode %{
11245    __ movdl($dst$$XMMRegister, $src$$Register);
11246    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11247  %}
11248  ins_pipe(pipe_slow); // XXX
11249%}
11250
11251instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11252  match(Set dst (ConvI2L src));
11253  effect(KILL cr);
11254  ins_cost(375);
11255  format %{ "MOV    $dst.lo,$src\n\t"
11256            "MOV    $dst.hi,$src\n\t"
11257            "SAR    $dst.hi,31" %}
11258  ins_encode(convert_int_long(dst,src));
11259  ins_pipe( ialu_reg_reg_long );
11260%}
11261
11262// Zero-extend convert int to long
11263instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11264  match(Set dst (AndL (ConvI2L src) mask) );
11265  effect( KILL flags );
11266  ins_cost(250);
11267  format %{ "MOV    $dst.lo,$src\n\t"
11268            "XOR    $dst.hi,$dst.hi" %}
11269  opcode(0x33); // XOR
11270  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11271  ins_pipe( ialu_reg_reg_long );
11272%}
11273
11274// Zero-extend long
11275instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11276  match(Set dst (AndL src mask) );
11277  effect( KILL flags );
11278  ins_cost(250);
11279  format %{ "MOV    $dst.lo,$src.lo\n\t"
11280            "XOR    $dst.hi,$dst.hi\n\t" %}
11281  opcode(0x33); // XOR
11282  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11283  ins_pipe( ialu_reg_reg_long );
11284%}
11285
11286instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11287  predicate (UseSSE<=1);
11288  match(Set dst (ConvL2D src));
11289  effect( KILL cr );
11290  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11291            "PUSH   $src.lo\n\t"
11292            "FILD   ST,[ESP + #0]\n\t"
11293            "ADD    ESP,8\n\t"
11294            "FSTP_D $dst\t# D-round" %}
11295  opcode(0xDF, 0x5);  /* DF /5 */
11296  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11297  ins_pipe( pipe_slow );
11298%}
11299
11300instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11301  predicate (UseSSE>=2);
11302  match(Set dst (ConvL2D src));
11303  effect( KILL cr );
11304  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11305            "PUSH   $src.lo\n\t"
11306            "FILD_D [ESP]\n\t"
11307            "FSTP_D [ESP]\n\t"
11308            "MOVSD  $dst,[ESP]\n\t"
11309            "ADD    ESP,8" %}
11310  opcode(0xDF, 0x5);  /* DF /5 */
11311  ins_encode(convert_long_double2(src), Push_ResultD(dst));
11312  ins_pipe( pipe_slow );
11313%}
11314
11315instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11316  predicate (UseSSE>=1);
11317  match(Set dst (ConvL2F src));
11318  effect( KILL cr );
11319  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11320            "PUSH   $src.lo\n\t"
11321            "FILD_D [ESP]\n\t"
11322            "FSTP_S [ESP]\n\t"
11323            "MOVSS  $dst,[ESP]\n\t"
11324            "ADD    ESP,8" %}
11325  opcode(0xDF, 0x5);  /* DF /5 */
11326  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11327  ins_pipe( pipe_slow );
11328%}
11329
11330instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11331  match(Set dst (ConvL2F src));
11332  effect( KILL cr );
11333  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11334            "PUSH   $src.lo\n\t"
11335            "FILD   ST,[ESP + #0]\n\t"
11336            "ADD    ESP,8\n\t"
11337            "FSTP_S $dst\t# F-round" %}
11338  opcode(0xDF, 0x5);  /* DF /5 */
11339  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11340  ins_pipe( pipe_slow );
11341%}
11342
11343instruct convL2I_reg( rRegI dst, eRegL src ) %{
11344  match(Set dst (ConvL2I src));
11345  effect( DEF dst, USE src );
11346  format %{ "MOV    $dst,$src.lo" %}
11347  ins_encode(enc_CopyL_Lo(dst,src));
11348  ins_pipe( ialu_reg_reg );
11349%}
11350
11351instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11352  match(Set dst (MoveF2I src));
11353  effect( DEF dst, USE src );
11354  ins_cost(100);
11355  format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11356  ins_encode %{
11357    __ movl($dst$$Register, Address(rsp, $src$$disp));
11358  %}
11359  ins_pipe( ialu_reg_mem );
11360%}
11361
11362instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11363  predicate(UseSSE==0);
11364  match(Set dst (MoveF2I src));
11365  effect( DEF dst, USE src );
11366
11367  ins_cost(125);
11368  format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11369  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11370  ins_pipe( fpu_mem_reg );
11371%}
11372
11373instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11374  predicate(UseSSE>=1);
11375  match(Set dst (MoveF2I src));
11376  effect( DEF dst, USE src );
11377
11378  ins_cost(95);
11379  format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11380  ins_encode %{
11381    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11382  %}
11383  ins_pipe( pipe_slow );
11384%}
11385
11386instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11387  predicate(UseSSE>=2);
11388  match(Set dst (MoveF2I src));
11389  effect( DEF dst, USE src );
11390  ins_cost(85);
11391  format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11392  ins_encode %{
11393    __ movdl($dst$$Register, $src$$XMMRegister);
11394  %}
11395  ins_pipe( pipe_slow );
11396%}
11397
11398instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11399  match(Set dst (MoveI2F src));
11400  effect( DEF dst, USE src );
11401
11402  ins_cost(100);
11403  format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11404  ins_encode %{
11405    __ movl(Address(rsp, $dst$$disp), $src$$Register);
11406  %}
11407  ins_pipe( ialu_mem_reg );
11408%}
11409
11410
11411instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11412  predicate(UseSSE==0);
11413  match(Set dst (MoveI2F src));
11414  effect(DEF dst, USE src);
11415
11416  ins_cost(125);
11417  format %{ "FLD_S  $src\n\t"
11418            "FSTP   $dst\t# MoveI2F_stack_reg" %}
11419  opcode(0xD9);               /* D9 /0, FLD m32real */
11420  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11421              Pop_Reg_FPR(dst) );
11422  ins_pipe( fpu_reg_mem );
11423%}
11424
11425instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11426  predicate(UseSSE>=1);
11427  match(Set dst (MoveI2F src));
11428  effect( DEF dst, USE src );
11429
11430  ins_cost(95);
11431  format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11432  ins_encode %{
11433    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11434  %}
11435  ins_pipe( pipe_slow );
11436%}
11437
11438instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11439  predicate(UseSSE>=2);
11440  match(Set dst (MoveI2F src));
11441  effect( DEF dst, USE src );
11442
11443  ins_cost(85);
11444  format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11445  ins_encode %{
11446    __ movdl($dst$$XMMRegister, $src$$Register);
11447  %}
11448  ins_pipe( pipe_slow );
11449%}
11450
11451instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11452  match(Set dst (MoveD2L src));
11453  effect(DEF dst, USE src);
11454
11455  ins_cost(250);
11456  format %{ "MOV    $dst.lo,$src\n\t"
11457            "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11458  opcode(0x8B, 0x8B);
11459  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11460  ins_pipe( ialu_mem_long_reg );
11461%}
11462
11463instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11464  predicate(UseSSE<=1);
11465  match(Set dst (MoveD2L src));
11466  effect(DEF dst, USE src);
11467
11468  ins_cost(125);
11469  format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11470  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11471  ins_pipe( fpu_mem_reg );
11472%}
11473
11474instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11475  predicate(UseSSE>=2);
11476  match(Set dst (MoveD2L src));
11477  effect(DEF dst, USE src);
11478  ins_cost(95);
11479  format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11480  ins_encode %{
11481    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11482  %}
11483  ins_pipe( pipe_slow );
11484%}
11485
11486instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11487  predicate(UseSSE>=2);
11488  match(Set dst (MoveD2L src));
11489  effect(DEF dst, USE src, TEMP tmp);
11490  ins_cost(85);
11491  format %{ "MOVD   $dst.lo,$src\n\t"
11492            "PSHUFLW $tmp,$src,0x4E\n\t"
11493            "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11494  ins_encode %{
11495    __ movdl($dst$$Register, $src$$XMMRegister);
11496    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11497    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11498  %}
11499  ins_pipe( pipe_slow );
11500%}
11501
11502instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11503  match(Set dst (MoveL2D src));
11504  effect(DEF dst, USE src);
11505
11506  ins_cost(200);
11507  format %{ "MOV    $dst,$src.lo\n\t"
11508            "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11509  opcode(0x89, 0x89);
11510  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11511  ins_pipe( ialu_mem_long_reg );
11512%}
11513
11514
11515instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11516  predicate(UseSSE<=1);
11517  match(Set dst (MoveL2D src));
11518  effect(DEF dst, USE src);
11519  ins_cost(125);
11520
11521  format %{ "FLD_D  $src\n\t"
11522            "FSTP   $dst\t# MoveL2D_stack_reg" %}
11523  opcode(0xDD);               /* DD /0, FLD m64real */
11524  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11525              Pop_Reg_DPR(dst) );
11526  ins_pipe( fpu_reg_mem );
11527%}
11528
11529
11530instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11531  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11532  match(Set dst (MoveL2D src));
11533  effect(DEF dst, USE src);
11534
11535  ins_cost(95);
11536  format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11537  ins_encode %{
11538    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11539  %}
11540  ins_pipe( pipe_slow );
11541%}
11542
11543instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11544  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11545  match(Set dst (MoveL2D src));
11546  effect(DEF dst, USE src);
11547
11548  ins_cost(95);
11549  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11550  ins_encode %{
11551    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11552  %}
11553  ins_pipe( pipe_slow );
11554%}
11555
11556instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11557  predicate(UseSSE>=2);
11558  match(Set dst (MoveL2D src));
11559  effect(TEMP dst, USE src, TEMP tmp);
11560  ins_cost(85);
11561  format %{ "MOVD   $dst,$src.lo\n\t"
11562            "MOVD   $tmp,$src.hi\n\t"
11563            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11564  ins_encode %{
11565    __ movdl($dst$$XMMRegister, $src$$Register);
11566    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11567    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11568  %}
11569  ins_pipe( pipe_slow );
11570%}
11571
11572
11573// =======================================================================
11574// fast clearing of an array
11575instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11576  predicate(!((ClearArrayNode*)n)->is_large());
11577  match(Set dummy (ClearArray cnt base));
11578  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11579
11580  format %{ $$template
11581    $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11582    $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11583    $$emit$$"JG     LARGE\n\t"
11584    $$emit$$"SHL    ECX, 1\n\t"
11585    $$emit$$"DEC    ECX\n\t"
11586    $$emit$$"JS     DONE\t# Zero length\n\t"
11587    $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11588    $$emit$$"DEC    ECX\n\t"
11589    $$emit$$"JGE    LOOP\n\t"
11590    $$emit$$"JMP    DONE\n\t"
11591    $$emit$$"# LARGE:\n\t"
11592    if (UseFastStosb) {
11593       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11594       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11595    } else if (UseXMMForObjInit) {
11596       $$emit$$"MOV     RDI,RAX\n\t"
11597       $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11598       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11599       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11600       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11601       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11602       $$emit$$"ADD     0x40,RAX\n\t"
11603       $$emit$$"# L_zero_64_bytes:\n\t"
11604       $$emit$$"SUB     0x8,RCX\n\t"
11605       $$emit$$"JGE     L_loop\n\t"
11606       $$emit$$"ADD     0x4,RCX\n\t"
11607       $$emit$$"JL      L_tail\n\t"
11608       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11609       $$emit$$"ADD     0x20,RAX\n\t"
11610       $$emit$$"SUB     0x4,RCX\n\t"
11611       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11612       $$emit$$"ADD     0x4,RCX\n\t"
11613       $$emit$$"JLE     L_end\n\t"
11614       $$emit$$"DEC     RCX\n\t"
11615       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11616       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11617       $$emit$$"ADD     0x8,RAX\n\t"
11618       $$emit$$"DEC     RCX\n\t"
11619       $$emit$$"JGE     L_sloop\n\t"
11620       $$emit$$"# L_end:\n\t"
11621    } else {
11622       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11623       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11624    }
11625    $$emit$$"# DONE"
11626  %}
11627  ins_encode %{
11628    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11629                 $tmp$$XMMRegister, false);
11630  %}
11631  ins_pipe( pipe_slow );
11632%}
11633
11634instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11635  predicate(((ClearArrayNode*)n)->is_large());
11636  match(Set dummy (ClearArray cnt base));
11637  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11638  format %{ $$template
11639    if (UseFastStosb) {
11640       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11641       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11642       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11643    } else if (UseXMMForObjInit) {
11644       $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11645       $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11646       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11647       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11648       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11649       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11650       $$emit$$"ADD     0x40,RAX\n\t"
11651       $$emit$$"# L_zero_64_bytes:\n\t"
11652       $$emit$$"SUB     0x8,RCX\n\t"
11653       $$emit$$"JGE     L_loop\n\t"
11654       $$emit$$"ADD     0x4,RCX\n\t"
11655       $$emit$$"JL      L_tail\n\t"
11656       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11657       $$emit$$"ADD     0x20,RAX\n\t"
11658       $$emit$$"SUB     0x4,RCX\n\t"
11659       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11660       $$emit$$"ADD     0x4,RCX\n\t"
11661       $$emit$$"JLE     L_end\n\t"
11662       $$emit$$"DEC     RCX\n\t"
11663       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11664       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11665       $$emit$$"ADD     0x8,RAX\n\t"
11666       $$emit$$"DEC     RCX\n\t"
11667       $$emit$$"JGE     L_sloop\n\t"
11668       $$emit$$"# L_end:\n\t"
11669    } else {
11670       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11671       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11672       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11673    }
11674    $$emit$$"# DONE"
11675  %}
11676  ins_encode %{
11677    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11678                 $tmp$$XMMRegister, true);
11679  %}
11680  ins_pipe( pipe_slow );
11681%}
11682
11683instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11684                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
11685  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11686  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11687  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11688
11689  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11690  ins_encode %{
11691    __ string_compare($str1$$Register, $str2$$Register,
11692                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
11693                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11694  %}
11695  ins_pipe( pipe_slow );
11696%}
11697
11698instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11699                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
11700  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11701  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11702  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11703
11704  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11705  ins_encode %{
11706    __ string_compare($str1$$Register, $str2$$Register,
11707                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
11708                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11709  %}
11710  ins_pipe( pipe_slow );
11711%}
11712
11713instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11714                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11715  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11716  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11717  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11718
11719  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11720  ins_encode %{
11721    __ string_compare($str1$$Register, $str2$$Register,
11722                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
11723                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11724  %}
11725  ins_pipe( pipe_slow );
11726%}
11727
11728instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11729                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11730  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11731  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11732  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11733
11734  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11735  ins_encode %{
11736    __ string_compare($str2$$Register, $str1$$Register,
11737                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
11738                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11739  %}
11740  ins_pipe( pipe_slow );
11741%}
11742
11743// fast string equals
11744instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11745                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11746  match(Set result (StrEquals (Binary str1 str2) cnt));
11747  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11748
11749  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11750  ins_encode %{
11751    __ arrays_equals(false, $str1$$Register, $str2$$Register,
11752                     $cnt$$Register, $result$$Register, $tmp3$$Register,
11753                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11754  %}
11755
11756  ins_pipe( pipe_slow );
11757%}
11758
11759// fast search of substring with known size.
11760instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11761                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11762  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11763  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11764  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11765
11766  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11767  ins_encode %{
11768    int icnt2 = (int)$int_cnt2$$constant;
11769    if (icnt2 >= 16) {
11770      // IndexOf for constant substrings with size >= 16 elements
11771      // which don't need to be loaded through stack.
11772      __ string_indexofC8($str1$$Register, $str2$$Register,
11773                          $cnt1$$Register, $cnt2$$Register,
11774                          icnt2, $result$$Register,
11775                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11776    } else {
11777      // Small strings are loaded through stack if they cross page boundary.
11778      __ string_indexof($str1$$Register, $str2$$Register,
11779                        $cnt1$$Register, $cnt2$$Register,
11780                        icnt2, $result$$Register,
11781                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11782    }
11783  %}
11784  ins_pipe( pipe_slow );
11785%}
11786
11787// fast search of substring with known size.
11788instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11789                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11790  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11791  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11792  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11793
11794  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11795  ins_encode %{
11796    int icnt2 = (int)$int_cnt2$$constant;
11797    if (icnt2 >= 8) {
11798      // IndexOf for constant substrings with size >= 8 elements
11799      // which don't need to be loaded through stack.
11800      __ string_indexofC8($str1$$Register, $str2$$Register,
11801                          $cnt1$$Register, $cnt2$$Register,
11802                          icnt2, $result$$Register,
11803                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11804    } else {
11805      // Small strings are loaded through stack if they cross page boundary.
11806      __ string_indexof($str1$$Register, $str2$$Register,
11807                        $cnt1$$Register, $cnt2$$Register,
11808                        icnt2, $result$$Register,
11809                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11810    }
11811  %}
11812  ins_pipe( pipe_slow );
11813%}
11814
11815// fast search of substring with known size.
11816instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11817                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11818  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11819  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11820  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11821
11822  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11823  ins_encode %{
11824    int icnt2 = (int)$int_cnt2$$constant;
11825    if (icnt2 >= 8) {
11826      // IndexOf for constant substrings with size >= 8 elements
11827      // which don't need to be loaded through stack.
11828      __ string_indexofC8($str1$$Register, $str2$$Register,
11829                          $cnt1$$Register, $cnt2$$Register,
11830                          icnt2, $result$$Register,
11831                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11832    } else {
11833      // Small strings are loaded through stack if they cross page boundary.
11834      __ string_indexof($str1$$Register, $str2$$Register,
11835                        $cnt1$$Register, $cnt2$$Register,
11836                        icnt2, $result$$Register,
11837                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11838    }
11839  %}
11840  ins_pipe( pipe_slow );
11841%}
11842
11843instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11844                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11845  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11846  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11847  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11848
11849  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11850  ins_encode %{
11851    __ string_indexof($str1$$Register, $str2$$Register,
11852                      $cnt1$$Register, $cnt2$$Register,
11853                      (-1), $result$$Register,
11854                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11855  %}
11856  ins_pipe( pipe_slow );
11857%}
11858
11859instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11860                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11861  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11862  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11863  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11864
11865  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11866  ins_encode %{
11867    __ string_indexof($str1$$Register, $str2$$Register,
11868                      $cnt1$$Register, $cnt2$$Register,
11869                      (-1), $result$$Register,
11870                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11871  %}
11872  ins_pipe( pipe_slow );
11873%}
11874
11875instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11876                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11877  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11878  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11879  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11880
11881  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11882  ins_encode %{
11883    __ string_indexof($str1$$Register, $str2$$Register,
11884                      $cnt1$$Register, $cnt2$$Register,
11885                      (-1), $result$$Register,
11886                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11887  %}
11888  ins_pipe( pipe_slow );
11889%}
11890
11891instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11892                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11893  predicate(UseSSE42Intrinsics);
11894  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11895  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11896  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11897  ins_encode %{
11898    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11899                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11900  %}
11901  ins_pipe( pipe_slow );
11902%}
11903
11904// fast array equals
11905instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11906                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11907%{
11908  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11909  match(Set result (AryEq ary1 ary2));
11910  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11911  //ins_cost(300);
11912
11913  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11914  ins_encode %{
11915    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11916                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
11917                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11918  %}
11919  ins_pipe( pipe_slow );
11920%}
11921
11922instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11923                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11924%{
11925  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11926  match(Set result (AryEq ary1 ary2));
11927  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11928  //ins_cost(300);
11929
11930  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11931  ins_encode %{
11932    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11933                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
11934                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11935  %}
11936  ins_pipe( pipe_slow );
11937%}
11938
11939instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11940                      regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11941%{
11942  match(Set result (HasNegatives ary1 len));
11943  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11944
11945  format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11946  ins_encode %{
11947    __ has_negatives($ary1$$Register, $len$$Register,
11948                     $result$$Register, $tmp3$$Register,
11949                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11950  %}
11951  ins_pipe( pipe_slow );
11952%}
11953
11954// fast char[] to byte[] compression
11955instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11956                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11957  match(Set result (StrCompressedCopy src (Binary dst len)));
11958  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11959
11960  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
11961  ins_encode %{
11962    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11963                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11964                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11965  %}
11966  ins_pipe( pipe_slow );
11967%}
11968
11969// fast byte[] to char[] inflation
11970instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11971                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11972  match(Set dummy (StrInflatedCopy src (Binary dst len)));
11973  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11974
11975  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11976  ins_encode %{
11977    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11978                          $tmp1$$XMMRegister, $tmp2$$Register);
11979  %}
11980  ins_pipe( pipe_slow );
11981%}
11982
11983// encode char[] to byte[] in ISO_8859_1
11984instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11985                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11986                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11987  match(Set result (EncodeISOArray src (Binary dst len)));
11988  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11989
11990  format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11991  ins_encode %{
11992    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11993                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11994                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11995  %}
11996  ins_pipe( pipe_slow );
11997%}
11998
11999
12000//----------Control Flow Instructions------------------------------------------
12001// Signed compare Instructions
12002instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12003  match(Set cr (CmpI op1 op2));
12004  effect( DEF cr, USE op1, USE op2 );
12005  format %{ "CMP    $op1,$op2" %}
12006  opcode(0x3B);  /* Opcode 3B /r */
12007  ins_encode( OpcP, RegReg( op1, op2) );
12008  ins_pipe( ialu_cr_reg_reg );
12009%}
12010
12011instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12012  match(Set cr (CmpI op1 op2));
12013  effect( DEF cr, USE op1 );
12014  format %{ "CMP    $op1,$op2" %}
12015  opcode(0x81,0x07);  /* Opcode 81 /7 */
12016  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12017  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12018  ins_pipe( ialu_cr_reg_imm );
12019%}
12020
12021// Cisc-spilled version of cmpI_eReg
12022instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12023  match(Set cr (CmpI op1 (LoadI op2)));
12024
12025  format %{ "CMP    $op1,$op2" %}
12026  ins_cost(500);
12027  opcode(0x3B);  /* Opcode 3B /r */
12028  ins_encode( OpcP, RegMem( op1, op2) );
12029  ins_pipe( ialu_cr_reg_mem );
12030%}
12031
12032instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
12033  match(Set cr (CmpI src zero));
12034  effect( DEF cr, USE src );
12035
12036  format %{ "TEST   $src,$src" %}
12037  opcode(0x85);
12038  ins_encode( OpcP, RegReg( src, src ) );
12039  ins_pipe( ialu_cr_reg_imm );
12040%}
12041
12042instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
12043  match(Set cr (CmpI (AndI src con) zero));
12044
12045  format %{ "TEST   $src,$con" %}
12046  opcode(0xF7,0x00);
12047  ins_encode( OpcP, RegOpc(src), Con32(con) );
12048  ins_pipe( ialu_cr_reg_imm );
12049%}
12050
12051instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
12052  match(Set cr (CmpI (AndI src mem) zero));
12053
12054  format %{ "TEST   $src,$mem" %}
12055  opcode(0x85);
12056  ins_encode( OpcP, RegMem( src, mem ) );
12057  ins_pipe( ialu_cr_reg_mem );
12058%}
12059
12060// Unsigned compare Instructions; really, same as signed except they
12061// produce an eFlagsRegU instead of eFlagsReg.
12062instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12063  match(Set cr (CmpU op1 op2));
12064
12065  format %{ "CMPu   $op1,$op2" %}
12066  opcode(0x3B);  /* Opcode 3B /r */
12067  ins_encode( OpcP, RegReg( op1, op2) );
12068  ins_pipe( ialu_cr_reg_reg );
12069%}
12070
12071instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12072  match(Set cr (CmpU op1 op2));
12073
12074  format %{ "CMPu   $op1,$op2" %}
12075  opcode(0x81,0x07);  /* Opcode 81 /7 */
12076  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12077  ins_pipe( ialu_cr_reg_imm );
12078%}
12079
12080// // Cisc-spilled version of cmpU_eReg
12081instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12082  match(Set cr (CmpU op1 (LoadI op2)));
12083
12084  format %{ "CMPu   $op1,$op2" %}
12085  ins_cost(500);
12086  opcode(0x3B);  /* Opcode 3B /r */
12087  ins_encode( OpcP, RegMem( op1, op2) );
12088  ins_pipe( ialu_cr_reg_mem );
12089%}
12090
12091// // Cisc-spilled version of cmpU_eReg
12092//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12093//  match(Set cr (CmpU (LoadI op1) op2));
12094//
12095//  format %{ "CMPu   $op1,$op2" %}
12096//  ins_cost(500);
12097//  opcode(0x39);  /* Opcode 39 /r */
12098//  ins_encode( OpcP, RegMem( op1, op2) );
12099//%}
12100
12101instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
12102  match(Set cr (CmpU src zero));
12103
12104  format %{ "TESTu  $src,$src" %}
12105  opcode(0x85);
12106  ins_encode( OpcP, RegReg( src, src ) );
12107  ins_pipe( ialu_cr_reg_imm );
12108%}
12109
12110// Unsigned pointer compare Instructions
12111instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12112  match(Set cr (CmpP op1 op2));
12113
12114  format %{ "CMPu   $op1,$op2" %}
12115  opcode(0x3B);  /* Opcode 3B /r */
12116  ins_encode( OpcP, RegReg( op1, op2) );
12117  ins_pipe( ialu_cr_reg_reg );
12118%}
12119
12120instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12121  match(Set cr (CmpP op1 op2));
12122
12123  format %{ "CMPu   $op1,$op2" %}
12124  opcode(0x81,0x07);  /* Opcode 81 /7 */
12125  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12126  ins_pipe( ialu_cr_reg_imm );
12127%}
12128
12129// // Cisc-spilled version of cmpP_eReg
12130instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12131  match(Set cr (CmpP op1 (LoadP op2)));
12132
12133  format %{ "CMPu   $op1,$op2" %}
12134  ins_cost(500);
12135  opcode(0x3B);  /* Opcode 3B /r */
12136  ins_encode( OpcP, RegMem( op1, op2) );
12137  ins_pipe( ialu_cr_reg_mem );
12138%}
12139
12140// // Cisc-spilled version of cmpP_eReg
12141//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12142//  match(Set cr (CmpP (LoadP op1) op2));
12143//
12144//  format %{ "CMPu   $op1,$op2" %}
12145//  ins_cost(500);
12146//  opcode(0x39);  /* Opcode 39 /r */
12147//  ins_encode( OpcP, RegMem( op1, op2) );
12148//%}
12149
12150// Compare raw pointer (used in out-of-heap check).
12151// Only works because non-oop pointers must be raw pointers
12152// and raw pointers have no anti-dependencies.
12153instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12154  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12155  match(Set cr (CmpP op1 (LoadP op2)));
12156
12157  format %{ "CMPu   $op1,$op2" %}
12158  opcode(0x3B);  /* Opcode 3B /r */
12159  ins_encode( OpcP, RegMem( op1, op2) );
12160  ins_pipe( ialu_cr_reg_mem );
12161%}
12162
12163//
12164// This will generate a signed flags result. This should be ok
12165// since any compare to a zero should be eq/neq.
12166instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12167  match(Set cr (CmpP src zero));
12168
12169  format %{ "TEST   $src,$src" %}
12170  opcode(0x85);
12171  ins_encode( OpcP, RegReg( src, src ) );
12172  ins_pipe( ialu_cr_reg_imm );
12173%}
12174
12175// Cisc-spilled version of testP_reg
12176// This will generate a signed flags result. This should be ok
12177// since any compare to a zero should be eq/neq.
12178instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12179  match(Set cr (CmpP (LoadP op) zero));
12180
12181  format %{ "TEST   $op,0xFFFFFFFF" %}
12182  ins_cost(500);
12183  opcode(0xF7);               /* Opcode F7 /0 */
12184  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12185  ins_pipe( ialu_cr_reg_imm );
12186%}
12187
12188// Yanked all unsigned pointer compare operations.
12189// Pointer compares are done with CmpP which is already unsigned.
12190
12191//----------Max and Min--------------------------------------------------------
12192// Min Instructions
12193////
12194//   *** Min and Max using the conditional move are slower than the
12195//   *** branch version on a Pentium III.
12196// // Conditional move for min
12197//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12198//  effect( USE_DEF op2, USE op1, USE cr );
12199//  format %{ "CMOVlt $op2,$op1\t! min" %}
12200//  opcode(0x4C,0x0F);
12201//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12202//  ins_pipe( pipe_cmov_reg );
12203//%}
12204//
12205//// Min Register with Register (P6 version)
12206//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12207//  predicate(VM_Version::supports_cmov() );
12208//  match(Set op2 (MinI op1 op2));
12209//  ins_cost(200);
12210//  expand %{
12211//    eFlagsReg cr;
12212//    compI_eReg(cr,op1,op2);
12213//    cmovI_reg_lt(op2,op1,cr);
12214//  %}
12215//%}
12216
12217// Min Register with Register (generic version)
12218instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12219  match(Set dst (MinI dst src));
12220  effect(KILL flags);
12221  ins_cost(300);
12222
12223  format %{ "MIN    $dst,$src" %}
12224  opcode(0xCC);
12225  ins_encode( min_enc(dst,src) );
12226  ins_pipe( pipe_slow );
12227%}
12228
12229// Max Register with Register
12230//   *** Min and Max using the conditional move are slower than the
12231//   *** branch version on a Pentium III.
12232// // Conditional move for max
12233//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12234//  effect( USE_DEF op2, USE op1, USE cr );
12235//  format %{ "CMOVgt $op2,$op1\t! max" %}
12236//  opcode(0x4F,0x0F);
12237//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12238//  ins_pipe( pipe_cmov_reg );
12239//%}
12240//
12241// // Max Register with Register (P6 version)
12242//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12243//  predicate(VM_Version::supports_cmov() );
12244//  match(Set op2 (MaxI op1 op2));
12245//  ins_cost(200);
12246//  expand %{
12247//    eFlagsReg cr;
12248//    compI_eReg(cr,op1,op2);
12249//    cmovI_reg_gt(op2,op1,cr);
12250//  %}
12251//%}
12252
12253// Max Register with Register (generic version)
12254instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12255  match(Set dst (MaxI dst src));
12256  effect(KILL flags);
12257  ins_cost(300);
12258
12259  format %{ "MAX    $dst,$src" %}
12260  opcode(0xCC);
12261  ins_encode( max_enc(dst,src) );
12262  ins_pipe( pipe_slow );
12263%}
12264
12265// ============================================================================
12266// Counted Loop limit node which represents exact final iterator value.
12267// Note: the resulting value should fit into integer range since
12268// counted loops have limit check on overflow.
12269instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12270  match(Set limit (LoopLimit (Binary init limit) stride));
12271  effect(TEMP limit_hi, TEMP tmp, KILL flags);
12272  ins_cost(300);
12273
12274  format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12275  ins_encode %{
12276    int strd = (int)$stride$$constant;
12277    assert(strd != 1 && strd != -1, "sanity");
12278    int m1 = (strd > 0) ? 1 : -1;
12279    // Convert limit to long (EAX:EDX)
12280    __ cdql();
12281    // Convert init to long (init:tmp)
12282    __ movl($tmp$$Register, $init$$Register);
12283    __ sarl($tmp$$Register, 31);
12284    // $limit - $init
12285    __ subl($limit$$Register, $init$$Register);
12286    __ sbbl($limit_hi$$Register, $tmp$$Register);
12287    // + ($stride - 1)
12288    if (strd > 0) {
12289      __ addl($limit$$Register, (strd - 1));
12290      __ adcl($limit_hi$$Register, 0);
12291      __ movl($tmp$$Register, strd);
12292    } else {
12293      __ addl($limit$$Register, (strd + 1));
12294      __ adcl($limit_hi$$Register, -1);
12295      __ lneg($limit_hi$$Register, $limit$$Register);
12296      __ movl($tmp$$Register, -strd);
12297    }
12298    // signed devision: (EAX:EDX) / pos_stride
12299    __ idivl($tmp$$Register);
12300    if (strd < 0) {
12301      // restore sign
12302      __ negl($tmp$$Register);
12303    }
12304    // (EAX) * stride
12305    __ mull($tmp$$Register);
12306    // + init (ignore upper bits)
12307    __ addl($limit$$Register, $init$$Register);
12308  %}
12309  ins_pipe( pipe_slow );
12310%}
12311
12312// ============================================================================
12313// Branch Instructions
12314// Jump Table
12315instruct jumpXtnd(rRegI switch_val) %{
12316  match(Jump switch_val);
12317  ins_cost(350);
12318  format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12319  ins_encode %{
12320    // Jump to Address(table_base + switch_reg)
12321    Address index(noreg, $switch_val$$Register, Address::times_1);
12322    __ jump(ArrayAddress($constantaddress, index));
12323  %}
12324  ins_pipe(pipe_jmp);
12325%}
12326
12327// Jump Direct - Label defines a relative address from JMP+1
12328instruct jmpDir(label labl) %{
12329  match(Goto);
12330  effect(USE labl);
12331
12332  ins_cost(300);
12333  format %{ "JMP    $labl" %}
12334  size(5);
12335  ins_encode %{
12336    Label* L = $labl$$label;
12337    __ jmp(*L, false); // Always long jump
12338  %}
12339  ins_pipe( pipe_jmp );
12340%}
12341
12342// Jump Direct Conditional - Label defines a relative address from Jcc+1
12343instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12344  match(If cop cr);
12345  effect(USE labl);
12346
12347  ins_cost(300);
12348  format %{ "J$cop    $labl" %}
12349  size(6);
12350  ins_encode %{
12351    Label* L = $labl$$label;
12352    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12353  %}
12354  ins_pipe( pipe_jcc );
12355%}
12356
12357// Jump Direct Conditional - Label defines a relative address from Jcc+1
12358instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12359  predicate(!n->has_vector_mask_set());
12360  match(CountedLoopEnd cop cr);
12361  effect(USE labl);
12362
12363  ins_cost(300);
12364  format %{ "J$cop    $labl\t# Loop end" %}
12365  size(6);
12366  ins_encode %{
12367    Label* L = $labl$$label;
12368    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12369  %}
12370  ins_pipe( pipe_jcc );
12371%}
12372
12373// Jump Direct Conditional - Label defines a relative address from Jcc+1
12374instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12375  predicate(!n->has_vector_mask_set());
12376  match(CountedLoopEnd cop cmp);
12377  effect(USE labl);
12378
12379  ins_cost(300);
12380  format %{ "J$cop,u  $labl\t# Loop end" %}
12381  size(6);
12382  ins_encode %{
12383    Label* L = $labl$$label;
12384    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12385  %}
12386  ins_pipe( pipe_jcc );
12387%}
12388
12389instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12390  predicate(!n->has_vector_mask_set());
12391  match(CountedLoopEnd cop cmp);
12392  effect(USE labl);
12393
12394  ins_cost(200);
12395  format %{ "J$cop,u  $labl\t# Loop end" %}
12396  size(6);
12397  ins_encode %{
12398    Label* L = $labl$$label;
12399    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12400  %}
12401  ins_pipe( pipe_jcc );
12402%}
12403
12404// mask version
12405// Jump Direct Conditional - Label defines a relative address from Jcc+1
12406instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
12407  predicate(n->has_vector_mask_set());
12408  match(CountedLoopEnd cop cr);
12409  effect(USE labl);
12410
12411  ins_cost(400);
12412  format %{ "J$cop    $labl\t# Loop end\n\t"
12413            "restorevectmask \t# vector mask restore for loops" %}
12414  size(10);
12415  ins_encode %{
12416    Label* L = $labl$$label;
12417    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12418    __ restorevectmask();
12419  %}
12420  ins_pipe( pipe_jcc );
12421%}
12422
12423// Jump Direct Conditional - Label defines a relative address from Jcc+1
12424instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12425  predicate(n->has_vector_mask_set());
12426  match(CountedLoopEnd cop cmp);
12427  effect(USE labl);
12428
12429  ins_cost(400);
12430  format %{ "J$cop,u  $labl\t# Loop end\n\t"
12431            "restorevectmask \t# vector mask restore for loops" %}
12432  size(10);
12433  ins_encode %{
12434    Label* L = $labl$$label;
12435    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12436    __ restorevectmask();
12437  %}
12438  ins_pipe( pipe_jcc );
12439%}
12440
12441instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12442  predicate(n->has_vector_mask_set());
12443  match(CountedLoopEnd cop cmp);
12444  effect(USE labl);
12445
12446  ins_cost(300);
12447  format %{ "J$cop,u  $labl\t# Loop end\n\t"
12448            "restorevectmask \t# vector mask restore for loops" %}
12449  size(10);
12450  ins_encode %{
12451    Label* L = $labl$$label;
12452    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12453    __ restorevectmask();
12454  %}
12455  ins_pipe( pipe_jcc );
12456%}
12457
12458// Jump Direct Conditional - using unsigned comparison
12459instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12460  match(If cop cmp);
12461  effect(USE labl);
12462
12463  ins_cost(300);
12464  format %{ "J$cop,u  $labl" %}
12465  size(6);
12466  ins_encode %{
12467    Label* L = $labl$$label;
12468    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12469  %}
12470  ins_pipe(pipe_jcc);
12471%}
12472
12473instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12474  match(If cop cmp);
12475  effect(USE labl);
12476
12477  ins_cost(200);
12478  format %{ "J$cop,u  $labl" %}
12479  size(6);
12480  ins_encode %{
12481    Label* L = $labl$$label;
12482    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12483  %}
12484  ins_pipe(pipe_jcc);
12485%}
12486
12487instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12488  match(If cop cmp);
12489  effect(USE labl);
12490
12491  ins_cost(200);
12492  format %{ $$template
12493    if ($cop$$cmpcode == Assembler::notEqual) {
12494      $$emit$$"JP,u   $labl\n\t"
12495      $$emit$$"J$cop,u   $labl"
12496    } else {
12497      $$emit$$"JP,u   done\n\t"
12498      $$emit$$"J$cop,u   $labl\n\t"
12499      $$emit$$"done:"
12500    }
12501  %}
12502  ins_encode %{
12503    Label* l = $labl$$label;
12504    if ($cop$$cmpcode == Assembler::notEqual) {
12505      __ jcc(Assembler::parity, *l, false);
12506      __ jcc(Assembler::notEqual, *l, false);
12507    } else if ($cop$$cmpcode == Assembler::equal) {
12508      Label done;
12509      __ jccb(Assembler::parity, done);
12510      __ jcc(Assembler::equal, *l, false);
12511      __ bind(done);
12512    } else {
12513       ShouldNotReachHere();
12514    }
12515  %}
12516  ins_pipe(pipe_jcc);
12517%}
12518
12519// ============================================================================
12520// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12521// array for an instance of the superklass.  Set a hidden internal cache on a
12522// hit (cache is checked with exposed code in gen_subtype_check()).  Return
12523// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12524instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12525  match(Set result (PartialSubtypeCheck sub super));
12526  effect( KILL rcx, KILL cr );
12527
12528  ins_cost(1100);  // slightly larger than the next version
12529  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12530            "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12531            "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12532            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12533            "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12534            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12535            "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12536     "miss:\t" %}
12537
12538  opcode(0x1); // Force a XOR of EDI
12539  ins_encode( enc_PartialSubtypeCheck() );
12540  ins_pipe( pipe_slow );
12541%}
12542
12543instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12544  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12545  effect( KILL rcx, KILL result );
12546
12547  ins_cost(1000);
12548  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12549            "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12550            "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12551            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12552            "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12553            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12554     "miss:\t" %}
12555
12556  opcode(0x0);  // No need to XOR EDI
12557  ins_encode( enc_PartialSubtypeCheck() );
12558  ins_pipe( pipe_slow );
12559%}
12560
12561// ============================================================================
12562// Branch Instructions -- short offset versions
12563//
12564// These instructions are used to replace jumps of a long offset (the default
12565// match) with jumps of a shorter offset.  These instructions are all tagged
12566// with the ins_short_branch attribute, which causes the ADLC to suppress the
12567// match rules in general matching.  Instead, the ADLC generates a conversion
12568// method in the MachNode which can be used to do in-place replacement of the
12569// long variant with the shorter variant.  The compiler will determine if a
12570// branch can be taken by the is_short_branch_offset() predicate in the machine
12571// specific code section of the file.
12572
12573// Jump Direct - Label defines a relative address from JMP+1
12574instruct jmpDir_short(label labl) %{
12575  match(Goto);
12576  effect(USE labl);
12577
12578  ins_cost(300);
12579  format %{ "JMP,s  $labl" %}
12580  size(2);
12581  ins_encode %{
12582    Label* L = $labl$$label;
12583    __ jmpb(*L);
12584  %}
12585  ins_pipe( pipe_jmp );
12586  ins_short_branch(1);
12587%}
12588
12589// Jump Direct Conditional - Label defines a relative address from Jcc+1
12590instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12591  match(If cop cr);
12592  effect(USE labl);
12593
12594  ins_cost(300);
12595  format %{ "J$cop,s  $labl" %}
12596  size(2);
12597  ins_encode %{
12598    Label* L = $labl$$label;
12599    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12600  %}
12601  ins_pipe( pipe_jcc );
12602  ins_short_branch(1);
12603%}
12604
12605// Jump Direct Conditional - Label defines a relative address from Jcc+1
12606instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12607  match(CountedLoopEnd cop cr);
12608  effect(USE labl);
12609
12610  ins_cost(300);
12611  format %{ "J$cop,s  $labl\t# Loop end" %}
12612  size(2);
12613  ins_encode %{
12614    Label* L = $labl$$label;
12615    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12616  %}
12617  ins_pipe( pipe_jcc );
12618  ins_short_branch(1);
12619%}
12620
12621// Jump Direct Conditional - Label defines a relative address from Jcc+1
12622instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12623  match(CountedLoopEnd cop cmp);
12624  effect(USE labl);
12625
12626  ins_cost(300);
12627  format %{ "J$cop,us $labl\t# Loop end" %}
12628  size(2);
12629  ins_encode %{
12630    Label* L = $labl$$label;
12631    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12632  %}
12633  ins_pipe( pipe_jcc );
12634  ins_short_branch(1);
12635%}
12636
12637instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12638  match(CountedLoopEnd cop cmp);
12639  effect(USE labl);
12640
12641  ins_cost(300);
12642  format %{ "J$cop,us $labl\t# Loop end" %}
12643  size(2);
12644  ins_encode %{
12645    Label* L = $labl$$label;
12646    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12647  %}
12648  ins_pipe( pipe_jcc );
12649  ins_short_branch(1);
12650%}
12651
12652// Jump Direct Conditional - using unsigned comparison
12653instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12654  match(If cop cmp);
12655  effect(USE labl);
12656
12657  ins_cost(300);
12658  format %{ "J$cop,us $labl" %}
12659  size(2);
12660  ins_encode %{
12661    Label* L = $labl$$label;
12662    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12663  %}
12664  ins_pipe( pipe_jcc );
12665  ins_short_branch(1);
12666%}
12667
12668instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12669  match(If cop cmp);
12670  effect(USE labl);
12671
12672  ins_cost(300);
12673  format %{ "J$cop,us $labl" %}
12674  size(2);
12675  ins_encode %{
12676    Label* L = $labl$$label;
12677    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12678  %}
12679  ins_pipe( pipe_jcc );
12680  ins_short_branch(1);
12681%}
12682
12683instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12684  match(If cop cmp);
12685  effect(USE labl);
12686
12687  ins_cost(300);
12688  format %{ $$template
12689    if ($cop$$cmpcode == Assembler::notEqual) {
12690      $$emit$$"JP,u,s   $labl\n\t"
12691      $$emit$$"J$cop,u,s   $labl"
12692    } else {
12693      $$emit$$"JP,u,s   done\n\t"
12694      $$emit$$"J$cop,u,s  $labl\n\t"
12695      $$emit$$"done:"
12696    }
12697  %}
12698  size(4);
12699  ins_encode %{
12700    Label* l = $labl$$label;
12701    if ($cop$$cmpcode == Assembler::notEqual) {
12702      __ jccb(Assembler::parity, *l);
12703      __ jccb(Assembler::notEqual, *l);
12704    } else if ($cop$$cmpcode == Assembler::equal) {
12705      Label done;
12706      __ jccb(Assembler::parity, done);
12707      __ jccb(Assembler::equal, *l);
12708      __ bind(done);
12709    } else {
12710       ShouldNotReachHere();
12711    }
12712  %}
12713  ins_pipe(pipe_jcc);
12714  ins_short_branch(1);
12715%}
12716
12717// ============================================================================
12718// Long Compare
12719//
12720// Currently we hold longs in 2 registers.  Comparing such values efficiently
12721// is tricky.  The flavor of compare used depends on whether we are testing
12722// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12723// The GE test is the negated LT test.  The LE test can be had by commuting
12724// the operands (yielding a GE test) and then negating; negate again for the
12725// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12726// NE test is negated from that.
12727
12728// Due to a shortcoming in the ADLC, it mixes up expressions like:
12729// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12730// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12731// are collapsed internally in the ADLC's dfa-gen code.  The match for
12732// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12733// foo match ends up with the wrong leaf.  One fix is to not match both
12734// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12735// both forms beat the trinary form of long-compare and both are very useful
12736// on Intel which has so few registers.
12737
12738// Manifest a CmpL result in an integer register.  Very painful.
12739// This is the test to avoid.
12740instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12741  match(Set dst (CmpL3 src1 src2));
12742  effect( KILL flags );
12743  ins_cost(1000);
12744  format %{ "XOR    $dst,$dst\n\t"
12745            "CMP    $src1.hi,$src2.hi\n\t"
12746            "JLT,s  m_one\n\t"
12747            "JGT,s  p_one\n\t"
12748            "CMP    $src1.lo,$src2.lo\n\t"
12749            "JB,s   m_one\n\t"
12750            "JEQ,s  done\n"
12751    "p_one:\tINC    $dst\n\t"
12752            "JMP,s  done\n"
12753    "m_one:\tDEC    $dst\n"
12754     "done:" %}
12755  ins_encode %{
12756    Label p_one, m_one, done;
12757    __ xorptr($dst$$Register, $dst$$Register);
12758    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12759    __ jccb(Assembler::less,    m_one);
12760    __ jccb(Assembler::greater, p_one);
12761    __ cmpl($src1$$Register, $src2$$Register);
12762    __ jccb(Assembler::below,   m_one);
12763    __ jccb(Assembler::equal,   done);
12764    __ bind(p_one);
12765    __ incrementl($dst$$Register);
12766    __ jmpb(done);
12767    __ bind(m_one);
12768    __ decrementl($dst$$Register);
12769    __ bind(done);
12770  %}
12771  ins_pipe( pipe_slow );
12772%}
12773
12774//======
12775// Manifest a CmpL result in the normal flags.  Only good for LT or GE
12776// compares.  Can be used for LE or GT compares by reversing arguments.
12777// NOT GOOD FOR EQ/NE tests.
12778instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12779  match( Set flags (CmpL src zero ));
12780  ins_cost(100);
12781  format %{ "TEST   $src.hi,$src.hi" %}
12782  opcode(0x85);
12783  ins_encode( OpcP, RegReg_Hi2( src, src ) );
12784  ins_pipe( ialu_cr_reg_reg );
12785%}
12786
12787// Manifest a CmpL result in the normal flags.  Only good for LT or GE
12788// compares.  Can be used for LE or GT compares by reversing arguments.
12789// NOT GOOD FOR EQ/NE tests.
12790instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12791  match( Set flags (CmpL src1 src2 ));
12792  effect( TEMP tmp );
12793  ins_cost(300);
12794  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12795            "MOV    $tmp,$src1.hi\n\t"
12796            "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12797  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12798  ins_pipe( ialu_cr_reg_reg );
12799%}
12800
12801// Long compares reg < zero/req OR reg >= zero/req.
12802// Just a wrapper for a normal branch, plus the predicate test.
12803instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12804  match(If cmp flags);
12805  effect(USE labl);
12806  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12807  expand %{
12808    jmpCon(cmp,flags,labl);    // JLT or JGE...
12809  %}
12810%}
12811
12812//======
12813// Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12814// compares.  Can be used for LE or GT compares by reversing arguments.
12815// NOT GOOD FOR EQ/NE tests.
12816instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
12817  match(Set flags (CmpUL src zero));
12818  ins_cost(100);
12819  format %{ "TEST   $src.hi,$src.hi" %}
12820  opcode(0x85);
12821  ins_encode(OpcP, RegReg_Hi2(src, src));
12822  ins_pipe(ialu_cr_reg_reg);
12823%}
12824
12825// Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12826// compares.  Can be used for LE or GT compares by reversing arguments.
12827// NOT GOOD FOR EQ/NE tests.
12828instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
12829  match(Set flags (CmpUL src1 src2));
12830  effect(TEMP tmp);
12831  ins_cost(300);
12832  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12833            "MOV    $tmp,$src1.hi\n\t"
12834            "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
12835  ins_encode(long_cmp_flags2(src1, src2, tmp));
12836  ins_pipe(ialu_cr_reg_reg);
12837%}
12838
12839// Unsigned long compares reg < zero/req OR reg >= zero/req.
12840// Just a wrapper for a normal branch, plus the predicate test.
12841instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
12842  match(If cmp flags);
12843  effect(USE labl);
12844  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
12845  expand %{
12846    jmpCon(cmp, flags, labl);    // JLT or JGE...
12847  %}
12848%}
12849
12850// Compare 2 longs and CMOVE longs.
12851instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12852  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12853  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12854  ins_cost(400);
12855  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12856            "CMOV$cmp $dst.hi,$src.hi" %}
12857  opcode(0x0F,0x40);
12858  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12859  ins_pipe( pipe_cmov_reg_long );
12860%}
12861
12862instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12863  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12864  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12865  ins_cost(500);
12866  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12867            "CMOV$cmp $dst.hi,$src.hi" %}
12868  opcode(0x0F,0x40);
12869  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12870  ins_pipe( pipe_cmov_reg_long );
12871%}
12872
12873// Compare 2 longs and CMOVE ints.
12874instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12875  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12876  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12877  ins_cost(200);
12878  format %{ "CMOV$cmp $dst,$src" %}
12879  opcode(0x0F,0x40);
12880  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12881  ins_pipe( pipe_cmov_reg );
12882%}
12883
12884instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12885  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12886  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12887  ins_cost(250);
12888  format %{ "CMOV$cmp $dst,$src" %}
12889  opcode(0x0F,0x40);
12890  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12891  ins_pipe( pipe_cmov_mem );
12892%}
12893
12894// Compare 2 longs and CMOVE ints.
12895instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12896  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12897  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12898  ins_cost(200);
12899  format %{ "CMOV$cmp $dst,$src" %}
12900  opcode(0x0F,0x40);
12901  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12902  ins_pipe( pipe_cmov_reg );
12903%}
12904
12905// Compare 2 longs and CMOVE doubles
12906instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12907  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12908  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12909  ins_cost(200);
12910  expand %{
12911    fcmovDPR_regS(cmp,flags,dst,src);
12912  %}
12913%}
12914
12915// Compare 2 longs and CMOVE doubles
12916instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12917  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12918  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12919  ins_cost(200);
12920  expand %{
12921    fcmovD_regS(cmp,flags,dst,src);
12922  %}
12923%}
12924
12925instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
12926  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12927  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12928  ins_cost(200);
12929  expand %{
12930    fcmovFPR_regS(cmp,flags,dst,src);
12931  %}
12932%}
12933
12934instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12935  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )  );
12936  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12937  ins_cost(200);
12938  expand %{
12939    fcmovF_regS(cmp,flags,dst,src);
12940  %}
12941%}
12942
12943//======
12944// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12945instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12946  match( Set flags (CmpL src zero ));
12947  effect(TEMP tmp);
12948  ins_cost(200);
12949  format %{ "MOV    $tmp,$src.lo\n\t"
12950            "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12951  ins_encode( long_cmp_flags0( src, tmp ) );
12952  ins_pipe( ialu_reg_reg_long );
12953%}
12954
12955// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12956instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12957  match( Set flags (CmpL src1 src2 ));
12958  ins_cost(200+300);
12959  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12960            "JNE,s  skip\n\t"
12961            "CMP    $src1.hi,$src2.hi\n\t"
12962     "skip:\t" %}
12963  ins_encode( long_cmp_flags1( src1, src2 ) );
12964  ins_pipe( ialu_cr_reg_reg );
12965%}
12966
12967// Long compare reg == zero/reg OR reg != zero/reg
12968// Just a wrapper for a normal branch, plus the predicate test.
12969instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12970  match(If cmp flags);
12971  effect(USE labl);
12972  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12973  expand %{
12974    jmpCon(cmp,flags,labl);    // JEQ or JNE...
12975  %}
12976%}
12977
12978//======
12979// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
12980instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
12981  match(Set flags (CmpUL src zero));
12982  effect(TEMP tmp);
12983  ins_cost(200);
12984  format %{ "MOV    $tmp,$src.lo\n\t"
12985            "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
12986  ins_encode(long_cmp_flags0(src, tmp));
12987  ins_pipe(ialu_reg_reg_long);
12988%}
12989
12990// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
12991instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
12992  match(Set flags (CmpUL src1 src2));
12993  ins_cost(200+300);
12994  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12995            "JNE,s  skip\n\t"
12996            "CMP    $src1.hi,$src2.hi\n\t"
12997     "skip:\t" %}
12998  ins_encode(long_cmp_flags1(src1, src2));
12999  ins_pipe(ialu_cr_reg_reg);
13000%}
13001
13002// Unsigned long compare reg == zero/reg OR reg != zero/reg
13003// Just a wrapper for a normal branch, plus the predicate test.
13004instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13005  match(If cmp flags);
13006  effect(USE labl);
13007  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13008  expand %{
13009    jmpCon(cmp, flags, labl);    // JEQ or JNE...
13010  %}
13011%}
13012
13013// Compare 2 longs and CMOVE longs.
13014instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13015  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13016  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13017  ins_cost(400);
13018  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13019            "CMOV$cmp $dst.hi,$src.hi" %}
13020  opcode(0x0F,0x40);
13021  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13022  ins_pipe( pipe_cmov_reg_long );
13023%}
13024
13025instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13026  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13027  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13028  ins_cost(500);
13029  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13030            "CMOV$cmp $dst.hi,$src.hi" %}
13031  opcode(0x0F,0x40);
13032  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13033  ins_pipe( pipe_cmov_reg_long );
13034%}
13035
13036// Compare 2 longs and CMOVE ints.
13037instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13038  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13039  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13040  ins_cost(200);
13041  format %{ "CMOV$cmp $dst,$src" %}
13042  opcode(0x0F,0x40);
13043  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13044  ins_pipe( pipe_cmov_reg );
13045%}
13046
13047instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13048  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13049  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13050  ins_cost(250);
13051  format %{ "CMOV$cmp $dst,$src" %}
13052  opcode(0x0F,0x40);
13053  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13054  ins_pipe( pipe_cmov_mem );
13055%}
13056
13057// Compare 2 longs and CMOVE ints.
13058instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13059  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13060  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13061  ins_cost(200);
13062  format %{ "CMOV$cmp $dst,$src" %}
13063  opcode(0x0F,0x40);
13064  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13065  ins_pipe( pipe_cmov_reg );
13066%}
13067
13068// Compare 2 longs and CMOVE doubles
13069instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13070  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13071  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13072  ins_cost(200);
13073  expand %{
13074    fcmovDPR_regS(cmp,flags,dst,src);
13075  %}
13076%}
13077
13078// Compare 2 longs and CMOVE doubles
13079instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13080  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13081  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13082  ins_cost(200);
13083  expand %{
13084    fcmovD_regS(cmp,flags,dst,src);
13085  %}
13086%}
13087
13088instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13089  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13090  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13091  ins_cost(200);
13092  expand %{
13093    fcmovFPR_regS(cmp,flags,dst,src);
13094  %}
13095%}
13096
13097instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13098  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13099  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13100  ins_cost(200);
13101  expand %{
13102    fcmovF_regS(cmp,flags,dst,src);
13103  %}
13104%}
13105
13106//======
13107// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13108// Same as cmpL_reg_flags_LEGT except must negate src
13109instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13110  match( Set flags (CmpL src zero ));
13111  effect( TEMP tmp );
13112  ins_cost(300);
13113  format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13114            "CMP    $tmp,$src.lo\n\t"
13115            "SBB    $tmp,$src.hi\n\t" %}
13116  ins_encode( long_cmp_flags3(src, tmp) );
13117  ins_pipe( ialu_reg_reg_long );
13118%}
13119
13120// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13121// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13122// requires a commuted test to get the same result.
13123instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13124  match( Set flags (CmpL src1 src2 ));
13125  effect( TEMP tmp );
13126  ins_cost(300);
13127  format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13128            "MOV    $tmp,$src2.hi\n\t"
13129            "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13130  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13131  ins_pipe( ialu_cr_reg_reg );
13132%}
13133
13134// Long compares reg < zero/req OR reg >= zero/req.
13135// Just a wrapper for a normal branch, plus the predicate test
13136instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13137  match(If cmp flags);
13138  effect(USE labl);
13139  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13140  ins_cost(300);
13141  expand %{
13142    jmpCon(cmp,flags,labl);    // JGT or JLE...
13143  %}
13144%}
13145
13146//======
13147// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13148// Same as cmpUL_reg_flags_LEGT except must negate src
13149instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13150  match(Set flags (CmpUL src zero));
13151  effect(TEMP tmp);
13152  ins_cost(300);
13153  format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13154            "CMP    $tmp,$src.lo\n\t"
13155            "SBB    $tmp,$src.hi\n\t" %}
13156  ins_encode(long_cmp_flags3(src, tmp));
13157  ins_pipe(ialu_reg_reg_long);
13158%}
13159
13160// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13161// Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13162// requires a commuted test to get the same result.
13163instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13164  match(Set flags (CmpUL src1 src2));
13165  effect(TEMP tmp);
13166  ins_cost(300);
13167  format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13168            "MOV    $tmp,$src2.hi\n\t"
13169            "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13170  ins_encode(long_cmp_flags2( src2, src1, tmp));
13171  ins_pipe(ialu_cr_reg_reg);
13172%}
13173
13174// Unsigned long compares reg < zero/req OR reg >= zero/req.
13175// Just a wrapper for a normal branch, plus the predicate test
13176instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13177  match(If cmp flags);
13178  effect(USE labl);
13179  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13180  ins_cost(300);
13181  expand %{
13182    jmpCon(cmp, flags, labl);    // JGT or JLE...
13183  %}
13184%}
13185
13186// Compare 2 longs and CMOVE longs.
13187instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13188  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13189  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13190  ins_cost(400);
13191  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13192            "CMOV$cmp $dst.hi,$src.hi" %}
13193  opcode(0x0F,0x40);
13194  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13195  ins_pipe( pipe_cmov_reg_long );
13196%}
13197
13198instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13199  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13200  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13201  ins_cost(500);
13202  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13203            "CMOV$cmp $dst.hi,$src.hi+4" %}
13204  opcode(0x0F,0x40);
13205  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13206  ins_pipe( pipe_cmov_reg_long );
13207%}
13208
13209// Compare 2 longs and CMOVE ints.
13210instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13211  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13212  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13213  ins_cost(200);
13214  format %{ "CMOV$cmp $dst,$src" %}
13215  opcode(0x0F,0x40);
13216  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13217  ins_pipe( pipe_cmov_reg );
13218%}
13219
13220instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13221  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13222  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13223  ins_cost(250);
13224  format %{ "CMOV$cmp $dst,$src" %}
13225  opcode(0x0F,0x40);
13226  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13227  ins_pipe( pipe_cmov_mem );
13228%}
13229
13230// Compare 2 longs and CMOVE ptrs.
13231instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13232  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13233  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13234  ins_cost(200);
13235  format %{ "CMOV$cmp $dst,$src" %}
13236  opcode(0x0F,0x40);
13237  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13238  ins_pipe( pipe_cmov_reg );
13239%}
13240
13241// Compare 2 longs and CMOVE doubles
13242instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13243  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13244  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13245  ins_cost(200);
13246  expand %{
13247    fcmovDPR_regS(cmp,flags,dst,src);
13248  %}
13249%}
13250
13251// Compare 2 longs and CMOVE doubles
13252instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13253  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13254  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13255  ins_cost(200);
13256  expand %{
13257    fcmovD_regS(cmp,flags,dst,src);
13258  %}
13259%}
13260
13261instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13262  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13263  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13264  ins_cost(200);
13265  expand %{
13266    fcmovFPR_regS(cmp,flags,dst,src);
13267  %}
13268%}
13269
13270
13271instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13272  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13273  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13274  ins_cost(200);
13275  expand %{
13276    fcmovF_regS(cmp,flags,dst,src);
13277  %}
13278%}
13279
13280
13281// ============================================================================
13282// Procedure Call/Return Instructions
13283// Call Java Static Instruction
13284// Note: If this code changes, the corresponding ret_addr_offset() and
13285//       compute_padding() functions will have to be adjusted.
13286instruct CallStaticJavaDirect(method meth) %{
13287  match(CallStaticJava);
13288  effect(USE meth);
13289
13290  ins_cost(300);
13291  format %{ "CALL,static " %}
13292  opcode(0xE8); /* E8 cd */
13293  ins_encode( pre_call_resets,
13294              Java_Static_Call( meth ),
13295              call_epilog,
13296              post_call_FPU );
13297  ins_pipe( pipe_slow );
13298  ins_alignment(4);
13299%}
13300
13301// Call Java Dynamic Instruction
13302// Note: If this code changes, the corresponding ret_addr_offset() and
13303//       compute_padding() functions will have to be adjusted.
13304instruct CallDynamicJavaDirect(method meth) %{
13305  match(CallDynamicJava);
13306  effect(USE meth);
13307
13308  ins_cost(300);
13309  format %{ "MOV    EAX,(oop)-1\n\t"
13310            "CALL,dynamic" %}
13311  opcode(0xE8); /* E8 cd */
13312  ins_encode( pre_call_resets,
13313              Java_Dynamic_Call( meth ),
13314              call_epilog,
13315              post_call_FPU );
13316  ins_pipe( pipe_slow );
13317  ins_alignment(4);
13318%}
13319
13320// Call Runtime Instruction
13321instruct CallRuntimeDirect(method meth) %{
13322  match(CallRuntime );
13323  effect(USE meth);
13324
13325  ins_cost(300);
13326  format %{ "CALL,runtime " %}
13327  opcode(0xE8); /* E8 cd */
13328  // Use FFREEs to clear entries in float stack
13329  ins_encode( pre_call_resets,
13330              FFree_Float_Stack_All,
13331              Java_To_Runtime( meth ),
13332              post_call_FPU );
13333  ins_pipe( pipe_slow );
13334%}
13335
13336// Call runtime without safepoint
13337instruct CallLeafDirect(method meth) %{
13338  match(CallLeaf);
13339  effect(USE meth);
13340
13341  ins_cost(300);
13342  format %{ "CALL_LEAF,runtime " %}
13343  opcode(0xE8); /* E8 cd */
13344  ins_encode( pre_call_resets,
13345              FFree_Float_Stack_All,
13346              Java_To_Runtime( meth ),
13347              Verify_FPU_For_Leaf, post_call_FPU );
13348  ins_pipe( pipe_slow );
13349%}
13350
13351instruct CallLeafNoFPDirect(method meth) %{
13352  match(CallLeafNoFP);
13353  effect(USE meth);
13354
13355  ins_cost(300);
13356  format %{ "CALL_LEAF_NOFP,runtime " %}
13357  opcode(0xE8); /* E8 cd */
13358  ins_encode(pre_call_resets, Java_To_Runtime(meth));
13359  ins_pipe( pipe_slow );
13360%}
13361
13362
13363// Return Instruction
13364// Remove the return address & jump to it.
13365instruct Ret() %{
13366  match(Return);
13367  format %{ "RET" %}
13368  opcode(0xC3);
13369  ins_encode(OpcP);
13370  ins_pipe( pipe_jmp );
13371%}
13372
13373// Tail Call; Jump from runtime stub to Java code.
13374// Also known as an 'interprocedural jump'.
13375// Target of jump will eventually return to caller.
13376// TailJump below removes the return address.
13377instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13378  match(TailCall jump_target method_oop );
13379  ins_cost(300);
13380  format %{ "JMP    $jump_target \t# EBX holds method oop" %}
13381  opcode(0xFF, 0x4);  /* Opcode FF /4 */
13382  ins_encode( OpcP, RegOpc(jump_target) );
13383  ins_pipe( pipe_jmp );
13384%}
13385
13386
13387// Tail Jump; remove the return address; jump to target.
13388// TailCall above leaves the return address around.
13389instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13390  match( TailJump jump_target ex_oop );
13391  ins_cost(300);
13392  format %{ "POP    EDX\t# pop return address into dummy\n\t"
13393            "JMP    $jump_target " %}
13394  opcode(0xFF, 0x4);  /* Opcode FF /4 */
13395  ins_encode( enc_pop_rdx,
13396              OpcP, RegOpc(jump_target) );
13397  ins_pipe( pipe_jmp );
13398%}
13399
13400// Create exception oop: created by stack-crawling runtime code.
13401// Created exception is now available to this handler, and is setup
13402// just prior to jumping to this handler.  No code emitted.
13403instruct CreateException( eAXRegP ex_oop )
13404%{
13405  match(Set ex_oop (CreateEx));
13406
13407  size(0);
13408  // use the following format syntax
13409  format %{ "# exception oop is in EAX; no code emitted" %}
13410  ins_encode();
13411  ins_pipe( empty );
13412%}
13413
13414
13415// Rethrow exception:
13416// The exception oop will come in the first argument position.
13417// Then JUMP (not call) to the rethrow stub code.
13418instruct RethrowException()
13419%{
13420  match(Rethrow);
13421
13422  // use the following format syntax
13423  format %{ "JMP    rethrow_stub" %}
13424  ins_encode(enc_rethrow);
13425  ins_pipe( pipe_jmp );
13426%}
13427
13428// inlined locking and unlocking
13429
13430instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13431  predicate(Compile::current()->use_rtm());
13432  match(Set cr (FastLock object box));
13433  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13434  ins_cost(300);
13435  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13436  ins_encode %{
13437    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13438                 $scr$$Register, $cx1$$Register, $cx2$$Register,
13439                 _counters, _rtm_counters, _stack_rtm_counters,
13440                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13441                 true, ra_->C->profile_rtm());
13442  %}
13443  ins_pipe(pipe_slow);
13444%}
13445
13446instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13447  predicate(!Compile::current()->use_rtm());
13448  match(Set cr (FastLock object box));
13449  effect(TEMP tmp, TEMP scr, USE_KILL box);
13450  ins_cost(300);
13451  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13452  ins_encode %{
13453    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13454                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13455  %}
13456  ins_pipe(pipe_slow);
13457%}
13458
13459instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13460  match(Set cr (FastUnlock object box));
13461  effect(TEMP tmp, USE_KILL box);
13462  ins_cost(300);
13463  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13464  ins_encode %{
13465    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13466  %}
13467  ins_pipe(pipe_slow);
13468%}
13469
13470
13471
13472// ============================================================================
13473// Safepoint Instruction
13474instruct safePoint_poll(eFlagsReg cr) %{
13475  predicate(SafepointMechanism::uses_global_page_poll());
13476  match(SafePoint);
13477  effect(KILL cr);
13478
13479  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13480  // On SPARC that might be acceptable as we can generate the address with
13481  // just a sethi, saving an or.  By polling at offset 0 we can end up
13482  // putting additional pressure on the index-0 in the D$.  Because of
13483  // alignment (just like the situation at hand) the lower indices tend
13484  // to see more traffic.  It'd be better to change the polling address
13485  // to offset 0 of the last $line in the polling page.
13486
13487  format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13488  ins_cost(125);
13489  size(6) ;
13490  ins_encode( Safepoint_Poll() );
13491  ins_pipe( ialu_reg_mem );
13492%}
13493
13494instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13495  predicate(SafepointMechanism::uses_thread_local_poll());
13496  match(SafePoint poll);
13497  effect(KILL cr, USE poll);
13498
13499  format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13500  ins_cost(125);
13501  // EBP would need size(3)
13502  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13503  ins_encode %{
13504    __ relocate(relocInfo::poll_type);
13505    address pre_pc = __ pc();
13506    __ testl(rax, Address($poll$$Register, 0));
13507    address post_pc = __ pc();
13508    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13509  %}
13510  ins_pipe(ialu_reg_mem);
13511%}
13512
13513
13514// ============================================================================
13515// This name is KNOWN by the ADLC and cannot be changed.
13516// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13517// for this guy.
13518instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13519  match(Set dst (ThreadLocal));
13520  effect(DEF dst, KILL cr);
13521
13522  format %{ "MOV    $dst, Thread::current()" %}
13523  ins_encode %{
13524    Register dstReg = as_Register($dst$$reg);
13525    __ get_thread(dstReg);
13526  %}
13527  ins_pipe( ialu_reg_fat );
13528%}
13529
13530
13531
13532//----------PEEPHOLE RULES-----------------------------------------------------
13533// These must follow all instruction definitions as they use the names
13534// defined in the instructions definitions.
13535//
13536// peepmatch ( root_instr_name [preceding_instruction]* );
13537//
13538// peepconstraint %{
13539// (instruction_number.operand_name relational_op instruction_number.operand_name
13540//  [, ...] );
13541// // instruction numbers are zero-based using left to right order in peepmatch
13542//
13543// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13544// // provide an instruction_number.operand_name for each operand that appears
13545// // in the replacement instruction's match rule
13546//
13547// ---------VM FLAGS---------------------------------------------------------
13548//
13549// All peephole optimizations can be turned off using -XX:-OptoPeephole
13550//
13551// Each peephole rule is given an identifying number starting with zero and
13552// increasing by one in the order seen by the parser.  An individual peephole
13553// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13554// on the command-line.
13555//
13556// ---------CURRENT LIMITATIONS----------------------------------------------
13557//
13558// Only match adjacent instructions in same basic block
13559// Only equality constraints
13560// Only constraints between operands, not (0.dest_reg == EAX_enc)
13561// Only one replacement instruction
13562//
13563// ---------EXAMPLE----------------------------------------------------------
13564//
13565// // pertinent parts of existing instructions in architecture description
13566// instruct movI(rRegI dst, rRegI src) %{
13567//   match(Set dst (CopyI src));
13568// %}
13569//
13570// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13571//   match(Set dst (AddI dst src));
13572//   effect(KILL cr);
13573// %}
13574//
13575// // Change (inc mov) to lea
13576// peephole %{
13577//   // increment preceeded by register-register move
13578//   peepmatch ( incI_eReg movI );
13579//   // require that the destination register of the increment
13580//   // match the destination register of the move
13581//   peepconstraint ( 0.dst == 1.dst );
13582//   // construct a replacement instruction that sets
13583//   // the destination to ( move's source register + one )
13584//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13585// %}
13586//
13587// Implementation no longer uses movX instructions since
13588// machine-independent system no longer uses CopyX nodes.
13589//
13590// peephole %{
13591//   peepmatch ( incI_eReg movI );
13592//   peepconstraint ( 0.dst == 1.dst );
13593//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13594// %}
13595//
13596// peephole %{
13597//   peepmatch ( decI_eReg movI );
13598//   peepconstraint ( 0.dst == 1.dst );
13599//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13600// %}
13601//
13602// peephole %{
13603//   peepmatch ( addI_eReg_imm movI );
13604//   peepconstraint ( 0.dst == 1.dst );
13605//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13606// %}
13607//
13608// peephole %{
13609//   peepmatch ( addP_eReg_imm movP );
13610//   peepconstraint ( 0.dst == 1.dst );
13611//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13612// %}
13613
13614// // Change load of spilled value to only a spill
13615// instruct storeI(memory mem, rRegI src) %{
13616//   match(Set mem (StoreI mem src));
13617// %}
13618//
13619// instruct loadI(rRegI dst, memory mem) %{
13620//   match(Set dst (LoadI mem));
13621// %}
13622//
13623peephole %{
13624  peepmatch ( loadI storeI );
13625  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13626  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13627%}
13628
13629//----------SMARTSPILL RULES---------------------------------------------------
13630// These must follow all instruction definitions as they use the names
13631// defined in the instructions definitions.
13632