1//
2// Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4//
5// This code is free software; you can redistribute it and/or modify it
6// under the terms of the GNU General Public License version 2 only, as
7// published by the Free Software Foundation.
8//
9// This code is distributed in the hope that it will be useful, but WITHOUT
10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12// version 2 for more details (a copy is included in the LICENSE file that
13// accompanied this code).
14//
15// You should have received a copy of the GNU General Public License version
16// 2 along with this work; if not, write to the Free Software Foundation,
17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18//
19// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20// or visit www.oracle.com if you need additional information or have any
21// questions.
22//
23//
24
25// X86 Architecture Description File
26
27//----------REGISTER DEFINITION BLOCK------------------------------------------
28// This information is used by the matcher and the register allocator to
29// describe individual registers and classes of registers within the target
30// architecture.
31
32register %{
33//----------Architecture Description Register Definitions----------------------
34// General Registers
35// "reg_def"  name ( register save type, C convention save type,
36//                   ideal register type, encoding );
37// Register Save Types:
38//
39// NS  = No-Save:       The register allocator assumes that these registers
40//                      can be used without saving upon entry to the method, &
41//                      that they do not need to be saved at call sites.
42//
43// SOC = Save-On-Call:  The register allocator assumes that these registers
44//                      can be used without saving upon entry to the method,
45//                      but that they must be saved at call sites.
46//
47// SOE = Save-On-Entry: The register allocator assumes that these registers
48//                      must be saved before using them upon entry to the
49//                      method, but they do not need to be saved at call
50//                      sites.
51//
52// AS  = Always-Save:   The register allocator assumes that these registers
53//                      must be saved before using them upon entry to the
54//                      method, & that they must be saved at call sites.
55//
56// Ideal Register Type is used to determine how to save & restore a
57// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
59//
60// The encoding number is the actual bit-pattern placed into the opcodes.
61
62// General Registers
// EBX, ESI, and EDI were previously set as save-on-entry for Java code.
// SOE was then turned off for Java code due to the frequent use of uncommon traps.
// Now that the allocator is better, ESI and EDI are turned back on as SOE registers.
66
67reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
68reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
69reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
70reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// Now that adapter frames are gone, EBP is always saved and restored by the prologue/epilogue code.
72reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
73reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
74reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
75reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
76
// Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
// allocator and only shows up in the encodings.
79reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
80reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Here is the trick: FPR1 is really st(0), except in the midst of emitting
// assembly for a machnode.  During emission the FPU stack is pushed, making
// FPR1 == st(1) temporarily.  However, at any safepoint the stack will not
// have this extra element, so FPR1 == st(0) from the oopMap viewpoint.  This
// numbering quirk forces the instruction encodings to adjust the register
// encode to compensate for the 0/1 offset.  See MachSpillCopyNode::implementation,
// where it does flt->flt moves, for an example.
//
90reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
91reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
92reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
93reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
94reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
95reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
96reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
97reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
98reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
99reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
100reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
101reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
102reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
103reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
104//
105// Empty fill registers, which are never used, but supply alignment to xmm regs
106//
107reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
108reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
109reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
110reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
111reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
112reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
113reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
114reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
115
116// Specify priority of register selection within phases of register
117// allocation.  Highest priority is first.  A useful heuristic is to
118// give registers a low priority when they are required by machine
119// instructions, like EAX and EDX.  Registers which are used as
120// pairs must fall on an even boundary (witness the FPR#L's in this list).
121// For the Intel integer registers, the equivalent Long pairs are
122// EDX:EAX, EBX:ECX, and EDI:EBP.
123alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
124                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
125                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
126                    FPR6L, FPR6H, FPR7L, FPR7H,
127                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
128
129
130//----------Architecture Description Register Classes--------------------------
131// Several register classes are automatically defined based upon information in
132// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
137//
138// Class for no registers (empty set).
139reg_class no_reg();
140
141// Class for all registers
142reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
143// Class for all registers (excluding EBP)
144reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
148reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
149
150// Class for general registers
151reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
152// Class for general registers (excluding EBP).
153// This register class can be used for implicit null checks on win95.
154// It is also safe for use by tailjumps (we don't want to allocate in ebp).
155// Used also if the PreserveFramePointer flag is true.
156reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
157// Dynamic register class that selects between int_reg and int_reg_no_ebp.
158reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
159
160// Class of "X" registers
161reg_class int_x_reg(EBX, ECX, EDX, EAX);
162
163// Class of registers that can appear in an address with no offset.
164// EBP and ESP require an extra instruction byte for zero offset.
165// Used in fast-unlock
166reg_class p_reg(EDX, EDI, ESI, EBX);
167
168// Class for general registers excluding ECX
169reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
170// Class for general registers excluding ECX (and EBP)
171reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
172// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
173reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
174
175// Class for general registers excluding EAX
176reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
177
178// Class for general registers excluding EAX and EBX.
179reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
180// Class for general registers excluding EAX and EBX (and EBP)
181reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
182// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
183reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
184
185// Class of EAX (for multiply and divide operations)
186reg_class eax_reg(EAX);
187
188// Class of EBX (for atomic add)
189reg_class ebx_reg(EBX);
190
191// Class of ECX (for shift and JCXZ operations and cmpLTMask)
192reg_class ecx_reg(ECX);
193
194// Class of EDX (for multiply and divide operations)
195reg_class edx_reg(EDX);
196
197// Class of EDI (for synchronization)
198reg_class edi_reg(EDI);
199
200// Class of ESI (for synchronization)
201reg_class esi_reg(ESI);
202
203// Singleton class for stack pointer
204reg_class sp_reg(ESP);
205
206// Singleton class for instruction pointer
207// reg_class ip_reg(EIP);
208
209// Class of integer register pairs
210reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI).
212reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
213// Dynamic register class that selects between long_reg and long_reg_no_ebp.
214reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
215
// Classes of integer register pairs that align with the calling convention
217reg_class eadx_reg( EAX,EDX );
218reg_class ebcx_reg( ECX,EBX );
219
220// Not AX or DX, used in divides
221reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (nor EBP), used in divides
223reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
224// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
225reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
226
// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// a 2-address instruction out of Intel's FP stack.
230reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
231
232reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
233                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
234                      FPR7L,FPR7H );
235
236reg_class fp_flt_reg0( FPR1L );
237reg_class fp_dbl_reg0( FPR1L,FPR1H );
238reg_class fp_dbl_reg1( FPR2L,FPR2H );
239reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
240                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
241
242%}
243
244
245//----------SOURCE BLOCK-------------------------------------------------------
246// This is a block of C++ code which provides values, functions, and
247// definitions necessary in the rest of the architecture description
248source_hpp %{
249// Must be visible to the DFA in dfa_x86_32.cpp
250extern bool is_operand_hi32_zero(Node* n);
251%}
252
253source %{
254#define   RELOC_IMM32    Assembler::imm_operand
255#define   RELOC_DISP32   Assembler::disp32_operand
256
257#define __ _masm.
258
259// How to find the high register of a Long pair, given the low register
260#define   HIGH_FROM_LOW(x) ((x)+2)
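
// Illustrative note (added comment, not original code): the long pairs used by
// the allocator are EDX:EAX, EBX:ECX and EDI:EBP (see the alloc_class comment
// above), and the reg_def encodings give EAX=0/EDX=2, ECX=1/EBX=3, EBP=5/EDI=7.
// So the high half always encodes as the low half plus two, e.g.
//   HIGH_FROM_LOW(0 /*EAX*/) == 2 /*EDX*/.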
261
262// These masks are used to provide 128-bit aligned bitmasks to the XMM
263// instructions, to allow sign-masking or sign-bit flipping.  They allow
264// fast versions of NegF/NegD and AbsF/AbsD.
265
// Note: 'double' and 'long long' have 32-bit alignment on x86.
267static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value into the 128-bit operand.
272  operand[0] = lo;
273  operand[1] = hi;
274  return operand;
275}
276
// Buffer for 128-bit masks used by SSE instructions.
278static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
279
280// Static initialization during VM startup.
281static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
282static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
283static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
284static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
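
// Worked example (illustrative only): double_quadword masks its argument down
// to a 16-byte boundary.  If &fp_signmask_pool[2] were, say, 0x...5238, then
// 0x...5238 & ~0xF == 0x...5230, which still lies inside fp_signmask_pool
// because its first 128-bit slot (indices 0-1) is only alignment slack.
// Each aligned slot then holds a 16-byte pattern, e.g. float_signmask_pool is
// 0x7FFFFFFF7FFFFFFF twice -- the sign bit cleared in all four float lanes.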
285
286// Offset hacking within calls.
287static int pre_call_resets_size() {
288  int size = 0;
289  Compile* C = Compile::current();
290  if (C->in_24_bit_fp_mode()) {
291    size += 6; // fldcw
292  }
293  if (VM_Version::supports_vzeroupper()) {
294    size += 3; // vzeroupper
295  }
296  return size;
297}
298
// !!!!! Special hack to get all types of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
302int MachCallStaticJavaNode::ret_addr_offset() {
303  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
304}
305
306int MachCallDynamicJavaNode::ret_addr_offset() {
307  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
308}
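
// Worked example (illustrative only): assuming C->in_24_bit_fp_mode() and
// VM_Version::supports_vzeroupper() are both true, pre_call_resets_size()
// returns 6 + 3 == 9, so a static Java call reports a return address offset of
// 5 + 9 == 14 bytes and a dynamic Java call reports 10 + 9 == 19 bytes.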
309
310static int sizeof_FFree_Float_Stack_All = -1;
311
312int MachCallRuntimeNode::ret_addr_offset() {
313  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
314  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
315}
316
// Indicate if the safepoint node needs the polling page as an input.
// Since x86 has absolute addressing it normally does not, but with
// thread-local polling the per-thread polling page address is needed as an input.
319bool SafePointNode::needs_polling_address_input() {
320  return SafepointMechanism::uses_thread_local_poll();
321}
322
323//
324// Compute padding required for nodes which need alignment
325//
326
327// The address of the call instruction needs to be 4-byte aligned to
328// ensure that it does not span a cache line so that it can be patched.
329int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
330  current_offset += pre_call_resets_size();  // skip fldcw, if any
331  current_offset += 1;      // skip call opcode byte
332  return align_up(current_offset, alignment_required()) - current_offset;
333}
334
335// The address of the call instruction needs to be 4-byte aligned to
336// ensure that it does not span a cache line so that it can be patched.
337int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
338  current_offset += pre_call_resets_size();  // skip fldcw, if any
339  current_offset += 5;      // skip MOV instruction
340  current_offset += 1;      // skip call opcode byte
341  return align_up(current_offset, alignment_required()) - current_offset;
342}
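
// Worked example (illustrative only): the padding computed above makes the
// adjusted offset (call start plus any FP/vzeroupper resets, the 5-byte MOV for
// dynamic calls, and the call opcode byte) land on an alignment_required()
// boundary, assumed to be 4 here.  If the adjusted offset is 7, then
// align_up(7, 4) - 7 == 1 byte of padding is emitted; if it is already a
// multiple of 4, no padding is needed.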
343
344// EMIT_RM()
345void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
346  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
347  cbuf.insts()->emit_int8(c);
348}
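
// Illustrative example (added comment): emit_rm packs a ModRM/SIB-style byte as
// (f1 << 6) | (f2 << 3) | f3.  For instance emit_rm(cbuf, 0x1, rm_field, ESP_enc)
// with rm_field == 0 and ESP_enc == 4 emits 0x44, i.e. mod=01, reg=000, r/m=100.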
349
350// EMIT_CC()
351void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
352  unsigned char c = (unsigned char)( f1 | f2 );
353  cbuf.insts()->emit_int8(c);
354}
355
356// EMIT_OPCODE()
357void emit_opcode(CodeBuffer &cbuf, int code) {
358  cbuf.insts()->emit_int8((unsigned char) code);
359}
360
361// EMIT_OPCODE() w/ relocation information
362void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
363  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
364  emit_opcode(cbuf, code);
365}
366
367// EMIT_D8()
368void emit_d8(CodeBuffer &cbuf, int d8) {
369  cbuf.insts()->emit_int8((unsigned char) d8);
370}
371
372// EMIT_D16()
373void emit_d16(CodeBuffer &cbuf, int d16) {
374  cbuf.insts()->emit_int16(d16);
375}
376
377// EMIT_D32()
378void emit_d32(CodeBuffer &cbuf, int d32) {
379  cbuf.insts()->emit_int32(d32);
380}
381
382// emit 32 bit value and construct relocation entry from relocInfo::relocType
383void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
384        int format) {
385  cbuf.relocate(cbuf.insts_mark(), reloc, format);
386  cbuf.insts()->emit_int32(d32);
387}
388
389// emit 32 bit value and construct relocation entry from RelocationHolder
390void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
391        int format) {
392#ifdef ASSERT
393  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
394    assert(oopDesc::is_oop(cast_to_oop(d32)) && (ScavengeRootsInCode || !Universe::heap()->is_scavengable(cast_to_oop(d32))), "cannot embed scavengable oops in code");
395  }
396#endif
397  cbuf.relocate(cbuf.insts_mark(), rspec, format);
398  cbuf.insts()->emit_int32(d32);
399}
400
401// Access stack slot for load or store
402void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
403  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
404  if( -128 <= disp && disp <= 127 ) {
405    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
406    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
407    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
408  } else {
409    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
410    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
411    emit_d32(cbuf, disp);     // Displacement  // R/M byte
412  }
413}
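
// Worked example (illustrative only): store_to_stackslot(cbuf, 0xDB, 0x0, 8)
// emits 0xDB 0x44 0x24 0x08, i.e. FILD dword [ESP+8]: opcode 0xDB, ModRM 0x44
// (mod=01, reg=000, r/m=100 -> SIB follows), SIB 0x24 (no index, base=ESP),
// then the 8-bit displacement 0x08.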
414
// Encode a register-memory operand: rRegI ereg, memory mem  (emit_reg_mem)
416void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
417  // There is no index & no scale, use form without SIB byte
418  if ((index == 0x4) &&
419      (scale == 0) && (base != ESP_enc)) {
420    // If no displacement, mode is 0x0; unless base is [EBP]
421    if ( (displace == 0) && (base != EBP_enc) ) {
422      emit_rm(cbuf, 0x0, reg_encoding, base);
423    }
424    else {                    // If 8-bit displacement, mode 0x1
425      if ((displace >= -128) && (displace <= 127)
426          && (disp_reloc == relocInfo::none) ) {
427        emit_rm(cbuf, 0x1, reg_encoding, base);
428        emit_d8(cbuf, displace);
429      }
430      else {                  // If 32-bit displacement
431        if (base == -1) { // Special flag for absolute address
432          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
433          // (manual lies; no SIB needed here)
434          if ( disp_reloc != relocInfo::none ) {
435            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
436          } else {
437            emit_d32      (cbuf, displace);
438          }
439        }
440        else {                // Normal base + offset
441          emit_rm(cbuf, 0x2, reg_encoding, base);
442          if ( disp_reloc != relocInfo::none ) {
443            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
444          } else {
445            emit_d32      (cbuf, displace);
446          }
447        }
448      }
449    }
450  }
451  else {                      // Else, encode with the SIB byte
452    // If no displacement, mode is 0x0; unless base is [EBP]
453    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
454      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
455      emit_rm(cbuf, scale, index, base);
456    }
457    else {                    // If 8-bit displacement, mode 0x1
458      if ((displace >= -128) && (displace <= 127)
459          && (disp_reloc == relocInfo::none) ) {
460        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
461        emit_rm(cbuf, scale, index, base);
462        emit_d8(cbuf, displace);
463      }
464      else {                  // If 32-bit displacement
465        if (base == 0x04 ) {
466          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
467          emit_rm(cbuf, scale, index, 0x04);
468        } else {
469          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
470          emit_rm(cbuf, scale, index, base);
471        }
472        if ( disp_reloc != relocInfo::none ) {
473          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
474        } else {
475          emit_d32      (cbuf, displace);
476        }
477      }
478    }
479  }
480}
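
// Illustrative examples (added comment) of the addressing forms chosen above,
// with reg_encoding == 0 (EAX):
//   base=ECX(1), index=none(0x4), disp=0   -> single ModRM byte 0x01   ([ECX])
//   base=ECX(1), index=none(0x4), disp=8   -> ModRM 0x41 + disp8       ([ECX+8])
//   base=ECX(1), index=none(0x4), disp=300 -> ModRM 0x81 + disp32      ([ECX+300])
//   base=-1 (absolute, reloc or large disp) -> ModRM 0x05 + disp32     ([imm])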
481
482
483void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
484  if( dst_encoding == src_encoding ) {
485    // reg-reg copy, use an empty encoding
486  } else {
487    emit_opcode( cbuf, 0x8B );
488    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
489  }
490}
491
492void emit_cmpfp_fixup(MacroAssembler& _masm) {
493  Label exit;
494  __ jccb(Assembler::noParity, exit);
495  __ pushf();
496  //
497  // comiss/ucomiss instructions set ZF,PF,CF flags and
498  // zero OF,AF,SF for NaN values.
499  // Fixup flags by zeroing ZF,PF so that compare of NaN
500  // values returns 'less than' result (CF is set).
501  // Leave the rest of flags unchanged.
502  //
503  //    7 6 5 4 3 2 1 0
504  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
505  //    0 0 1 0 1 0 1 1   (0x2B)
506  //
507  __ andl(Address(rsp, 0), 0xffffff2b);
508  __ popf();
509  __ bind(exit);
510}
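
// Worked example (illustrative only): after ucomiss with a NaN operand the low
// flag byte is 0x45 (ZF=1, PF=1, CF=1).  ANDing with the 0x2B mask above gives
// 0x45 & 0x2B == 0x01, so only CF remains set and the NaN operand compares as
// 'less than', which is exactly what the fixup is after.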
511
512void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
513  Label done;
514  __ movl(dst, -1);
515  __ jcc(Assembler::parity, done);
516  __ jcc(Assembler::below, done);
517  __ setb(Assembler::notEqual, dst);
518  __ movzbl(dst, dst);
519  __ bind(done);
520}
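
// Summary (added comment): emit_cmpfp3 turns the flags from an FP compare into
// a three-way integer result in dst: -1 if unordered (parity) or below, 0 if
// equal, and 1 otherwise -- setb(notEqual) leaves 1 only when ZF is clear.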
521
522
523//=============================================================================
524const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
525
526int Compile::ConstantTable::calculate_table_base_offset() const {
527  return 0;  // absolute addressing, no offset
528}
529
530bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
531void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
532  ShouldNotReachHere();
533}
534
535void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
536  // Empty encoding
537}
538
539uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
540  return 0;
541}
542
543#ifndef PRODUCT
544void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
545  st->print("# MachConstantBaseNode (empty encoding)");
546}
547#endif
548
549
550//=============================================================================
551#ifndef PRODUCT
552void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
553  Compile* C = ra_->C;
554
555  int framesize = C->frame_size_in_bytes();
556  int bangsize = C->bang_size_in_bytes();
557  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for the return address, which is already pushed.
559  framesize -= wordSize;
560
561  if (C->need_stack_bang(bangsize)) {
562    framesize -= wordSize;
563    st->print("# stack bang (%d bytes)", bangsize);
564    st->print("\n\t");
565    st->print("PUSH   EBP\t# Save EBP");
566    if (PreserveFramePointer) {
567      st->print("\n\t");
568      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
569    }
570    if (framesize) {
571      st->print("\n\t");
572      st->print("SUB    ESP, #%d\t# Create frame",framesize);
573    }
574  } else {
575    st->print("SUB    ESP, #%d\t# Create frame",framesize);
576    st->print("\n\t");
577    framesize -= wordSize;
578    st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
579    if (PreserveFramePointer) {
580      st->print("\n\t");
581      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
582      if (framesize > 0) {
583        st->print("\n\t");
584        st->print("ADD    EBP, #%d", framesize);
585      }
586    }
587  }
588
589  if (VerifyStackAtCalls) {
590    st->print("\n\t");
591    framesize -= wordSize;
592    st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
593  }
594
595  if( C->in_24_bit_fp_mode() ) {
596    st->print("\n\t");
597    st->print("FLDCW  \t# load 24 bit fpu control word");
598  }
599  if (UseSSE >= 2 && VerifyFPU) {
600    st->print("\n\t");
601    st->print("# verify FPU stack (must be clean on entry)");
602  }
603
604#ifdef ASSERT
605  if (VerifyStackAtCalls) {
606    st->print("\n\t");
607    st->print("# stack alignment check");
608  }
609#endif
610  st->cr();
611}
612#endif
613
614
615void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
616  Compile* C = ra_->C;
617  MacroAssembler _masm(&cbuf);
618
619  int framesize = C->frame_size_in_bytes();
620  int bangsize = C->bang_size_in_bytes();
621
622  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
623
624  C->set_frame_complete(cbuf.insts_size());
625
626  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users of the constant
    // table might be emitted before MachConstantBaseNode.
629    Compile::ConstantTable& constant_table = C->constant_table();
630    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
631  }
632}
633
634uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
635  return MachNode::size(ra_); // too many variables; just compute it the hard way
636}
637
638int MachPrologNode::reloc() const {
639  return 0; // a large enough number
640}
641
642//=============================================================================
643#ifndef PRODUCT
644void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
645  Compile *C = ra_->C;
646  int framesize = C->frame_size_in_bytes();
647  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return address and EBP.
649  framesize -= 2*wordSize;
650
651  if (C->max_vector_size() > 16) {
652    st->print("VZEROUPPER");
653    st->cr(); st->print("\t");
654  }
655  if (C->in_24_bit_fp_mode()) {
656    st->print("FLDCW  standard control word");
657    st->cr(); st->print("\t");
658  }
659  if (framesize) {
660    st->print("ADD    ESP,%d\t# Destroy frame",framesize);
661    st->cr(); st->print("\t");
662  }
663  st->print_cr("POPL   EBP"); st->print("\t");
664  if (do_polling() && C->is_method_compilation()) {
665    st->print("TEST   PollPage,EAX\t! Poll Safepoint");
666    st->cr(); st->print("\t");
667  }
668}
669#endif
670
671void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
672  Compile *C = ra_->C;
673  MacroAssembler _masm(&cbuf);
674
675  if (C->max_vector_size() > 16) {
676    // Clear upper bits of YMM registers when current compiled code uses
677    // wide vectors to avoid AVX <-> SSE transition penalty during call.
678    _masm.vzeroupper();
679  }
680  // If method set FPU control word, restore to standard control word
681  if (C->in_24_bit_fp_mode()) {
682    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
683  }
684
685  int framesize = C->frame_size_in_bytes();
686  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return address and EBP.
688  framesize -= 2*wordSize;
689
690  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
691
692  if (framesize >= 128) {
693    emit_opcode(cbuf, 0x81); // add  SP, #framesize
694    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
695    emit_d32(cbuf, framesize);
696  } else if (framesize) {
697    emit_opcode(cbuf, 0x83); // add  SP, #framesize
698    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
699    emit_d8(cbuf, framesize);
700  }
701
702  emit_opcode(cbuf, 0x58 | EBP_enc);
703
704  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
705    __ reserved_stack_check();
706  }
707
708  if (do_polling() && C->is_method_compilation()) {
709    if (SafepointMechanism::uses_thread_local_poll()) {
710      Register pollReg = as_Register(EBX_enc);
711      MacroAssembler masm(&cbuf);
712      masm.get_thread(pollReg);
713      masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
714      masm.relocate(relocInfo::poll_return_type);
715      masm.testl(rax, Address(pollReg, 0));
716    } else {
717      cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
718      emit_opcode(cbuf,0x85);
719      emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
720      emit_d32(cbuf, (intptr_t)os::get_polling_page());
721    }
722  }
723}
724
725uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
726  return MachNode::size(ra_); // too many variables; just compute it
727                              // the hard way
728}
729
730int MachEpilogNode::reloc() const {
731  return 0; // a large enough number
732}
733
734const Pipeline * MachEpilogNode::pipeline() const {
735  return MachNode::pipeline_class();
736}
737
738int MachEpilogNode::safepoint_offset() const { return 0; }
739
740//=============================================================================
741
742enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
743static enum RC rc_class( OptoReg::Name reg ) {
744
745  if( !OptoReg::is_valid(reg)  ) return rc_bad;
746  if (OptoReg::is_stack(reg)) return rc_stack;
747
748  VMReg r = OptoReg::as_VMReg(reg);
749  if (r->is_Register()) return rc_int;
750  if (r->is_FloatRegister()) {
751    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
752    return rc_float;
753  }
754  assert(r->is_XMMRegister(), "must be");
755  return rc_xmm;
756}
757
758static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
759                        int opcode, const char *op_str, int size, outputStream* st ) {
760  if( cbuf ) {
761    emit_opcode  (*cbuf, opcode );
762    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
763#ifndef PRODUCT
764  } else if( !do_size ) {
765    if( size != 0 ) st->print("\n\t");
766    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
767      if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
768      else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
769    } else { // FLD, FST, PUSH, POP
770      st->print("%s [ESP + #%d]",op_str,offset);
771    }
772#endif
773  }
774  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
775  return size+3+offset_size;
776}
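
// Worked example (illustrative only): storing a register to [ESP + #200] takes
// opcode + ModRM + SIB + 4-byte displacement = 7 bytes, matching
// size + 3 + offset_size with offset_size == 4 (200 > 127).  Offsets in
// [1,127] need only a 1-byte displacement, and offset 0 needs none.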
777
778// Helper for XMM registers.  Extra opcode bits, limited syntax.
779static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
780                         int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
781  int in_size_in_bits = Assembler::EVEX_32bit;
782  int evex_encoding = 0;
783  if (reg_lo+1 == reg_hi) {
784    in_size_in_bits = Assembler::EVEX_64bit;
785    evex_encoding = Assembler::VEX_W;
786  }
787  if (cbuf) {
788    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX for spill memory operations,
    //                          since it maps more cases to a single-byte displacement.
791    _masm.set_managed();
792    if (reg_lo+1 == reg_hi) { // double move?
793      if (is_load) {
794        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
795      } else {
796        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
797      }
798    } else {
799      if (is_load) {
800        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
801      } else {
802        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
803      }
804    }
805#ifndef PRODUCT
806  } else if (!do_size) {
807    if (size != 0) st->print("\n\t");
808    if (reg_lo+1 == reg_hi) { // double move?
809      if (is_load) st->print("%s %s,[ESP + #%d]",
810                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
811                              Matcher::regName[reg_lo], offset);
812      else         st->print("MOVSD  [ESP + #%d],%s",
813                              offset, Matcher::regName[reg_lo]);
814    } else {
815      if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
816                              Matcher::regName[reg_lo], offset);
817      else         st->print("MOVSS  [ESP + #%d],%s",
818                              offset, Matcher::regName[reg_lo]);
819    }
820#endif
821  }
822  bool is_single_byte = false;
823  if ((UseAVX > 2) && (offset != 0)) {
824    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
825  }
826  int offset_size = 0;
827  if (UseAVX > 2 ) {
828    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
829  } else {
830    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
831  }
832  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
833  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
834  return size+5+offset_size;
835}
836
837
838static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
839                            int src_hi, int dst_hi, int size, outputStream* st ) {
840  if (cbuf) {
841    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic to choose among full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
843    _masm.set_managed();
844    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
845      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
846                as_XMMRegister(Matcher::_regEncode[src_lo]));
847    } else {
848      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
849                as_XMMRegister(Matcher::_regEncode[src_lo]));
850    }
851#ifndef PRODUCT
852  } else if (!do_size) {
853    if (size != 0) st->print("\n\t");
854    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
855      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
856        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
857      } else {
858        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
859      }
860    } else {
861      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
862        st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
863      } else {
864        st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
865      }
866    }
867#endif
868  }
869  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
870  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
871  int sz = (UseAVX > 2) ? 6 : 4;
872  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
873      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
874  return size + sz;
875}
876
877static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
878                            int src_hi, int dst_hi, int size, outputStream* st ) {
879  // 32-bit
880  if (cbuf) {
881    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic to choose among full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
883    _masm.set_managed();
884    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
885             as_Register(Matcher::_regEncode[src_lo]));
886#ifndef PRODUCT
887  } else if (!do_size) {
888    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
889#endif
890  }
891  return (UseAVX> 2) ? 6 : 4;
892}
893
894
895static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
896                                 int src_hi, int dst_hi, int size, outputStream* st ) {
897  // 32-bit
898  if (cbuf) {
899    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic to choose among full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
901    _masm.set_managed();
902    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
903             as_XMMRegister(Matcher::_regEncode[src_lo]));
904#ifndef PRODUCT
905  } else if (!do_size) {
906    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
907#endif
908  }
909  return (UseAVX> 2) ? 6 : 4;
910}
911
912static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
913  if( cbuf ) {
914    emit_opcode(*cbuf, 0x8B );
915    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
916#ifndef PRODUCT
917  } else if( !do_size ) {
918    if( size != 0 ) st->print("\n\t");
919    st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
920#endif
921  }
922  return size+2;
923}
924
925static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
926                                 int offset, int size, outputStream* st ) {
927  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
928    if( cbuf ) {
929      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
930      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
931#ifndef PRODUCT
932    } else if( !do_size ) {
933      if( size != 0 ) st->print("\n\t");
934      st->print("FLD    %s",Matcher::regName[src_lo]);
935#endif
936    }
937    size += 2;
938  }
939
940  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
941  const char *op_str;
942  int op;
943  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
944    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
945    op = 0xDD;
946  } else {                   // 32-bit store
947    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
948    op = 0xD9;
949    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
950  }
951
952  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
953}
954
955// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
956static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
957                          int src_hi, int dst_hi, uint ireg, outputStream* st);
958
959static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
960                            int stack_offset, int reg, uint ireg, outputStream* st);
961
962static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
963                                     int dst_offset, uint ireg, outputStream* st) {
964  int calc_size = 0;
965  int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
966  int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
967  switch (ireg) {
968  case Op_VecS:
969    calc_size = 3+src_offset_size + 3+dst_offset_size;
970    break;
971  case Op_VecD: {
972    calc_size = 3+src_offset_size + 3+dst_offset_size;
973    int tmp_src_offset = src_offset + 4;
974    int tmp_dst_offset = dst_offset + 4;
975    src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
976    dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
977    calc_size += 3+src_offset_size + 3+dst_offset_size;
978    break;
979  }
980  case Op_VecX:
981  case Op_VecY:
982  case Op_VecZ:
983    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
984    break;
985  default:
986    ShouldNotReachHere();
987  }
988  if (cbuf) {
989    MacroAssembler _masm(cbuf);
990    int offset = __ offset();
991    switch (ireg) {
992    case Op_VecS:
993      __ pushl(Address(rsp, src_offset));
994      __ popl (Address(rsp, dst_offset));
995      break;
996    case Op_VecD:
997      __ pushl(Address(rsp, src_offset));
998      __ popl (Address(rsp, dst_offset));
999      __ pushl(Address(rsp, src_offset+4));
1000      __ popl (Address(rsp, dst_offset+4));
1001      break;
1002    case Op_VecX:
1003      __ movdqu(Address(rsp, -16), xmm0);
1004      __ movdqu(xmm0, Address(rsp, src_offset));
1005      __ movdqu(Address(rsp, dst_offset), xmm0);
1006      __ movdqu(xmm0, Address(rsp, -16));
1007      break;
1008    case Op_VecY:
1009      __ vmovdqu(Address(rsp, -32), xmm0);
1010      __ vmovdqu(xmm0, Address(rsp, src_offset));
1011      __ vmovdqu(Address(rsp, dst_offset), xmm0);
1012      __ vmovdqu(xmm0, Address(rsp, -32));
1013      break;
1014    case Op_VecZ:
1015      __ evmovdquq(Address(rsp, -64), xmm0, 2);
1016      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1017      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1018      __ evmovdquq(xmm0, Address(rsp, -64), 2);
1019      break;
1020    default:
1021      ShouldNotReachHere();
1022    }
1023    int size = __ offset() - offset;
1024    assert(size == calc_size, "incorrect size calculation");
1025    return size;
1026#ifndef PRODUCT
1027  } else if (!do_size) {
1028    switch (ireg) {
1029    case Op_VecS:
1030      st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1031                "popl    [rsp + #%d]",
1032                src_offset, dst_offset);
1033      break;
1034    case Op_VecD:
      st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
                "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
1040      break;
1041     case Op_VecX:
1042      st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1043                "movdqu  xmm0, [rsp + #%d]\n\t"
1044                "movdqu  [rsp + #%d], xmm0\n\t"
1045                "movdqu  xmm0, [rsp - #16]",
1046                src_offset, dst_offset);
1047      break;
1048    case Op_VecY:
1049      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1050                "vmovdqu xmm0, [rsp + #%d]\n\t"
1051                "vmovdqu [rsp + #%d], xmm0\n\t"
1052                "vmovdqu xmm0, [rsp - #32]",
1053                src_offset, dst_offset);
1054      break;
1055    case Op_VecZ:
1056      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1057                "vmovdqu xmm0, [rsp + #%d]\n\t"
1058                "vmovdqu [rsp + #%d], xmm0\n\t"
1059                "vmovdqu xmm0, [rsp - #64]",
1060                src_offset, dst_offset);
1061      break;
1062    default:
1063      ShouldNotReachHere();
1064    }
1065#endif
1066  }
1067  return calc_size;
1068}
1069
1070uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1071  // Get registers to move
1072  OptoReg::Name src_second = ra_->get_reg_second(in(1));
1073  OptoReg::Name src_first = ra_->get_reg_first(in(1));
1074  OptoReg::Name dst_second = ra_->get_reg_second(this );
1075  OptoReg::Name dst_first = ra_->get_reg_first(this );
1076
1077  enum RC src_second_rc = rc_class(src_second);
1078  enum RC src_first_rc = rc_class(src_first);
1079  enum RC dst_second_rc = rc_class(dst_second);
1080  enum RC dst_first_rc = rc_class(dst_first);
1081
1082  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1083
1084  // Generate spill code!
1085  int size = 0;
1086
1087  if( src_first == dst_first && src_second == dst_second )
1088    return size;            // Self copy, no move
1089
1090  if (bottom_type()->isa_vect() != NULL) {
1091    uint ireg = ideal_reg();
1092    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1093    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1094    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1095    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1096      // mem -> mem
1097      int src_offset = ra_->reg2offset(src_first);
1098      int dst_offset = ra_->reg2offset(dst_first);
1099      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1100    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1101      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1102    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1103      int stack_offset = ra_->reg2offset(dst_first);
1104      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1105    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1106      int stack_offset = ra_->reg2offset(src_first);
1107      return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1108    } else {
1109      ShouldNotReachHere();
1110    }
1111  }
1112
1113  // --------------------------------------
1114  // Check for mem-mem move.  push/pop to move.
1115  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1116    if( src_second == dst_first ) { // overlapping stack copy ranges
1117      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1118      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1119      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1120      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1121    }
1122    // move low bits
1123    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1124    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1125    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1126      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1127      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1128    }
1129    return size;
1130  }
1131
1132  // --------------------------------------
1133  // Check for integer reg-reg copy
1134  if( src_first_rc == rc_int && dst_first_rc == rc_int )
1135    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1136
1137  // Check for integer store
1138  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1139    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1140
1141  // Check for integer load
1142  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1143    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1144
1145  // Check for integer reg-xmm reg copy
1146  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1147    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1148            "no 64 bit integer-float reg moves" );
1149    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1150  }
1151  // --------------------------------------
1152  // Check for float reg-reg copy
1153  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1154    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1155            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1156    if( cbuf ) {
1157
1158      // Note the mucking with the register encode to compensate for the 0/1
1159      // indexing issue mentioned in a comment in the reg_def sections
1160      // for FPR registers many lines above here.
1161
1162      if( src_first != FPR1L_num ) {
1163        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1164        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1165        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1166        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1167     } else {
1168        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1169        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1170     }
1171#ifndef PRODUCT
1172    } else if( !do_size ) {
1173      if( size != 0 ) st->print("\n\t");
1174      if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1175      else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1176#endif
1177    }
1178    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1179  }
1180
1181  // Check for float store
1182  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1183    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1184  }
1185
1186  // Check for float load
1187  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1188    int offset = ra_->reg2offset(src_first);
1189    const char *op_str;
1190    int op;
1191    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1192      op_str = "FLD_D";
1193      op = 0xDD;
1194    } else {                   // 32-bit load
1195      op_str = "FLD_S";
1196      op = 0xD9;
1197      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1198    }
1199    if( cbuf ) {
1200      emit_opcode  (*cbuf, op );
1201      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1202      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1203      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1204#ifndef PRODUCT
1205    } else if( !do_size ) {
1206      if( size != 0 ) st->print("\n\t");
1207      st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1208#endif
1209    }
1210    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1211    return size + 3+offset_size+2;
1212  }
1213
1214  // Check for xmm reg-reg copy
1215  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1216    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1217            (src_first+1 == src_second && dst_first+1 == dst_second),
1218            "no non-adjacent float-moves" );
1219    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1220  }
1221
1222  // Check for xmm reg-integer reg copy
1223  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1224    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1225            "no 64 bit float-integer reg moves" );
1226    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1227  }
1228
1229  // Check for xmm store
1230  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1231    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1232  }
1233
1234  // Check for float xmm load
1235  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1236    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1237  }
1238
1239  // Copy from float reg to xmm reg
1240  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1241    // copy to the top of stack from floating point reg
1242    // and use LEA to preserve flags
1243    if( cbuf ) {
1244      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1245      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1246      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1247      emit_d8(*cbuf,0xF8);
1248#ifndef PRODUCT
1249    } else if( !do_size ) {
1250      if( size != 0 ) st->print("\n\t");
1251      st->print("LEA    ESP,[ESP-8]");
1252#endif
1253    }
1254    size += 4;
1255
1256    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1257
1258    // Copy from the temp memory to the xmm reg.
1259    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1260
1261    if( cbuf ) {
1262      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1263      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1264      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1265      emit_d8(*cbuf,0x08);
1266#ifndef PRODUCT
1267    } else if( !do_size ) {
1268      if( size != 0 ) st->print("\n\t");
1269      st->print("LEA    ESP,[ESP+8]");
1270#endif
1271    }
1272    size += 4;
1273    return size;
1274  }
1275
1276  assert( size > 0, "missed a case" );
1277
1278  // --------------------------------------------------------------------
1279  // Check for second bits still needing moving.
1280  if( src_second == dst_second )
1281    return size;               // Self copy; no move
1282  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1283
1284  // Check for second word int-int move
1285  if( src_second_rc == rc_int && dst_second_rc == rc_int )
1286    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1287
1288  // Check for second word integer store
1289  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1290    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1291
1292  // Check for second word integer load
1293  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1294    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1295
1296
1297  Unimplemented();
1298  return 0; // Mute compiler
1299}
1300
1301#ifndef PRODUCT
1302void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1303  implementation( NULL, ra_, false, st );
1304}
1305#endif
1306
1307void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1308  implementation( &cbuf, ra_, false, NULL );
1309}
1310
1311uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1312  return MachNode::size(ra_);
1313}
1314
1315
1316//=============================================================================
1317#ifndef PRODUCT
1318void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1319  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1320  int reg = ra_->get_reg_first(this);
1321  st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1322}
1323#endif
1324
1325void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1326  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1327  int reg = ra_->get_encode(this);
1328  if( offset >= 128 ) {
1329    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1330    emit_rm(cbuf, 0x2, reg, 0x04);
1331    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1332    emit_d32(cbuf, offset);
1333  }
1334  else {
1335    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1336    emit_rm(cbuf, 0x1, reg, 0x04);
1337    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1338    emit_d8(cbuf, offset);
1339  }
1340}
1341
1342uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1343  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1344  if( offset >= 128 ) {
1345    return 7;
1346  }
1347  else {
1348    return 4;
1349  }
1350}
1351
1352//=============================================================================
1353#ifndef PRODUCT
1354void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1355  st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1356  st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1357  st->print_cr("\tNOP");
1358  st->print_cr("\tNOP");
1359  if( !OptoBreakpoint )
1360    st->print_cr("\tNOP");
1361}
1362#endif
1363
1364void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1365  MacroAssembler masm(&cbuf);
1366#ifdef ASSERT
1367  uint insts_size = cbuf.insts_size();
1368#endif
1369  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1370  masm.jump_cc(Assembler::notEqual,
1371               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1372  /* WARNING these NOPs are critical so that verified entry point is properly
1373     aligned for patching by NativeJump::patch_verified_entry() */
1374  int nops_cnt = 2;
1375  if( !OptoBreakpoint ) // Leave space for int3
1376     nops_cnt += 1;
1377  masm.nop(nops_cnt);
1378
1379  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1380}
1381
1382uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1383  return OptoBreakpoint ? 11 : 12;
1384}
1385
1386
1387//=============================================================================
1388
1389int Matcher::regnum_to_fpu_offset(int regnum) {
1390  return regnum - 32; // The FP registers are in the second chunk
1391}
1392
1393// This is UltraSparc-specific; true just means we have fast l2f conversion.
1394const bool Matcher::convL2FSupported(void) {
1395  return true;
1396}
1397
1398// Is this branch offset short enough that a short branch can be used?
1399//
1400// NOTE: If the platform does not provide any short branch variants, then
1401//       this method should return false for offset 0.
1402bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1403  // The passed offset is relative to address of the branch.
1404  // On x86 a branch displacement is calculated relative to the address
1405  // of the next instruction.
1406  offset -= br_size;
1407
1408  // The short version of jmpConUCF2 contains multiple branches,
1409  // making its reach slightly shorter.
1410  if (rule == jmpConUCF2_rule)
1411    return (-126 <= offset && offset <= 125);
1412  return (-128 <= offset && offset <= 127);
1413}
1414
1415const bool Matcher::isSimpleConstant64(jlong value) {
1416  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1417  return false;
1418}
1419
1420// The ecx parameter to rep stos for the ClearArray node is in dwords.
1421const bool Matcher::init_array_count_is_in_bytes = false;
1422
1423// Needs 2 CMOV's for longs.
1424const int Matcher::long_cmove_cost() { return 1; }
1425
1426// No CMOVF/CMOVD with SSE/SSE2
1427const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1428
1429// Does the CPU require late expand (see block.cpp for description of late expand)?
1430const bool Matcher::require_postalloc_expand = false;
1431
1432// Do we need to mask the count passed to shift instructions or does
1433// the cpu only look at the lower 5/6 bits anyway?
1434const bool Matcher::need_masked_shift_count = false;
1435
1436bool Matcher::narrow_oop_use_complex_address() {
1437  ShouldNotCallThis();
1438  return true;
1439}
1440
1441bool Matcher::narrow_klass_use_complex_address() {
1442  ShouldNotCallThis();
1443  return true;
1444}
1445
1446bool Matcher::const_oop_prefer_decode() {
1447  ShouldNotCallThis();
1448  return true;
1449}
1450
1451bool Matcher::const_klass_prefer_decode() {
1452  ShouldNotCallThis();
1453  return true;
1454}
1455
1456// Is it better to copy float constants, or load them directly from memory?
1457// Intel can load a float constant from a direct address, requiring no
1458// extra registers.  Most RISCs will have to materialize an address into a
1459// register first, so they would do better to copy the constant from stack.
1460const bool Matcher::rematerialize_float_constants = true;
1461
1462// If CPU can load and store mis-aligned doubles directly then no fixup is
1463// needed.  Else we split the double into 2 integer pieces and move it
1464// piece-by-piece.  Only happens when passing doubles into C code as the
1465// Java calling convention forces doubles to be aligned.
1466const bool Matcher::misaligned_doubles_ok = true;
1467
1468
1469void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1470  // Get the memory operand from the node
1471  uint numopnds = node->num_opnds();        // Virtual call for number of operands
1472  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1473  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1474  uint opcnt     = 1;                 // First operand
1475  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1476  while( idx >= skipped+num_edges ) {
1477    skipped += num_edges;
1478    opcnt++;                          // Bump operand count
1479    assert( opcnt < numopnds, "Accessing non-existent operand" );
1480    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1481  }
1482
1483  MachOper *memory = node->_opnds[opcnt];
1484  MachOper *new_memory = NULL;
1485  switch (memory->opcode()) {
1486  case DIRECT:
1487  case INDOFFSET32X:
1488    // No transformation necessary.
1489    return;
1490  case INDIRECT:
1491    new_memory = new indirect_win95_safeOper( );
1492    break;
1493  case INDOFFSET8:
1494    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1495    break;
1496  case INDOFFSET32:
1497    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1498    break;
1499  case INDINDEXOFFSET:
1500    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1501    break;
1502  case INDINDEXSCALE:
1503    new_memory = new indIndexScale_win95_safeOper(memory->scale());
1504    break;
1505  case INDINDEXSCALEOFFSET:
1506    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1507    break;
1508  case LOAD_LONG_INDIRECT:
1509  case LOAD_LONG_INDOFFSET32:
1510    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
1511    return;
1512  default:
1513    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1514    return;
1515  }
1516  node->_opnds[opcnt] = new_memory;
1517}
1518
1519// Advertise here if the CPU requires explicit rounding operations
1520// to implement the UseStrictFP mode.
1521const bool Matcher::strict_fp_requires_explicit_rounding = true;
1522
1523// Are floats converted to doubles when stored to the stack during deoptimization?
1524// On x32 floats are stored with conversion only when the FPU is used for floats.
1525bool Matcher::float_in_double() { return (UseSSE == 0); }
1526
1527// Do ints take an entire long register or just half?
1528const bool Matcher::int_in_long = false;
1529
1530// Return whether or not this register is ever used as an argument.  This
1531// function is used on startup to build the trampoline stubs in generateOptoStub.
1532// Registers not mentioned will be killed by the VM call in the trampoline, and
1533// arguments in those registers will not be available to the callee.
1534bool Matcher::can_be_java_arg( int reg ) {
1535  if(  reg == ECX_num   || reg == EDX_num   ) return true;
1536  if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1537  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1538  return false;
1539}
1540
1541bool Matcher::is_spillable_arg( int reg ) {
1542  return can_be_java_arg(reg);
1543}
1544
1545bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1546  // Use the hardware integer DIV instruction when
1547  // it is faster than code which uses multiply.
1548  // Only when the constant divisor fits into 32 bits
1549  // (min_jint is excluded so that negating it yields only
1550  // correct positive 32-bit values).
1551  return VM_Version::has_fast_idiv() &&
1552         (divisor == (int)divisor && divisor != min_jint);
1553}
1554
1555// Register for DIVI projection of divmodI
1556RegMask Matcher::divI_proj_mask() {
1557  return EAX_REG_mask();
1558}
1559
1560// Register for MODI projection of divmodI
1561RegMask Matcher::modI_proj_mask() {
1562  return EDX_REG_mask();
1563}
1564
1565// Register for DIVL projection of divmodL
1566RegMask Matcher::divL_proj_mask() {
1567  ShouldNotReachHere();
1568  return RegMask();
1569}
1570
1571// Register for MODL projection of divmodL
1572RegMask Matcher::modL_proj_mask() {
1573  ShouldNotReachHere();
1574  return RegMask();
1575}
1576
1577const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1578  return NO_REG_mask();
1579}
1580
1581// Returns true if the high 32 bits of the value is known to be zero.
1582bool is_operand_hi32_zero(Node* n) {
1583  int opc = n->Opcode();
1584  if (opc == Op_AndL) {
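    // An AndL with a constant mask whose high 32 bits are zero forces the high word of the result to zero.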
1585    Node* o2 = n->in(2);
1586    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1587      return true;
1588    }
1589  }
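  // A constant long whose high 32 bits are already zero.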
1590  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1591    return true;
1592  }
1593  return false;
1594}
1595
1596%}
1597
1598//----------ENCODING BLOCK-----------------------------------------------------
1599// This block specifies the encoding classes used by the compiler to output
1600// byte streams.  Encoding classes generate functions which are called by
1601// Machine Instruction Nodes in order to generate the bit encoding of the
1602// instruction.  Operands specify their base encoding interface with the
1603// interface keyword.  Four interfaces are currently supported:
1604// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1605// operand to generate a function which returns its register number when
1606// queried.   CONST_INTER causes an operand to generate a function which
1607// returns the value of the constant when queried.  MEMORY_INTER causes an
1608// operand to generate four functions which return the Base Register, the
1609// Index Register, the Scale Value, and the Offset Value of the operand when
1610// queried.  COND_INTER causes an operand to generate six functions which
1611// return the encoding code (i.e., the encoding bits for the instruction)
1612// associated with each basic boolean condition for a conditional instruction.
1613// Instructions specify two basic values for encoding.  They use the
1614// ins_encode keyword to specify their encoding class (which must be one of
1615// the class names specified in the encoding block), and they use the
1616// opcode keyword to specify, in order, their primary, secondary, and
1617// tertiary opcode.  Only the opcode sections which a particular instruction
1618// needs for encoding need to be specified.
1619encode %{
1620  // Build emit functions for each basic byte or larger field in the intel
1621  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1622  // code in the enc_class source block.  Emit functions will live in the
1623  // main source block for now.  In future, we can generalize this by
1624  // adding a syntax that specifies the sizes of fields in an order,
1625  // so that the adlc can build the emit functions automagically
1626
1627  // Emit primary opcode
1628  enc_class OpcP %{
1629    emit_opcode(cbuf, $primary);
1630  %}
1631
1632  // Emit secondary opcode
1633  enc_class OpcS %{
1634    emit_opcode(cbuf, $secondary);
1635  %}
1636
1637  // Emit opcode directly
1638  enc_class Opcode(immI d8) %{
1639    emit_opcode(cbuf, $d8$$constant);
1640  %}
1641
1642  enc_class SizePrefix %{
1643    emit_opcode(cbuf,0x66);
1644  %}
1645
1646  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1647    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1648  %}
1649
1650  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1651    emit_opcode(cbuf,$opcode$$constant);
1652    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1653  %}
1654
1655  enc_class mov_r32_imm0( rRegI dst ) %{
1656    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1657    emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1658  %}
1659
1660  enc_class cdq_enc %{
1661    // Full implementation of Java idiv and irem; checks for
1662    // special case as described in JVM spec., p.243 & p.271.
1663    //
1664    //         normal case                           special case
1665    //
1666    // input : rax,: dividend                         min_int
1667    //         reg: divisor                          -1
1668    //
1669    // output: rax,: quotient  (= rax, idiv reg)       min_int
1670    //         rdx: remainder (= rax, irem reg)       0
1671    //
1672    //  Code sequence:
1673    //
1674    //  81 F8 00 00 00 80    cmp         rax,80000000h
1675    //  0F 85 0B 00 00 00    jne         normal_case
1676    //  33 D2                xor         rdx,edx
1677    //  83 F9 FF             cmp         rcx,0FFh
1678    //  0F 84 03 00 00 00    je          done
1679    //                  normal_case:
1680    //  99                   cdq
1681    //  F7 F9                idiv        rax,ecx
1682    //                  done:
1683    //
1684    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1685    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1686    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1687    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1688    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1689    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1690    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1691    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1692    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1693    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1694    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1695    // normal_case:
1696    emit_opcode(cbuf,0x99);                                         // cdq
1697    // idiv (note: must be emitted by the user of this rule)
1698    // normal:
1699  %}
1700
1701  // Dense encoding for older common ops
1702  enc_class Opc_plus(immI opcode, rRegI reg) %{
1703    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1704  %}
1705
1706
1707  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
1708  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1709    // Check for 8-bit immediate, and set sign extend bit in opcode
1710    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1711      emit_opcode(cbuf, $primary | 0x02);
1712    }
1713    else {                          // If 32-bit immediate
1714      emit_opcode(cbuf, $primary);
1715    }
1716  %}
1717
1718  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1719    // Emit primary opcode and set sign-extend bit
1720    // Check for 8-bit immediate, and set sign extend bit in opcode
1721    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1722      emit_opcode(cbuf, $primary | 0x02);    }
1723    else {                          // If 32-bit immediate
1724      emit_opcode(cbuf, $primary);
1725    }
1726    // Emit r/m byte with secondary opcode, after primary opcode.
1727    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1728  %}
1729
1730  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1731    // Check for 8-bit immediate, and set sign extend bit in opcode
1732    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1733      $$$emit8$imm$$constant;
1734    }
1735    else {                          // If 32-bit immediate
1736      // Output immediate
1737      $$$emit32$imm$$constant;
1738    }
1739  %}
1740
1741  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1742    // Emit primary opcode and set sign-extend bit
1743    // Check for 8-bit immediate, and set sign extend bit in opcode
1744    int con = (int)$imm$$constant; // Throw away top bits
1745    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1746    // Emit r/m byte with secondary opcode, after primary opcode.
1747    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1748    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1749    else                               emit_d32(cbuf,con);
1750  %}
1751
1752  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1753    // Emit primary opcode and set sign-extend bit
1754    // Check for 8-bit immediate, and set sign extend bit in opcode
1755    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1756    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1757    // Emit r/m byte with tertiary opcode, after primary opcode.
1758    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1759    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1760    else                               emit_d32(cbuf,con);
1761  %}
1762
1763  enc_class OpcSReg (rRegI dst) %{    // BSWAP
1764    emit_cc(cbuf, $secondary, $dst$$reg );
1765  %}
1766
1767  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1768    int destlo = $dst$$reg;
1769    int desthi = HIGH_FROM_LOW(destlo);
1770    // bswap lo
1771    emit_opcode(cbuf, 0x0F);
1772    emit_cc(cbuf, 0xC8, destlo);
1773    // bswap hi
1774    emit_opcode(cbuf, 0x0F);
1775    emit_cc(cbuf, 0xC8, desthi);
1776    // xchg lo and hi
1777    emit_opcode(cbuf, 0x87);
1778    emit_rm(cbuf, 0x3, destlo, desthi);
1779  %}
1780
1781  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1782    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1783  %}
1784
1785  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1786    $$$emit8$primary;
1787    emit_cc(cbuf, $secondary, $cop$$cmpcode);
1788  %}
1789
1790  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1791    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1792    emit_d8(cbuf, op >> 8 );
1793    emit_d8(cbuf, op & 255);
1794  %}
1795
1796  // emulate a CMOV with a conditional branch around a MOV
1797  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1798    // Invert sense of branch from sense of CMOV
1799    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1800    emit_d8( cbuf, $brOffs$$constant );
1801  %}
1802
1803  enc_class enc_PartialSubtypeCheck( ) %{
1804    Register Redi = as_Register(EDI_enc); // result register
1805    Register Reax = as_Register(EAX_enc); // super class
1806    Register Recx = as_Register(ECX_enc); // killed
1807    Register Resi = as_Register(ESI_enc); // sub class
1808    Label miss;
1809
1810    MacroAssembler _masm(&cbuf);
1811    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1812                                     NULL, &miss,
1813                                     /*set_cond_codes:*/ true);
1814    if ($primary) {
1815      __ xorptr(Redi, Redi);
1816    }
1817    __ bind(miss);
1818  %}
1819
1820  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1821    MacroAssembler masm(&cbuf);
1822    int start = masm.offset();
1823    if (UseSSE >= 2) {
1824      if (VerifyFPU) {
1825        masm.verify_FPU(0, "must be empty in SSE2+ mode");
1826      }
1827    } else {
1828      // External c_calling_convention expects the FPU stack to be 'clean'.
1829      // Compiled code leaves it dirty.  Do cleanup now.
1830      masm.empty_FPU_stack();
1831    }
1832    if (sizeof_FFree_Float_Stack_All == -1) {
1833      sizeof_FFree_Float_Stack_All = masm.offset() - start;
1834    } else {
1835      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1836    }
1837  %}
1838
1839  enc_class Verify_FPU_For_Leaf %{
1840    if( VerifyFPU ) {
1841      MacroAssembler masm(&cbuf);
1842      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1843    }
1844  %}
1845
1846  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1847    // This is the instruction starting address for relocation info.
1848    cbuf.set_insts_mark();
1849    $$$emit8$primary;
1850    // CALL directly to the runtime
1851    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1852                runtime_call_Relocation::spec(), RELOC_IMM32 );
1853
1854    if (UseSSE >= 2) {
1855      MacroAssembler _masm(&cbuf);
1856      BasicType rt = tf()->return_type();
1857
1858      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1859        // A C runtime call where the return value is unused.  In SSE2+
1860        // mode the result needs to be removed from the FPU stack.  It's
1861        // likely that this function call could be removed by the
1862        // optimizer if the C function is a pure function.
1863        __ ffree(0);
1864      } else if (rt == T_FLOAT) {
1865        __ lea(rsp, Address(rsp, -4));
1866        __ fstp_s(Address(rsp, 0));
1867        __ movflt(xmm0, Address(rsp, 0));
1868        __ lea(rsp, Address(rsp,  4));
1869      } else if (rt == T_DOUBLE) {
1870        __ lea(rsp, Address(rsp, -8));
1871        __ fstp_d(Address(rsp, 0));
1872        __ movdbl(xmm0, Address(rsp, 0));
1873        __ lea(rsp, Address(rsp,  8));
1874      }
1875    }
1876  %}
1877
1878  enc_class pre_call_resets %{
1879    // If method sets FPU control word restore it here
1880    debug_only(int off0 = cbuf.insts_size());
1881    if (ra_->C->in_24_bit_fp_mode()) {
1882      MacroAssembler _masm(&cbuf);
1883      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1884    }
1885    // Clear upper bits of YMM registers when current compiled code uses
1886    // wide vectors to avoid AVX <-> SSE transition penalty during call.
1887    MacroAssembler _masm(&cbuf);
1888    __ vzeroupper();
1889    debug_only(int off1 = cbuf.insts_size());
1890    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1891  %}
1892
1893  enc_class post_call_FPU %{
1894    // If method sets FPU control word do it here also
1895    if (Compile::current()->in_24_bit_fp_mode()) {
1896      MacroAssembler masm(&cbuf);
1897      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1898    }
1899  %}
1900
1901  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1902    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1903    // who we intended to call.
1904    cbuf.set_insts_mark();
1905    $$$emit8$primary;
1906
1907    if (!_method) {
1908      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1909                     runtime_call_Relocation::spec(),
1910                     RELOC_IMM32);
1911    } else {
1912      int method_index = resolved_method_index(cbuf);
1913      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1914                                                  : static_call_Relocation::spec(method_index);
1915      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1916                     rspec, RELOC_DISP32);
1917      // Emit stubs for static call.
1918      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1919      if (stub == NULL) {
1920        ciEnv::current()->record_failure("CodeCache is full");
1921        return;
1922      }
1923    }
1924  %}
1925
1926  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1927    MacroAssembler _masm(&cbuf);
1928    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1929  %}
1930
1931  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1932    int disp = in_bytes(Method::from_compiled_offset());
1933    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1934
1935    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
1936    cbuf.set_insts_mark();
1937    $$$emit8$primary;
1938    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1939    emit_d8(cbuf, disp);             // Displacement
1940
1941  %}
1942
1943//   Following encoding is no longer used, but may be restored if calling
1944//   convention changes significantly.
1945//   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1946//
1947//   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1948//     // int ic_reg     = Matcher::inline_cache_reg();
1949//     // int ic_encode  = Matcher::_regEncode[ic_reg];
1950//     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1951//     // int imo_encode = Matcher::_regEncode[imo_reg];
1952//
1953//     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1954//     // // so we load it immediately before the call
1955//     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1956//     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1957//
1958//     // xor rbp,ebp
1959//     emit_opcode(cbuf, 0x33);
1960//     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1961//
1962//     // CALL to interpreter.
1963//     cbuf.set_insts_mark();
1964//     $$$emit8$primary;
1965//     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1966//                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1967//   %}
1968
1969  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1970    $$$emit8$primary;
1971    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1972    $$$emit8$shift$$constant;
1973  %}
1974
1975  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1976    // Load immediate does not have a zero or sign extended version
1977    // for 8-bit immediates
1978    emit_opcode(cbuf, 0xB8 + $dst$$reg);
1979    $$$emit32$src$$constant;
1980  %}
1981
1982  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1983    // Load immediate does not have a zero or sign extended version
1984    // for 8-bit immediates
1985    emit_opcode(cbuf, $primary + $dst$$reg);
1986    $$$emit32$src$$constant;
1987  %}
1988
1989  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1990    // Load immediate does not have a zero or sign extended version
1991    // for 8-bit immediates
1992    int dst_enc = $dst$$reg;
1993    int src_con = $src$$constant & 0x0FFFFFFFFL;
1994    if (src_con == 0) {
1995      // xor dst, dst
1996      emit_opcode(cbuf, 0x33);
1997      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1998    } else {
1999      emit_opcode(cbuf, $primary + dst_enc);
2000      emit_d32(cbuf, src_con);
2001    }
2002  %}
2003
2004  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2005    // Load immediate does not have a zero or sign extended version
2006    // for 8-bit immediates
2007    int dst_enc = $dst$$reg + 2;
2008    int src_con = ((julong)($src$$constant)) >> 32;
2009    if (src_con == 0) {
2010      // xor dst, dst
2011      emit_opcode(cbuf, 0x33);
2012      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2013    } else {
2014      emit_opcode(cbuf, $primary + dst_enc);
2015      emit_d32(cbuf, src_con);
2016    }
2017  %}
2018
2019
2020  // Encode a reg-reg copy.  If it is useless, then empty encoding.
2021  enc_class enc_Copy( rRegI dst, rRegI src ) %{
2022    encode_Copy( cbuf, $dst$$reg, $src$$reg );
2023  %}
2024
2025  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2026    encode_Copy( cbuf, $dst$$reg, $src$$reg );
2027  %}
2028
2029  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2030    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2031  %}
2032
2033  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2034    $$$emit8$primary;
2035    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2036  %}
2037
2038  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2039    $$$emit8$secondary;
2040    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2041  %}
2042
2043  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2044    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2045  %}
2046
2047  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2048    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2049  %}
2050
2051  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2052    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2053  %}
2054
2055  enc_class Con32 (immI src) %{    // Con32(storeImmI)
2056    // Output immediate
2057    $$$emit32$src$$constant;
2058  %}
2059
2060  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2061    // Output Float immediate bits
2062    jfloat jf = $src$$constant;
2063    int    jf_as_bits = jint_cast( jf );
2064    emit_d32(cbuf, jf_as_bits);
2065  %}
2066
2067  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2068    // Output Float immediate bits
2069    jfloat jf = $src$$constant;
2070    int    jf_as_bits = jint_cast( jf );
2071    emit_d32(cbuf, jf_as_bits);
2072  %}
2073
2074  enc_class Con16 (immI src) %{    // Con16(storeImmI)
2075    // Output immediate
2076    $$$emit16$src$$constant;
2077  %}
2078
2079  enc_class Con_d32(immI src) %{
2080    emit_d32(cbuf,$src$$constant);
2081  %}
2082
2083  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2084    // Output immediate memory reference
2085    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2086    emit_d32(cbuf, 0x00);
2087  %}
2088
2089  enc_class lock_prefix( ) %{
2090    if( os::is_MP() )
2091      emit_opcode(cbuf,0xF0);         // [Lock]
2092  %}
2093
2094  // Cmp-xchg long value.
2095  // Note: we need to swap rbx, and rcx before and after the
2096  //       cmpxchg8 instruction because the instruction uses
2097  //       rcx as the high order word of the new value to store but
2098  //       our register encoding uses rbx.
2099  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2100
2101    // XCHG  rbx,ecx
2102    emit_opcode(cbuf,0x87);
2103    emit_opcode(cbuf,0xD9);
2104    // [Lock]
2105    if( os::is_MP() )
2106      emit_opcode(cbuf,0xF0);
2107    // CMPXCHG8 [Eptr]
2108    emit_opcode(cbuf,0x0F);
2109    emit_opcode(cbuf,0xC7);
2110    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2111    // XCHG  rbx,ecx
2112    emit_opcode(cbuf,0x87);
2113    emit_opcode(cbuf,0xD9);
2114  %}
2115
2116  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2117    // [Lock]
2118    if( os::is_MP() )
2119      emit_opcode(cbuf,0xF0);
2120
2121    // CMPXCHG [Eptr]
2122    emit_opcode(cbuf,0x0F);
2123    emit_opcode(cbuf,0xB1);
2124    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2125  %}
2126
2127  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
2128    // [Lock]
2129    if( os::is_MP() )
2130      emit_opcode(cbuf,0xF0);
2131
2132    // CMPXCHGB [Eptr]
2133    emit_opcode(cbuf,0x0F);
2134    emit_opcode(cbuf,0xB0);
2135    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2136  %}
2137
2138  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
2139    // [Lock]
2140    if( os::is_MP() )
2141      emit_opcode(cbuf,0xF0);
2142
2143    // 0x66 operand-size prefix for the 16-bit CMPXCHG
2144    emit_opcode(cbuf, 0x66);
2145
2146    // CMPXCHGW [Eptr]
2147    emit_opcode(cbuf,0x0F);
2148    emit_opcode(cbuf,0xB1);
2149    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2150  %}
2151
2152  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2153    int res_encoding = $res$$reg;
2154
2155    // MOV  res,0
2156    emit_opcode( cbuf, 0xB8 + res_encoding);
2157    emit_d32( cbuf, 0 );
2158    // JNE,s  fail
2159    emit_opcode(cbuf,0x75);
2160    emit_d8(cbuf, 5 );
2161    // MOV  res,1
2162    emit_opcode( cbuf, 0xB8 + res_encoding);
2163    emit_d32( cbuf, 1 );
2164    // fail:
2165  %}
2166
2167  enc_class set_instruction_start( ) %{
2168    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2169  %}
2170
2171  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2172    int reg_encoding = $ereg$$reg;
2173    int base  = $mem$$base;
2174    int index = $mem$$index;
2175    int scale = $mem$$scale;
2176    int displace = $mem$$disp;
2177    relocInfo::relocType disp_reloc = $mem->disp_reloc();
2178    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2179  %}
2180
2181  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2182    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2183    int base  = $mem$$base;
2184    int index = $mem$$index;
2185    int scale = $mem$$scale;
2186    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2187    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2188    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2189  %}
2190
2191  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2192    int r1, r2;
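    // $tertiary 0xA4 is SHLD (left shift): the high word gets the double shift and
    // the low word the simple shift; for SHRD (right shift) the roles are swapped.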
2193    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2194    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2195    emit_opcode(cbuf,0x0F);
2196    emit_opcode(cbuf,$tertiary);
2197    emit_rm(cbuf, 0x3, r1, r2);
2198    emit_d8(cbuf,$cnt$$constant);
2199    emit_d8(cbuf,$primary);
2200    emit_rm(cbuf, 0x3, $secondary, r1);
2201    emit_d8(cbuf,$cnt$$constant);
2202  %}
2203
2204  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2205    emit_opcode( cbuf, 0x8B ); // Move
2206    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2207    if( $cnt$$constant > 32 ) { // Shift, if not by zero
2208      emit_d8(cbuf,$primary);
2209      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2210      emit_d8(cbuf,$cnt$$constant-32);
2211    }
2212    emit_d8(cbuf,$primary);
2213    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2214    emit_d8(cbuf,31);
2215  %}
2216
2217  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2218    int r1, r2;
2219    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2220    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2221
2222    emit_opcode( cbuf, 0x8B ); // Move r1,r2
2223    emit_rm(cbuf, 0x3, r1, r2);
2224    if( $cnt$$constant > 32 ) { // Shift, if not by zero
2225      emit_opcode(cbuf,$primary);
2226      emit_rm(cbuf, 0x3, $secondary, r1);
2227      emit_d8(cbuf,$cnt$$constant-32);
2228    }
2229    emit_opcode(cbuf,0x33);  // XOR r2,r2
2230    emit_rm(cbuf, 0x3, r2, r2);
2231  %}
2232
2233  // Clone of RegMem but accepts an extra parameter to access each
2234  // half of a double in memory; it never needs relocation info.
2235  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2236    emit_opcode(cbuf,$opcode$$constant);
2237    int reg_encoding = $rm_reg$$reg;
2238    int base     = $mem$$base;
2239    int index    = $mem$$index;
2240    int scale    = $mem$$scale;
2241    int displace = $mem$$disp + $disp_for_half$$constant;
2242    relocInfo::relocType disp_reloc = relocInfo::none;
2243    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2244  %}
2245
2246  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2247  //
2248  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2249  // and it never needs relocation information.
2250  // Frequently used to move data between FPU's Stack Top and memory.
2251  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2252    int rm_byte_opcode = $rm_opcode$$constant;
2253    int base     = $mem$$base;
2254    int index    = $mem$$index;
2255    int scale    = $mem$$scale;
2256    int displace = $mem$$disp;
2257    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2258    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2259  %}
2260
2261  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2262    int rm_byte_opcode = $rm_opcode$$constant;
2263    int base     = $mem$$base;
2264    int index    = $mem$$index;
2265    int scale    = $mem$$scale;
2266    int displace = $mem$$disp;
2267    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2268    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2269  %}
2270
2271  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2272    int reg_encoding = $dst$$reg;
2273    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2274    int index        = 0x04;            // 0x04 indicates no index
2275    int scale        = 0x00;            // 0x00 indicates no scale
2276    int displace     = $src1$$constant; // 0x00 indicates no displacement
2277    relocInfo::relocType disp_reloc = relocInfo::none;
2278    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2279  %}
2280
2281  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2282    // Compare dst,src
2283    emit_opcode(cbuf,0x3B);
2284    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2285    // jump around the move if dst < src
2286    emit_opcode(cbuf,0x7C);
2287    emit_d8(cbuf,2);
2288    // move dst,src
2289    emit_opcode(cbuf,0x8B);
2290    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2291  %}
2292
2293  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2294    // Compare dst,src
2295    emit_opcode(cbuf,0x3B);
2296    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2297    // jump around the move if dst > src
2298    emit_opcode(cbuf,0x7F);
2299    emit_d8(cbuf,2);
2300    // move dst,src
2301    emit_opcode(cbuf,0x8B);
2302    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2303  %}
2304
2305  enc_class enc_FPR_store(memory mem, regDPR src) %{
2306    // If src is FPR1, we can just FST to store it.
2307    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2308    int reg_encoding = 0x2; // Just store
2309    int base  = $mem$$base;
2310    int index = $mem$$index;
2311    int scale = $mem$$scale;
2312    int displace = $mem$$disp;
2313    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2314    if( $src$$reg != FPR1L_enc ) {
2315      reg_encoding = 0x3;  // Store & pop
2316      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2317      emit_d8( cbuf, 0xC0-1+$src$$reg );
2318    }
2319    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2320    emit_opcode(cbuf,$primary);
2321    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2322  %}
2323
2324  enc_class neg_reg(rRegI dst) %{
2325    // NEG $dst
2326    emit_opcode(cbuf,0xF7);
2327    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2328  %}
2329
2330  enc_class setLT_reg(eCXRegI dst) %{
2331    // SETLT $dst
2332    emit_opcode(cbuf,0x0F);
2333    emit_opcode(cbuf,0x9C);
2334    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2335  %}
2336
2337  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2338    int tmpReg = $tmp$$reg;
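    // Branchless conditional add: SBB materializes a 0 / -1 mask from the borrow of
    // the SUB, the AND selects $y or zero, and the ADD applies it to $p.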
2339
2340    // SUB $p,$q
2341    emit_opcode(cbuf,0x2B);
2342    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2343    // SBB $tmp,$tmp
2344    emit_opcode(cbuf,0x1B);
2345    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2346    // AND $tmp,$y
2347    emit_opcode(cbuf,0x23);
2348    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2349    // ADD $p,$tmp
2350    emit_opcode(cbuf,0x03);
2351    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2352  %}
2353
2354  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
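    // If bit 5 of the shift count is set (count >= 32), first move $dst.lo into
    // $dst.hi and clear $dst.lo; the SHLD/SHL below then shift by count & 31.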
2355    // TEST shift,32
2356    emit_opcode(cbuf,0xF7);
2357    emit_rm(cbuf, 0x3, 0, ECX_enc);
2358    emit_d32(cbuf,0x20);
2359    // JEQ,s small
2360    emit_opcode(cbuf, 0x74);
2361    emit_d8(cbuf, 0x04);
2362    // MOV    $dst.hi,$dst.lo
2363    emit_opcode( cbuf, 0x8B );
2364    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2365    // CLR    $dst.lo
2366    emit_opcode(cbuf, 0x33);
2367    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2368// small:
2369    // SHLD   $dst.hi,$dst.lo,$shift
2370    emit_opcode(cbuf,0x0F);
2371    emit_opcode(cbuf,0xA5);
2372    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2373    // SHL    $dst.lo,$shift
2374    emit_opcode(cbuf,0xD3);
2375    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2376  %}
2377
2378  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
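    // Mirror image of shift_left_long: for counts >= 32 move $dst.hi into $dst.lo
    // and clear $dst.hi before the SHRD/SHR by count & 31.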
2379    // TEST shift,32
2380    emit_opcode(cbuf,0xF7);
2381    emit_rm(cbuf, 0x3, 0, ECX_enc);
2382    emit_d32(cbuf,0x20);
2383    // JEQ,s small
2384    emit_opcode(cbuf, 0x74);
2385    emit_d8(cbuf, 0x04);
2386    // MOV    $dst.lo,$dst.hi
2387    emit_opcode( cbuf, 0x8B );
2388    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2389    // CLR    $dst.hi
2390    emit_opcode(cbuf, 0x33);
2391    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2392// small:
2393    // SHRD   $dst.lo,$dst.hi,$shift
2394    emit_opcode(cbuf,0x0F);
2395    emit_opcode(cbuf,0xAD);
2396    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2397    // SHR    $dst.hi,$shift
2398    emit_opcode(cbuf,0xD3);
2399    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2400  %}
2401
2402  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
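    // Same as shift_right_long, except that for counts >= 32 the high word is
    // sign-filled with SAR $dst.hi,31 instead of being cleared.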
2403    // TEST shift,32
2404    emit_opcode(cbuf,0xF7);
2405    emit_rm(cbuf, 0x3, 0, ECX_enc);
2406    emit_d32(cbuf,0x20);
2407    // JEQ,s small
2408    emit_opcode(cbuf, 0x74);
2409    emit_d8(cbuf, 0x05);
2410    // MOV    $dst.lo,$dst.hi
2411    emit_opcode( cbuf, 0x8B );
2412    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2413    // SAR    $dst.hi,31
2414    emit_opcode(cbuf, 0xC1);
2415    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2416    emit_d8(cbuf, 0x1F );
2417// small:
2418    // SHRD   $dst.lo,$dst.hi,$shift
2419    emit_opcode(cbuf,0x0F);
2420    emit_opcode(cbuf,0xAD);
2421    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2422    // SAR    $dst.hi,$shift
2423    emit_opcode(cbuf,0xD3);
2424    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2425  %}
2426
2427
2428  // ----------------- Encodings for floating point unit -----------------
2429  // May leave result in FPU-TOS or FPU reg depending on opcodes
2430  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2431    $$$emit8$primary;
2432    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2433  %}
2434
2435  // Pop argument in FPR0 with FSTP ST(0)
2436  enc_class PopFPU() %{
2437    emit_opcode( cbuf, 0xDD );
2438    emit_d8( cbuf, 0xD8 );
2439  %}
2440
2441  // !!!!! equivalent to Pop_Reg_F
2442  enc_class Pop_Reg_DPR( regDPR dst ) %{
2443    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2444    emit_d8( cbuf, 0xD8+$dst$$reg );
2445  %}
2446
2447  enc_class Push_Reg_DPR( regDPR dst ) %{
2448    emit_opcode( cbuf, 0xD9 );
2449    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2450  %}
2451
2452  enc_class strictfp_bias1( regDPR dst ) %{
2453    emit_opcode( cbuf, 0xDB );           // FLD m80real
2454    emit_opcode( cbuf, 0x2D );
2455    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2456    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2457    emit_opcode( cbuf, 0xC8+$dst$$reg );
2458  %}
2459
2460  enc_class strictfp_bias2( regDPR dst ) %{
2461    emit_opcode( cbuf, 0xDB );           // FLD m80real
2462    emit_opcode( cbuf, 0x2D );
2463    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2464    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2465    emit_opcode( cbuf, 0xC8+$dst$$reg );
2466  %}
2467
2468  // Special case for moving an integer register to a stack slot.
2469  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2470    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2471  %}
2472
2473  // Special case for moving a register to a stack slot.
2474  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2475    // Opcode already emitted
2476    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2477    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2478    emit_d32(cbuf, $dst$$disp);   // Displacement
2479  %}
2480
2481  // Push the integer in stackSlot 'src' onto FP-stack
2482  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2483    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2484  %}
2485
2486  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2487  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2488    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2489  %}
2490
2491  // Same as Pop_Mem_F except for opcode
2492  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2493  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2494    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2495  %}
2496
2497  enc_class Pop_Reg_FPR( regFPR dst ) %{
2498    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2499    emit_d8( cbuf, 0xD8+$dst$$reg );
2500  %}
2501
2502  enc_class Push_Reg_FPR( regFPR dst ) %{
2503    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2504    emit_d8( cbuf, 0xC0-1+$dst$$reg );
2505  %}
2506
2507  // Push FPU's float to a stack-slot, and pop FPU-stack
2508  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2509    int pop = 0x02;
2510    if ($src$$reg != FPR1L_enc) {
2511      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2512      emit_d8( cbuf, 0xC0-1+$src$$reg );
2513      pop = 0x03;
2514    }
2515    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2516  %}
2517
2518  // Push FPU's double to a stack-slot, and pop FPU-stack
2519  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2520    int pop = 0x02;
2521    if ($src$$reg != FPR1L_enc) {
2522      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2523      emit_d8( cbuf, 0xC0-1+$src$$reg );
2524      pop = 0x03;
2525    }
2526    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2527  %}
2528
2529  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2530  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2531    int pop = 0xD0 - 1; // -1 since we skip FLD
2532    if ($src$$reg != FPR1L_enc) {
2533      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2534      emit_d8( cbuf, 0xC0-1+$src$$reg );
2535      pop = 0xD8;
2536    }
2537    emit_opcode( cbuf, 0xDD );
2538    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2539  %}
2540
2541
2542  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2543    // load dst in FPR0
2544    emit_opcode( cbuf, 0xD9 );
2545    emit_d8( cbuf, 0xC0-1+$dst$$reg );
2546    if ($src$$reg != FPR1L_enc) {
2547      // fincstp
2548      emit_opcode (cbuf, 0xD9);
2549      emit_opcode (cbuf, 0xF7);
2550      // swap src with FPR1:
2551      // FXCH FPR1 with src
2552      emit_opcode(cbuf, 0xD9);
2553      emit_d8(cbuf, 0xC8-1+$src$$reg );
2554      // fdecstp
2555      emit_opcode (cbuf, 0xD9);
2556      emit_opcode (cbuf, 0xF6);
2557    }
2558  %}
2559
2560  enc_class Push_ModD_encoding(regD src0, regD src1) %{
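    // Spill both XMM doubles through a stack temp onto the x87 stack;
    // $src1 is loaded first so that $src0 ends up in ST(0).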
2561    MacroAssembler _masm(&cbuf);
2562    __ subptr(rsp, 8);
2563    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2564    __ fld_d(Address(rsp, 0));
2565    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2566    __ fld_d(Address(rsp, 0));
2567  %}
2568
2569  enc_class Push_ModF_encoding(regF src0, regF src1) %{
2570    MacroAssembler _masm(&cbuf);
2571    __ subptr(rsp, 4);
2572    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2573    __ fld_s(Address(rsp, 0));
2574    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2575    __ fld_s(Address(rsp, 0));
2576  %}
2577
2578  enc_class Push_ResultD(regD dst) %{
2579    MacroAssembler _masm(&cbuf);
2580    __ fstp_d(Address(rsp, 0));
2581    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2582    __ addptr(rsp, 8);
2583  %}
2584
2585  enc_class Push_ResultF(regF dst, immI d8) %{
2586    MacroAssembler _masm(&cbuf);
2587    __ fstp_s(Address(rsp, 0));
2588    __ movflt($dst$$XMMRegister, Address(rsp, 0));
2589    __ addptr(rsp, $d8$$constant);
2590  %}
2591
2592  enc_class Push_SrcD(regD src) %{
2593    MacroAssembler _masm(&cbuf);
2594    __ subptr(rsp, 8);
2595    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2596    __ fld_d(Address(rsp, 0));
2597  %}
2598
2599  enc_class push_stack_temp_qword() %{
2600    MacroAssembler _masm(&cbuf);
2601    __ subptr(rsp, 8);
2602  %}
2603
2604  enc_class pop_stack_temp_qword() %{
2605    MacroAssembler _masm(&cbuf);
2606    __ addptr(rsp, 8);
2607  %}
2608
2609  enc_class push_xmm_to_fpr1(regD src) %{
2610    MacroAssembler _masm(&cbuf);
2611    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2612    __ fld_d(Address(rsp, 0));
2613  %}
2614
2615  enc_class Push_Result_Mod_DPR( regDPR src) %{
2616    if ($src$$reg != FPR1L_enc) {
2617      // fincstp
2618      emit_opcode (cbuf, 0xD9);
2619      emit_opcode (cbuf, 0xF7);
2620      // FXCH FPR1 with src
2621      emit_opcode(cbuf, 0xD9);
2622      emit_d8(cbuf, 0xC8-1+$src$$reg );
2623      // fdecstp
2624      emit_opcode (cbuf, 0xD9);
2625      emit_opcode (cbuf, 0xF6);
2626    }
2627    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2628    // // FSTP   FPR$dst$$reg
2629    // emit_opcode( cbuf, 0xDD );
2630    // emit_d8( cbuf, 0xD8+$dst$$reg );
2631  %}
2632
2633  enc_class fnstsw_sahf_skip_parity() %{
2634    // fnstsw ax
2635    emit_opcode( cbuf, 0xDF );
2636    emit_opcode( cbuf, 0xE0 );
2637    // sahf
2638    emit_opcode( cbuf, 0x9E );
2639    // jnp  ::skip
2640    emit_opcode( cbuf, 0x7B );
2641    emit_opcode( cbuf, 0x05 );
2642  %}
2643
2644  enc_class emitModDPR() %{
2645    // fprem must be iterative
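    // FPREM may leave a partial remainder; the FPU C2 flag (copied into the parity
    // flag by FNSTSW/SAHF) stays set until the reduction is complete, so JP loops.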
2646    // :: loop
2647    // fprem
2648    emit_opcode( cbuf, 0xD9 );
2649    emit_opcode( cbuf, 0xF8 );
2650    // wait
2651    emit_opcode( cbuf, 0x9b );
2652    // fnstsw ax
2653    emit_opcode( cbuf, 0xDF );
2654    emit_opcode( cbuf, 0xE0 );
2655    // sahf
2656    emit_opcode( cbuf, 0x9E );
2657    // jp  ::loop
2658    emit_opcode( cbuf, 0x0F );
2659    emit_opcode( cbuf, 0x8A );
2660    emit_opcode( cbuf, 0xF4 );
2661    emit_opcode( cbuf, 0xFF );
2662    emit_opcode( cbuf, 0xFF );
2663    emit_opcode( cbuf, 0xFF );
2664  %}
2665
2666  enc_class fpu_flags() %{
2667    // fnstsw_ax
2668    emit_opcode( cbuf, 0xDF);
2669    emit_opcode( cbuf, 0xE0);
2670    // test ax,0x0400
2671    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2672    emit_opcode( cbuf, 0xA9 );
2673    emit_d16   ( cbuf, 0x0400 );
2674    // // // This sequence works, but stalls for 12-16 cycles on PPro
2675    // // test rax,0x0400
2676    // emit_opcode( cbuf, 0xA9 );
2677    // emit_d32   ( cbuf, 0x00000400 );
2678    //
2679    // jz exit (no unordered comparison)
2680    emit_opcode( cbuf, 0x74 );
2681    emit_d8    ( cbuf, 0x02 );
2682    // mov ah,1 - treat as LT case (set carry flag)
2683    emit_opcode( cbuf, 0xB4 );
2684    emit_d8    ( cbuf, 0x01 );
2685    // sahf
2686    emit_opcode( cbuf, 0x9E);
2687  %}
2688
2689  enc_class cmpF_P6_fixup() %{
2690    // Fixup the integer flags in case comparison involved a NaN
2691    //
2692    // JNP exit (no unordered comparison, P-flag is set by NaN)
2693    emit_opcode( cbuf, 0x7B );
2694    emit_d8    ( cbuf, 0x03 );
2695    // MOV AH,1 - treat as LT case (set carry flag)
2696    emit_opcode( cbuf, 0xB4 );
2697    emit_d8    ( cbuf, 0x01 );
2698    // SAHF
2699    emit_opcode( cbuf, 0x9E);
2700    // NOP     // target for branch to avoid branch to branch
2701    emit_opcode( cbuf, 0x90);
2702  %}
2703
2704//     fnstsw_ax();
2705//     sahf();
2706//     movl(dst, nan_result);
2707//     jcc(Assembler::parity, exit);
2708//     movl(dst, less_result);
2709//     jcc(Assembler::below, exit);
2710//     movl(dst, equal_result);
2711//     jcc(Assembler::equal, exit);
2712//     movl(dst, greater_result);
2713
2714// less_result     =  1;
2715// greater_result  = -1;
2716// equal_result    = 0;
2717// nan_result      = -1;
2718
2719  enc_class CmpF_Result(rRegI dst) %{
2720    // fnstsw_ax();
2721    emit_opcode( cbuf, 0xDF);
2722    emit_opcode( cbuf, 0xE0);
2723    // sahf
2724    emit_opcode( cbuf, 0x9E);
2725    // movl(dst, nan_result);
2726    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2727    emit_d32( cbuf, -1 );
2728    // jcc(Assembler::parity, exit);
2729    emit_opcode( cbuf, 0x7A );
2730    emit_d8    ( cbuf, 0x13 );
2731    // movl(dst, less_result);
2732    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2733    emit_d32( cbuf, -1 );
2734    // jcc(Assembler::below, exit);
2735    emit_opcode( cbuf, 0x72 );
2736    emit_d8    ( cbuf, 0x0C );
2737    // movl(dst, equal_result);
2738    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2739    emit_d32( cbuf, 0 );
2740    // jcc(Assembler::equal, exit);
2741    emit_opcode( cbuf, 0x74 );
2742    emit_d8    ( cbuf, 0x05 );
2743    // movl(dst, greater_result);
2744    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2745    emit_d32( cbuf, 1 );
2746  %}
2747
2748
2749  // Compare the longs and set flags
2750  // BROKEN!  Do Not use as-is
2751  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2752    // CMP    $src1.hi,$src2.hi
2753    emit_opcode( cbuf, 0x3B );
2754    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2755    // JNE,s  done
2756    emit_opcode(cbuf,0x75);
2757    emit_d8(cbuf, 2 );
2758    // CMP    $src1.lo,$src2.lo
2759    emit_opcode( cbuf, 0x3B );
2760    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2761// done:
2762  %}
2763
2764  enc_class convert_int_long( regL dst, rRegI src ) %{
2765    // mov $dst.lo,$src
2766    int dst_encoding = $dst$$reg;
2767    int src_encoding = $src$$reg;
2768    encode_Copy( cbuf, dst_encoding  , src_encoding );
2769    // mov $dst.hi,$src
2770    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2771    // sar $dst.hi,31
2772    emit_opcode( cbuf, 0xC1 );
2773    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2774    emit_d8(cbuf, 0x1F );
2775  %}
2776
2777  enc_class convert_long_double( eRegL src ) %{
2778    // push $src.hi
2779    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2780    // push $src.lo
2781    emit_opcode(cbuf, 0x50+$src$$reg  );
2782    // fild 64-bits at [SP]
2783    emit_opcode(cbuf,0xdf);
2784    emit_d8(cbuf, 0x6C);
2785    emit_d8(cbuf, 0x24);
2786    emit_d8(cbuf, 0x00);
2787    // pop stack
2788    emit_opcode(cbuf, 0x83); // add  SP, #8
2789    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2790    emit_d8(cbuf, 0x8);
2791  %}
2792
2793  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2794    // IMUL   EDX:EAX,$src1
2795    emit_opcode( cbuf, 0xF7 );
2796    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2797    // SAR    EDX,$cnt-32
2798    int shift_count = ((int)$cnt$$constant) - 32;
2799    if (shift_count > 0) {
2800      emit_opcode(cbuf, 0xC1);
2801      emit_rm(cbuf, 0x3, 7, $dst$$reg );
2802      emit_d8(cbuf, shift_count);
2803    }
2804  %}
2805
2806  // this version doesn't have add sp, 8
2807  enc_class convert_long_double2( eRegL src ) %{
2808    // push $src.hi
2809    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2810    // push $src.lo
2811    emit_opcode(cbuf, 0x50+$src$$reg  );
2812    // fild 64-bits at [SP]
2813    emit_opcode(cbuf,0xdf);
2814    emit_d8(cbuf, 0x6C);
2815    emit_d8(cbuf, 0x24);
2816    emit_d8(cbuf, 0x00);
2817  %}
2818
2819  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2820    // Basic idea: long = (long)int * (long)int
2821    // IMUL EDX:EAX, src
2822    emit_opcode( cbuf, 0xF7 );
2823    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2824  %}
2825
2826  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2827    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2828    // MUL EDX:EAX, src
2829    emit_opcode( cbuf, 0xF7 );
2830    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2831  %}
2832
2833  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2834    // Basic idea: lo(result) = lo(x_lo * y_lo)
2835    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2836    // MOV    $tmp,$src.lo
2837    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2838    // IMUL   $tmp,EDX
2839    emit_opcode( cbuf, 0x0F );
2840    emit_opcode( cbuf, 0xAF );
2841    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2842    // MOV    EDX,$src.hi
2843    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2844    // IMUL   EDX,EAX
2845    emit_opcode( cbuf, 0x0F );
2846    emit_opcode( cbuf, 0xAF );
2847    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2848    // ADD    $tmp,EDX
2849    emit_opcode( cbuf, 0x03 );
2850    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2851    // MUL   EDX:EAX,$src.lo
2852    emit_opcode( cbuf, 0xF7 );
2853    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2854    // ADD    EDX,ESI
2855    emit_opcode( cbuf, 0x03 );
2856    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2857  %}
2858
2859  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2860    // Basic idea: lo(result) = lo(src * y_lo)
2861    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2862    // IMUL   $tmp,EDX,$src
2863    emit_opcode( cbuf, 0x6B );
2864    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2865    emit_d8( cbuf, (int)$src$$constant );
2866    // MOV    EDX,$src
2867    emit_opcode(cbuf, 0xB8 + EDX_enc);
2868    emit_d32( cbuf, (int)$src$$constant );
2869    // MUL   EDX:EAX,EDX
2870    emit_opcode( cbuf, 0xF7 );
2871    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2872    // ADD    EDX,$tmp
2873    emit_opcode( cbuf, 0x03 );
2874    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2875  %}
2876
2877  enc_class long_div( eRegL src1, eRegL src2 ) %{
2878    // PUSH src1.hi
2879    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2880    // PUSH src1.lo
2881    emit_opcode(cbuf,               0x50+$src1$$reg  );
2882    // PUSH src2.hi
2883    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2884    // PUSH src2.lo
2885    emit_opcode(cbuf,               0x50+$src2$$reg  );
2886    // CALL directly to the runtime
2887    cbuf.set_insts_mark();
2888    emit_opcode(cbuf,0xE8);       // Call into runtime
2889    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2890    // Restore stack
2891    emit_opcode(cbuf, 0x83); // add  SP, #framesize
2892    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2893    emit_d8(cbuf, 4*4);
2894  %}
2895
2896  enc_class long_mod( eRegL src1, eRegL src2 ) %{
2897    // PUSH src1.hi
2898    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2899    // PUSH src1.lo
2900    emit_opcode(cbuf,               0x50+$src1$$reg  );
2901    // PUSH src2.hi
2902    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2903    // PUSH src2.lo
2904    emit_opcode(cbuf,               0x50+$src2$$reg  );
2905    // CALL directly to the runtime
2906    cbuf.set_insts_mark();
2907    emit_opcode(cbuf,0xE8);       // Call into runtime
2908    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2909    // Restore stack
2910    emit_opcode(cbuf, 0x83); // add  SP, #framesize
2911    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2912    emit_d8(cbuf, 4*4);
2913  %}
2914
2915  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2916    // MOV   $tmp,$src.lo
2917    emit_opcode(cbuf, 0x8B);
2918    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2919    // OR    $tmp,$src.hi
2920    emit_opcode(cbuf, 0x0B);
2921    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2922  %}
2923
2924  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2925    // CMP    $src1.lo,$src2.lo
2926    emit_opcode( cbuf, 0x3B );
2927    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2928    // JNE,s  skip
2929    emit_cc(cbuf, 0x70, 0x5);
2930    emit_d8(cbuf,2);
2931    // CMP    $src1.hi,$src2.hi
2932    emit_opcode( cbuf, 0x3B );
2933    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2934  %}
2935
2936  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2937    // CMP    $src1.lo,$src2.lo    (long compare; set flags for low bits)
2938    emit_opcode( cbuf, 0x3B );
2939    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2940    // MOV    $tmp,$src1.hi
2941    emit_opcode( cbuf, 0x8B );
2942    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2943    // SBB    $tmp,$src2.hi        (compute flags for long compare)
2944    emit_opcode( cbuf, 0x1B );
2945    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2946  %}
2947
2948  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2949    // XOR    $tmp,$tmp
2950    emit_opcode(cbuf,0x33);  // XOR
2951    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2952    // CMP    $tmp,$src.lo
2953    emit_opcode( cbuf, 0x3B );
2954    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2955    // SBB    $tmp,$src.hi
2956    emit_opcode( cbuf, 0x1B );
2957    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2958  %}
2959
2960 // Sniff, sniff... smells like Gnu Superoptimizer
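  // Negates a 64-bit value in place: NEG of each half, then SBB hi,0
  // subtracts the borrow that NEG lo leaves in the carry flag whenever the
  // low word was non-zero, which yields a correct two's-complement negation.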
2961  enc_class neg_long( eRegL dst ) %{
2962    emit_opcode(cbuf,0xF7);    // NEG hi
2963    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2964    emit_opcode(cbuf,0xF7);    // NEG lo
2965    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2966    emit_opcode(cbuf,0x83);    // SBB hi,0
2967    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2968    emit_d8    (cbuf,0 );
2969  %}
2970
2971  enc_class enc_pop_rdx() %{
2972    emit_opcode(cbuf,0x5A);
2973  %}
2974
2975  enc_class enc_rethrow() %{
2976    cbuf.set_insts_mark();
2977    emit_opcode(cbuf, 0xE9);        // jmp    entry
2978    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2979                   runtime_call_Relocation::spec(), RELOC_IMM32 );
2980  %}
2981
2982
2983  // Convert a double to an int.  Java semantics require special handling of
2984  // the corner cases, so we set the rounding mode to 'round toward zero',
2985  // store the double down as an int, and restore the rounding mode.  With
2986  // invalid-op exceptions masked, out-of-range and NaN inputs make FISTP store
2987  // the integer-indefinite value 0x80000000, which is detected and fixed up below.
2988  enc_class DPR2I_encoding( regDPR src ) %{
2989    // Flip to round-to-zero mode.  We attempted to allow invalid-op
2990    // exceptions here, so that a NAN or other corner-case value will
2991  // throw an exception (but normal values get converted at full speed).
2992    // However, I2C adapters and other float-stack manglers leave pending
2993    // invalid-op exceptions hanging.  We would have to clear them before
2994    // enabling them and that is more expensive than just testing for the
2995    // invalid value Intel stores down in the corner cases.
2996    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2997    emit_opcode(cbuf,0x2D);
2998    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2999    // Allocate a word
3000    emit_opcode(cbuf,0x83);            // SUB ESP,4
3001    emit_opcode(cbuf,0xEC);
3002    emit_d8(cbuf,0x04);
3003    // Encoding assumes a double has been pushed into FPR0.
3004    // Store down the double as an int, popping the FPU stack
3005    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
3006    emit_opcode(cbuf,0x1C);
3007    emit_d8(cbuf,0x24);
3008    // Restore the rounding mode; mask the exception
3009    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3010    emit_opcode(cbuf,0x2D);
3011    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3012        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3013        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3014
3015    // Load the converted int; adjust CPU stack
3016    emit_opcode(cbuf,0x58);       // POP EAX
3017    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3018    emit_d32   (cbuf,0x80000000); //         0x80000000
3019    emit_opcode(cbuf,0x75);       // JNE around_slow_call
3020    emit_d8    (cbuf,0x07);       // Size of slow_call
3021    // Push src onto stack slow-path
3022    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3023    emit_d8    (cbuf,0xC0-1+$src$$reg );
3024    // CALL directly to the runtime
3025    cbuf.set_insts_mark();
3026    emit_opcode(cbuf,0xE8);       // Call into runtime
3027    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3028    // Carry on here...
3029  %}
3030
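  // Convert a double to a long.  Same round-to-zero / FISTP idiom as the int
  // case above, but storing 64 bits: out-of-range and NaN inputs make FISTP
  // store the long-indefinite pattern 0x8000000000000000, which the EDX/EAX
  // checks below detect before falling into the d2l_wrapper slow call.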
3031  enc_class DPR2L_encoding( regDPR src ) %{
3032    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3033    emit_opcode(cbuf,0x2D);
3034    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3035    // Allocate a word
3036    emit_opcode(cbuf,0x83);            // SUB ESP,8
3037    emit_opcode(cbuf,0xEC);
3038    emit_d8(cbuf,0x08);
3039    // Encoding assumes a double has been pushed into FPR0.
3040    // Store down the double as a long, popping the FPU stack
3041    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3042    emit_opcode(cbuf,0x3C);
3043    emit_d8(cbuf,0x24);
3044    // Restore the rounding mode; mask the exception
3045    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3046    emit_opcode(cbuf,0x2D);
3047    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3048        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3049        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3050
3051    // Load the converted long; adjust CPU stack
3052    emit_opcode(cbuf,0x58);       // POP EAX
3053    emit_opcode(cbuf,0x5A);       // POP EDX
3054    emit_opcode(cbuf,0x81);       // CMP EDX,imm
3055    emit_d8    (cbuf,0xFA);       // ModRM: /7 (CMP), EDX
3056    emit_d32   (cbuf,0x80000000); //         0x80000000
3057    emit_opcode(cbuf,0x75);       // JNE around_slow_call
3058    emit_d8    (cbuf,0x07+4);     // Size of slow_call
3059    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3060    emit_opcode(cbuf,0xC0);       // ModRM: EAX,EAX
3061    emit_opcode(cbuf,0x75);       // JNE around_slow_call
3062    emit_d8    (cbuf,0x07);       // Size of slow_call
3063    // Push src onto stack slow-path
3064    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3065    emit_d8    (cbuf,0xC0-1+$src$$reg );
3066    // CALL directly to the runtime
3067    cbuf.set_insts_mark();
3068    emit_opcode(cbuf,0xE8);       // Call into runtime
3069    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3070    // Carry on here...
3071  %}
3072
3073  enc_class FMul_ST_reg( eRegFPR src1 ) %{
3074    // Operand was loaded from memory into fp ST (stack top)
3075    // FMUL   ST,$src  /* D8 C8+i */
3076    emit_opcode(cbuf, 0xD8);
3077    emit_opcode(cbuf, 0xC8 + $src1$$reg);
3078  %}
3079
3080  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3081    // FADD   ST,$src2  /* D8 C0+i */
3082    emit_opcode(cbuf, 0xD8);
3083    emit_opcode(cbuf, 0xC0 + $src2$$reg);
3084    // Could use FADDP $src2,ST instead  /* DE C0+i */
3085  %}
3086
3087  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3088    // FADDP  src2,ST  /* DE C0+i */
3089    emit_opcode(cbuf, 0xDE);
3090    emit_opcode(cbuf, 0xC0 + $src2$$reg);
3091  %}
3092
3093  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3094    // Operand has been loaded into fp ST (stack top)
3095      // FSUB   ST,$src1
3096      emit_opcode(cbuf, 0xD8);
3097      emit_opcode(cbuf, 0xE0 + $src1$$reg);
3098
3099      // FDIV
3100      emit_opcode(cbuf, 0xD8);
3101      emit_opcode(cbuf, 0xF0 + $src2$$reg);
3102  %}
3103
3104  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3105    // Operand was loaded from memory into fp ST (stack top)
3106    // FADD   ST,$src  /* D8 C0+i */
3107    emit_opcode(cbuf, 0xD8);
3108    emit_opcode(cbuf, 0xC0 + $src1$$reg);
3109
3110    // FMUL   ST,$src2  /* D8 C8+i */
3111    emit_opcode(cbuf, 0xD8);
3112    emit_opcode(cbuf, 0xC8 + $src2$$reg);
3113  %}
3114
3115
3116  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3117    // Operand was loaded from memory into fp ST (stack top)
3118    // FADD   ST,$src  /* D8 C0+i */
3119    emit_opcode(cbuf, 0xD8);
3120    emit_opcode(cbuf, 0xC0 + $src1$$reg);
3121
3122    // FMULP  src2,ST  /* DE C8+i */
3123    emit_opcode(cbuf, 0xDE);
3124    emit_opcode(cbuf, 0xC8 + $src2$$reg);
3125  %}
3126
3127  // Atomically load the volatile long
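  // (The 64-bit x87 FILD below reads the value in a single memory access,
  //  unlike a pair of 32-bit integer loads, and the FISTP then spills it to
  //  the destination stack slot; that single access is what makes the load
  //  atomic.)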
3128  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3129    emit_opcode(cbuf,0xDF);
3130    int rm_byte_opcode = 0x05;
3131    int base     = $mem$$base;
3132    int index    = $mem$$index;
3133    int scale    = $mem$$scale;
3134    int displace = $mem$$disp;
3135    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3136    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3137    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3138  %}
3139
3140  // Volatile Store Long.  Must be atomic, so move it into
3141  // the FP TOS and then do a 64-bit FIST.  Has to probe the
3142  // target address before the store (for null-ptr checks)
3143  // so the memory operand is used twice in the encoding.
3144  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3145    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3146    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3147    emit_opcode(cbuf,0xDF);
3148    int rm_byte_opcode = 0x07;
3149    int base     = $mem$$base;
3150    int index    = $mem$$index;
3151    int scale    = $mem$$scale;
3152    int displace = $mem$$disp;
3153    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3154    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3155  %}
3156
3157  // Safepoint Poll.  This polls the safepoint page, and causes an
3158  // exception if it is not readable. Unfortunately, it kills the condition code
3159  // in the process
3160 // We currently use TESTL [spp],EDI
3161  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
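  // The encoding below is opcode 0x85 with ModRM mod=00, reg=111 (EDI),
  // rm=101 (disp32), i.e. TEST [polling_page],EDI with an absolute 32-bit
  // address that gets a poll relocation.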
3162
3163  enc_class Safepoint_Poll() %{
3164    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3165    emit_opcode(cbuf,0x85);
3166    emit_rm (cbuf, 0x0, 0x7, 0x5);
3167    emit_d32(cbuf, (intptr_t)os::get_polling_page());
3168  %}
3169%}
3170
3171
3172//----------FRAME--------------------------------------------------------------
3173// Definition of frame structure and management information.
3174//
3175//  S T A C K   L A Y O U T    Allocators stack-slot number
3176//                             |   (to get allocators register number
3177//  G  Owned by    |        |  v    add OptoReg::stack0())
3178//  r   CALLER     |        |
3179//  o     |        +--------+      pad to even-align allocators stack-slot
3180//  w     V        |  pad0  |        numbers; owned by CALLER
3181//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3182//  h     ^        |   in   |  5
3183//        |        |  args  |  4   Holes in incoming args owned by SELF
3184//  |     |        |        |  3
3185//  |     |        +--------+
3186//  V     |        | old out|      Empty on Intel, window on Sparc
3187//        |    old |preserve|      Must be even aligned.
3188//        |     SP-+--------+----> Matcher::_old_SP, even aligned
3189//        |        |   in   |  3   area for Intel ret address
3190//     Owned by    |preserve|      Empty on Sparc.
3191//       SELF      +--------+
3192//        |        |  pad2  |  2   pad to align old SP
3193//        |        +--------+  1
3194//        |        | locks  |  0
3195//        |        +--------+----> OptoReg::stack0(), even aligned
3196//        |        |  pad1  | 11   pad to align new SP
3197//        |        +--------+
3198//        |        |        | 10
3199//        |        | spills |  9   spills
3200//        V        |        |  8   (pad0 slot for callee)
3201//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3202//        ^        |  out   |  7
3203//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3204//     Owned by    +--------+
3205//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3206//        |    new |preserve|      Must be even-aligned.
3207//        |     SP-+--------+----> Matcher::_new_SP, even aligned
3208//        |        |        |
3209//
3210// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3211//         known from SELF's arguments and the Java calling convention.
3212//         Region 6-7 is determined per call site.
3213// Note 2: If the calling convention leaves holes in the incoming argument
3214//         area, those holes are owned by SELF.  Holes in the outgoing area
3215//         are owned by the CALLEE.  Holes should not be necessary in the
3216//         incoming area, as the Java calling convention is completely under
3217//         the control of the AD file.  Doubles can be sorted and packed to
3218//         avoid holes.  Holes in the outgoing arguments may be necessary for
3219//         varargs C calling conventions.
3220// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3221//         even aligned with pad0 as needed.
3222//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3223//         region 6-11 is even aligned; it may be padded out more so that
3224//         the region from SP to FP meets the minimum stack alignment.
3225
3226frame %{
3227  // What direction does stack grow in (assumed to be same for C & Java)
3228  stack_direction(TOWARDS_LOW);
3229
3230  // These three registers define part of the calling convention
3231  // between compiled code and the interpreter.
3232  inline_cache_reg(EAX);                // Inline Cache Register
3233  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3234
3235  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3236  cisc_spilling_operand_name(indOffset32);
3237
3238  // Number of stack slots consumed by locking an object
3239  sync_stack_slots(1);
3240
3241  // Compiled code's Frame Pointer
3242  frame_pointer(ESP);
3243  // Interpreter stores its frame pointer in a register which is
3244  // stored to the stack by I2CAdaptors.
3245  // I2CAdaptors convert from interpreted java to compiled java.
3246  interpreter_frame_pointer(EBP);
3247
3248  // Stack alignment requirement
3249  // Alignment size in bytes (128-bit -> 16 bytes)
3250  stack_alignment(StackAlignmentInBytes);
3251
3252  // Number of stack slots between incoming argument block and the start of
3253  // a new frame.  The PROLOG must add this many slots to the stack.  The
3254  // EPILOG must remove this many slots.  Intel needs one slot for
3255  // return address and one for rbp, (must save rbp)
3256  in_preserve_stack_slots(2+VerifyStackAtCalls);
3257
3258  // Number of outgoing stack slots killed above the out_preserve_stack_slots
3259  // for calls to C.  Supports the var-args backing area for register parms.
3260  varargs_C_out_slots_killed(0);
3261
3262  // The after-PROLOG location of the return address.  Location of
3263  // return address specifies a type (REG or STACK) and a number
3264  // representing the register number (i.e. - use a register name) or
3265  // stack slot.
3266  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3267  // Otherwise, it is above the locks and verification slot and alignment word
3268  return_addr(STACK - 1 +
3269              align_up((Compile::current()->in_preserve_stack_slots() +
3270                        Compile::current()->fixed_slots()),
3271                       stack_alignment_in_slots()));
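  // For example, with the usual 2 preserve slots, no fixed slots, and a
  // 16-byte (4-slot) stack alignment, align_up(2 + 0, 4) = 4, so the return
  // address ends up in stack slot 3 (illustrative numbers only).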
3272
3273  // Body of function which returns an integer array locating
3274  // arguments either in registers or in stack slots.  Passed an array
3275  // of ideal registers called "sig" and a "length" count.  Stack-slot
3276  // offsets are based on outgoing arguments, i.e. a CALLER setting up
3277  // arguments for a CALLEE.  Incoming stack arguments are
3278  // automatically biased by the preserve_stack_slots field above.
3279  calling_convention %{
3280    // No difference between incoming and outgoing, so just pass false
3281    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3282  %}
3283
3284
3285  // Body of function which returns an integer array locating
3286  // arguments either in registers or in stack slots.  Passed an array
3287  // of ideal registers called "sig" and a "length" count.  Stack-slot
3288  // offsets are based on outgoing arguments, i.e. a CALLER setting up
3289  // arguments for a CALLEE.  Incoming stack arguments are
3290  // automatically biased by the preserve_stack_slots field above.
3291  c_calling_convention %{
3292    // This is obviously always outgoing
3293    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3294  %}
3295
3296  // Location of C & interpreter return values
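  // The lo/hi tables below are indexed by ideal register type: integer and
  // pointer results come back in EAX, longs in EDX:EAX, and floating-point
  // results on the FPU stack in FPR1 (FPR1H:FPR1L for doubles), unless the
  // SSE special cases below redirect them to XMM0.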
3297  c_return_value %{
3298    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3299    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3300    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3301
3302    // in SSE2+ mode we want to keep the FPU stack clean so pretend
3303    // that C functions return float and double results in XMM0.
3304    if( ideal_reg == Op_RegD && UseSSE>=2 )
3305      return OptoRegPair(XMM0b_num,XMM0_num);
3306    if( ideal_reg == Op_RegF && UseSSE>=2 )
3307      return OptoRegPair(OptoReg::Bad,XMM0_num);
3308
3309    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3310  %}
3311
3312  // Location of return values
3313  return_value %{
3314    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3315    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3316    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3317    if( ideal_reg == Op_RegD && UseSSE>=2 )
3318      return OptoRegPair(XMM0b_num,XMM0_num);
3319    if( ideal_reg == Op_RegF && UseSSE>=1 )
3320      return OptoRegPair(OptoReg::Bad,XMM0_num);
3321    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3322  %}
3323
3324%}
3325
3326//----------ATTRIBUTES---------------------------------------------------------
3327//----------Operand Attributes-------------------------------------------------
3328op_attrib op_cost(0);        // Required cost attribute
3329
3330//----------Instruction Attributes---------------------------------------------
3331ins_attrib ins_cost(100);       // Required cost attribute
3332ins_attrib ins_size(8);         // Required size attribute (in bits)
3333ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3334                                // non-matching short branch variant of some
3335                                // long branch?
3336ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3337                                // specifies the alignment that some part of the instruction (not
3338                                // necessarily the start) requires.  If > 1, a compute_padding()
3339                                // function must be provided for the instruction
3340
3341//----------OPERANDS-----------------------------------------------------------
3342// Operand definitions must precede instruction definitions for correct parsing
3343// in the ADLC because operands constitute user defined types which are used in
3344// instruction definitions.
3345
3346//----------Simple Operands----------------------------------------------------
3347// Immediate Operands
3348// Integer Immediate
3349operand immI() %{
3350  match(ConI);
3351
3352  op_cost(10);
3353  format %{ %}
3354  interface(CONST_INTER);
3355%}
3356
3357// Constant for test vs zero
3358operand immI0() %{
3359  predicate(n->get_int() == 0);
3360  match(ConI);
3361
3362  op_cost(0);
3363  format %{ %}
3364  interface(CONST_INTER);
3365%}
3366
3367// Constant for increment
3368operand immI1() %{
3369  predicate(n->get_int() == 1);
3370  match(ConI);
3371
3372  op_cost(0);
3373  format %{ %}
3374  interface(CONST_INTER);
3375%}
3376
3377// Constant for decrement
3378operand immI_M1() %{
3379  predicate(n->get_int() == -1);
3380  match(ConI);
3381
3382  op_cost(0);
3383  format %{ %}
3384  interface(CONST_INTER);
3385%}
3386
3387// Valid scale values for addressing modes
3388operand immI2() %{
3389  predicate(0 <= n->get_int() && (n->get_int() <= 3));
3390  match(ConI);
3391
3392  format %{ %}
3393  interface(CONST_INTER);
3394%}
3395
3396operand immI8() %{
3397  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3398  match(ConI);
3399
3400  op_cost(5);
3401  format %{ %}
3402  interface(CONST_INTER);
3403%}
3404
3405operand immI16() %{
3406  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3407  match(ConI);
3408
3409  op_cost(10);
3410  format %{ %}
3411  interface(CONST_INTER);
3412%}
3413
3414// Int Immediate non-negative
3415operand immU31()
3416%{
3417  predicate(n->get_int() >= 0);
3418  match(ConI);
3419
3420  op_cost(0);
3421  format %{ %}
3422  interface(CONST_INTER);
3423%}
3424
3425// Constant for long shifts
3426operand immI_32() %{
3427  predicate( n->get_int() == 32 );
3428  match(ConI);
3429
3430  op_cost(0);
3431  format %{ %}
3432  interface(CONST_INTER);
3433%}
3434
3435operand immI_1_31() %{
3436  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3437  match(ConI);
3438
3439  op_cost(0);
3440  format %{ %}
3441  interface(CONST_INTER);
3442%}
3443
3444operand immI_32_63() %{
3445  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3446  match(ConI);
3447  op_cost(0);
3448
3449  format %{ %}
3450  interface(CONST_INTER);
3451%}
3452
3453operand immI_1() %{
3454  predicate( n->get_int() == 1 );
3455  match(ConI);
3456
3457  op_cost(0);
3458  format %{ %}
3459  interface(CONST_INTER);
3460%}
3461
3462operand immI_2() %{
3463  predicate( n->get_int() == 2 );
3464  match(ConI);
3465
3466  op_cost(0);
3467  format %{ %}
3468  interface(CONST_INTER);
3469%}
3470
3471operand immI_3() %{
3472  predicate( n->get_int() == 3 );
3473  match(ConI);
3474
3475  op_cost(0);
3476  format %{ %}
3477  interface(CONST_INTER);
3478%}
3479
3480// Pointer Immediate
3481operand immP() %{
3482  match(ConP);
3483
3484  op_cost(10);
3485  format %{ %}
3486  interface(CONST_INTER);
3487%}
3488
3489// NULL Pointer Immediate
3490operand immP0() %{
3491  predicate( n->get_ptr() == 0 );
3492  match(ConP);
3493  op_cost(0);
3494
3495  format %{ %}
3496  interface(CONST_INTER);
3497%}
3498
3499// Long Immediate
3500operand immL() %{
3501  match(ConL);
3502
3503  op_cost(20);
3504  format %{ %}
3505  interface(CONST_INTER);
3506%}
3507
3508// Long Immediate zero
3509operand immL0() %{
3510  predicate( n->get_long() == 0L );
3511  match(ConL);
3512  op_cost(0);
3513
3514  format %{ %}
3515  interface(CONST_INTER);
3516%}
3517
3518// Long Immediate -1
3519operand immL_M1() %{
3520  predicate( n->get_long() == -1L );
3521  match(ConL);
3522  op_cost(0);
3523
3524  format %{ %}
3525  interface(CONST_INTER);
3526%}
3527
3528// Long immediate from 0 to 127.
3529// Used for a shorter form of long mul by 10.
3530operand immL_127() %{
3531  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3532  match(ConL);
3533  op_cost(0);
3534
3535  format %{ %}
3536  interface(CONST_INTER);
3537%}
3538
3539// Long Immediate: low 32-bit mask
3540operand immL_32bits() %{
3541  predicate(n->get_long() == 0xFFFFFFFFL);
3542  match(ConL);
3543  op_cost(0);
3544
3545  format %{ %}
3546  interface(CONST_INTER);
3547%}
3548
3549// Long Immediate: value fits in a signed 32-bit int
3550operand immL32() %{
3551  predicate(n->get_long() == (int)(n->get_long()));
3552  match(ConL);
3553  op_cost(20);
3554
3555  format %{ %}
3556  interface(CONST_INTER);
3557%}
3558
3559// Double Immediate zero
3560operand immDPR0() %{
3561  // Do additional (and counter-intuitive) test against NaN to work around VC++
3562  // bug that generates code such that NaNs compare equal to 0.0
3563  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3564  match(ConD);
3565
3566  op_cost(5);
3567  format %{ %}
3568  interface(CONST_INTER);
3569%}
3570
3571// Double Immediate one
3572operand immDPR1() %{
3573  predicate( UseSSE<=1 && n->getd() == 1.0 );
3574  match(ConD);
3575
3576  op_cost(5);
3577  format %{ %}
3578  interface(CONST_INTER);
3579%}
3580
3581// Double Immediate
3582operand immDPR() %{
3583  predicate(UseSSE<=1);
3584  match(ConD);
3585
3586  op_cost(5);
3587  format %{ %}
3588  interface(CONST_INTER);
3589%}
3590
3591operand immD() %{
3592  predicate(UseSSE>=2);
3593  match(ConD);
3594
3595  op_cost(5);
3596  format %{ %}
3597  interface(CONST_INTER);
3598%}
3599
3600// Double Immediate zero
3601operand immD0() %{
3602  // Do additional (and counter-intuitive) test against NaN to work around VC++
3603  // bug that generates code such that NaNs compare equal to 0.0 AND do not
3604  // compare equal to -0.0.
3605  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3606  match(ConD);
3607
3608  format %{ %}
3609  interface(CONST_INTER);
3610%}
3611
3612// Float Immediate zero
3613operand immFPR0() %{
3614  predicate(UseSSE == 0 && n->getf() == 0.0F);
3615  match(ConF);
3616
3617  op_cost(5);
3618  format %{ %}
3619  interface(CONST_INTER);
3620%}
3621
3622// Float Immediate one
3623operand immFPR1() %{
3624  predicate(UseSSE == 0 && n->getf() == 1.0F);
3625  match(ConF);
3626
3627  op_cost(5);
3628  format %{ %}
3629  interface(CONST_INTER);
3630%}
3631
3632// Float Immediate
3633operand immFPR() %{
3634  predicate( UseSSE == 0 );
3635  match(ConF);
3636
3637  op_cost(5);
3638  format %{ %}
3639  interface(CONST_INTER);
3640%}
3641
3642// Float Immediate
3643operand immF() %{
3644  predicate(UseSSE >= 1);
3645  match(ConF);
3646
3647  op_cost(5);
3648  format %{ %}
3649  interface(CONST_INTER);
3650%}
3651
3652// Float Immediate zero.  Zero and not -0.0
3653operand immF0() %{
3654  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3655  match(ConF);
3656
3657  op_cost(5);
3658  format %{ %}
3659  interface(CONST_INTER);
3660%}
3661
3662// Immediates for special shifts (sign extend)
3663
3664// Shift counts used when sign-extending (16 for shorts, 24 for bytes)
3665operand immI_16() %{
3666  predicate( n->get_int() == 16 );
3667  match(ConI);
3668
3669  format %{ %}
3670  interface(CONST_INTER);
3671%}
3672
3673operand immI_24() %{
3674  predicate( n->get_int() == 24 );
3675  match(ConI);
3676
3677  format %{ %}
3678  interface(CONST_INTER);
3679%}
3680
3681// Constant for byte-wide masking
3682operand immI_255() %{
3683  predicate( n->get_int() == 255 );
3684  match(ConI);
3685
3686  format %{ %}
3687  interface(CONST_INTER);
3688%}
3689
3690// Constant for short-wide masking
3691operand immI_65535() %{
3692  predicate(n->get_int() == 65535);
3693  match(ConI);
3694
3695  format %{ %}
3696  interface(CONST_INTER);
3697%}
3698
3699// Register Operands
3700// Integer Register
3701operand rRegI() %{
3702  constraint(ALLOC_IN_RC(int_reg));
3703  match(RegI);
3704  match(xRegI);
3705  match(eAXRegI);
3706  match(eBXRegI);
3707  match(eCXRegI);
3708  match(eDXRegI);
3709  match(eDIRegI);
3710  match(eSIRegI);
3711
3712  format %{ %}
3713  interface(REG_INTER);
3714%}
3715
3716// Subset of Integer Register
3717operand xRegI(rRegI reg) %{
3718  constraint(ALLOC_IN_RC(int_x_reg));
3719  match(reg);
3720  match(eAXRegI);
3721  match(eBXRegI);
3722  match(eCXRegI);
3723  match(eDXRegI);
3724
3725  format %{ %}
3726  interface(REG_INTER);
3727%}
3728
3729// Special Registers
3730operand eAXRegI(xRegI reg) %{
3731  constraint(ALLOC_IN_RC(eax_reg));
3732  match(reg);
3733  match(rRegI);
3734
3735  format %{ "EAX" %}
3736  interface(REG_INTER);
3737%}
3738
3739// Special Registers
3740operand eBXRegI(xRegI reg) %{
3741  constraint(ALLOC_IN_RC(ebx_reg));
3742  match(reg);
3743  match(rRegI);
3744
3745  format %{ "EBX" %}
3746  interface(REG_INTER);
3747%}
3748
3749operand eCXRegI(xRegI reg) %{
3750  constraint(ALLOC_IN_RC(ecx_reg));
3751  match(reg);
3752  match(rRegI);
3753
3754  format %{ "ECX" %}
3755  interface(REG_INTER);
3756%}
3757
3758operand eDXRegI(xRegI reg) %{
3759  constraint(ALLOC_IN_RC(edx_reg));
3760  match(reg);
3761  match(rRegI);
3762
3763  format %{ "EDX" %}
3764  interface(REG_INTER);
3765%}
3766
3767operand eDIRegI(xRegI reg) %{
3768  constraint(ALLOC_IN_RC(edi_reg));
3769  match(reg);
3770  match(rRegI);
3771
3772  format %{ "EDI" %}
3773  interface(REG_INTER);
3774%}
3775
3776operand naxRegI() %{
3777  constraint(ALLOC_IN_RC(nax_reg));
3778  match(RegI);
3779  match(eCXRegI);
3780  match(eDXRegI);
3781  match(eSIRegI);
3782  match(eDIRegI);
3783
3784  format %{ %}
3785  interface(REG_INTER);
3786%}
3787
3788operand nadxRegI() %{
3789  constraint(ALLOC_IN_RC(nadx_reg));
3790  match(RegI);
3791  match(eBXRegI);
3792  match(eCXRegI);
3793  match(eSIRegI);
3794  match(eDIRegI);
3795
3796  format %{ %}
3797  interface(REG_INTER);
3798%}
3799
3800operand ncxRegI() %{
3801  constraint(ALLOC_IN_RC(ncx_reg));
3802  match(RegI);
3803  match(eAXRegI);
3804  match(eDXRegI);
3805  match(eSIRegI);
3806  match(eDIRegI);
3807
3808  format %{ %}
3809  interface(REG_INTER);
3810%}
3811
3812// This operand was used by cmpFastUnlock, but conflicted with the 'object' reg
3814operand eSIRegI(xRegI reg) %{
3815   constraint(ALLOC_IN_RC(esi_reg));
3816   match(reg);
3817   match(rRegI);
3818
3819   format %{ "ESI" %}
3820   interface(REG_INTER);
3821%}
3822
3823// Pointer Register
3824operand anyRegP() %{
3825  constraint(ALLOC_IN_RC(any_reg));
3826  match(RegP);
3827  match(eAXRegP);
3828  match(eBXRegP);
3829  match(eCXRegP);
3830  match(eDIRegP);
3831  match(eRegP);
3832
3833  format %{ %}
3834  interface(REG_INTER);
3835%}
3836
3837operand eRegP() %{
3838  constraint(ALLOC_IN_RC(int_reg));
3839  match(RegP);
3840  match(eAXRegP);
3841  match(eBXRegP);
3842  match(eCXRegP);
3843  match(eDIRegP);
3844
3845  format %{ %}
3846  interface(REG_INTER);
3847%}
3848
3849// On Windows 95, EBP is not safe to use for implicit null tests.
3850operand eRegP_no_EBP() %{
3851  constraint(ALLOC_IN_RC(int_reg_no_ebp));
3852  match(RegP);
3853  match(eAXRegP);
3854  match(eBXRegP);
3855  match(eCXRegP);
3856  match(eDIRegP);
3857
3858  op_cost(100);
3859  format %{ %}
3860  interface(REG_INTER);
3861%}
3862
3863operand naxRegP() %{
3864  constraint(ALLOC_IN_RC(nax_reg));
3865  match(RegP);
3866  match(eBXRegP);
3867  match(eDXRegP);
3868  match(eCXRegP);
3869  match(eSIRegP);
3870  match(eDIRegP);
3871
3872  format %{ %}
3873  interface(REG_INTER);
3874%}
3875
3876operand nabxRegP() %{
3877  constraint(ALLOC_IN_RC(nabx_reg));
3878  match(RegP);
3879  match(eCXRegP);
3880  match(eDXRegP);
3881  match(eSIRegP);
3882  match(eDIRegP);
3883
3884  format %{ %}
3885  interface(REG_INTER);
3886%}
3887
3888operand pRegP() %{
3889  constraint(ALLOC_IN_RC(p_reg));
3890  match(RegP);
3891  match(eBXRegP);
3892  match(eDXRegP);
3893  match(eSIRegP);
3894  match(eDIRegP);
3895
3896  format %{ %}
3897  interface(REG_INTER);
3898%}
3899
3900// Special Registers
3901// Return a pointer value
3902operand eAXRegP(eRegP reg) %{
3903  constraint(ALLOC_IN_RC(eax_reg));
3904  match(reg);
3905  format %{ "EAX" %}
3906  interface(REG_INTER);
3907%}
3908
3909// Used in AtomicAdd
3910operand eBXRegP(eRegP reg) %{
3911  constraint(ALLOC_IN_RC(ebx_reg));
3912  match(reg);
3913  format %{ "EBX" %}
3914  interface(REG_INTER);
3915%}
3916
3917// Tail-call (interprocedural jump) to interpreter
3918operand eCXRegP(eRegP reg) %{
3919  constraint(ALLOC_IN_RC(ecx_reg));
3920  match(reg);
3921  format %{ "ECX" %}
3922  interface(REG_INTER);
3923%}
3924
3925operand eSIRegP(eRegP reg) %{
3926  constraint(ALLOC_IN_RC(esi_reg));
3927  match(reg);
3928  format %{ "ESI" %}
3929  interface(REG_INTER);
3930%}
3931
3932// Used in rep stosw
3933operand eDIRegP(eRegP reg) %{
3934  constraint(ALLOC_IN_RC(edi_reg));
3935  match(reg);
3936  format %{ "EDI" %}
3937  interface(REG_INTER);
3938%}
3939
3940operand eRegL() %{
3941  constraint(ALLOC_IN_RC(long_reg));
3942  match(RegL);
3943  match(eADXRegL);
3944
3945  format %{ %}
3946  interface(REG_INTER);
3947%}
3948
3949operand eADXRegL( eRegL reg ) %{
3950  constraint(ALLOC_IN_RC(eadx_reg));
3951  match(reg);
3952
3953  format %{ "EDX:EAX" %}
3954  interface(REG_INTER);
3955%}
3956
3957operand eBCXRegL( eRegL reg ) %{
3958  constraint(ALLOC_IN_RC(ebcx_reg));
3959  match(reg);
3960
3961  format %{ "EBX:ECX" %}
3962  interface(REG_INTER);
3963%}
3964
3965// Special case for integer high multiply
3966operand eADXRegL_low_only() %{
3967  constraint(ALLOC_IN_RC(eadx_reg));
3968  match(RegL);
3969
3970  format %{ "EAX" %}
3971  interface(REG_INTER);
3972%}
3973
3974// Flags register, used as output of compare instructions
3975operand eFlagsReg() %{
3976  constraint(ALLOC_IN_RC(int_flags));
3977  match(RegFlags);
3978
3979  format %{ "EFLAGS" %}
3980  interface(REG_INTER);
3981%}
3982
3983// Flags register, used as output of FLOATING POINT compare instructions
3984operand eFlagsRegU() %{
3985  constraint(ALLOC_IN_RC(int_flags));
3986  match(RegFlags);
3987
3988  format %{ "EFLAGS_U" %}
3989  interface(REG_INTER);
3990%}
3991
3992operand eFlagsRegUCF() %{
3993  constraint(ALLOC_IN_RC(int_flags));
3994  match(RegFlags);
3995  predicate(false);
3996
3997  format %{ "EFLAGS_U_CF" %}
3998  interface(REG_INTER);
3999%}
4000
4001// Condition Code Register used by long compare
4002operand flagsReg_long_LTGE() %{
4003  constraint(ALLOC_IN_RC(int_flags));
4004  match(RegFlags);
4005  format %{ "FLAGS_LTGE" %}
4006  interface(REG_INTER);
4007%}
4008operand flagsReg_long_EQNE() %{
4009  constraint(ALLOC_IN_RC(int_flags));
4010  match(RegFlags);
4011  format %{ "FLAGS_EQNE" %}
4012  interface(REG_INTER);
4013%}
4014operand flagsReg_long_LEGT() %{
4015  constraint(ALLOC_IN_RC(int_flags));
4016  match(RegFlags);
4017  format %{ "FLAGS_LEGT" %}
4018  interface(REG_INTER);
4019%}
4020
4021// Condition Code Register used by unsigned long compare
4022operand flagsReg_ulong_LTGE() %{
4023  constraint(ALLOC_IN_RC(int_flags));
4024  match(RegFlags);
4025  format %{ "FLAGS_U_LTGE" %}
4026  interface(REG_INTER);
4027%}
4028operand flagsReg_ulong_EQNE() %{
4029  constraint(ALLOC_IN_RC(int_flags));
4030  match(RegFlags);
4031  format %{ "FLAGS_U_EQNE" %}
4032  interface(REG_INTER);
4033%}
4034operand flagsReg_ulong_LEGT() %{
4035  constraint(ALLOC_IN_RC(int_flags));
4036  match(RegFlags);
4037  format %{ "FLAGS_U_LEGT" %}
4038  interface(REG_INTER);
4039%}
4040
4041// Float register operands
4042operand regDPR() %{
4043  predicate( UseSSE < 2 );
4044  constraint(ALLOC_IN_RC(fp_dbl_reg));
4045  match(RegD);
4046  match(regDPR1);
4047  match(regDPR2);
4048  format %{ %}
4049  interface(REG_INTER);
4050%}
4051
4052operand regDPR1(regDPR reg) %{
4053  predicate( UseSSE < 2 );
4054  constraint(ALLOC_IN_RC(fp_dbl_reg0));
4055  match(reg);
4056  format %{ "FPR1" %}
4057  interface(REG_INTER);
4058%}
4059
4060operand regDPR2(regDPR reg) %{
4061  predicate( UseSSE < 2 );
4062  constraint(ALLOC_IN_RC(fp_dbl_reg1));
4063  match(reg);
4064  format %{ "FPR2" %}
4065  interface(REG_INTER);
4066%}
4067
4068operand regnotDPR1(regDPR reg) %{
4069  predicate( UseSSE < 2 );
4070  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4071  match(reg);
4072  format %{ %}
4073  interface(REG_INTER);
4074%}
4075
4076// Float register operands
4077operand regFPR() %{
4078  predicate( UseSSE < 2 );
4079  constraint(ALLOC_IN_RC(fp_flt_reg));
4080  match(RegF);
4081  match(regFPR1);
4082  format %{ %}
4083  interface(REG_INTER);
4084%}
4085
4086// Float register operands
4087operand regFPR1(regFPR reg) %{
4088  predicate( UseSSE < 2 );
4089  constraint(ALLOC_IN_RC(fp_flt_reg0));
4090  match(reg);
4091  format %{ "FPR1" %}
4092  interface(REG_INTER);
4093%}
4094
4095// XMM Float register operands
4096operand regF() %{
4097  predicate( UseSSE>=1 );
4098  constraint(ALLOC_IN_RC(float_reg_legacy));
4099  match(RegF);
4100  format %{ %}
4101  interface(REG_INTER);
4102%}
4103
4104// Float register operands
4105operand vlRegF() %{
4106   constraint(ALLOC_IN_RC(float_reg_vl));
4107   match(RegF);
4108
4109   format %{ %}
4110   interface(REG_INTER);
4111%}
4112
4113// XMM Double register operands
4114operand regD() %{
4115  predicate( UseSSE>=2 );
4116  constraint(ALLOC_IN_RC(double_reg_legacy));
4117  match(RegD);
4118  format %{ %}
4119  interface(REG_INTER);
4120%}
4121
4122// Double register operands
4123operand vlRegD() %{
4124   constraint(ALLOC_IN_RC(double_reg_vl));
4125   match(RegD);
4126
4127   format %{ %}
4128   interface(REG_INTER);
4129%}
4130
4131// Vectors: note that we use legacy registers to avoid extra (unneeded in the 32-bit VM)
4132// runtime code generation via reg_class_dynamic.
4133operand vecS() %{
4134  constraint(ALLOC_IN_RC(vectors_reg_legacy));
4135  match(VecS);
4136
4137  format %{ %}
4138  interface(REG_INTER);
4139%}
4140
4141operand legVecS() %{
4142  constraint(ALLOC_IN_RC(vectors_reg_legacy));
4143  match(VecS);
4144
4145  format %{ %}
4146  interface(REG_INTER);
4147%}
4148
4149operand vecD() %{
4150  constraint(ALLOC_IN_RC(vectord_reg_legacy));
4151  match(VecD);
4152
4153  format %{ %}
4154  interface(REG_INTER);
4155%}
4156
4157operand legVecD() %{
4158  constraint(ALLOC_IN_RC(vectord_reg_legacy));
4159  match(VecD);
4160
4161  format %{ %}
4162  interface(REG_INTER);
4163%}
4164
4165operand vecX() %{
4166  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4167  match(VecX);
4168
4169  format %{ %}
4170  interface(REG_INTER);
4171%}
4172
4173operand legVecX() %{
4174  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4175  match(VecX);
4176
4177  format %{ %}
4178  interface(REG_INTER);
4179%}
4180
4181operand vecY() %{
4182  constraint(ALLOC_IN_RC(vectory_reg_legacy));
4183  match(VecY);
4184
4185  format %{ %}
4186  interface(REG_INTER);
4187%}
4188
4189operand legVecY() %{
4190  constraint(ALLOC_IN_RC(vectory_reg_legacy));
4191  match(VecY);
4192
4193  format %{ %}
4194  interface(REG_INTER);
4195%}
4196
4197//----------Memory Operands----------------------------------------------------
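// In the MEMORY_INTER descriptions below, index(0x4) means "no index
// register": 0x4 is ESP's encoding, and ESP cannot be used as an index in a
// SIB byte, so it doubles as the "none" marker.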
4198// Direct Memory Operand
4199operand direct(immP addr) %{
4200  match(addr);
4201
4202  format %{ "[$addr]" %}
4203  interface(MEMORY_INTER) %{
4204    base(0xFFFFFFFF);
4205    index(0x4);
4206    scale(0x0);
4207    disp($addr);
4208  %}
4209%}
4210
4211// Indirect Memory Operand
4212operand indirect(eRegP reg) %{
4213  constraint(ALLOC_IN_RC(int_reg));
4214  match(reg);
4215
4216  format %{ "[$reg]" %}
4217  interface(MEMORY_INTER) %{
4218    base($reg);
4219    index(0x4);
4220    scale(0x0);
4221    disp(0x0);
4222  %}
4223%}
4224
4225// Indirect Memory Plus Short Offset Operand
4226operand indOffset8(eRegP reg, immI8 off) %{
4227  match(AddP reg off);
4228
4229  format %{ "[$reg + $off]" %}
4230  interface(MEMORY_INTER) %{
4231    base($reg);
4232    index(0x4);
4233    scale(0x0);
4234    disp($off);
4235  %}
4236%}
4237
4238// Indirect Memory Plus Long Offset Operand
4239operand indOffset32(eRegP reg, immI off) %{
4240  match(AddP reg off);
4241
4242  format %{ "[$reg + $off]" %}
4243  interface(MEMORY_INTER) %{
4244    base($reg);
4245    index(0x4);
4246    scale(0x0);
4247    disp($off);
4248  %}
4249%}
4250
4251// Indirect Memory Plus Long Offset Operand
4252operand indOffset32X(rRegI reg, immP off) %{
4253  match(AddP off reg);
4254
4255  format %{ "[$reg + $off]" %}
4256  interface(MEMORY_INTER) %{
4257    base($reg);
4258    index(0x4);
4259    scale(0x0);
4260    disp($off);
4261  %}
4262%}
4263
4264// Indirect Memory Plus Index Register Plus Offset Operand
4265operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4266  match(AddP (AddP reg ireg) off);
4267
4268  op_cost(10);
4269  format %{"[$reg + $off + $ireg]" %}
4270  interface(MEMORY_INTER) %{
4271    base($reg);
4272    index($ireg);
4273    scale(0x0);
4274    disp($off);
4275  %}
4276%}
4277
4278// Indirect Memory Plus Index Register Plus Offset Operand
4279operand indIndex(eRegP reg, rRegI ireg) %{
4280  match(AddP reg ireg);
4281
4282  op_cost(10);
4283  format %{"[$reg + $ireg]" %}
4284  interface(MEMORY_INTER) %{
4285    base($reg);
4286    index($ireg);
4287    scale(0x0);
4288    disp(0x0);
4289  %}
4290%}
4291
4292// // -------------------------------------------------------------------------
4293// // 486 architecture doesn't support "scale * index + offset" without a base
4294// // -------------------------------------------------------------------------
4295// // Scaled Memory Operands
4296// // Indirect Memory Times Scale Plus Offset Operand
4297// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4298//   match(AddP off (LShiftI ireg scale));
4299//
4300//   op_cost(10);
4301//   format %{"[$off + $ireg << $scale]" %}
4302//   interface(MEMORY_INTER) %{
4303//     base(0x4);
4304//     index($ireg);
4305//     scale($scale);
4306//     disp($off);
4307//   %}
4308// %}
4309
4310// Indirect Memory Times Scale Plus Index Register
4311operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4312  match(AddP reg (LShiftI ireg scale));
4313
4314  op_cost(10);
4315  format %{"[$reg + $ireg << $scale]" %}
4316  interface(MEMORY_INTER) %{
4317    base($reg);
4318    index($ireg);
4319    scale($scale);
4320    disp(0x0);
4321  %}
4322%}
4323
4324// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4325operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4326  match(AddP (AddP reg (LShiftI ireg scale)) off);
4327
4328  op_cost(10);
4329  format %{"[$reg + $off + $ireg << $scale]" %}
4330  interface(MEMORY_INTER) %{
4331    base($reg);
4332    index($ireg);
4333    scale($scale);
4334    disp($off);
4335  %}
4336%}
4337
4338//----------Load Long Memory Operands------------------------------------------
4339// The load-long idiom will use its address expression again after loading
4340// the first word of the long.  If the load-long destination overlaps with
4341// registers used in the addressing expression, the 2nd half will be loaded
4342// from a clobbered address.  Fix this by requiring that load-long use
4343// address registers that do not overlap with the load-long target.
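// For example, a load-long into EDX:EAX from [EAX + 8] would clobber EAX with
// the first 32-bit word and then form the second word's address from the
// clobbered value; constraining the base to ESI below avoids the overlap.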
4344
4345// load-long support
4346operand load_long_RegP() %{
4347  constraint(ALLOC_IN_RC(esi_reg));
4348  match(RegP);
4349  match(eSIRegP);
4350  op_cost(100);
4351  format %{  %}
4352  interface(REG_INTER);
4353%}
4354
4355// Indirect Memory Operand Long
4356operand load_long_indirect(load_long_RegP reg) %{
4357  constraint(ALLOC_IN_RC(esi_reg));
4358  match(reg);
4359
4360  format %{ "[$reg]" %}
4361  interface(MEMORY_INTER) %{
4362    base($reg);
4363    index(0x4);
4364    scale(0x0);
4365    disp(0x0);
4366  %}
4367%}
4368
4369// Indirect Memory Plus Long Offset Operand
4370operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4371  match(AddP reg off);
4372
4373  format %{ "[$reg + $off]" %}
4374  interface(MEMORY_INTER) %{
4375    base($reg);
4376    index(0x4);
4377    scale(0x0);
4378    disp($off);
4379  %}
4380%}
4381
4382opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4383
4384
4385//----------Special Memory Operands--------------------------------------------
4386// Stack Slot Operand - This operand is used for loading and storing temporary
4387//                      values on the stack where a match requires a value to
4388//                      flow through memory.
4389operand stackSlotP(sRegP reg) %{
4390  constraint(ALLOC_IN_RC(stack_slots));
4391  // No match rule because this operand is only generated in matching
4392  format %{ "[$reg]" %}
4393  interface(MEMORY_INTER) %{
4394    base(0x4);   // ESP
4395    index(0x4);  // No Index
4396    scale(0x0);  // No Scale
4397    disp($reg);  // Stack Offset
4398  %}
4399%}
4400
4401operand stackSlotI(sRegI reg) %{
4402  constraint(ALLOC_IN_RC(stack_slots));
4403  // No match rule because this operand is only generated in matching
4404  format %{ "[$reg]" %}
4405  interface(MEMORY_INTER) %{
4406    base(0x4);   // ESP
4407    index(0x4);  // No Index
4408    scale(0x0);  // No Scale
4409    disp($reg);  // Stack Offset
4410  %}
4411%}
4412
4413operand stackSlotF(sRegF reg) %{
4414  constraint(ALLOC_IN_RC(stack_slots));
4415  // No match rule because this operand is only generated in matching
4416  format %{ "[$reg]" %}
4417  interface(MEMORY_INTER) %{
4418    base(0x4);   // ESP
4419    index(0x4);  // No Index
4420    scale(0x0);  // No Scale
4421    disp($reg);  // Stack Offset
4422  %}
4423%}
4424
4425operand stackSlotD(sRegD reg) %{
4426  constraint(ALLOC_IN_RC(stack_slots));
4427  // No match rule because this operand is only generated in matching
4428  format %{ "[$reg]" %}
4429  interface(MEMORY_INTER) %{
4430    base(0x4);   // ESP
4431    index(0x4);  // No Index
4432    scale(0x0);  // No Scale
4433    disp($reg);  // Stack Offset
4434  %}
4435%}
4436
4437operand stackSlotL(sRegL reg) %{
4438  constraint(ALLOC_IN_RC(stack_slots));
4439  // No match rule because this operand is only generated in matching
4440  format %{ "[$reg]" %}
4441  interface(MEMORY_INTER) %{
4442    base(0x4);   // ESP
4443    index(0x4);  // No Index
4444    scale(0x0);  // No Scale
4445    disp($reg);  // Stack Offset
4446  %}
4447%}
4448
4449//----------Memory Operands - Win95 Implicit Null Variants----------------
4450// Indirect Memory Operand
4451operand indirect_win95_safe(eRegP_no_EBP reg)
4452%{
4453  constraint(ALLOC_IN_RC(int_reg));
4454  match(reg);
4455
4456  op_cost(100);
4457  format %{ "[$reg]" %}
4458  interface(MEMORY_INTER) %{
4459    base($reg);
4460    index(0x4);
4461    scale(0x0);
4462    disp(0x0);
4463  %}
4464%}
4465
4466// Indirect Memory Plus Short Offset Operand
4467operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4468%{
4469  match(AddP reg off);
4470
4471  op_cost(100);
4472  format %{ "[$reg + $off]" %}
4473  interface(MEMORY_INTER) %{
4474    base($reg);
4475    index(0x4);
4476    scale(0x0);
4477    disp($off);
4478  %}
4479%}
4480
4481// Indirect Memory Plus Long Offset Operand
4482operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4483%{
4484  match(AddP reg off);
4485
4486  op_cost(100);
4487  format %{ "[$reg + $off]" %}
4488  interface(MEMORY_INTER) %{
4489    base($reg);
4490    index(0x4);
4491    scale(0x0);
4492    disp($off);
4493  %}
4494%}
4495
4496// Indirect Memory Plus Index Register Plus Offset Operand
4497operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4498%{
4499  match(AddP (AddP reg ireg) off);
4500
4501  op_cost(100);
4502  format %{"[$reg + $off + $ireg]" %}
4503  interface(MEMORY_INTER) %{
4504    base($reg);
4505    index($ireg);
4506    scale(0x0);
4507    disp($off);
4508  %}
4509%}
4510
4511// Indirect Memory Times Scale Plus Index Register
4512operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4513%{
4514  match(AddP reg (LShiftI ireg scale));
4515
4516  op_cost(100);
4517  format %{"[$reg + $ireg << $scale]" %}
4518  interface(MEMORY_INTER) %{
4519    base($reg);
4520    index($ireg);
4521    scale($scale);
4522    disp(0x0);
4523  %}
4524%}
4525
4526// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4527operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4528%{
4529  match(AddP (AddP reg (LShiftI ireg scale)) off);
4530
4531  op_cost(100);
4532  format %{"[$reg + $off + $ireg << $scale]" %}
4533  interface(MEMORY_INTER) %{
4534    base($reg);
4535    index($ireg);
4536    scale($scale);
4537    disp($off);
4538  %}
4539%}
4540
4541//----------Conditional Branch Operands----------------------------------------
4542// Comparison Op  - This is the operation of the comparison, and is limited to
4543//                  the following set of codes:
4544//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4545//
4546// Other attributes of the comparison, such as unsignedness, are specified
4547// by the comparison instruction that sets a condition code flags register.
4548// That result is represented by a flags operand whose subtype is appropriate
4549// to the unsignedness (etc.) of the comparison.
4550//
4551// Later, the instruction which matches both the Comparison Op (a Bool) and
4552// the flags (produced by the Cmp) specifies the coding of the comparison op
4553// by matching a specific subtype of Bool operand below, such as cmpOpU.
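// The hexadecimal values in the interfaces below are the x86 condition-code
// nibbles that are folded into the Jcc/SETcc/CMOVcc opcodes (0x4 = equal,
// 0x5 = not-equal, 0x2 = below, and so on).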
4554
4555// Comparison Code
4556operand cmpOp() %{
4557  match(Bool);
4558
4559  format %{ "" %}
4560  interface(COND_INTER) %{
4561    equal(0x4, "e");
4562    not_equal(0x5, "ne");
4563    less(0xC, "l");
4564    greater_equal(0xD, "ge");
4565    less_equal(0xE, "le");
4566    greater(0xF, "g");
4567    overflow(0x0, "o");
4568    no_overflow(0x1, "no");
4569  %}
4570%}
4571
4572// Comparison Code, unsigned compare.  Used by FP also, with
4573// C2 (unordered) turned into GT or LT already.  The other bits
4574// C0 and C3 are turned into Carry & Zero flags.
4575operand cmpOpU() %{
4576  match(Bool);
4577
4578  format %{ "" %}
4579  interface(COND_INTER) %{
4580    equal(0x4, "e");
4581    not_equal(0x5, "ne");
4582    less(0x2, "b");
4583    greater_equal(0x3, "nb");
4584    less_equal(0x6, "be");
4585    greater(0x7, "nbe");
4586    overflow(0x0, "o");
4587    no_overflow(0x1, "no");
4588  %}
4589%}
4590
4591// Floating comparisons that don't require any fixup for the unordered case
4592operand cmpOpUCF() %{
4593  match(Bool);
4594  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4595            n->as_Bool()->_test._test == BoolTest::ge ||
4596            n->as_Bool()->_test._test == BoolTest::le ||
4597            n->as_Bool()->_test._test == BoolTest::gt);
4598  format %{ "" %}
4599  interface(COND_INTER) %{
4600    equal(0x4, "e");
4601    not_equal(0x5, "ne");
4602    less(0x2, "b");
4603    greater_equal(0x3, "nb");
4604    less_equal(0x6, "be");
4605    greater(0x7, "nbe");
4606    overflow(0x0, "o");
4607    no_overflow(0x1, "no");
4608  %}
4609%}
4610
4611
4612// Floating comparisons that can be fixed up with extra conditional jumps
4613operand cmpOpUCF2() %{
4614  match(Bool);
4615  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4616            n->as_Bool()->_test._test == BoolTest::eq);
4617  format %{ "" %}
4618  interface(COND_INTER) %{
4619    equal(0x4, "e");
4620    not_equal(0x5, "ne");
4621    less(0x2, "b");
4622    greater_equal(0x3, "nb");
4623    less_equal(0x6, "be");
4624    greater(0x7, "nbe");
4625    overflow(0x0, "o");
4626    no_overflow(0x1, "no");
4627  %}
4628%}
4629
4630// Comparison Code for FP conditional move
4631operand cmpOp_fcmov() %{
4632  match(Bool);
4633
4634  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4635            n->as_Bool()->_test._test != BoolTest::no_overflow);
4636  format %{ "" %}
4637  interface(COND_INTER) %{
4638    equal        (0x0C8);
4639    not_equal    (0x1C8);
4640    less         (0x0C0);
4641    greater_equal(0x1C0);
4642    less_equal   (0x0D0);
4643    greater      (0x1D0);
4644    overflow(0x0, "o"); // not really supported by the instruction
4645    no_overflow(0x1, "no"); // not really supported by the instruction
4646  %}
4647%}
4648
4649// Comparison Code used in long compares
4650operand cmpOp_commute() %{
4651  match(Bool);
4652
4653  format %{ "" %}
4654  interface(COND_INTER) %{
4655    equal(0x4, "e");
4656    not_equal(0x5, "ne");
4657    less(0xF, "g");
4658    greater_equal(0xE, "le");
4659    less_equal(0xD, "ge");
4660    greater(0xC, "l");
4661    overflow(0x0, "o");
4662    no_overflow(0x1, "no");
4663  %}
4664%}
4665
4666// Comparison Code used in unsigned long compares
4667operand cmpOpU_commute() %{
4668  match(Bool);
4669
4670  format %{ "" %}
4671  interface(COND_INTER) %{
4672    equal(0x4, "e");
4673    not_equal(0x5, "ne");
4674    less(0x7, "nbe");
4675    greater_equal(0x6, "be");
4676    less_equal(0x3, "nb");
4677    greater(0x2, "b");
4678    overflow(0x0, "o");
4679    no_overflow(0x1, "no");
4680  %}
4681%}
4682
4683//----------OPERAND CLASSES----------------------------------------------------
4684// Operand Classes are groups of operands that are used to simplify
4685// instruction definitions by not requiring the AD writer to specify separate
4686// instructions for every form of operand when the instruction accepts
4687// multiple operand types with the same basic encoding and format.  The classic
4688// case of this is memory operands.
4689
4690opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4691               indIndex, indIndexScale, indIndexScaleOffset);
4692
4693// Long memory operations are encoded in 2 instructions and a +4 offset.
4694// This means some kind of offset is always required and you cannot use
4695// an oop as the offset (done when working on static globals).
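// (Concretely, the low word is accessed at [addr] and the high word at
// [addr + 4], so the displacement must be a plain integer that can have 4
// added to it, which an oop displacement cannot.)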
4696opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4697                    indIndex, indIndexScale, indIndexScaleOffset);
4698
4699
4700//----------PIPELINE-----------------------------------------------------------
4701// Rules which define the behavior of the target architectures pipeline.
4702pipeline %{
4703
4704//----------ATTRIBUTES---------------------------------------------------------
4705attributes %{
4706  variable_size_instructions;        // Variable-sized instructions
4707  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4708  instruction_unit_size = 1;         // An instruction is 1 byte long
4709  instruction_fetch_unit_size = 16;  // The processor fetches one line
4710  instruction_fetch_units = 1;       // of 16 bytes
4711
4712  // List of nop instructions
4713  nops( MachNop );
4714%}
4715
4716//----------RESOURCES----------------------------------------------------------
4717// Resources are the functional units available to the machine
4718
4719// Generic P2/P3 pipeline
4720// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4721// 3 instructions decoded per cycle.
4722// 2 load/store ops per cycle, 1 branch, 1 FPU,
4723// 2 ALU ops; only ALU0 handles mul/div instructions.
4724resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4725           MS0, MS1, MEM = MS0 | MS1,
4726           BR, FPU,
4727           ALU0, ALU1, ALU = ALU0 | ALU1 );
4728
4729//----------PIPELINE DESCRIPTION-----------------------------------------------
4730// Pipeline Description specifies the stages in the machine's pipeline
4731
4732// Generic P2/P3 pipeline
4733pipe_desc(S0, S1, S2, S3, S4, S5);
4734
4735//----------PIPELINE CLASSES---------------------------------------------------
4736// Pipeline Classes describe the stages in which input and output are
4737// referenced by the hardware pipeline.
4738
4739// Naming convention: ialu or fpu
4740// Then: _reg
4741// Then: _reg if there is a 2nd register
4742// Then: _long if it's a pair of instructions implementing a long
4743// Then: _fat if it requires the big decoder
4744//   Or: _mem if it requires the big decoder and a memory unit.
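// For example, ialu_reg_long_fat names an integer ALU operation on a long
// (register-pair) operand that requires the big decoder D0.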
4745
4746// Integer ALU reg operation
4747pipe_class ialu_reg(rRegI dst) %{
4748    single_instruction;
4749    dst    : S4(write);
4750    dst    : S3(read);
4751    DECODE : S0;        // any decoder
4752    ALU    : S3;        // any alu
4753%}
4754
4755// Long ALU reg operation
4756pipe_class ialu_reg_long(eRegL dst) %{
4757    instruction_count(2);
4758    dst    : S4(write);
4759    dst    : S3(read);
4760    DECODE : S0(2);     // any 2 decoders
4761    ALU    : S3(2);     // both alus
4762%}
4763
4764// Integer ALU reg operation using big decoder
4765pipe_class ialu_reg_fat(rRegI dst) %{
4766    single_instruction;
4767    dst    : S4(write);
4768    dst    : S3(read);
4769    D0     : S0;        // big decoder only
4770    ALU    : S3;        // any alu
4771%}
4772
4773// Long ALU reg operation using big decoder
4774pipe_class ialu_reg_long_fat(eRegL dst) %{
4775    instruction_count(2);
4776    dst    : S4(write);
4777    dst    : S3(read);
4778    D0     : S0(2);     // big decoder only; twice
4779    ALU    : S3(2);     // any 2 alus
4780%}
4781
4782// Integer ALU reg-reg operation
4783pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4784    single_instruction;
4785    dst    : S4(write);
4786    src    : S3(read);
4787    DECODE : S0;        // any decoder
4788    ALU    : S3;        // any alu
4789%}
4790
4791// Long ALU reg-reg operation
4792pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4793    instruction_count(2);
4794    dst    : S4(write);
4795    src    : S3(read);
4796    DECODE : S0(2);     // any 2 decoders
4797    ALU    : S3(2);     // both alus
4798%}
4799
4800// Integer ALU reg-reg operation using big decoder
4801pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4802    single_instruction;
4803    dst    : S4(write);
4804    src    : S3(read);
4805    D0     : S0;        // big decoder only
4806    ALU    : S3;        // any alu
4807%}
4808
4809// Long ALU reg-reg operation using big decoder
4810pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4811    instruction_count(2);
4812    dst    : S4(write);
4813    src    : S3(read);
4814    D0     : S0(2);     // big decoder only; twice
4815    ALU    : S3(2);     // both alus
4816%}
4817
4818// Integer ALU reg-mem operation
4819pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4820    single_instruction;
4821    dst    : S5(write);
4822    mem    : S3(read);
4823    D0     : S0;        // big decoder only
4824    ALU    : S4;        // any alu
4825    MEM    : S3;        // any mem
4826%}
4827
4828// Long ALU reg-mem operation
4829pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4830    instruction_count(2);
4831    dst    : S5(write);
4832    mem    : S3(read);
4833    D0     : S0(2);     // big decoder only; twice
4834    ALU    : S4(2);     // any 2 alus
4835    MEM    : S3(2);     // both mems
4836%}
4837
4838// Integer mem operation (prefetch)
4839pipe_class ialu_mem(memory mem)
4840%{
4841    single_instruction;
4842    mem    : S3(read);
4843    D0     : S0;        // big decoder only
4844    MEM    : S3;        // any mem
4845%}
4846
4847// Integer Store to Memory
4848pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4849    single_instruction;
4850    mem    : S3(read);
4851    src    : S5(read);
4852    D0     : S0;        // big decoder only
4853    ALU    : S4;        // any alu
4854    MEM    : S3;
4855%}
4856
4857// Long Store to Memory
4858pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4859    instruction_count(2);
4860    mem    : S3(read);
4861    src    : S5(read);
4862    D0     : S0(2);     // big decoder only; twice
4863    ALU    : S4(2);     // any 2 alus
4864    MEM    : S3(2);     // Both mems
4865%}
4866
4867// Integer Store to Memory
4868pipe_class ialu_mem_imm(memory mem) %{
4869    single_instruction;
4870    mem    : S3(read);
4871    D0     : S0;        // big decoder only
4872    ALU    : S4;        // any alu
4873    MEM    : S3;
4874%}
4875
4876// Integer ALU0 reg-reg operation
4877pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4878    single_instruction;
4879    dst    : S4(write);
4880    src    : S3(read);
4881    D0     : S0;        // Big decoder only
4882    ALU0   : S3;        // only alu0
4883%}
4884
4885// Integer ALU0 reg-mem operation
4886pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4887    single_instruction;
4888    dst    : S5(write);
4889    mem    : S3(read);
4890    D0     : S0;        // big decoder only
4891    ALU0   : S4;        // ALU0 only
4892    MEM    : S3;        // any mem
4893%}
4894
4895// Integer ALU reg-reg operation
4896pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4897    single_instruction;
4898    cr     : S4(write);
4899    src1   : S3(read);
4900    src2   : S3(read);
4901    DECODE : S0;        // any decoder
4902    ALU    : S3;        // any alu
4903%}
4904
4905// Integer ALU reg-imm operation
4906pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4907    single_instruction;
4908    cr     : S4(write);
4909    src1   : S3(read);
4910    DECODE : S0;        // any decoder
4911    ALU    : S3;        // any alu
4912%}
4913
4914// Integer ALU reg-mem operation
4915pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4916    single_instruction;
4917    cr     : S4(write);
4918    src1   : S3(read);
4919    src2   : S3(read);
4920    D0     : S0;        // big decoder only
4921    ALU    : S4;        // any alu
4922    MEM    : S3;
4923%}
4924
4925// Conditional move reg-reg
4926pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4927    instruction_count(4);
4928    y      : S4(read);
4929    q      : S3(read);
4930    p      : S3(read);
4931    DECODE : S0(4);     // any decoder
4932%}
4933
4934// Conditional move reg-reg
4935pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4936    single_instruction;
4937    dst    : S4(write);
4938    src    : S3(read);
4939    cr     : S3(read);
4940    DECODE : S0;        // any decoder
4941%}
4942
4943// Conditional move reg-mem
4944pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4945    single_instruction;
4946    dst    : S4(write);
4947    src    : S3(read);
4948    cr     : S3(read);
4949    DECODE : S0;        // any decoder
4950    MEM    : S3;
4951%}
4952
4953// Conditional move reg-reg long
4954pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4955    single_instruction;
4956    dst    : S4(write);
4957    src    : S3(read);
4958    cr     : S3(read);
4959    DECODE : S0(2);     // any 2 decoders
4960%}
4961
4962// Conditional move double reg-reg
4963pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4964    single_instruction;
4965    dst    : S4(write);
4966    src    : S3(read);
4967    cr     : S3(read);
4968    DECODE : S0;        // any decoder
4969%}
4970
4971// Float reg-reg operation
4972pipe_class fpu_reg(regDPR dst) %{
4973    instruction_count(2);
4974    dst    : S3(read);
4975    DECODE : S0(2);     // any 2 decoders
4976    FPU    : S3;
4977%}
4978
4979// Float reg-reg operation
4980pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4981    instruction_count(2);
4982    dst    : S4(write);
4983    src    : S3(read);
4984    DECODE : S0(2);     // any 2 decoders
4985    FPU    : S3;
4986%}
4987
4988// Float reg-reg operation
4989pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4990    instruction_count(3);
4991    dst    : S4(write);
4992    src1   : S3(read);
4993    src2   : S3(read);
4994    DECODE : S0(3);     // any 3 decoders
4995    FPU    : S3(2);
4996%}
4997
4998// Float reg-reg operation
4999pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
5000    instruction_count(4);
5001    dst    : S4(write);
5002    src1   : S3(read);
5003    src2   : S3(read);
5004    src3   : S3(read);
5005    DECODE : S0(4);     // any decoder, 4 slots
5006    FPU    : S3(2);
5007%}
5008
5009// Float reg-mem-reg-reg operation
5010pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
5011    instruction_count(4);
5012    dst    : S4(write);
5013    src1   : S3(read);
5014    src2   : S3(read);
5015    src3   : S3(read);
5016    DECODE : S1(3);     // any 3 decoders
5017    D0     : S0;        // Big decoder only
5018    FPU    : S3(2);
5019    MEM    : S3;
5020%}
5021
5022// Float reg-mem operation
5023pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
5024    instruction_count(2);
5025    dst    : S5(write);
5026    mem    : S3(read);
5027    D0     : S0;        // big decoder only
5028    DECODE : S1;        // any decoder for FPU POP
5029    FPU    : S4;
5030    MEM    : S3;        // any mem
5031%}
5032
5033// Float reg-mem operation
5034pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
5035    instruction_count(3);
5036    dst    : S5(write);
5037    src1   : S3(read);
5038    mem    : S3(read);
5039    D0     : S0;        // big decoder only
5040    DECODE : S1(2);     // any decoder for FPU POP
5041    FPU    : S4;
5042    MEM    : S3;        // any mem
5043%}
5044
5045// Float mem-reg operation
5046pipe_class fpu_mem_reg(memory mem, regDPR src) %{
5047    instruction_count(2);
5048    src    : S5(read);
5049    mem    : S3(read);
5050    DECODE : S0;        // any decoder for FPU PUSH
5051    D0     : S1;        // big decoder only
5052    FPU    : S4;
5053    MEM    : S3;        // any mem
5054%}
5055
5056pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
5057    instruction_count(3);
5058    src1   : S3(read);
5059    src2   : S3(read);
5060    mem    : S3(read);
5061    DECODE : S0(2);     // any decoder for FPU PUSH
5062    D0     : S1;        // big decoder only
5063    FPU    : S4;
5064    MEM    : S3;        // any mem
5065%}
5066
5067pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
5068    instruction_count(3);
5069    src1   : S3(read);
5070    src2   : S3(read);
5071    mem    : S4(read);
5072    DECODE : S0;        // any decoder for FPU PUSH
5073    D0     : S0(2);     // big decoder only
5074    FPU    : S4;
5075    MEM    : S3(2);     // any mem
5076%}
5077
5078pipe_class fpu_mem_mem(memory dst, memory src1) %{
5079    instruction_count(2);
5080    src1   : S3(read);
5081    dst    : S4(read);
5082    D0     : S0(2);     // big decoder only
5083    MEM    : S3(2);     // any mem
5084%}
5085
5086pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
5087    instruction_count(3);
5088    src1   : S3(read);
5089    src2   : S3(read);
5090    dst    : S4(read);
5091    D0     : S0(3);     // big decoder only
5092    FPU    : S4;
5093    MEM    : S3(3);     // any mem
5094%}
5095
5096pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5097    instruction_count(3);
5098    src1   : S4(read);
5099    mem    : S4(read);
5100    DECODE : S0;        // any decoder for FPU PUSH
5101    D0     : S0(2);     // big decoder only
5102    FPU    : S4;
5103    MEM    : S3(2);     // any mem
5104%}
5105
5106// Float load constant
5107pipe_class fpu_reg_con(regDPR dst) %{
5108    instruction_count(2);
5109    dst    : S5(write);
5110    D0     : S0;        // big decoder only for the load
5111    DECODE : S1;        // any decoder for FPU POP
5112    FPU    : S4;
5113    MEM    : S3;        // any mem
5114%}
5115
5116// Float load constant
5117pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5118    instruction_count(3);
5119    dst    : S5(write);
5120    src    : S3(read);
5121    D0     : S0;        // big decoder only for the load
5122    DECODE : S1(2);     // any decoder for FPU POP
5123    FPU    : S4;
5124    MEM    : S3;        // any mem
5125%}
5126
5127// Unconditional branch
5128pipe_class pipe_jmp( label labl ) %{
5129    single_instruction;
5130    BR   : S3;
5131%}
5132
5133// Conditional branch
5134pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5135    single_instruction;
5136    cr    : S1(read);
5137    BR    : S3;
5138%}
5139
5140// Allocation idiom
5141pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5142    instruction_count(1); force_serialization;
5143    fixed_latency(6);
5144    heap_ptr : S3(read);
5145    DECODE   : S0(3);
5146    D0       : S2;
5147    MEM      : S3;
5148    ALU      : S3(2);
5149    dst      : S5(write);
5150    BR       : S5;
5151%}
5152
5153// Generic big/slow expanded idiom
5154pipe_class pipe_slow(  ) %{
5155    instruction_count(10); multiple_bundles; force_serialization;
5156    fixed_latency(100);
5157    D0  : S0(2);
5158    MEM : S3(2);
5159%}
5160
5161// The real do-nothing guy
5162pipe_class empty( ) %{
5163    instruction_count(0);
5164%}
5165
5166// Define the class for the Nop node
5167define %{
5168   MachNop = empty;
5169%}
5170
5171%}
5172
5173//----------INSTRUCTIONS-------------------------------------------------------
5174//
5175// match      -- States which machine-independent subtree may be replaced
5176//               by this instruction.
5177// ins_cost   -- The estimated cost of this instruction is used by instruction
5178//               selection to identify a minimum cost tree of machine
5179//               instructions that matches a tree of machine-independent
5180//               instructions.
5181// format     -- A string providing the disassembly for this instruction.
5182//               The value of an instruction's operand may be inserted
5183//               by referring to it with a '$' prefix.
5184// opcode     -- Three instruction opcodes may be provided.  These are referred
5185//               to within an encode class as $primary, $secondary, and $tertiary
5186//               respectively.  The primary opcode is commonly used to
5187//               indicate the type of machine instruction, while secondary
5188//               and tertiary are often used for prefix options or addressing
5189//               modes.
5190// ins_encode -- A list of encode classes with parameters. The encode class
5191//               name must have been defined in an 'enc_class' specification
5192//               in the encode section of the architecture description.
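//
// As a schematic example (illustrative only, not a definition used by the
// matcher), a simple register-register add could be described as:
//
//   instruct addI_example(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));       // replaces the ideal AddI subtree
//     effect(KILL cr);                     // ADD clobbers the flags
//     ins_cost(150);
//     format %{ "ADD    $dst,$src" %}
//     opcode(0x03);                        // available as $primary
//     ins_encode( OpcP, RegReg(dst,src) );
//     ins_pipe( ialu_reg_reg );
//   %}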
5193
5194//----------BSWAP-Instruction--------------------------------------------------
5195instruct bytes_reverse_int(rRegI dst) %{
5196  match(Set dst (ReverseBytesI dst));
5197
5198  format %{ "BSWAP  $dst" %}
5199  opcode(0x0F, 0xC8);
5200  ins_encode( OpcP, OpcSReg(dst) );
5201  ins_pipe( ialu_reg );
5202%}
5203
5204instruct bytes_reverse_long(eRegL dst) %{
5205  match(Set dst (ReverseBytesL dst));
5206
5207  format %{ "BSWAP  $dst.lo\n\t"
5208            "BSWAP  $dst.hi\n\t"
5209            "XCHG   $dst.lo $dst.hi" %}
5210
5211  ins_cost(125);
5212  ins_encode( bswap_long_bytes(dst) );
5213  ins_pipe( ialu_reg_reg);
5214%}
5215
5216instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5217  match(Set dst (ReverseBytesUS dst));
5218  effect(KILL cr);
5219
5220  format %{ "BSWAP  $dst\n\t"
5221            "SHR    $dst,16\n\t" %}
5222  ins_encode %{
5223    __ bswapl($dst$$Register);
5224    __ shrl($dst$$Register, 16);
5225  %}
5226  ins_pipe( ialu_reg );
5227%}
5228
5229instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5230  match(Set dst (ReverseBytesS dst));
5231  effect(KILL cr);
5232
5233  format %{ "BSWAP  $dst\n\t"
5234            "SAR    $dst,16\n\t" %}
5235  ins_encode %{
5236    __ bswapl($dst$$Register);
5237    __ sarl($dst$$Register, 16);
5238  %}
5239  ins_pipe( ialu_reg );
5240%}
5241
5242
5243//---------- Zeros Count Instructions ------------------------------------------
5244
5245instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5246  predicate(UseCountLeadingZerosInstruction);
5247  match(Set dst (CountLeadingZerosI src));
5248  effect(KILL cr);
5249
5250  format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5251  ins_encode %{
5252    __ lzcntl($dst$$Register, $src$$Register);
5253  %}
5254  ins_pipe(ialu_reg);
5255%}
5256
5257instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5258  predicate(!UseCountLeadingZerosInstruction);
5259  match(Set dst (CountLeadingZerosI src));
5260  effect(KILL cr);
5261
5262  format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5263            "JNZ    skip\n\t"
5264            "MOV    $dst, -1\n"
5265      "skip:\n\t"
5266            "NEG    $dst\n\t"
5267            "ADD    $dst, 31" %}
5268  ins_encode %{
5269    Register Rdst = $dst$$Register;
5270    Register Rsrc = $src$$Register;
5271    Label skip;
5272    __ bsrl(Rdst, Rsrc);
5273    __ jccb(Assembler::notZero, skip);
5274    __ movl(Rdst, -1);
5275    __ bind(skip);
5276    __ negl(Rdst);
5277    __ addl(Rdst, BitsPerInt - 1);
5278  %}
5279  ins_pipe(ialu_reg);
5280%}
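// The BSR expansion above computes 31 - bsr(src): BSR leaves the bit index of
// the highest set bit, which is forced to -1 when src == 0, so NEG + ADD 31
// yields 32 for a zero input (matching Integer.numberOfLeadingZeros).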
5281
5282instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5283  predicate(UseCountLeadingZerosInstruction);
5284  match(Set dst (CountLeadingZerosL src));
5285  effect(TEMP dst, KILL cr);
5286
5287  format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5288            "JNC    done\n\t"
5289            "LZCNT  $dst, $src.lo\n\t"
5290            "ADD    $dst, 32\n"
5291      "done:" %}
5292  ins_encode %{
5293    Register Rdst = $dst$$Register;
5294    Register Rsrc = $src$$Register;
5295    Label done;
5296    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5297    __ jccb(Assembler::carryClear, done);
5298    __ lzcntl(Rdst, Rsrc);
5299    __ addl(Rdst, BitsPerInt);
5300    __ bind(done);
5301  %}
5302  ins_pipe(ialu_reg);
5303%}
5304
5305instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5306  predicate(!UseCountLeadingZerosInstruction);
5307  match(Set dst (CountLeadingZerosL src));
5308  effect(TEMP dst, KILL cr);
5309
5310  format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5311            "JZ     msw_is_zero\n\t"
5312            "ADD    $dst, 32\n\t"
5313            "JMP    not_zero\n"
5314      "msw_is_zero:\n\t"
5315            "BSR    $dst, $src.lo\n\t"
5316            "JNZ    not_zero\n\t"
5317            "MOV    $dst, -1\n"
5318      "not_zero:\n\t"
5319            "NEG    $dst\n\t"
5320            "ADD    $dst, 63\n" %}
5321 ins_encode %{
5322    Register Rdst = $dst$$Register;
5323    Register Rsrc = $src$$Register;
5324    Label msw_is_zero;
5325    Label not_zero;
5326    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5327    __ jccb(Assembler::zero, msw_is_zero);
5328    __ addl(Rdst, BitsPerInt);
5329    __ jmpb(not_zero);
5330    __ bind(msw_is_zero);
5331    __ bsrl(Rdst, Rsrc);
5332    __ jccb(Assembler::notZero, not_zero);
5333    __ movl(Rdst, -1);
5334    __ bind(not_zero);
5335    __ negl(Rdst);
5336    __ addl(Rdst, BitsPerLong - 1);
5337  %}
5338  ins_pipe(ialu_reg);
5339%}
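// Same fixup over the 64-bit pair: the result is 63 minus the bit index of
// the highest set bit within the whole long, with the index forced to -1 when
// the value is zero so that NEG + ADD 63 yields 64.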
5340
5341instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5342  predicate(UseCountTrailingZerosInstruction);
5343  match(Set dst (CountTrailingZerosI src));
5344  effect(KILL cr);
5345
5346  format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5347  ins_encode %{
5348    __ tzcntl($dst$$Register, $src$$Register);
5349  %}
5350  ins_pipe(ialu_reg);
5351%}
5352
5353instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5354  predicate(!UseCountTrailingZerosInstruction);
5355  match(Set dst (CountTrailingZerosI src));
5356  effect(KILL cr);
5357
5358  format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5359            "JNZ    done\n\t"
5360            "MOV    $dst, 32\n"
5361      "done:" %}
5362  ins_encode %{
5363    Register Rdst = $dst$$Register;
5364    Label done;
5365    __ bsfl(Rdst, $src$$Register);
5366    __ jccb(Assembler::notZero, done);
5367    __ movl(Rdst, BitsPerInt);
5368    __ bind(done);
5369  %}
5370  ins_pipe(ialu_reg);
5371%}
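// BSF leaves the bit index of the lowest set bit, which is already the
// trailing-zero count; only the src == 0 case needs fixing up to 32.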
5372
5373instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5374  predicate(UseCountTrailingZerosInstruction);
5375  match(Set dst (CountTrailingZerosL src));
5376  effect(TEMP dst, KILL cr);
5377
5378  format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5379            "JNC    done\n\t"
5380            "TZCNT  $dst, $src.hi\n\t"
5381            "ADD    $dst, 32\n"
5382      "done:" %}
5383  ins_encode %{
5384    Register Rdst = $dst$$Register;
5385    Register Rsrc = $src$$Register;
5386    Label done;
5387    __ tzcntl(Rdst, Rsrc);
5388    __ jccb(Assembler::carryClear, done);
5389    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5390    __ addl(Rdst, BitsPerInt);
5391    __ bind(done);
5392  %}
5393  ins_pipe(ialu_reg);
5394%}
5395
5396instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5397  predicate(!UseCountTrailingZerosInstruction);
5398  match(Set dst (CountTrailingZerosL src));
5399  effect(TEMP dst, KILL cr);
5400
5401  format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5402            "JNZ    done\n\t"
5403            "BSF    $dst, $src.hi\n\t"
5404            "JNZ    msw_not_zero\n\t"
5405            "MOV    $dst, 32\n"
5406      "msw_not_zero:\n\t"
5407            "ADD    $dst, 32\n"
5408      "done:" %}
5409  ins_encode %{
5410    Register Rdst = $dst$$Register;
5411    Register Rsrc = $src$$Register;
5412    Label msw_not_zero;
5413    Label done;
5414    __ bsfl(Rdst, Rsrc);
5415    __ jccb(Assembler::notZero, done);
5416    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5417    __ jccb(Assembler::notZero, msw_not_zero);
5418    __ movl(Rdst, BitsPerInt);
5419    __ bind(msw_not_zero);
5420    __ addl(Rdst, BitsPerInt);
5421    __ bind(done);
5422  %}
5423  ins_pipe(ialu_reg);
5424%}
5425
5426
5427//---------- Population Count Instructions -------------------------------------
5428
5429instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5430  predicate(UsePopCountInstruction);
5431  match(Set dst (PopCountI src));
5432  effect(KILL cr);
5433
5434  format %{ "POPCNT $dst, $src" %}
5435  ins_encode %{
5436    __ popcntl($dst$$Register, $src$$Register);
5437  %}
5438  ins_pipe(ialu_reg);
5439%}
5440
5441instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5442  predicate(UsePopCountInstruction);
5443  match(Set dst (PopCountI (LoadI mem)));
5444  effect(KILL cr);
5445
5446  format %{ "POPCNT $dst, $mem" %}
5447  ins_encode %{
5448    __ popcntl($dst$$Register, $mem$$Address);
5449  %}
5450  ins_pipe(ialu_reg);
5451%}
5452
5453// Note: Long.bitCount(long) returns an int.
5454instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5455  predicate(UsePopCountInstruction);
5456  match(Set dst (PopCountL src));
5457  effect(KILL cr, TEMP tmp, TEMP dst);
5458
5459  format %{ "POPCNT $dst, $src.lo\n\t"
5460            "POPCNT $tmp, $src.hi\n\t"
5461            "ADD    $dst, $tmp" %}
5462  ins_encode %{
5463    __ popcntl($dst$$Register, $src$$Register);
5464    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5465    __ addl($dst$$Register, $tmp$$Register);
5466  %}
5467  ins_pipe(ialu_reg);
5468%}
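// The bit count of a 64-bit value is just the sum of the bit counts of its
// two 32-bit halves, so two POPCNTs plus an ADD suffice.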
5469
5470// Note: Long.bitCount(long) returns an int.
5471instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5472  predicate(UsePopCountInstruction);
5473  match(Set dst (PopCountL (LoadL mem)));
5474  effect(KILL cr, TEMP tmp, TEMP dst);
5475
5476  format %{ "POPCNT $dst, $mem\n\t"
5477            "POPCNT $tmp, $mem+4\n\t"
5478            "ADD    $dst, $tmp" %}
5479  ins_encode %{
5480    //__ popcntl($dst$$Register, $mem$$Address$$first);
5481    //__ popcntl($tmp$$Register, $mem$$Address$$second);
5482    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5483    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5484    __ addl($dst$$Register, $tmp$$Register);
5485  %}
5486  ins_pipe(ialu_reg);
5487%}
5488
5489
5490//----------Load/Store/Move Instructions---------------------------------------
5491//----------Load Instructions--------------------------------------------------
5492// Load Byte (8bit signed)
5493instruct loadB(xRegI dst, memory mem) %{
5494  match(Set dst (LoadB mem));
5495
5496  ins_cost(125);
5497  format %{ "MOVSX8 $dst,$mem\t# byte" %}
5498
5499  ins_encode %{
5500    __ movsbl($dst$$Register, $mem$$Address);
5501  %}
5502
5503  ins_pipe(ialu_reg_mem);
5504%}
5505
5506// Load Byte (8bit signed) into Long Register
5507instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5508  match(Set dst (ConvI2L (LoadB mem)));
5509  effect(KILL cr);
5510
5511  ins_cost(375);
5512  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5513            "MOV    $dst.hi,$dst.lo\n\t"
5514            "SAR    $dst.hi,7" %}
5515
5516  ins_encode %{
5517    __ movsbl($dst$$Register, $mem$$Address);
5518    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5519    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSBs are already sign-extended.
5520  %}
5521
5522  ins_pipe(ialu_reg_mem);
5523%}
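// After MOVSX8 the low word's bits 31..7 all equal the sign bit, so copying
// it into the high word and arithmetic-shifting by only 7 (rather than 31)
// still fills the high word entirely with the sign.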
5524
5525// Load Unsigned Byte (8bit UNsigned)
5526instruct loadUB(xRegI dst, memory mem) %{
5527  match(Set dst (LoadUB mem));
5528
5529  ins_cost(125);
5530  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5531
5532  ins_encode %{
5533    __ movzbl($dst$$Register, $mem$$Address);
5534  %}
5535
5536  ins_pipe(ialu_reg_mem);
5537%}
5538
5539// Load Unsigned Byte (8 bit UNsigned) into Long Register
5540instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5541  match(Set dst (ConvI2L (LoadUB mem)));
5542  effect(KILL cr);
5543
5544  ins_cost(250);
5545  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5546            "XOR    $dst.hi,$dst.hi" %}
5547
5548  ins_encode %{
5549    Register Rdst = $dst$$Register;
5550    __ movzbl(Rdst, $mem$$Address);
5551    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5552  %}
5553
5554  ins_pipe(ialu_reg_mem);
5555%}
5556
5557// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5558instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5559  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5560  effect(KILL cr);
5561
5562  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5563            "XOR    $dst.hi,$dst.hi\n\t"
5564            "AND    $dst.lo,right_n_bits($mask, 8)" %}
5565  ins_encode %{
5566    Register Rdst = $dst$$Register;
5567    __ movzbl(Rdst, $mem$$Address);
5568    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5569    __ andl(Rdst, $mask$$constant & right_n_bits(8));
5570  %}
5571  ins_pipe(ialu_reg_mem);
5572%}
5573
5574// Load Short (16bit signed)
5575instruct loadS(rRegI dst, memory mem) %{
5576  match(Set dst (LoadS mem));
5577
5578  ins_cost(125);
5579  format %{ "MOVSX  $dst,$mem\t# short" %}
5580
5581  ins_encode %{
5582    __ movswl($dst$$Register, $mem$$Address);
5583  %}
5584
5585  ins_pipe(ialu_reg_mem);
5586%}
5587
5588// Load Short (16 bit signed) to Byte (8 bit signed)
5589instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5590  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5591
5592  ins_cost(125);
5593  format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5594  ins_encode %{
5595    __ movsbl($dst$$Register, $mem$$Address);
5596  %}
5597  ins_pipe(ialu_reg_mem);
5598%}
5599
5600// Load Short (16bit signed) into Long Register
5601instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5602  match(Set dst (ConvI2L (LoadS mem)));
5603  effect(KILL cr);
5604
5605  ins_cost(375);
5606  format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5607            "MOV    $dst.hi,$dst.lo\n\t"
5608            "SAR    $dst.hi,15" %}
5609
5610  ins_encode %{
5611    __ movswl($dst$$Register, $mem$$Address);
5612    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5613    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSBs are already sign-extended.
5614  %}
5615
5616  ins_pipe(ialu_reg_mem);
5617%}
5618
5619// Load Unsigned Short/Char (16bit unsigned)
5620instruct loadUS(rRegI dst, memory mem) %{
5621  match(Set dst (LoadUS mem));
5622
5623  ins_cost(125);
5624  format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5625
5626  ins_encode %{
5627    __ movzwl($dst$$Register, $mem$$Address);
5628  %}
5629
5630  ins_pipe(ialu_reg_mem);
5631%}
5632
5633// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5634instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5635  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5636
5637  ins_cost(125);
5638  format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5639  ins_encode %{
5640    __ movsbl($dst$$Register, $mem$$Address);
5641  %}
5642  ins_pipe(ialu_reg_mem);
5643%}
5644
5645// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5646instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5647  match(Set dst (ConvI2L (LoadUS mem)));
5648  effect(KILL cr);
5649
5650  ins_cost(250);
5651  format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5652            "XOR    $dst.hi,$dst.hi" %}
5653
5654  ins_encode %{
5655    __ movzwl($dst$$Register, $mem$$Address);
5656    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5657  %}
5658
5659  ins_pipe(ialu_reg_mem);
5660%}
5661
5662// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5663instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5664  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5665  effect(KILL cr);
5666
5667  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5668            "XOR    $dst.hi,$dst.hi" %}
5669  ins_encode %{
5670    Register Rdst = $dst$$Register;
5671    __ movzbl(Rdst, $mem$$Address);
5672    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5673  %}
5674  ins_pipe(ialu_reg_mem);
5675%}
5676
5677// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5678instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5679  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5680  effect(KILL cr);
5681
5682  format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5683            "XOR    $dst.hi,$dst.hi\n\t"
5684            "AND    $dst.lo,right_n_bits($mask, 16)" %}
5685  ins_encode %{
5686    Register Rdst = $dst$$Register;
5687    __ movzwl(Rdst, $mem$$Address);
5688    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5689    __ andl(Rdst, $mask$$constant & right_n_bits(16));
5690  %}
5691  ins_pipe(ialu_reg_mem);
5692%}
5693
5694// Load Integer
5695instruct loadI(rRegI dst, memory mem) %{
5696  match(Set dst (LoadI mem));
5697
5698  ins_cost(125);
5699  format %{ "MOV    $dst,$mem\t# int" %}
5700
5701  ins_encode %{
5702    __ movl($dst$$Register, $mem$$Address);
5703  %}
5704
5705  ins_pipe(ialu_reg_mem);
5706%}
5707
5708// Load Integer (32 bit signed) to Byte (8 bit signed)
5709instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5710  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5711
5712  ins_cost(125);
5713  format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5714  ins_encode %{
5715    __ movsbl($dst$$Register, $mem$$Address);
5716  %}
5717  ins_pipe(ialu_reg_mem);
5718%}
5719
5720// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5721instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5722  match(Set dst (AndI (LoadI mem) mask));
5723
5724  ins_cost(125);
5725  format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5726  ins_encode %{
5727    __ movzbl($dst$$Register, $mem$$Address);
5728  %}
5729  ins_pipe(ialu_reg_mem);
5730%}
5731
5732// Load Integer (32 bit signed) to Short (16 bit signed)
5733instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5734  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5735
5736  ins_cost(125);
5737  format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5738  ins_encode %{
5739    __ movswl($dst$$Register, $mem$$Address);
5740  %}
5741  ins_pipe(ialu_reg_mem);
5742%}
5743
5744// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5745instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5746  match(Set dst (AndI (LoadI mem) mask));
5747
5748  ins_cost(125);
5749  format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5750  ins_encode %{
5751    __ movzwl($dst$$Register, $mem$$Address);
5752  %}
5753  ins_pipe(ialu_reg_mem);
5754%}
5755
5756// Load Integer into Long Register
5757instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5758  match(Set dst (ConvI2L (LoadI mem)));
5759  effect(KILL cr);
5760
5761  ins_cost(375);
5762  format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5763            "MOV    $dst.hi,$dst.lo\n\t"
5764            "SAR    $dst.hi,31" %}
5765
5766  ins_encode %{
5767    __ movl($dst$$Register, $mem$$Address);
5768    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5769    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5770  %}
5771
5772  ins_pipe(ialu_reg_mem);
5773%}
5774
5775// Load Integer with mask 0xFF into Long Register
5776instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5777  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5778  effect(KILL cr);
5779
5780  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5781            "XOR    $dst.hi,$dst.hi" %}
5782  ins_encode %{
5783    Register Rdst = $dst$$Register;
5784    __ movzbl(Rdst, $mem$$Address);
5785    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5786  %}
5787  ins_pipe(ialu_reg_mem);
5788%}
5789
5790// Load Integer with mask 0xFFFF into Long Register
5791instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5792  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5793  effect(KILL cr);
5794
5795  format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5796            "XOR    $dst.hi,$dst.hi" %}
5797  ins_encode %{
5798    Register Rdst = $dst$$Register;
5799    __ movzwl(Rdst, $mem$$Address);
5800    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5801  %}
5802  ins_pipe(ialu_reg_mem);
5803%}
5804
5805// Load Integer with 31-bit mask into Long Register
5806instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5807  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5808  effect(KILL cr);
5809
5810  format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5811            "XOR    $dst.hi,$dst.hi\n\t"
5812            "AND    $dst.lo,$mask" %}
5813  ins_encode %{
5814    Register Rdst = $dst$$Register;
5815    __ movl(Rdst, $mem$$Address);
5816    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5817    __ andl(Rdst, $mask$$constant);
5818  %}
5819  ins_pipe(ialu_reg_mem);
5820%}
5821
5822// Load Unsigned Integer into Long Register
5823instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5824  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5825  effect(KILL cr);
5826
5827  ins_cost(250);
5828  format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5829            "XOR    $dst.hi,$dst.hi" %}
5830
5831  ins_encode %{
5832    __ movl($dst$$Register, $mem$$Address);
5833    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5834  %}
5835
5836  ins_pipe(ialu_reg_mem);
5837%}
5838
5839// Load Long.  Cannot clobber address while loading, so restrict address
5840// register to ESI
5841instruct loadL(eRegL dst, load_long_memory mem) %{
5842  predicate(!((LoadLNode*)n)->require_atomic_access());
5843  match(Set dst (LoadL mem));
5844
5845  ins_cost(250);
5846  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5847            "MOV    $dst.hi,$mem+4" %}
5848
5849  ins_encode %{
5850    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5851    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5852    __ movl($dst$$Register, Amemlo);
5853    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5854  %}
5855
5856  ins_pipe(ialu_reg_long_mem);
5857%}
5858
5859// Volatile Load Long.  Must be atomic, so do 64-bit FILD
5860// then store it down to the stack and reload on the int
5861// side.
5862instruct loadL_volatile(stackSlotL dst, memory mem) %{
5863  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5864  match(Set dst (LoadL mem));
5865
5866  ins_cost(200);
5867  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5868            "FISTp  $dst" %}
5869  ins_encode(enc_loadL_volatile(mem,dst));
5870  ins_pipe( fpu_reg_mem );
5871%}
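// The x87 FILD/FISTp pair performs a single 64-bit memory access, which a
// pair of 32-bit integer moves cannot, so the value is bounced through a
// stack slot and then picked up by the integer side.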
5872
5873instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5874  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5875  match(Set dst (LoadL mem));
5876  effect(TEMP tmp);
5877  ins_cost(180);
5878  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5879            "MOVSD  $dst,$tmp" %}
5880  ins_encode %{
5881    __ movdbl($tmp$$XMMRegister, $mem$$Address);
5882    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5883  %}
5884  ins_pipe( pipe_slow );
5885%}
5886
5887instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5888  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5889  match(Set dst (LoadL mem));
5890  effect(TEMP tmp);
5891  ins_cost(160);
5892  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5893            "MOVD   $dst.lo,$tmp\n\t"
5894            "PSRLQ  $tmp,32\n\t"
5895            "MOVD   $dst.hi,$tmp" %}
5896  ins_encode %{
5897    __ movdbl($tmp$$XMMRegister, $mem$$Address);
5898    __ movdl($dst$$Register, $tmp$$XMMRegister);
5899    __ psrlq($tmp$$XMMRegister, 32);
5900    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5901  %}
5902  ins_pipe( pipe_slow );
5903%}
5904
5905// Load Range
5906instruct loadRange(rRegI dst, memory mem) %{
5907  match(Set dst (LoadRange mem));
5908
5909  ins_cost(125);
5910  format %{ "MOV    $dst,$mem" %}
5911  opcode(0x8B);
5912  ins_encode( OpcP, RegMem(dst,mem));
5913  ins_pipe( ialu_reg_mem );
5914%}
5915
5916
5917// Load Pointer
5918instruct loadP(eRegP dst, memory mem) %{
5919  match(Set dst (LoadP mem));
5920
5921  ins_cost(125);
5922  format %{ "MOV    $dst,$mem" %}
5923  opcode(0x8B);
5924  ins_encode( OpcP, RegMem(dst,mem));
5925  ins_pipe( ialu_reg_mem );
5926%}
5927
5928// Load Klass Pointer
5929instruct loadKlass(eRegP dst, memory mem) %{
5930  match(Set dst (LoadKlass mem));
5931
5932  ins_cost(125);
5933  format %{ "MOV    $dst,$mem" %}
5934  opcode(0x8B);
5935  ins_encode( OpcP, RegMem(dst,mem));
5936  ins_pipe( ialu_reg_mem );
5937%}
5938
5939// Load Double
5940instruct loadDPR(regDPR dst, memory mem) %{
5941  predicate(UseSSE<=1);
5942  match(Set dst (LoadD mem));
5943
5944  ins_cost(150);
5945  format %{ "FLD_D  ST,$mem\n\t"
5946            "FSTP   $dst" %}
5947  opcode(0xDD);               /* DD /0 */
5948  ins_encode( OpcP, RMopc_Mem(0x00,mem),
5949              Pop_Reg_DPR(dst) );
5950  ins_pipe( fpu_reg_mem );
5951%}
5952
5953// Load Double to XMM
5954instruct loadD(regD dst, memory mem) %{
5955  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5956  match(Set dst (LoadD mem));
5957  ins_cost(145);
5958  format %{ "MOVSD  $dst,$mem" %}
5959  ins_encode %{
5960    __ movdbl ($dst$$XMMRegister, $mem$$Address);
5961  %}
5962  ins_pipe( pipe_slow );
5963%}
5964
5965instruct loadD_partial(regD dst, memory mem) %{
5966  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5967  match(Set dst (LoadD mem));
5968  ins_cost(145);
5969  format %{ "MOVLPD $dst,$mem" %}
5970  ins_encode %{
5971    __ movdbl ($dst$$XMMRegister, $mem$$Address);
5972  %}
5973  ins_pipe( pipe_slow );
5974%}
5975
5976// Load to XMM register (single-precision floating point)
5977// MOVSS instruction
5978instruct loadF(regF dst, memory mem) %{
5979  predicate(UseSSE>=1);
5980  match(Set dst (LoadF mem));
5981  ins_cost(145);
5982  format %{ "MOVSS  $dst,$mem" %}
5983  ins_encode %{
5984    __ movflt ($dst$$XMMRegister, $mem$$Address);
5985  %}
5986  ins_pipe( pipe_slow );
5987%}
5988
5989// Load Float
5990instruct loadFPR(regFPR dst, memory mem) %{
5991  predicate(UseSSE==0);
5992  match(Set dst (LoadF mem));
5993
5994  ins_cost(150);
5995  format %{ "FLD_S  ST,$mem\n\t"
5996            "FSTP   $dst" %}
5997  opcode(0xD9);               /* D9 /0 */
5998  ins_encode( OpcP, RMopc_Mem(0x00,mem),
5999              Pop_Reg_FPR(dst) );
6000  ins_pipe( fpu_reg_mem );
6001%}
6002
6003// Load Effective Address
6004instruct leaP8(eRegP dst, indOffset8 mem) %{
6005  match(Set dst mem);
6006
6007  ins_cost(110);
6008  format %{ "LEA    $dst,$mem" %}
6009  opcode(0x8D);
6010  ins_encode( OpcP, RegMem(dst,mem));
6011  ins_pipe( ialu_reg_reg_fat );
6012%}
6013
6014instruct leaP32(eRegP dst, indOffset32 mem) %{
6015  match(Set dst mem);
6016
6017  ins_cost(110);
6018  format %{ "LEA    $dst,$mem" %}
6019  opcode(0x8D);
6020  ins_encode( OpcP, RegMem(dst,mem));
6021  ins_pipe( ialu_reg_reg_fat );
6022%}
6023
6024instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
6025  match(Set dst mem);
6026
6027  ins_cost(110);
6028  format %{ "LEA    $dst,$mem" %}
6029  opcode(0x8D);
6030  ins_encode( OpcP, RegMem(dst,mem));
6031  ins_pipe( ialu_reg_reg_fat );
6032%}
6033
6034instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
6035  match(Set dst mem);
6036
6037  ins_cost(110);
6038  format %{ "LEA    $dst,$mem" %}
6039  opcode(0x8D);
6040  ins_encode( OpcP, RegMem(dst,mem));
6041  ins_pipe( ialu_reg_reg_fat );
6042%}
6043
6044instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
6045  match(Set dst mem);
6046
6047  ins_cost(110);
6048  format %{ "LEA    $dst,$mem" %}
6049  opcode(0x8D);
6050  ins_encode( OpcP, RegMem(dst,mem));
6051  ins_pipe( ialu_reg_reg_fat );
6052%}
6053
6054// Load Constant
6055instruct loadConI(rRegI dst, immI src) %{
6056  match(Set dst src);
6057
6058  format %{ "MOV    $dst,$src" %}
6059  ins_encode( LdImmI(dst, src) );
6060  ins_pipe( ialu_reg_fat );
6061%}
6062
6063// Load Constant zero
6064instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
6065  match(Set dst src);
6066  effect(KILL cr);
6067
6068  ins_cost(50);
6069  format %{ "XOR    $dst,$dst" %}
6070  opcode(0x33);  /* + rd */
6071  ins_encode( OpcP, RegReg( dst, dst ) );
6072  ins_pipe( ialu_reg );
6073%}
6074
6075instruct loadConP(eRegP dst, immP src) %{
6076  match(Set dst src);
6077
6078  format %{ "MOV    $dst,$src" %}
6079  opcode(0xB8);  /* + rd */
6080  ins_encode( LdImmP(dst, src) );
6081  ins_pipe( ialu_reg_fat );
6082%}
6083
6084instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
6085  match(Set dst src);
6086  effect(KILL cr);
6087  ins_cost(200);
6088  format %{ "MOV    $dst.lo,$src.lo\n\t"
6089            "MOV    $dst.hi,$src.hi" %}
6090  opcode(0xB8);
6091  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
6092  ins_pipe( ialu_reg_long_fat );
6093%}
6094
6095instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
6096  match(Set dst src);
6097  effect(KILL cr);
6098  ins_cost(150);
6099  format %{ "XOR    $dst.lo,$dst.lo\n\t"
6100            "XOR    $dst.hi,$dst.hi" %}
6101  opcode(0x33,0x33);
6102  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6103  ins_pipe( ialu_reg_long );
6104%}
6105
6106// The instruction usage is guarded by predicate in operand immFPR().
6107instruct loadConFPR(regFPR dst, immFPR con) %{
6108  match(Set dst con);
6109  ins_cost(125);
6110  format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6111            "FSTP   $dst" %}
6112  ins_encode %{
6113    __ fld_s($constantaddress($con));
6114    __ fstp_d($dst$$reg);
6115  %}
6116  ins_pipe(fpu_reg_con);
6117%}
6118
6119// The instruction usage is guarded by predicate in operand immFPR0().
6120instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6121  match(Set dst con);
6122  ins_cost(125);
6123  format %{ "FLDZ   ST\n\t"
6124            "FSTP   $dst" %}
6125  ins_encode %{
6126    __ fldz();
6127    __ fstp_d($dst$$reg);
6128  %}
6129  ins_pipe(fpu_reg_con);
6130%}
6131
6132// The instruction usage is guarded by predicate in operand immFPR1().
6133instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6134  match(Set dst con);
6135  ins_cost(125);
6136  format %{ "FLD1   ST\n\t"
6137            "FSTP   $dst" %}
6138  ins_encode %{
6139    __ fld1();
6140    __ fstp_d($dst$$reg);
6141  %}
6142  ins_pipe(fpu_reg_con);
6143%}
6144
6145// The instruction usage is guarded by predicate in operand immF().
6146instruct loadConF(regF dst, immF con) %{
6147  match(Set dst con);
6148  ins_cost(125);
6149  format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6150  ins_encode %{
6151    __ movflt($dst$$XMMRegister, $constantaddress($con));
6152  %}
6153  ins_pipe(pipe_slow);
6154%}
6155
6156// The instruction usage is guarded by predicate in operand immF0().
6157instruct loadConF0(regF dst, immF0 src) %{
6158  match(Set dst src);
6159  ins_cost(100);
6160  format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6161  ins_encode %{
6162    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6163  %}
6164  ins_pipe(pipe_slow);
6165%}
6166
6167// The instruction usage is guarded by predicate in operand immDPR().
6168instruct loadConDPR(regDPR dst, immDPR con) %{
6169  match(Set dst con);
6170  ins_cost(125);
6171
6172  format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6173            "FSTP   $dst" %}
6174  ins_encode %{
6175    __ fld_d($constantaddress($con));
6176    __ fstp_d($dst$$reg);
6177  %}
6178  ins_pipe(fpu_reg_con);
6179%}
6180
6181// The instruction usage is guarded by predicate in operand immDPR0().
6182instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6183  match(Set dst con);
6184  ins_cost(125);
6185
6186  format %{ "FLDZ   ST\n\t"
6187            "FSTP   $dst" %}
6188  ins_encode %{
6189    __ fldz();
6190    __ fstp_d($dst$$reg);
6191  %}
6192  ins_pipe(fpu_reg_con);
6193%}
6194
6195// The instruction usage is guarded by predicate in operand immDPR1().
6196instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6197  match(Set dst con);
6198  ins_cost(125);
6199
6200  format %{ "FLD1   ST\n\t"
6201            "FSTP   $dst" %}
6202  ins_encode %{
6203    __ fld1();
6204    __ fstp_d($dst$$reg);
6205  %}
6206  ins_pipe(fpu_reg_con);
6207%}
6208
6209// The instruction usage is guarded by predicate in operand immD().
6210instruct loadConD(regD dst, immD con) %{
6211  match(Set dst con);
6212  ins_cost(125);
6213  format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6214  ins_encode %{
6215    __ movdbl($dst$$XMMRegister, $constantaddress($con));
6216  %}
6217  ins_pipe(pipe_slow);
6218%}
6219
6220// The instruction usage is guarded by predicate in operand immD0().
6221instruct loadConD0(regD dst, immD0 src) %{
6222  match(Set dst src);
6223  ins_cost(100);
6224  format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6225  ins_encode %{
6226    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6227  %}
6228  ins_pipe( pipe_slow );
6229%}
6230
6231// Load Stack Slot
6232instruct loadSSI(rRegI dst, stackSlotI src) %{
6233  match(Set dst src);
6234  ins_cost(125);
6235
6236  format %{ "MOV    $dst,$src" %}
6237  opcode(0x8B);
6238  ins_encode( OpcP, RegMem(dst,src));
6239  ins_pipe( ialu_reg_mem );
6240%}
6241
6242instruct loadSSL(eRegL dst, stackSlotL src) %{
6243  match(Set dst src);
6244
6245  ins_cost(200);
6246  format %{ "MOV    $dst,$src.lo\n\t"
6247            "MOV    $dst+4,$src.hi" %}
6248  opcode(0x8B, 0x8B);
6249  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6250  ins_pipe( ialu_mem_long_reg );
6251%}
6252
6253// Load Stack Slot
6254instruct loadSSP(eRegP dst, stackSlotP src) %{
6255  match(Set dst src);
6256  ins_cost(125);
6257
6258  format %{ "MOV    $dst,$src" %}
6259  opcode(0x8B);
6260  ins_encode( OpcP, RegMem(dst,src));
6261  ins_pipe( ialu_reg_mem );
6262%}
6263
6264// Load Stack Slot
6265instruct loadSSF(regFPR dst, stackSlotF src) %{
6266  match(Set dst src);
6267  ins_cost(125);
6268
6269  format %{ "FLD_S  $src\n\t"
6270            "FSTP   $dst" %}
6271  opcode(0xD9);               /* D9 /0, FLD m32real */
6272  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6273              Pop_Reg_FPR(dst) );
6274  ins_pipe( fpu_reg_mem );
6275%}
6276
6277// Load Stack Slot
6278instruct loadSSD(regDPR dst, stackSlotD src) %{
6279  match(Set dst src);
6280  ins_cost(125);
6281
6282  format %{ "FLD_D  $src\n\t"
6283            "FSTP   $dst" %}
6284  opcode(0xDD);               /* DD /0, FLD m64real */
6285  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6286              Pop_Reg_DPR(dst) );
6287  ins_pipe( fpu_reg_mem );
6288%}
6289
6290// Prefetch instructions for allocation.
6291// Must be safe to execute with invalid address (cannot fault).
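// (The x86 PREFETCH family is only a hint: it does not raise page faults,
// which is what makes prefetching past the end of the current allocation
// safe.)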
6292
6293instruct prefetchAlloc0( memory mem ) %{
6294  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6295  match(PrefetchAllocation mem);
6296  ins_cost(0);
6297  size(0);
6298  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6299  ins_encode();
6300  ins_pipe(empty);
6301%}
6302
6303instruct prefetchAlloc( memory mem ) %{
6304  predicate(AllocatePrefetchInstr==3);
6305  match( PrefetchAllocation mem );
6306  ins_cost(100);
6307
6308  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6309  ins_encode %{
6310    __ prefetchw($mem$$Address);
6311  %}
6312  ins_pipe(ialu_mem);
6313%}
6314
6315instruct prefetchAllocNTA( memory mem ) %{
6316  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6317  match(PrefetchAllocation mem);
6318  ins_cost(100);
6319
6320  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6321  ins_encode %{
6322    __ prefetchnta($mem$$Address);
6323  %}
6324  ins_pipe(ialu_mem);
6325%}
6326
6327instruct prefetchAllocT0( memory mem ) %{
6328  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6329  match(PrefetchAllocation mem);
6330  ins_cost(100);
6331
6332  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6333  ins_encode %{
6334    __ prefetcht0($mem$$Address);
6335  %}
6336  ins_pipe(ialu_mem);
6337%}
6338
6339instruct prefetchAllocT2( memory mem ) %{
6340  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6341  match(PrefetchAllocation mem);
6342  ins_cost(100);
6343
6344  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6345  ins_encode %{
6346    __ prefetcht2($mem$$Address);
6347  %}
6348  ins_pipe(ialu_mem);
6349%}
6350
6351//----------Store Instructions-------------------------------------------------
6352
6353// Store Byte
6354instruct storeB(memory mem, xRegI src) %{
6355  match(Set mem (StoreB mem src));
6356
6357  ins_cost(125);
6358  format %{ "MOV8   $mem,$src" %}
6359  opcode(0x88);
6360  ins_encode( OpcP, RegMem( src, mem ) );
6361  ins_pipe( ialu_mem_reg );
6362%}
6363
6364// Store Char/Short
6365instruct storeC(memory mem, rRegI src) %{
6366  match(Set mem (StoreC mem src));
6367
6368  ins_cost(125);
6369  format %{ "MOV16  $mem,$src" %}
6370  opcode(0x89, 0x66);
6371  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6372  ins_pipe( ialu_mem_reg );
6373%}
6374
6375// Store Integer
6376instruct storeI(memory mem, rRegI src) %{
6377  match(Set mem (StoreI mem src));
6378
6379  ins_cost(125);
6380  format %{ "MOV    $mem,$src" %}
6381  opcode(0x89);
6382  ins_encode( OpcP, RegMem( src, mem ) );
6383  ins_pipe( ialu_mem_reg );
6384%}
6385
6386// Store Long
6387instruct storeL(long_memory mem, eRegL src) %{
6388  predicate(!((StoreLNode*)n)->require_atomic_access());
6389  match(Set mem (StoreL mem src));
6390
6391  ins_cost(200);
6392  format %{ "MOV    $mem,$src.lo\n\t"
6393            "MOV    $mem+4,$src.hi" %}
6394  opcode(0x89, 0x89);
6395  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6396  ins_pipe( ialu_mem_long_reg );
6397%}
6398
6399// Store Long to Integer
6400instruct storeL2I(memory mem, eRegL src) %{
6401  match(Set mem (StoreI mem (ConvL2I src)));
6402
6403  format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6404  ins_encode %{
6405    __ movl($mem$$Address, $src$$Register);
6406  %}
6407  ins_pipe(ialu_mem_reg);
6408%}
6409
6410// Volatile Store Long.  Must be atomic, so move it into
6411// the FP TOS and then do a 64-bit FIST.  Has to probe the
6412// target address before the store (for null-ptr checks)
6413// so the memory operand is used twice in the encoding.
6414instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6415  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6416  match(Set mem (StoreL mem src));
6417  effect( KILL cr );
6418  ins_cost(400);
6419  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6420            "FILD   $src\n\t"
6421            "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6422  opcode(0x3B);
6423  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6424  ins_pipe( fpu_reg_mem );
6425%}
6426
6427instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6428  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6429  match(Set mem (StoreL mem src));
6430  effect( TEMP tmp, KILL cr );
6431  ins_cost(380);
6432  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6433            "MOVSD  $tmp,$src\n\t"
6434            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6435  ins_encode %{
6436    __ cmpl(rax, $mem$$Address);
6437    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6438    __ movdbl($mem$$Address, $tmp$$XMMRegister);
6439  %}
6440  ins_pipe( pipe_slow );
6441%}
6442
6443instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6444  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6445  match(Set mem (StoreL mem src));
6446  effect( TEMP tmp2 , TEMP tmp, KILL cr );
6447  ins_cost(360);
6448  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6449            "MOVD   $tmp,$src.lo\n\t"
6450            "MOVD   $tmp2,$src.hi\n\t"
6451            "PUNPCKLDQ $tmp,$tmp2\n\t"
6452            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6453  ins_encode %{
6454    __ cmpl(rax, $mem$$Address);
6455    __ movdl($tmp$$XMMRegister, $src$$Register);
6456    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6457    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6458    __ movdbl($mem$$Address, $tmp$$XMMRegister);
6459  %}
6460  ins_pipe( pipe_slow );
6461%}
6462
6463// Store Pointer; for storing unknown oops and raw pointers
6464instruct storeP(memory mem, anyRegP src) %{
6465  match(Set mem (StoreP mem src));
6466
6467  ins_cost(125);
6468  format %{ "MOV    $mem,$src" %}
6469  opcode(0x89);
6470  ins_encode( OpcP, RegMem( src, mem ) );
6471  ins_pipe( ialu_mem_reg );
6472%}
6473
6474// Store Integer Immediate
6475instruct storeImmI(memory mem, immI src) %{
6476  match(Set mem (StoreI mem src));
6477
6478  ins_cost(150);
6479  format %{ "MOV    $mem,$src" %}
6480  opcode(0xC7);               /* C7 /0 */
6481  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6482  ins_pipe( ialu_mem_imm );
6483%}
6484
6485// Store Short/Char Immediate
6486instruct storeImmI16(memory mem, immI16 src) %{
6487  predicate(UseStoreImmI16);
6488  match(Set mem (StoreC mem src));
6489
6490  ins_cost(150);
6491  format %{ "MOV16  $mem,$src" %}
6492  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6493  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6494  ins_pipe( ialu_mem_imm );
6495%}
6496
6497// Store Pointer Immediate; null pointers or constant oops that do not
6498// need card-mark barriers.
6499instruct storeImmP(memory mem, immP src) %{
6500  match(Set mem (StoreP mem src));
6501
6502  ins_cost(150);
6503  format %{ "MOV    $mem,$src" %}
6504  opcode(0xC7);               /* C7 /0 */
6505  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6506  ins_pipe( ialu_mem_imm );
6507%}
6508
6509// Store Byte Immediate
6510instruct storeImmB(memory mem, immI8 src) %{
6511  match(Set mem (StoreB mem src));
6512
6513  ins_cost(150);
6514  format %{ "MOV8   $mem,$src" %}
6515  opcode(0xC6);               /* C6 /0 */
6516  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6517  ins_pipe( ialu_mem_imm );
6518%}
6519
6520// Store CMS card-mark Immediate
6521instruct storeImmCM(memory mem, immI8 src) %{
6522  match(Set mem (StoreCM mem src));
6523
6524  ins_cost(150);
6525  format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6526  opcode(0xC6);               /* C6 /0 */
6527  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6528  ins_pipe( ialu_mem_imm );
6529%}
6530
6531// Store Double
6532instruct storeDPR( memory mem, regDPR1 src) %{
6533  predicate(UseSSE<=1);
6534  match(Set mem (StoreD mem src));
6535
6536  ins_cost(100);
6537  format %{ "FST_D  $mem,$src" %}
6538  opcode(0xDD);       /* DD /2 */
6539  ins_encode( enc_FPR_store(mem,src) );
6540  ins_pipe( fpu_mem_reg );
6541%}
6542
6543// Store double does rounding on x86
6544instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6545  predicate(UseSSE<=1);
6546  match(Set mem (StoreD mem (RoundDouble src)));
6547
6548  ins_cost(100);
6549  format %{ "FST_D  $mem,$src\t# round" %}
6550  opcode(0xDD);       /* DD /2 */
6551  ins_encode( enc_FPR_store(mem,src) );
6552  ins_pipe( fpu_mem_reg );
6553%}
6554
// Store XMM register to memory (double-precision floating point)
6556// MOVSD instruction
6557instruct storeD(memory mem, regD src) %{
6558  predicate(UseSSE>=2);
6559  match(Set mem (StoreD mem src));
6560  ins_cost(95);
6561  format %{ "MOVSD  $mem,$src" %}
6562  ins_encode %{
6563    __ movdbl($mem$$Address, $src$$XMMRegister);
6564  %}
6565  ins_pipe( pipe_slow );
6566%}
6567
// Copy Double (register-to-register move)
6569instruct MoveD2VL(vlRegD dst, regD src) %{
6570  match(Set dst src);
6571  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6572  ins_encode %{
6573    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6574  %}
6575  ins_pipe( fpu_reg_reg );
6576%}
6577
// Copy Double (register-to-register move)
6579instruct MoveVL2D(regD dst, vlRegD src) %{
6580  match(Set dst src);
6581  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6582  ins_encode %{
6583    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6584  %}
6585  ins_pipe( fpu_reg_reg );
6586%}
6587
6588// Store XMM register to memory (single-precision floating point)
6589// MOVSS instruction
6590instruct storeF(memory mem, regF src) %{
6591  predicate(UseSSE>=1);
6592  match(Set mem (StoreF mem src));
6593  ins_cost(95);
6594  format %{ "MOVSS  $mem,$src" %}
6595  ins_encode %{
6596    __ movflt($mem$$Address, $src$$XMMRegister);
6597  %}
6598  ins_pipe( pipe_slow );
6599%}
6600
// Copy Float (register-to-register move)
6602instruct MoveF2VL(vlRegF dst, regF src) %{
6603  match(Set dst src);
6604  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6605  ins_encode %{
6606    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6607  %}
6608  ins_pipe( fpu_reg_reg );
6609%}
6610
// Copy Float (register-to-register move)
6612instruct MoveVL2F(regF dst, vlRegF src) %{
6613  match(Set dst src);
6614  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6615  ins_encode %{
6616    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6617  %}
6618  ins_pipe( fpu_reg_reg );
6619%}
6620
6621// Store Float
6622instruct storeFPR( memory mem, regFPR1 src) %{
6623  predicate(UseSSE==0);
6624  match(Set mem (StoreF mem src));
6625
6626  ins_cost(100);
6627  format %{ "FST_S  $mem,$src" %}
6628  opcode(0xD9);       /* D9 /2 */
6629  ins_encode( enc_FPR_store(mem,src) );
6630  ins_pipe( fpu_mem_reg );
6631%}
6632
6633// Store Float does rounding on x86
6634instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6635  predicate(UseSSE==0);
6636  match(Set mem (StoreF mem (RoundFloat src)));
6637
6638  ins_cost(100);
6639  format %{ "FST_S  $mem,$src\t# round" %}
6640  opcode(0xD9);       /* D9 /2 */
6641  ins_encode( enc_FPR_store(mem,src) );
6642  ins_pipe( fpu_mem_reg );
6643%}
6644
6645// Store Float does rounding on x86
6646instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6647  predicate(UseSSE<=1);
6648  match(Set mem (StoreF mem (ConvD2F src)));
6649
6650  ins_cost(100);
6651  format %{ "FST_S  $mem,$src\t# D-round" %}
6652  opcode(0xD9);       /* D9 /2 */
6653  ins_encode( enc_FPR_store(mem,src) );
6654  ins_pipe( fpu_mem_reg );
6655%}
6656
// Store immediate Float value (it is faster than a store from an FPU register)
6658// The instruction usage is guarded by predicate in operand immFPR().
6659instruct storeFPR_imm( memory mem, immFPR src) %{
6660  match(Set mem (StoreF mem src));
6661
6662  ins_cost(50);
6663  format %{ "MOV    $mem,$src\t# store float" %}
6664  opcode(0xC7);               /* C7 /0 */
6665  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6666  ins_pipe( ialu_mem_imm );
6667%}
6668
// Store immediate Float value (it is faster than a store from an XMM register)
6670// The instruction usage is guarded by predicate in operand immF().
6671instruct storeF_imm( memory mem, immF src) %{
6672  match(Set mem (StoreF mem src));
6673
6674  ins_cost(50);
6675  format %{ "MOV    $mem,$src\t# store float" %}
6676  opcode(0xC7);               /* C7 /0 */
6677  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6678  ins_pipe( ialu_mem_imm );
6679%}
6680
6681// Store Integer to stack slot
6682instruct storeSSI(stackSlotI dst, rRegI src) %{
6683  match(Set dst src);
6684
6685  ins_cost(100);
6686  format %{ "MOV    $dst,$src" %}
6687  opcode(0x89);
6688  ins_encode( OpcPRegSS( dst, src ) );
6689  ins_pipe( ialu_mem_reg );
6690%}
6691
// Store Pointer to stack slot
6693instruct storeSSP(stackSlotP dst, eRegP src) %{
6694  match(Set dst src);
6695
6696  ins_cost(100);
6697  format %{ "MOV    $dst,$src" %}
6698  opcode(0x89);
6699  ins_encode( OpcPRegSS( dst, src ) );
6700  ins_pipe( ialu_mem_reg );
6701%}
6702
6703// Store Long to stack slot
6704instruct storeSSL(stackSlotL dst, eRegL src) %{
6705  match(Set dst src);
6706
6707  ins_cost(200);
6708  format %{ "MOV    $dst,$src.lo\n\t"
6709            "MOV    $dst+4,$src.hi" %}
6710  opcode(0x89, 0x89);
6711  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6712  ins_pipe( ialu_mem_long_reg );
6713%}
6714
6715//----------MemBar Instructions-----------------------------------------------
6716// Memory barrier flavors
6717
6718instruct membar_acquire() %{
6719  match(MemBarAcquire);
6720  match(LoadFence);
6721  ins_cost(400);
6722
6723  size(0);
6724  format %{ "MEMBAR-acquire ! (empty encoding)" %}
6725  ins_encode();
6726  ins_pipe(empty);
6727%}
6728
6729instruct membar_acquire_lock() %{
6730  match(MemBarAcquireLock);
6731  ins_cost(0);
6732
6733  size(0);
6734  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6735  ins_encode( );
6736  ins_pipe(empty);
6737%}
6738
6739instruct membar_release() %{
6740  match(MemBarRelease);
6741  match(StoreFence);
6742  ins_cost(400);
6743
6744  size(0);
6745  format %{ "MEMBAR-release ! (empty encoding)" %}
6746  ins_encode( );
6747  ins_pipe(empty);
6748%}
6749
6750instruct membar_release_lock() %{
6751  match(MemBarReleaseLock);
6752  ins_cost(0);
6753
6754  size(0);
6755  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6756  ins_encode( );
6757  ins_pipe(empty);
6758%}
6759
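// A volatile store needs a full StoreLoad fence.  On this port it is
// implemented as a locked read-modify-write of the top-of-stack word (the
// "LOCK ADDL [ESP + #0], 0" shown in the format below, emitted via
// MacroAssembler::membar): the locked operation orders earlier stores
// before later loads and is generally cheaper than MFENCE on the
// processors this matcher targets.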
6760instruct membar_volatile(eFlagsReg cr) %{
6761  match(MemBarVolatile);
6762  effect(KILL cr);
6763  ins_cost(400);
6764
6765  format %{
6766    $$template
6767    if (os::is_MP()) {
6768      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6769    } else {
6770      $$emit$$"MEMBAR-volatile ! (empty encoding)"
6771    }
6772  %}
6773  ins_encode %{
6774    __ membar(Assembler::StoreLoad);
6775  %}
6776  ins_pipe(pipe_slow);
6777%}
6778
6779instruct unnecessary_membar_volatile() %{
6780  match(MemBarVolatile);
6781  predicate(Matcher::post_store_load_barrier(n));
6782  ins_cost(0);
6783
6784  size(0);
6785  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6786  ins_encode( );
6787  ins_pipe(empty);
6788%}
6789
6790instruct membar_storestore() %{
6791  match(MemBarStoreStore);
6792  ins_cost(0);
6793
6794  size(0);
6795  format %{ "MEMBAR-storestore (empty encoding)" %}
6796  ins_encode( );
6797  ins_pipe(empty);
6798%}
6799
6800//----------Move Instructions--------------------------------------------------
6801instruct castX2P(eAXRegP dst, eAXRegI src) %{
6802  match(Set dst (CastX2P src));
6803  format %{ "# X2P  $dst, $src" %}
6804  ins_encode( /*empty encoding*/ );
6805  ins_cost(0);
6806  ins_pipe(empty);
6807%}
6808
6809instruct castP2X(rRegI dst, eRegP src ) %{
6810  match(Set dst (CastP2X src));
6811  ins_cost(50);
6812  format %{ "MOV    $dst, $src\t# CastP2X" %}
6813  ins_encode( enc_Copy( dst, src) );
6814  ins_pipe( ialu_reg_reg );
6815%}
6816
6817//----------Conditional Move---------------------------------------------------
6818// Conditional move
6819instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6820  predicate(!VM_Version::supports_cmov() );
6821  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6822  ins_cost(200);
6823  format %{ "J$cop,us skip\t# signed cmove\n\t"
6824            "MOV    $dst,$src\n"
6825      "skip:" %}
6826  ins_encode %{
6827    Label Lskip;
6828    // Invert sense of branch from sense of CMOV
6829    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6830    __ movl($dst$$Register, $src$$Register);
6831    __ bind(Lskip);
6832  %}
6833  ins_pipe( pipe_cmov_reg );
6834%}
6835
6836instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6837  predicate(!VM_Version::supports_cmov() );
6838  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6839  ins_cost(200);
6840  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6841            "MOV    $dst,$src\n"
6842      "skip:" %}
6843  ins_encode %{
6844    Label Lskip;
6845    // Invert sense of branch from sense of CMOV
6846    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6847    __ movl($dst$$Register, $src$$Register);
6848    __ bind(Lskip);
6849  %}
6850  ins_pipe( pipe_cmov_reg );
6851%}
6852
6853instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6854  predicate(VM_Version::supports_cmov() );
6855  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6856  ins_cost(200);
6857  format %{ "CMOV$cop $dst,$src" %}
6858  opcode(0x0F,0x40);
6859  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6860  ins_pipe( pipe_cmov_reg );
6861%}
6862
6863instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6864  predicate(VM_Version::supports_cmov() );
6865  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6866  ins_cost(200);
6867  format %{ "CMOV$cop $dst,$src" %}
6868  opcode(0x0F,0x40);
6869  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6870  ins_pipe( pipe_cmov_reg );
6871%}
6872
6873instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6874  predicate(VM_Version::supports_cmov() );
6875  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6876  ins_cost(200);
6877  expand %{
6878    cmovI_regU(cop, cr, dst, src);
6879  %}
6880%}
6881
6882// Conditional move
6883instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6884  predicate(VM_Version::supports_cmov() );
6885  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6886  ins_cost(250);
6887  format %{ "CMOV$cop $dst,$src" %}
6888  opcode(0x0F,0x40);
6889  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6890  ins_pipe( pipe_cmov_mem );
6891%}
6892
6893// Conditional move
6894instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6895  predicate(VM_Version::supports_cmov() );
6896  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6897  ins_cost(250);
6898  format %{ "CMOV$cop $dst,$src" %}
6899  opcode(0x0F,0x40);
6900  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6901  ins_pipe( pipe_cmov_mem );
6902%}
6903
6904instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6905  predicate(VM_Version::supports_cmov() );
6906  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6907  ins_cost(250);
6908  expand %{
6909    cmovI_memU(cop, cr, dst, src);
6910  %}
6911%}
6912
6913// Conditional move
6914instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6915  predicate(VM_Version::supports_cmov() );
6916  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6917  ins_cost(200);
6918  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6919  opcode(0x0F,0x40);
6920  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6921  ins_pipe( pipe_cmov_reg );
6922%}
6923
// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
//       regardless of whether we are on a P6, so we
//       emulate a cmov here
6928instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6929  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6930  ins_cost(300);
6931  format %{ "Jn$cop   skip\n\t"
6932          "MOV    $dst,$src\t# pointer\n"
6933      "skip:" %}
6934  opcode(0x8b);
6935  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6936  ins_pipe( pipe_cmov_reg );
6937%}
6938
6939// Conditional move
6940instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6941  predicate(VM_Version::supports_cmov() );
6942  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6943  ins_cost(200);
6944  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6945  opcode(0x0F,0x40);
6946  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6947  ins_pipe( pipe_cmov_reg );
6948%}
6949
6950instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6951  predicate(VM_Version::supports_cmov() );
6952  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6953  ins_cost(200);
6954  expand %{
6955    cmovP_regU(cop, cr, dst, src);
6956  %}
6957%}
6958
6959// DISABLED: Requires the ADLC to emit a bottom_type call that
6960// correctly meets the two pointer arguments; one is an incoming
6961// register but the other is a memory operand.  ALSO appears to
6962// be buggy with implicit null checks.
6963//
6964//// Conditional move
6965//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6966//  predicate(VM_Version::supports_cmov() );
6967//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6968//  ins_cost(250);
6969//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6970//  opcode(0x0F,0x40);
6971//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6972//  ins_pipe( pipe_cmov_mem );
6973//%}
6974//
6975//// Conditional move
6976//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6977//  predicate(VM_Version::supports_cmov() );
6978//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6979//  ins_cost(250);
6980//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6981//  opcode(0x0F,0x40);
6982//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6983//  ins_pipe( pipe_cmov_mem );
6984//%}
6985
6986// Conditional move
6987instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6988  predicate(UseSSE<=1);
6989  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6990  ins_cost(200);
6991  format %{ "FCMOV$cop $dst,$src\t# double" %}
6992  opcode(0xDA);
6993  ins_encode( enc_cmov_dpr(cop,src) );
6994  ins_pipe( pipe_cmovDPR_reg );
6995%}
6996
6997// Conditional move
6998instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6999  predicate(UseSSE==0);
7000  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7001  ins_cost(200);
7002  format %{ "FCMOV$cop $dst,$src\t# float" %}
7003  opcode(0xDA);
7004  ins_encode( enc_cmov_dpr(cop,src) );
7005  ins_pipe( pipe_cmovDPR_reg );
7006%}
7007
7008// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
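// (FCMOVcc can only test the carry/zero/parity style conditions produced
// by an unsigned FPU compare, so for a signed condition the move is
// emulated with an inverted branch around a plain register copy, as the
// format below shows.)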
7009instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
7010  predicate(UseSSE<=1);
7011  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7012  ins_cost(200);
7013  format %{ "Jn$cop   skip\n\t"
7014            "MOV    $dst,$src\t# double\n"
7015      "skip:" %}
7016  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
7017  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
7018  ins_pipe( pipe_cmovDPR_reg );
7019%}
7020
7021// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7022instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
7023  predicate(UseSSE==0);
7024  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7025  ins_cost(200);
7026  format %{ "Jn$cop    skip\n\t"
7027            "MOV    $dst,$src\t# float\n"
7028      "skip:" %}
7029  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
7030  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
7031  ins_pipe( pipe_cmovDPR_reg );
7032%}
7033
7034// No CMOVE with SSE/SSE2
7035instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
7036  predicate (UseSSE>=1);
7037  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7038  ins_cost(200);
7039  format %{ "Jn$cop   skip\n\t"
7040            "MOVSS  $dst,$src\t# float\n"
7041      "skip:" %}
7042  ins_encode %{
7043    Label skip;
7044    // Invert sense of branch from sense of CMOV
7045    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7046    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7047    __ bind(skip);
7048  %}
7049  ins_pipe( pipe_slow );
7050%}
7051
7052// No CMOVE with SSE/SSE2
7053instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
7054  predicate (UseSSE>=2);
7055  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7056  ins_cost(200);
7057  format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# double\n"
7059      "skip:" %}
7060  ins_encode %{
7061    Label skip;
7062    // Invert sense of branch from sense of CMOV
7063    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7064    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7065    __ bind(skip);
7066  %}
7067  ins_pipe( pipe_slow );
7068%}
7069
7070// unsigned version
7071instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
7072  predicate (UseSSE>=1);
7073  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7074  ins_cost(200);
7075  format %{ "Jn$cop   skip\n\t"
7076            "MOVSS  $dst,$src\t# float\n"
7077      "skip:" %}
7078  ins_encode %{
7079    Label skip;
7080    // Invert sense of branch from sense of CMOV
7081    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7082    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7083    __ bind(skip);
7084  %}
7085  ins_pipe( pipe_slow );
7086%}
7087
7088instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
7089  predicate (UseSSE>=1);
7090  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7091  ins_cost(200);
7092  expand %{
7093    fcmovF_regU(cop, cr, dst, src);
7094  %}
7095%}
7096
7097// unsigned version
7098instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
7099  predicate (UseSSE>=2);
7100  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7101  ins_cost(200);
7102  format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# double\n"
7104      "skip:" %}
7105  ins_encode %{
7106    Label skip;
7107    // Invert sense of branch from sense of CMOV
7108    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7109    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7110    __ bind(skip);
7111  %}
7112  ins_pipe( pipe_slow );
7113%}
7114
7115instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
7116  predicate (UseSSE>=2);
7117  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7118  ins_cost(200);
7119  expand %{
7120    fcmovD_regU(cop, cr, dst, src);
7121  %}
7122%}
7123
7124instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7125  predicate(VM_Version::supports_cmov() );
7126  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7127  ins_cost(200);
7128  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7129            "CMOV$cop $dst.hi,$src.hi" %}
7130  opcode(0x0F,0x40);
7131  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7132  ins_pipe( pipe_cmov_reg_long );
7133%}
7134
7135instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7136  predicate(VM_Version::supports_cmov() );
7137  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7138  ins_cost(200);
7139  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7140            "CMOV$cop $dst.hi,$src.hi" %}
7141  opcode(0x0F,0x40);
7142  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7143  ins_pipe( pipe_cmov_reg_long );
7144%}
7145
7146instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
7147  predicate(VM_Version::supports_cmov() );
7148  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7149  ins_cost(200);
7150  expand %{
7151    cmovL_regU(cop, cr, dst, src);
7152  %}
7153%}
7154
7155//----------Arithmetic Instructions--------------------------------------------
7156//----------Addition Instructions----------------------------------------------
7157
7158// Integer Addition Instructions
7159instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7160  match(Set dst (AddI dst src));
7161  effect(KILL cr);
7162
7163  size(2);
7164  format %{ "ADD    $dst,$src" %}
7165  opcode(0x03);
7166  ins_encode( OpcP, RegReg( dst, src) );
7167  ins_pipe( ialu_reg_reg );
7168%}
7169
7170instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7171  match(Set dst (AddI dst src));
7172  effect(KILL cr);
7173
7174  format %{ "ADD    $dst,$src" %}
7175  opcode(0x81, 0x00); /* /0 id */
7176  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7177  ins_pipe( ialu_reg );
7178%}
7179
7180instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7181  predicate(UseIncDec);
7182  match(Set dst (AddI dst src));
7183  effect(KILL cr);
7184
7185  size(1);
7186  format %{ "INC    $dst" %}
  opcode(0x40); /* 0x40 + reg => INC r32 */
7188  ins_encode( Opc_plus( primary, dst ) );
7189  ins_pipe( ialu_reg );
7190%}
7191
7192instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7193  match(Set dst (AddI src0 src1));
7194  ins_cost(110);
7195
7196  format %{ "LEA    $dst,[$src0 + $src1]" %}
7197  opcode(0x8D); /* 0x8D /r */
7198  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7199  ins_pipe( ialu_reg_reg );
7200%}
7201
7202instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7203  match(Set dst (AddP src0 src1));
7204  ins_cost(110);
7205
7206  format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7207  opcode(0x8D); /* 0x8D /r */
7208  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7209  ins_pipe( ialu_reg_reg );
7210%}
7211
7212instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7213  predicate(UseIncDec);
7214  match(Set dst (AddI dst src));
7215  effect(KILL cr);
7216
7217  size(1);
7218  format %{ "DEC    $dst" %}
  opcode(0x48); /* 0x48 + reg => DEC r32 */
7220  ins_encode( Opc_plus( primary, dst ) );
7221  ins_pipe( ialu_reg );
7222%}
7223
7224instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7225  match(Set dst (AddP dst src));
7226  effect(KILL cr);
7227
7228  size(2);
7229  format %{ "ADD    $dst,$src" %}
7230  opcode(0x03);
7231  ins_encode( OpcP, RegReg( dst, src) );
7232  ins_pipe( ialu_reg_reg );
7233%}
7234
7235instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7236  match(Set dst (AddP dst src));
7237  effect(KILL cr);
7238
7239  format %{ "ADD    $dst,$src" %}
7240  opcode(0x81,0x00); /* Opcode 81 /0 id */
7241  // ins_encode( RegImm( dst, src) );
7242  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7243  ins_pipe( ialu_reg );
7244%}
7245
7246instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7247  match(Set dst (AddI dst (LoadI src)));
7248  effect(KILL cr);
7249
7250  ins_cost(125);
7251  format %{ "ADD    $dst,$src" %}
7252  opcode(0x03);
7253  ins_encode( OpcP, RegMem( dst, src) );
7254  ins_pipe( ialu_reg_mem );
7255%}
7256
7257instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7258  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7259  effect(KILL cr);
7260
7261  ins_cost(150);
7262  format %{ "ADD    $dst,$src" %}
7263  opcode(0x01);  /* Opcode 01 /r */
7264  ins_encode( OpcP, RegMem( src, dst ) );
7265  ins_pipe( ialu_mem_reg );
7266%}
7267
7268// Add Memory with Immediate
7269instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7270  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7271  effect(KILL cr);
7272
7273  ins_cost(125);
7274  format %{ "ADD    $dst,$src" %}
7275  opcode(0x81);               /* Opcode 81 /0 id */
7276  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7277  ins_pipe( ialu_mem_imm );
7278%}
7279
7280instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7281  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7282  effect(KILL cr);
7283
7284  ins_cost(125);
7285  format %{ "INC    $dst" %}
7286  opcode(0xFF);               /* Opcode FF /0 */
7287  ins_encode( OpcP, RMopc_Mem(0x00,dst));
7288  ins_pipe( ialu_mem_imm );
7289%}
7290
7291instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7292  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7293  effect(KILL cr);
7294
7295  ins_cost(125);
7296  format %{ "DEC    $dst" %}
7297  opcode(0xFF);               /* Opcode FF /1 */
7298  ins_encode( OpcP, RMopc_Mem(0x01,dst));
7299  ins_pipe( ialu_mem_imm );
7300%}
7301
7302
7303instruct checkCastPP( eRegP dst ) %{
7304  match(Set dst (CheckCastPP dst));
7305
7306  size(0);
7307  format %{ "#checkcastPP of $dst" %}
7308  ins_encode( /*empty encoding*/ );
7309  ins_pipe( empty );
7310%}
7311
7312instruct castPP( eRegP dst ) %{
7313  match(Set dst (CastPP dst));
7314  format %{ "#castPP of $dst" %}
7315  ins_encode( /*empty encoding*/ );
7316  ins_pipe( empty );
7317%}
7318
7319instruct castII( rRegI dst ) %{
7320  match(Set dst (CastII dst));
7321  format %{ "#castII of $dst" %}
7322  ins_encode( /*empty encoding*/ );
7323  ins_cost(0);
7324  ins_pipe( empty );
7325%}
7326
7327// Load-locked - same as a regular pointer load when used with compare-swap
7328instruct loadPLocked(eRegP dst, memory mem) %{
7329  match(Set dst (LoadPLocked mem));
7330
7331  ins_cost(125);
7332  format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7333  opcode(0x8B);
7334  ins_encode( OpcP, RegMem(dst,mem));
7335  ins_pipe( ialu_reg_mem );
7336%}
7337
7338// Conditional-store of the updated heap-top.
7339// Used during allocation of the shared heap.
7340// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
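// Roughly, in illustrative pseudocode (not the emitted sequence):
//   if (*heap_top_ptr == EAX)  { *heap_top_ptr = newval; ZF = 1; }  // bump won
//   else                       { EAX = *heap_top_ptr;    ZF = 0; }  // lost race
// The allocation path then branches on the flag result left in $cr.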
7341instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7342  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7343  // EAX is killed if there is contention, but then it's also unused.
7344  // In the common case of no contention, EAX holds the new oop address.
7345  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7346  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7347  ins_pipe( pipe_cmpxchg );
7348%}
7349
7350// Conditional-store of an int value.
7351// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7352instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7353  match(Set cr (StoreIConditional mem (Binary oldval newval)));
7354  effect(KILL oldval);
7355  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7356  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7357  ins_pipe( pipe_cmpxchg );
7358%}
7359
7360// Conditional-store of a long value.
7361// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
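// CMPXCHG8B compares EDX:EAX against the 64-bit memory operand; if equal it
// stores ECX:EBX there and sets ZF, otherwise it loads the current memory
// value into EDX:EAX and clears ZF.  That fixed register usage is why oldval
// and newval are pinned to the EDX:EAX and EBX/ECX pairs below, with the
// XCHG in the encoding putting the new value's halves into the order the
// instruction expects.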
7362instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7363  match(Set cr (StoreLConditional mem (Binary oldval newval)));
7364  effect(KILL oldval);
7365  format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7366            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7367            "XCHG   EBX,ECX"
7368  %}
7369  ins_encode %{
    // Note: we need to swap rbx and rcx before and after the
7371    //       cmpxchg8 instruction because the instruction uses
7372    //       rcx as the high order word of the new value to store but
7373    //       our register encoding uses rbx.
7374    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7375    if( os::is_MP() )
7376      __ lock();
7377    __ cmpxchg8($mem$$Address);
7378    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7379  %}
7380  ins_pipe( pipe_cmpxchg );
7381%}
7382
7383// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
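// Each CAS below turns the ZF left behind by CMPXCHG into a Java boolean,
// roughly (illustrative only):
//   res = 0;  if (ZF) res = 1;   // 1 iff the expected value was found
// which is the MOV/JNE/MOV sequence visible in the formats.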
7384
7385instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7386  predicate(VM_Version::supports_cx8());
7387  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7388  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7389  effect(KILL cr, KILL oldval);
7390  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7391            "MOV    $res,0\n\t"
7392            "JNE,s  fail\n\t"
7393            "MOV    $res,1\n"
7394          "fail:" %}
7395  ins_encode( enc_cmpxchg8(mem_ptr),
7396              enc_flags_ne_to_boolean(res) );
7397  ins_pipe( pipe_cmpxchg );
7398%}
7399
7400instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7401  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7402  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7403  effect(KILL cr, KILL oldval);
7404  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7405            "MOV    $res,0\n\t"
7406            "JNE,s  fail\n\t"
7407            "MOV    $res,1\n"
7408          "fail:" %}
7409  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7410  ins_pipe( pipe_cmpxchg );
7411%}
7412
7413instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7414  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7415  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
7416  effect(KILL cr, KILL oldval);
7417  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7418            "MOV    $res,0\n\t"
7419            "JNE,s  fail\n\t"
7420            "MOV    $res,1\n"
7421          "fail:" %}
7422  ins_encode( enc_cmpxchgb(mem_ptr),
7423              enc_flags_ne_to_boolean(res) );
7424  ins_pipe( pipe_cmpxchg );
7425%}
7426
7427instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7428  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7429  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
7430  effect(KILL cr, KILL oldval);
7431  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7432            "MOV    $res,0\n\t"
7433            "JNE,s  fail\n\t"
7434            "MOV    $res,1\n"
7435          "fail:" %}
7436  ins_encode( enc_cmpxchgw(mem_ptr),
7437              enc_flags_ne_to_boolean(res) );
7438  ins_pipe( pipe_cmpxchg );
7439%}
7440
7441instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7442  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7443  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7444  effect(KILL cr, KILL oldval);
7445  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7446            "MOV    $res,0\n\t"
7447            "JNE,s  fail\n\t"
7448            "MOV    $res,1\n"
7449          "fail:" %}
7450  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7451  ins_pipe( pipe_cmpxchg );
7452%}
7453
7454instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7455  predicate(VM_Version::supports_cx8());
7456  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7457  effect(KILL cr);
7458  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7459  ins_encode( enc_cmpxchg8(mem_ptr) );
7460  ins_pipe( pipe_cmpxchg );
7461%}
7462
7463instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7464  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7465  effect(KILL cr);
7466  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7467  ins_encode( enc_cmpxchg(mem_ptr) );
7468  ins_pipe( pipe_cmpxchg );
7469%}
7470
7471instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7472  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
7473  effect(KILL cr);
7474  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7475  ins_encode( enc_cmpxchgb(mem_ptr) );
7476  ins_pipe( pipe_cmpxchg );
7477%}
7478
7479instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7480  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7481  effect(KILL cr);
7482  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7483  ins_encode( enc_cmpxchgw(mem_ptr) );
7484  ins_pipe( pipe_cmpxchg );
7485%}
7486
7487instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7488  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7489  effect(KILL cr);
7490  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7491  ins_encode( enc_cmpxchg(mem_ptr) );
7492  ins_pipe( pipe_cmpxchg );
7493%}
7494
7495instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7496  predicate(n->as_LoadStore()->result_not_used());
7497  match(Set dummy (GetAndAddB mem add));
7498  effect(KILL cr);
7499  format %{ "ADDB  [$mem],$add" %}
7500  ins_encode %{
7501    if (os::is_MP()) { __ lock(); }
7502    __ addb($mem$$Address, $add$$constant);
7503  %}
7504  ins_pipe( pipe_cmpxchg );
7505%}
7506
7507// Important to match to xRegI: only 8-bit regs.
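// (On 32-bit x86 only EAX, EBX, ECX and EDX have byte-addressable low
// halves -- AL, BL, CL, DL -- so the byte-wide XADD/XCHG forms must be
// restricted to that register subset.)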
7508instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
7509  match(Set newval (GetAndAddB mem newval));
7510  effect(KILL cr);
7511  format %{ "XADDB  [$mem],$newval" %}
7512  ins_encode %{
7513    if (os::is_MP()) { __ lock(); }
7514    __ xaddb($mem$$Address, $newval$$Register);
7515  %}
7516  ins_pipe( pipe_cmpxchg );
7517%}
7518
7519instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7520  predicate(n->as_LoadStore()->result_not_used());
7521  match(Set dummy (GetAndAddS mem add));
7522  effect(KILL cr);
7523  format %{ "ADDS  [$mem],$add" %}
7524  ins_encode %{
7525    if (os::is_MP()) { __ lock(); }
7526    __ addw($mem$$Address, $add$$constant);
7527  %}
7528  ins_pipe( pipe_cmpxchg );
7529%}
7530
7531instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
7532  match(Set newval (GetAndAddS mem newval));
7533  effect(KILL cr);
7534  format %{ "XADDS  [$mem],$newval" %}
7535  ins_encode %{
7536    if (os::is_MP()) { __ lock(); }
7537    __ xaddw($mem$$Address, $newval$$Register);
7538  %}
7539  ins_pipe( pipe_cmpxchg );
7540%}
7541
7542instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7543  predicate(n->as_LoadStore()->result_not_used());
7544  match(Set dummy (GetAndAddI mem add));
7545  effect(KILL cr);
7546  format %{ "ADDL  [$mem],$add" %}
7547  ins_encode %{
7548    if (os::is_MP()) { __ lock(); }
7549    __ addl($mem$$Address, $add$$constant);
7550  %}
7551  ins_pipe( pipe_cmpxchg );
7552%}
7553
7554instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7555  match(Set newval (GetAndAddI mem newval));
7556  effect(KILL cr);
7557  format %{ "XADDL  [$mem],$newval" %}
7558  ins_encode %{
7559    if (os::is_MP()) { __ lock(); }
7560    __ xaddl($mem$$Address, $newval$$Register);
7561  %}
7562  ins_pipe( pipe_cmpxchg );
7563%}
7564
7565// Important to match to xRegI: only 8-bit regs.
7566instruct xchgB( memory mem, xRegI newval) %{
7567  match(Set newval (GetAndSetB mem newval));
7568  format %{ "XCHGB  $newval,[$mem]" %}
7569  ins_encode %{
7570    __ xchgb($newval$$Register, $mem$$Address);
7571  %}
7572  ins_pipe( pipe_cmpxchg );
7573%}
7574
7575instruct xchgS( memory mem, rRegI newval) %{
7576  match(Set newval (GetAndSetS mem newval));
7577  format %{ "XCHGW  $newval,[$mem]" %}
7578  ins_encode %{
7579    __ xchgw($newval$$Register, $mem$$Address);
7580  %}
7581  ins_pipe( pipe_cmpxchg );
7582%}
7583
7584instruct xchgI( memory mem, rRegI newval) %{
7585  match(Set newval (GetAndSetI mem newval));
7586  format %{ "XCHGL  $newval,[$mem]" %}
7587  ins_encode %{
7588    __ xchgl($newval$$Register, $mem$$Address);
7589  %}
7590  ins_pipe( pipe_cmpxchg );
7591%}
7592
7593instruct xchgP( memory mem, pRegP newval) %{
7594  match(Set newval (GetAndSetP mem newval));
7595  format %{ "XCHGL  $newval,[$mem]" %}
7596  ins_encode %{
7597    __ xchgl($newval$$Register, $mem$$Address);
7598  %}
7599  ins_pipe( pipe_cmpxchg );
7600%}
7601
7602//----------Subtraction Instructions-------------------------------------------
7603
7604// Integer Subtraction Instructions
7605instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7606  match(Set dst (SubI dst src));
7607  effect(KILL cr);
7608
7609  size(2);
7610  format %{ "SUB    $dst,$src" %}
7611  opcode(0x2B);
7612  ins_encode( OpcP, RegReg( dst, src) );
7613  ins_pipe( ialu_reg_reg );
7614%}
7615
7616instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7617  match(Set dst (SubI dst src));
7618  effect(KILL cr);
7619
7620  format %{ "SUB    $dst,$src" %}
7621  opcode(0x81,0x05);  /* Opcode 81 /5 */
7622  // ins_encode( RegImm( dst, src) );
7623  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7624  ins_pipe( ialu_reg );
7625%}
7626
7627instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7628  match(Set dst (SubI dst (LoadI src)));
7629  effect(KILL cr);
7630
7631  ins_cost(125);
7632  format %{ "SUB    $dst,$src" %}
7633  opcode(0x2B);
7634  ins_encode( OpcP, RegMem( dst, src) );
7635  ins_pipe( ialu_reg_mem );
7636%}
7637
7638instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7639  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7640  effect(KILL cr);
7641
7642  ins_cost(150);
7643  format %{ "SUB    $dst,$src" %}
7644  opcode(0x29);  /* Opcode 29 /r */
7645  ins_encode( OpcP, RegMem( src, dst ) );
7646  ins_pipe( ialu_mem_reg );
7647%}
7648
7649// Subtract from a pointer
7650instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7651  match(Set dst (AddP dst (SubI zero src)));
7652  effect(KILL cr);
7653
7654  size(2);
7655  format %{ "SUB    $dst,$src" %}
7656  opcode(0x2B);
7657  ins_encode( OpcP, RegReg( dst, src) );
7658  ins_pipe( ialu_reg_reg );
7659%}
7660
7661instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7662  match(Set dst (SubI zero dst));
7663  effect(KILL cr);
7664
7665  size(2);
7666  format %{ "NEG    $dst" %}
7667  opcode(0xF7,0x03);  // Opcode F7 /3
7668  ins_encode( OpcP, RegOpc( dst ) );
7669  ins_pipe( ialu_reg );
7670%}
7671
7672//----------Multiplication/Division Instructions-------------------------------
7673// Integer Multiplication Instructions
7674// Multiply Register
7675instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7676  match(Set dst (MulI dst src));
7677  effect(KILL cr);
7678
7679  size(3);
7680  ins_cost(300);
7681  format %{ "IMUL   $dst,$src" %}
7682  opcode(0xAF, 0x0F);
7683  ins_encode( OpcS, OpcP, RegReg( dst, src) );
7684  ins_pipe( ialu_reg_reg_alu0 );
7685%}
7686
7687// Multiply 32-bit Immediate
7688instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7689  match(Set dst (MulI src imm));
7690  effect(KILL cr);
7691
7692  ins_cost(300);
7693  format %{ "IMUL   $dst,$src,$imm" %}
7694  opcode(0x69);  /* 69 /r id */
7695  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7696  ins_pipe( ialu_reg_reg_alu0 );
7697%}
7698
7699instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7700  match(Set dst src);
7701  effect(KILL cr);
7702
7703  // Note that this is artificially increased to make it more expensive than loadConL
7704  ins_cost(250);
7705  format %{ "MOV    EAX,$src\t// low word only" %}
7706  opcode(0xB8);
7707  ins_encode( LdImmL_Lo(dst, src) );
7708  ins_pipe( ialu_reg_fat );
7709%}
7710
7711// Multiply by 32-bit Immediate, taking the shifted high order results
7712//  (special case for shift by 32)
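// In Java-level terms the matched pattern is roughly (illustrative only):
//   int dst = (int)(((long)src1 * con) >> 32);
// where con is the 32-bit-range long constant placed in EDX:EAX by
// loadConL_low_only above.  A single 32x32->64 IMUL leaves exactly that
// high word in EDX, so the shift-by-32 case needs no explicit shift; the
// variant below handles larger shift counts with a trailing SAR of EDX.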
7713instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7714  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7715  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7716             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7717             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7718  effect(USE src1, KILL cr);
7719
7720  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7721  ins_cost(0*100 + 1*400 - 150);
7722  format %{ "IMUL   EDX:EAX,$src1" %}
7723  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7724  ins_pipe( pipe_slow );
7725%}
7726
7727// Multiply by 32-bit Immediate, taking the shifted high order results
7728instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7729  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7730  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7731             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7732             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7733  effect(USE src1, KILL cr);
7734
7735  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7736  ins_cost(1*100 + 1*400 - 150);
7737  format %{ "IMUL   EDX:EAX,$src1\n\t"
7738            "SAR    EDX,$cnt-32" %}
7739  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7740  ins_pipe( pipe_slow );
7741%}
7742
7743// Multiply Memory 32-bit Immediate
7744instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7745  match(Set dst (MulI (LoadI src) imm));
7746  effect(KILL cr);
7747
7748  ins_cost(300);
7749  format %{ "IMUL   $dst,$src,$imm" %}
7750  opcode(0x69);  /* 69 /r id */
7751  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7752  ins_pipe( ialu_reg_mem_alu0 );
7753%}
7754
7755// Multiply Memory
7756instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7757  match(Set dst (MulI dst (LoadI src)));
7758  effect(KILL cr);
7759
7760  ins_cost(350);
7761  format %{ "IMUL   $dst,$src" %}
7762  opcode(0xAF, 0x0F);
7763  ins_encode( OpcS, OpcP, RegMem( dst, src) );
7764  ins_pipe( ialu_reg_mem_alu0 );
7765%}
7766
7767// Multiply Register Int to Long
7768instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7769  // Basic Idea: long = (long)int * (long)int
7770  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7771  effect(DEF dst, USE src, USE src1, KILL flags);
7772
7773  ins_cost(300);
7774  format %{ "IMUL   $dst,$src1" %}
7775
7776  ins_encode( long_int_multiply( dst, src1 ) );
7777  ins_pipe( ialu_reg_reg_alu0 );
7778%}
7779
7780instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7781  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7782  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7783  effect(KILL flags);
7784
7785  ins_cost(300);
7786  format %{ "MUL    $dst,$src1" %}
7787
7788  ins_encode( long_uint_multiply(dst, src1) );
7789  ins_pipe( ialu_reg_reg_alu0 );
7790%}
7791
7792// Multiply Register Long
7793instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7794  match(Set dst (MulL dst src));
7795  effect(KILL cr, TEMP tmp);
7796  ins_cost(4*100+3*400);
7797// Basic idea: lo(result) = lo(x_lo * y_lo)
7798//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
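// Derivation sketch: with x = x_hi*2^32 + x_lo and y = y_hi*2^32 + y_lo,
//   x*y = x_lo*y_lo + (x_hi*y_lo + x_lo*y_hi)*2^32 + x_hi*y_hi*2^64
// The last term lies entirely above bit 63 and is dropped; the cross terms
// only affect the high word, so their low halves are summed in $tmp and
// added to EDX after the widening MUL of the two low halves.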
7799  format %{ "MOV    $tmp,$src.lo\n\t"
7800            "IMUL   $tmp,EDX\n\t"
7801            "MOV    EDX,$src.hi\n\t"
7802            "IMUL   EDX,EAX\n\t"
7803            "ADD    $tmp,EDX\n\t"
7804            "MUL    EDX:EAX,$src.lo\n\t"
7805            "ADD    EDX,$tmp" %}
7806  ins_encode( long_multiply( dst, src, tmp ) );
7807  ins_pipe( pipe_slow );
7808%}
7809
7810// Multiply Register Long where the left operand's high 32 bits are zero
7811instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7812  predicate(is_operand_hi32_zero(n->in(1)));
7813  match(Set dst (MulL dst src));
7814  effect(KILL cr, TEMP tmp);
7815  ins_cost(2*100+2*400);
7816// Basic idea: lo(result) = lo(x_lo * y_lo)
7817//             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7818  format %{ "MOV    $tmp,$src.hi\n\t"
7819            "IMUL   $tmp,EAX\n\t"
7820            "MUL    EDX:EAX,$src.lo\n\t"
7821            "ADD    EDX,$tmp" %}
7822  ins_encode %{
7823    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7824    __ imull($tmp$$Register, rax);
7825    __ mull($src$$Register);
7826    __ addl(rdx, $tmp$$Register);
7827  %}
7828  ins_pipe( pipe_slow );
7829%}
7830
7831// Multiply Register Long where the right operand's high 32 bits are zero
7832instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7833  predicate(is_operand_hi32_zero(n->in(2)));
7834  match(Set dst (MulL dst src));
7835  effect(KILL cr, TEMP tmp);
7836  ins_cost(2*100+2*400);
7837// Basic idea: lo(result) = lo(x_lo * y_lo)
7838//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7839  format %{ "MOV    $tmp,$src.lo\n\t"
7840            "IMUL   $tmp,EDX\n\t"
7841            "MUL    EDX:EAX,$src.lo\n\t"
7842            "ADD    EDX,$tmp" %}
7843  ins_encode %{
7844    __ movl($tmp$$Register, $src$$Register);
7845    __ imull($tmp$$Register, rdx);
7846    __ mull($src$$Register);
7847    __ addl(rdx, $tmp$$Register);
7848  %}
7849  ins_pipe( pipe_slow );
7850%}
7851
7852// Multiply Register Long where the left and the right operands' high 32 bits are zero
7853instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7854  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7855  match(Set dst (MulL dst src));
7856  effect(KILL cr);
7857  ins_cost(1*400);
7858// Basic idea: lo(result) = lo(x_lo * y_lo)
7859//             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7860  format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7861  ins_encode %{
7862    __ mull($src$$Register);
7863  %}
7864  ins_pipe( pipe_slow );
7865%}
7866
7867// Multiply Register Long by small constant
7868instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7869  match(Set dst (MulL dst src));
7870  effect(KILL cr, TEMP tmp);
7871  ins_cost(2*100+2*400);
7872  size(12);
7873// Basic idea: lo(result) = lo(src * EAX)
7874//             hi(result) = hi(src * EAX) + lo(src * EDX)
7875  format %{ "IMUL   $tmp,EDX,$src\n\t"
7876            "MOV    EDX,$src\n\t"
7877            "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7878            "ADD    EDX,$tmp" %}
7879  ins_encode( long_multiply_con( dst, src, tmp ) );
7880  ins_pipe( pipe_slow );
7881%}
7882
7883// Integer DIV with Register
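// The guarded sequence below exists for the one overflowing case of
// two's-complement division, Integer.MIN_VALUE / -1: IDIV would raise a
// hardware divide error for it, so it is detected up front and the
// Java-specified result is produced instead, e.g.
//   (-2147483648) / -1 == -2147483648   and   (-2147483648) % -1 == 0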
7884instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7885  match(Set rax (DivI rax div));
7886  effect(KILL rdx, KILL cr);
7887  size(26);
7888  ins_cost(30*100+10*100);
7889  format %{ "CMP    EAX,0x80000000\n\t"
7890            "JNE,s  normal\n\t"
7891            "XOR    EDX,EDX\n\t"
7892            "CMP    ECX,-1\n\t"
7893            "JE,s   done\n"
7894    "normal: CDQ\n\t"
7895            "IDIV   $div\n\t"
7896    "done:"        %}
7897  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7898  ins_encode( cdq_enc, OpcP, RegOpc(div) );
7899  ins_pipe( ialu_reg_reg_alu0 );
7900%}
7901
7902// Divide Register Long
7903instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7904  match(Set dst (DivL src1 src2));
7905  effect( KILL cr, KILL cx, KILL bx );
7906  ins_cost(10000);
7907  format %{ "PUSH   $src1.hi\n\t"
7908            "PUSH   $src1.lo\n\t"
7909            "PUSH   $src2.hi\n\t"
7910            "PUSH   $src2.lo\n\t"
7911            "CALL   SharedRuntime::ldiv\n\t"
7912            "ADD    ESP,16" %}
7913  ins_encode( long_div(src1,src2) );
7914  ins_pipe( pipe_slow );
7915%}
7916
7917// Integer DIVMOD with Register, both quotient and mod results
7918instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7919  match(DivModI rax div);
7920  effect(KILL cr);
7921  size(26);
7922  ins_cost(30*100+10*100);
7923  format %{ "CMP    EAX,0x80000000\n\t"
7924            "JNE,s  normal\n\t"
7925            "XOR    EDX,EDX\n\t"
7926            "CMP    ECX,-1\n\t"
7927            "JE,s   done\n"
7928    "normal: CDQ\n\t"
7929            "IDIV   $div\n\t"
7930    "done:"        %}
7931  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7932  ins_encode( cdq_enc, OpcP, RegOpc(div) );
7933  ins_pipe( pipe_slow );
7934%}
7935
7936// Integer MOD with Register
7937instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7938  match(Set rdx (ModI rax div));
7939  effect(KILL rax, KILL cr);
7940
7941  size(26);
7942  ins_cost(300);
7943  format %{ "CDQ\n\t"
7944            "IDIV   $div" %}
7945  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7946  ins_encode( cdq_enc, OpcP, RegOpc(div) );
7947  ins_pipe( ialu_reg_reg_alu0 );
7948%}
7949
7950// Remainder Register Long
7951instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7952  match(Set dst (ModL src1 src2));
7953  effect( KILL cr, KILL cx, KILL bx );
7954  ins_cost(10000);
7955  format %{ "PUSH   $src1.hi\n\t"
7956            "PUSH   $src1.lo\n\t"
7957            "PUSH   $src2.hi\n\t"
7958            "PUSH   $src2.lo\n\t"
7959            "CALL   SharedRuntime::lrem\n\t"
7960            "ADD    ESP,16" %}
7961  ins_encode( long_mod(src1,src2) );
7962  ins_pipe( pipe_slow );
7963%}
7964
7965// Divide Register Long (no special case since divisor != -1)
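// Sketch of the encoding below: the divisor is reduced to |$imm| and a
// negative dividend is negated, then the 64-bit quotient is formed by the
// usual schoolbook long division with two unsigned 32-bit DIVs (high word
// first, its remainder feeding the low-word divide); the sign fix-ups for
// a negative dividend and for a negative immediate are applied afterwards.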
7966instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7967  match(Set dst (DivL dst imm));
7968  effect( TEMP tmp, TEMP tmp2, KILL cr );
7969  ins_cost(1000);
7970  format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7971            "XOR    $tmp2,$tmp2\n\t"
7972            "CMP    $tmp,EDX\n\t"
7973            "JA,s   fast\n\t"
7974            "MOV    $tmp2,EAX\n\t"
7975            "MOV    EAX,EDX\n\t"
7976            "MOV    EDX,0\n\t"
7977            "JLE,s  pos\n\t"
7978            "LNEG   EAX : $tmp2\n\t"
7979            "DIV    $tmp # unsigned division\n\t"
7980            "XCHG   EAX,$tmp2\n\t"
7981            "DIV    $tmp\n\t"
7982            "LNEG   $tmp2 : EAX\n\t"
7983            "JMP,s  done\n"
7984    "pos:\n\t"
7985            "DIV    $tmp\n\t"
7986            "XCHG   EAX,$tmp2\n"
7987    "fast:\n\t"
7988            "DIV    $tmp\n"
7989    "done:\n\t"
7990            "MOV    EDX,$tmp2\n\t"
7991            "NEG    EDX:EAX # if $imm < 0" %}
7992  ins_encode %{
7993    int con = (int)$imm$$constant;
7994    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7995    int pcon = (con > 0) ? con : -con;
7996    Label Lfast, Lpos, Ldone;
7997
7998    __ movl($tmp$$Register, pcon);
7999    __ xorl($tmp2$$Register,$tmp2$$Register);
8000    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8001    __ jccb(Assembler::above, Lfast); // result fits into 32 bit
8002
8003    __ movl($tmp2$$Register, $dst$$Register); // save
8004    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8005    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8006    __ jccb(Assembler::lessEqual, Lpos); // result is positive
8007
8008    // Negative dividend.
8009    // convert value to positive to use unsigned division
8010    __ lneg($dst$$Register, $tmp2$$Register);
8011    __ divl($tmp$$Register);
8012    __ xchgl($dst$$Register, $tmp2$$Register);
8013    __ divl($tmp$$Register);
8014    // revert result back to negative
8015    __ lneg($tmp2$$Register, $dst$$Register);
8016    __ jmpb(Ldone);
8017
8018    __ bind(Lpos);
8019    __ divl($tmp$$Register); // Use unsigned division
8020    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 has the 32-bit hi result
8022
8023    __ bind(Lfast);
8024    // fast path: src is positive
8025    __ divl($tmp$$Register); // Use unsigned division
8026
8027    __ bind(Ldone);
8028    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
8029    if (con < 0) {
8030      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
8031    }
8032  %}
8033  ins_pipe( pipe_slow );
8034%}
8035
// Remainder Register Long (remainder fits into 32 bits)
8037instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
8038  match(Set dst (ModL dst imm));
8039  effect( TEMP tmp, TEMP tmp2, KILL cr );
8040  ins_cost(1000);
8041  format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
8042            "CMP    $tmp,EDX\n\t"
8043            "JA,s   fast\n\t"
8044            "MOV    $tmp2,EAX\n\t"
8045            "MOV    EAX,EDX\n\t"
8046            "MOV    EDX,0\n\t"
8047            "JLE,s  pos\n\t"
8048            "LNEG   EAX : $tmp2\n\t"
8049            "DIV    $tmp # unsigned division\n\t"
8050            "MOV    EAX,$tmp2\n\t"
8051            "DIV    $tmp\n\t"
8052            "NEG    EDX\n\t"
8053            "JMP,s  done\n"
8054    "pos:\n\t"
8055            "DIV    $tmp\n\t"
8056            "MOV    EAX,$tmp2\n"
8057    "fast:\n\t"
8058            "DIV    $tmp\n"
8059    "done:\n\t"
8060            "MOV    EAX,EDX\n\t"
8061            "SAR    EDX,31\n\t" %}
8062  ins_encode %{
8063    int con = (int)$imm$$constant;
8064    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8065    int pcon = (con > 0) ? con : -con;
8066    Label  Lfast, Lpos, Ldone;
8067
8068    __ movl($tmp$$Register, pcon);
8069    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8070    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
8071
8072    __ movl($tmp2$$Register, $dst$$Register); // save
8073    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8074    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8075    __ jccb(Assembler::lessEqual, Lpos); // result is positive
8076
8077    // Negative dividend.
8078    // convert value to positive to use unsigned division
8079    __ lneg($dst$$Register, $tmp2$$Register);
8080    __ divl($tmp$$Register);
8081    __ movl($dst$$Register, $tmp2$$Register);
8082    __ divl($tmp$$Register);
8083    // revert remainder back to negative
8084    __ negl(HIGH_FROM_LOW($dst$$Register));
8085    __ jmpb(Ldone);
8086
8087    __ bind(Lpos);
8088    __ divl($tmp$$Register);
8089    __ movl($dst$$Register, $tmp2$$Register);
8090
8091    __ bind(Lfast);
8092    // fast path: src is positive
8093    __ divl($tmp$$Register);
8094
8095    __ bind(Ldone);
8096    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8097    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8098
8099  %}
8100  ins_pipe( pipe_slow );
8101%}
8102
8103// Integer Shift Instructions
8104// Shift Left by one
8105instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8106  match(Set dst (LShiftI dst shift));
8107  effect(KILL cr);
8108
8109  size(2);
8110  format %{ "SHL    $dst,$shift" %}
8111  opcode(0xD1, 0x4);  /* D1 /4 */
8112  ins_encode( OpcP, RegOpc( dst ) );
8113  ins_pipe( ialu_reg );
8114%}
8115
8116// Shift Left by 8-bit immediate
8117instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8118  match(Set dst (LShiftI dst shift));
8119  effect(KILL cr);
8120
8121  size(3);
8122  format %{ "SHL    $dst,$shift" %}
8123  opcode(0xC1, 0x4);  /* C1 /4 ib */
8124  ins_encode( RegOpcImm( dst, shift) );
8125  ins_pipe( ialu_reg );
8126%}
8127
8128// Shift Left by variable
8129instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8130  match(Set dst (LShiftI dst shift));
8131  effect(KILL cr);
8132
8133  size(2);
8134  format %{ "SHL    $dst,$shift" %}
8135  opcode(0xD3, 0x4);  /* D3 /4 */
8136  ins_encode( OpcP, RegOpc( dst ) );
8137  ins_pipe( ialu_reg_reg );
8138%}
8139
8140// Arithmetic shift right by one
8141instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8142  match(Set dst (RShiftI dst shift));
8143  effect(KILL cr);
8144
8145  size(2);
8146  format %{ "SAR    $dst,$shift" %}
8147  opcode(0xD1, 0x7);  /* D1 /7 */
8148  ins_encode( OpcP, RegOpc( dst ) );
8149  ins_pipe( ialu_reg );
8150%}
8151
8152// Arithmetic shift right by one
8153instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
8154  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8155  effect(KILL cr);
8156  format %{ "SAR    $dst,$shift" %}
8157  opcode(0xD1, 0x7);  /* D1 /7 */
8158  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
8159  ins_pipe( ialu_mem_imm );
8160%}
8161
8162// Arithmetic Shift Right by 8-bit immediate
8163instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8164  match(Set dst (RShiftI dst shift));
8165  effect(KILL cr);
8166
8167  size(3);
8168  format %{ "SAR    $dst,$shift" %}
8169  opcode(0xC1, 0x7);  /* C1 /7 ib */
8170  ins_encode( RegOpcImm( dst, shift ) );
8171  ins_pipe( ialu_reg );
8172%}
8173
8174// Arithmetic Shift Right by 8-bit immediate
8175instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
8176  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8177  effect(KILL cr);
8178
8179  format %{ "SAR    $dst,$shift" %}
8180  opcode(0xC1, 0x7);  /* C1 /7 ib */
8181  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
8182  ins_pipe( ialu_mem_imm );
8183%}
8184
8185// Arithmetic Shift Right by variable
8186instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8187  match(Set dst (RShiftI dst shift));
8188  effect(KILL cr);
8189
8190  size(2);
8191  format %{ "SAR    $dst,$shift" %}
8192  opcode(0xD3, 0x7);  /* D3 /7 */
8193  ins_encode( OpcP, RegOpc( dst ) );
8194  ins_pipe( ialu_reg_reg );
8195%}
8196
8197// Logical shift right by one
8198instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8199  match(Set dst (URShiftI dst shift));
8200  effect(KILL cr);
8201
8202  size(2);
8203  format %{ "SHR    $dst,$shift" %}
8204  opcode(0xD1, 0x5);  /* D1 /5 */
8205  ins_encode( OpcP, RegOpc( dst ) );
8206  ins_pipe( ialu_reg );
8207%}
8208
8209// Logical Shift Right by 8-bit immediate
8210instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8211  match(Set dst (URShiftI dst shift));
8212  effect(KILL cr);
8213
8214  size(3);
8215  format %{ "SHR    $dst,$shift" %}
8216  opcode(0xC1, 0x5);  /* C1 /5 ib */
8217  ins_encode( RegOpcImm( dst, shift) );
8218  ins_pipe( ialu_reg );
8219%}
8220
8221
8222// Shift Left by 24, followed by Arithmetic Shift Right by 24.
8223// This idiom is used by the compiler for the i2b bytecode.
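// For example, the ideal subtree produced for "(byte) x", i.e.
// (RShiftI (LShiftI x 24) 24), matches here and collapses into a single
// sign-extending byte move (movsbl).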
8224instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
8225  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8226
8227  size(3);
8228  format %{ "MOVSX  $dst,$src :8" %}
8229  ins_encode %{
8230    __ movsbl($dst$$Register, $src$$Register);
8231  %}
8232  ins_pipe(ialu_reg_reg);
8233%}
8234
8235// Shift Left by 16, followed by Arithmetic Shift Right by 16.
8236// This idiom is used by the compiler for the i2s bytecode.
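// Likewise, "(short) x" (the i2s bytecode) reduces to a single movswl here.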
8237instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
8238  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8239
8240  size(3);
8241  format %{ "MOVSX  $dst,$src :16" %}
8242  ins_encode %{
8243    __ movswl($dst$$Register, $src$$Register);
8244  %}
8245  ins_pipe(ialu_reg_reg);
8246%}
8247
8248
8249// Logical Shift Right by variable
8250instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8251  match(Set dst (URShiftI dst shift));
8252  effect(KILL cr);
8253
8254  size(2);
8255  format %{ "SHR    $dst,$shift" %}
8256  opcode(0xD3, 0x5);  /* D3 /5 */
8257  ins_encode( OpcP, RegOpc( dst ) );
8258  ins_pipe( ialu_reg_reg );
8259%}
8260
8261
8262//----------Logical Instructions-----------------------------------------------
8263//----------Integer Logical Instructions---------------------------------------
8264// And Instructions
8265// And Register with Register
8266instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8267  match(Set dst (AndI dst src));
8268  effect(KILL cr);
8269
8270  size(2);
8271  format %{ "AND    $dst,$src" %}
8272  opcode(0x23);
8273  ins_encode( OpcP, RegReg( dst, src) );
8274  ins_pipe( ialu_reg_reg );
8275%}
8276
8277// And Register with Immediate
8278instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8279  match(Set dst (AndI dst src));
8280  effect(KILL cr);
8281
8282  format %{ "AND    $dst,$src" %}
8283  opcode(0x81,0x04);  /* Opcode 81 /4 */
8284  // ins_encode( RegImm( dst, src) );
8285  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8286  ins_pipe( ialu_reg );
8287%}
8288
8289// And Register with Memory
8290instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8291  match(Set dst (AndI dst (LoadI src)));
8292  effect(KILL cr);
8293
8294  ins_cost(125);
8295  format %{ "AND    $dst,$src" %}
8296  opcode(0x23);
8297  ins_encode( OpcP, RegMem( dst, src) );
8298  ins_pipe( ialu_reg_mem );
8299%}
8300
8301// And Memory with Register
8302instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8303  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8304  effect(KILL cr);
8305
8306  ins_cost(150);
8307  format %{ "AND    $dst,$src" %}
8308  opcode(0x21);  /* Opcode 21 /r */
8309  ins_encode( OpcP, RegMem( src, dst ) );
8310  ins_pipe( ialu_mem_reg );
8311%}
8312
8313// And Memory with Immediate
8314instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8315  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8316  effect(KILL cr);
8317
8318  ins_cost(125);
8319  format %{ "AND    $dst,$src" %}
8320  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8321  // ins_encode( MemImm( dst, src) );
8322  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8323  ins_pipe( ialu_mem_imm );
8324%}
8325
8326// BMI1 instructions
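// These match rules recognize the canonical BMI1 identities on the ideal graph:
// ANDN computes (~src1 & src2) from (src1 ^ -1) & src2, BLSI isolates the lowest
// set bit via (0 - x) & x, BLSMSK builds a mask up to and including the lowest set
// bit via (x - 1) ^ x, and BLSR clears the lowest set bit via (x - 1) & x.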
8327instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8328  match(Set dst (AndI (XorI src1 minus_1) src2));
8329  predicate(UseBMI1Instructions);
8330  effect(KILL cr);
8331
8332  format %{ "ANDNL  $dst, $src1, $src2" %}
8333
8334  ins_encode %{
8335    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8336  %}
8337  ins_pipe(ialu_reg);
8338%}
8339
8340instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8341  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8342  predicate(UseBMI1Instructions);
8343  effect(KILL cr);
8344
8345  ins_cost(125);
8346  format %{ "ANDNL  $dst, $src1, $src2" %}
8347
8348  ins_encode %{
8349    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8350  %}
8351  ins_pipe(ialu_reg_mem);
8352%}
8353
8354instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8355  match(Set dst (AndI (SubI imm_zero src) src));
8356  predicate(UseBMI1Instructions);
8357  effect(KILL cr);
8358
8359  format %{ "BLSIL  $dst, $src" %}
8360
8361  ins_encode %{
8362    __ blsil($dst$$Register, $src$$Register);
8363  %}
8364  ins_pipe(ialu_reg);
8365%}
8366
8367instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8368  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8369  predicate(UseBMI1Instructions);
8370  effect(KILL cr);
8371
8372  ins_cost(125);
8373  format %{ "BLSIL  $dst, $src" %}
8374
8375  ins_encode %{
8376    __ blsil($dst$$Register, $src$$Address);
8377  %}
8378  ins_pipe(ialu_reg_mem);
8379%}
8380
8381instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8382%{
8383  match(Set dst (XorI (AddI src minus_1) src));
8384  predicate(UseBMI1Instructions);
8385  effect(KILL cr);
8386
8387  format %{ "BLSMSKL $dst, $src" %}
8388
8389  ins_encode %{
8390    __ blsmskl($dst$$Register, $src$$Register);
8391  %}
8392
8393  ins_pipe(ialu_reg);
8394%}
8395
8396instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8397%{
8398  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8399  predicate(UseBMI1Instructions);
8400  effect(KILL cr);
8401
8402  ins_cost(125);
8403  format %{ "BLSMSKL $dst, $src" %}
8404
8405  ins_encode %{
8406    __ blsmskl($dst$$Register, $src$$Address);
8407  %}
8408
8409  ins_pipe(ialu_reg_mem);
8410%}
8411
8412instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8413%{
8414  match(Set dst (AndI (AddI src minus_1) src) );
8415  predicate(UseBMI1Instructions);
8416  effect(KILL cr);
8417
8418  format %{ "BLSRL  $dst, $src" %}
8419
8420  ins_encode %{
8421    __ blsrl($dst$$Register, $src$$Register);
8422  %}
8423
8424  ins_pipe(ialu_reg);
8425%}
8426
8427instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8428%{
8429  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8430  predicate(UseBMI1Instructions);
8431  effect(KILL cr);
8432
8433  ins_cost(125);
8434  format %{ "BLSRL  $dst, $src" %}
8435
8436  ins_encode %{
8437    __ blsrl($dst$$Register, $src$$Address);
8438  %}
8439
8440  ins_pipe(ialu_reg_mem);
8441%}
8442
8443// Or Instructions
8444// Or Register with Register
8445instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8446  match(Set dst (OrI dst src));
8447  effect(KILL cr);
8448
8449  size(2);
8450  format %{ "OR     $dst,$src" %}
8451  opcode(0x0B);
8452  ins_encode( OpcP, RegReg( dst, src) );
8453  ins_pipe( ialu_reg_reg );
8454%}
8455
8456instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8457  match(Set dst (OrI dst (CastP2X src)));
8458  effect(KILL cr);
8459
8460  size(2);
8461  format %{ "OR     $dst,$src" %}
8462  opcode(0x0B);
8463  ins_encode( OpcP, RegReg( dst, src) );
8464  ins_pipe( ialu_reg_reg );
8465%}
8466
8467
8468// Or Register with Immediate
8469instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8470  match(Set dst (OrI dst src));
8471  effect(KILL cr);
8472
8473  format %{ "OR     $dst,$src" %}
8474  opcode(0x81,0x01);  /* Opcode 81 /1 id */
8475  // ins_encode( RegImm( dst, src) );
8476  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8477  ins_pipe( ialu_reg );
8478%}
8479
8480// Or Register with Memory
8481instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8482  match(Set dst (OrI dst (LoadI src)));
8483  effect(KILL cr);
8484
8485  ins_cost(125);
8486  format %{ "OR     $dst,$src" %}
8487  opcode(0x0B);
8488  ins_encode( OpcP, RegMem( dst, src) );
8489  ins_pipe( ialu_reg_mem );
8490%}
8491
8492// Or Memory with Register
8493instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8494  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8495  effect(KILL cr);
8496
8497  ins_cost(150);
8498  format %{ "OR     $dst,$src" %}
8499  opcode(0x09);  /* Opcode 09 /r */
8500  ins_encode( OpcP, RegMem( src, dst ) );
8501  ins_pipe( ialu_mem_reg );
8502%}
8503
8504// Or Memory with Immediate
8505instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8506  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8507  effect(KILL cr);
8508
8509  ins_cost(125);
8510  format %{ "OR     $dst,$src" %}
8511  opcode(0x81,0x1);  /* Opcode 81 /1 id */
8512  // ins_encode( MemImm( dst, src) );
8513  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8514  ins_pipe( ialu_mem_imm );
8515%}
8516
8517// ROL/ROR
8518// ROL expand
8519instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8520  effect(USE_DEF dst, USE shift, KILL cr);
8521
8522  format %{ "ROL    $dst, $shift" %}
8523  opcode(0xD1, 0x0); /* Opcode D1 /0 */
8524  ins_encode( OpcP, RegOpc( dst ));
8525  ins_pipe( ialu_reg );
8526%}
8527
8528instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8529  effect(USE_DEF dst, USE shift, KILL cr);
8530
8531  format %{ "ROL    $dst, $shift" %}
8532  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8533  ins_encode( RegOpcImm(dst, shift) );
8534  ins_pipe(ialu_reg);
8535%}
8536
8537instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8538  effect(USE_DEF dst, USE shift, KILL cr);
8539
8540  format %{ "ROL    $dst, $shift" %}
8541  opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8542  ins_encode(OpcP, RegOpc(dst));
8543  ins_pipe( ialu_reg_reg );
8544%}
8545// end of ROL expand
8546
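// The rules below recognize a 32-bit rotate written as a pair of shifts, e.g.
// (x << s) | (x >>> (32 - s)) (or (0 - s), since shift counts are masked to five
// bits), or constant shift counts that sum to a multiple of 32, and expand the
// matched tree into one of the ROL forms above.  The ROR rules further down handle
// the mirror-image right-rotate patterns.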
8547// ROL 32bit by one once
8548instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8549  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8550
8551  expand %{
8552    rolI_eReg_imm1(dst, lshift, cr);
8553  %}
8554%}
8555
8556// ROL 32bit by imm8 once
8557instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8558  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8559  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8560
8561  expand %{
8562    rolI_eReg_imm8(dst, lshift, cr);
8563  %}
8564%}
8565
8566// ROL 32bit var by var once
8567instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8568  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8569
8570  expand %{
8571    rolI_eReg_CL(dst, shift, cr);
8572  %}
8573%}
8574
8575// ROL 32bit var by var once
8576instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8577  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8578
8579  expand %{
8580    rolI_eReg_CL(dst, shift, cr);
8581  %}
8582%}
8583
8584// ROR expand
8585instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8586  effect(USE_DEF dst, USE shift, KILL cr);
8587
8588  format %{ "ROR    $dst, $shift" %}
8589  opcode(0xD1,0x1);  /* Opcode D1 /1 */
8590  ins_encode( OpcP, RegOpc( dst ) );
8591  ins_pipe( ialu_reg );
8592%}
8593
8594instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8595  effect (USE_DEF dst, USE shift, KILL cr);
8596
8597  format %{ "ROR    $dst, $shift" %}
8598  opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8599  ins_encode( RegOpcImm(dst, shift) );
8600  ins_pipe( ialu_reg );
8601%}
8602
8603instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8604  effect(USE_DEF dst, USE shift, KILL cr);
8605
8606  format %{ "ROR    $dst, $shift" %}
8607  opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8608  ins_encode(OpcP, RegOpc(dst));
8609  ins_pipe( ialu_reg_reg );
8610%}
8611// end of ROR expand
8612
8613// ROR 32bit by one once
8614instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8615  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8616
8617  expand %{
8618    rorI_eReg_imm1(dst, rshift, cr);
8619  %}
8620%}
8621
8622// ROR 32bit by immI8 once
8623instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8624  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8625  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8626
8627  expand %{
8628    rorI_eReg_imm8(dst, rshift, cr);
8629  %}
8630%}
8631
8632// ROR 32bit var by var once
8633instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8634  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8635
8636  expand %{
8637    rorI_eReg_CL(dst, shift, cr);
8638  %}
8639%}
8640
8641// ROR 32bit var by var once
8642instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8643  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8644
8645  expand %{
8646    rorI_eReg_CL(dst, shift, cr);
8647  %}
8648%}
8649
8650// Xor Instructions
8651// Xor Register with Register
8652instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8653  match(Set dst (XorI dst src));
8654  effect(KILL cr);
8655
8656  size(2);
8657  format %{ "XOR    $dst,$src" %}
8658  opcode(0x33);
8659  ins_encode( OpcP, RegReg( dst, src) );
8660  ins_pipe( ialu_reg_reg );
8661%}
8662
8663// Xor Register with Immediate -1
8664instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8665  match(Set dst (XorI dst imm));
8666
8667  size(2);
8668  format %{ "NOT    $dst" %}
8669  ins_encode %{
8670     __ notl($dst$$Register);
8671  %}
8672  ins_pipe( ialu_reg );
8673%}
8674
8675// Xor Register with Immediate
8676instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8677  match(Set dst (XorI dst src));
8678  effect(KILL cr);
8679
8680  format %{ "XOR    $dst,$src" %}
8681  opcode(0x81,0x06);  /* Opcode 81 /6 id */
8682  // ins_encode( RegImm( dst, src) );
8683  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8684  ins_pipe( ialu_reg );
8685%}
8686
8687// Xor Register with Memory
8688instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8689  match(Set dst (XorI dst (LoadI src)));
8690  effect(KILL cr);
8691
8692  ins_cost(125);
8693  format %{ "XOR    $dst,$src" %}
8694  opcode(0x33);
8695  ins_encode( OpcP, RegMem(dst, src) );
8696  ins_pipe( ialu_reg_mem );
8697%}
8698
8699// Xor Memory with Register
8700instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8701  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8702  effect(KILL cr);
8703
8704  ins_cost(150);
8705  format %{ "XOR    $dst,$src" %}
8706  opcode(0x31);  /* Opcode 31 /r */
8707  ins_encode( OpcP, RegMem( src, dst ) );
8708  ins_pipe( ialu_mem_reg );
8709%}
8710
8711// Xor Memory with Immediate
8712instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8713  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8714  effect(KILL cr);
8715
8716  ins_cost(125);
8717  format %{ "XOR    $dst,$src" %}
8718  opcode(0x81,0x6);  /* Opcode 81 /6 id */
8719  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8720  ins_pipe( ialu_mem_imm );
8721%}
8722
8723//----------Convert Int to Boolean---------------------------------------------
8724
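// The NEG/ADC idiom below converts an int (or pointer) to 0/1: after copying src
// into dst, NEG dst sets CF exactly when src is non-zero, and ADC dst,src then
// yields (-src) + src + CF, i.e. 1 if src != 0 and 0 otherwise.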
8725instruct movI_nocopy(rRegI dst, rRegI src) %{
8726  effect( DEF dst, USE src );
8727  format %{ "MOV    $dst,$src" %}
8728  ins_encode( enc_Copy( dst, src) );
8729  ins_pipe( ialu_reg_reg );
8730%}
8731
8732instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8733  effect( USE_DEF dst, USE src, KILL cr );
8734
8735  size(4);
8736  format %{ "NEG    $dst\n\t"
8737            "ADC    $dst,$src" %}
8738  ins_encode( neg_reg(dst),
8739              OpcRegReg(0x13,dst,src) );
8740  ins_pipe( ialu_reg_reg_long );
8741%}
8742
8743instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8744  match(Set dst (Conv2B src));
8745
8746  expand %{
8747    movI_nocopy(dst,src);
8748    ci2b(dst,src,cr);
8749  %}
8750%}
8751
8752instruct movP_nocopy(rRegI dst, eRegP src) %{
8753  effect( DEF dst, USE src );
8754  format %{ "MOV    $dst,$src" %}
8755  ins_encode( enc_Copy( dst, src) );
8756  ins_pipe( ialu_reg_reg );
8757%}
8758
8759instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8760  effect( USE_DEF dst, USE src, KILL cr );
8761  format %{ "NEG    $dst\n\t"
8762            "ADC    $dst,$src" %}
8763  ins_encode( neg_reg(dst),
8764              OpcRegReg(0x13,dst,src) );
8765  ins_pipe( ialu_reg_reg_long );
8766%}
8767
8768instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8769  match(Set dst (Conv2B src));
8770
8771  expand %{
8772    movP_nocopy(dst,src);
8773    cp2b(dst,src,cr);
8774  %}
8775%}
8776
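// CmpLTMask produces -1 when p < q (signed compare) and 0 otherwise.  The encoding
// below materializes the mask with SETlt/NEG; the cadd_cmpLTMask and and_cmpLTMask
// forms further down fold the mask into the following add/and using a short branch
// instead of materializing it.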
8777instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8778  match(Set dst (CmpLTMask p q));
8779  effect(KILL cr);
8780  ins_cost(400);
8781
8782  // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
8783  format %{ "XOR    $dst,$dst\n\t"
8784            "CMP    $p,$q\n\t"
8785            "SETlt  $dst\n\t"
8786            "NEG    $dst" %}
8787  ins_encode %{
8788    Register Rp = $p$$Register;
8789    Register Rq = $q$$Register;
8790    Register Rd = $dst$$Register;
8792    __ xorl(Rd, Rd);
8793    __ cmpl(Rp, Rq);
8794    __ setb(Assembler::less, Rd);
8795    __ negl(Rd);
8796  %}
8797
8798  ins_pipe(pipe_slow);
8799%}
8800
8801instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8802  match(Set dst (CmpLTMask dst zero));
8803  effect(DEF dst, KILL cr);
8804  ins_cost(100);
8805
8806  format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8807  ins_encode %{
8808  __ sarl($dst$$Register, 31);
8809  %}
8810  ins_pipe(ialu_reg);
8811%}
8812
8813/* better to save a register than avoid a branch */
8814instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8815  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8816  effect(KILL cr);
8817  ins_cost(400);
8818  format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8819            "JGE    done\n\t"
8820            "ADD    $p,$y\n"
8821            "done:  " %}
8822  ins_encode %{
8823    Register Rp = $p$$Register;
8824    Register Rq = $q$$Register;
8825    Register Ry = $y$$Register;
8826    Label done;
8827    __ subl(Rp, Rq);
8828    __ jccb(Assembler::greaterEqual, done);
8829    __ addl(Rp, Ry);
8830    __ bind(done);
8831  %}
8832
8833  ins_pipe(pipe_cmplt);
8834%}
8835
8836/* better to save a register than avoid a branch */
8837instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8838  match(Set y (AndI (CmpLTMask p q) y));
8839  effect(KILL cr);
8840
8841  ins_cost(300);
8842
8843  format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8844            "JLT      done\n\t"
8845            "XORL     $y, $y\n"
8846            "done:  " %}
8847  ins_encode %{
8848    Register Rp = $p$$Register;
8849    Register Rq = $q$$Register;
8850    Register Ry = $y$$Register;
8851    Label done;
8852    __ cmpl(Rp, Rq);
8853    __ jccb(Assembler::less, done);
8854    __ xorl(Ry, Ry);
8855    __ bind(done);
8856  %}
8857
8858  ins_pipe(pipe_cmplt);
8859%}
8860
8861/* If I enable this, I encourage spilling in the inner loop of compress.
8862instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8863  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8864*/
8865//----------Overflow Math Instructions-----------------------------------------
8866
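// The Overflow* nodes only produce condition codes.  OverflowAddI is encoded as a
// real ADD into op1 (hence USE_KILL op1) so that the OF flag reflects signed
// overflow of the sum; OverflowSubI needs no destination and can use CMP, while
// OverflowMulI uses IMUL, whose OF flag signals that the full product did not fit
// in 32 bits.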
8867instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8868%{
8869  match(Set cr (OverflowAddI op1 op2));
8870  effect(DEF cr, USE_KILL op1, USE op2);
8871
8872  format %{ "ADD    $op1, $op2\t# overflow check int" %}
8873
8874  ins_encode %{
8875    __ addl($op1$$Register, $op2$$Register);
8876  %}
8877  ins_pipe(ialu_reg_reg);
8878%}
8879
8880instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8881%{
8882  match(Set cr (OverflowAddI op1 op2));
8883  effect(DEF cr, USE_KILL op1, USE op2);
8884
8885  format %{ "ADD    $op1, $op2\t# overflow check int" %}
8886
8887  ins_encode %{
8888    __ addl($op1$$Register, $op2$$constant);
8889  %}
8890  ins_pipe(ialu_reg_reg);
8891%}
8892
8893instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8894%{
8895  match(Set cr (OverflowSubI op1 op2));
8896
8897  format %{ "CMP    $op1, $op2\t# overflow check int" %}
8898  ins_encode %{
8899    __ cmpl($op1$$Register, $op2$$Register);
8900  %}
8901  ins_pipe(ialu_reg_reg);
8902%}
8903
8904instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8905%{
8906  match(Set cr (OverflowSubI op1 op2));
8907
8908  format %{ "CMP    $op1, $op2\t# overflow check int" %}
8909  ins_encode %{
8910    __ cmpl($op1$$Register, $op2$$constant);
8911  %}
8912  ins_pipe(ialu_reg_reg);
8913%}
8914
8915instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8916%{
8917  match(Set cr (OverflowSubI zero op2));
8918  effect(DEF cr, USE_KILL op2);
8919
8920  format %{ "NEG    $op2\t# overflow check int" %}
8921  ins_encode %{
8922    __ negl($op2$$Register);
8923  %}
8924  ins_pipe(ialu_reg_reg);
8925%}
8926
8927instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8928%{
8929  match(Set cr (OverflowMulI op1 op2));
8930  effect(DEF cr, USE_KILL op1, USE op2);
8931
8932  format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8933  ins_encode %{
8934    __ imull($op1$$Register, $op2$$Register);
8935  %}
8936  ins_pipe(ialu_reg_reg_alu0);
8937%}
8938
8939instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8940%{
8941  match(Set cr (OverflowMulI op1 op2));
8942  effect(DEF cr, TEMP tmp, USE op1, USE op2);
8943
8944  format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8945  ins_encode %{
8946    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8947  %}
8948  ins_pipe(ialu_reg_reg_alu0);
8949%}
8950
8951// Integer Absolute Instructions
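// Branch-free absolute value: tmp = src >> 31 gives 0 or -1 (the sign mask), and
// (src ^ tmp) - tmp is src when the mask is 0 and -src when it is -1.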
8952instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
8953%{
8954  match(Set dst (AbsI src));
8955  effect(TEMP dst, TEMP tmp, KILL cr);
8956  format %{ "movl $tmp, $src\n\t"
8957            "sarl $tmp, 31\n\t"
8958            "movl $dst, $src\n\t"
8959            "xorl $dst, $tmp\n\t"
8960            "subl $dst, $tmp\n"
8961          %}
8962  ins_encode %{
8963    __ movl($tmp$$Register, $src$$Register);
8964    __ sarl($tmp$$Register, 31);
8965    __ movl($dst$$Register, $src$$Register);
8966    __ xorl($dst$$Register, $tmp$$Register);
8967    __ subl($dst$$Register, $tmp$$Register);
8968  %}
8969
8970  ins_pipe(ialu_reg_reg);
8971%}
8972
8973//----------Long Instructions------------------------------------------------
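// On IA-32 a Java long lives in a register pair: the named register holds the low
// 32 bits and HIGH_FROM_LOW() names its high-half partner.  The 64-bit arithmetic
// below is therefore synthesized from 32-bit pairs, e.g. ADD/ADC for addition and
// SUB/SBB for subtraction, with the carry or borrow linking the two halves.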
8974// Add Long Register with Register
8975instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8976  match(Set dst (AddL dst src));
8977  effect(KILL cr);
8978  ins_cost(200);
8979  format %{ "ADD    $dst.lo,$src.lo\n\t"
8980            "ADC    $dst.hi,$src.hi" %}
8981  opcode(0x03, 0x13);
8982  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8983  ins_pipe( ialu_reg_reg_long );
8984%}
8985
8986// Add Long Register with Immediate
8987instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8988  match(Set dst (AddL dst src));
8989  effect(KILL cr);
8990  format %{ "ADD    $dst.lo,$src.lo\n\t"
8991            "ADC    $dst.hi,$src.hi" %}
8992  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8993  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8994  ins_pipe( ialu_reg_long );
8995%}
8996
8997// Add Long Register with Memory
8998instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8999  match(Set dst (AddL dst (LoadL mem)));
9000  effect(KILL cr);
9001  ins_cost(125);
9002  format %{ "ADD    $dst.lo,$mem\n\t"
9003            "ADC    $dst.hi,$mem+4" %}
9004  opcode(0x03, 0x13);
9005  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9006  ins_pipe( ialu_reg_long_mem );
9007%}
9008
9009// Subtract Long Register with Register.
9010instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9011  match(Set dst (SubL dst src));
9012  effect(KILL cr);
9013  ins_cost(200);
9014  format %{ "SUB    $dst.lo,$src.lo\n\t"
9015            "SBB    $dst.hi,$src.hi" %}
9016  opcode(0x2B, 0x1B);
9017  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9018  ins_pipe( ialu_reg_reg_long );
9019%}
9020
9021// Subtract Long Register with Immediate
9022instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9023  match(Set dst (SubL dst src));
9024  effect(KILL cr);
9025  format %{ "SUB    $dst.lo,$src.lo\n\t"
9026            "SBB    $dst.hi,$src.hi" %}
9027  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
9028  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9029  ins_pipe( ialu_reg_long );
9030%}
9031
9032// Subtract Long Register with Memory
9033instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9034  match(Set dst (SubL dst (LoadL mem)));
9035  effect(KILL cr);
9036  ins_cost(125);
9037  format %{ "SUB    $dst.lo,$mem\n\t"
9038            "SBB    $dst.hi,$mem+4" %}
9039  opcode(0x2B, 0x1B);
9040  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9041  ins_pipe( ialu_reg_long_mem );
9042%}
9043
9044instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
9045  match(Set dst (SubL zero dst));
9046  effect(KILL cr);
9047  ins_cost(300);
9048  format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
9049  ins_encode( neg_long(dst) );
9050  ins_pipe( ialu_reg_reg_long );
9051%}
9052
9053// And Long Register with Register
9054instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9055  match(Set dst (AndL dst src));
9056  effect(KILL cr);
9057  format %{ "AND    $dst.lo,$src.lo\n\t"
9058            "AND    $dst.hi,$src.hi" %}
9059  opcode(0x23,0x23);
9060  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9061  ins_pipe( ialu_reg_reg_long );
9062%}
9063
9064// And Long Register with Immediate
9065instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9066  match(Set dst (AndL dst src));
9067  effect(KILL cr);
9068  format %{ "AND    $dst.lo,$src.lo\n\t"
9069            "AND    $dst.hi,$src.hi" %}
9070  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
9071  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9072  ins_pipe( ialu_reg_long );
9073%}
9074
9075// And Long Register with Memory
9076instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9077  match(Set dst (AndL dst (LoadL mem)));
9078  effect(KILL cr);
9079  ins_cost(125);
9080  format %{ "AND    $dst.lo,$mem\n\t"
9081            "AND    $dst.hi,$mem+4" %}
9082  opcode(0x23, 0x23);
9083  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9084  ins_pipe( ialu_reg_long_mem );
9085%}
9086
9087// BMI1 instructions
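// The long (64-bit) BMI1 forms below apply the 32-bit instruction to each half of
// the register pair.  For BLSI/BLSMSK/BLSR the interesting bit lives in the low
// half whenever that half is non-zero, so the encodings run the operation on the
// low word first and, as the JNZ/JNC in the formats show, only fall through to the
// high word when the flags say the low word did not settle the result.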
9088instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
9089  match(Set dst (AndL (XorL src1 minus_1) src2));
9090  predicate(UseBMI1Instructions);
9091  effect(KILL cr, TEMP dst);
9092
9093  format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
9094            "ANDNL  $dst.hi, $src1.hi, $src2.hi"
9095         %}
9096
9097  ins_encode %{
9098    Register Rdst = $dst$$Register;
9099    Register Rsrc1 = $src1$$Register;
9100    Register Rsrc2 = $src2$$Register;
9101    __ andnl(Rdst, Rsrc1, Rsrc2);
9102    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
9103  %}
9104  ins_pipe(ialu_reg_reg_long);
9105%}
9106
9107instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
9108  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
9109  predicate(UseBMI1Instructions);
9110  effect(KILL cr, TEMP dst);
9111
9112  ins_cost(125);
9113  format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
9114            "ANDNL  $dst.hi, $src1.hi, $src2+4"
9115         %}
9116
9117  ins_encode %{
9118    Register Rdst = $dst$$Register;
9119    Register Rsrc1 = $src1$$Register;
9120    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
9121
9122    __ andnl(Rdst, Rsrc1, $src2$$Address);
9123    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
9124  %}
9125  ins_pipe(ialu_reg_mem);
9126%}
9127
9128instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
9129  match(Set dst (AndL (SubL imm_zero src) src));
9130  predicate(UseBMI1Instructions);
9131  effect(KILL cr, TEMP dst);
9132
9133  format %{ "MOVL   $dst.hi, 0\n\t"
9134            "BLSIL  $dst.lo, $src.lo\n\t"
9135            "JNZ    done\n\t"
9136            "BLSIL  $dst.hi, $src.hi\n"
9137            "done:"
9138         %}
9139
9140  ins_encode %{
9141    Label done;
9142    Register Rdst = $dst$$Register;
9143    Register Rsrc = $src$$Register;
9144    __ movl(HIGH_FROM_LOW(Rdst), 0);
9145    __ blsil(Rdst, Rsrc);
9146    __ jccb(Assembler::notZero, done);
9147    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9148    __ bind(done);
9149  %}
9150  ins_pipe(ialu_reg);
9151%}
9152
9153instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
9154  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
9155  predicate(UseBMI1Instructions);
9156  effect(KILL cr, TEMP dst);
9157
9158  ins_cost(125);
9159  format %{ "MOVL   $dst.hi, 0\n\t"
9160            "BLSIL  $dst.lo, $src\n\t"
9161            "JNZ    done\n\t"
9162            "BLSIL  $dst.hi, $src+4\n"
9163            "done:"
9164         %}
9165
9166  ins_encode %{
9167    Label done;
9168    Register Rdst = $dst$$Register;
9169    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9170
9171    __ movl(HIGH_FROM_LOW(Rdst), 0);
9172    __ blsil(Rdst, $src$$Address);
9173    __ jccb(Assembler::notZero, done);
9174    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
9175    __ bind(done);
9176  %}
9177  ins_pipe(ialu_reg_mem);
9178%}
9179
9180instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9181%{
9182  match(Set dst (XorL (AddL src minus_1) src));
9183  predicate(UseBMI1Instructions);
9184  effect(KILL cr, TEMP dst);
9185
9186  format %{ "MOVL    $dst.hi, 0\n\t"
9187            "BLSMSKL $dst.lo, $src.lo\n\t"
9188            "JNC     done\n\t"
9189            "BLSMSKL $dst.hi, $src.hi\n"
9190            "done:"
9191         %}
9192
9193  ins_encode %{
9194    Label done;
9195    Register Rdst = $dst$$Register;
9196    Register Rsrc = $src$$Register;
9197    __ movl(HIGH_FROM_LOW(Rdst), 0);
9198    __ blsmskl(Rdst, Rsrc);
9199    __ jccb(Assembler::carryClear, done);
9200    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9201    __ bind(done);
9202  %}
9203
9204  ins_pipe(ialu_reg);
9205%}
9206
9207instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9208%{
9209  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
9210  predicate(UseBMI1Instructions);
9211  effect(KILL cr, TEMP dst);
9212
9213  ins_cost(125);
9214  format %{ "MOVL    $dst.hi, 0\n\t"
9215            "BLSMSKL $dst.lo, $src\n\t"
9216            "JNC     done\n\t"
9217            "BLSMSKL $dst.hi, $src+4\n"
9218            "done:"
9219         %}
9220
9221  ins_encode %{
9222    Label done;
9223    Register Rdst = $dst$$Register;
9224    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9225
9226    __ movl(HIGH_FROM_LOW(Rdst), 0);
9227    __ blsmskl(Rdst, $src$$Address);
9228    __ jccb(Assembler::carryClear, done);
9229    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
9230    __ bind(done);
9231  %}
9232
9233  ins_pipe(ialu_reg_mem);
9234%}
9235
9236instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9237%{
9238  match(Set dst (AndL (AddL src minus_1) src) );
9239  predicate(UseBMI1Instructions);
9240  effect(KILL cr, TEMP dst);
9241
9242  format %{ "MOVL   $dst.hi, $src.hi\n\t"
9243            "BLSRL  $dst.lo, $src.lo\n\t"
9244            "JNC    done\n\t"
9245            "BLSRL  $dst.hi, $src.hi\n"
9246            "done:"
9247  %}
9248
9249  ins_encode %{
9250    Label done;
9251    Register Rdst = $dst$$Register;
9252    Register Rsrc = $src$$Register;
9253    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9254    __ blsrl(Rdst, Rsrc);
9255    __ jccb(Assembler::carryClear, done);
9256    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9257    __ bind(done);
9258  %}
9259
9260  ins_pipe(ialu_reg);
9261%}
9262
9263instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9264%{
9265  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9266  predicate(UseBMI1Instructions);
9267  effect(KILL cr, TEMP dst);
9268
9269  ins_cost(125);
9270  format %{ "MOVL   $dst.hi, $src+4\n\t"
9271            "BLSRL  $dst.lo, $src\n\t"
9272            "JNC    done\n\t"
9273            "BLSRL  $dst.hi, $src+4\n"
9274            "done:"
9275  %}
9276
9277  ins_encode %{
9278    Label done;
9279    Register Rdst = $dst$$Register;
9280    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9281    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9282    __ blsrl(Rdst, $src$$Address);
9283    __ jccb(Assembler::carryClear, done);
9284    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9285    __ bind(done);
9286  %}
9287
9288  ins_pipe(ialu_reg_mem);
9289%}
9290
9291// Or Long Register with Register
9292instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9293  match(Set dst (OrL dst src));
9294  effect(KILL cr);
9295  format %{ "OR     $dst.lo,$src.lo\n\t"
9296            "OR     $dst.hi,$src.hi" %}
9297  opcode(0x0B,0x0B);
9298  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9299  ins_pipe( ialu_reg_reg_long );
9300%}
9301
9302// Or Long Register with Immediate
9303instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9304  match(Set dst (OrL dst src));
9305  effect(KILL cr);
9306  format %{ "OR     $dst.lo,$src.lo\n\t"
9307            "OR     $dst.hi,$src.hi" %}
9308  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9309  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9310  ins_pipe( ialu_reg_long );
9311%}
9312
9313// Or Long Register with Memory
9314instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9315  match(Set dst (OrL dst (LoadL mem)));
9316  effect(KILL cr);
9317  ins_cost(125);
9318  format %{ "OR     $dst.lo,$mem\n\t"
9319            "OR     $dst.hi,$mem+4" %}
9320  opcode(0x0B,0x0B);
9321  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9322  ins_pipe( ialu_reg_long_mem );
9323%}
9324
9325// Xor Long Register with Register
9326instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9327  match(Set dst (XorL dst src));
9328  effect(KILL cr);
9329  format %{ "XOR    $dst.lo,$src.lo\n\t"
9330            "XOR    $dst.hi,$src.hi" %}
9331  opcode(0x33,0x33);
9332  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9333  ins_pipe( ialu_reg_reg_long );
9334%}
9335
9336// Xor Long Register with Immediate -1
9337instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9338  match(Set dst (XorL dst imm));
9339  format %{ "NOT    $dst.lo\n\t"
9340            "NOT    $dst.hi" %}
9341  ins_encode %{
9342     __ notl($dst$$Register);
9343     __ notl(HIGH_FROM_LOW($dst$$Register));
9344  %}
9345  ins_pipe( ialu_reg_long );
9346%}
9347
9348// Xor Long Register with Immediate
9349instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9350  match(Set dst (XorL dst src));
9351  effect(KILL cr);
9352  format %{ "XOR    $dst.lo,$src.lo\n\t"
9353            "XOR    $dst.hi,$src.hi" %}
9354  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9355  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9356  ins_pipe( ialu_reg_long );
9357%}
9358
9359// Xor Long Register with Memory
9360instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9361  match(Set dst (XorL dst (LoadL mem)));
9362  effect(KILL cr);
9363  ins_cost(125);
9364  format %{ "XOR    $dst.lo,$mem\n\t"
9365            "XOR    $dst.hi,$mem+4" %}
9366  opcode(0x33,0x33);
9367  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9368  ins_pipe( ialu_reg_long_mem );
9369%}
9370
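// Long shifts are likewise built from the 32-bit halves: small left-shift counts
// (1 to 3, under UseNewLongLShift) are strength-reduced to ADD/ADC chains, constant
// counts of 1..31 use SHLD/SHRD (which shift bits across the half boundary) plus a
// plain shift of the other half, counts of 32..63 move one half into the other and
// shift the remainder, and variable shifts test bit 5 of the count at run time to
// choose between the two cases.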
9371// Shift Left Long by 1
9372instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9373  predicate(UseNewLongLShift);
9374  match(Set dst (LShiftL dst cnt));
9375  effect(KILL cr);
9376  ins_cost(100);
9377  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9378            "ADC    $dst.hi,$dst.hi" %}
9379  ins_encode %{
9380    __ addl($dst$$Register,$dst$$Register);
9381    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9382  %}
9383  ins_pipe( ialu_reg_long );
9384%}
9385
9386// Shift Left Long by 2
9387instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9388  predicate(UseNewLongLShift);
9389  match(Set dst (LShiftL dst cnt));
9390  effect(KILL cr);
9391  ins_cost(100);
9392  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9393            "ADC    $dst.hi,$dst.hi\n\t"
9394            "ADD    $dst.lo,$dst.lo\n\t"
9395            "ADC    $dst.hi,$dst.hi" %}
9396  ins_encode %{
9397    __ addl($dst$$Register,$dst$$Register);
9398    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9399    __ addl($dst$$Register,$dst$$Register);
9400    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9401  %}
9402  ins_pipe( ialu_reg_long );
9403%}
9404
9405// Shift Left Long by 3
9406instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9407  predicate(UseNewLongLShift);
9408  match(Set dst (LShiftL dst cnt));
9409  effect(KILL cr);
9410  ins_cost(100);
9411  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9412            "ADC    $dst.hi,$dst.hi\n\t"
9413            "ADD    $dst.lo,$dst.lo\n\t"
9414            "ADC    $dst.hi,$dst.hi\n\t"
9415            "ADD    $dst.lo,$dst.lo\n\t"
9416            "ADC    $dst.hi,$dst.hi" %}
9417  ins_encode %{
9418    __ addl($dst$$Register,$dst$$Register);
9419    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9420    __ addl($dst$$Register,$dst$$Register);
9421    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9422    __ addl($dst$$Register,$dst$$Register);
9423    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9424  %}
9425  ins_pipe( ialu_reg_long );
9426%}
9427
9428// Shift Left Long by 1-31
9429instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9430  match(Set dst (LShiftL dst cnt));
9431  effect(KILL cr);
9432  ins_cost(200);
9433  format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9434            "SHL    $dst.lo,$cnt" %}
9435  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9436  ins_encode( move_long_small_shift(dst,cnt) );
9437  ins_pipe( ialu_reg_long );
9438%}
9439
9440// Shift Left Long by 32-63
9441instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9442  match(Set dst (LShiftL dst cnt));
9443  effect(KILL cr);
9444  ins_cost(300);
9445  format %{ "MOV    $dst.hi,$dst.lo\n"
9446          "\tSHL    $dst.hi,$cnt-32\n"
9447          "\tXOR    $dst.lo,$dst.lo" %}
9448  opcode(0xC1, 0x4);  /* C1 /4 ib */
9449  ins_encode( move_long_big_shift_clr(dst,cnt) );
9450  ins_pipe( ialu_reg_long );
9451%}
9452
9453// Shift Left Long by variable
9454instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9455  match(Set dst (LShiftL dst shift));
9456  effect(KILL cr);
9457  ins_cost(500+200);
9458  size(17);
9459  format %{ "TEST   $shift,32\n\t"
9460            "JEQ,s  small\n\t"
9461            "MOV    $dst.hi,$dst.lo\n\t"
9462            "XOR    $dst.lo,$dst.lo\n"
9463    "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9464            "SHL    $dst.lo,$shift" %}
9465  ins_encode( shift_left_long( dst, shift ) );
9466  ins_pipe( pipe_slow );
9467%}
9468
9469// Shift Right Long by 1-31
9470instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9471  match(Set dst (URShiftL dst cnt));
9472  effect(KILL cr);
9473  ins_cost(200);
9474  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9475            "SHR    $dst.hi,$cnt" %}
9476  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9477  ins_encode( move_long_small_shift(dst,cnt) );
9478  ins_pipe( ialu_reg_long );
9479%}
9480
9481// Shift Right Long by 32-63
9482instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9483  match(Set dst (URShiftL dst cnt));
9484  effect(KILL cr);
9485  ins_cost(300);
9486  format %{ "MOV    $dst.lo,$dst.hi\n"
9487          "\tSHR    $dst.lo,$cnt-32\n"
9488          "\tXOR    $dst.hi,$dst.hi" %}
9489  opcode(0xC1, 0x5);  /* C1 /5 ib */
9490  ins_encode( move_long_big_shift_clr(dst,cnt) );
9491  ins_pipe( ialu_reg_long );
9492%}
9493
9494// Shift Right Long by variable
9495instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9496  match(Set dst (URShiftL dst shift));
9497  effect(KILL cr);
9498  ins_cost(600);
9499  size(17);
9500  format %{ "TEST   $shift,32\n\t"
9501            "JEQ,s  small\n\t"
9502            "MOV    $dst.lo,$dst.hi\n\t"
9503            "XOR    $dst.hi,$dst.hi\n"
9504    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9505            "SHR    $dst.hi,$shift" %}
9506  ins_encode( shift_right_long( dst, shift ) );
9507  ins_pipe( pipe_slow );
9508%}
9509
9510// Shift Right Long by 1-31
9511instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9512  match(Set dst (RShiftL dst cnt));
9513  effect(KILL cr);
9514  ins_cost(200);
9515  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9516            "SAR    $dst.hi,$cnt" %}
9517  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9518  ins_encode( move_long_small_shift(dst,cnt) );
9519  ins_pipe( ialu_reg_long );
9520%}
9521
9522// Shift Right Long by 32-63
9523instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9524  match(Set dst (RShiftL dst cnt));
9525  effect(KILL cr);
9526  ins_cost(300);
9527  format %{ "MOV    $dst.lo,$dst.hi\n"
9528          "\tSAR    $dst.lo,$cnt-32\n"
9529          "\tSAR    $dst.hi,31" %}
9530  opcode(0xC1, 0x7);  /* C1 /7 ib */
9531  ins_encode( move_long_big_shift_sign(dst,cnt) );
9532  ins_pipe( ialu_reg_long );
9533%}
9534
9535// Shift Right arithmetic Long by variable
9536instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9537  match(Set dst (RShiftL dst shift));
9538  effect(KILL cr);
9539  ins_cost(600);
9540  size(18);
9541  format %{ "TEST   $shift,32\n\t"
9542            "JEQ,s  small\n\t"
9543            "MOV    $dst.lo,$dst.hi\n\t"
9544            "SAR    $dst.hi,31\n"
9545    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9546            "SAR    $dst.hi,$shift" %}
9547  ins_encode( shift_right_arith_long( dst, shift ) );
9548  ins_pipe( pipe_slow );
9549%}
9550
9551
9552//----------Double Instructions------------------------------------------------
9553// Double Math
9554
9555// Compare & branch
9556
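// Unordered compares: FUCOMI(P) and UCOMISD set ZF, PF and CF all to 1 when either
// operand is a NaN.  The fixup sequences below branch on PF (JNP skips them) and
// adjust the flags so that an unordered result reads as "less than", as the
// "saw a NaN, set CF" notes in the formats indicate.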
9557// P6 version of double compare, sets condition codes in EFLAGS
9558instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9559  predicate(VM_Version::supports_cmov() && UseSSE <=1);
9560  match(Set cr (CmpD src1 src2));
9561  effect(KILL rax);
9562  ins_cost(150);
9563  format %{ "FLD    $src1\n\t"
9564            "FUCOMIP ST,$src2  // P6 instruction\n\t"
9565            "JNP    exit\n\t"
9566            "MOV    ah,1       // saw a NaN, set CF\n\t"
9567            "SAHF\n"
9568     "exit:\tNOP               // avoid branch to branch" %}
9569  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9570  ins_encode( Push_Reg_DPR(src1),
9571              OpcP, RegOpc(src2),
9572              cmpF_P6_fixup );
9573  ins_pipe( pipe_slow );
9574%}
9575
9576instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9577  predicate(VM_Version::supports_cmov() && UseSSE <=1);
9578  match(Set cr (CmpD src1 src2));
9579  ins_cost(150);
9580  format %{ "FLD    $src1\n\t"
9581            "FUCOMIP ST,$src2  // P6 instruction" %}
9582  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9583  ins_encode( Push_Reg_DPR(src1),
9584              OpcP, RegOpc(src2));
9585  ins_pipe( pipe_slow );
9586%}
9587
9588// Compare & branch
9589instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9590  predicate(UseSSE<=1);
9591  match(Set cr (CmpD src1 src2));
9592  effect(KILL rax);
9593  ins_cost(200);
9594  format %{ "FLD    $src1\n\t"
9595            "FCOMp  $src2\n\t"
9596            "FNSTSW AX\n\t"
9597            "TEST   AX,0x400\n\t"
9598            "JZ,s   flags\n\t"
9599            "MOV    AH,1\t# unordered treat as LT\n"
9600    "flags:\tSAHF" %}
9601  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9602  ins_encode( Push_Reg_DPR(src1),
9603              OpcP, RegOpc(src2),
9604              fpu_flags);
9605  ins_pipe( pipe_slow );
9606%}
9607
9608// Compare vs zero into -1,0,1
9609instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9610  predicate(UseSSE<=1);
9611  match(Set dst (CmpD3 src1 zero));
9612  effect(KILL cr, KILL rax);
9613  ins_cost(280);
9614  format %{ "FTSTD  $dst,$src1" %}
9615  opcode(0xE4, 0xD9);
9616  ins_encode( Push_Reg_DPR(src1),
9617              OpcS, OpcP, PopFPU,
9618              CmpF_Result(dst));
9619  ins_pipe( pipe_slow );
9620%}
9621
9622// Compare into -1,0,1
9623instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9624  predicate(UseSSE<=1);
9625  match(Set dst (CmpD3 src1 src2));
9626  effect(KILL cr, KILL rax);
9627  ins_cost(300);
9628  format %{ "FCMPD  $dst,$src1,$src2" %}
9629  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9630  ins_encode( Push_Reg_DPR(src1),
9631              OpcP, RegOpc(src2),
9632              CmpF_Result(dst));
9633  ins_pipe( pipe_slow );
9634%}
9635
9636// double compare and set condition codes in EFLAGS by XMM regs
9637instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9638  predicate(UseSSE>=2);
9639  match(Set cr (CmpD src1 src2));
9640  ins_cost(145);
9641  format %{ "UCOMISD $src1,$src2\n\t"
9642            "JNP,s   exit\n\t"
9643            "PUSHF\t# saw NaN, set CF\n\t"
9644            "AND     [rsp], #0xffffff2b\n\t"
9645            "POPF\n"
9646    "exit:" %}
9647  ins_encode %{
9648    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9649    emit_cmpfp_fixup(_masm);
9650  %}
9651  ins_pipe( pipe_slow );
9652%}
9653
9654instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9655  predicate(UseSSE>=2);
9656  match(Set cr (CmpD src1 src2));
9657  ins_cost(100);
9658  format %{ "UCOMISD $src1,$src2" %}
9659  ins_encode %{
9660    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9661  %}
9662  ins_pipe( pipe_slow );
9663%}
9664
9665// double compare and set condition codes in EFLAGS by XMM regs
9666instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9667  predicate(UseSSE>=2);
9668  match(Set cr (CmpD src1 (LoadD src2)));
9669  ins_cost(145);
9670  format %{ "UCOMISD $src1,$src2\n\t"
9671            "JNP,s   exit\n\t"
9672            "PUSHF\t# saw NaN, set CF\n\t"
9673            "AND     [rsp], #0xffffff2b\n\t"
9674            "POPF\n"
9675    "exit:" %}
9676  ins_encode %{
9677    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9678    emit_cmpfp_fixup(_masm);
9679  %}
9680  ins_pipe( pipe_slow );
9681%}
9682
9683instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9684  predicate(UseSSE>=2);
9685  match(Set cr (CmpD src1 (LoadD src2)));
9686  ins_cost(100);
9687  format %{ "UCOMISD $src1,$src2" %}
9688  ins_encode %{
9689    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9690  %}
9691  ins_pipe( pipe_slow );
9692%}
9693
9694// Compare into -1,0,1 in XMM
9695instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9696  predicate(UseSSE>=2);
9697  match(Set dst (CmpD3 src1 src2));
9698  effect(KILL cr);
9699  ins_cost(255);
9700  format %{ "UCOMISD $src1, $src2\n\t"
9701            "MOV     $dst, #-1\n\t"
9702            "JP,s    done\n\t"
9703            "JB,s    done\n\t"
9704            "SETNE   $dst\n\t"
9705            "MOVZB   $dst, $dst\n"
9706    "done:" %}
9707  ins_encode %{
9708    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9709    emit_cmpfp3(_masm, $dst$$Register);
9710  %}
9711  ins_pipe( pipe_slow );
9712%}
9713
9714// Compare into -1,0,1 in XMM and memory
9715instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9716  predicate(UseSSE>=2);
9717  match(Set dst (CmpD3 src1 (LoadD src2)));
9718  effect(KILL cr);
9719  ins_cost(275);
9720  format %{ "UCOMISD $src1, $src2\n\t"
9721            "MOV     $dst, #-1\n\t"
9722            "JP,s    done\n\t"
9723            "JB,s    done\n\t"
9724            "SETNE   $dst\n\t"
9725            "MOVZB   $dst, $dst\n"
9726    "done:" %}
9727  ins_encode %{
9728    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9729    emit_cmpfp3(_masm, $dst$$Register);
9730  %}
9731  ins_pipe( pipe_slow );
9732%}
9733
9734
9735instruct subDPR_reg(regDPR dst, regDPR src) %{
9736  predicate (UseSSE <=1);
9737  match(Set dst (SubD dst src));
9738
9739  format %{ "FLD    $src\n\t"
9740            "DSUBp  $dst,ST" %}
9741  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9742  ins_cost(150);
9743  ins_encode( Push_Reg_DPR(src),
9744              OpcP, RegOpc(dst) );
9745  ins_pipe( fpu_reg_reg );
9746%}
9747
9748instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9749  predicate (UseSSE <=1);
9750  match(Set dst (RoundDouble (SubD src1 src2)));
9751  ins_cost(250);
9752
9753  format %{ "FLD    $src2\n\t"
9754            "DSUB   ST,$src1\n\t"
9755            "FSTP_D $dst\t# D-round" %}
9756  opcode(0xD8, 0x5);
9757  ins_encode( Push_Reg_DPR(src2),
9758              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9759  ins_pipe( fpu_mem_reg_reg );
9760%}
9761
9762
9763instruct subDPR_reg_mem(regDPR dst, memory src) %{
9764  predicate (UseSSE <=1);
9765  match(Set dst (SubD dst (LoadD src)));
9766  ins_cost(150);
9767
9768  format %{ "FLD    $src\n\t"
9769            "DSUBp  $dst,ST" %}
9770  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9771  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9772              OpcP, RegOpc(dst) );
9773  ins_pipe( fpu_reg_mem );
9774%}
9775
9776instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9777  predicate (UseSSE<=1);
9778  match(Set dst (AbsD src));
9779  ins_cost(100);
9780  format %{ "FABS" %}
9781  opcode(0xE1, 0xD9);
9782  ins_encode( OpcS, OpcP );
9783  ins_pipe( fpu_reg_reg );
9784%}
9785
9786instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9787  predicate(UseSSE<=1);
9788  match(Set dst (NegD src));
9789  ins_cost(100);
9790  format %{ "FCHS" %}
9791  opcode(0xE0, 0xD9);
9792  ins_encode( OpcS, OpcP );
9793  ins_pipe( fpu_reg_reg );
9794%}
9795
9796instruct addDPR_reg(regDPR dst, regDPR src) %{
9797  predicate(UseSSE<=1);
9798  match(Set dst (AddD dst src));
9799  format %{ "FLD    $src\n\t"
9800            "DADD   $dst,ST" %}
9801  size(4);
9802  ins_cost(150);
9803  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9804  ins_encode( Push_Reg_DPR(src),
9805              OpcP, RegOpc(dst) );
9806  ins_pipe( fpu_reg_reg );
9807%}
9808
9809
9810instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9811  predicate(UseSSE<=1);
9812  match(Set dst (RoundDouble (AddD src1 src2)));
9813  ins_cost(250);
9814
9815  format %{ "FLD    $src2\n\t"
9816            "DADD   ST,$src1\n\t"
9817            "FSTP_D $dst\t# D-round" %}
9818  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9819  ins_encode( Push_Reg_DPR(src2),
9820              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9821  ins_pipe( fpu_mem_reg_reg );
9822%}
9823
9824
9825instruct addDPR_reg_mem(regDPR dst, memory src) %{
9826  predicate(UseSSE<=1);
9827  match(Set dst (AddD dst (LoadD src)));
9828  ins_cost(150);
9829
9830  format %{ "FLD    $src\n\t"
9831            "DADDp  $dst,ST" %}
9832  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9833  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9834              OpcP, RegOpc(dst) );
9835  ins_pipe( fpu_reg_mem );
9836%}
9837
9838// add-to-memory
9839instruct addDPR_mem_reg(memory dst, regDPR src) %{
9840  predicate(UseSSE<=1);
9841  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9842  ins_cost(150);
9843
9844  format %{ "FLD_D  $dst\n\t"
9845            "DADD   ST,$src\n\t"
9846            "FST_D  $dst" %}
9847  opcode(0xDD, 0x0);
9848  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9849              Opcode(0xD8), RegOpc(src),
9850              set_instruction_start,
9851              Opcode(0xDD), RMopc_Mem(0x03,dst) );
9852  ins_pipe( fpu_reg_mem );
9853%}
9854
9855instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9856  predicate(UseSSE<=1);
9857  match(Set dst (AddD dst con));
9858  ins_cost(125);
9859  format %{ "FLD1\n\t"
9860            "DADDp  $dst,ST" %}
9861  ins_encode %{
9862    __ fld1();
9863    __ faddp($dst$$reg);
9864  %}
9865  ins_pipe(fpu_reg);
9866%}
9867
9868instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9869  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9870  match(Set dst (AddD dst con));
9871  ins_cost(200);
9872  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9873            "DADDp  $dst,ST" %}
9874  ins_encode %{
9875    __ fld_d($constantaddress($con));
9876    __ faddp($dst$$reg);
9877  %}
9878  ins_pipe(fpu_reg_mem);
9879%}
9880
9881instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9882  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9883  match(Set dst (RoundDouble (AddD src con)));
9884  ins_cost(200);
9885  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9886            "DADD   ST,$src\n\t"
9887            "FSTP_D $dst\t# D-round" %}
9888  ins_encode %{
9889    __ fld_d($constantaddress($con));
9890    __ fadd($src$$reg);
9891    __ fstp_d(Address(rsp, $dst$$disp));
9892  %}
9893  ins_pipe(fpu_mem_reg_con);
9894%}
9895
9896instruct mulDPR_reg(regDPR dst, regDPR src) %{
9897  predicate(UseSSE<=1);
9898  match(Set dst (MulD dst src));
9899  format %{ "FLD    $src\n\t"
9900            "DMULp  $dst,ST" %}
9901  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9902  ins_cost(150);
9903  ins_encode( Push_Reg_DPR(src),
9904              OpcP, RegOpc(dst) );
9905  ins_pipe( fpu_reg_reg );
9906%}
9907
9908// Strict FP instruction biases argument before multiply then
9909// biases result to avoid double rounding of subnormals.
9910//
9911// scale arg1 by multiplying arg1 by 2^(-15360)
9912// load arg2
9913// multiply scaled arg1 by arg2
9914// rescale product by 2^(15360)
9915//
9916instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9917  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9918  match(Set dst (MulD dst src));
9919  ins_cost(1);   // Select this instruction for all strict FP double multiplies
9920
9921  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9922            "DMULp  $dst,ST\n\t"
9923            "FLD    $src\n\t"
9924            "DMULp  $dst,ST\n\t"
9925            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9926            "DMULp  $dst,ST\n\t" %}
9927  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9928  ins_encode( strictfp_bias1(dst),
9929              Push_Reg_DPR(src),
9930              OpcP, RegOpc(dst),
9931              strictfp_bias2(dst) );
9932  ins_pipe( fpu_reg_reg );
9933%}
9934
9935instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9936  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9937  match(Set dst (MulD dst con));
9938  ins_cost(200);
9939  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9940            "DMULp  $dst,ST" %}
9941  ins_encode %{
9942    __ fld_d($constantaddress($con));
9943    __ fmulp($dst$$reg);
9944  %}
9945  ins_pipe(fpu_reg_mem);
9946%}
9947
9948
9949instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9950  predicate( UseSSE<=1 );
9951  match(Set dst (MulD dst (LoadD src)));
9952  ins_cost(200);
9953  format %{ "FLD_D  $src\n\t"
9954            "DMULp  $dst,ST" %}
9955  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9956  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9957              OpcP, RegOpc(dst) );
9958  ins_pipe( fpu_reg_mem );
9959%}
9960
9961//
9962// Cisc-alternate to reg-reg multiply
9963instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9964  predicate( UseSSE<=1 );
9965  match(Set dst (MulD src (LoadD mem)));
9966  ins_cost(250);
9967  format %{ "FLD_D  $mem\n\t"
9968            "DMUL   ST,$src\n\t"
9969            "FSTP_D $dst" %}
9970  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9971  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9972              OpcReg_FPR(src),
9973              Pop_Reg_DPR(dst) );
9974  ins_pipe( fpu_reg_reg_mem );
9975%}
9976
9977
9978// MACRO3 -- addDPR a mulDPR
9979// This instruction is a '2-address' instruction in that the result goes
9980// back to src2.  This eliminates a move from the macro; possibly the
9981// register allocator will have to add it back (and maybe not).
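// For example, a subtree like  acc = (a * b) + acc  matches here as
//   Set acc (AddD (MulD a b) acc)
// and the final FADDP leaves the sum directly in $src2, so no separate FPU
// register move is emitted for the macro.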
9982instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9983  predicate( UseSSE<=1 );
9984  match(Set src2 (AddD (MulD src0 src1) src2));
9985  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9986            "DMUL   ST,$src1\n\t"
9987            "DADDp  $src2,ST" %}
9988  ins_cost(250);
9989  opcode(0xDD); /* LoadD DD /0 */
9990  ins_encode( Push_Reg_FPR(src0),
9991              FMul_ST_reg(src1),
9992              FAddP_reg_ST(src2) );
9993  ins_pipe( fpu_reg_reg_reg );
9994%}
9995
9996
9997// MACRO3 -- subDPR a mulDPR
9998instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9999  predicate( UseSSE<=1 );
10000  match(Set src2 (SubD (MulD src0 src1) src2));
10001  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
10002            "DMUL   ST,$src1\n\t"
10003            "DSUBRp $src2,ST" %}
10004  ins_cost(250);
10005  ins_encode( Push_Reg_FPR(src0),
10006              FMul_ST_reg(src1),
10007              Opcode(0xDE), Opc_plus(0xE0,src2));
10008  ins_pipe( fpu_reg_reg_reg );
10009%}
10010
10011
10012instruct divDPR_reg(regDPR dst, regDPR src) %{
10013  predicate( UseSSE<=1 );
10014  match(Set dst (DivD dst src));
10015
10016  format %{ "FLD    $src\n\t"
10017            "FDIVp  $dst,ST" %}
10018  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10019  ins_cost(150);
10020  ins_encode( Push_Reg_DPR(src),
10021              OpcP, RegOpc(dst) );
10022  ins_pipe( fpu_reg_reg );
10023%}
10024
10025// Strict FP instruction biases argument before division then
10026// biases result, to avoid double rounding of subnormals.
10027//
10028// scale dividend by multiplying dividend by 2^(-15360)
10029// load divisor
10030// divide scaled dividend by divisor
10031// rescale quotient by 2^(15360)
10032//
10033instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
10038
10039  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
10040            "DMULp  $dst,ST\n\t"
10041            "FLD    $src\n\t"
10042            "FDIVp  $dst,ST\n\t"
10043            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
10044            "DMULp  $dst,ST\n\t" %}
10045  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10046  ins_encode( strictfp_bias1(dst),
10047              Push_Reg_DPR(src),
10048              OpcP, RegOpc(dst),
10049              strictfp_bias2(dst) );
10050  ins_pipe( fpu_reg_reg );
10051%}
10052
10053instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
10054  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
10055  match(Set dst (RoundDouble (DivD src1 src2)));
10056
10057  format %{ "FLD    $src1\n\t"
10058            "FDIV   ST,$src2\n\t"
10059            "FSTP_D $dst\t# D-round" %}
10060  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10061  ins_encode( Push_Reg_DPR(src1),
10062              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
10063  ins_pipe( fpu_mem_reg_reg );
10064%}
10065
10066
10067instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
10068  predicate(UseSSE<=1);
10069  match(Set dst (ModD dst src));
10070  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10071
10072  format %{ "DMOD   $dst,$src" %}
10073  ins_cost(250);
10074  ins_encode(Push_Reg_Mod_DPR(dst, src),
10075              emitModDPR(),
10076              Push_Result_Mod_DPR(src),
10077              Pop_Reg_DPR(dst));
10078  ins_pipe( pipe_slow );
10079%}
10080
10081instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
10082  predicate(UseSSE>=2);
10083  match(Set dst (ModD src0 src1));
10084  effect(KILL rax, KILL cr);
10085
10086  format %{ "SUB    ESP,8\t # DMOD\n"
10087          "\tMOVSD  [ESP+0],$src1\n"
10088          "\tFLD_D  [ESP+0]\n"
10089          "\tMOVSD  [ESP+0],$src0\n"
10090          "\tFLD_D  [ESP+0]\n"
10091     "loop:\tFPREM\n"
10092          "\tFWAIT\n"
10093          "\tFNSTSW AX\n"
10094          "\tSAHF\n"
10095          "\tJP     loop\n"
10096          "\tFSTP_D [ESP+0]\n"
10097          "\tMOVSD  $dst,[ESP+0]\n"
10098          "\tADD    ESP,8\n"
10099          "\tFSTP   ST0\t # Restore FPU Stack"
10100    %}
10101  ins_cost(250);
10102  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
10103  ins_pipe( pipe_slow );
10104%}
10105
10106instruct atanDPR_reg(regDPR dst, regDPR src) %{
10107  predicate (UseSSE<=1);
10108  match(Set dst(AtanD dst src));
10109  format %{ "DATA   $dst,$src" %}
10110  opcode(0xD9, 0xF3);
10111  ins_encode( Push_Reg_DPR(src),
10112              OpcP, OpcS, RegOpc(dst) );
10113  ins_pipe( pipe_slow );
10114%}
10115
10116instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10117  predicate (UseSSE>=2);
10118  match(Set dst(AtanD dst src));
10119  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10120  format %{ "DATA   $dst,$src" %}
10121  opcode(0xD9, 0xF3);
10122  ins_encode( Push_SrcD(src),
10123              OpcP, OpcS, Push_ResultD(dst) );
10124  ins_pipe( pipe_slow );
10125%}
10126
10127instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10128  predicate (UseSSE<=1);
10129  match(Set dst (SqrtD src));
10130  format %{ "DSQRT  $dst,$src" %}
10131  opcode(0xFA, 0xD9);
10132  ins_encode( Push_Reg_DPR(src),
10133              OpcS, OpcP, Pop_Reg_DPR(dst) );
10134  ins_pipe( pipe_slow );
10135%}
10136
10137//-------------Float Instructions-------------------------------
10138// Float Math
10139
10140// Code for float compare:
10141//     fcompp();
10142//     fwait(); fnstsw_ax();
10143//     sahf();
10144//     movl(dst, unordered_result);
10145//     jcc(Assembler::parity, exit);
10146//     movl(dst, less_result);
10147//     jcc(Assembler::below, exit);
10148//     movl(dst, equal_result);
10149//     jcc(Assembler::equal, exit);
10150//     movl(dst, greater_result);
10151//   exit:
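//
// Roughly, that sequence computes the Java-level result sketched below
// (unordered_result is -1 for fcmpl/dcmpl and +1 for fcmpg/dcmpg):
//
//   int cmp(float a, float b) {
//     if (a != a || b != b) return unordered_result;  // at least one NaN
//     if (a < b)  return -1;
//     if (a == b) return  0;
//     return 1;
//   }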
10152
10153// P6 version of float compare, sets condition codes in EFLAGS
10154instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10155  predicate(VM_Version::supports_cmov() && UseSSE == 0);
10156  match(Set cr (CmpF src1 src2));
10157  effect(KILL rax);
10158  ins_cost(150);
10159  format %{ "FLD    $src1\n\t"
10160            "FUCOMIP ST,$src2  // P6 instruction\n\t"
10161            "JNP    exit\n\t"
10162            "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10163            "SAHF\n"
10164     "exit:\tNOP               // avoid branch to branch" %}
10165  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10166  ins_encode( Push_Reg_DPR(src1),
10167              OpcP, RegOpc(src2),
10168              cmpF_P6_fixup );
10169  ins_pipe( pipe_slow );
10170%}
10171
10172instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10173  predicate(VM_Version::supports_cmov() && UseSSE == 0);
10174  match(Set cr (CmpF src1 src2));
10175  ins_cost(100);
10176  format %{ "FLD    $src1\n\t"
10177            "FUCOMIP ST,$src2  // P6 instruction" %}
10178  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10179  ins_encode( Push_Reg_DPR(src1),
10180              OpcP, RegOpc(src2));
10181  ins_pipe( pipe_slow );
10182%}
10183
10184
10185// Compare & branch
10186instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10187  predicate(UseSSE == 0);
10188  match(Set cr (CmpF src1 src2));
10189  effect(KILL rax);
10190  ins_cost(200);
10191  format %{ "FLD    $src1\n\t"
10192            "FCOMp  $src2\n\t"
10193            "FNSTSW AX\n\t"
10194            "TEST   AX,0x400\n\t"
10195            "JZ,s   flags\n\t"
10196            "MOV    AH,1\t# unordered treat as LT\n"
10197    "flags:\tSAHF" %}
10198  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10199  ins_encode( Push_Reg_DPR(src1),
10200              OpcP, RegOpc(src2),
10201              fpu_flags);
10202  ins_pipe( pipe_slow );
10203%}
10204
10205// Compare vs zero into -1,0,1
10206instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10207  predicate(UseSSE == 0);
10208  match(Set dst (CmpF3 src1 zero));
10209  effect(KILL cr, KILL rax);
10210  ins_cost(280);
10211  format %{ "FTSTF  $dst,$src1" %}
10212  opcode(0xE4, 0xD9);
10213  ins_encode( Push_Reg_DPR(src1),
10214              OpcS, OpcP, PopFPU,
10215              CmpF_Result(dst));
10216  ins_pipe( pipe_slow );
10217%}
10218
10219// Compare into -1,0,1
10220instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10221  predicate(UseSSE == 0);
10222  match(Set dst (CmpF3 src1 src2));
10223  effect(KILL cr, KILL rax);
10224  ins_cost(300);
10225  format %{ "FCMPF  $dst,$src1,$src2" %}
10226  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10227  ins_encode( Push_Reg_DPR(src1),
10228              OpcP, RegOpc(src2),
10229              CmpF_Result(dst));
10230  ins_pipe( pipe_slow );
10231%}
10232
10233// float compare and set condition codes in EFLAGS by XMM regs
10234instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10235  predicate(UseSSE>=1);
10236  match(Set cr (CmpF src1 src2));
10237  ins_cost(145);
10238  format %{ "UCOMISS $src1,$src2\n\t"
10239            "JNP,s   exit\n\t"
10240            "PUSHF\t# saw NaN, set CF\n\t"
10241            "AND     [rsp], #0xffffff2b\n\t"
10242            "POPF\n"
10243    "exit:" %}
10244  ins_encode %{
10245    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10246    emit_cmpfp_fixup(_masm);
10247  %}
10248  ins_pipe( pipe_slow );
10249%}
10250
10251instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10252  predicate(UseSSE>=1);
10253  match(Set cr (CmpF src1 src2));
10254  ins_cost(100);
10255  format %{ "UCOMISS $src1,$src2" %}
10256  ins_encode %{
10257    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10258  %}
10259  ins_pipe( pipe_slow );
10260%}
10261
10262// float compare and set condition codes in EFLAGS by XMM regs
10263instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10264  predicate(UseSSE>=1);
10265  match(Set cr (CmpF src1 (LoadF src2)));
10266  ins_cost(165);
10267  format %{ "UCOMISS $src1,$src2\n\t"
10268            "JNP,s   exit\n\t"
10269            "PUSHF\t# saw NaN, set CF\n\t"
10270            "AND     [rsp], #0xffffff2b\n\t"
10271            "POPF\n"
10272    "exit:" %}
10273  ins_encode %{
10274    __ ucomiss($src1$$XMMRegister, $src2$$Address);
10275    emit_cmpfp_fixup(_masm);
10276  %}
10277  ins_pipe( pipe_slow );
10278%}
10279
10280instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10281  predicate(UseSSE>=1);
10282  match(Set cr (CmpF src1 (LoadF src2)));
10283  ins_cost(100);
10284  format %{ "UCOMISS $src1,$src2" %}
10285  ins_encode %{
10286    __ ucomiss($src1$$XMMRegister, $src2$$Address);
10287  %}
10288  ins_pipe( pipe_slow );
10289%}
10290
10291// Compare into -1,0,1 in XMM
10292instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10293  predicate(UseSSE>=1);
10294  match(Set dst (CmpF3 src1 src2));
10295  effect(KILL cr);
10296  ins_cost(255);
10297  format %{ "UCOMISS $src1, $src2\n\t"
10298            "MOV     $dst, #-1\n\t"
10299            "JP,s    done\n\t"
10300            "JB,s    done\n\t"
10301            "SETNE   $dst\n\t"
10302            "MOVZB   $dst, $dst\n"
10303    "done:" %}
10304  ins_encode %{
10305    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10306    emit_cmpfp3(_masm, $dst$$Register);
10307  %}
10308  ins_pipe( pipe_slow );
10309%}
10310
10311// Compare into -1,0,1 in XMM and memory
10312instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10313  predicate(UseSSE>=1);
10314  match(Set dst (CmpF3 src1 (LoadF src2)));
10315  effect(KILL cr);
10316  ins_cost(275);
10317  format %{ "UCOMISS $src1, $src2\n\t"
10318            "MOV     $dst, #-1\n\t"
10319            "JP,s    done\n\t"
10320            "JB,s    done\n\t"
10321            "SETNE   $dst\n\t"
10322            "MOVZB   $dst, $dst\n"
10323    "done:" %}
10324  ins_encode %{
10325    __ ucomiss($src1$$XMMRegister, $src2$$Address);
10326    emit_cmpfp3(_masm, $dst$$Register);
10327  %}
10328  ins_pipe( pipe_slow );
10329%}
10330
10331// Spill to obtain 24-bit precision
10332instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10333  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10334  match(Set dst (SubF src1 src2));
10335
10336  format %{ "FSUB   $dst,$src1 - $src2" %}
10337  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10338  ins_encode( Push_Reg_FPR(src1),
10339              OpcReg_FPR(src2),
10340              Pop_Mem_FPR(dst) );
10341  ins_pipe( fpu_mem_reg_reg );
10342%}
10343//
10344// This instruction does not round to 24-bits
10345instruct subFPR_reg(regFPR dst, regFPR src) %{
10346  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10347  match(Set dst (SubF dst src));
10348
10349  format %{ "FSUB   $dst,$src" %}
10350  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10351  ins_encode( Push_Reg_FPR(src),
10352              OpcP, RegOpc(dst) );
10353  ins_pipe( fpu_reg_reg );
10354%}
10355
10356// Spill to obtain 24-bit precision
10357instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10358  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10359  match(Set dst (AddF src1 src2));
10360
10361  format %{ "FADD   $dst,$src1,$src2" %}
10362  opcode(0xD8, 0x0); /* D8 C0+i */
10363  ins_encode( Push_Reg_FPR(src2),
10364              OpcReg_FPR(src1),
10365              Pop_Mem_FPR(dst) );
10366  ins_pipe( fpu_mem_reg_reg );
10367%}
10368//
10369// This instruction does not round to 24-bits
10370instruct addFPR_reg(regFPR dst, regFPR src) %{
10371  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10372  match(Set dst (AddF dst src));
10373
10374  format %{ "FLD    $src\n\t"
10375            "FADDp  $dst,ST" %}
10376  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10377  ins_encode( Push_Reg_FPR(src),
10378              OpcP, RegOpc(dst) );
10379  ins_pipe( fpu_reg_reg );
10380%}
10381
10382instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10383  predicate(UseSSE==0);
10384  match(Set dst (AbsF src));
10385  ins_cost(100);
10386  format %{ "FABS" %}
10387  opcode(0xE1, 0xD9);
10388  ins_encode( OpcS, OpcP );
10389  ins_pipe( fpu_reg_reg );
10390%}
10391
10392instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10393  predicate(UseSSE==0);
10394  match(Set dst (NegF src));
10395  ins_cost(100);
10396  format %{ "FCHS" %}
10397  opcode(0xE0, 0xD9);
10398  ins_encode( OpcS, OpcP );
10399  ins_pipe( fpu_reg_reg );
10400%}
10401
10402// Cisc-alternate to addFPR_reg
10403// Spill to obtain 24-bit precision
10404instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10405  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10406  match(Set dst (AddF src1 (LoadF src2)));
10407
10408  format %{ "FLD    $src2\n\t"
10409            "FADD   ST,$src1\n\t"
10410            "FSTP_S $dst" %}
10411  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10412  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10413              OpcReg_FPR(src1),
10414              Pop_Mem_FPR(dst) );
10415  ins_pipe( fpu_mem_reg_mem );
10416%}
10417//
10418// Cisc-alternate to addFPR_reg
10419// This instruction does not round to 24-bits
10420instruct addFPR_reg_mem(regFPR dst, memory src) %{
10421  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10422  match(Set dst (AddF dst (LoadF src)));
10423
10424  format %{ "FADD   $dst,$src" %}
10425  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10426  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10427              OpcP, RegOpc(dst) );
10428  ins_pipe( fpu_reg_mem );
10429%}
10430
// The following two instructions are for _222_mpegaudio
10432// Spill to obtain 24-bit precision
10433instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10434  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10435  match(Set dst (AddF src1 src2));
10436
10437  format %{ "FADD   $dst,$src1,$src2" %}
10438  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10439  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10440              OpcReg_FPR(src2),
10441              Pop_Mem_FPR(dst) );
10442  ins_pipe( fpu_mem_reg_mem );
10443%}
10444
10445// Cisc-spill variant
10446// Spill to obtain 24-bit precision
10447instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10448  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10449  match(Set dst (AddF src1 (LoadF src2)));
10450
10451  format %{ "FADD   $dst,$src1,$src2 cisc" %}
10452  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10453  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10454              set_instruction_start,
10455              OpcP, RMopc_Mem(secondary,src1),
10456              Pop_Mem_FPR(dst) );
10457  ins_pipe( fpu_mem_mem_mem );
10458%}
10459
10460// Spill to obtain 24-bit precision
10461instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10462  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10463  match(Set dst (AddF src1 src2));
10464
10465  format %{ "FADD   $dst,$src1,$src2" %}
10466  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10467  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10468              set_instruction_start,
10469              OpcP, RMopc_Mem(secondary,src1),
10470              Pop_Mem_FPR(dst) );
10471  ins_pipe( fpu_mem_mem_mem );
10472%}
10473
10474
10475// Spill to obtain 24-bit precision
10476instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10477  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10478  match(Set dst (AddF src con));
10479  format %{ "FLD    $src\n\t"
10480            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10481            "FSTP_S $dst"  %}
10482  ins_encode %{
10483    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10484    __ fadd_s($constantaddress($con));
10485    __ fstp_s(Address(rsp, $dst$$disp));
10486  %}
10487  ins_pipe(fpu_mem_reg_con);
10488%}
10489//
10490// This instruction does not round to 24-bits
10491instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10492  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10493  match(Set dst (AddF src con));
10494  format %{ "FLD    $src\n\t"
10495            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10496            "FSTP   $dst"  %}
10497  ins_encode %{
10498    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10499    __ fadd_s($constantaddress($con));
10500    __ fstp_d($dst$$reg);
10501  %}
10502  ins_pipe(fpu_reg_reg_con);
10503%}
10504
10505// Spill to obtain 24-bit precision
10506instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10507  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10508  match(Set dst (MulF src1 src2));
10509
10510  format %{ "FLD    $src1\n\t"
10511            "FMUL   $src2\n\t"
10512            "FSTP_S $dst"  %}
10513  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10514  ins_encode( Push_Reg_FPR(src1),
10515              OpcReg_FPR(src2),
10516              Pop_Mem_FPR(dst) );
10517  ins_pipe( fpu_mem_reg_reg );
10518%}
10519//
10520// This instruction does not round to 24-bits
10521instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10522  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10523  match(Set dst (MulF src1 src2));
10524
10525  format %{ "FLD    $src1\n\t"
10526            "FMUL   $src2\n\t"
10527            "FSTP_S $dst"  %}
10528  opcode(0xD8, 0x1); /* D8 C8+i */
10529  ins_encode( Push_Reg_FPR(src2),
10530              OpcReg_FPR(src1),
10531              Pop_Reg_FPR(dst) );
10532  ins_pipe( fpu_reg_reg_reg );
10533%}
10534
10535
10536// Spill to obtain 24-bit precision
10537// Cisc-alternate to reg-reg multiply
10538instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10539  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10540  match(Set dst (MulF src1 (LoadF src2)));
10541
10542  format %{ "FLD_S  $src2\n\t"
10543            "FMUL   $src1\n\t"
10544            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10546  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10547              OpcReg_FPR(src1),
10548              Pop_Mem_FPR(dst) );
10549  ins_pipe( fpu_mem_reg_mem );
10550%}
10551//
10552// This instruction does not round to 24-bits
10553// Cisc-alternate to reg-reg multiply
10554instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10555  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10556  match(Set dst (MulF src1 (LoadF src2)));
10557
10558  format %{ "FMUL   $dst,$src1,$src2" %}
10559  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10560  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10561              OpcReg_FPR(src1),
10562              Pop_Reg_FPR(dst) );
10563  ins_pipe( fpu_reg_reg_mem );
10564%}
10565
10566// Spill to obtain 24-bit precision
10567instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10568  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10569  match(Set dst (MulF src1 src2));
10570
10571  format %{ "FMUL   $dst,$src1,$src2" %}
10572  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10573  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10574              set_instruction_start,
10575              OpcP, RMopc_Mem(secondary,src1),
10576              Pop_Mem_FPR(dst) );
10577  ins_pipe( fpu_mem_mem_mem );
10578%}
10579
10580// Spill to obtain 24-bit precision
10581instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10582  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10583  match(Set dst (MulF src con));
10584
10585  format %{ "FLD    $src\n\t"
10586            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10587            "FSTP_S $dst"  %}
10588  ins_encode %{
10589    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10590    __ fmul_s($constantaddress($con));
10591    __ fstp_s(Address(rsp, $dst$$disp));
10592  %}
10593  ins_pipe(fpu_mem_reg_con);
10594%}
10595//
10596// This instruction does not round to 24-bits
10597instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10598  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10599  match(Set dst (MulF src con));
10600
10601  format %{ "FLD    $src\n\t"
10602            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10603            "FSTP   $dst"  %}
10604  ins_encode %{
10605    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10606    __ fmul_s($constantaddress($con));
10607    __ fstp_d($dst$$reg);
10608  %}
10609  ins_pipe(fpu_reg_reg_con);
10610%}
10611
10612
10613//
10614// MACRO1 -- subsume unshared load into mulFPR
10615// This instruction does not round to 24-bits
10616instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10617  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10618  match(Set dst (MulF (LoadF mem1) src));
10619
10620  format %{ "FLD    $mem1    ===MACRO1===\n\t"
10621            "FMUL   ST,$src\n\t"
10622            "FSTP   $dst" %}
10623  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10624  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10625              OpcReg_FPR(src),
10626              Pop_Reg_FPR(dst) );
10627  ins_pipe( fpu_reg_reg_mem );
10628%}
10629//
10630// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10631// This instruction does not round to 24-bits
10632instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10633  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10634  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10635  ins_cost(95);
10636
10637  format %{ "FLD    $mem1     ===MACRO2===\n\t"
10638            "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10639            "FADD   ST,$src2\n\t"
10640            "FSTP   $dst" %}
10641  opcode(0xD9); /* LoadF D9 /0 */
10642  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10643              FMul_ST_reg(src1),
10644              FAdd_ST_reg(src2),
10645              Pop_Reg_FPR(dst) );
10646  ins_pipe( fpu_reg_mem_reg_reg );
10647%}
10648
10649// MACRO3 -- addFPR a mulFPR
10650// This instruction does not round to 24-bits.  It is a '2-address'
10651// instruction in that the result goes back to src2.  This eliminates
10652// a move from the macro; possibly the register allocator will have
10653// to add it back (and maybe not).
10654instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10655  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10656  match(Set src2 (AddF (MulF src0 src1) src2));
10657
10658  format %{ "FLD    $src0     ===MACRO3===\n\t"
10659            "FMUL   ST,$src1\n\t"
10660            "FADDP  $src2,ST" %}
10661  opcode(0xD9); /* LoadF D9 /0 */
10662  ins_encode( Push_Reg_FPR(src0),
10663              FMul_ST_reg(src1),
10664              FAddP_reg_ST(src2) );
10665  ins_pipe( fpu_reg_reg_reg );
10666%}
10667
10668// MACRO4 -- divFPR subFPR
10669// This instruction does not round to 24-bits
10670instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10671  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10672  match(Set dst (DivF (SubF src2 src1) src3));
10673
10674  format %{ "FLD    $src2   ===MACRO4===\n\t"
10675            "FSUB   ST,$src1\n\t"
10676            "FDIV   ST,$src3\n\t"
10677            "FSTP  $dst" %}
10678  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10679  ins_encode( Push_Reg_FPR(src2),
10680              subFPR_divFPR_encode(src1,src3),
10681              Pop_Reg_FPR(dst) );
10682  ins_pipe( fpu_reg_reg_reg_reg );
10683%}
10684
10685// Spill to obtain 24-bit precision
10686instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10687  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10688  match(Set dst (DivF src1 src2));
10689
10690  format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10692  ins_encode( Push_Reg_FPR(src1),
10693              OpcReg_FPR(src2),
10694              Pop_Mem_FPR(dst) );
10695  ins_pipe( fpu_mem_reg_reg );
10696%}
10697//
10698// This instruction does not round to 24-bits
10699instruct divFPR_reg(regFPR dst, regFPR src) %{
10700  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10701  match(Set dst (DivF dst src));
10702
10703  format %{ "FDIV   $dst,$src" %}
10704  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10705  ins_encode( Push_Reg_FPR(src),
10706              OpcP, RegOpc(dst) );
10707  ins_pipe( fpu_reg_reg );
10708%}
10709
10710
10711// Spill to obtain 24-bit precision
10712instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10713  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10714  match(Set dst (ModF src1 src2));
10715  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10716
10717  format %{ "FMOD   $dst,$src1,$src2" %}
10718  ins_encode( Push_Reg_Mod_DPR(src1, src2),
10719              emitModDPR(),
10720              Push_Result_Mod_DPR(src2),
10721              Pop_Mem_FPR(dst));
10722  ins_pipe( pipe_slow );
10723%}
10724//
10725// This instruction does not round to 24-bits
10726instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10727  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10728  match(Set dst (ModF dst src));
10729  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10730
10731  format %{ "FMOD   $dst,$src" %}
10732  ins_encode(Push_Reg_Mod_DPR(dst, src),
10733              emitModDPR(),
10734              Push_Result_Mod_DPR(src),
10735              Pop_Reg_FPR(dst));
10736  ins_pipe( pipe_slow );
10737%}
10738
10739instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10740  predicate(UseSSE>=1);
10741  match(Set dst (ModF src0 src1));
10742  effect(KILL rax, KILL cr);
10743  format %{ "SUB    ESP,4\t # FMOD\n"
10744          "\tMOVSS  [ESP+0],$src1\n"
10745          "\tFLD_S  [ESP+0]\n"
10746          "\tMOVSS  [ESP+0],$src0\n"
10747          "\tFLD_S  [ESP+0]\n"
10748     "loop:\tFPREM\n"
10749          "\tFWAIT\n"
10750          "\tFNSTSW AX\n"
10751          "\tSAHF\n"
10752          "\tJP     loop\n"
10753          "\tFSTP_S [ESP+0]\n"
10754          "\tMOVSS  $dst,[ESP+0]\n"
10755          "\tADD    ESP,4\n"
10756          "\tFSTP   ST0\t # Restore FPU Stack"
10757    %}
10758  ins_cost(250);
10759  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10760  ins_pipe( pipe_slow );
10761%}
10762
10763
10764//----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted.  Please keep it that way!
10766
10767instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10768  predicate(UseSSE==0);
10769  match(Set dst (RoundFloat src));
10770  ins_cost(125);
10771  format %{ "FST_S  $dst,$src\t# F-round" %}
10772  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10773  ins_pipe( fpu_mem_reg );
10774%}
10775
10776instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10777  predicate(UseSSE<=1);
10778  match(Set dst (RoundDouble src));
10779  ins_cost(125);
10780  format %{ "FST_D  $dst,$src\t# D-round" %}
10781  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10782  ins_pipe( fpu_mem_reg );
10783%}
10784
// Force rounding to 24-bit precision and 8-bit exponent
10786instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10787  predicate(UseSSE==0);
10788  match(Set dst (ConvD2F src));
10789  format %{ "FST_S  $dst,$src\t# F-round" %}
10790  expand %{
10791    roundFloat_mem_reg(dst,src);
10792  %}
10793%}
10794
// Force rounding to 24-bit precision and 8-bit exponent
10796instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10797  predicate(UseSSE==1);
10798  match(Set dst (ConvD2F src));
10799  effect( KILL cr );
10800  format %{ "SUB    ESP,4\n\t"
10801            "FST_S  [ESP],$src\t# F-round\n\t"
10802            "MOVSS  $dst,[ESP]\n\t"
10803            "ADD ESP,4" %}
10804  ins_encode %{
10805    __ subptr(rsp, 4);
10806    if ($src$$reg != FPR1L_enc) {
10807      __ fld_s($src$$reg-1);
10808      __ fstp_s(Address(rsp, 0));
10809    } else {
10810      __ fst_s(Address(rsp, 0));
10811    }
10812    __ movflt($dst$$XMMRegister, Address(rsp, 0));
10813    __ addptr(rsp, 4);
10814  %}
10815  ins_pipe( pipe_slow );
10816%}
10817
10818// Force rounding double precision to single precision
10819instruct convD2F_reg(regF dst, regD src) %{
10820  predicate(UseSSE>=2);
10821  match(Set dst (ConvD2F src));
10822  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10823  ins_encode %{
10824    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10825  %}
10826  ins_pipe( pipe_slow );
10827%}
10828
10829instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10830  predicate(UseSSE==0);
10831  match(Set dst (ConvF2D src));
10832  format %{ "FST_S  $dst,$src\t# D-round" %}
10833  ins_encode( Pop_Reg_Reg_DPR(dst, src));
10834  ins_pipe( fpu_reg_reg );
10835%}
10836
10837instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10838  predicate(UseSSE==1);
10839  match(Set dst (ConvF2D src));
10840  format %{ "FST_D  $dst,$src\t# D-round" %}
10841  expand %{
10842    roundDouble_mem_reg(dst,src);
10843  %}
10844%}
10845
10846instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10847  predicate(UseSSE==1);
10848  match(Set dst (ConvF2D src));
10849  effect( KILL cr );
10850  format %{ "SUB    ESP,4\n\t"
10851            "MOVSS  [ESP] $src\n\t"
10852            "FLD_S  [ESP]\n\t"
10853            "ADD    ESP,4\n\t"
10854            "FSTP   $dst\t# D-round" %}
10855  ins_encode %{
10856    __ subptr(rsp, 4);
10857    __ movflt(Address(rsp, 0), $src$$XMMRegister);
10858    __ fld_s(Address(rsp, 0));
10859    __ addptr(rsp, 4);
10860    __ fstp_d($dst$$reg);
10861  %}
10862  ins_pipe( pipe_slow );
10863%}
10864
10865instruct convF2D_reg(regD dst, regF src) %{
10866  predicate(UseSSE>=2);
10867  match(Set dst (ConvF2D src));
10868  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10869  ins_encode %{
10870    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10871  %}
10872  ins_pipe( pipe_slow );
10873%}
10874
10875// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10876instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10877  predicate(UseSSE<=1);
10878  match(Set dst (ConvD2I src));
10879  effect( KILL tmp, KILL cr );
10880  format %{ "FLD    $src\t# Convert double to int \n\t"
10881            "FLDCW  trunc mode\n\t"
10882            "SUB    ESP,4\n\t"
10883            "FISTp  [ESP + #0]\n\t"
10884            "FLDCW  std/24-bit mode\n\t"
10885            "POP    EAX\n\t"
10886            "CMP    EAX,0x80000000\n\t"
10887            "JNE,s  fast\n\t"
10888            "FLD_D  $src\n\t"
10889            "CALL   d2i_wrapper\n"
10890      "fast:" %}
10891  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10892  ins_pipe( pipe_slow );
10893%}
10894
10895// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10896instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10897  predicate(UseSSE>=2);
10898  match(Set dst (ConvD2I src));
10899  effect( KILL tmp, KILL cr );
10900  format %{ "CVTTSD2SI $dst, $src\n\t"
10901            "CMP    $dst,0x80000000\n\t"
10902            "JNE,s  fast\n\t"
10903            "SUB    ESP, 8\n\t"
10904            "MOVSD  [ESP], $src\n\t"
10905            "FLD_D  [ESP]\n\t"
10906            "ADD    ESP, 8\n\t"
10907            "CALL   d2i_wrapper\n"
10908      "fast:" %}
10909  ins_encode %{
10910    Label fast;
10911    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10912    __ cmpl($dst$$Register, 0x80000000);
10913    __ jccb(Assembler::notEqual, fast);
10914    __ subptr(rsp, 8);
10915    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10916    __ fld_d(Address(rsp, 0));
10917    __ addptr(rsp, 8);
10918    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10919    __ bind(fast);
10920  %}
10921  ins_pipe( pipe_slow );
10922%}
10923
10924instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10925  predicate(UseSSE<=1);
10926  match(Set dst (ConvD2L src));
10927  effect( KILL cr );
10928  format %{ "FLD    $src\t# Convert double to long\n\t"
10929            "FLDCW  trunc mode\n\t"
10930            "SUB    ESP,8\n\t"
10931            "FISTp  [ESP + #0]\n\t"
10932            "FLDCW  std/24-bit mode\n\t"
10933            "POP    EAX\n\t"
10934            "POP    EDX\n\t"
10935            "CMP    EDX,0x80000000\n\t"
10936            "JNE,s  fast\n\t"
10937            "TEST   EAX,EAX\n\t"
10938            "JNE,s  fast\n\t"
10939            "FLD    $src\n\t"
10940            "CALL   d2l_wrapper\n"
10941      "fast:" %}
10942  ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10943  ins_pipe( pipe_slow );
10944%}
10945
10946// XMM lacks a float/double->long conversion, so use the old FPU stack.
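// (In 32-bit mode CVTTSD2SI/CVTTSS2SI can only produce a 32-bit integer,
// whereas FISTP can store a full 64-bit result, so the value is bounced
// through memory onto the x87 stack and converted there.)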
10947instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10948  predicate (UseSSE>=2);
10949  match(Set dst (ConvD2L src));
10950  effect( KILL cr );
10951  format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10952            "MOVSD  [ESP],$src\n\t"
10953            "FLD_D  [ESP]\n\t"
10954            "FLDCW  trunc mode\n\t"
10955            "FISTp  [ESP + #0]\n\t"
10956            "FLDCW  std/24-bit mode\n\t"
10957            "POP    EAX\n\t"
10958            "POP    EDX\n\t"
10959            "CMP    EDX,0x80000000\n\t"
10960            "JNE,s  fast\n\t"
10961            "TEST   EAX,EAX\n\t"
10962            "JNE,s  fast\n\t"
10963            "SUB    ESP,8\n\t"
10964            "MOVSD  [ESP],$src\n\t"
10965            "FLD_D  [ESP]\n\t"
10966            "ADD    ESP,8\n\t"
10967            "CALL   d2l_wrapper\n"
10968      "fast:" %}
10969  ins_encode %{
10970    Label fast;
10971    __ subptr(rsp, 8);
10972    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10973    __ fld_d(Address(rsp, 0));
10974    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10975    __ fistp_d(Address(rsp, 0));
10976    // Restore the rounding mode, mask the exception
10977    if (Compile::current()->in_24_bit_fp_mode()) {
10978      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10979    } else {
10980      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10981    }
10982    // Load the converted long, adjust CPU stack
10983    __ pop(rax);
10984    __ pop(rdx);
10985    __ cmpl(rdx, 0x80000000);
10986    __ jccb(Assembler::notEqual, fast);
10987    __ testl(rax, rax);
10988    __ jccb(Assembler::notEqual, fast);
10989    __ subptr(rsp, 8);
10990    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10991    __ fld_d(Address(rsp, 0));
10992    __ addptr(rsp, 8);
10993    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10994    __ bind(fast);
10995  %}
10996  ins_pipe( pipe_slow );
10997%}
10998
// Convert a float to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned float down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// go the slow path if needed.
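//
// A sketch of the Java-level corner cases the wrapper handles (per the rules
// for the (int) conversion):
//
//   NaN                              -> 0
//   value >= 2^31  (including +Inf)  -> Integer.MAX_VALUE
//   value <  -2^31 (including -Inf)  -> Integer.MIN_VALUE
//
// The hardware conversion instead produces the sentinel 0x80000000 in all of
// these cases, which is why the fast path compares against that value and
// only then calls the wrapper.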
11005instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
11006  predicate(UseSSE==0);
11007  match(Set dst (ConvF2I src));
11008  effect( KILL tmp, KILL cr );
11009  format %{ "FLD    $src\t# Convert float to int \n\t"
11010            "FLDCW  trunc mode\n\t"
11011            "SUB    ESP,4\n\t"
11012            "FISTp  [ESP + #0]\n\t"
11013            "FLDCW  std/24-bit mode\n\t"
11014            "POP    EAX\n\t"
11015            "CMP    EAX,0x80000000\n\t"
11016            "JNE,s  fast\n\t"
11017            "FLD    $src\n\t"
11018            "CALL   d2i_wrapper\n"
11019      "fast:" %}
11020  // DPR2I_encoding works for FPR2I
11021  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
11022  ins_pipe( pipe_slow );
11023%}
11024
11025// Convert a float in xmm to an int reg.
11026instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11027  predicate(UseSSE>=1);
11028  match(Set dst (ConvF2I src));
11029  effect( KILL tmp, KILL cr );
11030  format %{ "CVTTSS2SI $dst, $src\n\t"
11031            "CMP    $dst,0x80000000\n\t"
11032            "JNE,s  fast\n\t"
11033            "SUB    ESP, 4\n\t"
11034            "MOVSS  [ESP], $src\n\t"
11035            "FLD    [ESP]\n\t"
11036            "ADD    ESP, 4\n\t"
11037            "CALL   d2i_wrapper\n"
11038      "fast:" %}
11039  ins_encode %{
11040    Label fast;
11041    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
11042    __ cmpl($dst$$Register, 0x80000000);
11043    __ jccb(Assembler::notEqual, fast);
11044    __ subptr(rsp, 4);
11045    __ movflt(Address(rsp, 0), $src$$XMMRegister);
11046    __ fld_s(Address(rsp, 0));
11047    __ addptr(rsp, 4);
11048    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11049    __ bind(fast);
11050  %}
11051  ins_pipe( pipe_slow );
11052%}
11053
11054instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
11055  predicate(UseSSE==0);
11056  match(Set dst (ConvF2L src));
11057  effect( KILL cr );
11058  format %{ "FLD    $src\t# Convert float to long\n\t"
11059            "FLDCW  trunc mode\n\t"
11060            "SUB    ESP,8\n\t"
11061            "FISTp  [ESP + #0]\n\t"
11062            "FLDCW  std/24-bit mode\n\t"
11063            "POP    EAX\n\t"
11064            "POP    EDX\n\t"
11065            "CMP    EDX,0x80000000\n\t"
11066            "JNE,s  fast\n\t"
11067            "TEST   EAX,EAX\n\t"
11068            "JNE,s  fast\n\t"
11069            "FLD    $src\n\t"
11070            "CALL   d2l_wrapper\n"
11071      "fast:" %}
11072  // DPR2L_encoding works for FPR2L
11073  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
11074  ins_pipe( pipe_slow );
11075%}
11076
11077// XMM lacks a float/double->long conversion, so use the old FPU stack.
11078instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11079  predicate (UseSSE>=1);
11080  match(Set dst (ConvF2L src));
11081  effect( KILL cr );
11082  format %{ "SUB    ESP,8\t# Convert float to long\n\t"
11083            "MOVSS  [ESP],$src\n\t"
11084            "FLD_S  [ESP]\n\t"
11085            "FLDCW  trunc mode\n\t"
11086            "FISTp  [ESP + #0]\n\t"
11087            "FLDCW  std/24-bit mode\n\t"
11088            "POP    EAX\n\t"
11089            "POP    EDX\n\t"
11090            "CMP    EDX,0x80000000\n\t"
11091            "JNE,s  fast\n\t"
11092            "TEST   EAX,EAX\n\t"
11093            "JNE,s  fast\n\t"
11094            "SUB    ESP,4\t# Convert float to long\n\t"
11095            "MOVSS  [ESP],$src\n\t"
11096            "FLD_S  [ESP]\n\t"
11097            "ADD    ESP,4\n\t"
11098            "CALL   d2l_wrapper\n"
11099      "fast:" %}
11100  ins_encode %{
11101    Label fast;
11102    __ subptr(rsp, 8);
11103    __ movflt(Address(rsp, 0), $src$$XMMRegister);
11104    __ fld_s(Address(rsp, 0));
11105    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11106    __ fistp_d(Address(rsp, 0));
11107    // Restore the rounding mode, mask the exception
11108    if (Compile::current()->in_24_bit_fp_mode()) {
11109      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11110    } else {
11111      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11112    }
11113    // Load the converted long, adjust CPU stack
11114    __ pop(rax);
11115    __ pop(rdx);
11116    __ cmpl(rdx, 0x80000000);
11117    __ jccb(Assembler::notEqual, fast);
11118    __ testl(rax, rax);
11119    __ jccb(Assembler::notEqual, fast);
11120    __ subptr(rsp, 4);
11121    __ movflt(Address(rsp, 0), $src$$XMMRegister);
11122    __ fld_s(Address(rsp, 0));
11123    __ addptr(rsp, 4);
11124    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11125    __ bind(fast);
11126  %}
11127  ins_pipe( pipe_slow );
11128%}
11129
11130instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11131  predicate( UseSSE<=1 );
11132  match(Set dst (ConvI2D src));
11133  format %{ "FILD   $src\n\t"
11134            "FSTP   $dst" %}
11135  opcode(0xDB, 0x0);  /* DB /0 */
11136  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11137  ins_pipe( fpu_reg_mem );
11138%}
11139
11140instruct convI2D_reg(regD dst, rRegI src) %{
11141  predicate( UseSSE>=2 && !UseXmmI2D );
11142  match(Set dst (ConvI2D src));
11143  format %{ "CVTSI2SD $dst,$src" %}
11144  ins_encode %{
11145    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11146  %}
11147  ins_pipe( pipe_slow );
11148%}
11149
11150instruct convI2D_mem(regD dst, memory mem) %{
11151  predicate( UseSSE>=2 );
11152  match(Set dst (ConvI2D (LoadI mem)));
11153  format %{ "CVTSI2SD $dst,$mem" %}
11154  ins_encode %{
11155    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11156  %}
11157  ins_pipe( pipe_slow );
11158%}
11159
11160instruct convXI2D_reg(regD dst, rRegI src)
11161%{
11162  predicate( UseSSE>=2 && UseXmmI2D );
11163  match(Set dst (ConvI2D src));
11164
11165  format %{ "MOVD  $dst,$src\n\t"
11166            "CVTDQ2PD $dst,$dst\t# i2d" %}
11167  ins_encode %{
11168    __ movdl($dst$$XMMRegister, $src$$Register);
11169    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11170  %}
11171  ins_pipe(pipe_slow); // XXX
11172%}
11173
11174instruct convI2DPR_mem(regDPR dst, memory mem) %{
11175  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11176  match(Set dst (ConvI2D (LoadI mem)));
11177  format %{ "FILD   $mem\n\t"
11178            "FSTP   $dst" %}
11179  opcode(0xDB);      /* DB /0 */
11180  ins_encode( OpcP, RMopc_Mem(0x00,mem),
11181              Pop_Reg_DPR(dst));
11182  ins_pipe( fpu_reg_mem );
11183%}
11184
// Convert a byte to a float; no rounding step is needed because values masked
// to 0..255 are exactly representable in a 24-bit significand.
11186instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11187  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11188  match(Set dst (ConvI2F src));
11189  format %{ "FILD   $src\n\t"
11190            "FSTP   $dst" %}
11191
11192  opcode(0xDB, 0x0);  /* DB /0 */
11193  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11194  ins_pipe( fpu_reg_mem );
11195%}
11196
11197// In 24-bit mode, force exponent rounding by storing back out
11198instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11199  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11200  match(Set dst (ConvI2F src));
11201  ins_cost(200);
11202  format %{ "FILD   $src\n\t"
11203            "FSTP_S $dst" %}
11204  opcode(0xDB, 0x0);  /* DB /0 */
11205  ins_encode( Push_Mem_I(src),
11206              Pop_Mem_FPR(dst));
11207  ins_pipe( fpu_mem_mem );
11208%}
11209
11210// In 24-bit mode, force exponent rounding by storing back out
11211instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11212  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11213  match(Set dst (ConvI2F (LoadI mem)));
11214  ins_cost(200);
11215  format %{ "FILD   $mem\n\t"
11216            "FSTP_S $dst" %}
11217  opcode(0xDB);  /* DB /0 */
11218  ins_encode( OpcP, RMopc_Mem(0x00,mem),
11219              Pop_Mem_FPR(dst));
11220  ins_pipe( fpu_mem_mem );
11221%}
11222
11223// This instruction does not round to 24-bits
11224instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11225  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11226  match(Set dst (ConvI2F src));
11227  format %{ "FILD   $src\n\t"
11228            "FSTP   $dst" %}
11229  opcode(0xDB, 0x0);  /* DB /0 */
11230  ins_encode( Push_Mem_I(src),
11231              Pop_Reg_FPR(dst));
11232  ins_pipe( fpu_reg_mem );
11233%}
11234
11235// This instruction does not round to 24-bits
11236instruct convI2FPR_mem(regFPR dst, memory mem) %{
11237  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11238  match(Set dst (ConvI2F (LoadI mem)));
11239  format %{ "FILD   $mem\n\t"
11240            "FSTP   $dst" %}
11241  opcode(0xDB);      /* DB /0 */
11242  ins_encode( OpcP, RMopc_Mem(0x00,mem),
11243              Pop_Reg_FPR(dst));
11244  ins_pipe( fpu_reg_mem );
11245%}
11246
11247// Convert an int to a float in xmm; no rounding step needed.
11248instruct convI2F_reg(regF dst, rRegI src) %{
11249  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11250  match(Set dst (ConvI2F src));
11251  format %{ "CVTSI2SS $dst, $src" %}
11252  ins_encode %{
11253    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11254  %}
11255  ins_pipe( pipe_slow );
11256%}
11257
instruct convXI2F_reg(regF dst, rRegI src)
11259%{
11260  predicate( UseSSE>=2 && UseXmmI2F );
11261  match(Set dst (ConvI2F src));
11262
11263  format %{ "MOVD  $dst,$src\n\t"
11264            "CVTDQ2PS $dst,$dst\t# i2f" %}
11265  ins_encode %{
11266    __ movdl($dst$$XMMRegister, $src$$Register);
11267    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11268  %}
11269  ins_pipe(pipe_slow); // XXX
11270%}
11271
11272instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11273  match(Set dst (ConvI2L src));
11274  effect(KILL cr);
11275  ins_cost(375);
11276  format %{ "MOV    $dst.lo,$src\n\t"
11277            "MOV    $dst.hi,$src\n\t"
11278            "SAR    $dst.hi,31" %}
11279  ins_encode(convert_int_long(dst,src));
11280  ins_pipe( ialu_reg_reg_long );
11281%}
11282
11283// Zero-extend convert int to long
11284instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11285  match(Set dst (AndL (ConvI2L src) mask) );
11286  effect( KILL flags );
11287  ins_cost(250);
11288  format %{ "MOV    $dst.lo,$src\n\t"
11289            "XOR    $dst.hi,$dst.hi" %}
11290  opcode(0x33); // XOR
11291  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11292  ins_pipe( ialu_reg_reg_long );
11293%}
11294
11295// Zero-extend long
11296instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11297  match(Set dst (AndL src mask) );
11298  effect( KILL flags );
11299  ins_cost(250);
11300  format %{ "MOV    $dst.lo,$src.lo\n\t"
11301            "XOR    $dst.hi,$dst.hi\n\t" %}
11302  opcode(0x33); // XOR
11303  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11304  ins_pipe( ialu_reg_reg_long );
11305%}
11306
11307instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11308  predicate (UseSSE<=1);
11309  match(Set dst (ConvL2D src));
11310  effect( KILL cr );
11311  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11312            "PUSH   $src.lo\n\t"
11313            "FILD   ST,[ESP + #0]\n\t"
11314            "ADD    ESP,8\n\t"
11315            "FSTP_D $dst\t# D-round" %}
11316  opcode(0xDF, 0x5);  /* DF /5 */
11317  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11318  ins_pipe( pipe_slow );
11319%}
11320
11321instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11322  predicate (UseSSE>=2);
11323  match(Set dst (ConvL2D src));
11324  effect( KILL cr );
11325  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11326            "PUSH   $src.lo\n\t"
11327            "FILD_D [ESP]\n\t"
11328            "FSTP_D [ESP]\n\t"
11329            "MOVSD  $dst,[ESP]\n\t"
11330            "ADD    ESP,8" %}
11331  opcode(0xDF, 0x5);  /* DF /5 */
11332  ins_encode(convert_long_double2(src), Push_ResultD(dst));
11333  ins_pipe( pipe_slow );
11334%}
11335
11336instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11337  predicate (UseSSE>=1);
11338  match(Set dst (ConvL2F src));
11339  effect( KILL cr );
11340  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11341            "PUSH   $src.lo\n\t"
11342            "FILD_D [ESP]\n\t"
11343            "FSTP_S [ESP]\n\t"
11344            "MOVSS  $dst,[ESP]\n\t"
11345            "ADD    ESP,8" %}
11346  opcode(0xDF, 0x5);  /* DF /5 */
11347  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11348  ins_pipe( pipe_slow );
11349%}
11350
11351instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11352  match(Set dst (ConvL2F src));
11353  effect( KILL cr );
11354  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11355            "PUSH   $src.lo\n\t"
11356            "FILD   ST,[ESP + #0]\n\t"
11357            "ADD    ESP,8\n\t"
11358            "FSTP_S $dst\t# F-round" %}
11359  opcode(0xDF, 0x5);  /* DF /5 */
11360  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11361  ins_pipe( pipe_slow );
11362%}
11363
11364instruct convL2I_reg( rRegI dst, eRegL src ) %{
11365  match(Set dst (ConvL2I src));
11366  effect( DEF dst, USE src );
11367  format %{ "MOV    $dst,$src.lo" %}
11368  ins_encode(enc_CopyL_Lo(dst,src));
11369  ins_pipe( ialu_reg_reg );
11370%}
11371
11372instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11373  match(Set dst (MoveF2I src));
11374  effect( DEF dst, USE src );
11375  ins_cost(100);
11376  format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11377  ins_encode %{
11378    __ movl($dst$$Register, Address(rsp, $src$$disp));
11379  %}
11380  ins_pipe( ialu_reg_mem );
11381%}
11382
11383instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11384  predicate(UseSSE==0);
11385  match(Set dst (MoveF2I src));
11386  effect( DEF dst, USE src );
11387
11388  ins_cost(125);
11389  format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11390  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11391  ins_pipe( fpu_mem_reg );
11392%}
11393
11394instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11395  predicate(UseSSE>=1);
11396  match(Set dst (MoveF2I src));
11397  effect( DEF dst, USE src );
11398
11399  ins_cost(95);
11400  format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11401  ins_encode %{
11402    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11403  %}
11404  ins_pipe( pipe_slow );
11405%}
11406
11407instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11408  predicate(UseSSE>=2);
11409  match(Set dst (MoveF2I src));
11410  effect( DEF dst, USE src );
11411  ins_cost(85);
11412  format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11413  ins_encode %{
11414    __ movdl($dst$$Register, $src$$XMMRegister);
11415  %}
11416  ins_pipe( pipe_slow );
11417%}
11418
11419instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11420  match(Set dst (MoveI2F src));
11421  effect( DEF dst, USE src );
11422
11423  ins_cost(100);
11424  format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11425  ins_encode %{
11426    __ movl(Address(rsp, $dst$$disp), $src$$Register);
11427  %}
11428  ins_pipe( ialu_mem_reg );
11429%}
11430
11431
11432instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11433  predicate(UseSSE==0);
11434  match(Set dst (MoveI2F src));
11435  effect(DEF dst, USE src);
11436
11437  ins_cost(125);
11438  format %{ "FLD_S  $src\n\t"
11439            "FSTP   $dst\t# MoveI2F_stack_reg" %}
11440  opcode(0xD9);               /* D9 /0, FLD m32real */
11441  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11442              Pop_Reg_FPR(dst) );
11443  ins_pipe( fpu_reg_mem );
11444%}
11445
11446instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11447  predicate(UseSSE>=1);
11448  match(Set dst (MoveI2F src));
11449  effect( DEF dst, USE src );
11450
11451  ins_cost(95);
11452  format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11453  ins_encode %{
11454    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11455  %}
11456  ins_pipe( pipe_slow );
11457%}
11458
11459instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11460  predicate(UseSSE>=2);
11461  match(Set dst (MoveI2F src));
11462  effect( DEF dst, USE src );
11463
11464  ins_cost(85);
11465  format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11466  ins_encode %{
11467    __ movdl($dst$$XMMRegister, $src$$Register);
11468  %}
11469  ins_pipe( pipe_slow );
11470%}
11471
11472instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11473  match(Set dst (MoveD2L src));
11474  effect(DEF dst, USE src);
11475
11476  ins_cost(250);
11477  format %{ "MOV    $dst.lo,$src\n\t"
11478            "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11479  opcode(0x8B, 0x8B);
11480  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11481  ins_pipe( ialu_mem_long_reg );
11482%}
11483
11484instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11485  predicate(UseSSE<=1);
11486  match(Set dst (MoveD2L src));
11487  effect(DEF dst, USE src);
11488
11489  ins_cost(125);
11490  format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11491  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11492  ins_pipe( fpu_mem_reg );
11493%}
11494
11495instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11496  predicate(UseSSE>=2);
11497  match(Set dst (MoveD2L src));
11498  effect(DEF dst, USE src);
11499  ins_cost(95);
11500  format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11501  ins_encode %{
11502    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11503  %}
11504  ins_pipe( pipe_slow );
11505%}
11506
11507instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11508  predicate(UseSSE>=2);
11509  match(Set dst (MoveD2L src));
11510  effect(DEF dst, USE src, TEMP tmp);
11511  ins_cost(85);
11512  format %{ "MOVD   $dst.lo,$src\n\t"
11513            "PSHUFLW $tmp,$src,0x4E\n\t"
11514            "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11515  ins_encode %{
11516    __ movdl($dst$$Register, $src$$XMMRegister);
11517    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11518    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11519  %}
11520  ins_pipe( pipe_slow );
11521%}
11522
11523instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11524  match(Set dst (MoveL2D src));
11525  effect(DEF dst, USE src);
11526
11527  ins_cost(200);
11528  format %{ "MOV    $dst,$src.lo\n\t"
11529            "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11530  opcode(0x89, 0x89);
11531  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11532  ins_pipe( ialu_mem_long_reg );
11533%}
11534
11535
11536instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11537  predicate(UseSSE<=1);
11538  match(Set dst (MoveL2D src));
11539  effect(DEF dst, USE src);
11540  ins_cost(125);
11541
11542  format %{ "FLD_D  $src\n\t"
11543            "FSTP   $dst\t# MoveL2D_stack_reg" %}
11544  opcode(0xDD);               /* DD /0, FLD m64real */
11545  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11546              Pop_Reg_DPR(dst) );
11547  ins_pipe( fpu_reg_mem );
11548%}
11549
11550
11551instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11552  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11553  match(Set dst (MoveL2D src));
11554  effect(DEF dst, USE src);
11555
11556  ins_cost(95);
11557  format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11558  ins_encode %{
11559    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11560  %}
11561  ins_pipe( pipe_slow );
11562%}
11563
11564instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11565  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11566  match(Set dst (MoveL2D src));
11567  effect(DEF dst, USE src);
11568
11569  ins_cost(95);
11570  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11571  ins_encode %{
11572    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11573  %}
11574  ins_pipe( pipe_slow );
11575%}
11576
11577instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11578  predicate(UseSSE>=2);
11579  match(Set dst (MoveL2D src));
11580  effect(TEMP dst, USE src, TEMP tmp);
11581  ins_cost(85);
11582  format %{ "MOVD   $dst,$src.lo\n\t"
11583            "MOVD   $tmp,$src.hi\n\t"
11584            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11585  ins_encode %{
11586    __ movdl($dst$$XMMRegister, $src$$Register);
11587    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11588    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11589  %}
11590  ins_pipe( pipe_slow );
11591%}
11592
11593
11594// =======================================================================
11595// fast clearing of an array
11596instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11597  predicate(!((ClearArrayNode*)n)->is_large());
11598  match(Set dummy (ClearArray cnt base));
11599  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11600
11601  format %{ $$template
11602    $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11603    $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11604    $$emit$$"JG     LARGE\n\t"
11605    $$emit$$"SHL    ECX, 1\n\t"
11606    $$emit$$"DEC    ECX\n\t"
11607    $$emit$$"JS     DONE\t# Zero length\n\t"
11608    $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11609    $$emit$$"DEC    ECX\n\t"
11610    $$emit$$"JGE    LOOP\n\t"
11611    $$emit$$"JMP    DONE\n\t"
11612    $$emit$$"# LARGE:\n\t"
11613    if (UseFastStosb) {
11614       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11615       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11616    } else if (UseXMMForObjInit) {
11617       $$emit$$"MOV     RDI,RAX\n\t"
11618       $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11619       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11620       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11621       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11622       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11623       $$emit$$"ADD     0x40,RAX\n\t"
11624       $$emit$$"# L_zero_64_bytes:\n\t"
11625       $$emit$$"SUB     0x8,RCX\n\t"
11626       $$emit$$"JGE     L_loop\n\t"
11627       $$emit$$"ADD     0x4,RCX\n\t"
11628       $$emit$$"JL      L_tail\n\t"
11629       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11630       $$emit$$"ADD     0x20,RAX\n\t"
11631       $$emit$$"SUB     0x4,RCX\n\t"
11632       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11633       $$emit$$"ADD     0x4,RCX\n\t"
11634       $$emit$$"JLE     L_end\n\t"
11635       $$emit$$"DEC     RCX\n\t"
11636       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11637       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11638       $$emit$$"ADD     0x8,RAX\n\t"
11639       $$emit$$"DEC     RCX\n\t"
11640       $$emit$$"JGE     L_sloop\n\t"
11641       $$emit$$"# L_end:\n\t"
11642    } else {
11643       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11644       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11645    }
11646    $$emit$$"# DONE"
11647  %}
11648  ins_encode %{
11649    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11650                 $tmp$$XMMRegister, false);
11651  %}
11652  ins_pipe( pipe_slow );
11653%}
11654
11655instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11656  predicate(((ClearArrayNode*)n)->is_large());
11657  match(Set dummy (ClearArray cnt base));
11658  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11659  format %{ $$template
11660    if (UseFastStosb) {
11661       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11662       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11663       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11664    } else if (UseXMMForObjInit) {
11665       $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11666       $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11667       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11668       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11669       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11670       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11671       $$emit$$"ADD     0x40,RAX\n\t"
11672       $$emit$$"# L_zero_64_bytes:\n\t"
11673       $$emit$$"SUB     0x8,RCX\n\t"
11674       $$emit$$"JGE     L_loop\n\t"
11675       $$emit$$"ADD     0x4,RCX\n\t"
11676       $$emit$$"JL      L_tail\n\t"
11677       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11678       $$emit$$"ADD     0x20,RAX\n\t"
11679       $$emit$$"SUB     0x4,RCX\n\t"
11680       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11681       $$emit$$"ADD     0x4,RCX\n\t"
11682       $$emit$$"JLE     L_end\n\t"
11683       $$emit$$"DEC     RCX\n\t"
11684       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11685       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11686       $$emit$$"ADD     0x8,RAX\n\t"
11687       $$emit$$"DEC     RCX\n\t"
11688       $$emit$$"JGE     L_sloop\n\t"
11689       $$emit$$"# L_end:\n\t"
11690    } else {
11691       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11692       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11693       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11694    }
11695    $$emit$$"# DONE"
11696  %}
11697  ins_encode %{
11698    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11699                 $tmp$$XMMRegister, true);
11700  %}
11701  ins_pipe( pipe_slow );
11702%}
11703
11704instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11705                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
11706  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11707  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11708  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11709
11710  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11711  ins_encode %{
11712    __ string_compare($str1$$Register, $str2$$Register,
11713                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
11714                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11715  %}
11716  ins_pipe( pipe_slow );
11717%}
11718
11719instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11720                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
11721  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11722  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11723  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11724
11725  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11726  ins_encode %{
11727    __ string_compare($str1$$Register, $str2$$Register,
11728                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
11729                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11730  %}
11731  ins_pipe( pipe_slow );
11732%}
11733
11734instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11735                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11736  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11737  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11738  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11739
11740  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11741  ins_encode %{
11742    __ string_compare($str1$$Register, $str2$$Register,
11743                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
11744                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11745  %}
11746  ins_pipe( pipe_slow );
11747%}
11748
11749instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11750                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11751  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11752  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11753  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11754
11755  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11756  ins_encode %{
11757    __ string_compare($str2$$Register, $str1$$Register,
11758                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
11759                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11760  %}
11761  ins_pipe( pipe_slow );
11762%}
11763
11764// fast string equals
11765instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11766                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11767  match(Set result (StrEquals (Binary str1 str2) cnt));
11768  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11769
11770  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11771  ins_encode %{
11772    __ arrays_equals(false, $str1$$Register, $str2$$Register,
11773                     $cnt$$Register, $result$$Register, $tmp3$$Register,
11774                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11775  %}
11776
11777  ins_pipe( pipe_slow );
11778%}
11779
11780// fast search of substring with known size.
11781instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11782                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11783  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11784  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11785  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11786
11787  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11788  ins_encode %{
11789    int icnt2 = (int)$int_cnt2$$constant;
11790    if (icnt2 >= 16) {
11791      // IndexOf for constant substrings with size >= 16 elements
11792      // which don't need to be loaded through the stack.
11793      __ string_indexofC8($str1$$Register, $str2$$Register,
11794                          $cnt1$$Register, $cnt2$$Register,
11795                          icnt2, $result$$Register,
11796                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11797    } else {
11798      // Small strings are loaded through the stack if they cross a page boundary.
11799      __ string_indexof($str1$$Register, $str2$$Register,
11800                        $cnt1$$Register, $cnt2$$Register,
11801                        icnt2, $result$$Register,
11802                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11803    }
11804  %}
11805  ins_pipe( pipe_slow );
11806%}
11807
11808// fast search of substring with known size.
11809instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11810                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11811  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11812  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11813  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11814
11815  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11816  ins_encode %{
11817    int icnt2 = (int)$int_cnt2$$constant;
11818    if (icnt2 >= 8) {
11819      // IndexOf for constant substrings with size >= 8 elements
11820      // which don't need to be loaded through the stack.
11821      __ string_indexofC8($str1$$Register, $str2$$Register,
11822                          $cnt1$$Register, $cnt2$$Register,
11823                          icnt2, $result$$Register,
11824                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11825    } else {
11826      // Small strings are loaded through the stack if they cross a page boundary.
11827      __ string_indexof($str1$$Register, $str2$$Register,
11828                        $cnt1$$Register, $cnt2$$Register,
11829                        icnt2, $result$$Register,
11830                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11831    }
11832  %}
11833  ins_pipe( pipe_slow );
11834%}
11835
11836// fast search of substring with known size.
11837instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11838                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11839  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11840  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11841  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11842
11843  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11844  ins_encode %{
11845    int icnt2 = (int)$int_cnt2$$constant;
11846    if (icnt2 >= 8) {
11847      // IndexOf for constant substrings with size >= 8 elements
11848      // which don't need to be loaded through the stack.
11849      __ string_indexofC8($str1$$Register, $str2$$Register,
11850                          $cnt1$$Register, $cnt2$$Register,
11851                          icnt2, $result$$Register,
11852                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11853    } else {
11854      // Small strings are loaded through the stack if they cross a page boundary.
11855      __ string_indexof($str1$$Register, $str2$$Register,
11856                        $cnt1$$Register, $cnt2$$Register,
11857                        icnt2, $result$$Register,
11858                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11859    }
11860  %}
11861  ins_pipe( pipe_slow );
11862%}
11863
11864instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11865                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11866  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11867  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11868  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11869
11870  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11871  ins_encode %{
11872    __ string_indexof($str1$$Register, $str2$$Register,
11873                      $cnt1$$Register, $cnt2$$Register,
11874                      (-1), $result$$Register,
11875                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11876  %}
11877  ins_pipe( pipe_slow );
11878%}
11879
11880instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11881                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11882  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11883  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11884  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11885
11886  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11887  ins_encode %{
11888    __ string_indexof($str1$$Register, $str2$$Register,
11889                      $cnt1$$Register, $cnt2$$Register,
11890                      (-1), $result$$Register,
11891                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11892  %}
11893  ins_pipe( pipe_slow );
11894%}
11895
11896instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11897                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11898  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11899  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11900  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11901
11902  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11903  ins_encode %{
11904    __ string_indexof($str1$$Register, $str2$$Register,
11905                      $cnt1$$Register, $cnt2$$Register,
11906                      (-1), $result$$Register,
11907                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11908  %}
11909  ins_pipe( pipe_slow );
11910%}
11911
11912instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11913                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11914  predicate(UseSSE42Intrinsics);
11915  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11916  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11917  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11918  ins_encode %{
11919    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11920                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11921  %}
11922  ins_pipe( pipe_slow );
11923%}
11924
11925// fast array equals
11926instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11927                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11928%{
11929  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11930  match(Set result (AryEq ary1 ary2));
11931  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11932  //ins_cost(300);
11933
11934  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11935  ins_encode %{
11936    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11937                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
11938                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11939  %}
11940  ins_pipe( pipe_slow );
11941%}
11942
11943instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11944                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11945%{
11946  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11947  match(Set result (AryEq ary1 ary2));
11948  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11949  //ins_cost(300);
11950
11951  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11952  ins_encode %{
11953    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11954                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
11955                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11956  %}
11957  ins_pipe( pipe_slow );
11958%}
11959
11960instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11961                      regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11962%{
11963  match(Set result (HasNegatives ary1 len));
11964  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11965
11966  format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11967  ins_encode %{
11968    __ has_negatives($ary1$$Register, $len$$Register,
11969                     $result$$Register, $tmp3$$Register,
11970                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11971  %}
11972  ins_pipe( pipe_slow );
11973%}
11974
11975// fast char[] to byte[] compression
11976instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11977                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11978  match(Set result (StrCompressedCopy src (Binary dst len)));
11979  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11980
11981  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
11982  ins_encode %{
11983    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11984                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11985                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11986  %}
11987  ins_pipe( pipe_slow );
11988%}
11989
11990// fast byte[] to char[] inflation
11991instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11992                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11993  match(Set dummy (StrInflatedCopy src (Binary dst len)));
11994  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11995
11996  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11997  ins_encode %{
11998    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11999                          $tmp1$$XMMRegister, $tmp2$$Register);
12000  %}
12001  ins_pipe( pipe_slow );
12002%}
12003
12004// encode char[] to byte[] in ISO_8859_1
12005instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12006                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12007                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12008  match(Set result (EncodeISOArray src (Binary dst len)));
12009  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12010
12011  format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12012  ins_encode %{
12013    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12014                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12015                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
12016  %}
12017  ins_pipe( pipe_slow );
12018%}
12019
12020
12021//----------Control Flow Instructions------------------------------------------
12022// Signed compare Instructions
12023instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12024  match(Set cr (CmpI op1 op2));
12025  effect( DEF cr, USE op1, USE op2 );
12026  format %{ "CMP    $op1,$op2" %}
12027  opcode(0x3B);  /* Opcode 3B /r */
12028  ins_encode( OpcP, RegReg( op1, op2) );
12029  ins_pipe( ialu_cr_reg_reg );
12030%}
12031
12032instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12033  match(Set cr (CmpI op1 op2));
12034  effect( DEF cr, USE op1 );
12035  format %{ "CMP    $op1,$op2" %}
12036  opcode(0x81,0x07);  /* Opcode 81 /7 */
12037  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12038  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12039  ins_pipe( ialu_cr_reg_imm );
12040%}
12041
12042// Cisc-spilled version of compI_eReg
12043instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12044  match(Set cr (CmpI op1 (LoadI op2)));
12045
12046  format %{ "CMP    $op1,$op2" %}
12047  ins_cost(500);
12048  opcode(0x3B);  /* Opcode 3B /r */
12049  ins_encode( OpcP, RegMem( op1, op2) );
12050  ins_pipe( ialu_cr_reg_mem );
12051%}
12052
12053instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
12054  match(Set cr (CmpI src zero));
12055  effect( DEF cr, USE src );
12056
12057  format %{ "TEST   $src,$src" %}
12058  opcode(0x85);
12059  ins_encode( OpcP, RegReg( src, src ) );
12060  ins_pipe( ialu_cr_reg_imm );
12061%}
12062
12063instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
12064  match(Set cr (CmpI (AndI src con) zero));
12065
12066  format %{ "TEST   $src,$con" %}
12067  opcode(0xF7,0x00);
12068  ins_encode( OpcP, RegOpc(src), Con32(con) );
12069  ins_pipe( ialu_cr_reg_imm );
12070%}
12071
12072instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
12073  match(Set cr (CmpI (AndI src mem) zero));
12074
12075  format %{ "TEST   $src,$mem" %}
12076  opcode(0x85);
12077  ins_encode( OpcP, RegMem( src, mem ) );
12078  ins_pipe( ialu_cr_reg_mem );
12079%}
12080
12081// Unsigned compare Instructions; really, same as signed except they
12082// produce an eFlagsRegU instead of eFlagsReg.
12083instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12084  match(Set cr (CmpU op1 op2));
12085
12086  format %{ "CMPu   $op1,$op2" %}
12087  opcode(0x3B);  /* Opcode 3B /r */
12088  ins_encode( OpcP, RegReg( op1, op2) );
12089  ins_pipe( ialu_cr_reg_reg );
12090%}
12091
12092instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12093  match(Set cr (CmpU op1 op2));
12094
12095  format %{ "CMPu   $op1,$op2" %}
12096  opcode(0x81,0x07);  /* Opcode 81 /7 */
12097  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12098  ins_pipe( ialu_cr_reg_imm );
12099%}
12100
12101// Cisc-spilled version of compU_eReg
12102instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12103  match(Set cr (CmpU op1 (LoadI op2)));
12104
12105  format %{ "CMPu   $op1,$op2" %}
12106  ins_cost(500);
12107  opcode(0x3B);  /* Opcode 3B /r */
12108  ins_encode( OpcP, RegMem( op1, op2) );
12109  ins_pipe( ialu_cr_reg_mem );
12110%}
12111
12112// // Cisc-spilled version of cmpU_eReg
12113//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12114//  match(Set cr (CmpU (LoadI op1) op2));
12115//
12116//  format %{ "CMPu   $op1,$op2" %}
12117//  ins_cost(500);
12118//  opcode(0x39);  /* Opcode 39 /r */
12119//  ins_encode( OpcP, RegMem( op1, op2) );
12120//%}
12121
12122instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
12123  match(Set cr (CmpU src zero));
12124
12125  format %{ "TESTu  $src,$src" %}
12126  opcode(0x85);
12127  ins_encode( OpcP, RegReg( src, src ) );
12128  ins_pipe( ialu_cr_reg_imm );
12129%}
12130
12131// Unsigned pointer compare Instructions
12132instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12133  match(Set cr (CmpP op1 op2));
12134
12135  format %{ "CMPu   $op1,$op2" %}
12136  opcode(0x3B);  /* Opcode 3B /r */
12137  ins_encode( OpcP, RegReg( op1, op2) );
12138  ins_pipe( ialu_cr_reg_reg );
12139%}
12140
12141instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12142  match(Set cr (CmpP op1 op2));
12143
12144  format %{ "CMPu   $op1,$op2" %}
12145  opcode(0x81,0x07);  /* Opcode 81 /7 */
12146  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12147  ins_pipe( ialu_cr_reg_imm );
12148%}
12149
12150// Cisc-spilled version of compP_eReg
12151instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12152  match(Set cr (CmpP op1 (LoadP op2)));
12153
12154  format %{ "CMPu   $op1,$op2" %}
12155  ins_cost(500);
12156  opcode(0x3B);  /* Opcode 3B /r */
12157  ins_encode( OpcP, RegMem( op1, op2) );
12158  ins_pipe( ialu_cr_reg_mem );
12159%}
12160
12161// // Cisc-spilled version of cmpP_eReg
12162//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12163//  match(Set cr (CmpP (LoadP op1) op2));
12164//
12165//  format %{ "CMPu   $op1,$op2" %}
12166//  ins_cost(500);
12167//  opcode(0x39);  /* Opcode 39 /r */
12168//  ins_encode( OpcP, RegMem( op1, op2) );
12169//%}
12170
12171// Compare raw pointer (used in out-of-heap check).
12172// Only works because non-oop pointers must be raw pointers
12173// and raw pointers have no anti-dependencies.
12174instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12175  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12176  match(Set cr (CmpP op1 (LoadP op2)));
12177
12178  format %{ "CMPu   $op1,$op2" %}
12179  opcode(0x3B);  /* Opcode 3B /r */
12180  ins_encode( OpcP, RegMem( op1, op2) );
12181  ins_pipe( ialu_cr_reg_mem );
12182%}
12183
12184//
12185// This will generate a signed flags result. This should be ok
12186// since any compare to a zero should be eq/neq.
12187instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12188  match(Set cr (CmpP src zero));
12189
12190  format %{ "TEST   $src,$src" %}
12191  opcode(0x85);
12192  ins_encode( OpcP, RegReg( src, src ) );
12193  ins_pipe( ialu_cr_reg_imm );
12194%}
12195
12196// Cisc-spilled version of testP_reg
12197// This will generate a signed flags result. This should be ok
12198// since any compare to a zero should be eq/neq.
12199instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12200  match(Set cr (CmpP (LoadP op) zero));
12201
12202  format %{ "TEST   $op,0xFFFFFFFF" %}
12203  ins_cost(500);
12204  opcode(0xF7);               /* Opcode F7 /0 */
12205  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12206  ins_pipe( ialu_cr_reg_imm );
12207%}
12208
12209// Yanked all unsigned pointer compare operations.
12210// Pointer compares are done with CmpP which is already unsigned.
12211
12212//----------Max and Min--------------------------------------------------------
12213// Min Instructions
12214////
12215//   *** Min and Max using the conditional move are slower than the
12216//   *** branch version on a Pentium III.
12217// // Conditional move for min
12218//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12219//  effect( USE_DEF op2, USE op1, USE cr );
12220//  format %{ "CMOVlt $op2,$op1\t! min" %}
12221//  opcode(0x4C,0x0F);
12222//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12223//  ins_pipe( pipe_cmov_reg );
12224//%}
12225//
12226//// Min Register with Register (P6 version)
12227//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12228//  predicate(VM_Version::supports_cmov() );
12229//  match(Set op2 (MinI op1 op2));
12230//  ins_cost(200);
12231//  expand %{
12232//    eFlagsReg cr;
12233//    compI_eReg(cr,op1,op2);
12234//    cmovI_reg_lt(op2,op1,cr);
12235//  %}
12236//%}
12237
12238// Min Register with Register (generic version)
12239instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12240  match(Set dst (MinI dst src));
12241  effect(KILL flags);
12242  ins_cost(300);
12243
12244  format %{ "MIN    $dst,$src" %}
12245  opcode(0xCC);
12246  ins_encode( min_enc(dst,src) );
12247  ins_pipe( pipe_slow );
12248%}
12249
12250// Max Register with Register
12251//   *** Min and Max using the conditional move are slower than the
12252//   *** branch version on a Pentium III.
12253// // Conditional move for max
12254//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12255//  effect( USE_DEF op2, USE op1, USE cr );
12256//  format %{ "CMOVgt $op2,$op1\t! max" %}
12257//  opcode(0x4F,0x0F);
12258//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12259//  ins_pipe( pipe_cmov_reg );
12260//%}
12261//
12262// // Max Register with Register (P6 version)
12263//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12264//  predicate(VM_Version::supports_cmov() );
12265//  match(Set op2 (MaxI op1 op2));
12266//  ins_cost(200);
12267//  expand %{
12268//    eFlagsReg cr;
12269//    compI_eReg(cr,op1,op2);
12270//    cmovI_reg_gt(op2,op1,cr);
12271//  %}
12272//%}
12273
12274// Max Register with Register (generic version)
12275instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12276  match(Set dst (MaxI dst src));
12277  effect(KILL flags);
12278  ins_cost(300);
12279
12280  format %{ "MAX    $dst,$src" %}
12281  opcode(0xCC);
12282  ins_encode( max_enc(dst,src) );
12283  ins_pipe( pipe_slow );
12284%}
12285
12286// ============================================================================
12287// Counted Loop limit node which represents exact final iterator value.
12288// Note: the resulting value should fit into integer range since
12289// counted loops have limit check on overflow.
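// A worked example (illustration only, not emitted code): for init = 0, limit = 10, stride = 3
// the formula init + stride * ((limit - init + stride - 1) / stride) used below gives
// 0 + 3 * ((10 - 0 + 3 - 1) / 3) = 3 * 4 = 12, the exact final iterator value at which the loop exits.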
12290instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12291  match(Set limit (LoopLimit (Binary init limit) stride));
12292  effect(TEMP limit_hi, TEMP tmp, KILL flags);
12293  ins_cost(300);
12294
12295  format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride), kills $limit_hi" %}
12296  ins_encode %{
12297    int strd = (int)$stride$$constant;
12298    assert(strd != 1 && strd != -1, "sanity");
12299    int m1 = (strd > 0) ? 1 : -1;
12300    // Convert limit to long (EAX:EDX)
12301    __ cdql();
12302    // Convert init to long (init:tmp)
12303    __ movl($tmp$$Register, $init$$Register);
12304    __ sarl($tmp$$Register, 31);
12305    // $limit - $init
12306    __ subl($limit$$Register, $init$$Register);
12307    __ sbbl($limit_hi$$Register, $tmp$$Register);
12308    // + ($stride - 1)
12309    if (strd > 0) {
12310      __ addl($limit$$Register, (strd - 1));
12311      __ adcl($limit_hi$$Register, 0);
12312      __ movl($tmp$$Register, strd);
12313    } else {
12314      __ addl($limit$$Register, (strd + 1));
12315      __ adcl($limit_hi$$Register, -1);
12316      __ lneg($limit_hi$$Register, $limit$$Register);
12317      __ movl($tmp$$Register, -strd);
12318    }
12319    // signed division: (EAX:EDX) / pos_stride
12320    __ idivl($tmp$$Register);
12321    if (strd < 0) {
12322      // restore sign
12323      __ negl($tmp$$Register);
12324    }
12325    // (EAX) * stride
12326    __ mull($tmp$$Register);
12327    // + init (ignore upper bits)
12328    __ addl($limit$$Register, $init$$Register);
12329  %}
12330  ins_pipe( pipe_slow );
12331%}
12332
12333// ============================================================================
12334// Branch Instructions
12335// Jump Table
12336instruct jumpXtnd(rRegI switch_val) %{
12337  match(Jump switch_val);
12338  ins_cost(350);
12339  format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12340  ins_encode %{
12341    // Jump to Address(table_base + switch_reg)
12342    Address index(noreg, $switch_val$$Register, Address::times_1);
12343    __ jump(ArrayAddress($constantaddress, index));
12344  %}
12345  ins_pipe(pipe_jmp);
12346%}
12347
12348// Jump Direct - Label defines a relative address from JMP+1
12349instruct jmpDir(label labl) %{
12350  match(Goto);
12351  effect(USE labl);
12352
12353  ins_cost(300);
12354  format %{ "JMP    $labl" %}
12355  size(5);
12356  ins_encode %{
12357    Label* L = $labl$$label;
12358    __ jmp(*L, false); // Always long jump
12359  %}
12360  ins_pipe( pipe_jmp );
12361%}
12362
12363// Jump Direct Conditional - Label defines a relative address from Jcc+1
12364instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12365  match(If cop cr);
12366  effect(USE labl);
12367
12368  ins_cost(300);
12369  format %{ "J$cop    $labl" %}
12370  size(6);
12371  ins_encode %{
12372    Label* L = $labl$$label;
12373    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12374  %}
12375  ins_pipe( pipe_jcc );
12376%}
12377
12378// Jump Direct Conditional - Label defines a relative address from Jcc+1
12379instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12380  predicate(!n->has_vector_mask_set());
12381  match(CountedLoopEnd cop cr);
12382  effect(USE labl);
12383
12384  ins_cost(300);
12385  format %{ "J$cop    $labl\t# Loop end" %}
12386  size(6);
12387  ins_encode %{
12388    Label* L = $labl$$label;
12389    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12390  %}
12391  ins_pipe( pipe_jcc );
12392%}
12393
12394// Jump Direct Conditional - Label defines a relative address from Jcc+1
12395instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12396  predicate(!n->has_vector_mask_set());
12397  match(CountedLoopEnd cop cmp);
12398  effect(USE labl);
12399
12400  ins_cost(300);
12401  format %{ "J$cop,u  $labl\t# Loop end" %}
12402  size(6);
12403  ins_encode %{
12404    Label* L = $labl$$label;
12405    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12406  %}
12407  ins_pipe( pipe_jcc );
12408%}
12409
12410instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12411  predicate(!n->has_vector_mask_set());
12412  match(CountedLoopEnd cop cmp);
12413  effect(USE labl);
12414
12415  ins_cost(200);
12416  format %{ "J$cop,u  $labl\t# Loop end" %}
12417  size(6);
12418  ins_encode %{
12419    Label* L = $labl$$label;
12420    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12421  %}
12422  ins_pipe( pipe_jcc );
12423%}
12424
12425// mask version
12426// Jump Direct Conditional - Label defines a relative address from Jcc+1
12427instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
12428  predicate(n->has_vector_mask_set());
12429  match(CountedLoopEnd cop cr);
12430  effect(USE labl);
12431
12432  ins_cost(400);
12433  format %{ "J$cop    $labl\t# Loop end\n\t"
12434            "restorevectmask \t# vector mask restore for loops" %}
12435  size(10);
12436  ins_encode %{
12437    Label* L = $labl$$label;
12438    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12439    __ restorevectmask();
12440  %}
12441  ins_pipe( pipe_jcc );
12442%}
12443
12444// Jump Direct Conditional - Label defines a relative address from Jcc+1
12445instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12446  predicate(n->has_vector_mask_set());
12447  match(CountedLoopEnd cop cmp);
12448  effect(USE labl);
12449
12450  ins_cost(400);
12451  format %{ "J$cop,u  $labl\t# Loop end\n\t"
12452            "restorevectmask \t# vector mask restore for loops" %}
12453  size(10);
12454  ins_encode %{
12455    Label* L = $labl$$label;
12456    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12457    __ restorevectmask();
12458  %}
12459  ins_pipe( pipe_jcc );
12460%}
12461
12462instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12463  predicate(n->has_vector_mask_set());
12464  match(CountedLoopEnd cop cmp);
12465  effect(USE labl);
12466
12467  ins_cost(300);
12468  format %{ "J$cop,u  $labl\t# Loop end\n\t"
12469            "restorevectmask \t# vector mask restore for loops" %}
12470  size(10);
12471  ins_encode %{
12472    Label* L = $labl$$label;
12473    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12474    __ restorevectmask();
12475  %}
12476  ins_pipe( pipe_jcc );
12477%}
12478
12479// Jump Direct Conditional - using unsigned comparison
12480instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12481  match(If cop cmp);
12482  effect(USE labl);
12483
12484  ins_cost(300);
12485  format %{ "J$cop,u  $labl" %}
12486  size(6);
12487  ins_encode %{
12488    Label* L = $labl$$label;
12489    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12490  %}
12491  ins_pipe(pipe_jcc);
12492%}
12493
12494instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12495  match(If cop cmp);
12496  effect(USE labl);
12497
12498  ins_cost(200);
12499  format %{ "J$cop,u  $labl" %}
12500  size(6);
12501  ins_encode %{
12502    Label* L = $labl$$label;
12503    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12504  %}
12505  ins_pipe(pipe_jcc);
12506%}
12507
12508instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12509  match(If cop cmp);
12510  effect(USE labl);
12511
12512  ins_cost(200);
12513  format %{ $$template
12514    if ($cop$$cmpcode == Assembler::notEqual) {
12515      $$emit$$"JP,u   $labl\n\t"
12516      $$emit$$"J$cop,u   $labl"
12517    } else {
12518      $$emit$$"JP,u   done\n\t"
12519      $$emit$$"J$cop,u   $labl\n\t"
12520      $$emit$$"done:"
12521    }
12522  %}
12523  ins_encode %{
12524    Label* l = $labl$$label;
12525    if ($cop$$cmpcode == Assembler::notEqual) {
12526      __ jcc(Assembler::parity, *l, false);
12527      __ jcc(Assembler::notEqual, *l, false);
12528    } else if ($cop$$cmpcode == Assembler::equal) {
12529      Label done;
12530      __ jccb(Assembler::parity, done);
12531      __ jcc(Assembler::equal, *l, false);
12532      __ bind(done);
12533    } else {
12534       ShouldNotReachHere();
12535    }
12536  %}
12537  ins_pipe(pipe_jcc);
12538%}
12539
12540// ============================================================================
12541// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12542// array for an instance of the superklass.  Set a hidden internal cache on a
12543// hit (cache is checked with exposed code in gen_subtype_check()).  Return
12544// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
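// A rough C-style sketch of the scan (illustration only; names follow the format text below):
//   for (i = 0; i < sub->secondary_supers->length; i++) {
//     if (sub->secondary_supers[i] == super) {
//       sub->secondary_super_cache = super;  // hit: update cache and return zero
//       return 0;
//     }
//   }
//   return not_zero;                         // miss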
12545instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12546  match(Set result (PartialSubtypeCheck sub super));
12547  effect( KILL rcx, KILL cr );
12548
12549  ins_cost(1100);  // slightly larger than the next version
12550  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12551            "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12552            "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12553            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12554            "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12555            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12556            "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12557     "miss:\t" %}
12558
12559  opcode(0x1); // Force a XOR of EDI
12560  ins_encode( enc_PartialSubtypeCheck() );
12561  ins_pipe( pipe_slow );
12562%}
12563
12564instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12565  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12566  effect( KILL rcx, KILL result );
12567
12568  ins_cost(1000);
12569  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12570            "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12571            "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12572            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12573            "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12574            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12575     "miss:\t" %}
12576
12577  opcode(0x0);  // No need to XOR EDI
12578  ins_encode( enc_PartialSubtypeCheck() );
12579  ins_pipe( pipe_slow );
12580%}
12581
12582// ============================================================================
12583// Branch Instructions -- short offset versions
12584//
12585// These instructions are used to replace jumps of a long offset (the default
12586// match) with jumps of a shorter offset.  These instructions are all tagged
12587// with the ins_short_branch attribute, which causes the ADLC to suppress the
12588// match rules in general matching.  Instead, the ADLC generates a conversion
12589// method in the MachNode which can be used to do in-place replacement of the
12590// long variant with the shorter variant.  The compiler will determine if a
12591// branch can be taken by the is_short_branch_offset() predicate in the machine
12592// specific code section of the file.
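// For example, the 5-byte JMP rel32 emitted by jmpDir above (size(5)) is replaced by the
// 2-byte JMP rel8 of jmpDir_short below (size(2)), and the 6-byte Jcc forms shrink to 2 bytes.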
12593
12594// Jump Direct - Label defines a relative address from JMP+1
12595instruct jmpDir_short(label labl) %{
12596  match(Goto);
12597  effect(USE labl);
12598
12599  ins_cost(300);
12600  format %{ "JMP,s  $labl" %}
12601  size(2);
12602  ins_encode %{
12603    Label* L = $labl$$label;
12604    __ jmpb(*L);
12605  %}
12606  ins_pipe( pipe_jmp );
12607  ins_short_branch(1);
12608%}
12609
12610// Jump Direct Conditional - Label defines a relative address from Jcc+1
12611instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12612  match(If cop cr);
12613  effect(USE labl);
12614
12615  ins_cost(300);
12616  format %{ "J$cop,s  $labl" %}
12617  size(2);
12618  ins_encode %{
12619    Label* L = $labl$$label;
12620    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12621  %}
12622  ins_pipe( pipe_jcc );
12623  ins_short_branch(1);
12624%}
12625
12626// Jump Direct Conditional - Label defines a relative address from Jcc+1
12627instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12628  match(CountedLoopEnd cop cr);
12629  effect(USE labl);
12630
12631  ins_cost(300);
12632  format %{ "J$cop,s  $labl\t# Loop end" %}
12633  size(2);
12634  ins_encode %{
12635    Label* L = $labl$$label;
12636    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12637  %}
12638  ins_pipe( pipe_jcc );
12639  ins_short_branch(1);
12640%}
12641
12642// Jump Direct Conditional - Label defines a relative address from Jcc+1
12643instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12644  match(CountedLoopEnd cop cmp);
12645  effect(USE labl);
12646
12647  ins_cost(300);
12648  format %{ "J$cop,us $labl\t# Loop end" %}
12649  size(2);
12650  ins_encode %{
12651    Label* L = $labl$$label;
12652    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12653  %}
12654  ins_pipe( pipe_jcc );
12655  ins_short_branch(1);
12656%}
12657
12658instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12659  match(CountedLoopEnd cop cmp);
12660  effect(USE labl);
12661
12662  ins_cost(300);
12663  format %{ "J$cop,us $labl\t# Loop end" %}
12664  size(2);
12665  ins_encode %{
12666    Label* L = $labl$$label;
12667    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12668  %}
12669  ins_pipe( pipe_jcc );
12670  ins_short_branch(1);
12671%}
12672
12673// Jump Direct Conditional - using unsigned comparison
12674instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12675  match(If cop cmp);
12676  effect(USE labl);
12677
12678  ins_cost(300);
12679  format %{ "J$cop,us $labl" %}
12680  size(2);
12681  ins_encode %{
12682    Label* L = $labl$$label;
12683    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12684  %}
12685  ins_pipe( pipe_jcc );
12686  ins_short_branch(1);
12687%}
12688
12689instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12690  match(If cop cmp);
12691  effect(USE labl);
12692
12693  ins_cost(300);
12694  format %{ "J$cop,us $labl" %}
12695  size(2);
12696  ins_encode %{
12697    Label* L = $labl$$label;
12698    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12699  %}
12700  ins_pipe( pipe_jcc );
12701  ins_short_branch(1);
12702%}
12703
12704instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12705  match(If cop cmp);
12706  effect(USE labl);
12707
12708  ins_cost(300);
12709  format %{ $$template
12710    if ($cop$$cmpcode == Assembler::notEqual) {
12711      $$emit$$"JP,u,s   $labl\n\t"
12712      $$emit$$"J$cop,u,s   $labl"
12713    } else {
12714      $$emit$$"JP,u,s   done\n\t"
12715      $$emit$$"J$cop,u,s  $labl\n\t"
12716      $$emit$$"done:"
12717    }
12718  %}
12719  size(4);
12720  ins_encode %{
12721    Label* l = $labl$$label;
12722    if ($cop$$cmpcode == Assembler::notEqual) {
12723      __ jccb(Assembler::parity, *l);
12724      __ jccb(Assembler::notEqual, *l);
12725    } else if ($cop$$cmpcode == Assembler::equal) {
12726      Label done;
12727      __ jccb(Assembler::parity, done);
12728      __ jccb(Assembler::equal, *l);
12729      __ bind(done);
12730    } else {
12731       ShouldNotReachHere();
12732    }
12733  %}
12734  ins_pipe(pipe_jcc);
12735  ins_short_branch(1);
12736%}
12737
12738// ============================================================================
12739// Long Compare
12740//
12741// Currently we hold longs in 2 registers.  Comparing such values efficiently
12742// is tricky.  The flavor of compare used depends on whether we are testing
12743// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12744// The GE test is the negated LT test.  The LE test can be had by commuting
12745// the operands (yielding a GE test) and then negating; negate again for the
12746// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12747// NE test is negated from that.
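// As a sketch (not emitted code): for signed X < Y with X and Y held as hi:lo register pairs,
//   CMP  X.lo,Y.lo
//   MOV  tmp,X.hi
//   SBB  tmp,Y.hi
// leaves the flags of the full 64-bit subtract X - Y, so JL/JGE directly test X < Y and
// X >= Y; this is the sequence cmpL_reg_flags_LTGE encodes below.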
12748
12749// Due to a shortcoming in the ADLC, it mixes up expressions like:
12750// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12751// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12752// are collapsed internally in the ADLC's dfa-gen code.  The match for
12753// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12754// foo match ends up with the wrong leaf.  One fix is to not match both
12755// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12756// both forms beat the ternary form of long-compare and both are very useful
12757// on Intel which has so few registers.
12758
12759// Manifest a CmpL result in an integer register.  Very painful.
12760// This is the test to avoid.
12761instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12762  match(Set dst (CmpL3 src1 src2));
12763  effect( KILL flags );
12764  ins_cost(1000);
12765  format %{ "XOR    $dst,$dst\n\t"
12766            "CMP    $src1.hi,$src2.hi\n\t"
12767            "JLT,s  m_one\n\t"
12768            "JGT,s  p_one\n\t"
12769            "CMP    $src1.lo,$src2.lo\n\t"
12770            "JB,s   m_one\n\t"
12771            "JEQ,s  done\n"
12772    "p_one:\tINC    $dst\n\t"
12773            "JMP,s  done\n"
12774    "m_one:\tDEC    $dst\n"
12775     "done:" %}
12776  ins_encode %{
12777    Label p_one, m_one, done;
12778    __ xorptr($dst$$Register, $dst$$Register);
12779    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12780    __ jccb(Assembler::less,    m_one);
12781    __ jccb(Assembler::greater, p_one);
12782    __ cmpl($src1$$Register, $src2$$Register);
12783    __ jccb(Assembler::below,   m_one);
12784    __ jccb(Assembler::equal,   done);
12785    __ bind(p_one);
12786    __ incrementl($dst$$Register);
12787    __ jmpb(done);
12788    __ bind(m_one);
12789    __ decrementl($dst$$Register);
12790    __ bind(done);
12791  %}
12792  ins_pipe( pipe_slow );
12793%}
12794
12795//======
12796// Manifest a CmpL result in the normal flags.  Only good for LT or GE
12797// compares.  Can be used for LE or GT compares by reversing arguments.
12798// NOT GOOD FOR EQ/NE tests.
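// E.g. "X <= Y" is tested by matching the reversed form "Y >= X", and "X > Y" by "Y < X".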
12799instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12800  match( Set flags (CmpL src zero ));
12801  ins_cost(100);
12802  format %{ "TEST   $src.hi,$src.hi" %}
12803  opcode(0x85);
12804  ins_encode( OpcP, RegReg_Hi2( src, src ) );
12805  ins_pipe( ialu_cr_reg_reg );
12806%}
12807
12808// Manifest a CmpL result in the normal flags.  Only good for LT or GE
12809// compares.  Can be used for LE or GT compares by reversing arguments.
12810// NOT GOOD FOR EQ/NE tests.
12811instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12812  match( Set flags (CmpL src1 src2 ));
12813  effect( TEMP tmp );
12814  ins_cost(300);
12815  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12816            "MOV    $tmp,$src1.hi\n\t"
12817            "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12818  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12819  ins_pipe( ialu_cr_reg_reg );
12820%}
12821
12822// Long compares reg < zero/reg OR reg >= zero/reg.
12823// Just a wrapper for a normal branch, plus the predicate test.
12824instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12825  match(If cmp flags);
12826  effect(USE labl);
12827  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12828  expand %{
12829    jmpCon(cmp,flags,labl);    // JLT or JGE...
12830  %}
12831%}
12832
12833//======
12834// Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12835// compares.  Can be used for LE or GT compares by reversing arguments.
12836// NOT GOOD FOR EQ/NE tests.
12837instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
12838  match(Set flags (CmpUL src zero));
12839  ins_cost(100);
12840  format %{ "TEST   $src.hi,$src.hi" %}
12841  opcode(0x85);
12842  ins_encode(OpcP, RegReg_Hi2(src, src));
12843  ins_pipe(ialu_cr_reg_reg);
12844%}
12845
12846// Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12847// compares.  Can be used for LE or GT compares by reversing arguments.
12848// NOT GOOD FOR EQ/NE tests.
12849instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
12850  match(Set flags (CmpUL src1 src2));
12851  effect(TEMP tmp);
12852  ins_cost(300);
12853  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12854            "MOV    $tmp,$src1.hi\n\t"
12855            "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
12856  ins_encode(long_cmp_flags2(src1, src2, tmp));
12857  ins_pipe(ialu_cr_reg_reg);
12858%}
12859
12860// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
12861// Just a wrapper for a normal branch, plus the predicate test.
12862instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
12863  match(If cmp flags);
12864  effect(USE labl);
12865  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
12866  expand %{
12867    jmpCon(cmp, flags, labl);    // JLT or JGE...
12868  %}
12869%}
12870
12871// Compare 2 longs and CMOVE longs.
12872instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12873  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12874  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12875  ins_cost(400);
12876  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12877            "CMOV$cmp $dst.hi,$src.hi" %}
12878  opcode(0x0F,0x40);
12879  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12880  ins_pipe( pipe_cmov_reg_long );
12881%}
12882
12883instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12884  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12885  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12886  ins_cost(500);
12887  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12888            "CMOV$cmp $dst.hi,$src.hi" %}
12889  opcode(0x0F,0x40);
12890  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12891  ins_pipe( pipe_cmov_reg_long );
12892%}
12893
12894// Compare 2 longs and CMOVE ints.
12895instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12896  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12897  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12898  ins_cost(200);
12899  format %{ "CMOV$cmp $dst,$src" %}
12900  opcode(0x0F,0x40);
12901  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12902  ins_pipe( pipe_cmov_reg );
12903%}
12904
12905instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12906  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12907  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12908  ins_cost(250);
12909  format %{ "CMOV$cmp $dst,$src" %}
12910  opcode(0x0F,0x40);
12911  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12912  ins_pipe( pipe_cmov_mem );
12913%}
12914
12915// Compare 2 longs and CMOVE ptrs.
12916instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12917  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12918  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12919  ins_cost(200);
12920  format %{ "CMOV$cmp $dst,$src" %}
12921  opcode(0x0F,0x40);
12922  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12923  ins_pipe( pipe_cmov_reg );
12924%}
12925
12926// Compare 2 longs and CMOVE doubles
12927instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12928  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12929  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12930  ins_cost(200);
12931  expand %{
12932    fcmovDPR_regS(cmp,flags,dst,src);
12933  %}
12934%}
12935
12936// Compare 2 longs and CMOVE doubles
12937instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12938  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12939  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12940  ins_cost(200);
12941  expand %{
12942    fcmovD_regS(cmp,flags,dst,src);
12943  %}
12944%}
12945
12946instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
12947  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12948  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12949  ins_cost(200);
12950  expand %{
12951    fcmovFPR_regS(cmp,flags,dst,src);
12952  %}
12953%}
12954
12955instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12956  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )  );
12957  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12958  ins_cost(200);
12959  expand %{
12960    fcmovF_regS(cmp,flags,dst,src);
12961  %}
12962%}
12963
12964//======
12965// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12966instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12967  match( Set flags (CmpL src zero ));
12968  effect(TEMP tmp);
12969  ins_cost(200);
12970  format %{ "MOV    $tmp,$src.lo\n\t"
12971            "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12972  ins_encode( long_cmp_flags0( src, tmp ) );
12973  ins_pipe( ialu_reg_reg_long );
12974%}
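
// A long is zero exactly when both 32-bit halves are zero, so ORing the halves
// into a scratch register sets ZF correctly for a following EQ/NE test.  A
// minimal sketch (illustration only, not generated code; the helper name is
// made up):
//
//   #include <cstdint>
//
//   bool long_is_zero(int64_t v) {
//     uint32_t lo = (uint32_t)v;
//     uint32_t hi = (uint32_t)(v >> 32);
//     return (lo | hi) == 0;   // MOV $tmp,$src.lo ; OR $tmp,$src.hi sets ZF this way
//   }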
12975
12976// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12977instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12978  match( Set flags (CmpL src1 src2 ));
12979  ins_cost(200+300);
12980  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12981            "JNE,s  skip\n\t"
12982            "CMP    $src1.hi,$src2.hi\n\t"
12983     "skip:\t" %}
12984  ins_encode( long_cmp_flags1( src1, src2 ) );
12985  ins_pipe( ialu_cr_reg_reg );
12986%}
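
// Two longs are equal only if both halves match, so the low halves are compared
// first and the high-half compare is skipped once an inequality is seen; either
// compare leaves ZF in the right state for the following JEQ/JNE.  A minimal
// sketch (illustration only, not generated code; the helper name is made up):
//
//   #include <cstdint>
//
//   bool long_eq(int64_t a, int64_t b) {
//     if ((uint32_t)a != (uint32_t)b)                      // CMP lo,lo ; JNE,s skip
//       return false;
//     return (uint32_t)(a >> 32) == (uint32_t)(b >> 32);   // CMP hi,hi
//   }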
12987
12988// Long compare reg == zero/reg OR reg != zero/reg
12989// Just a wrapper for a normal branch, plus the predicate test.
12990instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12991  match(If cmp flags);
12992  effect(USE labl);
12993  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12994  expand %{
12995    jmpCon(cmp,flags,labl);    // JEQ or JNE...
12996  %}
12997%}
12998
12999//======
13000// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13001instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13002  match(Set flags (CmpUL src zero));
13003  effect(TEMP tmp);
13004  ins_cost(200);
13005  format %{ "MOV    $tmp,$src.lo\n\t"
13006            "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13007  ins_encode(long_cmp_flags0(src, tmp));
13008  ins_pipe(ialu_reg_reg_long);
13009%}
13010
13011// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13012instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13013  match(Set flags (CmpUL src1 src2));
13014  ins_cost(200+300);
13015  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13016            "JNE,s  skip\n\t"
13017            "CMP    $src1.hi,$src2.hi\n\t"
13018     "skip:\t" %}
13019  ins_encode(long_cmp_flags1(src1, src2));
13020  ins_pipe(ialu_cr_reg_reg);
13021%}
13022
13023// Unsigned long compare reg == zero/reg OR reg != zero/reg
13024// Just a wrapper for a normal branch, plus the predicate test.
13025instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13026  match(If cmp flags);
13027  effect(USE labl);
13028  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13029  expand %{
13030    jmpCon(cmp, flags, labl);    // JEQ or JNE...
13031  %}
13032%}
13033
13034// Compare 2 longs and CMOVE longs.
13035instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13036  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13037  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13038  ins_cost(400);
13039  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13040            "CMOV$cmp $dst.hi,$src.hi" %}
13041  opcode(0x0F,0x40);
13042  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13043  ins_pipe( pipe_cmov_reg_long );
13044%}
13045
13046instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13047  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13048  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13049  ins_cost(500);
13050  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13051            "CMOV$cmp $dst.hi,$src.hi" %}
13052  opcode(0x0F,0x40);
13053  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13054  ins_pipe( pipe_cmov_reg_long );
13055%}
13056
13057// Compare 2 longs and CMOVE ints.
13058instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13059  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13060  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13061  ins_cost(200);
13062  format %{ "CMOV$cmp $dst,$src" %}
13063  opcode(0x0F,0x40);
13064  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13065  ins_pipe( pipe_cmov_reg );
13066%}
13067
13068instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13069  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13070  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13071  ins_cost(250);
13072  format %{ "CMOV$cmp $dst,$src" %}
13073  opcode(0x0F,0x40);
13074  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13075  ins_pipe( pipe_cmov_mem );
13076%}
13077
13078// Compare 2 longs and CMOVE ptrs.
13079instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13080  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13081  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13082  ins_cost(200);
13083  format %{ "CMOV$cmp $dst,$src" %}
13084  opcode(0x0F,0x40);
13085  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13086  ins_pipe( pipe_cmov_reg );
13087%}
13088
13089// Compare 2 longs and CMOVE doubles
13090instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13091  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13092  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13093  ins_cost(200);
13094  expand %{
13095    fcmovDPR_regS(cmp,flags,dst,src);
13096  %}
13097%}
13098
13099// Compare 2 longs and CMOVE doubles
13100instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13101  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13102  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13103  ins_cost(200);
13104  expand %{
13105    fcmovD_regS(cmp,flags,dst,src);
13106  %}
13107%}
13108
13109instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13110  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13111  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13112  ins_cost(200);
13113  expand %{
13114    fcmovFPR_regS(cmp,flags,dst,src);
13115  %}
13116%}
13117
13118instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13119  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13120  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13121  ins_cost(200);
13122  expand %{
13123    fcmovF_regS(cmp,flags,dst,src);
13124  %}
13125%}
13126
13127//======
13128// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13129// Same as cmpL_reg_flags_LEGT except that the source must be negated (0 - src).
13130instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13131  match( Set flags (CmpL src zero ));
13132  effect( TEMP tmp );
13133  ins_cost(300);
13134  format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13135            "CMP    $tmp,$src.lo\n\t"
13136            "SBB    $tmp,$src.hi\n\t" %}
13137  ins_encode( long_cmp_flags3(src, tmp) );
13138  ins_pipe( ialu_reg_reg_long );
13139%}
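
// The XOR/CMP/SBB sequence above computes flags for 0 - $src, i.e. for a
// compare of zero against the source, which the commuted condition
// (cmpOp_commute) then turns back into a LE/GT test of the source against
// zero.  A hedged sketch of the arithmetic (illustration only, not generated
// code; the helper name is made up):
//
//   #include <cstdint>
//
//   bool long_le_zero(int64_t v) {
//     uint32_t v_lo = (uint32_t)v;
//     int32_t  v_hi = (int32_t)(v >> 32);
//     bool borrow = 0u < v_lo;                    // CMP $tmp,$src.lo  (tmp == 0)
//     int64_t hi  = (int64_t)0 - v_hi - borrow;   // SBB $tmp,$src.hi
//     return hi >= 0;                             // 0 >= v, the commuted GE test
//   }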
13140
13141// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13142// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13143// requires a commuted test to get the same result.
13144instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13145  match( Set flags (CmpL src1 src2 ));
13146  effect( TEMP tmp );
13147  ins_cost(300);
13148  format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13149            "MOV    $tmp,$src2.hi\n\t"
13150            "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13151  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13152  ins_pipe( ialu_cr_reg_reg );
13153%}
13154
13155// Long compares reg <= zero/reg OR reg > zero/reg.
13156// Just a wrapper for a normal branch, plus the predicate test
13157instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13158  match(If cmp flags);
13159  effect(USE labl);
13160  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13161  ins_cost(300);
13162  expand %{
13163    jmpCon(cmp,flags,labl);    // JGT or JLE...
13164  %}
13165%}
13166
13167//======
13168// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13169// Same as cmpUL_reg_flags_LEGT except that the source must be negated (0 - src).
13170instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13171  match(Set flags (CmpUL src zero));
13172  effect(TEMP tmp);
13173  ins_cost(300);
13174  format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13175            "CMP    $tmp,$src.lo\n\t"
13176            "SBB    $tmp,$src.hi\n\t" %}
13177  ins_encode(long_cmp_flags3(src, tmp));
13178  ins_pipe(ialu_reg_reg_long);
13179%}
13180
13181// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13182// Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13183// requires a commuted test to get the same result.
13184instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13185  match(Set flags (CmpUL src1 src2));
13186  effect(TEMP tmp);
13187  ins_cost(300);
13188  format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13189            "MOV    $tmp,$src2.hi\n\t"
13190            "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13191  ins_encode(long_cmp_flags2( src2, src1, tmp));
13192  ins_pipe(ialu_cr_reg_reg);
13193%}
13194
13195// Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13196// Just a wrapper for a normal branch, plus the predicate test
13197instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13198  match(If cmp flags);
13199  effect(USE labl);
13200  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13201  ins_cost(300);
13202  expand %{
13203    jmpCon(cmp, flags, labl);    // JGT or JLE...
13204  %}
13205%}
13206
13207// Compare 2 longs and CMOVE longs.
13208instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13209  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13210  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13211  ins_cost(400);
13212  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13213            "CMOV$cmp $dst.hi,$src.hi" %}
13214  opcode(0x0F,0x40);
13215  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13216  ins_pipe( pipe_cmov_reg_long );
13217%}
13218
13219instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13220  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13221  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13222  ins_cost(500);
13223  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13224            "CMOV$cmp $dst.hi,$src.hi+4" %}
13225  opcode(0x0F,0x40);
13226  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13227  ins_pipe( pipe_cmov_reg_long );
13228%}
13229
13230// Compare 2 longs and CMOVE ints.
13231instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13232  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13233  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13234  ins_cost(200);
13235  format %{ "CMOV$cmp $dst,$src" %}
13236  opcode(0x0F,0x40);
13237  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13238  ins_pipe( pipe_cmov_reg );
13239%}
13240
13241instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13242  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13243  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13244  ins_cost(250);
13245  format %{ "CMOV$cmp $dst,$src" %}
13246  opcode(0x0F,0x40);
13247  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13248  ins_pipe( pipe_cmov_mem );
13249%}
13250
13251// Compare 2 longs and CMOVE ptrs.
13252instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13253  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13254  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13255  ins_cost(200);
13256  format %{ "CMOV$cmp $dst,$src" %}
13257  opcode(0x0F,0x40);
13258  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13259  ins_pipe( pipe_cmov_reg );
13260%}
13261
13262// Compare 2 longs and CMOVE doubles
13263instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13264  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13265  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13266  ins_cost(200);
13267  expand %{
13268    fcmovDPR_regS(cmp,flags,dst,src);
13269  %}
13270%}
13271
13272// Compare 2 longs and CMOVE doubles
13273instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13274  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13275  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13276  ins_cost(200);
13277  expand %{
13278    fcmovD_regS(cmp,flags,dst,src);
13279  %}
13280%}
13281
13282instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13283  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13284  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13285  ins_cost(200);
13286  expand %{
13287    fcmovFPR_regS(cmp,flags,dst,src);
13288  %}
13289%}
13290
13291
13292instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13293  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13294  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13295  ins_cost(200);
13296  expand %{
13297    fcmovF_regS(cmp,flags,dst,src);
13298  %}
13299%}
13300
13301
13302// ============================================================================
13303// Procedure Call/Return Instructions
13304// Call Java Static Instruction
13305// Note: If this code changes, the corresponding ret_addr_offset() and
13306//       compute_padding() functions will have to be adjusted.
13307instruct CallStaticJavaDirect(method meth) %{
13308  match(CallStaticJava);
13309  effect(USE meth);
13310
13311  ins_cost(300);
13312  format %{ "CALL,static " %}
13313  opcode(0xE8); /* E8 cd */
13314  ins_encode( pre_call_resets,
13315              Java_Static_Call( meth ),
13316              call_epilog,
13317              post_call_FPU );
13318  ins_pipe( pipe_slow );
13319  ins_alignment(4);
13320%}
13321
13322// Call Java Dynamic Instruction
13323// Note: If this code changes, the corresponding ret_addr_offset() and
13324//       compute_padding() functions will have to be adjusted.
13325instruct CallDynamicJavaDirect(method meth) %{
13326  match(CallDynamicJava);
13327  effect(USE meth);
13328
13329  ins_cost(300);
13330  format %{ "MOV    EAX,(oop)-1\n\t"
13331            "CALL,dynamic" %}
13332  opcode(0xE8); /* E8 cd */
13333  ins_encode( pre_call_resets,
13334              Java_Dynamic_Call( meth ),
13335              call_epilog,
13336              post_call_FPU );
13337  ins_pipe( pipe_slow );
13338  ins_alignment(4);
13339%}
13340
13341// Call Runtime Instruction
13342instruct CallRuntimeDirect(method meth) %{
13343  match(CallRuntime);
13344  effect(USE meth);
13345
13346  ins_cost(300);
13347  format %{ "CALL,runtime " %}
13348  opcode(0xE8); /* E8 cd */
13349  // Use FFREEs to clear entries in float stack
13350  ins_encode( pre_call_resets,
13351              FFree_Float_Stack_All,
13352              Java_To_Runtime( meth ),
13353              post_call_FPU );
13354  ins_pipe( pipe_slow );
13355%}
13356
13357// Call runtime without safepoint
13358instruct CallLeafDirect(method meth) %{
13359  match(CallLeaf);
13360  effect(USE meth);
13361
13362  ins_cost(300);
13363  format %{ "CALL_LEAF,runtime " %}
13364  opcode(0xE8); /* E8 cd */
13365  ins_encode( pre_call_resets,
13366              FFree_Float_Stack_All,
13367              Java_To_Runtime( meth ),
13368              Verify_FPU_For_Leaf, post_call_FPU );
13369  ins_pipe( pipe_slow );
13370%}
13371
13372instruct CallLeafNoFPDirect(method meth) %{
13373  match(CallLeafNoFP);
13374  effect(USE meth);
13375
13376  ins_cost(300);
13377  format %{ "CALL_LEAF_NOFP,runtime " %}
13378  opcode(0xE8); /* E8 cd */
13379  ins_encode(pre_call_resets, Java_To_Runtime(meth));
13380  ins_pipe( pipe_slow );
13381%}
13382
13383
13384// Return Instruction
13385// Remove the return address & jump to it.
13386instruct Ret() %{
13387  match(Return);
13388  format %{ "RET" %}
13389  opcode(0xC3);
13390  ins_encode(OpcP);
13391  ins_pipe( pipe_jmp );
13392%}
13393
13394// Tail Call; Jump from runtime stub to Java code.
13395// Also known as an 'interprocedural jump'.
13396// Target of jump will eventually return to caller.
13397// TailJump below removes the return address.
13398instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13399  match(TailCall jump_target method_oop );
13400  ins_cost(300);
13401  format %{ "JMP    $jump_target \t# EBX holds method oop" %}
13402  opcode(0xFF, 0x4);  /* Opcode FF /4 */
13403  ins_encode( OpcP, RegOpc(jump_target) );
13404  ins_pipe( pipe_jmp );
13405%}
13406
13407
13408// Tail Jump; remove the return address; jump to target.
13409// TailCall above leaves the return address around.
13410instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13411  match( TailJump jump_target ex_oop );
13412  ins_cost(300);
13413  format %{ "POP    EDX\t# pop return address into dummy\n\t"
13414            "JMP    $jump_target " %}
13415  opcode(0xFF, 0x4);  /* Opcode FF /4 */
13416  ins_encode( enc_pop_rdx,
13417              OpcP, RegOpc(jump_target) );
13418  ins_pipe( pipe_jmp );
13419%}
13420
13421// Create exception oop: created by stack-crawling runtime code.
13422// The created exception is now available to this handler, and is set up
13423// just prior to jumping here.  No code emitted.
13424instruct CreateException( eAXRegP ex_oop )
13425%{
13426  match(Set ex_oop (CreateEx));
13427
13428  size(0);
13429  // use the following format syntax
13430  format %{ "# exception oop is in EAX; no code emitted" %}
13431  ins_encode();
13432  ins_pipe( empty );
13433%}
13434
13435
13436// Rethrow exception:
13437// The exception oop will come in the first argument position.
13438// Then JUMP (not call) to the rethrow stub code.
13439instruct RethrowException()
13440%{
13441  match(Rethrow);
13442
13443  // use the following format syntax
13444  format %{ "JMP    rethrow_stub" %}
13445  ins_encode(enc_rethrow);
13446  ins_pipe( pipe_jmp );
13447%}
13448
13449// inlined locking and unlocking
13450
13451instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13452  predicate(Compile::current()->use_rtm());
13453  match(Set cr (FastLock object box));
13454  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13455  ins_cost(300);
13456  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13457  ins_encode %{
13458    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13459                 $scr$$Register, $cx1$$Register, $cx2$$Register,
13460                 _counters, _rtm_counters, _stack_rtm_counters,
13461                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13462                 true, ra_->C->profile_rtm());
13463  %}
13464  ins_pipe(pipe_slow);
13465%}
13466
13467instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13468  predicate(!Compile::current()->use_rtm());
13469  match(Set cr (FastLock object box));
13470  effect(TEMP tmp, TEMP scr, USE_KILL box);
13471  ins_cost(300);
13472  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13473  ins_encode %{
13474    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13475                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13476  %}
13477  ins_pipe(pipe_slow);
13478%}
13479
13480instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13481  match(Set cr (FastUnlock object box));
13482  effect(TEMP tmp, USE_KILL box);
13483  ins_cost(300);
13484  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13485  ins_encode %{
13486    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13487  %}
13488  ins_pipe(pipe_slow);
13489%}
13490
13491
13492
13493// ============================================================================
13494// Safepoint Instruction
13495instruct safePoint_poll(eFlagsReg cr) %{
13496  predicate(SafepointMechanism::uses_global_page_poll());
13497  match(SafePoint);
13498  effect(KILL cr);
13499
13500  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13501  // On SPARC that might be acceptable as we can generate the address with
13502  // just a sethi, saving an or.  But by polling at offset 0 we can end up
13503  // putting additional pressure on index 0 of the D$ (data cache): because of
13504  // alignment (just like the situation at hand) the lower indices tend to see
13505  // more traffic.  It would be better to change the polling address to
13506  // offset 0 of the last cache line in the polling page.
13507
13508  format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13509  ins_cost(125);
13510  size(6);
13511  ins_encode( Safepoint_Poll() );
13512  ins_pipe( ialu_reg_mem );
13513%}
13514
13515instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13516  predicate(SafepointMechanism::uses_thread_local_poll());
13517  match(SafePoint poll);
13518  effect(KILL cr, USE poll);
13519
13520  format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13521  ins_cost(125);
13522  // EBP would need size(3)
13523  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13524  ins_encode %{
13525    __ relocate(relocInfo::poll_type);
13526    address pre_pc = __ pc();
13527    __ testl(rax, Address($poll$$Register, 0));
13528    address post_pc = __ pc();
13529    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13530  %}
13531  ins_pipe(ialu_reg_mem);
13532%}
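
// A rough conceptual model of the thread-local poll above (an assumption-laden
// sketch, not the VM's actual code): the generated TEST is just a load from a
// per-thread polling word; when a safepoint or handshake is requested, that
// address is switched to a protected page, so the load faults and the trap
// handler parks the thread at the safepoint.
//
//   // Conceptual C++ only; the struct, field, and helper names are made up.
//   struct ThreadModel { volatile int* polling_word; };
//
//   inline void poll(ThreadModel* t) {
//     (void)*t->polling_word;   // TEST EAX,[$poll] -- harmless read when disarmed,
//   }                           // faults into the safepoint handler when armed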
13533
13534
13535// ============================================================================
13536// This name is KNOWN by the ADLC and cannot be changed.
13537// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13538// for this guy.
13539instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13540  match(Set dst (ThreadLocal));
13541  effect(DEF dst, KILL cr);
13542
13543  format %{ "MOV    $dst, Thread::current()" %}
13544  ins_encode %{
13545    Register dstReg = as_Register($dst$$reg);
13546    __ get_thread(dstReg);
13547  %}
13548  ins_pipe( ialu_reg_fat );
13549%}
13550
13551
13552
13553//----------PEEPHOLE RULES-----------------------------------------------------
13554// These must follow all instruction definitions as they use the names
13555// defined in the instructions definitions.
13556//
13557// peepmatch ( root_instr_name [preceding_instruction]* );
13558//
13559// peepconstraint %{
13560// (instruction_number.operand_name relational_op instruction_number.operand_name
13561//  [, ...] );
13562// // instruction numbers are zero-based using left to right order in peepmatch
13563//
13564// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13565// // provide an instruction_number.operand_name for each operand that appears
13566// // in the replacement instruction's match rule
13567//
13568// ---------VM FLAGS---------------------------------------------------------
13569//
13570// All peephole optimizations can be turned off using -XX:-OptoPeephole
13571//
13572// Each peephole rule is given an identifying number starting with zero and
13573// increasing by one in the order seen by the parser.  An individual peephole
13574// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13575// on the command-line.
13576//
13577// ---------CURRENT LIMITATIONS----------------------------------------------
13578//
13579// Only match adjacent instructions in same basic block
13580// Only equality constraints
13581// Only constraints between operands, not (0.dest_reg == EAX_enc)
13582// Only one replacement instruction
13583//
13584// ---------EXAMPLE----------------------------------------------------------
13585//
13586// // pertinent parts of existing instructions in architecture description
13587// instruct movI(rRegI dst, rRegI src) %{
13588//   match(Set dst (CopyI src));
13589// %}
13590//
13591// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13592//   match(Set dst (AddI dst src));
13593//   effect(KILL cr);
13594// %}
13595//
13596// // Change (inc mov) to lea
13597// peephole %{
13598//   // increment preceded by register-register move
13599//   peepmatch ( incI_eReg movI );
13600//   // require that the destination register of the increment
13601//   // match the destination register of the move
13602//   peepconstraint ( 0.dst == 1.dst );
13603//   // construct a replacement instruction that sets
13604//   // the destination to ( move's source register + one )
13605//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13606// %}
13607//
13608// Implementation no longer uses movX instructions since
13609// machine-independent system no longer uses CopyX nodes.
13610//
13611// peephole %{
13612//   peepmatch ( incI_eReg movI );
13613//   peepconstraint ( 0.dst == 1.dst );
13614//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13615// %}
13616//
13617// peephole %{
13618//   peepmatch ( decI_eReg movI );
13619//   peepconstraint ( 0.dst == 1.dst );
13620//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13621// %}
13622//
13623// peephole %{
13624//   peepmatch ( addI_eReg_imm movI );
13625//   peepconstraint ( 0.dst == 1.dst );
13626//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13627// %}
13628//
13629// peephole %{
13630//   peepmatch ( addP_eReg_imm movP );
13631//   peepconstraint ( 0.dst == 1.dst );
13632//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13633// %}
13634
13635// // Change load of spilled value to only a spill
13636// instruct storeI(memory mem, rRegI src) %{
13637//   match(Set mem (StoreI mem src));
13638// %}
13639//
13640// instruct loadI(rRegI dst, memory mem) %{
13641//   match(Set dst (LoadI mem));
13642// %}
13643//
13644peephole %{
13645  peepmatch ( loadI storeI );
13646  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13647  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13648%}
13649
13650//----------SMARTSPILL RULES---------------------------------------------------
13651// These must follow all instruction definitions as they use the names
13652// defined in the instructions definitions.
13653