/*
 * Copyright (c) 2009, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */


package org.graalvm.compiler.asm.amd64;

import static jdk.vm.ci.amd64.AMD64.CPU;
import static jdk.vm.ci.amd64.AMD64.XMM;
import static jdk.vm.ci.amd64.AMD64.r12;
import static jdk.vm.ci.amd64.AMD64.r13;
import static jdk.vm.ci.amd64.AMD64.rbp;
import static jdk.vm.ci.amd64.AMD64.rip;
import static jdk.vm.ci.amd64.AMD64.rsp;
import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.BYTE;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.DWORD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PS;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.QWORD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SS;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.WORD;
import static org.graalvm.compiler.core.common.NumUtil.isByte;
import static org.graalvm.compiler.core.common.NumUtil.isInt;
import static org.graalvm.compiler.core.common.NumUtil.isShiftCount;
import static org.graalvm.compiler.core.common.NumUtil.isUByte;

import org.graalvm.compiler.asm.Assembler;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
import org.graalvm.compiler.core.common.NumUtil;
import org.graalvm.compiler.debug.GraalError;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.Register.RegisterCategory;
import jdk.vm.ci.code.TargetDescription;
import jdk.vm.ci.meta.PlatformKind;

/**
 * This class implements an assembler that can encode most X86 instructions.
 */
public class AMD64Assembler extends Assembler {

    private static final int MinEncodingNeedsRex = 8;

    /**
     * The x86 condition codes used for conditional jumps/moves.
     */
    public enum ConditionFlag {
        Zero(0x4, "|zero|"),
        NotZero(0x5, "|nzero|"),
        Equal(0x4, "="),
        NotEqual(0x5, "!="),
        Less(0xc, "<"),
        LessEqual(0xe, "<="),
        Greater(0xf, ">"),
        GreaterEqual(0xd, ">="),
        Below(0x2, "|<|"),
        BelowEqual(0x6, "|<=|"),
        Above(0x7, "|>|"),
        AboveEqual(0x3, "|>=|"),
        Overflow(0x0, "|of|"),
        NoOverflow(0x1, "|nof|"),
        CarrySet(0x2, "|carry|"),
        CarryClear(0x3, "|ncarry|"),
        Negative(0x8, "|neg|"),
        Positive(0x9, "|pos|"),
        Parity(0xa, "|par|"),
        NoParity(0xb, "|npar|");

        private final int value;
        private final String operator;

        ConditionFlag(int value, String operator) {
            this.value = value;
            this.operator = operator;
        }

        public ConditionFlag negate() {
            switch (this) {
                case Zero:
                    return NotZero;
                case NotZero:
                    return Zero;
                case Equal:
                    return NotEqual;
                case NotEqual:
                    return Equal;
                case Less:
                    return GreaterEqual;
                case LessEqual:
                    return Greater;
                case Greater:
                    return LessEqual;
                case GreaterEqual:
                    return Less;
                case Below:
                    return AboveEqual;
                case BelowEqual:
                    return Above;
                case Above:
                    return BelowEqual;
                case AboveEqual:
                    return Below;
                case Overflow:
                    return NoOverflow;
                case NoOverflow:
                    return Overflow;
                case CarrySet:
                    return CarryClear;
                case CarryClear:
                    return CarrySet;
                case Negative:
                    return Positive;
                case Positive:
                    return Negative;
                case Parity:
                    return NoParity;
                case NoParity:
                    return Parity;
            }
            throw new IllegalArgumentException();
        }

        public int getValue() {
            return value;
        }

        @Override
        public String toString() {
            return operator;
        }
    }
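
    // Illustrative note (not used by the assembler itself): the 4-bit values above plug
    // directly into the x86 opcode maps. For example:
    //   short JE  = 0x70 | ConditionFlag.Equal.getValue()        // 0x74
    //   SETE r/m8 = 0x0F, 0x90 | ConditionFlag.Equal.getValue()  // 0x0F 0x94
    //   ConditionFlag.Equal.negate()                             // NotEqual (0x5)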

    /**
     * Constants for X86 prefix bytes.
     */
    private static class Prefix {
        private static final int REX = 0x40;
        private static final int REXB = 0x41;
        private static final int REXX = 0x42;
        private static final int REXXB = 0x43;
        private static final int REXR = 0x44;
        private static final int REXRB = 0x45;
        private static final int REXRX = 0x46;
        private static final int REXRXB = 0x47;
        private static final int REXW = 0x48;
        private static final int REXWB = 0x49;
        private static final int REXWX = 0x4A;
        private static final int REXWXB = 0x4B;
        private static final int REXWR = 0x4C;
        private static final int REXWRB = 0x4D;
        private static final int REXWRX = 0x4E;
        private static final int REXWRXB = 0x4F;
        private static final int VEX_3BYTES = 0xC4;
        private static final int VEX_2BYTES = 0xC5;
    }
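
    // The REX prefix constants above follow the fixed bit layout 0100WRXB; for example,
    // REXWB == 0x49 == 0x40 | 0x08 (W: 64-bit operand) | 0x01 (B: extended ModRM.rm/SIB.base).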

    private static class VexPrefix {
        private static final int VEX_R = 0x80;
        private static final int VEX_W = 0x80;
    }

    private static class VexSimdPrefix {
        private static final int VEX_SIMD_NONE = 0x0;
        private static final int VEX_SIMD_66 = 0x1;
        private static final int VEX_SIMD_F3 = 0x2;
        private static final int VEX_SIMD_F2 = 0x3;
    }

    private static class VexOpcode {
        private static final int VEX_OPCODE_NONE = 0x0;
        private static final int VEX_OPCODE_0F = 0x1;
        private static final int VEX_OPCODE_0F_38 = 0x2;
        private static final int VEX_OPCODE_0F_3A = 0x3;
    }

    public static class AvxVectorLen {
        public static final int AVX_128bit = 0x0;
        public static final int AVX_256bit = 0x1;
        public static final int AVX_512bit = 0x2;
        public static final int AVX_NoVec = 0x4;
    }

    public static class EvexTupleType {
        public static final int EVEX_FV = 0;
        public static final int EVEX_HV = 4;
        public static final int EVEX_FVM = 6;
        public static final int EVEX_T1S = 7;
        public static final int EVEX_T1F = 11;
        public static final int EVEX_T2 = 13;
        public static final int EVEX_T4 = 15;
        public static final int EVEX_T8 = 17;
        public static final int EVEX_HVM = 18;
        public static final int EVEX_QVM = 19;
        public static final int EVEX_OVM = 20;
        public static final int EVEX_M128 = 21;
        public static final int EVEX_DUP = 22;
        public static final int EVEX_ETUP = 23;
    }

    public static class EvexInputSizeInBits {
        public static final int EVEX_8bit = 0;
        public static final int EVEX_16bit = 1;
        public static final int EVEX_32bit = 2;
        public static final int EVEX_64bit = 3;
        public static final int EVEX_NObit = 4;
    }

    private AMD64InstructionAttr curAttributes;

    AMD64InstructionAttr getCurAttributes() {
        return curAttributes;
    }

    void setCurAttributes(AMD64InstructionAttr attributes) {
        curAttributes = attributes;
    }

    /**
     * The x86 operand sizes.
     */
    public enum OperandSize {
        BYTE(1, AMD64Kind.BYTE) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            }

            @Override
            protected int immediateSize() {
                return 1;
            }
        },

        WORD(2, AMD64Kind.WORD, 0x66) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                assert imm == (short) imm;
                asm.emitShort(imm);
            }

            @Override
            protected int immediateSize() {
                return 2;
            }
        },

        DWORD(4, AMD64Kind.DWORD) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                asm.emitInt(imm);
            }

            @Override
            protected int immediateSize() {
                return 4;
            }
        },

        QWORD(8, AMD64Kind.QWORD) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                asm.emitInt(imm);
            }

            @Override
            protected int immediateSize() {
                return 4;
            }
        },

        SS(4, AMD64Kind.SINGLE, 0xF3, true),

        SD(8, AMD64Kind.DOUBLE, 0xF2, true),

        PS(16, AMD64Kind.V128_SINGLE, true),

        PD(16, AMD64Kind.V128_DOUBLE, 0x66, true);

        private final int sizePrefix;
        private final int bytes;
        private final boolean xmm;
        private final AMD64Kind kind;

        OperandSize(int bytes, AMD64Kind kind) {
            this(bytes, kind, 0);
        }

        OperandSize(int bytes, AMD64Kind kind, int sizePrefix) {
            this(bytes, kind, sizePrefix, false);
        }

        OperandSize(int bytes, AMD64Kind kind, boolean xmm) {
            this(bytes, kind, 0, xmm);
        }

        OperandSize(int bytes, AMD64Kind kind, int sizePrefix, boolean xmm) {
            this.sizePrefix = sizePrefix;
            this.bytes = bytes;
            this.kind = kind;
            this.xmm = xmm;
        }

        public int getBytes() {
            return bytes;
        }

        public boolean isXmmType() {
            return xmm;
        }

        public AMD64Kind getKind() {
            return kind;
        }

        public static OperandSize get(PlatformKind kind) {
            for (OperandSize operandSize : OperandSize.values()) {
                if (operandSize.kind.equals(kind)) {
                    return operandSize;
                }
            }
            throw GraalError.shouldNotReachHere("Unexpected kind: " + kind.toString());
        }

        /**
         * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
         * as sign-extended 32-bit values.
         *
         * @param asm
         * @param imm
         */
        protected void emitImmediate(AMD64Assembler asm, int imm) {
            throw new UnsupportedOperationException();
        }

        protected int immediateSize() {
            throw new UnsupportedOperationException();
        }
    }
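
    // Usage sketch (illustrative): mapping a platform kind to its operand size and
    // emitting a fitting immediate.
    //   OperandSize size = OperandSize.get(AMD64Kind.DWORD); // DWORD
    //   int bytes = size.getBytes();                         // 4
    //   size.emitImmediate(asm, 42);                         // emits 42 as a 32-bit value
    // Note that QWORD deliberately reports an immediate size of 4: x86-64 has no general
    // 64-bit immediate form, so QWORD immediates are emitted as sign-extended 32-bit values.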

    /**
     * Operand size and register type constraints.
     */
    private enum OpAssertion {
        ByteAssertion(CPU, CPU, BYTE),
        ByteOrLargerAssertion(CPU, CPU, BYTE, WORD, DWORD, QWORD),
        WordOrLargerAssertion(CPU, CPU, WORD, DWORD, QWORD),
        DwordOrLargerAssertion(CPU, CPU, DWORD, QWORD),
        WordOrDwordAssertion(CPU, CPU, WORD, QWORD),
        QwordAssertion(CPU, CPU, QWORD),
        FloatAssertion(XMM, XMM, SS, SD, PS, PD),
        PackedFloatAssertion(XMM, XMM, PS, PD),
        SingleAssertion(XMM, XMM, SS),
        DoubleAssertion(XMM, XMM, SD),
        PackedDoubleAssertion(XMM, XMM, PD),
        IntToFloatAssertion(XMM, CPU, DWORD, QWORD),
        FloatToIntAssertion(CPU, XMM, DWORD, QWORD);

        private final RegisterCategory resultCategory;
        private final RegisterCategory inputCategory;
        private final OperandSize[] allowedSizes;

        OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
            this.resultCategory = resultCategory;
            this.inputCategory = inputCategory;
            this.allowedSizes = allowedSizes;
        }

        protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
            assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
            assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;

            for (OperandSize s : allowedSizes) {
                if (size == s) {
                    return true;
                }
            }

            assert false : "invalid operand size " + size + " used in " + op;
            return false;
        }
    }
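
    // For example, FloatToIntAssertion describes CVTTSS2SI-style operations: the result must be
    // a CPU register, the input an XMM register, and only DWORD/QWORD result sizes are legal.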

    public abstract static class OperandDataAnnotation extends CodeAnnotation {
        /**
         * The position (bytes from the beginning of the method) of the operand.
         */
        public final int operandPosition;
        /**
         * The size of the operand, in bytes.
         */
        public final int operandSize;
        /**
         * The position (bytes from the beginning of the method) of the next instruction. On AMD64,
         * RIP-relative operands are relative to this position.
         */
        public final int nextInstructionPosition;

        OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition);

            this.operandPosition = operandPosition;
            this.operandSize = operandSize;
            this.nextInstructionPosition = nextInstructionPosition;
        }

        @Override
        public String toString() {
            return getClass().getSimpleName() + " instruction [" + instructionPosition + ", " + nextInstructionPosition + "[ operand at " + operandPosition + " size " + operandSize;
        }
    }

    /**
     * Annotation that stores additional information about the displacement of a
     * {@link Assembler#getPlaceholder placeholder address} that needs patching.
     */
    public static class AddressDisplacementAnnotation extends OperandDataAnnotation {
        AddressDisplacementAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
        }
    }

    /**
     * Annotation that stores additional information about the immediate operand, e.g., of a call
     * instruction, that needs patching.
     */
    public static class ImmediateOperandAnnotation extends OperandDataAnnotation {
        ImmediateOperandAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
        }
    }

    /**
     * Constructs an assembler for the AMD64 architecture.
     */
    public AMD64Assembler(TargetDescription target) {
        super(target);
    }

    public boolean supports(CPUFeature feature) {
        return ((AMD64) target.arch).getFeatures().contains(feature);
    }

    private static int encode(Register r) {
        assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding;
        return r.encoding & 0x7;
    }

    /**
     * Get RXB bits for register-register instruction. In that encoding, ModRM.rm contains a
     * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
     * field. The X bit must be 0.
     */
    protected static int getRXB(Register reg, Register rm) {
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
        return rxb;
    }
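
    // Worked example: getRXB(r9, rax) == 0b100. r9 has encoding 9, so (9 & 0x08) >> 1
    // contributes the R bit; rax (encoding 0) contributes nothing to B.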

    /**
     * Get RXB bits for register-memory instruction. The R bit extends the ModRM.reg field. There
     * are two cases for the memory operand:<br>
     * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.
     * <br>
     * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
     */
    protected static int getRXB(Register reg, AMD64Address rm) {
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        if (!rm.getIndex().equals(Register.None)) {
            rxb |= (rm.getIndex().encoding & 0x08) >> 2;
        }
        if (!rm.getBase().equals(Register.None)) {
            rxb |= (rm.getBase().encoding & 0x08) >> 3;
        }
        return rxb;
    }
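
    // Worked example: for an address like [r13 + r14*2], X extends SIB.index (r14) and
    // B extends SIB.base (r13), so getRXB(rax, addr) == 0b011.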

    /**
     * Emit the ModR/M byte for one register operand and an opcode extension in the R field.
     * <p>
     * Format: [ 11 reg r/m ]
     */
    protected void emitModRM(int reg, Register rm) {
        assert (reg & 0x07) == reg;
        emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
    }

    /**
     * Emit the ModR/M byte for two register operands.
     * <p>
     * Format: [ 11 reg r/m ]
     */
    protected void emitModRM(Register reg, Register rm) {
        emitModRM(reg.encoding & 0x07, rm);
    }
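
    // Worked example: emitModRM(rax, rcx) emits 0xC1, i.e. [ 11 000 001 ]
    // (mod = 11 for register-direct, reg = rax = 000, r/m = rcx = 001).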

    protected void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, false, additionalInstructionSize);
    }

    /**
     * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
     *
     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
     */
    protected void emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, force4Byte, additionalInstructionSize);
    }

    protected void emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize) {
        emitOperandHelper(reg, addr, false, additionalInstructionSize);
    }

    /**
     * Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode
     * extension in the R field.
     *
     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
     * @param additionalInstructionSize the number of bytes that will be emitted after the operand,
     *            so that the start position of the next instruction can be computed even though
     *            this instruction has not been completely emitted yet.
     */
    protected void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
        assert (reg & 0x07) == reg;
        int regenc = reg << 3;

        Register base = addr.getBase();
        Register index = addr.getIndex();

        AMD64Address.Scale scale = addr.getScale();
        int disp = addr.getDisplacement();

        if (base.equals(AMD64.rip)) { // also matches addresses returned by getPlaceholder()
            // [00 000 101] disp32
            assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
            emitByte(0x05 | regenc);
            if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
                codePatchingAnnotationConsumer.accept(new AddressDisplacementAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize));
            }
            emitInt(disp);
        } else if (base.isValid()) {
            int baseenc = base.isValid() ? encode(base) : 0;
            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [base + indexscale + disp]
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base + indexscale]
                    // [00 reg 100][ss index base]
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x04 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                } else if (isByte(disp) && !force4Byte) {
                    // [base + indexscale + imm8]
                    // [01 reg 100][ss index base] imm8
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x44 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                    emitByte(disp & 0xFF);
                } else {
                    // [base + indexscale + disp32]
                    // [10 reg 100][ss index base] disp32
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x84 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                    emitInt(disp);
                }
            } else if (base.equals(rsp) || base.equals(r12)) {
                // [rsp + disp]
                if (disp == 0) {
                    // [rsp]
                    // [00 reg 100][00 100 100]
                    emitByte(0x04 | regenc);
                    emitByte(0x24);
                } else if (isByte(disp) && !force4Byte) {
                    // [rsp + imm8]
                    // [01 reg 100][00 100 100] disp8
                    emitByte(0x44 | regenc);
                    emitByte(0x24);
                    emitByte(disp & 0xFF);
                } else {
                    // [rsp + imm32]
                    // [10 reg 100][00 100 100] disp32
                    emitByte(0x84 | regenc);
                    emitByte(0x24);
                    emitInt(disp);
                }
            } else {
                // [base + disp]
                assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base]
                    // [00 reg base]
                    emitByte(0x00 | regenc | baseenc);
                } else if (isByte(disp) && !force4Byte) {
                    // [base + disp8]
                    // [01 reg base] disp8
                    emitByte(0x40 | regenc | baseenc);
                    emitByte(disp & 0xFF);
                } else {
                    // [base + disp32]
                    // [10 reg base] disp32
                    emitByte(0x80 | regenc | baseenc);
                    emitInt(disp);
                }
            }
        } else {
            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [indexscale + disp]
                // [00 reg 100][ss index 101] disp32
                assert !index.equals(rsp) : "illegal addressing mode";
                emitByte(0x04 | regenc);
                emitByte(scale.log2 << 6 | indexenc | 0x05);
                emitInt(disp);
            } else {
                // [disp] ABSOLUTE
                // [00 reg 100][00 100 101] disp32
                emitByte(0x04 | regenc);
                emitByte(0x25);
                emitInt(disp);
            }
        }
        setCurAttributes(null);
    }
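
    // Worked example (illustrative): emitOperandHelper(encode(rax), [rbx + rsi*4 + 0x10], false, 0)
    // emits 0x44 0xB3 0x10: ModRM [ 01 000 100 ] announces a SIB byte plus disp8,
    // SIB [ 10 110 011 ] encodes scale=4, index=rsi, base=rbx, followed by the disp8 0x10.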

    /**
     * Base class for AMD64 opcodes.
     */
    public static class AMD64Op {

        protected static final int P_0F = 0x0F;
        protected static final int P_0F38 = 0x380F;
        protected static final int P_0F3A = 0x3A0F;

        private final String opcode;

        protected final int prefix1;
        protected final int prefix2;
        protected final int op;

        private final boolean dstIsByte;
        private final boolean srcIsByte;

        private final OpAssertion assertion;
        private final CPUFeature feature;

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
        }

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            this.opcode = opcode;
            this.prefix1 = prefix1;
            this.prefix2 = prefix2;
            this.op = op;

            this.dstIsByte = dstIsByte;
            this.srcIsByte = srcIsByte;

            this.assertion = assertion;
            this.feature = feature;
        }

        protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
            if (prefix1 != 0) {
                asm.emitByte(prefix1);
            }
            if (size.sizePrefix != 0) {
                asm.emitByte(size.sizePrefix);
            }
            int rexPrefix = 0x40 | rxb;
            if (size == QWORD) {
                rexPrefix |= 0x08;
            }
            if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
                asm.emitByte(rexPrefix);
            }
            if (prefix2 > 0xFF) {
                asm.emitShort(prefix2);
            } else if (prefix2 > 0) {
                asm.emitByte(prefix2);
            }
            asm.emitByte(op);
        }

        protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
            assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
            assert assertion.checkOperands(this, size, resultReg, inputReg);
            return true;
        }

        @Override
        public String toString() {
            return opcode;
        }
    }

    /**
     * Base class for AMD64 opcodes with immediate operands.
     */
    public static class AMD64ImmOp extends AMD64Op {

        private final boolean immIsByte;

        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
            super(opcode, 0, prefix, op, assertion, null);
            this.immIsByte = immIsByte;
        }

        protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
            if (immIsByte) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            } else {
                size.emitImmediate(asm, imm);
            }
        }

        protected final int immediateSize(OperandSize size) {
            if (immIsByte) {
                return 1;
            } else {
                return size.bytes;
            }
        }
    }

    /**
     * Opcode with operand order of either RM or MR for 2 address forms.
     */
    public abstract static class AMD64RROp extends AMD64Op {

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
        }

        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
    }

    /**
     * Opcode with operand order of either RM or MR for 3 address forms.
     */
    public abstract static class AMD64RRROp extends AMD64Op {

        protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
        }

        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src);
    }

    /**
     * Opcode with operand order of RM.
     */
    public static class AMD64RMOp extends AMD64RROp {
        // @formatter:off
        public static final AMD64RMOp IMUL   = new AMD64RMOp("IMUL",         P_0F, 0xAF, OpAssertion.ByteOrLargerAssertion);
        public static final AMD64RMOp BSF    = new AMD64RMOp("BSF",          P_0F, 0xBC);
        public static final AMD64RMOp BSR    = new AMD64RMOp("BSR",          P_0F, 0xBD);
        public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, CPUFeature.POPCNT);
        public static final AMD64RMOp TZCNT  = new AMD64RMOp("TZCNT",  0xF3, P_0F, 0xBC, CPUFeature.BMI1);
        public static final AMD64RMOp LZCNT  = new AMD64RMOp("LZCNT",  0xF3, P_0F, 0xBD, CPUFeature.LZCNT);
        public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB",       P_0F, 0xB6, false, true, OpAssertion.WordOrLargerAssertion);
        public static final AMD64RMOp MOVZX  = new AMD64RMOp("MOVZX",        P_0F, 0xB7, OpAssertion.DwordOrLargerAssertion);
        public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB",       P_0F, 0xBE, false, true, OpAssertion.WordOrLargerAssertion);
        public static final AMD64RMOp MOVSX  = new AMD64RMOp("MOVSX",        P_0F, 0xBF, OpAssertion.DwordOrLargerAssertion);
        public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD",             0x63, OpAssertion.QwordAssertion);
        public static final AMD64RMOp MOVB   = new AMD64RMOp("MOVB",               0x8A, OpAssertion.ByteAssertion);
        public static final AMD64RMOp MOV    = new AMD64RMOp("MOV",                0x8B);
        public static final AMD64RMOp CMP    = new AMD64RMOp("CMP",                0x3B);

        // MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with different operand size prefix
        public static final AMD64RMOp MOVD   = new AMD64RMOp("MOVD",   0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVQ   = new AMD64RMOp("MOVQ",   0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVSS  = new AMD64RMOp("MOVSS",        P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);
        public static final AMD64RMOp MOVSD  = new AMD64RMOp("MOVSD",        P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);

        // TEST is documented as MR operation, but it's symmetric, and using it as RM operation is more convenient.
        public static final AMD64RMOp TESTB  = new AMD64RMOp("TEST",               0x84, OpAssertion.ByteAssertion);
        public static final AMD64RMOp TEST   = new AMD64RMOp("TEST",               0x85);
        // @formatter:on
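
        // Usage sketch (illustrative): the ops above emit the classic two-operand forms.
        //   AMD64RMOp.MOV.emit(asm, DWORD, rax, rbx);  // 0x8B 0xC3        mov eax, ebx
        //   AMD64RMOp.MOV.emit(asm, QWORD, rax, rbx);  // 0x48 0x8B 0xC3   mov rax, rbx (REX.W)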

        protected AMD64RMOp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64RMOp(String opcode, int prefix, int op) {
            this(opcode, 0, prefix, op, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, OpAssertion.WordOrLargerAssertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, dst, src);
            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x2A:
                case 0x2C:
                case 0x2E:
                case 0x5A:
                case 0x6E:
                    isSimd = true;
                    noNds = true;
                    break;
                case 0x10:
                case 0x51:
                case 0x54:
                case 0x55:
                case 0x56:
                case 0x57:
                case 0x58:
                case 0x59:
                case 0x5C:
                case 0x5D:
                case 0x5E:
                case 0x5F:
                    isSimd = true;
                    break;
            }

            int opc = 0;
            if (isSimd) {
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        opc = VexOpcode.VEX_OPCODE_NONE;
                        isSimd = false;
                        break;
                }
            }

            if (isSimd) {
                int pre;
                boolean rexVexW = (size == QWORD);
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                int encode;
                if (noNds) {
                    encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
                } else {
                    encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitByte(0xC0 | encode);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
            assert verify(asm, size, dst, null);
            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x10:
                case 0x2A:
                case 0x2C:
                case 0x2E:
                case 0x6E:
                    isSimd = true;
                    noNds = true;
                    break;
                case 0x51:
                case 0x54:
                case 0x55:
                case 0x56:
                case 0x57:
                case 0x58:
                case 0x59:
                case 0x5C:
                case 0x5D:
                case 0x5E:
                case 0x5F:
                    isSimd = true;
                    break;
            }

            int opc = 0;
            if (isSimd) {
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        isSimd = false;
                        break;
                }
            }

            if (isSimd) {
                int pre;
                boolean rexVexW = (size == QWORD);
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                if (noNds) {
                    asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
                } else {
                    asm.simdPrefix(dst, dst, src, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitOperandHelper(dst, src, 0);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
                asm.emitOperandHelper(dst, src, 0);
            }
        }
    }

    /**
     * Opcode with operand order of RM plus an additional non-destructive source operand (3
     * address form); the extra operand is carried in the VEX.vvvv field.
     */
    public static class AMD64RRMOp extends AMD64RRROp {
        protected AMD64RRMOp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64RRMOp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op) {
            this(opcode, 0, prefix, op, null);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion, null);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
        }

        protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, OpAssertion.WordOrLargerAssertion, feature);
        }

        protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src) {
            assert verify(asm, size, dst, src);
            int pre;
            int opc;
            boolean rexVexW = (size == QWORD);
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
            int curPrefix = size.sizePrefix | prefix1;
            switch (curPrefix) {
                case 0x66:
                    pre = VexSimdPrefix.VEX_SIMD_66;
                    break;
                case 0xF2:
                    pre = VexSimdPrefix.VEX_SIMD_F2;
                    break;
                case 0xF3:
                    pre = VexSimdPrefix.VEX_SIMD_F3;
                    break;
                default:
                    pre = VexSimdPrefix.VEX_SIMD_NONE;
                    break;
            }
            switch (prefix2) {
                case P_0F:
                    opc = VexOpcode.VEX_OPCODE_0F;
                    break;
                case P_0F38:
                    opc = VexOpcode.VEX_OPCODE_0F_38;
                    break;
                case P_0F3A:
                    opc = VexOpcode.VEX_OPCODE_0F_3A;
                    break;
                default:
                    throw GraalError.shouldNotReachHere("invalid VEX instruction prefix");
            }
            int encode = asm.simdPrefixAndEncode(dst, nds, src, pre, opc, attributes);
            asm.emitByte(op);
            asm.emitByte(0xC0 | encode);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, AMD64Address src) {
            assert verify(asm, size, dst, null);
            int pre;
            int opc;
            boolean rexVexW = (size == QWORD);
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
            int curPrefix = size.sizePrefix | prefix1;
            switch (curPrefix) {
                case 0x66:
                    pre = VexSimdPrefix.VEX_SIMD_66;
                    break;
                case 0xF2:
                    pre = VexSimdPrefix.VEX_SIMD_F2;
                    break;
                case 0xF3:
                    pre = VexSimdPrefix.VEX_SIMD_F3;
                    break;
                default:
                    pre = VexSimdPrefix.VEX_SIMD_NONE;
                    break;
            }
            switch (prefix2) {
                case P_0F:
                    opc = VexOpcode.VEX_OPCODE_0F;
                    break;
                case P_0F38:
                    opc = VexOpcode.VEX_OPCODE_0F_38;
                    break;
                case P_0F3A:
                    opc = VexOpcode.VEX_OPCODE_0F_3A;
                    break;
                default:
                    throw GraalError.shouldNotReachHere("invalid VEX instruction prefix");
            }
            asm.simdPrefix(dst, nds, src, pre, opc, attributes);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 0);
        }
    }
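
    // Sketch (hypothetical; no AMD64RRMOp instances are defined in this file): an op such as
    // new AMD64RRMOp("VADDSD", 0xF2, P_0F, 0x58, OpAssertion.FloatAssertion, CPUFeature.AVX)
    // would emit the VEX-encoded three-operand form dst = nds + src, with nds carried in
    // the VEX.vvvv field.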

    /**
     * Opcode with operand order of MR.
     */
    public static class AMD64MROp extends AMD64RROp {
        // @formatter:off
        public static final AMD64MROp MOVB   = new AMD64MROp("MOVB",               0x88, OpAssertion.ByteAssertion);
        public static final AMD64MROp MOV    = new AMD64MROp("MOV",                0x89);

        // MOVD and MOVQ are the same opcode, just with different operand size prefix
        // Note that as MR opcodes, they have reverse operand order, so the IntToFloatAssertion must be used.
        public static final AMD64MROp MOVD   = new AMD64MROp("MOVD",   0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64MROp MOVQ   = new AMD64MROp("MOVQ",   0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);

        // MOVSS and MOVSD are the same opcode, just with different operand size prefix
        public static final AMD64MROp MOVSS  = new AMD64MROp("MOVSS",        P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
        public static final AMD64MROp MOVSD  = new AMD64MROp("MOVSD",        P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
        // @formatter:on
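
        // Usage sketch (illustrative): MR ops store the register operand into memory.
        //   AMD64MROp.MOV.emit(asm, DWORD, new AMD64Address(rbx), rax);  // 0x89 0x03   mov [rbx], eax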
1160 
AMD64MROp(String opcode, int op)1161         protected AMD64MROp(String opcode, int op) {
1162             this(opcode, 0, op);
1163         }
1164 
AMD64MROp(String opcode, int op, OpAssertion assertion)1165         protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
1166             this(opcode, 0, op, assertion);
1167         }
1168 
AMD64MROp(String opcode, int prefix, int op)1169         protected AMD64MROp(String opcode, int prefix, int op) {
1170             this(opcode, prefix, op, OpAssertion.WordOrLargerAssertion);
1171         }
1172 
AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion)1173         protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
1174             this(opcode, prefix, op, assertion, null);
1175         }
1176 
AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature)1177         protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
1178             this(opcode, 0, prefix, op, assertion, feature);
1179         }
1180 
AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature)1181         protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
1182             super(opcode, prefix1, prefix2, op, assertion, feature);
1183         }
1184 
1185         @Override
emit(AMD64Assembler asm, OperandSize size, Register dst, Register src)1186         public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
1187             assert verify(asm, size, src, dst);
1188             boolean isSimd = false;
1189             boolean noNds = false;
1190 
1191             switch (op) {
1192                 case 0x7E:
1193                     isSimd = true;
1194                     noNds = true;
1195                     break;
1196                 case 0x11:
1197                     isSimd = true;
1198                     break;
1199             }
1200 
1201             int opc = 0;
1202             if (isSimd) {
1203                 switch (prefix2) {
1204                     case P_0F:
1205                         opc = VexOpcode.VEX_OPCODE_0F;
1206                         break;
1207                     case P_0F38:
1208                         opc = VexOpcode.VEX_OPCODE_0F_38;
1209                         break;
1210                     case P_0F3A:
1211                         opc = VexOpcode.VEX_OPCODE_0F_3A;
1212                         break;
1213                     default:
1214                         isSimd = false;
1215                         break;
1216                 }
1217             }
1218 
1219             if (isSimd) {
1220                 int pre;
1221                 boolean rexVexW = (size == QWORD) ? true : false;
1222                 AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
1223                 int curPrefix = size.sizePrefix | prefix1;
1224                 switch (curPrefix) {
1225                     case 0x66:
1226                         pre = VexSimdPrefix.VEX_SIMD_66;
1227                         break;
1228                     case 0xF2:
1229                         pre = VexSimdPrefix.VEX_SIMD_F2;
1230                         break;
1231                     case 0xF3:
1232                         pre = VexSimdPrefix.VEX_SIMD_F3;
1233                         break;
1234                     default:
1235                         pre = VexSimdPrefix.VEX_SIMD_NONE;
1236                         break;
1237                 }
1238                 int encode;
1239                 if (noNds) {
1240                     encode = asm.simdPrefixAndEncode(src, Register.None, dst, pre, opc, attributes);
1241                 } else {
1242                     encode = asm.simdPrefixAndEncode(src, src, dst, pre, opc, attributes);
1243                 }
1244                 asm.emitByte(op);
1245                 asm.emitByte(0xC0 | encode);
1246             } else {
1247                 emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
1248                 asm.emitModRM(src, dst);
1249             }
1250         }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
            assert verify(asm, size, null, src);
            boolean isSimd = false;

            switch (op) {
                case 0x7E:
                case 0x11:
                    isSimd = true;
                    break;
            }

            int opc = 0;
            if (isSimd) {
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        isSimd = false;
                        break;
                }
            }

            if (isSimd) {
                int pre;
                boolean rexVexW = size == QWORD;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                asm.simdPrefix(src, Register.None, dst, pre, opc, attributes);
                asm.emitByte(op);
                asm.emitOperandHelper(src, dst, 0);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
                asm.emitOperandHelper(src, dst, 0);
            }
        }
    }

    /**
     * Opcodes with operand order of M.
     */
    public static class AMD64MOp extends AMD64Op {
        // @formatter:off
        public static final AMD64MOp NOT  = new AMD64MOp("NOT",  0xF7, 2);
        public static final AMD64MOp NEG  = new AMD64MOp("NEG",  0xF7, 3);
        public static final AMD64MOp MUL  = new AMD64MOp("MUL",  0xF7, 4);
        public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
        public static final AMD64MOp DIV  = new AMD64MOp("DIV",  0xF7, 6);
        public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
        public static final AMD64MOp INC  = new AMD64MOp("INC",  0xFF, 0);
        public static final AMD64MOp DEC  = new AMD64MOp("DEC",  0xFF, 1);
        public static final AMD64MOp PUSH = new AMD64MOp("PUSH", 0xFF, 6);
        public static final AMD64MOp POP  = new AMD64MOp("POP",  0x8F, 0, OpAssertion.WordOrDwordAssertion);
        // @formatter:on

        private final int ext;

        protected AMD64MOp(String opcode, int op, int ext) {
            this(opcode, 0, op, ext);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext) {
            this(opcode, prefix, op, ext, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MOp(String opcode, int op, int ext, OpAssertion assertion) {
            this(opcode, 0, op, ext, assertion);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, 0, prefix, op, assertion, null);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
            assert verify(asm, size, dst, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
            assert verify(asm, size, null, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, 0);
        }
    }

    /**
     * Opcodes with operand order of MI.
     */
    public static class AMD64MIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true,  0xC6, 0, OpAssertion.ByteAssertion);
        public static final AMD64MIOp MOV  = new AMD64MIOp("MOV",  false, 0xC7, 0);
        public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
        // @formatter:on

        private final int ext;

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
            this(opcode, immIsByte, op, ext, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
            this(opcode, immIsByte, 0, op, ext, assertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, immIsByte, prefix, op, assertion);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
            assert verify(asm, size, dst, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
            emitImmediate(asm, size, imm);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
            assert verify(asm, size, null, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, immediateSize(size));
            emitImmediate(asm, size, imm);
        }
    }

    /**
     * Opcodes with operand order of RMI.
     *
     * Since the rounding operations only ever read a single input operand, there is only one form
     * of each; extending them to three-address (AVX-style) forms would be redundant.
     */
    public static class AMD64RMIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64RMIOp IMUL    = new AMD64RMIOp("IMUL", false, 0x69);
        public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true,  0x6B);
        public static final AMD64RMIOp ROUNDSS = new AMD64RMIOp("ROUNDSS", true, P_0F3A, 0x0A, OpAssertion.PackedDoubleAssertion);
        public static final AMD64RMIOp ROUNDSD = new AMD64RMIOp("ROUNDSD", true, P_0F3A, 0x0B, OpAssertion.PackedDoubleAssertion);
        // @formatter:on

        protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
            this(opcode, immIsByte, 0, op, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
            super(opcode, immIsByte, prefix, op, assertion);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
            assert verify(asm, size, dst, src);
            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x0A:
                case 0x0B:
                    isSimd = true;
                    noNds = true;
                    break;
            }

            int opc = 0;
            if (isSimd) {
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        isSimd = false;
                        break;
                }
            }

            if (isSimd) {
                int pre;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                int encode;
                if (noNds) {
                    encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
                } else {
                    encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitByte(0xC0 | encode);
                emitImmediate(asm, size, imm);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
                emitImmediate(asm, size, imm);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
            assert verify(asm, size, dst, null);

            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x0A:
                case 0x0B:
                    isSimd = true;
                    noNds = true;
                    break;
            }

            int opc = 0;
            if (isSimd) {
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        isSimd = false;
                        break;
                }
            }

            if (isSimd) {
                int pre;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                if (noNds) {
                    asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
                } else {
                    asm.simdPrefix(dst, dst, src, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitOperandHelper(dst, src, immediateSize(size));
                emitImmediate(asm, size, imm);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
                asm.emitOperandHelper(dst, src, immediateSize(size));
                emitImmediate(asm, size, imm);
            }
        }
    }

    public static class SSEOp extends AMD64RMOp {
        // @formatter:off
        public static final SSEOp CVTSI2SS  = new SSEOp("CVTSI2SS",  0xF3, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
        public static final SSEOp CVTSI2SD  = new SSEOp("CVTSI2SD",  0xF2, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
        public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
        public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
        public static final SSEOp UCOMIS    = new SSEOp("UCOMIS",          P_0F, 0x2E, OpAssertion.PackedFloatAssertion);
        public static final SSEOp SQRT      = new SSEOp("SQRT",            P_0F, 0x51);
        public static final SSEOp AND       = new SSEOp("AND",             P_0F, 0x54, OpAssertion.PackedFloatAssertion);
        public static final SSEOp ANDN      = new SSEOp("ANDN",            P_0F, 0x55, OpAssertion.PackedFloatAssertion);
        public static final SSEOp OR        = new SSEOp("OR",              P_0F, 0x56, OpAssertion.PackedFloatAssertion);
        public static final SSEOp XOR       = new SSEOp("XOR",             P_0F, 0x57, OpAssertion.PackedFloatAssertion);
        public static final SSEOp ADD       = new SSEOp("ADD",             P_0F, 0x58);
        public static final SSEOp MUL       = new SSEOp("MUL",             P_0F, 0x59);
        public static final SSEOp CVTSS2SD  = new SSEOp("CVTSS2SD",        P_0F, 0x5A, OpAssertion.SingleAssertion);
        public static final SSEOp CVTSD2SS  = new SSEOp("CVTSD2SS",        P_0F, 0x5A, OpAssertion.DoubleAssertion);
        public static final SSEOp SUB       = new SSEOp("SUB",             P_0F, 0x5C);
        public static final SSEOp MIN       = new SSEOp("MIN",             P_0F, 0x5D);
        public static final SSEOp DIV       = new SSEOp("DIV",             P_0F, 0x5E);
        public static final SSEOp MAX       = new SSEOp("MAX",             P_0F, 0x5F);
        // @formatter:on
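
        // The mandatory prefix selects the data type of a shared base opcode: for ADD (0x58),
        // 0F 58 is ADDPS, 66 0F 58 is ADDPD, F3 0F 58 is ADDSS and F2 0F 58 is ADDSD. The prefix
        // is derived from the OperandSize (PS, PD, SS, SD) in effect when the op is emitted.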

        protected SSEOp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.FloatAssertion);
        }

        protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion);
        }

        protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
            super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
        }
    }

    public static class AVXOp extends AMD64RRMOp {
        // @formatter:off
        public static final AVXOp AND       = new AVXOp("AND",             P_0F, 0x54, OpAssertion.PackedFloatAssertion);
        public static final AVXOp ANDN      = new AVXOp("ANDN",            P_0F, 0x55, OpAssertion.PackedFloatAssertion);
        public static final AVXOp OR        = new AVXOp("OR",              P_0F, 0x56, OpAssertion.PackedFloatAssertion);
        public static final AVXOp XOR       = new AVXOp("XOR",             P_0F, 0x57, OpAssertion.PackedFloatAssertion);
        public static final AVXOp ADD       = new AVXOp("ADD",             P_0F, 0x58);
        public static final AVXOp MUL       = new AVXOp("MUL",             P_0F, 0x59);
        public static final AVXOp SUB       = new AVXOp("SUB",             P_0F, 0x5C);
        public static final AVXOp MIN       = new AVXOp("MIN",             P_0F, 0x5D);
        public static final AVXOp DIV       = new AVXOp("DIV",             P_0F, 0x5E);
        public static final AVXOp MAX       = new AVXOp("MAX",             P_0F, 0x5F);
        // @formatter:on
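
        // Unlike the destructive two-operand SSE forms above, these ops are VEX-encoded in a
        // three-operand (RRM) layout where the extra nds register supplies the first source, so
        // the destination does not have to double as an input.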

        protected AVXOp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.FloatAssertion);
        }

        protected AVXOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion);
        }

        protected AVXOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
            super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.AVX);
        }
    }

    /**
     * Arithmetic operation with operand order of RM, MR or MI.
     */
    public static final class AMD64BinaryArithmetic {
        // @formatter:off
        public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
        public static final AMD64BinaryArithmetic OR  = new AMD64BinaryArithmetic("OR",  1);
        public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
        public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
        public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
        public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
        public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
        public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
        // @formatter:on

        private final AMD64MIOp byteImmOp;
        private final AMD64MROp byteMrOp;
        private final AMD64RMOp byteRmOp;

        private final AMD64MIOp immOp;
        private final AMD64MIOp immSxOp;
        private final AMD64MROp mrOp;
        private final AMD64RMOp rmOp;

        private AMD64BinaryArithmetic(String opcode, int code) {
            int baseOp = code << 3;
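            // Bits 3..5 of the base opcode byte select the operation; for SUB (code 5) this
            // yields 0x28 (SUB r/m8, r8), 0x29 (SUB r/m32, r32), 0x2A (SUB r8, r/m8) and
            // 0x2B (SUB r32, r/m32), while the immediate forms 0x80/0x81/0x83 reuse the code
            // as the ModRM reg-field extension (e.g. 0x83 /5 is the sign-extended
            // byte-immediate SUB).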

            byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
            byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
            byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);

            immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.WordOrLargerAssertion);
            immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.WordOrLargerAssertion);
            mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.WordOrLargerAssertion);
            rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.WordOrLargerAssertion);
        }

        public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
            if (size == BYTE) {
                return byteImmOp;
            } else if (sx) {
                return immSxOp;
            } else {
                return immOp;
            }
        }

        public AMD64MROp getMROpcode(OperandSize size) {
            if (size == BYTE) {
                return byteMrOp;
            } else {
                return mrOp;
            }
        }

        public AMD64RMOp getRMOpcode(OperandSize size) {
            if (size == BYTE) {
                return byteRmOp;
            } else {
                return rmOp;
            }
        }
    }

    /**
     * Shift operation with operand order of M1, MC or MI.
     */
    public static final class AMD64Shift {
        // @formatter:off
        public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
        public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
        public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
        public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
        public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
        public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
        public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
        // @formatter:on

        public final AMD64MOp m1Op;
        public final AMD64MOp mcOp;
        public final AMD64MIOp miOp;

        private AMD64Shift(String opcode, int code) {
            m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.WordOrLargerAssertion);
            mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.WordOrLargerAssertion);
            miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.WordOrLargerAssertion);
        }
    }
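
    // Usage sketch (illustrative): a 32-bit shift-left of a register by a constant goes through
    // the MI form, e.g. AMD64Shift.SHL.miOp.emit(asm, DWORD, reg, 5), which emits C1 /4 ib;
    // the M1 form (D1 /4) covers shift-by-one and the MC form (D3 /4) shift-by-CL.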

    public final void addl(AMD64Address dst, int imm32) {
        ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void addl(Register dst, int imm32) {
        ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void addl(Register dst, Register src) {
        ADD.rmOp.emit(this, DWORD, dst, src);
    }

    public final void addpd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x58);
        emitByte(0xC0 | encode);
    }

    public final void addpd(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x58);
        emitOperandHelper(dst, src, 0);
    }

    public final void addsd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x58);
        emitByte(0xC0 | encode);
    }

    public final void addsd(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x58);
        emitOperandHelper(dst, src, 0);
    }

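    // The addrNop methods below emit the recommended multi-byte NOPs (0F 1F /0 with increasingly
    // wide addressing forms); a single long NOP decodes more cheaply than an equivalent run of
    // one-byte 0x90 NOPs when padding code to an alignment boundary.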
    private void addrNop4() {
        // 4 bytes: NOP DWORD PTR [EAX+0]
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc);
        emitByte(0); // 8-bit offset (1 byte)
    }

    private void addrNop5() {
        // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] with 8-bit offset
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4);
        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
        emitByte(0); // 8-bit offset (1 byte)
    }

    private void addrNop7() {
        // 7 bytes: NOP DWORD PTR [EAX+0] with 32-bit offset
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc);
        emitInt(0); // 32-bit offset (4 bytes)
    }

    private void addrNop8() {
        // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] with 32-bit offset
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4);
        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
        emitInt(0); // 32-bit offset (4 bytes)
    }

    public final void andl(Register dst, int imm32) {
        AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void andl(Register dst, Register src) {
        AND.rmOp.emit(this, DWORD, dst, src);
    }

    public final void andpd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x54);
        emitByte(0xC0 | encode);
    }

    public final void andpd(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x54);
        emitOperandHelper(dst, src, 0);
    }

    public final void bsfq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding(), src.encoding());
        emitByte(0x0F);
        emitByte(0xBC);
        emitByte(0xC0 | encode);
    }

    public final void bsrl(Register dst, Register src) {
        int encode = prefixAndEncode(dst.encoding(), src.encoding());
        emitByte(0x0F);
        emitByte(0xBD);
        emitByte(0xC0 | encode);
    }

    public final void bswapl(Register reg) {
        int encode = prefixAndEncode(reg.encoding);
        emitByte(0x0F);
        emitByte(0xC8 | encode);
    }

    public final void cdql() {
        emitByte(0x99);
    }

    public final void cmovl(ConditionFlag cc, Register dst, Register src) {
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0x40 | cc.getValue());
        emitByte(0xC0 | encode);
    }

    public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0x40 | cc.getValue());
        emitOperandHelper(dst, src, 0);
    }

    public final void cmpl(Register dst, int imm32) {
        CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void cmpl(Register dst, Register src) {
        CMP.rmOp.emit(this, DWORD, dst, src);
    }

    public final void cmpl(Register dst, AMD64Address src) {
        CMP.rmOp.emit(this, DWORD, dst, src);
    }

    public final void cmpl(AMD64Address dst, int imm32) {
        CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    /**
     * The 8-bit cmpxchg compares the value at adr with the contents of rax; if they are equal, reg
     * is stored into adr, otherwise the value at adr is loaded into rax. The ZF flag is set if the
     * compared values were equal, and cleared otherwise.
     */
    public final void cmpxchgb(Register reg, AMD64Address adr) { // cmpxchg
        prefixb(adr, reg);
        emitByte(0x0F);
        emitByte(0xB0);
        emitOperandHelper(reg, adr, 0);
    }

    /**
     * The 16-bit cmpxchg compares the value at adr with the contents of rax; if they are equal,
     * reg is stored into adr, otherwise the value at adr is loaded into rax. The ZF flag is set if
     * the compared values were equal, and cleared otherwise.
     */
    public final void cmpxchgw(Register reg, AMD64Address adr) { // cmpxchg
        emitByte(0x66); // Switch to 16-bit mode.
        prefix(adr, reg);
        emitByte(0x0F);
        emitByte(0xB1);
        emitOperandHelper(reg, adr, 0);
    }

    /**
     * The 32-bit cmpxchg compares the value at adr with the contents of rax; if they are equal,
     * reg is stored into adr, otherwise the value at adr is loaded into rax. The ZF flag is set if
     * the compared values were equal, and cleared otherwise.
     */
    public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg
        prefix(adr, reg);
        emitByte(0x0F);
        emitByte(0xB1);
        emitOperandHelper(reg, adr, 0);
    }
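
    // A typical atomic compare-and-swap sequence built from these primitives (illustrative; the
    // caller must load the expected value into rax/eax beforehand):
    //   asm.lock();                   // 0xF0 prefix makes the following cmpxchg atomic
    //   asm.cmpxchgl(newValue, addr); // compares [addr] with eax and stores newValue on match
    //   asm.jcc(ConditionFlag.Equal, done); // ZF reports success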

    public final void cvtsi2sdl(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x2A);
        emitByte(0xC0 | encode);
    }

    public final void cvttsd2sil(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x2C);
        emitByte(0xC0 | encode);
    }

    protected final void decl(AMD64Address dst) {
        prefix(dst);
        emitByte(0xFF);
        emitOperandHelper(1, dst, 0);
    }

    public final void divsd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x5E);
        emitByte(0xC0 | encode);
    }

    public final void evmovdquq(Register dst, AMD64Address src, int vectorLen) {
        assert supports(CPUFeature.AVX512F);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(vectorLen, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ true, target);
        attributes.setAddressAttributes(/* tupleType */ EvexTupleType.EVEX_FVM, /* inputSizeInBits */ EvexInputSizeInBits.EVEX_NObit);
        attributes.setIsEvexInstruction();
        vexPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6F);
        emitOperandHelper(dst, src, 0);
    }

    public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src, int vectorLen) {
        assert supports(CPUFeature.AVX512BW);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(vectorLen, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ true, /* usesVl */ false, target);
        attributes.setIsEvexInstruction();
        attributes.setAddressAttributes(/* tupleType */ EvexTupleType.EVEX_FVM, /* inputSizeInBits */ EvexInputSizeInBits.EVEX_NObit);
        vexPrefix(src, nds, kdst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x74);
        emitOperandHelper(kdst, src, 0);
    }

    public final void hlt() {
        emitByte(0xF4);
    }

    public final void imull(Register dst, Register src, int value) {
        if (isByte(value)) {
            AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value);
        } else {
            AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value);
        }
    }

    protected final void incl(AMD64Address dst) {
        prefix(dst);
        emitByte(0xFF);
        emitOperandHelper(0, dst, 0);
    }

    public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) {
        int shortSize = 2;
        int longSize = 6;
        long disp = jumpTarget - position();
        if (!forceDisp32 && isByte(disp - shortSize)) {
            // 0111 tttn #8-bit disp
            emitByte(0x70 | cc.getValue());
            emitByte((int) ((disp - shortSize) & 0xFF));
        } else {
            // 0000 1111 1000 tttn #32-bit disp
            assert isInt(disp - longSize) : "must be 32bit offset (call4)";
            emitByte(0x0F);
            emitByte(0x80 | cc.getValue());
            emitInt((int) (disp - longSize));
        }
    }
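
    // Worked example (illustrative): with the assembler at position 10 and a bound target at
    // position 100, disp is 90; since 90 - 2 fits in a byte, the short form is chosen and the
    // bytes 0x70|cc, 0x58 are emitted. Displacements are relative to the end of the instruction,
    // hence the "- shortSize" / "- longSize" adjustments above.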

    public final void jcc(ConditionFlag cc, Label l) {
        assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc";
        if (l.isBound()) {
            jcc(cc, l.position(), false);
        } else {
            // Note: we could eliminate conditional jumps to this jump if the condition is the
            // same, but that seems to be a rather unlikely case.
            // Note: use jccb() if the label to be bound is very close, to get an 8-bit
            // displacement.
            l.addPatchAt(position());
            emitByte(0x0F);
            emitByte(0x80 | cc.getValue());
            emitInt(0);
        }
    }

    public final void jccb(ConditionFlag cc, Label l) {
        if (l.isBound()) {
            int shortSize = 2;
            int entry = l.position();
            assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp";
            long disp = entry - position();
            // 0111 tttn #8-bit disp
            emitByte(0x70 | cc.getValue());
            emitByte((int) ((disp - shortSize) & 0xFF));
        } else {
            l.addPatchAt(position());
            emitByte(0x70 | cc.getValue());
            emitByte(0);
        }
    }

    public final void jmp(int jumpTarget, boolean forceDisp32) {
        int shortSize = 2;
        int longSize = 5;
        long disp = jumpTarget - position();
        if (!forceDisp32 && isByte(disp - shortSize)) {
            emitByte(0xEB);
            emitByte((int) ((disp - shortSize) & 0xFF));
        } else {
            emitByte(0xE9);
            emitInt((int) (disp - longSize));
        }
    }

    @Override
    public final void jmp(Label l) {
        if (l.isBound()) {
            jmp(l.position(), false);
        } else {
            // By default, forward jumps are always 32-bit displacements, since
            // we can't yet know where the label will be bound. If you're sure that
            // the forward jump will not reach beyond 127 bytes, use jmpb to
            // force an 8-bit displacement.
            l.addPatchAt(position());
            emitByte(0xE9);
            emitInt(0);
        }
    }

    public final void jmp(Register entry) {
        int encode = prefixAndEncode(entry.encoding);
        emitByte(0xFF);
        emitByte(0xE0 | encode);
    }

    public final void jmp(AMD64Address adr) {
        prefix(adr);
        emitByte(0xFF);
        emitOperandHelper(rsp, adr, 0);
    }
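
    // The indirect jump through memory above is FF /4; rsp is not a real operand there, it merely
    // supplies the value 4 (its register encoding) for the reg field of the ModRM byte.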

    public final void jmpb(Label l) {
        if (l.isBound()) {
            int shortSize = 2;
            int entry = l.position();
            assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp";
            long offs = entry - position();
            emitByte(0xEB);
            emitByte((int) ((offs - shortSize) & 0xFF));
        } else {
            l.addPatchAt(position());
            emitByte(0xEB);
            emitByte(0);
        }
    }

    // KORTEST sets ZF if the bitwise OR of the two mask registers is all zeros, and CF if it is
    // all ones.
    public final void kortestql(Register src1, Register src2) {
        assert supports(CPUFeature.AVX512BW);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ true, /* noMaskReg */ true, /* usesVl */ false, target);
        int encode = vexPrefixAndEncode(src1, Register.None, src2, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x98);
        emitByte(0xC0 | encode);
    }

    public final void kmovql(Register dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        if (src.getRegisterCategory().equals(AMD64.MASK)) {
            // kmovql(KRegister dst, KRegister src)
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ true, /* noMaskReg */ true, /* usesVl */ false, target);
            int encode = vexPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x90);
            emitByte(0xC0 | encode);
        } else {
            // kmovql(KRegister dst, Register src)
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ true, /* noMaskReg */ true, /* usesVl */ false, target);
            int encode = vexPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x92);
            emitByte(0xC0 | encode);
        }
    }

    public final void lead(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x8D);
        emitOperandHelper(dst, src, 0);
    }

    public final void leaq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x8D);
        emitOperandHelper(dst, src, 0);
    }

    public final void leave() {
        emitByte(0xC9);
    }

    public final void lock() {
        emitByte(0xF0);
    }

    public final void movapd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x28);
        emitByte(0xC0 | encode);
    }

    public final void movaps(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x28);
        emitByte(0xC0 | encode);
    }

    public final void movb(AMD64Address dst, int imm8) {
        prefix(dst);
        emitByte(0xC6);
        emitOperandHelper(0, dst, 1);
        emitByte(imm8);
    }

    public final void movb(AMD64Address dst, Register src) {
        assert src.getRegisterCategory().equals(AMD64.CPU) : "must have byte register";
        prefixb(dst, src);
        emitByte(0x88);
        emitOperandHelper(src, dst, 0);
    }

    public final void movl(Register dst, int imm32) {
        int encode = prefixAndEncode(dst.encoding);
        emitByte(0xB8 | encode);
        emitInt(imm32);
    }

    public final void movl(Register dst, Register src) {
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x8B);
        emitByte(0xC0 | encode);
    }

    public final void movl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x8B);
        emitOperandHelper(dst, src, 0);
    }

    /**
     * @param wide use 4 byte encoding for displacements that would normally fit in a byte
     */
    public final void movl(Register dst, AMD64Address src, boolean wide) {
        prefix(src, dst);
        emitByte(0x8B);
        emitOperandHelper(dst, src, wide, 0);
    }

    public final void movl(AMD64Address dst, int imm32) {
        prefix(dst);
        emitByte(0xC7);
        emitOperandHelper(0, dst, 4);
        emitInt(imm32);
    }

    public final void movl(AMD64Address dst, Register src) {
        prefix(dst, src);
        emitByte(0x89);
        emitOperandHelper(src, dst, 0);
    }

    /**
     * Newer CPUs require the use of movsd and movss to avoid partial register stalls when loading
     * from memory, but on old Opterons movlpd is used instead of movsd. The selection is done in
     * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and
     * {@link AMD64MacroAssembler#movflt(Register, Register)}.
     */
    public final void movlpd(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x12);
        emitOperandHelper(dst, src, 0);
    }
2200 
2201     public final void movlhps(Register dst, Register src) {
2202         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2203         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2204         int encode = simdPrefixAndEncode(dst, src, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
2205         emitByte(0x16);
2206         emitByte(0xC0 | encode);
2207     }
2208 
2209     public final void movq(Register dst, AMD64Address src) {
2210         movq(dst, src, false);
2211     }
2212 
2213     public final void movq(Register dst, AMD64Address src, boolean wide) {
2214         if (dst.getRegisterCategory().equals(AMD64.XMM)) {
2215             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ wide, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2216             simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2217             emitByte(0x7E);
2218             emitOperandHelper(dst, src, wide, 0);
2219         } else {
2220             // gpr version of movq
2221             prefixq(src, dst);
2222             emitByte(0x8B);
2223             emitOperandHelper(dst, src, wide, 0);
2224         }
2225     }
2226 
2227     public final void movq(Register dst, Register src) {
2228         int encode = prefixqAndEncode(dst.encoding, src.encoding);
2229         emitByte(0x8B);
2230         emitByte(0xC0 | encode);
2231     }
2232 
2233     public final void movq(AMD64Address dst, Register src) {
2234         if (src.getRegisterCategory().equals(AMD64.XMM)) {
2235             AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2236             simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2237             emitByte(0xD6);
2238             emitOperandHelper(src, dst, 0);
2239         } else {
2240             // gpr version of movq
2241             prefixq(dst, src);
2242             emitByte(0x89);
2243             emitOperandHelper(src, dst, 0);
2244         }
2245     }
2246 
2247     public final void movsbl(Register dst, AMD64Address src) {
2248         prefix(src, dst);
2249         emitByte(0x0F);
2250         emitByte(0xBE);
2251         emitOperandHelper(dst, src, 0);
2252     }
2253 
2254     public final void movsbl(Register dst, Register src) {
2255         int encode = prefixAndEncode(dst.encoding, false, src.encoding, true);
2256         emitByte(0x0F);
2257         emitByte(0xBE);
2258         emitByte(0xC0 | encode);
2259     }
2260 
2261     public final void movsbq(Register dst, AMD64Address src) {
2262         prefixq(src, dst);
2263         emitByte(0x0F);
2264         emitByte(0xBE);
2265         emitOperandHelper(dst, src, 0);
2266     }
2267 
2268     public final void movsbq(Register dst, Register src) {
2269         int encode = prefixqAndEncode(dst.encoding, src.encoding);
2270         emitByte(0x0F);
2271         emitByte(0xBE);
2272         emitByte(0xC0 | encode);
2273     }
2274 
2275     public final void movsd(Register dst, Register src) {
2276         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2277         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2278         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2279         emitByte(0x10);
2280         emitByte(0xC0 | encode);
2281     }
2282 
2283     public final void movsd(Register dst, AMD64Address src) {
2284         assert dst.getRegisterCategory().equals(AMD64.XMM);
2285         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2286         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2287         emitByte(0x10);
2288         emitOperandHelper(dst, src, 0);
2289     }
2290 
2291     public final void movsd(AMD64Address dst, Register src) {
2292         assert src.getRegisterCategory().equals(AMD64.XMM);
2293         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2294         simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2295         emitByte(0x11);
2296         emitOperandHelper(src, dst, 0);
2297     }
2298 
2299     public final void movss(Register dst, Register src) {
2300         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2301         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2302         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2303         emitByte(0x10);
2304         emitByte(0xC0 | encode);
2305     }
2306 
2307     public final void movss(Register dst, AMD64Address src) {
2308         assert dst.getRegisterCategory().equals(AMD64.XMM);
2309         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2310         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2311         emitByte(0x10);
2312         emitOperandHelper(dst, src, 0);
2313     }
2314 
2315     public final void movss(AMD64Address dst, Register src) {
2316         assert src.getRegisterCategory().equals(AMD64.XMM);
2317         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2318         simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2319         emitByte(0x11);
2320         emitOperandHelper(src, dst, 0);
2321     }
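
    // Encoding note: MOVSS/MOVSD carry the transfer direction in the opcode byte -
    // 0x10 loads into the XMM register, 0x11 stores from it - which is why the
    // register and memory overloads above differ only in that byte.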
2322 
2323     public final void mulpd(Register dst, Register src) {
2324         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2325         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2326         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2327         emitByte(0x59);
2328         emitByte(0xC0 | encode);
2329     }
2330 
2331     public final void mulpd(Register dst, AMD64Address src) {
2332         assert dst.getRegisterCategory().equals(AMD64.XMM);
2333         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2334         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2335         emitByte(0x59);
2336         emitOperandHelper(dst, src, 0);
2337     }
2338 
2339     public final void mulsd(Register dst, Register src) {
2340         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2341         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2342         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2343         emitByte(0x59);
2344         emitByte(0xC0 | encode);
2345     }
2346 
2347     public final void mulsd(Register dst, AMD64Address src) {
2348         assert dst.getRegisterCategory().equals(AMD64.XMM);
2349         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2350         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2351         emitByte(0x59);
2352         emitOperandHelper(dst, src, 0);
2353     }
2354 
2355     public final void mulss(Register dst, Register src) {
2356         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2357         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2358         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
2359         emitByte(0x59);
2360         emitByte(0xC0 | encode);
2361     }
2362 
2363     public final void movswl(Register dst, AMD64Address src) {
2364         prefix(src, dst);
2365         emitByte(0x0F);
2366         emitByte(0xBF);
2367         emitOperandHelper(dst, src, 0);
2368     }
2369 
2370     public final void movw(AMD64Address dst, int imm16) {
2371         emitByte(0x66); // operand-size prefix: switch to 16-bit operands
2372         prefix(dst);
2373         emitByte(0xC7);
2374         emitOperandHelper(0, dst, 2);
2375         emitShort(imm16);
2376     }
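
    // The last argument to emitOperandHelper is the number of instruction bytes
    // that still follow the operand (here the 2-byte immediate); it is assumed to
    // keep RIP-relative displacement computation correct past the immediate.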
2377 
2378     public final void movw(AMD64Address dst, Register src) {
2379         emitByte(0x66);
2380         prefix(dst, src);
2381         emitByte(0x89);
2382         emitOperandHelper(src, dst, 0);
2383     }
2384 
2385     public final void movzbl(Register dst, AMD64Address src) {
2386         prefix(src, dst);
2387         emitByte(0x0F);
2388         emitByte(0xB6);
2389         emitOperandHelper(dst, src, 0);
2390     }
2391 
2392     public final void movzbl(Register dst, Register src) {
2393         AMD64RMOp.MOVZXB.emit(this, OperandSize.DWORD, dst, src);
2394     }
2395 
2396     public final void movzbq(Register dst, Register src) {
2397         AMD64RMOp.MOVZXB.emit(this, OperandSize.QWORD, dst, src);
2398     }
2399 
2400     public final void movzwl(Register dst, AMD64Address src) {
2401         prefix(src, dst);
2402         emitByte(0x0F);
2403         emitByte(0xB7);
2404         emitOperandHelper(dst, src, 0);
2405     }
2406 
2407     public final void negl(Register dst) {
2408         NEG.emit(this, DWORD, dst);
2409     }
2410 
2411     public final void notl(Register dst) {
2412         NOT.emit(this, DWORD, dst);
2413     }
2414 
2415     public final void notq(Register dst) {
2416         NOT.emit(this, QWORD, dst);
2417     }
2418 
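    // ensureUniquePC pads with a one-byte nop so that the next emitted instruction
    // starts at a distinct code position; the exact contract is defined by the
    // base Assembler class.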
2419     @Override
2420     public final void ensureUniquePC() {
2421         nop();
2422     }
2423 
2424     public final void nop() {
2425         nop(1);
2426     }
2427 
2428     public void nop(int count) {
2429         int i = count;
2430         if (UseNormalNop) {
2431             assert i > 0 : " ";
2432         // The fancy nops aren't currently recognized by debuggers, making it a
2433         // pain to disassemble code while debugging. If asserts are on, speed is
2434         // clearly not an issue, so simply use the traditional single-byte nop
2435         // to do alignment.
2436 
2437             for (; i > 0; i--) {
2438                 emitByte(0x90);
2439             }
2440             return;
2441         }
2442 
2443         if (UseAddressNop) {
2444             //
2445             // Using multi-byte nops "0x0F 0x1F [Address]" for AMD.
2446             // 1: 0x90
2447             // 2: 0x66 0x90
2448             // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - needs patching-safe padding)
2449             // 4: 0x0F 0x1F 0x40 0x00
2450             // 5: 0x0F 0x1F 0x44 0x00 0x00
2451             // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2452             // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2453             // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2454             // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2455             // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2456             // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2457 
2458             // The rest of the coding is AMD-specific - use consecutive address nops
2459 
2460             // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2461             // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2462             // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2463             // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2464             // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2465             // Size prefixes (0x66) are added for larger sizes
2466 
2467             while (i >= 22) {
2468                 i -= 11;
2469                 emitByte(0x66); // size prefix
2470                 emitByte(0x66); // size prefix
2471                 emitByte(0x66); // size prefix
2472                 addrNop8();
2473             }
2474             // Generate the first nop for sizes between 12 and 21
2475             switch (i) {
2476                 case 21:
2477                     i -= 11;
2478                     emitByte(0x66); // size prefix
2479                     emitByte(0x66); // size prefix
2480                     emitByte(0x66); // size prefix
2481                     addrNop8();
2482                     break;
2483                 case 20:
2484                 case 19:
2485                     i -= 10;
2486                     emitByte(0x66); // size prefix
2487                     emitByte(0x66); // size prefix
2488                     addrNop8();
2489                     break;
2490                 case 18:
2491                 case 17:
2492                     i -= 9;
2493                     emitByte(0x66); // size prefix
2494                     addrNop8();
2495                     break;
2496                 case 16:
2497                 case 15:
2498                     i -= 8;
2499                     addrNop8();
2500                     break;
2501                 case 14:
2502                 case 13:
2503                     i -= 7;
2504                     addrNop7();
2505                     break;
2506                 case 12:
2507                     i -= 6;
2508                     emitByte(0x66); // size prefix
2509                     addrNop5();
2510                     break;
2511                 default:
2512                     assert i < 12;
2513             }
2514 
2515             // Generate the second nop for sizes between 1 and 11
2516             switch (i) {
2517                 case 11:
2518                     emitByte(0x66); // size prefix
2519                     emitByte(0x66); // size prefix
2520                     emitByte(0x66); // size prefix
2521                     addrNop8();
2522                     break;
2523                 case 10:
2524                     emitByte(0x66); // size prefix
2525                     emitByte(0x66); // size prefix
2526                     addrNop8();
2527                     break;
2528                 case 9:
2529                     emitByte(0x66); // size prefix
2530                     addrNop8();
2531                     break;
2532                 case 8:
2533                     addrNop8();
2534                     break;
2535                 case 7:
2536                     addrNop7();
2537                     break;
2538                 case 6:
2539                     emitByte(0x66); // size prefix
2540                     addrNop5();
2541                     break;
2542                 case 5:
2543                     addrNop5();
2544                     break;
2545                 case 4:
2546                     addrNop4();
2547                     break;
2548                 case 3:
2549                     // Don't use "0x0F 0x1F 0x00" - needs patching-safe padding
2550                     emitByte(0x66); // size prefix
2551                     emitByte(0x66); // size prefix
2552                     emitByte(0x90); // nop
2553                     break;
2554                 case 2:
2555                     emitByte(0x66); // size prefix
2556                     emitByte(0x90); // nop
2557                     break;
2558                 case 1:
2559                     emitByte(0x90); // nop
2560                     break;
2561                 default:
2562                     assert i == 0;
2563             }
2564             return;
2565         }
2566 
2567         // Using nops with size prefixes "0x66 0x90".
2568         // From AMD Optimization Guide:
2569         // 1: 0x90
2570         // 2: 0x66 0x90
2571         // 3: 0x66 0x66 0x90
2572         // 4: 0x66 0x66 0x66 0x90
2573         // 5: 0x66 0x66 0x90 0x66 0x90
2574         // 6: 0x66 0x66 0x90 0x66 0x66 0x90
2575         // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2576         // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2577         // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2578         // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2579         //
2580         while (i > 12) {
2581             i -= 4;
2582             emitByte(0x66); // size prefix
2583             emitByte(0x66);
2584             emitByte(0x66);
2585             emitByte(0x90); // nop
2586         }
2587         // 1 - 12 nops
2588         if (i > 8) {
2589             if (i > 9) {
2590                 i -= 1;
2591                 emitByte(0x66);
2592             }
2593             i -= 3;
2594             emitByte(0x66);
2595             emitByte(0x66);
2596             emitByte(0x90);
2597         }
2598         // 1 - 8 nops
2599         if (i > 4) {
2600             if (i > 6) {
2601                 i -= 1;
2602                 emitByte(0x66);
2603             }
2604             i -= 3;
2605             emitByte(0x66);
2606             emitByte(0x66);
2607             emitByte(0x90);
2608         }
2609         switch (i) {
2610             case 4:
2611                 emitByte(0x66);
2612                 emitByte(0x66);
2613                 emitByte(0x66);
2614                 emitByte(0x90);
2615                 break;
2616             case 3:
2617                 emitByte(0x66);
2618                 emitByte(0x66);
2619                 emitByte(0x90);
2620                 break;
2621             case 2:
2622                 emitByte(0x66);
2623                 emitByte(0x90);
2624                 break;
2625             case 1:
2626                 emitByte(0x90);
2627                 break;
2628             default:
2629                 assert i == 0;
2630         }
2631     }
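
    // Illustrative use (hypothetical caller, not part of this class): pad the code
    // buffer to a 16-byte boundary before a hot loop header, where position() is
    // the current emit offset inherited from Assembler.
    //
    //     int misalignment = position() & 0xF;
    //     if (misalignment != 0) {
    //         nop(16 - misalignment);
    //     }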
2632 
2633     public final void orl(Register dst, Register src) {
2634         OR.rmOp.emit(this, DWORD, dst, src);
2635     }
2636 
2637     public final void orl(Register dst, int imm32) {
2638         OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2639     }
2640 
2641     public final void pop(Register dst) {
2642         int encode = prefixAndEncode(dst.encoding);
2643         emitByte(0x58 | encode);
2644     }
2645 
2646     public void popfq() {
2647         emitByte(0x9D);
2648     }
2649 
2650     public final void ptest(Register dst, Register src) {
2651         assert supports(CPUFeature.SSE4_1);
2652         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2653         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2654         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
2655         emitByte(0x17);
2656         emitByte(0xC0 | encode);
2657     }
2658 
2659     public final void vptest(Register dst, Register src) {
2660         assert supports(CPUFeature.AVX);
2661         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2662         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2663         int encode = vexPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
2664         emitByte(0x17);
2665         emitByte(0xC0 | encode);
2666     }
2667 
2668     public final void pcmpestri(Register dst, AMD64Address src, int imm8) {
2669         assert supports(CPUFeature.SSE4_2);
2670         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2671         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_3A, attributes);
2672         emitByte(0x61);
2673         emitOperandHelper(dst, src, 0);
2674         emitByte(imm8);
2675     }
2676 
2677     public final void pcmpestri(Register dst, Register src, int imm8) {
2678         assert supports(CPUFeature.SSE4_2);
2679         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2680         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_3A, attributes);
2681         emitByte(0x61);
2682         emitByte(0xC0 | encode);
2683         emitByte(imm8);
2684     }
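
    // PCMPESTRI compares the explicit-length strings whose lengths are passed in
    // EAX and EDX; imm8 selects the comparison mode, the resulting index is
    // written to ECX and the flags are updated.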
2685 
2686     public final void pmovzxbw(Register dst, AMD64Address src) {
2687         assert supports(CPUFeature.SSE4_2);
2688         // XXX legacy_mode should be: _legacy_mode_bw
2689         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false, target);
2690         attributes.setAddressAttributes(/* tuple_type */ EvexTupleType.EVEX_HVM, /* input_size_in_bits */ EvexInputSizeInBits.EVEX_NObit);
2691         simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
2692         emitByte(0x30);
2693         emitOperandHelper(dst, src, 0);
2694     }
2695 
2696     public final void vpmovzxbw(Register dst, AMD64Address src, int vectorLen) {
2697         assert supports(CPUFeature.AVX);
2698         // XXX legacy_mode should be: _legacy_mode_bw
2699         AMD64InstructionAttr attributes = new AMD64InstructionAttr(vectorLen, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false, target);
2700         attributes.setAddressAttributes(/* tuple_type */ EvexTupleType.EVEX_HVM, /* input_size_in_bits */ EvexInputSizeInBits.EVEX_NObit);
2701         vexPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
2702         emitByte(0x30);
2703         emitOperandHelper(dst, src, 0);
2704     }
2705 
2706     public final void push(Register src) {
2707         int encode = prefixAndEncode(src.encoding);
2708         emitByte(0x50 | encode);
2709     }
2710 
2711     public void pushfq() {
2712         emitByte(0x9c);
2713     }
2714 
2715     public final void paddd(Register dst, Register src) {
2716         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2717         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2718         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2719         emitByte(0xFE);
2720         emitByte(0xC0 | encode);
2721     }
2722 
2723     public final void paddq(Register dst, Register src) {
2724         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2725         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2726         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2727         emitByte(0xD4);
2728         emitByte(0xC0 | encode);
2729     }
2730 
2731     public final void pextrw(Register dst, Register src, int imm8) {
2732         assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
2733         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2734         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2735         emitByte(0xC5);
2736         emitByte(0xC0 | encode);
2737         emitByte(imm8);
2738     }
2739 
2740     public final void pinsrw(Register dst, Register src, int imm8) {
2741         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
2742         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2743         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2744         emitByte(0xC4);
2745         emitByte(0xC0 | encode);
2746         emitByte(imm8);
2747     }
2748 
2749     public final void por(Register dst, Register src) {
2750         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2751         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2752         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2753         emitByte(0xEB);
2754         emitByte(0xC0 | encode);
2755     }
2756 
2757     public final void pand(Register dst, Register src) {
2758         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2759         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2760         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2761         emitByte(0xDB);
2762         emitByte(0xC0 | encode);
2763     }
2764 
2765     public final void pxor(Register dst, Register src) {
2766         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2767         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2768         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2769         emitByte(0xEF);
2770         emitByte(0xC0 | encode);
2771     }
2772 
2773     public final void vpxor(Register dst, Register nds, Register src) {
2774         assert supports(CPUFeature.AVX);
2775         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2776         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2777         int encode = vexPrefixAndEncode(dst, nds, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2778         emitByte(0xEF);
2779         emitByte(0xC0 | encode);
2780     }
2781 
2782     public final void vpxor(Register dst, Register nds, AMD64Address src) {
2783         assert supports(CPUFeature.AVX);
2784         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true, target);
2785         attributes.setAddressAttributes(/* tuple_type */ EvexTupleType.EVEX_FV, /* input_size_in_bits */ EvexInputSizeInBits.EVEX_32bit);
2786         vexPrefix(src, nds, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2787         emitByte(0xEF);
2788         emitOperandHelper(dst, src, 0);
2789     }
2790 
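    // The shift-by-immediate forms below encode an opcode extension (/digit) in
    // the ModRM reg field instead of a real register; the fixed registers passed
    // as the first argument (xmm2, xmm3, xmm4, xmm6) supply exactly that /digit.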
2791     public final void pslld(Register dst, int imm8) {
2792         assert isUByte(imm8) : "invalid value";
2793         assert dst.getRegisterCategory().equals(AMD64.XMM);
2794         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2795         // XMM6 is for /6 encoding: 66 0F 72 /6 ib
2796         int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2797         emitByte(0x72);
2798         emitByte(0xC0 | encode);
2799         emitByte(imm8 & 0xFF);
2800     }
2801 
2802     public final void psllq(Register dst, Register shift) {
2803         assert dst.getRegisterCategory().equals(AMD64.XMM) && shift.getRegisterCategory().equals(AMD64.XMM);
2804         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2805         int encode = simdPrefixAndEncode(dst, dst, shift, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2806         emitByte(0xF3);
2807         emitByte(0xC0 | encode);
2808     }
2809 
2810     public final void psllq(Register dst, int imm8) {
2811         assert isUByte(imm8) : "invalid value";
2812         assert dst.getRegisterCategory().equals(AMD64.XMM);
2813         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2814         // XMM6 is for /6 encoding: 66 0F 73 /6 ib
2815         int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2816         emitByte(0x73);
2817         emitByte(0xC0 | encode);
2818         emitByte(imm8);
2819     }
2820 
2821     public final void psrad(Register dst, int imm8) {
2822         assert isUByte(imm8) : "invalid value";
2823         assert dst.getRegisterCategory().equals(AMD64.XMM);
2824         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2825         // XMM4 is for /4 encoding: 66 0F 72 /4 ib
2826         int encode = simdPrefixAndEncode(AMD64.xmm4, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2827         emitByte(0x72);
2828         emitByte(0xC0 | encode);
2829         emitByte(imm8);
2830     }
2831 
2832     public final void psrld(Register dst, int imm8) {
2833         assert isUByte(imm8) : "invalid value";
2834         assert dst.getRegisterCategory().equals(AMD64.XMM);
2835         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2836         // XMM2 is for /2 encoding: 66 0F 72 /2 ib
2837         int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2838         emitByte(0x72);
2839         emitByte(0xC0 | encode);
2840         emitByte(imm8);
2841     }
2842 
2843     public final void psrlq(Register dst, int imm8) {
2844         assert isUByte(imm8) : "invalid value";
2845         assert dst.getRegisterCategory().equals(AMD64.XMM);
2846         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2847         // XMM2 is for /2 encoding: 66 0F 73 /2 ib
2848         int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2849         emitByte(0x73);
2850         emitByte(0xC0 | encode);
2851         emitByte(imm8);
2852     }
2853 
2854     public final void psrldq(Register dst, int imm8) {
2855         assert isUByte(imm8) : "invalid value";
2856         assert dst.getRegisterCategory().equals(AMD64.XMM);
2857         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
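        // XMM3 is for /3 encoding: 66 0F 73 /3 ib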
2858         int encode = simdPrefixAndEncode(AMD64.xmm3, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2859         emitByte(0x73);
2860         emitByte(0xC0 | encode);
2861         emitByte(imm8);
2862     }
2863 
2864     public final void pshufd(Register dst, Register src, int imm8) {
2865         assert isUByte(imm8) : "invalid value";
2866         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2867         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2868         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2869         emitByte(0x70);
2870         emitByte(0xC0 | encode);
2871         emitByte(imm8);
2872     }
2873 
2874     public final void psubd(Register dst, Register src) {
2875         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2876         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2877         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2878         emitByte(0xFA);
2879         emitByte(0xC0 | encode);
2880     }
2881 
2882     public final void rcpps(Register dst, Register src) {
2883         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2884         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ true, /* noMaskReg */ false, /* usesVl */ false, target);
2885         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
2886         emitByte(0x53);
2887         emitByte(0xC0 | encode);
2888     }
2889 
2890     public final void ret(int imm16) {
2891         if (imm16 == 0) {
2892             emitByte(0xC3);
2893         } else {
2894             emitByte(0xC2);
2895             emitShort(imm16);
2896         }
2897     }
2898 
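    // In the shift instructions below, a count of 1 has a dedicated short form
    // (opcode 0xD1, no immediate); other counts use 0xC1 followed by an imm8.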
2899     public final void sarl(Register dst, int imm8) {
2900         int encode = prefixAndEncode(dst.encoding);
2901         assert isShiftCount(imm8 >> 1) : "illegal shift count";
2902         if (imm8 == 1) {
2903             emitByte(0xD1);
2904             emitByte(0xF8 | encode);
2905         } else {
2906             emitByte(0xC1);
2907             emitByte(0xF8 | encode);
2908             emitByte(imm8);
2909         }
2910     }
2911 
2912     public final void shll(Register dst, int imm8) {
2913         assert isShiftCount(imm8 >> 1) : "illegal shift count";
2914         int encode = prefixAndEncode(dst.encoding);
2915         if (imm8 == 1) {
2916             emitByte(0xD1);
2917             emitByte(0xE0 | encode);
2918         } else {
2919             emitByte(0xC1);
2920             emitByte(0xE0 | encode);
2921             emitByte(imm8);
2922         }
2923     }
2924 
2925     public final void shll(Register dst) {
2926         int encode = prefixAndEncode(dst.encoding);
2927         emitByte(0xD3);
2928         emitByte(0xE0 | encode);
2929     }
2930 
2931     public final void shrl(Register dst, int imm8) {
2932         assert isShiftCount(imm8 >> 1) : "illegal shift count";
2933         int encode = prefixAndEncode(dst.encoding);
2934         emitByte(0xC1);
2935         emitByte(0xE8 | encode);
2936         emitByte(imm8);
2937     }
2938 
2939     public final void shrl(Register dst) {
2940         int encode = prefixAndEncode(dst.encoding);
2941         emitByte(0xD3);
2942         emitByte(0xE8 | encode);
2943     }
2944 
2945     public final void subl(AMD64Address dst, int imm32) {
2946         SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2947     }
2948 
2949     public final void subl(Register dst, int imm32) {
2950         SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
2951     }
2952 
2953     public final void subl(Register dst, Register src) {
2954         SUB.rmOp.emit(this, DWORD, dst, src);
2955     }
2956 
2957     public final void subpd(Register dst, Register src) {
2958         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2959         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2960         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
2961         emitByte(0x5C);
2962         emitByte(0xC0 | encode);
2963     }
2964 
2965     public final void subsd(Register dst, Register src) {
2966         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
2967         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2968         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2969         emitByte(0x5C);
2970         emitByte(0xC0 | encode);
2971     }
2972 
2973     public final void subsd(Register dst, AMD64Address src) {
2974         assert dst.getRegisterCategory().equals(AMD64.XMM);
2975         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
2976         simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
2977         emitByte(0x5C);
2978         emitOperandHelper(dst, src, 0);
2979     }
2980 
2981     public final void testl(Register dst, int imm32) {
2982         // not using emitArith because test
2983         // doesn't support sign-extension of
2984         // 8-bit operands
2985         int encode = dst.encoding;
2986         if (encode == 0) {
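            // TEST with EAX has a short form (0xA9 imm32) that needs no ModRM byte.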
2987             emitByte(0xA9);
2988         } else {
2989             encode = prefixAndEncode(encode);
2990             emitByte(0xF7);
2991             emitByte(0xC0 | encode);
2992         }
2993         emitInt(imm32);
2994     }
2995 
2996     public final void testl(Register dst, Register src) {
2997         int encode = prefixAndEncode(dst.encoding, src.encoding);
2998         emitByte(0x85);
2999         emitByte(0xC0 | encode);
3000     }
3001 
3002     public final void testl(Register dst, AMD64Address src) {
3003         prefix(src, dst);
3004         emitByte(0x85);
3005         emitOperandHelper(dst, src, 0);
3006     }
3007 
3008     public final void unpckhpd(Register dst, Register src) {
3009         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
3010         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3011         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3012         emitByte(0x15);
3013         emitByte(0xC0 | encode);
3014     }
3015 
3016     public final void unpcklpd(Register dst, Register src) {
3017         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
3018         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3019         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3020         emitByte(0x14);
3021         emitByte(0xC0 | encode);
3022     }
3023 
3024     public final void xorl(Register dst, Register src) {
3025         XOR.rmOp.emit(this, DWORD, dst, src);
3026     }
3027 
3028     public final void xorpd(Register dst, Register src) {
3029         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
3030         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3031         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3032         emitByte(0x57);
3033         emitByte(0xC0 | encode);
3034     }
3035 
3036     public final void xorps(Register dst, Register src) {
3037         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
3038         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3039         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
3040         emitByte(0x57);
3041         emitByte(0xC0 | encode);
3042     }
3043 
3044     protected final void decl(Register dst) {
3045         // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3046         int encode = prefixAndEncode(dst.encoding);
3047         emitByte(0xFF);
3048         emitByte(0xC8 | encode);
3049     }
3050 
3051     protected final void incl(Register dst) {
3052         // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
3053         int encode = prefixAndEncode(dst.encoding);
3054         emitByte(0xFF);
3055         emitByte(0xC0 | encode);
3056     }
3057 
3058     private int prefixAndEncode(int regEnc) {
3059         return prefixAndEncode(regEnc, false);
3060     }
3061 
3062     private int prefixAndEncode(int regEnc, boolean byteinst) {
3063         if (regEnc >= 8) {
3064             emitByte(Prefix.REXB);
3065             return regEnc - 8;
3066         } else if (byteinst && regEnc >= 4) {
3067             emitByte(Prefix.REX);
3068         }
3069         return regEnc;
3070     }
3071 
3072     private int prefixqAndEncode(int regEnc) {
3073         if (regEnc < 8) {
3074             emitByte(Prefix.REXW);
3075             return regEnc;
3076         } else {
3077             emitByte(Prefix.REXWB);
3078             return regEnc - 8;
3079         }
3080     }
3081 
3082     private int prefixAndEncode(int dstEnc, int srcEnc) {
3083         return prefixAndEncode(dstEnc, false, srcEnc, false);
3084     }
3085 
3086     private int prefixAndEncode(int dstEncoding, boolean dstIsByte, int srcEncoding, boolean srcIsByte) {
3087         int srcEnc = srcEncoding;
3088         int dstEnc = dstEncoding;
3089         if (dstEnc < 8) {
3090             if (srcEnc >= 8) {
3091                 emitByte(Prefix.REXB);
3092                 srcEnc -= 8;
3093             } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) {
3094                 emitByte(Prefix.REX);
3095             }
3096         } else {
3097             if (srcEnc < 8) {
3098                 emitByte(Prefix.REXR);
3099             } else {
3100                 emitByte(Prefix.REXRB);
3101                 srcEnc -= 8;
3102             }
3103             dstEnc -= 8;
3104         }
3105         return dstEnc << 3 | srcEnc;
3106     }
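
    // REX layout reminder: 0b0100WRXB, where R extends the ModRM reg field, X the
    // SIB index and B the ModRM r/m (or opcode register) field. The returned value
    // dstEnc << 3 | srcEnc is exactly the low six bits of the ModRM byte.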
3107 
3108     /**
3109      * Creates prefix and the encoding of the lower 6 bits of the ModRM-Byte. It emits an operand
3110      * prefix. If the given operands exceed 3 bits, the 4th bit is encoded in the prefix.
3111      *
3112      * @param regEncoding the encoding of the register part of the ModRM-Byte
3113      * @param rmEncoding the encoding of the r/m part of the ModRM-Byte
3114      * @return the lower 6 bits of the ModRM-Byte that should be emitted
3115      */
3116     private int prefixqAndEncode(int regEncoding, int rmEncoding) {
3117         int rmEnc = rmEncoding;
3118         int regEnc = regEncoding;
3119         if (regEnc < 8) {
3120             if (rmEnc < 8) {
3121                 emitByte(Prefix.REXW);
3122             } else {
3123                 emitByte(Prefix.REXWB);
3124                 rmEnc -= 8;
3125             }
3126         } else {
3127             if (rmEnc < 8) {
3128                 emitByte(Prefix.REXWR);
3129             } else {
3130                 emitByte(Prefix.REXWRB);
3131                 rmEnc -= 8;
3132             }
3133             regEnc -= 8;
3134         }
3135         return regEnc << 3 | rmEnc;
3136     }
3137 
3138     private void vexPrefix(int rxb, int ndsEncoding, int pre, int opc, AMD64InstructionAttr attributes) {
3139         int vectorLen = attributes.getVectorLen();
3140         boolean vexW = attributes.isRexVexW();
3141         boolean isXorB = ((rxb & 0x3) > 0);
3142         if (isXorB || vexW || (opc == VexOpcode.VEX_OPCODE_0F_38) || (opc == VexOpcode.VEX_OPCODE_0F_3A)) {
3143             emitByte(Prefix.VEX_3BYTES);
3144 
3145             int byte1 = (rxb << 5);
3146             byte1 = ((~byte1) & 0xE0) | opc;
3147             emitByte(byte1);
3148 
3149             int byte2 = ((~ndsEncoding) & 0xf) << 3;
3150             byte2 |= (vexW ? VexPrefix.VEX_W : 0) | ((vectorLen > 0) ? 4 : 0) | pre;
3151             emitByte(byte2);
3152         } else {
3153             emitByte(Prefix.VEX_2BYTES);
3154 
3155             int byte1 = ((rxb & 0x4) > 0) ? VexPrefix.VEX_R : 0;
3156             byte1 = (~byte1) & 0x80;
3157             byte1 |= ((~ndsEncoding) & 0xf) << 3;
3158             byte1 |= ((vectorLen > 0) ? 4 : 0) | pre;
3159             emitByte(byte1);
3160         }
3161     }
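
    // VEX layout reminder: the 2-byte form is 0xC5 [R vvvv L pp], the 3-byte form
    // is 0xC4 [R X B m-mmmm] [W vvvv L pp], where vvvv is the inverted encoding of
    // the extra source register, L selects 256-bit vectors, and pp/m-mmmm stand
    // for the legacy SIMD prefix and the opcode map. The bit fiddling above
    // assembles exactly these bytes.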
3162 
3163     private void vexPrefix(AMD64Address adr, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
3164         int rxb = getRXB(src, adr);
3165         int ndsEncoding = nds.isValid() ? nds.encoding : 0;
3166         vexPrefix(rxb, ndsEncoding, pre, opc, attributes);
3167         setCurAttributes(attributes);
3168     }
3169 
3170     private int vexPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
3171         int rxb = getRXB(dst, src);
3172         int ndsEncoding = nds.isValid() ? nds.encoding : 0;
3173         vexPrefix(rxb, ndsEncoding, pre, opc, attributes);
3174         // return modrm byte components for operands
3175         return (((dst.encoding & 7) << 3) | (src.encoding & 7));
3176     }
3177 
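    // simdPrefix and simdPrefixAndEncode (below) emit the VEX encoding when AVX is
    // available and otherwise fall back to the legacy SSE encoding: the mandatory
    // prefix (66/F2/F3), a REX prefix as needed, then the 0F / 0F 38 / 0F 3A escape.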
3178     private void simdPrefix(Register xreg, Register nds, AMD64Address adr, int pre, int opc, AMD64InstructionAttr attributes) {
3179         if (supports(CPUFeature.AVX)) {
3180             vexPrefix(adr, nds, xreg, pre, opc, attributes);
3181         } else {
3182             switch (pre) {
3183                 case VexSimdPrefix.VEX_SIMD_66:
3184                     emitByte(0x66);
3185                     break;
3186                 case VexSimdPrefix.VEX_SIMD_F2:
3187                     emitByte(0xF2);
3188                     break;
3189                 case VexSimdPrefix.VEX_SIMD_F3:
3190                     emitByte(0xF3);
3191                     break;
3192             }
3193             if (attributes.isRexVexW()) {
3194                 prefixq(adr, xreg);
3195             } else {
3196                 prefix(adr, xreg);
3197             }
3198             switch (opc) {
3199                 case VexOpcode.VEX_OPCODE_0F:
3200                     emitByte(0x0F);
3201                     break;
3202                 case VexOpcode.VEX_OPCODE_0F_38:
3203                     emitByte(0x0F);
3204                     emitByte(0x38);
3205                     break;
3206                 case VexOpcode.VEX_OPCODE_0F_3A:
3207                     emitByte(0x0F);
3208                     emitByte(0x3A);
3209                     break;
3210             }
3211         }
3212     }
3213 
3214     private int simdPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
3215         if (supports(CPUFeature.AVX)) {
3216             return vexPrefixAndEncode(dst, nds, src, pre, opc, attributes);
3217         } else {
3218             switch (pre) {
3219                 case VexSimdPrefix.VEX_SIMD_66:
3220                     emitByte(0x66);
3221                     break;
3222                 case VexSimdPrefix.VEX_SIMD_F2:
3223                     emitByte(0xF2);
3224                     break;
3225                 case VexSimdPrefix.VEX_SIMD_F3:
3226                     emitByte(0xF3);
3227                     break;
3228             }
3229             int encode;
3230             int dstEncoding = dst.encoding;
3231             int srcEncoding = src.encoding;
3232             if (attributes.isRexVexW()) {
3233                 encode = prefixqAndEncode(dstEncoding, srcEncoding);
3234             } else {
3235                 encode = prefixAndEncode(dstEncoding, srcEncoding);
3236             }
3237             switch (opc) {
3238                 case VexOpcode.VEX_OPCODE_0F:
3239                     emitByte(0x0F);
3240                     break;
3241                 case VexOpcode.VEX_OPCODE_0F_38:
3242                     emitByte(0x0F);
3243                     emitByte(0x38);
3244                     break;
3245                 case VexOpcode.VEX_OPCODE_0F_3A:
3246                     emitByte(0x0F);
3247                     emitByte(0x3A);
3248                     break;
3249             }
3250             return encode;
3251         }
3252     }
3253 
3254     private static boolean needsRex(Register reg) {
3255         return reg.encoding >= MinEncodingNeedsRex;
3256     }
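
    // Registers r8-r15 and xmm8-xmm15 have encodings of 8 and above and require a
    // REX (or VEX) extension bit; MinEncodingNeedsRex is assumed to be 8 accordingly.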
3257 
3258     private void prefix(AMD64Address adr) {
3259         if (needsRex(adr.getBase())) {
3260             if (needsRex(adr.getIndex())) {
3261                 emitByte(Prefix.REXXB);
3262             } else {
3263                 emitByte(Prefix.REXB);
3264             }
3265         } else {
3266             if (needsRex(adr.getIndex())) {
3267                 emitByte(Prefix.REXX);
3268             }
3269         }
3270     }
3271 
3272     private void prefixq(AMD64Address adr) {
3273         if (needsRex(adr.getBase())) {
3274             if (needsRex(adr.getIndex())) {
3275                 emitByte(Prefix.REXWXB);
3276             } else {
3277                 emitByte(Prefix.REXWB);
3278             }
3279         } else {
3280             if (needsRex(adr.getIndex())) {
3281                 emitByte(Prefix.REXWX);
3282             } else {
3283                 emitByte(Prefix.REXW);
3284             }
3285         }
3286     }
3287 
3288     private void prefixb(AMD64Address adr, Register reg) {
3289         prefix(adr, reg, true);
3290     }
3291 
3292     private void prefix(AMD64Address adr, Register reg) {
3293         prefix(adr, reg, false);
3294     }
3295 
3296     private void prefix(AMD64Address adr, Register reg, boolean byteinst) {
3297         if (reg.encoding < 8) {
3298             if (needsRex(adr.getBase())) {
3299                 if (needsRex(adr.getIndex())) {
3300                     emitByte(Prefix.REXXB);
3301                 } else {
3302                     emitByte(Prefix.REXB);
3303                 }
3304             } else {
3305                 if (needsRex(adr.getIndex())) {
3306                     emitByte(Prefix.REXX);
3307                 } else if (byteinst && reg.encoding >= 4) {
3308                     emitByte(Prefix.REX);
3309                 }
3310             }
3311         } else {
3312             if (needsRex(adr.getBase())) {
3313                 if (needsRex(adr.getIndex())) {
3314                     emitByte(Prefix.REXRXB);
3315                 } else {
3316                     emitByte(Prefix.REXRB);
3317                 }
3318             } else {
3319                 if (needsRex(adr.getIndex())) {
3320                     emitByte(Prefix.REXRX);
3321                 } else {
3322                     emitByte(Prefix.REXR);
3323                 }
3324             }
3325         }
3326     }
3327 
3328     private void prefixq(AMD64Address adr, Register src) {
3329         if (src.encoding < 8) {
3330             if (needsRex(adr.getBase())) {
3331                 if (needsRex(adr.getIndex())) {
3332                     emitByte(Prefix.REXWXB);
3333                 } else {
3334                     emitByte(Prefix.REXWB);
3335                 }
3336             } else {
3337                 if (needsRex(adr.getIndex())) {
3338                     emitByte(Prefix.REXWX);
3339                 } else {
3340                     emitByte(Prefix.REXW);
3341                 }
3342             }
3343         } else {
3344             if (needsRex(adr.getBase())) {
3345                 if (needsRex(adr.getIndex())) {
3346                     emitByte(Prefix.REXWRXB);
3347                 } else {
3348                     emitByte(Prefix.REXWRB);
3349                 }
3350             } else {
3351                 if (needsRex(adr.getIndex())) {
3352                     emitByte(Prefix.REXWRX);
3353                 } else {
3354                     emitByte(Prefix.REXWR);
3355                 }
3356             }
3357         }
3358     }
3359 
3360     public final void addq(Register dst, int imm32) {
3361         ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3362     }
3363 
3364     public final void addq(AMD64Address dst, int imm32) {
3365         ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3366     }
3367 
3368     public final void addq(Register dst, Register src) {
3369         ADD.rmOp.emit(this, QWORD, dst, src);
3370     }
3371 
3372     public final void addq(AMD64Address dst, Register src) {
3373         ADD.mrOp.emit(this, QWORD, dst, src);
3374     }
3375 
3376     public final void andq(Register dst, int imm32) {
3377         AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3378     }
3379 
3380     public final void bsrq(Register dst, Register src) {
3381         int encode = prefixqAndEncode(dst.encoding(), src.encoding());
3382         emitByte(0x0F);
3383         emitByte(0xBD);
3384         emitByte(0xC0 | encode);
3385     }
3386 
3387     public final void bswapq(Register reg) {
3388         int encode = prefixqAndEncode(reg.encoding);
3389         emitByte(0x0F);
3390         emitByte(0xC8 | encode);
3391     }
3392 
3393     public final void cdqq() {
3394         emitByte(Prefix.REXW);
3395         emitByte(0x99);
3396     }
3397 
3398     public final void cmovq(ConditionFlag cc, Register dst, Register src) {
3399         int encode = prefixqAndEncode(dst.encoding, src.encoding);
3400         emitByte(0x0F);
3401         emitByte(0x40 | cc.getValue());
3402         emitByte(0xC0 | encode);
3403     }
3404 
3405     public final void setb(ConditionFlag cc, Register dst) {
3406         int encode = prefixAndEncode(dst.encoding, true);
3407         emitByte(0x0F);
3408         emitByte(0x90 | cc.getValue());
3409         emitByte(0xC0 | encode);
3410     }
3411 
3412     public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) {
3413         prefixq(src, dst);
3414         emitByte(0x0F);
3415         emitByte(0x40 | cc.getValue());
3416         emitOperandHelper(dst, src, 0);
3417     }
3418 
3419     public final void cmpq(Register dst, int imm32) {
3420         CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
3421     }
3422 
3423     public final void cmpq(Register dst, Register src) {
3424         CMP.rmOp.emit(this, QWORD, dst, src);
3425     }
3426 
3427     public final void cmpq(Register dst, AMD64Address src) {
3428         CMP.rmOp.emit(this, QWORD, dst, src);
3429     }
3430 
3431     public final void cmpxchgq(Register reg, AMD64Address adr) {
3432         prefixq(adr, reg);
3433         emitByte(0x0F);
3434         emitByte(0xB1);
3435         emitOperandHelper(reg, adr, 0);
3436     }
3437 
3438     public final void cvtdq2pd(Register dst, Register src) {
3439         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
3440         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3441         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
3442         emitByte(0xE6);
3443         emitByte(0xC0 | encode);
3444     }
3445 
3446     public final void cvtsi2sdq(Register dst, Register src) {
3447         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
3448         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3449         int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
3450         emitByte(0x2A);
3451         emitByte(0xC0 | encode);
3452     }
3453 
3454     public final void cvttsd2siq(Register dst, Register src) {
3455         assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
3456         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3457         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
3458         emitByte(0x2C);
3459         emitByte(0xC0 | encode);
3460     }
3461 
3462     public final void cvttpd2dq(Register dst, Register src) {
3463         assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
3464         AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
3465         int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
3466         emitByte(0xE6);
3467         emitByte(0xC0 | encode);
3468     }
3469 
decq(Register dst)3470     protected final void decq(Register dst) {
3471         // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
3472         int encode = prefixqAndEncode(dst.encoding);
3473         emitByte(0xFF);
3474         emitByte(0xC8 | encode);
3475     }
3476 
decq(AMD64Address dst)3477     public final void decq(AMD64Address dst) {
3478         DEC.emit(this, QWORD, dst);
3479     }
3480 
imulq(Register dst, Register src)3481     public final void imulq(Register dst, Register src) {
3482         int encode = prefixqAndEncode(dst.encoding, src.encoding);
3483         emitByte(0x0F);
3484         emitByte(0xAF);
3485         emitByte(0xC0 | encode);
3486     }
3487 
incq(Register dst)3488     public final void incq(Register dst) {
3489         // Don't use it directly. Use Macroincrementq() instead.
3490         // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
3491         int encode = prefixqAndEncode(dst.encoding);
3492         emitByte(0xFF);
3493         emitByte(0xC0 | encode);
3494     }
3495 
incq(AMD64Address dst)3496     public final void incq(AMD64Address dst) {
3497         INC.emit(this, QWORD, dst);
3498     }
3499 
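    /**
     * Emits the 10-byte {@code movq dst, imm64} form (REX.W + 0xB8 | reg) that loads a full
     * 64-bit immediate into {@code dst}.
     */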
    public final void movq(Register dst, long imm64) {
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xB8 | encode);
        emitLong(imm64);
    }

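    /**
     * Despite its name, this emits {@code mov dst, imm32} with REX.W (0xC7 /0), i.e. the 32-bit
     * immediate is sign-extended into the full 64-bit register.
     */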
    public final void movslq(Register dst, int imm32) {
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xC7);
        emitByte(0xC0 | encode);
        emitInt(imm32);
    }

    public final void movdq(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6E);
        emitOperandHelper(dst, src, 0);
    }

    public final void movdq(AMD64Address dst, Register src) {
        assert src.getRegisterCategory().equals(AMD64.XMM);
        // Swap src/dst to get the correct prefix.
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x7E);
        emitOperandHelper(src, dst, 0);
    }

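    /**
     * Moves 64 bits between an XMM register and a general-purpose register in either direction.
     * The direction selects the opcode: 0x6E moves CPU to XMM, 0x7E moves XMM to CPU.
     */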
    public final void movdq(Register dst, Register src) {
        if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
            int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x6E);
            emitByte(0xC0 | encode);
        } else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
            // Swap src/dst to get the correct prefix.
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
            int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x7E);
            emitByte(0xC0 | encode);
        } else {
            throw new InternalError("should not reach here");
        }
    }

    public final void movdl(Register dst, Register src) {
        if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
            int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x6E);
            emitByte(0xC0 | encode);
        } else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
            // Swap src/dst to get the correct prefix.
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
            int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x7E);
            emitByte(0xC0 | encode);
        } else {
            throw new InternalError("should not reach here");
        }
    }

    public final void movdl(Register dst, AMD64Address src) {
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6E);
        emitOperandHelper(dst, src, 0);
    }

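    /**
     * Emits {@code movddup dst, src} (SSE3, F2 0x0F 0x12): duplicates the low double of
     * {@code src} into both 64-bit lanes of {@code dst}.
     */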
    public final void movddup(Register dst, Register src) {
        assert supports(CPUFeature.SSE3);
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x12);
        emitByte(0xC0 | encode);
    }

    public final void movdqu(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6F);
        emitOperandHelper(dst, src, 0);
    }

    public final void movdqu(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6F);
        emitByte(0xC0 | encode);
    }

    public final void vmovdqu(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX);
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        vexPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6F);
        emitOperandHelper(dst, src, 0);
    }

    public final void vzeroupper() {
        assert supports(CPUFeature.AVX);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        vexPrefixAndEncode(AMD64.xmm0, AMD64.xmm0, AMD64.xmm0, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x77);
    }

    public final void movslq(AMD64Address dst, int imm32) {
        prefixq(dst);
        emitByte(0xC7);
        emitOperandHelper(0, dst, 4);
        emitInt(imm32);
    }

    public final void movslq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x63);
        emitOperandHelper(dst, src, 0);
    }

    public final void movslq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding, src.encoding);
        emitByte(0x63);
        emitByte(0xC0 | encode);
    }

    public final void negq(Register dst) {
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xF7);
        emitByte(0xD8 | encode);
    }

    public final void orq(Register dst, Register src) {
        OR.rmOp.emit(this, QWORD, dst, src);
    }

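    /**
     * Emits {@code shlq dst, imm8}. The one-bit shift uses the short form 0xD1 /4; other counts
     * use 0xC1 /4 followed by the immediate byte.
     */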
    public final void shlq(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        int encode = prefixqAndEncode(dst.encoding);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitByte(0xE0 | encode);
        } else {
            emitByte(0xC1);
            emitByte(0xE0 | encode);
            emitByte(imm8);
        }
    }

    public final void shlq(Register dst) {
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xD3);
        emitByte(0xE0 | encode);
    }

    public final void shrq(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        int encode = prefixqAndEncode(dst.encoding);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitByte(0xE8 | encode);
        } else {
            emitByte(0xC1);
            emitByte(0xE8 | encode);
            emitByte(imm8);
        }
    }

    public final void shrq(Register dst) {
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xD3);
        emitByte(0xE8 | encode);
    }

    public final void sbbq(Register dst, Register src) {
        SBB.rmOp.emit(this, QWORD, dst, src);
    }

    public final void subq(Register dst, int imm32) {
        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void subq(AMD64Address dst, int imm32) {
        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void subqWide(Register dst, int imm32) {
        // Don't use the sign-extending byte version; force a full 32-bit immediate.
        SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32);
    }

    public final void subq(Register dst, Register src) {
        SUB.rmOp.emit(this, QWORD, dst, src);
    }

    public final void testq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding, src.encoding);
        emitByte(0x85);
        emitByte(0xC0 | encode);
    }

    public final void btrq(Register src, int imm8) {
        int encode = prefixqAndEncode(src.encoding);
        emitByte(0x0F);
        emitByte(0xBA);
        emitByte(0xF0 | encode);
        emitByte(imm8);
    }

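    /**
     * The xadd family (0x0F 0xC0/0xC1) exchanges {@code src} with the memory operand and stores
     * the sum of both in memory. Preceded by {@link #lock()} it implements atomic fetch-and-add.
     */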
    public final void xaddb(AMD64Address dst, Register src) {
        prefixb(dst, src);
        emitByte(0x0F);
        emitByte(0xC0);
        emitOperandHelper(src, dst, 0);
    }

    public final void xaddw(AMD64Address dst, Register src) {
        emitByte(0x66); // The 0x66 operand-size prefix selects the 16-bit form.
        prefix(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xaddl(AMD64Address dst, Register src) {
        prefix(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xaddq(AMD64Address dst, Register src) {
        prefixq(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

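    /**
     * The xchg family (0x86/0x87) swaps {@code dst} with the memory operand. When one operand is
     * in memory the processor locks the exchange implicitly, so no lock prefix is required.
     */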
    public final void xchgb(Register dst, AMD64Address src) {
        prefixb(src, dst);
        emitByte(0x86);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgw(Register dst, AMD64Address src) {
        emitByte(0x66);
        prefix(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }

    public final void membar(int barriers) {
        if (target.isMP) {
            // We only have to handle StoreLoad.
            if ((barriers & STORE_LOAD) != 0) {
                // All usable chips support "locked" instructions, which suffice as barriers and
                // are much faster than the alternative of using the cpuid instruction. We emit a
                // locked add [rsp], 0 here, which is conveniently a no-op apart from clobbering
                // the flags. Any change to this code may need to revisit other places where this
                // idiom is used, in particular the orderAccess code.
                lock();
                addl(new AMD64Address(rsp, 0), 0); // Assert the lock# signal here.
            }
        }
    }

    @Override
    protected final void patchJumpTarget(int branch, int branchTarget) {
        int op = getByte(branch);
        assert op == 0xE8 // call
                        || op == 0x00 // jump table entry
                        || op == 0xE9 // jmp
                        || op == 0xEB // short jmp
                        || (op & 0xF0) == 0x70 // short jcc
                        || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
        : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;

        if (op == 0x00) {
            int offsetToJumpTableBase = getShort(branch + 1);
            int jumpTableBase = branch - offsetToJumpTableBase;
            int imm32 = branchTarget - jumpTableBase;
            emitInt(imm32, branch);
        } else if (op == 0xEB || (op & 0xF0) == 0x70) {
            // Short-offset operators (jmp and jcc).
            final int imm8 = branchTarget - (branch + 2);
            /*
             * A wrongly patched short branch can produce code that works but behaves really badly,
             * so we always fail with an exception here instead of relying on an assert.
             */
            if (!NumUtil.isByte(imm8)) {
                throw new InternalError("branch displacement out of range: " + imm8);
            }
            emitByte(imm8, branch + 1);
        } else {
            int off = 1;
            if (op == 0x0F) {
                off = 2;
            }
            int imm32 = branchTarget - (branch + 4 + off);
            emitInt(imm32, branch + off);
        }
    }


    public void nullCheck(AMD64Address address) {
        testl(AMD64.rax, address);
    }

    @Override
    public void align(int modulus) {
        if (position() % modulus != 0) {
            nop(modulus - (position() % modulus));
        }
    }

    /**
     * Emits a direct call instruction. The actual call target is not specified, because all calls
     * need patching anyway; 0 is emitted as the call target, and the caller is responsible for
     * adding the call address to the appropriate patching tables.
     */
    public final void call() {
        if (codePatchingAnnotationConsumer != null) {
            int pos = position();
            codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(pos, pos + 1, 4, pos + 5));
        }
        emitByte(0xE8);
        emitInt(0);
    }

    public final void call(Register src) {
        int encode = prefixAndEncode(src.encoding);
        emitByte(0xFF);
        emitByte(0xD0 | encode);
    }

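    /**
     * Emits the single-byte breakpoint instruction {@code int3} (0xCC).
     */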
    public final void int3() {
        emitByte(0xCC);
    }

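    /**
     * Emits {@code pause} (0xF3 0x90), a hint that improves spin-wait loop performance and
     * reduces power consumption while spinning.
     */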
    public final void pause() {
        emitByte(0xF3);
        emitByte(0x90);
    }

    private void emitx87(int b1, int b2, int i) {
        assert 0 <= i && i < 8 : "illegal stack offset";
        emitByte(b1);
        emitByte(b2 + i);
    }

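    // The following x87 instructions operate on the legacy floating-point register stack. They
    // are used for the transcendental operations (sin, cos, tan, log) that have no direct SSE
    // encoding.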
    public final void fldd(AMD64Address src) {
        emitByte(0xDD);
        emitOperandHelper(0, src, 0);
    }

    public final void flds(AMD64Address src) {
        emitByte(0xD9);
        emitOperandHelper(0, src, 0);
    }

    public final void fldln2() {
        emitByte(0xD9);
        emitByte(0xED);
    }

    public final void fldlg2() {
        emitByte(0xD9);
        emitByte(0xEC);
    }

    public final void fyl2x() {
        emitByte(0xD9);
        emitByte(0xF1);
    }

    public final void fstps(AMD64Address src) {
        emitByte(0xD9);
        emitOperandHelper(3, src, 0);
    }

    public final void fstpd(AMD64Address src) {
        emitByte(0xDD);
        emitOperandHelper(3, src, 0);
    }

    private void emitFPUArith(int b1, int b2, int i) {
        assert 0 <= i && i < 8 : "illegal FPU register: " + i;
        emitByte(b1);
        emitByte(b2 + i);
    }

    public void ffree(int i) {
        emitFPUArith(0xDD, 0xC0, i);
    }

    public void fincstp() {
        emitByte(0xD9);
        emitByte(0xF7);
    }

    public void fxch(int i) {
        emitFPUArith(0xD9, 0xC8, i);
    }

    public void fnstswAX() {
        emitByte(0xDF);
        emitByte(0xE0);
    }

    public void fwait() {
        emitByte(0x9B);
    }

    public void fprem() {
        emitByte(0xD9);
        emitByte(0xF8);
    }

    public final void fsin() {
        emitByte(0xD9);
        emitByte(0xFE);
    }

    public final void fcos() {
        emitByte(0xD9);
        emitByte(0xFF);
    }

    public final void fptan() {
        emitByte(0xD9);
        emitByte(0xF2);
    }

    public final void fstp(int i) {
        emitx87(0xDD, 0xD8, i);
    }

    @Override
    public AMD64Address makeAddress(Register base, int displacement) {
        return new AMD64Address(base, displacement);
    }

    @Override
    public AMD64Address getPlaceholder(int instructionStartPosition) {
        return new AMD64Address(rip, Register.None, Scale.Times1, 0, instructionStartPosition);
    }

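    // Prefetch hints: 0x0F 0x18 with ModRM reg field /0../3 selects prefetchnta/t0/t1/t2, while
    // 0x0F 0x0D encodes the 3DNow! prefetch forms (prefetch /0 and prefetchw /1).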
    private void prefetchPrefix(AMD64Address src) {
        prefix(src);
        emitByte(0x0F);
    }

    public void prefetchnta(AMD64Address src) {
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(0, src, 0);
    }

    void prefetchr(AMD64Address src) {
        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
        prefetchPrefix(src);
        emitByte(0x0D);
        emitOperandHelper(0, src, 0);
    }

    public void prefetcht0(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(1, src, 0);
    }

    public void prefetcht1(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(2, src, 0);
    }

    public void prefetcht2(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(3, src, 0);
    }

    public void prefetchw(AMD64Address src) {
        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
        prefetchPrefix(src);
        emitByte(0x0D);
        emitOperandHelper(1, src, 0);
    }

    public void rdtsc() {
        emitByte(0x0F);
        emitByte(0x31);
    }

    /**
     * Emits an instruction which is considered to be illegal. This is used if we deliberately want
     * to crash the program (debugging etc.).
     */
    public void illegal() {
        emitByte(0x0F);
        emitByte(0x0B);
    }

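    /**
     * Emits {@code lfence} (0x0F 0xAE 0xE8), which serializes all preceding load instructions.
     */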
    public void lfence() {
        emitByte(0x0F);
        emitByte(0xAE);
        emitByte(0xE8);
    }
}