1 /*
2  * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  */
23 
24 
25 package org.graalvm.compiler.asm.amd64;
26 
27 import static jdk.vm.ci.amd64.AMD64.MASK;
28 import static jdk.vm.ci.amd64.AMD64.XMM;
29 import static jdk.vm.ci.amd64.AMD64.r12;
30 import static jdk.vm.ci.amd64.AMD64.r13;
31 import static jdk.vm.ci.amd64.AMD64.rbp;
32 import static jdk.vm.ci.amd64.AMD64.rsp;
33 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0;
34 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B1;
35 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0;
36 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1;
37 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128;
38 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256;
39 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L512;
40 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ;
41 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F;
42 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38;
43 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A;
44 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_;
45 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66;
46 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2;
47 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3;
48 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0;
49 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1;
50 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG;
51 import static org.graalvm.compiler.core.common.NumUtil.isByte;
52 
53 import org.graalvm.compiler.asm.Assembler;
54 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
55 import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
56 import org.graalvm.compiler.debug.GraalError;
57 
58 import jdk.vm.ci.amd64.AMD64;
59 import jdk.vm.ci.amd64.AMD64.CPUFeature;
60 import jdk.vm.ci.amd64.AMD64Kind;
61 import jdk.vm.ci.code.Register;
62 import jdk.vm.ci.code.Register.RegisterCategory;
63 import jdk.vm.ci.code.TargetDescription;
64 import jdk.vm.ci.meta.PlatformKind;
65 
66 /**
67  * This class implements an assembler that can encode most X86 instructions.
68  */
69 public abstract class AMD64BaseAssembler extends Assembler {
70 
71     private final SIMDEncoder simdEncoder;
72 
73     /**
74      * Constructs an assembler for the AMD64 architecture.
75      */
AMD64BaseAssembler(TargetDescription target)76     public AMD64BaseAssembler(TargetDescription target) {
77         super(target);
78 
79         if (supports(CPUFeature.AVX)) {
80             simdEncoder = new VEXEncoderImpl();
81         } else {
82             simdEncoder = new SSEEncoderImpl();
83         }
84     }
85 
    /**
     * The x86 operand sizes. Each constant knows its width in bytes, the corresponding
     * {@link AMD64Kind}, an optional prefix byte emitted before the opcode, and whether it is an
     * XMM (SSE/AVX) operand size.
     */
    public enum OperandSize {
        // 8-bit general-purpose operand.
        BYTE(1, AMD64Kind.BYTE) {
            @Override
            protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            }

            @Override
            protected int immediateSize() {
                return 1;
            }
        },

        // 16-bit general-purpose operand; needs the 0x66 operand-size override prefix.
        WORD(2, AMD64Kind.WORD, 0x66) {
            @Override
            protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
                assert imm == (short) imm;
                asm.emitShort(imm);
            }

            @Override
            protected int immediateSize() {
                return 2;
            }
        },

        // 32-bit general-purpose operand.
        DWORD(4, AMD64Kind.DWORD) {
            @Override
            protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
                asm.emitInt(imm);
            }

            @Override
            protected int immediateSize() {
                return 4;
            }
        },

        // 64-bit general-purpose operand. Immediates are nevertheless emitted as
        // sign-extended 32-bit values, hence immediateSize() == 4.
        QWORD(8, AMD64Kind.QWORD) {
            @Override
            protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
                asm.emitInt(imm);
            }

            @Override
            protected int immediateSize() {
                return 4;
            }
        },

        // Scalar single-precision (XMM); prefix byte 0xF3.
        SS(4, AMD64Kind.SINGLE, 0xF3, true),

        // Scalar double-precision (XMM); prefix byte 0xF2.
        SD(8, AMD64Kind.DOUBLE, 0xF2, true),

        // Packed single-precision (XMM); no prefix byte.
        PS(16, AMD64Kind.V128_SINGLE, true),

        // Packed double-precision (XMM); prefix byte 0x66.
        PD(16, AMD64Kind.V128_DOUBLE, 0x66, true);

        // Prefix byte emitted before the opcode, or 0 if none is required.
        private final int sizePrefix;
        // Operand width in bytes.
        private final int bytes;
        // True for XMM (SSE/AVX) operand sizes.
        private final boolean xmm;
        // The platform kind corresponding to this operand size.
        private final AMD64Kind kind;

        OperandSize(int bytes, AMD64Kind kind) {
            this(bytes, kind, 0);
        }

        OperandSize(int bytes, AMD64Kind kind, int sizePrefix) {
            this(bytes, kind, sizePrefix, false);
        }

        OperandSize(int bytes, AMD64Kind kind, boolean xmm) {
            this(bytes, kind, 0, xmm);
        }

        OperandSize(int bytes, AMD64Kind kind, int sizePrefix, boolean xmm) {
            this.sizePrefix = sizePrefix;
            this.bytes = bytes;
            this.kind = kind;
            this.xmm = xmm;
        }

        public int getSizePrefix() {
            return sizePrefix;
        }

        public int getBytes() {
            return bytes;
        }

        public boolean isXmmType() {
            return xmm;
        }

        public AMD64Kind getKind() {
            return kind;
        }

        /**
         * Returns the unique operand size whose kind equals {@code kind}.
         *
         * @throws GraalError if no operand size matches {@code kind}
         */
        public static OperandSize get(PlatformKind kind) {
            for (OperandSize operandSize : OperandSize.values()) {
                if (operandSize.kind.equals(kind)) {
                    return operandSize;
                }
            }
            throw GraalError.shouldNotReachHere("Unexpected kind: " + kind.toString());
        }

        /**
         * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
         * as sign-extended 32-bit values.
         *
         * @param asm the assembler to emit the immediate into
         * @param imm the immediate value
         */
        protected void emitImmediate(AMD64BaseAssembler asm, int imm) {
            throw new UnsupportedOperationException();
        }

        /**
         * The number of bytes {@link #emitImmediate} writes for this operand size.
         */
        protected int immediateSize() {
            throw new UnsupportedOperationException();
        }
    }
212 
    /**
     * Describes the location of a data operand (e.g. an immediate or displacement) within an
     * emitted instruction, so that the operand can be patched later.
     */
    public static class OperandDataAnnotation extends CodeAnnotation {
        /**
         * The position (bytes from the beginning of the method) of the operand.
         */
        public final int operandPosition;
        /**
         * The size of the operand, in bytes.
         */
        public final int operandSize;
        /**
         * The position (bytes from the beginning of the method) of the next instruction. On AMD64,
         * RIP-relative operands are relative to this position.
         */
        public final int nextInstructionPosition;

        OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition);

            this.operandPosition = operandPosition;
            this.operandSize = operandSize;
            this.nextInstructionPosition = nextInstructionPosition;
        }

        @Override
        public String toString() {
            return getClass().getSimpleName() + " instruction [" + instructionPosition + ", " + nextInstructionPosition + "[ operand at " + operandPosition + " size " + operandSize;
        }
    }
241 
annotatePatchingImmediate(int operandOffset, int operandSize)242     protected void annotatePatchingImmediate(int operandOffset, int operandSize) {
243         if (codePatchingAnnotationConsumer != null) {
244             int pos = position();
245             codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(pos, pos + operandOffset, operandSize, pos + operandOffset + operandSize));
246         }
247     }
248 
    /**
     * Returns true if the target CPU supports the given feature.
     */
    public final boolean supports(CPUFeature feature) {
        return ((AMD64) target.arch).getFeatures().contains(feature);
    }
252 
    /**
     * Returns true if register {@code r} belongs to register category {@code rc}.
     */
    protected static boolean inRC(RegisterCategory rc, Register r) {
        return r.getRegisterCategory().equals(rc);
    }
256 
    /**
     * Returns the low 3 bits of {@code r}'s encoding, as used in the ModR/M and SIB bytes. The
     * high bit(s) must be emitted separately via a REX/VEX/EVEX prefix.
     */
    protected static int encode(Register r) {
        assert r.encoding >= 0 && (inRC(XMM, r) ? r.encoding < 32 : r.encoding < 16) : "encoding out of range: " + r.encoding;
        return r.encoding & 0x7;
    }
261 
    /** Smallest register encoding (r8/xmm8 and up) that requires a REX extension bit. */
    private static final int MinEncodingNeedsRex = 8;
263 
    /**
     * Constants for X86 prefix bytes.
     * <p>
     * The REX constants follow the layout {@code 0100WRXB}: W selects 64-bit operand size, R
     * extends ModRM.reg, X extends SIB.index, and B extends ModRM.rm / SIB.base.
     */
    private static class Prefix {
        private static final int REX = 0x40;
        private static final int REXB = 0x41;
        private static final int REXX = 0x42;
        private static final int REXXB = 0x43;
        private static final int REXR = 0x44;
        private static final int REXRB = 0x45;
        private static final int REXRX = 0x46;
        private static final int REXRXB = 0x47;
        private static final int REXW = 0x48;
        private static final int REXWB = 0x49;
        private static final int REXWX = 0x4A;
        private static final int REXWXB = 0x4B;
        private static final int REXWR = 0x4C;
        private static final int REXWRB = 0x4D;
        private static final int REXWRX = 0x4E;
        private static final int REXWRXB = 0x4F;

        // Escape bytes introducing the 2-byte VEX, 3-byte VEX and EVEX prefixes.
        private static final int VEX2 = 0xC5;
        private static final int VEX3 = 0xC4;
        private static final int EVEX = 0x62;
    }
289 
    /** Emits a bare REX.W prefix (64-bit operand size, no register extension bits). */
    protected final void rexw() {
        emitByte(Prefix.REXW);
    }
293 
294     private static boolean isInvalidEncoding(Register reg) {
295         return Register.None.equals(reg) || AMD64.rip.equals(reg);
296     }
297 
    /** Emits the REX prefix required for {@code reg} in a non-byte instruction, if any. */
    protected final void prefix(Register reg) {
        prefix(reg, false);
    }
301 
302     protected final void prefix(Register reg, boolean byteinst) {
303         assert !isInvalidEncoding(reg);
304         int regEnc = reg.encoding;
305         if (regEnc >= 8) {
306             emitByte(Prefix.REXB);
307         } else if (byteinst && regEnc >= 4) {
308             emitByte(Prefix.REX);
309         }
310     }
311 
312     protected final void prefixq(Register reg) {
313         assert !isInvalidEncoding(reg);
314         if (reg.encoding < 8) {
315             emitByte(Prefix.REXW);
316         } else {
317             emitByte(Prefix.REXWB);
318         }
319     }
320 
    /** Emits the REX prefix required for a register-register operand pair, if any. */
    protected final void prefix(Register dst, Register src) {
        prefix(dst, false, src, false);
    }
324 
325     protected final void prefix(Register dst, boolean dstIsByte, Register src, boolean srcIsByte) {
326         assert !isInvalidEncoding(dst) && !isInvalidEncoding(src);
327         int dstEnc = dst.encoding;
328         int srcEnc = src.encoding;
329         if (dstEnc < 8) {
330             if (srcEnc >= 8) {
331                 emitByte(Prefix.REXB);
332             } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) {
333                 emitByte(Prefix.REX);
334             }
335         } else {
336             if (srcEnc < 8) {
337                 emitByte(Prefix.REXR);
338             } else {
339                 emitByte(Prefix.REXRB);
340             }
341         }
342     }
343 
344     /**
345      * Creates prefix for the operands. If the given operands exceed 3 bits, the 4th bit is encoded
346      * in the prefix.
347      */
348     protected final void prefixq(Register reg, Register rm) {
349         assert !isInvalidEncoding(reg) && !isInvalidEncoding(rm);
350         int regEnc = reg.encoding;
351         int rmEnc = rm.encoding;
352         if (regEnc < 8) {
353             if (rmEnc < 8) {
354                 emitByte(Prefix.REXW);
355             } else {
356                 emitByte(Prefix.REXWB);
357             }
358         } else {
359             if (rmEnc < 8) {
360                 emitByte(Prefix.REXWR);
361             } else {
362                 emitByte(Prefix.REXWRB);
363             }
364         }
365     }
366 
    /** Returns true if {@code reg} needs a REX extension bit (encoding 8 or higher). */
    protected static boolean needsRex(Register reg) {
        // rip is excluded implicitly.
        return reg.encoding >= MinEncodingNeedsRex;
    }
371 
372     protected static boolean needsRex(Register src, boolean srcIsByte) {
373         return srcIsByte ? src.encoding >= 4 : needsRex(src);
374     }
375 
376     protected final void prefix(AMD64Address adr) {
377         if (needsRex(adr.getBase())) {
378             if (needsRex(adr.getIndex())) {
379                 emitByte(Prefix.REXXB);
380             } else {
381                 emitByte(Prefix.REXB);
382             }
383         } else {
384             if (needsRex(adr.getIndex())) {
385                 emitByte(Prefix.REXX);
386             }
387         }
388     }
389 
390     protected final void prefixq(AMD64Address adr) {
391         if (needsRex(adr.getBase())) {
392             if (needsRex(adr.getIndex())) {
393                 emitByte(Prefix.REXWXB);
394             } else {
395                 emitByte(Prefix.REXWB);
396             }
397         } else {
398             if (needsRex(adr.getIndex())) {
399                 emitByte(Prefix.REXWX);
400             } else {
401                 emitByte(Prefix.REXW);
402             }
403         }
404     }
405 
    /** Emits the REX prefix required for a byte instruction with a memory and register operand. */
    protected void prefixb(AMD64Address adr, Register reg) {
        prefix(adr, reg, true);
    }
409 
    /** Emits the REX prefix required for a non-byte instruction with a memory and register operand. */
    protected void prefix(AMD64Address adr, Register reg) {
        prefix(adr, reg, false);
    }
413 
414     protected void prefix(AMD64Address adr, Register reg, boolean byteinst) {
415         assert !isInvalidEncoding(reg);
416         if (reg.encoding < 8) {
417             if (needsRex(adr.getBase())) {
418                 if (needsRex(adr.getIndex())) {
419                     emitByte(Prefix.REXXB);
420                 } else {
421                     emitByte(Prefix.REXB);
422                 }
423             } else {
424                 if (needsRex(adr.getIndex())) {
425                     emitByte(Prefix.REXX);
426                 } else if (byteinst && reg.encoding >= 4) {
427                     emitByte(Prefix.REX);
428                 }
429             }
430         } else {
431             if (needsRex(adr.getBase())) {
432                 if (needsRex(adr.getIndex())) {
433                     emitByte(Prefix.REXRXB);
434                 } else {
435                     emitByte(Prefix.REXRB);
436                 }
437             } else {
438                 if (needsRex(adr.getIndex())) {
439                     emitByte(Prefix.REXRX);
440                 } else {
441                     emitByte(Prefix.REXR);
442                 }
443             }
444         }
445     }
446 
447     protected void prefixq(AMD64Address adr, Register src) {
448         assert !isInvalidEncoding(src);
449         if (src.encoding < 8) {
450             if (needsRex(adr.getBase())) {
451                 if (needsRex(adr.getIndex())) {
452                     emitByte(Prefix.REXWXB);
453                 } else {
454                     emitByte(Prefix.REXWB);
455                 }
456             } else {
457                 if (needsRex(adr.getIndex())) {
458                     emitByte(Prefix.REXWX);
459                 } else {
460                     emitByte(Prefix.REXW);
461                 }
462             }
463         } else {
464             if (needsRex(adr.getBase())) {
465                 if (needsRex(adr.getIndex())) {
466                     emitByte(Prefix.REXWRXB);
467                 } else {
468                     emitByte(Prefix.REXWRB);
469                 }
470             } else {
471                 if (needsRex(adr.getIndex())) {
472                     emitByte(Prefix.REXWRX);
473                 } else {
474                     emitByte(Prefix.REXWR);
475                 }
476             }
477         }
478     }
479 
    /**
     * Get RXB bits for register-register instruction. In that encoding, ModRM.rm contains a
     * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
     * field. The X bit must be 0.
     */
    protected static int getRXB(Register reg, Register rm) {
        assert !isInvalidEncoding(rm) && !isInvalidEncoding(reg);
        // R is bit 2 of the result; a null register contributes 0.
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        // B is bit 0 of the result.
        rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
        return rxb;
    }
491 
    /**
     * Get RXB bits for register-memory instruction. The R bit extends the ModRM.reg field. There
     * are two cases for the memory operand:<br>
     * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.
     * <br>
     * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
     */
    protected static int getRXB(Register reg, AMD64Address rm) {
        assert !isInvalidEncoding(reg);
        // R is bit 2 of the result; a null register contributes 0.
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        if (!isInvalidEncoding(rm.getIndex())) {
            // X is bit 1 of the result.
            rxb |= (rm.getIndex().encoding & 0x08) >> 2;
        }
        if (!isInvalidEncoding(rm.getBase())) {
            // B is bit 0 of the result.
            rxb |= (rm.getBase().encoding & 0x08) >> 3;
        }
        return rxb;
    }
510 
    /**
     * Emit the ModR/M byte for one register operand and an opcode extension in the R field.
     * <p>
     * Format: [ 11 reg r/m ]
     */
    protected final void emitModRM(int reg, Register rm) {
        // The opcode extension must already fit in 3 bits.
        assert (reg & 0x07) == reg;
        assert !isInvalidEncoding(rm);
        // Mod = 11 selects the register-direct addressing mode.
        emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
    }
521 
    /**
     * Emit the ModR/M byte for two register operands.
     * <p>
     * Format: [ 11 reg r/m ]
     */
    protected final void emitModRM(Register reg, Register rm) {
        assert !isInvalidEncoding(reg);
        emitModRM(reg.encoding & 0x07, rm);
    }
531 
    /** Default disp8 scaling factor (no compressed displacement, i.e. non-EVEX encoding). */
    public static final int DEFAULT_DISP8_SCALE = 1;
533 
    /**
     * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
     *
     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
     */
    protected final void emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
        assert !isInvalidEncoding(reg);
        emitOperandHelper(encode(reg), addr, force4Byte, additionalInstructionSize, DEFAULT_DISP8_SCALE);
    }
543 
    /**
     * Emits the ModR/M byte and optionally the SIB byte for a memory operand with an opcode
     * extension in the R field, using the default displacement encoding.
     */
    protected final void emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize) {
        emitOperandHelper(reg, addr, false, additionalInstructionSize, DEFAULT_DISP8_SCALE);
    }
547 
    /**
     * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand,
     * using the default displacement encoding.
     */
    protected final void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize) {
        assert !isInvalidEncoding(reg);
        emitOperandHelper(encode(reg), addr, false, additionalInstructionSize, DEFAULT_DISP8_SCALE);
    }
552 
    /**
     * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand,
     * with an explicit EVEX disp8 scaling factor.
     */
    protected final void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize, int evexDisp8Scale) {
        assert !isInvalidEncoding(reg);
        emitOperandHelper(encode(reg), addr, false, additionalInstructionSize, evexDisp8Scale);
    }
557 
    /**
     * Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode
     * extension in the R field.
     *
     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
     * @param additionalInstructionSize the number of bytes that will be emitted after the operand,
     *            so that the start position of the next instruction can be computed even though
     *            this instruction has not been completely emitted yet.
     * @param evexDisp8Scale the scaling factor for computing the compressed displacement of
     *            EVEX-encoded instructions. This scaling factor only matters when the emitted
     *            instruction uses one-byte-displacement form.
     */
    private void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize, int evexDisp8Scale) {
        // The register/extension must already be reduced to its low 3 bits.
        assert (reg & 0x07) == reg;
        int regenc = reg << 3;

        Register base = addr.getBase();
        Register index = addr.getIndex();

        Scale scale = addr.getScale();
        int disp = addr.getDisplacement();

        if (base.equals(AMD64.rip)) { // also matches addresses returned by getPlaceholder()
            // [00 reg 101] disp32
            assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
            emitByte(0x05 | regenc);
            if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
                // Record the disp32 location so it can be patched once the final layout is known.
                codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize));
            }
            emitInt(disp);
        } else if (base.isValid()) {
            boolean overriddenForce4Byte = force4Byte;
            int baseenc = base.isValid() ? encode(base) : 0;

            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [base + indexscale + disp]
                // rbp/r13 as base cannot use the no-displacement form (their Mod=00 encoding
                // means RIP-relative/disp32), so they fall through to the disp8/disp32 cases.
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base + indexscale]
                    // [00 reg 100][ss index base]
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x04 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                } else {
                    // EVEX compressed disp8: if the displacement is a multiple of the scale and
                    // the quotient fits in a byte, emit the scaled-down byte form.
                    if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
                        if (disp % evexDisp8Scale == 0) {
                            int newDisp = disp / evexDisp8Scale;
                            if (isByte(newDisp)) {
                                disp = newDisp;
                                assert isByte(disp) && !overriddenForce4Byte;
                            }
                        } else {
                            overriddenForce4Byte = true;
                        }
                    }
                    if (isByte(disp) && !overriddenForce4Byte) {
                        // [base + indexscale + imm8]
                        // [01 reg 100][ss index base] imm8
                        assert !index.equals(rsp) : "illegal addressing mode";
                        emitByte(0x44 | regenc);
                        emitByte(scale.log2 << 6 | indexenc | baseenc);
                        emitByte(disp & 0xFF);
                    } else {
                        // [base + indexscale + disp32]
                        // [10 reg 100][ss index base] disp32
                        assert !index.equals(rsp) : "illegal addressing mode";
                        emitByte(0x84 | regenc);
                        emitByte(scale.log2 << 6 | indexenc | baseenc);
                        emitInt(disp);
                    }
                }
            } else if (base.equals(rsp) || base.equals(r12)) {
                // rsp/r12 as base always require an SIB byte (their ModRM.rm encoding 100
                // signals the presence of an SIB byte).
                // [rsp + disp]
                if (disp == 0) {
                    // [rsp]
                    // [00 reg 100][00 100 100]
                    emitByte(0x04 | regenc);
                    emitByte(0x24);
                } else {
                    // EVEX compressed disp8 (see above).
                    if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
                        if (disp % evexDisp8Scale == 0) {
                            int newDisp = disp / evexDisp8Scale;
                            if (isByte(newDisp)) {
                                disp = newDisp;
                                assert isByte(disp) && !overriddenForce4Byte;
                            }
                        } else {
                            overriddenForce4Byte = true;
                        }
                    }
                    if (isByte(disp) && !overriddenForce4Byte) {
                        // [rsp + imm8]
                        // [01 reg 100][00 100 100] disp8
                        emitByte(0x44 | regenc);
                        emitByte(0x24);
                        emitByte(disp & 0xFF);
                    } else {
                        // [rsp + imm32]
                        // [10 reg 100][00 100 100] disp32
                        emitByte(0x84 | regenc);
                        emitByte(0x24);
                        emitInt(disp);
                    }
                }
            } else {
                // [base + disp]
                assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base]
                    // [00 reg base]
                    emitByte(0x00 | regenc | baseenc);
                } else {
                    // EVEX compressed disp8 (see above).
                    if (evexDisp8Scale > 1 && !overriddenForce4Byte) {
                        if (disp % evexDisp8Scale == 0) {
                            int newDisp = disp / evexDisp8Scale;
                            if (isByte(newDisp)) {
                                disp = newDisp;
                                assert isByte(disp) && !overriddenForce4Byte;
                            }
                        } else {
                            overriddenForce4Byte = true;
                        }
                    }
                    if (isByte(disp) && !overriddenForce4Byte) {
                        // [base + disp8]
                        // [01 reg base] disp8
                        emitByte(0x40 | regenc | baseenc);
                        emitByte(disp & 0xFF);
                    } else {
                        // [base + disp32]
                        // [10 reg base] disp32
                        emitByte(0x80 | regenc | baseenc);
                        emitInt(disp);
                    }
                }
            }
        } else {
            // No base register.
            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [indexscale + disp]
                // [00 reg 100][ss index 101] disp32
                assert !index.equals(rsp) : "illegal addressing mode";
                emitByte(0x04 | regenc);
                emitByte(scale.log2 << 6 | indexenc | 0x05);
                emitInt(disp);
            } else {
                // [disp] ABSOLUTE
                // [00 reg 100][00 100 101] disp32
                emitByte(0x04 | regenc);
                emitByte(0x25);
                emitInt(disp);
            }
        }
    }
712 
    /**
     * Abstraction over the SIMD prefix encoding: implemented by the legacy SSE encoder and the
     * VEX encoder, selected in the constructor based on AVX support.
     */
    private interface SIMDEncoder {

        // Emits the prefix bytes for a SIMD instruction with a memory operand.
        void simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW);

        // Emits the prefix bytes for a SIMD instruction with register operands only.
        void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW);

    }
720 
721     private class SSEEncoderImpl implements SIMDEncoder {
722 
723         @Override
simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW)724         public void simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
725             assert (!nds.isValid()) || nds.equals(xreg);
726             if (sizePrefix > 0) {
727                 emitByte(sizePrefix);
728             }
729             if (isRexW) {
730                 prefixq(adr, xreg);
731             } else {
732                 prefix(adr, xreg);
733             }
734             if (opcodeEscapePrefix > 0xFF) {
735                 emitShort(opcodeEscapePrefix);
736             } else if (opcodeEscapePrefix > 0) {
737                 emitByte(opcodeEscapePrefix);
738             }
739         }
740 
741         @Override
simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW)742         public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
743             assert (!nds.isValid()) || nds.equals(dst) || nds.equals(src);
744             if (sizePrefix > 0) {
745                 emitByte(sizePrefix);
746             }
747             if (isRexW) {
748                 prefixq(dst, src);
749             } else {
750                 prefix(dst, src);
751             }
752             if (opcodeEscapePrefix > 0xFF) {
753                 emitShort(opcodeEscapePrefix);
754             } else if (opcodeEscapePrefix > 0) {
755                 emitByte(opcodeEscapePrefix);
756             }
757         }
758     }
759 
    /**
     * Symbolic constants for the fields of the VEX prefix (vector length L, opcode-extension
     * pp, and opcode-escape m-mmmm), as consumed by {@code emitVEX}.
     */
    public static final class VEXPrefixConfig {
        // Vector length field: 128-bit, 256-bit, 512-bit (EVEX L'L only).
        public static final int L128 = 0;
        public static final int L256 = 1;
        public static final int L512 = 2;
        // LZ: length bit must be zero (same encoding as L128).
        public static final int LZ = 0;

        // Operand-size promotion bit W.
        public static final int W0 = 0;
        public static final int W1 = 1;
        // WIG: W is ignored by the instruction (same encoding as W0).
        public static final int WIG = 0;

        // Compressed legacy prefix pp: none, 0x66, 0xF3, 0xF2.
        public static final int P_ = 0x0;
        public static final int P_66 = 0x1;
        public static final int P_F3 = 0x2;
        public static final int P_F2 = 0x3;

        // Compressed opcode escape m-mmmm: 0F, 0F 38, 0F 3A.
        public static final int M_0F = 0x1;
        public static final int M_0F38 = 0x2;
        public static final int M_0F3A = 0x3;

        // Constants-only holder; never instantiated.
        private VEXPrefixConfig() {
        }
    }
782 
783     private class VEXEncoderImpl implements SIMDEncoder {
784 
sizePrefixToPP(int sizePrefix)785         private int sizePrefixToPP(int sizePrefix) {
786             switch (sizePrefix) {
787                 case 0x66:
788                     return P_66;
789                 case 0xF2:
790                     return P_F2;
791                 case 0xF3:
792                     return P_F3;
793                 default:
794                     return P_;
795             }
796         }
797 
opcodeEscapePrefixToMMMMM(int opcodeEscapePrefix)798         private int opcodeEscapePrefixToMMMMM(int opcodeEscapePrefix) {
799             switch (opcodeEscapePrefix) {
800                 case 0x0F:
801                     return M_0F;
802                 case 0x380F:
803                     return M_0F38;
804                 case 0x3A0F:
805                     return M_0F3A;
806                 default:
807                     return 0;
808             }
809         }
810 
811         @Override
simdPrefix(Register reg, Register nds, AMD64Address rm, int sizePrefix, int opcodeEscapePrefix, boolean isRexW)812         public void simdPrefix(Register reg, Register nds, AMD64Address rm, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
813             assert reg.encoding < 16 : "encoding out of range: " + reg.encoding;
814             assert nds.encoding < 16 : "encoding out of range: " + nds.encoding;
815             emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(reg, rm), nds.isValid() ? nds.encoding : 0, true);
816         }
817 
818         @Override
819         public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
820             assert dst.encoding < 16 : "encoding out of range: " + dst.encoding;
821             assert src.encoding < 16 : "encoding out of range: " + src.encoding;
822             assert nds.encoding < 16 : "encoding out of range: " + nds.encoding;
823             emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(dst, src), nds.isValid() ? nds.encoding : 0, true);
824         }
825     }
826 
827     protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) {
828         simdEncoder.simdPrefix(xreg, nds, adr, overriddenSizePrefix != 0 ? overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW);
829     }
830 
831     protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int opcodeEscapePrefix, boolean isRexW) {
832         simdEncoder.simdPrefix(xreg, nds, adr, size.sizePrefix, opcodeEscapePrefix, isRexW);
833     }
834 
835     protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) {
836         simdEncoder.simdPrefix(dst, nds, src, overriddenSizePrefix != 0 ? overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW);
837     }
838 
839     protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int opcodeEscapePrefix, boolean isRexW) {
840         simdEncoder.simdPrefix(dst, nds, src, size.sizePrefix, opcodeEscapePrefix, isRexW);
841     }
842 
843  // @formatter:off
844  //
845  // Instruction Format and VEX illustrated below (optional []):
846  //
847  // #of bytes:    2,3      1       1       1       1,2,4       1
848  // [Prefixes]    VEX   OpCode   ModR/M  [SIB]   [Disp8*N] [Immediate]
849  //                                             [Disp16,32]
850  //
851  // VEX: 0xC4 | P1 | P2
852  //
853  //     7   6   5   4   3   2   1   0
854  // P1  R   X   B   m   m   m   m   m      P[ 7:0]
855  // P2  W   v   v   v   v   L   p   p      P[15:8]
856  //
 // VEX: 0xC5 | P1
858  //
859  //     7   6   5   4   3   2   1   0
860  // P1  R   v   v   v   v   L   p   p      P[7:0]
861  //
862  // Figure. Bit Field Layout of the VEX Prefix
863  //
864  // Table. VEX Prefix Bit Field Functional Grouping
865  //
866  // Notation        Bit field Group        Position        Comment
867  // ----------  -------------------------  --------  -------------------
868  // VEX.RXB     Next-8 register specifier  P[7:5]    Combine with ModR/M.reg, ModR/M.rm (base, index/vidx).
869  // VEX.R       REX.R inverse              P[7]      Combine with EVEX.R and ModR/M.reg.
870  // VEX.X       REX.X inverse              P[6]      Combine with EVEX.B and ModR/M.rm, when SIB/VSIB absent.
871  // VEX.B       REX.B inverse              P[5]
 // VEX.mmmmm   0F, 0F_38, 0F_3A encoding  P[4:0]    b01/0x0F, b10/0F_38, b11/0F_3A (all other reserved)
873  //
874  // VEX.W       Opcode specific            P[15]
875  // VEX.vvvv    A register specifier       P[14:11]  In inverse form, b1111 if not used.
876  //                                        P[6:3]
877  // VEX.L       Vector length/RC           P[10]     b0/scalar or 128b vec, b1/256b vec.
878  //                                        P[2]
879  // VEX.pp      Compressed legacy prefix   P[9:8]    b00/None, b01/0x66, b10/0xF3, b11/0xF2
880  //                                        P[1:0]
881  // @formatter:on
882 
883     /**
884      * Low-level function to encode and emit the VEX prefix.
885      * <p>
886      * 2 byte form: [1100 0101] [R vvvv L pp]<br>
887      * 3 byte form: [1100 0100] [RXB m-mmmm] [W vvvv L pp]
888      * <p>
889      * The RXB and vvvv fields are stored in 1's complement in the prefix encoding. This function
890      * performs the 1s complement conversion, the caller is expected to pass plain unencoded
891      * arguments.
892      * <p>
893      * The pp field encodes an extension to the opcode:<br>
894      * 00: no extension<br>
895      * 01: 66<br>
896      * 10: F3<br>
897      * 11: F2
898      * <p>
899      * The m-mmmm field encodes the leading bytes of the opcode:<br>
900      * 00001: implied 0F leading opcode byte (default in 2-byte encoding)<br>
901      * 00010: implied 0F 38 leading opcode bytes<br>
902      * 00011: implied 0F 3A leading opcode bytes
903      * <p>
904      * This function automatically chooses the 2 or 3 byte encoding, based on the XBW flags and the
905      * m-mmmm field.
906      */
907     protected final void emitVEX(int l, int pp, int mmmmm, int w, int rxb, int vvvv, boolean checkAVX) {
908         assert !checkAVX || ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX) : "emitting VEX prefix on a CPU without AVX support";
909 
910         assert l == L128 || l == L256 : "invalid value for VEX.L";
911         assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for VEX.pp";
912         assert mmmmm == M_0F || mmmmm == M_0F38 || mmmmm == M_0F3A : "invalid value for VEX.m-mmmm";
913         assert w == W0 || w == W1 : "invalid value for VEX.W";
914 
915         assert (rxb & 0x07) == rxb : "invalid value for VEX.RXB";
916         assert (vvvv & 0x0F) == vvvv : "invalid value for VEX.vvvv";
917 
918         int rxb1s = rxb ^ 0x07;
919         int vvvv1s = vvvv ^ 0x0F;
920         if ((rxb & 0x03) == 0 && w == WIG && mmmmm == M_0F) {
921             // 2 byte encoding
922             int byte2 = 0;
923             byte2 |= (rxb1s & 0x04) << 5;
924             byte2 |= vvvv1s << 3;
925             byte2 |= l << 2;
926             byte2 |= pp;
927 
928             emitByte(Prefix.VEX2);
929             emitByte(byte2);
930         } else {
931             // 3 byte encoding
932             int byte2 = 0;
933             byte2 = (rxb1s & 0x07) << 5;
934             byte2 |= mmmmm;
935 
936             int byte3 = 0;
937             byte3 |= w << 7;
938             byte3 |= vvvv1s << 3;
939             byte3 |= l << 2;
940             byte3 |= pp;
941 
942             emitByte(Prefix.VEX3);
943             emitByte(byte2);
944             emitByte(byte3);
945         }
946     }
947 
    /**
     * Maps an AVX operand size to the corresponding vector-length (L / L'L) field value,
     * defaulting to {@code LZ} for sizes without a dedicated length encoding.
     */
    public static int getLFlag(AVXSize size) {
        switch (size) {
            case XMM:
                return L128;
            case YMM:
                return L256;
            case ZMM:
                return L512;
            default:
                return LZ;
        }
    }
960 
961     public static boolean isAVX512Register(Register reg) {
962         return reg != null && reg.isValid() && AMD64.XMM.equals(reg.getRegisterCategory()) && reg.encoding > 15;
963     }
964 
965     public final boolean vexPrefix(Register dst, Register nds, Register src, AVXSize size, int pp, int mmmmm, int w, int wEvex, boolean checkAVX) {
966         if (isAVX512Register(dst) || isAVX512Register(nds) || isAVX512Register(src) || size == AVXSize.ZMM) {
967             evexPrefix(dst, Register.None, nds, src, size, pp, mmmmm, wEvex, Z0, B0);
968             return true;
969         }
970         emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0, checkAVX);
971         return false;
972     }
973 
974     public final boolean vexPrefix(Register dst, Register nds, AMD64Address src, AVXSize size, int pp, int mmmmm, int w, int wEvex, boolean checkAVX) {
975         if (isAVX512Register(dst) || isAVX512Register(nds) || size == AVXSize.ZMM) {
976             evexPrefix(dst, Register.None, nds, src, size, pp, mmmmm, wEvex, Z0, B0);
977             return true;
978         }
979         emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0, checkAVX);
980         return false;
981     }
982 
    /**
     * Symbolic constants for EVEX-only prefix fields: the zeroing/merging bit (z) and the
     * broadcast/rounding/SAE context bit (b), as consumed by {@code emitEVEX}.
     */
    protected static final class EVEXPrefixConfig {
        // EVEX.z zeroing/merging control bit (see Intel SDM for semantics of each value).
        public static final int Z0 = 0x0;
        public static final int Z1 = 0x1;

        // EVEX.b broadcast/rounding/SAE context bit: off (B0) or on (B1).
        public static final int B0 = 0x0;
        public static final int B1 = 0x1;

        // Constants-only holder; never instantiated.
        private EVEXPrefixConfig() {
        }
    }
993 
    // Sentinel scaling factor: the tuple type does not support the given vector length
    // (EVEXTuple.verifyScalingFactor throws when it is encountered).
    private static final int NOT_SUPPORTED_VECTOR_LENGTH = -1;
995 
    /**
     * EVEX-encoded instructions use a compressed displacement scheme by multiplying disp8 with a
     * scaling factor N depending on the tuple type and the vector length.
     *
     * Each constant lists N for 128-bit, 256-bit and 512-bit vector lengths in that order;
     * {@code NOT_SUPPORTED_VECTOR_LENGTH} marks combinations without a defined factor.
     *
     * Reference: Intel Software Developer's Manual Volume 2, Section 2.6.5
     */
    protected enum EVEXTuple {
        INVALID(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH),
        // Full-vector tuples, with and without embedded broadcast.
        FV_NO_BROADCAST_32BIT(16, 32, 64),
        FV_BROADCAST_32BIT(4, 4, 4),
        FV_NO_BROADCAST_64BIT(16, 32, 64),
        FV_BROADCAST_64BIT(8, 8, 8),
        // Half-vector tuples.
        HV_NO_BROADCAST_32BIT(8, 16, 32),
        HV_BROADCAST_32BIT(4, 4, 4),
        // Full-vector memory tuple.
        FVM(16, 32, 64),
        // Tuple1 scalar, by element size.
        T1S_8BIT(1, 1, 1),
        T1S_16BIT(2, 2, 2),
        T1S_32BIT(4, 4, 4),
        T1S_64BIT(8, 8, 8),
        // Tuple1 fixed, by element size.
        T1F_32BIT(4, 4, 4),
        T1F_64BIT(8, 8, 8),
        // Tuple2/4/8, by element size; not all vector lengths are supported.
        T2_32BIT(8, 8, 8),
        T2_64BIT(NOT_SUPPORTED_VECTOR_LENGTH, 16, 16),
        T4_32BIT(NOT_SUPPORTED_VECTOR_LENGTH, 16, 16),
        T4_64BIT(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, 32),
        T8_32BIT(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, 32),
        // Half/quarter/eighth-vector memory tuples.
        HVM(8, 16, 32),
        QVM(4, 8, 16),
        OVM(2, 4, 8),
        // Fixed 128-bit and duplicate tuples.
        M128(16, 16, 16),
        DUP(8, 32, 64);

        // Disp8 scaling factor N for each supported vector length.
        private final int scalingFactorVL128;
        private final int scalingFactorVL256;
        private final int scalingFactorVL512;

        EVEXTuple(int scalingFactorVL128, int scalingFactorVL256, int scalingFactorVL512) {
            this.scalingFactorVL128 = scalingFactorVL128;
            this.scalingFactorVL256 = scalingFactorVL256;
            this.scalingFactorVL512 = scalingFactorVL512;
        }

        /** Rejects the sentinel value: asking for an unsupported tuple/length combination is a bug. */
        private static int verifyScalingFactor(int scalingFactor) {
            if (scalingFactor == NOT_SUPPORTED_VECTOR_LENGTH) {
                throw GraalError.shouldNotReachHere("Invalid scaling factor.");
            }
            return scalingFactor;
        }

        /**
         * Returns the disp8 scaling factor N for this tuple type at the given vector length.
         *
         * @throws GraalError if the combination has no defined scaling factor
         */
        public int getDisp8ScalingFactor(AVXSize size) {
            switch (size) {
                case XMM:
                    return verifyScalingFactor(scalingFactorVL128);
                case YMM:
                    return verifyScalingFactor(scalingFactorVL256);
                case ZMM:
                    return verifyScalingFactor(scalingFactorVL512);
                default:
                    throw GraalError.shouldNotReachHere("Unsupported vector size.");
            }
        }
    }
1058 
    /**
     * Comparison predicate encodings used as the immediate operand of EVEX-encoded compare
     * instructions.
     */
    public static final class EVEXComparisonPredicate {
        public static final int EQ = 0;
        public static final int LT = 1;
        public static final int LE = 2;
        public static final int FALSE = 3;
        public static final int NEQ = 4;
        public static final int NLT = 5;
        public static final int NLE = 6;
        public static final int TRUE = 7;
    }
1069 
1070  // @formatter:off
1071  //
1072  // Instruction Format and EVEX illustrated below (optional []):
1073  //
1074  // #of bytes:      4       1       1       1       1,2,4       1
1075  // [Prefixes]    EVEX   OpCode   ModR/M  [SIB]   [Disp8*N] [Immediate]
1076  //                                              [Disp16,32]
1077  //
1078  // The EVEX prefix is a 4-byte prefix, with the first two bytes derived from unused encoding
1079  // form of the 32-bit-mode-only BOUND instruction. The layout of the EVEX prefix is shown in
1080  // the figure below. The first byte must be 0x62, followed by three pay-load bytes, denoted
1081  // as P1, P2, and P3 individually or collectively as P[23:0] (see below).
1082  //
1083  // EVEX: 0x62 | P1 | P2 | P3
1084  //
1085  //     7   6   5   4   3   2   1   0
1086  // P1  R   X   B   R'  0   0   m   m      P[ 7: 0]
1087  // P2  W   v   v   v   v   1   p   p      P[15: 8]
1088  // P3  z   L'  L   b   V'  a   a   a      P[23:16]
1089  //
1090  // Figure. Bit Field Layout of the EVEX Prefix
1091  //
1092  // Table. EVEX Prefix Bit Field Functional Grouping
1093  //
1094  // Notation        Bit field Group        Position        Comment
1095  // ---------  --------------------------  --------  -----------------------
1096  // EVEX.RXB   Next-8 register specifier   P[7:5]    Combine with ModR/M.reg, ModR/M.rm (base, index/vidx).
1097  // EVEX.X     High-16 register specifier  P[6]      Combine with EVEX.B and ModR/M.rm, when SIB/VSIB absent.
1098  // EVEX.R'    High-16 register specifier  P[4]      Combine with EVEX.R and ModR/M.reg.
1099  // --         Reserved                    P[3:2]    Must be 0.
1100  // EVEX.mm    Compressed legacy escape    P[1:0]    Identical to low two bits of VEX.mmmmm.
1101  //
1102  // EVEX.W     Osize promotion/Opcode ext  P[15]
1103  // EVEX.vvvv  NDS register specifier      P[14:11]  Same as VEX.vvvv.
1104  // --         Fixed Value                 P[10]     Must be 1.
1105  // EVEX.pp    Compressed legacy prefix    P[9:8]    Identical to VEX.pp.
1106  //
1107  // EVEX.z     Zeroing/Merging             P[23]
1108  // EVEX.L'L   Vector length/RC            P[22:21]
1109  // EVEX.b     Broadcast/RC/SAE Context    P[20]
1110  // EVEX.V'    High-16 NDS/VIDX register   P[19]     Combine with EVEX.vvvv or VSIB when present.
1111  // EVEX.aaa   Embedded opmask register    P[18:16]
1112  //
1113  // @formatter:on
1114 
1115     /**
1116      * Low-level function to encode and emit the EVEX prefix.
1117      * <p>
1118      * 62 [0 1 1 0 0 0 1 0]<br>
1119      * P1 [R X B R'0 0 m m]<br>
1120      * P2 [W v v v v 1 p p]<br>
1121      * P3 [z L'L b V'a a a]
1122      * <p>
1123      * The pp field encodes an extension to the opcode:<br>
1124      * 00: no extension<br>
1125      * 01: 66<br>
1126      * 10: F3<br>
1127      * 11: F2
1128      * <p>
1129      * The mm field encodes the leading bytes of the opcode:<br>
1130      * 01: implied 0F leading opcode byte<br>
1131      * 10: implied 0F 38 leading opcode bytes<br>
1132      * 11: implied 0F 3A leading opcode bytes
1133      * <p>
1134      * The z field encodes the merging mode (merge or zero).
1135      * <p>
1136      * The b field encodes the source broadcast or data rounding modes.
1137      * <p>
1138      * The aaa field encodes the operand mask register.
1139      */
1140     private void emitEVEX(int l, int pp, int mm, int w, int rxb, int reg, int vvvvv, int z, int b, int aaa) {
1141         assert ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX512F) : "emitting EVEX prefix on a CPU without AVX512 support";
1142 
1143         assert l == L128 || l == L256 || l == L512 : "invalid value for EVEX.L'L";
1144         assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for EVEX.pp";
1145         assert mm == M_0F || mm == M_0F38 || mm == M_0F3A : "invalid value for EVEX.mm";
1146         assert w == W0 || w == W1 : "invalid value for EVEX.W";
1147 
1148         assert (rxb & 0x07) == rxb : "invalid value for EVEX.RXB";
1149         assert (reg & 0x1F) == reg : "invalid value for EVEX.R'";
1150         assert (vvvvv & 0x1F) == vvvvv : "invalid value for EVEX.V'vvvv";
1151 
1152         assert z == Z0 || z == Z1 : "invalid value for EVEX.z";
1153         assert b == B0 || b == B1 : "invalid value for EVEX.b";
1154         assert (aaa & 0x07) == aaa : "invalid value for EVEX.aaa";
1155 
1156         emitByte(Prefix.EVEX);
1157         int p1 = 0;
1158         p1 |= ((rxb ^ 0x07) & 0x07) << 5;
1159         p1 |= reg < 16 ? 0x10 : 0;
1160         p1 |= mm;
1161         emitByte(p1);
1162 
1163         int p2 = 0;
1164         p2 |= w << 7;
1165         p2 |= ((vvvvv ^ 0x0F) & 0x0F) << 3;
1166         p2 |= 0x04;
1167         p2 |= pp;
1168         emitByte(p2);
1169 
1170         int p3 = 0;
1171         p3 |= z << 7;
1172         p3 |= l << 5;
1173         p3 |= b << 4;
1174         p3 |= vvvvv < 16 ? 0x08 : 0;
1175         p3 |= aaa;
1176         emitByte(p3);
1177     }
1178 
1179     /**
1180      * Get RXB bits for register-register instructions in EVEX-encoding, where ModRM.rm contains a
1181      * register index. The R bit extends the ModRM.reg field and the X and B bits extends the
1182      * ModRM.rm field.
1183      */
1184     private static int getRXBForEVEX(Register reg, Register rm) {
1185         int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
1186         rxb |= (rm == null ? 0 : rm.encoding & 0x018) >> 3;
1187         return rxb;
1188     }
1189 
1190     /**
1191      * Helper method for emitting EVEX prefix in the form of RRRR.
1192      */
1193     protected final void evexPrefix(Register dst, Register mask, Register nds, Register src, AVXSize size, int pp, int mm, int w, int z, int b) {
1194         assert !mask.isValid() || inRC(MASK, mask);
1195         emitEVEX(getLFlag(size), pp, mm, w, getRXBForEVEX(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0);
1196     }
1197 
1198     /**
1199      * Helper method for emitting EVEX prefix in the form of RRRM. Because the memory addressing in
1200      * EVEX-encoded instructions employ a compressed displacement scheme when using disp8 form, the
1201      * user of this API should make sure to encode the operands using
1202      * {@link #emitOperandHelper(Register, AMD64Address, int, int)}.
1203      */
1204     protected final void evexPrefix(Register dst, Register mask, Register nds, AMD64Address src, AVXSize size, int pp, int mm, int w, int z, int b) {
1205         assert !mask.isValid() || inRC(MASK, mask);
1206         emitEVEX(getLFlag(size), pp, mm, w, getRXB(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0);
1207     }
1208 
1209 }
1210