1 /* 2 * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 25 package org.graalvm.compiler.asm.amd64; 26 27 import static jdk.vm.ci.amd64.AMD64.MASK; 28 import static jdk.vm.ci.amd64.AMD64.XMM; 29 import static jdk.vm.ci.amd64.AMD64.r12; 30 import static jdk.vm.ci.amd64.AMD64.r13; 31 import static jdk.vm.ci.amd64.AMD64.rbp; 32 import static jdk.vm.ci.amd64.AMD64.rsp; 33 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0; 34 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B1; 35 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0; 36 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1; 37 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128; 38 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256; 39 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L512; 40 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ; 41 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F; 42 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38; 43 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A; 44 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_; 45 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66; 46 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2; 47 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3; 48 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0; 49 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1; 50 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG; 51 import static org.graalvm.compiler.core.common.NumUtil.isByte; 52 53 import org.graalvm.compiler.asm.Assembler; 54 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale; 55 import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize; 56 import org.graalvm.compiler.debug.GraalError; 57 58 import jdk.vm.ci.amd64.AMD64; 59 import jdk.vm.ci.amd64.AMD64.CPUFeature; 60 import jdk.vm.ci.amd64.AMD64Kind; 61 import jdk.vm.ci.code.Register; 62 import jdk.vm.ci.code.Register.RegisterCategory; 63 import jdk.vm.ci.code.TargetDescription; 64 import jdk.vm.ci.meta.PlatformKind; 65 66 /** 67 * This class implements an assembler that can encode most X86 instructions. 68 */ 69 public abstract class AMD64BaseAssembler extends Assembler { 70 71 private final SIMDEncoder simdEncoder; 72 73 /** 74 * Constructs an assembler for the AMD64 architecture. 75 */ AMD64BaseAssembler(TargetDescription target)76 public AMD64BaseAssembler(TargetDescription target) { 77 super(target); 78 79 if (supports(CPUFeature.AVX)) { 80 simdEncoder = new VEXEncoderImpl(); 81 } else { 82 simdEncoder = new SSEEncoderImpl(); 83 } 84 } 85 86 /** 87 * The x86 operand sizes. 88 */ 89 public enum OperandSize { 90 BYTE(1, AMD64Kind.BYTE) { 91 @Override emitImmediate(AMD64BaseAssembler asm, int imm)92 protected void emitImmediate(AMD64BaseAssembler asm, int imm) { 93 assert imm == (byte) imm; 94 asm.emitByte(imm); 95 } 96 97 @Override immediateSize()98 protected int immediateSize() { 99 return 1; 100 } 101 }, 102 103 WORD(2, AMD64Kind.WORD, 0x66) { 104 @Override emitImmediate(AMD64BaseAssembler asm, int imm)105 protected void emitImmediate(AMD64BaseAssembler asm, int imm) { 106 assert imm == (short) imm; 107 asm.emitShort(imm); 108 } 109 110 @Override immediateSize()111 protected int immediateSize() { 112 return 2; 113 } 114 }, 115 116 DWORD(4, AMD64Kind.DWORD) { 117 @Override emitImmediate(AMD64BaseAssembler asm, int imm)118 protected void emitImmediate(AMD64BaseAssembler asm, int imm) { 119 asm.emitInt(imm); 120 } 121 122 @Override immediateSize()123 protected int immediateSize() { 124 return 4; 125 } 126 }, 127 128 QWORD(8, AMD64Kind.QWORD) { 129 @Override emitImmediate(AMD64BaseAssembler asm, int imm)130 protected void emitImmediate(AMD64BaseAssembler asm, int imm) { 131 asm.emitInt(imm); 132 } 133 134 @Override immediateSize()135 protected int immediateSize() { 136 return 4; 137 } 138 }, 139 140 SS(4, AMD64Kind.SINGLE, 0xF3, true), 141 142 SD(8, AMD64Kind.DOUBLE, 0xF2, true), 143 144 PS(16, AMD64Kind.V128_SINGLE, true), 145 146 PD(16, AMD64Kind.V128_DOUBLE, 0x66, true); 147 148 private final int sizePrefix; 149 private final int bytes; 150 private final boolean xmm; 151 private final AMD64Kind kind; 152 OperandSize(int bytes, AMD64Kind kind)153 OperandSize(int bytes, AMD64Kind kind) { 154 this(bytes, kind, 0); 155 } 156 OperandSize(int bytes, AMD64Kind kind, int sizePrefix)157 OperandSize(int bytes, AMD64Kind kind, int sizePrefix) { 158 this(bytes, kind, sizePrefix, false); 159 } 160 OperandSize(int bytes, AMD64Kind kind, boolean xmm)161 OperandSize(int bytes, AMD64Kind kind, boolean xmm) { 162 this(bytes, kind, 0, xmm); 163 } 164 OperandSize(int bytes, AMD64Kind kind, int sizePrefix, boolean xmm)165 OperandSize(int bytes, AMD64Kind kind, int sizePrefix, boolean xmm) { 166 this.sizePrefix = sizePrefix; 167 this.bytes = bytes; 168 this.kind = kind; 169 this.xmm = xmm; 170 } 171 getSizePrefix()172 public int getSizePrefix() { 173 return sizePrefix; 174 } 175 getBytes()176 public int getBytes() { 177 return bytes; 178 } 179 isXmmType()180 public boolean isXmmType() { 181 return xmm; 182 } 183 getKind()184 public AMD64Kind getKind() { 185 return kind; 186 } 187 get(PlatformKind kind)188 public static OperandSize get(PlatformKind kind) { 189 for (OperandSize operandSize : OperandSize.values()) { 190 if (operandSize.kind.equals(kind)) { 191 return operandSize; 192 } 193 } 194 throw GraalError.shouldNotReachHere("Unexpected kind: " + kind.toString()); 195 } 196 197 /** 198 * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded 199 * as sign-extended 32-bit values. 200 * 201 * @param asm 202 * @param imm 203 */ emitImmediate(AMD64BaseAssembler asm, int imm)204 protected void emitImmediate(AMD64BaseAssembler asm, int imm) { 205 throw new UnsupportedOperationException(); 206 } 207 immediateSize()208 protected int immediateSize() { 209 throw new UnsupportedOperationException(); 210 } 211 } 212 213 public static class OperandDataAnnotation extends CodeAnnotation { 214 /** 215 * The position (bytes from the beginning of the method) of the operand. 216 */ 217 public final int operandPosition; 218 /** 219 * The size of the operand, in bytes. 220 */ 221 public final int operandSize; 222 /** 223 * The position (bytes from the beginning of the method) of the next instruction. On AMD64, 224 * RIP-relative operands are relative to this position. 225 */ 226 public final int nextInstructionPosition; 227 OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition)228 OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) { 229 super(instructionPosition); 230 231 this.operandPosition = operandPosition; 232 this.operandSize = operandSize; 233 this.nextInstructionPosition = nextInstructionPosition; 234 } 235 236 @Override toString()237 public String toString() { 238 return getClass().getSimpleName() + " instruction [" + instructionPosition + ", " + nextInstructionPosition + "[ operand at " + operandPosition + " size " + operandSize; 239 } 240 } 241 annotatePatchingImmediate(int operandOffset, int operandSize)242 protected void annotatePatchingImmediate(int operandOffset, int operandSize) { 243 if (codePatchingAnnotationConsumer != null) { 244 int pos = position(); 245 codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(pos, pos + operandOffset, operandSize, pos + operandOffset + operandSize)); 246 } 247 } 248 supports(CPUFeature feature)249 public final boolean supports(CPUFeature feature) { 250 return ((AMD64) target.arch).getFeatures().contains(feature); 251 } 252 inRC(RegisterCategory rc, Register r)253 protected static boolean inRC(RegisterCategory rc, Register r) { 254 return r.getRegisterCategory().equals(rc); 255 } 256 encode(Register r)257 protected static int encode(Register r) { 258 assert r.encoding >= 0 && (inRC(XMM, r) ? r.encoding < 32 : r.encoding < 16) : "encoding out of range: " + r.encoding; 259 return r.encoding & 0x7; 260 } 261 262 private static final int MinEncodingNeedsRex = 8; 263 264 /** 265 * Constants for X86 prefix bytes. 266 */ 267 private static class Prefix { 268 private static final int REX = 0x40; 269 private static final int REXB = 0x41; 270 private static final int REXX = 0x42; 271 private static final int REXXB = 0x43; 272 private static final int REXR = 0x44; 273 private static final int REXRB = 0x45; 274 private static final int REXRX = 0x46; 275 private static final int REXRXB = 0x47; 276 private static final int REXW = 0x48; 277 private static final int REXWB = 0x49; 278 private static final int REXWX = 0x4A; 279 private static final int REXWXB = 0x4B; 280 private static final int REXWR = 0x4C; 281 private static final int REXWRB = 0x4D; 282 private static final int REXWRX = 0x4E; 283 private static final int REXWRXB = 0x4F; 284 285 private static final int VEX2 = 0xC5; 286 private static final int VEX3 = 0xC4; 287 private static final int EVEX = 0x62; 288 } 289 290 protected final void rexw() { 291 emitByte(Prefix.REXW); 292 } 293 294 private static boolean isInvalidEncoding(Register reg) { 295 return Register.None.equals(reg) || AMD64.rip.equals(reg); 296 } 297 298 protected final void prefix(Register reg) { 299 prefix(reg, false); 300 } 301 302 protected final void prefix(Register reg, boolean byteinst) { 303 assert !isInvalidEncoding(reg); 304 int regEnc = reg.encoding; 305 if (regEnc >= 8) { 306 emitByte(Prefix.REXB); 307 } else if (byteinst && regEnc >= 4) { 308 emitByte(Prefix.REX); 309 } 310 } 311 312 protected final void prefixq(Register reg) { 313 assert !isInvalidEncoding(reg); 314 if (reg.encoding < 8) { 315 emitByte(Prefix.REXW); 316 } else { 317 emitByte(Prefix.REXWB); 318 } 319 } 320 321 protected final void prefix(Register dst, Register src) { 322 prefix(dst, false, src, false); 323 } 324 325 protected final void prefix(Register dst, boolean dstIsByte, Register src, boolean srcIsByte) { 326 assert !isInvalidEncoding(dst) && !isInvalidEncoding(src); 327 int dstEnc = dst.encoding; 328 int srcEnc = src.encoding; 329 if (dstEnc < 8) { 330 if (srcEnc >= 8) { 331 emitByte(Prefix.REXB); 332 } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) { 333 emitByte(Prefix.REX); 334 } 335 } else { 336 if (srcEnc < 8) { 337 emitByte(Prefix.REXR); 338 } else { 339 emitByte(Prefix.REXRB); 340 } 341 } 342 } 343 344 /** 345 * Creates prefix for the operands. If the given operands exceed 3 bits, the 4th bit is encoded 346 * in the prefix. 347 */ 348 protected final void prefixq(Register reg, Register rm) { 349 assert !isInvalidEncoding(reg) && !isInvalidEncoding(rm); 350 int regEnc = reg.encoding; 351 int rmEnc = rm.encoding; 352 if (regEnc < 8) { 353 if (rmEnc < 8) { 354 emitByte(Prefix.REXW); 355 } else { 356 emitByte(Prefix.REXWB); 357 } 358 } else { 359 if (rmEnc < 8) { 360 emitByte(Prefix.REXWR); 361 } else { 362 emitByte(Prefix.REXWRB); 363 } 364 } 365 } 366 367 protected static boolean needsRex(Register reg) { 368 // rip is excluded implicitly. 369 return reg.encoding >= MinEncodingNeedsRex; 370 } 371 372 protected static boolean needsRex(Register src, boolean srcIsByte) { 373 return srcIsByte ? src.encoding >= 4 : needsRex(src); 374 } 375 376 protected final void prefix(AMD64Address adr) { 377 if (needsRex(adr.getBase())) { 378 if (needsRex(adr.getIndex())) { 379 emitByte(Prefix.REXXB); 380 } else { 381 emitByte(Prefix.REXB); 382 } 383 } else { 384 if (needsRex(adr.getIndex())) { 385 emitByte(Prefix.REXX); 386 } 387 } 388 } 389 390 protected final void prefixq(AMD64Address adr) { 391 if (needsRex(adr.getBase())) { 392 if (needsRex(adr.getIndex())) { 393 emitByte(Prefix.REXWXB); 394 } else { 395 emitByte(Prefix.REXWB); 396 } 397 } else { 398 if (needsRex(adr.getIndex())) { 399 emitByte(Prefix.REXWX); 400 } else { 401 emitByte(Prefix.REXW); 402 } 403 } 404 } 405 406 protected void prefixb(AMD64Address adr, Register reg) { 407 prefix(adr, reg, true); 408 } 409 410 protected void prefix(AMD64Address adr, Register reg) { 411 prefix(adr, reg, false); 412 } 413 414 protected void prefix(AMD64Address adr, Register reg, boolean byteinst) { 415 assert !isInvalidEncoding(reg); 416 if (reg.encoding < 8) { 417 if (needsRex(adr.getBase())) { 418 if (needsRex(adr.getIndex())) { 419 emitByte(Prefix.REXXB); 420 } else { 421 emitByte(Prefix.REXB); 422 } 423 } else { 424 if (needsRex(adr.getIndex())) { 425 emitByte(Prefix.REXX); 426 } else if (byteinst && reg.encoding >= 4) { 427 emitByte(Prefix.REX); 428 } 429 } 430 } else { 431 if (needsRex(adr.getBase())) { 432 if (needsRex(adr.getIndex())) { 433 emitByte(Prefix.REXRXB); 434 } else { 435 emitByte(Prefix.REXRB); 436 } 437 } else { 438 if (needsRex(adr.getIndex())) { 439 emitByte(Prefix.REXRX); 440 } else { 441 emitByte(Prefix.REXR); 442 } 443 } 444 } 445 } 446 447 protected void prefixq(AMD64Address adr, Register src) { 448 assert !isInvalidEncoding(src); 449 if (src.encoding < 8) { 450 if (needsRex(adr.getBase())) { 451 if (needsRex(adr.getIndex())) { 452 emitByte(Prefix.REXWXB); 453 } else { 454 emitByte(Prefix.REXWB); 455 } 456 } else { 457 if (needsRex(adr.getIndex())) { 458 emitByte(Prefix.REXWX); 459 } else { 460 emitByte(Prefix.REXW); 461 } 462 } 463 } else { 464 if (needsRex(adr.getBase())) { 465 if (needsRex(adr.getIndex())) { 466 emitByte(Prefix.REXWRXB); 467 } else { 468 emitByte(Prefix.REXWRB); 469 } 470 } else { 471 if (needsRex(adr.getIndex())) { 472 emitByte(Prefix.REXWRX); 473 } else { 474 emitByte(Prefix.REXWR); 475 } 476 } 477 } 478 } 479 480 /** 481 * Get RXB bits for register-register instruction. In that encoding, ModRM.rm contains a 482 * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm 483 * field. The X bit must be 0. 484 */ 485 protected static int getRXB(Register reg, Register rm) { 486 assert !isInvalidEncoding(rm) && !isInvalidEncoding(reg); 487 int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1; 488 rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3; 489 return rxb; 490 } 491 492 /** 493 * Get RXB bits for register-memory instruction. The R bit extends the ModRM.reg field. There 494 * are two cases for the memory operand:<br> 495 * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0. 496 * <br> 497 * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base. 498 */ 499 protected static int getRXB(Register reg, AMD64Address rm) { 500 assert !isInvalidEncoding(reg); 501 int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1; 502 if (!isInvalidEncoding(rm.getIndex())) { 503 rxb |= (rm.getIndex().encoding & 0x08) >> 2; 504 } 505 if (!isInvalidEncoding(rm.getBase())) { 506 rxb |= (rm.getBase().encoding & 0x08) >> 3; 507 } 508 return rxb; 509 } 510 511 /** 512 * Emit the ModR/M byte for one register operand and an opcode extension in the R field. 513 * <p> 514 * Format: [ 11 reg r/m ] 515 */ 516 protected final void emitModRM(int reg, Register rm) { 517 assert (reg & 0x07) == reg; 518 assert !isInvalidEncoding(rm); 519 emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07)); 520 } 521 522 /** 523 * Emit the ModR/M byte for two register operands. 524 * <p> 525 * Format: [ 11 reg r/m ] 526 */ 527 protected final void emitModRM(Register reg, Register rm) { 528 assert !isInvalidEncoding(reg); 529 emitModRM(reg.encoding & 0x07, rm); 530 } 531 532 public static final int DEFAULT_DISP8_SCALE = 1; 533 534 /** 535 * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand. 536 * 537 * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte 538 */ emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize)539 protected final void emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) { 540 assert !isInvalidEncoding(reg); 541 emitOperandHelper(encode(reg), addr, force4Byte, additionalInstructionSize, DEFAULT_DISP8_SCALE); 542 } 543 emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize)544 protected final void emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize) { 545 emitOperandHelper(reg, addr, false, additionalInstructionSize, DEFAULT_DISP8_SCALE); 546 } 547 emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize)548 protected final void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize) { 549 assert !isInvalidEncoding(reg); 550 emitOperandHelper(encode(reg), addr, false, additionalInstructionSize, DEFAULT_DISP8_SCALE); 551 } 552 emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize, int evexDisp8Scale)553 protected final void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize, int evexDisp8Scale) { 554 assert !isInvalidEncoding(reg); 555 emitOperandHelper(encode(reg), addr, false, additionalInstructionSize, evexDisp8Scale); 556 } 557 558 /** 559 * Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode 560 * extension in the R field. 561 * 562 * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte 563 * @param additionalInstructionSize the number of bytes that will be emitted after the operand, 564 * so that the start position of the next instruction can be computed even though 565 * this instruction has not been completely emitted yet. 566 * @param evexDisp8Scale the scaling factor for computing the compressed displacement of 567 * EVEX-encoded instructions. This scaling factor only matters when the emitted 568 * instruction uses one-byte-displacement form. 569 */ emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize, int evexDisp8Scale)570 private void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize, int evexDisp8Scale) { 571 assert (reg & 0x07) == reg; 572 int regenc = reg << 3; 573 574 Register base = addr.getBase(); 575 Register index = addr.getIndex(); 576 577 Scale scale = addr.getScale(); 578 int disp = addr.getDisplacement(); 579 580 if (base.equals(AMD64.rip)) { // also matches addresses returned by getPlaceholder() 581 // [00 reg 101] disp32 582 assert index.equals(Register.None) : "cannot use RIP relative addressing with index register"; 583 emitByte(0x05 | regenc); 584 if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) { 585 codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize)); 586 } 587 emitInt(disp); 588 } else if (base.isValid()) { 589 boolean overriddenForce4Byte = force4Byte; 590 int baseenc = base.isValid() ? encode(base) : 0; 591 592 if (index.isValid()) { 593 int indexenc = encode(index) << 3; 594 // [base + indexscale + disp] 595 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) { 596 // [base + indexscale] 597 // [00 reg 100][ss index base] 598 assert !index.equals(rsp) : "illegal addressing mode"; 599 emitByte(0x04 | regenc); 600 emitByte(scale.log2 << 6 | indexenc | baseenc); 601 } else { 602 if (evexDisp8Scale > 1 && !overriddenForce4Byte) { 603 if (disp % evexDisp8Scale == 0) { 604 int newDisp = disp / evexDisp8Scale; 605 if (isByte(newDisp)) { 606 disp = newDisp; 607 assert isByte(disp) && !overriddenForce4Byte; 608 } 609 } else { 610 overriddenForce4Byte = true; 611 } 612 } 613 if (isByte(disp) && !overriddenForce4Byte) { 614 // [base + indexscale + imm8] 615 // [01 reg 100][ss index base] imm8 616 assert !index.equals(rsp) : "illegal addressing mode"; 617 emitByte(0x44 | regenc); 618 emitByte(scale.log2 << 6 | indexenc | baseenc); 619 emitByte(disp & 0xFF); 620 } else { 621 // [base + indexscale + disp32] 622 // [10 reg 100][ss index base] disp32 623 assert !index.equals(rsp) : "illegal addressing mode"; 624 emitByte(0x84 | regenc); 625 emitByte(scale.log2 << 6 | indexenc | baseenc); 626 emitInt(disp); 627 } 628 } 629 } else if (base.equals(rsp) || base.equals(r12)) { 630 // [rsp + disp] 631 if (disp == 0) { 632 // [rsp] 633 // [00 reg 100][00 100 100] 634 emitByte(0x04 | regenc); 635 emitByte(0x24); 636 } else { 637 if (evexDisp8Scale > 1 && !overriddenForce4Byte) { 638 if (disp % evexDisp8Scale == 0) { 639 int newDisp = disp / evexDisp8Scale; 640 if (isByte(newDisp)) { 641 disp = newDisp; 642 assert isByte(disp) && !overriddenForce4Byte; 643 } 644 } else { 645 overriddenForce4Byte = true; 646 } 647 } 648 if (isByte(disp) && !overriddenForce4Byte) { 649 // [rsp + imm8] 650 // [01 reg 100][00 100 100] disp8 651 emitByte(0x44 | regenc); 652 emitByte(0x24); 653 emitByte(disp & 0xFF); 654 } else { 655 // [rsp + imm32] 656 // [10 reg 100][00 100 100] disp32 657 emitByte(0x84 | regenc); 658 emitByte(0x24); 659 emitInt(disp); 660 } 661 } 662 } else { 663 // [base + disp] 664 assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode"; 665 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) { 666 // [base] 667 // [00 reg base] 668 emitByte(0x00 | regenc | baseenc); 669 } else { 670 if (evexDisp8Scale > 1 && !overriddenForce4Byte) { 671 if (disp % evexDisp8Scale == 0) { 672 int newDisp = disp / evexDisp8Scale; 673 if (isByte(newDisp)) { 674 disp = newDisp; 675 assert isByte(disp) && !overriddenForce4Byte; 676 } 677 } else { 678 overriddenForce4Byte = true; 679 } 680 } 681 if (isByte(disp) && !overriddenForce4Byte) { 682 // [base + disp8] 683 // [01 reg base] disp8 684 emitByte(0x40 | regenc | baseenc); 685 emitByte(disp & 0xFF); 686 } else { 687 // [base + disp32] 688 // [10 reg base] disp32 689 emitByte(0x80 | regenc | baseenc); 690 emitInt(disp); 691 } 692 } 693 } 694 } else { 695 if (index.isValid()) { 696 int indexenc = encode(index) << 3; 697 // [indexscale + disp] 698 // [00 reg 100][ss index 101] disp32 699 assert !index.equals(rsp) : "illegal addressing mode"; 700 emitByte(0x04 | regenc); 701 emitByte(scale.log2 << 6 | indexenc | 0x05); 702 emitInt(disp); 703 } else { 704 // [disp] ABSOLUTE 705 // [00 reg 100][00 100 101] disp32 706 emitByte(0x04 | regenc); 707 emitByte(0x25); 708 emitInt(disp); 709 } 710 } 711 } 712 713 private interface SIMDEncoder { 714 715 void simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW); 716 717 void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW); 718 719 } 720 721 private class SSEEncoderImpl implements SIMDEncoder { 722 723 @Override simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW)724 public void simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) { 725 assert (!nds.isValid()) || nds.equals(xreg); 726 if (sizePrefix > 0) { 727 emitByte(sizePrefix); 728 } 729 if (isRexW) { 730 prefixq(adr, xreg); 731 } else { 732 prefix(adr, xreg); 733 } 734 if (opcodeEscapePrefix > 0xFF) { 735 emitShort(opcodeEscapePrefix); 736 } else if (opcodeEscapePrefix > 0) { 737 emitByte(opcodeEscapePrefix); 738 } 739 } 740 741 @Override simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW)742 public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) { 743 assert (!nds.isValid()) || nds.equals(dst) || nds.equals(src); 744 if (sizePrefix > 0) { 745 emitByte(sizePrefix); 746 } 747 if (isRexW) { 748 prefixq(dst, src); 749 } else { 750 prefix(dst, src); 751 } 752 if (opcodeEscapePrefix > 0xFF) { 753 emitShort(opcodeEscapePrefix); 754 } else if (opcodeEscapePrefix > 0) { 755 emitByte(opcodeEscapePrefix); 756 } 757 } 758 } 759 760 public static final class VEXPrefixConfig { 761 public static final int L128 = 0; 762 public static final int L256 = 1; 763 public static final int L512 = 2; 764 public static final int LZ = 0; 765 766 public static final int W0 = 0; 767 public static final int W1 = 1; 768 public static final int WIG = 0; 769 770 public static final int P_ = 0x0; 771 public static final int P_66 = 0x1; 772 public static final int P_F3 = 0x2; 773 public static final int P_F2 = 0x3; 774 775 public static final int M_0F = 0x1; 776 public static final int M_0F38 = 0x2; 777 public static final int M_0F3A = 0x3; 778 VEXPrefixConfig()779 private VEXPrefixConfig() { 780 } 781 } 782 783 private class VEXEncoderImpl implements SIMDEncoder { 784 sizePrefixToPP(int sizePrefix)785 private int sizePrefixToPP(int sizePrefix) { 786 switch (sizePrefix) { 787 case 0x66: 788 return P_66; 789 case 0xF2: 790 return P_F2; 791 case 0xF3: 792 return P_F3; 793 default: 794 return P_; 795 } 796 } 797 opcodeEscapePrefixToMMMMM(int opcodeEscapePrefix)798 private int opcodeEscapePrefixToMMMMM(int opcodeEscapePrefix) { 799 switch (opcodeEscapePrefix) { 800 case 0x0F: 801 return M_0F; 802 case 0x380F: 803 return M_0F38; 804 case 0x3A0F: 805 return M_0F3A; 806 default: 807 return 0; 808 } 809 } 810 811 @Override simdPrefix(Register reg, Register nds, AMD64Address rm, int sizePrefix, int opcodeEscapePrefix, boolean isRexW)812 public void simdPrefix(Register reg, Register nds, AMD64Address rm, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) { 813 assert reg.encoding < 16 : "encoding out of range: " + reg.encoding; 814 assert nds.encoding < 16 : "encoding out of range: " + nds.encoding; 815 emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(reg, rm), nds.isValid() ? nds.encoding : 0, true); 816 } 817 818 @Override 819 public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) { 820 assert dst.encoding < 16 : "encoding out of range: " + dst.encoding; 821 assert src.encoding < 16 : "encoding out of range: " + src.encoding; 822 assert nds.encoding < 16 : "encoding out of range: " + nds.encoding; 823 emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(dst, src), nds.isValid() ? nds.encoding : 0, true); 824 } 825 } 826 827 protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) { 828 simdEncoder.simdPrefix(xreg, nds, adr, overriddenSizePrefix != 0 ? overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW); 829 } 830 831 protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int opcodeEscapePrefix, boolean isRexW) { 832 simdEncoder.simdPrefix(xreg, nds, adr, size.sizePrefix, opcodeEscapePrefix, isRexW); 833 } 834 835 protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) { 836 simdEncoder.simdPrefix(dst, nds, src, overriddenSizePrefix != 0 ? overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW); 837 } 838 839 protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int opcodeEscapePrefix, boolean isRexW) { 840 simdEncoder.simdPrefix(dst, nds, src, size.sizePrefix, opcodeEscapePrefix, isRexW); 841 } 842 843 // @formatter:off 844 // 845 // Instruction Format and VEX illustrated below (optional []): 846 // 847 // #of bytes: 2,3 1 1 1 1,2,4 1 848 // [Prefixes] VEX OpCode ModR/M [SIB] [Disp8*N] [Immediate] 849 // [Disp16,32] 850 // 851 // VEX: 0xC4 | P1 | P2 852 // 853 // 7 6 5 4 3 2 1 0 854 // P1 R X B m m m m m P[ 7:0] 855 // P2 W v v v v L p p P[15:8] 856 // 857 // VEX: 0xC5 | B1 858 // 859 // 7 6 5 4 3 2 1 0 860 // P1 R v v v v L p p P[7:0] 861 // 862 // Figure. Bit Field Layout of the VEX Prefix 863 // 864 // Table. VEX Prefix Bit Field Functional Grouping 865 // 866 // Notation Bit field Group Position Comment 867 // ---------- ------------------------- -------- ------------------- 868 // VEX.RXB Next-8 register specifier P[7:5] Combine with ModR/M.reg, ModR/M.rm (base, index/vidx). 869 // VEX.R REX.R inverse P[7] Combine with EVEX.R and ModR/M.reg. 870 // VEX.X REX.X inverse P[6] Combine with EVEX.B and ModR/M.rm, when SIB/VSIB absent. 871 // VEX.B REX.B inverse P[5] 872 // VEX.mmmmmm 0F, 0F_38, 0F_3A encoding P[4:0] b01/0x0F, b10/0F_38, b11/0F_3A (all other reserved) 873 // 874 // VEX.W Opcode specific P[15] 875 // VEX.vvvv A register specifier P[14:11] In inverse form, b1111 if not used. 876 // P[6:3] 877 // VEX.L Vector length/RC P[10] b0/scalar or 128b vec, b1/256b vec. 878 // P[2] 879 // VEX.pp Compressed legacy prefix P[9:8] b00/None, b01/0x66, b10/0xF3, b11/0xF2 880 // P[1:0] 881 // @formatter:on 882 883 /** 884 * Low-level function to encode and emit the VEX prefix. 885 * <p> 886 * 2 byte form: [1100 0101] [R vvvv L pp]<br> 887 * 3 byte form: [1100 0100] [RXB m-mmmm] [W vvvv L pp] 888 * <p> 889 * The RXB and vvvv fields are stored in 1's complement in the prefix encoding. This function 890 * performs the 1s complement conversion, the caller is expected to pass plain unencoded 891 * arguments. 892 * <p> 893 * The pp field encodes an extension to the opcode:<br> 894 * 00: no extension<br> 895 * 01: 66<br> 896 * 10: F3<br> 897 * 11: F2 898 * <p> 899 * The m-mmmm field encodes the leading bytes of the opcode:<br> 900 * 00001: implied 0F leading opcode byte (default in 2-byte encoding)<br> 901 * 00010: implied 0F 38 leading opcode bytes<br> 902 * 00011: implied 0F 3A leading opcode bytes 903 * <p> 904 * This function automatically chooses the 2 or 3 byte encoding, based on the XBW flags and the 905 * m-mmmm field. 906 */ 907 protected final void emitVEX(int l, int pp, int mmmmm, int w, int rxb, int vvvv, boolean checkAVX) { 908 assert !checkAVX || ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX) : "emitting VEX prefix on a CPU without AVX support"; 909 910 assert l == L128 || l == L256 : "invalid value for VEX.L"; 911 assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for VEX.pp"; 912 assert mmmmm == M_0F || mmmmm == M_0F38 || mmmmm == M_0F3A : "invalid value for VEX.m-mmmm"; 913 assert w == W0 || w == W1 : "invalid value for VEX.W"; 914 915 assert (rxb & 0x07) == rxb : "invalid value for VEX.RXB"; 916 assert (vvvv & 0x0F) == vvvv : "invalid value for VEX.vvvv"; 917 918 int rxb1s = rxb ^ 0x07; 919 int vvvv1s = vvvv ^ 0x0F; 920 if ((rxb & 0x03) == 0 && w == WIG && mmmmm == M_0F) { 921 // 2 byte encoding 922 int byte2 = 0; 923 byte2 |= (rxb1s & 0x04) << 5; 924 byte2 |= vvvv1s << 3; 925 byte2 |= l << 2; 926 byte2 |= pp; 927 928 emitByte(Prefix.VEX2); 929 emitByte(byte2); 930 } else { 931 // 3 byte encoding 932 int byte2 = 0; 933 byte2 = (rxb1s & 0x07) << 5; 934 byte2 |= mmmmm; 935 936 int byte3 = 0; 937 byte3 |= w << 7; 938 byte3 |= vvvv1s << 3; 939 byte3 |= l << 2; 940 byte3 |= pp; 941 942 emitByte(Prefix.VEX3); 943 emitByte(byte2); 944 emitByte(byte3); 945 } 946 } 947 948 public static int getLFlag(AVXSize size) { 949 switch (size) { 950 case XMM: 951 return L128; 952 case YMM: 953 return L256; 954 case ZMM: 955 return L512; 956 default: 957 return LZ; 958 } 959 } 960 961 public static boolean isAVX512Register(Register reg) { 962 return reg != null && reg.isValid() && AMD64.XMM.equals(reg.getRegisterCategory()) && reg.encoding > 15; 963 } 964 965 public final boolean vexPrefix(Register dst, Register nds, Register src, AVXSize size, int pp, int mmmmm, int w, int wEvex, boolean checkAVX) { 966 if (isAVX512Register(dst) || isAVX512Register(nds) || isAVX512Register(src) || size == AVXSize.ZMM) { 967 evexPrefix(dst, Register.None, nds, src, size, pp, mmmmm, wEvex, Z0, B0); 968 return true; 969 } 970 emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0, checkAVX); 971 return false; 972 } 973 974 public final boolean vexPrefix(Register dst, Register nds, AMD64Address src, AVXSize size, int pp, int mmmmm, int w, int wEvex, boolean checkAVX) { 975 if (isAVX512Register(dst) || isAVX512Register(nds) || size == AVXSize.ZMM) { 976 evexPrefix(dst, Register.None, nds, src, size, pp, mmmmm, wEvex, Z0, B0); 977 return true; 978 } 979 emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0, checkAVX); 980 return false; 981 } 982 983 protected static final class EVEXPrefixConfig { 984 public static final int Z0 = 0x0; 985 public static final int Z1 = 0x1; 986 987 public static final int B0 = 0x0; 988 public static final int B1 = 0x1; 989 990 private EVEXPrefixConfig() { 991 } 992 } 993 994 private static final int NOT_SUPPORTED_VECTOR_LENGTH = -1; 995 996 /** 997 * EVEX-encoded instructions use a compressed displacement scheme by multiplying disp8 with a 998 * scaling factor N depending on the tuple type and the vector length. 999 * 1000 * Reference: Intel Software Developer's Manual Volume 2, Section 2.6.5 1001 */ 1002 protected enum EVEXTuple { 1003 INVALID(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH), 1004 FV_NO_BROADCAST_32BIT(16, 32, 64), 1005 FV_BROADCAST_32BIT(4, 4, 4), 1006 FV_NO_BROADCAST_64BIT(16, 32, 64), 1007 FV_BROADCAST_64BIT(8, 8, 8), 1008 HV_NO_BROADCAST_32BIT(8, 16, 32), 1009 HV_BROADCAST_32BIT(4, 4, 4), 1010 FVM(16, 32, 64), 1011 T1S_8BIT(1, 1, 1), 1012 T1S_16BIT(2, 2, 2), 1013 T1S_32BIT(4, 4, 4), 1014 T1S_64BIT(8, 8, 8), 1015 T1F_32BIT(4, 4, 4), 1016 T1F_64BIT(8, 8, 8), 1017 T2_32BIT(8, 8, 8), 1018 T2_64BIT(NOT_SUPPORTED_VECTOR_LENGTH, 16, 16), 1019 T4_32BIT(NOT_SUPPORTED_VECTOR_LENGTH, 16, 16), 1020 T4_64BIT(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, 32), 1021 T8_32BIT(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, 32), 1022 HVM(8, 16, 32), 1023 QVM(4, 8, 16), 1024 OVM(2, 4, 8), 1025 M128(16, 16, 16), 1026 DUP(8, 32, 64); 1027 1028 private final int scalingFactorVL128; 1029 private final int scalingFactorVL256; 1030 private final int scalingFactorVL512; 1031 1032 EVEXTuple(int scalingFactorVL128, int scalingFactorVL256, int scalingFactorVL512) { 1033 this.scalingFactorVL128 = scalingFactorVL128; 1034 this.scalingFactorVL256 = scalingFactorVL256; 1035 this.scalingFactorVL512 = scalingFactorVL512; 1036 } 1037 1038 private static int verifyScalingFactor(int scalingFactor) { 1039 if (scalingFactor == NOT_SUPPORTED_VECTOR_LENGTH) { 1040 throw GraalError.shouldNotReachHere("Invalid scaling factor."); 1041 } 1042 return scalingFactor; 1043 } 1044 1045 public int getDisp8ScalingFactor(AVXSize size) { 1046 switch (size) { 1047 case XMM: 1048 return verifyScalingFactor(scalingFactorVL128); 1049 case YMM: 1050 return verifyScalingFactor(scalingFactorVL256); 1051 case ZMM: 1052 return verifyScalingFactor(scalingFactorVL512); 1053 default: 1054 throw GraalError.shouldNotReachHere("Unsupported vector size."); 1055 } 1056 } 1057 } 1058 1059 public static final class EVEXComparisonPredicate { 1060 public static final int EQ = 0; 1061 public static final int LT = 1; 1062 public static final int LE = 2; 1063 public static final int FALSE = 3; 1064 public static final int NEQ = 4; 1065 public static final int NLT = 5; 1066 public static final int NLE = 6; 1067 public static final int TRUE = 7; 1068 } 1069 1070 // @formatter:off 1071 // 1072 // Instruction Format and EVEX illustrated below (optional []): 1073 // 1074 // #of bytes: 4 1 1 1 1,2,4 1 1075 // [Prefixes] EVEX OpCode ModR/M [SIB] [Disp8*N] [Immediate] 1076 // [Disp16,32] 1077 // 1078 // The EVEX prefix is a 4-byte prefix, with the first two bytes derived from unused encoding 1079 // form of the 32-bit-mode-only BOUND instruction. The layout of the EVEX prefix is shown in 1080 // the figure below. The first byte must be 0x62, followed by three pay-load bytes, denoted 1081 // as P1, P2, and P3 individually or collectively as P[23:0] (see below). 1082 // 1083 // EVEX: 0x62 | P1 | P2 | P3 1084 // 1085 // 7 6 5 4 3 2 1 0 1086 // P1 R X B R' 0 0 m m P[ 7: 0] 1087 // P2 W v v v v 1 p p P[15: 8] 1088 // P3 z L' L b V' a a a P[23:16] 1089 // 1090 // Figure. Bit Field Layout of the EVEX Prefix 1091 // 1092 // Table. EVEX Prefix Bit Field Functional Grouping 1093 // 1094 // Notation Bit field Group Position Comment 1095 // --------- -------------------------- -------- ----------------------- 1096 // EVEX.RXB Next-8 register specifier P[7:5] Combine with ModR/M.reg, ModR/M.rm (base, index/vidx). 1097 // EVEX.X High-16 register specifier P[6] Combine with EVEX.B and ModR/M.rm, when SIB/VSIB absent. 1098 // EVEX.R' High-16 register specifier P[4] Combine with EVEX.R and ModR/M.reg. 1099 // -- Reserved P[3:2] Must be 0. 1100 // EVEX.mm Compressed legacy escape P[1:0] Identical to low two bits of VEX.mmmmm. 1101 // 1102 // EVEX.W Osize promotion/Opcode ext P[15] 1103 // EVEX.vvvv NDS register specifier P[14:11] Same as VEX.vvvv. 1104 // -- Fixed Value P[10] Must be 1. 1105 // EVEX.pp Compressed legacy prefix P[9:8] Identical to VEX.pp. 1106 // 1107 // EVEX.z Zeroing/Merging P[23] 1108 // EVEX.L'L Vector length/RC P[22:21] 1109 // EVEX.b Broadcast/RC/SAE Context P[20] 1110 // EVEX.V' High-16 NDS/VIDX register P[19] Combine with EVEX.vvvv or VSIB when present. 1111 // EVEX.aaa Embedded opmask register P[18:16] 1112 // 1113 // @formatter:on 1114 1115 /** 1116 * Low-level function to encode and emit the EVEX prefix. 1117 * <p> 1118 * 62 [0 1 1 0 0 0 1 0]<br> 1119 * P1 [R X B R'0 0 m m]<br> 1120 * P2 [W v v v v 1 p p]<br> 1121 * P3 [z L'L b V'a a a] 1122 * <p> 1123 * The pp field encodes an extension to the opcode:<br> 1124 * 00: no extension<br> 1125 * 01: 66<br> 1126 * 10: F3<br> 1127 * 11: F2 1128 * <p> 1129 * The mm field encodes the leading bytes of the opcode:<br> 1130 * 01: implied 0F leading opcode byte<br> 1131 * 10: implied 0F 38 leading opcode bytes<br> 1132 * 11: implied 0F 3A leading opcode bytes 1133 * <p> 1134 * The z field encodes the merging mode (merge or zero). 1135 * <p> 1136 * The b field encodes the source broadcast or data rounding modes. 1137 * <p> 1138 * The aaa field encodes the operand mask register. 1139 */ 1140 private void emitEVEX(int l, int pp, int mm, int w, int rxb, int reg, int vvvvv, int z, int b, int aaa) { 1141 assert ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX512F) : "emitting EVEX prefix on a CPU without AVX512 support"; 1142 1143 assert l == L128 || l == L256 || l == L512 : "invalid value for EVEX.L'L"; 1144 assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for EVEX.pp"; 1145 assert mm == M_0F || mm == M_0F38 || mm == M_0F3A : "invalid value for EVEX.mm"; 1146 assert w == W0 || w == W1 : "invalid value for EVEX.W"; 1147 1148 assert (rxb & 0x07) == rxb : "invalid value for EVEX.RXB"; 1149 assert (reg & 0x1F) == reg : "invalid value for EVEX.R'"; 1150 assert (vvvvv & 0x1F) == vvvvv : "invalid value for EVEX.V'vvvv"; 1151 1152 assert z == Z0 || z == Z1 : "invalid value for EVEX.z"; 1153 assert b == B0 || b == B1 : "invalid value for EVEX.b"; 1154 assert (aaa & 0x07) == aaa : "invalid value for EVEX.aaa"; 1155 1156 emitByte(Prefix.EVEX); 1157 int p1 = 0; 1158 p1 |= ((rxb ^ 0x07) & 0x07) << 5; 1159 p1 |= reg < 16 ? 0x10 : 0; 1160 p1 |= mm; 1161 emitByte(p1); 1162 1163 int p2 = 0; 1164 p2 |= w << 7; 1165 p2 |= ((vvvvv ^ 0x0F) & 0x0F) << 3; 1166 p2 |= 0x04; 1167 p2 |= pp; 1168 emitByte(p2); 1169 1170 int p3 = 0; 1171 p3 |= z << 7; 1172 p3 |= l << 5; 1173 p3 |= b << 4; 1174 p3 |= vvvvv < 16 ? 0x08 : 0; 1175 p3 |= aaa; 1176 emitByte(p3); 1177 } 1178 1179 /** 1180 * Get RXB bits for register-register instructions in EVEX-encoding, where ModRM.rm contains a 1181 * register index. The R bit extends the ModRM.reg field and the X and B bits extends the 1182 * ModRM.rm field. 1183 */ 1184 private static int getRXBForEVEX(Register reg, Register rm) { 1185 int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1; 1186 rxb |= (rm == null ? 0 : rm.encoding & 0x018) >> 3; 1187 return rxb; 1188 } 1189 1190 /** 1191 * Helper method for emitting EVEX prefix in the form of RRRR. 1192 */ 1193 protected final void evexPrefix(Register dst, Register mask, Register nds, Register src, AVXSize size, int pp, int mm, int w, int z, int b) { 1194 assert !mask.isValid() || inRC(MASK, mask); 1195 emitEVEX(getLFlag(size), pp, mm, w, getRXBForEVEX(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0); 1196 } 1197 1198 /** 1199 * Helper method for emitting EVEX prefix in the form of RRRM. Because the memory addressing in 1200 * EVEX-encoded instructions employ a compressed displacement scheme when using disp8 form, the 1201 * user of this API should make sure to encode the operands using 1202 * {@link #emitOperandHelper(Register, AMD64Address, int, int)}. 1203 */ 1204 protected final void evexPrefix(Register dst, Register mask, Register nds, AMD64Address src, AVXSize size, int pp, int mm, int w, int z, int b) { 1205 assert !mask.isValid() || inRC(MASK, mask); 1206 emitEVEX(getLFlag(size), pp, mm, w, getRXB(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0); 1207 } 1208 1209 } 1210