1 /* 2 * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 */ 23 24 25 package org.graalvm.compiler.asm.amd64; 26 27 import static jdk.vm.ci.amd64.AMD64.MASK; 28 import static jdk.vm.ci.amd64.AMD64.XMM; 29 import static jdk.vm.ci.amd64.AMD64.r12; 30 import static jdk.vm.ci.amd64.AMD64.r13; 31 import static jdk.vm.ci.amd64.AMD64.rbp; 32 import static jdk.vm.ci.amd64.AMD64.rsp; 33 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0; 34 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B1; 35 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z0; 36 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1; 37 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L128; 38 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L256; 39 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.L512; 40 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.LZ; 41 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F; 42 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F38; 43 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.M_0F3A; 44 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_; 45 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_66; 46 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F2; 47 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.P_F3; 48 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W0; 49 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.W1; 50 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG; 51 import static org.graalvm.compiler.core.common.NumUtil.isByte; 52 53 import 
org.graalvm.compiler.asm.Assembler; 54 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale; 55 import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize; 56 import org.graalvm.compiler.debug.GraalError; 57 58 import jdk.vm.ci.amd64.AMD64; 59 import jdk.vm.ci.amd64.AMD64.CPUFeature; 60 import jdk.vm.ci.amd64.AMD64Kind; 61 import jdk.vm.ci.code.Register; 62 import jdk.vm.ci.code.Register.RegisterCategory; 63 import jdk.vm.ci.code.TargetDescription; 64 import jdk.vm.ci.meta.PlatformKind; 65 66 /** 67 * This class implements an assembler that can encode most X86 instructions. 68 */ 69 public abstract class AMD64BaseAssembler extends Assembler { 70 71 private final SIMDEncoder simdEncoder; 72 73 /** 74 * Constructs an assembler for the AMD64 architecture. 75 */ AMD64BaseAssembler(TargetDescription target)76 public AMD64BaseAssembler(TargetDescription target) { 77 super(target); 78 79 if (supports(CPUFeature.AVX)) { 80 simdEncoder = new VEXEncoderImpl(); 81 } else { 82 simdEncoder = new SSEEncoderImpl(); 83 } 84 } 85 86 /** 87 * The x86 operand sizes. 
88 */ 89 public enum OperandSize { 90 BYTE(1, AMD64Kind.BYTE) { 91 @Override emitImmediate(AMD64BaseAssembler asm, int imm)92 protected void emitImmediate(AMD64BaseAssembler asm, int imm) { 93 assert imm == (byte) imm; 94 asm.emitByte(imm); 95 } 96 97 @Override immediateSize()98 protected int immediateSize() { 99 return 1; 100 } 101 }, 102 103 WORD(2, AMD64Kind.WORD, 0x66) { 104 @Override emitImmediate(AMD64BaseAssembler asm, int imm)105 protected void emitImmediate(AMD64BaseAssembler asm, int imm) { 106 assert imm == (short) imm; 107 asm.emitShort(imm); 108 } 109 110 @Override immediateSize()111 protected int immediateSize() { 112 return 2; 113 } 114 }, 115 116 DWORD(4, AMD64Kind.DWORD) { 117 @Override emitImmediate(AMD64BaseAssembler asm, int imm)118 protected void emitImmediate(AMD64BaseAssembler asm, int imm) { 119 asm.emitInt(imm); 120 } 121 122 @Override immediateSize()123 protected int immediateSize() { 124 return 4; 125 } 126 }, 127 128 QWORD(8, AMD64Kind.QWORD) { 129 @Override emitImmediate(AMD64BaseAssembler asm, int imm)130 protected void emitImmediate(AMD64BaseAssembler asm, int imm) { 131 asm.emitInt(imm); 132 } 133 134 @Override immediateSize()135 protected int immediateSize() { 136 return 4; 137 } 138 }, 139 140 SS(4, AMD64Kind.SINGLE, 0xF3, true), 141 142 SD(8, AMD64Kind.DOUBLE, 0xF2, true), 143 144 PS(16, AMD64Kind.V128_SINGLE, true), 145 146 PD(16, AMD64Kind.V128_DOUBLE, 0x66, true); 147 148 private final int sizePrefix; 149 private final int bytes; 150 private final boolean xmm; 151 private final AMD64Kind kind; 152 OperandSize(int bytes, AMD64Kind kind)153 OperandSize(int bytes, AMD64Kind kind) { 154 this(bytes, kind, 0); 155 } 156 OperandSize(int bytes, AMD64Kind kind, int sizePrefix)157 OperandSize(int bytes, AMD64Kind kind, int sizePrefix) { 158 this(bytes, kind, sizePrefix, false); 159 } 160 OperandSize(int bytes, AMD64Kind kind, boolean xmm)161 OperandSize(int bytes, AMD64Kind kind, boolean xmm) { 162 this(bytes, kind, 0, xmm); 163 } 164 
OperandSize(int bytes, AMD64Kind kind, int sizePrefix, boolean xmm)165 OperandSize(int bytes, AMD64Kind kind, int sizePrefix, boolean xmm) { 166 this.sizePrefix = sizePrefix; 167 this.bytes = bytes; 168 this.kind = kind; 169 this.xmm = xmm; 170 } 171 getSizePrefix()172 public int getSizePrefix() { 173 return sizePrefix; 174 } 175 getBytes()176 public int getBytes() { 177 return bytes; 178 } 179 isXmmType()180 public boolean isXmmType() { 181 return xmm; 182 } 183 getKind()184 public AMD64Kind getKind() { 185 return kind; 186 } 187 get(PlatformKind kind)188 public static OperandSize get(PlatformKind kind) { 189 for (OperandSize operandSize : OperandSize.values()) { 190 if (operandSize.kind.equals(kind)) { 191 return operandSize; 192 } 193 } 194 throw GraalError.shouldNotReachHere("Unexpected kind: " + kind.toString()); 195 } 196 197 /** 198 * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded 199 * as sign-extended 32-bit values. 200 * 201 * @param asm 202 * @param imm 203 */ emitImmediate(AMD64BaseAssembler asm, int imm)204 protected void emitImmediate(AMD64BaseAssembler asm, int imm) { 205 throw new UnsupportedOperationException(); 206 } 207 immediateSize()208 protected int immediateSize() { 209 throw new UnsupportedOperationException(); 210 } 211 } 212 213 public static class OperandDataAnnotation extends CodeAnnotation { 214 /** 215 * The position (bytes from the beginning of the method) of the operand. 216 */ 217 public final int operandPosition; 218 /** 219 * The size of the operand, in bytes. 220 */ 221 public final int operandSize; 222 /** 223 * The position (bytes from the beginning of the method) of the next instruction. On AMD64, 224 * RIP-relative operands are relative to this position. 
225 */ 226 public final int nextInstructionPosition; 227 OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition)228 OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) { 229 super(instructionPosition); 230 231 this.operandPosition = operandPosition; 232 this.operandSize = operandSize; 233 this.nextInstructionPosition = nextInstructionPosition; 234 } 235 236 @Override toString()237 public String toString() { 238 return getClass().getSimpleName() + " instruction [" + instructionPosition + ", " + nextInstructionPosition + "[ operand at " + operandPosition + " size " + operandSize; 239 } 240 } 241 annotatePatchingImmediate(int operandOffset, int operandSize)242 protected void annotatePatchingImmediate(int operandOffset, int operandSize) { 243 if (codePatchingAnnotationConsumer != null) { 244 int pos = position(); 245 codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(pos, pos + operandOffset, operandSize, pos + operandOffset + operandSize)); 246 } 247 } 248 supports(CPUFeature feature)249 public final boolean supports(CPUFeature feature) { 250 return ((AMD64) target.arch).getFeatures().contains(feature); 251 } 252 inRC(RegisterCategory rc, Register r)253 protected static boolean inRC(RegisterCategory rc, Register r) { 254 return r.getRegisterCategory().equals(rc); 255 } 256 encode(Register r)257 protected static int encode(Register r) { 258 assert r.encoding >= 0 && (inRC(XMM, r) ? r.encoding < 32 : r.encoding < 16) : "encoding out of range: " + r.encoding; 259 return r.encoding & 0x7; 260 } 261 262 private static final int MinEncodingNeedsRex = 8; 263 264 /** 265 * Constants for X86 prefix bytes. 
266 */ 267 private static class Prefix { 268 private static final int REX = 0x40; 269 private static final int REXB = 0x41; 270 private static final int REXX = 0x42; 271 private static final int REXXB = 0x43; 272 private static final int REXR = 0x44; 273 private static final int REXRB = 0x45; 274 private static final int REXRX = 0x46; 275 private static final int REXRXB = 0x47; 276 private static final int REXW = 0x48; 277 private static final int REXWB = 0x49; 278 private static final int REXWX = 0x4A; 279 private static final int REXWXB = 0x4B; 280 private static final int REXWR = 0x4C; 281 private static final int REXWRB = 0x4D; 282 private static final int REXWRX = 0x4E; 283 private static final int REXWRXB = 0x4F; 284 285 private static final int VEX2 = 0xC5; 286 private static final int VEX3 = 0xC4; 287 private static final int EVEX = 0x62; 288 } 289 290 protected final void rexw() { 291 emitByte(Prefix.REXW); 292 } 293 294 protected final void prefix(Register reg) { 295 prefix(reg, false); 296 } 297 298 protected final void prefix(Register reg, boolean byteinst) { 299 int regEnc = reg.encoding; 300 if (regEnc >= 8) { 301 emitByte(Prefix.REXB); 302 } else if (byteinst && regEnc >= 4) { 303 emitByte(Prefix.REX); 304 } 305 } 306 307 protected final void prefixq(Register reg) { 308 if (reg.encoding < 8) { 309 emitByte(Prefix.REXW); 310 } else { 311 emitByte(Prefix.REXWB); 312 } 313 } 314 315 protected final void prefix(Register dst, Register src) { 316 prefix(dst, false, src, false); 317 } 318 319 protected final void prefix(Register dst, boolean dstIsByte, Register src, boolean srcIsByte) { 320 int dstEnc = dst.encoding; 321 int srcEnc = src.encoding; 322 if (dstEnc < 8) { 323 if (srcEnc >= 8) { 324 emitByte(Prefix.REXB); 325 } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) { 326 emitByte(Prefix.REX); 327 } 328 } else { 329 if (srcEnc < 8) { 330 emitByte(Prefix.REXR); 331 } else { 332 emitByte(Prefix.REXRB); 333 } 334 } 335 } 336 337 /** 338 
* Creates prefix for the operands. If the given operands exceed 3 bits, the 4th bit is encoded 339 * in the prefix. 340 */ 341 protected final void prefixq(Register reg, Register rm) { 342 int regEnc = reg.encoding; 343 int rmEnc = rm.encoding; 344 if (regEnc < 8) { 345 if (rmEnc < 8) { 346 emitByte(Prefix.REXW); 347 } else { 348 emitByte(Prefix.REXWB); 349 } 350 } else { 351 if (rmEnc < 8) { 352 emitByte(Prefix.REXWR); 353 } else { 354 emitByte(Prefix.REXWRB); 355 } 356 } 357 } 358 359 private static boolean needsRex(Register reg) { 360 return reg.encoding >= MinEncodingNeedsRex; 361 } 362 363 protected final void prefix(AMD64Address adr) { 364 if (needsRex(adr.getBase())) { 365 if (needsRex(adr.getIndex())) { 366 emitByte(Prefix.REXXB); 367 } else { 368 emitByte(Prefix.REXB); 369 } 370 } else { 371 if (needsRex(adr.getIndex())) { 372 emitByte(Prefix.REXX); 373 } 374 } 375 } 376 377 protected final void prefixq(AMD64Address adr) { 378 if (needsRex(adr.getBase())) { 379 if (needsRex(adr.getIndex())) { 380 emitByte(Prefix.REXWXB); 381 } else { 382 emitByte(Prefix.REXWB); 383 } 384 } else { 385 if (needsRex(adr.getIndex())) { 386 emitByte(Prefix.REXWX); 387 } else { 388 emitByte(Prefix.REXW); 389 } 390 } 391 } 392 393 protected void prefixb(AMD64Address adr, Register reg) { 394 prefix(adr, reg, true); 395 } 396 397 protected void prefix(AMD64Address adr, Register reg) { 398 prefix(adr, reg, false); 399 } 400 401 protected void prefix(AMD64Address adr, Register reg, boolean byteinst) { 402 if (reg.encoding < 8) { 403 if (needsRex(adr.getBase())) { 404 if (needsRex(adr.getIndex())) { 405 emitByte(Prefix.REXXB); 406 } else { 407 emitByte(Prefix.REXB); 408 } 409 } else { 410 if (needsRex(adr.getIndex())) { 411 emitByte(Prefix.REXX); 412 } else if (byteinst && reg.encoding >= 4) { 413 emitByte(Prefix.REX); 414 } 415 } 416 } else { 417 if (needsRex(adr.getBase())) { 418 if (needsRex(adr.getIndex())) { 419 emitByte(Prefix.REXRXB); 420 } else { 421 emitByte(Prefix.REXRB); 
422 } 423 } else { 424 if (needsRex(adr.getIndex())) { 425 emitByte(Prefix.REXRX); 426 } else { 427 emitByte(Prefix.REXR); 428 } 429 } 430 } 431 } 432 433 protected void prefixq(AMD64Address adr, Register src) { 434 if (src.encoding < 8) { 435 if (needsRex(adr.getBase())) { 436 if (needsRex(adr.getIndex())) { 437 emitByte(Prefix.REXWXB); 438 } else { 439 emitByte(Prefix.REXWB); 440 } 441 } else { 442 if (needsRex(adr.getIndex())) { 443 emitByte(Prefix.REXWX); 444 } else { 445 emitByte(Prefix.REXW); 446 } 447 } 448 } else { 449 if (needsRex(adr.getBase())) { 450 if (needsRex(adr.getIndex())) { 451 emitByte(Prefix.REXWRXB); 452 } else { 453 emitByte(Prefix.REXWRB); 454 } 455 } else { 456 if (needsRex(adr.getIndex())) { 457 emitByte(Prefix.REXWRX); 458 } else { 459 emitByte(Prefix.REXWR); 460 } 461 } 462 } 463 } 464 465 /** 466 * Get RXB bits for register-register instruction. In that encoding, ModRM.rm contains a 467 * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm 468 * field. The X bit must be 0. 469 */ 470 protected static int getRXB(Register reg, Register rm) { 471 int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1; 472 rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3; 473 return rxb; 474 } 475 476 /** 477 * Get RXB bits for register-memory instruction. The R bit extends the ModRM.reg field. There 478 * are two cases for the memory operand:<br> 479 * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0. 480 * <br> 481 * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base. 482 */ 483 protected static int getRXB(Register reg, AMD64Address rm) { 484 int rxb = (reg == null ? 
0 : reg.encoding & 0x08) >> 1; 485 if (!rm.getIndex().equals(Register.None)) { 486 rxb |= (rm.getIndex().encoding & 0x08) >> 2; 487 } 488 if (!rm.getBase().equals(Register.None)) { 489 rxb |= (rm.getBase().encoding & 0x08) >> 3; 490 } 491 return rxb; 492 } 493 494 /** 495 * Emit the ModR/M byte for one register operand and an opcode extension in the R field. 496 * <p> 497 * Format: [ 11 reg r/m ] 498 */ 499 protected final void emitModRM(int reg, Register rm) { 500 assert (reg & 0x07) == reg; 501 emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07)); 502 } 503 504 /** 505 * Emit the ModR/M byte for two register operands. 506 * <p> 507 * Format: [ 11 reg r/m ] 508 */ 509 protected final void emitModRM(Register reg, Register rm) { 510 emitModRM(reg.encoding & 0x07, rm); 511 } 512 513 public static final int DEFAULT_DISP8_SCALE = 1; 514 515 /** 516 * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand. 517 * 518 * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte 519 */ emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize)520 protected final void emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) { 521 assert !reg.equals(Register.None); 522 emitOperandHelper(encode(reg), addr, force4Byte, additionalInstructionSize, DEFAULT_DISP8_SCALE); 523 } 524 emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize)525 protected final void emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize) { 526 emitOperandHelper(reg, addr, false, additionalInstructionSize, DEFAULT_DISP8_SCALE); 527 } 528 emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize)529 protected final void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize) { 530 assert !reg.equals(Register.None); 531 emitOperandHelper(encode(reg), addr, false, 
additionalInstructionSize, DEFAULT_DISP8_SCALE); 532 } 533 emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize, int evexDisp8Scale)534 protected final void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize, int evexDisp8Scale) { 535 assert !reg.equals(Register.None); 536 emitOperandHelper(encode(reg), addr, false, additionalInstructionSize, evexDisp8Scale); 537 } 538 539 /** 540 * Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode 541 * extension in the R field. 542 * 543 * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte 544 * @param additionalInstructionSize the number of bytes that will be emitted after the operand, 545 * so that the start position of the next instruction can be computed even though 546 * this instruction has not been completely emitted yet. 547 * @param evexDisp8Scale the scaling factor for computing the compressed displacement of 548 * EVEX-encoded instructions. This scaling factor only matters when the emitted 549 * instruction uses one-byte-displacement form. 
550 */ emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize, int evexDisp8Scale)551 private void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize, int evexDisp8Scale) { 552 assert (reg & 0x07) == reg; 553 int regenc = reg << 3; 554 555 Register base = addr.getBase(); 556 Register index = addr.getIndex(); 557 558 Scale scale = addr.getScale(); 559 int disp = addr.getDisplacement(); 560 561 if (base.equals(AMD64.rip)) { // also matches addresses returned by getPlaceholder() 562 // [00 000 101] disp32 563 assert index.equals(Register.None) : "cannot use RIP relative addressing with index register"; 564 emitByte(0x05 | regenc); 565 if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) { 566 codePatchingAnnotationConsumer.accept(new OperandDataAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize)); 567 } 568 emitInt(disp); 569 } else if (base.isValid()) { 570 boolean overriddenForce4Byte = force4Byte; 571 int baseenc = base.isValid() ? 
encode(base) : 0; 572 573 if (index.isValid()) { 574 int indexenc = encode(index) << 3; 575 // [base + indexscale + disp] 576 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) { 577 // [base + indexscale] 578 // [00 reg 100][ss index base] 579 assert !index.equals(rsp) : "illegal addressing mode"; 580 emitByte(0x04 | regenc); 581 emitByte(scale.log2 << 6 | indexenc | baseenc); 582 } else { 583 if (evexDisp8Scale > 1 && !overriddenForce4Byte) { 584 if (disp % evexDisp8Scale == 0) { 585 int newDisp = disp / evexDisp8Scale; 586 if (isByte(newDisp)) { 587 disp = newDisp; 588 assert isByte(disp) && !overriddenForce4Byte; 589 } 590 } else { 591 overriddenForce4Byte = true; 592 } 593 } 594 if (isByte(disp) && !overriddenForce4Byte) { 595 // [base + indexscale + imm8] 596 // [01 reg 100][ss index base] imm8 597 assert !index.equals(rsp) : "illegal addressing mode"; 598 emitByte(0x44 | regenc); 599 emitByte(scale.log2 << 6 | indexenc | baseenc); 600 emitByte(disp & 0xFF); 601 } else { 602 // [base + indexscale + disp32] 603 // [10 reg 100][ss index base] disp32 604 assert !index.equals(rsp) : "illegal addressing mode"; 605 emitByte(0x84 | regenc); 606 emitByte(scale.log2 << 6 | indexenc | baseenc); 607 emitInt(disp); 608 } 609 } 610 } else if (base.equals(rsp) || base.equals(r12)) { 611 // [rsp + disp] 612 if (disp == 0) { 613 // [rsp] 614 // [00 reg 100][00 100 100] 615 emitByte(0x04 | regenc); 616 emitByte(0x24); 617 } else { 618 if (evexDisp8Scale > 1 && !overriddenForce4Byte) { 619 if (disp % evexDisp8Scale == 0) { 620 int newDisp = disp / evexDisp8Scale; 621 if (isByte(newDisp)) { 622 disp = newDisp; 623 assert isByte(disp) && !overriddenForce4Byte; 624 } 625 } else { 626 overriddenForce4Byte = true; 627 } 628 } 629 if (isByte(disp) && !overriddenForce4Byte) { 630 // [rsp + imm8] 631 // [01 reg 100][00 100 100] disp8 632 emitByte(0x44 | regenc); 633 emitByte(0x24); 634 emitByte(disp & 0xFF); 635 } else { 636 // [rsp + imm32] 637 // [10 reg 100][00 100 100] 
disp32 638 emitByte(0x84 | regenc); 639 emitByte(0x24); 640 emitInt(disp); 641 } 642 } 643 } else { 644 // [base + disp] 645 assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode"; 646 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) { 647 // [base] 648 // [00 reg base] 649 emitByte(0x00 | regenc | baseenc); 650 } else { 651 if (evexDisp8Scale > 1 && !overriddenForce4Byte) { 652 if (disp % evexDisp8Scale == 0) { 653 int newDisp = disp / evexDisp8Scale; 654 if (isByte(newDisp)) { 655 disp = newDisp; 656 assert isByte(disp) && !overriddenForce4Byte; 657 } 658 } else { 659 overriddenForce4Byte = true; 660 } 661 } 662 if (isByte(disp) && !overriddenForce4Byte) { 663 // [base + disp8] 664 // [01 reg base] disp8 665 emitByte(0x40 | regenc | baseenc); 666 emitByte(disp & 0xFF); 667 } else { 668 // [base + disp32] 669 // [10 reg base] disp32 670 emitByte(0x80 | regenc | baseenc); 671 emitInt(disp); 672 } 673 } 674 } 675 } else { 676 if (index.isValid()) { 677 int indexenc = encode(index) << 3; 678 // [indexscale + disp] 679 // [00 reg 100][ss index 101] disp32 680 assert !index.equals(rsp) : "illegal addressing mode"; 681 emitByte(0x04 | regenc); 682 emitByte(scale.log2 << 6 | indexenc | 0x05); 683 emitInt(disp); 684 } else { 685 // [disp] ABSOLUTE 686 // [00 reg 100][00 100 101] disp32 687 emitByte(0x04 | regenc); 688 emitByte(0x25); 689 emitInt(disp); 690 } 691 } 692 } 693 694 private interface SIMDEncoder { 695 696 void simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW); 697 698 void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW); 699 700 } 701 702 private class SSEEncoderImpl implements SIMDEncoder { 703 704 @Override simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, int opcodeEscapePrefix, boolean isRexW)705 public void simdPrefix(Register xreg, Register nds, AMD64Address adr, int sizePrefix, 
int opcodeEscapePrefix, boolean isRexW) { 706 assert (!nds.isValid()) || nds.equals(xreg); 707 if (sizePrefix > 0) { 708 emitByte(sizePrefix); 709 } 710 if (isRexW) { 711 prefixq(adr, xreg); 712 } else { 713 prefix(adr, xreg); 714 } 715 if (opcodeEscapePrefix > 0xFF) { 716 emitShort(opcodeEscapePrefix); 717 } else if (opcodeEscapePrefix > 0) { 718 emitByte(opcodeEscapePrefix); 719 } 720 } 721 722 @Override simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW)723 public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) { 724 assert (!nds.isValid()) || nds.equals(dst) || nds.equals(src); 725 if (sizePrefix > 0) { 726 emitByte(sizePrefix); 727 } 728 if (isRexW) { 729 prefixq(dst, src); 730 } else { 731 prefix(dst, src); 732 } 733 if (opcodeEscapePrefix > 0xFF) { 734 emitShort(opcodeEscapePrefix); 735 } else if (opcodeEscapePrefix > 0) { 736 emitByte(opcodeEscapePrefix); 737 } 738 } 739 } 740 741 public static final class VEXPrefixConfig { 742 public static final int L128 = 0; 743 public static final int L256 = 1; 744 public static final int L512 = 2; 745 public static final int LZ = 0; 746 747 public static final int W0 = 0; 748 public static final int W1 = 1; 749 public static final int WIG = 0; 750 751 public static final int P_ = 0x0; 752 public static final int P_66 = 0x1; 753 public static final int P_F3 = 0x2; 754 public static final int P_F2 = 0x3; 755 756 public static final int M_0F = 0x1; 757 public static final int M_0F38 = 0x2; 758 public static final int M_0F3A = 0x3; 759 VEXPrefixConfig()760 private VEXPrefixConfig() { 761 } 762 } 763 764 private class VEXEncoderImpl implements SIMDEncoder { 765 sizePrefixToPP(int sizePrefix)766 private int sizePrefixToPP(int sizePrefix) { 767 switch (sizePrefix) { 768 case 0x66: 769 return P_66; 770 case 0xF2: 771 return P_F2; 772 case 0xF3: 773 return P_F3; 774 default: 775 return P_; 776 } 777 } 778 
opcodeEscapePrefixToMMMMM(int opcodeEscapePrefix)779 private int opcodeEscapePrefixToMMMMM(int opcodeEscapePrefix) { 780 switch (opcodeEscapePrefix) { 781 case 0x0F: 782 return M_0F; 783 case 0x380F: 784 return M_0F38; 785 case 0x3A0F: 786 return M_0F3A; 787 default: 788 return 0; 789 } 790 } 791 792 @Override simdPrefix(Register reg, Register nds, AMD64Address rm, int sizePrefix, int opcodeEscapePrefix, boolean isRexW)793 public void simdPrefix(Register reg, Register nds, AMD64Address rm, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) { 794 assert reg.encoding < 16 : "encoding out of range: " + reg.encoding; 795 assert nds.encoding < 16 : "encoding out of range: " + nds.encoding; 796 emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(reg, rm), nds.isValid() ? nds.encoding : 0, true); 797 } 798 799 @Override 800 public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) { 801 assert dst.encoding < 16 : "encoding out of range: " + dst.encoding; 802 assert src.encoding < 16 : "encoding out of range: " + src.encoding; 803 assert nds.encoding < 16 : "encoding out of range: " + nds.encoding; 804 emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(dst, src), nds.isValid() ? nds.encoding : 0, true); 805 } 806 } 807 808 protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) { 809 simdEncoder.simdPrefix(xreg, nds, adr, overriddenSizePrefix != 0 ? 
overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW); 810 } 811 812 protected final void simdPrefix(Register xreg, Register nds, AMD64Address adr, OperandSize size, int opcodeEscapePrefix, boolean isRexW) { 813 simdEncoder.simdPrefix(xreg, nds, adr, size.sizePrefix, opcodeEscapePrefix, isRexW); 814 } 815 816 protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int overriddenSizePrefix, int opcodeEscapePrefix, boolean isRexW) { 817 simdEncoder.simdPrefix(dst, nds, src, overriddenSizePrefix != 0 ? overriddenSizePrefix : size.sizePrefix, opcodeEscapePrefix, isRexW); 818 } 819 820 protected final void simdPrefix(Register dst, Register nds, Register src, OperandSize size, int opcodeEscapePrefix, boolean isRexW) { 821 simdEncoder.simdPrefix(dst, nds, src, size.sizePrefix, opcodeEscapePrefix, isRexW); 822 } 823 824 // @formatter:off 825 // 826 // Instruction Format and VEX illustrated below (optional []): 827 // 828 // #of bytes: 2,3 1 1 1 1,2,4 1 829 // [Prefixes] VEX OpCode ModR/M [SIB] [Disp8*N] [Immediate] 830 // [Disp16,32] 831 // 832 // VEX: 0xC4 | P1 | P2 833 // 834 // 7 6 5 4 3 2 1 0 835 // P1 R X B m m m m m P[ 7:0] 836 // P2 W v v v v L p p P[15:8] 837 // 838 // VEX: 0xC5 | B1 839 // 840 // 7 6 5 4 3 2 1 0 841 // P1 R v v v v L p p P[7:0] 842 // 843 // Figure. Bit Field Layout of the VEX Prefix 844 // 845 // Table. VEX Prefix Bit Field Functional Grouping 846 // 847 // Notation Bit field Group Position Comment 848 // ---------- ------------------------- -------- ------------------- 849 // VEX.RXB Next-8 register specifier P[7:5] Combine with ModR/M.reg, ModR/M.rm (base, index/vidx). 850 // VEX.R REX.R inverse P[7] Combine with EVEX.R and ModR/M.reg. 851 // VEX.X REX.X inverse P[6] Combine with EVEX.B and ModR/M.rm, when SIB/VSIB absent. 
852 // VEX.B REX.B inverse P[5] 853 // VEX.mmmmmm 0F, 0F_38, 0F_3A encoding P[4:0] b01/0x0F, b10/0F_38, b11/0F_3A (all other reserved) 854 // 855 // VEX.W Opcode specific P[15] 856 // VEX.vvvv A register specifier P[14:11] In inverse form, b1111 if not used. 857 // P[6:3] 858 // VEX.L Vector length/RC P[10] b0/scalar or 128b vec, b1/256b vec. 859 // P[2] 860 // VEX.pp Compressed legacy prefix P[9:8] b00/None, b01/0x66, b10/0xF3, b11/0xF2 861 // P[1:0] 862 // @formatter:on 863 864 /** 865 * Low-level function to encode and emit the VEX prefix. 866 * <p> 867 * 2 byte form: [1100 0101] [R vvvv L pp]<br> 868 * 3 byte form: [1100 0100] [RXB m-mmmm] [W vvvv L pp] 869 * <p> 870 * The RXB and vvvv fields are stored in 1's complement in the prefix encoding. This function 871 * performs the 1s complement conversion, the caller is expected to pass plain unencoded 872 * arguments. 873 * <p> 874 * The pp field encodes an extension to the opcode:<br> 875 * 00: no extension<br> 876 * 01: 66<br> 877 * 10: F3<br> 878 * 11: F2 879 * <p> 880 * The m-mmmm field encodes the leading bytes of the opcode:<br> 881 * 00001: implied 0F leading opcode byte (default in 2-byte encoding)<br> 882 * 00010: implied 0F 38 leading opcode bytes<br> 883 * 00011: implied 0F 3A leading opcode bytes 884 * <p> 885 * This function automatically chooses the 2 or 3 byte encoding, based on the XBW flags and the 886 * m-mmmm field. 
887 */ 888 protected final void emitVEX(int l, int pp, int mmmmm, int w, int rxb, int vvvv, boolean checkAVX) { 889 assert !checkAVX || ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX) : "emitting VEX prefix on a CPU without AVX support"; 890 891 assert l == L128 || l == L256 : "invalid value for VEX.L"; 892 assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for VEX.pp"; 893 assert mmmmm == M_0F || mmmmm == M_0F38 || mmmmm == M_0F3A : "invalid value for VEX.m-mmmm"; 894 assert w == W0 || w == W1 : "invalid value for VEX.W"; 895 896 assert (rxb & 0x07) == rxb : "invalid value for VEX.RXB"; 897 assert (vvvv & 0x0F) == vvvv : "invalid value for VEX.vvvv"; 898 899 int rxb1s = rxb ^ 0x07; 900 int vvvv1s = vvvv ^ 0x0F; 901 if ((rxb & 0x03) == 0 && w == WIG && mmmmm == M_0F) { 902 // 2 byte encoding 903 int byte2 = 0; 904 byte2 |= (rxb1s & 0x04) << 5; 905 byte2 |= vvvv1s << 3; 906 byte2 |= l << 2; 907 byte2 |= pp; 908 909 emitByte(Prefix.VEX2); 910 emitByte(byte2); 911 } else { 912 // 3 byte encoding 913 int byte2 = 0; 914 byte2 = (rxb1s & 0x07) << 5; 915 byte2 |= mmmmm; 916 917 int byte3 = 0; 918 byte3 |= w << 7; 919 byte3 |= vvvv1s << 3; 920 byte3 |= l << 2; 921 byte3 |= pp; 922 923 emitByte(Prefix.VEX3); 924 emitByte(byte2); 925 emitByte(byte3); 926 } 927 } 928 929 public static int getLFlag(AVXSize size) { 930 switch (size) { 931 case XMM: 932 return L128; 933 case YMM: 934 return L256; 935 case ZMM: 936 return L512; 937 default: 938 return LZ; 939 } 940 } 941 942 public static boolean isAVX512Register(Register reg) { 943 return reg != null && reg.isValid() && AMD64.XMM.equals(reg.getRegisterCategory()) && reg.encoding > 15; 944 } 945 946 public final boolean vexPrefix(Register dst, Register nds, Register src, AVXSize size, int pp, int mmmmm, int w, int wEvex, boolean checkAVX) { 947 if (isAVX512Register(dst) || isAVX512Register(nds) || isAVX512Register(src) || size == AVXSize.ZMM) { 948 evexPrefix(dst, Register.None, nds, src, 
size, pp, mmmmm, wEvex, Z0, B0); 949 return true; 950 } 951 emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0, checkAVX); 952 return false; 953 } 954 955 public final boolean vexPrefix(Register dst, Register nds, AMD64Address src, AVXSize size, int pp, int mmmmm, int w, int wEvex, boolean checkAVX) { 956 if (isAVX512Register(dst) || isAVX512Register(nds) || size == AVXSize.ZMM) { 957 evexPrefix(dst, Register.None, nds, src, size, pp, mmmmm, wEvex, Z0, B0); 958 return true; 959 } 960 emitVEX(getLFlag(size), pp, mmmmm, w, getRXB(dst, src), nds.isValid() ? nds.encoding() : 0, checkAVX); 961 return false; 962 } 963 964 protected static final class EVEXPrefixConfig { 965 public static final int Z0 = 0x0; 966 public static final int Z1 = 0x1; 967 968 public static final int B0 = 0x0; 969 public static final int B1 = 0x1; 970 971 private EVEXPrefixConfig() { 972 } 973 } 974 975 private static final int NOT_SUPPORTED_VECTOR_LENGTH = -1; 976 977 /** 978 * EVEX-encoded instructions use a compressed displacement scheme by multiplying disp8 with a 979 * scaling factor N depending on the tuple type and the vector length. 
*
 * Reference: Intel Software Developer's Manual Volume 2, Section 2.6.5
 * <p>
 * Each constant lists its scaling factor for 128-bit, 256-bit and 512-bit vectors, in that
 * order. {@code NOT_SUPPORTED_VECTOR_LENGTH} marks a combination that must not be queried.
 */
protected enum EVEXTuple {
    INVALID(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH),
    FV_NO_BROADCAST_32BIT(16, 32, 64),
    FV_BROADCAST_32BIT(4, 4, 4),
    FV_NO_BROADCAST_64BIT(16, 32, 64),
    FV_BROADCAST_64BIT(8, 8, 8),
    HV_NO_BROADCAST_32BIT(8, 16, 32),
    HV_BROADCAST_32BIT(4, 4, 4),
    FVM(16, 32, 64),
    T1S_8BIT(1, 1, 1),
    T1S_16BIT(2, 2, 2),
    T1S_32BIT(4, 4, 4),
    T1S_64BIT(8, 8, 8),
    T1F_32BIT(4, 4, 4),
    T1F_64BIT(8, 8, 8),
    T2_32BIT(8, 8, 8),
    T2_64BIT(NOT_SUPPORTED_VECTOR_LENGTH, 16, 16),
    T4_32BIT(NOT_SUPPORTED_VECTOR_LENGTH, 16, 16),
    T4_64BIT(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, 32),
    T8_32BIT(NOT_SUPPORTED_VECTOR_LENGTH, NOT_SUPPORTED_VECTOR_LENGTH, 32),
    HVM(8, 16, 32),
    QVM(4, 8, 16),
    OVM(2, 4, 8),
    M128(16, 16, 16),
    DUP(8, 32, 64);

    private final int scalingFactorVL128;
    private final int scalingFactorVL256;
    private final int scalingFactorVL512;

    EVEXTuple(int vl128, int vl256, int vl512) {
        this.scalingFactorVL128 = vl128;
        this.scalingFactorVL256 = vl256;
        this.scalingFactorVL512 = vl512;
    }

    /**
     * Returns {@code scalingFactor} unchanged unless it is the not-supported marker, in which
     * case a {@code GraalError} is thrown.
     */
    private static int verifyScalingFactor(int scalingFactor) {
        if (scalingFactor != NOT_SUPPORTED_VECTOR_LENGTH) {
            return scalingFactor;
        }
        throw GraalError.shouldNotReachHere("Invalid scaling factor.");
    }

    /**
     * Looks up the disp8 scaling factor of this tuple type for the given vector size.
     * Only XMM, YMM and ZMM are accepted; any other size, or a combination marked as not
     * supported, raises a {@code GraalError}.
     */
    public int getDisp8ScalingFactor(AVXSize size) {
        final int factor;
        switch (size) {
            case XMM:
                factor = scalingFactorVL128;
                break;
            case YMM:
                factor = scalingFactorVL256;
                break;
            case ZMM:
                factor = scalingFactorVL512;
                break;
            default:
                throw GraalError.shouldNotReachHere("Unsupported vector size.");
        }
        return verifyScalingFactor(factor);
    }
}
1039 1040 // @formatter:off 1041 // 1042 // Instruction Format and EVEX illustrated below (optional []): 1043 // 1044 // #of bytes: 4 1 1 1 1,2,4 1 1045 // [Prefixes] EVEX OpCode ModR/M [SIB] [Disp8*N] [Immediate] 1046 // [Disp16,32] 1047 // 1048 // The EVEX prefix is a 4-byte prefix, with the first two bytes derived from unused encoding 1049 // form of the 32-bit-mode-only BOUND instruction. The layout of the EVEX prefix is shown in 1050 // the figure below. The first byte must be 0x62, followed by three pay-load bytes, denoted 1051 // as P1, P2, and P3 individually or collectively as P[23:0] (see below). 1052 // 1053 // EVEX: 0x62 | P1 | P2 | P3 1054 // 1055 // 7 6 5 4 3 2 1 0 1056 // P1 R X B R' 0 0 m m P[ 7: 0] 1057 // P2 W v v v v 1 p p P[15: 8] 1058 // P3 z L' L b V' a a a P[23:16] 1059 // 1060 // Figure. Bit Field Layout of the EVEX Prefix 1061 // 1062 // Table. EVEX Prefix Bit Field Functional Grouping 1063 // 1064 // Notation Bit field Group Position Comment 1065 // --------- -------------------------- -------- ----------------------- 1066 // EVEX.RXB Next-8 register specifier P[7:5] Combine with ModR/M.reg, ModR/M.rm (base, index/vidx). 1067 // EVEX.X High-16 register specifier P[6] Combine with EVEX.B and ModR/M.rm, when SIB/VSIB absent. 1068 // EVEX.R' High-16 register specifier P[4] Combine with EVEX.R and ModR/M.reg. 1069 // -- Reserved P[3:2] Must be 0. 1070 // EVEX.mm Compressed legacy escape P[1:0] Identical to low two bits of VEX.mmmmm. 1071 // 1072 // EVEX.W Osize promotion/Opcode ext P[15] 1073 // EVEX.vvvv NDS register specifier P[14:11] Same as VEX.vvvv. 1074 // -- Fixed Value P[10] Must be 1. 1075 // EVEX.pp Compressed legacy prefix P[9:8] Identical to VEX.pp. 1076 // 1077 // EVEX.z Zeroing/Merging P[23] 1078 // EVEX.L'L Vector length/RC P[22:21] 1079 // EVEX.b Broadcast/RC/SAE Context P[20] 1080 // EVEX.V' High-16 NDS/VIDX register P[19] Combine with EVEX.vvvv or VSIB when present. 
1081 // EVEX.aaa Embedded opmask register P[18:16] 1082 // 1083 // @formatter:on 1084 1085 /** 1086 * Low-level function to encode and emit the EVEX prefix. 1087 * <p> 1088 * 62 [0 1 1 0 0 0 1 0]<br> 1089 * P1 [R X B R'0 0 m m]<br> 1090 * P2 [W v v v v 1 p p]<br> 1091 * P3 [z L'L b V'a a a] 1092 * <p> 1093 * The pp field encodes an extension to the opcode:<br> 1094 * 00: no extension<br> 1095 * 01: 66<br> 1096 * 10: F3<br> 1097 * 11: F2 1098 * <p> 1099 * The mm field encodes the leading bytes of the opcode:<br> 1100 * 01: implied 0F leading opcode byte<br> 1101 * 10: implied 0F 38 leading opcode bytes<br> 1102 * 11: implied 0F 3A leading opcode bytes 1103 * <p> 1104 * The z field encodes the merging mode (merge or zero). 1105 * <p> 1106 * The b field encodes the source broadcast or data rounding modes. 1107 * <p> 1108 * The aaa field encodes the operand mask register. 1109 */ 1110 private void emitEVEX(int l, int pp, int mm, int w, int rxb, int reg, int vvvvv, int z, int b, int aaa) { 1111 assert ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX512F) : "emitting EVEX prefix on a CPU without AVX512 support"; 1112 1113 assert l == L128 || l == L256 || l == L512 : "invalid value for EVEX.L'L"; 1114 assert pp == P_ || pp == P_66 || pp == P_F3 || pp == P_F2 : "invalid value for EVEX.pp"; 1115 assert mm == M_0F || mm == M_0F38 || mm == M_0F3A : "invalid value for EVEX.mm"; 1116 assert w == W0 || w == W1 : "invalid value for EVEX.W"; 1117 1118 assert (rxb & 0x07) == rxb : "invalid value for EVEX.RXB"; 1119 assert (reg & 0x1F) == reg : "invalid value for EVEX.R'"; 1120 assert (vvvvv & 0x1F) == vvvvv : "invalid value for EVEX.V'vvvv"; 1121 1122 assert z == Z0 || z == Z1 : "invalid value for EVEX.z"; 1123 assert b == B0 || b == B1 : "invalid value for EVEX.b"; 1124 assert (aaa & 0x07) == aaa : "invalid value for EVEX.aaa"; 1125 1126 emitByte(Prefix.EVEX); 1127 int p1 = 0; 1128 p1 |= ((rxb ^ 0x07) & 0x07) << 5; 1129 p1 |= reg < 16 ? 
0x10 : 0; 1130 p1 |= mm; 1131 emitByte(p1); 1132 1133 int p2 = 0; 1134 p2 |= w << 7; 1135 p2 |= ((vvvvv ^ 0x0F) & 0x0F) << 3; 1136 p2 |= 0x04; 1137 p2 |= pp; 1138 emitByte(p2); 1139 1140 int p3 = 0; 1141 p3 |= z << 7; 1142 p3 |= l << 5; 1143 p3 |= b << 4; 1144 p3 |= vvvvv < 16 ? 0x08 : 0; 1145 p3 |= aaa; 1146 emitByte(p3); 1147 } 1148 1149 /** 1150 * Get RXB bits for register-register instructions in EVEX-encoding, where ModRM.rm contains a 1151 * register index. The R bit extends the ModRM.reg field and the X and B bits extends the 1152 * ModRM.rm field. 1153 */ 1154 private static int getRXBForEVEX(Register reg, Register rm) { 1155 int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1; 1156 rxb |= (rm == null ? 0 : rm.encoding & 0x018) >> 3; 1157 return rxb; 1158 } 1159 1160 /** 1161 * Helper method for emitting EVEX prefix in the form of RRRR. 1162 */ 1163 protected final void evexPrefix(Register dst, Register mask, Register nds, Register src, AVXSize size, int pp, int mm, int w, int z, int b) { 1164 assert !mask.isValid() || inRC(MASK, mask); 1165 emitEVEX(getLFlag(size), pp, mm, w, getRXBForEVEX(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0); 1166 } 1167 1168 /** 1169 * Helper method for emitting EVEX prefix in the form of RRRM. Because the memory addressing in 1170 * EVEX-encoded instructions employ a compressed displacement scheme when using disp8 form, the 1171 * user of this API should make sure to encode the operands using 1172 * {@link #emitOperandHelper(Register, AMD64Address, int, int)}. 1173 */ 1174 protected final void evexPrefix(Register dst, Register mask, Register nds, AMD64Address src, AVXSize size, int pp, int mm, int w, int z, int b) { 1175 assert !mask.isValid() || inRC(MASK, mask); 1176 emitEVEX(getLFlag(size), pp, mm, w, getRXB(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0); 1177 } 1178 1179 } 1180