/*
 * Copyright (c) 2009, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package org.graalvm.compiler.asm.amd64;

import static jdk.vm.ci.amd64.AMD64.CPU;
import static jdk.vm.ci.amd64.AMD64.XMM;
import static jdk.vm.ci.amd64.AMD64.r12;
import static jdk.vm.ci.amd64.AMD64.r13;
import static jdk.vm.ci.amd64.AMD64.rbp;
import static jdk.vm.ci.amd64.AMD64.rip;
import static jdk.vm.ci.amd64.AMD64.rsp;
import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.BYTE;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.DWORD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PS;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.QWORD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SS;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.WORD;
import static org.graalvm.compiler.core.common.NumUtil.isByte;
import static org.graalvm.compiler.core.common.NumUtil.isInt;
import static org.graalvm.compiler.core.common.NumUtil.isShiftCount;
import static org.graalvm.compiler.core.common.NumUtil.isUByte;
import org.graalvm.compiler.asm.Assembler;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
import org.graalvm.compiler.core.common.NumUtil;
import org.graalvm.compiler.debug.GraalError;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.Register.RegisterCategory;
import jdk.vm.ci.code.TargetDescription;
import jdk.vm.ci.meta.PlatformKind;

/**
 * This class implements an assembler that can encode most X86 instructions.
 */
public class AMD64Assembler extends Assembler {

    private static final int MinEncodingNeedsRex = 8;

    /**
     * The x86 condition codes used for conditional jumps/moves.
     */
    public enum ConditionFlag {
        Zero(0x4, "|zero|"),
        NotZero(0x5, "|nzero|"),
        Equal(0x4, "="),
        NotEqual(0x5, "!="),
        Less(0xc, "<"),
        LessEqual(0xe, "<="),
        Greater(0xf, ">"),
        GreaterEqual(0xd, ">="),
        Below(0x2, "|<|"),
        BelowEqual(0x6, "|<=|"),
        Above(0x7, "|>|"),
        AboveEqual(0x3, "|>=|"),
        Overflow(0x0, "|of|"),
        NoOverflow(0x1, "|nof|"),
        CarrySet(0x2, "|carry|"),
        CarryClear(0x3, "|ncarry|"),
        Negative(0x8, "|neg|"),
        Positive(0x9, "|pos|"),
        Parity(0xa, "|par|"),
        NoParity(0xb, "|npar|");

        private final int value;
        private final String operator;

        ConditionFlag(int value, String operator) {
            this.value = value;
            this.operator = operator;
        }

        public ConditionFlag negate() {
            switch (this) {
                case Zero:
                    return NotZero;
                case NotZero:
                    return Zero;
                case Equal:
                    return NotEqual;
                case NotEqual:
                    return Equal;
                case Less:
                    return GreaterEqual;
                case LessEqual:
                    return Greater;
                case Greater:
                    return LessEqual;
                case GreaterEqual:
                    return Less;
                case Below:
                    return AboveEqual;
                case BelowEqual:
                    return Above;
                case Above:
                    return BelowEqual;
                case AboveEqual:
                    return Below;
                case Overflow:
                    return NoOverflow;
                case NoOverflow:
                    return Overflow;
                case CarrySet:
                    return CarryClear;
                case CarryClear:
                    return CarrySet;
                case Negative:
                    return Positive;
                case Positive:
                    return Negative;
                case Parity:
                    return NoParity;
                case NoParity:
                    return Parity;
            }
            throw new IllegalArgumentException();
        }

        public int getValue() {
            return value;
        }

        @Override
        public String toString() {
            return operator;
        }
    }
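    // Illustrative note (not part of the original source): the 4-bit value of each
    // ConditionFlag is the x86 condition-code nibble, so a hypothetical two-byte
    // "jcc rel32" encoding would be 0x0F, 0x80 | flag.getValue(). Equal/Zero share
    // 0x4 and Below/CarrySet share 0x2 because they test the same flag bits.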
    /**
     * Constants for X86 prefix bytes.
     */
    private static class Prefix {
        private static final int REX = 0x40;
        private static final int REXB = 0x41;
        private static final int REXX = 0x42;
        private static final int REXXB = 0x43;
        private static final int REXR = 0x44;
        private static final int REXRB = 0x45;
        private static final int REXRX = 0x46;
        private static final int REXRXB = 0x47;
        private static final int REXW = 0x48;
        private static final int REXWB = 0x49;
        private static final int REXWX = 0x4A;
        private static final int REXWXB = 0x4B;
        private static final int REXWR = 0x4C;
        private static final int REXWRB = 0x4D;
        private static final int REXWRX = 0x4E;
        private static final int REXWRXB = 0x4F;
        private static final int VEX_3BYTES = 0xC4;
        private static final int VEX_2BYTES = 0xC5;
    }

    private static class VexPrefix {
        private static final int VEX_R = 0x80;
        private static final int VEX_W = 0x80;
    }

    private static class VexSimdPrefix {
        private static final int VEX_SIMD_NONE = 0x0;
        private static final int VEX_SIMD_66 = 0x1;
        private static final int VEX_SIMD_F3 = 0x2;
        private static final int VEX_SIMD_F2 = 0x3;
    }

    private static class VexOpcode {
        private static final int VEX_OPCODE_NONE = 0x0;
        private static final int VEX_OPCODE_0F = 0x1;
        private static final int VEX_OPCODE_0F_38 = 0x2;
        private static final int VEX_OPCODE_0F_3A = 0x3;
    }

    public static class AvxVectorLen {
        public static final int AVX_128bit = 0x0;
        public static final int AVX_256bit = 0x1;
        public static final int AVX_512bit = 0x2;
        public static final int AVX_NoVec = 0x4;
    }

    public static class EvexTupleType {
        public static final int EVEX_FV = 0;
        public static final int EVEX_HV = 4;
        public static final int EVEX_FVM = 6;
        public static final int EVEX_T1S = 7;
        public static final int EVEX_T1F = 11;
        public static final int EVEX_T2 = 13;
        public static final int EVEX_T4 = 15;
        public static final int EVEX_T8 = 17;
        public static final int EVEX_HVM = 18;
        public static final int EVEX_QVM = 19;
        public static final int EVEX_OVM = 20;
        public static final int EVEX_M128 = 21;
        public static final int EVEX_DUP = 22;
        public static final int EVEX_ETUP = 23;
    }

    public static class EvexInputSizeInBits {
        public static final int EVEX_8bit = 0;
        public static final int EVEX_16bit = 1;
        public static final int EVEX_32bit = 2;
        public static final int EVEX_64bit = 3;
        public static final int EVEX_NObit = 4;
    }

    private AMD64InstructionAttr curAttributes;

    AMD64InstructionAttr getCurAttributes() {
        return curAttributes;
    }

    void setCurAttributes(AMD64InstructionAttr attributes) {
        curAttributes = attributes;
    }
    /**
     * The x86 operand sizes.
     */
    public enum OperandSize {
        BYTE(1, AMD64Kind.BYTE) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            }

            @Override
            protected int immediateSize() {
                return 1;
            }
        },

        WORD(2, AMD64Kind.WORD, 0x66) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                assert imm == (short) imm;
                asm.emitShort(imm);
            }

            @Override
            protected int immediateSize() {
                return 2;
            }
        },

        DWORD(4, AMD64Kind.DWORD) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                asm.emitInt(imm);
            }

            @Override
            protected int immediateSize() {
                return 4;
            }
        },

        QWORD(8, AMD64Kind.QWORD) {
            @Override
            protected void emitImmediate(AMD64Assembler asm, int imm) {
                asm.emitInt(imm);
            }

            @Override
            protected int immediateSize() {
                return 4;
            }
        },

        SS(4, AMD64Kind.SINGLE, 0xF3, true),

        SD(8, AMD64Kind.DOUBLE, 0xF2, true),

        PS(16, AMD64Kind.V128_SINGLE, true),

        PD(16, AMD64Kind.V128_DOUBLE, 0x66, true);

        private final int sizePrefix;
        private final int bytes;
        private final boolean xmm;
        private final AMD64Kind kind;

        OperandSize(int bytes, AMD64Kind kind) {
            this(bytes, kind, 0);
        }

        OperandSize(int bytes, AMD64Kind kind, int sizePrefix) {
            this(bytes, kind, sizePrefix, false);
        }

        OperandSize(int bytes, AMD64Kind kind, boolean xmm) {
            this(bytes, kind, 0, xmm);
        }

        OperandSize(int bytes, AMD64Kind kind, int sizePrefix, boolean xmm) {
            this.sizePrefix = sizePrefix;
            this.bytes = bytes;
            this.kind = kind;
            this.xmm = xmm;
        }

        public int getBytes() {
            return bytes;
        }

        public boolean isXmmType() {
            return xmm;
        }

        public AMD64Kind getKind() {
            return kind;
        }

        public static OperandSize get(PlatformKind kind) {
            for (OperandSize operandSize : OperandSize.values()) {
                if (operandSize.kind.equals(kind)) {
                    return operandSize;
                }
            }
            throw GraalError.shouldNotReachHere("Unexpected kind: " + kind.toString());
        }

        /**
         * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
         * as sign-extended 32-bit values.
         *
         * @param asm
         * @param imm
         */
        protected void emitImmediate(AMD64Assembler asm, int imm) {
            throw new UnsupportedOperationException();
        }

        protected int immediateSize() {
            throw new UnsupportedOperationException();
        }
    }
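    // Illustrative note (not part of the original source): QWORD deliberately reports
    // an immediate size of 4 because x86-64 has no general reg/mem instruction form
    // with a full 64-bit immediate; e.g. "add rax, -1" carries a 32-bit immediate
    // that the CPU sign-extends to 64 bits, which is why QWORD.emitImmediate uses
    // emitInt rather than emitLong.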
    /**
     * Operand size and register type constraints.
     */
    private enum OpAssertion {
        ByteAssertion(CPU, CPU, BYTE),
        ByteOrLargerAssertion(CPU, CPU, BYTE, WORD, DWORD, QWORD),
        WordOrLargerAssertion(CPU, CPU, WORD, DWORD, QWORD),
        DwordOrLargerAssertion(CPU, CPU, DWORD, QWORD),
        WordOrDwordAssertion(CPU, CPU, WORD, QWORD),
        QwordAssertion(CPU, CPU, QWORD),
        FloatAssertion(XMM, XMM, SS, SD, PS, PD),
        PackedFloatAssertion(XMM, XMM, PS, PD),
        SingleAssertion(XMM, XMM, SS),
        DoubleAssertion(XMM, XMM, SD),
        PackedDoubleAssertion(XMM, XMM, PD),
        IntToFloatAssertion(XMM, CPU, DWORD, QWORD),
        FloatToIntAssertion(CPU, XMM, DWORD, QWORD);

        private final RegisterCategory resultCategory;
        private final RegisterCategory inputCategory;
        private final OperandSize[] allowedSizes;

        OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
            this.resultCategory = resultCategory;
            this.inputCategory = inputCategory;
            this.allowedSizes = allowedSizes;
        }

        protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
            assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
            assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;

            for (OperandSize s : allowedSizes) {
                if (size == s) {
                    return true;
                }
            }

            assert false : "invalid operand size " + size + " used in " + op;
            return false;
        }
    }

    public abstract static class OperandDataAnnotation extends CodeAnnotation {
        /**
         * The position (bytes from the beginning of the method) of the operand.
         */
        public final int operandPosition;
        /**
         * The size of the operand, in bytes.
         */
        public final int operandSize;
        /**
         * The position (bytes from the beginning of the method) of the next instruction. On AMD64,
         * RIP-relative operands are relative to this position.
         */
        public final int nextInstructionPosition;

        OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition);

            this.operandPosition = operandPosition;
            this.operandSize = operandSize;
            this.nextInstructionPosition = nextInstructionPosition;
        }

        @Override
        public String toString() {
            return getClass().getSimpleName() + " instruction [" + instructionPosition + ", " + nextInstructionPosition + "[ operand at " + operandPosition + " size " + operandSize;
        }
    }
    /**
     * Annotation that stores additional information about the displacement of a
     * {@link Assembler#getPlaceholder placeholder address} that needs patching.
     */
    public static class AddressDisplacementAnnotation extends OperandDataAnnotation {
        AddressDisplacementAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
        }
    }

    /**
     * Annotation that stores additional information about the immediate operand, e.g., of a call
     * instruction, that needs patching.
     */
    public static class ImmediateOperandAnnotation extends OperandDataAnnotation {
        ImmediateOperandAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
            super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
        }
    }

    /**
     * Constructs an assembler for the AMD64 architecture.
     */
    public AMD64Assembler(TargetDescription target) {
        super(target);
    }

    public boolean supports(CPUFeature feature) {
        return ((AMD64) target.arch).getFeatures().contains(feature);
    }

    private static int encode(Register r) {
        assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding;
        return r.encoding & 0x7;
    }

    /**
     * Get RXB bits for register-register instruction. In that encoding, ModRM.rm contains a
     * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
     * field. The X bit must be 0.
     */
    protected static int getRXB(Register reg, Register rm) {
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
        return rxb;
    }

    /**
     * Get RXB bits for register-memory instruction. The R bit extends the ModRM.reg field. There
     * are two cases for the memory operand:<br>
     * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.
     * <br>
     * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
     */
    protected static int getRXB(Register reg, AMD64Address rm) {
        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
        if (!rm.getIndex().equals(Register.None)) {
            rxb |= (rm.getIndex().encoding & 0x08) >> 2;
        }
        if (!rm.getBase().equals(Register.None)) {
            rxb |= (rm.getBase().encoding & 0x08) >> 3;
        }
        return rxb;
    }
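    // Illustrative example (not part of the original source): RXB packs the REX.R,
    // REX.X and REX.B extension bits into bit positions 2, 1 and 0. For instance,
    // getRXB(r9, r10) yields 0b101 (R and B set, X clear), so ORing it into the 0x40
    // REX base produces the 0x45 (REXRB) prefix from the Prefix constants above.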
    /**
     * Emit the ModR/M byte for one register operand and an opcode extension in the R field.
     * <p>
     * Format: [ 11 reg r/m ]
     */
    protected void emitModRM(int reg, Register rm) {
        assert (reg & 0x07) == reg;
        emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
    }

    /**
     * Emit the ModR/M byte for two register operands.
     * <p>
     * Format: [ 11 reg r/m ]
     */
    protected void emitModRM(Register reg, Register rm) {
        emitModRM(reg.encoding & 0x07, rm);
    }

    protected void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, false, additionalInstructionSize);
    }

    /**
     * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
     *
     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
     */
    protected void emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
        assert !reg.equals(Register.None);
        emitOperandHelper(encode(reg), addr, force4Byte, additionalInstructionSize);
    }

    protected void emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize) {
        emitOperandHelper(reg, addr, false, additionalInstructionSize);
    }

    /**
     * Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode
     * extension in the R field.
     *
     * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
     * @param additionalInstructionSize the number of bytes that will be emitted after the operand,
     *            so that the start position of the next instruction can be computed even though
     *            this instruction has not been completely emitted yet.
     */
    protected void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
        assert (reg & 0x07) == reg;
        int regenc = reg << 3;

        Register base = addr.getBase();
        Register index = addr.getIndex();

        AMD64Address.Scale scale = addr.getScale();
        int disp = addr.getDisplacement();

        if (base.equals(AMD64.rip)) { // also matches addresses returned by getPlaceholder()
            // [00 000 101] disp32
            assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
            emitByte(0x05 | regenc);
            if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
                codePatchingAnnotationConsumer.accept(new AddressDisplacementAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize));
            }
            emitInt(disp);
        } else if (base.isValid()) {
            int baseenc = base.isValid() ? encode(base) : 0;
            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [base + indexscale + disp]
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base + indexscale]
                    // [00 reg 100][ss index base]
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x04 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                } else if (isByte(disp) && !force4Byte) {
                    // [base + indexscale + imm8]
                    // [01 reg 100][ss index base] imm8
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x44 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                    emitByte(disp & 0xFF);
                } else {
                    // [base + indexscale + disp32]
                    // [10 reg 100][ss index base] disp32
                    assert !index.equals(rsp) : "illegal addressing mode";
                    emitByte(0x84 | regenc);
                    emitByte(scale.log2 << 6 | indexenc | baseenc);
                    emitInt(disp);
                }
            } else if (base.equals(rsp) || base.equals(r12)) {
                // [rsp + disp]
                if (disp == 0) {
                    // [rsp]
                    // [00 reg 100][00 100 100]
                    emitByte(0x04 | regenc);
                    emitByte(0x24);
                } else if (isByte(disp) && !force4Byte) {
                    // [rsp + imm8]
                    // [01 reg 100][00 100 100] disp8
                    emitByte(0x44 | regenc);
                    emitByte(0x24);
                    emitByte(disp & 0xFF);
                } else {
                    // [rsp + imm32]
                    // [10 reg 100][00 100 100] disp32
                    emitByte(0x84 | regenc);
                    emitByte(0x24);
                    emitInt(disp);
                }
            } else {
                // [base + disp]
                assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                    // [base]
                    // [00 reg base]
                    emitByte(0x00 | regenc | baseenc);
                } else if (isByte(disp) && !force4Byte) {
                    // [base + disp8]
                    // [01 reg base] disp8
                    emitByte(0x40 | regenc | baseenc);
                    emitByte(disp & 0xFF);
                } else {
                    // [base + disp32]
                    // [10 reg base] disp32
                    emitByte(0x80 | regenc | baseenc);
                    emitInt(disp);
                }
            }
        } else {
            if (index.isValid()) {
                int indexenc = encode(index) << 3;
                // [indexscale + disp]
                // [00 reg 100][ss index 101] disp32
                assert !index.equals(rsp) : "illegal addressing mode";
                emitByte(0x04 | regenc);
                emitByte(scale.log2 << 6 | indexenc | 0x05);
                emitInt(disp);
            } else {
                // [disp] ABSOLUTE
                // [00 reg 100][00 100 101] disp32
                emitByte(0x04 | regenc);
                emitByte(0x25);
                emitInt(disp);
            }
        }
        setCurAttributes(null);
    }
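    // Illustrative example (not part of the original source): for an operand like
    // [rax + rbx*4 + 8] with reg = rcx, the helper takes the SIB path with a disp8:
    // ModRM = 0x4C ([01 001 100]), SIB = 0x98 ([10 011 000]), disp8 = 0x08, so a
    // QWORD "mov rcx, [rax + rbx*4 + 8]" would come out as 48 8B 4C 98 08.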
    /**
     * Base class for AMD64 opcodes.
     */
    public static class AMD64Op {

        protected static final int P_0F = 0x0F;
        protected static final int P_0F38 = 0x380F;
        protected static final int P_0F3A = 0x3A0F;

        private final String opcode;

        protected final int prefix1;
        protected final int prefix2;
        protected final int op;

        private final boolean dstIsByte;
        private final boolean srcIsByte;

        private final OpAssertion assertion;
        private final CPUFeature feature;

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
        }

        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            this.opcode = opcode;
            this.prefix1 = prefix1;
            this.prefix2 = prefix2;
            this.op = op;

            this.dstIsByte = dstIsByte;
            this.srcIsByte = srcIsByte;

            this.assertion = assertion;
            this.feature = feature;
        }

        protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
            if (prefix1 != 0) {
                asm.emitByte(prefix1);
            }
            if (size.sizePrefix != 0) {
                asm.emitByte(size.sizePrefix);
            }
            int rexPrefix = 0x40 | rxb;
            if (size == QWORD) {
                rexPrefix |= 0x08;
            }
            if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
                asm.emitByte(rexPrefix);
            }
            if (prefix2 > 0xFF) {
                asm.emitShort(prefix2);
            } else if (prefix2 > 0) {
                asm.emitByte(prefix2);
            }
            asm.emitByte(op);
        }

        protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
            assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
            assert assertion.checkOperands(this, size, resultReg, inputReg);
            return true;
        }

        @Override
        public String toString() {
            return opcode;
        }
    }
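    // Illustrative note (not part of the original source): emitOpcode also emits a
    // "neutral" REX (0x40) when a byte-register operand has encoding >= 4. Without a
    // REX prefix, encoding 4 in ModRM would select the legacy high-byte register AH;
    // with any REX present it selects SPL instead, which is what the register
    // allocator expects for r/m8 operands.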
    /**
     * Base class for AMD64 opcodes with immediate operands.
     */
    public static class AMD64ImmOp extends AMD64Op {

        private final boolean immIsByte;

        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
            super(opcode, 0, prefix, op, assertion, null);
            this.immIsByte = immIsByte;
        }

        protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
            if (immIsByte) {
                assert imm == (byte) imm;
                asm.emitByte(imm);
            } else {
                size.emitImmediate(asm, imm);
            }
        }

        protected final int immediateSize(OperandSize size) {
            if (immIsByte) {
                return 1;
            } else {
                return size.bytes;
            }
        }
    }

    /**
     * Opcode with operand order of either RM or MR for 2 address forms.
     */
    public abstract static class AMD64RROp extends AMD64Op {

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
        }

        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
    }

    /**
     * Opcode with operand order of either RM or MR for 3 address forms.
     */
    public abstract static class AMD64RRROp extends AMD64Op {

        protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
        }

        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src);
    }
    /**
     * Opcode with operand order of RM.
     */
    public static class AMD64RMOp extends AMD64RROp {
        // @formatter:off
        public static final AMD64RMOp IMUL = new AMD64RMOp("IMUL", P_0F, 0xAF, OpAssertion.ByteOrLargerAssertion);
        public static final AMD64RMOp BSF = new AMD64RMOp("BSF", P_0F, 0xBC);
        public static final AMD64RMOp BSR = new AMD64RMOp("BSR", P_0F, 0xBD);
        public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, CPUFeature.POPCNT);
        public static final AMD64RMOp TZCNT = new AMD64RMOp("TZCNT", 0xF3, P_0F, 0xBC, CPUFeature.BMI1);
        public static final AMD64RMOp LZCNT = new AMD64RMOp("LZCNT", 0xF3, P_0F, 0xBD, CPUFeature.LZCNT);
        public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB", P_0F, 0xB6, false, true, OpAssertion.WordOrLargerAssertion);
        public static final AMD64RMOp MOVZX = new AMD64RMOp("MOVZX", P_0F, 0xB7, OpAssertion.DwordOrLargerAssertion);
        public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB", P_0F, 0xBE, false, true, OpAssertion.WordOrLargerAssertion);
        public static final AMD64RMOp MOVSX = new AMD64RMOp("MOVSX", P_0F, 0xBF, OpAssertion.DwordOrLargerAssertion);
        public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD", 0x63, OpAssertion.QwordAssertion);
        public static final AMD64RMOp MOVB = new AMD64RMOp("MOVB", 0x8A, OpAssertion.ByteAssertion);
        public static final AMD64RMOp MOV = new AMD64RMOp("MOV", 0x8B);
        public static final AMD64RMOp CMP = new AMD64RMOp("CMP", 0x3B);

        // MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with different operand size prefix
        public static final AMD64RMOp MOVD = new AMD64RMOp("MOVD", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVQ = new AMD64RMOp("MOVQ", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64RMOp MOVSS = new AMD64RMOp("MOVSS", P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);
        public static final AMD64RMOp MOVSD = new AMD64RMOp("MOVSD", P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);

        // TEST is documented as MR operation, but it's symmetric, and using it as RM operation is more convenient.
        public static final AMD64RMOp TESTB = new AMD64RMOp("TEST", 0x84, OpAssertion.ByteAssertion);
        public static final AMD64RMOp TEST = new AMD64RMOp("TEST", 0x85);
        // @formatter:on

        protected AMD64RMOp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64RMOp(String opcode, int prefix, int op) {
            this(opcode, 0, prefix, op, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, OpAssertion.WordOrLargerAssertion, feature);
        }

        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, dst, src);
            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x2A:
                case 0x2C:
                case 0x2E:
                case 0x5A:
                case 0x6E:
                    isSimd = true;
                    noNds = true;
                    break;
                case 0x10:
                case 0x51:
                case 0x54:
                case 0x55:
                case 0x56:
                case 0x57:
                case 0x58:
                case 0x59:
                case 0x5C:
                case 0x5D:
                case 0x5E:
                case 0x5F:
                    isSimd = true;
                    break;
            }

            int opc = 0;
            if (isSimd) {
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        opc = VexOpcode.VEX_OPCODE_NONE;
                        isSimd = false;
                        break;
                }
            }

            if (isSimd) {
                int pre;
                boolean rexVexW = size == QWORD;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                int encode;
                if (noNds) {
                    encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
                } else {
                    encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitByte(0xC0 | encode);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
            assert verify(asm, size, dst, null);
            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x10:
                case 0x2A:
                case 0x2C:
                case 0x2E:
                case 0x6E:
                    isSimd = true;
                    noNds = true;
                    break;
                case 0x51:
                case 0x54:
                case 0x55:
                case 0x56:
                case 0x57:
                case 0x58:
                case 0x59:
                case 0x5C:
                case 0x5D:
                case 0x5E:
                case 0x5F:
                    isSimd = true;
                    break;
            }

            int opc = 0;
            if (isSimd) {
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        isSimd = false;
                        break;
                }
            }

            if (isSimd) {
                int pre;
                boolean rexVexW = size == QWORD;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                if (noNds) {
                    asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
                } else {
                    asm.simdPrefix(dst, dst, src, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitOperandHelper(dst, src, 0);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
                asm.emitOperandHelper(dst, src, 0);
            }
        }
    }
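    // Illustrative example (not part of the original source): the RM form puts the
    // destination in ModRM.reg, so AMD64RMOp.MOV.emit(asm, QWORD, rax, rbx) takes the
    // non-SIMD path and produces 48 8B C3 ("mov rax, rbx"): REX.W, opcode 0x8B, and
    // ModRM 0xC3 ([11 000 011]).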
    /**
     * Opcode with operand order of RRM for 3 address forms.
     */
    public static class AMD64RRMOp extends AMD64RRROp {
        protected AMD64RRMOp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64RRMOp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op) {
            this(opcode, 0, prefix, op, null);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion, null);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64RRMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
        }

        protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
            this(opcode, prefix1, prefix2, op, OpAssertion.WordOrLargerAssertion, feature);
        }

        protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src) {
            assert verify(asm, size, dst, src);
            int pre;
            int opc;
            boolean rexVexW = size == QWORD;
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
            int curPrefix = size.sizePrefix | prefix1;
            switch (curPrefix) {
                case 0x66:
                    pre = VexSimdPrefix.VEX_SIMD_66;
                    break;
                case 0xF2:
                    pre = VexSimdPrefix.VEX_SIMD_F2;
                    break;
                case 0xF3:
                    pre = VexSimdPrefix.VEX_SIMD_F3;
                    break;
                default:
                    pre = VexSimdPrefix.VEX_SIMD_NONE;
                    break;
            }
            switch (prefix2) {
                case P_0F:
                    opc = VexOpcode.VEX_OPCODE_0F;
                    break;
                case P_0F38:
                    opc = VexOpcode.VEX_OPCODE_0F_38;
                    break;
                case P_0F3A:
                    opc = VexOpcode.VEX_OPCODE_0F_3A;
                    break;
                default:
                    throw GraalError.shouldNotReachHere("invalid VEX instruction prefix");
            }
            int encode;
            encode = asm.simdPrefixAndEncode(dst, nds, src, pre, opc, attributes);
            asm.emitByte(op);
            asm.emitByte(0xC0 | encode);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, AMD64Address src) {
            assert verify(asm, size, dst, null);
            int pre;
            int opc;
            boolean rexVexW = size == QWORD;
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
            int curPrefix = size.sizePrefix | prefix1;
            switch (curPrefix) {
                case 0x66:
                    pre = VexSimdPrefix.VEX_SIMD_66;
                    break;
                case 0xF2:
                    pre = VexSimdPrefix.VEX_SIMD_F2;
                    break;
                case 0xF3:
                    pre = VexSimdPrefix.VEX_SIMD_F3;
                    break;
                default:
                    pre = VexSimdPrefix.VEX_SIMD_NONE;
                    break;
            }
            switch (prefix2) {
                case P_0F:
                    opc = VexOpcode.VEX_OPCODE_0F;
                    break;
                case P_0F38:
                    opc = VexOpcode.VEX_OPCODE_0F_38;
                    break;
                case P_0F3A:
                    opc = VexOpcode.VEX_OPCODE_0F_3A;
                    break;
                default:
                    throw GraalError.shouldNotReachHere("invalid VEX instruction prefix");
            }
            asm.simdPrefix(dst, nds, src, pre, opc, attributes);
            asm.emitByte(op);
            asm.emitOperandHelper(dst, src, 0);
        }
    }

    /**
     * Opcode with operand order of MR.
     */
    public static class AMD64MROp extends AMD64RROp {
        // @formatter:off
        public static final AMD64MROp MOVB = new AMD64MROp("MOVB", 0x88, OpAssertion.ByteAssertion);
        public static final AMD64MROp MOV = new AMD64MROp("MOV", 0x89);

        // MOVD and MOVQ are the same opcode, just with different operand size prefix
        // Note that as MR opcodes, they have reverse operand order, so the IntToFloatAssertion must be used.
        public static final AMD64MROp MOVD = new AMD64MROp("MOVD", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
        public static final AMD64MROp MOVQ = new AMD64MROp("MOVQ", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);

        // MOVSS and MOVSD are the same opcode, just with different operand size prefix
        public static final AMD64MROp MOVSS = new AMD64MROp("MOVSS", P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
        public static final AMD64MROp MOVSD = new AMD64MROp("MOVSD", P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
        // @formatter:on

        protected AMD64MROp(String opcode, int op) {
            this(opcode, 0, op);
        }

        protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
            this(opcode, 0, op, assertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, prefix, op, assertion, null);
        }

        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
            this(opcode, 0, prefix, op, assertion, feature);
        }

        protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
            super(opcode, prefix1, prefix2, op, assertion, feature);
        }

        @Override
        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
            assert verify(asm, size, src, dst);
            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x7E:
                    isSimd = true;
                    noNds = true;
                    break;
                case 0x11:
                    isSimd = true;
                    break;
            }

            int opc = 0;
            if (isSimd) {
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        isSimd = false;
                        break;
                }
            }

            if (isSimd) {
                int pre;
                boolean rexVexW = size == QWORD;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                int encode;
                if (noNds) {
                    encode = asm.simdPrefixAndEncode(src, Register.None, dst, pre, opc, attributes);
                } else {
                    encode = asm.simdPrefixAndEncode(src, src, dst, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitByte(0xC0 | encode);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
                asm.emitModRM(src, dst);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
            assert verify(asm, size, null, src);
            boolean isSimd = false;

            switch (op) {
                case 0x7E:
                case 0x11:
                    isSimd = true;
                    break;
            }

            int opc = 0;
            if (isSimd) {
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        isSimd = false;
                        break;
                }
            }

            if (isSimd) {
                int pre;
                boolean rexVexW = size == QWORD;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                asm.simdPrefix(src, Register.None, dst, pre, opc, attributes);
                asm.emitByte(op);
                asm.emitOperandHelper(src, dst, 0);
            } else {
                emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
                asm.emitOperandHelper(src, dst, 0);
            }
        }
    }
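    // Illustrative note (not part of the original source): MR is the store direction
    // of the same instructions. "mov" exists as both 0x89 (MR: ModRM.reg is the
    // source) and 0x8B (RM: ModRM.reg is the destination), so register-to-register
    // moves can be encoded either way, while memory destinations need the MR form.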
    /**
     * Opcodes with operand order of M.
     */
    public static class AMD64MOp extends AMD64Op {
        // @formatter:off
        public static final AMD64MOp NOT = new AMD64MOp("NOT", 0xF7, 2);
        public static final AMD64MOp NEG = new AMD64MOp("NEG", 0xF7, 3);
        public static final AMD64MOp MUL = new AMD64MOp("MUL", 0xF7, 4);
        public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
        public static final AMD64MOp DIV = new AMD64MOp("DIV", 0xF7, 6);
        public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
        public static final AMD64MOp INC = new AMD64MOp("INC", 0xFF, 0);
        public static final AMD64MOp DEC = new AMD64MOp("DEC", 0xFF, 1);
        public static final AMD64MOp PUSH = new AMD64MOp("PUSH", 0xFF, 6);
        public static final AMD64MOp POP = new AMD64MOp("POP", 0x8F, 0, OpAssertion.WordOrDwordAssertion);
        // @formatter:on

        private final int ext;

        protected AMD64MOp(String opcode, int op, int ext) {
            this(opcode, 0, op, ext);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext) {
            this(opcode, prefix, op, ext, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MOp(String opcode, int op, int ext, OpAssertion assertion) {
            this(opcode, 0, op, ext, assertion);
        }

        protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, 0, prefix, op, assertion, null);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
            assert verify(asm, size, dst, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
            assert verify(asm, size, null, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, 0);
        }
    }
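    // Illustrative example (not part of the original source): M-form opcodes reuse
    // ModRM.reg as an opcode extension, so AMD64MOp.NEG.emit(asm, DWORD, rcx) emits
    // F7 D9 ("neg ecx"): opcode 0xF7 with /3 in the reg field and rcx in ModRM.rm.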
    /**
     * Opcodes with operand order of MI.
     */
    public static class AMD64MIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true, 0xC6, 0, OpAssertion.ByteAssertion);
        public static final AMD64MIOp MOV = new AMD64MIOp("MOV", false, 0xC7, 0);
        public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
        // @formatter:on

        private final int ext;

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
            this(opcode, immIsByte, op, ext, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
            this(opcode, immIsByte, 0, op, ext, assertion);
        }

        protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
            super(opcode, immIsByte, prefix, op, assertion);
            this.ext = ext;
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
            assert verify(asm, size, dst, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
            asm.emitModRM(ext, dst);
            emitImmediate(asm, size, imm);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
            assert verify(asm, size, null, null);
            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
            asm.emitOperandHelper(ext, dst, immediateSize(size));
            emitImmediate(asm, size, imm);
        }
    }
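    // Illustrative example (not part of the original source):
    // AMD64MIOp.MOV.emit(asm, DWORD, rax, 0x1234) encodes C7 C0 34 12 00 00
    // ("mov eax, 0x1234"). For the memory form, immediateSize(size) is passed to
    // emitOperandHelper so a RIP-relative displacement annotation still points at
    // the start of the next instruction, which follows the trailing immediate.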
    /**
     * Opcodes with operand order of RMI.
     *
     * We only have one form of round as the operation is always treated with single variant input,
     * making its extension to 3 address forms redundant.
     */
    public static class AMD64RMIOp extends AMD64ImmOp {
        // @formatter:off
        public static final AMD64RMIOp IMUL = new AMD64RMIOp("IMUL", false, 0x69);
        public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true, 0x6B);
        public static final AMD64RMIOp ROUNDSS = new AMD64RMIOp("ROUNDSS", true, P_0F3A, 0x0A, OpAssertion.PackedDoubleAssertion);
        public static final AMD64RMIOp ROUNDSD = new AMD64RMIOp("ROUNDSD", true, P_0F3A, 0x0B, OpAssertion.PackedDoubleAssertion);
        // @formatter:on

        protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
            this(opcode, immIsByte, 0, op, OpAssertion.WordOrLargerAssertion);
        }

        protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
            super(opcode, immIsByte, prefix, op, assertion);
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
            assert verify(asm, size, dst, src);
            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x0A:
                case 0x0B:
                    isSimd = true;
                    noNds = true;
                    break;
            }

            int opc = 0;
            if (isSimd) {
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        isSimd = false;
                        break;
                }
            }

            if (isSimd) {
                int pre;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                int encode;
                if (noNds) {
                    encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
                } else {
                    encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitByte(0xC0 | encode);
                emitImmediate(asm, size, imm);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
                asm.emitModRM(dst, src);
                emitImmediate(asm, size, imm);
            }
        }

        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
            assert verify(asm, size, dst, null);

            boolean isSimd = false;
            boolean noNds = false;

            switch (op) {
                case 0x0A:
                case 0x0B:
                    isSimd = true;
                    noNds = true;
                    break;
            }

            int opc = 0;
            if (isSimd) {
                switch (prefix2) {
                    case P_0F:
                        opc = VexOpcode.VEX_OPCODE_0F;
                        break;
                    case P_0F38:
                        opc = VexOpcode.VEX_OPCODE_0F_38;
                        break;
                    case P_0F3A:
                        opc = VexOpcode.VEX_OPCODE_0F_3A;
                        break;
                    default:
                        isSimd = false;
                        break;
                }
            }

            if (isSimd) {
                int pre;
                AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
                int curPrefix = size.sizePrefix | prefix1;
                switch (curPrefix) {
                    case 0x66:
                        pre = VexSimdPrefix.VEX_SIMD_66;
                        break;
                    case 0xF2:
                        pre = VexSimdPrefix.VEX_SIMD_F2;
                        break;
                    case 0xF3:
                        pre = VexSimdPrefix.VEX_SIMD_F3;
                        break;
                    default:
                        pre = VexSimdPrefix.VEX_SIMD_NONE;
                        break;
                }
                if (noNds) {
                    asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
                } else {
                    asm.simdPrefix(dst, dst, src, pre, opc, attributes);
                }
                asm.emitByte(op);
                asm.emitOperandHelper(dst, src, immediateSize(size));
                emitImmediate(asm, size, imm);
            } else {
                emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
                asm.emitOperandHelper(dst, src, immediateSize(size));
                emitImmediate(asm, size, imm);
            }
        }
    }

    public static class SSEOp extends AMD64RMOp {
        // @formatter:off
        public static final SSEOp CVTSI2SS = new SSEOp("CVTSI2SS", 0xF3, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
        public static final SSEOp CVTSI2SD = new SSEOp("CVTSI2SD", 0xF2, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
        public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
        public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
        public static final SSEOp UCOMIS = new SSEOp("UCOMIS", P_0F, 0x2E, OpAssertion.PackedFloatAssertion);
        public static final SSEOp SQRT = new SSEOp("SQRT", P_0F, 0x51);
        public static final SSEOp AND = new SSEOp("AND", P_0F, 0x54, OpAssertion.PackedFloatAssertion);
        public static final SSEOp ANDN = new SSEOp("ANDN", P_0F, 0x55, OpAssertion.PackedFloatAssertion);
        public static final SSEOp OR = new SSEOp("OR", P_0F, 0x56, OpAssertion.PackedFloatAssertion);
        public static final SSEOp XOR = new SSEOp("XOR", P_0F, 0x57, OpAssertion.PackedFloatAssertion);
        public static final SSEOp ADD = new SSEOp("ADD", P_0F, 0x58);
        public static final SSEOp MUL = new SSEOp("MUL", P_0F, 0x59);
        public static final SSEOp CVTSS2SD = new SSEOp("CVTSS2SD", P_0F, 0x5A, OpAssertion.SingleAssertion);
        public static final SSEOp CVTSD2SS = new SSEOp("CVTSD2SS", P_0F, 0x5A, OpAssertion.DoubleAssertion);
        public static final SSEOp SUB = new SSEOp("SUB", P_0F, 0x5C);
        public static final SSEOp MIN = new SSEOp("MIN", P_0F, 0x5D);
        public static final SSEOp DIV = new SSEOp("DIV", P_0F, 0x5E);
        public static final SSEOp MAX = new SSEOp("MAX", P_0F, 0x5F);
        // @formatter:on

        protected SSEOp(String opcode, int prefix, int op) {
            this(opcode, prefix, op, OpAssertion.FloatAssertion);
        }

        protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
            this(opcode, 0, prefix, op, assertion);
        }

        protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
            super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
        }
    }
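    // Illustrative note (not part of the original source): the mandatory prefix comes
    // from the OperandSize (SS -> 0xF3, SD -> 0xF2, PD -> 0x66), so one SSEOp covers
    // the scalar and packed variants; e.g. SSEOp.ADD.emit(asm, SS, xmm0, xmm1) on a
    // target without AVX should yield the legacy encoding F3 0F 58 C1
    // ("addss xmm0, xmm1").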
AVXOp("AND", P_0F, 0x54, OpAssertion.PackedFloatAssertion); 1590 public static final AVXOp ANDN = new AVXOp("ANDN", P_0F, 0x55, OpAssertion.PackedFloatAssertion); 1591 public static final AVXOp OR = new AVXOp("OR", P_0F, 0x56, OpAssertion.PackedFloatAssertion); 1592 public static final AVXOp XOR = new AVXOp("XOR", P_0F, 0x57, OpAssertion.PackedFloatAssertion); 1593 public static final AVXOp ADD = new AVXOp("ADD", P_0F, 0x58); 1594 public static final AVXOp MUL = new AVXOp("MUL", P_0F, 0x59); 1595 public static final AVXOp SUB = new AVXOp("SUB", P_0F, 0x5C); 1596 public static final AVXOp MIN = new AVXOp("MIN", P_0F, 0x5D); 1597 public static final AVXOp DIV = new AVXOp("DIV", P_0F, 0x5E); 1598 public static final AVXOp MAX = new AVXOp("MAX", P_0F, 0x5F); 1599 // @formatter:on 1600 AVXOp(String opcode, int prefix, int op)1601 protected AVXOp(String opcode, int prefix, int op) { 1602 this(opcode, prefix, op, OpAssertion.FloatAssertion); 1603 } 1604 AVXOp(String opcode, int prefix, int op, OpAssertion assertion)1605 protected AVXOp(String opcode, int prefix, int op, OpAssertion assertion) { 1606 this(opcode, 0, prefix, op, assertion); 1607 } 1608 AVXOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion)1609 protected AVXOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) { 1610 super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.AVX); 1611 } 1612 } 1613 1614 /** 1615 * Arithmetic operation with operand order of RM, MR or MI. 1616 */ 1617 public static final class AMD64BinaryArithmetic { 1618 // @formatter:off 1619 public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0); 1620 public static final AMD64BinaryArithmetic OR = new AMD64BinaryArithmetic("OR", 1); 1621 public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2); 1622 public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3); 1623 public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4); 1624 public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5); 1625 public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6); 1626 public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7); 1627 // @formatter:on 1628 1629 private final AMD64MIOp byteImmOp; 1630 private final AMD64MROp byteMrOp; 1631 private final AMD64RMOp byteRmOp; 1632 1633 private final AMD64MIOp immOp; 1634 private final AMD64MIOp immSxOp; 1635 private final AMD64MROp mrOp; 1636 private final AMD64RMOp rmOp; 1637 AMD64BinaryArithmetic(String opcode, int code)1638 private AMD64BinaryArithmetic(String opcode, int code) { 1639 int baseOp = code << 3; 1640 1641 byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion); 1642 byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion); 1643 byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion); 1644 1645 immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.WordOrLargerAssertion); 1646 immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.WordOrLargerAssertion); 1647 mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.WordOrLargerAssertion); 1648 rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.WordOrLargerAssertion); 1649 } 1650 getMIOpcode(OperandSize size, boolean sx)1651 public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) { 1652 if (size == BYTE) { 1653 return byteImmOp; 1654 } 
    /**
     * Arithmetic operation with operand order of RM, MR or MI.
     */
    public static final class AMD64BinaryArithmetic {
        // @formatter:off
        public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
        public static final AMD64BinaryArithmetic OR  = new AMD64BinaryArithmetic("OR",  1);
        public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
        public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
        public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
        public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
        public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
        public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
        // @formatter:on

        private final AMD64MIOp byteImmOp;
        private final AMD64MROp byteMrOp;
        private final AMD64RMOp byteRmOp;

        private final AMD64MIOp immOp;
        private final AMD64MIOp immSxOp;
        private final AMD64MROp mrOp;
        private final AMD64RMOp rmOp;

        private AMD64BinaryArithmetic(String opcode, int code) {
            int baseOp = code << 3;

            byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
            byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
            byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);

            immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.WordOrLargerAssertion);
            immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.WordOrLargerAssertion);
            mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.WordOrLargerAssertion);
            rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.WordOrLargerAssertion);
        }

        public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
            if (size == BYTE) {
                return byteImmOp;
            } else if (sx) {
                return immSxOp;
            } else {
                return immOp;
            }
        }

        public AMD64MROp getMROpcode(OperandSize size) {
            if (size == BYTE) {
                return byteMrOp;
            } else {
                return mrOp;
            }
        }

        public AMD64RMOp getRMOpcode(OperandSize size) {
            if (size == BYTE) {
                return byteRmOp;
            } else {
                return rmOp;
            }
        }
    }

    /**
     * Shift operation with operand order of M1, MC or MI.
     */
    public static final class AMD64Shift {
        // @formatter:off
        public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
        public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
        public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
        public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
        public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
        public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
        public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
        // @formatter:on

        public final AMD64MOp m1Op;
        public final AMD64MOp mcOp;
        public final AMD64MIOp miOp;

        private AMD64Shift(String opcode, int code) {
            m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.WordOrLargerAssertion);
            mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.WordOrLargerAssertion);
            miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.WordOrLargerAssertion);
        }
    }
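    // Illustrative example of how the AMD64BinaryArithmetic table maps to machine code:
    // for ADD (code 0), getMIOpcode(DWORD, true) yields the sign-extended-byte form 0x83
    // with ModRM opcode extension /0, so
    //     asm.addl(AMD64.rax, 8);          // emits 83 C0 08
    // while the register-register rmOp (baseOp | 0x03) gives
    //     asm.addl(AMD64.rax, AMD64.rcx);  // emits 03 C1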
    public final void addl(AMD64Address dst, int imm32) {
        ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void addl(Register dst, int imm32) {
        ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void addl(Register dst, Register src) {
        ADD.rmOp.emit(this, DWORD, dst, src);
    }

    public final void addpd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x58);
        emitByte(0xC0 | encode);
    }

    public final void addpd(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x58);
        emitOperandHelper(dst, src, 0);
    }

    public final void addsd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x58);
        emitByte(0xC0 | encode);
    }

    public final void addsd(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x58);
        emitOperandHelper(dst, src, 0);
    }

    private void addrNop4() {
        // 4 bytes: NOP DWORD PTR [EAX+0]
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc);
        emitByte(0); // 8-bits offset (1 byte)
    }

    private void addrNop5() {
        // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4);
        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
        emitByte(0); // 8-bits offset (1 byte)
    }

    private void addrNop7() {
        // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc);
        emitInt(0); // 32-bits offset (4 bytes)
    }

    private void addrNop8() {
        // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
        emitByte(0x0F);
        emitByte(0x1F);
        emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4);
        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
        emitInt(0); // 32-bits offset (4 bytes)
    }
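    // The addrNop4/5/7/8 helpers above emit the recommended multi-byte "0F 1F" NOP forms;
    // e.g. addrNop4() produces 0F 1F 40 00, which disassembles as "nop dword ptr [eax+0]"
    // and serves as patching-safe padding (see the size table in nop(int) below).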
    public final void andl(Register dst, int imm32) {
        AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void andl(Register dst, Register src) {
        AND.rmOp.emit(this, DWORD, dst, src);
    }

    public final void andpd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x54);
        emitByte(0xC0 | encode);
    }

    public final void andpd(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x54);
        emitOperandHelper(dst, src, 0);
    }

    public final void bsfq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding(), src.encoding());
        emitByte(0x0F);
        emitByte(0xBC);
        emitByte(0xC0 | encode);
    }

    public final void bsrl(Register dst, Register src) {
        int encode = prefixAndEncode(dst.encoding(), src.encoding());
        emitByte(0x0F);
        emitByte(0xBD);
        emitByte(0xC0 | encode);
    }

    public final void bswapl(Register reg) {
        int encode = prefixAndEncode(reg.encoding);
        emitByte(0x0F);
        emitByte(0xC8 | encode);
    }

    public final void cdql() {
        emitByte(0x99);
    }

    public final void cmovl(ConditionFlag cc, Register dst, Register src) {
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0x40 | cc.getValue());
        emitByte(0xC0 | encode);
    }

    public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0x40 | cc.getValue());
        emitOperandHelper(dst, src, 0);
    }

    public final void cmpl(Register dst, int imm32) {
        CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void cmpl(Register dst, Register src) {
        CMP.rmOp.emit(this, DWORD, dst, src);
    }

    public final void cmpl(Register dst, AMD64Address src) {
        CMP.rmOp.emit(this, DWORD, dst, src);
    }

    public final void cmpl(AMD64Address dst, int imm32) {
        CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    /**
     * The 8-bit cmpxchg compares the value at adr with the contents of X86.rax; if they are
     * equal, reg is stored into adr, otherwise the value at adr is loaded into X86.rax. The ZF
     * is set if the compared values were equal, and cleared otherwise.
     */
    public final void cmpxchgb(Register reg, AMD64Address adr) { // cmpxchg
        prefixb(adr, reg);
        emitByte(0x0F);
        emitByte(0xB0);
        emitOperandHelper(reg, adr, 0);
    }

    /**
     * The 16-bit cmpxchg compares the value at adr with the contents of X86.rax; if they are
     * equal, reg is stored into adr, otherwise the value at adr is loaded into X86.rax. The ZF
     * is set if the compared values were equal, and cleared otherwise.
     */
    public final void cmpxchgw(Register reg, AMD64Address adr) { // cmpxchg
        emitByte(0x66); // Switch to 16-bit mode.
        prefix(adr, reg);
        emitByte(0x0F);
        emitByte(0xB1);
        emitOperandHelper(reg, adr, 0);
    }
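    // Illustrative usage sketch (not in the original): an atomic 32-bit compare-and-swap,
    // with the expected value already in rax and the replacement in newVal (a hypothetical
    // register variable):
    //     asm.lock();                  // 0xF0 prefix, see lock() below
    //     asm.cmpxchgl(newVal, addr);  // ZF indicates success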
    /**
     * The 32-bit cmpxchg compares the value at adr with the contents of X86.rax; if they are
     * equal, reg is stored into adr, otherwise the value at adr is loaded into X86.rax. The ZF
     * is set if the compared values were equal, and cleared otherwise.
     */
    public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg
        prefix(adr, reg);
        emitByte(0x0F);
        emitByte(0xB1);
        emitOperandHelper(reg, adr, 0);
    }

    public final void cvtsi2sdl(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x2A);
        emitByte(0xC0 | encode);
    }

    public final void cvttsd2sil(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x2C);
        emitByte(0xC0 | encode);
    }

    protected final void decl(AMD64Address dst) {
        prefix(dst);
        emitByte(0xFF);
        emitOperandHelper(1, dst, 0);
    }

    public final void divsd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x5E);
        emitByte(0xC0 | encode);
    }

    public final void evmovdquq(Register dst, AMD64Address src, int vectorLen) {
        assert supports(CPUFeature.AVX512F);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(vectorLen, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true, target);
        attributes.setAddressAttributes(/* tuple_type */ EvexTupleType.EVEX_FVM, /* input_size_in_bits */ EvexInputSizeInBits.EVEX_NObit);
        attributes.setIsEvexInstruction();
        vexPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6F);
        emitOperandHelper(dst, src, 0);
    }

    public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src, int vectorLen) {
        assert supports(CPUFeature.AVX512BW);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(vectorLen, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false, target);
        attributes.setIsEvexInstruction();
        attributes.setAddressAttributes(/* tuple_type */ EvexTupleType.EVEX_FVM, /* input_size_in_bits */ EvexInputSizeInBits.EVEX_NObit);
        vexPrefix(src, nds, kdst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x74);
        emitOperandHelper(kdst, src, 0);
    }
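    // The EVEX-encoded evpcmpeqb above compares packed bytes and writes the per-element
    // result into the mask register kdst; the mask can then be examined with the
    // kortestql/kmovql instructions defined further below.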
    public final void hlt() {
        emitByte(0xF4);
    }

    public final void imull(Register dst, Register src, int value) {
        if (isByte(value)) {
            AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value);
        } else {
            AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value);
        }
    }

    protected final void incl(AMD64Address dst) {
        prefix(dst);
        emitByte(0xFF);
        emitOperandHelper(0, dst, 0);
    }

    public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) {
        int shortSize = 2;
        int longSize = 6;
        long disp = jumpTarget - position();
        if (!forceDisp32 && isByte(disp - shortSize)) {
            // 0111 tttn #8-bit disp
            emitByte(0x70 | cc.getValue());
            emitByte((int) ((disp - shortSize) & 0xFF));
        } else {
            // 0000 1111 1000 tttn #32-bit disp
            assert isInt(disp - longSize) : "must be 32bit offset (call4)";
            emitByte(0x0F);
            emitByte(0x80 | cc.getValue());
            emitInt((int) (disp - longSize));
        }
    }

    public final void jcc(ConditionFlag cc, Label l) {
        assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc";
        if (l.isBound()) {
            jcc(cc, l.position(), false);
        } else {
            // Note: could eliminate cond. jumps to this jump if condition
            // is the same however, seems to be rather unlikely case.
            // Note: use jccb() if label to be bound is very close to get
            // an 8-bit displacement
            l.addPatchAt(position());
            emitByte(0x0F);
            emitByte(0x80 | cc.getValue());
            emitInt(0);
        }
    }

    public final void jccb(ConditionFlag cc, Label l) {
        if (l.isBound()) {
            int shortSize = 2;
            int entry = l.position();
            assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp";
            long disp = entry - position();
            // 0111 tttn #8-bit disp
            emitByte(0x70 | cc.getValue());
            emitByte((int) ((disp - shortSize) & 0xFF));
        } else {
            l.addPatchAt(position());
            emitByte(0x70 | cc.getValue());
            emitByte(0);
        }
    }

    public final void jmp(int jumpTarget, boolean forceDisp32) {
        int shortSize = 2;
        int longSize = 5;
        long disp = jumpTarget - position();
        if (!forceDisp32 && isByte(disp - shortSize)) {
            emitByte(0xEB);
            emitByte((int) ((disp - shortSize) & 0xFF));
        } else {
            emitByte(0xE9);
            emitInt((int) (disp - longSize));
        }
    }
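    // Worked example for the displacement arithmetic above (illustrative): for a bound
    // label 18 bytes ahead of the jump opcode, disp = 18, so the short form applies and
    // jcc emits 0x70|cc followed by the byte disp - shortSize = 16 (0x10), i.e. the
    // distance measured from the end of the 2-byte instruction.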
    @Override
    public final void jmp(Label l) {
        if (l.isBound()) {
            jmp(l.position(), false);
        } else {
            // By default, forward jumps are always 32-bit displacements, since
            // we can't yet know where the label will be bound. If you're sure that
            // the forward jump will not run beyond 256 bytes, use jmpb to
            // force an 8-bit displacement.

            l.addPatchAt(position());
            emitByte(0xE9);
            emitInt(0);
        }
    }

    public final void jmp(Register entry) {
        int encode = prefixAndEncode(entry.encoding);
        emitByte(0xFF);
        emitByte(0xE0 | encode);
    }

    public final void jmp(AMD64Address adr) {
        prefix(adr);
        emitByte(0xFF);
        emitOperandHelper(rsp, adr, 0);
    }

    public final void jmpb(Label l) {
        if (l.isBound()) {
            int shortSize = 2;
            int entry = l.position();
            assert isByte((entry - position()) + shortSize) : "Displacement too large for a short jmp";
            long offs = entry - position();
            emitByte(0xEB);
            emitByte((int) ((offs - shortSize) & 0xFF));
        } else {
            l.addPatchAt(position());
            emitByte(0xEB);
            emitByte(0);
        }
    }

    // This instruction produces ZF or CF flags
    public final void kortestql(Register src1, Register src2) {
        assert supports(CPUFeature.AVX512BW);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false, target);
        int encode = vexPrefixAndEncode(src1, Register.None, src2, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x98);
        emitByte(0xC0 | encode);
    }

    public final void kmovql(Register dst, Register src) {
        assert supports(CPUFeature.AVX512BW);
        if (src.getRegisterCategory().equals(AMD64.MASK)) {
            // kmovql(KRegister dst, KRegister src)
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false, target);
            int encode = vexPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x90);
            emitByte(0xC0 | encode);
        } else {
            // kmovql(KRegister dst, Register src)
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false, target);
            int encode = vexPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x92);
            emitByte(0xC0 | encode);
        }
    }

    public final void lead(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x8D);
        emitOperandHelper(dst, src, 0);
    }

    public final void leaq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x8D);
        emitOperandHelper(dst, src, 0);
    }

    public final void leave() {
        emitByte(0xC9);
    }

    public final void lock() {
        emitByte(0xF0);
    }

    public final void movapd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x28);
        emitByte(0xC0 | encode);
    }
    public final void movaps(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x28);
        emitByte(0xC0 | encode);
    }

    public final void movb(AMD64Address dst, int imm8) {
        prefix(dst);
        emitByte(0xC6);
        emitOperandHelper(0, dst, 1);
        emitByte(imm8);
    }

    public final void movb(AMD64Address dst, Register src) {
        assert src.getRegisterCategory().equals(AMD64.CPU) : "must have byte register";
        prefixb(dst, src);
        emitByte(0x88);
        emitOperandHelper(src, dst, 0);
    }

    public final void movl(Register dst, int imm32) {
        int encode = prefixAndEncode(dst.encoding);
        emitByte(0xB8 | encode);
        emitInt(imm32);
    }

    public final void movl(Register dst, Register src) {
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x8B);
        emitByte(0xC0 | encode);
    }

    public final void movl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x8B);
        emitOperandHelper(dst, src, 0);
    }

    /**
     * @param wide use 4 byte encoding for displacements that would normally fit in a byte
     */
    public final void movl(Register dst, AMD64Address src, boolean wide) {
        prefix(src, dst);
        emitByte(0x8B);
        emitOperandHelper(dst, src, wide, 0);
    }

    public final void movl(AMD64Address dst, int imm32) {
        prefix(dst);
        emitByte(0xC7);
        emitOperandHelper(0, dst, 4);
        emitInt(imm32);
    }

    public final void movl(AMD64Address dst, Register src) {
        prefix(dst, src);
        emitByte(0x89);
        emitOperandHelper(src, dst, 0);
    }
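    // Illustrative encoding note: movl(Register, int) above uses the short "B8+rd id" form,
    // so movl(AMD64.rcx, 0x1234) emits B9 34 12 00 00 (no ModRM byte, the register is
    // folded into the opcode).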
    /**
     * New CPUs require the use of movsd and movss to avoid partial register stalls when loading
     * from memory. But for old Opterons, movlpd is used instead of movsd. The selection is done
     * in {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and
     * {@link AMD64MacroAssembler#movflt(Register, Register)}.
     */
    public final void movlpd(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x12);
        emitOperandHelper(dst, src, 0);
    }

    public final void movlhps(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, src, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x16);
        emitByte(0xC0 | encode);
    }

    public final void movq(Register dst, AMD64Address src) {
        movq(dst, src, false);
    }

    public final void movq(Register dst, AMD64Address src, boolean wide) {
        if (dst.getRegisterCategory().equals(AMD64.XMM)) {
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ wide, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
            simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x7E);
            emitOperandHelper(dst, src, wide, 0);
        } else {
            // gpr version of movq
            prefixq(src, dst);
            emitByte(0x8B);
            emitOperandHelper(dst, src, wide, 0);
        }
    }

    public final void movq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding, src.encoding);
        emitByte(0x8B);
        emitByte(0xC0 | encode);
    }

    public final void movq(AMD64Address dst, Register src) {
        if (src.getRegisterCategory().equals(AMD64.XMM)) {
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
            simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0xD6);
            emitOperandHelper(src, dst, 0);
        } else {
            // gpr version of movq
            prefixq(dst, src);
            emitByte(0x89);
            emitOperandHelper(src, dst, 0);
        }
    }

    public final void movsbl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0xBE);
        emitOperandHelper(dst, src, 0);
    }

    public final void movsbl(Register dst, Register src) {
        int encode = prefixAndEncode(dst.encoding, false, src.encoding, true);
        emitByte(0x0F);
        emitByte(0xBE);
        emitByte(0xC0 | encode);
    }

    public final void movsbq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x0F);
        emitByte(0xBE);
        emitOperandHelper(dst, src, 0);
    }

    public final void movsbq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0xBE);
        emitByte(0xC0 | encode);
    }
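    // movq above dispatches on the register category: XMM destinations get the SSE2 load
    // form (F3 0F 7E), XMM sources the store form (66 0F D6), and plain CPU registers fall
    // back to the ordinary REX.W-prefixed 8B/89 moves.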
    public final void movsd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x10);
        emitByte(0xC0 | encode);
    }

    public final void movsd(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x10);
        emitOperandHelper(dst, src, 0);
    }

    public final void movsd(AMD64Address dst, Register src) {
        assert src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x11);
        emitOperandHelper(src, dst, 0);
    }

    public final void movss(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x10);
        emitByte(0xC0 | encode);
    }

    public final void movss(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x10);
        emitOperandHelper(dst, src, 0);
    }

    public final void movss(AMD64Address dst, Register src) {
        assert src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x11);
        emitOperandHelper(src, dst, 0);
    }

    public final void mulpd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x59);
        emitByte(0xC0 | encode);
    }

    public final void mulpd(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x59);
        emitOperandHelper(dst, src, 0);
    }
    public final void mulsd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x59);
        emitByte(0xC0 | encode);
    }

    public final void mulsd(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x59);
        emitOperandHelper(dst, src, 0);
    }

    public final void mulss(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x59);
        emitByte(0xC0 | encode);
    }

    public final void movswl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0xBF);
        emitOperandHelper(dst, src, 0);
    }

    public final void movw(AMD64Address dst, int imm16) {
        emitByte(0x66); // switch to 16-bit mode
        prefix(dst);
        emitByte(0xC7);
        emitOperandHelper(0, dst, 2);
        emitShort(imm16);
    }

    public final void movw(AMD64Address dst, Register src) {
        emitByte(0x66);
        prefix(dst, src);
        emitByte(0x89);
        emitOperandHelper(src, dst, 0);
    }

    public final void movzbl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0xB6);
        emitOperandHelper(dst, src, 0);
    }

    public final void movzbl(Register dst, Register src) {
        AMD64RMOp.MOVZXB.emit(this, OperandSize.DWORD, dst, src);
    }

    public final void movzbq(Register dst, Register src) {
        AMD64RMOp.MOVZXB.emit(this, OperandSize.QWORD, dst, src);
    }

    public final void movzwl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x0F);
        emitByte(0xB7);
        emitOperandHelper(dst, src, 0);
    }

    public final void negl(Register dst) {
        NEG.emit(this, DWORD, dst);
    }

    public final void notl(Register dst) {
        NOT.emit(this, DWORD, dst);
    }

    public final void notq(Register dst) {
        NOT.emit(this, QWORD, dst);
    }

    @Override
    public final void ensureUniquePC() {
        nop();
    }

    public final void nop() {
        nop(1);
    }
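    // Illustrative use of nop(int) (not in the original): padding the code buffer up to a
    // 16-byte boundary, e.g. ahead of a loop header:
    //     int pad = (16 - (asm.position() & 15)) & 15;
    //     if (pad != 0) {
    //         asm.nop(pad);
    //     }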
    public void nop(int count) {
        int i = count;
        if (UseNormalNop) {
            assert i > 0 : " ";
            // The fancy nops aren't currently recognized by debuggers, making it a
            // pain to disassemble code while debugging. If asserts are on, speed is
            // clearly not an issue, so simply use the traditional single-byte nop
            // to do alignment.

            for (; i > 0; i--) {
                emitByte(0x90);
            }
            return;
        }

        if (UseAddressNop) {
            //
            // Using multi-byte nops "0x0F 0x1F [Address]" for AMD.
            // 1: 0x90
            // 2: 0x66 0x90
            // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
            // 4: 0x0F 0x1F 0x40 0x00
            // 5: 0x0F 0x1F 0x44 0x00 0x00
            // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
            // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
            // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
            // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
            // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
            // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

            // The remaining encodings are AMD-specific - use consecutive address nops

            // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
            // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
            // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
            // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
            // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
            // Size prefixes (0x66) are added for larger sizes

            while (i >= 22) {
                i -= 11;
                emitByte(0x66); // size prefix
                emitByte(0x66); // size prefix
                emitByte(0x66); // size prefix
                addrNop8();
            }
            // Generate first nop for size between 21-12
            switch (i) {
                case 21:
                    i -= 11;
                    emitByte(0x66); // size prefix
                    emitByte(0x66); // size prefix
                    emitByte(0x66); // size prefix
                    addrNop8();
                    break;
                case 20:
                case 19:
                    i -= 10;
                    emitByte(0x66); // size prefix
                    emitByte(0x66); // size prefix
                    addrNop8();
                    break;
                case 18:
                case 17:
                    i -= 9;
                    emitByte(0x66); // size prefix
                    addrNop8();
                    break;
                case 16:
                case 15:
                    i -= 8;
                    addrNop8();
                    break;
                case 14:
                case 13:
                    i -= 7;
                    addrNop7();
                    break;
                case 12:
                    i -= 6;
                    emitByte(0x66); // size prefix
                    addrNop5();
                    break;
                default:
                    assert i < 12;
            }

            // Generate second nop for size between 11-1
            switch (i) {
                case 11:
                    emitByte(0x66); // size prefix
                    emitByte(0x66); // size prefix
                    emitByte(0x66); // size prefix
                    addrNop8();
                    break;
                case 10:
                    emitByte(0x66); // size prefix
                    emitByte(0x66); // size prefix
                    addrNop8();
                    break;
                case 9:
                    emitByte(0x66); // size prefix
                    addrNop8();
                    break;
                case 8:
                    addrNop8();
                    break;
                case 7:
                    addrNop7();
                    break;
                case 6:
                    emitByte(0x66); // size prefix
                    addrNop5();
                    break;
                case 5:
                    addrNop5();
                    break;
                case 4:
                    addrNop4();
                    break;
                case 3:
                    // Don't use "0x0F 0x1F 0x00" - need patching safe padding
                    emitByte(0x66); // size prefix
                    emitByte(0x66); // size prefix
                    emitByte(0x90); // nop
                    break;
                case 2:
                    emitByte(0x66); // size prefix
                    emitByte(0x90); // nop
                    break;
                case 1:
                    emitByte(0x90); // nop
                    break;
                default:
                    assert i == 0;
            }
            return;
        }

        // Using nops with size prefixes "0x66 0x90".
        // From AMD Optimization Guide:
        // 1: 0x90
        // 2: 0x66 0x90
        // 3: 0x66 0x66 0x90
        // 4: 0x66 0x66 0x66 0x90
        // 5: 0x66 0x66 0x90 0x66 0x90
        // 6: 0x66 0x66 0x90 0x66 0x66 0x90
        // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
        // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
        // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
        // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
        //
        while (i > 12) {
            i -= 4;
            emitByte(0x66); // size prefix
            emitByte(0x66);
            emitByte(0x66);
            emitByte(0x90); // nop
        }
        // 1 - 12 nops
        if (i > 8) {
            if (i > 9) {
                i -= 1;
                emitByte(0x66);
            }
            i -= 3;
            emitByte(0x66);
            emitByte(0x66);
            emitByte(0x90);
        }
        // 1 - 8 nops
        if (i > 4) {
            if (i > 6) {
                i -= 1;
                emitByte(0x66);
            }
            i -= 3;
            emitByte(0x66);
            emitByte(0x66);
            emitByte(0x90);
        }
        switch (i) {
            case 4:
                emitByte(0x66);
                emitByte(0x66);
                emitByte(0x66);
                emitByte(0x90);
                break;
            case 3:
                emitByte(0x66);
                emitByte(0x66);
                emitByte(0x90);
                break;
            case 2:
                emitByte(0x66);
                emitByte(0x90);
                break;
            case 1:
                emitByte(0x90);
                break;
            default:
                assert i == 0;
        }
    }

    public final void orl(Register dst, Register src) {
        OR.rmOp.emit(this, DWORD, dst, src);
    }

    public final void orl(Register dst, int imm32) {
        OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void pop(Register dst) {
        int encode = prefixAndEncode(dst.encoding);
        emitByte(0x58 | encode);
    }

    public void popfq() {
        emitByte(0x9D);
    }

    public final void ptest(Register dst, Register src) {
        assert supports(CPUFeature.SSE4_1);
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
        emitByte(0x17);
        emitByte(0xC0 | encode);
    }

    public final void vptest(Register dst, Register src) {
        assert supports(CPUFeature.AVX);
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = vexPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
        emitByte(0x17);
        emitByte(0xC0 | encode);
    }
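    // ptest/vptest above perform a bitwise AND without writing a destination: ZF is set if
    // (dst & src) == 0 and CF is set if (src & ~dst) == 0, so only the flags are consumed
    // after the 0F 38 17 opcode is emitted.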
    public final void pcmpestri(Register dst, AMD64Address src, int imm8) {
        assert supports(CPUFeature.SSE4_2);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_3A, attributes);
        emitByte(0x61);
        emitOperandHelper(dst, src, 0);
        emitByte(imm8);
    }

    public final void pcmpestri(Register dst, Register src, int imm8) {
        assert supports(CPUFeature.SSE4_2);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_3A, attributes);
        emitByte(0x61);
        emitByte(0xC0 | encode);
        emitByte(imm8);
    }

    public final void pmovzxbw(Register dst, AMD64Address src) {
        assert supports(CPUFeature.SSE4_2);
        // XXX legacy_mode should be: _legacy_mode_bw
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false, target);
        attributes.setAddressAttributes(/* tuple_type */ EvexTupleType.EVEX_HVM, /* input_size_in_bits */ EvexInputSizeInBits.EVEX_NObit);
        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
        emitByte(0x30);
        emitOperandHelper(dst, src, 0);
    }

    public final void vpmovzxbw(Register dst, AMD64Address src, int vectorLen) {
        assert supports(CPUFeature.AVX);
        // XXX legacy_mode should be: _legacy_mode_bw
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(vectorLen, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false, target);
        attributes.setAddressAttributes(/* tuple_type */ EvexTupleType.EVEX_HVM, /* input_size_in_bits */ EvexInputSizeInBits.EVEX_NObit);
        vexPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
        emitByte(0x30);
        emitOperandHelper(dst, src, 0);
    }

    public final void push(Register src) {
        int encode = prefixAndEncode(src.encoding);
        emitByte(0x50 | encode);
    }

    public void pushfq() {
        emitByte(0x9c);
    }

    public final void paddd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0xFE);
        emitByte(0xC0 | encode);
    }

    public final void paddq(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0xD4);
        emitByte(0xC0 | encode);
    }

    public final void pextrw(Register dst, Register src, int imm8) {
        assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0xC5);
        emitByte(0xC0 | encode);
        emitByte(imm8);
    }
    public final void pinsrw(Register dst, Register src, int imm8) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0xC4);
        emitByte(0xC0 | encode);
        emitByte(imm8);
    }

    public final void por(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0xEB);
        emitByte(0xC0 | encode);
    }

    public final void pand(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0xDB);
        emitByte(0xC0 | encode);
    }

    public final void pxor(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0xEF);
        emitByte(0xC0 | encode);
    }

    public final void vpxor(Register dst, Register nds, Register src) {
        assert supports(CPUFeature.AVX);
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = vexPrefixAndEncode(dst, nds, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0xEF);
        emitByte(0xC0 | encode);
    }

    public final void vpxor(Register dst, Register nds, AMD64Address src) {
        assert supports(CPUFeature.AVX);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true, target);
        attributes.setAddressAttributes(/* tuple_type */ EvexTupleType.EVEX_FV, /* input_size_in_bits */ EvexInputSizeInBits.EVEX_32bit);
        vexPrefix(src, nds, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0xEF);
        emitOperandHelper(dst, src, 0);
    }
    public final void pslld(Register dst, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        // XMM6 is for /6 encoding: 66 0F 72 /6 ib
        int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x72);
        emitByte(0xC0 | encode);
        emitByte(imm8 & 0xFF);
    }

    public final void psllq(Register dst, Register shift) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && shift.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, shift, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0xF3);
        emitByte(0xC0 | encode);
    }

    public final void psllq(Register dst, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        // XMM6 is for /6 encoding: 66 0F 73 /6 ib
        int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x73);
        emitByte(0xC0 | encode);
        emitByte(imm8);
    }

    public final void psrad(Register dst, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        // XMM4 is for /4 encoding: 66 0F 72 /4 ib
        int encode = simdPrefixAndEncode(AMD64.xmm4, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x72);
        emitByte(0xC0 | encode);
        emitByte(imm8);
    }
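    // Encoding note for the immediate shift group used above and below: the 66 0F 72 and
    // 66 0F 73 opcodes carry no register operand in the reg field of the ModRM byte;
    // instead that field holds an opcode extension, which is why a fixed register
    // (AMD64.xmm2, xmm4, xmm6, ...) is passed to simdPrefixAndEncode purely to encode the
    // /2, /4 or /6 digit.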
    public final void psrld(Register dst, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        // XMM2 is for /2 encoding: 66 0F 72 /2 ib
        int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x72);
        emitByte(0xC0 | encode);
        emitByte(imm8);
    }

    public final void psrlq(Register dst, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        // XMM2 is for /2 encoding: 66 0F 73 /2 ib
        int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x73);
        emitByte(0xC0 | encode);
        emitByte(imm8);
    }

    public final void psrldq(Register dst, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        // XMM3 is for /3 encoding: 66 0F 73 /3 ib
        int encode = simdPrefixAndEncode(AMD64.xmm3, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x73);
        emitByte(0xC0 | encode);
        emitByte(imm8);
    }

    public final void pshufd(Register dst, Register src, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x70);
        emitByte(0xC0 | encode);
        emitByte(imm8);
    }

    public final void psubd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0xFA);
        emitByte(0xC0 | encode);
    }

    public final void rcpps(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ true, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x53);
        emitByte(0xC0 | encode);
    }

    public final void ret(int imm16) {
        if (imm16 == 0) {
            emitByte(0xC3);
        } else {
            emitByte(0xC2);
            emitShort(imm16);
        }
    }
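    // sarl and shll below pick between two encodings: a shift count of exactly 1 uses the
    // shorter 0xD1 form with no immediate, anything else the 0xC1 form with an explicit
    // count byte.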
    public final void pshufd(Register dst, Register src, int imm8) {
        assert isUByte(imm8) : "invalid value";
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x70);
        emitByte(0xC0 | encode);
        emitByte(imm8);
    }

    public final void psubd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0xFA);
        emitByte(0xC0 | encode);
    }

    public final void rcpps(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ true, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x53);
        emitByte(0xC0 | encode);
    }

    public final void ret(int imm16) {
        if (imm16 == 0) {
            emitByte(0xC3);
        } else {
            emitByte(0xC2);
            emitShort(imm16);
        }
    }

    public final void sarl(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        int encode = prefixAndEncode(dst.encoding);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitByte(0xF8 | encode);
        } else {
            emitByte(0xC1);
            emitByte(0xF8 | encode);
            emitByte(imm8);
        }
    }

    public final void shll(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        int encode = prefixAndEncode(dst.encoding);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitByte(0xE0 | encode);
        } else {
            emitByte(0xC1);
            emitByte(0xE0 | encode);
            emitByte(imm8);
        }
    }

    public final void shll(Register dst) {
        int encode = prefixAndEncode(dst.encoding);
        emitByte(0xD3);
        emitByte(0xE0 | encode);
    }

    public final void shrl(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        int encode = prefixAndEncode(dst.encoding);
        emitByte(0xC1);
        emitByte(0xE8 | encode);
        emitByte(imm8);
    }

    public final void shrl(Register dst) {
        int encode = prefixAndEncode(dst.encoding);
        emitByte(0xD3);
        emitByte(0xE8 | encode);
    }

    public final void subl(AMD64Address dst, int imm32) {
        SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void subl(Register dst, int imm32) {
        SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
    }

    public final void subl(Register dst, Register src) {
        SUB.rmOp.emit(this, DWORD, dst, src);
    }

    public final void subpd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x5C);
        emitByte(0xC0 | encode);
    }

    public final void subsd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x5C);
        emitByte(0xC0 | encode);
    }

    public final void subsd(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x5C);
        emitOperandHelper(dst, src, 0);
    }

    public final void testl(Register dst, int imm32) {
        // Not using emitArith because test does not support sign-extension of 8-bit operands.
        int encode = dst.encoding;
        if (encode == 0) {
            emitByte(0xA9);
        } else {
            encode = prefixAndEncode(encode);
            emitByte(0xF7);
            emitByte(0xC0 | encode);
        }
        emitInt(imm32);
    }
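    // For example (a sketch): testl(rax, 1) takes the short EAX-only form and
    // emits A9 01 00 00 00, while testl(rbx, 1) emits F7 C3 01 00 00 00.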
    public final void testl(Register dst, Register src) {
        int encode = prefixAndEncode(dst.encoding, src.encoding);
        emitByte(0x85);
        emitByte(0xC0 | encode);
    }

    public final void testl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x85);
        emitOperandHelper(dst, src, 0);
    }

    public final void unpckhpd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x15);
        emitByte(0xC0 | encode);
    }

    public final void unpcklpd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x14);
        emitByte(0xC0 | encode);
    }

    public final void xorl(Register dst, Register src) {
        XOR.rmOp.emit(this, DWORD, dst, src);
    }

    public final void xorpd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x57);
        emitByte(0xC0 | encode);
    }

    public final void xorps(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x57);
        emitByte(0xC0 | encode);
    }

    protected final void decl(Register dst) {
        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
        int encode = prefixAndEncode(dst.encoding);
        emitByte(0xFF);
        emitByte(0xC8 | encode);
    }

    protected final void incl(Register dst) {
        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
        int encode = prefixAndEncode(dst.encoding);
        emitByte(0xFF);
        emitByte(0xC0 | encode);
    }
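    // For example (a sketch of the two-byte form used above): incl(rax) emits
    // FF C0 with no prefix, while incl(r8) needs REX.B and emits 41 FF C0.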
    private int prefixAndEncode(int regEnc) {
        return prefixAndEncode(regEnc, false);
    }

    private int prefixAndEncode(int regEnc, boolean byteinst) {
        if (regEnc >= 8) {
            emitByte(Prefix.REXB);
            return regEnc - 8;
        } else if (byteinst && regEnc >= 4) {
            emitByte(Prefix.REX);
        }
        return regEnc;
    }

    private int prefixqAndEncode(int regEnc) {
        if (regEnc < 8) {
            emitByte(Prefix.REXW);
            return regEnc;
        } else {
            emitByte(Prefix.REXWB);
            return regEnc - 8;
        }
    }

    private int prefixAndEncode(int dstEnc, int srcEnc) {
        return prefixAndEncode(dstEnc, false, srcEnc, false);
    }

    private int prefixAndEncode(int dstEncoding, boolean dstIsByte, int srcEncoding, boolean srcIsByte) {
        int srcEnc = srcEncoding;
        int dstEnc = dstEncoding;
        if (dstEnc < 8) {
            if (srcEnc >= 8) {
                emitByte(Prefix.REXB);
                srcEnc -= 8;
            } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) {
                emitByte(Prefix.REX);
            }
        } else {
            if (srcEnc < 8) {
                emitByte(Prefix.REXR);
            } else {
                emitByte(Prefix.REXRB);
                srcEnc -= 8;
            }
            dstEnc -= 8;
        }
        return dstEnc << 3 | srcEnc;
    }

    /**
     * Creates a prefix and the encoding of the lower 6 bits of the ModRM byte. It emits an operand
     * prefix. If the given operands exceed 3 bits, the 4th bit is encoded in the prefix.
     *
     * @param regEncoding the encoding of the register part of the ModRM byte
     * @param rmEncoding the encoding of the r/m part of the ModRM byte
     * @return the lower 6 bits of the ModRM byte that should be emitted
     */
    private int prefixqAndEncode(int regEncoding, int rmEncoding) {
        int rmEnc = rmEncoding;
        int regEnc = regEncoding;
        if (regEnc < 8) {
            if (rmEnc < 8) {
                emitByte(Prefix.REXW);
            } else {
                emitByte(Prefix.REXWB);
                rmEnc -= 8;
            }
        } else {
            if (rmEnc < 8) {
                emitByte(Prefix.REXWR);
            } else {
                emitByte(Prefix.REXWRB);
                rmEnc -= 8;
            }
            regEnc -= 8;
        }
        return regEnc << 3 | rmEnc;
    }
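    // Worked example for prefixqAndEncode above (a sketch): regEncoding = 0 (rax)
    // and rmEncoding = 9 (r9) emits REX.WB (0x49) and returns 0b000001, which the
    // caller then ORs into its ModRM byte.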
    private void vexPrefix(int rxb, int ndsEncoding, int pre, int opc, AMD64InstructionAttr attributes) {
        int vectorLen = attributes.getVectorLen();
        boolean vexW = attributes.isRexVexW();
        boolean isXorB = ((rxb & 0x3) > 0);
        if (isXorB || vexW || (opc == VexOpcode.VEX_OPCODE_0F_38) || (opc == VexOpcode.VEX_OPCODE_0F_3A)) {
            emitByte(Prefix.VEX_3BYTES);

            int byte1 = (rxb << 5);
            byte1 = ((~byte1) & 0xE0) | opc;
            emitByte(byte1);

            int byte2 = ((~ndsEncoding) & 0xF) << 3;
            byte2 |= (vexW ? VexPrefix.VEX_W : 0) | ((vectorLen > 0) ? 4 : 0) | pre;
            emitByte(byte2);
        } else {
            emitByte(Prefix.VEX_2BYTES);

            int byte1 = ((rxb & 0x4) > 0) ? VexPrefix.VEX_R : 0;
            byte1 = (~byte1) & 0x80;
            byte1 |= ((~ndsEncoding) & 0xF) << 3;
            byte1 |= ((vectorLen > 0) ? 4 : 0) | pre;
            emitByte(byte1);
        }
    }

    private void vexPrefix(AMD64Address adr, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
        int rxb = getRXB(src, adr);
        int ndsEncoding = nds.isValid() ? nds.encoding : 0;
        vexPrefix(rxb, ndsEncoding, pre, opc, attributes);
        setCurAttributes(attributes);
    }

    private int vexPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
        int rxb = getRXB(dst, src);
        int ndsEncoding = nds.isValid() ? nds.encoding : 0;
        vexPrefix(rxb, ndsEncoding, pre, opc, attributes);
        // return modrm byte components for operands
        return (((dst.encoding & 7) << 3) | (src.encoding & 7));
    }

    private void simdPrefix(Register xreg, Register nds, AMD64Address adr, int pre, int opc, AMD64InstructionAttr attributes) {
        if (supports(CPUFeature.AVX)) {
            vexPrefix(adr, nds, xreg, pre, opc, attributes);
        } else {
            switch (pre) {
                case VexSimdPrefix.VEX_SIMD_66:
                    emitByte(0x66);
                    break;
                case VexSimdPrefix.VEX_SIMD_F2:
                    emitByte(0xF2);
                    break;
                case VexSimdPrefix.VEX_SIMD_F3:
                    emitByte(0xF3);
                    break;
            }
            if (attributes.isRexVexW()) {
                prefixq(adr, xreg);
            } else {
                prefix(adr, xreg);
            }
            switch (opc) {
                case VexOpcode.VEX_OPCODE_0F:
                    emitByte(0x0F);
                    break;
                case VexOpcode.VEX_OPCODE_0F_38:
                    emitByte(0x0F);
                    emitByte(0x38);
                    break;
                case VexOpcode.VEX_OPCODE_0F_3A:
                    emitByte(0x0F);
                    emitByte(0x3A);
                    break;
            }
        }
    }

    private int simdPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
        if (supports(CPUFeature.AVX)) {
            return vexPrefixAndEncode(dst, nds, src, pre, opc, attributes);
        } else {
            switch (pre) {
                case VexSimdPrefix.VEX_SIMD_66:
                    emitByte(0x66);
                    break;
                case VexSimdPrefix.VEX_SIMD_F2:
                    emitByte(0xF2);
                    break;
                case VexSimdPrefix.VEX_SIMD_F3:
                    emitByte(0xF3);
                    break;
            }
            int encode;
            int dstEncoding = dst.encoding;
            int srcEncoding = src.encoding;
            if (attributes.isRexVexW()) {
                encode = prefixqAndEncode(dstEncoding, srcEncoding);
            } else {
                encode = prefixAndEncode(dstEncoding, srcEncoding);
            }
            switch (opc) {
                case VexOpcode.VEX_OPCODE_0F:
                    emitByte(0x0F);
                    break;
                case VexOpcode.VEX_OPCODE_0F_38:
                    emitByte(0x0F);
                    emitByte(0x38);
                    break;
                case VexOpcode.VEX_OPCODE_0F_3A:
                    emitByte(0x0F);
                    emitByte(0x3A);
                    break;
            }
            return encode;
        }
    }
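    // For example (a sketch of the two paths above): subsd(xmm0, xmm1) takes the
    // legacy branch on an SSE-only CPU and emits F2 0F 5C C1; with AVX available
    // the same call takes the VEX branch and emits C5 FB 5C C1 (two-byte VEX with
    // pp = F2 and vvvv = xmm0).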
    private static boolean needsRex(Register reg) {
        return reg.encoding >= MinEncodingNeedsRex;
    }

    private void prefix(AMD64Address adr) {
        if (needsRex(adr.getBase())) {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXXB);
            } else {
                emitByte(Prefix.REXB);
            }
        } else {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXX);
            }
        }
    }

    private void prefixq(AMD64Address adr) {
        if (needsRex(adr.getBase())) {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXWXB);
            } else {
                emitByte(Prefix.REXWB);
            }
        } else {
            if (needsRex(adr.getIndex())) {
                emitByte(Prefix.REXWX);
            } else {
                emitByte(Prefix.REXW);
            }
        }
    }

    private void prefixb(AMD64Address adr, Register reg) {
        prefix(adr, reg, true);
    }

    private void prefix(AMD64Address adr, Register reg) {
        prefix(adr, reg, false);
    }

    private void prefix(AMD64Address adr, Register reg, boolean byteinst) {
        if (reg.encoding < 8) {
            if (needsRex(adr.getBase())) {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXXB);
                } else {
                    emitByte(Prefix.REXB);
                }
            } else {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXX);
                } else if (byteinst && reg.encoding >= 4) {
                    emitByte(Prefix.REX);
                }
            }
        } else {
            if (needsRex(adr.getBase())) {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXRXB);
                } else {
                    emitByte(Prefix.REXRB);
                }
            } else {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXRX);
                } else {
                    emitByte(Prefix.REXR);
                }
            }
        }
    }

    private void prefixq(AMD64Address adr, Register src) {
        if (src.encoding < 8) {
            if (needsRex(adr.getBase())) {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXWXB);
                } else {
                    emitByte(Prefix.REXWB);
                }
            } else {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXWX);
                } else {
                    emitByte(Prefix.REXW);
                }
            }
        } else {
            if (needsRex(adr.getBase())) {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXWRXB);
                } else {
                    emitByte(Prefix.REXWRB);
                }
            } else {
                if (needsRex(adr.getIndex())) {
                    emitByte(Prefix.REXWRX);
                } else {
                    emitByte(Prefix.REXWR);
                }
            }
        }
    }

    public final void addq(Register dst, int imm32) {
        ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void addq(AMD64Address dst, int imm32) {
        ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void addq(Register dst, Register src) {
        ADD.rmOp.emit(this, QWORD, dst, src);
    }

    public final void addq(AMD64Address dst, Register src) {
        ADD.mrOp.emit(this, QWORD, dst, src);
    }

    public final void andq(Register dst, int imm32) {
        AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void bsrq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding(), src.encoding());
        emitByte(0x0F);
        emitByte(0xBD);
        emitByte(0xC0 | encode);
    }

    public final void bswapq(Register reg) {
        int encode = prefixqAndEncode(reg.encoding);
        emitByte(0x0F);
        emitByte(0xC8 | encode);
    }

    public final void cdqq() {
        emitByte(Prefix.REXW);
        emitByte(0x99);
    }

    public final void cmovq(ConditionFlag cc, Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0x40 | cc.getValue());
        emitByte(0xC0 | encode);
    }
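    // For example (a sketch): cmovq(ConditionFlag.Equal, rax, r9) emits
    // 49 0F 44 C1 (REX.WB from prefixqAndEncode, 0F 40+cc, ModRM C1).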
    public final void setb(ConditionFlag cc, Register dst) {
        int encode = prefixAndEncode(dst.encoding, true);
        emitByte(0x0F);
        emitByte(0x90 | cc.getValue());
        emitByte(0xC0 | encode);
    }

    public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x0F);
        emitByte(0x40 | cc.getValue());
        emitOperandHelper(dst, src, 0);
    }

    public final void cmpq(Register dst, int imm32) {
        CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void cmpq(Register dst, Register src) {
        CMP.rmOp.emit(this, QWORD, dst, src);
    }

    public final void cmpq(Register dst, AMD64Address src) {
        CMP.rmOp.emit(this, QWORD, dst, src);
    }

    public final void cmpxchgq(Register reg, AMD64Address adr) {
        prefixq(adr, reg);
        emitByte(0x0F);
        emitByte(0xB1);
        emitOperandHelper(reg, adr, 0);
    }
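    // Usage note (not enforced by this assembler): CMPXCHG is only atomic across
    // CPUs when preceded by a LOCK prefix, so callers needing an atomic
    // compare-and-swap are expected to emit lock() before cmpxchgq.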
    public final void cvtdq2pd(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0xE6);
        emitByte(0xC0 | encode);
    }

    public final void cvtsi2sdq(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x2A);
        emitByte(0xC0 | encode);
    }

    public final void cvttsd2siq(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x2C);
        emitByte(0xC0 | encode);
    }

    public final void cvttpd2dq(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0xE6);
        emitByte(0xC0 | encode);
    }

    protected final void decq(Register dst) {
        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xFF);
        emitByte(0xC8 | encode);
    }

    public final void decq(AMD64Address dst) {
        DEC.emit(this, QWORD, dst);
    }

    public final void imulq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding, src.encoding);
        emitByte(0x0F);
        emitByte(0xAF);
        emitByte(0xC0 | encode);
    }

    public final void incq(Register dst) {
        // Don't use this directly; use the macro assembler's incrementq() instead.
        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xFF);
        emitByte(0xC0 | encode);
    }

    public final void incq(AMD64Address dst) {
        INC.emit(this, QWORD, dst);
    }

    public final void movq(Register dst, long imm64) {
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xB8 | encode);
        emitLong(imm64);
    }
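    // For example (a sketch): movq(rax, 0x1122334455667788L) emits
    // 48 B8 88 77 66 55 44 33 22 11 (REX.W, B8+r, little-endian imm64).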
    public final void movslq(Register dst, int imm32) {
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xC7);
        emitByte(0xC0 | encode);
        emitInt(imm32);
    }

    public final void movdq(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6E);
        emitOperandHelper(dst, src, 0);
    }

    public final void movdq(AMD64Address dst, Register src) {
        assert src.getRegisterCategory().equals(AMD64.XMM);
        // swap src/dst to get correct prefix
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x7E);
        emitOperandHelper(src, dst, 0);
    }

    public final void movdq(Register dst, Register src) {
        if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
            int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x6E);
            emitByte(0xC0 | encode);
        } else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
            // swap src/dst to get correct prefix
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
            int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x7E);
            emitByte(0xC0 | encode);
        } else {
            throw new InternalError("should not reach here");
        }
    }

    public final void movdl(Register dst, Register src) {
        if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
            int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x6E);
            emitByte(0xC0 | encode);
        } else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
            // swap src/dst to get correct prefix
            AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
            int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
            emitByte(0x7E);
            emitByte(0xC0 | encode);
        } else {
            throw new InternalError("should not reach here");
        }
    }

    public final void movdl(Register dst, AMD64Address src) {
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6E);
        emitOperandHelper(dst, src, 0);
    }

    public final void movddup(Register dst, Register src) {
        assert supports(CPUFeature.SSE3);
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x12);
        emitByte(0xC0 | encode);
    }

    public final void movdqu(Register dst, AMD64Address src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6F);
        emitOperandHelper(dst, src, 0);
    }

    public final void movdqu(Register dst, Register src) {
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6F);
        emitByte(0xC0 | encode);
    }
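    // Note on the pair above: movdl and movdq share the 66 0F 6E (load) and
    // 66 0F 7E (store) opcodes; the only encoding difference is the W bit
    // (rexVexW above), which selects the 64-bit GPR<->XMM form (MOVQ) rather
    // than the 32-bit one (MOVD).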
    public final void vmovdqu(Register dst, AMD64Address src) {
        assert supports(CPUFeature.AVX);
        assert dst.getRegisterCategory().equals(AMD64.XMM);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        vexPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x6F);
        emitOperandHelper(dst, src, 0);
    }

    public final void vzeroupper() {
        assert supports(CPUFeature.AVX);
        AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
        vexPrefixAndEncode(AMD64.xmm0, AMD64.xmm0, AMD64.xmm0, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
        emitByte(0x77);
    }

    public final void movslq(AMD64Address dst, int imm32) {
        prefixq(dst);
        emitByte(0xC7);
        emitOperandHelper(0, dst, 4);
        emitInt(imm32);
    }

    public final void movslq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x63);
        emitOperandHelper(dst, src, 0);
    }

    public final void movslq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding, src.encoding);
        emitByte(0x63);
        emitByte(0xC0 | encode);
    }

    public final void negq(Register dst) {
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xF7);
        emitByte(0xD8 | encode);
    }

    public final void orq(Register dst, Register src) {
        OR.rmOp.emit(this, QWORD, dst, src);
    }

    public final void shlq(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        int encode = prefixqAndEncode(dst.encoding);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitByte(0xE0 | encode);
        } else {
            emitByte(0xC1);
            emitByte(0xE0 | encode);
            emitByte(imm8);
        }
    }

    public final void shlq(Register dst) {
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xD3);
        emitByte(0xE0 | encode);
    }

    public final void shrq(Register dst, int imm8) {
        assert isShiftCount(imm8 >> 1) : "illegal shift count";
        int encode = prefixqAndEncode(dst.encoding);
        if (imm8 == 1) {
            emitByte(0xD1);
            emitByte(0xE8 | encode);
        } else {
            emitByte(0xC1);
            emitByte(0xE8 | encode);
            emitByte(imm8);
        }
    }

    public final void shrq(Register dst) {
        int encode = prefixqAndEncode(dst.encoding);
        emitByte(0xD3);
        emitByte(0xE8 | encode);
    }

    public final void sbbq(Register dst, Register src) {
        SBB.rmOp.emit(this, QWORD, dst, src);
    }

    public final void subq(Register dst, int imm32) {
        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }

    public final void subq(AMD64Address dst, int imm32) {
        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
    }
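    // For example (a sketch): shlq(rax, 1) takes the shift-by-one form and emits
    // 48 D1 E0, while shlq(rax, 5) emits 48 C1 E0 05.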
    public final void subqWide(Register dst, int imm32) {
        // don't use the sign-extending version, forcing a 32-bit immediate
        SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32);
    }

    public final void subq(Register dst, Register src) {
        SUB.rmOp.emit(this, QWORD, dst, src);
    }

    public final void testq(Register dst, Register src) {
        int encode = prefixqAndEncode(dst.encoding, src.encoding);
        emitByte(0x85);
        emitByte(0xC0 | encode);
    }

    public final void btrq(Register src, int imm8) {
        int encode = prefixqAndEncode(src.encoding);
        emitByte(0x0F);
        emitByte(0xBA);
        emitByte(0xF0 | encode);
        emitByte(imm8);
    }

    public final void xaddb(AMD64Address dst, Register src) {
        prefixb(dst, src);
        emitByte(0x0F);
        emitByte(0xC0);
        emitOperandHelper(src, dst, 0);
    }

    public final void xaddw(AMD64Address dst, Register src) {
        emitByte(0x66); // Switch to 16-bit mode.
        prefix(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xaddl(AMD64Address dst, Register src) {
        prefix(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xaddq(AMD64Address dst, Register src) {
        prefixq(dst, src);
        emitByte(0x0F);
        emitByte(0xC1);
        emitOperandHelper(src, dst, 0);
    }

    public final void xchgb(Register dst, AMD64Address src) {
        prefixb(src, dst);
        emitByte(0x86);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgw(Register dst, AMD64Address src) {
        emitByte(0x66);
        prefix(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgl(Register dst, AMD64Address src) {
        prefix(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }

    public final void xchgq(Register dst, AMD64Address src) {
        prefixq(src, dst);
        emitByte(0x87);
        emitOperandHelper(dst, src, 0);
    }
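    // Note on the xadd/xchg pairs above: unlike xadd, an xchg with a memory
    // operand is implicitly locked on x86, so the xchg variants are atomic
    // without an explicit lock() prefix.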
    public final void membar(int barriers) {
        if (target.isMP) {
            // We only have to handle StoreLoad.
            if ((barriers & STORE_LOAD) != 0) {
                // All usable chips support "locked" instructions, which suffice as barriers
                // and are much faster than the alternative of using the cpuid instruction.
                // We emit a locked add of 0 to [rsp], which is conveniently otherwise a
                // no-op except for clobbering flags.
                // Any change to this code may need to revisit other places where this
                // idiom is used, in particular the orderAccess code.
                lock();
                addl(new AMD64Address(rsp, 0), 0); // Assert the lock# signal here
            }
        }
    }
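    // A sketch of the bytes the StoreLoad barrier above produces (assuming lock()
    // emits the 0xF0 prefix): lock addl [rsp], 0 assembles to
    //   F0 83 04 24 00       (LOCK, ADD r/m32 imm8, ModRM+SIB for [rsp], imm 0)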
    @Override
    protected final void patchJumpTarget(int branch, int branchTarget) {
        int op = getByte(branch);
        assert op == 0xE8 // call
                        || op == 0x00 // jump table entry
                        || op == 0xE9 // jmp
                        || op == 0xEB // short jmp
                        || (op & 0xF0) == 0x70 // short jcc
                        || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
                        : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;

        if (op == 0x00) {
            int offsetToJumpTableBase = getShort(branch + 1);
            int jumpTableBase = branch - offsetToJumpTableBase;
            int imm32 = branchTarget - jumpTableBase;
            emitInt(imm32, branch);
        } else if (op == 0xEB || (op & 0xF0) == 0x70) {
            // short offset operators (jmp and jcc)
            final int imm8 = branchTarget - (branch + 2);
            /*
             * Since a wrongly patched short branch can potentially lead to code that appears to
             * work but behaves badly, we always fail with an exception here instead of relying
             * on an assert.
             */
            if (!NumUtil.isByte(imm8)) {
                throw new InternalError("branch displacement out of range: " + imm8);
            }
            emitByte(imm8, branch + 1);
        } else {
            int off = 1;
            if (op == 0x0F) {
                off = 2;
            }
            int imm32 = branchTarget - (branch + 4 + off);
            emitInt(imm32, branch + off);
        }
    }

    public void nullCheck(AMD64Address address) {
        testl(AMD64.rax, address);
    }

    @Override
    public void align(int modulus) {
        if (position() % modulus != 0) {
            nop(modulus - (position() % modulus));
        }
    }

    /**
     * Emits a direct call instruction. Note that the actual call target is not specified, because
     * all calls need patching anyway. Therefore, 0 is emitted as the call target, and the user is
     * responsible for adding the call address to the appropriate patching tables.
     */
    public final void call() {
        if (codePatchingAnnotationConsumer != null) {
            int pos = position();
            codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(pos, pos + 1, 4, pos + 5));
        }
        emitByte(0xE8);
        emitInt(0);
    }
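    // Example usage of call() (a sketch; the exact patching hook depends on the
    // code installer consuming the annotation above): the instruction emitted is
    // E8 00 00 00 00, so the four displacement bytes to patch sit at
    // position() - 4 immediately after the call, matching the annotation's
    // (pos + 1, size 4, next instruction pos + 5) layout.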
    public final void call(Register src) {
        int encode = prefixAndEncode(src.encoding);
        emitByte(0xFF);
        emitByte(0xD0 | encode);
    }

    public final void int3() {
        emitByte(0xCC);
    }

    public final void pause() {
        emitByte(0xF3);
        emitByte(0x90);
    }

    private void emitx87(int b1, int b2, int i) {
        assert 0 <= i && i < 8 : "illegal stack offset";
        emitByte(b1);
        emitByte(b2 + i);
    }

    public final void fldd(AMD64Address src) {
        emitByte(0xDD);
        emitOperandHelper(0, src, 0);
    }

    public final void flds(AMD64Address src) {
        emitByte(0xD9);
        emitOperandHelper(0, src, 0);
    }

    public final void fldln2() {
        emitByte(0xD9);
        emitByte(0xED);
    }

    public final void fldlg2() {
        emitByte(0xD9);
        emitByte(0xEC);
    }

    public final void fyl2x() {
        emitByte(0xD9);
        emitByte(0xF1);
    }

    public final void fstps(AMD64Address src) {
        emitByte(0xD9);
        emitOperandHelper(3, src, 0);
    }

    public final void fstpd(AMD64Address src) {
        emitByte(0xDD);
        emitOperandHelper(3, src, 0);
    }

    private void emitFPUArith(int b1, int b2, int i) {
        assert 0 <= i && i < 8 : "illegal FPU register: " + i;
        emitByte(b1);
        emitByte(b2 + i);
    }

    public void ffree(int i) {
        emitFPUArith(0xDD, 0xC0, i);
    }

    public void fincstp() {
        emitByte(0xD9);
        emitByte(0xF7);
    }

    public void fxch(int i) {
        emitFPUArith(0xD9, 0xC8, i);
    }

    public void fnstswAX() {
        emitByte(0xDF);
        emitByte(0xE0);
    }

    public void fwait() {
        emitByte(0x9B);
    }

    public void fprem() {
        emitByte(0xD9);
        emitByte(0xF8);
    }

    public final void fsin() {
        emitByte(0xD9);
        emitByte(0xFE);
    }

    public final void fcos() {
        emitByte(0xD9);
        emitByte(0xFF);
    }

    public final void fptan() {
        emitByte(0xD9);
        emitByte(0xF2);
    }

    public final void fstp(int i) {
        emitx87(0xDD, 0xD8, i);
    }

    @Override
    public AMD64Address makeAddress(Register base, int displacement) {
        return new AMD64Address(base, displacement);
    }

    @Override
    public AMD64Address getPlaceholder(int instructionStartPosition) {
        return new AMD64Address(rip, Register.None, Scale.Times1, 0, instructionStartPosition);
    }

    private void prefetchPrefix(AMD64Address src) {
        prefix(src);
        emitByte(0x0F);
    }

    public void prefetchnta(AMD64Address src) {
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(0, src, 0);
    }

    void prefetchr(AMD64Address src) {
        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
        prefetchPrefix(src);
        emitByte(0x0D);
        emitOperandHelper(0, src, 0);
    }

    public void prefetcht0(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(1, src, 0);
    }

    public void prefetcht1(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(2, src, 0);
    }

    public void prefetcht2(AMD64Address src) {
        assert supports(CPUFeature.SSE);
        prefetchPrefix(src);
        emitByte(0x18);
        emitOperandHelper(3, src, 0);
    }

    public void prefetchw(AMD64Address src) {
        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
        prefetchPrefix(src);
        emitByte(0x0D);
        emitOperandHelper(1, src, 0);
    }

    public void rdtsc() {
        emitByte(0x0F);
        emitByte(0x31);
    }

    /**
     * Emits an instruction which is considered to be illegal. This is used if we deliberately want
     * to crash the program (debugging etc.).
     */
    public void illegal() {
        emitByte(0x0F);
        emitByte(0x0B); // UD2
    }

    public void lfence() {
        emitByte(0x0F);
        emitByte(0xAE);
        emitByte(0xE8);
    }
}