1 /* 2 ** Definitions for x86 and x64 CPUs. 3 ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h 4 */ 5 6 #ifndef _LJ_TARGET_X86_H 7 #define _LJ_TARGET_X86_H 8 9 /* -- Registers IDs ------------------------------------------------------- */ 10 11 #if LJ_64 12 #define GPRDEF(_) \ 13 _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI) \ 14 _(R8D) _(R9D) _(R10D) _(R11D) _(R12D) _(R13D) _(R14D) _(R15D) 15 #define FPRDEF(_) \ 16 _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) \ 17 _(XMM8) _(XMM9) _(XMM10) _(XMM11) _(XMM12) _(XMM13) _(XMM14) _(XMM15) 18 #else 19 #define GPRDEF(_) \ 20 _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI) 21 #define FPRDEF(_) \ 22 _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) 23 #endif 24 #define VRIDDEF(_) \ 25 _(MRM) 26 27 #define RIDENUM(name) RID_##name, 28 29 enum { 30 GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ 31 FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ 32 RID_MAX, 33 RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ 34 35 /* Calling conventions. */ 36 RID_SP = RID_ESP, 37 RID_RET = RID_EAX, 38 #if LJ_64 39 RID_FPRET = RID_XMM0, 40 #else 41 RID_RETLO = RID_EAX, 42 RID_RETHI = RID_EDX, 43 #endif 44 45 /* These definitions must match with the *.dasc file(s): */ 46 RID_BASE = RID_EDX, /* Interpreter BASE. */ 47 #if LJ_64 && !LJ_ABI_WIN 48 RID_LPC = RID_EBX, /* Interpreter PC. */ 49 RID_DISPATCH = RID_R14D, /* Interpreter DISPATCH table. */ 50 #else 51 RID_LPC = RID_ESI, /* Interpreter PC. */ 52 RID_DISPATCH = RID_EBX, /* Interpreter DISPATCH table. */ 53 #endif 54 55 /* Register ranges [min, max) and number of registers. */ 56 RID_MIN_GPR = RID_EAX, 57 RID_MIN_FPR = RID_XMM0, 58 RID_MAX_GPR = RID_MIN_FPR, 59 RID_MAX_FPR = RID_MAX, 60 RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, 61 RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR, 62 }; 63 64 /* -- Register sets ------------------------------------------------------- */ 65 66 /* Make use of all registers, except the stack pointer. */ 67 #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP)) 68 #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) 69 #define RSET_ALL (RSET_GPR|RSET_FPR) 70 #define RSET_INIT RSET_ALL 71 72 #if LJ_64 73 /* Note: this requires the use of FORCE_REX! */ 74 #define RSET_GPR8 RSET_GPR 75 #else 76 #define RSET_GPR8 (RSET_RANGE(RID_EAX, RID_EBX+1)) 77 #endif 78 79 /* ABI-specific register sets. */ 80 #define RSET_ACD (RID2RSET(RID_EAX)|RID2RSET(RID_ECX)|RID2RSET(RID_EDX)) 81 #if LJ_64 82 #if LJ_ABI_WIN 83 /* Windows x64 ABI. */ 84 #define RSET_SCRATCH \ 85 (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1)) 86 #define REGARG_GPRS \ 87 (RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5)) 88 #define REGARG_NUMGPR 4 89 #define REGARG_NUMFPR 4 90 #define REGARG_FIRSTFPR RID_XMM0 91 #define REGARG_LASTFPR RID_XMM3 92 #define STACKARG_OFS (4*8) 93 #else 94 /* The rest of the civilized x64 world has a common ABI. */ 95 #define RSET_SCRATCH \ 96 (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR) 97 #define REGARG_GPRS \ 98 (RID_EDI|((RID_ESI|((RID_EDX|((RID_ECX|((RID_R8D|(RID_R9D \ 99 <<5))<<5))<<5))<<5))<<5)) 100 #define REGARG_NUMGPR 6 101 #define REGARG_NUMFPR 8 102 #define REGARG_FIRSTFPR RID_XMM0 103 #define REGARG_LASTFPR RID_XMM7 104 #define STACKARG_OFS 0 105 #endif 106 #else 107 /* Common x86 ABI. */ 108 #define RSET_SCRATCH (RSET_ACD|RSET_FPR) 109 #define REGARG_GPRS (RID_ECX|(RID_EDX<<5)) /* Fastcall only. */ 110 #define REGARG_NUMGPR 2 /* Fastcall only. */ 111 #define REGARG_NUMFPR 0 112 #define STACKARG_OFS 0 113 #endif 114 115 #if LJ_64 116 /* Prefer the low 8 regs of each type to reduce REX prefixes. */ 117 #undef rset_picktop 118 #define rset_picktop(rs) (lj_fls(lj_bswap(rs)) ^ 0x18) 119 #endif 120 121 /* -- Spill slots --------------------------------------------------------- */ 122 123 /* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. 124 ** 125 ** SPS_FIXED: Available fixed spill slots in interpreter frame. 126 ** This definition must match with the *.dasc file(s). 127 ** 128 ** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. 129 */ 130 #if LJ_64 131 #if LJ_ABI_WIN 132 #define SPS_FIXED (4*2) 133 #define SPS_FIRST (4*2) /* Don't use callee register save area. */ 134 #else 135 #if LJ_GC64 136 #define SPS_FIXED 2 137 #else 138 #define SPS_FIXED 4 139 #endif 140 #define SPS_FIRST 2 141 #endif 142 #else 143 #define SPS_FIXED 6 144 #define SPS_FIRST 2 145 #endif 146 147 #define SPOFS_TMP 0 148 149 #define sps_scale(slot) (4 * (int32_t)(slot)) 150 #define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) 151 152 /* -- Exit state ---------------------------------------------------------- */ 153 154 /* This definition must match with the *.dasc file(s). */ 155 typedef struct { 156 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ 157 intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ 158 int32_t spill[256]; /* Spill slots. */ 159 } ExitState; 160 161 /* Limited by the range of a short fwd jump (127): (2+2)*(32-1)-2 = 122. */ 162 #define EXITSTUB_SPACING (2+2) 163 #define EXITSTUBS_PER_GROUP 32 164 165 /* -- x86 ModRM operand encoding ------------------------------------------ */ 166 167 typedef enum { 168 XM_OFS0 = 0x00, XM_OFS8 = 0x40, XM_OFS32 = 0x80, XM_REG = 0xc0, 169 XM_SCALE1 = 0x00, XM_SCALE2 = 0x40, XM_SCALE4 = 0x80, XM_SCALE8 = 0xc0, 170 XM_MASK = 0xc0 171 } x86Mode; 172 173 /* Structure to hold variable ModRM operand. */ 174 typedef struct { 175 int32_t ofs; /* Offset. */ 176 uint8_t base; /* Base register or RID_NONE. */ 177 uint8_t idx; /* Index register or RID_NONE. */ 178 uint8_t scale; /* Index scale (XM_SCALE1 .. XM_SCALE8). */ 179 } x86ModRM; 180 181 /* -- Opcodes ------------------------------------------------------------- */ 182 183 /* Macros to construct variable-length x86 opcodes. -(len+1) is in LSB. */ 184 #define XO_(o) ((uint32_t)(0x0000fe + (0x##o<<24))) 185 #define XO_FPU(a,b) ((uint32_t)(0x00fd + (0x##a<<16)+(0x##b<<24))) 186 #define XO_0f(o) ((uint32_t)(0x0f00fd + (0x##o<<24))) 187 #define XO_66(o) ((uint32_t)(0x6600fd + (0x##o<<24))) 188 #define XO_660f(o) ((uint32_t)(0x0f66fc + (0x##o<<24))) 189 #define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24))) 190 #define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24))) 191 192 /* This list of x86 opcodes is not intended to be complete. Opcodes are only 193 ** included when needed. Take a look at DynASM or jit.dis_x86 to see the 194 ** whole mess. 195 */ 196 typedef enum { 197 /* Fixed length opcodes. XI_* prefix. */ 198 XI_NOP = 0x90, 199 XI_XCHGa = 0x90, 200 XI_CALL = 0xe8, 201 XI_JMP = 0xe9, 202 XI_JMPs = 0xeb, 203 XI_PUSH = 0x50, /* Really 50+r. */ 204 XI_JCCs = 0x70, /* Really 7x. */ 205 XI_JCCn = 0x80, /* Really 0f8x. */ 206 XI_LEA = 0x8d, 207 XI_MOVrib = 0xb0, /* Really b0+r. */ 208 XI_MOVri = 0xb8, /* Really b8+r. */ 209 XI_ARITHib = 0x80, 210 XI_ARITHi = 0x81, 211 XI_ARITHi8 = 0x83, 212 XI_PUSHi8 = 0x6a, 213 XI_TESTb = 0x84, 214 XI_TEST = 0x85, 215 XI_MOVmi = 0xc7, 216 XI_GROUP5 = 0xff, 217 218 /* Note: little-endian byte-order! */ 219 XI_FLDZ = 0xeed9, 220 XI_FLD1 = 0xe8d9, 221 XI_FLDLG2 = 0xecd9, 222 XI_FLDLN2 = 0xedd9, 223 XI_FDUP = 0xc0d9, /* Really fld st0. */ 224 XI_FPOP = 0xd8dd, /* Really fstp st0. */ 225 XI_FPOP1 = 0xd9dd, /* Really fstp st1. */ 226 XI_FRNDINT = 0xfcd9, 227 XI_FSIN = 0xfed9, 228 XI_FCOS = 0xffd9, 229 XI_FPTAN = 0xf2d9, 230 XI_FPATAN = 0xf3d9, 231 XI_FSCALE = 0xfdd9, 232 XI_FYL2X = 0xf1d9, 233 234 /* Variable-length opcodes. XO_* prefix. */ 235 XO_MOV = XO_(8b), 236 XO_MOVto = XO_(89), 237 XO_MOVtow = XO_66(89), 238 XO_MOVtob = XO_(88), 239 XO_MOVmi = XO_(c7), 240 XO_MOVmib = XO_(c6), 241 XO_LEA = XO_(8d), 242 XO_ARITHib = XO_(80), 243 XO_ARITHi = XO_(81), 244 XO_ARITHi8 = XO_(83), 245 XO_ARITHiw8 = XO_66(83), 246 XO_SHIFTi = XO_(c1), 247 XO_SHIFT1 = XO_(d1), 248 XO_SHIFTcl = XO_(d3), 249 XO_IMUL = XO_0f(af), 250 XO_IMULi = XO_(69), 251 XO_IMULi8 = XO_(6b), 252 XO_CMP = XO_(3b), 253 XO_TESTb = XO_(84), 254 XO_TEST = XO_(85), 255 XO_GROUP3b = XO_(f6), 256 XO_GROUP3 = XO_(f7), 257 XO_GROUP5b = XO_(fe), 258 XO_GROUP5 = XO_(ff), 259 XO_MOVZXb = XO_0f(b6), 260 XO_MOVZXw = XO_0f(b7), 261 XO_MOVSXb = XO_0f(be), 262 XO_MOVSXw = XO_0f(bf), 263 XO_MOVSXd = XO_(63), 264 XO_BSWAP = XO_0f(c8), 265 XO_CMOV = XO_0f(40), 266 267 XO_MOVSD = XO_f20f(10), 268 XO_MOVSDto = XO_f20f(11), 269 XO_MOVSS = XO_f30f(10), 270 XO_MOVSSto = XO_f30f(11), 271 XO_MOVLPD = XO_660f(12), 272 XO_MOVAPS = XO_0f(28), 273 XO_XORPS = XO_0f(57), 274 XO_ANDPS = XO_0f(54), 275 XO_ADDSD = XO_f20f(58), 276 XO_SUBSD = XO_f20f(5c), 277 XO_MULSD = XO_f20f(59), 278 XO_DIVSD = XO_f20f(5e), 279 XO_SQRTSD = XO_f20f(51), 280 XO_MINSD = XO_f20f(5d), 281 XO_MAXSD = XO_f20f(5f), 282 XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */ 283 XO_UCOMISD = XO_660f(2e), 284 XO_CVTSI2SD = XO_f20f(2a), 285 XO_CVTTSD2SI= XO_f20f(2c), 286 XO_CVTSI2SS = XO_f30f(2a), 287 XO_CVTTSS2SI= XO_f30f(2c), 288 XO_CVTSS2SD = XO_f30f(5a), 289 XO_CVTSD2SS = XO_f20f(5a), 290 XO_ADDSS = XO_f30f(58), 291 XO_MOVD = XO_660f(6e), 292 XO_MOVDto = XO_660f(7e), 293 294 XO_FLDd = XO_(d9), XOg_FLDd = 0, 295 XO_FLDq = XO_(dd), XOg_FLDq = 0, 296 XO_FILDd = XO_(db), XOg_FILDd = 0, 297 XO_FILDq = XO_(df), XOg_FILDq = 5, 298 XO_FSTPd = XO_(d9), XOg_FSTPd = 3, 299 XO_FSTPq = XO_(dd), XOg_FSTPq = 3, 300 XO_FISTPq = XO_(df), XOg_FISTPq = 7, 301 XO_FISTTPq = XO_(dd), XOg_FISTTPq = 1, 302 XO_FADDq = XO_(dc), XOg_FADDq = 0, 303 XO_FLDCW = XO_(d9), XOg_FLDCW = 5, 304 XO_FNSTCW = XO_(d9), XOg_FNSTCW = 7 305 } x86Op; 306 307 /* x86 opcode groups. */ 308 typedef uint32_t x86Group; 309 310 #define XG_(i8, i, g) ((x86Group)(((i8) << 16) + ((i) << 8) + (g))) 311 #define XG_ARITHi(g) XG_(XI_ARITHi8, XI_ARITHi, g) 312 #define XG_TOXOi(xg) ((x86Op)(0x000000fe + (((xg)<<16) & 0xff000000))) 313 #define XG_TOXOi8(xg) ((x86Op)(0x000000fe + (((xg)<<8) & 0xff000000))) 314 315 #define XO_ARITH(a) ((x86Op)(0x030000fe + ((a)<<27))) 316 #define XO_ARITHw(a) ((x86Op)(0x036600fd + ((a)<<27))) 317 318 typedef enum { 319 XOg_ADD, XOg_OR, XOg_ADC, XOg_SBB, XOg_AND, XOg_SUB, XOg_XOR, XOg_CMP, 320 XOg_X_IMUL 321 } x86Arith; 322 323 typedef enum { 324 XOg_ROL, XOg_ROR, XOg_RCL, XOg_RCR, XOg_SHL, XOg_SHR, XOg_SAL, XOg_SAR 325 } x86Shift; 326 327 typedef enum { 328 XOg_TEST, XOg_TEST_, XOg_NOT, XOg_NEG, XOg_MUL, XOg_IMUL, XOg_DIV, XOg_IDIV 329 } x86Group3; 330 331 typedef enum { 332 XOg_INC, XOg_DEC, XOg_CALL, XOg_CALLfar, XOg_JMP, XOg_JMPfar, XOg_PUSH 333 } x86Group5; 334 335 /* x86 condition codes. */ 336 typedef enum { 337 CC_O, CC_NO, CC_B, CC_NB, CC_E, CC_NE, CC_BE, CC_NBE, 338 CC_S, CC_NS, CC_P, CC_NP, CC_L, CC_NL, CC_LE, CC_NLE, 339 CC_C = CC_B, CC_NAE = CC_C, CC_NC = CC_NB, CC_AE = CC_NB, 340 CC_Z = CC_E, CC_NZ = CC_NE, CC_NA = CC_BE, CC_A = CC_NBE, 341 CC_PE = CC_P, CC_PO = CC_NP, CC_NGE = CC_L, CC_GE = CC_NL, 342 CC_NG = CC_LE, CC_G = CC_NLE 343 } x86CC; 344 345 #endif 346