|// Low-level VM code for x64 CPUs in LJ_GC64 mode.
|// Bytecode interpreter, fast functions and helper functions.
|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
|
|.arch x64
|.section code_op, code_sub
|
|.actionlist build_actionlist
|.globals GLOB_
|.globalnames globnames
|.externnames extnames
|
|//-----------------------------------------------------------------------
|
|.if WIN
|.define X64WIN, 1			// Windows/x64 calling conventions.
|.endif
|
|// Fixed register assignments for the interpreter.
|// This is very fragile and has many dependencies. Caveat emptor.
|.define BASE,		rdx		// Not C callee-save, refetched anyway.
|.if X64WIN
|.define KBASE,		rdi		// Must be C callee-save.
|.define PC,		rsi		// Must be C callee-save.
|.define DISPATCH,	rbx		// Must be C callee-save.
|.define KBASEd,		edi
|.define PCd,		esi
|.define DISPATCHd,	ebx
|.else
|.define KBASE,		r15		// Must be C callee-save.
|.define PC,		rbx		// Must be C callee-save.
|.define DISPATCH,	r14		// Must be C callee-save.
|.define KBASEd,		r15d
|.define PCd,		ebx
|.define DISPATCHd,	r14d
|.endif
|
|// Operand registers. The d/W/H/L suffixes name the 32/16/high-8/low-8 bit
|// views of the same register. OP aliases RBd and RD aliases RC, so these
|// pairs can never be live at the same time.
|.define RA,		rcx
|.define RAd,		ecx
|.define RAH,		ch
|.define RAL,		cl
|.define RB,		rbp		// Must be rbp (C callee-save).
|.define RBd,		ebp
|.define RC,		rax		// Must be rax.
|.define RCd,		eax
|.define RCW,		ax
|.define RCH,		ah
|.define RCL,		al
|.define OP,		RBd
|.define RD,		RC
|.define RDd,		RCd
|.define RDW,		RCW
|.define RDL,		RCL
|.define TMPR,		r10
|.define TMPRd,		r10d
|.define ITYPE,		r11
|.define ITYPEd,		r11d
|
|// C call argument registers. Note that on both ABIs some of them coincide
|// with interpreter registers (e.g. rcx = RA, rdx = BASE), so the order in
|// which arguments are loaded matters at every call site.
|.if X64WIN
|.define CARG1,		rcx		// x64/WIN64 C call arguments.
|.define CARG2,		rdx
|.define CARG3,		r8
|.define CARG4,		r9
|.define CARG1d,		ecx
|.define CARG2d,		edx
|.define CARG3d,		r8d
|.define CARG4d,		r9d
|.else
|.define CARG1,		rdi		// x64/POSIX C call arguments.
|.define CARG2,		rsi
|.define CARG3,		rdx
|.define CARG4,		rcx
|.define CARG5,		r8
|.define CARG6,		r9
|.define CARG1d,		edi
|.define CARG2d,		esi
|.define CARG3d,		edx
|.define CARG4d,		ecx
|.define CARG5d,		r8d
|.define CARG6d,		r9d
|.endif
|
|// Type definitions. Some of these are only used for documentation.
|.type L,		lua_State
|.type GL,		global_State
|.type TVALUE,		TValue
|.type GCOBJ,		GCobj
|.type STR,		GCstr
|.type TAB,		GCtab
|.type LFUNC,		GCfuncL
|.type CFUNC,		GCfuncC
|.type PROTO,		GCproto
|.type UPVAL,		GCupval
|.type NODE,		Node
|.type NARGS,		int
|.type TRACE,		GCtrace
|.type SBUF,		SBuf
|
|// Stack layout while in interpreter. Must match with lj_frame.h.
|//-----------------------------------------------------------------------
|.if X64WIN		// x64/Windows stack layout
|
|.define CFRAME_SPACE,	aword*5			// Delta for rsp (see <--).
|// saveregs_ pushes rdi, rsi, rbx and reserves CFRAME_SPACE bytes;
|// saveregs additionally pushes rbp first. restoreregs undoes both.
|.macro saveregs_
|  push rdi; push rsi; push rbx
|  sub rsp, CFRAME_SPACE
|.endmacro
|.macro saveregs
|  push rbp; saveregs_
|.endmacro
|.macro restoreregs
|  add rsp, CFRAME_SPACE
|  pop rbx; pop rsi; pop rdi; pop rbp
|.endmacro
|
|// All slots below are addressed relative to rsp while in the interpreter.
|.define SAVE_CFRAME,	aword [rsp+aword*13]
|.define SAVE_PC,	aword [rsp+aword*12]
|.define SAVE_L,		aword [rsp+aword*11]
|.define SAVE_ERRF,	dword [rsp+dword*21]
|.define SAVE_NRES,	dword [rsp+dword*20]
|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
|.define SAVE_RET,	aword [rsp+aword*9]	//<-- rsp entering interpreter.
|.define SAVE_R4,	aword [rsp+aword*8]
|.define SAVE_R3,	aword [rsp+aword*7]
|.define SAVE_R2,	aword [rsp+aword*6]
|.define SAVE_R1,	aword [rsp+aword*5]	//<-- rsp after register saves.
|.define ARG5,		aword [rsp+aword*4]
|.define CSAVE_4,	aword [rsp+aword*3]
|.define CSAVE_3,	aword [rsp+aword*2]
|.define CSAVE_2,	aword [rsp+aword*1]
|.define CSAVE_1,	aword [rsp]		//<-- rsp while in interpreter.
|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
|
|.define ARG5d,		dword [rsp+dword*8]
|.define TMP1,		ARG5			// TMP1 overlaps ARG5
|.define TMP1d,		ARG5d
|.define TMP1hi,		dword [rsp+dword*9]
|.define MULTRES,	TMP1d			// MULTRES overlaps TMP1d.
|
|//-----------------------------------------------------------------------
|.else			// x64/POSIX stack layout
|
|.define CFRAME_SPACE,	aword*5			// Delta for rsp (see <--).
|// saveregs_ pushes rbx, r15, r14 (plus r13, r12 when NO_UNWIND is set)
|// and reserves CFRAME_SPACE bytes; saveregs additionally pushes rbp first.
|// restoreregs pops in the exact reverse order.
|.macro saveregs_
|  push rbx; push r15; push r14
|.if NO_UNWIND
|  push r13; push r12
|.endif
|  sub rsp, CFRAME_SPACE
|.endmacro
|.macro saveregs
|  push rbp; saveregs_
|.endmacro
|.macro restoreregs
|  add rsp, CFRAME_SPACE
|.if NO_UNWIND
|  pop r12; pop r13
|.endif
|  pop r14; pop r15; pop rbx; pop rbp
|.endmacro
|
|//----- 16 byte aligned,
|.if NO_UNWIND
|// Two extra saved registers shift the return address slot up by two words.
|.define SAVE_RET,	aword [rsp+aword*11]	//<-- rsp entering interpreter.
|.define SAVE_R4,	aword [rsp+aword*10]
|.define SAVE_R3,	aword [rsp+aword*9]
|.define SAVE_R2,	aword [rsp+aword*8]
|.define SAVE_R1,	aword [rsp+aword*7]
|.define SAVE_RU2,	aword [rsp+aword*6]
|.define SAVE_RU1,	aword [rsp+aword*5]	//<-- rsp after register saves.
|.else
|.define SAVE_RET,	aword [rsp+aword*9]	//<-- rsp entering interpreter.
|.define SAVE_R4,	aword [rsp+aword*8]
|.define SAVE_R3,	aword [rsp+aword*7]
|.define SAVE_R2,	aword [rsp+aword*6]
|.define SAVE_R1,	aword [rsp+aword*5]	//<-- rsp after register saves.
|.endif
|.define SAVE_CFRAME,	aword [rsp+aword*4]
|.define SAVE_PC,	aword [rsp+aword*3]
|.define SAVE_L,		aword [rsp+aword*2]
|.define SAVE_ERRF,	dword [rsp+dword*3]
|.define SAVE_NRES,	dword [rsp+dword*2]
|.define TMP1,		aword [rsp]		//<-- rsp while in interpreter.
|//----- 16 byte aligned
|
|.define TMP1d,		dword [rsp]
|.define TMP1hi,		dword [rsp+dword*1]
|.define MULTRES,	TMP1d			// MULTRES overlaps TMP1d.
|
|.endif
|
|//-----------------------------------------------------------------------
|
|// Instruction headers.
|// ins_NEXT (below) leaves the upper 16 bits of the instruction in RC/RD.
|// These headers split or transform that value as each format needs;
|// the A/AD/AJ formats need no extra work.
|.macro ins_A; .endmacro
|.macro ins_AD; .endmacro
|.macro ins_AJ; .endmacro
|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro  // RB = hi byte, RC = lo byte.
|.macro ins_AB_; movzx RBd, RCH; .endmacro		// RB = hi byte only.
|.macro ins_A_C; movzx RCd, RCL; .endmacro		// RC = lo byte only.
|.macro ins_AND; not RD; .endmacro			// RD = ~RD.
|
|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
|.macro ins_NEXT
|  mov RCd, [PC]				// Fetch the 32 bit instruction word.
|  movzx RAd, RCH			// RA = bits 8-15.
|  movzx OP, RCL				// OP = bits 0-7.
|  add PC, 4				// PC now points to the next instruction.
|  shr RCd, 16				// RC/RD = bits 16-31.
|  jmp aword [DISPATCH+OP*8]		// Indirect jump via the dispatch table.
|.endmacro
|
|// Instruction footer.
|.if 1
|  // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
|  .define ins_next, ins_NEXT
|  .define ins_next_, ins_NEXT
|.else
|  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
|  // Affects only certain kinds of benchmarks (and only with -j off).
|  // Around 10%-30% slower on Core2, a lot slower on P4.
|  .macro ins_next
|    jmp ->ins_next
|  .endmacro
|  .macro ins_next_
|  ->ins_next:
|    ins_NEXT
|  .endmacro
|.endif
|
|// Call decode and dispatch.
|// Unlike ins_NEXT, this decodes only OP and RA from the first instruction
|// of the callee and leaves RD (nargs+1) untouched.
|.macro ins_callt
|  // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC
|  mov PC, LFUNC:RB->pc
|  mov RAd, [PC]
|  movzx OP, RAL
|  movzx RAd, RAH
|  add PC, 4
|  jmp aword [DISPATCH+OP*8]
|.endmacro
|
|// Like ins_callt, but first stores the caller PC into the frame slot at
|// [BASE-8].
|.macro ins_call
|  // BASE = new base, RB = LFUNC, RD = nargs+1
|  mov [BASE-8], PC
|  ins_callt
|.endmacro
|
|//-----------------------------------------------------------------------
|
|// Macros to clear or set tags.
|// A tagged 64 bit value carries its type tag in the bits above bit 46:
|// tags are shifted left by 47 when set and recovered with an arithmetic
|// shift right by 47. ITYPE is used as scratch by most macros below.
|
|// Strip the tag: zero the top 17 bits of reg.
|.macro cleartp, reg
|  shl reg, 17
|  shr reg, 17
|.endmacro
|
|// Tag reg in place with tp. Overwrites ITYPE.
|.macro settp, reg, tp
|  mov64 ITYPE, ((uint64_t)tp<<47)
|  or reg, ITYPE
|.endmacro
|
|// dst = reg tagged with tp. reg itself is left unchanged.
|.macro settp, dst, reg, tp
|  mov64 dst, ((uint64_t)tp<<47)
|  or dst, reg
|.endmacro
|
|// Integer tagging is settp with LJ_TISNUM.
|.macro setint, reg
|  settp reg, LJ_TISNUM
|.endmacro
|.macro setint, dst, reg
|  settp dst, reg, LJ_TISNUM
|.endmacro
|
|// Macros to test operand types.
|
|// Jump to target unless the tag of reg equals tp. reg is not changed.
|.macro checktp_nc, reg, tp, target
|  mov ITYPE, reg
|  sar ITYPE, 47
|  cmp ITYPEd, tp
|  jne target
|.endmacro
|
|// Same test, but reg is left with its tag cleared (on both paths).
|.macro checktp, reg, tp, target
|  mov ITYPE, reg
|  cleartp reg
|  sar ITYPE, 47
|  cmp ITYPEd, tp
|  jne target
|.endmacro
|
|// Variant named for a source operand; same expansion as checktp_nc.
|.macro checktptp, src, tp, target
|  checktp_nc src, tp, target
|.endmacro
|
|.macro checkstr, reg, target
|  checktp reg, LJ_TSTR, target
|.endmacro
|.macro checktab, reg, target
|  checktp reg, LJ_TTAB, target
|.endmacro
|.macro checkfunc, reg, target
|  checktp reg, LJ_TFUNC, target
|.endmacro
|
|// Compare the tag of reg against LJ_TISNUM and branch to target with the
|// conditional jump passed in 'jump'. reg is not changed.
|.macro checknumx, reg, target, jump
|  mov ITYPE, reg
|  sar ITYPE, 47
|  cmp ITYPEd, LJ_TISNUM
|  jump target
|.endmacro
|
|// Branch away unless the tag is exactly LJ_TISNUM.
|.macro checkint, reg, target
|  checknumx reg, target, jne
|.endmacro
|.macro checkinttp, src, target
|  checkint src, target
|.endmacro
|
|// Branch away if the tag is (unsigned) above or equal to LJ_TISNUM.
|.macro checknum, reg, target
|  checknumx reg, target, jae
|.endmacro
|.macro checknumtp, src, target
|  checknum src, target
|.endmacro
|
|// Branch away only if the tag is (unsigned) above LJ_TISNUM.
|.macro checknumber, src, target
|  checknumx src, target, ja
|.endmacro
|
|// Load the constants ~(1<<47) and ~(2<<47) into reg.
|.macro mov_false, reg
|  mov64 reg, (int64_t)~((uint64_t)1<<47)
|.endmacro
|.macro mov_true, reg
|  mov64 reg, (int64_t)~((uint64_t)2<<47)
|.endmacro
|
|// These operands must be used with movzx.
|// Byte/word fields of the four bytes just below PC.
|.define PC_OP, byte [PC-4]
|.define PC_RA, byte [PC-3]
|.define PC_RB, byte [PC-1]
|.define PC_RC, byte [PC-2]
|.define PC_RD, word [PC-2]
|
|// Advance PC by a biased jump offset given in instructions (reg*4 bytes).
|.macro branchPC, reg
|  lea PC, [PC+reg*4-BCBIAS_J*4]
|.endmacro
|
|// Assumes DISPATCH is relative to GL.
#define DISPATCH_GL(field)	(GG_DISP2G + (int)offsetof(global_State, field))
#define DISPATCH_J(field)	(GG_DISP2J + (int)offsetof(jit_State, field))
|
#define PC2PROTO(field)  ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
|
|// Decrement hashed hotcount and trigger the trace recorder when the
|// counter underflows (the branch is taken on borrow, not on zero).
|// The hash index is (PC >> 1) & HOTCOUNT_PCMASK. Overwrites reg.
|.macro hotloop, reg
|  mov reg, PCd
|  shr reg, 1
|  and reg, HOTCOUNT_PCMASK
|  sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP
|  jb ->vm_hotloop
|.endmacro
|
|// Same as hotloop, but subtracts HOTCOUNT_CALL and branches to vm_hotcall.
|.macro hotcall, reg
|  mov reg, PCd
|  shr reg, 1
|  and reg, HOTCOUNT_PCMASK
|  sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
|  jb ->vm_hotcall
|.endmacro
|
|// Set current VM state. The stored value is the complement of LJ_VMST_<st>.
|.macro set_vmstate, st
|  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
|.endmacro
|
|.macro fpop1; fstp st1; .endmacro
|
|// Synthesize SSE FP constants.
|// Each macro builds the 64 bit pattern in the GPR tmp and moves it to reg.
|.macro sseconst_abs, reg, tmp		// Synthesize abs mask.
|  mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
|.endmacro
|
|.macro sseconst_hi, reg, tmp, val	// Synthesize hi-32 bit const.
|  mov64 tmp, U64x(val,00000000); movd reg, tmp
|.endmacro
|
|.macro sseconst_sign, reg, tmp		// Synthesize sign mask.
|  sseconst_hi reg, tmp, 80000000
|.endmacro
|.macro sseconst_1, reg, tmp		// Synthesize 1.0.
|  sseconst_hi reg, tmp, 3ff00000
|.endmacro
|.macro sseconst_m1, reg, tmp		// Synthesize -1.0.
|  sseconst_hi reg, tmp, bff00000
|.endmacro
|.macro sseconst_2p52, reg, tmp		// Synthesize 2^52.
|  sseconst_hi reg, tmp, 43300000
|.endmacro
|.macro sseconst_tobit, reg, tmp	// Synthesize 2^52 + 2^51.
|  sseconst_hi reg, tmp, 43380000
|.endmacro
|
|// Move table write barrier back. Overwrites reg.
|// Clears the LJ_GC_BLACK bit of tab and links tab at the head of the
|// gc.grayagain list (old head goes into tab->gclist).
|.macro barrierback, tab, reg
|  and byte tab->marked, (uint8_t)~LJ_GC_BLACK	// black2gray(tab)
|  mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)]
|  mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab
|  mov tab->gclist, reg
|.endmacro
|
|//-----------------------------------------------------------------------

/* Generate subroutines used by opcodes and other parts of the VM. */
/* The .code_sub section should be last to help static branch prediction. */
static void build_subroutines(BuildCtx *ctx)
{
  |.code_sub
  |
  |//-----------------------------------------------------------------------
  |//-- Return handling ----------------------------------------------------
  |//-----------------------------------------------------------------------
  |
  |// Entered from vm_return when frame type bits remain set after the
  |// FRAME_C toggle. Frames without FRAME_P go to the continuation dispatch.
  |->vm_returnp:
  |  test PCd, FRAME_P
  |  jz ->cont_dispatch
  |
  |  // Return from pcall or xpcall fast func.
  |  and PC, -8
  |  sub BASE, PC			// Restore caller base.
  |  lea RA, [RA+PC-8]			// Rebase RA and prepend one result.
  |  mov PC, [BASE-8]			// Fetch PC of previous frame.
  |  // Prepending may overwrite the pcall frame, so do it at the end.
  |  mov_true ITYPE
  |  mov aword [BASE+RA], ITYPE		// Prepend true to results.
  |
  |->vm_returnc:
  |  add RDd, 1				// RD = nresults+1
  |  jz ->vm_unwind_yield		// RDd wrapped around to zero.
  |  mov MULTRES, RDd
  |  test PC, FRAME_TYPE
  |  jz ->BC_RET_Z			// Handle regular return to Lua.
  |
  |->vm_return:
  |  // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
  |  xor PC, FRAME_C
  |  test PCd, FRAME_TYPE
  |  jnz ->vm_returnp
  |
  |  // Return to C.
  |  set_vmstate C
  |  and PC, -8
  |  sub PC, BASE
  |  neg PC				// Previous base = BASE - delta.
  |
  |  sub RDd, 1
  |  jz >2
  |1:  // Move results down.
  |  mov RB, [BASE+RA]
  |  mov [BASE-16], RB
  |  add BASE, 8
  |  sub RDd, 1
  |  jnz <1
  |2:
  |  mov L:RB, SAVE_L
  |  mov L:RB->base, PC
  |3:
  |  mov RDd, MULTRES
  |  mov RAd, SAVE_NRES			// RA = wanted nresults+1
  |4:
  |  cmp RAd, RDd
  |  jne >6				// More/less results wanted?
  |5:
  |  sub BASE, 16
  |  mov L:RB->top, BASE
  |
  |->vm_leave_cp:
  |  mov RA, SAVE_CFRAME		// Restore previous C frame.
  |  mov L:RB->cframe, RA
  |  xor eax, eax			// Ok return status for vm_pcall.
  |
  |->vm_leave_unw:
  |  restoreregs
  |  ret
  |
  |6:
  |  // Flags are still those of 'cmp RAd, RDd' at label 4.
  |  jb >7				// Less results wanted?
  |  // More results wanted. Check stack size and fill up results with nil.
  |  cmp BASE, L:RB->maxstack
  |  ja >8
  |  mov aword [BASE-16], LJ_TNIL
  |  add BASE, 8
  |  add RDd, 1
  |  jmp <4
  |
  |7:  // Less results wanted.
  |  test RAd, RAd
  |  jz <5				// But check for LUA_MULTRET+1.
  |  sub RA, RD				// Negative result!
  |  lea BASE, [BASE+RA*8]		// Correct top.
  |  jmp <5
  |
  |8:  // Corner case: need to grow stack for filling up results.
  |  // This can happen if:
  |  // - A C function grows the stack (a lot).
  |  // - The GC shrinks the stack in between.
  |  // - A return back from a lua_call() with (high) nresults adjustment.
  |  mov L:RB->top, BASE		// Save current top held in BASE (yes).
  |  mov MULTRES, RDd			// Need to fill only remainder with nil.
  |  mov CARG2d, RAd
  |  mov CARG1, L:RB
  |  call extern lj_state_growstack	// (lua_State *L, int n)
  |  mov BASE, L:RB->top		// Need the (realloced) L->top in BASE.
  |  jmp <3
  |
  |->vm_unwind_yield:
  |  // On the path from vm_returnc eax is zero here (the add wrapped),
  |  // so writing al alone leaves eax = LUA_YIELD.
  |  mov al, LUA_YIELD
  |  jmp ->vm_unwind_c_eh
  |
  |->vm_unwind_c:			// Unwind C stack, return from vm_pcall.
  |  // (void *cframe, int errcode)
  |  mov eax, CARG2d			// Error return status for vm_pcall.
  |  mov rsp, CARG1
  |->vm_unwind_c_eh:			// Landing pad for external unwinder.
  |  mov L:RB, SAVE_L
  |  mov GL:RB, L:RB->glref
  |  mov dword GL:RB->vmstate, ~LJ_VMST_C
  |  jmp ->vm_leave_unw
  |
  |// Non-Windows only: reload L and the error code, pop the C frame and
  |// tail-jump to lj_err_throw. The label exists but is empty on X64WIN.
  |->vm_unwind_rethrow:
  |.if not X64WIN
  |  mov CARG1, SAVE_L
  |  mov CARG2d, eax
  |  restoreregs
  |  jmp extern lj_err_throw		// (lua_State *L, int errcode)
  |.endif
  |
  |->vm_unwind_ff:			// Unwind C stack, return from ff pcall.
  |  // (void *cframe)
  |  and CARG1, CFRAME_RAWMASK
  |  mov rsp, CARG1
  |->vm_unwind_ff_eh:			// Landing pad for external unwinder.
  |  mov L:RB, SAVE_L
  |  mov RDd, 1+1			// Really 1+2 results, incr. later.
  |  mov BASE, L:RB->base
  |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
  |  add DISPATCH, GG_G2DISP
  |  mov PC, [BASE-8]			// Fetch PC of previous frame.
  |  mov_false RA
  |  mov RB, [BASE]
  |  mov [BASE-16], RA			// Prepend false to error message.
  |  mov [BASE-8], RB
  |  mov RA, -16			// Results start at BASE+RA = BASE-16.
  |  set_vmstate INTERP
  |  jmp ->vm_returnc			// Increments RD/MULTRES and returns.
  |
  |//-----------------------------------------------------------------------
  |//-- Grow stack for calls -----------------------------------------------
  |//-----------------------------------------------------------------------
  |
  |// All three entry points end up calling lj_state_growstack(L, n) and then
  |// retry the call. The C variant passes n = LUA_MINSTACK; the Lua variants
  |// pass the byte read at PC-4+PC2PROTO(framesize).
  |->vm_growstack_c:			// Grow stack for C function.
  |  mov CARG2d, LUA_MINSTACK
  |  jmp >2
  |
  |->vm_growstack_v:			// Grow stack for vararg Lua function.
  |  sub RD, 16				// LJ_FR2
  |  jmp >1
  |
  |->vm_growstack_f:			// Grow stack for fixarg Lua function.
  |  // BASE = new base, RD = nargs+1, RB = L, PC = first PC
  |  lea RD, [BASE+NARGS:RD*8-8]
  |1:
  |  movzx RAd, byte [PC-4+PC2PROTO(framesize)]
  |  add PC, 4				// Must point after first instruction.
  |  mov L:RB->base, BASE
  |  mov L:RB->top, RD
  |  mov SAVE_PC, PC
  |  mov CARG2, RA
  |2:
  |  // RB = L, L->base = new base, L->top = top
  |  mov CARG1, L:RB
  |  call extern lj_state_growstack	// (lua_State *L, int n)
  |  mov BASE, L:RB->base
  |  mov RD, L:RB->top
  |  mov LFUNC:RB, [BASE-16]
  |  cleartp LFUNC:RB
  |  sub RD, BASE			// Recompute nargs+1 from top - base.
  |  shr RDd, 3
  |  add NARGS:RDd, 1
  |  // BASE = new base, RB = LFUNC, RD = nargs+1
  |  ins_callt				// Just retry the call.
  |
  |//-----------------------------------------------------------------------
  |//-- Entry points into the assembler VM ---------------------------------
  |//-----------------------------------------------------------------------
  |
  |->vm_resume:				// Setup C frame and resume thread.
  |  // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
  |  saveregs
  |  mov L:RB, CARG1			// Caveat: CARG1 may be RA.
  |  mov SAVE_L, CARG1
  |  mov RA, CARG2
  |  mov PCd, FRAME_CP
  |  xor RDd, RDd
  |  // cframe pointer = stack pointer + CFRAME_RESUME. Uses the full 64 bit
  |  // rsp, consistent with every other stack reference in this file.
  |  lea KBASE, [rsp+CFRAME_RESUME]
  |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
  |  add DISPATCH, GG_G2DISP
  |  mov SAVE_PC, RD			// Any value outside of bytecode is ok.
  |  mov SAVE_CFRAME, RD
  |  mov SAVE_NRES, RDd
  |  mov SAVE_ERRF, RDd
  |  mov L:RB->cframe, KBASE
  |  cmp byte L:RB->status, RDL		// RDL is zero here.
  |  je >2				// Initial resume (like a call).
  |
  |  // Resume after yield (like a return).
  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
  |  set_vmstate INTERP
  |  mov byte L:RB->status, RDL		// Reset status to zero.
  |  mov BASE, L:RB->base
  |  mov RD, L:RB->top
  |  sub RD, RA
  |  shr RDd, 3
  |  add RDd, 1				// RD = nresults+1
  |  sub RA, BASE			// RA = resultofs
  |  mov PC, [BASE-8]
  |  mov MULTRES, RDd
  |  test PCd, FRAME_TYPE
  |  jz ->BC_RET_Z
  |  jmp ->vm_return
  |
  |->vm_pcall:				// Setup protected C frame and enter VM.
  |  // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
  |  saveregs
  |  mov PCd, FRAME_CP
  |  mov SAVE_ERRF, CARG4d
  |  jmp >1
  |
  |->vm_call:				// Setup C frame and enter VM.
  |  // (lua_State *L, TValue *base, int nres1)
  |  saveregs
  |  mov PCd, FRAME_C
  |
  |1:  // Entry point for vm_pcall above (PC = ftype).
  |  mov SAVE_NRES, CARG3d
  |  mov L:RB, CARG1			// Caveat: CARG1 may be RA.
  |  mov SAVE_L, CARG1
  |  mov RA, CARG2
  |
  |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
  |  mov KBASE, L:RB->cframe		// Add our C frame to cframe chain.
  |  mov SAVE_CFRAME, KBASE
  |  mov SAVE_PC, L:RB			// Any value outside of bytecode is ok.
  |  add DISPATCH, GG_G2DISP
  |  mov L:RB->cframe, rsp
  |
  |2:  // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
  |  set_vmstate INTERP
  |  mov BASE, L:RB->base		// BASE = old base (used in vmeta_call).
  |  add PC, RA
  |  sub PC, BASE			// PC = frame delta + frame type
  |
  |  mov RD, L:RB->top
  |  sub RD, RA
  |  shr NARGS:RDd, 3
  |  add NARGS:RDd, 1			// RD = nargs+1
  |
  |->vm_call_dispatch:
  |  mov LFUNC:RB, [RA-16]
  |  checkfunc LFUNC:RB, ->vmeta_call	// Ensure KBASE defined and != BASE.
  |
  |->vm_call_dispatch_f:
  |  mov BASE, RA
  |  ins_call
  |  // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
  |
  |->vm_cpcall:				// Setup protected C frame, call C.
  |  // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
  |  saveregs
  |  mov L:RB, CARG1			// Caveat: CARG1 may be RA.
  |  mov SAVE_L, CARG1
  |  mov SAVE_PC, L:RB			// Any value outside of bytecode is ok.
  |
  |  mov KBASE, L:RB->stack		// Compute -savestack(L, L->top).
  |  sub KBASE, L:RB->top
  |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
  |  mov SAVE_ERRF, 0			// No error function.
  |  mov SAVE_NRES, KBASEd		// Neg. delta means cframe w/o frame.
  |  add DISPATCH, GG_G2DISP
  |  // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
  |
  |  mov KBASE, L:RB->cframe		// Add our C frame to cframe chain.
  |  mov SAVE_CFRAME, KBASE
  |  mov L:RB->cframe, rsp
  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
  |
  |  // CARG1..CARG3 still hold the incoming L, func and ud.
  |  call CARG4			// (lua_State *L, lua_CFunction func, void *ud)
  |  // TValue * (new base) or NULL returned in eax (RC).
  |  test RC, RC
  |  jz ->vm_leave_cp			// No base? Just remove C frame.
  |  mov RA, RC
  |  mov PCd, FRAME_CP
  |  jmp <2				// Else continue with the call.
  |
  |//-----------------------------------------------------------------------
  |//-- Metamethod handling ------------------------------------------------
  |//-----------------------------------------------------------------------
  |
  |//-- Continuation dispatch ----------------------------------------------
  |
  |->cont_dispatch:
  |  // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
  |  add RA, BASE
  |  and PC, -8
  |  mov RB, BASE
  |  sub BASE, PC			// Restore caller BASE.
  |  mov aword [RA+RD*8-8], LJ_TNIL	// Ensure one valid arg.
  |  mov RC, RA				// ... in [RC]
  |  mov PC, [RB-24]			// Restore PC from [cont|PC].
  |  mov RA, qword [RB-32]		// May be negative on WIN64 with debug.
  |.if FFI
  |  cmp RA, 1				// cont values 0 and 1 are special.
  |  jbe >1
  |.endif
  |  mov LFUNC:KBASE, [BASE-16]
  |  cleartp LFUNC:KBASE
  |  mov KBASE, LFUNC:KBASE->pc
  |  mov KBASE, [KBASE+PC2PROTO(k)]
  |  // BASE = base, RC = result, RB = meta base
  |  jmp RA				// Jump to continuation.
  |
  |.if FFI
  |1:
  |  // Flags are still those of 'cmp RA, 1' above.
  |  je ->cont_ffi_callback		// cont = 1: return from FFI callback.
  |  // cont = 0: Tail call from C function.
  |  sub RB, BASE
  |  shr RBd, 3
  |  lea RDd, [RBd-3]			// 32 bit destination, low half only.
  |  jmp ->vm_call_tail
  |.endif
  |
  |->cont_cat:				// BASE = base, RC = result, RB = mbase
  |  movzx RAd, PC_RB
  |  sub RB, 32
  |  lea RA, [BASE+RA*8]
  |  sub RA, RB
  |  je ->cont_ra			// RB-32 reached the slot BASE+PC_RB*8.
  |  neg RA
  |  shr RAd, 3
  |  // Store the result at [RB] and set up L, RB and the count for BC_CAT_Z.
  |  // The two ABI variants differ only in register-safe ordering.
  |.if X64WIN
  |  mov CARG3d, RAd
  |  mov L:CARG1, SAVE_L
  |  mov L:CARG1->base, BASE
  |  mov RC, [RC]
  |  mov [RB], RC
  |  mov CARG2, RB
  |.else
  |  mov L:CARG1, SAVE_L
  |  mov L:CARG1->base, BASE
  |  mov CARG3d, RAd
  |  mov RA, [RC]
  |  mov [RB], RA
  |  mov CARG2, RB
  |.endif
  |  jmp ->BC_CAT_Z
  |
  |//-- Table indexing metamethods -----------------------------------------
  |
  |// The tgets/tgetb/tgetv entry points differ only in how the key TValue
  |// pointer (RC) and object pointer (RB) are materialized; they all join at
  |// label 2 and call lj_meta_tget.
  |->vmeta_tgets:
  |  settp STR:RC, LJ_TSTR		// STR:RC = GCstr *
  |  mov TMP1, STR:RC
  |  lea RC, TMP1
  |  cmp PC_OP, BC_GGET
  |  jne >1
  |  settp TAB:RA, TAB:RB, LJ_TTAB	// TAB:RB = GCtab *
  |  lea RB, [DISPATCH+DISPATCH_GL(tmptv)]  // Store fn->l.env in g->tmptv.
  |  mov [RB], TAB:RA
  |  jmp >2
  |
  |->vmeta_tgetb:
  |  movzx RCd, PC_RC
  |.if DUALNUM
  |  setint RC
  |  mov TMP1, RC
  |.else
  |  cvtsi2sd xmm0, RCd
  |  movsd TMP1, xmm0
  |.endif
  |  lea RC, TMP1
  |  jmp >1
  |
  |->vmeta_tgetv:
  |  movzx RCd, PC_RC			// Reload TValue *k from RC.
  |  lea RC, [BASE+RC*8]
  |1:
  |  movzx RBd, PC_RB			// Reload TValue *t from RB.
  |  lea RB, [BASE+RB*8]
  |2:
  |  mov L:CARG1, SAVE_L
  |  mov L:CARG1->base, BASE		// Caveat: CARG2/CARG3 may be BASE.
  |  mov CARG2, RB
  |  mov CARG3, RC
  |  mov L:RB, L:CARG1
  |  mov SAVE_PC, PC
  |  call extern lj_meta_tget		// (lua_State *L, TValue *o, TValue *k)
  |  // TValue * (finished) or NULL (metamethod) returned in eax (RC).
  |  mov BASE, L:RB->base
  |  test RC, RC
  |  jz >3
  |->cont_ra:				// BASE = base, RC = result
  |  movzx RAd, PC_RA
  |  mov RB, [RC]
  |  mov [BASE+RA*8], RB		// Copy result to the slot named by RA.
  |  ins_next
  |
  |3:  // Call __index metamethod.
  |  // BASE = base, L->top = new base, stack = cont/func/t/k
  |  mov RA, L:RB->top
  |  mov [RA-24], PC			// [cont|PC]
  |  lea PC, [RA+FRAME_CONT]
  |  sub PC, BASE
  |  mov LFUNC:RB, [RA-16]		// Guaranteed to be a function here.
  |  mov NARGS:RDd, 2+1			// 2 args for func(t, k).
  |  cleartp LFUNC:RB
  |  jmp ->vm_call_dispatch_f
  |
  |->vmeta_tgetr:
  |  mov CARG1, TAB:RB
  |  mov RB, BASE			// Save BASE.
  |  mov CARG2d, RCd			// Caveat: CARG2 == BASE
  |  call extern lj_tab_getinth		// (GCtab *t, int32_t key)
  |  // cTValue * or NULL returned in eax (RC).
  |  movzx RAd, PC_RA
  |  mov BASE, RB			// Restore BASE.
  |  test RC, RC
  |  jnz ->BC_TGETR_Z
  |  mov ITYPE, LJ_TNIL			// Not found: continue with nil.
  |  jmp ->BC_TGETR2_Z
  |
  |//-----------------------------------------------------------------------
  |
  |// Store counterparts of the routines above; same structure, but calling
  |// lj_meta_tset and copying the value from the RA slot.
  |->vmeta_tsets:
  |  settp STR:RC, LJ_TSTR		// STR:RC = GCstr *
  |  mov TMP1, STR:RC
  |  lea RC, TMP1
  |  cmp PC_OP, BC_GSET
  |  jne >1
  |  settp TAB:RA, TAB:RB, LJ_TTAB	// TAB:RB = GCtab *
  |  lea RB, [DISPATCH+DISPATCH_GL(tmptv)]  // Store fn->l.env in g->tmptv.
  |  mov [RB], TAB:RA
  |  jmp >2
  |
  |->vmeta_tsetb:
  |  movzx RCd, PC_RC
  |.if DUALNUM
  |  setint RC
  |  mov TMP1, RC
  |.else
  |  cvtsi2sd xmm0, RCd
  |  movsd TMP1, xmm0
  |.endif
  |  lea RC, TMP1
  |  jmp >1
  |
  |->vmeta_tsetv:
  |  movzx RCd, PC_RC			// Reload TValue *k from RC.
  |  lea RC, [BASE+RC*8]
  |1:
  |  movzx RBd, PC_RB			// Reload TValue *t from RB.
  |  lea RB, [BASE+RB*8]
  |2:
  |  mov L:CARG1, SAVE_L
  |  mov L:CARG1->base, BASE		// Caveat: CARG2/CARG3 may be BASE.
  |  mov CARG2, RB
  |  mov CARG3, RC
  |  mov L:RB, L:CARG1
  |  mov SAVE_PC, PC
  |  call extern lj_meta_tset		// (lua_State *L, TValue *o, TValue *k)
  |  // TValue * (finished) or NULL (metamethod) returned in eax (RC).
  |  mov BASE, L:RB->base
  |  test RC, RC
  |  jz >3
  |  // NOBARRIER: lj_meta_tset ensures the table is not black.
  |  movzx RAd, PC_RA
  |  mov RB, [BASE+RA*8]
  |  mov [RC], RB			// Store the value into the returned slot.
  |->cont_nop:				// BASE = base, (RC = result)
  |  ins_next
  |
  |3:  // Call __newindex metamethod.
  |  // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
  |  mov RA, L:RB->top
  |  mov [RA-24], PC			// [cont|PC]
  |  movzx RCd, PC_RA
  |  // Copy value to third argument.
  |  mov RB, [BASE+RC*8]
  |  mov [RA+16], RB
  |  lea PC, [RA+FRAME_CONT]
  |  sub PC, BASE
  |  mov LFUNC:RB, [RA-16]		// Guaranteed to be a function here.
  |  mov NARGS:RDd, 3+1			// 3 args for func(t, k, v).
  |  cleartp LFUNC:RB
  |  jmp ->vm_call_dispatch_f
  |
  |->vmeta_tsetr:
  |  // In both variants RB ends up holding the saved BASE across the call.
  |.if X64WIN
  |  mov L:CARG1, SAVE_L
  |  mov CARG3d, RCd
  |  mov L:CARG1->base, BASE
  |  xchg CARG2, TAB:RB			// Caveat: CARG2 == BASE.
  |.else
  |  mov L:CARG1, SAVE_L
  |  mov CARG2, TAB:RB
  |  mov L:CARG1->base, BASE
  |  mov RB, BASE			// Save BASE.
  |  mov CARG3d, RCd			// Caveat: CARG3 == BASE.
  |.endif
  |  mov SAVE_PC, PC
  |  call extern lj_tab_setinth	// (lua_State *L, GCtab *t, int32_t key)
  |  // TValue * returned in eax (RC).
  |  movzx RAd, PC_RA
  |  mov BASE, RB			// Restore BASE.
  |  jmp ->BC_TSETR_Z
  |
  |//-- Comparison metamethods ---------------------------------------------
  |
  |->vmeta_comp:
  |  movzx RDd, PC_RD
  |  movzx RAd, PC_RA
  |  mov L:RB, SAVE_L
  |  mov L:RB->base, BASE		// Caveat: CARG2/CARG3 == BASE.
  |.if X64WIN
  |  lea CARG3, [BASE+RD*8]
  |  lea CARG2, [BASE+RA*8]
  |.else
  |  lea CARG2, [BASE+RA*8]
  |  lea CARG3, [BASE+RD*8]
  |.endif
  |  mov CARG1, L:RB			// Caveat: CARG1/CARG4 == RA.
  |  movzx CARG4d, PC_OP
  |  mov SAVE_PC, PC
  |  call extern lj_meta_comp	// (lua_State *L, TValue *o1, *o2, int op)
  |  // 0/1 or TValue * (metamethod) returned in eax (RC).
  |3:
  |  mov BASE, L:RB->base
  |  cmp RC, 1
  |  ja ->vmeta_binop			// Result > 1: a metamethod to call.
  |4:
  |  lea PC, [PC+4]			// lea keeps the flags for the jb below.
  |  jb >6				// Carry set (result 0 via label 3): no branch.
  |5:
  |  movzx RDd, PC_RD
  |  branchPC RD
  |6:
  |  ins_next
  |
  |->cont_condt:			// BASE = base, RC = result
  |  add PC, 4
  |  mov ITYPE, [RC]
  |  sar ITYPE, 47
  |  cmp ITYPEd, LJ_TISTRUECOND	// Branch if result is true.
  |  jb <5
  |  jmp <6
  |
  |->cont_condf:			// BASE = base, RC = result
  |  mov ITYPE, [RC]
  |  sar ITYPE, 47
  |  cmp ITYPEd, LJ_TISTRUECOND	// Branch if result is false.
  |  jmp <4				// Label 4 consumes the flags set here.
  |
  |->vmeta_equal:
  |  cleartp TAB:RD
  |  sub PC, 4
  |.if X64WIN
  |  mov CARG3, RD
  |  mov CARG4d, RBd
  |  mov L:RB, SAVE_L
  |  mov L:RB->base, BASE		// Caveat: CARG2 == BASE.
  |  mov CARG2, RA
  |  mov CARG1, L:RB			// Caveat: CARG1 == RA.
  |.else
  |  mov CARG2, RA
  |  mov CARG4d, RBd			// Caveat: CARG4 == RA.
  |  mov L:RB, SAVE_L
  |  mov L:RB->base, BASE		// Caveat: CARG3 == BASE.
  |  mov CARG3, RD
  |  mov CARG1, L:RB
  |.endif
  |  mov SAVE_PC, PC
  |  call extern lj_meta_equal	// (lua_State *L, GCobj *o1, *o2, int ne)
  |  // 0/1 or TValue * (metamethod) returned in eax (RC).
  |  jmp <3
  |
  |->vmeta_equal_cd:
  |.if FFI
  |  sub PC, 4
  |  mov L:RB, SAVE_L
  |  mov L:RB->base, BASE
  |  mov CARG1, L:RB
  |  mov CARG2d, dword [PC-4]		// Pass the whole instruction word.
  |  mov SAVE_PC, PC
  |  call extern lj_meta_equal_cd	// (lua_State *L, BCIns ins)
  |  // 0/1 or TValue * (metamethod) returned in eax (RC).
  |  jmp <3
  |.endif
  |
  |->vmeta_istype:
  |  mov L:RB, SAVE_L
  |  mov L:RB->base, BASE		// Caveat: CARG2/CARG3 may be BASE.
  |  mov CARG2d, RAd
  |  mov CARG3d, RDd
  |  mov L:CARG1, L:RB
  |  mov SAVE_PC, PC
  |  call extern lj_meta_istype  // (lua_State *L, BCReg ra, BCReg tp)
  |  mov BASE, L:RB->base
  |  jmp <6
  |
  |//-- Arithmetic metamethods ---------------------------------------------
  |
  |// Each entry point turns the operand indexes in RB/RC into TValue
  |// pointers: 'v' operands are relative to BASE, 'n' operands to KBASE.
  |// The *o variants first reload RB/RC from the instruction (DUALNUM only).
  |// All paths join at label 2 with RB/RC as pointers and RA as an index.
  |->vmeta_arith_vno:
  |.if DUALNUM
  |  movzx RBd, PC_RB
  |  movzx RCd, PC_RC
  |.endif
  |->vmeta_arith_vn:
  |  lea RC, [KBASE+RC*8]
  |  jmp >1
  |
  |->vmeta_arith_nvo:
  |.if DUALNUM
  |  movzx RBd, PC_RB
  |  movzx RCd, PC_RC
  |.endif
  |->vmeta_arith_nv:
  |  lea TMPR, [KBASE+RC*8]
  |  lea RC, [BASE+RB*8]
  |  mov RB, TMPR
  |  jmp >2
  |
  |->vmeta_unm:
  |  lea RC, [BASE+RD*8]
  |  mov RB, RC				// Unary op: both operands are the same slot.
  |  jmp >2
  |
  |->vmeta_arith_vvo:
  |.if DUALNUM
  |  movzx RBd, PC_RB
  |  movzx RCd, PC_RC
  |.endif
  |->vmeta_arith_vv:
  |  lea RC, [BASE+RC*8]
  |1:
  |  lea RB, [BASE+RB*8]
  |2:
  |  lea RA, [BASE+RA*8]
  |.if X64WIN
  |  mov CARG3, RB
  |  mov CARG4, RC
  |  movzx RCd, PC_OP
  |  mov ARG5d, RCd			// Fifth argument goes on the stack.
  |  mov L:RB, SAVE_L
  |  mov L:RB->base, BASE		// Caveat: CARG2 == BASE.
  |  mov CARG2, RA
  |  mov CARG1, L:RB			// Caveat: CARG1 == RA.
  |.else
  |  movzx CARG5d, PC_OP
  |  mov CARG2, RA
  |  mov CARG4, RC			// Caveat: CARG4 == RA.
  |  mov L:CARG1, SAVE_L
  |  mov L:CARG1->base, BASE		// Caveat: CARG3 == BASE.
  |  mov CARG3, RB
  |  mov L:RB, L:CARG1
  |.endif
  |  mov SAVE_PC, PC
  |  call extern lj_meta_arith  // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
  |  // NULL (finished) or TValue * (metamethod) returned in eax (RC).
  |  mov BASE, L:RB->base
  |  test RC, RC
  |  jz ->cont_nop
  |
  |  // Call metamethod for binary op.
/* vmeta_binop: set up a continuation frame and call a binary metamethod. */
  |->vmeta_binop:
  |  // BASE = base, RC = new base, stack = cont/func/o1/o2
  |  mov RA, RC
  |  sub RC, BASE
  |  mov [RA-24], PC			// [cont|PC]
  |  lea PC, [RC+FRAME_CONT]		// PC = frame delta + FRAME_CONT.
  |  mov NARGS:RDd, 2+1			// 2 args for func(o1, o2).
  |  jmp ->vm_call_dispatch
  |
  |->vmeta_len:
  |  movzx RDd, PC_RD
  |  mov L:RB, SAVE_L
  |  mov L:RB->base, BASE
  |  lea CARG2, [BASE+RD*8]		// Caveat: CARG2 == BASE
  |  mov L:CARG1, L:RB
  |  mov SAVE_PC, PC
  |  call extern lj_meta_len		// (lua_State *L, TValue *o)
  |  // NULL (retry) or TValue * (metamethod) returned in eax (RC).
  |  mov BASE, L:RB->base
#if LJ_52
  |  test RC, RC
  |  jne ->vmeta_binop			// Binop call for compatibility.
  |  // NULL result: reload the operand, strip its tag and retry in BC_LEN_Z.
  |  movzx RDd, PC_RD
  |  mov TAB:CARG1, [BASE+RD*8]
  |  cleartp TAB:CARG1
  |  jmp ->BC_LEN_Z
#else
  |  jmp ->vmeta_binop			// Binop call for compatibility.
#endif
  |
  |//-- Call metamethod ----------------------------------------------------
  |
  |->vmeta_call_ra:
  |  lea RA, [BASE+RA*8+16]		// Turn the slot index in RA into a new base.
  |->vmeta_call:			// Resolve and call __call metamethod.
  |  // BASE = old base, RA = new base, RC = nargs+1, PC = return
  |  // nargs+1 is kept in TMP1d and the new base in RB across the C call.
  |  mov TMP1d, NARGS:RDd		// Save RA, RC for us.
  |  mov RB, RA
  |.if X64WIN
  |  mov L:TMPR, SAVE_L
  |  mov L:TMPR->base, BASE		// Caveat: CARG2 is BASE.
  |  lea CARG2, [RA-16]
  |  lea CARG3, [RA+NARGS:RD*8-8]
  |  mov CARG1, L:TMPR			// Caveat: CARG1 is RA.
  |.else
  |  mov L:CARG1, SAVE_L
  |  mov L:CARG1->base, BASE		// Caveat: CARG3 is BASE.
  |  lea CARG2, [RA-16]
  |  lea CARG3, [RA+NARGS:RD*8-8]
  |.endif
  |  mov SAVE_PC, PC
  |  call extern lj_meta_call	// (lua_State *L, TValue *func, TValue *top)
  |  mov RA, RB
  |  mov L:RB, SAVE_L
  |  mov BASE, L:RB->base
  |  mov NARGS:RDd, TMP1d
  |  mov LFUNC:RB, [RA-16]
  |  // NOTE(review): the argument count grows by one after lj_meta_call,
  |  // presumably for an argument it inserted -- confirm against lj_meta_call.
  |  add NARGS:RDd, 1
  |  // This is fragile. L->base must not move, KBASE must always be defined.
  |  cmp KBASE, BASE			// Continue with CALLT if flag set.
  |  je ->BC_CALLT_Z
  |  cleartp LFUNC:RB
  |  mov BASE, RA
  |  ins_call				// Otherwise call resolved metamethod.
  |
  |//-- Argument coercion for 'for' statement ------------------------------
  |
  |->vmeta_for:
  |  mov L:RB, SAVE_L
  |  mov L:RB->base, BASE
  |  mov CARG2, RA			// Caveat: CARG2 == BASE
  |  mov L:CARG1, L:RB			// Caveat: CARG1 == RA
  |  mov SAVE_PC, PC
  |  call extern lj_meta_for	// (lua_State *L, TValue *base)
  |  mov BASE, L:RB->base
  |  // Re-decode the instruction just below PC (PC is not advanced) and
  |  // dispatch through the table at offset GG_DISP2STATIC.
  |  mov RCd, [PC-4]
  |  movzx RAd, RCH
  |  movzx OP, RCL
  |  shr RCd, 16
  |  jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]	// Retry FORI or JFORI.
  |
  |//-----------------------------------------------------------------------
  |//-- Fast functions -----------------------------------------------------
  |//-----------------------------------------------------------------------
  |
  |// Fast function prologues. Each defines the global label ff_<name>.
  |// The _1/_2 forms branch to fff_fallback when fewer than 1/2 arguments
  |// were passed (RD holds nargs+1).
  |.macro .ffunc, name
  |->ff_ .. name:
  |.endmacro
  |
  |.macro .ffunc_1, name
  |->ff_ .. name:
  |  cmp NARGS:RDd, 1+1;  jb ->fff_fallback
  |.endmacro
  |
  |.macro .ffunc_2, name
  |->ff_ .. name:
  |  cmp NARGS:RDd, 2+1;  jb ->fff_fallback
  |.endmacro
  |
  |// One-argument form that also requires [BASE] to pass checknumtp and
  |// then applies 'op' to load it into xmm0.
  |.macro .ffunc_n, name, op
  |  .ffunc_1 name
  |  checknumtp [BASE], ->fff_fallback
  |  op xmm0, qword [BASE]
  |.endmacro
  |
  |.macro .ffunc_n, name
  |  .ffunc_n name, movsd
  |.endmacro
  |
  |// Two-argument form: both [BASE] and [BASE+8] must pass checknumtp;
  |// they are loaded into xmm0 and xmm1.
  |.macro .ffunc_nn, name
  |  .ffunc_2 name
  |  checknumtp [BASE], ->fff_fallback
  |  checknumtp [BASE+8], ->fff_fallback
  |  movsd xmm0, qword [BASE]
  |  movsd xmm1, qword [BASE+8]
  |.endmacro
  |
  |// Inlined GC threshold check. Caveat: uses label 1.
|// Calls fff_gcstep unless gc.total is below gc.threshold (unsigned
|// compare). Clobbers RB.
|.macro ffgccheck
|  mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
|  cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
|  jb >1
|  call ->fff_gcstep
|1:
|.endmacro
|
|//-- Base library: checks -----------------------------------------------
|
|// assert: if the first argument passes the LJ_TISTRUECOND tag test, all
|// arguments are returned by copying them down two slots (to BASE-16).
|// Otherwise the fallback handler is used.
|.ffunc_1 assert
|  mov ITYPE, [BASE]
|  mov RB, ITYPE
|  sar ITYPE, 47  // ITYPE = upper bits of the tagged value.
|  cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback
|  mov PC, [BASE-8]
|  mov MULTRES, RDd  // Keep nargs+1; RD is used as loop counter below.
|  mov RB, [BASE]  // NOTE(review): RB appears to hold this value already.
|  mov [BASE-16], RB
|  sub RDd, 2
|  jz >2  // Only one argument: nothing more to copy.
|  mov RA, BASE
|1:
|  add RA, 8
|  mov RB, [RA]
|  mov [RA-16], RB
|  sub RDd, 1
|  jnz <1
|2:
|  mov RDd, MULTRES
|  jmp ->fff_res_
|
|// type: maps the tag of the first argument to a string taken from the
|// upvalue array of the C function object in the frame slot at BASE-16.
|.ffunc_1 type
|  mov RC, [BASE]
|  sar RC, 47
|  mov RBd, LJ_TISNUM
|  cmp RCd, RBd
|  cmovb RCd, RBd  // Tags below LJ_TISNUM (unsigned) all map to LJ_TISNUM.
|  not RCd  // Inverted tag = upvalue index.
|2:
|  mov CFUNC:RB, [BASE-16]
|  cleartp CFUNC:RB
|  mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
|  mov PC, [BASE-8]
|  settp STR:RC, LJ_TSTR
|  mov [BASE-16], STR:RC
|  jmp ->fff_res1
|
|//-- Base library: getters and setters ---------------------------------
|
|// getmetatable: returns nil without a metatable. Otherwise returns the
|// metatable, unless its hash part holds a non-nil value under the key
|// string gcroot[GCROOT_MMNAME+MM_metatable]; then that value is returned.
|.ffunc_1 getmetatable
|  mov TAB:RB, [BASE]
|  mov PC, [BASE-8]
|  checktab TAB:RB, >6
|1:  // Field metatable must be at same offset for GCtab and GCudata!
|  mov TAB:RB, TAB:RB->metatable
|2:
|  test TAB:RB, TAB:RB
|  mov aword [BASE-16], LJ_TNIL  // mov keeps the flags of the test.
|  jz ->fff_res1
|  settp TAB:RC, TAB:RB, LJ_TTAB
|  mov [BASE-16], TAB:RC  // Store metatable as default result.
|  mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)]
|  // Hash lookup: node = mt->node + (mt->hmask & str->sid) * #NODE.
|  mov RAd, TAB:RB->hmask
|  and RAd, STR:RC->sid
|  settp STR:RC, LJ_TSTR
|  imul RAd, #NODE
|  add NODE:RA, TAB:RB->node
|3:  // Rearranged logic, because we expect _not_ to find the key.
|  cmp NODE:RA->key, STR:RC
|  je >5
|4:
|  mov NODE:RA, NODE:RA->next  // Follow the collision chain.
|  test NODE:RA, NODE:RA
|  jnz <3
|  jmp ->fff_res1  // Not found, keep default result.
|5:
|  mov RB, NODE:RA->val
|  cmp RB, LJ_TNIL; je ->fff_res1  // Ditto for nil value.
|  mov [BASE-16], RB  // Return value of mt.__metatable.
|  jmp ->fff_res1
|
|6:  // Not a table. ITYPE is presumably left holding the tag by checktab.
|  cmp ITYPEd, LJ_TUDATA; je <1
|  cmp ITYPEd, LJ_TISNUM; ja >7
|  mov ITYPEd, LJ_TISNUM  // Tags up to LJ_TISNUM share one entry.
|7:
|  not ITYPEd  // Inverted tag indexes the base metatable roots.
|  mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
|  jmp <2
|
|// setmetatable: the inline path only covers a table without a metatable
|// that gets a table as its new metatable. Everything else falls back.
|.ffunc_2 setmetatable
|  mov TAB:RB, [BASE]
|  mov TAB:TMPR, TAB:RB  // Copy of the argument for the result slot.
|  checktab TAB:RB, ->fff_fallback
|  // Fast path: no mt for table yet and not clearing the mt.
|  cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
|  mov TAB:RA, [BASE+8]
|  checktab TAB:RA, ->fff_fallback
|  mov TAB:RB->metatable, TAB:RA
|  mov PC, [BASE-8]
|  mov [BASE-16], TAB:TMPR  // Return original table.
|  test byte TAB:RB->marked, LJ_GC_BLACK  // isblack(table)
|  jz >1
|  // Possible write barrier. Table is black, but skip iswhite(mt) check.
|  barrierback TAB:RB, RC
|1:
|  jmp ->fff_res1
|
|// rawget: calls lj_tab_get(L, t, key) with the second argument slot as
|// the key. BASE is parked in RB (C callee-save) across the call.
|.ffunc_2 rawget
|.if X64WIN
|  mov TAB:RA, [BASE]
|  checktab TAB:RA, ->fff_fallback
|  mov RB, BASE  // Save BASE.
|  lea CARG3, [BASE+8]
|  mov CARG2, TAB:RA  // Caveat: CARG2 == BASE.
|  mov CARG1, SAVE_L
|.else
|  mov TAB:CARG2, [BASE]
|  checktab TAB:CARG2, ->fff_fallback
|  mov RB, BASE  // Save BASE.
|  lea CARG3, [BASE+8]  // Caveat: CARG3 == BASE.
|  mov CARG1, SAVE_L
|.endif
|  call extern lj_tab_get  // (lua_State *L, GCtab *t, cTValue *key)
|  // cTValue * returned in eax (RD).
|  mov BASE, RB  // Restore BASE.
|  // Copy table slot.
|  mov RB, [RD]
|  mov PC, [BASE-8]
|  mov [BASE-16], RB
|  jmp ->fff_res1
|
|//-- Base library: conversions ------------------------------------------
|
|// tonumber: returns the argument unchanged if checknumber accepts it.
|.ffunc tonumber
|  // Only handles the number case inline (without a base argument).
|  cmp NARGS:RDd, 1+1; jne ->fff_fallback  // Exactly one argument.
|  mov RB, [BASE]
|  checknumber RB, ->fff_fallback
|  mov PC, [BASE-8]
|  mov [BASE-16], RB
|  jmp ->fff_res1
|
|// tostring: strings are returned as-is. Numbers are formatted through
|// a C call, but only if no number base metatable is set.
|.ffunc_1 tostring
|  // Only handles the string or number case inline.
|  mov PC, [BASE-8]
|  mov STR:RB, [BASE]
|  checktp_nc STR:RB, LJ_TSTR, >3
|  // A __tostring method in the string base metatable is ignored.
|2:
|  mov [BASE-16], STR:RB
|  jmp ->fff_res1
|3:  // Handle numbers inline, unless a number base metatable is present.
|  cmp ITYPEd, LJ_TISNUM; ja ->fff_fallback_1
|  cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
|  jne ->fff_fallback
|  ffgccheck  // Caveat: uses label 1.
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE  // Add frame since C call can throw.
|  mov SAVE_PC, PC  // Redundant (but a defined value).
|.if not X64WIN
|  mov CARG2, BASE  // Otherwise: CARG2 == BASE
|.endif
|  mov L:CARG1, L:RB
|.if DUALNUM
|  call extern lj_strfmt_number  // (lua_State *L, cTValue *o)
|.else
|  call extern lj_strfmt_num  // (lua_State *L, lua_Number *np)
|.endif
|  // GCstr returned in eax (RD).
|  mov BASE, L:RB->base
|  settp STR:RB, RD, LJ_TSTR
|  jmp <2
|
|//-- Base library: iterators -------------------------------------------
|
|// next: calls lj_tab_next(t, key, o) with key = second argument slot and
|// o = BASE-16. A missing second argument is replaced by nil first.
|.ffunc_1 next
|  je >2  // Missing 2nd arg?
|1:
|  mov CARG1, [BASE]
|  mov PC, [BASE-8]
|  checktab CARG1, ->fff_fallback
|  mov RB, BASE  // Save BASE.
|.if X64WIN
|  lea CARG3, [BASE-16]
|  lea CARG2, [BASE+8]  // Caveat: CARG2 == BASE.
|.else
|  lea CARG2, [BASE+8]
|  lea CARG3, [BASE-16]  // Caveat: CARG3 == BASE.
|.endif
|  call extern lj_tab_next  // (GCtab *t, cTValue *key, TValue *o)
|  // 1=found, 0=end, -1=error returned in eax (RD).
|  mov BASE, RB  // Restore BASE.
|  test RDd, RDd; jg ->fff_res2  // Found key/value.
|  js ->fff_fallback_2  // Invalid key.
|  // End of traversal: return nil.
|  mov aword [BASE-16], LJ_TNIL
|  jmp ->fff_res1
|2:  // Set missing 2nd arg to nil.
|  mov aword [BASE+8], LJ_TNIL
|  jmp <1
|
|// pairs: returns three values: upvalue[0] of the running C function
|// (tagged as a function), the table argument and nil.
|.ffunc_1 pairs
|  mov TAB:RB, [BASE]
|  mov TMPR, TAB:RB  // Copy of the argument for the second result.
|  checktab TAB:RB, ->fff_fallback
#if LJ_52
|  cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
#endif
|  mov CFUNC:RD, [BASE-16]
|  cleartp CFUNC:RD
|  mov CFUNC:RD, CFUNC:RD->upvalue[0]
|  settp CFUNC:RD, LJ_TFUNC
|  mov PC, [BASE-8]  // Fetch before the slot at BASE-8 is overwritten.
|  mov [BASE-16], CFUNC:RD
|  mov [BASE-8], TMPR
|  mov aword [BASE], LJ_TNIL
|  mov RDd, 1+3
|  jmp ->fff_res
|
|// ipairs_aux: increments the index argument, stores it as first result
|// and looks up the table at the new index (array part inline, hash part
|// via a C call). Returns index and value, or no results for a nil slot.
|.ffunc_2 ipairs_aux
|  mov TAB:RB, [BASE]
|  checktab TAB:RB, ->fff_fallback
|.if DUALNUM
|  mov RA, [BASE+8]
|  checkint RA, ->fff_fallback
|.else
|  checknumtp [BASE+8], ->fff_fallback
|  movsd xmm0, qword [BASE+8]
|.endif
|  mov PC, [BASE-8]
|.if DUALNUM
|  add RAd, 1
|  setint ITYPE, RA
|  mov [BASE-16], ITYPE
|.else
|  sseconst_1 xmm1, TMPR
|  addsd xmm0, xmm1
|  cvttsd2si RAd, xmm0
|  movsd qword [BASE-16], xmm0
|.endif
|  cmp RAd, TAB:RB->asize; jae >2  // Not in array part?
|  mov RD, TAB:RB->array
|  lea RD, [RD+RA*8]
|1:
|  cmp aword [RD], LJ_TNIL; je ->fff_res0
|  // Copy array slot.
|  mov RB, [RD]
|  mov [BASE-8], RB
|->fff_res2:
|  mov RDd, 1+2
|  jmp ->fff_res
|2:  // Check for empty hash part first. Otherwise call C function.
|  cmp dword TAB:RB->hmask, 0; je ->fff_res0
|.if X64WIN
|  mov TMPR, BASE
|  mov CARG2d, RAd
|  mov CARG1, TAB:RB
|  mov RB, TMPR
|.else
|  mov CARG1, TAB:RB
|  mov RB, BASE  // Save BASE.
|  mov CARG2d, RAd  // Caveat: CARG2 == BASE
|.endif
|  call extern lj_tab_getinth  // (GCtab *t, int32_t key)
|  // cTValue * or NULL returned in eax (RD).
|  mov BASE, RB
|  test RD, RD
|  jnz <1
|->fff_res0:
|  mov RDd, 1+0
|  jmp ->fff_res
|
|// ipairs: returns three values: upvalue[0] of the running C function
|// (tagged as a function), the table argument and the initial index 0.
|.ffunc_1 ipairs
|  mov TAB:RB, [BASE]
|  mov TMPR, TAB:RB  // Copy of the argument for the second result.
|  checktab TAB:RB, ->fff_fallback
#if LJ_52
|  cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
#endif
|  mov CFUNC:RD, [BASE-16]
|  cleartp CFUNC:RD
|  mov CFUNC:RD, CFUNC:RD->upvalue[0]
|  settp CFUNC:RD, LJ_TFUNC
|  mov PC, [BASE-8]  // Fetch before the slot at BASE-8 is overwritten.
|  mov [BASE-16], CFUNC:RD
|  mov [BASE-8], TMPR
|.if DUALNUM
|  mov64 RD, ((uint64_t)LJ_TISNUM<<47)
|  mov [BASE], RD
|.else
|  mov qword [BASE], 0
|.endif
|  mov RDd, 1+3
|  jmp ->fff_res
|
|//-- Base library: catch errors ----------------------------------------
|
|// pcall: builds a protected-call frame in place. The argument list is
|// shifted up by one slot, RA becomes the new base and PC the frame link
|// (delta plus FRAME_PCALL, plus 1 if a hook is active).
|.ffunc_1 pcall
|  // The copy loop below stores to the slot at the current top, i.e.
|  // [BASE+(NARGS-1)*8]. Make sure this is still within L->maxstack.
|  // Otherwise take the fallback, which grows the stack and retries.
|  mov L:RB, SAVE_L
|  lea RA, [BASE+NARGS:RD*8]
|  cmp RA, L:RB->maxstack; ja ->fff_fallback
|  lea RA, [BASE+16]
|  sub NARGS:RDd, 1
|  mov PCd, 16+FRAME_PCALL
|1:  // Shared with xpcall. RA = new base, RD = new nargs+1, PC = link.
|  movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)]
|  shr RB, HOOK_ACTIVE_SHIFT
|  and RB, 1
|  add PC, RB  // Remember active hook before pcall.
|  // Note: this does a (harmless) copy of the function to the PC slot, too.
|  mov KBASE, RD
|2:  // Move slots up by one, starting with the highest one.
|  mov RB, [RA+KBASE*8-24]
|  mov [RA+KBASE*8-16], RB
|  sub KBASE, 1
|  ja <2
|  jmp ->vm_call_dispatch
|
|// xpcall: like pcall, but the second argument must be a function. It is
|// swapped with the first argument and the new base is one slot higher.
|.ffunc_2 xpcall
|  // Same stack check as for pcall: the shared copy loop stores to the
|  // slot at the current top here, too.
|  mov L:RB, SAVE_L
|  lea RA, [BASE+NARGS:RD*8]
|  cmp RA, L:RB->maxstack; ja ->fff_fallback
|  mov LFUNC:RA, [BASE+8]
|  checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback
|  mov LFUNC:RB, [BASE]  // Swap function and traceback.
|  mov [BASE], LFUNC:RA
|  mov [BASE+8], LFUNC:RB
|  lea RA, [BASE+24]
|  sub NARGS:RDd, 2
|  mov PCd, 24+FRAME_PCALL
|  jmp <1
|
|//-- Coroutine library --------------------------------------------------
|
|// Shared body for coroutine.resume (resume = 1) and the function created
|// by coroutine.wrap (resume = 0). The coroutine is taken from the first
|// argument or from upvalue[0] of the running C function, respectively.
|.macro coroutine_resume_wrap, resume
|.if resume
|.ffunc_1 coroutine_resume
|  mov L:RB, [BASE]
|  cleartp L:RB
|.else
|.ffunc coroutine_wrap_aux
|  mov CFUNC:RB, [BASE-16]
|  cleartp CFUNC:RB
|  mov L:RB, CFUNC:RB->upvalue[0].gcr
|  cleartp L:RB
|.endif
|  mov PC, [BASE-8]
|  mov SAVE_PC, PC
|  mov TMP1, L:RB  // TMP1 = coroutine, needed again after the resume.
|.if resume
|  checktptp [BASE], LJ_TTHREAD, ->fff_fallback
|.endif
|  cmp aword L:RB->cframe, 0; jne ->fff_fallback
|  cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
|  mov RA, L:RB->top  // mov keeps the flags of the status compare.
|  je >1  // Status != LUA_YIELD (i.e. 0)?
|  cmp RA, L:RB->base  // Check for presence of initial func.
|  je ->fff_fallback
|  mov PC, [RA-8]  // Move initial function up.
|  mov [RA], PC
|  add RA, 8
|1:
|.if resume
|  lea PC, [RA+NARGS:RD*8-16]  // Check stack space (-1-thread).
|.else
|  lea PC, [RA+NARGS:RD*8-8]  // Check stack space (-1).
|.endif
|  cmp PC, L:RB->maxstack; ja ->fff_fallback
|  mov L:RB->top, PC
|
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE
|.if resume
|  add BASE, 8  // Keep resumed thread in stack for GC.
|.endif
|  mov L:RB->top, BASE
|.if resume
|  lea RB, [BASE+NARGS:RD*8-24]  // RB = end of source for stack move.
|.else
|  lea RB, [BASE+NARGS:RD*8-16]  // RB = end of source for stack move.
|.endif
|  sub RB, PC  // Relative to PC.
|
|  cmp PC, RA
|  je >3
|2:  // Move args to coroutine.
|  mov RC, [PC+RB]
|  mov [PC-8], RC
|  sub PC, 8
|  cmp PC, RA
|  jne <2
|3:
|  mov CARG2, RA
|  mov CARG1, TMP1
|  call ->vm_resume  // (lua_State *L, TValue *base, 0, 0)
|
|  // Back in the resuming thread: RB = its L, PC = the coroutine.
|  mov L:RB, SAVE_L
|  mov L:PC, TMP1
|  mov BASE, L:RB->base
|  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
|  set_vmstate INTERP
|
|  cmp eax, LUA_YIELD
|  ja >8  // Status above LUA_YIELD: handled as an error.
|4:
|  mov RA, L:PC->base
|  mov KBASE, L:PC->top
|  mov L:PC->top, RA  // Clear coroutine stack.
|  mov PC, KBASE
|  sub PC, RA  // PC = size of the results in bytes.
|  je >6  // No results?
|  lea RD, [BASE+PC]
|  shr PCd, 3  // PC = number of results.
|  cmp RD, L:RB->maxstack
|  ja >9  // Need to grow stack?
|
|  mov RB, BASE
|  sub RB, RA  // RB = distance from coroutine slots to BASE.
|5:  // Move results from coroutine.
|  mov RD, [RA]
|  mov [RA+RB], RD
|  add RA, 8
|  cmp RA, KBASE
|  jne <5
|6:
|.if resume
|  lea RDd, [PCd+2]  // nresults+1 = 1 + true + results.
|  mov_true ITYPE  // Prepend true to results.
|  mov [BASE-8], ITYPE
|.else
|  lea RDd, [PCd+1]  // nresults+1 = 1 + results.
|.endif
|7:
|  mov PC, SAVE_PC
|  mov MULTRES, RDd
|.if resume
|  mov RA, -8  // Results start at BASE-8 (the prepended status).
|.else
|  xor RAd, RAd  // Results start at BASE.
|.endif
|  test PCd, FRAME_TYPE
|  jz ->BC_RET_Z
|  jmp ->vm_return
|
|8:  // Coroutine returned with error (at co->top-1).
|.if resume
|  mov_false ITYPE  // Prepend false to results.
|  mov [BASE-8], ITYPE
|  mov RA, L:PC->top
|  sub RA, 8
|  mov L:PC->top, RA  // Clear error from coroutine stack.
|  // Copy error message.
|  mov RD, [RA]
|  mov [BASE], RD
|  mov RDd, 1+2  // nresults+1 = 1 + false + error.
|  jmp <7
|.else
|  mov CARG2, L:PC
|  mov CARG1, L:RB
|  call extern lj_ffh_coroutine_wrap_err  // (lua_State *L, lua_State *co)
|  // Error function does not return.
|.endif
|
|9:  // Handle stack expansion on return from yield.
|  mov L:RA, TMP1
|  mov L:RA->top, KBASE  // Undo coroutine stack clearing.
|  mov CARG2, PC  // PC holds the number of results here.
|  mov CARG1, L:RB
|  call extern lj_state_growstack  // (lua_State *L, int n)
|  mov L:PC, TMP1
|  mov BASE, L:RB->base
|  jmp <4  // Retry the stack move.
|.endmacro
|
|  coroutine_resume_wrap 1  // coroutine.resume
|  coroutine_resume_wrap 0  // coroutine.wrap
|
|// coroutine.yield: only handled inline if L->cframe has CFRAME_RESUME
|// set. Stores base/top, clears cframe, sets the status to LUA_YIELD and
|// leaves through vm_leave_unw.
|.ffunc coroutine_yield
|  mov L:RB, SAVE_L
|  test aword L:RB->cframe, CFRAME_RESUME
|  jz ->fff_fallback
|  mov L:RB->base, BASE
|  lea RD, [BASE+NARGS:RD*8-8]
|  mov L:RB->top, RD
|  xor RDd, RDd
|  mov aword L:RB->cframe, RD
|  mov al, LUA_YIELD  // RD is rax, so eax now equals LUA_YIELD.
|  mov byte L:RB->status, al
|  jmp ->vm_leave_unw
|
|//-- Math library -------------------------------------------------------
|
|// math.abs. This also hosts the shared result exits fff_resbit/fff_resi
|// (DUALNUM only) and fff_resRB.
|  .ffunc_1 math_abs
|  mov RB, [BASE]
|.if DUALNUM
|  checkint RB, >3
|  cmp RBd, 0; jns ->fff_resi
|  neg RBd; js >2  // Still negative after neg: result is 2^31.
|->fff_resbit:
|->fff_resi:
|  setint RB
|->fff_resRB:
|  mov PC, [BASE-8]
|  mov [BASE-16], RB
|  jmp ->fff_res1
|2:
|  mov64 RB, U64x(41e00000,00000000)  // 2^31.
|  jmp ->fff_resRB
|3:
|  ja ->fff_fallback
|.else
|  checknum RB, ->fff_fallback
|.endif
|  // Clear the top bit (sign bit of a double).
|  shl RB, 1
|  shr RB, 1
|  mov PC, [BASE-8]
|  mov [BASE-16], RB
|  jmp ->fff_res1
|
|// math.sqrt: sqrtsd is applied by .ffunc_n; falls into fff_resxmm0.
|.ffunc_n math_sqrt, sqrtsd
|->fff_resxmm0:  // Return the number in xmm0 as single result.
|  mov PC, [BASE-8]
|  movsd qword [BASE-16], xmm0
|  // fallthrough
|
|// Common fast function exits. Results are expected at BASE-16 and up.
|//   fff_res1: exactly one result.
|//   fff_res:  RD = nresults+1.
|//   fff_res_: same, but MULTRES has already been set.
|->fff_res1:
|  mov RDd, 1+1
|->fff_res:
|  mov MULTRES, RDd
|->fff_res_:
|  test PCd, FRAME_TYPE
|  jnz >7
|5:
|  cmp PC_RB, RDL  // More results expected?
|  ja >6
|  // Adjust BASE. KBASE is assumed to be set for the calling frame.
|  movzx RAd, PC_RA
|  neg RA
|  lea BASE, [BASE+RA*8-16]  // base = base - (RA+2)*8
|  ins_next
|
|6:  // Fill up results with nil.
|  mov aword [BASE+RD*8-24], LJ_TNIL
|  add RD, 1
|  jmp <5
|
|7:  // Non-standard return case.
|  mov RA, -16  // Results start at BASE+RA = BASE-16.
|  jmp ->vm_return
|
|// math.floor/math.ceil: rounds through vm_<func>_sse. In DUALNUM builds
|// the result is returned as an integer if it converts back exactly.
|.macro math_round, func
|  .ffunc math_ .. func
|.if DUALNUM
|  mov RB, [BASE]
|  checknumx RB, ->fff_resRB, je
|  ja ->fff_fallback
|.else
|  checknumtp [BASE], ->fff_fallback
|.endif
|  movsd xmm0, qword [BASE]
|  call ->vm_ .. func .. _sse
|.if DUALNUM
|  cvttsd2si RBd, xmm0
|  cmp RBd, 0x80000000  // Value cvttsd2si yields for unrepresentable input.
|  jne ->fff_resi
|  // Ambiguous: convert back and compare to tell the two cases apart.
|  cvtsi2sd xmm1, RBd
|  ucomisd xmm0, xmm1
|  jp ->fff_resxmm0  // Unordered (NaN): return as number.
|  je ->fff_resi
|.endif
|  jmp ->fff_resxmm0
|.endmacro
|
|  math_round floor
|  math_round ceil
|
|// math.log: one-argument case only. BASE is kept in RB across the call.
|.ffunc math_log
|  cmp NARGS:RDd, 1+1; jne ->fff_fallback  // Exactly one argument.
|  checknumtp [BASE], ->fff_fallback
|  movsd xmm0, qword [BASE]
|  mov RB, BASE
|  call extern log
|  mov BASE, RB
|  jmp ->fff_resxmm0
|
|// math_extern func: fast function math_<func> calling the external
|// one-argument function func on xmm0.
|.macro math_extern, func
|  .ffunc_n math_ .. func
|  mov RB, BASE
|  call extern func
|  mov BASE, RB
|  jmp ->fff_resxmm0
|.endmacro
|
|// math_extern2 func: same for two arguments in xmm0 and xmm1.
|.macro math_extern2, func
|  .ffunc_nn math_ .. func
|  mov RB, BASE
|  call extern func
|  mov BASE, RB
|  jmp ->fff_resxmm0
|.endmacro
|
|  math_extern log10
|  math_extern exp
|  math_extern sin
|  math_extern cos
|  math_extern tan
|  math_extern asin
|  math_extern acos
|  math_extern atan
|  math_extern sinh
|  math_extern cosh
|  math_extern tanh
|  math_extern2 pow
|  math_extern2 atan2
|  math_extern2 fmod
|
|// math.ldexp: computed on the x87 stack with fscale.
|.ffunc_2 math_ldexp
|  checknumtp [BASE], ->fff_fallback
|  checknumtp [BASE+8], ->fff_fallback
|  fld qword [BASE+8]
|  fld qword [BASE]
|  fscale
|  fpop1
|  mov PC, [BASE-8]
|  fstp qword [BASE-16]
|  jmp ->fff_res1
|
|// math.frexp: calls frexp() with TMP1 as the int output. Returns the
|// value from xmm0 and the integer read back from TMP1.
|.ffunc_n math_frexp
|  mov RB, BASE
|.if X64WIN
|  lea CARG2, TMP1  // Caveat: CARG2 == BASE
|.else
|  lea CARG1, TMP1
|.endif
|  call extern frexp
|  mov BASE, RB
|  mov RBd, TMP1d
|  mov PC, [BASE-8]
|  movsd qword [BASE-16], xmm0
|.if DUALNUM
|  setint RB
|  mov [BASE-8], RB
|.else
|  cvtsi2sd xmm1, RBd
|  movsd qword [BASE-8], xmm1
|.endif
|  mov RDd, 1+2
|  jmp ->fff_res
|
|// math.modf: modf() writes through the pointer to the first result slot
|// (BASE-16). The value returned in xmm0 becomes the second result.
|.ffunc_n math_modf
|  mov RB, BASE
|.if X64WIN
|  lea CARG2, [BASE-16]  // Caveat: CARG2 == BASE
|.else
|  lea CARG1, [BASE-16]
|.endif
|  call extern modf
|  mov BASE, RB
|  mov PC, [BASE-8]  // Fetch before the slot at BASE-8 is overwritten.
|  movsd qword [BASE-8], xmm0
|  mov RDd, 1+2
|  jmp ->fff_res
|
|// math_minmax name, cmovop, sseop: folds all arguments into one result.
|// RA counts from 2 up to NARGS (exclusive) and addresses the next
|// argument at [BASE+RA*8-8]. In DUALNUM builds integers are combined
|// with cmp/cmovop until the first non-integer shows up; the remainder
|// is combined as doubles with sseop.
|.macro math_minmax, name, cmovop, sseop
|  .ffunc_1 name
|  mov RAd, 2
|.if DUALNUM
|  mov RB, [BASE]
|  checkint RB, >4
|1:  // Handle integers.
|  cmp RAd, RDd; jae ->fff_resRB
|  mov TMPR, [BASE+RA*8-8]
|  checkint TMPR, >3
|  cmp RBd, TMPRd
|  cmovop RB, TMPR
|  add RAd, 1
|  jmp <1
|3:
|  ja ->fff_fallback
|  // Convert intermediate result to number and continue below.
|  cvtsi2sd xmm0, RBd
|  jmp >6
|4:
|  ja ->fff_fallback
|.else
|  checknumtp [BASE], ->fff_fallback
|.endif
|
|  movsd xmm0, qword [BASE]
|5:  // Handle numbers or integers.
|  cmp RAd, RDd; jae ->fff_resxmm0
|.if DUALNUM
|  mov RB, [BASE+RA*8-8]
|  checknumx RB, >6, jb
|  ja ->fff_fallback
|  cvtsi2sd xmm1, RBd
|  jmp >7
|.else
|  checknumtp [BASE+RA*8-8], ->fff_fallback
|.endif
|6:
|  movsd xmm1, qword [BASE+RA*8-8]
|7:
|  sseop xmm0, xmm1
|  add RAd, 1
|  jmp <5
|.endmacro
|
|  math_minmax math_min, cmovg, minsd
|  math_minmax math_max, cmovl, maxsd
|
|//-- String library -----------------------------------------------------
|
|// string.byte: returns the first byte of the string as a number, or no
|// results for an empty string.
|.ffunc string_byte  // Only handle the 1-arg case here.
|  cmp NARGS:RDd, 1+1; jne ->fff_fallback
|  mov STR:RB, [BASE]
|  checkstr STR:RB, ->fff_fallback
|  mov PC, [BASE-8]
|  cmp dword STR:RB->len, 1
|  jb ->fff_res0  // Return no results for empty string.
|  movzx RBd, byte STR:RB[1]
|.if DUALNUM
|  jmp ->fff_resi
|.else
|  cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0
|.endif
|
|// string.char: builds a one-byte string from an argument in 0..255.
|.ffunc string_char  // Only handle the 1-arg case here.
|  ffgccheck
|  cmp NARGS:RDd, 1+1; jne ->fff_fallback  // *Exactly* 1 arg.
|.if DUALNUM
|  mov RB, [BASE]
|  checkint RB, ->fff_fallback
|.else
|  checknumtp [BASE], ->fff_fallback
|  cvttsd2si RBd, qword [BASE]
|.endif
|  cmp RBd, 255; ja ->fff_fallback  // Unsigned: rejects negatives, too.
|  mov TMP1d, RBd
|  mov TMPRd, 1
|  lea RD, TMP1  // Points to stack. Little-endian.
|// fff_newstr: create a string from RD = data pointer, TMPRd = length.
|->fff_newstr:
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE
|  mov CARG3d, TMPRd  // Zero-extended to size_t.
|  mov CARG2, RD
|  mov CARG1, L:RB
|  mov SAVE_PC, PC
|  call extern lj_str_new  // (lua_State *L, char *str, size_t l)
|->fff_resstr:
|  // GCstr * returned in eax (RD).
|  mov BASE, L:RB->base
|  mov PC, [BASE-8]
|  settp STR:RD, LJ_TSTR
|  mov [BASE-16], STR:RD
|  jmp ->fff_res1
|
|// string.sub(s, start [, end]): the end defaults to -1. Negative
|// positions count from the end of the string; out-of-range positions
|// are clamped. Register use: RA = start, TMPR = end, RC = length.
|.ffunc string_sub
|  ffgccheck
|  mov TMPRd, -1  // mov keeps the flags for the branches below.
|  cmp NARGS:RDd, 1+2; jb ->fff_fallback
|  jna >1  // Exactly two arguments: keep the default end.
|.if DUALNUM
|  mov TMPR, [BASE+16]
|  checkint TMPR, ->fff_fallback
|.else
|  checknumtp [BASE+16], ->fff_fallback
|  cvttsd2si TMPRd, qword [BASE+16]
|.endif
|1:
|  mov STR:RB, [BASE]
|  checkstr STR:RB, ->fff_fallback
|.if DUALNUM
|  mov ITYPE, [BASE+8]
|  mov RAd, ITYPEd  // Must clear hiword for lea below.
|  sar ITYPE, 47
|  cmp ITYPEd, LJ_TISNUM
|  jne ->fff_fallback
|.else
|  checknumtp [BASE+8], ->fff_fallback
|  cvttsd2si RAd, qword [BASE+8]
|.endif
|  mov RCd, STR:RB->len
|  cmp RCd, TMPRd  // len < end? (unsigned compare)
|  jb >5
|2:
|  test RAd, RAd  // start <= 0?
|  jle >7
|3:
|  sub TMPRd, RAd  // start > end?
|  jl ->fff_emptystr
|  lea RD, [STR:RB+RAd+#STR-1]  // Address of the byte at position start.
|  add TMPRd, 1  // Length = end - start + 1.
|4:
|  jmp ->fff_newstr
|
|5:  // Negative end or overflow.
|  jl >6  // Signed view of the same compare: len < end means overflow.
|  lea TMPRd, [TMPRd+RCd+1]  // end = end+(len+1)
|  jmp <2
|6:  // Overflow.
|  mov TMPRd, RCd  // end = len
|  jmp <2
|
|7:  // Negative start or underflow.
|  je >8
|  add RAd, RCd  // start = start+(len+1)
|  add RAd, 1
|  jg <3  // start > 0?
|8:  // Underflow.
|  mov RAd, 1  // start = 1
|  jmp <3
|
|->fff_emptystr:  // Range underflow.
|  xor TMPRd, TMPRd  // Zero length. Any ptr in RD is ok.
|  jmp <4
|
|// ffstring_op name: fast function string_<name>. Resets the global
|// temporary buffer (w = b), calls lj_buf_putstr_<name> with buffer and
|// string and converts the returned buffer with lj_buf_tostr.
|.macro ffstring_op, name
|  .ffunc_1 string_ .. name
|  ffgccheck
|.if X64WIN
|  mov STR:TMPR, [BASE]
|  checkstr STR:TMPR, ->fff_fallback
|.else
|  mov STR:CARG2, [BASE]
|  checkstr STR:CARG2, ->fff_fallback
|.endif
|  mov L:RB, SAVE_L
|  lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
|  mov L:RB->base, BASE
|.if X64WIN
|  mov STR:CARG2, STR:TMPR  // Caveat: CARG2 == BASE
|.endif
|  mov RC, SBUF:CARG1->b
|  mov SBUF:CARG1->L, L:RB
|  mov SBUF:CARG1->w, RC
|  mov SAVE_PC, PC
|  call extern lj_buf_putstr_ .. name
|  mov CARG1, rax
|  call extern lj_buf_tostr
|  jmp ->fff_resstr
|.endmacro
|
|ffstring_op reverse
|ffstring_op lower
|ffstring_op upper
|
|//-- Bit library --------------------------------------------------------
|
|// .ffunc_bit name, kind, fdef: common prologue of the bit functions.
|// Opens the function with fdef and leaves the first argument as a
|// 32 bit value in RBd at local label 2. Numbers are converted by adding
|// the sseconst_tobit constant and taking the low 32 bits of the sum.
|//   kind 0: DUALNUM integers return right away through fff_resbit.
|//   kind 1: DUALNUM integers skip the conversion.
|//   kind 2: same, but the constant is loaded into xmm1 up front, so it
|//           is available on every path.
|.macro .ffunc_bit, name, kind, fdef
|  fdef name
|.if kind == 2
|  sseconst_tobit xmm1, RB
|.endif
|.if DUALNUM
|  mov RB, [BASE]
|  checkint RB, >1
|.if kind > 0
|  jmp >2
|.else
|  jmp ->fff_resbit
|.endif
|1:
|  ja ->fff_fallback
|  movd xmm0, RB
|.else
|  checknumtp [BASE], ->fff_fallback
|  movsd xmm0, qword [BASE]
|.endif
|.if kind < 2
|  sseconst_tobit xmm1, RB
|.endif
|  addsd xmm0, xmm1
|  movd RBd, xmm0
|2:
|.endmacro
|
|// Two-parameter form: uses .ffunc_1 as the function opener.
|.macro .ffunc_bit, name, kind
|  .ffunc_bit name, kind, .ffunc_1
|.endmacro
|
|.ffunc_bit bit_tobit, 0
|  jmp ->fff_resbit
|
|// .ffunc_bit_op name, ins: folds the remaining arguments into RBd with
|// ins, walking from the last argument down to the second one.
|.macro .ffunc_bit_op, name, ins
|  .ffunc_bit name, 2
|  mov TMPRd, NARGS:RDd  // Save for fallback.
|  lea RD, [BASE+NARGS:RD*8-16]  // RD = address of the last argument.
|1:
|  cmp RD, BASE
|  jbe ->fff_resbit  // Back at the first argument: done.
|.if DUALNUM
|  mov RA, [RD]
|  checkint RA, >2
|  ins RBd, RAd
|  sub RD, 8
|  jmp <1
|2:
|  ja ->fff_fallback_bit_op
|  movd xmm0, RA
|.else
|  checknumtp [RD], ->fff_fallback_bit_op
|  movsd xmm0, qword [RD]
|.endif
|  addsd xmm0, xmm1
|  movd RAd, xmm0
|  ins RBd, RAd
|  sub RD, 8
|  jmp <1
|.endmacro
|
|.ffunc_bit_op bit_band, and
|.ffunc_bit_op bit_bor, or
|.ffunc_bit_op bit_bxor, xor
|
|.ffunc_bit bit_bswap, 1
|  bswap RBd
|  jmp ->fff_resbit
|
|.ffunc_bit bit_bnot, 1
|  not RBd
|.if DUALNUM
|  jmp ->fff_resbit
|.else
|->fff_resbit:  // Non-DUALNUM: return RBd converted to a double.
|  cvtsi2sd xmm0, RBd
|  jmp ->fff_resxmm0
|.endif
|
|->fff_fallback_bit_op:
|  mov NARGS:RDd, TMPRd  // Restore for fallback
|  jmp ->fff_fallback
|
|// .ffunc_bit_sh name, ins: shift/rotate RBd by the second argument,
|// which must end up in RA (rcx) because ins takes its count in cl.
|.macro .ffunc_bit_sh, name, ins
|.if DUALNUM
|  .ffunc_bit name, 1, .ffunc_2
|  // Note: no inline conversion from number for 2nd argument!
|  mov RA, [BASE+8]
|  checkint RA, ->fff_fallback
|.else
|  .ffunc_nn name
|  sseconst_tobit xmm2, RB
|  addsd xmm0, xmm2
|  addsd xmm1, xmm2
|  movd RBd, xmm0
|  movd RAd, xmm1
|.endif
|  ins RBd, cl  // Assumes RA is ecx.
|  jmp ->fff_resbit
|.endmacro
|
|.ffunc_bit_sh bit_lshift, shl
|.ffunc_bit_sh bit_rshift, shr
|.ffunc_bit_sh bit_arshift, sar
|.ffunc_bit_sh bit_rol, rol
|.ffunc_bit_sh bit_ror, ror
|
|//-----------------------------------------------------------------------
|
|// fff_fallback_1/_2 force NARGS to 1+1 resp. 1+2 before the fallback.
|->fff_fallback_2:
|  mov NARGS:RDd, 1+2  // Other args are ignored, anyway.
|  jmp ->fff_fallback
|->fff_fallback_1:
|  mov NARGS:RDd, 1+1  // Other args are ignored, anyway.
|->fff_fallback:  // Call fast function fallback handler.
|  // BASE = new base, RD = nargs+1
|  mov L:RB, SAVE_L
|  mov PC, [BASE-8]  // Fallback may overwrite PC.
|  mov SAVE_PC, PC  // Redundant (but a defined value).
|  mov L:RB->base, BASE
|  lea RD, [BASE+NARGS:RD*8-8]  // RD = top (one past the last argument).
|  lea RA, [RD+8*LUA_MINSTACK]  // Ensure enough space for handler.
|  mov L:RB->top, RD
|  mov CFUNC:RD, [BASE-16]
|  cleartp CFUNC:RD
|  cmp RA, L:RB->maxstack
|  ja >5  // Need to grow stack.
|  mov CARG1, L:RB
|  call aword CFUNC:RD->f  // (lua_State *L)
|  mov BASE, L:RB->base
|  // Either throws an error, or recovers and returns -1, 0 or nresults+1.
|  test RDd, RDd; jg ->fff_res  // Returned nresults+1?
|1:
|  // Recompute nargs+1 from L->top. lea and mov do not touch the flags,
|  // so the jne below still tests the handler's return value.
|  mov RA, L:RB->top
|  sub RA, BASE
|  shr RAd, 3
|  test RDd, RDd
|  lea NARGS:RDd, [RAd+1]
|  mov LFUNC:RB, [BASE-16]
|  jne ->vm_call_tail  // Returned -1?
|  cleartp LFUNC:RB
|  ins_callt  // Returned 0: retry fast path.
|
|// Reconstruct previous base for vmeta_call during tailcall.
|->vm_call_tail:
|  mov RA, BASE
|  test PCd, FRAME_TYPE
|  jnz >3
|  movzx RBd, PC_RA
|  neg RB
|  lea BASE, [BASE+RB*8-16]  // base = base - (RB+2)*8
|  jmp ->vm_call_dispatch  // Resolve again for tailcall.
|3:  // FRAME_TYPE bits set: PC with the low 3 bits cleared is the delta.
|  mov RB, PC
|  and RB, -8
|  sub BASE, RB
|  jmp ->vm_call_dispatch  // Resolve again for tailcall.
|
|5:  // Grow stack for fallback handler.
|  mov CARG2d, LUA_MINSTACK
|  mov CARG1, L:RB
|  call extern lj_state_growstack  // (lua_State *L, int n)
|  mov BASE, L:RB->base
|  xor RDd, RDd  // Simulate a return 0.
|  jmp <1  // Dumb retry (goes through ff first).
|
|// Reached by a call from ffgccheck. The return address is moved from
|// the machine stack to TMP1 for the duration of the C call and pushed
|// back before the ret. Returns with BASE and NARGS:RD reloaded from L.
|->fff_gcstep:  // Call GC step function.
|  // BASE = new base, RD = nargs+1
|  pop RB  // Must keep stack at same level.
|  mov TMP1, RB  // Save return address
|  mov L:RB, SAVE_L
|  mov SAVE_PC, PC  // Redundant (but a defined value).
|  mov L:RB->base, BASE
|  lea RD, [BASE+NARGS:RD*8-8]
|  mov CARG1, L:RB
|  mov L:RB->top, RD
|  call extern lj_gc_step  // (lua_State *L)
|  mov BASE, L:RB->base
|  mov RD, L:RB->top
|  sub RD, BASE
|  shr RDd, 3
|  add NARGS:RDd, 1
|  mov RB, TMP1
|  push RB  // Restore return address.
|  ret
|
|//-----------------------------------------------------------------------
|//-- Special dispatch targets -------------------------------------------
|//-----------------------------------------------------------------------
|
|// vm_record, vm_rethook and vm_inshook share the local labels further
|// down: 1 calls lj_dispatch_ins, 5 re-dispatches without a call.
|->vm_record:  // Dispatch target for recording phase.
|.if JIT
|  movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
|  test RDL, HOOK_VMEVENT  // No recording while in vmevent.
|  jnz >5
|  // Decrement the hookcount for consistency, but always do the call.
|  test RDL, HOOK_ACTIVE
|  jnz >1
|  test RDL, LUA_MASKLINE|LUA_MASKCOUNT
|  jz >1
|  dec dword [DISPATCH+DISPATCH_GL(hookcount)]
|  jmp >1
|.endif
|
|->vm_rethook:  // Dispatch target for return hooks.
|  movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
|  test RDL, HOOK_ACTIVE  // Hook already active?
|  jnz >5
|  jmp >1
|
|->vm_inshook:  // Dispatch target for instr/line hooks.
|  movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
|  test RDL, HOOK_ACTIVE  // Hook already active?
|  jnz >5
|
|  test RDL, LUA_MASKLINE|LUA_MASKCOUNT
|  jz >5
|  dec dword [DISPATCH+DISPATCH_GL(hookcount)]
|  jz >1  // Counter hit zero: always call.
|  test RDL, LUA_MASKLINE
|  jz >5  // Otherwise only call if line hooks are enabled.
|1:
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE
|  mov CARG2, PC  // Caveat: CARG2 == BASE
|  mov CARG1, L:RB
|  // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2242 | call extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) 2243 |3: 2244 | mov BASE, L:RB->base 2245 |4: 2246 | movzx RAd, PC_RA 2247 |5: 2248 | movzx OP, PC_OP 2249 | movzx RDd, PC_RD 2250 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins. 2251 | 2252 |->cont_hook: // Continue from hook yield. 2253 | add PC, 4 2254 | mov RA, [RB-40] 2255 | mov MULTRES, RAd // Restore MULTRES for *M ins. 2256 | jmp <4 2257 | 2258 |->vm_hotloop: // Hot loop counter underflow. 2259 |.if JIT 2260 | mov LFUNC:RB, [BASE-16] // Same as curr_topL(L). 2261 | cleartp LFUNC:RB 2262 | mov RB, LFUNC:RB->pc 2263 | movzx RDd, byte [RB+PC2PROTO(framesize)] 2264 | lea RD, [BASE+RD*8] 2265 | mov L:RB, SAVE_L 2266 | mov L:RB->base, BASE 2267 | mov L:RB->top, RD 2268 | mov CARG2, PC 2269 | lea CARG1, [DISPATCH+GG_DISP2J] 2270 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB 2271 | mov SAVE_PC, PC 2272 | call extern lj_trace_hot // (jit_State *J, const BCIns *pc) 2273 | jmp <3 2274 |.endif 2275 | 2276 |->vm_callhook: // Dispatch target for call hooks. 2277 | mov SAVE_PC, PC 2278 |.if JIT 2279 | jmp >1 2280 |.endif 2281 | 2282 |->vm_hotcall: // Hot call counter underflow. 2283 |.if JIT 2284 | mov SAVE_PC, PC 2285 | or PC, 1 // Marker for hot call. 2286 |1: 2287 |.endif 2288 | lea RD, [BASE+NARGS:RD*8-8] 2289 | mov L:RB, SAVE_L 2290 | mov L:RB->base, BASE 2291 | mov L:RB->top, RD 2292 | mov CARG2, PC 2293 | mov CARG1, L:RB 2294 | call extern lj_dispatch_call // (lua_State *L, const BCIns *pc) 2295 | // ASMFunction returned in eax/rax (RD). 2296 | mov SAVE_PC, 0 // Invalidate for subsequent line hook. 2297 |.if JIT 2298 | and PC, -2 2299 |.endif 2300 | mov BASE, L:RB->base 2301 | mov RA, RD 2302 | mov RD, L:RB->top 2303 | sub RD, BASE 2304 | mov RB, RA 2305 | movzx RAd, PC_RA 2306 | shr RDd, 3 2307 | add NARGS:RDd, 1 2308 | jmp RB 2309 | 2310 |->cont_stitch: // Trace stitching. 
|.if JIT
|  // BASE = base, RC = result, RB = mbase
|  mov TRACE:ITYPE, [RB-40]  // Save previous trace.
|  cleartp TRACE:ITYPE
|  mov TMPRd, MULTRES
|  movzx RAd, PC_RA
|  lea RA, [BASE+RA*8]  // Call base.
|  sub TMPRd, 1
|  jz >2
|1:  // Move results down.
|  mov RB, [RC]
|  mov [RA], RB
|  add RC, 8
|  add RA, 8
|  sub TMPRd, 1
|  jnz <1
|2:
|  // RC = BASE + (A + B - 1) slots: end of the wanted results.
|  movzx RCd, PC_RA
|  movzx RBd, PC_RB
|  add RC, RB
|  lea RC, [BASE+RC*8-8]
|3:
|  cmp RC, RA
|  ja >9  // More results wanted?
|
|  test TRACE:ITYPE, TRACE:ITYPE
|  jz ->cont_nop
|  movzx RBd, word TRACE:ITYPE->traceno
|  movzx RDd, word TRACE:ITYPE->link
|  cmp RDd, RBd
|  je ->cont_nop  // Blacklisted.
|  test RDd, RDd
|  jne =>BC_JLOOP  // Jump to stitched trace.
|
|  // Stitch a new trace to the previous trace.
|  // RB still holds the zero-extended traceno loaded above.
|  // NOTE(review): RB is a 64-bit register, so this stores 8 bytes, whereas
|  // vm_exit_handler stores J->exitno from the 32-bit RCd. Confirm that
|  // overwriting the 4 bytes after J->exitno with zero is intended.
|  mov [DISPATCH+DISPATCH_J(exitno)], RB
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE
|  mov CARG2, PC
|  lea CARG1, [DISPATCH+GG_DISP2J]
|  mov aword [DISPATCH+DISPATCH_J(L)], L:RB
|  call extern lj_dispatch_stitch  // (jit_State *J, const BCIns *pc)
|  mov BASE, L:RB->base
|  jmp ->cont_nop
|
|9:  // Fill up results with nil.
|  mov aword [RA], LJ_TNIL
|  add RA, 8
|  jmp <3
|.endif
|
|->vm_profhook:  // Dispatch target for profiler hook.
#if LJ_HASPROFILE
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE
|  mov CARG2, PC  // Caveat: CARG2 == BASE (rdx) on x64/Windows.
|  mov CARG1, L:RB
|  call extern lj_dispatch_profile  // (lua_State *L, const BCIns *pc)
|  mov BASE, L:RB->base
|  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
|  sub PC, 4
|  jmp ->cont_nop
#endif
|
|//-----------------------------------------------------------------------
|//-- Trace exit handler -------------------------------------------------
|//-----------------------------------------------------------------------
|
|// Called from an exit stub with the exit number on the stack.
|// The 16 bit exit number is stored with two (sign-extended) push imm8.
|->vm_exit_handler:
|.if JIT
|  // Save the general-purpose registers. rbp is repointed above the pushes
|  // so the two exit-number bytes can be read at [rbp-8] and [rbp-16].
|  push r13; push r12
|  push r11; push r10; push r9; push r8
|  push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp
|  push rbx; push rdx; push rcx; push rax
|  movzx RCd, byte [rbp-8]  // Reconstruct exit number.
|  mov RCH, byte [rbp-16]
|  // The two exit-number slots are reused to save r15 and r14.
|  mov [rbp-8], r15; mov [rbp-16], r14
|  // DISPATCH is preserved on-trace in LJ_GC64 mode.
|  mov RAd, [DISPATCH+DISPATCH_GL(vmstate)]  // Get trace number.
|  set_vmstate EXIT
|  mov [DISPATCH+DISPATCH_J(exitno)], RCd
|  mov [DISPATCH+DISPATCH_J(parent)], RAd
|.if X64WIN
|  sub rsp, 16*8+4*8  // Room for SSE regs + save area.
|.else
|  sub rsp, 16*8  // Room for SSE regs.
|.endif
|  add rbp, -128
|  // Store the low 64 bits of xmm15..xmm0 at descending addresses.
|  movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14
|  movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
|  movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
|  movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
|  movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
|  movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
|  movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
|  movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0
|  // Caveat: RB is rbp.
|  mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
|  mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
|  mov aword [DISPATCH+DISPATCH_J(L)], L:RB
|  mov L:RB->base, BASE
|  // Second argument points at the register save area built above.
|.if X64WIN
|  lea CARG2, [rsp+4*8]
|.else
|  mov CARG2, rsp
|.endif
|  lea CARG1, [DISPATCH+GG_DISP2J]
|  mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
|  call extern lj_trace_exit  // (jit_State *J, ExitState *ex)
|  // MULTRES or negated error code returned in eax (RD).
|  mov RA, L:RB->cframe
|  and RA, CFRAME_RAWMASK
|  mov [RA+CFRAME_OFS_L], L:RB  // Set SAVE_L (on-trace resume/yield).
|  mov BASE, L:RB->base
|  mov PC, [RA+CFRAME_OFS_PC]  // Get SAVE_PC.
|  jmp >1  // Join vm_exit_interp below with RA = C frame; skips its lea.
|.endif
|->vm_exit_interp:
|  // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
|.if JIT
|  // Restore additional callee-save registers only used in compiled code.
|.if X64WIN
|  lea RA, [rsp+10*16+4*8]
|1:
|  movdqa xmm15, [RA-10*16]
|  movdqa xmm14, [RA-9*16]
|  movdqa xmm13, [RA-8*16]
|  movdqa xmm12, [RA-7*16]
|  movdqa xmm11, [RA-6*16]
|  movdqa xmm10, [RA-5*16]
|  movdqa xmm9, [RA-4*16]
|  movdqa xmm8, [RA-3*16]
|  movdqa xmm7, [RA-2*16]
|  mov rsp, RA  // Reposition stack to C frame.
|  movdqa xmm6, [RA-1*16]
|  mov r15, CSAVE_1
|  mov r14, CSAVE_2
|  mov r13, CSAVE_3
|  mov r12, CSAVE_4
|.else
|  lea RA, [rsp+16]
|1:
|  mov r13, [RA-8]
|  mov r12, [RA]
|  mov rsp, RA  // Reposition stack to C frame.
#ifdef LUA_USE_TRACE_LOGS
|  // Optional trace-exit logging. RD and PC are saved around the C call
|  // and BASE is reloaded from L->base afterwards.
|  mov CARG1, SAVE_L
|  mov L:CARG1->base, BASE
|  mov RB, RD  // Save RD
|  mov TMP1, PC  // Save PC
|  mov CARG3, PC  // CARG3 == BASE
|  mov CARG2d, dword [DISPATCH+DISPATCH_GL(vmstate)]
|  call extern lj_log_trace_direct_exit@8
|  mov PC, TMP1
|  mov RD, RB
|  mov RB, SAVE_L
|  mov BASE, L:RB->base
#endif
|.endif
|  test RDd, RDd; js >9  // Check for error from exit.
|  mov L:RB, SAVE_L
|  mov MULTRES, RDd
|  // Reload KBASE from the prototype of the function at [BASE-16].
|  mov LFUNC:KBASE, [BASE-16]
|  cleartp LFUNC:KBASE
|  mov KBASE, LFUNC:KBASE->pc
|  mov KBASE, [KBASE+PC2PROTO(k)]
|  mov L:RB->base, BASE
|  mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
|  set_vmstate INTERP
|  // Modified copy of ins_next which handles function header dispatch, too.
|  mov RCd, [PC]
|  movzx RAd, RCH
|  movzx OP, RCL
|  add PC, 4
|  shr RCd, 16
|  cmp OP, BC_FUNCF  // Function header?
|  jb >3  // No: dispatch with RC = operand D from the shift above.
|  cmp OP, BC_FUNCC+2  // Fast function?
|  jae >4
|2:
|  mov RCd, MULTRES  // RC/RD holds nres+1.
|3:
|  jmp aword [DISPATCH+OP*8]
|
|4:  // Check frame below fast function.
|  mov RC, [BASE-8]
|  test RCd, FRAME_TYPE
|  jnz <2  // Trace stitching continuation?
|  // Otherwise set KBASE for Lua function below fast function.
|  movzx RCd, byte [RC-3]
|  neg RC
|  mov LFUNC:KBASE, [BASE+RC*8-32]
|  cleartp LFUNC:KBASE
|  mov KBASE, LFUNC:KBASE->pc
|  mov KBASE, [KBASE+PC2PROTO(k)]
|  jmp <2
|
|9:  // Rethrow error from the right C frame.
|  mov CARG2d, RDd
|  mov CARG1, L:RB
|  neg CARG2d  // RD held the negated error code; pass it positive.
|  call extern lj_err_trace  // (lua_State *L, int errcode)
|.endif
|
|//-----------------------------------------------------------------------
|//-- Math helper functions ----------------------------------------------
|//-----------------------------------------------------------------------
|
|// FP value rounding. Called by math.floor/math.ceil fast functions
|// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
|//
|// Macro parameters: name = label to define (a second label with the
|// _sse suffix is defined too), mode = 0 floor, 1 ceil, 2 trunc.
|// cond is not referenced in the macro body.
|// Values with |x| >= 2^52 are returned unchanged via label 1; that branch
|// (jbe) is also taken when the compare is unordered.
|.macro vm_round, name, mode, cond
|->name:
|->name .. _sse:
|  sseconst_abs xmm2, RD
|  sseconst_2p52 xmm3, RD
|  movaps xmm1, xmm0
|  andpd xmm1, xmm2  // |x|
|  ucomisd xmm3, xmm1  // No truncation if 2^52 <= |x|.
|  jbe >1
|  andnpd xmm2, xmm0  // Isolate sign bit.
|.if mode == 2  // trunc(x)?
|  movaps xmm0, xmm1
|  addsd xmm1, xmm3  // (|x| + 2^52) - 2^52
|  subsd xmm1, xmm3
|  sseconst_1 xmm3, RD
|  cmpsd xmm0, xmm1, 1  // |x| < result?
|  andpd xmm0, xmm3
|  subsd xmm1, xmm0  // If yes, subtract 1.
|  orpd xmm1, xmm2  // Merge sign bit back in.
|.else
|  addsd xmm1, xmm3  // (|x| + 2^52) - 2^52
|  subsd xmm1, xmm3
|  orpd xmm1, xmm2  // Merge sign bit back in.
|  .if mode == 1  // ceil(x)?
|  sseconst_m1 xmm2, RD  // Must subtract -1 to preserve -0.
|  cmpsd xmm0, xmm1, 6  // x > result?
|  .else  // floor(x)?
|  sseconst_1 xmm2, RD
|  cmpsd xmm0, xmm1, 1  // x < result?
|  .endif
|  andpd xmm0, xmm2
|  subsd xmm1, xmm0  // If yes, subtract +-1.
|.endif
|  movaps xmm0, xmm1
|1:
|  ret
|.endmacro
|
|  vm_round vm_floor, 0, 1
|  vm_round vm_ceil, 1, JIT
|  vm_round vm_trunc, 2, JIT
|
|// FP modulo x%y. Called by BC_MOD* and vm_arith.
|->vm_mod:
|// Args in xmm0/xmm1, return value in xmm0.
|// Caveat: xmm0-xmm5 and RC (eax) modified!
|  // Computes x - floor(x/y)*y. xmm5 keeps x, xmm0 becomes x/y.
|  movaps xmm5, xmm0
|  divsd xmm0, xmm1
|  sseconst_abs xmm2, RD
|  sseconst_2p52 xmm3, RD
|  movaps xmm4, xmm0
|  andpd xmm4, xmm2  // |x/y|
|  ucomisd xmm3, xmm4  // No truncation if 2^52 <= |x/y|.
|  jbe >1
|  andnpd xmm2, xmm0  // Isolate sign bit.
|  addsd xmm4, xmm3  // (|x/y| + 2^52) - 2^52
|  subsd xmm4, xmm3
|  orpd xmm4, xmm2  // Merge sign bit back in.
|  sseconst_1 xmm2, RD
|  cmpsd xmm0, xmm4, 1  // x/y < result?
|  andpd xmm0, xmm2
|  subsd xmm4, xmm0  // If yes, subtract 1.0.
|  movaps xmm0, xmm5
|  mulsd xmm1, xmm4
|  subsd xmm0, xmm1
|  ret
|1:  // Quotient is used as is: x - (x/y)*y.
|  mulsd xmm1, xmm0
|  movaps xmm0, xmm5
|  subsd xmm0, xmm1
|  ret
|
|// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
|->vm_powi_sse:
|  cmp eax, 1; jle >6  // i<=1?
|  // Now 1 < (unsigned)i <= 0x80000000.
|1:  // Handle leading zeros (zero bits at the low end of i): square x.
|  test eax, 1; jnz >2
|  mulsd xmm0, xmm0
|  shr eax, 1
|  jmp <1
|2:
|  shr eax, 1; jz >5
|  movaps xmm1, xmm0
|3:  // Handle trailing bits.
|  mulsd xmm0, xmm0
|  shr eax, 1; jz >4
|  jnc <3  // Bit shifted out was 0: keep squaring only.
|  mulsd xmm1, xmm0
|  jmp <3
|4:
|  mulsd xmm0, xmm1
|5:
|  ret
|6:
|  je <5  // x^1 ==> x
|  jb >7  // x^0 ==> 1
|  // i < 0: compute x^(-i) with a local call, then return 1/result.
|  neg eax
|  call <1
|  sseconst_1 xmm1, RD
|  divsd xmm1, xmm0
|  movaps xmm0, xmm1
|  ret
|7:
|  sseconst_1 xmm0, RD
|  ret
|
|//-----------------------------------------------------------------------
|//-- Miscellaneous functions --------------------------------------------
|//-----------------------------------------------------------------------
|
|// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
|// Stores eax, ebx, ecx, edx from cpuid into res[0..3]. ecx is zeroed
|// before the cpuid instruction. rbx is saved around it; on x64/Windows
|// rsi is saved too and loaded with res (on POSIX CARG2 already is rsi).
|->vm_cpuid:
|  mov eax, CARG1d
|  .if X64WIN; push rsi; mov rsi, CARG2; .endif
|  push rbx
|  xor ecx, ecx
|  cpuid
|  mov [rsi], eax
|  mov [rsi+4], ebx
|  mov [rsi+8], ecx
|  mov [rsi+12], edx
|  pop rbx
|  .if X64WIN; pop rsi; .endif
|  ret
|
|// Register and stack-slot aliases used by vm_next below.
|.define NEXT_TAB, TAB:CARG1
|.define NEXT_IDX, CARG2d
|.define NEXT_IDXa, CARG2
|.define NEXT_PTR, RC
|.define NEXT_PTRd, RCd
|.define NEXT_TMP, CARG3
|.define NEXT_ASIZE, CARG4d
|.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro
|.if X64WIN
|.define NEXT_RES_PTR, [rsp+aword*5]
|.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro
|.else
|.define NEXT_RES_PTR, [rsp+aword*1]
|.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro
|.endif
|
|// TValue *lj_vm_next(GCtab *t, uint32_t idx)
|// Next idx returned in edx.
|->vm_next:
|.if JIT
|  mov NEXT_ASIZE, NEXT_TAB->asize
|1:  // Traverse array part.
|  cmp NEXT_IDX, NEXT_ASIZE; jae >5
|  mov NEXT_TMP, NEXT_TAB->array
|  mov NEXT_TMP, qword [NEXT_TMP+NEXT_IDX*8]
|  cmp NEXT_TMP, LJ_TNIL; je >2
|  // Array hit: result slot [0] = value, slot [1] = index as key.
|  lea NEXT_PTR, NEXT_RES_PTR
|  mov qword [NEXT_PTR], NEXT_TMP
|.if DUALNUM
|  setint NEXT_TMP, NEXT_IDXa
|  mov qword [NEXT_PTR+qword*1], NEXT_TMP
|.else
|  cvtsi2sd xmm0, NEXT_IDX
|  movsd qword [NEXT_PTR+qword*1], xmm0
|.endif
|  NEXT_RES_IDX 1
|  ret
|2:  // Skip holes in array part.
|  add NEXT_IDX, 1
|  jmp <1
|
|5:  // Traverse hash part.
|  sub NEXT_IDX, NEXT_ASIZE
|6:
|  cmp NEXT_IDX, NEXT_TAB->hmask; ja >9
|  imul NEXT_PTRd, NEXT_IDX, #NODE
|  add NODE:NEXT_PTR, NEXT_TAB->node
|  cmp qword NODE:NEXT_PTR->val, LJ_TNIL; je >7
|  // Hash hit: the node pointer itself is returned in NEXT_PTR.
|  NEXT_RES_IDXL NEXT_ASIZE+1
|  ret
|7:  // Skip holes in hash part.
|  add NEXT_IDX, 1
|  jmp <6
|
|9:  // End of iteration. Set the key to nil (not the value).
|  NEXT_RES_IDX NEXT_ASIZE
|  lea NEXT_PTR, NEXT_RES_PTR
|  mov qword [NEXT_PTR+qword*1], LJ_TNIL
|  ret
|.endif
|
|//-----------------------------------------------------------------------
|//-- Assertions ---------------------------------------------------------
|//-----------------------------------------------------------------------
|
|// Breakpoint trap target. One int3 is always emitted; a second one is
|// emitted in front of it when LUA_USE_ASSERT is defined.
|->assert_bad_for_arg_type:
#ifdef LUA_USE_ASSERT
|  int3
#endif
|  int3
|
|//-----------------------------------------------------------------------
|//-- FFI helper functions -----------------------------------------------
|//-----------------------------------------------------------------------
|
|// Handler for callback functions. Callback slot number in ah/al.
|->vm_ffi_callback:
|.if FFI
|.type CTSTATE, CTState, PC
|  saveregs_  // ebp/rbp already saved. ebp now holds global_State *.
|  lea DISPATCH, [ebp+GG_G2DISP]
|  mov CTSTATE, GL:ebp->ctype_state
|  movzx eax, ax
|  mov CTSTATE->cb.slot, eax
|  // Capture the incoming C argument registers into cts->cb.
|  mov CTSTATE->cb.gpr[0], CARG1
|  mov CTSTATE->cb.gpr[1], CARG2
|  mov CTSTATE->cb.gpr[2], CARG3
|  mov CTSTATE->cb.gpr[3], CARG4
|  movsd qword CTSTATE->cb.fpr[0], xmm0
|  movsd qword CTSTATE->cb.fpr[1], xmm1
|  movsd qword CTSTATE->cb.fpr[2], xmm2
|  movsd qword CTSTATE->cb.fpr[3], xmm3
|.if X64WIN
|  lea rax, [rsp+CFRAME_SIZE+4*8]
|.else
|  lea rax, [rsp+CFRAME_SIZE]
|  mov CTSTATE->cb.gpr[4], CARG5
|  mov CTSTATE->cb.gpr[5], CARG6
|  movsd qword CTSTATE->cb.fpr[4], xmm4
|  movsd qword CTSTATE->cb.fpr[5], xmm5
|  movsd qword CTSTATE->cb.fpr[6], xmm6
|  movsd qword CTSTATE->cb.fpr[7], xmm7
|.endif
|  mov CTSTATE->cb.stack, rax
|  mov CARG2, rsp
|  mov SAVE_PC, CTSTATE  // Any value outside of bytecode is ok.
|  mov CARG1, CTSTATE
|  call extern lj_ccallback_enter  // (CTState *cts, void *cf)
|  // lua_State * returned in rax (RD).
|  set_vmstate INTERP
|  mov BASE, L:RD->base
|  mov RD, L:RD->top
|  sub RD, BASE
|  mov LFUNC:RB, [BASE-16]
|  cleartp LFUNC:RB
|  shr RD, 3
|  add RD, 1  // RD = nargs+1, derived from L->top - L->base.
|  ins_callt
|.endif
|
|->cont_ffi_callback:  // Return from FFI callback.
|.if FFI
|  mov L:RA, SAVE_L
|  mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
|  mov aword CTSTATE->L, L:RA
|  mov L:RA->base, BASE
|  mov L:RA->top, RB
|  mov CARG1, CTSTATE
|  mov CARG2, RC
|  call extern lj_ccallback_leave  // (CTState *cts, TValue *o)
|  // Load the C return value registers from the callback state.
|  mov rax, CTSTATE->cb.gpr[0]
|  movsd xmm0, qword CTSTATE->cb.fpr[0]
|  jmp ->vm_leave_unw
|.endif
|
|->vm_ffi_call:  // Call C function via FFI.
|  // Caveat: needs special frame unwinding, see below.
|.if FFI
|  .type CCSTATE, CCallState, rbx
|  push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1
|
|  // Readjust stack.
|  mov eax, CCSTATE->spadj
|  sub rsp, rax
|
|  // Copy stack slots.
|  movzx ecx, byte CCSTATE->nsp
|  sub ecx, 1
|  js >2
|1:
|  mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
|  mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
|  sub ecx, 1
|  jns <1
|2:
|
|  // Load argument registers. eax = nfpr stays live until the call.
|  movzx eax, byte CCSTATE->nfpr
|  mov CARG1, CCSTATE->gpr[0]
|  mov CARG2, CCSTATE->gpr[1]
|  mov CARG3, CCSTATE->gpr[2]
|  mov CARG4, CCSTATE->gpr[3]
|.if not X64WIN
|  mov CARG5, CCSTATE->gpr[4]
|  mov CARG6, CCSTATE->gpr[5]
|.endif
|  test eax, eax; jz >5
|  movaps xmm0, CCSTATE->fpr[0]
|  movaps xmm1, CCSTATE->fpr[1]
|  movaps xmm2, CCSTATE->fpr[2]
|  movaps xmm3, CCSTATE->fpr[3]
|.if not X64WIN
|  cmp eax, 4; jbe >5
|  movaps xmm4, CCSTATE->fpr[4]
|  movaps xmm5, CCSTATE->fpr[5]
|  movaps xmm6, CCSTATE->fpr[6]
|  movaps xmm7, CCSTATE->fpr[7]
|.endif
|5:
|
|  call aword CCSTATE->func
|
|  // Store the return registers back into the call state.
|  mov CCSTATE->gpr[0], rax
|  movaps CCSTATE->fpr[0], xmm0
|.if not X64WIN
|  mov CCSTATE->gpr[1], rdx
|  movaps CCSTATE->fpr[1], xmm1
|.endif
|
|  mov rbx, [rbp-8]; leave; ret
|.endif
|// Note: vm_ffi_call must be the last function in this object file!
|
|//-----------------------------------------------------------------------
}

/* Generate the code for a single instruction. */
/* op selects the instruction body; defop is the label bound via =>defop. */
static void build_ins(BuildCtx *ctx, BCOp op, int defop)
{
  int vk = 0;
  |// Note: aligning all instructions does not pay off.
  |=>defop:

  switch (op) {

  /* -- Comparison ops ---------------------------------------------------- */

  /* Remember: all ops branch for a true comparison, fall through otherwise. */

  /* Emits exactly one of the four jump mnemonics, chosen by op. */
  |.macro jmp_comp, lt, ge, le, gt, target
  ||switch (op) {
  ||case BC_ISLT:
  |   lt target
  ||break;
  ||case BC_ISGE:
  |   ge target
  ||break;
  ||case BC_ISLE:
  |   le target
  ||break;
  ||case BC_ISGT:
  |   gt target
  ||break;
  ||default: break;  /* Shut up GCC. */
  ||}
  |.endmacro

  case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
    |  // RA = src1, RD = src2, JMP with RD = target
    |  ins_AD
    |  mov ITYPE, [BASE+RA*8]
    |  mov RB, [BASE+RD*8]
    |  mov RA, ITYPE
    |  mov RD, RB
    |  // ITYPE/RB = type tags (value >> 47), RA/RD = the raw values.
    |  sar ITYPE, 47
    |  sar RB, 47
    |.if DUALNUM
    |  cmp ITYPEd, LJ_TISNUM; jne >7
    |  cmp RBd, LJ_TISNUM; jne >8
    |  add PC, 4
    |  cmp RAd, RDd
    |  jmp_comp jge, jl, jg, jle, >9
    |6:
    |  movzx RDd, PC_RD
    |  branchPC RD
    |9:
    |  ins_next
    |
    |7:  // RA is not an integer.
    |  ja ->vmeta_comp
    |  // RA is a number.
    |  cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp
    |  // RA is a number, RD is an integer.
    |  cvtsi2sd xmm0, RDd
    |  jmp >2
    |
    |8:  // RA is an integer, RD is not an integer.
    |  ja ->vmeta_comp
    |  // RA is an integer, RD is a number.
    |  cvtsi2sd xmm1, RAd
    |  movd xmm0, RD
    |  jmp >3
    |.else
    |  cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp
    |  cmp RBd, LJ_TISNUM; jae ->vmeta_comp
    |.endif
    |1:
    |  movd xmm0, RD
    |2:
    |  movd xmm1, RA
    |3:
    |  add PC, 4
    |  ucomisd xmm0, xmm1
    |  // Unordered: all of ZF CF PF set, ordered: PF clear.
    |  // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
|.if DUALNUM
    |  jmp_comp jbe, ja, jb, jae, <9
    |  jmp <6
    |.else
    |  jmp_comp jbe, ja, jb, jae, >1
    |  movzx RDd, PC_RD
    |  branchPC RD
    |1:
    |  ins_next
    |.endif
    break;

  case BC_ISEQV: case BC_ISNEV:
    vk = op == BC_ISEQV;  /* 1 for the "equal" variant, 0 for "not equal". */
    |  ins_AD  // RA = src1, RD = src2, JMP with RD = target
    |  mov RB, [BASE+RD*8]
    |  mov ITYPE, [BASE+RA*8]
    |  add PC, 4
    |  mov RD, RB
    |  mov RA, ITYPE
    |  // RB/ITYPE = type tags of src2/src1, RD/RA = the raw values.
    |  sar RB, 47
    |  sar ITYPE, 47
    |.if DUALNUM
    |  cmp RBd, LJ_TISNUM; jne >7
    |  cmp ITYPEd, LJ_TISNUM; jne >8
    |  cmp RDd, RAd
    if (vk) {
      |  jne >9
    } else {
      |  je >9
    }
    |  movzx RDd, PC_RD
    |  branchPC RD
    |9:
    |  ins_next
    |
    |7:  // RD is not an integer.
    |  ja >5
    |  // RD is a number.
    |  movd xmm1, RD
    |  cmp ITYPEd, LJ_TISNUM; jb >1; jne >5
    |  // RD is a number, RA is an integer.
    |  cvtsi2sd xmm0, RAd
    |  jmp >2
    |
    |8:  // RD is an integer, RA is not an integer.
    |  ja >5
    |  // RD is an integer, RA is a number.
    |  cvtsi2sd xmm1, RDd
    |  jmp >1
    |
    |.else
    |  cmp RBd, LJ_TISNUM; jae >5
    |  cmp ITYPEd, LJ_TISNUM; jae >5
    |  movd xmm1, RD
    |.endif
    |1:
    |  movd xmm0, RA
    |2:
    |  ucomisd xmm0, xmm1
    |4:
  /* Shared FP compare tail; BC_ISEQN/BC_ISNEN jump here with goto. */
  iseqne_fp:
    if (vk) {
      |  jp >2  // Unordered means not equal.
      |  jne >2
    } else {
      |  jp >2  // Unordered means not equal.
      |  je >1
    }
  /* Shared branch/fallthrough tail; BC_ISEQS/BC_ISNES reach it via goto. */
  iseqne_end:
    if (vk) {
      |1:  // EQ: Branch to the target.
      |  movzx RDd, PC_RD
      |  branchPC RD
      |2:  // NE: Fallthrough to next instruction.
      |.if not FFI
      |3:
      |.endif
    } else {
      |.if not FFI
      |3:
      |.endif
      |2:  // NE: Branch to the target.
      |  movzx RDd, PC_RD
      |  branchPC RD
      |1:  // EQ: Fallthrough to next instruction.
}
    /* With DUALNUM these ops already emitted ins_next at label 9. */
    if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
                       op == BC_ISEQN || op == BC_ISNEN)) {
      |  jmp <9
    } else {
      |  ins_next
    }
    |
    if (op == BC_ISEQV || op == BC_ISNEV) {
      |5:  // Either or both types are not numbers.
      |.if FFI
      |  cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
      |  cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd
      |.endif
      |  cmp RA, RD
      |  je <1  // Same GCobjs or pvalues?
      |  cmp RBd, ITYPEd
      |  jne <2  // Not the same type?
      |  cmp RBd, LJ_TISTABUD
      |  ja <2  // Different objects and not table/ud?
      |
      |  // Different tables or userdatas. Need to check __eq metamethod.
      |  // Field metatable must be at same offset for GCtab and GCudata!
      |  cleartp TAB:RA
      |  mov TAB:RB, TAB:RA->metatable
      |  test TAB:RB, TAB:RB
      |  jz <2  // No metatable?
      |  test byte TAB:RB->nomm, 1<<MM_eq
      |  jnz <2  // Or 'no __eq' flag set?
      if (vk) {
        |  xor RBd, RBd  // ne = 0
      } else {
        |  mov RBd, 1  // ne = 1
      }
      |  jmp ->vmeta_equal  // Handle __eq metamethod.
} else {
      /* Other iseqne ops: label 3 handles a mismatching operand type. */
      |.if FFI
      |3:
      |  cmp ITYPEd, LJ_TCDATA
      if (LJ_DUALNUM && vk) {
        |  jne <9
      } else {
        |  jne <2
      }
      |  jmp ->vmeta_equal_cd
      |.endif
    }
    break;
  case BC_ISEQS: case BC_ISNES:
    vk = op == BC_ISEQS;
    |  ins_AND  // RA = src, RD = str const, JMP with RD = target
    |  mov RB, [BASE+RA*8]
    |  add PC, 4
    |  checkstr RB, >3
    |  cmp RB, [KBASE+RD*8]
  /* Shared test; BC_ISEQP/BC_ISNEP jump here when FFI is disabled. */
  iseqne_test:
    if (vk) {
      |  jne >2
    } else {
      |  je >1
    }
    goto iseqne_end;
  case BC_ISEQN: case BC_ISNEN:
    vk = op == BC_ISEQN;
    |  ins_AD  // RA = src, RD = num const, JMP with RD = target
    |  mov RB, [BASE+RA*8]
    |  add PC, 4
    |.if DUALNUM
    |  checkint RB, >7
    |  mov RD, [KBASE+RD*8]
    |  checkint RD, >8
    |  cmp RBd, RDd
    if (vk) {
      |  jne >9
    } else {
      |  je >9
    }
    |  movzx RDd, PC_RD
    |  branchPC RD
    |9:
    |  ins_next
    |
    |7:  // RA is not an integer.
    |  ja >3
    |  // RA is a number.
    |  mov RD, [KBASE+RD*8]
    |  checkint RD, >1
    |  // RA is a number, RD is an integer.
    |  cvtsi2sd xmm0, RDd
    |  jmp >2
    |
    |8:  // RA is an integer, RD is a number.
|  cvtsi2sd xmm0, RBd
    |  movd xmm1, RD
    |  ucomisd xmm0, xmm1
    |  jmp >4
    |1:
    |  movd xmm0, RD
    |.else
    |  checknum RB, >3
    |1:
    |  movsd xmm0, qword [KBASE+RD*8]
    |.endif
    |2:
    |  ucomisd xmm0, qword [BASE+RA*8]
    |4:
    goto iseqne_fp;
  case BC_ISEQP: case BC_ISNEP:
    vk = op == BC_ISEQP;
    |  ins_AND  // RA = src, RD = primitive type (~), JMP with RD = target
    |  mov RB, [BASE+RA*8]
    |  sar RB, 47
    |  add PC, 4
    |  cmp RBd, RDd
    /* Without FFI there is no cdata case: reuse the shared test. */
    if (!LJ_HASFFI) goto iseqne_test;
    if (vk) {
      |  jne >3
      |  movzx RDd, PC_RD
      |  branchPC RD
      |2:
      |  ins_next
      |3:
      |  cmp RBd, LJ_TCDATA; jne <2
      |  jmp ->vmeta_equal_cd
    } else {
      |  je >2
      |  cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
      |  movzx RDd, PC_RD
      |  branchPC RD
      |2:
      |  ins_next
    }
    break;

  /* -- Unary test and copy ops ------------------------------------------- */

  case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
    |  ins_AD  // RA = dst or unused, RD = src, JMP with RD = target
    |  mov ITYPE, [BASE+RD*8]
    |  add PC, 4
    /* Only the copying variants keep the full value for the store below. */
    if (op == BC_ISTC || op == BC_ISFC) {
      |  mov RB, ITYPE
    }
    |  sar ITYPE, 47
    |  cmp ITYPEd, LJ_TISTRUECOND
    if (op == BC_IST || op == BC_ISTC) {
      |  jae >1
    } else {
      |  jb >1
    }
    if (op == BC_ISTC || op == BC_ISFC) {
      |  mov [BASE+RA*8], RB
    }
    |  movzx RDd, PC_RD
    |  branchPC RD
    |1:  // Fallthrough to the next instruction.
|  ins_next
    break;

  case BC_ISTYPE:
    |  ins_AD  // RA = src, RD = -type
    |  mov RB, [BASE+RA*8]
    |  sar RB, 47
    |  add RBd, RDd  // Tag plus (-type) is zero only on a type match.
    |  jne ->vmeta_istype
    |  ins_next
    break;
  case BC_ISNUM:
    |  ins_AD  // RA = src, RD = -(TISNUM-1)
    |  checknumtp [BASE+RA*8], ->vmeta_istype
    |  ins_next
    break;

  /* -- Unary ops --------------------------------------------------------- */

  case BC_MOV:
    |  ins_AD  // RA = dst, RD = src
    |  mov RB, [BASE+RD*8]
    |  mov [BASE+RA*8], RB
    |  ins_next_
    break;
  case BC_NOT:
    |  ins_AD  // RA = dst, RD = src
    |  mov RB, [BASE+RD*8]
    |  sar RB, 47
    |  // RC = 2, or 1 if the compare sets carry. The result is stored as
    |  // ~(RC << 47), the same encoding BC_KPRI uses for primitive types.
    |  mov RCd, 2
    |  cmp RB, LJ_TISTRUECOND
    |  sbb RCd, 0
    |  shl RC, 47
    |  not RC
    |  mov [BASE+RA*8], RC
    |  ins_next
    break;
  case BC_UNM:
    |  ins_AD  // RA = dst, RD = src
    |  mov RB, [BASE+RD*8]
    |.if DUALNUM
    |  checkint RB, >5
    |  neg RBd
    |  jo >4  // Integer negation overflowed: use the FP constant below.
    |  setint RB
    |9:
    |  mov [BASE+RA*8], RB
    |  ins_next
    |4:
    |  mov64 RB, U64x(41e00000,00000000)  // 2^31.
    |  jmp <9
    |5:
    |  ja ->vmeta_unm
    |.else
    |  checknum RB, ->vmeta_unm
    |.endif
    |  // Negate a double by flipping bit 63 (the sign bit).
    |  mov64 RD, U64x(80000000,00000000)
    |  xor RB, RD
    |.if DUALNUM
    |  jmp <9
    |.else
    |  mov [BASE+RA*8], RB
    |  ins_next
    |.endif
    break;
  case BC_LEN:
    |  ins_AD  // RA = dst, RD = src
    |  mov RD, [BASE+RD*8]
    |  checkstr RD, >2
    |.if DUALNUM
    |  mov RDd, dword STR:RD->len
    |1:
    |  setint RD
    |  mov [BASE+RA*8], RD
    |.else
    |  xorps xmm0, xmm0
    |  cvtsi2sd xmm0, dword STR:RD->len
    |1:
    |  movsd qword [BASE+RA*8], xmm0
    |.endif
    |  ins_next
    |2:
    |  cmp ITYPEd, LJ_TTAB; jne ->vmeta_len
    |  mov TAB:CARG1, TAB:RD
#if LJ_52
    |  mov TAB:RB, TAB:RD->metatable
    |  cmp TAB:RB, 0
    |  jnz >9
    |3:
#endif
    |->BC_LEN_Z:
    |  mov RB, BASE  // Save BASE.
|  call extern lj_tab_len  // (GCtab *t)
    |  // Length of table returned in eax (RD).
    |.if DUALNUM
    |  // Nothing to do.
    |.else
    |  cvtsi2sd xmm0, RDd
    |.endif
    |  mov BASE, RB  // Restore BASE.
    |  movzx RAd, PC_RA
    |  jmp <1
#if LJ_52
    |9:  // Check for __len.
    |  test byte TAB:RB->nomm, 1<<MM_len
    |  jnz <3
    |  jmp ->vmeta_len  // 'no __len' flag NOT set: check.
#endif
    break;

  /* -- Binary ops -------------------------------------------------------- */

    |// Operand load for FP arithmetic. vk (0 = VN, 1 = NV, else VV) is
    |// derived from the opcode. The first operand is loaded into xmm0 and
    |// "sseins ssereg, <second operand>" is emitted; non-number operands
    |// branch to the matching vmeta_arith_* target.
    |.macro ins_arithpre, sseins, ssereg
    |  ins_ABC
    ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
    ||switch (vk) {
    ||case 0:
    |   checknumtp [BASE+RB*8], ->vmeta_arith_vn
    |   .if DUALNUM
    |     checknumtp [KBASE+RC*8], ->vmeta_arith_vn
    |   .endif
    |   movsd xmm0, qword [BASE+RB*8]
    |   sseins ssereg, qword [KBASE+RC*8]
    ||  break;
    ||case 1:
    |   checknumtp [BASE+RB*8], ->vmeta_arith_nv
    |   .if DUALNUM
    |     checknumtp [KBASE+RC*8], ->vmeta_arith_nv
    |   .endif
    |   movsd xmm0, qword [KBASE+RC*8]
    |   sseins ssereg, qword [BASE+RB*8]
    ||  break;
    ||default:
    |   checknumtp [BASE+RB*8], ->vmeta_arith_vv
    |   checknumtp [BASE+RC*8], ->vmeta_arith_vv
    |   movsd xmm0, qword [BASE+RB*8]
    |   sseins ssereg, qword [BASE+RC*8]
    ||  break;
    ||}
    |.endmacro
    |
    |// Integer arithmetic for DUALNUM. Non-integer operands and signed
    |// overflow (jo) branch to the vmeta_arith_*o targets.
    |.macro ins_arithdn, intins
    |  ins_ABC
    ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
    ||switch (vk) {
    ||case 0:
    |   mov RB, [BASE+RB*8]
    |   mov RC, [KBASE+RC*8]
    |   checkint RB, ->vmeta_arith_vno
    |   checkint RC, ->vmeta_arith_vno
    |   intins RBd, RCd; jo ->vmeta_arith_vno
    ||  break;
    ||case 1:
    |   mov RB, [BASE+RB*8]
    |   mov RC, [KBASE+RC*8]
    |   checkint RB, ->vmeta_arith_nvo
    |   checkint RC, ->vmeta_arith_nvo
    |   intins RCd, RBd; jo ->vmeta_arith_nvo
    ||  break;
    ||default:
    |   mov RB, [BASE+RB*8]
    |   mov RC, [BASE+RC*8]
    |   checkint RB, ->vmeta_arith_vvo
    |   checkint RC, ->vmeta_arith_vvo
    |   intins RBd, RCd; jo ->vmeta_arith_vvo
    ||  break;
    ||}
    ||if (vk == 1) {
    |   setint RC
    |   mov [BASE+RA*8], RC
    ||} else {
    |   setint RB
    |   mov [BASE+RA*8], RB
    ||}
    |  ins_next
    |.endmacro
    |
    |.macro ins_arithpost
    |  movsd qword [BASE+RA*8], xmm0
    |.endmacro
    |
    |// One-argument form: FP only.
    |.macro ins_arith, sseins
    |  ins_arithpre sseins, xmm0
    |  ins_arithpost
    |  ins_next
    |.endmacro
    |
    |// Two-argument form: integer path with DUALNUM, otherwise FP.
    |// NOTE(review): relies on the same macro name being resolved by
    |// argument count; both forms are used below.
    |.macro ins_arith, intins, sseins
    |.if DUALNUM
    |  ins_arithdn intins
    |.else
    |  ins_arith, sseins
    |.endif
    |.endmacro

    |  // RA = dst, RB = src1 or num const, RC = src2 or num const
  case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
    |  ins_arith add, addsd
    break;
  case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
    |  ins_arith sub, subsd
    break;
  case BC_MULVN: case BC_MULNV: case BC_MULVV:
    |  ins_arith imul, mulsd
    break;
  case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
    |  ins_arith divsd
    break;
  case BC_MODVN:
    |  ins_arithpre movsd, xmm1  // xmm0 = first operand, xmm1 = second.
    |->BC_MODVN_Z:
    |  call ->vm_mod
    |  ins_arithpost
    |  ins_next
    break;
  case BC_MODNV: case BC_MODVV:
    |  ins_arithpre movsd, xmm1
    |  jmp ->BC_MODVN_Z  // Avoid 3 copies. It's slow anyway.
    break;
  case BC_POW:
    |  ins_arithpre movsd, xmm1
    |  mov RB, BASE  // Save BASE across the C call.
    |  call extern pow
    |  movzx RAd, PC_RA
    |  mov BASE, RB
    |  ins_arithpost
    |  ins_next
    break;

  case BC_CAT:
    |  ins_ABC  // RA = dst, RB = src_start, RC = src_end
    |  mov L:CARG1, SAVE_L
    |  mov L:CARG1->base, BASE
    |  lea CARG2, [BASE+RC*8]
    |  mov CARG3d, RCd
    |  sub CARG3d, RBd
    |->BC_CAT_Z:
    |  mov L:RB, L:CARG1
    |  mov SAVE_PC, PC
    |  call extern lj_meta_cat  // (lua_State *L, TValue *top, int left)
    |  // NULL (finished) or TValue * (metamethod) returned in rax (RC).
|  mov BASE, L:RB->base
    |  test RC, RC
    |  jnz ->vmeta_binop
    |  movzx RBd, PC_RB  // Copy result to Stk[RA] from Stk[RB].
    |  movzx RAd, PC_RA
    |  mov RC, [BASE+RB*8]
    |  mov [BASE+RA*8], RC
    |  ins_next
    break;

  /* -- Constant ops ------------------------------------------------------ */

  case BC_KSTR:
    |  ins_AND  // RA = dst, RD = str const (~)
    |  mov RD, [KBASE+RD*8]
    |  settp RD, LJ_TSTR
    |  mov [BASE+RA*8], RD
    |  ins_next
    break;
  case BC_KCDATA:
    |.if FFI
    |  ins_AND  // RA = dst, RD = cdata const (~)
    |  mov RD, [KBASE+RD*8]
    |  settp RD, LJ_TCDATA
    |  mov [BASE+RA*8], RD
    |  ins_next
    |.endif
    break;
  case BC_KSHORT:
    |  ins_AD  // RA = dst, RD = signed int16 literal
    |.if DUALNUM
    |  movsx RDd, RDW
    |  setint RD
    |  mov [BASE+RA*8], RD
    |.else
    |  movsx RDd, RDW  // Sign-extend literal.
    |  cvtsi2sd xmm0, RDd
    |  movsd qword [BASE+RA*8], xmm0
    |.endif
    |  ins_next
    break;
  case BC_KNUM:
    |  ins_AD  // RA = dst, RD = num const
    |  movsd xmm0, qword [KBASE+RD*8]
    |  movsd qword [BASE+RA*8], xmm0
    |  ins_next
    break;
  case BC_KPRI:
    |  ins_AD  // RA = dst, RD = primitive type (~)
    |  // Stored value is ~(RD << 47).
    |  shl RD, 47
    |  not RD
    |  mov [BASE+RA*8], RD
    |  ins_next
    break;
  case BC_KNIL:
    |  ins_AD  // RA = dst_start, RD = dst_end
    |  lea RA, [BASE+RA*8+8]
    |  lea RD, [BASE+RD*8]
    |  mov RB, LJ_TNIL
    |  mov [RA-8], RB  // Sets minimum 2 slots.
3466 |1: 3467 | mov [RA], RB 3468 | add RA, 8 3469 | cmp RA, RD 3470 | jbe <1 3471 | ins_next 3472 break; 3473 3474 /* -- Upvalue and function ops ------------------------------------------ */ 3475 3476 case BC_UGET: 3477 | ins_AD // RA = dst, RD = upvalue # 3478 | mov LFUNC:RB, [BASE-16] 3479 | cleartp LFUNC:RB 3480 | mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)] 3481 | mov RB, UPVAL:RB->v 3482 | mov RD, [RB] 3483 | mov [BASE+RA*8], RD 3484 | ins_next 3485 break; 3486 case BC_USETV: 3487#define TV2MARKOFS \ 3488 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) 3489 | ins_AD // RA = upvalue #, RD = src 3490 | mov LFUNC:RB, [BASE-16] 3491 | cleartp LFUNC:RB 3492 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] 3493 | cmp byte UPVAL:RB->closed, 0 3494 | mov RB, UPVAL:RB->v 3495 | mov RA, [BASE+RD*8] 3496 | mov [RB], RA 3497 | jz >1 3498 | // Check barrier for closed upvalue. 3499 | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv) 3500 | jnz >2 3501 |1: 3502 | ins_next 3503 | 3504 |2: // Upvalue is black. Check if new value is collectable and white. 3505 | mov RD, RA 3506 | sar RD, 47 3507 | sub RDd, LJ_TISGCV 3508 | cmp RDd, LJ_TNUMX - LJ_TISGCV // tvisgcv(v) 3509 | jbe <1 3510 | cleartp GCOBJ:RA 3511 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) 3512 | jz <1 3513 | // Crossed a write barrier. Move the barrier forward. 3514 |.if not X64WIN 3515 | mov CARG2, RB 3516 | mov RB, BASE // Save BASE. 3517 |.else 3518 | xchg CARG2, RB // Save BASE (CARG2 == BASE). 3519 |.endif 3520 | lea GL:CARG1, [DISPATCH+GG_DISP2G] 3521 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv) 3522 | mov BASE, RB // Restore BASE. 
3523 | jmp <1 3524 break; 3525#undef TV2MARKOFS 3526 case BC_USETS: 3527 | ins_AND // RA = upvalue #, RD = str const (~) 3528 | mov LFUNC:RB, [BASE-16] 3529 | cleartp LFUNC:RB 3530 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] 3531 | mov STR:RA, [KBASE+RD*8] 3532 | mov RD, UPVAL:RB->v 3533 | settp STR:ITYPE, STR:RA, LJ_TSTR 3534 | mov [RD], STR:ITYPE 3535 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) 3536 | jnz >2 3537 |1: 3538 | ins_next 3539 | 3540 |2: // Check if string is white and ensure upvalue is closed. 3541 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str) 3542 | jz <1 3543 | cmp byte UPVAL:RB->closed, 0 3544 | jz <1 3545 | // Crossed a write barrier. Move the barrier forward. 3546 | mov RB, BASE // Save BASE (CARG2 == BASE). 3547 | mov CARG2, RD 3548 | lea GL:CARG1, [DISPATCH+GG_DISP2G] 3549 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv) 3550 | mov BASE, RB // Restore BASE. 3551 | jmp <1 3552 break; 3553 case BC_USETN: 3554 | ins_AD // RA = upvalue #, RD = num const 3555 | mov LFUNC:RB, [BASE-16] 3556 | cleartp LFUNC:RB 3557 | movsd xmm0, qword [KBASE+RD*8] 3558 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] 3559 | mov RA, UPVAL:RB->v 3560 | movsd qword [RA], xmm0 3561 | ins_next 3562 break; 3563 case BC_USETP: 3564 | ins_AD // RA = upvalue #, RD = primitive type (~) 3565 | mov LFUNC:RB, [BASE-16] 3566 | cleartp LFUNC:RB 3567 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] 3568 | shl RD, 47 3569 | not RD 3570 | mov RA, UPVAL:RB->v 3571 | mov [RA], RD 3572 | ins_next 3573 break; 3574 case BC_UCLO: 3575 | ins_AD // RA = level, RD = target 3576 | branchPC RD // Do this first to free RD. 
3577 | mov L:RB, SAVE_L 3578 | cmp aword L:RB->openupval, 0 3579 | je >1 3580 | mov L:RB->base, BASE 3581 | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE 3582 | mov L:CARG1, L:RB // Caveat: CARG1 == RA 3583 | call extern lj_func_closeuv // (lua_State *L, TValue *level) 3584 | mov BASE, L:RB->base 3585 |1: 3586 | ins_next 3587 break; 3588 3589 case BC_FNEW: 3590 | ins_AND // RA = dst, RD = proto const (~) (holding function prototype) 3591 | mov L:RB, SAVE_L 3592 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE. 3593 | mov CARG3, [BASE-16] 3594 | cleartp CARG3 3595 | mov CARG2, [KBASE+RD*8] // Fetch GCproto *. 3596 | mov CARG1, L:RB 3597 | mov SAVE_PC, PC 3598 | // (lua_State *L, GCproto *pt, GCfuncL *parent) 3599 | call extern lj_func_newL_gc 3600 | // GCfuncL * returned in eax (RC). 3601 | mov BASE, L:RB->base 3602 | movzx RAd, PC_RA 3603 | settp LFUNC:RC, LJ_TFUNC 3604 | mov [BASE+RA*8], LFUNC:RC 3605 | ins_next 3606 break; 3607 3608 /* -- Table ops --------------------------------------------------------- */ 3609 3610 case BC_TNEW: 3611 | ins_AD // RA = dst, RD = hbits|asize 3612 | mov L:RB, SAVE_L 3613 | mov L:RB->base, BASE 3614 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] 3615 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] 3616 | mov SAVE_PC, PC 3617 | jae >5 3618 |1: 3619 | mov CARG3d, RDd 3620 | and RDd, 0x7ff 3621 | shr CARG3d, 11 3622 | cmp RDd, 0x7ff 3623 | je >3 3624 |2: 3625 | mov L:CARG1, L:RB 3626 | mov CARG2d, RDd 3627 | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) 3628 | // Table * returned in eax (RC). 3629 | mov BASE, L:RB->base 3630 | movzx RAd, PC_RA 3631 | settp TAB:RC, LJ_TTAB 3632 | mov [BASE+RA*8], TAB:RC 3633 | ins_next 3634 |3: // Turn 0x7ff into 0x801. 
3635 | mov RDd, 0x801 3636 | jmp <2 3637 |5: 3638 | mov L:CARG1, L:RB 3639 | call extern lj_gc_step_fixtop // (lua_State *L) 3640 | movzx RDd, PC_RD 3641 | jmp <1 3642 break; 3643 case BC_TDUP: 3644 | ins_AND // RA = dst, RD = table const (~) (holding template table) 3645 | mov L:RB, SAVE_L 3646 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] 3647 | mov SAVE_PC, PC 3648 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] 3649 | mov L:RB->base, BASE 3650 | jae >3 3651 |2: 3652 | mov TAB:CARG2, [KBASE+RD*8] // Caveat: CARG2 == BASE 3653 | mov L:CARG1, L:RB // Caveat: CARG1 == RA 3654 | call extern lj_tab_dup // (lua_State *L, Table *kt) 3655 | // Table * returned in eax (RC). 3656 | mov BASE, L:RB->base 3657 | movzx RAd, PC_RA 3658 | settp TAB:RC, LJ_TTAB 3659 | mov [BASE+RA*8], TAB:RC 3660 | ins_next 3661 |3: 3662 | mov L:CARG1, L:RB 3663 | call extern lj_gc_step_fixtop // (lua_State *L) 3664 | movzx RDd, PC_RD // Need to reload RD. 3665 | not RD 3666 | jmp <2 3667 break; 3668 3669 case BC_GGET: 3670 | ins_AND // RA = dst, RD = str const (~) 3671 | mov LFUNC:RB, [BASE-16] 3672 | cleartp LFUNC:RB 3673 | mov TAB:RB, LFUNC:RB->env 3674 | mov STR:RC, [KBASE+RD*8] 3675 | jmp ->BC_TGETS_Z 3676 break; 3677 case BC_GSET: 3678 | ins_AND // RA = src, RD = str const (~) 3679 | mov LFUNC:RB, [BASE-16] 3680 | cleartp LFUNC:RB 3681 | mov TAB:RB, LFUNC:RB->env 3682 | mov STR:RC, [KBASE+RD*8] 3683 | jmp ->BC_TSETS_Z 3684 break; 3685 3686 case BC_TGETV: 3687 | ins_ABC // RA = dst, RB = table, RC = key 3688 | mov TAB:RB, [BASE+RB*8] 3689 | mov RC, [BASE+RC*8] 3690 | checktab TAB:RB, ->vmeta_tgetv 3691 | 3692 | // Integer key? 3693 |.if DUALNUM 3694 | checkint RC, >5 3695 |.else 3696 | // Convert number to int and back and compare. 3697 | checknum RC, >5 3698 | movd xmm0, RC 3699 | cvttsd2si RCd, xmm0 3700 | cvtsi2sd xmm1, RCd 3701 | ucomisd xmm0, xmm1 3702 | jne ->vmeta_tgetv // Generic numeric key? Use fallback. 3703 |.endif 3704 | cmp RCd, TAB:RB->asize // Takes care of unordered, too. 
3705 | jae ->vmeta_tgetv // Not in array part? Use fallback. 3706 | shl RCd, 3 3707 | add RC, TAB:RB->array 3708 | // Get array slot. 3709 | mov ITYPE, [RC] 3710 | cmp ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath. 3711 | je >2 3712 |1: 3713 | mov [BASE+RA*8], ITYPE 3714 | ins_next 3715 | 3716 |2: // Check for __index if table value is nil. 3717 | mov TAB:TMPR, TAB:RB->metatable 3718 | test TAB:TMPR, TAB:TMPR 3719 | jz <1 3720 | test byte TAB:TMPR->nomm, 1<<MM_index 3721 | jz ->vmeta_tgetv // 'no __index' flag NOT set: check. 3722 | jmp <1 3723 | 3724 |5: // String key? 3725 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv 3726 | cleartp STR:RC 3727 | jmp ->BC_TGETS_Z 3728 break; 3729 case BC_TGETS: 3730 | ins_ABC // RA = dst, RB = table, RC = str const (~) 3731 | mov TAB:RB, [BASE+RB*8] 3732 | not RC 3733 | mov STR:RC, [KBASE+RC*8] 3734 | checktab TAB:RB, ->vmeta_tgets 3735 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr * 3736 | mov TMPRd, TAB:RB->hmask 3737 | and TMPRd, STR:RC->sid 3738 | imul TMPRd, #NODE 3739 | add NODE:TMPR, TAB:RB->node 3740 | settp ITYPE, STR:RC, LJ_TSTR 3741 |1: 3742 | cmp NODE:TMPR->key, ITYPE 3743 | jne >4 3744 | // Get node value. 3745 | mov ITYPE, NODE:TMPR->val 3746 | cmp ITYPE, LJ_TNIL 3747 | je >5 // Key found, but nil value? 3748 |2: 3749 | mov [BASE+RA*8], ITYPE 3750 | ins_next 3751 | 3752 |4: // Follow hash chain. 3753 | mov NODE:TMPR, NODE:TMPR->next 3754 | test NODE:TMPR, NODE:TMPR 3755 | jnz <1 3756 | // End of hash chain: key not found, nil result. 3757 | mov ITYPE, LJ_TNIL 3758 | 3759 |5: // Check for __index if table value is nil. 3760 | mov TAB:TMPR, TAB:RB->metatable 3761 | test TAB:TMPR, TAB:TMPR 3762 | jz <2 // No metatable: done. 3763 | test byte TAB:TMPR->nomm, 1<<MM_index 3764 | jnz <2 // 'no __index' flag set: done. 3765 | jmp ->vmeta_tgets // Caveat: preserve STR:RC. 
3766 break; 3767 case BC_TGETB: 3768 | ins_ABC // RA = dst, RB = table, RC = byte literal 3769 | mov TAB:RB, [BASE+RB*8] 3770 | checktab TAB:RB, ->vmeta_tgetb 3771 | cmp RCd, TAB:RB->asize 3772 | jae ->vmeta_tgetb 3773 | shl RCd, 3 3774 | add RC, TAB:RB->array 3775 | // Get array slot. 3776 | mov ITYPE, [RC] 3777 | cmp ITYPE, LJ_TNIL 3778 | je >2 3779 |1: 3780 | mov [BASE+RA*8], ITYPE 3781 | ins_next 3782 | 3783 |2: // Check for __index if table value is nil. 3784 | mov TAB:TMPR, TAB:RB->metatable 3785 | test TAB:TMPR, TAB:TMPR 3786 | jz <1 3787 | test byte TAB:TMPR->nomm, 1<<MM_index 3788 | jz ->vmeta_tgetb // 'no __index' flag NOT set: check. 3789 | jmp <1 3790 break; 3791 case BC_TGETR: 3792 | ins_ABC // RA = dst, RB = table, RC = key 3793 | mov TAB:RB, [BASE+RB*8] 3794 | cleartp TAB:RB 3795 |.if DUALNUM 3796 | mov RCd, dword [BASE+RC*8] 3797 |.else 3798 | cvttsd2si RCd, qword [BASE+RC*8] 3799 |.endif 3800 | cmp RCd, TAB:RB->asize 3801 | jae ->vmeta_tgetr // Not in array part? Use fallback. 3802 | shl RCd, 3 3803 | add RC, TAB:RB->array 3804 | // Get array slot. 3805 |->BC_TGETR_Z: 3806 | mov ITYPE, [RC] 3807 |->BC_TGETR2_Z: 3808 | mov [BASE+RA*8], ITYPE 3809 | ins_next 3810 break; 3811 3812 case BC_TSETV: 3813 | ins_ABC // RA = src, RB = table, RC = key 3814 | mov TAB:RB, [BASE+RB*8] 3815 | mov RC, [BASE+RC*8] 3816 | checktab TAB:RB, ->vmeta_tsetv 3817 | 3818 | // Integer key? 3819 |.if DUALNUM 3820 | checkint RC, >5 3821 |.else 3822 | // Convert number to int and back and compare. 3823 | checknum RC, >5 3824 | movd xmm0, RC 3825 | cvttsd2si RCd, xmm0 3826 | cvtsi2sd xmm1, RCd 3827 | ucomisd xmm0, xmm1 3828 | jne ->vmeta_tsetv // Generic numeric key? Use fallback. 3829 |.endif 3830 | cmp RCd, TAB:RB->asize // Takes care of unordered, too. 3831 | jae ->vmeta_tsetv 3832 | shl RCd, 3 3833 | add RC, TAB:RB->array 3834 | cmp aword [RC], LJ_TNIL 3835 | je >3 // Previous value is nil? 
3836 |1: 3837 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) 3838 | jnz >7 3839 |2: // Set array slot. 3840 | mov RB, [BASE+RA*8] 3841 | mov [RC], RB 3842 | ins_next 3843 | 3844 |3: // Check for __newindex if previous value is nil. 3845 | mov TAB:TMPR, TAB:RB->metatable 3846 | test TAB:TMPR, TAB:TMPR 3847 | jz <1 3848 | test byte TAB:TMPR->nomm, 1<<MM_newindex 3849 | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check. 3850 | jmp <1 3851 | 3852 |5: // String key? 3853 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv 3854 | cleartp STR:RC 3855 | jmp ->BC_TSETS_Z 3856 | 3857 |7: // Possible table write barrier for the value. Skip valiswhite check. 3858 | barrierback TAB:RB, TMPR 3859 | jmp <2 3860 break; 3861 case BC_TSETS: 3862 | ins_ABC // RA = src, RB = table, RC = str const (~) 3863 | mov TAB:RB, [BASE+RB*8] 3864 | not RC 3865 | mov STR:RC, [KBASE+RC*8] 3866 | checktab TAB:RB, ->vmeta_tsets 3867 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr * 3868 | mov TMPRd, TAB:RB->hmask 3869 | and TMPRd, STR:RC->sid 3870 | imul TMPRd, #NODE 3871 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. 3872 | add NODE:TMPR, TAB:RB->node 3873 | settp ITYPE, STR:RC, LJ_TSTR 3874 |1: 3875 | cmp NODE:TMPR->key, ITYPE 3876 | jne >5 3877 | // Ok, key found. Assumes: offsetof(Node, val) == 0 3878 | cmp aword [TMPR], LJ_TNIL 3879 | je >4 // Previous value is nil? 3880 |2: 3881 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) 3882 | jnz >7 3883 |3: // Set node value. 3884 | mov ITYPE, [BASE+RA*8] 3885 | mov [TMPR], ITYPE 3886 | ins_next 3887 | 3888 |4: // Check for __newindex if previous value is nil. 3889 | mov TAB:ITYPE, TAB:RB->metatable 3890 | test TAB:ITYPE, TAB:ITYPE 3891 | jz <2 3892 | test byte TAB:ITYPE->nomm, 1<<MM_newindex 3893 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check. 3894 | jmp <2 3895 | 3896 |5: // Follow hash chain. 
3897 | mov NODE:TMPR, NODE:TMPR->next 3898 | test NODE:TMPR, NODE:TMPR 3899 | jnz <1 3900 | // End of hash chain: key not found, add a new one. 3901 | 3902 | // But check for __newindex first. 3903 | mov TAB:TMPR, TAB:RB->metatable 3904 | test TAB:TMPR, TAB:TMPR 3905 | jz >6 // No metatable: continue. 3906 | test byte TAB:TMPR->nomm, 1<<MM_newindex 3907 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check. 3908 |6: 3909 | mov TMP1, ITYPE 3910 | mov L:CARG1, SAVE_L 3911 | mov L:CARG1->base, BASE 3912 | lea CARG3, TMP1 3913 | mov CARG2, TAB:RB 3914 | mov SAVE_PC, PC 3915 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) 3916 | // Handles write barrier for the new key. TValue * returned in eax (RC). 3917 | mov L:CARG1, SAVE_L 3918 | mov BASE, L:CARG1->base 3919 | mov TMPR, rax 3920 | movzx RAd, PC_RA 3921 | jmp <2 // Must check write barrier for value. 3922 | 3923 |7: // Possible table write barrier for the value. Skip valiswhite check. 3924 | barrierback TAB:RB, ITYPE 3925 | jmp <3 3926 break; 3927 case BC_TSETB: 3928 | ins_ABC // RA = src, RB = table, RC = byte literal 3929 | mov TAB:RB, [BASE+RB*8] 3930 | checktab TAB:RB, ->vmeta_tsetb 3931 | cmp RCd, TAB:RB->asize 3932 | jae ->vmeta_tsetb 3933 | shl RCd, 3 3934 | add RC, TAB:RB->array 3935 | cmp aword [RC], LJ_TNIL 3936 | je >3 // Previous value is nil? 3937 |1: 3938 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) 3939 | jnz >7 3940 |2: // Set array slot. 3941 | mov ITYPE, [BASE+RA*8] 3942 | mov [RC], ITYPE 3943 | ins_next 3944 | 3945 |3: // Check for __newindex if previous value is nil. 3946 | mov TAB:TMPR, TAB:RB->metatable 3947 | test TAB:TMPR, TAB:TMPR 3948 | jz <1 3949 | test byte TAB:TMPR->nomm, 1<<MM_newindex 3950 | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check. 3951 | jmp <1 3952 | 3953 |7: // Possible table write barrier for the value. Skip valiswhite check. 
3954 | barrierback TAB:RB, TMPR 3955 | jmp <2 3956 break; 3957 case BC_TSETR: 3958 | ins_ABC // RA = src, RB = table, RC = key 3959 | mov TAB:RB, [BASE+RB*8] 3960 | cleartp TAB:RB 3961 |.if DUALNUM 3962 | mov RC, [BASE+RC*8] 3963 |.else 3964 | cvttsd2si RCd, qword [BASE+RC*8] 3965 |.endif 3966 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) 3967 | jnz >7 3968 |2: 3969 | cmp RCd, TAB:RB->asize 3970 | jae ->vmeta_tsetr 3971 | shl RCd, 3 3972 | add RC, TAB:RB->array 3973 | // Set array slot. 3974 |->BC_TSETR_Z: 3975 | mov ITYPE, [BASE+RA*8] 3976 | mov [RC], ITYPE 3977 | ins_next 3978 | 3979 |7: // Possible table write barrier for the value. Skip valiswhite check. 3980 | barrierback TAB:RB, TMPR 3981 | jmp <2 3982 break; 3983 3984 case BC_TSETM: 3985 | ins_AD // RA = base (table at base-1), RD = num const (start index) 3986 |1: 3987 | mov TMPRd, dword [KBASE+RD*8] // Integer constant is in lo-word. 3988 | lea RA, [BASE+RA*8] 3989 | mov TAB:RB, [RA-8] // Guaranteed to be a table. 3990 | cleartp TAB:RB 3991 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) 3992 | jnz >7 3993 |2: 3994 | mov RDd, MULTRES 3995 | sub RDd, 1 3996 | jz >4 // Nothing to copy? 3997 | add RDd, TMPRd // Compute needed size. 3998 | cmp RDd, TAB:RB->asize 3999 | ja >5 // Doesn't fit into array part? 4000 | sub RDd, TMPRd 4001 | shl TMPRd, 3 4002 | add TMPR, TAB:RB->array 4003 |3: // Copy result slots to table. 4004 | mov RB, [RA] 4005 | add RA, 8 4006 | mov [TMPR], RB 4007 | add TMPR, 8 4008 | sub RDd, 1 4009 | jnz <3 4010 |4: 4011 | ins_next 4012 | 4013 |5: // Need to resize array part. 4014 | mov L:CARG1, SAVE_L 4015 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE. 4016 | mov CARG2, TAB:RB 4017 | mov CARG3d, RDd 4018 | mov L:RB, L:CARG1 4019 | mov SAVE_PC, PC 4020 | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) 4021 | mov BASE, L:RB->base 4022 | movzx RAd, PC_RA // Restore RA. 4023 | movzx RDd, PC_RD // Restore RD. 4024 | jmp <1 // Retry. 
4025 | 4026 |7: // Possible table write barrier for any value. Skip valiswhite check. 4027 | barrierback TAB:RB, RD 4028 | jmp <2 4029 break; 4030 4031 /* -- Calls and vararg handling ----------------------------------------- */ 4032 4033 case BC_CALL: case BC_CALLM: 4034 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs 4035 if (op == BC_CALLM) { 4036 | add NARGS:RDd, MULTRES 4037 } 4038 | mov LFUNC:RB, [BASE+RA*8] 4039 | checkfunc LFUNC:RB, ->vmeta_call_ra 4040 | lea BASE, [BASE+RA*8+16] 4041 | ins_call 4042 break; 4043 4044 case BC_CALLMT: 4045 | ins_AD // RA = base, RD = extra_nargs 4046 | add NARGS:RDd, MULTRES 4047 | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op. 4048 break; 4049 case BC_CALLT: 4050 | ins_AD // RA = base, RD = nargs+1 4051 | lea RA, [BASE+RA*8+16] 4052 | mov KBASE, BASE // Use KBASE for move + vmeta_call hint. 4053 | mov LFUNC:RB, [RA-16] 4054 | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call 4055 |->BC_CALLT_Z: 4056 | mov PC, [BASE-8] 4057 | test PCd, FRAME_TYPE 4058 | jnz >7 4059 |1: 4060 | mov [BASE-16], LFUNC:RB // Copy func+tag down, reloaded below. 4061 | mov MULTRES, NARGS:RDd 4062 | sub NARGS:RDd, 1 4063 | jz >3 4064 |2: // Move args down. 4065 | mov RB, [RA] 4066 | add RA, 8 4067 | mov [KBASE], RB 4068 | add KBASE, 8 4069 | sub NARGS:RDd, 1 4070 | jnz <2 4071 | 4072 | mov LFUNC:RB, [BASE-16] 4073 |3: 4074 | cleartp LFUNC:RB 4075 | mov NARGS:RDd, MULTRES 4076 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function? 4077 | ja >5 4078 |4: 4079 | ins_callt 4080 | 4081 |5: // Tailcall to a fast function. 4082 | test PCd, FRAME_TYPE // Lua frame below? 4083 | jnz <4 4084 | movzx RAd, PC_RA 4085 | neg RA 4086 | mov LFUNC:KBASE, [BASE+RA*8-32] // Need to prepare KBASE. 4087 | cleartp LFUNC:KBASE 4088 | mov KBASE, LFUNC:KBASE->pc 4089 | mov KBASE, [KBASE+PC2PROTO(k)] 4090 | jmp <4 4091 | 4092 |7: // Tailcall from a vararg function. 
4093 | sub PC, FRAME_VARG 4094 | test PCd, FRAME_TYPEP 4095 | jnz >8 // Vararg frame below? 4096 | sub BASE, PC // Need to relocate BASE/KBASE down. 4097 | mov KBASE, BASE 4098 | mov PC, [BASE-8] 4099 | jmp <1 4100 |8: 4101 | add PCd, FRAME_VARG 4102 | jmp <1 4103 break; 4104 4105 case BC_ITERC: 4106 | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1) 4107 | lea RA, [BASE+RA*8+16] // fb = base+2 4108 | mov RB, [RA-32] // Copy state. fb[0] = fb[-4]. 4109 | mov RC, [RA-24] // Copy control var. fb[1] = fb[-3]. 4110 | mov [RA], RB 4111 | mov [RA+8], RC 4112 | mov LFUNC:RB, [RA-40] // Copy callable. fb[-2] = fb[-5] 4113 | mov [RA-16], LFUNC:RB 4114 | mov NARGS:RDd, 2+1 // Handle like a regular 2-arg call. 4115 | checkfunc LFUNC:RB, ->vmeta_call 4116 | mov BASE, RA 4117 | ins_call 4118 break; 4119 4120 case BC_ITERN: 4121 |.if JIT 4122 | hotloop RBd 4123 |.endif 4124 |->vm_IITERN: 4125 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) 4126 | mov TAB:RB, [BASE+RA*8-16] 4127 | cleartp TAB:RB 4128 | mov RCd, [BASE+RA*8-8] // Get index from control var. 4129 | mov TMPRd, TAB:RB->asize 4130 | add PC, 4 4131 | mov ITYPE, TAB:RB->array 4132 |1: // Traverse array part. 4133 | cmp RCd, TMPRd; jae >5 // Index points after array part? 4134 | cmp aword [ITYPE+RC*8], LJ_TNIL; je >4 4135 |.if not DUALNUM 4136 | cvtsi2sd xmm0, RCd 4137 |.endif 4138 | // Copy array slot to returned value. 4139 | mov RB, [ITYPE+RC*8] 4140 | mov [BASE+RA*8+8], RB 4141 | // Return array index as a numeric key. 4142 |.if DUALNUM 4143 | setint ITYPE, RC 4144 | mov [BASE+RA*8], ITYPE 4145 |.else 4146 | movsd qword [BASE+RA*8], xmm0 4147 |.endif 4148 | add RCd, 1 4149 | mov [BASE+RA*8-8], RCd // Update control var. 4150 |2: 4151 | movzx RDd, PC_RD // Get target from ITERL. 4152 | branchPC RD 4153 |3: 4154 | ins_next 4155 | 4156 |4: // Skip holes in array part. 4157 | add RCd, 1 4158 | jmp <1 4159 | 4160 |5: // Traverse hash part. 
4161 | sub RCd, TMPRd 4162 |6: 4163 | cmp RCd, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1. 4164 | imul ITYPEd, RCd, #NODE 4165 | add NODE:ITYPE, TAB:RB->node 4166 | cmp aword NODE:ITYPE->val, LJ_TNIL; je >7 4167 | lea TMPRd, [RCd+TMPRd+1] 4168 | // Copy key and value from hash slot. 4169 | mov RB, NODE:ITYPE->key 4170 | mov RC, NODE:ITYPE->val 4171 | mov [BASE+RA*8], RB 4172 | mov [BASE+RA*8+8], RC 4173 | mov [BASE+RA*8-8], TMPRd 4174 | jmp <2 4175 | 4176 |7: // Skip holes in hash part. 4177 | add RCd, 1 4178 | jmp <6 4179 break; 4180 4181 case BC_ISNEXT: 4182 | ins_AD // RA = base, RD = target (points to ITERN) 4183 | mov CFUNC:RB, [BASE+RA*8-24] 4184 | checkfunc CFUNC:RB, >5 4185 | checktptp [BASE+RA*8-16], LJ_TTAB, >5 4186 | cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5 4187 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 4188 | branchPC RD 4189 | mov64 TMPR, ((uint64_t)LJ_KEYINDEX << 32) 4190 | mov [BASE+RA*8-8], TMPR // Initialize control var. 4191 |1: 4192 | ins_next 4193 |5: // Despecialize bytecode if any of the checks fail. 4194 | mov PC_OP, BC_JMP 4195 | branchPC RD 4196 |.if JIT 4197 | cmp byte [PC], BC_ITERN 4198 | jne >6 4199 |.endif 4200 | mov byte [PC], BC_ITERC 4201 | jmp <1 4202 |.if JIT 4203 |6: // Unpatch JLOOP. 4204 | mov RA, [DISPATCH+DISPATCH_J(trace)] 4205 | movzx RCd, word [PC+2] 4206 | mov TRACE:RA, [RA+RC*8] 4207 | mov eax, TRACE:RA->startins 4208 | mov al, BC_ITERC 4209 | mov dword [PC], eax 4210 | jmp <1 4211 |.endif 4212 break; 4213 4214 case BC_VARG: 4215 | ins_ABC // RA = base, RB = nresults+1, RC = numparams 4216 | lea TMPR, [BASE+RC*8+(16+FRAME_VARG)] 4217 | lea RA, [BASE+RA*8] 4218 | sub TMPR, [BASE-8] 4219 | // Note: TMPR may now be even _above_ BASE if nargs was < numparams. 4220 | test RB, RB 4221 | jz >5 // Copy all varargs? 4222 | lea RB, [RA+RB*8-8] 4223 | cmp TMPR, BASE // No vararg slots? 4224 | jnb >2 4225 |1: // Copy vararg slots to destination slots. 
4226 | mov RC, [TMPR-16] 4227 | add TMPR, 8 4228 | mov [RA], RC 4229 | add RA, 8 4230 | cmp RA, RB // All destination slots filled? 4231 | jnb >3 4232 | cmp TMPR, BASE // No more vararg slots? 4233 | jb <1 4234 |2: // Fill up remainder with nil. 4235 | mov aword [RA], LJ_TNIL 4236 | add RA, 8 4237 | cmp RA, RB 4238 | jb <2 4239 |3: 4240 | ins_next 4241 | 4242 |5: // Copy all varargs. 4243 | mov MULTRES, 1 // MULTRES = 0+1 4244 | mov RC, BASE 4245 | sub RC, TMPR 4246 | jbe <3 // No vararg slots? 4247 | mov RBd, RCd 4248 | shr RBd, 3 4249 | add RBd, 1 4250 | mov MULTRES, RBd // MULTRES = #varargs+1 4251 | mov L:RB, SAVE_L 4252 | add RC, RA 4253 | cmp RC, L:RB->maxstack 4254 | ja >7 // Need to grow stack? 4255 |6: // Copy all vararg slots. 4256 | mov RC, [TMPR-16] 4257 | add TMPR, 8 4258 | mov [RA], RC 4259 | add RA, 8 4260 | cmp TMPR, BASE // No more vararg slots? 4261 | jb <6 4262 | jmp <3 4263 | 4264 |7: // Grow stack for varargs. 4265 | mov L:RB->base, BASE 4266 | mov L:RB->top, RA 4267 | mov SAVE_PC, PC 4268 | sub TMPR, BASE // Need delta, because BASE may change. 4269 | mov TMP1hi, TMPRd 4270 | mov CARG2d, MULTRES 4271 | sub CARG2d, 1 4272 | mov CARG1, L:RB 4273 | call extern lj_state_growstack // (lua_State *L, int n) 4274 | mov BASE, L:RB->base 4275 | movsxd TMPR, TMP1hi 4276 | mov RA, L:RB->top 4277 | add TMPR, BASE 4278 | jmp <6 4279 break; 4280 4281 /* -- Returns ----------------------------------------------------------- */ 4282 4283 case BC_RETM: 4284 | ins_AD // RA = results, RD = extra_nresults 4285 | add RDd, MULTRES // MULTRES >=1, so RD >=1. 4286 | // Fall through. Assumes BC_RET follows and ins_AD is a no-op. 4287 break; 4288 4289 case BC_RET: case BC_RET0: case BC_RET1: 4290 | ins_AD // RA = results, RD = nresults+1 4291 if (op != BC_RET0) { 4292 | shl RAd, 3 4293 } 4294 |1: 4295 | mov PC, [BASE-8] 4296 | mov MULTRES, RDd // Save nresults+1. 4297 | test PCd, FRAME_TYPE // Check frame type marker. 4298 | jnz >7 // Not returning to a fixarg Lua func? 
4299 switch (op) { 4300 case BC_RET: 4301 |->BC_RET_Z: 4302 | mov KBASE, BASE // Use KBASE for result move. 4303 | sub RDd, 1 4304 | jz >3 4305 |2: // Move results down. 4306 | mov RB, [KBASE+RA] 4307 | mov [KBASE-16], RB 4308 | add KBASE, 8 4309 | sub RDd, 1 4310 | jnz <2 4311 |3: 4312 | mov RDd, MULTRES // Note: MULTRES may be >255. 4313 | movzx RBd, PC_RB // So cannot compare with RDL! 4314 |5: 4315 | cmp RBd, RDd // More results expected? 4316 | ja >6 4317 break; 4318 case BC_RET1: 4319 | mov RB, [BASE+RA] 4320 | mov [BASE-16], RB 4321 /* fallthrough */ 4322 case BC_RET0: 4323 |5: 4324 | cmp PC_RB, RDL // More results expected? 4325 | ja >6 4326 default: 4327 break; 4328 } 4329 | movzx RAd, PC_RA 4330 | neg RA 4331 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8 4332 | mov LFUNC:KBASE, [BASE-16] 4333 | cleartp LFUNC:KBASE 4334 | mov KBASE, LFUNC:KBASE->pc 4335 | mov KBASE, [KBASE+PC2PROTO(k)] 4336 | ins_next 4337 | 4338 |6: // Fill up results with nil. 4339 if (op == BC_RET) { 4340 | mov aword [KBASE-16], LJ_TNIL // Note: relies on shifted base. 4341 | add KBASE, 8 4342 } else { 4343 | mov aword [BASE+RD*8-24], LJ_TNIL 4344 } 4345 | add RD, 1 4346 | jmp <5 4347 | 4348 |7: // Non-standard return case. 4349 | lea RB, [PC-FRAME_VARG] 4350 | test RBd, FRAME_TYPEP 4351 | jnz ->vm_return 4352 | // Return from vararg function: relocate BASE down and RA up. 4353 | sub BASE, RB 4354 if (op != BC_RET0) { 4355 | add RA, RB 4356 } 4357 | jmp <1 4358 break; 4359 4360 /* -- Loops and branches ------------------------------------------------ */ 4361 4362 |.define FOR_IDX, [RA] 4363 |.define FOR_STOP, [RA+8] 4364 |.define FOR_STEP, [RA+16] 4365 |.define FOR_EXT, [RA+24] 4366 4367 case BC_FORL: 4368 |.if JIT 4369 | hotloop RBd 4370 |.endif 4371 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. 
4372 break; 4373 4374 case BC_JFORI: 4375 case BC_JFORL: 4376#if !LJ_HASJIT 4377 break; 4378#endif 4379 case BC_FORI: 4380 case BC_IFORL: 4381 vk = (op == BC_IFORL || op == BC_JFORL); 4382 | ins_AJ // RA = base, RD = target (after end of loop or start of loop) 4383 | lea RA, [BASE+RA*8] 4384 if (LJ_DUALNUM) { 4385 | mov RB, FOR_IDX 4386 | checkint RB, >9 4387 | mov TMPR, FOR_STOP 4388 if (!vk) { 4389 | checkint TMPR, ->vmeta_for 4390 | mov ITYPE, FOR_STEP 4391 | test ITYPEd, ITYPEd; js >5 4392 | sar ITYPE, 47; 4393 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for 4394 } else { 4395#ifdef LUA_USE_ASSERT 4396 | checkinttp FOR_STOP, ->assert_bad_for_arg_type 4397 | checkinttp FOR_STEP, ->assert_bad_for_arg_type 4398#endif 4399 | mov ITYPE, FOR_STEP 4400 | test ITYPEd, ITYPEd; js >5 4401 | add RBd, ITYPEd; jo >1 4402 | setint RB 4403 | mov FOR_IDX, RB 4404 } 4405 | cmp RBd, TMPRd 4406 | mov FOR_EXT, RB 4407 if (op == BC_FORI) { 4408 | jle >7 4409 |1: 4410 |6: 4411 | branchPC RD 4412 } else if (op == BC_JFORI) { 4413 | branchPC RD 4414 | movzx RDd, PC_RD 4415 | jle =>BC_JLOOP 4416 |1: 4417 |6: 4418 } else if (op == BC_IFORL) { 4419 | jg >7 4420 |6: 4421 | branchPC RD 4422 |1: 4423 } else { 4424 | jle =>BC_JLOOP 4425 |1: 4426 |6: 4427 } 4428 |7: 4429 | ins_next 4430 | 4431 |5: // Invert check for negative step. 4432 if (!vk) { 4433 | sar ITYPE, 47; 4434 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for 4435 } else { 4436 | add RBd, ITYPEd; jo <1 4437 | setint RB 4438 | mov FOR_IDX, RB 4439 } 4440 | cmp RBd, TMPRd 4441 | mov FOR_EXT, RB 4442 if (op == BC_FORI) { 4443 | jge <7 4444 } else if (op == BC_JFORI) { 4445 | branchPC RD 4446 | movzx RDd, PC_RD 4447 | jge =>BC_JLOOP 4448 } else if (op == BC_IFORL) { 4449 | jl <7 4450 } else { 4451 | jge =>BC_JLOOP 4452 } 4453 | jmp <6 4454 |9: // Fallback to FP variant. 
4455 if (!vk) { 4456 | jae ->vmeta_for 4457 } 4458 } else if (!vk) { 4459 | checknumtp FOR_IDX, ->vmeta_for 4460 } 4461 if (!vk) { 4462 | checknumtp FOR_STOP, ->vmeta_for 4463 } else { 4464#ifdef LUA_USE_ASSERT 4465 | checknumtp FOR_STOP, ->assert_bad_for_arg_type 4466 | checknumtp FOR_STEP, ->assert_bad_for_arg_type 4467#endif 4468 } 4469 | mov RB, FOR_STEP 4470 if (!vk) { 4471 | checknum RB, ->vmeta_for 4472 } 4473 | movsd xmm0, qword FOR_IDX 4474 | movsd xmm1, qword FOR_STOP 4475 if (vk) { 4476 | addsd xmm0, qword FOR_STEP 4477 | movsd qword FOR_IDX, xmm0 4478 | test RB, RB; js >3 4479 } else { 4480 | jl >3 4481 } 4482 | ucomisd xmm1, xmm0 4483 |1: 4484 | movsd qword FOR_EXT, xmm0 4485 if (op == BC_FORI) { 4486 |.if DUALNUM 4487 | jnb <7 4488 |.else 4489 | jnb >2 4490 | branchPC RD 4491 |.endif 4492 } else if (op == BC_JFORI) { 4493 | branchPC RD 4494 | movzx RDd, PC_RD 4495 | jnb =>BC_JLOOP 4496 } else if (op == BC_IFORL) { 4497 |.if DUALNUM 4498 | jb <7 4499 |.else 4500 | jb >2 4501 | branchPC RD 4502 |.endif 4503 } else { 4504 | jnb =>BC_JLOOP 4505 } 4506 |.if DUALNUM 4507 | jmp <6 4508 |.else 4509 |2: 4510 | ins_next 4511 |.endif 4512 | 4513 |3: // Invert comparison if step is negative. 4514 | ucomisd xmm0, xmm1 4515 | jmp <1 4516 break; 4517 4518 case BC_ITERL: 4519 |.if JIT 4520 | hotloop RBd 4521 |.endif 4522 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. 4523 break; 4524 4525 case BC_JITERL: 4526#if !LJ_HASJIT 4527 break; 4528#endif 4529 case BC_IITERL: 4530 | ins_AJ // RA = base, RD = target 4531 | lea RA, [BASE+RA*8] 4532 | mov RB, [RA] 4533 | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil. 4534 if (op == BC_JITERL) { 4535 | mov [RA-8], RB 4536 | jmp =>BC_JLOOP 4537 } else { 4538 | branchPC RD // Otherwise save control var + branch. 
4539 | mov [RA-8], RB 4540 } 4541 |1: 4542 | ins_next 4543 break; 4544 4545 case BC_LOOP: 4546 | ins_A // RA = base, RD = target (loop extent) 4547 | // Note: RA/RD is only used by trace recorder to determine scope/extent 4548 | // This opcode does NOT jump, it's only purpose is to detect a hot loop. 4549 |.if JIT 4550 | hotloop RBd 4551 |.endif 4552 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 4553 break; 4554 4555 case BC_ILOOP: 4556 | ins_A // RA = base, RD = target (loop extent) 4557 | ins_next 4558 break; 4559 4560 case BC_JLOOP: 4561 |.if JIT 4562 | ins_AD // RA = base (ignored), RD = traceno 4563#ifdef LUA_USE_TRACE_LOGS 4564 |.if not X64WIN 4565 | mov L:RB, SAVE_L 4566 | mov L:RB->base, BASE // Save BASE 4567 | mov TMP1, RD // Save RD 4568 | mov CARG3, PC // CARG3 == BASE 4569 | mov CARG2, RD 4570 | mov CARG1, RB 4571 | call extern lj_log_trace_entry@8 4572 | mov RD, TMP1 4573 | mov BASE, L:RB->base 4574 |.endif 4575#endif 4576 | mov RA, [DISPATCH+DISPATCH_J(trace)] 4577 | mov TRACE:RD, [RA+RD*8] 4578 | mov RD, TRACE:RD->mcode 4579 | mov L:RB, SAVE_L 4580 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE 4581 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB 4582 | // Save additional callee-save registers only used in compiled code. 
4583 |.if X64WIN 4584 | mov CSAVE_4, r12 4585 | mov CSAVE_3, r13 4586 | mov CSAVE_2, r14 4587 | mov CSAVE_1, r15 4588 | mov RA, rsp 4589 | sub rsp, 10*16+4*8 4590 | movdqa [RA-1*16], xmm6 4591 | movdqa [RA-2*16], xmm7 4592 | movdqa [RA-3*16], xmm8 4593 | movdqa [RA-4*16], xmm9 4594 | movdqa [RA-5*16], xmm10 4595 | movdqa [RA-6*16], xmm11 4596 | movdqa [RA-7*16], xmm12 4597 | movdqa [RA-8*16], xmm13 4598 | movdqa [RA-9*16], xmm14 4599 | movdqa [RA-10*16], xmm15 4600 |.else 4601 | sub rsp, 16 4602 | mov [rsp+16], r12 4603 | mov [rsp+8], r13 4604 |.endif 4605 | jmp RD 4606 |.endif 4607 break; 4608 4609 case BC_JMP: 4610 | ins_AJ // RA = unused, RD = target 4611 | branchPC RD 4612 | ins_next 4613 break; 4614 4615 /* -- Function headers -------------------------------------------------- */ 4616 4617 /* 4618 ** Reminder: A function may be called with func/args above L->maxstack, 4619 ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot, 4620 ** too. This means all FUNC* ops (including fast functions) must check 4621 ** for stack overflow _before_ adding more slots! 4622 */ 4623 4624 case BC_FUNCF: 4625 |.if JIT 4626 | hotcall RBd 4627 |.endif 4628 case BC_FUNCV: /* NYI: compiled vararg functions. */ 4629 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. 4630 break; 4631 4632 case BC_JFUNCF: 4633#if !LJ_HASJIT 4634 break; 4635#endif 4636 case BC_IFUNCF: 4637 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 4638 | mov KBASE, [PC-4+PC2PROTO(k)] 4639 | mov L:RB, SAVE_L 4640 | lea RA, [BASE+RA*8] // Top of frame. 4641 | cmp RA, L:RB->maxstack 4642 | ja ->vm_growstack_f 4643 | movzx RAd, byte [PC-4+PC2PROTO(numparams)] 4644 | cmp NARGS:RDd, RAd // Check for missing parameters. 4645 | jbe >3 4646 |2: 4647 if (op == BC_JFUNCF) { 4648 | movzx RDd, PC_RD 4649 | jmp =>BC_JLOOP 4650 } else { 4651 | ins_next 4652 } 4653 | 4654 |3: // Clear missing parameters. 
4655 | mov aword [BASE+NARGS:RD*8-8], LJ_TNIL 4656 | add NARGS:RDd, 1 4657 | cmp NARGS:RDd, RAd 4658 | jbe <3 4659 | jmp <2 4660 break; 4661 4662 case BC_JFUNCV: 4663#if !LJ_HASJIT 4664 break; 4665#endif 4666 | int3 // NYI: compiled vararg functions 4667 break; /* NYI: compiled vararg functions. */ 4668 4669 case BC_IFUNCV: 4670 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 4671 | lea RBd, [NARGS:RD*8+FRAME_VARG+8] 4672 | lea RD, [BASE+NARGS:RD*8+8] 4673 | mov LFUNC:KBASE, [BASE-16] 4674 | mov [RD-8], RB // Store delta + FRAME_VARG. 4675 | mov [RD-16], LFUNC:KBASE // Store copy of LFUNC. 4676 | mov L:RB, SAVE_L 4677 | lea RA, [RD+RA*8] 4678 | cmp RA, L:RB->maxstack 4679 | ja ->vm_growstack_v // Need to grow stack. 4680 | mov RA, BASE 4681 | mov BASE, RD 4682 | movzx RBd, byte [PC-4+PC2PROTO(numparams)] 4683 | test RBd, RBd 4684 | jz >2 4685 | add RA, 8 4686 |1: // Copy fixarg slots up to new frame. 4687 | add RA, 8 4688 | cmp RA, BASE 4689 | jnb >3 // Less args than parameters? 4690 | mov KBASE, [RA-16] 4691 | mov [RD], KBASE 4692 | add RD, 8 4693 | mov aword [RA-16], LJ_TNIL // Clear old fixarg slot (help the GC). 4694 | sub RBd, 1 4695 | jnz <1 4696 |2: 4697 if (op == BC_JFUNCV) { 4698 | movzx RDd, PC_RD 4699 | jmp =>BC_JLOOP 4700 } else { 4701 | mov KBASE, [PC-4+PC2PROTO(k)] 4702 | ins_next 4703 } 4704 | 4705 |3: // Clear missing parameters. 4706 | mov aword [RD], LJ_TNIL 4707 | add RD, 8 4708 | sub RBd, 1 4709 | jnz <3 4710 | jmp <2 4711 break; 4712 4713 case BC_FUNCC: 4714 case BC_FUNCCW: 4715 | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1 4716 | mov CFUNC:RB, [BASE-16] 4717 | cleartp CFUNC:RB 4718 | mov KBASE, CFUNC:RB->f 4719 | mov L:RB, SAVE_L 4720 | lea RD, [BASE+NARGS:RD*8-8] 4721 | mov L:RB->base, BASE 4722 | lea RA, [RD+8*LUA_MINSTACK] 4723 | cmp RA, L:RB->maxstack 4724 | mov L:RB->top, RD 4725 if (op == BC_FUNCC) { 4726 | mov CARG1, L:RB // Caveat: CARG1 may be RA. 
4727 } else { 4728 | mov CARG2, KBASE 4729 | mov CARG1, L:RB // Caveat: CARG1 may be RA. 4730 } 4731 | ja ->vm_growstack_c // Need to grow stack. 4732 | set_vmstate C 4733 if (op == BC_FUNCC) { 4734 | call KBASE // (lua_State *L) 4735 } else { 4736 | // (lua_State *L, lua_CFunction f) 4737 | call aword [DISPATCH+DISPATCH_GL(wrapf)] 4738 } 4739 | // nresults returned in eax (RD). 4740 | mov BASE, L:RB->base 4741 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB 4742 | set_vmstate INTERP 4743 | lea RA, [BASE+RD*8] 4744 | neg RA 4745 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 4746 | mov PC, [BASE-8] // Fetch PC of caller. 4747 | jmp ->vm_returnc 4748 break; 4749 4750 /* ---------------------------------------------------------------------- */ 4751 4752 default: 4753 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); 4754 exit(2); 4755 break; 4756 } 4757} 4758 4759static int build_backend(BuildCtx *ctx) 4760{ 4761 int op; 4762 dasm_growpc(Dst, BC__MAX); 4763 build_subroutines(ctx); 4764 |.code_op 4765 for (op = 0; op < BC__MAX; op++) 4766 build_ins(ctx, (BCOp)op, op); 4767 return BC__MAX; 4768} 4769 4770/* Emit pseudo frame-info for all assembler functions. 
*/
/*
** Writes hand-built call-frame information (CIE/FDE records) to ctx->fp so
** that debuggers and unwinders can walk through the generated VM code.
**
** ELF assembler output (BUILD_elfasm):
**   - a .debug_frame CIE, an FDE starting at .Lbegin and, with the FFI
**     enabled, a second FDE for lj_vm_ffi_call (which uses rbp as CFA
**     register);
**   - unless LJ_NO_UNWIND is set, the equivalent .eh_frame records, where
**     the main CIE names lj_err_unwind_dwarf as personality routine.
** Mach-O assembler output (BUILD_machasm, only without LJ_NO_UNWIND):
**   - one FDE per non-empty symbol, plus a separate CIE/FDE pair for
**     _lj_vm_ffi_call when the FFI is enabled.
** Every other build mode emits nothing.
*/
static void emit_asm_debug(BuildCtx *ctx)
{
  FILE *out = ctx->fp;
  /* Distance of lj_vm_ffi_call from the start of the code. The ELF records
  ** use it as the length of the first FDE; the remainder up to ctx->codesz
  ** is the length of the lj_vm_ffi_call FDE.
  */
  int ffi_ofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);

  switch (ctx->mode) {
  case BUILD_elfasm:
    /* .debug_frame: CIE. */
    fputs("\t.section .debug_frame,\"\",@progbits\n", out);
    fputs(
      ".Lframe0:\n"
      "\t.long .LECIE0-.LSCIE0\n"
      ".LSCIE0:\n"
      "\t.long 0xffffffff\n"
      "\t.byte 0x1\n"
      "\t.string \"\"\n"
      "\t.uleb128 0x1\n"
      "\t.sleb128 -8\n"
      "\t.byte 0x10\n"
      "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
      "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
      "\t.align 8\n"
      ".LECIE0:\n\n", out);
    /* .debug_frame: FDE starting at .Lbegin, ffi_ofs bytes long. */
    fprintf(out,
      ".LSFDE0:\n"
      "\t.long .LEFDE0-.LASFDE0\n"
      ".LASFDE0:\n"
      "\t.long .Lframe0\n"
      "\t.quad .Lbegin\n"
      "\t.quad %d\n"
      "\t.byte 0xe\n\t.uleb128 %d\n"      /* def_cfa_offset */
      "\t.byte 0x86\n\t.uleb128 0x2\n"    /* offset rbp */
      "\t.byte 0x83\n\t.uleb128 0x3\n"    /* offset rbx */
      "\t.byte 0x8f\n\t.uleb128 0x4\n"    /* offset r15 */
      "\t.byte 0x8e\n\t.uleb128 0x5\n"    /* offset r14 */
#if LJ_NO_UNWIND
      "\t.byte 0x8d\n\t.uleb128 0x6\n"    /* offset r13 */
      "\t.byte 0x8c\n\t.uleb128 0x7\n"    /* offset r12 */
#endif
      "\t.align 8\n"
      ".LEFDE0:\n\n", ffi_ofs, CFRAME_SIZE);
#if LJ_HASFFI
    /* .debug_frame: FDE for lj_vm_ffi_call. */
    fprintf(out,
      ".LSFDE1:\n"
      "\t.long .LEFDE1-.LASFDE1\n"
      ".LASFDE1:\n"
      "\t.long .Lframe0\n"
      "\t.quad lj_vm_ffi_call\n"
      "\t.quad %d\n"
      "\t.byte 0xe\n\t.uleb128 16\n"      /* def_cfa_offset */
      "\t.byte 0x86\n\t.uleb128 0x2\n"    /* offset rbp */
      "\t.byte 0xd\n\t.uleb128 0x6\n"     /* def_cfa_register rbp */
      "\t.byte 0x83\n\t.uleb128 0x3\n"    /* offset rbx */
      "\t.align 8\n"
      ".LEFDE1:\n\n", (int)ctx->codesz - ffi_ofs);
#endif
#if !LJ_NO_UNWIND
    /* .eh_frame: the section type differs on Solaris. */
#if LJ_TARGET_SOLARIS
    fputs("\t.section .eh_frame,\"a\",@unwind\n", out);
#else
    fputs("\t.section .eh_frame,\"a\",@progbits\n", out);
#endif
    /* .eh_frame: CIE with personality routine. */
    fputs(
      ".Lframe1:\n"
      "\t.long .LECIE1-.LSCIE1\n"
      ".LSCIE1:\n"
      "\t.long 0\n"
      "\t.byte 0x1\n"
      "\t.string \"zPR\"\n"
      "\t.uleb128 0x1\n"
      "\t.sleb128 -8\n"
      "\t.byte 0x10\n"
      "\t.uleb128 6\n"                    /* augmentation length */
      "\t.byte 0x1b\n"                    /* pcrel|sdata4 */
      "\t.long lj_err_unwind_dwarf-.\n"
      "\t.byte 0x1b\n"                    /* pcrel|sdata4 */
      "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
      "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
      "\t.align 8\n"
      ".LECIE1:\n\n", out);
    /* .eh_frame: FDE starting at .Lbegin, ffi_ofs bytes long. */
    fprintf(out,
      ".LSFDE2:\n"
      "\t.long .LEFDE2-.LASFDE2\n"
      ".LASFDE2:\n"
      "\t.long .LASFDE2-.Lframe1\n"
      "\t.long .Lbegin-.\n"
      "\t.long %d\n"
      "\t.uleb128 0\n"                    /* augmentation length */
      "\t.byte 0xe\n\t.uleb128 %d\n"      /* def_cfa_offset */
      "\t.byte 0x86\n\t.uleb128 0x2\n"    /* offset rbp */
      "\t.byte 0x83\n\t.uleb128 0x3\n"    /* offset rbx */
      "\t.byte 0x8f\n\t.uleb128 0x4\n"    /* offset r15 */
      "\t.byte 0x8e\n\t.uleb128 0x5\n"    /* offset r14 */
      "\t.align 8\n"
      ".LEFDE2:\n\n", ffi_ofs, CFRAME_SIZE);
#if LJ_HASFFI
    /* .eh_frame: separate CIE (no personality) for lj_vm_ffi_call. */
    fputs(
      ".Lframe2:\n"
      "\t.long .LECIE2-.LSCIE2\n"
      ".LSCIE2:\n"
      "\t.long 0\n"
      "\t.byte 0x1\n"
      "\t.string \"zR\"\n"
      "\t.uleb128 0x1\n"
      "\t.sleb128 -8\n"
      "\t.byte 0x10\n"
      "\t.uleb128 1\n"                    /* augmentation length */
      "\t.byte 0x1b\n"                    /* pcrel|sdata4 */
      "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
      "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
      "\t.align 8\n"
      ".LECIE2:\n\n", out);
    /* .eh_frame: FDE for lj_vm_ffi_call. */
    fprintf(out,
      ".LSFDE3:\n"
      "\t.long .LEFDE3-.LASFDE3\n"
      ".LASFDE3:\n"
      "\t.long .LASFDE3-.Lframe2\n"
      "\t.long lj_vm_ffi_call-.\n"
      "\t.long %d\n"
      "\t.uleb128 0\n"                    /* augmentation length */
      "\t.byte 0xe\n\t.uleb128 16\n"      /* def_cfa_offset */
      "\t.byte 0x86\n\t.uleb128 0x2\n"    /* offset rbp */
      "\t.byte 0xd\n\t.uleb128 0x6\n"     /* def_cfa_register rbp */
      "\t.byte 0x83\n\t.uleb128 0x3\n"    /* offset rbx */
      "\t.align 8\n"
      ".LEFDE3:\n\n", (int)ctx->codesz - ffi_ofs);
#endif
#endif
    break;
#if !LJ_NO_UNWIND
  /* Mental note: never let Apple design an assembler.
  ** Or a linker. Or a plastic case. But I digress.
  */
  case BUILD_machasm: {
#if LJ_HASFFI
    int ffi_size = 0;  /* Size of _lj_vm_ffi_call; stays 0 if never seen. */
#endif
    int n;
    fputs("\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n", out);
    /* Shared CIE with personality routine. */
    fputs(
      "EH_frame1:\n"
      "\t.set L$set$x,LECIEX-LSCIEX\n"
      "\t.long L$set$x\n"
      "LSCIEX:\n"
      "\t.long 0\n"
      "\t.byte 0x1\n"
      "\t.ascii \"zPR\\0\"\n"
      "\t.byte 0x1\n"
      "\t.byte 128-8\n"
      "\t.byte 0x10\n"
      "\t.byte 6\n"                       /* augmentation length */
      "\t.byte 0x9b\n"                    /* indirect|pcrel|sdata4 */
      "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
      "\t.byte 0x1b\n"                    /* pcrel|sdata4 */
      "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
      "\t.byte 0x80+0x10\n\t.byte 0x1\n"
      "\t.align 3\n"
      "LECIEX:\n\n", out);
    /* One FDE per symbol; a symbol's size is the gap to the next symbol. */
    for (n = 0; n < ctx->nsym; n++) {
      const char *sym = ctx->sym[n].name;
      int32_t len = ctx->sym[n+1].ofs - ctx->sym[n].ofs;
      if (len == 0) continue;  /* Empty symbols get no FDE. */
#if LJ_HASFFI
      /* _lj_vm_ffi_call has its own frame layout: remember its size and
      ** describe it after the loop instead.
      */
      if (!strcmp(sym, "_lj_vm_ffi_call")) { ffi_size = len; continue; }
#endif
      fprintf(out,
        "%s.eh:\n"
        "LSFDE%d:\n"
        "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
        "\t.long L$set$%d\n"
        "LASFDE%d:\n"
        "\t.long LASFDE%d-EH_frame1\n"
        "\t.long %s-.\n"
        "\t.long %d\n"
        "\t.byte 0\n"                     /* augmentation length */
        "\t.byte 0xe\n\t.byte %d\n"       /* def_cfa_offset */
        "\t.byte 0x86\n\t.byte 0x2\n"     /* offset rbp */
        "\t.byte 0x83\n\t.byte 0x3\n"     /* offset rbx */
        "\t.byte 0x8f\n\t.byte 0x4\n"     /* offset r15 */
        "\t.byte 0x8e\n\t.byte 0x5\n"     /* offset r14 */
        "\t.align 3\n"
        "LEFDE%d:\n\n",
        sym, n, n, n, n, n, n, n, sym, len, CFRAME_SIZE, n);
    }
#if LJ_HASFFI
    if (ffi_size) {
      /* Separate CIE (no personality) for _lj_vm_ffi_call. */
      fputs(
        "EH_frame2:\n"
        "\t.set L$set$y,LECIEY-LSCIEY\n"
        "\t.long L$set$y\n"
        "LSCIEY:\n"
        "\t.long 0\n"
        "\t.byte 0x1\n"
        "\t.ascii \"zR\\0\"\n"
        "\t.byte 0x1\n"
        "\t.byte 128-8\n"
        "\t.byte 0x10\n"
        "\t.byte 1\n"                     /* augmentation length */
        "\t.byte 0x1b\n"                  /* pcrel|sdata4 */
        "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
        "\t.byte 0x80+0x10\n\t.byte 0x1\n"
        "\t.align 3\n"
        "LECIEY:\n\n", out);
      fprintf(out,
        "_lj_vm_ffi_call.eh:\n"
        "LSFDEY:\n"
        "\t.set L$set$yy,LEFDEY-LASFDEY\n"
        "\t.long L$set$yy\n"
        "LASFDEY:\n"
        "\t.long LASFDEY-EH_frame2\n"
        "\t.long _lj_vm_ffi_call-.\n"
        "\t.long %d\n"
        "\t.byte 0\n"                     /* augmentation length */
        "\t.byte 0xe\n\t.byte 16\n"       /* def_cfa_offset */
        "\t.byte 0x86\n\t.byte 0x2\n"     /* offset rbp */
        "\t.byte 0xd\n\t.byte 0x6\n"      /* def_cfa_register rbp */
        "\t.byte 0x83\n\t.byte 0x3\n"     /* offset rbx */
        "\t.align 3\n"
        "LEFDEY:\n\n", ffi_size);
    }
#endif
    fputs(".subsections_via_symbols\n", out);
    }
    break;
#endif
  default:  /* Difficult for other modes. */
    break;
  }
}