1|// Low-level VM code for x86 CPUs. 2|// Bytecode interpreter, fast functions and helper functions. 3|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h 4| 5|.if P64 6|.arch x64 7|.else 8|.arch x86 9|.endif 10|.section code_op, code_sub 11| 12|.actionlist build_actionlist 13|.globals GLOB_ 14|.globalnames globnames 15|.externnames extnames 16| 17|//----------------------------------------------------------------------- 18| 19|.if P64 20|.define X64, 1 21|.if WIN 22|.define X64WIN, 1 23|.endif 24|.endif 25| 26|// Fixed register assignments for the interpreter. 27|// This is very fragile and has many dependencies. Caveat emptor. 28|.define BASE, edx // Not C callee-save, refetched anyway. 29|.if not X64 30|.define KBASE, edi // Must be C callee-save. 31|.define KBASEa, KBASE 32|.define PC, esi // Must be C callee-save. 33|.define PCa, PC 34|.define DISPATCH, ebx // Must be C callee-save. 35|.elif X64WIN 36|.define KBASE, edi // Must be C callee-save. 37|.define KBASEa, rdi 38|.define PC, esi // Must be C callee-save. 39|.define PCa, rsi 40|.define DISPATCH, ebx // Must be C callee-save. 41|.else 42|.define KBASE, r15d // Must be C callee-save. 43|.define KBASEa, r15 44|.define PC, ebx // Must be C callee-save. 45|.define PCa, rbx 46|.define DISPATCH, r14d // Must be C callee-save. 47|.endif 48| 49|.define RA, ecx 50|.define RAH, ch 51|.define RAL, cl 52|.define RB, ebp // Must be ebp (C callee-save). 53|.define RC, eax // Must be eax. 54|.define RCW, ax 55|.define RCH, ah 56|.define RCL, al 57|.define OP, RB 58|.define RD, RC 59|.define RDW, RCW 60|.define RDL, RCL 61|.if X64 62|.define RAa, rcx 63|.define RBa, rbp 64|.define RCa, rax 65|.define RDa, rax 66|.else 67|.define RAa, RA 68|.define RBa, RB 69|.define RCa, RC 70|.define RDa, RD 71|.endif 72| 73|.if not X64 74|.define FCARG1, ecx // x86 fastcall arguments. 75|.define FCARG2, edx 76|.elif X64WIN 77|.define CARG1, rcx // x64/WIN64 C call arguments. 78|.define CARG2, rdx 79|.define CARG3, r8 80|.define CARG4, r9 81|.define CARG1d, ecx 82|.define CARG2d, edx 83|.define CARG3d, r8d 84|.define CARG4d, r9d 85|.define FCARG1, CARG1d // Upwards compatible to x86 fastcall. 86|.define FCARG2, CARG2d 87|.else 88|.define CARG1, rdi // x64/POSIX C call arguments. 89|.define CARG2, rsi 90|.define CARG3, rdx 91|.define CARG4, rcx 92|.define CARG5, r8 93|.define CARG6, r9 94|.define CARG1d, edi 95|.define CARG2d, esi 96|.define CARG3d, edx 97|.define CARG4d, ecx 98|.define CARG5d, r8d 99|.define CARG6d, r9d 100|.define FCARG1, CARG1d // Simulate x86 fastcall. 101|.define FCARG2, CARG2d 102|.endif 103| 104|// Type definitions. Some of these are only used for documentation. 105|.type L, lua_State 106|.type GL, global_State 107|.type TVALUE, TValue 108|.type GCOBJ, GCobj 109|.type STR, GCstr 110|.type TAB, GCtab 111|.type LFUNC, GCfuncL 112|.type CFUNC, GCfuncC 113|.type PROTO, GCproto 114|.type UPVAL, GCupval 115|.type NODE, Node 116|.type NARGS, int 117|.type TRACE, GCtrace 118|.type SBUF, SBuf 119| 120|// Stack layout while in interpreter. Must match with lj_frame.h. 121|//----------------------------------------------------------------------- 122|.if not X64 // x86 stack layout. 123| 124|.if WIN 125| 126|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--). 127|.macro saveregs_ 128| push edi; push esi; push ebx 129| push extern lj_err_unwind_win 130| fs; push dword [0] 131| fs; mov [0], esp 132| sub esp, CFRAME_SPACE 133|.endmacro 134|.macro restoreregs 135| add esp, CFRAME_SPACE 136| fs; pop dword [0] 137| pop edi // Short for esp += 4. 138| pop ebx; pop esi; pop edi; pop ebp 139|.endmacro 140| 141|.else 142| 143|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). 144|.macro saveregs_ 145| push edi; push esi; push ebx 146| sub esp, CFRAME_SPACE 147|.endmacro 148|.macro restoreregs 149| add esp, CFRAME_SPACE 150| pop ebx; pop esi; pop edi; pop ebp 151|.endmacro 152| 153|.endif 154| 155|.macro saveregs 156| push ebp; saveregs_ 157|.endmacro 158| 159|.if WIN 160|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only. 161|.define SAVE_NRES, aword [esp+aword*18] 162|.define SAVE_CFRAME, aword [esp+aword*17] 163|.define SAVE_L, aword [esp+aword*16] 164|//----- 16 byte aligned, ^^^ arguments from C caller 165|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter. 166|.define SAVE_R4, aword [esp+aword*14] 167|.define SAVE_R3, aword [esp+aword*13] 168|.define SAVE_R2, aword [esp+aword*12] 169|//----- 16 byte aligned 170|.define SAVE_R1, aword [esp+aword*11] 171|.define SEH_FUNC, aword [esp+aword*10] 172|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves. 173|.define UNUSED2, aword [esp+aword*8] 174|//----- 16 byte aligned 175|.define UNUSED1, aword [esp+aword*7] 176|.define SAVE_PC, aword [esp+aword*6] 177|.define TMP2, aword [esp+aword*5] 178|.define TMP1, aword [esp+aword*4] 179|//----- 16 byte aligned 180|.define ARG4, aword [esp+aword*3] 181|.define ARG3, aword [esp+aword*2] 182|.define ARG2, aword [esp+aword*1] 183|.define ARG1, aword [esp] //<-- esp while in interpreter. 184|//----- 16 byte aligned, ^^^ arguments for C callee 185|.else 186|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. 187|.define SAVE_NRES, aword [esp+aword*14] 188|.define SAVE_CFRAME, aword [esp+aword*13] 189|.define SAVE_L, aword [esp+aword*12] 190|//----- 16 byte aligned, ^^^ arguments from C caller 191|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter. 192|.define SAVE_R4, aword [esp+aword*10] 193|.define SAVE_R3, aword [esp+aword*9] 194|.define SAVE_R2, aword [esp+aword*8] 195|//----- 16 byte aligned 196|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves. 197|.define SAVE_PC, aword [esp+aword*6] 198|.define TMP2, aword [esp+aword*5] 199|.define TMP1, aword [esp+aword*4] 200|//----- 16 byte aligned 201|.define ARG4, aword [esp+aword*3] 202|.define ARG3, aword [esp+aword*2] 203|.define ARG2, aword [esp+aword*1] 204|.define ARG1, aword [esp] //<-- esp while in interpreter. 205|//----- 16 byte aligned, ^^^ arguments for C callee 206|.endif 207| 208|// FPARGx overlaps ARGx and ARG(x+1) on x86. 209|.define FPARG3, qword [esp+qword*1] 210|.define FPARG1, qword [esp] 211|// TMPQ overlaps TMP1/TMP2. ARG5/MULTRES overlap TMP1/TMP2 (and TMPQ). 212|.define TMPQ, qword [esp+aword*4] 213|.define TMP3, ARG4 214|.define ARG5, TMP1 215|.define TMPa, TMP1 216|.define MULTRES, TMP2 217| 218|// Arguments for vm_call and vm_pcall. 219|.define INARG_BASE, SAVE_CFRAME // Overwritten by SAVE_CFRAME! 220| 221|// Arguments for vm_cpcall. 222|.define INARG_CP_CALL, SAVE_ERRF 223|.define INARG_CP_UD, SAVE_NRES 224|.define INARG_CP_FUNC, SAVE_CFRAME 225| 226|//----------------------------------------------------------------------- 227|.elif X64WIN // x64/Windows stack layout 228| 229|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). 230|.macro saveregs_ 231| push rdi; push rsi; push rbx 232| sub rsp, CFRAME_SPACE 233|.endmacro 234|.macro saveregs 235| push rbp; saveregs_ 236|.endmacro 237|.macro restoreregs 238| add rsp, CFRAME_SPACE 239| pop rbx; pop rsi; pop rdi; pop rbp 240|.endmacro 241| 242|.define SAVE_CFRAME, aword [rsp+aword*13] 243|.define SAVE_PC, dword [rsp+dword*25] 244|.define SAVE_L, dword [rsp+dword*24] 245|.define SAVE_ERRF, dword [rsp+dword*23] 246|.define SAVE_NRES, dword [rsp+dword*22] 247|.define TMP2, dword [rsp+dword*21] 248|.define TMP1, dword [rsp+dword*20] 249|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter 250|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. 251|.define SAVE_R4, aword [rsp+aword*8] 252|.define SAVE_R3, aword [rsp+aword*7] 253|.define SAVE_R2, aword [rsp+aword*6] 254|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. 255|.define ARG5, aword [rsp+aword*4] 256|.define CSAVE_4, aword [rsp+aword*3] 257|.define CSAVE_3, aword [rsp+aword*2] 258|.define CSAVE_2, aword [rsp+aword*1] 259|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter. 260|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee 261| 262|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ). 263|.define TMPQ, qword [rsp+aword*10] 264|.define MULTRES, TMP2 265|.define TMPa, ARG5 266|.define ARG5d, dword [rsp+aword*4] 267|.define TMP3, ARG5d 268| 269|//----------------------------------------------------------------------- 270|.else // x64/POSIX stack layout 271| 272|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). 273|.macro saveregs_ 274| push rbx; push r15; push r14 275|.if NO_UNWIND 276| push r13; push r12 277|.endif 278| sub rsp, CFRAME_SPACE 279|.endmacro 280|.macro saveregs 281| push rbp; saveregs_ 282|.endmacro 283|.macro restoreregs 284| add rsp, CFRAME_SPACE 285|.if NO_UNWIND 286| pop r12; pop r13 287|.endif 288| pop r14; pop r15; pop rbx; pop rbp 289|.endmacro 290| 291|//----- 16 byte aligned, 292|.if NO_UNWIND 293|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter. 294|.define SAVE_R4, aword [rsp+aword*10] 295|.define SAVE_R3, aword [rsp+aword*9] 296|.define SAVE_R2, aword [rsp+aword*8] 297|.define SAVE_R1, aword [rsp+aword*7] 298|.define SAVE_RU2, aword [rsp+aword*6] 299|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves. 300|.else 301|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. 302|.define SAVE_R4, aword [rsp+aword*8] 303|.define SAVE_R3, aword [rsp+aword*7] 304|.define SAVE_R2, aword [rsp+aword*6] 305|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. 306|.endif 307|.define SAVE_CFRAME, aword [rsp+aword*4] 308|.define SAVE_PC, dword [rsp+dword*7] 309|.define SAVE_L, dword [rsp+dword*6] 310|.define SAVE_ERRF, dword [rsp+dword*5] 311|.define SAVE_NRES, dword [rsp+dword*4] 312|.define TMPa, aword [rsp+aword*1] 313|.define TMP2, dword [rsp+dword*1] 314|.define TMP1, dword [rsp] //<-- rsp while in interpreter. 315|//----- 16 byte aligned 316| 317|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ). 318|.define TMPQ, qword [rsp] 319|.define TMP3, dword [rsp+aword*1] 320|.define MULTRES, TMP2 321| 322|.endif 323| 324|//----------------------------------------------------------------------- 325| 326|// Instruction headers. 327|.macro ins_A; .endmacro 328|.macro ins_AD; .endmacro 329|.macro ins_AJ; .endmacro 330|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro 331|.macro ins_AB_; movzx RB, RCH; .endmacro 332|.macro ins_A_C; movzx RC, RCL; .endmacro 333|.macro ins_AND; not RDa; .endmacro 334| 335|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster). 336|.macro ins_NEXT 337| mov RC, [PC] 338| movzx RA, RCH 339| movzx OP, RCL 340| add PC, 4 341| shr RC, 16 342|.if X64 343| jmp aword [DISPATCH+OP*8] 344|.else 345| jmp aword [DISPATCH+OP*4] 346|.endif 347|.endmacro 348| 349|// Instruction footer. 350|.if 1 351| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. 352| .define ins_next, ins_NEXT 353| .define ins_next_, ins_NEXT 354|.else 355| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. 356| // Affects only certain kinds of benchmarks (and only with -j off). 357| // Around 10%-30% slower on Core2, a lot more slower on P4. 358| .macro ins_next 359| jmp ->ins_next 360| .endmacro 361| .macro ins_next_ 362| ->ins_next: 363| ins_NEXT 364| .endmacro 365|.endif 366| 367|// Call decode and dispatch. 368|.macro ins_callt 369| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC 370| mov PC, LFUNC:RB->pc 371| mov RA, [PC] 372| movzx OP, RAL 373| movzx RA, RAH 374| add PC, 4 375|.if X64 376| jmp aword [DISPATCH+OP*8] 377|.else 378| jmp aword [DISPATCH+OP*4] 379|.endif 380|.endmacro 381| 382|.macro ins_call 383| // BASE = new base, RB = LFUNC, RD = nargs+1 384| mov [BASE-4], PC 385| ins_callt 386|.endmacro 387| 388|//----------------------------------------------------------------------- 389| 390|// Macros to test operand types. 391|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro 392|.macro checknum, reg, target; checktp reg, LJ_TISNUM; jae target; .endmacro 393|.macro checkint, reg, target; checktp reg, LJ_TISNUM; jne target; .endmacro 394|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro 395|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro 396| 397|// These operands must be used with movzx. 398|.define PC_OP, byte [PC-4] 399|.define PC_RA, byte [PC-3] 400|.define PC_RB, byte [PC-1] 401|.define PC_RC, byte [PC-2] 402|.define PC_RD, word [PC-2] 403| 404|.macro branchPC, reg 405| lea PC, [PC+reg*4-BCBIAS_J*4] 406|.endmacro 407| 408|// Assumes DISPATCH is relative to GL. 409#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) 410#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) 411| 412#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) 413| 414|// Decrement hashed hotcount and trigger trace recorder if zero. 415|.macro hotloop, reg 416| mov reg, PC 417| shr reg, 1 418| and reg, HOTCOUNT_PCMASK 419| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP 420| jb ->vm_hotloop 421|.endmacro 422| 423|.macro hotcall, reg 424| mov reg, PC 425| shr reg, 1 426| and reg, HOTCOUNT_PCMASK 427| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL 428| jb ->vm_hotcall 429|.endmacro 430| 431|// Set current VM state. 432|.macro set_vmstate, st 433| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st 434|.endmacro 435| 436|// x87 compares. 437|.macro fcomparepp // Compare and pop st0 >< st1. 438| fucomip st1 439| fpop 440|.endmacro 441| 442|.macro fpop1; fstp st1; .endmacro 443| 444|// Synthesize SSE FP constants. 445|.macro sseconst_abs, reg, tmp // Synthesize abs mask. 446|.if X64 447| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp 448|.else 449| pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1 450|.endif 451|.endmacro 452| 453|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const. 454|.if X64 455| mov64 tmp, U64x(val,00000000); movd reg, tmp 456|.else 457| mov tmp, 0x .. val; movd reg, tmp; pshufd reg, reg, 0x51 458|.endif 459|.endmacro 460| 461|.macro sseconst_sign, reg, tmp // Synthesize sign mask. 462| sseconst_hi reg, tmp, 80000000 463|.endmacro 464|.macro sseconst_1, reg, tmp // Synthesize 1.0. 465| sseconst_hi reg, tmp, 3ff00000 466|.endmacro 467|.macro sseconst_m1, reg, tmp // Synthesize -1.0. 468| sseconst_hi reg, tmp, bff00000 469|.endmacro 470|.macro sseconst_2p52, reg, tmp // Synthesize 2^52. 471| sseconst_hi reg, tmp, 43300000 472|.endmacro 473|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51. 474| sseconst_hi reg, tmp, 43380000 475|.endmacro 476| 477|// Move table write barrier back. Overwrites reg. 478|.macro barrierback, tab, reg 479| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab) 480| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] 481| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab 482| mov tab->gclist, reg 483|.endmacro 484| 485|//----------------------------------------------------------------------- 486 487/* Generate subroutines used by opcodes and other parts of the VM. */ 488/* The .code_sub section should be last to help static branch prediction. */ 489static void build_subroutines(BuildCtx *ctx) 490{ 491 |.code_sub 492 | 493 |//----------------------------------------------------------------------- 494 |//-- Return handling ---------------------------------------------------- 495 |//----------------------------------------------------------------------- 496 | 497 |->vm_returnp: 498 | test PC, FRAME_P 499 | jz ->cont_dispatch 500 | 501 | // Return from pcall or xpcall fast func. 502 | and PC, -8 503 | sub BASE, PC // Restore caller base. 504 | lea RAa, [RA+PC-8] // Rebase RA and prepend one result. 505 | mov PC, [BASE-4] // Fetch PC of previous frame. 506 | // Prepending may overwrite the pcall frame, so do it at the end. 507 | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results. 508 | 509 |->vm_returnc: 510 | add RD, 1 // RD = nresults+1 511 | jz ->vm_unwind_yield 512 | mov MULTRES, RD 513 | test PC, FRAME_TYPE 514 | jz ->BC_RET_Z // Handle regular return to Lua. 515 | 516 |->vm_return: 517 | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return 518 | xor PC, FRAME_C 519 | test PC, FRAME_TYPE 520 | jnz ->vm_returnp 521 | 522 | // Return to C. 523 | set_vmstate C 524 | and PC, -8 525 | sub PC, BASE 526 | neg PC // Previous base = BASE - delta. 527 | 528 | sub RD, 1 529 | jz >2 530 |1: // Move results down. 531 |.if X64 532 | mov RBa, [BASE+RA] 533 | mov [BASE-8], RBa 534 |.else 535 | mov RB, [BASE+RA] 536 | mov [BASE-8], RB 537 | mov RB, [BASE+RA+4] 538 | mov [BASE-4], RB 539 |.endif 540 | add BASE, 8 541 | sub RD, 1 542 | jnz <1 543 |2: 544 | mov L:RB, SAVE_L 545 | mov L:RB->base, PC 546 |3: 547 | mov RD, MULTRES 548 | mov RA, SAVE_NRES // RA = wanted nresults+1 549 |4: 550 | cmp RA, RD 551 | jne >6 // More/less results wanted? 552 |5: 553 | sub BASE, 8 554 | mov L:RB->top, BASE 555 | 556 |->vm_leave_cp: 557 | mov RAa, SAVE_CFRAME // Restore previous C frame. 558 | mov L:RB->cframe, RAa 559 | xor eax, eax // Ok return status for vm_pcall. 560 | 561 |->vm_leave_unw: 562 | restoreregs 563 | ret 564 | 565 |6: 566 | jb >7 // Less results wanted? 567 | // More results wanted. Check stack size and fill up results with nil. 568 | cmp BASE, L:RB->maxstack 569 | ja >8 570 | mov dword [BASE-4], LJ_TNIL 571 | add BASE, 8 572 | add RD, 1 573 | jmp <4 574 | 575 |7: // Less results wanted. 576 | test RA, RA 577 | jz <5 // But check for LUA_MULTRET+1. 578 | sub RA, RD // Negative result! 579 | lea BASE, [BASE+RA*8] // Correct top. 580 | jmp <5 581 | 582 |8: // Corner case: need to grow stack for filling up results. 583 | // This can happen if: 584 | // - A C function grows the stack (a lot). 585 | // - The GC shrinks the stack in between. 586 | // - A return back from a lua_call() with (high) nresults adjustment. 587 | mov L:RB->top, BASE // Save current top held in BASE (yes). 588 | mov MULTRES, RD // Need to fill only remainder with nil. 589 | mov FCARG2, RA 590 | mov FCARG1, L:RB 591 | call extern lj_state_growstack@8 // (lua_State *L, int n) 592 | mov BASE, L:RB->top // Need the (realloced) L->top in BASE. 593 | jmp <3 594 | 595 |->vm_unwind_yield: 596 | mov al, LUA_YIELD 597 | jmp ->vm_unwind_c_eh 598 | 599 |->vm_unwind_c@8: // Unwind C stack, return from vm_pcall. 600 | // (void *cframe, int errcode) 601 |.if X64 602 | mov eax, CARG2d // Error return status for vm_pcall. 603 | mov rsp, CARG1 604 |.else 605 | mov eax, FCARG2 // Error return status for vm_pcall. 606 | mov esp, FCARG1 607 |.if WIN 608 | lea FCARG1, SEH_NEXT 609 | fs; mov [0], FCARG1 610 |.endif 611 |.endif 612 |->vm_unwind_c_eh: // Landing pad for external unwinder. 613 | mov L:RB, SAVE_L 614 | mov GL:RB, L:RB->glref 615 | mov dword GL:RB->vmstate, ~LJ_VMST_C 616 | jmp ->vm_leave_unw 617 | 618 |->vm_unwind_rethrow: 619 |.if X64 and not X64WIN 620 | mov FCARG1, SAVE_L 621 | mov FCARG2, eax 622 | restoreregs 623 | jmp extern lj_err_throw@8 // (lua_State *L, int errcode) 624 |.endif 625 | 626 |->vm_unwind_ff@4: // Unwind C stack, return from ff pcall. 627 | // (void *cframe) 628 |.if X64 629 | and CARG1, CFRAME_RAWMASK 630 | mov rsp, CARG1 631 |.else 632 | and FCARG1, CFRAME_RAWMASK 633 | mov esp, FCARG1 634 |.if WIN 635 | lea FCARG1, SEH_NEXT 636 | fs; mov [0], FCARG1 637 |.endif 638 |.endif 639 |->vm_unwind_ff_eh: // Landing pad for external unwinder. 640 | mov L:RB, SAVE_L 641 | mov RAa, -8 // Results start at BASE+RA = BASE-8. 642 | mov RD, 1+1 // Really 1+2 results, incr. later. 643 | mov BASE, L:RB->base 644 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 645 | add DISPATCH, GG_G2DISP 646 | mov PC, [BASE-4] // Fetch PC of previous frame. 647 | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message. 648 | set_vmstate INTERP 649 | jmp ->vm_returnc // Increments RD/MULTRES and returns. 650 | 651 |.if WIN and not X64 652 |->vm_rtlunwind@16: // Thin layer around RtlUnwind. 653 | // (void *cframe, void *excptrec, void *unwinder, int errcode) 654 | mov [esp], FCARG1 // Return value for RtlUnwind. 655 | push FCARG2 // Exception record for RtlUnwind. 656 | push 0 // Ignored by RtlUnwind. 657 | push dword [FCARG1+CFRAME_OFS_SEH] 658 | call extern RtlUnwind@16 // Violates ABI (clobbers too much). 659 | mov FCARG1, eax 660 | mov FCARG2, [esp+4] // errcode (for vm_unwind_c). 661 | ret // Jump to unwinder. 662 |.endif 663 | 664 |//----------------------------------------------------------------------- 665 |//-- Grow stack for calls ----------------------------------------------- 666 |//----------------------------------------------------------------------- 667 | 668 |->vm_growstack_c: // Grow stack for C function. 669 | mov FCARG2, LUA_MINSTACK 670 | jmp >2 671 | 672 |->vm_growstack_v: // Grow stack for vararg Lua function. 673 | sub RD, 8 674 | jmp >1 675 | 676 |->vm_growstack_f: // Grow stack for fixarg Lua function. 677 | // BASE = new base, RD = nargs+1, RB = L, PC = first PC 678 | lea RD, [BASE+NARGS:RD*8-8] 679 |1: 680 | movzx RA, byte [PC-4+PC2PROTO(framesize)] 681 | add PC, 4 // Must point after first instruction. 682 | mov L:RB->base, BASE 683 | mov L:RB->top, RD 684 | mov SAVE_PC, PC 685 | mov FCARG2, RA 686 |2: 687 | // RB = L, L->base = new base, L->top = top 688 | mov FCARG1, L:RB 689 | call extern lj_state_growstack@8 // (lua_State *L, int n) 690 | mov BASE, L:RB->base 691 | mov RD, L:RB->top 692 | mov LFUNC:RB, [BASE-8] 693 | sub RD, BASE 694 | shr RD, 3 695 | add NARGS:RD, 1 696 | // BASE = new base, RB = LFUNC, RD = nargs+1 697 | ins_callt // Just retry the call. 698 | 699 |//----------------------------------------------------------------------- 700 |//-- Entry points into the assembler VM --------------------------------- 701 |//----------------------------------------------------------------------- 702 | 703 |->vm_resume: // Setup C frame and resume thread. 704 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) 705 | saveregs 706 |.if X64 707 | mov L:RB, CARG1d // Caveat: CARG1d may be RA. 708 | mov SAVE_L, CARG1d 709 | mov RA, CARG2d 710 |.else 711 | mov L:RB, SAVE_L 712 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! 713 |.endif 714 | mov PC, FRAME_CP 715 | xor RD, RD 716 | lea KBASEa, [esp+CFRAME_RESUME] 717 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 718 | add DISPATCH, GG_G2DISP 719 | mov SAVE_PC, RD // Any value outside of bytecode is ok. 720 | mov SAVE_CFRAME, RDa 721 |.if X64 722 | mov SAVE_NRES, RD 723 | mov SAVE_ERRF, RD 724 |.endif 725 | mov L:RB->cframe, KBASEa 726 | cmp byte L:RB->status, RDL 727 | je >2 // Initial resume (like a call). 728 | 729 | // Resume after yield (like a return). 730 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB 731 | set_vmstate INTERP 732 | mov byte L:RB->status, RDL 733 | mov BASE, L:RB->base 734 | mov RD, L:RB->top 735 | sub RD, RA 736 | shr RD, 3 737 | add RD, 1 // RD = nresults+1 738 | sub RA, BASE // RA = resultofs 739 | mov PC, [BASE-4] 740 | mov MULTRES, RD 741 | test PC, FRAME_TYPE 742 | jz ->BC_RET_Z 743 | jmp ->vm_return 744 | 745 |->vm_pcall: // Setup protected C frame and enter VM. 746 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) 747 | saveregs 748 | mov PC, FRAME_CP 749 |.if X64 750 | mov SAVE_ERRF, CARG4d 751 |.endif 752 | jmp >1 753 | 754 |->vm_call: // Setup C frame and enter VM. 755 | // (lua_State *L, TValue *base, int nres1) 756 | saveregs 757 | mov PC, FRAME_C 758 | 759 |1: // Entry point for vm_pcall above (PC = ftype). 760 |.if X64 761 | mov SAVE_NRES, CARG3d 762 | mov L:RB, CARG1d // Caveat: CARG1d may be RA. 763 | mov SAVE_L, CARG1d 764 | mov RA, CARG2d 765 |.else 766 | mov L:RB, SAVE_L 767 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! 768 |.endif 769 | 770 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 771 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 772 | mov SAVE_CFRAME, KBASEa 773 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. 774 | add DISPATCH, GG_G2DISP 775 |.if X64 776 | mov L:RB->cframe, rsp 777 |.else 778 | mov L:RB->cframe, esp 779 |.endif 780 | 781 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). 782 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB 783 | set_vmstate INTERP 784 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). 785 | add PC, RA 786 | sub PC, BASE // PC = frame delta + frame type 787 | 788 | mov RD, L:RB->top 789 | sub RD, RA 790 | shr NARGS:RD, 3 791 | add NARGS:RD, 1 // RD = nargs+1 792 | 793 |->vm_call_dispatch: 794 | mov LFUNC:RB, [RA-8] 795 | cmp dword [RA-4], LJ_TFUNC 796 | jne ->vmeta_call // Ensure KBASE defined and != BASE. 797 | 798 |->vm_call_dispatch_f: 799 | mov BASE, RA 800 | ins_call 801 | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC 802 | 803 |->vm_cpcall: // Setup protected C frame, call C. 804 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) 805 | saveregs 806 |.if X64 807 | mov L:RB, CARG1d // Caveat: CARG1d may be RA. 808 | mov SAVE_L, CARG1d 809 |.else 810 | mov L:RB, SAVE_L 811 | // Caveat: INARG_CP_* and SAVE_CFRAME/SAVE_NRES/SAVE_ERRF overlap! 812 | mov RC, INARG_CP_UD // Get args before they are overwritten. 813 | mov RA, INARG_CP_FUNC 814 | mov BASE, INARG_CP_CALL 815 |.endif 816 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. 817 | 818 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). 819 | sub KBASE, L:RB->top 820 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 821 | mov SAVE_ERRF, 0 // No error function. 822 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. 823 | add DISPATCH, GG_G2DISP 824 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). 825 | 826 |.if X64 827 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 828 | mov SAVE_CFRAME, KBASEa 829 | mov L:RB->cframe, rsp 830 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB 831 | 832 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) 833 |.else 834 | mov ARG3, RC // Have to copy args downwards. 835 | mov ARG2, RA 836 | mov ARG1, L:RB 837 | 838 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. 839 | mov SAVE_CFRAME, KBASE 840 | mov L:RB->cframe, esp 841 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB 842 | 843 | call BASE // (lua_State *L, lua_CFunction func, void *ud) 844 |.endif 845 | // TValue * (new base) or NULL returned in eax (RC). 846 | test RC, RC 847 | jz ->vm_leave_cp // No base? Just remove C frame. 848 | mov RA, RC 849 | mov PC, FRAME_CP 850 | jmp <2 // Else continue with the call. 851 | 852 |//----------------------------------------------------------------------- 853 |//-- Metamethod handling ------------------------------------------------ 854 |//----------------------------------------------------------------------- 855 | 856 |//-- Continuation dispatch ---------------------------------------------- 857 | 858 |->cont_dispatch: 859 | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES) 860 | add RA, BASE 861 | and PC, -8 862 | mov RB, BASE 863 | sub BASE, PC // Restore caller BASE. 864 | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg. 865 | mov RC, RA // ... in [RC] 866 | mov PC, [RB-12] // Restore PC from [cont|PC]. 867 |.if X64 868 | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug. 869 |.if FFI 870 | cmp RA, 1 871 | jbe >1 872 |.endif 873 | lea KBASEa, qword [=>0] 874 | add RAa, KBASEa 875 |.else 876 | mov RA, dword [RB-16] 877 |.if FFI 878 | cmp RA, 1 879 | jbe >1 880 |.endif 881 |.endif 882 | mov LFUNC:KBASE, [BASE-8] 883 | mov KBASE, LFUNC:KBASE->pc 884 | mov KBASE, [KBASE+PC2PROTO(k)] 885 | // BASE = base, RC = result, RB = meta base 886 | jmp RAa // Jump to continuation. 887 | 888 |.if FFI 889 |1: 890 | je ->cont_ffi_callback // cont = 1: return from FFI callback. 891 | // cont = 0: Tail call from C function. 892 | sub RB, BASE 893 | shr RB, 3 894 | lea RD, [RB-1] 895 | jmp ->vm_call_tail 896 |.endif 897 | 898 |->cont_cat: // BASE = base, RC = result, RB = mbase 899 | movzx RA, PC_RB 900 | sub RB, 16 901 | lea RA, [BASE+RA*8] 902 | sub RA, RB 903 | je ->cont_ra 904 | neg RA 905 | shr RA, 3 906 |.if X64WIN 907 | mov CARG3d, RA 908 | mov L:CARG1d, SAVE_L 909 | mov L:CARG1d->base, BASE 910 | mov RCa, [RC] 911 | mov [RB], RCa 912 | mov CARG2d, RB 913 |.elif X64 914 | mov L:CARG1d, SAVE_L 915 | mov L:CARG1d->base, BASE 916 | mov CARG3d, RA 917 | mov RAa, [RC] 918 | mov [RB], RAa 919 | mov CARG2d, RB 920 |.else 921 | mov ARG3, RA 922 | mov RA, [RC+4] 923 | mov RC, [RC] 924 | mov [RB+4], RA 925 | mov [RB], RC 926 | mov ARG2, RB 927 |.endif 928 | jmp ->BC_CAT_Z 929 | 930 |//-- Table indexing metamethods ----------------------------------------- 931 | 932 |->vmeta_tgets: 933 | mov TMP1, RC // RC = GCstr * 934 | mov TMP2, LJ_TSTR 935 | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2. 936 | cmp PC_OP, BC_GGET 937 | jne >1 938 | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. 939 | mov [RA], TAB:RB // RB = GCtab * 940 | mov dword [RA+4], LJ_TTAB 941 | mov RB, RA 942 | jmp >2 943 | 944 |->vmeta_tgetb: 945 | movzx RC, PC_RC 946 |.if DUALNUM 947 | mov TMP2, LJ_TISNUM 948 | mov TMP1, RC 949 |.else 950 | cvtsi2sd xmm0, RC 951 | movsd TMPQ, xmm0 952 |.endif 953 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 954 | jmp >1 955 | 956 |->vmeta_tgetv: 957 | movzx RC, PC_RC // Reload TValue *k from RC. 958 | lea RC, [BASE+RC*8] 959 |1: 960 | movzx RB, PC_RB // Reload TValue *t from RB. 961 | lea RB, [BASE+RB*8] 962 |2: 963 |.if X64 964 | mov L:CARG1d, SAVE_L 965 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE. 966 | mov CARG2d, RB 967 | mov CARG3, RCa // May be 64 bit ptr to stack. 968 | mov L:RB, L:CARG1d 969 |.else 970 | mov ARG2, RB 971 | mov L:RB, SAVE_L 972 | mov ARG3, RC 973 | mov ARG1, L:RB 974 | mov L:RB->base, BASE 975 |.endif 976 | mov SAVE_PC, PC 977 | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) 978 | // TValue * (finished) or NULL (metamethod) returned in eax (RC). 979 | mov BASE, L:RB->base 980 | test RC, RC 981 | jz >3 982 |->cont_ra: // BASE = base, RC = result 983 | movzx RA, PC_RA 984 |.if X64 985 | mov RBa, [RC] 986 | mov [BASE+RA*8], RBa 987 |.else 988 | mov RB, [RC+4] 989 | mov RC, [RC] 990 | mov [BASE+RA*8+4], RB 991 | mov [BASE+RA*8], RC 992 |.endif 993 | ins_next 994 | 995 |3: // Call __index metamethod. 996 | // BASE = base, L->top = new base, stack = cont/func/t/k 997 | mov RA, L:RB->top 998 | mov [RA-12], PC // [cont|PC] 999 | lea PC, [RA+FRAME_CONT] 1000 | sub PC, BASE 1001 | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here. 1002 | mov NARGS:RD, 2+1 // 2 args for func(t, k). 1003 | jmp ->vm_call_dispatch_f 1004 | 1005 |->vmeta_tgetr: 1006 | mov FCARG1, TAB:RB 1007 | mov RB, BASE // Save BASE. 1008 | mov FCARG2, RC // Caveat: FCARG2 == BASE 1009 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key) 1010 | // cTValue * or NULL returned in eax (RC). 1011 | movzx RA, PC_RA 1012 | mov BASE, RB // Restore BASE. 1013 | test RC, RC 1014 | jnz ->BC_TGETR_Z 1015 | mov dword [BASE+RA*8+4], LJ_TNIL 1016 | jmp ->BC_TGETR2_Z 1017 | 1018 |//----------------------------------------------------------------------- 1019 | 1020 |->vmeta_tsets: 1021 | mov TMP1, RC // RC = GCstr * 1022 | mov TMP2, LJ_TSTR 1023 | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2. 1024 | cmp PC_OP, BC_GSET 1025 | jne >1 1026 | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. 1027 | mov [RA], TAB:RB // RB = GCtab * 1028 | mov dword [RA+4], LJ_TTAB 1029 | mov RB, RA 1030 | jmp >2 1031 | 1032 |->vmeta_tsetb: 1033 | movzx RC, PC_RC 1034 |.if DUALNUM 1035 | mov TMP2, LJ_TISNUM 1036 | mov TMP1, RC 1037 |.else 1038 | cvtsi2sd xmm0, RC 1039 | movsd TMPQ, xmm0 1040 |.endif 1041 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 1042 | jmp >1 1043 | 1044 |->vmeta_tsetv: 1045 | movzx RC, PC_RC // Reload TValue *k from RC. 1046 | lea RC, [BASE+RC*8] 1047 |1: 1048 | movzx RB, PC_RB // Reload TValue *t from RB. 1049 | lea RB, [BASE+RB*8] 1050 |2: 1051 |.if X64 1052 | mov L:CARG1d, SAVE_L 1053 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE. 1054 | mov CARG2d, RB 1055 | mov CARG3, RCa // May be 64 bit ptr to stack. 1056 | mov L:RB, L:CARG1d 1057 |.else 1058 | mov ARG2, RB 1059 | mov L:RB, SAVE_L 1060 | mov ARG3, RC 1061 | mov ARG1, L:RB 1062 | mov L:RB->base, BASE 1063 |.endif 1064 | mov SAVE_PC, PC 1065 | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) 1066 | // TValue * (finished) or NULL (metamethod) returned in eax (RC). 1067 | mov BASE, L:RB->base 1068 | test RC, RC 1069 | jz >3 1070 | // NOBARRIER: lj_meta_tset ensures the table is not black. 1071 | movzx RA, PC_RA 1072 |.if X64 1073 | mov RBa, [BASE+RA*8] 1074 | mov [RC], RBa 1075 |.else 1076 | mov RB, [BASE+RA*8+4] 1077 | mov RA, [BASE+RA*8] 1078 | mov [RC+4], RB 1079 | mov [RC], RA 1080 |.endif 1081 |->cont_nop: // BASE = base, (RC = result) 1082 | ins_next 1083 | 1084 |3: // Call __newindex metamethod. 1085 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) 1086 | mov RA, L:RB->top 1087 | mov [RA-12], PC // [cont|PC] 1088 | movzx RC, PC_RA 1089 | // Copy value to third argument. 1090 |.if X64 1091 | mov RBa, [BASE+RC*8] 1092 | mov [RA+16], RBa 1093 |.else 1094 | mov RB, [BASE+RC*8+4] 1095 | mov RC, [BASE+RC*8] 1096 | mov [RA+20], RB 1097 | mov [RA+16], RC 1098 |.endif 1099 | lea PC, [RA+FRAME_CONT] 1100 | sub PC, BASE 1101 | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here. 1102 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). 1103 | jmp ->vm_call_dispatch_f 1104 | 1105 |->vmeta_tsetr: 1106 |.if X64WIN 1107 | mov L:CARG1d, SAVE_L 1108 | mov CARG3d, RC 1109 | mov L:CARG1d->base, BASE 1110 | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE. 1111 |.elif X64 1112 | mov L:CARG1d, SAVE_L 1113 | mov CARG2d, TAB:RB 1114 | mov L:CARG1d->base, BASE 1115 | mov RB, BASE // Save BASE. 1116 | mov CARG3d, RC // Caveat: CARG3d == BASE. 1117 |.else 1118 | mov L:RA, SAVE_L 1119 | mov ARG2, TAB:RB 1120 | mov RB, BASE // Save BASE. 1121 | mov ARG3, RC 1122 | mov ARG1, L:RA 1123 | mov L:RA->base, BASE 1124 |.endif 1125 | mov SAVE_PC, PC 1126 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) 1127 | // TValue * returned in eax (RC). 1128 | movzx RA, PC_RA 1129 | mov BASE, RB // Restore BASE. 1130 | jmp ->BC_TSETR_Z 1131 | 1132 |//-- Comparison metamethods --------------------------------------------- 1133 | 1134 |->vmeta_comp: 1135 |.if X64 1136 | mov L:RB, SAVE_L 1137 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d == BASE. 1138 |.if X64WIN 1139 | lea CARG3d, [BASE+RD*8] 1140 | lea CARG2d, [BASE+RA*8] 1141 |.else 1142 | lea CARG2d, [BASE+RA*8] 1143 | lea CARG3d, [BASE+RD*8] 1144 |.endif 1145 | mov CARG1d, L:RB // Caveat: CARG1d/CARG4d == RA. 1146 | movzx CARG4d, PC_OP 1147 |.else 1148 | movzx RB, PC_OP 1149 | lea RD, [BASE+RD*8] 1150 | lea RA, [BASE+RA*8] 1151 | mov ARG4, RB 1152 | mov L:RB, SAVE_L 1153 | mov ARG3, RD 1154 | mov ARG2, RA 1155 | mov ARG1, L:RB 1156 | mov L:RB->base, BASE 1157 |.endif 1158 | mov SAVE_PC, PC 1159 | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) 1160 | // 0/1 or TValue * (metamethod) returned in eax (RC). 1161 |3: 1162 | mov BASE, L:RB->base 1163 | cmp RC, 1 1164 | ja ->vmeta_binop 1165 |4: 1166 | lea PC, [PC+4] 1167 | jb >6 1168 |5: 1169 | movzx RD, PC_RD 1170 | branchPC RD 1171 |6: 1172 | ins_next 1173 | 1174 |->cont_condt: // BASE = base, RC = result 1175 | add PC, 4 1176 | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is true. 1177 | jb <5 1178 | jmp <6 1179 | 1180 |->cont_condf: // BASE = base, RC = result 1181 | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is false. 1182 | jmp <4 1183 | 1184 |->vmeta_equal: 1185 | sub PC, 4 1186 |.if X64WIN 1187 | mov CARG3d, RD 1188 | mov CARG4d, RB 1189 | mov L:RB, SAVE_L 1190 | mov L:RB->base, BASE // Caveat: CARG2d == BASE. 1191 | mov CARG2d, RA 1192 | mov CARG1d, L:RB // Caveat: CARG1d == RA. 1193 |.elif X64 1194 | mov CARG2d, RA 1195 | mov CARG4d, RB // Caveat: CARG4d == RA. 1196 | mov L:RB, SAVE_L 1197 | mov L:RB->base, BASE // Caveat: CARG3d == BASE. 1198 | mov CARG3d, RD 1199 | mov CARG1d, L:RB 1200 |.else 1201 | mov ARG4, RB 1202 | mov L:RB, SAVE_L 1203 | mov ARG3, RD 1204 | mov ARG2, RA 1205 | mov ARG1, L:RB 1206 | mov L:RB->base, BASE 1207 |.endif 1208 | mov SAVE_PC, PC 1209 | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) 1210 | // 0/1 or TValue * (metamethod) returned in eax (RC). 1211 | jmp <3 1212 | 1213 |->vmeta_equal_cd: 1214 |.if FFI 1215 | sub PC, 4 1216 | mov L:RB, SAVE_L 1217 | mov L:RB->base, BASE 1218 | mov FCARG1, L:RB 1219 | mov FCARG2, dword [PC-4] 1220 | mov SAVE_PC, PC 1221 | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins) 1222 | // 0/1 or TValue * (metamethod) returned in eax (RC). 1223 | jmp <3 1224 |.endif 1225 | 1226 |->vmeta_istype: 1227 |.if X64 1228 | mov L:RB, SAVE_L 1229 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. 1230 | mov CARG2d, RA 1231 | movzx CARG3d, PC_RD 1232 | mov L:CARG1d, L:RB 1233 |.else 1234 | movzx RD, PC_RD 1235 | mov ARG2, RA 1236 | mov L:RB, SAVE_L 1237 | mov ARG3, RD 1238 | mov ARG1, L:RB 1239 | mov L:RB->base, BASE 1240 |.endif 1241 | mov SAVE_PC, PC 1242 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) 1243 | mov BASE, L:RB->base 1244 | jmp <6 1245 | 1246 |//-- Arithmetic metamethods --------------------------------------------- 1247 | 1248 |->vmeta_arith_vno: 1249 |.if DUALNUM 1250 | movzx RB, PC_RB 1251 |.endif 1252 |->vmeta_arith_vn: 1253 | lea RC, [KBASE+RC*8] 1254 | jmp >1 1255 | 1256 |->vmeta_arith_nvo: 1257 |.if DUALNUM 1258 | movzx RC, PC_RC 1259 |.endif 1260 |->vmeta_arith_nv: 1261 | lea RC, [KBASE+RC*8] 1262 | lea RB, [BASE+RB*8] 1263 | xchg RB, RC 1264 | jmp >2 1265 | 1266 |->vmeta_unm: 1267 | lea RC, [BASE+RD*8] 1268 | mov RB, RC 1269 | jmp >2 1270 | 1271 |->vmeta_arith_vvo: 1272 |.if DUALNUM 1273 | movzx RB, PC_RB 1274 |.endif 1275 |->vmeta_arith_vv: 1276 | lea RC, [BASE+RC*8] 1277 |1: 1278 | lea RB, [BASE+RB*8] 1279 |2: 1280 | lea RA, [BASE+RA*8] 1281 |.if X64WIN 1282 | mov CARG3d, RB 1283 | mov CARG4d, RC 1284 | movzx RC, PC_OP 1285 | mov ARG5d, RC 1286 | mov L:RB, SAVE_L 1287 | mov L:RB->base, BASE // Caveat: CARG2d == BASE. 1288 | mov CARG2d, RA 1289 | mov CARG1d, L:RB // Caveat: CARG1d == RA. 1290 |.elif X64 1291 | movzx CARG5d, PC_OP 1292 | mov CARG2d, RA 1293 | mov CARG4d, RC // Caveat: CARG4d == RA. 1294 | mov L:CARG1d, SAVE_L 1295 | mov L:CARG1d->base, BASE // Caveat: CARG3d == BASE. 1296 | mov CARG3d, RB 1297 | mov L:RB, L:CARG1d 1298 |.else 1299 | mov ARG3, RB 1300 | mov L:RB, SAVE_L 1301 | mov ARG4, RC 1302 | movzx RC, PC_OP 1303 | mov ARG2, RA 1304 | mov ARG5, RC 1305 | mov ARG1, L:RB 1306 | mov L:RB->base, BASE 1307 |.endif 1308 | mov SAVE_PC, PC 1309 | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) 1310 | // NULL (finished) or TValue * (metamethod) returned in eax (RC). 1311 | mov BASE, L:RB->base 1312 | test RC, RC 1313 | jz ->cont_nop 1314 | 1315 | // Call metamethod for binary op. 1316 |->vmeta_binop: 1317 | // BASE = base, RC = new base, stack = cont/func/o1/o2 1318 | mov RA, RC 1319 | sub RC, BASE 1320 | mov [RA-12], PC // [cont|PC] 1321 | lea PC, [RC+FRAME_CONT] 1322 | mov NARGS:RD, 2+1 // 2 args for func(o1, o2). 1323 | jmp ->vm_call_dispatch 1324 | 1325 |->vmeta_len: 1326 | mov L:RB, SAVE_L 1327 | mov L:RB->base, BASE 1328 | lea FCARG2, [BASE+RD*8] // Caveat: FCARG2 == BASE 1329 | mov L:FCARG1, L:RB 1330 | mov SAVE_PC, PC 1331 | call extern lj_meta_len@8 // (lua_State *L, TValue *o) 1332 | // NULL (retry) or TValue * (metamethod) returned in eax (RC). 1333 | mov BASE, L:RB->base 1334#if LJ_52 1335 | test RC, RC 1336 | jne ->vmeta_binop // Binop call for compatibility. 1337 | movzx RD, PC_RD 1338 | mov TAB:FCARG1, [BASE+RD*8] 1339 | jmp ->BC_LEN_Z 1340#else 1341 | jmp ->vmeta_binop // Binop call for compatibility. 1342#endif 1343 | 1344 |//-- Call metamethod ---------------------------------------------------- 1345 | 1346 |->vmeta_call_ra: 1347 | lea RA, [BASE+RA*8+8] 1348 |->vmeta_call: // Resolve and call __call metamethod. 1349 | // BASE = old base, RA = new base, RC = nargs+1, PC = return 1350 | mov TMP2, RA // Save RA, RC for us. 1351 | mov TMP1, NARGS:RD 1352 | sub RA, 8 1353 |.if X64 1354 | mov L:RB, SAVE_L 1355 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. 1356 | mov CARG2d, RA 1357 | lea CARG3d, [RA+NARGS:RD*8] 1358 | mov CARG1d, L:RB // Caveat: CARG1d may be RA. 1359 |.else 1360 | lea RC, [RA+NARGS:RD*8] 1361 | mov L:RB, SAVE_L 1362 | mov ARG2, RA 1363 | mov ARG3, RC 1364 | mov ARG1, L:RB 1365 | mov L:RB->base, BASE // This is the callers base! 1366 |.endif 1367 | mov SAVE_PC, PC 1368 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) 1369 | mov BASE, L:RB->base 1370 | mov RA, TMP2 1371 | mov NARGS:RD, TMP1 1372 | mov LFUNC:RB, [RA-8] 1373 | add NARGS:RD, 1 1374 | // This is fragile. L->base must not move, KBASE must always be defined. 1375 |.if x64 1376 | cmp KBASEa, rdx // Continue with CALLT if flag set. 1377 |.else 1378 | cmp KBASE, BASE // Continue with CALLT if flag set. 1379 |.endif 1380 | je ->BC_CALLT_Z 1381 | mov BASE, RA 1382 | ins_call // Otherwise call resolved metamethod. 1383 | 1384 |//-- Argument coercion for 'for' statement ------------------------------ 1385 | 1386 |->vmeta_for: 1387 | mov L:RB, SAVE_L 1388 | mov L:RB->base, BASE 1389 | mov FCARG2, RA // Caveat: FCARG2 == BASE 1390 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA 1391 | mov SAVE_PC, PC 1392 | call extern lj_meta_for@8 // (lua_State *L, TValue *base) 1393 | mov BASE, L:RB->base 1394 | mov RC, [PC-4] 1395 | movzx RA, RCH 1396 | movzx OP, RCL 1397 | shr RC, 16 1398 |.if X64 1399 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI. 1400 |.else 1401 | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Retry FORI or JFORI. 1402 |.endif 1403 | 1404 |//----------------------------------------------------------------------- 1405 |//-- Fast functions ----------------------------------------------------- 1406 |//----------------------------------------------------------------------- 1407 | 1408 |.macro .ffunc, name 1409 |->ff_ .. name: 1410 |.endmacro 1411 | 1412 |.macro .ffunc_1, name 1413 |->ff_ .. name: 1414 | cmp NARGS:RD, 1+1; jb ->fff_fallback 1415 |.endmacro 1416 | 1417 |.macro .ffunc_2, name 1418 |->ff_ .. name: 1419 | cmp NARGS:RD, 2+1; jb ->fff_fallback 1420 |.endmacro 1421 | 1422 |.macro .ffunc_nsse, name, op 1423 | .ffunc_1 name 1424 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1425 | op xmm0, qword [BASE] 1426 |.endmacro 1427 | 1428 |.macro .ffunc_nsse, name 1429 | .ffunc_nsse name, movsd 1430 |.endmacro 1431 | 1432 |.macro .ffunc_nnsse, name 1433 | .ffunc_2 name 1434 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1435 | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback 1436 | movsd xmm0, qword [BASE] 1437 | movsd xmm1, qword [BASE+8] 1438 |.endmacro 1439 | 1440 |.macro .ffunc_nnr, name 1441 | .ffunc_2 name 1442 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1443 | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback 1444 | fld qword [BASE+8] 1445 | fld qword [BASE] 1446 |.endmacro 1447 | 1448 |// Inlined GC threshold check. Caveat: uses label 1. 1449 |.macro ffgccheck 1450 | mov RB, [DISPATCH+DISPATCH_GL(gc.total)] 1451 | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)] 1452 | jb >1 1453 | call ->fff_gcstep 1454 |1: 1455 |.endmacro 1456 | 1457 |//-- Base library: checks ----------------------------------------------- 1458 | 1459 |.ffunc_1 assert 1460 | mov RB, [BASE+4] 1461 | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback 1462 | mov PC, [BASE-4] 1463 | mov MULTRES, RD 1464 | mov [BASE-4], RB 1465 | mov RB, [BASE] 1466 | mov [BASE-8], RB 1467 | sub RD, 2 1468 | jz >2 1469 | mov RA, BASE 1470 |1: 1471 | add RA, 8 1472 |.if X64 1473 | mov RBa, [RA] 1474 | mov [RA-8], RBa 1475 |.else 1476 | mov RB, [RA+4] 1477 | mov [RA-4], RB 1478 | mov RB, [RA] 1479 | mov [RA-8], RB 1480 |.endif 1481 | sub RD, 1 1482 | jnz <1 1483 |2: 1484 | mov RD, MULTRES 1485 | jmp ->fff_res_ 1486 | 1487 |.ffunc_1 type 1488 | mov RB, [BASE+4] 1489 |.if X64 1490 | mov RA, RB 1491 | sar RA, 15 1492 | cmp RA, -2 1493 | je >3 1494 |.endif 1495 | mov RC, ~LJ_TNUMX 1496 | not RB 1497 | cmp RC, RB 1498 | cmova RC, RB 1499 |2: 1500 | mov CFUNC:RB, [BASE-8] 1501 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] 1502 | mov PC, [BASE-4] 1503 | mov dword [BASE-4], LJ_TSTR 1504 | mov [BASE-8], STR:RC 1505 | jmp ->fff_res1 1506 |.if X64 1507 |3: 1508 | mov RC, ~LJ_TLIGHTUD 1509 | jmp <2 1510 |.endif 1511 | 1512 |//-- Base library: getters and setters --------------------------------- 1513 | 1514 |.ffunc_1 getmetatable 1515 | mov RB, [BASE+4] 1516 | mov PC, [BASE-4] 1517 | cmp RB, LJ_TTAB; jne >6 1518 |1: // Field metatable must be at same offset for GCtab and GCudata! 1519 | mov TAB:RB, [BASE] 1520 | mov TAB:RB, TAB:RB->metatable 1521 |2: 1522 | test TAB:RB, TAB:RB 1523 | mov dword [BASE-4], LJ_TNIL 1524 | jz ->fff_res1 1525 | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+4*(GCROOT_MMNAME+MM_metatable)] 1526 | mov dword [BASE-4], LJ_TTAB // Store metatable as default result. 1527 | mov [BASE-8], TAB:RB 1528 | mov RA, TAB:RB->hmask 1529 | and RA, STR:RC->sid 1530 | imul RA, #NODE 1531 | add NODE:RA, TAB:RB->node 1532 |3: // Rearranged logic, because we expect _not_ to find the key. 1533 | cmp dword NODE:RA->key.it, LJ_TSTR 1534 | jne >4 1535 | cmp dword NODE:RA->key.gcr, STR:RC 1536 | je >5 1537 |4: 1538 | mov NODE:RA, NODE:RA->next 1539 | test NODE:RA, NODE:RA 1540 | jnz <3 1541 | jmp ->fff_res1 // Not found, keep default result. 1542 |5: 1543 | mov RB, [RA+4] 1544 | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value. 1545 | mov RC, [RA] 1546 | mov [BASE-4], RB // Return value of mt.__metatable. 1547 | mov [BASE-8], RC 1548 | jmp ->fff_res1 1549 | 1550 |6: 1551 | cmp RB, LJ_TUDATA; je <1 1552 |.if X64 1553 | cmp RB, LJ_TNUMX; ja >8 1554 | cmp RB, LJ_TISNUM; jbe >7 1555 | mov RB, LJ_TLIGHTUD 1556 | jmp >8 1557 |7: 1558 |.else 1559 | cmp RB, LJ_TISNUM; ja >8 1560 |.endif 1561 | mov RB, LJ_TNUMX 1562 |8: 1563 | not RB 1564 | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])] 1565 | jmp <2 1566 | 1567 |.ffunc_2 setmetatable 1568 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback 1569 | // Fast path: no mt for table yet and not clearing the mt. 1570 | mov TAB:RB, [BASE] 1571 | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback 1572 | cmp dword [BASE+12], LJ_TTAB; jne ->fff_fallback 1573 | mov TAB:RC, [BASE+8] 1574 | mov TAB:RB->metatable, TAB:RC 1575 | mov PC, [BASE-4] 1576 | mov dword [BASE-4], LJ_TTAB // Return original table. 1577 | mov [BASE-8], TAB:RB 1578 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) 1579 | jz >1 1580 | // Possible write barrier. Table is black, but skip iswhite(mt) check. 1581 | barrierback TAB:RB, RC 1582 |1: 1583 | jmp ->fff_res1 1584 | 1585 |.ffunc_2 rawget 1586 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback 1587 |.if X64WIN 1588 | mov RB, BASE // Save BASE. 1589 | lea CARG3d, [BASE+8] 1590 | mov CARG2d, [BASE] // Caveat: CARG2d == BASE. 1591 | mov CARG1d, SAVE_L 1592 |.elif X64 1593 | mov RB, BASE // Save BASE. 1594 | mov CARG2d, [BASE] 1595 | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE. 1596 | mov CARG1d, SAVE_L 1597 |.else 1598 | mov TAB:RD, [BASE] 1599 | mov L:RB, SAVE_L 1600 | mov ARG2, TAB:RD 1601 | mov ARG1, L:RB 1602 | mov RB, BASE // Save BASE. 1603 | add BASE, 8 1604 | mov ARG3, BASE 1605 |.endif 1606 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1607 | // cTValue * returned in eax (RD). 1608 | mov BASE, RB // Restore BASE. 1609 | // Copy table slot. 1610 |.if X64 1611 | mov RBa, [RD] 1612 | mov PC, [BASE-4] 1613 | mov [BASE-8], RBa 1614 |.else 1615 | mov RB, [RD] 1616 | mov RD, [RD+4] 1617 | mov PC, [BASE-4] 1618 | mov [BASE-8], RB 1619 | mov [BASE-4], RD 1620 |.endif 1621 | jmp ->fff_res1 1622 | 1623 |//-- Base library: conversions ------------------------------------------ 1624 | 1625 |.ffunc tonumber 1626 | // Only handles the number case inline (without a base argument). 1627 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. 1628 | cmp dword [BASE+4], LJ_TISNUM 1629 |.if DUALNUM 1630 | jne >1 1631 | mov RB, dword [BASE]; jmp ->fff_resi 1632 |1: 1633 | ja ->fff_fallback 1634 |.else 1635 | jae ->fff_fallback 1636 |.endif 1637 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 1638 | 1639 |.ffunc_1 tostring 1640 | // Only handles the string or number case inline. 1641 | mov PC, [BASE-4] 1642 | cmp dword [BASE+4], LJ_TSTR; jne >3 1643 | // A __tostring method in the string base metatable is ignored. 1644 | mov STR:RD, [BASE] 1645 |2: 1646 | mov dword [BASE-4], LJ_TSTR 1647 | mov [BASE-8], STR:RD 1648 | jmp ->fff_res1 1649 |3: // Handle numbers inline, unless a number base metatable is present. 1650 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback 1651 | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0 1652 | jne ->fff_fallback 1653 | ffgccheck // Caveat: uses label 1. 1654 | mov L:RB, SAVE_L 1655 | mov L:RB->base, BASE // Add frame since C call can throw. 1656 | mov SAVE_PC, PC // Redundant (but a defined value). 1657 |.if X64 and not X64WIN 1658 | mov FCARG2, BASE // Otherwise: FCARG2 == BASE 1659 |.endif 1660 | mov L:FCARG1, L:RB 1661 |.if DUALNUM 1662 | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o) 1663 |.else 1664 | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np) 1665 |.endif 1666 | // GCstr returned in eax (RD). 1667 | mov BASE, L:RB->base 1668 | jmp <2 1669 | 1670 |//-- Base library: iterators ------------------------------------------- 1671 | 1672 |.ffunc_1 next 1673 | je >2 // Missing 2nd arg? 1674 |1: 1675 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback 1676 | mov PC, [BASE-4] 1677 | mov RB, BASE // Save BASE. 1678 |.if X64WIN 1679 | mov CARG1d, [BASE] 1680 | lea CARG3d, [BASE-8] 1681 | lea CARG2d, [BASE+8] // Caveat: CARG2d == BASE. 1682 |.elif X64 1683 | mov CARG1d, [BASE] 1684 | lea CARG2d, [BASE+8] 1685 | lea CARG3d, [BASE-8] // Caveat: CARG3d == BASE. 1686 |.else 1687 | mov TAB:RD, [BASE] 1688 | mov ARG1, TAB:RD 1689 | add BASE, 8 1690 | mov ARG2, BASE 1691 | sub BASE, 8+8 1692 | mov ARG3, BASE 1693 |.endif 1694 | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) 1695 | // 1=found, 0=end, -1=error returned in eax (RD). 1696 | mov BASE, RB // Restore BASE. 1697 | test RD, RD; jg ->fff_res2 // Found key/value. 1698 | js ->fff_fallback_2 // Invalid key. 1699 | // End of traversal: return nil. 1700 | mov dword [BASE-4], LJ_TNIL 1701 | jmp ->fff_res1 1702 |2: // Set missing 2nd arg to nil. 1703 | mov dword [BASE+12], LJ_TNIL 1704 | jmp <1 1705 | 1706 |.ffunc_1 pairs 1707 | mov TAB:RB, [BASE] 1708 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback 1709#if LJ_52 1710 | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback 1711#endif 1712 | mov CFUNC:RB, [BASE-8] 1713 | mov CFUNC:RD, CFUNC:RB->upvalue[0] 1714 | mov PC, [BASE-4] 1715 | mov dword [BASE-4], LJ_TFUNC 1716 | mov [BASE-8], CFUNC:RD 1717 | mov dword [BASE+12], LJ_TNIL 1718 | mov RD, 1+3 1719 | jmp ->fff_res 1720 | 1721 |.ffunc_2 ipairs_aux 1722 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback 1723 | cmp dword [BASE+12], LJ_TISNUM 1724 |.if DUALNUM 1725 | jne ->fff_fallback 1726 |.else 1727 | jae ->fff_fallback 1728 |.endif 1729 | mov PC, [BASE-4] 1730 |.if DUALNUM 1731 | mov RD, dword [BASE+8] 1732 | add RD, 1 1733 | mov dword [BASE-4], LJ_TISNUM 1734 | mov dword [BASE-8], RD 1735 |.else 1736 | movsd xmm0, qword [BASE+8] 1737 | sseconst_1 xmm1, RBa 1738 | addsd xmm0, xmm1 1739 | cvttsd2si RD, xmm0 1740 | movsd qword [BASE-8], xmm0 1741 |.endif 1742 | mov TAB:RB, [BASE] 1743 | cmp RD, TAB:RB->asize; jae >2 // Not in array part? 1744 | shl RD, 3 1745 | add RD, TAB:RB->array 1746 |1: 1747 | cmp dword [RD+4], LJ_TNIL; je ->fff_res0 1748 | // Copy array slot. 1749 |.if X64 1750 | mov RBa, [RD] 1751 | mov [BASE], RBa 1752 |.else 1753 | mov RB, [RD] 1754 | mov RD, [RD+4] 1755 | mov [BASE], RB 1756 | mov [BASE+4], RD 1757 |.endif 1758 |->fff_res2: 1759 | mov RD, 1+2 1760 | jmp ->fff_res 1761 |2: // Check for empty hash part first. Otherwise call C function. 1762 | cmp dword TAB:RB->hmask, 0; je ->fff_res0 1763 | mov FCARG1, TAB:RB 1764 | mov RB, BASE // Save BASE. 1765 | mov FCARG2, RD // Caveat: FCARG2 == BASE 1766 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key) 1767 | // cTValue * or NULL returned in eax (RD). 1768 | mov BASE, RB 1769 | test RD, RD 1770 | jnz <1 1771 |->fff_res0: 1772 | mov RD, 1+0 1773 | jmp ->fff_res 1774 | 1775 |.ffunc_1 ipairs 1776 | mov TAB:RB, [BASE] 1777 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback 1778#if LJ_52 1779 | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback 1780#endif 1781 | mov CFUNC:RB, [BASE-8] 1782 | mov CFUNC:RD, CFUNC:RB->upvalue[0] 1783 | mov PC, [BASE-4] 1784 | mov dword [BASE-4], LJ_TFUNC 1785 | mov [BASE-8], CFUNC:RD 1786 |.if DUALNUM 1787 | mov dword [BASE+12], LJ_TISNUM 1788 | mov dword [BASE+8], 0 1789 |.else 1790 | xorps xmm0, xmm0 1791 | movsd qword [BASE+8], xmm0 1792 |.endif 1793 | mov RD, 1+3 1794 | jmp ->fff_res 1795 | 1796 |//-- Base library: catch errors ---------------------------------------- 1797 | 1798 |.ffunc_1 pcall 1799 | lea RA, [BASE+8] 1800 | sub NARGS:RD, 1 1801 | mov PC, 8+FRAME_PCALL 1802 |1: 1803 | movzx RB, byte [DISPATCH+DISPATCH_GL(hookmask)] 1804 | shr RB, HOOK_ACTIVE_SHIFT 1805 | and RB, 1 1806 | add PC, RB // Remember active hook before pcall. 1807 | jmp ->vm_call_dispatch 1808 | 1809 |.ffunc_2 xpcall 1810 | cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback 1811 | mov RB, [BASE+4] // Swap function and traceback. 1812 | mov [BASE+12], RB 1813 | mov dword [BASE+4], LJ_TFUNC 1814 | mov LFUNC:RB, [BASE] 1815 | mov PC, [BASE+8] 1816 | mov [BASE+8], LFUNC:RB 1817 | mov [BASE], PC 1818 | lea RA, [BASE+16] 1819 | sub NARGS:RD, 2 1820 | mov PC, 16+FRAME_PCALL 1821 | jmp <1 1822 | 1823 |//-- Coroutine library -------------------------------------------------- 1824 | 1825 |.macro coroutine_resume_wrap, resume 1826 |.if resume 1827 |.ffunc_1 coroutine_resume 1828 | mov L:RB, [BASE] 1829 |.else 1830 |.ffunc coroutine_wrap_aux 1831 | mov CFUNC:RB, [BASE-8] 1832 | mov L:RB, CFUNC:RB->upvalue[0].gcr 1833 |.endif 1834 | mov PC, [BASE-4] 1835 | mov SAVE_PC, PC 1836 |.if X64 1837 | mov TMP1, L:RB 1838 |.else 1839 | mov ARG1, L:RB 1840 |.endif 1841 |.if resume 1842 | cmp dword [BASE+4], LJ_TTHREAD; jne ->fff_fallback 1843 |.endif 1844 | cmp aword L:RB->cframe, 0; jne ->fff_fallback 1845 | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback 1846 | mov RA, L:RB->top 1847 | je >1 // Status != LUA_YIELD (i.e. 0)? 1848 | cmp RA, L:RB->base // Check for presence of initial func. 1849 | je ->fff_fallback 1850 |1: 1851 |.if resume 1852 | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread). 1853 |.else 1854 | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1). 1855 |.endif 1856 | cmp PC, L:RB->maxstack; ja ->fff_fallback 1857 | mov L:RB->top, PC 1858 | 1859 | mov L:RB, SAVE_L 1860 | mov L:RB->base, BASE 1861 |.if resume 1862 | add BASE, 8 // Keep resumed thread in stack for GC. 1863 |.endif 1864 | mov L:RB->top, BASE 1865 |.if resume 1866 | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move. 1867 |.else 1868 | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move. 1869 |.endif 1870 | sub RBa, PCa // Relative to PC. 1871 | 1872 | cmp PC, RA 1873 | je >3 1874 |2: // Move args to coroutine. 1875 |.if X64 1876 | mov RCa, [PC+RB] 1877 | mov [PC-8], RCa 1878 |.else 1879 | mov RC, [PC+RB+4] 1880 | mov [PC-4], RC 1881 | mov RC, [PC+RB] 1882 | mov [PC-8], RC 1883 |.endif 1884 | sub PC, 8 1885 | cmp PC, RA 1886 | jne <2 1887 |3: 1888 |.if X64 1889 | mov CARG2d, RA 1890 | mov CARG1d, TMP1 1891 |.else 1892 | mov ARG2, RA 1893 | xor RA, RA 1894 | mov ARG4, RA 1895 | mov ARG3, RA 1896 |.endif 1897 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) 1898 | 1899 | mov L:RB, SAVE_L 1900 |.if X64 1901 | mov L:PC, TMP1 1902 |.else 1903 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. 1904 |.endif 1905 | mov BASE, L:RB->base 1906 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB 1907 | set_vmstate INTERP 1908 | 1909 | cmp eax, LUA_YIELD 1910 | ja >8 1911 |4: 1912 | mov RA, L:PC->base 1913 | mov KBASE, L:PC->top 1914 | mov L:PC->top, RA // Clear coroutine stack. 1915 | mov PC, KBASE 1916 | sub PC, RA 1917 | je >6 // No results? 1918 | lea RD, [BASE+PC] 1919 | shr PC, 3 1920 | cmp RD, L:RB->maxstack 1921 | ja >9 // Need to grow stack? 1922 | 1923 | mov RB, BASE 1924 | sub RBa, RAa 1925 |5: // Move results from coroutine. 1926 |.if X64 1927 | mov RDa, [RA] 1928 | mov [RA+RB], RDa 1929 |.else 1930 | mov RD, [RA] 1931 | mov [RA+RB], RD 1932 | mov RD, [RA+4] 1933 | mov [RA+RB+4], RD 1934 |.endif 1935 | add RA, 8 1936 | cmp RA, KBASE 1937 | jne <5 1938 |6: 1939 |.if resume 1940 | lea RD, [PC+2] // nresults+1 = 1 + true + results. 1941 | mov dword [BASE-4], LJ_TTRUE // Prepend true to results. 1942 |.else 1943 | lea RD, [PC+1] // nresults+1 = 1 + results. 1944 |.endif 1945 |7: 1946 | mov PC, SAVE_PC 1947 | mov MULTRES, RD 1948 |.if resume 1949 | mov RAa, -8 1950 |.else 1951 | xor RA, RA 1952 |.endif 1953 | test PC, FRAME_TYPE 1954 | jz ->BC_RET_Z 1955 | jmp ->vm_return 1956 | 1957 |8: // Coroutine returned with error (at co->top-1). 1958 |.if resume 1959 | mov dword [BASE-4], LJ_TFALSE // Prepend false to results. 1960 | mov RA, L:PC->top 1961 | sub RA, 8 1962 | mov L:PC->top, RA // Clear error from coroutine stack. 1963 | // Copy error message. 1964 |.if X64 1965 | mov RDa, [RA] 1966 | mov [BASE], RDa 1967 |.else 1968 | mov RD, [RA] 1969 | mov [BASE], RD 1970 | mov RD, [RA+4] 1971 | mov [BASE+4], RD 1972 |.endif 1973 | mov RD, 1+2 // nresults+1 = 1 + false + error. 1974 | jmp <7 1975 |.else 1976 | mov FCARG2, L:PC 1977 | mov FCARG1, L:RB 1978 | call extern lj_ffh_coroutine_wrap_err@8 // (lua_State *L, lua_State *co) 1979 | // Error function does not return. 1980 |.endif 1981 | 1982 |9: // Handle stack expansion on return from yield. 1983 |.if X64 1984 | mov L:RA, TMP1 1985 |.else 1986 | mov L:RA, ARG1 // The callee doesn't modify SAVE_L. 1987 |.endif 1988 | mov L:RA->top, KBASE // Undo coroutine stack clearing. 1989 | mov FCARG2, PC 1990 | mov FCARG1, L:RB 1991 | call extern lj_state_growstack@8 // (lua_State *L, int n) 1992 |.if X64 1993 | mov L:PC, TMP1 1994 |.else 1995 | mov L:PC, ARG1 1996 |.endif 1997 | mov BASE, L:RB->base 1998 | jmp <4 // Retry the stack move. 1999 |.endmacro 2000 | 2001 | coroutine_resume_wrap 1 // coroutine.resume 2002 | coroutine_resume_wrap 0 // coroutine.wrap 2003 | 2004 |.ffunc coroutine_yield 2005 | mov L:RB, SAVE_L 2006 | test aword L:RB->cframe, CFRAME_RESUME 2007 | jz ->fff_fallback 2008 | mov L:RB->base, BASE 2009 | lea RD, [BASE+NARGS:RD*8-8] 2010 | mov L:RB->top, RD 2011 | xor RD, RD 2012 | mov aword L:RB->cframe, RDa 2013 | mov al, LUA_YIELD 2014 | mov byte L:RB->status, al 2015 | jmp ->vm_leave_unw 2016 | 2017 |//-- Math library ------------------------------------------------------- 2018 | 2019 |.if not DUALNUM 2020 |->fff_resi: // Dummy. 2021 |.endif 2022 | 2023 |->fff_resn: 2024 | mov PC, [BASE-4] 2025 | fstp qword [BASE-8] 2026 | jmp ->fff_res1 2027 | 2028 | .ffunc_1 math_abs 2029 |.if DUALNUM 2030 | cmp dword [BASE+4], LJ_TISNUM; jne >2 2031 | mov RB, dword [BASE] 2032 | cmp RB, 0; jns ->fff_resi 2033 | neg RB; js >1 2034 |->fff_resbit: 2035 |->fff_resi: 2036 | mov PC, [BASE-4] 2037 | mov dword [BASE-4], LJ_TISNUM 2038 | mov dword [BASE-8], RB 2039 | jmp ->fff_res1 2040 |1: 2041 | mov PC, [BASE-4] 2042 | mov dword [BASE-4], 0x41e00000 // 2^31. 2043 | mov dword [BASE-8], 0 2044 | jmp ->fff_res1 2045 |2: 2046 | ja ->fff_fallback 2047 |.else 2048 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2049 |.endif 2050 | movsd xmm0, qword [BASE] 2051 | sseconst_abs xmm1, RDa 2052 | andps xmm0, xmm1 2053 |->fff_resxmm0: 2054 | mov PC, [BASE-4] 2055 | movsd qword [BASE-8], xmm0 2056 | // fallthrough 2057 | 2058 |->fff_res1: 2059 | mov RD, 1+1 2060 |->fff_res: 2061 | mov MULTRES, RD 2062 |->fff_res_: 2063 | test PC, FRAME_TYPE 2064 | jnz >7 2065 |5: 2066 | cmp PC_RB, RDL // More results expected? 2067 | ja >6 2068 | // Adjust BASE. KBASE is assumed to be set for the calling frame. 2069 | movzx RA, PC_RA 2070 | not RAa // Note: ~RA = -(RA+1) 2071 | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8 2072 | ins_next 2073 | 2074 |6: // Fill up results with nil. 2075 | mov dword [BASE+RD*8-12], LJ_TNIL 2076 | add RD, 1 2077 | jmp <5 2078 | 2079 |7: // Non-standard return case. 2080 | mov RAa, -8 // Results start at BASE+RA = BASE-8. 2081 | jmp ->vm_return 2082 | 2083 |.if X64 2084 |.define fff_resfp, fff_resxmm0 2085 |.else 2086 |.define fff_resfp, fff_resn 2087 |.endif 2088 | 2089 |.macro math_round, func 2090 | .ffunc math_ .. func 2091 |.if DUALNUM 2092 | cmp dword [BASE+4], LJ_TISNUM; jne >1 2093 | mov RB, dword [BASE]; jmp ->fff_resi 2094 |1: 2095 | ja ->fff_fallback 2096 |.else 2097 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2098 |.endif 2099 | movsd xmm0, qword [BASE] 2100 | call ->vm_ .. func .. _sse 2101 |.if DUALNUM 2102 | cvttsd2si RB, xmm0 2103 | cmp RB, 0x80000000 2104 | jne ->fff_resi 2105 | cvtsi2sd xmm1, RB 2106 | ucomisd xmm0, xmm1 2107 | jp ->fff_resxmm0 2108 | je ->fff_resi 2109 |.endif 2110 | jmp ->fff_resxmm0 2111 |.endmacro 2112 | 2113 | math_round floor 2114 | math_round ceil 2115 | 2116 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 2117 | 2118 |.ffunc math_log 2119 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. 2120 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2121 | movsd xmm0, qword [BASE] 2122 |.if not X64 2123 | movsd FPARG1, xmm0 2124 |.endif 2125 | mov RB, BASE 2126 | call extern log 2127 | mov BASE, RB 2128 | jmp ->fff_resfp 2129 | 2130 |.macro math_extern, func 2131 | .ffunc_nsse math_ .. func 2132 |.if not X64 2133 | movsd FPARG1, xmm0 2134 |.endif 2135 | mov RB, BASE 2136 | call extern func 2137 | mov BASE, RB 2138 | jmp ->fff_resfp 2139 |.endmacro 2140 | 2141 |.macro math_extern2, func 2142 | .ffunc_nnsse math_ .. func 2143 |.if not X64 2144 | movsd FPARG1, xmm0 2145 | movsd FPARG3, xmm1 2146 |.endif 2147 | mov RB, BASE 2148 | call extern func 2149 | mov BASE, RB 2150 | jmp ->fff_resfp 2151 |.endmacro 2152 | 2153 | math_extern log10 2154 | math_extern exp 2155 | math_extern sin 2156 | math_extern cos 2157 | math_extern tan 2158 | math_extern asin 2159 | math_extern acos 2160 | math_extern atan 2161 | math_extern sinh 2162 | math_extern cosh 2163 | math_extern tanh 2164 | math_extern2 pow 2165 | math_extern2 atan2 2166 | math_extern2 fmod 2167 | 2168 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn 2169 | 2170 |.ffunc_1 math_frexp 2171 | mov RB, [BASE+4] 2172 | cmp RB, LJ_TISNUM; jae ->fff_fallback 2173 | mov PC, [BASE-4] 2174 | mov RC, [BASE] 2175 | mov [BASE-4], RB; mov [BASE-8], RC 2176 | shl RB, 1; cmp RB, 0xffe00000; jae >3 2177 | or RC, RB; jz >3 2178 | mov RC, 1022 2179 | cmp RB, 0x00200000; jb >4 2180 |1: 2181 | shr RB, 21; sub RB, RC // Extract and unbias exponent. 2182 | cvtsi2sd xmm0, RB 2183 | mov RB, [BASE-4] 2184 | and RB, 0x800fffff // Mask off exponent. 2185 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 2186 | mov [BASE-4], RB 2187 |2: 2188 | movsd qword [BASE], xmm0 2189 | mov RD, 1+2 2190 | jmp ->fff_res 2191 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. 2192 | xorps xmm0, xmm0; jmp <2 2193 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. 2194 | movsd xmm0, qword [BASE] 2195 | sseconst_hi xmm1, RBa, 43500000 // 2^54. 2196 | mulsd xmm0, xmm1 2197 | movsd qword [BASE-8], xmm0 2198 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 2199 | 2200 |.ffunc_nsse math_modf 2201 | mov RB, [BASE+4] 2202 | mov PC, [BASE-4] 2203 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? 2204 | movaps xmm4, xmm0 2205 | call ->vm_trunc_sse 2206 | subsd xmm4, xmm0 2207 |1: 2208 | movsd qword [BASE-8], xmm0 2209 | movsd qword [BASE], xmm4 2210 | mov RC, [BASE-4]; mov RB, [BASE+4] 2211 | xor RC, RB; js >3 // Need to adjust sign? 2212 |2: 2213 | mov RD, 1+2 2214 | jmp ->fff_res 2215 |3: 2216 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. 2217 | jmp <2 2218 |4: 2219 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 2220 | 2221 |.macro math_minmax, name, cmovop, sseop 2222 | .ffunc_1 name 2223 | mov RA, 2 2224 | cmp dword [BASE+4], LJ_TISNUM 2225 |.if DUALNUM 2226 | jne >4 2227 | mov RB, dword [BASE] 2228 |1: // Handle integers. 2229 | cmp RA, RD; jae ->fff_resi 2230 | cmp dword [BASE+RA*8-4], LJ_TISNUM; jne >3 2231 | cmp RB, dword [BASE+RA*8-8] 2232 | cmovop RB, dword [BASE+RA*8-8] 2233 | add RA, 1 2234 | jmp <1 2235 |3: 2236 | ja ->fff_fallback 2237 | // Convert intermediate result to number and continue below. 2238 | cvtsi2sd xmm0, RB 2239 | jmp >6 2240 |4: 2241 | ja ->fff_fallback 2242 |.else 2243 | jae ->fff_fallback 2244 |.endif 2245 | 2246 | movsd xmm0, qword [BASE] 2247 |5: // Handle numbers or integers. 2248 | cmp RA, RD; jae ->fff_resxmm0 2249 | cmp dword [BASE+RA*8-4], LJ_TISNUM 2250 |.if DUALNUM 2251 | jb >6 2252 | ja ->fff_fallback 2253 | cvtsi2sd xmm1, dword [BASE+RA*8-8] 2254 | jmp >7 2255 |.else 2256 | jae ->fff_fallback 2257 |.endif 2258 |6: 2259 | movsd xmm1, qword [BASE+RA*8-8] 2260 |7: 2261 | sseop xmm0, xmm1 2262 | add RA, 1 2263 | jmp <5 2264 |.endmacro 2265 | 2266 | math_minmax math_min, cmovg, minsd 2267 | math_minmax math_max, cmovl, maxsd 2268 | 2269 |//-- String library ----------------------------------------------------- 2270 | 2271 |.ffunc string_byte // Only handle the 1-arg case here. 2272 | cmp NARGS:RD, 1+1; jne ->fff_fallback 2273 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2274 | mov STR:RB, [BASE] 2275 | mov PC, [BASE-4] 2276 | cmp dword STR:RB->len, 1 2277 | jb ->fff_res0 // Return no results for empty string. 2278 | movzx RB, byte STR:RB[1] 2279 |.if DUALNUM 2280 | jmp ->fff_resi 2281 |.else 2282 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 2283 |.endif 2284 | 2285 |.ffunc string_char // Only handle the 1-arg case here. 2286 | ffgccheck 2287 | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg. 2288 | cmp dword [BASE+4], LJ_TISNUM 2289 |.if DUALNUM 2290 | jne ->fff_fallback 2291 | mov RB, dword [BASE] 2292 | cmp RB, 255; ja ->fff_fallback 2293 | mov TMP2, RB 2294 |.else 2295 | jae ->fff_fallback 2296 | cvttsd2si RB, qword [BASE] 2297 | cmp RB, 255; ja ->fff_fallback 2298 | mov TMP2, RB 2299 |.endif 2300 |.if X64 2301 | mov TMP3, 1 2302 |.else 2303 | mov ARG3, 1 2304 |.endif 2305 | lea RDa, TMP2 // Points to stack. Little-endian. 2306 |->fff_newstr: 2307 | mov L:RB, SAVE_L 2308 | mov L:RB->base, BASE 2309 |.if X64 2310 | mov CARG3d, TMP3 // Zero-extended to size_t. 2311 | mov CARG2, RDa // May be 64 bit ptr to stack. 2312 | mov CARG1d, L:RB 2313 |.else 2314 | mov ARG2, RD 2315 | mov ARG1, L:RB 2316 |.endif 2317 | mov SAVE_PC, PC 2318 | call extern lj_str_new // (lua_State *L, char *str, size_t l) 2319 |->fff_resstr: 2320 | // GCstr * returned in eax (RD). 2321 | mov BASE, L:RB->base 2322 | mov PC, [BASE-4] 2323 | mov dword [BASE-4], LJ_TSTR 2324 | mov [BASE-8], STR:RD 2325 | jmp ->fff_res1 2326 | 2327 |.ffunc string_sub 2328 | ffgccheck 2329 | mov TMP2, -1 2330 | cmp NARGS:RD, 1+2; jb ->fff_fallback 2331 | jna >1 2332 | cmp dword [BASE+20], LJ_TISNUM 2333 |.if DUALNUM 2334 | jne ->fff_fallback 2335 | mov RB, dword [BASE+16] 2336 | mov TMP2, RB 2337 |.else 2338 | jae ->fff_fallback 2339 | cvttsd2si RB, qword [BASE+16] 2340 | mov TMP2, RB 2341 |.endif 2342 |1: 2343 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2344 | cmp dword [BASE+12], LJ_TISNUM 2345 |.if DUALNUM 2346 | jne ->fff_fallback 2347 |.else 2348 | jae ->fff_fallback 2349 |.endif 2350 | mov STR:RB, [BASE] 2351 | mov TMP3, STR:RB 2352 | mov RB, STR:RB->len 2353 |.if DUALNUM 2354 | mov RA, dword [BASE+8] 2355 |.else 2356 | cvttsd2si RA, qword [BASE+8] 2357 |.endif 2358 | mov RC, TMP2 2359 | cmp RB, RC // len < end? (unsigned compare) 2360 | jb >5 2361 |2: 2362 | test RA, RA // start <= 0? 2363 | jle >7 2364 |3: 2365 | mov STR:RB, TMP3 2366 | sub RC, RA // start > end? 2367 | jl ->fff_emptystr 2368 | lea RB, [STR:RB+RA+#STR-1] 2369 | add RC, 1 2370 |4: 2371 |.if X64 2372 | mov TMP3, RC 2373 |.else 2374 | mov ARG3, RC 2375 |.endif 2376 | mov RD, RB 2377 | jmp ->fff_newstr 2378 | 2379 |5: // Negative end or overflow. 2380 | jl >6 2381 | lea RC, [RC+RB+1] // end = end+(len+1) 2382 | jmp <2 2383 |6: // Overflow. 2384 | mov RC, RB // end = len 2385 | jmp <2 2386 | 2387 |7: // Negative start or underflow. 2388 | je >8 2389 | add RA, RB // start = start+(len+1) 2390 | add RA, 1 2391 | jg <3 // start > 0? 2392 |8: // Underflow. 2393 | mov RA, 1 // start = 1 2394 | jmp <3 2395 | 2396 |->fff_emptystr: // Range underflow. 2397 | xor RC, RC // Zero length. Any ptr in RB is ok. 2398 | jmp <4 2399 | 2400 |.macro ffstring_op, name 2401 | .ffunc_1 string_ .. name 2402 | ffgccheck 2403 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2404 | mov L:RB, SAVE_L 2405 | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] 2406 | mov L:RB->base, BASE 2407 | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE 2408 | mov RCa, SBUF:FCARG1->b 2409 | mov SBUF:FCARG1->L, L:RB 2410 | mov SBUF:FCARG1->w, RCa 2411 | mov SAVE_PC, PC 2412 | call extern lj_buf_putstr_ .. name .. @8 2413 | mov FCARG1, eax 2414 | call extern lj_buf_tostr@4 2415 | jmp ->fff_resstr 2416 |.endmacro 2417 | 2418 |ffstring_op reverse 2419 |ffstring_op lower 2420 |ffstring_op upper 2421 | 2422 |//-- Bit library -------------------------------------------------------- 2423 | 2424 |.macro .ffunc_bit, name, kind, fdef 2425 | fdef name 2426 |.if kind == 2 2427 | sseconst_tobit xmm1, RBa 2428 |.endif 2429 | cmp dword [BASE+4], LJ_TISNUM 2430 |.if DUALNUM 2431 | jne >1 2432 | mov RB, dword [BASE] 2433 |.if kind > 0 2434 | jmp >2 2435 |.else 2436 | jmp ->fff_resbit 2437 |.endif 2438 |1: 2439 | ja ->fff_fallback 2440 |.else 2441 | jae ->fff_fallback 2442 |.endif 2443 | movsd xmm0, qword [BASE] 2444 |.if kind < 2 2445 | sseconst_tobit xmm1, RBa 2446 |.endif 2447 | addsd xmm0, xmm1 2448 | movd RB, xmm0 2449 |2: 2450 |.endmacro 2451 | 2452 |.macro .ffunc_bit, name, kind 2453 | .ffunc_bit name, kind, .ffunc_1 2454 |.endmacro 2455 | 2456 |.ffunc_bit bit_tobit, 0 2457 | jmp ->fff_resbit 2458 | 2459 |.macro .ffunc_bit_op, name, ins 2460 | .ffunc_bit name, 2 2461 | mov TMP2, NARGS:RD // Save for fallback. 2462 | lea RD, [BASE+NARGS:RD*8-16] 2463 |1: 2464 | cmp RD, BASE 2465 | jbe ->fff_resbit 2466 | cmp dword [RD+4], LJ_TISNUM 2467 |.if DUALNUM 2468 | jne >2 2469 | ins RB, dword [RD] 2470 | sub RD, 8 2471 | jmp <1 2472 |2: 2473 | ja ->fff_fallback_bit_op 2474 |.else 2475 | jae ->fff_fallback_bit_op 2476 |.endif 2477 | movsd xmm0, qword [RD] 2478 | addsd xmm0, xmm1 2479 | movd RA, xmm0 2480 | ins RB, RA 2481 | sub RD, 8 2482 | jmp <1 2483 |.endmacro 2484 | 2485 |.ffunc_bit_op bit_band, and 2486 |.ffunc_bit_op bit_bor, or 2487 |.ffunc_bit_op bit_bxor, xor 2488 | 2489 |.ffunc_bit bit_bswap, 1 2490 | bswap RB 2491 | jmp ->fff_resbit 2492 | 2493 |.ffunc_bit bit_bnot, 1 2494 | not RB 2495 |.if DUALNUM 2496 | jmp ->fff_resbit 2497 |.else 2498 |->fff_resbit: 2499 | cvtsi2sd xmm0, RB 2500 | jmp ->fff_resxmm0 2501 |.endif 2502 | 2503 |->fff_fallback_bit_op: 2504 | mov NARGS:RD, TMP2 // Restore for fallback 2505 | jmp ->fff_fallback 2506 | 2507 |.macro .ffunc_bit_sh, name, ins 2508 |.if DUALNUM 2509 | .ffunc_bit name, 1, .ffunc_2 2510 | // Note: no inline conversion from number for 2nd argument! 2511 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback 2512 | mov RA, dword [BASE+8] 2513 |.else 2514 | .ffunc_nnsse name 2515 | sseconst_tobit xmm2, RBa 2516 | addsd xmm0, xmm2 2517 | addsd xmm1, xmm2 2518 | movd RB, xmm0 2519 | movd RA, xmm1 2520 |.endif 2521 | ins RB, cl // Assumes RA is ecx. 2522 | jmp ->fff_resbit 2523 |.endmacro 2524 | 2525 |.ffunc_bit_sh bit_lshift, shl 2526 |.ffunc_bit_sh bit_rshift, shr 2527 |.ffunc_bit_sh bit_arshift, sar 2528 |.ffunc_bit_sh bit_rol, rol 2529 |.ffunc_bit_sh bit_ror, ror 2530 | 2531 |//----------------------------------------------------------------------- 2532 | 2533 |->fff_fallback_2: 2534 | mov NARGS:RD, 1+2 // Other args are ignored, anyway. 2535 | jmp ->fff_fallback 2536 |->fff_fallback_1: 2537 | mov NARGS:RD, 1+1 // Other args are ignored, anyway. 2538 |->fff_fallback: // Call fast function fallback handler. 2539 | // BASE = new base, RD = nargs+1 2540 | mov L:RB, SAVE_L 2541 | mov PC, [BASE-4] // Fallback may overwrite PC. 2542 | mov SAVE_PC, PC // Redundant (but a defined value). 2543 | mov L:RB->base, BASE 2544 | lea RD, [BASE+NARGS:RD*8-8] 2545 | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler. 2546 | mov L:RB->top, RD 2547 | mov CFUNC:RD, [BASE-8] 2548 | cmp RA, L:RB->maxstack 2549 | ja >5 // Need to grow stack. 2550 |.if X64 2551 | mov CARG1d, L:RB 2552 |.else 2553 | mov ARG1, L:RB 2554 |.endif 2555 | call aword CFUNC:RD->f // (lua_State *L) 2556 | mov BASE, L:RB->base 2557 | // Either throws an error, or recovers and returns -1, 0 or nresults+1. 2558 | test RD, RD; jg ->fff_res // Returned nresults+1? 2559 |1: 2560 | mov RA, L:RB->top 2561 | sub RA, BASE 2562 | shr RA, 3 2563 | test RD, RD 2564 | lea NARGS:RD, [RA+1] 2565 | mov LFUNC:RB, [BASE-8] 2566 | jne ->vm_call_tail // Returned -1? 2567 | ins_callt // Returned 0: retry fast path. 2568 | 2569 |// Reconstruct previous base for vmeta_call during tailcall. 2570 |->vm_call_tail: 2571 | mov RA, BASE 2572 | test PC, FRAME_TYPE 2573 | jnz >3 2574 | movzx RB, PC_RA 2575 | not RBa // Note: ~RB = -(RB+1) 2576 | lea BASE, [BASE+RB*8] // base = base - (RB+1)*8 2577 | jmp ->vm_call_dispatch // Resolve again for tailcall. 2578 |3: 2579 | mov RB, PC 2580 | and RB, -8 2581 | sub BASE, RB 2582 | jmp ->vm_call_dispatch // Resolve again for tailcall. 2583 | 2584 |5: // Grow stack for fallback handler. 2585 | mov FCARG2, LUA_MINSTACK 2586 | mov FCARG1, L:RB 2587 | call extern lj_state_growstack@8 // (lua_State *L, int n) 2588 | mov BASE, L:RB->base 2589 | xor RD, RD // Simulate a return 0. 2590 | jmp <1 // Dumb retry (goes through ff first). 2591 | 2592 |->fff_gcstep: // Call GC step function. 2593 | // BASE = new base, RD = nargs+1 2594 | pop RBa // Must keep stack at same level. 2595 | mov TMPa, RBa // Save return address 2596 | mov L:RB, SAVE_L 2597 | mov SAVE_PC, PC // Redundant (but a defined value). 2598 | mov L:RB->base, BASE 2599 | lea RD, [BASE+NARGS:RD*8-8] 2600 | mov FCARG1, L:RB 2601 | mov L:RB->top, RD 2602 | call extern lj_gc_step@4 // (lua_State *L) 2603 | mov BASE, L:RB->base 2604 | mov RD, L:RB->top 2605 | sub RD, BASE 2606 | shr RD, 3 2607 | add NARGS:RD, 1 2608 | mov RBa, TMPa 2609 | push RBa // Restore return address. 2610 | ret 2611 | 2612 |//----------------------------------------------------------------------- 2613 |//-- Special dispatch targets ------------------------------------------- 2614 |//----------------------------------------------------------------------- 2615 | 2616 |->vm_record: // Dispatch target for recording phase. 2617 |.if JIT 2618 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] 2619 | test RDL, HOOK_VMEVENT // No recording while in vmevent. 2620 | jnz >5 2621 | // Decrement the hookcount for consistency, but always do the call. 2622 | test RDL, HOOK_ACTIVE 2623 | jnz >1 2624 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT 2625 | jz >1 2626 | dec dword [DISPATCH+DISPATCH_GL(hookcount)] 2627 | jmp >1 2628 |.endif 2629 | 2630 |->vm_rethook: // Dispatch target for return hooks. 2631 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] 2632 | test RDL, HOOK_ACTIVE // Hook already active? 2633 | jnz >5 2634 | jmp >1 2635 | 2636 |->vm_inshook: // Dispatch target for instr/line hooks. 2637 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] 2638 | test RDL, HOOK_ACTIVE // Hook already active? 2639 | jnz >5 2640 | 2641 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT 2642 | jz >5 2643 | dec dword [DISPATCH+DISPATCH_GL(hookcount)] 2644 | jz >1 2645 | test RDL, LUA_MASKLINE 2646 | jz >5 2647 |1: 2648 | mov L:RB, SAVE_L 2649 | mov L:RB->base, BASE 2650 | mov FCARG2, PC // Caveat: FCARG2 == BASE 2651 | mov FCARG1, L:RB 2652 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. 2653 | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc) 2654 |3: 2655 | mov BASE, L:RB->base 2656 |4: 2657 | movzx RA, PC_RA 2658 |5: 2659 | movzx OP, PC_OP 2660 | movzx RD, PC_RD 2661 |.if X64 2662 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins. 2663 |.else 2664 | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Re-dispatch to static ins. 2665 |.endif 2666 | 2667 |->cont_hook: // Continue from hook yield. 2668 | add PC, 4 2669 | mov RA, [RB-24] 2670 | mov MULTRES, RA // Restore MULTRES for *M ins. 2671 | jmp <4 2672 | 2673 |->vm_hotloop: // Hot loop counter underflow. 2674 |.if JIT 2675 | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L). 2676 | mov RB, LFUNC:RB->pc 2677 | movzx RD, byte [RB+PC2PROTO(framesize)] 2678 | lea RD, [BASE+RD*8] 2679 | mov L:RB, SAVE_L 2680 | mov L:RB->base, BASE 2681 | mov L:RB->top, RD 2682 | mov FCARG2, PC 2683 | lea FCARG1, [DISPATCH+GG_DISP2J] 2684 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa 2685 | mov SAVE_PC, PC 2686 | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc) 2687 | jmp <3 2688 |.endif 2689 | 2690 |->vm_callhook: // Dispatch target for call hooks. 2691 | mov SAVE_PC, PC 2692 |.if JIT 2693 | jmp >1 2694 |.endif 2695 | 2696 |->vm_hotcall: // Hot call counter underflow. 2697 |.if JIT 2698 | mov SAVE_PC, PC 2699 | or PC, 1 // Marker for hot call. 2700 |1: 2701 |.endif 2702 | lea RD, [BASE+NARGS:RD*8-8] 2703 | mov L:RB, SAVE_L 2704 | mov L:RB->base, BASE 2705 | mov L:RB->top, RD 2706 | mov FCARG2, PC 2707 | mov FCARG1, L:RB 2708 | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc) 2709 | // ASMFunction returned in eax/rax (RDa). 2710 | mov SAVE_PC, 0 // Invalidate for subsequent line hook. 2711 |.if JIT 2712 | and PC, -2 2713 |.endif 2714 | mov BASE, L:RB->base 2715 | mov RAa, RDa 2716 | mov RD, L:RB->top 2717 | sub RD, BASE 2718 | mov RBa, RAa 2719 | movzx RA, PC_RA 2720 | shr RD, 3 2721 | add NARGS:RD, 1 2722 | jmp RBa 2723 | 2724 |->cont_stitch: // Trace stitching. 2725 |.if JIT 2726 | // BASE = base, RC = result, RB = mbase 2727 | mov TRACE:RA, [RB-24] // Save previous trace. 2728 | mov TMP1, TRACE:RA 2729 | mov TMP3, DISPATCH // Need one more register. 2730 | mov DISPATCH, MULTRES 2731 | movzx RA, PC_RA 2732 | lea RA, [BASE+RA*8] // Call base. 2733 | sub DISPATCH, 1 2734 | jz >2 2735 |1: // Move results down. 2736 |.if X64 2737 | mov RBa, [RC] 2738 | mov [RA], RBa 2739 |.else 2740 | mov RB, [RC] 2741 | mov [RA], RB 2742 | mov RB, [RC+4] 2743 | mov [RA+4], RB 2744 |.endif 2745 | add RC, 8 2746 | add RA, 8 2747 | sub DISPATCH, 1 2748 | jnz <1 2749 |2: 2750 | movzx RC, PC_RA 2751 | movzx RB, PC_RB 2752 | add RC, RB 2753 | lea RC, [BASE+RC*8-8] 2754 |3: 2755 | cmp RC, RA 2756 | ja >9 // More results wanted? 2757 | 2758 | mov DISPATCH, TMP3 2759 | mov TRACE:RD, TMP1 // Get previous trace. 2760 | movzx RB, word TRACE:RD->traceno 2761 | movzx RD, word TRACE:RD->link 2762 | cmp RD, RB 2763 | je ->cont_nop // Blacklisted. 2764 | test RD, RD 2765 | jne =>BC_JLOOP // Jump to stitched trace. 2766 | 2767 | // Stitch a new trace to the previous trace. 2768 | mov [DISPATCH+DISPATCH_J(exitno)], RB 2769 | mov L:RB, SAVE_L 2770 | mov L:RB->base, BASE 2771 | mov FCARG2, PC 2772 | lea FCARG1, [DISPATCH+GG_DISP2J] 2773 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa 2774 | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc) 2775 | mov BASE, L:RB->base 2776 | jmp ->cont_nop 2777 | 2778 |9: // Fill up results with nil. 2779 | mov dword [RA+4], LJ_TNIL 2780 | add RA, 8 2781 | jmp <3 2782 |.endif 2783 | 2784 |->vm_profhook: // Dispatch target for profiler hook. 2785#if LJ_HASPROFILE 2786 | mov L:RB, SAVE_L 2787 | mov L:RB->base, BASE 2788 | mov FCARG2, PC // Caveat: FCARG2 == BASE 2789 | mov FCARG1, L:RB 2790 | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc) 2791 | mov BASE, L:RB->base 2792 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. 2793 | sub PC, 4 2794 | jmp ->cont_nop 2795#endif 2796 | 2797 |//----------------------------------------------------------------------- 2798 |//-- Trace exit handler ------------------------------------------------- 2799 |//----------------------------------------------------------------------- 2800 | 2801 |// Called from an exit stub with the exit number on the stack. 2802 |// The 16 bit exit number is stored with two (sign-extended) push imm8. 2803 |->vm_exit_handler: 2804 |.if JIT 2805 |.if X64 2806 | push r13; push r12 2807 | push r11; push r10; push r9; push r8 2808 | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp 2809 | push rbx; push rdx; push rcx; push rax 2810 | movzx RC, byte [rbp-8] // Reconstruct exit number. 2811 | mov RCH, byte [rbp-16] 2812 | mov [rbp-8], r15; mov [rbp-16], r14 2813 |.else 2814 | push ebp; lea ebp, [esp+12]; push ebp 2815 | push ebx; push edx; push ecx; push eax 2816 | movzx RC, byte [ebp-4] // Reconstruct exit number. 2817 | mov RCH, byte [ebp-8] 2818 | mov [ebp-4], edi; mov [ebp-8], esi 2819 |.endif 2820 | // Caveat: DISPATCH is ebx. 2821 | mov DISPATCH, [ebp] 2822 | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. 2823 | set_vmstate EXIT 2824 | mov [DISPATCH+DISPATCH_J(exitno)], RC 2825 | mov [DISPATCH+DISPATCH_J(parent)], RA 2826 |.if X64 2827 |.if X64WIN 2828 | sub rsp, 16*8+4*8 // Room for SSE regs + save area. 2829 |.else 2830 | sub rsp, 16*8 // Room for SSE regs. 2831 |.endif 2832 | add rbp, -128 2833 | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14 2834 | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12 2835 | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10 2836 | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8 2837 | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6 2838 | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4 2839 | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2 2840 | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0 2841 |.else 2842 | sub esp, 8*8+16 // Room for SSE regs + args. 2843 | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6 2844 | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4 2845 | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2 2846 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 2847 |.endif 2848 | // Caveat: RB is ebp. 2849 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)] 2850 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] 2851 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa 2852 | mov L:RB->base, BASE 2853 |.if X64WIN 2854 | lea CARG2, [rsp+4*8] 2855 |.elif X64 2856 | mov CARG2, rsp 2857 |.else 2858 | lea FCARG2, [esp+16] 2859 |.endif 2860 | lea FCARG1, [DISPATCH+GG_DISP2J] 2861 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 2862 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) 2863 | // MULTRES or negated error code returned in eax (RD). 2864 | mov RAa, L:RB->cframe 2865 | and RAa, CFRAME_RAWMASK 2866 |.if X64WIN 2867 | // Reposition stack later. 2868 |.elif X64 2869 | mov rsp, RAa // Reposition stack to C frame. 2870 |.else 2871 | mov esp, RAa // Reposition stack to C frame. 2872 |.endif 2873 | mov [RAa+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield). 2874 | mov BASE, L:RB->base 2875 | mov PC, [RAa+CFRAME_OFS_PC] // Get SAVE_PC. 2876 |.if X64 2877 | jmp >1 2878 |.endif 2879 |.endif 2880 |->vm_exit_interp: 2881 | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. 2882 |.if JIT 2883 |.if X64 2884 | // Restore additional callee-save registers only used in compiled code. 2885 |.if X64WIN 2886 | lea RAa, [rsp+9*16+4*8] 2887 |1: 2888 | movdqa xmm15, [RAa-9*16] 2889 | movdqa xmm14, [RAa-8*16] 2890 | movdqa xmm13, [RAa-7*16] 2891 | movdqa xmm12, [RAa-6*16] 2892 | movdqa xmm11, [RAa-5*16] 2893 | movdqa xmm10, [RAa-4*16] 2894 | movdqa xmm9, [RAa-3*16] 2895 | movdqa xmm8, [RAa-2*16] 2896 | movdqa xmm7, [RAa-1*16] 2897 | mov rsp, RAa // Reposition stack to C frame. 2898 | movdqa xmm6, [RAa] 2899 | mov r15, CSAVE_3 2900 | mov r14, CSAVE_4 2901 |.else 2902 | add rsp, 16 // Reposition stack to C frame. 2903 |1: 2904 |.endif 2905 | mov r13, TMPa 2906 | mov r12, TMPQ 2907 |.endif 2908#ifdef LUA_USE_TRACE_LOGS 2909 |.if X64 2910 | mov FCARG1, SAVE_L 2911 | mov L:FCARG1->base, BASE 2912 | mov RB, RD // Save RD 2913 | mov TMP1, PC // Save PC 2914 | mov CARG3d, PC // CARG3d == BASE 2915 | mov FCARG2, dword [DISPATCH+DISPATCH_GL(vmstate)] 2916 | call extern lj_log_trace_direct_exit@8 2917 | mov PC, TMP1 2918 | mov RD, RB 2919 | mov RB, SAVE_L 2920 | mov BASE, L:RB->base 2921 |.endif 2922#endif 2923 | test RD, RD; js >9 // Check for error from exit. 2924 | mov L:RB, SAVE_L 2925 | mov MULTRES, RD 2926 | mov LFUNC:KBASE, [BASE-8] 2927 | mov KBASE, LFUNC:KBASE->pc 2928 | mov KBASE, [KBASE+PC2PROTO(k)] 2929 | mov L:RB->base, BASE 2930 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 2931 | set_vmstate INTERP 2932 | // Modified copy of ins_next which handles function header dispatch, too. 2933 | mov RC, [PC] 2934 | movzx RA, RCH 2935 | movzx OP, RCL 2936 | add PC, 4 2937 | shr RC, 16 2938 | cmp OP, BC_FUNCF // Function header? 2939 | jb >3 2940 | cmp OP, BC_FUNCC+2 // Fast function? 2941 | jae >4 2942 |2: 2943 | mov RC, MULTRES // RC/RD holds nres+1. 2944 |3: 2945 |.if X64 2946 | jmp aword [DISPATCH+OP*8] 2947 |.else 2948 | jmp aword [DISPATCH+OP*4] 2949 |.endif 2950 | 2951 |4: // Check frame below fast function. 2952 | mov RC, [BASE-4] 2953 | test RC, FRAME_TYPE 2954 | jnz <2 // Trace stitching continuation? 2955 | // Otherwise set KBASE for Lua function below fast function. 2956 | movzx RC, byte [RC-3] 2957 | not RCa 2958 | mov LFUNC:KBASE, [BASE+RC*8-8] 2959 | mov KBASE, LFUNC:KBASE->pc 2960 | mov KBASE, [KBASE+PC2PROTO(k)] 2961 | jmp <2 2962 | 2963 |9: // Rethrow error from the right C frame. 2964 | mov FCARG2, RD 2965 | mov FCARG1, L:RB 2966 | neg FCARG2 2967 | call extern lj_err_trace@8 // (lua_State *L, int errcode) 2968 |.endif 2969 | 2970 |//----------------------------------------------------------------------- 2971 |//-- Math helper functions ---------------------------------------------- 2972 |//----------------------------------------------------------------------- 2973 | 2974 |// FP value rounding. Called by math.floor/math.ceil fast functions 2975 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. 2976 |.macro vm_round, name, mode, cond 2977 |->name: 2978 |.if not X64 and cond 2979 | movsd xmm0, qword [esp+4] 2980 | call ->name .. _sse 2981 | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. 2982 | fld qword [esp+4] 2983 | ret 2984 |.endif 2985 | 2986 |->name .. _sse: 2987 | sseconst_abs xmm2, RDa 2988 | sseconst_2p52 xmm3, RDa 2989 | movaps xmm1, xmm0 2990 | andpd xmm1, xmm2 // |x| 2991 | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|. 2992 | jbe >1 2993 | andnpd xmm2, xmm0 // Isolate sign bit. 2994 |.if mode == 2 // trunc(x)? 2995 | movaps xmm0, xmm1 2996 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 2997 | subsd xmm1, xmm3 2998 | sseconst_1 xmm3, RDa 2999 | cmpsd xmm0, xmm1, 1 // |x| < result? 3000 | andpd xmm0, xmm3 3001 | subsd xmm1, xmm0 // If yes, subtract -1. 3002 | orpd xmm1, xmm2 // Merge sign bit back in. 3003 |.else 3004 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 3005 | subsd xmm1, xmm3 3006 | orpd xmm1, xmm2 // Merge sign bit back in. 3007 | .if mode == 1 // ceil(x)? 3008 | sseconst_m1 xmm2, RDa // Must subtract -1 to preserve -0. 3009 | cmpsd xmm0, xmm1, 6 // x > result? 3010 | .else // floor(x)? 3011 | sseconst_1 xmm2, RDa 3012 | cmpsd xmm0, xmm1, 1 // x < result? 3013 | .endif 3014 | andpd xmm0, xmm2 3015 | subsd xmm1, xmm0 // If yes, subtract +-1. 3016 |.endif 3017 | movaps xmm0, xmm1 3018 |1: 3019 | ret 3020 |.endmacro 3021 | 3022 | vm_round vm_floor, 0, 1 3023 | vm_round vm_ceil, 1, JIT 3024 | vm_round vm_trunc, 2, JIT 3025 | 3026 |// FP modulo x%y. Called by BC_MOD* and vm_arith. 3027 |->vm_mod: 3028 |// Args in xmm0/xmm1, return value in xmm0. 3029 |// Caveat: xmm0-xmm5 and RC (eax) modified! 3030 | movaps xmm5, xmm0 3031 | divsd xmm0, xmm1 3032 | sseconst_abs xmm2, RDa 3033 | sseconst_2p52 xmm3, RDa 3034 | movaps xmm4, xmm0 3035 | andpd xmm4, xmm2 // |x/y| 3036 | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|. 3037 | jbe >1 3038 | andnpd xmm2, xmm0 // Isolate sign bit. 3039 | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52 3040 | subsd xmm4, xmm3 3041 | orpd xmm4, xmm2 // Merge sign bit back in. 3042 | sseconst_1 xmm2, RDa 3043 | cmpsd xmm0, xmm4, 1 // x/y < result? 3044 | andpd xmm0, xmm2 3045 | subsd xmm4, xmm0 // If yes, subtract 1.0. 3046 | movaps xmm0, xmm5 3047 | mulsd xmm1, xmm4 3048 | subsd xmm0, xmm1 3049 | ret 3050 |1: 3051 | mulsd xmm1, xmm0 3052 | movaps xmm0, xmm5 3053 | subsd xmm0, xmm1 3054 | ret 3055 | 3056 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. 3057 |->vm_powi_sse: 3058 | cmp eax, 1; jle >6 // i<=1? 3059 | // Now 1 < (unsigned)i <= 0x80000000. 3060 |1: // Handle leading zeros. 3061 | test eax, 1; jnz >2 3062 | mulsd xmm0, xmm0 3063 | shr eax, 1 3064 | jmp <1 3065 |2: 3066 | shr eax, 1; jz >5 3067 | movaps xmm1, xmm0 3068 |3: // Handle trailing bits. 3069 | mulsd xmm0, xmm0 3070 | shr eax, 1; jz >4 3071 | jnc <3 3072 | mulsd xmm1, xmm0 3073 | jmp <3 3074 |4: 3075 | mulsd xmm0, xmm1 3076 |5: 3077 | ret 3078 |6: 3079 | je <5 // x^1 ==> x 3080 | jb >7 // x^0 ==> 1 3081 | neg eax 3082 | call <1 3083 | sseconst_1 xmm1, RDa 3084 | divsd xmm1, xmm0 3085 | movaps xmm0, xmm1 3086 | ret 3087 |7: 3088 | sseconst_1 xmm0, RDa 3089 | ret 3090 | 3091 |//----------------------------------------------------------------------- 3092 |//-- Miscellaneous functions -------------------------------------------- 3093 |//----------------------------------------------------------------------- 3094 | 3095 |// int lj_vm_cpuid(uint32_t f, uint32_t res[4]) 3096 |->vm_cpuid: 3097 |.if X64 3098 | mov eax, CARG1d 3099 | .if X64WIN; push rsi; mov rsi, CARG2; .endif 3100 | push rbx 3101 | xor ecx, ecx 3102 | cpuid 3103 | mov [rsi], eax 3104 | mov [rsi+4], ebx 3105 | mov [rsi+8], ecx 3106 | mov [rsi+12], edx 3107 | pop rbx 3108 | .if X64WIN; pop rsi; .endif 3109 | ret 3110 |.else 3111 | pushfd 3112 | pop edx 3113 | mov ecx, edx 3114 | xor edx, 0x00200000 // Toggle ID bit in flags. 3115 | push edx 3116 | popfd 3117 | pushfd 3118 | pop edx 3119 | xor eax, eax // Zero means no features supported. 3120 | cmp ecx, edx 3121 | jz >1 // No ID toggle means no CPUID support. 3122 | mov eax, [esp+4] // Argument 1 is function number. 3123 | push edi 3124 | push ebx 3125 | xor ecx, ecx 3126 | cpuid 3127 | mov edi, [esp+16] // Argument 2 is result area. 3128 | mov [edi], eax 3129 | mov [edi+4], ebx 3130 | mov [edi+8], ecx 3131 | mov [edi+12], edx 3132 | pop ebx 3133 | pop edi 3134 |1: 3135 | ret 3136 |.endif 3137 | 3138 |.define NEXT_TAB, TAB:FCARG1 3139 |.define NEXT_IDX, FCARG2 3140 |.define NEXT_PTR, RCa 3141 |.define NEXT_PTRd, RC 3142 |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro 3143 |.if X64 3144 |.define NEXT_TMP, CARG3d 3145 |.define NEXT_TMPq, CARG3 3146 |.define NEXT_ASIZE, CARG4d 3147 |.macro NEXT_ENTER; .endmacro 3148 |.macro NEXT_LEAVE; ret; .endmacro 3149 |.if X64WIN 3150 |.define NEXT_RES_PTR, [rsp+aword*5] 3151 |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro 3152 |.else 3153 |.define NEXT_RES_PTR, [rsp+aword*1] 3154 |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro 3155 |.endif 3156 |.else 3157 |.define NEXT_ASIZE, esi 3158 |.define NEXT_TMP, edi 3159 |.macro NEXT_ENTER; push esi; push edi; .endmacro 3160 |.macro NEXT_LEAVE; pop edi; pop esi; ret; .endmacro 3161 |.define NEXT_RES_PTR, [esp+dword*3] 3162 |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro 3163 |.endif 3164 | 3165 |// TValue *lj_vm_next(GCtab *t, uint32_t idx) 3166 |// Next idx returned in edx. 3167 |->vm_next: 3168 |.if JIT 3169 | NEXT_ENTER 3170 | mov NEXT_ASIZE, NEXT_TAB->asize 3171 |1: // Traverse array part. 3172 | cmp NEXT_IDX, NEXT_ASIZE; jae >5 3173 | mov NEXT_TMP, NEXT_TAB->array 3174 | cmp dword [NEXT_TMP+NEXT_IDX*8+4], LJ_TNIL; je >2 3175 | lea NEXT_PTR, NEXT_RES_PTR 3176 |.if X64 3177 | mov NEXT_TMPq, qword [NEXT_TMP+NEXT_IDX*8] 3178 | mov qword [NEXT_PTR], NEXT_TMPq 3179 |.else 3180 | mov NEXT_ASIZE, dword [NEXT_TMP+NEXT_IDX*8+4] 3181 | mov NEXT_TMP, dword [NEXT_TMP+NEXT_IDX*8] 3182 | mov dword [NEXT_PTR+4], NEXT_ASIZE 3183 | mov dword [NEXT_PTR], NEXT_TMP 3184 |.endif 3185 |.if DUALNUM 3186 | mov dword [NEXT_PTR+dword*3], LJ_TISNUM 3187 | mov dword [NEXT_PTR+dword*2], NEXT_IDX 3188 |.else 3189 | cvtsi2sd xmm0, NEXT_IDX 3190 | movsd qword [NEXT_PTR+dword*2], xmm0 3191 |.endif 3192 | NEXT_RES_IDX 1 3193 | NEXT_LEAVE 3194 |2: // Skip holes in array part. 3195 | add NEXT_IDX, 1 3196 | jmp <1 3197 | 3198 |5: // Traverse hash part. 3199 | sub NEXT_IDX, NEXT_ASIZE 3200 |6: 3201 | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 3202 | imul NEXT_PTRd, NEXT_IDX, #NODE 3203 | add NODE:NEXT_PTRd, dword NEXT_TAB->node 3204 | cmp dword NODE:NEXT_PTR->val.it, LJ_TNIL; je >7 3205 | NEXT_RES_IDXL NEXT_ASIZE+1 3206 | NEXT_LEAVE 3207 |7: // Skip holes in hash part. 3208 | add NEXT_IDX, 1 3209 | jmp <6 3210 | 3211 |9: // End of iteration. Set the key to nil (not the value). 3212 | NEXT_RES_IDX NEXT_ASIZE 3213 | lea NEXT_PTR, NEXT_RES_PTR 3214 | mov dword [NEXT_PTR+dword*3], LJ_TNIL 3215 | NEXT_LEAVE 3216 |.endif 3217 | 3218 |//----------------------------------------------------------------------- 3219 |//-- Assertions --------------------------------------------------------- 3220 |//----------------------------------------------------------------------- 3221 | 3222 |->assert_bad_for_arg_type: 3223#ifdef LUA_USE_ASSERT 3224 | int3 3225#endif 3226 | int3 3227 | 3228 |//----------------------------------------------------------------------- 3229 |//-- FFI helper functions ----------------------------------------------- 3230 |//----------------------------------------------------------------------- 3231 | 3232 |// Handler for callback functions. Callback slot number in ah/al. 3233 |->vm_ffi_callback: 3234 |.if FFI 3235 |.type CTSTATE, CTState, PC 3236 |.if not X64 3237 | sub esp, 16 // Leave room for SAVE_ERRF etc. 3238 |.endif 3239 | saveregs_ // ebp/rbp already saved. ebp now holds global_State *. 3240 | lea DISPATCH, [ebp+GG_G2DISP] 3241 | mov CTSTATE, GL:ebp->ctype_state 3242 | movzx eax, ax 3243 | mov CTSTATE->cb.slot, eax 3244 |.if X64 3245 | mov CTSTATE->cb.gpr[0], CARG1 3246 | mov CTSTATE->cb.gpr[1], CARG2 3247 | mov CTSTATE->cb.gpr[2], CARG3 3248 | mov CTSTATE->cb.gpr[3], CARG4 3249 | movsd qword CTSTATE->cb.fpr[0], xmm0 3250 | movsd qword CTSTATE->cb.fpr[1], xmm1 3251 | movsd qword CTSTATE->cb.fpr[2], xmm2 3252 | movsd qword CTSTATE->cb.fpr[3], xmm3 3253 |.if X64WIN 3254 | lea rax, [rsp+CFRAME_SIZE+4*8] 3255 |.else 3256 | lea rax, [rsp+CFRAME_SIZE] 3257 | mov CTSTATE->cb.gpr[4], CARG5 3258 | mov CTSTATE->cb.gpr[5], CARG6 3259 | movsd qword CTSTATE->cb.fpr[4], xmm4 3260 | movsd qword CTSTATE->cb.fpr[5], xmm5 3261 | movsd qword CTSTATE->cb.fpr[6], xmm6 3262 | movsd qword CTSTATE->cb.fpr[7], xmm7 3263 |.endif 3264 | mov CTSTATE->cb.stack, rax 3265 | mov CARG2, rsp 3266 |.else 3267 | lea eax, [esp+CFRAME_SIZE+16] 3268 | mov CTSTATE->cb.gpr[0], FCARG1 3269 | mov CTSTATE->cb.gpr[1], FCARG2 3270 | mov CTSTATE->cb.stack, eax 3271 | mov FCARG1, [esp+CFRAME_SIZE+12] // Move around misplaced retaddr/ebp. 3272 | mov FCARG2, [esp+CFRAME_SIZE+8] 3273 | mov SAVE_RET, FCARG1 3274 | mov SAVE_R4, FCARG2 3275 | mov FCARG2, esp 3276 |.endif 3277 | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok. 3278 | mov FCARG1, CTSTATE 3279 | call extern lj_ccallback_enter@8 // (CTState *cts, void *cf) 3280 | // lua_State * returned in eax (RD). 3281 | set_vmstate INTERP 3282 | mov BASE, L:RD->base 3283 | mov RD, L:RD->top 3284 | sub RD, BASE 3285 | mov LFUNC:RB, [BASE-8] 3286 | shr RD, 3 3287 | add RD, 1 3288 | ins_callt 3289 |.endif 3290 | 3291 |->cont_ffi_callback: // Return from FFI callback. 3292 |.if FFI 3293 | mov L:RA, SAVE_L 3294 | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] 3295 | mov aword CTSTATE->L, L:RAa 3296 | mov L:RA->base, BASE 3297 | mov L:RA->top, RB 3298 | mov FCARG1, CTSTATE 3299 | mov FCARG2, RC 3300 | call extern lj_ccallback_leave@8 // (CTState *cts, TValue *o) 3301 |.if X64 3302 | mov rax, CTSTATE->cb.gpr[0] 3303 | movsd xmm0, qword CTSTATE->cb.fpr[0] 3304 | jmp ->vm_leave_unw 3305 |.else 3306 | mov L:RB, SAVE_L 3307 | mov eax, CTSTATE->cb.gpr[0] 3308 | mov edx, CTSTATE->cb.gpr[1] 3309 | cmp dword CTSTATE->cb.gpr[2], 1 3310 | jb >7 3311 | je >6 3312 | fld qword CTSTATE->cb.fpr[0].d 3313 | jmp >7 3314 |6: 3315 | fld dword CTSTATE->cb.fpr[0].f 3316 |7: 3317 | mov ecx, L:RB->top 3318 | movzx ecx, word [ecx+6] // Get stack adjustment and copy up. 3319 | mov SAVE_L, ecx // Must be one slot above SAVE_RET 3320 | restoreregs 3321 | pop ecx // Move return addr from SAVE_RET. 3322 | add esp, [esp] // Adjust stack. 3323 | add esp, 16 3324 | push ecx 3325 | ret 3326 |.endif 3327 |.endif 3328 | 3329 |->vm_ffi_call@4: // Call C function via FFI. 3330 | // Caveat: needs special frame unwinding, see below. 3331 |.if FFI 3332 |.if X64 3333 | .type CCSTATE, CCallState, rbx 3334 | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 3335 |.else 3336 | .type CCSTATE, CCallState, ebx 3337 | push ebp; mov ebp, esp; push ebx; mov CCSTATE, FCARG1 3338 |.endif 3339 | 3340 | // Readjust stack. 3341 |.if X64 3342 | mov eax, CCSTATE->spadj 3343 | sub rsp, rax 3344 |.else 3345 | sub esp, CCSTATE->spadj 3346 |.if WIN 3347 | mov CCSTATE->spadj, esp 3348 |.endif 3349 |.endif 3350 | 3351 | // Copy stack slots. 3352 | movzx ecx, byte CCSTATE->nsp 3353 | sub ecx, 1 3354 | js >2 3355 |1: 3356 |.if X64 3357 | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] 3358 | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax 3359 |.else 3360 | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)] 3361 | mov [esp+ecx*4], eax 3362 |.endif 3363 | sub ecx, 1 3364 | jns <1 3365 |2: 3366 | 3367 |.if X64 3368 | movzx eax, byte CCSTATE->nfpr 3369 | mov CARG1, CCSTATE->gpr[0] 3370 | mov CARG2, CCSTATE->gpr[1] 3371 | mov CARG3, CCSTATE->gpr[2] 3372 | mov CARG4, CCSTATE->gpr[3] 3373 |.if not X64WIN 3374 | mov CARG5, CCSTATE->gpr[4] 3375 | mov CARG6, CCSTATE->gpr[5] 3376 |.endif 3377 | test eax, eax; jz >5 3378 | movaps xmm0, CCSTATE->fpr[0] 3379 | movaps xmm1, CCSTATE->fpr[1] 3380 | movaps xmm2, CCSTATE->fpr[2] 3381 | movaps xmm3, CCSTATE->fpr[3] 3382 |.if not X64WIN 3383 | cmp eax, 4; jbe >5 3384 | movaps xmm4, CCSTATE->fpr[4] 3385 | movaps xmm5, CCSTATE->fpr[5] 3386 | movaps xmm6, CCSTATE->fpr[6] 3387 | movaps xmm7, CCSTATE->fpr[7] 3388 |.endif 3389 |5: 3390 |.else 3391 | mov FCARG1, CCSTATE->gpr[0] 3392 | mov FCARG2, CCSTATE->gpr[1] 3393 |.endif 3394 | 3395 | call aword CCSTATE->func 3396 | 3397 |.if X64 3398 | mov CCSTATE->gpr[0], rax 3399 | movaps CCSTATE->fpr[0], xmm0 3400 |.if not X64WIN 3401 | mov CCSTATE->gpr[1], rdx 3402 | movaps CCSTATE->fpr[1], xmm1 3403 |.endif 3404 |.else 3405 | mov CCSTATE->gpr[0], eax 3406 | mov CCSTATE->gpr[1], edx 3407 | cmp byte CCSTATE->resx87, 1 3408 | jb >7 3409 | je >6 3410 | fstp qword CCSTATE->fpr[0].d[0] 3411 | jmp >7 3412 |6: 3413 | fstp dword CCSTATE->fpr[0].f[0] 3414 |7: 3415 |.if WIN 3416 | sub CCSTATE->spadj, esp 3417 |.endif 3418 |.endif 3419 | 3420 |.if X64 3421 | mov rbx, [rbp-8]; leave; ret 3422 |.else 3423 | mov ebx, [ebp-4]; leave; ret 3424 |.endif 3425 |.endif 3426 |// Note: vm_ffi_call must be the last function in this object file! 3427 | 3428 |//----------------------------------------------------------------------- 3429} 3430 3431/* Generate the code for a single instruction. */ 3432static void build_ins(BuildCtx *ctx, BCOp op, int defop) 3433{ 3434 int vk = 0; 3435 |// Note: aligning all instructions does not pay off. 3436 |=>defop: 3437 3438 switch (op) { 3439 3440 /* -- Comparison ops ---------------------------------------------------- */ 3441 3442 /* Remember: all ops branch for a true comparison, fall through otherwise. */ 3443 3444 |.macro jmp_comp, lt, ge, le, gt, target 3445 ||switch (op) { 3446 ||case BC_ISLT: 3447 | lt target 3448 ||break; 3449 ||case BC_ISGE: 3450 | ge target 3451 ||break; 3452 ||case BC_ISLE: 3453 | le target 3454 ||break; 3455 ||case BC_ISGT: 3456 | gt target 3457 ||break; 3458 ||default: break; /* Shut up GCC. */ 3459 ||} 3460 |.endmacro 3461 3462 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 3463 | // RA = src1, RD = src2, JMP with RD = target 3464 | ins_AD 3465 |.if DUALNUM 3466 | checkint RA, >7 3467 | checkint RD, >8 3468 | mov RB, dword [BASE+RA*8] 3469 | add PC, 4 3470 | cmp RB, dword [BASE+RD*8] 3471 | jmp_comp jge, jl, jg, jle, >9 3472 |6: 3473 | movzx RD, PC_RD 3474 | branchPC RD 3475 |9: 3476 | ins_next 3477 | 3478 |7: // RA is not an integer. 3479 | ja ->vmeta_comp 3480 | // RA is a number. 3481 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp 3482 | // RA is a number, RD is an integer. 3483 | cvtsi2sd xmm0, dword [BASE+RD*8] 3484 | jmp >2 3485 | 3486 |8: // RA is an integer, RD is not an integer. 3487 | ja ->vmeta_comp 3488 | // RA is an integer, RD is a number. 3489 | cvtsi2sd xmm1, dword [BASE+RA*8] 3490 | movsd xmm0, qword [BASE+RD*8] 3491 | add PC, 4 3492 | ucomisd xmm0, xmm1 3493 | jmp_comp jbe, ja, jb, jae, <9 3494 | jmp <6 3495 |.else 3496 | checknum RA, ->vmeta_comp 3497 | checknum RD, ->vmeta_comp 3498 |.endif 3499 |1: 3500 | movsd xmm0, qword [BASE+RD*8] 3501 |2: 3502 | add PC, 4 3503 | ucomisd xmm0, qword [BASE+RA*8] 3504 |3: 3505 | // Unordered: all of ZF CF PF set, ordered: PF clear. 3506 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 3507 |.if DUALNUM 3508 | jmp_comp jbe, ja, jb, jae, <9 3509 | jmp <6 3510 |.else 3511 | jmp_comp jbe, ja, jb, jae, >1 3512 | movzx RD, PC_RD 3513 | branchPC RD 3514 |1: 3515 | ins_next 3516 |.endif 3517 break; 3518 3519 case BC_ISEQV: case BC_ISNEV: 3520 vk = op == BC_ISEQV; 3521 | ins_AD // RA = src1, RD = src2, JMP with RD = target 3522 | mov RB, [BASE+RD*8+4] 3523 | add PC, 4 3524 |.if DUALNUM 3525 | cmp RB, LJ_TISNUM; jne >7 3526 | checkint RA, >8 3527 | mov RB, dword [BASE+RD*8] 3528 | cmp RB, dword [BASE+RA*8] 3529 if (vk) { 3530 | jne >9 3531 } else { 3532 | je >9 3533 } 3534 | movzx RD, PC_RD 3535 | branchPC RD 3536 |9: 3537 | ins_next 3538 | 3539 |7: // RD is not an integer. 3540 | ja >5 3541 | // RD is a number. 3542 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 3543 | // RD is a number, RA is an integer. 3544 | cvtsi2sd xmm0, dword [BASE+RA*8] 3545 | jmp >2 3546 | 3547 |8: // RD is an integer, RA is not an integer. 3548 | ja >5 3549 | // RD is an integer, RA is a number. 3550 | cvtsi2sd xmm0, dword [BASE+RD*8] 3551 | ucomisd xmm0, qword [BASE+RA*8] 3552 | jmp >4 3553 | 3554 |.else 3555 | cmp RB, LJ_TISNUM; jae >5 3556 | checknum RA, >5 3557 |.endif 3558 |1: 3559 | movsd xmm0, qword [BASE+RA*8] 3560 |2: 3561 | ucomisd xmm0, qword [BASE+RD*8] 3562 |4: 3563 iseqne_fp: 3564 if (vk) { 3565 | jp >2 // Unordered means not equal. 3566 | jne >2 3567 } else { 3568 | jp >2 // Unordered means not equal. 3569 | je >1 3570 } 3571 iseqne_end: 3572 if (vk) { 3573 |1: // EQ: Branch to the target. 3574 | movzx RD, PC_RD 3575 | branchPC RD 3576 |2: // NE: Fallthrough to next instruction. 3577 |.if not FFI 3578 |3: 3579 |.endif 3580 } else { 3581 |.if not FFI 3582 |3: 3583 |.endif 3584 |2: // NE: Branch to the target. 3585 | movzx RD, PC_RD 3586 | branchPC RD 3587 |1: // EQ: Fallthrough to next instruction. 3588 } 3589 if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV || 3590 op == BC_ISEQN || op == BC_ISNEN)) { 3591 | jmp <9 3592 } else { 3593 | ins_next 3594 } 3595 | 3596 if (op == BC_ISEQV || op == BC_ISNEV) { 3597 |5: // Either or both types are not numbers. 3598 |.if FFI 3599 | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd 3600 | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd 3601 |.endif 3602 | checktp RA, RB // Compare types. 3603 | jne <2 // Not the same type? 3604 | cmp RB, LJ_TISPRI 3605 | jae <1 // Same type and primitive type? 3606 | 3607 | // Same types and not a primitive type. Compare GCobj or pvalue. 3608 | mov RA, [BASE+RA*8] 3609 | mov RD, [BASE+RD*8] 3610 | cmp RA, RD 3611 | je <1 // Same GCobjs or pvalues? 3612 | cmp RB, LJ_TISTABUD 3613 | ja <2 // Different objects and not table/ud? 3614 |.if X64 3615 | cmp RB, LJ_TUDATA // And not 64 bit lightuserdata. 3616 | jb <2 3617 |.endif 3618 | 3619 | // Different tables or userdatas. Need to check __eq metamethod. 3620 | // Field metatable must be at same offset for GCtab and GCudata! 3621 | mov TAB:RB, TAB:RA->metatable 3622 | test TAB:RB, TAB:RB 3623 | jz <2 // No metatable? 3624 | test byte TAB:RB->nomm, 1<<MM_eq 3625 | jnz <2 // Or 'no __eq' flag set? 3626 if (vk) { 3627 | xor RB, RB // ne = 0 3628 } else { 3629 | mov RB, 1 // ne = 1 3630 } 3631 | jmp ->vmeta_equal // Handle __eq metamethod. 3632 } else { 3633 |.if FFI 3634 |3: 3635 | cmp RB, LJ_TCDATA 3636 if (LJ_DUALNUM && vk) { 3637 | jne <9 3638 } else { 3639 | jne <2 3640 } 3641 | jmp ->vmeta_equal_cd 3642 |.endif 3643 } 3644 break; 3645 case BC_ISEQS: case BC_ISNES: 3646 vk = op == BC_ISEQS; 3647 | ins_AND // RA = src, RD = str const, JMP with RD = target 3648 | mov RB, [BASE+RA*8+4] 3649 | add PC, 4 3650 | cmp RB, LJ_TSTR; jne >3 3651 | mov RA, [BASE+RA*8] 3652 | cmp RA, [KBASE+RD*4] 3653 iseqne_test: 3654 if (vk) { 3655 | jne >2 3656 } else { 3657 | je >1 3658 } 3659 goto iseqne_end; 3660 case BC_ISEQN: case BC_ISNEN: 3661 vk = op == BC_ISEQN; 3662 | ins_AD // RA = src, RD = num const, JMP with RD = target 3663 | mov RB, [BASE+RA*8+4] 3664 | add PC, 4 3665 |.if DUALNUM 3666 | cmp RB, LJ_TISNUM; jne >7 3667 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8 3668 | mov RB, dword [KBASE+RD*8] 3669 | cmp RB, dword [BASE+RA*8] 3670 if (vk) { 3671 | jne >9 3672 } else { 3673 | je >9 3674 } 3675 | movzx RD, PC_RD 3676 | branchPC RD 3677 |9: 3678 | ins_next 3679 | 3680 |7: // RA is not an integer. 3681 | ja >3 3682 | // RA is a number. 3683 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 3684 | // RA is a number, RD is an integer. 3685 | cvtsi2sd xmm0, dword [KBASE+RD*8] 3686 | jmp >2 3687 | 3688 |8: // RA is an integer, RD is a number. 3689 | cvtsi2sd xmm0, dword [BASE+RA*8] 3690 | ucomisd xmm0, qword [KBASE+RD*8] 3691 | jmp >4 3692 |.else 3693 | cmp RB, LJ_TISNUM; jae >3 3694 |.endif 3695 |1: 3696 | movsd xmm0, qword [KBASE+RD*8] 3697 |2: 3698 | ucomisd xmm0, qword [BASE+RA*8] 3699 |4: 3700 goto iseqne_fp; 3701 case BC_ISEQP: case BC_ISNEP: 3702 vk = op == BC_ISEQP; 3703 | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target 3704 | mov RB, [BASE+RA*8+4] 3705 | add PC, 4 3706 | cmp RB, RD 3707 if (!LJ_HASFFI) goto iseqne_test; 3708 if (vk) { 3709 | jne >3 3710 | movzx RD, PC_RD 3711 | branchPC RD 3712 |2: 3713 | ins_next 3714 |3: 3715 | cmp RB, LJ_TCDATA; jne <2 3716 | jmp ->vmeta_equal_cd 3717 } else { 3718 | je >2 3719 | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd 3720 | movzx RD, PC_RD 3721 | branchPC RD 3722 |2: 3723 | ins_next 3724 } 3725 break; 3726 3727 /* -- Unary test and copy ops ------------------------------------------- */ 3728 3729 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: 3730 | ins_AD // RA = dst or unused, RD = src, JMP with RD = target 3731 | mov RB, [BASE+RD*8+4] 3732 | add PC, 4 3733 | cmp RB, LJ_TISTRUECOND 3734 if (op == BC_IST || op == BC_ISTC) { 3735 | jae >1 3736 } else { 3737 | jb >1 3738 } 3739 if (op == BC_ISTC || op == BC_ISFC) { 3740 | mov [BASE+RA*8+4], RB 3741 | mov RB, [BASE+RD*8] 3742 | mov [BASE+RA*8], RB 3743 } 3744 | movzx RD, PC_RD 3745 | branchPC RD 3746 |1: // Fallthrough to the next instruction. 3747 | ins_next 3748 break; 3749 3750 case BC_ISTYPE: 3751 | ins_AD // RA = src, RD = -type 3752 | add RD, [BASE+RA*8+4] 3753 | jne ->vmeta_istype 3754 | ins_next 3755 break; 3756 case BC_ISNUM: 3757 | ins_AD // RA = src, RD = -(TISNUM-1) 3758 | checknum RA, ->vmeta_istype 3759 | ins_next 3760 break; 3761 3762 /* -- Unary ops --------------------------------------------------------- */ 3763 3764 case BC_MOV: 3765 | ins_AD // RA = dst, RD = src 3766 |.if X64 3767 | mov RBa, [BASE+RD*8] 3768 | mov [BASE+RA*8], RBa 3769 |.else 3770 | mov RB, [BASE+RD*8+4] 3771 | mov RD, [BASE+RD*8] 3772 | mov [BASE+RA*8+4], RB 3773 | mov [BASE+RA*8], RD 3774 |.endif 3775 | ins_next_ 3776 break; 3777 case BC_NOT: 3778 | ins_AD // RA = dst, RD = src 3779 | xor RB, RB 3780 | checktp RD, LJ_TISTRUECOND 3781 | adc RB, LJ_TTRUE 3782 | mov [BASE+RA*8+4], RB 3783 | ins_next 3784 break; 3785 case BC_UNM: 3786 | ins_AD // RA = dst, RD = src 3787 |.if DUALNUM 3788 | checkint RD, >5 3789 | mov RB, [BASE+RD*8] 3790 | neg RB 3791 | jo >4 3792 | mov dword [BASE+RA*8+4], LJ_TISNUM 3793 | mov dword [BASE+RA*8], RB 3794 |9: 3795 | ins_next 3796 |4: 3797 | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31. 3798 | mov dword [BASE+RA*8], 0 3799 | jmp <9 3800 |5: 3801 | ja ->vmeta_unm 3802 |.else 3803 | checknum RD, ->vmeta_unm 3804 |.endif 3805 | movsd xmm0, qword [BASE+RD*8] 3806 | sseconst_sign xmm1, RDa 3807 | xorps xmm0, xmm1 3808 | movsd qword [BASE+RA*8], xmm0 3809 |.if DUALNUM 3810 | jmp <9 3811 |.else 3812 | ins_next 3813 |.endif 3814 break; 3815 case BC_LEN: 3816 | ins_AD // RA = dst, RD = src 3817 | checkstr RD, >2 3818 | mov STR:RD, [BASE+RD*8] 3819 |.if DUALNUM 3820 | mov RD, dword STR:RD->len 3821 |1: 3822 | mov dword [BASE+RA*8+4], LJ_TISNUM 3823 | mov dword [BASE+RA*8], RD 3824 |.else 3825 | xorps xmm0, xmm0 3826 | cvtsi2sd xmm0, dword STR:RD->len 3827 |1: 3828 | movsd qword [BASE+RA*8], xmm0 3829 |.endif 3830 | ins_next 3831 |2: 3832 | checktab RD, ->vmeta_len 3833 | mov TAB:FCARG1, [BASE+RD*8] 3834#if LJ_52 3835 | mov TAB:RB, TAB:FCARG1->metatable 3836 | cmp TAB:RB, 0 3837 | jnz >9 3838 |3: 3839#endif 3840 |->BC_LEN_Z: 3841 | mov RB, BASE // Save BASE. 3842 | call extern lj_tab_len@4 // (GCtab *t) 3843 | // Length of table returned in eax (RD). 3844 |.if DUALNUM 3845 | // Nothing to do. 3846 |.else 3847 | cvtsi2sd xmm0, RD 3848 |.endif 3849 | mov BASE, RB // Restore BASE. 3850 | movzx RA, PC_RA 3851 | jmp <1 3852#if LJ_52 3853 |9: // Check for __len. 3854 | test byte TAB:RB->nomm, 1<<MM_len 3855 | jnz <3 3856 | jmp ->vmeta_len // 'no __len' flag NOT set: check. 3857#endif 3858 break; 3859 3860 /* -- Binary ops -------------------------------------------------------- */ 3861 3862 |.macro ins_arithpre, sseins, ssereg 3863 | ins_ABC 3864 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3865 ||switch (vk) { 3866 ||case 0: 3867 | checknum RB, ->vmeta_arith_vn 3868 | .if DUALNUM 3869 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn 3870 | .endif 3871 | movsd xmm0, qword [BASE+RB*8] 3872 | sseins ssereg, qword [KBASE+RC*8] 3873 || break; 3874 ||case 1: 3875 | checknum RB, ->vmeta_arith_nv 3876 | .if DUALNUM 3877 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv 3878 | .endif 3879 | movsd xmm0, qword [KBASE+RC*8] 3880 | sseins ssereg, qword [BASE+RB*8] 3881 || break; 3882 ||default: 3883 | checknum RB, ->vmeta_arith_vv 3884 | checknum RC, ->vmeta_arith_vv 3885 | movsd xmm0, qword [BASE+RB*8] 3886 | sseins ssereg, qword [BASE+RC*8] 3887 || break; 3888 ||} 3889 |.endmacro 3890 | 3891 |.macro ins_arithdn, intins 3892 | ins_ABC 3893 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3894 ||switch (vk) { 3895 ||case 0: 3896 | checkint RB, ->vmeta_arith_vn 3897 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_vn 3898 | mov RB, [BASE+RB*8] 3899 | intins RB, [KBASE+RC*8]; jo ->vmeta_arith_vno 3900 || break; 3901 ||case 1: 3902 | checkint RB, ->vmeta_arith_nv 3903 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_nv 3904 | mov RC, [KBASE+RC*8] 3905 | intins RC, [BASE+RB*8]; jo ->vmeta_arith_nvo 3906 || break; 3907 ||default: 3908 | checkint RB, ->vmeta_arith_vv 3909 | checkint RC, ->vmeta_arith_vv 3910 | mov RB, [BASE+RB*8] 3911 | intins RB, [BASE+RC*8]; jo ->vmeta_arith_vvo 3912 || break; 3913 ||} 3914 | mov dword [BASE+RA*8+4], LJ_TISNUM 3915 ||if (vk == 1) { 3916 | mov dword [BASE+RA*8], RC 3917 ||} else { 3918 | mov dword [BASE+RA*8], RB 3919 ||} 3920 | ins_next 3921 |.endmacro 3922 | 3923 |.macro ins_arithpost 3924 | movsd qword [BASE+RA*8], xmm0 3925 |.endmacro 3926 | 3927 |.macro ins_arith, sseins 3928 | ins_arithpre sseins, xmm0 3929 | ins_arithpost 3930 | ins_next 3931 |.endmacro 3932 | 3933 |.macro ins_arith, intins, sseins 3934 |.if DUALNUM 3935 | ins_arithdn intins 3936 |.else 3937 | ins_arith, sseins 3938 |.endif 3939 |.endmacro 3940 3941 | // RA = dst, RB = src1 or num const, RC = src2 or num const 3942 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3943 | ins_arith add, addsd 3944 break; 3945 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3946 | ins_arith sub, subsd 3947 break; 3948 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3949 | ins_arith imul, mulsd 3950 break; 3951 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3952 | ins_arith divsd 3953 break; 3954 case BC_MODVN: 3955 | ins_arithpre movsd, xmm1 3956 |->BC_MODVN_Z: 3957 | call ->vm_mod 3958 | ins_arithpost 3959 | ins_next 3960 break; 3961 case BC_MODNV: case BC_MODVV: 3962 | ins_arithpre movsd, xmm1 3963 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3964 break; 3965 case BC_POW: 3966 | ins_arithpre movsd, xmm1 3967 | mov RB, BASE 3968 |.if not X64 3969 | movsd FPARG1, xmm0 3970 | movsd FPARG3, xmm1 3971 |.endif 3972 | call extern pow 3973 | movzx RA, PC_RA 3974 | mov BASE, RB 3975 |.if X64 3976 | ins_arithpost 3977 |.else 3978 | fstp qword [BASE+RA*8] 3979 |.endif 3980 | ins_next 3981 break; 3982 3983 case BC_CAT: 3984 | ins_ABC // RA = dst, RB = src_start, RC = src_end 3985 |.if X64 3986 | mov L:CARG1d, SAVE_L 3987 | mov L:CARG1d->base, BASE 3988 | lea CARG2d, [BASE+RC*8] 3989 | mov CARG3d, RC 3990 | sub CARG3d, RB 3991 |->BC_CAT_Z: 3992 | mov L:RB, L:CARG1d 3993 |.else 3994 | lea RA, [BASE+RC*8] 3995 | sub RC, RB 3996 | mov ARG2, RA 3997 | mov ARG3, RC 3998 |->BC_CAT_Z: 3999 | mov L:RB, SAVE_L 4000 | mov ARG1, L:RB 4001 | mov L:RB->base, BASE 4002 |.endif 4003 | mov SAVE_PC, PC 4004 | call extern lj_meta_cat // (lua_State *L, TValue *top, int left) 4005 | // NULL (finished) or TValue * (metamethod) returned in eax (RC). 4006 | mov BASE, L:RB->base 4007 | test RC, RC 4008 | jnz ->vmeta_binop 4009 | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB]. 4010 | movzx RA, PC_RA 4011 |.if X64 4012 | mov RCa, [BASE+RB*8] 4013 | mov [BASE+RA*8], RCa 4014 |.else 4015 | mov RC, [BASE+RB*8+4] 4016 | mov RB, [BASE+RB*8] 4017 | mov [BASE+RA*8+4], RC 4018 | mov [BASE+RA*8], RB 4019 |.endif 4020 | ins_next 4021 break; 4022 4023 /* -- Constant ops ------------------------------------------------------ */ 4024 4025 case BC_KSTR: 4026 | ins_AND // RA = dst, RD = str const (~) 4027 | mov RD, [KBASE+RD*4] 4028 | mov dword [BASE+RA*8+4], LJ_TSTR 4029 | mov [BASE+RA*8], RD 4030 | ins_next 4031 break; 4032 case BC_KCDATA: 4033 |.if FFI 4034 | ins_AND // RA = dst, RD = cdata const (~) 4035 | mov RD, [KBASE+RD*4] 4036 | mov dword [BASE+RA*8+4], LJ_TCDATA 4037 | mov [BASE+RA*8], RD 4038 | ins_next 4039 |.endif 4040 break; 4041 case BC_KSHORT: 4042 | ins_AD // RA = dst, RD = signed int16 literal 4043 |.if DUALNUM 4044 | movsx RD, RDW 4045 | mov dword [BASE+RA*8+4], LJ_TISNUM 4046 | mov dword [BASE+RA*8], RD 4047 |.else 4048 | movsx RD, RDW // Sign-extend literal. 4049 | cvtsi2sd xmm0, RD 4050 | movsd qword [BASE+RA*8], xmm0 4051 |.endif 4052 | ins_next 4053 break; 4054 case BC_KNUM: 4055 | ins_AD // RA = dst, RD = num const 4056 | movsd xmm0, qword [KBASE+RD*8] 4057 | movsd qword [BASE+RA*8], xmm0 4058 | ins_next 4059 break; 4060 case BC_KPRI: 4061 | ins_AND // RA = dst, RD = primitive type (~) 4062 | mov [BASE+RA*8+4], RD 4063 | ins_next 4064 break; 4065 case BC_KNIL: 4066 | ins_AD // RA = dst_start, RD = dst_end 4067 | lea RA, [BASE+RA*8+12] 4068 | lea RD, [BASE+RD*8+4] 4069 | mov RB, LJ_TNIL 4070 | mov [RA-8], RB // Sets minimum 2 slots. 4071 |1: 4072 | mov [RA], RB 4073 | add RA, 8 4074 | cmp RA, RD 4075 | jbe <1 4076 | ins_next 4077 break; 4078 4079 /* -- Upvalue and function ops ------------------------------------------ */ 4080 4081 case BC_UGET: 4082 | ins_AD // RA = dst, RD = upvalue # 4083 | mov LFUNC:RB, [BASE-8] 4084 | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)] 4085 | mov RB, UPVAL:RB->v 4086 |.if X64 4087 | mov RDa, [RB] 4088 | mov [BASE+RA*8], RDa 4089 |.else 4090 | mov RD, [RB+4] 4091 | mov RB, [RB] 4092 | mov [BASE+RA*8+4], RD 4093 | mov [BASE+RA*8], RB 4094 |.endif 4095 | ins_next 4096 break; 4097 case BC_USETV: 4098#define TV2MARKOFS \ 4099 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) 4100 | ins_AD // RA = upvalue #, RD = src 4101 | mov LFUNC:RB, [BASE-8] 4102 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 4103 | cmp byte UPVAL:RB->closed, 0 4104 | mov RB, UPVAL:RB->v 4105 | mov RA, [BASE+RD*8] 4106 | mov RD, [BASE+RD*8+4] 4107 | mov [RB], RA 4108 | mov [RB+4], RD 4109 | jz >1 4110 | // Check barrier for closed upvalue. 4111 | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv) 4112 | jnz >2 4113 |1: 4114 | ins_next 4115 | 4116 |2: // Upvalue is black. Check if new value is collectable and white. 4117 | sub RD, LJ_TISGCV 4118 | cmp RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v) 4119 | jbe <1 4120 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) 4121 | jz <1 4122 | // Crossed a write barrier. Move the barrier forward. 4123 |.if X64 and not X64WIN 4124 | mov FCARG2, RB 4125 | mov RB, BASE // Save BASE. 4126 |.else 4127 | xchg FCARG2, RB // Save BASE (FCARG2 == BASE). 4128 |.endif 4129 | lea GL:FCARG1, [DISPATCH+GG_DISP2G] 4130 | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv) 4131 | mov BASE, RB // Restore BASE. 4132 | jmp <1 4133 break; 4134#undef TV2MARKOFS 4135 case BC_USETS: 4136 | ins_AND // RA = upvalue #, RD = str const (~) 4137 | mov LFUNC:RB, [BASE-8] 4138 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 4139 | mov GCOBJ:RA, [KBASE+RD*4] 4140 | mov RD, UPVAL:RB->v 4141 | mov [RD], GCOBJ:RA 4142 | mov dword [RD+4], LJ_TSTR 4143 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) 4144 | jnz >2 4145 |1: 4146 | ins_next 4147 | 4148 |2: // Check if string is white and ensure upvalue is closed. 4149 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str) 4150 | jz <1 4151 | cmp byte UPVAL:RB->closed, 0 4152 | jz <1 4153 | // Crossed a write barrier. Move the barrier forward. 4154 | mov RB, BASE // Save BASE (FCARG2 == BASE). 4155 | mov FCARG2, RD 4156 | lea GL:FCARG1, [DISPATCH+GG_DISP2G] 4157 | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv) 4158 | mov BASE, RB // Restore BASE. 4159 | jmp <1 4160 break; 4161 case BC_USETN: 4162 | ins_AD // RA = upvalue #, RD = num const 4163 | mov LFUNC:RB, [BASE-8] 4164 | movsd xmm0, qword [KBASE+RD*8] 4165 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 4166 | mov RA, UPVAL:RB->v 4167 | movsd qword [RA], xmm0 4168 | ins_next 4169 break; 4170 case BC_USETP: 4171 | ins_AND // RA = upvalue #, RD = primitive type (~) 4172 | mov LFUNC:RB, [BASE-8] 4173 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 4174 | mov RA, UPVAL:RB->v 4175 | mov [RA+4], RD 4176 | ins_next 4177 break; 4178 case BC_UCLO: 4179 | ins_AD // RA = level, RD = target 4180 | branchPC RD // Do this first to free RD. 4181 | mov L:RB, SAVE_L 4182 | cmp dword L:RB->openupval, 0 4183 | je >1 4184 | mov L:RB->base, BASE 4185 | lea FCARG2, [BASE+RA*8] // Caveat: FCARG2 == BASE 4186 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA 4187 | call extern lj_func_closeuv@8 // (lua_State *L, TValue *level) 4188 | mov BASE, L:RB->base 4189 |1: 4190 | ins_next 4191 break; 4192 4193 case BC_FNEW: 4194 | ins_AND // RA = dst, RD = proto const (~) (holding function prototype) 4195 |.if X64 4196 | mov L:RB, SAVE_L 4197 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. 4198 | mov CARG3d, [BASE-8] 4199 | mov CARG2d, [KBASE+RD*4] // Fetch GCproto *. 4200 | mov CARG1d, L:RB 4201 |.else 4202 | mov LFUNC:RA, [BASE-8] 4203 | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *. 4204 | mov L:RB, SAVE_L 4205 | mov ARG3, LFUNC:RA 4206 | mov ARG2, PROTO:RD 4207 | mov ARG1, L:RB 4208 | mov L:RB->base, BASE 4209 |.endif 4210 | mov SAVE_PC, PC 4211 | // (lua_State *L, GCproto *pt, GCfuncL *parent) 4212 | call extern lj_func_newL_gc 4213 | // GCfuncL * returned in eax (RC). 4214 | mov BASE, L:RB->base 4215 | movzx RA, PC_RA 4216 | mov [BASE+RA*8], LFUNC:RC 4217 | mov dword [BASE+RA*8+4], LJ_TFUNC 4218 | ins_next 4219 break; 4220 4221 /* -- Table ops --------------------------------------------------------- */ 4222 4223 case BC_TNEW: 4224 | ins_AD // RA = dst, RD = hbits|asize 4225 | mov L:RB, SAVE_L 4226 | mov L:RB->base, BASE 4227 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] 4228 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] 4229 | mov SAVE_PC, PC 4230 | jae >5 4231 |1: 4232 |.if X64 4233 | mov CARG3d, RD 4234 | and RD, 0x7ff 4235 | shr CARG3d, 11 4236 |.else 4237 | mov RA, RD 4238 | and RD, 0x7ff 4239 | shr RA, 11 4240 | mov ARG3, RA 4241 |.endif 4242 | cmp RD, 0x7ff 4243 | je >3 4244 |2: 4245 |.if X64 4246 | mov L:CARG1d, L:RB 4247 | mov CARG2d, RD 4248 |.else 4249 | mov ARG1, L:RB 4250 | mov ARG2, RD 4251 |.endif 4252 | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) 4253 | // Table * returned in eax (RC). 4254 | mov BASE, L:RB->base 4255 | movzx RA, PC_RA 4256 | mov [BASE+RA*8], TAB:RC 4257 | mov dword [BASE+RA*8+4], LJ_TTAB 4258 | ins_next 4259 |3: // Turn 0x7ff into 0x801. 4260 | mov RD, 0x801 4261 | jmp <2 4262 |5: 4263 | mov L:FCARG1, L:RB 4264 | call extern lj_gc_step_fixtop@4 // (lua_State *L) 4265 | movzx RD, PC_RD 4266 | jmp <1 4267 break; 4268 case BC_TDUP: 4269 | ins_AND // RA = dst, RD = table const (~) (holding template table) 4270 | mov L:RB, SAVE_L 4271 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] 4272 | mov SAVE_PC, PC 4273 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] 4274 | mov L:RB->base, BASE 4275 | jae >3 4276 |2: 4277 | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE 4278 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA 4279 | call extern lj_tab_dup@8 // (lua_State *L, Table *kt) 4280 | // Table * returned in eax (RC). 4281 | mov BASE, L:RB->base 4282 | movzx RA, PC_RA 4283 | mov [BASE+RA*8], TAB:RC 4284 | mov dword [BASE+RA*8+4], LJ_TTAB 4285 | ins_next 4286 |3: 4287 | mov L:FCARG1, L:RB 4288 | call extern lj_gc_step_fixtop@4 // (lua_State *L) 4289 | movzx RD, PC_RD // Need to reload RD. 4290 | not RDa 4291 | jmp <2 4292 break; 4293 4294 case BC_GGET: 4295 | ins_AND // RA = dst, RD = str const (~) 4296 | mov LFUNC:RB, [BASE-8] 4297 | mov TAB:RB, LFUNC:RB->env 4298 | mov STR:RC, [KBASE+RD*4] 4299 | jmp ->BC_TGETS_Z 4300 break; 4301 case BC_GSET: 4302 | ins_AND // RA = src, RD = str const (~) 4303 | mov LFUNC:RB, [BASE-8] 4304 | mov TAB:RB, LFUNC:RB->env 4305 | mov STR:RC, [KBASE+RD*4] 4306 | jmp ->BC_TSETS_Z 4307 break; 4308 4309 case BC_TGETV: 4310 | ins_ABC // RA = dst, RB = table, RC = key 4311 | checktab RB, ->vmeta_tgetv 4312 | mov TAB:RB, [BASE+RB*8] 4313 | 4314 | // Integer key? 4315 |.if DUALNUM 4316 | checkint RC, >5 4317 | mov RC, dword [BASE+RC*8] 4318 |.else 4319 | // Convert number to int and back and compare. 4320 | checknum RC, >5 4321 | movsd xmm0, qword [BASE+RC*8] 4322 | cvttsd2si RC, xmm0 4323 | cvtsi2sd xmm1, RC 4324 | ucomisd xmm0, xmm1 4325 | jne ->vmeta_tgetv // Generic numeric key? Use fallback. 4326 |.endif 4327 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4328 | jae ->vmeta_tgetv // Not in array part? Use fallback. 4329 | shl RC, 3 4330 | add RC, TAB:RB->array 4331 | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath. 4332 | je >2 4333 | // Get array slot. 4334 |.if X64 4335 | mov RBa, [RC] 4336 | mov [BASE+RA*8], RBa 4337 |.else 4338 | mov RB, [RC] 4339 | mov RC, [RC+4] 4340 | mov [BASE+RA*8], RB 4341 | mov [BASE+RA*8+4], RC 4342 |.endif 4343 |1: 4344 | ins_next 4345 | 4346 |2: // Check for __index if table value is nil. 4347 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. 4348 | jz >3 4349 | mov TAB:RA, TAB:RB->metatable 4350 | test byte TAB:RA->nomm, 1<<MM_index 4351 | jz ->vmeta_tgetv // 'no __index' flag NOT set: check. 4352 | movzx RA, PC_RA // Restore RA. 4353 |3: 4354 | mov dword [BASE+RA*8+4], LJ_TNIL 4355 | jmp <1 4356 | 4357 |5: // String key? 4358 | checkstr RC, ->vmeta_tgetv 4359 | mov STR:RC, [BASE+RC*8] 4360 | jmp ->BC_TGETS_Z 4361 break; 4362 case BC_TGETS: 4363 | ins_ABC // RA = dst, RB = table, RC = str const (~) 4364 | not RCa 4365 | mov STR:RC, [KBASE+RC*4] 4366 | checktab RB, ->vmeta_tgets 4367 | mov TAB:RB, [BASE+RB*8] 4368 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. 4369 | mov RA, TAB:RB->hmask 4370 | and RA, STR:RC->sid 4371 | imul RA, #NODE 4372 | add NODE:RA, TAB:RB->node 4373 |1: 4374 | cmp dword NODE:RA->key.it, LJ_TSTR 4375 | jne >4 4376 | cmp dword NODE:RA->key.gcr, STR:RC 4377 | jne >4 4378 | // Ok, key found. Assumes: offsetof(Node, val) == 0 4379 | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath. 4380 | je >5 // Key found, but nil value? 4381 | movzx RC, PC_RA 4382 | // Get node value. 4383 |.if X64 4384 | mov RBa, [RA] 4385 | mov [BASE+RC*8], RBa 4386 |.else 4387 | mov RB, [RA] 4388 | mov RA, [RA+4] 4389 | mov [BASE+RC*8], RB 4390 | mov [BASE+RC*8+4], RA 4391 |.endif 4392 |2: 4393 | ins_next 4394 | 4395 |3: 4396 | movzx RC, PC_RA 4397 | mov dword [BASE+RC*8+4], LJ_TNIL 4398 | jmp <2 4399 | 4400 |4: // Follow hash chain. 4401 | mov NODE:RA, NODE:RA->next 4402 | test NODE:RA, NODE:RA 4403 | jnz <1 4404 | // End of hash chain: key not found, nil result. 4405 | 4406 |5: // Check for __index if table value is nil. 4407 | mov TAB:RA, TAB:RB->metatable 4408 | test TAB:RA, TAB:RA 4409 | jz <3 // No metatable: done. 4410 | test byte TAB:RA->nomm, 1<<MM_index 4411 | jnz <3 // 'no __index' flag set: done. 4412 | jmp ->vmeta_tgets // Caveat: preserve STR:RC. 4413 break; 4414 case BC_TGETB: 4415 | ins_ABC // RA = dst, RB = table, RC = byte literal 4416 | checktab RB, ->vmeta_tgetb 4417 | mov TAB:RB, [BASE+RB*8] 4418 | cmp RC, TAB:RB->asize 4419 | jae ->vmeta_tgetb 4420 | shl RC, 3 4421 | add RC, TAB:RB->array 4422 | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath. 4423 | je >2 4424 | // Get array slot. 4425 |.if X64 4426 | mov RBa, [RC] 4427 | mov [BASE+RA*8], RBa 4428 |.else 4429 | mov RB, [RC] 4430 | mov RC, [RC+4] 4431 | mov [BASE+RA*8], RB 4432 | mov [BASE+RA*8+4], RC 4433 |.endif 4434 |1: 4435 | ins_next 4436 | 4437 |2: // Check for __index if table value is nil. 4438 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. 4439 | jz >3 4440 | mov TAB:RA, TAB:RB->metatable 4441 | test byte TAB:RA->nomm, 1<<MM_index 4442 | jz ->vmeta_tgetb // 'no __index' flag NOT set: check. 4443 | movzx RA, PC_RA // Restore RA. 4444 |3: 4445 | mov dword [BASE+RA*8+4], LJ_TNIL 4446 | jmp <1 4447 break; 4448 case BC_TGETR: 4449 | ins_ABC // RA = dst, RB = table, RC = key 4450 | mov TAB:RB, [BASE+RB*8] 4451 |.if DUALNUM 4452 | mov RC, dword [BASE+RC*8] 4453 |.else 4454 | cvttsd2si RC, qword [BASE+RC*8] 4455 |.endif 4456 | cmp RC, TAB:RB->asize 4457 | jae ->vmeta_tgetr // Not in array part? Use fallback. 4458 | shl RC, 3 4459 | add RC, TAB:RB->array 4460 | // Get array slot. 4461 |->BC_TGETR_Z: 4462 |.if X64 4463 | mov RBa, [RC] 4464 | mov [BASE+RA*8], RBa 4465 |.else 4466 | mov RB, [RC] 4467 | mov RC, [RC+4] 4468 | mov [BASE+RA*8], RB 4469 | mov [BASE+RA*8+4], RC 4470 |.endif 4471 |->BC_TGETR2_Z: 4472 | ins_next 4473 break; 4474 4475 case BC_TSETV: 4476 | ins_ABC // RA = src, RB = table, RC = key 4477 | checktab RB, ->vmeta_tsetv 4478 | mov TAB:RB, [BASE+RB*8] 4479 | 4480 | // Integer key? 4481 |.if DUALNUM 4482 | checkint RC, >5 4483 | mov RC, dword [BASE+RC*8] 4484 |.else 4485 | // Convert number to int and back and compare. 4486 | checknum RC, >5 4487 | movsd xmm0, qword [BASE+RC*8] 4488 | cvttsd2si RC, xmm0 4489 | cvtsi2sd xmm1, RC 4490 | ucomisd xmm0, xmm1 4491 | jne ->vmeta_tsetv // Generic numeric key? Use fallback. 4492 |.endif 4493 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4494 | jae ->vmeta_tsetv 4495 | shl RC, 3 4496 | add RC, TAB:RB->array 4497 | cmp dword [RC+4], LJ_TNIL 4498 | je >3 // Previous value is nil? 4499 |1: 4500 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) 4501 | jnz >7 4502 |2: // Set array slot. 4503 |.if X64 4504 | mov RBa, [BASE+RA*8] 4505 | mov [RC], RBa 4506 |.else 4507 | mov RB, [BASE+RA*8+4] 4508 | mov RA, [BASE+RA*8] 4509 | mov [RC+4], RB 4510 | mov [RC], RA 4511 |.endif 4512 | ins_next 4513 | 4514 |3: // Check for __newindex if previous value is nil. 4515 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. 4516 | jz <1 4517 | mov TAB:RA, TAB:RB->metatable 4518 | test byte TAB:RA->nomm, 1<<MM_newindex 4519 | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check. 4520 | movzx RA, PC_RA // Restore RA. 4521 | jmp <1 4522 | 4523 |5: // String key? 4524 | checkstr RC, ->vmeta_tsetv 4525 | mov STR:RC, [BASE+RC*8] 4526 | jmp ->BC_TSETS_Z 4527 | 4528 |7: // Possible table write barrier for the value. Skip valiswhite check. 4529 | barrierback TAB:RB, RA 4530 | movzx RA, PC_RA // Restore RA. 4531 | jmp <2 4532 break; 4533 case BC_TSETS: 4534 | ins_ABC // RA = src, RB = table, RC = str const (~) 4535 | not RCa 4536 | mov STR:RC, [KBASE+RC*4] 4537 | checktab RB, ->vmeta_tsets 4538 | mov TAB:RB, [BASE+RB*8] 4539 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. 4540 | mov RA, TAB:RB->hmask 4541 | and RA, STR:RC->sid 4542 | imul RA, #NODE 4543 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. 4544 | add NODE:RA, TAB:RB->node 4545 |1: 4546 | cmp dword NODE:RA->key.it, LJ_TSTR 4547 | jne >5 4548 | cmp dword NODE:RA->key.gcr, STR:RC 4549 | jne >5 4550 | // Ok, key found. Assumes: offsetof(Node, val) == 0 4551 | cmp dword [RA+4], LJ_TNIL 4552 | je >4 // Previous value is nil? 4553 |2: 4554 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) 4555 | jnz >7 4556 |3: // Set node value. 4557 | movzx RC, PC_RA 4558 |.if X64 4559 | mov RBa, [BASE+RC*8] 4560 | mov [RA], RBa 4561 |.else 4562 | mov RB, [BASE+RC*8+4] 4563 | mov RC, [BASE+RC*8] 4564 | mov [RA+4], RB 4565 | mov [RA], RC 4566 |.endif 4567 | ins_next 4568 | 4569 |4: // Check for __newindex if previous value is nil. 4570 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. 4571 | jz <2 4572 | mov TMP1, RA // Save RA. 4573 | mov TAB:RA, TAB:RB->metatable 4574 | test byte TAB:RA->nomm, 1<<MM_newindex 4575 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check. 4576 | mov RA, TMP1 // Restore RA. 4577 | jmp <2 4578 | 4579 |5: // Follow hash chain. 4580 | mov NODE:RA, NODE:RA->next 4581 | test NODE:RA, NODE:RA 4582 | jnz <1 4583 | // End of hash chain: key not found, add a new one. 4584 | 4585 | // But check for __newindex first. 4586 | mov TAB:RA, TAB:RB->metatable 4587 | test TAB:RA, TAB:RA 4588 | jz >6 // No metatable: continue. 4589 | test byte TAB:RA->nomm, 1<<MM_newindex 4590 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check. 4591 |6: 4592 | mov TMP1, STR:RC 4593 | mov TMP2, LJ_TSTR 4594 | mov TMP3, TAB:RB // Save TAB:RB for us. 4595 |.if X64 4596 | mov L:CARG1d, SAVE_L 4597 | mov L:CARG1d->base, BASE 4598 | lea CARG3, TMP1 4599 | mov CARG2d, TAB:RB 4600 | mov L:RB, L:CARG1d 4601 |.else 4602 | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. 4603 | mov ARG2, TAB:RB 4604 | mov L:RB, SAVE_L 4605 | mov ARG3, RC 4606 | mov ARG1, L:RB 4607 | mov L:RB->base, BASE 4608 |.endif 4609 | mov SAVE_PC, PC 4610 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) 4611 | // Handles write barrier for the new key. TValue * returned in eax (RC). 4612 | mov BASE, L:RB->base 4613 | mov TAB:RB, TMP3 // Need TAB:RB for barrier. 4614 | mov RA, eax 4615 | jmp <2 // Must check write barrier for value. 4616 | 4617 |7: // Possible table write barrier for the value. Skip valiswhite check. 4618 | barrierback TAB:RB, RC // Destroys STR:RC. 4619 | jmp <3 4620 break; 4621 case BC_TSETB: 4622 | ins_ABC // RA = src, RB = table, RC = byte literal 4623 | checktab RB, ->vmeta_tsetb 4624 | mov TAB:RB, [BASE+RB*8] 4625 | cmp RC, TAB:RB->asize 4626 | jae ->vmeta_tsetb 4627 | shl RC, 3 4628 | add RC, TAB:RB->array 4629 | cmp dword [RC+4], LJ_TNIL 4630 | je >3 // Previous value is nil? 4631 |1: 4632 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) 4633 | jnz >7 4634 |2: // Set array slot. 4635 |.if X64 4636 | mov RAa, [BASE+RA*8] 4637 | mov [RC], RAa 4638 |.else 4639 | mov RB, [BASE+RA*8+4] 4640 | mov RA, [BASE+RA*8] 4641 | mov [RC+4], RB 4642 | mov [RC], RA 4643 |.endif 4644 | ins_next 4645 | 4646 |3: // Check for __newindex if previous value is nil. 4647 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. 4648 | jz <1 4649 | mov TAB:RA, TAB:RB->metatable 4650 | test byte TAB:RA->nomm, 1<<MM_newindex 4651 | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check. 4652 | movzx RA, PC_RA // Restore RA. 4653 | jmp <1 4654 | 4655 |7: // Possible table write barrier for the value. Skip valiswhite check. 4656 | barrierback TAB:RB, RA 4657 | movzx RA, PC_RA // Restore RA. 4658 | jmp <2 4659 break; 4660 case BC_TSETR: 4661 | ins_ABC // RA = src, RB = table, RC = key 4662 | mov TAB:RB, [BASE+RB*8] 4663 |.if DUALNUM 4664 | mov RC, dword [BASE+RC*8] 4665 |.else 4666 | cvttsd2si RC, qword [BASE+RC*8] 4667 |.endif 4668 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) 4669 | jnz >7 4670 |2: 4671 | cmp RC, TAB:RB->asize 4672 | jae ->vmeta_tsetr 4673 | shl RC, 3 4674 | add RC, TAB:RB->array 4675 | // Set array slot. 4676 |->BC_TSETR_Z: 4677 |.if X64 4678 | mov RBa, [BASE+RA*8] 4679 | mov [RC], RBa 4680 |.else 4681 | mov RB, [BASE+RA*8+4] 4682 | mov RA, [BASE+RA*8] 4683 | mov [RC+4], RB 4684 | mov [RC], RA 4685 |.endif 4686 | ins_next 4687 | 4688 |7: // Possible table write barrier for the value. Skip valiswhite check. 4689 | barrierback TAB:RB, RA 4690 | movzx RA, PC_RA // Restore RA. 4691 | jmp <2 4692 break; 4693 4694 case BC_TSETM: 4695 | ins_AD // RA = base (table at base-1), RD = num const (start index) 4696 | mov TMP1, KBASE // Need one more free register. 4697 | mov KBASE, dword [KBASE+RD*8] // Integer constant is in lo-word. 4698 |1: 4699 | lea RA, [BASE+RA*8] 4700 | mov TAB:RB, [RA-8] // Guaranteed to be a table. 4701 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) 4702 | jnz >7 4703 |2: 4704 | mov RD, MULTRES 4705 | sub RD, 1 4706 | jz >4 // Nothing to copy? 4707 | add RD, KBASE // Compute needed size. 4708 | cmp RD, TAB:RB->asize 4709 | ja >5 // Doesn't fit into array part? 4710 | sub RD, KBASE 4711 | shl KBASE, 3 4712 | add KBASE, TAB:RB->array 4713 |3: // Copy result slots to table. 4714 |.if X64 4715 | mov RBa, [RA] 4716 | add RA, 8 4717 | mov [KBASE], RBa 4718 |.else 4719 | mov RB, [RA] 4720 | mov [KBASE], RB 4721 | mov RB, [RA+4] 4722 | add RA, 8 4723 | mov [KBASE+4], RB 4724 |.endif 4725 | add KBASE, 8 4726 | sub RD, 1 4727 | jnz <3 4728 |4: 4729 | mov KBASE, TMP1 4730 | ins_next 4731 | 4732 |5: // Need to resize array part. 4733 |.if X64 4734 | mov L:CARG1d, SAVE_L 4735 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE. 4736 | mov CARG2d, TAB:RB 4737 | mov CARG3d, RD 4738 | mov L:RB, L:CARG1d 4739 |.else 4740 | mov ARG2, TAB:RB 4741 | mov L:RB, SAVE_L 4742 | mov L:RB->base, BASE 4743 | mov ARG3, RD 4744 | mov ARG1, L:RB 4745 |.endif 4746 | mov SAVE_PC, PC 4747 | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) 4748 | mov BASE, L:RB->base 4749 | movzx RA, PC_RA // Restore RA. 4750 | jmp <1 // Retry. 4751 | 4752 |7: // Possible table write barrier for any value. Skip valiswhite check. 4753 | barrierback TAB:RB, RD 4754 | jmp <2 4755 break; 4756 4757 /* -- Calls and vararg handling ----------------------------------------- */ 4758 4759 case BC_CALL: case BC_CALLM: 4760 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs 4761 if (op == BC_CALLM) { 4762 | add NARGS:RD, MULTRES 4763 } 4764 | cmp dword [BASE+RA*8+4], LJ_TFUNC 4765 | mov LFUNC:RB, [BASE+RA*8] 4766 | jne ->vmeta_call_ra 4767 | lea BASE, [BASE+RA*8+8] 4768 | ins_call 4769 break; 4770 4771 case BC_CALLMT: 4772 | ins_AD // RA = base, RD = extra_nargs 4773 | add NARGS:RD, MULTRES 4774 | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op. 4775 break; 4776 case BC_CALLT: 4777 | ins_AD // RA = base, RD = nargs+1 4778 | lea RA, [BASE+RA*8+8] 4779 | mov KBASE, BASE // Use KBASE for move + vmeta_call hint. 4780 | mov LFUNC:RB, [RA-8] 4781 | cmp dword [RA-4], LJ_TFUNC 4782 | jne ->vmeta_call 4783 |->BC_CALLT_Z: 4784 | mov PC, [BASE-4] 4785 | test PC, FRAME_TYPE 4786 | jnz >7 4787 |1: 4788 | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below. 4789 | mov MULTRES, NARGS:RD 4790 | sub NARGS:RD, 1 4791 | jz >3 4792 |2: // Move args down. 4793 |.if X64 4794 | mov RBa, [RA] 4795 | add RA, 8 4796 | mov [KBASE], RBa 4797 |.else 4798 | mov RB, [RA] 4799 | mov [KBASE], RB 4800 | mov RB, [RA+4] 4801 | add RA, 8 4802 | mov [KBASE+4], RB 4803 |.endif 4804 | add KBASE, 8 4805 | sub NARGS:RD, 1 4806 | jnz <2 4807 | 4808 | mov LFUNC:RB, [BASE-8] 4809 |3: 4810 | mov NARGS:RD, MULTRES 4811 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function? 4812 | ja >5 4813 |4: 4814 | ins_callt 4815 | 4816 |5: // Tailcall to a fast function. 4817 | test PC, FRAME_TYPE // Lua frame below? 4818 | jnz <4 4819 | movzx RA, PC_RA 4820 | not RAa 4821 | mov LFUNC:KBASE, [BASE+RA*8-8] // Need to prepare KBASE. 4822 | mov KBASE, LFUNC:KBASE->pc 4823 | mov KBASE, [KBASE+PC2PROTO(k)] 4824 | jmp <4 4825 | 4826 |7: // Tailcall from a vararg function. 4827 | sub PC, FRAME_VARG 4828 | test PC, FRAME_TYPEP 4829 | jnz >8 // Vararg frame below? 4830 | sub BASE, PC // Need to relocate BASE/KBASE down. 4831 | mov KBASE, BASE 4832 | mov PC, [BASE-4] 4833 | jmp <1 4834 |8: 4835 | add PC, FRAME_VARG 4836 | jmp <1 4837 break; 4838 4839 case BC_ITERC: 4840 | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1) 4841 | lea RA, [BASE+RA*8+8] // fb = base+1 4842 |.if X64 4843 | mov RBa, [RA-24] // Copy state. fb[0] = fb[-3]. 4844 | mov RCa, [RA-16] // Copy control var. fb[1] = fb[-2]. 4845 | mov [RA], RBa 4846 | mov [RA+8], RCa 4847 |.else 4848 | mov RB, [RA-24] // Copy state. fb[0] = fb[-3]. 4849 | mov RC, [RA-20] 4850 | mov [RA], RB 4851 | mov [RA+4], RC 4852 | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2]. 4853 | mov RC, [RA-12] 4854 | mov [RA+8], RB 4855 | mov [RA+12], RC 4856 |.endif 4857 | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4] 4858 | mov RC, [RA-28] 4859 | mov [RA-8], LFUNC:RB 4860 | mov [RA-4], RC 4861 | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call. 4862 | mov NARGS:RD, 2+1 4863 | jne ->vmeta_call 4864 | mov BASE, RA 4865 | ins_call 4866 break; 4867 4868 case BC_ITERN: 4869 |.if JIT 4870 | hotloop RB 4871 |.endif 4872 |->vm_IITERN: 4873 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) 4874 | mov TMP1, KBASE // Need two more free registers. 4875 | mov TMP2, DISPATCH 4876 | mov TAB:RB, [BASE+RA*8-16] 4877 | mov RC, [BASE+RA*8-8] // Get index from control var. 4878 | mov DISPATCH, TAB:RB->asize 4879 | add PC, 4 4880 | mov KBASE, TAB:RB->array 4881 |1: // Traverse array part. 4882 | cmp RC, DISPATCH; jae >5 // Index points after array part? 4883 | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4 4884 |.if DUALNUM 4885 | mov dword [BASE+RA*8+4], LJ_TISNUM 4886 | mov dword [BASE+RA*8], RC 4887 |.else 4888 | cvtsi2sd xmm0, RC 4889 |.endif 4890 | // Copy array slot to returned value. 4891 |.if X64 4892 | mov RBa, [KBASE+RC*8] 4893 | mov [BASE+RA*8+8], RBa 4894 |.else 4895 | mov RB, [KBASE+RC*8+4] 4896 | mov [BASE+RA*8+12], RB 4897 | mov RB, [KBASE+RC*8] 4898 | mov [BASE+RA*8+8], RB 4899 |.endif 4900 | add RC, 1 4901 | // Return array index as a numeric key. 4902 |.if DUALNUM 4903 | // See above. 4904 |.else 4905 | movsd qword [BASE+RA*8], xmm0 4906 |.endif 4907 | mov [BASE+RA*8-8], RC // Update control var. 4908 |2: 4909 | movzx RD, PC_RD // Get target from ITERL. 4910 | branchPC RD 4911 |3: 4912 | mov DISPATCH, TMP2 4913 | mov KBASE, TMP1 4914 | ins_next 4915 | 4916 |4: // Skip holes in array part. 4917 | add RC, 1 4918 | jmp <1 4919 | 4920 |5: // Traverse hash part. 4921 | sub RC, DISPATCH 4922 |6: 4923 | cmp RC, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1. 4924 | imul KBASE, RC, #NODE 4925 | add NODE:KBASE, TAB:RB->node 4926 | cmp dword NODE:KBASE->val.it, LJ_TNIL; je >7 4927 | lea DISPATCH, [RC+DISPATCH+1] 4928 | // Copy key and value from hash slot. 4929 |.if X64 4930 | mov RBa, NODE:KBASE->key 4931 | mov RCa, NODE:KBASE->val 4932 | mov [BASE+RA*8], RBa 4933 | mov [BASE+RA*8+8], RCa 4934 |.else 4935 | mov RB, NODE:KBASE->key.gcr 4936 | mov RC, NODE:KBASE->key.it 4937 | mov [BASE+RA*8], RB 4938 | mov [BASE+RA*8+4], RC 4939 | mov RB, NODE:KBASE->val.gcr 4940 | mov RC, NODE:KBASE->val.it 4941 | mov [BASE+RA*8+8], RB 4942 | mov [BASE+RA*8+12], RC 4943 |.endif 4944 | mov [BASE+RA*8-8], DISPATCH 4945 | jmp <2 4946 | 4947 |7: // Skip holes in hash part. 4948 | add RC, 1 4949 | jmp <6 4950 break; 4951 4952 case BC_ISNEXT: 4953 | ins_AD // RA = base, RD = target (points to ITERN) 4954 | cmp dword [BASE+RA*8-20], LJ_TFUNC; jne >5 4955 | mov CFUNC:RB, [BASE+RA*8-24] 4956 | cmp dword [BASE+RA*8-12], LJ_TTAB; jne >5 4957 | cmp dword [BASE+RA*8-4], LJ_TNIL; jne >5 4958 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 4959 | branchPC RD 4960 | mov dword [BASE+RA*8-8], 0 // Initialize control var. 4961 | mov dword [BASE+RA*8-4], LJ_KEYINDEX 4962 |1: 4963 | ins_next 4964 |5: // Despecialize bytecode if any of the checks fail. 4965 | mov PC_OP, BC_JMP 4966 | branchPC RD 4967 |.if JIT 4968 | cmp byte [PC], BC_ITERN 4969 | jne >6 4970 |.endif 4971 | mov byte [PC], BC_ITERC 4972 | jmp <1 4973 |.if JIT 4974 |6: // Unpatch JLOOP. 4975 | mov RA, [DISPATCH+DISPATCH_J(trace)] 4976 | movzx RC, word [PC+2] 4977 | mov TRACE:RA, [RA+RC*4] 4978 | mov eax, TRACE:RA->startins 4979 | mov al, BC_ITERC 4980 | mov dword [PC], eax 4981 | jmp <1 4982 |.endif 4983 break; 4984 4985 case BC_VARG: 4986 | ins_ABC // RA = base, RB = nresults+1, RC = numparams 4987 | mov TMP1, KBASE // Need one more free register. 4988 | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)] 4989 | lea RA, [BASE+RA*8] 4990 | sub KBASE, [BASE-4] 4991 | // Note: KBASE may now be even _above_ BASE if nargs was < numparams. 4992 | test RB, RB 4993 | jz >5 // Copy all varargs? 4994 | lea RB, [RA+RB*8-8] 4995 | cmp KBASE, BASE // No vararg slots? 4996 | jnb >2 4997 |1: // Copy vararg slots to destination slots. 4998 |.if X64 4999 | mov RCa, [KBASE-8] 5000 | add KBASE, 8 5001 | mov [RA], RCa 5002 |.else 5003 | mov RC, [KBASE-8] 5004 | mov [RA], RC 5005 | mov RC, [KBASE-4] 5006 | add KBASE, 8 5007 | mov [RA+4], RC 5008 |.endif 5009 | add RA, 8 5010 | cmp RA, RB // All destination slots filled? 5011 | jnb >3 5012 | cmp KBASE, BASE // No more vararg slots? 5013 | jb <1 5014 |2: // Fill up remainder with nil. 5015 | mov dword [RA+4], LJ_TNIL 5016 | add RA, 8 5017 | cmp RA, RB 5018 | jb <2 5019 |3: 5020 | mov KBASE, TMP1 5021 | ins_next 5022 | 5023 |5: // Copy all varargs. 5024 | mov MULTRES, 1 // MULTRES = 0+1 5025 | mov RC, BASE 5026 | sub RC, KBASE 5027 | jbe <3 // No vararg slots? 5028 | mov RB, RC 5029 | shr RB, 3 5030 | add RB, 1 5031 | mov MULTRES, RB // MULTRES = #varargs+1 5032 | mov L:RB, SAVE_L 5033 | add RC, RA 5034 | cmp RC, L:RB->maxstack 5035 | ja >7 // Need to grow stack? 5036 |6: // Copy all vararg slots. 5037 |.if X64 5038 | mov RCa, [KBASE-8] 5039 | add KBASE, 8 5040 | mov [RA], RCa 5041 |.else 5042 | mov RC, [KBASE-8] 5043 | mov [RA], RC 5044 | mov RC, [KBASE-4] 5045 | add KBASE, 8 5046 | mov [RA+4], RC 5047 |.endif 5048 | add RA, 8 5049 | cmp KBASE, BASE // No more vararg slots? 5050 | jb <6 5051 | jmp <3 5052 | 5053 |7: // Grow stack for varargs. 5054 | mov L:RB->base, BASE 5055 | mov L:RB->top, RA 5056 | mov SAVE_PC, PC 5057 | sub KBASE, BASE // Need delta, because BASE may change. 5058 | mov FCARG2, MULTRES 5059 | sub FCARG2, 1 5060 | mov FCARG1, L:RB 5061 | call extern lj_state_growstack@8 // (lua_State *L, int n) 5062 | mov BASE, L:RB->base 5063 | mov RA, L:RB->top 5064 | add KBASE, BASE 5065 | jmp <6 5066 break; 5067 5068 /* -- Returns ----------------------------------------------------------- */ 5069 5070 case BC_RETM: 5071 | ins_AD // RA = results, RD = extra_nresults 5072 | add RD, MULTRES // MULTRES >=1, so RD >=1. 5073 | // Fall through. Assumes BC_RET follows and ins_AD is a no-op. 5074 break; 5075 5076 case BC_RET: case BC_RET0: case BC_RET1: 5077 | ins_AD // RA = results, RD = nresults+1 5078 if (op != BC_RET0) { 5079 | shl RA, 3 5080 } 5081 |1: 5082 | mov PC, [BASE-4] 5083 | mov MULTRES, RD // Save nresults+1. 5084 | test PC, FRAME_TYPE // Check frame type marker. 5085 | jnz >7 // Not returning to a fixarg Lua func? 5086 switch (op) { 5087 case BC_RET: 5088 |->BC_RET_Z: 5089 | mov KBASE, BASE // Use KBASE for result move. 5090 | sub RD, 1 5091 | jz >3 5092 |2: // Move results down. 5093 |.if X64 5094 | mov RBa, [KBASE+RA] 5095 | mov [KBASE-8], RBa 5096 |.else 5097 | mov RB, [KBASE+RA] 5098 | mov [KBASE-8], RB 5099 | mov RB, [KBASE+RA+4] 5100 | mov [KBASE-4], RB 5101 |.endif 5102 | add KBASE, 8 5103 | sub RD, 1 5104 | jnz <2 5105 |3: 5106 | mov RD, MULTRES // Note: MULTRES may be >255. 5107 | movzx RB, PC_RB // So cannot compare with RDL! 5108 |5: 5109 | cmp RB, RD // More results expected? 5110 | ja >6 5111 break; 5112 case BC_RET1: 5113 |.if X64 5114 | mov RBa, [BASE+RA] 5115 | mov [BASE-8], RBa 5116 |.else 5117 | mov RB, [BASE+RA+4] 5118 | mov [BASE-4], RB 5119 | mov RB, [BASE+RA] 5120 | mov [BASE-8], RB 5121 |.endif 5122 /* fallthrough */ 5123 case BC_RET0: 5124 |5: 5125 | cmp PC_RB, RDL // More results expected? 5126 | ja >6 5127 default: 5128 break; 5129 } 5130 | movzx RA, PC_RA 5131 | not RAa // Note: ~RA = -(RA+1) 5132 | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8 5133 | mov LFUNC:KBASE, [BASE-8] 5134 | mov KBASE, LFUNC:KBASE->pc 5135 | mov KBASE, [KBASE+PC2PROTO(k)] 5136 | ins_next 5137 | 5138 |6: // Fill up results with nil. 5139 if (op == BC_RET) { 5140 | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base. 5141 | add KBASE, 8 5142 } else { 5143 | mov dword [BASE+RD*8-12], LJ_TNIL 5144 } 5145 | add RD, 1 5146 | jmp <5 5147 | 5148 |7: // Non-standard return case. 5149 | lea RB, [PC-FRAME_VARG] 5150 | test RB, FRAME_TYPEP 5151 | jnz ->vm_return 5152 | // Return from vararg function: relocate BASE down and RA up. 5153 | sub BASE, RB 5154 if (op != BC_RET0) { 5155 | add RA, RB 5156 } 5157 | jmp <1 5158 break; 5159 5160 /* -- Loops and branches ------------------------------------------------ */ 5161 5162 |.define FOR_IDX, [RA]; .define FOR_TIDX, dword [RA+4] 5163 |.define FOR_STOP, [RA+8]; .define FOR_TSTOP, dword [RA+12] 5164 |.define FOR_STEP, [RA+16]; .define FOR_TSTEP, dword [RA+20] 5165 |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28] 5166 5167 case BC_FORL: 5168 |.if JIT 5169 | hotloop RB 5170 |.endif 5171 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. 5172 break; 5173 5174 case BC_JFORI: 5175 case BC_JFORL: 5176#if !LJ_HASJIT 5177 break; 5178#endif 5179 case BC_FORI: 5180 case BC_IFORL: 5181 vk = (op == BC_IFORL || op == BC_JFORL); 5182 | ins_AJ // RA = base, RD = target (after end of loop or start of loop) 5183 | lea RA, [BASE+RA*8] 5184 if (LJ_DUALNUM) { 5185 | cmp FOR_TIDX, LJ_TISNUM; jne >9 5186 if (!vk) { 5187 | cmp FOR_TSTOP, LJ_TISNUM; jne ->vmeta_for 5188 | cmp FOR_TSTEP, LJ_TISNUM; jne ->vmeta_for 5189 | mov RB, dword FOR_IDX 5190 | cmp dword FOR_STEP, 0; jl >5 5191 } else { 5192#ifdef LUA_USE_ASSERT 5193 | cmp FOR_TSTOP, LJ_TISNUM; jne ->assert_bad_for_arg_type 5194 | cmp FOR_TSTEP, LJ_TISNUM; jne ->assert_bad_for_arg_type 5195#endif 5196 | mov RB, dword FOR_STEP 5197 | test RB, RB; js >5 5198 | add RB, dword FOR_IDX; jo >1 5199 | mov dword FOR_IDX, RB 5200 } 5201 | cmp RB, dword FOR_STOP 5202 | mov FOR_TEXT, LJ_TISNUM 5203 | mov dword FOR_EXT, RB 5204 if (op == BC_FORI) { 5205 | jle >7 5206 |1: 5207 |6: 5208 | branchPC RD 5209 } else if (op == BC_JFORI) { 5210 | branchPC RD 5211 | movzx RD, PC_RD 5212 | jle =>BC_JLOOP 5213 |1: 5214 |6: 5215 } else if (op == BC_IFORL) { 5216 | jg >7 5217 |6: 5218 | branchPC RD 5219 |1: 5220 } else { 5221 | jle =>BC_JLOOP 5222 |1: 5223 |6: 5224 } 5225 |7: 5226 | ins_next 5227 | 5228 |5: // Invert check for negative step. 5229 if (vk) { 5230 | add RB, dword FOR_IDX; jo <1 5231 | mov dword FOR_IDX, RB 5232 } 5233 | cmp RB, dword FOR_STOP 5234 | mov FOR_TEXT, LJ_TISNUM 5235 | mov dword FOR_EXT, RB 5236 if (op == BC_FORI) { 5237 | jge <7 5238 } else if (op == BC_JFORI) { 5239 | branchPC RD 5240 | movzx RD, PC_RD 5241 | jge =>BC_JLOOP 5242 } else if (op == BC_IFORL) { 5243 | jl <7 5244 } else { 5245 | jge =>BC_JLOOP 5246 } 5247 | jmp <6 5248 |9: // Fallback to FP variant. 5249 } else if (!vk) { 5250 | cmp FOR_TIDX, LJ_TISNUM 5251 } 5252 if (!vk) { 5253 | jae ->vmeta_for 5254 | cmp FOR_TSTOP, LJ_TISNUM; jae ->vmeta_for 5255 } else { 5256#ifdef LUA_USE_ASSERT 5257 | cmp FOR_TSTOP, LJ_TISNUM; jae ->assert_bad_for_arg_type 5258 | cmp FOR_TSTEP, LJ_TISNUM; jae ->assert_bad_for_arg_type 5259#endif 5260 } 5261 | mov RB, FOR_TSTEP // Load type/hiword of for step. 5262 if (!vk) { 5263 | cmp RB, LJ_TISNUM; jae ->vmeta_for 5264 } 5265 | movsd xmm0, qword FOR_IDX 5266 | movsd xmm1, qword FOR_STOP 5267 if (vk) { 5268 | addsd xmm0, qword FOR_STEP 5269 | movsd qword FOR_IDX, xmm0 5270 | test RB, RB; js >3 5271 } else { 5272 | jl >3 5273 } 5274 | ucomisd xmm1, xmm0 5275 |1: 5276 | movsd qword FOR_EXT, xmm0 5277 if (op == BC_FORI) { 5278 |.if DUALNUM 5279 | jnb <7 5280 |.else 5281 | jnb >2 5282 | branchPC RD 5283 |.endif 5284 } else if (op == BC_JFORI) { 5285 | branchPC RD 5286 | movzx RD, PC_RD 5287 | jnb =>BC_JLOOP 5288 } else if (op == BC_IFORL) { 5289 |.if DUALNUM 5290 | jb <7 5291 |.else 5292 | jb >2 5293 | branchPC RD 5294 |.endif 5295 } else { 5296 | jnb =>BC_JLOOP 5297 } 5298 |.if DUALNUM 5299 | jmp <6 5300 |.else 5301 |2: 5302 | ins_next 5303 |.endif 5304 | 5305 |3: // Invert comparison if step is negative. 5306 | ucomisd xmm0, xmm1 5307 | jmp <1 5308 break; 5309 5310 case BC_ITERL: 5311 |.if JIT 5312 | hotloop RB 5313 |.endif 5314 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. 5315 break; 5316 5317 case BC_JITERL: 5318#if !LJ_HASJIT 5319 break; 5320#endif 5321 case BC_IITERL: 5322 | ins_AJ // RA = base, RD = target 5323 | lea RA, [BASE+RA*8] 5324 | mov RB, [RA+4] 5325 | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil. 5326 if (op == BC_JITERL) { 5327 | mov [RA-4], RB 5328 | mov RB, [RA] 5329 | mov [RA-8], RB 5330 | jmp =>BC_JLOOP 5331 } else { 5332 | branchPC RD // Otherwise save control var + branch. 5333 | mov RD, [RA] 5334 | mov [RA-4], RB 5335 | mov [RA-8], RD 5336 } 5337 |1: 5338 | ins_next 5339 break; 5340 5341 case BC_LOOP: 5342 | ins_A // RA = base, RD = target (loop extent) 5343 | // Note: RA/RD is only used by trace recorder to determine scope/extent 5344 | // This opcode does NOT jump, it's only purpose is to detect a hot loop. 5345 |.if JIT 5346 | hotloop RB 5347 |.endif 5348 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 5349 break; 5350 5351 case BC_ILOOP: 5352 | ins_A // RA = base, RD = target (loop extent) 5353 | ins_next 5354 break; 5355 5356 case BC_JLOOP: 5357 |.if JIT 5358 | ins_AD // RA = base (ignored), RD = traceno 5359#ifdef LUA_USE_TRACE_LOGS 5360 |.if X64 5361 | mov L:RB, SAVE_L 5362 | mov L:RB->base, BASE // Save BASE 5363 | mov TMP1, RD // Save RD 5364 | mov CARG3d, PC // CARG3d == BASE 5365 | mov FCARG2, RD 5366 | mov FCARG1, RB 5367 | call extern lj_log_trace_entry@8 5368 | mov RD, TMP1 5369 | mov BASE, L:RB->base 5370 |.endif 5371#endif 5372 | mov RA, [DISPATCH+DISPATCH_J(trace)] 5373 | mov TRACE:RD, [RA+RD*4] 5374 | mov RDa, TRACE:RD->mcode 5375 | mov L:RB, SAVE_L 5376 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE 5377 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB 5378 | // Save additional callee-save registers only used in compiled code. 5379 |.if X64WIN 5380 | mov TMPQ, r12 5381 | mov TMPa, r13 5382 | mov CSAVE_4, r14 5383 | mov CSAVE_3, r15 5384 | mov RAa, rsp 5385 | sub rsp, 9*16+4*8 5386 | movdqa [RAa], xmm6 5387 | movdqa [RAa-1*16], xmm7 5388 | movdqa [RAa-2*16], xmm8 5389 | movdqa [RAa-3*16], xmm9 5390 | movdqa [RAa-4*16], xmm10 5391 | movdqa [RAa-5*16], xmm11 5392 | movdqa [RAa-6*16], xmm12 5393 | movdqa [RAa-7*16], xmm13 5394 | movdqa [RAa-8*16], xmm14 5395 | movdqa [RAa-9*16], xmm15 5396 |.elif X64 5397 | mov TMPQ, r12 5398 | mov TMPa, r13 5399 | sub rsp, 16 5400 |.endif 5401 | jmp RDa 5402 |.endif 5403 break; 5404 5405 case BC_JMP: 5406 | ins_AJ // RA = unused, RD = target 5407 | branchPC RD 5408 | ins_next 5409 break; 5410 5411 /* -- Function headers -------------------------------------------------- */ 5412 5413 /* 5414 ** Reminder: A function may be called with func/args above L->maxstack, 5415 ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot, 5416 ** too. This means all FUNC* ops (including fast functions) must check 5417 ** for stack overflow _before_ adding more slots! 5418 */ 5419 5420 case BC_FUNCF: 5421 |.if JIT 5422 | hotcall RB 5423 |.endif 5424 case BC_FUNCV: /* NYI: compiled vararg functions. */ 5425 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. 5426 break; 5427 5428 case BC_JFUNCF: 5429#if !LJ_HASJIT 5430 break; 5431#endif 5432 case BC_IFUNCF: 5433 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 5434 | mov KBASE, [PC-4+PC2PROTO(k)] 5435 | mov L:RB, SAVE_L 5436 | lea RA, [BASE+RA*8] // Top of frame. 5437 | cmp RA, L:RB->maxstack 5438 | ja ->vm_growstack_f 5439 | movzx RA, byte [PC-4+PC2PROTO(numparams)] 5440 | cmp NARGS:RD, RA // Check for missing parameters. 5441 | jbe >3 5442 |2: 5443 if (op == BC_JFUNCF) { 5444 | movzx RD, PC_RD 5445 | jmp =>BC_JLOOP 5446 } else { 5447 | ins_next 5448 } 5449 | 5450 |3: // Clear missing parameters. 5451 | mov dword [BASE+NARGS:RD*8-4], LJ_TNIL 5452 | add NARGS:RD, 1 5453 | cmp NARGS:RD, RA 5454 | jbe <3 5455 | jmp <2 5456 break; 5457 5458 case BC_JFUNCV: 5459#if !LJ_HASJIT 5460 break; 5461#endif 5462 | int3 // NYI: compiled vararg functions 5463 break; /* NYI: compiled vararg functions. */ 5464 5465 case BC_IFUNCV: 5466 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 5467 | lea RB, [NARGS:RD*8+FRAME_VARG] 5468 | lea RD, [BASE+NARGS:RD*8] 5469 | mov LFUNC:KBASE, [BASE-8] 5470 | mov [RD-4], RB // Store delta + FRAME_VARG. 5471 | mov [RD-8], LFUNC:KBASE // Store copy of LFUNC. 5472 | mov L:RB, SAVE_L 5473 | lea RA, [RD+RA*8] 5474 | cmp RA, L:RB->maxstack 5475 | ja ->vm_growstack_v // Need to grow stack. 5476 | mov RA, BASE 5477 | mov BASE, RD 5478 | movzx RB, byte [PC-4+PC2PROTO(numparams)] 5479 | test RB, RB 5480 | jz >2 5481 |1: // Copy fixarg slots up to new frame. 5482 | add RA, 8 5483 | cmp RA, BASE 5484 | jnb >3 // Less args than parameters? 5485 | mov KBASE, [RA-8] 5486 | mov [RD], KBASE 5487 | mov KBASE, [RA-4] 5488 | mov [RD+4], KBASE 5489 | add RD, 8 5490 | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC). 5491 | sub RB, 1 5492 | jnz <1 5493 |2: 5494 if (op == BC_JFUNCV) { 5495 | movzx RD, PC_RD 5496 | jmp =>BC_JLOOP 5497 } else { 5498 | mov KBASE, [PC-4+PC2PROTO(k)] 5499 | ins_next 5500 } 5501 | 5502 |3: // Clear missing parameters. 5503 | mov dword [RD+4], LJ_TNIL 5504 | add RD, 8 5505 | sub RB, 1 5506 | jnz <3 5507 | jmp <2 5508 break; 5509 5510 case BC_FUNCC: 5511 case BC_FUNCCW: 5512 | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1 5513 | mov CFUNC:RB, [BASE-8] 5514 | mov KBASEa, CFUNC:RB->f 5515 | mov L:RB, SAVE_L 5516 | lea RD, [BASE+NARGS:RD*8-8] 5517 | mov L:RB->base, BASE 5518 | lea RA, [RD+8*LUA_MINSTACK] 5519 | cmp RA, L:RB->maxstack 5520 | mov L:RB->top, RD 5521 if (op == BC_FUNCC) { 5522 |.if X64 5523 | mov CARG1d, L:RB // Caveat: CARG1d may be RA. 5524 |.else 5525 | mov ARG1, L:RB 5526 |.endif 5527 } else { 5528 |.if X64 5529 | mov CARG2, KBASEa 5530 | mov CARG1d, L:RB // Caveat: CARG1d may be RA. 5531 |.else 5532 | mov ARG2, KBASEa 5533 | mov ARG1, L:RB 5534 |.endif 5535 } 5536 | ja ->vm_growstack_c // Need to grow stack. 5537 | set_vmstate C 5538 if (op == BC_FUNCC) { 5539 | call KBASEa // (lua_State *L) 5540 } else { 5541 | // (lua_State *L, lua_CFunction f) 5542 | call aword [DISPATCH+DISPATCH_GL(wrapf)] 5543 } 5544 | // nresults returned in eax (RD). 5545 | mov BASE, L:RB->base 5546 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB 5547 | set_vmstate INTERP 5548 | lea RA, [BASE+RD*8] 5549 | neg RA 5550 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 5551 | mov PC, [BASE-4] // Fetch PC of caller. 5552 | jmp ->vm_returnc 5553 break; 5554 5555 /* ---------------------------------------------------------------------- */ 5556 5557 default: 5558 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); 5559 exit(2); 5560 break; 5561 } 5562} 5563 5564static int build_backend(BuildCtx *ctx) 5565{ 5566 int op; 5567 dasm_growpc(Dst, BC__MAX); 5568 build_subroutines(ctx); 5569 |.code_op 5570 for (op = 0; op < BC__MAX; op++) 5571 build_ins(ctx, (BCOp)op, op); 5572 return BC__MAX; 5573} 5574 5575/* Emit pseudo frame-info for all assembler functions. */ 5576static void emit_asm_debug(BuildCtx *ctx) 5577{ 5578 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); 5579#if LJ_64 5580#define SZPTR "8" 5581#define BSZPTR "3" 5582#define REG_SP "0x7" 5583#define REG_RA "0x10" 5584#else 5585#define SZPTR "4" 5586#define BSZPTR "2" 5587#define REG_SP "0x4" 5588#define REG_RA "0x8" 5589#endif 5590 switch (ctx->mode) { 5591 case BUILD_elfasm: 5592 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); 5593 fprintf(ctx->fp, 5594 ".Lframe0:\n" 5595 "\t.long .LECIE0-.LSCIE0\n" 5596 ".LSCIE0:\n" 5597 "\t.long 0xffffffff\n" 5598 "\t.byte 0x1\n" 5599 "\t.string \"\"\n" 5600 "\t.uleb128 0x1\n" 5601 "\t.sleb128 -" SZPTR "\n" 5602 "\t.byte " REG_RA "\n" 5603 "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n" 5604 "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n" 5605 "\t.align " SZPTR "\n" 5606 ".LECIE0:\n\n"); 5607 fprintf(ctx->fp, 5608 ".LSFDE0:\n" 5609 "\t.long .LEFDE0-.LASFDE0\n" 5610 ".LASFDE0:\n" 5611 "\t.long .Lframe0\n" 5612#if LJ_64 5613 "\t.quad .Lbegin\n" 5614 "\t.quad %d\n" 5615 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ 5616 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ 5617 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ 5618 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */ 5619 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */ 5620#if LJ_NO_UNWIND 5621 "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */ 5622 "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */ 5623#endif 5624#else 5625 "\t.long .Lbegin\n" 5626 "\t.long %d\n" 5627 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ 5628 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ 5629 "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */ 5630 "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */ 5631 "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */ 5632#endif 5633 "\t.align " SZPTR "\n" 5634 ".LEFDE0:\n\n", fcofs, CFRAME_SIZE); 5635#if LJ_HASFFI 5636 fprintf(ctx->fp, 5637 ".LSFDE1:\n" 5638 "\t.long .LEFDE1-.LASFDE1\n" 5639 ".LASFDE1:\n" 5640 "\t.long .Lframe0\n" 5641#if LJ_64 5642 "\t.quad lj_vm_ffi_call\n" 5643 "\t.quad %d\n" 5644 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ 5645 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ 5646 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */ 5647 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ 5648#else 5649 "\t.long lj_vm_ffi_call\n" 5650 "\t.long %d\n" 5651 "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */ 5652 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ 5653 "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */ 5654 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */ 5655#endif 5656 "\t.align " SZPTR "\n" 5657 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); 5658#endif 5659#if !LJ_NO_UNWIND 5660#if LJ_TARGET_SOLARIS 5661#if LJ_64 5662 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); 5663#else 5664 fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n"); 5665#endif 5666#else 5667 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); 5668#endif 5669 fprintf(ctx->fp, 5670 ".Lframe1:\n" 5671 "\t.long .LECIE1-.LSCIE1\n" 5672 ".LSCIE1:\n" 5673 "\t.long 0\n" 5674 "\t.byte 0x1\n" 5675 "\t.string \"zPR\"\n" 5676 "\t.uleb128 0x1\n" 5677 "\t.sleb128 -" SZPTR "\n" 5678 "\t.byte " REG_RA "\n" 5679 "\t.uleb128 6\n" /* augmentation length */ 5680 "\t.byte 0x1b\n" /* pcrel|sdata4 */ 5681 "\t.long lj_err_unwind_dwarf-.\n" 5682 "\t.byte 0x1b\n" /* pcrel|sdata4 */ 5683 "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n" 5684 "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n" 5685 "\t.align " SZPTR "\n" 5686 ".LECIE1:\n\n"); 5687 fprintf(ctx->fp, 5688 ".LSFDE2:\n" 5689 "\t.long .LEFDE2-.LASFDE2\n" 5690 ".LASFDE2:\n" 5691 "\t.long .LASFDE2-.Lframe1\n" 5692 "\t.long .Lbegin-.\n" 5693 "\t.long %d\n" 5694 "\t.uleb128 0\n" /* augmentation length */ 5695 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ 5696#if LJ_64 5697 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ 5698 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ 5699 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */ 5700 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */ 5701#else 5702 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ 5703 "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */ 5704 "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */ 5705 "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */ 5706#endif 5707 "\t.align " SZPTR "\n" 5708 ".LEFDE2:\n\n", fcofs, CFRAME_SIZE); 5709#if LJ_HASFFI 5710 fprintf(ctx->fp, 5711 ".Lframe2:\n" 5712 "\t.long .LECIE2-.LSCIE2\n" 5713 ".LSCIE2:\n" 5714 "\t.long 0\n" 5715 "\t.byte 0x1\n" 5716 "\t.string \"zR\"\n" 5717 "\t.uleb128 0x1\n" 5718 "\t.sleb128 -" SZPTR "\n" 5719 "\t.byte " REG_RA "\n" 5720 "\t.uleb128 1\n" /* augmentation length */ 5721 "\t.byte 0x1b\n" /* pcrel|sdata4 */ 5722 "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n" 5723 "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n" 5724 "\t.align " SZPTR "\n" 5725 ".LECIE2:\n\n"); 5726 fprintf(ctx->fp, 5727 ".LSFDE3:\n" 5728 "\t.long .LEFDE3-.LASFDE3\n" 5729 ".LASFDE3:\n" 5730 "\t.long .LASFDE3-.Lframe2\n" 5731 "\t.long lj_vm_ffi_call-.\n" 5732 "\t.long %d\n" 5733 "\t.uleb128 0\n" /* augmentation length */ 5734#if LJ_64 5735 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ 5736 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ 5737 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */ 5738 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ 5739#else 5740 "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */ 5741 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ 5742 "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */ 5743 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */ 5744#endif 5745 "\t.align " SZPTR "\n" 5746 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); 5747#endif 5748#endif 5749 break; 5750#if !LJ_NO_UNWIND 5751 /* Mental note: never let Apple design an assembler. 5752 ** Or a linker. Or a plastic case. But I digress. 5753 */ 5754 case BUILD_machasm: { 5755#if LJ_HASFFI 5756 int fcsize = 0; 5757#endif 5758 int i; 5759 fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n"); 5760 fprintf(ctx->fp, 5761 "EH_frame1:\n" 5762 "\t.set L$set$x,LECIEX-LSCIEX\n" 5763 "\t.long L$set$x\n" 5764 "LSCIEX:\n" 5765 "\t.long 0\n" 5766 "\t.byte 0x1\n" 5767 "\t.ascii \"zPR\\0\"\n" 5768 "\t.byte 0x1\n" 5769 "\t.byte 128-" SZPTR "\n" 5770 "\t.byte " REG_RA "\n" 5771 "\t.byte 6\n" /* augmentation length */ 5772 "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */ 5773#if LJ_64 5774 "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n" 5775 "\t.byte 0x1b\n" /* pcrel|sdata4 */ 5776 "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n" 5777#else 5778 "\t.long L_lj_err_unwind_dwarf$non_lazy_ptr-.\n" 5779 "\t.byte 0x1b\n" /* pcrel|sdata4 */ 5780 "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH-O. */ 5781#endif 5782 "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n" 5783 "\t.align " BSZPTR "\n" 5784 "LECIEX:\n\n"); 5785 for (i = 0; i < ctx->nsym; i++) { 5786 const char *name = ctx->sym[i].name; 5787 int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs; 5788 if (size == 0) continue; 5789#if LJ_HASFFI 5790 if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; } 5791#endif 5792 fprintf(ctx->fp, 5793 "%s.eh:\n" 5794 "LSFDE%d:\n" 5795 "\t.set L$set$%d,LEFDE%d-LASFDE%d\n" 5796 "\t.long L$set$%d\n" 5797 "LASFDE%d:\n" 5798 "\t.long LASFDE%d-EH_frame1\n" 5799 "\t.long %s-.\n" 5800 "\t.long %d\n" 5801 "\t.byte 0\n" /* augmentation length */ 5802 "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */ 5803#if LJ_64 5804 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */ 5805 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */ 5806 "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */ 5807 "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */ 5808#else 5809 "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/ 5810 "\t.byte 0x87\n\t.byte 0x3\n" /* offset edi */ 5811 "\t.byte 0x86\n\t.byte 0x4\n" /* offset esi */ 5812 "\t.byte 0x83\n\t.byte 0x5\n" /* offset ebx */ 5813#endif 5814 "\t.align " BSZPTR "\n" 5815 "LEFDE%d:\n\n", 5816 name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i); 5817 } 5818#if LJ_HASFFI 5819 if (fcsize) { 5820 fprintf(ctx->fp, 5821 "EH_frame2:\n" 5822 "\t.set L$set$y,LECIEY-LSCIEY\n" 5823 "\t.long L$set$y\n" 5824 "LSCIEY:\n" 5825 "\t.long 0\n" 5826 "\t.byte 0x1\n" 5827 "\t.ascii \"zR\\0\"\n" 5828 "\t.byte 0x1\n" 5829 "\t.byte 128-" SZPTR "\n" 5830 "\t.byte " REG_RA "\n" 5831 "\t.byte 1\n" /* augmentation length */ 5832#if LJ_64 5833 "\t.byte 0x1b\n" /* pcrel|sdata4 */ 5834 "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n" 5835#else 5836 "\t.byte 0x1b\n" /* pcrel|sdata4 */ 5837 "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH. */ 5838#endif 5839 "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n" 5840 "\t.align " BSZPTR "\n" 5841 "LECIEY:\n\n"); 5842 fprintf(ctx->fp, 5843 "_lj_vm_ffi_call.eh:\n" 5844 "LSFDEY:\n" 5845 "\t.set L$set$yy,LEFDEY-LASFDEY\n" 5846 "\t.long L$set$yy\n" 5847 "LASFDEY:\n" 5848 "\t.long LASFDEY-EH_frame2\n" 5849 "\t.long _lj_vm_ffi_call-.\n" 5850 "\t.long %d\n" 5851 "\t.byte 0\n" /* augmentation length */ 5852#if LJ_64 5853 "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */ 5854 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */ 5855 "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */ 5856 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */ 5857#else 5858 "\t.byte 0xe\n\t.byte 8\n" /* def_cfa_offset */ 5859 "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/ 5860 "\t.byte 0xd\n\t.byte 0x4\n" /* def_cfa_register ebp */ 5861 "\t.byte 0x83\n\t.byte 0x3\n" /* offset ebx */ 5862#endif 5863 "\t.align " BSZPTR "\n" 5864 "LEFDEY:\n\n", fcsize); 5865 } 5866#endif 5867#if !LJ_64 5868 fprintf(ctx->fp, 5869 "\t.non_lazy_symbol_pointer\n" 5870 "L_lj_err_unwind_dwarf$non_lazy_ptr:\n" 5871 ".indirect_symbol _lj_err_unwind_dwarf\n" 5872 ".long 0\n\n"); 5873 fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n"); 5874 { 5875 const char *const *xn; 5876 for (xn = ctx->extnames; *xn; xn++) 5877 if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) 5878 fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn); 5879 } 5880#endif 5881 fprintf(ctx->fp, ".subsections_via_symbols\n"); 5882 } 5883 break; 5884#endif 5885 default: /* Difficult for other modes. */ 5886 break; 5887 } 5888} 5889 5890