|// Low-level VM code for PowerPC 32 bit or 32on64 bit mode.
|// Bytecode interpreter, fast functions and helper functions.
|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
|
|.arch ppc
|.section code_op, code_sub
|
|.actionlist build_actionlist
|.globals GLOB_
|.globalnames globnames
|.externnames extnames
|
|// Note: The ragged indentation of the instructions is intentional.
|// The starting columns indicate data dependencies.
|
|//-----------------------------------------------------------------------
|
|// DynASM defines used by the PPC port:
|//
|// P64     64 bit pointers (only for GPR64 testing).
|//         Note: see vm_ppc64.dasc for a full PPC64 _LP64 port.
|// GPR64   64 bit registers (but possibly 32 bit pointers, e.g. PS3).
|//         Affects reg saves, stack layout, carry/overflow/dot flags etc.
|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360).
|// TOC     Need table of contents (64 bit or 32 bit variant, e.g. PS3).
|//         Function pointers are really a struct: code, TOC, env (optional).
|// TOCENV  Function pointers have an environment pointer, too (not on PS3).
|// PPE     Power Processor Element of Cell (PS3) or Xenon (Xbox 360).
|//         Must avoid (slow) micro-coded instructions.
|
|// Pointer-sized memory access macros, selected by pointer width:
|//   lpx a, b, c   indexed pointer load  (ldx with P64, lwzx otherwise)
|//   lp  a, b      pointer load          (ld  with P64, lwz  otherwise)
|//   stp a, b      pointer store         (std with P64, stw  otherwise)
|// decode_OPP picks the opcode decode macro matching the pointer size:
|// decode_OP8 with P64, decode_OP4 otherwise.
|.if P64
|// P64 also forces TOC and TOCENV on.
|.define TOC, 1
|.define TOCENV, 1
|.macro lpx, a, b, c; ldx a, b, c; .endmacro
|.macro lp, a, b; ld a, b; .endmacro
|.macro stp, a, b; std a, b; .endmacro
|.define decode_OPP, decode_OP8
|.if FFI
|// Missing: Calling conventions, 64 bit regs, TOC.
|.error lib_ffi not yet implemented for PPC64
|.endif
|.else
|.macro lpx, a, b, c; lwzx a, b, c; .endmacro
|.macro lp, a, b; lwz a, b; .endmacro
|.macro stp, a, b; stw a, b; .endmacro
|.define decode_OPP, decode_OP4
|.endif
|
|// Convenience macros for TOC handling.
|.if TOC
|// Linker needs a TOC patch area for every external call relocation.
|// blex: call an external function through the PLT. The TOC variant adds
|// a nop after the branch.
|.macro blex, target; bl extern target@plt; nop; .endmacro
|// .toc: emit the instruction "a, b" only in TOC builds.
|.macro .toc, a, b; a, b; .endmacro
|.if P64
|.define TOC_OFS, 8
|.define ENV_OFS, 16
|.else
|.define TOC_OFS, 4
|.define ENV_OFS, 8
|.endif
|.else // No TOC.
|.macro blex, target; bl extern target@plt; .endmacro
|.macro .toc, a, b; .endmacro
|.endif
|// .tocenv: emit the instruction "a, b" only when TOCENV is set.
|.macro .tocenv, a, b; .if TOCENV; a, b; .endif; .endmacro
|
|// .gpr64: emit the instruction "a, b" only when GPR64 is set.
|.macro .gpr64, a, b; .if GPR64; a, b; .endif; .endmacro
|
|// andix.: y = a & i, with cr0 set from the result.
|// The PPE variant replaces andi. with rlwinm over the bit range given by
|// lj_fls(i)..lj_ffs(i), followed by an explicit compare against zero.
|// NOTE(review): the PPE form presumably requires i to be a contiguous
|// bit mask -- confirm against the callers.
|.macro andix., y, a, i
|.if PPE
|  rlwinm y, a, 0, 31-lj_fls(i), 31-lj_ffs(i)
|  cmpwi y, 0
|.else
|  andi. y, a, i
|.endif
|.endmacro
|
|// clrso: PPE writes zero to XER via reg; otherwise a single mcrxr to cr0
|// is used (which overwrites cr0; reg is untouched in that variant).
|.macro clrso, reg
|.if PPE
|  li reg, 0
|  mtxer reg
|.else
|  mcrxr cr0
|.endif
|.endmacro
|
|// checkov: branch to noov based on the current XER contents.
|// PPE: reads XER into reg, doubles it, compares with zero, then writes
|// zero back to XER; branches if the doubled value is >= 0.
|// Otherwise: mcrxr moves XER bits into cr0 and the branch is taken on
|// "less or equal". Both variants clobber cr0; the PPE one clobbers reg.
|.macro checkov, reg, noov
|.if PPE
|  mfxer reg
|  add reg, reg, reg
|  cmpwi reg, 0
|  li reg, 0
|  mtxer reg
|  bgey noov
|.else
|  mcrxr cr0
|  bley noov
|.endif
|.endmacro
|
|//-----------------------------------------------------------------------
|
|// Fixed register assignments for the interpreter.
|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA)
|
|// The following must be C callee-save (but BASE is often refetched).
|.define BASE,     r14 // Base of current Lua stack frame.
|.define KBASE,    r15 // Constants of current Lua function.
|.define PC,       r16 // Next PC.
|.define DISPATCH, r17 // Opcode dispatch table.
|.define LREG,     r18 // Register holding lua_State (also in SAVE_L).
|.define MULTRES,  r19 // Size of multi-result: (nresults+1)*8.
|.define JGL,      r31 // On-trace: global_State + 32768.
|
|// Constants for type-comparisons, stores and conversions. C callee-save.
|.define TISNUM,   r22
|.define TISNIL,   r23
|.define ZERO,     r24
|.define TOBIT,    f30 // 2^52 + 2^51.
|.define TONUM,    f31 // 2^52 + 2^51 + 2^31.
|
|// The following temporaries are not saved across C calls, except for RA.
|.define RA,       r20 // Callee-save.
|.define RB,    r10
|.define RC,    r11 // Same register as ENVREG below.
|.define RD,    r12
|.define INS,   r7  // Overlaps CARG5.
|
|.define TMP0,  r0
|.define TMP1,  r8
|.define TMP2,  r9
|.define TMP3,  r6  // Overlaps CARG4.
|
|// Saved temporaries.
|.define SAVE0, r21
|
|// Calling conventions.
|.define CARG1, r3
|.define CARG2, r4
|.define CARG3, r5
|.define CARG4, r6  // Overlaps TMP3.
|.define CARG5, r7  // Overlaps INS.
|
|.define FARG1, f1
|.define FARG2, f2
|
|// Return registers share r3/r4 with CARG1/CARG2.
|.define CRET1, r3
|.define CRET2, r4
|
|.define TOCREG, r2  // TOC register (only used by C code).
|.define ENVREG, r11 // Environment pointer (nested C functions).
|
|// Stack layout while in interpreter. Must match with lj_frame.h.
|// Three layouts follow: GPR64+FRAME32, GPR64 without FRAME32, and plain
|// 32 bit. Each defines the same set of SAVE_*/TMPD*/TONUM* slot names.
|.if GPR64
|.if FRAME32
|
|//                     456(sp) // \ 32/64 bit C frame info
|.define TONUM_LO,      452(sp) // |
|.define TONUM_HI,      448(sp) // |
|.define TMPD_LO,       444(sp) // |
|.define TMPD_HI,       440(sp) // |
|.define SAVE_CR,       432(sp) // | 64 bit CR save.
|.define SAVE_ERRF,     424(sp) //  > Parameter save area.
|.define SAVE_NRES,     420(sp) // |
|.define SAVE_L,        416(sp) // |
|.define SAVE_PC,       412(sp) // |
|.define SAVE_MULTRES,  408(sp) // |
|.define SAVE_CFRAME,   400(sp) // / 64 bit C frame chain.
|//                     392(sp) // Reserved.
|.define CFRAME_SPACE,  384     // Delta for sp.
|// Back chain for sp:  384(sp) <-- sp entering interpreter
|.define SAVE_LR,       376(sp) // 32 bit LR stored in hi-part.
|.define SAVE_GPR_,     232     // .. 232+18*8: 64 bit GPR saves.
|.define SAVE_FPR_,     88      // .. 88+18*8: 64 bit FPR saves.
|//                     80(sp) // Needed for 16 byte stack frame alignment.
|//                     16(sp) // Callee parameter save area (ABI mandated).
|//                     8(sp) // Reserved
|// Back chain for sp:  0(sp) <-- sp while in interpreter
|// 32 bit sp stored in hi-part of 0(sp).
|
|// TMPD_BLO addresses the last byte of the TMPD_LO word (444+3).
|.define TMPD_BLO,      447(sp)
|.define TMPD,          TMPD_HI
|.define TONUM_D,       TONUM_HI
|
|.else
|
|//                     508(sp) // \ 32 bit C frame info.
|// GPR64 layout without FRAME32 (continues the slot list started above).
|.define SAVE_ERRF,     472(sp) // |
|.define SAVE_NRES,     468(sp) // |
|.define SAVE_L,        464(sp) //  > Parameter save area.
|.define SAVE_PC,       460(sp) // |
|.define SAVE_MULTRES,  456(sp) // |
|.define SAVE_CFRAME,   448(sp) // / 64 bit C frame chain.
|.define SAVE_LR,       416(sp)
|.define CFRAME_SPACE,  400     // Delta for sp.
|// Back chain for sp:  400(sp) <-- sp entering interpreter
|.define SAVE_FPR_,     256     // .. 256+18*8: 64 bit FPR saves.
|.define SAVE_GPR_,     112     // .. 112+18*8: 64 bit GPR saves.
|//                     48(sp) // Callee parameter save area (ABI mandated).
|.define SAVE_TOC,      40(sp) // TOC save area.
|.define TMPD_LO,       36(sp) // \ Link editor temp (ABI mandated).
|.define TMPD_HI,       32(sp) // /
|.define TONUM_LO,      28(sp) // \ Compiler temp (ABI mandated).
|.define TONUM_HI,      24(sp) // /
|// Next frame lr:      16(sp)
|.define SAVE_CR,       8(sp) // 64 bit CR save.
|// Back chain for sp:  0(sp) <-- sp while in interpreter
|
|// TMPD_BLO addresses the last byte of the TMPD_LO word (36+3).
|.define TMPD_BLO,      39(sp)
|.define TMPD,          TMPD_HI
|.define TONUM_D,       TONUM_HI
|
|.endif
|.else
|
|// Plain 32 bit layout (no GPR64).
|.define SAVE_LR,       276(sp)
|.define CFRAME_SPACE,  272     // Delta for sp.
|// Back chain for sp:  272(sp) <-- sp entering interpreter
|.define SAVE_FPR_,     128     // .. 128+18*8: 64 bit FPR saves.
|.define SAVE_GPR_,     56      // .. 56+18*4: 32 bit GPR saves.
|.define SAVE_CR,       52(sp)  // 32 bit CR save.
|.define SAVE_ERRF,     48(sp)  // 32 bit C frame info.
|// Remaining slots of the plain 32 bit frame layout.
|.define SAVE_NRES,     44(sp)
|.define SAVE_CFRAME,   40(sp)
|.define SAVE_L,        36(sp)
|.define SAVE_PC,       32(sp)
|.define SAVE_MULTRES,  28(sp)
|.define UNUSED1,       24(sp)
|.define TMPD_LO,       20(sp)
|.define TMPD_HI,       16(sp)
|.define TONUM_LO,      12(sp)
|.define TONUM_HI,      8(sp)
|// Next frame lr:      4(sp)
|// Back chain for sp:  0(sp) <-- sp while in interpreter
|
|// TMPD_BLO addresses the last byte of the TMPD_LO word (20+3).
|.define TMPD_BLO,      23(sp)
|.define TMPD,          TMPD_HI
|.define TONUM_D,       TONUM_HI
|
|.endif
|
|// save_ N: store GPR rN and FPR fN into their save slots.
|// The slot index is N-14, i.e. the macro is meant for registers 14..31.
|// GPR slots are 8 bytes wide with GPR64, 4 bytes otherwise; FPR slots
|// are always 8 bytes.
|.macro save_, reg
|.if GPR64
|  std r..reg, SAVE_GPR_+(reg-14)*8(sp)
|.else
|  stw r..reg, SAVE_GPR_+(reg-14)*4(sp)
|.endif
|  stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
|.endmacro
|// rest_ N: reload GPR rN and FPR fN from the slots written by save_ N.
|.macro rest_, reg
|.if GPR64
|  ld r..reg, SAVE_GPR_+(reg-14)*8(sp)
|.else
|  lwz r..reg, SAVE_GPR_+(reg-14)*4(sp)
|.endif
|  lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
|.endmacro
|
|// saveregs: allocate CFRAME_SPACE bytes of stack (store-with-update keeps
|// the back chain), then save r14-r31/f14-f31, LR and CR. r0 is used as
|// scratch for LR and CR. TOC builds also save TOCREG to SAVE_TOC.
|.macro saveregs
|.if GPR64 and not FRAME32
|  stdu sp, -CFRAME_SPACE(sp)
|.else
|  stwu sp, -CFRAME_SPACE(sp)
|.endif
|  save_ 14; save_ 15; save_ 16
|  mflr r0
|  save_ 17; save_ 18; save_ 19; save_ 20; save_ 21; save_ 22
|.if GPR64 and not FRAME32
|  std r0, SAVE_LR
|.else
|  stw r0, SAVE_LR
|.endif
|  save_ 23; save_ 24; save_ 25
|  mfcr r0
|  save_ 26; save_ 27; save_ 28; save_ 29; save_ 30; save_ 31
|.if GPR64
|  std r0, SAVE_CR
|.else
|  stw r0, SAVE_CR
|.endif
|  .toc std TOCREG, SAVE_TOC
|.endmacro
|
|// restoreregs: inverse of saveregs. Reloads LR (via r0) and CR (via r12),
|// restores r14-r31/f14-f31 and pops the frame. Only the CR fields
|// selected by mask 0x38 are written back; the PPE variant writes the same
|// fields one at a time with mtocrf (0x20, 0x10, 0x08).
|// Clobbers r0 and r12.
|.macro restoreregs
|.if GPR64 and not FRAME32
|  ld r0, SAVE_LR
|.else
|  lwz r0, SAVE_LR
|.endif
|.if GPR64
|  ld r12, SAVE_CR
|.else
|  lwz r12, SAVE_CR
|.endif
|  rest_ 14; rest_ 15; rest_ 16; rest_ 17; rest_ 18; rest_ 19
|  mtlr r0;
|.if PPE; mtocrf 0x20, r12; .else; mtcrf 0x38, r12; .endif
|  rest_ 20; rest_ 21; rest_ 22; rest_ 23; rest_ 24; rest_ 25
|.if PPE; mtocrf 0x10, r12; .endif
|  rest_ 26; rest_ 27; rest_ 28; rest_ 29; rest_ 30; rest_ 31
|.if PPE; mtocrf 0x08, r12; .endif
|  addi sp, sp, CFRAME_SPACE
|.endmacro
|
|// Type definitions. Some of these are only used for documentation.
|.type L,      lua_State, LREG
|.type GL,     global_State
|.type TVALUE, TValue
|.type GCOBJ,  GCobj
|.type STR,    GCstr
|.type TAB,    GCtab
|.type LFUNC,  GCfuncL
|.type CFUNC,  GCfuncC
|.type PROTO,  GCproto
|.type UPVAL,  GCupval
|.type NODE,   Node
|.type NARGS8, int
|.type TRACE,  GCtrace
|.type SBUF,   SBuf
|
|//-----------------------------------------------------------------------
|
|// Trap for not-yet-implemented parts.
|.macro NYI; tw 4, sp, sp; .endmacro
|
|// int/FP conversions.
|// tonum_i: signed int in reg -> double in freg. Flips the sign bit of
|// reg (reg is modified), stores it as the low word of the TONUM_D slot,
|// loads the slot as a double and subtracts the TONUM constant.
|// The high word of the slot must already hold the matching constant
|// (the entry code in this file stores 0x4338 << 16 into TONUM_HI).
|.macro tonum_i, freg, reg
|  xoris reg, reg, 0x8000
|  stw reg, TONUM_LO
|  lfd freg, TONUM_D
|  fsub freg, freg, TONUM
|.endmacro
|
|// tonum_u: same scheme without the sign-bit flip, subtracting TOBIT.
|// reg is left unmodified.
|.macro tonum_u, freg, reg
|  stw reg, TONUM_LO
|  lfd freg, TONUM_D
|  fsub freg, freg, TOBIT
|.endmacro
|
|// toint: convert freg with fctiwz into tmpfreg, spill it to TMPD and
|// load the low word into reg.
|.macro toint, reg, freg, tmpfreg
|  fctiwz tmpfreg, freg
|  stfd tmpfreg, TMPD
|  lwz reg, TMPD_LO
|.endmacro
|
|// Two-operand form: converts in place, so freg is overwritten.
|.macro toint, reg, freg
|  toint reg, freg, freg
|.endmacro
|
|//-----------------------------------------------------------------------
|
|// Access to frame relative to BASE.
|.define FRAME_PC,   -8
|.define FRAME_FUNC, -4
|
|// Instruction decode.
|// Each macro extracts one field of the 32 bit instruction word with a
|// single rotate-and-mask. The numeric suffix is the scale applied to the
|// field: 8 for the *8 forms (mask ends at bit 28), 4 for the *4 forms
|// (mask ends at bit 29), and 1 for decode_OP1 (unscaled low byte).
|.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro
|.macro decode_OP8, dst, ins; rlwinm dst, ins, 3, 21, 28; .endmacro
|.macro decode_RA8, dst, ins; rlwinm dst, ins, 27, 21, 28; .endmacro
|.macro decode_RB8, dst, ins; rlwinm dst, ins, 11, 21, 28; .endmacro
|.macro decode_RC8, dst, ins; rlwinm dst, ins, 19, 21, 28; .endmacro
|.macro decode_RD8, dst, ins; rlwinm dst, ins, 19, 13, 28; .endmacro
|
|.macro decode_OP1, dst, ins; rlwinm dst, ins, 0, 24, 31; .endmacro
|.macro decode_RD4, dst, ins; rlwinm dst, ins, 18, 14, 29; .endmacro
|
|// Instruction fetch.
|// ins_NEXT1: load the next instruction word into INS and advance PC.
|.macro ins_NEXT1
|  lwz INS, 0(PC)
|  addi PC, PC, 4
|.endmacro
|// Instruction decode+dispatch. Note: optimized for e300!
|// ins_NEXT2: look up the handler for the opcode in INS through DISPATCH,
|// pre-decode all operand fields (RA, RB, RC, RD, each scaled by 8) and
|// jump to the handler via CTR. Clobbers TMP0 and TMP1.
|.macro ins_NEXT2
|  decode_OPP TMP1, INS
|  lpx TMP0, DISPATCH, TMP1
|  mtctr TMP0
|  decode_RB8 RB, INS
|  decode_RD8 RD, INS
|  decode_RA8 RA, INS
|  decode_RC8 RC, INS
|  bctr
|.endmacro
|// ins_NEXT: fetch followed by decode+dispatch.
|.macro ins_NEXT
|  ins_NEXT1
|  ins_NEXT2
|.endmacro
|
|// Instruction footer.
|// The ".if 1" below hard-selects the replicated dispatch variant; the
|// .else branch is kept as the alternative implementation.
|.if 1
|  // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
|  .define ins_next, ins_NEXT
|  .define ins_next_, ins_NEXT
|  .define ins_next1, ins_NEXT1
|  .define ins_next2, ins_NEXT2
|.else
|  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
|  // Affects only certain kinds of benchmarks (and only with -j off).
|  .macro ins_next
|    b ->ins_next
|  .endmacro
|  .macro ins_next1
|  .endmacro
|  .macro ins_next2
|    b ->ins_next
|  .endmacro
|  .macro ins_next_
|  ->ins_next:
|    ins_NEXT
|  .endmacro
|.endif
|
|// Call decode and dispatch.
|// ins_callt: load the callee's first PC from the function object in RB,
|// fetch its first instruction, and dispatch to it. Only RA is decoded
|// here, and it is converted to an absolute address (BASE + RA*8).
|// Clobbers TMP0 and TMP1.
|.macro ins_callt
|  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
|  lwz PC, LFUNC:RB->pc
|  lwz INS, 0(PC)
|  addi PC, PC, 4
|  decode_OPP TMP1, INS
|  decode_RA8 RA, INS
|  lpx TMP0, DISPATCH, TMP1
|  add RA, RA, BASE
|  mtctr TMP0
|  bctr
|.endmacro
|
|// ins_call: store the caller PC into the new frame, then do ins_callt.
|.macro ins_call
|  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
|  stw PC, FRAME_PC(BASE)
|  ins_callt
|.endmacro
|
|//-----------------------------------------------------------------------
|
|// Macros to test operand types.
|// Each check macro only performs the compare; the caller supplies the
|// branch. checknum is an unsigned compare against the TISNUM register
|// (optionally into a given CR field); the others are signed compares
|// against the respective type tag constant.
|.macro checknum, reg; cmplw reg, TISNUM; .endmacro
|.macro checknum, cr, reg; cmplw cr, reg, TISNUM; .endmacro
|.macro checkstr, reg; cmpwi reg, LJ_TSTR; .endmacro
|.macro checktab, reg; cmpwi reg, LJ_TTAB; .endmacro
|.macro checkfunc, reg; cmpwi reg, LJ_TFUNC; .endmacro
|.macro checknil, reg; cmpwi reg, LJ_TNIL; .endmacro
|
|// branch_RD: PC += (RD >> 1) - ((BCBIAS_J*4 >> 16) << 16). Clobbers TMP0.
|// NOTE(review): this presumably expects RD as produced by decode_RD8
|// (operand*8), so that RD >> 1 is a byte offset -- confirm at call sites.
|.macro branch_RD
|  srwi TMP0, RD, 1
|  addis PC, PC, -(BCBIAS_J*4 >> 16)
|  add PC, PC, TMP0
|.endmacro
|
|// Assumes DISPATCH is relative to GL.
/* Offsets of global_State / jit_State fields relative to DISPATCH. */
#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
|
/* Offset of a GCproto field relative to the address just past the GCproto. */
#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
|
|// hotcheck: decrement a 16 bit counter and branch to target when the
|// result becomes negative. The counter is addressed relative to DISPATCH
|// at GG_DISP2HOT plus an index taken from PC ((PC >> 1) & 0x7e).
|// Clobbers TMP1, TMP2 and cr0 (addic. also writes the carry).
|.macro hotcheck, delta, target
|  rlwinm TMP1, PC, 31, 25, 30
|  addi TMP1, TMP1, GG_DISP2HOT
|  lhzx TMP2, DISPATCH, TMP1
|  addic. TMP2, TMP2, -delta
|  sthx TMP2, DISPATCH, TMP1
|  blt target
|.endmacro
|
|.macro hotloop
|  hotcheck HOTCOUNT_LOOP, ->vm_hotloop
|.endmacro
|
|.macro hotcall
|  hotcheck HOTCOUNT_CALL, ->vm_hotcall
|.endmacro
|
|// Set current VM state. Uses TMP0.
|// li_vmstate loads the value, st_vmstate stores it; they are split so
|// that other instructions can be scheduled in between.
|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro
|.macro st_vmstate; stw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro
|
|// Move table write barrier back. Overwrites mark and tmp.
|// Pushes tab onto the gc.grayagain list (old head goes to tab->gclist)
|// and stores mark with bit 0x04 cleared into tab->marked.
|.macro barrierback, tab, mark, tmp
|  lwz tmp, DISPATCH_GL(gc.grayagain)(DISPATCH)
|  // Assumes LJ_GC_BLACK is 0x04.
|  rlwinm mark, mark, 0, 30, 28 // black2gray(tab)
|  stw tab, DISPATCH_GL(gc.grayagain)(DISPATCH)
|  stb mark, tab->marked
|  stw tmp, tab->gclist
|.endmacro
|
|//-----------------------------------------------------------------------

/* Generate subroutines used by opcodes and other parts of the VM. */
/* The .code_sub section should be last to help static branch prediction. */
static void build_subroutines(BuildCtx *ctx)
{
  |.code_sub
  |
  |//-----------------------------------------------------------------------
  |//-- Return handling ----------------------------------------------------
  |//-----------------------------------------------------------------------
  |
  |->vm_returnp:
  |  // See vm_return. Also: TMP2 = previous base.
  |  // Frames without the FRAME_P bit go to the continuation dispatcher.
  |  andix. TMP0, PC, FRAME_P
  |  li TMP1, LJ_TTRUE
  |  beq ->cont_dispatch
  |
  |  // Return from pcall or xpcall fast func.
  |  lwz PC, FRAME_PC(TMP2) // Fetch PC of previous frame.
  |  mr BASE, TMP2 // Restore caller base.
  |  // Prepending may overwrite the pcall frame, so do it at the end.
  |  // stwu also moves RA back by 8, so RA now points at the prepended slot.
  |  stwu TMP1, FRAME_PC(RA) // Prepend true to results.
  |
  |->vm_returnc:
  |  // cr0 = (PC & FRAME_TYPE) test, cr1 = (RD == 0) test after the add.
  |  addi RD, RD, 8 // RD = (nresults+1)*8.
  |  andix. TMP0, PC, FRAME_TYPE
  |  cmpwi cr1, RD, 0
  |  li CRET1, LUA_YIELD
  |  beq cr1, ->vm_unwind_c_eh
  |  mr MULTRES, RD
  |  beq ->BC_RET_Z // Handle regular return to Lua.
  |
  |->vm_return:
  |  // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
  |  // TMP0 = PC & FRAME_TYPE
  |  cmpwi TMP0, FRAME_C
  |  rlwinm TMP2, PC, 0, 0, 28 // Clear the low 3 bits of PC: frame delta.
  |  li_vmstate C
  |  sub TMP2, BASE, TMP2 // TMP2 = previous base.
  |  bney ->vm_returnp
  |
  |  // Return to C: copy the results down to BASE-8 and up.
  |  addic. TMP1, RD, -8 // TMP1 = nresults*8; zero means nothing to copy.
  |  stp TMP2, L->base
  |  lwz TMP2, SAVE_NRES
  |  subi BASE, BASE, 8
  |  st_vmstate
  |  slwi TMP2, TMP2, 3 // TMP2 = wanted (nresults+1)*8.
  |  beq >2
  |1:
  |  // Copy loop: one 8 byte slot per iteration from RA to BASE.
  |  addic. TMP1, TMP1, -8
  |  lfd f0, 0(RA)
  |  addi RA, RA, 8
  |  stfd f0, 0(BASE)
  |  addi BASE, BASE, 8
  |  bney <1
  |
  |2:
  |  cmpw TMP2, RD // More/less results wanted?
  |  bne >6
  |3:
  |  stp BASE, L->top // Store new top.
  |
  |->vm_leave_cp:
  |  lp TMP0, SAVE_CFRAME // Restore previous C frame.
  |  li CRET1, 0 // Ok return status for vm_pcall.
  |  stp TMP0, L->cframe
  |
  |->vm_leave_unw:
  |  // Pop the interpreter's C frame and return to the C caller.
  |  restoreregs
  |  blr
  |
  |6:
  |  // Flags are still those of "cmpw TMP2, RD" at label 2.
  |  ble >7 // Less results wanted?
  |  // More results wanted. Check stack size and fill up results with nil.
  |  // Adds one nil per pass and loops back to the comparison at label 2.
  |  lwz TMP1, L->maxstack
  |  cmplw BASE, TMP1
  |  bge >8
  |  stw TISNIL, 0(BASE)
  |  addi RD, RD, 8
  |  addi BASE, BASE, 8
  |  b <2
  |
  |7: // Less results wanted.
  |  // subfic sets the carry only for TMP2 == 0; subfe then turns the
  |  // carry into an all-zeros/all-ones mask.
  |  subfic TMP3, TMP2, 0 // LUA_MULTRET+1 case?
  |  sub TMP0, RD, TMP2
  |  subfe TMP1, TMP1, TMP1 // TMP1 = TMP2 == 0 ? 0 : -1
  |  and TMP0, TMP0, TMP1
  |  sub BASE, BASE, TMP0 // Either keep top or shrink it.
  |  b <3
  |
  |8: // Corner case: need to grow stack for filling up results.
  |  // This can happen if:
  |  // - A C function grows the stack (a lot).
  |  // - The GC shrinks the stack in between.
  |  // - A return back from a lua_call() with (high) nresults adjustment.
  |  // RD is kept in callee-save SAVE0 across the call; TMP2 is recomputed
  |  // from SAVE_NRES afterwards.
  |  stp BASE, L->top // Save current top held in BASE (yes).
  |  mr SAVE0, RD
  |  srwi CARG2, TMP2, 3
  |  mr CARG1, L
  |  bl extern lj_state_growstack // (lua_State *L, int n)
  |  lwz TMP2, SAVE_NRES
  |  mr RD, SAVE0
  |  slwi TMP2, TMP2, 3
  |  lp BASE, L->top // Need the (realloced) L->top in BASE.
  |  b <2
  |
  |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
  |  // (void *cframe, int errcode)
  |  // sp is reset to the given cframe; errcode becomes the return value.
  |  mr sp, CARG1
  |  mr CRET1, CARG2
  |->vm_unwind_c_eh: // Landing pad for external unwinder.
  |  // Reload L (and TOCREG in TOC builds) from the frame, set the VM state
  |  // to C and leave through the common register-restore path.
  |  lwz L, SAVE_L
  |  .toc ld TOCREG, SAVE_TOC
  |  li TMP0, ~LJ_VMST_C
  |  lwz GL:TMP1, L->glref
  |  stw TMP0, GL:TMP1->vmstate
  |  b ->vm_leave_unw
  |
  |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
  |  // (void *cframe)
  |  // sp = cframe with its two low bits cleared.
  |.if GPR64
  |  rldicr sp, CARG1, 0, 61
  |.else
  |  rlwinm sp, CARG1, 0, 0, 29
  |.endif
  |->vm_unwind_ff_eh: // Landing pad for external unwinder.
  |  // Re-establish the interpreter's fixed registers, since nothing can be
  |  // assumed to be live on this path.
  |  lwz L, SAVE_L
  |  .toc ld TOCREG, SAVE_TOC
  |  li TISNUM, LJ_TISNUM // Setup type comparison constants.
  |  lp BASE, L->base
  |  lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
  |  lwz DISPATCH, L->glref // Setup pointer to dispatch table.
  |  // Continuation of the fixed-register setup. TOBIT and TONUM are built
  |  // as single-precision bit patterns in TMP3, spilled to the TMPD slot
  |  // and loaded with lfs.
  |  li ZERO, 0
  |  stw TMP3, TMPD
  |  li TMP1, LJ_TFALSE
  |  ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
  |  li TISNIL, LJ_TNIL
  |  li_vmstate INTERP
  |  lfs TOBIT, TMPD
  |  lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
  |  la RA, -8(BASE) // Results start at BASE-8.
  |  stw TMP3, TMPD
  |  addi DISPATCH, DISPATCH, GG_G2DISP
  |  stw TMP1, 0(RA) // Prepend false to error message.
  |  li RD, 16 // 2 results: false + error message.
  |  st_vmstate
  |  lfs TONUM, TMPD
  |  b ->vm_returnc
  |
  |//-----------------------------------------------------------------------
  |//-- Grow stack for calls -----------------------------------------------
  |//-----------------------------------------------------------------------
  |
  |->vm_growstack_c: // Grow stack for C function.
  |  // Requests LUA_MINSTACK slots and joins the common path at label 2.
  |  li CARG2, LUA_MINSTACK
  |  b >2
  |
  |->vm_growstack_l: // Grow stack for Lua function.
  |  // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
  |  // Publishes base/top in L and passes the frame size in slots as n.
  |  add RC, BASE, RC
  |  sub RA, RA, BASE
  |  stp BASE, L->base
  |  addi PC, PC, 4 // Must point after first instruction.
  |  stp RC, L->top
  |  srwi CARG2, RA, 3
  |2:
  |  // L->base = new base, L->top = top
  |  stw PC, SAVE_PC
  |  mr CARG1, L
  |  bl extern lj_state_growstack // (lua_State *L, int n)
  |  // Reload base/top from L after the call and recompute nargs*8.
  |  lp BASE, L->base
  |  lp RC, L->top
  |  lwz LFUNC:RB, FRAME_FUNC(BASE)
  |  sub RC, RC, BASE
  |  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
  |  ins_callt // Just retry the call.
  |
  |//-----------------------------------------------------------------------
  |//-- Entry points into the assembler VM ---------------------------------
  |//-----------------------------------------------------------------------
  |
  |->vm_resume: // Setup C frame and resume thread.
  |  // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
  |  // CARG3 is used below as the zero source for SAVE_NRES, SAVE_ERRF,
  |  // SAVE_CFRAME and L->status, relying on nres1 = 0 as documented above.
  |  saveregs
  |  mr L, CARG1
  |  lwz DISPATCH, L->glref // Setup pointer to dispatch table.
  |  mr BASE, CARG2
  |  lbz TMP1, L->status
  |  stw L, SAVE_L
  |  li PC, FRAME_CP
  |  addi TMP0, sp, CFRAME_RESUME // cframe pointer tagged with CFRAME_RESUME.
  |  addi DISPATCH, DISPATCH, GG_G2DISP
  |  stw CARG3, SAVE_NRES
  |  cmplwi TMP1, 0 // L->status == 0? Then it is an initial resume.
  |  stw CARG3, SAVE_ERRF
  |  stp CARG3, SAVE_CFRAME
  |  stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
  |  stp TMP0, L->cframe
  |  beq >3
  |
  |  // Resume after yield (like a return).
  |  // Sets up the fixed registers, clears L->status and computes
  |  // RD = (L->top - L->base) + 8 before entering the return path.
  |  stw L, DISPATCH_GL(cur_L)(DISPATCH)
  |  mr RA, BASE
  |  lp BASE, L->base
  |  li TISNUM, LJ_TISNUM // Setup type comparison constants.
  |  lp TMP1, L->top
  |  lwz PC, FRAME_PC(BASE)
  |  lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
  |  stb CARG3, L->status
  |  stw TMP3, TMPD
  |  ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
  |  lfs TOBIT, TMPD
  |  sub RD, TMP1, BASE
  |  stw TMP3, TMPD
  |  lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
  |  addi RD, RD, 8
  |  stw TMP0, TONUM_HI
  |  li_vmstate INTERP
  |  li ZERO, 0
  |  st_vmstate
  |  andix. TMP0, PC, FRAME_TYPE
  |  mr MULTRES, RD
  |  lfs TONUM, TMPD
  |  li TISNIL, LJ_TNIL
  |  beq ->BC_RET_Z
  |  b ->vm_return
  |
  |->vm_pcall: // Setup protected C frame and enter VM.
  |  // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
  |  // Same as vm_call, but with frame type FRAME_CP and ef saved.
  |  saveregs
  |  li PC, FRAME_CP
  |  stw CARG4, SAVE_ERRF
  |  b >1
  |
  |->vm_call: // Setup C frame and enter VM.
  |  // (lua_State *L, TValue *base, int nres1)
  |  saveregs
  |  li PC, FRAME_C
  |
  |1: // Entry point for vm_pcall above (PC = ftype).
  |  lp TMP1, L:CARG1->cframe // Previous cframe, chained via SAVE_CFRAME.
  |  mr L, CARG1
  |  stw CARG3, SAVE_NRES
  |  lwz DISPATCH, L->glref // Setup pointer to dispatch table.
  |  // Remaining C frame setup shared by vm_call and vm_pcall.
  |  stw CARG1, SAVE_L
  |  mr BASE, CARG2
  |  addi DISPATCH, DISPATCH, GG_G2DISP
  |  stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
  |  stp TMP1, SAVE_CFRAME
  |  stp sp, L->cframe // Add our C frame to cframe chain.
  |
  |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
  |  // Sets up the fixed registers (TISNUM, TISNIL, ZERO, TOBIT, TONUM),
  |  // the TONUM_HI slot and the VM state, and computes the frame link
  |  // PC = ftype + (BASE - old base) and RC = L->top - BASE.
  |  stw L, DISPATCH_GL(cur_L)(DISPATCH)
  |  lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
  |  li TISNUM, LJ_TISNUM // Setup type comparison constants.
  |  lp TMP1, L->top
  |  lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
  |  add PC, PC, BASE
  |  stw TMP3, TMPD
  |  li ZERO, 0
  |  ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
  |  lfs TOBIT, TMPD
  |  sub PC, PC, TMP2 // PC = frame delta + frame type
  |  stw TMP3, TMPD
  |  lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
  |  sub NARGS8:RC, TMP1, BASE
  |  stw TMP0, TONUM_HI
  |  li_vmstate INTERP
  |  lfs TONUM, TMPD
  |  li TISNIL, LJ_TNIL
  |  st_vmstate
  |
  |->vm_call_dispatch:
  |  // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
  |  // The word at FRAME_PC(BASE) is checked as the type tag of the callee
  |  // slot; non-functions are resolved through vmeta_call.
  |  lwz TMP0, FRAME_PC(BASE)
  |  lwz LFUNC:RB, FRAME_FUNC(BASE)
  |  checkfunc TMP0; bne ->vmeta_call
  |
  |->vm_call_dispatch_f:
  |  ins_call
  |  // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
  |
  |->vm_cpcall: // Setup protected C frame, call C.
  |  // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
  |  // Calls cp(L, func, ud) with CARG1..CARG3 passed through unchanged.
  |  saveregs
  |  mr L, CARG1
  |  lwz TMP0, L:CARG1->stack
  |  stw CARG1, SAVE_L
  |  lp TMP1, L->top
  |  lwz DISPATCH, L->glref // Setup pointer to dispatch table.
  |  stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
  |  sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
  |  lp TMP1, L->cframe
  |  addi DISPATCH, DISPATCH, GG_G2DISP
  |  // TOC builds: cp is a descriptor; fetch the code address from it.
  |  .toc lp CARG4, 0(CARG4)
  |  li TMP2, 0
  |  stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
  |  stw TMP2, SAVE_ERRF // No error function.
  |  // Link the new C frame into the chain and call the C function in CARG4.
  |  stp TMP1, SAVE_CFRAME
  |  stp sp, L->cframe // Add our C frame to cframe chain.
  |  stw L, DISPATCH_GL(cur_L)(DISPATCH)
  |  mtctr CARG4
  |  bctrl // (lua_State *L, lua_CFunction func, void *ud)
  |  // BASE = returned pointer; the PPE variant avoids the dot form "mr.".
  |.if PPE
  |  mr BASE, CRET1
  |  cmpwi CRET1, 0
  |.else
  |  mr. BASE, CRET1
  |.endif
  |  li PC, FRAME_CP
  |  bne <3 // Else continue with the call.
  |  b ->vm_leave_cp // No base? Just remove C frame.
  |
  |//-----------------------------------------------------------------------
  |//-- Metamethod handling ------------------------------------------------
  |//-----------------------------------------------------------------------
  |
  |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the
  |// stack, so BASE doesn't need to be reloaded across these calls.
  |
  |//-- Continuation dispatch ----------------------------------------------
  |
  |->cont_dispatch:
  |  // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
  |  // Also expects TMP2 = caller base. Loads the continuation from
  |  // -12(BASE) and the saved PC from -16(BASE), restores BASE and KBASE,
  |  // and jumps to the continuation. With FFI, continuation values 0 and 1
  |  // are special and are handled at label 1.
  |  lwz TMP0, -12(BASE) // Continuation.
  |  mr RB, BASE
  |  mr BASE, TMP2 // Restore caller BASE.
  |  lwz LFUNC:TMP1, FRAME_FUNC(TMP2)
  |.if FFI
  |  cmplwi TMP0, 1
  |.endif
  |  lwz PC, -16(RB) // Restore PC from [cont|PC].
  |  subi TMP2, RD, 8
  |  lwz TMP1, LFUNC:TMP1->pc
  |  stwx TISNIL, RA, TMP2 // Ensure one valid arg.
  |.if FFI
  |  ble >1
  |.endif
  |  lwz KBASE, PC2PROTO(k)(TMP1)
  |  // BASE = base, RA = resultptr, RB = meta base
  |  mtctr TMP0
  |  bctr // Jump to continuation.
  |
  |.if FFI
  |1:
  |  // Flags are still those of "cmplwi TMP0, 1" above.
  |  beq ->cont_ffi_callback // cont = 1: return from FFI callback.
  |  // cont = 0: tailcall from C function.
  |  // RC = (meta base - 16) - BASE, then tail call.
  |  subi TMP1, RB, 16
  |  sub RC, TMP1, BASE
  |  b ->vm_call_tail
  |.endif
  |
  |->cont_cat: // RA = resultptr, RB = meta base
  |  // Re-decodes the instruction before PC. If BASE + RB operand differs
  |  // from meta base - 16, the result is stored there and BC_CAT_Z is
  |  // re-entered with CARG3 holding the byte distance between the two;
  |  // otherwise the result is stored into the RA operand's slot.
  |  lwz INS, -4(PC)
  |  subi CARG2, RB, 16
  |  decode_RB8 SAVE0, INS
  |  lfd f0, 0(RA)
  |  add TMP1, BASE, SAVE0
  |  stp BASE, L->base
  |  cmplw TMP1, CARG2
  |  sub CARG3, CARG2, TMP1
  |  decode_RA8 RA, INS
  |  stfd f0, 0(CARG2)
  |  bney ->BC_CAT_Z
  |  stfdx f0, BASE, RA
  |  b ->cont_nop
  |
  |//-- Table indexing metamethods -----------------------------------------
  |
  |// All vmeta_tget* entry points set up CARG2 = object pointer and
  |// CARG3 = key pointer, then join the common call at label 1 below.
  |
  |->vmeta_tgets1:
  |  // Key: string in RC, boxed into tmptv. Object: slot BASE+RB.
  |  la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
  |  li TMP0, LJ_TSTR
  |  decode_RB8 RB, INS
  |  stw STR:RC, 4(CARG3)
  |  add CARG2, BASE, RB
  |  stw TMP0, 0(CARG3)
  |  b >1
  |
  |->vmeta_tgets:
  |  // Object: table in RB, boxed into tmptv. Key: string in RC, boxed
  |  // into tmptv2.
  |  la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
  |  li TMP0, LJ_TTAB
  |  stw TAB:RB, 4(CARG2)
  |  la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
  |  stw TMP0, 0(CARG2)
  |  li TMP1, LJ_TSTR
  |  stw STR:RC, 4(CARG3)
  |  stw TMP1, 0(CARG3)
  |  b >1
  |
  |->vmeta_tgetb: // TMP0 = index
  |  // Key: the index in TMP0, boxed into tmptv either as an integer
  |  // (DUALNUM) or converted to a double first.
  |.if not DUALNUM
  |  tonum_u f0, TMP0
  |.endif
  |  decode_RB8 RB, INS
  |  la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
  |  add CARG2, BASE, RB
  |.if DUALNUM
  |  stw TISNUM, 0(CARG3)
  |  stw TMP0, 4(CARG3)
  |.else
  |  stfd f0, 0(CARG3)
  |.endif
  |  b >1
  |
  |->vmeta_tgetv:
  |  // Object and key are both stack slots: BASE+RB and BASE+RC.
  |  decode_RB8 RB, INS
  |  decode_RC8 RC, INS
  |  add CARG2, BASE, RB
  |  add CARG3, BASE, RC
  |1:
  |  stp BASE, L->base
  |  mr CARG1, L
  |  stw PC, SAVE_PC
  |  bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
  |  // Returns TValue * (finished) or NULL (metamethod).
  |  cmplwi CRET1, 0
  |  beq >3
  |  // Finished: copy the value to the slot at BASE+RA and continue.
  |  lfd f0, 0(CRET1)
  |  ins_next1
  |  stfdx f0, BASE, RA
  |  ins_next2
  |
  |3: // Call __index metamethod.
  |  // BASE = base, L->top = new base, stack = cont/func/t/k
  |  // Saves PC into the continuation frame, sets
  |  // PC = FRAME_CONT + (new base - old base), and dispatches the call.
  |  subfic TMP1, BASE, FRAME_CONT
  |  lp BASE, L->top
  |  stw PC, -16(BASE) // [cont|PC]
  |  add PC, TMP1, BASE
  |  lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
  |  li NARGS8:RC, 16 // 2 args for func(t, k).
  |  b ->vm_call_dispatch_f
  |
  |->vmeta_tgetr:
  |  // Arguments are expected to be set up by the caller. A non-NULL result
  |  // is loaded into f14 for BC_TGETR_Z; NULL stores nil to BASE+RA.
  |  bl extern lj_tab_getinth // (GCtab *t, int32_t key)
  |  // Returns cTValue * or NULL.
  |  cmplwi CRET1, 0
  |  beq >1
  |  lfd f14, 0(CRET1)
  |  b ->BC_TGETR_Z
  |1:
  |  stwx TISNIL, BASE, RA
  |  b ->cont_nop
  |
  |//-----------------------------------------------------------------------
  |
  |// All vmeta_tset* entry points set up CARG2 = object pointer and
  |// CARG3 = key pointer, then join the common call at label 1 below.
  |
  |->vmeta_tsets1:
  |  // Key: string in RC, boxed into tmptv. Object: slot BASE+RB.
  |  la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
  |  li TMP0, LJ_TSTR
  |  decode_RB8 RB, INS
  |  stw STR:RC, 4(CARG3)
  |  add CARG2, BASE, RB
  |  stw TMP0, 0(CARG3)
  |  b >1
  |
  |->vmeta_tsets:
  |  // Object: table in RB, boxed into tmptv. Key: string in RC, boxed
  |  // into tmptv2.
  |  la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
  |  li TMP0, LJ_TTAB
  |  stw TAB:RB, 4(CARG2)
  |  la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
  |  stw TMP0, 0(CARG2)
  |  li TMP1, LJ_TSTR
  |  stw STR:RC, 4(CARG3)
  |  stw TMP1, 0(CARG3)
  |  b >1
  |
  |->vmeta_tsetb: // TMP0 = index
  |  // Key: the index in TMP0, boxed into tmptv either as an integer
  |  // (DUALNUM) or converted to a double first.
  |.if not DUALNUM
  |  tonum_u f0, TMP0
  |.endif
  |  decode_RB8 RB, INS
  |  la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
  |  add CARG2, BASE, RB
  |.if DUALNUM
  |  stw TISNUM, 0(CARG3)
  |  stw TMP0, 4(CARG3)
  |.else
  |  stfd f0, 0(CARG3)
  |.endif
  |  b >1
  |
  |->vmeta_tsetv:
  |  // Object and key are both stack slots: BASE+RB and BASE+RC.
  |  decode_RB8 RB, INS
  |  decode_RC8 RC, INS
  |  add CARG2, BASE, RB
  |  add CARG3, BASE, RC
  |1:
  |  stp BASE, L->base
  |  mr CARG1, L
  |  stw PC, SAVE_PC
  |  bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
  |  // Returns TValue * (finished) or NULL (metamethod).
  |  cmplwi CRET1, 0
  |  lfdx f0, BASE, RA // Value to store, needed on both paths.
  |  beq >3
  |  // NOBARRIER: lj_meta_tset ensures the table is not black.
981 | ins_next1 982 | stfd f0, 0(CRET1) 983 | ins_next2 984 | 985 |3: // Call __newindex metamethod. 986 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) 987 | subfic TMP1, BASE, FRAME_CONT 988 | lp BASE, L->top 989 | stw PC, -16(BASE) // [cont|PC] 990 | add PC, TMP1, BASE 991 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 992 | li NARGS8:RC, 24 // 3 args for func(t, k, v) 993 | stfd f0, 16(BASE) // Copy value to third argument. 994 | b ->vm_call_dispatch_f 995 | 996 |->vmeta_tsetr: 997 | stp BASE, L->base 998 | stw PC, SAVE_PC 999 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) 1000 | // Returns TValue *. 1001 | stfd f14, 0(CRET1) 1002 | b ->cont_nop 1003 | 1004 |//-- Comparison metamethods --------------------------------------------- 1005 | 1006 |->vmeta_comp: 1007 | mr CARG1, L 1008 | subi PC, PC, 4 1009 |.if DUALNUM 1010 | mr CARG2, RA 1011 |.else 1012 | add CARG2, BASE, RA 1013 |.endif 1014 | stw PC, SAVE_PC 1015 |.if DUALNUM 1016 | mr CARG3, RD 1017 |.else 1018 | add CARG3, BASE, RD 1019 |.endif 1020 | stp BASE, L->base 1021 | decode_OP1 CARG4, INS 1022 | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) 1023 | // Returns 0/1 or TValue * (metamethod). 1024 |3: 1025 | cmplwi CRET1, 1 1026 | bgt ->vmeta_binop 1027 | subfic CRET1, CRET1, 0 1028 |4: 1029 | lwz INS, 0(PC) 1030 | addi PC, PC, 4 1031 | decode_RD4 TMP2, INS 1032 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 1033 | and TMP2, TMP2, CRET1 1034 | add PC, PC, TMP2 1035 |->cont_nop: 1036 | ins_next 1037 | 1038 |->cont_ra: // RA = resultptr 1039 | lwz INS, -4(PC) 1040 | lfd f0, 0(RA) 1041 | decode_RA8 TMP1, INS 1042 | stfdx f0, BASE, TMP1 1043 | b ->cont_nop 1044 | 1045 |->cont_condt: // RA = resultptr 1046 | lwz TMP0, 0(RA) 1047 | .gpr64 extsw TMP0, TMP0 1048 | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is true. 
  |  // Turn the carry of the preceding subfic into a 0/-1 mask, invert it
  |  // and join the conditional jump code at label 4.
  |  subfe CRET1, CRET1, CRET1
  |  not CRET1, CRET1
  |  b <4
  |
  |->cont_condf: // RA = resultptr
  |  // Same as cont_condt, but without inverting the mask.
  |  lwz TMP0, 0(RA)
  |  .gpr64 extsw TMP0, TMP0
  |  subfic TMP0, TMP0, LJ_TTRUE // Branch if result is false.
  |  subfe CRET1, CRET1, CRET1
  |  b <4
  |
  |->vmeta_equal:
  |  // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
  |  subi PC, PC, 4
  |  stp BASE, L->base
  |  mr CARG1, L
  |  stw PC, SAVE_PC
  |  bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
  |  // Returns 0/1 or TValue * (metamethod).
  |  b <3
  |
  |->vmeta_equal_cd:
  |  // The label always exists; the body is only emitted with FFI.
  |.if FFI
  |  mr CARG2, INS
  |  subi PC, PC, 4
  |  stp BASE, L->base
  |  mr CARG1, L
  |  stw PC, SAVE_PC
  |  bl extern lj_meta_equal_cd // (lua_State *L, BCIns op)
  |  // Returns 0/1 or TValue * (metamethod).
  |  b <3
  |.endif
  |
  |->vmeta_istype:
  |  // RA and RD are scaled by 8; pass them as plain register/type numbers.
  |  subi PC, PC, 4
  |  stp BASE, L->base
  |  srwi CARG2, RA, 3
  |  mr CARG1, L
  |  srwi CARG3, RD, 3
  |  stw PC, SAVE_PC
  |  bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
  |  b ->cont_nop
  |
  |//-- Arithmetic metamethods ---------------------------------------------
  |
  |// Each entry point sets CARG3/CARG4 to the two operand pointers and
  |// joins the common call at label 1. The *2 variants take RB/RC as they
  |// are (DUALNUM only); the others add BASE or KBASE to an offset.
  |
  |->vmeta_arith_nv:
  |  add CARG3, KBASE, RC
  |  add CARG4, BASE, RB
  |  b >1
  |->vmeta_arith_nv2:
  |.if DUALNUM
  |  mr CARG3, RC
  |  mr CARG4, RB
  |  b >1
  |.endif
  |
  |->vmeta_unm:
  |  // Unary minus passes the same operand twice.
  |  mr CARG3, RD
  |  mr CARG4, RD
  |  b >1
  |
  |->vmeta_arith_vn:
  |  add CARG3, BASE, RB
  |  add CARG4, KBASE, RC
  |  b >1
  |
  |->vmeta_arith_vv:
  |  add CARG3, BASE, RB
  |  add CARG4, BASE, RC
  |.if DUALNUM
  |  b >1
  |.endif
  |->vmeta_arith_vn2:
  |->vmeta_arith_vv2:
  |.if DUALNUM
  |  mr CARG3, RB
  |  mr CARG4, RC
  |.endif
  |1:
  |  add CARG2, BASE, RA
  |  stp BASE, L->base
  |  mr CARG1, L
  |  stw PC, SAVE_PC
  |  decode_OP1 CARG5, INS // Caveat: CARG5 overlaps INS.
  |  bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
  |  // Returns NULL (finished) or TValue * (metamethod).
  |  cmplwi CRET1, 0
  |  beq ->cont_nop
  |
  |  // Call metamethod for binary op.
  |->vmeta_binop:
  |  // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
  |  // Saves PC into the continuation frame, sets
  |  // PC = FRAME_CONT + (new base - old base) and TMP2 = old base, then
  |  // dispatches through vm_call_dispatch.
  |  sub TMP1, CRET1, BASE
  |  stw PC, -16(CRET1) // [cont|PC]
  |  mr TMP2, BASE
  |  addi PC, TMP1, FRAME_CONT
  |  mr BASE, CRET1
  |  li NARGS8:RC, 16 // 2 args for func(o1, o2).
  |  b ->vm_call_dispatch
  |
  |->vmeta_len:
  |  // With LJ_52 the incoming CARG1 is kept in SAVE0 across the call and
  |  // restored before retrying at BC_LEN_Z.
#if LJ_52
  |  mr SAVE0, CARG1
#endif
  |  mr CARG2, RD
  |  stp BASE, L->base
  |  mr CARG1, L
  |  stw PC, SAVE_PC
  |  bl extern lj_meta_len // (lua_State *L, TValue *o)
  |  // Returns NULL (retry) or TValue * (metamethod base).
#if LJ_52
  |  cmplwi CRET1, 0
  |  bne ->vmeta_binop // Binop call for compatibility.
  |  mr CARG1, SAVE0
  |  b ->BC_LEN_Z
#else
  |  b ->vmeta_binop // Binop call for compatibility.
#endif
  |
  |//-- Call metamethod ----------------------------------------------------
  |
  |->vmeta_call: // Resolve and call __call metamethod.
  |  // TMP2 = old base, BASE = new base, RC = nargs*8
  |  // nargs*8 is kept in callee-save SAVE0 across the C call.
  |  mr CARG1, L
  |  stp TMP2, L->base // This is the callers base!
  |  subi CARG2, BASE, 8
  |  stw PC, SAVE_PC
  |  add CARG3, BASE, RC
  |  mr SAVE0, NARGS8:RC
  |  bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
  |  lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
  |  addi NARGS8:RC, SAVE0, 8 // Got one more argument now.
  |  ins_call
  |
  |->vmeta_callt: // Resolve __call for BC_CALLT.
|  // BASE = old base, RA = new base, RC = nargs*8
|  mr CARG1, L
|  stp BASE, L->base
|  subi CARG2, RA, 8
|  stw PC, SAVE_PC
|  add CARG3, RA, RC
|  mr SAVE0, NARGS8:RC  // Keep nargs*8 in SAVE0 across the C call.
|  bl extern lj_meta_call  // (lua_State *L, TValue *func, TValue *top)
|  lwz TMP1, FRAME_PC(BASE)
|  addi NARGS8:RC, SAVE0, 8  // Got one more argument now.
|  lwz LFUNC:RB, FRAME_FUNC(RA)  // Guaranteed to be a function here.
|  b ->BC_CALLT_Z
|
|//-- Argument coercion for 'for' statement ------------------------------
|
|// Calls lj_meta_for on the slots at RA, then re-decodes the operands from
|// the instruction saved in SAVE0 and re-enters BC_FORI (or BC_JFORI when
|// JIT is enabled and the saved opcode matches).
|->vmeta_for:
|  mr CARG1, L
|  stp BASE, L->base
|  mr CARG2, RA
|  stw PC, SAVE_PC
|  mr SAVE0, INS
|  bl extern lj_meta_for  // (lua_State *L, TValue *base)
|.if JIT
|  decode_OP1 TMP0, SAVE0
|.endif
|  decode_RA8 RA, SAVE0
|.if JIT
|  cmpwi TMP0, BC_JFORI
|.endif
|  decode_RD8 RD, SAVE0
|.if JIT
|  beqy =>BC_JFORI
|.endif
|  b =>BC_FORI
|
|//-----------------------------------------------------------------------
|//-- Fast functions -----------------------------------------------------
|//-----------------------------------------------------------------------
|
|// Declares the entry label ff_<name> without any argument checks.
|.macro .ffunc, name
|->ff_ .. name:
|.endmacro
|
|// Entry label plus a check for at least one argument (NARGS8:RC >= 8).
|// Loads the two words of the first argument: CARG3 from offset 0 and
|// CARG1 from offset 4. cr0 keeps the result of the argument-count compare.
|.macro .ffunc_1, name
|->ff_ .. name:
|  cmplwi NARGS8:RC, 8
|  lwz CARG3, 0(BASE)
|  lwz CARG1, 4(BASE)
|  blt ->fff_fallback
|.endmacro
|
|// Entry label plus a check for at least two arguments. Loads CARG3/CARG1
|// from the first slot and CARG4/CARG2 from the second slot.
|.macro .ffunc_2, name
|->ff_ .. name:
|  cmplwi NARGS8:RC, 16
|  lwz CARG3, 0(BASE)
|  lwz CARG4, 8(BASE)
|  lwz CARG1, 4(BASE)
|  lwz CARG2, 12(BASE)
|  blt ->fff_fallback
|.endmacro
|
|// Like .ffunc_1, but loads the first argument as a double into FARG1 and
|// goes to the fallback unless checknum on its first word passes.
|.macro .ffunc_n, name
|->ff_ .. name:
|  cmplwi NARGS8:RC, 8
|  lwz CARG3, 0(BASE)
|  lfd FARG1, 0(BASE)
|  blt ->fff_fallback
|  checknum CARG3; bge ->fff_fallback
|.endmacro
|
|// Two-argument variant of .ffunc_n: FARG1/FARG2 hold the two doubles.
|.macro .ffunc_nn, name
|->ff_ .. name:
|  cmplwi NARGS8:RC, 16
|  lwz CARG3, 0(BASE)
|  lfd FARG1, 0(BASE)
|  lwz CARG4, 8(BASE)
|  lfd FARG2, 8(BASE)
|  blt ->fff_fallback
|  checknum CARG3; bge ->fff_fallback
|  checknum CARG4; bge ->fff_fallback
|.endmacro
|
|// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
|// Calls fff_gcstep (branch-and-link) when gc.total >= gc.threshold.
|.macro ffgccheck
|  lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH)
|  lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
|  cmplw TMP0, TMP1
|  bgel ->fff_gcstep
|.endmacro
|
|//-- Base library: checks -----------------------------------------------
|
|// assert: falls back if the first argument's tag is >= LJ_TFALSE
|// (unsigned). Otherwise copies all arguments down to RA = BASE-8 and
|// returns them.
|.ffunc_1 assert
|  li TMP1, LJ_TFALSE
|  la RA, -8(BASE)
|  cmplw cr1, CARG3, TMP1  // Uses cr1 so cr0 survives for the beq below.
|  lwz PC, FRAME_PC(BASE)
|  bge cr1, ->fff_fallback
|  stw CARG3, 0(RA)
|  addi RD, NARGS8:RC, 8  // Compute (nresults+1)*8.
|  stw CARG1, 4(RA)
|  // cr0 still holds the NARGS8:RC vs. 8 compare from .ffunc_1.
|  beq ->fff_res  // Done if exactly 1 argument.
|  li TMP1, 8
|  subi RC, RC, 8
|1:
|  // Copy the remaining arguments, 8 bytes per iteration.
|  cmplw TMP1, RC
|  lfdx f0, BASE, TMP1
|  stfdx f0, RA, TMP1
|  addi TMP1, TMP1, 8
|  bney <1
|  b ->fff_res
|
|// type: returns an upvalue of the C function, selected by the type tag.
|.ffunc type
|  cmplwi NARGS8:RC, 8
|  lwz CARG1, 0(BASE)
|  blt ->fff_fallback
|  .gpr64 extsw CARG1, CARG1
|  // Branch-free index: the carry from subfc selects between ~LJ_TISNUM
|  // (tag below TISNUM, unsigned) and ~tag (otherwise). This assumes the
|  // TISNUM register holds LJ_TISNUM -- set up outside this section.
|  subfc TMP0, TISNUM, CARG1
|  subfe TMP2, CARG1, CARG1
|  orc TMP1, TMP2, TMP0
|  addi TMP1, TMP1, ~LJ_TISNUM+1
|  slwi TMP1, TMP1, 3  // Scale the index by 8 bytes per upvalue slot.
|  la TMP2, CFUNC:RB->upvalue
|  lfdx FARG1, TMP2, TMP1
|  b ->fff_resn
|
|//-- Base library: getters and setters ---------------------------------
|
|// getmetatable: tables take the path at label 1; all other types are
|// handled at label 6.
|.ffunc_1 getmetatable
|  checktab CARG3; bne >6
|1:  // Field metatable must be at same offset for GCtab and GCudata!
|  lwz TAB:CARG1, TAB:CARG1->metatable
|2:
|  // TAB:CARG1 = metatable or NULL. A NULL metatable returns nil.
|  li CARG3, LJ_TNIL
|  cmplwi TAB:CARG1, 0
|  lwz STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
|  beq ->fff_restv
|  lwz TMP0, TAB:CARG1->hmask
|  li CARG3, LJ_TTAB  // Use metatable as default result.
|  lwz TMP1, STR:RC->hash
|  lwz NODE:TMP2, TAB:CARG1->node
|  and TMP1, TMP1, TMP0  // idx = str->hash & tab->hmask
|  slwi TMP0, TMP1, 5
|  slwi TMP1, TMP1, 3
|  sub TMP1, TMP0, TMP1
|  add NODE:TMP2, NODE:TMP2, TMP1  // node = tab->node + (idx*32-idx*8)
|3:  // Rearranged logic, because we expect _not_ to find the key.
|  // Load both words of the node's key and value before testing the key.
|  lwz CARG4, NODE:TMP2->key
|  lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
|  lwz CARG2, NODE:TMP2->val
|  lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2)
|  checkstr CARG4; bne >4
|  cmpw TMP0, STR:RC; beq >5
|4:
|  // Follow the collision chain.
|  lwz NODE:TMP2, NODE:TMP2->next
|  cmplwi NODE:TMP2, 0
|  beq ->fff_restv  // Not found, keep default result.
|  b <3
|5:
|  checknil CARG2
|  beq ->fff_restv  // Ditto for nil value.
|  mr CARG3, CARG2  // Return value of mt.__metatable.
|  mr CARG1, TMP1
|  b ->fff_restv
|
|6:
|  // Not a table. Userdata shares the table path. Any other type indexes
|  // the gcroot[GCROOT_BASEMT] array, using the same branch-free index
|  // computation as the 'type' fast function, scaled by 4 bytes per entry.
|  cmpwi CARG3, LJ_TUDATA; beq <1
|  .gpr64 extsw CARG3, CARG3
|  subfc TMP0, TISNUM, CARG3
|  subfe TMP2, CARG3, CARG3
|  orc TMP1, TMP2, TMP0
|  addi TMP1, TMP1, ~LJ_TISNUM+1
|  slwi TMP1, TMP1, 2
|  la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH)
|  lwzx TAB:CARG1, TMP2, TMP1
|  b <2
|
|.ffunc_2 setmetatable
|  // Fast path: no mt for table yet and not clearing the mt.
|  checktab CARG3; bne ->fff_fallback
|  lwz TAB:TMP1, TAB:CARG1->metatable
|  checktab CARG4; bne ->fff_fallback
|  cmplwi TAB:TMP1, 0
|  lbz TMP3, TAB:CARG1->marked
|  bne ->fff_fallback
|  andix. TMP0, TMP3, LJ_GC_BLACK  // isblack(table)
|  stw TAB:CARG2, TAB:CARG1->metatable
|  // The barrier below only runs when the LJ_GC_BLACK bit was set.
|  beq ->fff_restv
|  barrierback TAB:CARG1, TMP3, TMP0
|  b ->fff_restv
|
|// rawget: needs at least two arguments and a table as the first one.
|// The key is passed by address (BASE+8) to lj_tab_get.
|.ffunc rawget
|  cmplwi NARGS8:RC, 16
|  lwz CARG4, 0(BASE)
|  lwz TAB:CARG2, 4(BASE)
|  blt ->fff_fallback
|  checktab CARG4; bne ->fff_fallback
|  la CARG3, 8(BASE)
|  mr CARG1, L
|  bl extern lj_tab_get  // (lua_State *L, GCtab *t, cTValue *key)
|  // Returns cTValue *.
|  lfd FARG1, 0(CRET1)
|  b ->fff_resn
|
|//-- Base library: conversions ------------------------------------------
|
|.ffunc tonumber
|  // Only handles the number case inline (without a base argument).
|  cmplwi NARGS8:RC, 8
|  lwz CARG1, 0(BASE)
|  lfd FARG1, 0(BASE)
|  bne ->fff_fallback  // Exactly one argument.
|  checknum CARG1; bgt ->fff_fallback
|  b ->fff_resn
|
|.ffunc_1 tostring
|  // Only handles the string or number case inline.
|  checkstr CARG3
|  // A __tostring method in the string base metatable is ignored.
|  beq ->fff_restv  // String key?
|  // Handle numbers inline, unless a number base metatable is present.
|  lwz TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
|  checknum CARG3
|  cmplwi cr1, TMP0, 0
|  stp BASE, L->base  // Add frame since C call can throw.
|  // Merge both conditions into cr0.eq: cr0.gt OR NOT cr1.eq.
|  crorc 4*cr0+eq, 4*cr0+gt, 4*cr1+eq
|  stw PC, SAVE_PC  // Redundant (but a defined value).
|  beq ->fff_fallback
|  ffgccheck
|  mr CARG1, L
|  mr CARG2, BASE
|.if DUALNUM
|  bl extern lj_strfmt_number  // (lua_State *L, cTValue *o)
|.else
|  bl extern lj_strfmt_num  // (lua_State *L, lua_Number *np)
|.endif
|  // Returns GCstr *.
|  // (tostring, continued) Tag the returned string pointer and return it.
|  li CARG3, LJ_TSTR
|  b ->fff_restv
|
|//-- Base library: iterators -------------------------------------------
|
|// next: calls lj_tab_next with the key slot at BASE+8. A non-zero result
|// returns the two slots at BASE+8 and BASE+16; zero returns nil.
|.ffunc next
|  cmplwi NARGS8:RC, 8
|  lwz CARG1, 0(BASE)
|  lwz TAB:CARG2, 4(BASE)
|  blt ->fff_fallback
|  stwx TISNIL, BASE, NARGS8:RC  // Set missing 2nd arg to nil.
|  checktab CARG1
|  lwz PC, FRAME_PC(BASE)
|  bne ->fff_fallback
|  stp BASE, L->base  // Add frame since C call can throw.
|  mr CARG1, L
|  stp BASE, L->top  // Dummy frame length is ok.
|  la CARG3, 8(BASE)
|  stw PC, SAVE_PC
|  bl extern lj_tab_next  // (lua_State *L, GCtab *t, TValue *key)
|  // Returns 0 at end of traversal.
|  cmplwi CRET1, 0
|  li CARG3, LJ_TNIL
|  beq ->fff_restv  // End of traversal: return nil.
|  lfd f0, 8(BASE)  // Copy key and value to results.
|  la RA, -8(BASE)
|  lfd f1, 16(BASE)
|  stfd f0, 0(RA)
|  li RD, (2+1)*8
|  stfd f1, 8(RA)
|  b ->fff_res
|
|// pairs: returns three values starting at RA = BASE-8. upvalue[0] is
|// written to the first slot, the table argument stays in place at BASE,
|// and the tag word at BASE+8 is set to nil. With LJ_52, a table that has
|// a metatable takes the fallback instead.
|.ffunc_1 pairs
|  checktab CARG3
|  lwz PC, FRAME_PC(BASE)
|  bne ->fff_fallback
#if LJ_52
|  lwz TAB:TMP2, TAB:CARG1->metatable
|  lfd f0, CFUNC:RB->upvalue[0]
|  cmplwi TAB:TMP2, 0
|  la RA, -8(BASE)
|  bne ->fff_fallback
#else
|  lfd f0, CFUNC:RB->upvalue[0]
|  la RA, -8(BASE)
#endif
|  stw TISNIL, 8(BASE)
|  li RD, (3+1)*8
|  stfd f0, 0(RA)
|  b ->fff_res
|
|// ipairs_aux: iterator step. Needs a table and a numeric index.
|// DUALNUM builds require an integer index (loaded into TMP2). Other
|// builds load the index as a double into FARG2 and assemble the constant
|// 1.0 (high word 0x3ff00000, low word zero) in TMPD for the increment.
|.ffunc ipairs_aux
|  cmplwi NARGS8:RC, 16
|  lwz CARG3, 0(BASE)
|  lwz TAB:CARG1, 4(BASE)
|  lwz CARG4, 8(BASE)
|.if DUALNUM
|  lwz TMP2, 12(BASE)
|.else
|  lfd FARG2, 8(BASE)
|.endif
|  blt ->fff_fallback
|  checktab CARG3
|  checknum cr1, CARG4
|  lwz PC, FRAME_PC(BASE)
|.if DUALNUM
|  bne ->fff_fallback
|  bne cr1, ->fff_fallback
|.else
|  lus TMP0, 0x3ff0
|  stw ZERO, TMPD_LO
|  bne ->fff_fallback
|  stw TMP0, TMPD_HI
|  bge cr1, ->fff_fallback
|  // (ipairs_aux, continued; this is still the non-DUALNUM branch)
|  lfd FARG1, TMPD
|  toint TMP2, FARG2, f0
|.endif
|  lwz TMP0, TAB:CARG1->asize
|  lwz TMP1, TAB:CARG1->array
|.if not DUALNUM
|  fadd FARG2, FARG2, FARG1  // Add the constant assembled in TMPD.
|.endif
|  addi TMP2, TMP2, 1  // Next integer index.
|  la RA, -8(BASE)
|  cmplw TMP0, TMP2
|  // Store the incremented index as the first result.
|.if DUALNUM
|  stw TISNUM, 0(RA)
|  slwi TMP3, TMP2, 3
|  stw TMP2, 4(RA)
|.else
|  slwi TMP3, TMP2, 3
|  stfd FARG2, 0(RA)
|.endif
|  ble >2  // Not in array part?
|  // Load the array slot twice: tag word for the nil test, double for the copy.
|  lwzx TMP2, TMP1, TMP3
|  lfdx f0, TMP1, TMP3
|1:
|  checknil TMP2
|  li RD, (0+1)*8
|  beq ->fff_res  // End of iteration, return 0 results.
|  li RD, (2+1)*8
|  stfd f0, 8(RA)
|  b ->fff_res
|2:  // Check for empty hash part first. Otherwise call C function.
|  lwz TMP0, TAB:CARG1->hmask
|  cmplwi TMP0, 0
|  li RD, (0+1)*8
|  beq ->fff_res
|  mr CARG2, TMP2
|  bl extern lj_tab_getinth  // (GCtab *t, int32_t key)
|  // Returns cTValue * or NULL.
|  cmplwi CRET1, 0
|  li RD, (0+1)*8
|  beq ->fff_res
|  lwz TMP2, 0(CRET1)
|  lfd f0, 0(CRET1)
|  b <1
|
|// ipairs: like pairs, but the slot at BASE+8 is initialised to zero
|// instead of nil: an integer zero with DUALNUM, otherwise two zero words.
|.ffunc_1 ipairs
|  checktab CARG3
|  lwz PC, FRAME_PC(BASE)
|  bne ->fff_fallback
#if LJ_52
|  lwz TAB:TMP2, TAB:CARG1->metatable
|  lfd f0, CFUNC:RB->upvalue[0]
|  cmplwi TAB:TMP2, 0
|  la RA, -8(BASE)
|  bne ->fff_fallback
#else
|  lfd f0, CFUNC:RB->upvalue[0]
|  la RA, -8(BASE)
#endif
|.if DUALNUM
|  stw TISNUM, 8(BASE)
|.else
|  stw ZERO, 8(BASE)
|.endif
|  stw ZERO, 12(BASE)
|  li RD, (3+1)*8
|  stfd f0, 0(RA)
|  b ->fff_res
|
|//-- Base library: catch errors ----------------------------------------
|
|// pcall: needs at least one argument. The called function's frame starts
|// at BASE+8, and TMP2 keeps the old base for vm_call_dispatch.
|.ffunc pcall
|  cmplwi NARGS8:RC, 8
|  lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
|  blt ->fff_fallback
|  mr TMP2, BASE
|  la BASE, 8(BASE)
|  // Remember active hook before pcall.
|  // (pcall, continued) Extract the HOOK_ACTIVE bit into bit 0 of TMP3 and
|  // fold it into the frame link stored in PC.
|  rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31
|  subi NARGS8:RC, NARGS8:RC, 8
|  addi PC, TMP3, 8+FRAME_PCALL
|  b ->vm_call_dispatch
|
|// xpcall: needs at least two arguments, and the second one must be a
|// function. The first two stack slots are swapped, and the called
|// function's frame starts at BASE+16.
|.ffunc xpcall
|  cmplwi NARGS8:RC, 16
|  lwz CARG4, 8(BASE)
|  lfd FARG2, 8(BASE)
|  lfd FARG1, 0(BASE)
|  blt ->fff_fallback
|  lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH)
|  mr TMP2, BASE
|  checkfunc CARG4; bne ->fff_fallback  // Traceback must be a function.
|  la BASE, 16(BASE)
|  // Remember active hook before pcall.
|  rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31
|  stfd FARG2, 0(TMP2)  // Swap function and traceback.
|  subi NARGS8:RC, NARGS8:RC, 16
|  stfd FARG1, 8(TMP2)
|  addi PC, TMP1, 16+FRAME_PCALL
|  b ->vm_call_dispatch
|
|//-- Coroutine library --------------------------------------------------
|
|// Shared body for coroutine.resume (resume = 1) and the wrap helper
|// (resume = 0). The resume variant takes the thread from the first
|// argument; the wrap variant takes it from upvalue[0] of the C function.
|.macro coroutine_resume_wrap, resume
|.if resume
|.ffunc_1 coroutine_resume
|  cmpwi CARG3, LJ_TTHREAD; bne ->fff_fallback
|.else
|.ffunc coroutine_wrap_aux
|  lwz L:CARG1, CFUNC:RB->upvalue[0].gcr
|.endif
|  // Validate the target thread. All failure conditions are accumulated
|  // into cr6.lt and tested once by the blt below.
|  lbz TMP0, L:CARG1->status
|  lp TMP1, L:CARG1->cframe
|  lp CARG2, L:CARG1->top
|  cmplwi cr0, TMP0, LUA_YIELD
|  lp TMP2, L:CARG1->base
|  cmplwi cr1, TMP1, 0
|  lwz TMP0, L:CARG1->maxstack
|  cmplw cr7, CARG2, TMP2
|  lwz PC, FRAME_PC(BASE)
|  crorc 4*cr6+lt, 4*cr0+gt, 4*cr1+eq  // st>LUA_YIELD || cframe!=0
|  add TMP2, CARG2, NARGS8:RC
|  crandc 4*cr6+gt, 4*cr7+eq, 4*cr0+eq  // base==top && st!=LUA_YIELD
|  cmplw cr1, TMP2, TMP0
|  cror 4*cr6+lt, 4*cr6+lt, 4*cr6+gt
|  stw PC, SAVE_PC
|  cror 4*cr6+lt, 4*cr6+lt, 4*cr1+gt  // cond1 || cond2 || stackov
|  stp BASE, L->base
|  blt cr6, ->fff_fallback
|1:
|.if resume
|  addi BASE, BASE, 8  // Keep resumed thread in stack for GC.
|  // (coroutine_resume_wrap, continued; still inside '.if resume')
|  // The thread argument itself is not passed on to the coroutine.
|  subi NARGS8:RC, NARGS8:RC, 8
|  subi TMP2, TMP2, 8
|.endif
|  stp TMP2, L:CARG1->top
|  li TMP1, 0
|  stp BASE, L->top
|2:  // Move args to coroutine.
|  cmpw TMP1, NARGS8:RC
|  lfdx f0, BASE, TMP1
|  beq >3
|  stfdx f0, CARG2, TMP1
|  addi TMP1, TMP1, 8
|  b <2
|3:
|  li CARG3, 0
|  mr L:SAVE0, L:CARG1  // Keep the coroutine pointer across the call.
|  li CARG4, 0
|  bl ->vm_resume  // (lua_State *L, TValue *base, 0, 0)
|  // Returns thread status.
|4:
|  // Reload both threads' stack pointers and restore the VM state.
|  lp TMP2, L:SAVE0->base
|  cmplwi CRET1, LUA_YIELD
|  lp TMP3, L:SAVE0->top
|  li_vmstate INTERP
|  lp BASE, L->base
|  stw L, DISPATCH_GL(cur_L)(DISPATCH)
|  st_vmstate
|  bgt >8  // Status above LUA_YIELD: error path.
|  sub RD, TMP3, TMP2  // RD = size of the results in bytes.
|  lwz TMP0, L->maxstack
|  cmplwi RD, 0
|  add TMP1, BASE, RD
|  beq >6  // No results?
|  cmplw TMP1, TMP0
|  li TMP1, 0
|  bgt >9  // Need to grow stack?
|
|  subi TMP3, RD, 8
|  stp TMP2, L:SAVE0->top  // Clear coroutine stack.
|5:  // Move results from coroutine.
|  cmplw TMP1, TMP3
|  lfdx f0, TMP2, TMP1
|  stfdx f0, BASE, TMP1
|  addi TMP1, TMP1, 8
|  bne <5
|6:
|  // cr0 from this test is consumed by the beq at label 7.
|  andix. TMP0, PC, FRAME_TYPE
|.if resume
|  li TMP1, LJ_TTRUE
|  la RA, -8(BASE)
|  stw TMP1, -8(BASE)  // Prepend true to results.
|  addi RD, RD, 16
|.else
|  mr RA, BASE
|  addi RD, RD, 8
|.endif
|7:
|  stw PC, SAVE_PC
|  mr MULTRES, RD
|  beq ->BC_RET_Z
|  b ->vm_return
|
|8:  // Coroutine returned with error (at co->top-1).
|.if resume
|  andix. TMP0, PC, FRAME_TYPE  // Sets cr0 for the beq at label 7.
|  la TMP3, -8(TMP3)
|  li TMP1, LJ_TFALSE
|  lfd f0, 0(TMP3)
|  stp TMP3, L:SAVE0->top  // Remove error from coroutine stack.
|  li RD, (2+1)*8
|  stw TMP1, -8(BASE)  // Prepend false to results.
|  la RA, -8(BASE)
|  stfd f0, 0(BASE)  // Copy error message.
|  // (coroutine_resume_wrap, continued; error path of the resume variant)
|  b <7
|.else
|  mr CARG1, L
|  mr CARG2, L:SAVE0
|  // NOTE(review): no instruction follows this call in the wrap variant, so
|  // the callee is presumably not expected to return -- confirm.
|  bl extern lj_ffh_coroutine_wrap_err  // (lua_State *L, lua_State *co)
|.endif
|
|9:  // Handle stack expansion on return from yield.
|  mr CARG1, L
|  srwi CARG2, RD, 3
|  bl extern lj_state_growstack  // (lua_State *L, int n)
|  li CRET1, 0  // Re-enter label 4 with status 0.
|  b <4
|.endmacro
|
|  coroutine_resume_wrap 1  // coroutine.resume
|  coroutine_resume_wrap 0  // coroutine.wrap
|
|// coroutine.yield: takes the fallback unless the CFRAME_RESUME bit is set
|// in L->cframe. Otherwise clears L->cframe, sets the status to LUA_YIELD
|// and leaves through vm_leave_unw.
|.ffunc coroutine_yield
|  lp TMP0, L->cframe
|  add TMP1, BASE, NARGS8:RC
|  stp BASE, L->base
|  andix. TMP0, TMP0, CFRAME_RESUME
|  stp TMP1, L->top
|  li CRET1, LUA_YIELD
|  beq ->fff_fallback
|  stp ZERO, L->cframe
|  stb CRET1, L->status
|  b ->vm_leave_unw
|
|//-- Math library -------------------------------------------------------
|
|// math.abs. With DUALNUM, integers are handled branch-free via the sign
|// mask: (x ^ mask) - mask. A result with the sign bit still set is
|// returned as the number 2^31 instead. Numbers have their sign bit
|// cleared in the high word.
|.ffunc_1 math_abs
|  checknum CARG3
|.if DUALNUM
|  bne >2
|  srawi TMP1, CARG1, 31
|  xor TMP2, TMP1, CARG1
|.if GPR64
|  lus TMP0, 0x8000
|  sub CARG1, TMP2, TMP1
|  cmplw CARG1, TMP0
|  beq >1
|.else
|  sub. CARG1, TMP2, TMP1
|  blt >1
|.endif
|// Return the integer in CRET1 as the single result.
|->fff_resi:
|  lwz PC, FRAME_PC(BASE)
|  la RA, -8(BASE)
|  stw TISNUM, -8(BASE)
|  stw CRET1, -4(BASE)
|  b ->fff_res1
|1:
|  lus CARG3, 0x41e0  // 2^31.
|  li CARG1, 0
|  b ->fff_restv
|2:
|.endif
|  bge ->fff_fallback
|  rlwinm CARG3, CARG3, 0, 1, 31  // Clear the sign bit of the high word.
|  // Fallthrough.
|
|->fff_restv:
|  // CARG3/CARG1 = TValue result.
|  lwz PC, FRAME_PC(BASE)
|  stw CARG3, -8(BASE)
|  la RA, -8(BASE)
|  stw CARG1, -4(BASE)
|->fff_res1:
|  // RA = results, PC = return.
|  li RD, (1+1)*8
|->fff_res:
|  // RA = results, RD = (nresults+1)*8, PC = return.
|  andix. TMP0, PC, FRAME_TYPE
|  mr MULTRES, RD
|  bney ->vm_return  // Any FRAME_TYPE bit set: use the generic return.
|  lwz INS, -4(PC)
|  decode_RB8 RB, INS
|5:
|  cmplw RB, RD  // More results expected?
|  decode_RA8 TMP0, INS
|  bgt >6
|  ins_next1
|  // Adjust BASE. KBASE is assumed to be set for the calling frame.
|  sub BASE, RA, TMP0
|  ins_next2
|
|6:  // Fill up results with nil.
|  subi TMP1, RD, 8
|  addi RD, RD, 8
|  stwx TISNIL, RA, TMP1
|  b <5
|
|// Fast function wrapper around a one-argument external function.
|.macro math_extern, func
|  .ffunc_n math_ .. func
|  blex func
|  b ->fff_resn
|.endmacro
|
|// Fast function wrapper around a two-argument external function.
|.macro math_extern2, func
|  .ffunc_nn math_ .. func
|  blex func
|  b ->fff_resn
|.endmacro
|
|// Inline floor/ceil. Integers are returned unchanged. Numbers are split
|// into exponent and mantissa by hand, and the external function is only
|// called at label 5 when the result cannot be returned through fff_resi.
|.macro math_round, func
|  .ffunc_1 math_ .. func
|  checknum CARG3; beqy ->fff_restv
|  rlwinm TMP2, CARG3, 12, 21, 31
|  bge ->fff_fallback
|  addic. TMP2, TMP2, -1023  // exp = exponent(x) - 1023
|  cmplwi cr1, TMP2, 31  // 0 <= exp < 31?
|  subfic TMP0, TMP2, 31
|  blt >3
|  slwi TMP1, CARG3, 11
|  srwi TMP3, CARG1, 21
|  oris TMP1, TMP1, 0x8000
|  addi TMP2, TMP2, 1
|  or TMP1, TMP1, TMP3
|  slwi CARG2, CARG1, 11
|  bge cr1, >4
|  slw TMP3, TMP1, TMP2
|  srw RD, TMP1, TMP0
|  or TMP3, TMP3, CARG2
|  srawi TMP2, CARG3, 31
|.if "func" == "floor"
|  and TMP1, TMP3, TMP2
|  addic TMP0, TMP1, -1
|  subfe TMP1, TMP0, TMP1
|  add CARG1, RD, TMP1
|  xor CARG1, CARG1, TMP2
|  sub CARG1, CARG1, TMP2
|  b ->fff_resi
|.else
|  andc TMP1, TMP3, TMP2
|  addic TMP0, TMP1, -1
|  subfe TMP1, TMP0, TMP1
|  add CARG1, RD, TMP1
|  cmpw CARG1, RD
|  xor CARG1, CARG1, TMP2
|  sub CARG1, CARG1, TMP2
|  bge ->fff_resi
|  // Overflow to 2^31.
|  lus CARG3, 0x41e0  // 2^31.
|  // (math_round, continued; ceil overflow path returns the number 2^31)
|  li CARG1, 0
|  b ->fff_restv
|.endif
|3:  // |x| < 1
|  slwi TMP2, CARG3, 1
|  srawi TMP1, CARG3, 31
|  or TMP2, CARG1, TMP2  // ztest = (hi+hi) | lo
|.if "func" == "floor"
|  and TMP1, TMP2, TMP1  // (ztest & sign) == 0 ? 0 : -1
|  subfic TMP2, TMP1, 0
|  subfe CARG1, CARG1, CARG1
|.else
|  andc TMP1, TMP2, TMP1  // (ztest & ~sign) == 0 ? 0 : 1
|  addic TMP2, TMP1, -1
|  subfe CARG1, TMP2, TMP1
|.endif
|  b ->fff_resi
|4:  // exp >= 31. Check for -(2^31).
|  xoris TMP1, TMP1, 0x8000
|  srawi TMP2, CARG3, 31
|.if "func" == "floor"
|  or TMP1, TMP1, CARG2
|.endif
|  // The PPE variant replaces the record form with an explicit compare.
|.if PPE
|  orc TMP1, TMP1, TMP2
|  cmpwi TMP1, 0
|.else
|  orc. TMP1, TMP1, TMP2
|.endif
|  // Require cr1.eq as well; cr1 holds the 'exp vs. 31' compare from above.
|  crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
|  lus CARG1, 0x8000  // -(2^31).
|  beqy ->fff_resi
|5:
|  // Everything else: reload the argument and call the external function.
|  lfd FARG1, 0(BASE)
|  blex func
|  b ->fff_resn
|.endmacro
|
|.if DUALNUM
|  math_round floor
|  math_round ceil
|.else
|  // NYI: use internal implementation.
|  math_extern floor
|  math_extern ceil
|.endif
|
|// math.sqrt: uses the fsqrt instruction when SQRT is defined, otherwise
|// the external sqrt function.
|.if SQRT
|.ffunc_n math_sqrt
|  fsqrt FARG1, FARG1
|  b ->fff_resn
|.else
|  math_extern sqrt
|.endif
|
|// math.log: unlike the math_extern wrappers, this requires exactly one
|// argument and sends every other argument count to the fallback.
|.ffunc math_log
|  cmplwi NARGS8:RC, 8
|  lwz CARG3, 0(BASE)
|  lfd FARG1, 0(BASE)
|  bne ->fff_fallback  // Need exactly 1 argument.
|  // (math_log, continued)
|  checknum CARG3; bge ->fff_fallback
|  blex log
|  b ->fff_resn
|
|  math_extern log10
|  math_extern exp
|  math_extern sin
|  math_extern cos
|  math_extern tan
|  math_extern asin
|  math_extern acos
|  math_extern atan
|  math_extern sinh
|  math_extern cosh
|  math_extern tanh
|  math_extern2 pow
|  math_extern2 atan2
|  math_extern2 fmod
|
|// math.ldexp: calls ldexp with FARG1 and an integer exponent. The
|// exponent goes into CARG2 with GPR64 and into CARG1 otherwise. DUALNUM
|// builds require the second argument to be an integer already; other
|// builds convert it from FARG2.
|.if DUALNUM
|.ffunc math_ldexp
|  cmplwi NARGS8:RC, 16
|  lwz CARG3, 0(BASE)
|  lfd FARG1, 0(BASE)
|  lwz CARG4, 8(BASE)
|.if GPR64
|  lwz CARG2, 12(BASE)
|.else
|  lwz CARG1, 12(BASE)
|.endif
|  blt ->fff_fallback
|  checknum CARG3; bge ->fff_fallback
|  checknum CARG4; bne ->fff_fallback
|.else
|.ffunc_nn math_ldexp
|.if GPR64
|  toint CARG2, FARG2
|.else
|  toint CARG1, FARG2
|.endif
|.endif
|  blex ldexp
|  b ->fff_resn
|
|// math.frexp: passes the address of the tmptv scratch slot as the output
|// pointer and returns two results: FARG1 plus the word read back from
|// that slot (stored as an integer with DUALNUM, otherwise converted to a
|// number first).
|.ffunc_n math_frexp
|.if GPR64
|  la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
|.else
|  la CARG1, DISPATCH_GL(tmptv)(DISPATCH)
|.endif
|  lwz PC, FRAME_PC(BASE)
|  blex frexp
|  lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH)
|  la RA, -8(BASE)
|.if not DUALNUM
|  tonum_i FARG2, TMP1
|.endif
|  stfd FARG1, 0(RA)
|  li RD, (2+1)*8
|.if DUALNUM
|  stw TISNUM, 8(RA)
|  stw TMP1, 12(RA)
|.else
|  stfd FARG2, 8(RA)
|.endif
|  b ->fff_res
|
|// math.modf: the output pointer is BASE-8, which is also the first result
|// slot. The value returned in FARG1 is stored as the second result.
|.ffunc_n math_modf
|.if GPR64
|  la CARG2, -8(BASE)
|.else
|  la CARG1, -8(BASE)
|.endif
|  lwz PC, FRAME_PC(BASE)
|  blex modf
|  la RA, -8(BASE)
|  stfd FARG1, 0(BASE)
|  li RD, (2+1)*8
|  b ->fff_res
|
|// math.min/math.max over all arguments. 'ismax' only selects which
|// operand of the and/andc and fsel instructions is kept. The DUALNUM
|// variant keeps the running result in CARG1 while all arguments are
|// integers and switches to FARG1 once a number is seen.
|.macro math_minmax, name, ismax
|.if DUALNUM
|  .ffunc_1 name
|  checknum CARG3
|  addi TMP1, BASE, 8
|  add TMP2, BASE, NARGS8:RC
|  bne >4
|1:  // Handle integers.
|  // (math_minmax, continued) TMP1 = address of the current argument,
|  // TMP2 = address just past the last argument.
|  lwz CARG4, 0(TMP1)
|  cmplw cr1, TMP1, TMP2
|  lwz CARG2, 4(TMP1)
|  bge cr1, ->fff_resi  // All arguments consumed.
|  checknum CARG4
|  // Flip the sign bits so the carry from subfc gives a signed compare.
|  xoris TMP0, CARG1, 0x8000
|  xoris TMP3, CARG2, 0x8000
|  bne >3
|  subfc TMP3, TMP3, TMP0
|  subfe TMP0, TMP0, TMP0
|.if ismax
|  andc TMP3, TMP3, TMP0
|.else
|  and TMP3, TMP3, TMP0
|.endif
|  add CARG1, TMP3, CARG2
|.if GPR64
|  rldicl CARG1, CARG1, 0, 32
|.endif
|  addi TMP1, TMP1, 8
|  b <1
|3:
|  bge ->fff_fallback
|  // Convert intermediate result to number and continue below.
|  tonum_i FARG1, CARG1
|  lfd FARG2, 0(TMP1)
|  b >6
|4:
|  lfd FARG1, 0(BASE)
|  bge ->fff_fallback
|5:  // Handle numbers.
|  lwz CARG4, 0(TMP1)
|  cmplw cr1, TMP1, TMP2
|  lfd FARG2, 0(TMP1)
|  bge cr1, ->fff_resn  // All arguments consumed.
|  checknum CARG4; bge >7
|6:
|  // fsel picks its second source operand when f0 >= 0, else the third.
|  fsub f0, FARG1, FARG2
|  addi TMP1, TMP1, 8
|.if ismax
|  fsel FARG1, f0, FARG1, FARG2
|.else
|  fsel FARG1, f0, FARG2, FARG1
|.endif
|  b <5
|7:  // Convert integer to number and continue above.
|  lwz CARG2, 4(TMP1)
|  bne ->fff_fallback
|  tonum_i FARG2, CARG2
|  b <6
|.else
|  // Non-DUALNUM variant: TMP1 is a byte offset from BASE.
|  .ffunc_n name
|  li TMP1, 8
|1:
|  lwzx CARG2, BASE, TMP1
|  lfdx FARG2, BASE, TMP1
|  cmplw cr1, TMP1, NARGS8:RC
|  checknum CARG2
|  bge cr1, ->fff_resn  // All arguments consumed.
|  bge ->fff_fallback
|  fsub f0, FARG1, FARG2
|  addi TMP1, TMP1, 8
|.if ismax
|  fsel FARG1, f0, FARG1, FARG2
|.else
|  fsel FARG1, f0, FARG2, FARG1
|.endif
|  b <1
|.endif
|.endmacro
|
|  math_minmax math_min, 0
|  math_minmax math_max, 1
|
|//-- String library -----------------------------------------------------
|
|.ffunc string_byte  // Only handle the 1-arg case here.
|  // (string_byte, continued) Returns the first byte of the string, or no
|  // results when the string length is zero.
|  cmplwi NARGS8:RC, 8
|  lwz CARG3, 0(BASE)
|  lwz STR:CARG1, 4(BASE)
|  bne ->fff_fallback  // Need exactly 1 argument.
|  checkstr CARG3
|  bne ->fff_fallback
|  lwz TMP0, STR:CARG1->len
|.if DUALNUM
|  lbz CARG1, STR:CARG1[1]  // Access is always ok (NUL at end).
|  li RD, (0+1)*8
|  lwz PC, FRAME_PC(BASE)
|  cmplwi TMP0, 0
|  la RA, -8(BASE)
|  beqy ->fff_res  // Zero length: return no results.
|  b ->fff_resi
|.else
|  lbz TMP1, STR:CARG1[1]  // Access is always ok (NUL at end).
|  addic TMP3, TMP0, -1  // RD = ((str->len != 0)+1)*8
|  subfe RD, TMP3, TMP0
|  stw TMP1, TONUM_LO  // Inlined tonum_u f0, TMP1.
|  addi RD, RD, 1
|  lfd f0, TONUM_D
|  la RA, -8(BASE)
|  lwz PC, FRAME_PC(BASE)
|  fsub f0, f0, TOBIT
|  slwi RD, RD, 3
|  stfd f0, 0(RA)
|  b ->fff_res
|.endif
|
|// string.char for a single argument in the range 0..255 (unsigned
|// compare). CARG2 is pointed at the one byte to be turned into a string:
|// offset 7 of the argument slot with DUALNUM, TMPD_BLO otherwise.
|.ffunc string_char  // Only handle the 1-arg case here.
|  ffgccheck
|  cmplwi NARGS8:RC, 8
|  lwz CARG3, 0(BASE)
|.if DUALNUM
|  lwz TMP0, 4(BASE)
|  bne ->fff_fallback  // Exactly 1 argument.
|  checknum CARG3; bne ->fff_fallback
|  la CARG2, 7(BASE)
|.else
|  lfd FARG1, 0(BASE)
|  bne ->fff_fallback  // Exactly 1 argument.
|  checknum CARG3; bge ->fff_fallback
|  toint TMP0, FARG1
|  la CARG2, TMPD_BLO
|.endif
|  li CARG3, 1
|  cmplwi TMP0, 255; bgt ->fff_fallback
|// Create a string from CARG2 = data pointer, CARG3 = length.
|->fff_newstr:
|  mr CARG1, L
|  stp BASE, L->base
|  stw PC, SAVE_PC
|  bl extern lj_str_new  // (lua_State *L, char *str, size_t l)
|->fff_resstr:
|  // Returns GCstr *.
|  // (fff_resstr, continued) Reload BASE after the C call and return the
|  // string pointer in CRET1 with an LJ_TSTR tag.
|  lp BASE, L->base
|  li CARG3, LJ_TSTR
|  b ->fff_restv
|
|// string.sub(str, start [, end]). The end index defaults to -1 when only
|// two arguments are given. TMP1 = start, TMP2 = end, TMP0 = length.
|.ffunc string_sub
|  ffgccheck
|  cmplwi NARGS8:RC, 16
|  lwz CARG3, 16(BASE)
|.if not DUALNUM
|  lfd f0, 16(BASE)
|.endif
|  lwz TMP0, 0(BASE)
|  lwz STR:CARG1, 4(BASE)
|  blt ->fff_fallback
|  lwz CARG2, 8(BASE)
|.if DUALNUM
|  lwz TMP1, 12(BASE)
|.else
|  lfd f1, 8(BASE)
|.endif
|  li TMP2, -1
|  beq >1  // Exactly two arguments: keep the default end.
|.if DUALNUM
|  checknum CARG3
|  lwz TMP2, 20(BASE)
|  bne ->fff_fallback
|1:
|  checknum CARG2; bne ->fff_fallback
|.else
|  checknum CARG3; bge ->fff_fallback
|  toint TMP2, f0
|1:
|  checknum CARG2; bge ->fff_fallback
|.endif
|  checkstr TMP0; bne ->fff_fallback
|.if not DUALNUM
|  toint TMP1, f1
|.endif
|  lwz TMP0, STR:CARG1->len
|  cmplw TMP0, TMP2  // len < end? (unsigned compare)
|  addi TMP3, TMP2, 1
|  blt >5
|2:
|  cmpwi TMP1, 0  // start <= 0?
|  add TMP3, TMP1, TMP0
|  ble >7
|3:
|  // Length = end - start + 1, forced to zero by the sign mask in TMP0
|  // when end - start is negative.
|  sub CARG3, TMP2, TMP1
|  addi CARG2, STR:CARG1, #STR-1
|  srawi TMP0, CARG3, 31
|  addi CARG3, CARG3, 1
|  add CARG2, CARG2, TMP1
|  andc CARG3, CARG3, TMP0
|.if GPR64
|  rldicl CARG2, CARG2, 0, 32
|  rldicl CARG3, CARG3, 0, 32
|.endif
|  b ->fff_newstr
|
|5:  // Negative end or overflow.
|  cmpw TMP0, TMP2  // len >= end? (signed compare)
|  add TMP2, TMP0, TMP3  // Negative end: end = end+len+1.
|  bge <2
|  mr TMP2, TMP0  // Overflow: end = len.
|  b <2
|
|7:  // Negative start or underflow.
|  .gpr64 extsw TMP1, TMP1
|  addic CARG3, TMP1, -1
|  subfe CARG3, CARG3, CARG3
|  srawi CARG2, TMP3, 31  // Note: modifies carry.
|  andc TMP3, TMP3, CARG3
|  andc TMP1, TMP3, CARG2
|  addi TMP1, TMP1, 1  // start = 1 + (start ? start+len : 0)
|  b <3
|
|// Generates string.<name>: resets the tmpbuf write pointer to the buffer
|// start, lets lj_buf_putstr_<name> fill it from the string argument and
|// converts the buffer to a string result.
|.macro ffstring_op, name
|  .ffunc string_ .. name
|  ffgccheck
|  cmplwi NARGS8:RC, 8
|  lwz CARG3, 0(BASE)
|  lwz STR:CARG2, 4(BASE)
|  blt ->fff_fallback
|  checkstr CARG3
|  la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH)
|  bne ->fff_fallback
|  lwz TMP0, SBUF:CARG1->b
|  stw L, SBUF:CARG1->L
|  stp BASE, L->base
|  stw PC, SAVE_PC
|  stw TMP0, SBUF:CARG1->p
|  bl extern lj_buf_putstr_ .. name
|  bl extern lj_buf_tostr
|  b ->fff_resstr
|.endmacro
|
|ffstring_op reverse
|ffstring_op lower
|ffstring_op upper
|
|//-- Bit library --------------------------------------------------------
|
|// Common prologue for bit.* functions: leaves the first argument as a
|// 32 bit integer in CARG1. DUALNUM builds call fff_tobit_fb (with link)
|// when the argument is not an integer. Other builds convert by adding
|// TOBIT and reading back the low word.
|.macro .ffunc_bit, name
|.if DUALNUM
|  .ffunc_1 bit_..name
|  checknum CARG3; bnel ->fff_tobit_fb
|.else
|  .ffunc_n bit_..name
|  fadd FARG1, FARG1, TOBIT
|  stfd FARG1, TMPD
|  lwz CARG1, TMPD_LO
|.endif
|.endmacro
|
|// Variadic bit operation: folds 'ins' over all arguments, accumulating
|// the result in CARG1.
|.macro .ffunc_bit_op, name, ins
|  .ffunc_bit name
|  addi TMP1, BASE, 8
|  add TMP2, BASE, NARGS8:RC
|1:
|  lwz CARG4, 0(TMP1)
|  cmplw cr1, TMP1, TMP2
|.if DUALNUM
|  lwz CARG2, 4(TMP1)
|.else
|  lfd FARG1, 0(TMP1)
|.endif
|  bgey cr1, ->fff_resi  // All arguments consumed.
|  checknum CARG4
|.if DUALNUM
|  bnel ->fff_bitop_fb
|.else
|  fadd FARG1, FARG1, TOBIT
|  bge ->fff_fallback
|  stfd FARG1, TMPD
|  lwz CARG2, TMPD_LO
|.endif
|  ins CARG1, CARG1, CARG2
|  addi TMP1, TMP1, 8
|  b <1
|.endmacro
|
|.ffunc_bit_op band, and
|.ffunc_bit_op bor, or
|.ffunc_bit_op bxor, xor
|
|// bit.bswap: a rotate left by 8 places two of the bytes; the two rlwimi
|// instructions insert the remaining two from a rotate left by 24.
|.ffunc_bit bswap
|  rotlwi TMP0, CARG1, 8
|  rlwimi TMP0, CARG1, 24, 0, 7
|  rlwimi TMP0, CARG1, 24, 16, 23
|  mr CRET1, TMP0
|  b ->fff_resi
|
|.ffunc_bit bnot
|  not CRET1, CARG1
|  b ->fff_resi
|
|// Shift/rotate functions. shmod selects how the count in CARG2 is
|// adjusted: 1 = keep only the low 5 bits, 2 = negate, anything else =
|// use as is.
|.macro .ffunc_bit_sh, name, ins, shmod
|.if DUALNUM
|  .ffunc_2 bit_..name
|  checknum CARG3; bnel ->fff_tobit_fb
|  // Note: no inline conversion from number for 2nd argument!
|  checknum CARG4; bne ->fff_fallback
|.else
|  .ffunc_nn bit_..name
|  fadd FARG1, FARG1, TOBIT
|  fadd FARG2, FARG2, TOBIT
|  stfd FARG1, TMPD
|  lwz CARG1, TMPD_LO
|  stfd FARG2, TMPD
|  lwz CARG2, TMPD_LO
|.endif
|.if shmod == 1
|  rlwinm CARG2, CARG2, 0, 27, 31
|.elif shmod == 2
|  neg CARG2, CARG2
|.endif
|  ins CRET1, CARG1, CARG2
|  b ->fff_resi
|.endmacro
|
|.ffunc_bit_sh lshift, slw, 1
|.ffunc_bit_sh rshift, srw, 1
|.ffunc_bit_sh arshift, sraw, 1
|.ffunc_bit_sh rol, rotlw, 0
|.ffunc_bit_sh ror, rotlw, 2
|
|// bit.tobit: the .ffunc_bit prologue already did the conversion. Without
|// DUALNUM, fff_resi is defined here and converts the integer result in
|// CRET1 to a number before falling into fff_resn.
|.ffunc_bit tobit
|.if DUALNUM
|  b ->fff_resi
|.else
|->fff_resi:
|  tonum_i FARG1, CRET1
|.endif
|// Return the number in FARG1 as the single result.
|->fff_resn:
|  lwz PC, FRAME_PC(BASE)
|  la RA, -8(BASE)
|  stfd FARG1, -8(BASE)
|  b ->fff_res1
|
|// Fallback FP number to bit conversion.
|// Both helpers are reached by branch-and-link with cr0 set by the
|// caller's checknum, and return with blr. Their bodies are only assembled
|// with DUALNUM; otherwise the labels fall through to fff_fallback.
|->fff_tobit_fb:
|.if DUALNUM
|  lfd FARG1, 0(BASE)
|  bgt ->fff_fallback
|  fadd FARG1, FARG1, TOBIT
|  stfd FARG1, TMPD
|  lwz CARG1, TMPD_LO
|  blr
|.endif
|->fff_bitop_fb:
|.if DUALNUM
|  lfd FARG1, 0(TMP1)
|  bgt ->fff_fallback
|  fadd FARG1, FARG1, TOBIT
|  stfd FARG1, TMPD
|  lwz CARG2, TMPD_LO
|  blr
|.endif
|
|//-----------------------------------------------------------------------
|
|->fff_fallback:  // Call fast function fallback handler.
|  // BASE = new base, RB = CFUNC, RC = nargs*8
|  lp TMP3, CFUNC:RB->f
|  add TMP1, BASE, NARGS8:RC
|  lwz PC, FRAME_PC(BASE)  // Fallback may overwrite PC.
|  addi TMP0, TMP1, 8*LUA_MINSTACK
|  lwz TMP2, L->maxstack
|  stw PC, SAVE_PC  // Redundant (but a defined value).
2353 | .toc lp TMP3, 0(TMP3) 2354 | cmplw TMP0, TMP2 2355 | stp BASE, L->base 2356 | stp TMP1, L->top 2357 | mr CARG1, L 2358 | bgt >5 // Need to grow stack. 2359 | mtctr TMP3 2360 | bctrl // (lua_State *L) 2361 | // Either throws an error, or recovers and returns -1, 0 or nresults+1. 2362 | lp BASE, L->base 2363 | cmpwi CRET1, 0 2364 | slwi RD, CRET1, 3 2365 | la RA, -8(BASE) 2366 | bgt ->fff_res // Returned nresults+1? 2367 |1: // Returned 0 or -1: retry fast path. 2368 | lp TMP0, L->top 2369 | lwz LFUNC:RB, FRAME_FUNC(BASE) 2370 | sub NARGS8:RC, TMP0, BASE 2371 | bne ->vm_call_tail // Returned -1? 2372 | ins_callt // Returned 0: retry fast path. 2373 | 2374 |// Reconstruct previous base for vmeta_call during tailcall. 2375 |->vm_call_tail: 2376 | andix. TMP0, PC, FRAME_TYPE 2377 | rlwinm TMP1, PC, 0, 0, 28 2378 | bne >3 2379 | lwz INS, -4(PC) 2380 | decode_RA8 TMP1, INS 2381 | addi TMP1, TMP1, 8 2382 |3: 2383 | sub TMP2, BASE, TMP1 // TMP2 = reconstructed previous base. 2384 | b ->vm_call_dispatch // Resolve again for tailcall. 2385 | 2386 |5: // Grow stack for fallback handler. 2387 | li CARG2, LUA_MINSTACK 2388 | bl extern lj_state_growstack // (lua_State *L, int n) 2389 | lp BASE, L->base 2390 | cmpw TMP0, TMP0 // Set 4*cr0+eq to force retry. 2391 | b <1 2392 | 2393 |->fff_gcstep: // Call GC step function. LR is kept in SAVE0 across the call. 2394 | // BASE = new base, RC = nargs*8 2395 | mflr SAVE0 2396 | stp BASE, L->base 2397 | add TMP0, BASE, NARGS8:RC 2398 | stw PC, SAVE_PC // Redundant (but a defined value).
2399 | stp TMP0, L->top 2400 | mr CARG1, L 2401 | bl extern lj_gc_step // (lua_State *L) 2402 | lp BASE, L->base 2403 | mtlr SAVE0 2404 | lp TMP0, L->top 2405 | sub NARGS8:RC, TMP0, BASE // Recompute nargs*8 from the reloaded top and base. 2406 | lwz CFUNC:RB, FRAME_FUNC(BASE) 2407 | blr 2408 | 2409 |//----------------------------------------------------------------------- 2410 |//-- Special dispatch targets ------------------------------------------- 2411 |//----------------------------------------------------------------------- 2412 | 2413 |->vm_record: // Dispatch target for recording phase. 2414 |.if JIT 2415 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) 2416 | andix. TMP0, TMP3, HOOK_VMEVENT // No recording while in vmevent. 2417 | bne >5 2418 | // Decrement the hookcount for consistency, but always do the call. 2419 | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH) 2420 | andix. TMP0, TMP3, HOOK_ACTIVE 2421 | bne >1 2422 | subi TMP2, TMP2, 1 2423 | andi. TMP0, TMP3, LUA_MASKLINE|LUA_MASKCOUNT 2424 | beqy >1 2425 | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH) // Skipped unless a line or count hook is set. 2426 | b >1 2427 |.endif 2428 | 2429 |->vm_rethook: // Dispatch target for return hooks. 2430 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) 2431 | andix. TMP0, TMP3, HOOK_ACTIVE // Hook already active? 2432 | beq >1 2433 |5: // Re-dispatch to static ins. 2434 | addi TMP1, TMP1, GG_DISP2STATIC // Assumes decode_OPP TMP1, INS. 2435 | lpx TMP0, DISPATCH, TMP1 2436 | mtctr TMP0 2437 | bctr 2438 | 2439 |->vm_inshook: // Dispatch target for instr/line hooks. 2440 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) 2441 | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH) 2442 | andix. TMP0, TMP3, HOOK_ACTIVE // Hook already active? 2443 | rlwinm TMP0, TMP3, 31-LUA_HOOKLINE, 31, 0 2444 | bne <5 2445 | 2446 | cmpwi cr1, TMP0, 0 2447 | addic.
TMP2, TMP2, -1 2448 | beq cr1, <5 2449 | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH) 2450 | beq >1 2451 | bge cr1, <5 2452 |1: 2453 | mr CARG1, L 2454 | stw MULTRES, SAVE_MULTRES 2455 | mr CARG2, PC 2456 | stp BASE, L->base 2457 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. 2458 | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) 2459 |3: 2460 | lp BASE, L->base 2461 |4: // Re-dispatch to static ins. 2462 | lwz INS, -4(PC) 2463 | decode_OPP TMP1, INS 2464 | decode_RB8 RB, INS 2465 | addi TMP1, TMP1, GG_DISP2STATIC 2466 | decode_RD8 RD, INS 2467 | lpx TMP0, DISPATCH, TMP1 2468 | decode_RA8 RA, INS 2469 | decode_RC8 RC, INS 2470 | mtctr TMP0 2471 | bctr 2472 | 2473 |->cont_hook: // Continue from hook yield. 2474 | addi PC, PC, 4 2475 | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins. 2476 | b <4 2477 | 2478 |->vm_hotloop: // Hot loop counter underflow. 2479 |.if JIT 2480 | lwz LFUNC:TMP1, FRAME_FUNC(BASE) 2481 | addi CARG1, DISPATCH, GG_DISP2J 2482 | stw PC, SAVE_PC 2483 | lwz TMP1, LFUNC:TMP1->pc 2484 | mr CARG2, PC 2485 | stw L, DISPATCH_J(L)(DISPATCH) 2486 | lbz TMP1, PC2PROTO(framesize)(TMP1) 2487 | stp BASE, L->base 2488 | slwi TMP1, TMP1, 3 2489 | add TMP1, BASE, TMP1 2490 | stp TMP1, L->top 2491 | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) 2492 | b <3 // Reload BASE, then re-dispatch to static ins. 2493 |.endif 2494 | 2495 |->vm_callhook: // Dispatch target for call hooks. 2496 | mr CARG2, PC 2497 |.if JIT 2498 | b >1 2499 |.endif 2500 | 2501 |->vm_hotcall: // Hot call counter underflow. 2502 |.if JIT 2503 | ori CARG2, PC, 1 // Hot calls pass PC with bit 0 set; call hooks pass plain PC. 2504 |1: 2505 |.endif 2506 | add TMP0, BASE, RC 2507 | stw PC, SAVE_PC 2508 | mr CARG1, L 2509 | stp BASE, L->base 2510 | sub RA, RA, BASE // Keep RA as an offset; rebased after the call. 2511 | stp TMP0, L->top 2512 | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) 2513 | // Returns ASMFunction. 2514 | lp BASE, L->base 2515 | lp TMP0, L->top 2516 | stw ZERO, SAVE_PC // Invalidate for subsequent line hook.
2517 | sub NARGS8:RC, TMP0, BASE 2518 | add RA, BASE, RA 2519 | lwz LFUNC:RB, FRAME_FUNC(BASE) 2520 | lwz INS, -4(PC) 2521 | mtctr CRET1 2522 | bctr 2523 | 2524 |->cont_stitch: // Trace stitching. 2525 |.if JIT 2526 | // RA = resultptr, RB = meta base 2527 | lwz INS, -4(PC) 2528 | lwz TRACE:TMP2, -20(RB) // Save previous trace. 2529 | addic. TMP1, MULTRES, -8 2530 | decode_RA8 RC, INS // Call base. 2531 | beq >2 2532 |1: // Move results down. 2533 | lfd f0, 0(RA) 2534 | addic. TMP1, TMP1, -8 2535 | addi RA, RA, 8 2536 | stfdx f0, BASE, RC 2537 | addi RC, RC, 8 2538 | bne <1 2539 |2: 2540 | decode_RA8 RA, INS 2541 | decode_RB8 RB, INS 2542 | add RA, RA, RB 2543 |3: 2544 | cmplw RA, RC 2545 | bgt >9 // More results wanted? 2546 | 2547 | lhz TMP3, TRACE:TMP2->traceno 2548 | lhz RD, TRACE:TMP2->link 2549 | cmpw RD, TMP3 // link == traceno is treated as blacklisted below. 2550 | cmpwi cr1, RD, 0 2551 | beq ->cont_nop // Blacklisted. 2552 | slwi RD, RD, 3 2553 | bne cr1, =>BC_JLOOP // Jump to stitched trace. 2554 | 2555 | // Stitch a new trace to the previous trace. 2556 | stw TMP3, DISPATCH_J(exitno)(DISPATCH) 2557 | stp L, DISPATCH_J(L)(DISPATCH) 2558 | stp BASE, L->base 2559 | addi CARG1, DISPATCH, GG_DISP2J 2560 | mr CARG2, PC 2561 | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) 2562 | lp BASE, L->base 2563 | b ->cont_nop 2564 | 2565 |9: // Fill a missing result slot with nil, then re-check. 2566 | stwx TISNIL, BASE, RC 2567 | addi RC, RC, 8 2568 | b <3 2569 |.endif 2570 | 2571 |->vm_profhook: // Dispatch target for profiler hook. 2572#if LJ_HASPROFILE 2573 | mr CARG1, L 2574 | stw MULTRES, SAVE_MULTRES 2575 | mr CARG2, PC 2576 | stp BASE, L->base 2577 | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) 2578 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2579 | lp BASE, L->base 2580 | subi PC, PC, 4 2581 | b ->cont_nop 2582#endif 2583 | 2584 |//----------------------------------------------------------------------- 2585 |//-- Trace exit handler ------------------------------------------------- 2586 |//----------------------------------------------------------------------- 2587 | 2588 |.macro savex_, a, b, c, d // Store FPRs a-d at 16+n*8(sp). 2589 | stfd f..a, 16+a*8(sp) 2590 | stfd f..b, 16+b*8(sp) 2591 | stfd f..c, 16+c*8(sp) 2592 | stfd f..d, 16+d*8(sp) 2593 |.endmacro 2594 | 2595 |->vm_exit_handler: // Builds an ExitState on the stack and calls lj_trace_exit. 2596 |.if JIT 2597 | addi sp, sp, -(16+32*8+32*4) 2598 | stmw r2, 16+32*8+2*4(sp) // Store r2..r31. 2599 | addi DISPATCH, JGL, -GG_DISP2G-32768 2600 | li CARG2, ~LJ_VMST_EXIT 2601 | lwz CARG1, 16+32*8+32*4(sp) // Get stack chain. 2602 | stw CARG2, DISPATCH_GL(vmstate)(DISPATCH) 2603 | savex_ 0,1,2,3 2604 | stw CARG1, 0(sp) // Store extended stack chain. 2605 | clrso TMP1 2606 | savex_ 4,5,6,7 2607 | addi CARG2, sp, 16+32*8+32*4 // Recompute original value of sp. 2608 | savex_ 8,9,10,11 2609 | stw CARG2, 16+32*8+1*4(sp) // Store sp in RID_SP. 2610 | savex_ 12,13,14,15 2611 | mflr CARG3 2612 | li TMP1, 0 2613 | savex_ 16,17,18,19 2614 | stw TMP1, 16+32*8+0*4(sp) // Clear RID_TMP. 2615 | savex_ 20,21,22,23 2616 | lhz CARG4, 2(CARG3) // Load trace number. 2617 | savex_ 24,25,26,27 2618 | lwz L, DISPATCH_GL(cur_L)(DISPATCH) 2619 | savex_ 28,29,30,31 2620 | sub CARG3, TMP0, CARG3 // Compute exit number. 2621 | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) 2622 | srwi CARG3, CARG3, 2 2623 | stp L, DISPATCH_J(L)(DISPATCH) 2624 | subi CARG3, CARG3, 2 2625 | stp BASE, L->base 2626 | stw CARG4, DISPATCH_J(parent)(DISPATCH) 2627 | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH) 2628 | addi CARG1, DISPATCH, GG_DISP2J 2629 | stw CARG3, DISPATCH_J(exitno)(DISPATCH) 2630 | addi CARG2, sp, 16 2631 | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) 2632 | // Returns MULTRES (unscaled) or negated error code.
2633 | lp TMP1, L->cframe 2634 | lwz TMP2, 0(sp) 2635 | lp BASE, L->base 2636 |.if GPR64 2637 | rldicr sp, TMP1, 0, 61 2638 |.else 2639 | rlwinm sp, TMP1, 0, 0, 29 2640 |.endif 2641 | lwz PC, SAVE_PC // Get SAVE_PC. 2642 | stw TMP2, 0(sp) 2643 | stw L, SAVE_L // Set SAVE_L (on-trace resume/yield). 2644 | b >1 2645 |.endif 2646 |->vm_exit_interp: 2647 |.if JIT 2648 | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set. 2649 | lwz L, SAVE_L 2650 | addi DISPATCH, JGL, -GG_DISP2G-32768 2651 | stp BASE, L->base 2652 |1: // Common tail; vm_exit_handler joins here. 2653 | cmpwi CARG1, 0 2654 | blt >9 // Check for error from exit. 2655 | lwz LFUNC:RB, FRAME_FUNC(BASE) 2656 | slwi MULTRES, CARG1, 3 2657 | li TMP2, 0 2658 | stw MULTRES, SAVE_MULTRES 2659 | lwz TMP1, LFUNC:RB->pc 2660 | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH) 2661 | lwz KBASE, PC2PROTO(k)(TMP1) 2662 | // Setup type comparison constants. 2663 | li TISNUM, LJ_TISNUM 2664 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2665 | stw TMP3, TMPD 2666 | li ZERO, 0 2667 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 2668 | lfs TOBIT, TMPD 2669 | stw TMP3, TMPD 2670 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 2671 | li TISNIL, LJ_TNIL 2672 | stw TMP0, TONUM_HI 2673 | lfs TONUM, TMPD 2674 | // Modified copy of ins_next which handles function header dispatch, too. 2675 | lwz INS, 0(PC) 2676 | addi PC, PC, 4 2677 | // Assumes TISNIL == ~LJ_VMST_INTERP == -1. 2678 | stw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) 2679 | decode_OPP TMP1, INS 2680 | decode_RA8 RA, INS 2681 | lpx TMP0, DISPATCH, TMP1 2682 | mtctr TMP0 2683 | cmplwi TMP1, BC_FUNCF*4 // Function header? 2684 | bge >2 2685 | decode_RB8 RB, INS 2686 | decode_RD8 RD, INS 2687 | decode_RC8 RC, INS 2688 | bctr 2689 |2: // Opcode is BC_FUNCF or above. 2690 | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function? 2691 | blt >3 2692 | // Check frame below fast function. 2693 | lwz TMP1, FRAME_PC(BASE) 2694 | andix. TMP0, TMP1, FRAME_TYPE 2695 | bney >3 // Trace stitching continuation?
2696 | // Otherwise set KBASE for Lua function below fast function. 2697 | lwz TMP2, -4(TMP1) 2698 | decode_RA8 TMP0, TMP2 2699 | sub TMP1, BASE, TMP0 2700 | lwz LFUNC:TMP2, -12(TMP1) 2701 | lwz TMP1, LFUNC:TMP2->pc 2702 | lwz KBASE, PC2PROTO(k)(TMP1) 2703 |3: 2704 | subi RC, MULTRES, 8 2705 | add RA, RA, BASE 2706 | bctr 2707 | 2708 |9: // Rethrow error from the right C frame. 2709 | neg CARG2, CARG1 2710 | mr CARG1, L 2711 | bl extern lj_err_throw // (lua_State *L, int errcode) 2712 |.endif 2713 | 2714 |//----------------------------------------------------------------------- 2715 |//-- Math helper functions ---------------------------------------------- 2716 |//----------------------------------------------------------------------- 2717 | 2718 |// NYI: Use internal implementations of floor, ceil, trunc. 2719 | 2720 |->vm_modi: // Floored modulo: CARG1 = CARG1 % CARG2. Returns 0 on divwo overflow; SO stays set if CARG2 == 0. 2721 | divwo. TMP0, CARG1, CARG2 2722 | bso >1 2723 |.if GPR64 2724 | xor CARG3, CARG1, CARG2 2725 | cmpwi CARG3, 0 2726 |.else 2727 | xor. CARG3, CARG1, CARG2 2728 |.endif 2729 | mullw TMP0, TMP0, CARG2 2730 | sub CARG1, CARG1, TMP0 2731 | bgelr // Same signs: the remainder is already correct. 2732 | cmpwi CARG1, 0; beqlr 2733 | add CARG1, CARG1, CARG2 2734 | blr 2735 |1: 2736 | cmpwi CARG2, 0 2737 | li CARG1, 0 2738 | beqlr 2739 | clrso TMP0 // Clear SO for -2147483648 % -1 and return 0. 2740 | blr 2741 | 2742 |//----------------------------------------------------------------------- 2743 |//-- Miscellaneous functions -------------------------------------------- 2744 |//----------------------------------------------------------------------- 2745 | 2746 |// void lj_vm_cachesync(void *start, void *end) 2747 |// Flush D-Cache and invalidate I-Cache. Assumes 32 byte cache line size. 2748 |// This is a good lower bound, except for very ancient PPC models. 2749 |->vm_cachesync: 2750 |.if JIT or FFI 2751 | // Compute start of first cache line and number of cache lines. 2752 | rlwinm CARG1, CARG1, 0, 0, 26 2753 | sub CARG2, CARG2, CARG1 2754 | addi CARG2, CARG2, 31 2755 | rlwinm.
CARG2, CARG2, 27, 5, 31 2756 | beqlr 2757 | mtctr CARG2 2758 | mr CARG3, CARG1 2759 |1: // Flush D-Cache. 2760 | dcbst r0, CARG1 2761 | addi CARG1, CARG1, 32 2762 | bdnz <1 2763 | sync 2764 | mtctr CARG2 2765 |1: // Invalidate I-Cache. 2766 | icbi r0, CARG3 2767 | addi CARG3, CARG3, 32 2768 | bdnz <1 2769 | isync 2770 | blr 2771 |.endif 2772 | 2773 |//----------------------------------------------------------------------- 2774 |//-- FFI helper functions ----------------------------------------------- 2775 |//----------------------------------------------------------------------- 2776 | 2777 |// Handler for callback functions. Callback slot number in r11, g in r12. 2778 |->vm_ffi_callback: 2779 |.if FFI 2780 |.type CTSTATE, CTState, PC 2781 | saveregs 2782 | lwz CTSTATE, GL:r12->ctype_state 2783 | addi DISPATCH, r12, GG_G2DISP 2784 | stw r11, CTSTATE->cb.slot 2785 | stw r3, CTSTATE->cb.gpr[0] // Spill argument registers r3-r10 and f1-f8 into cb. 2786 | stfd f1, CTSTATE->cb.fpr[0] 2787 | stw r4, CTSTATE->cb.gpr[1] 2788 | stfd f2, CTSTATE->cb.fpr[1] 2789 | stw r5, CTSTATE->cb.gpr[2] 2790 | stfd f3, CTSTATE->cb.fpr[2] 2791 | stw r6, CTSTATE->cb.gpr[3] 2792 | stfd f4, CTSTATE->cb.fpr[3] 2793 | stw r7, CTSTATE->cb.gpr[4] 2794 | stfd f5, CTSTATE->cb.fpr[4] 2795 | stw r8, CTSTATE->cb.gpr[5] 2796 | stfd f6, CTSTATE->cb.fpr[5] 2797 | stw r9, CTSTATE->cb.gpr[6] 2798 | stfd f7, CTSTATE->cb.fpr[6] 2799 | stw r10, CTSTATE->cb.gpr[7] 2800 | stfd f8, CTSTATE->cb.fpr[7] 2801 | addi TMP0, sp, CFRAME_SPACE+8 2802 | stw TMP0, CTSTATE->cb.stack // cb.stack = sp + CFRAME_SPACE+8. 2803 | mr CARG1, CTSTATE 2804 | stw CTSTATE, SAVE_PC // Any value outside of bytecode is ok. 2805 | mr CARG2, sp 2806 | bl extern lj_ccallback_enter // (CTState *cts, void *cf) 2807 | // Returns lua_State *. 2808 | lp BASE, L:CRET1->base 2809 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 2810 | lp RC, L:CRET1->top 2811 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2812 | li ZERO, 0 2813 | mr L, CRET1 2814 | stw TMP3, TMPD 2815 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 2816 | lwz LFUNC:RB, FRAME_FUNC(BASE) 2817 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 2818 | stw TMP0, TONUM_HI 2819 | li TISNIL, LJ_TNIL 2820 | li_vmstate INTERP 2821 | lfs TOBIT, TMPD 2822 | stw TMP3, TMPD 2823 | sub RC, RC, BASE 2824 | st_vmstate 2825 | lfs TONUM, TMPD 2826 | ins_callt 2827 |.endif 2828 | 2829 |->cont_ffi_callback: // Return from FFI callback. 2830 |.if FFI 2831 | lwz CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) 2832 | stp BASE, L->base 2833 | stp RB, L->top 2834 | stp L, CTSTATE->L 2835 | mr CARG1, CTSTATE 2836 | mr CARG2, RA 2837 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) 2838 | lwz CRET1, CTSTATE->cb.gpr[0] // Results are read back from cb.gpr[0..1] and cb.fpr[0]. 2839 | lfd FARG1, CTSTATE->cb.fpr[0] 2840 | lwz CRET2, CTSTATE->cb.gpr[1] 2841 | b ->vm_leave_unw 2842 |.endif 2843 | 2844 |->vm_ffi_call: // Call C function via FFI. 2845 | // Caveat: needs special frame unwinding, see below. 2846 |.if FFI 2847 | .type CCSTATE, CCallState, CARG1 2848 | lwz TMP1, CCSTATE->spadj 2849 | mflr TMP0 2850 | lbz CARG2, CCSTATE->nsp 2851 | lbz CARG3, CCSTATE->nfpr 2852 | neg TMP1, TMP1 2853 | stw TMP0, 4(sp) 2854 | cmpwi cr1, CARG3, 0 2855 | mr TMP2, sp 2856 | addic. CARG2, CARG2, -1 2857 | stwux sp, sp, TMP1 2858 | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls. 2859 | stw r14, -4(TMP2) 2860 | stw CCSTATE, -8(TMP2) 2861 | mr r14, TMP2 // r14 holds the old sp across the call. 2862 | la TMP1, CCSTATE->stack 2863 | slwi CARG2, CARG2, 2 2864 | blty >2 2865 | la TMP2, 8(sp) 2866 |1: // Copy stack arguments, highest index first. 2867 | lwzx TMP0, TMP1, CARG2 2868 | stwx TMP0, TMP2, CARG2 2869 | addic.
CARG2, CARG2, -4 2870 | bge <1 2871 |2: 2872 | bney cr1, >3 2873 | lfd f1, CCSTATE->fpr[0] 2874 | lfd f2, CCSTATE->fpr[1] 2875 | lfd f3, CCSTATE->fpr[2] 2876 | lfd f4, CCSTATE->fpr[3] 2877 | lfd f5, CCSTATE->fpr[4] 2878 | lfd f6, CCSTATE->fpr[5] 2879 | lfd f7, CCSTATE->fpr[6] 2880 | lfd f8, CCSTATE->fpr[7] 2881 |3: 2882 | lp TMP0, CCSTATE->func 2883 | lwz CARG2, CCSTATE->gpr[1] 2884 | lwz CARG3, CCSTATE->gpr[2] 2885 | lwz CARG4, CCSTATE->gpr[3] 2886 | lwz CARG5, CCSTATE->gpr[4] 2887 | mtctr TMP0 2888 | lwz r8, CCSTATE->gpr[5] 2889 | lwz r9, CCSTATE->gpr[6] 2890 | lwz r10, CCSTATE->gpr[7] 2891 | lwz CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. 2892 | bctrl 2893 | lwz CCSTATE:TMP1, -8(r14) // Reload CCSTATE saved below the old sp. 2894 | lwz TMP2, -4(r14) 2895 | lwz TMP0, 4(r14) 2896 | stw CARG1, CCSTATE:TMP1->gpr[0] 2897 | stfd FARG1, CCSTATE:TMP1->fpr[0] 2898 | stw CARG2, CCSTATE:TMP1->gpr[1] 2899 | mtlr TMP0 2900 | stw CARG3, CCSTATE:TMP1->gpr[2] 2901 | mr sp, r14 // Restore the old sp. 2902 | stw CARG4, CCSTATE:TMP1->gpr[3] 2903 | mr r14, TMP2 2904 | blr 2905 |.endif 2906 |// Note: vm_ffi_call must be the last function in this object file! 2907 | 2908 |//----------------------------------------------------------------------- 2909} 2910 2911/* Generate the code for a single instruction. */ 2912static void build_ins(BuildCtx *ctx, BCOp op, int defop) 2913{ 2914 int vk = 0; 2915 |=>defop: 2916 2917 switch (op) { 2918 2919 /* -- Comparison ops ---------------------------------------------------- */ 2920 2921 /* Remember: all ops branch for a true comparison, fall through otherwise.
*/ 2922 2923 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 2924 | // RA = src1*8, RD = src2*8, JMP with RD = target 2925 |.if DUALNUM 2926 | lwzux TMP0, RA, BASE 2927 | addi PC, PC, 4 2928 | lwz CARG2, 4(RA) 2929 | lwzux TMP1, RD, BASE 2930 | lwz TMP2, -4(PC) 2931 | checknum cr0, TMP0 2932 | lwz CARG3, 4(RD) 2933 | decode_RD4 TMP2, TMP2 2934 | checknum cr1, TMP1 2935 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 2936 | bne cr0, >7 2937 | bne cr1, >8 2938 | cmpw CARG2, CARG3 2939 if (op == BC_ISLT) { 2940 | bge >2 2941 } else if (op == BC_ISGE) { 2942 | blt >2 2943 } else if (op == BC_ISLE) { 2944 | bgt >2 2945 } else { 2946 | ble >2 2947 } 2948 |1: // Comparison true: take the jump. 2949 | add PC, PC, TMP2 2950 |2: 2951 | ins_next 2952 | 2953 |7: // RA is not an integer. 2954 | bgt cr0, ->vmeta_comp 2955 | // RA is a number. 2956 | lfd f0, 0(RA) 2957 | bgt cr1, ->vmeta_comp 2958 | blt cr1, >4 2959 | // RA is a number, RD is an integer. 2960 | tonum_i f1, CARG3 2961 | b >5 2962 | 2963 |8: // RA is an integer, RD is not an integer. 2964 | bgt cr1, ->vmeta_comp 2965 | // RA is an integer, RD is a number.
2966 | tonum_i f0, CARG2 2967 |4: 2968 | lfd f1, 0(RD) 2969 |5: 2970 | fcmpu cr0, f0, f1 2971 if (op == BC_ISLT) { 2972 | bge <2 2973 } else if (op == BC_ISGE) { 2974 | blt <2 2975 } else if (op == BC_ISLE) { 2976 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq 2977 | bge <2 2978 } else { 2979 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq 2980 | blt <2 2981 } 2982 | b <1 2983 |.else 2984 | lwzx TMP0, BASE, RA 2985 | addi PC, PC, 4 2986 | lfdx f0, BASE, RA 2987 | lwzx TMP1, BASE, RD 2988 | checknum cr0, TMP0 2989 | lwz TMP2, -4(PC) 2990 | lfdx f1, BASE, RD 2991 | checknum cr1, TMP1 2992 | decode_RD4 TMP2, TMP2 2993 | bge cr0, ->vmeta_comp 2994 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 2995 | bge cr1, ->vmeta_comp 2996 | fcmpu cr0, f0, f1 2997 if (op == BC_ISLT) { 2998 | bge >1 2999 } else if (op == BC_ISGE) { 3000 | blt >1 3001 } else if (op == BC_ISLE) { 3002 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq 3003 | bge >1 3004 } else { 3005 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq 3006 | blt >1 3007 } 3008 | add PC, PC, TMP2 3009 |1: 3010 | ins_next 3011 |.endif 3012 break; 3013 3014 case BC_ISEQV: case BC_ISNEV: 3015 vk = op == BC_ISEQV; 3016 | // RA = src1*8, RD = src2*8, JMP with RD = target 3017 |.if DUALNUM 3018 | lwzux TMP0, RA, BASE 3019 | addi PC, PC, 4 3020 | lwz CARG2, 4(RA) 3021 | lwzux TMP1, RD, BASE 3022 | checknum cr0, TMP0 3023 | lwz TMP2, -4(PC) 3024 | checknum cr1, TMP1 3025 | decode_RD4 TMP2, TMP2 3026 | lwz CARG3, 4(RD) 3027 | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt 3028 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3029 if (vk) { 3030 | ble cr7, ->BC_ISEQN_Z // Neither operand is a non-number: continue in the ISEQN code. 3031 } else { 3032 | ble cr7, ->BC_ISNEN_Z // Neither operand is a non-number: continue in the ISNEN code. 3033 } 3034 |.else 3035 | lwzux TMP0, RA, BASE 3036 | lwz TMP2, 0(PC) 3037 | lfd f0, 0(RA) 3038 | addi PC, PC, 4 3039 | lwzux TMP1, RD, BASE 3040 | checknum cr0, TMP0 3041 | decode_RD4 TMP2, TMP2 3042 | lfd f1, 0(RD) 3043 | checknum cr1, TMP1 3044 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3045 | bge cr0, >5 3046 | bge cr1, >5 3047 | fcmpu cr0, f0, f1 3048 if (vk) { 3049 | bne >1 3050 | add PC, PC, TMP2
3051 } else { 3052 | beq >1 3053 | add PC, PC, TMP2 3054 } 3055 |1: 3056 | ins_next 3057 |.endif 3058 |5: // Either or both types are not numbers. 3059 |.if not DUALNUM 3060 | lwz CARG2, 4(RA) 3061 | lwz CARG3, 4(RD) 3062 |.endif 3063 |.if FFI 3064 | cmpwi cr7, TMP0, LJ_TCDATA 3065 | cmpwi cr5, TMP1, LJ_TCDATA 3066 |.endif 3067 | not TMP3, TMP0 3068 | cmplw TMP0, TMP1 3069 | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? 3070 |.if FFI 3071 | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq 3072 |.endif 3073 | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? 3074 |.if FFI 3075 | beq cr7, ->vmeta_equal_cd 3076 |.endif 3077 | cmplw cr5, CARG2, CARG3 3078 | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. 3079 | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. 3080 | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. 3081 | mr SAVE0, PC // Saved so the __eq path can restore the old PC. 3082 | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. 3083 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. 3084 if (vk) { 3085 | bne cr0, >6 3086 | add PC, PC, TMP2 3087 |6: 3088 } else { 3089 | beq cr0, >6 3090 | add PC, PC, TMP2 3091 |6: 3092 } 3093 |.if DUALNUM 3094 | bge cr0, >2 // Done if 1 or 2. 3095 |1: 3096 | ins_next 3097 |2: 3098 |.else 3099 | blt cr0, <1 // Done if 1 or 2. 3100 |.endif 3101 | blt cr6, <1 // Done if not tab/ud. 3102 | 3103 | // Different tables or userdatas. Need to check __eq metamethod. 3104 | // Field metatable must be at same offset for GCtab and GCudata! 3105 | lwz TAB:TMP2, TAB:CARG2->metatable 3106 | li CARG4, 1-vk // ne = 0 or 1. 3107 | cmplwi TAB:TMP2, 0 3108 | beq <1 // No metatable? 3109 | lbz TMP2, TAB:TMP2->nomm 3110 | andix. TMP2, TMP2, 1<<MM_eq 3111 | bne <1 // Or 'no __eq' flag set? 3112 | mr PC, SAVE0 // Restore old PC. 3113 | b ->vmeta_equal // Handle __eq metamethod.
3114 break; 3115 3116 case BC_ISEQS: case BC_ISNES: 3117 vk = op == BC_ISEQS; 3118 | // RA = src*8, RD = str_const*8 (~), JMP with RD = target 3119 | lwzux TMP0, RA, BASE 3120 | srwi RD, RD, 1 3121 | lwz STR:TMP3, 4(RA) 3122 | lwz TMP2, 0(PC) 3123 | subfic RD, RD, -4 3124 | addi PC, PC, 4 3125 |.if FFI 3126 | cmpwi TMP0, LJ_TCDATA 3127 |.endif 3128 | lwzx STR:TMP1, KBASE, RD // KBASE-4-str_const*4 3129 | .gpr64 extsw TMP0, TMP0 3130 | subfic TMP0, TMP0, LJ_TSTR 3131 |.if FFI 3132 | beq ->vmeta_equal_cd 3133 |.endif 3134 | sub TMP1, STR:TMP1, STR:TMP3 3135 | or TMP0, TMP0, TMP1 3136 | decode_RD4 TMP2, TMP2 3137 | subfic TMP0, TMP0, 0 3138 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3139 | subfe TMP1, TMP1, TMP1 // TMP1 = 0 if equal, -1 otherwise (mask for the jump offset). 3140 if (vk) { 3141 | andc TMP2, TMP2, TMP1 3142 } else { 3143 | and TMP2, TMP2, TMP1 3144 } 3145 | add PC, PC, TMP2 3146 | ins_next 3147 break; 3148 3149 case BC_ISEQN: case BC_ISNEN: 3150 vk = op == BC_ISEQN; 3151 | // RA = src*8, RD = num_const*8, JMP with RD = target 3152 |.if DUALNUM 3153 | lwzux TMP0, RA, BASE 3154 | addi PC, PC, 4 3155 | lwz CARG2, 4(RA) 3156 | lwzux TMP1, RD, KBASE 3157 | checknum cr0, TMP0 3158 | lwz TMP2, -4(PC) 3159 | checknum cr1, TMP1 3160 | decode_RD4 TMP2, TMP2 3161 | lwz CARG3, 4(RD) 3162 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3163 if (vk) { 3164 |->BC_ISEQN_Z: 3165 } else { 3166 |->BC_ISNEN_Z: 3167 } 3168 | bne cr0, >7 3169 | bne cr1, >8 3170 | cmpw CARG2, CARG3 3171 |4: 3172 |.else 3173 if (vk) { 3174 |->BC_ISEQN_Z: // Dummy label. 3175 } else { 3176 |->BC_ISNEN_Z: // Dummy label.
3177 } 3178 | lwzx TMP0, BASE, RA 3179 | addi PC, PC, 4 3180 | lfdx f0, BASE, RA 3181 | lwz TMP2, -4(PC) 3182 | lfdx f1, KBASE, RD 3183 | decode_RD4 TMP2, TMP2 3184 | checknum TMP0 3185 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3186 | bge >3 3187 | fcmpu cr0, f0, f1 3188 |.endif 3189 if (vk) { 3190 | bne >1 3191 | add PC, PC, TMP2 3192 |1: 3193 |.if not FFI 3194 |3: 3195 |.endif 3196 } else { 3197 | beq >2 3198 |1: 3199 |.if not FFI 3200 |3: 3201 |.endif 3202 | add PC, PC, TMP2 3203 |2: 3204 } 3205 | ins_next 3206 |.if FFI 3207 |3: 3208 | cmpwi TMP0, LJ_TCDATA 3209 | beq ->vmeta_equal_cd 3210 | b <1 3211 |.endif 3212 |.if DUALNUM 3213 |7: // RA is not an integer. 3214 | bge cr0, <3 3215 | // RA is a number. 3216 | lfd f0, 0(RA) 3217 | blt cr1, >1 3218 | // RA is a number, RD is an integer. 3219 | tonum_i f1, CARG3 3220 | b >2 3221 | 3222 |8: // RA is an integer, RD is a number. 3223 | tonum_i f0, CARG2 3224 |1: 3225 | lfd f1, 0(RD) 3226 |2: 3227 | fcmpu cr0, f0, f1 3228 | b <4 // Rejoin the common branch on cr0. 3229 |.endif 3230 break; 3231 3232 case BC_ISEQP: case BC_ISNEP: 3233 vk = op == BC_ISEQP; 3234 | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target 3235 | lwzx TMP0, BASE, RA 3236 | srwi TMP1, RD, 3 3237 | lwz TMP2, 0(PC) 3238 | not TMP1, TMP1 3239 | addi PC, PC, 4 3240 |.if FFI 3241 | cmpwi TMP0, LJ_TCDATA 3242 |.endif 3243 | sub TMP0, TMP0, TMP1 3244 |.if FFI 3245 | beq ->vmeta_equal_cd 3246 |.endif 3247 | decode_RD4 TMP2, TMP2 3248 | .gpr64 extsw TMP0, TMP0 3249 | addic TMP0, TMP0, -1 3250 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3251 | subfe TMP1, TMP1, TMP1 // TMP1 = -1 if equal, 0 otherwise (mask for the jump offset). 3252 if (vk) { 3253 | and TMP2, TMP2, TMP1 3254 } else { 3255 | andc TMP2, TMP2, TMP1 3256 } 3257 | add PC, PC, TMP2 3258 | ins_next 3259 break; 3260 3261 /* -- Unary test and copy ops ------------------------------------------- */ 3262 3263 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: 3264 | // RA = dst*8 or unused, RD = src*8, JMP with RD = target 3265 | lwzx TMP0, BASE, RD 3266 | lwz INS, 0(PC) 3267 | addi PC,
PC, 4 3268 if (op == BC_IST || op == BC_ISF) { 3269 | .gpr64 extsw TMP0, TMP0 3270 | subfic TMP0, TMP0, LJ_TTRUE 3271 | decode_RD4 TMP2, INS 3272 | subfe TMP1, TMP1, TMP1 3273 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3274 if (op == BC_IST) { 3275 | andc TMP2, TMP2, TMP1 3276 } else { 3277 | and TMP2, TMP2, TMP1 3278 } 3279 | add PC, PC, TMP2 3280 } else { 3281 | li TMP1, LJ_TFALSE 3282 | lfdx f0, BASE, RD 3283 | cmplw TMP0, TMP1 3284 if (op == BC_ISTC) { 3285 | bge >1 3286 } else { 3287 | blt >1 3288 } 3289 | addis PC, PC, -(BCBIAS_J*4 >> 16) 3290 | decode_RD4 TMP2, INS 3291 | stfdx f0, BASE, RA 3292 | add PC, PC, TMP2 3293 |1: 3294 } 3295 | ins_next 3296 break; 3297 3298 case BC_ISTYPE: 3299 | // RA = src*8, RD = -type*8 3300 | lwzx TMP0, BASE, RA 3301 | srwi TMP1, RD, 3 3302 | ins_next1 3303 |.if not PPE and not GPR64 3304 | add. TMP0, TMP0, TMP1 3305 |.else 3306 | neg TMP1, TMP1 3307 | cmpw TMP0, TMP1 3308 |.endif 3309 | bne ->vmeta_istype 3310 | ins_next2 3311 break; 3312 case BC_ISNUM: 3313 | // RA = src*8, RD = -(TISNUM-1)*8 3314 | lwzx TMP0, BASE, RA 3315 | ins_next1 3316 | checknum TMP0 3317 | bge ->vmeta_istype 3318 | ins_next2 3319 break; 3320 3321 /* -- Unary ops --------------------------------------------------------- */ 3322 3323 case BC_MOV: 3324 | // RA = dst*8, RD = src*8 3325 | ins_next1 3326 | lfdx f0, BASE, RD 3327 | stfdx f0, BASE, RA 3328 | ins_next2 3329 break; 3330 case BC_NOT: 3331 | // RA = dst*8, RD = src*8 3332 | ins_next1 3333 | lwzx TMP0, BASE, RD 3334 | .gpr64 extsw TMP0, TMP0 3335 | subfic TMP1, TMP0, LJ_TTRUE 3336 | adde TMP0, TMP0, TMP1 // TMP0 = LJ_TTRUE + carry from the subfic. 3337 | stwx TMP0, BASE, RA 3338 | ins_next2 3339 break; 3340 case BC_UNM: 3341 | // RA = dst*8, RD = src*8 3342 | lwzux TMP1, RD, BASE 3343 | lwz TMP0, 4(RD) 3344 | checknum TMP1 3345 |.if DUALNUM 3346 | bne >5 3347 |.if GPR64 3348 | lus TMP2, 0x8000 3349 | neg TMP0, TMP0 3350 | cmplw TMP0, TMP2 3351 | beq >4 3352 |.else 3353 | nego.
TMP0, TMP0 3354 | bso >4 3355 |1: 3356 |.endif 3357 | ins_next1 3358 | stwux TISNUM, RA, BASE 3359 | stw TMP0, 4(RA) 3360 |3: 3361 | ins_next2 3362 |4: 3363 |.if not GPR64 3364 | // Potential overflow. 3365 | checkov TMP1, <1 // Ignore unrelated overflow. 3366 |.endif 3367 | lus TMP1, 0x41e0 // 2^31. 3368 | li TMP0, 0 3369 | b >7 3370 |.endif 3371 |5: 3372 | bge ->vmeta_unm 3373 | xoris TMP1, TMP1, 0x8000 // Flip the sign bit in the hiword. 3374 |7: 3375 | ins_next1 3376 | stwux TMP1, RA, BASE 3377 | stw TMP0, 4(RA) 3378 |.if DUALNUM 3379 | b <3 3380 |.else 3381 | ins_next2 3382 |.endif 3383 break; 3384 case BC_LEN: 3385 | // RA = dst*8, RD = src*8 3386 | lwzux TMP0, RD, BASE 3387 | lwz CARG1, 4(RD) 3388 | checkstr TMP0; bne >2 3389 | lwz CRET1, STR:CARG1->len 3390 |1: 3391 |.if DUALNUM 3392 | ins_next1 3393 | stwux TISNUM, RA, BASE 3394 | stw CRET1, 4(RA) 3395 |.else 3396 | tonum_u f0, CRET1 // Result is a non-negative integer. 3397 | ins_next1 3398 | stfdx f0, BASE, RA 3399 |.endif 3400 | ins_next2 3401 |2: 3402 | checktab TMP0; bne ->vmeta_len 3403#if LJ_52 3404 | lwz TAB:TMP2, TAB:CARG1->metatable 3405 | cmplwi TAB:TMP2, 0 3406 | bne >9 3407 |3: 3408#endif 3409 |->BC_LEN_Z: 3410 | bl extern lj_tab_len // (GCtab *t) 3411 | // Returns uint32_t (but less than 2^31). 3412 | b <1 3413#if LJ_52 3414 |9: 3415 | lbz TMP0, TAB:TMP2->nomm 3416 | andix. TMP0, TMP0, 1<<MM_len 3417 | bne <3 // 'no __len' flag set: done.
3418 | b ->vmeta_len 3419#endif 3420 break; 3421 3422 /* -- Binary ops -------------------------------------------------------- */ 3423 3424 |.macro ins_arithpre 3425 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 3426 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3427 ||switch (vk) { 3428 ||case 0: 3429 | lwzx TMP1, BASE, RB 3430 | .if DUALNUM 3431 | lwzx TMP2, KBASE, RC 3432 | .endif 3433 | lfdx f14, BASE, RB 3434 | lfdx f15, KBASE, RC 3435 | .if DUALNUM 3436 | checknum cr0, TMP1 3437 | checknum cr1, TMP2 3438 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3439 | bge ->vmeta_arith_vn 3440 | .else 3441 | checknum TMP1; bge ->vmeta_arith_vn 3442 | .endif 3443 || break; 3444 ||case 1: 3445 | lwzx TMP1, BASE, RB 3446 | .if DUALNUM 3447 | lwzx TMP2, KBASE, RC 3448 | .endif 3449 | lfdx f15, BASE, RB 3450 | lfdx f14, KBASE, RC 3451 | .if DUALNUM 3452 | checknum cr0, TMP1 3453 | checknum cr1, TMP2 3454 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3455 | bge ->vmeta_arith_nv 3456 | .else 3457 | checknum TMP1; bge ->vmeta_arith_nv 3458 | .endif 3459 || break; 3460 ||default: 3461 | lwzx TMP1, BASE, RB 3462 | lwzx TMP2, BASE, RC 3463 | lfdx f14, BASE, RB 3464 | lfdx f15, BASE, RC 3465 | checknum cr0, TMP1 3466 | checknum cr1, TMP2 3467 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3468 | bge ->vmeta_arith_vv 3469 || break; 3470 ||} 3471 |.endmacro 3472 | 3473 |.macro ins_arithfallback, ins // Applies 'ins' to the vmeta_arith_*2 label selected by vk. 3474 ||switch (vk) { 3475 ||case 0: 3476 | ins ->vmeta_arith_vn2 3477 || break; 3478 ||case 1: 3479 | ins ->vmeta_arith_nv2 3480 || break; 3481 ||default: 3482 | ins ->vmeta_arith_vv2 3483 || break; 3484 ||} 3485 |.endmacro 3486 | 3487 |.macro intmod, a, b, c // Arguments are unused: the body only calls vm_modi. 3488 | bl ->vm_modi 3489 |.endmacro 3490 | 3491 |.macro fpmod, a, b, c 3492 |->BC_MODVN_Z: 3493 | fdiv FARG1, b, c 3494 | // NYI: Use internal implementation of floor.
3495 | blex floor // floor(b/c) 3496 | fmul a, FARG1, c 3497 | fsub a, b, a // b - floor(b/c)*c 3498 |.endmacro 3499 | 3500 |.macro ins_arithfp, fpins // FP-only variant: f0 = fpins(f14, f15), stored at BASE+RA. 3501 | ins_arithpre 3502 |.if "fpins" == "fpmod_" 3503 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3504 |.else 3505 | fpins f0, f14, f15 3506 | ins_next1 3507 | stfdx f0, BASE, RA 3508 | ins_next2 3509 |.endif 3510 |.endmacro 3511 | 3512 |.macro ins_arithdn, intins, fpins // DUALNUM variant: integer path first, FP variant at label 5. 3513 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 3514 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3515 ||switch (vk) { 3516 ||case 0: 3517 | lwzux TMP1, RB, BASE 3518 | lwzux TMP2, RC, KBASE 3519 | lwz CARG1, 4(RB) 3520 | checknum cr0, TMP1 3521 | lwz CARG2, 4(RC) 3522 || break; 3523 ||case 1: 3524 | lwzux TMP1, RB, BASE 3525 | lwzux TMP2, RC, KBASE 3526 | lwz CARG2, 4(RB) 3527 | checknum cr0, TMP1 3528 | lwz CARG1, 4(RC) 3529 || break; 3530 ||default: 3531 | lwzux TMP1, RB, BASE 3532 | lwzux TMP2, RC, BASE 3533 | lwz CARG1, 4(RB) 3534 | checknum cr0, TMP1 3535 | lwz CARG2, 4(RC) 3536 || break; 3537 ||} 3538 | checknum cr1, TMP2 3539 | bne >5 3540 | bne cr1, >5 3541 | intins CARG1, CARG1, CARG2 3542 | bso >4 3543 |1: 3544 | ins_next1 3545 | stwux TISNUM, RA, BASE 3546 | stw CARG1, 4(RA) 3547 |2: 3548 | ins_next2 3549 |4: // Overflow. 3550 | checkov TMP0, <1 // Ignore unrelated overflow. 3551 | ins_arithfallback b 3552 |5: // FP variant. 3553 ||if (vk == 1) { 3554 | lfd f15, 0(RB) 3555 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3556 | lfd f14, 0(RC) 3557 ||} else { 3558 | lfd f14, 0(RB) 3559 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3560 | lfd f15, 0(RC) 3561 ||} 3562 | ins_arithfallback bge 3563 |.if "fpins" == "fpmod_" 3564 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3565 |.else 3566 | fpins f0, f14, f15 3567 | ins_next1 3568 | stfdx f0, BASE, RA 3569 | b <2 3570 |.endif 3571 |.endmacro 3572 | 3573 |.macro ins_arith, intins, fpins 3574 |.if DUALNUM 3575 | ins_arithdn intins, fpins 3576 |.else 3577 | ins_arithfp fpins 3578 |.endif 3579 |.endmacro 3580 3581 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3582 |.if GPR64 3583 |.macro addo32., y, a, b 3584 | // Need to check overflow for (a<<32) + (b<<32). 3585 | rldicr TMP0, a, 32, 31 3586 | rldicr TMP3, b, 32, 31 3587 | addo. TMP0, TMP0, TMP3 3588 | add y, a, b 3589 |.endmacro 3590 | ins_arith addo32., fadd 3591 |.else 3592 | ins_arith addo., fadd 3593 |.endif 3594 break; 3595 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3596 |.if GPR64 3597 |.macro subo32., y, a, b 3598 | // Need to check overflow for (a<<32) - (b<<32). 3599 | rldicr TMP0, a, 32, 31 3600 | rldicr TMP3, b, 32, 31 3601 | subo. TMP0, TMP0, TMP3 3602 | sub y, a, b 3603 |.endmacro 3604 | ins_arith subo32., fsub 3605 |.else 3606 | ins_arith subo., fsub 3607 |.endif 3608 break; 3609 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3610 | ins_arith mullwo., fmul 3611 break; 3612 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3613 | ins_arithfp fdiv 3614 break; 3615 case BC_MODVN: 3616 | ins_arith intmod, fpmod 3617 break; 3618 case BC_MODNV: case BC_MODVV: 3619 | ins_arith intmod, fpmod_ 3620 break; 3621 case BC_POW: 3622 | // NYI: (partial) integer arithmetic. 
3623 | lwzx TMP1, BASE, RB 3624 | lfdx FARG1, BASE, RB 3625 | lwzx TMP2, BASE, RC 3626 | lfdx FARG2, BASE, RC 3627 | checknum cr0, TMP1 3628 | checknum cr1, TMP2 3629 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3630 | bge ->vmeta_arith_vv 3631 | blex pow 3632 | ins_next1 3633 | stfdx FARG1, BASE, RA 3634 | ins_next2 3635 break; 3636 3637 case BC_CAT: 3638 | // RA = dst*8, RB = src_start*8, RC = src_end*8 3639 | sub CARG3, RC, RB 3640 | stp BASE, L->base 3641 | add CARG2, BASE, RC 3642 | mr SAVE0, RB 3643 |->BC_CAT_Z: 3644 | stw PC, SAVE_PC 3645 | mr CARG1, L 3646 | srwi CARG3, CARG3, 3 3647 | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) 3648 | // Returns NULL (finished) or TValue * (metamethod). 3649 | cmplwi CRET1, 0 3650 | lp BASE, L->base 3651 | bne ->vmeta_binop 3652 | ins_next1 3653 | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. 3654 | stfdx f0, BASE, RA 3655 | ins_next2 3656 break; 3657 3658 /* -- Constant ops ------------------------------------------------------ */ 3659 3660 case BC_KSTR: 3661 | // RA = dst*8, RD = str_const*8 (~) 3662 | srwi TMP1, RD, 1 3663 | subfic TMP1, TMP1, -4 3664 | ins_next1 3665 | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4 3666 | li TMP2, LJ_TSTR 3667 | stwux TMP2, RA, BASE 3668 | stw TMP0, 4(RA) 3669 | ins_next2 3670 break; 3671 case BC_KCDATA: 3672 |.if FFI 3673 | // RA = dst*8, RD = cdata_const*8 (~) 3674 | srwi TMP1, RD, 1 3675 | subfic TMP1, TMP1, -4 3676 | ins_next1 3677 | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4 3678 | li TMP2, LJ_TCDATA 3679 | stwux TMP2, RA, BASE 3680 | stw TMP0, 4(RA) 3681 | ins_next2 3682 |.endif 3683 break; 3684 case BC_KSHORT: 3685 | // RA = dst*8, RD = int16_literal*8 3686 |.if DUALNUM 3687 | slwi RD, RD, 13 3688 | srawi RD, RD, 16 3689 | ins_next1 3690 | stwux TISNUM, RA, BASE 3691 | stw RD, 4(RA) 3692 | ins_next2 3693 |.else 3694 | // The soft-float approach is faster. 
3695 | slwi RD, RD, 13 3696 | srawi TMP1, RD, 31 3697 | xor TMP2, TMP1, RD 3698 | sub TMP2, TMP2, TMP1 // TMP2 = abs(x) 3699 | cntlzw TMP3, TMP2 3700 | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1 3701 | slw TMP2, TMP2, TMP3 // TMP2 = left aligned mantissa 3702 | subfic TMP3, RD, 0 3703 | slwi TMP1, TMP1, 20 3704 | rlwimi RD, TMP2, 21, 1, 31 // hi = sign(x) | (mantissa>>11) 3705 | subfe TMP0, TMP0, TMP0 3706 | add RD, RD, TMP1 // hi = hi + exponent-1 3707 | and RD, RD, TMP0 // hi = x == 0 ? 0 : hi 3708 | ins_next1 3709 | stwux RD, RA, BASE 3710 | stw ZERO, 4(RA) 3711 | ins_next2 3712 |.endif 3713 break; 3714 case BC_KNUM: 3715 | // RA = dst*8, RD = num_const*8 3716 | ins_next1 3717 | lfdx f0, KBASE, RD 3718 | stfdx f0, BASE, RA 3719 | ins_next2 3720 break; 3721 case BC_KPRI: 3722 | // RA = dst*8, RD = primitive_type*8 (~) 3723 | srwi TMP1, RD, 3 3724 | not TMP0, TMP1 3725 | ins_next1 3726 | stwx TMP0, BASE, RA 3727 | ins_next2 3728 break; 3729 case BC_KNIL: 3730 | // RA = base*8, RD = end*8 3731 | stwx TISNIL, BASE, RA 3732 | addi RA, RA, 8 3733 |1: 3734 | stwx TISNIL, BASE, RA 3735 | cmpw RA, RD 3736 | addi RA, RA, 8 3737 | blt <1 3738 | ins_next_ 3739 break; 3740 3741 /* -- Upvalue and function ops ------------------------------------------ */ 3742 3743 case BC_UGET: 3744 | // RA = dst*8, RD = uvnum*8 3745 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3746 | srwi RD, RD, 1 3747 | addi RD, RD, offsetof(GCfuncL, uvptr) 3748 | lwzx UPVAL:RB, LFUNC:RB, RD 3749 | ins_next1 3750 | lwz TMP1, UPVAL:RB->v 3751 | lfd f0, 0(TMP1) 3752 | stfdx f0, BASE, RA 3753 | ins_next2 3754 break; 3755 case BC_USETV: 3756 | // RA = uvnum*8, RD = src*8 3757 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3758 | srwi RA, RA, 1 3759 | addi RA, RA, offsetof(GCfuncL, uvptr) 3760 | lfdux f0, RD, BASE 3761 | lwzx UPVAL:RB, LFUNC:RB, RA 3762 | lbz TMP3, UPVAL:RB->marked 3763 | lwz CARG2, UPVAL:RB->v 3764 | andix. 
TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 3765 | lbz TMP0, UPVAL:RB->closed 3766 | lwz TMP2, 0(RD) 3767 | stfd f0, 0(CARG2) 3768 | cmplwi cr1, TMP0, 0 3769 | lwz TMP1, 4(RD) 3770 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 3771 | subi TMP2, TMP2, (LJ_TNUMX+1) 3772 | bne >2 // Upvalue is closed and black? 3773 |1: 3774 | ins_next 3775 | 3776 |2: // Check if new value is collectable. 3777 | cmplwi TMP2, LJ_TISGCV - (LJ_TNUMX+1) 3778 | bge <1 // tvisgcv(v) 3779 | lbz TMP3, GCOBJ:TMP1->gch.marked 3780 | andix. TMP3, TMP3, LJ_GC_WHITES // iswhite(v) 3781 | la CARG1, GG_DISP2G(DISPATCH) 3782 | // Crossed a write barrier. Move the barrier forward. 3783 | beq <1 3784 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) 3785 | b <1 3786 break; 3787 case BC_USETS: 3788 | // RA = uvnum*8, RD = str_const*8 (~) 3789 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3790 | srwi TMP1, RD, 1 3791 | srwi RA, RA, 1 3792 | subfic TMP1, TMP1, -4 3793 | addi RA, RA, offsetof(GCfuncL, uvptr) 3794 | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4 3795 | lwzx UPVAL:RB, LFUNC:RB, RA 3796 | lbz TMP3, UPVAL:RB->marked 3797 | lwz CARG2, UPVAL:RB->v 3798 | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 3799 | lbz TMP3, STR:TMP1->marked 3800 | lbz TMP2, UPVAL:RB->closed 3801 | li TMP0, LJ_TSTR 3802 | stw STR:TMP1, 4(CARG2) 3803 | stw TMP0, 0(CARG2) 3804 | bne >2 3805 |1: 3806 | ins_next 3807 | 3808 |2: // Check if string is white and ensure upvalue is closed. 3809 | andix. TMP3, TMP3, LJ_GC_WHITES // iswhite(str) 3810 | cmplwi cr1, TMP2, 0 3811 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 3812 | la CARG1, GG_DISP2G(DISPATCH) 3813 | // Crossed a write barrier. Move the barrier forward. 
3814 | beq <1 3815 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) 3816 | b <1 3817 break; 3818 case BC_USETN: 3819 | // RA = uvnum*8, RD = num_const*8 3820 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3821 | srwi RA, RA, 1 3822 | addi RA, RA, offsetof(GCfuncL, uvptr) 3823 | lfdx f0, KBASE, RD 3824 | lwzx UPVAL:RB, LFUNC:RB, RA 3825 | ins_next1 3826 | lwz TMP1, UPVAL:RB->v 3827 | stfd f0, 0(TMP1) 3828 | ins_next2 3829 break; 3830 case BC_USETP: 3831 | // RA = uvnum*8, RD = primitive_type*8 (~) 3832 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3833 | srwi RA, RA, 1 3834 | srwi TMP0, RD, 3 3835 | addi RA, RA, offsetof(GCfuncL, uvptr) 3836 | not TMP0, TMP0 3837 | lwzx UPVAL:RB, LFUNC:RB, RA 3838 | ins_next1 3839 | lwz TMP1, UPVAL:RB->v 3840 | stw TMP0, 0(TMP1) 3841 | ins_next2 3842 break; 3843 3844 case BC_UCLO: 3845 | // RA = level*8, RD = target 3846 | lwz TMP1, L->openupval 3847 | branch_RD // Do this first since RD is not saved. 3848 | stp BASE, L->base 3849 | cmplwi TMP1, 0 3850 | mr CARG1, L 3851 | beq >1 3852 | add CARG2, BASE, RA 3853 | bl extern lj_func_closeuv // (lua_State *L, TValue *level) 3854 | lp BASE, L->base 3855 |1: 3856 | ins_next 3857 break; 3858 3859 case BC_FNEW: 3860 | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) 3861 | srwi TMP1, RD, 1 3862 | stp BASE, L->base 3863 | subfic TMP1, TMP1, -4 3864 | stw PC, SAVE_PC 3865 | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4 3866 | mr CARG1, L 3867 | lwz CARG3, FRAME_FUNC(BASE) 3868 | // (lua_State *L, GCproto *pt, GCfuncL *parent) 3869 | bl extern lj_func_newL_gc 3870 | // Returns GCfuncL *. 
3871 | lp BASE, L->base 3872 | li TMP0, LJ_TFUNC 3873 | stwux TMP0, RA, BASE 3874 | stw LFUNC:CRET1, 4(RA) 3875 | ins_next 3876 break; 3877 3878 /* -- Table ops --------------------------------------------------------- */ 3879 3880 case BC_TNEW: 3881 case BC_TDUP: 3882 | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) 3883 | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH) 3884 | mr CARG1, L 3885 | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) 3886 | stp BASE, L->base 3887 | cmplw TMP0, TMP1 3888 | stw PC, SAVE_PC 3889 | bge >5 3890 |1: 3891 if (op == BC_TNEW) { 3892 | rlwinm CARG2, RD, 29, 21, 31 3893 | rlwinm CARG3, RD, 18, 27, 31 3894 | cmpwi CARG2, 0x7ff; beq >3 3895 |2: 3896 | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) 3897 | // Returns Table *. 3898 } else { 3899 | srwi TMP1, RD, 1 3900 | subfic TMP1, TMP1, -4 3901 | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4 3902 | bl extern lj_tab_dup // (lua_State *L, Table *kt) 3903 | // Returns Table *. 3904 } 3905 | lp BASE, L->base 3906 | li TMP0, LJ_TTAB 3907 | stwux TMP0, RA, BASE 3908 | stw TAB:CRET1, 4(RA) 3909 | ins_next 3910 if (op == BC_TNEW) { 3911 |3: 3912 | li CARG2, 0x801 3913 | b <2 3914 } 3915 |5: 3916 | mr SAVE0, RD 3917 | bl extern lj_gc_step_fixtop // (lua_State *L) 3918 | mr RD, SAVE0 3919 | mr CARG1, L 3920 | b <1 3921 break; 3922 3923 case BC_GGET: 3924 | // RA = dst*8, RD = str_const*8 (~) 3925 case BC_GSET: 3926 | // RA = src*8, RD = str_const*8 (~) 3927 | lwz LFUNC:TMP2, FRAME_FUNC(BASE) 3928 | srwi TMP1, RD, 1 3929 | lwz TAB:RB, LFUNC:TMP2->env 3930 | subfic TMP1, TMP1, -4 3931 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 3932 if (op == BC_GGET) { 3933 | b ->BC_TGETS_Z 3934 } else { 3935 | b ->BC_TSETS_Z 3936 } 3937 break; 3938 3939 case BC_TGETV: 3940 | // RA = dst*8, RB = table*8, RC = key*8 3941 | lwzux CARG1, RB, BASE 3942 | lwzux CARG2, RC, BASE 3943 | lwz TAB:RB, 4(RB) 3944 |.if DUALNUM 3945 | lwz RC, 4(RC) 3946 |.else 3947 | lfd f0, 0(RC) 3948 |.endif 
3949 | checktab CARG1 3950 | checknum cr1, CARG2 3951 | bne ->vmeta_tgetv 3952 |.if DUALNUM 3953 | lwz TMP0, TAB:RB->asize 3954 | bne cr1, >5 3955 | lwz TMP1, TAB:RB->array 3956 | cmplw TMP0, RC 3957 | slwi TMP2, RC, 3 3958 |.else 3959 | bge cr1, >5 3960 | // Convert number key to integer, check for integerness and range. 3961 | fctiwz f1, f0 3962 | fadd f2, f0, TOBIT 3963 | stfd f1, TMPD 3964 | lwz TMP0, TAB:RB->asize 3965 | fsub f2, f2, TOBIT 3966 | lwz TMP2, TMPD_LO 3967 | lwz TMP1, TAB:RB->array 3968 | fcmpu cr1, f0, f2 3969 | cmplw cr0, TMP0, TMP2 3970 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq 3971 | slwi TMP2, TMP2, 3 3972 |.endif 3973 | ble ->vmeta_tgetv // Integer key and in array part? 3974 | lwzx TMP0, TMP1, TMP2 3975 | lfdx f14, TMP1, TMP2 3976 | checknil TMP0; beq >2 3977 |1: 3978 | ins_next1 3979 | stfdx f14, BASE, RA 3980 | ins_next2 3981 | 3982 |2: // Check for __index if table value is nil. 3983 | lwz TAB:TMP2, TAB:RB->metatable 3984 | cmplwi TAB:TMP2, 0 3985 | beq <1 // No metatable: done. 3986 | lbz TMP0, TAB:TMP2->nomm 3987 | andix. TMP0, TMP0, 1<<MM_index 3988 | bne <1 // 'no __index' flag set: done. 3989 | b ->vmeta_tgetv 3990 | 3991 |5: 3992 | checkstr CARG2; bne ->vmeta_tgetv 3993 |.if not DUALNUM 3994 | lwz STR:RC, 4(RC) 3995 |.endif 3996 | b ->BC_TGETS_Z // String key? 
/* The break below ends the case whose body precedes this point. */
    break;
  case BC_TGETS:
    |  // RA = dst*8, RB = table*8, RC = str_const*8 (~)
    |  // ->BC_TGETS_Z is also the branch target of other cases that have
    |  // already set up TAB:RB and STR:RC.
    |  lwzux CARG1, RB, BASE
    |  srwi TMP1, RC, 1
    |  lwz TAB:RB, 4(RB)
    |  subfic TMP1, TMP1, -4
    |  checktab CARG1
    |  lwzx STR:RC, KBASE, TMP1  // KBASE-4-str_const*4
    |  bne ->vmeta_tgets1
    |->BC_TGETS_Z:
    |  // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
    |  lwz TMP0, TAB:RB->hmask
    |  lwz TMP1, STR:RC->hash
    |  lwz NODE:TMP2, TAB:RB->node
    |  and TMP1, TMP1, TMP0  // idx = str->hash & tab->hmask
    |  slwi TMP0, TMP1, 5
    |  slwi TMP1, TMP1, 3
    |  sub TMP1, TMP0, TMP1
    |  add NODE:TMP2, NODE:TMP2, TMP1  // node = tab->node + (idx*32-idx*8)
    |1:
    |  // Key and value of the node are loaded up front; a node matches if its
    |  // key passes checkstr and its key lo-word equals STR:RC.
    |  lwz CARG1, NODE:TMP2->key
    |  lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
    |  lwz CARG2, NODE:TMP2->val
    |  lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2)
    |  checkstr CARG1; bne >4
    |  cmpw TMP0, STR:RC; bne >4
    |  checknil CARG2; beq >5  // Key found, but nil value?
    |3:
    |  stwux CARG2, RA, BASE
    |  stw TMP1, 4(RA)
    |  ins_next
    |
    |4:  // Follow hash chain.
    |  lwz NODE:TMP2, NODE:TMP2->next
    |  cmplwi NODE:TMP2, 0
    |  bne <1
    |  // End of hash chain: key not found, nil result.
    |  li CARG2, LJ_TNIL
    |
    |5:  // Check for __index if table value is nil.
    |  lwz TAB:TMP2, TAB:RB->metatable
    |  cmplwi TAB:TMP2, 0
    |  beq <3  // No metatable: done.
    |  lbz TMP0, TAB:TMP2->nomm
    |  andix. TMP0, TMP0, 1<<MM_index
    |  bne <3  // 'no __index' flag set: done.
    |  b ->vmeta_tgets
    break;
  case BC_TGETB:
    |  // RA = dst*8, RB = table*8, RC = index*8
    |  // TMP0 = index (RC/8). RC itself is used as the byte offset into the
    |  // array part.
    |  lwzux CARG1, RB, BASE
    |  srwi TMP0, RC, 3
    |  lwz TAB:RB, 4(RB)
    |  checktab CARG1; bne ->vmeta_tgetb
    |  lwz TMP1, TAB:RB->asize
    |  lwz TMP2, TAB:RB->array
    |  cmplw TMP0, TMP1; bge ->vmeta_tgetb
    |  lwzx TMP1, TMP2, RC
    |  lfdx f0, TMP2, RC
    |  checknil TMP1; beq >5
    |1:
    |  ins_next1
    |  stfdx f0, BASE, RA
    |  ins_next2
    |
    |5:  // Check for __index if table value is nil.
    |  lwz TAB:TMP2, TAB:RB->metatable
    |  cmplwi TAB:TMP2, 0
    |  beq <1  // No metatable: done.
    |  lbz TMP2, TAB:TMP2->nomm
    |  andix. TMP2, TMP2, 1<<MM_index
    |  bne <1  // 'no __index' flag set: done.
    |  b ->vmeta_tgetb  // Caveat: preserve TMP0!
    break;
  case BC_TGETR:
    |  // RA = dst*8, RB = table*8, RC = key*8
    |  // No type check of the table or key slot is emitted here, and the
    |  // loaded value is not checked for nil.
    |  add RB, BASE, RB
    |  lwz TAB:CARG1, 4(RB)
    |.if DUALNUM
    |  add RC, BASE, RC
    |  lwz TMP0, TAB:CARG1->asize
    |  lwz CARG2, 4(RC)
    |  lwz TMP1, TAB:CARG1->array
    |.else
    |  lfdx f0, BASE, RC
    |  lwz TMP0, TAB:CARG1->asize
    |  toint CARG2, f0
    |  lwz TMP1, TAB:CARG1->array
    |.endif
    |  cmplw TMP0, CARG2
    |  slwi TMP2, CARG2, 3
    |  ble ->vmeta_tgetr  // In array part?
    |  lfdx f14, TMP1, TMP2
    |->BC_TGETR_Z:
    |  ins_next1
    |  stfdx f14, BASE, RA
    |  ins_next2
    break;

  case BC_TSETV:
    |  // RA = src*8, RB = table*8, RC = key*8
    |  // The key checks mirror the ones in the BC_TGETV case. After the store
    |  // the barrierback macro is invoked if the table is black.
    |  lwzux CARG1, RB, BASE
    |  lwzux CARG2, RC, BASE
    |  lwz TAB:RB, 4(RB)
    |.if DUALNUM
    |  lwz RC, 4(RC)
    |.else
    |  lfd f0, 0(RC)
    |.endif
    |  checktab CARG1
    |  checknum cr1, CARG2
    |  bne ->vmeta_tsetv
    |.if DUALNUM
    |  lwz TMP0, TAB:RB->asize
    |  bne cr1, >5
    |  lwz TMP1, TAB:RB->array
    |  cmplw TMP0, RC
    |  slwi TMP0, RC, 3
    |.else
    |  bge cr1, >5
    |  // Convert number key to integer, check for integerness and range.
    |  fctiwz f1, f0
    |  fadd f2, f0, TOBIT
    |  stfd f1, TMPD
    |  lwz TMP0, TAB:RB->asize
    |  fsub f2, f2, TOBIT
    |  lwz TMP2, TMPD_LO
    |  lwz TMP1, TAB:RB->array
    |  fcmpu cr1, f0, f2
    |  cmplw cr0, TMP0, TMP2
    |  crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq
    |  slwi TMP0, TMP2, 3
    |.endif
    |  ble ->vmeta_tsetv  // Integer key and in array part?
    |  lwzx TMP2, TMP1, TMP0
    |  lbz TMP3, TAB:RB->marked
    |  lfdx f14, BASE, RA
    |  checknil TMP2; beq >3
    |1:
    |  andix. TMP2, TMP3, LJ_GC_BLACK  // isblack(table)
    |  stfdx f14, TMP1, TMP0
    |  bne >7
    |2:
    |  ins_next
    |
    |3:  // Check for __newindex if previous value is nil.
    |  lwz TAB:TMP2, TAB:RB->metatable
    |  cmplwi TAB:TMP2, 0
    |  beq <1  // No metatable: done.
    |  lbz TMP2, TAB:TMP2->nomm
    |  andix. TMP2, TMP2, 1<<MM_newindex
    |  bne <1  // 'no __newindex' flag set: done.
    |  b ->vmeta_tsetv
    |
    |5:
    |  checkstr CARG2; bne ->vmeta_tsetv
    |.if not DUALNUM
    |  lwz STR:RC, 4(RC)
    |.endif
    |  b ->BC_TSETS_Z  // String key?
    |
    |7:  // Possible table write barrier for the value. Skip valiswhite check.
    |  barrierback TAB:RB, TMP3, TMP0
    |  b <2
    break;
  case BC_TSETS:
    |  // RA = src*8, RB = table*8, RC = str_const*8 (~)
    |  // ->BC_TSETS_Z is also the branch target of other cases that have
    |  // already set up TAB:RB and STR:RC.
    |  lwzux CARG1, RB, BASE
    |  srwi TMP1, RC, 1
    |  lwz TAB:RB, 4(RB)
    |  subfic TMP1, TMP1, -4
    |  checktab CARG1
    |  lwzx STR:RC, KBASE, TMP1  // KBASE-4-str_const*4
    |  bne ->vmeta_tsets1
    |->BC_TSETS_Z:
    |  // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8
    |  lwz TMP0, TAB:RB->hmask
    |  lwz TMP1, STR:RC->hash
    |  lwz NODE:TMP2, TAB:RB->node
    |  stb ZERO, TAB:RB->nomm  // Clear metamethod cache.
    |  and TMP1, TMP1, TMP0  // idx = str->hash & tab->hmask
    |  lfdx f14, BASE, RA
    |  slwi TMP0, TMP1, 5
    |  slwi TMP1, TMP1, 3
    |  sub TMP1, TMP0, TMP1
    |  lbz TMP3, TAB:RB->marked
    |  add NODE:TMP2, NODE:TMP2, TMP1  // node = tab->node + (idx*32-idx*8)
    |1:
    |  // The next pointer is loaded into TMP1 before the key comparison and
    |  // is consumed at 5: when this node does not match.
    |  lwz CARG1, NODE:TMP2->key
    |  lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
    |  lwz CARG2, NODE:TMP2->val
    |  lwz NODE:TMP1, NODE:TMP2->next
    |  checkstr CARG1; bne >5
    |  cmpw TMP0, STR:RC; bne >5
    |  checknil CARG2; beq >4  // Key found, but nil value?
    |2:
    |  andix. TMP0, TMP3, LJ_GC_BLACK  // isblack(table)
    |  stfd f14, NODE:TMP2->val
    |  bne >7
    |3:
    |  ins_next
    |
    |4:  // Check for __newindex if previous value is nil.
    |  lwz TAB:TMP1, TAB:RB->metatable
    |  cmplwi TAB:TMP1, 0
    |  beq <2  // No metatable: done.
    |  lbz TMP0, TAB:TMP1->nomm
    |  andix. TMP0, TMP0, 1<<MM_newindex
    |  bne <2  // 'no __newindex' flag set: done.
    |  b ->vmeta_tsets
    |
    |5:  // Follow hash chain.
    |  cmplwi NODE:TMP1, 0
    |  mr NODE:TMP2, NODE:TMP1
    |  bne <1
    |  // End of hash chain: key not found, add a new one.
    |
    |  // But check for __newindex first.
    |  lwz TAB:TMP1, TAB:RB->metatable
    |  la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
    |  stw PC, SAVE_PC
    |  mr CARG1, L
    |  cmplwi TAB:TMP1, 0
    |  stp BASE, L->base
    |  beq >6  // No metatable: continue.
    |  lbz TMP0, TAB:TMP1->nomm
    |  andix. TMP0, TMP0, 1<<MM_newindex
    |  beq ->vmeta_tsets  // 'no __newindex' flag NOT set: check.
    |6:
    |  // The key is written as a string TValue to the tmptv slot (CARG3).
    |  li TMP0, LJ_TSTR
    |  stw STR:RC, 4(CARG3)
    |  mr CARG2, TAB:RB
    |  stw TMP0, 0(CARG3)
    |  bl extern lj_tab_newkey  // (lua_State *L, GCtab *t, TValue *k)
    |  // Returns TValue *.
    |  lp BASE, L->base
    |  stfd f14, 0(CRET1)
    |  b <3  // No 2nd write barrier needed.
    |
    |7:  // Possible table write barrier for the value. Skip valiswhite check.
    |  barrierback TAB:RB, TMP3, TMP0
    |  b <3
    break;
  case BC_TSETB:
    |  // RA = src*8, RB = table*8, RC = index*8
    |  // TMP0 = index (RC/8). RC itself is used as the byte offset into the
    |  // array part.
    |  lwzux CARG1, RB, BASE
    |  srwi TMP0, RC, 3
    |  lwz TAB:RB, 4(RB)
    |  checktab CARG1; bne ->vmeta_tsetb
    |  lwz TMP1, TAB:RB->asize
    |  lwz TMP2, TAB:RB->array
    |  lbz TMP3, TAB:RB->marked
    |  cmplw TMP0, TMP1
    |  lfdx f14, BASE, RA
    |  bge ->vmeta_tsetb
    |  lwzx TMP1, TMP2, RC
    |  checknil TMP1; beq >5
    |1:
    |  andix. TMP0, TMP3, LJ_GC_BLACK  // isblack(table)
    |  stfdx f14, TMP2, RC
    |  bne >7
    |2:
    |  ins_next
    |
    |5:  // Check for __newindex if previous value is nil.
    |  lwz TAB:TMP1, TAB:RB->metatable
    |  cmplwi TAB:TMP1, 0
    |  beq <1  // No metatable: done.
    |  lbz TMP1, TAB:TMP1->nomm
    |  andix. TMP1, TMP1, 1<<MM_newindex
    |  bne <1  // 'no __newindex' flag set: done.
    |  b ->vmeta_tsetb  // Caveat: preserve TMP0!
    |
    |7:  // Possible table write barrier for the value. Skip valiswhite check.
    |  barrierback TAB:RB, TMP3, TMP0
    |  b <2
    break;
  case BC_TSETR:
    |  // RA = src*8, RB = table*8, RC = key*8
    |  // No type check of the table or key slot is emitted here. The black
    |  // check and barrierback happen before the range check and the store.
    |  add RB, BASE, RB
    |  lwz TAB:CARG2, 4(RB)
    |.if DUALNUM
    |  add RC, BASE, RC
    |  lbz TMP3, TAB:CARG2->marked
    |  lwz TMP0, TAB:CARG2->asize
    |  lwz CARG3, 4(RC)
    |  lwz TMP1, TAB:CARG2->array
    |.else
    |  lfdx f0, BASE, RC
    |  lbz TMP3, TAB:CARG2->marked
    |  lwz TMP0, TAB:CARG2->asize
    |  toint CARG3, f0
    |  lwz TMP1, TAB:CARG2->array
    |.endif
    |  andix. TMP2, TMP3, LJ_GC_BLACK  // isblack(table)
    |  bne >7
    |2:
    |  cmplw TMP0, CARG3
    |  slwi TMP2, CARG3, 3
    |  lfdx f14, BASE, RA
    |  ble ->vmeta_tsetr  // In array part?
    |  ins_next1
    |  stfdx f14, TMP1, TMP2
    |  ins_next2
    |
    |7:  // Possible table write barrier for the value. Skip valiswhite check.
    |  barrierback TAB:CARG2, TMP3, TMP2
    |  b <2
    break;


  case BC_TSETM:
    |  // RA = base*8 (table at base-1), RD = num_const*8 (start index)
    |  // Copies (MULTRES-8)/8 slots starting at RA into the array part,
    |  // beginning at the start index. If the last index exceeds asize, the
    |  // array is resized via lj_tab_reasize and the instruction restarts at 1:.
    |  add RA, BASE, RA
    |1:
    |  add TMP3, KBASE, RD
    |  lwz TAB:CARG2, -4(RA)  // Guaranteed to be a table.
    |  addic. TMP0, MULTRES, -8
    |  lwz TMP3, 4(TMP3)  // Integer constant is in lo-word.
    |  srwi CARG3, TMP0, 3
    |  beq >4  // Nothing to copy?
    |  add CARG3, CARG3, TMP3
    |  lwz TMP2, TAB:CARG2->asize
    |  slwi TMP1, TMP3, 3
    |  lbz TMP3, TAB:CARG2->marked
    |  cmplw CARG3, TMP2
    |  add TMP2, RA, TMP0
    |  lwz TMP0, TAB:CARG2->array
    |  bgt >5
    |  add TMP1, TMP1, TMP0
    |  // The result of this test stays in cr0 across the copy loop, which
    |  // uses cr1, and is consumed by the 'bne >7' after the loop.
    |  andix. TMP0, TMP3, LJ_GC_BLACK  // isblack(table)
    |3:  // Copy result slots to table.
    |  lfd f0, 0(RA)
    |  addi RA, RA, 8
    |  cmpw cr1, RA, TMP2
    |  stfd f0, 0(TMP1)
    |  addi TMP1, TMP1, 8
    |  blt cr1, <3
    |  bne >7
    |4:
    |  ins_next
    |
    |5:  // Need to resize array part.
    |  stp BASE, L->base
    |  mr CARG1, L
    |  stw PC, SAVE_PC
    |  mr SAVE0, RD
    |  bl extern lj_tab_reasize  // (lua_State *L, GCtab *t, int nasize)
    |  // Must not reallocate the stack.
    |  mr RD, SAVE0
    |  b <1
    |
    |7:  // Possible table write barrier for any value. Skip valiswhite check.
    |  barrierback TAB:CARG2, TMP3, TMP0
    |  b <4
    break;

  /* -- Calls and vararg handling ----------------------------------------- */

  case BC_CALLM:
    |  // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8
    |  add NARGS8:RC, NARGS8:RC, MULTRES
    |  // Fall through. Assumes BC_CALL follows.
/* The break below ends the case whose body precedes this point. */
    break;
  case BC_CALL:
    |  // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
    |  // The old BASE is kept in TMP2. BASE is advanced to the slot after the
    |  // callee and NARGS8:RC is reduced by 8 before ins_call.
    |  mr TMP2, BASE
    |  lwzux TMP0, BASE, RA
    |  lwz LFUNC:RB, 4(BASE)
    |  subi NARGS8:RC, NARGS8:RC, 8
    |  addi BASE, BASE, 8
    |  checkfunc TMP0; bne ->vmeta_call
    |  ins_call
    break;

  case BC_CALLMT:
    |  // RA = base*8, (RB = 0,) RC = extra_nargs*8
    |  add NARGS8:RC, NARGS8:RC, MULTRES
    |  // Fall through. Assumes BC_CALLT follows.
    break;
  case BC_CALLT:
    |  // RA = base*8, (RB = 0,) RC = (nargs+1)*8
    |  // Tail call: the function and its arguments are copied down to the
    |  // current frame (loop at 2:) while the frame's PC word is kept.
    |  lwzux TMP0, RA, BASE
    |  lwz LFUNC:RB, 4(RA)
    |  subi NARGS8:RC, NARGS8:RC, 8
    |  lwz TMP1, FRAME_PC(BASE)
    |  checkfunc TMP0
    |  addi RA, RA, 8
    |  bne ->vmeta_callt
    |->BC_CALLT_Z:
    |  andix. TMP0, TMP1, FRAME_TYPE  // Caveat: preserve cr0 until the crand.
    |  lbz TMP3, LFUNC:RB->ffid
    |  xori TMP2, TMP1, FRAME_VARG
    |  cmplwi cr1, NARGS8:RC, 0
    |  bne >7
    |1:
    |  stw LFUNC:RB, FRAME_FUNC(BASE)  // Copy function down, but keep PC.
    |  li TMP2, 0
    |  cmplwi cr7, TMP3, 1  // (> FF_C) Calling a fast function?
    |  beq cr1, >3
    |2:
    |  addi TMP3, TMP2, 8
    |  lfdx f0, RA, TMP2
    |  cmplw cr1, TMP3, NARGS8:RC
    |  stfdx f0, BASE, TMP2
    |  mr TMP2, TMP3
    |  bne cr1, <2
    |3:
    |  // cr0.eq = (frame type bits are zero) AND (ffid > 1).
    |  crand 4*cr0+eq, 4*cr0+eq, 4*cr7+gt
    |  beq >5
    |4:
    |  ins_callt
    |
    |5:  // Tailcall to a fast function with a Lua frame below.
    |  lwz INS, -4(TMP1)
    |  decode_RA8 RA, INS
    |  sub TMP1, BASE, RA
    |  lwz LFUNC:TMP1, FRAME_FUNC-8(TMP1)
    |  lwz TMP1, LFUNC:TMP1->pc
    |  lwz KBASE, PC2PROTO(k)(TMP1)  // Need to prepare KBASE.
    |  b <4
    |
    |7:  // Tailcall from a vararg function.
    |  andix. TMP0, TMP2, FRAME_TYPEP
    |  bne <1  // Vararg frame below?
    |  sub BASE, BASE, TMP2  // Relocate BASE down.
    |  lwz TMP1, FRAME_PC(BASE)
    |  andix. TMP0, TMP1, FRAME_TYPE
    |  b <1
    break;

  case BC_ITERC:
    |  // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
    |  // Copies the three slots below the new BASE (callable, state, control
    |  // var) upwards to form the call, then calls with 2 arguments.
    |  mr TMP2, BASE
    |  add BASE, BASE, RA
    |  lwz TMP1, -24(BASE)
    |  lwz LFUNC:RB, -20(BASE)
    |  lfd f1, -8(BASE)
    |  lfd f0, -16(BASE)
    |  stw TMP1, 0(BASE)  // Copy callable.
    |  stw LFUNC:RB, 4(BASE)
    |  checkfunc TMP1
    |  stfd f1, 16(BASE)  // Copy control var.
    |  li NARGS8:RC, 16  // Iterators get 2 arguments.
    |  stfdu f0, 8(BASE)  // Copy state.
    |  bne ->vmeta_call
    |  ins_call
    break;

  case BC_ITERN:
    |  // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
    |  // Inline table traversal: the lo-word of the control var at -4(RA) is
    |  // used as a running index, first over the array part and then over
    |  // the hash part (index minus asize).
    |.if JIT
    |  // NYI: add hotloop, record BC_ITERN.
    |.endif
    |  add RA, BASE, RA
    |  lwz TAB:RB, -12(RA)
    |  lwz RC, -4(RA)  // Get index from control var.
    |  lwz TMP0, TAB:RB->asize
    |  lwz TMP1, TAB:RB->array
    |  addi PC, PC, 4
    |1:  // Traverse array part.
    |  cmplw RC, TMP0
    |  slwi TMP3, RC, 3
    |  bge >5  // Index points after array part?
    |  lwzx TMP2, TMP1, TMP3
    |  lfdx f0, TMP1, TMP3
    |  checknil TMP2
    |  lwz INS, -4(PC)
    |  beq >4
    |.if DUALNUM
    |  stw RC, 4(RA)
    |  stw TISNUM, 0(RA)
    |.else
    |  tonum_u f1, RC
    |.endif
    |  addi RC, RC, 1
    |  addis TMP3, PC, -(BCBIAS_J*4 >> 16)
    |  stfd f0, 8(RA)
    |  decode_RD4 TMP1, INS
    |  stw RC, -4(RA)  // Update control var.
    |  add PC, TMP1, TMP3
    |.if not DUALNUM
    |  stfd f1, 0(RA)
    |.endif
    |3:
    |  ins_next
    |
    |4:  // Skip holes in array part.
    |  addi RC, RC, 1
    |  b <1
    |
    |5:  // Traverse hash part.
    |  lwz TMP1, TAB:RB->hmask
    |  sub RC, RC, TMP0
    |  lwz TMP2, TAB:RB->node
    |6:
    |  cmplw RC, TMP1  // End of iteration? Branch to ITERL+1.
    |  slwi TMP3, RC, 5
    |  bgty <3
    |  // Node byte offset = RC*32 - RC*8.
    |  slwi RB, RC, 3
    |  sub TMP3, TMP3, RB
    |  lwzx RB, TMP2, TMP3
    |  lfdx f0, TMP2, TMP3
    |  add NODE:TMP3, TMP2, TMP3
    |  checknil RB
    |  lwz INS, -4(PC)
    |  beq >7
    |  lfd f1, NODE:TMP3->key
    |  addis TMP2, PC, -(BCBIAS_J*4 >> 16)
    |  stfd f0, 8(RA)
    |  add RC, RC, TMP0
    |  decode_RD4 TMP1, INS
    |  stfd f1, 0(RA)
    |  addi RC, RC, 1
    |  add PC, TMP1, TMP2
    |  stw RC, -4(RA)  // Update control var.
    |  b <3
    |
    |7:  // Skip holes in hash part.
    |  addi RC, RC, 1
    |  b <6
    break;

  case BC_ISNEXT:
    |  // RA = base*8, RD = target (points to ITERN)
    |  // Three slots below RA are checked: type LJ_TFUNC with ffid
    |  // FF_next_N, type LJ_TTAB and type LJ_TNIL. If any check fails, the
    |  // bytecode is patched at 5:.
    |  add RA, BASE, RA
    |  lwz TMP0, -24(RA)
    |  lwz CFUNC:TMP1, -20(RA)
    |  lwz TMP2, -16(RA)
    |  lwz TMP3, -8(RA)
    |  cmpwi cr0, TMP2, LJ_TTAB
    |  cmpwi cr1, TMP0, LJ_TFUNC
    |  cmpwi cr6, TMP3, LJ_TNIL
    |  bne cr1, >5
    |  lbz TMP1, CFUNC:TMP1->ffid
    |  crand 4*cr0+eq, 4*cr0+eq, 4*cr6+eq
    |  cmpwi cr7, TMP1, FF_next_N
    |  srwi TMP0, RD, 1
    |  crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
    |  add TMP3, PC, TMP0
    |  bne cr0, >5
    |  // TMP1 = 0xfffe7fff, stored as the hi-word of the control var slot.
    |  lus TMP1, 0xfffe
    |  ori TMP1, TMP1, 0x7fff
    |  stw ZERO, -4(RA)  // Initialize control var.
    |  stw TMP1, -8(RA)
    |  addis PC, TMP3, -(BCBIAS_J*4 >> 16)
    |1:
    |  ins_next
    |5:  // Despecialize bytecode if any of the checks fail.
    |  // Writes BC_JMP into the byte at -1(PC) and, after PC has been moved
    |  // to the target, BC_ITERC into the byte at 3(PC).
    |  li TMP0, BC_JMP
    |  li TMP1, BC_ITERC
    |  stb TMP0, -1(PC)
    |  addis PC, TMP3, -(BCBIAS_J*4 >> 16)
    |  stb TMP1, 3(PC)
    |  b <1
    break;

  case BC_VARG:
    |  // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
    |  // RB == 0 selects the copy-all path at 5:, which also sets MULTRES
    |  // and may grow the stack. Otherwise the destination slots are filled
    |  // and any remainder is set to nil.
    |  lwz TMP0, FRAME_PC(BASE)
    |  add RC, BASE, RC
    |  add RA, BASE, RA
    |  addi RC, RC, FRAME_VARG
    |  add TMP2, RA, RB
    |  subi TMP3, BASE, 8  // TMP3 = vtop
    |  sub RC, RC, TMP0  // RC = vbase
    |  // Note: RC may now be even _above_ BASE if nargs was < numparams.
    |  cmplwi cr1, RB, 0
    |.if PPE
    |  sub TMP1, TMP3, RC
    |  cmpwi TMP1, 0
    |.else
    |  sub. TMP1, TMP3, RC
    |.endif
    |  beq cr1, >5  // Copy all varargs?
    |  subi TMP2, TMP2, 16
    |  ble >2  // No vararg slots?
    |1:  // Copy vararg slots to destination slots.
    |  lfd f0, 0(RC)
    |  addi RC, RC, 8
    |  stfd f0, 0(RA)
    |  cmplw RA, TMP2
    |  cmplw cr1, RC, TMP3
    |  bge >3  // All destination slots filled?
    |  addi RA, RA, 8
    |  blt cr1, <1  // More vararg slots?
    |2:  // Fill up remainder with nil.
    |  stw TISNIL, 0(RA)
    |  cmplw RA, TMP2
    |  addi RA, RA, 8
    |  blt <2
    |3:
    |  ins_next
    |
    |5:  // Copy all varargs.
    |  // cr0 still holds the sign test of TMP1 = vtop - vbase from above.
    |  lwz TMP0, L->maxstack
    |  li MULTRES, 8  // MULTRES = (0+1)*8
    |  bley <3  // No vararg slots?
    |  add TMP2, RA, TMP1
    |  cmplw TMP2, TMP0
    |  addi MULTRES, TMP1, 8
    |  bgt >7
    |6:
    |  lfd f0, 0(RC)
    |  addi RC, RC, 8
    |  stfd f0, 0(RA)
    |  cmplw RC, TMP3
    |  addi RA, RA, 8
    |  blt <6  // More vararg slots?
    |  b <3
    |
    |7:  // Grow stack for varargs.
    |  // RA and RC are converted to offsets from BASE across the call and
    |  // rebased on the reloaded BASE afterwards.
    |  mr CARG1, L
    |  stp RA, L->top
    |  sub SAVE0, RC, BASE  // Need delta, because BASE may change.
    |  stp BASE, L->base
    |  sub RA, RA, BASE
    |  stw PC, SAVE_PC
    |  srwi CARG2, TMP1, 3
    |  bl extern lj_state_growstack  // (lua_State *L, int n)
    |  lp BASE, L->base
    |  add RA, BASE, RA
    |  add RC, BASE, SAVE0
    |  subi TMP3, BASE, 8
    |  b <6
    break;

  /* -- Returns ----------------------------------------------------------- */

  case BC_RETM:
    |  // RA = results*8, RD = extra_nresults*8
    |  add RD, RD, MULTRES  // MULTRES >= 8, so RD >= 8.
    |  // Fall through. Assumes BC_RET follows.
    break;

  case BC_RET:
    |  // RA = results*8, RD = (nresults+1)*8
    |  // Frames whose PC word has FRAME_TYPE bits set go to ->BC_RETV_Z.
    |  // Otherwise the results are copied down to BASE-8 and execution
    |  // continues in the caller.
    |  lwz PC, FRAME_PC(BASE)
    |  add RA, BASE, RA
    |  mr MULTRES, RD
    |1:
    |  andix. TMP0, PC, FRAME_TYPE
    |  xori TMP1, PC, FRAME_VARG
    |  bne ->BC_RETV_Z
    |
    |->BC_RET_Z:
    |  // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return
    |  lwz INS, -4(PC)
    |  cmpwi RD, 8
    |  subi TMP2, BASE, 8
    |  subi RC, RD, 8
    |  decode_RB8 RB, INS
    |  beq >3
    |  li TMP1, 0
    |2:
    |  // Copy loop, two slots per iteration, using f0 and f1 alternately.
    |  addi TMP3, TMP1, 8
    |  lfdx f0, RA, TMP1
    |  cmpw TMP3, RC
    |  stfdx f0, TMP2, TMP1
    |  beq >3
    |  addi TMP1, TMP3, 8
    |  lfdx f1, RA, TMP3
    |  cmpw TMP1, RC
    |  stfdx f1, TMP2, TMP3
    |  bne <2
    |3:
    |5:
    |  // RB is decoded from the instruction at -4(PC); while RB > RD, one
    |  // more result slot is set to nil at 6:.
    |  cmplw RB, RD
    |  decode_RA8 RA, INS
    |  bgt >6
    |  sub BASE, TMP2, RA
    |  lwz LFUNC:TMP1, FRAME_FUNC(BASE)
    |  ins_next1
    |  lwz TMP1, LFUNC:TMP1->pc
    |  lwz KBASE, PC2PROTO(k)(TMP1)
    |  ins_next2
    |
    |6:  // Fill up results with nil.
    |  subi TMP1, RD, 8
    |  addi RD, RD, 8
    |  stwx TISNIL, TMP2, TMP1
    |  b <5
    |
    |->BC_RETV_Z:  // Non-standard return case.
    |  andix. TMP2, TMP1, FRAME_TYPEP
    |  bne ->vm_return
    |  // Return from vararg function: relocate BASE down.
    |  sub BASE, BASE, TMP1
    |  lwz PC, FRAME_PC(BASE)
    |  b <1
    break;

  case BC_RET0: case BC_RET1:
    |  // RA = results*8, RD = (nresults+1)*8
    |  // Variant of the return code without a copy loop: BC_RET1 copies a
    |  // single slot, BC_RET0 copies nothing.
    |  lwz PC, FRAME_PC(BASE)
    |  add RA, BASE, RA
    |  mr MULTRES, RD
    |  andix. TMP0, PC, FRAME_TYPE
    |  xori TMP1, PC, FRAME_VARG
    |  bney ->BC_RETV_Z
    |
    |  lwz INS, -4(PC)
    |  subi TMP2, BASE, 8
    |  decode_RB8 RB, INS
    if (op == BC_RET1) {
      |  lfd f0, 0(RA)
      |  stfd f0, 0(TMP2)
    }
    |5:
    |  cmplw RB, RD
    |  decode_RA8 RA, INS
    |  bgt >6
    |  sub BASE, TMP2, RA
    |  lwz LFUNC:TMP1, FRAME_FUNC(BASE)
    |  ins_next1
    |  lwz TMP1, LFUNC:TMP1->pc
    |  lwz KBASE, PC2PROTO(k)(TMP1)
    |  ins_next2
    |
    |6:  // Fill up results with nil.
    |  subi TMP1, RD, 8
    |  addi RD, RD, 8
    |  stwx TISNIL, TMP2, TMP1
    |  b <5
    break;

  /* -- Loops and branches ------------------------------------------------ */

  case BC_FORL:
    |.if JIT
    |  hotloop
    |.endif
    |  // Fall through. Assumes BC_IFORL follows.
    break;

  case BC_JFORI:
  case BC_JFORL:
#if !LJ_HASJIT
    break;
#endif
  case BC_FORI:
  case BC_IFORL:
    |  // RA = base*8, RD = target (after end of loop or start of loop)
    /* vk is nonzero for the two opcodes that first add the step to the index. */
    vk = (op == BC_IFORL || op == BC_JFORL);
    |.if DUALNUM
    |  // Integer loop.
    |  lwzux TMP1, RA, BASE
    |  lwz CARG1, FORL_IDX*8+4(RA)
    |  cmplw cr0, TMP1, TISNUM
    if (vk) {
      |  lwz CARG3, FORL_STEP*8+4(RA)
      |  bne >9
      |.if GPR64
      |  // Need to check overflow for (a<<32) + (b<<32).
      |  rldicr TMP0, CARG1, 32, 31
      |  rldicr TMP2, CARG3, 32, 31
      |  add CARG1, CARG1, CARG3
      |  addo. TMP0, TMP0, TMP2
      |.else
      |  addo. CARG1, CARG1, CARG3
      |.endif
      |  cmpwi cr6, CARG3, 0
      |  lwz CARG2, FORL_STOP*8+4(RA)
      |  bso >6
      |4:
      |  stw CARG1, FORL_IDX*8+4(RA)
    } else {
      |  lwz TMP3, FORL_STEP*8(RA)
      |  lwz CARG3, FORL_STEP*8+4(RA)
      |  lwz TMP2, FORL_STOP*8(RA)
      |  lwz CARG2, FORL_STOP*8+4(RA)
      |  cmplw cr7, TMP3, TISNUM
      |  cmplw cr1, TMP2, TISNUM
      |  // cr0.eq = the type words of index, step and stop all equal TISNUM.
      |  crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
      |  crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
      |  cmpwi cr6, CARG3, 0
      |  bne >9
    }
    |  blt cr6, >5
    |  cmpw CARG1, CARG2
    |1:
    |  stw TISNUM, FORL_EXT*8(RA)
    if (op != BC_JFORL) {
      |  srwi RD, RD, 1
    }
    |  stw CARG1, FORL_EXT*8+4(RA)
    if (op != BC_JFORL) {
      |  add RD, PC, RD
    }
    if (op == BC_FORI) {
      |  bgt >3  // See FP loop below.
    } else if (op == BC_JFORI) {
      |  addis PC, RD, -(BCBIAS_J*4 >> 16)
      |  bley >7
    } else if (op == BC_IFORL) {
      |  bgt >2
      |  addis PC, RD, -(BCBIAS_J*4 >> 16)
    } else {
      |  bley =>BC_JLOOP
    }
    |2:
    |  ins_next
    |5:  // Invert check for negative step.
    |  cmpw CARG2, CARG1
    |  b <1
    if (vk) {
      |6:  // Potential overflow.
      |  checkov TMP0, <4  // Ignore unrelated overflow.
      |  b <2
    }
    |.endif
    if (vk) {
      |.if DUALNUM
      |9:  // FP loop.
      |  lfd f1, FORL_IDX*8(RA)
      |.else
      |  lfdux f1, RA, BASE
      |.endif
      |  lfd f3, FORL_STEP*8(RA)
      |  lfd f2, FORL_STOP*8(RA)
      |  lwz TMP3, FORL_STEP*8(RA)
      |  fadd f1, f1, f3
      |  stfd f1, FORL_IDX*8(RA)
    } else {
      |.if DUALNUM
      |9:  // FP loop.
      |.else
      |  lwzux TMP1, RA, BASE
      |  lwz TMP3, FORL_STEP*8(RA)
      |  lwz TMP2, FORL_STOP*8(RA)
      |  cmplw cr0, TMP1, TISNUM
      |  cmplw cr7, TMP3, TISNUM
      |  cmplw cr1, TMP2, TISNUM
      |.endif
      |  lfd f1, FORL_IDX*8(RA)
      |  crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
      |  crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
      |  lfd f2, FORL_STOP*8(RA)
      |  bge ->vmeta_for
    }
    |  // cr6 = sign test on the hi-word of the step (TMP3).
    |  cmpwi cr6, TMP3, 0
    if (op != BC_JFORL) {
      |  srwi RD, RD, 1
    }
    |  stfd f1, FORL_EXT*8(RA)
    if (op != BC_JFORL) {
      |  add RD, PC, RD
    }
    |  fcmpu cr0, f1, f2
    if (op == BC_JFORI) {
      |  addis PC, RD, -(BCBIAS_J*4 >> 16)
    }
    |  blt cr6, >5
    if (op == BC_FORI) {
      |  bgt >3
    } else if (op == BC_IFORL) {
      |.if DUALNUM
      |  bgty <2
      |.else
      |  bgt >2
      |.endif
      |1:
      |  addis PC, RD, -(BCBIAS_J*4 >> 16)
    } else if (op == BC_JFORI) {
      |  bley >7
    } else {
      |  bley =>BC_JLOOP
    }
    |.if DUALNUM
    |  b <2
    |.else
    |2:
    |  ins_next
    |.endif
    |5:  // Negative step.
    if (op == BC_FORI) {
      |  bge <2
      |3:  // Used by integer loop, too.
4876 | addis PC, RD, -(BCBIAS_J*4 >> 16) 4877 } else if (op == BC_IFORL) { 4878 | bgey <1 4879 } else if (op == BC_JFORI) { 4880 | bgey >7 4881 } else { 4882 | bgey =>BC_JLOOP 4883 } 4884 | b <2 4885 if (op == BC_JFORI) { 4886 |7: 4887 | lwz INS, -4(PC) 4888 | decode_RD8 RD, INS 4889 | b =>BC_JLOOP 4890 } 4891 break; 4892 4893 case BC_ITERL: 4894 |.if JIT 4895 | hotloop 4896 |.endif 4897 | // Fall through. Assumes BC_IITERL follows. 4898 break; 4899 4900 case BC_JITERL: 4901#if !LJ_HASJIT 4902 break; 4903#endif 4904 case BC_IITERL: 4905 | // RA = base*8, RD = target 4906 | lwzux TMP1, RA, BASE 4907 | lwz TMP2, 4(RA) 4908 | checknil TMP1; beq >1 // Stop if iterator returned nil. 4909 if (op == BC_JITERL) { 4910 | stw TMP1, -8(RA) 4911 | stw TMP2, -4(RA) 4912 | b =>BC_JLOOP 4913 } else { 4914 | branch_RD // Otherwise save control var + branch. 4915 | stw TMP1, -8(RA) 4916 | stw TMP2, -4(RA) 4917 } 4918 |1: 4919 | ins_next 4920 break; 4921 4922 case BC_LOOP: 4923 | // RA = base*8, RD = target (loop extent) 4924 | // Note: RA/RD is only used by trace recorder to determine scope/extent 4925 | // This opcode does NOT jump, it's only purpose is to detect a hot loop. 4926 |.if JIT 4927 | hotloop 4928 |.endif 4929 | // Fall through. Assumes BC_ILOOP follows. 4930 break; 4931 4932 case BC_ILOOP: 4933 | // RA = base*8, RD = target (loop extent) 4934 | ins_next 4935 break; 4936 4937 case BC_JLOOP: 4938 |.if JIT 4939 | // RA = base*8 (ignored), RD = traceno*8 4940 | lwz TMP1, DISPATCH_J(trace)(DISPATCH) 4941 | srwi RD, RD, 1 4942 | // Traces on PPC don't store the trace number, so use 0. 
4943 | stw ZERO, DISPATCH_GL(vmstate)(DISPATCH) 4944 | lwzx TRACE:TMP2, TMP1, RD 4945 | clrso TMP1 4946 | lp TMP2, TRACE:TMP2->mcode 4947 | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) 4948 | mtctr TMP2 4949 | addi JGL, DISPATCH, GG_DISP2G+32768 4950 | stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH) 4951 | bctr 4952 |.endif 4953 break; 4954 4955 case BC_JMP: 4956 | // RA = base*8 (only used by trace recorder), RD = target 4957 | branch_RD 4958 | ins_next 4959 break; 4960 4961 /* -- Function headers -------------------------------------------------- */ 4962 4963 case BC_FUNCF: 4964 |.if JIT 4965 | hotcall 4966 |.endif 4967 case BC_FUNCV: /* NYI: compiled vararg functions. */ 4968 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. 4969 break; 4970 4971 case BC_JFUNCF: 4972#if !LJ_HASJIT 4973 break; 4974#endif 4975 case BC_IFUNCF: 4976 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 4977 | lwz TMP2, L->maxstack 4978 | lbz TMP1, -4+PC2PROTO(numparams)(PC) 4979 | lwz KBASE, -4+PC2PROTO(k)(PC) 4980 | cmplw RA, TMP2 4981 | slwi TMP1, TMP1, 3 4982 | bgt ->vm_growstack_l 4983 if (op != BC_JFUNCF) { 4984 | ins_next1 4985 } 4986 |2: 4987 | cmplw NARGS8:RC, TMP1 // Check for missing parameters. 4988 | blt >3 4989 if (op == BC_JFUNCF) { 4990 | decode_RD8 RD, INS 4991 | b =>BC_JLOOP 4992 } else { 4993 | ins_next2 4994 } 4995 | 4996 |3: // Clear missing parameters. 4997 | stwx TISNIL, BASE, NARGS8:RC 4998 | addi NARGS8:RC, NARGS8:RC, 8 4999 | b <2 5000 break; 5001 5002 case BC_JFUNCV: 5003#if !LJ_HASJIT 5004 break; 5005#endif 5006 | NYI // NYI: compiled vararg functions 5007 break; /* NYI: compiled vararg functions. */ 5008 5009 case BC_IFUNCV: 5010 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 5011 | lwz TMP2, L->maxstack 5012 | add TMP1, BASE, RC 5013 | add TMP0, RA, RC 5014 | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC. 
5015 | addi TMP3, RC, 8+FRAME_VARG 5016 | lwz KBASE, -4+PC2PROTO(k)(PC) 5017 | cmplw TMP0, TMP2 5018 | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG. 5019 | bge ->vm_growstack_l 5020 | lbz TMP2, -4+PC2PROTO(numparams)(PC) 5021 | mr RA, BASE 5022 | mr RC, TMP1 5023 | ins_next1 5024 | cmpwi TMP2, 0 5025 | addi BASE, TMP1, 8 5026 | beq >3 5027 |1: 5028 | cmplw RA, RC // Less args than parameters? 5029 | lwz TMP0, 0(RA) 5030 | lwz TMP3, 4(RA) 5031 | bge >4 5032 | stw TISNIL, 0(RA) // Clear old fixarg slot (help the GC). 5033 | addi RA, RA, 8 5034 |2: 5035 | addic. TMP2, TMP2, -1 5036 | stw TMP0, 8(TMP1) 5037 | stw TMP3, 12(TMP1) 5038 | addi TMP1, TMP1, 8 5039 | bne <1 5040 |3: 5041 | ins_next2 5042 | 5043 |4: // Clear missing parameters. 5044 | li TMP0, LJ_TNIL 5045 | b <2 5046 break; 5047 5048 case BC_FUNCC: 5049 case BC_FUNCCW: 5050 | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 5051 if (op == BC_FUNCC) { 5052 | lp RD, CFUNC:RB->f 5053 } else { 5054 | lp RD, DISPATCH_GL(wrapf)(DISPATCH) 5055 } 5056 | add TMP1, RA, NARGS8:RC 5057 | lwz TMP2, L->maxstack 5058 | .toc lp TMP3, 0(RD) 5059 | add RC, BASE, NARGS8:RC 5060 | stp BASE, L->base 5061 | cmplw TMP1, TMP2 5062 | stp RC, L->top 5063 | li_vmstate C 5064 |.if TOC 5065 | mtctr TMP3 5066 |.else 5067 | mtctr RD 5068 |.endif 5069 if (op == BC_FUNCCW) { 5070 | lp CARG2, CFUNC:RB->f 5071 } 5072 | mr CARG1, L 5073 | bgt ->vm_growstack_c // Need to grow stack. 5074 | .toc lp TOCREG, TOC_OFS(RD) 5075 | .tocenv lp ENVREG, ENV_OFS(RD) 5076 | st_vmstate 5077 | bctrl // (lua_State *L [, lua_CFunction f]) 5078 | // Returns nresults. 5079 | lp BASE, L->base 5080 | .toc ld TOCREG, SAVE_TOC 5081 | slwi RD, CRET1, 3 5082 | lp TMP1, L->top 5083 | li_vmstate INTERP 5084 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. 
5085 | stw L, DISPATCH_GL(cur_L)(DISPATCH) 5086 | sub RA, TMP1, RD // RA = L->top - nresults*8 5087 | st_vmstate 5088 | b ->vm_returnc 5089 break; 5090 5091 /* ---------------------------------------------------------------------- */ 5092 5093 default: 5094 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); 5095 exit(2); 5096 break; 5097 } 5098} 5099 5100static int build_backend(BuildCtx *ctx) 5101{ 5102 int op; 5103 5104 dasm_growpc(Dst, BC__MAX); 5105 5106 build_subroutines(ctx); 5107 5108 |.code_op 5109 for (op = 0; op < BC__MAX; op++) 5110 build_ins(ctx, (BCOp)op, op); 5111 5112 return BC__MAX; 5113} 5114 5115/* Emit pseudo frame-info for all assembler functions. */ 5116static void emit_asm_debug(BuildCtx *ctx) 5117{ 5118 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); 5119 int i; 5120 switch (ctx->mode) { 5121 case BUILD_elfasm: 5122 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); 5123 fprintf(ctx->fp, 5124 ".Lframe0:\n" 5125 "\t.long .LECIE0-.LSCIE0\n" 5126 ".LSCIE0:\n" 5127 "\t.long 0xffffffff\n" 5128 "\t.byte 0x1\n" 5129 "\t.string \"\"\n" 5130 "\t.uleb128 0x1\n" 5131 "\t.sleb128 -4\n" 5132 "\t.byte 65\n" 5133 "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" 5134 "\t.align 2\n" 5135 ".LECIE0:\n\n"); 5136 fprintf(ctx->fp, 5137 ".LSFDE0:\n" 5138 "\t.long .LEFDE0-.LASFDE0\n" 5139 ".LASFDE0:\n" 5140 "\t.long .Lframe0\n" 5141 "\t.long .Lbegin\n" 5142 "\t.long %d\n" 5143 "\t.byte 0xe\n\t.uleb128 %d\n" 5144 "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" 5145 "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", 5146 fcofs, CFRAME_SIZE); 5147 for (i = 14; i <= 31; i++) 5148 fprintf(ctx->fp, 5149 "\t.byte %d\n\t.uleb128 %d\n" 5150 "\t.byte %d\n\t.uleb128 %d\n", 5151 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)); 5152 fprintf(ctx->fp, 5153 "\t.align 2\n" 5154 ".LEFDE0:\n\n"); 5155#if LJ_HASFFI 5156 fprintf(ctx->fp, 5157 ".LSFDE1:\n" 5158 "\t.long .LEFDE1-.LASFDE1\n" 5159 ".LASFDE1:\n" 5160 "\t.long .Lframe0\n" 5161#if LJ_TARGET_PS3 
5162 "\t.long .lj_vm_ffi_call\n" 5163#else 5164 "\t.long lj_vm_ffi_call\n" 5165#endif 5166 "\t.long %d\n" 5167 "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" 5168 "\t.byte 0x8e\n\t.uleb128 2\n" 5169 "\t.byte 0xd\n\t.uleb128 0xe\n" 5170 "\t.align 2\n" 5171 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); 5172#endif 5173#if !LJ_NO_UNWIND 5174 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); 5175 fprintf(ctx->fp, 5176 ".Lframe1:\n" 5177 "\t.long .LECIE1-.LSCIE1\n" 5178 ".LSCIE1:\n" 5179 "\t.long 0\n" 5180 "\t.byte 0x1\n" 5181 "\t.string \"zPR\"\n" 5182 "\t.uleb128 0x1\n" 5183 "\t.sleb128 -4\n" 5184 "\t.byte 65\n" 5185 "\t.uleb128 6\n" /* augmentation length */ 5186 "\t.byte 0x1b\n" /* pcrel|sdata4 */ 5187 "\t.long lj_err_unwind_dwarf-.\n" 5188 "\t.byte 0x1b\n" /* pcrel|sdata4 */ 5189 "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" 5190 "\t.align 2\n" 5191 ".LECIE1:\n\n"); 5192 fprintf(ctx->fp, 5193 ".LSFDE2:\n" 5194 "\t.long .LEFDE2-.LASFDE2\n" 5195 ".LASFDE2:\n" 5196 "\t.long .LASFDE2-.Lframe1\n" 5197 "\t.long .Lbegin-.\n" 5198 "\t.long %d\n" 5199 "\t.uleb128 0\n" /* augmentation length */ 5200 "\t.byte 0xe\n\t.uleb128 %d\n" 5201 "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" 5202 "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", 5203 fcofs, CFRAME_SIZE); 5204 for (i = 14; i <= 31; i++) 5205 fprintf(ctx->fp, 5206 "\t.byte %d\n\t.uleb128 %d\n" 5207 "\t.byte %d\n\t.uleb128 %d\n", 5208 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)); 5209 fprintf(ctx->fp, 5210 "\t.align 2\n" 5211 ".LEFDE2:\n\n"); 5212#if LJ_HASFFI 5213 fprintf(ctx->fp, 5214 ".Lframe2:\n" 5215 "\t.long .LECIE2-.LSCIE2\n" 5216 ".LSCIE2:\n" 5217 "\t.long 0\n" 5218 "\t.byte 0x1\n" 5219 "\t.string \"zR\"\n" 5220 "\t.uleb128 0x1\n" 5221 "\t.sleb128 -4\n" 5222 "\t.byte 65\n" 5223 "\t.uleb128 1\n" /* augmentation length */ 5224 "\t.byte 0x1b\n" /* pcrel|sdata4 */ 5225 "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" 5226 "\t.align 2\n" 5227 ".LECIE2:\n\n"); 5228 fprintf(ctx->fp, 5229 ".LSFDE3:\n" 5230 "\t.long 
.LEFDE3-.LASFDE3\n" 5231 ".LASFDE3:\n" 5232 "\t.long .LASFDE3-.Lframe2\n" 5233 "\t.long lj_vm_ffi_call-.\n" 5234 "\t.long %d\n" 5235 "\t.uleb128 0\n" /* augmentation length */ 5236 "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" 5237 "\t.byte 0x8e\n\t.uleb128 2\n" 5238 "\t.byte 0xd\n\t.uleb128 0xe\n" 5239 "\t.align 2\n" 5240 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); 5241#endif 5242#endif 5243 break; 5244 default: 5245 break; 5246 } 5247} 5248 5249