1|// Low-level VM code for PowerPC CPUs. 2|// Bytecode interpreter, fast functions and helper functions. 3|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h 4| 5|.arch ppc 6|.section code_op, code_sub 7| 8|.actionlist build_actionlist 9|.globals GLOB_ 10|.globalnames globnames 11|.externnames extnames 12| 13|// Note: The ragged indentation of the instructions is intentional. 14|// The starting columns indicate data dependencies. 15| 16|//----------------------------------------------------------------------- 17| 18|// DynASM defines used by the PPC port: 19|// 20|// P64 64 bit pointers (only for GPR64 testing). 21|// Note: a full PPC64 _LP64 port is not planned. 22|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). 23|// Affects reg saves, stack layout, carry/overflow/dot flags etc. 24|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). 25|// TOC Need table of contents (64 bit or 32 bit variant, e.g. PS3). 26|// Function pointers are really a struct: code, TOC, env (optional). 27|// TOCENV Function pointers have an environment pointer, too (not on PS3). 28|// PPE Power Processor Element of Cell (PS3) or Xenon (Xbox 360). 29|// Must avoid (slow) micro-coded instructions. 30| 31|.if P64 32|.define TOC, 1 33|.define TOCENV, 1 34|.macro lpx, a, b, c; ldx a, b, c; .endmacro 35|.macro lp, a, b; ld a, b; .endmacro 36|.macro stp, a, b; std a, b; .endmacro 37|.define decode_OPP, decode_OP8 38|.if FFI 39|// Missing: Calling conventions, 64 bit regs, TOC. 40|.error lib_ffi not yet implemented for PPC64 41|.endif 42|.else 43|.macro lpx, a, b, c; lwzx a, b, c; .endmacro 44|.macro lp, a, b; lwz a, b; .endmacro 45|.macro stp, a, b; stw a, b; .endmacro 46|.define decode_OPP, decode_OP4 47|.endif 48| 49|// Convenience macros for TOC handling. 50|.if TOC 51|// Linker needs a TOC patch area for every external call relocation. 52|.macro blex, target; bl extern target@plt; nop; .endmacro 53|.macro .toc, a, b; a, b; .endmacro 54|.if P64 55|.define TOC_OFS, 8 56|.define ENV_OFS, 16 57|.else 58|.define TOC_OFS, 4 59|.define ENV_OFS, 8 60|.endif 61|.else // No TOC. 62|.macro blex, target; bl extern target@plt; .endmacro 63|.macro .toc, a, b; .endmacro 64|.endif 65|.macro .tocenv, a, b; .if TOCENV; a, b; .endif; .endmacro 66| 67|.macro .gpr64, a, b; .if GPR64; a, b; .endif; .endmacro 68| 69|.macro andix., y, a, i 70|.if PPE 71| rlwinm y, a, 0, 31-lj_fls(i), 31-lj_ffs(i) 72| cmpwi y, 0 73|.else 74| andi. y, a, i 75|.endif 76|.endmacro 77| 78|.macro clrso, reg 79|.if PPE 80| li reg, 0 81| mtxer reg 82|.else 83| mcrxr cr0 84|.endif 85|.endmacro 86| 87|.macro checkov, reg, noov 88|.if PPE 89| mfxer reg 90| add reg, reg, reg 91| cmpwi reg, 0 92| li reg, 0 93| mtxer reg 94| bgey noov 95|.else 96| mcrxr cr0 97| bley noov 98|.endif 99|.endmacro 100| 101|//----------------------------------------------------------------------- 102| 103|// Fixed register assignments for the interpreter. 104|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) 105| 106|// The following must be C callee-save (but BASE is often refetched). 107|.define BASE, r14 // Base of current Lua stack frame. 108|.define KBASE, r15 // Constants of current Lua function. 109|.define PC, r16 // Next PC. 110|.define DISPATCH, r17 // Opcode dispatch table. 111|.define LREG, r18 // Register holding lua_State (also in SAVE_L). 112|.define MULTRES, r19 // Size of multi-result: (nresults+1)*8. 113|.define JGL, r31 // On-trace: global_State + 32768. 114| 115|// Constants for type-comparisons, stores and conversions. C callee-save. 116|.define TISNUM, r22 117|.define TISNIL, r23 118|.define ZERO, r24 119|.define TOBIT, f30 // 2^52 + 2^51. 120|.define TONUM, f31 // 2^52 + 2^51 + 2^31. 121| 122|// The following temporaries are not saved across C calls, except for RA. 123|.define RA, r20 // Callee-save. 124|.define RB, r10 125|.define RC, r11 126|.define RD, r12 127|.define INS, r7 // Overlaps CARG5. 128| 129|.define TMP0, r0 130|.define TMP1, r8 131|.define TMP2, r9 132|.define TMP3, r6 // Overlaps CARG4. 133| 134|// Saved temporaries. 135|.define SAVE0, r21 136| 137|// Calling conventions. 138|.define CARG1, r3 139|.define CARG2, r4 140|.define CARG3, r5 141|.define CARG4, r6 // Overlaps TMP3. 142|.define CARG5, r7 // Overlaps INS. 143| 144|.define FARG1, f1 145|.define FARG2, f2 146| 147|.define CRET1, r3 148|.define CRET2, r4 149| 150|.define TOCREG, r2 // TOC register (only used by C code). 151|.define ENVREG, r11 // Environment pointer (nested C functions). 152| 153|// Stack layout while in interpreter. Must match with lj_frame.h. 154|.if GPR64 155|.if FRAME32 156| 157|// 456(sp) // \ 32/64 bit C frame info 158|.define TONUM_LO, 452(sp) // | 159|.define TONUM_HI, 448(sp) // | 160|.define TMPD_LO, 444(sp) // | 161|.define TMPD_HI, 440(sp) // | 162|.define SAVE_CR, 432(sp) // | 64 bit CR save. 163|.define SAVE_ERRF, 424(sp) // > Parameter save area. 164|.define SAVE_NRES, 420(sp) // | 165|.define SAVE_L, 416(sp) // | 166|.define SAVE_PC, 412(sp) // | 167|.define SAVE_MULTRES, 408(sp) // | 168|.define SAVE_CFRAME, 400(sp) // / 64 bit C frame chain. 169|// 392(sp) // Reserved. 170|.define CFRAME_SPACE, 384 // Delta for sp. 171|// Back chain for sp: 384(sp) <-- sp entering interpreter 172|.define SAVE_LR, 376(sp) // 32 bit LR stored in hi-part. 173|.define SAVE_GPR_, 232 // .. 232+18*8: 64 bit GPR saves. 174|.define SAVE_FPR_, 88 // .. 88+18*8: 64 bit FPR saves. 175|// 80(sp) // Needed for 16 byte stack frame alignment. 176|// 16(sp) // Callee parameter save area (ABI mandated). 177|// 8(sp) // Reserved 178|// Back chain for sp: 0(sp) <-- sp while in interpreter 179|// 32 bit sp stored in hi-part of 0(sp). 180| 181|.define TMPD_BLO, 447(sp) 182|.define TMPD, TMPD_HI 183|.define TONUM_D, TONUM_HI 184| 185|.else 186| 187|// 508(sp) // \ 32 bit C frame info. 188|.define SAVE_ERRF, 472(sp) // | 189|.define SAVE_NRES, 468(sp) // | 190|.define SAVE_L, 464(sp) // > Parameter save area. 191|.define SAVE_PC, 460(sp) // | 192|.define SAVE_MULTRES, 456(sp) // | 193|.define SAVE_CFRAME, 448(sp) // / 64 bit C frame chain. 194|.define SAVE_LR, 416(sp) 195|.define CFRAME_SPACE, 400 // Delta for sp. 196|// Back chain for sp: 400(sp) <-- sp entering interpreter 197|.define SAVE_FPR_, 256 // .. 256+18*8: 64 bit FPR saves. 198|.define SAVE_GPR_, 112 // .. 112+18*8: 64 bit GPR saves. 199|// 48(sp) // Callee parameter save area (ABI mandated). 200|.define SAVE_TOC, 40(sp) // TOC save area. 201|.define TMPD_LO, 36(sp) // \ Link editor temp (ABI mandated). 202|.define TMPD_HI, 32(sp) // / 203|.define TONUM_LO, 28(sp) // \ Compiler temp (ABI mandated). 204|.define TONUM_HI, 24(sp) // / 205|// Next frame lr: 16(sp) 206|.define SAVE_CR, 8(sp) // 64 bit CR save. 207|// Back chain for sp: 0(sp) <-- sp while in interpreter 208| 209|.define TMPD_BLO, 39(sp) 210|.define TMPD, TMPD_HI 211|.define TONUM_D, TONUM_HI 212| 213|.endif 214|.else 215| 216|.define SAVE_LR, 276(sp) 217|.define CFRAME_SPACE, 272 // Delta for sp. 218|// Back chain for sp: 272(sp) <-- sp entering interpreter 219|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. 220|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. 221|.define SAVE_CR, 52(sp) // 32 bit CR save. 222|.define SAVE_ERRF, 48(sp) // 32 bit C frame info. 223|.define SAVE_NRES, 44(sp) 224|.define SAVE_CFRAME, 40(sp) 225|.define SAVE_L, 36(sp) 226|.define SAVE_PC, 32(sp) 227|.define SAVE_MULTRES, 28(sp) 228|.define UNUSED1, 24(sp) 229|.define TMPD_LO, 20(sp) 230|.define TMPD_HI, 16(sp) 231|.define TONUM_LO, 12(sp) 232|.define TONUM_HI, 8(sp) 233|// Next frame lr: 4(sp) 234|// Back chain for sp: 0(sp) <-- sp while in interpreter 235| 236|.define TMPD_BLO, 23(sp) 237|.define TMPD, TMPD_HI 238|.define TONUM_D, TONUM_HI 239| 240|.endif 241| 242|.macro save_, reg 243|.if GPR64 244| std r..reg, SAVE_GPR_+(reg-14)*8(sp) 245|.else 246| stw r..reg, SAVE_GPR_+(reg-14)*4(sp) 247|.endif 248| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) 249|.endmacro 250|.macro rest_, reg 251|.if GPR64 252| ld r..reg, SAVE_GPR_+(reg-14)*8(sp) 253|.else 254| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) 255|.endif 256| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) 257|.endmacro 258| 259|.macro saveregs 260|.if GPR64 and not FRAME32 261| stdu sp, -CFRAME_SPACE(sp) 262|.else 263| stwu sp, -CFRAME_SPACE(sp) 264|.endif 265| save_ 14; save_ 15; save_ 16 266| mflr r0 267| save_ 17; save_ 18; save_ 19; save_ 20; save_ 21; save_ 22 268|.if GPR64 and not FRAME32 269| std r0, SAVE_LR 270|.else 271| stw r0, SAVE_LR 272|.endif 273| save_ 23; save_ 24; save_ 25 274| mfcr r0 275| save_ 26; save_ 27; save_ 28; save_ 29; save_ 30; save_ 31 276|.if GPR64 277| std r0, SAVE_CR 278|.else 279| stw r0, SAVE_CR 280|.endif 281| .toc std TOCREG, SAVE_TOC 282|.endmacro 283| 284|.macro restoreregs 285|.if GPR64 and not FRAME32 286| ld r0, SAVE_LR 287|.else 288| lwz r0, SAVE_LR 289|.endif 290|.if GPR64 291| ld r12, SAVE_CR 292|.else 293| lwz r12, SAVE_CR 294|.endif 295| rest_ 14; rest_ 15; rest_ 16; rest_ 17; rest_ 18; rest_ 19 296| mtlr r0; 297|.if PPE; mtocrf 0x20, r12; .else; mtcrf 0x38, r12; .endif 298| rest_ 20; rest_ 21; rest_ 22; rest_ 23; rest_ 24; rest_ 25 299|.if PPE; mtocrf 0x10, r12; .endif 300| rest_ 26; rest_ 27; rest_ 28; rest_ 29; rest_ 30; rest_ 31 301|.if PPE; mtocrf 0x08, r12; .endif 302| addi sp, sp, CFRAME_SPACE 303|.endmacro 304| 305|// Type definitions. Some of these are only used for documentation. 306|.type L, lua_State, LREG 307|.type GL, global_State 308|.type TVALUE, TValue 309|.type GCOBJ, GCobj 310|.type STR, GCstr 311|.type TAB, GCtab 312|.type LFUNC, GCfuncL 313|.type CFUNC, GCfuncC 314|.type PROTO, GCproto 315|.type UPVAL, GCupval 316|.type NODE, Node 317|.type NARGS8, int 318|.type TRACE, GCtrace 319| 320|//----------------------------------------------------------------------- 321| 322|// These basic macros should really be part of DynASM. 323|.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro 324|.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro 325|.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro 326|.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro 327|.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro 328| 329|// Trap for not-yet-implemented parts. 330|.macro NYI; tw 4, sp, sp; .endmacro 331| 332|// int/FP conversions. 333|.macro tonum_i, freg, reg 334| xoris reg, reg, 0x8000 335| stw reg, TONUM_LO 336| lfd freg, TONUM_D 337| fsub freg, freg, TONUM 338|.endmacro 339| 340|.macro tonum_u, freg, reg 341| stw reg, TONUM_LO 342| lfd freg, TONUM_D 343| fsub freg, freg, TOBIT 344|.endmacro 345| 346|.macro toint, reg, freg, tmpfreg 347| fctiwz tmpfreg, freg 348| stfd tmpfreg, TMPD 349| lwz reg, TMPD_LO 350|.endmacro 351| 352|.macro toint, reg, freg 353| toint reg, freg, freg 354|.endmacro 355| 356|//----------------------------------------------------------------------- 357| 358|// Access to frame relative to BASE. 359|.define FRAME_PC, -8 360|.define FRAME_FUNC, -4 361| 362|// Instruction decode. 363|.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro 364|.macro decode_OP8, dst, ins; rlwinm dst, ins, 3, 21, 28; .endmacro 365|.macro decode_RA8, dst, ins; rlwinm dst, ins, 27, 21, 28; .endmacro 366|.macro decode_RB8, dst, ins; rlwinm dst, ins, 11, 21, 28; .endmacro 367|.macro decode_RC8, dst, ins; rlwinm dst, ins, 19, 21, 28; .endmacro 368|.macro decode_RD8, dst, ins; rlwinm dst, ins, 19, 13, 28; .endmacro 369| 370|.macro decode_OP1, dst, ins; rlwinm dst, ins, 0, 24, 31; .endmacro 371|.macro decode_RD4, dst, ins; rlwinm dst, ins, 18, 14, 29; .endmacro 372| 373|// Instruction fetch. 374|.macro ins_NEXT1 375| lwz INS, 0(PC) 376| addi PC, PC, 4 377|.endmacro 378|// Instruction decode+dispatch. Note: optimized for e300! 379|.macro ins_NEXT2 380| decode_OPP TMP1, INS 381| lpx TMP0, DISPATCH, TMP1 382| mtctr TMP0 383| decode_RB8 RB, INS 384| decode_RD8 RD, INS 385| decode_RA8 RA, INS 386| decode_RC8 RC, INS 387| bctr 388|.endmacro 389|.macro ins_NEXT 390| ins_NEXT1 391| ins_NEXT2 392|.endmacro 393| 394|// Instruction footer. 395|.if 1 396| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. 397| .define ins_next, ins_NEXT 398| .define ins_next_, ins_NEXT 399| .define ins_next1, ins_NEXT1 400| .define ins_next2, ins_NEXT2 401|.else 402| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. 403| // Affects only certain kinds of benchmarks (and only with -j off). 404| .macro ins_next 405| b ->ins_next 406| .endmacro 407| .macro ins_next1 408| .endmacro 409| .macro ins_next2 410| b ->ins_next 411| .endmacro 412| .macro ins_next_ 413| ->ins_next: 414| ins_NEXT 415| .endmacro 416|.endif 417| 418|// Call decode and dispatch. 419|.macro ins_callt 420| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC 421| lwz PC, LFUNC:RB->pc 422| lwz INS, 0(PC) 423| addi PC, PC, 4 424| decode_OPP TMP1, INS 425| decode_RA8 RA, INS 426| lpx TMP0, DISPATCH, TMP1 427| add RA, RA, BASE 428| mtctr TMP0 429| bctr 430|.endmacro 431| 432|.macro ins_call 433| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC 434| stw PC, FRAME_PC(BASE) 435| ins_callt 436|.endmacro 437| 438|//----------------------------------------------------------------------- 439| 440|// Macros to test operand types. 441|.macro checknum, reg; cmplw reg, TISNUM; .endmacro 442|.macro checknum, cr, reg; cmplw cr, reg, TISNUM; .endmacro 443|.macro checkstr, reg; cmpwi reg, LJ_TSTR; .endmacro 444|.macro checktab, reg; cmpwi reg, LJ_TTAB; .endmacro 445|.macro checkfunc, reg; cmpwi reg, LJ_TFUNC; .endmacro 446|.macro checknil, reg; cmpwi reg, LJ_TNIL; .endmacro 447| 448|.macro branch_RD 449| srwi TMP0, RD, 1 450| addis PC, PC, -(BCBIAS_J*4 >> 16) 451| add PC, PC, TMP0 452|.endmacro 453| 454|// Assumes DISPATCH is relative to GL. 455#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) 456#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) 457| 458#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) 459| 460|.macro hotcheck, delta, target 461| rlwinm TMP1, PC, 31, 25, 30 462| addi TMP1, TMP1, GG_DISP2HOT 463| lhzx TMP2, DISPATCH, TMP1 464| addic. TMP2, TMP2, -delta 465| sthx TMP2, DISPATCH, TMP1 466| blt target 467|.endmacro 468| 469|.macro hotloop 470| hotcheck HOTCOUNT_LOOP, ->vm_hotloop 471|.endmacro 472| 473|.macro hotcall 474| hotcheck HOTCOUNT_CALL, ->vm_hotcall 475|.endmacro 476| 477|// Set current VM state. Uses TMP0. 478|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro 479|.macro st_vmstate; stw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro 480| 481|// Move table write barrier back. Overwrites mark and tmp. 482|.macro barrierback, tab, mark, tmp 483| lwz tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) 484| // Assumes LJ_GC_BLACK is 0x04. 485| rlwinm mark, mark, 0, 30, 28 // black2gray(tab) 486| stw tab, DISPATCH_GL(gc.grayagain)(DISPATCH) 487| stb mark, tab->marked 488| stw tmp, tab->gclist 489|.endmacro 490| 491|//----------------------------------------------------------------------- 492 493/* Generate subroutines used by opcodes and other parts of the VM. */ 494/* The .code_sub section should be last to help static branch prediction. */ 495static void build_subroutines(BuildCtx *ctx) 496{ 497 |.code_sub 498 | 499 |//----------------------------------------------------------------------- 500 |//-- Return handling ---------------------------------------------------- 501 |//----------------------------------------------------------------------- 502 | 503 |->vm_returnp: 504 | // See vm_return. Also: TMP2 = previous base. 505 | andix. TMP0, PC, FRAME_P 506 | li TMP1, LJ_TTRUE 507 | beq ->cont_dispatch 508 | 509 | // Return from pcall or xpcall fast func. 510 | lwz PC, FRAME_PC(TMP2) // Fetch PC of previous frame. 511 | mr BASE, TMP2 // Restore caller base. 512 | // Prepending may overwrite the pcall frame, so do it at the end. 513 | stwu TMP1, FRAME_PC(RA) // Prepend true to results. 514 | 515 |->vm_returnc: 516 | addi RD, RD, 8 // RD = (nresults+1)*8. 517 | andix. TMP0, PC, FRAME_TYPE 518 | cmpwi cr1, RD, 0 519 | li CRET1, LUA_YIELD 520 | beq cr1, ->vm_unwind_c_eh 521 | mr MULTRES, RD 522 | beq ->BC_RET_Z // Handle regular return to Lua. 523 | 524 |->vm_return: 525 | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return 526 | // TMP0 = PC & FRAME_TYPE 527 | cmpwi TMP0, FRAME_C 528 | rlwinm TMP2, PC, 0, 0, 28 529 | li_vmstate C 530 | sub TMP2, BASE, TMP2 // TMP2 = previous base. 531 | bney ->vm_returnp 532 | 533 | addic. TMP1, RD, -8 534 | stp TMP2, L->base 535 | lwz TMP2, SAVE_NRES 536 | subi BASE, BASE, 8 537 | st_vmstate 538 | slwi TMP2, TMP2, 3 539 | beq >2 540 |1: 541 | addic. TMP1, TMP1, -8 542 | lfd f0, 0(RA) 543 | addi RA, RA, 8 544 | stfd f0, 0(BASE) 545 | addi BASE, BASE, 8 546 | bney <1 547 | 548 |2: 549 | cmpw TMP2, RD // More/less results wanted? 550 | bne >6 551 |3: 552 | stp BASE, L->top // Store new top. 553 | 554 |->vm_leave_cp: 555 | lp TMP0, SAVE_CFRAME // Restore previous C frame. 556 | li CRET1, 0 // Ok return status for vm_pcall. 557 | stp TMP0, L->cframe 558 | 559 |->vm_leave_unw: 560 | restoreregs 561 | blr 562 | 563 |6: 564 | ble >7 // Less results wanted? 565 | // More results wanted. Check stack size and fill up results with nil. 566 | lwz TMP1, L->maxstack 567 | cmplw BASE, TMP1 568 | bge >8 569 | stw TISNIL, 0(BASE) 570 | addi RD, RD, 8 571 | addi BASE, BASE, 8 572 | b <2 573 | 574 |7: // Less results wanted. 575 | subfic TMP3, TMP2, 0 // LUA_MULTRET+1 case? 576 | sub TMP0, RD, TMP2 577 | subfe TMP1, TMP1, TMP1 // TMP1 = TMP2 == 0 ? 0 : -1 578 | and TMP0, TMP0, TMP1 579 | sub BASE, BASE, TMP0 // Either keep top or shrink it. 580 | b <3 581 | 582 |8: // Corner case: need to grow stack for filling up results. 583 | // This can happen if: 584 | // - A C function grows the stack (a lot). 585 | // - The GC shrinks the stack in between. 586 | // - A return back from a lua_call() with (high) nresults adjustment. 587 | stp BASE, L->top // Save current top held in BASE (yes). 588 | mr SAVE0, RD 589 | srwi CARG2, TMP2, 3 590 | mr CARG1, L 591 | bl extern lj_state_growstack // (lua_State *L, int n) 592 | lwz TMP2, SAVE_NRES 593 | mr RD, SAVE0 594 | slwi TMP2, TMP2, 3 595 | lp BASE, L->top // Need the (realloced) L->top in BASE. 596 | b <2 597 | 598 |->vm_unwind_c: // Unwind C stack, return from vm_pcall. 599 | // (void *cframe, int errcode) 600 | mr sp, CARG1 601 | mr CRET1, CARG2 602 |->vm_unwind_c_eh: // Landing pad for external unwinder. 603 | lwz L, SAVE_L 604 | .toc ld TOCREG, SAVE_TOC 605 | li TMP0, ~LJ_VMST_C 606 | lwz GL:TMP1, L->glref 607 | stw TMP0, GL:TMP1->vmstate 608 | b ->vm_leave_unw 609 | 610 |->vm_unwind_ff: // Unwind C stack, return from ff pcall. 611 | // (void *cframe) 612 |.if GPR64 613 | rldicr sp, CARG1, 0, 61 614 |.else 615 | rlwinm sp, CARG1, 0, 0, 29 616 |.endif 617 |->vm_unwind_ff_eh: // Landing pad for external unwinder. 618 | lwz L, SAVE_L 619 | .toc ld TOCREG, SAVE_TOC 620 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 621 | lp BASE, L->base 622 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 623 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 624 | li ZERO, 0 625 | stw TMP3, TMPD 626 | li TMP1, LJ_TFALSE 627 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 628 | li TISNIL, LJ_TNIL 629 | li_vmstate INTERP 630 | lfs TOBIT, TMPD 631 | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. 632 | la RA, -8(BASE) // Results start at BASE-8. 633 | stw TMP3, TMPD 634 | addi DISPATCH, DISPATCH, GG_G2DISP 635 | stw TMP1, 0(RA) // Prepend false to error message. 636 | li RD, 16 // 2 results: false + error message. 637 | st_vmstate 638 | lfs TONUM, TMPD 639 | b ->vm_returnc 640 | 641 |//----------------------------------------------------------------------- 642 |//-- Grow stack for calls ----------------------------------------------- 643 |//----------------------------------------------------------------------- 644 | 645 |->vm_growstack_c: // Grow stack for C function. 646 | li CARG2, LUA_MINSTACK 647 | b >2 648 | 649 |->vm_growstack_l: // Grow stack for Lua function. 650 | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC 651 | add RC, BASE, RC 652 | sub RA, RA, BASE 653 | stp BASE, L->base 654 | addi PC, PC, 4 // Must point after first instruction. 655 | stp RC, L->top 656 | srwi CARG2, RA, 3 657 |2: 658 | // L->base = new base, L->top = top 659 | stw PC, SAVE_PC 660 | mr CARG1, L 661 | bl extern lj_state_growstack // (lua_State *L, int n) 662 | lp BASE, L->base 663 | lp RC, L->top 664 | lwz LFUNC:RB, FRAME_FUNC(BASE) 665 | sub RC, RC, BASE 666 | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC 667 | ins_callt // Just retry the call. 668 | 669 |//----------------------------------------------------------------------- 670 |//-- Entry points into the assembler VM --------------------------------- 671 |//----------------------------------------------------------------------- 672 | 673 |->vm_resume: // Setup C frame and resume thread. 674 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) 675 | saveregs 676 | mr L, CARG1 677 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 678 | mr BASE, CARG2 679 | lbz TMP1, L->status 680 | stw L, SAVE_L 681 | li PC, FRAME_CP 682 | addi TMP0, sp, CFRAME_RESUME 683 | addi DISPATCH, DISPATCH, GG_G2DISP 684 | stw CARG3, SAVE_NRES 685 | cmplwi TMP1, 0 686 | stw CARG3, SAVE_ERRF 687 | stp TMP0, L->cframe 688 | stp CARG3, SAVE_CFRAME 689 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 690 | beq >3 691 | 692 | // Resume after yield (like a return). 693 | mr RA, BASE 694 | lp BASE, L->base 695 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 696 | lp TMP1, L->top 697 | lwz PC, FRAME_PC(BASE) 698 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 699 | stb CARG3, L->status 700 | stw TMP3, TMPD 701 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 702 | lfs TOBIT, TMPD 703 | sub RD, TMP1, BASE 704 | stw TMP3, TMPD 705 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 706 | addi RD, RD, 8 707 | stw TMP0, TONUM_HI 708 | li_vmstate INTERP 709 | li ZERO, 0 710 | st_vmstate 711 | andix. TMP0, PC, FRAME_TYPE 712 | mr MULTRES, RD 713 | lfs TONUM, TMPD 714 | li TISNIL, LJ_TNIL 715 | beq ->BC_RET_Z 716 | b ->vm_return 717 | 718 |->vm_pcall: // Setup protected C frame and enter VM. 719 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) 720 | saveregs 721 | li PC, FRAME_CP 722 | stw CARG4, SAVE_ERRF 723 | b >1 724 | 725 |->vm_call: // Setup C frame and enter VM. 726 | // (lua_State *L, TValue *base, int nres1) 727 | saveregs 728 | li PC, FRAME_C 729 | 730 |1: // Entry point for vm_pcall above (PC = ftype). 731 | lp TMP1, L:CARG1->cframe 732 | stw CARG3, SAVE_NRES 733 | mr L, CARG1 734 | stw CARG1, SAVE_L 735 | mr BASE, CARG2 736 | stp sp, L->cframe // Add our C frame to cframe chain. 737 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 738 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 739 | stp TMP1, SAVE_CFRAME 740 | addi DISPATCH, DISPATCH, GG_G2DISP 741 | 742 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 743 | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). 744 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 745 | lp TMP1, L->top 746 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 747 | add PC, PC, BASE 748 | stw TMP3, TMPD 749 | li ZERO, 0 750 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 751 | lfs TOBIT, TMPD 752 | sub PC, PC, TMP2 // PC = frame delta + frame type 753 | stw TMP3, TMPD 754 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 755 | sub NARGS8:RC, TMP1, BASE 756 | stw TMP0, TONUM_HI 757 | li_vmstate INTERP 758 | lfs TONUM, TMPD 759 | li TISNIL, LJ_TNIL 760 | st_vmstate 761 | 762 |->vm_call_dispatch: 763 | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC 764 | lwz TMP0, FRAME_PC(BASE) 765 | lwz LFUNC:RB, FRAME_FUNC(BASE) 766 | checkfunc TMP0; bne ->vmeta_call 767 | 768 |->vm_call_dispatch_f: 769 | ins_call 770 | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC 771 | 772 |->vm_cpcall: // Setup protected C frame, call C. 773 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) 774 | saveregs 775 | mr L, CARG1 776 | lwz TMP0, L:CARG1->stack 777 | stw CARG1, SAVE_L 778 | lp TMP1, L->top 779 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 780 | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). 781 | lp TMP1, L->cframe 782 | stp sp, L->cframe // Add our C frame to cframe chain. 783 | .toc lp CARG4, 0(CARG4) 784 | li TMP2, 0 785 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. 786 | stw TMP2, SAVE_ERRF // No error function. 787 | stp TMP1, SAVE_CFRAME 788 | mtctr CARG4 789 | bctrl // (lua_State *L, lua_CFunction func, void *ud) 790 |.if PPE 791 | mr BASE, CRET1 792 | cmpwi CRET1, 0 793 |.else 794 | mr. BASE, CRET1 795 |.endif 796 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 797 | li PC, FRAME_CP 798 | addi DISPATCH, DISPATCH, GG_G2DISP 799 | bne <3 // Else continue with the call. 800 | b ->vm_leave_cp // No base? Just remove C frame. 801 | 802 |//----------------------------------------------------------------------- 803 |//-- Metamethod handling ------------------------------------------------ 804 |//----------------------------------------------------------------------- 805 | 806 |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the 807 |// stack, so BASE doesn't need to be reloaded across these calls. 808 | 809 |//-- Continuation dispatch ---------------------------------------------- 810 | 811 |->cont_dispatch: 812 | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 813 | lwz TMP0, -12(BASE) // Continuation. 814 | mr RB, BASE 815 | mr BASE, TMP2 // Restore caller BASE. 816 | lwz LFUNC:TMP1, FRAME_FUNC(TMP2) 817 |.if FFI 818 | cmplwi TMP0, 1 819 |.endif 820 | lwz PC, -16(RB) // Restore PC from [cont|PC]. 821 | subi TMP2, RD, 8 822 | lwz TMP1, LFUNC:TMP1->pc 823 | stwx TISNIL, RA, TMP2 // Ensure one valid arg. 824 |.if FFI 825 | ble >1 826 |.endif 827 | lwz KBASE, PC2PROTO(k)(TMP1) 828 | // BASE = base, RA = resultptr, RB = meta base 829 | mtctr TMP0 830 | bctr // Jump to continuation. 831 | 832 |.if FFI 833 |1: 834 | beq ->cont_ffi_callback // cont = 1: return from FFI callback. 835 | // cont = 0: tailcall from C function. 836 | subi TMP1, RB, 16 837 | sub RC, TMP1, BASE 838 | b ->vm_call_tail 839 |.endif 840 | 841 |->cont_cat: // RA = resultptr, RB = meta base 842 | lwz INS, -4(PC) 843 | subi CARG2, RB, 16 844 | decode_RB8 SAVE0, INS 845 | lfd f0, 0(RA) 846 | add TMP1, BASE, SAVE0 847 | stp BASE, L->base 848 | cmplw TMP1, CARG2 849 | sub CARG3, CARG2, TMP1 850 | decode_RA8 RA, INS 851 | stfd f0, 0(CARG2) 852 | bney ->BC_CAT_Z 853 | stfdx f0, BASE, RA 854 | b ->cont_nop 855 | 856 |//-- Table indexing metamethods ----------------------------------------- 857 | 858 |->vmeta_tgets1: 859 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) 860 | li TMP0, LJ_TSTR 861 | decode_RB8 RB, INS 862 | stw STR:RC, 4(CARG3) 863 | add CARG2, BASE, RB 864 | stw TMP0, 0(CARG3) 865 | b >1 866 | 867 |->vmeta_tgets: 868 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) 869 | li TMP0, LJ_TTAB 870 | stw TAB:RB, 4(CARG2) 871 | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) 872 | stw TMP0, 0(CARG2) 873 | li TMP1, LJ_TSTR 874 | stw STR:RC, 4(CARG3) 875 | stw TMP1, 0(CARG3) 876 | b >1 877 | 878 |->vmeta_tgetb: // TMP0 = index 879 |.if not DUALNUM 880 | tonum_u f0, TMP0 881 |.endif 882 | decode_RB8 RB, INS 883 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) 884 | add CARG2, BASE, RB 885 |.if DUALNUM 886 | stw TISNUM, 0(CARG3) 887 | stw TMP0, 4(CARG3) 888 |.else 889 | stfd f0, 0(CARG3) 890 |.endif 891 | b >1 892 | 893 |->vmeta_tgetv: 894 | decode_RB8 RB, INS 895 | decode_RC8 RC, INS 896 | add CARG2, BASE, RB 897 | add CARG3, BASE, RC 898 |1: 899 | stp BASE, L->base 900 | mr CARG1, L 901 | stw PC, SAVE_PC 902 | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) 903 | // Returns TValue * (finished) or NULL (metamethod). 904 | cmplwi CRET1, 0 905 | beq >3 906 | lfd f0, 0(CRET1) 907 | ins_next1 908 | stfdx f0, BASE, RA 909 | ins_next2 910 | 911 |3: // Call __index metamethod. 912 | // BASE = base, L->top = new base, stack = cont/func/t/k 913 | subfic TMP1, BASE, FRAME_CONT 914 | lp BASE, L->top 915 | stw PC, -16(BASE) // [cont|PC] 916 | add PC, TMP1, BASE 917 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 918 | li NARGS8:RC, 16 // 2 args for func(t, k). 919 | b ->vm_call_dispatch_f 920 | 921 |//----------------------------------------------------------------------- 922 | 923 |->vmeta_tsets1: 924 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) 925 | li TMP0, LJ_TSTR 926 | decode_RB8 RB, INS 927 | stw STR:RC, 4(CARG3) 928 | add CARG2, BASE, RB 929 | stw TMP0, 0(CARG3) 930 | b >1 931 | 932 |->vmeta_tsets: 933 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) 934 | li TMP0, LJ_TTAB 935 | stw TAB:RB, 4(CARG2) 936 | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) 937 | stw TMP0, 0(CARG2) 938 | li TMP1, LJ_TSTR 939 | stw STR:RC, 4(CARG3) 940 | stw TMP1, 0(CARG3) 941 | b >1 942 | 943 |->vmeta_tsetb: // TMP0 = index 944 |.if not DUALNUM 945 | tonum_u f0, TMP0 946 |.endif 947 | decode_RB8 RB, INS 948 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) 949 | add CARG2, BASE, RB 950 |.if DUALNUM 951 | stw TISNUM, 0(CARG3) 952 | stw TMP0, 4(CARG3) 953 |.else 954 | stfd f0, 0(CARG3) 955 |.endif 956 | b >1 957 | 958 |->vmeta_tsetv: 959 | decode_RB8 RB, INS 960 | decode_RC8 RC, INS 961 | add CARG2, BASE, RB 962 | add CARG3, BASE, RC 963 |1: 964 | stp BASE, L->base 965 | mr CARG1, L 966 | stw PC, SAVE_PC 967 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) 968 | // Returns TValue * (finished) or NULL (metamethod). 969 | cmplwi CRET1, 0 970 | lfdx f0, BASE, RA 971 | beq >3 972 | // NOBARRIER: lj_meta_tset ensures the table is not black. 973 | ins_next1 974 | stfd f0, 0(CRET1) 975 | ins_next2 976 | 977 |3: // Call __newindex metamethod. 978 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) 979 | subfic TMP1, BASE, FRAME_CONT 980 | lp BASE, L->top 981 | stw PC, -16(BASE) // [cont|PC] 982 | add PC, TMP1, BASE 983 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 984 | li NARGS8:RC, 24 // 3 args for func(t, k, v) 985 | stfd f0, 16(BASE) // Copy value to third argument. 986 | b ->vm_call_dispatch_f 987 | 988 |//-- Comparison metamethods --------------------------------------------- 989 | 990 |->vmeta_comp: 991 | mr CARG1, L 992 | subi PC, PC, 4 993 |.if DUALNUM 994 | mr CARG2, RA 995 |.else 996 | add CARG2, BASE, RA 997 |.endif 998 | stw PC, SAVE_PC 999 |.if DUALNUM 1000 | mr CARG3, RD 1001 |.else 1002 | add CARG3, BASE, RD 1003 |.endif 1004 | stp BASE, L->base 1005 | decode_OP1 CARG4, INS 1006 | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) 1007 | // Returns 0/1 or TValue * (metamethod). 1008 |3: 1009 | cmplwi CRET1, 1 1010 | bgt ->vmeta_binop 1011 | subfic CRET1, CRET1, 0 1012 |4: 1013 | lwz INS, 0(PC) 1014 | addi PC, PC, 4 1015 | decode_RD4 TMP2, INS 1016 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 1017 | and TMP2, TMP2, CRET1 1018 | add PC, PC, TMP2 1019 |->cont_nop: 1020 | ins_next 1021 | 1022 |->cont_ra: // RA = resultptr 1023 | lwz INS, -4(PC) 1024 | lfd f0, 0(RA) 1025 | decode_RA8 TMP1, INS 1026 | stfdx f0, BASE, TMP1 1027 | b ->cont_nop 1028 | 1029 |->cont_condt: // RA = resultptr 1030 | lwz TMP0, 0(RA) 1031 | .gpr64 extsw TMP0, TMP0 1032 | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is true. 1033 | subfe CRET1, CRET1, CRET1 1034 | not CRET1, CRET1 1035 | b <4 1036 | 1037 |->cont_condf: // RA = resultptr 1038 | lwz TMP0, 0(RA) 1039 | .gpr64 extsw TMP0, TMP0 1040 | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is false. 1041 | subfe CRET1, CRET1, CRET1 1042 | b <4 1043 | 1044 |->vmeta_equal: 1045 | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. 1046 | subi PC, PC, 4 1047 | stp BASE, L->base 1048 | mr CARG1, L 1049 | stw PC, SAVE_PC 1050 | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) 1051 | // Returns 0/1 or TValue * (metamethod). 1052 | b <3 1053 | 1054 |->vmeta_equal_cd: 1055 |.if FFI 1056 | mr CARG2, INS 1057 | subi PC, PC, 4 1058 | stp BASE, L->base 1059 | mr CARG1, L 1060 | stw PC, SAVE_PC 1061 | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) 1062 | // Returns 0/1 or TValue * (metamethod). 1063 | b <3 1064 |.endif 1065 | 1066 |//-- Arithmetic metamethods --------------------------------------------- 1067 | 1068 |->vmeta_arith_nv: 1069 | add CARG3, KBASE, RC 1070 | add CARG4, BASE, RB 1071 | b >1 1072 |->vmeta_arith_nv2: 1073 |.if DUALNUM 1074 | mr CARG3, RC 1075 | mr CARG4, RB 1076 | b >1 1077 |.endif 1078 | 1079 |->vmeta_unm: 1080 | mr CARG3, RD 1081 | mr CARG4, RD 1082 | b >1 1083 | 1084 |->vmeta_arith_vn: 1085 | add CARG3, BASE, RB 1086 | add CARG4, KBASE, RC 1087 | b >1 1088 | 1089 |->vmeta_arith_vv: 1090 | add CARG3, BASE, RB 1091 | add CARG4, BASE, RC 1092 |.if DUALNUM 1093 | b >1 1094 |.endif 1095 |->vmeta_arith_vn2: 1096 |->vmeta_arith_vv2: 1097 |.if DUALNUM 1098 | mr CARG3, RB 1099 | mr CARG4, RC 1100 |.endif 1101 |1: 1102 | add CARG2, BASE, RA 1103 | stp BASE, L->base 1104 | mr CARG1, L 1105 | stw PC, SAVE_PC 1106 | decode_OP1 CARG5, INS // Caveat: CARG5 overlaps INS. 1107 | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) 1108 | // Returns NULL (finished) or TValue * (metamethod). 1109 | cmplwi CRET1, 0 1110 | beq ->cont_nop 1111 | 1112 | // Call metamethod for binary op. 1113 |->vmeta_binop: 1114 | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 1115 | sub TMP1, CRET1, BASE 1116 | stw PC, -16(CRET1) // [cont|PC] 1117 | mr TMP2, BASE 1118 | addi PC, TMP1, FRAME_CONT 1119 | mr BASE, CRET1 1120 | li NARGS8:RC, 16 // 2 args for func(o1, o2). 1121 | b ->vm_call_dispatch 1122 | 1123 |->vmeta_len: 1124#if LJ_52 1125 | mr SAVE0, CARG1 1126#endif 1127 | mr CARG2, RD 1128 | stp BASE, L->base 1129 | mr CARG1, L 1130 | stw PC, SAVE_PC 1131 | bl extern lj_meta_len // (lua_State *L, TValue *o) 1132 | // Returns NULL (retry) or TValue * (metamethod base). 1133#if LJ_52 1134 | cmplwi CRET1, 0 1135 | bne ->vmeta_binop // Binop call for compatibility. 1136 | mr CARG1, SAVE0 1137 | b ->BC_LEN_Z 1138#else 1139 | b ->vmeta_binop // Binop call for compatibility. 1140#endif 1141 | 1142 |//-- Call metamethod ---------------------------------------------------- 1143 | 1144 |->vmeta_call: // Resolve and call __call metamethod. 1145 | // TMP2 = old base, BASE = new base, RC = nargs*8 1146 | mr CARG1, L 1147 | stp TMP2, L->base // This is the callers base! 1148 | subi CARG2, BASE, 8 1149 | stw PC, SAVE_PC 1150 | add CARG3, BASE, RC 1151 | mr SAVE0, NARGS8:RC 1152 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) 1153 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 1154 | addi NARGS8:RC, SAVE0, 8 // Got one more argument now. 1155 | ins_call 1156 | 1157 |->vmeta_callt: // Resolve __call for BC_CALLT. 1158 | // BASE = old base, RA = new base, RC = nargs*8 1159 | mr CARG1, L 1160 | stp BASE, L->base 1161 | subi CARG2, RA, 8 1162 | stw PC, SAVE_PC 1163 | add CARG3, RA, RC 1164 | mr SAVE0, NARGS8:RC 1165 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) 1166 | lwz TMP1, FRAME_PC(BASE) 1167 | addi NARGS8:RC, SAVE0, 8 // Got one more argument now. 1168 | lwz LFUNC:RB, FRAME_FUNC(RA) // Guaranteed to be a function here. 1169 | b ->BC_CALLT_Z 1170 | 1171 |//-- Argument coercion for 'for' statement ------------------------------ 1172 | 1173 |->vmeta_for: 1174 | mr CARG1, L 1175 | stp BASE, L->base 1176 | mr CARG2, RA 1177 | stw PC, SAVE_PC 1178 | mr SAVE0, INS 1179 | bl extern lj_meta_for // (lua_State *L, TValue *base) 1180 |.if JIT 1181 | decode_OP1 TMP0, SAVE0 1182 |.endif 1183 | decode_RA8 RA, SAVE0 1184 |.if JIT 1185 | cmpwi TMP0, BC_JFORI 1186 |.endif 1187 | decode_RD8 RD, SAVE0 1188 |.if JIT 1189 | beqy =>BC_JFORI 1190 |.endif 1191 | b =>BC_FORI 1192 | 1193 |//----------------------------------------------------------------------- 1194 |//-- Fast functions ----------------------------------------------------- 1195 |//----------------------------------------------------------------------- 1196 | 1197 |.macro .ffunc, name 1198 |->ff_ .. name: 1199 |.endmacro 1200 | 1201 |.macro .ffunc_1, name 1202 |->ff_ .. name: 1203 | cmplwi NARGS8:RC, 8 1204 | lwz CARG3, 0(BASE) 1205 | lwz CARG1, 4(BASE) 1206 | blt ->fff_fallback 1207 |.endmacro 1208 | 1209 |.macro .ffunc_2, name 1210 |->ff_ .. name: 1211 | cmplwi NARGS8:RC, 16 1212 | lwz CARG3, 0(BASE) 1213 | lwz CARG4, 8(BASE) 1214 | lwz CARG1, 4(BASE) 1215 | lwz CARG2, 12(BASE) 1216 | blt ->fff_fallback 1217 |.endmacro 1218 | 1219 |.macro .ffunc_n, name 1220 |->ff_ .. name: 1221 | cmplwi NARGS8:RC, 8 1222 | lwz CARG3, 0(BASE) 1223 | lfd FARG1, 0(BASE) 1224 | blt ->fff_fallback 1225 | checknum CARG3; bge ->fff_fallback 1226 |.endmacro 1227 | 1228 |.macro .ffunc_nn, name 1229 |->ff_ .. name: 1230 | cmplwi NARGS8:RC, 16 1231 | lwz CARG3, 0(BASE) 1232 | lfd FARG1, 0(BASE) 1233 | lwz CARG4, 8(BASE) 1234 | lfd FARG2, 8(BASE) 1235 | blt ->fff_fallback 1236 | checknum CARG3; bge ->fff_fallback 1237 | checknum CARG4; bge ->fff_fallback 1238 |.endmacro 1239 | 1240 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. 1241 |.macro ffgccheck 1242 | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH) 1243 | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) 1244 | cmplw TMP0, TMP1 1245 | bgel ->fff_gcstep 1246 |.endmacro 1247 | 1248 |//-- Base library: checks ----------------------------------------------- 1249 | 1250 |.ffunc_1 assert 1251 | li TMP1, LJ_TFALSE 1252 | la RA, -8(BASE) 1253 | cmplw cr1, CARG3, TMP1 1254 | lwz PC, FRAME_PC(BASE) 1255 | bge cr1, ->fff_fallback 1256 | stw CARG3, 0(RA) 1257 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. 1258 | stw CARG1, 4(RA) 1259 | beq ->fff_res // Done if exactly 1 argument. 1260 | li TMP1, 8 1261 | subi RC, RC, 8 1262 |1: 1263 | cmplw TMP1, RC 1264 | lfdx f0, BASE, TMP1 1265 | stfdx f0, RA, TMP1 1266 | addi TMP1, TMP1, 8 1267 | bney <1 1268 | b ->fff_res 1269 | 1270 |.ffunc type 1271 | cmplwi NARGS8:RC, 8 1272 | lwz CARG1, 0(BASE) 1273 | blt ->fff_fallback 1274 | .gpr64 extsw CARG1, CARG1 1275 | subfc TMP0, TISNUM, CARG1 1276 | subfe TMP2, CARG1, CARG1 1277 | orc TMP1, TMP2, TMP0 1278 | addi TMP1, TMP1, ~LJ_TISNUM+1 1279 | slwi TMP1, TMP1, 3 1280 | la TMP2, CFUNC:RB->upvalue 1281 | lfdx FARG1, TMP2, TMP1 1282 | b ->fff_resn 1283 | 1284 |//-- Base library: getters and setters --------------------------------- 1285 | 1286 |.ffunc_1 getmetatable 1287 | checktab CARG3; bne >6 1288 |1: // Field metatable must be at same offset for GCtab and GCudata! 1289 | lwz TAB:CARG1, TAB:CARG1->metatable 1290 |2: 1291 | li CARG3, LJ_TNIL 1292 | cmplwi TAB:CARG1, 0 1293 | lwz STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) 1294 | beq ->fff_restv 1295 | lwz TMP0, TAB:CARG1->hmask 1296 | li CARG3, LJ_TTAB // Use metatable as default result. 1297 | lwz TMP1, STR:RC->hash 1298 | lwz NODE:TMP2, TAB:CARG1->node 1299 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 1300 | slwi TMP0, TMP1, 5 1301 | slwi TMP1, TMP1, 3 1302 | sub TMP1, TMP0, TMP1 1303 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 1304 |3: // Rearranged logic, because we expect _not_ to find the key. 1305 | lwz CARG4, NODE:TMP2->key 1306 | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) 1307 | lwz CARG2, NODE:TMP2->val 1308 | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2) 1309 | checkstr CARG4; bne >4 1310 | cmpw TMP0, STR:RC; beq >5 1311 |4: 1312 | lwz NODE:TMP2, NODE:TMP2->next 1313 | cmplwi NODE:TMP2, 0 1314 | beq ->fff_restv // Not found, keep default result. 1315 | b <3 1316 |5: 1317 | checknil CARG2 1318 | beq ->fff_restv // Ditto for nil value. 1319 | mr CARG3, CARG2 // Return value of mt.__metatable. 1320 | mr CARG1, TMP1 1321 | b ->fff_restv 1322 | 1323 |6: 1324 | cmpwi CARG3, LJ_TUDATA; beq <1 1325 | .gpr64 extsw CARG3, CARG3 1326 | subfc TMP0, TISNUM, CARG3 1327 | subfe TMP2, CARG3, CARG3 1328 | orc TMP1, TMP2, TMP0 1329 | addi TMP1, TMP1, ~LJ_TISNUM+1 1330 | slwi TMP1, TMP1, 2 1331 | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH) 1332 | lwzx TAB:CARG1, TMP2, TMP1 1333 | b <2 1334 | 1335 |.ffunc_2 setmetatable 1336 | // Fast path: no mt for table yet and not clearing the mt. 1337 | checktab CARG3; bne ->fff_fallback 1338 | lwz TAB:TMP1, TAB:CARG1->metatable 1339 | checktab CARG4; bne ->fff_fallback 1340 | cmplwi TAB:TMP1, 0 1341 | lbz TMP3, TAB:CARG1->marked 1342 | bne ->fff_fallback 1343 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 1344 | stw TAB:CARG2, TAB:CARG1->metatable 1345 | beq ->fff_restv 1346 | barrierback TAB:CARG1, TMP3, TMP0 1347 | b ->fff_restv 1348 | 1349 |.ffunc rawget 1350 | cmplwi NARGS8:RC, 16 1351 | lwz CARG4, 0(BASE) 1352 | lwz TAB:CARG2, 4(BASE) 1353 | blt ->fff_fallback 1354 | checktab CARG4; bne ->fff_fallback 1355 | la CARG3, 8(BASE) 1356 | mr CARG1, L 1357 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1358 | // Returns cTValue *. 1359 | lfd FARG1, 0(CRET1) 1360 | b ->fff_resn 1361 | 1362 |//-- Base library: conversions ------------------------------------------ 1363 | 1364 |.ffunc tonumber 1365 | // Only handles the number case inline (without a base argument). 1366 | cmplwi NARGS8:RC, 8 1367 | lwz CARG1, 0(BASE) 1368 | lfd FARG1, 0(BASE) 1369 | bne ->fff_fallback // Exactly one argument. 1370 | checknum CARG1; bgt ->fff_fallback 1371 | b ->fff_resn 1372 | 1373 |.ffunc_1 tostring 1374 | // Only handles the string or number case inline. 1375 | checkstr CARG3 1376 | // A __tostring method in the string base metatable is ignored. 1377 | beq ->fff_restv // String key? 1378 | // Handle numbers inline, unless a number base metatable is present. 1379 | lwz TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) 1380 | checknum CARG3 1381 | cmplwi cr1, TMP0, 0 1382 | stp BASE, L->base // Add frame since C call can throw. 1383 | crorc 4*cr0+eq, 4*cr0+gt, 4*cr1+eq 1384 | stw PC, SAVE_PC // Redundant (but a defined value). 1385 | beq ->fff_fallback 1386 | ffgccheck 1387 | mr CARG1, L 1388 | mr CARG2, BASE 1389 |.if DUALNUM 1390 | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) 1391 |.else 1392 | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np) 1393 |.endif 1394 | // Returns GCstr *. 1395 | li CARG3, LJ_TSTR 1396 | b ->fff_restv 1397 | 1398 |//-- Base library: iterators ------------------------------------------- 1399 | 1400 |.ffunc next 1401 | cmplwi NARGS8:RC, 8 1402 | lwz CARG1, 0(BASE) 1403 | lwz TAB:CARG2, 4(BASE) 1404 | blt ->fff_fallback 1405 | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil. 1406 | checktab CARG1 1407 | lwz PC, FRAME_PC(BASE) 1408 | bne ->fff_fallback 1409 | stp BASE, L->base // Add frame since C call can throw. 1410 | mr CARG1, L 1411 | stp BASE, L->top // Dummy frame length is ok. 1412 | la CARG3, 8(BASE) 1413 | stw PC, SAVE_PC 1414 | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) 1415 | // Returns 0 at end of traversal. 1416 | cmplwi CRET1, 0 1417 | li CARG3, LJ_TNIL 1418 | beq ->fff_restv // End of traversal: return nil. 1419 | lfd f0, 8(BASE) // Copy key and value to results. 1420 | la RA, -8(BASE) 1421 | lfd f1, 16(BASE) 1422 | stfd f0, 0(RA) 1423 | li RD, (2+1)*8 1424 | stfd f1, 8(RA) 1425 | b ->fff_res 1426 | 1427 |.ffunc_1 pairs 1428 | checktab CARG3 1429 | lwz PC, FRAME_PC(BASE) 1430 | bne ->fff_fallback 1431#if LJ_52 1432 | lwz TAB:TMP2, TAB:CARG1->metatable 1433 | lfd f0, CFUNC:RB->upvalue[0] 1434 | cmplwi TAB:TMP2, 0 1435 | la RA, -8(BASE) 1436 | bne ->fff_fallback 1437#else 1438 | lfd f0, CFUNC:RB->upvalue[0] 1439 | la RA, -8(BASE) 1440#endif 1441 | stw TISNIL, 8(BASE) 1442 | li RD, (3+1)*8 1443 | stfd f0, 0(RA) 1444 | b ->fff_res 1445 | 1446 |.ffunc ipairs_aux 1447 | cmplwi NARGS8:RC, 16 1448 | lwz CARG3, 0(BASE) 1449 | lwz TAB:CARG1, 4(BASE) 1450 | lwz CARG4, 8(BASE) 1451 |.if DUALNUM 1452 | lwz TMP2, 12(BASE) 1453 |.else 1454 | lfd FARG2, 8(BASE) 1455 |.endif 1456 | blt ->fff_fallback 1457 | checktab CARG3 1458 | checknum cr1, CARG4 1459 | lwz PC, FRAME_PC(BASE) 1460 |.if DUALNUM 1461 | bne ->fff_fallback 1462 | bne cr1, ->fff_fallback 1463 |.else 1464 | lus TMP0, 0x3ff0 1465 | stw ZERO, TMPD_LO 1466 | bne ->fff_fallback 1467 | stw TMP0, TMPD_HI 1468 | bge cr1, ->fff_fallback 1469 | lfd FARG1, TMPD 1470 | toint TMP2, FARG2, f0 1471 |.endif 1472 | lwz TMP0, TAB:CARG1->asize 1473 | lwz TMP1, TAB:CARG1->array 1474 |.if not DUALNUM 1475 | fadd FARG2, FARG2, FARG1 1476 |.endif 1477 | addi TMP2, TMP2, 1 1478 | la RA, -8(BASE) 1479 | cmplw TMP0, TMP2 1480 |.if DUALNUM 1481 | stw TISNUM, 0(RA) 1482 | slwi TMP3, TMP2, 3 1483 | stw TMP2, 4(RA) 1484 |.else 1485 | slwi TMP3, TMP2, 3 1486 | stfd FARG2, 0(RA) 1487 |.endif 1488 | ble >2 // Not in array part? 1489 | lwzx TMP2, TMP1, TMP3 1490 | lfdx f0, TMP1, TMP3 1491 |1: 1492 | checknil TMP2 1493 | li RD, (0+1)*8 1494 | beq ->fff_res // End of iteration, return 0 results. 1495 | li RD, (2+1)*8 1496 | stfd f0, 8(RA) 1497 | b ->fff_res 1498 |2: // Check for empty hash part first. Otherwise call C function. 1499 | lwz TMP0, TAB:CARG1->hmask 1500 | cmplwi TMP0, 0 1501 | li RD, (0+1)*8 1502 | beq ->fff_res 1503 | mr CARG2, TMP2 1504 | bl extern lj_tab_getinth // (GCtab *t, int32_t key) 1505 | // Returns cTValue * or NULL. 1506 | cmplwi CRET1, 0 1507 | li RD, (0+1)*8 1508 | beq ->fff_res 1509 | lwz TMP2, 0(CRET1) 1510 | lfd f0, 0(CRET1) 1511 | b <1 1512 | 1513 |.ffunc_1 ipairs 1514 | checktab CARG3 1515 | lwz PC, FRAME_PC(BASE) 1516 | bne ->fff_fallback 1517#if LJ_52 1518 | lwz TAB:TMP2, TAB:CARG1->metatable 1519 | lfd f0, CFUNC:RB->upvalue[0] 1520 | cmplwi TAB:TMP2, 0 1521 | la RA, -8(BASE) 1522 | bne ->fff_fallback 1523#else 1524 | lfd f0, CFUNC:RB->upvalue[0] 1525 | la RA, -8(BASE) 1526#endif 1527 |.if DUALNUM 1528 | stw TISNUM, 8(BASE) 1529 |.else 1530 | stw ZERO, 8(BASE) 1531 |.endif 1532 | stw ZERO, 12(BASE) 1533 | li RD, (3+1)*8 1534 | stfd f0, 0(RA) 1535 | b ->fff_res 1536 | 1537 |//-- Base library: catch errors ---------------------------------------- 1538 | 1539 |.ffunc pcall 1540 | cmplwi NARGS8:RC, 8 1541 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) 1542 | blt ->fff_fallback 1543 | mr TMP2, BASE 1544 | la BASE, 8(BASE) 1545 | // Remember active hook before pcall. 1546 | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31 1547 | subi NARGS8:RC, NARGS8:RC, 8 1548 | addi PC, TMP3, 8+FRAME_PCALL 1549 | b ->vm_call_dispatch 1550 | 1551 |.ffunc xpcall 1552 | cmplwi NARGS8:RC, 16 1553 | lwz CARG4, 8(BASE) 1554 | lfd FARG2, 8(BASE) 1555 | lfd FARG1, 0(BASE) 1556 | blt ->fff_fallback 1557 | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) 1558 | mr TMP2, BASE 1559 | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function. 1560 | la BASE, 16(BASE) 1561 | // Remember active hook before pcall. 1562 | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 1563 | stfd FARG2, 0(TMP2) // Swap function and traceback. 1564 | subi NARGS8:RC, NARGS8:RC, 16 1565 | stfd FARG1, 8(TMP2) 1566 | addi PC, TMP1, 16+FRAME_PCALL 1567 | b ->vm_call_dispatch 1568 | 1569 |//-- Coroutine library -------------------------------------------------- 1570 | 1571 |.macro coroutine_resume_wrap, resume 1572 |.if resume 1573 |.ffunc_1 coroutine_resume 1574 | cmpwi CARG3, LJ_TTHREAD; bne ->fff_fallback 1575 |.else 1576 |.ffunc coroutine_wrap_aux 1577 | lwz L:CARG1, CFUNC:RB->upvalue[0].gcr 1578 |.endif 1579 | lbz TMP0, L:CARG1->status 1580 | lp TMP1, L:CARG1->cframe 1581 | lp CARG2, L:CARG1->top 1582 | cmplwi cr0, TMP0, LUA_YIELD 1583 | lp TMP2, L:CARG1->base 1584 | cmplwi cr1, TMP1, 0 1585 | lwz TMP0, L:CARG1->maxstack 1586 | cmplw cr7, CARG2, TMP2 1587 | lwz PC, FRAME_PC(BASE) 1588 | crorc 4*cr6+lt, 4*cr0+gt, 4*cr1+eq // st>LUA_YIELD || cframe!=0 1589 | add TMP2, CARG2, NARGS8:RC 1590 | crandc 4*cr6+gt, 4*cr7+eq, 4*cr0+eq // base==top && st!=LUA_YIELD 1591 | cmplw cr1, TMP2, TMP0 1592 | cror 4*cr6+lt, 4*cr6+lt, 4*cr6+gt 1593 | stw PC, SAVE_PC 1594 | cror 4*cr6+lt, 4*cr6+lt, 4*cr1+gt // cond1 || cond2 || stackov 1595 | stp BASE, L->base 1596 | blt cr6, ->fff_fallback 1597 |1: 1598 |.if resume 1599 | addi BASE, BASE, 8 // Keep resumed thread in stack for GC. 1600 | subi NARGS8:RC, NARGS8:RC, 8 1601 | subi TMP2, TMP2, 8 1602 |.endif 1603 | stp TMP2, L:CARG1->top 1604 | li TMP1, 0 1605 | stp BASE, L->top 1606 |2: // Move args to coroutine. 1607 | cmpw TMP1, NARGS8:RC 1608 | lfdx f0, BASE, TMP1 1609 | beq >3 1610 | stfdx f0, CARG2, TMP1 1611 | addi TMP1, TMP1, 8 1612 | b <2 1613 |3: 1614 | li CARG3, 0 1615 | mr L:SAVE0, L:CARG1 1616 | li CARG4, 0 1617 | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) 1618 | // Returns thread status. 1619 |4: 1620 | lp TMP2, L:SAVE0->base 1621 | cmplwi CRET1, LUA_YIELD 1622 | lp TMP3, L:SAVE0->top 1623 | li_vmstate INTERP 1624 | lp BASE, L->base 1625 | st_vmstate 1626 | bgt >8 1627 | sub RD, TMP3, TMP2 1628 | lwz TMP0, L->maxstack 1629 | cmplwi RD, 0 1630 | add TMP1, BASE, RD 1631 | beq >6 // No results? 1632 | cmplw TMP1, TMP0 1633 | li TMP1, 0 1634 | bgt >9 // Need to grow stack? 1635 | 1636 | subi TMP3, RD, 8 1637 | stp TMP2, L:SAVE0->top // Clear coroutine stack. 1638 |5: // Move results from coroutine. 1639 | cmplw TMP1, TMP3 1640 | lfdx f0, TMP2, TMP1 1641 | stfdx f0, BASE, TMP1 1642 | addi TMP1, TMP1, 8 1643 | bne <5 1644 |6: 1645 | andix. TMP0, PC, FRAME_TYPE 1646 |.if resume 1647 | li TMP1, LJ_TTRUE 1648 | la RA, -8(BASE) 1649 | stw TMP1, -8(BASE) // Prepend true to results. 1650 | addi RD, RD, 16 1651 |.else 1652 | mr RA, BASE 1653 | addi RD, RD, 8 1654 |.endif 1655 |7: 1656 | stw PC, SAVE_PC 1657 | mr MULTRES, RD 1658 | beq ->BC_RET_Z 1659 | b ->vm_return 1660 | 1661 |8: // Coroutine returned with error (at co->top-1). 1662 |.if resume 1663 | andix. TMP0, PC, FRAME_TYPE 1664 | la TMP3, -8(TMP3) 1665 | li TMP1, LJ_TFALSE 1666 | lfd f0, 0(TMP3) 1667 | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. 1668 | li RD, (2+1)*8 1669 | stw TMP1, -8(BASE) // Prepend false to results. 1670 | la RA, -8(BASE) 1671 | stfd f0, 0(BASE) // Copy error message. 1672 | b <7 1673 |.else 1674 | mr CARG1, L 1675 | mr CARG2, L:SAVE0 1676 | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) 1677 |.endif 1678 | 1679 |9: // Handle stack expansion on return from yield. 1680 | mr CARG1, L 1681 | srwi CARG2, RD, 3 1682 | bl extern lj_state_growstack // (lua_State *L, int n) 1683 | li CRET1, 0 1684 | b <4 1685 |.endmacro 1686 | 1687 | coroutine_resume_wrap 1 // coroutine.resume 1688 | coroutine_resume_wrap 0 // coroutine.wrap 1689 | 1690 |.ffunc coroutine_yield 1691 | lp TMP0, L->cframe 1692 | add TMP1, BASE, NARGS8:RC 1693 | stp BASE, L->base 1694 | andix. TMP0, TMP0, CFRAME_RESUME 1695 | stp TMP1, L->top 1696 | li CRET1, LUA_YIELD 1697 | beq ->fff_fallback 1698 | stp ZERO, L->cframe 1699 | stb CRET1, L->status 1700 | b ->vm_leave_unw 1701 | 1702 |//-- Math library ------------------------------------------------------- 1703 | 1704 |.ffunc_1 math_abs 1705 | checknum CARG3 1706 |.if DUALNUM 1707 | bne >2 1708 | srawi TMP1, CARG1, 31 1709 | xor TMP2, TMP1, CARG1 1710 |.if GPR64 1711 | lus TMP0, 0x8000 1712 | sub CARG1, TMP2, TMP1 1713 | cmplw CARG1, TMP0 1714 | beq >1 1715 |.else 1716 | sub. CARG1, TMP2, TMP1 1717 | blt >1 1718 |.endif 1719 |->fff_resi: 1720 | lwz PC, FRAME_PC(BASE) 1721 | la RA, -8(BASE) 1722 | stw TISNUM, -8(BASE) 1723 | stw CRET1, -4(BASE) 1724 | b ->fff_res1 1725 |1: 1726 | lus CARG3, 0x41e0 // 2^31. 1727 | li CARG1, 0 1728 | b ->fff_restv 1729 |2: 1730 |.endif 1731 | bge ->fff_fallback 1732 | rlwinm CARG3, CARG3, 0, 1, 31 1733 | // Fallthrough. 1734 | 1735 |->fff_restv: 1736 | // CARG3/CARG1 = TValue result. 1737 | lwz PC, FRAME_PC(BASE) 1738 | stw CARG3, -8(BASE) 1739 | la RA, -8(BASE) 1740 | stw CARG1, -4(BASE) 1741 |->fff_res1: 1742 | // RA = results, PC = return. 1743 | li RD, (1+1)*8 1744 |->fff_res: 1745 | // RA = results, RD = (nresults+1)*8, PC = return. 1746 | andix. TMP0, PC, FRAME_TYPE 1747 | mr MULTRES, RD 1748 | bney ->vm_return 1749 | lwz INS, -4(PC) 1750 | decode_RB8 RB, INS 1751 |5: 1752 | cmplw RB, RD // More results expected? 1753 | decode_RA8 TMP0, INS 1754 | bgt >6 1755 | ins_next1 1756 | // Adjust BASE. KBASE is assumed to be set for the calling frame. 1757 | sub BASE, RA, TMP0 1758 | ins_next2 1759 | 1760 |6: // Fill up results with nil. 1761 | subi TMP1, RD, 8 1762 | addi RD, RD, 8 1763 | stwx TISNIL, RA, TMP1 1764 | b <5 1765 | 1766 |.macro math_extern, func 1767 | .ffunc_n math_ .. func 1768 | blex func 1769 | b ->fff_resn 1770 |.endmacro 1771 | 1772 |.macro math_extern2, func 1773 | .ffunc_nn math_ .. func 1774 | blex func 1775 | b ->fff_resn 1776 |.endmacro 1777 | 1778 |.macro math_round, func 1779 | .ffunc_1 math_ .. func 1780 | checknum CARG3; beqy ->fff_restv 1781 | rlwinm TMP2, CARG3, 12, 21, 31 1782 | bge ->fff_fallback 1783 | addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023 1784 | cmplwi cr1, TMP2, 31 // 0 <= exp < 31? 1785 | subfic TMP0, TMP2, 31 1786 | blt >3 1787 | slwi TMP1, CARG3, 11 1788 | srwi TMP3, CARG1, 21 1789 | oris TMP1, TMP1, 0x8000 1790 | addi TMP2, TMP2, 1 1791 | or TMP1, TMP1, TMP3 1792 | slwi CARG2, CARG1, 11 1793 | bge cr1, >4 1794 | slw TMP3, TMP1, TMP2 1795 | srw RD, TMP1, TMP0 1796 | or TMP3, TMP3, CARG2 1797 | srawi TMP2, CARG3, 31 1798 |.if "func" == "floor" 1799 | and TMP1, TMP3, TMP2 1800 | addic TMP0, TMP1, -1 1801 | subfe TMP1, TMP0, TMP1 1802 | add CARG1, RD, TMP1 1803 | xor CARG1, CARG1, TMP2 1804 | sub CARG1, CARG1, TMP2 1805 | b ->fff_resi 1806 |.else 1807 | andc TMP1, TMP3, TMP2 1808 | addic TMP0, TMP1, -1 1809 | subfe TMP1, TMP0, TMP1 1810 | add CARG1, RD, TMP1 1811 | cmpw CARG1, RD 1812 | xor CARG1, CARG1, TMP2 1813 | sub CARG1, CARG1, TMP2 1814 | bge ->fff_resi 1815 | // Overflow to 2^31. 1816 | lus CARG3, 0x41e0 // 2^31. 1817 | li CARG1, 0 1818 | b ->fff_restv 1819 |.endif 1820 |3: // |x| < 1 1821 | slwi TMP2, CARG3, 1 1822 | srawi TMP1, CARG3, 31 1823 | or TMP2, CARG1, TMP2 // ztest = (hi+hi) | lo 1824 |.if "func" == "floor" 1825 | and TMP1, TMP2, TMP1 // (ztest & sign) == 0 ? 0 : -1 1826 | subfic TMP2, TMP1, 0 1827 | subfe CARG1, CARG1, CARG1 1828 |.else 1829 | andc TMP1, TMP2, TMP1 // (ztest & ~sign) == 0 ? 0 : 1 1830 | addic TMP2, TMP1, -1 1831 | subfe CARG1, TMP2, TMP1 1832 |.endif 1833 | b ->fff_resi 1834 |4: // exp >= 31. Check for -(2^31). 1835 | xoris TMP1, TMP1, 0x8000 1836 | srawi TMP2, CARG3, 31 1837 |.if "func" == "floor" 1838 | or TMP1, TMP1, CARG2 1839 |.endif 1840 |.if PPE 1841 | orc TMP1, TMP1, TMP2 1842 | cmpwi TMP1, 0 1843 |.else 1844 | orc. TMP1, TMP1, TMP2 1845 |.endif 1846 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 1847 | lus CARG1, 0x8000 // -(2^31). 1848 | beqy ->fff_resi 1849 |5: 1850 | lfd FARG1, 0(BASE) 1851 | blex func 1852 | b ->fff_resn 1853 |.endmacro 1854 | 1855 |.if DUALNUM 1856 | math_round floor 1857 | math_round ceil 1858 |.else 1859 | // NYI: use internal implementation. 1860 | math_extern floor 1861 | math_extern ceil 1862 |.endif 1863 | 1864 |.if SQRT 1865 |.ffunc_n math_sqrt 1866 | fsqrt FARG1, FARG1 1867 | b ->fff_resn 1868 |.else 1869 | math_extern sqrt 1870 |.endif 1871 | 1872 |.ffunc math_log 1873 | cmplwi NARGS8:RC, 8 1874 | lwz CARG3, 0(BASE) 1875 | lfd FARG1, 0(BASE) 1876 | bne ->fff_fallback // Need exactly 1 argument. 1877 | checknum CARG3; bge ->fff_fallback 1878 | blex log 1879 | b ->fff_resn 1880 | 1881 | math_extern log10 1882 | math_extern exp 1883 | math_extern sin 1884 | math_extern cos 1885 | math_extern tan 1886 | math_extern asin 1887 | math_extern acos 1888 | math_extern atan 1889 | math_extern sinh 1890 | math_extern cosh 1891 | math_extern tanh 1892 | math_extern2 pow 1893 | math_extern2 atan2 1894 | math_extern2 fmod 1895 | 1896 |->ff_math_deg: 1897 |.ffunc_n math_rad 1898 | lfd FARG2, CFUNC:RB->upvalue[0] 1899 | fmul FARG1, FARG1, FARG2 1900 | b ->fff_resn 1901 | 1902 |.if DUALNUM 1903 |.ffunc math_ldexp 1904 | cmplwi NARGS8:RC, 16 1905 | lwz CARG3, 0(BASE) 1906 | lfd FARG1, 0(BASE) 1907 | lwz CARG4, 8(BASE) 1908 |.if GPR64 1909 | lwz CARG2, 12(BASE) 1910 |.else 1911 | lwz CARG1, 12(BASE) 1912 |.endif 1913 | blt ->fff_fallback 1914 | checknum CARG3; bge ->fff_fallback 1915 | checknum CARG4; bne ->fff_fallback 1916 |.else 1917 |.ffunc_nn math_ldexp 1918 |.if GPR64 1919 | toint CARG2, FARG2 1920 |.else 1921 | toint CARG1, FARG2 1922 |.endif 1923 |.endif 1924 | blex ldexp 1925 | b ->fff_resn 1926 | 1927 |.ffunc_n math_frexp 1928 |.if GPR64 1929 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) 1930 |.else 1931 | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) 1932 |.endif 1933 | lwz PC, FRAME_PC(BASE) 1934 | blex frexp 1935 | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH) 1936 | la RA, -8(BASE) 1937 |.if not DUALNUM 1938 | tonum_i FARG2, TMP1 1939 |.endif 1940 | stfd FARG1, 0(RA) 1941 | li RD, (2+1)*8 1942 |.if DUALNUM 1943 | stw TISNUM, 8(RA) 1944 | stw TMP1, 12(RA) 1945 |.else 1946 | stfd FARG2, 8(RA) 1947 |.endif 1948 | b ->fff_res 1949 | 1950 |.ffunc_n math_modf 1951 |.if GPR64 1952 | la CARG2, -8(BASE) 1953 |.else 1954 | la CARG1, -8(BASE) 1955 |.endif 1956 | lwz PC, FRAME_PC(BASE) 1957 | blex modf 1958 | la RA, -8(BASE) 1959 | stfd FARG1, 0(BASE) 1960 | li RD, (2+1)*8 1961 | b ->fff_res 1962 | 1963 |.macro math_minmax, name, ismax 1964 |.if DUALNUM 1965 | .ffunc_1 name 1966 | checknum CARG3 1967 | addi TMP1, BASE, 8 1968 | add TMP2, BASE, NARGS8:RC 1969 | bne >4 1970 |1: // Handle integers. 1971 | lwz CARG4, 0(TMP1) 1972 | cmplw cr1, TMP1, TMP2 1973 | lwz CARG2, 4(TMP1) 1974 | bge cr1, ->fff_resi 1975 | checknum CARG4 1976 | xoris TMP0, CARG1, 0x8000 1977 | xoris TMP3, CARG2, 0x8000 1978 | bne >3 1979 | subfc TMP3, TMP3, TMP0 1980 | subfe TMP0, TMP0, TMP0 1981 |.if ismax 1982 | andc TMP3, TMP3, TMP0 1983 |.else 1984 | and TMP3, TMP3, TMP0 1985 |.endif 1986 | add CARG1, TMP3, CARG2 1987 |.if GPR64 1988 | rldicl CARG1, CARG1, 0, 32 1989 |.endif 1990 | addi TMP1, TMP1, 8 1991 | b <1 1992 |3: 1993 | bge ->fff_fallback 1994 | // Convert intermediate result to number and continue below. 1995 | tonum_i FARG1, CARG1 1996 | lfd FARG2, 0(TMP1) 1997 | b >6 1998 |4: 1999 | lfd FARG1, 0(BASE) 2000 | bge ->fff_fallback 2001 |5: // Handle numbers. 2002 | lwz CARG4, 0(TMP1) 2003 | cmplw cr1, TMP1, TMP2 2004 | lfd FARG2, 0(TMP1) 2005 | bge cr1, ->fff_resn 2006 | checknum CARG4; bge >7 2007 |6: 2008 | fsub f0, FARG1, FARG2 2009 | addi TMP1, TMP1, 8 2010 |.if ismax 2011 | fsel FARG1, f0, FARG1, FARG2 2012 |.else 2013 | fsel FARG1, f0, FARG2, FARG1 2014 |.endif 2015 | b <5 2016 |7: // Convert integer to number and continue above. 2017 | lwz CARG2, 4(TMP1) 2018 | bne ->fff_fallback 2019 | tonum_i FARG2, CARG2 2020 | b <6 2021 |.else 2022 | .ffunc_n name 2023 | li TMP1, 8 2024 |1: 2025 | lwzx CARG2, BASE, TMP1 2026 | lfdx FARG2, BASE, TMP1 2027 | cmplw cr1, TMP1, NARGS8:RC 2028 | checknum CARG2 2029 | bge cr1, ->fff_resn 2030 | bge ->fff_fallback 2031 | fsub f0, FARG1, FARG2 2032 | addi TMP1, TMP1, 8 2033 |.if ismax 2034 | fsel FARG1, f0, FARG1, FARG2 2035 |.else 2036 | fsel FARG1, f0, FARG2, FARG1 2037 |.endif 2038 | b <1 2039 |.endif 2040 |.endmacro 2041 | 2042 | math_minmax math_min, 0 2043 | math_minmax math_max, 1 2044 | 2045 |//-- String library ----------------------------------------------------- 2046 | 2047 |.ffunc_1 string_len 2048 | checkstr CARG3; bne ->fff_fallback 2049 | lwz CRET1, STR:CARG1->len 2050 | b ->fff_resi 2051 | 2052 |.ffunc string_byte // Only handle the 1-arg case here. 2053 | cmplwi NARGS8:RC, 8 2054 | lwz CARG3, 0(BASE) 2055 | lwz STR:CARG1, 4(BASE) 2056 | bne ->fff_fallback // Need exactly 1 argument. 2057 | checkstr CARG3 2058 | bne ->fff_fallback 2059 | lwz TMP0, STR:CARG1->len 2060 |.if DUALNUM 2061 | lbz CARG1, STR:CARG1[1] // Access is always ok (NUL at end). 2062 | li RD, (0+1)*8 2063 | lwz PC, FRAME_PC(BASE) 2064 | cmplwi TMP0, 0 2065 | la RA, -8(BASE) 2066 | beqy ->fff_res 2067 | b ->fff_resi 2068 |.else 2069 | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end). 2070 | addic TMP3, TMP0, -1 // RD = ((str->len != 0)+1)*8 2071 | subfe RD, TMP3, TMP0 2072 | stw TMP1, TONUM_LO // Inlined tonum_u f0, TMP1. 2073 | addi RD, RD, 1 2074 | lfd f0, TONUM_D 2075 | la RA, -8(BASE) 2076 | lwz PC, FRAME_PC(BASE) 2077 | fsub f0, f0, TOBIT 2078 | slwi RD, RD, 3 2079 | stfd f0, 0(RA) 2080 | b ->fff_res 2081 |.endif 2082 | 2083 |.ffunc string_char // Only handle the 1-arg case here. 2084 | ffgccheck 2085 | cmplwi NARGS8:RC, 8 2086 | lwz CARG3, 0(BASE) 2087 |.if DUALNUM 2088 | lwz TMP0, 4(BASE) 2089 | bne ->fff_fallback // Exactly 1 argument. 2090 | checknum CARG3; bne ->fff_fallback 2091 | la CARG2, 7(BASE) 2092 |.else 2093 | lfd FARG1, 0(BASE) 2094 | bne ->fff_fallback // Exactly 1 argument. 2095 | checknum CARG3; bge ->fff_fallback 2096 | toint TMP0, FARG1 2097 | la CARG2, TMPD_BLO 2098 |.endif 2099 | li CARG3, 1 2100 | cmplwi TMP0, 255; bgt ->fff_fallback 2101 |->fff_newstr: 2102 | mr CARG1, L 2103 | stp BASE, L->base 2104 | stw PC, SAVE_PC 2105 | bl extern lj_str_new // (lua_State *L, char *str, size_t l) 2106 | // Returns GCstr *. 2107 | lp BASE, L->base 2108 | li CARG3, LJ_TSTR 2109 | b ->fff_restv 2110 | 2111 |.ffunc string_sub 2112 | ffgccheck 2113 | cmplwi NARGS8:RC, 16 2114 | lwz CARG3, 16(BASE) 2115 |.if not DUALNUM 2116 | lfd f0, 16(BASE) 2117 |.endif 2118 | lwz TMP0, 0(BASE) 2119 | lwz STR:CARG1, 4(BASE) 2120 | blt ->fff_fallback 2121 | lwz CARG2, 8(BASE) 2122 |.if DUALNUM 2123 | lwz TMP1, 12(BASE) 2124 |.else 2125 | lfd f1, 8(BASE) 2126 |.endif 2127 | li TMP2, -1 2128 | beq >1 2129 |.if DUALNUM 2130 | checknum CARG3 2131 | lwz TMP2, 20(BASE) 2132 | bne ->fff_fallback 2133 |1: 2134 | checknum CARG2; bne ->fff_fallback 2135 |.else 2136 | checknum CARG3; bge ->fff_fallback 2137 | toint TMP2, f0 2138 |1: 2139 | checknum CARG2; bge ->fff_fallback 2140 |.endif 2141 | checkstr TMP0; bne ->fff_fallback 2142 |.if not DUALNUM 2143 | toint TMP1, f1 2144 |.endif 2145 | lwz TMP0, STR:CARG1->len 2146 | cmplw TMP0, TMP2 // len < end? (unsigned compare) 2147 | addi TMP3, TMP2, 1 2148 | blt >5 2149 |2: 2150 | cmpwi TMP1, 0 // start <= 0? 2151 | add TMP3, TMP1, TMP0 2152 | ble >7 2153 |3: 2154 | sub CARG3, TMP2, TMP1 2155 | addi CARG2, STR:CARG1, #STR-1 2156 | srawi TMP0, CARG3, 31 2157 | addi CARG3, CARG3, 1 2158 | add CARG2, CARG2, TMP1 2159 | andc CARG3, CARG3, TMP0 2160 |.if GPR64 2161 | rldicl CARG2, CARG2, 0, 32 2162 | rldicl CARG3, CARG3, 0, 32 2163 |.endif 2164 | b ->fff_newstr 2165 | 2166 |5: // Negative end or overflow. 2167 | cmpw TMP0, TMP2 // len >= end? (signed compare) 2168 | add TMP2, TMP0, TMP3 // Negative end: end = end+len+1. 2169 | bge <2 2170 | mr TMP2, TMP0 // Overflow: end = len. 2171 | b <2 2172 | 2173 |7: // Negative start or underflow. 2174 | .gpr64 extsw TMP1, TMP1 2175 | addic CARG3, TMP1, -1 2176 | subfe CARG3, CARG3, CARG3 2177 | srawi CARG2, TMP3, 31 // Note: modifies carry. 2178 | andc TMP3, TMP3, CARG3 2179 | andc TMP1, TMP3, CARG2 2180 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) 2181 | b <3 2182 | 2183 |.ffunc string_rep // Only handle the 1-char case inline. 2184 | ffgccheck 2185 | cmplwi NARGS8:RC, 16 2186 | lwz TMP0, 0(BASE) 2187 | lwz STR:CARG1, 4(BASE) 2188 | lwz CARG4, 8(BASE) 2189 |.if DUALNUM 2190 | lwz CARG3, 12(BASE) 2191 |.else 2192 | lfd FARG2, 8(BASE) 2193 |.endif 2194 | bne ->fff_fallback // Exactly 2 arguments. 2195 | checkstr TMP0; bne ->fff_fallback 2196 |.if DUALNUM 2197 | checknum CARG4; bne ->fff_fallback 2198 |.else 2199 | checknum CARG4; bge ->fff_fallback 2200 | toint CARG3, FARG2 2201 |.endif 2202 | lwz TMP0, STR:CARG1->len 2203 | cmpwi CARG3, 0 2204 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 2205 | ble >2 // Count <= 0? (or non-int) 2206 | cmplwi TMP0, 1 2207 | subi TMP2, CARG3, 1 2208 | blt >2 // Zero length string? 2209 | cmplw cr1, TMP1, CARG3 2210 | bne ->fff_fallback // Fallback for > 1-char strings. 2211 | lbz TMP0, STR:CARG1[1] 2212 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 2213 | blt cr1, ->fff_fallback 2214 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). 2215 | cmplwi TMP2, 0 2216 | stbx TMP0, CARG2, TMP2 2217 | subi TMP2, TMP2, 1 2218 | bne <1 2219 | b ->fff_newstr 2220 |2: // Return empty string. 2221 | la STR:CARG1, DISPATCH_GL(strempty)(DISPATCH) 2222 | li CARG3, LJ_TSTR 2223 | b ->fff_restv 2224 | 2225 |.ffunc string_reverse 2226 | ffgccheck 2227 | cmplwi NARGS8:RC, 8 2228 | lwz CARG3, 0(BASE) 2229 | lwz STR:CARG1, 4(BASE) 2230 | blt ->fff_fallback 2231 | checkstr CARG3 2232 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 2233 | bne ->fff_fallback 2234 | lwz CARG3, STR:CARG1->len 2235 | la CARG1, #STR(STR:CARG1) 2236 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 2237 | li TMP2, 0 2238 | cmplw TMP1, CARG3 2239 | subi TMP3, CARG3, 1 2240 | blt ->fff_fallback 2241 |1: // Reverse string copy. 2242 | cmpwi TMP3, 0 2243 | lbzx TMP1, CARG1, TMP2 2244 | blty ->fff_newstr 2245 | stbx TMP1, CARG2, TMP3 2246 | subi TMP3, TMP3, 1 2247 | addi TMP2, TMP2, 1 2248 | b <1 2249 | 2250 |.macro ffstring_case, name, lo 2251 | .ffunc name 2252 | ffgccheck 2253 | cmplwi NARGS8:RC, 8 2254 | lwz CARG3, 0(BASE) 2255 | lwz STR:CARG1, 4(BASE) 2256 | blt ->fff_fallback 2257 | checkstr CARG3 2258 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 2259 | bne ->fff_fallback 2260 | lwz CARG3, STR:CARG1->len 2261 | la CARG1, #STR(STR:CARG1) 2262 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 2263 | cmplw TMP1, CARG3 2264 | li TMP2, 0 2265 | blt ->fff_fallback 2266 |1: // ASCII case conversion. 2267 | cmplw TMP2, CARG3 2268 | lbzx TMP1, CARG1, TMP2 2269 | bgey ->fff_newstr 2270 | subi TMP0, TMP1, lo 2271 | xori TMP3, TMP1, 0x20 2272 | addic TMP0, TMP0, -26 2273 | subfe TMP3, TMP3, TMP3 2274 | rlwinm TMP3, TMP3, 0, 26, 26 // x &= 0x20. 2275 | xor TMP1, TMP1, TMP3 2276 | stbx TMP1, CARG2, TMP2 2277 | addi TMP2, TMP2, 1 2278 | b <1 2279 |.endmacro 2280 | 2281 |ffstring_case string_lower, 65 2282 |ffstring_case string_upper, 97 2283 | 2284 |//-- Table library ------------------------------------------------------ 2285 | 2286 |.ffunc_1 table_getn 2287 | checktab CARG3; bne ->fff_fallback 2288 | bl extern lj_tab_len // (GCtab *t) 2289 | // Returns uint32_t (but less than 2^31). 2290 | b ->fff_resi 2291 | 2292 |//-- Bit library -------------------------------------------------------- 2293 | 2294 |.macro .ffunc_bit, name 2295 |.if DUALNUM 2296 | .ffunc_1 bit_..name 2297 | checknum CARG3; bnel ->fff_tobit_fb 2298 |.else 2299 | .ffunc_n bit_..name 2300 | fadd FARG1, FARG1, TOBIT 2301 | stfd FARG1, TMPD 2302 | lwz CARG1, TMPD_LO 2303 |.endif 2304 |.endmacro 2305 | 2306 |.macro .ffunc_bit_op, name, ins 2307 | .ffunc_bit name 2308 | addi TMP1, BASE, 8 2309 | add TMP2, BASE, NARGS8:RC 2310 |1: 2311 | lwz CARG4, 0(TMP1) 2312 | cmplw cr1, TMP1, TMP2 2313 |.if DUALNUM 2314 | lwz CARG2, 4(TMP1) 2315 |.else 2316 | lfd FARG1, 0(TMP1) 2317 |.endif 2318 | bgey cr1, ->fff_resi 2319 | checknum CARG4 2320 |.if DUALNUM 2321 | bnel ->fff_bitop_fb 2322 |.else 2323 | fadd FARG1, FARG1, TOBIT 2324 | bge ->fff_fallback 2325 | stfd FARG1, TMPD 2326 | lwz CARG2, TMPD_LO 2327 |.endif 2328 | ins CARG1, CARG1, CARG2 2329 | addi TMP1, TMP1, 8 2330 | b <1 2331 |.endmacro 2332 | 2333 |.ffunc_bit_op band, and 2334 |.ffunc_bit_op bor, or 2335 |.ffunc_bit_op bxor, xor 2336 | 2337 |.ffunc_bit bswap 2338 | rotlwi TMP0, CARG1, 8 2339 | rlwimi TMP0, CARG1, 24, 0, 7 2340 | rlwimi TMP0, CARG1, 24, 16, 23 2341 | mr CRET1, TMP0 2342 | b ->fff_resi 2343 | 2344 |.ffunc_bit bnot 2345 | not CRET1, CARG1 2346 | b ->fff_resi 2347 | 2348 |.macro .ffunc_bit_sh, name, ins, shmod 2349 |.if DUALNUM 2350 | .ffunc_2 bit_..name 2351 | checknum CARG3; bnel ->fff_tobit_fb 2352 | // Note: no inline conversion from number for 2nd argument! 2353 | checknum CARG4; bne ->fff_fallback 2354 |.else 2355 | .ffunc_nn bit_..name 2356 | fadd FARG1, FARG1, TOBIT 2357 | fadd FARG2, FARG2, TOBIT 2358 | stfd FARG1, TMPD 2359 | lwz CARG1, TMPD_LO 2360 | stfd FARG2, TMPD 2361 | lwz CARG2, TMPD_LO 2362 |.endif 2363 |.if shmod == 1 2364 | rlwinm CARG2, CARG2, 0, 27, 31 2365 |.elif shmod == 2 2366 | neg CARG2, CARG2 2367 |.endif 2368 | ins CRET1, CARG1, CARG2 2369 | b ->fff_resi 2370 |.endmacro 2371 | 2372 |.ffunc_bit_sh lshift, slw, 1 2373 |.ffunc_bit_sh rshift, srw, 1 2374 |.ffunc_bit_sh arshift, sraw, 1 2375 |.ffunc_bit_sh rol, rotlw, 0 2376 |.ffunc_bit_sh ror, rotlw, 2 2377 | 2378 |.ffunc_bit tobit 2379 |.if DUALNUM 2380 | b ->fff_resi 2381 |.else 2382 |->fff_resi: 2383 | tonum_i FARG1, CRET1 2384 |.endif 2385 |->fff_resn: 2386 | lwz PC, FRAME_PC(BASE) 2387 | la RA, -8(BASE) 2388 | stfd FARG1, -8(BASE) 2389 | b ->fff_res1 2390 | 2391 |// Fallback FP number to bit conversion. 2392 |->fff_tobit_fb: 2393 |.if DUALNUM 2394 | lfd FARG1, 0(BASE) 2395 | bgt ->fff_fallback 2396 | fadd FARG1, FARG1, TOBIT 2397 | stfd FARG1, TMPD 2398 | lwz CARG1, TMPD_LO 2399 | blr 2400 |.endif 2401 |->fff_bitop_fb: 2402 |.if DUALNUM 2403 | lfd FARG1, 0(TMP1) 2404 | bgt ->fff_fallback 2405 | fadd FARG1, FARG1, TOBIT 2406 | stfd FARG1, TMPD 2407 | lwz CARG2, TMPD_LO 2408 | blr 2409 |.endif 2410 | 2411 |//----------------------------------------------------------------------- 2412 | 2413 |->fff_fallback: // Call fast function fallback handler. 2414 | // BASE = new base, RB = CFUNC, RC = nargs*8 2415 | lp TMP3, CFUNC:RB->f 2416 | add TMP1, BASE, NARGS8:RC 2417 | lwz PC, FRAME_PC(BASE) // Fallback may overwrite PC. 2418 | addi TMP0, TMP1, 8*LUA_MINSTACK 2419 | lwz TMP2, L->maxstack 2420 | stw PC, SAVE_PC // Redundant (but a defined value). 2421 | .toc lp TMP3, 0(TMP3) 2422 | cmplw TMP0, TMP2 2423 | stp BASE, L->base 2424 | stp TMP1, L->top 2425 | mr CARG1, L 2426 | bgt >5 // Need to grow stack. 2427 | mtctr TMP3 2428 | bctrl // (lua_State *L) 2429 | // Either throws an error, or recovers and returns -1, 0 or nresults+1. 2430 | lp BASE, L->base 2431 | cmpwi CRET1, 0 2432 | slwi RD, CRET1, 3 2433 | la RA, -8(BASE) 2434 | bgt ->fff_res // Returned nresults+1? 2435 |1: // Returned 0 or -1: retry fast path. 2436 | lp TMP0, L->top 2437 | lwz LFUNC:RB, FRAME_FUNC(BASE) 2438 | sub NARGS8:RC, TMP0, BASE 2439 | bne ->vm_call_tail // Returned -1? 2440 | ins_callt // Returned 0: retry fast path. 2441 | 2442 |// Reconstruct previous base for vmeta_call during tailcall. 2443 |->vm_call_tail: 2444 | andix. TMP0, PC, FRAME_TYPE 2445 | rlwinm TMP1, PC, 0, 0, 28 2446 | bne >3 2447 | lwz INS, -4(PC) 2448 | decode_RA8 TMP1, INS 2449 | addi TMP1, TMP1, 8 2450 |3: 2451 | sub TMP2, BASE, TMP1 2452 | b ->vm_call_dispatch // Resolve again for tailcall. 2453 | 2454 |5: // Grow stack for fallback handler. 2455 | li CARG2, LUA_MINSTACK 2456 | bl extern lj_state_growstack // (lua_State *L, int n) 2457 | lp BASE, L->base 2458 | cmpw TMP0, TMP0 // Set 4*cr0+eq to force retry. 2459 | b <1 2460 | 2461 |->fff_gcstep: // Call GC step function. 2462 | // BASE = new base, RC = nargs*8 2463 | mflr SAVE0 2464 | stp BASE, L->base 2465 | add TMP0, BASE, NARGS8:RC 2466 | stw PC, SAVE_PC // Redundant (but a defined value). 2467 | stp TMP0, L->top 2468 | mr CARG1, L 2469 | bl extern lj_gc_step // (lua_State *L) 2470 | lp BASE, L->base 2471 | mtlr SAVE0 2472 | lp TMP0, L->top 2473 | sub NARGS8:RC, TMP0, BASE 2474 | lwz CFUNC:RB, FRAME_FUNC(BASE) 2475 | blr 2476 | 2477 |//----------------------------------------------------------------------- 2478 |//-- Special dispatch targets ------------------------------------------- 2479 |//----------------------------------------------------------------------- 2480 | 2481 |->vm_record: // Dispatch target for recording phase. 2482 |.if JIT 2483 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) 2484 | andix. TMP0, TMP3, HOOK_VMEVENT // No recording while in vmevent. 2485 | bne >5 2486 | // Decrement the hookcount for consistency, but always do the call. 2487 | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH) 2488 | andix. TMP0, TMP3, HOOK_ACTIVE 2489 | bne >1 2490 | subi TMP2, TMP2, 1 2491 | andi. TMP0, TMP3, LUA_MASKLINE|LUA_MASKCOUNT 2492 | beqy >1 2493 | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH) 2494 | b >1 2495 |.endif 2496 | 2497 |->vm_rethook: // Dispatch target for return hooks. 2498 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) 2499 | andix. TMP0, TMP3, HOOK_ACTIVE // Hook already active? 2500 | beq >1 2501 |5: // Re-dispatch to static ins. 2502 | addi TMP1, TMP1, GG_DISP2STATIC // Assumes decode_OPP TMP1, INS. 2503 | lpx TMP0, DISPATCH, TMP1 2504 | mtctr TMP0 2505 | bctr 2506 | 2507 |->vm_inshook: // Dispatch target for instr/line hooks. 2508 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) 2509 | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH) 2510 | andix. TMP0, TMP3, HOOK_ACTIVE // Hook already active? 2511 | rlwinm TMP0, TMP3, 31-LUA_HOOKLINE, 31, 0 2512 | bne <5 2513 | 2514 | cmpwi cr1, TMP0, 0 2515 | addic. TMP2, TMP2, -1 2516 | beq cr1, <5 2517 | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH) 2518 | beq >1 2519 | bge cr1, <5 2520 |1: 2521 | mr CARG1, L 2522 | stw MULTRES, SAVE_MULTRES 2523 | mr CARG2, PC 2524 | stp BASE, L->base 2525 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. 2526 | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) 2527 |3: 2528 | lp BASE, L->base 2529 |4: // Re-dispatch to static ins. 2530 | lwz INS, -4(PC) 2531 | decode_OPP TMP1, INS 2532 | decode_RB8 RB, INS 2533 | addi TMP1, TMP1, GG_DISP2STATIC 2534 | decode_RD8 RD, INS 2535 | lpx TMP0, DISPATCH, TMP1 2536 | decode_RA8 RA, INS 2537 | decode_RC8 RC, INS 2538 | mtctr TMP0 2539 | bctr 2540 | 2541 |->cont_hook: // Continue from hook yield. 2542 | addi PC, PC, 4 2543 | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins. 2544 | b <4 2545 | 2546 |->vm_hotloop: // Hot loop counter underflow. 2547 |.if JIT 2548 | lwz LFUNC:TMP1, FRAME_FUNC(BASE) 2549 | addi CARG1, DISPATCH, GG_DISP2J 2550 | stw PC, SAVE_PC 2551 | lwz TMP1, LFUNC:TMP1->pc 2552 | mr CARG2, PC 2553 | stw L, DISPATCH_J(L)(DISPATCH) 2554 | lbz TMP1, PC2PROTO(framesize)(TMP1) 2555 | stp BASE, L->base 2556 | slwi TMP1, TMP1, 3 2557 | add TMP1, BASE, TMP1 2558 | stp TMP1, L->top 2559 | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) 2560 | b <3 2561 |.endif 2562 | 2563 |->vm_callhook: // Dispatch target for call hooks. 2564 | mr CARG2, PC 2565 |.if JIT 2566 | b >1 2567 |.endif 2568 | 2569 |->vm_hotcall: // Hot call counter underflow. 2570 |.if JIT 2571 | ori CARG2, PC, 1 2572 |1: 2573 |.endif 2574 | add TMP0, BASE, RC 2575 | stw PC, SAVE_PC 2576 | mr CARG1, L 2577 | stp BASE, L->base 2578 | sub RA, RA, BASE 2579 | stp TMP0, L->top 2580 | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) 2581 | // Returns ASMFunction. 2582 | lp BASE, L->base 2583 | lp TMP0, L->top 2584 | stw ZERO, SAVE_PC // Invalidate for subsequent line hook. 2585 | sub NARGS8:RC, TMP0, BASE 2586 | add RA, BASE, RA 2587 | lwz LFUNC:RB, FRAME_FUNC(BASE) 2588 | lwz INS, -4(PC) 2589 | mtctr CRET1 2590 | bctr 2591 | 2592 |//----------------------------------------------------------------------- 2593 |//-- Trace exit handler ------------------------------------------------- 2594 |//----------------------------------------------------------------------- 2595 | 2596 |.macro savex_, a, b, c, d 2597 | stfd f..a, 16+a*8(sp) 2598 | stfd f..b, 16+b*8(sp) 2599 | stfd f..c, 16+c*8(sp) 2600 | stfd f..d, 16+d*8(sp) 2601 |.endmacro 2602 | 2603 |->vm_exit_handler: 2604 |.if JIT 2605 | addi sp, sp, -(16+32*8+32*4) 2606 | stmw r2, 16+32*8+2*4(sp) 2607 | addi DISPATCH, JGL, -GG_DISP2G-32768 2608 | li CARG2, ~LJ_VMST_EXIT 2609 | lwz CARG1, 16+32*8+32*4(sp) // Get stack chain. 2610 | stw CARG2, DISPATCH_GL(vmstate)(DISPATCH) 2611 | savex_ 0,1,2,3 2612 | stw CARG1, 0(sp) // Store extended stack chain. 2613 | clrso TMP1 2614 | savex_ 4,5,6,7 2615 | addi CARG2, sp, 16+32*8+32*4 // Recompute original value of sp. 2616 | savex_ 8,9,10,11 2617 | stw CARG2, 16+32*8+1*4(sp) // Store sp in RID_SP. 2618 | savex_ 12,13,14,15 2619 | mflr CARG3 2620 | li TMP1, 0 2621 | savex_ 16,17,18,19 2622 | stw TMP1, 16+32*8+0*4(sp) // Clear RID_TMP. 2623 | savex_ 20,21,22,23 2624 | lhz CARG4, 2(CARG3) // Load trace number. 2625 | savex_ 24,25,26,27 2626 | lwz L, DISPATCH_GL(jit_L)(DISPATCH) 2627 | savex_ 28,29,30,31 2628 | sub CARG3, TMP0, CARG3 // Compute exit number. 2629 | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) 2630 | srwi CARG3, CARG3, 2 2631 | stw L, DISPATCH_J(L)(DISPATCH) 2632 | subi CARG3, CARG3, 2 2633 | stw TMP1, DISPATCH_GL(jit_L)(DISPATCH) 2634 | stw CARG4, DISPATCH_J(parent)(DISPATCH) 2635 | stp BASE, L->base 2636 | addi CARG1, DISPATCH, GG_DISP2J 2637 | stw CARG3, DISPATCH_J(exitno)(DISPATCH) 2638 | addi CARG2, sp, 16 2639 | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) 2640 | // Returns MULTRES (unscaled) or negated error code. 2641 | lp TMP1, L->cframe 2642 | lwz TMP2, 0(sp) 2643 | lp BASE, L->base 2644 |.if GPR64 2645 | rldicr sp, TMP1, 0, 61 2646 |.else 2647 | rlwinm sp, TMP1, 0, 0, 29 2648 |.endif 2649 | lwz PC, SAVE_PC // Get SAVE_PC. 2650 | stw TMP2, 0(sp) 2651 | stw L, SAVE_L // Set SAVE_L (on-trace resume/yield). 2652 | b >1 2653 |.endif 2654 |->vm_exit_interp: 2655 |.if JIT 2656 | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set. 2657 | lwz L, SAVE_L 2658 | addi DISPATCH, JGL, -GG_DISP2G-32768 2659 |1: 2660 | cmpwi CARG1, 0 2661 | blt >3 // Check for error from exit. 2662 | lwz LFUNC:TMP1, FRAME_FUNC(BASE) 2663 | slwi MULTRES, CARG1, 3 2664 | li TMP2, 0 2665 | stw MULTRES, SAVE_MULTRES 2666 | lwz TMP1, LFUNC:TMP1->pc 2667 | stw TMP2, DISPATCH_GL(jit_L)(DISPATCH) 2668 | lwz KBASE, PC2PROTO(k)(TMP1) 2669 | // Setup type comparison constants. 2670 | li TISNUM, LJ_TISNUM 2671 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2672 | stw TMP3, TMPD 2673 | li ZERO, 0 2674 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 2675 | lfs TOBIT, TMPD 2676 | stw TMP3, TMPD 2677 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 2678 | li TISNIL, LJ_TNIL 2679 | stw TMP0, TONUM_HI 2680 | lfs TONUM, TMPD 2681 | // Modified copy of ins_next which handles function header dispatch, too. 2682 | lwz INS, 0(PC) 2683 | addi PC, PC, 4 2684 | // Assumes TISNIL == ~LJ_VMST_INTERP == -1. 2685 | stw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) 2686 | decode_OPP TMP1, INS 2687 | decode_RA8 RA, INS 2688 | lpx TMP0, DISPATCH, TMP1 2689 | mtctr TMP0 2690 | cmplwi TMP1, BC_FUNCF*4 // Function header? 2691 | bge >2 2692 | decode_RB8 RB, INS 2693 | decode_RD8 RD, INS 2694 | decode_RC8 RC, INS 2695 | bctr 2696 |2: 2697 | subi RC, MULTRES, 8 2698 | add RA, RA, BASE 2699 | bctr 2700 | 2701 |3: // Rethrow error from the right C frame. 2702 | neg CARG2, CARG1 2703 | mr CARG1, L 2704 | bl extern lj_err_throw // (lua_State *L, int errcode) 2705 |.endif 2706 | 2707 |//----------------------------------------------------------------------- 2708 |//-- Math helper functions ---------------------------------------------- 2709 |//----------------------------------------------------------------------- 2710 | 2711 |// NYI: Use internal implementations of floor, ceil, trunc. 2712 | 2713 |->vm_modi: 2714 | divwo. TMP0, CARG1, CARG2 2715 | bso >1 2716 |.if GPR64 2717 | xor CARG3, CARG1, CARG2 2718 | cmpwi CARG3, 0 2719 |.else 2720 | xor. CARG3, CARG1, CARG2 2721 |.endif 2722 | mullw TMP0, TMP0, CARG2 2723 | sub CARG1, CARG1, TMP0 2724 | bgelr 2725 | cmpwi CARG1, 0; beqlr 2726 | add CARG1, CARG1, CARG2 2727 | blr 2728 |1: 2729 | cmpwi CARG2, 0 2730 | li CARG1, 0 2731 | beqlr 2732 | clrso TMP0 // Clear SO for -2147483648 % -1 and return 0. 2733 | blr 2734 | 2735 |//----------------------------------------------------------------------- 2736 |//-- Miscellaneous functions -------------------------------------------- 2737 |//----------------------------------------------------------------------- 2738 | 2739 |// void lj_vm_cachesync(void *start, void *end) 2740 |// Flush D-Cache and invalidate I-Cache. Assumes 32 byte cache line size. 2741 |// This is a good lower bound, except for very ancient PPC models. 2742 |->vm_cachesync: 2743 |.if JIT or FFI 2744 | // Compute start of first cache line and number of cache lines. 2745 | rlwinm CARG1, CARG1, 0, 0, 26 2746 | sub CARG2, CARG2, CARG1 2747 | addi CARG2, CARG2, 31 2748 | rlwinm. CARG2, CARG2, 27, 5, 31 2749 | beqlr 2750 | mtctr CARG2 2751 | mr CARG3, CARG1 2752 |1: // Flush D-Cache. 2753 | dcbst r0, CARG1 2754 | addi CARG1, CARG1, 32 2755 | bdnz <1 2756 | sync 2757 | mtctr CARG2 2758 |1: // Invalidate I-Cache. 2759 | icbi r0, CARG3 2760 | addi CARG3, CARG3, 32 2761 | bdnz <1 2762 | isync 2763 | blr 2764 |.endif 2765 | 2766 |//----------------------------------------------------------------------- 2767 |//-- FFI helper functions ----------------------------------------------- 2768 |//----------------------------------------------------------------------- 2769 | 2770 |// Handler for callback functions. Callback slot number in r11, g in r12. 2771 |->vm_ffi_callback: 2772 |.if FFI 2773 |.type CTSTATE, CTState, PC 2774 | saveregs 2775 | lwz CTSTATE, GL:r12->ctype_state 2776 | addi DISPATCH, r12, GG_G2DISP 2777 | stw r11, CTSTATE->cb.slot 2778 | stw r3, CTSTATE->cb.gpr[0] 2779 | stfd f1, CTSTATE->cb.fpr[0] 2780 | stw r4, CTSTATE->cb.gpr[1] 2781 | stfd f2, CTSTATE->cb.fpr[1] 2782 | stw r5, CTSTATE->cb.gpr[2] 2783 | stfd f3, CTSTATE->cb.fpr[2] 2784 | stw r6, CTSTATE->cb.gpr[3] 2785 | stfd f4, CTSTATE->cb.fpr[3] 2786 | stw r7, CTSTATE->cb.gpr[4] 2787 | stfd f5, CTSTATE->cb.fpr[4] 2788 | stw r8, CTSTATE->cb.gpr[5] 2789 | stfd f6, CTSTATE->cb.fpr[5] 2790 | stw r9, CTSTATE->cb.gpr[6] 2791 | stfd f7, CTSTATE->cb.fpr[6] 2792 | stw r10, CTSTATE->cb.gpr[7] 2793 | stfd f8, CTSTATE->cb.fpr[7] 2794 | addi TMP0, sp, CFRAME_SPACE+8 2795 | stw TMP0, CTSTATE->cb.stack 2796 | mr CARG1, CTSTATE 2797 | stw CTSTATE, SAVE_PC // Any value outside of bytecode is ok. 2798 | mr CARG2, sp 2799 | bl extern lj_ccallback_enter // (CTState *cts, void *cf) 2800 | // Returns lua_State *. 2801 | lp BASE, L:CRET1->base 2802 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 2803 | lp RC, L:CRET1->top 2804 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2805 | li ZERO, 0 2806 | mr L, CRET1 2807 | stw TMP3, TMPD 2808 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 2809 | lwz LFUNC:RB, FRAME_FUNC(BASE) 2810 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 2811 | stw TMP0, TONUM_HI 2812 | li TISNIL, LJ_TNIL 2813 | li_vmstate INTERP 2814 | lfs TOBIT, TMPD 2815 | stw TMP3, TMPD 2816 | sub RC, RC, BASE 2817 | st_vmstate 2818 | lfs TONUM, TMPD 2819 | ins_callt 2820 |.endif 2821 | 2822 |->cont_ffi_callback: // Return from FFI callback. 2823 |.if FFI 2824 | lwz CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) 2825 | stp BASE, L->base 2826 | stp RB, L->top 2827 | stp L, CTSTATE->L 2828 | mr CARG1, CTSTATE 2829 | mr CARG2, RA 2830 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) 2831 | lwz CRET1, CTSTATE->cb.gpr[0] 2832 | lfd FARG1, CTSTATE->cb.fpr[0] 2833 | lwz CRET2, CTSTATE->cb.gpr[1] 2834 | b ->vm_leave_unw 2835 |.endif 2836 | 2837 |->vm_ffi_call: // Call C function via FFI. 2838 | // Caveat: needs special frame unwinding, see below. 2839 |.if FFI 2840 | .type CCSTATE, CCallState, CARG1 2841 | lwz TMP1, CCSTATE->spadj 2842 | mflr TMP0 2843 | lbz CARG2, CCSTATE->nsp 2844 | lbz CARG3, CCSTATE->nfpr 2845 | neg TMP1, TMP1 2846 | stw TMP0, 4(sp) 2847 | cmpwi cr1, CARG3, 0 2848 | mr TMP2, sp 2849 | addic. CARG2, CARG2, -1 2850 | stwux sp, sp, TMP1 2851 | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls. 2852 | stw r14, -4(TMP2) 2853 | stw CCSTATE, -8(TMP2) 2854 | mr r14, TMP2 2855 | la TMP1, CCSTATE->stack 2856 | slwi CARG2, CARG2, 2 2857 | blty >2 2858 | la TMP2, 8(sp) 2859 |1: 2860 | lwzx TMP0, TMP1, CARG2 2861 | stwx TMP0, TMP2, CARG2 2862 | addic. CARG2, CARG2, -4 2863 | bge <1 2864 |2: 2865 | bney cr1, >3 2866 | lfd f1, CCSTATE->fpr[0] 2867 | lfd f2, CCSTATE->fpr[1] 2868 | lfd f3, CCSTATE->fpr[2] 2869 | lfd f4, CCSTATE->fpr[3] 2870 | lfd f5, CCSTATE->fpr[4] 2871 | lfd f6, CCSTATE->fpr[5] 2872 | lfd f7, CCSTATE->fpr[6] 2873 | lfd f8, CCSTATE->fpr[7] 2874 |3: 2875 | lp TMP0, CCSTATE->func 2876 | lwz CARG2, CCSTATE->gpr[1] 2877 | lwz CARG3, CCSTATE->gpr[2] 2878 | lwz CARG4, CCSTATE->gpr[3] 2879 | lwz CARG5, CCSTATE->gpr[4] 2880 | mtctr TMP0 2881 | lwz r8, CCSTATE->gpr[5] 2882 | lwz r9, CCSTATE->gpr[6] 2883 | lwz r10, CCSTATE->gpr[7] 2884 | lwz CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. 2885 | bctrl 2886 | lwz CCSTATE:TMP1, -8(r14) 2887 | lwz TMP2, -4(r14) 2888 | lwz TMP0, 4(r14) 2889 | stw CARG1, CCSTATE:TMP1->gpr[0] 2890 | stfd FARG1, CCSTATE:TMP1->fpr[0] 2891 | stw CARG2, CCSTATE:TMP1->gpr[1] 2892 | mtlr TMP0 2893 | stw CARG3, CCSTATE:TMP1->gpr[2] 2894 | mr sp, r14 2895 | stw CARG4, CCSTATE:TMP1->gpr[3] 2896 | mr r14, TMP2 2897 | blr 2898 |.endif 2899 |// Note: vm_ffi_call must be the last function in this object file! 2900 | 2901 |//----------------------------------------------------------------------- 2902} 2903 2904/* Generate the code for a single instruction. */ 2905static void build_ins(BuildCtx *ctx, BCOp op, int defop) 2906{ 2907 int vk = 0; 2908 |=>defop: 2909 2910 switch (op) { 2911 2912 /* -- Comparison ops ---------------------------------------------------- */ 2913 2914 /* Remember: all ops branch for a true comparison, fall through otherwise. */ 2915 2916 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 2917 | // RA = src1*8, RD = src2*8, JMP with RD = target 2918 |.if DUALNUM 2919 | lwzux TMP0, RA, BASE 2920 | addi PC, PC, 4 2921 | lwz CARG2, 4(RA) 2922 | lwzux TMP1, RD, BASE 2923 | lwz TMP2, -4(PC) 2924 | checknum cr0, TMP0 2925 | lwz CARG3, 4(RD) 2926 | decode_RD4 TMP2, TMP2 2927 | checknum cr1, TMP1 2928 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 2929 | bne cr0, >7 2930 | bne cr1, >8 2931 | cmpw CARG2, CARG3 2932 if (op == BC_ISLT) { 2933 | bge >2 2934 } else if (op == BC_ISGE) { 2935 | blt >2 2936 } else if (op == BC_ISLE) { 2937 | bgt >2 2938 } else { 2939 | ble >2 2940 } 2941 |1: 2942 | add PC, PC, TMP2 2943 |2: 2944 | ins_next 2945 | 2946 |7: // RA is not an integer. 2947 | bgt cr0, ->vmeta_comp 2948 | // RA is a number. 2949 | lfd f0, 0(RA) 2950 | bgt cr1, ->vmeta_comp 2951 | blt cr1, >4 2952 | // RA is a number, RD is an integer. 2953 | tonum_i f1, CARG3 2954 | b >5 2955 | 2956 |8: // RA is an integer, RD is not an integer. 2957 | bgt cr1, ->vmeta_comp 2958 | // RA is an integer, RD is a number. 2959 | tonum_i f0, CARG2 2960 |4: 2961 | lfd f1, 0(RD) 2962 |5: 2963 | fcmpu cr0, f0, f1 2964 if (op == BC_ISLT) { 2965 | bge <2 2966 } else if (op == BC_ISGE) { 2967 | blt <2 2968 } else if (op == BC_ISLE) { 2969 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq 2970 | bge <2 2971 } else { 2972 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq 2973 | blt <2 2974 } 2975 | b <1 2976 |.else 2977 | lwzx TMP0, BASE, RA 2978 | addi PC, PC, 4 2979 | lfdx f0, BASE, RA 2980 | lwzx TMP1, BASE, RD 2981 | checknum cr0, TMP0 2982 | lwz TMP2, -4(PC) 2983 | lfdx f1, BASE, RD 2984 | checknum cr1, TMP1 2985 | decode_RD4 TMP2, TMP2 2986 | bge cr0, ->vmeta_comp 2987 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 2988 | bge cr1, ->vmeta_comp 2989 | fcmpu cr0, f0, f1 2990 if (op == BC_ISLT) { 2991 | bge >1 2992 } else if (op == BC_ISGE) { 2993 | blt >1 2994 } else if (op == BC_ISLE) { 2995 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq 2996 | bge >1 2997 } else { 2998 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq 2999 | blt >1 3000 } 3001 | add PC, PC, TMP2 3002 |1: 3003 | ins_next 3004 |.endif 3005 break; 3006 3007 case BC_ISEQV: case BC_ISNEV: 3008 vk = op == BC_ISEQV; 3009 | // RA = src1*8, RD = src2*8, JMP with RD = target 3010 |.if DUALNUM 3011 | lwzux TMP0, RA, BASE 3012 | addi PC, PC, 4 3013 | lwz CARG2, 4(RA) 3014 | lwzux TMP1, RD, BASE 3015 | checknum cr0, TMP0 3016 | lwz TMP2, -4(PC) 3017 | checknum cr1, TMP1 3018 | decode_RD4 TMP2, TMP2 3019 | lwz CARG3, 4(RD) 3020 | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt 3021 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3022 if (vk) { 3023 | ble cr7, ->BC_ISEQN_Z 3024 } else { 3025 | ble cr7, ->BC_ISNEN_Z 3026 } 3027 |.else 3028 | lwzux TMP0, RA, BASE 3029 | lwz TMP2, 0(PC) 3030 | lfd f0, 0(RA) 3031 | addi PC, PC, 4 3032 | lwzux TMP1, RD, BASE 3033 | checknum cr0, TMP0 3034 | decode_RD4 TMP2, TMP2 3035 | lfd f1, 0(RD) 3036 | checknum cr1, TMP1 3037 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3038 | bge cr0, >5 3039 | bge cr1, >5 3040 | fcmpu cr0, f0, f1 3041 if (vk) { 3042 | bne >1 3043 | add PC, PC, TMP2 3044 } else { 3045 | beq >1 3046 | add PC, PC, TMP2 3047 } 3048 |1: 3049 | ins_next 3050 |.endif 3051 |5: // Either or both types are not numbers. 3052 |.if not DUALNUM 3053 | lwz CARG2, 4(RA) 3054 | lwz CARG3, 4(RD) 3055 |.endif 3056 |.if FFI 3057 | cmpwi cr7, TMP0, LJ_TCDATA 3058 | cmpwi cr5, TMP1, LJ_TCDATA 3059 |.endif 3060 | not TMP3, TMP0 3061 | cmplw TMP0, TMP1 3062 | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? 3063 |.if FFI 3064 | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq 3065 |.endif 3066 | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? 3067 |.if FFI 3068 | beq cr7, ->vmeta_equal_cd 3069 |.endif 3070 | cmplw cr5, CARG2, CARG3 3071 | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. 3072 | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. 3073 | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. 3074 | mr SAVE0, PC 3075 | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. 3076 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. 3077 if (vk) { 3078 | bne cr0, >6 3079 | add PC, PC, TMP2 3080 |6: 3081 } else { 3082 | beq cr0, >6 3083 | add PC, PC, TMP2 3084 |6: 3085 } 3086 |.if DUALNUM 3087 | bge cr0, >2 // Done if 1 or 2. 3088 |1: 3089 | ins_next 3090 |2: 3091 |.else 3092 | blt cr0, <1 // Done if 1 or 2. 3093 |.endif 3094 | blt cr6, <1 // Done if not tab/ud. 3095 | 3096 | // Different tables or userdatas. Need to check __eq metamethod. 3097 | // Field metatable must be at same offset for GCtab and GCudata! 3098 | lwz TAB:TMP2, TAB:CARG2->metatable 3099 | li CARG4, 1-vk // ne = 0 or 1. 3100 | cmplwi TAB:TMP2, 0 3101 | beq <1 // No metatable? 3102 | lbz TMP2, TAB:TMP2->nomm 3103 | andix. TMP2, TMP2, 1<<MM_eq 3104 | bne <1 // Or 'no __eq' flag set? 3105 | mr PC, SAVE0 // Restore old PC. 3106 | b ->vmeta_equal // Handle __eq metamethod. 3107 break; 3108 3109 case BC_ISEQS: case BC_ISNES: 3110 vk = op == BC_ISEQS; 3111 | // RA = src*8, RD = str_const*8 (~), JMP with RD = target 3112 | lwzux TMP0, RA, BASE 3113 | srwi RD, RD, 1 3114 | lwz STR:TMP3, 4(RA) 3115 | lwz TMP2, 0(PC) 3116 | subfic RD, RD, -4 3117 | addi PC, PC, 4 3118 |.if FFI 3119 | cmpwi TMP0, LJ_TCDATA 3120 |.endif 3121 | lwzx STR:TMP1, KBASE, RD // KBASE-4-str_const*4 3122 | .gpr64 extsw TMP0, TMP0 3123 | subfic TMP0, TMP0, LJ_TSTR 3124 |.if FFI 3125 | beq ->vmeta_equal_cd 3126 |.endif 3127 | sub TMP1, STR:TMP1, STR:TMP3 3128 | or TMP0, TMP0, TMP1 3129 | decode_RD4 TMP2, TMP2 3130 | subfic TMP0, TMP0, 0 3131 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3132 | subfe TMP1, TMP1, TMP1 3133 if (vk) { 3134 | andc TMP2, TMP2, TMP1 3135 } else { 3136 | and TMP2, TMP2, TMP1 3137 } 3138 | add PC, PC, TMP2 3139 | ins_next 3140 break; 3141 3142 case BC_ISEQN: case BC_ISNEN: 3143 vk = op == BC_ISEQN; 3144 | // RA = src*8, RD = num_const*8, JMP with RD = target 3145 |.if DUALNUM 3146 | lwzux TMP0, RA, BASE 3147 | addi PC, PC, 4 3148 | lwz CARG2, 4(RA) 3149 | lwzux TMP1, RD, KBASE 3150 | checknum cr0, TMP0 3151 | lwz TMP2, -4(PC) 3152 | checknum cr1, TMP1 3153 | decode_RD4 TMP2, TMP2 3154 | lwz CARG3, 4(RD) 3155 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3156 if (vk) { 3157 |->BC_ISEQN_Z: 3158 } else { 3159 |->BC_ISNEN_Z: 3160 } 3161 | bne cr0, >7 3162 | bne cr1, >8 3163 | cmpw CARG2, CARG3 3164 |4: 3165 |.else 3166 if (vk) { 3167 |->BC_ISEQN_Z: // Dummy label. 3168 } else { 3169 |->BC_ISNEN_Z: // Dummy label. 3170 } 3171 | lwzx TMP0, BASE, RA 3172 | addi PC, PC, 4 3173 | lfdx f0, BASE, RA 3174 | lwz TMP2, -4(PC) 3175 | lfdx f1, KBASE, RD 3176 | decode_RD4 TMP2, TMP2 3177 | checknum TMP0 3178 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3179 | bge >3 3180 | fcmpu cr0, f0, f1 3181 |.endif 3182 if (vk) { 3183 | bne >1 3184 | add PC, PC, TMP2 3185 |1: 3186 |.if not FFI 3187 |3: 3188 |.endif 3189 } else { 3190 | beq >2 3191 |1: 3192 |.if not FFI 3193 |3: 3194 |.endif 3195 | add PC, PC, TMP2 3196 |2: 3197 } 3198 | ins_next 3199 |.if FFI 3200 |3: 3201 | cmpwi TMP0, LJ_TCDATA 3202 | beq ->vmeta_equal_cd 3203 | b <1 3204 |.endif 3205 |.if DUALNUM 3206 |7: // RA is not an integer. 3207 | bge cr0, <3 3208 | // RA is a number. 3209 | lfd f0, 0(RA) 3210 | blt cr1, >1 3211 | // RA is a number, RD is an integer. 3212 | tonum_i f1, CARG3 3213 | b >2 3214 | 3215 |8: // RA is an integer, RD is a number. 3216 | tonum_i f0, CARG2 3217 |1: 3218 | lfd f1, 0(RD) 3219 |2: 3220 | fcmpu cr0, f0, f1 3221 | b <4 3222 |.endif 3223 break; 3224 3225 case BC_ISEQP: case BC_ISNEP: 3226 vk = op == BC_ISEQP; 3227 | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target 3228 | lwzx TMP0, BASE, RA 3229 | srwi TMP1, RD, 3 3230 | lwz TMP2, 0(PC) 3231 | not TMP1, TMP1 3232 | addi PC, PC, 4 3233 |.if FFI 3234 | cmpwi TMP0, LJ_TCDATA 3235 |.endif 3236 | sub TMP0, TMP0, TMP1 3237 |.if FFI 3238 | beq ->vmeta_equal_cd 3239 |.endif 3240 | decode_RD4 TMP2, TMP2 3241 | .gpr64 extsw TMP0, TMP0 3242 | addic TMP0, TMP0, -1 3243 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3244 | subfe TMP1, TMP1, TMP1 3245 if (vk) { 3246 | and TMP2, TMP2, TMP1 3247 } else { 3248 | andc TMP2, TMP2, TMP1 3249 } 3250 | add PC, PC, TMP2 3251 | ins_next 3252 break; 3253 3254 /* -- Unary test and copy ops ------------------------------------------- */ 3255 3256 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: 3257 | // RA = dst*8 or unused, RD = src*8, JMP with RD = target 3258 | lwzx TMP0, BASE, RD 3259 | lwz INS, 0(PC) 3260 | addi PC, PC, 4 3261 if (op == BC_IST || op == BC_ISF) { 3262 | .gpr64 extsw TMP0, TMP0 3263 | subfic TMP0, TMP0, LJ_TTRUE 3264 | decode_RD4 TMP2, INS 3265 | subfe TMP1, TMP1, TMP1 3266 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3267 if (op == BC_IST) { 3268 | andc TMP2, TMP2, TMP1 3269 } else { 3270 | and TMP2, TMP2, TMP1 3271 } 3272 | add PC, PC, TMP2 3273 } else { 3274 | li TMP1, LJ_TFALSE 3275 | lfdx f0, BASE, RD 3276 | cmplw TMP0, TMP1 3277 if (op == BC_ISTC) { 3278 | bge >1 3279 } else { 3280 | blt >1 3281 } 3282 | addis PC, PC, -(BCBIAS_J*4 >> 16) 3283 | decode_RD4 TMP2, INS 3284 | stfdx f0, BASE, RA 3285 | add PC, PC, TMP2 3286 |1: 3287 } 3288 | ins_next 3289 break; 3290 3291 /* -- Unary ops --------------------------------------------------------- */ 3292 3293 case BC_MOV: 3294 | // RA = dst*8, RD = src*8 3295 | ins_next1 3296 | lfdx f0, BASE, RD 3297 | stfdx f0, BASE, RA 3298 | ins_next2 3299 break; 3300 case BC_NOT: 3301 | // RA = dst*8, RD = src*8 3302 | ins_next1 3303 | lwzx TMP0, BASE, RD 3304 | .gpr64 extsw TMP0, TMP0 3305 | subfic TMP1, TMP0, LJ_TTRUE 3306 | adde TMP0, TMP0, TMP1 3307 | stwx TMP0, BASE, RA 3308 | ins_next2 3309 break; 3310 case BC_UNM: 3311 | // RA = dst*8, RD = src*8 3312 | lwzux TMP1, RD, BASE 3313 | lwz TMP0, 4(RD) 3314 | checknum TMP1 3315 |.if DUALNUM 3316 | bne >5 3317 |.if GPR64 3318 | lus TMP2, 0x8000 3319 | neg TMP0, TMP0 3320 | cmplw TMP0, TMP2 3321 | beq >4 3322 |.else 3323 | nego. TMP0, TMP0 3324 | bso >4 3325 |1: 3326 |.endif 3327 | ins_next1 3328 | stwux TISNUM, RA, BASE 3329 | stw TMP0, 4(RA) 3330 |3: 3331 | ins_next2 3332 |4: 3333 |.if not GPR64 3334 | // Potential overflow. 3335 | checkov TMP1, <1 // Ignore unrelated overflow. 3336 |.endif 3337 | lus TMP1, 0x41e0 // 2^31. 3338 | li TMP0, 0 3339 | b >7 3340 |.endif 3341 |5: 3342 | bge ->vmeta_unm 3343 | xoris TMP1, TMP1, 0x8000 3344 |7: 3345 | ins_next1 3346 | stwux TMP1, RA, BASE 3347 | stw TMP0, 4(RA) 3348 |.if DUALNUM 3349 | b <3 3350 |.else 3351 | ins_next2 3352 |.endif 3353 break; 3354 case BC_LEN: 3355 | // RA = dst*8, RD = src*8 3356 | lwzux TMP0, RD, BASE 3357 | lwz CARG1, 4(RD) 3358 | checkstr TMP0; bne >2 3359 | lwz CRET1, STR:CARG1->len 3360 |1: 3361 |.if DUALNUM 3362 | ins_next1 3363 | stwux TISNUM, RA, BASE 3364 | stw CRET1, 4(RA) 3365 |.else 3366 | tonum_u f0, CRET1 // Result is a non-negative integer. 3367 | ins_next1 3368 | stfdx f0, BASE, RA 3369 |.endif 3370 | ins_next2 3371 |2: 3372 | checktab TMP0; bne ->vmeta_len 3373#if LJ_52 3374 | lwz TAB:TMP2, TAB:CARG1->metatable 3375 | cmplwi TAB:TMP2, 0 3376 | bne >9 3377 |3: 3378#endif 3379 |->BC_LEN_Z: 3380 | bl extern lj_tab_len // (GCtab *t) 3381 | // Returns uint32_t (but less than 2^31). 3382 | b <1 3383#if LJ_52 3384 |9: 3385 | lbz TMP0, TAB:TMP2->nomm 3386 | andix. TMP0, TMP0, 1<<MM_len 3387 | bne <3 // 'no __len' flag set: done. 3388 | b ->vmeta_len 3389#endif 3390 break; 3391 3392 /* -- Binary ops -------------------------------------------------------- */ 3393 3394 |.macro ins_arithpre 3395 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 3396 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3397 ||switch (vk) { 3398 ||case 0: 3399 | lwzx TMP1, BASE, RB 3400 | .if DUALNUM 3401 | lwzx TMP2, KBASE, RC 3402 | .endif 3403 | lfdx f14, BASE, RB 3404 | lfdx f15, KBASE, RC 3405 | .if DUALNUM 3406 | checknum cr0, TMP1 3407 | checknum cr1, TMP2 3408 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3409 | bge ->vmeta_arith_vn 3410 | .else 3411 | checknum TMP1; bge ->vmeta_arith_vn 3412 | .endif 3413 || break; 3414 ||case 1: 3415 | lwzx TMP1, BASE, RB 3416 | .if DUALNUM 3417 | lwzx TMP2, KBASE, RC 3418 | .endif 3419 | lfdx f15, BASE, RB 3420 | lfdx f14, KBASE, RC 3421 | .if DUALNUM 3422 | checknum cr0, TMP1 3423 | checknum cr1, TMP2 3424 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3425 | bge ->vmeta_arith_nv 3426 | .else 3427 | checknum TMP1; bge ->vmeta_arith_nv 3428 | .endif 3429 || break; 3430 ||default: 3431 | lwzx TMP1, BASE, RB 3432 | lwzx TMP2, BASE, RC 3433 | lfdx f14, BASE, RB 3434 | lfdx f15, BASE, RC 3435 | checknum cr0, TMP1 3436 | checknum cr1, TMP2 3437 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3438 | bge ->vmeta_arith_vv 3439 || break; 3440 ||} 3441 |.endmacro 3442 | 3443 |.macro ins_arithfallback, ins 3444 ||switch (vk) { 3445 ||case 0: 3446 | ins ->vmeta_arith_vn2 3447 || break; 3448 ||case 1: 3449 | ins ->vmeta_arith_nv2 3450 || break; 3451 ||default: 3452 | ins ->vmeta_arith_vv2 3453 || break; 3454 ||} 3455 |.endmacro 3456 | 3457 |.macro intmod, a, b, c 3458 | bl ->vm_modi 3459 |.endmacro 3460 | 3461 |.macro fpmod, a, b, c 3462 |->BC_MODVN_Z: 3463 | fdiv FARG1, b, c 3464 | // NYI: Use internal implementation of floor. 3465 | blex floor // floor(b/c) 3466 | fmul a, FARG1, c 3467 | fsub a, b, a // b - floor(b/c)*c 3468 |.endmacro 3469 | 3470 |.macro ins_arithfp, fpins 3471 | ins_arithpre 3472 |.if "fpins" == "fpmod_" 3473 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3474 |.else 3475 | fpins f0, f14, f15 3476 | ins_next1 3477 | stfdx f0, BASE, RA 3478 | ins_next2 3479 |.endif 3480 |.endmacro 3481 | 3482 |.macro ins_arithdn, intins, fpins 3483 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 3484 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3485 ||switch (vk) { 3486 ||case 0: 3487 | lwzux TMP1, RB, BASE 3488 | lwzux TMP2, RC, KBASE 3489 | lwz CARG1, 4(RB) 3490 | checknum cr0, TMP1 3491 | lwz CARG2, 4(RC) 3492 || break; 3493 ||case 1: 3494 | lwzux TMP1, RB, BASE 3495 | lwzux TMP2, RC, KBASE 3496 | lwz CARG2, 4(RB) 3497 | checknum cr0, TMP1 3498 | lwz CARG1, 4(RC) 3499 || break; 3500 ||default: 3501 | lwzux TMP1, RB, BASE 3502 | lwzux TMP2, RC, BASE 3503 | lwz CARG1, 4(RB) 3504 | checknum cr0, TMP1 3505 | lwz CARG2, 4(RC) 3506 || break; 3507 ||} 3508 | checknum cr1, TMP2 3509 | bne >5 3510 | bne cr1, >5 3511 | intins CARG1, CARG1, CARG2 3512 | bso >4 3513 |1: 3514 | ins_next1 3515 | stwux TISNUM, RA, BASE 3516 | stw CARG1, 4(RA) 3517 |2: 3518 | ins_next2 3519 |4: // Overflow. 3520 | checkov TMP0, <1 // Ignore unrelated overflow. 3521 | ins_arithfallback b 3522 |5: // FP variant. 3523 ||if (vk == 1) { 3524 | lfd f15, 0(RB) 3525 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3526 | lfd f14, 0(RC) 3527 ||} else { 3528 | lfd f14, 0(RB) 3529 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3530 | lfd f15, 0(RC) 3531 ||} 3532 | ins_arithfallback bge 3533 |.if "fpins" == "fpmod_" 3534 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3535 |.else 3536 | fpins f0, f14, f15 3537 | ins_next1 3538 | stfdx f0, BASE, RA 3539 | b <2 3540 |.endif 3541 |.endmacro 3542 | 3543 |.macro ins_arith, intins, fpins 3544 |.if DUALNUM 3545 | ins_arithdn intins, fpins 3546 |.else 3547 | ins_arithfp fpins 3548 |.endif 3549 |.endmacro 3550 3551 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3552 |.if GPR64 3553 |.macro addo32., y, a, b 3554 | // Need to check overflow for (a<<32) + (b<<32). 3555 | rldicr TMP0, a, 32, 31 3556 | rldicr TMP3, b, 32, 31 3557 | addo. TMP0, TMP0, TMP3 3558 | add y, a, b 3559 |.endmacro 3560 | ins_arith addo32., fadd 3561 |.else 3562 | ins_arith addo., fadd 3563 |.endif 3564 break; 3565 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3566 |.if GPR64 3567 |.macro subo32., y, a, b 3568 | // Need to check overflow for (a<<32) - (b<<32). 3569 | rldicr TMP0, a, 32, 31 3570 | rldicr TMP3, b, 32, 31 3571 | subo. TMP0, TMP0, TMP3 3572 | sub y, a, b 3573 |.endmacro 3574 | ins_arith subo32., fsub 3575 |.else 3576 | ins_arith subo., fsub 3577 |.endif 3578 break; 3579 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3580 | ins_arith mullwo., fmul 3581 break; 3582 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3583 | ins_arithfp fdiv 3584 break; 3585 case BC_MODVN: 3586 | ins_arith intmod, fpmod 3587 break; 3588 case BC_MODNV: case BC_MODVV: 3589 | ins_arith intmod, fpmod_ 3590 break; 3591 case BC_POW: 3592 | // NYI: (partial) integer arithmetic. 3593 | lwzx TMP1, BASE, RB 3594 | lfdx FARG1, BASE, RB 3595 | lwzx TMP2, BASE, RC 3596 | lfdx FARG2, BASE, RC 3597 | checknum cr0, TMP1 3598 | checknum cr1, TMP2 3599 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3600 | bge ->vmeta_arith_vv 3601 | blex pow 3602 | ins_next1 3603 | stfdx FARG1, BASE, RA 3604 | ins_next2 3605 break; 3606 3607 case BC_CAT: 3608 | // RA = dst*8, RB = src_start*8, RC = src_end*8 3609 | sub CARG3, RC, RB 3610 | stp BASE, L->base 3611 | add CARG2, BASE, RC 3612 | mr SAVE0, RB 3613 |->BC_CAT_Z: 3614 | stw PC, SAVE_PC 3615 | mr CARG1, L 3616 | srwi CARG3, CARG3, 3 3617 | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) 3618 | // Returns NULL (finished) or TValue * (metamethod). 3619 | cmplwi CRET1, 0 3620 | lp BASE, L->base 3621 | bne ->vmeta_binop 3622 | ins_next1 3623 | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. 3624 | stfdx f0, BASE, RA 3625 | ins_next2 3626 break; 3627 3628 /* -- Constant ops ------------------------------------------------------ */ 3629 3630 case BC_KSTR: 3631 | // RA = dst*8, RD = str_const*8 (~) 3632 | srwi TMP1, RD, 1 3633 | subfic TMP1, TMP1, -4 3634 | ins_next1 3635 | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4 3636 | li TMP2, LJ_TSTR 3637 | stwux TMP2, RA, BASE 3638 | stw TMP0, 4(RA) 3639 | ins_next2 3640 break; 3641 case BC_KCDATA: 3642 |.if FFI 3643 | // RA = dst*8, RD = cdata_const*8 (~) 3644 | srwi TMP1, RD, 1 3645 | subfic TMP1, TMP1, -4 3646 | ins_next1 3647 | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4 3648 | li TMP2, LJ_TCDATA 3649 | stwux TMP2, RA, BASE 3650 | stw TMP0, 4(RA) 3651 | ins_next2 3652 |.endif 3653 break; 3654 case BC_KSHORT: 3655 | // RA = dst*8, RD = int16_literal*8 3656 |.if DUALNUM 3657 | slwi RD, RD, 13 3658 | srawi RD, RD, 16 3659 | ins_next1 3660 | stwux TISNUM, RA, BASE 3661 | stw RD, 4(RA) 3662 | ins_next2 3663 |.else 3664 | // The soft-float approach is faster. 3665 | slwi RD, RD, 13 3666 | srawi TMP1, RD, 31 3667 | xor TMP2, TMP1, RD 3668 | sub TMP2, TMP2, TMP1 // TMP2 = abs(x) 3669 | cntlzw TMP3, TMP2 3670 | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1 3671 | slw TMP2, TMP2, TMP3 // TMP2 = left aligned mantissa 3672 | subfic TMP3, RD, 0 3673 | slwi TMP1, TMP1, 20 3674 | rlwimi RD, TMP2, 21, 1, 31 // hi = sign(x) | (mantissa>>11) 3675 | subfe TMP0, TMP0, TMP0 3676 | add RD, RD, TMP1 // hi = hi + exponent-1 3677 | and RD, RD, TMP0 // hi = x == 0 ? 0 : hi 3678 | ins_next1 3679 | stwux RD, RA, BASE 3680 | stw ZERO, 4(RA) 3681 | ins_next2 3682 |.endif 3683 break; 3684 case BC_KNUM: 3685 | // RA = dst*8, RD = num_const*8 3686 | ins_next1 3687 | lfdx f0, KBASE, RD 3688 | stfdx f0, BASE, RA 3689 | ins_next2 3690 break; 3691 case BC_KPRI: 3692 | // RA = dst*8, RD = primitive_type*8 (~) 3693 | srwi TMP1, RD, 3 3694 | not TMP0, TMP1 3695 | ins_next1 3696 | stwx TMP0, BASE, RA 3697 | ins_next2 3698 break; 3699 case BC_KNIL: 3700 | // RA = base*8, RD = end*8 3701 | stwx TISNIL, BASE, RA 3702 | addi RA, RA, 8 3703 |1: 3704 | stwx TISNIL, BASE, RA 3705 | cmpw RA, RD 3706 | addi RA, RA, 8 3707 | blt <1 3708 | ins_next_ 3709 break; 3710 3711 /* -- Upvalue and function ops ------------------------------------------ */ 3712 3713 case BC_UGET: 3714 | // RA = dst*8, RD = uvnum*8 3715 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3716 | srwi RD, RD, 1 3717 | addi RD, RD, offsetof(GCfuncL, uvptr) 3718 | lwzx UPVAL:RB, LFUNC:RB, RD 3719 | ins_next1 3720 | lwz TMP1, UPVAL:RB->v 3721 | lfd f0, 0(TMP1) 3722 | stfdx f0, BASE, RA 3723 | ins_next2 3724 break; 3725 case BC_USETV: 3726 | // RA = uvnum*8, RD = src*8 3727 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3728 | srwi RA, RA, 1 3729 | addi RA, RA, offsetof(GCfuncL, uvptr) 3730 | lfdux f0, RD, BASE 3731 | lwzx UPVAL:RB, LFUNC:RB, RA 3732 | lbz TMP3, UPVAL:RB->marked 3733 | lwz CARG2, UPVAL:RB->v 3734 | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 3735 | lbz TMP0, UPVAL:RB->closed 3736 | lwz TMP2, 0(RD) 3737 | stfd f0, 0(CARG2) 3738 | cmplwi cr1, TMP0, 0 3739 | lwz TMP1, 4(RD) 3740 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 3741 | subi TMP2, TMP2, (LJ_TNUMX+1) 3742 | bne >2 // Upvalue is closed and black? 3743 |1: 3744 | ins_next 3745 | 3746 |2: // Check if new value is collectable. 3747 | cmplwi TMP2, LJ_TISGCV - (LJ_TNUMX+1) 3748 | bge <1 // tvisgcv(v) 3749 | lbz TMP3, GCOBJ:TMP1->gch.marked 3750 | andix. TMP3, TMP3, LJ_GC_WHITES // iswhite(v) 3751 | la CARG1, GG_DISP2G(DISPATCH) 3752 | // Crossed a write barrier. Move the barrier forward. 3753 | beq <1 3754 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) 3755 | b <1 3756 break; 3757 case BC_USETS: 3758 | // RA = uvnum*8, RD = str_const*8 (~) 3759 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3760 | srwi TMP1, RD, 1 3761 | srwi RA, RA, 1 3762 | subfic TMP1, TMP1, -4 3763 | addi RA, RA, offsetof(GCfuncL, uvptr) 3764 | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4 3765 | lwzx UPVAL:RB, LFUNC:RB, RA 3766 | lbz TMP3, UPVAL:RB->marked 3767 | lwz CARG2, UPVAL:RB->v 3768 | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 3769 | lbz TMP3, STR:TMP1->marked 3770 | lbz TMP2, UPVAL:RB->closed 3771 | li TMP0, LJ_TSTR 3772 | stw STR:TMP1, 4(CARG2) 3773 | stw TMP0, 0(CARG2) 3774 | bne >2 3775 |1: 3776 | ins_next 3777 | 3778 |2: // Check if string is white and ensure upvalue is closed. 3779 | andix. TMP3, TMP3, LJ_GC_WHITES // iswhite(str) 3780 | cmplwi cr1, TMP2, 0 3781 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 3782 | la CARG1, GG_DISP2G(DISPATCH) 3783 | // Crossed a write barrier. Move the barrier forward. 3784 | beq <1 3785 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) 3786 | b <1 3787 break; 3788 case BC_USETN: 3789 | // RA = uvnum*8, RD = num_const*8 3790 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3791 | srwi RA, RA, 1 3792 | addi RA, RA, offsetof(GCfuncL, uvptr) 3793 | lfdx f0, KBASE, RD 3794 | lwzx UPVAL:RB, LFUNC:RB, RA 3795 | ins_next1 3796 | lwz TMP1, UPVAL:RB->v 3797 | stfd f0, 0(TMP1) 3798 | ins_next2 3799 break; 3800 case BC_USETP: 3801 | // RA = uvnum*8, RD = primitive_type*8 (~) 3802 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3803 | srwi RA, RA, 1 3804 | srwi TMP0, RD, 3 3805 | addi RA, RA, offsetof(GCfuncL, uvptr) 3806 | not TMP0, TMP0 3807 | lwzx UPVAL:RB, LFUNC:RB, RA 3808 | ins_next1 3809 | lwz TMP1, UPVAL:RB->v 3810 | stw TMP0, 0(TMP1) 3811 | ins_next2 3812 break; 3813 3814 case BC_UCLO: 3815 | // RA = level*8, RD = target 3816 | lwz TMP1, L->openupval 3817 | branch_RD // Do this first since RD is not saved. 3818 | stp BASE, L->base 3819 | cmplwi TMP1, 0 3820 | mr CARG1, L 3821 | beq >1 3822 | add CARG2, BASE, RA 3823 | bl extern lj_func_closeuv // (lua_State *L, TValue *level) 3824 | lp BASE, L->base 3825 |1: 3826 | ins_next 3827 break; 3828 3829 case BC_FNEW: 3830 | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) 3831 | srwi TMP1, RD, 1 3832 | stp BASE, L->base 3833 | subfic TMP1, TMP1, -4 3834 | stw PC, SAVE_PC 3835 | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4 3836 | mr CARG1, L 3837 | lwz CARG3, FRAME_FUNC(BASE) 3838 | // (lua_State *L, GCproto *pt, GCfuncL *parent) 3839 | bl extern lj_func_newL_gc 3840 | // Returns GCfuncL *. 3841 | lp BASE, L->base 3842 | li TMP0, LJ_TFUNC 3843 | stwux TMP0, RA, BASE 3844 | stw LFUNC:CRET1, 4(RA) 3845 | ins_next 3846 break; 3847 3848 /* -- Table ops --------------------------------------------------------- */ 3849 3850 case BC_TNEW: 3851 case BC_TDUP: 3852 | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) 3853 | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH) 3854 | mr CARG1, L 3855 | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) 3856 | stp BASE, L->base 3857 | cmplw TMP0, TMP1 3858 | stw PC, SAVE_PC 3859 | bge >5 3860 |1: 3861 if (op == BC_TNEW) { 3862 | rlwinm CARG2, RD, 29, 21, 31 3863 | rlwinm CARG3, RD, 18, 27, 31 3864 | cmpwi CARG2, 0x7ff; beq >3 3865 |2: 3866 | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) 3867 | // Returns Table *. 3868 } else { 3869 | srwi TMP1, RD, 1 3870 | subfic TMP1, TMP1, -4 3871 | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4 3872 | bl extern lj_tab_dup // (lua_State *L, Table *kt) 3873 | // Returns Table *. 3874 } 3875 | lp BASE, L->base 3876 | li TMP0, LJ_TTAB 3877 | stwux TMP0, RA, BASE 3878 | stw TAB:CRET1, 4(RA) 3879 | ins_next 3880 if (op == BC_TNEW) { 3881 |3: 3882 | li CARG2, 0x801 3883 | b <2 3884 } 3885 |5: 3886 | mr SAVE0, RD 3887 | bl extern lj_gc_step_fixtop // (lua_State *L) 3888 | mr RD, SAVE0 3889 | mr CARG1, L 3890 | b <1 3891 break; 3892 3893 case BC_GGET: 3894 | // RA = dst*8, RD = str_const*8 (~) 3895 case BC_GSET: 3896 | // RA = src*8, RD = str_const*8 (~) 3897 | lwz LFUNC:TMP2, FRAME_FUNC(BASE) 3898 | srwi TMP1, RD, 1 3899 | lwz TAB:RB, LFUNC:TMP2->env 3900 | subfic TMP1, TMP1, -4 3901 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 3902 if (op == BC_GGET) { 3903 | b ->BC_TGETS_Z 3904 } else { 3905 | b ->BC_TSETS_Z 3906 } 3907 break; 3908 3909 case BC_TGETV: 3910 | // RA = dst*8, RB = table*8, RC = key*8 3911 | lwzux CARG1, RB, BASE 3912 | lwzux CARG2, RC, BASE 3913 | lwz TAB:RB, 4(RB) 3914 |.if DUALNUM 3915 | lwz RC, 4(RC) 3916 |.else 3917 | lfd f0, 0(RC) 3918 |.endif 3919 | checktab CARG1 3920 | checknum cr1, CARG2 3921 | bne ->vmeta_tgetv 3922 |.if DUALNUM 3923 | lwz TMP0, TAB:RB->asize 3924 | bne cr1, >5 3925 | lwz TMP1, TAB:RB->array 3926 | cmplw TMP0, RC 3927 | slwi TMP2, RC, 3 3928 |.else 3929 | bge cr1, >5 3930 | // Convert number key to integer, check for integerness and range. 3931 | fctiwz f1, f0 3932 | fadd f2, f0, TOBIT 3933 | stfd f1, TMPD 3934 | lwz TMP0, TAB:RB->asize 3935 | fsub f2, f2, TOBIT 3936 | lwz TMP2, TMPD_LO 3937 | lwz TMP1, TAB:RB->array 3938 | fcmpu cr1, f0, f2 3939 | cmplw cr0, TMP0, TMP2 3940 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq 3941 | slwi TMP2, TMP2, 3 3942 |.endif 3943 | ble ->vmeta_tgetv // Integer key and in array part? 3944 | lwzx TMP0, TMP1, TMP2 3945 | lfdx f14, TMP1, TMP2 3946 | checknil TMP0; beq >2 3947 |1: 3948 | ins_next1 3949 | stfdx f14, BASE, RA 3950 | ins_next2 3951 | 3952 |2: // Check for __index if table value is nil. 3953 | lwz TAB:TMP2, TAB:RB->metatable 3954 | cmplwi TAB:TMP2, 0 3955 | beq <1 // No metatable: done. 3956 | lbz TMP0, TAB:TMP2->nomm 3957 | andix. TMP0, TMP0, 1<<MM_index 3958 | bne <1 // 'no __index' flag set: done. 3959 | b ->vmeta_tgetv 3960 | 3961 |5: 3962 | checkstr CARG2; bne ->vmeta_tgetv 3963 |.if not DUALNUM 3964 | lwz STR:RC, 4(RC) 3965 |.endif 3966 | b ->BC_TGETS_Z // String key? 3967 break; 3968 case BC_TGETS: 3969 | // RA = dst*8, RB = table*8, RC = str_const*8 (~) 3970 | lwzux CARG1, RB, BASE 3971 | srwi TMP1, RC, 1 3972 | lwz TAB:RB, 4(RB) 3973 | subfic TMP1, TMP1, -4 3974 | checktab CARG1 3975 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 3976 | bne ->vmeta_tgets1 3977 |->BC_TGETS_Z: 3978 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 3979 | lwz TMP0, TAB:RB->hmask 3980 | lwz TMP1, STR:RC->hash 3981 | lwz NODE:TMP2, TAB:RB->node 3982 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 3983 | slwi TMP0, TMP1, 5 3984 | slwi TMP1, TMP1, 3 3985 | sub TMP1, TMP0, TMP1 3986 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 3987 |1: 3988 | lwz CARG1, NODE:TMP2->key 3989 | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) 3990 | lwz CARG2, NODE:TMP2->val 3991 | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2) 3992 | checkstr CARG1; bne >4 3993 | cmpw TMP0, STR:RC; bne >4 3994 | checknil CARG2; beq >5 // Key found, but nil value? 3995 |3: 3996 | stwux CARG2, RA, BASE 3997 | stw TMP1, 4(RA) 3998 | ins_next 3999 | 4000 |4: // Follow hash chain. 4001 | lwz NODE:TMP2, NODE:TMP2->next 4002 | cmplwi NODE:TMP2, 0 4003 | bne <1 4004 | // End of hash chain: key not found, nil result. 4005 | li CARG2, LJ_TNIL 4006 | 4007 |5: // Check for __index if table value is nil. 4008 | lwz TAB:TMP2, TAB:RB->metatable 4009 | cmplwi TAB:TMP2, 0 4010 | beq <3 // No metatable: done. 4011 | lbz TMP0, TAB:TMP2->nomm 4012 | andix. TMP0, TMP0, 1<<MM_index 4013 | bne <3 // 'no __index' flag set: done. 4014 | b ->vmeta_tgets 4015 break; 4016 case BC_TGETB: 4017 | // RA = dst*8, RB = table*8, RC = index*8 4018 | lwzux CARG1, RB, BASE 4019 | srwi TMP0, RC, 3 4020 | lwz TAB:RB, 4(RB) 4021 | checktab CARG1; bne ->vmeta_tgetb 4022 | lwz TMP1, TAB:RB->asize 4023 | lwz TMP2, TAB:RB->array 4024 | cmplw TMP0, TMP1; bge ->vmeta_tgetb 4025 | lwzx TMP1, TMP2, RC 4026 | lfdx f0, TMP2, RC 4027 | checknil TMP1; beq >5 4028 |1: 4029 | ins_next1 4030 | stfdx f0, BASE, RA 4031 | ins_next2 4032 | 4033 |5: // Check for __index if table value is nil. 4034 | lwz TAB:TMP2, TAB:RB->metatable 4035 | cmplwi TAB:TMP2, 0 4036 | beq <1 // No metatable: done. 4037 | lbz TMP2, TAB:TMP2->nomm 4038 | andix. TMP2, TMP2, 1<<MM_index 4039 | bne <1 // 'no __index' flag set: done. 4040 | b ->vmeta_tgetb // Caveat: preserve TMP0! 4041 break; 4042 4043 case BC_TSETV: 4044 | // RA = src*8, RB = table*8, RC = key*8 4045 | lwzux CARG1, RB, BASE 4046 | lwzux CARG2, RC, BASE 4047 | lwz TAB:RB, 4(RB) 4048 |.if DUALNUM 4049 | lwz RC, 4(RC) 4050 |.else 4051 | lfd f0, 0(RC) 4052 |.endif 4053 | checktab CARG1 4054 | checknum cr1, CARG2 4055 | bne ->vmeta_tsetv 4056 |.if DUALNUM 4057 | lwz TMP0, TAB:RB->asize 4058 | bne cr1, >5 4059 | lwz TMP1, TAB:RB->array 4060 | cmplw TMP0, RC 4061 | slwi TMP0, RC, 3 4062 |.else 4063 | bge cr1, >5 4064 | // Convert number key to integer, check for integerness and range. 4065 | fctiwz f1, f0 4066 | fadd f2, f0, TOBIT 4067 | stfd f1, TMPD 4068 | lwz TMP0, TAB:RB->asize 4069 | fsub f2, f2, TOBIT 4070 | lwz TMP2, TMPD_LO 4071 | lwz TMP1, TAB:RB->array 4072 | fcmpu cr1, f0, f2 4073 | cmplw cr0, TMP0, TMP2 4074 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq 4075 | slwi TMP0, TMP2, 3 4076 |.endif 4077 | ble ->vmeta_tsetv // Integer key and in array part? 4078 | lwzx TMP2, TMP1, TMP0 4079 | lbz TMP3, TAB:RB->marked 4080 | lfdx f14, BASE, RA 4081 | checknil TMP2; beq >3 4082 |1: 4083 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) 4084 | stfdx f14, TMP1, TMP0 4085 | bne >7 4086 |2: 4087 | ins_next 4088 | 4089 |3: // Check for __newindex if previous value is nil. 4090 | lwz TAB:TMP2, TAB:RB->metatable 4091 | cmplwi TAB:TMP2, 0 4092 | beq <1 // No metatable: done. 4093 | lbz TMP2, TAB:TMP2->nomm 4094 | andix. TMP2, TMP2, 1<<MM_newindex 4095 | bne <1 // 'no __newindex' flag set: done. 4096 | b ->vmeta_tsetv 4097 | 4098 |5: 4099 | checkstr CARG2; bne ->vmeta_tsetv 4100 |.if not DUALNUM 4101 | lwz STR:RC, 4(RC) 4102 |.endif 4103 | b ->BC_TSETS_Z // String key? 4104 | 4105 |7: // Possible table write barrier for the value. Skip valiswhite check. 4106 | barrierback TAB:RB, TMP3, TMP0 4107 | b <2 4108 break; 4109 case BC_TSETS: 4110 | // RA = src*8, RB = table*8, RC = str_const*8 (~) 4111 | lwzux CARG1, RB, BASE 4112 | srwi TMP1, RC, 1 4113 | lwz TAB:RB, 4(RB) 4114 | subfic TMP1, TMP1, -4 4115 | checktab CARG1 4116 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 4117 | bne ->vmeta_tsets1 4118 |->BC_TSETS_Z: 4119 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8 4120 | lwz TMP0, TAB:RB->hmask 4121 | lwz TMP1, STR:RC->hash 4122 | lwz NODE:TMP2, TAB:RB->node 4123 | stb ZERO, TAB:RB->nomm // Clear metamethod cache. 4124 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 4125 | lfdx f14, BASE, RA 4126 | slwi TMP0, TMP1, 5 4127 | slwi TMP1, TMP1, 3 4128 | sub TMP1, TMP0, TMP1 4129 | lbz TMP3, TAB:RB->marked 4130 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 4131 |1: 4132 | lwz CARG1, NODE:TMP2->key 4133 | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) 4134 | lwz CARG2, NODE:TMP2->val 4135 | lwz NODE:TMP1, NODE:TMP2->next 4136 | checkstr CARG1; bne >5 4137 | cmpw TMP0, STR:RC; bne >5 4138 | checknil CARG2; beq >4 // Key found, but nil value? 4139 |2: 4140 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4141 | stfd f14, NODE:TMP2->val 4142 | bne >7 4143 |3: 4144 | ins_next 4145 | 4146 |4: // Check for __newindex if previous value is nil. 4147 | lwz TAB:TMP1, TAB:RB->metatable 4148 | cmplwi TAB:TMP1, 0 4149 | beq <2 // No metatable: done. 4150 | lbz TMP0, TAB:TMP1->nomm 4151 | andix. TMP0, TMP0, 1<<MM_newindex 4152 | bne <2 // 'no __newindex' flag set: done. 4153 | b ->vmeta_tsets 4154 | 4155 |5: // Follow hash chain. 4156 | cmplwi NODE:TMP1, 0 4157 | mr NODE:TMP2, NODE:TMP1 4158 | bne <1 4159 | // End of hash chain: key not found, add a new one. 4160 | 4161 | // But check for __newindex first. 4162 | lwz TAB:TMP1, TAB:RB->metatable 4163 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) 4164 | stw PC, SAVE_PC 4165 | mr CARG1, L 4166 | cmplwi TAB:TMP1, 0 4167 | stp BASE, L->base 4168 | beq >6 // No metatable: continue. 4169 | lbz TMP0, TAB:TMP1->nomm 4170 | andix. TMP0, TMP0, 1<<MM_newindex 4171 | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check. 4172 |6: 4173 | li TMP0, LJ_TSTR 4174 | stw STR:RC, 4(CARG3) 4175 | mr CARG2, TAB:RB 4176 | stw TMP0, 0(CARG3) 4177 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) 4178 | // Returns TValue *. 4179 | lp BASE, L->base 4180 | stfd f14, 0(CRET1) 4181 | b <3 // No 2nd write barrier needed. 4182 | 4183 |7: // Possible table write barrier for the value. Skip valiswhite check. 4184 | barrierback TAB:RB, TMP3, TMP0 4185 | b <3 4186 break; 4187 case BC_TSETB: 4188 | // RA = src*8, RB = table*8, RC = index*8 4189 | lwzux CARG1, RB, BASE 4190 | srwi TMP0, RC, 3 4191 | lwz TAB:RB, 4(RB) 4192 | checktab CARG1; bne ->vmeta_tsetb 4193 | lwz TMP1, TAB:RB->asize 4194 | lwz TMP2, TAB:RB->array 4195 | lbz TMP3, TAB:RB->marked 4196 | cmplw TMP0, TMP1 4197 | lfdx f14, BASE, RA 4198 | bge ->vmeta_tsetb 4199 | lwzx TMP1, TMP2, RC 4200 | checknil TMP1; beq >5 4201 |1: 4202 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4203 | stfdx f14, TMP2, RC 4204 | bne >7 4205 |2: 4206 | ins_next 4207 | 4208 |5: // Check for __newindex if previous value is nil. 4209 | lwz TAB:TMP1, TAB:RB->metatable 4210 | cmplwi TAB:TMP1, 0 4211 | beq <1 // No metatable: done. 4212 | lbz TMP1, TAB:TMP1->nomm 4213 | andix. TMP1, TMP1, 1<<MM_newindex 4214 | bne <1 // 'no __newindex' flag set: done. 4215 | b ->vmeta_tsetb // Caveat: preserve TMP0! 4216 | 4217 |7: // Possible table write barrier for the value. Skip valiswhite check. 4218 | barrierback TAB:RB, TMP3, TMP0 4219 | b <2 4220 break; 4221 4222 case BC_TSETM: 4223 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 4224 | add RA, BASE, RA 4225 |1: 4226 | add TMP3, KBASE, RD 4227 | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table. 4228 | addic. TMP0, MULTRES, -8 4229 | lwz TMP3, 4(TMP3) // Integer constant is in lo-word. 4230 | srwi CARG3, TMP0, 3 4231 | beq >4 // Nothing to copy? 4232 | add CARG3, CARG3, TMP3 4233 | lwz TMP2, TAB:CARG2->asize 4234 | slwi TMP1, TMP3, 3 4235 | lbz TMP3, TAB:CARG2->marked 4236 | cmplw CARG3, TMP2 4237 | add TMP2, RA, TMP0 4238 | lwz TMP0, TAB:CARG2->array 4239 | bgt >5 4240 | add TMP1, TMP1, TMP0 4241 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4242 |3: // Copy result slots to table. 4243 | lfd f0, 0(RA) 4244 | addi RA, RA, 8 4245 | cmpw cr1, RA, TMP2 4246 | stfd f0, 0(TMP1) 4247 | addi TMP1, TMP1, 8 4248 | blt cr1, <3 4249 | bne >7 4250 |4: 4251 | ins_next 4252 | 4253 |5: // Need to resize array part. 4254 | stp BASE, L->base 4255 | mr CARG1, L 4256 | stw PC, SAVE_PC 4257 | mr SAVE0, RD 4258 | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) 4259 | // Must not reallocate the stack. 4260 | mr RD, SAVE0 4261 | b <1 4262 | 4263 |7: // Possible table write barrier for any value. Skip valiswhite check. 4264 | barrierback TAB:CARG2, TMP3, TMP0 4265 | b <4 4266 break; 4267 4268 /* -- Calls and vararg handling ----------------------------------------- */ 4269 4270 case BC_CALLM: 4271 | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 4272 | add NARGS8:RC, NARGS8:RC, MULTRES 4273 | // Fall through. Assumes BC_CALL follows. 4274 break; 4275 case BC_CALL: 4276 | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 4277 | mr TMP2, BASE 4278 | lwzux TMP0, BASE, RA 4279 | lwz LFUNC:RB, 4(BASE) 4280 | subi NARGS8:RC, NARGS8:RC, 8 4281 | addi BASE, BASE, 8 4282 | checkfunc TMP0; bne ->vmeta_call 4283 | ins_call 4284 break; 4285 4286 case BC_CALLMT: 4287 | // RA = base*8, (RB = 0,) RC = extra_nargs*8 4288 | add NARGS8:RC, NARGS8:RC, MULTRES 4289 | // Fall through. Assumes BC_CALLT follows. 4290 break; 4291 case BC_CALLT: 4292 | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 4293 | lwzux TMP0, RA, BASE 4294 | lwz LFUNC:RB, 4(RA) 4295 | subi NARGS8:RC, NARGS8:RC, 8 4296 | lwz TMP1, FRAME_PC(BASE) 4297 | checkfunc TMP0 4298 | addi RA, RA, 8 4299 | bne ->vmeta_callt 4300 |->BC_CALLT_Z: 4301 | andix. TMP0, TMP1, FRAME_TYPE // Caveat: preserve cr0 until the crand. 4302 | lbz TMP3, LFUNC:RB->ffid 4303 | xori TMP2, TMP1, FRAME_VARG 4304 | cmplwi cr1, NARGS8:RC, 0 4305 | bne >7 4306 |1: 4307 | stw LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. 4308 | li TMP2, 0 4309 | cmplwi cr7, TMP3, 1 // (> FF_C) Calling a fast function? 4310 | beq cr1, >3 4311 |2: 4312 | addi TMP3, TMP2, 8 4313 | lfdx f0, RA, TMP2 4314 | cmplw cr1, TMP3, NARGS8:RC 4315 | stfdx f0, BASE, TMP2 4316 | mr TMP2, TMP3 4317 | bne cr1, <2 4318 |3: 4319 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+gt 4320 | beq >5 4321 |4: 4322 | ins_callt 4323 | 4324 |5: // Tailcall to a fast function with a Lua frame below. 4325 | lwz INS, -4(TMP1) 4326 | decode_RA8 RA, INS 4327 | sub TMP1, BASE, RA 4328 | lwz LFUNC:TMP1, FRAME_FUNC-8(TMP1) 4329 | lwz TMP1, LFUNC:TMP1->pc 4330 | lwz KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. 4331 | b <4 4332 | 4333 |7: // Tailcall from a vararg function. 4334 | andix. TMP0, TMP2, FRAME_TYPEP 4335 | bne <1 // Vararg frame below? 4336 | sub BASE, BASE, TMP2 // Relocate BASE down. 4337 | lwz TMP1, FRAME_PC(BASE) 4338 | andix. TMP0, TMP1, FRAME_TYPE 4339 | b <1 4340 break; 4341 4342 case BC_ITERC: 4343 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) 4344 | mr TMP2, BASE 4345 | add BASE, BASE, RA 4346 | lwz TMP1, -24(BASE) 4347 | lwz LFUNC:RB, -20(BASE) 4348 | lfd f1, -8(BASE) 4349 | lfd f0, -16(BASE) 4350 | stw TMP1, 0(BASE) // Copy callable. 4351 | stw LFUNC:RB, 4(BASE) 4352 | checkfunc TMP1 4353 | stfd f1, 16(BASE) // Copy control var. 4354 | li NARGS8:RC, 16 // Iterators get 2 arguments. 4355 | stfdu f0, 8(BASE) // Copy state. 4356 | bne ->vmeta_call 4357 | ins_call 4358 break; 4359 4360 case BC_ITERN: 4361 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) 4362 |.if JIT 4363 | // NYI: add hotloop, record BC_ITERN. 4364 |.endif 4365 | add RA, BASE, RA 4366 | lwz TAB:RB, -12(RA) 4367 | lwz RC, -4(RA) // Get index from control var. 4368 | lwz TMP0, TAB:RB->asize 4369 | lwz TMP1, TAB:RB->array 4370 | addi PC, PC, 4 4371 |1: // Traverse array part. 4372 | cmplw RC, TMP0 4373 | slwi TMP3, RC, 3 4374 | bge >5 // Index points after array part? 4375 | lwzx TMP2, TMP1, TMP3 4376 | lfdx f0, TMP1, TMP3 4377 | checknil TMP2 4378 | lwz INS, -4(PC) 4379 | beq >4 4380 |.if DUALNUM 4381 | stw RC, 4(RA) 4382 | stw TISNUM, 0(RA) 4383 |.else 4384 | tonum_u f1, RC 4385 |.endif 4386 | addi RC, RC, 1 4387 | addis TMP3, PC, -(BCBIAS_J*4 >> 16) 4388 | stfd f0, 8(RA) 4389 | decode_RD4 TMP1, INS 4390 | stw RC, -4(RA) // Update control var. 4391 | add PC, TMP1, TMP3 4392 |.if not DUALNUM 4393 | stfd f1, 0(RA) 4394 |.endif 4395 |3: 4396 | ins_next 4397 | 4398 |4: // Skip holes in array part. 4399 | addi RC, RC, 1 4400 | b <1 4401 | 4402 |5: // Traverse hash part. 4403 | lwz TMP1, TAB:RB->hmask 4404 | sub RC, RC, TMP0 4405 | lwz TMP2, TAB:RB->node 4406 |6: 4407 | cmplw RC, TMP1 // End of iteration? Branch to ITERL+1. 4408 | slwi TMP3, RC, 5 4409 | bgty <3 4410 | slwi RB, RC, 3 4411 | sub TMP3, TMP3, RB 4412 | lwzx RB, TMP2, TMP3 4413 | lfdx f0, TMP2, TMP3 4414 | add NODE:TMP3, TMP2, TMP3 4415 | checknil RB 4416 | lwz INS, -4(PC) 4417 | beq >7 4418 | lfd f1, NODE:TMP3->key 4419 | addis TMP2, PC, -(BCBIAS_J*4 >> 16) 4420 | stfd f0, 8(RA) 4421 | add RC, RC, TMP0 4422 | decode_RD4 TMP1, INS 4423 | stfd f1, 0(RA) 4424 | addi RC, RC, 1 4425 | add PC, TMP1, TMP2 4426 | stw RC, -4(RA) // Update control var. 4427 | b <3 4428 | 4429 |7: // Skip holes in hash part. 4430 | addi RC, RC, 1 4431 | b <6 4432 break; 4433 4434 case BC_ISNEXT: 4435 | // RA = base*8, RD = target (points to ITERN) 4436 | add RA, BASE, RA 4437 | lwz TMP0, -24(RA) 4438 | lwz CFUNC:TMP1, -20(RA) 4439 | lwz TMP2, -16(RA) 4440 | lwz TMP3, -8(RA) 4441 | cmpwi cr0, TMP2, LJ_TTAB 4442 | cmpwi cr1, TMP0, LJ_TFUNC 4443 | cmpwi cr6, TMP3, LJ_TNIL 4444 | bne cr1, >5 4445 | lbz TMP1, CFUNC:TMP1->ffid 4446 | crand 4*cr0+eq, 4*cr0+eq, 4*cr6+eq 4447 | cmpwi cr7, TMP1, FF_next_N 4448 | srwi TMP0, RD, 1 4449 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq 4450 | add TMP3, PC, TMP0 4451 | bne cr0, >5 4452 | lus TMP1, 0xfffe 4453 | ori TMP1, TMP1, 0x7fff 4454 | stw ZERO, -4(RA) // Initialize control var. 4455 | stw TMP1, -8(RA) 4456 | addis PC, TMP3, -(BCBIAS_J*4 >> 16) 4457 |1: 4458 | ins_next 4459 |5: // Despecialize bytecode if any of the checks fail. 4460 | li TMP0, BC_JMP 4461 | li TMP1, BC_ITERC 4462 | stb TMP0, -1(PC) 4463 | addis PC, TMP3, -(BCBIAS_J*4 >> 16) 4464 | stb TMP1, 3(PC) 4465 | b <1 4466 break; 4467 4468 case BC_VARG: 4469 | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 4470 | lwz TMP0, FRAME_PC(BASE) 4471 | add RC, BASE, RC 4472 | add RA, BASE, RA 4473 | addi RC, RC, FRAME_VARG 4474 | add TMP2, RA, RB 4475 | subi TMP3, BASE, 8 // TMP3 = vtop 4476 | sub RC, RC, TMP0 // RC = vbase 4477 | // Note: RC may now be even _above_ BASE if nargs was < numparams. 4478 | cmplwi cr1, RB, 0 4479 |.if PPE 4480 | sub TMP1, TMP3, RC 4481 | cmpwi TMP1, 0 4482 |.else 4483 | sub. TMP1, TMP3, RC 4484 |.endif 4485 | beq cr1, >5 // Copy all varargs? 4486 | subi TMP2, TMP2, 16 4487 | ble >2 // No vararg slots? 4488 |1: // Copy vararg slots to destination slots. 4489 | lfd f0, 0(RC) 4490 | addi RC, RC, 8 4491 | stfd f0, 0(RA) 4492 | cmplw RA, TMP2 4493 | cmplw cr1, RC, TMP3 4494 | bge >3 // All destination slots filled? 4495 | addi RA, RA, 8 4496 | blt cr1, <1 // More vararg slots? 4497 |2: // Fill up remainder with nil. 4498 | stw TISNIL, 0(RA) 4499 | cmplw RA, TMP2 4500 | addi RA, RA, 8 4501 | blt <2 4502 |3: 4503 | ins_next 4504 | 4505 |5: // Copy all varargs. 4506 | lwz TMP0, L->maxstack 4507 | li MULTRES, 8 // MULTRES = (0+1)*8 4508 | bley <3 // No vararg slots? 4509 | add TMP2, RA, TMP1 4510 | cmplw TMP2, TMP0 4511 | addi MULTRES, TMP1, 8 4512 | bgt >7 4513 |6: 4514 | lfd f0, 0(RC) 4515 | addi RC, RC, 8 4516 | stfd f0, 0(RA) 4517 | cmplw RC, TMP3 4518 | addi RA, RA, 8 4519 | blt <6 // More vararg slots? 4520 | b <3 4521 | 4522 |7: // Grow stack for varargs. 4523 | mr CARG1, L 4524 | stp RA, L->top 4525 | sub SAVE0, RC, BASE // Need delta, because BASE may change. 4526 | stp BASE, L->base 4527 | sub RA, RA, BASE 4528 | stw PC, SAVE_PC 4529 | srwi CARG2, TMP1, 3 4530 | bl extern lj_state_growstack // (lua_State *L, int n) 4531 | lp BASE, L->base 4532 | add RA, BASE, RA 4533 | add RC, BASE, SAVE0 4534 | subi TMP3, BASE, 8 4535 | b <6 4536 break; 4537 4538 /* -- Returns ----------------------------------------------------------- */ 4539 4540 case BC_RETM: 4541 | // RA = results*8, RD = extra_nresults*8 4542 | add RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. 4543 | // Fall through. Assumes BC_RET follows. 4544 break; 4545 4546 case BC_RET: 4547 | // RA = results*8, RD = (nresults+1)*8 4548 | lwz PC, FRAME_PC(BASE) 4549 | add RA, BASE, RA 4550 | mr MULTRES, RD 4551 |1: 4552 | andix. TMP0, PC, FRAME_TYPE 4553 | xori TMP1, PC, FRAME_VARG 4554 | bne ->BC_RETV_Z 4555 | 4556 |->BC_RET_Z: 4557 | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return 4558 | lwz INS, -4(PC) 4559 | cmpwi RD, 8 4560 | subi TMP2, BASE, 8 4561 | subi RC, RD, 8 4562 | decode_RB8 RB, INS 4563 | beq >3 4564 | li TMP1, 0 4565 |2: 4566 | addi TMP3, TMP1, 8 4567 | lfdx f0, RA, TMP1 4568 | cmpw TMP3, RC 4569 | stfdx f0, TMP2, TMP1 4570 | beq >3 4571 | addi TMP1, TMP3, 8 4572 | lfdx f1, RA, TMP3 4573 | cmpw TMP1, RC 4574 | stfdx f1, TMP2, TMP3 4575 | bne <2 4576 |3: 4577 |5: 4578 | cmplw RB, RD 4579 | decode_RA8 RA, INS 4580 | bgt >6 4581 | sub BASE, TMP2, RA 4582 | lwz LFUNC:TMP1, FRAME_FUNC(BASE) 4583 | ins_next1 4584 | lwz TMP1, LFUNC:TMP1->pc 4585 | lwz KBASE, PC2PROTO(k)(TMP1) 4586 | ins_next2 4587 | 4588 |6: // Fill up results with nil. 4589 | subi TMP1, RD, 8 4590 | addi RD, RD, 8 4591 | stwx TISNIL, TMP2, TMP1 4592 | b <5 4593 | 4594 |->BC_RETV_Z: // Non-standard return case. 4595 | andix. TMP2, TMP1, FRAME_TYPEP 4596 | bne ->vm_return 4597 | // Return from vararg function: relocate BASE down. 4598 | sub BASE, BASE, TMP1 4599 | lwz PC, FRAME_PC(BASE) 4600 | b <1 4601 break; 4602 4603 case BC_RET0: case BC_RET1: 4604 | // RA = results*8, RD = (nresults+1)*8 4605 | lwz PC, FRAME_PC(BASE) 4606 | add RA, BASE, RA 4607 | mr MULTRES, RD 4608 | andix. TMP0, PC, FRAME_TYPE 4609 | xori TMP1, PC, FRAME_VARG 4610 | bney ->BC_RETV_Z 4611 | 4612 | lwz INS, -4(PC) 4613 | subi TMP2, BASE, 8 4614 | decode_RB8 RB, INS 4615 if (op == BC_RET1) { 4616 | lfd f0, 0(RA) 4617 | stfd f0, 0(TMP2) 4618 } 4619 |5: 4620 | cmplw RB, RD 4621 | decode_RA8 RA, INS 4622 | bgt >6 4623 | sub BASE, TMP2, RA 4624 | lwz LFUNC:TMP1, FRAME_FUNC(BASE) 4625 | ins_next1 4626 | lwz TMP1, LFUNC:TMP1->pc 4627 | lwz KBASE, PC2PROTO(k)(TMP1) 4628 | ins_next2 4629 | 4630 |6: // Fill up results with nil. 4631 | subi TMP1, RD, 8 4632 | addi RD, RD, 8 4633 | stwx TISNIL, TMP2, TMP1 4634 | b <5 4635 break; 4636 4637 /* -- Loops and branches ------------------------------------------------ */ 4638 4639 case BC_FORL: 4640 |.if JIT 4641 | hotloop 4642 |.endif 4643 | // Fall through. Assumes BC_IFORL follows. 4644 break; 4645 4646 case BC_JFORI: 4647 case BC_JFORL: 4648#if !LJ_HASJIT 4649 break; 4650#endif 4651 case BC_FORI: 4652 case BC_IFORL: 4653 | // RA = base*8, RD = target (after end of loop or start of loop) 4654 vk = (op == BC_IFORL || op == BC_JFORL); 4655 |.if DUALNUM 4656 | // Integer loop. 4657 | lwzux TMP1, RA, BASE 4658 | lwz CARG1, FORL_IDX*8+4(RA) 4659 | cmplw cr0, TMP1, TISNUM 4660 if (vk) { 4661 | lwz CARG3, FORL_STEP*8+4(RA) 4662 | bne >9 4663 |.if GPR64 4664 | // Need to check overflow for (a<<32) + (b<<32). 4665 | rldicr TMP0, CARG1, 32, 31 4666 | rldicr TMP2, CARG3, 32, 31 4667 | add CARG1, CARG1, CARG3 4668 | addo. TMP0, TMP0, TMP2 4669 |.else 4670 | addo. CARG1, CARG1, CARG3 4671 |.endif 4672 | cmpwi cr6, CARG3, 0 4673 | lwz CARG2, FORL_STOP*8+4(RA) 4674 | bso >6 4675 |4: 4676 | stw CARG1, FORL_IDX*8+4(RA) 4677 } else { 4678 | lwz TMP3, FORL_STEP*8(RA) 4679 | lwz CARG3, FORL_STEP*8+4(RA) 4680 | lwz TMP2, FORL_STOP*8(RA) 4681 | lwz CARG2, FORL_STOP*8+4(RA) 4682 | cmplw cr7, TMP3, TISNUM 4683 | cmplw cr1, TMP2, TISNUM 4684 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq 4685 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 4686 | cmpwi cr6, CARG3, 0 4687 | bne >9 4688 } 4689 | blt cr6, >5 4690 | cmpw CARG1, CARG2 4691 |1: 4692 | stw TISNUM, FORL_EXT*8(RA) 4693 if (op != BC_JFORL) { 4694 | srwi RD, RD, 1 4695 } 4696 | stw CARG1, FORL_EXT*8+4(RA) 4697 if (op != BC_JFORL) { 4698 | add RD, PC, RD 4699 } 4700 if (op == BC_FORI) { 4701 | bgt >3 // See FP loop below. 4702 } else if (op == BC_JFORI) { 4703 | addis PC, RD, -(BCBIAS_J*4 >> 16) 4704 | bley >7 4705 } else if (op == BC_IFORL) { 4706 | bgt >2 4707 | addis PC, RD, -(BCBIAS_J*4 >> 16) 4708 } else { 4709 | bley =>BC_JLOOP 4710 } 4711 |2: 4712 | ins_next 4713 |5: // Invert check for negative step. 4714 | cmpw CARG2, CARG1 4715 | b <1 4716 if (vk) { 4717 |6: // Potential overflow. 4718 | checkov TMP0, <4 // Ignore unrelated overflow. 4719 | b <2 4720 } 4721 |.endif 4722 if (vk) { 4723 |.if DUALNUM 4724 |9: // FP loop. 4725 | lfd f1, FORL_IDX*8(RA) 4726 |.else 4727 | lfdux f1, RA, BASE 4728 |.endif 4729 | lfd f3, FORL_STEP*8(RA) 4730 | lfd f2, FORL_STOP*8(RA) 4731 | lwz TMP3, FORL_STEP*8(RA) 4732 | fadd f1, f1, f3 4733 | stfd f1, FORL_IDX*8(RA) 4734 } else { 4735 |.if DUALNUM 4736 |9: // FP loop. 4737 |.else 4738 | lwzux TMP1, RA, BASE 4739 | lwz TMP3, FORL_STEP*8(RA) 4740 | lwz TMP2, FORL_STOP*8(RA) 4741 | cmplw cr0, TMP1, TISNUM 4742 | cmplw cr7, TMP3, TISNUM 4743 | cmplw cr1, TMP2, TISNUM 4744 |.endif 4745 | lfd f1, FORL_IDX*8(RA) 4746 | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt 4747 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 4748 | lfd f2, FORL_STOP*8(RA) 4749 | bge ->vmeta_for 4750 } 4751 | cmpwi cr6, TMP3, 0 4752 if (op != BC_JFORL) { 4753 | srwi RD, RD, 1 4754 } 4755 | stfd f1, FORL_EXT*8(RA) 4756 if (op != BC_JFORL) { 4757 | add RD, PC, RD 4758 } 4759 | fcmpu cr0, f1, f2 4760 if (op == BC_JFORI) { 4761 | addis PC, RD, -(BCBIAS_J*4 >> 16) 4762 } 4763 | blt cr6, >5 4764 if (op == BC_FORI) { 4765 | bgt >3 4766 } else if (op == BC_IFORL) { 4767 |.if DUALNUM 4768 | bgty <2 4769 |.else 4770 | bgt >2 4771 |.endif 4772 |1: 4773 | addis PC, RD, -(BCBIAS_J*4 >> 16) 4774 } else if (op == BC_JFORI) { 4775 | bley >7 4776 } else { 4777 | bley =>BC_JLOOP 4778 } 4779 |.if DUALNUM 4780 | b <2 4781 |.else 4782 |2: 4783 | ins_next 4784 |.endif 4785 |5: // Negative step. 4786 if (op == BC_FORI) { 4787 | bge <2 4788 |3: // Used by integer loop, too. 4789 | addis PC, RD, -(BCBIAS_J*4 >> 16) 4790 } else if (op == BC_IFORL) { 4791 | bgey <1 4792 } else if (op == BC_JFORI) { 4793 | bgey >7 4794 } else { 4795 | bgey =>BC_JLOOP 4796 } 4797 | b <2 4798 if (op == BC_JFORI) { 4799 |7: 4800 | lwz INS, -4(PC) 4801 | decode_RD8 RD, INS 4802 | b =>BC_JLOOP 4803 } 4804 break; 4805 4806 case BC_ITERL: 4807 |.if JIT 4808 | hotloop 4809 |.endif 4810 | // Fall through. Assumes BC_IITERL follows. 4811 break; 4812 4813 case BC_JITERL: 4814#if !LJ_HASJIT 4815 break; 4816#endif 4817 case BC_IITERL: 4818 | // RA = base*8, RD = target 4819 | lwzux TMP1, RA, BASE 4820 | lwz TMP2, 4(RA) 4821 | checknil TMP1; beq >1 // Stop if iterator returned nil. 4822 if (op == BC_JITERL) { 4823 | stw TMP1, -8(RA) 4824 | stw TMP2, -4(RA) 4825 | b =>BC_JLOOP 4826 } else { 4827 | branch_RD // Otherwise save control var + branch. 4828 | stw TMP1, -8(RA) 4829 | stw TMP2, -4(RA) 4830 } 4831 |1: 4832 | ins_next 4833 break; 4834 4835 case BC_LOOP: 4836 | // RA = base*8, RD = target (loop extent) 4837 | // Note: RA/RD is only used by trace recorder to determine scope/extent 4838 | // This opcode does NOT jump, it's only purpose is to detect a hot loop. 4839 |.if JIT 4840 | hotloop 4841 |.endif 4842 | // Fall through. Assumes BC_ILOOP follows. 4843 break; 4844 4845 case BC_ILOOP: 4846 | // RA = base*8, RD = target (loop extent) 4847 | ins_next 4848 break; 4849 4850 case BC_JLOOP: 4851 |.if JIT 4852 | // RA = base*8 (ignored), RD = traceno*8 4853 | lwz TMP1, DISPATCH_J(trace)(DISPATCH) 4854 | srwi RD, RD, 1 4855 | // Traces on PPC don't store the trace number, so use 0. 4856 | stw ZERO, DISPATCH_GL(vmstate)(DISPATCH) 4857 | lwzx TRACE:TMP2, TMP1, RD 4858 | clrso TMP1 4859 | lp TMP2, TRACE:TMP2->mcode 4860 | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) 4861 | mtctr TMP2 4862 | stw L, DISPATCH_GL(jit_L)(DISPATCH) 4863 | addi JGL, DISPATCH, GG_DISP2G+32768 4864 | bctr 4865 |.endif 4866 break; 4867 4868 case BC_JMP: 4869 | // RA = base*8 (only used by trace recorder), RD = target 4870 | branch_RD 4871 | ins_next 4872 break; 4873 4874 /* -- Function headers -------------------------------------------------- */ 4875 4876 case BC_FUNCF: 4877 |.if JIT 4878 | hotcall 4879 |.endif 4880 case BC_FUNCV: /* NYI: compiled vararg functions. */ 4881 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. 4882 break; 4883 4884 case BC_JFUNCF: 4885#if !LJ_HASJIT 4886 break; 4887#endif 4888 case BC_IFUNCF: 4889 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 4890 | lwz TMP2, L->maxstack 4891 | lbz TMP1, -4+PC2PROTO(numparams)(PC) 4892 | lwz KBASE, -4+PC2PROTO(k)(PC) 4893 | cmplw RA, TMP2 4894 | slwi TMP1, TMP1, 3 4895 | bgt ->vm_growstack_l 4896 if (op != BC_JFUNCF) { 4897 | ins_next1 4898 } 4899 |2: 4900 | cmplw NARGS8:RC, TMP1 // Check for missing parameters. 4901 | blt >3 4902 if (op == BC_JFUNCF) { 4903 | decode_RD8 RD, INS 4904 | b =>BC_JLOOP 4905 } else { 4906 | ins_next2 4907 } 4908 | 4909 |3: // Clear missing parameters. 4910 | stwx TISNIL, BASE, NARGS8:RC 4911 | addi NARGS8:RC, NARGS8:RC, 8 4912 | b <2 4913 break; 4914 4915 case BC_JFUNCV: 4916#if !LJ_HASJIT 4917 break; 4918#endif 4919 | NYI // NYI: compiled vararg functions 4920 break; /* NYI: compiled vararg functions. */ 4921 4922 case BC_IFUNCV: 4923 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 4924 | lwz TMP2, L->maxstack 4925 | add TMP1, BASE, RC 4926 | add TMP0, RA, RC 4927 | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC. 4928 | addi TMP3, RC, 8+FRAME_VARG 4929 | lwz KBASE, -4+PC2PROTO(k)(PC) 4930 | cmplw TMP0, TMP2 4931 | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG. 4932 | bge ->vm_growstack_l 4933 | lbz TMP2, -4+PC2PROTO(numparams)(PC) 4934 | mr RA, BASE 4935 | mr RC, TMP1 4936 | ins_next1 4937 | cmpwi TMP2, 0 4938 | addi BASE, TMP1, 8 4939 | beq >3 4940 |1: 4941 | cmplw RA, RC // Less args than parameters? 4942 | lwz TMP0, 0(RA) 4943 | lwz TMP3, 4(RA) 4944 | bge >4 4945 | stw TISNIL, 0(RA) // Clear old fixarg slot (help the GC). 4946 | addi RA, RA, 8 4947 |2: 4948 | addic. TMP2, TMP2, -1 4949 | stw TMP0, 8(TMP1) 4950 | stw TMP3, 12(TMP1) 4951 | addi TMP1, TMP1, 8 4952 | bne <1 4953 |3: 4954 | ins_next2 4955 | 4956 |4: // Clear missing parameters. 4957 | li TMP0, LJ_TNIL 4958 | b <2 4959 break; 4960 4961 case BC_FUNCC: 4962 case BC_FUNCCW: 4963 | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 4964 if (op == BC_FUNCC) { 4965 | lp RD, CFUNC:RB->f 4966 } else { 4967 | lp RD, DISPATCH_GL(wrapf)(DISPATCH) 4968 } 4969 | add TMP1, RA, NARGS8:RC 4970 | lwz TMP2, L->maxstack 4971 | .toc lp TMP3, 0(RD) 4972 | add RC, BASE, NARGS8:RC 4973 | stp BASE, L->base 4974 | cmplw TMP1, TMP2 4975 | stp RC, L->top 4976 | li_vmstate C 4977 |.if TOC 4978 | mtctr TMP3 4979 |.else 4980 | mtctr RD 4981 |.endif 4982 if (op == BC_FUNCCW) { 4983 | lp CARG2, CFUNC:RB->f 4984 } 4985 | mr CARG1, L 4986 | bgt ->vm_growstack_c // Need to grow stack. 4987 | .toc lp TOCREG, TOC_OFS(RD) 4988 | .tocenv lp ENVREG, ENV_OFS(RD) 4989 | st_vmstate 4990 | bctrl // (lua_State *L [, lua_CFunction f]) 4991 | // Returns nresults. 4992 | lp BASE, L->base 4993 | .toc ld TOCREG, SAVE_TOC 4994 | slwi RD, CRET1, 3 4995 | lp TMP1, L->top 4996 | li_vmstate INTERP 4997 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. 4998 | sub RA, TMP1, RD // RA = L->top - nresults*8 4999 | st_vmstate 5000 | b ->vm_returnc 5001 break; 5002 5003 /* ---------------------------------------------------------------------- */ 5004 5005 default: 5006 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); 5007 exit(2); 5008 break; 5009 } 5010} 5011 5012static int build_backend(BuildCtx *ctx) 5013{ 5014 int op; 5015 5016 dasm_growpc(Dst, BC__MAX); 5017 5018 build_subroutines(ctx); 5019 5020 |.code_op 5021 for (op = 0; op < BC__MAX; op++) 5022 build_ins(ctx, (BCOp)op, op); 5023 5024 return BC__MAX; 5025} 5026 5027/* Emit pseudo frame-info for all assembler functions. */ 5028static void emit_asm_debug(BuildCtx *ctx) 5029{ 5030 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); 5031 int i; 5032 switch (ctx->mode) { 5033 case BUILD_elfasm: 5034 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); 5035 fprintf(ctx->fp, 5036 ".Lframe0:\n" 5037 "\t.long .LECIE0-.LSCIE0\n" 5038 ".LSCIE0:\n" 5039 "\t.long 0xffffffff\n" 5040 "\t.byte 0x1\n" 5041 "\t.string \"\"\n" 5042 "\t.uleb128 0x1\n" 5043 "\t.sleb128 -4\n" 5044 "\t.byte 65\n" 5045 "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" 5046 "\t.align 2\n" 5047 ".LECIE0:\n\n"); 5048 fprintf(ctx->fp, 5049 ".LSFDE0:\n" 5050 "\t.long .LEFDE0-.LASFDE0\n" 5051 ".LASFDE0:\n" 5052 "\t.long .Lframe0\n" 5053 "\t.long .Lbegin\n" 5054 "\t.long %d\n" 5055 "\t.byte 0xe\n\t.uleb128 %d\n" 5056 "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" 5057 "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", 5058 fcofs, CFRAME_SIZE); 5059 for (i = 14; i <= 31; i++) 5060 fprintf(ctx->fp, 5061 "\t.byte %d\n\t.uleb128 %d\n" 5062 "\t.byte %d\n\t.uleb128 %d\n", 5063 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)); 5064 fprintf(ctx->fp, 5065 "\t.align 2\n" 5066 ".LEFDE0:\n\n"); 5067#if LJ_HASFFI 5068 fprintf(ctx->fp, 5069 ".LSFDE1:\n" 5070 "\t.long .LEFDE1-.LASFDE1\n" 5071 ".LASFDE1:\n" 5072 "\t.long .Lframe0\n" 5073#if LJ_TARGET_PS3 5074 "\t.long .lj_vm_ffi_call\n" 5075#else 5076 "\t.long lj_vm_ffi_call\n" 5077#endif 5078 "\t.long %d\n" 5079 "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" 5080 "\t.byte 0x8e\n\t.uleb128 2\n" 5081 "\t.byte 0xd\n\t.uleb128 0xe\n" 5082 "\t.align 2\n" 5083 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); 5084#endif 5085#if !LJ_NO_UNWIND 5086 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); 5087 fprintf(ctx->fp, 5088 ".Lframe1:\n" 5089 "\t.long .LECIE1-.LSCIE1\n" 5090 ".LSCIE1:\n" 5091 "\t.long 0\n" 5092 "\t.byte 0x1\n" 5093 "\t.string \"zPR\"\n" 5094 "\t.uleb128 0x1\n" 5095 "\t.sleb128 -4\n" 5096 "\t.byte 65\n" 5097 "\t.uleb128 6\n" /* augmentation length */ 5098 "\t.byte 0x1b\n" /* pcrel|sdata4 */ 5099 "\t.long lj_err_unwind_dwarf-.\n" 5100 "\t.byte 0x1b\n" /* pcrel|sdata4 */ 5101 "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" 5102 "\t.align 2\n" 5103 ".LECIE1:\n\n"); 5104 fprintf(ctx->fp, 5105 ".LSFDE2:\n" 5106 "\t.long .LEFDE2-.LASFDE2\n" 5107 ".LASFDE2:\n" 5108 "\t.long .LASFDE2-.Lframe1\n" 5109 "\t.long .Lbegin-.\n" 5110 "\t.long %d\n" 5111 "\t.uleb128 0\n" /* augmentation length */ 5112 "\t.byte 0xe\n\t.uleb128 %d\n" 5113 "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" 5114 "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", 5115 fcofs, CFRAME_SIZE); 5116 for (i = 14; i <= 31; i++) 5117 fprintf(ctx->fp, 5118 "\t.byte %d\n\t.uleb128 %d\n" 5119 "\t.byte %d\n\t.uleb128 %d\n", 5120 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)); 5121 fprintf(ctx->fp, 5122 "\t.align 2\n" 5123 ".LEFDE2:\n\n"); 5124#if LJ_HASFFI 5125 fprintf(ctx->fp, 5126 ".Lframe2:\n" 5127 "\t.long .LECIE2-.LSCIE2\n" 5128 ".LSCIE2:\n" 5129 "\t.long 0\n" 5130 "\t.byte 0x1\n" 5131 "\t.string \"zR\"\n" 5132 "\t.uleb128 0x1\n" 5133 "\t.sleb128 -4\n" 5134 "\t.byte 65\n" 5135 "\t.uleb128 1\n" /* augmentation length */ 5136 "\t.byte 0x1b\n" /* pcrel|sdata4 */ 5137 "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" 5138 "\t.align 2\n" 5139 ".LECIE2:\n\n"); 5140 fprintf(ctx->fp, 5141 ".LSFDE3:\n" 5142 "\t.long .LEFDE3-.LASFDE3\n" 5143 ".LASFDE3:\n" 5144 "\t.long .LASFDE3-.Lframe2\n" 5145 "\t.long lj_vm_ffi_call-.\n" 5146 "\t.long %d\n" 5147 "\t.uleb128 0\n" /* augmentation length */ 5148 "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" 5149 "\t.byte 0x8e\n\t.uleb128 2\n" 5150 "\t.byte 0xd\n\t.uleb128 0xe\n" 5151 "\t.align 2\n" 5152 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); 5153#endif 5154#endif 5155 break; 5156 default: 5157 break; 5158 } 5159} 5160 5161