|// Low-level VM code for PowerPC CPUs.
|// Bytecode interpreter, fast functions and helper functions.
|// Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
|
|.arch ppc
|.section code_op, code_sub
|
|.actionlist build_actionlist
|.globals GLOB_
|.globalnames globnames
|.externnames extnames
|
|// Note: The ragged indentation of the instructions is intentional.
|//       The starting columns indicate data dependencies.
|
|//-----------------------------------------------------------------------
|
|// DynASM defines used by the PPC port:
|//
|// P64     64 bit pointers (only for GPR64 testing).
|//         Note: a full PPC64 _LP64 port is not planned.
|// GPR64   64 bit registers (but possibly 32 bit pointers, e.g. PS3).
|//         Affects reg saves, stack layout, carry/overflow/dot flags etc.
|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360).
|// TOC     Need table of contents (64 bit or 32 bit variant, e.g. PS3).
|//         Function pointers are really a struct: code, TOC, env (optional).
|// TOCENV  Function pointers have an environment pointer, too (not on PS3).
|// PPE     Power Processor Element of Cell (PS3) or Xenon (Xbox 360).
|//         Must avoid (slow) micro-coded instructions.
|
|// Pointer-sized memory access helpers, selected by P64:
|//   lpx a, b, c   indexed load  (ldx with P64, lwzx otherwise)
|//   lp  a, b      load          (ld  with P64, lwz  otherwise)
|//   stp a, b      store         (std with P64, stw  otherwise)
|// decode_OPP aliases the opcode decoder whose scale matches the pointer
|// size (decode_OP8 with P64, decode_OP4 otherwise).
|.if P64
|.define TOC, 1
|.define TOCENV, 1
|.macro lpx, a, b, c; ldx a, b, c; .endmacro
|.macro lp, a, b; ld a, b; .endmacro
|.macro stp, a, b; std a, b; .endmacro
|.define decode_OPP, decode_OP8
|.if FFI
|// Missing: Calling conventions, 64 bit regs, TOC.
|.error lib_ffi not yet implemented for PPC64
|.endif
|.else
|.macro lpx, a, b, c; lwzx a, b, c; .endmacro
|.macro lp, a, b; lwz a, b; .endmacro
|.macro stp, a, b; stw a, b; .endmacro
|.define decode_OPP, decode_OP4
|.endif
|
|// Convenience macros for TOC handling.
|//   blex target   call an external function (followed by a nop with TOC).
|//   .toc a, b     emit the instruction "a, b" only when TOC is defined.
|.if TOC
|// Linker needs a TOC patch area for every external call relocation.
|.macro blex, target; bl extern target@plt; nop; .endmacro
|.macro .toc, a, b; a, b; .endmacro
|.if P64
|.define TOC_OFS, 8
|.define ENV_OFS, 16
|.else
|.define TOC_OFS, 4
|.define ENV_OFS, 8
|.endif
|.else  // No TOC.
|.macro blex, target; bl extern target@plt; .endmacro
|.macro .toc, a, b; .endmacro
|.endif
|// Emit the instruction "a, b" only when TOCENV is defined.
|.macro .tocenv, a, b; .if TOCENV; a, b; .endif; .endmacro
|
|// Emit the instruction "a, b" only when GPR64 is defined.
|.macro .gpr64, a, b; .if GPR64; a, b; .endif; .endmacro
|
|// y = a & i, followed by a compare of the result against zero.
|// On PPE the andi. form is replaced by rlwinm + cmpwi.
|// NOTE(review): the PPE variant keeps the whole bit range between the
|// highest and the lowest set bit of i, so it presumably expects i to be a
|// contiguous bit mask -- confirm for every constant passed in.
|.macro andix., y, a, i
|.if PPE
|  rlwinm y, a, 0, 31-lj_fls(i), 31-lj_ffs(i)
|  cmpwi y, 0
|.else
|  andi. y, a, i
|.endif
|.endmacro
|
|//-----------------------------------------------------------------------
|
|// Fixed register assignments for the interpreter.
|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA)
|
|// The following must be C callee-save (but BASE is often refetched).
|.define BASE, r14  // Base of current Lua stack frame.
|.define KBASE, r15  // Constants of current Lua function.
|.define PC, r16  // Next PC.
|.define DISPATCH, r17  // Opcode dispatch table.
|.define LREG, r18  // Register holding lua_State (also in SAVE_L).
|.define MULTRES, r19  // Size of multi-result: (nresults+1)*8.
|.define JGL, r31  // On-trace: global_State + 32768.
|
|// Constants for type-comparisons, stores and conversions. C callee-save.
|.define TISNUM, r22
|.define TISNIL, r23
|.define ZERO, r24
|.define TOBIT, f30  // 2^52 + 2^51.
|.define TONUM, f31  // 2^52 + 2^51 + 2^31.
|
|// The following temporaries are not saved across C calls, except for RA.
|.define RA, r20  // Callee-save.
|.define RB, r10
|.define RC, r11
|.define RD, r12
|.define INS, r7  // Overlaps CARG5.
|
|.define TMP0, r0
|.define TMP1, r8
|.define TMP2, r9
|.define TMP3, r6  // Overlaps CARG4.
|
|// Saved temporaries.
|.define SAVE0, r21
|
|// Calling conventions.
|.define CARG1, r3
|.define CARG2, r4
|.define CARG3, r5
|.define CARG4, r6  // Overlaps TMP3.
|.define CARG5, r7  // Overlaps INS.
|
|.define FARG1, f1
|.define FARG2, f2
|
|.define CRET1, r3
|.define CRET2, r4
|
|.define TOCREG, r2  // TOC register (only used by C code).
|.define ENVREG, r11  // Environment pointer (nested C functions).
|
|// Stack layout while in interpreter. Must match with lj_frame.h.
|// Three layouts follow: GPR64+FRAME32, GPR64 without FRAME32, and 32 bit.
|// Each one defines the same set of names (SAVE_*, TMPD*, TONUM*,
|// CFRAME_SPACE); SAVE_TOC is only defined by the GPR64 non-FRAME32 layout.
|.if GPR64
|.if FRAME32
|
|//                      456(sp) // \ 32/64 bit C frame info
|.define TONUM_LO, 452(sp)  // |
|.define TONUM_HI, 448(sp)  // |
|.define TMPD_LO, 444(sp)  // |
|.define TMPD_HI, 440(sp)  // |
|.define SAVE_CR, 432(sp)  // | 64 bit CR save.
|.define SAVE_ERRF, 424(sp)  //  > Parameter save area.
|.define SAVE_NRES, 420(sp)  // |
|.define SAVE_L, 416(sp)  // |
|.define SAVE_PC, 412(sp)  // |
|.define SAVE_MULTRES, 408(sp)  // |
|.define SAVE_CFRAME, 400(sp)  // /  64 bit C frame chain.
|//                      392(sp) // Reserved.
|.define CFRAME_SPACE, 384  // Delta for sp.
|// Back chain for sp:   384(sp) <-- sp entering interpreter
|.define SAVE_LR, 376(sp)  // 32 bit LR stored in hi-part.
|.define SAVE_GPR_, 232  // .. 232+18*8: 64 bit GPR saves.
|.define SAVE_FPR_, 88  // .. 88+18*8: 64 bit FPR saves.
|//                      80(sp)  // Needed for 16 byte stack frame alignment.
|//                      16(sp)  // Callee parameter save area (ABI mandated).
|//                      8(sp)   // Reserved
|// Back chain for sp:   0(sp)   <-- sp while in interpreter
|// 32 bit sp stored in hi-part of 0(sp).
|
|.define TMPD_BLO, 447(sp)
|.define TMPD, TMPD_HI
|.define TONUM_D, TONUM_HI
|
|.else
|
|//                      508(sp) // \ 32 bit C frame info.
|.define SAVE_ERRF, 472(sp)  // |
|.define SAVE_NRES, 468(sp)  // |
|.define SAVE_L, 464(sp)  //  > Parameter save area.
|.define SAVE_PC, 460(sp)  // |
|.define SAVE_MULTRES, 456(sp)  // |
|.define SAVE_CFRAME, 448(sp)  // /  64 bit C frame chain.
|.define SAVE_LR, 416(sp)
|.define CFRAME_SPACE, 400  // Delta for sp.
|// Back chain for sp:   400(sp) <-- sp entering interpreter
|.define SAVE_FPR_, 256  // .. 256+18*8: 64 bit FPR saves.
|.define SAVE_GPR_, 112  // .. 112+18*8: 64 bit GPR saves.
|//                      48(sp)  // Callee parameter save area (ABI mandated).
|.define SAVE_TOC, 40(sp)  // TOC save area.
|.define TMPD_LO, 36(sp)  // \ Link editor temp (ABI mandated).
|.define TMPD_HI, 32(sp)  // /
|.define TONUM_LO, 28(sp)  // \ Compiler temp (ABI mandated).
|.define TONUM_HI, 24(sp)  // /
|// Next frame lr:       16(sp)
|.define SAVE_CR, 8(sp)  // 64 bit CR save.
|// Back chain for sp:   0(sp)   <-- sp while in interpreter
|
|.define TMPD_BLO, 39(sp)
|.define TMPD, TMPD_HI
|.define TONUM_D, TONUM_HI
|
|.endif
|.else
|
|.define SAVE_LR, 276(sp)
|.define CFRAME_SPACE, 272  // Delta for sp.
|// Back chain for sp:   272(sp) <-- sp entering interpreter
|.define SAVE_FPR_, 128  // .. 128+18*8: 64 bit FPR saves.
|.define SAVE_GPR_, 56  // .. 56+18*4: 32 bit GPR saves.
|.define SAVE_CR, 52(sp)  // 32 bit CR save.
|.define SAVE_ERRF, 48(sp)  // 32 bit C frame info.
|.define SAVE_NRES, 44(sp)
|.define SAVE_CFRAME, 40(sp)
|.define SAVE_L, 36(sp)
|.define SAVE_PC, 32(sp)
|.define SAVE_MULTRES, 28(sp)
|.define UNUSED1, 24(sp)
|.define TMPD_LO, 20(sp)
|.define TMPD_HI, 16(sp)
|.define TONUM_LO, 12(sp)
|.define TONUM_HI, 8(sp)
|// Next frame lr:       4(sp)
|// Back chain for sp:   0(sp)   <-- sp while in interpreter
|
|.define TMPD_BLO, 23(sp)
|.define TMPD, TMPD_HI
|.define TONUM_D, TONUM_HI
|
|.endif
|
|// Store GPR r<reg> and FPR f<reg> into their save slots. The slot index is
|// reg-14; GPR slots are 8 bytes wide with GPR64 and 4 bytes otherwise,
|// FPR slots are always 8 bytes.
|.macro save_, reg
|.if GPR64
|  std r..reg, SAVE_GPR_+(reg-14)*8(sp)
|.else
|  stw r..reg, SAVE_GPR_+(reg-14)*4(sp)
|.endif
|  stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
|.endmacro
|// Reload GPR r<reg> and FPR f<reg> from the slots written by save_.
|.macro rest_, reg
|.if GPR64
|  ld r..reg, SAVE_GPR_+(reg-14)*8(sp)
|.else
|  lwz r..reg, SAVE_GPR_+(reg-14)*4(sp)
|.endif
|  lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
|.endmacro
|
|// Interpreter entry prologue: allocate CFRAME_SPACE bytes of stack (with
|// back chain update), save r14-r31 and f14-f31, LR and CR, and the TOC
|// register when TOC is defined. Uses r0 as scratch.
|.macro saveregs
|.if GPR64 and not FRAME32
|  stdu sp, -CFRAME_SPACE(sp)
|.else
|  stwu sp, -CFRAME_SPACE(sp)
|.endif
|  save_ 14; save_ 15; save_ 16
|  mflr r0
|  save_ 17; save_ 18; save_ 19; save_ 20; save_ 21; save_ 22
|.if GPR64 and not FRAME32
|  std r0, SAVE_LR
|.else
|  stw r0, SAVE_LR
|.endif
|  save_ 23; save_ 24; save_ 25
|  mfcr r0
|  save_ 26; save_ 27; save_ 28; save_ 29; save_ 30; save_ 31
|.if GPR64
|  std r0, SAVE_CR
|.else
|  stw r0, SAVE_CR
|.endif
|  .toc std TOCREG, SAVE_TOC
|.endmacro
|
|// Interpreter exit epilogue: reload LR, the CR fields selected by mask
|// 0x38 (one field per mtocrf on PPE), r14-r31 and f14-f31, then pop the
|// frame. Uses r0 and r12 as scratch. Does not reload the TOC register.
|.macro restoreregs
|.if GPR64 and not FRAME32
|  ld r0, SAVE_LR
|.else
|  lwz r0, SAVE_LR
|.endif
|.if GPR64
|  ld r12, SAVE_CR
|.else
|  lwz r12, SAVE_CR
|.endif
|  rest_ 14; rest_ 15; rest_ 16; rest_ 17; rest_ 18; rest_ 19
|  mtlr r0;
|.if PPE; mtocrf 0x20, r12; .else; mtcrf 0x38, r12; .endif
|  rest_ 20; rest_ 21; rest_ 22; rest_ 23; rest_ 24; rest_ 25
|.if PPE; mtocrf 0x10, r12; .endif
|  rest_ 26; rest_ 27; rest_ 28; rest_ 29; rest_ 30; rest_ 31
|.if PPE; mtocrf 0x08, r12; .endif
|  addi sp, sp, CFRAME_SPACE
|.endmacro
|
|// Type definitions. Some of these are only used for documentation.
|.type L, lua_State, LREG
|.type GL, global_State
|.type TVALUE, TValue
|.type GCOBJ, GCobj
|.type STR, GCstr
|.type TAB, GCtab
|.type LFUNC, GCfuncL
|.type CFUNC, GCfuncC
|.type PROTO, GCproto
|.type UPVAL, GCupval
|.type NODE, Node
|.type NARGS8, int
|.type TRACE, GCtrace
|
|//-----------------------------------------------------------------------
|
|// These basic macros should really be part of DynASM.
|// All of them are 32 bit shift/rotate/subtract forms expressed via
|// rlwinm/rlwnm/addi.
|.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro
|.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro
|.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro
|.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro
|.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro
|
|// Trap for not-yet-implemented parts.
|// The trap condition compares sp with itself for equality.
|.macro NYI; tw 4, sp, sp; .endmacro
|
|// int/FP conversions.
|// tonum_i/tonum_u store the integer into the low word of the TONUM stack
|// slot, reload the whole slot as a double and subtract a bias constant
|// (TONUM resp. TOBIT). The high word of the slot (TONUM_HI) is not written
|// here; the VM entry code stores it.
|// tonum_i modifies reg (its top bit is flipped by the xoris).
|.macro tonum_i, freg, reg
|  xoris reg, reg, 0x8000
|  stw reg, TONUM_LO
|  lfd freg, TONUM_D
|  fsub freg, freg, TONUM
|.endmacro
|
|.macro tonum_u, freg, reg
|  stw reg, TONUM_LO
|  lfd freg, TONUM_D
|  fsub freg, freg, TOBIT
|.endmacro
|
|// reg = integer conversion of freg (fctiwz), passed through the TMPD
|// stack slot. tmpfreg receives the intermediate conversion result.
|.macro toint, reg, freg, tmpfreg
|  fctiwz tmpfreg, freg
|  stfd tmpfreg, TMPD
|  lwz reg, TMPD_LO
|.endmacro
|
|// Two-operand form: freg itself is overwritten by the conversion result.
|.macro toint, reg, freg
|  toint reg, freg, freg
|.endmacro
|
|//-----------------------------------------------------------------------
|
|// Access to frame relative to BASE.
|.define FRAME_PC, -8
|.define FRAME_FUNC, -4
|
|// Instruction decode.
|// Field extractors for a 32 bit bytecode instruction word. Each one is a
|// single rotate-and-mask; the numeric suffix is the scale factor applied
|// to the extracted field:
|//   decode_OP4: dst = (ins & 0xff) * 4
|//   decode_OP8: dst = (ins & 0xff) * 8
|//   decode_RA8: dst = ((ins >> 8) & 0xff) * 8
|//   decode_RB8: dst = (ins >> 24) * 8
|//   decode_RC8: dst = ((ins >> 16) & 0xff) * 8
|//   decode_RD8: dst = (ins >> 16) * 8
|//   decode_OP1: dst = ins & 0xff
|//   decode_RD4: dst = (ins >> 16) * 4
|.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro
|.macro decode_OP8, dst, ins; rlwinm dst, ins, 3, 21, 28; .endmacro
|.macro decode_RA8, dst, ins; rlwinm dst, ins, 27, 21, 28; .endmacro
|.macro decode_RB8, dst, ins; rlwinm dst, ins, 11, 21, 28; .endmacro
|.macro decode_RC8, dst, ins; rlwinm dst, ins, 19, 21, 28; .endmacro
|.macro decode_RD8, dst, ins; rlwinm dst, ins, 19, 13, 28; .endmacro
|
|.macro decode_OP1, dst, ins; rlwinm dst, ins, 0, 24, 31; .endmacro
|.macro decode_RD4, dst, ins; rlwinm dst, ins, 18, 14, 29; .endmacro
|
|// Instruction fetch.
|// Loads the word at PC into INS and advances PC by 4.
|.macro ins_NEXT1
|  lwz INS, 0(PC)
|  addi PC, PC, 4
|.endmacro
|// Instruction decode+dispatch. Note: optimized for e300!
|// Looks up the handler for the opcode in INS via DISPATCH, pre-decodes
|// RA/RB/RC/RD (all scaled by 8) and jumps through CTR.
|// Clobbers TMP0, TMP1 and CTR.
|.macro ins_NEXT2
|  decode_OPP TMP1, INS
|  lpx TMP0, DISPATCH, TMP1
|  mtctr TMP0
|  decode_RB8 RB, INS
|  decode_RD8 RD, INS
|  decode_RA8 RA, INS
|  decode_RC8 RC, INS
|  bctr
|.endmacro
|// Fetch followed by decode+dispatch.
|.macro ins_NEXT
|  ins_NEXT1
|  ins_NEXT2
|.endmacro
|
|// Instruction footer.
|// The ".if 1" below statically selects the replicated variant; the
|// ".else" branch is kept as an alternative.
|.if 1
|  // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
|  .define ins_next, ins_NEXT
|  .define ins_next_, ins_NEXT
|  .define ins_next1, ins_NEXT1
|  .define ins_next2, ins_NEXT2
|.else
|  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
|  // Affects only certain kinds of benchmarks (and only with -j off).
|  .macro ins_next
|    b ->ins_next
|  .endmacro
|  .macro ins_next1
|  .endmacro
|  .macro ins_next2
|    b ->ins_next
|  .endmacro
|  .macro ins_next_
|  ->ins_next:
|    ins_NEXT
|  .endmacro
|.endif
|
|// Call decode and dispatch.
|// Dispatch to the first instruction of the function in RB: loads PC from
|// the function object, fetches the first instruction, sets RA to
|// BASE + decoded A operand and jumps to the opcode handler.
|// Clobbers INS, TMP0, TMP1 and CTR.
|.macro ins_callt
|  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
|  lwz PC, LFUNC:RB->pc
|  lwz INS, 0(PC)
|  addi PC, PC, 4
|  decode_OPP TMP1, INS
|  decode_RA8 RA, INS
|  lpx TMP0, DISPATCH, TMP1
|  add RA, RA, BASE
|  mtctr TMP0
|  bctr
|.endmacro
|
|// Same as ins_callt, but first stores the caller PC into the new frame.
|.macro ins_call
|  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
|  stw PC, FRAME_PC(BASE)
|  ins_callt
|.endmacro
|
|//-----------------------------------------------------------------------
|
|// Macros to test operand types.
|// Each one only performs the compare; the caller supplies the branch.
|// checknum compares unsigned against the TISNUM register (optionally into
|// an explicit CR field), the others compare signed against an immediate tag.
|.macro checknum, reg; cmplw reg, TISNUM; .endmacro
|.macro checknum, cr, reg; cmplw cr, reg, TISNUM; .endmacro
|.macro checkstr, reg; cmpwi reg, LJ_TSTR; .endmacro
|.macro checktab, reg; cmpwi reg, LJ_TTAB; .endmacro
|.macro checkfunc, reg; cmpwi reg, LJ_TFUNC; .endmacro
|.macro checknil, reg; cmpwi reg, LJ_TNIL; .endmacro
|
|// PC += (RD >> 1) - (high 16 bits of BCBIAS_J*4, applied via addis).
|// Clobbers TMP0.
|.macro branch_RD
|  srwi TMP0, RD, 1
|  addis PC, PC, -(BCBIAS_J*4 >> 16)
|  add PC, PC, TMP0
|.endmacro
|
|// Assumes DISPATCH is relative to GL.
#define DISPATCH_GL(field)	(GG_DISP2G + (int)offsetof(global_State, field))
#define DISPATCH_J(field)	(GG_DISP2J + (int)offsetof(jit_State, field))
|
#define PC2PROTO(field)  ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
|
|// Decrement the 16 bit counter selected by bits of PC (offset taken from
|// GG_DISP2HOT relative to DISPATCH) by delta and branch to target when the
|// result is negative. Clobbers TMP1 and TMP2.
|.macro hotcheck, delta, target
|  rlwinm TMP1, PC, 31, 25, 30
|  addi TMP1, TMP1, GG_DISP2HOT
|  lhzx TMP2, DISPATCH, TMP1
|  addic. TMP2, TMP2, -delta
|  sthx TMP2, DISPATCH, TMP1
|  blt target
|.endmacro
|
|.macro hotloop
|  hotcheck HOTCOUNT_LOOP, ->vm_hotloop
|.endmacro
|
|.macro hotcall
|  hotcheck HOTCOUNT_CALL, ->vm_hotcall
|.endmacro
|
|// Set current VM state. Uses TMP0.
|// li_vmstate loads the complemented state constant into TMP0; st_vmstate
|// stores TMP0 to the vmstate field. TMP0 must stay intact in between.
|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro
|.macro st_vmstate; stw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro
|
|// Move table write barrier back. Overwrites mark and tmp.
|// tab is linked at the head of the gc.grayagain list and its mark byte is
|// stored back with bit 0x04 cleared (the rlwinm mask wraps around that bit).
|// The caller must have loaded tab->marked into mark beforehand.
|.macro barrierback, tab, mark, tmp
|  lwz tmp, DISPATCH_GL(gc.grayagain)(DISPATCH)
|  // Assumes LJ_GC_BLACK is 0x04.
|  rlwinm mark, mark, 0, 30, 28  // black2gray(tab)
|  stw tab, DISPATCH_GL(gc.grayagain)(DISPATCH)
|  stb mark, tab->marked
|  stw tmp, tab->gclist
|.endmacro
|
|//-----------------------------------------------------------------------

/* Generate subroutines used by opcodes and other parts of the VM. */
/* The .code_sub section should be last to help static branch prediction. */
static void build_subroutines(BuildCtx *ctx)
{
|.code_sub
|
|//-----------------------------------------------------------------------
|//-- Return handling ----------------------------------------------------
|//-----------------------------------------------------------------------
|
|->vm_returnp:
|  // See vm_return. Also: TMP2 = previous base.
|  andix. TMP0, PC, FRAME_P
|  li TMP1, LJ_TTRUE
|  beq ->cont_dispatch
|
|  // Return from pcall or xpcall fast func.
|  lwz PC, FRAME_PC(TMP2)  // Fetch PC of previous frame.
|  mr BASE, TMP2  // Restore caller base.
|  // Prepending may overwrite the pcall frame, so do it at the end.
|  stwu TMP1, FRAME_PC(RA)  // Prepend true to results.
|
|->vm_returnc:
|  addi RD, RD, 8  // RD = (nresults+1)*8.
|  andix. TMP0, PC, FRAME_TYPE
|  cmpwi cr1, RD, 0
|  li CRET1, LUA_YIELD
|  beq cr1, ->vm_unwind_c_eh
|  mr MULTRES, RD
|  beq ->BC_RET_Z  // Handle regular return to Lua.
|
|->vm_return:
|  // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
|  // TMP0 = PC & FRAME_TYPE
|  cmpwi TMP0, FRAME_C
|  rlwinm TMP2, PC, 0, 0, 28
|  li_vmstate C
|  sub TMP2, BASE, TMP2  // TMP2 = previous base.
|  bney ->vm_returnp
|
|  // Return to C: copy the results down to BASE-8 and leave the VM.
|  addic. TMP1, RD, -8
|  stp TMP2, L->base
|  lwz TMP2, SAVE_NRES
|  subi BASE, BASE, 8
|  st_vmstate
|  slwi TMP2, TMP2, 3  // TMP2 = wanted (nresults+1)*8.
|  beq >2
|1:
|  addic. TMP1, TMP1, -8
|  lfd f0, 0(RA)
|  addi RA, RA, 8
|  stfd f0, 0(BASE)
|  addi BASE, BASE, 8
|  bney <1
|
|2:
|  cmpw TMP2, RD  // More/less results wanted?
|  bne >6
|3:
|  stp BASE, L->top  // Store new top.
|
|->vm_leave_cp:
|  lp TMP0, SAVE_CFRAME  // Restore previous C frame.
|  li CRET1, 0  // Ok return status for vm_pcall.
|  stp TMP0, L->cframe
|
|->vm_leave_unw:
|  restoreregs
|  blr
|
|6:
|  ble >7  // Less results wanted?
|  // More results wanted. Check stack size and fill up results with nil.
|  lwz TMP1, L->maxstack
|  cmplw BASE, TMP1
|  bge >8
|  stw TISNIL, 0(BASE)
|  addi RD, RD, 8
|  addi BASE, BASE, 8
|  b <2
|
|7:  // Less results wanted.
|  subfic TMP3, TMP2, 0  // LUA_MULTRET+1 case?
|  sub TMP0, RD, TMP2
|  subfe TMP1, TMP1, TMP1  // TMP1 = TMP2 == 0 ? 0 : -1
|  and TMP0, TMP0, TMP1
|  sub BASE, BASE, TMP0  // Either keep top or shrink it.
|  b <3
|
|8:  // Corner case: need to grow stack for filling up results.
|  // This can happen if:
|  // - A C function grows the stack (a lot).
|  // - The GC shrinks the stack in between.
|  // - A return back from a lua_call() with (high) nresults adjustment.
|  stp BASE, L->top  // Save current top held in BASE (yes).
|  mr SAVE0, RD
|  // TMP2 holds the wanted result count scaled by 8 (see the slwi above).
|  // Scale it back down before passing it as the count argument, the same
|  // way vm_growstack_l does with its byte-sized frame size.
|  srwi CARG2, TMP2, 3
|  mr CARG1, L
|  bl extern lj_state_growstack  // (lua_State *L, int n)
|  lwz TMP2, SAVE_NRES
|  mr RD, SAVE0
|  slwi TMP2, TMP2, 3
|  lp BASE, L->top  // Need the (realloced) L->top in BASE.
|  b <2
|
|->vm_unwind_c:  // Unwind C stack, return from vm_pcall.
|  // (void *cframe, int errcode)
|  mr sp, CARG1
|  mr CRET1, CARG2
|->vm_unwind_c_eh:  // Landing pad for external unwinder.
|  // Reload L from the C frame, set the VM state to C and leave through
|  // the common register-restore path. CRET1 is passed through unchanged.
|  lwz L, SAVE_L
|  .toc ld TOCREG, SAVE_TOC
|  li TMP0, ~LJ_VMST_C
|  lwz GL:TMP1, L->glref
|  stw TMP0, GL:TMP1->vmstate
|  b ->vm_leave_unw
|
|->vm_unwind_ff:  // Unwind C stack, return from ff pcall.
|  // (void *cframe)
|  // sp = cframe with its two lowest bits cleared.
|.if GPR64
|  rldicr sp, CARG1, 0, 61
|.else
|  rlwinm sp, CARG1, 0, 0, 29
|.endif
|->vm_unwind_ff_eh:  // Landing pad for external unwinder.
|  // Re-establish the interpreter's fixed registers (L, BASE, DISPATCH,
|  // TISNUM, TISNIL, ZERO, TOBIT, TONUM) and return "false, errmsg" via
|  // vm_returnc. TOBIT/TONUM are built as single-precision constants in the
|  // TMPD stack slot and loaded with lfs.
|  lwz L, SAVE_L
|  .toc ld TOCREG, SAVE_TOC
|  li TISNUM, LJ_TISNUM  // Setup type comparison constants.
|  lp BASE, L->base
|  lus TMP3, 0x59c0  // TOBIT = 2^52 + 2^51 (float).
|  lwz DISPATCH, L->glref  // Setup pointer to dispatch table.
|  li ZERO, 0
|  stw TMP3, TMPD
|  li TMP1, LJ_TFALSE
|  ori TMP3, TMP3, 0x0004  // TONUM = 2^52 + 2^51 + 2^31 (float).
|  li TISNIL, LJ_TNIL
|  li_vmstate INTERP
|  lfs TOBIT, TMPD
|  lwz PC, FRAME_PC(BASE)  // Fetch PC of previous frame.
|  la RA, -8(BASE)  // Results start at BASE-8.
|  stw TMP3, TMPD
|  addi DISPATCH, DISPATCH, GG_G2DISP
|  stw TMP1, 0(RA)  // Prepend false to error message.
|  li RD, 16  // 2 results: false + error message.
|  st_vmstate
|  lfs TONUM, TMPD
|  b ->vm_returnc
|
|//-----------------------------------------------------------------------
|//-- Grow stack for calls -----------------------------------------------
|//-----------------------------------------------------------------------
|
|->vm_growstack_c:  // Grow stack for C function.
|  li CARG2, LUA_MINSTACK
|  b >2
|
|->vm_growstack_l:  // Grow stack for Lua function.
|  // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
|  add RC, BASE, RC  // RC = top = BASE + nargs*8.
|  sub RA, RA, BASE  // RA = framesize*8.
|  stp BASE, L->base
|  addi PC, PC, 4  // Must point after first instruction.
|  stp RC, L->top
|  srwi CARG2, RA, 3  // CARG2 = framesize (bytes / 8).
|2:
|  // L->base = new base, L->top = top
|  stw PC, SAVE_PC
|  mr CARG1, L
|  bl extern lj_state_growstack  // (lua_State *L, int n)
|  // Reload BASE and top from L after the call.
|  lp BASE, L->base
|  lp RC, L->top
|  lwz LFUNC:RB, FRAME_FUNC(BASE)
|  sub RC, RC, BASE
|  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
|  ins_callt  // Just retry the call.
|
|//-----------------------------------------------------------------------
|//-- Entry points into the assembler VM ---------------------------------
|//-----------------------------------------------------------------------
|
|->vm_resume:  // Setup C frame and resume thread.
|  // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
|  // CARG3 is stored as SAVE_NRES, SAVE_ERRF, SAVE_CFRAME and (on the
|  // resume-after-yield path) as the new L->status.
|  saveregs
|  mr L, CARG1
|  lwz DISPATCH, L->glref  // Setup pointer to dispatch table.
|  mr BASE, CARG2
|  lbz TMP1, L->status
|  stw L, SAVE_L
|  li PC, FRAME_CP
|  addi TMP0, sp, CFRAME_RESUME
|  addi DISPATCH, DISPATCH, GG_G2DISP
|  stw CARG3, SAVE_NRES
|  cmplwi TMP1, 0
|  stw CARG3, SAVE_ERRF
|  stp TMP0, L->cframe
|  stp CARG3, SAVE_CFRAME
|  stw CARG1, SAVE_PC  // Any value outside of bytecode is ok.
|  beq >3  // L->status == 0: take the common call entry.
|
|  // Resume after yield (like a return).
|  mr RA, BASE
|  lp BASE, L->base
|  li TISNUM, LJ_TISNUM  // Setup type comparison constants.
|  lp TMP1, L->top
|  lwz PC, FRAME_PC(BASE)
|  lus TMP3, 0x59c0  // TOBIT = 2^52 + 2^51 (float).
|  stb CARG3, L->status
|  stw TMP3, TMPD
|  ori TMP3, TMP3, 0x0004  // TONUM = 2^52 + 2^51 + 2^31 (float).
|  lfs TOBIT, TMPD
|  sub RD, TMP1, BASE
|  stw TMP3, TMPD
|  lus TMP0, 0x4338  // Hiword of 2^52 + 2^51 (double)
|  addi RD, RD, 8
|  stw TMP0, TONUM_HI
|  li_vmstate INTERP
|  li ZERO, 0
|  st_vmstate
|  andix. TMP0, PC, FRAME_TYPE
|  mr MULTRES, RD
|  lfs TONUM, TMPD
|  li TISNIL, LJ_TNIL
|  beq ->BC_RET_Z
|  b ->vm_return
|
|->vm_pcall:  // Setup protected C frame and enter VM.
|  // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
|  saveregs
|  li PC, FRAME_CP
|  stw CARG4, SAVE_ERRF
|  b >1
|
|->vm_call:  // Setup C frame and enter VM.
|  // (lua_State *L, TValue *base, int nres1)
|  saveregs
|  li PC, FRAME_C
|
|1:  // Entry point for vm_pcall above (PC = ftype).
|  lp TMP1, L:CARG1->cframe  // TMP1 = previous C frame (saved below).
|  stw CARG3, SAVE_NRES
|  mr L, CARG1
|  stw CARG1, SAVE_L
|  mr BASE, CARG2
|  stp sp, L->cframe  // Add our C frame to cframe chain.
|  lwz DISPATCH, L->glref  // Setup pointer to dispatch table.
|  stw CARG1, SAVE_PC  // Any value outside of bytecode is ok.
|  stp TMP1, SAVE_CFRAME
|  addi DISPATCH, DISPATCH, GG_G2DISP
|
|3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
|  // Sets up the fixed interpreter constants and computes the frame link
|  // (PC) and argument size (RC) before falling into vm_call_dispatch.
|  lp TMP2, L->base  // TMP2 = old base (used in vmeta_call).
|  li TISNUM, LJ_TISNUM  // Setup type comparison constants.
|  lp TMP1, L->top
|  lus TMP3, 0x59c0  // TOBIT = 2^52 + 2^51 (float).
|  add PC, PC, BASE
|  stw TMP3, TMPD
|  li ZERO, 0
|  ori TMP3, TMP3, 0x0004  // TONUM = 2^52 + 2^51 + 2^31 (float).
|  lfs TOBIT, TMPD
|  sub PC, PC, TMP2  // PC = frame delta + frame type
|  stw TMP3, TMPD
|  lus TMP0, 0x4338  // Hiword of 2^52 + 2^51 (double)
|  sub NARGS8:RC, TMP1, BASE
|  stw TMP0, TONUM_HI
|  li_vmstate INTERP
|  lfs TONUM, TMPD
|  li TISNIL, LJ_TNIL
|  st_vmstate
|
|->vm_call_dispatch:
|  // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
|  // The word at FRAME_PC(BASE) is compared against the function tag
|  // here; anything else is routed to vmeta_call.
|  lwz TMP0, FRAME_PC(BASE)
|  lwz LFUNC:RB, FRAME_FUNC(BASE)
|  checkfunc TMP0; bne ->vmeta_call
|
|->vm_call_dispatch_f:
|  ins_call
|  // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
|
|->vm_cpcall:  // Setup protected C frame, call C.
|  // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
|  saveregs
|  mr L, CARG1
|  lwz TMP0, L:CARG1->stack
|  stw CARG1, SAVE_L
|  lp TMP1, L->top
|  stw CARG1, SAVE_PC  // Any value outside of bytecode is ok.
|  sub TMP0, TMP0, TMP1  // Compute -savestack(L, L->top).
|  lp TMP1, L->cframe
|  stp sp, L->cframe  // Add our C frame to cframe chain.
|  .toc lp CARG4, 0(CARG4)  // With TOC: load code address from descriptor.
|  li TMP2, 0
|  stw TMP0, SAVE_NRES  // Neg. delta means cframe w/o frame.
|  stw TMP2, SAVE_ERRF  // No error function.
|  stp TMP1, SAVE_CFRAME
|  mtctr CARG4
|  bctrl  // (lua_State *L, lua_CFunction func, void *ud)
|  // BASE = return value of cp; the compare against zero feeds the bne below.
|.if PPE
|  mr BASE, CRET1
|  cmpwi CRET1, 0
|.else
|  mr. BASE, CRET1
|.endif
|  lwz DISPATCH, L->glref  // Setup pointer to dispatch table.
|  li PC, FRAME_CP
|  addi DISPATCH, DISPATCH, GG_G2DISP
|  bne <3  // Else continue with the call.
|  b ->vm_leave_cp  // No base? Just remove C frame.
|
|//-----------------------------------------------------------------------
|//-- Metamethod handling ------------------------------------------------
|//-----------------------------------------------------------------------
|
|// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the
|// stack, so BASE doesn't need to be reloaded across these calls.
|
|//-- Continuation dispatch ----------------------------------------------
|
|->cont_dispatch:
|  // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
|  // On entry TMP2 holds the caller's base (it is moved into BASE below).
|  lwz TMP0, -12(BASE)  // Continuation.
|  mr RB, BASE
|  mr BASE, TMP2  // Restore caller BASE.
|  lwz LFUNC:TMP1, FRAME_FUNC(TMP2)
|.if FFI
|  cmplwi TMP0, 1
|.endif
|  lwz PC, -16(RB)  // Restore PC from [cont|PC].
|  subi TMP2, RD, 8
|  lwz TMP1, LFUNC:TMP1->pc
|  stwx TISNIL, RA, TMP2  // Ensure one valid arg.
|.if FFI
|  ble >1  // cont <= 1 is a special marker, not an address.
|.endif
|  lwz KBASE, PC2PROTO(k)(TMP1)
|  // BASE = base, RA = resultptr, RB = meta base
|  mtctr TMP0
|  bctr  // Jump to continuation.
|
|.if FFI
|1:
|  beq ->cont_ffi_callback  // cont = 1: return from FFI callback.
|  // cont = 0: tailcall from C function.
|  subi TMP1, RB, 16
|  sub RC, TMP1, BASE
|  b ->vm_call_tail
|.endif
|
|->cont_cat:  // RA = resultptr, RB = meta base
|  // Copies the result to RB-16. If BASE + (B operand of the previous
|  // instruction) differs from that address, control goes to BC_CAT_Z;
|  // otherwise the result is stored to the A operand slot and dispatch
|  // continues.
|  lwz INS, -4(PC)
|  subi CARG2, RB, 16
|  decode_RB8 SAVE0, INS
|  lfd f0, 0(RA)
|  add TMP1, BASE, SAVE0
|  stp BASE, L->base
|  cmplw TMP1, CARG2
|  sub CARG3, CARG2, TMP1
|  decode_RA8 RA, INS
|  stfd f0, 0(CARG2)
|  bney ->BC_CAT_Z
|  stfdx f0, BASE, RA
|  b ->cont_nop
|
|//-- Table indexing metamethods -----------------------------------------
|
|// The vmeta_tget* entry points differ only in how they build the object
|// (CARG2) and key (CARG3) TValue pointers; all of them join at "1:" below.
|// Temporary TValues are built in the tmptv/tmptv2 slots addressed via
|// DISPATCH.
|->vmeta_tgets1:
|  la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
|  li TMP0, LJ_TSTR
|  decode_RB8 RB, INS
|  stw STR:RC, 4(CARG3)
|  add CARG2, BASE, RB
|  stw TMP0, 0(CARG3)
|  b >1
|
|->vmeta_tgets:
|  la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
|  li TMP0, LJ_TTAB
|  stw TAB:RB, 4(CARG2)
|  la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
|  stw TMP0, 0(CARG2)
|  li TMP1, LJ_TSTR
|  stw STR:RC, 4(CARG3)
|  stw TMP1, 0(CARG3)
|  b >1
|
|->vmeta_tgetb:  // TMP0 = index
|.if not DUALNUM
|  tonum_u f0, TMP0
|.endif
|  decode_RB8 RB, INS
|  la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
|  add CARG2, BASE, RB
|.if DUALNUM
|  stw TISNUM, 0(CARG3)
|  stw TMP0, 4(CARG3)
|.else
|  stfd f0, 0(CARG3)
|.endif
|  b >1
|
|->vmeta_tgetv:
|  decode_RB8 RB, INS
|  decode_RC8 RC, INS
|  add CARG2, BASE, RB
|  add CARG3, BASE, RC
|1:
|  stp BASE, L->base
|  mr CARG1, L
|  stw PC, SAVE_PC
|  bl extern lj_meta_tget  // (lua_State *L, TValue *o, TValue *k)
|  // Returns TValue * (finished) or NULL (metamethod).
|  cmplwi CRET1, 0
|  beq >3
|  // Finished: copy the value to the slot at BASE+RA and continue.
|  lfd f0, 0(CRET1)
|  ins_next1
|  stfdx f0, BASE, RA
|  ins_next2
|
|3:  // Call __index metamethod.
|  // BASE = base, L->top = new base, stack = cont/func/t/k
|  // PC becomes (new base - old base) + FRAME_CONT; the old PC is kept in
|  // the [cont|PC] slot of the new frame.
|  subfic TMP1, BASE, FRAME_CONT
|  lp BASE, L->top
|  stw PC, -16(BASE)  // [cont|PC]
|  add PC, TMP1, BASE
|  lwz LFUNC:RB, FRAME_FUNC(BASE)  // Guaranteed to be a function here.
|  li NARGS8:RC, 16  // 2 args for func(t, k).
|  b ->vm_call_dispatch_f
|
|//-----------------------------------------------------------------------
|
|// The vmeta_tset* entry points mirror the vmeta_tget* ones: each builds
|// the object (CARG2) and key (CARG3) pointers and joins at "1:" below.
|->vmeta_tsets1:
|  la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
|  li TMP0, LJ_TSTR
|  decode_RB8 RB, INS
|  stw STR:RC, 4(CARG3)
|  add CARG2, BASE, RB
|  stw TMP0, 0(CARG3)
|  b >1
|
|->vmeta_tsets:
|  la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
|  li TMP0, LJ_TTAB
|  stw TAB:RB, 4(CARG2)
|  la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
|  stw TMP0, 0(CARG2)
|  li TMP1, LJ_TSTR
|  stw STR:RC, 4(CARG3)
|  stw TMP1, 0(CARG3)
|  b >1
|
|->vmeta_tsetb:  // TMP0 = index
|.if not DUALNUM
|  tonum_u f0, TMP0
|.endif
|  decode_RB8 RB, INS
|  la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
|  add CARG2, BASE, RB
|.if DUALNUM
|  stw TISNUM, 0(CARG3)
|  stw TMP0, 4(CARG3)
|.else
|  stfd f0, 0(CARG3)
|.endif
|  b >1
|
|->vmeta_tsetv:
|  decode_RB8 RB, INS
|  decode_RC8 RC, INS
|  add CARG2, BASE, RB
|  add CARG3, BASE, RC
|1:
|  stp BASE, L->base
|  mr CARG1, L
|  stw PC, SAVE_PC
|  bl extern lj_meta_tset  // (lua_State *L, TValue *o, TValue *k)
|  // Returns TValue * (finished) or NULL (metamethod).
|  cmplwi CRET1, 0
|  lfdx f0, BASE, RA  // f0 = value to store (slot at BASE+RA).
|  beq >3
|  // NOBARRIER: lj_meta_tset ensures the table is not black.
|  ins_next1
|  stfd f0, 0(CRET1)
|  ins_next2
|
|3:  // Call __newindex metamethod.
|  // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
|  // Same frame setup as the __index case, plus the value in f0 is copied
|  // into the third argument slot.
|  subfic TMP1, BASE, FRAME_CONT
|  lp BASE, L->top
|  stw PC, -16(BASE)  // [cont|PC]
|  add PC, TMP1, BASE
|  lwz LFUNC:RB, FRAME_FUNC(BASE)  // Guaranteed to be a function here.
|  li NARGS8:RC, 24  // 3 args for func(t, k, v)
|  stfd f0, 16(BASE)  // Copy value to third argument.
|  b ->vm_call_dispatch_f
|
|//-- Comparison metamethods ---------------------------------------------
|
|->vmeta_comp:
|  // PC is moved back by one instruction before it is saved. With DUALNUM
|  // RA/RD are passed as they are, otherwise they are added to BASE.
|  mr CARG1, L
|  subi PC, PC, 4
|.if DUALNUM
|  mr CARG2, RA
|.else
|  add CARG2, BASE, RA
|.endif
|  stw PC, SAVE_PC
|.if DUALNUM
|  mr CARG3, RD
|.else
|  add CARG3, BASE, RD
|.endif
|  stp BASE, L->base
|  decode_OP1 CARG4, INS
|  bl extern lj_meta_comp  // (lua_State *L, TValue *o1, *o2, int op)
|  // Returns 0/1 or TValue * (metamethod).
|3:
|  cmplwi CRET1, 1
|  bgt ->vmeta_binop
|  subfic CRET1, CRET1, 0  // CRET1 = 0 - CRET1: 0/1 becomes a 0/-1 mask.
|4:
|  // Load the instruction at PC and add its bias-corrected D*4 offset to
|  // PC only when the mask in CRET1 is all ones.
|  lwz INS, 0(PC)
|  addi PC, PC, 4
|  decode_RD4 TMP2, INS
|  addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
|  and TMP2, TMP2, CRET1
|  add PC, PC, TMP2
|->cont_nop:
|  ins_next
|
|->cont_ra:  // RA = resultptr
|  // Store the result into the A operand slot of the previous instruction.
|  lwz INS, -4(PC)
|  lfd f0, 0(RA)
|  decode_RA8 TMP1, INS
|  stfdx f0, BASE, TMP1
|  b ->cont_nop
|
|->cont_condt:  // RA = resultptr
|  // Builds the branch mask in CRET1 from the carry of (LJ_TTRUE - tag) and
|  // inverts it; cont_condf below uses the same mask without the inversion.
|  lwz TMP0, 0(RA)
|  .gpr64 extsw TMP0, TMP0
|  subfic TMP0, TMP0, LJ_TTRUE  // Branch if result is true.
|  subfe CRET1, CRET1, CRET1
|  not CRET1, CRET1
|  b <4
|
|->cont_condf:  // RA = resultptr
|  lwz TMP0, 0(RA)
|  .gpr64 extsw TMP0, TMP0
|  subfic TMP0, TMP0, LJ_TTRUE  // Branch if result is false.
|  subfe CRET1, CRET1, CRET1
|  b <4
|
|->vmeta_equal:
|  // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
|  subi PC, PC, 4  // Back up PC by one instruction before saving it.
|  stp BASE, L->base
|  mr CARG1, L
|  stw PC, SAVE_PC
|  bl extern lj_meta_equal  // (lua_State *L, GCobj *o1, *o2, int ne)
|  // Returns 0/1 or TValue * (metamethod).
|  b <3
|
|->vmeta_equal_cd:
|  // Without FFI this label has no body of its own and falls through to
|  // the code that follows.
|.if FFI
|  mr CARG2, INS
|  subi PC, PC, 4
|  stp BASE, L->base
|  mr CARG1, L
|  stw PC, SAVE_PC
|  bl extern lj_meta_equal_cd  // (lua_State *L, BCIns op)
|  // Returns 0/1 or TValue * (metamethod).
|  b <3
|.endif
|
|//-- Arithmetic metamethods ---------------------------------------------
|
|// Each entry point below sets CARG3/CARG4 to the two operand pointers and
|// joins at "1:". The *2 variants only have a body with DUALNUM and pass
|// RB/RC through unchanged; without DUALNUM they fall through.
|->vmeta_arith_nv:
|  add CARG3, KBASE, RC
|  add CARG4, BASE, RB
|  b >1
|->vmeta_arith_nv2:
|.if DUALNUM
|  mr CARG3, RC
|  mr CARG4, RB
|  b >1
|.endif
|
|->vmeta_unm:
|  mr CARG3, RD
|  mr CARG4, RD
|  b >1
|
|->vmeta_arith_vn:
|  add CARG3, BASE, RB
|  add CARG4, KBASE, RC
|  b >1
|
|->vmeta_arith_vv:
|  add CARG3, BASE, RB
|  add CARG4, BASE, RC
|.if DUALNUM
|  b >1
|.endif
|->vmeta_arith_vn2:
|->vmeta_arith_vv2:
|.if DUALNUM
|  mr CARG3, RB
|  mr CARG4, RC
|.endif
|1:
|  add CARG2, BASE, RA
|  stp BASE, L->base
|  mr CARG1, L
|  stw PC, SAVE_PC
|  decode_OP1 CARG5, INS  // Caveat: CARG5 overlaps INS.
|  bl extern lj_meta_arith  // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
|  // Returns NULL (finished) or TValue * (metamethod).
|  cmplwi CRET1, 0
|  beq ->cont_nop
|
|  // Call metamethod for binary op.
|->vmeta_binop:
|  // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
|  // PC becomes (new base - old base) + FRAME_CONT; TMP2 keeps the old
|  // base for vm_call_dispatch.
|  sub TMP1, CRET1, BASE
|  stw PC, -16(CRET1)  // [cont|PC]
|  mr TMP2, BASE
|  addi PC, TMP1, FRAME_CONT
|  mr BASE, CRET1
|  li NARGS8:RC, 16  // 2 args for func(o1, o2).
1098 | b ->vm_call_dispatch 1099 | 1100 |->vmeta_len: 1101#if LJ_52 1102 | mr SAVE0, CARG1 1103#endif 1104 | mr CARG2, RD 1105 | stp BASE, L->base 1106 | mr CARG1, L 1107 | stw PC, SAVE_PC 1108 | bl extern lj_meta_len // (lua_State *L, TValue *o) 1109 | // Returns NULL (retry) or TValue * (metamethod base). 1110#if LJ_52 1111 | cmplwi CRET1, 0 1112 | bne ->vmeta_binop // Binop call for compatibility. 1113 | mr CARG1, SAVE0 1114 | b ->BC_LEN_Z 1115#else 1116 | b ->vmeta_binop // Binop call for compatibility. 1117#endif 1118 | 1119 |//-- Call metamethod ---------------------------------------------------- 1120 | 1121 |->vmeta_call: // Resolve and call __call metamethod. 1122 | // TMP2 = old base, BASE = new base, RC = nargs*8 1123 | mr CARG1, L 1124 | stp TMP2, L->base // This is the callers base! 1125 | subi CARG2, BASE, 8 1126 | stw PC, SAVE_PC 1127 | add CARG3, BASE, RC 1128 | mr SAVE0, NARGS8:RC 1129 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) 1130 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 1131 | addi NARGS8:RC, SAVE0, 8 // Got one more argument now. 1132 | ins_call 1133 | 1134 |->vmeta_callt: // Resolve __call for BC_CALLT. 1135 | // BASE = old base, RA = new base, RC = nargs*8 1136 | mr CARG1, L 1137 | stp BASE, L->base 1138 | subi CARG2, RA, 8 1139 | stw PC, SAVE_PC 1140 | add CARG3, RA, RC 1141 | mr SAVE0, NARGS8:RC 1142 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) 1143 | lwz TMP1, FRAME_PC(BASE) 1144 | addi NARGS8:RC, SAVE0, 8 // Got one more argument now. 1145 | lwz LFUNC:RB, FRAME_FUNC(RA) // Guaranteed to be a function here. 
1146 | b ->BC_CALLT_Z 1147 | 1148 |//-- Argument coercion for 'for' statement ------------------------------ 1149 | 1150 |->vmeta_for: 1151 | mr CARG1, L 1152 | stp BASE, L->base 1153 | mr CARG2, RA 1154 | stw PC, SAVE_PC 1155 | mr SAVE0, INS 1156 | bl extern lj_meta_for // (lua_State *L, TValue *base) 1157 |.if JIT 1158 | decode_OP1 TMP0, SAVE0 1159 |.endif 1160 | decode_RA8 RA, SAVE0 1161 |.if JIT 1162 | cmpwi TMP0, BC_JFORI 1163 |.endif 1164 | decode_RD8 RD, SAVE0 1165 |.if JIT 1166 | beqy =>BC_JFORI 1167 |.endif 1168 | b =>BC_FORI 1169 | 1170 |//----------------------------------------------------------------------- 1171 |//-- Fast functions ----------------------------------------------------- 1172 |//----------------------------------------------------------------------- 1173 | 1174 |.macro .ffunc, name 1175 |->ff_ .. name: 1176 |.endmacro 1177 | 1178 |.macro .ffunc_1, name 1179 |->ff_ .. name: 1180 | cmplwi NARGS8:RC, 8 1181 | lwz CARG3, 0(BASE) 1182 | lwz CARG1, 4(BASE) 1183 | blt ->fff_fallback 1184 |.endmacro 1185 | 1186 |.macro .ffunc_2, name 1187 |->ff_ .. name: 1188 | cmplwi NARGS8:RC, 16 1189 | lwz CARG3, 0(BASE) 1190 | lwz CARG4, 8(BASE) 1191 | lwz CARG1, 4(BASE) 1192 | lwz CARG2, 12(BASE) 1193 | blt ->fff_fallback 1194 |.endmacro 1195 | 1196 |.macro .ffunc_n, name 1197 |->ff_ .. name: 1198 | cmplwi NARGS8:RC, 8 1199 | lwz CARG3, 0(BASE) 1200 | lfd FARG1, 0(BASE) 1201 | blt ->fff_fallback 1202 | checknum CARG3; bge ->fff_fallback 1203 |.endmacro 1204 | 1205 |.macro .ffunc_nn, name 1206 |->ff_ .. name: 1207 | cmplwi NARGS8:RC, 16 1208 | lwz CARG3, 0(BASE) 1209 | lfd FARG1, 0(BASE) 1210 | lwz CARG4, 8(BASE) 1211 | lfd FARG2, 8(BASE) 1212 | blt ->fff_fallback 1213 | checknum CARG3; bge ->fff_fallback 1214 | checknum CARG4; bge ->fff_fallback 1215 |.endmacro 1216 | 1217 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. 
1218 |.macro ffgccheck 1219 | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH) 1220 | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) 1221 | cmplw TMP0, TMP1 1222 | bgel ->fff_gcstep 1223 |.endmacro 1224 | 1225 |//-- Base library: checks ----------------------------------------------- 1226 | 1227 |.ffunc_1 assert 1228 | li TMP1, LJ_TFALSE 1229 | la RA, -8(BASE) 1230 | cmplw cr1, CARG3, TMP1 1231 | lwz PC, FRAME_PC(BASE) 1232 | bge cr1, ->fff_fallback 1233 | stw CARG3, 0(RA) 1234 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. 1235 | stw CARG1, 4(RA) 1236 | beq ->fff_res // Done if exactly 1 argument. 1237 | li TMP1, 8 1238 | subi RC, RC, 8 1239 |1: 1240 | cmplw TMP1, RC 1241 | lfdx f0, BASE, TMP1 1242 | stfdx f0, RA, TMP1 1243 | addi TMP1, TMP1, 8 1244 | bney <1 1245 | b ->fff_res 1246 | 1247 |.ffunc type 1248 | cmplwi NARGS8:RC, 8 1249 | lwz CARG1, 0(BASE) 1250 | blt ->fff_fallback 1251 | .gpr64 extsw CARG1, CARG1 1252 | subfc TMP0, TISNUM, CARG1 1253 | subfe TMP2, CARG1, CARG1 1254 | orc TMP1, TMP2, TMP0 1255 | addi TMP1, TMP1, ~LJ_TISNUM+1 1256 | slwi TMP1, TMP1, 3 1257 | la TMP2, CFUNC:RB->upvalue 1258 | lfdx FARG1, TMP2, TMP1 1259 | b ->fff_resn 1260 | 1261 |//-- Base library: getters and setters --------------------------------- 1262 | 1263 |.ffunc_1 getmetatable 1264 | checktab CARG3; bne >6 1265 |1: // Field metatable must be at same offset for GCtab and GCudata! 1266 | lwz TAB:CARG1, TAB:CARG1->metatable 1267 |2: 1268 | li CARG3, LJ_TNIL 1269 | cmplwi TAB:CARG1, 0 1270 | lwz STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) 1271 | beq ->fff_restv 1272 | lwz TMP0, TAB:CARG1->hmask 1273 | li CARG3, LJ_TTAB // Use metatable as default result. 
1274 | lwz TMP1, STR:RC->hash 1275 | lwz NODE:TMP2, TAB:CARG1->node 1276 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 1277 | slwi TMP0, TMP1, 5 1278 | slwi TMP1, TMP1, 3 1279 | sub TMP1, TMP0, TMP1 1280 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 1281 |3: // Rearranged logic, because we expect _not_ to find the key. 1282 | lwz CARG4, NODE:TMP2->key 1283 | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) 1284 | lwz CARG2, NODE:TMP2->val 1285 | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2) 1286 | checkstr CARG4; bne >4 1287 | cmpw TMP0, STR:RC; beq >5 1288 |4: 1289 | lwz NODE:TMP2, NODE:TMP2->next 1290 | cmplwi NODE:TMP2, 0 1291 | beq ->fff_restv // Not found, keep default result. 1292 | b <3 1293 |5: 1294 | checknil CARG2 1295 | beq ->fff_restv // Ditto for nil value. 1296 | mr CARG3, CARG2 // Return value of mt.__metatable. 1297 | mr CARG1, TMP1 1298 | b ->fff_restv 1299 | 1300 |6: 1301 | cmpwi CARG3, LJ_TUDATA; beq <1 1302 | .gpr64 extsw CARG3, CARG3 1303 | subfc TMP0, TISNUM, CARG3 1304 | subfe TMP2, CARG3, CARG3 1305 | orc TMP1, TMP2, TMP0 1306 | addi TMP1, TMP1, ~LJ_TISNUM+1 1307 | slwi TMP1, TMP1, 2 1308 | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH) 1309 | lwzx TAB:CARG1, TMP2, TMP1 1310 | b <2 1311 | 1312 |.ffunc_2 setmetatable 1313 | // Fast path: no mt for table yet and not clearing the mt. 1314 | checktab CARG3; bne ->fff_fallback 1315 | lwz TAB:TMP1, TAB:CARG1->metatable 1316 | checktab CARG4; bne ->fff_fallback 1317 | cmplwi TAB:TMP1, 0 1318 | lbz TMP3, TAB:CARG1->marked 1319 | bne ->fff_fallback 1320 | andix. 
TMP0, TMP3, LJ_GC_BLACK // isblack(table) 1321 | stw TAB:CARG2, TAB:CARG1->metatable 1322 | beq ->fff_restv 1323 | barrierback TAB:CARG1, TMP3, TMP0 1324 | b ->fff_restv 1325 | 1326 |.ffunc rawget 1327 | cmplwi NARGS8:RC, 16 1328 | lwz CARG4, 0(BASE) 1329 | lwz TAB:CARG2, 4(BASE) 1330 | blt ->fff_fallback 1331 | checktab CARG4; bne ->fff_fallback 1332 | la CARG3, 8(BASE) 1333 | mr CARG1, L 1334 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1335 | // Returns cTValue *. 1336 | lfd FARG1, 0(CRET1) 1337 | b ->fff_resn 1338 | 1339 |//-- Base library: conversions ------------------------------------------ 1340 | 1341 |.ffunc tonumber 1342 | // Only handles the number case inline (without a base argument). 1343 | cmplwi NARGS8:RC, 8 1344 | lwz CARG1, 0(BASE) 1345 | lfd FARG1, 0(BASE) 1346 | bne ->fff_fallback // Exactly one argument. 1347 | checknum CARG1; bgt ->fff_fallback 1348 | b ->fff_resn 1349 | 1350 |.ffunc_1 tostring 1351 | // Only handles the string or number case inline. 1352 | checkstr CARG3 1353 | // A __tostring method in the string base metatable is ignored. 1354 | beq ->fff_restv // String key? 1355 | // Handle numbers inline, unless a number base metatable is present. 1356 | lwz TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) 1357 | checknum CARG3 1358 | cmplwi cr1, TMP0, 0 1359 | stp BASE, L->base // Add frame since C call can throw. 1360 | crorc 4*cr0+eq, 4*cr0+gt, 4*cr1+eq 1361 | stw PC, SAVE_PC // Redundant (but a defined value). 1362 | beq ->fff_fallback 1363 | ffgccheck 1364 | mr CARG1, L 1365 | mr CARG2, BASE 1366 |.if DUALNUM 1367 | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) 1368 |.else 1369 | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np) 1370 |.endif 1371 | // Returns GCstr *. 
1372 | li CARG3, LJ_TSTR 1373 | b ->fff_restv 1374 | 1375 |//-- Base library: iterators ------------------------------------------- 1376 | 1377 |.ffunc next 1378 | cmplwi NARGS8:RC, 8 1379 | lwz CARG1, 0(BASE) 1380 | lwz TAB:CARG2, 4(BASE) 1381 | blt ->fff_fallback 1382 | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil. 1383 | checktab CARG1 1384 | lwz PC, FRAME_PC(BASE) 1385 | bne ->fff_fallback 1386 | stp BASE, L->base // Add frame since C call can throw. 1387 | mr CARG1, L 1388 | stp BASE, L->top // Dummy frame length is ok. 1389 | la CARG3, 8(BASE) 1390 | stw PC, SAVE_PC 1391 | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) 1392 | // Returns 0 at end of traversal. 1393 | cmplwi CRET1, 0 1394 | li CARG3, LJ_TNIL 1395 | beq ->fff_restv // End of traversal: return nil. 1396 | lfd f0, 8(BASE) // Copy key and value to results. 1397 | la RA, -8(BASE) 1398 | lfd f1, 16(BASE) 1399 | stfd f0, 0(RA) 1400 | li RD, (2+1)*8 1401 | stfd f1, 8(RA) 1402 | b ->fff_res 1403 | 1404 |.ffunc_1 pairs 1405 | checktab CARG3 1406 | lwz PC, FRAME_PC(BASE) 1407 | bne ->fff_fallback 1408#if LJ_52 1409 | lwz TAB:TMP2, TAB:CARG1->metatable 1410 | lfd f0, CFUNC:RB->upvalue[0] 1411 | cmplwi TAB:TMP2, 0 1412 | la RA, -8(BASE) 1413 | bne ->fff_fallback 1414#else 1415 | lfd f0, CFUNC:RB->upvalue[0] 1416 | la RA, -8(BASE) 1417#endif 1418 | stw TISNIL, 8(BASE) 1419 | li RD, (3+1)*8 1420 | stfd f0, 0(RA) 1421 | b ->fff_res 1422 | 1423 |.ffunc ipairs_aux 1424 | cmplwi NARGS8:RC, 16 1425 | lwz CARG3, 0(BASE) 1426 | lwz TAB:CARG1, 4(BASE) 1427 | lwz CARG4, 8(BASE) 1428 |.if DUALNUM 1429 | lwz TMP2, 12(BASE) 1430 |.else 1431 | lfd FARG2, 8(BASE) 1432 |.endif 1433 | blt ->fff_fallback 1434 | checktab CARG3 1435 | checknum cr1, CARG4 1436 | lwz PC, FRAME_PC(BASE) 1437 |.if DUALNUM 1438 | bne ->fff_fallback 1439 | bne cr1, ->fff_fallback 1440 |.else 1441 | lus TMP0, 0x3ff0 1442 | stw ZERO, TMPD_LO 1443 | bne ->fff_fallback 1444 | stw TMP0, TMPD_HI 1445 | bge cr1, ->fff_fallback 
1446 | lfd FARG1, TMPD 1447 | toint TMP2, FARG2, f0 1448 |.endif 1449 | lwz TMP0, TAB:CARG1->asize 1450 | lwz TMP1, TAB:CARG1->array 1451 |.if not DUALNUM 1452 | fadd FARG2, FARG2, FARG1 1453 |.endif 1454 | addi TMP2, TMP2, 1 1455 | la RA, -8(BASE) 1456 | cmplw TMP0, TMP2 1457 |.if DUALNUM 1458 | stw TISNUM, 0(RA) 1459 | slwi TMP3, TMP2, 3 1460 | stw TMP2, 4(RA) 1461 |.else 1462 | slwi TMP3, TMP2, 3 1463 | stfd FARG2, 0(RA) 1464 |.endif 1465 | ble >2 // Not in array part? 1466 | lwzx TMP2, TMP1, TMP3 1467 | lfdx f0, TMP1, TMP3 1468 |1: 1469 | checknil TMP2 1470 | li RD, (0+1)*8 1471 | beq ->fff_res // End of iteration, return 0 results. 1472 | li RD, (2+1)*8 1473 | stfd f0, 8(RA) 1474 | b ->fff_res 1475 |2: // Check for empty hash part first. Otherwise call C function. 1476 | lwz TMP0, TAB:CARG1->hmask 1477 | cmplwi TMP0, 0 1478 | li RD, (0+1)*8 1479 | beq ->fff_res 1480 | mr CARG2, TMP2 1481 | bl extern lj_tab_getinth // (GCtab *t, int32_t key) 1482 | // Returns cTValue * or NULL. 1483 | cmplwi CRET1, 0 1484 | li RD, (0+1)*8 1485 | beq ->fff_res 1486 | lwz TMP2, 0(CRET1) 1487 | lfd f0, 0(CRET1) 1488 | b <1 1489 | 1490 |.ffunc_1 ipairs 1491 | checktab CARG3 1492 | lwz PC, FRAME_PC(BASE) 1493 | bne ->fff_fallback 1494#if LJ_52 1495 | lwz TAB:TMP2, TAB:CARG1->metatable 1496 | lfd f0, CFUNC:RB->upvalue[0] 1497 | cmplwi TAB:TMP2, 0 1498 | la RA, -8(BASE) 1499 | bne ->fff_fallback 1500#else 1501 | lfd f0, CFUNC:RB->upvalue[0] 1502 | la RA, -8(BASE) 1503#endif 1504 |.if DUALNUM 1505 | stw TISNUM, 8(BASE) 1506 |.else 1507 | stw ZERO, 8(BASE) 1508 |.endif 1509 | stw ZERO, 12(BASE) 1510 | li RD, (3+1)*8 1511 | stfd f0, 0(RA) 1512 | b ->fff_res 1513 | 1514 |//-- Base library: catch errors ---------------------------------------- 1515 | 1516 |.ffunc pcall 1517 | cmplwi NARGS8:RC, 8 1518 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) 1519 | blt ->fff_fallback 1520 | mr TMP2, BASE 1521 | la BASE, 8(BASE) 1522 | // Remember active hook before pcall. 
1523 | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31 1524 | subi NARGS8:RC, NARGS8:RC, 8 1525 | addi PC, TMP3, 8+FRAME_PCALL 1526 | b ->vm_call_dispatch 1527 | 1528 |.ffunc xpcall 1529 | cmplwi NARGS8:RC, 16 1530 | lwz CARG4, 8(BASE) 1531 | lfd FARG2, 8(BASE) 1532 | lfd FARG1, 0(BASE) 1533 | blt ->fff_fallback 1534 | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) 1535 | mr TMP2, BASE 1536 | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function. 1537 | la BASE, 16(BASE) 1538 | // Remember active hook before pcall. 1539 | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 1540 | stfd FARG2, 0(TMP2) // Swap function and traceback. 1541 | subi NARGS8:RC, NARGS8:RC, 16 1542 | stfd FARG1, 8(TMP2) 1543 | addi PC, TMP1, 16+FRAME_PCALL 1544 | b ->vm_call_dispatch 1545 | 1546 |//-- Coroutine library -------------------------------------------------- 1547 | 1548 |.macro coroutine_resume_wrap, resume 1549 |.if resume 1550 |.ffunc_1 coroutine_resume 1551 | cmpwi CARG3, LJ_TTHREAD; bne ->fff_fallback 1552 |.else 1553 |.ffunc coroutine_wrap_aux 1554 | lwz L:CARG1, CFUNC:RB->upvalue[0].gcr 1555 |.endif 1556 | lbz TMP0, L:CARG1->status 1557 | lp TMP1, L:CARG1->cframe 1558 | lp CARG2, L:CARG1->top 1559 | cmplwi cr0, TMP0, LUA_YIELD 1560 | lp TMP2, L:CARG1->base 1561 | cmplwi cr1, TMP1, 0 1562 | lwz TMP0, L:CARG1->maxstack 1563 | cmplw cr7, CARG2, TMP2 1564 | lwz PC, FRAME_PC(BASE) 1565 | crorc 4*cr6+lt, 4*cr0+gt, 4*cr1+eq // st>LUA_YIELD || cframe!=0 1566 | add TMP2, CARG2, NARGS8:RC 1567 | crandc 4*cr6+gt, 4*cr7+eq, 4*cr0+eq // base==top && st!=LUA_YIELD 1568 | cmplw cr1, TMP2, TMP0 1569 | cror 4*cr6+lt, 4*cr6+lt, 4*cr6+gt 1570 | stw PC, SAVE_PC 1571 | cror 4*cr6+lt, 4*cr6+lt, 4*cr1+gt // cond1 || cond2 || stackov 1572 | stp BASE, L->base 1573 | blt cr6, ->fff_fallback 1574 |1: 1575 |.if resume 1576 | addi BASE, BASE, 8 // Keep resumed thread in stack for GC. 
1577 | subi NARGS8:RC, NARGS8:RC, 8 1578 | subi TMP2, TMP2, 8 1579 |.endif 1580 | stp TMP2, L:CARG1->top 1581 | li TMP1, 0 1582 | stp BASE, L->top 1583 |2: // Move args to coroutine. 1584 | cmpw TMP1, NARGS8:RC 1585 | lfdx f0, BASE, TMP1 1586 | beq >3 1587 | stfdx f0, CARG2, TMP1 1588 | addi TMP1, TMP1, 8 1589 | b <2 1590 |3: 1591 | li CARG3, 0 1592 | mr L:SAVE0, L:CARG1 1593 | li CARG4, 0 1594 | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) 1595 | // Returns thread status. 1596 |4: 1597 | lp TMP2, L:SAVE0->base 1598 | cmplwi CRET1, LUA_YIELD 1599 | lp TMP3, L:SAVE0->top 1600 | li_vmstate INTERP 1601 | lp BASE, L->base 1602 | st_vmstate 1603 | bgt >8 1604 | sub RD, TMP3, TMP2 1605 | lwz TMP0, L->maxstack 1606 | cmplwi RD, 0 1607 | add TMP1, BASE, RD 1608 | beq >6 // No results? 1609 | cmplw TMP1, TMP0 1610 | li TMP1, 0 1611 | bgt >9 // Need to grow stack? 1612 | 1613 | subi TMP3, RD, 8 1614 | stp TMP2, L:SAVE0->top // Clear coroutine stack. 1615 |5: // Move results from coroutine. 1616 | cmplw TMP1, TMP3 1617 | lfdx f0, TMP2, TMP1 1618 | stfdx f0, BASE, TMP1 1619 | addi TMP1, TMP1, 8 1620 | bne <5 1621 |6: 1622 | andix. TMP0, PC, FRAME_TYPE 1623 |.if resume 1624 | li TMP1, LJ_TTRUE 1625 | la RA, -8(BASE) 1626 | stw TMP1, -8(BASE) // Prepend true to results. 1627 | addi RD, RD, 16 1628 |.else 1629 | mr RA, BASE 1630 | addi RD, RD, 8 1631 |.endif 1632 |7: 1633 | stw PC, SAVE_PC 1634 | mr MULTRES, RD 1635 | beq ->BC_RET_Z 1636 | b ->vm_return 1637 | 1638 |8: // Coroutine returned with error (at co->top-1). 1639 |.if resume 1640 | andix. TMP0, PC, FRAME_TYPE 1641 | la TMP3, -8(TMP3) 1642 | li TMP1, LJ_TFALSE 1643 | lfd f0, 0(TMP3) 1644 | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. 1645 | li RD, (2+1)*8 1646 | stw TMP1, -8(BASE) // Prepend false to results. 1647 | la RA, -8(BASE) 1648 | stfd f0, 0(BASE) // Copy error message. 
1649 | b <7 1650 |.else 1651 | mr CARG1, L 1652 | mr CARG2, L:SAVE0 1653 | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) 1654 |.endif 1655 | 1656 |9: // Handle stack expansion on return from yield. 1657 | mr CARG1, L 1658 | srwi CARG2, RD, 3 1659 | bl extern lj_state_growstack // (lua_State *L, int n) 1660 | li CRET1, 0 1661 | b <4 1662 |.endmacro 1663 | 1664 | coroutine_resume_wrap 1 // coroutine.resume 1665 | coroutine_resume_wrap 0 // coroutine.wrap 1666 | 1667 |.ffunc coroutine_yield 1668 | lp TMP0, L->cframe 1669 | add TMP1, BASE, NARGS8:RC 1670 | stp BASE, L->base 1671 | andix. TMP0, TMP0, CFRAME_RESUME 1672 | stp TMP1, L->top 1673 | li CRET1, LUA_YIELD 1674 | beq ->fff_fallback 1675 | stp ZERO, L->cframe 1676 | stb CRET1, L->status 1677 | b ->vm_leave_unw 1678 | 1679 |//-- Math library ------------------------------------------------------- 1680 | 1681 |.ffunc_1 math_abs 1682 | checknum CARG3 1683 |.if DUALNUM 1684 | bne >2 1685 | srawi TMP1, CARG1, 31 1686 | xor TMP2, TMP1, CARG1 1687 |.if GPR64 1688 | lus TMP0, 0x8000 1689 | sub CARG1, TMP2, TMP1 1690 | cmplw CARG1, TMP0 1691 | beq >1 1692 |.else 1693 | sub. CARG1, TMP2, TMP1 1694 | blt >1 1695 |.endif 1696 |->fff_resi: 1697 | lwz PC, FRAME_PC(BASE) 1698 | la RA, -8(BASE) 1699 | stw TISNUM, -8(BASE) 1700 | stw CRET1, -4(BASE) 1701 | b ->fff_res1 1702 |1: 1703 | lus CARG3, 0x41e0 // 2^31. 1704 | li CARG1, 0 1705 | b ->fff_restv 1706 |2: 1707 |.endif 1708 | bge ->fff_fallback 1709 | rlwinm CARG3, CARG3, 0, 1, 31 1710 | // Fallthrough. 1711 | 1712 |->fff_restv: 1713 | // CARG3/CARG1 = TValue result. 1714 | lwz PC, FRAME_PC(BASE) 1715 | stw CARG3, -8(BASE) 1716 | la RA, -8(BASE) 1717 | stw CARG1, -4(BASE) 1718 |->fff_res1: 1719 | // RA = results, PC = return. 1720 | li RD, (1+1)*8 1721 |->fff_res: 1722 | // RA = results, RD = (nresults+1)*8, PC = return. 1723 | andix. 
TMP0, PC, FRAME_TYPE 1724 | mr MULTRES, RD 1725 | bney ->vm_return 1726 | lwz INS, -4(PC) 1727 | decode_RB8 RB, INS 1728 |5: 1729 | cmplw RB, RD // More results expected? 1730 | decode_RA8 TMP0, INS 1731 | bgt >6 1732 | ins_next1 1733 | // Adjust BASE. KBASE is assumed to be set for the calling frame. 1734 | sub BASE, RA, TMP0 1735 | ins_next2 1736 | 1737 |6: // Fill up results with nil. 1738 | subi TMP1, RD, 8 1739 | addi RD, RD, 8 1740 | stwx TISNIL, RA, TMP1 1741 | b <5 1742 | 1743 |.macro math_extern, func 1744 | .ffunc_n math_ .. func 1745 | blex func 1746 | b ->fff_resn 1747 |.endmacro 1748 | 1749 |.macro math_extern2, func 1750 | .ffunc_nn math_ .. func 1751 | blex func 1752 | b ->fff_resn 1753 |.endmacro 1754 | 1755 |.macro math_round, func 1756 | .ffunc_1 math_ .. func 1757 | checknum CARG3; beqy ->fff_restv 1758 | rlwinm TMP2, CARG3, 12, 21, 31 1759 | bge ->fff_fallback 1760 | addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023 1761 | cmplwi cr1, TMP2, 31 // 0 <= exp < 31? 1762 | subfic TMP0, TMP2, 31 1763 | blt >3 1764 | slwi TMP1, CARG3, 11 1765 | srwi TMP3, CARG1, 21 1766 | oris TMP1, TMP1, 0x8000 1767 | addi TMP2, TMP2, 1 1768 | or TMP1, TMP1, TMP3 1769 | slwi CARG2, CARG1, 11 1770 | bge cr1, >4 1771 | slw TMP3, TMP1, TMP2 1772 | srw RD, TMP1, TMP0 1773 | or TMP3, TMP3, CARG2 1774 | srawi TMP2, CARG3, 31 1775 |.if "func" == "floor" 1776 | and TMP1, TMP3, TMP2 1777 | addic TMP0, TMP1, -1 1778 | subfe TMP1, TMP0, TMP1 1779 | add CARG1, RD, TMP1 1780 | xor CARG1, CARG1, TMP2 1781 | sub CARG1, CARG1, TMP2 1782 | b ->fff_resi 1783 |.else 1784 | andc TMP1, TMP3, TMP2 1785 | addic TMP0, TMP1, -1 1786 | subfe TMP1, TMP0, TMP1 1787 | add CARG1, RD, TMP1 1788 | cmpw CARG1, RD 1789 | xor CARG1, CARG1, TMP2 1790 | sub CARG1, CARG1, TMP2 1791 | bge ->fff_resi 1792 | // Overflow to 2^31. 1793 | lus CARG3, 0x41e0 // 2^31. 
1794 | li CARG1, 0 1795 | b ->fff_restv 1796 |.endif 1797 |3: // |x| < 1 1798 | slwi TMP2, CARG3, 1 1799 | srawi TMP1, CARG3, 31 1800 | or TMP2, CARG1, TMP2 // ztest = (hi+hi) | lo 1801 |.if "func" == "floor" 1802 | and TMP1, TMP2, TMP1 // (ztest & sign) == 0 ? 0 : -1 1803 | subfic TMP2, TMP1, 0 1804 | subfe CARG1, CARG1, CARG1 1805 |.else 1806 | andc TMP1, TMP2, TMP1 // (ztest & ~sign) == 0 ? 0 : 1 1807 | addic TMP2, TMP1, -1 1808 | subfe CARG1, TMP2, TMP1 1809 |.endif 1810 | b ->fff_resi 1811 |4: // exp >= 31. Check for -(2^31). 1812 | xoris TMP1, TMP1, 0x8000 1813 | srawi TMP2, CARG3, 31 1814 |.if "func" == "floor" 1815 | or TMP1, TMP1, CARG2 1816 |.endif 1817 |.if PPE 1818 | orc TMP1, TMP1, TMP2 1819 | cmpwi TMP1, 0 1820 |.else 1821 | orc. TMP1, TMP1, TMP2 1822 |.endif 1823 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 1824 | lus CARG1, 0x8000 // -(2^31). 1825 | beqy ->fff_resi 1826 |5: 1827 | lfd FARG1, 0(BASE) 1828 | blex func 1829 | b ->fff_resn 1830 |.endmacro 1831 | 1832 |.if DUALNUM 1833 | math_round floor 1834 | math_round ceil 1835 |.else 1836 | // NYI: use internal implementation. 1837 | math_extern floor 1838 | math_extern ceil 1839 |.endif 1840 | 1841 |.if SQRT 1842 |.ffunc_n math_sqrt 1843 | fsqrt FARG1, FARG1 1844 | b ->fff_resn 1845 |.else 1846 | math_extern sqrt 1847 |.endif 1848 | 1849 |.ffunc math_log 1850 | cmplwi NARGS8:RC, 8 1851 | lwz CARG3, 0(BASE) 1852 | lfd FARG1, 0(BASE) 1853 | bne ->fff_fallback // Need exactly 1 argument. 
1854 | checknum CARG3; bge ->fff_fallback 1855 | blex log 1856 | b ->fff_resn 1857 | 1858 | math_extern log10 1859 | math_extern exp 1860 | math_extern sin 1861 | math_extern cos 1862 | math_extern tan 1863 | math_extern asin 1864 | math_extern acos 1865 | math_extern atan 1866 | math_extern sinh 1867 | math_extern cosh 1868 | math_extern tanh 1869 | math_extern2 pow 1870 | math_extern2 atan2 1871 | math_extern2 fmod 1872 | 1873 |->ff_math_deg: 1874 |.ffunc_n math_rad 1875 | lfd FARG2, CFUNC:RB->upvalue[0] 1876 | fmul FARG1, FARG1, FARG2 1877 | b ->fff_resn 1878 | 1879 |.if DUALNUM 1880 |.ffunc math_ldexp 1881 | cmplwi NARGS8:RC, 16 1882 | lwz CARG3, 0(BASE) 1883 | lfd FARG1, 0(BASE) 1884 | lwz CARG4, 8(BASE) 1885 |.if GPR64 1886 | lwz CARG2, 12(BASE) 1887 |.else 1888 | lwz CARG1, 12(BASE) 1889 |.endif 1890 | blt ->fff_fallback 1891 | checknum CARG3; bge ->fff_fallback 1892 | checknum CARG4; bne ->fff_fallback 1893 |.else 1894 |.ffunc_nn math_ldexp 1895 |.if GPR64 1896 | toint CARG2, FARG2 1897 |.else 1898 | toint CARG1, FARG2 1899 |.endif 1900 |.endif 1901 | blex ldexp 1902 | b ->fff_resn 1903 | 1904 |.ffunc_n math_frexp 1905 |.if GPR64 1906 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) 1907 |.else 1908 | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) 1909 |.endif 1910 | lwz PC, FRAME_PC(BASE) 1911 | blex frexp 1912 | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH) 1913 | la RA, -8(BASE) 1914 |.if not DUALNUM 1915 | tonum_i FARG2, TMP1 1916 |.endif 1917 | stfd FARG1, 0(RA) 1918 | li RD, (2+1)*8 1919 |.if DUALNUM 1920 | stw TISNUM, 8(RA) 1921 | stw TMP1, 12(RA) 1922 |.else 1923 | stfd FARG2, 8(RA) 1924 |.endif 1925 | b ->fff_res 1926 | 1927 |.ffunc_n math_modf 1928 |.if GPR64 1929 | la CARG2, -8(BASE) 1930 |.else 1931 | la CARG1, -8(BASE) 1932 |.endif 1933 | lwz PC, FRAME_PC(BASE) 1934 | blex modf 1935 | la RA, -8(BASE) 1936 | stfd FARG1, 0(BASE) 1937 | li RD, (2+1)*8 1938 | b ->fff_res 1939 | 1940 |.macro math_minmax, name, ismax 1941 |.if DUALNUM 1942 | .ffunc_1 name 1943 | checknum 
CARG3 1944 | addi TMP1, BASE, 8 1945 | add TMP2, BASE, NARGS8:RC 1946 | bne >4 1947 |1: // Handle integers. 1948 | lwz CARG4, 0(TMP1) 1949 | cmplw cr1, TMP1, TMP2 1950 | lwz CARG2, 4(TMP1) 1951 | bge cr1, ->fff_resi 1952 | checknum CARG4 1953 | xoris TMP0, CARG1, 0x8000 1954 | xoris TMP3, CARG2, 0x8000 1955 | bne >3 1956 | subfc TMP3, TMP3, TMP0 1957 | subfe TMP0, TMP0, TMP0 1958 |.if ismax 1959 | andc TMP3, TMP3, TMP0 1960 |.else 1961 | and TMP3, TMP3, TMP0 1962 |.endif 1963 | add CARG1, TMP3, CARG2 1964 |.if GPR64 1965 | rldicl CARG1, CARG1, 0, 32 1966 |.endif 1967 | addi TMP1, TMP1, 8 1968 | b <1 1969 |3: 1970 | bge ->fff_fallback 1971 | // Convert intermediate result to number and continue below. 1972 | tonum_i FARG1, CARG1 1973 | lfd FARG2, 0(TMP1) 1974 | b >6 1975 |4: 1976 | lfd FARG1, 0(BASE) 1977 | bge ->fff_fallback 1978 |5: // Handle numbers. 1979 | lwz CARG4, 0(TMP1) 1980 | cmplw cr1, TMP1, TMP2 1981 | lfd FARG2, 0(TMP1) 1982 | bge cr1, ->fff_resn 1983 | checknum CARG4; bge >7 1984 |6: 1985 | fsub f0, FARG1, FARG2 1986 | addi TMP1, TMP1, 8 1987 |.if ismax 1988 | fsel FARG1, f0, FARG1, FARG2 1989 |.else 1990 | fsel FARG1, f0, FARG2, FARG1 1991 |.endif 1992 | b <5 1993 |7: // Convert integer to number and continue above. 
1994 | lwz CARG2, 4(TMP1) 1995 | bne ->fff_fallback 1996 | tonum_i FARG2, CARG2 1997 | b <6 1998 |.else 1999 | .ffunc_n name 2000 | li TMP1, 8 2001 |1: 2002 | lwzx CARG2, BASE, TMP1 2003 | lfdx FARG2, BASE, TMP1 2004 | cmplw cr1, TMP1, NARGS8:RC 2005 | checknum CARG2 2006 | bge cr1, ->fff_resn 2007 | bge ->fff_fallback 2008 | fsub f0, FARG1, FARG2 2009 | addi TMP1, TMP1, 8 2010 |.if ismax 2011 | fsel FARG1, f0, FARG1, FARG2 2012 |.else 2013 | fsel FARG1, f0, FARG2, FARG1 2014 |.endif 2015 | b <1 2016 |.endif 2017 |.endmacro 2018 | 2019 | math_minmax math_min, 0 2020 | math_minmax math_max, 1 2021 | 2022 |//-- String library ----------------------------------------------------- 2023 | 2024 |.ffunc_1 string_len 2025 | checkstr CARG3; bne ->fff_fallback 2026 | lwz CRET1, STR:CARG1->len 2027 | b ->fff_resi 2028 | 2029 |.ffunc string_byte // Only handle the 1-arg case here. 2030 | cmplwi NARGS8:RC, 8 2031 | lwz CARG3, 0(BASE) 2032 | lwz STR:CARG1, 4(BASE) 2033 | bne ->fff_fallback // Need exactly 1 argument. 2034 | checkstr CARG3 2035 | bne ->fff_fallback 2036 | lwz TMP0, STR:CARG1->len 2037 |.if DUALNUM 2038 | lbz CARG1, STR:CARG1[1] // Access is always ok (NUL at end). 2039 | li RD, (0+1)*8 2040 | lwz PC, FRAME_PC(BASE) 2041 | cmplwi TMP0, 0 2042 | la RA, -8(BASE) 2043 | beqy ->fff_res 2044 | b ->fff_resi 2045 |.else 2046 | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end). 2047 | addic TMP3, TMP0, -1 // RD = ((str->len != 0)+1)*8 2048 | subfe RD, TMP3, TMP0 2049 | stw TMP1, TONUM_LO // Inlined tonum_u f0, TMP1. 2050 | addi RD, RD, 1 2051 | lfd f0, TONUM_D 2052 | la RA, -8(BASE) 2053 | lwz PC, FRAME_PC(BASE) 2054 | fsub f0, f0, TOBIT 2055 | slwi RD, RD, 3 2056 | stfd f0, 0(RA) 2057 | b ->fff_res 2058 |.endif 2059 | 2060 |.ffunc string_char // Only handle the 1-arg case here. 2061 | ffgccheck 2062 | cmplwi NARGS8:RC, 8 2063 | lwz CARG3, 0(BASE) 2064 |.if DUALNUM 2065 | lwz TMP0, 4(BASE) 2066 | bne ->fff_fallback // Exactly 1 argument. 
2067 | checknum CARG3; bne ->fff_fallback 2068 | la CARG2, 7(BASE) 2069 |.else 2070 | lfd FARG1, 0(BASE) 2071 | bne ->fff_fallback // Exactly 1 argument. 2072 | checknum CARG3; bge ->fff_fallback 2073 | toint TMP0, FARG1 2074 | la CARG2, TMPD_BLO 2075 |.endif 2076 | li CARG3, 1 2077 | cmplwi TMP0, 255; bgt ->fff_fallback 2078 |->fff_newstr: 2079 | mr CARG1, L 2080 | stp BASE, L->base 2081 | stw PC, SAVE_PC 2082 | bl extern lj_str_new // (lua_State *L, char *str, size_t l) 2083 | // Returns GCstr *. 2084 | lp BASE, L->base 2085 | li CARG3, LJ_TSTR 2086 | b ->fff_restv 2087 | 2088 |.ffunc string_sub 2089 | ffgccheck 2090 | cmplwi NARGS8:RC, 16 2091 | lwz CARG3, 16(BASE) 2092 |.if not DUALNUM 2093 | lfd f0, 16(BASE) 2094 |.endif 2095 | lwz TMP0, 0(BASE) 2096 | lwz STR:CARG1, 4(BASE) 2097 | blt ->fff_fallback 2098 | lwz CARG2, 8(BASE) 2099 |.if DUALNUM 2100 | lwz TMP1, 12(BASE) 2101 |.else 2102 | lfd f1, 8(BASE) 2103 |.endif 2104 | li TMP2, -1 2105 | beq >1 2106 |.if DUALNUM 2107 | checknum CARG3 2108 | lwz TMP2, 20(BASE) 2109 | bne ->fff_fallback 2110 |1: 2111 | checknum CARG2; bne ->fff_fallback 2112 |.else 2113 | checknum CARG3; bge ->fff_fallback 2114 | toint TMP2, f0 2115 |1: 2116 | checknum CARG2; bge ->fff_fallback 2117 |.endif 2118 | checkstr TMP0; bne ->fff_fallback 2119 |.if not DUALNUM 2120 | toint TMP1, f1 2121 |.endif 2122 | lwz TMP0, STR:CARG1->len 2123 | cmplw TMP0, TMP2 // len < end? (unsigned compare) 2124 | addi TMP3, TMP2, 1 2125 | blt >5 2126 |2: 2127 | cmpwi TMP1, 0 // start <= 0? 2128 | add TMP3, TMP1, TMP0 2129 | ble >7 2130 |3: 2131 | sub CARG3, TMP2, TMP1 2132 | addi CARG2, STR:CARG1, #STR-1 2133 | srawi TMP0, CARG3, 31 2134 | addi CARG3, CARG3, 1 2135 | add CARG2, CARG2, TMP1 2136 | andc CARG3, CARG3, TMP0 2137 |.if GPR64 2138 | rldicl CARG2, CARG2, 0, 32 2139 | rldicl CARG3, CARG3, 0, 32 2140 |.endif 2141 | b ->fff_newstr 2142 | 2143 |5: // Negative end or overflow. 2144 | cmpw TMP0, TMP2 // len >= end? 
(signed compare) 2145 | add TMP2, TMP0, TMP3 // Negative end: end = end+len+1. 2146 | bge <2 2147 | mr TMP2, TMP0 // Overflow: end = len. 2148 | b <2 2149 | 2150 |7: // Negative start or underflow. 2151 | .gpr64 extsw TMP1, TMP1 2152 | addic CARG3, TMP1, -1 2153 | subfe CARG3, CARG3, CARG3 2154 | srawi CARG2, TMP3, 31 // Note: modifies carry. 2155 | andc TMP3, TMP3, CARG3 2156 | andc TMP1, TMP3, CARG2 2157 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) 2158 | b <3 2159 | 2160 |.ffunc string_rep // Only handle the 1-char case inline. 2161 | ffgccheck 2162 | cmplwi NARGS8:RC, 16 2163 | lwz TMP0, 0(BASE) 2164 | lwz STR:CARG1, 4(BASE) 2165 | lwz CARG4, 8(BASE) 2166 |.if DUALNUM 2167 | lwz CARG3, 12(BASE) 2168 |.else 2169 | lfd FARG2, 8(BASE) 2170 |.endif 2171 | bne ->fff_fallback // Exactly 2 arguments. 2172 | checkstr TMP0; bne ->fff_fallback 2173 |.if DUALNUM 2174 | checknum CARG4; bne ->fff_fallback 2175 |.else 2176 | checknum CARG4; bge ->fff_fallback 2177 | toint CARG3, FARG2 2178 |.endif 2179 | lwz TMP0, STR:CARG1->len 2180 | cmpwi CARG3, 0 2181 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 2182 | ble >2 // Count <= 0? (or non-int) 2183 | cmplwi TMP0, 1 2184 | subi TMP2, CARG3, 1 2185 | blt >2 // Zero length string? 2186 | cmplw cr1, TMP1, CARG3 2187 | bne ->fff_fallback // Fallback for > 1-char strings. 2188 | lbz TMP0, STR:CARG1[1] 2189 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 2190 | blt cr1, ->fff_fallback 2191 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). 2192 | cmplwi TMP2, 0 2193 | stbx TMP0, CARG2, TMP2 2194 | subi TMP2, TMP2, 1 2195 | bne <1 2196 | b ->fff_newstr 2197 |2: // Return empty string. 
2198 | la STR:CARG1, DISPATCH_GL(strempty)(DISPATCH) 2199 | li CARG3, LJ_TSTR 2200 | b ->fff_restv 2201 | 2202 |.ffunc string_reverse 2203 | ffgccheck 2204 | cmplwi NARGS8:RC, 8 2205 | lwz CARG3, 0(BASE) 2206 | lwz STR:CARG1, 4(BASE) 2207 | blt ->fff_fallback 2208 | checkstr CARG3 2209 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 2210 | bne ->fff_fallback 2211 | lwz CARG3, STR:CARG1->len 2212 | la CARG1, #STR(STR:CARG1) 2213 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 2214 | li TMP2, 0 2215 | cmplw TMP1, CARG3 2216 | subi TMP3, CARG3, 1 2217 | blt ->fff_fallback 2218 |1: // Reverse string copy. 2219 | cmpwi TMP3, 0 2220 | lbzx TMP1, CARG1, TMP2 2221 | blty ->fff_newstr 2222 | stbx TMP1, CARG2, TMP3 2223 | subi TMP3, TMP3, 1 2224 | addi TMP2, TMP2, 1 2225 | b <1 2226 | 2227 |.macro ffstring_case, name, lo 2228 | .ffunc name 2229 | ffgccheck 2230 | cmplwi NARGS8:RC, 8 2231 | lwz CARG3, 0(BASE) 2232 | lwz STR:CARG1, 4(BASE) 2233 | blt ->fff_fallback 2234 | checkstr CARG3 2235 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 2236 | bne ->fff_fallback 2237 | lwz CARG3, STR:CARG1->len 2238 | la CARG1, #STR(STR:CARG1) 2239 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 2240 | cmplw TMP1, CARG3 2241 | li TMP2, 0 2242 | blt ->fff_fallback 2243 |1: // ASCII case conversion. 2244 | cmplw TMP2, CARG3 2245 | lbzx TMP1, CARG1, TMP2 2246 | bgey ->fff_newstr 2247 | subi TMP0, TMP1, lo 2248 | xori TMP3, TMP1, 0x20 2249 | addic TMP0, TMP0, -26 2250 | subfe TMP3, TMP3, TMP3 2251 | rlwinm TMP3, TMP3, 0, 26, 26 // x &= 0x20. 2252 | xor TMP1, TMP1, TMP3 2253 | stbx TMP1, CARG2, TMP2 2254 | addi TMP2, TMP2, 1 2255 | b <1 2256 |.endmacro 2257 | 2258 |ffstring_case string_lower, 65 2259 |ffstring_case string_upper, 97 2260 | 2261 |//-- Table library ------------------------------------------------------ 2262 | 2263 |.ffunc_1 table_getn 2264 | checktab CARG3; bne ->fff_fallback 2265 | bl extern lj_tab_len // (GCtab *t) 2266 | // Returns uint32_t (but less than 2^31). 
|  b ->fff_resi
|
|//-- Bit library --------------------------------------------------------
|
|// Common prologue of the bit.* fast functions. Without DUALNUM the first
|// argument is biased with TOBIT and its low word is loaded into CARG1.
|// With DUALNUM a non-integer first argument goes through fff_tobit_fb.
|.macro .ffunc_bit, name
|.if DUALNUM
|  .ffunc_1 bit_..name
|  checknum CARG3; bnel ->fff_tobit_fb
|.else
|  .ffunc_n bit_..name
|  fadd FARG1, FARG1, TOBIT
|  stfd FARG1, TMPD
|  lwz CARG1, TMPD_LO
|.endif
|.endmacro
|
|// Variadic bit operation: folds 'ins' over all remaining arguments,
|// accumulating in CARG1, then returns via fff_resi.
|.macro .ffunc_bit_op, name, ins
|  .ffunc_bit name
|  addi TMP1, BASE, 8  // TMP1 = address of the 2nd argument slot.
|  add TMP2, BASE, NARGS8:RC  // TMP2 = end of the arguments.
|1:
|  lwz CARG4, 0(TMP1)
|  cmplw cr1, TMP1, TMP2
|.if DUALNUM
|  lwz CARG2, 4(TMP1)
|.else
|  lfd FARG1, 0(TMP1)
|.endif
|  bgey cr1, ->fff_resi  // All arguments consumed.
|  checknum CARG4
|.if DUALNUM
|  bnel ->fff_bitop_fb
|.else
|  fadd FARG1, FARG1, TOBIT
|  bge ->fff_fallback
|  stfd FARG1, TMPD
|  lwz CARG2, TMPD_LO
|.endif
|  ins CARG1, CARG1, CARG2
|  addi TMP1, TMP1, 8
|  b <1
|.endmacro
|
|.ffunc_bit_op band, and
|.ffunc_bit_op bor, or
|.ffunc_bit_op bxor, xor
|
|// Byte swap: one rotate plus two rotate-and-insert steps.
|.ffunc_bit bswap
|  rotlwi TMP0, CARG1, 8
|  rlwimi TMP0, CARG1, 24, 0, 7
|  rlwimi TMP0, CARG1, 24, 16, 23
|  mr CRET1, TMP0
|  b ->fff_resi
|
|.ffunc_bit bnot
|  not CRET1, CARG1
|  b ->fff_resi
|
|// Two-argument shift/rotate. shmod selects how the count is prepared:
|// 1 = keep only the low 5 bits, 2 = negate it, anything else = use as is.
|.macro .ffunc_bit_sh, name, ins, shmod
|.if DUALNUM
|  .ffunc_2 bit_..name
|  checknum CARG3; bnel ->fff_tobit_fb
|  // Note: no inline conversion from number for 2nd argument!
|  checknum CARG4; bne ->fff_fallback
|.else
|  .ffunc_nn bit_..name
|  fadd FARG1, FARG1, TOBIT
|  fadd FARG2, FARG2, TOBIT
|  stfd FARG1, TMPD
|  lwz CARG1, TMPD_LO
|  stfd FARG2, TMPD
|  lwz CARG2, TMPD_LO
|.endif
|.if shmod == 1
|  rlwinm CARG2, CARG2, 0, 27, 31
|.elif shmod == 2
|  neg CARG2, CARG2
|.endif
|  ins CRET1, CARG1, CARG2
|  b ->fff_resi
|.endmacro
|
|.ffunc_bit_sh lshift, slw, 1
|.ffunc_bit_sh rshift, srw, 1
|.ffunc_bit_sh arshift, sraw, 1
|.ffunc_bit_sh rol, rotlw, 0
|.ffunc_bit_sh ror, rotlw, 2
|
|.ffunc_bit tobit
|.if DUALNUM
|  b ->fff_resi
|.else
|->fff_resi:
|  tonum_i FARG1, CRET1
|.endif
|// Return the FP number in FARG1 as the single result.
|->fff_resn:
|  lwz PC, FRAME_PC(BASE)
|  la RA, -8(BASE)
|  stfd FARG1, -8(BASE)
|  b ->fff_res1
|
|// Fallback FP number to bit conversion.
|// Reached with a branch-and-link, hence the blr on success.
|->fff_tobit_fb:
|.if DUALNUM
|  lfd FARG1, 0(BASE)
|  bgt ->fff_fallback
|  fadd FARG1, FARG1, TOBIT
|  stfd FARG1, TMPD
|  lwz CARG1, TMPD_LO
|  blr
|.endif
|// Same conversion for the argument slot at TMP1; result goes to CARG2.
|->fff_bitop_fb:
|.if DUALNUM
|  lfd FARG1, 0(TMP1)
|  bgt ->fff_fallback
|  fadd FARG1, FARG1, TOBIT
|  stfd FARG1, TMPD
|  lwz CARG2, TMPD_LO
|  blr
|.endif
|
|//-----------------------------------------------------------------------
|
|->fff_fallback:  // Call fast function fallback handler.
|  // BASE = new base, RB = CFUNC, RC = nargs*8
|  lp TMP3, CFUNC:RB->f
|  add TMP1, BASE, NARGS8:RC
|  lwz PC, FRAME_PC(BASE)  // Fallback may overwrite PC.
|  addi TMP0, TMP1, 8*LUA_MINSTACK
|  lwz TMP2, L->maxstack
|  stw PC, SAVE_PC  // Redundant (but a defined value).
|  .toc lp TMP3, 0(TMP3)
|  cmplw TMP0, TMP2
|  stp BASE, L->base
|  stp TMP1, L->top
|  mr CARG1, L
|  bgt >5  // Need to grow stack.
|  mtctr TMP3
|  bctrl  // (lua_State *L)
|  // Either throws an error, or recovers and returns -1, 0 or nresults+1.
|  lp BASE, L->base
|  cmpwi CRET1, 0
|  slwi RD, CRET1, 3
|  la RA, -8(BASE)
|  bgt ->fff_res  // Returned nresults+1?
|1:  // Returned 0 or -1: retry fast path.
|  lp TMP0, L->top
|  lwz LFUNC:RB, FRAME_FUNC(BASE)
|  sub NARGS8:RC, TMP0, BASE
|  bne ->vm_call_tail  // Returned -1?
|  ins_callt  // Returned 0: retry fast path.
|
|// Reconstruct previous base for vmeta_call during tailcall.
|->vm_call_tail:
|  andix. TMP0, PC, FRAME_TYPE
|  rlwinm TMP1, PC, 0, 0, 28  // PC with its low 3 bits cleared.
|  bne >3
|  lwz INS, -4(PC)
|  decode_RA8 TMP1, INS
|  addi TMP1, TMP1, 8
|3:
|  sub TMP2, BASE, TMP1
|  b ->vm_call_dispatch  // Resolve again for tailcall.
|
|5:  // Grow stack for fallback handler.
|  li CARG2, LUA_MINSTACK
|  bl extern lj_state_growstack  // (lua_State *L, int n)
|  lp BASE, L->base
|  cmpw TMP0, TMP0  // Set 4*cr0+eq to force retry.
|  b <1
|
|->fff_gcstep:  // Call GC step function.
|  // BASE = new base, RC = nargs*8
|  mflr SAVE0  // Keep the return address across the C call.
|  stp BASE, L->base
|  add TMP0, BASE, NARGS8:RC
|  stw PC, SAVE_PC  // Redundant (but a defined value).
|  stp TMP0, L->top
|  mr CARG1, L
|  bl extern lj_gc_step  // (lua_State *L)
|  lp BASE, L->base
|  mtlr SAVE0
|  lp TMP0, L->top
|  sub NARGS8:RC, TMP0, BASE
|  lwz CFUNC:RB, FRAME_FUNC(BASE)
|  blr
|
|//-----------------------------------------------------------------------
|//-- Special dispatch targets -------------------------------------------
|//-----------------------------------------------------------------------
|
|->vm_record:  // Dispatch target for recording phase.
|.if JIT
|  lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
|  andix. TMP0, TMP3, HOOK_VMEVENT  // No recording while in vmevent.
|  bne >5
|  // Decrement the hookcount for consistency, but always do the call.
|  lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH)
|  andix. TMP0, TMP3, HOOK_ACTIVE
|  bne >1
|  subi TMP2, TMP2, 1
|  andi. TMP0, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
|  beqy >1
|  stw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
|  b >1
|.endif
|
|->vm_rethook:  // Dispatch target for return hooks.
|  lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
|  andix. TMP0, TMP3, HOOK_ACTIVE  // Hook already active?
|  beq >1
|5:  // Re-dispatch to static ins.
|  addi TMP1, TMP1, GG_DISP2STATIC  // Assumes decode_OPP TMP1, INS.
|  lpx TMP0, DISPATCH, TMP1
|  mtctr TMP0
|  bctr
|
|->vm_inshook:  // Dispatch target for instr/line hooks.
|  lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
|  lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH)
|  andix. TMP0, TMP3, HOOK_ACTIVE  // Hook already active?
|  rlwinm TMP0, TMP3, 31-LUA_HOOKLINE, 31, 0
|  bne <5
|
|  cmpwi cr1, TMP0, 0
|  addic. TMP2, TMP2, -1  // cr0 reflects the decremented hookcount.
|  beq cr1, <5
|  stw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
|  beq >1
|  bge cr1, <5
|1:
|  mr CARG1, L
|  stw MULTRES, SAVE_MULTRES
|  mr CARG2, PC
|  stp BASE, L->base
|  // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
|  bl extern lj_dispatch_ins  // (lua_State *L, const BCIns *pc)
|3:
|  lp BASE, L->base
|4:  // Re-dispatch to static ins.
|  lwz INS, -4(PC)
|  decode_OPP TMP1, INS
|  decode_RB8 RB, INS
|  addi TMP1, TMP1, GG_DISP2STATIC
|  decode_RD8 RD, INS
|  lpx TMP0, DISPATCH, TMP1
|  decode_RA8 RA, INS
|  decode_RC8 RC, INS
|  mtctr TMP0
|  bctr
|
|->cont_hook:  // Continue from hook yield.
|  addi PC, PC, 4
|  lwz MULTRES, -20(RB)  // Restore MULTRES for *M ins.
|  b <4
|
|->vm_hotloop:  // Hot loop counter underflow.
|.if JIT
|  lwz LFUNC:TMP1, FRAME_FUNC(BASE)
|  addi CARG1, DISPATCH, GG_DISP2J
|  stw PC, SAVE_PC
|  lwz TMP1, LFUNC:TMP1->pc
|  mr CARG2, PC
|  stw L, DISPATCH_J(L)(DISPATCH)
|  lbz TMP1, PC2PROTO(framesize)(TMP1)
|  stp BASE, L->base
|  slwi TMP1, TMP1, 3
|  add TMP1, BASE, TMP1  // L->top = BASE + framesize*8.
|  stp TMP1, L->top
|  bl extern lj_trace_hot  // (jit_State *J, const BCIns *pc)
|  b <3
|.endif
|
|->vm_callhook:  // Dispatch target for call hooks.
|  mr CARG2, PC
|.if JIT
|  b >1
|.endif
|
|->vm_hotcall:  // Hot call counter underflow.
|.if JIT
|  ori CARG2, PC, 1  // Pass PC with bit 0 set on this path.
|1:
|.endif
|  add TMP0, BASE, RC
|  stw PC, SAVE_PC
|  mr CARG1, L
|  stp BASE, L->base
|  sub RA, RA, BASE  // Keep RA as an offset across the call.
|  stp TMP0, L->top
|  bl extern lj_dispatch_call  // (lua_State *L, const BCIns *pc)
|  // Returns ASMFunction.
|  lp BASE, L->base
|  lp TMP0, L->top
|  stw ZERO, SAVE_PC  // Invalidate for subsequent line hook.
|  sub NARGS8:RC, TMP0, BASE
|  add RA, BASE, RA
|  lwz LFUNC:RB, FRAME_FUNC(BASE)
|  lwz INS, -4(PC)
|  mtctr CRET1
|  bctr
|
|//-----------------------------------------------------------------------
|//-- Trace exit handler -------------------------------------------------
|//-----------------------------------------------------------------------
|
|// Store four FP registers into their 8 byte slots starting at 16(sp).
|.macro savex_, a, b, c, d
|  stfd f..a, 16+a*8(sp)
|  stfd f..b, 16+b*8(sp)
|  stfd f..c, 16+c*8(sp)
|  stfd f..d, 16+d*8(sp)
|.endmacro
|
|// Frame built here: 16 bytes, then 32 FPR slots of 8 bytes, then 32 GPR
|// slots of 4 bytes. The address sp+16 is passed to lj_trace_exit.
|->vm_exit_handler:
|.if JIT
|  addi sp, sp, -(16+32*8+32*4)
|  stmw r2, 16+32*8+2*4(sp)
|  addi DISPATCH, JGL, -GG_DISP2G-32768
|  li CARG2, ~LJ_VMST_EXIT
|  lwz CARG1, 16+32*8+32*4(sp)  // Get stack chain.
|  stw CARG2, DISPATCH_GL(vmstate)(DISPATCH)
|  savex_ 0,1,2,3
|  stw CARG1, 0(sp)  // Store extended stack chain.
|  mcrxr cr0  // Clear SO flag.
|  savex_ 4,5,6,7
|  addi CARG2, sp, 16+32*8+32*4  // Recompute original value of sp.
|  savex_ 8,9,10,11
|  stw CARG2, 16+32*8+1*4(sp)  // Store sp in RID_SP.
|  savex_ 12,13,14,15
|  mflr CARG3
|  li TMP1, 0
|  savex_ 16,17,18,19
|  stw TMP1, 16+32*8+0*4(sp)  // Clear RID_TMP.
|  savex_ 20,21,22,23
|  lhz CARG4, 2(CARG3)  // Load trace number.
|  savex_ 24,25,26,27
|  lwz L, DISPATCH_GL(jit_L)(DISPATCH)
|  savex_ 28,29,30,31
|  sub CARG3, TMP0, CARG3  // Compute exit number.
|  lp BASE, DISPATCH_GL(jit_base)(DISPATCH)
|  srwi CARG3, CARG3, 2
|  stw L, DISPATCH_J(L)(DISPATCH)
|  subi CARG3, CARG3, 2
|  stw TMP1, DISPATCH_GL(jit_L)(DISPATCH)
|  stw CARG4, DISPATCH_J(parent)(DISPATCH)
|  stp BASE, L->base
|  addi CARG1, DISPATCH, GG_DISP2J
|  stw CARG3, DISPATCH_J(exitno)(DISPATCH)
|  addi CARG2, sp, 16
|  bl extern lj_trace_exit  // (jit_State *J, ExitState *ex)
|  // Returns MULTRES (unscaled) or negated error code.
|  lp TMP1, L->cframe
|  lwz TMP2, 0(sp)
|  lp BASE, L->base
|  // sp = L->cframe with its two low bits cleared.
|.if GPR64
|  rldicr sp, TMP1, 0, 61
|.else
|  rlwinm sp, TMP1, 0, 0, 29
|.endif
|  lwz PC, SAVE_PC  // Get SAVE_PC.
|  stw TMP2, 0(sp)
|  stw L, SAVE_L  // Set SAVE_L (on-trace resume/yield).
|  b >1
|.endif
|->vm_exit_interp:
|.if JIT
|  // CARG1 = MULTRES or negated error code, BASE, PC and JGL set.
|  lwz L, SAVE_L
|  addi DISPATCH, JGL, -GG_DISP2G-32768
|1:
|  cmpwi CARG1, 0
|  blt >3  // Check for error from exit.
|  lwz LFUNC:TMP1, FRAME_FUNC(BASE)
|  slwi MULTRES, CARG1, 3
|  li TMP2, 0
|  stw MULTRES, SAVE_MULTRES
|  lwz TMP1, LFUNC:TMP1->pc
|  stw TMP2, DISPATCH_GL(jit_L)(DISPATCH)
|  lwz KBASE, PC2PROTO(k)(TMP1)
|  // Setup type comparison constants.
|  li TISNUM, LJ_TISNUM
|  lus TMP3, 0x59c0  // TOBIT = 2^52 + 2^51 (float).
|  stw TMP3, TMPD
|  li ZERO, 0
|  ori TMP3, TMP3, 0x0004  // TONUM = 2^52 + 2^51 + 2^31 (float).
|  lfs TOBIT, TMPD
|  stw TMP3, TMPD
|  lus TMP0, 0x4338  // Hiword of 2^52 + 2^51 (double)
|  li TISNIL, LJ_TNIL
|  stw TMP0, TONUM_HI
|  lfs TONUM, TMPD
|  // Modified copy of ins_next which handles function header dispatch, too.
|  lwz INS, 0(PC)
|  addi PC, PC, 4
|  // Assumes TISNIL == ~LJ_VMST_INTERP == -1.
|  stw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
|  decode_OPP TMP1, INS
|  decode_RA8 RA, INS
|  lpx TMP0, DISPATCH, TMP1
|  mtctr TMP0
|  cmplwi TMP1, BC_FUNCF*4  // Function header?
|  bge >2
|  decode_RB8 RB, INS
|  decode_RD8 RD, INS
|  decode_RC8 RC, INS
|  bctr
|2:
|  subi RC, MULTRES, 8
|  add RA, RA, BASE
|  bctr
|
|3:  // Rethrow error from the right C frame.
|  neg CARG2, CARG1
|  mr CARG1, L
|  bl extern lj_err_throw  // (lua_State *L, int errcode)
|.endif
|
|//-----------------------------------------------------------------------
|//-- Math helper functions ----------------------------------------------
|//-----------------------------------------------------------------------
|
|// NYI: Use internal implementations of floor, ceil, trunc.
|
|// Integer modulo of CARG1 by CARG2, result in CARG1. The truncated
|// remainder is corrected by adding the divisor when the operands differ
|// in sign and the remainder is non-zero. The divwo. overflow path returns
|// 0; for a zero divisor it returns with SO still set.
|->vm_modi:
|  divwo. TMP0, CARG1, CARG2
|  bso >1
|.if GPR64
|  xor CARG3, CARG1, CARG2
|  cmpwi CARG3, 0
|.else
|  xor. CARG3, CARG1, CARG2
|.endif
|  mullw TMP0, TMP0, CARG2
|  sub CARG1, CARG1, TMP0
|  bgelr  // Same sign: the remainder is already correct.
|  cmpwi CARG1, 0; beqlr
|  add CARG1, CARG1, CARG2
|  blr
|1:
|  cmpwi CARG2, 0
|  li CARG1, 0
|  beqlr
|  mcrxr cr0  // Clear SO for -2147483648 % -1 and return 0.
|  blr
|
|//-----------------------------------------------------------------------
|//-- Miscellaneous functions --------------------------------------------
|//-----------------------------------------------------------------------
|
|// void lj_vm_cachesync(void *start, void *end)
|// Flush D-Cache and invalidate I-Cache. Assumes 32 byte cache line size.
|// This is a good lower bound, except for very ancient PPC models.
|->vm_cachesync:
|.if JIT or FFI
|  // Compute start of first cache line and number of cache lines.
|  rlwinm CARG1, CARG1, 0, 0, 26  // Round start down to a multiple of 32.
|  sub CARG2, CARG2, CARG1
|  addi CARG2, CARG2, 31
|  rlwinm. CARG2, CARG2, 27, 5, 31  // Line count = (length + 31) >> 5.
|  beqlr  // Nothing to do for an empty range.
|  mtctr CARG2
|  mr CARG3, CARG1  // Second cursor for the I-Cache pass.
|1:  // Flush D-Cache.
|  dcbst r0, CARG1
|  addi CARG1, CARG1, 32
|  bdnz <1
|  sync
|  mtctr CARG2
|1:  // Invalidate I-Cache.
|  icbi r0, CARG3
|  addi CARG3, CARG3, 32
|  bdnz <1
|  isync
|  blr
|.endif
|
|//-----------------------------------------------------------------------
|//-- FFI helper functions -----------------------------------------------
|//-----------------------------------------------------------------------
|
|// Handler for callback functions. Callback slot number in r11, g in r12.
|->vm_ffi_callback:
|.if FFI
|.type CTSTATE, CTState, PC
|  saveregs
|  lwz CTSTATE, GL:r12->ctype_state
|  addi DISPATCH, r12, GG_G2DISP
|  stw r11, CTSTATE->cb.slot
|  // Save the incoming argument registers r3-r10 and f1-f8.
|  stw r3, CTSTATE->cb.gpr[0]
|  stfd f1, CTSTATE->cb.fpr[0]
|  stw r4, CTSTATE->cb.gpr[1]
|  stfd f2, CTSTATE->cb.fpr[1]
|  stw r5, CTSTATE->cb.gpr[2]
|  stfd f3, CTSTATE->cb.fpr[2]
|  stw r6, CTSTATE->cb.gpr[3]
|  stfd f4, CTSTATE->cb.fpr[3]
|  stw r7, CTSTATE->cb.gpr[4]
|  stfd f5, CTSTATE->cb.fpr[4]
|  stw r8, CTSTATE->cb.gpr[5]
|  stfd f6, CTSTATE->cb.fpr[5]
|  stw r9, CTSTATE->cb.gpr[6]
|  stfd f7, CTSTATE->cb.fpr[6]
|  stw r10, CTSTATE->cb.gpr[7]
|  stfd f8, CTSTATE->cb.fpr[7]
|  addi TMP0, sp, CFRAME_SPACE+8
|  stw TMP0, CTSTATE->cb.stack
|  mr CARG1, CTSTATE
|  stw CTSTATE, SAVE_PC  // Any value outside of bytecode is ok.
|  mr CARG2, sp
|  bl extern lj_ccallback_enter  // (CTState *cts, void *cf)
|  // Returns lua_State *.
|  lp BASE, L:CRET1->base
|  li TISNUM, LJ_TISNUM  // Setup type comparison constants.
|  lp RC, L:CRET1->top
|  lus TMP3, 0x59c0  // TOBIT = 2^52 + 2^51 (float).
|  li ZERO, 0
|  mr L, CRET1
|  stw TMP3, TMPD
|  lus TMP0, 0x4338  // Hiword of 2^52 + 2^51 (double)
|  lwz LFUNC:RB, FRAME_FUNC(BASE)
|  ori TMP3, TMP3, 0x0004  // TONUM = 2^52 + 2^51 + 2^31 (float).
|  stw TMP0, TONUM_HI
|  li TISNIL, LJ_TNIL
|  li_vmstate INTERP
|  lfs TOBIT, TMPD
|  stw TMP3, TMPD
|  sub RC, RC, BASE
|  st_vmstate
|  lfs TONUM, TMPD
|  ins_callt
|.endif
|
|->cont_ffi_callback:  // Return from FFI callback.
|.if FFI
|  lwz CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH)
|  stp BASE, L->base
|  stp RB, L->top
|  stp L, CTSTATE->L
|  mr CARG1, CTSTATE
|  mr CARG2, RA
|  bl extern lj_ccallback_leave  // (CTState *cts, TValue *o)
|  // Reload the return registers from the callback state.
|  lwz CRET1, CTSTATE->cb.gpr[0]
|  lfd FARG1, CTSTATE->cb.fpr[0]
|  lwz CRET2, CTSTATE->cb.gpr[1]
|  b ->vm_leave_unw
|.endif
|
|->vm_ffi_call:  // Call C function via FFI.
|  // Caveat: needs special frame unwinding, see below.
|.if FFI
|  .type CCSTATE, CCallState, CARG1
|  lwz TMP1, CCSTATE->spadj
|  mflr TMP0
|  lbz CARG2, CCSTATE->nsp
|  lbz CARG3, CCSTATE->nfpr
|  neg TMP1, TMP1
|  stw TMP0, 4(sp)  // Save the return address in the caller's frame.
|  cmpwi cr1, CARG3, 0
|  mr TMP2, sp
|  addic. CARG2, CARG2, -1
|  stwux sp, sp, TMP1  // Allocate spadj bytes and link to the old sp.
|  crnot 4*cr1+eq, 4*cr1+eq  // For vararg calls.
|  stw r14, -4(TMP2)  // Old r14 and CCSTATE live just below the old sp.
|  stw CCSTATE, -8(TMP2)
|  mr r14, TMP2  // r14 = old sp for the duration of the call.
|  la TMP1, CCSTATE->stack
|  slwi CARG2, CARG2, 2
|  blty >2
|  la TMP2, 8(sp)
|1:  // Copy the stack arguments, last word first.
|  lwzx TMP0, TMP1, CARG2
|  stwx TMP0, TMP2, CARG2
|  addic. CARG2, CARG2, -4
|  bge <1
|2:
|  bney cr1, >3  // Skip the FPR loads when nfpr is zero.
|  lfd f1, CCSTATE->fpr[0]
|  lfd f2, CCSTATE->fpr[1]
|  lfd f3, CCSTATE->fpr[2]
|  lfd f4, CCSTATE->fpr[3]
|  lfd f5, CCSTATE->fpr[4]
|  lfd f6, CCSTATE->fpr[5]
|  lfd f7, CCSTATE->fpr[6]
|  lfd f8, CCSTATE->fpr[7]
|3:
|  lp TMP0, CCSTATE->func
|  lwz CARG2, CCSTATE->gpr[1]
|  lwz CARG3, CCSTATE->gpr[2]
|  lwz CARG4, CCSTATE->gpr[3]
|  lwz CARG5, CCSTATE->gpr[4]
|  mtctr TMP0
|  lwz r8, CCSTATE->gpr[5]
|  lwz r9, CCSTATE->gpr[6]
|  lwz r10, CCSTATE->gpr[7]
|  lwz CARG1, CCSTATE->gpr[0]  // Do this last, since CCSTATE is CARG1.
|  bctrl
|  // Recover CCSTATE, the old r14 and the return address via r14.
|  lwz CCSTATE:TMP1, -8(r14)
|  lwz TMP2, -4(r14)
|  lwz TMP0, 4(r14)
|  stw CARG1, CCSTATE:TMP1->gpr[0]
|  stfd FARG1, CCSTATE:TMP1->fpr[0]
|  stw CARG2, CCSTATE:TMP1->gpr[1]
|  mtlr TMP0
|  stw CARG3, CCSTATE:TMP1->gpr[2]
|  mr sp, r14
|  stw CARG4, CCSTATE:TMP1->gpr[3]
|  mr r14, TMP2
|  blr
|.endif
|// Note: vm_ffi_call must be the last function in this object file!
|
|//-----------------------------------------------------------------------
}

/* Generate the code for a single instruction. */
static void build_ins(BuildCtx *ctx, BCOp op, int defop)
{
  int vk = 0;
|=>defop:

  switch (op) {

  /* -- Comparison ops ---------------------------------------------------- */

  /* Remember: all ops branch for a true comparison, fall through otherwise. */

  case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
|  // RA = src1*8, RD = src2*8, JMP with RD = target
|.if DUALNUM
|  lwzux TMP0, RA, BASE
|  addi PC, PC, 4
|  lwz CARG2, 4(RA)
|  lwzux TMP1, RD, BASE
|  lwz TMP2, -4(PC)
|  checknum cr0, TMP0
|  lwz CARG3, 4(RD)
|  decode_RD4 TMP2, TMP2
|  checknum cr1, TMP1
|  addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
|  bne cr0, >7
|  bne cr1, >8
|  cmpw CARG2, CARG3
    /* Skip the jump (label 2) when the op's condition is false. */
    if (op == BC_ISLT) {
|  bge >2
    } else if (op == BC_ISGE) {
|  blt >2
    } else if (op == BC_ISLE) {
|  bgt >2
    } else {
|  ble >2
    }
|1:
|  add PC, PC, TMP2
|2:
|  ins_next
|
|7:  // RA is not an integer.
|  bgt cr0, ->vmeta_comp
|  // RA is a number.
|  lfd f0, 0(RA)
|  bgt cr1, ->vmeta_comp
|  blt cr1, >4
|  // RA is a number, RD is an integer.
|  tonum_i f1, CARG3
|  b >5
|
|8:  // RA is an integer, RD is not an integer.
|  bgt cr1, ->vmeta_comp
|  // RA is an integer, RD is a number.
|  tonum_i f0, CARG2
|4:
|  lfd f1, 0(RD)
|5:
|  fcmpu cr0, f0, f1
    if (op == BC_ISLT) {
|  bge <2
    } else if (op == BC_ISGE) {
|  blt <2
    } else if (op == BC_ISLE) {
|  cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq
|  bge <2
    } else {
|  cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq
|  blt <2
    }
|  b <1
|.else
|  lwzx TMP0, BASE, RA
|  addi PC, PC, 4
|  lfdx f0, BASE, RA
|  lwzx TMP1, BASE, RD
|  checknum cr0, TMP0
|  lwz TMP2, -4(PC)
|  lfdx f1, BASE, RD
|  checknum cr1, TMP1
|  decode_RD4 TMP2, TMP2
|  bge cr0, ->vmeta_comp
|  addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
|  bge cr1, ->vmeta_comp
|  fcmpu cr0, f0, f1
    if (op == BC_ISLT) {
|  bge >1
    } else if (op == BC_ISGE) {
|  blt >1
    } else if (op == BC_ISLE) {
|  cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq
|  bge >1
    } else {
|  cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq
|  blt >1
    }
|  add PC, PC, TMP2
|1:
|  ins_next
|.endif
    break;

  case BC_ISEQV: case BC_ISNEV:
    vk = op == BC_ISEQV;
|  // RA = src1*8, RD = src2*8, JMP with RD = target
|.if DUALNUM
|  lwzux TMP0, RA, BASE
|  addi PC, PC, 4
|  lwz CARG2, 4(RA)
|  lwzux TMP1, RD, BASE
|  checknum cr0, TMP0
|  lwz TMP2, -4(PC)
|  checknum cr1, TMP1
|  decode_RD4 TMP2, TMP2
|  lwz CARG3, 4(RD)
|  cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
|  addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
    /* Neither checknum result is 'gt': continue in the ISEQN/ISNEN code. */
    if (vk) {
|  ble cr7, ->BC_ISEQN_Z
    } else {
|  ble cr7, ->BC_ISNEN_Z
    }
|.else
|  lwzux TMP0, RA, BASE
|  lwz TMP2, 0(PC)
|  lfd f0, 0(RA)
|  addi PC, PC, 4
|  lwzux TMP1, RD, BASE
|  checknum cr0, TMP0
|  decode_RD4 TMP2, TMP2
|  lfd f1, 0(RD)
|  checknum cr1, TMP1
|  addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
|  bge cr0, >5
|  bge cr1, >5
|  fcmpu cr0, f0, f1
    if (vk) {
|  bne >1
|  add PC, PC, TMP2
    } else {
|  beq >1
|  add PC, PC, TMP2
    }
|1:
|  ins_next
|.endif
|5:  // Either or both types are not numbers.
|.if not DUALNUM
|  lwz CARG2, 4(RA)
|  lwz CARG3, 4(RD)
|.endif
|.if FFI
|  cmpwi cr7, TMP0, LJ_TCDATA
|  cmpwi cr5, TMP1, LJ_TCDATA
|.endif
|  not TMP3, TMP0
|  cmplw TMP0, TMP1
|  cmplwi cr1, TMP3, ~LJ_TISPRI  // Primitive?
|.if FFI
|  cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq
|.endif
|  cmplwi cr6, TMP3, ~LJ_TISTABUD  // Table or userdata?
|.if FFI
|  beq cr7, ->vmeta_equal_cd
|.endif
|  cmplw cr5, CARG2, CARG3
|  crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt  // 2: Same type and primitive.
|  crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq  // 1: Same tv or different type.
|  crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq  // 0: Same type and same tv.
|  mr SAVE0, PC  // Remember PC in case __eq must be called.
|  cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt  // 0 or 2.
|  cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt  // 1 or 2.
    if (vk) {
|  bne cr0, >6
|  add PC, PC, TMP2
|6:
    } else {
|  beq cr0, >6
|  add PC, PC, TMP2
|6:
    }
|.if DUALNUM
|  bge cr0, >2  // Done if 1 or 2.
|1:
|  ins_next
|2:
|.else
|  blt cr0, <1  // Done if 1 or 2.
|.endif
|  blt cr6, <1  // Done if not tab/ud.
|
|  // Different tables or userdatas. Need to check __eq metamethod.
|  // Field metatable must be at same offset for GCtab and GCudata!
|  lwz TAB:TMP2, TAB:CARG2->metatable
|  li CARG4, 1-vk  // ne = 0 or 1.
|  cmplwi TAB:TMP2, 0
|  beq <1  // No metatable?
|  lbz TMP2, TAB:TMP2->nomm
|  andix. TMP2, TMP2, 1<<MM_eq
|  bne <1  // Or 'no __eq' flag set?
|  mr PC, SAVE0  // Restore old PC.
|  b ->vmeta_equal  // Handle __eq metamethod.
    break;

  case BC_ISEQS: case BC_ISNES:
    vk = op == BC_ISEQS;
|  // RA = src*8, RD = str_const*8 (~), JMP with RD = target
|  lwzux TMP0, RA, BASE
|  srwi RD, RD, 1
|  lwz STR:TMP3, 4(RA)
|  lwz TMP2, 0(PC)
|  subfic RD, RD, -4
|  addi PC, PC, 4
|.if FFI
|  cmpwi TMP0, LJ_TCDATA
|.endif
|  lwzx STR:TMP1, KBASE, RD  // KBASE-4-str_const*4
|  .gpr64 extsw TMP0, TMP0
|  subfic TMP0, TMP0, LJ_TSTR
|.if FFI
|  beq ->vmeta_equal_cd
|.endif
|  sub TMP1, STR:TMP1, STR:TMP3
|  or TMP0, TMP0, TMP1  // Zero iff the type is string and pointers match.
|  decode_RD4 TMP2, TMP2
|  subfic TMP0, TMP0, 0
|  addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
|  subfe TMP1, TMP1, TMP1  // Branch-free mask from the carry.
    if (vk) {
|  andc TMP2, TMP2, TMP1
    } else {
|  and TMP2, TMP2, TMP1
    }
|  add PC, PC, TMP2
|  ins_next
    break;

  case BC_ISEQN: case BC_ISNEN:
    vk = op == BC_ISEQN;
|  // RA = src*8, RD = num_const*8, JMP with RD = target
|.if DUALNUM
|  lwzux TMP0, RA, BASE
|  addi PC, PC, 4
|  lwz CARG2, 4(RA)
|  lwzux TMP1, RD, KBASE
|  checknum cr0, TMP0
|  lwz TMP2, -4(PC)
|  checknum cr1, TMP1
|  decode_RD4 TMP2, TMP2
|  lwz CARG3, 4(RD)
|  addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
    if (vk) {
|->BC_ISEQN_Z:
    } else {
|->BC_ISNEN_Z:
    }
|  bne cr0, >7
|  bne cr1, >8
|  cmpw CARG2, CARG3
|4:
|.else
    if (vk) {
|->BC_ISEQN_Z:  // Dummy label.
    } else {
|->BC_ISNEN_Z:  // Dummy label.
    }
|  lwzx TMP0, BASE, RA
|  addi PC, PC, 4
|  lfdx f0, BASE, RA
|  lwz TMP2, -4(PC)
|  lfdx f1, KBASE, RD
|  decode_RD4 TMP2, TMP2
|  checknum TMP0
|  addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
|  bge >3
|  fcmpu cr0, f0, f1
|.endif
    /* Label 3 (operand is not a number) aliases label 1 unless FFI is on. */
    if (vk) {
|  bne >1
|  add PC, PC, TMP2
|1:
|.if not FFI
|3:
|.endif
    } else {
|  beq >2
|1:
|.if not FFI
|3:
|.endif
|  add PC, PC, TMP2
|2:
    }
|  ins_next
|.if FFI
|3:
|  cmpwi TMP0, LJ_TCDATA
|  beq ->vmeta_equal_cd
|  b <1
|.endif
|.if DUALNUM
|7:  // RA is not an integer.
|  bge cr0, <3
|  // RA is a number.
|  lfd f0, 0(RA)
|  blt cr1, >1
|  // RA is a number, RD is an integer.
|  tonum_i f1, CARG3
|  b >2
|
|8:  // RA is an integer, RD is a number.
|  tonum_i f0, CARG2
|1:
|  lfd f1, 0(RD)
|2:
|  fcmpu cr0, f0, f1
|  b <4
|.endif
    break;

  case BC_ISEQP: case BC_ISNEP:
    vk = op == BC_ISEQP;
|  // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
|  lwzx TMP0, BASE, RA
|  srwi TMP1, RD, 3
|  lwz TMP2, 0(PC)
|  not TMP1, TMP1
|  addi PC, PC, 4
|.if FFI
|  cmpwi TMP0, LJ_TCDATA
|.endif
|  sub TMP0, TMP0, TMP1  // Zero iff the type tags match.
|.if FFI
|  beq ->vmeta_equal_cd
|.endif
|  decode_RD4 TMP2, TMP2
|  .gpr64 extsw TMP0, TMP0
|  addic TMP0, TMP0, -1
|  addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
|  subfe TMP1, TMP1, TMP1  // Branch-free mask from the carry.
    if (vk) {
|  and TMP2, TMP2, TMP1
    } else {
|  andc TMP2, TMP2, TMP1
    }
|  add PC, PC, TMP2
|  ins_next
    break;

  /* -- Unary test and copy ops ------------------------------------------- */

  case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
|  // RA = dst*8 or unused, RD = src*8, JMP with RD = target
|  lwzx TMP0, BASE, RD
|  lwz INS, 0(PC)
|  addi PC, PC, 4
    if (op == BC_IST || op == BC_ISF) {
      /* Test only: the jump offset is masked branch-free via the carry. */
|  .gpr64 extsw TMP0, TMP0
|  subfic TMP0, TMP0, LJ_TTRUE
|  decode_RD4 TMP2, INS
|  subfe TMP1, TMP1, TMP1
|  addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
      if (op == BC_IST) {
|  andc TMP2, TMP2, TMP1
      } else {
|  and TMP2, TMP2, TMP1
      }
|  add PC, PC, TMP2
    } else {
      /* Test and copy: the source slot is stored to RA only when jumping. */
|  li TMP1, LJ_TFALSE
|  lfdx f0, BASE, RD
|  cmplw TMP0, TMP1
      if (op == BC_ISTC) {
|  bge >1
      } else {
|  blt >1
      }
|  addis PC, PC, -(BCBIAS_J*4 >> 16)
|  decode_RD4 TMP2, INS
|  stfdx f0, BASE, RA
|  add PC, PC, TMP2
|1:
    }
|  ins_next
    break;

  /* -- Unary ops --------------------------------------------------------- */

  case BC_MOV:
|  // RA = dst*8, RD = src*8
|  ins_next1
|  lfdx f0, BASE, RD
|  stfdx f0, BASE, RA
|  ins_next2
    break;
  case BC_NOT:
|  // RA = dst*8, RD = src*8
|  ins_next1
|  lwzx TMP0, BASE, RD
|  .gpr64 extsw TMP0, TMP0
|  subfic TMP1, TMP0, LJ_TTRUE
|  adde TMP0, TMP0, TMP1
|  stwx TMP0, BASE, RA
|  ins_next2
    break;
  case BC_UNM:
|  // RA = dst*8, RD = src*8
|  lwzux TMP1, RD, BASE
|  lwz TMP0, 4(RD)
|  checknum TMP1
|.if DUALNUM
|  bne >5
|.if GPR64
|  lus TMP2, 0x8000
|  neg TMP0, TMP0
|  cmplw TMP0, TMP2
|  beq >4
|.else
|  nego. TMP0, TMP0
|  bso >4
|1:
|.endif
|  ins_next1
|  stwux TISNUM, RA, BASE
|  stw TMP0, 4(RA)
|3:
|  ins_next2
|4:
|.if not GPR64
|  // Potential overflow.
|  mcrxr cr0; bley <1  // Ignore unrelated overflow.
|.endif
|  lus TMP1, 0x41e0  // 2^31.
|  li TMP0, 0
|  b >7
|.endif
|5:
|  bge ->vmeta_unm
|  xoris TMP1, TMP1, 0x8000  // Flip the sign bit in the high word.
|7:
|  ins_next1
|  stwux TMP1, RA, BASE
|  stw TMP0, 4(RA)
|.if DUALNUM
|  b <3
|.else
|  ins_next2
|.endif
    break;
  case BC_LEN:
|  // RA = dst*8, RD = src*8
|  lwzux TMP0, RD, BASE
|  lwz CARG1, 4(RD)
|  checkstr TMP0; bne >2
|  lwz CRET1, STR:CARG1->len
|1:
|.if DUALNUM
|  ins_next1
|  stwux TISNUM, RA, BASE
|  stw CRET1, 4(RA)
|.else
|  tonum_u f0, CRET1  // Result is a non-negative integer.
|  ins_next1
|  stfdx f0, BASE, RA
|.endif
|  ins_next2
|2:
|  checktab TMP0; bne ->vmeta_len
#if LJ_52
|  lwz TAB:TMP2, TAB:CARG1->metatable
|  cmplwi TAB:TMP2, 0
|  bne >9
|3:
#endif
|->BC_LEN_Z:
|  bl extern lj_tab_len  // (GCtab *t)
|  // Returns uint32_t (but less than 2^31).
|  b <1
#if LJ_52
|9:
|  lbz TMP0, TAB:TMP2->nomm
|  andix. TMP0, TMP0, 1<<MM_len
|  bne <3  // 'no __len' flag set: done.
|  b ->vmeta_len
#endif
    break;

  /* -- Binary ops -------------------------------------------------------- */

|// Operand fetch for the FP arithmetic path: sets vk from the opcode
|// (0 = VN, 1 = NV, otherwise VV), loads the operands into f14/f15 and
|// branches to the matching vmeta_arith_* handler unless all checked
|// operands are numbers.
|.macro ins_arithpre
|  // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
||switch (vk) {
||case 0:
|  lwzx TMP1, BASE, RB
|  .if DUALNUM
|  lwzx TMP2, KBASE, RC
|  .endif
|  lfdx f14, BASE, RB
|  lfdx f15, KBASE, RC
|  .if DUALNUM
|  checknum cr0, TMP1
|  checknum cr1, TMP2
|  crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
|  bge ->vmeta_arith_vn
|  .else
|  checknum TMP1; bge ->vmeta_arith_vn
|  .endif
||  break;
||case 1:
|  lwzx TMP1, BASE, RB
|  .if DUALNUM
|  lwzx TMP2, KBASE, RC
|  .endif
|  lfdx f15, BASE, RB
|  lfdx f14, KBASE, RC
|  .if DUALNUM
|  checknum cr0, TMP1
|  checknum cr1, TMP2
|  crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
|  bge ->vmeta_arith_nv
|  .else
|  checknum TMP1; bge ->vmeta_arith_nv
|  .endif
||  break;
||default:
|  lwzx TMP1, BASE, RB
|  lwzx TMP2, BASE, RC
|  lfdx f14, BASE, RB
|  lfdx f15, BASE, RC
|  checknum cr0, TMP1
|  checknum cr1, TMP2
|  crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
|  bge ->vmeta_arith_vv
||  break;
||}
|.endmacro
|
|// Emit 'ins' (a branch mnemonic) to the *2 fallback label matching vk.
|.macro ins_arithfallback, ins
||switch (vk) {
||case 0:
|  ins ->vmeta_arith_vn2
||  break;
||case 1:
|  ins ->vmeta_arith_nv2
||  break;
||default:
|  ins ->vmeta_arith_vv2
||  break;
||}
|.endmacro
|
|// Integer modulo: the macro operands are unused, vm_modi works on CARG1/CARG2.
|.macro intmod, a, b, c
|  bl ->vm_modi
|.endmacro
|
|.macro fpmod, a, b, c
|->BC_MODVN_Z:
|  fdiv FARG1, b, c
|  // NYI: Use internal implementation of floor.
3442 | blex floor // floor(b/c) 3443 | fmul a, FARG1, c 3444 | fsub a, b, a // b - floor(b/c)*c 3445 |.endmacro 3446 | 3447 |.macro ins_arithfp, fpins // FP-only arithmetic: operands via ins_arithpre, then 'fpins f0, f14, f15' with f0 stored to BASE+RA. 3448 | ins_arithpre 3449 |.if "fpins" == "fpmod_" 3450 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3451 |.else 3452 | fpins f0, f14, f15 3453 | ins_next1 3454 | stfdx f0, BASE, RA 3455 | ins_next2 3456 |.endif 3457 |.endmacro 3458 | 3459 |.macro ins_arithdn, intins, fpins // DUALNUM arithmetic: integer path with overflow check, FP path at label 5. 3460 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 3461 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3462 ||switch (vk) { 3463 ||case 0: 3464 | lwzux TMP1, RB, BASE 3465 | lwzux TMP2, RC, KBASE 3466 | lwz CARG1, 4(RB) 3467 | checknum cr0, TMP1 3468 | lwz CARG2, 4(RC) 3469 || break; 3470 ||case 1: 3471 | lwzux TMP1, RB, BASE 3472 | lwzux TMP2, RC, KBASE 3473 | lwz CARG2, 4(RB) 3474 | checknum cr0, TMP1 3475 | lwz CARG1, 4(RC) 3476 || break; 3477 ||default: 3478 | lwzux TMP1, RB, BASE 3479 | lwzux TMP2, RC, BASE 3480 | lwz CARG1, 4(RB) 3481 | checknum cr0, TMP1 3482 | lwz CARG2, 4(RC) 3483 || break; 3484 ||} 3485 | checknum cr1, TMP2 3486 | bne >5 3487 | bne cr1, >5 // Either checknum compare is not eq: take the FP variant. 3488 | intins CARG1, CARG1, CARG2 3489 | bso >4 // Summary overflow bit set: examine it at label 4. 3490 |1: 3491 | ins_next1 3492 | stwux TISNUM, RA, BASE 3493 | stw CARG1, 4(RA) 3494 |2: 3495 | ins_next2 3496 |4: // Overflow. 3497 | mcrxr cr0; bley <1 // Ignore unrelated overflow. 3498 | ins_arithfallback b // Otherwise branch to the vk-specific vmeta_arith_*2 label. 3499 |5: // FP variant. 3500 ||if (vk == 1) { 3501 | lfd f15, 0(RB) 3502 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3503 | lfd f14, 0(RC) 3504 ||} else { 3505 | lfd f14, 0(RB) 3506 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3507 | lfd f15, 0(RC) 3508 ||} 3509 | ins_arithfallback bge 3510 |.if "fpins" == "fpmod_" 3511 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3512 |.else 3513 | fpins f0, f14, f15 3514 | ins_next1 3515 | stfdx f0, BASE, RA 3516 | b <2 3517 |.endif 3518 |.endmacro 3519 | 3520 |.macro ins_arith, intins, fpins // Selects ins_arithdn with DUALNUM, else ins_arithfp (intins is then unused). 3521 |.if DUALNUM 3522 | ins_arithdn intins, fpins 3523 |.else 3524 | ins_arithfp fpins 3525 |.endif 3526 |.endmacro 3527 3528 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: /* Addition. With GPR64 the addo32. macro performs the overflow-checked add on both operands shifted left by 32 and a plain add for the result. */ 3529 |.if GPR64 3530 |.macro addo32., y, a, b 3531 | // Need to check overflow for (a<<32) + (b<<32). 3532 | rldicr TMP0, a, 32, 31 3533 | rldicr TMP3, b, 32, 31 3534 | addo. TMP0, TMP0, TMP3 3535 | add y, a, b 3536 |.endmacro 3537 | ins_arith addo32., fadd 3538 |.else 3539 | ins_arith addo., fadd 3540 |.endif 3541 break; 3542 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: /* Subtraction. Same structure as the addition case, using subo32. / subo. and fsub. */ 3543 |.if GPR64 3544 |.macro subo32., y, a, b 3545 | // Need to check overflow for (a<<32) - (b<<32). 3546 | rldicr TMP0, a, 32, 31 3547 | rldicr TMP3, b, 32, 31 3548 | subo. TMP0, TMP0, TMP3 3549 | sub y, a, b 3550 |.endmacro 3551 | ins_arith subo32., fsub 3552 |.else 3553 | ins_arith subo., fsub 3554 |.endif 3555 break; 3556 case BC_MULVN: case BC_MULNV: case BC_MULVV: /* Multiplication: mullwo. on the integer path, fmul on the FP path. */ 3557 | ins_arith mullwo., fmul 3558 break; 3559 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: /* Division uses ins_arithfp directly, i.e. the FP path even with DUALNUM. */ 3560 | ins_arithfp fdiv 3561 break; 3562 case BC_MODVN: /* Modulo. This case passes the real fpmod macro, whose expansion defines ->BC_MODVN_Z. */ 3563 | ins_arith intmod, fpmod 3564 break; 3565 case BC_MODNV: case BC_MODVV: /* "fpmod_" is only compared by name inside ins_arithfp/ins_arithdn, which then emit a branch to ->BC_MODVN_Z. */ 3566 | ins_arith intmod, fpmod_ 3567 break; 3568 case BC_POW: /* Power: loads both operands into FARG1/FARG2, takes vmeta_arith_vv when the checknum test fails, else calls extern pow. */ 3569 | // NYI: (partial) integer arithmetic.
3570 | lwzx TMP1, BASE, RB 3571 | lfdx FARG1, BASE, RB 3572 | lwzx TMP2, BASE, RC 3573 | lfdx FARG2, BASE, RC 3574 | checknum cr0, TMP1 3575 | checknum cr1, TMP2 3576 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3577 | bge ->vmeta_arith_vv 3578 | blex pow // Call extern pow; the result is stored from FARG1 below. 3579 | ins_next1 3580 | stfdx FARG1, BASE, RA 3581 | ins_next2 3582 break; 3583 3584 case BC_CAT: /* Concatenation: calls lj_meta_cat. A non-NULL return branches to vmeta_binop, otherwise the slot at BASE+RB is copied to BASE+RA. */ 3585 | // RA = dst*8, RB = src_start*8, RC = src_end*8 3586 | sub CARG3, RC, RB 3587 | stp BASE, L->base 3588 | add CARG2, BASE, RC 3589 | mr SAVE0, RB 3590 |->BC_CAT_Z: 3591 | stw PC, SAVE_PC 3592 | mr CARG1, L 3593 | srwi CARG3, CARG3, 3 3594 | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) 3595 | // Returns NULL (finished) or TValue * (metamethod). 3596 | cmplwi CRET1, 0 3597 | lp BASE, L->base 3598 | bne ->vmeta_binop 3599 | ins_next1 3600 | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. 3601 | stfdx f0, BASE, RA 3602 | ins_next2 3603 break; 3604 3605 /* -- Constant ops ------------------------------------------------------ */ 3606 3607 case BC_KSTR: /* Store a string constant: tag LJ_TSTR plus the pointer loaded from KBASE-4-str_const*4. */ 3608 | // RA = dst*8, RD = str_const*8 (~) 3609 | srwi TMP1, RD, 1 3610 | subfic TMP1, TMP1, -4 3611 | ins_next1 3612 | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4 3613 | li TMP2, LJ_TSTR 3614 | stwux TMP2, RA, BASE 3615 | stw TMP0, 4(RA) 3616 | ins_next2 3617 break; 3618 case BC_KCDATA: /* Same shape as BC_KSTR with tag LJ_TCDATA; no code is emitted unless FFI is set. */ 3619 |.if FFI 3620 | // RA = dst*8, RD = cdata_const*8 (~) 3621 | srwi TMP1, RD, 1 3622 | subfic TMP1, TMP1, -4 3623 | ins_next1 3624 | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4 3625 | li TMP2, LJ_TCDATA 3626 | stwux TMP2, RA, BASE 3627 | stw TMP0, 4(RA) 3628 | ins_next2 3629 |.endif 3630 break; 3631 case BC_KSHORT: /* Store a signed 16 bit literal: as a TISNUM-tagged integer with DUALNUM, else as a double built with integer instructions. */ 3632 | // RA = dst*8, RD = int16_literal*8 3633 |.if DUALNUM 3634 | slwi RD, RD, 13 3635 | srawi RD, RD, 16 3636 | ins_next1 3637 | stwux TISNUM, RA, BASE 3638 | stw RD, 4(RA) 3639 | ins_next2 3640 |.else 3641 | // The soft-float approach is faster.
3642 | slwi RD, RD, 13 3643 | srawi TMP1, RD, 31 3644 | xor TMP2, TMP1, RD 3645 | sub TMP2, TMP2, TMP1 // TMP2 = abs(x) 3646 | cntlzw TMP3, TMP2 3647 | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1 3648 | slw TMP2, TMP2, TMP3 // TMP2 = left aligned mantissa 3649 | subfic TMP3, RD, 0 // Only the carry matters: CA = (x == 0) 3650 | slwi TMP1, TMP1, 20 3651 | rlwimi RD, TMP2, 21, 1, 31 // hi = sign(x) | (mantissa>>11) 3652 | subfe TMP0, TMP0, TMP0 // TMP0 = x == 0 ? 0 : -1 (mask from CA) 3653 | add RD, RD, TMP1 // hi = hi + exponent-1 3654 | and RD, RD, TMP0 // hi = x == 0 ? 0 : hi 3655 | ins_next1 3656 | stwux RD, RA, BASE 3657 | stw ZERO, 4(RA) 3658 | ins_next2 3659 |.endif 3660 break; 3661 case BC_KNUM: /* Copy a number constant from KBASE+RD to BASE+RA. */ 3662 | // RA = dst*8, RD = num_const*8 3663 | ins_next1 3664 | lfdx f0, KBASE, RD 3665 | stfdx f0, BASE, RA 3666 | ins_next2 3667 break; 3668 case BC_KPRI: /* Store only the first word of the slot: not(RD>>3). */ 3669 | // RA = dst*8, RD = primitive_type*8 (~) 3670 | srwi TMP1, RD, 3 3671 | not TMP0, TMP1 3672 | ins_next1 3673 | stwx TMP0, BASE, RA 3674 | ins_next2 3675 break; 3676 case BC_KNIL: /* Write TISNIL to the first word of slot RA and of the following slots until the slot offset reaches RD. At least two slots are always written. */ 3677 | // RA = base*8, RD = end*8 3678 | stwx TISNIL, BASE, RA 3679 | addi RA, RA, 8 3680 |1: 3681 | stwx TISNIL, BASE, RA 3682 | cmpw RA, RD 3683 | addi RA, RA, 8 3684 | blt <1 3685 | ins_next_ 3686 break; 3687 3688 /* -- Upvalue and function ops ------------------------------------------ */ 3689 3690 case BC_UGET: /* Load the 8 byte value that UPVAL->v points to and copy it to BASE+RA. */ 3691 | // RA = dst*8, RD = uvnum*8 3692 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3693 | srwi RD, RD, 1 3694 | addi RD, RD, offsetof(GCfuncL, uvptr) // RD = uvnum*4 + offsetof(uvptr) 3695 | lwzx UPVAL:RB, LFUNC:RB, RD 3696 | ins_next1 3697 | lwz TMP1, UPVAL:RB->v 3698 | lfd f0, 0(TMP1) 3699 | stfdx f0, BASE, RA 3700 | ins_next2 3701 break; 3702 case BC_USETV: /* Store stack slot RD through UPVAL->v, then decide whether lj_gc_barrieruv must be called. */ 3703 | // RA = uvnum*8, RD = src*8 3704 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3705 | srwi RA, RA, 1 3706 | addi RA, RA, offsetof(GCfuncL, uvptr) 3707 | lfdux f0, RD, BASE 3708 | lwzx UPVAL:RB, LFUNC:RB, RA 3709 | lbz TMP3, UPVAL:RB->marked 3710 | lwz CARG2, UPVAL:RB->v 3711 | andix.
TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 3712 | lbz TMP0, UPVAL:RB->closed 3713 | lwz TMP2, 0(RD) 3714 | stfd f0, 0(CARG2) 3715 | cmplwi cr1, TMP0, 0 3716 | lwz TMP1, 4(RD) 3717 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 3718 | subi TMP2, TMP2, (LJ_TNUMX+1) 3719 | bne >2 // Upvalue is closed and black? 3720 |1: 3721 | ins_next 3722 | 3723 |2: // Check if new value is collectable. 3724 | cmplwi TMP2, LJ_TISGCV - (LJ_TNUMX+1) 3725 | bge <1 // Skip the barrier unless tvisgcv(v). 3726 | lbz TMP3, GCOBJ:TMP1->gch.marked 3727 | andix. TMP3, TMP3, LJ_GC_WHITES // iswhite(v) 3728 | la CARG1, GG_DISP2G(DISPATCH) 3729 | // Crossed a write barrier. Move the barrier forward. 3730 | beq <1 // Value is not white: no barrier call. 3731 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) 3732 | b <1 3733 break; 3734 case BC_USETS: /* Store a string constant (tag LJ_TSTR) through UPVAL->v; lj_gc_barrieruv is called when the upvalue is black and closed and the string is white. */ 3735 | // RA = uvnum*8, RD = str_const*8 (~) 3736 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3737 | srwi TMP1, RD, 1 3738 | srwi RA, RA, 1 3739 | subfic TMP1, TMP1, -4 3740 | addi RA, RA, offsetof(GCfuncL, uvptr) 3741 | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4 3742 | lwzx UPVAL:RB, LFUNC:RB, RA 3743 | lbz TMP3, UPVAL:RB->marked 3744 | lwz CARG2, UPVAL:RB->v 3745 | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 3746 | lbz TMP3, STR:TMP1->marked // Reuses TMP3; cr0 still holds the isblack(uv) result. 3747 | lbz TMP2, UPVAL:RB->closed 3748 | li TMP0, LJ_TSTR 3749 | stw STR:TMP1, 4(CARG2) 3750 | stw TMP0, 0(CARG2) 3751 | bne >2 // Upvalue is black: continue checking at label 2. 3752 |1: 3753 | ins_next 3754 | 3755 |2: // Check if string is white and ensure upvalue is closed. 3756 | andix. TMP3, TMP3, LJ_GC_WHITES // iswhite(str) 3757 | cmplwi cr1, TMP2, 0 3758 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 3759 | la CARG1, GG_DISP2G(DISPATCH) 3760 | // Crossed a write barrier. Move the barrier forward.
3761 | beq <1 // String is not white or UPVAL->closed is 0: no barrier call. 3762 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) 3763 | b <1 3764 break; 3765 case BC_USETN: /* Store a number constant from KBASE+RD through UPVAL->v. No barrier code is emitted. */ 3766 | // RA = uvnum*8, RD = num_const*8 3767 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3768 | srwi RA, RA, 1 3769 | addi RA, RA, offsetof(GCfuncL, uvptr) 3770 | lfdx f0, KBASE, RD 3771 | lwzx UPVAL:RB, LFUNC:RB, RA 3772 | ins_next1 3773 | lwz TMP1, UPVAL:RB->v 3774 | stfd f0, 0(TMP1) 3775 | ins_next2 3776 break; 3777 case BC_USETP: /* Store only the first word, not(RD>>3), through UPVAL->v. */ 3778 | // RA = uvnum*8, RD = primitive_type*8 (~) 3779 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3780 | srwi RA, RA, 1 3781 | srwi TMP0, RD, 3 3782 | addi RA, RA, offsetof(GCfuncL, uvptr) 3783 | not TMP0, TMP0 3784 | lwzx UPVAL:RB, LFUNC:RB, RA 3785 | ins_next1 3786 | lwz TMP1, UPVAL:RB->v 3787 | stw TMP0, 0(TMP1) 3788 | ins_next2 3789 break; 3790 3791 case BC_UCLO: /* Apply the branch in RD, then call lj_func_closeuv(L, BASE+RA) unless L->openupval is 0. */ 3792 | // RA = level*8, RD = target 3793 | lwz TMP1, L->openupval 3794 | branch_RD // Do this first since RD is not saved. 3795 | stp BASE, L->base 3796 | cmplwi TMP1, 0 3797 | mr CARG1, L 3798 | beq >1 3799 | add CARG2, BASE, RA 3800 | bl extern lj_func_closeuv // (lua_State *L, TValue *level) 3801 | lp BASE, L->base 3802 |1: 3803 | ins_next 3804 break; 3805 3806 case BC_FNEW: /* Create a closure with lj_func_newL_gc from the prototype constant and the current function as parent; the result is stored with tag LJ_TFUNC. */ 3807 | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) 3808 | srwi TMP1, RD, 1 3809 | stp BASE, L->base 3810 | subfic TMP1, TMP1, -4 3811 | stw PC, SAVE_PC 3812 | lwzx CARG2, KBASE, TMP1 // KBASE-4-proto_const*4 3813 | mr CARG1, L 3814 | lwz CARG3, FRAME_FUNC(BASE) 3815 | // (lua_State *L, GCproto *pt, GCfuncL *parent) 3816 | bl extern lj_func_newL_gc 3817 | // Returns GCfuncL *.
3818 | lp BASE, L->base 3819 | li TMP0, LJ_TFUNC 3820 | stwux TMP0, RA, BASE 3821 | stw LFUNC:CRET1, 4(RA) 3822 | ins_next 3823 break; 3824 3825 /* -- Table ops --------------------------------------------------------- */ 3826 3827 case BC_TNEW: 3828 case BC_TDUP: /* Create a table with lj_tab_new (TNEW) or copy a template table constant with lj_tab_dup (TDUP). lj_gc_step_fixtop runs first when gc.total >= gc.threshold. */ 3829 | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) 3830 | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH) 3831 | mr CARG1, L 3832 | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) 3833 | stp BASE, L->base 3834 | cmplw TMP0, TMP1 3835 | stw PC, SAVE_PC 3836 | bge >5 3837 |1: 3838 if (op == BC_TNEW) { 3839 | rlwinm CARG2, RD, 29, 21, 31 // asize = (RD>>3) & 0x7ff 3840 | rlwinm CARG3, RD, 18, 27, 31 // hbits = (RD>>14) & 0x1f 3841 | cmpwi CARG2, 0x7ff; beq >3 3842 |2: 3843 | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) 3844 | // Returns Table *. 3845 } else { 3846 | srwi TMP1, RD, 1 3847 | subfic TMP1, TMP1, -4 3848 | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4 3849 | bl extern lj_tab_dup // (lua_State *L, Table *kt) 3850 | // Returns Table *. 3851 } 3852 | lp BASE, L->base 3853 | li TMP0, LJ_TTAB 3854 | stwux TMP0, RA, BASE 3855 | stw TAB:CRET1, 4(RA) 3856 | ins_next 3857 if (op == BC_TNEW) { 3858 |3: // The asize field is 0x7ff: pass 0x801 instead. 3859 | li CARG2, 0x801 3860 | b <2 3861 } 3862 |5: // GC threshold reached: run a step first. RD is kept in SAVE0 across the call. 3863 | mr SAVE0, RD 3864 | bl extern lj_gc_step_fixtop // (lua_State *L) 3865 | mr RD, SAVE0 3866 | mr CARG1, L 3867 | b <1 3868 break; 3869 3870 case BC_GGET: /* Global get/set share one body: load the env table of the current function and the string key, then continue at ->BC_TGETS_Z (GGET) or ->BC_TSETS_Z (GSET). */ 3871 | // RA = dst*8, RD = str_const*8 (~) 3872 case BC_GSET: 3873 | // RA = src*8, RD = str_const*8 (~) 3874 | lwz LFUNC:TMP2, FRAME_FUNC(BASE) 3875 | srwi TMP1, RD, 1 3876 | lwz TAB:RB, LFUNC:TMP2->env 3877 | subfic TMP1, TMP1, -4 3878 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 3879 if (op == BC_GGET) { 3880 | b ->BC_TGETS_Z 3881 } else { 3882 | b ->BC_TSETS_Z 3883 } 3884 break; 3885 3886 case BC_TGETV: /* Table get with a variable key: in-range integer keys read the array part, string keys continue at ->BC_TGETS_Z, all other cases go to vmeta_tgetv. */ 3887 | // RA = dst*8, RB = table*8, RC = key*8 3888 | lwzux CARG1, RB, BASE 3889 | lwzux CARG2, RC, BASE 3890 | lwz TAB:RB, 4(RB) 3891 |.if DUALNUM 3892 | lwz RC, 4(RC) 3893 |.else 3894 | lfd f0, 0(RC) 3895 |.endif
3896 | checktab CARG1 3897 | checknum cr1, CARG2 3898 | bne ->vmeta_tgetv // checktab failed. 3899 |.if DUALNUM 3900 | lwz TMP0, TAB:RB->asize 3901 | bne cr1, >5 3902 | lwz TMP1, TAB:RB->array 3903 | cmplw TMP0, RC 3904 | slwi TMP2, RC, 3 // Byte offset of the key's array slot. 3905 |.else 3906 | bge cr1, >5 3907 | // Convert number key to integer, check for integerness and range. 3908 | fctiwz f1, f0 3909 | fadd f2, f0, TOBIT 3910 | stfd f1, TMPD 3911 | lwz TMP0, TAB:RB->asize 3912 | fsub f2, f2, TOBIT 3913 | lwz TMP2, TMPD_LO 3914 | lwz TMP1, TAB:RB->array 3915 | fcmpu cr1, f0, f2 3916 | cmplw cr0, TMP0, TMP2 3917 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq 3918 | slwi TMP2, TMP2, 3 // Byte offset of the key's array slot. 3919 |.endif 3920 | ble ->vmeta_tgetv // Integer key and in array part? 3921 | lwzx TMP0, TMP1, TMP2 3922 | lfdx f14, TMP1, TMP2 3923 | checknil TMP0; beq >2 3924 |1: // Store the value loaded into f14 to BASE+RA. 3925 | ins_next1 3926 | stfdx f14, BASE, RA 3927 | ins_next2 3928 | 3929 |2: // Check for __index if table value is nil. 3930 | lwz TAB:TMP2, TAB:RB->metatable 3931 | cmplwi TAB:TMP2, 0 3932 | beq <1 // No metatable: done. 3933 | lbz TMP0, TAB:TMP2->nomm 3934 | andix. TMP0, TMP0, 1<<MM_index 3935 | bne <1 // 'no __index' flag set: done. 3936 | b ->vmeta_tgetv 3937 | 3938 |5: // Key failed the checknum test: only string keys continue inline. 3939 | checkstr CARG2; bne ->vmeta_tgetv 3940 |.if not DUALNUM 3941 | lwz STR:RC, 4(RC) 3942 |.endif 3943 | b ->BC_TGETS_Z // String key?
3944 break; 3945 case BC_TGETS: /* Table get with a constant string key: walk the hash chain that starts at node[str->hash & tab->hmask]. A missing key or a nil value consults the metatable's nomm byte for __index before storing nil. */ 3946 | // RA = dst*8, RB = table*8, RC = str_const*8 (~) 3947 | lwzux CARG1, RB, BASE 3948 | srwi TMP1, RC, 1 3949 | lwz TAB:RB, 4(RB) 3950 | subfic TMP1, TMP1, -4 3951 | checktab CARG1 3952 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 3953 | bne ->vmeta_tgets1 3954 |->BC_TGETS_Z: 3955 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 3956 | lwz TMP0, TAB:RB->hmask 3957 | lwz TMP1, STR:RC->hash 3958 | lwz NODE:TMP2, TAB:RB->node 3959 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 3960 | slwi TMP0, TMP1, 5 3961 | slwi TMP1, TMP1, 3 3962 | sub TMP1, TMP0, TMP1 3963 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 3964 |1: // Compare the node's key (first word and second word) against the string. 3965 | lwz CARG1, NODE:TMP2->key 3966 | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) 3967 | lwz CARG2, NODE:TMP2->val 3968 | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2) 3969 | checkstr CARG1; bne >4 3970 | cmpw TMP0, STR:RC; bne >4 3971 | checknil CARG2; beq >5 // Key found, but nil value? 3972 |3: // Store both words of the result (CARG2, TMP1) to BASE+RA. 3973 | stwux CARG2, RA, BASE 3974 | stw TMP1, 4(RA) 3975 | ins_next 3976 | 3977 |4: // Follow hash chain. 3978 | lwz NODE:TMP2, NODE:TMP2->next 3979 | cmplwi NODE:TMP2, 0 3980 | bne <1 3981 | // End of hash chain: key not found, nil result. 3982 | li CARG2, LJ_TNIL 3983 | 3984 |5: // Check for __index if table value is nil. 3985 | lwz TAB:TMP2, TAB:RB->metatable 3986 | cmplwi TAB:TMP2, 0 3987 | beq <3 // No metatable: done. 3988 | lbz TMP0, TAB:TMP2->nomm 3989 | andix. TMP0, TMP0, 1<<MM_index 3990 | bne <3 // 'no __index' flag set: done.
3991 | b ->vmeta_tgets 3992 break; 3993 case BC_TGETB: /* Table get with a literal index: only the array part is read inline; a non-table operand or an index >= asize goes to vmeta_tgetb. */ 3994 | // RA = dst*8, RB = table*8, RC = index*8 3995 | lwzux CARG1, RB, BASE 3996 | srwi TMP0, RC, 3 // TMP0 = index 3997 | lwz TAB:RB, 4(RB) 3998 | checktab CARG1; bne ->vmeta_tgetb 3999 | lwz TMP1, TAB:RB->asize 4000 | lwz TMP2, TAB:RB->array 4001 | cmplw TMP0, TMP1; bge ->vmeta_tgetb 4002 | lwzx TMP1, TMP2, RC 4003 | lfdx f0, TMP2, RC 4004 | checknil TMP1; beq >5 4005 |1: 4006 | ins_next1 4007 | stfdx f0, BASE, RA 4008 | ins_next2 4009 | 4010 |5: // Check for __index if table value is nil. 4011 | lwz TAB:TMP2, TAB:RB->metatable 4012 | cmplwi TAB:TMP2, 0 4013 | beq <1 // No metatable: done. 4014 | lbz TMP2, TAB:TMP2->nomm 4015 | andix. TMP2, TMP2, 1<<MM_index 4016 | bne <1 // 'no __index' flag set: done. 4017 | b ->vmeta_tgetb // Caveat: preserve TMP0! 4018 break; 4019 4020 case BC_TSETV: /* Table set with a variable key: same key dispatch as BC_TGETV (array part, ->BC_TSETS_Z for strings, else vmeta_tsetv), plus a __newindex check and a barrierback when the table is black. */ 4021 | // RA = src*8, RB = table*8, RC = key*8 4022 | lwzux CARG1, RB, BASE 4023 | lwzux CARG2, RC, BASE 4024 | lwz TAB:RB, 4(RB) 4025 |.if DUALNUM 4026 | lwz RC, 4(RC) 4027 |.else 4028 | lfd f0, 0(RC) 4029 |.endif 4030 | checktab CARG1 4031 | checknum cr1, CARG2 4032 | bne ->vmeta_tsetv 4033 |.if DUALNUM 4034 | lwz TMP0, TAB:RB->asize 4035 | bne cr1, >5 4036 | lwz TMP1, TAB:RB->array 4037 | cmplw TMP0, RC 4038 | slwi TMP0, RC, 3 4039 |.else 4040 | bge cr1, >5 4041 | // Convert number key to integer, check for integerness and range. 4042 | fctiwz f1, f0 4043 | fadd f2, f0, TOBIT 4044 | stfd f1, TMPD 4045 | lwz TMP0, TAB:RB->asize 4046 | fsub f2, f2, TOBIT 4047 | lwz TMP2, TMPD_LO 4048 | lwz TMP1, TAB:RB->array 4049 | fcmpu cr1, f0, f2 4050 | cmplw cr0, TMP0, TMP2 4051 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq 4052 | slwi TMP0, TMP2, 3 4053 |.endif 4054 | ble ->vmeta_tsetv // Integer key and in array part? 4055 | lwzx TMP2, TMP1, TMP0 4056 | lbz TMP3, TAB:RB->marked 4057 | lfdx f14, BASE, RA 4058 | checknil TMP2; beq >3 4059 |1: 4060 | andix.
TMP2, TMP3, LJ_GC_BLACK // isblack(table) 4061 | stfdx f14, TMP1, TMP0 4062 | bne >7 // Table is black: barrier at label 7. 4063 |2: 4064 | ins_next 4065 | 4066 |3: // Check for __newindex if previous value is nil. 4067 | lwz TAB:TMP2, TAB:RB->metatable 4068 | cmplwi TAB:TMP2, 0 4069 | beq <1 // No metatable: done. 4070 | lbz TMP2, TAB:TMP2->nomm 4071 | andix. TMP2, TMP2, 1<<MM_newindex 4072 | bne <1 // 'no __newindex' flag set: done. 4073 | b ->vmeta_tsetv 4074 | 4075 |5: // Key failed the checknum test: only string keys continue inline. 4076 | checkstr CARG2; bne ->vmeta_tsetv 4077 |.if not DUALNUM 4078 | lwz STR:RC, 4(RC) 4079 |.endif 4080 | b ->BC_TSETS_Z // String key? 4081 | 4082 |7: // Possible table write barrier for the value. Skip valiswhite check. 4083 | barrierback TAB:RB, TMP3, TMP0 4084 | b <2 4085 break; 4086 case BC_TSETS: /* Table set with a constant string key: overwrite the value of an existing node, or insert a new key with lj_tab_newkey at the end of the hash chain. __newindex is checked when the old value is nil or the key is missing. */ 4087 | // RA = src*8, RB = table*8, RC = str_const*8 (~) 4088 | lwzux CARG1, RB, BASE 4089 | srwi TMP1, RC, 1 4090 | lwz TAB:RB, 4(RB) 4091 | subfic TMP1, TMP1, -4 4092 | checktab CARG1 4093 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 4094 | bne ->vmeta_tsets1 4095 |->BC_TSETS_Z: 4096 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8 4097 | lwz TMP0, TAB:RB->hmask 4098 | lwz TMP1, STR:RC->hash 4099 | lwz NODE:TMP2, TAB:RB->node 4100 | stb ZERO, TAB:RB->nomm // Clear metamethod cache. 4101 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 4102 | lfdx f14, BASE, RA 4103 | slwi TMP0, TMP1, 5 4104 | slwi TMP1, TMP1, 3 4105 | sub TMP1, TMP0, TMP1 4106 | lbz TMP3, TAB:RB->marked 4107 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 4108 |1: 4109 | lwz CARG1, NODE:TMP2->key 4110 | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) 4111 | lwz CARG2, NODE:TMP2->val 4112 | lwz NODE:TMP1, NODE:TMP2->next // Preloaded for the chain walk at label 5. 4113 | checkstr CARG1; bne >5 4114 | cmpw TMP0, STR:RC; bne >5 4115 | checknil CARG2; beq >4 // Key found, but nil value? 4116 |2: 4117 | andix.
TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4118 | stfd f14, NODE:TMP2->val 4119 | bne >7 // Table is black: barrier at label 7. 4120 |3: 4121 | ins_next 4122 | 4123 |4: // Check for __newindex if previous value is nil. 4124 | lwz TAB:TMP1, TAB:RB->metatable 4125 | cmplwi TAB:TMP1, 0 4126 | beq <2 // No metatable: done. 4127 | lbz TMP0, TAB:TMP1->nomm 4128 | andix. TMP0, TMP0, 1<<MM_newindex 4129 | bne <2 // 'no __newindex' flag set: done. 4130 | b ->vmeta_tsets 4131 | 4132 |5: // Follow hash chain. 4133 | cmplwi NODE:TMP1, 0 4134 | mr NODE:TMP2, NODE:TMP1 4135 | bne <1 4136 | // End of hash chain: key not found, add a new one. 4137 | 4138 | // But check for __newindex first. 4139 | lwz TAB:TMP1, TAB:RB->metatable 4140 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) 4141 | stw PC, SAVE_PC 4142 | mr CARG1, L 4143 | cmplwi TAB:TMP1, 0 4144 | stp BASE, L->base 4145 | beq >6 // No metatable: continue. 4146 | lbz TMP0, TAB:TMP1->nomm 4147 | andix. TMP0, TMP0, 1<<MM_newindex 4148 | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check. 4149 |6: // Build the string key as a TValue in the tmptv slot (CARG3) and insert it. 4150 | li TMP0, LJ_TSTR 4151 | stw STR:RC, 4(CARG3) 4152 | mr CARG2, TAB:RB 4153 | stw TMP0, 0(CARG3) 4154 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) 4155 | // Returns TValue *. 4156 | lp BASE, L->base 4157 | stfd f14, 0(CRET1) // Store the value into the returned slot. 4158 | b <3 // No 2nd write barrier needed. 4159 | 4160 |7: // Possible table write barrier for the value. Skip valiswhite check. 4161 | barrierback TAB:RB, TMP3, TMP0 4162 | b <3 4163 break; 4164 case BC_TSETB: /* Table set with a literal index: only the array part is written inline; a non-table operand or an index >= asize goes to vmeta_tsetb. */ 4165 | // RA = src*8, RB = table*8, RC = index*8 4166 | lwzux CARG1, RB, BASE 4167 | srwi TMP0, RC, 3 // TMP0 = index 4168 | lwz TAB:RB, 4(RB) 4169 | checktab CARG1; bne ->vmeta_tsetb 4170 | lwz TMP1, TAB:RB->asize 4171 | lwz TMP2, TAB:RB->array 4172 | lbz TMP3, TAB:RB->marked 4173 | cmplw TMP0, TMP1 4174 | lfdx f14, BASE, RA 4175 | bge ->vmeta_tsetb 4176 | lwzx TMP1, TMP2, RC 4177 | checknil TMP1; beq >5 4178 |1: 4179 | andix.
TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4180 | stfdx f14, TMP2, RC 4181 | bne >7 // Table is black: barrier at label 7. 4182 |2: 4183 | ins_next 4184 | 4185 |5: // Check for __newindex if previous value is nil. 4186 | lwz TAB:TMP1, TAB:RB->metatable 4187 | cmplwi TAB:TMP1, 0 4188 | beq <1 // No metatable: done. 4189 | lbz TMP1, TAB:TMP1->nomm 4190 | andix. TMP1, TMP1, 1<<MM_newindex 4191 | bne <1 // 'no __newindex' flag set: done. 4192 | b ->vmeta_tsetb // Caveat: preserve TMP0! 4193 | 4194 |7: // Possible table write barrier for the value. Skip valiswhite check. 4195 | barrierback TAB:RB, TMP3, TMP0 4196 | b <2 4197 break; 4198 4199 case BC_TSETM: /* Copy MULTRES-8 bytes of slots starting at BASE+RA into the array part of the table referenced just below them, beginning at the index held in the number constant. The array is grown with lj_tab_reasize first when start+count exceeds asize. */ 4200 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 4201 | add RA, BASE, RA 4202 |1: 4203 | add TMP3, KBASE, RD 4204 | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table. 4205 | addic. TMP0, MULTRES, -8 // TMP0 = byte size of the slots to copy. 4206 | lwz TMP3, 4(TMP3) // Integer constant is in lo-word. 4207 | srwi CARG3, TMP0, 3 // Number of slots to copy. 4208 | beq >4 // Nothing to copy? 4209 | add CARG3, CARG3, TMP3 // Start index + count; also the nasize argument on the resize path. 4210 | lwz TMP2, TAB:CARG2->asize 4211 | slwi TMP1, TMP3, 3 4212 | lbz TMP3, TAB:CARG2->marked 4213 | cmplw CARG3, TMP2 4214 | add TMP2, RA, TMP0 // TMP2 = end of the source slots. 4215 | lwz TMP0, TAB:CARG2->array 4216 | bgt >5 4217 | add TMP1, TMP1, TMP0 4218 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4219 |3: // Copy result slots to table. 4220 | lfd f0, 0(RA) 4221 | addi RA, RA, 8 4222 | cmpw cr1, RA, TMP2 4223 | stfd f0, 0(TMP1) 4224 | addi TMP1, TMP1, 8 4225 | blt cr1, <3 4226 | bne >7 // cr0 still holds the isblack(table) result; the loop only uses cr1. 4227 |4: 4228 | ins_next 4229 | 4230 |5: // Need to resize array part. 4231 | stp BASE, L->base 4232 | mr CARG1, L 4233 | stw PC, SAVE_PC 4234 | mr SAVE0, RD 4235 | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) 4236 | // Must not reallocate the stack. 4237 | mr RD, SAVE0 4238 | b <1 4239 | 4240 |7: // Possible table write barrier for any value. Skip valiswhite check.
4241 | barrierback TAB:CARG2, TMP3, TMP0 4242 | b <4 4243 break; 4244 4245 /* -- Calls and vararg handling ----------------------------------------- */ 4246 4247 case BC_CALLM: /* Adds MULTRES to the argument count; the emitted code has no branch and relies on the BC_CALL code following it. */ 4248 | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 4249 | add NARGS8:RC, NARGS8:RC, MULTRES 4250 | // Fall through. Assumes BC_CALL follows. 4251 break; 4252 case BC_CALL: /* Call: moves BASE to the callee slot, takes vmeta_call when the checkfunc test fails, else dispatches with ins_call. The old BASE is kept in TMP2. */ 4253 | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 4254 | mr TMP2, BASE 4255 | lwzux TMP0, BASE, RA 4256 | lwz LFUNC:RB, 4(BASE) 4257 | subi NARGS8:RC, NARGS8:RC, 8 // NARGS8 = nargs*8 4258 | addi BASE, BASE, 8 4259 | checkfunc TMP0; bne ->vmeta_call 4260 | ins_call 4261 break; 4262 4263 case BC_CALLMT: /* Adds MULTRES to the argument count; relies on the BC_CALLT code following it. */ 4264 | // RA = base*8, (RB = 0,) RC = extra_nargs*8 4265 | add NARGS8:RC, NARGS8:RC, MULTRES 4266 | // Fall through. Assumes BC_CALLT follows. 4267 break; 4268 case BC_CALLT: /* Tail call: stores the function into FRAME_FUNC(BASE), copies the arguments down to BASE and dispatches with ins_callt. Vararg frames and fast functions get extra handling at labels 7 and 5. */ 4269 | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 4270 | lwzux TMP0, RA, BASE 4271 | lwz LFUNC:RB, 4(RA) 4272 | subi NARGS8:RC, NARGS8:RC, 8 4273 | lwz TMP1, FRAME_PC(BASE) 4274 | checkfunc TMP0 4275 | addi RA, RA, 8 4276 | bne ->vmeta_callt 4277 |->BC_CALLT_Z: 4278 | andix. TMP0, TMP1, FRAME_TYPE // Caveat: preserve cr0 until the crand. 4279 | lbz TMP3, LFUNC:RB->ffid 4280 | xori TMP2, TMP1, FRAME_VARG 4281 | cmplwi cr1, NARGS8:RC, 0 4282 | bne >7 4283 |1: 4284 | stw LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. 4285 | li TMP2, 0 4286 | cmplwi cr7, TMP3, 1 // (> FF_C) Calling a fast function? 4287 | beq cr1, >3 // NARGS8 == 0: nothing to copy. 4288 |2: // Copy NARGS8 bytes of arguments from RA down to BASE. 4289 | addi TMP3, TMP2, 8 4290 | lfdx f0, RA, TMP2 4291 | cmplw cr1, TMP3, NARGS8:RC 4292 | stfdx f0, BASE, TMP2 4293 | mr TMP2, TMP3 4294 | bne cr1, <2 4295 |3: 4296 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+gt 4297 | beq >5 4298 |4: 4299 | ins_callt 4300 | 4301 |5: // Tailcall to a fast function with a Lua frame below.
4302 | lwz INS, -4(TMP1) 4303 | decode_RA8 RA, INS 4304 | sub TMP1, BASE, RA 4305 | lwz LFUNC:TMP1, FRAME_FUNC-8(TMP1) 4306 | lwz TMP1, LFUNC:TMP1->pc 4307 | lwz KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. 4308 | b <4 4309 | 4310 |7: // Tailcall from a vararg function. 4311 | andix. TMP0, TMP2, FRAME_TYPEP 4312 | bne <1 // Vararg frame below? 4313 | sub BASE, BASE, TMP2 // Relocate BASE down. 4314 | lwz TMP1, FRAME_PC(BASE) 4315 | andix. TMP0, TMP1, FRAME_TYPE 4316 | b <1 4317 break; 4318 4319 case BC_ITERC: /* Generic for-loop iterator call: copies the callable, state and control var from the three slots below BASE+RA into the call frame and calls with 2 arguments. */ 4320 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) 4321 | mr TMP2, BASE 4322 | add BASE, BASE, RA 4323 | lwz TMP1, -24(BASE) 4324 | lwz LFUNC:RB, -20(BASE) 4325 | lfd f1, -8(BASE) 4326 | lfd f0, -16(BASE) 4327 | stw TMP1, 0(BASE) // Copy callable. 4328 | stw LFUNC:RB, 4(BASE) 4329 | checkfunc TMP1 4330 | stfd f1, 16(BASE) // Copy control var. 4331 | li NARGS8:RC, 16 // Iterators get 2 arguments. 4332 | stfdu f0, 8(BASE) // Copy state. 4333 | bne ->vmeta_call 4334 | ins_call 4335 break; 4336 4337 case BC_ITERN: /* Specialized table traversal: scans the array part, then the hash part, using the second word of the control var slot as the running index. Stores key and value to RA and RA+8 and jumps via the following instruction's target. */ 4338 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) 4339 |.if JIT 4340 | // NYI: add hotloop, record BC_ITERN. 4341 |.endif 4342 | add RA, BASE, RA 4343 | lwz TAB:RB, -12(RA) 4344 | lwz RC, -4(RA) // Get index from control var. 4345 | lwz TMP0, TAB:RB->asize 4346 | lwz TMP1, TAB:RB->array 4347 | addi PC, PC, 4 // Step over the next instruction; it is read back via -4(PC). 4348 |1: // Traverse array part. 4349 | cmplw RC, TMP0 4350 | slwi TMP3, RC, 3 4351 | bge >5 // Index points after array part? 4352 | lwzx TMP2, TMP1, TMP3 4353 | lfdx f0, TMP1, TMP3 4354 | checknil TMP2 4355 | lwz INS, -4(PC) 4356 | beq >4 4357 |.if DUALNUM 4358 | stw RC, 4(RA) 4359 | stw TISNUM, 0(RA) 4360 |.else 4361 | tonum_u f1, RC 4362 |.endif 4363 | addi RC, RC, 1 4364 | addis TMP3, PC, -(BCBIAS_J*4 >> 16) 4365 | stfd f0, 8(RA) 4366 | decode_RD4 TMP1, INS 4367 | stw RC, -4(RA) // Update control var.
4368 | add PC, TMP1, TMP3 4369 |.if not DUALNUM 4370 | stfd f1, 0(RA) 4371 |.endif 4372 |3: 4373 | ins_next 4374 | 4375 |4: // Skip holes in array part. 4376 | addi RC, RC, 1 4377 | b <1 4378 | 4379 |5: // Traverse hash part. 4380 | lwz TMP1, TAB:RB->hmask 4381 | sub RC, RC, TMP0 // Index relative to the hash part (minus asize). 4382 | lwz TMP2, TAB:RB->node 4383 |6: 4384 | cmplw RC, TMP1 // End of iteration? Branch to ITERL+1. 4385 | slwi TMP3, RC, 5 4386 | bgty <3 4387 | slwi RB, RC, 3 4388 | sub TMP3, TMP3, RB // Node offset = idx*32 - idx*8. 4389 | lwzx RB, TMP2, TMP3 4390 | lfdx f0, TMP2, TMP3 4391 | add NODE:TMP3, TMP2, TMP3 4392 | checknil RB 4393 | lwz INS, -4(PC) 4394 | beq >7 4395 | lfd f1, NODE:TMP3->key 4396 | addis TMP2, PC, -(BCBIAS_J*4 >> 16) 4397 | stfd f0, 8(RA) 4398 | add RC, RC, TMP0 // Back to the combined index (asize added again). 4399 | decode_RD4 TMP1, INS 4400 | stfd f1, 0(RA) 4401 | addi RC, RC, 1 4402 | add PC, TMP1, TMP2 4403 | stw RC, -4(RA) // Update control var. 4404 | b <3 4405 | 4406 |7: // Skip holes in hash part. 4407 | addi RC, RC, 1 4408 | b <6 4409 break; 4410 4411 case BC_ISNEXT: /* Checks that the loop iterates with the FF_next_N fast function over a table with a nil control var. On success the control var is initialized and execution continues at the target; otherwise label 5 despecializes the bytecode. The branch target is computed before the first check because label 5 needs it in TMP3 on every path. */ 4412 | // RA = base*8, RD = target (points to ITERN) 4413 | add RA, BASE, RA 4414 | lwz TMP0, -24(RA) 4415 | lwz CFUNC:TMP1, -20(RA) 4416 | lwz TMP2, -16(RA) 4417 | lwz TMP3, -8(RA) 4418 | cmpwi cr0, TMP2, LJ_TTAB 4419 | cmpwi cr1, TMP0, LJ_TFUNC 4420 | cmpwi cr6, TMP3, LJ_TNIL 4421 | srwi TMP0, RD, 1 // TMP0 and TMP3 are dead after the compares above. 4422 | add TMP3, PC, TMP0 // Must be valid for BOTH branches to label 5. 4423 | bne cr1, >5 // Not a function: ffid must not be read. 4424 | lbz TMP1, CFUNC:TMP1->ffid 4425 | crand 4*cr0+eq, 4*cr0+eq, 4*cr6+eq 4426 | cmpwi cr7, TMP1, FF_next_N 4427 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq 4428 | bne cr0, >5 4429 | lus TMP1, 0xfffe 4430 | ori TMP1, TMP1, 0x7fff 4431 | stw ZERO, -4(RA) // Initialize control var. 4432 | stw TMP1, -8(RA) 4433 | addis PC, TMP3, -(BCBIAS_J*4 >> 16) 4434 |1: 4435 | ins_next 4436 |5: // Despecialize bytecode if any of the checks fail.
4437 | li TMP0, BC_JMP 4438 | li TMP1, BC_ITERC 4439 | stb TMP0, -1(PC) 4440 | addis PC, TMP3, -(BCBIAS_J*4 >> 16) // Jump target: TMP3 is expected to hold PC + (RD>>1). 4441 | stb TMP1, 3(PC) 4442 | b <1 4443 break; 4444 4445 case BC_VARG: /* Copy varargs to the destination slots. With RB != 0 a fixed number of slots is filled (padded with nil); with RB == 0 all varargs are copied, MULTRES is set and the stack is grown if needed. */ 4446 | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 4447 | lwz TMP0, FRAME_PC(BASE) 4448 | add RC, BASE, RC 4449 | add RA, BASE, RA 4450 | addi RC, RC, FRAME_VARG 4451 | add TMP2, RA, RB 4452 | subi TMP3, BASE, 8 // TMP3 = vtop 4453 | sub RC, RC, TMP0 // RC = vbase 4454 | // Note: RC may now be even _above_ BASE if nargs was < numparams. 4455 | cmplwi cr1, RB, 0 4456 |.if PPE 4457 | sub TMP1, TMP3, RC 4458 | cmpwi TMP1, 0 4459 |.else 4460 | sub. TMP1, TMP3, RC 4461 |.endif 4462 | beq cr1, >5 // Copy all varargs? 4463 | subi TMP2, TMP2, 16 4464 | ble >2 // No vararg slots? 4465 |1: // Copy vararg slots to destination slots. 4466 | lfd f0, 0(RC) 4467 | addi RC, RC, 8 4468 | stfd f0, 0(RA) 4469 | cmplw RA, TMP2 4470 | cmplw cr1, RC, TMP3 4471 | bge >3 // All destination slots filled? 4472 | addi RA, RA, 8 4473 | blt cr1, <1 // More vararg slots? 4474 |2: // Fill up remainder with nil. 4475 | stw TISNIL, 0(RA) 4476 | cmplw RA, TMP2 4477 | addi RA, RA, 8 4478 | blt <2 4479 |3: 4480 | ins_next 4481 | 4482 |5: // Copy all varargs. 4483 | lwz TMP0, L->maxstack 4484 | li MULTRES, 8 // MULTRES = (0+1)*8 4485 | bley <3 // No vararg slots? 4486 | add TMP2, RA, TMP1 4487 | cmplw TMP2, TMP0 4488 | addi MULTRES, TMP1, 8 // MULTRES = vararg bytes + 8. 4489 | bgt >7 // Copy would end above L->maxstack: grow the stack first. 4490 |6: 4491 | lfd f0, 0(RC) 4492 | addi RC, RC, 8 4493 | stfd f0, 0(RA) 4494 | cmplw RC, TMP3 4495 | addi RA, RA, 8 4496 | blt <6 // More vararg slots? 4497 | b <3 4498 | 4499 |7: // Grow stack for varargs. 4500 | mr CARG1, L 4501 | stp RA, L->top 4502 | sub SAVE0, RC, BASE // Need delta, because BASE may change.
4503 | stp BASE, L->base 4504 | sub RA, RA, BASE 4505 | stw PC, SAVE_PC 4506 | srwi CARG2, TMP1, 3 // n = vararg bytes / 8. 4507 | bl extern lj_state_growstack // (lua_State *L, int n) 4508 | lp BASE, L->base 4509 | add RA, BASE, RA 4510 | add RC, BASE, SAVE0 4511 | subi TMP3, BASE, 8 4512 | b <6 4513 break; 4514 4515 /* -- Returns ----------------------------------------------------------- */ 4516 4517 case BC_RETM: /* Adds MULTRES to the result count; the emitted code has no branch and relies on the BC_RET code following it. */ 4518 | // RA = results*8, RD = extra_nresults*8 4519 | add RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. 4520 | // Fall through. Assumes BC_RET follows. 4521 break; 4522 4523 case BC_RET: /* Return: copies the results down to BASE-8, fills with nil while the RB field of the instruction before the return PC exceeds RD, then restores BASE and KBASE. A PC with FRAME_TYPE bits set goes through ->BC_RETV_Z. */ 4524 | // RA = results*8, RD = (nresults+1)*8 4525 | lwz PC, FRAME_PC(BASE) 4526 | add RA, BASE, RA 4527 | mr MULTRES, RD 4528 |1: 4529 | andix. TMP0, PC, FRAME_TYPE 4530 | xori TMP1, PC, FRAME_VARG 4531 | bne ->BC_RETV_Z 4532 | 4533 |->BC_RET_Z: 4534 | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return 4535 | lwz INS, -4(PC) 4536 | cmpwi RD, 8 4537 | subi TMP2, BASE, 8 4538 | subi RC, RD, 8 4539 | decode_RB8 RB, INS 4540 | beq >3 // RD == 8: no results to copy. 4541 | li TMP1, 0 4542 |2: // Copy results from RA to BASE-8, up to two slots per iteration. 4543 | addi TMP3, TMP1, 8 4544 | lfdx f0, RA, TMP1 4545 | cmpw TMP3, RC 4546 | stfdx f0, TMP2, TMP1 4547 | beq >3 4548 | addi TMP1, TMP3, 8 4549 | lfdx f1, RA, TMP3 4550 | cmpw TMP1, RC 4551 | stfdx f1, TMP2, TMP3 4552 | bne <2 4553 |3: 4554 |5: 4555 | cmplw RB, RD 4556 | decode_RA8 RA, INS 4557 | bgt >6 // RB > RD: add one more nil result at label 6. 4558 | sub BASE, TMP2, RA 4559 | lwz LFUNC:TMP1, FRAME_FUNC(BASE) 4560 | ins_next1 4561 | lwz TMP1, LFUNC:TMP1->pc 4562 | lwz KBASE, PC2PROTO(k)(TMP1) 4563 | ins_next2 4564 | 4565 |6: // Fill up results with nil. 4566 | subi TMP1, RD, 8 4567 | addi RD, RD, 8 4568 | stwx TISNIL, TMP2, TMP1 4569 | b <5 4570 | 4571 |->BC_RETV_Z: // Non-standard return case. 4572 | andix. TMP2, TMP1, FRAME_TYPEP 4573 | bne ->vm_return 4574 | // Return from vararg function: relocate BASE down.
4575 | sub BASE, BASE, TMP1 4576 | lwz PC, FRAME_PC(BASE) 4577 | b <1 4578 break; 4579 4580 case BC_RET0: case BC_RET1: /* Specialized returns for 0 or 1 results: same nil-fill and frame restore as BC_RET, with the copy loop replaced by at most one slot copy (BC_RET1 only). */ 4581 | // RA = results*8, RD = (nresults+1)*8 4582 | lwz PC, FRAME_PC(BASE) 4583 | add RA, BASE, RA 4584 | mr MULTRES, RD 4585 | andix. TMP0, PC, FRAME_TYPE 4586 | xori TMP1, PC, FRAME_VARG 4587 | bney ->BC_RETV_Z 4588 | 4589 | lwz INS, -4(PC) 4590 | subi TMP2, BASE, 8 4591 | decode_RB8 RB, INS 4592 if (op == BC_RET1) { 4593 | lfd f0, 0(RA) 4594 | stfd f0, 0(TMP2) 4595 } 4596 |5: 4597 | cmplw RB, RD 4598 | decode_RA8 RA, INS 4599 | bgt >6 4600 | sub BASE, TMP2, RA 4601 | lwz LFUNC:TMP1, FRAME_FUNC(BASE) 4602 | ins_next1 4603 | lwz TMP1, LFUNC:TMP1->pc 4604 | lwz KBASE, PC2PROTO(k)(TMP1) 4605 | ins_next2 4606 | 4607 |6: // Fill up results with nil. 4608 | subi TMP1, RD, 8 4609 | addi RD, RD, 8 4610 | stwx TISNIL, TMP2, TMP1 4611 | b <5 4612 break; 4613 4614 /* -- Loops and branches ------------------------------------------------ */ 4615 4616 case BC_FORL: /* Emits hotloop when JIT is set, then relies on the BC_IFORL code following it. */ 4617 |.if JIT 4618 | hotloop 4619 |.endif 4620 | // Fall through. Assumes BC_IFORL follows. 4621 break; 4622 4623 case BC_JFORI: 4624 case BC_JFORL: /* Without LJ_HASJIT these two cases emit no code (early break below). */ 4625#if !LJ_HASJIT 4626 break; 4627#endif 4628 case BC_FORI: 4629 case BC_IFORL: /* Numeric for loop, shared by FORI/JFORI (loop entry) and IFORL/JFORL (loop back-edge). */ 4630 | // RA = base*8, RD = target (after end of loop or start of loop) 4631 vk = (op == BC_IFORL || op == BC_JFORL); /* vk: back-edge variants, which add the step to the index before comparing. */ 4632 |.if DUALNUM 4633 | // Integer loop. 4634 | lwzux TMP1, RA, BASE 4635 | lwz CARG1, FORL_IDX*8+4(RA) 4636 | cmplw cr0, TMP1, TISNUM 4637 if (vk) { 4638 | lwz CARG3, FORL_STEP*8+4(RA) 4639 | bne >9 4640 |.if GPR64 4641 | // Need to check overflow for (a<<32) + (b<<32). 4642 | rldicr TMP0, CARG1, 32, 31 4643 | rldicr TMP2, CARG3, 32, 31 4644 | add CARG1, CARG1, CARG3 4645 | addo. TMP0, TMP0, TMP2 4646 |.else 4647 | addo.
CARG1, CARG1, CARG3 4648 |.endif 4649 | cmpwi cr6, CARG3, 0 4650 | lwz CARG2, FORL_STOP*8+4(RA) 4651 | bso >6 4652 |4: 4653 | stw CARG1, FORL_IDX*8+4(RA) 4654 } else { 4655 | lwz TMP3, FORL_STEP*8(RA) 4656 | lwz CARG3, FORL_STEP*8+4(RA) 4657 | lwz TMP2, FORL_STOP*8(RA) 4658 | lwz CARG2, FORL_STOP*8+4(RA) 4659 | cmplw cr7, TMP3, TISNUM 4660 | cmplw cr1, TMP2, TISNUM 4661 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq 4662 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 4663 | cmpwi cr6, CARG3, 0 4664 | bne >9 4665 } 4666 | blt cr6, >5 4667 | cmpw CARG1, CARG2 4668 |1: 4669 | stw TISNUM, FORL_EXT*8(RA) 4670 if (op != BC_JFORL) { 4671 | srwi RD, RD, 1 4672 } 4673 | stw CARG1, FORL_EXT*8+4(RA) 4674 if (op != BC_JFORL) { 4675 | add RD, PC, RD 4676 } 4677 if (op == BC_FORI) { 4678 | bgt >3 // See FP loop below. 4679 } else if (op == BC_JFORI) { 4680 | addis PC, RD, -(BCBIAS_J*4 >> 16) 4681 | bley >7 4682 } else if (op == BC_IFORL) { 4683 | bgt >2 4684 | addis PC, RD, -(BCBIAS_J*4 >> 16) 4685 } else { 4686 | bley =>BC_JLOOP 4687 } 4688 |2: 4689 | ins_next 4690 |5: // Invert check for negative step. 4691 | cmpw CARG2, CARG1 4692 | b <1 4693 if (vk) { 4694 |6: // Potential overflow. 4695 | mcrxr cr0; bley <4 // Ignore unrelated overflow. 4696 | b <2 4697 } 4698 |.endif 4699 if (vk) { 4700 |.if DUALNUM 4701 |9: // FP loop. 4702 | lfd f1, FORL_IDX*8(RA) 4703 |.else 4704 | lfdux f1, RA, BASE 4705 |.endif 4706 | lfd f3, FORL_STEP*8(RA) 4707 | lfd f2, FORL_STOP*8(RA) 4708 | lwz TMP3, FORL_STEP*8(RA) 4709 | fadd f1, f1, f3 4710 | stfd f1, FORL_IDX*8(RA) 4711 } else { 4712 |.if DUALNUM 4713 |9: // FP loop. 
4714 |.else 4715 | lwzux TMP1, RA, BASE 4716 | lwz TMP3, FORL_STEP*8(RA) 4717 | lwz TMP2, FORL_STOP*8(RA) 4718 | cmplw cr0, TMP1, TISNUM 4719 | cmplw cr7, TMP3, TISNUM 4720 | cmplw cr1, TMP2, TISNUM 4721 |.endif 4722 | lfd f1, FORL_IDX*8(RA) 4723 | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt 4724 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 4725 | lfd f2, FORL_STOP*8(RA) 4726 | bge ->vmeta_for 4727 } 4728 | cmpwi cr6, TMP3, 0 4729 if (op != BC_JFORL) { 4730 | srwi RD, RD, 1 4731 } 4732 | stfd f1, FORL_EXT*8(RA) 4733 if (op != BC_JFORL) { 4734 | add RD, PC, RD 4735 } 4736 | fcmpu cr0, f1, f2 4737 if (op == BC_JFORI) { 4738 | addis PC, RD, -(BCBIAS_J*4 >> 16) 4739 } 4740 | blt cr6, >5 4741 if (op == BC_FORI) { 4742 | bgt >3 4743 } else if (op == BC_IFORL) { 4744 |.if DUALNUM 4745 | bgty <2 4746 |.else 4747 | bgt >2 4748 |.endif 4749 |1: 4750 | addis PC, RD, -(BCBIAS_J*4 >> 16) 4751 } else if (op == BC_JFORI) { 4752 | bley >7 4753 } else { 4754 | bley =>BC_JLOOP 4755 } 4756 |.if DUALNUM 4757 | b <2 4758 |.else 4759 |2: 4760 | ins_next 4761 |.endif 4762 |5: // Negative step. 4763 if (op == BC_FORI) { 4764 | bge <2 4765 |3: // Used by integer loop, too. 4766 | addis PC, RD, -(BCBIAS_J*4 >> 16) 4767 } else if (op == BC_IFORL) { 4768 | bgey <1 4769 } else if (op == BC_JFORI) { 4770 | bgey >7 4771 } else { 4772 | bgey =>BC_JLOOP 4773 } 4774 | b <2 4775 if (op == BC_JFORI) { 4776 |7: 4777 | lwz INS, -4(PC) 4778 | decode_RD8 RD, INS 4779 | b =>BC_JLOOP 4780 } 4781 break; 4782 4783 case BC_ITERL: 4784 |.if JIT 4785 | hotloop 4786 |.endif 4787 | // Fall through. Assumes BC_IITERL follows. 4788 break; 4789 4790 case BC_JITERL: 4791#if !LJ_HASJIT 4792 break; 4793#endif 4794 case BC_IITERL: 4795 | // RA = base*8, RD = target 4796 | lwzux TMP1, RA, BASE 4797 | lwz TMP2, 4(RA) 4798 | checknil TMP1; beq >1 // Stop if iterator returned nil. 
4799 if (op == BC_JITERL) { 4800 | stw TMP1, -8(RA) 4801 | stw TMP2, -4(RA) 4802 | b =>BC_JLOOP 4803 } else { 4804 | branch_RD // Otherwise save control var + branch. 4805 | stw TMP1, -8(RA) 4806 | stw TMP2, -4(RA) 4807 } 4808 |1: 4809 | ins_next 4810 break; 4811 4812 case BC_LOOP: 4813 | // RA = base*8, RD = target (loop extent) 4814 | // Note: RA/RD is only used by trace recorder to determine scope/extent 4815 | // This opcode does NOT jump, it's only purpose is to detect a hot loop. 4816 |.if JIT 4817 | hotloop 4818 |.endif 4819 | // Fall through. Assumes BC_ILOOP follows. 4820 break; 4821 4822 case BC_ILOOP: 4823 | // RA = base*8, RD = target (loop extent) 4824 | ins_next 4825 break; 4826 4827 case BC_JLOOP: 4828 |.if JIT 4829 | // RA = base*8 (ignored), RD = traceno*8 4830 | lwz TMP1, DISPATCH_J(trace)(DISPATCH) 4831 | srwi RD, RD, 1 4832 | // Traces on PPC don't store the trace number, so use 0. 4833 | stw ZERO, DISPATCH_GL(vmstate)(DISPATCH) 4834 | lwzx TRACE:TMP2, TMP1, RD 4835 | mcrxr cr0 // Clear SO flag. 4836 | lp TMP2, TRACE:TMP2->mcode 4837 | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) 4838 | mtctr TMP2 4839 | stw L, DISPATCH_GL(jit_L)(DISPATCH) 4840 | addi JGL, DISPATCH, GG_DISP2G+32768 4841 | bctr 4842 |.endif 4843 break; 4844 4845 case BC_JMP: 4846 | // RA = base*8 (only used by trace recorder), RD = target 4847 | branch_RD 4848 | ins_next 4849 break; 4850 4851 /* -- Function headers -------------------------------------------------- */ 4852 4853 case BC_FUNCF: 4854 |.if JIT 4855 | hotcall 4856 |.endif 4857 case BC_FUNCV: /* NYI: compiled vararg functions. */ 4858 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. 
4859 break; 4860 4861 case BC_JFUNCF: 4862#if !LJ_HASJIT 4863 break; 4864#endif 4865 case BC_IFUNCF: 4866 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 4867 | lwz TMP2, L->maxstack 4868 | lbz TMP1, -4+PC2PROTO(numparams)(PC) 4869 | lwz KBASE, -4+PC2PROTO(k)(PC) 4870 | cmplw RA, TMP2 4871 | slwi TMP1, TMP1, 3 4872 | bgt ->vm_growstack_l 4873 if (op != BC_JFUNCF) { 4874 | ins_next1 4875 } 4876 |2: 4877 | cmplw NARGS8:RC, TMP1 // Check for missing parameters. 4878 | blt >3 4879 if (op == BC_JFUNCF) { 4880 | decode_RD8 RD, INS 4881 | b =>BC_JLOOP 4882 } else { 4883 | ins_next2 4884 } 4885 | 4886 |3: // Clear missing parameters. 4887 | stwx TISNIL, BASE, NARGS8:RC 4888 | addi NARGS8:RC, NARGS8:RC, 8 4889 | b <2 4890 break; 4891 4892 case BC_JFUNCV: 4893#if !LJ_HASJIT 4894 break; 4895#endif 4896 | NYI // NYI: compiled vararg functions 4897 break; /* NYI: compiled vararg functions. */ 4898 4899 case BC_IFUNCV: 4900 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 4901 | lwz TMP2, L->maxstack 4902 | add TMP1, BASE, RC 4903 | add TMP0, RA, RC 4904 | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC. 4905 | addi TMP3, RC, 8+FRAME_VARG 4906 | lwz KBASE, -4+PC2PROTO(k)(PC) 4907 | cmplw TMP0, TMP2 4908 | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG. 4909 | bge ->vm_growstack_l 4910 | lbz TMP2, -4+PC2PROTO(numparams)(PC) 4911 | mr RA, BASE 4912 | mr RC, TMP1 4913 | ins_next1 4914 | cmpwi TMP2, 0 4915 | addi BASE, TMP1, 8 4916 | beq >3 4917 |1: 4918 | cmplw RA, RC // Less args than parameters? 4919 | lwz TMP0, 0(RA) 4920 | lwz TMP3, 4(RA) 4921 | bge >4 4922 | stw TISNIL, 0(RA) // Clear old fixarg slot (help the GC). 4923 | addi RA, RA, 8 4924 |2: 4925 | addic. TMP2, TMP2, -1 4926 | stw TMP0, 8(TMP1) 4927 | stw TMP3, 12(TMP1) 4928 | addi TMP1, TMP1, 8 4929 | bne <1 4930 |3: 4931 | ins_next2 4932 | 4933 |4: // Clear missing parameters. 
4934 | li TMP0, LJ_TNIL 4935 | b <2 4936 break; 4937 4938 case BC_FUNCC: 4939 case BC_FUNCCW: 4940 | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 4941 if (op == BC_FUNCC) { 4942 | lp RD, CFUNC:RB->f 4943 } else { 4944 | lp RD, DISPATCH_GL(wrapf)(DISPATCH) 4945 } 4946 | add TMP1, RA, NARGS8:RC 4947 | lwz TMP2, L->maxstack 4948 | .toc lp TMP3, 0(RD) 4949 | add RC, BASE, NARGS8:RC 4950 | stp BASE, L->base 4951 | cmplw TMP1, TMP2 4952 | stp RC, L->top 4953 | li_vmstate C 4954 |.if TOC 4955 | mtctr TMP3 4956 |.else 4957 | mtctr RD 4958 |.endif 4959 if (op == BC_FUNCCW) { 4960 | lp CARG2, CFUNC:RB->f 4961 } 4962 | mr CARG1, L 4963 | bgt ->vm_growstack_c // Need to grow stack. 4964 | .toc lp TOCREG, TOC_OFS(RD) 4965 | .tocenv lp ENVREG, ENV_OFS(RD) 4966 | st_vmstate 4967 | bctrl // (lua_State *L [, lua_CFunction f]) 4968 | // Returns nresults. 4969 | lp BASE, L->base 4970 | .toc ld TOCREG, SAVE_TOC 4971 | slwi RD, CRET1, 3 4972 | lp TMP1, L->top 4973 | li_vmstate INTERP 4974 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. 4975 | sub RA, TMP1, RD // RA = L->top - nresults*8 4976 | st_vmstate 4977 | b ->vm_returnc 4978 break; 4979 4980 /* ---------------------------------------------------------------------- */ 4981 4982 default: 4983 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); 4984 exit(2); 4985 break; 4986 } 4987} 4988 4989static int build_backend(BuildCtx *ctx) 4990{ 4991 int op; 4992 4993 dasm_growpc(Dst, BC__MAX); 4994 4995 build_subroutines(ctx); 4996 4997 |.code_op 4998 for (op = 0; op < BC__MAX; op++) 4999 build_ins(ctx, (BCOp)op, op); 5000 5001 return BC__MAX; 5002} 5003 5004/* Emit pseudo frame-info for all assembler functions. 
*/ 5005static void emit_asm_debug(BuildCtx *ctx) 5006{ 5007 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); 5008 int i; 5009 switch (ctx->mode) { 5010 case BUILD_elfasm: 5011 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); 5012 fprintf(ctx->fp, 5013 ".Lframe0:\n" 5014 "\t.long .LECIE0-.LSCIE0\n" 5015 ".LSCIE0:\n" 5016 "\t.long 0xffffffff\n" 5017 "\t.byte 0x1\n" 5018 "\t.string \"\"\n" 5019 "\t.uleb128 0x1\n" 5020 "\t.sleb128 -4\n" 5021 "\t.byte 65\n" 5022 "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" 5023 "\t.align 2\n" 5024 ".LECIE0:\n\n"); 5025 fprintf(ctx->fp, 5026 ".LSFDE0:\n" 5027 "\t.long .LEFDE0-.LASFDE0\n" 5028 ".LASFDE0:\n" 5029 "\t.long .Lframe0\n" 5030 "\t.long .Lbegin\n" 5031 "\t.long %d\n" 5032 "\t.byte 0xe\n\t.uleb128 %d\n" 5033 "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" 5034 "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", 5035 fcofs, CFRAME_SIZE); 5036 for (i = 14; i <= 31; i++) 5037 fprintf(ctx->fp, 5038 "\t.byte %d\n\t.uleb128 %d\n" 5039 "\t.byte %d\n\t.uleb128 %d\n", 5040 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)); 5041 fprintf(ctx->fp, 5042 "\t.align 2\n" 5043 ".LEFDE0:\n\n"); 5044#if LJ_HASFFI 5045 fprintf(ctx->fp, 5046 ".LSFDE1:\n" 5047 "\t.long .LEFDE1-.LASFDE1\n" 5048 ".LASFDE1:\n" 5049 "\t.long .Lframe0\n" 5050#if LJ_TARGET_PS3 5051 "\t.long .lj_vm_ffi_call\n" 5052#else 5053 "\t.long lj_vm_ffi_call\n" 5054#endif 5055 "\t.long %d\n" 5056 "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" 5057 "\t.byte 0x8e\n\t.uleb128 2\n" 5058 "\t.byte 0xd\n\t.uleb128 0xe\n" 5059 "\t.align 2\n" 5060 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); 5061#endif 5062#if !LJ_NO_UNWIND 5063 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); 5064 fprintf(ctx->fp, 5065 ".Lframe1:\n" 5066 "\t.long .LECIE1-.LSCIE1\n" 5067 ".LSCIE1:\n" 5068 "\t.long 0\n" 5069 "\t.byte 0x1\n" 5070 "\t.string \"zPR\"\n" 5071 "\t.uleb128 0x1\n" 5072 "\t.sleb128 -4\n" 5073 "\t.byte 65\n" 5074 "\t.uleb128 6\n" /* augmentation length */ 5075 "\t.byte 
0x1b\n" /* pcrel|sdata4 */ 5076 "\t.long lj_err_unwind_dwarf-.\n" 5077 "\t.byte 0x1b\n" /* pcrel|sdata4 */ 5078 "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" 5079 "\t.align 2\n" 5080 ".LECIE1:\n\n"); 5081 fprintf(ctx->fp, 5082 ".LSFDE2:\n" 5083 "\t.long .LEFDE2-.LASFDE2\n" 5084 ".LASFDE2:\n" 5085 "\t.long .LASFDE2-.Lframe1\n" 5086 "\t.long .Lbegin-.\n" 5087 "\t.long %d\n" 5088 "\t.uleb128 0\n" /* augmentation length */ 5089 "\t.byte 0xe\n\t.uleb128 %d\n" 5090 "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" 5091 "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", 5092 fcofs, CFRAME_SIZE); 5093 for (i = 14; i <= 31; i++) 5094 fprintf(ctx->fp, 5095 "\t.byte %d\n\t.uleb128 %d\n" 5096 "\t.byte %d\n\t.uleb128 %d\n", 5097 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)); 5098 fprintf(ctx->fp, 5099 "\t.align 2\n" 5100 ".LEFDE2:\n\n"); 5101#if LJ_HASFFI 5102 fprintf(ctx->fp, 5103 ".Lframe2:\n" 5104 "\t.long .LECIE2-.LSCIE2\n" 5105 ".LSCIE2:\n" 5106 "\t.long 0\n" 5107 "\t.byte 0x1\n" 5108 "\t.string \"zR\"\n" 5109 "\t.uleb128 0x1\n" 5110 "\t.sleb128 -4\n" 5111 "\t.byte 65\n" 5112 "\t.uleb128 1\n" /* augmentation length */ 5113 "\t.byte 0x1b\n" /* pcrel|sdata4 */ 5114 "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" 5115 "\t.align 2\n" 5116 ".LECIE2:\n\n"); 5117 fprintf(ctx->fp, 5118 ".LSFDE3:\n" 5119 "\t.long .LEFDE3-.LASFDE3\n" 5120 ".LASFDE3:\n" 5121 "\t.long .LASFDE3-.Lframe2\n" 5122 "\t.long lj_vm_ffi_call-.\n" 5123 "\t.long %d\n" 5124 "\t.uleb128 0\n" /* augmentation length */ 5125 "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" 5126 "\t.byte 0x8e\n\t.uleb128 2\n" 5127 "\t.byte 0xd\n\t.uleb128 0xe\n" 5128 "\t.align 2\n" 5129 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); 5130#endif 5131#endif 5132 break; 5133 default: 5134 break; 5135 } 5136} 5137 5138