1|// Low-level VM code for MIPS64 CPUs.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4|//
5|// Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
6|// Sponsored by Cisco Systems, Inc.
7|
8|.arch mips64
9|.section code_op, code_sub
10|
11|.actionlist build_actionlist
12|.globals GLOB_
13|.globalnames globnames
14|.externnames extnames
15|
16|// Note: The ragged indentation of the instructions is intentional.
17|//       The starting columns indicate data dependencies.
18|
19|//-----------------------------------------------------------------------
20|
21|// Fixed register assignments for the interpreter.
22|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra
23|
24|.macro .FPU, a, b
25|.if FPU
26|  a, b
27|.endif
28|.endmacro
29|
30|// The following must be C callee-save (but BASE is often refetched).
31|.define BASE,		r16	// Base of current Lua stack frame.
32|.define KBASE,		r17	// Constants of current Lua function.
33|.define PC,		r18	// Next PC.
34|.define DISPATCH,	r19	// Opcode dispatch table.
35|.define LREG,		r20	// Register holding lua_State (also in SAVE_L).
36|.define MULTRES,	r21	// Size of multi-result: (nresults+1)*8.
37|
38|.define JGL,		r30	// On-trace: global_State + 32768.
39|
40|// Constants for type-comparisons, stores and conversions. C callee-save.
41|.define TISNIL,	r30
42|.define TISNUM,	r22
43|.if FPU
44|.define TOBIT,		f30	// 2^52 + 2^51.
45|.endif
46|
47|// The following temporaries are not saved across C calls, except for RA.
48|.define RA,		r23	// Callee-save.
49|.define RB,		r8
50|.define RC,		r9
51|.define RD,		r10
52|.define INS,		r11
53|
54|.define AT,		r1	// Assembler temporary.
55|.define TMP0,		r12
56|.define TMP1,		r13
57|.define TMP2,		r14
58|.define TMP3,		r15
59|
60|// MIPS n64 calling convention.
61|.define CFUNCADDR,	r25
62|.define CARG1,		r4
63|.define CARG2,		r5
64|.define CARG3,		r6
65|.define CARG4,		r7
66|.define CARG5,		r8
67|.define CARG6,		r9
68|.define CARG7,		r10
69|.define CARG8,		r11
70|
71|.define CRET1,		r2
72|.define CRET2,		r3
73|
74|.if FPU
75|.define FARG1,		f12
76|.define FARG2,		f13
77|.define FARG3,		f14
78|.define FARG4,		f15
79|.define FARG5,		f16
80|.define FARG6,		f17
81|.define FARG7,		f18
82|.define FARG8,		f19
83|
84|.define FRET1,		f0
85|.define FRET2,		f2
86|
87|.define FTMP0,		f20
88|.define FTMP1,		f21
89|.define FTMP2,		f22
90|.endif
91|
92|// Stack layout while in interpreter. Must match with lj_frame.h.
93|.if FPU		// MIPS64 hard-float.
94|
95|.define CFRAME_SPACE,	192	// Delta for sp.
96|
97|//----- 16 byte aligned, <-- sp entering interpreter
98|.define SAVE_ERRF,	188(sp)	// 32 bit values.
99|.define SAVE_NRES,	184(sp)
100|.define SAVE_CFRAME,	176(sp)	// 64 bit values.
101|.define SAVE_L,	168(sp)
102|.define SAVE_PC,	160(sp)
103|//----- 16 byte aligned
104|.define SAVE_GPR_,	80	// .. 80+10*8: 64 bit GPR saves.
105|.define SAVE_FPR_,	16	// .. 16+8*8: 64 bit FPR saves.
106|
107|.else			// MIPS64 soft-float
108|
109|.define CFRAME_SPACE,	128	// Delta for sp.
110|
111|//----- 16 byte aligned, <-- sp entering interpreter
112|.define SAVE_ERRF,	124(sp)	// 32 bit values.
113|.define SAVE_NRES,	120(sp)
114|.define SAVE_CFRAME,	112(sp)	// 64 bit values.
115|.define SAVE_L,	104(sp)
116|.define SAVE_PC,	96(sp)
117|//----- 16 byte aligned
118|.define SAVE_GPR_,	16	// .. 16+10*8: 64 bit GPR saves.
119|
120|.endif
121|
122|.define TMPX,		8(sp)	// Unused by interpreter, temp for JIT code.
123|.define TMPD,		0(sp)
124|//----- 16 byte aligned
125|
126|.define TMPD_OFS,	0
127|
128|.define SAVE_MULTRES,	TMPD
129|
130|//-----------------------------------------------------------------------
131|
132|.macro saveregs
133|  daddiu sp, sp, -CFRAME_SPACE
134|  sd ra, SAVE_GPR_+9*8(sp)
135|  sd r30, SAVE_GPR_+8*8(sp)
136|   .FPU sdc1 f31, SAVE_FPR_+7*8(sp)
137|  sd r23, SAVE_GPR_+7*8(sp)
138|   .FPU sdc1 f30, SAVE_FPR_+6*8(sp)
139|  sd r22, SAVE_GPR_+6*8(sp)
140|   .FPU sdc1 f29, SAVE_FPR_+5*8(sp)
141|  sd r21, SAVE_GPR_+5*8(sp)
142|   .FPU sdc1 f28, SAVE_FPR_+4*8(sp)
143|  sd r20, SAVE_GPR_+4*8(sp)
144|   .FPU sdc1 f27, SAVE_FPR_+3*8(sp)
145|  sd r19, SAVE_GPR_+3*8(sp)
146|   .FPU sdc1 f26, SAVE_FPR_+2*8(sp)
147|  sd r18, SAVE_GPR_+2*8(sp)
148|   .FPU sdc1 f25, SAVE_FPR_+1*8(sp)
149|  sd r17, SAVE_GPR_+1*8(sp)
150|   .FPU sdc1 f24, SAVE_FPR_+0*8(sp)
151|  sd r16, SAVE_GPR_+0*8(sp)
152|.endmacro
153|
154|.macro restoreregs_ret
155|  ld ra, SAVE_GPR_+9*8(sp)
156|  ld r30, SAVE_GPR_+8*8(sp)
157|  ld r23, SAVE_GPR_+7*8(sp)
158|   .FPU ldc1 f31, SAVE_FPR_+7*8(sp)
159|  ld r22, SAVE_GPR_+6*8(sp)
160|   .FPU ldc1 f30, SAVE_FPR_+6*8(sp)
161|  ld r21, SAVE_GPR_+5*8(sp)
162|   .FPU ldc1 f29, SAVE_FPR_+5*8(sp)
163|  ld r20, SAVE_GPR_+4*8(sp)
164|   .FPU ldc1 f28, SAVE_FPR_+4*8(sp)
165|  ld r19, SAVE_GPR_+3*8(sp)
166|   .FPU ldc1 f27, SAVE_FPR_+3*8(sp)
167|  ld r18, SAVE_GPR_+2*8(sp)
168|   .FPU ldc1 f26, SAVE_FPR_+2*8(sp)
169|  ld r17, SAVE_GPR_+1*8(sp)
170|   .FPU ldc1 f25, SAVE_FPR_+1*8(sp)
171|  ld r16, SAVE_GPR_+0*8(sp)
172|   .FPU ldc1 f24, SAVE_FPR_+0*8(sp)
173|  jr ra
174|  daddiu sp, sp, CFRAME_SPACE
175|.endmacro
176|
177|// Type definitions. Some of these are only used for documentation.
178|.type L,		lua_State,	LREG
179|.type GL,		global_State
180|.type TVALUE,		TValue
181|.type GCOBJ,		GCobj
182|.type STR,		GCstr
183|.type TAB,		GCtab
184|.type LFUNC,		GCfuncL
185|.type CFUNC,		GCfuncC
186|.type PROTO,		GCproto
187|.type UPVAL,		GCupval
188|.type NODE,		Node
189|.type NARGS8,		int
190|.type TRACE,		GCtrace
191|.type SBUF,		SBuf
192|
193|//-----------------------------------------------------------------------
194|
195|// Trap for not-yet-implemented parts.
196|.macro NYI; .long 0xf0f0f0f0; .endmacro
197|
198|// Macros to mark delay slots.
199|.macro ., a; a; .endmacro
200|.macro ., a,b; a,b; .endmacro
201|.macro ., a,b,c; a,b,c; .endmacro
202|.macro ., a,b,c,d; a,b,c,d; .endmacro
203|
204|.define FRAME_PC,	-8
205|.define FRAME_FUNC,	-16
206|
207|//-----------------------------------------------------------------------
208|
209|// Endian-specific defines.
210|.if ENDIAN_LE
211|.define HI,		4
212|.define LO,		0
213|.define OFS_RD,	2
214|.define OFS_RA,	1
215|.define OFS_OP,	0
216|.else
217|.define HI,		0
218|.define LO,		4
219|.define OFS_RD,	0
220|.define OFS_RA,	2
221|.define OFS_OP,	3
222|.endif
223|
224|// Instruction decode.
225|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro
226|.macro decode_OP8a, dst, ins; andi dst, ins, 0xff; .endmacro
227|.macro decode_OP8b, dst; sll dst, dst, 3; .endmacro
228|.macro decode_RC8a, dst, ins; srl dst, ins, 13; .endmacro
229|.macro decode_RC8b, dst; andi dst, dst, 0x7f8; .endmacro
230|.macro decode_RD4b, dst; sll dst, dst, 2; .endmacro
231|.macro decode_RA8a, dst, ins; srl dst, ins, 5; .endmacro
232|.macro decode_RA8b, dst; andi dst, dst, 0x7f8; .endmacro
233|.macro decode_RB8a, dst, ins; srl dst, ins, 21; .endmacro
234|.macro decode_RB8b, dst; andi dst, dst, 0x7f8; .endmacro
235|.macro decode_RD8a, dst, ins; srl dst, ins, 16; .endmacro
236|.macro decode_RD8b, dst; sll dst, dst, 3; .endmacro
237|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro
238|
239|// Instruction fetch.
240|.macro ins_NEXT1
241|  lw INS, 0(PC)
242|   daddiu PC, PC, 4
243|.endmacro
244|// Instruction decode+dispatch.
245|.macro ins_NEXT2
246|  decode_OP8a TMP1, INS
247|  decode_OP8b TMP1
248|  daddu TMP0, DISPATCH, TMP1
249|   decode_RD8a RD, INS
250|  ld AT, 0(TMP0)
251|   decode_RA8a RA, INS
252|   decode_RD8b RD
253|  jr AT
254|   decode_RA8b RA
255|.endmacro
256|.macro ins_NEXT
257|  ins_NEXT1
258|  ins_NEXT2
259|.endmacro
260|
261|// Instruction footer.
262|.if 1
263|  // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
264|  .define ins_next, ins_NEXT
265|  .define ins_next_, ins_NEXT
266|  .define ins_next1, ins_NEXT1
267|  .define ins_next2, ins_NEXT2
268|.else
269|  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
270|  // Affects only certain kinds of benchmarks (and only with -j off).
271|  .macro ins_next
272|    b ->ins_next
273|  .endmacro
274|  .macro ins_next1
275|  .endmacro
276|  .macro ins_next2
277|    b ->ins_next
278|  .endmacro
279|  .macro ins_next_
280|  ->ins_next:
281|    ins_NEXT
282|  .endmacro
283|.endif
284|
285|// Call decode and dispatch.
286|.macro ins_callt
287|  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
288|  ld PC, LFUNC:RB->pc
289|  lw INS, 0(PC)
290|   daddiu PC, PC, 4
291|  decode_OP8a TMP1, INS
292|   decode_RA8a RA, INS
293|  decode_OP8b TMP1
294|   decode_RA8b RA
295|  daddu TMP0, DISPATCH, TMP1
296|  ld TMP0, 0(TMP0)
297|  jr TMP0
298|   daddu RA, RA, BASE
299|.endmacro
300|
301|.macro ins_call
302|  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
303|  sd PC, FRAME_PC(BASE)
304|  ins_callt
305|.endmacro
306|
307|//-----------------------------------------------------------------------
308|
309|.macro branch_RD
310|  srl TMP0, RD, 1
311|  lui AT, (-(BCBIAS_J*4 >> 16) & 65535)
312|  addu TMP0, TMP0, AT
313|  daddu PC, PC, TMP0
314|.endmacro
315|
316|// Assumes DISPATCH is relative to GL.
317#define DISPATCH_GL(field)	(GG_DISP2G + (int)offsetof(global_State, field))
318#define DISPATCH_J(field)	(GG_DISP2J + (int)offsetof(jit_State, field))
319#define GG_DISP2GOT		(GG_OFS(got) - GG_OFS(dispatch))
320#define DISPATCH_GOT(name)	(GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name)
321|
322#define PC2PROTO(field)  ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
323|
324|.macro load_got, func
325|  ld CFUNCADDR, DISPATCH_GOT(func)(DISPATCH)
326|.endmacro
327|// Much faster. Sadly, there's no easy way to force the required code layout.
328|// .macro call_intern, func; bal extern func; .endmacro
329|.macro call_intern, func; jalr CFUNCADDR; .endmacro
330|.macro call_extern; jalr CFUNCADDR; .endmacro
331|.macro jmp_extern; jr CFUNCADDR; .endmacro
332|
333|.macro hotcheck, delta, target
334|  dsrl TMP1, PC, 1
335|  andi TMP1, TMP1, 126
336|  daddu TMP1, TMP1, DISPATCH
337|  lhu TMP2, GG_DISP2HOT(TMP1)
338|  addiu TMP2, TMP2, -delta
339|  bltz TMP2, target
340|.  sh TMP2, GG_DISP2HOT(TMP1)
341|.endmacro
342|
343|.macro hotloop
344|  hotcheck HOTCOUNT_LOOP, ->vm_hotloop
345|.endmacro
346|
347|.macro hotcall
348|  hotcheck HOTCOUNT_CALL, ->vm_hotcall
349|.endmacro
350|
351|// Set current VM state. Uses TMP0.
352|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro
353|.macro st_vmstate; sw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro
354|
355|// Move table write barrier back. Overwrites mark and tmp.
356|.macro barrierback, tab, mark, tmp, target
357|  ld tmp, DISPATCH_GL(gc.grayagain)(DISPATCH)
358|   andi mark, mark, ~LJ_GC_BLACK & 255		// black2gray(tab)
359|  sd tab, DISPATCH_GL(gc.grayagain)(DISPATCH)
360|   sb mark, tab->marked
361|  b target
362|.  sd tmp, tab->gclist
363|.endmacro
364|
365|// Clear type tag. Isolate lowest 14+32+1=47 bits of reg.
366|.macro cleartp, reg; dextm reg, reg, 0, 14; .endmacro
367|.macro cleartp, dst, reg; dextm dst, reg, 0, 14; .endmacro
368|
369|// Set type tag: Merge 17 type bits into bits [15+32=47, 31+32+1=64) of dst.
370|.macro settp, dst, tp; dinsu dst, tp, 15, 31; .endmacro
371|
372|// Extract (negative) type tag.
373|.macro gettp, dst, src; dsra dst, src, 47; .endmacro
374|
375|// Macros to check the TValue type and extract the GCobj. Branch on failure.
376|.macro checktp, reg, tp, target
377|  gettp AT, reg
378|  daddiu AT, AT, tp
379|  bnez AT, target
380|.  cleartp reg
381|.endmacro
382|.macro checktp, dst, reg, tp, target
383|  gettp AT, reg
384|  daddiu AT, AT, tp
385|  bnez AT, target
386|.  cleartp dst, reg
387|.endmacro
388|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro
389|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro
390|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro
391|.macro checkint, reg, target	// Caveat: has delay slot!
392|  gettp AT, reg
393|  bne AT, TISNUM, target
394|.endmacro
395|.macro checknum, reg, target	// Caveat: has delay slot!
396|  gettp AT, reg
397|  sltiu AT, AT, LJ_TISNUM
398|  beqz AT, target
399|.endmacro
400|
401|.macro mov_false, reg
402|  lu reg, 0x8000
403|  dsll reg, reg, 32
404|  not reg, reg
405|.endmacro
406|.macro mov_true, reg
407|  li reg, 0x0001
408|  dsll reg, reg, 48
409|  not reg, reg
410|.endmacro
411|
412|//-----------------------------------------------------------------------
413
414/* Generate subroutines used by opcodes and other parts of the VM. */
415/* The .code_sub section should be last to help static branch prediction. */
416static void build_subroutines(BuildCtx *ctx)
417{
418  |.code_sub
419  |
420  |//-----------------------------------------------------------------------
421  |//-- Return handling ----------------------------------------------------
422  |//-----------------------------------------------------------------------
423  |
424  |->vm_returnp:
425  |  // See vm_return. Also: TMP2 = previous base.
426  |  andi AT, PC, FRAME_P
427  |  beqz AT, ->cont_dispatch
428  |
429  |  // Return from pcall or xpcall fast func.
430  |.  mov_true TMP1
431  |  ld PC, FRAME_PC(TMP2)		// Fetch PC of previous frame.
432  |  move BASE, TMP2			// Restore caller base.
433  |  // Prepending may overwrite the pcall frame, so do it at the end.
434  |   sd TMP1, -8(RA)			// Prepend true to results.
435  |   daddiu RA, RA, -8
436  |
437  |->vm_returnc:
438  |   addiu RD, RD, 8			// RD = (nresults+1)*8.
439  |  andi TMP0, PC, FRAME_TYPE
440  |   beqz RD, ->vm_unwind_c_eh
441  |.   li CRET1, LUA_YIELD
442  |  beqz TMP0, ->BC_RET_Z		// Handle regular return to Lua.
443  |.  move MULTRES, RD
444  |
445  |->vm_return:
446  |  // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
447  |  // TMP0 = PC & FRAME_TYPE
448  |   li TMP2, -8
449  |  xori AT, TMP0, FRAME_C
450  |   and TMP2, PC, TMP2
451  |  bnez AT, ->vm_returnp
452  |   dsubu TMP2, BASE, TMP2		// TMP2 = previous base.
453  |
454  |  addiu TMP1, RD, -8
455  |   sd TMP2, L->base
456  |    li_vmstate C
457  |   lw TMP2, SAVE_NRES
458  |   daddiu BASE, BASE, -16
459  |    st_vmstate
460  |  beqz TMP1, >2
461  |.   sll TMP2, TMP2, 3
462  |1:
463  |  addiu TMP1, TMP1, -8
464  |   ld CRET1, 0(RA)
465  |    daddiu RA, RA, 8
466  |   sd CRET1, 0(BASE)
467  |  bnez TMP1, <1
468  |.  daddiu BASE, BASE, 8
469  |
470  |2:
471  |  bne TMP2, RD, >6
472  |3:
473  |.  sd BASE, L->top			// Store new top.
474  |
475  |->vm_leave_cp:
476  |  ld TMP0, SAVE_CFRAME		// Restore previous C frame.
477  |   move CRET1, r0			// Ok return status for vm_pcall.
478  |  sd TMP0, L->cframe
479  |
480  |->vm_leave_unw:
481  |  restoreregs_ret
482  |
483  |6:
484  |  ld TMP1, L->maxstack
485  |  slt AT, TMP2, RD
486  |  bnez AT, >7			// Less results wanted?
487  |  // More results wanted. Check stack size and fill up results with nil.
488  |.  slt AT, BASE, TMP1
489  |  beqz AT, >8
490  |.  nop
491  |  sd TISNIL, 0(BASE)
492  |  addiu RD, RD, 8
493  |  b <2
494  |.  daddiu BASE, BASE, 8
495  |
496  |7:  // Less results wanted.
497  |  subu TMP0, RD, TMP2
498  |  dsubu TMP0, BASE, TMP0		// Either keep top or shrink it.
499  |.if MIPSR6
500  |  selnez TMP0, TMP0, TMP2		// LUA_MULTRET+1 case?
501  |  seleqz BASE, BASE, TMP2
502  |  b <3
503  |.  or BASE, BASE, TMP0
504  |.else
505  |  b <3
506  |.  movn BASE, TMP0, TMP2		// LUA_MULTRET+1 case?
507  |.endif
508  |
509  |8:  // Corner case: need to grow stack for filling up results.
510  |  // This can happen if:
511  |  // - A C function grows the stack (a lot).
512  |  // - The GC shrinks the stack in between.
513  |  // - A return back from a lua_call() with (high) nresults adjustment.
514  |  load_got lj_state_growstack
515  |   move MULTRES, RD
516  |  srl CARG2, TMP2, 3
517  |  call_intern lj_state_growstack	// (lua_State *L, int n)
518  |.  move CARG1, L
519  |    lw TMP2, SAVE_NRES
520  |  ld BASE, L->top			// Need the (realloced) L->top in BASE.
521  |   move RD, MULTRES
522  |  b <2
523  |.   sll TMP2, TMP2, 3
524  |
525  |->vm_unwind_c:			// Unwind C stack, return from vm_pcall.
526  |  // (void *cframe, int errcode)
527  |  move sp, CARG1
528  |  move CRET1, CARG2
529  |->vm_unwind_c_eh:			// Landing pad for external unwinder.
530  |  ld L, SAVE_L
531  |   li TMP0, ~LJ_VMST_C
532  |  ld GL:TMP1, L->glref
533  |  b ->vm_leave_unw
534  |.  sw TMP0, GL:TMP1->vmstate
535  |
536  |->vm_unwind_ff:			// Unwind C stack, return from ff pcall.
537  |  // (void *cframe)
538  |  li AT, -4
539  |  and sp, CARG1, AT
540  |->vm_unwind_ff_eh:			// Landing pad for external unwinder.
541  |  ld L, SAVE_L
542  |     .FPU lui TMP3, 0x59c0		// TOBIT = 2^52 + 2^51 (float).
543  |     li TISNIL, LJ_TNIL
544  |    li TISNUM, LJ_TISNUM
545  |  ld BASE, L->base
546  |   ld DISPATCH, L->glref		// Setup pointer to dispatch table.
547  |     .FPU mtc1 TMP3, TOBIT
548  |  mov_false TMP1
549  |    li_vmstate INTERP
550  |  ld PC, FRAME_PC(BASE)		// Fetch PC of previous frame.
551  |     .FPU cvt.d.s TOBIT, TOBIT
552  |  daddiu RA, BASE, -8		// Results start at BASE-8.
553  |   daddiu DISPATCH, DISPATCH, GG_G2DISP
554  |  sd TMP1, 0(RA)			// Prepend false to error message.
555  |    st_vmstate
556  |  b ->vm_returnc
557  |.  li RD, 16				// 2 results: false + error message.
558  |
559  |//-----------------------------------------------------------------------
560  |//-- Grow stack for calls -----------------------------------------------
561  |//-----------------------------------------------------------------------
562  |
563  |->vm_growstack_c:			// Grow stack for C function.
564  |  b >2
565  |.  li CARG2, LUA_MINSTACK
566  |
567  |->vm_growstack_l:			// Grow stack for Lua function.
568  |  // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
569  |  daddu RC, BASE, RC
570  |   dsubu RA, RA, BASE
571  |  sd BASE, L->base
572  |   daddiu PC, PC, 4			// Must point after first instruction.
573  |  sd RC, L->top
574  |   srl CARG2, RA, 3
575  |2:
576  |  // L->base = new base, L->top = top
577  |  load_got lj_state_growstack
578  |   sd PC, SAVE_PC
579  |  call_intern lj_state_growstack	// (lua_State *L, int n)
580  |.  move CARG1, L
581  |  ld BASE, L->base
582  |  ld RC, L->top
583  |  ld LFUNC:RB, FRAME_FUNC(BASE)
584  |  dsubu RC, RC, BASE
585  |  cleartp LFUNC:RB
586  |  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
587  |  ins_callt				// Just retry the call.
588  |
589  |//-----------------------------------------------------------------------
590  |//-- Entry points into the assembler VM ---------------------------------
591  |//-----------------------------------------------------------------------
592  |
593  |->vm_resume:				// Setup C frame and resume thread.
594  |  // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
595  |  saveregs
596  |  move L, CARG1
597  |    ld DISPATCH, L->glref		// Setup pointer to dispatch table.
598  |  move BASE, CARG2
599  |    lbu TMP1, L->status
600  |   sd L, SAVE_L
601  |  li PC, FRAME_CP
602  |  daddiu TMP0, sp, CFRAME_RESUME
603  |    daddiu DISPATCH, DISPATCH, GG_G2DISP
604  |   sw r0, SAVE_NRES
605  |   sw r0, SAVE_ERRF
606  |   sd CARG1, SAVE_PC			// Any value outside of bytecode is ok.
607  |   sd r0, SAVE_CFRAME
608  |    beqz TMP1, >3
609  |. sd TMP0, L->cframe
610  |
611  |  // Resume after yield (like a return).
612  |  sd L, DISPATCH_GL(cur_L)(DISPATCH)
613  |  move RA, BASE
614  |   ld BASE, L->base
615  |   ld TMP1, L->top
616  |  ld PC, FRAME_PC(BASE)
617  |     .FPU  lui TMP3, 0x59c0		// TOBIT = 2^52 + 2^51 (float).
618  |   dsubu RD, TMP1, BASE
619  |     .FPU  mtc1 TMP3, TOBIT
620  |    sb r0, L->status
621  |     .FPU  cvt.d.s TOBIT, TOBIT
622  |    li_vmstate INTERP
623  |   daddiu RD, RD, 8
624  |    st_vmstate
625  |   move MULTRES, RD
626  |  andi TMP0, PC, FRAME_TYPE
627  |    li TISNIL, LJ_TNIL
628  |  beqz TMP0, ->BC_RET_Z
629  |.    li TISNUM, LJ_TISNUM
630  |  b ->vm_return
631  |.  nop
632  |
633  |->vm_pcall:				// Setup protected C frame and enter VM.
634  |  // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
635  |  saveregs
636  |  sw CARG4, SAVE_ERRF
637  |  b >1
638  |.  li PC, FRAME_CP
639  |
640  |->vm_call:				// Setup C frame and enter VM.
641  |  // (lua_State *L, TValue *base, int nres1)
642  |  saveregs
643  |  li PC, FRAME_C
644  |
645  |1:  // Entry point for vm_pcall above (PC = ftype).
646  |  ld TMP1, L:CARG1->cframe
647  |    move L, CARG1
648  |   sw CARG3, SAVE_NRES
649  |    ld DISPATCH, L->glref		// Setup pointer to dispatch table.
650  |   sd CARG1, SAVE_L
651  |     move BASE, CARG2
652  |    daddiu DISPATCH, DISPATCH, GG_G2DISP
653  |   sd CARG1, SAVE_PC			// Any value outside of bytecode is ok.
654  |  sd TMP1, SAVE_CFRAME
655  |  sd sp, L->cframe			// Add our C frame to cframe chain.
656  |
657  |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
658  |  sd L, DISPATCH_GL(cur_L)(DISPATCH)
659  |  ld TMP2, L->base			// TMP2 = old base (used in vmeta_call).
660  |     .FPU lui TMP3, 0x59c0		// TOBIT = 2^52 + 2^51 (float).
661  |   ld TMP1, L->top
662  |     .FPU mtc1 TMP3, TOBIT
663  |  daddu PC, PC, BASE
664  |   dsubu NARGS8:RC, TMP1, BASE
665  |     li TISNUM, LJ_TISNUM
666  |  dsubu PC, PC, TMP2			// PC = frame delta + frame type
667  |     .FPU cvt.d.s TOBIT, TOBIT
668  |    li_vmstate INTERP
669  |     li TISNIL, LJ_TNIL
670  |    st_vmstate
671  |
672  |->vm_call_dispatch:
673  |  // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
674  |  ld LFUNC:RB, FRAME_FUNC(BASE)
675  |  checkfunc LFUNC:RB, ->vmeta_call
676  |
677  |->vm_call_dispatch_f:
678  |  ins_call
679  |  // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
680  |
681  |->vm_cpcall:				// Setup protected C frame, call C.
682  |  // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
683  |  saveregs
684  |  move L, CARG1
685  |   ld TMP0, L:CARG1->stack
686  |  sd CARG1, SAVE_L
687  |   ld TMP1, L->top
688  |     ld DISPATCH, L->glref		// Setup pointer to dispatch table.
689  |  sd CARG1, SAVE_PC			// Any value outside of bytecode is ok.
690  |   dsubu TMP0, TMP0, TMP1		// Compute -savestack(L, L->top).
691  |    ld TMP1, L->cframe
692  |     daddiu DISPATCH, DISPATCH, GG_G2DISP
693  |   sw TMP0, SAVE_NRES		// Neg. delta means cframe w/o frame.
694  |  sw r0, SAVE_ERRF			// No error function.
695  |    sd TMP1, SAVE_CFRAME
696  |    sd sp, L->cframe			// Add our C frame to cframe chain.
697  |     sd L, DISPATCH_GL(cur_L)(DISPATCH)
698  |  jalr CARG4			// (lua_State *L, lua_CFunction func, void *ud)
699  |.  move CFUNCADDR, CARG4
700  |  move BASE, CRET1
701  |  bnez CRET1, <3			// Else continue with the call.
702  |.  li PC, FRAME_CP
703  |  b ->vm_leave_cp			// No base? Just remove C frame.
704  |.  nop
705  |
706  |//-----------------------------------------------------------------------
707  |//-- Metamethod handling ------------------------------------------------
708  |//-----------------------------------------------------------------------
709  |
710  |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the
711  |// stack, so BASE doesn't need to be reloaded across these calls.
712  |
713  |//-- Continuation dispatch ----------------------------------------------
714  |
715  |->cont_dispatch:
716  |  // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
717  |  ld TMP0, -32(BASE)			// Continuation.
718  |   move RB, BASE
719  |   move BASE, TMP2			// Restore caller BASE.
720  |    ld LFUNC:TMP1, FRAME_FUNC(TMP2)
721  |.if FFI
722  |  sltiu AT, TMP0, 2
723  |.endif
724  |     ld PC, -24(RB)			// Restore PC from [cont|PC].
725  |    cleartp LFUNC:TMP1
726  |   daddu TMP2, RA, RD
727  |    ld TMP1, LFUNC:TMP1->pc
728  |.if FFI
729  |  bnez AT, >1
730  |.endif
731  |.  sd TISNIL, -8(TMP2)		// Ensure one valid arg.
732  |  // BASE = base, RA = resultptr, RB = meta base
733  |  jr TMP0				// Jump to continuation.
734  |.  ld KBASE, PC2PROTO(k)(TMP1)
735  |
736  |.if FFI
737  |1:
738  |  bnez TMP0, ->cont_ffi_callback	// cont = 1: return from FFI callback.
739  |  // cont = 0: tailcall from C function.
740  |.  daddiu TMP1, RB, -32
741  |  b ->vm_call_tail
742  |.  dsubu RC, TMP1, BASE
743  |.endif
744  |
745  |->cont_cat:				// RA = resultptr, RB = meta base
746  |  lw INS, -4(PC)
747  |   daddiu CARG2, RB, -32
748  |  ld CRET1, 0(RA)
749  |  decode_RB8a MULTRES, INS
750  |   decode_RA8a RA, INS
751  |  decode_RB8b MULTRES
752  |   decode_RA8b RA
753  |  daddu TMP1, BASE, MULTRES
754  |   sd BASE, L->base
755  |   dsubu CARG3, CARG2, TMP1
756  |  bne TMP1, CARG2, ->BC_CAT_Z
757  |.  sd CRET1, 0(CARG2)
758  |  daddu RA, BASE, RA
759  |  b ->cont_nop
760  |.  sd CRET1, 0(RA)
761  |
762  |//-- Table indexing metamethods -----------------------------------------
763  |
764  |->vmeta_tgets1:
765  |  daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
766  |  li TMP0, LJ_TSTR
767  |  settp STR:RC, TMP0
768  |  b >1
769  |.  sd STR:RC, 0(CARG3)
770  |
771  |->vmeta_tgets:
772  |  daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv)
773  |  li TMP0, LJ_TTAB
774  |   li TMP1, LJ_TSTR
775  |  settp TAB:RB, TMP0
776  |   daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2)
777  |  sd TAB:RB, 0(CARG2)
778  |   settp STR:RC, TMP1
779  |  b >1
780  |.  sd STR:RC, 0(CARG3)
781  |
782  |->vmeta_tgetb:			// TMP0 = index
783  |  daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
784  |  settp TMP0, TISNUM
785  |  sd TMP0, 0(CARG3)
786  |
787  |->vmeta_tgetv:
788  |1:
789  |  load_got lj_meta_tget
790  |  sd BASE, L->base
791  |  sd PC, SAVE_PC
792  |  call_intern lj_meta_tget		// (lua_State *L, TValue *o, TValue *k)
793  |.  move CARG1, L
794  |  // Returns TValue * (finished) or NULL (metamethod).
795  |  beqz CRET1, >3
796  |.  daddiu TMP1, BASE, -FRAME_CONT
797  |  ld CARG1, 0(CRET1)
798  |  ins_next1
799  |  sd CARG1, 0(RA)
800  |  ins_next2
801  |
802  |3:  // Call __index metamethod.
803  |  // BASE = base, L->top = new base, stack = cont/func/t/k
804  |  ld BASE, L->top
805  |  sd PC, -24(BASE)			// [cont|PC]
806  |   dsubu PC, BASE, TMP1
807  |  ld LFUNC:RB, FRAME_FUNC(BASE)	// Guaranteed to be a function here.
808  |  cleartp LFUNC:RB
809  |  b ->vm_call_dispatch_f
810  |.  li NARGS8:RC, 16			// 2 args for func(t, k).
811  |
812  |->vmeta_tgetr:
813  |  load_got lj_tab_getinth
814  |  call_intern lj_tab_getinth		// (GCtab *t, int32_t key)
815  |.  nop
816  |  // Returns cTValue * or NULL.
817  |  beqz CRET1, ->BC_TGETR_Z
818  |.  move CARG2, TISNIL
819  |  b ->BC_TGETR_Z
820  |.  ld CARG2, 0(CRET1)
821  |
822  |//-----------------------------------------------------------------------
823  |
824  |->vmeta_tsets1:
825  |  daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
826  |  li TMP0, LJ_TSTR
827  |  settp STR:RC, TMP0
828  |  b >1
829  |.  sd STR:RC, 0(CARG3)
830  |
831  |->vmeta_tsets:
832  |  daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv)
833  |  li TMP0, LJ_TTAB
834  |   li TMP1, LJ_TSTR
835  |  settp TAB:RB, TMP0
836  |   daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2)
837  |  sd TAB:RB, 0(CARG2)
838  |   settp STR:RC, TMP1
839  |  b >1
840  |.  sd STR:RC, 0(CARG3)
841  |
842  |->vmeta_tsetb:			// TMP0 = index
843  |  daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
844  |  settp TMP0, TISNUM
845  |  sd TMP0, 0(CARG3)
846  |
847  |->vmeta_tsetv:
848  |1:
849  |  load_got lj_meta_tset
850  |  sd BASE, L->base
851  |  sd PC, SAVE_PC
852  |  call_intern lj_meta_tset		// (lua_State *L, TValue *o, TValue *k)
853  |.  move CARG1, L
854  |  // Returns TValue * (finished) or NULL (metamethod).
855  |  beqz CRET1, >3
856  |.  ld CARG1, 0(RA)
857  |  // NOBARRIER: lj_meta_tset ensures the table is not black.
858  |  ins_next1
859  |  sd CARG1, 0(CRET1)
860  |  ins_next2
861  |
862  |3:  // Call __newindex metamethod.
863  |  // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
864  |  daddiu TMP1, BASE, -FRAME_CONT
865  |  ld BASE, L->top
866  |  sd PC, -24(BASE)			// [cont|PC]
867  |   dsubu PC, BASE, TMP1
868  |  ld LFUNC:RB, FRAME_FUNC(BASE)	// Guaranteed to be a function here.
869  |  cleartp LFUNC:RB
870  |  sd CARG1, 16(BASE)			// Copy value to third argument.
871  |  b ->vm_call_dispatch_f
872  |.  li NARGS8:RC, 24			// 3 args for func(t, k, v)
873  |
874  |->vmeta_tsetr:
875  |  load_got lj_tab_setinth
876  |  sd BASE, L->base
877  |  sd PC, SAVE_PC
878  |  call_intern lj_tab_setinth	// (lua_State *L, GCtab *t, int32_t key)
879  |.  move CARG1, L
880  |  // Returns TValue *.
881  |  b ->BC_TSETR_Z
882  |.  nop
883  |
884  |//-- Comparison metamethods ---------------------------------------------
885  |
886  |->vmeta_comp:
887  |  // RA/RD point to o1/o2.
888  |  move CARG2, RA
889  |  move CARG3, RD
890  |  load_got lj_meta_comp
891  |  daddiu PC, PC, -4
892  |  sd BASE, L->base
893  |  sd PC, SAVE_PC
894  |  decode_OP1 CARG4, INS
895  |  call_intern lj_meta_comp	// (lua_State *L, TValue *o1, *o2, int op)
896  |.  move CARG1, L
897  |  // Returns 0/1 or TValue * (metamethod).
898  |3:
899  |  sltiu AT, CRET1, 2
900  |  beqz AT, ->vmeta_binop
901  |   negu TMP2, CRET1
902  |4:
903  |  lhu RD, OFS_RD(PC)
904  |   daddiu PC, PC, 4
905  |   lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535)
906  |  sll RD, RD, 2
907  |  addu RD, RD, TMP1
908  |  and RD, RD, TMP2
909  |  daddu PC, PC, RD
910  |->cont_nop:
911  |  ins_next
912  |
913  |->cont_ra:				// RA = resultptr
914  |  lbu TMP1, -4+OFS_RA(PC)
915  |   ld CRET1, 0(RA)
916  |  sll TMP1, TMP1, 3
917  |  daddu TMP1, BASE, TMP1
918  |  b ->cont_nop
919  |.   sd CRET1, 0(TMP1)
920  |
921  |->cont_condt:			// RA = resultptr
922  |  ld TMP0, 0(RA)
923  |  gettp TMP0, TMP0
924  |  sltiu AT, TMP0, LJ_TISTRUECOND
925  |  b <4
926  |.  negu TMP2, AT			// Branch if result is true.
927  |
928  |->cont_condf:			// RA = resultptr
929  |  ld TMP0, 0(RA)
930  |  gettp TMP0, TMP0
931  |  sltiu AT, TMP0, LJ_TISTRUECOND
932  |  b <4
933  |.  addiu TMP2, AT, -1		// Branch if result is false.
934  |
935  |->vmeta_equal:
936  |  // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1.
937  |  load_got lj_meta_equal
938  |   cleartp LFUNC:CARG3, CARG2
939  |  cleartp LFUNC:CARG2, CARG1
940  |    move CARG4, TMP0
941  |  daddiu PC, PC, -4
942  |   sd BASE, L->base
943  |   sd PC, SAVE_PC
944  |  call_intern lj_meta_equal	// (lua_State *L, GCobj *o1, *o2, int ne)
945  |.  move CARG1, L
946  |  // Returns 0/1 or TValue * (metamethod).
947  |  b <3
948  |.  nop
949  |
950  |->vmeta_equal_cd:
951  |.if FFI
952  |  load_got lj_meta_equal_cd
953  |  move CARG2, INS
954  |  daddiu PC, PC, -4
955  |   sd BASE, L->base
956  |   sd PC, SAVE_PC
957  |  call_intern lj_meta_equal_cd	// (lua_State *L, BCIns op)
958  |.  move CARG1, L
959  |  // Returns 0/1 or TValue * (metamethod).
960  |  b <3
961  |.  nop
962  |.endif
963  |
964  |->vmeta_istype:
965  |  load_got lj_meta_istype
966  |  daddiu PC, PC, -4
967  |   sd BASE, L->base
968  |   srl CARG2, RA, 3
969  |   srl CARG3, RD, 3
970  |  sd PC, SAVE_PC
971  |  call_intern lj_meta_istype	// (lua_State *L, BCReg ra, BCReg tp)
972  |.  move CARG1, L
973  |  b ->cont_nop
974  |.  nop
975  |
976  |//-- Arithmetic metamethods ---------------------------------------------
977  |
978  |->vmeta_unm:
979  |  move RC, RB
980  |
981  |->vmeta_arith:
982  |  load_got lj_meta_arith
983  |   sd BASE, L->base
984  |  move CARG2, RA
985  |   sd PC, SAVE_PC
986  |  move CARG3, RB
987  |  move CARG4, RC
988  |  decode_OP1 CARG5, INS	// CARG5 == RB.
989  |  call_intern lj_meta_arith	// (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
990  |.  move CARG1, L
991  |  // Returns NULL (finished) or TValue * (metamethod).
992  |  beqz CRET1, ->cont_nop
993  |.  nop
994  |
995  |  // Call metamethod for binary op.
996  |->vmeta_binop:
997  |  // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
998  |  dsubu TMP1, CRET1, BASE
999  |   sd PC, -24(CRET1)			// [cont|PC]
1000  |   move TMP2, BASE
1001  |  daddiu PC, TMP1, FRAME_CONT
1002  |   move BASE, CRET1
1003  |  b ->vm_call_dispatch
1004  |.  li NARGS8:RC, 16			// 2 args for func(o1, o2).
1005  |
1006  |->vmeta_len:
1007  |  // CARG2 already set by BC_LEN.
1008#if LJ_52
1009  |  move MULTRES, CARG1
1010#endif
1011  |  load_got lj_meta_len
1012  |   sd BASE, L->base
1013  |   sd PC, SAVE_PC
1014  |  call_intern lj_meta_len		// (lua_State *L, TValue *o)
1015  |.  move CARG1, L
1016  |  // Returns NULL (retry) or TValue * (metamethod base).
1017#if LJ_52
1018  |  bnez CRET1, ->vmeta_binop		// Binop call for compatibility.
1019  |.  nop
1020  |  b ->BC_LEN_Z
1021  |.  move CARG1, MULTRES
1022#else
1023  |  b ->vmeta_binop			// Binop call for compatibility.
1024  |.  nop
1025#endif
1026  |
1027  |//-- Call metamethod ----------------------------------------------------
1028  |
1029  |->vmeta_call:			// Resolve and call __call metamethod.
1030  |  // TMP2 = old base, BASE = new base, RC = nargs*8
1031  |  load_got lj_meta_call
1032  |   sd TMP2, L->base			// This is the callers base!
1033  |  daddiu CARG2, BASE, -16
1034  |   sd PC, SAVE_PC
1035  |  daddu CARG3, BASE, RC
1036  |   move MULTRES, NARGS8:RC
1037  |  call_intern lj_meta_call	// (lua_State *L, TValue *func, TValue *top)
1038  |.  move CARG1, L
1039  |  ld LFUNC:RB, FRAME_FUNC(BASE)	// Guaranteed to be a function here.
1040  |   daddiu NARGS8:RC, MULTRES, 8	// Got one more argument now.
1041  |  cleartp LFUNC:RB
1042  |  ins_call
1043  |
1044  |->vmeta_callt:			// Resolve __call for BC_CALLT.
1045  |  // BASE = old base, RA = new base, RC = nargs*8
1046  |  load_got lj_meta_call
1047  |   sd BASE, L->base
1048  |  daddiu CARG2, RA, -16
1049  |   sd PC, SAVE_PC
1050  |  daddu CARG3, RA, RC
1051  |   move MULTRES, NARGS8:RC
1052  |  call_intern lj_meta_call		// (lua_State *L, TValue *func, TValue *top)
1053  |.  move CARG1, L
1054  |   ld RB, FRAME_FUNC(RA)		// Guaranteed to be a function here.
1055  |  ld TMP1, FRAME_PC(BASE)
1056  |  daddiu NARGS8:RC, MULTRES, 8	// Got one more argument now.
1057  |  b ->BC_CALLT_Z
1058  |.  cleartp LFUNC:CARG3, RB
1059  |
1060  |//-- Argument coercion for 'for' statement ------------------------------
1061  |
1062  |->vmeta_for:
1063  |  load_got lj_meta_for
1064  |   sd BASE, L->base
1065  |  move CARG2, RA
1066  |   sd PC, SAVE_PC
1067  |  move MULTRES, INS
1068  |  call_intern lj_meta_for	// (lua_State *L, TValue *base)
1069  |.  move CARG1, L
1070  |.if JIT
1071  |  decode_OP1 TMP0, MULTRES
1072  |  li AT, BC_JFORI
1073  |.endif
1074  |  decode_RA8a RA, MULTRES
1075  |   decode_RD8a RD, MULTRES
1076  |  decode_RA8b RA
1077  |.if JIT
1078  |  beq TMP0, AT, =>BC_JFORI
1079  |.  decode_RD8b RD
1080  |  b =>BC_FORI
1081  |.  nop
1082  |.else
1083  |  b =>BC_FORI
1084  |.  decode_RD8b RD
1085  |.endif
1086  |
1087  |//-----------------------------------------------------------------------
1088  |//-- Fast functions -----------------------------------------------------
1089  |//-----------------------------------------------------------------------
1090  |
1091  |.macro .ffunc, name
1092  |->ff_ .. name:
1093  |.endmacro
1094  |
1095  |.macro .ffunc_1, name
1096  |->ff_ .. name:
1097  |  beqz NARGS8:RC, ->fff_fallback
1098  |.  ld CARG1, 0(BASE)
1099  |.endmacro
1100  |
1101  |.macro .ffunc_2, name
1102  |->ff_ .. name:
1103  |  sltiu AT, NARGS8:RC, 16
1104  |  ld CARG1, 0(BASE)
1105  |  bnez AT, ->fff_fallback
1106  |.  ld CARG2, 8(BASE)
1107  |.endmacro
1108  |
1109  |.macro .ffunc_n, name	// Caveat: has delay slot!
1110  |->ff_ .. name:
1111  |  ld CARG1, 0(BASE)
1112  |  beqz NARGS8:RC, ->fff_fallback
1113  |  // Either ldc1 or the 1st instruction of checknum is in the delay slot.
1114  |  .FPU ldc1 FARG1, 0(BASE)
1115  |  checknum CARG1, ->fff_fallback
1116  |.endmacro
1117  |
1118  |.macro .ffunc_nn, name	// Caveat: has delay slot!
1119  |->ff_ .. name:
1120  |  ld CARG1, 0(BASE)
1121  |    sltiu AT, NARGS8:RC, 16
1122  |   ld CARG2, 8(BASE)
1123  |  bnez AT, ->fff_fallback
1124  |.  gettp TMP0, CARG1
1125  |   gettp TMP1, CARG2
1126  |  sltiu TMP0, TMP0, LJ_TISNUM
1127  |   sltiu TMP1, TMP1, LJ_TISNUM
1128  |  .FPU ldc1 FARG1, 0(BASE)
1129  |  and TMP0, TMP0, TMP1
1130  |   .FPU ldc1 FARG2, 8(BASE)
1131  |  beqz TMP0, ->fff_fallback
1132  |.endmacro
1133  |
1134  |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot!
1135  |// MIPSR6: no delay slot, but a forbidden slot.
1136  |.macro ffgccheck
1137  |  ld TMP0, DISPATCH_GL(gc.total)(DISPATCH)
1138  |  ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
1139  |  dsubu AT, TMP0, TMP1
1140  |.if MIPSR6
1141  |  bgezalc AT, ->fff_gcstep
1142  |.else
1143  |  bgezal AT, ->fff_gcstep
1144  |.endif
1145  |.endmacro
1146  |
1147  |//-- Base library: checks -----------------------------------------------
1148  |.ffunc_1 assert
1149  |  gettp AT, CARG1
1150  |  sltiu AT, AT, LJ_TISTRUECOND
1151  |  beqz AT, ->fff_fallback
1152  |.  daddiu RA, BASE, -16
1153  |  ld PC, FRAME_PC(BASE)
1154  |  addiu RD, NARGS8:RC, 8		// Compute (nresults+1)*8.
1155  |  daddu TMP2, RA, RD
1156  |  daddiu TMP1, BASE, 8
1157  |  beq BASE, TMP2, ->fff_res		// Done if exactly 1 argument.
1158  |.  sd CARG1, 0(RA)
1159  |1:
1160  |  ld CRET1, 0(TMP1)
1161  |  sd CRET1, -16(TMP1)
1162  |  bne TMP1, TMP2, <1
1163  |.  daddiu TMP1, TMP1, 8
1164  |  b ->fff_res
1165  |.  nop
1166  |
1167  |.ffunc_1 type
1168  |  gettp TMP0, CARG1
1169  |  sltu TMP1, TISNUM, TMP0
1170  |  not TMP2, TMP0
1171  |  li TMP3, ~LJ_TISNUM
1172  |.if MIPSR6
1173  |  selnez TMP2, TMP2, TMP1
1174  |  seleqz TMP3, TMP3, TMP1
1175  |  or TMP2, TMP2, TMP3
1176  |.else
1177  |  movz TMP2, TMP3, TMP1
1178  |.endif
1179  |  dsll TMP2, TMP2, 3
1180  |  daddu TMP2, CFUNC:RB, TMP2
1181  |  b ->fff_restv
1182  |.  ld CARG1, CFUNC:TMP2->upvalue
1183  |
1184  |//-- Base library: getters and setters ---------------------------------
1185  |
1186  |.ffunc_1 getmetatable
1187  |  gettp TMP2, CARG1
1188  |  daddiu TMP0, TMP2, -LJ_TTAB
1189  |  daddiu TMP1, TMP2, -LJ_TUDATA
1190  |.if MIPSR6
1191  |  selnez TMP0, TMP1, TMP0
1192  |.else
1193  |  movn TMP0, TMP1, TMP0
1194  |.endif
1195  |  bnez TMP0, >6
1196  |.  cleartp TAB:CARG1
1197  |1:  // Field metatable must be at same offset for GCtab and GCudata!
1198  |  ld TAB:RB, TAB:CARG1->metatable
1199  |2:
1200  |  ld STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
1201  |  beqz TAB:RB, ->fff_restv
1202  |.  li CARG1, LJ_TNIL
1203  |  lw TMP0, TAB:RB->hmask
1204  |   lw TMP1, STR:RC->sid
1205  |    ld NODE:TMP2, TAB:RB->node
1206  |  and TMP1, TMP1, TMP0		// idx = str->sid & tab->hmask
1207  |  dsll TMP0, TMP1, 5
1208  |  dsll TMP1, TMP1, 3
1209  |  dsubu TMP1, TMP0, TMP1
1210  |  daddu NODE:TMP2, NODE:TMP2, TMP1	// node = tab->node + (idx*32-idx*8)
1211  |  li CARG4, LJ_TSTR
1212  |  settp STR:RC, CARG4		// Tagged key to look for.
1213  |3:  // Rearranged logic, because we expect _not_ to find the key.
1214  |  ld TMP0, NODE:TMP2->key
1215  |   ld CARG1, NODE:TMP2->val
1216  |    ld NODE:TMP2, NODE:TMP2->next
1217  |  beq RC, TMP0, >5
1218  |.  li AT, LJ_TTAB
1219  |  bnez NODE:TMP2, <3
1220  |.  nop
1221  |4:
1222  |  move CARG1, RB
1223  |  b ->fff_restv			// Not found, keep default result.
1224  |.  settp CARG1, AT
1225  |5:
1226  |  bne CARG1, TISNIL, ->fff_restv
1227  |.  nop
1228  |  b <4				// Ditto for nil value.
1229  |.  nop
1230  |
1231  |6:
1232  |  sltiu AT, TMP2, LJ_TISNUM
1233  |.if MIPSR6
1234  |  selnez TMP0, TISNUM, AT
1235  |  seleqz AT, TMP2, AT
1236  |  or TMP2, TMP0, AT
1237  |.else
1238  |  movn TMP2, TISNUM, AT
1239  |.endif
1240  |  dsll TMP2, TMP2, 3
1241  |   dsubu TMP0, DISPATCH, TMP2
1242  |  b <2
1243  |.  ld TAB:RB, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8(TMP0)
1244  |
1245  |.ffunc_2 setmetatable
1246  |  // Fast path: no mt for table yet and not clearing the mt.
1247  |  checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback
1248  |  gettp TMP3, CARG2
1249  |   ld TAB:TMP0, TAB:TMP1->metatable
1250  |   lbu TMP2, TAB:TMP1->marked
1251  |  daddiu AT, TMP3, -LJ_TTAB
1252  |   cleartp TAB:CARG2
1253  |  or AT, AT, TAB:TMP0
1254  |  bnez AT, ->fff_fallback
1255  |.  andi AT, TMP2, LJ_GC_BLACK	// isblack(table)
1256  |  beqz AT, ->fff_restv
1257  |.  sd TAB:CARG2, TAB:TMP1->metatable
1258  |  barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv
1259  |
1260  |.ffunc rawget
1261  |  ld CARG2, 0(BASE)
1262  |  sltiu AT, NARGS8:RC, 16
1263  |  load_got lj_tab_get
1264  |  gettp TMP0, CARG2
1265  |   cleartp CARG2
1266  |  daddiu TMP0, TMP0, -LJ_TTAB
1267  |  or AT, AT, TMP0
1268  |  bnez AT, ->fff_fallback
1269  |.  daddiu CARG3, BASE, 8
1270  |  call_intern lj_tab_get	// (lua_State *L, GCtab *t, cTValue *key)
1271  |.  move CARG1, L
1272  |  b ->fff_restv
1273  |.  ld CARG1, 0(CRET1)
1274  |
1275  |//-- Base library: conversions ------------------------------------------
1276  |
1277  |.ffunc tonumber
1278  |  // Only handles the number case inline (without a base argument).
1279  |  ld CARG1, 0(BASE)
1280  |  xori AT, NARGS8:RC, 8		// Exactly one number argument.
1281  |  gettp TMP1, CARG1
1282  |  sltu TMP0, TISNUM, TMP1
1283  |  or AT, AT, TMP0
1284  |  bnez AT, ->fff_fallback
1285  |.  nop
1286  |  b ->fff_restv
1287  |.  nop
1288  |
1289  |.ffunc_1 tostring
1290  |  // Only handles the string or number case inline.
1291  |  gettp TMP0, CARG1
1292  |  daddiu AT, TMP0, -LJ_TSTR
1293  |  // A __tostring method in the string base metatable is ignored.
1294  |  beqz AT, ->fff_restv	// String key?
1295  |  // Handle numbers inline, unless a number base metatable is present.
1296  |.  ld TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
1297  |  sltu TMP0, TISNUM, TMP0
1298  |  or TMP0, TMP0, TMP1
1299  |  bnez TMP0, ->fff_fallback
1300  |.  sd BASE, L->base			// Add frame since C call can throw.
1301  |.if MIPSR6
1302  |  sd PC, SAVE_PC			// Redundant (but a defined value).
1303  |  ffgccheck
1304  |.else
1305  |  ffgccheck
1306  |.  sd PC, SAVE_PC			// Redundant (but a defined value).
1307  |.endif
1308  |  load_got lj_strfmt_number
1309  |  move CARG1, L
1310  |  call_intern lj_strfmt_number	// (lua_State *L, cTValue *o)
1311  |.  move CARG2, BASE
1312  |  // Returns GCstr *.
1313  |  li AT, LJ_TSTR
1314  |  settp CRET1, AT
1315  |  b ->fff_restv
1316  |.  move CARG1, CRET1
1317  |
1318  |//-- Base library: iterators -------------------------------------------
1319  |
1320  |.ffunc_1 next
1321  |  checktp CARG2, CARG1, -LJ_TTAB, ->fff_fallback
1322  |  daddu TMP2, BASE, NARGS8:RC
1323  |  sd TISNIL, 0(TMP2)			// Set missing 2nd arg to nil.
1324  |  ld PC, FRAME_PC(BASE)
1325  |  load_got lj_tab_next
1326  |   sd BASE, L->base			// Add frame since C call can throw.
1327  |   sd BASE, L->top			// Dummy frame length is ok.
1328  |  daddiu CARG3, BASE, 8
1329  |   sd PC, SAVE_PC
1330  |  call_intern lj_tab_next		// (lua_State *L, GCtab *t, TValue *key)
1331  |.  move CARG1, L
1332  |  // Returns 0 at end of traversal.
1333  |  beqz CRET1, ->fff_restv		// End of traversal: return nil.
1334  |.  move CARG1, TISNIL
1335  |  ld TMP0, 8(BASE)
1336  |    daddiu RA, BASE, -16
1337  |  ld TMP2, 16(BASE)
1338  |  sd TMP0, 0(RA)
1339  |  sd TMP2, 8(RA)
1340  |  b ->fff_res
1341  |.  li RD, (2+1)*8
1342  |
1343  |.ffunc_1 pairs
1344  |  checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
1345  |  ld PC, FRAME_PC(BASE)
1346#if LJ_52
1347  |  ld TAB:TMP2, TAB:TMP1->metatable
1348  |  ld TMP0, CFUNC:RB->upvalue[0]
1349  |  bnez TAB:TMP2, ->fff_fallback
1350#else
1351  |  ld TMP0, CFUNC:RB->upvalue[0]
1352#endif
1353  |.  daddiu RA, BASE, -16
1354  |  sd TISNIL, 0(BASE)
1355  |   sd CARG1, -8(BASE)
1356  |    sd TMP0, 0(RA)
1357  |  b ->fff_res
1358  |.  li RD, (3+1)*8
1359  |
1360  |.ffunc_2 ipairs_aux
1361  |  checktab CARG1, ->fff_fallback
1362  |   checkint CARG2, ->fff_fallback
1363  |.  lw TMP0, TAB:CARG1->asize
1364  |   ld TMP1, TAB:CARG1->array
1365  |    ld PC, FRAME_PC(BASE)
1366  |  sextw TMP2, CARG2
1367  |  addiu TMP2, TMP2, 1
1368  |  sltu AT, TMP2, TMP0
1369  |    daddiu RA, BASE, -16
1370  |   zextw TMP0, TMP2
1371  |   settp TMP0, TISNUM
1372  |  beqz AT, >2			// Not in array part?
1373  |.  sd TMP0, 0(RA)
1374  |  dsll TMP3, TMP2, 3
1375  |  daddu TMP3, TMP1, TMP3
1376  |  ld TMP1, 0(TMP3)
1377  |1:
1378  |  beq TMP1, TISNIL, ->fff_res	// End of iteration, return 0 results.
1379  |.  li RD, (0+1)*8
1380  |  sd TMP1, -8(BASE)
1381  |  b ->fff_res
1382  |.  li RD, (2+1)*8
1383  |2:  // Check for empty hash part first. Otherwise call C function.
1384  |  lw TMP0, TAB:CARG1->hmask
1385  |  load_got lj_tab_getinth
1386  |  beqz TMP0, ->fff_res
1387  |.  li RD, (0+1)*8
1388  |  call_intern lj_tab_getinth		// (GCtab *t, int32_t key)
1389  |.  move CARG2, TMP2
1390  |  // Returns cTValue * or NULL.
1391  |  beqz CRET1, ->fff_res
1392  |.  li RD, (0+1)*8
1393  |  b <1
1394  |.  ld TMP1, 0(CRET1)
1395  |
1396  |.ffunc_1 ipairs
1397  |  checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
1398  |  ld PC, FRAME_PC(BASE)
1399#if LJ_52
1400  |  ld TAB:TMP2, TAB:TMP1->metatable
1401  |  ld CFUNC:TMP0, CFUNC:RB->upvalue[0]
1402  |  bnez TAB:TMP2, ->fff_fallback
1403#else
1404  |  ld TMP0, CFUNC:RB->upvalue[0]
1405#endif
1406  |  daddiu RA, BASE, -16
1407  |  dsll AT, TISNUM, 47
1408  |  sd CARG1, -8(BASE)
1409  |   sd AT, 0(BASE)
1410  |    sd CFUNC:TMP0, 0(RA)
1411  |  b ->fff_res
1412  |.  li RD, (3+1)*8
1413  |
1414  |//-- Base library: catch errors ----------------------------------------
1415  |
1416  |.ffunc pcall
1417  |  daddiu NARGS8:RC, NARGS8:RC, -8
1418  |  lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
1419  |  bltz NARGS8:RC, ->fff_fallback
1420  |.   move TMP2, BASE
1421  |   daddiu BASE, BASE, 16
1422  |  // Remember active hook before pcall.
1423  |  srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
1424  |  andi TMP3, TMP3, 1
1425  |  daddiu PC, TMP3, 16+FRAME_PCALL
1426  |  beqz NARGS8:RC, ->vm_call_dispatch
1427  |1:
1428  |.  daddu TMP0, BASE, NARGS8:RC
1429  |2:
1430  |  ld TMP1, -16(TMP0)
1431  |  sd TMP1, -8(TMP0)
1432  |  daddiu TMP0, TMP0, -8
1433  |  bne TMP0, BASE, <2
1434  |.  nop
1435  |  b ->vm_call_dispatch
1436  |.  nop
1437  |
1438  |.ffunc xpcall
1439  |  daddiu NARGS8:TMP0, NARGS8:RC, -16
1440  |  ld CARG1, 0(BASE)
1441  |   ld CARG2, 8(BASE)
1442  |    bltz NARGS8:TMP0, ->fff_fallback
1443  |.    lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
1444  |  gettp AT, CARG2
1445  |  daddiu AT, AT, -LJ_TFUNC
1446  |  bnez AT, ->fff_fallback		// Traceback must be a function.
1447  |.   move TMP2, BASE
1448  |  move NARGS8:RC, NARGS8:TMP0
1449  |   daddiu BASE, BASE, 24
1450  |  // Remember active hook before pcall.
1451  |  srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
1452  |   sd CARG2, 0(TMP2)			// Swap function and traceback.
1453  |  andi TMP3, TMP3, 1
1454  |   sd CARG1, 8(TMP2)
1455  |  beqz NARGS8:RC, ->vm_call_dispatch
1456  |.  daddiu PC, TMP3, 24+FRAME_PCALL
1457  |  b <1
1458  |.  nop
1459  |
1460  |//-- Coroutine library --------------------------------------------------
1461  |
1462  |.macro coroutine_resume_wrap, resume
1463  |.if resume
1464  |.ffunc_1 coroutine_resume
1465  |  checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback
1466  |.else
1467  |.ffunc coroutine_wrap_aux
1468  |  ld L:CARG1, CFUNC:RB->upvalue[0].gcr
1469  |  cleartp L:CARG1
1470  |.endif
1471  |  lbu TMP0, L:CARG1->status
1472  |   ld TMP1, L:CARG1->cframe
1473  |    ld CARG2, L:CARG1->top
1474  |    ld TMP2, L:CARG1->base
1475  |  addiu AT, TMP0, -LUA_YIELD
1476  |    daddu CARG3, CARG2, TMP0
1477  |   daddiu TMP3, CARG2, 8
1478  |.if MIPSR6
1479  |  seleqz CARG2, CARG2, AT
1480  |  selnez TMP3, TMP3, AT
1481  |  bgtz AT, ->fff_fallback		// st > LUA_YIELD?
1482  |.  or CARG2, TMP3, CARG2
1483  |.else
1484  |  bgtz AT, ->fff_fallback		// st > LUA_YIELD?
1485  |.  movn CARG2, TMP3, AT
1486  |.endif
1487  |   xor TMP2, TMP2, CARG3
1488  |  bnez TMP1, ->fff_fallback		// cframe != 0?
1489  |.  or AT, TMP2, TMP0
1490  |  ld TMP0, L:CARG1->maxstack
1491  |  beqz AT, ->fff_fallback		// base == top && st == 0?
1492  |.  ld PC, FRAME_PC(BASE)
1493  |  daddu TMP2, CARG2, NARGS8:RC
1494  |  sltu AT, TMP0, TMP2
1495  |  bnez AT, ->fff_fallback		// Stack overflow?
1496  |.  sd PC, SAVE_PC
1497  |   sd BASE, L->base
1498  |1:
1499  |.if resume
1500  |  daddiu BASE, BASE, 8		// Keep resumed thread in stack for GC.
1501  |  daddiu NARGS8:RC, NARGS8:RC, -8
1502  |  daddiu TMP2, TMP2, -8
1503  |.endif
1504  |  sd TMP2, L:CARG1->top
1505  |  daddu TMP1, BASE, NARGS8:RC
1506  |  move CARG3, CARG2
1507  |  sd BASE, L->top
1508  |2:  // Move args to coroutine.
1509  |   ld CRET1, 0(BASE)
1510  |  sltu AT, BASE, TMP1
1511  |  beqz AT, >3
1512  |.  daddiu BASE, BASE, 8
1513  |   sd CRET1, 0(CARG3)
1514  |  b <2
1515  |.  daddiu CARG3, CARG3, 8
1516  |3:
1517  |  bal ->vm_resume			// (lua_State *L, TValue *base, 0, 0)
1518  |.  move L:RA, L:CARG1
1519  |  // Returns thread status.
1520  |4:
1521  |  ld TMP2, L:RA->base
1522  |   sltiu AT, CRET1, LUA_YIELD+1
1523  |  ld TMP3, L:RA->top
1524  |    li_vmstate INTERP
1525  |  ld BASE, L->base
1526  |    sd L, DISPATCH_GL(cur_L)(DISPATCH)
1527  |    st_vmstate
1528  |   beqz AT, >8
1529  |. dsubu RD, TMP3, TMP2
1530  |   ld TMP0, L->maxstack
1531  |  beqz RD, >6			// No results?
1532  |.  daddu TMP1, BASE, RD
1533  |  sltu AT, TMP0, TMP1
1534  |  bnez AT, >9			// Need to grow stack?
1535  |.  daddu TMP3, TMP2, RD
1536  |  sd TMP2, L:RA->top			// Clear coroutine stack.
1537  |  move TMP1, BASE
1538  |5:  // Move results from coroutine.
1539  |   ld CRET1, 0(TMP2)
1540  |  daddiu TMP2, TMP2, 8
1541  |  sltu AT, TMP2, TMP3
1542  |   sd CRET1, 0(TMP1)
1543  |  bnez AT, <5
1544  |.  daddiu TMP1, TMP1, 8
1545  |6:
1546  |  andi TMP0, PC, FRAME_TYPE
1547  |.if resume
1548  |  mov_true TMP1
1549  |   daddiu RA, BASE, -8
1550  |  sd TMP1, -8(BASE)			// Prepend true to results.
1551  |  daddiu RD, RD, 16
1552  |.else
1553  |  move RA, BASE
1554  |  daddiu RD, RD, 8
1555  |.endif
1556  |7:
1557  |  sd PC, SAVE_PC
1558  |  beqz TMP0, ->BC_RET_Z
1559  |.  move MULTRES, RD
1560  |  b ->vm_return
1561  |.  nop
1562  |
1563  |8:  // Coroutine returned with error (at co->top-1).
1564  |.if resume
1565  |  daddiu TMP3, TMP3, -8
1566  |   mov_false TMP1
1567  |  ld CRET1, 0(TMP3)
1568  |   sd TMP3, L:RA->top		// Remove error from coroutine stack.
1569  |    li RD, (2+1)*8
1570  |   sd TMP1, -8(BASE)			// Prepend false to results.
1571  |    daddiu RA, BASE, -8
1572  |  sd CRET1, 0(BASE)			// Copy error message.
1573  |  b <7
1574  |.  andi TMP0, PC, FRAME_TYPE
1575  |.else
1576  |  load_got lj_ffh_coroutine_wrap_err
1577  |  move CARG2, L:RA
1578  |  call_intern lj_ffh_coroutine_wrap_err  // (lua_State *L, lua_State *co)
1579  |.  move CARG1, L
1580  |.endif
1581  |
1582  |9:  // Handle stack expansion on return from yield.
1583  |  load_got lj_state_growstack
1584  |  srl CARG2, RD, 3
1585  |  call_intern lj_state_growstack	// (lua_State *L, int n)
1586  |.  move CARG1, L
1587  |  b <4
1588  |.  li CRET1, 0
1589  |.endmacro
1590  |
1591  |  coroutine_resume_wrap 1		// coroutine.resume
1592  |  coroutine_resume_wrap 0		// coroutine.wrap
1593  |
1594  |.ffunc coroutine_yield
1595  |  ld TMP0, L->cframe
1596  |   daddu TMP1, BASE, NARGS8:RC
1597  |   sd BASE, L->base
1598  |  andi TMP0, TMP0, CFRAME_RESUME
1599  |   sd TMP1, L->top
1600  |  beqz TMP0, ->fff_fallback
1601  |.   li CRET1, LUA_YIELD
1602  |  sd r0, L->cframe
1603  |  b ->vm_leave_unw
1604  |.   sb CRET1, L->status
1605  |
1606  |//-- Math library -------------------------------------------------------
1607  |
1608  |.ffunc_1 math_abs
1609  |  gettp CARG2, CARG1
1610  |  daddiu AT, CARG2, -LJ_TISNUM
1611  |  bnez AT, >1
1612  |.  sextw TMP1, CARG1
1613  |  sra TMP0, TMP1, 31			// Extract sign.
1614  |  xor TMP1, TMP1, TMP0
1615  |  dsubu CARG1, TMP1, TMP0
1616  |  dsll TMP3, CARG1, 32
1617  |  bgez TMP3, ->fff_restv
1618  |.  settp CARG1, TISNUM
1619  |  li CARG1, 0x41e0			// 2^31 as a double.
1620  |  b ->fff_restv
1621  |.  dsll CARG1, CARG1, 48
1622  |1:
1623  |  sltiu AT, CARG2, LJ_TISNUM
1624  |  beqz AT, ->fff_fallback
1625  |.  dextm CARG1, CARG1, 0, 30
1626  |// fallthrough
1627  |
1628  |->fff_restv:
1629  |  // CARG1 = TValue result.
1630  |  ld PC, FRAME_PC(BASE)
1631  |  daddiu RA, BASE, -16
1632  |   sd CARG1, -16(BASE)
1633  |->fff_res1:
1634  |  // RA = results, PC = return.
1635  |  li RD, (1+1)*8
1636  |->fff_res:
1637  |  // RA = results, RD = (nresults+1)*8, PC = return.
1638  |  andi TMP0, PC, FRAME_TYPE
1639  |  bnez TMP0, ->vm_return
1640  |.  move MULTRES, RD
1641  |  lw INS, -4(PC)
1642  |  decode_RB8a RB, INS
1643  |  decode_RB8b RB
1644  |5:
1645  |  sltu AT, RD, RB
1646  |  bnez AT, >6			// More results expected?
1647  |.  decode_RA8a TMP0, INS
1648  |  decode_RA8b TMP0
1649  |  ins_next1
1650  |  // Adjust BASE. KBASE is assumed to be set for the calling frame.
1651  |   dsubu BASE, RA, TMP0
1652  |  ins_next2
1653  |
1654  |6:  // Fill up results with nil.
1655  |  daddu TMP1, RA, RD
1656  |   daddiu RD, RD, 8
1657  |  b <5
1658  |.  sd TISNIL, -8(TMP1)
1659  |
1660  |.macro math_extern, func
1661  |  .ffunc_n math_ .. func
1662  |  load_got func
1663  |  call_extern
1664  |.  nop
1665  |  b ->fff_resn
1666  |.  nop
1667  |.endmacro
1668  |
1669  |.macro math_extern2, func
1670  |  .ffunc_nn math_ .. func
1671  |.  load_got func
1672  |  call_extern
1673  |.  nop
1674  |  b ->fff_resn
1675  |.  nop
1676  |.endmacro
1677  |
1678  |// TODO: Return integer type if result is integer (own sf implementation).
1679  |.macro math_round, func
1680  |->ff_math_ .. func:
1681  |  ld CARG1, 0(BASE)
1682  |  beqz NARGS8:RC, ->fff_fallback
1683  |.  gettp TMP0, CARG1
1684  |  beq TMP0, TISNUM, ->fff_restv
1685  |.  sltu AT, TMP0, TISNUM
1686  |  beqz AT, ->fff_fallback
1687  |.if FPU
1688  |.  ldc1 FARG1, 0(BASE)
1689  |  bal ->vm_ .. func
1690  |.  nop
1691  |.else
1692  |.  load_got func
1693  |  call_extern
1694  |.  nop
1695  |.endif
1696  |  b ->fff_resn
1697  |.  nop
1698  |.endmacro
1699  |
1700  |  math_round floor
1701  |  math_round ceil
1702  |
1703  |.ffunc math_log
1704  |  li AT, 8
1705  |  bne NARGS8:RC, AT, ->fff_fallback	// Exactly 1 argument.
1706  |.  ld CARG1, 0(BASE)
1707  |  checknum CARG1, ->fff_fallback
1708  |.  load_got log
1709  |.if FPU
1710  |  call_extern
1711  |.  ldc1 FARG1, 0(BASE)
1712  |.else
1713  |  call_extern
1714  |.  nop
1715  |.endif
1716  |  b ->fff_resn
1717  |.  nop
1718  |
1719  |  math_extern log10
1720  |  math_extern exp
1721  |  math_extern sin
1722  |  math_extern cos
1723  |  math_extern tan
1724  |  math_extern asin
1725  |  math_extern acos
1726  |  math_extern atan
1727  |  math_extern sinh
1728  |  math_extern cosh
1729  |  math_extern tanh
1730  |  math_extern2 pow
1731  |  math_extern2 atan2
1732  |  math_extern2 fmod
1733  |
1734  |.if FPU
1735  |.ffunc_n math_sqrt
1736  |.  sqrt.d FRET1, FARG1
1737  |// fallthrough to ->fff_resn
1738  |.else
1739  |  math_extern sqrt
1740  |.endif
1741  |
1742  |->fff_resn:
1743  |  ld PC, FRAME_PC(BASE)
1744  |  daddiu RA, BASE, -16
1745  |  b ->fff_res1
1746  |.if FPU
1747  |.  sdc1 FRET1, 0(RA)
1748  |.else
1749  |.  sd CRET1, 0(RA)
1750  |.endif
1751  |
1752  |
1753  |.ffunc_2 math_ldexp
1754  |  checknum CARG1, ->fff_fallback
1755  |  checkint CARG2, ->fff_fallback
1756  |.  load_got ldexp
1757  |  .FPU ldc1 FARG1, 0(BASE)
1758  |  call_extern
1759  |.  lw CARG2, 8+LO(BASE)
1760  |  b ->fff_resn
1761  |.  nop
1762  |
1763  |.ffunc_n math_frexp
1764  |  load_got frexp
1765  |   ld PC, FRAME_PC(BASE)
1766  |  call_extern
1767  |.  daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv)
1768  |   lw TMP1, DISPATCH_GL(tmptv)(DISPATCH)
1769  |  daddiu RA, BASE, -16
1770  |.if FPU
1771  |   mtc1 TMP1, FARG2
1772  |  sdc1 FRET1, 0(RA)
1773  |   cvt.d.w FARG2, FARG2
1774  |   sdc1 FARG2, 8(RA)
1775  |.else
1776  |  sd CRET1, 0(RA)
1777  |  zextw TMP1, TMP1
1778  |  settp TMP1, TISNUM
1779  |  sd TMP1, 8(RA)
1780  |.endif
1781  |  b ->fff_res
1782  |.  li RD, (2+1)*8
1783  |
1784  |.ffunc_n math_modf
1785  |  load_got modf
1786  |   ld PC, FRAME_PC(BASE)
1787  |  call_extern
1788  |.  daddiu CARG2, BASE, -16
1789  |  daddiu RA, BASE, -16
1790  |.if FPU
1791  |  sdc1 FRET1, -8(BASE)
1792  |.else
1793  |  sd CRET1, -8(BASE)
1794  |.endif
1795  |  b ->fff_res
1796  |.  li RD, (2+1)*8
1797  |
1798  |.macro math_minmax, name, intins, intinsc, fpins
1799  |  .ffunc_1 name
1800  |  daddu TMP3, BASE, NARGS8:RC
1801  |  checkint CARG1, >5
1802  |.  daddiu TMP2, BASE, 8
1803  |1:  // Handle integers.
1804  |  beq TMP2, TMP3, ->fff_restv
1805  |.  ld CARG2, 0(TMP2)
1806  |  checkint CARG2, >3
1807  |.  sextw CARG1, CARG1
1808  |  lw CARG2, LO(TMP2)
1809  |.  slt AT, CARG1, CARG2
1810  |.if MIPSR6
1811  |  intins TMP1, CARG2, AT
1812  |  intinsc CARG1, CARG1, AT
1813  |  or CARG1, CARG1, TMP1
1814  |.else
1815  |  intins CARG1, CARG2, AT
1816  |.endif
1817  |  daddiu TMP2, TMP2, 8
1818  |  zextw CARG1, CARG1
1819  |  b <1
1820  |.  settp CARG1, TISNUM
1821  |
1822  |3:  // Convert intermediate result to number and continue with number loop.
1823  |  checknum CARG2, ->fff_fallback
1824  |.if FPU
1825  |.  mtc1 CARG1, FRET1
1826  |  cvt.d.w FRET1, FRET1
1827  |  b >7
1828  |.  ldc1 FARG1, 0(TMP2)
1829  |.else
1830  |.  nop
1831  |  bal ->vm_sfi2d_1
1832  |.  nop
1833  |  b >7
1834  |.  nop
1835  |.endif
1836  |
1837  |5:
1838  |  .FPU ldc1 FRET1, 0(BASE)
1839  |  checknum CARG1, ->fff_fallback
1840  |6:  // Handle numbers.
1841  |.  ld CARG2, 0(TMP2)
1842  |  beq TMP2, TMP3, ->fff_resn
1843  |.if FPU
1844  |  ldc1 FARG1, 0(TMP2)
1845  |.else
1846  |  move CRET1, CARG1
1847  |.endif
1848  |  checknum CARG2, >8
1849  |.  nop
1850  |7:
1851  |.if FPU
1852  |.if MIPSR6
1853  |  fpins FRET1, FRET1, FARG1
1854  |.else
1855  |.if fpins  // ismax
1856  |  c.olt.d FARG1, FRET1
1857  |.else
1858  |  c.olt.d FRET1, FARG1
1859  |.endif
1860  |  movf.d FRET1, FARG1
1861  |.endif
1862  |.else
1863  |.if fpins  // ismax
1864  |  bal ->vm_sfcmpogt
1865  |.else
1866  |  bal ->vm_sfcmpolt
1867  |.endif
1868  |.  nop
1869  |.if MIPSR6
1870  |  seleqz AT, CARG2, CRET1
1871  |  selnez CARG1, CARG1, CRET1
1872  |  or CARG1, CARG1, AT
1873  |.else
1874  |  movz CARG1, CARG2, CRET1
1875  |.endif
1876  |.endif
1877  |  b <6
1878  |.  daddiu TMP2, TMP2, 8
1879  |
1880  |8:  // Convert integer to number and continue with number loop.
1881  |  checkint CARG2, ->fff_fallback
1882  |.if FPU
1883  |.  lwc1 FARG1, LO(TMP2)
1884  |  b <7
1885  |.  cvt.d.w FARG1, FARG1
1886  |.else
1887  |.  lw CARG2, LO(TMP2)
1888  |  bal ->vm_sfi2d_2
1889  |.  nop
1890  |  b <7
1891  |.  nop
1892  |.endif
1893  |
1894  |.endmacro
1895  |
1896  |.if MIPSR6
1897  |  math_minmax math_min, seleqz, selnez, min.d
1898  |  math_minmax math_max, selnez, seleqz, max.d
1899  |.else
1900  |  math_minmax math_min, movz, _, 0
1901  |  math_minmax math_max, movn, _, 1
1902  |.endif
1903  |
1904  |//-- String library -----------------------------------------------------
1905  |
1906  |.ffunc string_byte			// Only handle the 1-arg case here.
1907  |  ld CARG1, 0(BASE)
1908  |  gettp TMP0, CARG1
1909  |  xori AT, NARGS8:RC, 8
1910  |  daddiu TMP0, TMP0, -LJ_TSTR
1911  |  or AT, AT, TMP0
1912  |  bnez AT, ->fff_fallback		// Need exactly 1 string argument.
1913  |.  cleartp STR:CARG1
1914  |  lw TMP0, STR:CARG1->len
1915  |    daddiu RA, BASE, -16
1916  |    ld PC, FRAME_PC(BASE)
1917  |  sltu RD, r0, TMP0
1918  |   lbu TMP1, STR:CARG1[1]		// Access is always ok (NUL at end).
1919  |  addiu RD, RD, 1
1920  |  sll RD, RD, 3			// RD = ((str->len != 0)+1)*8
1921  |  settp TMP1, TISNUM
1922  |  b ->fff_res
1923  |.  sd TMP1, 0(RA)
1924  |
1925  |.ffunc string_char			// Only handle the 1-arg case here.
1926  |  ffgccheck
1927  |.if not MIPSR6
1928  |.  nop
1929  |.endif
1930  |  ld CARG1, 0(BASE)
1931  |  gettp TMP0, CARG1
1932  |  xori AT, NARGS8:RC, 8		// Exactly 1 argument.
1933  |  daddiu TMP0, TMP0, -LJ_TISNUM	// Integer.
1934  |  li TMP1, 255
1935  |   sextw CARG1, CARG1
1936  |  or AT, AT, TMP0
1937  |   sltu TMP1, TMP1, CARG1		// !(255 < n).
1938  |   or AT, AT, TMP1
1939  |  bnez AT, ->fff_fallback
1940  |.  li CARG3, 1
1941  |  daddiu CARG2, sp, TMPD_OFS
1942  |  sb CARG1, TMPD
1943  |->fff_newstr:
1944  |  load_got lj_str_new
1945  |   sd BASE, L->base
1946  |   sd PC, SAVE_PC
1947  |  call_intern lj_str_new		// (lua_State *L, char *str, size_t l)
1948  |.  move CARG1, L
1949  |  // Returns GCstr *.
1950  |  ld BASE, L->base
1951  |->fff_resstr:
1952  |  li AT, LJ_TSTR
1953  |  settp CRET1, AT
1954  |  b ->fff_restv
1955  |.  move CARG1, CRET1
1956  |
1957  |.ffunc string_sub
1958  |  ffgccheck
1959  |.if not MIPSR6
1960  |.  nop
1961  |.endif
1962  |  addiu AT, NARGS8:RC, -16
1963  |  ld TMP0, 0(BASE)
1964  |  bltz AT, ->fff_fallback
1965  |.  gettp TMP3, TMP0
1966  |  cleartp STR:CARG1, TMP0
1967  |  ld CARG2, 8(BASE)
1968  |  beqz AT, >1
1969  |.  li CARG4, -1
1970  |  ld CARG3, 16(BASE)
1971  |  checkint CARG3, ->fff_fallback
1972  |.  sextw CARG4, CARG3
1973  |1:
1974  |  checkint CARG2, ->fff_fallback
1975  |.  li AT, LJ_TSTR
1976  |  bne TMP3, AT, ->fff_fallback
1977  |.  sextw CARG3, CARG2
1978  |  lw CARG2, STR:CARG1->len
1979  |  // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
1980  |  slt AT, CARG4, r0
1981  |  addiu TMP0, CARG2, 1
1982  |  addu TMP1, CARG4, TMP0
1983  |   slt TMP3, CARG3, r0
1984  |.if MIPSR6
1985  |  seleqz CARG4, CARG4, AT
1986  |  selnez TMP1, TMP1, AT
1987  |  or CARG4, TMP1, CARG4		// if (end < 0) end += len+1
1988  |.else
1989  |  movn CARG4, TMP1, AT		// if (end < 0) end += len+1
1990  |.endif
1991  |   addu TMP1, CARG3, TMP0
1992  |.if MIPSR6
1993  |   selnez TMP1, TMP1, TMP3
1994  |   seleqz CARG3, CARG3, TMP3
1995  |   or CARG3, TMP1, CARG3		// if (start < 0) start += len+1
1996  |   li TMP2, 1
1997  |  slt AT, CARG4, r0
1998  |   slt TMP3, r0, CARG3
1999  |  seleqz CARG4, CARG4, AT		// if (end < 0) end = 0
2000  |   selnez CARG3, CARG3, TMP3
2001  |   seleqz TMP2, TMP2, TMP3
2002  |   or CARG3, TMP2, CARG3		// if (start < 1) start = 1
2003  |  slt AT, CARG2, CARG4
2004  |  seleqz CARG4, CARG4, AT
2005  |  selnez CARG2, CARG2, AT
2006  |  or CARG4, CARG2, CARG4		// if (end > len) end = len
2007  |.else
2008  |   movn CARG3, TMP1, TMP3		// if (start < 0) start += len+1
2009  |   li TMP2, 1
2010  |  slt AT, CARG4, r0
2011  |   slt TMP3, r0, CARG3
2012  |  movn CARG4, r0, AT			// if (end < 0) end = 0
2013  |   movz CARG3, TMP2, TMP3		// if (start < 1) start = 1
2014  |  slt AT, CARG2, CARG4
2015  |  movn CARG4, CARG2, AT		// if (end > len) end = len
2016  |.endif
2017  |   daddu CARG2, STR:CARG1, CARG3
2018  |  subu CARG3, CARG4, CARG3		// len = end - start
2019  |   daddiu CARG2, CARG2, sizeof(GCstr)-1
2020  |  bgez CARG3, ->fff_newstr
2021  |.  addiu CARG3, CARG3, 1		// len++
2022  |->fff_emptystr:  // Return empty string.
2023  |  li AT, LJ_TSTR
2024  |  daddiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty)
2025  |  b ->fff_restv
2026  |.  settp CARG1, AT
2027  |
2028  |.macro ffstring_op, name
2029  |  .ffunc string_ .. name
2030  |  ffgccheck
2031  |.  nop
2032  |  beqz NARGS8:RC, ->fff_fallback
2033  |.  ld CARG2, 0(BASE)
2034  |  checkstr STR:CARG2, ->fff_fallback
2035  |  daddiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf)
2036  |  load_got lj_buf_putstr_ .. name
2037  |  ld TMP0, SBUF:CARG1->b
2038  |   sd L, SBUF:CARG1->L
2039  |   sd BASE, L->base
2040  |  sd TMP0, SBUF:CARG1->p
2041  |  call_intern extern lj_buf_putstr_ .. name
2042  |.  sd PC, SAVE_PC
2043  |  load_got lj_buf_tostr
2044  |  call_intern lj_buf_tostr
2045  |.  move SBUF:CARG1, SBUF:CRET1
2046  |  b ->fff_resstr
2047  |.  ld BASE, L->base
2048  |.endmacro
2049  |
2050  |ffstring_op reverse
2051  |ffstring_op lower
2052  |ffstring_op upper
2053  |
2054  |//-- Bit library --------------------------------------------------------
2055  |
2056  |->vm_tobit_fb:
2057  |  beqz TMP1, ->fff_fallback
2058  |.if FPU
2059  |.  ldc1 FARG1, 0(BASE)
2060  |  add.d FARG1, FARG1, TOBIT
2061  |  mfc1 CRET1, FARG1
2062  |  jr ra
2063  |.  zextw CRET1, CRET1
2064  |.else
2065  |// FP number to bit conversion for soft-float.
2066  |->vm_tobit:
2067  |  dsll TMP0, CARG1, 1
2068  |  li CARG3, 1076
2069  |  dsrl AT, TMP0, 53
2070  |  dsubu CARG3, CARG3, AT
2071  |  sltiu AT, CARG3, 54
2072  |  beqz AT, >1
2073  |.  dextm TMP0, TMP0, 0, 20
2074  |  dinsu TMP0, AT, 21, 21
2075  |  slt AT, CARG1, r0
2076  |  dsrlv CRET1, TMP0, CARG3
2077  |  dsubu TMP0, r0, CRET1
2078  |.if MIPSR6
2079  |  selnez TMP0, TMP0, AT
2080  |  seleqz CRET1, CRET1, AT
2081  |  or CRET1, CRET1, TMP0
2082  |.else
2083  |  movn CRET1, TMP0, AT
2084  |.endif
2085  |  jr ra
2086  |.  zextw CRET1, CRET1
2087  |1:
2088  |  jr ra
2089  |.  move CRET1, r0
2090  |
2091  |// FP number to int conversion with a check for soft-float.
2092  |// Modifies CARG1, CRET1, CRET2, TMP0, AT.
2093  |->vm_tointg:
2094  |.if JIT
2095  |  dsll CRET2, CARG1, 1
2096  |  beqz CRET2, >2
2097  |.  li TMP0, 1076
2098  |  dsrl AT, CRET2, 53
2099  |  dsubu TMP0, TMP0, AT
2100  |  sltiu AT, TMP0, 54
2101  |  beqz AT, >1
2102  |.  dextm CRET2, CRET2, 0, 20
2103  |  dinsu CRET2, AT, 21, 21
2104  |  slt AT, CARG1, r0
2105  |  dsrlv CRET1, CRET2, TMP0
2106  |  dsubu CARG1, r0, CRET1
2107  |.if MIPSR6
2108  |  seleqz CRET1, CRET1, AT
2109  |  selnez CARG1, CARG1, AT
2110  |  or CRET1, CRET1, CARG1
2111  |.else
2112  |  movn CRET1, CARG1, AT
2113  |.endif
2114  |  li CARG1, 64
2115  |  subu TMP0, CARG1, TMP0
2116  |  dsllv CRET2, CRET2, TMP0	// Integer check.
2117  |  sextw AT, CRET1
2118  |  xor AT, CRET1, AT		// Range check.
2119  |.if MIPSR6
2120  |  seleqz AT, AT, CRET2
2121  |  selnez CRET2, CRET2, CRET2
2122  |  jr ra
2123  |.  or CRET2, AT, CRET2
2124  |.else
2125  |  jr ra
2126  |.  movz CRET2, AT, CRET2
2127  |.endif
2128  |1:
2129  |  jr ra
2130  |.  li CRET2, 1
2131  |2:
2132  |  jr ra
2133  |.  move CRET1, r0
2134  |.endif
2135  |.endif
2136  |
2137  |.macro .ffunc_bit, name
2138  |  .ffunc_1 bit_..name
2139  |  gettp TMP0, CARG1
2140  |  beq TMP0, TISNUM, >6
2141  |.  zextw CRET1, CARG1
2142  |  bal ->vm_tobit_fb
2143  |.  sltiu TMP1, TMP0, LJ_TISNUM
2144  |6:
2145  |.endmacro
2146  |
2147  |.macro .ffunc_bit_op, name, bins
2148  |  .ffunc_bit name
2149  |  daddiu TMP2, BASE, 8
2150  |  daddu TMP3, BASE, NARGS8:RC
2151  |1:
2152  |  beq TMP2, TMP3, ->fff_resi
2153  |.  ld CARG1, 0(TMP2)
2154  |  gettp TMP0, CARG1
2155  |.if FPU
2156  |  bne TMP0, TISNUM, >2
2157  |.  daddiu TMP2, TMP2, 8
2158  |  zextw CARG1, CARG1
2159  |  b <1
2160  |.  bins CRET1, CRET1, CARG1
2161  |2:
2162  |   ldc1 FARG1, -8(TMP2)
2163  |  sltiu AT, TMP0, LJ_TISNUM
2164  |  beqz AT, ->fff_fallback
2165  |.  add.d FARG1, FARG1, TOBIT
2166  |  mfc1 CARG1, FARG1
2167  |  zextw CARG1, CARG1
2168  |  b <1
2169  |.  bins CRET1, CRET1, CARG1
2170  |.else
2171  |  beq TMP0, TISNUM, >2
2172  |.  move CRET2, CRET1
2173  |  bal ->vm_tobit_fb
2174  |.  sltiu TMP1, TMP0, LJ_TISNUM
2175  |  move CARG1, CRET2
2176  |2:
2177  |  zextw CARG1, CARG1
2178  |  bins CRET1, CRET1, CARG1
2179  |  b <1
2180  |.  daddiu TMP2, TMP2, 8
2181  |.endif
2182  |.endmacro
2183  |
2184  |.ffunc_bit_op band, and
2185  |.ffunc_bit_op bor, or
2186  |.ffunc_bit_op bxor, xor
2187  |
2188  |.ffunc_bit bswap
2189  |  dsrl TMP0, CRET1, 8
2190  |   dsrl TMP1, CRET1, 24
2191  |  andi TMP2, TMP0, 0xff00
2192  |   dins TMP1, CRET1, 24, 31
2193  |  dins TMP2, TMP0, 16, 23
2194  |  b ->fff_resi
2195  |.  or CRET1, TMP1, TMP2
2196  |
2197  |.ffunc_bit bnot
2198  |  not CRET1, CRET1
2199  |  b ->fff_resi
2200  |.  zextw CRET1, CRET1
2201  |
2202  |.macro .ffunc_bit_sh, name, shins, shmod
2203  |  .ffunc_2 bit_..name
2204  |  gettp TMP0, CARG1
2205  |  beq TMP0, TISNUM, >1
2206  |.  nop
2207  |  bal ->vm_tobit_fb
2208  |.  sltiu TMP1, TMP0, LJ_TISNUM
2209  |  move CARG1, CRET1
2210  |1:
2211  |  gettp TMP0, CARG2
2212  |  bne TMP0, TISNUM, ->fff_fallback
2213  |.  zextw CARG2, CARG2
2214  |  sextw CARG1, CARG1
2215  |.if shmod == 1
2216  |  negu CARG2, CARG2
2217  |.endif
2218  |  shins CRET1, CARG1, CARG2
2219  |  b ->fff_resi
2220  |.  zextw CRET1, CRET1
2221  |.endmacro
2222  |
2223  |.ffunc_bit_sh lshift, sllv, 0
2224  |.ffunc_bit_sh rshift, srlv, 0
2225  |.ffunc_bit_sh arshift, srav, 0
2226  |.ffunc_bit_sh rol, rotrv, 1
2227  |.ffunc_bit_sh ror, rotrv, 0
2228  |
2229  |.ffunc_bit tobit
2230  |->fff_resi:
2231  |  ld PC, FRAME_PC(BASE)
2232  |  daddiu RA, BASE, -16
2233  |  settp CRET1, TISNUM
2234  |  b ->fff_res1
2235  |.  sd CRET1, -16(BASE)
2236  |
2237  |//-----------------------------------------------------------------------
2238  |->fff_fallback:			// Call fast function fallback handler.
2239  |  // BASE = new base, RB = CFUNC, RC = nargs*8
2240  |  ld TMP3, CFUNC:RB->f
2241  |    daddu TMP1, BASE, NARGS8:RC
2242  |   ld PC, FRAME_PC(BASE)		// Fallback may overwrite PC.
2243  |    daddiu TMP0, TMP1, 8*LUA_MINSTACK
2244  |     ld TMP2, L->maxstack
2245  |   sd PC, SAVE_PC			// Redundant (but a defined value).
2246  |  sltu AT, TMP2, TMP0
2247  |     sd BASE, L->base
2248  |    sd TMP1, L->top
2249  |  bnez AT, >5			// Need to grow stack.
2250  |.  move CFUNCADDR, TMP3
2251  |  jalr TMP3				// (lua_State *L)
2252  |.  move CARG1, L
2253  |  // Either throws an error, or recovers and returns -1, 0 or nresults+1.
2254  |  ld BASE, L->base
2255  |   sll RD, CRET1, 3
2256  |  bgtz CRET1, ->fff_res		// Returned nresults+1?
2257  |.  daddiu RA, BASE, -16
2258  |1:  // Returned 0 or -1: retry fast path.
2259  |   ld LFUNC:RB, FRAME_FUNC(BASE)
2260  |  ld TMP0, L->top
2261  |   cleartp LFUNC:RB
2262  |  bnez CRET1, ->vm_call_tail		// Returned -1?
2263  |.  dsubu NARGS8:RC, TMP0, BASE
2264  |  ins_callt				// Returned 0: retry fast path.
2265  |
2266  |// Reconstruct previous base for vmeta_call during tailcall.
2267  |->vm_call_tail:
2268  |  andi TMP0, PC, FRAME_TYPE
2269  |   li AT, -4
2270  |  bnez TMP0, >3
2271  |.  and TMP1, PC, AT
2272  |  lbu TMP1, OFS_RA(PC)
2273  |  sll TMP1, TMP1, 3
2274  |  addiu TMP1, TMP1, 16
2275  |3:
2276  |  b ->vm_call_dispatch		// Resolve again for tailcall.
2277  |.  dsubu TMP2, BASE, TMP1
2278  |
2279  |5:  // Grow stack for fallback handler.
2280  |  load_got lj_state_growstack
2281  |  li CARG2, LUA_MINSTACK
2282  |  call_intern lj_state_growstack	// (lua_State *L, int n)
2283  |.  move CARG1, L
2284  |  ld BASE, L->base
2285  |  b <1
2286  |.  li CRET1, 0			// Force retry.
2287  |
2288  |->fff_gcstep:			// Call GC step function.
2289  |  // BASE = new base, RC = nargs*8
2290  |  move MULTRES, ra
2291  |  load_got lj_gc_step
2292  |   sd BASE, L->base
2293  |  daddu TMP0, BASE, NARGS8:RC
2294  |   sd PC, SAVE_PC			// Redundant (but a defined value).
2295  |  sd TMP0, L->top
2296  |  call_intern lj_gc_step		// (lua_State *L)
2297  |.  move CARG1, L
2298  |   ld BASE, L->base
2299  |  move ra, MULTRES
2300  |    ld TMP0, L->top
2301  |  ld CFUNC:RB, FRAME_FUNC(BASE)
2302  |  cleartp CFUNC:RB
2303  |  jr ra
2304  |.  dsubu NARGS8:RC, TMP0, BASE
2305  |
2306  |//-----------------------------------------------------------------------
2307  |//-- Special dispatch targets -------------------------------------------
2308  |//-----------------------------------------------------------------------
2309  |
2310  |->vm_record:				// Dispatch target for recording phase.
2311  |.if JIT
2312  |  lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
2313  |  andi AT, TMP3, HOOK_VMEVENT	// No recording while in vmevent.
2314  |  bnez AT, >5
2315  |  // Decrement the hookcount for consistency, but always do the call.
2316  |.  lw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
2317  |  andi AT, TMP3, HOOK_ACTIVE
2318  |  bnez AT, >1
2319  |.  addiu TMP2, TMP2, -1
2320  |  andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
2321  |  beqz AT, >1
2322  |.  nop
2323  |  b >1
2324  |.  sw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
2325  |.endif
2326  |
2327  |->vm_rethook:			// Dispatch target for return hooks.
2328  |  lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
2329  |  andi AT, TMP3, HOOK_ACTIVE		// Hook already active?
2330  |  beqz AT, >1
2331  |5:  // Re-dispatch to static ins.
2332  |.  ld AT, GG_DISP2STATIC(TMP0)	// Assumes TMP0 holds DISPATCH+OP*4.
2333  |  jr AT
2334  |.  nop
2335  |
2336  |->vm_inshook:			// Dispatch target for instr/line hooks.
2337  |  lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
2338  |  lw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
2339  |  andi AT, TMP3, HOOK_ACTIVE		// Hook already active?
2340  |  bnez AT, <5
2341  |.  andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
2342  |  beqz AT, <5
2343  |.  addiu TMP2, TMP2, -1
2344  |  beqz TMP2, >1
2345  |.  sw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
2346  |  andi AT, TMP3, LUA_MASKLINE
2347  |  beqz AT, <5
2348  |1:
2349  |.  load_got lj_dispatch_ins
2350  |   sw MULTRES, SAVE_MULTRES
2351  |  move CARG2, PC
2352  |   sd BASE, L->base
2353  |  // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2354  |  call_intern lj_dispatch_ins	// (lua_State *L, const BCIns *pc)
2355  |.  move CARG1, L
2356  |3:
2357  |  ld BASE, L->base
2358  |4:  // Re-dispatch to static ins.
2359  |  lw INS, -4(PC)
2360  |  decode_OP8a TMP1, INS
2361  |  decode_OP8b TMP1
2362  |  daddu TMP0, DISPATCH, TMP1
2363  |   decode_RD8a RD, INS
2364  |  ld AT, GG_DISP2STATIC(TMP0)
2365  |   decode_RA8a RA, INS
2366  |   decode_RD8b RD
2367  |  jr AT
2368  |   decode_RA8b RA
2369  |
2370  |->cont_hook:				// Continue from hook yield.
2371  |  daddiu PC, PC, 4
2372  |  b <4
2373  |.  lw MULTRES, -24+LO(RB)		// Restore MULTRES for *M ins.
2374  |
2375  |->vm_hotloop:			// Hot loop counter underflow.
2376  |.if JIT
2377  |  ld LFUNC:TMP1, FRAME_FUNC(BASE)
2378  |   daddiu CARG1, DISPATCH, GG_DISP2J
2379  |  cleartp LFUNC:TMP1
2380  |   sd PC, SAVE_PC
2381  |  ld TMP1, LFUNC:TMP1->pc
2382  |   move CARG2, PC
2383  |   sd L, DISPATCH_J(L)(DISPATCH)
2384  |  lbu TMP1, PC2PROTO(framesize)(TMP1)
2385  |  load_got lj_trace_hot
2386  |   sd BASE, L->base
2387  |  dsll TMP1, TMP1, 3
2388  |  daddu TMP1, BASE, TMP1
2389  |  call_intern lj_trace_hot		// (jit_State *J, const BCIns *pc)
2390  |.  sd TMP1, L->top
2391  |  b <3
2392  |.  nop
2393  |.endif
2394  |
2395  |
2396  |->vm_callhook:			// Dispatch target for call hooks.
2397  |.if JIT
2398  |  b >1
2399  |.endif
2400  |.  move CARG2, PC
2401  |
2402  |->vm_hotcall:			// Hot call counter underflow.
2403  |.if JIT
2404  |  ori CARG2, PC, 1
2405  |1:
2406  |.endif
2407  |  load_got lj_dispatch_call
2408  |  daddu TMP0, BASE, RC
2409  |   sd PC, SAVE_PC
2410  |   sd BASE, L->base
2411  |  dsubu RA, RA, BASE
2412  |   sd TMP0, L->top
2413  |  call_intern lj_dispatch_call	// (lua_State *L, const BCIns *pc)
2414  |.  move CARG1, L
2415  |  // Returns ASMFunction.
2416  |  ld BASE, L->base
2417  |   ld TMP0, L->top
2418  |   sd r0, SAVE_PC			// Invalidate for subsequent line hook.
2419  |  dsubu NARGS8:RC, TMP0, BASE
2420  |  daddu RA, BASE, RA
2421  |  ld LFUNC:RB, FRAME_FUNC(BASE)
2422  |  cleartp LFUNC:RB
2423  |  jr CRET1
2424  |.  lw INS, -4(PC)
2425  |
2426  |->cont_stitch:			// Trace stitching.
2427  |.if JIT
2428  |  // RA = resultptr, RB = meta base
2429  |  lw INS, -4(PC)
2430  |    ld TRACE:TMP2, -40(RB)		// Save previous trace.
2431  |  decode_RA8a RC, INS
2432  |   daddiu AT, MULTRES, -8
2433  |    cleartp TRACE:TMP2
2434  |  decode_RA8b RC
2435  |   beqz AT, >2
2436  |. daddu RC, BASE, RC			// Call base.
2437  |1:  // Move results down.
2438  |  ld CARG1, 0(RA)
2439  |   daddiu AT, AT, -8
2440  |    daddiu RA, RA, 8
2441  |  sd CARG1, 0(RC)
2442  |   bnez AT, <1
2443  |.   daddiu RC, RC, 8
2444  |2:
2445  |   decode_RA8a RA, INS
2446  |    decode_RB8a RB, INS
2447  |   decode_RA8b RA
2448  |    decode_RB8b RB
2449  |   daddu RA, RA, RB
2450  |   daddu RA, BASE, RA
2451  |3:
2452  |   sltu AT, RC, RA
2453  |   bnez AT, >9			// More results wanted?
2454  |.   nop
2455  |
2456  |  lhu TMP3, TRACE:TMP2->traceno
2457  |  lhu RD, TRACE:TMP2->link
2458  |  beq RD, TMP3, ->cont_nop		// Blacklisted.
2459  |.  load_got lj_dispatch_stitch
2460  |  bnez RD, =>BC_JLOOP		// Jump to stitched trace.
2461  |.  sll RD, RD, 3
2462  |
2463  |  // Stitch a new trace to the previous trace.
2464  |  sw TMP3, DISPATCH_J(exitno)(DISPATCH)
2465  |  sd L, DISPATCH_J(L)(DISPATCH)
2466  |  sd BASE, L->base
2467  |  daddiu CARG1, DISPATCH, GG_DISP2J
2468  |  call_intern lj_dispatch_stitch	// (jit_State *J, const BCIns *pc)
2469  |.  move CARG2, PC
2470  |  b ->cont_nop
2471  |.  ld BASE, L->base
2472  |
2473  |9:
2474  |  sd TISNIL, 0(RC)
2475  |  b <3
2476  |.  daddiu RC, RC, 8
2477  |.endif
2478  |
2479  |->vm_profhook:			// Dispatch target for profiler hook.
2480#if LJ_HASPROFILE
2481  |  load_got lj_dispatch_profile
2482  |   sw MULTRES, SAVE_MULTRES
2483  |  move CARG2, PC
2484  |   sd BASE, L->base
2485  |  call_intern lj_dispatch_profile	// (lua_State *L, const BCIns *pc)
2486  |.  move CARG1, L
2487  |  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2488  |  daddiu PC, PC, -4
2489  |  b ->cont_nop
2490  |.  ld BASE, L->base
2491#endif
2492  |
2493  |//-----------------------------------------------------------------------
2494  |//-- Trace exit handler -------------------------------------------------
2495  |//-----------------------------------------------------------------------
2496  |
2497  |.macro savex_, a, b
2498  |.if FPU
2499  |  sdc1 f..a, a*8(sp)
2500  |  sdc1 f..b, b*8(sp)
2501  |  sd r..a, 32*8+a*8(sp)
2502  |  sd r..b, 32*8+b*8(sp)
2503  |.else
2504  |  sd r..a, a*8(sp)
2505  |  sd r..b, b*8(sp)
2506  |.endif
2507  |.endmacro
2508  |
2509  |->vm_exit_handler:
2510  |.if JIT
2511  |.if FPU
2512  |  daddiu sp, sp, -(32*8+32*8)
2513  |.else
2514  |  daddiu sp, sp, -(32*8)
2515  |.endif
2516  |  savex_ 0, 1
2517  |  savex_ 2, 3
2518  |  savex_ 4, 5
2519  |  savex_ 6, 7
2520  |  savex_ 8, 9
2521  |  savex_ 10, 11
2522  |  savex_ 12, 13
2523  |  savex_ 14, 15
2524  |  savex_ 16, 17
2525  |  savex_ 18, 19
2526  |  savex_ 20, 21
2527  |  savex_ 22, 23
2528  |  savex_ 24, 25
2529  |  savex_ 26, 27
2530  |  savex_ 28, 30
2531  |.if FPU
2532  |  sdc1 f29, 29*8(sp)
2533  |  sdc1 f31, 31*8(sp)
2534  |  sd r0, 32*8+31*8(sp)		// Clear RID_TMP.
2535  |  daddiu TMP2, sp, 32*8+32*8		// Recompute original value of sp.
2536  |  sd TMP2, 32*8+29*8(sp)		// Store sp in RID_SP
2537  |.else
2538  |  sd r0, 31*8(sp)			// Clear RID_TMP.
2539  |  daddiu TMP2, sp, 32*8		// Recompute original value of sp.
2540  |  sd TMP2, 29*8(sp)			// Store sp in RID_SP
2541  |.endif
2542  |  li_vmstate EXIT
2543  |  daddiu DISPATCH, JGL, -GG_DISP2G-32768
2544  |  lw TMP1, 0(TMP2)			// Load exit number.
2545  |  st_vmstate
2546  |  ld L, DISPATCH_GL(cur_L)(DISPATCH)
2547  |   ld BASE, DISPATCH_GL(jit_base)(DISPATCH)
2548  |  load_got lj_trace_exit
2549  |  sd L, DISPATCH_J(L)(DISPATCH)
2550  |  sw ra, DISPATCH_J(parent)(DISPATCH)  // Store trace number.
2551  |   sd BASE, L->base
2552  |  sw TMP1, DISPATCH_J(exitno)(DISPATCH)  // Store exit number.
2553  |  daddiu CARG1, DISPATCH, GG_DISP2J
2554  |   sd r0, DISPATCH_GL(jit_base)(DISPATCH)
2555  |  call_intern lj_trace_exit		// (jit_State *J, ExitState *ex)
2556  |.  move CARG2, sp
2557  |  // Returns MULTRES (unscaled) or negated error code.
2558  |  ld TMP1, L->cframe
2559  |  li AT, -4
2560  |   ld BASE, L->base
2561  |  and sp, TMP1, AT
2562  |   ld PC, SAVE_PC			// Get SAVE_PC.
2563  |  b >1
2564  |.  sd L, SAVE_L			// Set SAVE_L (on-trace resume/yield).
2565  |.endif
2566  |->vm_exit_interp:
2567  |.if JIT
2568  |  // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
2569  |  ld L, SAVE_L
2570  |   daddiu DISPATCH, JGL, -GG_DISP2G-32768
2571  |  sd BASE, L->base
2572  |1:
2573  |  bltz CRET1, >9			// Check for error from exit.
2574  |.  ld LFUNC:RB, FRAME_FUNC(BASE)
2575  |    .FPU lui TMP3, 0x59c0		// TOBIT = 2^52 + 2^51 (float).
2576  |  dsll MULTRES, CRET1, 3
2577  |  cleartp LFUNC:RB
2578  |  sw MULTRES, SAVE_MULTRES
2579  |    li TISNIL, LJ_TNIL
2580  |     li TISNUM, LJ_TISNUM		// Setup type comparison constants.
2581  |    .FPU mtc1 TMP3, TOBIT
2582  |  ld TMP1, LFUNC:RB->pc
2583  |   sd r0, DISPATCH_GL(jit_base)(DISPATCH)
2584  |  ld KBASE, PC2PROTO(k)(TMP1)
2585  |    .FPU cvt.d.s TOBIT, TOBIT
2586  |  // Modified copy of ins_next which handles function header dispatch, too.
2587  |  lw INS, 0(PC)
2588  |   daddiu PC, PC, 4
2589  |    // Assumes TISNIL == ~LJ_VMST_INTERP == -1
2590  |    sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
2591  |  decode_OP8a TMP1, INS
2592  |  decode_OP8b TMP1
2593  |    sltiu TMP2, TMP1, BC_FUNCF*8
2594  |  daddu TMP0, DISPATCH, TMP1
2595  |   decode_RD8a RD, INS
2596  |  ld AT, 0(TMP0)
2597  |   decode_RA8a RA, INS
2598  |    beqz TMP2, >2
2599  |.  decode_RA8b RA
2600  |  jr AT
2601  |.  decode_RD8b RD
2602  |2:
2603  |  sltiu TMP2, TMP1, (BC_FUNCC+2)*8	// Fast function?
2604  |  bnez TMP2, >3
2605  |.  ld TMP1, FRAME_PC(BASE)
2606  |  // Check frame below fast function.
2607  |  andi TMP0, TMP1, FRAME_TYPE
2608  |  bnez TMP0, >3			// Trace stitching continuation?
2609  |.  nop
2610  |  // Otherwise set KBASE for Lua function below fast function.
2611  |  lw TMP2, -4(TMP1)
2612  |  decode_RA8a TMP0, TMP2
2613  |  decode_RA8b TMP0
2614  |  dsubu TMP1, BASE, TMP0
2615  |  ld LFUNC:TMP2, -32(TMP1)
2616  |  cleartp LFUNC:TMP2
2617  |  ld TMP1, LFUNC:TMP2->pc
2618  |  ld KBASE, PC2PROTO(k)(TMP1)
2619  |3:
2620  |  daddiu RC, MULTRES, -8
2621  |  jr AT
2622  |.  daddu RA, RA, BASE
2623  |
2624  |9:  // Rethrow error from the right C frame.
2625  |  load_got lj_err_run
2626  |  call_intern lj_err_run		// (lua_State *L)
2627  |.  move CARG1, L
2628  |.endif
2629  |
2630  |//-----------------------------------------------------------------------
2631  |//-- Math helper functions ----------------------------------------------
2632  |//-----------------------------------------------------------------------
2633  |
2634  |// Hard-float round to integer.
2635  |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
2636  |// MIPSR6: Modifies FTMP1, too.
2637  |.macro vm_round_hf, func
2638  |  lui TMP0, 0x4330			// Hiword of 2^52 (double).
2639  |  dsll TMP0, TMP0, 32
2640  |  dmtc1 TMP0, f4
2641  |  abs.d FRET2, FARG1			// |x|
2642  |    dmfc1 AT, FARG1
2643  |.if MIPSR6
2644  |  cmp.lt.d FTMP1, FRET2, f4
2645  |   add.d FRET1, FRET2, f4		// (|x| + 2^52) - 2^52
2646  |  bc1eqz FTMP1, >1			// Truncate only if |x| < 2^52.
2647  |.else
2648  |  c.olt.d 0, FRET2, f4
2649  |   add.d FRET1, FRET2, f4		// (|x| + 2^52) - 2^52
2650  |  bc1f 0, >1				// Truncate only if |x| < 2^52.
2651  |.endif
2652  |.  sub.d FRET1, FRET1, f4
2653  |    slt AT, AT, r0
2654  |.if "func" == "ceil"
2655  |   lui TMP0, 0xbff0			// Hiword of -1 (double). Preserves -0.
2656  |.else
2657  |   lui TMP0, 0x3ff0			// Hiword of +1 (double).
2658  |.endif
2659  |.if "func" == "trunc"
2660  |   dsll TMP0, TMP0, 32
2661  |   dmtc1 TMP0, f4
2662  |.if MIPSR6
2663  |  cmp.lt.d FTMP1, FRET2, FRET1	// |x| < result?
2664  |   sub.d FRET2, FRET1, f4
2665  |  sel.d  FTMP1, FRET1, FRET2		// If yes, subtract +1.
2666  |  dmtc1 AT, FRET1
2667  |  neg.d FRET2, FTMP1
2668  |  jr ra
2669  |.  sel.d FRET1, FTMP1, FRET2		// Merge sign bit back in.
2670  |.else
2671  |  c.olt.d 0, FRET2, FRET1		// |x| < result?
2672  |   sub.d FRET2, FRET1, f4
2673  |  movt.d FRET1, FRET2, 0		// If yes, subtract +1.
2674  |  neg.d FRET2, FRET1
2675  |  jr ra
2676  |.  movn.d FRET1, FRET2, AT		// Merge sign bit back in.
2677  |.endif
2678  |.else
2679  |  neg.d FRET2, FRET1
2680  |   dsll TMP0, TMP0, 32
2681  |   dmtc1 TMP0, f4
2682  |.if MIPSR6
2683  |  dmtc1 AT, FTMP1
2684  |  sel.d FTMP1, FRET1, FRET2
2685  |.if "func" == "ceil"
2686  |  cmp.lt.d FRET1, FTMP1, FARG1	// x > result?
2687  |.else
2688  |  cmp.lt.d FRET1, FARG1, FTMP1	// x < result?
2689  |.endif
2690  |   sub.d FRET2, FTMP1, f4		// If yes, subtract +-1.
2691  |  jr ra
2692  |.  sel.d FRET1, FTMP1, FRET2
2693  |.else
2694  |  movn.d FRET1, FRET2, AT		// Merge sign bit back in.
2695  |.if "func" == "ceil"
2696  |  c.olt.d 0, FRET1, FARG1		// x > result?
2697  |.else
2698  |  c.olt.d 0, FARG1, FRET1		// x < result?
2699  |.endif
2700  |   sub.d FRET2, FRET1, f4		// If yes, subtract +-1.
2701  |  jr ra
2702  |.  movt.d FRET1, FRET2, 0
2703  |.endif
2704  |.endif
2705  |1:
2706  |  jr ra
2707  |.  mov.d FRET1, FARG1
2708  |.endmacro
2709  |
2710  |.macro vm_round, func
2711  |.if FPU
2712  |  vm_round_hf, func
2713  |.endif
2714  |.endmacro
2715  |
2716  |->vm_floor:
2717  |  vm_round floor
2718  |->vm_ceil:
2719  |  vm_round ceil
2720  |->vm_trunc:
2721  |.if JIT
2722  |  vm_round trunc
2723  |.endif
2724  |
2725  |// Soft-float integer to number conversion.
2726  |.macro sfi2d, ARG
2727  |.if not FPU
2728  |  beqz ARG, >9			// Handle zero first.
2729  |.  sra TMP0, ARG, 31
2730  |  xor TMP1, ARG, TMP0
2731  |  dsubu TMP1, TMP1, TMP0		// Absolute value in TMP1.
2732  |  dclz ARG, TMP1
2733  |  addiu ARG, ARG, -11
2734  |  li AT, 0x3ff+63-11-1
2735  |   dsllv TMP1, TMP1, ARG		// Align mantissa left with leading 1.
2736  |  subu ARG, AT, ARG			// Exponent - 1.
2737  |  ins ARG, TMP0, 11, 11		// Sign | Exponent.
2738  |  dsll ARG, ARG, 52			// Align left.
2739  |  jr ra
2740  |.  daddu ARG, ARG, TMP1		// Add mantissa, increment exponent.
2741  |9:
2742  |  jr ra
2743  |.  nop
2744  |.endif
2745  |.endmacro
2746  |
2747  |// Input CARG1. Output: CARG1. Temporaries: AT, TMP0, TMP1.
2748  |->vm_sfi2d_1:
2749  |  sfi2d CARG1
2750  |
2751  |// Input CARG2. Output: CARG2. Temporaries: AT, TMP0, TMP1.
2752  |->vm_sfi2d_2:
2753  |  sfi2d CARG2
2754  |
2755  |// Soft-float comparison. Equivalent to c.eq.d.
2756  |// Input: CARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2757  |->vm_sfcmpeq:
2758  |.if not FPU
2759  |  dsll AT, CARG1, 1
2760  |  dsll TMP0, CARG2, 1
2761  |  or TMP1, AT, TMP0
2762  |  beqz TMP1, >8			// Both args +-0: return 1.
2763  |.  lui TMP1, 0xffe0
2764  |  dsll TMP1, TMP1, 32
2765  |   sltu AT, TMP1, AT
2766  |   sltu TMP0, TMP1, TMP0
2767  |  or TMP1, AT, TMP0
2768  |  bnez TMP1, >9			// Either arg is NaN: return 0;
2769  |.  xor AT, CARG1, CARG2
2770  |  jr ra
2771  |.  sltiu CRET1, AT, 1		// Same values: return 1.
2772  |8:
2773  |  jr ra
2774  |.  li CRET1, 1
2775  |9:
2776  |  jr ra
2777  |.  li CRET1, 0
2778  |.endif
2779  |
2780  |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d.
2781  |// Input: CARG1, CARG2. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2.
2782  |->vm_sfcmpult:
2783  |.if not FPU
2784  |  b >1
2785  |.  li CRET2, 1
2786  |.endif
2787  |
2788  |->vm_sfcmpolt:
2789  |.if not FPU
2790  |  li CRET2, 0
2791  |1:
2792  |  dsll AT, CARG1, 1
2793  |  dsll TMP0, CARG2, 1
2794  |  or TMP1, AT, TMP0
2795  |  beqz TMP1, >8			// Both args +-0: return 0.
2796  |.  lui TMP1, 0xffe0
2797  |  dsll TMP1, TMP1, 32
2798  |   sltu AT, TMP1, AT
2799  |   sltu TMP0, TMP1, TMP0
2800  |  or TMP1, AT, TMP0
2801  |  bnez TMP1, >9			// Either arg is NaN: return 0 or 1;
2802  |.  and AT, CARG1, CARG2
2803  |  bltz AT, >5			// Both args negative?
2804  |.  nop
2805  |  jr ra
2806  |.  slt CRET1, CARG1, CARG2
2807  |5:  // Swap conditions if both operands are negative.
2808  |  jr ra
2809  |.  slt CRET1, CARG2, CARG1
2810  |8:
2811  |  jr ra
2812  |.  li CRET1, 0
2813  |9:
2814  |  jr ra
2815  |.  move CRET1, CRET2
2816  |.endif
2817  |
2818  |->vm_sfcmpogt:
2819  |.if not FPU
2820  |  dsll AT, CARG2, 1
2821  |  dsll TMP0, CARG1, 1
2822  |  or TMP1, AT, TMP0
2823  |  beqz TMP1, >8			// Both args +-0: return 0.
2824  |.  lui TMP1, 0xffe0
2825  |  dsll TMP1, TMP1, 32
2826  |   sltu AT, TMP1, AT
2827  |   sltu TMP0, TMP1, TMP0
2828  |  or TMP1, AT, TMP0
2829  |  bnez TMP1, >9			// Either arg is NaN: return 0 or 1;
2830  |.  and AT, CARG2, CARG1
2831  |  bltz AT, >5			// Both args negative?
2832  |.  nop
2833  |  jr ra
2834  |.  slt CRET1, CARG2, CARG1
2835  |5:  // Swap conditions if both operands are negative.
2836  |  jr ra
2837  |.  slt CRET1, CARG1, CARG2
2838  |8:
2839  |  jr ra
2840  |.  li CRET1, 0
2841  |9:
2842  |  jr ra
2843  |.  li CRET1, 0
2844  |.endif
2845  |
2846  |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a.
2847  |// Input: CARG1, CARG2, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2848  |->vm_sfcmpolex:
2849  |.if not FPU
2850  |  dsll AT, CARG1, 1
2851  |  dsll TMP0, CARG2, 1
2852  |  or TMP1, AT, TMP0
2853  |  beqz TMP1, >8			// Both args +-0: return 1.
2854  |.  lui TMP1, 0xffe0
2855  |  dsll TMP1, TMP1, 32
2856  |   sltu AT, TMP1, AT
2857  |   sltu TMP0, TMP1, TMP0
2858  |  or TMP1, AT, TMP0
2859  |  bnez TMP1, >9			// Either arg is NaN: return 0;
2860  |.  and AT, CARG1, CARG2
2861  |  xor AT, AT, TMP3
2862  |  bltz AT, >5			// Both args negative?
2863  |.  nop
2864  |  jr ra
2865  |.  slt CRET1, CARG2, CARG1
2866  |5:  // Swap conditions if both operands are negative.
2867  |  jr ra
2868  |.  slt CRET1, CARG1, CARG2
2869  |8:
2870  |  jr ra
2871  |.  li CRET1, 1
2872  |9:
2873  |  jr ra
2874  |.  li CRET1, 0
2875  |.endif
2876  |
2877  |.macro sfmin_max, name, fpcall
2878  |->vm_sf .. name:
2879  |.if JIT and not FPU
2880  |  move TMP2, ra
2881  |  bal ->fpcall
2882  |.  nop
2883  |  move ra, TMP2
2884  |  move TMP0, CRET1
2885  |  move CRET1, CARG1
2886  |.if MIPSR6
2887  |  selnez CRET1, CRET1, TMP0
2888  |  seleqz TMP0, CARG2, TMP0
2889  |  jr ra
2890  |.  or CRET1, CRET1, TMP0
2891  |.else
2892  |  jr ra
2893  |.  movz CRET1, CARG2, TMP0
2894  |.endif
2895  |.endif
2896  |.endmacro
2897  |
2898  |  sfmin_max min, vm_sfcmpolt
2899  |  sfmin_max max, vm_sfcmpogt
2900  |
2901  |//-----------------------------------------------------------------------
2902  |//-- Miscellaneous functions --------------------------------------------
2903  |//-----------------------------------------------------------------------
2904  |
2905  |//-----------------------------------------------------------------------
2906  |//-- FFI helper functions -----------------------------------------------
2907  |//-----------------------------------------------------------------------
2908  |
2909  |// Handler for callback functions. Callback slot number in r1, g in r2.
2910  |->vm_ffi_callback:
2911  |.if FFI
2912  |.type CTSTATE, CTState, PC
2913  |  saveregs
2914  |  ld CTSTATE, GL:r2->ctype_state
2915  |   daddiu DISPATCH, r2, GG_G2DISP
2916  |  load_got lj_ccallback_enter
2917  |  sw r1, CTSTATE->cb.slot
2918  |  sd CARG1, CTSTATE->cb.gpr[0]
2919  |  .FPU sdc1 FARG1, CTSTATE->cb.fpr[0]
2920  |  sd CARG2, CTSTATE->cb.gpr[1]
2921  |  .FPU sdc1 FARG2, CTSTATE->cb.fpr[1]
2922  |  sd CARG3, CTSTATE->cb.gpr[2]
2923  |  .FPU sdc1 FARG3, CTSTATE->cb.fpr[2]
2924  |  sd CARG4, CTSTATE->cb.gpr[3]
2925  |  .FPU sdc1 FARG4, CTSTATE->cb.fpr[3]
2926  |  sd CARG5, CTSTATE->cb.gpr[4]
2927  |  .FPU sdc1 FARG5, CTSTATE->cb.fpr[4]
2928  |  sd CARG6, CTSTATE->cb.gpr[5]
2929  |  .FPU sdc1 FARG6, CTSTATE->cb.fpr[5]
2930  |  sd CARG7, CTSTATE->cb.gpr[6]
2931  |  .FPU sdc1 FARG7, CTSTATE->cb.fpr[6]
2932  |  sd CARG8, CTSTATE->cb.gpr[7]
2933  |  .FPU sdc1 FARG8, CTSTATE->cb.fpr[7]
2934  |  daddiu TMP0, sp, CFRAME_SPACE
2935  |  sd TMP0, CTSTATE->cb.stack
2936  |  sd r0, SAVE_PC			// Any value outside of bytecode is ok.
2937  |   move CARG2, sp
2938  |  call_intern lj_ccallback_enter	// (CTState *cts, void *cf)
2939  |.  move CARG1, CTSTATE
2940  |  // Returns lua_State *.
2941  |  ld BASE, L:CRET1->base
2942  |  ld RC, L:CRET1->top
2943  |   move L, CRET1
2944  |     .FPU lui TMP3, 0x59c0		// TOBIT = 2^52 + 2^51 (float).
2945  |  ld LFUNC:RB, FRAME_FUNC(BASE)
2946  |     .FPU mtc1 TMP3, TOBIT
2947  |      li TISNIL, LJ_TNIL
2948  |       li TISNUM, LJ_TISNUM
2949  |    li_vmstate INTERP
2950  |  subu RC, RC, BASE
2951  |   cleartp LFUNC:RB
2952  |    st_vmstate
2953  |     .FPU cvt.d.s TOBIT, TOBIT
2954  |  ins_callt
2955  |.endif
2956  |
2957  |->cont_ffi_callback:			// Return from FFI callback.
2958  |.if FFI
2959  |  load_got lj_ccallback_leave
2960  |  ld CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH)
2961  |   sd BASE, L->base
2962  |   sd RB, L->top
2963  |  sd L, CTSTATE->L
2964  |  move CARG2, RA
2965  |  call_intern lj_ccallback_leave	// (CTState *cts, TValue *o)
2966  |.  move CARG1, CTSTATE
2967  |  .FPU ldc1 FRET1, CTSTATE->cb.fpr[0]
2968  |  ld CRET1, CTSTATE->cb.gpr[0]
2969  |  .FPU ldc1 FRET2, CTSTATE->cb.fpr[1]
2970  |  b ->vm_leave_unw
2971  |.  ld CRET2, CTSTATE->cb.gpr[1]
2972  |.endif
2973  |
2974  |->vm_ffi_call:			// Call C function via FFI.
2975  |  // Caveat: needs special frame unwinding, see below.
2976  |.if FFI
2977  |  .type CCSTATE, CCallState, CARG1
2978  |  lw TMP1, CCSTATE->spadj
2979  |   lbu CARG2, CCSTATE->nsp
2980  |  move TMP2, sp
2981  |  dsubu sp, sp, TMP1
2982  |  sd ra, -8(TMP2)
2983  |   sll CARG2, CARG2, 3
2984  |  sd r16, -16(TMP2)
2985  |  sd CCSTATE, -24(TMP2)
2986  |  move r16, TMP2
2987  |  daddiu TMP1, CCSTATE, offsetof(CCallState, stack)
2988  |  move TMP2, sp
2989  |  beqz CARG2, >2
2990  |.  daddu TMP3, TMP1, CARG2
2991  |1:
2992  |   ld TMP0, 0(TMP1)
2993  |  daddiu TMP1, TMP1, 8
2994  |  sltu AT, TMP1, TMP3
2995  |   sd TMP0, 0(TMP2)
2996  |  bnez AT, <1
2997  |.  daddiu TMP2, TMP2, 8
2998  |2:
2999  |  ld CFUNCADDR, CCSTATE->func
3000  |  .FPU ldc1 FARG1, CCSTATE->gpr[0]
3001  |  ld CARG2, CCSTATE->gpr[1]
3002  |  .FPU ldc1 FARG2, CCSTATE->gpr[1]
3003  |  ld CARG3, CCSTATE->gpr[2]
3004  |  .FPU ldc1 FARG3, CCSTATE->gpr[2]
3005  |  ld CARG4, CCSTATE->gpr[3]
3006  |  .FPU ldc1 FARG4, CCSTATE->gpr[3]
3007  |  ld CARG5, CCSTATE->gpr[4]
3008  |  .FPU ldc1 FARG5, CCSTATE->gpr[4]
3009  |  ld CARG6, CCSTATE->gpr[5]
3010  |  .FPU ldc1 FARG6, CCSTATE->gpr[5]
3011  |  ld CARG7, CCSTATE->gpr[6]
3012  |  .FPU ldc1 FARG7, CCSTATE->gpr[6]
3013  |  ld CARG8, CCSTATE->gpr[7]
3014  |  .FPU ldc1 FARG8, CCSTATE->gpr[7]
3015  |  jalr CFUNCADDR
3016  |.  ld CARG1, CCSTATE->gpr[0]		// Do this last, since CCSTATE is CARG1.
3017  |  ld CCSTATE:TMP1, -24(r16)
3018  |  ld TMP2, -16(r16)
3019  |  ld ra, -8(r16)
3020  |  sd CRET1, CCSTATE:TMP1->gpr[0]
3021  |  sd CRET2, CCSTATE:TMP1->gpr[1]
3022  |.if FPU
3023  |  sdc1 FRET1, CCSTATE:TMP1->fpr[0]
3024  |  sdc1 FRET2, CCSTATE:TMP1->fpr[1]
3025  |.else
3026  |  sd CARG1, CCSTATE:TMP1->gpr[2]	// 2nd FP struct field for soft-float.
3027  |.endif
3028  |  move sp, r16
3029  |  jr ra
3030  |.  move r16, TMP2
3031  |.endif
3032  |// Note: vm_ffi_call must be the last function in this object file!
3033  |
3034  |//-----------------------------------------------------------------------
3035}
3036
3037/* Generate the code for a single instruction. */
3038static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3039{
3040  int vk = 0;
3041  |=>defop:
3042
3043  switch (op) {
3044
3045  /* -- Comparison ops ---------------------------------------------------- */
3046
3047  /* Remember: all ops branch for a true comparison, fall through otherwise. */
3048
3049  case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
3050    |  // RA = src1*8, RD = src2*8, JMP with RD = target
3051    |.macro bc_comp, FRA, FRD, ARGRA, ARGRD, movop, fmovop, fcomp, sfcomp
3052    |  daddu RA, BASE, RA
3053    |   daddu RD, BASE, RD
3054    |  ld ARGRA, 0(RA)
3055    |   ld ARGRD, 0(RD)
3056    |    lhu TMP2, OFS_RD(PC)
3057    |  gettp CARG3, ARGRA
3058    |   gettp CARG4, ARGRD
3059    |  bne CARG3, TISNUM, >2
3060    |.   daddiu PC, PC, 4
3061    |  bne CARG4, TISNUM, >5
3062    |.   decode_RD4b TMP2
3063    |  sextw ARGRA, ARGRA
3064    |   sextw ARGRD, ARGRD
3065    |    lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3066    |  slt AT, CARG1, CARG2
3067    |    addu TMP2, TMP2, TMP3
3068    |.if MIPSR6
3069    |  movop TMP2, TMP2, AT
3070    |.else
3071    |  movop TMP2, r0, AT
3072    |.endif
3073    |1:
3074    |  daddu PC, PC, TMP2
3075    |  ins_next
3076    |
3077    |2:  // RA is not an integer.
3078    |  sltiu AT, CARG3, LJ_TISNUM
3079    |  beqz AT, ->vmeta_comp
3080    |.   lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3081    |  sltiu AT, CARG4, LJ_TISNUM
3082    |  beqz AT, >4
3083    |.   decode_RD4b TMP2
3084    |.if FPU
3085    |  ldc1 FRA, 0(RA)
3086    |   ldc1 FRD, 0(RD)
3087    |.endif
3088    |3:  // RA and RD are both numbers.
3089    |.if FPU
3090    |.if MIPSR6
3091    |  fcomp FTMP0, FTMP0, FTMP2
3092    |   addu TMP2, TMP2, TMP3
3093    |  mfc1 TMP3, FTMP0
3094    |  b <1
3095    |.  fmovop TMP2, TMP2, TMP3
3096    |.else
3097    |  fcomp FTMP0, FTMP2
3098    |   addu TMP2, TMP2, TMP3
3099    |  b <1
3100    |.  fmovop TMP2, r0
3101    |.endif
3102    |.else
3103    |  bal sfcomp
3104    |.   addu TMP2, TMP2, TMP3
3105    |  b <1
3106    |.if MIPSR6
3107    |.  movop TMP2, TMP2, CRET1
3108    |.else
3109    |.  movop TMP2, r0, CRET1
3110    |.endif
3111    |.endif
3112    |
3113    |4:  // RA is a number, RD is not a number.
3114    |  bne CARG4, TISNUM, ->vmeta_comp
3115    |  // RA is a number, RD is an integer. Convert RD to a number.
3116    |.if FPU
3117    |.  lwc1 FRD, LO(RD)
3118    |  ldc1 FRA, 0(RA)
3119    |  b <3
3120    |.  cvt.d.w FRD, FRD
3121    |.else
3122    |.if "ARGRD" == "CARG1"
3123    |.  sextw CARG1, CARG1
3124    |  bal ->vm_sfi2d_1
3125    |.  nop
3126    |.else
3127    |.  sextw CARG2, CARG2
3128    |  bal ->vm_sfi2d_2
3129    |.  nop
3130    |.endif
3131    |  b <3
3132    |.  nop
3133    |.endif
3134    |
3135    |5:  // RA is an integer, RD is not an integer
3136    |  sltiu AT, CARG4, LJ_TISNUM
3137    |  beqz AT, ->vmeta_comp
3138    |.  lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3139    |  // RA is an integer, RD is a number. Convert RA to a number.
3140    |.if FPU
3141    |   lwc1 FRA, LO(RA)
3142    |   ldc1 FRD, 0(RD)
3143    |  b <3
3144    |   cvt.d.w FRA, FRA
3145    |.else
3146    |.if "ARGRA" == "CARG1"
3147    |  bal ->vm_sfi2d_1
3148    |.  sextw CARG1, CARG1
3149    |.else
3150    |  bal ->vm_sfi2d_2
3151    |.  sextw CARG2, CARG2
3152    |.endif
3153    |  b <3
3154    |.  nop
3155    |.endif
3156    |.endmacro
3157    |
3158    |.if MIPSR6
3159    if (op == BC_ISLT) {
3160      |  bc_comp FTMP0, FTMP2, CARG1, CARG2, selnez, selnez, cmp.lt.d, ->vm_sfcmpolt
3161    } else if (op == BC_ISGE) {
3162      |  bc_comp FTMP0, FTMP2, CARG1, CARG2, seleqz, seleqz, cmp.lt.d, ->vm_sfcmpolt
3163    } else if (op == BC_ISLE) {
3164      |  bc_comp FTMP2, FTMP0, CARG2, CARG1, seleqz, seleqz, cmp.ult.d, ->vm_sfcmpult
3165    } else {
3166      |  bc_comp FTMP2, FTMP0, CARG2, CARG1, selnez, selnez, cmp.ult.d, ->vm_sfcmpult
3167    }
3168    |.else
3169    if (op == BC_ISLT) {
3170      |  bc_comp FTMP0, FTMP2, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt
3171    } else if (op == BC_ISGE) {
3172      |  bc_comp FTMP0, FTMP2, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt
3173    } else if (op == BC_ISLE) {
3174      |  bc_comp FTMP2, FTMP0, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult
3175    } else {
3176      |  bc_comp FTMP2, FTMP0, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult
3177    }
3178    |.endif
3179    break;
3180
3181  case BC_ISEQV: case BC_ISNEV:
3182    vk = op == BC_ISEQV;
3183    |  // RA = src1*8, RD = src2*8, JMP with RD = target
3184    |  daddu RA, BASE, RA
3185    |    daddiu PC, PC, 4
3186    |   daddu RD, BASE, RD
3187    |  ld CARG1, 0(RA)
3188    |    lhu TMP2, -4+OFS_RD(PC)
3189    |   ld CARG2, 0(RD)
3190    |  gettp CARG3, CARG1
3191    |   gettp CARG4, CARG2
3192    |  sltu AT, TISNUM, CARG3
3193    |   sltu TMP1, TISNUM, CARG4
3194    |  or AT, AT, TMP1
3195    if (vk) {
3196      |  beqz AT, ->BC_ISEQN_Z
3197    } else {
3198      |  beqz AT, ->BC_ISNEN_Z
3199    }
3200    |  // Either or both types are not numbers.
3201    |    lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3202    |.if FFI
3203    |.  li AT, LJ_TCDATA
3204    |  beq CARG3, AT, ->vmeta_equal_cd
3205    |.endif
3206    |   decode_RD4b TMP2
3207    |.if FFI
3208    |  beq CARG4, AT, ->vmeta_equal_cd
3209    |.  nop
3210    |.endif
3211    |  bne CARG1, CARG2, >2
3212    |.  addu TMP2, TMP2, TMP3
3213    |  // Tag and value are equal.
3214    if (vk) {
3215      |->BC_ISEQV_Z:
3216      |  daddu PC, PC, TMP2
3217    }
3218    |1:
3219    |  ins_next
3220    |
3221    |2:  // Check if the tags are the same and it's a table or userdata.
3222    |  xor AT, CARG3, CARG4			// Same type?
3223    |  sltiu TMP0, CARG3, LJ_TISTABUD+1		// Table or userdata?
3224    |.if MIPSR6
3225    |  seleqz TMP0, TMP0, AT
3226    |.else
3227    |  movn TMP0, r0, AT
3228    |.endif
3229    if (vk) {
3230      |  beqz TMP0, <1
3231    } else {
3232      |  beqz TMP0, ->BC_ISEQV_Z  // Reuse code from opposite instruction.
3233    }
3234    |  // Different tables or userdatas. Need to check __eq metamethod.
3235    |  // Field metatable must be at same offset for GCtab and GCudata!
3236    |.  cleartp TAB:TMP1, CARG1
3237    |  ld TAB:TMP3, TAB:TMP1->metatable
3238    if (vk) {
3239      |  beqz TAB:TMP3, <1		// No metatable?
3240      |.  nop
3241      |  lbu TMP3, TAB:TMP3->nomm
3242      |  andi TMP3, TMP3, 1<<MM_eq
3243      |  bnez TMP3, >1			// Or 'no __eq' flag set?
3244    } else {
3245      |  beqz TAB:TMP3,->BC_ISEQV_Z	// No metatable?
3246      |.  nop
3247      |  lbu TMP3, TAB:TMP3->nomm
3248      |  andi TMP3, TMP3, 1<<MM_eq
3249      |  bnez TMP3, ->BC_ISEQV_Z	// Or 'no __eq' flag set?
3250    }
3251    |.  nop
3252    |  b ->vmeta_equal			// Handle __eq metamethod.
3253    |.  li TMP0, 1-vk			// ne = 0 or 1.
3254    break;
3255
3256  case BC_ISEQS: case BC_ISNES:
3257    vk = op == BC_ISEQS;
3258    |  // RA = src*8, RD = str_const*8 (~), JMP with RD = target
3259    |  daddu RA, BASE, RA
3260    |   daddiu PC, PC, 4
3261    |  ld CARG1, 0(RA)
3262    |   dsubu RD, KBASE, RD
3263    |    lhu TMP2, -4+OFS_RD(PC)
3264    |   ld CARG2, -8(RD)		// KBASE-8-str_const*8
3265    |.if FFI
3266    |  gettp TMP0, CARG1
3267    |  li AT, LJ_TCDATA
3268    |.endif
3269    |  li TMP1, LJ_TSTR
3270    |   decode_RD4b TMP2
3271    |.if FFI
3272    |  beq TMP0, AT, ->vmeta_equal_cd
3273    |.endif
3274    |.  settp CARG2, TMP1
3275    |   lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3276    |  xor TMP1, CARG1, CARG2
3277    |   addu TMP2, TMP2, TMP3
3278    |.if MIPSR6
3279    if (vk) {
3280      |  seleqz TMP2, TMP2, TMP1
3281    } else {
3282      |  selnez TMP2, TMP2, TMP1
3283    }
3284    |.else
3285    if (vk) {
3286      |  movn TMP2, r0, TMP1
3287    } else {
3288      |  movz TMP2, r0, TMP1
3289    }
3290    |.endif
3291    |  daddu PC, PC, TMP2
3292    |  ins_next
3293    break;
3294
3295  case BC_ISEQN: case BC_ISNEN:
3296    vk = op == BC_ISEQN;
3297    |  // RA = src*8, RD = num_const*8, JMP with RD = target
3298    |  daddu RA, BASE, RA
3299    |   daddu RD, KBASE, RD
3300    |  ld CARG1, 0(RA)
3301    |   ld CARG2, 0(RD)
3302    |    lhu TMP2, OFS_RD(PC)
3303    |  gettp CARG3, CARG1
3304    |   gettp CARG4, CARG2
3305    |    daddiu PC, PC, 4
3306    |    lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3307    if (vk) {
3308      |->BC_ISEQN_Z:
3309    } else {
3310      |->BC_ISNEN_Z:
3311    }
3312    |  bne CARG3, TISNUM, >3
3313    |.   decode_RD4b TMP2
3314    |  bne CARG4, TISNUM, >6
3315    |.   addu TMP2, TMP2, TMP3
3316    |  xor AT, CARG1, CARG2
3317    |.if MIPSR6
3318    if (vk) {
3319      | seleqz TMP2, TMP2, AT
3320      |1:
3321      |  daddu PC, PC, TMP2
3322      |2:
3323    } else {
3324      |  selnez TMP2, TMP2, AT
3325      |1:
3326      |2:
3327      |  daddu PC, PC, TMP2
3328    }
3329    |.else
3330    if (vk) {
3331      | movn TMP2, r0, AT
3332      |1:
3333      |  daddu PC, PC, TMP2
3334      |2:
3335    } else {
3336      |  movz TMP2, r0, AT
3337      |1:
3338      |2:
3339      |  daddu PC, PC, TMP2
3340    }
3341    |.endif
3342    |  ins_next
3343    |
3344    |3:  // RA is not an integer.
3345    |  sltu AT, CARG3, TISNUM
3346    |.if FFI
3347    |  beqz AT, >8
3348    |.else
3349    |  beqz AT, <2
3350    |.endif
3351    |.   addu TMP2, TMP2, TMP3
3352    |  sltu AT, CARG4, TISNUM
3353    |.if FPU
3354    |  ldc1 FTMP0, 0(RA)
3355    |   ldc1 FTMP2, 0(RD)
3356    |.endif
3357    |  beqz AT, >5
3358    |.  nop
3359    |4:  // RA and RD are both numbers.
3360    |.if FPU
3361    |.if MIPSR6
3362    |  cmp.eq.d FTMP0, FTMP0, FTMP2
3363    |  dmfc1 TMP1, FTMP0
3364    |  b <1
3365    if (vk) {
3366      |.  selnez TMP2, TMP2, TMP1
3367    } else {
3368      |.  seleqz TMP2, TMP2, TMP1
3369    }
3370    |.else
3371    |  c.eq.d FTMP0, FTMP2
3372    |  b <1
3373    if (vk) {
3374      |.  movf TMP2, r0
3375    } else {
3376      |.  movt TMP2, r0
3377    }
3378    |.endif
3379    |.else
3380    |  bal ->vm_sfcmpeq
3381    |.  nop
3382    |  b <1
3383    |.if MIPSR6
3384    if (vk) {
3385      |.  selnez TMP2, TMP2, CRET1
3386    } else {
3387      |.  seleqz TMP2, TMP2, CRET1
3388    }
3389    |.else
3390    if (vk) {
3391      |.  movz TMP2, r0, CRET1
3392    } else {
3393      |.  movn TMP2, r0, CRET1
3394    }
3395    |.endif
3396    |.endif
3397    |
3398    |5:  // RA is a number, RD is not a number.
3399    |.if FFI
3400    |  bne CARG4, TISNUM, >9
3401    |.else
3402    |  bne CARG4, TISNUM, <2
3403    |.endif
3404    |  // RA is a number, RD is an integer. Convert RD to a number.
3405    |.if FPU
3406    |.  lwc1 FTMP2, LO(RD)
3407    |  b <4
3408    |.  cvt.d.w FTMP2, FTMP2
3409    |.else
3410    |.  sextw CARG2, CARG2
3411    |  bal ->vm_sfi2d_2
3412    |.  nop
3413    |  b <4
3414    |.  nop
3415    |.endif
3416    |
3417    |6:  // RA is an integer, RD is not an integer
3418    |  sltu AT, CARG4, TISNUM
3419    |.if FFI
3420    |  beqz AT, >9
3421    |.else
3422    |  beqz AT, <2
3423    |.endif
3424    |  // RA is an integer, RD is a number. Convert RA to a number.
3425    |.if FPU
3426    |.  lwc1 FTMP0, LO(RA)
3427    |   ldc1 FTMP2, 0(RD)
3428    |  b <4
3429    |   cvt.d.w FTMP0, FTMP0
3430    |.else
3431    |.  sextw CARG1, CARG1
3432    |  bal ->vm_sfi2d_1
3433    |.  nop
3434    |  b <4
3435    |.  nop
3436    |.endif
3437    |
3438    |.if FFI
3439    |8:
3440    |  li AT, LJ_TCDATA
3441    |  bne CARG3, AT, <2
3442    |.  nop
3443    |  b ->vmeta_equal_cd
3444    |.  nop
3445    |9:
3446    |  li AT, LJ_TCDATA
3447    |  bne CARG4, AT, <2
3448    |.  nop
3449    |  b ->vmeta_equal_cd
3450    |.  nop
3451    |.endif
3452    break;
3453
3454  case BC_ISEQP: case BC_ISNEP:
3455    vk = op == BC_ISEQP;
3456    |  // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
3457    |  daddu RA, BASE, RA
3458    |   srl TMP1, RD, 3
3459    |  ld TMP0, 0(RA)
3460    |    lhu TMP2, OFS_RD(PC)
3461    |   not TMP1, TMP1
3462    |  gettp TMP0, TMP0
3463    |    daddiu PC, PC, 4
3464    |.if FFI
3465    |  li AT, LJ_TCDATA
3466    |  beq TMP0, AT, ->vmeta_equal_cd
3467    |.endif
3468    |.  xor TMP0, TMP0, TMP1
3469    |  decode_RD4b TMP2
3470    |  lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3471    |  addu TMP2, TMP2, TMP3
3472    |.if MIPSR6
3473    if (vk) {
3474      |  seleqz TMP2, TMP2, TMP0
3475    } else {
3476      |  selnez TMP2, TMP2, TMP0
3477    }
3478    |.else
3479    if (vk) {
3480      |  movn TMP2, r0, TMP0
3481    } else {
3482      |  movz TMP2, r0, TMP0
3483    }
3484    |.endif
3485    |  daddu PC, PC, TMP2
3486    |  ins_next
3487    break;
3488
3489  /* -- Unary test and copy ops ------------------------------------------- */
3490
3491  case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
3492    |  // RA = dst*8 or unused, RD = src*8, JMP with RD = target
3493    |  daddu RD, BASE, RD
3494    |   lhu TMP2, OFS_RD(PC)
3495    |  ld TMP0, 0(RD)
3496    |   daddiu PC, PC, 4
3497    |  gettp TMP0, TMP0
3498    |  sltiu TMP0, TMP0, LJ_TISTRUECOND
3499    if (op == BC_IST || op == BC_ISF) {
3500      |   decode_RD4b TMP2
3501      |   lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3502      |   addu TMP2, TMP2, TMP3
3503      |.if MIPSR6
3504      if (op == BC_IST) {
3505	|  selnez TMP2, TMP2, TMP0;
3506      } else {
3507	|  seleqz TMP2, TMP2, TMP0;
3508      }
3509      |.else
3510      if (op == BC_IST) {
3511	|  movz TMP2, r0, TMP0
3512      } else {
3513	|  movn TMP2, r0, TMP0
3514      }
3515      |.endif
3516      |  daddu PC, PC, TMP2
3517    } else {
3518      |  ld CRET1, 0(RD)
3519      if (op == BC_ISTC) {
3520	|  beqz TMP0, >1
3521      } else {
3522	|  bnez TMP0, >1
3523      }
3524      |.  daddu RA, BASE, RA
3525      |   decode_RD4b TMP2
3526      |   lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3527      |   addu TMP2, TMP2, TMP3
3528      |  sd CRET1, 0(RA)
3529      |   daddu PC, PC, TMP2
3530      |1:
3531    }
3532    |  ins_next
3533    break;
3534
3535  case BC_ISTYPE:
3536    |  // RA = src*8, RD = -type*8
3537    |  daddu TMP2, BASE, RA
3538    |  srl TMP1, RD, 3
3539    |  ld TMP0, 0(TMP2)
3540    |  ins_next1
3541    |  gettp TMP0, TMP0
3542    |  daddu AT, TMP0, TMP1
3543    |  bnez AT, ->vmeta_istype
3544    |.  ins_next2
3545    break;
3546  case BC_ISNUM:
3547    |  // RA = src*8, RD = -(TISNUM-1)*8
3548    |  daddu TMP2, BASE, RA
3549    |  ld TMP0, 0(TMP2)
3550    |  ins_next1
3551    |  checknum TMP0, ->vmeta_istype
3552    |.  ins_next2
3553    break;
3554
3555  /* -- Unary ops --------------------------------------------------------- */
3556
3557  case BC_MOV:
3558    |  // RA = dst*8, RD = src*8
3559    |  daddu RD, BASE, RD
3560    |   daddu RA, BASE, RA
3561    |  ld CRET1, 0(RD)
3562    |  ins_next1
3563    |  sd CRET1, 0(RA)
3564    |  ins_next2
3565    break;
3566  case BC_NOT:
3567    |  // RA = dst*8, RD = src*8
3568    |  daddu RD, BASE, RD
3569    |   daddu RA, BASE, RA
3570    |  ld TMP0, 0(RD)
3571    |   li AT, LJ_TTRUE
3572    |  gettp TMP0, TMP0
3573    |  sltu TMP0, AT, TMP0
3574    |  addiu TMP0, TMP0, 1
3575    |  dsll TMP0, TMP0, 47
3576    |  not TMP0, TMP0
3577    |  ins_next1
3578    |   sd TMP0, 0(RA)
3579    |  ins_next2
3580    break;
3581  case BC_UNM:
3582    |  // RA = dst*8, RD = src*8
3583    |  daddu RB, BASE, RD
3584    |  ld CARG1, 0(RB)
3585    |    daddu RA, BASE, RA
3586    |  gettp CARG3, CARG1
3587    |  bne CARG3, TISNUM, >2
3588    |.  lui TMP1, 0x8000
3589    |  sextw CARG1, CARG1
3590    |  beq CARG1, TMP1, ->vmeta_unm	// Meta handler deals with -2^31.
3591    |.  negu CARG1, CARG1
3592    |  zextw CARG1, CARG1
3593    |  settp CARG1, TISNUM
3594    |1:
3595    |  ins_next1
3596    |   sd CARG1, 0(RA)
3597    |  ins_next2
3598    |2:
3599    |  sltiu AT, CARG3, LJ_TISNUM
3600    |  beqz AT, ->vmeta_unm
3601    |.  dsll TMP1, TMP1, 32
3602    |  b <1
3603    |.  xor CARG1, CARG1, TMP1
3604    break;
3605  case BC_LEN:
3606    |  // RA = dst*8, RD = src*8
3607    |  daddu CARG2, BASE, RD
3608    |   daddu RA, BASE, RA
3609    |  ld TMP0, 0(CARG2)
3610    |  gettp TMP1, TMP0
3611    |  daddiu AT, TMP1, -LJ_TSTR
3612    |  bnez AT, >2
3613    |.  cleartp STR:CARG1, TMP0
3614    |   lw CRET1, STR:CARG1->len
3615    |1:
3616    |  settp CRET1, TISNUM
3617    |  ins_next1
3618    |  sd CRET1, 0(RA)
3619    |  ins_next2
3620    |2:
3621    |  daddiu AT, TMP1, -LJ_TTAB
3622    |  bnez AT, ->vmeta_len
3623    |.  nop
3624#if LJ_52
3625    |  ld TAB:TMP2, TAB:CARG1->metatable
3626    |  bnez TAB:TMP2, >9
3627    |.  nop
3628    |3:
3629#endif
3630    |->BC_LEN_Z:
3631    |  load_got lj_tab_len
3632    |  call_intern lj_tab_len		// (GCtab *t)
3633    |.  nop
3634    |  // Returns uint32_t (but less than 2^31).
3635    |  b <1
3636    |.  nop
3637#if LJ_52
3638    |9:
3639    |  lbu TMP0, TAB:TMP2->nomm
3640    |  andi TMP0, TMP0, 1<<MM_len
3641    |  bnez TMP0, <3			// 'no __len' flag set: done.
3642    |.  nop
3643    |  b ->vmeta_len
3644    |.  nop
3645#endif
3646    break;
3647
3648  /* -- Binary ops -------------------------------------------------------- */
3649
3650    |.macro fpmod, a, b, c
3651    |  bal ->vm_floor		// floor(b/c)
3652    |.  div.d FARG1, b, c
3653    |  mul.d a, FRET1, c
3654    |  sub.d a, b, a		// b - floor(b/c)*c
3655    |.endmacro
3656
3657    |.macro sfpmod
3658    |  daddiu sp, sp, -16
3659    |
3660    |  load_got __divdf3
3661    |  sd CARG1, 0(sp)
3662    |  call_extern
3663    |.  sd CARG2, 8(sp)
3664    |
3665    |  load_got floor
3666    |  call_extern
3667    |.  move CARG1, CRET1
3668    |
3669    |  load_got __muldf3
3670    |  move CARG1, CRET1
3671    |  call_extern
3672    |.  ld CARG2, 8(sp)
3673    |
3674    |  load_got __subdf3
3675    |  ld CARG1, 0(sp)
3676    |  call_extern
3677    |.  move CARG2, CRET1
3678    |
3679    |  daddiu sp, sp, 16
3680    |.endmacro
3681
3682    |.macro ins_arithpre, label
3683    ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3684    |  // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
3685    ||switch (vk) {
3686    ||case 0:
3687    |   decode_RB8a RB, INS
3688    |   decode_RB8b RB
3689    |    decode_RDtoRC8 RC, RD
3690    |   // RA = dst*8, RB = src1*8, RC = num_const*8
3691    |   daddu RB, BASE, RB
3692    |.if "label" ~= "none"
3693    |   b label
3694    |.endif
3695    |.   daddu RC, KBASE, RC
3696    ||  break;
3697    ||case 1:
3698    |   decode_RB8a RC, INS
3699    |   decode_RB8b RC
3700    |    decode_RDtoRC8 RB, RD
3701    |   // RA = dst*8, RB = num_const*8, RC = src1*8
3702    |   daddu RC, BASE, RC
3703    |.if "label" ~= "none"
3704    |   b label
3705    |.endif
3706    |.   daddu RB, KBASE, RB
3707    ||  break;
3708    ||default:
3709    |   decode_RB8a RB, INS
3710    |   decode_RB8b RB
3711    |    decode_RDtoRC8 RC, RD
3712    |   // RA = dst*8, RB = src1*8, RC = src2*8
3713    |   daddu RB, BASE, RB
3714    |.if "label" ~= "none"
3715    |   b label
3716    |.endif
3717    |.   daddu RC, BASE, RC
3718    ||  break;
3719    ||}
3720    |.endmacro
3721    |
3722    |.macro ins_arith, intins, fpins, fpcall, label
3723    |  ins_arithpre none
3724    |
3725    |.if "label" ~= "none"
3726    |label:
3727    |.endif
3728    |
3729    |// Used in 5.
3730    |  ld CARG1, 0(RB)
3731    |   ld CARG2, 0(RC)
3732    |  gettp TMP0, CARG1
3733    |   gettp TMP1, CARG2
3734    |
3735    |.if "intins" ~= "div"
3736    |
3737    |  // Check for two integers.
3738    |  sextw CARG3, CARG1
3739    |  bne TMP0, TISNUM, >5
3740    |.  sextw CARG4, CARG2
3741    |  bne TMP1, TISNUM, >5
3742    |
3743    |.if "intins" == "addu"
3744    |.  intins CRET1, CARG3, CARG4
3745    |  xor TMP1, CRET1, CARG3		// ((y^a) & (y^b)) < 0: overflow.
3746    |  xor TMP2, CRET1, CARG4
3747    |  and TMP1, TMP1, TMP2
3748    |  bltz TMP1, ->vmeta_arith
3749    |.  daddu RA, BASE, RA
3750    |.elif "intins" == "subu"
3751    |.  intins CRET1, CARG3, CARG4
3752    |  xor TMP1, CRET1, CARG3		// ((y^a) & (a^b)) < 0: overflow.
3753    |  xor TMP2, CARG3, CARG4
3754    |  and TMP1, TMP1, TMP2
3755    |  bltz TMP1, ->vmeta_arith
3756    |.  daddu RA, BASE, RA
3757    |.elif "intins" == "mult"
3758    |.if MIPSR6
3759    |.  nop
3760    |  mul CRET1, CARG3, CARG4
3761    |  muh TMP2, CARG3, CARG4
3762    |.else
3763    |.  intins CARG3, CARG4
3764    |  mflo CRET1
3765    |  mfhi TMP2
3766    |.endif
3767    |  sra TMP1, CRET1, 31
3768    |  bne TMP1, TMP2, ->vmeta_arith
3769    |.  daddu RA, BASE, RA
3770    |.else
3771    |.  load_got lj_vm_modi
3772    |  beqz CARG4, ->vmeta_arith
3773    |.  daddu RA, BASE, RA
3774    |  move CARG1, CARG3
3775    |  call_extern
3776    |.  move CARG2, CARG4
3777    |.endif
3778    |
3779    |  zextw CRET1, CRET1
3780    |  settp CRET1, TISNUM
3781    |  ins_next1
3782    |  sd CRET1, 0(RA)
3783    |3:
3784    |  ins_next2
3785    |
3786    |.endif
3787    |
3788    |5:  // Check for two numbers.
3789    |  .FPU ldc1 FTMP0, 0(RB)
3790    |  sltu AT, TMP0, TISNUM
3791    |   sltu TMP0, TMP1, TISNUM
3792    |  .FPU ldc1 FTMP2, 0(RC)
3793    |   and AT, AT, TMP0
3794    |   beqz AT, ->vmeta_arith
3795    |.   daddu RA, BASE, RA
3796    |
3797    |.if FPU
3798    |  fpins FRET1, FTMP0, FTMP2
3799    |.elif "fpcall" == "sfpmod"
3800    |  sfpmod
3801    |.else
3802    |  load_got fpcall
3803    |  call_extern
3804    |.  nop
3805    |.endif
3806    |
3807    |  ins_next1
3808    |.if "intins" ~= "div"
3809    |  b <3
3810    |.endif
3811    |.if FPU
3812    |.  sdc1 FRET1, 0(RA)
3813    |.else
3814    |.  sd CRET1, 0(RA)
3815    |.endif
3816    |.if "intins" == "div"
3817    |  ins_next2
3818    |.endif
3819    |
3820    |.endmacro
3821
3822  case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
3823    |  ins_arith addu, add.d, __adddf3, none
3824    break;
3825  case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
3826    |  ins_arith subu, sub.d, __subdf3, none
3827    break;
3828  case BC_MULVN: case BC_MULNV: case BC_MULVV:
3829    |  ins_arith mult, mul.d, __muldf3, none
3830    break;
3831  case BC_DIVVN:
3832    |  ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z
3833    break;
3834  case BC_DIVNV: case BC_DIVVV:
3835    |  ins_arithpre ->BC_DIVVN_Z
3836    break;
3837  case BC_MODVN:
3838    |  ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z
3839    break;
3840  case BC_MODNV: case BC_MODVV:
3841    |  ins_arithpre ->BC_MODVN_Z
3842    break;
3843  case BC_POW:
3844    |  ins_arithpre none
3845    |  ld CARG1, 0(RB)
3846    |   ld CARG2, 0(RC)
3847    |  gettp TMP0, CARG1
3848    |   gettp TMP1, CARG2
3849    |  sltiu TMP0, TMP0, LJ_TISNUM
3850    |   sltiu TMP1, TMP1, LJ_TISNUM
3851    |  and AT, TMP0, TMP1
3852    |  load_got pow
3853    |  beqz AT, ->vmeta_arith
3854    |.  daddu RA, BASE, RA
3855    |.if FPU
3856    |  ldc1 FARG1, 0(RB)
3857    |  ldc1 FARG2, 0(RC)
3858    |.endif
3859    |  call_extern
3860    |.  nop
3861    |  ins_next1
3862    |.if FPU
3863    |  sdc1 FRET1, 0(RA)
3864    |.else
3865    |  sd CRET1, 0(RA)
3866    |.endif
3867    |  ins_next2
3868    break;
3869
3870  case BC_CAT:
3871    |  // RA = dst*8, RB = src_start*8, RC = src_end*8
3872    |  decode_RB8a RB, INS
3873    |  decode_RB8b RB
3874    |   decode_RDtoRC8 RC, RD
3875    |  dsubu CARG3, RC, RB
3876    |   sd BASE, L->base
3877    |  daddu CARG2, BASE, RC
3878    |  move MULTRES, RB
3879    |->BC_CAT_Z:
3880    |  load_got lj_meta_cat
3881    |  srl CARG3, CARG3, 3
3882    |   sd PC, SAVE_PC
3883    |  call_intern lj_meta_cat		// (lua_State *L, TValue *top, int left)
3884    |.  move CARG1, L
3885    |  // Returns NULL (finished) or TValue * (metamethod).
3886    |  bnez CRET1, ->vmeta_binop
3887    |.  ld BASE, L->base
3888    |  daddu RB, BASE, MULTRES
3889    |  ld CRET1, 0(RB)
3890    |   daddu RA, BASE, RA
3891    |  ins_next1
3892    |  sd CRET1, 0(RA)
3893    |  ins_next2
3894    break;
3895
3896  /* -- Constant ops ------------------------------------------------------ */
3897
3898  case BC_KSTR:
3899    |  // RA = dst*8, RD = str_const*8 (~)
3900    |  dsubu TMP1, KBASE, RD
3901    |  ins_next1
3902    |   li TMP2, LJ_TSTR
3903    |  ld TMP0, -8(TMP1)		// KBASE-8-str_const*8
3904    |  daddu RA, BASE, RA
3905    |   settp TMP0, TMP2
3906    |  sd TMP0, 0(RA)
3907    |  ins_next2
3908    break;
3909  case BC_KCDATA:
3910    |.if FFI
3911    |  // RA = dst*8, RD = cdata_const*8 (~)
3912    |  dsubu TMP1, KBASE, RD
3913    |  ins_next1
3914    |  ld TMP0, -8(TMP1)		// KBASE-8-cdata_const*8
3915    |   li TMP2, LJ_TCDATA
3916    |  daddu RA, BASE, RA
3917    |   settp TMP0, TMP2
3918    |  sd TMP0, 0(RA)
3919    |  ins_next2
3920    |.endif
3921    break;
3922  case BC_KSHORT:
3923    |  // RA = dst*8, RD = int16_literal*8
3924    |   sra RD, INS, 16
3925    |  daddu RA, BASE, RA
3926    |   zextw RD, RD
3927    |  ins_next1
3928    |   settp RD, TISNUM
3929    |   sd RD, 0(RA)
3930    |  ins_next2
3931    break;
3932  case BC_KNUM:
3933    |  // RA = dst*8, RD = num_const*8
3934    |  daddu RD, KBASE, RD
3935    |   daddu RA, BASE, RA
3936    |  ld CRET1, 0(RD)
3937    |  ins_next1
3938    |  sd CRET1, 0(RA)
3939    |  ins_next2
3940    break;
3941  case BC_KPRI:
3942    |  // RA = dst*8, RD = primitive_type*8 (~)
3943    |   daddu RA, BASE, RA
3944    |  dsll TMP0, RD, 44
3945    |  not TMP0, TMP0
3946    |  ins_next1
3947    |   sd TMP0, 0(RA)
3948    |  ins_next2
3949    break;
3950  case BC_KNIL:
3951    |  // RA = base*8, RD = end*8
3952    |  daddu RA, BASE, RA
3953    |  sd TISNIL, 0(RA)
3954    |   daddiu RA, RA, 8
3955    |  daddu RD, BASE, RD
3956    |1:
3957    |  sd TISNIL, 0(RA)
3958    |  slt AT, RA, RD
3959    |  bnez AT, <1
3960    |.  daddiu RA, RA, 8
3961    |  ins_next_
3962    break;
3963
3964  /* -- Upvalue and function ops ------------------------------------------ */
3965
3966  case BC_UGET:
3967    |  // RA = dst*8, RD = uvnum*8
3968    |  ld LFUNC:RB, FRAME_FUNC(BASE)
3969    |   daddu RA, BASE, RA
3970    |  cleartp LFUNC:RB
3971    |  daddu RD, RD, LFUNC:RB
3972    |  ld UPVAL:RB, LFUNC:RD->uvptr
3973    |  ins_next1
3974    |  ld TMP1, UPVAL:RB->v
3975    |  ld CRET1, 0(TMP1)
3976    |   sd CRET1, 0(RA)
3977    |  ins_next2
3978    break;
3979  case BC_USETV:
3980    |  // RA = uvnum*8, RD = src*8
3981    |  ld LFUNC:RB, FRAME_FUNC(BASE)
3982    |   daddu RD, BASE, RD
3983    |  cleartp LFUNC:RB
3984    |  daddu RA, RA, LFUNC:RB
3985    |  ld UPVAL:RB, LFUNC:RA->uvptr
3986    |   ld CRET1, 0(RD)
3987    |  lbu TMP3, UPVAL:RB->marked
3988    |   ld CARG2, UPVAL:RB->v
3989    |  andi TMP3, TMP3, LJ_GC_BLACK	// isblack(uv)
3990    |  lbu TMP0, UPVAL:RB->closed
3991    |   gettp TMP2, CRET1
3992    |   sd CRET1, 0(CARG2)
3993    |  li AT, LJ_GC_BLACK|1
3994    |  or TMP3, TMP3, TMP0
3995    |  beq TMP3, AT, >2			// Upvalue is closed and black?
3996    |.  daddiu TMP2, TMP2, -(LJ_TNUMX+1)
3997    |1:
3998    |  ins_next
3999    |
4000    |2:  // Check if new value is collectable.
4001    |  sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
4002    |  beqz AT, <1			// tvisgcv(v)
4003    |.  cleartp GCOBJ:CRET1, CRET1
4004    |  lbu TMP3, GCOBJ:CRET1->gch.marked
4005    |  andi TMP3, TMP3, LJ_GC_WHITES	// iswhite(v)
4006    |  beqz TMP3, <1
4007    |.  load_got lj_gc_barrieruv
4008    |  // Crossed a write barrier. Move the barrier forward.
4009    |  call_intern lj_gc_barrieruv	// (global_State *g, TValue *tv)
4010    |.  daddiu CARG1, DISPATCH, GG_DISP2G
4011    |  b <1
4012    |.  nop
4013    break;
4014  case BC_USETS:
4015    |  // RA = uvnum*8, RD = str_const*8 (~)
4016    |  ld LFUNC:RB, FRAME_FUNC(BASE)
4017    |   dsubu TMP1, KBASE, RD
4018    |  cleartp LFUNC:RB
4019    |  daddu RA, RA, LFUNC:RB
4020    |  ld UPVAL:RB, LFUNC:RA->uvptr
4021    |   ld STR:TMP1, -8(TMP1)		// KBASE-8-str_const*8
4022    |  lbu TMP2, UPVAL:RB->marked
4023    |   ld CARG2, UPVAL:RB->v
4024    |   lbu TMP3, STR:TMP1->marked
4025    |  andi AT, TMP2, LJ_GC_BLACK	// isblack(uv)
4026    |   lbu TMP2, UPVAL:RB->closed
4027    |   li TMP0, LJ_TSTR
4028    |   settp TMP1, TMP0
4029    |  bnez AT, >2
4030    |.  sd TMP1, 0(CARG2)
4031    |1:
4032    |  ins_next
4033    |
4034    |2:  // Check if string is white and ensure upvalue is closed.
4035    |  beqz TMP2, <1
4036    |.  andi AT, TMP3, LJ_GC_WHITES	// iswhite(str)
4037    |  beqz AT, <1
4038    |.  load_got lj_gc_barrieruv
4039    |  // Crossed a write barrier. Move the barrier forward.
4040    |  call_intern lj_gc_barrieruv	// (global_State *g, TValue *tv)
4041    |.  daddiu CARG1, DISPATCH, GG_DISP2G
4042    |  b <1
4043    |.  nop
4044    break;
4045  case BC_USETN:
4046    |  // RA = uvnum*8, RD = num_const*8
4047    |  ld LFUNC:RB, FRAME_FUNC(BASE)
4048    |   daddu RD, KBASE, RD
4049    |  cleartp LFUNC:RB
4050    |  daddu RA, RA, LFUNC:RB
4051    |  ld UPVAL:RB, LFUNC:RA->uvptr
4052    |   ld CRET1, 0(RD)
4053    |  ld TMP1, UPVAL:RB->v
4054    |  ins_next1
4055    |   sd CRET1, 0(TMP1)
4056    |  ins_next2
4057    break;
4058  case BC_USETP:
4059    |  // RA = uvnum*8, RD = primitive_type*8 (~)
4060    |  ld LFUNC:RB, FRAME_FUNC(BASE)
4061    |   dsll TMP0, RD, 44
4062    |  cleartp LFUNC:RB
4063    |  daddu RA, RA, LFUNC:RB
4064    |   not TMP0, TMP0
4065    |  ld UPVAL:RB, LFUNC:RA->uvptr
4066    |  ins_next1
4067    |  ld TMP1, UPVAL:RB->v
4068    |   sd TMP0, 0(TMP1)
4069    |  ins_next2
4070    break;
4071
4072  case BC_UCLO:
4073    |  // RA = level*8, RD = target
4074    |  ld TMP2, L->openupval
4075    |  branch_RD			// Do this first since RD is not saved.
4076    |  load_got lj_func_closeuv
4077    |   sd BASE, L->base
4078    |  beqz TMP2, >1
4079    |.  move CARG1, L
4080    |  call_intern lj_func_closeuv	// (lua_State *L, TValue *level)
4081    |.  daddu CARG2, BASE, RA
4082    |  ld BASE, L->base
4083    |1:
4084    |  ins_next
4085    break;
4086
4087  case BC_FNEW:
4088    |  // RA = dst*8, RD = proto_const*8 (~) (holding function prototype)
4089    |  load_got lj_func_newL_gc
4090    |  dsubu TMP1, KBASE, RD
4091    |  ld CARG3, FRAME_FUNC(BASE)
4092    |   ld CARG2, -8(TMP1)		// KBASE-8-tab_const*8
4093    |    sd BASE, L->base
4094    |    sd PC, SAVE_PC
4095    |  cleartp CARG3
4096    |  // (lua_State *L, GCproto *pt, GCfuncL *parent)
4097    |  call_intern lj_func_newL_gc
4098    |.  move CARG1, L
4099    |  // Returns GCfuncL *.
4100    |   li TMP0, LJ_TFUNC
4101    |  ld BASE, L->base
4102    |  ins_next1
4103    |   settp CRET1, TMP0
4104    |  daddu RA, BASE, RA
4105    |   sd CRET1, 0(RA)
4106    |  ins_next2
4107    break;
4108
4109  /* -- Table ops --------------------------------------------------------- */
4110
4111  case BC_TNEW:
4112  case BC_TDUP:
4113    |  // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~)
4114    |  ld TMP0, DISPATCH_GL(gc.total)(DISPATCH)
4115    |  ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
4116    |   sd BASE, L->base
4117    |   sd PC, SAVE_PC
4118    |  sltu AT, TMP0, TMP1
4119    |  beqz AT, >5
4120    |1:
4121    if (op == BC_TNEW) {
4122      |  load_got lj_tab_new
4123      |  srl CARG2, RD, 3
4124      |  andi CARG2, CARG2, 0x7ff
4125      |  li TMP0, 0x801
4126      |  addiu AT, CARG2, -0x7ff
4127      |   srl CARG3, RD, 14
4128      |.if MIPSR6
4129      |  seleqz TMP0, TMP0, AT
4130      |  selnez CARG2, CARG2, AT
4131      |  or CARG2, CARG2, TMP0
4132      |.else
4133      |  movz CARG2, TMP0, AT
4134      |.endif
4135      |  // (lua_State *L, int32_t asize, uint32_t hbits)
4136      |  call_intern lj_tab_new
4137      |.  move CARG1, L
4138      |  // Returns Table *.
4139    } else {
4140      |  load_got lj_tab_dup
4141      |  dsubu TMP1, KBASE, RD
4142      |  move CARG1, L
4143      |  call_intern lj_tab_dup		// (lua_State *L, Table *kt)
4144      |.  ld CARG2, -8(TMP1)		// KBASE-8-str_const*8
4145      |  // Returns Table *.
4146    }
4147    |   li TMP0, LJ_TTAB
4148    |  ld BASE, L->base
4149    |  ins_next1
4150    |  daddu RA, BASE, RA
4151    |   settp CRET1, TMP0
4152    |   sd CRET1, 0(RA)
4153    |  ins_next2
4154    |5:
4155    |  load_got lj_gc_step_fixtop
4156    |  move MULTRES, RD
4157    |  call_intern lj_gc_step_fixtop	// (lua_State *L)
4158    |.  move CARG1, L
4159    |  b <1
4160    |.  move RD, MULTRES
4161    break;
4162
4163  case BC_GGET:
4164    |  // RA = dst*8, RD = str_const*8 (~)
4165  case BC_GSET:
4166    |  // RA = src*8, RD = str_const*8 (~)
4167    |  ld LFUNC:TMP2, FRAME_FUNC(BASE)
4168    |   dsubu TMP1, KBASE, RD
4169    |   ld STR:RC, -8(TMP1)		// KBASE-8-str_const*8
4170    |  cleartp LFUNC:TMP2
4171    |  ld TAB:RB, LFUNC:TMP2->env
4172    if (op == BC_GGET) {
4173      |  b ->BC_TGETS_Z
4174    } else {
4175      |  b ->BC_TSETS_Z
4176    }
4177    |.  daddu RA, BASE, RA
4178    break;
4179
4180  case BC_TGETV:
4181    |  // RA = dst*8, RB = table*8, RC = key*8
4182    |  decode_RB8a RB, INS
4183    |  decode_RB8b RB
4184    |   decode_RDtoRC8 RC, RD
4185    |  daddu CARG2, BASE, RB
4186    |   daddu CARG3, BASE, RC
4187    |  ld TAB:RB, 0(CARG2)
4188    |   ld TMP2, 0(CARG3)
4189    |   daddu RA, BASE, RA
4190    |  checktab TAB:RB, ->vmeta_tgetv
4191    |   gettp TMP3, TMP2
4192    |  bne TMP3, TISNUM, >5		// Integer key?
4193    |.  lw TMP0, TAB:RB->asize
4194    |  sextw TMP2, TMP2
4195    |   ld TMP1, TAB:RB->array
4196    |  sltu AT, TMP2, TMP0
4197    |   sll TMP2, TMP2, 3
4198    |  beqz AT, ->vmeta_tgetv		// Integer key and in array part?
4199    |.  daddu TMP2, TMP1, TMP2
4200    |  ld AT, 0(TMP2)
4201    |  beq AT, TISNIL, >2
4202    |.   ld CRET1, 0(TMP2)
4203    |1:
4204    |  ins_next1
4205    |   sd CRET1, 0(RA)
4206    |  ins_next2
4207    |
4208    |2:  // Check for __index if table value is nil.
4209    |  ld TAB:TMP2, TAB:RB->metatable
4210    |  beqz TAB:TMP2, <1		// No metatable: done.
4211    |.  nop
4212    |  lbu TMP0, TAB:TMP2->nomm
4213    |  andi TMP0, TMP0, 1<<MM_index
4214    |  bnez TMP0, <1			// 'no __index' flag set: done.
4215    |.  nop
4216    |  b ->vmeta_tgetv
4217    |.  nop
4218    |
4219    |5:
4220    |  li AT, LJ_TSTR
4221    |  bne TMP3, AT, ->vmeta_tgetv
4222    |.  cleartp RC, TMP2
4223    |  b ->BC_TGETS_Z			// String key?
4224    |.  nop
4225    break;
4226  case BC_TGETS:
4227    |  // RA = dst*8, RB = table*8, RC = str_const*8 (~)
4228    |  decode_RB8a RB, INS
4229    |  decode_RB8b RB
4230    |   decode_RC8a RC, INS
4231    |  daddu CARG2, BASE, RB
4232    |   decode_RC8b RC
4233    |  ld TAB:RB, 0(CARG2)
4234    |   dsubu CARG3, KBASE, RC
4235    |  daddu RA, BASE, RA
4236    |   ld STR:RC, -8(CARG3)		// KBASE-8-str_const*8
4237    |  checktab TAB:RB, ->vmeta_tgets1
4238    |->BC_TGETS_Z:
4239    |  // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
4240    |  lw TMP0, TAB:RB->hmask
4241    |   lw TMP1, STR:RC->sid
4242    |    ld NODE:TMP2, TAB:RB->node
4243    |  and TMP1, TMP1, TMP0		// idx = str->sid & tab->hmask
4244    |  sll TMP0, TMP1, 5
4245    |  sll TMP1, TMP1, 3
4246    |  subu TMP1, TMP0, TMP1
4247    |   li TMP3, LJ_TSTR
4248    |  daddu NODE:TMP2, NODE:TMP2, TMP1	// node = tab->node + (idx*32-idx*8)
4249    |   settp STR:RC, TMP3		// Tagged key to look for.
4250    |1:
4251    |  ld CARG1, NODE:TMP2->key
4252    |   ld CRET1, NODE:TMP2->val
4253    |    ld NODE:TMP1, NODE:TMP2->next
4254    |  bne CARG1, RC, >4
4255    |.  ld TAB:TMP3, TAB:RB->metatable
4256    |  beq CRET1, TISNIL, >5		// Key found, but nil value?
4257    |.  nop
4258    |3:
4259    |  ins_next1
4260    |   sd CRET1, 0(RA)
4261    |  ins_next2
4262    |
4263    |4:  // Follow hash chain.
4264    |  bnez NODE:TMP1, <1
4265    |.  move NODE:TMP2, NODE:TMP1
4266    |  // End of hash chain: key not found, nil result.
4267    |
4268    |5:  // Check for __index if table value is nil.
4269    |  beqz TAB:TMP3, <3		// No metatable: done.
4270    |.  move CRET1, TISNIL
4271    |  lbu TMP0, TAB:TMP3->nomm
4272    |  andi TMP0, TMP0, 1<<MM_index
4273    |  bnez TMP0, <3			// 'no __index' flag set: done.
4274    |.  nop
4275    |  b ->vmeta_tgets
4276    |.  nop
4277    break;
4278  case BC_TGETB:
4279    |  // RA = dst*8, RB = table*8, RC = index*8
4280    |  decode_RB8a RB, INS
4281    |  decode_RB8b RB
4282    |  daddu CARG2, BASE, RB
4283    |   decode_RDtoRC8 RC, RD
4284    |  ld TAB:RB, 0(CARG2)
4285    |   daddu RA, BASE, RA
4286    |  srl TMP0, RC, 3
4287    |  checktab TAB:RB, ->vmeta_tgetb
4288    |  lw TMP1, TAB:RB->asize
4289    |   ld TMP2, TAB:RB->array
4290    |  sltu AT, TMP0, TMP1
4291    |  beqz AT, ->vmeta_tgetb
4292    |.  daddu RC, TMP2, RC
4293    |  ld AT, 0(RC)
4294    |  beq AT, TISNIL, >5
4295    |.  ld CRET1, 0(RC)
4296    |1:
4297    |  ins_next1
4298    |   sd CRET1, 0(RA)
4299    |  ins_next2
4300    |
4301    |5:  // Check for __index if table value is nil.
4302    |  ld TAB:TMP2, TAB:RB->metatable
4303    |  beqz TAB:TMP2, <1		// No metatable: done.
4304    |.  nop
4305    |  lbu TMP1, TAB:TMP2->nomm
4306    |  andi TMP1, TMP1, 1<<MM_index
4307    |  bnez TMP1, <1			// 'no __index' flag set: done.
4308    |.  nop
4309    |  b ->vmeta_tgetb			// Caveat: preserve TMP0 and CARG2!
4310    |.  nop
4311    break;
4312  case BC_TGETR:
4313    |  // RA = dst*8, RB = table*8, RC = key*8
4314    |  decode_RB8a RB, INS
4315    |  decode_RB8b RB
4316    |   decode_RDtoRC8 RC, RD
4317    |  daddu RB, BASE, RB
4318    |   daddu RC, BASE, RC
4319    |  ld TAB:CARG1, 0(RB)
4320    |   lw CARG2, LO(RC)
4321    |    daddu RA, BASE, RA
4322    |  cleartp TAB:CARG1
4323    |  lw TMP0, TAB:CARG1->asize
4324    |   ld TMP1, TAB:CARG1->array
4325    |  sltu AT, CARG2, TMP0
4326    |   sll TMP2, CARG2, 3
4327    |  beqz AT, ->vmeta_tgetr		// In array part?
4328    |.  daddu CRET1, TMP1, TMP2
4329    |   ld CARG2, 0(CRET1)
4330    |->BC_TGETR_Z:
4331    |  ins_next1
4332    |   sd CARG2, 0(RA)
4333    |  ins_next2
4334    break;
4335
4336  case BC_TSETV:
4337    |  // RA = src*8, RB = table*8, RC = key*8
4338    |  decode_RB8a RB, INS
4339    |  decode_RB8b RB
4340    |   decode_RDtoRC8 RC, RD
4341    |  daddu CARG2, BASE, RB
4342    |   daddu CARG3, BASE, RC
4343    |  ld RB, 0(CARG2)
4344    |   ld TMP2, 0(CARG3)
4345    |  daddu RA, BASE, RA
4346    |  checktab RB, ->vmeta_tsetv
4347    |  checkint TMP2, >5
4348    |.  sextw RC, TMP2
4349    |  lw TMP0, TAB:RB->asize
4350    |   ld TMP1, TAB:RB->array
4351    |  sltu AT, RC, TMP0
4352    |   sll TMP2, RC, 3
4353    |  beqz AT, ->vmeta_tsetv		// Integer key and in array part?
4354    |.  daddu TMP1, TMP1, TMP2
4355    |  ld TMP0, 0(TMP1)
4356    |   lbu TMP3, TAB:RB->marked
4357    |  beq TMP0, TISNIL, >3
4358    |.  ld CRET1, 0(RA)
4359    |1:
4360    |   andi AT, TMP3, LJ_GC_BLACK	// isblack(table)
4361    |  bnez AT, >7
4362    |.  sd CRET1, 0(TMP1)
4363    |2:
4364    |  ins_next
4365    |
4366    |3:  // Check for __newindex if previous value is nil.
4367    |  ld TAB:TMP2, TAB:RB->metatable
4368    |  beqz TAB:TMP2, <1		// No metatable: done.
4369    |.  nop
4370    |  lbu TMP2, TAB:TMP2->nomm
4371    |  andi TMP2, TMP2, 1<<MM_newindex
4372    |  bnez TMP2, <1			// 'no __newindex' flag set: done.
4373    |.  nop
4374    |  b ->vmeta_tsetv
4375    |.  nop
4376    |
4377    |5:
4378    |  gettp AT, TMP2
4379    |  daddiu AT, AT, -LJ_TSTR
4380    |  bnez AT, ->vmeta_tsetv
4381    |.  nop
4382    |  b ->BC_TSETS_Z			// String key?
4383    |.  cleartp STR:RC, TMP2
4384    |
4385    |7:  // Possible table write barrier for the value. Skip valiswhite check.
4386    |  barrierback TAB:RB, TMP3, TMP0, <2
4387    break;
4388  case BC_TSETS:
4389    |  // RA = src*8, RB = table*8, RC = str_const*8 (~)
4390    |  decode_RB8a RB, INS
4391    |  decode_RB8b RB
4392    |  daddu CARG2, BASE, RB
4393    |   decode_RC8a RC, INS
4394    |    ld TAB:RB, 0(CARG2)
4395    |   decode_RC8b RC
4396    |   dsubu CARG3, KBASE, RC
4397    |   ld RC, -8(CARG3)		// KBASE-8-str_const*8
4398    |  daddu RA, BASE, RA
4399    |   cleartp STR:RC
4400    |  checktab TAB:RB, ->vmeta_tsets1
4401    |->BC_TSETS_Z:
4402    |  // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8
4403    |  lw TMP0, TAB:RB->hmask
4404    |   lw TMP1, STR:RC->sid
4405    |    ld NODE:TMP2, TAB:RB->node
4406    |   sb r0, TAB:RB->nomm		// Clear metamethod cache.
4407    |  and TMP1, TMP1, TMP0		// idx = str->sid & tab->hmask
4408    |  sll TMP0, TMP1, 5
4409    |  sll TMP1, TMP1, 3
4410    |  subu TMP1, TMP0, TMP1
4411    |   li TMP3, LJ_TSTR
4412    |  daddu NODE:TMP2, NODE:TMP2, TMP1	// node = tab->node + (idx*32-idx*8)
4413    |   settp STR:RC, TMP3		// Tagged key to look for.
4414    |.if FPU
4415    |   ldc1 FTMP0, 0(RA)
4416    |.else
4417    |   ld CRET1, 0(RA)
4418    |.endif
4419    |1:
4420    |  ld TMP0, NODE:TMP2->key
4421    |   ld CARG2, NODE:TMP2->val
4422    |    ld NODE:TMP1, NODE:TMP2->next
4423    |  bne TMP0, RC, >5
4424    |.    lbu TMP3, TAB:RB->marked
4425    |   beq CARG2, TISNIL, >4		// Key found, but nil value?
4426    |.   ld TAB:TMP0, TAB:RB->metatable
4427    |2:
4428    |  andi AT, TMP3, LJ_GC_BLACK	// isblack(table)
4429    |  bnez AT, >7
4430    |.if FPU
4431    |.  sdc1 FTMP0, NODE:TMP2->val
4432    |.else
4433    |.  sd CRET1, NODE:TMP2->val
4434    |.endif
4435    |3:
4436    |  ins_next
4437    |
4438    |4:  // Check for __newindex if previous value is nil.
4439    |  beqz TAB:TMP0, <2		// No metatable: done.
4440    |.  nop
4441    |  lbu TMP0, TAB:TMP0->nomm
4442    |  andi TMP0, TMP0, 1<<MM_newindex
4443    |  bnez TMP0, <2			// 'no __newindex' flag set: done.
4444    |.  nop
4445    |  b ->vmeta_tsets
4446    |.  nop
4447    |
4448    |5:  // Follow hash chain.
4449    |  bnez NODE:TMP1, <1
4450    |.  move NODE:TMP2, NODE:TMP1
4451    |  // End of hash chain: key not found, add a new one
4452    |
4453    |  // But check for __newindex first.
4454    |  ld TAB:TMP2, TAB:RB->metatable
4455    |  beqz TAB:TMP2, >6		// No metatable: continue.
4456    |.  daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
4457    |  lbu TMP0, TAB:TMP2->nomm
4458    |  andi TMP0, TMP0, 1<<MM_newindex
4459    |  beqz TMP0, ->vmeta_tsets		// 'no __newindex' flag NOT set: check.
4460    |6:
4461    |  load_got lj_tab_newkey
4462    |  sd RC, 0(CARG3)
4463    |   sd BASE, L->base
4464    |  move CARG2, TAB:RB
4465    |   sd PC, SAVE_PC
4466    |  call_intern lj_tab_newkey	// (lua_State *L, GCtab *t, TValue *k
4467    |.  move CARG1, L
4468    |  // Returns TValue *.
4469    |  ld BASE, L->base
4470    |.if FPU
4471    |  b <3				// No 2nd write barrier needed.
4472    |.  sdc1 FTMP0, 0(CRET1)
4473    |.else
4474    |  ld CARG1, 0(RA)
4475    |  b <3				// No 2nd write barrier needed.
4476    |.  sd CARG1, 0(CRET1)
4477    |.endif
4478    |
4479    |7:  // Possible table write barrier for the value. Skip valiswhite check.
4480    |  barrierback TAB:RB, TMP3, TMP0, <3
4481    break;
4482  case BC_TSETB:
4483    |  // RA = src*8, RB = table*8, RC = index*8
4484    |  decode_RB8a RB, INS
4485    |  decode_RB8b RB
4486    |  daddu CARG2, BASE, RB
4487    |   decode_RDtoRC8 RC, RD
4488    |  ld TAB:RB, 0(CARG2)
4489    |   daddu RA, BASE, RA
4490    |  srl TMP0, RC, 3
4491    |  checktab RB, ->vmeta_tsetb
4492    |  lw TMP1, TAB:RB->asize
4493    |   ld TMP2, TAB:RB->array
4494    |  sltu AT, TMP0, TMP1
4495    |  beqz AT, ->vmeta_tsetb
4496    |.  daddu RC, TMP2, RC
4497    |  ld TMP1, 0(RC)
4498    |   lbu TMP3, TAB:RB->marked
4499    |  beq TMP1, TISNIL, >5
4500    |1:
4501    |.  ld CRET1, 0(RA)
4502    |  andi AT, TMP3, LJ_GC_BLACK	// isblack(table)
4503    |  bnez AT, >7
4504    |.   sd CRET1, 0(RC)
4505    |2:
4506    |  ins_next
4507    |
4508    |5:  // Check for __newindex if previous value is nil.
4509    |  ld TAB:TMP2, TAB:RB->metatable
4510    |  beqz TAB:TMP2, <1		// No metatable: done.
4511    |.  nop
4512    |  lbu TMP1, TAB:TMP2->nomm
4513    |  andi TMP1, TMP1, 1<<MM_newindex
4514    |  bnez TMP1, <1			// 'no __newindex' flag set: done.
4515    |.  nop
4516    |  b ->vmeta_tsetb			// Caveat: preserve TMP0 and CARG2!
4517    |.  nop
4518    |
4519    |7:  // Possible table write barrier for the value. Skip valiswhite check.
4520    |  barrierback TAB:RB, TMP3, TMP0, <2
4521    break;
4522  case BC_TSETR:
4523    |  // RA = dst*8, RB = table*8, RC = key*8
4524    |  decode_RB8a RB, INS
4525    |  decode_RB8b RB
4526    |   decode_RDtoRC8 RC, RD
4527    |  daddu CARG1, BASE, RB
4528    |   daddu CARG3, BASE, RC
4529    |  ld TAB:CARG2, 0(CARG1)
4530    |   lw CARG3, LO(CARG3)
4531    |  cleartp TAB:CARG2
4532    |  lbu TMP3, TAB:CARG2->marked
4533    |   lw TMP0, TAB:CARG2->asize
4534    |    ld TMP1, TAB:CARG2->array
4535    |  andi AT, TMP3, LJ_GC_BLACK	// isblack(table)
4536    |  bnez AT, >7
4537    |.  daddu RA, BASE, RA
4538    |2:
4539    |  sltu AT, CARG3, TMP0
4540    |   sll TMP2, CARG3, 3
4541    |  beqz AT, ->vmeta_tsetr		// In array part?
4542    |.  daddu CRET1, TMP1, TMP2
4543    |->BC_TSETR_Z:
4544    |  ld CARG1, 0(RA)
4545    |  ins_next1
4546    |  sd CARG1, 0(CRET1)
4547    |  ins_next2
4548    |
4549    |7:  // Possible table write barrier for the value. Skip valiswhite check.
4550    |  barrierback TAB:CARG2, TMP3, CRET1, <2
4551    break;
4552
4553  case BC_TSETM:
4554    |  // RA = base*8 (table at base-1), RD = num_const*8 (start index)
4555    |  daddu RA, BASE, RA
4556    |1:
4557    |   daddu TMP3, KBASE, RD
4558    |  ld TAB:CARG2, -8(RA)		// Guaranteed to be a table.
4559    |    addiu TMP0, MULTRES, -8
4560    |   lw TMP3, LO(TMP3)		// Integer constant is in lo-word.
4561    |    beqz TMP0, >4			// Nothing to copy?
4562    |.    srl CARG3, TMP0, 3
4563    |  cleartp CARG2
4564    |  addu CARG3, CARG3, TMP3
4565    |  lw TMP2, TAB:CARG2->asize
4566    |   sll TMP1, TMP3, 3
4567    |    lbu TMP3, TAB:CARG2->marked
4568    |   ld CARG1, TAB:CARG2->array
4569    |  sltu AT, TMP2, CARG3
4570    |  bnez AT, >5
4571    |.  daddu TMP2, RA, TMP0
4572    |   daddu TMP1, TMP1, CARG1
4573    |  andi TMP0, TMP3, LJ_GC_BLACK	// isblack(table)
4574    |3:  // Copy result slots to table.
4575    |   ld CRET1, 0(RA)
4576    |    daddiu RA, RA, 8
4577    |  sltu AT, RA, TMP2
4578    |   sd CRET1, 0(TMP1)
4579    |  bnez AT, <3
4580    |.   daddiu TMP1, TMP1, 8
4581    |  bnez TMP0, >7
4582    |.  nop
4583    |4:
4584    |  ins_next
4585    |
4586    |5:  // Need to resize array part.
4587    |  load_got lj_tab_reasize
4588    |   sd BASE, L->base
4589    |   sd PC, SAVE_PC
4590    |  move BASE, RD
4591    |  call_intern lj_tab_reasize	// (lua_State *L, GCtab *t, int nasize)
4592    |.  move CARG1, L
4593    |  // Must not reallocate the stack.
4594    |  move RD, BASE
4595    |  b <1
4596    |.  ld BASE, L->base	// Reload BASE for lack of a saved register.
4597    |
4598    |7:  // Possible table write barrier for any value. Skip valiswhite check.
4599    |  barrierback TAB:CARG2, TMP3, TMP0, <4
4600    break;
4601
4602  /* -- Calls and vararg handling ----------------------------------------- */
4603
4604  case BC_CALLM:
4605    |  // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8
4606    |  decode_RDtoRC8 NARGS8:RC, RD
4607    |  b ->BC_CALL_Z
4608    |.  addu NARGS8:RC, NARGS8:RC, MULTRES
4609    break;
4610  case BC_CALL:
4611    |  // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
4612    |  decode_RDtoRC8 NARGS8:RC, RD
4613    |->BC_CALL_Z:
4614    |  move TMP2, BASE
4615    |  daddu BASE, BASE, RA
4616    |   ld LFUNC:RB, 0(BASE)
4617    |   daddiu BASE, BASE, 16
4618    |  addiu NARGS8:RC, NARGS8:RC, -8
4619    |  checkfunc RB, ->vmeta_call
4620    |  ins_call
4621    break;
4622
4623  case BC_CALLMT:
4624    |  // RA = base*8, (RB = 0,) RC = extra_nargs*8
4625    |  addu NARGS8:RD, NARGS8:RD, MULTRES	// BC_CALLT gets RC from RD.
4626    |  // Fall through. Assumes BC_CALLT follows.
4627    break;
4628  case BC_CALLT:
4629    |  // RA = base*8, (RB = 0,) RC = (nargs+1)*8
4630    |  daddu RA, BASE, RA
4631    |  ld RB, 0(RA)
4632    |   move NARGS8:RC, RD
4633    |    ld TMP1, FRAME_PC(BASE)
4634    |   daddiu RA, RA, 16
4635    |  addiu NARGS8:RC, NARGS8:RC, -8
4636    |  checktp CARG3, RB, -LJ_TFUNC, ->vmeta_callt
4637    |->BC_CALLT_Z:
4638    |  andi TMP0, TMP1, FRAME_TYPE	// Caveat: preserve TMP0 until the 'or'.
4639    |   lbu TMP3, LFUNC:CARG3->ffid
4640    |  bnez TMP0, >7
4641    |.  xori TMP2, TMP1, FRAME_VARG
4642    |1:
4643    |  sd RB, FRAME_FUNC(BASE)		// Copy function down, but keep PC.
4644    |  sltiu AT, TMP3, 2		// (> FF_C) Calling a fast function?
4645    |  move TMP2, BASE
4646    |  move RB, CARG3
4647    |  beqz NARGS8:RC, >3
4648    |.  move TMP3, NARGS8:RC
4649    |2:
4650    |   ld CRET1, 0(RA)
4651    |    daddiu RA, RA, 8
4652    |  addiu TMP3, TMP3, -8
4653    |   sd CRET1, 0(TMP2)
4654    |  bnez TMP3, <2
4655    |.   daddiu TMP2, TMP2, 8
4656    |3:
4657    |  or TMP0, TMP0, AT
4658    |  beqz TMP0, >5
4659    |.  nop
4660    |4:
4661    |  ins_callt
4662    |
4663    |5:  // Tailcall to a fast function with a Lua frame below.
4664    |  lw INS, -4(TMP1)
4665    |  decode_RA8a RA, INS
4666    |  decode_RA8b RA
4667    |  dsubu TMP1, BASE, RA
4668    |  ld TMP1, -32(TMP1)
4669    |  cleartp LFUNC:TMP1
4670    |  ld TMP1, LFUNC:TMP1->pc
4671    |  b <4
4672    |.  ld KBASE, PC2PROTO(k)(TMP1)	// Need to prepare KBASE.
4673    |
4674    |7:  // Tailcall from a vararg function.
4675    |  andi AT, TMP2, FRAME_TYPEP
4676    |  bnez AT, <1			// Vararg frame below?
4677    |.  dsubu TMP2, BASE, TMP2		// Relocate BASE down.
4678    |  move BASE, TMP2
4679    |  ld TMP1, FRAME_PC(TMP2)
4680    |  b <1
4681    |.  andi TMP0, TMP1, FRAME_TYPE
4682    break;
4683
4684  case BC_ITERC:
4685    |  // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
4686    |  move TMP2, BASE			// Save old BASE fir vmeta_call.
4687    |  daddu BASE, BASE, RA
4688    |  ld RB, -24(BASE)
4689    |   ld CARG1, -16(BASE)
4690    |    ld CARG2, -8(BASE)
4691    |  li NARGS8:RC, 16			// Iterators get 2 arguments.
4692    |  sd RB, 0(BASE)			// Copy callable.
4693    |   sd CARG1, 16(BASE)		// Copy state.
4694    |    sd CARG2, 24(BASE)		// Copy control var.
4695    |   daddiu BASE, BASE, 16
4696    |  checkfunc RB, ->vmeta_call
4697    |  ins_call
4698    break;
4699
4700  case BC_ITERN:
4701    |  // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
4702    |.if JIT
4703    |  // NYI: add hotloop, record BC_ITERN.
4704    |.endif
4705    |  daddu RA, BASE, RA
4706    |  ld TAB:RB, -16(RA)
4707    |   lw RC, -8+LO(RA)		// Get index from control var.
4708    |  cleartp TAB:RB
4709    |   daddiu PC, PC, 4
4710    |  lw TMP0, TAB:RB->asize
4711    |   ld TMP1, TAB:RB->array
4712    |  dsll CARG3, TISNUM, 47
4713    |1:  // Traverse array part.
4714    |  sltu AT, RC, TMP0
4715    |  beqz AT, >5			// Index points after array part?
4716    |.  sll TMP3, RC, 3
4717    |  daddu TMP3, TMP1, TMP3
4718    |  ld CARG1, 0(TMP3)
4719    |     lhu RD, -4+OFS_RD(PC)
4720    |   or TMP2, RC, CARG3
4721    |  beq CARG1, TISNIL, <1		// Skip holes in array part.
4722    |.  addiu RC, RC, 1
4723    |   sd TMP2, 0(RA)
4724    |  sd CARG1, 8(RA)
4725    |   or TMP0, RC, CARG3
4726    |     lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
4727    |     decode_RD4b RD
4728    |     daddu RD, RD, TMP3
4729    |   sw TMP0, -8+LO(RA)		// Update control var.
4730    |     daddu PC, PC, RD
4731    |3:
4732    |  ins_next
4733    |
4734    |5:  // Traverse hash part.
4735    |  lw TMP1, TAB:RB->hmask
4736    |  subu RC, RC, TMP0
4737    |   ld TMP2, TAB:RB->node
4738    |6:
4739    |  sltu AT, TMP1, RC		// End of iteration? Branch to ITERL+1.
4740    |  bnez AT, <3
4741    |.  sll TMP3, RC, 5
4742    |   sll RB, RC, 3
4743    |   subu TMP3, TMP3, RB
4744    |  daddu NODE:TMP3, TMP3, TMP2
4745    |  ld CARG1, 0(NODE:TMP3)
4746    |     lhu RD, -4+OFS_RD(PC)
4747    |  beq CARG1, TISNIL, <6		// Skip holes in hash part.
4748    |.  addiu RC, RC, 1
4749    |  ld CARG2, NODE:TMP3->key
4750    |     lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
4751    |  sd CARG1, 8(RA)
4752    |    addu RC, RC, TMP0
4753    |     decode_RD4b RD
4754    |     addu RD, RD, TMP3
4755    |  sd CARG2, 0(RA)
4756    |     daddu PC, PC, RD
4757    |  b <3
4758    |.  sw RC, -8+LO(RA)		// Update control var.
4759    break;
4760
4761  case BC_ISNEXT:
4762    |  // RA = base*8, RD = target (points to ITERN)
4763    |  daddu RA, BASE, RA
4764    |    srl TMP0, RD, 1
4765    |  ld CFUNC:CARG1, -24(RA)
4766    |    daddu TMP0, PC, TMP0
4767    |   ld CARG2, -16(RA)
4768    |   ld CARG3, -8(RA)
4769    |    lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
4770    |  checkfunc CFUNC:CARG1, >5
4771    |  gettp CARG2, CARG2
4772    |  daddiu CARG2, CARG2, -LJ_TTAB
4773    |  lbu TMP1, CFUNC:CARG1->ffid
4774    |  daddiu CARG3, CARG3, -LJ_TNIL
4775    |  or AT, CARG2, CARG3
4776    |  daddiu TMP1, TMP1, -FF_next_N
4777    |  or AT, AT, TMP1
4778    |  bnez AT, >5
4779    |.  lui TMP1, 0xfffe
4780    |  daddu PC, TMP0, TMP2
4781    |  ori TMP1, TMP1, 0x7fff
4782    |  dsll TMP1, TMP1, 32
4783    |  sd TMP1, -8(RA)
4784    |1:
4785    |  ins_next
4786    |5:  // Despecialize bytecode if any of the checks fail.
4787    |  li TMP3, BC_JMP
4788    |   li TMP1, BC_ITERC
4789    |  sb TMP3, -4+OFS_OP(PC)
4790    |   daddu PC, TMP0, TMP2
4791    |  b <1
4792    |.  sb TMP1, OFS_OP(PC)
4793    break;
4794
4795  case BC_VARG:
4796    |  // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
4797    |  ld TMP0, FRAME_PC(BASE)
4798    |  decode_RDtoRC8 RC, RD
4799    |   decode_RB8a RB, INS
4800    |  daddu RC, BASE, RC
4801    |   decode_RB8b RB
4802    |   daddu RA, BASE, RA
4803    |  daddiu RC, RC, FRAME_VARG
4804    |   daddu TMP2, RA, RB
4805    |  daddiu TMP3, BASE, -16		// TMP3 = vtop
4806    |  dsubu RC, RC, TMP0		// RC = vbase
4807    |  // Note: RC may now be even _above_ BASE if nargs was < numparams.
4808    |  beqz RB, >5			// Copy all varargs?
4809    |.  dsubu TMP1, TMP3, RC
4810    |  daddiu TMP2, TMP2, -16
4811    |1:  // Copy vararg slots to destination slots.
4812    |  ld CARG1, 0(RC)
4813    |  sltu AT, RC, TMP3
4814    |    daddiu RC, RC, 8
4815    |.if MIPSR6
4816    |  selnez CARG1, CARG1, AT
4817    |  seleqz AT, TISNIL, AT
4818    |  or CARG1, CARG1, AT
4819    |.else
4820    |  movz CARG1, TISNIL, AT
4821    |.endif
4822    |  sd CARG1, 0(RA)
4823    |  sltu AT, RA, TMP2
4824    |  bnez AT, <1
4825    |.   daddiu RA, RA, 8
4826    |3:
4827    |  ins_next
4828    |
4829    |5:  // Copy all varargs.
4830    |  ld TMP0, L->maxstack
4831    |  blez TMP1, <3			// No vararg slots?
4832    |.  li MULTRES, 8			// MULTRES = (0+1)*8
4833    |  daddu TMP2, RA, TMP1
4834    |  sltu AT, TMP0, TMP2
4835    |  bnez AT, >7
4836    |.  daddiu MULTRES, TMP1, 8
4837    |6:
4838    |  ld CRET1, 0(RC)
4839    |   daddiu RC, RC, 8
4840    |  sd CRET1, 0(RA)
4841    |  sltu AT, RC, TMP3
4842    |  bnez AT, <6			// More vararg slots?
4843    |.  daddiu RA, RA, 8
4844    |  b <3
4845    |.  nop
4846    |
4847    |7:  // Grow stack for varargs.
4848    |  load_got lj_state_growstack
4849    |   sd RA, L->top
4850    |  dsubu RA, RA, BASE
4851    |   sd BASE, L->base
4852    |  dsubu BASE, RC, BASE		// Need delta, because BASE may change.
4853    |   sd PC, SAVE_PC
4854    |  srl CARG2, TMP1, 3
4855    |  call_intern lj_state_growstack	// (lua_State *L, int n)
4856    |.  move CARG1, L
4857    |  move RC, BASE
4858    |  ld BASE, L->base
4859    |  daddu RA, BASE, RA
4860    |  daddu RC, BASE, RC
4861    |  b <6
4862    |.  daddiu TMP3, BASE, -16
4863    break;
4864
4865  /* -- Returns ----------------------------------------------------------- */
4866
4867  case BC_RETM:
4868    |  // RA = results*8, RD = extra_nresults*8
4869    |  addu RD, RD, MULTRES		// MULTRES >= 8, so RD >= 8.
4870    |  // Fall through. Assumes BC_RET follows.
4871    break;
4872
4873  case BC_RET:
4874    |  // RA = results*8, RD = (nresults+1)*8
4875    |  ld PC, FRAME_PC(BASE)
4876    |   daddu RA, BASE, RA
4877    |    move MULTRES, RD
4878    |1:
4879    |  andi TMP0, PC, FRAME_TYPE
4880    |  bnez TMP0, ->BC_RETV_Z
4881    |.  xori TMP1, PC, FRAME_VARG
4882    |
4883    |->BC_RET_Z:
4884    |  // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return
4885    |   lw INS, -4(PC)
4886    |    daddiu TMP2, BASE, -16
4887    |    daddiu RC, RD, -8
4888    |  decode_RA8a TMP0, INS
4889    |   decode_RB8a RB, INS
4890    |  decode_RA8b TMP0
4891    |   decode_RB8b RB
4892    |   daddu TMP3, TMP2, RB
4893    |  beqz RC, >3
4894    |.  dsubu BASE, TMP2, TMP0
4895    |2:
4896    |   ld CRET1, 0(RA)
4897    |    daddiu RA, RA, 8
4898    |  daddiu RC, RC, -8
4899    |   sd CRET1, 0(TMP2)
4900    |  bnez RC, <2
4901    |.   daddiu TMP2, TMP2, 8
4902    |3:
4903    |  daddiu TMP3, TMP3, -8
4904    |5:
4905    |  sltu AT, TMP2, TMP3
4906    |  bnez AT, >6
4907    |.  ld LFUNC:TMP1, FRAME_FUNC(BASE)
4908    |  ins_next1
4909    |  cleartp LFUNC:TMP1
4910    |  ld TMP1, LFUNC:TMP1->pc
4911    |  ld KBASE, PC2PROTO(k)(TMP1)
4912    |  ins_next2
4913    |
4914    |6:  // Fill up results with nil.
4915    |  sd TISNIL, 0(TMP2)
4916    |  b <5
4917    |.  daddiu TMP2, TMP2, 8
4918    |
4919    |->BC_RETV_Z:  // Non-standard return case.
4920    |  andi TMP2, TMP1, FRAME_TYPEP
4921    |  bnez TMP2, ->vm_return
4922    |.  nop
4923    |  // Return from vararg function: relocate BASE down.
4924    |  dsubu BASE, BASE, TMP1
4925    |  b <1
4926    |.  ld PC, FRAME_PC(BASE)
4927    break;
4928
4929  case BC_RET0: case BC_RET1:
4930    |  // RA = results*8, RD = (nresults+1)*8
4931    |  ld PC, FRAME_PC(BASE)
4932    |   daddu RA, BASE, RA
4933    |    move MULTRES, RD
4934    |  andi TMP0, PC, FRAME_TYPE
4935    |  bnez TMP0, ->BC_RETV_Z
4936    |.  xori TMP1, PC, FRAME_VARG
4937    |  lw INS, -4(PC)
4938    |   daddiu TMP2, BASE, -16
4939    if (op == BC_RET1) {
4940      |  ld CRET1, 0(RA)
4941    }
4942    |  decode_RB8a RB, INS
4943    |   decode_RA8a RA, INS
4944    |  decode_RB8b RB
4945    |   decode_RA8b RA
4946    |   dsubu BASE, TMP2, RA
4947    if (op == BC_RET1) {
4948      |  sd CRET1, 0(TMP2)
4949    }
4950    |5:
4951    |  sltu AT, RD, RB
4952    |  bnez AT, >6
4953    |.  ld TMP1, FRAME_FUNC(BASE)
4954    |  ins_next1
4955    |  cleartp LFUNC:TMP1
4956    |  ld TMP1, LFUNC:TMP1->pc
4957    |  ld KBASE, PC2PROTO(k)(TMP1)
4958    |  ins_next2
4959    |
4960    |6:  // Fill up results with nil.
4961    |  daddiu TMP2, TMP2, 8
4962    |  daddiu RD, RD, 8
4963    |  b <5
4964    if (op == BC_RET1) {
4965      |.  sd TISNIL, 0(TMP2)
4966    } else {
4967      |.  sd TISNIL, -8(TMP2)
4968    }
4969    break;
4970
4971  /* -- Loops and branches ------------------------------------------------ */
4972
4973  case BC_FORL:
4974    |.if JIT
4975    |  hotloop
4976    |.endif
4977    |  // Fall through. Assumes BC_IFORL follows.
4978    break;
4979
4980  case BC_JFORI:
4981  case BC_JFORL:
4982#if !LJ_HASJIT
4983    break;
4984#endif
4985  case BC_FORI:
4986  case BC_IFORL:
4987    |  // RA = base*8, RD = target (after end of loop or start of loop)
4988    vk = (op == BC_IFORL || op == BC_JFORL);
4989    |  daddu RA, BASE, RA
4990    |  ld CARG1, FORL_IDX*8(RA)		// IDX CARG1 - CARG3 type
4991    |  gettp CARG3, CARG1
4992    if (op != BC_JFORL) {
4993      |  srl RD, RD, 1
4994      |  lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
4995      |  daddu TMP2, RD, TMP2
4996    }
4997    if (!vk) {
4998      |  ld CARG2, FORL_STOP*8(RA)	// STOP CARG2 - CARG4 type
4999      |  ld CRET1, FORL_STEP*8(RA)	// STEP CRET1 - CRET2 type
5000      |  gettp CARG4, CARG2
5001      |  bne CARG3, TISNUM, >5
5002      |.  gettp CRET2, CRET1
5003      |  bne CARG4, TISNUM, ->vmeta_for
5004      |.  sextw CARG3, CARG1
5005      |  bne CRET2, TISNUM, ->vmeta_for
5006      |.  sextw CARG2, CARG2
5007      |  dext AT, CRET1, 31, 0
5008      |  slt CRET1, CARG2, CARG3
5009      |  slt TMP1, CARG3, CARG2
5010      |.if MIPSR6
5011      |  selnez TMP1, TMP1, AT
5012      |  seleqz CRET1, CRET1, AT
5013      |  or CRET1, CRET1, TMP1
5014      |.else
5015      |  movn CRET1, TMP1, AT
5016      |.endif
5017    } else {
5018      |  bne CARG3, TISNUM, >5
5019      |.  ld CARG2, FORL_STEP*8(RA)	// STEP CARG2 - CARG4 type
5020      |    ld CRET1, FORL_STOP*8(RA)	// STOP CRET1 - CRET2 type
5021      |  sextw TMP3, CARG1
5022      |   sextw CARG2, CARG2
5023      |    sextw CRET1, CRET1
5024      |  addu CARG1, TMP3, CARG2
5025      |  xor TMP0, CARG1, TMP3
5026      |  xor TMP1, CARG1, CARG2
5027      |  and TMP0, TMP0, TMP1
5028      |  slt TMP1, CARG1, CRET1
5029      |  slt CRET1, CRET1, CARG1
5030      |  slt AT, CARG2, r0
5031      |   slt TMP0, TMP0, r0		// ((y^a) & (y^b)) < 0: overflow.
5032      |.if MIPSR6
5033      |  selnez TMP1, TMP1, AT
5034      |  seleqz CRET1, CRET1, AT
5035      |  or CRET1, CRET1, TMP1
5036      |.else
5037      |  movn CRET1, TMP1, AT
5038      |.endif
5039      |   or CRET1, CRET1, TMP0
5040      |  zextw CARG1, CARG1
5041      |  settp CARG1, TISNUM
5042    }
5043    |1:
5044    if (op == BC_FORI) {
5045      |.if MIPSR6
5046      |  selnez TMP2, TMP2, CRET1
5047      |.else
5048      |  movz TMP2, r0, CRET1
5049      |.endif
5050      |  daddu PC, PC, TMP2
5051    } else if (op == BC_JFORI) {
5052      |  daddu PC, PC, TMP2
5053      |  lhu RD, -4+OFS_RD(PC)
5054    } else if (op == BC_IFORL) {
5055      |.if MIPSR6
5056      |  seleqz TMP2, TMP2, CRET1
5057      |.else
5058      |  movn TMP2, r0, CRET1
5059      |.endif
5060      |  daddu PC, PC, TMP2
5061    }
5062    if (vk) {
5063      |  sd CARG1, FORL_IDX*8(RA)
5064    }
5065    |  ins_next1
5066    |  sd CARG1, FORL_EXT*8(RA)
5067    |2:
5068    if (op == BC_JFORI) {
5069      |  beqz CRET1, =>BC_JLOOP
5070      |.  decode_RD8b RD
5071    } else if (op == BC_JFORL) {
5072      |  beqz CRET1, =>BC_JLOOP
5073    }
5074    |  ins_next2
5075    |
5076    |5:  // FP loop.
5077    |.if FPU
5078    if (!vk) {
5079      |  ldc1 f0, FORL_IDX*8(RA)
5080      |   ldc1 f2, FORL_STOP*8(RA)
5081      |  sltiu TMP0, CARG3, LJ_TISNUM
5082      |  sltiu TMP1, CARG4, LJ_TISNUM
5083      |  sltiu AT, CRET2, LJ_TISNUM
5084      |   ld TMP3, FORL_STEP*8(RA)
5085      |  and TMP0, TMP0, TMP1
5086      |  and AT, AT, TMP0
5087      |  beqz AT, ->vmeta_for
5088      |.  slt TMP3, TMP3, r0
5089      |.if MIPSR6
5090      |   dmtc1 TMP3, FTMP2
5091      |  cmp.lt.d FTMP0, f0, f2
5092      |  cmp.lt.d FTMP1, f2, f0
5093      |  sel.d FTMP2, FTMP1, FTMP0
5094      |  b <1
5095      |.  dmfc1 CRET1, FTMP2
5096      |.else
5097      |  c.ole.d 0, f0, f2
5098      |  c.ole.d 1, f2, f0
5099      |  li CRET1, 1
5100      |  movt CRET1, r0, 0
5101      |  movt AT, r0, 1
5102      |  b <1
5103      |.  movn CRET1, AT, TMP3
5104      |.endif
5105    } else {
5106      |  ldc1 f0, FORL_IDX*8(RA)
5107      |   ldc1 f4, FORL_STEP*8(RA)
5108      |    ldc1 f2, FORL_STOP*8(RA)
5109      |   ld TMP3, FORL_STEP*8(RA)
5110      |  add.d f0, f0, f4
5111      |.if MIPSR6
5112      |   slt TMP3, TMP3, r0
5113      |   dmtc1 TMP3, FTMP2
5114      |  cmp.lt.d FTMP0, f0, f2
5115      |  cmp.lt.d FTMP1, f2, f0
5116      |  sel.d FTMP2, FTMP1, FTMP0
5117      |  dmfc1 CRET1, FTMP2
5118      if (op == BC_IFORL) {
5119	|  seleqz TMP2, TMP2, CRET1
5120	|  daddu PC, PC, TMP2
5121      }
5122      |.else
5123      |  c.ole.d 0, f0, f2
5124      |  c.ole.d 1, f2, f0
5125      |   slt TMP3, TMP3, r0
5126      |  li CRET1, 1
5127      |  li AT, 1
5128      |  movt CRET1, r0, 0
5129      |  movt AT, r0, 1
5130      |  movn CRET1, AT, TMP3
5131      if (op == BC_IFORL) {
5132	|  movn TMP2, r0, CRET1
5133	|  daddu PC, PC, TMP2
5134      }
5135      |.endif
5136      |  sdc1 f0, FORL_IDX*8(RA)
5137      |  ins_next1
5138      |  b <2
5139      |.  sdc1 f0, FORL_EXT*8(RA)
5140    }
5141    |.else
5142    if (!vk) {
5143      |  sltiu TMP0, CARG3, LJ_TISNUM
5144      |  sltiu TMP1, CARG4, LJ_TISNUM
5145      |  sltiu AT, CRET2, LJ_TISNUM
5146      |  and TMP0, TMP0, TMP1
5147      |  and AT, AT, TMP0
5148      |  beqz AT, ->vmeta_for
5149      |.  nop
5150      |  bal ->vm_sfcmpolex
5151      |.  lw TMP3, FORL_STEP*8+HI(RA)
5152      |  b <1
5153      |.  nop
5154    } else {
5155      |  load_got __adddf3
5156      |  call_extern
5157      |.  sw TMP2, TMPD
5158      |  ld CARG2, FORL_STOP*8(RA)
5159      |  move CARG1, CRET1
5160      if ( op == BC_JFORL ) {
5161	|  lhu RD, -4+OFS_RD(PC)
5162	|  decode_RD8b RD
5163      }
5164      |  bal ->vm_sfcmpolex
5165      |.  lw TMP3, FORL_STEP*8+HI(RA)
5166      |  b <1
5167      |.  lw TMP2, TMPD
5168    }
5169    |.endif
5170    break;
5171
5172  case BC_ITERL:
5173    |.if JIT
5174    |  hotloop
5175    |.endif
5176    |  // Fall through. Assumes BC_IITERL follows.
5177    break;
5178
5179  case BC_JITERL:
5180#if !LJ_HASJIT
5181    break;
5182#endif
5183  case BC_IITERL:
5184    |  // RA = base*8, RD = target
5185    |  daddu RA, BASE, RA
5186    |  ld TMP1, 0(RA)
5187    |  beq TMP1, TISNIL, >1		// Stop if iterator returned nil.
5188    |.  nop
5189    if (op == BC_JITERL) {
5190      |  b =>BC_JLOOP
5191      |.  sd TMP1, -8(RA)
5192    } else {
5193      |  branch_RD			// Otherwise save control var + branch.
5194      |  sd TMP1, -8(RA)
5195    }
5196    |1:
5197    |  ins_next
5198    break;
5199
5200  case BC_LOOP:
5201    |  // RA = base*8, RD = target (loop extent)
5202    |  // Note: RA/RD is only used by trace recorder to determine scope/extent
5203    |  // This opcode does NOT jump, it's only purpose is to detect a hot loop.
5204    |.if JIT
5205    |  hotloop
5206    |.endif
5207    |  // Fall through. Assumes BC_ILOOP follows.
5208    break;
5209
5210  case BC_ILOOP:
5211    |  // RA = base*8, RD = target (loop extent)
5212    |  ins_next
5213    break;
5214
5215  case BC_JLOOP:
5216    |.if JIT
5217    |  // RA = base*8 (ignored), RD = traceno*8
5218    |  ld TMP1, DISPATCH_J(trace)(DISPATCH)
5219    |   li AT, 0
5220    |  daddu TMP1, TMP1, RD
5221    |  // Traces on MIPS don't store the trace number, so use 0.
5222    |   sd AT, DISPATCH_GL(vmstate)(DISPATCH)
5223    |  ld TRACE:TMP2, 0(TMP1)
5224    |   sd BASE, DISPATCH_GL(jit_base)(DISPATCH)
5225    |  ld TMP2, TRACE:TMP2->mcode
5226    |   sd L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
5227    |  jr TMP2
5228    |.  daddiu JGL, DISPATCH, GG_DISP2G+32768
5229    |.endif
5230    break;
5231
5232  case BC_JMP:
5233    |  // RA = base*8 (only used by trace recorder), RD = target
5234    |  branch_RD
5235    |  ins_next
5236    break;
5237
5238  /* -- Function headers -------------------------------------------------- */
5239
5240  case BC_FUNCF:
5241    |.if JIT
5242    |  hotcall
5243    |.endif
5244  case BC_FUNCV:  /* NYI: compiled vararg functions. */
5245    |  // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
5246    break;
5247
5248  case BC_JFUNCF:
5249#if !LJ_HASJIT
5250    break;
5251#endif
5252  case BC_IFUNCF:
5253    |  // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
5254    |  ld TMP2, L->maxstack
5255    |   lbu TMP1, -4+PC2PROTO(numparams)(PC)
5256    |    ld KBASE, -4+PC2PROTO(k)(PC)
5257    |  sltu AT, TMP2, RA
5258    |  bnez AT, ->vm_growstack_l
5259    |.  sll TMP1, TMP1, 3
5260    if (op != BC_JFUNCF) {
5261      |  ins_next1
5262    }
5263    |2:
5264    |  sltu AT, NARGS8:RC, TMP1		// Check for missing parameters.
5265    |  bnez AT, >3
5266    |.  daddu AT, BASE, NARGS8:RC
5267    if (op == BC_JFUNCF) {
5268      |  decode_RD8a RD, INS
5269      |  b =>BC_JLOOP
5270      |.  decode_RD8b RD
5271    } else {
5272      |  ins_next2
5273    }
5274    |
5275    |3:  // Clear missing parameters.
5276    |  sd TISNIL, 0(AT)
5277    |  b <2
5278    |.  addiu NARGS8:RC, NARGS8:RC, 8
5279    break;
5280
5281  case BC_JFUNCV:
5282#if !LJ_HASJIT
5283    break;
5284#endif
5285    |  NYI  // NYI: compiled vararg functions
5286    break;  /* NYI: compiled vararg functions. */
5287
5288  case BC_IFUNCV:
5289    |  // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
5290    |   li TMP0, LJ_TFUNC
5291    |   daddu TMP1, BASE, RC
5292    |  ld TMP2, L->maxstack
5293    |   settp LFUNC:RB, TMP0
5294    |  daddu TMP0, RA, RC
5295    |   sd LFUNC:RB, 0(TMP1)		// Store (tagged) copy of LFUNC.
5296    |   daddiu TMP3, RC, 16+FRAME_VARG
5297    |  sltu AT, TMP0, TMP2
5298    |    ld KBASE, -4+PC2PROTO(k)(PC)
5299    |  beqz AT, ->vm_growstack_l
5300    |.  sd TMP3, 8(TMP1)		// Store delta + FRAME_VARG.
5301    |  lbu TMP2, -4+PC2PROTO(numparams)(PC)
5302    |   move RA, BASE
5303    |   move RC, TMP1
5304    |  ins_next1
5305    |  beqz TMP2, >3
5306    |.  daddiu BASE, TMP1, 16
5307    |1:
5308    |  ld TMP0, 0(RA)
5309    |  sltu AT, RA, RC			// Less args than parameters?
5310    |  move CARG1, TMP0
5311    |.if MIPSR6
5312    |  selnez TMP0, TMP0, AT
5313    |  seleqz TMP3, TISNIL, AT
5314    |  or TMP0, TMP0, TMP3
5315    |  seleqz TMP3, CARG1, AT
5316    |  selnez CARG1, TISNIL, AT
5317    |  or CARG1, CARG1, TMP3
5318    |.else
5319    |  movz TMP0, TISNIL, AT		// Clear missing parameters.
5320    |  movn CARG1, TISNIL, AT		// Clear old fixarg slot (help the GC).
5321    |.endif
5322    |    addiu TMP2, TMP2, -1
5323    |  sd TMP0, 16(TMP1)
5324    |    daddiu TMP1, TMP1, 8
5325    |  sd CARG1, 0(RA)
5326    |  bnez TMP2, <1
5327    |.   daddiu RA, RA, 8
5328    |3:
5329    |  ins_next2
5330    break;
5331
5332  case BC_FUNCC:
5333  case BC_FUNCCW:
5334    |  // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
5335    if (op == BC_FUNCC) {
5336      |  ld CFUNCADDR, CFUNC:RB->f
5337    } else {
5338      |  ld CFUNCADDR, DISPATCH_GL(wrapf)(DISPATCH)
5339    }
5340    |  daddu TMP1, RA, NARGS8:RC
5341    |  ld TMP2, L->maxstack
5342    |   daddu RC, BASE, NARGS8:RC
5343    |  sd BASE, L->base
5344    |  sltu AT, TMP2, TMP1
5345    |   sd RC, L->top
5346    |    li_vmstate C
5347    if (op == BC_FUNCCW) {
5348      |  ld CARG2, CFUNC:RB->f
5349    }
5350    |  bnez AT, ->vm_growstack_c	// Need to grow stack.
5351    |.  move CARG1, L
5352    |  jalr CFUNCADDR			// (lua_State *L [, lua_CFunction f])
5353    |.   st_vmstate
5354    |  // Returns nresults.
5355    |  ld BASE, L->base
5356    |   sll RD, CRET1, 3
5357    |  ld TMP1, L->top
5358    |    li_vmstate INTERP
5359    |  ld PC, FRAME_PC(BASE)		// Fetch PC of caller.
5360    |   dsubu RA, TMP1, RD		// RA = L->top - nresults*8
5361    |    sd L, DISPATCH_GL(cur_L)(DISPATCH)
5362    |  b ->vm_returnc
5363    |.   st_vmstate
5364    break;
5365
5366  /* ---------------------------------------------------------------------- */
5367
5368  default:
5369    fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
5370    exit(2);
5371    break;
5372  }
5373}
5374
5375static int build_backend(BuildCtx *ctx)
5376{
5377  int op;
5378
5379  dasm_growpc(Dst, BC__MAX);
5380
5381  build_subroutines(ctx);
5382
5383  |.code_op
5384  for (op = 0; op < BC__MAX; op++)
5385    build_ins(ctx, (BCOp)op, op);
5386
5387  return BC__MAX;
5388}
5389
5390/* Emit pseudo frame-info for all assembler functions. */
5391static void emit_asm_debug(BuildCtx *ctx)
5392{
5393  int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
5394  int i;
5395  switch (ctx->mode) {
5396  case BUILD_elfasm:
5397    fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
5398    fprintf(ctx->fp,
5399	".Lframe0:\n"
5400	"\t.4byte .LECIE0-.LSCIE0\n"
5401	".LSCIE0:\n"
5402	"\t.4byte 0xffffffff\n"
5403	"\t.byte 0x1\n"
5404	"\t.string \"\"\n"
5405	"\t.uleb128 0x1\n"
5406	"\t.sleb128 -4\n"
5407	"\t.byte 31\n"
5408	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n"
5409	"\t.align 2\n"
5410	".LECIE0:\n\n");
5411    fprintf(ctx->fp,
5412	".LSFDE0:\n"
5413	"\t.4byte .LEFDE0-.LASFDE0\n"
5414	".LASFDE0:\n"
5415	"\t.4byte .Lframe0\n"
5416	"\t.8byte .Lbegin\n"
5417	"\t.8byte %d\n"
5418	"\t.byte 0xe\n\t.uleb128 %d\n"
5419	"\t.byte 0x9f\n\t.sleb128 2*5\n"
5420	"\t.byte 0x9e\n\t.sleb128 2*6\n",
5421	fcofs, CFRAME_SIZE);
5422    for (i = 23; i >= 16; i--)
5423      fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(30-i));
5424#if !LJ_SOFTFP
5425    for (i = 31; i >= 24; i--)
5426      fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(46-i));
5427#endif
5428    fprintf(ctx->fp,
5429	"\t.align 2\n"
5430	".LEFDE0:\n\n");
5431#if LJ_HASFFI
5432    fprintf(ctx->fp,
5433	".LSFDE1:\n"
5434	"\t.4byte .LEFDE1-.LASFDE1\n"
5435	".LASFDE1:\n"
5436	"\t.4byte .Lframe0\n"
5437	"\t.4byte lj_vm_ffi_call\n"
5438	"\t.4byte %d\n"
5439	"\t.byte 0x9f\n\t.uleb128 2*1\n"
5440	"\t.byte 0x90\n\t.uleb128 2*2\n"
5441	"\t.byte 0xd\n\t.uleb128 0x10\n"
5442	"\t.align 2\n"
5443	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
5444#endif
5445#if !LJ_NO_UNWIND
5446    /* NYI */
5447#endif
5448    break;
5449  default:
5450    break;
5451  }
5452}
5453
5454