1|// Low-level VM code for ARM64 CPUs.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4|
5|.arch arm64
6|.section code_op, code_sub
7|
8|.actionlist build_actionlist
9|.globals GLOB_
10|.globalnames globnames
11|.externnames extnames
12|
13|// Note: The ragged indentation of the instructions is intentional.
14|//       The starting columns indicate data dependencies.
15|
16|//-----------------------------------------------------------------------
17|
18|// ARM64 registers and the AAPCS64 ABI 1.0 at a glance:
19|//
20|// x0-x17 temp, x19-x28 callee-saved, x29 fp, x30 lr
21|// x18 is reserved on most platforms. Don't use it, save it or restore it.
22|// x31 doesn't exist. Register number 31 either means xzr/wzr (zero) or sp,
23|// depending on the instruction.
24|// v0-v7 temp, v8-v15 callee-saved (only d8-d15 preserved), v16-v31 temp
25|//
26|// x0-x7/v0-v7 hold parameters and results.
27|
28|// Fixed register assignments for the interpreter.
29|
30|// The following must be C callee-save.
31|.define BASE,		x19	// Base of current Lua stack frame.
32|.define KBASE,		x20	// Constants of current Lua function.
33|.define PC,		x21	// Next PC.
34|.define GLREG,		x22	// Global state.
35|.define LREG,		x23	// Register holding lua_State (also in SAVE_L).
36|.define TISNUM,	x24	// Constant LJ_TISNUM << 47.
37|.define TISNUMhi,	x25	// Constant LJ_TISNUM << 15.
38|.define TISNIL,	x26	// Constant -1LL.
39|.define fp,		x29	// Yes, we have to maintain a frame pointer.
40|
41|.define ST_INTERP,	w26	// Constant -1.
42|
43|// The following temporaries are not saved across C calls, except for RA/RC.
44|.define RA,		x27
45|.define RC,		x28
46|.define RB,		x17
47|.define RAw,		w27
48|.define RCw,		w28
49|.define RBw,		w17
50|.define INS,		x16
51|.define INSw,		w16
52|.define ITYPE,		x15
53|.define TMP0,		x8
54|.define TMP1,		x9
55|.define TMP2,		x10
56|.define TMP3,		x11
57|.define TMP0w,		w8
58|.define TMP1w,		w9
59|.define TMP2w,		w10
60|.define TMP3w,		w11
61|
62|// Calling conventions. Also used as temporaries.
63|.define CARG1,		x0
64|.define CARG2,		x1
65|.define CARG3,		x2
66|.define CARG4,		x3
67|.define CARG5,		x4
68|.define CARG1w,	w0
69|.define CARG2w,	w1
70|.define CARG3w,	w2
71|.define CARG4w,	w3
72|.define CARG5w,	w4
73|
74|.define FARG1,		d0
75|.define FARG2,		d1
76|
77|.define CRET1,		x0
78|.define CRET1w,	w0
79|
80|// Stack layout while in interpreter. Must match with lj_frame.h.
81|
82|.define CFRAME_SPACE,	208
83|//----- 16 byte aligned, <-- sp entering interpreter
84|.define SAVE_FP_LR_,	192
85|.define SAVE_GPR_,	112		// 112+10*8: 64 bit GPR saves
86|.define SAVE_FPR_,	48		// 48+8*8: 64 bit FPR saves
87|// Unused		[sp, #44]	// 32 bit values
88|.define SAVE_NRES,	[sp, #40]
89|.define SAVE_ERRF,	[sp, #36]
90|.define SAVE_MULTRES,	[sp, #32]
91|.define TMPD,		[sp, #24]	// 64 bit values
92|.define SAVE_L,	[sp, #16]
93|.define SAVE_PC,	[sp, #8]
94|.define SAVE_CFRAME,	[sp, #0]
95|//----- 16 byte aligned, <-- sp while in interpreter.
96|
97|.define TMPDofs,	#24
98|
99|.macro save_, gpr1, gpr2, fpr1, fpr2
100|  stp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]  // Store one FPR pair into the SAVE_FPR_ area.
101|  stp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]  // Store one GPR pair into the SAVE_GPR_ area.
102|.endmacro
103|.macro rest_, gpr1, gpr2, fpr1, fpr2
104|  ldp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]  // Reload the FPR pair from the slot save_ used.
105|  ldp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]  // Reload the GPR pair from the slot save_ used.
106|.endmacro
107|
108|.macro saveregs
109|  sub sp, sp, # CFRAME_SPACE  // Allocate CFRAME_SPACE bytes for the C frame.
110|  stp fp, lr, [sp, # SAVE_FP_LR_]  // Save fp and lr at offset SAVE_FP_LR_.
111|  add fp, sp, # SAVE_FP_LR_  // fp = address of the saved fp/lr pair.
112|  stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]  // x19/x20 are stored on their own (no FPR pair).
113|  save_ 21, 22, 8, 9  // x21/x22 and d8/d9.
114|  save_ 23, 24, 10, 11  // x23/x24 and d10/d11.
115|  save_ 25, 26, 12, 13  // x25/x26 and d12/d13.
116|  save_ 27, 28, 14, 15  // x27/x28 and d14/d15.
117|.endmacro
118|.macro restoreregs
119|  ldp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]  // Mirror of saveregs: reload x19/x20.
120|  rest_ 21, 22, 8, 9
121|  rest_ 23, 24, 10, 11
122|  rest_ 25, 26, 12, 13
123|  rest_ 27, 28, 14, 15
124|  ldp fp, lr, [sp, # SAVE_FP_LR_]  // Reload fp and lr last, while sp still addresses the frame.
125|  add sp, sp, # CFRAME_SPACE  // Release the C frame.
126|.endmacro
127|
128|// Type definitions. Some of these are only used for documentation.
129|.type L,		lua_State,	LREG
130|.type GL,		global_State,	GLREG
131|.type TVALUE,		TValue
132|.type GCOBJ,		GCobj
133|.type STR,		GCstr
134|.type TAB,		GCtab
135|.type LFUNC,		GCfuncL
136|.type CFUNC,		GCfuncC
137|.type PROTO,		GCproto
138|.type UPVAL,		GCupval
139|.type NODE,		Node
140|.type NARGS8,		int
141|.type TRACE,		GCtrace
142|.type SBUF,		SBuf
143|
144|//-----------------------------------------------------------------------
145|
146|// Trap for not-yet-implemented parts.
147|.macro NYI; brk; .endmacro
148|
149|//-----------------------------------------------------------------------
150|
151|// Access to frame relative to BASE.
152|.define FRAME_FUNC,	#-16
153|.define FRAME_PC,	#-8
154|
155|// Endian-specific defines.
156|.if ENDIAN_LE
157|.define LO,		0
158|.define OFS_RD,	2
159|.define OFS_RB,	3
160|.define OFS_RA,	1
161|.define OFS_OP,	0
162|.else
163|.define LO,		4
164|.define OFS_RD,	0
165|.define OFS_RB,	0
166|.define OFS_RA,	2
167|.define OFS_OP,	3
168|.endif
169|
170|.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro  // dst = ins bits 8-15.
171|.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro  // dst = ins bits 24-31.
172|.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro  // dst = ins bits 16-23.
173|.macro decode_RD, dst, ins; ubfx dst, ins, #16, #16; .endmacro  // dst = ins bits 16-31.
174|.macro decode_RC8RD, dst, src; ubfiz dst, src, #3, #8; .endmacro  // dst = (src & 0xff) << 3.
175|
176|// Instruction decode+dispatch. Clobbers INS, TMP0, TMP1; sets RA and RC.
177|.macro ins_NEXT
178|  ldr INSw, [PC], #4  // Fetch the 32 bit instruction at PC, then PC += 4.
179|  add TMP1, GL, INS, uxtb #3  // TMP1 = GL + (low byte of INS)*8.
180|   decode_RA RA, INS
181|  ldr TMP0, [TMP1, #GG_G2DISP]  // Load the branch target stored at TMP1+GG_G2DISP.
182|   decode_RD RC, INS
183|  br TMP0  // Jump to the handler with RA and RC already decoded.
184|.endmacro
185|
186|// Instruction footer.
187|.if 1
188|  // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
189|  .define ins_next, ins_NEXT
190|  .define ins_next_, ins_NEXT
191|.else
192|  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
193|  // Affects only certain kinds of benchmarks (and only with -j off).
194|  .macro ins_next
195|    b ->ins_next
196|  .endmacro
197|  .macro ins_next_
198|  ->ins_next:
199|    ins_NEXT
200|  .endmacro
201|.endif
202|
203|// Call decode and dispatch.
204|.macro ins_callt
205|  // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
206|  ldr PC, LFUNC:CARG3->pc  // PC = pc field of the callee.
207|  ldr INSw, [PC], #4  // Fetch the callee's first instruction, then PC += 4.
208|  add TMP1, GL, INS, uxtb #3  // TMP1 = GL + (low byte of INS)*8.
209|   decode_RA RA, INS
210|  ldr TMP0, [TMP1, #GG_G2DISP]  // Load the branch target stored at TMP1+GG_G2DISP.
211|   add RA, BASE, RA, lsl #3  // RA = BASE + RA*8. RC is left untouched (still nargs*8).
212|  br TMP0
213|.endmacro
214|
215|.macro ins_call
216|  // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
217|  str PC, [BASE, FRAME_PC]  // Store the caller PC into the new frame, then dispatch.
218|  ins_callt
219|.endmacro
220|
221|//-----------------------------------------------------------------------
222|
223|// Macros to check the TValue type and extract the GCobj. Branch on failure.
224|.macro checktp, reg, tp, target
225|  asr ITYPE, reg, #47  // ITYPE = reg >> 47 (arithmetic). Clobbers ITYPE.
226|  cmn ITYPE, #-tp  // Flags from ITYPE + (-tp): eq iff ITYPE == tp.
227|   and reg, reg, #LJ_GCVMASK  // Mask reg in place (flags are not affected).
228|  bne target
229|.endmacro
230|.macro checktp, dst, reg, tp, target
231|  asr ITYPE, reg, #47  // Same check as above, but reg is preserved.
232|  cmn ITYPE, #-tp
233|   and dst, reg, #LJ_GCVMASK  // dst = masked reg.
234|  bne target
235|.endmacro
236|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
237|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
238|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
239|.macro checkint, reg, target
240|  cmp TISNUMhi, reg, lsr #32  // Compare TISNUMhi against the upper 32 bits of reg.
241|  bne target  // Branch unless they are equal.
242|.endmacro
243|.macro checknum, reg, target
244|  cmp TISNUMhi, reg, lsr #32
245|  bls target  // Branch if TISNUMhi <= upper 32 bits (unsigned).
246|.endmacro
247|.macro checknumber, reg, target
248|  cmp TISNUMhi, reg, lsr #32
249|  blo target  // Branch if TISNUMhi < upper 32 bits (unsigned).
250|.endmacro
251|
252|.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro  // reg = ~(0x8000 << 32).
253|.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro  // reg = ~(0x0001 << 48).
254|
255#define GL_J(field)	(GG_G2J + (int)offsetof(jit_State, field))
256|
257#define PC2PROTO(field)  ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
258|
259|.macro hotcheck, delta
260|  lsr CARG1, PC, #1  // Derive a counter offset from PC. Clobbers CARG1 and CARG2.
261|  and CARG1, CARG1, #126  // Even byte offset in the range 0..126.
262|  add CARG1, CARG1, #GG_G2DISP+GG_DISP2HOT
263|  ldrh CARG2w, [GL, CARG1]  // Load the 16 bit counter.
264|  subs CARG2, CARG2, #delta  // Decrement it; the flags are consumed by the caller.
265|  strh CARG2w, [GL, CARG1]  // Write the counter back (flags are not affected).
266|.endmacro
267|
268|.macro hotloop
269|  hotcheck HOTCOUNT_LOOP
270|  blo ->vm_hotloop  // Taken when the subtraction in hotcheck borrowed.
271|.endmacro
272|
273|.macro hotcall
274|  hotcheck HOTCOUNT_CALL
275|  blo ->vm_hotcall  // Taken when the subtraction in hotcheck borrowed.
276|.endmacro
277|
278|// Set current VM state.
279|.macro mv_vmstate, reg, st; movn reg, #LJ_VMST_..st; .endmacro
280|.macro st_vmstate, reg; str reg, GL->vmstate; .endmacro
281|
282|// Move table write barrier back. Overwrites mark and tmp.
283|.macro barrierback, tab, mark, tmp
284|  ldr tmp, GL->gc.grayagain  // tmp = current head of GL->gc.grayagain.
285|   and mark, mark, #~LJ_GC_BLACK	// black2gray(tab)
286|  str tab, GL->gc.grayagain  // tab becomes the new head.
287|   strb mark, tab->marked  // Store the updated mark byte.
288|  str tmp, tab->gclist  // Link the previous head behind tab.
289|.endmacro
290|
291|//-----------------------------------------------------------------------
292
293#if !LJ_DUALNUM
294#error "Only dual-number mode supported for ARM64 target"
295#endif
296
297/* Generate subroutines used by opcodes and other parts of the VM. */
298/* The .code_sub section should be last to help static branch prediction. */
299static void build_subroutines(BuildCtx *ctx)
300{
301  |.code_sub
302  |
303  |//-----------------------------------------------------------------------
304  |//-- Return handling ----------------------------------------------------
305  |//-----------------------------------------------------------------------
306  |
307  |->vm_returnp:
308  |  // See vm_return. Also: RB = previous base.
309  |  tbz PC, #2, ->cont_dispatch	// (PC & FRAME_P) == 0?
310  |
311  |  // Return from pcall or xpcall fast func.
312  |  ldr PC, [RB, FRAME_PC]		// Fetch PC of previous frame.
313  |   mov_true TMP0
314  |  mov BASE, RB  // Switch BASE to the previous frame.
315  |  // Prepending may overwrite the pcall frame, so do it at the end.
316  |   str TMP0, [RA, #-8]!		// Prepend true to results.
317  |
318  |->vm_returnc:
319  |  adds RC, RC, #8			// RC = (nresults+1)*8.
320  |  mov CRET1, #LUA_YIELD  // Status used only if the branch below is taken.
321  |  beq ->vm_unwind_c_eh  // RC became 0 in the adds above: leave via the unwind landing pad.
322  |  str RCw, SAVE_MULTRES
323  |  ands CARG1, PC, #FRAME_TYPE  // CARG1 = PC & FRAME_TYPE, as vm_return expects.
324  |  beq ->BC_RET_Z			// Handle regular return to Lua.
325  |
326  |->vm_return:
327  |  // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return
328  |  // CARG1 = PC & FRAME_TYPE
329  |  and RB, PC, #~FRAME_TYPEP  // RB = frame delta (PC with the FRAME_TYPEP bits cleared).
330  |   cmp CARG1, #FRAME_C
331  |  sub RB, BASE, RB			// RB = previous base.
332  |   bne ->vm_returnp  // Not a C frame: handled by vm_returnp.
333  |
334  |  str RB, L->base
335  |   ldrsw CARG2, SAVE_NRES		// CARG2 = nresults+1.
336  |    mv_vmstate TMP0w, C
337  |   sub BASE, BASE, #16  // Results are copied down starting at BASE-16.
338  |  subs TMP2, RC, #8  // TMP2 = nresults*8 (bytes left to copy).
339  |    st_vmstate TMP0w
340  |  beq >2  // Nothing to copy.
341  |1:  // Copy loop: move one 8 byte result from [RA] to [BASE] per iteration.
342  |  subs TMP2, TMP2, #8
343  |   ldr TMP0, [RA], #8
344  |   str TMP0, [BASE], #8
345  |  bne <1
346  |2:
347  |  cmp RC, CARG2, lsl #3		// More/less results wanted?
348  |  bne >6
349  |3:
350  |  str BASE, L->top			// Store new top.
351  |
352  |->vm_leave_cp:
353  |  ldr RC, SAVE_CFRAME		// Restore previous C frame.
354  |   mov CRET1, #0			// Ok return status for vm_pcall.
355  |  str RC, L->cframe
356  |
357  |->vm_leave_unw:
358  |  restoreregs  // Reload callee-saved registers and release the C frame.
359  |  ret
360  |
361  |6:
362  |  bgt >7				// Less results wanted?
363  |  // More results wanted. Check stack size and fill up results with nil.
364  |  ldr CARG3, L->maxstack
365  |  cmp BASE, CARG3
366  |  bhs >8  // BASE >= L->maxstack: need to grow the stack first.
367  |   str TISNIL, [BASE], #8  // Append one nil.
368  |  add RC, RC, #8  // Account for it and re-check at 2.
369  |  b <2
370  |
371  |7:  // Less results wanted.
372  |  cbz CARG2, <3			// LUA_MULTRET+1 case?
373  |  sub CARG1, RC, CARG2, lsl #3  // CARG1 = surplus in bytes.
374  |  sub BASE, BASE, CARG1		// Shrink top.
375  |  b <3
376  |
377  |8:  // Corner case: need to grow stack for filling up results.
378  |  // This can happen if:
379  |  // - A C function grows the stack (a lot).
380  |  // - The GC shrinks the stack in between.
381  |  // - A return back from a lua_call() with (high) nresults adjustment.
382  |  str BASE, L->top			// Save current top held in BASE (yes).
383  |  mov CARG1, L  // CARG2 still holds the value loaded from SAVE_NRES and is passed as n.
384  |  bl extern lj_state_growstack	// (lua_State *L, int n)
385  |  ldr BASE, L->top			// Need the (realloced) L->top in BASE.
386  |  ldrsw CARG2, SAVE_NRES  // Reload CARG2 after the C call.
387  |  b <2
388  |
389  |->vm_unwind_c:			// Unwind C stack, return from vm_pcall.
390  |  // (void *cframe, int errcode)
391  |  mov sp, CARG1  // sp = cframe.
392  |  mov CRET1, CARG2  // Return value = errcode.
393  |->vm_unwind_c_eh:			// Landing pad for external unwinder.
394  |  ldr L, SAVE_L  // Reload L from the C frame.
395  |   mv_vmstate TMP0w, C
396  |  ldr GL, L->glref
397  |   st_vmstate TMP0w  // Set the VM state to C.
398  |  b ->vm_leave_unw
399  |
400  |->vm_unwind_ff:			// Unwind C stack, return from ff pcall.
401  |  // (void *cframe)
402  |  and sp, CARG1, #CFRAME_RAWMASK  // sp = cframe masked with CFRAME_RAWMASK.
403  |->vm_unwind_ff_eh:			// Landing pad for external unwinder.
404  |  ldr L, SAVE_L
405  |    movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48  // Re-create the constant registers.
406  |    movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
407  |    movn TISNIL, #0
408  |    mov RC, #16			// 2 results: false + error message.
409  |  ldr BASE, L->base
410  |   ldr GL, L->glref			// Setup pointer to global state.
411  |    mov_false TMP0
412  |  sub RA, BASE, #8			// Results start at BASE-8.
413  |  ldr PC, [BASE, FRAME_PC]		// Fetch PC of previous frame.
414  |    str TMP0, [BASE, #-8]		// Prepend false to error message.
415  |   st_vmstate ST_INTERP
416  |  b ->vm_returnc
417  |
418  |//-----------------------------------------------------------------------
419  |//-- Grow stack for calls -----------------------------------------------
420  |//-----------------------------------------------------------------------
421  |
422  |->vm_growstack_c:			// Grow stack for C function.
423  |  // CARG1 = L
424  |  mov CARG2, #LUA_MINSTACK  // n = LUA_MINSTACK.
425  |  b >2
426  |
427  |->vm_growstack_l:			// Grow stack for Lua function.
428  |  // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
429  |  add RC, BASE, RC  // RC = BASE + nargs*8 (top).
430  |   sub RA, RA, BASE  // RA = framesize*8.
431  |    mov CARG1, L
432  |  stp BASE, RC, L->base  // Store base and top as a pair starting at L->base.
433  |   add PC, PC, #4			// Must point after first instruction.
434  |   lsr CARG2, RA, #3  // n = framesize.
435  |2:
436  |  // L->base = new base, L->top = top
437  |  str PC, SAVE_PC
438  |  bl extern lj_state_growstack	// (lua_State *L, int n)
439  |  ldp BASE, RC, L->base  // Reload base and top after the C call.
440  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
441  |   sub NARGS8:RC, RC, BASE  // RC = top - base = nargs*8.
442  |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
443  |  // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
444  |  ins_callt				// Just retry the call.
445  |
446  |//-----------------------------------------------------------------------
447  |//-- Entry points into the assembler VM ---------------------------------
448  |//-----------------------------------------------------------------------
449  |
450  |->vm_resume:				// Setup C frame and resume thread.
451  |  // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
452  |  saveregs
453  |  mov L, CARG1
454  |    ldr GL, L->glref			// Setup pointer to global state.
455  |  mov BASE, CARG2
456  |   str L, SAVE_L
457  |  mov PC, #FRAME_CP
458  |   str wzr, SAVE_NRES
459  |    add TMP0, sp, #CFRAME_RESUME  // TMP0 = sp + CFRAME_RESUME, stored into L->cframe below.
460  |  ldrb TMP1w, L->status
461  |   str wzr, SAVE_ERRF
462  |   str L, SAVE_PC			// Any value outside of bytecode is ok.
463  |   str xzr, SAVE_CFRAME
464  |    str TMP0, L->cframe
465  |  cbz TMP1w, >3  // L->status == 0: continue on the common call path at 3.
466  |
467  |  // Resume after yield (like a return).
468  |  str L, GL->cur_L
469  |  mov RA, BASE  // RA = resultptr (the base argument).
470  |   ldp BASE, CARG1, L->base  // BASE = L->base, CARG1 = the following field (top).
471  |    movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
472  |    movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
473  |  ldr PC, [BASE, FRAME_PC]
474  |     strb wzr, L->status  // Clear L->status.
475  |    movn TISNIL, #0
476  |   sub RC, CARG1, BASE
477  |  ands CARG1, PC, #FRAME_TYPE  // CARG1 = PC & FRAME_TYPE, as vm_return expects.
478  |   add RC, RC, #8
479  |     st_vmstate ST_INTERP
480  |   str RCw, SAVE_MULTRES
481  |  beq ->BC_RET_Z  // Frame type bits are zero: return via BC_RET_Z.
482  |  b ->vm_return
483  |
484  |->vm_pcall:				// Setup protected C frame and enter VM.
485  |  // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
486  |  saveregs
487  |  mov PC, #FRAME_CP
488  |  str CARG4w, SAVE_ERRF  // Store the low 32 bits of ef.
489  |  b >1
490  |
491  |->vm_call:				// Setup C frame and enter VM.
492  |  // (lua_State *L, TValue *base, int nres1)
493  |  saveregs
494  |  mov PC, #FRAME_C
495  |
496  |1:  // Entry point for vm_pcall above (PC = ftype).
497  |  ldr RC, L:CARG1->cframe  // RC = previous L->cframe.
498  |   str CARG3w, SAVE_NRES
499  |    mov L, CARG1
500  |   str CARG1, SAVE_L
501  |    ldr GL, L->glref			// Setup pointer to global state.
502  |     mov BASE, CARG2
503  |   str CARG1, SAVE_PC		// Any value outside of bytecode is ok.
504  |  add TMP0, sp, #0  // TMP0 = sp.
505  |   str RC, SAVE_CFRAME
506  |  str TMP0, L->cframe		// Add our C frame to cframe chain.
507  |
508  |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
509  |  str L, GL->cur_L
510  |  ldp RB, CARG1, L->base		// RB = old base (for vmeta_call).
511  |    movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
512  |    movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
513  |  add PC, PC, BASE
514  |    movn TISNIL, #0
515  |  sub PC, PC, RB			// PC = frame delta + frame type
516  |   sub NARGS8:RC, CARG1, BASE  // RC = CARG1 - BASE = nargs*8.
517  |    st_vmstate ST_INTERP
518  |
519  |->vm_call_dispatch:
520  |  // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC
521  |  ldr CARG3, [BASE, FRAME_FUNC]
522  |  checkfunc CARG3, ->vmeta_call  // Not a function: resolve via vmeta_call.
523  |
524  |->vm_call_dispatch_f:
525  |  ins_call
526  |  // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC
527  |
528  |->vm_cpcall:				// Setup protected C frame, call C.
529  |  // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
530  |  saveregs
531  |  mov L, CARG1
532  |   ldr RA, L:CARG1->stack
533  |  str CARG1, SAVE_L
534  |    ldr GL, L->glref			// Setup pointer to global state.
535  |   ldr RB, L->top
536  |  str CARG1, SAVE_PC			// Any value outside of bytecode is ok.
537  |  ldr RC, L->cframe
538  |   sub RA, RA, RB			// Compute -savestack(L, L->top).
539  |   str RAw, SAVE_NRES		// Neg. delta means cframe w/o frame.
540  |  str wzr, SAVE_ERRF			// No error function.
541  |  add TMP0, sp, #0  // TMP0 = sp.
542  |   str RC, SAVE_CFRAME
543  |  str TMP0, L->cframe		// Add our C frame to cframe chain.
544  |    str L, GL->cur_L
545  |  blr CARG4			// (lua_State *L, lua_CFunction func, void *ud)
546  |  mov BASE, CRET1  // The return value is the new base (or NULL).
547  |   mov PC, #FRAME_CP
548  |  cbnz BASE, <3			// Else continue with the call.
549  |  b ->vm_leave_cp			// No base? Just remove C frame.
550  |
551  |//-----------------------------------------------------------------------
552  |//-- Metamethod handling ------------------------------------------------
553  |//-----------------------------------------------------------------------
554  |
555  |//-- Continuation dispatch ----------------------------------------------
556  |
557  |->cont_dispatch:
558  |  // BASE = meta base, RA = resultptr, RC = (nresults+1)*8
559  |  ldr LFUNC:CARG3, [RB, FRAME_FUNC]
560  |    ldr CARG1, [BASE, #-32]		// Get continuation.
561  |   mov CARG4, BASE  // CARG4 = meta base.
562  |   mov BASE, RB			// Restore caller BASE.
563  |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
564  |.if FFI
565  |    cmp CARG1, #1  // Flags stay live until the bls/beq below.
566  |.endif
567  |   ldr PC, [CARG4, #-24]		// Restore PC from [cont|PC].
568  |    add TMP0, RA, RC
569  |    str TISNIL, [TMP0, #-8]		// Ensure one valid arg.
570  |.if FFI
571  |    bls >1  // cont <= 1: special continuation values, see 1 below.
572  |.endif
573  |  ldr CARG3, LFUNC:CARG3->pc
574  |  ldr KBASE, [CARG3, #PC2PROTO(k)]
575  |  // BASE = base, RA = resultptr, CARG4 = meta base
576  |    br CARG1  // Jump to the continuation.
577  |
578  |.if FFI
579  |1:
580  |  beq ->cont_ffi_callback		// cont = 1: return from FFI callback.
581  |  // cont = 0: tailcall from C function.
582  |   sub CARG4, CARG4, #32
583  |   sub RC, CARG4, BASE
584  |  b ->vm_call_tail
585  |.endif
586  |
587  |->cont_cat:				// RA = resultptr, CARG4 = meta base
588  |  ldr INSw, [PC, #-4]  // Re-fetch the instruction preceding PC.
589  |   sub CARG2, CARG4, #32
590  |   ldr TMP0, [RA]  // TMP0 = result.
591  |     str BASE, L->base
592  |  decode_RB RB, INS
593  |   decode_RA RA, INS
594  |  add TMP1, BASE, RB, lsl #3
595  |  subs TMP1, CARG2, TMP1  // TMP1 = CARG2 - &BASE[RB].
596  |  beq >1  // Zero difference: store the final result at 1.
597  |   str TMP0, [CARG2]
598  |  lsr CARG3, TMP1, #3  // CARG3 = difference in slots.
599  |  b ->BC_CAT_Z
600  |
601  |1:
602  |   str TMP0, [BASE, RA, lsl #3]  // BASE[RA] = result.
603  |  b ->cont_nop
604  |
605  |//-- Table indexing metamethods -----------------------------------------
606  |
607  |->vmeta_tgets1:
608  |  movn CARG4, #~LJ_TSTR
609  |   add CARG2, BASE, RB, lsl #3  // CARG2 = &BASE[RB].
610  |  add CARG4, STR:RC, CARG4, lsl #47  // CARG4 = string in RC tagged as LJ_TSTR.
611  |  b >2
612  |
613  |->vmeta_tgets:
614  |  movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48  // Insert the LJ_TTAB tag bits into CARG2.
615  |  str CARG2, GL->tmptv  // Store the tagged value into GL->tmptv.
616  |  add CARG2, GL, #offsetof(global_State, tmptv)  // CARG2 = &GL->tmptv.
617  |2:
618  |   add CARG3, sp, TMPDofs  // CARG3 = address of TMPD.
619  |  str CARG4, TMPD  // TMPD = key in CARG4.
620  |  b >1
621  |
622  |->vmeta_tgetb:			// RB = table, RC = index
623  |  add RC, RC, TISNUM  // Add the TISNUM constant to the index.
624  |   add CARG2, BASE, RB, lsl #3
625  |   add CARG3, sp, TMPDofs  // CARG3 = address of TMPD.
626  |  str RC, TMPD  // TMPD = key.
627  |  b >1
628  |
629  |->vmeta_tgetv:			// RB = table, RC = key
630  |  add CARG2, BASE, RB, lsl #3  // CARG2 = &BASE[RB].
631  |   add CARG3, BASE, RC, lsl #3  // CARG3 = &BASE[RC].
632  |1:
633  |   str BASE, L->base
634  |  mov CARG1, L
635  |   str PC, SAVE_PC
636  |  bl extern lj_meta_tget		// (lua_State *L, TValue *o, TValue *k)
637  |  // Returns TValue * (finished) or NULL (metamethod).
638  |  cbz CRET1, >3
639  |  ldr TMP0, [CRET1]
640  |  str TMP0, [BASE, RA, lsl #3]  // BASE[RA] = *result.
641  |  ins_next
642  |
643  |3:  // Call __index metamethod.
644  |  // BASE = base, L->top = new base, stack = cont/func/t/k
645  |   sub TMP1, BASE, #FRAME_CONT
646  |  ldr BASE, L->top
647  |    mov NARGS8:RC, #16		// 2 args for func(t, k).
648  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]  // Guaranteed to be a function here.
649  |    str PC, [BASE, #-24]		// [cont|PC]
650  |   sub PC, BASE, TMP1  // PC = new base - old base + FRAME_CONT.
651  |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
652  |  b ->vm_call_dispatch_f
653  |
654  |->vmeta_tgetr:
655  |  sxtw CARG2, TMP1w  // key = sign-extended low 32 bits of TMP1.
656  |  bl extern lj_tab_getinth		// (GCtab *t, int32_t key)
657  |  // Returns cTValue * or NULL.
658  |  mov TMP0, TISNIL  // Default result if NULL was returned.
659  |  cbz CRET1, ->BC_TGETR_Z
660  |  ldr TMP0, [CRET1]
661  |  b ->BC_TGETR_Z
662  |
663  |//-----------------------------------------------------------------------
664  |
665  |->vmeta_tsets1:
666  |  movn CARG4, #~LJ_TSTR
667  |   add CARG2, BASE, RB, lsl #3  // CARG2 = &BASE[RB].
668  |  add CARG4, STR:RC, CARG4, lsl #47  // CARG4 = string in RC tagged as LJ_TSTR.
669  |  b >2
670  |
671  |->vmeta_tsets:
672  |  movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48  // Insert the LJ_TTAB tag bits into CARG2.
673  |  str CARG2, GL->tmptv  // Store the tagged value into GL->tmptv.
674  |  add CARG2, GL, #offsetof(global_State, tmptv)  // CARG2 = &GL->tmptv.
675  |2:
676  |   add CARG3, sp, TMPDofs  // CARG3 = address of TMPD.
677  |  str CARG4, TMPD  // TMPD = key in CARG4.
678  |  b >1
679  |
680  |->vmeta_tsetb:			// RB = table, RC = index
681  |  add RC, RC, TISNUM  // Add the TISNUM constant to the index.
682  |   add CARG2, BASE, RB, lsl #3
683  |   add CARG3, sp, TMPDofs  // CARG3 = address of TMPD.
684  |  str RC, TMPD  // TMPD = key.
685  |  b >1
686  |
687  |->vmeta_tsetv:
688  |  add CARG2, BASE, RB, lsl #3  // CARG2 = &BASE[RB].
689  |   add CARG3, BASE, RC, lsl #3  // CARG3 = &BASE[RC].
690  |1:
691  |   str BASE, L->base
692  |  mov CARG1, L
693  |   str PC, SAVE_PC
694  |  bl extern lj_meta_tset		// (lua_State *L, TValue *o, TValue *k)
695  |  // Returns TValue * (finished) or NULL (metamethod).
696  |   ldr TMP0, [BASE, RA, lsl #3]  // TMP0 = BASE[RA], the value to store (used on both paths).
697  |  cbz CRET1, >3
698  |  // NOBARRIER: lj_meta_tset ensures the table is not black.
699  |   str TMP0, [CRET1]
700  |  ins_next
701  |
702  |3:  // Call __newindex metamethod.
703  |  // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
704  |   sub TMP1, BASE, #FRAME_CONT
705  |  ldr BASE, L->top
706  |    mov NARGS8:RC, #24		// 3 args for func(t, k, v).
707  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]  // Guaranteed to be a function here.
708  |   str TMP0, [BASE, #16]		// Copy value to third argument.
709  |    str PC, [BASE, #-24]		// [cont|PC]
710  |   sub PC, BASE, TMP1  // PC = new base - old base + FRAME_CONT.
711  |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
712  |  b ->vm_call_dispatch_f
713  |
714  |->vmeta_tsetr:
715  |  sxtw CARG3, TMP1w  // key = sign-extended low 32 bits of TMP1.
716  |  str BASE, L->base
717  |  mov CARG1, L
718  |  str PC, SAVE_PC
719  |  bl extern lj_tab_setinth  // (lua_State *L, GCtab *t, int32_t key)
720  |  // Returns TValue *.
721  |  b ->BC_TSETR_Z
722  |
723  |//-- Comparison metamethods ---------------------------------------------
724  |
725  |->vmeta_comp:
726  |  add CARG2, BASE, RA, lsl #3  // o1 = &BASE[RA].
727  |   sub PC, PC, #4
728  |  add CARG3, BASE, RC, lsl #3  // o2 = &BASE[RC].
729  |   str BASE, L->base
730  |  mov CARG1, L
731  |   str PC, SAVE_PC
732  |  uxtb CARG4w, INSw  // op = low byte of INS.
733  |  bl extern lj_meta_comp  // (lua_State *L, TValue *o1, *o2, int op)
734  |  // Returns 0/1 or TValue * (metamethod).
735  |3:
736  |  cmp CRET1, #1
737  |  bhi ->vmeta_binop  // Result > 1: it is a TValue *, call the metamethod.
738  |4:  // Entered with flags set: lo keeps the fall-through PC, otherwise the branch is taken.
739  |   ldrh RBw, [PC, # OFS_RD]  // RB = 16 bit operand at PC + OFS_RD.
740  |    add PC, PC, #4
741  |   add RB, PC, RB, lsl #2
742  |   sub RB, RB, #0x20000  // RB = PC + RB*4 - 0x20000 (branch target).
743  |  csel PC, PC, RB, lo
744  |->cont_nop:
745  |  ins_next
746  |
747  |->cont_ra:				// RA = resultptr
748  |  ldr INSw, [PC, #-4]  // Re-fetch the instruction preceding PC.
749  |   ldr TMP0, [RA]
750  |  decode_RA TMP1, INS
751  |   str TMP0, [BASE, TMP1, lsl #3]  // BASE[decoded RA] = result.
752  |  b ->cont_nop
753  |
754  |->cont_condt:			// RA = resultptr
755  |  ldr TMP0, [RA]
756  |   mov_true TMP1
757  |  cmp TMP1, TMP0			// Branch if result is true.
758  |  b <4
759  |
760  |->cont_condf:			// RA = resultptr
761  |  ldr TMP0, [RA]
762  |   mov_false TMP1
763  |  cmp TMP0, TMP1			// Branch if result is false.
764  |  b <4
765  |
766  |->vmeta_equal:
767  |  // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
768  |  and TAB:CARG3, CARG3, #LJ_GCVMASK
769  |  sub PC, PC, #4
770  |   str BASE, L->base
771  |   mov CARG1, L
772  |  str PC, SAVE_PC
773  |  bl extern lj_meta_equal  // (lua_State *L, GCobj *o1, *o2, int ne)
774  |  // Returns 0/1 or TValue * (metamethod).
775  |  b <3
776  |
777  |->vmeta_equal_cd:
778  |.if FFI
779  |  sub PC, PC, #4
780  |   str BASE, L->base
781  |   mov CARG1, L
782  |   mov CARG2, INS  // Pass the instruction word as op.
783  |  str PC, SAVE_PC
784  |  bl extern lj_meta_equal_cd		// (lua_State *L, BCIns op)
785  |  // Returns 0/1 or TValue * (metamethod).
786  |  b <3
787  |.endif
788  |
789  |->vmeta_istype:
790  |  sub PC, PC, #4
791  |   str BASE, L->base
792  |   mov CARG1, L
793  |   mov CARG2, RA
794  |   mov CARG3, RC
795  |  str PC, SAVE_PC
796  |  bl extern lj_meta_istype  // (lua_State *L, BCReg ra, BCReg tp)
797  |  b ->cont_nop
798  |
799  |//-- Arithmetic metamethods ---------------------------------------------
800  |
801  |->vmeta_arith_vn:
802  |  add CARG3, BASE, RB, lsl #3  // rb = &BASE[RB].
803  |   add CARG4, KBASE, RC, lsl #3  // rc = &KBASE[RC].
804  |  b >1
805  |
806  |->vmeta_arith_nv:
807  |  add CARG4, BASE, RB, lsl #3  // rc = &BASE[RB].
808  |   add CARG3, KBASE, RC, lsl #3  // rb = &KBASE[RC].
809  |  b >1
810  |
811  |->vmeta_unm:
812  |  add CARG3, BASE, RC, lsl #3  // rb = &BASE[RC].
813  |  mov CARG4, CARG3  // rc = rb (same operand passed twice).
814  |  b >1
815  |
816  |->vmeta_arith_vv:
817  |  add CARG3, BASE, RB, lsl #3  // rb = &BASE[RB].
818  |   add CARG4, BASE, RC, lsl #3  // rc = &BASE[RC].
819  |1:
820  |  uxtb CARG5w, INSw  // op = low byte of INS.
821  |   add CARG2, BASE, RA, lsl #3  // ra = &BASE[RA].
822  |    str BASE, L->base
823  |   mov CARG1, L
824  |    str PC, SAVE_PC
825  |  bl extern lj_meta_arith  // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
826  |  // Returns NULL (finished) or TValue * (metamethod).
827  |  cbz CRET1, ->cont_nop
828  |
829  |  // Call metamethod for binary op.
830  |->vmeta_binop:
831  |  // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
832  |  sub TMP1, CRET1, BASE  // TMP1 = new base - old base.
833  |   str PC, [CRET1, #-24]		// [cont|PC]
834  |  add PC, TMP1, #FRAME_CONT  // PC = frame delta + FRAME_CONT.
835  |  mov BASE, CRET1
836  |   mov NARGS8:RC, #16		// 2 args for func(o1, o2).
837  |  b ->vm_call_dispatch
838  |
839  |->vmeta_len:
840  |  add CARG2, BASE, RC, lsl #3  // o = &BASE[RC].
841#if LJ_52
842  |  mov TAB:RC, TAB:CARG1		// Save table (ignored for other types).
843#endif
844  |   str BASE, L->base
845  |  mov CARG1, L
846  |   str PC, SAVE_PC
847  |  bl extern lj_meta_len		// (lua_State *L, TValue *o)
848  |  // Returns NULL (retry) or TValue * (metamethod base).
849#if LJ_52
850  |  cbnz CRET1, ->vmeta_binop		// Binop call for compatibility.
851  |  mov TAB:CARG1, TAB:RC  // Restore the table saved in RC before retrying.
852  |  b ->BC_LEN_Z
853#else
854  |  b ->vmeta_binop			// Binop call for compatibility.
855#endif
856  |
857  |//-- Call metamethod ----------------------------------------------------
858  |
859  |->vmeta_call:			// Resolve and call __call metamethod.
860  |  // RB = old base, BASE = new base, RC = nargs*8
861  |  mov CARG1, L
862  |   str RB, L->base			// This is the caller's base!
863  |  sub CARG2, BASE, #16  // func = BASE-16 (the FRAME_FUNC slot).
864  |   str PC, SAVE_PC
865  |  add CARG3, BASE, NARGS8:RC  // top = BASE + nargs*8.
866  |  bl extern lj_meta_call	// (lua_State *L, TValue *func, TValue *top)
867  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]  // Guaranteed to be a function here.
868  |   add NARGS8:RC, NARGS8:RC, #8	// Got one more argument now.
869  |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
870  |  ins_call
871  |
872  |->vmeta_callt:			// Resolve __call for BC_CALLT.
873  |  // BASE = old base, RA = new base, RC = nargs*8
874  |  mov CARG1, L
875  |   str BASE, L->base
876  |  sub CARG2, RA, #16  // func = RA-16 (the FRAME_FUNC slot of the new frame).
877  |   str PC, SAVE_PC
878  |  add CARG3, RA, NARGS8:RC  // top = RA + nargs*8.
879  |  bl extern lj_meta_call	// (lua_State *L, TValue *func, TValue *top)
880  |  ldr TMP1, [RA, FRAME_FUNC]		// Guaranteed to be a function here.
881  |   ldr PC, [BASE, FRAME_PC]
882  |   add NARGS8:RC, NARGS8:RC, #8	// Got one more argument now.
883  |  and LFUNC:CARG3, TMP1, #LJ_GCVMASK
884  |  b ->BC_CALLT2_Z
885  |
886  |//-- Argument coercion for 'for' statement ------------------------------
887  |
888  |->vmeta_for:
889  |  mov CARG1, L
890  |   str BASE, L->base
891  |  mov CARG2, RA  // base = RA.
892  |   str PC, SAVE_PC
893  |  bl extern lj_meta_for	// (lua_State *L, TValue *base)
894  |  ldr INSw, [PC, #-4]  // Re-fetch the instruction preceding PC.
895  |.if JIT
896  |   uxtb TMP0w, INSw  // TMP0 = low byte of INS.
897  |.endif
898  |  decode_RA RA, INS  // Re-decode the operands after the C call.
899  |  decode_RD RC, INS
900  |.if JIT
901  |   cmp TMP0, #BC_JFORI
902  |   beq =>BC_JFORI
903  |.endif
904  |  b =>BC_FORI  // Retry the instruction.
905  |
906  |//-----------------------------------------------------------------------
907  |//-- Fast functions -----------------------------------------------------
908  |//-----------------------------------------------------------------------
909  |
910  |.macro .ffunc, name
911  |->ff_ .. name:
912  |.endmacro
913  |
914  |.macro .ffunc_1, name
915  |->ff_ .. name:
916  |  ldr CARG1, [BASE]  // CARG1 = first argument.
917  |   cmp NARGS8:RC, #8
918  |   blo ->fff_fallback  // Fewer than 1 argument.
919  |.endmacro
920  |
921  |.macro .ffunc_2, name
922  |->ff_ .. name:
923  |  ldp CARG1, CARG2, [BASE]  // CARG1/CARG2 = first two arguments.
924  |   cmp NARGS8:RC, #16
925  |   blo ->fff_fallback  // Fewer than 2 arguments.
926  |.endmacro
927  |
928  |.macro .ffunc_n, name
929  |  .ffunc name
930  |  ldr CARG1, [BASE]  // First argument as integer bits (for the type check).
931  |   cmp NARGS8:RC, #8
932  |  ldr FARG1, [BASE]  // Same slot loaded into the FP register.
933  |   blo ->fff_fallback  // Fewer than 1 argument.
934  |  checknum CARG1, ->fff_fallback
935  |.endmacro
936  |
937  |.macro .ffunc_nn, name
938  |  .ffunc name
939  |  ldp CARG1, CARG2, [BASE]  // First two arguments as integer bits.
940  |   cmp NARGS8:RC, #16
941  |  ldp FARG1, FARG2, [BASE]  // Same slots loaded into the FP registers.
942  |   blo ->fff_fallback  // Fewer than 2 arguments.
943  |  checknum CARG1, ->fff_fallback
944  |  checknum CARG2, ->fff_fallback
945  |.endmacro
946  |
947  |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2.
948  |.macro ffgccheck
949  |  ldp CARG1, CARG2, GL->gc.total	// Assumes threshold follows total.
950  |  cmp CARG1, CARG2
951  |  blt >1  // total < threshold (signed compare): skip the GC step.
952  |  bl ->fff_gcstep
953  |1:
954  |.endmacro
955  |
956  |//-- Base library: checks -----------------------------------------------
957  |
958  |.ffunc_1 assert
959  |   ldr PC, [BASE, FRAME_PC]
960  |  mov_false TMP1
961  |  cmp CARG1, TMP1
962  |  bhs ->fff_fallback  // Argument >= the mov_false constant (unsigned): take the fallback.
963  |  str CARG1, [BASE, #-16]  // First result goes to BASE-16.
964  |  sub RB, BASE, #8
965  |  subs RA, NARGS8:RC, #8  // RA = bytes of remaining arguments.
966  |   add RC, NARGS8:RC, #8		// Compute (nresults+1)*8.
967  |  cbz RA, ->fff_res			// Done if exactly 1 argument.
968  |1:  // Copy each remaining argument 16 bytes down.
969  |   ldr CARG1, [RB, #16]
970  |  sub RA, RA, #8
971  |   str CARG1, [RB], #8
972  |  cbnz RA, <1
973  |  b ->fff_res
974  |
975  |.ffunc_1 type
976  |  mov TMP0, #~LJ_TISNUM
977  |  asr ITYPE, CARG1, #47  // ITYPE = argument >> 47 (arithmetic).
978  |  cmn ITYPE, #~LJ_TISNUM
979  |  csinv TMP1, TMP0, ITYPE, lo  // TMP1 = lo ? ~LJ_TISNUM : ~ITYPE.
980  |  add TMP1, TMP1, #offsetof(GCfuncC, upvalue)/8  // Bias the index by the upvalue array offset.
981  |  ldr CARG1, [CFUNC:CARG3, TMP1, lsl #3]  // Result = upvalue of the C function at that index.
982  |  b ->fff_restv
983  |
984  |//-- Base library: getters and setters ---------------------------------
985  |
986  |.ffunc_1 getmetatable
987  |  asr ITYPE, CARG1, #47  // ITYPE = argument >> 47 (arithmetic).
988  |  cmn ITYPE, #-LJ_TTAB
989  |  ccmn ITYPE, #-LJ_TUDATA, #4, ne  // eq iff ITYPE is LJ_TTAB or LJ_TUDATA.
990  |   and TAB:CARG1, CARG1, #LJ_GCVMASK
991  |  bne >6  // Other types: use the per-type base metatable at 6.
992  |1:  // Field metatable must be at same offset for GCtab and GCudata!
993  |  ldr TAB:RB, TAB:CARG1->metatable
994  |2:
995  |   mov CARG1, TISNIL  // Default result if there is no metatable.
996  |   ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable]
997  |  cbz TAB:RB, ->fff_restv
998  |  ldr TMP1w, TAB:RB->hmask
999  |   ldr TMP2w, STR:RC->sid
1000  |    ldr NODE:CARG3, TAB:RB->node
1001  |  and TMP1w, TMP1w, TMP2w		// idx = str->sid & tab->hmask
1002  |  add TMP1, TMP1, TMP1, lsl #1  // idx*3
1003  |  movn CARG4, #~LJ_TSTR
1004  |    add NODE:CARG3, NODE:CARG3, TMP1, lsl #3  // node = tab->node + idx*3*8
1005  |  add CARG4, STR:RC, CARG4, lsl #47	// Tagged key to look for.
1006  |3:  // Rearranged logic, because we expect _not_ to find the key.
1007  |  ldp CARG1, TMP0, NODE:CARG3->val  // CARG1 = node value, TMP0 = node key.
1008  |   ldr NODE:CARG3, NODE:CARG3->next
1009  |  cmp TMP0, CARG4
1010  |  beq >5  // Key found.
1011  |  cbnz NODE:CARG3, <3  // Follow the chain until next is NULL.
1012  |4:
1013  |  mov CARG1, RB			// Use metatable as default result.
1014  |  movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48
1015  |  b ->fff_restv
1016  |5:  // Key found: test the VALUE (CARG1), not the key (TMP0 equals CARG4 here).
1017  |  cmp CARG1, TISNIL
1018  |  bne ->fff_restv  // Non-nil value: return it.
1019  |  b <4  // Nil value: return the metatable itself.
1020  |
1021  |6:
1022  |  movn TMP0, #~LJ_TISNUM
1023  |  cmp ITYPE, TMP0
1024  |  csel ITYPE, ITYPE, TMP0, hs  // ITYPE = hs ? ITYPE : LJ_TISNUM (sign-extended).
1025  |  sub TMP1, GL, ITYPE, lsl #3
1026  |  ldr TAB:RB, [TMP1, #offsetof(global_State, gcroot[GCROOT_BASEMT])-8]
1027  |  b <2
1028  |
1029  |.ffunc_2 setmetatable		// setmetatable(t, mt): inline only when t has no metatable yet and mt is a table.
1030  |  // Fast path: no mt for table yet and not clearing the mt.
1031  |  checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
1032  |   ldr TAB:TMP0, TAB:TMP1->metatable
1033  |  asr ITYPE, CARG2, #47
1034  |   ldrb TMP2w, TAB:TMP1->marked
1035  |  cmn ITYPE, #-LJ_TTAB
1036  |    and TAB:CARG2, CARG2, #LJ_GCVMASK
1037  |  ccmp TAB:TMP0, #0, #0, eq	// Result is 'eq' only if arg 2 is a table and the old metatable is NULL.
1038  |  bne ->fff_fallback
1039  |    str TAB:CARG2, TAB:TMP1->metatable
1040  |   tbz TMP2w, #2, ->fff_restv	// isblack(table)
1041  |  barrierback TAB:TMP1, TMP2w, TMP0
1042  |  b ->fff_restv			// CARG1 was never overwritten: arg 1 is the result.
1043  |
1044  |.ffunc rawget			// rawget(t, k): calls lj_tab_get and returns the TValue it points to.
1045  |  ldr CARG2, [BASE]
1046  |   cmp NARGS8:RC, #16
1047  |   blo ->fff_fallback		// Needs at least two arguments.
1048  |  checktab CARG2, ->fff_fallback
1049  |   mov CARG1, L
1050  |   add CARG3, BASE, #8		// Pointer to arg 2 (the key) on the Lua stack.
1051  |  bl extern lj_tab_get  // (lua_State *L, GCtab *t, cTValue *key)
1052  |  // Returns cTValue *.
1053  |  ldr CARG1, [CRET1]
1054  |  b ->fff_restv
1055  |
1056  |//-- Base library: conversions ------------------------------------------
1057  |
1058  |.ffunc tonumber			// tonumber(v): returns v itself when it passes checknumber.
1059  |  // Only handles the number case inline (without a base argument).
1060  |  ldr CARG1, [BASE]
1061  |   cmp NARGS8:RC, #8
1062  |   bne ->fff_fallback		// Anything but exactly one argument goes to the fallback.
1063  |  checknumber CARG1, ->fff_fallback
1064  |  b ->fff_restv
1065  |
1066  |.ffunc_1 tostring			// tostring(v): strings are returned as is, numbers go through lj_strfmt_number.
1067  |  // Only handles the string or number case inline.
1068  |  asr ITYPE, CARG1, #47
1069  |  cmn ITYPE, #-LJ_TSTR
1070  |  // A __tostring method in the string base metatable is ignored.
1071  |  beq ->fff_restv
1072  |  // Handle numbers inline, unless a number base metatable is present.
1073  |  ldr TMP1, GL->gcroot[GCROOT_BASEMT_NUM]
1074  |   str BASE, L->base
1075  |  cmn ITYPE, #-LJ_TISNUM
1076  |  ccmp TMP1, #0, #0, ls		// 'eq' only if ITYPE is 'ls' LJ_TISNUM and the base metatable is NULL.
1077  |   str PC, SAVE_PC			// Redundant (but a defined value).
1078  |  bne ->fff_fallback
1079  |  ffgccheck
1080  |  mov CARG1, L
1081  |  mov CARG2, BASE			// Pointer to arg 1 on the Lua stack.
1082  |  bl extern lj_strfmt_number		// (lua_State *L, cTValue *o)
1083  |  // Returns GCstr *.
1084  |   movn TMP1, #~LJ_TSTR
1085  |  ldr BASE, L->base
1086  |   add CARG1, CARG1, TMP1, lsl #47	// Tag the returned pointer as a string TValue.
1087  |  b ->fff_restv
1088  |
1089  |//-- Base library: iterators -------------------------------------------
1090  |
1091  |.ffunc_1 next			// next(t [, k]): lj_tab_next writes the key/value pair to the result slots.
1092  |  checktp CARG1, LJ_TTAB, ->fff_fallback
1093  |  str TISNIL, [BASE, NARGS8:RC]	// Set missing 2nd arg to nil.
1094  |  ldr PC, [BASE, FRAME_PC]
1095  |  add CARG2, BASE, #8		// Pointer to arg 2 (the key).
1096  |  sub CARG3, BASE, #16		// Output goes to the slots starting at BASE-16.
1097  |  bl extern lj_tab_next		// (GCtab *t, cTValue *key, TValue *o)
1098  |  // Returns 1=found, 0=end, -1=error.
1099  |   mov RC, #(2+1)*8
1100  |  tbnz CRET1w, #31, ->fff_fallback	// Invalid key.
1101  |  cbnz CRET1, ->fff_res		// Found key/value.
1102  |  // End of traversal: return nil.
1103  |  str TISNIL, [BASE, #-16]
1104  |  b ->fff_res1
1105  |
1106  |.ffunc_1 pairs			// pairs(t): returns upvalue[0] of this function, t and nil.
1107  |  checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
1108#if LJ_52
1109  |  ldr TAB:CARG2, TAB:TMP1->metatable
1110#endif
1111  |   ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0]
1112  |    ldr PC, [BASE, FRAME_PC]
1113#if LJ_52
1114  |  cbnz TAB:CARG2, ->fff_fallback	// With LJ_52, any metatable sends this to the fallback.
1115#endif
1116  |  mov RC, #(3+1)*8
1117  |  stp CARG1, TISNIL, [BASE, #-8]	// Result 2 = t (still tagged), result 3 = nil.
1118  |   str CFUNC:CARG4, [BASE, #-16]	// Result 1 = upvalue[0].
1119  |  b ->fff_res
1120  |
1121  |.ffunc_2 ipairs_aux		// ipairs_aux(t, i): returns i+1, t[i+1], or nothing if that value is nil.
1122  |  checktab CARG1, ->fff_fallback
1123  |   checkint CARG2, ->fff_fallback
1124  |  ldr TMP1w, TAB:CARG1->asize
1125  |   ldr CARG3, TAB:CARG1->array
1126  |    ldr TMP0w, TAB:CARG1->hmask
1127  |  add CARG2w, CARG2w, #1		// 32-bit add: the upper half of CARG2 is cleared.
1128  |  cmp CARG2w, TMP1w
1129  |    ldr PC, [BASE, FRAME_PC]
1130  |     add TMP2, CARG2, TISNUM		// Re-tag the new index as an integer TValue.
1131  |   mov RC, #(0+1)*8			// Default: no results.
1132  |     str TMP2, [BASE, #-16]		// Result 1 = new index.
1133  |  bhs >2				// Not in array part?
1134  |  ldr TMP0, [CARG3, CARG2, lsl #3]
1135  |1:
1136  |   mov TMP1, #(2+1)*8
1137  |   cmp TMP0, TISNIL
1138  |  str TMP0, [BASE, #-8]		// Result 2 = value.
1139  |   csel RC, RC, TMP1, eq		// nil value keeps RC at zero results, otherwise two results.
1140  |  b ->fff_res
1141  |2:  // Check for empty hash part first. Otherwise call C function.
1142  |  cbz TMP0w, ->fff_res		// hmask == 0: return no results.
1143  |  bl extern lj_tab_getinth		// (GCtab *t, int32_t key)
1144  |  // Returns cTValue * or NULL.
1145  |  cbz CRET1, ->fff_res
1146  |  ldr TMP0, [CRET1]
1147  |  b <1
1148  |
1149  |.ffunc_1 ipairs			// ipairs(t): returns upvalue[0] of this function, t and integer 0.
1150  |  checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
1151#if LJ_52
1152  |  ldr TAB:CARG2, TAB:TMP1->metatable
1153#endif
1154  |   ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0]
1155  |    ldr PC, [BASE, FRAME_PC]
1156#if LJ_52
1157  |  cbnz TAB:CARG2, ->fff_fallback	// With LJ_52, any metatable sends this to the fallback.
1158#endif
1159  |  mov RC, #(3+1)*8
1160  |  stp CARG1, TISNUM, [BASE, #-8]	// Result 2 = t, result 3 = TISNUM alone, i.e. the integer tag with payload 0.
1161  |   str CFUNC:CARG4, [BASE, #-16]	// Result 1 = upvalue[0].
1162  |  b ->fff_res
1163  |
1164  |//-- Base library: catch errors ----------------------------------------
1165  |
1166  |.ffunc pcall			// pcall(f, ...): builds a FRAME_PCALL frame and dispatches the call to f.
1167  |   cmp NARGS8:RC, #8
1168  |  ldrb TMP0w, GL->hookmask
1169  |   blo ->fff_fallback		// No function argument.
1170  |   sub NARGS8:RC, NARGS8:RC, #8	// The callee gets one argument fewer.
1171  |    mov RB, BASE
1172  |    add BASE, BASE, #16
1173  |  ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1	// Extract the hook-active bit (0 or 1).
1174  |  add PC, TMP0, #16+FRAME_PCALL	// Frame link: delta 16 + FRAME_PCALL + hook-active bit.
1175  |   beq ->vm_call_dispatch		// Flags still from the cmp above: exactly one argument, nothing to move.
1176  |1:
1177  |   add TMP2, BASE, NARGS8:RC
1178  |2:  // Move every argument up by one slot, starting from the top.
1179  |   ldr TMP0, [TMP2, #-16]
1180  |   str TMP0, [TMP2, #-8]!		// Pre-index with writeback: TMP2 steps down by 8.
1181  |  cmp TMP2, BASE
1182  |  bne <2
1183  |  b ->vm_call_dispatch
1184  |
1185  |.ffunc xpcall			// xpcall(f, tb, ...): like pcall, with arg 2 required to be a function.
1186  |     ldp CARG1, CARG2, [BASE]
1187  |  ldrb TMP0w, GL->hookmask
1188  |   subs NARGS8:TMP1, NARGS8:RC, #16
1189  |   blo ->fff_fallback		// Fewer than two arguments.
1190  |    mov RB, BASE
1191  |     asr ITYPE, CARG2, #47
1192  |  ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1	// Extract the hook-active bit (0 or 1).
1193  |     cmn ITYPE, #-LJ_TFUNC
1194  |  add PC, TMP0, #24+FRAME_PCALL	// Frame link: delta 24 + FRAME_PCALL + hook-active bit.
1195  |     bne ->fff_fallback		// Traceback must be a function.
1196  |   mov NARGS8:RC, NARGS8:TMP1
1197  |    add BASE, BASE, #24
1198  |     stp CARG2, CARG1, [RB]		// Swap function and traceback.
1199  |   cbz NARGS8:RC, ->vm_call_dispatch
1200  |  b <1				// Jumps back to the argument-moving code at label 1 of pcall.
1201  |
1202  |//-- Coroutine library --------------------------------------------------
1203  |
1204  |.macro coroutine_resume_wrap, resume	// resume != 0 emits coroutine_resume, resume == 0 emits coroutine_wrap_aux.
1205  |.if resume
1206  |.ffunc_1 coroutine_resume
1207  |  checktp CARG1, LJ_TTHREAD, ->fff_fallback
1208  |.else
1209  |.ffunc coroutine_wrap_aux
1210  |  ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr	// The coroutine comes from upvalue 0 of the wrapper.
1211  |  and L:CARG1, CARG1, #LJ_GCVMASK
1212  |.endif
1213  |   ldr PC, [BASE, FRAME_PC]
1214  |     str BASE, L->base
1215  |  ldp RB, CARG2, L:CARG1->base	// RB = co->base, CARG2 = the field after it (written back as co->top below).
1216  |   ldrb TMP1w, L:CARG1->status
1217  |  add TMP0, CARG2, TMP1
1218  |   str PC, SAVE_PC
1219  |  cmp TMP0, RB
1220  |  beq ->fff_fallback		// co->top + status == co->base: not handled inline.
1221  |   cmp TMP1, #LUA_YIELD
1222  |    add TMP0, CARG2, #8
1223  |   csel CARG2, CARG2, TMP0, hs	// status below LUA_YIELD: start one slot above co->top.
1224  |   ldr CARG4, L:CARG1->maxstack
1225  |   add CARG3, CARG2, NARGS8:RC	// Prospective new co->top.
1226  |    ldr RB, L:CARG1->cframe
1227  |   ccmp CARG3, CARG4, #2, ls	// Only if status <= LUA_YIELD: compare the new top with maxstack.
1228  |    ccmp RB, #0, #2, ls		// Only if that passed: co->cframe must be 0.
1229  |    bhi ->fff_fallback		// Any failed step leaves 'hi' (forced flags #2 = C set, Z clear).
1230  |.if resume
1231  |  sub CARG3, CARG3, #8		// Keep resumed thread in stack for GC.
1232  |  add BASE, BASE, #8
1233  |  sub NARGS8:RC, NARGS8:RC, #8
1234  |.endif
1235  |  str CARG3, L:CARG1->top
1236  |  str BASE, L->top
1237  |  cbz NARGS8:RC, >3
1238  |2:  // Move args to coroutine.
1239  |   ldr TMP0, [BASE, RB]		// RB is 0 on entry (cframe check above) and is reused as byte offset.
1240  |  cmp RB, NARGS8:RC			// NOTE(review): tested before the increment, so the slot at offset RC is copied too -- confirm intended.
1241  |   str TMP0, [CARG2, RB]
1242  |   add RB, RB, #8
1243  |  bne <2
1244  |3:
1245  |  mov CARG3, #0
1246  |   mov L:RA, L:CARG1		// Keep the coroutine in RA across the call.
1247  |  mov CARG4, #0
1248  |  bl ->vm_resume			// (lua_State *L, TValue *base, 0, 0)
1249  |  // Returns thread status.
1250  |4:
1251  |  ldp CARG3, CARG4, L:RA->base	// CARG3 = co->base, CARG4 = the field after it (co->top).
1252  |   cmp CRET1, #LUA_YIELD
1253  |  ldr BASE, L->base
1254  |    str L, GL->cur_L
1255  |    st_vmstate ST_INTERP
1256  |   bhi >8				// Status above LUA_YIELD: the coroutine raised an error.
1257  |  sub RC, CARG4, CARG3		// RC = bytes of results on the coroutine stack.
1258  |   ldr CARG1, L->maxstack
1259  |   add CARG2, BASE, RC
1260  |  cbz RC, >6				// No results?
1261  |  cmp CARG2, CARG1
1262  |   mov RB, #0
1263  |  bhi >9				// Need to grow stack?
1264  |
1265  |  sub CARG4, RC, #8			// Offset of the last result.
1266  |   str CARG3, L:RA->top		// Clear coroutine stack.
1267  |5:  // Move results from coroutine.
1268  |   ldr TMP0, [CARG3, RB]
1269  |  cmp RB, CARG4
1270  |   str TMP0, [BASE, RB]
1271  |   add RB, RB, #8
1272  |  bne <5
1273  |6:
1274  |.if resume
1275  |  mov_true TMP1
1276  |   add RC, RC, #16			// (nresults+1)*8 including the prepended boolean.
1277  |7:
1278  |  str TMP1, [BASE, #-8]		// Prepend true/false to results.
1279  |   sub RA, BASE, #8
1280  |.else
1281  |   mov RA, BASE
1282  |   add RC, RC, #8			// (nresults+1)*8.
1283  |.endif
1284  |  ands CARG1, PC, #FRAME_TYPE
1285  |   str PC, SAVE_PC
1286  |   str RCw, SAVE_MULTRES
1287  |  beq ->BC_RET_Z			// Frame type bits are zero: return via BC_RET_Z.
1288  |  b ->vm_return
1289  |
1290  |8:  // Coroutine returned with error (at co->top-1).
1291  |.if resume
1292  |  ldr TMP0, [CARG4, #-8]!		// Pre-index with writeback: CARG4 = co->top-1.
1293  |   mov_false TMP1
1294  |    mov RC, #(2+1)*8
1295  |  str CARG4, L:RA->top		// Remove error from coroutine stack.
1296  |  str TMP0, [BASE]			// Copy error message.
1297  |  b <7
1298  |.else
1299  |  mov CARG1, L
1300  |  mov CARG2, L:RA
1301  |  bl extern lj_ffh_coroutine_wrap_err  // (lua_State *L, lua_State *co)
1302  |  // Never returns.
1303  |.endif
1304  |
1305  |9:  // Handle stack expansion on return from yield.
1306  |  mov CARG1, L
1307  |  lsr CARG2, RC, #3			// n = number of result slots.
1308  |  bl extern lj_state_growstack	// (lua_State *L, int n)
1309  |  mov CRET1, #0			// Status 0, so the retry at label 4 takes the non-error path.
1310  |  b <4
1311  |.endmacro
1312  |
1313  |  coroutine_resume_wrap 1		// coroutine.resume
1314  |  coroutine_resume_wrap 0		// coroutine.wrap
1315  |
1316  |.ffunc coroutine_yield		// coroutine.yield(...): sets status LUA_YIELD and leaves through ->vm_leave_unw.
1317  |  ldr TMP0, L->cframe
1318  |   add TMP1, BASE, NARGS8:RC
1319  |    mov CRET1, #LUA_YIELD
1320  |   stp BASE, TMP1, L->base		// L->base = BASE, following field (top) = BASE + nargs*8.
1321  |  tbz TMP0, #0, ->fff_fallback	// Bit 0 of L->cframe clear: not handled inline.
1322  |   str xzr, L->cframe
1323  |    strb CRET1w, L->status
1324  |  b ->vm_leave_unw
1325  |
1326  |//-- Math library -------------------------------------------------------
1327  |
1328  |.macro math_round, func, round	// Emits math_<func>; 'round' is the FP rounding instruction applied to d0.
1329  |  .ffunc math_ .. func
1330  |  ldr CARG1, [BASE]
1331  |   cmp NARGS8:RC, #8
1332  |  ldr d0, [BASE]
1333  |   blo ->fff_fallback		// No argument.
1334  |  cmp TISNUMhi, CARG1, lsr #32	// Compare the upper 32 bits with the integer tag.
1335  |  beq ->fff_restv			// Upper word equals the integer tag: return the argument unchanged.
1336  |  blo ->fff_fallback		// Upper word above the integer tag: presumably not a number.
1337  |  round d0, d0
1338  |  b ->fff_resn
1339  |.endmacro
1340  |
1341  |  math_round floor, frintm
1342  |  math_round ceil, frintp
1343  |
1344  |.ffunc_1 math_abs			// math.abs(x). Falls through into ->fff_restv.
1345  |  checknumber CARG1, ->fff_fallback
1346  |  and CARG1, CARG1, #U64x(7fffffff,ffffffff)	// Clear bit 63; does not touch the flags.
1347  |  bne ->fff_restv			// Flags still from checknumber: presumably 'ne' means a non-integer number.
1348  |  eor CARG2w, CARG1w, CARG1w, asr #31
1349  |   movz CARG3, #0x41e0, lsl #48	// 2^31.
1350  |  subs CARG1w, CARG2w, CARG1w, asr #31	// abs = (x ^ (x>>31)) - (x>>31) on the low 32 bits.
1351  |   add CARG1, CARG1, TISNUM		// Re-tag as integer.
1352  |  csel CARG1, CARG1, CARG3, pl	// Negative result after subs (overflow case): use the double 2^31.
1353  |  // Fallthrough.
1354  |
1355  |->fff_restv:			// Common return paths for fast functions: one TValue result in CARG1.
1356  |  // CARG1 = TValue result.
1357  |  ldr PC, [BASE, FRAME_PC]
1358  |  str CARG1, [BASE, #-16]
1359  |->fff_res1:
1360  |  // PC = return.
1361  |  mov RC, #(1+1)*8
1362  |->fff_res:
1363  |  // RC = (nresults+1)*8, PC = return.
1364  |  ands CARG1, PC, #FRAME_TYPE
1365  |   str RCw, SAVE_MULTRES
1366  |   sub RA, BASE, #16			// Results start at BASE-16.
1367  |  bne ->vm_return			// Frame type bits set: generic return path.
1368  |  ldr INSw, [PC, #-4]		// Instruction word just before the return PC.
1369  |  decode_RB RB, INS
1370  |5:
1371  |  cmp RC, RB, lsl #3			// More results expected?
1372  |  blo >6
1373  |  decode_RA TMP1, INS
1374  |  // Adjust BASE. KBASE is assumed to be set for the calling frame.
1375  |  sub BASE, RA, TMP1, lsl #3
1376  |  ins_next
1377  |
1378  |6:  // Fill up results with nil.
1379  |  add TMP1, RA, RC
1380  |   add RC, RC, #8
1381  |  str TISNIL, [TMP1, #-8]
1382  |  b <5
1383  |
1384  |.macro math_extern, func		// Emits math_<func>: one checked number argument, calls extern func.
1385  |  .ffunc_n math_ .. func
1386  |  bl extern func
1387  |  b ->fff_resn			// Result is taken from d0.
1388  |.endmacro
1389  |
1390  |.macro math_extern2, func		// Emits math_<func>: two checked number arguments, calls extern func.
1391  |  .ffunc_nn math_ .. func
1392  |  bl extern func
1393  |  b ->fff_resn			// Result is taken from d0.
1394  |.endmacro
1395  |
1396  |.ffunc_n math_sqrt			// math.sqrt(x). Falls through into ->fff_resn.
1397  |  fsqrt d0, d0
1398  |->fff_resn:			// Common return path for one FP result in d0.
1399  |  ldr PC, [BASE, FRAME_PC]
1400  |  str d0, [BASE, #-16]
1401  |  b ->fff_res1
1402  |
1403  |.ffunc math_log			// math.log(x): inline only for exactly one number argument.
1404  |  ldr CARG1, [BASE]
1405  |   cmp NARGS8:RC, #8
1406  |  ldr FARG1, [BASE]
1407  |   bne ->fff_fallback			// Need exactly 1 argument.
1408  |  checknum CARG1, ->fff_fallback
1409  |  bl extern log
1410  |  b ->fff_resn
1411  |
1412  |  math_extern log10			// One-argument math functions forwarded to the extern of the same name.
1413  |  math_extern exp
1414  |  math_extern sin
1415  |  math_extern cos
1416  |  math_extern tan
1417  |  math_extern asin
1418  |  math_extern acos
1419  |  math_extern atan
1420  |  math_extern sinh
1421  |  math_extern cosh
1422  |  math_extern tanh
1423  |  math_extern2 pow			// Two-argument math functions.
1424  |  math_extern2 atan2
1425  |  math_extern2 fmod
1426  |
1427  |.ffunc_2 math_ldexp		// math.ldexp(x, e): x must pass checknum, e must pass checkint.
1428  |  ldr FARG1, [BASE]
1429  |  checknum CARG1, ->fff_fallback
1430  |  checkint CARG2, ->fff_fallback
1431  |  sxtw CARG1, CARG2w		// Exponent becomes the first integer argument.
1432  |  bl extern ldexp			// (double x, int exp)
1433  |  b ->fff_resn
1434  |
1435  |.ffunc_n math_frexp		// math.frexp(x): returns d0 from frexp and the int written to TMPD.
1436  |  add CARG1, sp, TMPDofs		// Address passed to frexp for its integer output.
1437  |  bl extern frexp
1438  |   ldr CARG2w, TMPD
1439  |    ldr PC, [BASE, FRAME_PC]
1440  |  str d0, [BASE, #-16]		// Result 1.
1441  |    mov RC, #(2+1)*8
1442  |   add CARG2, CARG2, TISNUM		// Tag the integer read from TMPD.
1443  |   str CARG2, [BASE, #-8]		// Result 2.
1444  |  b ->fff_res
1445  |
1446  |.ffunc_n math_modf			// math.modf(x): modf writes through the pointer to result slot 1, d0 becomes result 2.
1447  |  sub CARG1, BASE, #16		// Pointer argument = address of result slot 1.
1448  |   ldr PC, [BASE, FRAME_PC]
1449  |  bl extern modf
1450  |   mov RC, #(2+1)*8
1451  |  str d0, [BASE, #-8]
1452  |  b ->fff_res
1453  |
1454  |.macro math_minmax, name, cond, fcond	// Emits math.min/max: 'cond' selects for integers, 'fcond' for FP values.
1455  |  .ffunc_1 name
1456  |   add RB, BASE, RC			// RB = end of the arguments.
1457  |   add RA, BASE, #8			// RA = pointer to the next argument.
1458  |  checkint CARG1, >4
1459  |1:  // Handle integers.
1460  |  ldr CARG2, [RA]
1461  |   cmp RA, RB
1462  |   bhs ->fff_restv			// All arguments consumed: CARG1 is the result.
1463  |  checkint CARG2, >3
1464  |  cmp CARG1w, CARG2w
1465  |   add RA, RA, #8
1466  |  csel CARG1, CARG2, CARG1, cond	// Take the new argument when 'cond' holds.
1467  |  b <1
1468  |3:  // Convert intermediate result to number and continue below.
1469  |  scvtf d0, CARG1w
1470  |  blo ->fff_fallback		// Flags still from checkint: presumably not a number.
1471  |  ldr d1, [RA]
1472  |  b >6
1473  |
1474  |4:
1475  |  ldr d0, [BASE]
1476  |  blo ->fff_fallback		// Flags still from checkint on arg 1: presumably not a number.
1477  |5:  // Handle numbers.
1478  |  ldr CARG2, [RA]
1479  |  ldr d1, [RA]
1480  |   cmp RA, RB
1481  |   bhs ->fff_resn			// All arguments consumed: d0 is the result.
1482  |  checknum CARG2, >7
1483  |6:
1484  |  fcmp d0, d1
1485  |   add RA, RA, #8
1486  |  fcsel d0, d1, d0, fcond		// Take the new argument when 'fcond' holds.
1487  |  b <5
1488  |7:  // Convert integer to number and continue above.
1489  |  scvtf d1, CARG2w
1490  |  blo ->fff_fallback		// Flags still from checknum: presumably not a number.
1491  |  b <6
1492  |.endmacro
1493  |
1494  |  math_minmax math_min, gt, pl
1495  |  math_minmax math_max, lt, le
1496  |
1497  |//-- String library -----------------------------------------------------
1498  |
1499  |.ffunc string_byte			// Only handle the 1-arg case here.
1500  |  ldp PC, CARG1, [BASE, FRAME_PC]	// PC and the slot after it; presumably that slot is arg 1 -- TODO confirm FRAME_PC offset.
1501  |   cmp NARGS8:RC, #8
1502  |  asr ITYPE, CARG1, #47
1503  |  ccmn ITYPE, #-LJ_TSTR, #0, eq	// 'eq' only with exactly one argument that is a string.
1504  |   and STR:CARG1, CARG1, #LJ_GCVMASK
1505  |  bne ->fff_fallback
1506  |  ldrb TMP0w, STR:CARG1[1]		// Access is always ok (NUL at end).
1507  |   ldr CARG3w, STR:CARG1->len
1508  |  add TMP0, TMP0, TISNUM		// Tag the byte as an integer TValue.
1509  |  str TMP0, [BASE, #-16]
1510  |  mov RC, #(0+1)*8
1511  |   cbz CARG3, ->fff_res		// Empty string: return no results.
1512  |  b ->fff_res1
1513  |
1514  |.ffunc string_char			// Only handle the 1-arg case here.
1515  |  ffgccheck
1516  |  ldp PC, CARG1, [BASE, FRAME_PC]	// PC and the slot after it; presumably that slot is arg 1 -- TODO confirm FRAME_PC offset.
1517  |  cmp CARG1w, #255
1518  |   ccmp NARGS8:RC, #8, #0, ls		// Need exactly 1 argument.
1519  |  bne ->fff_fallback		// Low word above 255 or wrong argument count.
1520  |  checkint CARG1, ->fff_fallback
1521  |  mov CARG3, #1			// Length of the new string.
1522  |  // Point to the char inside the integer in the stack slot.
1523  |.if ENDIAN_LE
1524  |  mov CARG2, BASE
1525  |.else
1526  |  add CARG2, BASE, #7
1527  |.endif
1528  |->fff_newstr:
1529  |  // CARG2 = str, CARG3 = len.
1530  |   str BASE, L->base
1531  |  mov CARG1, L
1532  |   str PC, SAVE_PC
1533  |  bl extern lj_str_new		// (lua_State *L, char *str, size_t l)
1534  |->fff_resstr:
1535  |  // Returns GCstr *.
1536  |  ldr BASE, L->base
1537  |   movn TMP1, #~LJ_TSTR
1538  |  add CARG1, CARG1, TMP1, lsl #47	// Tag the returned pointer as a string TValue.
1539  |  b ->fff_restv
1540  |
1541  |.ffunc string_sub			// string.sub(s, start [, end]): clamps the range and creates the substring.
1542  |  ffgccheck
1543  |  ldr CARG1, [BASE]
1544  |    ldr CARG3, [BASE, #16]
1545  |   cmp NARGS8:RC, #16
1546  |    movn RB, #0			// Default end = -1.
1547  |   beq >1				// Exactly two arguments: keep the default end.
1548  |   blo ->fff_fallback		// Fewer than two arguments.
1549  |    checkint CARG3, ->fff_fallback
1550  |    sxtw RB, CARG3w
1551  |1:
1552  |  ldr CARG2, [BASE, #8]
1553  |  checkstr CARG1, ->fff_fallback
1554  |   ldr TMP1w, STR:CARG1->len
1555  |  checkint CARG2, ->fff_fallback
1556  |  sxtw CARG2, CARG2w
1557  |  // CARG1 = str, TMP1 = str->len, CARG2 = start, RB = end
1558  |   add TMP2, RB, TMP1
1559  |   cmp RB, #0
1560  |  add TMP0, CARG2, TMP1
1561  |   csinc RB, RB, TMP2, ge		// if (end < 0) end += len+1
1562  |  cmp CARG2, #0
1563  |  csinc CARG2, CARG2, TMP0, ge	// if (start < 0) start += len+1
1564  |   cmp RB, #0
1565  |   csel RB, RB, xzr, ge		// if (end < 0) end = 0
1566  |  cmp CARG2, #1
1567  |  csinc CARG2, CARG2, xzr, ge	// if (start < 1) start = 1
1568  |   cmp RB, TMP1
1569  |   csel RB, RB, TMP1, le		// if (end > len) end = len
1570  |  add CARG1, STR:CARG1, #sizeof(GCstr)-1	// Biased by -1 because start is 1-based.
1571  |   subs CARG3, RB, CARG2		// len = end - start
1572  |  add CARG2, CARG1, CARG2		// Pointer to the first character of the result.
1573  |   add CARG3, CARG3, #1		// len += 1
1574  |   bge ->fff_newstr			// Flags still from subs: end >= start.
1575  |  add STR:CARG1, GL, #offsetof(global_State, strempty)	// Otherwise return the shared empty string.
1576  |   movn TMP1, #~LJ_TSTR
1577  |  add CARG1, CARG1, TMP1, lsl #47
1578  |  b ->fff_restv
1579  |
1580  |.macro ffstring_op, name		// Emits string_<name>: runs lj_buf_putstr_<name> on the global tmpbuf.
1581  |  .ffunc string_ .. name
1582  |  ffgccheck
1583  |  ldr CARG2, [BASE]
1584  |   cmp NARGS8:RC, #8
1585  |  asr ITYPE, CARG2, #47
1586  |  ccmn ITYPE, #-LJ_TSTR, #0, hs	// 'eq' only with at least one argument and a string as arg 1.
1587  |   and STR:CARG2, CARG2, #LJ_GCVMASK
1588  |  bne ->fff_fallback
1589  |  ldr TMP0, GL->tmpbuf.b
1590  |   add SBUF:CARG1, GL, #offsetof(global_State, tmpbuf)
1591  |   str BASE, L->base
1592  |   str PC, SAVE_PC
1593  |   str L, GL->tmpbuf.L
1594  |  str TMP0, GL->tmpbuf.w		// w = b, presumably resetting the buffer -- confirm against SBuf.
1595  |  bl extern lj_buf_putstr_ .. name
1596  |  bl extern lj_buf_tostr		// Takes the return value of the previous call as its argument.
1597  |  b ->fff_resstr
1598  |.endmacro
1599  |
1600  |ffstring_op reverse
1601  |ffstring_op lower
1602  |ffstring_op upper
1603  |
1604  |//-- Bit library --------------------------------------------------------
1605  |
1606  |// FP number to bit conversion for soft-float. Clobbers CARG1-CARG3. In: CARG1 = raw TValue, flags from the caller's checkint compare, lr = continuation. Out: CARG1w.
1607  |->vm_tobit_fb:
1608  |  bls ->fff_fallback		// Presumably: the value is not a number at all.
1609  |  add CARG2, CARG1, CARG1		// Shift the sign bit out.
1610  |  mov CARG3, #1076
1611  |  sub CARG3, CARG3, CARG2, lsr #53	// Shift count = 1076 - biased exponent.
1612  |  cmp CARG3, #53
1613  |  bhi >1				// Count outside 0..53 (unsigned): result is 0.
1614  |  and CARG2, CARG2, #U64x(001fffff,ffffffff)	// Keep the mantissa bits (shifted left by one).
1615  |  orr CARG2, CARG2, #U64x(00200000,00000000)	// Add the implicit leading one.
1616  |   cmp CARG1, #0
1617  |  lsr CARG2, CARG2, CARG3
1618  |   cneg CARG1w, CARG2w, mi		// Negate if the input had its sign bit set.
1619  |  br lr
1620  |1:
1621  |  mov CARG1w, #0
1622  |  br lr
1623  |
1624  |.macro .ffunc_bit, name		// Prologue for bit_<name>: leaves arg 1 as a 32-bit integer in CARG1w.
1625  |  .ffunc_1 bit_..name
1626  |  adr lr, >1			// Continuation for ->vm_tobit_fb, which returns with 'br lr'.
1627  |  checkint CARG1, ->vm_tobit_fb
1628  |1:
1629  |.endmacro
1630  |
1631  |.macro .ffunc_bit_op, name, ins	// Emits bit_<name>: folds all arguments with the 32-bit instruction 'ins'.
1632  |  .ffunc_bit name
1633  |  mov RA, #8			// Byte offset of the next argument.
1634  |  mov TMP0w, CARG1w			// Accumulator starts with arg 1.
1635  |  adr lr, >2			// Continuation for ->vm_tobit_fb inside the loop.
1636  |1:
1637  |  ldr CARG1, [BASE, RA]
1638  |   cmp RA, NARGS8:RC
1639  |    add RA, RA, #8
1640  |   bge >9				// All arguments consumed: label 9 is provided by bit_tobit.
1641  |  checkint CARG1, ->vm_tobit_fb
1642  |2:
1643  |  ins TMP0w, TMP0w, CARG1w
1644  |  b <1
1645  |.endmacro
1646  |
1647  |.ffunc_bit_op band, and
1648  |.ffunc_bit_op bor, orr
1649  |.ffunc_bit_op bxor, eor
1650  |
1651  |.ffunc_bit tobit			// bit.tobit(x): returns the converted value as a tagged integer.
1652  |  mov TMP0w, CARG1w
1653  |9:  // Label reused by .ffunc_bit_op users.
1654  |  add CARG1, TMP0, TISNUM		// Tag the 32-bit result in TMP0 as an integer TValue.
1655  |  b ->fff_restv
1656  |
1657  |.ffunc_bit bswap			// bit.bswap(x): byte-reverses the 32-bit value.
1658  |  rev TMP0w, CARG1w
1659  |  add CARG1, TMP0, TISNUM		// Tag as an integer TValue.
1660  |  b ->fff_restv
1661  |
1662  |.ffunc_bit bnot			// bit.bnot(x): bitwise complement of the 32-bit value.
1663  |  mvn TMP0w, CARG1w
1664  |  add CARG1, TMP0, TISNUM		// Tag as an integer TValue.
1665  |  b ->fff_restv
1666  |
1667  |.macro .ffunc_bit_sh, name, ins, shmod	// Emits bit_<name>(x, n) using 'ins'; shmod != 0 negates the count first.
1668  |  .ffunc bit_..name
1669  |  ldp TMP0, CARG1, [BASE]		// TMP0 = arg 1 (value), CARG1 = arg 2 (count).
1670  |   cmp NARGS8:RC, #16
1671  |   blo ->fff_fallback		// Fewer than two arguments.
1672  |  adr lr, >1			// Continuation for ->vm_tobit_fb.
1673  |  checkint CARG1, ->vm_tobit_fb
1674  |1:
1675  |.if shmod == 0
1676  |  mov TMP1, CARG1
1677  |.else
1678  |  neg TMP1, CARG1
1679  |.endif
1680  |  mov CARG1, TMP0
1681  |  adr lr, >2			// Continuation for ->vm_tobit_fb.
1682  |  checkint CARG1, ->vm_tobit_fb
1683  |2:
1684  |  ins TMP0w, CARG1w, TMP1w
1685  |  add CARG1, TMP0, TISNUM		// Tag as an integer TValue.
1686  |  b ->fff_restv
1687  |.endmacro
1688  |
1689  |.ffunc_bit_sh lshift, lsl, 0
1690  |.ffunc_bit_sh rshift, lsr, 0
1691  |.ffunc_bit_sh arshift, asr, 0
1692  |.ffunc_bit_sh rol, ror, 1		// Rotate left = rotate right by the negated count.
1693  |.ffunc_bit_sh ror, ror, 0
1694  |
1695  |//-----------------------------------------------------------------------
1696  |
1697  |->fff_fallback:			// Call fast function fallback handler.
1698  |  // BASE = new base, RC = nargs*8
1699  |   ldp CFUNC:CARG3, PC, [BASE, FRAME_FUNC]	// Fallback may overwrite PC.
1700  |  ldr TMP2, L->maxstack
1701  |  add TMP1, BASE, NARGS8:RC
1702  |  stp BASE, TMP1, L->base		// L->base = BASE, following field (top) = BASE + nargs*8.
1703  |   and CFUNC:CARG3, CARG3, #LJ_GCVMASK
1704  |  add TMP1, TMP1, #8*LUA_MINSTACK
1705  |   ldr CARG3, CFUNC:CARG3->f		// C function pointer of the fallback handler.
1706  |    str PC, SAVE_PC			// Redundant (but a defined value).
1707  |  cmp TMP1, TMP2
1708  |   mov CARG1, L
1709  |  bhi >5				// Need to grow stack.
1710  |   blr CARG3				// (lua_State *L)
1711  |  // Either throws an error, or recovers and returns -1, 0 or nresults+1.
1712  |   ldr BASE, L->base
1713  |  cmp CRET1w, #0
1714  |   lsl RC, CRET1, #3
1715  |   sub RA, BASE, #16
1716  |  bgt ->fff_res			// Returned nresults+1?
1717  |1:  // Returned 0 or -1: retry fast path.
1718  |   ldr CARG1, L->top
1719  |    ldr CFUNC:CARG3, [BASE, FRAME_FUNC]
1720  |   sub NARGS8:RC, CARG1, BASE		// Recompute nargs*8 from L->top.
1721  |  bne ->vm_call_tail			// Returned -1?
1722  |    and CFUNC:CARG3, CARG3, #LJ_GCVMASK
1723  |  ins_callt				// Returned 0: retry fast path.
1724  |
1725  |// Reconstruct previous base for vmeta_call during tailcall.
1726  |->vm_call_tail:
1727  |  ands TMP0, PC, #FRAME_TYPE
1728  |   and TMP1, PC, #~FRAME_TYPEP	// Frame delta if the frame type bits are set.
1729  |  bne >3
1730  |  ldrb RAw, [PC, #-4+OFS_RA]	// Otherwise derive the delta from operand A of the previous instruction.
1731  |  lsl RA, RA, #3
1732  |  add TMP1, RA, #16
1733  |3:
1734  |  sub RB, BASE, TMP1
1735  |  b ->vm_call_dispatch		// Resolve again for tailcall.
1736  |
1737  |5:  // Grow stack for fallback handler.
1738  |  mov CARG2, #LUA_MINSTACK
1739  |  bl extern lj_state_growstack	// (lua_State *L, int n)
1740  |  ldr BASE, L->base
1741  |  cmp CARG1, CARG1			// Set zero-flag to force retry.
1742  |  b <1				// Back to label 1 of ->fff_fallback.
1743  |
1744  |->fff_gcstep:			// Call GC step function.
1745  |  // BASE = new base, RC = nargs*8
1746  |   add CARG2, BASE, NARGS8:RC	// Calculate L->top.
1747  |  mov RA, lr			// Keep the return address in RA across the call.
1748  |   stp BASE, CARG2, L->base
1749  |   str PC, SAVE_PC			// Redundant (but a defined value).
1750  |  mov CARG1, L
1751  |  bl extern lj_gc_step		// (lua_State *L)
1752  |  ldp BASE, CARG2, L->base
1753  |   ldr CFUNC:CARG3, [BASE, FRAME_FUNC]	// Reload the current function for the caller.
1754  |  mov lr, RA				// Help return address predictor.
1755  |  sub NARGS8:RC, CARG2, BASE		// Calculate nargs*8.
1756  |   and CFUNC:CARG3, CARG3, #LJ_GCVMASK
1757  |  ret
1758  |
1759  |//-----------------------------------------------------------------------
1760  |//-- Special dispatch targets -------------------------------------------
1761  |//-----------------------------------------------------------------------
1762  |
1763  |->vm_record:				// Dispatch target for recording phase.
1764  |.if JIT
1765  |  ldrb CARG1w, GL->hookmask
1766  |  tst CARG1, #HOOK_VMEVENT		// No recording while in vmevent.
1767  |  bne >5				// Label 5 lives in ->vm_rethook: re-dispatch to the static instruction.
1768  |  // Decrement the hookcount for consistency, but always do the call.
1769  |   ldr CARG2w, GL->hookcount
1770  |  tst CARG1, #HOOK_ACTIVE
1771  |  bne >1				// Label 1 lives in ->vm_inshook: call lj_dispatch_ins.
1772  |   sub CARG2w, CARG2w, #1
1773  |  tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT
1774  |  beq >1
1775  |   str CARG2w, GL->hookcount
1776  |  b >1
1777  |.endif
1778  |
1779  |->vm_rethook:			// Dispatch target for return hooks.
1780  |  ldrb TMP2w, GL->hookmask
1781  |  tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1	// Hook already active?
1782  |5:  // Re-dispatch to static ins.
1783  |  ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]	// Assumes TMP1 = GL + opcode*8 from the dispatching code -- TODO confirm.
1784  |  br TMP0
1785  |
1786  |->vm_inshook:			// Dispatch target for instr/line hooks.
1787  |  ldrb TMP2w, GL->hookmask
1788  |   ldr TMP3w, GL->hookcount
1789  |  tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5	// Hook already active?
1790  |  tst TMP2w, #LUA_MASKLINE|LUA_MASKCOUNT
1791  |  beq <5				// Neither line nor count hook requested.
1792  |   sub TMP3w, TMP3w, #1
1793  |   str TMP3w, GL->hookcount
1794  |   cbz TMP3w, >1			// Count reached zero: call the hook dispatcher.
1795  |  tbz TMP2w, #LUA_HOOKLINE, <5	// No line hook: re-dispatch without calling.
1796  |1:
1797  |  mov CARG1, L
1798  |   str BASE, L->base
1799  |  mov CARG2, PC
1800  |  // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
1801  |  bl extern lj_dispatch_ins		// (lua_State *L, const BCIns *pc)
1802  |3:
1803  |  ldr BASE, L->base
1804  |4:  // Re-dispatch to static ins.
1805  |  ldr INSw, [PC, #-4]
1806  |  add TMP1, GL, INS, uxtb #3		// TMP1 = GL + opcode*8 (low byte of INS).
1807  |   decode_RA RA, INS
1808  |  ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]
1809  |   decode_RD RC, INS
1810  |  br TMP0
1811  |
1812  |->cont_hook:				// Continue from hook yield.
1813  |  ldr CARG1, [CARG4, #-40]		// Value saved 40 bytes below the address in CARG4.
1814  |   add PC, PC, #4
1815  |  str CARG1w, SAVE_MULTRES		// Restore MULTRES for *M ins.
1816  |  b <4				// Label 4 lives in ->vm_inshook: re-dispatch to the static instruction.
1817  |
1818  |->vm_hotloop:			// Hot loop counter underflow.
1819  |.if JIT
1820  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]  // Same as curr_topL(L).
1821  |   add CARG1, GL, #GG_G2DISP+GG_DISP2J	// First argument: pointer to the jit_State.
1822  |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
1823  |   str PC, SAVE_PC
1824  |  ldr CARG3, LFUNC:CARG3->pc
1825  |   mov CARG2, PC
1826  |   str L, [GL, #GL_J(L)]
1827  |  ldrb CARG3w, [CARG3, #PC2PROTO(framesize)]
1828  |   str BASE, L->base
1829  |  add CARG3, BASE, CARG3, lsl #3
1830  |  str CARG3, L->top			// L->top = BASE + framesize*8.
1831  |  bl extern lj_trace_hot		// (jit_State *J, const BCIns *pc)
1832  |  b <3				// Label 3 lives in ->vm_inshook: reload BASE and re-dispatch.
1833  |.endif
1834  |
1835  |->vm_callhook:			// Dispatch target for call hooks.
1836  |  mov CARG2, PC
1837  |.if JIT
1838  |  b >1
1839  |.endif
1840  |
1841  |->vm_hotcall:			// Hot call counter underflow.
1842  |.if JIT
1843  |  orr CARG2, PC, #1			// Bit 0 set in the pc argument distinguishes the hot-call entry.
1844  |1:
1845  |.endif
1846  |  add TMP1, BASE, NARGS8:RC
1847  |   str PC, SAVE_PC
1848  |   mov CARG1, L
1849  |   sub RA, RA, BASE			// Keep RA as an offset from BASE across the call.
1850  |  stp BASE, TMP1, L->base		// L->base = BASE, following field (top) = BASE + nargs*8.
1851  |  bl extern lj_dispatch_call		// (lua_State *L, const BCIns *pc)
1852  |  // Returns ASMFunction.
1853  |  ldp BASE, TMP1, L->base
1854  |   str xzr, SAVE_PC			// Invalidate for subsequent line hook.
1855  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
1856  |  add RA, BASE, RA			// Rebase RA on the reloaded BASE.
1857  |  sub NARGS8:RC, TMP1, BASE
1858  |   ldr INSw, [PC, #-4]
1859  |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
1860  |  br CRET1				// Jump to the function returned by lj_dispatch_call.
1861  |
1862  |->cont_stitch:			// Trace stitching.
1863  |.if JIT
1864  |  // RA = resultptr, CARG4 = meta base
1865  |   ldr RBw, SAVE_MULTRES
1866  |  ldr INSw, [PC, #-4]
1867  |    ldr TRACE:CARG3, [CARG4, #-40]	// Save previous trace.
1868  |   subs RB, RB, #8			// RB = bytes of results left to move.
1869  |  decode_RA RC, INS			// Call base.
1870  |    and CARG3, CARG3, #LJ_GCVMASK
1871  |   beq >2				// No results to move.
1872  |1:  // Move results down.
1873  |  ldr CARG1, [RA]
1874  |    add RA, RA, #8
1875  |   subs RB, RB, #8
1876  |  str CARG1, [BASE, RC, lsl #3]
1877  |    add RC, RC, #1
1878  |   bne <1
1879  |2:
1880  |   decode_RA RA, INS
1881  |   decode_RB RB, INS
1882  |   add RA, RA, RB			// Slot index compared against RC below.
1883  |3:
1884  |   cmp RA, RC
1885  |   bhi >9				// More results wanted?
1886  |
1887  |  ldrh RAw, TRACE:CARG3->traceno
1888  |  ldrh RCw, TRACE:CARG3->link
1889  |  cmp RCw, RAw
1890  |  beq ->cont_nop			// Blacklisted.
1891  |  cmp RCw, #0
1892  |  bne =>BC_JLOOP			// Jump to stitched trace.
1893  |
1894  |  // Stitch a new trace to the previous trace.
1895  |  mov CARG1, #GL_J(exitno)
1896  |  str RAw, [GL, CARG1]		// J->exitno = traceno of the previous trace.
1897  |  mov CARG1, #GL_J(L)
1898  |  str L, [GL, CARG1]
1899  |  str BASE, L->base
1900  |  add CARG1, GL, #GG_G2J
1901  |  mov CARG2, PC
1902  |  bl extern lj_dispatch_stitch	// (jit_State *J, const BCIns *pc)
1903  |  ldr BASE, L->base
1904  |  b ->cont_nop
1905  |
1906  |9:  // Fill up results with nil.
1907  |  str TISNIL, [BASE, RC, lsl #3]
1908  |  add RC, RC, #1
1909  |  b <3
1910  |.endif
1911  |
1912  |->vm_profhook:			// Dispatch target for profiler hook.
1913#if LJ_HASPROFILE
1914  |  mov CARG1, L
1915  |   str BASE, L->base
1916  |  mov CARG2, PC
1917  |  bl extern lj_dispatch_profile	// (lua_State *L, const BCIns *pc)
1918  |  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
1919  |  ldr BASE, L->base
1920  |  sub PC, PC, #4			// Step PC back by one instruction word before re-dispatching.
1921  |  b ->cont_nop
1922#endif
1923  |
1924  |//-----------------------------------------------------------------------
1925  |//-- Trace exit handler -------------------------------------------------
1926  |//-----------------------------------------------------------------------
1927  |
1928  |.macro savex_, a, b		// Stores the register pairs d<a>/d<b> and x<a>/x<b> into the save area at sp.
1929  |  stp d..a, d..b, [sp, #a*8]		// FP registers occupy slots 0..31.
1930  |  stp x..a, x..b, [sp, #32*8+a*8]	// General registers occupy slots 32..63.
1931  |.endmacro
1932  |
1933  |->vm_exit_handler:		// Trace exit: saves all registers, calls lj_trace_exit, then continues at label 1 of ->vm_exit_interp.
1934  |.if JIT
1935  |  sub     sp, sp, #(64*8)		// Room for 32 FP + 32 general register slots.
1936  |  savex_, 0, 1
1937  |  savex_, 2, 3
1938  |  savex_, 4, 5
1939  |  savex_, 6, 7
1940  |  savex_, 8, 9
1941  |  savex_, 10, 11
1942  |  savex_, 12, 13
1943  |  savex_, 14, 15
1944  |  savex_, 16, 17
1945  |  savex_, 18, 19
1946  |  savex_, 20, 21
1947  |  savex_, 22, 23
1948  |  savex_, 24, 25
1949  |  savex_, 26, 27
1950  |  savex_, 28, 29
1951  |  stp d30, d31, [sp, #30*8]		// x30/x31 slots are filled separately below.
1952  |  ldr CARG1, [sp, #64*8]	// Load original value of lr.
1953  |   add CARG3, sp, #64*8	// Recompute original value of sp.
1954  |    mv_vmstate CARG4w, EXIT
1955  |   stp xzr, CARG3, [sp, #62*8]	// Store 0/sp in RID_LR/RID_SP.
1956  |  sub CARG1, CARG1, lr
1957  |   ldr L, GL->cur_L
1958  |  lsr CARG1, CARG1, #2
1959  |   ldr BASE, GL->jit_base
1960  |  sub CARG1, CARG1, #2		// exitno = ((original lr - lr) >> 2) - 2.
1961  |   ldr CARG2w, [lr]		// Load trace number.
1962  |    st_vmstate CARG4w
1963  |.if ENDIAN_BE
1964  |   rev32 CARG2, CARG2
1965  |.endif
1966  |   str BASE, L->base
1967  |  ubfx CARG2w, CARG2w, #5, #16	// The trace number sits in bits 5..20 of the word at lr.
1968  |  str CARG1w, [GL, #GL_J(exitno)]
1969  |   str CARG2w, [GL, #GL_J(parent)]
1970  |   str L, [GL, #GL_J(L)]
1971  |  str xzr, GL->jit_base
1972  |  add CARG1, GL, #GG_G2J
1973  |  mov CARG2, sp			// The save area on the stack is passed as the ExitState.
1974  |  bl extern lj_trace_exit		// (jit_State *J, ExitState *ex)
1975  |  // Returns MULTRES (unscaled) or negated error code.
1976  |  ldr CARG2, L->cframe
1977  |   ldr BASE, L->base
1978  |  and sp, CARG2, #CFRAME_RAWMASK	// Reset sp to the C frame recorded in L->cframe.
1979  |   ldr PC, SAVE_PC			// Get SAVE_PC.
1980  |  str L, SAVE_L			// Set SAVE_L (on-trace resume/yield).
1981  |  b >1
1982  |.endif
1983  |
1984  |->vm_exit_interp:		// Resume the interpreter after a trace exit, or rethrow an error.
1985  |  // CARG1 = MULTRES or negated error code, BASE, PC and GL set.
1986  |.if JIT
1987  |  ldr L, SAVE_L
1988  |1:
1989  |  cmp CARG1w, #0
1990  |  blt >9				// Check for error from exit.
1991  |   lsl RC, CARG1, #3			// Scale MULTRES by 8.
1992  |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
1993  |    movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48	// Re-create the fixed interpreter constants.
1994  |    movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
1995  |    movn TISNIL, #0
1996  |  and LFUNC:CARG2, CARG2, #LJ_GCVMASK
1997  |   str RCw, SAVE_MULTRES
1998  |   str BASE, L->base
1999  |  ldr CARG2, LFUNC:CARG2->pc
2000  |   str xzr, GL->jit_base
2001  |    mv_vmstate CARG4w, INTERP
2002  |  ldr KBASE, [CARG2, #PC2PROTO(k)]	// KBASE of the function in the current frame.
2003  |  // Modified copy of ins_next which handles function header dispatch, too.
2004  |  ldrb RBw, [PC, # OFS_OP]
2005  |   ldr INSw, [PC], #4			// Post-increment: PC advances to the next instruction.
2006  |    st_vmstate CARG4w
2007  |  cmp RBw, #BC_FUNCC+2		// Fast function?
2008  |   add TMP1, GL, INS, uxtb #3
2009  |  bhs >4
2010  |2:
2011  |  cmp RBw, #BC_FUNCF			// Function header?
2012  |  add TMP0, GL, RB, uxtb #3
2013  |  ldr RB, [TMP0, #GG_G2DISP]		// Dispatch target for the opcode.
2014  |   decode_RA RA, INS
2015  |   lsr TMP0, INS, #16
2016  |   csel RC, TMP0, RC, lo		// Opcode below BC_FUNCF: RC = INS >> 16.
2017  |   blo >5
2018  |   ldr CARG3, [BASE, FRAME_FUNC]
2019  |   sub RC, RC, #8
2020  |   add RA, BASE, RA, lsl #3	// Yes: RA = BASE+framesize*8, RC = nargs*8
2021  |   and LFUNC:CARG3, CARG3, #LJ_GCVMASK
2022  |5:
2023  |  br RB
2024  |
2025  |4:  // Check frame below fast function.
2026  |  ldr CARG1, [BASE, FRAME_PC]
2027  |  ands CARG2, CARG1, #FRAME_TYPE
2028  |  bne <2			// Trace stitching continuation?
2029  |  // Otherwise set KBASE for Lua function below fast function.
2030  |  ldr CARG3w, [CARG1, #-4]		// Instruction word just before the frame's return PC.
2031  |  decode_RA CARG1, CARG3
2032  |  sub CARG2, BASE, CARG1, lsl #3
2033  |  ldr LFUNC:CARG3, [CARG2, #-32]
2034  |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
2035  |  ldr CARG3, LFUNC:CARG3->pc
2036  |  ldr KBASE, [CARG3, #PC2PROTO(k)]
2037  |  b <2
2038  |
2039  |9:  // Rethrow error from the right C frame.
2040  |  neg CARG2w, CARG1w		// Turn the negated error code back into a positive one.
2041  |  mov CARG1, L
2042  |  bl extern lj_err_trace		// (lua_State *L, int errcode)
2043  |.endif
2044  |
2045  |//-----------------------------------------------------------------------
2046  |//-- Math helper functions ----------------------------------------------
2047  |//-----------------------------------------------------------------------
2048  |
2049  |  // int lj_vm_modi(int dividend, int divisor);
  |  // Takes the remainder of |dividend| / |divisor| with an unsigned divide and
  |  // then adjusts it so that a non-zero result has the sign of the divisor.
  |  // CARG3w and CARG4w are used as scratch. The flags set by the first cmp
  |  // stay live up to the ccmp; the instructions in between do not set flags.
2050  |->vm_modi:
2051  |    eor CARG4w, CARG1w, CARG2w
2052  |    cmp CARG4w, #0			// mi if the operand signs differ.
2053  |  eor CARG3w, CARG1w, CARG1w, asr #31
2054  |   eor CARG4w, CARG2w, CARG2w, asr #31
2055  |  sub CARG3w, CARG3w, CARG1w, asr #31	// CARG3w = |dividend|.
2056  |   sub CARG4w, CARG4w, CARG2w, asr #31	// CARG4w = |divisor|.
2057  |  udiv CARG1w, CARG3w, CARG4w
2058  |  msub CARG1w, CARG1w, CARG4w, CARG3w	// CARG1w = |dividend| % |divisor|.
2059  |    ccmp CARG1w, #0, #4, mi		// eq if signs match or remainder is 0.
2060  |    sub CARG3w, CARG1w, CARG4w
2061  |    csel CARG1w, CARG1w, CARG3w, eq	// Otherwise remainder - |divisor|.
2062  |  eor CARG3w, CARG1w, CARG2w
2063  |  cmp CARG3w, #0
2064  |  cneg CARG1w, CARG1w, mi		// Negate if sign differs from the divisor.
2065  |  ret
2066  |
2067  |//-----------------------------------------------------------------------
2068  |//-- Miscellaneous functions --------------------------------------------
2069  |//-----------------------------------------------------------------------
2070  |
2071  |.define NEXT_TAB,		TAB:CARG1
2072  |.define NEXT_RES,		CARG1
2073  |.define NEXT_IDX,		CARG2w
2074  |.define NEXT_LIM,		CARG3w
2075  |.define NEXT_TMP0,		TMP0
2076  |.define NEXT_TMP0w,		TMP0w
2077  |.define NEXT_TMP1,		TMP1
2078  |.define NEXT_TMP1w,		TMP1w
2079  |.define NEXT_RES_PTR,	sp
2080  |.define NEXT_RES_VAL,	[sp]
2081  |.define NEXT_RES_KEY,	[sp, #8]
  |// Note: NEXT_TAB and NEXT_RES share CARG1. For array hits and at the end of
  |// the iteration the result pair is written at sp (value at [sp], key at
  |// [sp, #8]); for hash hits the returned pointer is the node itself.
2082  |
2083  |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
2084  |// Next idx returned in CRET2w.
2085  |->vm_next:
2086  |.if JIT
2087  |  ldr NEXT_LIM, NEXT_TAB->asize
2088  |   ldr NEXT_TMP1, NEXT_TAB->array
2089  |1:  // Traverse array part.
2090  |  subs NEXT_TMP0w, NEXT_IDX, NEXT_LIM	// NEXT_TMP0w = idx - asize, used at 5: below.
2091  |  bhs >5				// Index points after array part?
2092  |  ldr NEXT_TMP0, [NEXT_TMP1, NEXT_IDX, uxtw #3]
2093  |  cmn NEXT_TMP0, #-LJ_TNIL
2094  |   cinc NEXT_IDX, NEXT_IDX, eq
2095  |  beq <1				// Skip holes in array part.
2096  |  str NEXT_TMP0, NEXT_RES_VAL
2097  |   movz NEXT_TMP0w, #(LJ_TISNUM>>1)&0xffff, lsl #16
2098  |   stp NEXT_IDX, NEXT_TMP0w, NEXT_RES_KEY	// Key = idx word followed by the tag word.
2099  |  add NEXT_IDX, NEXT_IDX, #1
2100  |  mov NEXT_RES, NEXT_RES_PTR
2101  |4:
2102  |  ret
2103  |
2104  |5:  // Traverse hash part.
2105  |  ldr NEXT_TMP1w, NEXT_TAB->hmask
2106  |   ldr NODE:NEXT_RES, NEXT_TAB->node
2107  |   add NEXT_TMP0w, NEXT_TMP0w, NEXT_TMP0w, lsl #1	// (idx - asize) * 3.
2108  |  add NEXT_LIM, NEXT_LIM, NEXT_TMP1w		// Limit = asize + hmask.
2109  |   add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP0w, uxtw #3	// node + (idx - asize)*3*8.
2110  |6:
2111  |  cmp NEXT_IDX, NEXT_LIM
2112  |  bhi >9
2113  |  ldr NEXT_TMP0, NODE:NEXT_RES->val
2114  |  cmn NEXT_TMP0, #-LJ_TNIL
2115  |   add NEXT_IDX, NEXT_IDX, #1
2116  |  bne <4				// Non-nil value: return the node pointer.
2117  |  // Skip holes in hash part.
2118  |  add NODE:NEXT_RES, NODE:NEXT_RES, #sizeof(Node)
2119  |  b <6
2120  |
2121  |9:  // End of iteration. Set the key to nil (not the value).
2122  |  movn NEXT_TMP0, #0
2123  |  str NEXT_TMP0, NEXT_RES_KEY
2124  |  mov NEXT_RES, NEXT_RES_PTR
2125  |  ret
2126  |.endif
2127  |
2128  |//-----------------------------------------------------------------------
2129  |//-- FFI helper functions -----------------------------------------------
2130  |//-----------------------------------------------------------------------
2131  |
2132  |// Handler for callback functions.
2133  |// Callback slot number in w9, g in x10. Registers are saved here (saveregs).
2134  |->vm_ffi_callback:
2135  |.if FFI
2136  |.type CTSTATE, CTState, PC
2137  |  saveregs
2138  |  ldr CTSTATE, GL:x10->ctype_state
2139  |  mov GL, x10
2140  |    add x10, sp, # CFRAME_SPACE	// x10 is free now: address just above the C frame.
2141  |  str w9, CTSTATE->cb.slot
  |  // Spill the incoming argument registers x0-x7 and d0-d7 into the callback state.
2142  |  stp x0, x1, CTSTATE->cb.gpr[0]
2143  |   stp d0, d1, CTSTATE->cb.fpr[0]
2144  |  stp x2, x3, CTSTATE->cb.gpr[2]
2145  |   stp d2, d3, CTSTATE->cb.fpr[2]
2146  |  stp x4, x5, CTSTATE->cb.gpr[4]
2147  |   stp d4, d5, CTSTATE->cb.fpr[4]
2148  |  stp x6, x7, CTSTATE->cb.gpr[6]
2149  |   stp d6, d7, CTSTATE->cb.fpr[6]
2150  |    str x10, CTSTATE->cb.stack
2151  |  mov CARG1, CTSTATE
2152  |   str CTSTATE, SAVE_PC		// Any value outside of bytecode is ok.
2153  |  mov CARG2, sp
2154  |  bl extern lj_ccallback_enter	// (CTState *cts, void *cf)
2155  |  // Returns lua_State *.
2156  |  ldp BASE, RC, L:CRET1->base	// BASE = L->base, RC = the field following it.
2157  |   movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
2158  |   movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
2159  |   movn TISNIL, #0
2160  |   mov L, CRET1
2161  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
2162  |  sub RC, RC, BASE
2163  |   st_vmstate ST_INTERP
2164  |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
2165  |  ins_callt
2166  |.endif
2167  |
2168  |->cont_ffi_callback:			// Return from FFI callback.
  |  // Hands the result slot (RA) to lj_ccallback_leave, then reloads the return
  |  // registers x0/x1 and d0/d1 from the callback state and leaves the VM.
2169  |.if FFI
2170  |  ldr CTSTATE, GL->ctype_state
2171  |   stp BASE, CARG4, L->base	// Stores L->base and the field following it.
2172  |  str L, CTSTATE->L
2173  |  mov CARG1, CTSTATE
2174  |  mov CARG2, RA
2175  |  bl extern lj_ccallback_leave       // (CTState *cts, TValue *o)
2176  |  ldp x0, x1, CTSTATE->cb.gpr[0]
2177  |   ldp d0, d1, CTSTATE->cb.fpr[0]
2178  |  b ->vm_leave_unw
2179  |.endif
2180  |
2181  |->vm_ffi_call:			// Call C function via FFI.
2182  |  // Caveat: needs special frame unwinding, see below.
  |  // x0 = CCallState *. Builds a 32 byte frame (x20/x19, fp/lr), copies the
  |  // outgoing stack slots, loads x0-x7, d0-d7 and x8 from the state, calls
  |  // CCSTATE->func and stores x0/x1 and d0-d3 back into the state.
2183  |.if FFI
2184  |  .type CCSTATE, CCallState, x19
2185  |  stp x20, CCSTATE, [sp, #-32]!
2186  |  stp fp, lr, [sp, #16]
2187  |  add fp, sp, #16
2188  |  mov CCSTATE, x0
2189  |  ldr TMP0w, CCSTATE:x0->spadj
2190  |   ldrb TMP1w, CCSTATE->nsp
2191  |    add TMP2, CCSTATE, #offsetof(CCallState, stack)
2192  |   subs TMP1, TMP1, #1		// Index of the last stack slot; mi if nsp == 0.
2193  |    ldr TMP3, CCSTATE->func
2194  |  sub sp, sp, TMP0		// Does not touch the flags tested below.
2195  |   bmi >2
2196  |1:  // Copy stack slots
2197  |  ldr TMP0, [TMP2, TMP1, lsl #3]
2198  |  str TMP0, [sp, TMP1, lsl #3]
2199  |  subs TMP1, TMP1, #1
2200  |  bpl <1
2201  |2:
2202  |  ldp x0, x1, CCSTATE->gpr[0]
2203  |   ldp d0, d1, CCSTATE->fpr[0]
2204  |  ldp x2, x3, CCSTATE->gpr[2]
2205  |   ldp d2, d3, CCSTATE->fpr[2]
2206  |  ldp x4, x5, CCSTATE->gpr[4]
2207  |   ldp d4, d5, CCSTATE->fpr[4]
2208  |  ldp x6, x7, CCSTATE->gpr[6]
2209  |   ldp d6, d7, CCSTATE->fpr[6]
2210  |  ldr x8, CCSTATE->retp
2211  |  blr TMP3
2212  |  sub sp, fp, #16		// Drop the outgoing stack area again.
2213  |  stp x0, x1, CCSTATE->gpr[0]
2214  |   stp d0, d1, CCSTATE->fpr[0]
2215  |   stp d2, d3, CCSTATE->fpr[2]
2216  |  ldp fp, lr, [sp, #16]
2217  |  ldp x20, CCSTATE, [sp], #32
2218  |  ret
2219  |.endif
2220  |// Note: vm_ffi_call must be the last function in this object file!
2221  |
2222  |//-----------------------------------------------------------------------
2223}
2224
2225/* Generate the code for a single instruction. */
2226static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2227{
2228  int vk = 0;
2229  |=>defop:
2230
2231  switch (op) {
2232
2233  /* -- Comparison ops ---------------------------------------------------- */
2234
2235  /* Remember: all ops branch for a true comparison, fall through otherwise. */
2236
2237  case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
    |  // Ordered comparison with conditional jump. Two integers are compared
    |  // directly; mixed integer/number operands are converted with scvtf and
    |  // compared with fcmp. Everything else goes to ->vmeta_comp.
2238    |  // RA = src1, RC = src2, JMP with RC = target
2239    |  ldr CARG1, [BASE, RA, lsl #3]
2240    |    ldrh RBw, [PC, # OFS_RD]
2241    |   ldr CARG2, [BASE, RC, lsl #3]
2242    |    add PC, PC, #4
2243    |    add RB, PC, RB, lsl #2
2244    |    sub RB, RB, #0x20000		// RB = PC + RB*4 - 0x20000 (jump target).
2245    |  checkint CARG1, >3
2246    |   checkint CARG2, >4
2247    |  cmp CARG1w, CARG2w
2248    if (op == BC_ISLT) {
2249      |  csel PC, RB, PC, lt
2250    } else if (op == BC_ISGE) {
2251      |  csel PC, RB, PC, ge
2252    } else if (op == BC_ISLE) {
2253      |  csel PC, RB, PC, le
2254    } else {
2255      |  csel PC, RB, PC, gt
2256    }
2257    |1:
2258    |  ins_next
2259    |
2260    |3:  // RA not int.
2261    |    ldr FARG1, [BASE, RA, lsl #3]
2262    |  blo ->vmeta_comp
2263    |    ldr FARG2, [BASE, RC, lsl #3]
2264    |   cmp TISNUMhi, CARG2, lsr #32
2265    |   bhi >5
2266    |   bne ->vmeta_comp
2267    |  // RA number, RC int.
2268    |  scvtf FARG2, CARG2w
2269    |  b >5
2270    |
2271    |4:  // RA int, RC not int
2272    |    ldr FARG2, [BASE, RC, lsl #3]
2273    |   blo ->vmeta_comp
2274    |  // RA int, RC number.
2275    |  scvtf FARG1, CARG1w
2276    |
2277    |5:  // RA number, RC number
2278    |  fcmp FARG1, FARG2
2279    |  // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
2280    if (op == BC_ISLT) {
2281      |  csel PC, RB, PC, lo
2282    } else if (op == BC_ISGE) {
2283      |  csel PC, RB, PC, hs
2284    } else if (op == BC_ISLE) {
2285      |  csel PC, RB, PC, ls
2286    } else {
2287      |  csel PC, RB, PC, hi
2288    }
2289    |  b <1
2290    break;
2291
2292  case BC_ISEQV: case BC_ISNEV:
2293    vk = op == BC_ISEQV;
    |  // Generic (in)equality test with conditional jump. vk selects the ISEQV
    |  // variant. A numeric second operand is handed to the shared code at
    |  // ->BC_ISEQN_Z/->BC_ISNEN_Z with RC pointing at that operand.
2294    |  // RA = src1, RC = src2, JMP with RC = target
2295    |  ldr CARG1, [BASE, RA, lsl #3]
2296    |   add RC, BASE, RC, lsl #3
2297    |    ldrh RBw, [PC, # OFS_RD]
2298    |   ldr CARG3, [RC]
2299    |    add PC, PC, #4
2300    |    add RB, PC, RB, lsl #2
2301    |    sub RB, RB, #0x20000
2302    |  asr ITYPE, CARG3, #47
2303    |  cmn ITYPE, #-LJ_TISNUM
2304    if (vk) {
2305      |  bls ->BC_ISEQN_Z
2306    } else {
2307      |  bls ->BC_ISNEN_Z
2308    }
2309    |  // RC is not a number.
2310    |   asr TMP0, CARG1, #47
2311    |.if FFI
2312    |  // Check if RC or RA is a cdata.
2313    |  cmn ITYPE, #-LJ_TCDATA
2314    |   ccmn TMP0, #-LJ_TCDATA, #4, ne
2315    |  beq ->vmeta_equal_cd
2316    |.endif
2317    |  cmp CARG1, CARG3
2318    |  bne >2
2319    |  // Tag and value are equal.
2320    if (vk) {
2321      |->BC_ISEQV_Z:
2322      |  mov PC, RB			// Perform branch.
2323    }
2324    |1:
2325    |  ins_next
2326    |
2327    |2:  // Check if the tags are the same and it's a table or userdata.
2328    |  cmp ITYPE, TMP0
2329    |  ccmn ITYPE, #-LJ_TISTABUD, #2, eq
2330    if (vk) {
2331      |  bhi <1
2332    } else {
2333      |  bhi ->BC_ISEQV_Z		// Reuse code from opposite instruction.
2334    }
2335    |  // Different tables or userdatas. Need to check __eq metamethod.
2336    |  // Field metatable must be at same offset for GCtab and GCudata!
2337    |  and TAB:CARG2, CARG1, #LJ_GCVMASK
2338    |  ldr TAB:TMP2, TAB:CARG2->metatable
2339    if (vk) {
2340      |  cbz TAB:TMP2, <1		// No metatable?
2341      |  ldrb TMP1w, TAB:TMP2->nomm
2342      |   mov CARG4, #0			// ne = 0
2343      |  tbnz TMP1w, #MM_eq, <1		// 'no __eq' flag set: done.
2344    } else {
2345      |  cbz TAB:TMP2, ->BC_ISEQV_Z	// No metatable?
2346      |  ldrb TMP1w, TAB:TMP2->nomm
2347      |   mov CARG4, #1			// ne = 1.
2348      |  tbnz TMP1w, #MM_eq, ->BC_ISEQV_Z	// 'no __eq' flag set: done.
2349    }
2350    |  b ->vmeta_equal
2351    break;
2352
2353  case BC_ISEQS: case BC_ISNES:
2354    vk = op == BC_ISEQS;
    |  // Compares the slot against a string constant. The constant is loaded
    |  // from KBASE at index ~RC and tagged with LJ_TSTR, so a single 64 bit
    |  // compare checks tag and pointer together.
2355    |  // RA = src, RC = str_const (~), JMP with RC = target
2356    |  ldr CARG1, [BASE, RA, lsl #3]
2357    |   mvn RC, RC
2358    |    ldrh RBw, [PC, # OFS_RD]
2359    |   ldr CARG2, [KBASE, RC, lsl #3]
2360    |    add PC, PC, #4
2361    |   movn TMP0, #~LJ_TSTR
2362    |.if FFI
2363    |  asr ITYPE, CARG1, #47
2364    |.endif
2365    |    add RB, PC, RB, lsl #2
2366    |   add CARG2, CARG2, TMP0, lsl #47	// Tagged string constant.
2367    |    sub RB, RB, #0x20000
2368    |.if FFI
2369    |  cmn ITYPE, #-LJ_TCDATA
2370    |  beq ->vmeta_equal_cd
2371    |.endif
2372    |  cmp CARG1, CARG2
2373    if (vk) {
2374      |  csel PC, RB, PC, eq
2375    } else {
2376      |  csel PC, RB, PC, ne
2377    }
2378    |  ins_next
2379    break;
2380
2381  case BC_ISEQN: case BC_ISNEN:
2382    vk = op == BC_ISEQN;
    |  // Compares the slot against a number constant. The code from
    |  // ->BC_ISEQN_Z/->BC_ISNEN_Z on is shared with BC_ISEQV/BC_ISNEV, which
    |  // enter with CARG1, CARG3, RB and RC (pointer to the operand) set up.
2383    |  // RA = src, RC = num_const (~), JMP with RC = target
2384    |  ldr CARG1, [BASE, RA, lsl #3]
2385    |   add RC, KBASE, RC, lsl #3
2386    |    ldrh RBw, [PC, # OFS_RD]
2387    |   ldr CARG3, [RC]
2388    |    add PC, PC, #4
2389    |    add RB, PC, RB, lsl #2
2390    |    sub RB, RB, #0x20000
2391    if (vk) {
2392      |->BC_ISEQN_Z:
2393    } else {
2394      |->BC_ISNEN_Z:
2395    }
2396    |  checkint CARG1, >4
2397    |   checkint CARG3, >6
2398    |  cmp CARG1w, CARG3w
2399    |1:
  |  // Label 2 is the target for a non-number RA: placed after the csel for
  |  // ISEQN (no jump) and before it for ISNEN.
2400    if (vk) {
2401      |  csel PC, RB, PC, eq
2402      |2:
2403    } else {
2404      |2:
2405      |  csel PC, RB, PC, ne
2406    }
2407    |3:
2408    |  ins_next
2409    |
2410    |4:  // RA not int.
2411    |.if FFI
2412    |  blo >7
2413    |.else
2414    |  blo <2
2415    |.endif
2416    |    ldr FARG1, [BASE, RA, lsl #3]
2417    |    ldr FARG2, [RC]
2418    |   cmp TISNUMhi, CARG3, lsr #32
2419    |   bne >5
2420    |  // RA number, RC int.
2421    |  scvtf FARG2, CARG3w
2422    |5:
2423    |  // RA number, RC number.
2424    |  fcmp FARG1, FARG2
2425    |  b <1
2426    |
2427    |6:  // RA int, RC number
2428    |  ldr FARG2, [RC]
2429    |  scvtf FARG1, CARG1w
2430    |  fcmp FARG1, FARG2
2431    |  b <1
2432    |
2433    |.if FFI
2434    |7:
2435    |  asr ITYPE, CARG1, #47
2436    |  cmn ITYPE, #-LJ_TCDATA
2437    |  bne <2
2438    |  b ->vmeta_equal_cd
2439    |.endif
2440    break;
2441
2442  case BC_ISEQP: case BC_ISNEP:
2443    vk = op == BC_ISEQP;
    |  // Compares the type tag of the slot against a primitive type. After
    |  // RC += 1 the cmn yields eq exactly when tag == ~RC (original RC).
2444    |  // RA = src, RC = primitive_type (~), JMP with RC = target
2445    |  ldr TMP0, [BASE, RA, lsl #3]
2446    |   ldrh RBw, [PC, # OFS_RD]
2447    |   add PC, PC, #4
2448    |  add RC, RC, #1
2449    |   add RB, PC, RB, lsl #2
2450    |.if FFI
2451    |  asr ITYPE, TMP0, #47
2452    |  cmn ITYPE, #-LJ_TCDATA
2453    |  beq ->vmeta_equal_cd
2454    |  cmn RC, ITYPE
2455    |.else
2456    |  cmn RC, TMP0, asr #47
2457    |.endif
2458    |   sub RB, RB, #0x20000
2459    if (vk) {
2460      |  csel PC, RB, PC, eq
2461    } else {
2462      |  csel PC, RB, PC, ne
2463    }
2464    |  ins_next
2465    break;
2466
2467  /* -- Unary test and copy ops ------------------------------------------- */
2468
2469  case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
    |  // Truth test with conditional jump. The slot is compared (unsigned)
    |  // against the 'false' value: IST/ISTC jump on lo, ISF/ISFC jump on hs.
    |  // The copying variants store unconditionally; when the jump is not taken
    |  // the csel on RA redirects that store to the source slot itself.
2470    |  // RA = dst or unused, RC = src, JMP with RC = target
2471    |   ldrh RBw, [PC, # OFS_RD]
2472    |  ldr TMP0, [BASE, RC, lsl #3]
2473    |   add PC, PC, #4
2474    |  mov_false TMP1
2475    |   add RB, PC, RB, lsl #2
2476    |  cmp TMP0, TMP1
2477    |   sub RB, RB, #0x20000
2478    if (op == BC_ISTC || op == BC_IST) {
2479      if (op == BC_ISTC) {
2480	|  csel RA, RA, RC, lo
2481      }
2482      |  csel PC, RB, PC, lo
2483    } else {
2484      if (op == BC_ISFC) {
2485	|  csel RA, RA, RC, hs
2486      }
2487      |  csel PC, RB, PC, hs
2488    }
2489    if (op == BC_ISTC || op == BC_ISFC) {
2490      |  str TMP0, [BASE, RA, lsl #3]
2491    }
2492    |  ins_next
2493    break;
2494
2495  case BC_ISTYPE:
2496    |  // RA = src, RC = -type
2497    |  ldr TMP0, [BASE, RA, lsl #3]
2498    |  cmn RC, TMP0, asr #47		// eq if the slot's type tag equals -RC.
2499    |  bne ->vmeta_istype
2500    |  ins_next
2501    break;
2502  case BC_ISNUM:
2503    |  // RA = src, RC = -(TISNUM-1)
    |  // RA is a slot index: scale it by the 8 byte slot size, as every other
    |  // slot access does. A non-number goes to ->vmeta_istype.
2504    |  ldr TMP0, [BASE, RA, lsl #3]
2505    |  checknum TMP0, ->vmeta_istype
2506    |  ins_next
2507    break;
2508
2509  /* -- Unary ops --------------------------------------------------------- */
2510
2511  case BC_MOV:
2512    |  // RA = dst, RC = src
2513    |  ldr TMP0, [BASE, RC, lsl #3]	// Copy the whole 8 byte slot.
2514    |  str TMP0, [BASE, RA, lsl #3]
2515    |  ins_next
2516    break;
2517  case BC_NOT:
2518    |  // RA = dst, RC = src
2519    |  ldr TMP0, [BASE, RC, lsl #3]
2520    |   mov_false TMP1
2521    |   mov_true TMP2
2522    |  cmp TMP0, TMP1
2523    |  csel TMP0, TMP1, TMP2, lo	// Source below 'false' (unsigned): false, else true.
2524    |  str TMP0, [BASE, RA, lsl #3]
2525    |  ins_next
2526    break;
2527  case BC_UNM:
    |  // Unary minus. The sign bit is flipped up front (eor leaves the flags from
    |  // the cmn intact), which is the final result unless the tag is LJ_TISNUM.
    |  // In that case the low word is negated and re-tagged; on overflow the
    |  // result is the constant for 2^31 instead.
2528    |  // RA = dst, RC = src
2529    |  ldr TMP0, [BASE, RC, lsl #3]
2530    |  asr ITYPE, TMP0, #47
2531    |  cmn ITYPE, #-LJ_TISNUM
2532    |  bhi ->vmeta_unm
2533    |  eor TMP0, TMP0, #U64x(80000000,00000000)
2534    |  bne >5
2535    |  negs TMP0w, TMP0w
2536    |   movz CARG3, #0x41e0, lsl #48	// 2^31.
2537    |   add TMP0, TMP0, TISNUM
2538    |  csel TMP0, TMP0, CARG3, vc
2539    |5:
2540    |  str TMP0, [BASE, RA, lsl #3]
2541    |  ins_next
2542    break;
2543  case BC_LEN:
    |  // Length operator. Strings read STR->len directly, tables call
    |  // lj_tab_len; all other types go to ->vmeta_len. With LJ_52 a table whose
    |  // metatable does not have the 'no __len' flag set also goes to ->vmeta_len.
2544    |  // RA = dst, RC = src
2545    |  ldr CARG1, [BASE, RC, lsl #3]
2546    |  asr ITYPE, CARG1, #47
2547    |  cmn ITYPE, #-LJ_TSTR
2548    |   and CARG1, CARG1, #LJ_GCVMASK
2549    |  bne >2
2550    |  ldr CARG1w, STR:CARG1->len
2551    |1:
2552    |  add CARG1, CARG1, TISNUM	// Tag the length as an integer.
2553    |  str CARG1, [BASE, RA, lsl #3]
2554    |  ins_next
2555    |
2556    |2:
2557    |  cmn ITYPE, #-LJ_TTAB
2558    |  bne ->vmeta_len
2559#if LJ_52
2560    |  ldr TAB:CARG2, TAB:CARG1->metatable
2561    |  cbnz TAB:CARG2, >9
2562    |3:
2563#endif
2564    |->BC_LEN_Z:
2565    |  bl extern lj_tab_len		// (GCtab *t)
2566    |  // Returns uint32_t (but less than 2^31).
2567    |  b <1
2568    |
2569#if LJ_52
2570    |9:
2571    |  ldrb TMP1w, TAB:CARG2->nomm
2572    |  tbnz TMP1w, #MM_len, <3		// 'no __len' flag set: done.
2573    |  b ->vmeta_len
2574#endif
2575    break;
2576
2577  /* -- Binary ops -------------------------------------------------------- */
2578
2579    |.macro ins_arithcheck_int, target
    |  // Applies checkint to both operands (CARG1, CARG2) with the given target.
2580    |  checkint CARG1, target
2581    |  checkint CARG2, target
2582    |.endmacro
2583    |
    |  // Same as above, but with checknum.
2584    |.macro ins_arithcheck_num, target
2585    |  checknum CARG1, target
2586    |  checknum CARG2, target
2587    |.endmacro
2588    |
    |  // Branches to target if the divisor (CARG2w) is zero.
2589    |.macro ins_arithcheck_nzdiv, target
2590    |  cbz CARG2w, target
2591    |.endmacro
2592    |
    |  // Sets the C variable vk from the opcode (0, 1, or higher; see the
    |  // fallback names vn/nv/vv below) and decodes RB and RC.
2593    |.macro ins_arithhead
2594    ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2595    ||if (vk == 1) {
2596    |   and RC, RC, #255
2597    |    decode_RB RB, INS
2598    ||} else {
2599    |   decode_RB RB, INS
2600    |    and RC, RC, #255
2601    ||}
2602    |.endmacro
2603    |
    |  // Loads both operands. vk == 0: slot RB, constant RC. vk == 1: constant RC,
    |  // slot RB. Otherwise: slot RB, slot RC.
2604    |.macro ins_arithload, reg1, reg2
2605    |  // RA = dst, RB = src1, RC = src2 | num_const
2606    ||switch (vk) {
2607    ||case 0:
2608    |   ldr reg1, [BASE, RB, lsl #3]
2609    |    ldr reg2, [KBASE, RC, lsl #3]
2610    ||  break;
2611    ||case 1:
2612    |   ldr reg1, [KBASE, RC, lsl #3]
2613    |    ldr reg2, [BASE, RB, lsl #3]
2614    ||  break;
2615    ||default:
2616    |   ldr reg1, [BASE, RB, lsl #3]
2617    |    ldr reg2, [BASE, RC, lsl #3]
2618    ||  break;
2619    ||}
2620    |.endmacro
2621    |
    |  // Emits 'ins' with the metamethod fallback matching vk as its target.
2622    |.macro ins_arithfallback, ins
2623    ||switch (vk) {
2624    ||case 0:
2625    |   ins ->vmeta_arith_vn
2626    ||  break;
2627    ||case 1:
2628    |   ins ->vmeta_arith_nv
2629    ||  break;
2630    ||default:
2631    |   ins ->vmeta_arith_vv
2632    ||  break;
2633    ||}
2634    |.endmacro
2635    |
    |  // res = reg1 - floor(reg1/reg2)*reg2. Clobbers d2.
2636    |.macro ins_arithmod, res, reg1, reg2
2637    |  fdiv d2, reg1, reg2
2638    |  frintm d2, d2
2639    |  fmsub res, d2, reg2, reg1
2640    |.endmacro
2641    |
    |  // Dual-number arithmetic: integer fast path with overflow/zero-divisor
    |  // fallback, FP variant at label 5 if either operand fails checkint.
2642    |.macro ins_arithdn, intins, fpins
2643    |  ins_arithhead
2644    |  ins_arithload CARG1, CARG2
2645    |  ins_arithcheck_int >5
2646    |.if "intins" == "smull"
2647    |  smull CARG1, CARG1w, CARG2w
2648    |  cmp CARG1, CARG1, sxtw		// ne if the product does not fit in 32 bits.
2649    |   mov CARG1w, CARG1w
2650    |  ins_arithfallback bne
2651    |.elif "intins" == "ins_arithmodi"
2652    |  ins_arithfallback ins_arithcheck_nzdiv
2653    |  bl ->vm_modi
2654    |.else
2655    |  intins CARG1w, CARG1w, CARG2w
2656    |  ins_arithfallback bvs
2657    |.endif
2658    |  add CARG1, CARG1, TISNUM
2659    |  str CARG1, [BASE, RA, lsl #3]
2660    |4:
2661    |  ins_next
2662    |
2663    |5:  // FP variant.
2664    |  ins_arithload FARG1, FARG2
2665    |  ins_arithfallback ins_arithcheck_num
2666    |  fpins FARG1, FARG1, FARG2
2667    |  str FARG1, [BASE, RA, lsl #3]
2668    |  b <4
2669    |.endmacro
2670    |
    |  // FP-only arithmetic. The operands are loaded twice: into CARG1/CARG2 for
    |  // the type check and into FARG1/FARG2 for the operation.
2671    |.macro ins_arithfp, fpins
2672    |  ins_arithhead
2673    |  ins_arithload CARG1, CARG2
2674    |  ins_arithload FARG1, FARG2
2675    |  ins_arithfallback ins_arithcheck_num
2676    |.if "fpins" == "fpow"
2677    |  bl extern pow
2678    |.else
2679    |  fpins FARG1, FARG1, FARG2
2680    |.endif
2681    |  str FARG1, [BASE, RA, lsl #3]
2682    |  ins_next
2683    |.endmacro
2684
2685  case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
    |  // adds/subs set the overflow flag that ins_arithdn tests with bvs.
2686    |  ins_arithdn adds, fadd
2687    break;
2688  case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2689    |  ins_arithdn subs, fsub
2690    break;
2691  case BC_MULVN: case BC_MULNV: case BC_MULVV:
2692    |  ins_arithdn smull, fmul
2693    break;
2694  case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
    |  // Division has no integer path: always FP.
2695    |  ins_arithfp fdiv
2696    break;
2697  case BC_MODVN: case BC_MODNV: case BC_MODVV:
    |  // Integer path calls ->vm_modi, FP path expands ins_arithmod.
2698    |  ins_arithdn ins_arithmodi, ins_arithmod
2699    break;
2700  case BC_POW:
2701    |  // NYI: (partial) integer arithmetic.
2702    |  ins_arithfp fpow
2703    break;
2704
2705  case BC_CAT:
    |  // Concatenation via lj_meta_cat. ->BC_CAT_Z is a secondary entry point
    |  // that expects RA, CARG2 and CARG3 as described below.
2706    |  decode_RB RB, INS
2707    |   and RC, RC, #255
2708    |  // RA = dst, RB = src_start, RC = src_end
2709    |   str BASE, L->base
2710    |  sub CARG3, RC, RB
2711    |  add CARG2, BASE, RC, lsl #3
2712    |->BC_CAT_Z:
2713    |  // RA = dst, CARG2 = top-1, CARG3 = left
2714    |  mov CARG1, L
2715    |   str PC, SAVE_PC
2716    |  bl extern lj_meta_cat		// (lua_State *L, TValue *top, int left)
2717    |  // Returns NULL (finished) or TValue * (metamethod).
2718    |  ldrb RBw, [PC, #-4+OFS_RB]	// Re-read RB from the instruction before PC.
2719    |   ldr BASE, L->base
2720    |   cbnz CRET1, ->vmeta_binop
2721    |  ldr TMP0, [BASE, RB, lsl #3]
2722    |  str TMP0, [BASE, RA, lsl #3]	// Copy result to RA.
2723    |  ins_next
2724    break;
2725
2726  /* -- Constant ops ------------------------------------------------------ */
2727
2728  case BC_KSTR:
2729    |  // RA = dst, RC = str_const (~)
2730    |  mvn RC, RC
2731    |  ldr TMP0, [KBASE, RC, lsl #3]
2732    |   movn TMP1, #~LJ_TSTR
2733    |  add TMP0, TMP0, TMP1, lsl #47	// Add the LJ_TSTR tag above the pointer.
2734    |  str TMP0, [BASE, RA, lsl #3]
2735    |  ins_next
2736    break;
2737  case BC_KCDATA:
2738    |.if FFI
2739    |  // RA = dst, RC = cdata_const (~)
2740    |  mvn RC, RC
2741    |  ldr TMP0, [KBASE, RC, lsl #3]
2742    |   movn TMP1, #~LJ_TCDATA
2743    |  add TMP0, TMP0, TMP1, lsl #47	// Add the LJ_TCDATA tag above the pointer.
2744    |  str TMP0, [BASE, RA, lsl #3]
2745    |  ins_next
2746    |.endif
2747    break;
2748  case BC_KSHORT:
2749    |  // RA = dst, RC = int16_literal
2750    |  sxth RCw, RCw			// Sign-extend to 32 bits; upper half of RC is zeroed.
2751    |  add TMP0, RC, TISNUM
2752    |  str TMP0, [BASE, RA, lsl #3]
2753    |  ins_next
2754    break;
2755  case BC_KNUM:
2756    |  // RA = dst, RC = num_const
2757    |  ldr TMP0, [KBASE, RC, lsl #3]	// Constant is copied as is.
2758    |  str TMP0, [BASE, RA, lsl #3]
2759    |  ins_next
2760    break;
2761  case BC_KPRI:
2762    |  // RA = dst, RC = primitive_type (~)
2763    |  mvn TMP0, RC, lsl #47		// TMP0 = ~(RC << 47).
2764    |  str TMP0, [BASE, RA, lsl #3]
2765    |  ins_next
2766    break;
2767  case BC_KNIL:
    |  // Stores TISNIL into the slots RA..RC. The first store is unconditional
    |  // and each loop iteration stores once more, so at least two slots are
    |  // written.
2768    |  // RA = base, RC = end
2769    |  add RA, BASE, RA, lsl #3
2770    |   add RC, BASE, RC, lsl #3
2771    |  str TISNIL, [RA], #8
2772    |1:
2773    |   cmp RA, RC
2774    |  str TISNIL, [RA], #8
2775    |   blt <1
2776    |  ins_next_
2777    break;
2778
2779  /* -- Upvalue and function ops ------------------------------------------ */
2780
2781  case BC_UGET:
2782    |  // RA = dst, RC = uvnum
2783    |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2784    |   add RC, RC, #offsetof(GCfuncL, uvptr)/8	// Index of uvptr[uvnum] in 8 byte units.
2785    |  and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2786    |  ldr UPVAL:CARG2, [LFUNC:CARG2, RC, lsl #3]
2787    |  ldr CARG2, UPVAL:CARG2->v
2788    |  ldr TMP0, [CARG2]
2789    |  str TMP0, [BASE, RA, lsl #3]
2790    |  ins_next
2791    break;
2792  case BC_USETV:
    |  // Stores a slot into an upvalue. The value is stored first; the chained
    |  // tst/ccmp/ccmn then decide whether the write barrier check at label 2
    |  // is needed.
2793    |  // RA = uvnum, RC = src
2794    |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2795    |   add RA, RA, #offsetof(GCfuncL, uvptr)/8
2796    |  and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2797    |  ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3]
2798    |   ldr CARG3, [BASE, RC, lsl #3]
2799    |    ldr CARG2, UPVAL:CARG1->v
2800    |  ldrb TMP2w, UPVAL:CARG1->marked
2801    |  ldrb TMP0w, UPVAL:CARG1->closed
2802    |    asr ITYPE, CARG3, #47
2803    |   str CARG3, [CARG2]
2804    |    add ITYPE, ITYPE, #-LJ_TISGCV
2805    |  tst TMP2w, #LJ_GC_BLACK		// isblack(uv)
2806    |  ccmp TMP0w, #0, #4, ne		// && uv->closed
2807    |    ccmn ITYPE, #-(LJ_TNUMX - LJ_TISGCV), #0, ne	// && tvisgcv(v)
2808    |  bhi >2
2809    |1:
2810    |  ins_next
2811    |
2812    |2:  // Check if new value is white.
2813    |  and GCOBJ:CARG3, CARG3, #LJ_GCVMASK
2814    |  ldrb TMP1w, GCOBJ:CARG3->gch.marked
2815    |  tst TMP1w, #LJ_GC_WHITES		// iswhite(v)
2816    |  beq <1
2817    |  // Crossed a write barrier. Move the barrier forward.
2818    |  mov CARG1, GL			// CARG2 still holds uv->v.
2819    |  bl extern lj_gc_barrieruv	// (global_State *g, TValue *tv)
2820    |  b <1
2821    break;
2822  case BC_USETS:
    |  // Stores a tagged string constant into an upvalue. The barrier check at
    |  // label 2 only runs if bit 2 of uv->marked is set.
2823    |  // RA = uvnum, RC = str_const (~)
2824    |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2825    |   add RA, RA, #offsetof(GCfuncL, uvptr)/8
2826    |    mvn RC, RC
2827    |  and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2828    |  ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3]
2829    |   ldr STR:CARG3, [KBASE, RC, lsl #3]
2830    |   movn TMP0, #~LJ_TSTR
2831    |    ldr CARG2, UPVAL:CARG1->v
2832    |  ldrb TMP2w, UPVAL:CARG1->marked
2833    |   add TMP0, STR:CARG3, TMP0, lsl #47
2834    |    ldrb TMP1w, STR:CARG3->marked
2835    |   str TMP0, [CARG2]
2836    |  tbnz TMP2w, #2, >2		// isblack(uv)
2837    |1:
2838    |  ins_next
2839    |
2840    |2:  // Check if string is white and ensure upvalue is closed.
2841    |  ldrb TMP0w, UPVAL:CARG1->closed
2842    |    tst TMP1w, #LJ_GC_WHITES	// iswhite(str)
2843    |  ccmp TMP0w, #0, #4, ne
2844    |  beq <1
2845    |  // Crossed a write barrier. Move the barrier forward.
2846    |  mov CARG1, GL			// CARG2 still holds uv->v.
2847    |  bl extern lj_gc_barrieruv	// (global_State *g, TValue *tv)
2848    |  b <1
2849    break;
2850  case BC_USETN:
2851    |  // RA = uvnum, RC = num_const
    |  // No barrier check here, unlike BC_USETV/BC_USETS.
2852    |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2853    |   add RA, RA, #offsetof(GCfuncL, uvptr)/8
2854    |  and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2855    |  ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3]
2856    |   ldr TMP0, [KBASE, RC, lsl #3]
2857    |  ldr CARG2, UPVAL:CARG2->v
2858    |   str TMP0, [CARG2]
2859    |  ins_next
2860    break;
2861  case BC_USETP:
2862    |  // RA = uvnum, RC = primitive_type (~)
2863    |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2864    |   add RA, RA, #offsetof(GCfuncL, uvptr)/8
2865    |  and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2866    |  ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3]
2867    |   mvn TMP0, RC, lsl #47		// Same encoding as in BC_KPRI: ~(RC << 47).
2868    |  ldr CARG2, UPVAL:CARG2->v
2869    |   str TMP0, [CARG2]
2870    |  ins_next
2871    break;
2872
2873  case BC_UCLO:
    |  // Jumps to the target and, if L->openupval is non-NULL, first calls
    |  // lj_func_closeuv for the level BASE+RA*8.
2874    |  // RA = level, RC = target
2875    |  ldr CARG3, L->openupval
2876    |   add RC, PC, RC, lsl #2
2877    |    str BASE, L->base
2878    |   sub PC, RC, #0x20000		// PC = PC + RC*4 - 0x20000 (jump target).
2879    |  cbz CARG3, >1
2880    |  mov CARG1, L
2881    |  add CARG2, BASE, RA, lsl #3
2882    |  bl extern lj_func_closeuv	// (lua_State *L, TValue *level)
2883    |  ldr BASE, L->base
2884    |1:
2885    |  ins_next
2886    break;
2887
2888  case BC_FNEW:
    |  // Creates a closure from a prototype constant, with the current function
    |  // as parent. L->base and SAVE_PC are stored before the call and BASE is
    |  // reloaded afterwards.
2889    |  // RA = dst, RC = proto_const (~) (holding function prototype)
2890    |  mvn RC, RC
2891    |   str BASE, L->base
2892    |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
2893    |    str PC, SAVE_PC
2894    |   ldr CARG2, [KBASE, RC, lsl #3]
2895    |    mov CARG1, L
2896    |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
2897    |  // (lua_State *L, GCproto *pt, GCfuncL *parent)
2898    |  bl extern lj_func_newL_gc
2899    |  // Returns GCfuncL *.
2900    |  ldr BASE, L->base
2901    |   movn TMP0, #~LJ_TFUNC
2902    |   add CRET1, CRET1, TMP0, lsl #47	// Tag the result with LJ_TFUNC.
2903    |  str CRET1, [BASE, RA, lsl #3]
2904    |  ins_next
2905    break;
2906
2907  /* -- Table ops --------------------------------------------------------- */
2908
2909  case BC_TNEW:
2910  case BC_TDUP:
    |  // Creates a new table (TNEW) or duplicates a template table (TDUP). If
    |  // gc.total has reached the threshold, lj_gc_step_fixtop runs first.
2911    |  // RA = dst, RC = (hbits|asize) | tab_const (~)
2912    |  ldp CARG3, CARG4, GL->gc.total	// Assumes threshold follows total.
2913    |   str BASE, L->base
2914    |   str PC, SAVE_PC
2915    |   mov CARG1, L
2916    |  cmp CARG3, CARG4
2917    |  bhs >5
2918    |1:
2919    if (op == BC_TNEW) {
2920      |  and CARG2, RC, #0x7ff		// asize = low 11 bits of RC.
2921      |   lsr CARG3, RC, #11		// hbits = remaining bits of RC.
2922      |  cmp CARG2, #0x7ff
2923      |  mov TMP0, #0x801
2924      |  csel CARG2, CARG2, TMP0, ne	// An asize of 0x7ff is replaced by 0x801.
2925      |  bl extern lj_tab_new  // (lua_State *L, int32_t asize, uint32_t hbits)
2926      |  // Returns GCtab *.
2927    } else {
2928      |  mvn RC, RC
2929      |  ldr CARG2, [KBASE, RC, lsl #3]
2930      |  bl extern lj_tab_dup  // (lua_State *L, Table *kt)
2931      |  // Returns GCtab *.
2932    }
2933    |  ldr BASE, L->base
2934    |   movk CRET1, #(LJ_TTAB>>1)&0xffff, lsl #48	// Insert the table tag into the top 16 bits.
2935    |  str CRET1, [BASE, RA, lsl #3]
2936    |  ins_next
2937    |
2938    |5:
2939    |  bl extern lj_gc_step_fixtop  // (lua_State *L)
2940    |  mov CARG1, L			// Set up CARG1 again for the call at label 1.
2941    |  b <1
2942    break;
2943
2944  case BC_GGET:
2945    |  // RA = dst, RC = str_const (~)
2946  case BC_GSET:
2947    |  // RA = src, RC = str_const (~)
    |  // Both load the env table of the current function and the string
    |  // constant, then continue in the shared string-key table code.
2948    |  ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
2949    |   mvn RC, RC
2950    |  and LFUNC:CARG1, CARG1, #LJ_GCVMASK
2951    |  ldr TAB:CARG2, LFUNC:CARG1->env
2952    |   ldr STR:RC, [KBASE, RC, lsl #3]
2953    if (op == BC_GGET) {
2954      |  b ->BC_TGETS_Z
2955    } else {
2956      |  b ->BC_TSETS_Z
2957    }
2958    break;
2959
2960  case BC_TGETV:
    |  // Table get with a variable key. Integer keys inside the array part are
    |  // read directly, string keys continue at ->BC_TGETS_Z, and everything
    |  // else goes to ->vmeta_tgetv.
2961    |  decode_RB RB, INS
2962    |   and RC, RC, #255
2963    |  // RA = dst, RB = table, RC = key
2964    |  ldr CARG2, [BASE, RB, lsl #3]
2965    |   ldr TMP1, [BASE, RC, lsl #3]
2966    |  checktab CARG2, ->vmeta_tgetv
2967    |  checkint TMP1, >9		// Integer key?
2968    |  ldr CARG3, TAB:CARG2->array
2969    |   ldr CARG1w, TAB:CARG2->asize
2970    |  add CARG3, CARG3, TMP1, uxtw #3
2971    |   cmp TMP1w, CARG1w		// In array part?
2972    |   bhs ->vmeta_tgetv
2973    |  ldr TMP0, [CARG3]
2974    |  cmp TMP0, TISNIL
2975    |  beq >5
2976    |1:
2977    |  str TMP0, [BASE, RA, lsl #3]
2978    |  ins_next
2979    |
2980    |5:  // Check for __index if table value is nil.
2981    |  ldr TAB:CARG1, TAB:CARG2->metatable
2982    |  cbz TAB:CARG1, <1		// No metatable: done.
2983    |  ldrb TMP1w, TAB:CARG1->nomm
2984    |  tbnz TMP1w, #MM_index, <1	// 'no __index' flag set: done.
2985    |  b ->vmeta_tgetv
2986    |
2987    |9:
2988    |  asr ITYPE, TMP1, #47
2989    |  cmn ITYPE, #-LJ_TSTR		// String key?
2990    |  bne ->vmeta_tgetv
2991    |   and STR:RC, TMP1, #LJ_GCVMASK
2992    |  b ->BC_TGETS_Z
2993    break;
2994  case BC_TGETS:
    |  // Table get with a string constant key. ->BC_TGETS_Z is also the target
    |  // of BC_GGET and of the string key path in BC_TGETV. The hash chain is
    |  // walked starting at node[sid & hmask], comparing tagged keys.
2995    |  decode_RB RB, INS
2996    |   and RC, RC, #255
2997    |  // RA = dst, RB = table, RC = str_const (~)
2998    |  ldr CARG2, [BASE, RB, lsl #3]
2999    |   mvn RC, RC
3000    |   ldr STR:RC, [KBASE, RC, lsl #3]
3001    |  checktab CARG2, ->vmeta_tgets1
3002    |->BC_TGETS_Z:
3003    |  // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = dst
3004    |  ldr TMP1w, TAB:CARG2->hmask
3005    |   ldr TMP2w, STR:RC->sid
3006    |    ldr NODE:CARG3, TAB:CARG2->node
3007    |  and TMP1w, TMP1w, TMP2w		// idx = str->sid & tab->hmask
3008    |  add TMP1, TMP1, TMP1, lsl #1
3009    |  movn CARG4, #~LJ_TSTR
3010    |    add NODE:CARG3, NODE:CARG3, TMP1, lsl #3  // node = tab->node + idx*3*8
3011    |  add CARG4, STR:RC, CARG4, lsl #47	// Tagged key to look for.
3012    |1:
3013    |  ldp TMP0, CARG1, NODE:CARG3->val	// TMP0 = value, CARG1 = the word after it (compared as key).
3014    |   ldr NODE:CARG3, NODE:CARG3->next
3015    |  cmp CARG1, CARG4
3016    |  bne >4
3017    |  cmp TMP0, TISNIL
3018    |  beq >5
3019    |3:
3020    |  str TMP0, [BASE, RA, lsl #3]
3021    |  ins_next
3022    |
3023    |4:  // Follow hash chain.
3024    |  cbnz NODE:CARG3, <1
3025    |  // End of hash chain: key not found, nil result.
3026    |   mov TMP0, TISNIL
3027    |
3028    |5:  // Check for __index if table value is nil.
3029    |  ldr TAB:CARG1, TAB:CARG2->metatable
3030    |  cbz TAB:CARG1, <3		// No metatable: done.
3031    |  ldrb TMP1w, TAB:CARG1->nomm
3032    |  tbnz TMP1w, #MM_index, <3	// 'no __index' flag set: done.
3033    |  b ->vmeta_tgets
3034    break;
3035  case BC_TGETB:
3036    |  decode_RB RB, INS
3037    |   and RC, RC, #255
3038    |  // RA = dst, RB = table, RC = index
3039    |  ldr CARG2, [BASE, RB, lsl #3]
3040    |  checktab CARG2, ->vmeta_tgetb
3041    |  ldr CARG3, TAB:CARG2->array
3042    |   ldr CARG1w, TAB:CARG2->asize
3043    |  add CARG3, CARG3, RC, lsl #3
3044    |   cmp RCw, CARG1w			// In array part?
3045    |   bhs ->vmeta_tgetb
3046    |  ldr TMP0, [CARG3]
3047    |  cmp TMP0, TISNIL
3048    |  beq >5
3049    |1:
3050    |  str TMP0, [BASE, RA, lsl #3]
3051    |  ins_next
3052    |
3053    |5:  // Check for __index if table value is nil.
3054    |  ldr TAB:CARG1, TAB:CARG2->metatable
3055    |  cbz TAB:CARG1, <1		// No metatable: done.
3056    |  ldrb TMP1w, TAB:CARG1->nomm
3057    |  tbnz TMP1w, #MM_index, <1	// 'no __index' flag set: done.
3058    |  b ->vmeta_tgetb
3059    break;
3060  case BC_TGETR:
3061    |  decode_RB RB, INS
3062    |   and RC, RC, #255
3063    |  // RA = dst, RB = table, RC = key
3064    |  ldr CARG1, [BASE, RB, lsl #3]
3065    |   ldr TMP1, [BASE, RC, lsl #3]
3066    |  and TAB:CARG1, CARG1, #LJ_GCVMASK
3067    |  ldr CARG3, TAB:CARG1->array
3068    |   ldr TMP2w, TAB:CARG1->asize
3069    |  add CARG3, CARG3, TMP1w, uxtw #3
3070    |   cmp TMP1w, TMP2w		// In array part?
3071    |   bhs ->vmeta_tgetr
3072    |  ldr TMP0, [CARG3]
3073    |->BC_TGETR_Z:
3074    |  str TMP0, [BASE, RA, lsl #3]
3075    |  ins_next
3076    break;
3077
3078  case BC_TSETV:
3079    |  decode_RB RB, INS
3080    |   and RC, RC, #255
3081    |  // RA = src, RB = table, RC = key
3082    |  ldr CARG2, [BASE, RB, lsl #3]
3083    |   ldr TMP1, [BASE, RC, lsl #3]
3084    |  checktab CARG2, ->vmeta_tsetv
3085    |  checkint TMP1, >9		// Integer key?
3086    |  ldr CARG3, TAB:CARG2->array
3087    |   ldr CARG1w, TAB:CARG2->asize
3088    |  add CARG3, CARG3, TMP1, uxtw #3
3089    |   cmp TMP1w, CARG1w		// In array part?
3090    |   bhs ->vmeta_tsetv
3091    |  ldr TMP1, [CARG3]
3092    |   ldr TMP0, [BASE, RA, lsl #3]
3093    |    ldrb TMP2w, TAB:CARG2->marked
3094    |  cmp TMP1, TISNIL			// Previous value is nil?
3095    |  beq >5
3096    |1:
3097    |   str TMP0, [CARG3]
3098    |    tbnz TMP2w, #2, >7		// isblack(table)
3099    |2:
3100    |   ins_next
3101    |
3102    |5:  // Check for __newindex if previous value is nil.
3103    |  ldr TAB:CARG1, TAB:CARG2->metatable
3104    |  cbz TAB:CARG1, <1		// No metatable: done.
3105    |  ldrb TMP1w, TAB:CARG1->nomm
3106    |  tbnz TMP1w, #MM_newindex, <1	// 'no __newindex' flag set: done.
3107    |  b ->vmeta_tsetv
3108    |
3109    |7:  // Possible table write barrier for the value. Skip valiswhite check.
3110    |  barrierback TAB:CARG2, TMP2w, TMP1
3111    |  b <2
3112    |
3113    |9:
3114    |  asr ITYPE, TMP1, #47
3115    |  cmn ITYPE, #-LJ_TSTR		// String key?
3116    |  bne ->vmeta_tsetv
3117    |   and STR:RC, TMP1, #LJ_GCVMASK
3118    |  b ->BC_TSETS_Z
3119    break;
3120  case BC_TSETS:
3121    |  decode_RB RB, INS
3122    |   and RC, RC, #255
    |  // RA = src, RB = table, RC = str_const (~)
3124    |  ldr CARG2, [BASE, RB, lsl #3]
3125    |   mvn RC, RC
3126    |   ldr STR:RC, [KBASE, RC, lsl #3]
3127    |  checktab CARG2, ->vmeta_tsets1
3128    |->BC_TSETS_Z:
3129    |  // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = src
3130    |  ldr TMP1w, TAB:CARG2->hmask
3131    |   ldr TMP2w, STR:RC->sid
3132    |    ldr NODE:CARG3, TAB:CARG2->node
3133    |  and TMP1w, TMP1w, TMP2w		// idx = str->sid & tab->hmask
3134    |  add TMP1, TMP1, TMP1, lsl #1
3135    |  movn CARG4, #~LJ_TSTR
3136    |    add NODE:CARG3, NODE:CARG3, TMP1, lsl #3  // node = tab->node + idx*3*8
3137    |  add CARG4, STR:RC, CARG4, lsl #47	// Tagged key to look for.
3138    |   strb wzr, TAB:CARG2->nomm	// Clear metamethod cache.
3139    |1:
3140    |  ldp TMP1, CARG1, NODE:CARG3->val
3141    |   ldr NODE:TMP3, NODE:CARG3->next
3142    |    ldrb TMP2w, TAB:CARG2->marked
3143    |  cmp CARG1, CARG4
3144    |  bne >5
3145    |   ldr TMP0, [BASE, RA, lsl #3]
3146    |  cmp TMP1, TISNIL			// Previous value is nil?
3147    |  beq >4
3148    |2:
3149    |   str TMP0, NODE:CARG3->val
3150    |    tbnz TMP2w, #2, >7		// isblack(table)
3151    |3:
3152    |  ins_next
3153    |
3154    |4:  // Check for __newindex if previous value is nil.
3155    |  ldr TAB:CARG1, TAB:CARG2->metatable
3156    |  cbz TAB:CARG1, <2		// No metatable: done.
3157    |  ldrb TMP1w, TAB:CARG1->nomm
3158    |  tbnz TMP1w, #MM_newindex, <2	// 'no __newindex' flag set: done.
3159    |  b ->vmeta_tsets
3160    |
3161    |5:  // Follow hash chain.
3162    |  mov NODE:CARG3, NODE:TMP3
3163    |  cbnz NODE:TMP3, <1
3164    |  // End of hash chain: key not found, add a new one.
3165    |
3166    |  // But check for __newindex first.
3167    |  ldr TAB:CARG1, TAB:CARG2->metatable
3168    |  cbz TAB:CARG1, >6		// No metatable: continue.
3169    |  ldrb TMP1w, TAB:CARG1->nomm
3170    |  // 'no __newindex' flag NOT set: check.
3171    |  tbz TMP1w, #MM_newindex, ->vmeta_tsets
3172    |6:
3173    |  movn TMP1, #~LJ_TSTR
3174    |   str PC, SAVE_PC
3175    |  add TMP0, STR:RC, TMP1, lsl #47
3176    |   str BASE, L->base
3177    |   mov CARG1, L
3178    |  str TMP0, TMPD
3179    |   add CARG3, sp, TMPDofs
3180    |  bl extern lj_tab_newkey		// (lua_State *L, GCtab *t, TValue *k)
3181    |  // Returns TValue *.
3182    |  ldr BASE, L->base
3183    |  ldr TMP0, [BASE, RA, lsl #3]
3184    |  str TMP0, [CRET1]
3185    |  b <3				// No 2nd write barrier needed.
3186    |
3187    |7:  // Possible table write barrier for the value. Skip valiswhite check.
3188    |  barrierback TAB:CARG2, TMP2w, TMP1
3189    |  b <3
3190    break;
3191  case BC_TSETB:
3192    |  decode_RB RB, INS
3193    |   and RC, RC, #255
3194    |  // RA = src, RB = table, RC = index
3195    |  ldr CARG2, [BASE, RB, lsl #3]
3196    |  checktab CARG2, ->vmeta_tsetb
3197    |  ldr CARG3, TAB:CARG2->array
3198    |   ldr CARG1w, TAB:CARG2->asize
3199    |  add CARG3, CARG3, RC, lsl #3
3200    |   cmp RCw, CARG1w			// In array part?
3201    |   bhs ->vmeta_tsetb
3202    |  ldr TMP1, [CARG3]
3203    |   ldr TMP0, [BASE, RA, lsl #3]
3204    |    ldrb TMP2w, TAB:CARG2->marked
3205    |  cmp TMP1, TISNIL			// Previous value is nil?
3206    |  beq >5
3207    |1:
3208    |   str TMP0, [CARG3]
3209    |    tbnz TMP2w, #2, >7		// isblack(table)
3210    |2:
3211    |   ins_next
3212    |
3213    |5:  // Check for __newindex if previous value is nil.
3214    |  ldr TAB:CARG1, TAB:CARG2->metatable
3215    |  cbz TAB:CARG1, <1		// No metatable: done.
3216    |  ldrb TMP1w, TAB:CARG1->nomm
3217    |  tbnz TMP1w, #MM_newindex, <1	// 'no __newindex' flag set: done.
3218    |  b ->vmeta_tsetb
3219    |
3220    |7:  // Possible table write barrier for the value. Skip valiswhite check.
3221    |  barrierback TAB:CARG2, TMP2w, TMP1
3222    |  b <2
3223    break;
3224  case BC_TSETR:
3225    |  decode_RB RB, INS
3226    |   and RC, RC, #255
3227    |  // RA = src, RB = table, RC = key
3228    |  ldr CARG2, [BASE, RB, lsl #3]
3229    |   ldr TMP1, [BASE, RC, lsl #3]
3230    |  and TAB:CARG2, CARG2, #LJ_GCVMASK
3231    |  ldr CARG1, TAB:CARG2->array
3232    |    ldrb TMP2w, TAB:CARG2->marked
3233    |   ldr CARG4w, TAB:CARG2->asize
3234    |  add CARG1, CARG1, TMP1, uxtw #3
3235    |    tbnz TMP2w, #2, >7		// isblack(table)
3236    |2:
3237    |   cmp TMP1w, CARG4w		// In array part?
3238    |   bhs ->vmeta_tsetr
3239    |->BC_TSETR_Z:
3240    |   ldr TMP0, [BASE, RA, lsl #3]
3241    |   str TMP0, [CARG1]
3242    |   ins_next
3243    |
3244    |7:  // Possible table write barrier for the value. Skip valiswhite check.
3245    |  barrierback TAB:CARG2, TMP2w, TMP0
3246    |  b <2
3247    break;
3248
3249  case BC_TSETM:
3250    |  // RA = base (table at base-1), RC = num_const (start index)
3251    |  add RA, BASE, RA, lsl #3
3252    |1:
3253    |   ldr RBw, SAVE_MULTRES
3254    |  ldr TAB:CARG2, [RA, #-8]		// Guaranteed to be a table.
3255    |   ldr TMP1, [KBASE, RC, lsl #3]	// Integer constant is in lo-word.
3256    |    sub RB, RB, #8
3257    |    cbz RB, >4			// Nothing to copy?
3258    |  and TAB:CARG2, CARG2, #LJ_GCVMASK
3259    |  ldr CARG1w, TAB:CARG2->asize
3260    |   add CARG3w, TMP1w, RBw, lsr #3
3261    |   ldr CARG4, TAB:CARG2->array
3262    |  cmp CARG3, CARG1
3263    |    add RB, RA, RB
3264    |  bhi >5
3265    |   add TMP1, CARG4, TMP1w, uxtw #3
3266    |    ldrb TMP2w, TAB:CARG2->marked
3267    |3:  // Copy result slots to table.
3268    |   ldr TMP0, [RA], #8
3269    |   str TMP0, [TMP1], #8
3270    |  cmp RA, RB
3271    |  blo <3
3272    |    tbnz TMP2w, #2, >7		// isblack(table)
3273    |4:
3274    |  ins_next
3275    |
3276    |5:  // Need to resize array part.
3277    |   str BASE, L->base
3278    |  mov CARG1, L
3279    |   str PC, SAVE_PC
3280    |  bl extern lj_tab_reasize		// (lua_State *L, GCtab *t, int nasize)
3281    |  // Must not reallocate the stack.
3282    |  b <1
3283    |
3284    |7:  // Possible table write barrier for any value. Skip valiswhite check.
3285    |  barrierback TAB:CARG2, TMP2w, TMP1
3286    |  b <4
3287    break;
3288
3289  /* -- Calls and vararg handling ----------------------------------------- */
3290
3291  case BC_CALLM:
3292    |  // RA = base, (RB = nresults+1,) RC = extra_nargs
3293    |  ldr TMP0w, SAVE_MULTRES
3294    |  decode_RC8RD NARGS8:RC, RC
3295    |  add NARGS8:RC, NARGS8:RC, TMP0
3296    |  b ->BC_CALL_Z
3297    break;
3298  case BC_CALL:
3299    |  decode_RC8RD NARGS8:RC, RC
3300    |  // RA = base, (RB = nresults+1,) RC = (nargs+1)*8
3301    |->BC_CALL_Z:
3302    |  mov RB, BASE			// Save old BASE for vmeta_call.
3303    |  add BASE, BASE, RA, lsl #3
3304    |  ldr CARG3, [BASE]
3305    |   sub NARGS8:RC, NARGS8:RC, #8
3306    |   add BASE, BASE, #16
3307    |  checkfunc CARG3, ->vmeta_call
3308    |  ins_call
3309    break;
3310
3311  case BC_CALLMT:
3312    |  // RA = base, (RB = 0,) RC = extra_nargs
3313    |  ldr TMP0w, SAVE_MULTRES
3314    |  add NARGS8:RC, TMP0, RC, lsl #3
3315    |  b ->BC_CALLT1_Z
3316    break;
3317  case BC_CALLT:
3318    |  lsl NARGS8:RC, RC, #3
3319    |  // RA = base, (RB = 0,) RC = (nargs+1)*8
3320    |->BC_CALLT1_Z:
3321    |  add RA, BASE, RA, lsl #3
3322    |  ldr TMP1, [RA]
3323    |   sub NARGS8:RC, NARGS8:RC, #8
3324    |   add RA, RA, #16
3325    |  checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt
3326    |  ldr PC, [BASE, FRAME_PC]
3327    |->BC_CALLT2_Z:
3328    |   mov RB, #0
3329    |   ldrb TMP2w, LFUNC:CARG3->ffid
3330    |  tst PC, #FRAME_TYPE
3331    |  bne >7
3332    |1:
3333    |  str TMP1, [BASE, FRAME_FUNC]	// Copy function down, but keep PC.
3334    |  cbz NARGS8:RC, >3
3335    |2:
3336    |  ldr TMP0, [RA, RB]
3337    |   add TMP1, RB, #8
3338    |   cmp TMP1, NARGS8:RC
3339    |  str TMP0, [BASE, RB]
3340    |    mov RB, TMP1
3341    |   bne <2
3342    |3:
3343    |  cmp TMP2, #1			// (> FF_C) Calling a fast function?
3344    |  bhi >5
3345    |4:
3346    |  ins_callt
3347    |
3348    |5:  // Tailcall to a fast function with a Lua frame below.
3349    |  ldrb RAw, [PC, #-4+OFS_RA]
3350    |  sub CARG1, BASE, RA, lsl #3
3351    |  ldr LFUNC:CARG1, [CARG1, #-32]
3352    |  and LFUNC:CARG1, CARG1, #LJ_GCVMASK
3353    |  ldr CARG1, LFUNC:CARG1->pc
3354    |  ldr KBASE, [CARG1, #PC2PROTO(k)]
3355    |  b <4
3356    |
3357    |7:  // Tailcall from a vararg function.
3358    |  eor PC, PC, #FRAME_VARG
3359    |  tst PC, #FRAME_TYPEP		// Vararg frame below?
3360    |  csel TMP2, RB, TMP2, ne		// Clear ffid if no Lua function below.
3361    |  bne <1
3362    |  sub BASE, BASE, PC
3363    |  ldr PC, [BASE, FRAME_PC]
3364    |  tst PC, #FRAME_TYPE
3365    |  csel TMP2, RB, TMP2, ne		// Clear ffid if no Lua function below.
3366    |  b <1
3367    break;
3368
3369  case BC_ITERC:
3370    |  // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
3371    |  add RA, BASE, RA, lsl #3
3372    |  ldr CARG3, [RA, #-24]
3373    |    mov RB, BASE			// Save old BASE for vmeta_call.
3374    |   ldp CARG1, CARG2, [RA, #-16]
3375    |    add BASE, RA, #16
3376    |    mov NARGS8:RC, #16		// Iterators get 2 arguments.
3377    |  str CARG3, [RA]			// Copy callable.
3378    |   stp CARG1, CARG2, [RA, #16]	// Copy state and control var.
3379    |  checkfunc CARG3, ->vmeta_call
3380    |  ins_call
3381    break;
3382
3383  case BC_ITERN:
3384    |.if JIT
3385    |  hotloop
3386    |.endif
3387    |->vm_IITERN:
3388    |  // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
3389    |  add RA, BASE, RA, lsl #3
3390    |  ldr TAB:RB, [RA, #-16]
3391    |    ldrh TMP3w, [PC, # OFS_RD]
3392    |  ldr CARG1w, [RA, #-8+LO]		// Get index from control var.
3393    |    add PC, PC, #4
3394    |    add TMP3, PC, TMP3, lsl #2
3395    |  and TAB:RB, RB, #LJ_GCVMASK
3396    |    sub TMP3, TMP3, #0x20000
3397    |  ldr TMP1w, TAB:RB->asize
3398    |   ldr CARG2, TAB:RB->array
3399    |1:  // Traverse array part.
3400    |  subs RC, CARG1, TMP1
3401    |   add CARG3, CARG2, CARG1, lsl #3
3402    |  bhs >5				// Index points after array part?
3403    |   ldr TMP0, [CARG3]
3404    |   cmp TMP0, TISNIL
3405    |   cinc CARG1, CARG1, eq		// Skip holes in array part.
3406    |   beq <1
3407    |   add CARG1, CARG1, TISNUM
3408    |   stp CARG1, TMP0, [RA]
3409    |    add CARG1, CARG1, #1
3410    |3:
3411    |    str CARG1w, [RA, #-8+LO]	// Update control var.
3412    |  mov PC, TMP3
3413    |4:
3414    |  ins_next
3415    |
3416    |5:  // Traverse hash part.
3417    |  ldr TMP2w, TAB:RB->hmask
3418    |   ldr NODE:RB, TAB:RB->node
3419    |6:
3420    |   add CARG1, RC, RC, lsl #1
3421    |  cmp RC, TMP2			// End of iteration? Branch to ITERN+1.
3422    |   add NODE:CARG3, NODE:RB, CARG1, lsl #3  // node = tab->node + idx*3*8
3423    |  bhi <4
3424    |  ldp TMP0, CARG1, NODE:CARG3->val
3425    |  cmp TMP0, TISNIL
3426    |   add RC, RC, #1
3427    |  beq <6				// Skip holes in hash part.
3428    |  stp CARG1, TMP0, [RA]
3429    |  add CARG1, RC, TMP1
3430    |  b <3
3431    break;
3432
3433  case BC_ISNEXT:
3434    |  // RA = base, RC = target (points to ITERN)
3435    |  add RA, BASE, RA, lsl #3
3436    |  ldr CFUNC:CARG1, [RA, #-24]
3437    |     add RC, PC, RC, lsl #2
3438    |   ldp TAB:CARG3, CARG4, [RA, #-16]
3439    |     sub RC, RC, #0x20000
3440    |  checkfunc CFUNC:CARG1, >5
3441    |   asr TMP0, TAB:CARG3, #47
3442    |  ldrb TMP1w, CFUNC:CARG1->ffid
3443    |   cmn TMP0, #-LJ_TTAB
3444    |   ccmp CARG4, TISNIL, #0, eq
3445    |  ccmp TMP1w, #FF_next_N, #0, eq
3446    |  bne >5
3447    |  mov TMP0w, #0xfffe7fff		// LJ_KEYINDEX
3448    |  lsl TMP0, TMP0, #32
3449    |  str TMP0, [RA, #-8]		// Initialize control var.
3450    |1:
3451    |     mov PC, RC
3452    |  ins_next
3453    |
3454    |5:  // Despecialize bytecode if any of the checks fail.
3455    |.if JIT
3456    |  ldrb TMP2w, [RC, # OFS_OP]
3457    |.endif
3458    |  mov TMP0, #BC_JMP
3459    |   mov TMP1, #BC_ITERC
3460    |  strb TMP0w, [PC, #-4+OFS_OP]
3461    |.if JIT
3462    |  cmp TMP2w, #BC_ITERN
3463    |  bne >6
3464    |.endif
3465    |   strb TMP1w, [RC, # OFS_OP]
3466    |  b <1
3467    |.if JIT
3468    |6:  // Unpatch JLOOP.
3469    |  ldr RA, [GL, #GL_J(trace)]
3470    |  ldrh TMP2w, [RC, # OFS_RD]
3471    |  ldr TRACE:RA, [RA, TMP2, lsl #3]
3472    |  ldr TMP2w, TRACE:RA->startins
3473    |  bfxil TMP2w, TMP1w, #0, #8
3474    |  str TMP2w, [RC]
3475    |  b <1
3476    |.endif
3477    break;
3478
3479  case BC_VARG:
3480    |  decode_RB RB, INS
3481    |   and RC, RC, #255
3482    |  // RA = base, RB = (nresults+1), RC = numparams
3483    |  ldr TMP1, [BASE, FRAME_PC]
3484    |  add RC, BASE, RC, lsl #3
3485    |   add RA, BASE, RA, lsl #3
3486    |  add RC, RC, #FRAME_VARG
3487    |   add TMP2, RA, RB, lsl #3
3488    |  sub RC, RC, TMP1			// RC = vbase
3489    |  // Note: RC may now be even _above_ BASE if nargs was < numparams.
3490    |   sub TMP3, BASE, #16		// TMP3 = vtop
3491    |  cbz RB, >5
3492    |   sub TMP2, TMP2, #16
3493    |1:  // Copy vararg slots to destination slots.
3494    |  cmp RC, TMP3
3495    |  ldr TMP0, [RC], #8
3496    |  csel TMP0, TMP0, TISNIL, lo
3497    |   cmp RA, TMP2
3498    |  str TMP0, [RA], #8
3499    |   blo <1
3500    |2:
3501    |  ins_next
3502    |
3503    |5:  // Copy all varargs.
3504    |  ldr TMP0, L->maxstack
3505    |   subs TMP2, TMP3, RC
3506    |   csel RB, xzr, TMP2, le		// MULTRES = (max(vtop-vbase,0)+1)*8
3507    |   add RB, RB, #8
3508    |  add TMP1, RA, TMP2
3509    |   str RBw, SAVE_MULTRES
3510    |   ble <2				// Nothing to copy.
3511    |  cmp TMP1, TMP0
3512    |  bhi >7
3513    |6:
3514    |  ldr TMP0, [RC], #8
3515    |  str TMP0, [RA], #8
3516    |  cmp RC, TMP3
3517    |  blo <6
3518    |  b <2
3519    |
3520    |7:  // Grow stack for varargs.
3521    |  lsr CARG2, TMP2, #3
3522    |   stp BASE, RA, L->base
3523    |  mov CARG1, L
3524    |  sub RC, RC, BASE			// Need delta, because BASE may change.
3525    |   str PC, SAVE_PC
3526    |  bl extern lj_state_growstack	// (lua_State *L, int n)
3527    |  ldp BASE, RA, L->base
3528    |  add RC, BASE, RC
3529    |  sub TMP3, BASE, #16
3530    |  b <6
3531    break;
3532
3533  /* -- Returns ----------------------------------------------------------- */
3534
3535  case BC_RETM:
3536    |  // RA = results, RC = extra results
3537    |  ldr TMP0w, SAVE_MULTRES
3538    |   ldr PC, [BASE, FRAME_PC]
3539    |    add RA, BASE, RA, lsl #3
3540    |  add RC, TMP0, RC, lsl #3
3541    |  b ->BC_RETM_Z
3542    break;
3543
3544  case BC_RET:
3545    |  // RA = results, RC = nresults+1
3546    |  ldr PC, [BASE, FRAME_PC]
3547    |   lsl RC, RC, #3
3548    |    add RA, BASE, RA, lsl #3
3549    |->BC_RETM_Z:
3550    |   str RCw, SAVE_MULTRES
3551    |1:
3552    |  ands CARG1, PC, #FRAME_TYPE
3553    |   eor CARG2, PC, #FRAME_VARG
3554    |  bne ->BC_RETV2_Z
3555    |
3556    |->BC_RET_Z:
3557    |  // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return
3558    |  ldr INSw, [PC, #-4]
3559    |  subs TMP1, RC, #8
3560    |   sub CARG3, BASE, #16
3561    |  beq >3
3562    |2:
3563    |  ldr TMP0, [RA], #8
3564    |   add BASE, BASE, #8
3565    |   sub TMP1, TMP1, #8
3566    |  str TMP0, [BASE, #-24]
3567    |   cbnz TMP1, <2
3568    |3:
3569    |  decode_RA RA, INS
3570    |  sub CARG4, CARG3, RA, lsl #3
3571    |   decode_RB RB, INS
3572    |  ldr LFUNC:CARG1, [CARG4, FRAME_FUNC]
3573    |5:
3574    |  cmp RC, RB, lsl #3		// More results expected?
3575    |  blo >6
3576    |  and LFUNC:CARG1, CARG1, #LJ_GCVMASK
3577    |  mov BASE, CARG4
3578    |  ldr CARG2, LFUNC:CARG1->pc
3579    |  ldr KBASE, [CARG2, #PC2PROTO(k)]
3580    |   ins_next
3581    |
3582    |6:  // Fill up results with nil.
3583    |  add BASE, BASE, #8
3584    |   add RC, RC, #8
3585    |  str TISNIL, [BASE, #-24]
3586    |  b <5
3587    |
3588    |->BC_RETV1_Z:  // Non-standard return case.
3589    |  add RA, BASE, RA, lsl #3
3590    |->BC_RETV2_Z:
3591    |  tst CARG2, #FRAME_TYPEP
3592    |  bne ->vm_return
3593    |  // Return from vararg function: relocate BASE down.
3594    |  sub BASE, BASE, CARG2
3595    |  ldr PC, [BASE, FRAME_PC]
3596    |  b <1
3597    break;
3598
3599  case BC_RET0: case BC_RET1:
3600    |  // RA = results, RC = nresults+1
3601    |  ldr PC, [BASE, FRAME_PC]
3602    |   lsl RC, RC, #3
3603    |   str RCw, SAVE_MULTRES
3604    |  ands CARG1, PC, #FRAME_TYPE
3605    |   eor CARG2, PC, #FRAME_VARG
3606    |  bne ->BC_RETV1_Z
3607    |   ldr INSw, [PC, #-4]
3608    if (op == BC_RET1) {
3609      |  ldr TMP0, [BASE, RA, lsl #3]
3610    }
3611    |  sub CARG4, BASE, #16
3612    |   decode_RA RA, INS
3613    |  sub BASE, CARG4, RA, lsl #3
3614    if (op == BC_RET1) {
3615      |  str TMP0, [CARG4], #8
3616    }
3617    |   decode_RB RB, INS
3618    |  ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
3619    |5:
3620    |  cmp RC, RB, lsl #3
3621    |  blo >6
3622    |  and LFUNC:CARG1, CARG1, #LJ_GCVMASK
3623    |  ldr CARG2, LFUNC:CARG1->pc
3624    |  ldr KBASE, [CARG2, #PC2PROTO(k)]
3625    |  ins_next
3626    |
3627    |6:  // Fill up results with nil.
3628    |  add RC, RC, #8
3629    |  str TISNIL, [CARG4], #8
3630    |  b <5
3631    break;
3632
3633  /* -- Loops and branches ------------------------------------------------ */
3634
3635  |.define FOR_IDX,  [RA];      .define FOR_TIDX,  [RA, #4]
3636  |.define FOR_STOP, [RA, #8];  .define FOR_TSTOP, [RA, #12]
3637  |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20]
3638  |.define FOR_EXT,  [RA, #24]; .define FOR_TEXT,  [RA, #28]
3639
3640  case BC_FORL:
3641    |.if JIT
3642    |  hotloop
3643    |.endif
3644    |  // Fall through. Assumes BC_IFORL follows.
3645    break;
3646
3647  case BC_JFORI:
3648  case BC_JFORL:
3649#if !LJ_HASJIT
3650    break;
3651#endif
3652  case BC_FORI:
3653  case BC_IFORL:
3654    |  // RA = base, RC = target (after end of loop or start of loop)
3655    vk = (op == BC_IFORL || op == BC_JFORL);
3656    |  add RA, BASE, RA, lsl #3
3657    |  ldp CARG1, CARG2, FOR_IDX		// CARG1 = IDX, CARG2 = STOP
3658    |   ldr CARG3, FOR_STEP			// CARG3 = STEP
3659    if (op != BC_JFORL) {
3660      |   add RC, PC, RC, lsl #2
3661      |   sub RC, RC, #0x20000
3662    }
3663    |  checkint CARG1, >5
3664    if (!vk) {
3665      |  checkint CARG2, ->vmeta_for
3666      |   checkint CARG3, ->vmeta_for
3667      |  tbnz CARG3w, #31, >4
3668      |  cmp CARG1w, CARG2w
3669    } else {
3670      |  adds CARG1w, CARG1w, CARG3w
3671      |  bvs >2
3672      |   add TMP0, CARG1, TISNUM
3673      |  tbnz CARG3w, #31, >4
3674      |  cmp CARG1w, CARG2w
3675    }
3676    |1:
3677    if (op == BC_FORI) {
3678      |  csel PC, RC, PC, gt
3679    } else if (op == BC_JFORI) {
3680      |  mov PC, RC
3681      |  ldrh RCw, [RC, #-4+OFS_RD]
3682    } else if (op == BC_IFORL) {
3683      |  csel PC, RC, PC, le
3684    }
3685    if (vk) {
3686      |   str TMP0, FOR_IDX
3687      |   str TMP0, FOR_EXT
3688    } else {
3689      |  str CARG1, FOR_EXT
3690    }
3691    if (op == BC_JFORI || op == BC_JFORL) {
3692      |  ble =>BC_JLOOP
3693    }
3694    |2:
3695    |   ins_next
3696    |
3697    |4:  // Invert check for negative step.
3698    |  cmp CARG2w, CARG1w
3699    |  b <1
3700    |
3701    |5:  // FP loop.
3702    |  ldp d0, d1, FOR_IDX
3703    |  blo ->vmeta_for
3704    if (!vk) {
3705      |  checknum CARG2, ->vmeta_for
3706      |   checknum CARG3, ->vmeta_for
3707      |  str d0, FOR_EXT
3708    } else {
3709      |  ldr d2, FOR_STEP
3710      |  fadd d0, d0, d2
3711    }
3712    |  tbnz CARG3, #63, >7
3713    |  fcmp d0, d1
3714    |6:
3715    if (vk) {
3716      |  str d0, FOR_IDX
3717      |  str d0, FOR_EXT
3718    }
3719    if (op == BC_FORI) {
3720      |  csel PC, RC, PC, hi
3721    } else if (op == BC_JFORI) {
3722      |  ldrh RCw, [RC, #-4+OFS_RD]
3723      |  bls =>BC_JLOOP
3724    } else if (op == BC_IFORL) {
3725      |  csel PC, RC, PC, ls
3726    } else {
3727      |  bls =>BC_JLOOP
3728    }
3729    |  b <2
3730    |
3731    |7:  // Invert check for negative step.
3732    |  fcmp d1, d0
3733    |  b <6
3734    break;
3735
3736  case BC_ITERL:
3737    |.if JIT
3738    |  hotloop
3739    |.endif
3740    |  // Fall through. Assumes BC_IITERL follows.
3741    break;
3742
3743  case BC_JITERL:
3744#if !LJ_HASJIT
3745    break;
3746#endif
3747  case BC_IITERL:
3748    |  // RA = base, RC = target
3749    |  ldr CARG1, [BASE, RA, lsl #3]
3750    |   add TMP1, BASE, RA, lsl #3
3751    |  cmp CARG1, TISNIL
3752    |  beq >1				// Stop if iterator returned nil.
3753    if (op == BC_JITERL) {
3754      |  str CARG1, [TMP1, #-8]
3755      |  b =>BC_JLOOP
3756    } else {
3757      |  add TMP0, PC, RC, lsl #2	// Otherwise save control var + branch.
3758      |  sub PC, TMP0, #0x20000
3759      |  str CARG1, [TMP1, #-8]
3760    }
3761    |1:
3762    |  ins_next
3763    break;
3764
3765  case BC_LOOP:
3766    |  // RA = base, RC = target (loop extent)
3767    |  // Note: RA/RC is only used by trace recorder to determine scope/extent
    |  // This opcode does NOT jump, its only purpose is to detect a hot loop.
3769    |.if JIT
3770    |  hotloop
3771    |.endif
3772    |  // Fall through. Assumes BC_ILOOP follows.
3773    break;
3774
3775  case BC_ILOOP:
3776    |  // RA = base, RC = target (loop extent)
3777    |  ins_next
3778    break;
3779
3780  case BC_JLOOP:
3781    |.if JIT
3782    |  // RA = base (ignored), RC = traceno
3783    |  ldr CARG1, [GL, #GL_J(trace)]
3784    |   mov CARG2w, #0  // Traces on ARM64 don't store the trace #, so use 0.
3785    |  ldr TRACE:RC, [CARG1, RC, lsl #3]
3786    |   st_vmstate CARG2w
3787    |  ldr RA, TRACE:RC->mcode
3788    |   str BASE, GL->jit_base
3789    |   str L, GL->tmpbuf.L
3790    |  sub sp, sp, #16	// See SPS_FIXED. Avoids sp adjust in every root trace.
3791    |  br RA
3792    |.endif
3793    break;
3794
3795  case BC_JMP:
3796    |  // RA = base (only used by trace recorder), RC = target
3797    |  add RC, PC, RC, lsl #2
3798    |  sub PC, RC, #0x20000
3799    |  ins_next
3800    break;
3801
3802  /* -- Function headers -------------------------------------------------- */
3803
3804  case BC_FUNCF:
3805    |.if JIT
3806    |  hotcall
3807    |.endif
3808  case BC_FUNCV:  /* NYI: compiled vararg functions. */
3809    |  // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
3810    break;
3811
3812  case BC_JFUNCF:
3813#if !LJ_HASJIT
3814    break;
3815#endif
3816  case BC_IFUNCF:
3817    |  // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
3818    |  ldr CARG1, L->maxstack
3819    |   ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)]
3820    |    ldr KBASE, [PC, #-4+PC2PROTO(k)]
3821    |  cmp RA, CARG1
3822    |  bhi ->vm_growstack_l
3823    |2:
3824    |  cmp NARGS8:RC, TMP1, lsl #3	// Check for missing parameters.
3825    |  blo >3
3826    if (op == BC_JFUNCF) {
3827      |  decode_RD RC, INS
3828      |  b =>BC_JLOOP
3829    } else {
3830      |  ins_next
3831    }
3832    |
3833    |3:  // Clear missing parameters.
3834    |  str TISNIL, [BASE, NARGS8:RC]
3835    |  add NARGS8:RC, NARGS8:RC, #8
3836    |  b <2
3837    break;
3838
3839  case BC_JFUNCV:
3840#if !LJ_HASJIT
3841    break;
3842#endif
3843    |  NYI  // NYI: compiled vararg functions
3844    break;  /* NYI: compiled vararg functions. */
3845
3846  case BC_IFUNCV:
3847    |  // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
3848    |  ldr CARG1, L->maxstack
3849    |   movn TMP0, #~LJ_TFUNC
3850    |   add TMP2, BASE, RC
3851    |   add LFUNC:CARG3, CARG3, TMP0, lsl #47
3852    |  add RA, RA, RC
3853    |   add TMP0, RC, #16+FRAME_VARG
3854    |   str LFUNC:CARG3, [TMP2], #8	// Store (tagged) copy of LFUNC.
3855    |    ldr KBASE, [PC, #-4+PC2PROTO(k)]
3856    |  cmp RA, CARG1
3857    |   str TMP0, [TMP2], #8		// Store delta + FRAME_VARG.
3858    |  bhs ->vm_growstack_l
3859    |   sub RC, TMP2, #16
3860    |  ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)]
3861    |   mov RA, BASE
3862    |   mov BASE, TMP2
3863    |  cbz TMP1, >2
3864    |1:
3865    |  cmp RA, RC			// Less args than parameters?
3866    |  bhs >3
3867    |   ldr TMP0, [RA]
3868    |  sub TMP1, TMP1, #1
3869    |    str TISNIL, [RA], #8		// Clear old fixarg slot (help the GC).
3870    |   str TMP0, [TMP2], #8
3871    |  cbnz TMP1, <1
3872    |2:
3873    |  ins_next
3874    |
3875    |3:
3876    |  sub TMP1, TMP1, #1
3877    |   str TISNIL, [TMP2], #8
3878    |  cbz TMP1, <2
3879    |  b <3
3880    break;
3881
3882  case BC_FUNCC:
3883  case BC_FUNCCW:
3884    |  // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8
3885    if (op == BC_FUNCC) {
3886      |  ldr CARG4, CFUNC:CARG3->f
3887    } else {
3888      |  ldr CARG4, GL->wrapf
3889    }
3890    |   add CARG2, RA, NARGS8:RC
3891    |   ldr CARG1, L->maxstack
3892    |  add RC, BASE, NARGS8:RC
3893    |   cmp CARG2, CARG1
3894    |  stp BASE, RC, L->base
3895    if (op == BC_FUNCCW) {
3896      |  ldr CARG2, CFUNC:CARG3->f
3897    }
3898    |    mv_vmstate TMP0w, C
3899    |  mov CARG1, L
3900    |   bhi ->vm_growstack_c		// Need to grow stack.
3901    |    st_vmstate TMP0w
3902    |  blr CARG4			// (lua_State *L [, lua_CFunction f])
3903    |  // Returns nresults.
3904    |  ldp BASE, TMP1, L->base
3905    |    str L, GL->cur_L
3906    |   sbfiz RC, CRET1, #3, #32
3907    |    st_vmstate ST_INTERP
3908    |  ldr PC, [BASE, FRAME_PC]
3909    |   sub RA, TMP1, RC		// RA = L->top - nresults*8
3910    |  b ->vm_returnc
3911    break;
3912
3913  /* ---------------------------------------------------------------------- */
3914
3915  default:
3916    fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
3917    exit(2);
3918    break;
3919  }
3920}
3921
3922static int build_backend(BuildCtx *ctx)
3923{
3924  int op;
3925
3926  dasm_growpc(Dst, BC__MAX);
3927
3928  build_subroutines(ctx);
3929
3930  |.code_op
3931  for (op = 0; op < BC__MAX; op++)
3932    build_ins(ctx, (BCOp)op, op);
3933
3934  return BC__MAX;
3935}
3936
3937/* Emit pseudo frame-info for all assembler functions. */
3938static void emit_asm_debug(BuildCtx *ctx)
3939{
3940  int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
3941  int i;
3942  switch (ctx->mode) {
3943  case BUILD_elfasm:
3944    fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n");
3945    fprintf(ctx->fp,
3946	".Lframe0:\n"
3947	"\t.long .LECIE0-.LSCIE0\n"
3948	".LSCIE0:\n"
3949	"\t.long 0xffffffff\n"
3950	"\t.byte 0x1\n"
3951	"\t.string \"\"\n"
3952	"\t.uleb128 0x1\n"
3953	"\t.sleb128 -8\n"
3954	"\t.byte 30\n"				/* Return address is in lr. */
3955	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
3956	"\t.align 3\n"
3957	".LECIE0:\n\n");
3958    fprintf(ctx->fp,
3959	".LSFDE0:\n"
3960	"\t.long .LEFDE0-.LASFDE0\n"
3961	".LASFDE0:\n"
3962	"\t.long .Lframe0\n"
3963	"\t.quad .Lbegin\n"
3964	"\t.quad %d\n"
3965	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
3966	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
3967	fcofs);
3968    for (i = 19; i <= 28; i++)  /* offset x19-x28 */
3969      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
3970    for (i = 8; i <= 15; i++)  /* offset d8-d15 */
3971      fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
3972	      64+i, i+(3+(28-19+1)-8));
3973    fprintf(ctx->fp,
3974	"\t.align 3\n"
3975	".LEFDE0:\n\n");
3976#if LJ_HASFFI
3977    fprintf(ctx->fp,
3978	".LSFDE1:\n"
3979	"\t.long .LEFDE1-.LASFDE1\n"
3980	".LASFDE1:\n"
3981	"\t.long .Lframe0\n"
3982	"\t.quad lj_vm_ffi_call\n"
3983	"\t.quad %d\n"
3984	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
3985	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
3986	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
3987	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
3988	"\t.align 3\n"
3989	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
3990#endif
3991    fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n");
3992    fprintf(ctx->fp,
3993	".Lframe1:\n"
3994	"\t.long .LECIE1-.LSCIE1\n"
3995	".LSCIE1:\n"
3996	"\t.long 0\n"
3997	"\t.byte 0x1\n"
3998	"\t.string \"zPR\"\n"
3999	"\t.uleb128 0x1\n"
4000	"\t.sleb128 -8\n"
4001	"\t.byte 30\n"				/* Return address is in lr. */
4002	"\t.uleb128 6\n"			/* augmentation length */
4003	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
4004	"\t.long lj_err_unwind_dwarf-.\n"
4005	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
4006	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
4007	"\t.align 3\n"
4008	".LECIE1:\n\n");
4009    fprintf(ctx->fp,
4010	".LSFDE2:\n"
4011	"\t.long .LEFDE2-.LASFDE2\n"
4012	".LASFDE2:\n"
4013	"\t.long .LASFDE2-.Lframe1\n"
4014	"\t.long .Lbegin-.\n"
4015	"\t.long %d\n"
4016	"\t.uleb128 0\n"			/* augmentation length */
4017	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
4018	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
4019	fcofs);
4020    for (i = 19; i <= 28; i++)  /* offset x19-x28 */
4021      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
4022    for (i = 8; i <= 15; i++)  /* offset d8-d15 */
4023      fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
4024	      64+i, i+(3+(28-19+1)-8));
4025    fprintf(ctx->fp,
4026	"\t.align 3\n"
4027	".LEFDE2:\n\n");
4028#if LJ_HASFFI
4029    fprintf(ctx->fp,
4030	".Lframe2:\n"
4031	"\t.long .LECIE2-.LSCIE2\n"
4032	".LSCIE2:\n"
4033	"\t.long 0\n"
4034	"\t.byte 0x1\n"
4035	"\t.string \"zR\"\n"
4036	"\t.uleb128 0x1\n"
4037	"\t.sleb128 -8\n"
4038	"\t.byte 30\n"				/* Return address is in lr. */
4039	"\t.uleb128 1\n"			/* augmentation length */
4040	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
4041	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
4042	"\t.align 3\n"
4043	".LECIE2:\n\n");
4044    fprintf(ctx->fp,
4045	".LSFDE3:\n"
4046	"\t.long .LEFDE3-.LASFDE3\n"
4047	".LASFDE3:\n"
4048	"\t.long .LASFDE3-.Lframe2\n"
4049	"\t.long lj_vm_ffi_call-.\n"
4050	"\t.long %d\n"
4051	"\t.uleb128 0\n"			/* augmentation length */
4052	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
4053	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
4054	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
4055	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
4056	"\t.align 3\n"
4057	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
4058#endif
4059    break;
4060#if !LJ_NO_UNWIND
4061  case BUILD_machasm: {
4062#if LJ_HASFFI
4063    int fcsize = 0;
4064#endif
4065    int j;
4066    fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
4067    fprintf(ctx->fp,
4068	"EH_frame1:\n"
4069	"\t.set L$set$x,LECIEX-LSCIEX\n"
4070	"\t.long L$set$x\n"
4071	"LSCIEX:\n"
4072	"\t.long 0\n"
4073	"\t.byte 0x1\n"
4074	"\t.ascii \"zPR\\0\"\n"
4075	"\t.uleb128 0x1\n"
4076	"\t.sleb128 -8\n"
4077	"\t.byte 30\n"				/* Return address is in lr. */
4078	"\t.uleb128 6\n"			/* augmentation length */
4079	"\t.byte 0x9b\n"			/* indirect|pcrel|sdata4 */
4080	"\t.long _lj_err_unwind_dwarf@GOT-.\n"
4081	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
4082	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
4083	"\t.align 3\n"
4084	"LECIEX:\n\n");
4085    for (j = 0; j < ctx->nsym; j++) {
4086      const char *name = ctx->sym[j].name;
4087      int32_t size = ctx->sym[j+1].ofs - ctx->sym[j].ofs;
4088      if (size == 0) continue;
4089#if LJ_HASFFI
4090      if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
4091#endif
4092      fprintf(ctx->fp,
4093	"LSFDE%d:\n"
4094	"\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
4095	"\t.long L$set$%d\n"
4096	"LASFDE%d:\n"
4097	"\t.long LASFDE%d-EH_frame1\n"
4098	"\t.long %s-.\n"
4099	"\t.long %d\n"
4100	"\t.uleb128 0\n"			/* augmentation length */
4101	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
4102	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
4103	j, j, j, j, j, j, j, name, size);
4104      for (i = 19; i <= 28; i++)  /* offset x19-x28 */
4105	fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
4106      for (i = 8; i <= 15; i++)  /* offset d8-d15 */
4107	fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
4108		64+i, i+(3+(28-19+1)-8));
4109      fprintf(ctx->fp,
4110	"\t.align 3\n"
4111	"LEFDE%d:\n\n", j);
4112    }
4113#if LJ_HASFFI
4114    if (fcsize) {
4115      fprintf(ctx->fp,
4116	"EH_frame2:\n"
4117	"\t.set L$set$y,LECIEY-LSCIEY\n"
4118	"\t.long L$set$y\n"
4119	"LSCIEY:\n"
4120	"\t.long 0\n"
4121	"\t.byte 0x1\n"
4122	"\t.ascii \"zR\\0\"\n"
4123	"\t.uleb128 0x1\n"
4124	"\t.sleb128 -8\n"
4125	"\t.byte 30\n"				/* Return address is in lr. */
4126	"\t.uleb128 1\n"			/* augmentation length */
4127	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
4128	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
4129	"\t.align 3\n"
4130	"LECIEY:\n\n");
4131      fprintf(ctx->fp,
4132	"LSFDEY:\n"
4133	"\t.set L$set$yy,LEFDEY-LASFDEY\n"
4134	"\t.long L$set$yy\n"
4135	"LASFDEY:\n"
4136	"\t.long LASFDEY-EH_frame2\n"
4137	"\t.long _lj_vm_ffi_call-.\n"
4138	"\t.long %d\n"
4139	"\t.uleb128 0\n"			/* augmentation length */
4140	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
4141	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
4142	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
4143	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
4144	"\t.align 3\n"
4145	"LEFDEY:\n\n", fcsize);
4146    }
4147#endif
4148    fprintf(ctx->fp, ".subsections_via_symbols\n");
4149    }
4150    break;
4151#endif
4152  default:
4153    break;
4154  }
4155}
4156
4157