1|// Low-level VM code for x86 CPUs.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4|
5|.if P64
6|.arch x64
7|.else
8|.arch x86
9|.endif
10|.section code_op, code_sub
11|
12|.actionlist build_actionlist
13|.globals GLOB_
14|.globalnames globnames
15|.externnames extnames
16|
17|//-----------------------------------------------------------------------
18|
19|.if P64
20|.define X64, 1		// Set for every x64 build (P64 also selects .arch x64 above).
21|.if WIN
22|.define X64WIN, 1	// Set only when both P64 and WIN are set.
23|.endif
24|.endif
25|
26|// Fixed register assignments for the interpreter.
27|// This is very fragile and has many dependencies. Caveat emptor.
28|.define BASE,		edx		// Not C callee-save, refetched anyway.
29|.if not X64		// x86.
30|.define KBASE,		edi		// Must be C callee-save.
31|.define KBASEa,	KBASE		// Address-sized name, same register on x86.
32|.define PC,		esi		// Must be C callee-save.
33|.define PCa,		PC		// Address-sized name, same register on x86.
34|.define DISPATCH,	ebx		// Must be C callee-save.
35|.elif X64WIN		// x64/Windows.
36|.define KBASE,		edi		// Must be C callee-save.
37|.define KBASEa,	rdi		// Full 64 bit register behind KBASE.
38|.define PC,		esi		// Must be C callee-save.
39|.define PCa,		rsi		// Full 64 bit register behind PC.
40|.define DISPATCH,	ebx		// Must be C callee-save.
41|.else			// x64/POSIX.
42|.define KBASE,		r15d		// Must be C callee-save.
43|.define KBASEa,	r15		// Full 64 bit register behind KBASE.
44|.define PC,		ebx		// Must be C callee-save.
45|.define PCa,		rbx		// Full 64 bit register behind PC.
46|.define DISPATCH,	r14d		// Must be C callee-save.
47|.endif
48|
49|.define RA,		ecx
50|.define RAH,		ch		// Bits 8-15 of RA.
51|.define RAL,		cl		// Bits 0-7 of RA.
52|.define RB,		ebp		// Must be ebp (C callee-save).
53|.define RC,		eax		// Must be eax.
54|.define RCW,		ax		// Bits 0-15 of RC.
55|.define RCH,		ah		// Bits 8-15 of RC.
56|.define RCL,		al		// Bits 0-7 of RC.
57|.define OP,		RB		// The opcode shares a register with RB.
58|.define RD,		RC		// RD shares a register with RC.
59|.define RDW,		RCW
60|.define RDL,		RCL
61|.if X64			// Address-sized (64 bit) views of RA/RB/RC/RD.
62|.define RAa, rcx
63|.define RBa, rbp
64|.define RCa, rax
65|.define RDa, rax
66|.else			// On x86 the address-sized names are plain aliases.
67|.define RAa, RA
68|.define RBa, RB
69|.define RCa, RC
70|.define RDa, RD
71|.endif
72|
73|.if not X64		// x86: only FCARG1/FCARG2 are defined here, no CARGn.
74|.define FCARG1,	ecx		// x86 fastcall arguments.
75|.define FCARG2,	edx
76|.elif X64WIN
77|.define CARG1,		rcx		// x64/WIN64 C call arguments.
78|.define CARG2,		rdx
79|.define CARG3,		r8
80|.define CARG4,		r9
81|.define CARG1d,	ecx		// CARGnd = 32 bit view of CARGn.
82|.define CARG2d,	edx
83|.define CARG3d,	r8d
84|.define CARG4d,	r9d
85|.define FCARG1,	CARG1d		// Upwards compatible to x86 fastcall.
86|.define FCARG2,	CARG2d
87|.else
88|.define CARG1,		rdi		// x64/POSIX C call arguments.
89|.define CARG2,		rsi
90|.define CARG3,		rdx
91|.define CARG4,		rcx
92|.define CARG5,		r8
93|.define CARG6,		r9
94|.define CARG1d,	edi		// CARGnd = 32 bit view of CARGn.
95|.define CARG2d,	esi
96|.define CARG3d,	edx
97|.define CARG4d,	ecx
98|.define CARG5d,	r8d
99|.define CARG6d,	r9d
100|.define FCARG1,	CARG1d		// Simulate x86 fastcall.
101|.define FCARG2,	CARG2d
102|.endif
103|
104|// Type definitions. Some of these are only used for documentation.
105|.type L,		lua_State	// Used as L:reg->field, e.g. L:RB->base.
106|.type GL,		global_State	// e.g. GL:RB->vmstate.
107|.type TVALUE,		TValue
108|.type GCOBJ,		GCobj
109|.type STR,		GCstr
110|.type TAB,		GCtab
111|.type LFUNC,		GCfuncL		// e.g. LFUNC:RB->pc in ins_callt.
112|.type CFUNC,		GCfuncC
113|.type PROTO,		GCproto
114|.type UPVAL,		GCupval
115|.type NODE,		Node
116|.type NARGS,		int		// Tags a register holding an argument count, e.g. NARGS:RD.
117|.type TRACE,		GCtrace
118|.type SBUF,		SBuf
119|
120|// Stack layout while in interpreter. Must match with lj_frame.h.
121|//-----------------------------------------------------------------------
122|.if not X64		// x86 stack layout.
123|
124|.if WIN
125|
126|.define CFRAME_SPACE,	aword*9			// Delta for esp (see <--).
127|.macro saveregs_		// Save edi/esi/ebx, push a handler/next pair at fs:[0], reserve the frame.
128|  push edi; push esi; push ebx
129|  push extern lj_err_unwind_win	// Ends up in the SEH_FUNC slot.
130|  fs; push dword [0]		// Previous fs:[0] ends up in the SEH_NEXT slot.
131|  fs; mov [0], esp		// fs:[0] now points at the pair just pushed.
132|  sub esp, CFRAME_SPACE
133|.endmacro
134|.macro restoreregs		// Inverse of saveregs, including the ebp pushed there.
135|  add esp, CFRAME_SPACE
136|  fs; pop dword [0]		// Restore the previous fs:[0].
137|  pop edi	// Short for esp += 4. Drops the SEH_FUNC slot, edi is reloaded below.
138|  pop ebx; pop esi; pop edi; pop ebp
139|.endmacro
140|
141|.else
142|
143|.define CFRAME_SPACE,	aword*7			// Delta for esp (see <--).
144|.macro saveregs_		// Save edi/esi/ebx and reserve the frame.
145|  push edi; push esi; push ebx
146|  sub esp, CFRAME_SPACE
147|.endmacro
148|.macro restoreregs		// Inverse of saveregs: release the frame, pop in reverse push order.
149|  add esp, CFRAME_SPACE
150|  pop ebx; pop esi; pop edi; pop ebp
151|.endmacro
152|
153|.endif
154|
155|.macro saveregs		// Full prologue: push ebp first, then run the platform-specific saveregs_.
156|  push ebp; saveregs_
157|.endmacro
158|
159|.if WIN			// x86/Windows: adds SEH_FUNC/SEH_NEXT and two unused slots.
160|.define SAVE_ERRF,	aword [esp+aword*19]	// vm_pcall/vm_cpcall only.
161|.define SAVE_NRES,	aword [esp+aword*18]
162|.define SAVE_CFRAME,	aword [esp+aword*17]
163|.define SAVE_L,	aword [esp+aword*16]
164|//----- 16 byte aligned, ^^^ arguments from C caller
165|.define SAVE_RET,	aword [esp+aword*15]	//<-- esp entering interpreter.
166|.define SAVE_R4,	aword [esp+aword*14]
167|.define SAVE_R3,	aword [esp+aword*13]
168|.define SAVE_R2,	aword [esp+aword*12]
169|//----- 16 byte aligned
170|.define SAVE_R1,	aword [esp+aword*11]
171|.define SEH_FUNC,	aword [esp+aword*10]	// lj_err_unwind_win, pushed by saveregs_.
172|.define SEH_NEXT,	aword [esp+aword*9]	//<-- esp after register saves.
173|.define UNUSED2,	aword [esp+aword*8]
174|//----- 16 byte aligned
175|.define UNUSED1,	aword [esp+aword*7]
176|.define SAVE_PC,	aword [esp+aword*6]
177|.define TMP2,		aword [esp+aword*5]
178|.define TMP1,		aword [esp+aword*4]
179|//----- 16 byte aligned
180|.define ARG4,		aword [esp+aword*3]
181|.define ARG3,		aword [esp+aword*2]
182|.define ARG2,		aword [esp+aword*1]
183|.define ARG1,		aword [esp]		//<-- esp while in interpreter.
184|//----- 16 byte aligned, ^^^ arguments for C callee
185|.else			// x86 without WIN: same slots minus the four Windows-only ones.
186|.define SAVE_ERRF,	aword [esp+aword*15]	// vm_pcall/vm_cpcall only.
187|.define SAVE_NRES,	aword [esp+aword*14]
188|.define SAVE_CFRAME,	aword [esp+aword*13]
189|.define SAVE_L,	aword [esp+aword*12]
190|//----- 16 byte aligned, ^^^ arguments from C caller
191|.define SAVE_RET,	aword [esp+aword*11]	//<-- esp entering interpreter.
192|.define SAVE_R4,	aword [esp+aword*10]
193|.define SAVE_R3,	aword [esp+aword*9]
194|.define SAVE_R2,	aword [esp+aword*8]
195|//----- 16 byte aligned
196|.define SAVE_R1,	aword [esp+aword*7]	//<-- esp after register saves.
197|.define SAVE_PC,	aword [esp+aword*6]
198|.define TMP2,		aword [esp+aword*5]
199|.define TMP1,		aword [esp+aword*4]
200|//----- 16 byte aligned
201|.define ARG4,		aword [esp+aword*3]
202|.define ARG3,		aword [esp+aword*2]
203|.define ARG2,		aword [esp+aword*1]
204|.define ARG1,		aword [esp]		//<-- esp while in interpreter.
205|//----- 16 byte aligned, ^^^ arguments for C callee
206|.endif
207|
208|// FPARGx overlaps ARGx and ARG(x+1) on x86.
209|.define FPARG3,	qword [esp+qword*1]	// Covers ARG3 and ARG4.
210|.define FPARG1,	qword [esp]		// Covers ARG1 and ARG2.
211|// TMPQ overlaps TMP1/TMP2. ARG5/MULTRES overlap TMP1/TMP2 (and TMPQ).
212|.define TMPQ,		qword [esp+aword*4]
213|.define TMP3,		ARG4		// Extra temporary, shares the ARG4 slot.
214|.define ARG5,		TMP1
215|.define TMPa,		TMP1		// Address-sized temporary, same as TMP1 on x86.
216|.define MULTRES,	TMP2
217|
218|// Arguments for vm_call and vm_pcall.
219|.define INARG_BASE,	SAVE_CFRAME		// Overwritten by SAVE_CFRAME!
220|
221|// Arguments for vm_cpcall.
222|.define INARG_CP_CALL,	SAVE_ERRF		// Same slots as the SAVE_* names, read before those are written.
223|.define INARG_CP_UD,	SAVE_NRES
224|.define INARG_CP_FUNC,	SAVE_CFRAME
225|
226|//-----------------------------------------------------------------------
227|.elif X64WIN		// x64/Windows stack layout
228|
229|.define CFRAME_SPACE,	aword*5			// Delta for rsp (see <--).
230|.macro saveregs_		// Save rdi/rsi/rbx and reserve the frame.
231|  push rdi; push rsi; push rbx
232|  sub rsp, CFRAME_SPACE
233|.endmacro
234|.macro saveregs		// Full prologue: push rbp first, then saveregs_.
235|  push rbp; saveregs_
236|.endmacro
237|.macro restoreregs		// Inverse of saveregs: release the frame, pop in reverse push order.
238|  add rsp, CFRAME_SPACE
239|  pop rbx; pop rsi; pop rdi; pop rbp
240|.endmacro
241|
242|.define SAVE_CFRAME,	aword [rsp+aword*13]
243|.define SAVE_PC,	dword [rsp+dword*25]	// The dword slots are indexed in 4 byte units.
244|.define SAVE_L,	dword [rsp+dword*24]
245|.define SAVE_ERRF,	dword [rsp+dword*23]
246|.define SAVE_NRES,	dword [rsp+dword*22]
247|.define TMP2,		dword [rsp+dword*21]
248|.define TMP1,		dword [rsp+dword*20]
249|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
250|.define SAVE_RET,	aword [rsp+aword*9]	//<-- rsp entering interpreter.
251|.define SAVE_R4,	aword [rsp+aword*8]
252|.define SAVE_R3,	aword [rsp+aword*7]
253|.define SAVE_R2,	aword [rsp+aword*6]
254|.define SAVE_R1,	aword [rsp+aword*5]	//<-- rsp after register saves.
255|.define ARG5,		aword [rsp+aword*4]
256|.define CSAVE_4,	aword [rsp+aword*3]
257|.define CSAVE_3,	aword [rsp+aword*2]
258|.define CSAVE_2,	aword [rsp+aword*1]
259|.define CSAVE_1,	aword [rsp]		//<-- rsp while in interpreter.
260|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
261|
262|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
263|.define TMPQ,		qword [rsp+aword*10]
264|.define MULTRES,	TMP2
265|.define TMPa,		ARG5		// Address-sized temporary, shares the ARG5 slot.
266|.define ARG5d,		dword [rsp+aword*4]	// 32 bit view of the ARG5 slot.
267|.define TMP3,		ARG5d		// 32 bit temporary, shares the ARG5 slot as well.
268|
269|//-----------------------------------------------------------------------
270|.else			// x64/POSIX stack layout
271|
272|.define CFRAME_SPACE,	aword*5			// Delta for rsp (see <--).
273|.macro saveregs_		// Save rbx/r15/r14 (plus r13/r12 with NO_UNWIND) and reserve the frame.
274|  push rbx; push r15; push r14
275|.if NO_UNWIND
276|  push r13; push r12
277|.endif
278|  sub rsp, CFRAME_SPACE
279|.endmacro
280|.macro saveregs		// Full prologue: push rbp first, then saveregs_.
281|  push rbp; saveregs_
282|.endmacro
283|.macro restoreregs		// Inverse of saveregs: release the frame, pop in reverse push order.
284|  add rsp, CFRAME_SPACE
285|.if NO_UNWIND
286|  pop r12; pop r13
287|.endif
288|  pop r14; pop r15; pop rbx; pop rbp
289|.endmacro
290|
291|//----- 16 byte aligned
292|.if NO_UNWIND		// Two more pushed registers shift the SAVE_R* slots up by two.
293|.define SAVE_RET,	aword [rsp+aword*11]	//<-- rsp entering interpreter.
294|.define SAVE_R4,	aword [rsp+aword*10]
295|.define SAVE_R3,	aword [rsp+aword*9]
296|.define SAVE_R2,	aword [rsp+aword*8]
297|.define SAVE_R1,	aword [rsp+aword*7]
298|.define SAVE_RU2,	aword [rsp+aword*6]	// Holds r13 (r12 is in SAVE_RU1), per the push order in saveregs_.
299|.define SAVE_RU1,	aword [rsp+aword*5]	//<-- rsp after register saves.
300|.else
301|.define SAVE_RET,	aword [rsp+aword*9]	//<-- rsp entering interpreter.
302|.define SAVE_R4,	aword [rsp+aword*8]
303|.define SAVE_R3,	aword [rsp+aword*7]
304|.define SAVE_R2,	aword [rsp+aword*6]
305|.define SAVE_R1,	aword [rsp+aword*5]	//<-- rsp after register saves.
306|.endif
307|.define SAVE_CFRAME,	aword [rsp+aword*4]
308|.define SAVE_PC,	dword [rsp+dword*7]	// The dword slots are indexed in 4 byte units.
309|.define SAVE_L,	dword [rsp+dword*6]
310|.define SAVE_ERRF,	dword [rsp+dword*5]
311|.define SAVE_NRES,	dword [rsp+dword*4]
312|.define TMPa,		aword [rsp+aword*1]
313|.define TMP2,		dword [rsp+dword*1]
314|.define TMP1,		dword [rsp]		//<-- rsp while in interpreter.
315|//----- 16 byte aligned
316|
317|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
318|.define TMPQ,		qword [rsp]
319|.define TMP3,		dword [rsp+aword*1]	// Low 32 bits of the TMPa slot.
320|.define MULTRES,	TMP2
321|
322|.endif
323|
324|//-----------------------------------------------------------------------
325|
326|// Instruction headers. They finish operand decoding after ins_NEXT left the upper 16 instruction bits in RC.
327|.macro ins_A; .endmacro		// Nothing left to decode.
328|.macro ins_AD; .endmacro		// Nothing left to decode, RD already holds the 16 bit operand.
329|.macro ins_AJ; .endmacro		// Nothing left to decode.
330|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro		// RB = upper byte, RC = lower byte.
331|.macro ins_AB_; movzx RB, RCH; .endmacro		// RB = upper byte, RC is left as is.
332|.macro ins_A_C; movzx RC, RCL; .endmacro		// RC = lower byte only.
333|.macro ins_AND; not RDa; .endmacro		// RD = bitwise complement of RD (address-sized).
334|
335|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
336|.macro ins_NEXT		// Out: OP = opcode, RA = operand A, RC/RD = upper 16 bits, PC advanced by 4.
337|  mov RC, [PC]			// Fetch the 32 bit instruction word.
338|  movzx RA, RCH			// RA = bits 8-15.
339|  movzx OP, RCL			// OP = bits 0-7.
340|  add PC, 4			// PC now points past this instruction.
341|  shr RC, 16			// RC = bits 16-31.
342|.if X64
343|  jmp aword [DISPATCH+OP*8]	// Table entries are 8 bytes on x64.
344|.else
345|  jmp aword [DISPATCH+OP*4]	// Table entries are 4 bytes on x86.
346|.endif
347|.endmacro
348|
349|// Instruction footer.
350|.if 1		// Hardwired choice: replicated dispatch is used, the .else branch is disabled.
351|  // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
352|  .define ins_next, ins_NEXT
353|  .define ins_next_, ins_NEXT
354|.else
355|  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
356|  // Affects only certain kinds of benchmarks (and only with -j off).
357|  // Around 10%-30% slower on Core2, much slower on P4.
358|  .macro ins_next		// Jump to the single shared dispatch sequence.
359|    jmp ->ins_next
360|  .endmacro
361|  .macro ins_next_		// Emits the shared dispatch sequence and its global label.
362|  ->ins_next:
363|    ins_NEXT
364|  .endmacro
365|.endif
366|
367|// Call decode and dispatch. RD (nargs+1) is not modified by this macro.
368|.macro ins_callt
369|  // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC
370|  mov PC, LFUNC:RB->pc		// PC = pc field of the function in RB.
371|  mov RA, [PC]			// Fetch the instruction word at that PC.
372|  movzx OP, RAL			// OP = bits 0-7. This overwrites RB.
373|  movzx RA, RAH			// RA = bits 8-15.
374|  add PC, 4			// PC now points past that instruction.
375|.if X64
376|  jmp aword [DISPATCH+OP*8]	// Table entries are 8 bytes on x64.
377|.else
378|  jmp aword [DISPATCH+OP*4]	// Table entries are 4 bytes on x86.
379|.endif
380|.endmacro
381|
382|.macro ins_call		// Like ins_callt, but stores the current PC at [BASE-4] first.
383|  // BASE = new base, RB = LFUNC, RD = nargs+1
384|  mov [BASE-4], PC		// Establishes the [BASE-4] = PC precondition of ins_callt.
385|  ins_callt
386|.endmacro
387|
388|//-----------------------------------------------------------------------
389|
390|// Macros to test operand types. reg holds a slot index, the compared dword is at [BASE+reg*8+4].
391|.macro checktp, reg, tp;  cmp dword [BASE+reg*8+4], tp; .endmacro		// Sets flags only.
392|.macro checknum, reg, target; checktp reg, LJ_TISNUM; jae target; .endmacro		// Branch if dword >= LJ_TISNUM (unsigned).
393|.macro checkint, reg, target; checktp reg, LJ_TISNUM; jne target; .endmacro		// Branch if dword != LJ_TISNUM.
394|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro		// Branch if dword != LJ_TSTR.
395|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro		// Branch if dword != LJ_TTAB.
396|
397|// These operands must be used with movzx. They address the instruction just before PC (PC was already advanced by 4).
398|.define PC_OP, byte [PC-4]		// Bits 0-7.
399|.define PC_RA, byte [PC-3]		// Bits 8-15.
400|.define PC_RB, byte [PC-1]		// Bits 24-31.
401|.define PC_RC, byte [PC-2]		// Bits 16-23.
402|.define PC_RD, word [PC-2]		// Bits 16-31, overlaps PC_RC and PC_RB.
403|
404|.macro branchPC, reg		// PC += (reg - BCBIAS_J) * 4. Uses lea, so flags are preserved.
405|  lea PC, [PC+reg*4-BCBIAS_J*4]
406|.endmacro
407|
408|// Offsets of global_State/jit_State fields, to be added to DISPATCH. Assumes DISPATCH is relative to GL.
409#define DISPATCH_GL(field)	(GG_DISP2G + (int)offsetof(global_State, field))
410#define DISPATCH_J(field)	(GG_DISP2J + (int)offsetof(jit_State, field))
411|// PC2PROTO: offset of a GCproto field minus sizeof(GCproto), i.e. relative to the end of the GCproto struct.
412#define PC2PROTO(field)  ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
413|
414|// Decrement hashed hotcount and trigger trace recorder if zero.
415|.macro hotloop, reg		// reg is a scratch register and gets overwritten.
416|  mov reg, PC
417|  shr reg, 1
418|  and reg, HOTCOUNT_PCMASK	// reg = (PC >> 1) & HOTCOUNT_PCMASK, used as a byte offset.
419|  sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP	// 16 bit counter.
420|  jb ->vm_hotloop		// Taken when the subtraction borrows, i.e. the counter was below HOTCOUNT_LOOP.
421|.endmacro
422|
423|.macro hotcall, reg		// Same as hotloop, but subtracts HOTCOUNT_CALL and branches to vm_hotcall. Overwrites reg.
424|  mov reg, PC
425|  shr reg, 1
426|  and reg, HOTCOUNT_PCMASK	// reg = (PC >> 1) & HOTCOUNT_PCMASK, used as a byte offset.
427|  sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL	// 16 bit counter.
428|  jb ->vm_hotcall		// Taken when the subtraction borrows, i.e. the counter was below HOTCOUNT_CALL.
429|.endmacro
430|
431|// Set current VM state: stores the complement of LJ_VMST_<st> in the vmstate field of global_State.
432|.macro set_vmstate, st		// st is the name suffix pasted onto LJ_VMST_, e.g. INTERP or C.
433|  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
434|.endmacro
435|
436|// x87 compares.
437|.macro fcomparepp			// Compare and pop st0 >< st1.
438|  fucomip st1			// Compare st0 with st1, set CPU flags, pop st0.
439|  fpop				// Pop the remaining operand as well.
440|.endmacro
441|
442|.macro fpop1; fstp st1; .endmacro		// Store st0 into st1 and pop: the old st1 is dropped, the old st0 stays on top.
443|
444|// Synthesize SSE FP constants. tmp is a scratch GPR (sseconst_abs does not use it on x86).
445|.macro sseconst_abs, reg, tmp		// Synthesize abs mask (every bit set except bit 63).
446|.if X64
447|  mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
448|.else
449|  pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1		// All ones, then each qword shifted right by one.
450|.endif
451|.endmacro
452|
453|.macro sseconst_hi, reg, tmp, val	// Synthesize hi-32 bit const. val = 8 hex digits without 0x prefix.
454|.if X64
455|  mov64 tmp, U64x(val,00000000); movd reg, tmp
456|.else
457|  mov tmp, 0x .. val; movd reg, tmp; pshufd reg, reg, 0x51		// The shuffle moves val into bits 32-63 and zeroes the other dwords.
458|.endif
459|.endmacro
460|
461|.macro sseconst_sign, reg, tmp		// Synthesize sign mask (hi dword 0x80000000).
462|  sseconst_hi reg, tmp, 80000000
463|.endmacro
464|.macro sseconst_1, reg, tmp		// Synthesize 1.0 (hi dword 0x3ff00000).
465|  sseconst_hi reg, tmp, 3ff00000
466|.endmacro
467|.macro sseconst_m1, reg, tmp		// Synthesize -1.0 (hi dword 0xbff00000).
468|  sseconst_hi reg, tmp, bff00000
469|.endmacro
470|.macro sseconst_2p52, reg, tmp		// Synthesize 2^52 (hi dword 0x43300000).
471|  sseconst_hi reg, tmp, 43300000
472|.endmacro
473|.macro sseconst_tobit, reg, tmp	// Synthesize 2^52 + 2^51 (hi dword 0x43380000).
474|  sseconst_hi reg, tmp, 43380000
475|.endmacro
476|
477|// Move table write barrier back. Overwrites reg. tab is a typed register holding the table.
478|.macro barrierback, tab, reg
479|  and byte tab->marked, (uint8_t)~LJ_GC_BLACK	// black2gray(tab)
480|  mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)]	// reg = current head of the grayagain list.
481|  mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab	// tab becomes the new head.
482|  mov tab->gclist, reg		// Link tab to the previous head.
483|.endmacro
484|
485|//-----------------------------------------------------------------------
486
487/* Generate subroutines used by opcodes and other parts of the VM. */
488/* The .code_sub section should be last to help static branch prediction. */
489static void build_subroutines(BuildCtx *ctx)
490{
491  |.code_sub
492  |
493  |//-----------------------------------------------------------------------
494  |//-- Return handling ----------------------------------------------------
495  |//-----------------------------------------------------------------------
496  |
497  |->vm_returnp:
498  |  test PC, FRAME_P
499  |  jz ->cont_dispatch
500  |
501  |  // Return from pcall or xpcall fast func.
502  |  and PC, -8
503  |  sub BASE, PC			// Restore caller base.
504  |  lea RAa, [RA+PC-8]			// Rebase RA and prepend one result.
505  |  mov PC, [BASE-4]			// Fetch PC of previous frame.
506  |  // Prepending may overwrite the pcall frame, so do it at the end.
507  |  mov dword [BASE+RA+4], LJ_TTRUE	// Prepend true to results.
508  |
509  |->vm_returnc:
510  |  add RD, 1				// RD = nresults+1
511  |  jz ->vm_unwind_yield
512  |  mov MULTRES, RD
513  |  test PC, FRAME_TYPE
514  |  jz ->BC_RET_Z			// Handle regular return to Lua.
515  |
516  |->vm_return:
517  |  // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
518  |  xor PC, FRAME_C
519  |  test PC, FRAME_TYPE
520  |  jnz ->vm_returnp
521  |
522  |  // Return to C.
523  |  set_vmstate C
524  |  and PC, -8
525  |  sub PC, BASE
526  |  neg PC				// Previous base = BASE - delta.
527  |
528  |  sub RD, 1
529  |  jz >2
530  |1:  // Move results down.
531  |.if X64
532  |  mov RBa, [BASE+RA]
533  |  mov [BASE-8], RBa
534  |.else
535  |  mov RB, [BASE+RA]
536  |  mov [BASE-8], RB
537  |  mov RB, [BASE+RA+4]
538  |  mov [BASE-4], RB
539  |.endif
540  |  add BASE, 8
541  |  sub RD, 1
542  |  jnz <1
543  |2:
544  |  mov L:RB, SAVE_L
545  |  mov L:RB->base, PC
546  |3:
547  |  mov RD, MULTRES
548  |  mov RA, SAVE_NRES			// RA = wanted nresults+1
549  |4:
550  |  cmp RA, RD
551  |  jne >6				// More/less results wanted?
552  |5:
553  |  sub BASE, 8
554  |  mov L:RB->top, BASE
555  |
556  |->vm_leave_cp:
557  |  mov RAa, SAVE_CFRAME		// Restore previous C frame.
558  |  mov L:RB->cframe, RAa
559  |  xor eax, eax			// Ok return status for vm_pcall.
560  |
561  |->vm_leave_unw:
562  |  restoreregs
563  |  ret
564  |
565  |6:
566  |  jb >7				// Less results wanted?
567  |  // More results wanted. Check stack size and fill up results with nil.
568  |  cmp BASE, L:RB->maxstack
569  |  ja >8
570  |  mov dword [BASE-4], LJ_TNIL
571  |  add BASE, 8
572  |  add RD, 1
573  |  jmp <4
574  |
575  |7:  // Less results wanted.
576  |  test RA, RA
577  |  jz <5				// But check for LUA_MULTRET+1.
578  |  sub RA, RD				// Negative result!
579  |  lea BASE, [BASE+RA*8]		// Correct top.
580  |  jmp <5
581  |
582  |8:  // Corner case: need to grow stack for filling up results.
583  |  // This can happen if:
584  |  // - A C function grows the stack (a lot).
585  |  // - The GC shrinks the stack in between.
586  |  // - A return back from a lua_call() with (high) nresults adjustment.
587  |  mov L:RB->top, BASE		// Save current top held in BASE (yes).
588  |  mov MULTRES, RD			// Need to fill only remainder with nil.
589  |  mov FCARG2, RA
590  |  mov FCARG1, L:RB
591  |  call extern lj_state_growstack@8	// (lua_State *L, int n)
592  |  mov BASE, L:RB->top		// Need the (realloced) L->top in BASE.
593  |  jmp <3
594  |
595  |->vm_unwind_yield:
596  |  mov al, LUA_YIELD
597  |  jmp ->vm_unwind_c_eh
598  |
599  |->vm_unwind_c@8:			// Unwind C stack, return from vm_pcall.
600  |  // (void *cframe, int errcode)
601  |.if X64
602  |  mov eax, CARG2d			// Error return status for vm_pcall.
603  |  mov rsp, CARG1
604  |.else
605  |  mov eax, FCARG2			// Error return status for vm_pcall.
606  |  mov esp, FCARG1
607  |.if WIN
608  |  lea FCARG1, SEH_NEXT
609  |  fs; mov [0], FCARG1
610  |.endif
611  |.endif
612  |->vm_unwind_c_eh:			// Landing pad for external unwinder.
613  |  mov L:RB, SAVE_L
614  |  mov GL:RB, L:RB->glref
615  |  mov dword GL:RB->vmstate, ~LJ_VMST_C
616  |  jmp ->vm_leave_unw
617  |
618  |->vm_unwind_rethrow:
619  |.if X64 and not X64WIN
620  |  mov FCARG1, SAVE_L
621  |  mov FCARG2, eax
622  |  restoreregs
623  |  jmp extern lj_err_throw@8		// (lua_State *L, int errcode)
624  |.endif
625  |
626  |->vm_unwind_ff@4:			// Unwind C stack, return from ff pcall.
627  |  // (void *cframe)
628  |.if X64
629  |  and CARG1, CFRAME_RAWMASK
630  |  mov rsp, CARG1
631  |.else
632  |  and FCARG1, CFRAME_RAWMASK
633  |  mov esp, FCARG1
634  |.if WIN
635  |  lea FCARG1, SEH_NEXT
636  |  fs; mov [0], FCARG1
637  |.endif
638  |.endif
639  |->vm_unwind_ff_eh:			// Landing pad for external unwinder.
640  |  mov L:RB, SAVE_L
641  |  mov RAa, -8			// Results start at BASE+RA = BASE-8.
642  |  mov RD, 1+1			// Really 1+2 results, incr. later.
643  |  mov BASE, L:RB->base
644  |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
645  |  add DISPATCH, GG_G2DISP
646  |  mov PC, [BASE-4]			// Fetch PC of previous frame.
647  |  mov dword [BASE-4], LJ_TFALSE	// Prepend false to error message.
648  |  set_vmstate INTERP
649  |  jmp ->vm_returnc			// Increments RD/MULTRES and returns.
650  |
651  |.if WIN and not X64
652  |->vm_rtlunwind@16:			// Thin layer around RtlUnwind.
653  |  // (void *cframe, void *excptrec, void *unwinder, int errcode)
654  |  mov [esp], FCARG1			// Return value for RtlUnwind.
655  |  push FCARG2			// Exception record for RtlUnwind.
656  |  push 0				// Ignored by RtlUnwind.
657  |  push dword [FCARG1+CFRAME_OFS_SEH]
658  |  call extern RtlUnwind@16		// Violates ABI (clobbers too much).
659  |  mov FCARG1, eax
660  |  mov FCARG2, [esp+4]		// errcode (for vm_unwind_c).
661  |  ret				// Jump to unwinder.
662  |.endif
663  |
664  |//-----------------------------------------------------------------------
665  |//-- Grow stack for calls -----------------------------------------------
666  |//-----------------------------------------------------------------------
667  |
668  |->vm_growstack_c:			// Grow stack for C function.
669  |  mov FCARG2, LUA_MINSTACK
670  |  jmp >2
671  |
672  |->vm_growstack_v:			// Grow stack for vararg Lua function.
673  |  sub RD, 8
674  |  jmp >1
675  |
676  |->vm_growstack_f:			// Grow stack for fixarg Lua function.
677  |  // BASE = new base, RD = nargs+1, RB = L, PC = first PC
678  |  lea RD, [BASE+NARGS:RD*8-8]
679  |1:
680  |  movzx RA, byte [PC-4+PC2PROTO(framesize)]
681  |  add PC, 4				// Must point after first instruction.
682  |  mov L:RB->base, BASE
683  |  mov L:RB->top, RD
684  |  mov SAVE_PC, PC
685  |  mov FCARG2, RA
686  |2:
687  |  // RB = L, L->base = new base, L->top = top
688  |  mov FCARG1, L:RB
689  |  call extern lj_state_growstack@8	// (lua_State *L, int n)
690  |  mov BASE, L:RB->base
691  |  mov RD, L:RB->top
692  |  mov LFUNC:RB, [BASE-8]
693  |  sub RD, BASE
694  |  shr RD, 3
695  |  add NARGS:RD, 1
696  |  // BASE = new base, RB = LFUNC, RD = nargs+1
697  |  ins_callt				// Just retry the call.
698  |
699  |//-----------------------------------------------------------------------
700  |//-- Entry points into the assembler VM ---------------------------------
701  |//-----------------------------------------------------------------------
702  |
703  |->vm_resume:				// Setup C frame and resume thread.
704  |  // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
705  |  saveregs
706  |.if X64
707  |  mov L:RB, CARG1d			// Caveat: CARG1d may be RA.
708  |  mov SAVE_L, CARG1d
709  |  mov RA, CARG2d
710  |.else
711  |  mov L:RB, SAVE_L
712  |  mov RA, INARG_BASE			// Caveat: overlaps SAVE_CFRAME!
713  |.endif
714  |  mov PC, FRAME_CP
715  |  xor RD, RD
716  |  lea KBASEa, [esp+CFRAME_RESUME]
717  |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
718  |  add DISPATCH, GG_G2DISP
719  |  mov SAVE_PC, RD			// Any value outside of bytecode is ok.
720  |  mov SAVE_CFRAME, RDa
721  |.if X64
722  |  mov SAVE_NRES, RD
723  |  mov SAVE_ERRF, RD
724  |.endif
725  |  mov L:RB->cframe, KBASEa
726  |  cmp byte L:RB->status, RDL
727  |  je >2				// Initial resume (like a call).
728  |
729  |  // Resume after yield (like a return).
730  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
731  |  set_vmstate INTERP
732  |  mov byte L:RB->status, RDL
733  |  mov BASE, L:RB->base
734  |  mov RD, L:RB->top
735  |  sub RD, RA
736  |  shr RD, 3
737  |  add RD, 1				// RD = nresults+1
738  |  sub RA, BASE			// RA = resultofs
739  |  mov PC, [BASE-4]
740  |  mov MULTRES, RD
741  |  test PC, FRAME_TYPE
742  |  jz ->BC_RET_Z
743  |  jmp ->vm_return
744  |
745  |->vm_pcall:				// Setup protected C frame and enter VM.
746  |  // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
747  |  saveregs
748  |  mov PC, FRAME_CP
749  |.if X64
750  |  mov SAVE_ERRF, CARG4d
751  |.endif
752  |  jmp >1
753  |
754  |->vm_call:				// Setup C frame and enter VM.
755  |  // (lua_State *L, TValue *base, int nres1)
756  |  saveregs
757  |  mov PC, FRAME_C
758  |
759  |1:  // Entry point for vm_pcall above (PC = ftype).
760  |.if X64
761  |  mov SAVE_NRES, CARG3d
762  |  mov L:RB, CARG1d			// Caveat: CARG1d may be RA.
763  |  mov SAVE_L, CARG1d
764  |  mov RA, CARG2d
765  |.else
766  |  mov L:RB, SAVE_L
767  |  mov RA, INARG_BASE			// Caveat: overlaps SAVE_CFRAME!
768  |.endif
769  |
770  |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
771  |  mov KBASEa, L:RB->cframe		// Add our C frame to cframe chain.
772  |  mov SAVE_CFRAME, KBASEa
773  |  mov SAVE_PC, L:RB			// Any value outside of bytecode is ok.
774  |  add DISPATCH, GG_G2DISP
775  |.if X64
776  |  mov L:RB->cframe, rsp
777  |.else
778  |  mov L:RB->cframe, esp
779  |.endif
780  |
781  |2:  // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
782  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
783  |  set_vmstate INTERP
784  |  mov BASE, L:RB->base		// BASE = old base (used in vmeta_call).
785  |  add PC, RA
786  |  sub PC, BASE			// PC = frame delta + frame type
787  |
788  |  mov RD, L:RB->top
789  |  sub RD, RA
790  |  shr NARGS:RD, 3
791  |  add NARGS:RD, 1			// RD = nargs+1
792  |
793  |->vm_call_dispatch:
794  |  mov LFUNC:RB, [RA-8]
795  |  cmp dword [RA-4], LJ_TFUNC
796  |  jne ->vmeta_call			// Ensure KBASE defined and != BASE.
797  |
798  |->vm_call_dispatch_f:
799  |  mov BASE, RA
800  |  ins_call
801  |  // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
802  |
803  |->vm_cpcall:				// Setup protected C frame, call C.
804  |  // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
805  |  saveregs
806  |.if X64
807  |  mov L:RB, CARG1d			// Caveat: CARG1d may be RA.
808  |  mov SAVE_L, CARG1d
809  |.else
810  |  mov L:RB, SAVE_L
811  |  // Caveat: INARG_CP_* and SAVE_CFRAME/SAVE_NRES/SAVE_ERRF overlap!
812  |  mov RC, INARG_CP_UD		// Get args before they are overwritten.
813  |  mov RA, INARG_CP_FUNC
814  |  mov BASE, INARG_CP_CALL
815  |.endif
816  |  mov SAVE_PC, L:RB			// Any value outside of bytecode is ok.
817  |
818  |  mov KBASE, L:RB->stack		// Compute -savestack(L, L->top).
819  |  sub KBASE, L:RB->top
820  |   mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
821  |  mov SAVE_ERRF, 0			// No error function.
822  |  mov SAVE_NRES, KBASE		// Neg. delta means cframe w/o frame.
823  |   add DISPATCH, GG_G2DISP
824  |  // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
825  |
826  |.if X64
827  |  mov KBASEa, L:RB->cframe		// Add our C frame to cframe chain.
828  |  mov SAVE_CFRAME, KBASEa
829  |  mov L:RB->cframe, rsp
830  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
831  |
832  |  call CARG4			// (lua_State *L, lua_CFunction func, void *ud)
833  |.else
834  |  mov ARG3, RC			// Have to copy args downwards.
835  |  mov ARG2, RA
836  |  mov ARG1, L:RB
837  |
838  |  mov KBASE, L:RB->cframe		// Add our C frame to cframe chain.
839  |  mov SAVE_CFRAME, KBASE
840  |  mov L:RB->cframe, esp
841  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
842  |
843  |  call BASE			// (lua_State *L, lua_CFunction func, void *ud)
844  |.endif
845  |  // TValue * (new base) or NULL returned in eax (RC).
846  |  test RC, RC
847  |  jz ->vm_leave_cp			// No base? Just remove C frame.
848  |  mov RA, RC
849  |  mov PC, FRAME_CP
850  |  jmp <2				// Else continue with the call.
851  |
852  |//-----------------------------------------------------------------------
853  |//-- Metamethod handling ------------------------------------------------
854  |//-----------------------------------------------------------------------
855  |
856  |//-- Continuation dispatch ----------------------------------------------
857  |
858  |->cont_dispatch:
859  |  // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
860  |  add RA, BASE
861  |  and PC, -8
862  |  mov RB, BASE
863  |  sub BASE, PC			// Restore caller BASE.
864  |  mov dword [RA+RD*8-4], LJ_TNIL	// Ensure one valid arg.
865  |  mov RC, RA				// ... in [RC]
866  |  mov PC, [RB-12]			// Restore PC from [cont|PC].
867  |.if X64
868  |  movsxd RAa, dword [RB-16]		// May be negative on WIN64 with debug.
869  |.if FFI
870  |  cmp RA, 1
871  |  jbe >1
872  |.endif
873  |  lea KBASEa, qword [=>0]
874  |  add RAa, KBASEa
875  |.else
876  |  mov RA, dword [RB-16]
877  |.if FFI
878  |  cmp RA, 1
879  |  jbe >1
880  |.endif
881  |.endif
882  |  mov LFUNC:KBASE, [BASE-8]
883  |  mov KBASE, LFUNC:KBASE->pc
884  |  mov KBASE, [KBASE+PC2PROTO(k)]
885  |  // BASE = base, RC = result, RB = meta base
886  |  jmp RAa				// Jump to continuation.
887  |
888  |.if FFI
889  |1:
890  |  je ->cont_ffi_callback		// cont = 1: return from FFI callback.
891  |  // cont = 0: Tail call from C function.
892  |  sub RB, BASE
893  |  shr RB, 3
894  |  lea RD, [RB-1]
895  |  jmp ->vm_call_tail
896  |.endif
897  |
898  |->cont_cat:				// BASE = base, RC = result, RB = mbase
899  |  movzx RA, PC_RB
900  |  sub RB, 16
901  |  lea RA, [BASE+RA*8]
902  |  sub RA, RB
903  |  je ->cont_ra
904  |  neg RA
905  |  shr RA, 3
906  |.if X64WIN
907  |  mov CARG3d, RA
908  |  mov L:CARG1d, SAVE_L
909  |  mov L:CARG1d->base, BASE
910  |  mov RCa, [RC]
911  |  mov [RB], RCa
912  |  mov CARG2d, RB
913  |.elif X64
914  |  mov L:CARG1d, SAVE_L
915  |  mov L:CARG1d->base, BASE
916  |  mov CARG3d, RA
917  |  mov RAa, [RC]
918  |  mov [RB], RAa
919  |  mov CARG2d, RB
920  |.else
921  |  mov ARG3, RA
922  |  mov RA, [RC+4]
923  |  mov RC, [RC]
924  |  mov [RB+4], RA
925  |  mov [RB], RC
926  |  mov ARG2, RB
927  |.endif
928  |  jmp ->BC_CAT_Z
929  |
930  |//-- Table indexing metamethods -----------------------------------------
931  |
932  |->vmeta_tgets:
933  |  mov TMP1, RC			// RC = GCstr *
934  |  mov TMP2, LJ_TSTR
935  |  lea RCa, TMP1			// Store temp. TValue in TMP1/TMP2.
936  |  cmp PC_OP, BC_GGET
937  |  jne >1
938  |  lea RA, [DISPATCH+DISPATCH_GL(tmptv)]  // Store fn->l.env in g->tmptv.
939  |  mov [RA], TAB:RB			// RB = GCtab *
940  |  mov dword [RA+4], LJ_TTAB
941  |  mov RB, RA
942  |  jmp >2
943  |
944  |->vmeta_tgetb:
945  |  movzx RC, PC_RC
946  |.if DUALNUM
947  |  mov TMP2, LJ_TISNUM
948  |  mov TMP1, RC
949  |.else
950  |  cvtsi2sd xmm0, RC
951  |  movsd TMPQ, xmm0
952  |.endif
953  |  lea RCa, TMPQ			// Store temp. TValue in TMPQ.
954  |  jmp >1
955  |
956  |->vmeta_tgetv:
957  |  movzx RC, PC_RC			// Reload TValue *k from RC.
958  |  lea RC, [BASE+RC*8]
959  |1:
960  |  movzx RB, PC_RB			// Reload TValue *t from RB.
961  |  lea RB, [BASE+RB*8]
962  |2:
963  |.if X64
964  |  mov L:CARG1d, SAVE_L
965  |  mov L:CARG1d->base, BASE		// Caveat: CARG2d/CARG3d may be BASE.
966  |  mov CARG2d, RB
967  |  mov CARG3, RCa			// May be 64 bit ptr to stack.
968  |  mov L:RB, L:CARG1d
969  |.else
970  |  mov ARG2, RB
971  |  mov L:RB, SAVE_L
972  |  mov ARG3, RC
973  |  mov ARG1, L:RB
974  |  mov L:RB->base, BASE
975  |.endif
976  |  mov SAVE_PC, PC
977  |  call extern lj_meta_tget		// (lua_State *L, TValue *o, TValue *k)
978  |  // TValue * (finished) or NULL (metamethod) returned in eax (RC).
979  |  mov BASE, L:RB->base
980  |  test RC, RC
981  |  jz >3
982  |->cont_ra:				// BASE = base, RC = result
983  |  movzx RA, PC_RA
984  |.if X64
985  |  mov RBa, [RC]
986  |  mov [BASE+RA*8], RBa
987  |.else
988  |  mov RB, [RC+4]
989  |  mov RC, [RC]
990  |  mov [BASE+RA*8+4], RB
991  |  mov [BASE+RA*8], RC
992  |.endif
993  |  ins_next
994  |
995  |3:  // Call __index metamethod.
996  |  // BASE = base, L->top = new base, stack = cont/func/t/k
997  |  mov RA, L:RB->top
998  |  mov [RA-12], PC			// [cont|PC]
999  |  lea PC, [RA+FRAME_CONT]
1000  |  sub PC, BASE
1001  |  mov LFUNC:RB, [RA-8]		// Guaranteed to be a function here.
1002  |  mov NARGS:RD, 2+1			// 2 args for func(t, k).
1003  |  jmp ->vm_call_dispatch_f
1004  |
1005  |->vmeta_tgetr:			// Integer-key table lookup through lj_tab_getinth. In: RB = GCtab *, RC = key.
1006  |  mov FCARG1, TAB:RB
1007  |  mov RB, BASE			// Save BASE.
1008  |  mov FCARG2, RC			// Caveat: FCARG2 == BASE
1009  |  call extern lj_tab_getinth@8	// (GCtab *t, int32_t key)
1010  |  // cTValue * or NULL returned in eax (RC).
1011  |  movzx RA, PC_RA			// RA = destination slot of the current instruction.
1012  |  mov BASE, RB			// Restore BASE.
1013  |  test RC, RC
1014  |  jnz ->BC_TGETR_Z			// Slot found: continue at BC_TGETR_Z (defined elsewhere) with RC = slot.
1015  |  mov dword [BASE+RA*8+4], LJ_TNIL	// Not found: store a nil tag into the destination slot.
1016  |  jmp ->BC_TGETR2_Z
1017  |
1018  |//-----------------------------------------------------------------------
1019  |
1020  |->vmeta_tsets:			// Table set with a string key: build a temp. key TValue, then join the generic set path.
1021  |  mov TMP1, RC			// RC = GCstr *
1022  |  mov TMP2, LJ_TSTR
1023  |  lea RCa, TMP1			// Store temp. TValue in TMP1/TMP2.
1024  |  cmp PC_OP, BC_GSET
1025  |  jne >1				// Not GSET: the table is reloaded from its stack slot at label 1.
1026  |  lea RA, [DISPATCH+DISPATCH_GL(tmptv)]  // Store fn->l.env in g->tmptv.
1027  |  mov [RA], TAB:RB			// RB = GCtab *
1028  |  mov dword [RA+4], LJ_TTAB
1029  |  mov RB, RA				// RB = TValue * of the temp. table value.
1030  |  jmp >2				// Skip the reload of t; RB and RC are both set up.
1031  |
1032  |->vmeta_tsetb:			// Table set with an 8 bit constant key: build a temp. key TValue, then join the generic set path.
1033  |  movzx RC, PC_RC			// RC = zero-extended byte operand of the current instruction.
1034  |.if DUALNUM
1035  |  mov TMP2, LJ_TISNUM		// Integer key: type tag word ...
1036  |  mov TMP1, RC			// ... and 32 bit value word. Presumably TMP1/TMP2 overlay TMPQ -- confirm.
1037  |.else
1038  |  cvtsi2sd xmm0, RC		// Otherwise the key is converted to a double.
1039  |  movsd TMPQ, xmm0
1040  |.endif
1041  |  lea RCa, TMPQ			// Store temp. TValue in TMPQ.
1042  |  jmp >1				// Continue at label 1 of vmeta_tsetv (reloads t, then calls lj_meta_tset).
1043  |
1044  |->vmeta_tsetv:			// Table set t[k] = v via lj_meta_tset, with t and k both taken from stack slots.
1045  |  movzx RC, PC_RC			// Reload TValue *k from RC.
1046  |  lea RC, [BASE+RC*8]
1047  |1:					// Shared entry: RC already points to the key TValue.
1048  |  movzx RB, PC_RB			// Reload TValue *t from RB.
1049  |  lea RB, [BASE+RB*8]
1050  |2:					// Shared entry: RB = TValue *t, RC = TValue *k.
1051  |.if X64
1052  |  mov L:CARG1d, SAVE_L
1053  |  mov L:CARG1d->base, BASE		// Caveat: CARG2d/CARG3d may be BASE.
1054  |  mov CARG2d, RB
1055  |  mov CARG3, RCa			// May be 64 bit ptr to stack.
1056  |  mov L:RB, L:CARG1d			// Keep L in RB; it is read again after the call.
1057  |.else
1058  |  mov ARG2, RB
1059  |  mov L:RB, SAVE_L
1060  |  mov ARG3, RC
1061  |  mov ARG1, L:RB
1062  |  mov L:RB->base, BASE
1063  |.endif
1064  |  mov SAVE_PC, PC
1065  |  call extern lj_meta_tset		// (lua_State *L, TValue *o, TValue *k)
1066  |  // TValue * (finished) or NULL (metamethod) returned in eax (RC).
1067  |  mov BASE, L:RB->base		// Refetch BASE from L->base after the C call.
1068  |  test RC, RC
1069  |  jz >3				// NULL: the __newindex metamethod must be called.
1070  |  // NOBARRIER: lj_meta_tset ensures the table is not black.
1071  |  movzx RA, PC_RA			// RA = stack slot holding the value to store.
1072  |.if X64
1073  |  mov RBa, [BASE+RA*8]		// Copy the value into the returned slot with one 64 bit move.
1074  |  mov [RC], RBa
1075  |.else
1076  |  mov RB, [BASE+RA*8+4]		// Copy the value as two 32 bit words. The tag word is
1077  |  mov RA, [BASE+RA*8]		// loaded first because this load overwrites the index in RA.
1078  |  mov [RC+4], RB
1079  |  mov [RC], RA
1080  |.endif
1081  |->cont_nop:				// BASE = base, (RC = result)
1082  |  ins_next
1083  |
1084  |3:  // Call __newindex metamethod.
1085  |  // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
1086  |  mov RA, L:RB->top			// RA = new base for the metamethod call.
1087  |  mov [RA-12], PC			// [cont|PC]
1088  |  movzx RC, PC_RA			// RC = stack slot holding the value to store.
1089  |  // Copy value to third argument.
1090  |.if X64
1091  |  mov RBa, [BASE+RC*8]
1092  |  mov [RA+16], RBa
1093  |.else
1094  |  mov RB, [BASE+RC*8+4]		// Tag word first; the next load overwrites the index in RC.
1095  |  mov RC, [BASE+RC*8]
1096  |  mov [RA+20], RB
1097  |  mov [RA+16], RC
1098  |.endif
1099  |  lea PC, [RA+FRAME_CONT]		// PC = frame delta (new base - old base) plus FRAME_CONT.
1100  |  sub PC, BASE
1101  |  mov LFUNC:RB, [RA-8]		// Guaranteed to be a function here.
1102  |  mov NARGS:RD, 3+1			// 3 args for func(t, k, v).
1103  |  jmp ->vm_call_dispatch_f
1104  |
1105  |->vmeta_tsetr:			// Integer-key table store through lj_tab_setinth. In: RB = GCtab *, RC = key.
1106  |.if X64WIN
1107  |  mov L:CARG1d, SAVE_L
1108  |  mov CARG3d, RC
1109  |  mov L:CARG1d->base, BASE
1110  |  xchg CARG2d, TAB:RB		// Caveat: CARG2d == BASE.
1111  |.elif X64
1112  |  mov L:CARG1d, SAVE_L
1113  |  mov CARG2d, TAB:RB
1114  |  mov L:CARG1d->base, BASE
1115  |  mov RB, BASE			// Save BASE.
1116  |  mov CARG3d, RC			// Caveat: CARG3d == BASE.
1117  |.else
1118  |  mov L:RA, SAVE_L
1119  |  mov ARG2, TAB:RB
1120  |  mov RB, BASE			// Save BASE.
1121  |  mov ARG3, RC
1122  |  mov ARG1, L:RA
1123  |  mov L:RA->base, BASE
1124  |.endif
1125  |  mov SAVE_PC, PC
1126  |  call extern lj_tab_setinth  // (lua_State *L, GCtab *t, int32_t key)
1127  |  // TValue * returned in eax (RC).
1128  |  movzx RA, PC_RA			// RA = slot operand of the current instruction.
1129  |  mov BASE, RB			// Restore BASE.
1130  |  jmp ->BC_TSETR_Z			// Continue at BC_TSETR_Z (defined elsewhere) with RC = returned slot.
1131  |
1132  |//-- Comparison metamethods ---------------------------------------------
1133  |
1134  |->vmeta_comp:			// Comparison via lj_meta_comp. In: RA, RD = slot numbers of the two operands.
1135  |.if X64
1136  |  mov L:RB, SAVE_L
1137  |  mov L:RB->base, BASE		// Caveat: CARG2d/CARG3d == BASE.
1138  |.if X64WIN
1139  |  lea CARG3d, [BASE+RD*8]		// The lea that overwrites BASE comes last in either order.
1140  |  lea CARG2d, [BASE+RA*8]
1141  |.else
1142  |  lea CARG2d, [BASE+RA*8]
1143  |  lea CARG3d, [BASE+RD*8]
1144  |.endif
1145  |  mov CARG1d, L:RB			// Caveat: CARG1d/CARG4d == RA.
1146  |  movzx CARG4d, PC_OP
1147  |.else
1148  |  movzx RB, PC_OP
1149  |  lea RD, [BASE+RD*8]
1150  |  lea RA, [BASE+RA*8]
1151  |  mov ARG4, RB
1152  |  mov L:RB, SAVE_L
1153  |  mov ARG3, RD
1154  |  mov ARG2, RA
1155  |  mov ARG1, L:RB
1156  |  mov L:RB->base, BASE
1157  |.endif
1158  |  mov SAVE_PC, PC
1159  |  call extern lj_meta_comp	// (lua_State *L, TValue *o1, *o2, int op)
1160  |  // 0/1 or TValue * (metamethod) returned in eax (RC).
1161  |3:					// Shared tail, also entered from vmeta_equal and vmeta_equal_cd.
1162  |  mov BASE, L:RB->base		// Refetch BASE from L->base after the C call.
1163  |  cmp RC, 1
1164  |  ja ->vmeta_binop			// RC > 1 (unsigned): it is a TValue *, so call the metamethod.
1165  |4:					// Entered with flags set: "below" means do not branch.
1166  |  lea PC, [PC+4]			// Advance PC by 4 with lea, which leaves the flags intact for the jb.
1167  |  jb >6				// Below (e.g. RC == 0 after the cmp above): skip the branch.
1168  |5:					// Take the branch encoded in the D operand.
1169  |  movzx RD, PC_RD
1170  |  branchPC RD
1171  |6:
1172  |  ins_next
1173  |
1174  |->cont_condt:			// BASE = base, RC = result
1175  |  add PC, 4				// PC is advanced here, so the shared code is joined at labels 5/6, past the lea at 4.
1176  |  cmp dword [RC+4], LJ_TISTRUECOND	// Branch if result is true.
1177  |  jb <5				// Tag below LJ_TISTRUECOND: take the branch.
1178  |  jmp <6				// Otherwise continue with the next instruction.
1179  |
1180  |->cont_condf:			// BASE = base, RC = result
1181  |  cmp dword [RC+4], LJ_TISTRUECOND	// Branch if result is false.
1182  |  jmp <4				// Label 4 advances PC and tests these flags: below = no branch.
1183  |
1184  |->vmeta_equal:			// Equality via lj_meta_equal. In: RA = o1, RD = o2, RB = ne flag (see the call signature below).
1185  |  sub PC, 4				// Step PC back by one instruction (4 bytes); the shared tail at <3 advances it again.
1186  |.if X64WIN
1187  |  mov CARG3d, RD
1188  |  mov CARG4d, RB
1189  |  mov L:RB, SAVE_L
1190  |  mov L:RB->base, BASE		// Caveat: CARG2d == BASE.
1191  |  mov CARG2d, RA
1192  |  mov CARG1d, L:RB			// Caveat: CARG1d == RA.
1193  |.elif X64
1194  |  mov CARG2d, RA
1195  |  mov CARG4d, RB			// Caveat: CARG4d == RA.
1196  |  mov L:RB, SAVE_L
1197  |  mov L:RB->base, BASE		// Caveat: CARG3d == BASE.
1198  |  mov CARG3d, RD
1199  |  mov CARG1d, L:RB
1200  |.else
1201  |  mov ARG4, RB
1202  |  mov L:RB, SAVE_L
1203  |  mov ARG3, RD
1204  |  mov ARG2, RA
1205  |  mov ARG1, L:RB
1206  |  mov L:RB->base, BASE
1207  |.endif
1208  |  mov SAVE_PC, PC
1209  |  call extern lj_meta_equal	// (lua_State *L, GCobj *o1, *o2, int ne)
1210  |  // 0/1 or TValue * (metamethod) returned in eax (RC).
1211  |  jmp <3				// Shared result handling at label 3 of vmeta_comp.
1212  |
1213  |->vmeta_equal_cd:			// Equality via lj_meta_equal_cd; the body is only assembled when FFI is defined.
1214  |.if FFI
1215  |  sub PC, 4				// Step PC back by one instruction (4 bytes); the shared tail at <3 advances it again.
1216  |  mov L:RB, SAVE_L
1217  |  mov L:RB->base, BASE
1218  |  mov FCARG1, L:RB
1219  |  mov FCARG2, dword [PC-4]		// Pass the 32 bit instruction word just before the adjusted PC.
1220  |  mov SAVE_PC, PC
1221  |  call extern lj_meta_equal_cd@8	// (lua_State *L, BCIns ins)
1222  |  // 0/1 or TValue * (metamethod) returned in eax (RC).
1223  |  jmp <3				// Shared result handling at label 3 of vmeta_comp.
1224  |.endif
1225  |
1226  |->vmeta_istype:			// Calls lj_meta_istype(L, ra, tp) with RA and the D operand of the current instruction.
1227  |.if X64
1228  |  mov L:RB, SAVE_L
1229  |  mov L:RB->base, BASE		// Caveat: CARG2d/CARG3d may be BASE.
1230  |  mov CARG2d, RA
1231  |  movzx CARG3d, PC_RD
1232  |  mov L:CARG1d, L:RB
1233  |.else
1234  |  movzx RD, PC_RD
1235  |  mov ARG2, RA
1236  |  mov L:RB, SAVE_L
1237  |  mov ARG3, RD
1238  |  mov ARG1, L:RB
1239  |  mov L:RB->base, BASE
1240  |.endif
1241  |  mov SAVE_PC, PC
1242  |  call extern lj_meta_istype  // (lua_State *L, BCReg ra, BCReg tp)
1243  |  mov BASE, L:RB->base		// Refetch BASE from L->base after the C call.
1244  |  jmp <6				// Continue with the next instruction (label 6 of vmeta_comp).
1245  |
1246  |//-- Arithmetic metamethods ---------------------------------------------
1247  |
1248  |->vmeta_arith_vno:			// Arithmetic fallback entries. Each sets up RB/RC as operand pointers for lj_meta_arith.
1249  |.if DUALNUM
1250  |  movzx RB, PC_RB			// Reload the B operand from the instruction.
1251  |.endif
1252  |->vmeta_arith_vn:			// B operand is a stack slot, C operand is indexed off KBASE.
1253  |  lea RC, [KBASE+RC*8]
1254  |  jmp >1				// Label 1 converts RB to a stack slot pointer.
1255  |
1256  |->vmeta_arith_nvo:
1257  |.if DUALNUM
1258  |  movzx RC, PC_RC			// Reload the C operand from the instruction.
1259  |.endif
1260  |->vmeta_arith_nv:			// Same operand sources as vn, but the two pointers are passed in swapped order.
1261  |  lea RC, [KBASE+RC*8]
1262  |  lea RB, [BASE+RB*8]
1263  |  xchg RB, RC			// Swap so RB = KBASE-relative operand, RC = stack slot.
1264  |  jmp >2				// Both pointers are final; skip the conversions.
1265  |
1266  |->vmeta_unm:				// Unary case: the slot in RD is passed as both operands.
1267  |  lea RC, [BASE+RD*8]
1268  |  mov RB, RC
1269  |  jmp >2
1270  |
1271  |->vmeta_arith_vvo:			// Arithmetic fallback with both operands in stack slots, then the shared lj_meta_arith call.
1272  |.if DUALNUM
1273  |  movzx RB, PC_RB			// Reload the B operand from the instruction.
1274  |.endif
1275  |->vmeta_arith_vv:
1276  |  lea RC, [BASE+RC*8]
1277  |1:					// Shared entry: RC is a pointer, RB is still a slot number.
1278  |  lea RB, [BASE+RB*8]
1279  |2:					// Shared entry: RB and RC are operand pointers, RA is a slot number.
1280  |  lea RA, [BASE+RA*8]		// RA = pointer to the destination slot.
1281  |.if X64WIN
1282  |  mov CARG3d, RB
1283  |  mov CARG4d, RC
1284  |  movzx RC, PC_OP
1285  |  mov ARG5d, RC
1286  |  mov L:RB, SAVE_L
1287  |  mov L:RB->base, BASE		// Caveat: CARG2d == BASE.
1288  |  mov CARG2d, RA
1289  |  mov CARG1d, L:RB			// Caveat: CARG1d == RA.
1290  |.elif X64
1291  |  movzx CARG5d, PC_OP
1292  |  mov CARG2d, RA
1293  |  mov CARG4d, RC			// Caveat: CARG4d == RA.
1294  |  mov L:CARG1d, SAVE_L
1295  |  mov L:CARG1d->base, BASE		// Caveat: CARG3d == BASE.
1296  |  mov CARG3d, RB
1297  |  mov L:RB, L:CARG1d			// Keep L in RB; it is read again after the call.
1298  |.else
1299  |  mov ARG3, RB
1300  |  mov L:RB, SAVE_L
1301  |  mov ARG4, RC
1302  |  movzx RC, PC_OP
1303  |  mov ARG2, RA
1304  |  mov ARG5, RC
1305  |  mov ARG1, L:RB
1306  |  mov L:RB->base, BASE
1307  |.endif
1308  |  mov SAVE_PC, PC
1309  |  call extern lj_meta_arith	// (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
1310  |  // NULL (finished) or TValue * (metamethod) returned in eax (RC).
1311  |  mov BASE, L:RB->base		// Refetch BASE from L->base after the C call.
1312  |  test RC, RC
1313  |  jz ->cont_nop			// NULL: nothing more to do, dispatch the next instruction.
1314  |
1315  |  // Call metamethod for binary op.
1316  |->vmeta_binop:
1317  |  // BASE = base, RC = new base, stack = cont/func/o1/o2
1318  |  mov RA, RC				// RA = new base.
1319  |  sub RC, BASE			// RC = new base - old base.
1320  |  mov [RA-12], PC			// [cont|PC]
1321  |  lea PC, [RC+FRAME_CONT]		// PC = frame delta plus FRAME_CONT.
1322  |  mov NARGS:RD, 2+1			// 2 args for func(o1, o2).
1323  |  jmp ->vm_call_dispatch
1324  |
1325  |->vmeta_len:				// Length operator fallback via lj_meta_len. In: RD = slot number of the operand.
1326  |  mov L:RB, SAVE_L
1327  |  mov L:RB->base, BASE
1328  |  lea FCARG2, [BASE+RD*8]		// Caveat: FCARG2 == BASE
1329  |  mov L:FCARG1, L:RB
1330  |  mov SAVE_PC, PC
1331  |  call extern lj_meta_len@8		// (lua_State *L, TValue *o)
1332  |  // NULL (retry) or TValue * (metamethod) returned in eax (RC).
1333  |  mov BASE, L:RB->base		// Refetch BASE from L->base after the C call.
1334#if LJ_52
1335  |  test RC, RC
1336  |  jne ->vmeta_binop			// Binop call for compatibility.
1337  |  movzx RD, PC_RD			// NULL returned: reload the operand slot and its table pointer ...
1338  |  mov TAB:FCARG1, [BASE+RD*8]
1339  |  jmp ->BC_LEN_Z			// ... and retry at BC_LEN_Z (defined elsewhere).
1340#else
1341  |  jmp ->vmeta_binop			// Binop call for compatibility.
1342#endif
1343  |
1344  |//-- Call metamethod ----------------------------------------------------
1345  |
1346  |->vmeta_call_ra:			// Entry with RA = slot number: turn it into the new base pointer, then fall through.
1347  |  lea RA, [BASE+RA*8+8]
1348  |->vmeta_call:			// Resolve and call __call metamethod.
1349  |  // BASE = old base, RA = new base, RC = nargs+1, PC = return
1350  |  mov TMP2, RA			// Save RA, RC for us.
1351  |  mov TMP1, NARGS:RD
1352  |  sub RA, 8				// RA = address of the slot just below the new base, passed as func.
1353  |.if X64
1354  |  mov L:RB, SAVE_L
1355  |  mov L:RB->base, BASE		// Caveat: CARG2d/CARG3d may be BASE.
1356  |  mov CARG2d, RA
1357  |  lea CARG3d, [RA+NARGS:RD*8]
1358  |  mov CARG1d, L:RB			// Caveat: CARG1d may be RA.
1359  |.else
1360  |  lea RC, [RA+NARGS:RD*8]
1361  |  mov L:RB, SAVE_L
1362  |  mov ARG2, RA
1363  |  mov ARG3, RC
1364  |  mov ARG1, L:RB
1365  |  mov L:RB->base, BASE		// This is the callers base!
1366  |.endif
1367  |  mov SAVE_PC, PC
1368  |  call extern lj_meta_call	// (lua_State *L, TValue *func, TValue *top)
1369  |  mov BASE, L:RB->base		// Refetch BASE; this 32 bit load also zero-extends into rdx.
1370  |  mov RA, TMP2			// Restore the new base ...
1371  |  mov NARGS:RD, TMP1			// ... and the argument count saved above.
1372  |  mov LFUNC:RB, [RA-8]		// Reload the function slot below the new base after the call.
1373  |  add NARGS:RD, 1			// One more argument than before the call.
1374  |  // This is fragile. L->base must not move, KBASE must always be defined.
1375  |.if X64				// Must be upper-case X64: a lower-case x64 is not defined in this file, which left this branch dead.
1376  |  cmp KBASEa, rdx			// Continue with CALLT if flag set. rdx is the 64 bit view of BASE (edx).
1377  |.else				// NOTE(review): the 64 bit compare assumes KBASE is only written via its 32 bit alias -- confirm.
1378  |  cmp KBASE, BASE			// Continue with CALLT if flag set.
1379  |.endif
1380  |  je ->BC_CALLT_Z
1381  |  mov BASE, RA
1382  |  ins_call				// Otherwise call resolved metamethod.
1383  |
1384  |//-- Argument coercion for 'for' statement ------------------------------
1385  |
1386  |->vmeta_for:				// Calls lj_meta_for on the loop slots at RA, then re-dispatches the previous instruction.
1387  |  mov L:RB, SAVE_L
1388  |  mov L:RB->base, BASE
1389  |  mov FCARG2, RA			// Caveat: FCARG2 == BASE
1390  |  mov L:FCARG1, L:RB			// Caveat: FCARG1 == RA
1391  |  mov SAVE_PC, PC
1392  |  call extern lj_meta_for@8	// (lua_State *L, TValue *base)
1393  |  mov BASE, L:RB->base		// Refetch BASE from L->base after the C call.
1394  |  mov RC, [PC-4]			// Reload the 32 bit instruction word just before PC and decode it again:
1395  |  movzx RA, RCH			// RA = second byte,
1396  |  movzx OP, RCL			// OP = low byte (opcode),
1397  |  shr RC, 16				// RC = upper 16 bits.
1398  |.if X64
1399  |  jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]	// Retry FORI or JFORI.
1400  |.else
1401  |  jmp aword [DISPATCH+OP*4+GG_DISP2STATIC]	// Retry FORI or JFORI.
1402  |.endif
1403  |
1404  |//-----------------------------------------------------------------------
1405  |//-- Fast functions -----------------------------------------------------
1406  |//-----------------------------------------------------------------------
1407  |
1408  |.macro .ffunc, name			// Fast function entry: emits only the global label ff_<name>, no argument check.
1409  |->ff_ .. name:
1410  |.endmacro
1411  |
1412  |.macro .ffunc_1, name			// Fast function entry that needs at least 1 argument.
1413  |  .ffunc name				// Emit the ff_<name> entry label.
1414  |  cmp NARGS:RD, 1+1;  jb ->fff_fallback	// Fewer than 1 arg: fallback. Flags of this cmp stay live at macro exit (ff_next tests them).
1415  |.endmacro
1416  |
1417  |.macro .ffunc_2, name			// Fast function entry that needs at least 2 arguments.
1418  |  .ffunc name				// Emit the ff_<name> entry label.
1419  |  cmp NARGS:RD, 2+1;  jb ->fff_fallback	// Fewer than 2 args: fallback.
1420  |.endmacro
1421  |
1422  |.macro .ffunc_nsse, name, op		// Entry for a function of one number: checks the arg and applies `op` to load it into xmm0.
1423  |  .ffunc_1 name
1424  |  cmp dword [BASE+4], LJ_TISNUM;  jae ->fff_fallback	// Tag of arg 1 at or above LJ_TISNUM: fallback.
1425  |  op xmm0, qword [BASE]
1426  |.endmacro
1427  |
1428  |.macro .ffunc_nsse, name		// One-parameter form: a plain movsd load of the argument.
1429  |  .ffunc_nsse name, movsd
1430  |.endmacro
1431  |
1432  |.macro .ffunc_nnsse, name		// Entry for a function of two numbers: loads them into xmm0 and xmm1.
1433  |  .ffunc_2 name
1434  |  cmp dword [BASE+4], LJ_TISNUM;  jae ->fff_fallback	// Tag check for arg 1.
1435  |  cmp dword [BASE+12], LJ_TISNUM;  jae ->fff_fallback	// Tag check for arg 2.
1436  |  movsd xmm0, qword [BASE]
1437  |  movsd xmm1, qword [BASE+8]
1438  |.endmacro
1439  |
1440  |.macro .ffunc_nnr, name			// Entry for a function of two numbers: pushes them on the x87 stack.
1441  |  .ffunc_2 name
1442  |  cmp dword [BASE+4], LJ_TISNUM;  jae ->fff_fallback	// Tag check for arg 1.
1443  |  cmp dword [BASE+12], LJ_TISNUM;  jae ->fff_fallback	// Tag check for arg 2.
1444  |  fld qword [BASE+8]			// Arg 2 is pushed first, so arg 1 ends up on top of the x87 stack.
1445  |  fld qword [BASE]
1446  |.endmacro
1447  |
1448  |// Inlined GC threshold check. Caveat: uses label 1. Also overwrites RB.
1449  |.macro ffgccheck
1450  |  mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
1451  |  cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
1452  |  jb >1				// gc.total below gc.threshold (unsigned): no GC step.
1453  |  call ->fff_gcstep
1454  |1:
1455  |.endmacro
1456  |
1457  |//-- Base library: checks -----------------------------------------------
1458  |
1459  |.ffunc_1 assert			// assert(v, ...): inline path for a true condition, returns all arguments.
1460  |  mov RB, [BASE+4]			// RB = type tag of arg 1.
1461  |  cmp RB, LJ_TISTRUECOND;  jae ->fff_fallback	// Tag at or above LJ_TISTRUECOND: fallback.
1462  |  mov PC, [BASE-4]			// Fetch the word at BASE-4 into PC before that slot is overwritten.
1463  |  mov MULTRES, RD			// Remember nargs+1; it is reloaded below as the result count.
1464  |  mov [BASE-4], RB			// Copy arg 1 down to BASE-8: tag word ...
1465  |  mov RB, [BASE]
1466  |  mov [BASE-8], RB			// ... and value word.
1467  |  sub RD, 2				// RD = number of remaining arguments.
1468  |  jz >2				// None left to move.
1469  |  mov RA, BASE
1470  |1:					// Shift every remaining argument down by one slot.
1471  |  add RA, 8
1472  |.if X64
1473  |  mov RBa, [RA]
1474  |  mov [RA-8], RBa
1475  |.else
1476  |  mov RB, [RA+4]
1477  |  mov [RA-4], RB
1478  |  mov RB, [RA]
1479  |  mov [RA-8], RB
1480  |.endif
1481  |  sub RD, 1
1482  |  jnz <1
1483  |2:
1484  |  mov RD, MULTRES
1485  |  jmp ->fff_res_			// Return through fff_res_, which does not store MULTRES again.
1486  |
1487  |.ffunc_1 type				// type(v): returns a string taken from this C function's upvalue array.
1488  |  mov RB, [BASE+4]			// RB = type tag of arg 1.
1489  |.if X64
1490  |  mov RA, RB
1491  |  sar RA, 15
1492  |  cmp RA, -2
1493  |  je >3				// Tag with this upper bit pattern is handled as LJ_TLIGHTUD at label 3.
1494  |.endif
1495  |  mov RC, ~LJ_TNUMX
1496  |  not RB				// RB = ~tag.
1497  |  cmp RC, RB
1498  |  cmova RC, RB			// RC = unsigned min(~LJ_TNUMX, ~tag).
1499  |2:					// RC = index into the upvalue array.
1500  |  mov CFUNC:RB, [BASE-8]		// Load the function object from the slot at BASE-8.
1501  |  mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
1502  |  mov PC, [BASE-4]
1503  |  mov dword [BASE-4], LJ_TSTR	// Result: the string, written to the slot at BASE-8.
1504  |  mov [BASE-8], STR:RC
1505  |  jmp ->fff_res1
1506  |.if X64
1507  |3:
1508  |  mov RC, ~LJ_TLIGHTUD
1509  |  jmp <2
1510  |.endif
1511  |
1512  |//-- Base library: getters and setters ---------------------------------
1513  |
1514  |.ffunc_1 getmetatable			// getmetatable(v): returns mt.__metatable if present and non-nil, else the metatable, else nil.
1515  |  mov RB, [BASE+4]			// RB = type tag of arg 1.
1516  |  mov PC, [BASE-4]
1517  |  cmp RB, LJ_TTAB;  jne >6
1518  |1:  // Field metatable must be at same offset for GCtab and GCudata!
1519  |  mov TAB:RB, [BASE]
1520  |  mov TAB:RB, TAB:RB->metatable
1521  |2:					// RB = metatable or 0.
1522  |  test TAB:RB, TAB:RB
1523  |  mov dword [BASE-4], LJ_TNIL	// mov leaves the flags of the test intact for the jz below.
1524  |  jz ->fff_res1			// No metatable: return nil.
1525  |  mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+4*(GCROOT_MMNAME+MM_metatable)]
1526  |  mov dword [BASE-4], LJ_TTAB	// Store metatable as default result.
1527  |  mov [BASE-8], TAB:RB
1528  |  mov RA, TAB:RB->hmask		// Locate the first hash node for the key string in RC:
1529  |  and RA, STR:RC->sid		// masked index ...
1530  |  imul RA, #NODE			// ... times the node size ...
1531  |  add NODE:RA, TAB:RB->node		// ... plus the start of the node array.
1532  |3:  // Rearranged logic, because we expect _not_ to find the key.
1533  |  cmp dword NODE:RA->key.it, LJ_TSTR
1534  |  jne >4
1535  |  cmp dword NODE:RA->key.gcr, STR:RC
1536  |  je >5
1537  |4:					// Follow the node's next link.
1538  |  mov NODE:RA, NODE:RA->next
1539  |  test NODE:RA, NODE:RA
1540  |  jnz <3
1541  |  jmp ->fff_res1			// Not found, keep default result.
1542  |5:					// Key found: RA = node; the loads below read its first two words.
1543  |  mov RB, [RA+4]
1544  |  cmp RB, LJ_TNIL;  je ->fff_res1	// Ditto for nil value.
1545  |  mov RC, [RA]
1546  |  mov [BASE-4], RB			// Return value of mt.__metatable.
1547  |  mov [BASE-8], RC
1548  |  jmp ->fff_res1
1549  |
1550  |6:					// Arg 1 is not a table.
1551  |  cmp RB, LJ_TUDATA;  je <1		// Userdata: read its metatable field like a table.
1552  |.if X64
1553  |  cmp RB, LJ_TNUMX;  ja >8
1554  |  cmp RB, LJ_TISNUM;  jbe >7
1555  |  mov RB, LJ_TLIGHTUD
1556  |  jmp >8
1557  |7:
1558  |.else
1559  |  cmp RB, LJ_TISNUM;  ja >8
1560  |.endif
1561  |  mov RB, LJ_TNUMX			// All number tags share the LJ_TNUMX entry.
1562  |8:					// Other types: look up the base metatable, indexed by ~tag.
1563  |  not RB
1564  |  mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
1565  |  jmp <2
1566  |
1567  |.ffunc_2 setmetatable			// setmetatable(t, mt): inline path only for a table without metatable and a table mt.
1568  |  cmp dword [BASE+4], LJ_TTAB;  jne ->fff_fallback
1569  |  // Fast path: no mt for table yet and not clearing the mt.
1570  |  mov TAB:RB, [BASE]
1571  |  cmp dword TAB:RB->metatable, 0;  jne ->fff_fallback
1572  |  cmp dword [BASE+12], LJ_TTAB;  jne ->fff_fallback
1573  |  mov TAB:RC, [BASE+8]
1574  |  mov TAB:RB->metatable, TAB:RC
1575  |  mov PC, [BASE-4]
1576  |  mov dword [BASE-4], LJ_TTAB		// Return original table.
1577  |  mov [BASE-8], TAB:RB
1578  |  test byte TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
1579  |  jz >1
1580  |  // Possible write barrier. Table is black, but skip iswhite(mt) check.
1581  |  barrierback TAB:RB, RC
1582  |1:
1583  |  jmp ->fff_res1
1584  |
1585  |.ffunc_2 rawget			// rawget(t, k): calls lj_tab_get and returns a copy of the slot it yields.
1586  |  cmp dword [BASE+4], LJ_TTAB;  jne ->fff_fallback
1587  |.if X64WIN
1588  |  mov RB, BASE			// Save BASE.
1589  |  lea CARG3d, [BASE+8]
1590  |  mov CARG2d, [BASE]			// Caveat: CARG2d == BASE.
1591  |  mov CARG1d, SAVE_L
1592  |.elif X64
1593  |  mov RB, BASE			// Save BASE.
1594  |  mov CARG2d, [BASE]
1595  |  lea CARG3d, [BASE+8]		// Caveat: CARG3d == BASE.
1596  |  mov CARG1d, SAVE_L
1597  |.else
1598  |  mov TAB:RD, [BASE]
1599  |  mov L:RB, SAVE_L
1600  |  mov ARG2, TAB:RD
1601  |  mov ARG1, L:RB
1602  |  mov RB, BASE			// Save BASE.
1603  |  add BASE, 8			// BASE is used as scratch for the key pointer here.
1604  |  mov ARG3, BASE
1605  |.endif
1606  |  call extern lj_tab_get	// (lua_State *L, GCtab *t, cTValue *key)
1607  |  // cTValue * returned in eax (RD).
1608  |  mov BASE, RB			// Restore BASE.
1609  |  // Copy table slot.
1610  |.if X64
1611  |  mov RBa, [RD]
1612  |  mov PC, [BASE-4]			// PC is fetched before the 8 byte store that covers BASE-4.
1613  |  mov [BASE-8], RBa
1614  |.else
1615  |  mov RB, [RD]
1616  |  mov RD, [RD+4]
1617  |  mov PC, [BASE-4]			// PC is fetched before the slot at BASE-4 is overwritten.
1618  |  mov [BASE-8], RB
1619  |  mov [BASE-4], RD
1620  |.endif
1621  |  jmp ->fff_res1
1622  |
1623  |//-- Base library: conversions ------------------------------------------
1624  |
1625  |.ffunc tonumber			// tonumber(v): returns a number argument unchanged, everything else falls back.
1626  |  // Only handles the number case inline (without a base argument).
1627  |  cmp NARGS:RD, 1+1;  jne ->fff_fallback	// Exactly one argument.
1628  |  cmp dword [BASE+4], LJ_TISNUM
1629  |.if DUALNUM
1630  |  jne >1
1631  |  mov RB, dword [BASE]; jmp ->fff_resi	// Tag equals LJ_TISNUM: return the 32 bit integer.
1632  |1:
1633  |  ja ->fff_fallback			// Still the flags of the cmp above: tag above LJ_TISNUM is not a number.
1634  |.else
1635  |  jae ->fff_fallback
1636  |.endif
1637  |  movsd xmm0, qword [BASE]; jmp ->fff_resxmm0	// Return the double.
1638  |
1639  |.ffunc_1 tostring			// tostring(v): returns a string as is, formats a number through a C call.
1640  |  // Only handles the string or number case inline.
1641  |  mov PC, [BASE-4]
1642  |  cmp dword [BASE+4], LJ_TSTR;  jne >3
1643  |  // A __tostring method in the string base metatable is ignored.
1644  |  mov STR:RD, [BASE]
1645  |2:					// RD = GCstr * to return.
1646  |  mov dword [BASE-4], LJ_TSTR
1647  |  mov [BASE-8], STR:RD
1648  |  jmp ->fff_res1
1649  |3:  // Handle numbers inline, unless a number base metatable is present.
1650  |  cmp dword [BASE+4], LJ_TISNUM;  ja ->fff_fallback
1651  |  cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
1652  |  jne ->fff_fallback
1653  |  ffgccheck				// Caveat: uses label 1.
1654  |  mov L:RB, SAVE_L
1655  |  mov L:RB->base, BASE		// Add frame since C call can throw.
1656  |  mov SAVE_PC, PC			// Redundant (but a defined value).
1657  |.if X64 and not X64WIN
1658  |  mov FCARG2, BASE			// Otherwise: FCARG2 == BASE
1659  |.endif
1660  |  mov L:FCARG1, L:RB
1661  |.if DUALNUM
1662  |  call extern lj_strfmt_number@8	// (lua_State *L, cTValue *o)
1663  |.else
1664  |  call extern lj_strfmt_num@8	// (lua_State *L, lua_Number *np)
1665  |.endif
1666  |  // GCstr returned in eax (RD).
1667  |  mov BASE, L:RB->base		// Refetch BASE from L->base after the C call.
1668  |  jmp <2				// Store the string result and return.
1669  |
1670  |//-- Base library: iterators -------------------------------------------
1671  |
1672  |.ffunc_1 next				// next(t [, k]): table traversal step through lj_tab_next.
1673  |  je >2				// Missing 2nd arg?
1674  |1:
1675  |  cmp dword [BASE+4], LJ_TTAB;  jne ->fff_fallback
1676  |  mov PC, [BASE-4]
1677  |  mov RB, BASE			// Save BASE.
1678  |.if X64WIN
1679  |  mov CARG1d, [BASE]
1680  |  lea CARG3d, [BASE-8]
1681  |  lea CARG2d, [BASE+8]		// Caveat: CARG2d == BASE.
1682  |.elif X64
1683  |  mov CARG1d, [BASE]
1684  |  lea CARG2d, [BASE+8]
1685  |  lea CARG3d, [BASE-8]		// Caveat: CARG3d == BASE.
1686  |.else
1687  |  mov TAB:RD, [BASE]
1688  |  mov ARG1, TAB:RD
1689  |  add BASE, 8			// BASE is used as scratch: first the key pointer (BASE+8) ...
1690  |  mov ARG2, BASE
1691  |  sub BASE, 8+8			// ... then the output pointer (BASE-8).
1692  |  mov ARG3, BASE
1693  |.endif
1694  |  call extern lj_tab_next		// (GCtab *t, cTValue *key, TValue *o)
1695  |  // 1=found, 0=end, -1=error returned in eax (RD).
1696  |  mov BASE, RB			// Restore BASE.
1697  |  test RD, RD;  jg ->fff_res2	// Found key/value.
1698  |  js ->fff_fallback_2		// Invalid key.
1699  |  // End of traversal: return nil.
1700  |  mov dword [BASE-4], LJ_TNIL
1701  |  jmp ->fff_res1
1702  |2:  // Set missing 2nd arg to nil.
1703  |  mov dword [BASE+12], LJ_TNIL
1704  |  jmp <1
1705  |
1706  |.ffunc_1 pairs			// pairs(t): returns upvalue[0] of this function, t and nil.
1707  |  mov TAB:RB, [BASE]
1708  |  cmp dword [BASE+4], LJ_TTAB;  jne ->fff_fallback
1709#if LJ_52
1710  |  cmp dword TAB:RB->metatable, 0; jne ->fff_fallback	// Any metatable: fallback.
1711#endif
1712  |  mov CFUNC:RB, [BASE-8]		// Load this function object from the slot at BASE-8.
1713  |  mov CFUNC:RD, CFUNC:RB->upvalue[0]
1714  |  mov PC, [BASE-4]
1715  |  mov dword [BASE-4], LJ_TFUNC	// Result 1: the function from upvalue[0].
1716  |  mov [BASE-8], CFUNC:RD
1717  |  mov dword [BASE+12], LJ_TNIL	// Result 3: nil. The table stays in place as result 2.
1718  |  mov RD, 1+3
1719  |  jmp ->fff_res
1720  |
1721  |.ffunc_2 ipairs_aux			// ipairs_aux(t, i): returns i+1 and t[i+1], or nothing if that slot is nil or missing.
1722  |  cmp dword [BASE+4], LJ_TTAB;  jne ->fff_fallback
1723  |  cmp dword [BASE+12], LJ_TISNUM
1724  |.if DUALNUM
1725  |  jne ->fff_fallback			// Only an integer index is handled inline.
1726  |.else
1727  |  jae ->fff_fallback
1728  |.endif
1729  |  mov PC, [BASE-4]
1730  |.if DUALNUM
1731  |  mov RD, dword [BASE+8]
1732  |  add RD, 1
1733  |  mov dword [BASE-4], LJ_TISNUM	// Result 1: the incremented index as an integer.
1734  |  mov dword [BASE-8], RD
1735  |.else
1736  |  movsd xmm0, qword [BASE+8]
1737  |  sseconst_1 xmm1, RBa		// Presumably loads the constant 1.0 into xmm1 -- macro defined elsewhere.
1738  |  addsd xmm0, xmm1
1739  |  cvttsd2si RD, xmm0			// RD = truncated integer index for the lookup.
1740  |  movsd qword [BASE-8], xmm0		// Result 1: the incremented index as a double.
1741  |.endif
1742  |  mov TAB:RB, [BASE]
1743  |  cmp RD, TAB:RB->asize;  jae >2	// Not in array part?
1744  |  shl RD, 3				// RD = index * 8 ...
1745  |  add RD, TAB:RB->array		// ... plus the array pointer = address of the slot.
1746  |1:					// RD = pointer to the table slot.
1747  |  cmp dword [RD+4], LJ_TNIL;  je ->fff_res0
1748  |  // Copy array slot.
1749  |.if X64
1750  |  mov RBa, [RD]
1751  |  mov [BASE], RBa
1752  |.else
1753  |  mov RB, [RD]
1754  |  mov RD, [RD+4]
1755  |  mov [BASE], RB
1756  |  mov [BASE+4], RD
1757  |.endif
1758  |->fff_res2:				// Return 2 results.
1759  |  mov RD, 1+2
1760  |  jmp ->fff_res
1761  |2:  // Check for empty hash part first. Otherwise call C function.
1762  |  cmp dword TAB:RB->hmask, 0; je ->fff_res0
1763  |  mov FCARG1, TAB:RB
1764  |  mov RB, BASE			// Save BASE.
1765  |  mov FCARG2, RD			// Caveat: FCARG2 == BASE
1766  |  call extern lj_tab_getinth@8	// (GCtab *t, int32_t key)
1767  |  // cTValue * or NULL returned in eax (RD).
1768  |  mov BASE, RB			// Restore BASE.
1769  |  test RD, RD
1770  |  jnz <1				// Slot found: check and copy it like an array slot.
1771  |->fff_res0:				// Return 0 results.
1772  |  mov RD, 1+0
1773  |  jmp ->fff_res
1774  |
1775  |.ffunc_1 ipairs			// ipairs(t): returns upvalue[0] of this function, t and 0.
1776  |  mov TAB:RB, [BASE]
1777  |  cmp dword [BASE+4], LJ_TTAB;  jne ->fff_fallback
1778#if LJ_52
1779  |  cmp dword TAB:RB->metatable, 0; jne ->fff_fallback	// Any metatable: fallback.
1780#endif
1781  |  mov CFUNC:RB, [BASE-8]		// Load this function object from the slot at BASE-8.
1782  |  mov CFUNC:RD, CFUNC:RB->upvalue[0]
1783  |  mov PC, [BASE-4]
1784  |  mov dword [BASE-4], LJ_TFUNC	// Result 1: the function from upvalue[0].
1785  |  mov [BASE-8], CFUNC:RD
1786  |.if DUALNUM
1787  |  mov dword [BASE+12], LJ_TISNUM	// Result 3: integer 0.
1788  |  mov dword [BASE+8], 0
1789  |.else
1790  |  xorps xmm0, xmm0			// Result 3: the double 0.
1791  |  movsd qword [BASE+8], xmm0
1792  |.endif
1793  |  mov RD, 1+3
1794  |  jmp ->fff_res
1795  |
1796  |//-- Base library: catch errors ----------------------------------------
1797  |
1798  |.ffunc_1 pcall			// pcall(f, ...): calls f with a FRAME_PCALL frame one slot above BASE.
1799  |  lea RA, [BASE+8]			// RA = new base for the called function.
1800  |  sub NARGS:RD, 1			// f itself is not passed as an argument.
1801  |  mov PC, 8+FRAME_PCALL		// Frame delta (8 bytes) plus the FRAME_PCALL marker.
1802  |1:					// Shared with xpcall.
1803  |  movzx RB, byte [DISPATCH+DISPATCH_GL(hookmask)]
1804  |  shr RB, HOOK_ACTIVE_SHIFT
1805  |  and RB, 1				// RB = state of the hook-active bit (0 or 1).
1806  |  add PC, RB				// Remember active hook before pcall.
1807  |  jmp ->vm_call_dispatch
1808  |
1809  |.ffunc_2 xpcall			// xpcall(f, handler, ...): swaps f and handler in place, then shares the pcall tail.
1810  |  cmp dword [BASE+12], LJ_TFUNC;  jne ->fff_fallback	// The second argument must be a function.
1811  |  mov RB, [BASE+4]			// Swap function and traceback.
1812  |  mov [BASE+12], RB
1813  |  mov dword [BASE+4], LJ_TFUNC
1814  |  mov LFUNC:RB, [BASE]
1815  |  mov PC, [BASE+8]			// PC is used as scratch for the swap.
1816  |  mov [BASE+8], LFUNC:RB
1817  |  mov [BASE], PC
1818  |  lea RA, [BASE+16]			// RA = new base, two slots above BASE.
1819  |  sub NARGS:RD, 2			// f and the handler are not passed as arguments.
1820  |  mov PC, 16+FRAME_PCALL		// Frame delta (16 bytes) plus the FRAME_PCALL marker.
1821  |  jmp <1				// Continue at label 1 of pcall.
1822  |
1823  |//-- Coroutine library --------------------------------------------------
1824  |
1825  |.macro coroutine_resume_wrap, resume	// Emits ff_coroutine_resume (resume != 0) or ff_coroutine_wrap_aux (resume == 0).
1826  |.if resume
1827  |.ffunc_1 coroutine_resume
1828  |  mov L:RB, [BASE]			// The coroutine is arg 1.
1829  |.else
1830  |.ffunc coroutine_wrap_aux
1831  |  mov CFUNC:RB, [BASE-8]
1832  |  mov L:RB, CFUNC:RB->upvalue[0].gcr	// The coroutine is upvalue[0] of this function.
1833  |.endif
1834  |  mov PC, [BASE-4]
1835  |  mov SAVE_PC, PC
1836  |.if X64
1837  |  mov TMP1, L:RB			// Keep the coroutine in TMP1; it is reloaded after vm_resume.
1838  |.else
1839  |  mov ARG1, L:RB			// Keep the coroutine in ARG1; it is reloaded after vm_resume.
1840  |.endif
1841  |.if resume
1842  |  cmp dword [BASE+4], LJ_TTHREAD;  jne ->fff_fallback
1843  |.endif
1844  |  cmp aword L:RB->cframe, 0; jne ->fff_fallback
1845  |  cmp byte L:RB->status, LUA_YIELD;  ja ->fff_fallback
1846  |  mov RA, L:RB->top			// mov leaves the flags of the status compare intact.
1847  |  je >1				// Status == LUA_YIELD: skip the check below. Otherwise status is lower (0 per the original note).
1848  |  cmp RA, L:RB->base			// Check for presence of initial func.
1849  |  je ->fff_fallback
1850  |1:
1851  |.if resume
1852  |  lea PC, [RA+NARGS:RD*8-16]		// Check stack space (-1-thread).
1853  |.else
1854  |  lea PC, [RA+NARGS:RD*8-8]		// Check stack space (-1).
1855  |.endif
1856  |  cmp PC, L:RB->maxstack; ja ->fff_fallback
1857  |  mov L:RB->top, PC			// Coroutine top now includes the arguments.
1858  |
1859  |  mov L:RB, SAVE_L			// From here on RB = the current L, not the coroutine.
1860  |  mov L:RB->base, BASE
1861  |.if resume
1862  |  add BASE, 8			// Keep resumed thread in stack for GC.
1863  |.endif
1864  |  mov L:RB->top, BASE
1865  |.if resume
1866  |  lea RB, [BASE+NARGS:RD*8-24]	// RB = end of source for stack move.
1867  |.else
1868  |  lea RB, [BASE+NARGS:RD*8-16]	// RB = end of source for stack move.
1869  |.endif
1870  |  sub RBa, PCa			// Relative to PC.
1871  |
1872  |  cmp PC, RA
1873  |  je >3				// No arguments to move.
1874  |2:  // Move args to coroutine.
1875  |.if X64
1876  |  mov RCa, [PC+RB]
1877  |  mov [PC-8], RCa
1878  |.else
1879  |  mov RC, [PC+RB+4]
1880  |  mov [PC-4], RC
1881  |  mov RC, [PC+RB]
1882  |  mov [PC-8], RC
1883  |.endif
1884  |  sub PC, 8				// The copy runs downwards from the new top to RA.
1885  |  cmp PC, RA
1886  |  jne <2
1887  |3:
1888  |.if X64
1889  |  mov CARG2d, RA
1890  |  mov CARG1d, TMP1
1891  |.else
1892  |  mov ARG2, RA			// ARG1 already holds the coroutine (stored above).
1893  |  xor RA, RA
1894  |  mov ARG4, RA
1895  |  mov ARG3, RA
1896  |.endif
1897  |  call ->vm_resume			// (lua_State *L, TValue *base, 0, 0)
1898  |
1899  |  mov L:RB, SAVE_L
1900  |.if X64
1901  |  mov L:PC, TMP1			// PC = the coroutine again.
1902  |.else
1903  |  mov L:PC, ARG1			// The callee doesn't modify SAVE_L.
1904  |.endif
1905  |  mov BASE, L:RB->base
1906  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1907  |  set_vmstate INTERP
1908  |
1909  |  cmp eax, LUA_YIELD
1910  |  ja >8				// Status above LUA_YIELD: the coroutine ended with an error.
1911  |4:
1912  |  mov RA, L:PC->base
1913  |  mov KBASE, L:PC->top		// KBASE is used as scratch: end of the coroutine's results.
1914  |  mov L:PC->top, RA			// Clear coroutine stack.
1915  |  mov PC, KBASE
1916  |  sub PC, RA				// PC = size of the results in bytes.
1917  |  je >6				// No results?
1918  |  lea RD, [BASE+PC]
1919  |  shr PC, 3				// PC = number of results.
1920  |  cmp RD, L:RB->maxstack
1921  |  ja >9				// Need to grow stack?
1922  |
1923  |  mov RB, BASE
1924  |  sub RBa, RAa			// RB = BASE relative to RA, so [RA+RB] is the destination.
1925  |5:  // Move results from coroutine.
1926  |.if X64
1927  |  mov RDa, [RA]
1928  |  mov [RA+RB], RDa
1929  |.else
1930  |  mov RD, [RA]
1931  |  mov [RA+RB], RD
1932  |  mov RD, [RA+4]
1933  |  mov [RA+RB+4], RD
1934  |.endif
1935  |  add RA, 8
1936  |  cmp RA, KBASE
1937  |  jne <5
1938  |6:
1939  |.if resume
1940  |  lea RD, [PC+2]			// nresults+1 = 1 + true + results.
1941  |  mov dword [BASE-4], LJ_TTRUE	// Prepend true to results.
1942  |.else
1943  |  lea RD, [PC+1]			// nresults+1 = 1 + results.
1944  |.endif
1945  |7:
1946  |  mov PC, SAVE_PC
1947  |  mov MULTRES, RD
1948  |.if resume
1949  |  mov RAa, -8			// Results start at BASE-8 (the prepended true/false).
1950  |.else
1951  |  xor RA, RA				// Results start at BASE.
1952  |.endif
1953  |  test PC, FRAME_TYPE
1954  |  jz ->BC_RET_Z			// No frame type bits set in PC: return via BC_RET_Z.
1955  |  jmp ->vm_return
1956  |
1957  |8:  // Coroutine returned with error (at co->top-1).
1958  |.if resume
1959  |  mov dword [BASE-4], LJ_TFALSE	// Prepend false to results.
1960  |  mov RA, L:PC->top
1961  |  sub RA, 8
1962  |  mov L:PC->top, RA			// Clear error from coroutine stack.
1963  |  // Copy error message.
1964  |.if X64
1965  |  mov RDa, [RA]
1966  |  mov [BASE], RDa
1967  |.else
1968  |  mov RD, [RA]
1969  |  mov [BASE], RD
1970  |  mov RD, [RA+4]
1971  |  mov [BASE+4], RD
1972  |.endif
1973  |  mov RD, 1+2			// nresults+1 = 1 + false + error.
1974  |  jmp <7
1975  |.else
1976  |  mov FCARG2, L:PC
1977  |  mov FCARG1, L:RB
1978  |  call extern lj_ffh_coroutine_wrap_err@8  // (lua_State *L, lua_State *co)
1979  |  // Error function does not return.
1980  |.endif
1981  |
1982  |9:  // Handle stack expansion on return from yield.
1983  |.if X64
1984  |  mov L:RA, TMP1
1985  |.else
1986  |  mov L:RA, ARG1			// The callee doesn't modify SAVE_L.
1987  |.endif
1988  |  mov L:RA->top, KBASE		// Undo coroutine stack clearing.
1989  |  mov FCARG2, PC			// PC = number of results at this point.
1990  |  mov FCARG1, L:RB
1991  |  call extern lj_state_growstack@8	// (lua_State *L, int n)
1992  |.if X64
1993  |  mov L:PC, TMP1			// Reload the coroutine into PC.
1994  |.else
1995  |  mov L:PC, ARG1
1996  |.endif
1997  |  mov BASE, L:RB->base		// Refetch BASE from L->base after the C call.
1998  |  jmp <4				// Retry the stack move.
1999  |.endmacro
2000  |
2001  |  coroutine_resume_wrap 1		// coroutine.resume: expands to ff_coroutine_resume.
2002  |  coroutine_resume_wrap 0		// coroutine.wrap: expands to ff_coroutine_wrap_aux.
2003  |
2004  |.ffunc coroutine_yield		// coroutine.yield(...): marks L as yielded and leaves the VM through vm_leave_unw.
2005  |  mov L:RB, SAVE_L
2006  |  test aword L:RB->cframe, CFRAME_RESUME
2007  |  jz ->fff_fallback			// CFRAME_RESUME bit not set in L->cframe: fallback.
2008  |  mov L:RB->base, BASE
2009  |  lea RD, [BASE+NARGS:RD*8-8]	// RD = BASE + nargs*8 (NARGS holds nargs+1).
2010  |  mov L:RB->top, RD
2011  |  xor RD, RD
2012  |  mov aword L:RB->cframe, RDa	// Clear L->cframe.
2013  |  mov al, LUA_YIELD			// al = LUA_YIELD: stored as the status here and left in al for vm_leave_unw.
2014  |  mov byte L:RB->status, al
2015  |  jmp ->vm_leave_unw
2016  |
2017  |//-- Math library -------------------------------------------------------
2018  |
2019  |.if not DUALNUM
2020  |->fff_resi:  // Dummy. Without DUALNUM the label falls through into fff_resn.
2021  |.endif
2022  |
2023  |->fff_resn:				// Return one number taken from the top of the x87 stack.
2024  |  mov PC, [BASE-4]
2025  |  fstp qword [BASE-8]		// Pop st0 into the result slot at BASE-8.
2026  |  jmp ->fff_res1
2027  |
2028  |  .ffunc_1 math_abs			// math.abs(x), followed by the shared fast function return paths.
2029  |.if DUALNUM
2030  |  cmp dword [BASE+4], LJ_TISNUM; jne >2
2031  |  mov RB, dword [BASE]
2032  |  cmp RB, 0; jns ->fff_resi		// Non-negative integer: return it unchanged.
2033  |  neg RB; js >1			// Still negative after neg: only for the minimum 32 bit integer.
2034  |->fff_resbit:
2035  |->fff_resi:				// Return the 32 bit integer in RB.
2036  |  mov PC, [BASE-4]
2037  |  mov dword [BASE-4], LJ_TISNUM
2038  |  mov dword [BASE-8], RB
2039  |  jmp ->fff_res1
2040  |1:					// Result does not fit an int32: return the double 2^31.
2041  |  mov PC, [BASE-4]
2042  |  mov dword [BASE-4], 0x41e00000  // 2^31.
2043  |  mov dword [BASE-8], 0
2044  |  jmp ->fff_res1
2045  |2:
2046  |  ja ->fff_fallback			// Flags of the tag compare above: tag above LJ_TISNUM is not a number.
2047  |.else
2048  |  cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2049  |.endif
2050  |  movsd xmm0, qword [BASE]
2051  |  sseconst_abs xmm1, RDa		// Presumably loads a mask that clears the sign bit -- macro defined elsewhere.
2052  |  andps xmm0, xmm1
2053  |->fff_resxmm0:			// Return the double in xmm0.
2054  |  mov PC, [BASE-4]
2055  |  movsd qword [BASE-8], xmm0
2056  |  // fallthrough
2057  |
2058  |->fff_res1:				// Return 1 result, already stored at BASE-8.
2059  |  mov RD, 1+1
2060  |->fff_res:				// Return RD-1 results starting at BASE-8. In: PC = word fetched from BASE-4.
2061  |  mov MULTRES, RD
2062  |->fff_res_:				// Same, for callers that have stored MULTRES themselves.
2063  |  test PC, FRAME_TYPE
2064  |  jnz >7				// Frame type bits set in PC: use the generic return path.
2065  |5:
2066  |  cmp PC_RB, RDL			// More results expected?
2067  |  ja >6
2068  |  // Adjust BASE. KBASE is assumed to be set for the calling frame.
2069  |  movzx RA, PC_RA
2070  |  not RAa				// Note: ~RA = -(RA+1)
2071  |  lea BASE, [BASE+RA*8]		// base = base - (RA+1)*8
2072  |  ins_next
2073  |
2074  |6:  // Fill up results with nil.
2075  |  mov dword [BASE+RD*8-12], LJ_TNIL
2076  |  add RD, 1
2077  |  jmp <5
2078  |
2079  |7:  // Non-standard return case.
2080  |  mov RAa, -8			// Results start at BASE+RA = BASE-8.
2081  |  jmp ->vm_return
2082  |
2083  |.if X64  // fff_resfp: result path jumped to after the extern C math calls below.
2084  |.define fff_resfp, fff_resxmm0  // x64: the result is taken from xmm0.
2085  |.else
2086  |.define fff_resfp, fff_resn  // x86: the result is popped from the x87 stack.
2087  |.endif
2088  |
2089  |.macro math_round, func  // Emits fast function math_<func>, which calls vm_<func>_sse.
2090  |  .ffunc math_ .. func
2091  |.if DUALNUM
2092  |  cmp dword [BASE+4], LJ_TISNUM; jne >1
2093  |  mov RB, dword [BASE]; jmp ->fff_resi  // Integer argument: returned unchanged.
2094  |1:
2095  |  ja ->fff_fallback  // Flags are still those of the tag compare above.
2096  |.else
2097  |  cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2098  |.endif
2099  |  movsd xmm0, qword [BASE]
2100  |  call ->vm_ .. func .. _sse  // Rounds xmm0 in place (arg/ret is xmm0).
2101  |.if DUALNUM
2102  |  cvttsd2si RB, xmm0
2103  |  cmp RB, 0x80000000  // cvttsd2si yields 0x80000000 for out-of-range/NaN inputs, but also for -2^31.
2104  |  jne ->fff_resi  // Any other value is a valid int32 result.
2105  |  cvtsi2sd xmm1, RB  // Convert back to tell a genuine -2^31 from a failed conversion.
2106  |  ucomisd xmm0, xmm1
2107  |  jp ->fff_resxmm0  // Unordered compare (NaN): return as a double.
2108  |  je ->fff_resi  // Exactly -2^31: return as an integer.
2109  |.endif
2110  |  jmp ->fff_resxmm0
2111  |.endmacro
2112  |
2113  |  math_round floor  // Defines math_floor.
2114  |  math_round ceil  // Defines math_ceil.
2115  |
2116  |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0  // math.sqrt; .ffunc_nsse is defined outside this chunk, result is returned from xmm0.
2117  |
2118  |.ffunc math_log  // math.log fast function; anything but one numeric argument goes to the fallback.
2119  |  cmp NARGS:RD, 1+1; jne ->fff_fallback	// Exactly one argument.
2120  |  cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2121  |  movsd xmm0, qword [BASE]
2122  |.if not X64
2123  |  movsd FPARG1, xmm0  // x86 only: also store the argument in the FPARG1 slot.
2124  |.endif
2125  |  mov RB, BASE  // BASE is not C callee-save; keep it in RB across the call.
2126  |  call extern log
2127  |  mov BASE, RB
2128  |  jmp ->fff_resfp
2129  |
2130  |.macro math_extern, func  // Emits fast function math_<func> calling the one-argument extern C function <func>.
2131  |  .ffunc_nsse math_ .. func
2132  |.if not X64
2133  |  movsd FPARG1, xmm0  // x86 only: also store the argument in the FPARG1 slot.
2134  |.endif
2135  |  mov RB, BASE  // BASE is not C callee-save; keep it in RB across the call.
2136  |  call extern func
2137  |  mov BASE, RB
2138  |  jmp ->fff_resfp
2139  |.endmacro
2140  |
2141  |.macro math_extern2, func  // Emits fast function math_<func> calling the two-argument extern C function <func>.
2142  |  .ffunc_nnsse math_ .. func
2143  |.if not X64
2144  |  movsd FPARG1, xmm0  // x86 only: store both arguments in the FPARG1/FPARG3 slots.
2145  |  movsd FPARG3, xmm1
2146  |.endif
2147  |  mov RB, BASE  // BASE is not C callee-save; keep it in RB across the call.
2148  |  call extern func
2149  |  mov BASE, RB
2150  |  jmp ->fff_resfp
2151  |.endmacro
2152  |
2153  |  math_extern log10  // One-argument functions, each forwarded to the extern C function of the same name.
2154  |  math_extern exp
2155  |  math_extern sin
2156  |  math_extern cos
2157  |  math_extern tan
2158  |  math_extern asin
2159  |  math_extern acos
2160  |  math_extern atan
2161  |  math_extern sinh
2162  |  math_extern cosh
2163  |  math_extern tanh
2164  |  math_extern2 pow  // Two-argument functions.
2165  |  math_extern2 atan2
2166  |  math_extern2 fmod
2167  |
2168  |.ffunc_nnr math_ldexp;	fscale; fpop1;	jmp ->fff_resn  // math.ldexp via x87 fscale; result is returned from the x87 stack.
2169  |
2170  |.ffunc_1 math_frexp  // math.frexp: returns mantissa (at BASE-8) and exponent (at BASE).
2171  |  mov RB, [BASE+4]
2172  |  cmp RB, LJ_TISNUM;  jae ->fff_fallback
2173  |  mov PC, [BASE-4]
2174  |  mov RC, [BASE]
2175  |  mov [BASE-4], RB; mov [BASE-8], RC  // Copy the argument into the first result slot.
2176  |  shl RB, 1; cmp RB, 0xffe00000; jae >3  // Shift out the sign; all-ones exponent means Inf/NaN.
2177  |  or RC, RB; jz >3  // All remaining bits zero: +-0.
2178  |  mov RC, 1022  // Value subtracted from the raw exponent for normal numbers.
2179  |  cmp RB, 0x00200000; jb >4  // Zero exponent field: denormal.
2180  |1:
2181  |  shr RB, 21; sub RB, RC		// Extract and unbias exponent.
2182  |  cvtsi2sd xmm0, RB
2183  |  mov RB, [BASE-4]
2184  |  and RB, 0x800fffff			// Mask off exponent.
2185  |  or RB, 0x3fe00000			// Put mantissa in range [0.5,1) or 0.
2186  |  mov [BASE-4], RB
2187  |2:
2188  |  movsd qword [BASE], xmm0  // Second result: the exponent as a double.
2189  |  mov RD, 1+2  // Two results.
2190  |  jmp ->fff_res
2191  |3:  // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
2192  |  xorps xmm0, xmm0; jmp <2
2193  |4:  // Handle denormals by multiplying with 2^54 and adjusting the bias.
2194  |  movsd xmm0, qword [BASE]
2195  |  sseconst_hi xmm1, RBa, 43500000  // 2^54.
2196  |  mulsd xmm0, xmm1
2197  |  movsd qword [BASE-8], xmm0  // Replace the first result slot with the scaled value.
2198  |  mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1  // 1076 = 1022+54 undoes the scaling.
2199  |
2200  |.ffunc_nsse math_modf  // math.modf: returns integral part (at BASE-8) and fractional part (at BASE).
2201  |  mov RB, [BASE+4]
2202  |  mov PC, [BASE-4]
2203  |  shl RB, 1; cmp RB, 0xffe00000; je >4	// +-Inf?
2204  |  movaps xmm4, xmm0  // Keep a copy of x for the subtraction below.
2205  |  call ->vm_trunc_sse
2206  |  subsd xmm4, xmm0  // Fraction = x - trunc(x).
2207  |1:
2208  |  movsd qword [BASE-8], xmm0
2209  |  movsd qword [BASE], xmm4
2210  |  mov RC, [BASE-4]; mov RB, [BASE+4]  // High words of both results.
2211  |  xor RC, RB; js >3				// Need to adjust sign?
2212  |2:
2213  |  mov RD, 1+2  // Two results.
2214  |  jmp ->fff_res
2215  |3:
2216  |  xor RB, 0x80000000; mov [BASE+4], RB	// Flip sign of fraction.
2217  |  jmp <2
2218  |4:
2219  |  xorps xmm4, xmm4; jmp <1			// Return +-Inf and +-0.
2220  |
2221  |.macro math_minmax, name, cmovop, sseop  // Emits a variadic min/max fast function; cmovop is used for integers, sseop for doubles.
2222  |  .ffunc_1 name
2223  |  mov RA, 2  // RA indexes the next argument; RA >= RD ends the loops below.
2224  |  cmp dword [BASE+4], LJ_TISNUM
2225  |.if DUALNUM
2226  |  jne >4
2227  |  mov RB, dword [BASE]  // Integer accumulator.
2228  |1:  // Handle integers.
2229  |  cmp RA, RD; jae ->fff_resi
2230  |  cmp dword [BASE+RA*8-4], LJ_TISNUM; jne >3
2231  |  cmp RB, dword [BASE+RA*8-8]
2232  |  cmovop RB, dword [BASE+RA*8-8]  // Take the argument if it beats the accumulator.
2233  |  add RA, 1
2234  |  jmp <1
2235  |3:
2236  |  ja ->fff_fallback
2237  |  // Convert intermediate result to number and continue below.
2238  |  cvtsi2sd xmm0, RB
2239  |  jmp >6
2240  |4:
2241  |  ja ->fff_fallback
2242  |.else
2243  |  jae ->fff_fallback
2244  |.endif
2245  |
2246  |  movsd xmm0, qword [BASE]  // FP accumulator.
2247  |5:  // Handle numbers or integers.
2248  |  cmp RA, RD; jae ->fff_resxmm0
2249  |  cmp dword [BASE+RA*8-4], LJ_TISNUM
2250  |.if DUALNUM
2251  |  jb >6
2252  |  ja ->fff_fallback
2253  |  cvtsi2sd xmm1, dword [BASE+RA*8-8]  // Integer argument: convert it to a double.
2254  |  jmp >7
2255  |.else
2256  |  jae ->fff_fallback
2257  |.endif
2258  |6:
2259  |  movsd xmm1, qword [BASE+RA*8-8]
2260  |7:
2261  |  sseop xmm0, xmm1
2262  |  add RA, 1
2263  |  jmp <5
2264  |.endmacro
2265  |
2266  |  math_minmax math_min, cmovg, minsd  // cmovg: replace the accumulator when it is greater.
2267  |  math_minmax math_max, cmovl, maxsd  // cmovl: replace the accumulator when it is less.
2268  |
2269  |//-- String library -----------------------------------------------------
2270  |
2271  |.ffunc string_byte			// Only handle the 1-arg case here.
2272  |  cmp NARGS:RD, 1+1;  jne ->fff_fallback
2273  |  cmp dword [BASE+4], LJ_TSTR;  jne ->fff_fallback
2274  |  mov STR:RB, [BASE]
2275  |  mov PC, [BASE-4]
2276  |  cmp dword STR:RB->len, 1
2277  |  jb ->fff_res0			// Return no results for empty string.
2278  |  movzx RB, byte STR:RB[1]  // Load the first byte of the string data, zero-extended.
2279  |.if DUALNUM
2280  |  jmp ->fff_resi
2281  |.else
2282  |  cvtsi2sd xmm0, RB; jmp ->fff_resxmm0  // Non-DUALNUM: return the byte as a double.
2283  |.endif
2284  |
2285  |.ffunc string_char			// Only handle the 1-arg case here.
2286  |  ffgccheck
2287  |  cmp NARGS:RD, 1+1;  jne ->fff_fallback	// *Exactly* 1 arg.
2288  |  cmp dword [BASE+4], LJ_TISNUM
2289  |.if DUALNUM
2290  |  jne ->fff_fallback
2291  |  mov RB, dword [BASE]
2292  |  cmp RB, 255;  ja ->fff_fallback  // Unsigned compare also rejects negative values.
2293  |  mov TMP2, RB
2294  |.else
2295  |  jae ->fff_fallback
2296  |  cvttsd2si RB, qword [BASE]
2297  |  cmp RB, 255;  ja ->fff_fallback  // Unsigned compare also rejects negative values.
2298  |  mov TMP2, RB
2299  |.endif
2300  |.if X64
2301  |  mov TMP3, 1  // String length of 1.
2302  |.else
2303  |  mov ARG3, 1  // String length of 1.
2304  |.endif
2305  |  lea RDa, TMP2			// Points to stack. Little-endian.
2306  |->fff_newstr:  // Create a string: RD(a) = data pointer, length in TMP3 (x64) or ARG3 (x86).
2307  |  mov L:RB, SAVE_L
2308  |  mov L:RB->base, BASE
2309  |.if X64
2310  |  mov CARG3d, TMP3			// Zero-extended to size_t.
2311  |  mov CARG2, RDa			// May be 64 bit ptr to stack.
2312  |  mov CARG1d, L:RB
2313  |.else
2314  |  mov ARG2, RD
2315  |  mov ARG1, L:RB
2316  |.endif
2317  |  mov SAVE_PC, PC
2318  |  call extern lj_str_new		// (lua_State *L, char *str, size_t l)
2319  |->fff_resstr:  // Return the string object in RD as the single result.
2320  |  // GCstr * returned in eax (RD).
2321  |  mov BASE, L:RB->base  // Reload BASE after the C call.
2322  |  mov PC, [BASE-4]
2323  |  mov dword [BASE-4], LJ_TSTR
2324  |  mov [BASE-8], STR:RD
2325  |  jmp ->fff_res1
2326  |
2327  |.ffunc string_sub  // string.sub fast function; needs at least (str, start), end defaults to -1.
2328  |  ffgccheck
2329  |  mov TMP2, -1  // Default end.
2330  |  cmp NARGS:RD, 1+2;  jb ->fff_fallback
2331  |  jna >1  // Exactly two arguments: keep the default end.
2332  |  cmp dword [BASE+20], LJ_TISNUM
2333  |.if DUALNUM
2334  |  jne ->fff_fallback
2335  |  mov RB, dword [BASE+16]
2336  |  mov TMP2, RB
2337  |.else
2338  |  jae ->fff_fallback
2339  |  cvttsd2si RB, qword [BASE+16]
2340  |  mov TMP2, RB
2341  |.endif
2342  |1:
2343  |  cmp dword [BASE+4], LJ_TSTR;  jne ->fff_fallback
2344  |  cmp dword [BASE+12], LJ_TISNUM
2345  |.if DUALNUM
2346  |  jne ->fff_fallback
2347  |.else
2348  |  jae ->fff_fallback
2349  |.endif
2350  |  mov STR:RB, [BASE]
2351  |  mov TMP3, STR:RB  // Keep the string pointer; RB is reused for the length.
2352  |  mov RB, STR:RB->len
2353  |.if DUALNUM
2354  |  mov RA, dword [BASE+8]
2355  |.else
2356  |  cvttsd2si RA, qword [BASE+8]
2357  |.endif
2358  |  mov RC, TMP2  // From here: RA = start, RC = end, RB = len.
2359  |  cmp RB, RC				// len < end? (unsigned compare)
2360  |  jb >5
2361  |2:
2362  |  test RA, RA			// start <= 0?
2363  |  jle >7
2364  |3:
2365  |  mov STR:RB, TMP3
2366  |  sub RC, RA				// start > end?
2367  |  jl ->fff_emptystr
2368  |  lea RB, [STR:RB+RA+#STR-1]  // Pointer to the first character of the substring.
2369  |  add RC, 1  // Length = end-start+1.
2370  |4:
2371  |.if X64
2372  |  mov TMP3, RC
2373  |.else
2374  |  mov ARG3, RC
2375  |.endif
2376  |  mov RD, RB
2377  |  jmp ->fff_newstr
2378  |
2379  |5:  // Negative end or overflow.
2380  |  jl >6  // Flags are still those of the compare of len against end.
2381  |  lea RC, [RC+RB+1]			// end = end+(len+1)
2382  |  jmp <2
2383  |6:  // Overflow.
2384  |  mov RC, RB				// end = len
2385  |  jmp <2
2386  |
2387  |7:  // Negative start or underflow.
2388  |  je >8
2389  |  add RA, RB				// start = start+(len+1)
2390  |  add RA, 1
2391  |  jg <3				// start > 0?
2392  |8:  // Underflow.
2393  |  mov RA, 1				// start = 1
2394  |  jmp <3
2395  |
2396  |->fff_emptystr:  // Range underflow.
2397  |  xor RC, RC				// Zero length. Any ptr in RB is ok.
2398  |  jmp <4
2399  |
2400  |.macro ffstring_op, name  // Emits fast function string_<name>, implemented via lj_buf_putstr_<name> on the global tmpbuf.
2401  |  .ffunc_1 string_ .. name
2402  |  ffgccheck
2403  |  cmp dword [BASE+4], LJ_TSTR;  jne ->fff_fallback
2404  |  mov L:RB, SAVE_L
2405  |   lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
2406  |  mov L:RB->base, BASE
2407  |  mov STR:FCARG2, [BASE]		// Caveat: FCARG2 == BASE
2408  |   mov RCa, SBUF:FCARG1->b
2409  |   mov SBUF:FCARG1->L, L:RB
2410  |   mov SBUF:FCARG1->w, RCa  // Sets w = b (presumably resets the buffer write position -- SBuf layout not shown here).
2411  |  mov SAVE_PC, PC
2412  |  call extern lj_buf_putstr_ .. name .. @8
2413  |  mov FCARG1, eax  // Pass the returned value on as the argument of lj_buf_tostr.
2414  |  call extern lj_buf_tostr@4
2415  |  jmp ->fff_resstr
2416  |.endmacro
2417  |
2418  |ffstring_op reverse  // Defines string_reverse.
2419  |ffstring_op lower  // Defines string_lower.
2420  |ffstring_op upper  // Defines string_upper.
2421  |
2422  |//-- Bit library --------------------------------------------------------
2423  |
2424  |.macro .ffunc_bit, name, kind, fdef  // Common prologue of the bit.* fast functions: leaves the first argument as an integer in RB.
2425  |  fdef name
2426  |.if kind == 2
2427  |  sseconst_tobit xmm1, RBa  // kind 2: load the conversion constant up front; callers reuse xmm1 for further arguments.
2428  |.endif
2429  |  cmp dword [BASE+4], LJ_TISNUM
2430  |.if DUALNUM
2431  |  jne >1
2432  |  mov RB, dword [BASE]
2433  |.if kind > 0
2434  |  jmp >2  // Integer argument: skip the FP conversion, continue after the macro.
2435  |.else
2436  |  jmp ->fff_resbit  // kind 0: an integer argument is already the result.
2437  |.endif
2438  |1:
2439  |  ja ->fff_fallback
2440  |.else
2441  |  jae ->fff_fallback
2442  |.endif
2443  |  movsd xmm0, qword [BASE]
2444  |.if kind < 2
2445  |  sseconst_tobit xmm1, RBa
2446  |.endif
2447  |  addsd xmm0, xmm1  // Add the constant from sseconst_tobit (macro defined outside this chunk) ...
2448  |  movd RB, xmm0  // ... and take the low 32 bits of the sum as the integer.
2449  |2:
2450  |.endmacro
2451  |
2452  |.macro .ffunc_bit, name, kind  // Two-argument form: defaults fdef to .ffunc_1.
2453  |  .ffunc_bit name, kind, .ffunc_1
2454  |.endmacro
2455  |
2456  |.ffunc_bit bit_tobit, 0  // bit.tobit: the prologue already leaves the converted value in RB.
2457  |  jmp ->fff_resbit
2458  |
2459  |.macro .ffunc_bit_op, name, ins  // Emits a variadic bit operation that folds all arguments into RB with 'ins'.
2460  |  .ffunc_bit name, 2
2461  |  mov TMP2, NARGS:RD			// Save for fallback.
2462  |  lea RD, [BASE+NARGS:RD*8-16]  // RD = address of the last argument.
2463  |1:
2464  |  cmp RD, BASE
2465  |  jbe ->fff_resbit  // Walked back down to the first argument: done.
2466  |  cmp dword [RD+4], LJ_TISNUM
2467  |.if DUALNUM
2468  |  jne >2
2469  |  ins RB, dword [RD]
2470  |  sub RD, 8
2471  |  jmp <1
2472  |2:
2473  |  ja ->fff_fallback_bit_op
2474  |.else
2475  |  jae ->fff_fallback_bit_op
2476  |.endif
2477  |  movsd xmm0, qword [RD]
2478  |  addsd xmm0, xmm1  // xmm1 still holds the constant loaded by the kind 2 prologue.
2479  |  movd RA, xmm0
2480  |  ins RB, RA
2481  |  sub RD, 8
2482  |  jmp <1
2483  |.endmacro
2484  |
2485  |.ffunc_bit_op bit_band, and
2486  |.ffunc_bit_op bit_bor, or
2487  |.ffunc_bit_op bit_bxor, xor
2488  |
2489  |.ffunc_bit bit_bswap, 1  // bit.bswap: byte-swap the 32 bit value in RB.
2490  |  bswap RB
2491  |  jmp ->fff_resbit
2492  |
2493  |.ffunc_bit bit_bnot, 1  // bit.bnot: bitwise complement of the 32 bit value in RB.
2494  |  not RB
2495  |.if DUALNUM
2496  |  jmp ->fff_resbit
2497  |.else
2498  |->fff_resbit:  // Non-DUALNUM: return the integer in RB converted to a double.
2499  |  cvtsi2sd xmm0, RB
2500  |  jmp ->fff_resxmm0
2501  |.endif
2502  |
2503  |->fff_fallback_bit_op:  // Fallback entry for .ffunc_bit_op, which uses RD as an argument pointer.
2504  |  mov NARGS:RD, TMP2			// Restore for fallback
2505  |  jmp ->fff_fallback
2506  |
2507  |.macro .ffunc_bit_sh, name, ins  // Emits a two-argument shift/rotate: RB = value, RA (ecx) = count.
2508  |.if DUALNUM
2509  |  .ffunc_bit name, 1, .ffunc_2
2510  |  // Note: no inline conversion from number for 2nd argument!
2511  |  cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
2512  |  mov RA, dword [BASE+8]
2513  |.else
2514  |  .ffunc_nnsse name
2515  |  sseconst_tobit xmm2, RBa
2516  |  addsd xmm0, xmm2  // Convert both arguments with the same constant.
2517  |  addsd xmm1, xmm2
2518  |  movd RB, xmm0
2519  |  movd RA, xmm1
2520  |.endif
2521  |  ins RB, cl				// Assumes RA is ecx.
2522  |  jmp ->fff_resbit
2523  |.endmacro
2524  |
2525  |.ffunc_bit_sh bit_lshift, shl
2526  |.ffunc_bit_sh bit_rshift, shr
2527  |.ffunc_bit_sh bit_arshift, sar
2528  |.ffunc_bit_sh bit_rol, rol
2529  |.ffunc_bit_sh bit_ror, ror
2530  |
2531  |//-----------------------------------------------------------------------
2532  |
2533  |->fff_fallback_2:  // Fallback entry that forces the argument count to two.
2534  |  mov NARGS:RD, 1+2			// Other args are ignored, anyway.
2535  |  jmp ->fff_fallback
2536  |->fff_fallback_1:  // Fallback entry that forces the argument count to one.
2537  |  mov NARGS:RD, 1+1			// Other args are ignored, anyway.
2538  |->fff_fallback:			// Call fast function fallback handler.
2539  |  // BASE = new base, RD = nargs+1
2540  |  mov L:RB, SAVE_L
2541  |  mov PC, [BASE-4]			// Fallback may overwrite PC.
2542  |  mov SAVE_PC, PC			// Redundant (but a defined value).
2543  |  mov L:RB->base, BASE
2544  |  lea RD, [BASE+NARGS:RD*8-8]  // RD = address just past the last argument.
2545  |  lea RA, [RD+8*LUA_MINSTACK]	// Ensure enough space for handler.
2546  |  mov L:RB->top, RD
2547  |  mov CFUNC:RD, [BASE-8]  // The function object sits in the slot below BASE.
2548  |  cmp RA, L:RB->maxstack
2549  |  ja >5				// Need to grow stack.
2550  |.if X64
2551  |  mov CARG1d, L:RB
2552  |.else
2553  |  mov ARG1, L:RB
2554  |.endif
2555  |  call aword CFUNC:RD->f		// (lua_State *L)
2556  |  mov BASE, L:RB->base
2557  |  // Either throws an error, or recovers and returns -1, 0 or nresults+1.
2558  |  test RD, RD;  jg ->fff_res		// Returned nresults+1?
2559  |1:
2560  |  mov RA, L:RB->top
2561  |  sub RA, BASE
2562  |  shr RA, 3  // Byte distance / 8 = number of slots between base and top.
2563  |  test RD, RD  // Flags survive the lea and mov below and are consumed by the jne.
2564  |  lea NARGS:RD, [RA+1]
2565  |  mov LFUNC:RB, [BASE-8]
2566  |  jne ->vm_call_tail			// Returned -1?
2567  |  ins_callt				// Returned 0: retry fast path.
2568  |
2569  |// Reconstruct previous base for vmeta_call during tailcall.
2570  |->vm_call_tail:
2571  |  mov RA, BASE
2572  |  test PC, FRAME_TYPE
2573  |  jnz >3
2574  |  movzx RB, PC_RA
2575  |  not RBa				// Note: ~RB = -(RB+1)
2576  |  lea BASE, [BASE+RB*8]		// base = base - (RB+1)*8
2577  |  jmp ->vm_call_dispatch		// Resolve again for tailcall.
2578  |3:
2579  |  mov RB, PC
2580  |  and RB, -8  // Clear the low three bits of PC; the remainder is used as a byte delta.
2581  |  sub BASE, RB
2582  |  jmp ->vm_call_dispatch		// Resolve again for tailcall.
2583  |
2584  |5:  // Grow stack for fallback handler.
2585  |  mov FCARG2, LUA_MINSTACK
2586  |  mov FCARG1, L:RB
2587  |  call extern lj_state_growstack@8	// (lua_State *L, int n)
2588  |  mov BASE, L:RB->base  // Reload BASE after the call.
2589  |  xor RD, RD				// Simulate a return 0.
2590  |  jmp <1				// Dumb retry (goes through ff first).
2591  |
2592  |->fff_gcstep:			// Call GC step function.
2593  |  // BASE = new base, RD = nargs+1
2594  |  pop RBa				// Must keep stack at same level.
2595  |  mov TMPa, RBa			// Save return address
2596  |  mov L:RB, SAVE_L
2597  |  mov SAVE_PC, PC			// Redundant (but a defined value).
2598  |  mov L:RB->base, BASE
2599  |  lea RD, [BASE+NARGS:RD*8-8]  // RD = address just past the last argument.
2600  |  mov FCARG1, L:RB
2601  |  mov L:RB->top, RD
2602  |  call extern lj_gc_step@4		// (lua_State *L)
2603  |  mov BASE, L:RB->base  // Reload BASE after the call.
2604  |  mov RD, L:RB->top
2605  |  sub RD, BASE
2606  |  shr RD, 3
2607  |  add NARGS:RD, 1  // Recompute RD = nargs+1 from top-base.
2608  |  mov RBa, TMPa
2609  |  push RBa				// Restore return address.
2610  |  ret
2611  |
2612  |//-----------------------------------------------------------------------
2613  |//-- Special dispatch targets -------------------------------------------
2614  |//-----------------------------------------------------------------------
2615  |
2616  |->vm_record:				// Dispatch target for recording phase.
2617  |.if JIT
2618  |  movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
2619  |  test RDL, HOOK_VMEVENT		// No recording while in vmevent.
2620  |  jnz >5
2621  |  // Decrement the hookcount for consistency, but always do the call.
2622  |  test RDL, HOOK_ACTIVE
2623  |  jnz >1
2624  |  test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2625  |  jz >1
2626  |  dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2627  |  jmp >1
2628  |.endif
2629  |
2630  |->vm_rethook:			// Dispatch target for return hooks.
2631  |  movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
2632  |  test RDL, HOOK_ACTIVE		// Hook already active?
2633  |  jnz >5
2634  |  jmp >1
2635  |
2636  |->vm_inshook:			// Dispatch target for instr/line hooks.
2637  |  movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
2638  |  test RDL, HOOK_ACTIVE		// Hook already active?
2639  |  jnz >5
2640  |
2641  |  test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2642  |  jz >5  // Neither line nor count hook set: just re-dispatch.
2643  |  dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2644  |  jz >1  // Counter reached zero: call the dispatcher.
2645  |  test RDL, LUA_MASKLINE
2646  |  jz >5  // No line hook either: just re-dispatch.
2647  |1:  // Shared by all three entry points above: call lj_dispatch_ins.
2648  |  mov L:RB, SAVE_L
2649  |  mov L:RB->base, BASE
2650  |  mov FCARG2, PC			// Caveat: FCARG2 == BASE
2651  |  mov FCARG1, L:RB
2652  |  // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2653  |  call extern lj_dispatch_ins@8	// (lua_State *L, const BCIns *pc)
2654  |3:  // Also reached via 'jmp <3' from vm_hotloop.
2655  |  mov BASE, L:RB->base
2656  |4:  // Also reached via 'jmp <4' from cont_hook.
2657  |  movzx RA, PC_RA
2658  |5:  // Decode the current instruction again and jump through the static table.
2659  |  movzx OP, PC_OP
2660  |  movzx RD, PC_RD
2661  |.if X64
2662  |  jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]	// Re-dispatch to static ins.
2663  |.else
2664  |  jmp aword [DISPATCH+OP*4+GG_DISP2STATIC]	// Re-dispatch to static ins.
2665  |.endif
2666  |
2667  |->cont_hook:				// Continue from hook yield.
2668  |  add PC, 4
2669  |  mov RA, [RB-24]
2670  |  mov MULTRES, RA			// Restore MULTRES for *M ins.
2671  |  jmp <4
2672  |
2673  |->vm_hotloop:			// Hot loop counter underflow.
2674  |.if JIT
2675  |  mov LFUNC:RB, [BASE-8]		// Same as curr_topL(L).
2676  |  mov RB, LFUNC:RB->pc
2677  |  movzx RD, byte [RB+PC2PROTO(framesize)]
2678  |  lea RD, [BASE+RD*8]  // RD = BASE + framesize slots, stored as L->top below.
2679  |  mov L:RB, SAVE_L
2680  |  mov L:RB->base, BASE
2681  |  mov L:RB->top, RD
2682  |  mov FCARG2, PC
2683  |  lea FCARG1, [DISPATCH+GG_DISP2J]
2684  |  mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2685  |  mov SAVE_PC, PC
2686  |  call extern lj_trace_hot@8		// (jit_State *J, const BCIns *pc)
2687  |  jmp <3  // Continue at local label 3 of the hook dispatch code above: reload BASE and re-dispatch.
2688  |.endif
2689  |
2690  |->vm_callhook:			// Dispatch target for call hooks.
2691  |  mov SAVE_PC, PC
2692  |.if JIT
2693  |  jmp >1  // Skip the hot call marker set by vm_hotcall.
2694  |.endif
2695  |
2696  |->vm_hotcall:			// Hot call counter underflow.
2697  |.if JIT
2698  |  mov SAVE_PC, PC
2699  |  or PC, 1				// Marker for hot call.
2700  |1:
2701  |.endif
2702  |  lea RD, [BASE+NARGS:RD*8-8]  // RD = address just past the last argument.
2703  |  mov L:RB, SAVE_L
2704  |  mov L:RB->base, BASE
2705  |  mov L:RB->top, RD
2706  |  mov FCARG2, PC
2707  |  mov FCARG1, L:RB
2708  |  call extern lj_dispatch_call@8	// (lua_State *L, const BCIns *pc)
2709  |  // ASMFunction returned in eax/rax (RDa).
2710  |  mov SAVE_PC, 0			// Invalidate for subsequent line hook.
2711  |.if JIT
2712  |  and PC, -2  // Clear the hot call marker bit again.
2713  |.endif
2714  |  mov BASE, L:RB->base
2715  |  mov RAa, RDa  // Park the returned function pointer while RD is recomputed.
2716  |  mov RD, L:RB->top
2717  |  sub RD, BASE
2718  |  mov RBa, RAa
2719  |  movzx RA, PC_RA
2720  |  shr RD, 3
2721  |  add NARGS:RD, 1  // RD = nargs+1 again, recomputed from top-base.
2722  |  jmp RBa  // Jump to the returned ASMFunction.
2723  |
2724  |->cont_stitch:			// Trace stitching.
2725  |.if JIT
2726  |  // BASE = base, RC = result, RB = mbase
2727  |  mov TRACE:RA, [RB-24]		// Save previous trace.
2728  |  mov TMP1, TRACE:RA
2729  |  mov TMP3, DISPATCH			// Need one more register.
2730  |  mov DISPATCH, MULTRES  // DISPATCH temporarily counts the results (nres+1).
2731  |  movzx RA, PC_RA
2732  |  lea RA, [BASE+RA*8]		// Call base.
2733  |  sub DISPATCH, 1
2734  |  jz >2  // No results to move.
2735  |1:  // Move results down.
2736  |.if X64
2737  |  mov RBa, [RC]
2738  |  mov [RA], RBa
2739  |.else
2740  |  mov RB, [RC]
2741  |  mov [RA], RB
2742  |  mov RB, [RC+4]
2743  |  mov [RA+4], RB
2744  |.endif
2745  |  add RC, 8
2746  |  add RA, 8
2747  |  sub DISPATCH, 1
2748  |  jnz <1
2749  |2:
2750  |  movzx RC, PC_RA
2751  |  movzx RB, PC_RB
2752  |  add RC, RB
2753  |  lea RC, [BASE+RC*8-8]  // RC = end of the result slots expected by the instruction.
2754  |3:
2755  |  cmp RC, RA
2756  |  ja >9				// More results wanted?
2757  |
2758  |  mov DISPATCH, TMP3  // Restore the real DISPATCH.
2759  |  mov TRACE:RD, TMP1			// Get previous trace.
2760  |  movzx RB, word TRACE:RD->traceno
2761  |  movzx RD, word TRACE:RD->link
2762  |  cmp RD, RB
2763  |  je ->cont_nop			// Blacklisted.
2764  |  test RD, RD
2765  |  jne =>BC_JLOOP			// Jump to stitched trace.
2766  |
2767  |  // Stitch a new trace to the previous trace.
2768  |  mov [DISPATCH+DISPATCH_J(exitno)], RB
2769  |  mov L:RB, SAVE_L
2770  |  mov L:RB->base, BASE
2771  |  mov FCARG2, PC
2772  |  lea FCARG1, [DISPATCH+GG_DISP2J]
2773  |  mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2774  |  call extern lj_dispatch_stitch@8	// (jit_State *J, const BCIns *pc)
2775  |  mov BASE, L:RB->base
2776  |  jmp ->cont_nop
2777  |
2778  |9:  // Fill up results with nil.
2779  |  mov dword [RA+4], LJ_TNIL
2780  |  add RA, 8
2781  |  jmp <3
2782  |.endif
2783  |
2784  |->vm_profhook:			// Dispatch target for profiler hook.
2785#if LJ_HASPROFILE
2786  |  mov L:RB, SAVE_L
2787  |  mov L:RB->base, BASE
2788  |  mov FCARG2, PC			// Caveat: FCARG2 == BASE
2789  |  mov FCARG1, L:RB
2790  |  call extern lj_dispatch_profile@8	// (lua_State *L, const BCIns *pc)
2791  |  mov BASE, L:RB->base  // Reload BASE, which was overwritten as FCARG2.
2792  |  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2793  |  sub PC, 4  // Step PC back by one instruction before continuing at cont_nop.
2794  |  jmp ->cont_nop
2795#endif
2796  |
2797  |//-----------------------------------------------------------------------
2798  |//-- Trace exit handler -------------------------------------------------
2799  |//-----------------------------------------------------------------------
2800  |
2801  |// Called from an exit stub with the exit number on the stack.
2802  |// The 16 bit exit number is stored with two (sign-extended) push imm8.
2803  |->vm_exit_handler:  // Saves the register state, calls lj_trace_exit and continues in vm_exit_interp.
2804  |.if JIT
2805  |.if X64
2806  |  push r13; push r12
2807  |  push r11; push r10; push r9; push r8
2808  |  push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp
2809  |  push rbx; push rdx; push rcx; push rax
2810  |  movzx RC, byte [rbp-8]		// Reconstruct exit number.
2811  |  mov RCH, byte [rbp-16]
2812  |  mov [rbp-8], r15; mov [rbp-16], r14  // Reuse the two exit number slots to save r15/r14.
2813  |.else
2814  |  push ebp; lea ebp, [esp+12]; push ebp
2815  |  push ebx; push edx; push ecx; push eax
2816  |  movzx RC, byte [ebp-4]		// Reconstruct exit number.
2817  |  mov RCH, byte [ebp-8]
2818  |  mov [ebp-4], edi; mov [ebp-8], esi  // Reuse the two exit number slots to save edi/esi.
2819  |.endif
2820  |  // Caveat: DISPATCH is ebx.
2821  |  mov DISPATCH, [ebp]
2822  |  mov RA, [DISPATCH+DISPATCH_GL(vmstate)]	// Get trace number.
2823  |  set_vmstate EXIT
2824  |  mov [DISPATCH+DISPATCH_J(exitno)], RC
2825  |  mov [DISPATCH+DISPATCH_J(parent)], RA
2826  |.if X64
2827  |.if X64WIN
2828  |  sub rsp, 16*8+4*8			// Room for SSE regs + save area.
2829  |.else
2830  |  sub rsp, 16*8			// Room for SSE regs.
2831  |.endif
2832  |  add rbp, -128  // Rebase rbp so that the stores below address the newly reserved area.
2833  |  movsd qword [rbp-8],   xmm15; movsd qword [rbp-16],  xmm14
2834  |  movsd qword [rbp-24],  xmm13; movsd qword [rbp-32],  xmm12
2835  |  movsd qword [rbp-40],  xmm11; movsd qword [rbp-48],  xmm10
2836  |  movsd qword [rbp-56],  xmm9;  movsd qword [rbp-64],  xmm8
2837  |  movsd qword [rbp-72],  xmm7;  movsd qword [rbp-80],  xmm6
2838  |  movsd qword [rbp-88],  xmm5;  movsd qword [rbp-96],  xmm4
2839  |  movsd qword [rbp-104], xmm3;  movsd qword [rbp-112], xmm2
2840  |  movsd qword [rbp-120], xmm1;  movsd qword [rbp-128], xmm0
2841  |.else
2842  |  sub esp, 8*8+16			// Room for SSE regs + args.
2843  |  movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6
2844  |  movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4
2845  |  movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2
2846  |  movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
2847  |.endif
2848  |  // Caveat: RB is ebp.
2849  |  mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2850  |  mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2851  |  mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2852  |  mov L:RB->base, BASE
2853  |.if X64WIN
2854  |  lea CARG2, [rsp+4*8]  // ExitState pointer: skips the 4*8 byte save area reserved above.
2855  |.elif X64
2856  |  mov CARG2, rsp  // ExitState pointer: the saved registers start at rsp.
2857  |.else
2858  |  lea FCARG2, [esp+16]  // ExitState pointer: skips the 16 bytes reserved for args above.
2859  |.endif
2860  |  lea FCARG1, [DISPATCH+GG_DISP2J]
2861  |  mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
2862  |  call extern lj_trace_exit@8	// (jit_State *J, ExitState *ex)
2863  |  // MULTRES or negated error code returned in eax (RD).
2864  |  mov RAa, L:RB->cframe
2865  |  and RAa, CFRAME_RAWMASK
2866  |.if X64WIN
2867  |  // Reposition stack later.
2868  |.elif X64
2869  |  mov rsp, RAa			// Reposition stack to C frame.
2870  |.else
2871  |  mov esp, RAa			// Reposition stack to C frame.
2872  |.endif
2873  |  mov [RAa+CFRAME_OFS_L], L:RB	// Set SAVE_L (on-trace resume/yield).
2874  |  mov BASE, L:RB->base
2875  |  mov PC, [RAa+CFRAME_OFS_PC]	// Get SAVE_PC.
2876  |.if X64
2877  |  jmp >1  // Enter vm_exit_interp at its local label 1, skipping its stack adjustment.
2878  |.endif
2879  |.endif
2880  |->vm_exit_interp:  // Resume the interpreter after a trace exit, or rethrow an error.
2881  |  // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
2882  |.if JIT
2883  |.if X64
2884  |  // Restore additional callee-save registers only used in compiled code.
2885  |.if X64WIN
2886  |  lea RAa, [rsp+9*16+4*8]
2887  |1:  // Entry from vm_exit_handler, with RAa already pointing at the C frame.
2888  |  movdqa xmm15, [RAa-9*16]
2889  |  movdqa xmm14, [RAa-8*16]
2890  |  movdqa xmm13, [RAa-7*16]
2891  |  movdqa xmm12, [RAa-6*16]
2892  |  movdqa xmm11, [RAa-5*16]
2893  |  movdqa xmm10, [RAa-4*16]
2894  |  movdqa xmm9, [RAa-3*16]
2895  |  movdqa xmm8, [RAa-2*16]
2896  |  movdqa xmm7, [RAa-1*16]
2897  |  mov rsp, RAa			// Reposition stack to C frame.
2898  |  movdqa xmm6, [RAa]
2899  |  mov r15, CSAVE_3
2900  |  mov r14, CSAVE_4
2901  |.else
2902  |  add rsp, 16			// Reposition stack to C frame.
2903  |1:  // Entry from vm_exit_handler, which has already repositioned rsp.
2904  |.endif
2905  |  mov r13, TMPa
2906  |  mov r12, TMPQ
2907  |.endif
2908#ifdef LUA_USE_TRACE_LOGS
2909  |.if X64
2910  |  mov FCARG1, SAVE_L
2911  |  mov L:FCARG1->base, BASE
2912  |  mov RB, RD     // Save RD
2913  |  mov TMP1, PC  // Save PC
2914  |  mov CARG3d, PC   // CARG3d == BASE
2915  |  mov FCARG2, dword [DISPATCH+DISPATCH_GL(vmstate)]
2916  |  call extern lj_log_trace_direct_exit@8
2917  |  mov PC, TMP1  // Restore PC and RD after the logging call.
2918  |  mov RD, RB
2919  |  mov RB, SAVE_L
2920  |  mov BASE, L:RB->base
2921  |.endif
2922#endif
2923  |  test RD, RD; js >9			// Check for error from exit.
2924  |  mov L:RB, SAVE_L
2925  |  mov MULTRES, RD
2926  |  mov LFUNC:KBASE, [BASE-8]
2927  |  mov KBASE, LFUNC:KBASE->pc
2928  |  mov KBASE, [KBASE+PC2PROTO(k)]  // Re-establish KBASE from the function at BASE-8.
2929  |  mov L:RB->base, BASE
2930  |  mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
2931  |  set_vmstate INTERP
2932  |  // Modified copy of ins_next which handles function header dispatch, too.
2933  |  mov RC, [PC]
2934  |  movzx RA, RCH
2935  |  movzx OP, RCL
2936  |  add PC, 4
2937  |  shr RC, 16
2938  |  cmp OP, BC_FUNCF			// Function header?
2939  |  jb >3
2940  |  cmp OP, BC_FUNCC+2			// Fast function?
2941  |  jae >4
2942  |2:
2943  |  mov RC, MULTRES			// RC/RD holds nres+1.
2944  |3:
2945  |.if X64
2946  |  jmp aword [DISPATCH+OP*8]
2947  |.else
2948  |  jmp aword [DISPATCH+OP*4]
2949  |.endif
2950  |
2951  |4:  // Check frame below fast function.
2952  |  mov RC, [BASE-4]
2953  |  test RC, FRAME_TYPE
2954  |  jnz <2				// Trace stitching continuation?
2955  |  // Otherwise set KBASE for Lua function below fast function.
2956  |  movzx RC, byte [RC-3]  // Byte at offset -3 from the saved PC (presumably operand A of the calling instruction -- TODO confirm).
2957  |  not RCa
2958  |  mov LFUNC:KBASE, [BASE+RC*8-8]
2959  |  mov KBASE, LFUNC:KBASE->pc
2960  |  mov KBASE, [KBASE+PC2PROTO(k)]
2961  |  jmp <2
2962  |
2963  |9:  // Rethrow error from the right C frame.
2964  |  mov FCARG2, RD
2965  |  mov FCARG1, L:RB
2966  |  neg FCARG2  // RD holds the negated error code; make it positive again.
2967  |  call extern lj_err_trace@8		// (lua_State *L, int errcode)
2968  |.endif
2969  |
2970  |//-----------------------------------------------------------------------
2971  |//-- Math helper functions ----------------------------------------------
2972  |//-----------------------------------------------------------------------
2973  |
2974  |// FP value rounding. Called by math.floor/math.ceil fast functions
2975  |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
2976  |.macro vm_round, name, mode, cond  // mode: 0 = floor, 1 = ceil, 2 = trunc; cond enables the x86 stack-argument wrapper.
2977  |->name:
2978  |.if not X64 and cond
2979  |  movsd xmm0, qword [esp+4]  // Wrapper: argument passed on the stack, result returned on the x87 stack.
2980  |  call ->name .. _sse
2981  |  movsd qword [esp+4], xmm0  // Overwrite callee-owned arg.
2982  |  fld qword [esp+4]
2983  |  ret
2984  |.endif
2985  |
2986  |->name .. _sse:  // SSE variant: arg/ret is xmm0.
2987  |  sseconst_abs xmm2, RDa
2988  |  sseconst_2p52 xmm3, RDa
2989  |  movaps xmm1, xmm0
2990  |  andpd xmm1, xmm2			// |x|
2991  |  ucomisd xmm3, xmm1			// No truncation if 2^52 <= |x|.
2992  |  jbe >1  // Also taken for an unordered compare (NaN): x is returned unchanged.
2993  |  andnpd xmm2, xmm0			// Isolate sign bit.
2994  |.if mode == 2		// trunc(x)?
2995  |  movaps xmm0, xmm1
2996  |  addsd xmm1, xmm3			// (|x| + 2^52) - 2^52
2997  |  subsd xmm1, xmm3
2998  |  sseconst_1 xmm3, RDa
2999  |  cmpsd xmm0, xmm1, 1		// |x| < result?
3000  |  andpd xmm0, xmm3  // Turn the all-ones/zero compare mask into the constant or 0.
3001  |  subsd xmm1, xmm0			// If yes, subtract 1.0.
3002  |  orpd xmm1, xmm2			// Merge sign bit back in.
3003  |.else
3004  |  addsd xmm1, xmm3			// (|x| + 2^52) - 2^52
3005  |  subsd xmm1, xmm3
3006  |  orpd xmm1, xmm2			// Merge sign bit back in.
3007  |  .if mode == 1		// ceil(x)?
3008  |    sseconst_m1 xmm2, RDa		// Must subtract -1 to preserve -0.
3009  |    cmpsd xmm0, xmm1, 6		// x > result?
3010  |  .else			// floor(x)?
3011  |    sseconst_1 xmm2, RDa
3012  |    cmpsd xmm0, xmm1, 1		// x < result?
3013  |  .endif
3014  |  andpd xmm0, xmm2  // Turn the all-ones/zero compare mask into the constant or 0.
3015  |  subsd xmm1, xmm0			// If yes, subtract +-1.
3016  |.endif
3017  |  movaps xmm0, xmm1
3018  |1:
3019  |  ret
3020  |.endmacro
3021  |
3022  |  vm_round vm_floor, 0, 1
3023  |  vm_round vm_ceil,  1, JIT
3024  |  vm_round vm_trunc, 2, JIT
3025  |
3026  |// FP modulo x%y. Called by BC_MOD* and vm_arith.
3027  |->vm_mod:  // Computes x - floor(x/y)*y.
3028  |// Args in xmm0/xmm1, return value in xmm0.
3029  |// Caveat: xmm0-xmm5 and RC (eax) modified!
3030  |  movaps xmm5, xmm0  // Keep x for the final subtraction.
3031  |  divsd xmm0, xmm1
3032  |  sseconst_abs xmm2, RDa
3033  |  sseconst_2p52 xmm3, RDa
3034  |  movaps xmm4, xmm0
3035  |  andpd xmm4, xmm2			// |x/y|
3036  |  ucomisd xmm3, xmm4			// No truncation if 2^52 <= |x/y|.
3037  |  jbe >1
3038  |  andnpd xmm2, xmm0			// Isolate sign bit.
3039  |  addsd xmm4, xmm3			// (|x/y| + 2^52) - 2^52
3040  |  subsd xmm4, xmm3
3041  |  orpd xmm4, xmm2			// Merge sign bit back in.
3042  |  sseconst_1 xmm2, RDa
3043  |  cmpsd xmm0, xmm4, 1		// x/y < result?
3044  |  andpd xmm0, xmm2
3045  |  subsd xmm4, xmm0			// If yes, subtract 1.0.
3046  |  movaps xmm0, xmm5
3047  |  mulsd xmm1, xmm4  // y * floor(x/y)
3048  |  subsd xmm0, xmm1
3049  |  ret
3050  |1:  // Quotient is used as is, without rounding.
3051  |  mulsd xmm1, xmm0
3052  |  movaps xmm0, xmm5
3053  |  subsd xmm0, xmm1
3054  |  ret
3055  |
3056  |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
3057  |->vm_powi_sse:  // Integer power x^i by repeated squaring.
3058  |  cmp eax, 1; jle >6			// i<=1?
3059  |  // Now 1 < (unsigned)i <= 0x80000000.
3060  |1:  // Handle leading zeros.
3061  |  test eax, 1; jnz >2
3062  |  mulsd xmm0, xmm0  // Square x for every low zero bit of i.
3063  |  shr eax, 1
3064  |  jmp <1
3065  |2:
3066  |  shr eax, 1; jz >5  // No bits left: xmm0 is the result.
3067  |  movaps xmm1, xmm0  // xmm1 accumulates the product of the selected powers.
3068  |3:  // Handle trailing bits.
3069  |  mulsd xmm0, xmm0
3070  |  shr eax, 1; jz >4
3071  |  jnc <3  // Bit shifted out was clear: skip the multiply.
3072  |  mulsd xmm1, xmm0
3073  |  jmp <3
3074  |4:
3075  |  mulsd xmm0, xmm1
3076  |5:
3077  |  ret
3078  |6:  // Flags are still those of 'cmp eax, 1'.
3079  |  je <5				// x^1 ==> x
3080  |  jb >7				// x^0 ==> 1
3081  |  neg eax
3082  |  call <1  // Negative i: compute x^(-i) with the loop above ...
3083  |  sseconst_1 xmm1, RDa
3084  |  divsd xmm1, xmm0  // ... and return its reciprocal.
3085  |  movaps xmm0, xmm1
3086  |  ret
3087  |7:
3088  |  sseconst_1 xmm0, RDa
3089  |  ret
3090  |
3091  |//-----------------------------------------------------------------------
3092  |//-- Miscellaneous functions --------------------------------------------
3093  |//-----------------------------------------------------------------------
3094  |
3095  |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
3096  |->vm_cpuid:  // Runs CPUID for function f (subleaf ecx = 0) and stores eax/ebx/ecx/edx into res[0..3].
3097  |.if X64
3098  |  mov eax, CARG1d
3099  |  .if X64WIN; push rsi; mov rsi, CARG2; .endif
3100  |  push rbx  // cpuid overwrites ebx; save and restore rbx around it.
3101  |  xor ecx, ecx
3102  |  cpuid
3103  |  mov [rsi], eax
3104  |  mov [rsi+4], ebx
3105  |  mov [rsi+8], ecx
3106  |  mov [rsi+12], edx
3107  |  pop rbx
3108  |  .if X64WIN; pop rsi; .endif
3109  |  ret
3110  |.else
3111  |  pushfd
3112  |  pop edx
3113  |  mov ecx, edx  // Keep the original flags for the comparison below.
3114  |  xor edx, 0x00200000		// Toggle ID bit in flags.
3115  |  push edx
3116  |  popfd
3117  |  pushfd
3118  |  pop edx
3119  |  xor eax, eax			// Zero means no features supported.
3120  |  cmp ecx, edx
3121  |  jz >1				// No ID toggle means no CPUID support.
3122  |  mov eax, [esp+4]			// Argument 1 is function number.
3123  |  push edi
3124  |  push ebx
3125  |  xor ecx, ecx
3126  |  cpuid
3127  |  mov edi, [esp+16]			// Argument 2 is result area.
3128  |  mov [edi], eax
3129  |  mov [edi+4], ebx
3130  |  mov [edi+8], ecx
3131  |  mov [edi+12], edx
3132  |  pop ebx
3133  |  pop edi
3134  |1:
3135  |  ret
3136  |.endif
3137  |
3138  |.define NEXT_TAB,		TAB:FCARG1	// Argument 1: the table.
3139  |.define NEXT_IDX,		FCARG2		// Argument 2: the traversal index.
3140  |.define NEXT_PTR,		RCa		// Returned pointer (native pointer width).
3141  |.define NEXT_PTRd,		RC		// 32 bit view of NEXT_PTR.
3142  |.macro NEXT_RES_IDXL, op2;	lea edx, [NEXT_IDX+op2]; .endmacro
3143  |.if X64
3144  |.define NEXT_TMP,		CARG3d
3145  |.define NEXT_TMPq,		CARG3
3146  |.define NEXT_ASIZE,		CARG4d
3147  |.macro NEXT_ENTER;		.endmacro
3148  |.macro NEXT_LEAVE;		ret; .endmacro
3149  |.if X64WIN
3150  |.define NEXT_RES_PTR,	[rsp+aword*5]	// Slot above the return address plus 4 further slots (presumably the WIN64 shadow space).
3151  |.macro NEXT_RES_IDX, op2;	add NEXT_IDX, op2; .endmacro
3152  |.else
3153  |.define NEXT_RES_PTR,	[rsp+aword*1]	// Slot directly above the return address.
3154  |.macro NEXT_RES_IDX, op2;	lea edx, [NEXT_IDX+op2]; .endmacro
3155  |.endif
3156  |.else
3157  |.define NEXT_ASIZE,		esi
3158  |.define NEXT_TMP,		edi
3159  |.macro NEXT_ENTER;		push esi; push edi; .endmacro
3160  |.macro NEXT_LEAVE;		pop edi; pop esi; ret; .endmacro
3161  |.define NEXT_RES_PTR,	[esp+dword*3]	// Above the two pushes of NEXT_ENTER and the return address.
3162  |.macro NEXT_RES_IDX, op2;	add NEXT_IDX, op2; .endmacro
3163  |.endif
3164  |
3165  |// TValue *lj_vm_next(GCtab *t, uint32_t idx): finds the next non-nil entry at or after idx, array part first, then hash part.
3166  |// Next idx returned in edx. Pointer returned in NEXT_PTR: the NEXT_RES_PTR area for array hits and at the end, the hash node itself for hash hits.
3167  |->vm_next:
3168  |.if JIT
3169  |  NEXT_ENTER
3170  |  mov NEXT_ASIZE, NEXT_TAB->asize
3171  |1:  // Traverse array part.
3172  |  cmp NEXT_IDX, NEXT_ASIZE;  jae >5
3173  |  mov NEXT_TMP, NEXT_TAB->array
3174  |  cmp dword [NEXT_TMP+NEXT_IDX*8+4], LJ_TNIL;  je >2
3175  |  lea NEXT_PTR, NEXT_RES_PTR
3176  |.if X64
3177  |  mov NEXT_TMPq, qword [NEXT_TMP+NEXT_IDX*8]	// Copy the whole 8 byte array slot to the result area.
3178  |  mov qword [NEXT_PTR], NEXT_TMPq
3179  |.else
3180  |  mov NEXT_ASIZE, dword [NEXT_TMP+NEXT_IDX*8+4]	// NEXT_ASIZE is reused as a temp; asize is not needed again on this path.
3181  |  mov NEXT_TMP, dword [NEXT_TMP+NEXT_IDX*8]
3182  |  mov dword [NEXT_PTR+4], NEXT_ASIZE
3183  |  mov dword [NEXT_PTR], NEXT_TMP
3184  |.endif
3185  |.if DUALNUM
3186  |  mov dword [NEXT_PTR+dword*3], LJ_TISNUM	// Second result slot: the array index as an integer key.
3187  |  mov dword [NEXT_PTR+dword*2], NEXT_IDX
3188  |.else
3189  |  cvtsi2sd xmm0, NEXT_IDX			// Second result slot: the array index as a double key.
3190  |  movsd qword [NEXT_PTR+dword*2], xmm0
3191  |.endif
3192  |  NEXT_RES_IDX 1				// Returned idx = array index + 1.
3193  |  NEXT_LEAVE
3194  |2:  // Skip holes in array part.
3195  |  add NEXT_IDX, 1
3196  |  jmp <1
3197  |
3198  |5:  // Traverse hash part.
3199  |  sub NEXT_IDX, NEXT_ASIZE			// Rebase idx to a hash node index.
3200  |6:
3201  |  cmp NEXT_IDX, NEXT_TAB->hmask; ja >9	// Node indexes run from 0 to hmask inclusive.
3202  |  imul NEXT_PTRd, NEXT_IDX, #NODE		// Byte offset = node index * size of the NODE type.
3203  |  add NODE:NEXT_PTRd, dword NEXT_TAB->node
3204  |  cmp dword NODE:NEXT_PTR->val.it, LJ_TNIL; je >7
3205  |  NEXT_RES_IDXL NEXT_ASIZE+1			// Returned idx = asize + node index + 1.
3206  |  NEXT_LEAVE
3207  |7:  // Skip holes in hash part.
3208  |  add NEXT_IDX, 1
3209  |  jmp <6
3210  |
3211  |9:  // End of iteration. Set the key to nil (not the value).
3212  |  NEXT_RES_IDX NEXT_ASIZE			// Undo the rebasing done at label 5.
3213  |  lea NEXT_PTR, NEXT_RES_PTR
3214  |  mov dword [NEXT_PTR+dword*3], LJ_TNIL	// Type word of the second (key) result slot.
3215  |  NEXT_LEAVE
3216  |.endif
3217  |
3218  |//-----------------------------------------------------------------------
3219  |//-- Assertions ---------------------------------------------------------
3220  |//-----------------------------------------------------------------------
3221  |
3222  |->assert_bad_for_arg_type:		// Trap target; by its name, reached when a FOR loop argument has an unexpected type (callers not visible here).
3223#ifdef LUA_USE_ASSERT
3224  |  int3				// Extra breakpoint emitted only in LUA_USE_ASSERT builds.
3225#endif
3226  |  int3				// Always emitted.
3227  |
3228  |//-----------------------------------------------------------------------
3229  |//-- FFI helper functions -----------------------------------------------
3230  |//-----------------------------------------------------------------------
3231  |
3232  |// Handler for callback functions. Callback slot number in ah/al. Saves the incoming C arguments in the CTState, enters the VM via lj_ccallback_enter and calls the Lua function.
3233  |->vm_ffi_callback:
3234  |.if FFI
3235  |.type CTSTATE, CTState, PC		// CTSTATE lives in the PC register in this routine.
3236  |.if not X64
3237  |  sub esp, 16			// Leave room for SAVE_ERRF etc.
3238  |.endif
3239  |  saveregs_	// ebp/rbp already saved. ebp now holds global_State *.
3240  |  lea DISPATCH, [ebp+GG_G2DISP]
3241  |  mov CTSTATE, GL:ebp->ctype_state
3242  |  movzx eax, ax				// Keep only the 16 bit slot number.
3243  |  mov CTSTATE->cb.slot, eax
3244  |.if X64
3245  |  mov CTSTATE->cb.gpr[0], CARG1		// Spill the integer and FP argument registers.
3246  |  mov CTSTATE->cb.gpr[1], CARG2
3247  |  mov CTSTATE->cb.gpr[2], CARG3
3248  |  mov CTSTATE->cb.gpr[3], CARG4
3249  |  movsd qword CTSTATE->cb.fpr[0], xmm0
3250  |  movsd qword CTSTATE->cb.fpr[1], xmm1
3251  |  movsd qword CTSTATE->cb.fpr[2], xmm2
3252  |  movsd qword CTSTATE->cb.fpr[3], xmm3
3253  |.if X64WIN
3254  |  lea rax, [rsp+CFRAME_SIZE+4*8]	// Skips 4 extra slots (presumably the WIN64 shadow space).
3255  |.else
3256  |  lea rax, [rsp+CFRAME_SIZE]
3257  |  mov CTSTATE->cb.gpr[4], CARG5		// This path also has CARG5/CARG6 and xmm4-xmm7 to spill.
3258  |  mov CTSTATE->cb.gpr[5], CARG6
3259  |  movsd qword CTSTATE->cb.fpr[4], xmm4
3260  |  movsd qword CTSTATE->cb.fpr[5], xmm5
3261  |  movsd qword CTSTATE->cb.fpr[6], xmm6
3262  |  movsd qword CTSTATE->cb.fpr[7], xmm7
3263  |.endif
3264  |  mov CTSTATE->cb.stack, rax		// Address just above this frame (stack arguments, presumably).
3265  |  mov CARG2, rsp			// Second argument (cf) is the current stack pointer.
3266  |.else
3267  |  lea eax, [esp+CFRAME_SIZE+16]
3268  |  mov CTSTATE->cb.gpr[0], FCARG1
3269  |  mov CTSTATE->cb.gpr[1], FCARG2
3270  |  mov CTSTATE->cb.stack, eax
3271  |  mov FCARG1, [esp+CFRAME_SIZE+12]	// Move around misplaced retaddr/ebp.
3272  |  mov FCARG2, [esp+CFRAME_SIZE+8]
3273  |  mov SAVE_RET, FCARG1
3274  |  mov SAVE_R4, FCARG2
3275  |  mov FCARG2, esp			// Second argument (cf) is the current stack pointer.
3276  |.endif
3277  |  mov SAVE_PC, CTSTATE		// Any value outside of bytecode is ok.
3278  |  mov FCARG1, CTSTATE
3279  |  call extern lj_ccallback_enter@8	// (CTState *cts, void *cf)
3280  |  // lua_State * returned in eax (RD).
3281  |  set_vmstate INTERP
3282  |  mov BASE, L:RD->base
3283  |  mov RD, L:RD->top
3284  |  sub RD, BASE
3285  |  mov LFUNC:RB, [BASE-8]		// Function object from the slot just below BASE.
3286  |  shr RD, 3				// Byte distance base..top -> number of 8 byte slots.
3287  |  add RD, 1				// RD = slot count + 1.
3288  |  ins_callt				// Macro defined elsewhere; presumably dispatches the call to RB.
3289  |.endif
3290  |
3291  |->cont_ffi_callback:			// Return from FFI callback. Syncs L, calls lj_ccallback_leave and reloads the C return registers from the CTState.
3292  |.if FFI
3293  |  mov L:RA, SAVE_L
3294  |  mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
3295  |  mov aword CTSTATE->L, L:RAa
3296  |  mov L:RA->base, BASE
3297  |  mov L:RA->top, RB
3298  |  mov FCARG1, CTSTATE
3299  |  mov FCARG2, RC
3300  |  call extern lj_ccallback_leave@8	// (CTState *cts, TValue *o)
3301  |.if X64
3302  |  mov rax, CTSTATE->cb.gpr[0]		// Integer result register.
3303  |  movsd xmm0, qword CTSTATE->cb.fpr[0]	// FP result register.
3304  |  jmp ->vm_leave_unw
3305  |.else
3306  |  mov L:RB, SAVE_L
3307  |  mov eax, CTSTATE->cb.gpr[0]
3308  |  mov edx, CTSTATE->cb.gpr[1]
3309  |  cmp dword CTSTATE->cb.gpr[2], 1	// gpr[2] selects the x87 result: below 1 = none, 1 = float, above 1 = double.
3310  |  jb >7
3311  |  je >6
3312  |  fld qword CTSTATE->cb.fpr[0].d
3313  |  jmp >7
3314  |6:
3315  |  fld dword CTSTATE->cb.fpr[0].f
3316  |7:
3317  |  mov ecx, L:RB->top
3318  |  movzx ecx, word [ecx+6]		// Get stack adjustment and copy up.
3319  |  mov SAVE_L, ecx			// Must be one slot above SAVE_RET
3320  |  restoreregs
3321  |  pop ecx				// Move return addr from SAVE_RET.
3322  |  add esp, [esp]			// Adjust stack.
3323  |  add esp, 16			// Same amount as the 'sub esp, 16' in vm_ffi_callback.
3324  |  push ecx				// Put the return address back on top for ret.
3325  |  ret
3326  |.endif
3327  |.endif
3328  |
3329  |->vm_ffi_call@4:			// Call C function via FFI. Takes a CCallState *: loads stack slots and argument registers from it, calls ->func and stores the result registers back.
3330  |  // Caveat: needs special frame unwinding, see below.
3331  |.if FFI
3332  |.if X64
3333  |  .type CCSTATE, CCallState, rbx
3334  |  push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1	// Frame pointer setup; rbx is saved at [rbp-8].
3335  |.else
3336  |  .type CCSTATE, CCallState, ebx
3337  |  push ebp; mov ebp, esp; push ebx; mov CCSTATE, FCARG1	// Frame pointer setup; ebx is saved at [ebp-4].
3338  |.endif
3339  |
3340  |  // Readjust stack.
3341  |.if X64
3342  |  mov eax, CCSTATE->spadj
3343  |  sub rsp, rax
3344  |.else
3345  |  sub esp, CCSTATE->spadj
3346  |.if WIN
3347  |  mov CCSTATE->spadj, esp		// Remember esp before the call; compared with esp after the call below.
3348  |.endif
3349  |.endif
3350  |
3351  |  // Copy stack slots.
3352  |  movzx ecx, byte CCSTATE->nsp
3353  |  sub ecx, 1
3354  |  js >2				// nsp == 0: nothing to copy.
3355  |1:  // Copies slots nsp-1 down to 0.
3356  |.if X64
3357  |  mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
3358  |  mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
3359  |.else
3360  |  mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)]
3361  |  mov [esp+ecx*4], eax
3362  |.endif
3363  |  sub ecx, 1
3364  |  jns <1
3365  |2:
3366  |
3367  |.if X64
3368  |  movzx eax, byte CCSTATE->nfpr	// eax still holds nfpr at the call below.
3369  |  mov CARG1, CCSTATE->gpr[0]
3370  |  mov CARG2, CCSTATE->gpr[1]
3371  |  mov CARG3, CCSTATE->gpr[2]
3372  |  mov CARG4, CCSTATE->gpr[3]
3373  |.if not X64WIN
3374  |  mov CARG5, CCSTATE->gpr[4]
3375  |  mov CARG6, CCSTATE->gpr[5]
3376  |.endif
3377  |  test eax, eax; jz >5			// nfpr == 0: skip loading the xmm registers.
3378  |  movaps xmm0, CCSTATE->fpr[0]
3379  |  movaps xmm1, CCSTATE->fpr[1]
3380  |  movaps xmm2, CCSTATE->fpr[2]
3381  |  movaps xmm3, CCSTATE->fpr[3]
3382  |.if not X64WIN
3383  |  cmp eax, 4; jbe >5			// xmm4-xmm7 are only loaded when nfpr > 4.
3384  |  movaps xmm4, CCSTATE->fpr[4]
3385  |  movaps xmm5, CCSTATE->fpr[5]
3386  |  movaps xmm6, CCSTATE->fpr[6]
3387  |  movaps xmm7, CCSTATE->fpr[7]
3388  |.endif
3389  |5:
3390  |.else
3391  |  mov FCARG1, CCSTATE->gpr[0]
3392  |  mov FCARG2, CCSTATE->gpr[1]
3393  |.endif
3394  |
3395  |  call aword CCSTATE->func
3396  |
3397  |.if X64
3398  |  mov CCSTATE->gpr[0], rax		// Store the result registers back into the call state.
3399  |  movaps CCSTATE->fpr[0], xmm0
3400  |.if not X64WIN
3401  |  mov CCSTATE->gpr[1], rdx
3402  |  movaps CCSTATE->fpr[1], xmm1
3403  |.endif
3404  |.else
3405  |  mov CCSTATE->gpr[0], eax
3406  |  mov CCSTATE->gpr[1], edx
3407  |  cmp byte CCSTATE->resx87, 1		// resx87 selects the x87 result: below 1 = none, 1 = float, above 1 = double.
3408  |  jb >7
3409  |  je >6
3410  |  fstp qword CCSTATE->fpr[0].d[0]
3411  |  jmp >7
3412  |6:
3413  |  fstp dword CCSTATE->fpr[0].f[0]
3414  |7:
3415  |.if WIN
3416  |  sub CCSTATE->spadj, esp		// spadj = esp before the call minus esp after it.
3417  |.endif
3418  |.endif
3419  |
3420  |.if X64
3421  |  mov rbx, [rbp-8]; leave; ret		// Restore rbx from its frame slot; leave drops any remaining stack adjustment.
3422  |.else
3423  |  mov ebx, [ebp-4]; leave; ret		// Restore ebx from its frame slot; leave drops any remaining stack adjustment.
3424  |.endif
3425  |.endif
3426  |// Note: vm_ffi_call must be the last function in this object file!
3427  |
3428  |//-----------------------------------------------------------------------
3429}
3430
3431/* Generate the code for a single instruction. */
3432static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3433{
3434  int vk = 0;
3435  |// Note: aligning all instructions does not pay off.
3436  |=>defop:
3437
3438  switch (op) {
3439
3440  /* -- Comparison ops ---------------------------------------------------- */
3441
3442  /* Remember: all ops branch for a true comparison, fall through otherwise. */
3443
3444  |.macro jmp_comp, lt, ge, le, gt, target	// Emits exactly one of the four jump mnemonics to 'target', chosen by the C variable 'op' at build time.
3445  ||switch (op) {
3446  ||case BC_ISLT:
3447  |   lt target
3448  ||break;
3449  ||case BC_ISGE:
3450  |   ge target
3451  ||break;
3452  ||case BC_ISLE:
3453  |   le target
3454  ||break;
3455  ||case BC_ISGT:
3456  |   gt target
3457  ||break;
3458  ||default: break;  /* Shut up GCC. */
3459  ||}
3460  |.endmacro
3461
3462  case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:  /* Ordered comparison of two stack slots; takes the following jump if it holds. */
3463    |  // RA = src1, RD = src2, JMP with RD = target
3464    |  ins_AD
3465    |.if DUALNUM
3466    |  checkint RA, >7
3467    |  checkint RD, >8
3468    |  mov RB, dword [BASE+RA*8]
3469    |  add PC, 4				// Step PC over the following instruction word.
3470    |  cmp RB, dword [BASE+RD*8]
3471    |  jmp_comp jge, jl, jg, jle, >9	// Inverted signed conditions: jump to 9 (no branch) when the comparison is false.
3472    |6:  // Comparison is true: take the branch.
3473    |  movzx RD, PC_RD
3474    |  branchPC RD
3475    |9:
3476    |  ins_next
3477    |
3478    |7:  // RA is not an integer.
3479    |  ja ->vmeta_comp
3480    |  // RA is a number.
3481    |  cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3482    |  // RA is a number, RD is an integer.
3483    |  cvtsi2sd xmm0, dword [BASE+RD*8]
3484    |  jmp >2
3485    |
3486    |8:  // RA is an integer, RD is not an integer.
3487    |  ja ->vmeta_comp
3488    |  // RA is an integer, RD is a number.
3489    |  cvtsi2sd xmm1, dword [BASE+RA*8]
3490    |  movsd xmm0, qword [BASE+RD*8]
3491    |  add PC, 4
3492    |  ucomisd xmm0, xmm1			// Compares RD (xmm0) against RA (xmm1).
3493    |  jmp_comp jbe, ja, jb, jae, <9
3494    |  jmp <6
3495    |.else
3496    |  checknum RA, ->vmeta_comp
3497    |  checknum RD, ->vmeta_comp
3498    |.endif
3499    |1:
3500    |  movsd xmm0, qword [BASE+RD*8]
3501    |2:
3502    |  add PC, 4
3503    |  ucomisd xmm0, qword [BASE+RA*8]	// Operands are swapped (RD vs. RA), hence the unsigned conditions below.
3504    |3:
3505    |  // Unordered: all of ZF CF PF set, ordered: PF clear.
3506    |  // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
3507    |.if DUALNUM
3508    |  jmp_comp jbe, ja, jb, jae, <9
3509    |  jmp <6
3510    |.else
3511    |  jmp_comp jbe, ja, jb, jae, >1	// Jump to 1 (no branch) when the comparison is false.
3512    |  movzx RD, PC_RD
3513    |  branchPC RD
3514    |1:
3515    |  ins_next
3516    |.endif
3517    break;
3518
3519  case BC_ISEQV: case BC_ISNEV:  /* Equality test of two stack slots. Also hosts the iseqne_fp/iseqne_end tails that other ISEQ/ISNE cases reach via goto. */
3520    vk = op == BC_ISEQV;  /* vk = 1 for the EQ flavor, 0 for the NE flavor. */
3521    |  ins_AD	// RA = src1, RD = src2, JMP with RD = target
3522    |  mov RB, [BASE+RD*8+4]		// RB = type word of src2.
3523    |  add PC, 4
3524    |.if DUALNUM
3525    |  cmp RB, LJ_TISNUM; jne >7
3526    |  checkint RA, >8
3527    |  mov RB, dword [BASE+RD*8]		// Both are integers: compare the 32 bit payloads.
3528    |  cmp RB, dword [BASE+RA*8]
3529    if (vk) {
3530      |  jne >9
3531    } else {
3532      |  je >9
3533    }
3534    |  movzx RD, PC_RD
3535    |  branchPC RD
3536    |9:
3537    |  ins_next
3538    |
3539    |7:  // RD is not an integer.
3540    |  ja >5
3541    |  // RD is a number.
3542    |  cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
3543    |  // RD is a number, RA is an integer.
3544    |  cvtsi2sd xmm0, dword [BASE+RA*8]
3545    |  jmp >2
3546    |
3547    |8:  // RD is an integer, RA is not an integer.
3548    |  ja >5
3549    |  // RD is an integer, RA is a number.
3550    |  cvtsi2sd xmm0, dword [BASE+RD*8]
3551    |  ucomisd xmm0, qword [BASE+RA*8]
3552    |  jmp >4
3553    |
3554    |.else
3555    |  cmp RB, LJ_TISNUM; jae >5
3556    |  checknum RA, >5
3557    |.endif
3558    |1:
3559    |  movsd xmm0, qword [BASE+RA*8]
3560    |2:
3561    |  ucomisd xmm0, qword [BASE+RD*8]
3562    |4:
3563  iseqne_fp:  /* Shared tail: expects the flags of a ucomisd. Entered via goto from the BC_ISEQN/BC_ISNEN case. */
3564    if (vk) {
3565      |  jp >2				// Unordered means not equal.
3566      |  jne >2
3567    } else {
3568      |  jp >2				// Unordered means not equal.
3569      |  je >1
3570    }
3571  iseqne_end:  /* Shared tail: defines labels 1 (EQ) and 2 (NE) and the branch. Entered via goto from the BC_ISEQS/BC_ISNES case. */
3572    if (vk) {
3573      |1:				// EQ: Branch to the target.
3574      |  movzx RD, PC_RD
3575      |  branchPC RD
3576      |2:				// NE: Fallthrough to next instruction.
3577      |.if not FFI
3578      |3:				// Without FFI, label 3 is the same as the NE path.
3579      |.endif
3580    } else {
3581      |.if not FFI
3582      |3:				// Without FFI, label 3 is the same as the NE path.
3583      |.endif
3584      |2:				// NE: Branch to the target.
3585      |  movzx RD, PC_RD
3586      |  branchPC RD
3587      |1:				// EQ: Fallthrough to next instruction.
3588    }
3589    if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
3590		       op == BC_ISEQN || op == BC_ISNEN)) {
3591      |  jmp <9				// These ops already emitted an ins_next at label 9.
3592    } else {
3593      |  ins_next
3594    }
3595    |
3596    if (op == BC_ISEQV || op == BC_ISNEV) {
3597      |5:  // Either or both types are not numbers.
3598      |.if FFI
3599      |  cmp RB, LJ_TCDATA; je ->vmeta_equal_cd
3600      |  checktp RA, LJ_TCDATA; je ->vmeta_equal_cd
3601      |.endif
3602      |  checktp RA, RB			// Compare types.
3603      |  jne <2				// Not the same type?
3604      |  cmp RB, LJ_TISPRI
3605      |  jae <1				// Same type and primitive type?
3606      |
3607      |  // Same types and not a primitive type. Compare GCobj or pvalue.
3608      |  mov RA, [BASE+RA*8]
3609      |  mov RD, [BASE+RD*8]
3610      |  cmp RA, RD
3611      |  je <1				// Same GCobjs or pvalues?
3612      |  cmp RB, LJ_TISTABUD
3613      |  ja <2				// Different objects and not table/ud?
3614      |.if X64
3615      |  cmp RB, LJ_TUDATA		// And not 64 bit lightuserdata.
3616      |  jb <2
3617      |.endif
3618      |
3619      |  // Different tables or userdatas. Need to check __eq metamethod.
3620      |  // Field metatable must be at same offset for GCtab and GCudata!
3621      |  mov TAB:RB, TAB:RA->metatable
3622      |  test TAB:RB, TAB:RB
3623      |  jz <2				// No metatable?
3624      |  test byte TAB:RB->nomm, 1<<MM_eq
3625      |  jnz <2				// Or 'no __eq' flag set?
3626      if (vk) {
3627	|  xor RB, RB			// ne = 0
3628      } else {
3629	|  mov RB, 1			// ne = 1
3630      }
3631      |  jmp ->vmeta_equal		// Handle __eq metamethod.
3632    } else {
3633      |.if FFI
3634      |3:  // With FFI: operand type mismatch, only a cdata operand needs further handling.
3635      |  cmp RB, LJ_TCDATA
3636      if (LJ_DUALNUM && vk) {
3637	|  jne <9
3638      } else {
3639	|  jne <2
3640      }
3641      |  jmp ->vmeta_equal_cd
3642      |.endif
3643    }
3644    break;
3645  case BC_ISEQS: case BC_ISNES:  /* Equality test of a stack slot against a string constant; finishes in the shared iseqne_end tail. */
3646    vk = op == BC_ISEQS;  /* vk = 1 for the EQ flavor, 0 for the NE flavor. */
3647    |  ins_AND	// RA = src, RD = str const, JMP with RD = target
3648    |  mov RB, [BASE+RA*8+4]		// RB = type word of src.
3649    |  add PC, 4
3650    |  cmp RB, LJ_TSTR; jne >3		// Not a string: label 3 is emitted by the shared tail.
3651    |  mov RA, [BASE+RA*8]
3652    |  cmp RA, [KBASE+RD*4]		// Strings are compared by object pointer.
3653  iseqne_test:  /* Also entered via goto from the BC_ISEQP/BC_ISNEP case, with compare flags already set. */
3654    if (vk) {
3655      |  jne >2
3656    } else {
3657      |  je >1
3658    }
3659    goto iseqne_end;
3660  case BC_ISEQN: case BC_ISNEN:  /* Equality test of a stack slot against a number constant; finishes in the shared iseqne_fp tail. */
3661    vk = op == BC_ISEQN;  /* vk = 1 for the EQ flavor, 0 for the NE flavor. */
3662    |  ins_AD	// RA = src, RD = num const, JMP with RD = target
3663    |  mov RB, [BASE+RA*8+4]		// RB = type word of src.
3664    |  add PC, 4
3665    |.if DUALNUM
3666    |  cmp RB, LJ_TISNUM; jne >7
3667    |  cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8
3668    |  mov RB, dword [KBASE+RD*8]	// Both are integers: compare the 32 bit payloads.
3669    |  cmp RB, dword [BASE+RA*8]
3670    if (vk) {
3671      |  jne >9
3672    } else {
3673      |  je >9
3674    }
3675    |  movzx RD, PC_RD
3676    |  branchPC RD
3677    |9:
3678    |  ins_next
3679    |
3680    |7:  // RA is not an integer.
3681    |  ja >3
3682    |  // RA is a number.
3683    |  cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
3684    |  // RA is a number, RD is an integer.
3685    |  cvtsi2sd xmm0, dword [KBASE+RD*8]
3686    |  jmp >2
3687    |
3688    |8:  // RA is an integer, RD is a number.
3689    |  cvtsi2sd xmm0, dword [BASE+RA*8]
3690    |  ucomisd xmm0, qword [KBASE+RD*8]
3691    |  jmp >4
3692    |.else
3693    |  cmp RB, LJ_TISNUM; jae >3		// Not a number: label 3 is emitted by the shared tail.
3694    |.endif
3695    |1:
3696    |  movsd xmm0, qword [KBASE+RD*8]
3697    |2:
3698    |  ucomisd xmm0, qword [BASE+RA*8]
3699    |4:
3700    goto iseqne_fp;
3701  case BC_ISEQP: case BC_ISNEP:  /* Equality test of a stack slot's type word against a primitive type operand. */
3702    vk = op == BC_ISEQP;  /* vk = 1 for the EQ flavor, 0 for the NE flavor. */
3703    |  ins_AND	// RA = src, RD = primitive type (~), JMP with RD = target
3704    |  mov RB, [BASE+RA*8+4]
3705    |  add PC, 4
3706    |  cmp RB, RD				// Only the type words are compared.
3707    if (!LJ_HASFFI) goto iseqne_test;  /* Without FFI the shared test and tail are reused. */
3708    if (vk) {
3709      |  jne >3
3710      |  movzx RD, PC_RD
3711      |  branchPC RD
3712      |2:
3713      |  ins_next
3714      |3:  // Types differ: only a cdata operand needs further handling.
3715      |  cmp RB, LJ_TCDATA; jne <2
3716      |  jmp ->vmeta_equal_cd
3717    } else {
3718      |  je >2
3719      |  cmp RB, LJ_TCDATA; je ->vmeta_equal_cd
3720      |  movzx RD, PC_RD
3721      |  branchPC RD
3722      |2:
3723      |  ins_next
3724    }
3725    break;
3726
3727  /* -- Unary test and copy ops ------------------------------------------- */
3728
3729  case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:  /* Truthiness test of a slot; the ISTC/ISFC flavors also copy the slot to RA when branching. */
3730    |  ins_AD	// RA = dst or unused, RD = src, JMP with RD = target
3731    |  mov RB, [BASE+RD*8+4]		// RB = type word of src.
3732    |  add PC, 4
3733    |  cmp RB, LJ_TISTRUECOND
3734    if (op == BC_IST || op == BC_ISTC) {
3735      |  jae >1				// IST/ISTC: no branch if the type word is >= LJ_TISTRUECOND (unsigned).
3736    } else {
3737      |  jb >1				// ISF/ISFC: no branch if the type word is < LJ_TISTRUECOND (unsigned).
3738    }
3739    if (op == BC_ISTC || op == BC_ISFC) {
3740      |  mov [BASE+RA*8+4], RB		// Copy src to dst: type word first, then the payload.
3741      |  mov RB, [BASE+RD*8]
3742      |  mov [BASE+RA*8], RB
3743    }
3744    |  movzx RD, PC_RD
3745    |  branchPC RD
3746    |1:					// Fallthrough to the next instruction.
3747    |  ins_next
3748    break;
3749
3750  case BC_ISTYPE:  /* Type check of a slot; goes to vmeta_istype on mismatch. */
3751    |  ins_AD	// RA = src, RD = -type
3752    |  add RD, [BASE+RA*8+4]		// -type + actual type word is zero only on a match.
3753    |  jne ->vmeta_istype
3754    |  ins_next
3755    break;
3756  case BC_ISNUM:  /* Number check of a slot; goes to vmeta_istype on failure. */
3757    |  ins_AD	// RA = src, RD = -(TISNUM-1)
3758    |  checknum RA, ->vmeta_istype	// checknum is defined elsewhere; RD is not otherwise used here.
3759    |  ins_next
3760    break;
3761
3762  /* -- Unary ops --------------------------------------------------------- */
3763
3764  case BC_MOV:  /* Copies stack slot RD to stack slot RA. */
3765    |  ins_AD	// RA = dst, RD = src
3766    |.if X64
3767    |  mov RBa, [BASE+RD*8]		// One 64 bit move copies the whole slot.
3768    |  mov [BASE+RA*8], RBa
3769    |.else
3770    |  mov RB, [BASE+RD*8+4]		// Two 32 bit halves. RD is overwritten by the second load.
3771    |  mov RD, [BASE+RD*8]
3772    |  mov [BASE+RA*8+4], RB
3773    |  mov [BASE+RA*8], RD
3774    |.endif
3775    |  ins_next_				// Note: the ins_next_ variant, not ins_next (both defined elsewhere).
3776    break;
3777  case BC_NOT:  /* Logical not: writes only a type word to slot RA, derived from the type of slot RD. */
3778    |  ins_AD	// RA = dst, RD = src
3779    |  xor RB, RB
3780    |  checktp RD, LJ_TISTRUECOND	// Presumably a compare of RD's type word (macro defined elsewhere); its carry is used below.
3781    |  adc RB, LJ_TTRUE			// RB = LJ_TTRUE + carry, computed without a branch.
3782    |  mov [BASE+RA*8+4], RB
3783    |  ins_next
3784    break;
3785  case BC_UNM:  /* Unary minus of slot RD into slot RA; non-numbers go to vmeta_unm. */
3786    |  ins_AD	// RA = dst, RD = src
3787    |.if DUALNUM
3788    |  checkint RD, >5
3789    |  mov RB, [BASE+RD*8]
3790    |  neg RB
3791    |  jo >4				// Overflow: the negated value does not fit in 32 bits.
3792    |  mov dword [BASE+RA*8+4], LJ_TISNUM
3793    |  mov dword [BASE+RA*8], RB
3794    |9:
3795    |  ins_next
3796    |4:  // Overflow case: store the result as a double constant instead.
3797    |  mov dword [BASE+RA*8+4], 0x41e00000  // 2^31.
3798    |  mov dword [BASE+RA*8], 0
3799    |  jmp <9
3800    |5:
3801    |  ja ->vmeta_unm
3802    |.else
3803    |  checknum RD, ->vmeta_unm
3804    |.endif
3805    |  movsd xmm0, qword [BASE+RD*8]
3806    |  sseconst_sign xmm1, RDa		// Presumably loads a sign bit mask, using RDa as scratch (macro defined elsewhere).
3807    |  xorps xmm0, xmm1			// XOR with the mask; flips the sign if xmm1 is the sign bit.
3808    |  movsd qword [BASE+RA*8], xmm0
3809    |.if DUALNUM
3810    |  jmp <9
3811    |.else
3812    |  ins_next
3813    |.endif
3814    break;
3815  case BC_LEN:  /* Length operator: strings inline, tables via lj_tab_len, everything else via vmeta_len. */
3816    |  ins_AD	// RA = dst, RD = src
3817    |  checkstr RD, >2
3818    |  mov STR:RD, [BASE+RD*8]
3819    |.if DUALNUM
3820    |  mov RD, dword STR:RD->len
3821    |1:  // Store the length in RD as an integer. Also the return point of the table path.
3822    |  mov dword [BASE+RA*8+4], LJ_TISNUM
3823    |  mov dword [BASE+RA*8], RD
3824    |.else
3825    |  xorps xmm0, xmm0			// Clear xmm0 before the conversion (avoids a dependency on its old value).
3826    |  cvtsi2sd xmm0, dword STR:RD->len
3827    |1:  // Store the length in xmm0 as a double. Also the return point of the table path.
3828    |  movsd qword [BASE+RA*8], xmm0
3829    |.endif
3830    |  ins_next
3831    |2:  // Not a string.
3832    |  checktab RD, ->vmeta_len
3833    |  mov TAB:FCARG1, [BASE+RD*8]
3834#if LJ_52
3835    |  mov TAB:RB, TAB:FCARG1->metatable
3836    |  cmp TAB:RB, 0
3837    |  jnz >9				// Has a metatable: check for __len first.
3838    |3:
3839#endif
3840    |->BC_LEN_Z:
3841    |  mov RB, BASE			// Save BASE.
3842    |  call extern lj_tab_len@4		// (GCtab *t)
3843    |  // Length of table returned in eax (RD).
3844    |.if DUALNUM
3845    |  // Nothing to do.
3846    |.else
3847    |  cvtsi2sd xmm0, RD
3848    |.endif
3849    |  mov BASE, RB			// Restore BASE.
3850    |  movzx RA, PC_RA			// Reload RA from the instruction; it is not kept across the call.
3851    |  jmp <1
3852#if LJ_52
3853    |9:  // Check for __len.
3854    |  test byte TAB:RB->nomm, 1<<MM_len
3855    |  jnz <3				// 'no __len' flag set: use the plain table length.
3856    |  jmp ->vmeta_len			// 'no __len' flag NOT set: check.
3857#endif
3858    break;
3859
3860  /* -- Binary ops -------------------------------------------------------- */
3861
3862    |.macro ins_arithpre, sseins, ssereg	// Type-checks both operands, loads the first into xmm0 and applies 'sseins ssereg, <second operand>'.
3863    |  ins_ABC
3864    ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);  /* Operand variant: 0 = VN, 1 = NV, else VV (relies on the opcode layout). */
3865    ||switch (vk) {
3866    ||case 0:
3867    |   checknum RB, ->vmeta_arith_vn
3868    |   .if DUALNUM
3869    |     cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
3870    |   .endif
3871    |   movsd xmm0, qword [BASE+RB*8]
3872    |   sseins ssereg, qword [KBASE+RC*8]
3873    ||  break;
3874    ||case 1:
3875    |   checknum RB, ->vmeta_arith_nv
3876    |   .if DUALNUM
3877    |     cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
3878    |   .endif
3879    |   movsd xmm0, qword [KBASE+RC*8]	// NV: the constant is the first operand.
3880    |   sseins ssereg, qword [BASE+RB*8]
3881    ||  break;
3882    ||default:
3883    |   checknum RB, ->vmeta_arith_vv
3884    |   checknum RC, ->vmeta_arith_vv
3885    |   movsd xmm0, qword [BASE+RB*8]
3886    |   sseins ssereg, qword [BASE+RC*8]
3887    ||  break;
3888    ||}
3889    |.endmacro
3890    |
3891    |.macro ins_arithdn, intins	// Integer arithmetic 'intins' on two integer operands; complete instruction including the store and ins_next.
3892    |  ins_ABC
3893    ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);  /* Operand variant: 0 = VN, 1 = NV, else VV (relies on the opcode layout). */
3894    ||switch (vk) {
3895    ||case 0:
3896    |   checkint RB, ->vmeta_arith_vn
3897    |   cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_vn
3898    |   mov RB, [BASE+RB*8]
3899    |   intins RB, [KBASE+RC*8]; jo ->vmeta_arith_vno	// Overflow leaves the integer path.
3900    ||  break;
3901    ||case 1:
3902    |   checkint RB, ->vmeta_arith_nv
3903    |   cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_nv
3904    |   mov RC, [KBASE+RC*8]
3905    |   intins RC, [BASE+RB*8]; jo ->vmeta_arith_nvo	// NV: the result is in RC, not RB.
3906    ||  break;
3907    ||default:
3908    |   checkint RB, ->vmeta_arith_vv
3909    |   checkint RC, ->vmeta_arith_vv
3910    |   mov RB, [BASE+RB*8]
3911    |   intins RB, [BASE+RC*8]; jo ->vmeta_arith_vvo
3912    ||  break;
3913    ||}
3914    |  mov dword [BASE+RA*8+4], LJ_TISNUM
3915    ||if (vk == 1) {
3916    |   mov dword [BASE+RA*8], RC
3917    ||} else {
3918    |   mov dword [BASE+RA*8], RB
3919    ||}
3920    |  ins_next
3921    |.endmacro
3922    |
3923    |.macro ins_arithpost		// Stores the double in xmm0 to slot RA.
3924    |  movsd qword [BASE+RA*8], xmm0
3925    |.endmacro
3926    |
3927    |.macro ins_arith, sseins	// One-argument form: SSE only.
3928    |  ins_arithpre sseins, xmm0
3929    |  ins_arithpost
3930    |  ins_next
3931    |.endmacro
3932    |
3933    |.macro ins_arith, intins, sseins	// Two-argument form: integer path under DUALNUM, otherwise the SSE form above.
3934    |.if DUALNUM
3935    |  ins_arithdn intins
3936    |.else
3937    |  ins_arith, sseins
3938    |.endif
3939    |.endmacro
3940
3941    |  // RA = dst, RB = src1 or num const, RC = src2 or num const
3942  case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:  /* Addition; all three operand variants share one macro expansion. */
3943    |  ins_arith add, addsd
3944    break;
3945  case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:  /* Subtraction. */
3946    |  ins_arith sub, subsd
3947    break;
3948  case BC_MULVN: case BC_MULNV: case BC_MULVV:  /* Multiplication. */
3949    |  ins_arith imul, mulsd
3950    break;
3951  case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:  /* Division: uses the one-argument ins_arith, so there is no integer path. */
3952    |  ins_arith divsd
3953    break;
3954  case BC_MODVN:  /* Modulo via the vm_mod subroutine; also provides the shared BC_MODVN_Z tail. */
3955    |  ins_arithpre movsd, xmm1		// First operand in xmm0, second loaded into xmm1.
3956    |->BC_MODVN_Z:
3957    |  call ->vm_mod
3958    |  ins_arithpost
3959    |  ins_next
3960    break;
3961  case BC_MODNV: case BC_MODVV:  /* Same operand setup, then jumps to the tail emitted for BC_MODVN. */
3962    |  ins_arithpre movsd, xmm1
3963    |  jmp ->BC_MODVN_Z			// Avoid 3 copies. It's slow anyway.
3964    break;
3965  case BC_POW:  /* Power operator: calls the external C function pow with both operands as doubles. */
3966    |  ins_arithpre movsd, xmm1		// First operand in xmm0, second loaded into xmm1.
3967    |  mov RB, BASE			// Keep BASE in RB across the call.
3968    |.if not X64
3969    |  movsd FPARG1, xmm0			// x86: both doubles are passed in memory.
3970    |  movsd FPARG3, xmm1
3971    |.endif
3972    |  call extern pow
3973    |  movzx RA, PC_RA			// Reload RA from the instruction; it is not kept across the call.
3974    |  mov BASE, RB
3975    |.if X64
3976    |  ins_arithpost
3977    |.else
3978    |  fstp qword [BASE+RA*8]		// x86: pop the result off the x87 stack into slot RA.
3979    |.endif
3980    |  ins_next
3981    break;
3982
3983  case BC_CAT:  /* Concatenation of slots RB..RC via lj_meta_cat; the result is copied from slot RB to slot RA. */
3984    |  ins_ABC	// RA = dst, RB = src_start, RC = src_end
3985    |.if X64
3986    |  mov L:CARG1d, SAVE_L
3987    |  mov L:CARG1d->base, BASE
3988    |  lea CARG2d, [BASE+RC*8]		// top = address of the last source slot.
3989    |  mov CARG3d, RC
3990    |  sub CARG3d, RB			// left = RC - RB.
3991    |->BC_CAT_Z:  // Global label; its other users are outside this block.
3992    |  mov L:RB, L:CARG1d
3993    |.else
3994    |  lea RA, [BASE+RC*8]		// top = address of the last source slot.
3995    |  sub RC, RB				// left = RC - RB.
3996    |  mov ARG2, RA
3997    |  mov ARG3, RC
3998    |->BC_CAT_Z:  // Global label; its other users are outside this block.
3999    |  mov L:RB, SAVE_L
4000    |  mov ARG1, L:RB
4001    |  mov L:RB->base, BASE
4002    |.endif
4003    |  mov SAVE_PC, PC
4004    |  call extern lj_meta_cat		// (lua_State *L, TValue *top, int left)
4005    |  // NULL (finished) or TValue * (metamethod) returned in eax (RC).
4006    |  mov BASE, L:RB->base		// Reload BASE from L after the call.
4007    |  test RC, RC
4008    |  jnz ->vmeta_binop
4009    |  movzx RB, PC_RB			// Copy result to Stk[RA] from Stk[RB].
4010    |  movzx RA, PC_RA
4011    |.if X64
4012    |  mov RCa, [BASE+RB*8]
4013    |  mov [BASE+RA*8], RCa
4014    |.else
4015    |  mov RC, [BASE+RB*8+4]
4016    |  mov RB, [BASE+RB*8]
4017    |  mov [BASE+RA*8+4], RC
4018    |  mov [BASE+RA*8], RB
4019    |.endif
4020    |  ins_next
4021    break;
4022
4023  /* -- Constant ops ------------------------------------------------------ */
4024
4025  case BC_KSTR:  /* Loads a string constant into slot RA. */
4026    |  ins_AND	// RA = dst, RD = str const (~)
4027    |  mov RD, [KBASE+RD*4]		// 32 bit object pointer from the constant table.
4028    |  mov dword [BASE+RA*8+4], LJ_TSTR
4029    |  mov [BASE+RA*8], RD
4030    |  ins_next
4031    break;
4032  case BC_KCDATA:  /* Loads a cdata constant into slot RA; no code is emitted unless FFI is enabled. */
4033    |.if FFI
4034    |  ins_AND	// RA = dst, RD = cdata const (~)
4035    |  mov RD, [KBASE+RD*4]		// 32 bit object pointer from the constant table.
4036    |  mov dword [BASE+RA*8+4], LJ_TCDATA
4037    |  mov [BASE+RA*8], RD
4038    |  ins_next
4039    |.endif
4040    break;
4041  case BC_KSHORT:  /* Loads a signed 16 bit literal into slot RA, as an integer (DUALNUM) or as a double. */
4042    |  ins_AD	// RA = dst, RD = signed int16 literal
4043    |.if DUALNUM
4044    |  movsx RD, RDW			// Sign-extend literal.
4045    |  mov dword [BASE+RA*8+4], LJ_TISNUM
4046    |  mov dword [BASE+RA*8], RD
4047    |.else
4048    |  movsx RD, RDW			// Sign-extend literal.
4049    |  cvtsi2sd xmm0, RD
4050    |  movsd qword [BASE+RA*8], xmm0
4051    |.endif
4052    |  ins_next
4053    break;
4054  case BC_KNUM:  /* Copies an 8 byte number constant into slot RA. */
4055    |  ins_AD	// RA = dst, RD = num const
4056    |  movsd xmm0, qword [KBASE+RD*8]
4057    |  movsd qword [BASE+RA*8], xmm0
4058    |  ins_next
4059    break;
4060  case BC_KPRI:  /* Sets slot RA to a primitive value; only the type word is written. */
4061    |  ins_AND	// RA = dst, RD = primitive type (~)
4062    |  mov [BASE+RA*8+4], RD
4063    |  ins_next
4064    break;
4065  case BC_KNIL:  /* Sets slots RA..RD to nil by writing their type words. */
4066    |  ins_AD	// RA = dst_start, RD = dst_end
4067    |  lea RA, [BASE+RA*8+12]		// RA -> type word of slot dst_start+1.
4068    |  lea RD, [BASE+RD*8+4]		// RD -> type word of slot dst_end.
4069    |  mov RB, LJ_TNIL
4070    |  mov [RA-8], RB			// Sets minimum 2 slots.
4071    |1:  // The loop body runs at least once, so slot dst_start+1 is always written.
4072    |  mov [RA], RB
4073    |  add RA, 8
4074    |  cmp RA, RD
4075    |  jbe <1
4076    |  ins_next
4077    break;
4078
4079  /* -- Upvalue and function ops ------------------------------------------ */
4080
4081  case BC_UGET:  /* Copies the value of upvalue RD of the current function into slot RA. */
4082    |  ins_AD	// RA = dst, RD = upvalue #
4083    |  mov LFUNC:RB, [BASE-8]		// Current function object from the slot below BASE.
4084    |  mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)]
4085    |  mov RB, UPVAL:RB->v			// RB = pointer to the upvalue's value.
4086    |.if X64
4087    |  mov RDa, [RB]
4088    |  mov [BASE+RA*8], RDa
4089    |.else
4090    |  mov RD, [RB+4]
4091    |  mov RB, [RB]
4092    |  mov [BASE+RA*8+4], RD
4093    |  mov [BASE+RA*8], RB
4094    |.endif
4095    |  ins_next
4096    break;
4097  case BC_USETV:  /* Stores slot RD into upvalue RA and runs the GC write barrier check for closed upvalues. */
4098#define TV2MARKOFS \
4099 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
4100    |  ins_AD	// RA = upvalue #, RD = src
4101    |  mov LFUNC:RB, [BASE-8]
4102    |  mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4103    |  cmp byte UPVAL:RB->closed, 0	// These flags are consumed by the 'jz >1' below; the movs in between leave them intact.
4104    |  mov RB, UPVAL:RB->v
4105    |  mov RA, [BASE+RD*8]
4106    |  mov RD, [BASE+RD*8+4]
4107    |  mov [RB], RA
4108    |  mov [RB+4], RD
4109    |  jz >1				// Upvalue not closed: done.
4110    |  // Check barrier for closed upvalue.
4111    |  test byte [RB+TV2MARKOFS], LJ_GC_BLACK		// isblack(uv)
4112    |  jnz >2
4113    |1:
4114    |  ins_next
4115    |
4116    |2:  // Upvalue is black. Check if new value is collectable and white.
4117    |  sub RD, LJ_TISGCV
4118    |  cmp RD, LJ_TNUMX - LJ_TISGCV			// tvisgcv(v)
4119    |  jbe <1
4120    |  test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES	// iswhite(v)
4121    |  jz <1
4122    |  // Crossed a write barrier. Move the barrier forward.
4123    |.if X64 and not X64WIN
4124    |  mov FCARG2, RB
4125    |  mov RB, BASE			// Save BASE.
4126    |.else
4127    |  xchg FCARG2, RB			// Save BASE (FCARG2 == BASE).
4128    |.endif
4129    |  lea GL:FCARG1, [DISPATCH+GG_DISP2G]
4130    |  call extern lj_gc_barrieruv@8	// (global_State *g, TValue *tv)
4131    |  mov BASE, RB			// Restore BASE.
4132    |  jmp <1
4133    break;
4134#undef TV2MARKOFS
4135  case BC_USETS:  /* Stores a string constant into upvalue RA and runs the GC write barrier check. */
4136    |  ins_AND	// RA = upvalue #, RD = str const (~)
4137    |  mov LFUNC:RB, [BASE-8]
4138    |  mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4139    |  mov GCOBJ:RA, [KBASE+RD*4]
4140    |  mov RD, UPVAL:RB->v			// RD = pointer to the upvalue's value.
4141    |  mov [RD], GCOBJ:RA
4142    |  mov dword [RD+4], LJ_TSTR
4143    |  test byte UPVAL:RB->marked, LJ_GC_BLACK		// isblack(uv)
4144    |  jnz >2
4145    |1:
4146    |  ins_next
4147    |
4148    |2:  // Check if string is white and ensure upvalue is closed.
4149    |  test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES	// iswhite(str)
4150    |  jz <1
4151    |  cmp byte UPVAL:RB->closed, 0
4152    |  jz <1
4153    |  // Crossed a write barrier. Move the barrier forward.
4154    |  mov RB, BASE			// Save BASE (FCARG2 == BASE).
4155    |  mov FCARG2, RD
4156    |  lea GL:FCARG1, [DISPATCH+GG_DISP2G]
4157    |  call extern lj_gc_barrieruv@8	// (global_State *g, TValue *tv)
4158    |  mov BASE, RB			// Restore BASE.
4159    |  jmp <1
4160    break;
4161  case BC_USETN:  /* Stores an 8 byte number constant into upvalue RA; no barrier code is emitted here. */
4162    |  ins_AD	// RA = upvalue #, RD = num const
4163    |  mov LFUNC:RB, [BASE-8]
4164    |  movsd xmm0, qword [KBASE+RD*8]
4165    |  mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4166    |  mov RA, UPVAL:RB->v			// RA = pointer to the upvalue's value.
4167    |  movsd qword [RA], xmm0
4168    |  ins_next
4169    break;
4170  case BC_USETP:  /* Sets upvalue RA to a primitive value; only the type word is written. */
4171    |  ins_AND	// RA = upvalue #, RD = primitive type (~)
4172    |  mov LFUNC:RB, [BASE-8]
4173    |  mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4174    |  mov RA, UPVAL:RB->v			// RA = pointer to the upvalue's value.
4175    |  mov [RA+4], RD
4176    |  ins_next
4177    break;
4178  case BC_UCLO:  /* Jumps to the target and, if L has open upvalues, calls lj_func_closeuv for the level at slot RA. */
4179    |  ins_AD	// RA = level, RD = target
4180    |  branchPC RD			// Do this first to free RD.
4181    |  mov L:RB, SAVE_L
4182    |  cmp dword L:RB->openupval, 0
4183    |  je >1				// No open upvalues: skip the call.
4184    |  mov L:RB->base, BASE
4185    |  lea FCARG2, [BASE+RA*8]		// Caveat: FCARG2 == BASE
4186    |  mov L:FCARG1, L:RB		// Caveat: FCARG1 == RA
4187    |  call extern lj_func_closeuv@8	// (lua_State *L, TValue *level)
4188    |  mov BASE, L:RB->base		// Reload BASE; it was overwritten as FCARG2 above.
4189    |1:
4190    |  ins_next
4191    break;
4192
4193  case BC_FNEW:
    /* FNEW: create a closure from a prototype constant and store it in slot RA.
    ** Calls lj_func_newL_gc(L, pt, parent) with the function at BASE-8 as the
    ** parent. L->base and SAVE_PC are written before the call; BASE and RA are
    ** reloaded afterwards (RA from the instruction word via PC_RA).
    */
4194    |  ins_AND	// RA = dst, RD = proto const (~) (holding function prototype)
4195    |.if X64
4196    |  mov L:RB, SAVE_L
4197    |  mov L:RB->base, BASE		// Caveat: CARG2d/CARG3d may be BASE.
4198    |  mov CARG3d, [BASE-8]
4199    |  mov CARG2d, [KBASE+RD*4]		// Fetch GCproto *.
4200    |  mov CARG1d, L:RB
4201    |.else
4202    |  mov LFUNC:RA, [BASE-8]
4203    |  mov PROTO:RD, [KBASE+RD*4]	// Fetch GCproto *.
4204    |  mov L:RB, SAVE_L
4205    |  mov ARG3, LFUNC:RA
4206    |  mov ARG2, PROTO:RD
4207    |  mov ARG1, L:RB
4208    |  mov L:RB->base, BASE
4209    |.endif
4210    |  mov SAVE_PC, PC
4211    |  // (lua_State *L, GCproto *pt, GCfuncL *parent)
4212    |  call extern lj_func_newL_gc
4213    |  // GCfuncL * returned in eax (RC).
4214    |  mov BASE, L:RB->base
4215    |  movzx RA, PC_RA
4216    |  mov [BASE+RA*8], LFUNC:RC
4217    |  mov dword [BASE+RA*8+4], LJ_TFUNC
4218    |  ins_next
4219    break;
4220
4221  /* -- Table ops --------------------------------------------------------- */
4222
4223  case BC_TNEW:
    /* TNEW: create a new table and store it in slot RA.
    ** RD packs both sizes: the low 11 bits are the array size, the bits above
    ** are passed on as hbits. An array size field of 0x7ff is replaced by
    ** 0x801 before the call. A GC step (lj_gc_step_fixtop) runs first when
    ** gc.total has reached gc.threshold; RD is reloaded afterwards.
    */
4224    |  ins_AD	// RA = dst, RD = hbits|asize
4225    |  mov L:RB, SAVE_L
4226    |  mov L:RB->base, BASE
4227    |  mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
4228    |  cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
4229    |  mov SAVE_PC, PC
4230    |  jae >5
4231    |1:
4232    |.if X64
4233    |  mov CARG3d, RD
4234    |  and RD, 0x7ff
4235    |  shr CARG3d, 11
4236    |.else
4237    |  mov RA, RD
4238    |  and RD, 0x7ff
4239    |  shr RA, 11
4240    |  mov ARG3, RA
4241    |.endif
4242    |  cmp RD, 0x7ff
4243    |  je >3
4244    |2:
4245    |.if X64
4246    |  mov L:CARG1d, L:RB
4247    |  mov CARG2d, RD
4248    |.else
4249    |  mov ARG1, L:RB
4250    |  mov ARG2, RD
4251    |.endif
4252    |  call extern lj_tab_new  // (lua_State *L, int32_t asize, uint32_t hbits)
4253    |  // Table * returned in eax (RC).
4254    |  mov BASE, L:RB->base
4255    |  movzx RA, PC_RA
4256    |  mov [BASE+RA*8], TAB:RC
4257    |  mov dword [BASE+RA*8+4], LJ_TTAB
4258    |  ins_next
4259    |3:  // Turn 0x7ff into 0x801.
4260    |  mov RD, 0x801
4261    |  jmp <2
4262    |5:
4263    |  mov L:FCARG1, L:RB
4264    |  call extern lj_gc_step_fixtop@4	// (lua_State *L)
4265    |  movzx RD, PC_RD
4266    |  jmp <1
4267    break;
4268  case BC_TDUP:
    /* TDUP: duplicate a template table constant and store the copy in slot RA.
    ** Calls lj_tab_dup(L, kt). A GC step (lj_gc_step_fixtop) runs first when
    ** gc.total has reached gc.threshold; RD is then reloaded from the
    ** instruction word and inverted again before use.
    */
4269    |  ins_AND	// RA = dst, RD = table const (~) (holding template table)
4270    |  mov L:RB, SAVE_L
4271    |  mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
4272    |  mov SAVE_PC, PC
4273    |  cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
4274    |  mov L:RB->base, BASE
4275    |  jae >3
4276    |2:
4277    |  mov TAB:FCARG2, [KBASE+RD*4]	// Caveat: FCARG2 == BASE
4278    |  mov L:FCARG1, L:RB		// Caveat: FCARG1 == RA
4279    |  call extern lj_tab_dup@8		// (lua_State *L, Table *kt)
4280    |  // Table * returned in eax (RC).
4281    |  mov BASE, L:RB->base
4282    |  movzx RA, PC_RA
4283    |  mov [BASE+RA*8], TAB:RC
4284    |  mov dword [BASE+RA*8+4], LJ_TTAB
4285    |  ins_next
4286    |3:
4287    |  mov L:FCARG1, L:RB
4288    |  call extern lj_gc_step_fixtop@4	// (lua_State *L)
4289    |  movzx RD, PC_RD			// Need to reload RD.
4290    |  not RDa				// Re-apply the (~); presumably ins_AND did this at entry.
4291    |  jmp <2
4292    break;
4293
4294  case BC_GGET:
    /* GGET: load RB with the env table of the function at BASE-8 and RC with
    ** the string constant, then continue at the shared BC_TGETS_Z entry.
    */
4295    |  ins_AND	// RA = dst, RD = str const (~)
4296    |  mov LFUNC:RB, [BASE-8]
4297    |  mov TAB:RB, LFUNC:RB->env
4298    |  mov STR:RC, [KBASE+RD*4]
4299    |  jmp ->BC_TGETS_Z
4300    break;
4301  case BC_GSET:
    /* GSET: load RB with the env table of the function at BASE-8 and RC with
    ** the string constant, then continue at the shared BC_TSETS_Z entry.
    */
4302    |  ins_AND	// RA = src, RD = str const (~)
4303    |  mov LFUNC:RB, [BASE-8]
4304    |  mov TAB:RB, LFUNC:RB->env
4305    |  mov STR:RC, [KBASE+RD*4]
4306    |  jmp ->BC_TSETS_Z
4307    break;
4308
4309  case BC_TGETV:
    /* TGETV: dst = table[key] with the key taken from a stack slot.
    ** Fast path: the key is an integer (or a number that survives the
    ** int conversion round trip) and lies inside the array part. A nil array
    ** slot yields nil unless the metatable lacks the 'no __index' flag.
    ** String keys continue at BC_TGETS_Z. All other cases jump to
    ** vmeta_tgetv.
    */
4310    |  ins_ABC	// RA = dst, RB = table, RC = key
4311    |  checktab RB, ->vmeta_tgetv
4312    |  mov TAB:RB, [BASE+RB*8]
4313    |
4314    |  // Integer key?
4315    |.if DUALNUM
4316    |  checkint RC, >5
4317    |  mov RC, dword [BASE+RC*8]
4318    |.else
4319    |  // Convert number to int and back and compare.
4320    |  checknum RC, >5
4321    |  movsd xmm0, qword [BASE+RC*8]
4322    |  cvttsd2si RC, xmm0
4323    |  cvtsi2sd xmm1, RC
4324    |  ucomisd xmm0, xmm1
4325    |  jne ->vmeta_tgetv		// Generic numeric key? Use fallback.
4326    |.endif
4327    |  cmp RC, TAB:RB->asize	// Takes care of unordered, too.
4328    |  jae ->vmeta_tgetv		// Not in array part? Use fallback.
4329    |  shl RC, 3
4330    |  add RC, TAB:RB->array
4331    |  cmp dword [RC+4], LJ_TNIL	// Avoid overwriting RB in fastpath.
4332    |  je >2
4333    |  // Get array slot.
4334    |.if X64
4335    |  mov RBa, [RC]
4336    |  mov [BASE+RA*8], RBa
4337    |.else
4338    |  mov RB, [RC]
4339    |  mov RC, [RC+4]
4340    |  mov [BASE+RA*8], RB
4341    |  mov [BASE+RA*8+4], RC
4342    |.endif
4343    |1:
4344    |  ins_next
4345    |
4346    |2:  // Check for __index if table value is nil.
4347    |  cmp dword TAB:RB->metatable, 0	// Shouldn't overwrite RA for fastpath.
4348    |  jz >3
4349    |  mov TAB:RA, TAB:RB->metatable
4350    |  test byte TAB:RA->nomm, 1<<MM_index
4351    |  jz ->vmeta_tgetv			// 'no __index' flag NOT set: check.
4352    |  movzx RA, PC_RA			// Restore RA.
4353    |3:
4354    |  mov dword [BASE+RA*8+4], LJ_TNIL
4355    |  jmp <1
4356    |
4357    |5:  // String key?
4358    |  checkstr RC, ->vmeta_tgetv
4359    |  mov STR:RC, [BASE+RC*8]
4360    |  jmp ->BC_TGETS_Z
4361    break;
4362  case BC_TGETS:
    /* TGETS: dst = table[str] with a string constant key.
    ** The start node is node[hmask & str->sid]; the chain is followed via
    ** Node->next until a node with an LJ_TSTR key holding the same GCstr
    ** pointer is found. A missing key or a nil value yields nil unless the
    ** table has a metatable without the 'no __index' flag, in which case
    ** vmeta_tgets takes over. BC_TGETS_Z is also entered from other
    ** instructions with RB/RC already set up.
    */
4363    |  ins_ABC	// RA = dst, RB = table, RC = str const (~)
4364    |  not RCa
4365    |  mov STR:RC, [KBASE+RC*4]
4366    |  checktab RB, ->vmeta_tgets
4367    |  mov TAB:RB, [BASE+RB*8]
4368    |->BC_TGETS_Z:	// RB = GCtab *, RC = GCstr *, refetches PC_RA.
4369    |  mov RA, TAB:RB->hmask
4370    |  and RA, STR:RC->sid
4371    |  imul RA, #NODE
4372    |  add NODE:RA, TAB:RB->node
4373    |1:
4374    |  cmp dword NODE:RA->key.it, LJ_TSTR
4375    |  jne >4
4376    |  cmp dword NODE:RA->key.gcr, STR:RC
4377    |  jne >4
4378    |  // Ok, key found. Assumes: offsetof(Node, val) == 0
4379    |  cmp dword [RA+4], LJ_TNIL	// Avoid overwriting RB in fastpath.
4380    |  je >5				// Key found, but nil value?
4381    |  movzx RC, PC_RA
4382    |  // Get node value.
4383    |.if X64
4384    |  mov RBa, [RA]
4385    |  mov [BASE+RC*8], RBa
4386    |.else
4387    |  mov RB, [RA]
4388    |  mov RA, [RA+4]
4389    |  mov [BASE+RC*8], RB
4390    |  mov [BASE+RC*8+4], RA
4391    |.endif
4392    |2:
4393    |  ins_next
4394    |
4395    |3:
4396    |  movzx RC, PC_RA
4397    |  mov dword [BASE+RC*8+4], LJ_TNIL
4398    |  jmp <2
4399    |
4400    |4:  // Follow hash chain.
4401    |  mov NODE:RA, NODE:RA->next
4402    |  test NODE:RA, NODE:RA
4403    |  jnz <1
4404    |  // End of hash chain: key not found, nil result.
4405    |
4406    |5:  // Check for __index if table value is nil.
4407    |  mov TAB:RA, TAB:RB->metatable
4408    |  test TAB:RA, TAB:RA
4409    |  jz <3				// No metatable: done.
4410    |  test byte TAB:RA->nomm, 1<<MM_index
4411    |  jnz <3				// 'no __index' flag set: done.
4412    |  jmp ->vmeta_tgets		// Caveat: preserve STR:RC.
4413    break;
4414  case BC_TGETB:
    /* TGETB: dst = table[lit] with the byte literal in RC used as the index.
    ** Fast path: the index lies inside the array part. A nil array slot
    ** yields nil unless the metatable lacks the 'no __index' flag. All other
    ** cases jump to vmeta_tgetb.
    */
4415    |  ins_ABC	// RA = dst, RB = table, RC = byte literal
4416    |  checktab RB, ->vmeta_tgetb
4417    |  mov TAB:RB, [BASE+RB*8]
4418    |  cmp RC, TAB:RB->asize
4419    |  jae ->vmeta_tgetb
4420    |  shl RC, 3
4421    |  add RC, TAB:RB->array
4422    |  cmp dword [RC+4], LJ_TNIL	// Avoid overwriting RB in fastpath.
4423    |  je >2
4424    |  // Get array slot.
4425    |.if X64
4426    |  mov RBa, [RC]
4427    |  mov [BASE+RA*8], RBa
4428    |.else
4429    |  mov RB, [RC]
4430    |  mov RC, [RC+4]
4431    |  mov [BASE+RA*8], RB
4432    |  mov [BASE+RA*8+4], RC
4433    |.endif
4434    |1:
4435    |  ins_next
4436    |
4437    |2:  // Check for __index if table value is nil.
4438    |  cmp dword TAB:RB->metatable, 0	// Shouldn't overwrite RA for fastpath.
4439    |  jz >3
4440    |  mov TAB:RA, TAB:RB->metatable
4441    |  test byte TAB:RA->nomm, 1<<MM_index
4442    |  jz ->vmeta_tgetb			// 'no __index' flag NOT set: check.
4443    |  movzx RA, PC_RA			// Restore RA.
4444    |3:
4445    |  mov dword [BASE+RA*8+4], LJ_TNIL
4446    |  jmp <1
4447    break;
4448  case BC_TGETR:
    /* TGETR: dst = table[key] without type checks or metatable lookups in
    ** this code path: the table pointer and the key are loaded directly from
    ** their slots. An index outside the array part jumps to vmeta_tgetr.
    ** BC_TGETR_Z and BC_TGETR2_Z are global labels; their users are outside
    ** this section.
    */
4449    |  ins_ABC	// RA = dst, RB = table, RC = key
4450    |  mov TAB:RB, [BASE+RB*8]
4451    |.if DUALNUM
4452    |  mov RC, dword [BASE+RC*8]
4453    |.else
4454    |  cvttsd2si RC, qword [BASE+RC*8]
4455    |.endif
4456    |  cmp RC, TAB:RB->asize
4457    |  jae ->vmeta_tgetr		// Not in array part? Use fallback.
4458    |  shl RC, 3
4459    |  add RC, TAB:RB->array
4460    |  // Get array slot.
4461    |->BC_TGETR_Z:
4462    |.if X64
4463    |  mov RBa, [RC]
4464    |  mov [BASE+RA*8], RBa
4465    |.else
4466    |  mov RB, [RC]
4467    |  mov RC, [RC+4]
4468    |  mov [BASE+RA*8], RB
4469    |  mov [BASE+RA*8+4], RC
4470    |.endif
4471    |->BC_TGETR2_Z:
4472    |  ins_next
4473    break;
4474
4475  case BC_TSETV:
    /* TSETV: table[key] = src with the key taken from a stack slot.
    ** Fast path: the key is an integer (or a number that survives the
    ** int conversion round trip) and lies inside the array part. If the
    ** previous value is nil and the metatable lacks the 'no __newindex'
    ** flag, vmeta_tsetv takes over. A black table goes through barrierback
    ** before the store. String keys continue at BC_TSETS_Z.
    */
4476    |  ins_ABC	// RA = src, RB = table, RC = key
4477    |  checktab RB, ->vmeta_tsetv
4478    |  mov TAB:RB, [BASE+RB*8]
4479    |
4480    |  // Integer key?
4481    |.if DUALNUM
4482    |  checkint RC, >5
4483    |  mov RC, dword [BASE+RC*8]
4484    |.else
4485    |  // Convert number to int and back and compare.
4486    |  checknum RC, >5
4487    |  movsd xmm0, qword [BASE+RC*8]
4488    |  cvttsd2si RC, xmm0
4489    |  cvtsi2sd xmm1, RC
4490    |  ucomisd xmm0, xmm1
4491    |  jne ->vmeta_tsetv		// Generic numeric key? Use fallback.
4492    |.endif
4493    |  cmp RC, TAB:RB->asize		// Takes care of unordered, too.
4494    |  jae ->vmeta_tsetv
4495    |  shl RC, 3
4496    |  add RC, TAB:RB->array
4497    |  cmp dword [RC+4], LJ_TNIL
4498    |  je >3				// Previous value is nil?
4499    |1:
4500    |  test byte TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
4501    |  jnz >7
4502    |2:  // Set array slot.
4503    |.if X64
4504    |  mov RBa, [BASE+RA*8]
4505    |  mov [RC], RBa
4506    |.else
4507    |  mov RB, [BASE+RA*8+4]
4508    |  mov RA, [BASE+RA*8]
4509    |  mov [RC+4], RB
4510    |  mov [RC], RA
4511    |.endif
4512    |  ins_next
4513    |
4514    |3:  // Check for __newindex if previous value is nil.
4515    |  cmp dword TAB:RB->metatable, 0	// Shouldn't overwrite RA for fastpath.
4516    |  jz <1
4517    |  mov TAB:RA, TAB:RB->metatable
4518    |  test byte TAB:RA->nomm, 1<<MM_newindex
4519    |  jz ->vmeta_tsetv			// 'no __newindex' flag NOT set: check.
4520    |  movzx RA, PC_RA			// Restore RA.
4521    |  jmp <1
4522    |
4523    |5:  // String key?
4524    |  checkstr RC, ->vmeta_tsetv
4525    |  mov STR:RC, [BASE+RC*8]
4526    |  jmp ->BC_TSETS_Z
4527    |
4528    |7:  // Possible table write barrier for the value. Skip valiswhite check.
4529    |  barrierback TAB:RB, RA
4530    |  movzx RA, PC_RA			// Restore RA.
4531    |  jmp <2
4532    break;
4533  case BC_TSETS:
    /* TSETS: table[str] = src with a string constant key.
    ** Clears the table's nomm byte, then searches the hash chain starting at
    ** node[hmask & str->sid]. If the key exists, the value is stored in
    ** place (after a __newindex check when the old value is nil). If the
    ** chain ends without a match, __newindex is checked and otherwise
    ** lj_tab_newkey(L, t, k) is called with a temporary TValue built in
    ** TMP1/TMP2; the returned slot is then written like an existing one.
    ** A black table goes through barrierback before the store.
    ** BC_TSETS_Z is also entered from other instructions.
    */
4534    |  ins_ABC	// RA = src, RB = table, RC = str const (~)
4535    |  not RCa
4536    |  mov STR:RC, [KBASE+RC*4]
4537    |  checktab RB, ->vmeta_tsets
4538    |  mov TAB:RB, [BASE+RB*8]
4539    |->BC_TSETS_Z:	// RB = GCtab *, RC = GCstr *, refetches PC_RA.
4540    |  mov RA, TAB:RB->hmask
4541    |  and RA, STR:RC->sid
4542    |  imul RA, #NODE
4543    |  mov byte TAB:RB->nomm, 0		// Clear metamethod cache.
4544    |  add NODE:RA, TAB:RB->node
4545    |1:
4546    |  cmp dword NODE:RA->key.it, LJ_TSTR
4547    |  jne >5
4548    |  cmp dword NODE:RA->key.gcr, STR:RC
4549    |  jne >5
4550    |  // Ok, key found. Assumes: offsetof(Node, val) == 0
4551    |  cmp dword [RA+4], LJ_TNIL
4552    |  je >4				// Previous value is nil?
4553    |2:
4554    |  test byte TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
4555    |  jnz >7
4556    |3:  // Set node value.
4557    |  movzx RC, PC_RA
4558    |.if X64
4559    |  mov RBa, [BASE+RC*8]
4560    |  mov [RA], RBa
4561    |.else
4562    |  mov RB, [BASE+RC*8+4]
4563    |  mov RC, [BASE+RC*8]
4564    |  mov [RA+4], RB
4565    |  mov [RA], RC
4566    |.endif
4567    |  ins_next
4568    |
4569    |4:  // Check for __newindex if previous value is nil.
4570    |  cmp dword TAB:RB->metatable, 0	// Shouldn't overwrite RA for fastpath.
4571    |  jz <2
4572    |  mov TMP1, RA			// Save RA.
4573    |  mov TAB:RA, TAB:RB->metatable
4574    |  test byte TAB:RA->nomm, 1<<MM_newindex
4575    |  jz ->vmeta_tsets			// 'no __newindex' flag NOT set: check.
4576    |  mov RA, TMP1			// Restore RA.
4577    |  jmp <2
4578    |
4579    |5:  // Follow hash chain.
4580    |  mov NODE:RA, NODE:RA->next
4581    |  test NODE:RA, NODE:RA
4582    |  jnz <1
4583    |  // End of hash chain: key not found, add a new one.
4584    |
4585    |  // But check for __newindex first.
4586    |  mov TAB:RA, TAB:RB->metatable
4587    |  test TAB:RA, TAB:RA
4588    |  jz >6				// No metatable: continue.
4589    |  test byte TAB:RA->nomm, 1<<MM_newindex
4590    |  jz ->vmeta_tsets			// 'no __newindex' flag NOT set: check.
4591    |6:
4592    |  mov TMP1, STR:RC
4593    |  mov TMP2, LJ_TSTR
4594    |  mov TMP3, TAB:RB			// Save TAB:RB for us.
4595    |.if X64
4596    |  mov L:CARG1d, SAVE_L
4597    |  mov L:CARG1d->base, BASE
4598    |  lea CARG3, TMP1
4599    |  mov CARG2d, TAB:RB
4600    |  mov L:RB, L:CARG1d
4601    |.else
4602    |  lea RC, TMP1			// Store temp. TValue in TMP1/TMP2.
4603    |  mov ARG2, TAB:RB
4604    |  mov L:RB, SAVE_L
4605    |  mov ARG3, RC
4606    |  mov ARG1, L:RB
4607    |  mov L:RB->base, BASE
4608    |.endif
4609    |  mov SAVE_PC, PC
4610    |  call extern lj_tab_newkey	// (lua_State *L, GCtab *t, TValue *k)
4611    |  // Handles write barrier for the new key. TValue * returned in eax (RC).
4612    |  mov BASE, L:RB->base
4613    |  mov TAB:RB, TMP3			// Need TAB:RB for barrier.
4614    |  mov RA, eax
4615    |  jmp <2				// Must check write barrier for value.
4616    |
4617    |7:  // Possible table write barrier for the value. Skip valiswhite check.
4618    |  barrierback TAB:RB, RC		// Destroys STR:RC.
4619    |  jmp <3
4620    break;
4621  case BC_TSETB:
    /* TSETB: table[lit] = src with the byte literal in RC used as the index.
    ** Fast path: the index lies inside the array part. If the previous value
    ** is nil and the metatable lacks the 'no __newindex' flag, vmeta_tsetb
    ** takes over. A black table goes through barrierback before the store.
    */
4622    |  ins_ABC	// RA = src, RB = table, RC = byte literal
4623    |  checktab RB, ->vmeta_tsetb
4624    |  mov TAB:RB, [BASE+RB*8]
4625    |  cmp RC, TAB:RB->asize
4626    |  jae ->vmeta_tsetb
4627    |  shl RC, 3
4628    |  add RC, TAB:RB->array
4629    |  cmp dword [RC+4], LJ_TNIL
4630    |  je >3				// Previous value is nil?
4631    |1:
4632    |  test byte TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
4633    |  jnz >7
4634    |2:	 // Set array slot.
4635    |.if X64
4636    |  mov RAa, [BASE+RA*8]
4637    |  mov [RC], RAa
4638    |.else
4639    |  mov RB, [BASE+RA*8+4]
4640    |  mov RA, [BASE+RA*8]
4641    |  mov [RC+4], RB
4642    |  mov [RC], RA
4643    |.endif
4644    |  ins_next
4645    |
4646    |3:  // Check for __newindex if previous value is nil.
4647    |  cmp dword TAB:RB->metatable, 0	// Shouldn't overwrite RA for fastpath.
4648    |  jz <1
4649    |  mov TAB:RA, TAB:RB->metatable
4650    |  test byte TAB:RA->nomm, 1<<MM_newindex
4651    |  jz ->vmeta_tsetb			// 'no __newindex' flag NOT set: check.
4652    |  movzx RA, PC_RA			// Restore RA.
4653    |  jmp <1
4654    |
4655    |7:  // Possible table write barrier for the value. Skip valiswhite check.
4656    |  barrierback TAB:RB, RA
4657    |  movzx RA, PC_RA			// Restore RA.
4658    |  jmp <2
4659    break;
4660  case BC_TSETR:
    /* TSETR: table[key] = src without type checks or metatable lookups in
    ** this code path: the table pointer and the key are loaded directly from
    ** their slots. The black-table check (barrierback) is done before the
    ** bounds check. An index outside the array part jumps to vmeta_tsetr.
    ** BC_TSETR_Z is a global label; its users are outside this section.
    */
4661    |  ins_ABC	// RA = src, RB = table, RC = key
4662    |  mov TAB:RB, [BASE+RB*8]
4663    |.if DUALNUM
4664    |  mov RC, dword [BASE+RC*8]
4665    |.else
4666    |  cvttsd2si RC, qword [BASE+RC*8]
4667    |.endif
4668    |  test byte TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
4669    |  jnz >7
4670    |2:
4671    |  cmp RC, TAB:RB->asize
4672    |  jae ->vmeta_tsetr
4673    |  shl RC, 3
4674    |  add RC, TAB:RB->array
4675    |  // Set array slot.
4676    |->BC_TSETR_Z:
4677    |.if X64
4678    |  mov RBa, [BASE+RA*8]
4679    |  mov [RC], RBa
4680    |.else
4681    |  mov RB, [BASE+RA*8+4]
4682    |  mov RA, [BASE+RA*8]
4683    |  mov [RC+4], RB
4684    |  mov [RC], RA
4685    |.endif
4686    |  ins_next
4687    |
4688    |7:  // Possible table write barrier for the value. Skip valiswhite check.
4689    |  barrierback TAB:RB, RA
4690    |  movzx RA, PC_RA			// Restore RA.
4691    |  jmp <2
4692    break;
4693
4694  case BC_TSETM:
    /* TSETM: copy MULTRES-1 consecutive slots, starting at slot RA, into the
    ** array part of the table held in slot RA-1. The first destination index
    ** is the low dword of the number constant selected by RD. KBASE is
    ** borrowed as a scratch register (saved in TMP1, restored at label 4).
    ** If start+count exceeds asize, lj_tab_reasize(L, t, nasize) is called
    ** and the instruction restarts from label 1.
    */
4695    |  ins_AD	// RA = base (table at base-1), RD = num const (start index)
4696    |  mov TMP1, KBASE			// Need one more free register.
4697    |  mov KBASE, dword [KBASE+RD*8]	// Integer constant is in lo-word.
4698    |1:
4699    |  lea RA, [BASE+RA*8]
4700    |  mov TAB:RB, [RA-8]		// Guaranteed to be a table.
4701    |  test byte TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
4702    |  jnz >7
4703    |2:
4704    |  mov RD, MULTRES
4705    |  sub RD, 1
4706    |  jz >4				// Nothing to copy?
4707    |  add RD, KBASE			// Compute needed size.
4708    |  cmp RD, TAB:RB->asize
4709    |  ja >5				// Doesn't fit into array part?
4710    |  sub RD, KBASE
4711    |  shl KBASE, 3
4712    |  add KBASE, TAB:RB->array
4713    |3:  // Copy result slots to table.
4714    |.if X64
4715    |  mov RBa, [RA]
4716    |  add RA, 8
4717    |  mov [KBASE], RBa
4718    |.else
4719    |  mov RB, [RA]
4720    |  mov [KBASE], RB
4721    |  mov RB, [RA+4]
4722    |  add RA, 8
4723    |  mov [KBASE+4], RB
4724    |.endif
4725    |  add KBASE, 8
4726    |  sub RD, 1
4727    |  jnz <3
4728    |4:
4729    |  mov KBASE, TMP1
4730    |  ins_next
4731    |
4732    |5:  // Need to resize array part.
4733    |.if X64
4734    |  mov L:CARG1d, SAVE_L
4735    |  mov L:CARG1d->base, BASE		// Caveat: CARG2d/CARG3d may be BASE.
4736    |  mov CARG2d, TAB:RB
4737    |  mov CARG3d, RD
4738    |  mov L:RB, L:CARG1d
4739    |.else
4740    |  mov ARG2, TAB:RB
4741    |  mov L:RB, SAVE_L
4742    |  mov L:RB->base, BASE
4743    |  mov ARG3, RD
4744    |  mov ARG1, L:RB
4745    |.endif
4746    |  mov SAVE_PC, PC
4747    |  call extern lj_tab_reasize	// (lua_State *L, GCtab *t, int nasize)
4748    |  mov BASE, L:RB->base
4749    |  movzx RA, PC_RA			// Restore RA.
4750    |  jmp <1				// Retry.
4751    |
4752    |7:  // Possible table write barrier for any value. Skip valiswhite check.
4753    |  barrierback TAB:RB, RD
4754    |  jmp <2
4755    break;
4756
4757  /* -- Calls and vararg handling ----------------------------------------- */
4758
4759  case BC_CALL: case BC_CALLM:
    /* CALL/CALLM: call the value in slot RA. Both opcodes share this code;
    ** for CALLM the generator additionally emits an add of MULTRES to the
    ** argument count. A slot whose tag is not LJ_TFUNC is handled by
    ** vmeta_call_ra. Otherwise BASE is moved to the slot after the callee
    ** (BASE+RA*8+8) before ins_call.
    */
4760    |  ins_A_C	// RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
4761    if (op == BC_CALLM) {
4762      |  add NARGS:RD, MULTRES
4763    }
4764    |  cmp dword [BASE+RA*8+4], LJ_TFUNC
4765    |  mov LFUNC:RB, [BASE+RA*8]	// mov leaves the flags of the cmp intact.
4766    |  jne ->vmeta_call_ra
4767    |  lea BASE, [BASE+RA*8+8]
4768    |  ins_call
4769    break;
4770
4771  case BC_CALLMT:
    /* CALLMT: add MULTRES to the argument count in RD. No dispatch is
    ** emitted here, so the generated code runs on into the next emitted
    ** instruction (the 'break' below only ends the generator's C case).
    */
4772    |  ins_AD	// RA = base, RD = extra_nargs
4773    |  add NARGS:RD, MULTRES
4774    |  // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
4775    break;
4776  case BC_CALLT:
    /* CALLT: tail call of the value in slot RA.
    ** A non-function callee is handled by vmeta_call. Otherwise the callee
    ** is copied to BASE-8 and the NARGS-1 arguments are moved down to BASE,
    ** using KBASE as the destination cursor. The frame word at BASE-4 is
    ** kept in PC: if FRAME_TYPE bits are set, label 7 handles a vararg
    ** frame by relocating BASE/KBASE down. For a callee with ffid > 1 and a
    ** Lua frame below, KBASE is rebuilt at label 5 before ins_callt.
    ** BC_CALLT_Z is a global entry point; its users are outside this section.
    */
4777    |  ins_AD	// RA = base, RD = nargs+1
4778    |  lea RA, [BASE+RA*8+8]
4779    |  mov KBASE, BASE			// Use KBASE for move + vmeta_call hint.
4780    |  mov LFUNC:RB, [RA-8]
4781    |  cmp dword [RA-4], LJ_TFUNC
4782    |  jne ->vmeta_call
4783    |->BC_CALLT_Z:
4784    |  mov PC, [BASE-4]
4785    |  test PC, FRAME_TYPE
4786    |  jnz >7
4787    |1:
4788    |  mov [BASE-8], LFUNC:RB		// Copy function down, reloaded below.
4789    |  mov MULTRES, NARGS:RD
4790    |  sub NARGS:RD, 1
4791    |  jz >3
4792    |2:  // Move args down.
4793    |.if X64
4794    |  mov RBa, [RA]
4795    |  add RA, 8
4796    |  mov [KBASE], RBa
4797    |.else
4798    |  mov RB, [RA]
4799    |  mov [KBASE], RB
4800    |  mov RB, [RA+4]
4801    |  add RA, 8
4802    |  mov [KBASE+4], RB
4803    |.endif
4804    |  add KBASE, 8
4805    |  sub NARGS:RD, 1
4806    |  jnz <2
4807    |
4808    |  mov LFUNC:RB, [BASE-8]
4809    |3:
4810    |  mov NARGS:RD, MULTRES
4811    |  cmp byte LFUNC:RB->ffid, 1	// (> FF_C) Calling a fast function?
4812    |  ja >5
4813    |4:
4814    |  ins_callt
4815    |
4816    |5:  // Tailcall to a fast function.
4817    |  test PC, FRAME_TYPE		// Lua frame below?
4818    |  jnz <4
4819    |  movzx RA, PC_RA
4820    |  not RAa
4821    |  mov LFUNC:KBASE, [BASE+RA*8-8]	// Need to prepare KBASE.
4822    |  mov KBASE, LFUNC:KBASE->pc
4823    |  mov KBASE, [KBASE+PC2PROTO(k)]
4824    |  jmp <4
4825    |
4826    |7:  // Tailcall from a vararg function.
4827    |  sub PC, FRAME_VARG
4828    |  test PC, FRAME_TYPEP
4829    |  jnz >8				// Vararg frame below?
4830    |  sub BASE, PC			// Need to relocate BASE/KBASE down.
4831    |  mov KBASE, BASE
4832    |  mov PC, [BASE-4]
4833    |  jmp <1
4834    |8:
4835    |  add PC, FRAME_VARG
4836    |  jmp <1
4837    break;
4838
4839  case BC_ITERC:
    /* ITERC: call the iterator of a generic for loop.
    ** With fb = slot RA+1, the callable, state and control var are copied
    ** from fb[-4], fb[-3] and fb[-2] to fb[-1], fb[0] and fb[1]. The argument
    ** count is fixed at 2+1. A callable whose tag is not LJ_TFUNC is handled
    ** by vmeta_call; otherwise BASE is set to fb before ins_call.
    */
4840    |  ins_A	// RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
4841    |  lea RA, [BASE+RA*8+8]		// fb = base+1
4842    |.if X64
4843    |  mov RBa, [RA-24]			// Copy state. fb[0] = fb[-3].
4844    |  mov RCa, [RA-16]			// Copy control var. fb[1] = fb[-2].
4845    |  mov [RA], RBa
4846    |  mov [RA+8], RCa
4847    |.else
4848    |  mov RB, [RA-24]			// Copy state. fb[0] = fb[-3].
4849    |  mov RC, [RA-20]
4850    |  mov [RA], RB
4851    |  mov [RA+4], RC
4852    |  mov RB, [RA-16]			// Copy control var. fb[1] = fb[-2].
4853    |  mov RC, [RA-12]
4854    |  mov [RA+8], RB
4855    |  mov [RA+12], RC
4856    |.endif
4857    |  mov LFUNC:RB, [RA-32]		// Copy callable. fb[-1] = fb[-4]
4858    |  mov RC, [RA-28]
4859    |  mov [RA-8], LFUNC:RB
4860    |  mov [RA-4], RC
4861    |  cmp RC, LJ_TFUNC			// Handle like a regular 2-arg call.
4862    |  mov NARGS:RD, 2+1
4863    |  jne ->vmeta_call
4864    |  mov BASE, RA
4865    |  ins_call
4866    break;
4867
4868  case BC_ITERN:
    /* ITERN: specialized table traversal step.
    ** The table is read from slot RA-2 and a running index from the control
    ** var at slot RA-1. Indexes below asize select array slots; larger ones
    ** select hash nodes (index - asize, bounded by hmask). Nil entries are
    ** skipped. On a hit, key and value are written to slots RA and RA+1, the
    ** control var is updated and the branch target is taken from the
    ** following instruction. When the hash part is exhausted, execution
    ** continues after that instruction. KBASE and DISPATCH are borrowed as
    ** scratch registers (saved in TMP1/TMP2, restored at label 3).
    */
4869    |.if JIT
4870    |  hotloop RB
4871    |.endif
4872    |->vm_IITERN:
4873    |  ins_A	// RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
4874    |  mov TMP1, KBASE			// Need two more free registers.
4875    |  mov TMP2, DISPATCH
4876    |  mov TAB:RB, [BASE+RA*8-16]
4877    |  mov RC, [BASE+RA*8-8]		// Get index from control var.
4878    |  mov DISPATCH, TAB:RB->asize
4879    |  add PC, 4
4880    |  mov KBASE, TAB:RB->array
4881    |1:  // Traverse array part.
4882    |  cmp RC, DISPATCH; jae >5		// Index points after array part?
4883    |  cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4
4884    |.if DUALNUM
4885    |  mov dword [BASE+RA*8+4], LJ_TISNUM
4886    |  mov dword [BASE+RA*8], RC
4887    |.else
4888    |  cvtsi2sd xmm0, RC
4889    |.endif
4890    |  // Copy array slot to returned value.
4891    |.if X64
4892    |  mov RBa, [KBASE+RC*8]
4893    |  mov [BASE+RA*8+8], RBa
4894    |.else
4895    |  mov RB, [KBASE+RC*8+4]
4896    |  mov [BASE+RA*8+12], RB
4897    |  mov RB, [KBASE+RC*8]
4898    |  mov [BASE+RA*8+8], RB
4899    |.endif
4900    |  add RC, 1
4901    |  // Return array index as a numeric key.
4902    |.if DUALNUM
4903    |  // See above.
4904    |.else
4905    |  movsd qword [BASE+RA*8], xmm0
4906    |.endif
4907    |  mov [BASE+RA*8-8], RC		// Update control var.
4908    |2:
4909    |  movzx RD, PC_RD			// Get target from ITERL.
4910    |  branchPC RD
4911    |3:
4912    |  mov DISPATCH, TMP2
4913    |  mov KBASE, TMP1
4914    |  ins_next
4915    |
4916    |4:  // Skip holes in array part.
4917    |  add RC, 1
4918    |  jmp <1
4919    |
4920    |5:  // Traverse hash part.
4921    |  sub RC, DISPATCH
4922    |6:
4923    |  cmp RC, TAB:RB->hmask; ja <3	// End of iteration? Branch to ITERL+1.
4924    |  imul KBASE, RC, #NODE
4925    |  add NODE:KBASE, TAB:RB->node
4926    |  cmp dword NODE:KBASE->val.it, LJ_TNIL; je >7
4927    |  lea DISPATCH, [RC+DISPATCH+1]	// Next control var: hash index + asize + 1.
4928    |  // Copy key and value from hash slot.
4929    |.if X64
4930    |  mov RBa, NODE:KBASE->key
4931    |  mov RCa, NODE:KBASE->val
4932    |  mov [BASE+RA*8], RBa
4933    |  mov [BASE+RA*8+8], RCa
4934    |.else
4935    |  mov RB, NODE:KBASE->key.gcr
4936    |  mov RC, NODE:KBASE->key.it
4937    |  mov [BASE+RA*8], RB
4938    |  mov [BASE+RA*8+4], RC
4939    |  mov RB, NODE:KBASE->val.gcr
4940    |  mov RC, NODE:KBASE->val.it
4941    |  mov [BASE+RA*8+8], RB
4942    |  mov [BASE+RA*8+12], RC
4943    |.endif
4944    |  mov [BASE+RA*8-8], DISPATCH
4945    |  jmp <2
4946    |
4947    |7:  // Skip holes in hash part.
4948    |  add RC, 1
4949    |  jmp <6
4950    break;
4951
4952  case BC_ISNEXT:
    /* ISNEXT: check that the three slots below RA hold a function whose
    ** ffid is FF_next_N, a table and nil. If so, the control var is
    ** initialized to index 0 with tag LJ_KEYINDEX and the branch is taken.
    ** Otherwise this instruction's opcode is patched to BC_JMP and the
    ** opcode byte at the branch target is patched to BC_ITERC. With the JIT
    ** enabled and a target opcode other than BC_ITERN, the trace's startins
    ** word (opcode byte replaced by BC_ITERC) is written to the target.
    */
4953    |  ins_AD	// RA = base, RD = target (points to ITERN)
4954    |  cmp dword [BASE+RA*8-20], LJ_TFUNC; jne >5
4955    |  mov CFUNC:RB, [BASE+RA*8-24]
4956    |  cmp dword [BASE+RA*8-12], LJ_TTAB; jne >5
4957    |  cmp dword [BASE+RA*8-4], LJ_TNIL; jne >5
4958    |  cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
4959    |  branchPC RD
4960    |  mov dword [BASE+RA*8-8], 0	// Initialize control var.
4961    |  mov dword [BASE+RA*8-4], LJ_KEYINDEX
4962    |1:
4963    |  ins_next
4964    |5:  // Despecialize bytecode if any of the checks fail.
4965    |  mov PC_OP, BC_JMP
4966    |  branchPC RD
4967    |.if JIT
4968    |  cmp byte [PC], BC_ITERN
4969    |  jne >6
4970    |.endif
4971    |  mov byte [PC], BC_ITERC
4972    |  jmp <1
4973    |.if JIT
4974    |6:  // Unpatch JLOOP.
4975    |  mov RA, [DISPATCH+DISPATCH_J(trace)]
4976    |  movzx RC, word [PC+2]		// Index into the trace array.
4977    |  mov TRACE:RA, [RA+RC*4]
4978    |  mov eax, TRACE:RA->startins
4979    |  mov al, BC_ITERC			// Replace only the low (opcode) byte.
4980    |  mov dword [PC], eax
4981    |  jmp <1
4982    |.endif
4983    break;
4984
4985  case BC_VARG:
    /* VARG: copy vararg slots to the destination slots starting at RA.
    ** KBASE is borrowed as the source cursor (saved in TMP1, restored at
    ** label 3); it is derived from BASE, numparams and the frame word at
    ** BASE-4, and varargs are read from [KBASE-8] while KBASE < BASE.
    ** RB != 0: exactly RB-1 destination slots are written; missing values
    **   are filled with nil.
    ** RB == 0: all varargs are copied and MULTRES is set to their count+1;
    **   lj_state_growstack(L, n) is called first if the destination would
    **   exceed L->maxstack.
    */
4986    |  ins_ABC	// RA = base, RB = nresults+1, RC = numparams
4987    |  mov TMP1, KBASE			// Need one more free register.
4988    |  lea KBASE, [BASE+RC*8+(8+FRAME_VARG)]
4989    |  lea RA, [BASE+RA*8]
4990    |  sub KBASE, [BASE-4]
4991    |  // Note: KBASE may now be even _above_ BASE if nargs was < numparams.
4992    |  test RB, RB
4993    |  jz >5				// Copy all varargs?
4994    |  lea RB, [RA+RB*8-8]		// RB = end of the destination range.
4995    |  cmp KBASE, BASE			// No vararg slots?
4996    |  jnb >2
4997    |1:  // Copy vararg slots to destination slots.
4998    |.if X64
4999    |  mov RCa, [KBASE-8]
5000    |  add KBASE, 8
5001    |  mov [RA], RCa
5002    |.else
5003    |  mov RC, [KBASE-8]
5004    |  mov [RA], RC
5005    |  mov RC, [KBASE-4]
5006    |  add KBASE, 8
5007    |  mov [RA+4], RC
5008    |.endif
5009    |  add RA, 8
5010    |  cmp RA, RB			// All destination slots filled?
5011    |  jnb >3
5012    |  cmp KBASE, BASE			// No more vararg slots?
5013    |  jb <1
5014    |2:  // Fill up remainder with nil.
5015    |  mov dword [RA+4], LJ_TNIL
5016    |  add RA, 8
5017    |  cmp RA, RB
5018    |  jb <2
5019    |3:
5020    |  mov KBASE, TMP1
5021    |  ins_next
5022    |
5023    |5:  // Copy all varargs.
5024    |  mov MULTRES, 1			// MULTRES = 0+1
5025    |  mov RC, BASE
5026    |  sub RC, KBASE
5027    |  jbe <3				// No vararg slots?
5028    |  mov RB, RC
5029    |  shr RB, 3
5030    |  add RB, 1
5031    |  mov MULTRES, RB			// MULTRES = #varargs+1
5032    |  mov L:RB, SAVE_L
5033    |  add RC, RA
5034    |  cmp RC, L:RB->maxstack
5035    |  ja >7				// Need to grow stack?
5036    |6:  // Copy all vararg slots.
5037    |.if X64
5038    |  mov RCa, [KBASE-8]
5039    |  add KBASE, 8
5040    |  mov [RA], RCa
5041    |.else
5042    |  mov RC, [KBASE-8]
5043    |  mov [RA], RC
5044    |  mov RC, [KBASE-4]
5045    |  add KBASE, 8
5046    |  mov [RA+4], RC
5047    |.endif
5048    |  add RA, 8
5049    |  cmp KBASE, BASE			// No more vararg slots?
5050    |  jb <6
5051    |  jmp <3
5052    |
5053    |7:  // Grow stack for varargs.
5054    |  mov L:RB->base, BASE
5055    |  mov L:RB->top, RA
5056    |  mov SAVE_PC, PC
5057    |  sub KBASE, BASE			// Need delta, because BASE may change.
5058    |  mov FCARG2, MULTRES
5059    |  sub FCARG2, 1
5060    |  mov FCARG1, L:RB
5061    |  call extern lj_state_growstack@8	// (lua_State *L, int n)
5062    |  mov BASE, L:RB->base
5063    |  mov RA, L:RB->top
5064    |  add KBASE, BASE
5065    |  jmp <6
5066    break;
5067
5068  /* -- Returns ----------------------------------------------------------- */
5069
5070  case BC_RETM:
    /* RETM: add MULTRES to the result count in RD. No dispatch is emitted
    ** here, so the generated code runs on into the next emitted instruction
    ** (the 'break' below only ends the generator's C case).
    */
5071    |  ins_AD	// RA = results, RD = extra_nresults
5072    |  add RD, MULTRES			// MULTRES >=1, so RD >=1.
5073    |  // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
5074    break;
5075
5076  case BC_RET: case BC_RET0: case BC_RET1:
    /* RET/RET0/RET1: return from a function. One generator body emits three
    ** variants, selected by the C-level tests on 'op':
    **   RET  moves RD-1 results down to BASE-8 in a loop (KBASE as cursor),
    **   RET1 moves exactly one result, RET0 moves none.
    ** The frame word at BASE-4 is loaded into PC. If its FRAME_TYPE bits are
    ** set, label 7 either hands over to vm_return or, for a vararg frame,
    ** relocates BASE (and RA) and restarts at label 1. Otherwise the number
    ** of results expected (PC_RB) is compared with RD and missing results
    ** are filled with nil at label 6. Finally BASE is moved down by
    ** (RA+1)*8 using the PC_RA field and KBASE is reloaded via the function
    ** at the new BASE-8.
    */
5077    |  ins_AD	// RA = results, RD = nresults+1
5078    if (op != BC_RET0) {
5079      |  shl RA, 3
5080    }
5081    |1:
5082    |  mov PC, [BASE-4]
5083    |  mov MULTRES, RD			// Save nresults+1.
5084    |  test PC, FRAME_TYPE		// Check frame type marker.
5085    |  jnz >7				// Not returning to a fixarg Lua func?
5086    switch (op) {
5087    case BC_RET:
5088      |->BC_RET_Z:
5089      |  mov KBASE, BASE		// Use KBASE for result move.
5090      |  sub RD, 1
5091      |  jz >3
5092      |2:  // Move results down.
5093      |.if X64
5094      |  mov RBa, [KBASE+RA]
5095      |  mov [KBASE-8], RBa
5096      |.else
5097      |  mov RB, [KBASE+RA]
5098      |  mov [KBASE-8], RB
5099      |  mov RB, [KBASE+RA+4]
5100      |  mov [KBASE-4], RB
5101      |.endif
5102      |  add KBASE, 8
5103      |  sub RD, 1
5104      |  jnz <2
5105      |3:
5106      |  mov RD, MULTRES		// Note: MULTRES may be >255.
5107      |  movzx RB, PC_RB		// So cannot compare with RDL!
5108      |5:
5109      |  cmp RB, RD			// More results expected?
5110      |  ja >6
5111      break;
5112    case BC_RET1:
5113      |.if X64
5114      |  mov RBa, [BASE+RA]
5115      |  mov [BASE-8], RBa
5116      |.else
5117      |  mov RB, [BASE+RA+4]
5118      |  mov [BASE-4], RB
5119      |  mov RB, [BASE+RA]
5120      |  mov [BASE-8], RB
5121      |.endif
5122      /* fallthrough */
5123    case BC_RET0:
5124      |5:
5125      |  cmp PC_RB, RDL			// More results expected?
5126      |  ja >6
5127    default:
5128      break;
5129    }
5130    |  movzx RA, PC_RA
5131    |  not RAa				// Note: ~RA = -(RA+1)
5132    |  lea BASE, [BASE+RA*8]		// base = base - (RA+1)*8
5133    |  mov LFUNC:KBASE, [BASE-8]
5134    |  mov KBASE, LFUNC:KBASE->pc
5135    |  mov KBASE, [KBASE+PC2PROTO(k)]
5136    |  ins_next
5137    |
5138    |6:  // Fill up results with nil.
5139    if (op == BC_RET) {
5140      |  mov dword [KBASE-4], LJ_TNIL	// Note: relies on shifted base.
5141      |  add KBASE, 8
5142    } else {
5143      |  mov dword [BASE+RD*8-12], LJ_TNIL
5144    }
5145    |  add RD, 1
5146    |  jmp <5
5147    |
5148    |7:  // Non-standard return case.
5149    |  lea RB, [PC-FRAME_VARG]
5150    |  test RB, FRAME_TYPEP
5151    |  jnz ->vm_return
5152    |  // Return from vararg function: relocate BASE down and RA up.
5153    |  sub BASE, RB
5154    if (op != BC_RET0) {
5155      |  add RA, RB
5156    }
5157    |  jmp <1
5158    break;
5159
5160  /* -- Loops and branches ------------------------------------------------ */
5161
5162  |.define FOR_IDX,  [RA];    .define FOR_TIDX,  dword [RA+4]	// Slot 0 at RA, and the dword at +4 of that slot.
5163  |.define FOR_STOP, [RA+8];  .define FOR_TSTOP, dword [RA+12]	// Slot 1, likewise.
5164  |.define FOR_STEP, [RA+16]; .define FOR_TSTEP, dword [RA+20]	// Slot 2, likewise.
5165  |.define FOR_EXT,  [RA+24]; .define FOR_TEXT,  dword [RA+28]	// Slot 3, likewise.
5166
5167  case BC_FORL:
    /* FORL: with the JIT enabled only the hotloop macro is emitted. No
    ** dispatch is emitted here, so the generated code runs on into the next
    ** emitted instruction (the 'break' below only ends the generator's C
    ** case).
    */
5168    |.if JIT
5169    |  hotloop RB
5170    |.endif
5171    | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
5172    break;
5173
5174  case BC_JFORI:
5175  case BC_JFORL:
5176#if !LJ_HASJIT
5177    break;
5178#endif
5179  case BC_FORI:
5180  case BC_IFORL:
5181    vk = (op == BC_IFORL || op == BC_JFORL);
5182    |  ins_AJ	// RA = base, RD = target (after end of loop or start of loop)
5183    |  lea RA, [BASE+RA*8]
5184    if (LJ_DUALNUM) {
5185      |  cmp FOR_TIDX, LJ_TISNUM; jne >9
5186      if (!vk) {
5187	|  cmp FOR_TSTOP, LJ_TISNUM; jne ->vmeta_for
5188	|  cmp FOR_TSTEP, LJ_TISNUM; jne ->vmeta_for
5189	|  mov RB, dword FOR_IDX
5190	|  cmp dword FOR_STEP, 0; jl >5
5191      } else {
5192#ifdef LUA_USE_ASSERT
5193	|  cmp FOR_TSTOP, LJ_TISNUM; jne ->assert_bad_for_arg_type
5194	|  cmp FOR_TSTEP, LJ_TISNUM; jne ->assert_bad_for_arg_type
5195#endif
5196	|  mov RB, dword FOR_STEP
5197	|  test RB, RB; js >5
5198	|  add RB, dword FOR_IDX; jo >1
5199	|  mov dword FOR_IDX, RB
5200      }
5201      |  cmp RB, dword FOR_STOP
5202      |  mov FOR_TEXT, LJ_TISNUM
5203      |  mov dword FOR_EXT, RB
5204      if (op == BC_FORI) {
5205	|  jle >7
5206	|1:
5207	|6:
5208	|  branchPC RD
5209      } else if (op == BC_JFORI) {
5210	|  branchPC RD
5211	|  movzx RD, PC_RD
5212	|  jle =>BC_JLOOP
5213	|1:
5214	|6:
5215      } else if (op == BC_IFORL) {
5216	|  jg >7
5217	|6:
5218	|  branchPC RD
5219	|1:
5220      } else {
5221	|  jle =>BC_JLOOP
5222	|1:
5223	|6:
5224      }
5225      |7:
5226      |  ins_next
5227      |
5228      |5:  // Invert check for negative step.
5229      if (vk) {
5230	|  add RB, dword FOR_IDX; jo <1
5231	|  mov dword FOR_IDX, RB
5232      }
5233      |  cmp RB, dword FOR_STOP
5234      |  mov FOR_TEXT, LJ_TISNUM
5235      |  mov dword FOR_EXT, RB
5236      if (op == BC_FORI) {
5237	|  jge <7
5238      } else if (op == BC_JFORI) {
5239	|  branchPC RD
5240	|  movzx RD, PC_RD
5241	|  jge =>BC_JLOOP
5242      } else if (op == BC_IFORL) {
5243	|  jl <7
5244      } else {
5245	|  jge =>BC_JLOOP
5246      }
5247      |  jmp <6
5248      |9:  // Fallback to FP variant.
5249    } else if (!vk) {
5250      |  cmp FOR_TIDX, LJ_TISNUM
5251    }
5252    if (!vk) {
5253      |  jae ->vmeta_for
5254      |  cmp FOR_TSTOP, LJ_TISNUM; jae ->vmeta_for
5255    } else {
5256#ifdef LUA_USE_ASSERT
5257      |  cmp FOR_TSTOP, LJ_TISNUM; jae ->assert_bad_for_arg_type
5258      |  cmp FOR_TSTEP, LJ_TISNUM; jae ->assert_bad_for_arg_type
5259#endif
5260    }
5261    |  mov RB, FOR_TSTEP		// Load type/hiword of for step.
5262    if (!vk) {
5263      |  cmp RB, LJ_TISNUM; jae ->vmeta_for
5264    }
5265    |  movsd xmm0, qword FOR_IDX
5266    |  movsd xmm1, qword FOR_STOP
5267    if (vk) {
5268      |  addsd xmm0, qword FOR_STEP
5269      |  movsd qword FOR_IDX, xmm0
5270      |  test RB, RB; js >3
5271    } else {
5272      |  jl >3
5273    }
5274    |  ucomisd xmm1, xmm0
5275    |1:
5276    |  movsd qword FOR_EXT, xmm0
5277    if (op == BC_FORI) {
5278      |.if DUALNUM
5279      |  jnb <7
5280      |.else
5281      |  jnb >2
5282      |  branchPC RD
5283      |.endif
5284    } else if (op == BC_JFORI) {
5285      |  branchPC RD
5286      |  movzx RD, PC_RD
5287      |  jnb =>BC_JLOOP
5288    } else if (op == BC_IFORL) {
5289      |.if DUALNUM
5290      |  jb <7
5291      |.else
5292      |  jb >2
5293      |  branchPC RD
5294      |.endif
5295    } else {
5296      |  jnb =>BC_JLOOP
5297    }
5298    |.if DUALNUM
5299    |  jmp <6
5300    |.else
5301    |2:
5302    |  ins_next
5303    |.endif
5304    |
5305    |3:  // Invert comparison if step is negative.
5306    |  ucomisd xmm0, xmm1
5307    |  jmp <1
5308    break;
5309
5310  case BC_ITERL:
5311    |.if JIT
5312    |  hotloop RB
5313    |.endif
5314    | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
5315    break;
5316
5317  case BC_JITERL:
5318#if !LJ_HASJIT
5319    break;
5320#endif
5321  case BC_IITERL:
5322    |  ins_AJ	// RA = base, RD = target
5323    |  lea RA, [BASE+RA*8]
5324    |  mov RB, [RA+4]
5325    |  cmp RB, LJ_TNIL; je >1		// Stop if iterator returned nil.
5326    if (op == BC_JITERL) {
5327      |  mov [RA-4], RB
5328      |  mov RB, [RA]
5329      |  mov [RA-8], RB
5330      |  jmp =>BC_JLOOP
5331    } else {
5332      |  branchPC RD			// Otherwise save control var + branch.
5333      |  mov RD, [RA]
5334      |  mov [RA-4], RB
5335      |  mov [RA-8], RD
5336    }
5337    |1:
5338    |  ins_next
5339    break;
5340
5341  case BC_LOOP:
5342    |  ins_A	// RA = base, RD = target (loop extent)
5343    |  // Note: RA/RD is only used by trace recorder to determine scope/extent
5344    |  // This opcode does NOT jump; its only purpose is to detect a hot loop.
5345    |.if JIT
5346    |  hotloop RB
5347    |.endif
5348    | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
5349    break;
5350
5351  case BC_ILOOP:
5352    |  ins_A	// RA = base, RD = target (loop extent)
5353    |  ins_next
5354    break;
5355
5356  case BC_JLOOP:
5357    |.if JIT
5358    |  ins_AD	// RA = base (ignored), RD = traceno
5359#ifdef LUA_USE_TRACE_LOGS
5360    |.if X64
5361    |  mov L:RB, SAVE_L
5362    |  mov L:RB->base, BASE  // Save BASE
5363    |  mov TMP1, RD     // Save RD
5364    |  mov CARG3d, PC  // CARG3d == BASE
5365    |  mov FCARG2, RD
5366    |  mov FCARG1, RB
5367    |  call extern lj_log_trace_entry@8
5368    |  mov RD, TMP1
5369    |  mov BASE, L:RB->base
5370    |.endif
5371#endif
5372    |  mov RA, [DISPATCH+DISPATCH_J(trace)]
5373    |  mov TRACE:RD, [RA+RD*4]
5374    |  mov RDa, TRACE:RD->mcode
5375    |  mov L:RB, SAVE_L
5376    |  mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
5377    |  mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
5378    |  // Save additional callee-save registers only used in compiled code.
5379    |.if X64WIN
5380    |  mov TMPQ, r12
5381    |  mov TMPa, r13
5382    |  mov CSAVE_4, r14
5383    |  mov CSAVE_3, r15
5384    |  mov RAa, rsp
5385    |  sub rsp, 9*16+4*8
5386    |  movdqa [RAa], xmm6
5387    |  movdqa [RAa-1*16], xmm7
5388    |  movdqa [RAa-2*16], xmm8
5389    |  movdqa [RAa-3*16], xmm9
5390    |  movdqa [RAa-4*16], xmm10
5391    |  movdqa [RAa-5*16], xmm11
5392    |  movdqa [RAa-6*16], xmm12
5393    |  movdqa [RAa-7*16], xmm13
5394    |  movdqa [RAa-8*16], xmm14
5395    |  movdqa [RAa-9*16], xmm15
5396    |.elif X64
5397    |  mov TMPQ, r12
5398    |  mov TMPa, r13
5399    |  sub rsp, 16
5400    |.endif
5401    |  jmp RDa
5402    |.endif
5403    break;
5404
5405  case BC_JMP:
5406    |  ins_AJ	// RA = unused, RD = target
5407    |  branchPC RD
5408    |  ins_next
5409    break;
5410
5411  /* -- Function headers -------------------------------------------------- */
5412
5413   /*
5414   ** Reminder: A function may be called with func/args above L->maxstack,
5415   ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
5416   ** too. This means all FUNC* ops (including fast functions) must check
5417   ** for stack overflow _before_ adding more slots!
5418   */
5419
5420  case BC_FUNCF:
5421    |.if JIT
5422    |  hotcall RB
5423    |.endif
5424  case BC_FUNCV:  /* NYI: compiled vararg functions. */
5425    | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
5426    break;
5427
5428  case BC_JFUNCF:
5429#if !LJ_HASJIT
5430    break;
5431#endif
5432  case BC_IFUNCF:
5433    |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
5434    |  mov KBASE, [PC-4+PC2PROTO(k)]
5435    |  mov L:RB, SAVE_L
5436    |  lea RA, [BASE+RA*8]		// Top of frame.
5437    |  cmp RA, L:RB->maxstack
5438    |  ja ->vm_growstack_f
5439    |  movzx RA, byte [PC-4+PC2PROTO(numparams)]
5440    |  cmp NARGS:RD, RA			// Check for missing parameters.
5441    |  jbe >3
5442    |2:
5443    if (op == BC_JFUNCF) {
5444      |  movzx RD, PC_RD
5445      |  jmp =>BC_JLOOP
5446    } else {
5447      |  ins_next
5448    }
5449    |
5450    |3:  // Clear missing parameters.
5451    |  mov dword [BASE+NARGS:RD*8-4], LJ_TNIL
5452    |  add NARGS:RD, 1
5453    |  cmp NARGS:RD, RA
5454    |  jbe <3
5455    |  jmp <2
5456    break;
5457
5458  case BC_JFUNCV:
5459#if !LJ_HASJIT
5460    break;
5461#endif
5462    | int3  // NYI: compiled vararg functions
5463    break;  /* NYI: compiled vararg functions. */
5464
5465  case BC_IFUNCV:
5466    |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
5467    |  lea RB, [NARGS:RD*8+FRAME_VARG]
5468    |  lea RD, [BASE+NARGS:RD*8]
5469    |  mov LFUNC:KBASE, [BASE-8]
5470    |  mov [RD-4], RB			// Store delta + FRAME_VARG.
5471    |  mov [RD-8], LFUNC:KBASE		// Store copy of LFUNC.
5472    |  mov L:RB, SAVE_L
5473    |  lea RA, [RD+RA*8]
5474    |  cmp RA, L:RB->maxstack
5475    |  ja ->vm_growstack_v		// Need to grow stack.
5476    |  mov RA, BASE
5477    |  mov BASE, RD
5478    |  movzx RB, byte [PC-4+PC2PROTO(numparams)]
5479    |  test RB, RB
5480    |  jz >2
5481    |1:  // Copy fixarg slots up to new frame.
5482    |  add RA, 8
5483    |  cmp RA, BASE
5484    |  jnb >3				// Less args than parameters?
5485    |  mov KBASE, [RA-8]
5486    |  mov [RD], KBASE
5487    |  mov KBASE, [RA-4]
5488    |  mov [RD+4], KBASE
5489    |  add RD, 8
5490    |  mov dword [RA-4], LJ_TNIL	// Clear old fixarg slot (help the GC).
5491    |  sub RB, 1
5492    |  jnz <1
5493    |2:
5494    if (op == BC_JFUNCV) {
5495      |  movzx RD, PC_RD
5496      |  jmp =>BC_JLOOP
5497    } else {
5498      |  mov KBASE, [PC-4+PC2PROTO(k)]
5499      |  ins_next
5500    }
5501    |
5502    |3:  // Clear missing parameters.
5503    |  mov dword [RD+4], LJ_TNIL
5504    |  add RD, 8
5505    |  sub RB, 1
5506    |  jnz <3
5507    |  jmp <2
5508    break;
5509
5510  case BC_FUNCC:
5511  case BC_FUNCCW:
5512    |  ins_AD  // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
5513    |  mov CFUNC:RB, [BASE-8]
5514    |  mov KBASEa, CFUNC:RB->f
5515    |  mov L:RB, SAVE_L
5516    |  lea RD, [BASE+NARGS:RD*8-8]
5517    |  mov L:RB->base, BASE
5518    |  lea RA, [RD+8*LUA_MINSTACK]
5519    |  cmp RA, L:RB->maxstack
5520    |  mov L:RB->top, RD
5521    if (op == BC_FUNCC) {
5522      |.if X64
5523      |  mov CARG1d, L:RB			// Caveat: CARG1d may be RA.
5524      |.else
5525      |  mov ARG1, L:RB
5526      |.endif
5527    } else {
5528      |.if X64
5529      |  mov CARG2, KBASEa
5530      |  mov CARG1d, L:RB			// Caveat: CARG1d may be RA.
5531      |.else
5532      |  mov ARG2, KBASEa
5533      |  mov ARG1, L:RB
5534      |.endif
5535    }
5536    |  ja ->vm_growstack_c		// Need to grow stack.
5537    |  set_vmstate C
5538    if (op == BC_FUNCC) {
5539      |  call KBASEa			// (lua_State *L)
5540    } else {
5541      |  // (lua_State *L, lua_CFunction f)
5542      |  call aword [DISPATCH+DISPATCH_GL(wrapf)]
5543    }
5544    |  // nresults returned in eax (RD).
5545    |  mov BASE, L:RB->base
5546    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
5547    |  set_vmstate INTERP
5548    |  lea RA, [BASE+RD*8]
5549    |  neg RA
5550    |  add RA, L:RB->top		// RA = (L->top-(L->base+nresults))*8
5551    |  mov PC, [BASE-4]			// Fetch PC of caller.
5552    |  jmp ->vm_returnc
5553    break;
5554
5555  /* ---------------------------------------------------------------------- */
5556
5557  default:
5558    fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
5559    exit(2);
5560    break;
5561  }
5562}
5563
5564static int build_backend(BuildCtx *ctx)
5565{
5566  int op;
5567  dasm_growpc(Dst, BC__MAX);
5568  build_subroutines(ctx);
5569  |.code_op
5570  for (op = 0; op < BC__MAX; op++)
5571    build_ins(ctx, (BCOp)op, op);
5572  return BC__MAX;
5573}
5574
5575/* Emit pseudo frame-info for all assembler functions.
**
** Writes assembler directives describing the call frames to ctx->fp.
** What is written depends on ctx->mode:
** - BUILD_elfasm: a .debug_frame section and, unless LJ_NO_UNWIND is set,
**   an .eh_frame section.
** - BUILD_machasm (only compiled in when LJ_NO_UNWIND is not set): a
**   __TEXT,__eh_frame section with one FDE per non-empty symbol.
** - Any other mode: nothing is emitted.
** When LJ_HASFFI is set, lj_vm_ffi_call gets its own entries, separate
** from the entry (or entries) covering the rest of the code.
*/
5576static void emit_asm_debug(BuildCtx *ctx)
5577{
  /* Byte offset of the vm_ffi_call global relative to ctx->code. */
5578  int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
  /* String fragments pasted into the directives below:
  ** SZPTR = pointer size in bytes, BSZPTR = log2 of that (used as the
  ** Mach-O .align argument), REG_SP/REG_RA = register numbers written
  ** into the CIEs.
  */
5579#if LJ_64
5580#define SZPTR	"8"
5581#define BSZPTR	"3"
5582#define REG_SP	"0x7"
5583#define REG_RA	"0x10"
5584#else
5585#define SZPTR	"4"
5586#define BSZPTR	"2"
5587#define REG_SP	"0x4"
5588#define REG_RA	"0x8"
5589#endif
5590  switch (ctx->mode) {
5591  case BUILD_elfasm:
    /* .debug_frame: CIE 0 (.Lframe0), referenced by FDE 0 and FDE 1. */
5592    fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
5593    fprintf(ctx->fp,
5594	".Lframe0:\n"
5595	"\t.long .LECIE0-.LSCIE0\n"
5596	".LSCIE0:\n"
5597	"\t.long 0xffffffff\n"
5598	"\t.byte 0x1\n"
5599	"\t.string \"\"\n"
5600	"\t.uleb128 0x1\n"
5601	"\t.sleb128 -" SZPTR "\n"
5602	"\t.byte " REG_RA "\n"
5603	"\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
5604	"\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
5605	"\t.align " SZPTR "\n"
5606	".LECIE0:\n\n");
    /* FDE 0: starts at .Lbegin, the two %d are fcofs and CFRAME_SIZE. */
5607    fprintf(ctx->fp,
5608	".LSFDE0:\n"
5609	"\t.long .LEFDE0-.LASFDE0\n"
5610	".LASFDE0:\n"
5611	"\t.long .Lframe0\n"
5612#if LJ_64
5613	"\t.quad .Lbegin\n"
5614	"\t.quad %d\n"
5615	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
5616	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
5617	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
5618	"\t.byte 0x8f\n\t.uleb128 0x4\n"	/* offset r15 */
5619	"\t.byte 0x8e\n\t.uleb128 0x5\n"	/* offset r14 */
5620#if LJ_NO_UNWIND
5621	"\t.byte 0x8d\n\t.uleb128 0x6\n"	/* offset r13 */
5622	"\t.byte 0x8c\n\t.uleb128 0x7\n"	/* offset r12 */
5623#endif
5624#else
5625	"\t.long .Lbegin\n"
5626	"\t.long %d\n"
5627	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
5628	"\t.byte 0x85\n\t.uleb128 0x2\n"	/* offset ebp */
5629	"\t.byte 0x87\n\t.uleb128 0x3\n"	/* offset edi */
5630	"\t.byte 0x86\n\t.uleb128 0x4\n"	/* offset esi */
5631	"\t.byte 0x83\n\t.uleb128 0x5\n"	/* offset ebx */
5632#endif
5633	"\t.align " SZPTR "\n"
5634	".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
5635#if LJ_HASFFI
    /* FDE 1: starts at lj_vm_ffi_call, the %d is ctx->codesz - fcofs. */
5636    fprintf(ctx->fp,
5637	".LSFDE1:\n"
5638	"\t.long .LEFDE1-.LASFDE1\n"
5639	".LASFDE1:\n"
5640	"\t.long .Lframe0\n"
5641#if LJ_64
5642	"\t.quad lj_vm_ffi_call\n"
5643	"\t.quad %d\n"
5644	"\t.byte 0xe\n\t.uleb128 16\n"		/* def_cfa_offset */
5645	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
5646	"\t.byte 0xd\n\t.uleb128 0x6\n"		/* def_cfa_register rbp */
5647	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
5648#else
5649	"\t.long lj_vm_ffi_call\n"
5650	"\t.long %d\n"
5651	"\t.byte 0xe\n\t.uleb128 8\n"		/* def_cfa_offset */
5652	"\t.byte 0x85\n\t.uleb128 0x2\n"	/* offset ebp */
5653	"\t.byte 0xd\n\t.uleb128 0x5\n"		/* def_cfa_register ebp */
5654	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset ebx */
5655#endif
5656	"\t.align " SZPTR "\n"
5657	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
5658#endif
5659#if !LJ_NO_UNWIND
    /* .eh_frame section header; section flags/type differ on Solaris. */
5660#if LJ_TARGET_SOLARIS
5661#if LJ_64
5662    fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
5663#else
5664    fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n");
5665#endif
5666#else
5667    fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
5668#endif
    /* CIE 1 (.Lframe1): augmentation "zPR", references lj_err_unwind_dwarf. */
5669    fprintf(ctx->fp,
5670	".Lframe1:\n"
5671	"\t.long .LECIE1-.LSCIE1\n"
5672	".LSCIE1:\n"
5673	"\t.long 0\n"
5674	"\t.byte 0x1\n"
5675	"\t.string \"zPR\"\n"
5676	"\t.uleb128 0x1\n"
5677	"\t.sleb128 -" SZPTR "\n"
5678	"\t.byte " REG_RA "\n"
5679	"\t.uleb128 6\n"			/* augmentation length */
5680	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
5681	"\t.long lj_err_unwind_dwarf-.\n"
5682	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
5683	"\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
5684	"\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
5685	"\t.align " SZPTR "\n"
5686	".LECIE1:\n\n");
    /* FDE 2: PC-relative start at .Lbegin, the two %d are fcofs and
    ** CFRAME_SIZE (same values as FDE 0 in .debug_frame).
    */
5687    fprintf(ctx->fp,
5688	".LSFDE2:\n"
5689	"\t.long .LEFDE2-.LASFDE2\n"
5690	".LASFDE2:\n"
5691	"\t.long .LASFDE2-.Lframe1\n"
5692	"\t.long .Lbegin-.\n"
5693	"\t.long %d\n"
5694	"\t.uleb128 0\n"			/* augmentation length */
5695	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
5696#if LJ_64
5697	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
5698	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
5699	"\t.byte 0x8f\n\t.uleb128 0x4\n"	/* offset r15 */
5700	"\t.byte 0x8e\n\t.uleb128 0x5\n"	/* offset r14 */
5701#else
5702	"\t.byte 0x85\n\t.uleb128 0x2\n"	/* offset ebp */
5703	"\t.byte 0x87\n\t.uleb128 0x3\n"	/* offset edi */
5704	"\t.byte 0x86\n\t.uleb128 0x4\n"	/* offset esi */
5705	"\t.byte 0x83\n\t.uleb128 0x5\n"	/* offset ebx */
5706#endif
5707	"\t.align " SZPTR "\n"
5708	".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
5709#if LJ_HASFFI
    /* CIE 2 (.Lframe2): augmentation "zR" only, i.e. without the
    ** lj_err_unwind_dwarf reference of CIE 1. Used by FDE 3 below.
    */
5710    fprintf(ctx->fp,
5711	".Lframe2:\n"
5712	"\t.long .LECIE2-.LSCIE2\n"
5713	".LSCIE2:\n"
5714	"\t.long 0\n"
5715	"\t.byte 0x1\n"
5716	"\t.string \"zR\"\n"
5717	"\t.uleb128 0x1\n"
5718	"\t.sleb128 -" SZPTR "\n"
5719	"\t.byte " REG_RA "\n"
5720	"\t.uleb128 1\n"			/* augmentation length */
5721	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
5722	"\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
5723	"\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
5724	"\t.align " SZPTR "\n"
5725	".LECIE2:\n\n");
    /* FDE 3: PC-relative start at lj_vm_ffi_call, %d is ctx->codesz - fcofs. */
5726    fprintf(ctx->fp,
5727	".LSFDE3:\n"
5728	"\t.long .LEFDE3-.LASFDE3\n"
5729	".LASFDE3:\n"
5730	"\t.long .LASFDE3-.Lframe2\n"
5731	"\t.long lj_vm_ffi_call-.\n"
5732	"\t.long %d\n"
5733	"\t.uleb128 0\n"			/* augmentation length */
5734#if LJ_64
5735	"\t.byte 0xe\n\t.uleb128 16\n"		/* def_cfa_offset */
5736	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
5737	"\t.byte 0xd\n\t.uleb128 0x6\n"		/* def_cfa_register rbp */
5738	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
5739#else
5740	"\t.byte 0xe\n\t.uleb128 8\n"		/* def_cfa_offset */
5741	"\t.byte 0x85\n\t.uleb128 0x2\n"	/* offset ebp */
5742	"\t.byte 0xd\n\t.uleb128 0x5\n"		/* def_cfa_register ebp */
5743	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset ebx */
5744#endif
5745	"\t.align " SZPTR "\n"
5746	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
5747#endif
5748#endif
5749    break;
5750#if !LJ_NO_UNWIND
5751  /* Mental note: never let Apple design an assembler.
5752  ** Or a linker. Or a plastic case. But I digress.
5753  */
5754  case BUILD_machasm: {
5755#if LJ_HASFFI
    /* Size of _lj_vm_ffi_call, recorded by the symbol loop below. */
5756    int fcsize = 0;
5757#endif
5758    int i;
    /* CIE (EH_frame1) shared by all per-symbol FDEs emitted in the loop. */
5759    fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
5760    fprintf(ctx->fp,
5761	"EH_frame1:\n"
5762	"\t.set L$set$x,LECIEX-LSCIEX\n"
5763	"\t.long L$set$x\n"
5764	"LSCIEX:\n"
5765	"\t.long 0\n"
5766	"\t.byte 0x1\n"
5767	"\t.ascii \"zPR\\0\"\n"
5768	"\t.byte 0x1\n"
5769	"\t.byte 128-" SZPTR "\n"
5770	"\t.byte " REG_RA "\n"
5771	"\t.byte 6\n"				/* augmentation length */
5772	"\t.byte 0x9b\n"			/* indirect|pcrel|sdata4 */
5773#if LJ_64
5774	"\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
5775	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
5776	"\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
5777#else
5778	"\t.long L_lj_err_unwind_dwarf$non_lazy_ptr-.\n"
5779	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
5780	"\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n"  /* esp=5 on 32 bit MACH-O. */
5781#endif
5782	"\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
5783	"\t.align " BSZPTR "\n"
5784	"LECIEX:\n\n");
    /* One FDE per symbol. The symbol index i makes the labels unique.
    ** NOTE(review): size is computed from ctx->sym[i+1], also for the last
    ** symbol, so this relies on a valid entry at ctx->sym[ctx->nsym] --
    ** confirm against the code that fills ctx->sym.
    */
5785    for (i = 0; i < ctx->nsym; i++) {
5786      const char *name = ctx->sym[i].name;
5787      int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
      /* Skip symbols that cover no code. */
5788      if (size == 0) continue;
5789#if LJ_HASFFI
      /* _lj_vm_ffi_call is only remembered here; its FDE is emitted below. */
5790      if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
5791#endif
5792      fprintf(ctx->fp,
5793	  "%s.eh:\n"
5794	  "LSFDE%d:\n"
5795	  "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
5796	  "\t.long L$set$%d\n"
5797	  "LASFDE%d:\n"
5798	  "\t.long LASFDE%d-EH_frame1\n"
5799	  "\t.long %s-.\n"
5800	  "\t.long %d\n"
5801	  "\t.byte 0\n"				/* augmentation length */
5802	  "\t.byte 0xe\n\t.byte %d\n"		/* def_cfa_offset */
5803#if LJ_64
5804	  "\t.byte 0x86\n\t.byte 0x2\n"		/* offset rbp */
5805	  "\t.byte 0x83\n\t.byte 0x3\n"		/* offset rbx */
5806	  "\t.byte 0x8f\n\t.byte 0x4\n"		/* offset r15 */
5807	  "\t.byte 0x8e\n\t.byte 0x5\n"		/* offset r14 */
5808#else
5809	  "\t.byte 0x84\n\t.byte 0x2\n"		/* offset ebp (4 for MACH-O)*/
5810	  "\t.byte 0x87\n\t.byte 0x3\n"		/* offset edi */
5811	  "\t.byte 0x86\n\t.byte 0x4\n"		/* offset esi */
5812	  "\t.byte 0x83\n\t.byte 0x5\n"		/* offset ebx */
5813#endif
5814	  "\t.align " BSZPTR "\n"
5815	  "LEFDE%d:\n\n",
5816	  name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
5817    }
5818#if LJ_HASFFI
    /* Only emitted if the loop above saw _lj_vm_ffi_call with non-zero size.
    ** It gets its own CIE (EH_frame2, augmentation "zR") plus one FDE.
    */
5819    if (fcsize) {
5820      fprintf(ctx->fp,
5821	  "EH_frame2:\n"
5822	  "\t.set L$set$y,LECIEY-LSCIEY\n"
5823	  "\t.long L$set$y\n"
5824	  "LSCIEY:\n"
5825	  "\t.long 0\n"
5826	  "\t.byte 0x1\n"
5827	  "\t.ascii \"zR\\0\"\n"
5828	  "\t.byte 0x1\n"
5829	  "\t.byte 128-" SZPTR "\n"
5830	  "\t.byte " REG_RA "\n"
5831	  "\t.byte 1\n"				/* augmentation length */
5832#if LJ_64
5833	  "\t.byte 0x1b\n"			/* pcrel|sdata4 */
5834	  "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
5835#else
5836	  "\t.byte 0x1b\n"			/* pcrel|sdata4 */
5837	  "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n"  /* esp=5 on 32 bit MACH. */
5838#endif
5839	  "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
5840	  "\t.align " BSZPTR "\n"
5841	  "LECIEY:\n\n");
5842      fprintf(ctx->fp,
5843	  "_lj_vm_ffi_call.eh:\n"
5844	  "LSFDEY:\n"
5845	  "\t.set L$set$yy,LEFDEY-LASFDEY\n"
5846	  "\t.long L$set$yy\n"
5847	  "LASFDEY:\n"
5848	  "\t.long LASFDEY-EH_frame2\n"
5849	  "\t.long _lj_vm_ffi_call-.\n"
5850	  "\t.long %d\n"
5851	  "\t.byte 0\n"				/* augmentation length */
5852#if LJ_64
5853	  "\t.byte 0xe\n\t.byte 16\n"		/* def_cfa_offset */
5854	  "\t.byte 0x86\n\t.byte 0x2\n"		/* offset rbp */
5855	  "\t.byte 0xd\n\t.byte 0x6\n"		/* def_cfa_register rbp */
5856	  "\t.byte 0x83\n\t.byte 0x3\n"		/* offset rbx */
5857#else
5858	  "\t.byte 0xe\n\t.byte 8\n"		/* def_cfa_offset */
5859	  "\t.byte 0x84\n\t.byte 0x2\n"		/* offset ebp (4 for MACH-O)*/
5860	  "\t.byte 0xd\n\t.byte 0x4\n"		/* def_cfa_register ebp */
5861	  "\t.byte 0x83\n\t.byte 0x3\n"		/* offset ebx */
5862#endif
5863	  "\t.align " BSZPTR "\n"
5864	  "LEFDEY:\n\n", fcsize);
5865    }
5866#endif
5867#if !LJ_64
    /* 32 bit only: the non-lazy pointer referenced by the EH_frame1 CIE,
    ** followed by one jump-table stub for every external name that does
    ** not start with LABEL_PREFIX.
    */
5868    fprintf(ctx->fp,
5869      "\t.non_lazy_symbol_pointer\n"
5870      "L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
5871      ".indirect_symbol _lj_err_unwind_dwarf\n"
5872      ".long 0\n\n");
5873    fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n");
5874    {
5875      const char *const *xn;
5876      for (xn = ctx->extnames; *xn; xn++)
5877	if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
5878	  fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn);
5879    }
5880#endif
5881    fprintf(ctx->fp, ".subsections_via_symbols\n");
5882    }
5883    break;
5884#endif
5885  default:  /* Difficult for other modes. */
5886    break;
5887  }
5888}
5889
5890