/*
** FFI C callback handling.
** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
*/

#include "lj_obj.h"

#if LJ_HASFFI

#include "lj_gc.h"
#include "lj_err.h"
#include "lj_tab.h"
#include "lj_state.h"
#include "lj_frame.h"
#include "lj_ctype.h"
#include "lj_cconv.h"
#include "lj_ccall.h"
#include "lj_ccallback.h"
#include "lj_target.h"
#include "lj_mcode.h"
#include "lj_trace.h"
#include "lj_vm.h"
#include "lualib.h"

/* -- Target-specific handling of callback slots -------------------------- */

#define CALLBACK_MCODE_SIZE	(LJ_PAGESIZE * LJ_NUM_CBPAGE)

#if LJ_OS_NOJIT

/* Callbacks disabled. */
#define CALLBACK_SLOT2OFS(slot)	(0*(slot))
#define CALLBACK_OFS2SLOT(ofs)	(0*(ofs))
#define CALLBACK_MAX_SLOT	0

#elif LJ_TARGET_X86ORX64

#define CALLBACK_MCODE_HEAD	(LJ_64 ? 8 : 0)
#define CALLBACK_MCODE_GROUP	(-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 6 : 5))

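/* The x86/x64 mcode area starts with an optional 8 byte head (x64 only),
** which holds the address of lj_vm_ffi_callback for the RIP-relative jump.
** It's followed by groups of 32 four-byte slot stubs ("mov al, slot" plus a
** short jmp to the group trailer; the last stub of a group falls through).
** Each group ends in a CALLBACK_MCODE_GROUP byte trailer: push ebp/rbp,
** "mov ah, slot>>8" (so ax holds the full slot number), load &g into
** ebp/rbp and jump to lj_vm_ffi_callback.
*/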
#define CALLBACK_SLOT2OFS(slot) \
  (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot))

static MSize CALLBACK_OFS2SLOT(MSize ofs)
{
  MSize group;
  ofs -= CALLBACK_MCODE_HEAD;
  group = ofs / (32*4 + CALLBACK_MCODE_GROUP);
  return (ofs % (32*4 + CALLBACK_MCODE_GROUP))/4 + group*32;
}

#define CALLBACK_MAX_SLOT \
  (((CALLBACK_MCODE_SIZE-CALLBACK_MCODE_HEAD)/(CALLBACK_MCODE_GROUP+4*32))*32)
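/* Example, assuming the usual LJ_PAGESIZE of 4096, LJ_NUM_CBPAGE of 1 and
** LJ_GC64: CALLBACK_MCODE_GROUP is -2+1+2+10+6 = 17, each group of 32 slots
** occupies 32*4+17 = 145 bytes, and CALLBACK_MAX_SLOT works out to
** ((4096-8)/145)*32 = 896 callback slots per VM.
*/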

#elif LJ_TARGET_ARM

#define CALLBACK_MCODE_HEAD		32

#elif LJ_TARGET_ARM64

#define CALLBACK_MCODE_HEAD		32

#elif LJ_TARGET_PPC

#if LJ_ARCH_PPC_OPD

#define CALLBACK_SLOT2OFS(slot)		(24*(slot))
#define CALLBACK_OFS2SLOT(ofs)		((ofs)/24)
#define CALLBACK_MAX_SLOT		(CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))

#elif LJ_ARCH_PPC_ELFV2

#define CALLBACK_SLOT2OFS(slot)		(4*(slot))
#define CALLBACK_OFS2SLOT(ofs)		((ofs)/4)
#define CALLBACK_MAX_SLOT		(CALLBACK_MCODE_SIZE/4 - 10)

#else

#define CALLBACK_MCODE_HEAD		24

#endif

#elif LJ_TARGET_MIPS32

#define CALLBACK_MCODE_HEAD		20

#elif LJ_TARGET_MIPS64

#define CALLBACK_MCODE_HEAD		52

#else

/* Missing support for this architecture. */
#define CALLBACK_SLOT2OFS(slot)	(0*(slot))
#define CALLBACK_OFS2SLOT(ofs)	(0*(ofs))
#define CALLBACK_MAX_SLOT	0

#endif

#ifndef CALLBACK_SLOT2OFS
#define CALLBACK_SLOT2OFS(slot)		(CALLBACK_MCODE_HEAD + 8*(slot))
#define CALLBACK_OFS2SLOT(ofs)		(((ofs)-CALLBACK_MCODE_HEAD)/8)
#define CALLBACK_MAX_SLOT		(CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
#endif

/* Convert callback slot number to callback function pointer. */
static void *callback_slot2ptr(CTState *cts, MSize slot)
{
  return (uint8_t *)cts->cb.mcode + CALLBACK_SLOT2OFS(slot);
}

/* Convert callback function pointer to slot number. */
MSize lj_ccallback_ptr2slot(CTState *cts, void *p)
{
  uintptr_t ofs = (uintptr_t)((uint8_t *)p - (uint8_t *)cts->cb.mcode);
  if (ofs < CALLBACK_MCODE_SIZE) {
    MSize slot = CALLBACK_OFS2SLOT((MSize)ofs);
    if (CALLBACK_SLOT2OFS(slot) == (MSize)ofs)
      return slot;
  }
  return ~0u;  /* Not a known callback function pointer. */
}

/* Initialize machine code for callback function pointers. */
#if LJ_OS_NOJIT
/* Disabled callback support. */
#define callback_mcode_init(g, p)	(p)
#elif LJ_TARGET_X86ORX64
static void *callback_mcode_init(global_State *g, uint8_t *page)
{
  uint8_t *p = page;
  uint8_t *target = (uint8_t *)(void *)lj_vm_ffi_callback;
  MSize slot;
#if LJ_64
  *(void **)p = target; p += 8;
#endif
  for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
    /* mov al, slot; jmp group */
    *p++ = XI_MOVrib | RID_EAX; *p++ = (uint8_t)slot;
    if ((slot & 31) == 31 || slot == CALLBACK_MAX_SLOT-1) {
      /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */
      *p++ = XI_PUSH + RID_EBP;
      *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8);
#if LJ_GC64
      *p++ = 0x48; *p++ = XI_MOVri | RID_EBP;
      *(uint64_t *)p = (uint64_t)(g); p += 8;
#else
      *p++ = XI_MOVri | RID_EBP;
      *(int32_t *)p = i32ptr(g); p += 4;
#endif
#if LJ_64
      /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */
      *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP;
      *(int32_t *)p = (int32_t)(page-(p+4)); p += 4;
#else
      /* jmp lj_vm_ffi_callback. */
      *p++ = XI_JMP; *(int32_t *)p = target-(p+4); p += 4;
#endif
    } else {
      *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2);
    }
  }
  return p;
}
#elif LJ_TARGET_ARM
static void *callback_mcode_init(global_State *g, uint32_t *page)
{
  uint32_t *p = page;
  void *target = (void *)lj_vm_ffi_callback;
  MSize slot;
  /* This must match the saveregs macro in buildvm_arm.dasc. */
  *p++ = ARMI_SUB|ARMF_D(RID_R12)|ARMF_N(RID_R12)|ARMF_M(RID_PC);
  *p++ = ARMI_PUSH|ARMF_N(RID_SP)|RSET_RANGE(RID_R4,RID_R11+1)|RID2RSET(RID_LR);
  *p++ = ARMI_SUB|ARMI_K12|ARMF_D(RID_R12)|ARMF_N(RID_R12)|CALLBACK_MCODE_HEAD;
  *p++ = ARMI_STR|ARMI_LS_P|ARMI_LS_W|ARMF_D(RID_R12)|ARMF_N(RID_SP)|(CFRAME_SIZE-4*9);
  *p++ = ARMI_LDR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_R12)|ARMF_N(RID_PC);
  *p++ = ARMI_LDR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_PC)|ARMF_N(RID_PC);
  *p++ = u32ptr(g);
  *p++ = u32ptr(target);
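  /* Each 8 byte slot stub below does "mov r12, pc; b <head>". The head code
  ** above converts the captured PC into the slot byte offset, saves it in
  ** the C frame it sets up, then loads g into r12 and jumps to
  ** lj_vm_ffi_callback through the two literal words (g and target).
  */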
  for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
    *p++ = ARMI_MOV|ARMF_D(RID_R12)|ARMF_M(RID_PC);
    *p = ARMI_B | ((page-p-2) & 0x00ffffffu);
    p++;
  }
  return p;
}
#elif LJ_TARGET_ARM64
static void *callback_mcode_init(global_State *g, uint32_t *page)
{
  uint32_t *p = page;
  void *target = (void *)lj_vm_ffi_callback;
  MSize slot;
  *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4));
  *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5));
  *p++ = A64I_LE(A64I_BR | A64F_N(RID_X11));
  *p++ = A64I_LE(A64I_NOP);
  ((void **)p)[0] = target;
  ((void **)p)[1] = g;
  p += 4;
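  /* The four head instructions load target into x11 and g into x10 from the
  ** 16 byte literal pool that follows them and branch to target (br x11).
  ** Each 8 byte slot stub below is "movz w9, #slot; b <head>".
  */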
  for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
    *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot));
    *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu));
    p++;
  }
  return p;
}
#elif LJ_TARGET_PPC
#if LJ_ARCH_PPC_OPD
register void *vm_toc __asm__("r2");
static void *callback_mcode_init(global_State *g, uint64_t *page)
{
  uint64_t *p = page;
  void *target = (void *)lj_vm_ffi_callback;
  MSize slot;
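  /* With OPD function descriptors no machine code is emitted at all: each
  ** 24 byte slot is a descriptor holding the entry address of
  ** lj_vm_ffi_callback, the current TOC pointer and an environment word
  ** that packs g together with the slot number in bits 47 and up.
  */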
  for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
    *p++ = (uint64_t)target;
    *p++ = (uint64_t)vm_toc;
    *p++ = (uint64_t)g | ((uint64_t)slot << 47);
  }
  return p;
}
#else
static void *callback_mcode_init(global_State *g, uint32_t *page)
{
  uint32_t *p = page;
  void *target = (void *)lj_vm_ffi_callback;
  MSize slot;
#if LJ_ARCH_PPC_ELFV2
  /* Needs to be in sync with lj_vm_ffi_callback. */
  lua_assert(CALLBACK_MCODE_SIZE == 4096);
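  /* Each slot is a single branch to the common tail emitted below, which
  ** materializes the 64 bit addresses of lj_vm_ffi_callback and g and jumps
  ** there via CTR. The slot index is recovered on the VM side, presumably
  ** from the stub's entry address (available in r12 under the ELFv2
  ** global-entry convention); the single-page assert above guards that
  ** assumption.
  */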
  for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
    *p = PPCI_B | (((page+CALLBACK_MAX_SLOT-p) & 0x00ffffffu) << 2);
    p++;
  }
  *p++ = PPCI_LI | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 32) & 0xffff);
  *p++ = PPCI_LI | PPCF_T(RID_R11) | ((((intptr_t)g) >> 32) & 0xffff);
  *p++ = PPCI_RLDICR | PPCF_T(RID_SYS1) | PPCF_A(RID_SYS1) | PPCF_SH(32) | PPCF_M6(63-32);  /* sldi */
  *p++ = PPCI_RLDICR | PPCF_T(RID_R11) | PPCF_A(RID_R11) | PPCF_SH(32) | PPCF_M6(63-32);  /* sldi */
  *p++ = PPCI_ORIS | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 16) & 0xffff);
  *p++ = PPCI_ORIS | PPCF_A(RID_R11) | PPCF_T(RID_R11) | ((((intptr_t)g) >> 16) & 0xffff);
  *p++ = PPCI_ORI | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | (((intptr_t)target) & 0xffff);
  *p++ = PPCI_ORI | PPCF_A(RID_R11) | PPCF_T(RID_R11) | (((intptr_t)g) & 0xffff);
  *p++ = PPCI_MTCTR | PPCF_T(RID_SYS1);
  *p++ = PPCI_BCTR;
#else
  *p++ = PPCI_LIS | PPCF_T(RID_TMP) | (u32ptr(target) >> 16);
  *p++ = PPCI_LIS | PPCF_T(RID_R11) | (u32ptr(g) >> 16);
  *p++ = PPCI_ORI | PPCF_A(RID_TMP)|PPCF_T(RID_TMP) | (u32ptr(target) & 0xffff);
  *p++ = PPCI_ORI | PPCF_A(RID_R11)|PPCF_T(RID_R11) | (u32ptr(g) & 0xffff);
  *p++ = PPCI_MTCTR | PPCF_T(RID_TMP);
  *p++ = PPCI_BCTR;
  for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
    *p++ = PPCI_LI | PPCF_T(RID_R12) | slot;
    *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2);
    p++;
  }
#endif
  return p;
}
#endif
#elif LJ_TARGET_MIPS
static void *callback_mcode_init(global_State *g, uint32_t *page)
{
  uint32_t *p = page;
  uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback;
  uintptr_t ug = (uintptr_t)(void *)g;
  MSize slot;
#if LJ_TARGET_MIPS32
  *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 16);
  *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 16);
#else
  *p++ = MIPSI_LUI  | MIPSF_T(RID_R3) | (target >> 48);
  *p++ = MIPSI_LUI  | MIPSF_T(RID_R2) | (ug >> 48);
  *p++ = MIPSI_ORI  | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 32) & 0xffff);
  *p++ = MIPSI_ORI  | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 32) & 0xffff);
  *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16);
  *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16);
  *p++ = MIPSI_ORI  | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 16) & 0xffff);
  *p++ = MIPSI_ORI  | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 16) & 0xffff);
  *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16);
  *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16);
#endif
  *p++ = MIPSI_ORI  | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | (target & 0xffff);
  *p++ = MIPSI_JR | MIPSF_S(RID_R3);
  *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (ug & 0xffff);
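  /* The head above loads the addresses of lj_vm_ffi_callback (RID_R3) and g
  ** (RID_R2) piecewise and jumps via "jr", with the final "ori" of g sitting
  ** in the jump's delay slot. Each 8 byte slot stub below branches back to
  ** the head with "li r1, slot" in its branch delay slot.
  */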
  for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
    *p = MIPSI_B | ((page-p-1) & 0x0000ffffu);
    p++;
    *p++ = MIPSI_LI | MIPSF_T(RID_R1) | slot;
  }
  return p;
}
#else
/* Missing support for this architecture. */
#define callback_mcode_init(g, p)	(p)
#endif

/* -- Machine code management --------------------------------------------- */

#if LJ_TARGET_WINDOWS

#define WIN32_LEAN_AND_MEAN
#include <windows.h>

#elif LJ_TARGET_POSIX

#include <sys/mman.h>
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS   MAP_ANON
#endif
#ifdef PROT_MPROTECT
#define CCPROT_CREATE	(PROT_MPROTECT(PROT_EXEC))
#else
#define CCPROT_CREATE	0
#endif

#endif

/* Allocate and initialize area for callback function pointers. */
static void callback_mcode_new(CTState *cts)
{
  size_t sz = (size_t)CALLBACK_MCODE_SIZE;
  void *p, *pe;
  if (CALLBACK_MAX_SLOT == 0)
    lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
#if LJ_TARGET_WINDOWS
  p = LJ_WIN_VALLOC(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
  if (!p)
    lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
#elif LJ_TARGET_POSIX
  p = mmap(NULL, sz, (PROT_READ|PROT_WRITE|CCPROT_CREATE), MAP_PRIVATE|MAP_ANONYMOUS,
	   -1, 0);
  if (p == MAP_FAILED)
    lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
#else
  /* Fallback allocator. Fails if memory is not executable by default. */
  p = lj_mem_new(cts->L, sz);
#endif
  cts->cb.mcode = p;
  pe = callback_mcode_init(cts->g, p);
  UNUSED(pe);
  lj_assertCTS((size_t)((char *)pe - (char *)p) <= sz,
	       "miscalculated CALLBACK_MAX_SLOT");
  lj_mcode_sync(p, (char *)p + sz);
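  /* Flip the finished area from writable to executable (W^X). The fallback
  ** allocator path above relies on memory being executable by default.
  */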
#if LJ_TARGET_WINDOWS
  {
    DWORD oprot;
    LJ_WIN_VPROTECT(p, sz, PAGE_EXECUTE_READ, &oprot);
  }
#elif LJ_TARGET_POSIX
  mprotect(p, sz, (PROT_READ|PROT_EXEC));
#endif
}

/* Free area for callback function pointers. */
void lj_ccallback_mcode_free(CTState *cts)
{
  size_t sz = (size_t)CALLBACK_MCODE_SIZE;
  void *p = cts->cb.mcode;
  if (p == NULL) return;
#if LJ_TARGET_WINDOWS
  VirtualFree(p, 0, MEM_RELEASE);
  UNUSED(sz);
#elif LJ_TARGET_POSIX
  munmap(p, sz);
#else
  lj_mem_free(cts->g, p, sz);
#endif
}

/* -- C callback entry ---------------------------------------------------- */

/* Target-specific handling of register arguments. Similar to lj_ccall.c. */
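/* Each CALLBACK_HANDLE_REGARG macro is expanded inside the argument loop of
** callback_conv_args() below. It sees the raw argument type (cta), whether
** it's a floating-point type (isfp) and its size in machine words (n). If
** the argument was passed in registers, the macro points sp at the saved
** register contents in cts->cb.gpr[]/fpr[] and jumps to 'done'; otherwise
** control falls through to the stack-argument path.
*/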
#if LJ_TARGET_X86

#define CALLBACK_HANDLE_REGARG \
  if (!isfp) {  /* Only non-FP values may be passed in registers. */ \
    if (n > 1) {  /* Anything > 32 bit is passed on the stack. */ \
      if (!LJ_ABI_WIN) ngpr = maxgpr;  /* Prevent reordering. */ \
    } else if (ngpr + 1 <= maxgpr) { \
      sp = &cts->cb.gpr[ngpr]; \
      ngpr += n; \
      goto done; \
    } \
  }

#elif LJ_TARGET_X64 && LJ_ABI_WIN

/* Windows/x64 argument registers are strictly positional (use ngpr). */
#define CALLBACK_HANDLE_REGARG \
  if (isfp) { \
    if (ngpr < maxgpr) { sp = &cts->cb.fpr[ngpr++]; UNUSED(nfpr); goto done; } \
  } else { \
    if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \
  }

#elif LJ_TARGET_X64

#define CALLBACK_HANDLE_REGARG \
  if (isfp) { \
    if (nfpr + n <= CCALL_NARG_FPR) { \
      sp = &cts->cb.fpr[nfpr]; \
      nfpr += n; \
      goto done; \
    } \
  } else { \
    if (ngpr + n <= maxgpr) { \
      sp = &cts->cb.gpr[ngpr]; \
      ngpr += n; \
      goto done; \
    } \
  }

#elif LJ_TARGET_ARM

#if LJ_ABI_SOFTFP

#define CALLBACK_HANDLE_REGARG_FP1	UNUSED(isfp);
#define CALLBACK_HANDLE_REGARG_FP2

#else

#define CALLBACK_HANDLE_REGARG_FP1 \
  if (isfp) { \
    if (n == 1) { \
      if (fprodd) { \
	sp = &cts->cb.fpr[fprodd-1]; \
	fprodd = 0; \
	goto done; \
      } else if (nfpr + 1 <= CCALL_NARG_FPR) { \
	sp = &cts->cb.fpr[nfpr++]; \
	fprodd = nfpr; \
	goto done; \
      } \
    } else { \
      if (nfpr + 1 <= CCALL_NARG_FPR) { \
	sp = &cts->cb.fpr[nfpr++]; \
	goto done; \
      } \
    } \
    fprodd = 0;  /* No reordering after the first FP value is on stack. */ \
  } else {

#define CALLBACK_HANDLE_REGARG_FP2	}

#endif

#define CALLBACK_HANDLE_REGARG \
  CALLBACK_HANDLE_REGARG_FP1 \
  if (n > 1) ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
  if (ngpr + n <= maxgpr) { \
    sp = &cts->cb.gpr[ngpr]; \
    ngpr += n; \
    goto done; \
  } CALLBACK_HANDLE_REGARG_FP2

#elif LJ_TARGET_ARM64

#define CALLBACK_HANDLE_REGARG \
  if (isfp) { \
    if (nfpr + n <= CCALL_NARG_FPR) { \
      sp = &cts->cb.fpr[nfpr]; \
      nfpr += n; \
      goto done; \
    } else { \
      nfpr = CCALL_NARG_FPR;  /* Prevent reordering. */ \
    } \
  } else { \
    if (!LJ_TARGET_OSX && n > 1) \
      ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
    if (ngpr + n <= maxgpr) { \
      sp = &cts->cb.gpr[ngpr]; \
      ngpr += n; \
      goto done; \
    } else { \
      ngpr = CCALL_NARG_GPR;  /* Prevent reordering. */ \
    } \
  }

#elif LJ_TARGET_PPC

#define CALLBACK_HANDLE_GPR \
  if (n > 1) { \
    lj_assertCTS(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) ||  /* double. */ \
		 ctype_isinteger(cta->info)) && n == 2,  /* int64_t. */ \
		 "bad GPR type"); \
    ngpr = (ngpr + 1u) & ~1u;  /* Align int64_t to regpair. */ \
  } \
  if (ngpr + n <= maxgpr) { \
    sp = &cts->cb.gpr[ngpr]; \
    ngpr += n; \
    goto done; \
  }

#if LJ_ABI_SOFTFP
#define CALLBACK_HANDLE_REGARG \
  CALLBACK_HANDLE_GPR \
  UNUSED(isfp);
#else
#define CALLBACK_HANDLE_REGARG \
  if (isfp) { \
    if (nfpr + 1 <= CCALL_NARG_FPR) { \
      sp = &cts->cb.fpr[nfpr++]; \
      cta = ctype_get(cts, CTID_DOUBLE);  /* FPRs always hold doubles. */ \
      goto done; \
    } \
  } else {  /* Try to pass argument in GPRs. */ \
    CALLBACK_HANDLE_GPR \
  }
#endif

#if !LJ_ABI_SOFTFP
#define CALLBACK_HANDLE_RET \
  if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
    *(double *)dp = *(float *)dp;  /* FPRs always hold doubles. */
#endif

#elif LJ_TARGET_MIPS32

#define CALLBACK_HANDLE_GPR \
  if (n > 1) ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
  if (ngpr + n <= maxgpr) { \
    sp = &cts->cb.gpr[ngpr]; \
    ngpr += n; \
    goto done; \
  }

#if !LJ_ABI_SOFTFP	/* MIPS32 hard-float */
#define CALLBACK_HANDLE_REGARG \
  if (isfp && nfpr < CCALL_NARG_FPR) {  /* Try to pass argument in FPRs. */ \
    sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \
    nfpr++; ngpr += n; \
    goto done; \
  } else {  /* Try to pass argument in GPRs. */ \
    nfpr = CCALL_NARG_FPR; \
    CALLBACK_HANDLE_GPR \
  }
#else			/* MIPS32 soft-float */
#define CALLBACK_HANDLE_REGARG \
  CALLBACK_HANDLE_GPR \
  UNUSED(isfp);
#endif

#define CALLBACK_HANDLE_RET \
  if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
    ((float *)dp)[1] = *(float *)dp;

#elif LJ_TARGET_MIPS64

#if !LJ_ABI_SOFTFP	/* MIPS64 hard-float */
#define CALLBACK_HANDLE_REGARG \
  if (ngpr + n <= maxgpr) { \
    sp = isfp ? (void*) &cts->cb.fpr[ngpr] : (void*) &cts->cb.gpr[ngpr]; \
    ngpr += n; \
    goto done; \
  }
#else			/* MIPS64 soft-float */
#define CALLBACK_HANDLE_REGARG \
  if (ngpr + n <= maxgpr) { \
    UNUSED(isfp); \
    sp = (void*) &cts->cb.gpr[ngpr]; \
    ngpr += n; \
    goto done; \
  }
#endif

#define CALLBACK_HANDLE_RET \
  if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
    ((float *)dp)[1] = *(float *)dp;

#elif LJ_TARGET_S390X

#define CALLBACK_HANDLE_REGARG \
  if (isfp) { \
    if (nfpr < CCALL_NARG_FPR) { sp = &cts->cb.fpr[nfpr++]; goto done; } \
  } else { \
    if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \
  }

#else
#error "Missing calling convention definitions for this architecture"
#endif

/* Convert and push callback arguments to Lua stack. */
static void callback_conv_args(CTState *cts, lua_State *L)
{
  TValue *o = L->top;
  intptr_t *stack = cts->cb.stack;
  MSize slot = cts->cb.slot;
  CTypeID id = 0, rid, fid;
  int gcsteps = 0;
  CType *ct;
  GCfunc *fn;
  int fntp;
  MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR;
#if CCALL_NARG_FPR
  MSize nfpr = 0;
#if LJ_TARGET_ARM
  MSize fprodd = 0;
#endif
#endif

  if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) {
    ct = ctype_get(cts, id);
    rid = ctype_cid(ct->info);  /* Return type. x86: +(spadj<<16). */
    fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot));
    fntp = LJ_TFUNC;
  } else {  /* Must set up frame first, before throwing the error. */
    ct = NULL;
    rid = 0;
    fn = (GCfunc *)L;
    fntp = LJ_TTHREAD;
  }
  /* Continuation returns from callback. */
  if (LJ_FR2) {
    (o++)->u64 = LJ_CONT_FFI_CALLBACK;
    (o++)->u64 = rid;
  } else {
    o->u32.lo = LJ_CONT_FFI_CALLBACK;
    o->u32.hi = rid;
    o++;
  }
  setframe_gc(o, obj2gco(fn), fntp);
  if (LJ_FR2) o++;
  setframe_ftsz(o, ((char *)(o+1) - (char *)L->base) + FRAME_CONT);
  L->top = L->base = ++o;
  if (!ct)
    lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK);
  if (isluafunc(fn))
    setcframe_pc(L->cframe, proto_bc(funcproto(fn))+1);
  lj_state_checkstack(L, LUA_MINSTACK);  /* May throw. */
  o = L->base;  /* Might have been reallocated. */

#if LJ_TARGET_X86
  /* x86 has several different calling conventions. */
  switch (ctype_cconv(ct->info)) {
  case CTCC_FASTCALL: maxgpr = 2; break;
  case CTCC_THISCALL: maxgpr = 1; break;
  default: maxgpr = 0; break;
  }
#endif

  fid = ct->sib;
  while (fid) {
    CType *ctf = ctype_get(cts, fid);
    if (!ctype_isattrib(ctf->info)) {
      CType *cta;
      void *sp;
      CTSize sz;
      int isfp;
      MSize n;
      lj_assertCTS(ctype_isfield(ctf->info), "field expected");
      cta = ctype_rawchild(cts, ctf);
      isfp = ctype_isfp(cta->info);
      sz = (cta->size + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
      n = sz / CTSIZE_PTR;  /* Number of GPRs or stack slots needed. */

      CALLBACK_HANDLE_REGARG  /* Handle register arguments. */

      /* Otherwise pass argument on stack. */
      if (CCALL_ALIGN_STACKARG && LJ_32 && sz == 8)
	nsp = (nsp + 1) & ~1u;  /* Align 64 bit argument on stack. */
      sp = &stack[nsp];
      nsp += n;

    done:
      if (LJ_BE && cta->size < CTSIZE_PTR
#if LJ_TARGET_MIPS64
	  && !(isfp && nsp)
#endif
	 )
	sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size);
      gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp);
    }
    fid = ctf->sib;
  }
  L->top = o;
#if LJ_TARGET_X86
  /* Store stack adjustment for returns from non-cdecl callbacks. */
  if (ctype_cconv(ct->info) != CTCC_CDECL) {
#if LJ_FR2
    (L->base-3)->u64 |= (nsp << (16+2));
#else
    (L->base-2)->u32.hi |= (nsp << (16+2));
#endif
  }
#endif
  while (gcsteps-- > 0)
    lj_gc_check(L);
}

/* Convert Lua object to callback result. */
static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
{
#if LJ_FR2
  CType *ctr = ctype_raw(cts, (uint16_t)(L->base-3)->u64);
#else
  CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi);
#endif
#if LJ_TARGET_X86
  cts->cb.gpr[2] = 0;
#endif
  if (!ctype_isvoid(ctr->info)) {
    uint8_t *dp = (uint8_t *)&cts->cb.gpr[0];
#if CCALL_NUM_FPR
    if (ctype_isfp(ctr->info))
      dp = (uint8_t *)&cts->cb.fpr[0];
#endif
#if LJ_TARGET_ARM64 && LJ_BE
    if (ctype_isfp(ctr->info) && ctr->size == sizeof(float))
      dp = (uint8_t *)&cts->cb.fpr[0].f[1];
#endif
    lj_cconv_ct_tv(cts, ctr, dp, o, 0);
#ifdef CALLBACK_HANDLE_RET
    CALLBACK_HANDLE_RET
#endif
    /* Extend returned integers to (at least) 32 bits. */
    if (ctype_isinteger_or_bool(ctr->info) && ctr->size < 4) {
      if (ctr->info & CTF_UNSIGNED)
	*(uint32_t *)dp = ctr->size == 1 ? (uint32_t)*(uint8_t *)dp :
					   (uint32_t)*(uint16_t *)dp;
      else
	*(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
					  (int32_t)*(int16_t *)dp;
    }
#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
    if (ctr->size <= 4 &&
       (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info))) {
      if (ctr->info & CTF_UNSIGNED)
        *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
      else
        *(int64_t *)dp = (int64_t)*(int32_t *)dp;
    }
#endif
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
    /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
    if (ctr->size <= 4 &&
	(LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))
      *(int64_t *)dp = (int64_t)*(int32_t *)dp;
#endif
#if LJ_TARGET_X86
    if (ctype_isfp(ctr->info))
      cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2;
#endif
  }
}

/* Enter callback. */
lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf)
{
  lua_State *L = cts->L;
  global_State *g = cts->g;
  lj_assertG(L != NULL, "uninitialized cts->L in callback");
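  /* A callback must not be entered while the VM is executing a trace (e.g.
  ** a C function called from JIT-compiled code invoking the callback).
  ** There is no way to recover from this, so report the error and abort
  ** through the panic handler.
  */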
  if (tvref(g->jit_base)) {
    setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK));
    if (g->panic) g->panic(L);
    exit(EXIT_FAILURE);
  }
  lj_trace_abort(g);  /* Never record across callback. */
  /* Setup C frame. */
  cframe_prev(cf) = L->cframe;
  setcframe_L(cf, L);
  cframe_errfunc(cf) = -1;
  cframe_nres(cf) = 0;
  L->cframe = cf;
  callback_conv_args(cts, L);
  return L;  /* Now call the function on this stack. */
}

/* Leave callback. */
void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o)
{
  lua_State *L = cts->L;
  GCfunc *fn;
  TValue *obase = L->base;
  L->base = L->top;  /* Keep continuation frame for throwing errors. */
  if (o >= L->base) {
    /* PC of RET* is lost. Point to last line for result conv. errors. */
    fn = curr_func(L);
    if (isluafunc(fn)) {
      GCproto *pt = funcproto(fn);
      setcframe_pc(L->cframe, proto_bc(pt)+pt->sizebc+1);
    }
  }
  callback_conv_result(cts, L, o);
  /* Finally drop C frame and continuation frame. */
  L->top -= 2+2*LJ_FR2;
  L->base = obase;
  L->cframe = cframe_prev(L->cframe);
  cts->cb.slot = 0;  /* Blacklist C function that called the callback. */
}

/* -- C callback management ----------------------------------------------- */

/* Get an unused slot in the callback slot table. */
static MSize callback_slot_new(CTState *cts, CType *ct)
{
  CTypeID id = ctype_typeid(cts, ct);
  CTypeID1 *cbid = cts->cb.cbid;
  MSize top;
  for (top = cts->cb.topid; top < cts->cb.sizeid; top++)
    if (LJ_LIKELY(cbid[top] == 0))
      goto found;
#if CALLBACK_MAX_SLOT
  if (top >= CALLBACK_MAX_SLOT)
#endif
    lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
  if (!cts->cb.mcode)
    callback_mcode_new(cts);
  lj_mem_growvec(cts->L, cbid, cts->cb.sizeid, CALLBACK_MAX_SLOT, CTypeID1);
  cts->cb.cbid = cbid;
  memset(cbid+top, 0, (cts->cb.sizeid-top)*sizeof(CTypeID1));
found:
  cbid[top] = id;
  cts->cb.topid = top+1;
  return top;
}

/* Check for function pointer and supported argument/result types. */
static CType *callback_checkfunc(CTState *cts, CType *ct)
{
  int narg = 0;
  if (!ctype_isptr(ct->info) || (LJ_64 && ct->size != CTSIZE_PTR))
    return NULL;
  ct = ctype_rawchild(cts, ct);
  if (ctype_isfunc(ct->info)) {
    CType *ctr = ctype_rawchild(cts, ct);
    CTypeID fid = ct->sib;
    if (!(ctype_isvoid(ctr->info) || ctype_isenum(ctr->info) ||
	  ctype_isptr(ctr->info) || (ctype_isnum(ctr->info) && ctr->size <= 8)))
      return NULL;
    if ((ct->info & CTF_VARARG))
      return NULL;
    while (fid) {
      CType *ctf = ctype_get(cts, fid);
      if (!ctype_isattrib(ctf->info)) {
	CType *cta;
	lj_assertCTS(ctype_isfield(ctf->info), "field expected");
	cta = ctype_rawchild(cts, ctf);
	if (!(ctype_isenum(cta->info) || ctype_isptr(cta->info) ||
	      (ctype_isnum(cta->info) && cta->size <= 8)) ||
	    ++narg >= LUA_MINSTACK-3)
	  return NULL;
      }
      fid = ctf->sib;
    }
    return ct;
  }
  return NULL;
}

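/* A callback is created whenever a Lua function is converted to a C function
** pointer type, e.g. via ffi.cast("int (*)(int, int)", f) or by assigning f
** to a function pointer field; the returned pointer can then be handed to
** C code, which may call it like any other C function.
*/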
/* Create a new callback and return the callback function pointer. */
void *lj_ccallback_new(CTState *cts, CType *ct, GCfunc *fn)
{
  ct = callback_checkfunc(cts, ct);
  if (ct) {
    MSize slot = callback_slot_new(cts, ct);
    GCtab *t = cts->miscmap;
    setfuncV(cts->L, lj_tab_setint(cts->L, t, (int32_t)slot), fn);
    lj_gc_anybarriert(cts->L, t);
    return callback_slot2ptr(cts, slot);
  }
  return NULL;  /* Bad conversion. */
}

#endif
