#include "schpriv.h"
#include "schmach.h"
#include "future.h"

#ifdef MZ_USE_JIT

#include "jit.h"

#ifdef USE_FLONUM_UNBOXING
static int generate_argument_boxing(mz_jit_state *jitter, Scheme_Lambda *lam,
                                    int num_rands, int args_already_in_place,
                                    int offset, int direct_flostack_offset,
                                    int save_reg,
                                    /* used only to skip unneeded checks: */
                                    Scheme_App_Rec *app, Scheme_Object **alt_rands);
#endif

static int detect_unsafe_struct_refs(Scheme_Object *arg, Scheme_Object **alt_rands, Scheme_App_Rec *app,
                                     int i, int num_rands, int shift);
static int generate_unsafe_struct_ref_sequence(mz_jit_state *jitter, Scheme_Object *arg, Scheme_Object *last_arg,
                                               int count, int stack_pos);

int scheme_direct_call_count, scheme_indirect_call_count;

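/* Describes an argument that is passed in a register rather than on
   the runstack: `reg` is the register that carries the argument, and
   `gen` appears to record whether code still must be generated to
   load it (inferred from its use in scheme_generate_tail_call). */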
struct jit_direct_arg {
  int gen;
  int reg;
};

THREAD_LOCAL_DECL(static Scheme_Object **fixup_runstack_base);
THREAD_LOCAL_DECL(static int fixup_already_in_place);

static Scheme_Object *clear_runstack(Scheme_Object **rs, intptr_t amt, Scheme_Object *sv)
{
  int i;
  for (i = 0; i < amt; i++) {
    rs[i] = NULL;
  }
  return sv;
}

#define JITCALL_TS_PROCS
#define JIT_APPLY_TS_PROCS
#include "jit_ts.c"

/*========================================================================*/
/*                         application codegen                            */
/*========================================================================*/

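/* Emits code for the case where the value in JIT_V1 is not a plain
   native closure: if it is an applicable struct (possibly through an
   arity-reducing wrapper) or a property-only/unsafe procedure
   chaperone, the generated code extracts the underlying procedure
   into JIT_V1 and jumps back to `refagain`. The returned branch must
   be patched to the code that handles all remaining cases. */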
static jit_insn *generate_proc_struct_retry(mz_jit_state *jitter, int num_rands, GC_CAN_IGNORE jit_insn *refagain)
{
  GC_CAN_IGNORE jit_insn *ref2, *ref3, *refz1, *refz2, *refz3, *refy3, *refz4, *refz5;
  GC_CAN_IGNORE jit_insn *refz6, *refz7, *refz8, *refz9, *ref9, *ref10;

  ref2 = jit_bnei_i(jit_forward(), JIT_R1, scheme_proc_struct_type);

  /* This is an applicable struct. But if it's for reducing arity,
     then we can't just apply the struct's procedure. */
  jit_ldxi_p(JIT_R1, JIT_V1, &((Scheme_Structure *)0x0)->stype);
  jit_ldi_p(JIT_R2, &scheme_reduced_procedure_struct);
  if (num_rands <= SCHEME_MAX_FAST_ARITY_CHECK) {
    ref3 = jit_bner_p(jit_forward(), JIT_R1, JIT_R2);

    /* Matches reduced arity in a simple way? */
    jit_ldxi_p(JIT_R2, JIT_V1, &((Scheme_Structure *)0x0)->slots[1]);
    refy3 = jit_bmci_l(jit_forward(), JIT_R2, 1); /* not a fixnum? */
    refz3 = jit_bmci_l(jit_forward(), JIT_R2, (1 << (num_rands + 1)));

    /* Yes, matches */
    mz_patch_branch(ref3);
  } else {
    /* Too many arguments for a fast check, so assume it doesn't match */
    refz3 = jit_beqr_p(jit_forward(), JIT_R1, JIT_R2);
    refy3 = NULL;
  }

  /* It's an applicable struct that is not an arity-reducing wrapper,
     or else the arity matches. We can extract the procedure if it's in a field: */
  jit_ldxi_p(JIT_R1, JIT_R1, &((Scheme_Struct_Type *)0x0)->proc_attr);
  refz1 = jit_bmci_i(jit_forward(), JIT_R1, 0x1);
  CHECK_LIMIT();

  /* Proc is a field in the record */
  jit_rshi_ul(JIT_R1, JIT_R1, 1);
  jit_lshi_ul(JIT_R1, JIT_R1, JIT_LOG_WORD_SIZE);
  jit_addi_p(JIT_R1, JIT_R1, &((Scheme_Structure *)0x0)->slots);
  jit_ldxr_p(JIT_R1, JIT_V1, JIT_R1);
  CHECK_LIMIT();

  /* JIT_R1 now has the wrapped procedure */
  refz4 = jit_bmsi_i(jit_forward(), JIT_R1, 0x1);
  refz2 = mz_bnei_t(jit_forward(), JIT_R1, scheme_native_closure_type, JIT_R2);
  CHECK_LIMIT();

  /* It's a native closure, but we can't just jump to it, in case
     the arity is wrong and an error needs to be reported using
     the original wrapper. */
  mz_prepare(2);
  jit_movi_i(JIT_R0, num_rands);
  jit_pusharg_i(JIT_R0); /* argc */
  jit_pusharg_p(JIT_R1); /* closure */
  (void)mz_finish_unsynced_runstack(scheme_native_arity_check);
  CHECK_LIMIT();
  jit_retval(JIT_R0);
  refz5 = jit_beqi_i(jit_forward(), JIT_R0, 0);
  CHECK_LIMIT();

  /* Extract proc again, then loop */
  jit_ldxi_p(JIT_R1, JIT_V1, &((Scheme_Structure *)0x0)->stype);
  jit_ldxi_p(JIT_R1, JIT_R1, &((Scheme_Struct_Type *)0x0)->proc_attr);
  jit_rshi_ul(JIT_R1, JIT_R1, 1);
  jit_lshi_ul(JIT_R1, JIT_R1, JIT_LOG_WORD_SIZE);
  jit_addi_p(JIT_R1, JIT_R1, &((Scheme_Structure *)0x0)->slots);
  jit_ldxr_p(JIT_V1, JIT_V1, JIT_R1);
  (void)jit_jmpi(refagain);
  CHECK_LIMIT();

  mz_patch_branch(ref2);
  /* check for a procedure impersonator that just keeps properties
     or is the result of unsafe-{impersonate,chaperone}-procedure */
  ref2 = jit_bnei_i(jit_forward(), JIT_R1, scheme_proc_chaperone_type);
  jit_ldxi_p(JIT_R1, JIT_V1, &((Scheme_Chaperone *)0x0)->redirects);
  refz6 = mz_bnei_t(jit_forward(), JIT_R1, scheme_vector_type, JIT_R2);
  (void)jit_ldxi_l(JIT_R2, JIT_R1, &SCHEME_VEC_SIZE(0x0));
  refz7 = jit_bmci_i(jit_forward(), JIT_R2, 0x1);
  /* Flag is set for a property-only or unsafe chaperone: */
  jit_ldxi_s(JIT_R2, JIT_V1, &SCHEME_CHAPERONE_FLAGS(((Scheme_Chaperone *)0x0)));
  refz8 = jit_bmci_ul(jit_forward(), JIT_R2, SCHEME_PROC_CHAPERONE_CALL_DIRECT);
  /* In the case of an unsafe chaperone, we can make a direct
     call only if the arity check will succeed; otherwise, the error
     message would use the wrong name. */
  jit_ldxi_p(JIT_R2, JIT_R1, &(SCHEME_VEC_ELS(0x0)[1]));
  ref9 = jit_beqi_p(jit_forward(), JIT_R2, scheme_false);
  refz9 = jit_bnei_p(jit_forward(), JIT_R2, scheme_make_integer(num_rands));
  mz_patch_branch(ref9);
  CHECK_LIMIT();
  /* If the vector is immutable, we need to provide the self proc,
     if it's not provided already. The self proc is supplied through
     a side channel in the thread record. */
  jit_ldxi_s(JIT_R2, JIT_R1, &MZ_OPT_HASH_KEY((Scheme_Inclhash_Object *)(0x0)));
  ref9 = jit_bmci_i(jit_forward(), JIT_R2, 0x1);
  (void)mz_tl_ldi_p(JIT_R2, tl_scheme_current_thread);
  jit_ldxi_l(JIT_R1, JIT_R2, &((Scheme_Thread *)0x0)->self_for_proc_chaperone);
  ref10 = jit_bnei_p(jit_forward(), JIT_R1, NULL);
  jit_stxi_l(&((Scheme_Thread *)0x0)->self_for_proc_chaperone, JIT_R2, JIT_V1);
  mz_patch_branch(ref10);
  jit_ldxi_p(JIT_R1, JIT_V1, &((Scheme_Chaperone *)0x0)->redirects);
  mz_patch_branch(ref9);
  /* Position [0] in SCHEME_VEC_ELS contains either the
     unwrapped function (if chaperone-procedure got #f
     for the proc argument) or the unsafe-chaperone
     replacement-proc argument; either way, just call it */
  jit_ldxi_p(JIT_V1, JIT_R1, &(SCHEME_VEC_ELS(0x0)[0]));
  (void)jit_jmpi(refagain);
  CHECK_LIMIT();

  mz_patch_branch(refz1);
  mz_patch_branch(refz2);
  mz_patch_branch(refz3);
  if (refy3)
    mz_patch_branch(refy3);
  mz_patch_branch(refz4);
  mz_patch_branch(refz5);
  mz_patch_branch(refz6);
  mz_patch_branch(refz7);
  mz_patch_branch(refz8);
  mz_patch_branch(refz9);

  return ref2;
}

#ifdef INSTRUMENT_PRIMITIVES
extern int g_print_prims;
#endif

/* Support for intercepting direct calls to primitives: */
#ifdef MZ_USE_FUTURES

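/* When executing in a future (worker) thread, scheme_use_rtcall is
   set, and a primitive call must be routed back to the runtime
   thread as an rtcall; otherwise, the primitive can be called
   directly. */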
Scheme_Object *scheme_noncm_prim_indirect(Scheme_Prim proc, int argc)
  XFORM_SKIP_PROC
{
  if (scheme_use_rtcall)
    return scheme_rtcall_iS_s("[prim_indirect]",
                              FSRC_PRIM,
                              proc,
                              argc,
                              MZ_RUNSTACK);
  else
    return proc(argc, MZ_RUNSTACK);
}

Scheme_Object *scheme_prim_indirect(Scheme_Primitive_Closure_Proc proc, int argc, Scheme_Object *self)
  XFORM_SKIP_PROC
{
  if (scheme_use_rtcall)
    return scheme_rtcall_iSs_s("[prim_indirect]", FSRC_PRIM, proc, argc, MZ_RUNSTACK, self);
  else
    return proc(argc, MZ_RUNSTACK, self);
}

#endif

/* Various specific 'futurized' versions of primitives that may
   be invoked directly from JIT code and are not considered thread-safe
   (are not invoked via apply_multi_from_native, etc.) */

#ifdef MZ_USE_FUTURES
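/* For a tail call from a future thread: stash the rator and a copy
   of the argument array in the thread record and return
   SCHEME_TAIL_CALL_WAITING, letting the trampoline complete the
   call; fall back to an rtcall when future-local allocation is not
   available. */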
static Scheme_Object *ts__scheme_tail_apply_from_native(Scheme_Object *rator, int argc, Scheme_Object **argv)
  XFORM_SKIP_PROC
{
  if (scheme_use_rtcall) {
    /* try thread-local allocation: */
    Scheme_Object **a;
#ifdef MZ_PRECISE_GC
    if (scheme_future_is_runtime_thread())
      a = NULL;
    else
      a = MALLOC_N(Scheme_Object *, argc);
#else
    a = NULL; /* future-local allocation is not supported */
#endif
    if (a) {
      Scheme_Thread *p = scheme_current_thread;
      memcpy(a, argv, argc * sizeof(Scheme_Object*));
      p->ku.apply.tail_rator = rator;
      p->ku.apply.tail_num_rands = argc;
      p->ku.apply.tail_rands = a;
      return SCHEME_TAIL_CALL_WAITING;
    } else
      return scheme_rtcall_tail_apply(rator, argc, argv);
  } else
    return _scheme_tail_apply_from_native(rator, argc, argv);
}
#else
# define ts__scheme_tail_apply_from_native _scheme_tail_apply_from_native
#endif

static Scheme_Object *_scheme_tail_apply_from_native_fixup_args(Scheme_Object *rator,
                                                                int argc,
                                                                Scheme_Object **argv)
  XFORM_SKIP_PROC
{
  int already = fixup_already_in_place, i;
  Scheme_Object **base;

  base = fixup_runstack_base XFORM_OK_MINUS argc XFORM_OK_MINUS already;

  /* Need to shift argc to end of base: */
  for (i = 0; i < argc; i++) {
    base[already + i] = argv[i];
  }

  /* In futures mode, it's important that the argument array matches
     runstack: */
  MZ_RUNSTACK = base;

  return ts__scheme_tail_apply_from_native(rator, argc + already, base);
}

#if defined(MZ_USE_FUTURES) && defined(MZ_PRECISE_GC)

static Scheme_Object *try_future_local_stack_overflow(Scheme_Object *rator, int argc, Scheme_Object **argv, int multi)
  XFORM_SKIP_PROC
{
  if (SAME_TYPE(SCHEME_TYPE(rator), scheme_native_closure_type)
      && scheme_can_apply_native_in_future(rator)) {
    /* the only reason to get here is stack overflow,
       either for the runstack or C stack */
    return scheme_rtcall_apply_with_new_stack(rator, argc, argv, multi);
  } else if (multi)
    return ts__scheme_apply_multi_from_native(rator, argc, argv);
  else
    return ts__scheme_apply_from_native(rator, argc, argv);
}

static Scheme_Object *x_ts__scheme_apply_multi_from_native(Scheme_Object *rator, int argc, Scheme_Object **argv)
  XFORM_SKIP_PROC
{
  if (scheme_use_rtcall)
    return try_future_local_stack_overflow(rator, argc, argv, 1);
  else
    return _scheme_apply_multi_from_native(rator, argc, argv);
}

static Scheme_Object *x_ts__scheme_apply_from_native(Scheme_Object *rator, int argc, Scheme_Object **argv)
  XFORM_SKIP_PROC
{
  if (scheme_use_rtcall)
    return try_future_local_stack_overflow(rator, argc, argv, 0);
  else
    return _scheme_apply_from_native(rator, argc, argv);
}

#else
# define x_ts__scheme_apply_multi_from_native ts__scheme_apply_multi_from_native
# define x_ts__scheme_apply_from_native ts__scheme_apply_from_native
#endif

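/* In futures mode, emits code that checks whether a GC pause has
   been requested: if so, the generated code saves the live
   registers, exposes gc_reg to the GC via tl_jit_future_storage,
   calls scheme_future_gc_pause, and then retries at `refagain`;
   otherwise, it falls through to the slow path that follows. */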
static int generate_pause_for_gc_and_retry(mz_jit_state *jitter,
                                           int in_short_jumps,
                                           int gc_reg, /* must not be JIT_R1 */
                                           GC_CAN_IGNORE jit_insn *refagain)
{
#ifdef MZ_USE_FUTURES
  GC_CAN_IGNORE jit_insn *refslow = 0, *refpause;
  int i;

  mz_rs_sync();

  /* expose gc_reg to GC */
  mz_tl_sti_p(tl_jit_future_storage, gc_reg, JIT_R1);

  /* Save non-preserved registers. Use a multiple of 4 to avoid
     alignment problems. */
  jit_pushr_l(JIT_R1);
  jit_pushr_l(JIT_R2);
  jit_pushr_l(JIT_R0);
  jit_pushr_l(JIT_R0);
  CHECK_LIMIT();

  mz_tl_ldi_i(JIT_R0, tl_scheme_future_need_gc_pause);
  refpause = jit_bgti_i(jit_forward(), JIT_R0, 0);

  for (i = 0; i < 2; i++) {
    /* Restore non-preserved registers, and also move the gc-exposed
       register back. */
    if (i == 1) {
      mz_patch_branch(refpause);
      JIT_UPDATE_THREAD_RSPTR();
      jit_prepare(0);
      mz_finish(scheme_future_gc_pause);
    }
    jit_popr_l(JIT_R0);
    jit_popr_l(JIT_R0);
    jit_popr_l(JIT_R2);
    CHECK_LIMIT();
    mz_tl_ldi_p(gc_reg, tl_jit_future_storage);
    jit_movi_p(JIT_R1, NULL);
    mz_tl_sti_p(tl_jit_future_storage, JIT_R1, JIT_R2);
    jit_popr_l(JIT_R1);
    CHECK_LIMIT();
    if (!i)
      refslow = jit_jmpi(jit_forward());
    else
      (void)jit_jmpi(refagain);
  }

  mz_patch_ucbranch(refslow);

  return 1;
#else
  return 1;
#endif
}

static int generate_direct_prim_tail_call(mz_jit_state *jitter, int num_rands)
{
  /* JIT_V1 must have the target function pointer.
     Also, scheme_current_runstack must be up-to-date...
     unless num_rands == 1, in which case JIT_R0 must
     have the argument. */
  if (num_rands == 1) {
    jit_subi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(1));
    CHECK_RUNSTACK_OVERFLOW();
    jit_str_p(JIT_RUNSTACK, JIT_R0);
    JIT_UPDATE_THREAD_RSPTR();
  }
  jit_movi_i(JIT_R1, num_rands);
  mz_prepare_direct_prim(2); /* a prim takes 3 args, but a NONCM prim ignores the 3rd */
  CHECK_LIMIT();
  {
    /* May use JIT_R0 and create local branch: */
    mz_generate_direct_prim(jit_pusharg_p(JIT_RUNSTACK),
                            jit_pusharg_i(JIT_R1),
                            JIT_V1, scheme_noncm_prim_indirect);
  }
  CHECK_LIMIT();
  /*  Return: */
  mz_pop_threadlocal();
  mz_pop_locals();
  jit_ret();

  return 1;
}

#define NUM_AVAIL_DIRECT_ARG_REGS 3
static const int direct_arg_regs[] = { JIT_V1, JIT_R1, JIT_R0 };

int scheme_generate_tail_call(mz_jit_state *jitter, int num_rands, int direct_native, int need_set_rs,
                              int is_inline, Scheme_Native_Closure *direct_to_code, jit_direct_arg *direct_args,
                              Scheme_Lambda *direct_lam)
/* Proc is in V1 unless direct_to_code, args are at RUNSTACK.
   If num_rands < 0, then argc is in LOCAL2 and arguments are already below RUNSTACK_BASE.
   If direct_native == 2, then some arguments are already in place (shallower in the runstack
   than the arguments to move).
   If direct_args, then R0, R1, V1 hold arguments.
   If direct_lam is in unboxing mode, the slow path needs to box flonum arguments;
     num_rands must be >= 0 */
{
  int i, r2_has_runstack = 0;
  GC_CAN_IGNORE jit_insn *top_refagain, *refagain, *ref, *ref2, *ref4, *ref5;

  __START_SHORT_JUMPS__(num_rands < 100);

  top_refagain = jit_get_ip();

  /* First, try fast direct jump to native code: */
  if (!direct_native) {
    ref = jit_bmsi_ul(jit_forward(), JIT_V1, 0x1);
    jit_ldxi_s(JIT_R1, JIT_V1, &((Scheme_Object *)0x0)->type);
    ref2 = jit_bnei_i(jit_forward(), JIT_R1, scheme_native_closure_type);
    /* code at ref2 uses JIT_R1 */
    CHECK_LIMIT();
  } else {
    ref = ref2 = NULL;
  }

  refagain = jit_get_ip();

  /* Right kind of function. Extract data and check stack depth: */
  if (!direct_to_code) {
    jit_ldxi_p(JIT_R0, JIT_V1, &((Scheme_Native_Closure *)0x0)->code);
    jit_ldxi_i(JIT_R2, JIT_R0, &((Scheme_Native_Lambda *)0x0)->max_let_depth);
    mz_tl_ldi_p(JIT_R1, tl_MZ_RUNSTACK_START);
    jit_subr_ul(JIT_R1, JIT_RUNSTACK, JIT_R1);
    ref4 = jit_bltr_ul(jit_forward(), JIT_R1, JIT_R2);
    CHECK_LIMIT();
  } else
    ref4 = NULL;

  /* Fast jump ok (proc will check argc).
     At this point, V1 = closure (unless direct_to_code) and R0 = code. */

  /* Check for thread swap: */
  if (!direct_to_code) {
    (void)mz_tl_ldi_i(JIT_R2, tl_scheme_fuel_counter);
    ref5 = jit_blei_i(jit_forward(), JIT_R2, 0);
#ifndef FUEL_AUTODECEREMENTS
    jit_subi_p(JIT_R2, JIT_R2, 0x1);
    (void)mz_tl_sti_i(tl_scheme_fuel_counter, JIT_R2, JIT_R1);
#endif
  } else
    ref5 = NULL;
  CHECK_LIMIT();

  /* Copy args to runstack base: */
  if (num_rands >= 0) {
    /* Fixed argc: */
    if (num_rands) {
      mz_ld_runstack_base_alt(JIT_R2);
      jit_subi_p(JIT_R2, JIT_RUNSTACK_BASE_OR_ALT(JIT_R2), WORDS_TO_BYTES(num_rands));
      CHECK_RUNSTACK_OVERFLOW();
      for (i = num_rands; i--; ) {
        if (direct_args) {
          int reg = direct_args[i].reg;
          jit_stxi_p(WORDS_TO_BYTES(i), JIT_R2, reg);
        } else {
          jit_ldxi_p(JIT_R1, JIT_RUNSTACK, WORDS_TO_BYTES(i));
          jit_stxi_p(WORDS_TO_BYTES(i), JIT_R2, JIT_R1);
        }
        CHECK_LIMIT();
      }
      jit_movr_p(JIT_RUNSTACK, JIT_R2);
      r2_has_runstack = 1;
    } else {
#ifdef JIT_RUNSTACK_BASE
      jit_movr_p(JIT_RUNSTACK, JIT_RUNSTACK_BASE);
#else
      mz_get_local_p(JIT_RUNSTACK, JIT_RUNSTACK_BASE_LOCAL);
#endif
    }
    if (direct_native > 1) { /* => some_args_already_in_place */
      mz_get_local_p(JIT_R1, JIT_LOCAL2);
      jit_lshi_l(JIT_R1, JIT_R1, JIT_LOG_WORD_SIZE);
      jit_subr_p(JIT_RUNSTACK, JIT_RUNSTACK, JIT_R1);
      r2_has_runstack = 0;
      CHECK_RUNSTACK_OVERFLOW();
    }
  } else {
    /* Variable argc (in LOCAL2):
       arguments are already in place. */
  }
  /* RUNSTACK, RUNSTACK_BASE, V1 (unless direct_to_code), and R0 are ready */

  /* Extract function and data: */
  if (!direct_to_code) {
    jit_movr_p(JIT_R2, JIT_V1);
    r2_has_runstack = 0;
    if (direct_native) {
      jit_ldxi_p(JIT_V1, JIT_R0, &((Scheme_Native_Lambda *)0x0)->u.tail_code);
    } else {
      jit_ldxi_p(JIT_V1, JIT_R0, &((Scheme_Native_Lambda *)0x0)->arity_code);
    }
    jit_movr_p(JIT_R0, JIT_R2);
  }
  /* Set up arguments; JIT_RUNSTACK and JIT_RUNSTACK_BASE must also be ready */
  if (num_rands >= 0) {
    jit_movi_i(JIT_R1, num_rands);
    if (direct_native > 1) { /* => some_args_already_in_place */
      mz_get_local_p(JIT_R2, JIT_LOCAL2);
      jit_addr_i(JIT_R1, JIT_R1, JIT_R2);
      r2_has_runstack = 0;
    }
  } else {
    mz_get_local_p(JIT_R1, JIT_LOCAL2);
  }
  if (!r2_has_runstack)
    jit_movr_p(JIT_R2, JIT_RUNSTACK);
  if (need_set_rs && !direct_to_code) {
    /* In case arity check fails, need to update runstack now: */
    JIT_UPDATE_THREAD_RSPTR();
  }
  if (direct_native && direct_to_code) {
    __END_SHORT_JUMPS__(num_rands < 100);
    /* load closure pointer into R0: */
    scheme_mz_load_retained(jitter, JIT_R0, direct_to_code);
    /* jump directly: */
    (void)jit_jmpi(direct_to_code->code->u.tail_code);
    /* no slow path in this mode */
    return 1;
  }
  /* Now jump: */
  jit_jmpr(JIT_V1);
  CHECK_LIMIT();

  if (!direct_native && !is_inline && (num_rands >= 0)) {
    /* Handle simple applicable struct: */
    mz_patch_branch(ref2);
    /* uses JIT_R1: */
    ref2 = generate_proc_struct_retry(jitter, num_rands, top_refagain);
    CHECK_LIMIT();
  }

  /* The slow way: */
  /*  V1 and RUNSTACK must be intact! */
  if (ref5)
    mz_patch_branch(ref5);
  generate_pause_for_gc_and_retry(jitter,
                                  num_rands < 100,  /* in short jumps */
                                  JIT_V1, /* expose V1 to GC */
                                  refagain); /* retry code pointer */
  CHECK_LIMIT();
  if (!direct_native) {
    mz_patch_branch(ref);
    mz_patch_branch(ref2);
  }
  if (ref4)
    mz_patch_branch(ref4);
  CHECK_LIMIT();
#ifdef USE_FLONUM_UNBOXING
  if (direct_lam) {
    if (SCHEME_LAMBDA_FLAGS(direct_lam) & LAMBDA_HAS_TYPED_ARGS) {
      /* Need to box flonum arguments. Flonums are currently in the place where
         the target function expects them unpacked from arguments. We need to save
         JIT_V1. */
      generate_argument_boxing(jitter, direct_lam,
                               num_rands, 0,
                               0, 0,
                               JIT_V1,
                               NULL, NULL);
      CHECK_LIMIT();
      mz_rs_sync();
      scheme_mz_flostack_restore(jitter, 0, 0, 1, 0);
    }
  }
#endif
  if (need_set_rs) {
    JIT_UPDATE_THREAD_RSPTR();
  }
  if (direct_native > 1) { /* => some_args_already_in_place */
    /* Need to shuffle argument lists. Since we can pass only
       three arguments, use static variables for the others. */
    mz_ld_runstack_base_alt(JIT_R1);
    mz_tl_sti_p(tl_fixup_runstack_base, JIT_RUNSTACK_BASE_OR_ALT(JIT_R1), JIT_R0);
    mz_get_local_p(JIT_R1, JIT_LOCAL2);
    mz_tl_sti_l(tl_fixup_already_in_place, JIT_R1, JIT_R0);
  }
  if (num_rands >= 0) {
    jit_movi_i(JIT_R0, num_rands);
  } else {
    mz_get_local_p(JIT_R0, JIT_LOCAL2);
  }
  /* Since we've overwritten JIT_RUNSTACK, if this is not shared
     code, and if this is 3m, then the runstack no longer
     has a pointer to the closure for this code. To ensure that
     an appropriate return point exists, jump to static code
     for the rest. (This is the slow path, anyway.) */
  __END_SHORT_JUMPS__(num_rands < 100);
  if (direct_native > 1) {
    (void)jit_jmpi(sjc.finish_tail_call_fixup_code);
  } else {
    (void)jit_jmpi(sjc.finish_tail_call_code);
  }

  return 1;
}

int scheme_generate_force_value_same_mark(mz_jit_state *jitter)
{
  GC_CAN_IGNORE jit_insn *refr USED_ONLY_FOR_FUTURES;
  (void)jit_movi_p(JIT_R0, SCHEME_TAIL_CALL_WAITING);
  mz_prepare(1);
  jit_pusharg_p(JIT_R0);
  (void)mz_finish_lwe(ts_scheme_force_value_same_mark, refr);
  jit_retval(JIT_R0);
  return 1;
}

int scheme_generate_finish_apply(mz_jit_state *jitter)
{
  GC_CAN_IGNORE jit_insn *refr USED_ONLY_FOR_FUTURES;
  (void)mz_finish_lwe(ts__scheme_apply_from_native, refr);
  return 1;
}

int scheme_generate_finish_tail_apply(mz_jit_state *jitter)
{
  GC_CAN_IGNORE jit_insn *refr USED_ONLY_FOR_FUTURES;
  (void)mz_finish_lwe(ts__scheme_tail_apply_from_native, refr);
  return 1;
}

int scheme_generate_finish_multi_apply(mz_jit_state *jitter)
{
  GC_CAN_IGNORE jit_insn *refr USED_ONLY_FOR_FUTURES;
  (void)mz_finish_lwe(ts__scheme_apply_multi_from_native, refr);
  return 1;
}

int scheme_generate_finish_tail_call(mz_jit_state *jitter, int direct_native)
{
  mz_prepare(3);
  CHECK_LIMIT();
  jit_pusharg_p(JIT_RUNSTACK);
  jit_pusharg_i(JIT_R0);
  jit_pusharg_p(JIT_V1);
  if (direct_native > 1) { /* => some_args_already_in_place */
    GC_CAN_IGNORE jit_insn *refr USED_ONLY_FOR_FUTURES;
    (void)mz_finish_lwe(_scheme_tail_apply_from_native_fixup_args, refr);
  } else {
    GC_CAN_IGNORE jit_insn *refr USED_ONLY_FOR_FUTURES;
    (void)mz_finish_lwe(ts__scheme_tail_apply_from_native, refr);
  }
  CHECK_LIMIT();
  /* Return: */
  mz_pop_threadlocal();
  mz_pop_locals();
  jit_ret();

  return 1;
}

static int generate_direct_prim_non_tail_call(mz_jit_state *jitter, int num_rands, int multi_ok, int pop_and_jump)
{
  /* See generate_prim_non_tail_call for assumptions. */

  if (pop_and_jump) {
    mz_prolog(JIT_R1);
  }

  if (num_rands == 1) {
    jit_subi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(1));
    CHECK_RUNSTACK_OVERFLOW();
    jit_str_p(JIT_RUNSTACK, JIT_R0);
    JIT_UPDATE_THREAD_RSPTR();
  }

  jit_movi_i(JIT_R1, num_rands);
  mz_prepare_direct_prim(2); /* a prim takes 3 args, but a NONCM prim ignores the 3rd */
  CHECK_LIMIT();
  {
    /* May use JIT_R0 and create local branch: */
    mz_generate_direct_prim(jit_pusharg_p(JIT_RUNSTACK),
                            jit_pusharg_i(JIT_R1),
                            JIT_V1, scheme_noncm_prim_indirect);
  }
  CHECK_LIMIT();
  jit_retval(JIT_R0);
  VALIDATE_RESULT(JIT_R0);
  /* No need to check for multi values or tail-call, because
     we only use this for noncm primitives. */

  if (num_rands == 1) {
    jit_addi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(1));
    jitter->need_set_rs = 1;
  }

  if (pop_and_jump) {
    mz_epilog(JIT_V1);
  }

  return 1;
}

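/* Emits the code that runs when a call returned
   SCHEME_TAIL_CALL_WAITING: the requested tail call's rator and
   arguments are in the thread record, so (runstack space permitting)
   copy the arguments onto the runstack and jump to the shared
   non-tail retry code. */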
static int generate_retry_call(mz_jit_state *jitter, int num_rands, int multi_ok, int result_ignored,
                               GC_CAN_IGNORE jit_insn *reftop)
  /* If num_rands < 0, original argc is in V1, and we should
     pop argc arguments off runstack before pushing more (unless num_rands == -3).
     This function is called with short jumps enabled. */
{
  GC_CAN_IGNORE jit_insn *ref, *ref2, *refloop;

  if (!reftop) {
    int mo = (multi_ok
              ? (result_ignored ? SHARED_RESULT_IGNORED_CASE : SHARED_MULTI_OK_CASE)
              : SHARED_SINGLE_VALUE_CASE);
    reftop = sjc.shared_non_tail_retry_code[mo];
  }

  /* Get new argc: */
  (void)mz_tl_ldi_p(JIT_R1, tl_scheme_current_thread);
  jit_ldxi_l(JIT_R2, JIT_R1, &((Scheme_Thread *)0x0)->ku.apply.tail_num_rands);
  /* Thread is in R1. New argc is in R2. Old argc to cancel may be in V1. */

  /* Enough room on runstack? */
  mz_tl_ldi_p(JIT_R0, tl_MZ_RUNSTACK_START);
  jit_subr_ul(JIT_R0, JIT_RUNSTACK, JIT_R0); /* R0 is space left (in bytes) */
  if ((num_rands < 0) && (num_rands != -3)) {
    jit_subr_l(JIT_R2, JIT_R2, JIT_V1);
  }
  jit_lshi_l(JIT_R2, JIT_R2, JIT_LOG_WORD_SIZE);
  ref = jit_bltr_ul(jit_forward(), JIT_R0, JIT_R2);
  CHECK_LIMIT();

  /* Yes, there's enough room. Adjust the runstack. */
  jit_subr_l(JIT_RUNSTACK, JIT_RUNSTACK, JIT_R2);
  CHECK_RUNSTACK_OVERFLOW();

  /* Copy arguments to runstack, then jump to reftop. */
  jit_ldxi_l(JIT_R2, JIT_R1, &((Scheme_Thread *)0x0)->ku.apply.tail_num_rands);
  jit_ldxi_p(JIT_V1, JIT_R1, &((Scheme_Thread *)0x0)->ku.apply.tail_rands);
  jit_lshi_l(JIT_R2, JIT_R2, JIT_LOG_WORD_SIZE);
  CHECK_LIMIT();
  refloop = jit_get_ip();
  ref2 = jit_blei_l(jit_forward(), JIT_R2, 0);
  jit_subi_l(JIT_R2, JIT_R2, JIT_WORD_SIZE);
  jit_ldxr_p(JIT_R0, JIT_V1, JIT_R2);
  jit_stxr_p(JIT_R2, JIT_RUNSTACK, JIT_R0);
  (void)jit_jmpi(refloop);
  CHECK_LIMIT();

  /* Clear tail-call arguments pointer: */
  jit_stixi_p(&((Scheme_Thread *)0x0)->ku.apply.tail_rands, JIT_R1, NULL);
  CHECK_LIMIT();

  /* R1 is still the thread.
     Put procedure and argc in place, then jump to apply: */
  mz_patch_branch(ref2);
  jit_ldxi_p(JIT_V1, JIT_R1, &((Scheme_Thread *)0x0)->ku.apply.tail_rator);
  (void)jit_movi_p(JIT_R0, NULL);
  jit_stxi_p(&((Scheme_Thread *)0x0)->ku.apply.tail_rator, JIT_R1, JIT_R0);
  jit_ldxi_l(JIT_R0, JIT_R1, &((Scheme_Thread *)0x0)->ku.apply.tail_num_rands);
  __END_SHORT_JUMPS__(1);
  (void)jit_jmpi(reftop);
  __START_SHORT_JUMPS__(1);

  /* Slow path; restore R0 to SCHEME_TAIL_CALL_WAITING */
  mz_patch_branch(ref);
  (void)jit_movi_p(JIT_R0, SCHEME_TAIL_CALL_WAITING);

  return 1;
}

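/* Clears the argument slots after a call by overwriting each one
   with the runstack pointer itself, which is a safe non-heap value,
   so that the old arguments don't stay live for the GC. */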
static int generate_clear_previous_args(mz_jit_state *jitter, int num_rands)
{
  if (num_rands >= 0) {
    int i;
    for (i = 0; i < num_rands; i++) {
      jit_stxi_p(WORDS_TO_BYTES(i), JIT_RUNSTACK, JIT_RUNSTACK);
      CHECK_LIMIT();
    }
  } else {
    /* covered by generate_clear_slow_previous_args */
  }
  return 1;
}

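/* Variable-argc counterpart of the above: calls clear_runstack with
   the argc from V1, passing the result value in R0 through
   unchanged. */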
static int generate_clear_slow_previous_args(mz_jit_state *jitter)
{
  CHECK_LIMIT();
  mz_prepare(3);
  jit_pusharg_p(JIT_R0);
  jit_pusharg_l(JIT_V1);
  jit_pusharg_l(JIT_RUNSTACK);
  (void)mz_finish(clear_runstack);
  jit_retval(JIT_R0);
  return 1;
}

static int generate_ignored_result_check(mz_jit_state *jitter)
{
  /* if multiple results, need to clear ignored result in thread */
  GC_CAN_IGNORE jit_insn *refm;

  __START_INNER_TINY__(1);

  refm = jit_bnei_p(jit_forward(), JIT_R0, SCHEME_MULTIPLE_VALUES);
  mz_tl_ldi_p(JIT_R1, tl_scheme_current_thread);
  jit_stixi_p(&((Scheme_Thread *)0x0)->ku.multiple.array, JIT_R1, NULL);
  (void)jit_movi_p(JIT_R0, scheme_void);
  mz_patch_branch(refm);

  __END_INNER_TINY__(1);

  return 1;
}

int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direct_native, int need_set_rs,
                                  int multi_ok, int result_ignored, int nontail_self, int pop_and_jump,
                                  int is_inlined, int unboxed_args, jit_insn *reftop)
{
  /* Non-tail call.
     Proc is in V1, args are at RUNSTACK.
     If nontail_self, then R0 has proc pointer, and R2 has max_let_depth.
     If unboxed_args, LOCAL3 holds address with argument-copying code,
      where R2 is set before jumping to the old FP, and R1 holds
      return address back here, and V1 and R0 must be preserved;
      num_rands >= 0 in this case, and the "slow path" returns NULL.
     If num_rands < 0, then argc is in R0, and
      if num_rands != -3, need to pop runstack before returning.
     If num_rands == -1 or -3, skip prolog. */
  GC_CAN_IGNORE jit_insn *ref, *ref2, *ref4, *ref5, *ref6, *ref7, *ref8, *ref9;
  GC_CAN_IGNORE jit_insn *ref10, *refagain, *top_refagain;
  GC_CAN_IGNORE jit_insn *refrts USED_ONLY_FOR_FUTURES;
#ifndef FUEL_AUTODECEREMENTS
  GC_CAN_IGNORE jit_insn *ref11;
#endif

  CHECK_RUNSTACK_OVERFLOW();

  __START_SHORT_JUMPS__(1);

  if (pop_and_jump) {
    if ((num_rands != -1) && (num_rands != -3)) {
      mz_prolog(JIT_R1);
    } else if (!reftop) {
      reftop = jit_get_ip();
    }
  }

  top_refagain = jit_get_ip();

  /* Check for inlined native type */
  if (!direct_native) {
    ref = jit_bmsi_ul(jit_forward(), JIT_V1, 0x1);
    jit_ldxi_s(JIT_R1, JIT_V1, &((Scheme_Object *)0x0)->type);
    ref2 = jit_bnei_i(jit_forward(), JIT_R1, scheme_native_closure_type);
    /* code at ref2 uses JIT_R1 */
    CHECK_LIMIT();
  } else {
    ref = ref2 = NULL;
  }

  refagain = jit_get_ip();

  /* Before inlined native, check max let depth */
  if (!nontail_self) {
    jit_ldxi_p(JIT_R2, JIT_V1, &((Scheme_Native_Closure *)0x0)->code);
    jit_ldxi_i(JIT_R2, JIT_R2, &((Scheme_Native_Lambda *)0x0)->max_let_depth);
  }
  mz_tl_ldi_p(JIT_R1, tl_MZ_RUNSTACK_START);
  jit_subr_ul(JIT_R1, JIT_RUNSTACK, JIT_R1);
  ref4 = jit_bltr_ul(jit_forward(), JIT_R1, JIT_R2);
  CHECK_LIMIT();

  /* Before inlined native, check stack depth: */
  (void)mz_tl_ldi_p(JIT_R1, tl_scheme_jit_stack_boundary); /* assumes USE_STACK_BOUNDARY_VAR */
  ref9 = jit_bltr_ul(jit_forward(), JIT_SP, JIT_R1); /* assumes down-growing stack */
  CHECK_LIMIT();

#ifndef FUEL_AUTODECEREMENTS
  /* Finally, check for thread swap: */
  (void)mz_tl_ldi_i(JIT_R2, tl_scheme_fuel_counter);
  ref11 = jit_blei_i(jit_forward(), JIT_R2, 0);
  jit_subi_p(JIT_R2, JIT_R2, 0x1);
  (void)mz_tl_sti_i(tl_scheme_fuel_counter, JIT_R2, JIT_R1);
#endif

  /* Fast inlined-native jump ok (proc will check argc, if necessary) */
  {
    GC_CAN_IGNORE jit_insn *refr;
#if defined(MZ_USE_JIT_I386) || defined(MZ_USE_JIT_ARM)
# define KEEP_CALL_AND_RETURN_PAIRED
#endif
#ifdef KEEP_CALL_AND_RETURN_PAIRED
    GC_CAN_IGNORE jit_insn *refxr;
#endif
    if (num_rands < 0) {
      /* We need to save argc to clear and manually pop the
         runstack. So move V1 to R2 and move R0 to V1: */
      jit_movr_p(JIT_R2, JIT_V1);
      jit_movr_p(JIT_V1, JIT_R0);
    }
    if (unboxed_args) {
      jit_movr_p(JIT_R2, JIT_FP); /* save old FP */
    }
    jit_shuffle_saved_regs(); /* maybe copies V registers to be restored */
#ifdef KEEP_CALL_AND_RETURN_PAIRED
    /* keep call & ret paired (for branch prediction) by jumping to where
       we really want to return, then back here: */
    refr = jit_jmpi(jit_forward());
    refxr = jit_get_ip();
    jit_base_prolog();
#else
    refr = jit_patchable_movi_p(JIT_R1, jit_forward());
    _jit_prolog_again(jitter, NATIVE_ARG_COUNT, JIT_R1); /* saves V registers (or copied V registers) */
#endif
    if (num_rands >= 0) {
      if (nontail_self) { jit_movr_p(JIT_R1, JIT_R0); }
      jit_movr_p(JIT_R0, JIT_V1); /* closure */
      if (!nontail_self && !unboxed_args) {
        /* nontail_self is only enabled when there are no rest args: */
        jit_movi_i(JIT_R1, num_rands); /* argc */
        jit_movr_p(JIT_R2, JIT_RUNSTACK); /* argv */
      }
      jit_addi_p(JIT_RUNSTACK_BASE_OR_ALT(JIT_V1), JIT_RUNSTACK, WORDS_TO_BYTES(num_rands));
      mz_st_runstack_base_alt(JIT_V1);
    } else {
      /* R2 is closure, V1 is argc */
      jit_lshi_l(JIT_R1, JIT_V1, JIT_LOG_WORD_SIZE);
      jit_addr_p(JIT_RUNSTACK_BASE_OR_ALT(JIT_R0), JIT_RUNSTACK, JIT_R1);
      mz_st_runstack_base_alt(JIT_R0);
      jit_movr_p(JIT_R0, JIT_R2); /* closure */
      jit_movr_i(JIT_R1, JIT_V1); /* argc */
      jit_movr_p(JIT_R2, JIT_RUNSTACK); /* argv */
    }
    CHECK_LIMIT();
    if (unboxed_args) {
      /* old FP is still in R2 */
      mz_get_local_p_x(JIT_V1, JIT_LOCAL3, JIT_R2);
    }
    mz_push_locals();
    mz_repush_threadlocal();
    if (unboxed_args) {
      GC_CAN_IGNORE jit_insn *refrr;
      refrr = jit_patchable_movi_p(JIT_R1, jit_forward());
      jit_jmpr(JIT_V1);
      jit_patch_movi(refrr, jit_get_ip());
      jit_movi_i(JIT_R1, num_rands); /* argc */
      jit_movr_p(JIT_R2, JIT_RUNSTACK); /* argv */
    }
    if (!nontail_self) {
      jit_ldxi_p(JIT_V1, JIT_R0, &((Scheme_Native_Closure *)0x0)->code);
      if (direct_native) {
        jit_ldxi_p(JIT_V1, JIT_V1, &((Scheme_Native_Lambda *)0x0)->u.tail_code);
      } else {
        jit_ldxi_p(JIT_V1, JIT_V1, &((Scheme_Native_Lambda *)0x0)->arity_code);
        if (need_set_rs) {
          /* In case arity check fails, need to update runstack now: */
          JIT_UPDATE_THREAD_RSPTR();
        }
      }
      jit_jmpr(JIT_V1); /* callee restores (copied) V registers, etc. */
    } else {
      /* self-call function pointer is in R1 */
      jit_jmpr(JIT_R1);
    }
#ifdef KEEP_CALL_AND_RETURN_PAIRED
    mz_patch_ucbranch(refr);
    (void)jit_short_calli(refxr);
#else
    jit_patch_movi(refr, jit_get_ip());
#endif
    jit_unshuffle_saved_regs(); /* maybe uncopies V registers */
    /* If num_rands < 0, then V1 has argc */
  }
  CHECK_LIMIT();
  jit_retval(JIT_R0);
  VALIDATE_RESULT(JIT_R0);

  /* Fast common-case return */
  if (pop_and_jump) {
    GC_CAN_IGNORE jit_insn *refc;
    __START_INNER_TINY__(1);
    refc = jit_blei_p(jit_forward(), JIT_R0, SCHEME_MULTIPLE_VALUES);
    __END_INNER_TINY__(1);
    if ((num_rands < 0) && (num_rands != -3)) {
      /* At this point, argc must be in V1 */
      jit_lshi_l(JIT_R1, JIT_V1, JIT_LOG_WORD_SIZE);
      jit_addr_p(JIT_RUNSTACK, JIT_RUNSTACK, JIT_R1);
    }
    if (pop_and_jump) {
      mz_epilog(JIT_V1);
    }
    __START_INNER_TINY__(1);
    mz_patch_branch(refc);
    __END_INNER_TINY__(1);
    CHECK_LIMIT();
  }

  if (!multi_ok) {
    GC_CAN_IGNORE jit_insn *refm;
    __END_SHORT_JUMPS__(1);
    refm = jit_beqi_p(jit_forward(), JIT_R0, SCHEME_MULTIPLE_VALUES);
    mz_patch_branch_at(refm, sjc.bad_result_arity_code);
    __START_SHORT_JUMPS__(1);
  }
  ref6 = jit_bnei_p(jit_forward(), JIT_R0, SCHEME_TAIL_CALL_WAITING);
  generate_clear_previous_args(jitter, num_rands);
  CHECK_LIMIT();
  if (pop_and_jump) {
    /* Expects argc in V1 if num_rands < 0 and num_rands != -3: */
    generate_retry_call(jitter, num_rands, multi_ok, result_ignored, reftop);
  }
  CHECK_LIMIT();
  if (need_set_rs) {
    JIT_UPDATE_THREAD_RSPTR();
  }
  if (num_rands < 0) {
    generate_clear_slow_previous_args(jitter);
    CHECK_LIMIT();
  }
  mz_prepare(1);
  jit_pusharg_p(JIT_R0);
  if (multi_ok) {
    (void)mz_finish_lwe(ts_scheme_force_value_same_mark, refrts);
  } else {
    (void)mz_finish_lwe(ts_scheme_force_one_value_same_mark, refrts);
  }
  ref5 = jit_jmpi(jit_forward());
  CHECK_LIMIT();

  /* Maybe it's a prim? */
  if (!direct_native) {
    mz_patch_branch(ref2);
    ref2 = jit_bnei_i(jit_forward(), JIT_R1, scheme_prim_type);
    /* It's a prim. Arity check... fast path when exactly equal to min, only: */
    jit_ldxi_i(JIT_R2, JIT_V1, &((Scheme_Primitive_Proc *)0x0)->mina);
    if (num_rands >= 0) {
      ref7 = jit_bnei_i(jit_forward(), JIT_R2, num_rands);
    } else {
      ref7 = jit_bner_i(jit_forward(), JIT_R2, JIT_R0);
    }
    /* Fast prim application */
    jit_ldxi_p(JIT_R1, JIT_V1, &((Scheme_Primitive_Proc *)0x0)->prim_val);
    if (need_set_rs) {
      JIT_UPDATE_THREAD_RSPTR();
    }
    mz_prepare_direct_prim(3);
    jit_pusharg_p(JIT_V1);
    CHECK_LIMIT();
    if (num_rands < 0) { jit_movr_p(JIT_V1, JIT_R0); } /* save argc to manually pop runstack */
    {
      __END_SHORT_JUMPS__(1);
      /* May use JIT_R0 and create local branch: */
      mz_generate_direct_prim(jit_pusharg_p(JIT_RUNSTACK),
                              jit_pusharg_i(JIT_R2),
                              JIT_R1, scheme_prim_indirect);
      __START_SHORT_JUMPS__(1);
    }
    CHECK_LIMIT();
    jit_retval(JIT_R0);
    VALIDATE_RESULT(JIT_R0);
    if (!multi_ok) {
      GC_CAN_IGNORE jit_insn *refm;
      __END_SHORT_JUMPS__(1);
      refm = jit_beqi_p(jit_forward(), JIT_R0, SCHEME_MULTIPLE_VALUES);
      mz_patch_branch_at(refm, sjc.bad_result_arity_code);
      __START_SHORT_JUMPS__(1);
    }
    ref10 = jit_bnei_p(jit_forward(), JIT_R0, SCHEME_TAIL_CALL_WAITING);
    generate_clear_previous_args(jitter, num_rands);
    CHECK_LIMIT();
    if (pop_and_jump) {
      /* Expects argc in V1 if num_rands < 0 and num_rands != -3: */
      generate_retry_call(jitter, num_rands, multi_ok, result_ignored, reftop);
    }
    CHECK_LIMIT();
    if (num_rands < 0) {
      generate_clear_slow_previous_args(jitter);
      CHECK_LIMIT();
    }
    mz_prepare(1);
    jit_pusharg_p(JIT_R0);
    if (multi_ok) {
      (void)mz_finish_lwe(ts_scheme_force_value_same_mark, refrts);
    } else {
      (void)mz_finish_lwe(ts_scheme_force_one_value_same_mark, refrts);
    }
    CHECK_LIMIT();
    ref8 = jit_jmpi(jit_forward());

    /* Check for simple applicable struct wrapper */
    if (!is_inlined && (num_rands >= 0)) {
      mz_patch_branch(ref2);
      /* uses JIT_R1 */
      ref2 = generate_proc_struct_retry(jitter, num_rands, top_refagain);
      CHECK_LIMIT();
    }
  } else {
    ref2 = ref7 = ref8 = ref10 = NULL;
  }

  /* The slow way: */
  mz_patch_branch(ref9);
  if (!unboxed_args) {
    generate_pause_for_gc_and_retry(jitter,
                                    1,  /* in short jumps */
                                    JIT_V1, /* expose V1 to GC */
                                    refagain); /* retry code pointer */
  }

  CHECK_LIMIT();
  if (!direct_native) {
    mz_patch_branch(ref);
    mz_patch_branch(ref2);
    mz_patch_branch(ref7);
  }
  mz_patch_branch(ref4);
#ifndef FUEL_AUTODECEREMENTS
  mz_patch_branch(ref11);
#endif
  if (unboxed_args) {
    /* no slow path here; return NULL so the caller boxes the arguments
       and falls back to the generic path */
    (void)jit_movi_p(JIT_R0, NULL);
    if (pop_and_jump) {
      mz_epilog(JIT_V1);
    }
  } else {
    /* normal slow path: */
    if (need_set_rs) {
      JIT_UPDATE_THREAD_RSPTR();
    }
    if (num_rands >= 0) {
      jit_movi_i(JIT_R0, num_rands);
    }
    mz_prepare(3);
    CHECK_LIMIT();
    jit_pusharg_p(JIT_RUNSTACK);
    jit_pusharg_i(JIT_R0);
    jit_pusharg_p(JIT_V1);
    if (num_rands < 0) { jit_movr_p(JIT_V1, JIT_R0); } /* save argc to manually pop runstack */
    if (multi_ok) {
      (void)mz_finish_lwe(x_ts__scheme_apply_multi_from_native, refrts);
    } else {
      (void)mz_finish_lwe(x_ts__scheme_apply_from_native, refrts);
    }
    CHECK_LIMIT();
  }

  mz_patch_ucbranch(ref5);
  if (!direct_native) {
    mz_patch_ucbranch(ref8);
  }
  jit_retval(JIT_R0);
  VALIDATE_RESULT(JIT_R0);
  mz_patch_branch(ref6);
  if (!direct_native) {
    mz_patch_branch(ref10);
    if (result_ignored) {
      generate_ignored_result_check(jitter);
      CHECK_LIMIT();
    }
  }
  /* Note: same return code is above for faster common-case return */
  if ((num_rands < 0) && (num_rands != -3)) {
    /* At this point, argc must be in V1 */
    jit_lshi_l(JIT_R1, JIT_V1, JIT_LOG_WORD_SIZE);
    jit_addr_p(JIT_RUNSTACK, JIT_RUNSTACK, JIT_R1);
  }
  if (pop_and_jump) {
    mz_epilog(JIT_V1);
  }
  CHECK_LIMIT();

  __END_SHORT_JUMPS__(1);

  return 1;
}

#ifdef USE_FLONUM_UNBOXING
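/* Emits code that boxes any flonum/extflonum arguments that
   currently live unboxed on the JIT's flonum stack, so that the slow
   path sees proper Scheme values. For arguments that came from
   unboxing locals, the generated code first checks whether a box
   already exists. */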
static int generate_argument_boxing(mz_jit_state *jitter, Scheme_Lambda *lam,
                                    int num_rands, int args_already_in_place,
                                    int offset, int direct_flostack_offset,
                                    int save_reg,
                                    /* used only to skip unneeded checks: */
                                    Scheme_App_Rec *app, Scheme_Object **alt_rands)
{
  int i, arg_tmp_offset;
  Scheme_Object *rand;

  arg_tmp_offset = offset - direct_flostack_offset;
  for (i = num_rands; i--; ) {
    int extfl;
    extfl = CLOSURE_ARGUMENT_IS_EXTFLONUM(lam, i + args_already_in_place);
    if (extfl || CLOSURE_ARGUMENT_IS_FLONUM(lam, i + args_already_in_place)) {
      rand = (alt_rands
              ? alt_rands[i+1+args_already_in_place]
              : (app
                 ? app->args[i+1+args_already_in_place]
                 : NULL));
      arg_tmp_offset += MZ_FPUSEL(extfl, 2*sizeof(double), sizeof(double));
      /* Boxing definitely isn't needed if the value came from a local that doesn't hold
         an unboxed value; otherwise, we generate code to check dynamically. */
      if (!rand
          || !SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)
          || (!extfl && (SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_FLONUM))
          || (extfl && (SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_EXTFLONUM))) {
        GC_CAN_IGNORE jit_insn *iref;
        int aoffset;
        aoffset = JIT_FRAME_FLOSTACK_OFFSET - arg_tmp_offset;
        if (save_reg == JIT_R0) {
          if (i != num_rands - 1)
            mz_pushr_p(JIT_R0);
        } else {
          mz_pushr_p(JIT_V1);
        }
        if (!rand || SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)) {
          /* assert: !rand or SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_FLONUM
             or SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_EXTFLONUM */
          /* So, we have to check for an existing box */
          if ((save_reg != JIT_R0) || (i != num_rands - 1))
            mz_rs_ldxi(JIT_R0, i+1);
          mz_rs_sync();
          __START_TINY_JUMPS__(1);
          iref = jit_bnei_p(jit_forward(), JIT_R0, NULL);
          __END_TINY_JUMPS__(1);
        } else
          iref = NULL;
        jit_movi_l(JIT_R0, aoffset);
        mz_rs_sync();
        MZ_FPUSEL_STMT(extfl,
                       (void)jit_calli(sjc.box_extflonum_from_stack_code),
                       (void)jit_calli(sjc.box_flonum_from_stack_code));
        if ((save_reg != JIT_R0) || (i != num_rands - 1))
          mz_rs_stxi(i+1, JIT_R0);
        if (iref) {
          __START_TINY_JUMPS__(1);
          mz_patch_branch(iref);
          __END_TINY_JUMPS__(1);
        }
        CHECK_LIMIT();
        if (save_reg == JIT_R0) {
          if (i != num_rands - 1)
            mz_popr_p(JIT_R0);
        } else {
          mz_popr_p(JIT_V1);
        }
      }
    }
  }

  return 1;
}
#endif

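/* Emits the fast path for a tail call from a closure to itself: copy
   the new arguments over this frame's argument slots (just after the
   closure data on the runstack) and jump back to the closure's
   restart point. The slow path boxes any unboxed flonum arguments
   and then dispatches through the generic tail-call code in
   `slow_code`. */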
generate_self_tail_call(Scheme_Object * rator,mz_jit_state * jitter,int num_rands,GC_CAN_IGNORE jit_insn * slow_code,int args_already_in_place,int direct_flostack_offset,Scheme_App_Rec * app,Scheme_Object ** alt_rands)1247 static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, int num_rands, GC_CAN_IGNORE jit_insn *slow_code,
1248                                    int args_already_in_place, int direct_flostack_offset,
1249                                    Scheme_App_Rec *app, Scheme_Object **alt_rands)
1250 /* Last argument is in R0 */
1251 {
1252   GC_CAN_IGNORE jit_insn *refslow, *refagain;
1253   int i;
1254   int jmp_tiny USED_ONLY_SOMETIMES;
1255   int jmp_short USED_ONLY_SOMETIMES;
1256   int closure_size = jitter->self_closure_size;
1257   int space, offset;
1258 #ifdef USE_FLONUM_UNBOXING
1259   int arg_offset = 0, arg_tmp_offset;
1260   Scheme_Object *rand;
1261 #endif
1262 
1263 #ifdef JIT_PRECISE_GC
1264   closure_size += 1; /* Skip procedure pointer, too */
1265 #endif
1266 
1267   jmp_tiny = num_rands < 5;
1268   jmp_short = num_rands < 100;
1269 
1270   __START_TINY_OR_SHORT_JUMPS__(jmp_tiny, jmp_short);
1271 
1272   refagain = jit_get_ip();
1273 
1274   /* Check for thread swap: */
1275   (void)mz_tl_ldi_i(JIT_R2, tl_scheme_fuel_counter);
1276   refslow = jit_blei_i(jit_forward(), JIT_R2, 0);
1277 #ifndef FUEL_AUTODECEREMENTS
1278   jit_subi_p(JIT_R2, JIT_R2, 0x1);
1279   (void)mz_tl_sti_i(tl_scheme_fuel_counter, JIT_R2, JIT_R1);
1280 #endif
1281 
1282   __END_TINY_OR_SHORT_JUMPS__(jmp_tiny, jmp_short);
1283 
1284   offset = jitter->flostack_offset;
1285   space = jitter->flostack_space;
1286 #ifdef USE_FLONUM_UNBOXING
1287   arg_tmp_offset = offset - direct_flostack_offset;
1288 #endif
1289 
1290   /* Copy args to runstack after closure data: */
1291   mz_ld_runstack_base_alt(JIT_R2);
1292   jit_subi_p(JIT_R2, JIT_RUNSTACK_BASE_OR_ALT(JIT_R2), WORDS_TO_BYTES(num_rands + closure_size + args_already_in_place));
1293   for (i = num_rands; i--; ) {
1294     int already_loaded = (i == num_rands - 1);
1295 #ifdef USE_FLONUM_UNBOXING
1296     int is_flonum, already_unboxed = 0, extfl = 0;
1297     if ((SCHEME_LAMBDA_FLAGS(jitter->self_lam) & LAMBDA_HAS_TYPED_ARGS)
1298         && (CLOSURE_ARGUMENT_IS_FLONUM(jitter->self_lam, i + args_already_in_place)
1299             || CLOSURE_ARGUMENT_IS_EXTFLONUM(jitter->self_lam, i + args_already_in_place))) {
1300       is_flonum = 1;
1301       extfl = CLOSURE_ARGUMENT_IS_EXTFLONUM(jitter->self_lam, i + args_already_in_place);
1302       rand = (alt_rands
1303               ? alt_rands[i+1+args_already_in_place]
1304               : app->args[i+1+args_already_in_place]);
1305       arg_tmp_offset += MZ_FPUSEL(extfl, 2*sizeof(double), sizeof(double));
1306       mz_ld_fppush(MZ_FPUSEL(extfl, JIT_FPU_FPR0, JIT_FPR0), arg_tmp_offset, extfl);
1307       already_unboxed = 1;
1308       if (!already_loaded && !SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)) {
1309         already_loaded = 1;
1310         (void)jit_movi_p(JIT_R0, NULL);
1311       }
1312     } else
1313       is_flonum = extfl = 0;
1314 #endif
1315     if (!already_loaded)
1316       jit_ldxi_p(JIT_R0, JIT_RUNSTACK, WORDS_TO_BYTES(i));
1317     jit_stxi_p(WORDS_TO_BYTES(i + closure_size + args_already_in_place), JIT_R2, JIT_R0);
1318 #ifdef USE_FLONUM_UNBOXING
1319     if (is_flonum) {
1320       if (!already_unboxed) {
1321         MZ_FPUSEL_STMT(extfl,
1322                        jit_fpu_ldxi_ld_fppush(JIT_FPU_FPR0, JIT_R0, &((Scheme_Long_Double *)0x0)->long_double_val),
1323                        jit_ldxi_d_fppush(JIT_FPR0, JIT_R0, &((Scheme_Double *)0x0)->double_val));
1324       }
1325       arg_offset += MZ_FPUSEL(extfl, 2*sizeof(double), sizeof(double));
1326       mz_st_fppop(arg_offset, MZ_FPUSEL(extfl, JIT_FPU_FPR0, JIT_FPR0), extfl);
1327     }
1328 #endif
1329     CHECK_LIMIT();
1330   }
1331   jit_movr_p(JIT_RUNSTACK, JIT_R2);
1332 
1333   scheme_mz_flostack_restore(jitter, jitter->self_restart_space, jitter->self_restart_offset, 1, 1);
1334 
1335   /* Now jump: */
1336   (void)jit_jmpi(jitter->self_restart_code);
1337   CHECK_LIMIT();
1338 
1339   /* Slow path: */
1340   __START_TINY_OR_SHORT_JUMPS__(jmp_tiny, jmp_short);
1341   mz_patch_branch(refslow);
1342   __END_TINY_OR_SHORT_JUMPS__(jmp_tiny, jmp_short);
1343 
1344   generate_pause_for_gc_and_retry(jitter,
1345                                   0,  /* in short jumps */
1346                                   JIT_R0, /* expose R0 to GC */
1347                                   refagain); /* retry code pointer */
1348   CHECK_LIMIT();
1349 
1350   jitter->flostack_offset = offset;
1351   jitter->flostack_space = space;
1352 
1353 #ifdef USE_FLONUM_UNBOXING
1354   /* Need to box any arguments that we have only in flonum form */
1355   if (SCHEME_LAMBDA_FLAGS(jitter->self_lam) & LAMBDA_HAS_TYPED_ARGS) {
1356     generate_argument_boxing(jitter, jitter->self_lam,
1357                              num_rands, args_already_in_place,
1358                              offset, direct_flostack_offset,
1359                              JIT_R0,
1360                              app, alt_rands);
1361     CHECK_LIMIT();
1362 
1363     /* Arguments already in place may also need to be boxed. */
1364     arg_tmp_offset = jitter->self_restart_offset;
1365     for (i = jitter->self_lam->closure_size; i--; ) {
1366       /* Skip over flonums unpacked from the closure. I think this never
1367          happens, because I think that a self-call with already-in-place
1368          flonum arguments will only happen when the closure is empty. */
      if (CLOSURE_CONTENT_IS_FLONUM(jitter->self_lam, i))
        arg_tmp_offset -= sizeof(double);
      else if (CLOSURE_CONTENT_IS_EXTFLONUM(jitter->self_lam, i))
        arg_tmp_offset -= 2*sizeof(double);
    }
    for (i = 0; i < args_already_in_place; i++) {
      if (CLOSURE_ARGUMENT_IS_FLONUM(jitter->self_lam, i)
          || CLOSURE_ARGUMENT_IS_EXTFLONUM(jitter->self_lam, i)) {
        GC_CAN_IGNORE jit_insn *iref;
        int extfl USED_ONLY_IF_LONG_DOUBLE;
        extfl = CLOSURE_ARGUMENT_IS_EXTFLONUM(jitter->self_lam, i);
        mz_pushr_p(JIT_R0);
        mz_ld_runstack_base_alt(JIT_R2);
        jit_subi_p(JIT_R2, JIT_RUNSTACK_BASE_OR_ALT(JIT_R2), WORDS_TO_BYTES(num_rands + args_already_in_place));
        jit_ldxi_p(JIT_R0, JIT_R2, WORDS_TO_BYTES(i));
        mz_rs_sync();
        __START_TINY_JUMPS__(1);
        iref = jit_bnei_p(jit_forward(), JIT_R0, NULL);
        __END_TINY_JUMPS__(1);
        {
          int aoffset = JIT_FRAME_FLOSTACK_OFFSET - arg_tmp_offset;
          jit_movi_l(JIT_R0, aoffset);
          MZ_FPUSEL_STMT(extfl,
                         (void)jit_calli(sjc.box_extflonum_from_stack_code),
                         (void)jit_calli(sjc.box_flonum_from_stack_code));
          mz_ld_runstack_base_alt(JIT_R2);
          jit_subi_p(JIT_R2, JIT_RUNSTACK_BASE_OR_ALT(JIT_R2), WORDS_TO_BYTES(num_rands + args_already_in_place));
          jit_stxi_p(WORDS_TO_BYTES(i), JIT_R2, JIT_R0);
        }
        __START_TINY_JUMPS__(1);
        mz_patch_branch(iref);
        __END_TINY_JUMPS__(1);
        mz_popr_p(JIT_R0);
        CHECK_LIMIT();
        arg_tmp_offset -= MZ_FPUSEL(extfl, 2*sizeof(double), sizeof(double));
      }
    }
  }
#endif

  scheme_mz_flostack_restore(jitter, 0, 0, 1, 1);

  CHECK_LIMIT();

  if (args_already_in_place) {
    jit_movi_l(JIT_R2, args_already_in_place);
    mz_set_local_p(JIT_R2, JIT_LOCAL2);
  }

  if (num_rands > 0) {
    /* We didn't leave room for the last argument, so now we need to make
       space for it. (Possible improvement: it may be possible to know that
       room is available already, so that this isn't necessary.) */
    mz_runstack_unskipped(jitter, 1);
    mz_runstack_pushed(jitter, 1);
    mz_rs_dec(1);
    for (i = 0; i < num_rands-1; i++) {
      mz_rs_ldxi(JIT_R1, i+1);
      mz_rs_stxi(i, JIT_R1);
      CHECK_LIMIT();
    }

    mz_rs_stxi(num_rands - 1, JIT_R0);
  }
  scheme_generate(rator, jitter, 0, 0, 0, JIT_V1, NULL, NULL);
  CHECK_LIMIT();
  mz_rs_sync();

  (void)jit_jmpi(slow_code);

  return 1;
}

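/* Captures the parameters for a shared call stub, so that the stub can be
   generated lazily through scheme_generate_one() (see
   scheme_generate_shared_call() below, which fills in this record). */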
typedef struct {
  int num_rands;
  mz_jit_state *old_jitter;
  int multi_ok;
  int result_ignored;
  int is_tail;
  int direct_prim, direct_native, nontail_self, unboxed_args;
} Generate_Call_Data;

static void jit_register_sub_func(mz_jit_state *jitter, void *code, Scheme_Object *protocol, int gcable)
/* protocol: #f => normal lightweight call protocol
             void => next return address is in LOCAL2
             eof => name to use is in LOCAL2
             null => no name, but usual frame */
{
  void *code_end;

  code_end = jit_get_ip();
  if (jitter->retain_start)
    scheme_jit_add_symbol((uintptr_t)jit_unadjust_ip(code),
                          (uintptr_t)jit_unadjust_ip(code_end) - 1,
                          protocol,
                          gcable);
}

void scheme_jit_register_sub_func(mz_jit_state *jitter, void *code, Scheme_Object *protocol)
{
  jit_register_sub_func(jitter, code, protocol, 0);
}

void scheme_jit_register_helper_func(mz_jit_state *jitter, void *code, int gcable)
{
#if defined(MZ_USE_DWARF_LIBUNWIND) || defined(_WIN64)
  /* Null indicates that there's no function name to report, but the
     stack should be unwound manually using the JIT-generated convention. */
  jit_register_sub_func(jitter, code, scheme_null, gcable);
#endif
}

static int do_generate_shared_call(mz_jit_state *jitter, void *_data)
{
  Generate_Call_Data *data = (Generate_Call_Data *)_data;

#ifdef MZ_USE_JIT_PPC
  jitter->js.jitl.nbArgs = data->old_jitter->js.jitl.nbArgs;
#endif

  if (data->is_tail) {
    int ok;
    void *code;

    code = jit_get_ip();

    if (data->direct_prim)
      ok = generate_direct_prim_tail_call(jitter, data->num_rands);
    else
      ok = scheme_generate_tail_call(jitter, data->num_rands, data->direct_native, 1, 0,
                                     NULL, NULL, NULL);

    scheme_jit_register_helper_func(jitter, code, 0);

    return ok;
  } else {
    int ok;
    void *code;

    code = jit_get_ip();

    if (data->direct_prim)
      ok = generate_direct_prim_non_tail_call(jitter, data->num_rands, data->multi_ok, 1);
    else
      ok = scheme_generate_non_tail_call(jitter, data->num_rands, data->direct_native, 1,
                                         data->multi_ok, data->result_ignored, data->nontail_self,
                                         1, 0, data->unboxed_args, NULL);

    scheme_jit_register_sub_func(jitter, code, scheme_false);

    return ok;
  }
}

void *scheme_generate_shared_call(int num_rands, mz_jit_state *old_jitter, int multi_ok, int result_ignored,
                                  int is_tail, int direct_prim, int direct_native, int nontail_self,
                                  int unboxed_args)
{
  Generate_Call_Data data;
  void *ip;

  data.num_rands = num_rands;
  data.old_jitter = old_jitter;
  data.multi_ok = multi_ok;
  data.result_ignored = result_ignored;
  data.is_tail = is_tail;
  data.direct_prim = direct_prim;
  data.direct_native = direct_native;
  data.nontail_self = nontail_self;
  data.unboxed_args = unboxed_args;

  ip = scheme_generate_one(old_jitter, do_generate_shared_call, &data, 0, NULL, NULL);
  return jit_adjust_ip(ip);
}

void scheme_ensure_retry_available(mz_jit_state *jitter, int multi_ok, int result_ignored)
{
  int mo = (multi_ok
            ? (result_ignored ? SHARED_RESULT_IGNORED_CASE : SHARED_MULTI_OK_CASE)
            : SHARED_SINGLE_VALUE_CASE);
  if (!sjc.shared_non_tail_retry_code[mo]) {
    void *code;
    code = scheme_generate_shared_call(-1, jitter, multi_ok, result_ignored, 0, 0, 0, 0, 0);
    sjc.shared_non_tail_retry_code[mo] = code;
  }
}

static int generate_nontail_self_setup(mz_jit_state *jitter)
{
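  /* Load a forward (to-be-patched) address into R2 and chain the patch
     record onto jitter->patch_depth so that it can be filled in later;
     presumably this becomes the continuation point of the self-call.
     R0 gets the shared self_nontail_code entry point. */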
  void *pp, **pd;
  pp = jit_patchable_movi_p(JIT_R2, jit_forward());
  pd = (void **)scheme_malloc(2 * sizeof(void *));
  pd[0] = pp;
  pd[1] = jitter->patch_depth;
  jitter->patch_depth = pd;
  (void)jit_patchable_movi_p(JIT_R0, jitter->self_nontail_code);
#ifdef JIT_PRECISE_GC
  if (jitter->closure_self_on_runstack) {
    /* Get this closure's pointer from the run stack */
    int depth = jitter->depth + jitter->extra_pushed - 1;
    jit_ldxi_p(JIT_V1, JIT_RUNSTACK, WORDS_TO_BYTES(depth));
  }
#endif
  return 0;
}

static int can_direct_native(Scheme_Object *p, int num_rands, intptr_t *extract_case)
{
  if (SAME_TYPE(SCHEME_TYPE(p), scheme_native_closure_type)) {
    if (((Scheme_Native_Closure *)p)->code->closure_size < 0) {
      /* case-lambda */
      int cnt, i;
      mzshort *arities;

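      /* A negative closure_size encodes a case-lambda: the number of
         cases is -(closure_size + 1), and u.arities records the arity
         for each case. */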
      cnt = ((Scheme_Native_Closure *)p)->code->closure_size;
      cnt = -(cnt + 1);
      arities = ((Scheme_Native_Closure *)p)->code->u.arities;
      for (i = 0; i < cnt; i++) {
        if (arities[i] == num_rands) {
          *extract_case = (intptr_t)&((Scheme_Native_Closure *)0x0)->vals[i];
          return 1;
        }
      }
    } else {
      /* not a case-lambda... */
      if (scheme_native_arity_check(p, num_rands)
          /* If it also accepts num_rands + 1, then it has a vararg,
             so don't try direct_native. */
          && !scheme_native_arity_check(p, num_rands + 1)) {
        return 1;
      }
    }
  }

  return 0;
}

static int is_noncm_hash_ref(Scheme_Object *rator, int num_rands, Scheme_App_Rec *app)
{
  /* hash-ref acts like a non-cm prim if there's no procedure 3rd argument */
  if (SAME_OBJ(rator, scheme_hash_ref_proc) && ((num_rands == 2) || (num_rands == 3))) {
    if (num_rands == 3) {
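      /* The failure argument matters only if it might be called: reject
         when it is known to be a procedure, or when it is a non-value
         expression whose result we cannot predict here. */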
      if ((SCHEME_TYPE(app->args[3]) < _scheme_values_types_)
          || SCHEME_PROCP(app->args[3]))
        return 0;
    }
    return 1;
  }

  return 0;
}

static jit_direct_arg *check_special_direct_args(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_rands,
                                                 int args_already_in_place)
{
  jit_direct_arg *inline_direct_args = NULL;
  Scheme_Object *v;
  int reg_to_pos[NUM_AVAIL_DIRECT_ARG_REGS];
  int n = 0, j, pos, i;

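  /* This early return disables the special-case path below, apparently
     on purpose; the remainder of the function is currently unreachable. */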
  return NULL;

  for (j = 0; j < NUM_AVAIL_DIRECT_ARG_REGS; j++) {
    reg_to_pos[j] = 0;
  }

  for (i = 0; i < num_rands; i++) {
    v = (alt_rands
         ? alt_rands[i+1+args_already_in_place]
         : app->args[i+1+args_already_in_place]);
    if (SAME_TYPE(SCHEME_TYPE(v), scheme_local_type)
        && !SCHEME_GET_LOCAL_TYPE(v)) {
      pos = SCHEME_LOCAL_POS(v);
      for (j = 0; j < n; j++) {
        if (reg_to_pos[j] == pos)
          break;
      }
      if (j >= n) {
        if (n >= NUM_AVAIL_DIRECT_ARG_REGS)
          break;
        reg_to_pos[n++] = pos;
      }
    } else
      break;
  }

  if (i < num_rands)
    return NULL;

  /* We hit the special case! */
  inline_direct_args = MALLOC_N_ATOMIC(jit_direct_arg, num_rands);

  n = 0;
  for (j = 0; j < NUM_AVAIL_DIRECT_ARG_REGS; j++)  {
    reg_to_pos[j] = 0;
  }

  for (i = 0; i < num_rands; i++) {
    v = (alt_rands
         ? alt_rands[i+1+args_already_in_place]
         : app->args[i+1+args_already_in_place]);
    pos = SCHEME_LOCAL_POS(v);
    for (j = 0; j < n; j++) {
      if (reg_to_pos[j] == pos) {
        inline_direct_args[i].gen = 0;
        inline_direct_args[i].reg = direct_arg_regs[j];
        break;
      }
    }
    if (j >= n) {
      reg_to_pos[n] = pos;
      inline_direct_args[i].gen = 1;
      inline_direct_args[i].reg = direct_arg_regs[n];
      n++;
    }
  }

  return inline_direct_args;
}

#ifdef USE_FLONUM_UNBOXING

static int generate_fp_argument_shift(int direct_flostack_offset, mz_jit_state *jitter, int src_fp_reg)
{
  int i;

  if ((src_fp_reg == JIT_FP)
      && (jitter->flostack_offset == direct_flostack_offset))
    /* no shift needed */
    return 1;

  /* Since we're just shifting bytes, it's ok to pretend that all
     stored values are `double's (an extflonum just occupies two slots). */
  for (i = 0; i < direct_flostack_offset; i += sizeof(double)) {
    int i_pos, a_pos;
    i_pos = jitter->flostack_offset - direct_flostack_offset + i + sizeof(double);
    mz_ld_fppush_x(JIT_FPR0, i_pos, src_fp_reg, 0);
    a_pos = i + sizeof(double);
    mz_st_fppop(a_pos, JIT_FPR0, 0);
    CHECK_LIMIT();
  }

  return 1;
}

static int generate_call_path_with_unboxes(mz_jit_state *jitter, int direct_flostack_offset, void *unboxed_code,
                                           GC_CAN_IGNORE jit_insn **_refdone,
                                           int num_rands, Scheme_Lambda *direct_lam, Scheme_Object *rator)
{
  GC_CAN_IGNORE jit_insn *refdone, *refgo, *refcopy;
  int i, k, offset;

  refgo = jit_jmpi(jit_forward());
  refcopy = jit_get_ip();

  /* Callback code to copy unboxed arguments.
     R1 has the return address, R2 holds the old FP */

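  /* Round the needed space up to a whole number of FLOSTACK_SPACE_CHUNK
     units before bumping the C stack pointer: */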
  offset = FLOSTACK_SPACE_CHUNK * ((direct_flostack_offset + (FLOSTACK_SPACE_CHUNK - 1))
                                   / FLOSTACK_SPACE_CHUNK);
  jit_subi_l(JIT_SP, JIT_SP, offset);

  generate_fp_argument_shift(direct_flostack_offset, jitter, JIT_R2);
  CHECK_LIMIT();

  jit_jmpr(JIT_R1);

  mz_patch_ucbranch(refgo);

  /* install callback pointer and jump to shared code: */

  (void)jit_patchable_movi_p(JIT_R1, refcopy);
  mz_set_local_p(JIT_R1, JIT_LOCAL3);

  (void)jit_calli(unboxed_code);

  refdone = jit_bnei_p(jit_forward(), JIT_R0, NULL);
  *_refdone = refdone;

  CHECK_LIMIT();

  /* box arguments for slow path */
  for (i = 0, k = 0; i < num_rands; i++) {
    if ((SCHEME_LAMBDA_FLAGS(direct_lam) & LAMBDA_HAS_TYPED_ARGS)
        && (CLOSURE_ARGUMENT_IS_FLONUM(direct_lam, i)
            || CLOSURE_ARGUMENT_IS_EXTFLONUM(direct_lam, i))) {
      int extfl;
      extfl = CLOSURE_ARGUMENT_IS_EXTFLONUM(direct_lam, i);

      offset = jitter->flostack_offset - k;
      offset = JIT_FRAME_FLOSTACK_OFFSET - offset;
      jit_ldxi_p(JIT_R0, JIT_RUNSTACK, WORDS_TO_BYTES(i));
      scheme_generate_flonum_local_boxing(jitter, i, offset, JIT_R0, extfl);
      k += MZ_FPUSEL(extfl, 2*sizeof(double), sizeof(double));
    }
  }

  /* Reset V1 to rator for slow path: */
  scheme_generate(rator, jitter, 0, 0, 0, JIT_V1, NULL, NULL);
  CHECK_LIMIT();
  mz_rs_sync();

  return 1;
}
#endif

int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_rands, int num_pushes,
                        mz_jit_state *jitter, int is_tail, int multi_ok, int result_ignored,
                        int no_call)
/* de-sync'd ok
   If no_call is 2, then rator is not necessarily evaluated.
   If no_call is 1, then rator is left in V1 and arguments are on runstack. */
{
  int i, offset, need_safety = 0, apply_to_list = 0, num_unsafe_struct_refs;
  int direct_prim = 0, need_non_tail = 0, direct_native = 0, direct_self = 0, nontail_self = 0;
  Scheme_Native_Closure *inline_direct_native = NULL;
  int almost_inline_direct_native = 0;
#ifdef USE_FLONUM_UNBOXING
  Scheme_Lambda *direct_lam = NULL;
#endif
  int direct_flostack_offset = 0, unboxed_non_tail_args = 0;
  jit_direct_arg *inline_direct_args = NULL;
  int proc_already_in_place = 0;
  Scheme_Object *rator, *v, *arg;
  int reorder_ok = 0;
  int args_already_in_place = 0;
  intptr_t extract_case = 0; /* when direct_native, non-0 => offset to extract case-lambda case */
  START_JIT_DATA();

  rator = (alt_rands ? alt_rands[0] : app->args[0]);

  rator = scheme_specialize_to_constant(rator, jitter, num_pushes, 0);

  if (no_call == 2) {
    direct_prim = 1;
  } else if (SCHEME_PRIMP(rator)) {
    if ((num_rands >= ((Scheme_Primitive_Proc *)rator)->mina)
        && ((num_rands <= ((Scheme_Primitive_Proc *)rator)->mu.maxa)
            || (((Scheme_Primitive_Proc *)rator)->mina < 0))
        && (scheme_is_noncm(rator, jitter, 0, 0)
            || is_noncm_hash_ref(rator, num_rands, app)
            /* It's also ok to directly call `values' if multiple values are ok: */
            || (multi_ok && SAME_OBJ(rator, scheme_values_proc))))
      direct_prim = 1;
    else {
      reorder_ok = 1;
      if ((num_rands >= 2) && SAME_OBJ(rator, scheme_apply_proc))
        apply_to_list = 1;
    }
    if (!(((Scheme_Primitive_Proc *)rator)->pp.flags & SCHEME_PRIM_IS_MULTI_RESULT))
      result_ignored = 0; /* don't need to check for multiple values to ignore */
  } else {
    Scheme_Type t;
    t = SCHEME_TYPE(rator);

    if (t == scheme_case_closure_type) {
      /* Turn it into a JITted empty case closure: */
      rator = scheme_unclose_case_lambda(rator, 1);
      t = SCHEME_TYPE(rator);
    }

    if ((t == scheme_local_type) && scheme_ok_to_delay_local(rator)) {
      /* We can re-order evaluation of the rator. */
      reorder_ok = 1;

      /* Call to known native, or even known self? */
      {
        int pos, flags;
        pos = SCHEME_LOCAL_POS(rator) - num_pushes;
        if (scheme_mz_is_closure(jitter, pos, num_rands, &flags)) {
          direct_native = 1;
          if ((pos == jitter->self_pos)
              && (num_rands < MAX_SHARED_CALL_RANDS)) {
            if (is_tail)
              direct_self = 1;
            else if (jitter->self_nontail_code)
              nontail_self = 1;
          }
        }
      }
    } else if ((t == scheme_toplevel_type) || (t == scheme_static_toplevel_type)) {
      int flags = SCHEME_TOPLEVEL_FLAGS(rator);

      if ((flags & SCHEME_TOPLEVEL_FLAGS_MASK) >= SCHEME_TOPLEVEL_FIXED) {
        /* We can re-order evaluation of the rator. */
        reorder_ok = 1;

        if (jitter->nc
            && ((flags & SCHEME_TOPLEVEL_FLAGS_MASK) >= SCHEME_TOPLEVEL_CONST)) {
          Scheme_Object *p;

          if (t == scheme_toplevel_type)
            p = scheme_extract_global(rator, jitter->nc, 0);
          else
            p = SCHEME_STATIC_TOPLEVEL_PREFIX(rator)->a[SCHEME_TOPLEVEL_POS(rator)];
          if (p) {
            p = ((Scheme_Bucket *)p)->val;
            if (can_direct_native(p, num_rands, &extract_case)) {
              int pos = SCHEME_TOPLEVEL_POS(rator);

              direct_native = 1;

              if ((pos == jitter->self_toplevel_pos)
                  && (num_rands < MAX_SHARED_CALL_RANDS)) {
                if (is_tail) {
                  direct_self = 1;
                } else if (jitter->self_nontail_code)
                  nontail_self = 1;
              }
            }
          }
        }
      }
    } else if (SAME_TYPE(t, scheme_native_closure_type)) {
      direct_native = can_direct_native(rator, num_rands, &extract_case);
      reorder_ok = 1;
    } else if (SAME_TYPE(t, scheme_closure_type)) {
      Scheme_Lambda *lam;
      lam = ((Scheme_Closure *)rator)->code;
      if ((lam->num_params == num_rands)
          && !(SCHEME_LAMBDA_FLAGS(lam) & LAMBDA_HAS_REST)) {
        direct_native = 1;

        if (SAME_OBJ(lam->u.jit_clone, jitter->self_lam)
            && (num_rands < MAX_SHARED_CALL_RANDS)) {
          if (is_tail)
            direct_self = 1;
          else if (jitter->self_nontail_code)
            nontail_self = 1;
        } else {
          Scheme_Closure *c = (Scheme_Closure *)rator;
          if (ZERO_SIZED_CLOSUREP(c)) {
            /* If we're calling a constant function in tail position, then
               there's a good chance that this function is a wrapper to
               get to a loop, a nested loop, or a mutually recursive call.
               Inline the jump to the potential loop,
               absorbing the runstack and C stack checks into the check
               for this function --- only works if we can JIT the target
               of the call. */
            Scheme_Native_Closure *nc;
            nc = (Scheme_Native_Closure *)scheme_jit_closure((Scheme_Object *)lam, NULL);
            if (nc->code->start_code == scheme_on_demand_jit_code) {
              if (nc->code->arity_code != sjc.in_progress_on_demand_jit_arity_code) {
                scheme_on_demand_generate_lambda(nc, 0, NULL, 0);
                CHECK_NESTED_GENERATE();
              }
            }
            if (nc->code->start_code != scheme_on_demand_jit_code) {
              if (is_tail) {
                if (nc->code->max_let_depth > jitter->max_tail_depth)
                  jitter->max_tail_depth = nc->code->max_let_depth;
                inline_direct_native = nc;
#ifdef USE_FLONUM_UNBOXING
                direct_lam = lam;
#endif
              } else {
                if (num_rands < MAX_SHARED_CALL_RANDS) {
#ifdef USE_FLONUM_UNBOXING
                  direct_lam = lam;
#endif
                  unboxed_non_tail_args = 1;
                }
              }
            } else {
              if (is_tail) {
                /* To tie a mutually recursive loop, we'll leave an indirection
                   and a runstack-space check, but we can still handle unboxed
                   arguments. */
#ifdef USE_FLONUM_UNBOXING
                direct_lam = lam;
#endif
                almost_inline_direct_native = 1;
              }
            }
          }
        }
      }
      reorder_ok = 1;
    } else if (t > _scheme_values_types_) {
      /* We can re-order evaluation of the rator. */
      reorder_ok = 1;
    }

#ifdef JIT_PRECISE_GC
    if (jitter->closure_self_on_runstack) {
      /* We can get this closure's pointer back from the Scheme stack. */
      if (nontail_self)
        direct_self = 1;
    }
#endif

    if (direct_self)
      reorder_ok = 0; /* superseded by direct_self */
  }

  FOR_LOG(if (direct_native) { LOG_IT((" [direct]\n")); } )

  /* Direct native tail with same number of args as just received? */
  if (direct_native && is_tail && num_rands && !almost_inline_direct_native
      && (num_rands == jitter->self_lam->num_params)
      && !(SCHEME_LAMBDA_FLAGS(jitter->self_lam) & LAMBDA_HAS_REST)) {
    /* Check whether the actual arguments refer to Scheme-stack
       locations that will be filled with argument values; that
       is, check how many arguments are already in place for
       the call. */
    mz_runstack_skipped(jitter, num_pushes);
    for (i = 0; i < num_rands; i++) {
      v = (alt_rands ? alt_rands[i+1] : app->args[i+1]);
      if (SAME_TYPE(SCHEME_TYPE(v), scheme_local_type)
          && !(SCHEME_GET_LOCAL_FLAGS(v) == SCHEME_LOCAL_OTHER_CLEARS)) {
        int pos;
        pos = mz_remap(SCHEME_LOCAL_POS(v));
        if (pos == (jitter->depth + jitter->extra_pushed + args_already_in_place))
          args_already_in_place++;
        else
          break;
      } else
        break;
    }
    mz_runstack_unskipped(jitter, num_pushes);
    if (args_already_in_place) {
      direct_native = 2;
      if (num_pushes)
        mz_runstack_skipped(jitter, args_already_in_place);
      num_rands -= args_already_in_place;
      if (num_pushes)
        num_pushes -= args_already_in_place;
    }
    LOG_IT((" [args in place: %d]\n", args_already_in_place));
  }

  if (inline_direct_native) {
    /* Look for very special case where arguments are so simple
       that we can move them directly into a couple of registers. */
    inline_direct_args = check_special_direct_args(app, alt_rands, num_rands, args_already_in_place);
  }

  if (num_rands) {
    if (inline_direct_args) {
      mz_runstack_skipped(jitter, num_pushes);
    } else if (!direct_prim || (num_rands > 1) || (no_call == 2)) {
      int skip_end = 0;
      if (direct_self && is_tail && !no_call && (num_rands > 0)) {
        /* last argument is kept in a register */
        skip_end = 1;
      }
      if (num_rands - skip_end > 0) {
        mz_rs_dec(num_rands-skip_end);
        CHECK_RUNSTACK_OVERFLOW();
        if (num_pushes)
          mz_runstack_pushed(jitter, num_pushes-skip_end);
        else
          scheme_extra_pushed(jitter, num_rands-skip_end);
      }
      need_safety = num_rands-skip_end;
      if (skip_end && num_pushes)
        mz_runstack_skipped(jitter, skip_end);
    } else {
      if (num_pushes)
        mz_runstack_skipped(jitter, 1);
    }
  }

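  /* Determine whether the rator or any argument is complex enough that
     its evaluation needs a non-tail context (continuation-mark position): */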
  for (i = num_rands + args_already_in_place + 1; i--; ) {
    v = (alt_rands ? alt_rands[i] : app->args[i]);
    if (!scheme_is_simple(v, INIT_SIMPLE_DEPTH, 1, jitter, 0)) {
      need_non_tail = 1;
      break;
    }
  }

  if (need_non_tail) {
    offset = scheme_generate_non_tail_mark_pos_prefix(jitter);
    CHECK_LIMIT();
  } else
    offset = 0;

  if (!direct_prim && !reorder_ok && !direct_self) {
    if (need_safety && !scheme_is_non_gc(rator, INIT_SIMPLE_DEPTH)) {
      scheme_stack_safety(jitter, need_safety, offset);
      CHECK_LIMIT();
      need_safety = 0;
    }

    scheme_generate_non_tail(rator, jitter, 0, !need_non_tail, 0); /* sync'd after args below */
    CHECK_LIMIT();

    if (num_rands) {
      /* Save rator where GC can see it */
      Scheme_Type t;
      arg = (alt_rands
             ? alt_rands[1+args_already_in_place]
             : app->args[1+args_already_in_place]);
      t = SCHEME_TYPE(arg);
      if ((num_rands == 1) && ((SAME_TYPE(scheme_local_type, t)
                                && (SCHEME_GET_LOCAL_TYPE(arg) != SCHEME_LOCAL_TYPE_FLONUM)
                                && (SCHEME_GET_LOCAL_TYPE(arg) != SCHEME_LOCAL_TYPE_EXTFLONUM))
                               || (t >= _scheme_values_types_))) {
        /* App of something complex to a local variable. We
           can move the proc directly to V1. */
        jit_movr_p(JIT_V1, JIT_R0);
        proc_already_in_place = 1;
      } else {
        mz_rs_stxi(num_rands - 1 + offset, JIT_R0);
        if (need_safety)
          need_safety--;
      }
    } else {
      jit_movr_p(JIT_V1, JIT_R0);
    }
  }
  /* not sync'd... */

#ifdef USE_FLONUM_UNBOXING
  if (direct_self && is_tail)
    direct_lam = jitter->self_lam;
#endif

#ifdef JIT_PRECISE_GC
  FOR_LOG(if (direct_lam) { LOG_IT((" [typed]\n")); } )
#endif

#ifdef USE_FLONUM_UNBOXING
  /* we want to push flonums into local storage in reverse order
     of evaluation, so make a pass to create space: */
  if (direct_lam
      && (SCHEME_LAMBDA_FLAGS(direct_lam) & LAMBDA_HAS_TYPED_ARGS)) {
    for (i = num_rands; i--; ) {
      int extfl;
      extfl = CLOSURE_ARGUMENT_IS_EXTFLONUM(direct_lam, i+args_already_in_place);
      if (extfl || CLOSURE_ARGUMENT_IS_FLONUM(direct_lam, i+args_already_in_place)) {
        /* make space: */
        scheme_generate_flonum_local_unboxing(jitter, 0, 1, extfl);
        CHECK_LIMIT();
      }
    }
  }
#endif

  for (i = 0; i < num_rands; i++) {
    PAUSE_JIT_DATA();
    arg = (alt_rands
           ? alt_rands[i+1+args_already_in_place]
           : app->args[i+1+args_already_in_place]);
    if (need_safety && !scheme_is_non_gc(arg, INIT_SIMPLE_DEPTH)) {
      scheme_stack_safety(jitter, need_safety - i, offset + i);
      CHECK_LIMIT();
      need_safety = 0;
    }
#ifdef USE_FLONUM_UNBOXING
    if (direct_lam)
      num_unsafe_struct_refs = 0;
    else
#endif
      num_unsafe_struct_refs = detect_unsafe_struct_refs(arg, alt_rands, app, i, num_rands, 1+args_already_in_place);
    if (num_unsafe_struct_refs > 1) {
      /* Found a sequence of `(unsafe-struct[*]-ref id 'number)` with
         sequential `number`s, so extract the whole group at once */
      v = (alt_rands
           ? alt_rands[i+1+args_already_in_place+num_unsafe_struct_refs-1]
           : app->args[i+1+args_already_in_place+num_unsafe_struct_refs-1]);
      mz_rs_sync();
      generate_unsafe_struct_ref_sequence(jitter, arg, v, num_unsafe_struct_refs, i + offset);
      CHECK_LIMIT();
      i += (num_unsafe_struct_refs - 1);
#ifdef USE_FLONUM_UNBOXING
    } else if (direct_lam
               && (SCHEME_LAMBDA_FLAGS(direct_lam) & LAMBDA_HAS_TYPED_ARGS)
               && (CLOSURE_ARGUMENT_IS_FLONUM(direct_lam, i+args_already_in_place)
                   || CLOSURE_ARGUMENT_IS_EXTFLONUM(direct_lam, i+args_already_in_place))) {
      int directly;
      int extfl;
      extfl = CLOSURE_ARGUMENT_IS_EXTFLONUM(direct_lam, i+args_already_in_place);
      jitter->unbox++;
      MZ_FPUSEL_STMT_ONLY(extfl, jitter->unbox_extflonum++);
      if (scheme_can_unbox_inline(arg, 5, JIT_FPUSEL_FPR_NUM(extfl)-1, 0, extfl))
        directly = 2;
      else if (scheme_can_unbox_directly(arg, extfl))
        directly = 1;
      else
        directly = 0;
      scheme_generate_unboxed(arg, jitter, directly, 1);
      MZ_FPUSEL_STMT_ONLY(extfl, --jitter->unbox_extflonum);
      --jitter->unbox;
      --jitter->unbox_depth;
      CHECK_LIMIT();

      /* use space made by scheme_generate_flonum_local_unboxing() above: */
      mz_st_fppop(jitter->flostack_offset - direct_flostack_offset,
                  MZ_FPUSEL(extfl, JIT_FPU_FPR0, JIT_FPR0),
                  extfl);
      direct_flostack_offset += MZ_FPUSEL(extfl, 2 * sizeof(double), sizeof(double));
      CHECK_LIMIT();

      if (SAME_TYPE(SCHEME_TYPE(arg), scheme_local_type)) {
        /* Keep local Scheme_Object* view, in case a box has been allocated */
        int apos;
        apos = mz_remap(SCHEME_LOCAL_POS(arg));
        mz_rs_ldxi(JIT_R0, apos);
      } else {
        (void)jit_movi_p(JIT_R0, NULL);
      }
#endif
    } else if (inline_direct_args) {
      if (inline_direct_args[i].gen)
        scheme_generate(arg, jitter, 0, 0, 0, inline_direct_args[i].reg, NULL, NULL);
    } else
      scheme_generate_non_tail(arg, jitter, 0, !need_non_tail, 0); /* sync'd below */
    RESUME_JIT_DATA();
    CHECK_LIMIT();

    if ((i == num_rands - 1) && !direct_prim && !reorder_ok && !direct_self && !proc_already_in_place) {
      /* Move rator back to register: */
      mz_rs_ldxi(JIT_V1, i + offset);
    }
    if ((!direct_prim || (num_rands > 1) || (no_call == 2))
        && (!direct_self || !is_tail || no_call || (i + 1 < num_rands))
        && !inline_direct_args) {
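      /* Storing R0 to the runstack doesn't disturb R0/R1, so preserve the
         recorded register-status across the store: */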
      int reg = mz_CURRENT_REG_STATUS_VALID();
      mz_rs_stxi(i + offset, JIT_R0);
      mz_SET_REG_STATUS_VALID(reg);
    }
  }
  /* not sync'd... */

  if (need_non_tail) {
    /* Uses JIT_R2: */
    scheme_generate_non_tail_mark_pos_suffix(jitter);
    CHECK_LIMIT();
  }

  if (direct_prim) {
    if (!no_call) {
      (void)jit_movi_p(JIT_V1, ((Scheme_Primitive_Proc *)rator)->prim_val);
      if (num_rands == 1) {
        if (num_pushes)
          mz_runstack_unskipped(jitter, 1);
      } else {
        mz_rs_sync();
        JIT_UPDATE_THREAD_RSPTR_IF_NEEDED();
      }
      LOG_IT(("direct: %s\n", ((Scheme_Primitive_Proc *)rator)->name));
    }
  }

  if (reorder_ok && !inline_direct_native) {
    if ((no_call < 2) && !apply_to_list) {
      scheme_generate(rator, jitter, 0, 0, 0, JIT_V1, NULL, NULL); /* sync'd below, or not */
    }
    CHECK_LIMIT();
  }

  if (!no_call)
    mz_rs_sync();

  END_JIT_DATA(20);

  if (direct_prim || direct_native || direct_self || nontail_self)
    scheme_direct_call_count++;
  else
    scheme_indirect_call_count++;

  if (direct_native && extract_case) {
    /* extract case from case-lambda */
    jit_ldxi_p(JIT_V1, JIT_V1, extract_case);
  }

  if (no_call) {
    /* leave actual call to inlining code */
  } else if (!(direct_self && is_tail)
             && !inline_direct_native
             && !almost_inline_direct_native
             && (num_rands >= MAX_SHARED_CALL_RANDS)) {
    LOG_IT(("<-many args\n"));
    if (is_tail) {
      scheme_mz_flostack_restore(jitter, 0, 0, 1, 1);
      if (direct_prim) {
        generate_direct_prim_tail_call(jitter, num_rands);
      } else {
        if (args_already_in_place) {
          jit_movi_l(JIT_R2, args_already_in_place);
          mz_set_local_p(JIT_R2, JIT_LOCAL2);
        }
        scheme_generate_tail_call(jitter, num_rands, direct_native, jitter->need_set_rs, 1,
                                  NULL, NULL, NULL);
      }
    } else {
      if (direct_prim)
        generate_direct_prim_non_tail_call(jitter, num_rands, multi_ok, 0);
      else {
        if (nontail_self) {
          generate_nontail_self_setup(jitter);
        }
        scheme_generate_non_tail_call(jitter, num_rands, direct_native, jitter->need_set_rs,
                                      multi_ok, result_ignored, nontail_self, 0, 1, 0, NULL);
      }
    }
  } else {
    /* Jump to code to implement a [tail-]call for `num_rands' arguments */
    void *code;
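    /* dp selects the shared-code variant; roughly: 0 = unknown rator,
       1 = direct primitive, 2 = direct native, and 3 = direct native
       with a nontail-self setup or with arguments already in place. */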
    int dp = (direct_prim ? 1 : (direct_native ? (1 + direct_native + (nontail_self ? 1 : 0)) : 0));
    /* if unboxed_non_tail_args, then we'll also use index 4 in place of dp */

    if (is_tail) {
      if (num_rands < MAX_SHARED_CALL_RANDS) {
        if (!sjc.shared_tail_code[dp][num_rands]) {
          code = scheme_generate_shared_call(num_rands, jitter, multi_ok, result_ignored, is_tail,
                                             direct_prim, direct_native, 0, 0);
          sjc.shared_tail_code[dp][num_rands] = code;
        }
        code = sjc.shared_tail_code[dp][num_rands];
      } else {
        /* We won't use this code pointer, because `direct_self` or similar. */
        code = NULL;
      }
      CHECK_NESTED_GENERATE();
      if (direct_self) {
        LOG_IT(("<-self\n"));
        generate_self_tail_call(rator, jitter, num_rands, code, args_already_in_place, direct_flostack_offset,
                                app, alt_rands);
        CHECK_LIMIT();
      } else if (inline_direct_native || almost_inline_direct_native) {
        LOG_IT(("<-native-tail\n"));
#ifdef USE_FLONUM_UNBOXING
        /* Copy unboxed flonums into place where the target code expects them: */
        generate_fp_argument_shift(direct_flostack_offset, jitter, JIT_FP);
        CHECK_LIMIT();
#endif
        scheme_mz_flostack_restore(jitter,
                                   FLOSTACK_SPACE_CHUNK * ((direct_flostack_offset + (FLOSTACK_SPACE_CHUNK - 1))
                                                           / FLOSTACK_SPACE_CHUNK),
                                   direct_flostack_offset,
                                   1, 1);
        /* move args and call function: */
        if (args_already_in_place) {
          jit_movi_l(JIT_R2, args_already_in_place);
          mz_set_local_p(JIT_R2, JIT_LOCAL2);
        }
        scheme_generate_tail_call(jitter, num_rands, direct_native, jitter->need_set_rs, 1,
                                  inline_direct_native, inline_direct_args,
#ifdef USE_FLONUM_UNBOXING
                                  almost_inline_direct_native ? direct_lam : NULL
#else
                                  NULL
#endif
                                  );
        CHECK_LIMIT();
      } else {
        scheme_mz_flostack_restore(jitter, 0, 0, 1, 1);
        LOG_IT(("<-tail\n"));
        if (args_already_in_place) {
          jit_movi_l(JIT_R2, args_already_in_place);
          mz_set_local_p(JIT_R2, JIT_LOCAL2);
        }
        if (apply_to_list) {
          jit_movi_i(JIT_V1, num_rands);
          (void)jit_jmpi(sjc.apply_to_list_tail_code);
        } else {
          (void)jit_jmpi(code);
        }
      }
    } else {
      int mo = (multi_ok
                ? (result_ignored ? SHARED_RESULT_IGNORED_CASE : SHARED_MULTI_OK_CASE)
                : SHARED_SINGLE_VALUE_CASE);
#ifdef USE_FLONUM_UNBOXING
      void *unboxed_code;
#endif

      if (unboxed_non_tail_args && !direct_flostack_offset)
        unboxed_non_tail_args = 0;

#ifdef USE_FLONUM_UNBOXING
      if (unboxed_non_tail_args) {
        if (!sjc.shared_non_tail_code[4][num_rands][mo]) {
          scheme_ensure_retry_available(jitter, multi_ok, result_ignored);
          code = scheme_generate_shared_call(num_rands, jitter, multi_ok, result_ignored, is_tail,
                                             direct_prim, direct_native, nontail_self, 1);
          sjc.shared_non_tail_code[4][num_rands][mo] = code;
        }
        unboxed_code = sjc.shared_non_tail_code[4][num_rands][mo];
        CHECK_NESTED_GENERATE();
      } else
        unboxed_code = NULL;
#endif

      if (num_rands < MAX_SHARED_CALL_RANDS) {
        if (!sjc.shared_non_tail_code[dp][num_rands][mo]) {
          scheme_ensure_retry_available(jitter, multi_ok, result_ignored);
          code = scheme_generate_shared_call(num_rands, jitter, multi_ok, result_ignored, is_tail,
                                             direct_prim, direct_native, nontail_self, 0);
          sjc.shared_non_tail_code[dp][num_rands][mo] = code;
        }
        code = sjc.shared_non_tail_code[dp][num_rands][mo];
      } else {
        /* Not used, due to `apply_to_list` */
        code = NULL;
      }
      LOG_IT(("<-non-tail %d %d %d\n", dp, num_rands, mo));
      CHECK_NESTED_GENERATE();

      if (nontail_self) {
        generate_nontail_self_setup(jitter);
      }

      if (apply_to_list) {
        jit_movi_i(JIT_V1, num_rands);
        if (multi_ok)
          (void)jit_calli(sjc.apply_to_list_multi_ok_code);
        else
          (void)jit_calli(sjc.apply_to_list_code);
      } else {
        GC_CAN_IGNORE jit_insn *refdone = NULL;

#ifdef USE_FLONUM_UNBOXING
        if (unboxed_code) {
          generate_call_path_with_unboxes(jitter, direct_flostack_offset, unboxed_code, &refdone,
                                          num_rands, direct_lam, rator);
          CHECK_LIMIT();
        }
#endif

        (void)jit_calli(code);

        if (refdone)
          mz_patch_branch(refdone);
      }

      if (direct_prim) {
        if (num_rands == 1) {
          /* Popped single argument after return of prim: */
          jitter->need_set_rs = 1;
        } else {
          /* Runstack is up-to-date: */
          jitter->need_set_rs = 0;
        }
      } else {
        /* Otherwise, we may have called native code, which may have left
           the runstack register out of sync with scheme_current_runstack. */
        jitter->need_set_rs = 1;
      }
    }
  }

  END_JIT_DATA(need_non_tail ? 22 : 4);

  return is_tail ? 2 : 1;
}


static int detect_unsafe_struct_refs(Scheme_Object *arg, Scheme_Object **alt_rands, Scheme_App_Rec *app,
                                     int i, int num_rands, int shift)
/* Look for `(unsafe-struct[*]-ref id 'num)` ... as a sequence of
   arguments, which shows up as a result of `struct-copy`, and return
   the length of the sequence. Instead of performing each
   `unsafe-struct[*]-ref` separately, which can involve a chaperone test
   each time, we'll test once and extract all. */
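/* For example (hypothetical names), a `struct-copy` of a three-field
   struct `p` that replaces only the first field can compile to an
   application whose trailing arguments are `(unsafe-struct-ref p '1)`
   and `(unsafe-struct-ref p '2)`; those two arguments form a detectable
   sequence of length 2. */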
{
  Scheme_App3_Rec *app3, *next_app3;
  Scheme_Object *next_arg;

  if (SAME_TYPE(SCHEME_TYPE(arg), scheme_application3_type)) {
    app3 = (Scheme_App3_Rec *)arg;
    if ((SAME_OBJ(app3->rator, scheme_unsafe_struct_ref_proc)
         || SAME_OBJ(app3->rator, scheme_unsafe_struct_star_ref_proc))
        && SAME_TYPE(SCHEME_TYPE(app3->rand1), scheme_local_type)
        && SCHEME_INTP(app3->rand2)) {
      int seq = 1, delta = SCHEME_INT_VAL(app3->rand2) - i;
      i++;
      while (i < num_rands) {
        next_arg = (alt_rands ? alt_rands[i+shift] : app->args[i+shift]);
        if (SAME_TYPE(SCHEME_TYPE(next_arg), scheme_application3_type)) {
          next_app3 = (Scheme_App3_Rec *)next_arg;
          if ((SAME_OBJ(next_app3->rator, scheme_unsafe_struct_ref_proc)
               || SAME_OBJ(next_app3->rator, scheme_unsafe_struct_star_ref_proc))
              && SAME_TYPE(SCHEME_TYPE(next_app3->rand1), scheme_local_type)
              && SCHEME_INTP(next_app3->rand2)
              && (SCHEME_INT_VAL(next_app3->rand2) == i + delta)
              && (SCHEME_LOCAL_POS(next_app3->rand1) == SCHEME_LOCAL_POS(app3->rand1))) {
            seq++;
            i++;
          } else
            break;
        } else
          break;
      }
      return seq;
    }
  }

  return 0;
}

static int generate_unsafe_struct_ref_sequence(mz_jit_state *jitter, Scheme_Object *arg, Scheme_Object *last_arg,
                                               int count, int stack_pos)
/* Implement a sequence discovered by `detect_unsafe_struct_refs()`. */
{
  Scheme_App3_Rec *app3 = (Scheme_App3_Rec *)arg;
  int i, base = SCHEME_INT_VAL(app3->rand2);
  GC_CAN_IGNORE jit_insn *ref2;

  /* Using `last_arg` ensures that we clear the local, if needed */
  mz_runstack_skipped(jitter, 2);
  scheme_generate(((Scheme_App3_Rec *)last_arg)->rand1, jitter, 0, 0, 0, JIT_R0, NULL, NULL);
  CHECK_LIMIT();
  mz_runstack_unskipped(jitter, 2);

  /* Check for chaperones, and take slow path if found */
  __START_SHORT_JUMPS__(1);
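  /* Only `unsafe-struct-ref` needs the chaperone guard; `unsafe-struct*-ref`
     is specified to bypass chaperones, so the fast path applies directly. */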
  if (SAME_OBJ(app3->rator, scheme_unsafe_struct_ref_proc)) {
    GC_CAN_IGNORE jit_insn *ref, *refslow;
    jit_ldxi_s(JIT_R2, JIT_R0, &((Scheme_Object *)0x0)->type);
    ref = jit_bnei_i(jit_forward(), JIT_R2, scheme_chaperone_type);
    refslow = jit_get_ip();
    jit_addi_p(JIT_R1, JIT_RUNSTACK, WORDS_TO_BYTES(stack_pos));
    jit_str_p(JIT_R1, JIT_R0);
    jit_movi_i(JIT_V1, base);
    jit_movi_i(JIT_R0, count);
    (void)jit_calli(sjc.struct_raw_refs_code);
    ref2 = jit_jmpi(jit_forward());
    mz_patch_branch(ref);
    (void)jit_beqi_i(refslow, JIT_R2, scheme_proc_chaperone_type);
    CHECK_LIMIT();
  } else
    ref2 = NULL;

  /* This is the fast path: */
  for (i = 0; i < count; i++) {
    jit_ldxi_p(JIT_R1, JIT_R0, (intptr_t)&(((Scheme_Structure *)0x0)->slots[i+base]));
    if (i != count - 1)
      mz_rs_stxi(stack_pos+i, JIT_R1);
    else
      jit_movr_p(JIT_R0, JIT_R1);
    CHECK_LIMIT();
  }

  if (ref2)
    mz_patch_branch(ref2);
  __END_SHORT_JUMPS__(1);

  return 1;
}

#endif