1 #include "schpriv.h"
2 #include "schmach.h"
3 #include "future.h"
4
5 #ifdef MZ_USE_JIT
6
7 #include "jit.h"
8
9 #ifdef USE_FLONUM_UNBOXING
10 static int generate_argument_boxing(mz_jit_state *jitter, Scheme_Lambda *lam,
11 int num_rands, int args_already_in_place,
12 int offset, int direct_flostack_offset,
13 int save_reg,
14 /* used only to skip unneeded checks: */
15 Scheme_App_Rec *app, Scheme_Object **alt_rands);
16 #endif
17
18 static int detect_unsafe_struct_refs(Scheme_Object *arg, Scheme_Object **alt_rands, Scheme_App_Rec *app,
19 int i, int num_rands, int shift);
20 static int generate_unsafe_struct_ref_sequence(mz_jit_state *jitter, Scheme_Object *arg, Scheme_Object *last_arg,
21 int count, int stack_pos);
22
23 int scheme_direct_call_count, scheme_indirect_call_count;
24
/* Per-argument record for a direct (register-passed) call:
   `reg` is the JIT register that carries the argument value;
   `gen` presumably flags whether code must be generated to load
   the argument — TODO confirm against the code that fills this in. */
struct jit_direct_arg {
  int gen;
  int reg;
};
29
/* Side channel from JIT-generated code to
   _scheme_tail_apply_from_native_fixup_args: the runstack base to use
   and the number of arguments already in place for the tail call. */
THREAD_LOCAL_DECL(static Scheme_Object **fixup_runstack_base);
THREAD_LOCAL_DECL(static int fixup_already_in_place);
32
/* Clears `amt` slots of the runstack segment `rs` (so the GC does not
   retain stale argument values) and returns `sv` unchanged, which lets
   this be used as a pass-through in a call sequence. */
static Scheme_Object *clear_runstack(Scheme_Object **rs, intptr_t amt, Scheme_Object *sv)
{
  intptr_t i; /* match the width of `amt`; an `int` index could truncate
                 and provoke a signed/width mismatch on 64-bit targets */
  for (i = 0; i < amt; i++) {
    rs[i] = NULL;
  }
  return sv;
}
41
42 #define JITCALL_TS_PROCS
43 #define JIT_APPLY_TS_PROCS
44 #include "jit_ts.c"
45
46 /*========================================================================*/
47 /* application codegen */
48 /*========================================================================*/
49
/* Emits code that handles a callee which is not a plain native closure:
   an applicable struct, or a property-only/unsafe procedure chaperone.
   On entry the emitted code expects the callee in JIT_V1 and its type
   tag in JIT_R1. On success, the underlying procedure is extracted into
   JIT_V1 and the emitted code jumps back to `refagain` to retry the
   call. Every failure case (arity mismatch, non-field proc spec, etc.)
   is patched to fall through to whatever the caller emits next (the
   slow path). Returns a still-unpatched branch taken when the value is
   neither an applicable struct nor a matching chaperone; the caller
   must patch it. */
static jit_insn *generate_proc_struct_retry(mz_jit_state *jitter, int num_rands, GC_CAN_IGNORE jit_insn *refagain)
{
  GC_CAN_IGNORE jit_insn *ref2, *ref3, *refz1, *refz2, *refz3, *refy3, *refz4, *refz5;
  GC_CAN_IGNORE jit_insn *refz6, *refz7, *refz8, *refz9, *ref9, *ref10;

  ref2 = jit_bnei_i(jit_forward(), JIT_R1, scheme_proc_struct_type);

  /* This is an applicable struct. But if it's for reducing arity,
     then we can't just apply the struct's procedure. */
  jit_ldxi_p(JIT_R1, JIT_V1, &((Scheme_Structure *)0x0)->stype);
  jit_ldi_p(JIT_R2, &scheme_reduced_procedure_struct);
  if (num_rands <= SCHEME_MAX_FAST_ARITY_CHECK) {
    ref3 = jit_bner_p(jit_forward(), JIT_R1, JIT_R2);

    /* Matches reduced arity in a simple way?
       Slot [1] holds the arity; a fixnum there is treated as a
       bitmask checked against 1 << (num_rands + 1). */
    jit_ldxi_p(JIT_R2, JIT_V1, &((Scheme_Structure *)0x0)->slots[1]);
    refy3 = jit_bmci_l(jit_forward(), JIT_R2, 1); /* not a fixnum? */
    refz3 = jit_bmci_l(jit_forward(), JIT_R2, (1 << (num_rands + 1)));

    /* Yes, matches */
    mz_patch_branch(ref3);
  } else {
    /* Too many arguments for fast check, so assume it doesn't match */
    refz3 = jit_beqr_p(jit_forward(), JIT_R1, JIT_R2);
    refy3 = NULL;
  }

  /* It's an applicable struct that is not an arity reduce or the
     arity matches. We can extract the procedure if it's in a field: */
  jit_ldxi_p(JIT_R1, JIT_R1, &((Scheme_Struct_Type *)0x0)->proc_attr);
  refz1 = jit_bmci_i(jit_forward(), JIT_R1, 0x1); /* fixnum => field index */
  CHECK_LIMIT();

  /* Proc is a field in the record; turn the fixnum into a byte offset
     into the slots array and load the wrapped procedure: */
  jit_rshi_ul(JIT_R1, JIT_R1, 1);
  jit_lshi_ul(JIT_R1, JIT_R1, JIT_LOG_WORD_SIZE);
  jit_addi_p(JIT_R1, JIT_R1, &((Scheme_Structure *)0x0)->slots);
  jit_ldxr_p(JIT_R1, JIT_V1, JIT_R1);
  CHECK_LIMIT();

  /* JIT_R1 now has the wrapped procedure */
  refz4 = jit_bmsi_i(jit_forward(), JIT_R1, 0x1); /* fixnum => not a proc */
  refz2 = mz_bnei_t(jit_forward(), JIT_R1, scheme_native_closure_type, JIT_R2);
  CHECK_LIMIT();

  /* It's a native closure, but we can't just jump to it, in case
     the arity is wrong and an error needs to be reported using
     the original wrapper. */
  mz_prepare(2);
  jit_movi_i(JIT_R0, num_rands);
  jit_pusharg_i(JIT_R0); /* argc */
  jit_pusharg_p(JIT_R1); /* closure */
  (void)mz_finish_unsynced_runstack(scheme_native_arity_check);
  CHECK_LIMIT();
  jit_retval(JIT_R0);
  refz5 = jit_beqi_i(jit_forward(), JIT_R0, 0); /* arity check failed */
  CHECK_LIMIT();

  /* Extract proc again (the call above may have clobbered R1), then loop */
  jit_ldxi_p(JIT_R1, JIT_V1, &((Scheme_Structure *)0x0)->stype);
  jit_ldxi_p(JIT_R1, JIT_R1, &((Scheme_Struct_Type *)0x0)->proc_attr);
  jit_rshi_ul(JIT_R1, JIT_R1, 1);
  jit_lshi_ul(JIT_R1, JIT_R1, JIT_LOG_WORD_SIZE);
  jit_addi_p(JIT_R1, JIT_R1, &((Scheme_Structure *)0x0)->slots);
  jit_ldxr_p(JIT_V1, JIT_V1, JIT_R1);
  (void)jit_jmpi(refagain);
  CHECK_LIMIT();

  mz_patch_branch(ref2);
  /* check for a procedure impersonator that just keeps properties
     or is the result of unsafe-{impersonate,chaperone}-procedure */
  ref2 = jit_bnei_i(jit_forward(), JIT_R1, scheme_proc_chaperone_type);
  jit_ldxi_p(JIT_R1, JIT_V1, &((Scheme_Chaperone *)0x0)->redirects);
  refz6 = mz_bnei_t(jit_forward(), JIT_R1, scheme_vector_type, JIT_R2);
  (void)jit_ldxi_l(JIT_R2, JIT_R1, &SCHEME_VEC_SIZE(0x0));
  refz7 = jit_bmci_i(jit_forward(), JIT_R2, 0x1);
  /* Flag is set for a property-only or unsafe chaperone: */
  jit_ldxi_s(JIT_R2, JIT_V1, &SCHEME_CHAPERONE_FLAGS(((Scheme_Chaperone *)0x0)));
  refz8 = jit_bmci_ul(jit_forward(), JIT_R2, SCHEME_PROC_CHAPERONE_CALL_DIRECT);
  /* In the case of an unsafe chaperone, we can only make a direct
     call if the arity-check will succeed, otherwise the error message
     will use the wrong name. Vector slot [1] holds the required arity
     (or #f for "no constraint"). */
  jit_ldxi_p(JIT_R2, JIT_R1, &(SCHEME_VEC_ELS(0x0)[1]));
  ref9 = jit_beqi_p(jit_forward(), JIT_R2, scheme_false);
  refz9 = jit_bnei_p(jit_forward(), JIT_R2, scheme_make_integer(num_rands));
  mz_patch_branch(ref9);
  CHECK_LIMIT();
  /* If the vector is immutable, we need to provide the self proc,
     if it's not provided already. The self proc is supplied through
     a side channel in the thread record. */
  jit_ldxi_s(JIT_R2, JIT_R1, &MZ_OPT_HASH_KEY((Scheme_Inclhash_Object *)(0x0)));
  ref9 = jit_bmci_i(jit_forward(), JIT_R2, 0x1);
  (void)mz_tl_ldi_p(JIT_R2, tl_scheme_current_thread);
  jit_ldxi_l(JIT_R1, JIT_R2, &((Scheme_Thread *)0x0)->self_for_proc_chaperone);
  ref10 = jit_bnei_p(jit_forward(), JIT_R1, NULL); /* already provided? */
  jit_stxi_l(&((Scheme_Thread *)0x0)->self_for_proc_chaperone, JIT_R2, JIT_V1);
  mz_patch_branch(ref10);
  jit_ldxi_p(JIT_R1, JIT_V1, &((Scheme_Chaperone *)0x0)->redirects);
  mz_patch_branch(ref9);
  /* Position [0] in SCHEME_VEC_ELS contains either the
     unwrapped function (if chaperone-procedure got #f
     for the proc argument) or the unsafe-chaperone
     replacement-proc argument; either way, just call it */
  jit_ldxi_p(JIT_V1, JIT_R1, &(SCHEME_VEC_ELS(0x0)[0]));
  (void)jit_jmpi(refagain);
  CHECK_LIMIT();

  /* All failure branches land here, i.e., on whatever the caller
     emits next (the slow path): */
  mz_patch_branch(refz1);
  mz_patch_branch(refz2);
  mz_patch_branch(refz3);
  if (refy3)
    mz_patch_branch(refy3);
  mz_patch_branch(refz4);
  mz_patch_branch(refz5);
  mz_patch_branch(refz6);
  mz_patch_branch(refz7);
  mz_patch_branch(refz8);
  mz_patch_branch(refz9);

  return ref2;
}
171
172 #ifdef INSTRUMENT_PRIMITIVES
173 extern int g_print_prims;
174 #endif
175
176 /* Support for intercepting direct calls to primitives: */
177 #ifdef MZ_USE_FUTURES
178
scheme_noncm_prim_indirect(Scheme_Prim proc,int argc)179 Scheme_Object *scheme_noncm_prim_indirect(Scheme_Prim proc, int argc)
180 XFORM_SKIP_PROC
181 {
182 if (scheme_use_rtcall)
183 return scheme_rtcall_iS_s("[prim_indirect]",
184 FSRC_PRIM,
185 proc,
186 argc,
187 MZ_RUNSTACK);
188 else
189 return proc(argc, MZ_RUNSTACK);
190 }
191
/* Invokes a primitive closure `proc` with `self` on the current
   runstack; like scheme_noncm_prim_indirect, routes through the
   future rtcall mechanism when not on the runtime thread. */
Scheme_Object *scheme_prim_indirect(Scheme_Primitive_Closure_Proc proc, int argc, Scheme_Object *self)
  XFORM_SKIP_PROC
{
  if (!scheme_use_rtcall)
    return proc(argc, MZ_RUNSTACK, self);

  return scheme_rtcall_iSs_s("[prim_indirect]", FSRC_PRIM, proc, argc, MZ_RUNSTACK, self);
}
200
201 #endif
202
203 /* Various specific 'futurized' versions of primitives that may
204 be invoked directly from JIT code and are not considered thread-safe
205 (are not invoked via apply_multi_from_native, etc.) */
206
207 #ifdef MZ_USE_FUTURES
/* Future-aware version of _scheme_tail_apply_from_native: when called
   from a future thread, tries to record the tail call in the thread
   record (copying the argument array so it does not alias the
   runstack) and returns SCHEME_TAIL_CALL_WAITING; if the copy cannot
   be allocated future-locally, falls back to a runtime-thread rtcall. */
static Scheme_Object *ts__scheme_tail_apply_from_native(Scheme_Object *rator, int argc, Scheme_Object **argv)
  XFORM_SKIP_PROC
{
  if (scheme_use_rtcall) {
    /* try thread-local allocation: */
    Scheme_Object **a;
#ifdef MZ_PRECISE_GC
    if (scheme_future_is_runtime_thread())
      a = NULL;
    else
      a = MALLOC_N(Scheme_Object *, argc);
#else
    a = NULL; /* future-local allocation is not supported */
#endif
    if (a) {
      Scheme_Thread *p = scheme_current_thread;
      memcpy(a, argv, argc * sizeof(Scheme_Object*));
      /* Park the pending tail call in the thread record: */
      p->ku.apply.tail_rator = rator;
      p->ku.apply.tail_num_rands = argc;
      p->ku.apply.tail_rands = a;
      return SCHEME_TAIL_CALL_WAITING;
    } else
      return scheme_rtcall_tail_apply(rator, argc, argv);
  } else
    return _scheme_tail_apply_from_native(rator, argc, argv);
}
234 #else
235 # define ts__scheme_tail_apply_from_native _scheme_tail_apply_from_native
236 #endif
237
/* Completes a tail call when some arguments were already placed on the
   runstack by the caller (communicated via the thread-local
   fixup_runstack_base / fixup_already_in_place): shifts the remaining
   `argc` arguments from `argv` to just after the in-place ones, sets
   MZ_RUNSTACK to the combined argument block, and performs the tail
   apply with the full argument count. */
static Scheme_Object *_scheme_tail_apply_from_native_fixup_args(Scheme_Object *rator,
                                                                int argc,
                                                                Scheme_Object **argv)
  XFORM_SKIP_PROC
{
  int already = fixup_already_in_place, i;
  Scheme_Object **base;

  /* Start of the combined argument block on the runstack: */
  base = fixup_runstack_base XFORM_OK_MINUS argc XFORM_OK_MINUS already;

  /* Need to shift argc to end of base: */
  for (i = 0; i < argc; i++) {
    base[already + i] = argv[i];
  }

  /* In futures mode, it's important that the argument array matches
     runstack: */
  MZ_RUNSTACK = base;

  return ts__scheme_tail_apply_from_native(rator, argc + already, base);
}
259
260 #if defined(MZ_USE_FUTURES) && defined(MZ_PRECISE_GC)
261
/* Handles an apply that reached this point due to stack overflow while
   running in a future. If the rator is a native closure that can run
   in a future, restart it on a fresh stack via rtcall; otherwise fall
   back to the ordinary (multi- or single-value) apply path. */
static Scheme_Object *try_future_local_stack_overflow(Scheme_Object *rator, int argc, Scheme_Object **argv, int multi)
  XFORM_SKIP_PROC
{
  if (!SAME_TYPE(SCHEME_TYPE(rator), scheme_native_closure_type)
      || !scheme_can_apply_native_in_future(rator)) {
    if (multi)
      return ts__scheme_apply_multi_from_native(rator, argc, argv);
    return ts__scheme_apply_from_native(rator, argc, argv);
  }

  /* the only reason to get here is stack overflow,
     either for the runstack or C stack */
  return scheme_rtcall_apply_with_new_stack(rator, argc, argv, multi);
}
275
/* Multi-value apply entry used by JIT code: diverts through the
   future stack-overflow handler when in a future thread. */
static Scheme_Object *x_ts__scheme_apply_multi_from_native(Scheme_Object *rator, int argc, Scheme_Object **argv)
  XFORM_SKIP_PROC
{
  return (scheme_use_rtcall
          ? try_future_local_stack_overflow(rator, argc, argv, 1)
          : _scheme_apply_multi_from_native(rator, argc, argv));
}
284
/* Single-value apply entry used by JIT code: diverts through the
   future stack-overflow handler when in a future thread. */
static Scheme_Object *x_ts__scheme_apply_from_native(Scheme_Object *rator, int argc, Scheme_Object **argv)
  XFORM_SKIP_PROC
{
  return (scheme_use_rtcall
          ? try_future_local_stack_overflow(rator, argc, argv, 0)
          : _scheme_apply_from_native(rator, argc, argv));
}
293
294 #else
295 # define x_ts__scheme_apply_multi_from_native ts__scheme_apply_multi_from_native
296 # define x_ts__scheme_apply_from_native ts__scheme_apply_from_native
297 #endif
298
/* Emits a check of the future GC-pause flag. If a pause is requested,
   the emitted code saves the scratch registers, exposes `gc_reg` to
   the GC through tl_jit_future_storage, calls scheme_future_gc_pause,
   restores everything, and retries at `refagain`; if no pause is
   needed, it restores and falls through to whatever the caller emits
   next (the slow path). Without futures support, emits nothing. */
static int generate_pause_for_gc_and_retry(mz_jit_state *jitter,
                                           int in_short_jumps,
                                           int gc_reg, /* must not be JIT_R1 */
                                           GC_CAN_IGNORE jit_insn *refagain)
{
#ifdef MZ_USE_FUTURES
  GC_CAN_IGNORE jit_insn *refslow = 0, *refpause;
  int i;

  mz_rs_sync();

  /* expose gc_reg to GC */
  mz_tl_sti_p(tl_jit_future_storage, gc_reg, JIT_R1);

  /* Save non-preserved registers. Use a multiple of 4 to avoid
     alignment problems. */
  jit_pushr_l(JIT_R1);
  jit_pushr_l(JIT_R2);
  jit_pushr_l(JIT_R0);
  jit_pushr_l(JIT_R0); /* R0 pushed twice only to keep the count at 4 */
  CHECK_LIMIT();

  mz_tl_ldi_i(JIT_R0, tl_scheme_future_need_gc_pause);
  refpause = jit_bgti_i(jit_forward(), JIT_R0, 0);

  /* Emit the restore sequence twice: i == 0 is the no-pause path
     (jumps to the slow path), i == 1 is the after-pause path
     (jumps back to refagain). */
  for (i = 0; i < 2; i++) {
    /* Restore non-preserved registers, and also move the gc-exposed
       register back. */
    if (i == 1) {
      mz_patch_branch(refpause);
      JIT_UPDATE_THREAD_RSPTR();
      jit_prepare(0);
      mz_finish(scheme_future_gc_pause);
    }
    jit_popr_l(JIT_R0);
    jit_popr_l(JIT_R0);
    jit_popr_l(JIT_R2);
    CHECK_LIMIT();
    mz_tl_ldi_p(gc_reg, tl_jit_future_storage);
    jit_movi_p(JIT_R1, NULL);
    mz_tl_sti_p(tl_jit_future_storage, JIT_R1, JIT_R2); /* un-expose gc_reg */
    jit_popr_l(JIT_R1);
    CHECK_LIMIT();
    if (!i)
      refslow = jit_jmpi(jit_forward());
    else
      (void)jit_jmpi(refagain);
  }

  mz_patch_ucbranch(refslow);

  return 1;
#else
  return 1;
#endif
}
355
/* Emits a tail call to a known non-continuation-mark primitive via
   scheme_noncm_prim_indirect, then returns from the JIT frame.
   Returns 1 (emission success). */
static int generate_direct_prim_tail_call(mz_jit_state *jitter, int num_rands)
{
  /* JIT_V1 must have the target function pointer.
     Also, scheme_current_runstack must be up-to-date...
     unless num-rands == 1, in which case JIT_R0 must
     have the argument. */
  if (num_rands == 1) {
    /* Push the single argument from R0 onto the runstack: */
    jit_subi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(1));
    CHECK_RUNSTACK_OVERFLOW();
    jit_str_p(JIT_RUNSTACK, JIT_R0);
    JIT_UPDATE_THREAD_RSPTR();
  }
  jit_movi_i(JIT_R1, num_rands);
  mz_prepare_direct_prim(2); /* a prim takes 3 args, but a NONCM prim ignores the 3rd */
  CHECK_LIMIT();
  {
    /* May use JIT_R0 and create local branch: */
    mz_generate_direct_prim(jit_pusharg_p(JIT_RUNSTACK),
                            jit_pusharg_i(JIT_R1),
                            JIT_V1, scheme_noncm_prim_indirect);
  }
  CHECK_LIMIT();
  /* Return: */
  mz_pop_threadlocal();
  mz_pop_locals();
  jit_ret();

  return 1;
}
385
/* Registers available for passing call arguments directly in
   registers instead of through the runstack: */
#define NUM_AVAIL_DIRECT_ARG_REGS 3
static const int direct_arg_regs[] = { JIT_V1, JIT_R1, JIT_R0 };
388
/* Emits a tail call. The fast path jumps straight into the callee's
   native tail code after moving arguments to the runstack base; slow
   paths handle applicable structs/chaperones, GC pauses, runstack
   overflow, fuel exhaustion, and non-native callees by jumping to
   shared static "finish tail call" code. Returns 1 (emission
   success). */
int scheme_generate_tail_call(mz_jit_state *jitter, int num_rands, int direct_native, int need_set_rs,
                              int is_inline, Scheme_Native_Closure *direct_to_code, jit_direct_arg *direct_args,
                              Scheme_Lambda *direct_lam)
/* Proc is in V1 unless direct_to_code, args are at RUNSTACK.
   If num_rands < 0, then argc is in LOCAL2 and arguments are already below RUNSTACK_BASE.
   If direct_native == 2, then some arguments are already in place (shallower in the runstack
   than the arguments to move).
   If direct_args, then R0, R1, V1 hold arguments.
   If direct lam in unboxing mode, slow path needs to box flonum arguments; num_rands
   must be >= 0 */
{
  int i, r2_has_runstack = 0;
  GC_CAN_IGNORE jit_insn *top_refagain, *refagain, *ref, *ref2, *ref4, *ref5;

  /* Short jumps are safe only for small argument counts: */
  __START_SHORT_JUMPS__(num_rands < 100);

  top_refagain = jit_get_ip();

  /* First, try fast direct jump to native code: */
  if (!direct_native) {
    ref = jit_bmsi_ul(jit_forward(), JIT_V1, 0x1); /* fixnum => not a closure */
    jit_ldxi_s(JIT_R1, JIT_V1, &((Scheme_Object *)0x0)->type);
    ref2 = jit_bnei_i(jit_forward(), JIT_R1, scheme_native_closure_type);
    /* code at ref2 uses JIT_R1 */
    CHECK_LIMIT();
  } else {
    ref = ref2 = NULL;
  }

  refagain = jit_get_ip();

  /* Right kind of function. Extract data and check stack depth: */
  if (!direct_to_code) {
    jit_ldxi_p(JIT_R0, JIT_V1, &((Scheme_Native_Closure *)0x0)->code);
    jit_ldxi_i(JIT_R2, JIT_R0, &((Scheme_Native_Lambda *)0x0)->max_let_depth);
    mz_tl_ldi_p(JIT_R1, tl_MZ_RUNSTACK_START);
    jit_subr_ul(JIT_R1, JIT_RUNSTACK, JIT_R1); /* remaining runstack bytes */
    ref4 = jit_bltr_ul(jit_forward(), JIT_R1, JIT_R2); /* not enough room */
    CHECK_LIMIT();
  } else
    ref4 = NULL;

  /* Fast jump ok (proc will check argc).
     At this point, V1 = closure (unless direct_to_code) and R0 = code. */

  /* Check for thread swap: */
  if (!direct_to_code) {
    (void)mz_tl_ldi_i(JIT_R2, tl_scheme_fuel_counter);
    ref5 = jit_blei_i(jit_forward(), JIT_R2, 0); /* out of fuel => slow path */
#ifndef FUEL_AUTODECEREMENTS
    jit_subi_p(JIT_R2, JIT_R2, 0x1);
    (void)mz_tl_sti_i(tl_scheme_fuel_counter, JIT_R2, JIT_R1);
#endif
  } else
    ref5 = NULL;
  CHECK_LIMIT();

  /* Copy args to runstack base: */
  if (num_rands >= 0) {
    /* Fixed argc: */
    if (num_rands) {
      mz_ld_runstack_base_alt(JIT_R2);
      jit_subi_p(JIT_R2, JIT_RUNSTACK_BASE_OR_ALT(JIT_R2), WORDS_TO_BYTES(num_rands));
      CHECK_RUNSTACK_OVERFLOW();
      for (i = num_rands; i--; ) {
        if (direct_args) {
          /* Argument is already in a register: */
          int reg = direct_args[i].reg;
          jit_stxi_p(WORDS_TO_BYTES(i), JIT_R2, reg);
        } else {
          /* Move argument from the current runstack position: */
          jit_ldxi_p(JIT_R1, JIT_RUNSTACK, WORDS_TO_BYTES(i));
          jit_stxi_p(WORDS_TO_BYTES(i), JIT_R2, JIT_R1);
        }
        CHECK_LIMIT();
      }
      jit_movr_p(JIT_RUNSTACK, JIT_R2);
      r2_has_runstack = 1;
    } else {
#ifdef JIT_RUNSTACK_BASE
      jit_movr_p(JIT_RUNSTACK, JIT_RUNSTACK_BASE);
#else
      mz_get_local_p(JIT_RUNSTACK, JIT_RUNSTACK_BASE_LOCAL);
#endif
    }
    if (direct_native > 1) { /* => some_args_already_in_place */
      /* Make room below for the arguments already in place: */
      mz_get_local_p(JIT_R1, JIT_LOCAL2);
      jit_lshi_l(JIT_R1, JIT_R1, JIT_LOG_WORD_SIZE);
      jit_subr_p(JIT_RUNSTACK, JIT_RUNSTACK, JIT_R1);
      r2_has_runstack = 0;
      CHECK_RUNSTACK_OVERFLOW();
    }
  } else {
    /* Variable argc (in LOCAL2):
       arguments are already in place. */
  }
  /* RUNSTACK, RUNSTACK_BASE, V1 (unless direct_to_code), and R0 are ready */

  /* Extract function and data: */
  if (!direct_to_code) {
    jit_movr_p(JIT_R2, JIT_V1);
    r2_has_runstack = 0;
    if (direct_native) {
      jit_ldxi_p(JIT_V1, JIT_R0, &((Scheme_Native_Lambda *)0x0)->u.tail_code);
    } else {
      /* Not known to be native, so enter through the arity-checking code: */
      jit_ldxi_p(JIT_V1, JIT_R0, &((Scheme_Native_Lambda *)0x0)->arity_code);
    }
    jit_movr_p(JIT_R0, JIT_R2);
  }
  /* Set up arguments; JIT_RUNSTACK and JIT_RUNSTACK_BASE must also be ready */
  if (num_rands >= 0) {
    jit_movi_i(JIT_R1, num_rands);
    if (direct_native > 1) { /* => some_args_already_in_place */
      mz_get_local_p(JIT_R2, JIT_LOCAL2);
      jit_addr_i(JIT_R1, JIT_R1, JIT_R2); /* total argc */
      r2_has_runstack = 0;
    }
  } else {
    mz_get_local_p(JIT_R1, JIT_LOCAL2);
  }
  if (!r2_has_runstack)
    jit_movr_p(JIT_R2, JIT_RUNSTACK);
  if (need_set_rs && !direct_to_code) {
    /* In case arity check fails, need to update runstack now: */
    JIT_UPDATE_THREAD_RSPTR();
  }
  if (direct_native && direct_to_code) {
    __END_SHORT_JUMPS__(num_rands < 100);
    /* load closure pointer into R0: */
    scheme_mz_load_retained(jitter, JIT_R0, direct_to_code);
    /* jump directly: */
    (void)jit_jmpi(direct_to_code->code->u.tail_code);
    /* no slow path in this mode */
    return 1;
  }
  /* Now jump: */
  jit_jmpr(JIT_V1);
  CHECK_LIMIT();

  if (!direct_native && !is_inline && (num_rands >= 0)) {
    /* Handle simple applicable struct: */
    mz_patch_branch(ref2);
    /* uses JIT_R1: */
    ref2 = generate_proc_struct_retry(jitter, num_rands, top_refagain);
    CHECK_LIMIT();
  }

  /* The slow way: */
  /* V1 and RUNSTACK must be intact! */
  if (ref5)
    mz_patch_branch(ref5);
  generate_pause_for_gc_and_retry(jitter,
                                  num_rands < 100, /* in short jumps */
                                  JIT_V1, /* expose V1 to GC */
                                  refagain); /* retry code pointer */
  CHECK_LIMIT();
  if (!direct_native) {
    mz_patch_branch(ref);
    mz_patch_branch(ref2);
  }
  if (ref4)
    mz_patch_branch(ref4);
  CHECK_LIMIT();
#ifdef USE_FLONUM_UNBOXING
  if (direct_lam) {
    if (SCHEME_LAMBDA_FLAGS(direct_lam) & LAMBDA_HAS_TYPED_ARGS) {
      /* Need to box flonum arguments. Flonums are currently in the place where
         the target function expects them unpacked from arguments. We need to save
         JIT_V1. */
      generate_argument_boxing(jitter, direct_lam,
                               num_rands, 0,
                               0, 0,
                               JIT_V1,
                               NULL, NULL);
      CHECK_LIMIT();
      mz_rs_sync();
      scheme_mz_flostack_restore(jitter, 0, 0, 1, 0);
    }
  }
#endif
  if (need_set_rs) {
    JIT_UPDATE_THREAD_RSPTR();
  }
  if (direct_native > 1) { /* => some_args_already_in_place */
    /* Need to shuffle argument lists. Since we can pass only
       three arguments, use static variables for the others. */
    mz_ld_runstack_base_alt(JIT_R1);
    mz_tl_sti_p(tl_fixup_runstack_base, JIT_RUNSTACK_BASE_OR_ALT(JIT_R1), JIT_R0);
    mz_get_local_p(JIT_R1, JIT_LOCAL2);
    mz_tl_sti_l(tl_fixup_already_in_place, JIT_R1, JIT_R0);
  }
  if (num_rands >= 0) {
    jit_movi_i(JIT_R0, num_rands);
  } else {
    mz_get_local_p(JIT_R0, JIT_LOCAL2);
  }
  /* Since we've overwritten JIT_RUNSTACK, if this is not shared
     code, and if this is 3m, then the runstack no longer
     has a pointer to the closure for this code. To ensure that
     an appropriate return point exists, jump to static code
     for the rest. (This is the slow path, anyway.) */
  __END_SHORT_JUMPS__(num_rands < 100);
  if (direct_native > 1) {
    (void)jit_jmpi(sjc.finish_tail_call_fixup_code);
  } else {
    (void)jit_jmpi(sjc.finish_tail_call_code);
  }

  return 1;
}
597
/* Emits a call to scheme_force_value_same_mark on the tail-call-waiting
   sentinel, leaving the forced result in JIT_R0. Returns 1. */
int scheme_generate_force_value_same_mark(mz_jit_state *jitter)
{
  GC_CAN_IGNORE jit_insn *refr USED_ONLY_FOR_FUTURES;
  (void)jit_movi_p(JIT_R0, SCHEME_TAIL_CALL_WAITING);
  mz_prepare(1);
  jit_pusharg_p(JIT_R0);
  (void)mz_finish_lwe(ts_scheme_force_value_same_mark, refr);
  jit_retval(JIT_R0);
  return 1;
}
608
/* Emits the (already-prepared) call to the single-value apply
   entry point; arguments must have been pushed by the caller. */
int scheme_generate_finish_apply(mz_jit_state *jitter)
{
  GC_CAN_IGNORE jit_insn *refr USED_ONLY_FOR_FUTURES;
  (void)mz_finish_lwe(ts__scheme_apply_from_native, refr);
  return 1;
}
615
/* Emits the (already-prepared) call to the tail-apply entry point;
   arguments must have been pushed by the caller. */
int scheme_generate_finish_tail_apply(mz_jit_state *jitter)
{
  GC_CAN_IGNORE jit_insn *refr USED_ONLY_FOR_FUTURES;
  (void)mz_finish_lwe(ts__scheme_tail_apply_from_native, refr);
  return 1;
}
622
/* Emits the (already-prepared) call to the multi-value apply
   entry point; arguments must have been pushed by the caller. */
int scheme_generate_finish_multi_apply(mz_jit_state *jitter)
{
  GC_CAN_IGNORE jit_insn *refr USED_ONLY_FOR_FUTURES;
  (void)mz_finish_lwe(ts__scheme_apply_multi_from_native, refr);
  return 1;
}
629
/* Emits the shared tail-call finish sequence: pushes rator (V1),
   argc (R0), and the runstack as C arguments, calls the tail-apply
   helper (the fixup variant when some arguments are already in place,
   i.e., direct_native > 1), and returns from the JIT frame. */
int scheme_generate_finish_tail_call(mz_jit_state *jitter, int direct_native)
{
  mz_prepare(3);
  CHECK_LIMIT();
  jit_pusharg_p(JIT_RUNSTACK);
  jit_pusharg_i(JIT_R0);
  jit_pusharg_p(JIT_V1);
  if (direct_native > 1) { /* => some_args_already_in_place */
    GC_CAN_IGNORE jit_insn *refr USED_ONLY_FOR_FUTURES;
    (void)mz_finish_lwe(_scheme_tail_apply_from_native_fixup_args, refr);
  } else {
    GC_CAN_IGNORE jit_insn *refr USED_ONLY_FOR_FUTURES;
    (void)mz_finish_lwe(ts__scheme_tail_apply_from_native, refr);
  }
  CHECK_LIMIT();
  /* Return: */
  mz_pop_threadlocal();
  mz_pop_locals();
  jit_ret();

  return 1;
}
652
/* Emits a non-tail call to a known non-continuation-mark primitive
   via scheme_noncm_prim_indirect, leaving the (validated) result in
   JIT_R0. When pop_and_jump, wraps the code in prolog/epilog so it
   can be shared. Returns 1. */
static int generate_direct_prim_non_tail_call(mz_jit_state *jitter, int num_rands, int multi_ok, int pop_and_jump)
{
  /* See generate_prim_non_tail_call for assumptions. */

  if (pop_and_jump) {
    mz_prolog(JIT_R1);
  }

  if (num_rands == 1) {
    /* Push the single argument from R0 onto the runstack: */
    jit_subi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(1));
    CHECK_RUNSTACK_OVERFLOW();
    jit_str_p(JIT_RUNSTACK, JIT_R0);
    JIT_UPDATE_THREAD_RSPTR();
  }

  jit_movi_i(JIT_R1, num_rands);
  mz_prepare_direct_prim(2); /* a prim takes 3 args, but a NONCM prim ignores the 3rd */
  CHECK_LIMIT();
  {
    /* May use JIT_R0 and create local branch: */
    mz_generate_direct_prim(jit_pusharg_p(JIT_RUNSTACK),
                            jit_pusharg_i(JIT_R1),
                            JIT_V1, scheme_noncm_prim_indirect);
  }
  CHECK_LIMIT();
  jit_retval(JIT_R0);
  VALIDATE_RESULT(JIT_R0);
  /* No need to check for multi values or tail-call, because
     we only use this for noncm primitives. */

  if (num_rands == 1) {
    /* Pop the argument pushed above: */
    jit_addi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(1));
    jitter->need_set_rs = 1;
  }

  if (pop_and_jump) {
    mz_epilog(JIT_V1);
  }

  return 1;
}
694
/* Emits code that retries a tail call recorded in the current thread's
   ku.apply fields (rator/num_rands/rands): checks runstack room, copies
   the recorded arguments onto the runstack, clears the recorded state,
   and jumps to the shared non-tail retry code. If there is not enough
   room, the emitted code falls through with R0 restored to
   SCHEME_TAIL_CALL_WAITING so the caller's slow path runs instead. */
static int generate_retry_call(mz_jit_state *jitter, int num_rands, int multi_ok, int result_ignored,
                               GC_CAN_IGNORE jit_insn *reftop)
/* If num_rands < 0, original argc is in V1, and we should
   pop argc arguments off runstack before pushing more (unless num_rands == -3).
   This function is called with short jumps enabled. */
{
  GC_CAN_IGNORE jit_insn *ref, *ref2, *refloop;

  if (!reftop) {
    /* Select the shared retry entry that matches the result mode: */
    int mo = (multi_ok
              ? (result_ignored ? SHARED_RESULT_IGNORED_CASE : SHARED_MULTI_OK_CASE)
              : SHARED_SINGLE_VALUE_CASE);
    reftop = sjc.shared_non_tail_retry_code[mo];
  }

  /* Get new argc: */
  (void)mz_tl_ldi_p(JIT_R1, tl_scheme_current_thread);
  jit_ldxi_l(JIT_R2, JIT_R1, &((Scheme_Thread *)0x0)->ku.apply.tail_num_rands);
  /* Thread is in R1. New argc is in R2. Old argc to cancel may be in V1. */

  /* Enough room on runstack? */
  mz_tl_ldi_p(JIT_R0, tl_MZ_RUNSTACK_START);
  jit_subr_ul(JIT_R0, JIT_RUNSTACK, JIT_R0); /* R0 is space left (in bytes) */
  if ((num_rands < 0) && (num_rands != -3)) {
    /* Old arguments will be popped, so only the difference is needed: */
    jit_subr_l(JIT_R2, JIT_R2, JIT_V1);
  }
  jit_lshi_l(JIT_R2, JIT_R2, JIT_LOG_WORD_SIZE);
  ref = jit_bltr_ul(jit_forward(), JIT_R0, JIT_R2);
  CHECK_LIMIT();

  /* Yes, there's enough room. Adjust the runstack. */
  jit_subr_l(JIT_RUNSTACK, JIT_RUNSTACK, JIT_R2);
  CHECK_RUNSTACK_OVERFLOW();

  /* Copy arguments to runstack, then jump to reftop. */
  jit_ldxi_l(JIT_R2, JIT_R1, &((Scheme_Thread *)0x0)->ku.apply.tail_num_rands);
  jit_ldxi_p(JIT_V1, JIT_R1, &((Scheme_Thread *)0x0)->ku.apply.tail_rands);
  jit_lshi_l(JIT_R2, JIT_R2, JIT_LOG_WORD_SIZE);
  CHECK_LIMIT();
  /* Copy loop, walking R2 (byte offset) down to 0: */
  refloop = jit_get_ip();
  ref2 = jit_blei_l(jit_forward(), JIT_R2, 0);
  jit_subi_l(JIT_R2, JIT_R2, JIT_WORD_SIZE);
  jit_ldxr_p(JIT_R0, JIT_V1, JIT_R2);
  jit_stxr_p(JIT_R2, JIT_RUNSTACK, JIT_R0);
  (void)jit_jmpi(refloop);
  CHECK_LIMIT();

  /* Clear tail-call arguments pointer: */
  jit_stixi_p(&((Scheme_Thread *)0x0)->ku.apply.tail_rands, JIT_R1, NULL);
  CHECK_LIMIT();

  /* R1 is still the thread.
     Put procedure and argc in place, then jump to apply: */
  mz_patch_branch(ref2);
  jit_ldxi_p(JIT_V1, JIT_R1, &((Scheme_Thread *)0x0)->ku.apply.tail_rator);
  (void)jit_movi_p(JIT_R0, NULL);
  jit_stxi_p(&((Scheme_Thread *)0x0)->ku.apply.tail_rator, JIT_R1, JIT_R0);
  jit_ldxi_l(JIT_R0, JIT_R1, &((Scheme_Thread *)0x0)->ku.apply.tail_num_rands);
  __END_SHORT_JUMPS__(1);
  (void)jit_jmpi(reftop);
  __START_SHORT_JUMPS__(1);

  /* Slow path; restore R0 to SCHEME_TAIL_CALL_WAITING */
  mz_patch_branch(ref);
  (void)jit_movi_p(JIT_R0, SCHEME_TAIL_CALL_WAITING);

  return 1;
}
763
/* Emits code that overwrites the `num_rands` argument slots at
   JIT_RUNSTACK with a harmless non-heap pointer (the runstack pointer
   itself) so the GC does not retain the old argument values. Only
   handles the fixed-argc case; variable argc is cleared at runtime by
   generate_clear_slow_previous_args. */
static int generate_clear_previous_args(mz_jit_state *jitter, int num_rands)
{
  if (num_rands >= 0) {
    int i;
    for (i = 0; i < num_rands; i++) {
      jit_stxi_p(WORDS_TO_BYTES(i), JIT_RUNSTACK, JIT_RUNSTACK);
      CHECK_LIMIT();
    }
  } else {
    /* covered by generate_clear_slow_previous_args */
  }
  return 1;
}
777
/* Emits a runtime call to clear_runstack(JIT_RUNSTACK, V1, R0) for the
   variable-argc case: V1 holds the count to clear, and R0 holds a
   value that clear_runstack passes through, restored to R0 afterward. */
static int generate_clear_slow_previous_args(mz_jit_state *jitter)
{
  CHECK_LIMIT();
  mz_prepare(3);
  jit_pusharg_p(JIT_R0);
  jit_pusharg_l(JIT_V1);
  jit_pusharg_l(JIT_RUNSTACK);
  (void)mz_finish(clear_runstack);
  jit_retval(JIT_R0);
  return 1;
}
789
/* Emits code that, when the value in JIT_R0 is the multiple-values
   sentinel, clears the thread's multiple-values array (so the GC does
   not retain the ignored results) and replaces R0 with void. */
static int generate_ignored_result_check(mz_jit_state *jitter)
{
  /* if multiple results, need to clear ignored result in thread */
  GC_CAN_IGNORE jit_insn *refm;

  __START_INNER_TINY__(1);

  refm = jit_bnei_p(jit_forward(), JIT_R0, SCHEME_MULTIPLE_VALUES);
  mz_tl_ldi_p(JIT_R1, tl_scheme_current_thread);
  jit_stixi_p(&((Scheme_Thread *)0x0)->ku.multiple.array, JIT_R1, NULL);
  (void)jit_movi_p(JIT_R0, scheme_void);
  mz_patch_branch(refm);

  __END_INNER_TINY__(1);

  return 1;
}
807
scheme_generate_non_tail_call(mz_jit_state * jitter,int num_rands,int direct_native,int need_set_rs,int multi_ok,int result_ignored,int nontail_self,int pop_and_jump,int is_inlined,int unboxed_args,jit_insn * reftop)808 int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direct_native, int need_set_rs,
809 int multi_ok, int result_ignored, int nontail_self, int pop_and_jump,
810 int is_inlined, int unboxed_args, jit_insn *reftop)
811 {
812 /* Non-tail call.
813 Proc is in V1, args are at RUNSTACK.
814 If nontail_self, then R0 has proc pointer, and R2 has max_let_depth.
815 If unboxed_args, LOCAL3 holds address with argument-copying code,
816 where R2 is set before jumping to the old FP, and R1 holds
817 return address back here, and V1 and R0 must be preserved;
818 num_rands >= 0 in this case, and the "slow path" returns NULL.
819 If num_rands < 0, then argc is in R0, and
820 if num_rands != -3, need to pop runstack before returning.
821 If num_rands == -1 or -3, skip prolog. */
822 GC_CAN_IGNORE jit_insn *ref, *ref2, *ref4, *ref5, *ref6, *ref7, *ref8, *ref9;
823 GC_CAN_IGNORE jit_insn *ref10, *refagain, *top_refagain;
824 GC_CAN_IGNORE jit_insn *refrts USED_ONLY_FOR_FUTURES;
825 #ifndef FUEL_AUTODECEREMENTS
826 GC_CAN_IGNORE jit_insn *ref11;
827 #endif
828
829 CHECK_RUNSTACK_OVERFLOW();
830
831 __START_SHORT_JUMPS__(1);
832
833 if (pop_and_jump) {
834 if ((num_rands != -1) && (num_rands != -3)) {
835 mz_prolog(JIT_R1);
836 } else if (!reftop) {
837 reftop = jit_get_ip();
838 }
839 }
840
841 top_refagain = jit_get_ip();
842
843 /* Check for inlined native type */
844 if (!direct_native) {
845 ref = jit_bmsi_ul(jit_forward(), JIT_V1, 0x1);
846 jit_ldxi_s(JIT_R1, JIT_V1, &((Scheme_Object *)0x0)->type);
847 ref2 = jit_bnei_i(jit_forward(), JIT_R1, scheme_native_closure_type);
848 /* code at ref2 uses JIT_R1 */
849 CHECK_LIMIT();
850 } else {
851 ref = ref2 = NULL;
852 }
853
854 refagain = jit_get_ip();
855
856 /* Before inlined native, check max let depth */
857 if (!nontail_self) {
858 jit_ldxi_p(JIT_R2, JIT_V1, &((Scheme_Native_Closure *)0x0)->code);
859 jit_ldxi_i(JIT_R2, JIT_R2, &((Scheme_Native_Lambda *)0x0)->max_let_depth);
860 }
861 mz_tl_ldi_p(JIT_R1, tl_MZ_RUNSTACK_START);
862 jit_subr_ul(JIT_R1, JIT_RUNSTACK, JIT_R1);
863 ref4 = jit_bltr_ul(jit_forward(), JIT_R1, JIT_R2);
864 CHECK_LIMIT();
865
866 /* Before inlined native, check stack depth: */
867 (void)mz_tl_ldi_p(JIT_R1, tl_scheme_jit_stack_boundary); /* assumes USE_STACK_BOUNDARY_VAR */
868 ref9 = jit_bltr_ul(jit_forward(), JIT_SP, JIT_R1); /* assumes down-growing stack */
869 CHECK_LIMIT();
870
871 #ifndef FUEL_AUTODECEREMENTS
872 /* Finally, check for thread swap: */
873 (void)mz_tl_ldi_i(JIT_R2, tl_scheme_fuel_counter);
874 ref11 = jit_blei_i(jit_forward(), JIT_R2, 0);
875 jit_subi_p(JIT_R2, JIT_R2, 0x1);
876 (void)mz_tl_sti_i(tl_scheme_fuel_counter, JIT_R2, JIT_R1);
877 #endif
878
879 /* Fast inlined-native jump ok (proc will check argc, if necessary) */
880 {
881 GC_CAN_IGNORE jit_insn *refr;
882 #if defined(MZ_USE_JIT_I386) || defined(MZ_USE_JIT_ARM)
883 # define KEEP_CALL_AND_RETURN_PAIRED
884 #endif
885 #ifdef KEEP_CALL_AND_RETURN_PAIRED
886 GC_CAN_IGNORE jit_insn *refxr;
887 #endif
888 if (num_rands < 0) {
889 /* We need to save argc to clear and manually pop the
890 runstack. So move V1 to R2 and move R0 to V1: */
891 jit_movr_p(JIT_R2, JIT_V1);
892 jit_movr_p(JIT_V1, JIT_R0);
893 }
894 if (unboxed_args) {
895 jit_movr_p(JIT_R2, JIT_FP); /* save old FP */
896 }
897 jit_shuffle_saved_regs(); /* maybe copies V registers to be restored */
898 #ifdef KEEP_CALL_AND_RETURN_PAIRED
899 /* keep call & ret paired (for branch prediction) by jumping to where
900 we really want to return, then back here: */
901 refr = jit_jmpi(jit_forward());
902 refxr = jit_get_ip();
903 jit_base_prolog();
904 #else
905 refr = jit_patchable_movi_p(JIT_R1, jit_forward());
906 _jit_prolog_again(jitter, NATIVE_ARG_COUNT, JIT_R1); /* saves V registers (or copied V registers) */
907 #endif
908 if (num_rands >= 0) {
909 if (nontail_self) { jit_movr_p(JIT_R1, JIT_R0); }
910 jit_movr_p(JIT_R0, JIT_V1); /* closure */
911 if (!nontail_self && !unboxed_args) {
912 /* nontail_self is only enabled when there are no rest args: */
913 jit_movi_i(JIT_R1, num_rands); /* argc */
914 jit_movr_p(JIT_R2, JIT_RUNSTACK); /* argv */
915 }
916 jit_addi_p(JIT_RUNSTACK_BASE_OR_ALT(JIT_V1), JIT_RUNSTACK, WORDS_TO_BYTES(num_rands));
917 mz_st_runstack_base_alt(JIT_V1);
918 } else {
919 /* R2 is closure, V1 is argc */
920 jit_lshi_l(JIT_R1, JIT_V1, JIT_LOG_WORD_SIZE);
921 jit_addr_p(JIT_RUNSTACK_BASE_OR_ALT(JIT_R0), JIT_RUNSTACK, JIT_R1);
922 mz_st_runstack_base_alt(JIT_R0);
923 jit_movr_p(JIT_R0, JIT_R2); /* closure */
924 jit_movr_i(JIT_R1, JIT_V1); /* argc */
925 jit_movr_p(JIT_R2, JIT_RUNSTACK); /* argv */
926 }
927 CHECK_LIMIT();
928 if (unboxed_args) {
929 /* old FP is still in R2 */
930 mz_get_local_p_x(JIT_V1, JIT_LOCAL3, JIT_R2);
931 }
932 mz_push_locals();
933 mz_repush_threadlocal();
934 if (unboxed_args) {
935 GC_CAN_IGNORE jit_insn *refrr;
936 refrr = jit_patchable_movi_p(JIT_R1, jit_forward());
937 jit_jmpr(JIT_V1);
938 jit_patch_movi(refrr, jit_get_ip());
939 jit_movi_i(JIT_R1, num_rands); /* argc */
940 jit_movr_p(JIT_R2, JIT_RUNSTACK); /* argv */
941 }
942 if (!nontail_self) {
943 jit_ldxi_p(JIT_V1, JIT_R0, &((Scheme_Native_Closure *)0x0)->code);
944 if (direct_native) {
945 jit_ldxi_p(JIT_V1, JIT_V1, &((Scheme_Native_Lambda *)0x0)->u.tail_code);
946 } else {
947 jit_ldxi_p(JIT_V1, JIT_V1, &((Scheme_Native_Lambda *)0x0)->arity_code);
948 if (need_set_rs) {
949 /* In case arity check fails, need to update runstack now: */
950 JIT_UPDATE_THREAD_RSPTR();
951 }
952 }
953 jit_jmpr(JIT_V1); /* callee restores (copied) V registers, etc. */
954 } else {
955 /* self-call function pointer is in R1 */
956 jit_jmpr(JIT_R1);
957 }
958 #ifdef KEEP_CALL_AND_RETURN_PAIRED
959 mz_patch_ucbranch(refr);
960 (void)jit_short_calli(refxr);
961 #else
962 jit_patch_movi(refr, jit_get_ip());
963 #endif
964 jit_unshuffle_saved_regs(); /* maybe uncopies V registers */
965 /* If num_rands < 0, then V1 has argc */
966 }
967 CHECK_LIMIT();
968 jit_retval(JIT_R0);
969 VALIDATE_RESULT(JIT_R0);
970
971 /* Fast common-case return */
972 if (pop_and_jump) {
973 GC_CAN_IGNORE jit_insn *refc;
974 __START_INNER_TINY__(1);
975 refc = jit_blei_p(jit_forward(), JIT_R0, SCHEME_MULTIPLE_VALUES);
976 __END_INNER_TINY__(1);
977 if ((num_rands < 0) && (num_rands != -3)) {
978 /* At this point, argc must be in V1 */
979 jit_lshi_l(JIT_R1, JIT_V1, JIT_LOG_WORD_SIZE);
980 jit_addr_p(JIT_RUNSTACK, JIT_RUNSTACK, JIT_R1);
981 }
982 if (pop_and_jump) {
983 mz_epilog(JIT_V1);
984 }
985 __START_INNER_TINY__(1);
986 mz_patch_branch(refc);
987 __END_INNER_TINY__(1);
988 CHECK_LIMIT();
989 }
990
991 if (!multi_ok) {
992 GC_CAN_IGNORE jit_insn *refm;
993 __END_SHORT_JUMPS__(1);
994 refm = jit_beqi_p(jit_forward(), JIT_R0, SCHEME_MULTIPLE_VALUES);
995 mz_patch_branch_at(refm, sjc.bad_result_arity_code);
996 __START_SHORT_JUMPS__(1);
997 }
998 ref6 = jit_bnei_p(jit_forward(), JIT_R0, SCHEME_TAIL_CALL_WAITING);
999 generate_clear_previous_args(jitter, num_rands);
1000 CHECK_LIMIT();
1001 if (pop_and_jump) {
1002 /* Expects argc in V1 if num_rands < 0 and num_rands != -3: */
1003 generate_retry_call(jitter, num_rands, multi_ok, result_ignored, reftop);
1004 }
1005 CHECK_LIMIT();
1006 if (need_set_rs) {
1007 JIT_UPDATE_THREAD_RSPTR();
1008 }
1009 if (num_rands < 0) {
1010 generate_clear_slow_previous_args(jitter);
1011 CHECK_LIMIT();
1012 }
1013 mz_prepare(1);
1014 jit_pusharg_p(JIT_R0);
1015 if (multi_ok) {
1016 (void)mz_finish_lwe(ts_scheme_force_value_same_mark, refrts);
1017 } else {
1018 (void)mz_finish_lwe(ts_scheme_force_one_value_same_mark, refrts);
1019 }
1020 ref5 = jit_jmpi(jit_forward());
1021 CHECK_LIMIT();
1022
1023 /* Maybe it's a prim? */
1024 if (!direct_native) {
1025 mz_patch_branch(ref2);
1026 ref2 = jit_bnei_i(jit_forward(), JIT_R1, scheme_prim_type);
1027 /* It's a prim. Arity check... fast path when exactly equal to min, only: */
1028 jit_ldxi_i(JIT_R2, JIT_V1, &((Scheme_Primitive_Proc *)0x0)->mina);
1029 if (num_rands >= 0) {
1030 ref7 = jit_bnei_i(jit_forward(), JIT_R2, num_rands);
1031 } else {
1032 ref7 = jit_bner_i(jit_forward(), JIT_R2, JIT_R0);
1033 }
1034 /* Fast prim application */
1035 jit_ldxi_p(JIT_R1, JIT_V1, &((Scheme_Primitive_Proc *)0x0)->prim_val);
1036 if (need_set_rs) {
1037 JIT_UPDATE_THREAD_RSPTR();
1038 }
1039 mz_prepare_direct_prim(3);
1040 jit_pusharg_p(JIT_V1);
1041 CHECK_LIMIT();
1042 if (num_rands < 0) { jit_movr_p(JIT_V1, JIT_R0); } /* save argc to manually pop runstack */
1043 {
1044 __END_SHORT_JUMPS__(1);
1045 /* May use JIT_R0 and create local branch: */
1046 mz_generate_direct_prim(jit_pusharg_p(JIT_RUNSTACK),
1047 jit_pusharg_i(JIT_R2),
1048 JIT_R1, scheme_prim_indirect);
1049 __START_SHORT_JUMPS__(1);
1050 }
1051 CHECK_LIMIT();
1052 jit_retval(JIT_R0);
1053 VALIDATE_RESULT(JIT_R0);
1054 if (!multi_ok) {
1055 GC_CAN_IGNORE jit_insn *refm;
1056 __END_SHORT_JUMPS__(1);
1057 refm = jit_beqi_p(jit_forward(), JIT_R0, SCHEME_MULTIPLE_VALUES);
1058 mz_patch_branch_at(refm, sjc.bad_result_arity_code);
1059 __START_SHORT_JUMPS__(1);
1060 }
1061 ref10 = jit_bnei_p(jit_forward(), JIT_R0, SCHEME_TAIL_CALL_WAITING);
1062 generate_clear_previous_args(jitter, num_rands);
1063 CHECK_LIMIT();
1064 if (pop_and_jump) {
1065 /* Expects argc in V1 if num_rands < 0 and num_rands != -3: */
1066 generate_retry_call(jitter, num_rands, multi_ok, result_ignored, reftop);
1067 }
1068 CHECK_LIMIT();
1069 if (num_rands < 0) {
1070 generate_clear_slow_previous_args(jitter);
1071 CHECK_LIMIT();
1072 }
1073 mz_prepare(1);
1074 jit_pusharg_p(JIT_R0);
1075 if (multi_ok) {
1076 (void)mz_finish_lwe(ts_scheme_force_value_same_mark, refrts);
1077 } else {
1078 (void)mz_finish_lwe(ts_scheme_force_one_value_same_mark, refrts);
1079 }
1080 CHECK_LIMIT();
1081 ref8 = jit_jmpi(jit_forward());
1082
1083 /* Check for simple applicable struct wrapper */
1084 if (!is_inlined && (num_rands >= 0)) {
1085 mz_patch_branch(ref2);
1086 /* uses JIT_R1 */
1087 ref2 = generate_proc_struct_retry(jitter, num_rands, top_refagain);
1088 CHECK_LIMIT();
1089 }
1090 } else {
1091 ref2 = ref7 = ref8 = ref10 = NULL;
1092 }
1093
1094 /* The slow way: */
1095 mz_patch_branch(ref9);
1096 if (!unboxed_args) {
1097 generate_pause_for_gc_and_retry(jitter,
1098 1, /* in short jumps */
1099 JIT_V1, /* expose V1 to GC */
1100 refagain); /* retry code pointer */
1101 }
1102
1103 CHECK_LIMIT();
1104 if (!direct_native) {
1105 mz_patch_branch(ref);
1106 mz_patch_branch(ref2);
1107 mz_patch_branch(ref7);
1108 }
1109 mz_patch_branch(ref4);
1110 #ifndef FUEL_AUTODECEREMENTS
1111 mz_patch_branch(ref11);
1112 #endif
1113 if (unboxed_args) {
1114 /* no slow path here; return NULL to box arguments fall back to generic */
1115 (void)jit_movi_p(JIT_R0, NULL);
1116 if (pop_and_jump) {
1117 mz_epilog(JIT_V1);
1118 }
1119 } else {
1120 /* normal slow path: */
1121 if (need_set_rs) {
1122 JIT_UPDATE_THREAD_RSPTR();
1123 }
1124 if (num_rands >= 0) {
1125 jit_movi_i(JIT_R0, num_rands);
1126 }
1127 mz_prepare(3);
1128 CHECK_LIMIT();
1129 jit_pusharg_p(JIT_RUNSTACK);
1130 jit_pusharg_i(JIT_R0);
1131 jit_pusharg_p(JIT_V1);
1132 if (num_rands < 0) { jit_movr_p(JIT_V1, JIT_R0); } /* save argc to manually pop runstack */
1133 if (multi_ok) {
1134 (void)mz_finish_lwe(x_ts__scheme_apply_multi_from_native, refrts);
1135 } else {
1136 (void)mz_finish_lwe(x_ts__scheme_apply_from_native, refrts);
1137 }
1138 CHECK_LIMIT();
1139 }
1140
1141 mz_patch_ucbranch(ref5);
1142 if (!direct_native) {
1143 mz_patch_ucbranch(ref8);
1144 }
1145 jit_retval(JIT_R0);
1146 VALIDATE_RESULT(JIT_R0);
1147 mz_patch_branch(ref6);
1148 if (!direct_native) {
1149 mz_patch_branch(ref10);
1150 if (result_ignored) {
1151 generate_ignored_result_check(jitter);
1152 CHECK_LIMIT();
1153 }
1154 }
1155 /* Note: same return code is above for faster common-case return */
1156 if ((num_rands < 0) && (num_rands != -3)) {
1157 /* At this point, argc must be in V1 */
1158 jit_lshi_l(JIT_R1, JIT_V1, JIT_LOG_WORD_SIZE);
1159 jit_addr_p(JIT_RUNSTACK, JIT_RUNSTACK, JIT_R1);
1160 }
1161 if (pop_and_jump) {
1162 mz_epilog(JIT_V1);
1163 }
1164 CHECK_LIMIT();
1165
1166 __END_SHORT_JUMPS__(1);
1167
1168 return 1;
1169 }
1170
1171 #ifdef USE_FLONUM_UNBOXING
static int generate_argument_boxing(mz_jit_state *jitter, Scheme_Lambda *lam,
                                    int num_rands, int args_already_in_place,
                                    int offset, int direct_flostack_offset,
                                    int save_reg,
                                    /* used only to skip unneeded checks: */
                                    Scheme_App_Rec *app, Scheme_Object **alt_rands)
/* Emits code that re-boxes any arguments that are currently held only in
   unboxed (ext)flonum form on the FP stack, so that the slow path can see
   ordinary Scheme_Object* values on the runstack. Walks the arguments from
   last to first, mirroring how `direct_flostack_offset` bytes of FP-stack
   space were consumed. `save_reg` (JIT_R0 or JIT_V1) names the register
   whose value must survive; it is pushed/popped around each boxing call.
   `app`/`alt_rands` supply the original argument expressions purely so that
   statically-safe cases can skip the dynamic NULL check. Always returns 1;
   callers check progress via CHECK_LIMIT(). */
{
  int i, arg_tmp_offset;
  Scheme_Object *rand;

  /* Start past all directly-passed FP arguments; we add each argument's
     size back as we walk downward through the argument list. */
  arg_tmp_offset = offset - direct_flostack_offset;
  for (i = num_rands; i--; ) {
    int extfl;
    extfl = CLOSURE_ARGUMENT_IS_EXTFLONUM(lam, i + args_already_in_place);
    if (extfl || CLOSURE_ARGUMENT_IS_FLONUM(lam, i + args_already_in_place)) {
      rand = (alt_rands
              ? alt_rands[i+1+args_already_in_place]
              : (app
                 ? app->args[i+1+args_already_in_place]
                 : NULL));
      /* extflonums occupy two double-sized slots on the FP stack */
      arg_tmp_offset += MZ_FPUSEL(extfl, 2*sizeof(double), sizeof(double));
      /* Boxing definitely isn't needed if the value was from a local that doesn't hold
         an unboxed value, otherwise we generate code to check dynamically. */
      if (!rand
          || !SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)
          || (!extfl && (SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_FLONUM))
          || (extfl && (SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_EXTFLONUM))) {
        GC_CAN_IGNORE jit_insn *iref;
        int aoffset;
        aoffset = JIT_FRAME_FLOSTACK_OFFSET - arg_tmp_offset;
        /* Preserve save_reg across the boxing call; when save_reg is R0,
           the last argument is already in R0 and is being boxed itself,
           so no save is needed in that one case. */
        if (save_reg == JIT_R0) {
          if (i != num_rands - 1)
            mz_pushr_p(JIT_R0);
        } else {
          mz_pushr_p(JIT_V1);
        }
        if (!rand || SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)) {
          /* assert: !rand or SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_FLONUM
             or SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_EXTFLONUM */
          /* So, we have to check for an existing box */
          if ((save_reg != JIT_R0) || (i != num_rands - 1))
            mz_rs_ldxi(JIT_R0, i+1);
          mz_rs_sync();
          __START_TINY_JUMPS__(1);
          iref = jit_bnei_p(jit_forward(), JIT_R0, NULL);
          __END_TINY_JUMPS__(1);
        } else
          iref = NULL;
        /* R0 holds the frame offset of the unboxed value for the shared
           boxing routine */
        jit_movi_l(JIT_R0, aoffset);
        mz_rs_sync();
        MZ_FPUSEL_STMT(extfl,
                       (void)jit_calli(sjc.box_extflonum_from_stack_code),
                       (void)jit_calli(sjc.box_flonum_from_stack_code));
        if ((save_reg != JIT_R0) || (i != num_rands - 1))
          mz_rs_stxi(i+1, JIT_R0);
        if (iref) {
          /* land here when an existing box was found and boxing was skipped */
          __START_TINY_JUMPS__(1);
          mz_patch_branch(iref);
          __END_TINY_JUMPS__(1);
        }
        CHECK_LIMIT();
        if (save_reg == JIT_R0) {
          if (i != num_rands - 1)
            mz_popr_p(JIT_R0);
        } else {
          mz_popr_p(JIT_V1);
        }
      }
    }
  }

  return 1;
}
1245 #endif
1246
static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, int num_rands, GC_CAN_IGNORE jit_insn *slow_code,
                                   int args_already_in_place, int direct_flostack_offset,
                                   Scheme_App_Rec *app, Scheme_Object **alt_rands)
/* Last argument is in R0 */
/* Emits the fast path for a tail call from a closure to itself: after a
   fuel check, arguments are copied into the caller's own frame (just past
   the closure data) and control jumps back to jitter->self_restart_code.
   If fuel has run out, a slow path pauses for GC, re-boxes any unboxed
   flonum arguments, reloads the rator into V1, and jumps to `slow_code`
   (the generic tail-call code). Always returns 1; progress is checked via
   CHECK_LIMIT(). */
{
  GC_CAN_IGNORE jit_insn *refslow, *refagain;
  int i;
  int jmp_tiny USED_ONLY_SOMETIMES;
  int jmp_short USED_ONLY_SOMETIMES;
  int closure_size = jitter->self_closure_size;
  int space, offset;
#ifdef USE_FLONUM_UNBOXING
  int arg_offset = 0, arg_tmp_offset;
  Scheme_Object *rand;
#endif

#ifdef JIT_PRECISE_GC
  closure_size += 1; /* Skip procedure pointer, too */
#endif

  /* Pick jump encoding by how much code the argument-copy loop will emit */
  jmp_tiny = num_rands < 5;
  jmp_short = num_rands < 100;

  __START_TINY_OR_SHORT_JUMPS__(jmp_tiny, jmp_short);

  /* Retry target for the pause-for-GC slow path below */
  refagain = jit_get_ip();

  /* Check for thread swap: */
  (void)mz_tl_ldi_i(JIT_R2, tl_scheme_fuel_counter);
  refslow = jit_blei_i(jit_forward(), JIT_R2, 0);
#ifndef FUEL_AUTODECEREMENTS
  jit_subi_p(JIT_R2, JIT_R2, 0x1);
  (void)mz_tl_sti_i(tl_scheme_fuel_counter, JIT_R2, JIT_R1);
#endif

  __END_TINY_OR_SHORT_JUMPS__(jmp_tiny, jmp_short);

  /* Remember FP-stack state so the slow path can restore it */
  offset = jitter->flostack_offset;
  space = jitter->flostack_space;
#ifdef USE_FLONUM_UNBOXING
  arg_tmp_offset = offset - direct_flostack_offset;
#endif

  /* Copy args to runstack after closure data: */
  mz_ld_runstack_base_alt(JIT_R2);
  jit_subi_p(JIT_R2, JIT_RUNSTACK_BASE_OR_ALT(JIT_R2), WORDS_TO_BYTES(num_rands + closure_size + args_already_in_place));
  for (i = num_rands; i--; ) {
    /* The last argument arrives in R0 (see header comment) */
    int already_loaded = (i == num_rands - 1);
#ifdef USE_FLONUM_UNBOXING
    int is_flonum, already_unboxed = 0, extfl = 0;
    if ((SCHEME_LAMBDA_FLAGS(jitter->self_lam) & LAMBDA_HAS_TYPED_ARGS)
        && (CLOSURE_ARGUMENT_IS_FLONUM(jitter->self_lam, i + args_already_in_place)
            || CLOSURE_ARGUMENT_IS_EXTFLONUM(jitter->self_lam, i + args_already_in_place))) {
      is_flonum = 1;
      extfl = CLOSURE_ARGUMENT_IS_EXTFLONUM(jitter->self_lam, i + args_already_in_place);
      rand = (alt_rands
              ? alt_rands[i+1+args_already_in_place]
              : app->args[i+1+args_already_in_place]);
      arg_tmp_offset += MZ_FPUSEL(extfl, 2*sizeof(double), sizeof(double));
      /* Value is already unboxed on the FP stack; load it from there */
      mz_ld_fppush(MZ_FPUSEL(extfl, JIT_FPU_FPR0, JIT_FPR0), arg_tmp_offset, extfl);
      already_unboxed = 1;
      if (!already_loaded && !SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)) {
        /* Non-local operand has no boxed form on the runstack; store NULL
           as the placeholder pointer value instead */
        already_loaded = 1;
        (void)jit_movi_p(JIT_R0, NULL);
      }
    } else
      is_flonum = extfl = 0;
#endif
    if (!already_loaded)
      jit_ldxi_p(JIT_R0, JIT_RUNSTACK, WORDS_TO_BYTES(i));
    jit_stxi_p(WORDS_TO_BYTES(i + closure_size + args_already_in_place), JIT_R2, JIT_R0);
#ifdef USE_FLONUM_UNBOXING
    if (is_flonum) {
      if (!already_unboxed) {
        /* Unbox from the heap-allocated (ext)flonum object in R0 */
        MZ_FPUSEL_STMT(extfl,
                       jit_fpu_ldxi_ld_fppush(JIT_FPU_FPR0, JIT_R0, &((Scheme_Long_Double *)0x0)->long_double_val),
                       jit_ldxi_d_fppush(JIT_FPR0, JIT_R0, &((Scheme_Double *)0x0)->double_val));
      }
      arg_offset += MZ_FPUSEL(extfl, 2*sizeof(double), sizeof(double));
      mz_st_fppop(arg_offset, MZ_FPUSEL(extfl, JIT_FPU_FPR0, JIT_FPR0), extfl);
    }
#endif
    CHECK_LIMIT();
  }
  jit_movr_p(JIT_RUNSTACK, JIT_R2);

  /* Reset the FP-stack frame to what the function expects on (re)entry */
  scheme_mz_flostack_restore(jitter, jitter->self_restart_space, jitter->self_restart_offset, 1, 1);

  /* Now jump: */
  (void)jit_jmpi(jitter->self_restart_code);
  CHECK_LIMIT();

  /* Slow path: */
  __START_TINY_OR_SHORT_JUMPS__(jmp_tiny, jmp_short);
  mz_patch_branch(refslow);
  __END_TINY_OR_SHORT_JUMPS__(jmp_tiny, jmp_short);

  generate_pause_for_gc_and_retry(jitter,
                                  0,  /* in short jumps */
                                  JIT_R0, /* expose R0 to GC */
                                  refagain); /* retry code pointer */
  CHECK_LIMIT();

  /* Restore compile-time FP-stack bookkeeping saved above */
  jitter->flostack_offset = offset;
  jitter->flostack_space = space;

#ifdef USE_FLONUM_UNBOXING
  /* Need to box any arguments that we have only in flonum form */
  if (SCHEME_LAMBDA_FLAGS(jitter->self_lam) & LAMBDA_HAS_TYPED_ARGS) {
    generate_argument_boxing(jitter, jitter->self_lam,
                             num_rands, args_already_in_place,
                             offset, direct_flostack_offset,
                             JIT_R0,
                             app, alt_rands);
    CHECK_LIMIT();

    /* Arguments already in place may also need to be boxed. */
    arg_tmp_offset = jitter->self_restart_offset;
    for (i = jitter->self_lam->closure_size; i--; ) {
      /* Skip over flonums unpacked from the closure. I think this never
         happens, because I think that a self-call with already-in-place
         flonum arguments will only happen when the closure is empty. */
      if (CLOSURE_CONTENT_IS_FLONUM(jitter->self_lam, i))
        arg_tmp_offset -= sizeof(double);
      else if (CLOSURE_CONTENT_IS_EXTFLONUM(jitter->self_lam, i))
        arg_tmp_offset -= 2*sizeof(double);
    }
    for (i = 0; i < args_already_in_place; i++) {
      if (CLOSURE_ARGUMENT_IS_FLONUM(jitter->self_lam, i)
          || CLOSURE_ARGUMENT_IS_EXTFLONUM(jitter->self_lam, i)) {
        GC_CAN_IGNORE jit_insn *iref;
        int extfl USED_ONLY_IF_LONG_DOUBLE;
        extfl = CLOSURE_ARGUMENT_IS_EXTFLONUM(jitter->self_lam, i);
        mz_pushr_p(JIT_R0);
        mz_ld_runstack_base_alt(JIT_R2);
        jit_subi_p(JIT_R2, JIT_RUNSTACK_BASE_OR_ALT(JIT_R2), WORDS_TO_BYTES(num_rands + args_already_in_place));
        jit_ldxi_p(JIT_R0, JIT_R2, WORDS_TO_BYTES(i));
        mz_rs_sync();
        /* Skip boxing if a boxed form already exists (non-NULL pointer) */
        __START_TINY_JUMPS__(1);
        iref = jit_bnei_p(jit_forward(), JIT_R0, NULL);
        __END_TINY_JUMPS__(1);
        {
          int aoffset = JIT_FRAME_FLOSTACK_OFFSET - arg_tmp_offset;
          jit_movi_l(JIT_R0, aoffset);
          MZ_FPUSEL_STMT(extfl,
                         (void)jit_calli(sjc.box_extflonum_from_stack_code),
                         (void)jit_calli(sjc.box_flonum_from_stack_code));
          /* Recompute the frame address; R2 was clobbered by the call */
          mz_ld_runstack_base_alt(JIT_R2);
          jit_subi_p(JIT_R2, JIT_RUNSTACK_BASE_OR_ALT(JIT_R2), WORDS_TO_BYTES(num_rands + args_already_in_place));
          jit_stxi_p(WORDS_TO_BYTES(i), JIT_R2, JIT_R0);
        }
        __START_TINY_JUMPS__(1);
        mz_patch_branch(iref);
        __END_TINY_JUMPS__(1);
        mz_popr_p(JIT_R0);
        CHECK_LIMIT();
        arg_tmp_offset -= MZ_FPUSEL(extfl, 2*sizeof(double), sizeof(double));
      }
    }
  }
#endif

  scheme_mz_flostack_restore(jitter, 0, 0, 1, 1);

  CHECK_LIMIT();

  if (args_already_in_place) {
    /* Record the in-place count where the generic tail-call code expects it */
    jit_movi_l(JIT_R2, args_already_in_place);
    mz_set_local_p(JIT_R2, JIT_LOCAL2);
  }

  if (num_rands > 0) {
    /* We didn't leave room for the last argument, so now we need to make
       space for it. (Possible improvement: it may be possible to know that
       room is available already, so that this isn't necessary.) */
    mz_runstack_unskipped(jitter, 1);
    mz_runstack_pushed(jitter, 1);
    mz_rs_dec(1);
    for (i = 0; i < num_rands-1; i++) {
      mz_rs_ldxi(JIT_R1, i+1);
      mz_rs_stxi(i, JIT_R1);
      CHECK_LIMIT();
    }

    mz_rs_stxi(num_rands - 1, JIT_R0);
  }
  /* Load the rator into V1, as the generic tail-call code expects */
  scheme_generate(rator, jitter, 0, 0, 0, JIT_V1, NULL, NULL);
  CHECK_LIMIT();
  mz_rs_sync();

  (void)jit_jmpi(slow_code);

  return 1;
}
1441
/* Parameter bundle passed through scheme_generate_one() to
   do_generate_shared_call() when emitting a shared call stub. */
typedef struct {
  int num_rands;        /* argument count; -1 is used for the "any count" retry stub
                           (see scheme_ensure_retry_available) */
  mz_jit_state *old_jitter;  /* jitter whose settings the new stub inherits */
  int multi_ok;         /* non-zero => multiple return values are allowed */
  int result_ignored;   /* non-zero => caller discards the result */
  int is_tail;          /* non-zero => generate tail-call protocol */
  int direct_prim, direct_native, nontail_self, unboxed_args;
} Generate_Call_Data;
1450
static void jit_register_sub_func(mz_jit_state *jitter, void *code, Scheme_Object *protocol, int gcable)
/* protocol: #f => normal lightweight call protocol
             void => next return address is in LOCAL2
             eof => name to use is in LOCAL2
             null => no name, but usual frame */
/* Registers the code range [code, current ip) with the stack-trace symbol
   table, but only when this jitter actually retains generated code. */
{
  void *end = jit_get_ip();

  if (!jitter->retain_start)
    return;

  scheme_jit_add_symbol((uintptr_t)jit_unadjust_ip(code),
                        (uintptr_t)jit_unadjust_ip(end) - 1,
                        protocol,
                        gcable);
}
1466
/* Public wrapper: register a sub-function's code range without the
   GC-able flag (gcable = 0). */
void scheme_jit_register_sub_func(mz_jit_state *jitter, void *code, Scheme_Object *protocol)
{
  jit_register_sub_func(jitter, code, protocol, 0);
}
1471
/* Registers a helper function's code range for stack unwinding; a no-op
   unless a manual-unwind configuration (DWARF libunwind or Win64) is in
   effect. */
void scheme_jit_register_helper_func(mz_jit_state *jitter, void *code, int gcable)
{
#if defined(MZ_USE_DWARF_LIBUNWIND) || defined(_WIN64)
  /* Null indicates that there's no function name to report, but the
     stack should be unwound manually using the JIT-generated convention. */
  jit_register_sub_func(jitter, code, scheme_null, gcable);
#endif
}
1480
static int do_generate_shared_call(mz_jit_state *jitter, void *_data)
/* Callback for scheme_generate_one(): emits one shared call stub (tail or
   non-tail, prim or native) as described by the Generate_Call_Data bundle,
   registers it for stack traces, and returns the generator's result. */
{
  Generate_Call_Data *data = (Generate_Call_Data *)_data;
  void *start;
  int ok;

#ifdef MZ_USE_JIT_PPC
  jitter->js.jitl.nbArgs = data->old_jitter->js.jitl.nbArgs;
#endif

  start = jit_get_ip();

  if (data->is_tail) {
    if (data->direct_prim)
      ok = generate_direct_prim_tail_call(jitter, data->num_rands);
    else
      ok = scheme_generate_tail_call(jitter, data->num_rands, data->direct_native, 1, 0,
                                     NULL, NULL, NULL);
    scheme_jit_register_helper_func(jitter, start, 0);
  } else {
    if (data->direct_prim)
      ok = generate_direct_prim_non_tail_call(jitter, data->num_rands, data->multi_ok, 1);
    else
      ok = scheme_generate_non_tail_call(jitter, data->num_rands, data->direct_native, 1,
                                         data->multi_ok, data->result_ignored, data->nontail_self,
                                         1, 0, data->unboxed_args, NULL);
    scheme_jit_register_sub_func(jitter, start, scheme_false);
  }

  return ok;
}
1522
void *scheme_generate_shared_call(int num_rands, mz_jit_state *old_jitter, int multi_ok, int result_ignored,
                                  int is_tail, int direct_prim, int direct_native, int nontail_self,
                                  int unboxed_args)
/* Generates (or re-generates) a shared call stub configured by the given
   flags and returns its (adjusted) entry pointer. */
{
  Generate_Call_Data d;
  void *generated;

  d.old_jitter = old_jitter;
  d.num_rands = num_rands;
  d.is_tail = is_tail;
  d.multi_ok = multi_ok;
  d.result_ignored = result_ignored;
  d.direct_prim = direct_prim;
  d.direct_native = direct_native;
  d.nontail_self = nontail_self;
  d.unboxed_args = unboxed_args;

  generated = scheme_generate_one(old_jitter, do_generate_shared_call, &d, 0, NULL, NULL);
  return jit_adjust_ip(generated);
}
1543
void scheme_ensure_retry_available(mz_jit_state *jitter, int multi_ok, int result_ignored)
/* Lazily generates the shared non-tail retry stub for the requested
   result-handling mode, caching it in sjc.shared_non_tail_retry_code. */
{
  int mode;

  if (!multi_ok)
    mode = SHARED_SINGLE_VALUE_CASE;
  else if (result_ignored)
    mode = SHARED_RESULT_IGNORED_CASE;
  else
    mode = SHARED_MULTI_OK_CASE;

  if (!sjc.shared_non_tail_retry_code[mode]) {
    void *stub;
    stub = scheme_generate_shared_call(-1, jitter, multi_ok, result_ignored, 0, 0, 0, 0, 0);
    sjc.shared_non_tail_retry_code[mode] = stub;
  }
}
1555
static int generate_nontail_self_setup(mz_jit_state *jitter)
/* Emits setup code for a non-tail self-call: records a patchable return
   address on jitter->patch_depth (to be patched later), loads the
   self-call entry point into R0, and (under precise GC) reloads this
   closure's pointer into V1 from the runstack. Returns 0. */
{
  void *pp, **pd;
  /* Patchable move whose target is filled in when the return point is known */
  pp = jit_patchable_movi_p(JIT_R2, jit_forward());
  /* Chain this patch record onto the jitter's patch list */
  pd = (void **)scheme_malloc(2 * sizeof(void *));
  pd[0] = pp;
  pd[1] = jitter->patch_depth;
  jitter->patch_depth = pd;
  (void)jit_patchable_movi_p(JIT_R0, jitter->self_nontail_code);
#ifdef JIT_PRECISE_GC
  if (jitter->closure_self_on_runstack) {
    /* Get this closure's pointer from the run stack */
    int depth = jitter->depth + jitter->extra_pushed - 1;
    jit_ldxi_p(JIT_V1, JIT_RUNSTACK, WORDS_TO_BYTES(depth));
  }
#endif
  return 0;
}
1574
static int can_direct_native(Scheme_Object *p, int num_rands, intptr_t *extract_case)
/* Reports whether `p` is a JIT-compiled closure that can be invoked
   directly (bypassing the arity-dispatch entry) with `num_rands`
   arguments. For a case-lambda with a matching fixed-arity case, also
   stores into *extract_case the byte offset used to extract that case's
   closure. Returns 1 when a direct call is possible, 0 otherwise. */
{
  Scheme_Native_Closure_Data_Or_Code: /* no-op label avoided; plain flow below */;
  if (!SAME_TYPE(SCHEME_TYPE(p), scheme_native_closure_type))
    return 0;

  if (((Scheme_Native_Closure *)p)->code->closure_size < 0) {
    /* case-lambda: a negative closure_size encodes the case count */
    mzshort *arities;
    int cnt, i;

    cnt = ((Scheme_Native_Closure *)p)->code->closure_size;
    cnt = -(cnt + 1);
    arities = ((Scheme_Native_Closure *)p)->code->u.arities;
    for (i = 0; i < cnt; i++) {
      if (arities[i] == num_rands) {
        *extract_case = (intptr_t)&((Scheme_Native_Closure *)0x0)->vals[i];
        return 1;
      }
    }
    return 0;
  }

  /* not a case-lambda... */
  if (scheme_native_arity_check(p, num_rands)
      /* If it also accepts num_rands + 1, then it has a vararg,
         so don't try direct_native. */
      && !scheme_native_arity_check(p, num_rands + 1))
    return 1;

  return 0;
}
1605
static int is_noncm_hash_ref(Scheme_Object *rator, int num_rands, Scheme_App_Rec *app)
/* hash-ref acts like a non-cm prim if there's no procedure 3rd argument */
{
  if (!SAME_OBJ(rator, scheme_hash_ref_proc))
    return 0;

  if ((num_rands != 2) && (num_rands != 3))
    return 0;

  if (num_rands == 3) {
    /* A non-value (possibly evaluated) or procedure third argument could
       be a failure thunk, which may install continuation marks */
    if ((SCHEME_TYPE(app->args[3]) < _scheme_values_types_)
        || SCHEME_PROCP(app->args[3]))
      return 0;
  }

  return 1;
}
1620
static jit_direct_arg *check_special_direct_args(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_rands,
                                                 int args_already_in_place)
/* Detects the special case where every argument is a plain (untyped)
   local reference that fits in the available direct-argument registers,
   and if so builds a jit_direct_arg plan mapping each argument to a
   register; returns NULL when the special case does not apply.
   NOTE(review): the unconditional `return NULL;` below disables this
   optimization entirely, making the rest of the function dead code —
   presumably a deliberate kill switch; confirm before removing either
   the return or the code after it. */
{
  jit_direct_arg *inline_direct_args = NULL;
  Scheme_Object *v;
  int reg_to_pos[NUM_AVAIL_DIRECT_ARG_REGS];
  int n = 0, j, pos, i;

  return NULL;

  /* --- dead code below while the early return above is in place --- */

  for (j = 0; j < NUM_AVAIL_DIRECT_ARG_REGS; j++) {
    reg_to_pos[j] = 0;
  }

  /* First pass: check that every argument is a plain local and that the
     set of distinct locals fits in the direct-argument registers */
  for (i = 0; i < num_rands; i++) {
    v = (alt_rands
         ? alt_rands[i+1+args_already_in_place]
         : app->args[i+1+args_already_in_place]);
    if (SAME_TYPE(SCHEME_TYPE(v), scheme_local_type)
        && !SCHEME_GET_LOCAL_TYPE(v)) {
      pos = SCHEME_LOCAL_POS(v);
      for (j = 0; j < n; j++) {
        if (reg_to_pos[j] == pos)
          break;
      }
      if (j >= n) {
        if (n >= NUM_AVAIL_DIRECT_ARG_REGS)
          break;
        reg_to_pos[n++] = pos;
      }
    } else
      break;
  }

  if (i < num_rands)
    return NULL;

  /* We hit the special case! */
  inline_direct_args = MALLOC_N_ATOMIC(jit_direct_arg, num_rands);

  /* Second pass: assign registers; `gen` is 1 for the first use of each
     local (value must be loaded) and 0 for repeats (already in a register) */
  n = 0;
  for (j = 0; j < NUM_AVAIL_DIRECT_ARG_REGS; j++) {
    reg_to_pos[j] = 0;
  }

  for (i = 0; i < num_rands; i++) {
    v = (alt_rands
         ? alt_rands[i+1+args_already_in_place]
         : app->args[i+1+args_already_in_place]);
    pos = SCHEME_LOCAL_POS(v);
    for (j = 0; j < n; j++) {
      if (reg_to_pos[j] == pos) {
        inline_direct_args[i].gen = 0;
        inline_direct_args[i].reg = direct_arg_regs[j];
        break;
      }
    }
    if (j >= n) {
      reg_to_pos[n] = pos;
      inline_direct_args[i].gen = 1;
      inline_direct_args[i].reg = direct_arg_regs[n];
      n++;
    }
  }

  return inline_direct_args;
}
1688
1689 #ifdef USE_FLONUM_UNBOXING
1690
static int generate_fp_argument_shift(int direct_flostack_offset, mz_jit_state *jitter, int src_fp_reg)
/* Emits code that copies `direct_flostack_offset` bytes of unboxed FP
   arguments from the top of the frame addressed by `src_fp_reg` to the
   bottom of the current FP stack frame. No code is emitted when the
   source is the current frame and the arguments are already at the
   bottom. Always returns 1. */
{
  int i;

  if ((src_fp_reg == JIT_FP)
      && (jitter->flostack_offset == direct_flostack_offset))
    /* no shift needed */
    return 1;

  /* Since we're just shifting bytes, it's ok to pretend that all
     boxed values are `double's. */
  for (i = 0; i < direct_flostack_offset; i += sizeof(double)) {
    int i_pos, a_pos;
    /* source slot: offset within the old frame's argument region */
    i_pos = jitter->flostack_offset - direct_flostack_offset + i + sizeof(double);
    mz_ld_fppush_x(JIT_FPR0, i_pos, src_fp_reg, 0);
    /* destination slot: same relative position at the frame bottom */
    a_pos = i + sizeof(double);
    mz_st_fppop(a_pos, JIT_FPR0, 0);
    CHECK_LIMIT();
  }

  return 1;
}
1713
static int generate_call_path_with_unboxes(mz_jit_state *jitter, int direct_flostack_offset, void *unboxed_code,
                                           GC_CAN_IGNORE jit_insn **_refdone,
                                           int num_rands, Scheme_Lambda *direct_lam, Scheme_Object *rator)
/* Emits a call through `unboxed_code` (a shared stub that expects some
   arguments unboxed), including a local callback that copies the unboxed
   FP arguments into the callee's frame. On return, R0 is non-NULL on
   success (branch recorded in *_refdone); a NULL result means the callee
   fell back, so this code then boxes the FP arguments and reloads the
   rator into V1 for the generic slow path. Always returns 1. */
{
  GC_CAN_IGNORE jit_insn *refdone, *refgo, *refcopy;
  int i, k, offset;

  /* Skip over the callback code emitted inline just below */
  refgo = jit_jmpi(jit_forward());
  refcopy = jit_get_ip();

  /* Callback code to copy unboxed arguments.
     R1 has the return address, R2 holds the old FP */

  /* Reserve FP-stack space in whole chunks (rounded up) */
  offset = FLOSTACK_SPACE_CHUNK * ((direct_flostack_offset + (FLOSTACK_SPACE_CHUNK - 1))
                                   / FLOSTACK_SPACE_CHUNK);
  jit_subi_l(JIT_SP, JIT_SP, offset);

  generate_fp_argument_shift(direct_flostack_offset, jitter, JIT_R2);
  CHECK_LIMIT();

  jit_jmpr(JIT_R1);

  mz_patch_ucbranch(refgo);

  /* install callback pointer and jump to shared code: */

  (void)jit_patchable_movi_p(JIT_R1, refcopy);
  mz_set_local_p(JIT_R1, JIT_LOCAL3);

  (void)jit_calli(unboxed_code);

  /* Non-NULL result => done; caller patches this branch */
  refdone = jit_bnei_p(jit_forward(), JIT_R0, NULL);
  *_refdone = refdone;

  CHECK_LIMIT();

  /* box arguments for slow path */
  for (i = 0, k = 0; i < num_rands; i++) {
    if ((SCHEME_LAMBDA_FLAGS(direct_lam) & LAMBDA_HAS_TYPED_ARGS)
        && (CLOSURE_ARGUMENT_IS_FLONUM(direct_lam, i)
            || CLOSURE_ARGUMENT_IS_EXTFLONUM(direct_lam, i))) {
      int extfl;
      extfl = CLOSURE_ARGUMENT_IS_EXTFLONUM(direct_lam, i);

      /* k tracks FP-stack bytes consumed by arguments boxed so far */
      offset = jitter->flostack_offset - k;
      offset = JIT_FRAME_FLOSTACK_OFFSET - offset;
      jit_ldxi_p(JIT_R0, JIT_RUNSTACK, WORDS_TO_BYTES(i));
      scheme_generate_flonum_local_boxing(jitter, i, offset, JIT_R0, extfl);
      k += MZ_FPUSEL(extfl, 2*sizeof(double), sizeof(double));
    }
  }

  /* Reset V1 to rator for slow path: */
  scheme_generate(rator, jitter, 0, 0, 0, JIT_V1, NULL, NULL);
  CHECK_LIMIT();
  mz_rs_sync();

  return 1;
}
1773 #endif
1774
int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_rands, int num_pushes,
                        mz_jit_state *jitter, int is_tail, int multi_ok, int result_ignored,
                        int no_call)
/* Emit machine code for one procedure application.

   The rator is slot 0 and the operands are slots 1..num_rands of either
   `alt_rands` (when non-NULL) or `app->args`. `num_pushes` is the number of
   runstack slots this application accounts for (presumably it can differ
   from `num_rands` when arguments are handled out-of-band — TODO confirm
   against callers).

   de-sync'd ok
   If no_call is 2, then rator is not necessarily evaluated.
   If no_call is 1, then rator is left in V1 and arguments are on runstack.

   Returns 2 for a tail call, 1 otherwise. */
{
  int i, offset, need_safety = 0, apply_to_list = 0, num_unsafe_struct_refs;
  int direct_prim = 0, need_non_tail = 0, direct_native = 0, direct_self = 0, nontail_self = 0;
  Scheme_Native_Closure *inline_direct_native = NULL;
  int almost_inline_direct_native = 0;
#ifdef USE_FLONUM_UNBOXING
  Scheme_Lambda *direct_lam = NULL;
#endif
  int direct_flostack_offset = 0, unboxed_non_tail_args = 0;
  jit_direct_arg *inline_direct_args = NULL;
  int proc_already_in_place = 0;
  Scheme_Object *rator, *v, *arg;
  int reorder_ok = 0;
  int args_already_in_place = 0;
  intptr_t extract_case = 0; /* when direct_native, non-0 => offset to extract case-lambda case */
  START_JIT_DATA();

  rator = (alt_rands ? alt_rands[0] : app->args[0]);

  rator = scheme_specialize_to_constant(rator, jitter, num_pushes, 0);

  /* Classify the rator to pick the cheapest calling strategy:
     direct_prim      -- call a primitive's C function directly
     direct_native    -- target is known to be JITted native code
     direct_self      -- (tail) self-call; can reuse this function's entry
     nontail_self     -- non-tail self-call through self_nontail_code
     reorder_ok       -- rator evaluation can be delayed until after args */
  if (no_call == 2) {
    direct_prim = 1;
  } else if (SCHEME_PRIMP(rator)) {
    /* Primitive: direct call only if the arity matches statically and the
       primitive cannot capture/compose continuation marks (or is a case
       we specially allow, like `values` in multi-value contexts). */
    if ((num_rands >= ((Scheme_Primitive_Proc *)rator)->mina)
        && ((num_rands <= ((Scheme_Primitive_Proc *)rator)->mu.maxa)
            || (((Scheme_Primitive_Proc *)rator)->mina < 0))
        && (scheme_is_noncm(rator, jitter, 0, 0)
            || is_noncm_hash_ref(rator, num_rands, app)
            /* It's also ok to directly call `values' if multiple values are ok: */
            || (multi_ok && SAME_OBJ(rator, scheme_values_proc))))
      direct_prim = 1;
    else {
      reorder_ok = 1;
      if ((num_rands >= 2) && SAME_OBJ(rator, scheme_apply_proc))
        apply_to_list = 1;
    }
    if (!(((Scheme_Primitive_Proc *)rator)->pp.flags & SCHEME_PRIM_IS_MULTI_RESULT))
      result_ignored = 0; /* don't need to check for multiple values to ignore */
  } else {
    Scheme_Type t;
    t = SCHEME_TYPE(rator);

    if (t == scheme_case_closure_type) {
      /* Turn it into a JITted empty case closure: */
      rator = scheme_unclose_case_lambda(rator, 1);
      t = SCHEME_TYPE(rator);
    }

    if ((t == scheme_local_type) && scheme_ok_to_delay_local(rator)) {
      /* We can re-order evaluation of the rator. */
      reorder_ok = 1;

      /* Call to known native, or even known self? */
      {
        int pos, flags;
        pos = SCHEME_LOCAL_POS(rator) - num_pushes;
        if (scheme_mz_is_closure(jitter, pos, num_rands, &flags)) {
          direct_native = 1;
          if ((pos == jitter->self_pos)
              && (num_rands < MAX_SHARED_CALL_RANDS)) {
            if (is_tail)
              direct_self = 1;
            else if (jitter->self_nontail_code)
              nontail_self = 1;
          }
        }
      }
    } else if ((t == scheme_toplevel_type) || (t == scheme_static_toplevel_type)) {
      int flags = SCHEME_TOPLEVEL_FLAGS(rator);

      if ((flags & SCHEME_TOPLEVEL_FLAGS_MASK) >= SCHEME_TOPLEVEL_FIXED) {
        /* We can re-order evaluation of the rator. */
        reorder_ok = 1;

        /* A CONST top-level lets us look at the actual value now and
           check whether it is a native closure of matching arity. */
        if (jitter->nc
            && ((flags & SCHEME_TOPLEVEL_FLAGS_MASK) >= SCHEME_TOPLEVEL_CONST)) {
          Scheme_Object *p;

          if (t == scheme_toplevel_type)
            p = scheme_extract_global(rator, jitter->nc, 0);
          else
            p = SCHEME_STATIC_TOPLEVEL_PREFIX(rator)->a[SCHEME_TOPLEVEL_POS(rator)];
          if (p) {
            p = ((Scheme_Bucket *)p)->val;
            if (can_direct_native(p, num_rands, &extract_case)) {
              int pos = SCHEME_TOPLEVEL_POS(rator);

              direct_native = 1;

              if ((pos == jitter->self_toplevel_pos)
                  && (num_rands < MAX_SHARED_CALL_RANDS)) {
                if (is_tail) {
                  direct_self = 1;
                } else if (jitter->self_nontail_code)
                  nontail_self = 1;
              }
            }
          }
        }
      }
    } else if (SAME_TYPE(t, scheme_native_closure_type)) {
      direct_native = can_direct_native(rator, num_rands, &extract_case);
      reorder_ok = 1;
    } else if (SAME_TYPE(t, scheme_closure_type)) {
      Scheme_Lambda *lam;
      lam = ((Scheme_Closure *)rator)->code;
      if ((lam->num_params == num_rands)
          && !(SCHEME_LAMBDA_FLAGS(lam) & LAMBDA_HAS_REST)) {
        direct_native = 1;

        if (SAME_OBJ(lam->u.jit_clone, jitter->self_lam)
            && (num_rands < MAX_SHARED_CALL_RANDS)) {
          if (is_tail)
            direct_self = 1;
          else if (jitter->self_nontail_code)
            nontail_self = 1;
        } else {
          Scheme_Closure *c = (Scheme_Closure *)rator;
          if (ZERO_SIZED_CLOSUREP(c)) {
            /* If we're calling a constant function in tail position, then
               there's a good chance that this function is a wrapper to
               get to a loop, a nested loop, or a mutually recursive call.
               Inline the jump to the potential loop,
               absorbing the runstack and C stack checks into the check
               for this function --- only works if we can JIT the target
               of the call. */
            Scheme_Native_Closure *nc;
            nc = (Scheme_Native_Closure *)scheme_jit_closure((Scheme_Object *)lam, NULL);
            if (nc->code->start_code == scheme_on_demand_jit_code) {
              /* Not yet JITted; force generation now (unless a generation
                 for it is already in progress up the stack). */
              if (nc->code->arity_code != sjc.in_progress_on_demand_jit_arity_code) {
                scheme_on_demand_generate_lambda(nc, 0, NULL, 0);
                CHECK_NESTED_GENERATE();
              }
            }
            if (nc->code->start_code != scheme_on_demand_jit_code) {
              if (is_tail) {
                if (nc->code->max_let_depth > jitter->max_tail_depth)
                  jitter->max_tail_depth = nc->code->max_let_depth;
                inline_direct_native = nc;
#ifdef USE_FLONUM_UNBOXING
                direct_lam = lam;
#endif
              } else {
                if (num_rands < MAX_SHARED_CALL_RANDS) {
#ifdef USE_FLONUM_UNBOXING
                  direct_lam = lam;
#endif
                  unboxed_non_tail_args = 1;
                }
              }
            } else {
              if (is_tail) {
                /* To tie a mutually recursive loop, we leave an indirection
                   and a runstack-space check, but we can still handle unboxed
                   arguments. */
#ifdef USE_FLONUM_UNBOXING
                direct_lam = lam;
#endif
                almost_inline_direct_native = 1;
              }
            }
          }
        }
      }
      reorder_ok = 1;
    } else if (t > _scheme_values_types_) {
      /* We can re-order evaluation of the rator. */
      reorder_ok = 1;
    }

#ifdef JIT_PRECISE_GC
    if (jitter->closure_self_on_runstack) {
      /* We can get this closure's pointer back from the Scheme stack. */
      if (nontail_self)
        direct_self = 1;
    }
#endif

    if (direct_self)
      reorder_ok = 0; /* superseded by direct_self */
  }

  FOR_LOG(if (direct_native) { LOG_IT((" [direct]\n")); } )

  /* Direct native tail with same number of args as just received? */
  if (direct_native && is_tail && num_rands && !almost_inline_direct_native
      && (num_rands == jitter->self_lam->num_params)
      && !(SCHEME_LAMBDA_FLAGS(jitter->self_lam) & LAMBDA_HAS_REST)) {
    /* Check whether the actual arguments refer to Scheme-stack
       locations that will be filled with argument values; that
       is, check how many arguments are already in place for
       the call. */
    mz_runstack_skipped(jitter, num_pushes);
    for (i = 0; i < num_rands; i++) {
      v = (alt_rands ? alt_rands[i+1] : app->args[i+1]);
      if (SAME_TYPE(SCHEME_TYPE(v), scheme_local_type)
          && !(SCHEME_GET_LOCAL_FLAGS(v) == SCHEME_LOCAL_OTHER_CLEARS)) {
        int pos;
        pos = mz_remap(SCHEME_LOCAL_POS(v));
        if (pos == (jitter->depth + jitter->extra_pushed + args_already_in_place))
          args_already_in_place++;
        else
          break;
      } else
        break;
    }
    mz_runstack_unskipped(jitter, num_pushes);
    if (args_already_in_place) {
      direct_native = 2;
      if (num_pushes)
        mz_runstack_skipped(jitter, args_already_in_place);
      num_rands -= args_already_in_place;
      if (num_pushes)
        num_pushes -= args_already_in_place;
    }
    LOG_IT((" [args in place: %d]\n", args_already_in_place));
  }

  if (inline_direct_native) {
    /* Look for very special case where arguments are so simple
       that we can move them directly into a couple of registers. */
    inline_direct_args = check_special_direct_args(app, alt_rands, num_rands, args_already_in_place);
  }

  /* Reserve runstack space for the arguments (unless they go straight
     into registers, or a 1-argument direct primitive keeps its argument
     in a register). */
  if (num_rands) {
    if (inline_direct_args) {
      mz_runstack_skipped(jitter, num_pushes);
    } else if (!direct_prim || (num_rands > 1) || (no_call == 2)) {
      int skip_end = 0;
      if (direct_self && is_tail && !no_call && (num_rands > 0)) {
        /* last argument is kept in a register */
        skip_end = 1;
      }
      if (num_rands - skip_end > 0) {
        mz_rs_dec(num_rands-skip_end);
        CHECK_RUNSTACK_OVERFLOW();
        if (num_pushes)
          mz_runstack_pushed(jitter, num_pushes-skip_end);
        else
          scheme_extra_pushed(jitter, num_rands-skip_end);
      }
      need_safety = num_rands-skip_end;
      if (skip_end && num_pushes)
        mz_runstack_skipped(jitter, skip_end);
    } else {
      if (num_pushes)
        mz_runstack_skipped(jitter, 1);
    }
  }

  /* If any subexpression is non-simple, we need the non-tail
     mark-position prefix/suffix around the whole evaluation. */
  for (i = num_rands + args_already_in_place + 1; i--; ) {
    v = (alt_rands ? alt_rands[i] : app->args[i]);
    if (!scheme_is_simple(v, INIT_SIMPLE_DEPTH, 1, jitter, 0)) {
      need_non_tail = 1;
      break;
    }
  }

  if (need_non_tail) {
    offset = scheme_generate_non_tail_mark_pos_prefix(jitter);
    CHECK_LIMIT();
  } else
    offset = 0;

  /* Evaluate the rator first when we cannot reorder it after the args: */
  if (!direct_prim && !reorder_ok && !direct_self) {
    if (need_safety && !scheme_is_non_gc(rator, INIT_SIMPLE_DEPTH)) {
      /* Initialize reserved (still-garbage) runstack slots before a
         potentially GCing rator evaluation. */
      scheme_stack_safety(jitter, need_safety, offset);
      CHECK_LIMIT();
      need_safety = 0;
    }

    scheme_generate_non_tail(rator, jitter, 0, !need_non_tail, 0); /* sync'd after args below */
    CHECK_LIMIT();

    if (num_rands) {
      /* Save rator where GC can see it */
      Scheme_Type t;
      arg = (alt_rands
             ? alt_rands[1+args_already_in_place]
             : app->args[1+args_already_in_place]);
      t = SCHEME_TYPE(arg);
      if ((num_rands == 1) && ((SAME_TYPE(scheme_local_type, t)
                                && (SCHEME_GET_LOCAL_TYPE(arg) != SCHEME_LOCAL_TYPE_FLONUM)
                                && (SCHEME_GET_LOCAL_TYPE(arg) != SCHEME_LOCAL_TYPE_EXTFLONUM))
                               || (t >= _scheme_values_types_))) {
        /* App of something complex to a local variable. We
           can move the proc directly to V1. */
        jit_movr_p(JIT_V1, JIT_R0);
        proc_already_in_place = 1;
      } else {
        /* Park the rator in the last argument's runstack slot; it is
           reloaded into V1 after the last argument is computed. */
        mz_rs_stxi(num_rands - 1 + offset, JIT_R0);
        if (need_safety)
          need_safety--;
      }
    } else {
      jit_movr_p(JIT_V1, JIT_R0);
    }
  }
  /* not sync'd...*/

#ifdef USE_FLONUM_UNBOXING
  if (direct_self && is_tail)
    direct_lam = jitter->self_lam;
#endif

#ifdef JIT_PRECISE_GC
  FOR_LOG(if (direct_lam) { LOG_IT((" [typed]\n")); } )
#endif

#ifdef USE_FLONUM_UNBOXING
  /* we want to push flonums into local storage in reverse order
     of evaluation, so make a pass to create space: */
  if (direct_lam
      && (SCHEME_LAMBDA_FLAGS(direct_lam) & LAMBDA_HAS_TYPED_ARGS)) {
    for (i = num_rands; i--; ) {
      int extfl;
      extfl = CLOSURE_ARGUMENT_IS_EXTFLONUM(direct_lam, i+args_already_in_place);
      if (extfl || CLOSURE_ARGUMENT_IS_FLONUM(direct_lam, i+args_already_in_place)) {
        /* make space: */
        scheme_generate_flonum_local_unboxing(jitter, 0, 1, extfl);
        CHECK_LIMIT();
      }
    }
  }
#endif

  /* Generate each argument in order, storing to the runstack slot,
     a register (inline_direct_args), or flonum local space. */
  for (i = 0; i < num_rands; i++) {
    PAUSE_JIT_DATA();
    arg = (alt_rands
           ? alt_rands[i+1+args_already_in_place]
           : app->args[i+1+args_already_in_place]);
    if (need_safety && !scheme_is_non_gc(arg, INIT_SIMPLE_DEPTH)) {
      /* Initialize the remaining reserved slots before a potentially
         GCing argument evaluation. */
      scheme_stack_safety(jitter, need_safety - i, offset + i);
      CHECK_LIMIT();
      need_safety = 0;
    }
#ifdef USE_FLONUM_UNBOXING
    if (direct_lam)
      num_unsafe_struct_refs = 0;
    else
#endif
      num_unsafe_struct_refs = detect_unsafe_struct_refs(arg, alt_rands, app, i, num_rands, 1+args_already_in_place);
    if (num_unsafe_struct_refs > 1) {
      /* Found a sequence of `(unsafe-struct-ref id 'number)` with
         sequential `number`s, so extract the whole group at once */
      v = (alt_rands
           ? alt_rands[i+1+args_already_in_place+num_unsafe_struct_refs-1]
           : app->args[i+1+args_already_in_place+num_unsafe_struct_refs-1]);
      mz_rs_sync();
      generate_unsafe_struct_ref_sequence(jitter, arg, v, num_unsafe_struct_refs, i + offset);
      CHECK_LIMIT();
      i += (num_unsafe_struct_refs - 1);
#ifdef USE_FLONUM_UNBOXING
    } else if (direct_lam
               && (SCHEME_LAMBDA_FLAGS(direct_lam) & LAMBDA_HAS_TYPED_ARGS)
               && (CLOSURE_ARGUMENT_IS_FLONUM(direct_lam, i+args_already_in_place)
                   || CLOSURE_ARGUMENT_IS_EXTFLONUM(direct_lam, i+args_already_in_place))) {
      /* Target expects this argument unboxed: evaluate it in unbox mode
         and spill the FP value into the reserved flonum local space. */
      int directly;
      int extfl;
      extfl = CLOSURE_ARGUMENT_IS_EXTFLONUM(direct_lam, i+args_already_in_place);
      jitter->unbox++;
      MZ_FPUSEL_STMT_ONLY(extfl, jitter->unbox_extflonum++);
      if (scheme_can_unbox_inline(arg, 5, JIT_FPUSEL_FPR_NUM(extfl)-1, 0, extfl))
        directly = 2;
      else if (scheme_can_unbox_directly(arg, extfl))
        directly = 1;
      else
        directly = 0;
      scheme_generate_unboxed(arg, jitter, directly, 1);
      MZ_FPUSEL_STMT_ONLY(extfl, --jitter->unbox_extflonum);
      --jitter->unbox;
      --jitter->unbox_depth;
      CHECK_LIMIT();

      /* use space made by scheme_generate_flonum_local_unboxing() above: */
      mz_st_fppop(jitter->flostack_offset - direct_flostack_offset,
                  MZ_FPUSEL(extfl, JIT_FPU_FPR0, JIT_FPR0),
                  extfl);
      direct_flostack_offset += MZ_FPUSEL(extfl, 2 * sizeof(double), sizeof(double));
      CHECK_LIMIT();

      if (SAME_TYPE(SCHEME_TYPE(arg), scheme_local_type)) {
        /* Keep local Scheme_Object* view, in case a box has been allocated */
        int apos;
        apos = mz_remap(SCHEME_LOCAL_POS(arg));
        mz_rs_ldxi(JIT_R0, apos);
      } else {
        (void)jit_movi_p(JIT_R0, NULL);
      }
#endif
    } else if (inline_direct_args) {
      if (inline_direct_args[i].gen)
        scheme_generate(arg, jitter, 0, 0, 0, inline_direct_args[i].reg, NULL, NULL);
    } else
      scheme_generate_non_tail(arg, jitter, 0, !need_non_tail, 0); /* sync'd below */
    RESUME_JIT_DATA();
    CHECK_LIMIT();

    if ((i == num_rands - 1) && !direct_prim && !reorder_ok && !direct_self && !proc_already_in_place) {
      /* Move rator back to register: */
      mz_rs_ldxi(JIT_V1, i + offset);
    }
    if ((!direct_prim || (num_rands > 1) || (no_call == 2))
        && (!direct_self || !is_tail || no_call || (i + 1 < num_rands))
        && !inline_direct_args) {
      /* mz_rs_stxi doesn't disturb R0/R1, so the recorded register-status
         validity can be preserved across the store: */
      int reg = mz_CURRENT_REG_STATUS_VALID();
      mz_rs_stxi(i + offset, JIT_R0);
      mz_SET_REG_STATUS_VALID(reg);
    }
  }
  /* not sync'd... */

  if (need_non_tail) {
    /* Uses JIT_R2: */
    scheme_generate_non_tail_mark_pos_suffix(jitter);
    CHECK_LIMIT();
  }

  if (direct_prim) {
    if (!no_call) {
      /* Load the primitive's C function pointer into V1: */
      (void)jit_movi_p(JIT_V1, ((Scheme_Primitive_Proc *)rator)->prim_val);
      if (num_rands == 1) {
        if (num_pushes)
          mz_runstack_unskipped(jitter, 1);
      } else {
        mz_rs_sync();
        JIT_UPDATE_THREAD_RSPTR_IF_NEEDED();
      }
      LOG_IT(("direct: %s\n", ((Scheme_Primitive_Proc *)rator)->name));
    }
  }

  if (reorder_ok && !inline_direct_native) {
    /* Rator evaluation was delayed until after the arguments: */
    if ((no_call < 2) && !apply_to_list) {
      scheme_generate(rator, jitter, 0, 0, 0, JIT_V1, NULL, NULL); /* sync'd below, or not */
    }
    CHECK_LIMIT();
  }

  if (!no_call)
    mz_rs_sync();

  END_JIT_DATA(20);

  if (direct_prim || direct_native || direct_self || nontail_self)
    scheme_direct_call_count++;
  else
    scheme_indirect_call_count++;

  if (direct_native && extract_case) {
    /* extract case from case-lambda */
    jit_ldxi_p(JIT_V1, JIT_V1, extract_case);
  }

  /* Finally, emit the call itself. Three shapes:
     (1) no_call: leave the setup for inlining code;
     (2) too many args for the shared-code tables: generate the
         call sequence inline here;
     (3) otherwise: jump/call through per-arity shared code. */
  if (no_call) {
    /* leave actual call to inlining code */
  } else if (!(direct_self && is_tail)
             && !inline_direct_native
             && !almost_inline_direct_native
             && (num_rands >= MAX_SHARED_CALL_RANDS)) {
    LOG_IT(("<-many args\n"));
    if (is_tail) {
      scheme_mz_flostack_restore(jitter, 0, 0, 1, 1);
      if (direct_prim) {
        generate_direct_prim_tail_call(jitter, num_rands);
      } else {
        if (args_already_in_place) {
          /* Tell the tail-call code how many leading args to keep: */
          jit_movi_l(JIT_R2, args_already_in_place);
          mz_set_local_p(JIT_R2, JIT_LOCAL2);
        }
        scheme_generate_tail_call(jitter, num_rands, direct_native, jitter->need_set_rs, 1,
                                  NULL, NULL, NULL);
      }
    } else {
      if (direct_prim)
        generate_direct_prim_non_tail_call(jitter, num_rands, multi_ok, 0);
      else {
        if (nontail_self) {
          generate_nontail_self_setup(jitter);
        }
        scheme_generate_non_tail_call(jitter, num_rands, direct_native, jitter->need_set_rs,
                                      multi_ok, result_ignored, nontail_self, 0, 1, 0, NULL);
      }
    }
  } else {
    /* Jump to code to implement a [tail-]call for `num_rands' arguments */
    void *code;
    int dp = (direct_prim ? 1 : (direct_native ? (1 + direct_native + (nontail_self ? 1 : 0)) : 0));
    /* if unboxed_non_tail_args, then we'll also use index 4 in place of dp */

    if (is_tail) {
      if (num_rands < MAX_SHARED_CALL_RANDS) {
        /* Lazily populate the shared tail-call stub for this arity/kind: */
        if (!sjc.shared_tail_code[dp][num_rands]) {
          code = scheme_generate_shared_call(num_rands, jitter, multi_ok, result_ignored, is_tail,
                                             direct_prim, direct_native, 0, 0);
          sjc.shared_tail_code[dp][num_rands] = code;
        }
        code = sjc.shared_tail_code[dp][num_rands];
      } else {
        /* We won't use this code pointer, because `direct_self` or similar. */
        code = NULL;
      }
      CHECK_NESTED_GENERATE();
      if (direct_self) {
        LOG_IT(("<-self\n"));
        generate_self_tail_call(rator, jitter, num_rands, code, args_already_in_place, direct_flostack_offset,
                                app, alt_rands);
        CHECK_LIMIT();
      } else if (inline_direct_native || almost_inline_direct_native) {
        LOG_IT(("<-native-tail\n"));
#ifdef USE_FLONUM_UNBOXING
        /* Copy unboxed flonums into place where the target code expects them: */
        generate_fp_argument_shift(direct_flostack_offset, jitter, JIT_FP);
        CHECK_LIMIT();
#endif
        scheme_mz_flostack_restore(jitter,
                                   FLOSTACK_SPACE_CHUNK * ((direct_flostack_offset + (FLOSTACK_SPACE_CHUNK - 1))
                                                           / FLOSTACK_SPACE_CHUNK),
                                   direct_flostack_offset,
                                   1, 1);
        /* move args and call function: */
        if (args_already_in_place) {
          jit_movi_l(JIT_R2, args_already_in_place);
          mz_set_local_p(JIT_R2, JIT_LOCAL2);
        }
        scheme_generate_tail_call(jitter, num_rands, direct_native, jitter->need_set_rs, 1,
                                  inline_direct_native, inline_direct_args,
#ifdef USE_FLONUM_UNBOXING
                                  almost_inline_direct_native ? direct_lam : NULL
#else
                                  NULL
#endif
                                  );
        CHECK_LIMIT();
      } else {
        scheme_mz_flostack_restore(jitter, 0, 0, 1, 1);
        LOG_IT(("<-tail\n"));
        if (args_already_in_place) {
          jit_movi_l(JIT_R2, args_already_in_place);
          mz_set_local_p(JIT_R2, JIT_LOCAL2);
        }
        if (apply_to_list) {
          jit_movi_i(JIT_V1, num_rands);
          (void)jit_jmpi(sjc.apply_to_list_tail_code);
        } else {
          (void)jit_jmpi(code);
        }
      }
    } else {
      /* Non-tail: select the shared-code variant by result mode. */
      int mo = (multi_ok
                ? (result_ignored ? SHARED_RESULT_IGNORED_CASE : SHARED_MULTI_OK_CASE)
                : SHARED_SINGLE_VALUE_CASE);
#ifdef USE_FLONUM_UNBOXING
      void *unboxed_code;
#endif

      if (unboxed_non_tail_args && !direct_flostack_offset)
        unboxed_non_tail_args = 0;

#ifdef USE_FLONUM_UNBOXING
      if (unboxed_non_tail_args) {
        if (!sjc.shared_non_tail_code[4][num_rands][mo]) {
          scheme_ensure_retry_available(jitter, multi_ok, result_ignored);
          code = scheme_generate_shared_call(num_rands, jitter, multi_ok, result_ignored, is_tail,
                                             direct_prim, direct_native, nontail_self, 1);
          sjc.shared_non_tail_code[4][num_rands][mo] = code;
        }
        unboxed_code = sjc.shared_non_tail_code[4][num_rands][mo];
        CHECK_NESTED_GENERATE();
      } else
        unboxed_code = NULL;
#endif

      if (num_rands < MAX_SHARED_CALL_RANDS) {
        if (!sjc.shared_non_tail_code[dp][num_rands][mo]) {
          scheme_ensure_retry_available(jitter, multi_ok, result_ignored);
          code = scheme_generate_shared_call(num_rands, jitter, multi_ok, result_ignored, is_tail,
                                             direct_prim, direct_native, nontail_self, 0);
          sjc.shared_non_tail_code[dp][num_rands][mo] = code;
        }
        code = sjc.shared_non_tail_code[dp][num_rands][mo];
      } else {
        /* Not used, due to `apply_to_list` */
        code = NULL;
      }
      LOG_IT(("<-non-tail %d %d %d\n", dp, num_rands, mo));
      CHECK_NESTED_GENERATE();

      if (nontail_self) {
        generate_nontail_self_setup(jitter);
      }

      if (apply_to_list) {
        jit_movi_i(JIT_V1, num_rands);
        if (multi_ok)
          (void)jit_calli(sjc.apply_to_list_multi_ok_code);
        else
          (void)jit_calli(sjc.apply_to_list_code);
      } else {
        GC_CAN_IGNORE jit_insn *refdone = NULL;

#ifdef USE_FLONUM_UNBOXING
        /* Try the unboxed-argument fast path first; `refdone` branches
           past the boxed-path call when the fast path succeeds. */
        if (unboxed_code) {
          generate_call_path_with_unboxes(jitter, direct_flostack_offset, unboxed_code, &refdone,
                                          num_rands, direct_lam, rator);
          CHECK_LIMIT();
        }
#endif

        (void)jit_calli(code);

        if (refdone)
          mz_patch_branch(refdone);
      }

      if (direct_prim) {
        if (num_rands == 1) {
          /* Popped single argument after return of prim: */
          jitter->need_set_rs = 1;
        } else {
          /* Runstack is up-to-date: */
          jitter->need_set_rs = 0;
        }
      } else {
        /* Otherwise, we may have called native code, which may have left
           the runstack register out of sync with scheme_current_runstack. */
        jitter->need_set_rs = 1;
      }
    }
  }

  END_JIT_DATA(need_non_tail ? 22 : 4);

  return is_tail ? 2 : 1;
}
2417
2418
static int detect_unsafe_struct_refs(Scheme_Object *arg, Scheme_Object **alt_rands, Scheme_App_Rec *app,
                                     int i, int num_rands, int shift)
/* Look for `(unsafe-struct[*]-ref id 'num)` ... as a sequence of
   arguments, which shows up as a result of `struct-copy`, and return
   the length of the sequence (0 when `arg` is not such a reference at
   all). Instead of performing each `unsafe-struct[*]-ref` separately,
   which can involve a chaperone test each time, the caller can test
   once and extract all fields in the run. */
{
  Scheme_App3_Rec *first, *next;
  Scheme_Object *rand;
  int run_len, field_delta;

  /* The candidate itself must be a two-argument application of
     unsafe-struct-ref or unsafe-struct*-ref to a local and a fixnum. */
  if (!SAME_TYPE(SCHEME_TYPE(arg), scheme_application3_type))
    return 0;
  first = (Scheme_App3_Rec *)arg;
  if (!(SAME_OBJ(first->rator, scheme_unsafe_struct_ref_proc)
        || SAME_OBJ(first->rator, scheme_unsafe_struct_star_ref_proc))
      || !SAME_TYPE(SCHEME_TYPE(first->rand1), scheme_local_type)
      || !SCHEME_INTP(first->rand2))
    return 0;

  /* Field index minus argument position stays constant across a run
     of sequential field references: */
  field_delta = SCHEME_INT_VAL(first->rand2) - i;

  run_len = 1;
  for (i++; i < num_rands; i++, run_len++) {
    rand = (alt_rands ? alt_rands[i+shift] : app->args[i+shift]);
    if (!SAME_TYPE(SCHEME_TYPE(rand), scheme_application3_type))
      break;
    next = (Scheme_App3_Rec *)rand;
    if (!(SAME_OBJ(next->rator, scheme_unsafe_struct_ref_proc)
          || SAME_OBJ(next->rator, scheme_unsafe_struct_star_ref_proc)))
      break;
    if (!SAME_TYPE(SCHEME_TYPE(next->rand1), scheme_local_type)
        || !SCHEME_INTP(next->rand2))
      break;
    /* Same struct local, and the next sequential field index: */
    if (SCHEME_INT_VAL(next->rand2) != i + field_delta)
      break;
    if (SCHEME_LOCAL_POS(next->rand1) != SCHEME_LOCAL_POS(first->rand1))
      break;
  }

  return run_len;
}
2461
static int generate_unsafe_struct_ref_sequence(mz_jit_state *jitter, Scheme_Object *arg, Scheme_Object *last_arg,
                                               int count, int stack_pos)
/* Implement a sequence discovered by `detect_unsafe_struct_refs()`:
   emit code that loads the struct once, checks once for a chaperone,
   and then copies `count` consecutive slots (starting at the field
   index in `arg`) into runstack slots starting at `stack_pos`. The
   last extracted value is left in JIT_R0 instead of being stored,
   matching the usual argument-generation protocol. */
{
  Scheme_App3_Rec *app3 = (Scheme_App3_Rec *)arg;
  int i, base = SCHEME_INT_VAL(app3->rand2);
  GC_CAN_IGNORE jit_insn *ref2;

  /* Using `last_arg` ensures that we clear the local, if needed */
  /* NOTE(review): the skip/unskip of 2 slots presumably accounts for the
     App3 argument slots not pushed for this inlined sequence -- confirm
     against mz_runstack_skipped conventions. */
  mz_runstack_skipped(jitter, 2);
  scheme_generate(((Scheme_App3_Rec *)last_arg)->rand1, jitter, 0, 0, 0, JIT_R0, NULL, NULL);
  CHECK_LIMIT();
  mz_runstack_unskipped(jitter, 2);

  /* Check for chaperones, and take slow path if found */
  /* (Only the non-`*` variant must respect chaperones; for
     unsafe-struct*-ref there is no check and ref2 stays NULL.) */
  __START_SHORT_JUMPS__(1);
  if (SAME_OBJ(app3->rator, scheme_unsafe_struct_ref_proc)) {
    GC_CAN_IGNORE jit_insn *ref, *refslow;
    jit_ldxi_s(JIT_R2, JIT_R0, &((Scheme_Object *)0x0)->type);
    ref = jit_bnei_i(jit_forward(), JIT_R2, scheme_chaperone_type);
    /* Slow path: stash the struct pointer on the runstack, pass the
       base field index in V1 and the count in R0, and call the shared
       struct_raw_refs stub to fill the slots. */
    refslow = jit_get_ip();
    jit_addi_p(JIT_R1, JIT_RUNSTACK, WORDS_TO_BYTES(stack_pos));
    jit_str_p(JIT_R1, JIT_R0);
    jit_movi_i(JIT_V1, base);
    jit_movi_i(JIT_R0, count);
    (void)jit_calli(sjc.struct_raw_refs_code);
    ref2 = jit_jmpi(jit_forward());
    mz_patch_branch(ref);
    /* A procedure chaperone also needs the slow path: */
    (void)jit_beqi_i(refslow, JIT_R2, scheme_proc_chaperone_type);
    CHECK_LIMIT();
  } else
    ref2 = NULL;

  /* This is the fast path: */
  for (i = 0; i < count; i++) {
    jit_ldxi_p(JIT_R1, JIT_R0, (intptr_t)&(((Scheme_Structure *)0x0)->slots[i+base]));
    if (i != count - 1)
      mz_rs_stxi(stack_pos+i, JIT_R1);
    else
      jit_movr_p(JIT_R0, JIT_R1); /* last value stays in R0 */
    CHECK_LIMIT();
  }

  if (ref2)
    mz_patch_branch(ref2);
  __END_SHORT_JUMPS__(1);

  return 1;
}
2511
2512 #endif
2513