1 #include "schpriv.h"
2 #include "schmach.h"
3 #include "future.h"
4 
5 #ifdef MZ_USE_JIT
6 
7 #include "jit.h"
8 
9 #define JITARITH_TS_PROCS
10 #include "jit_ts.c"
11 
scheme_jit_is_fixnum(Scheme_Object * rand)12 int scheme_jit_is_fixnum(Scheme_Object *rand)
13 {
14   if (SCHEME_INTP(rand)
15       || (SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)
16           && (SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_FIXNUM)))
17     return 1;
18   else if (scheme_expr_produces_local_type(rand, NULL) == SCHEME_LOCAL_TYPE_FIXNUM)
19     return 1;
20   else
21     return 0;
22 }
23 
can_reorder_unboxing(Scheme_Object * rand,Scheme_Object * rand2,int extfl)24 static int can_reorder_unboxing(Scheme_Object *rand, Scheme_Object *rand2, int extfl)
25 {
26   /* Can we reorder `rand' and `rand2', given that we want floating-point
27      results (so it's ok for `rand' to be a floating-point local)? */
28   return scheme_is_relatively_constant_and_avoids_r1_maybe_fp(rand, rand2, 1, extfl);
29 }
30 
is_inline_unboxable_op(Scheme_Object * obj,int flag,int unsafely,int just_checking_result,int extfl)31 static int is_inline_unboxable_op(Scheme_Object *obj, int flag, int unsafely, int just_checking_result, int extfl)
32 /* If unsafely, a result of 2 means that arguments should be checked safely. */
33 {
34   if (!SCHEME_PRIMP(obj))
35     return 0;
36   if (!(SCHEME_PRIM_PROC_OPT_FLAGS(obj) & flag))
37     return 0;
38 
39   /* We have a table here for now, instead of flags accessed via
40      SCHEME_PRIM_PROC_OPT_FLAGS(), because this function reports
41      properties of the JIT rather than inherent properties of the
42      functions. */
43 
44   if (!extfl) {
45     if (IS_NAMED_PRIM(obj, "unsafe-fl+")) return 1;
46     if (IS_NAMED_PRIM(obj, "unsafe-fl-")) return 1;
47     if (IS_NAMED_PRIM(obj, "unsafe-fl*")) return 1;
48     if (IS_NAMED_PRIM(obj, "unsafe-fl/")) return 1;
49     if (IS_NAMED_PRIM(obj, "unsafe-flabs")) return 1;
50     if (IS_NAMED_PRIM(obj, "unsafe-flsqrt")) return 1;
51     if (IS_NAMED_PRIM(obj, "unsafe-flmin")) return 1;
52     if (IS_NAMED_PRIM(obj, "unsafe-flmax")) return 1;
53     if (IS_NAMED_PRIM(obj, "unsafe-fx->fl")) return 1;
54     if (IS_NAMED_PRIM(obj, "unsafe-f64vector-ref")) return 1;
55     if (IS_NAMED_PRIM(obj, "unsafe-flvector-ref")) return 1;
56     if (IS_NAMED_PRIM(obj, "unsafe-flimag-part")) return 1;
57     if (IS_NAMED_PRIM(obj, "unsafe-flreal-part")) return 1;
58 
59     if (unsafely) {
60       /* These are inline-unboxable when their args are
61          safely inline-unboxable: */
62       if (IS_NAMED_PRIM(obj, "fl+")) return 2;
63       if (IS_NAMED_PRIM(obj, "fl-")) return 2;
64       if (IS_NAMED_PRIM(obj, "fl*")) return 2;
65       if (IS_NAMED_PRIM(obj, "fl/")) return 2;
66       if (IS_NAMED_PRIM(obj, "flabs")) return 2;
67       if (IS_NAMED_PRIM(obj, "flsqrt")) return 2;
68       if (IS_NAMED_PRIM(obj, "flmin")) return 2;
69       if (IS_NAMED_PRIM(obj, "flmax")) return 2;
70       if (IS_NAMED_PRIM(obj, "flimag-part")) return 2;
71       if (IS_NAMED_PRIM(obj, "flreal-part")) return 2;
72 
73       if (just_checking_result) {
74         if (IS_NAMED_PRIM(obj, "flfloor")) return 1;
75         if (IS_NAMED_PRIM(obj, "flceiling")) return 1;
76         if (IS_NAMED_PRIM(obj, "fltruncate")) return 1;
77         if (IS_NAMED_PRIM(obj, "flround")) return 1;
78         if (IS_NAMED_PRIM(obj, "flsingle")) return 1;
79         if (IS_NAMED_PRIM(obj, "unsafe-flsingle")) return 1;
80         if (IS_NAMED_PRIM(obj, "flsin")) return 1;
81         if (IS_NAMED_PRIM(obj, "flcos")) return 1;
82         if (IS_NAMED_PRIM(obj, "fltan")) return 1;
83         if (IS_NAMED_PRIM(obj, "flasin")) return 1;
84         if (IS_NAMED_PRIM(obj, "flacos")) return 1;
85         if (IS_NAMED_PRIM(obj, "flatan")) return 1;
86         if (IS_NAMED_PRIM(obj, "fllog")) return 1;
87         if (IS_NAMED_PRIM(obj, "flexp")) return 1;
88         if (IS_NAMED_PRIM(obj, "flexpt")) return 1;
89       }
90     }
91   }
92 
93 #ifdef MZ_LONG_DOUBLE
94   if (extfl) {
95     if (IS_NAMED_PRIM(obj, "unsafe-extfl+")) return 1;
96     if (IS_NAMED_PRIM(obj, "unsafe-extfl-")) return 1;
97     if (IS_NAMED_PRIM(obj, "unsafe-extfl*")) return 1;
98     if (IS_NAMED_PRIM(obj, "unsafe-extfl/")) return 1;
99     if (IS_NAMED_PRIM(obj, "unsafe-extflabs")) return 1;
100     if (IS_NAMED_PRIM(obj, "unsafe-extflsqrt")) return 1;
101     if (IS_NAMED_PRIM(obj, "unsafe-extflmin")) return 1;
102     if (IS_NAMED_PRIM(obj, "unsafe-extflmax")) return 1;
103     if (IS_NAMED_PRIM(obj, "unsafe-fx->extfl")) return 1;
104     if (IS_NAMED_PRIM(obj, "unsafe-f80vector-ref")) return 1;
105     if (IS_NAMED_PRIM(obj, "unsafe-extflvector-ref")) return 1;
106 
107     if (unsafely) {
108       /* These are inline-unboxable when their args are
109          safely inline-unboxable: */
110       if (IS_NAMED_PRIM(obj, "extfl+")) return 2;
111       if (IS_NAMED_PRIM(obj, "extfl-")) return 2;
112       if (IS_NAMED_PRIM(obj, "extfl*")) return 2;
113       if (IS_NAMED_PRIM(obj, "extfl/")) return 2;
114       if (IS_NAMED_PRIM(obj, "extflabs")) return 2;
115       if (IS_NAMED_PRIM(obj, "extflsqrt")) return 2;
116       if (IS_NAMED_PRIM(obj, "extflmin")) return 2;
117       if (IS_NAMED_PRIM(obj, "extflmax")) return 2;
118 
119       if (just_checking_result) {
120         if (IS_NAMED_PRIM(obj, "extflfloor")) return 1;
121         if (IS_NAMED_PRIM(obj, "extflceiling")) return 1;
122         if (IS_NAMED_PRIM(obj, "extfltruncate")) return 1;
123         if (IS_NAMED_PRIM(obj, "extflround")) return 1;
124         if (IS_NAMED_PRIM(obj, "extflsin")) return 1;
125         if (IS_NAMED_PRIM(obj, "extflcos")) return 1;
126         if (IS_NAMED_PRIM(obj, "extfltan")) return 1;
127         if (IS_NAMED_PRIM(obj, "extflasin")) return 1;
128         if (IS_NAMED_PRIM(obj, "extflacos")) return 1;
129         if (IS_NAMED_PRIM(obj, "extflatan")) return 1;
130         if (IS_NAMED_PRIM(obj, "extfllog")) return 1;
131         if (IS_NAMED_PRIM(obj, "extflexp")) return 1;
132         if (IS_NAMED_PRIM(obj, "extflexpt")) return 1;
133       }
134     }
135   }
136 #endif
137 
138   return 0;
139 }
140 
scheme_generate_pop_unboxed(mz_jit_state * jitter)141 int scheme_generate_pop_unboxed(mz_jit_state *jitter)
142 {
143 #if defined(MZ_USE_JIT_I386)
144 # if 1
145   if (jitter->unbox_depth) {
146     scheme_signal_error("internal error: scheme_generate_pop_unboxed() isn't right");
147   }
148   /* The code below doesn't work right because it's emitted *before*
149      the test for failure. Adding it after the failure test means
150      moving it to (something like) sjc.unbound_global_code. Meanwhile,
151      the JIT doesn't currently actually try to reference globals when it has
152      values on the FP stack. */
153 # else
154   /* If we have some arguments pushed on the FP stack, we need
155      to pop them off before escaping. */
156   int i;
157   for (i = jitter->unbox_depth; i--; ) {
158     FSTPr(0);
159   }
160   CHECK_LIMIT();
161 # endif
162 #endif
163   return 1;
164 }
165 
is_unboxing_immediate(Scheme_Object * obj,int unsafely,int extfl)166 static int is_unboxing_immediate(Scheme_Object *obj, int unsafely, int extfl)
167 {
168   Scheme_Type t;
169 
170   t = SCHEME_TYPE(obj);
171   switch (t) {
172   case scheme_local_type:
173     if (!extfl) {
174       if (SCHEME_GET_LOCAL_TYPE(obj) == SCHEME_LOCAL_TYPE_FLONUM)
175         return 1;
176     }
177 #ifdef MZ_LONG_DOUBLE
178     if (extfl) {
179       if (SCHEME_GET_LOCAL_TYPE(obj) == SCHEME_LOCAL_TYPE_EXTFLONUM)
180         return 1;
181     }
182 #endif
183     return unsafely;
184   case scheme_toplevel_type:
185   case scheme_static_toplevel_type:
186     /* Can generalize to allow any toplevel if scheme_generate_pop_unboxed() is fixed */
187     if ((SCHEME_TOPLEVEL_FLAGS(obj) & SCHEME_TOPLEVEL_FLAGS_MASK) < SCHEME_TOPLEVEL_READY)
188       return 0;
189     return unsafely;
190     break;
191   case scheme_local_unbox_type:
192     return unsafely;
193     break;
194   default:
195     if (!unsafely) {
196       if (!extfl)
197         return SCHEME_FLOATP(obj);
198 #ifdef MZ_LONG_DOUBLE
199       if (extfl)
200         return SCHEME_LONG_DBLP(obj);
201 #endif
202       return 0;
203     }
204     return (t > _scheme_values_types_);
205   }
206 }
207 
scheme_can_unbox_inline(Scheme_Object * obj,int fuel,int regs,int unsafely,int extfl)208 int scheme_can_unbox_inline(Scheme_Object *obj, int fuel, int regs, int unsafely, int extfl)
209 /* Assuming that `arg' is [unsafely] assumed to produce a flonum, can we
210    just unbox it without using more than `regs' registers? There
211    cannot be any errors or function calls, unless we've specifically
212    instrumented them to save/pop floating-point values before
213    jumping. If the result is true, then arguments must be evaluated in
214    order. */
215 {
216   Scheme_Type t;
217 
218   if (!fuel) return 0;
219   if (!regs) return 0;
220 
221   t = SCHEME_TYPE(obj);
222   switch (t) {
223   case scheme_application2_type:
224     {
225       Scheme_App2_Rec *app = (Scheme_App2_Rec *)obj;
226       int ok_op;
227       ok_op = is_inline_unboxable_op(app->rator, SCHEME_PRIM_IS_UNARY_INLINED, unsafely, 0, extfl);
228       if (!ok_op)
229         return 0;
230       else if (ok_op == 2)
231         unsafely = 0;
232       return scheme_can_unbox_inline(app->rand, fuel - 1, regs, unsafely, extfl);
233     }
234   case scheme_application3_type:
235     {
236       Scheme_App3_Rec *app = (Scheme_App3_Rec *)obj;
237       int ok_op;
238       ok_op = is_inline_unboxable_op(app->rator, SCHEME_PRIM_IS_BINARY_INLINED, unsafely, 0, extfl);
239       if (!ok_op)
240         return 0;
241       else if (ok_op == 2)
242         unsafely = 0;
243       if ((SCHEME_PRIM_PROC_OPT_FLAGS(app->rator) & SCHEME_PRIM_IS_BINARY_INLINED)
244           && (IS_NAMED_PRIM(app->rator, "unsafe-f64vector-ref")
245               || IS_NAMED_PRIM(app->rator, "unsafe-flvector-ref"))) {
246         if (is_unboxing_immediate(app->rand1, 1, extfl)
247             && is_unboxing_immediate(app->rand2, 1, extfl)) {
248           return 1;
249         }
250       }
251 #ifdef MZ_LONG_DOUBLE
252       if ((SCHEME_PRIM_PROC_OPT_FLAGS(app->rator) & SCHEME_PRIM_IS_BINARY_INLINED)
253           && (IS_NAMED_PRIM(app->rator, "unsafe-f80vector-ref")
254               || IS_NAMED_PRIM(app->rator, "unsafe-extflvector-ref"))) {
255         if (is_unboxing_immediate(app->rand1, 1, extfl)
256             && is_unboxing_immediate(app->rand2, 1, extfl)) {
257           return 1;
258         }
259       }
260 #endif
261       if (!scheme_can_unbox_inline(app->rand1, fuel - 1, regs, unsafely, extfl))
262         return 0;
263       return scheme_can_unbox_inline(app->rand2, fuel - 1, regs - 1, unsafely, extfl);
264     }
265   default:
266     return is_unboxing_immediate(obj, unsafely, extfl);
267   }
268 }
269 
can_unbox_directly(Scheme_Object * obj,int extfl,int bfuel)270 int can_unbox_directly(Scheme_Object *obj, int extfl, int bfuel)
271 /* Used only when !can_unbox_inline(). Detects safe operations that
272    produce flonums when they don't raise an exception, and that the JIT
273    supports directly unboxing. */
274 {
275   Scheme_Type t;
276 
277   while (1) {
278     t = SCHEME_TYPE(obj);
279     switch (t) {
280     case scheme_application2_type:
281       {
282         Scheme_App2_Rec *app = (Scheme_App2_Rec *)obj;
283         if (is_inline_unboxable_op(app->rator, SCHEME_PRIM_IS_UNARY_INLINED, 1, 1, extfl))
284           return 1;
285         if (SCHEME_PRIMP(app->rator)
286             && (SCHEME_PRIM_PROC_OPT_FLAGS(app->rator) & SCHEME_PRIM_IS_UNARY_INLINED)) {
287           if (!extfl) {
288             if (IS_NAMED_PRIM(app->rator, "->fl")
289                 || IS_NAMED_PRIM(app->rator, "fx->fl")
290                 || IS_NAMED_PRIM(app->rator, "unsafe-flrandom"))
291               return 1;
292           }
293 #ifdef MZ_LONG_DOUBLE
294           if (extfl) {
295             if (IS_NAMED_PRIM(app->rator, "->extfl")
296                 || IS_NAMED_PRIM(app->rator, "fx->extfl"))
297               return 1;
298           }
299 #endif
300         }
301         return 0;
302       }
303       break;
304     case scheme_application3_type:
305       {
306         Scheme_App3_Rec *app = (Scheme_App3_Rec *)obj;
307         if (is_inline_unboxable_op(app->rator, SCHEME_PRIM_IS_BINARY_INLINED, 1, 1, extfl))
308           return 1;
309         if (SCHEME_PRIMP(app->rator)
310             && (SCHEME_PRIM_PROC_OPT_FLAGS(app->rator) & SCHEME_PRIM_IS_BINARY_INLINED)) {
311           if (!extfl) {
312             if (IS_NAMED_PRIM(app->rator, "flvector-ref")) return 1;
313           }
314 #ifdef MZ_LONG_DOUBLE
315           if (extfl) {
316             if (IS_NAMED_PRIM(app->rator, "extflvector-ref")) return 1;
317           }
318 #endif
319         }
320         return 0;
321       }
322       break;
323     case scheme_let_value_type:
324       obj = ((Scheme_Let_Value *)obj)->body;
325       break;
326     case scheme_let_one_type:
327       obj = ((Scheme_Let_One *)obj)->body;
328       break;
329     case scheme_let_void_type:
330       obj = ((Scheme_Let_Void *)obj)->body;
331       break;
332     case scheme_letrec_type:
333       obj = ((Scheme_Letrec *)obj)->body;
334       break;
335     case scheme_branch_type:
336       if (!bfuel)
337         return 0;
338       bfuel--;
339       if (!can_unbox_directly(((Scheme_Branch_Rec *)obj)->tbranch, extfl, bfuel))
340         return 0;
341       obj = ((Scheme_Branch_Rec *)obj)->fbranch;
342       break;
343     case scheme_sequence_type:
344       obj = ((Scheme_Sequence *)obj)->array[((Scheme_Sequence *)obj)->count - 1];
345       break;
346     default:
347       return is_unboxing_immediate(obj, 0, extfl);
348     }
349   }
350 }
351 
scheme_can_unbox_directly(Scheme_Object * obj,int extfl)352 int scheme_can_unbox_directly(Scheme_Object *obj, int extfl)
353 {
354   return can_unbox_directly(obj, extfl, 3);
355 }
356 
generate_arith_slow_path(mz_jit_state * jitter,Scheme_Object * rator,jit_insn ** _ref,jit_insn ** _ref4,Branch_Info * for_branch,int branch_short,int orig_args,int reversed,int arith,int use_v,int v,int dest)357 static jit_insn *generate_arith_slow_path(mz_jit_state *jitter, Scheme_Object *rator,
358 					  jit_insn **_ref, jit_insn **_ref4,
359                                           Branch_Info *for_branch, int branch_short,
360 					  int orig_args, int reversed, int arith, int use_v, int v,
361                                           int dest)
362 /* *_ref4 is place to set for where to jump (for true case, if for_branch) after completing;
363    *_ref is place to set for where to jump for false if for_branch, result if !for_branch;
364    result is place to jump to start slow path if fixnum attempt fails */
365 {
366   GC_CAN_IGNORE jit_insn *ref, *ref4, *refslow;
367 
368   refslow = jit_get_ip();
369 
370   (void)jit_movi_p(JIT_R2, ((Scheme_Primitive_Proc *)rator)->prim_val);
371   if (for_branch) {
372     scheme_prepare_branch_jump(jitter, for_branch);
373     CHECK_LIMIT();
374     ref4 = jit_patchable_movi_p(JIT_V1, jit_forward());
375     mz_set_local_p(JIT_V1, JIT_LOCAL2);
376     ref = jit_patchable_movi_p(JIT_V1, jit_forward());
377   } else {
378     ref4 = NULL;
379     ref = NULL;
380   }
381 
382   if (orig_args == 1) {
383     if (for_branch) {
384       (void)jit_jmpi(sjc.call_original_unary_arith_for_branch_code);
385     } else {
386       (void)jit_calli(sjc.call_original_unary_arith_code);
387     }
388   } else {
389     if (use_v) {
390       (void)jit_movi_p(JIT_R1, scheme_make_integer(v));
391       reversed = !reversed;
392     }
393 
394     if (for_branch) {
395       if (reversed) {
396 	(void)jit_jmpi(sjc.call_original_binary_rev_arith_for_branch_code);
397       } else {
398 	(void)jit_jmpi(sjc.call_original_binary_arith_for_branch_code);
399       }
400     } else {
401       if (reversed) {
402 	(void)jit_calli(sjc.call_original_binary_rev_arith_code);
403       } else {
404 	(void)jit_calli(sjc.call_original_binary_arith_code);
405       }
406     }
407   }
408 
409   if (!for_branch) {
410     jit_movr_p(dest, JIT_R0);
411     __START_SHORT_JUMPS__(branch_short);
412     ref = jit_jmpi(jit_forward());
413     __END_SHORT_JUMPS__(branch_short);
414   }
415 
416   *_ref = ref;
417   *_ref4 = ref4;
418 
419   if ((arith == ARITH_SH) || (arith == ARITH_LSH) || (arith == ARITH_LSH_WRAP)) {
420     /* Add tag back to first arg, just in case. See arithmetic-shift branch to refslow. */
421     ref = jit_get_ip();
422 
423     if (reversed || use_v) {
424       jit_ori_l(JIT_R0, JIT_R0, 0x1);
425     } else {
426       jit_ori_l(JIT_R1, JIT_R1, 0x1);
427     }
428 
429     __START_TINY_JUMPS__(1);
430     (void)jit_jmpi(refslow);
431     __END_TINY_JUMPS__(1);
432 
433     return ref;
434   } else {
435     return refslow;
436   }
437 }
438 
439 #ifdef SIXTY_FOUR_BIT_INTEGERS
440 # define SCHEME_INT_SMALL_ENOUGH(rand2) ((((intptr_t)rand2 & 0x7FFFFFFF) == (intptr_t)rand2) || (((intptr_t)rand2 & 0xFFFFFFFFF8000000) == 0xFFFFFFFFF8000000))
441 #else
442 # define SCHEME_INT_SMALL_ENOUGH(rand2) 1
443 #endif
444 
can_fast_double(int arith,int cmp,int two_args)445 static int can_fast_double(int arith, int cmp, int two_args)
446 {
447 #ifdef INLINE_FP_OPS
448   if ((arith == ARITH_ADD)
449       || (arith == ARITH_SUB)
450       || (arith == ARITH_MUL)
451       || (arith == ARITH_DIV)
452       || (arith == ARITH_ABS)
453       || (arith == ARITH_EX_INEX)
454       || (arith == ARITH_SQRT)
455       || (arith == ARITH_FLUNOP)
456       || (arith == ARITH_INEX_EX)
457       || (arith == ARITH_INEX_TRUNC_EX))
458     return 1;
459 #endif
460 #ifdef INLINE_FP_COMP
461   if ((!arith && (cmp != CMP_EVENP) && (cmp != CMP_ODDP))
462       || ((arith == ARITH_MIN) && two_args)
463       || ((arith == ARITH_MAX) && two_args))
464     return 1;
465 #endif
466 
467   return 0;
468 }
469 
470 #ifdef CAN_INLINE_ALLOC
471 # ifdef JIT_USE_FP_OPS
472 #  define DECL_FLONUM_GLUE(op) static void call_ ## op(void) XFORM_SKIP_PROC {  \
473     scheme_jit_save_fp = scheme_double_ ## op(scheme_jit_save_fp); }
474 #  ifdef MZ_LONG_DOUBLE
475 #   define DECL_EXTNUM_GLUE(op) static void call_long_double_ ## op(void) XFORM_SKIP_PROC {  \
476       scheme_jit_save_extfp = scheme_long_double_ ## op(scheme_jit_save_extfp); }
477 #   define DECL_FP_GLUE(op) DECL_FLONUM_GLUE(op) DECL_EXTNUM_GLUE(op)
478 #  else
479 #   define DECL_FP_GLUE(op) DECL_FLONUM_GLUE(op)
480 #  endif
481 DECL_FP_GLUE(sin)
482 DECL_FP_GLUE(cos)
483 DECL_FP_GLUE(tan)
484 DECL_FP_GLUE(asin)
485 DECL_FP_GLUE(acos)
486 DECL_FP_GLUE(atan)
487 DECL_FP_GLUE(exp)
488 DECL_FP_GLUE(log)
489 DECL_FP_GLUE(floor)
490 DECL_FP_GLUE(ceiling)
491 DECL_FP_GLUE(truncate)
492 DECL_FP_GLUE(round)
493 DECL_FLONUM_GLUE(single)
494 
495 typedef void (*call_fp_proc)(void);
496 #  ifdef MZ_LONG_DOUBLE
497 typedef void (*call_extfp_proc)(void);
498 #  endif
499 
500 #  define DECL_BIN_FLONUM_GLUE(op) static void call_ ## op(void) XFORM_SKIP_PROC {  \
501     scheme_jit_save_fp = scheme_double_ ## op(scheme_jit_save_fp, scheme_jit_save_fp2); }
502 #  ifdef MZ_LONG_DOUBLE
503 #   define DECL_BIN_EXTNUM_GLUE(op) static void call_long_double_ ## op(void) XFORM_SKIP_PROC {  \
504      scheme_jit_save_extfp = scheme_long_double_ ## op(scheme_jit_save_extfp, scheme_jit_save_extfp2); }
505 #   define DECL_BIN_FP_GLUE(op) DECL_BIN_FLONUM_GLUE(op) DECL_BIN_EXTNUM_GLUE(op)
506 #  else
507 #   define DECL_BIN_FP_GLUE(op) DECL_BIN_FLONUM_GLUE(op)
508 #  endif
509 
510 DECL_BIN_FP_GLUE(expt)
511 typedef void (*call_fp_bin_proc)(void);
512 
513 #  ifdef MZ_LONG_DOUBLE
514 typedef void (*call_extfp_bin_proc)(void);
515 #  endif
516 # endif
517 #endif
518 
scheme_generate_unboxing(mz_jit_state * jitter,int target)519 int scheme_generate_unboxing(mz_jit_state *jitter, int target)
520 {
521   int fpr0 USED_ONLY_SOMETIMES;
522 
523 #ifdef MZ_LONG_DOUBLE
524   if (jitter->unbox_extflonum) {
525     fpr0 = JIT_FPU_FPR_0(jitter->unbox_depth);
526     jit_fpu_ldxi_ld_fppush(fpr0, target, &((Scheme_Long_Double *)0x0)->long_double_val);
527   } else
528 #endif
529     {
530       fpr0 = JIT_FPR_0(jitter->unbox_depth);
531       jit_ldxi_d_fppush(fpr0, target, &((Scheme_Double *)0x0)->double_val);
532     }
533 
534   jitter->unbox_depth++;
535 
536   return 1;
537 }
538 
scheme_generate_alloc_double(mz_jit_state * jitter,int inline_retry,int dest)539 int scheme_generate_alloc_double(mz_jit_state *jitter, int inline_retry, int dest)
540 /* value should be in JIT_FPR0; R0-R2 not saved; V1 used */
541 {
542 #ifdef INLINE_FP_OPS
543 # ifdef CAN_INLINE_ALLOC
544   scheme_inline_alloc(jitter, sizeof(Scheme_Double), scheme_double_type, 0, 0, 1, inline_retry, 0);
545   CHECK_LIMIT();
546   jit_addi_p(dest, JIT_V1, OBJHEAD_SIZE);
547   (void)jit_stxi_d_fppop(&((Scheme_Double *)0x0)->double_val, dest, JIT_FPR0);
548 # else
549   (void)mz_tl_sti_d_fppop(tl_scheme_jit_save_fp, JIT_FPR0, JIT_R0);
550   JIT_UPDATE_THREAD_RSPTR_IF_NEEDED();
551   mz_prepare(0);
552   {
553     GC_CAN_IGNORE jit_insn *refr;
554     (void)mz_finish_lwe(ts_malloc_double, refr);
555   }
556   jit_retval(dest);
557 # endif
558 #endif
559   return 1;
560 }
561 
562 #ifdef MZ_LONG_DOUBLE
scheme_generate_alloc_long_double(mz_jit_state * jitter,int inline_retry,int dest)563 int scheme_generate_alloc_long_double(mz_jit_state *jitter, int inline_retry, int dest)
564 /* same as above */
565 {
566 #ifdef INLINE_FP_OPS
567 # ifdef CAN_INLINE_ALLOC
568   scheme_inline_alloc(jitter, sizeof(Scheme_Long_Double), scheme_long_double_type, 0, 0, 0, inline_retry, 1);
569   CHECK_LIMIT();
570   jit_addi_p(dest, JIT_V1, OBJHEAD_SIZE);
571   (void)jit_fpu_stxi_ld_fppop(&((Scheme_Long_Double *)0x0)->long_double_val, dest, JIT_FPU_FPR0);
572 # else
573   (void)mz_fpu_ta_tl_sti_ld_fppop(tl_scheme_jit_save_extfp, JIT_FPU_FPR0, JIT_R0);
574   JIT_UPDATE_THREAD_RSPTR_IF_NEEDED();
575   mz_prepare(0);
576   {
577     GC_CAN_IGNORE jit_insn *refr;
578     (void)mz_finish_lwe(ts_malloc_long_double, refr);
579   }
580   jit_retval(dest);
581 # endif
582 #endif
583   return 1;
584 }
585 #endif
586 
scheme_generate_alloc_X_double(mz_jit_state * jitter,int inline_retry,int dest,int extfl)587 int scheme_generate_alloc_X_double(mz_jit_state *jitter, int inline_retry, int dest, int extfl)
588 {
589   MZ_FPUSEL_STMT(extfl,
590                  return scheme_generate_alloc_long_double(jitter, inline_retry, dest),
591                  return scheme_generate_alloc_double(jitter, inline_retry, dest));
592 }
593 
generate_float_point_arith(mz_jit_state * jitter,Scheme_Object * rator,int arith,int cmp,int reversed,int two_args,int second_const,jit_insn ** _refd,jit_insn ** _refdt,Branch_Info * for_branch,int branch_short,int unsafe_fl,int unboxed,int unboxed_result,int dest,int extfl)594 static int generate_float_point_arith(mz_jit_state *jitter, Scheme_Object *rator,
595                                       int arith, int cmp, int reversed, int two_args, int second_const,
596                                       jit_insn **_refd, jit_insn **_refdt, Branch_Info *for_branch,
597                                       int branch_short, int unsafe_fl, int unboxed, int unboxed_result,
598                                       int dest, int extfl)
599 /* Unless unboxed, first arg is in JIT_R1, second in JIT_R0.
600    If unboxed in push/pop mode, first arg is pushed before second.
601    If unboxed in direct mode, first arg is in JIT_FPR0+depth
602     and second is in JIT_FPR1+depth (which is backward).
603    Unboxed implies unsafe unless arith == ARITH_INEX_EX or arith == ARITH_INEX_TRUNC_EX. */
604 {
605 #if defined(INLINE_FP_OPS) || defined(INLINE_FP_COMP)
606   GC_CAN_IGNORE jit_insn *ref8, *ref9, *ref10, *refd, *refdt, *refs = NULL, *refs2 = NULL;
607   int no_alloc = unboxed_result;
608   int need_post_pop USED_ONLY_SOMETIMES = 0;
609 
610   if (!unsafe_fl && !unboxed) {
611     /* Maybe they're doubles */
612     __START_TINY_JUMPS__(1);
613     if (two_args) {
614       jit_orr_ul(JIT_R2, JIT_R0, JIT_R1);
615       ref8 = jit_bmsi_ul(jit_forward(), JIT_R2, 0x1);
616     } else
617       ref8 = NULL;
618     jit_ldxi_s(JIT_R2, JIT_R0, &((Scheme_Object *)0x0)->type);
619     if (extfl)
620       ref9 = jit_bnei_i(jit_forward(), JIT_R2, scheme_long_double_type);
621     else
622       ref9 = jit_bnei_i(jit_forward(), JIT_R2, scheme_double_type);
623     if (two_args) {
624       jit_ldxi_s(JIT_R2, JIT_R1, &((Scheme_Object *)0x0)->type);
625       if (extfl)
626         ref10 = jit_bnei_i(jit_forward(), JIT_R2, scheme_long_double_type);
627       else
628         ref10 = jit_bnei_i(jit_forward(), JIT_R2, scheme_double_type);
629     } else
630       ref10 = NULL;
631     CHECK_LIMIT();
632     __END_TINY_JUMPS__(1);
633   } else {
634     ref8 = ref9 = ref10 = NULL;
635   }
636 
637   if (!two_args && !second_const && ((arith == ARITH_MUL) || (arith == ARITH_MUL_WRAP) || ((arith == ARITH_DIV) && reversed))) {
638     /* Special case: multiplication by exact 0 */
639     (void)jit_movi_p(dest, scheme_make_integer(0));
640   } else {
641     /* Yes, they're doubles. First arg is in JIT_R1, second is in JIT_R0.
642        Put the first arg in fpr0 and second (if any) into fpr1. To work
643        right with stacks, that means pushing the second argument first. */
644     int fpr1, fpr0;
645 
646     fpr0 = JIT_FPUSEL_FPR_0(extfl, jitter->unbox_depth);
647     fpr1 = JIT_FPUSEL_FPR_1(extfl, 1+jitter->unbox_depth);
648 
649     if (two_args) {
650       if (!unboxed) {
651         MZ_FPUSEL_STMT(extfl,
652                        jit_fpu_ldxi_ld_fppush(fpr1, JIT_R1, &((Scheme_Long_Double *)0x0)->long_double_val),
653                        jit_ldxi_d_fppush(fpr1, JIT_R1, &((Scheme_Double *)0x0)->double_val));
654       }
655     } else if ((arith == ARITH_SUB) && !second_const && reversed) {
656       reversed = 0;
657     } else if (arith == ARITH_ABS) {
658       /* abs needs no extra number */
659     } else if (arith == ARITH_SQRT) {
660       /* sqrt needs no extra number */
661     } else if (arith == ARITH_FLUNOP) {
662       /* flround, flsin, etc. needs no extra number */
663     } else if (arith == ARITH_EX_INEX) {
664       /* exact->inexact needs no extra number */
665     } else if (arith == ARITH_INEX_EX) {
666       /* inexact->exact needs no extra number */
667     } else if (arith == ARITH_INEX_TRUNC_EX) {
668       /* fl->fx needs no extra number */
669     } else {
670 #ifdef MZ_LONG_DOUBLE
671       long_double d;
672       d = long_double_from_intptr(second_const);
673       if (extfl) {
674         mz_fpu_movi_ld_fppush(fpr1, d, JIT_R2)
675       } else {
676         mz_movi_d_fppush(fpr1, second_const, JIT_R2);
677       }
678 #else
679       double d = second_const;
680       mz_movi_d_fppush(fpr1, d, JIT_R2);
681 #endif
682 
683       reversed = !reversed;
684       cmp = -cmp;
685     }
686 
687     if (!unboxed) {
688       if (arith != ARITH_EX_INEX) {
689         MZ_FPUSEL_STMT(extfl,
690                        jit_fpu_ldxi_ld_fppush(fpr0, JIT_R0, &((Scheme_Long_Double *)0x0)->long_double_val),
691                        jit_ldxi_d_fppush(fpr0, JIT_R0, &((Scheme_Double *)0x0)->double_val));
692       }
693     }
694 
695 #ifdef DIRECT_FPR_ACCESS
696 # define USES_DIRECT_FPR_ACCESS (!extfl)
697 #else
698 # define USES_DIRECT_FPR_ACCESS 0
699 #endif
700 
701 #ifdef DIRECT_FPR_ACCESS
702     if (unboxed && USES_DIRECT_FPR_ACCESS) {
703       /* arguments are backward */
704       reversed = !reversed;
705       cmp = -cmp;
706     }
707 #endif
708 
709     CHECK_LIMIT();
710 
711     if (arith) {
712 #if defined(MZ_LONG_DOUBLE) && defined(MZ_NEED_SET_EXTFL_MODE)
713       int need_control_reset = 0;
714       if (extfl) {
715         switch (arith) {
716         case ARITH_ADD:
717         case ARITH_MUL:
718         case ARITH_DIV:
719         case ARITH_SUB:
720         case ARITH_SQRT:
721           jit_set_fp_control(0x37f);
722           need_control_reset = 1;
723           break;
724         }
725       }
726 #endif
727       switch (arith) {
728       case ARITH_ADD:
729         jit_FPSEL_addr_xd_fppop(extfl, fpr0, fpr0, fpr1);
730         break;
731       case ARITH_MUL:
732         jit_FPSEL_mulr_xd_fppop(extfl, fpr0, fpr0, fpr1);
733         break;
734       case ARITH_DIV:
735         if (!reversed)
736           jit_FPSEL_divrr_xd_fppop(extfl, fpr0, fpr0, fpr1);
737         else
738           jit_FPSEL_divr_xd_fppop(extfl, fpr0, fpr0, fpr1);
739         break;
740       case ARITH_SUB:
741         {
742           if (!two_args && !second_const && !reversed) {
743             /* Need a special case to make sure that (- 0.0) => -0.0 */
744             jit_FPSEL_negr_xd_fppop(extfl, fpr0, fpr0);
745           } else if (reversed)
746             jit_FPSEL_subr_xd_fppop(extfl, fpr0, fpr0, fpr1);
747           else
748             jit_FPSEL_subrr_xd_fppop(extfl, fpr0, fpr0, fpr1);
749         }
750         break;
751       case ARITH_MIN:
752       case ARITH_MAX:
753         {
754           GC_CAN_IGNORE jit_insn *refc, *refn;
755           __START_TINY_JUMPS__(1);
756 
757           /* If R0 is nan, then copy to R1, ensuring nan result */
758           refn = jit_FPSEL_beqr_xd(extfl, jit_forward(), fpr0, fpr0);
759           if (unboxed)
760             jit_FPSEL_movr_xd_rel(extfl, fpr1, fpr0);
761           else
762             jit_movr_p(JIT_R1, JIT_R0);
763           mz_patch_branch(refn);
764           if (arith == ARITH_MIN) {
765             if (unboxed) {
766               refc = jit_FPSEL_bltr_xd(extfl, jit_forward(), fpr0, fpr1);
767             } else {
768               refc = jit_FPSEL_bltr_xd_fppop(extfl, jit_forward(), fpr0, fpr1);
769             }
770           } else {
771             if (unboxed) {
772               refc = jit_FPSEL_bger_xd(extfl, jit_forward(), fpr0, fpr1);
773             } else {
774               refc = jit_FPSEL_bger_xd_fppop(extfl, jit_forward(), fpr0, fpr1);
775             }
776           }
777 	  CHECK_LIMIT();
778           if (unboxed) {
779             jit_FPSEL_movr_xd_rel(extfl, fpr0, fpr1);
780             need_post_pop = 1;
781           } else
782             jit_movr_p(JIT_R0, JIT_R1);
783 
784           mz_patch_branch(refc);
785           __END_TINY_JUMPS__(1);
786           if (!unboxed) {
787             /* we've already set JIT_R0 */
788             jit_movr_p(dest, JIT_R0);
789             no_alloc = 1;
790           }
791         }
792         break;
793       case ARITH_ABS:
794         jit_FPSEL_abs_xd_fppop(extfl, fpr0, fpr0);
795         break;
796       case ARITH_EX_INEX: /* exact->inexact */
797         /* no work to do, because argument is already inexact;
798            no need to allocate, because argument is never unboxed,
799            and it therefore already resides in R0 */
800         jit_movr_p(dest, JIT_R0);
801         no_alloc = 1;
802         break;
803       case ARITH_INEX_EX: /* inexact->exact */
804         if (!unsafe_fl) {
805           jit_FPSEL_movr_xd_fppush(extfl, fpr1, fpr0);
806         }
807         jit_FPSEL_roundr_xd_l_fppop(extfl, JIT_R1, fpr0);
808         if (!unsafe_fl) {
809           /* to check whether it fits in a fixnum, we
810              need to convert back and check whether it
811              is the same */
812           if (unboxed) {
813             JIT_ASSERT(jitter->unbox_depth == 0);
814             jit_FPSEL_movr_xd_fppush(extfl, JIT_FPR2, fpr1); /* for slow path */
815           }
816           jit_FPSEL_extr_l_xd_fppush(extfl, fpr0, JIT_R1);
817           __START_TINY_JUMPS__(1);
818           refs = jit_FPSEL_bantieqr_xd_fppop(extfl, jit_forward(), fpr0, fpr1);
819           __END_TINY_JUMPS__(1);
820           /* result still may not fit in a fixnum */
821           jit_lshi_l(JIT_R2, JIT_R1, 1);
822           jit_rshi_l(JIT_R2, JIT_R2, 1);
823           __START_TINY_JUMPS__(1);
824           refs2 = jit_bner_l(jit_forward(), JIT_R1, JIT_R2);
825           __END_TINY_JUMPS__(1);
826 #if !defined(DIRECT_FPR_ACCESS) || defined(MZ_LONG_DOUBLE)
827           if (unboxed && !USES_DIRECT_FPR_ACCESS)
828             jit_FPSEL_roundr_xd_l_fppop(extfl, JIT_R1, JIT_FPR2); /* slow path won't be needed */
829 #endif
830         }
831         jit_fixnum_l(dest, JIT_R1);
832         no_alloc = 1;
833         break;
834       case ARITH_INEX_TRUNC_EX: /* fl->fx */
835         if (!unsafe_fl) {
836 #ifdef DIRECT_FPR_ACCESS
837           if (unboxed && USES_DIRECT_FPR_ACCESS) {
838             JIT_ASSERT(jitter->unbox_depth == 0);
839             jit_FPSEL_movr_xd_fppush(extfl, JIT_FPR2, fpr0); /* for slow path */
840           }
841 #endif
842 #if !defined(DIRECT_FPR_ACCESS) || defined(MZ_LONG_DOUBLE)
843           if (!USES_DIRECT_FPR_ACCESS) {
844             jit_FPSEL_movr_xd_fppush(extfl, fpr0, fpr0); /* copy for comparison */
845           }
846 #endif
847 #ifdef MZ_LONG_DOUBLE
848           if (extfl) {
849             mz_fpu_movi_ld_fppush(fpr1, scheme_extfl_too_positive_for_fixnum, JIT_R1);
850           } else
851 #endif
852             {
853               mz_movi_d_fppush(fpr1, scheme_double_too_positive_for_fixnum, JIT_R1);
854             }
855           __START_TINY_JUMPS__(1);
856           refs = jit_FPSEL_bantigtr_xd_fppop(extfl, jit_forward(), fpr1, fpr0);
857           __END_TINY_JUMPS__(1);
858 
859 #if !defined(DIRECT_FPR_ACCESS) || defined(MZ_LONG_DOUBLE)
860           if (!USES_DIRECT_FPR_ACCESS) {
861             jit_FPSEL_movr_xd_fppush(extfl, fpr0, fpr0); /* copy for comparison */
862           }
863 #endif
864 #ifdef MZ_LONG_DOUBLE
865           if (extfl) {
866             mz_fpu_movi_ld_fppush(fpr1, scheme_extfl_too_negative_for_fixnum, JIT_R1);
867           } else
868 #endif
869             {
870               mz_movi_d_fppush(fpr1, scheme_double_too_negative_for_fixnum, JIT_R1);
871             }
872           __START_TINY_JUMPS__(1);
873           refs2 = jit_FPSEL_bantiltr_xd_fppop(extfl, jit_forward(), fpr1, fpr0);
874           __END_TINY_JUMPS__(1);
875         }
876         jit_FPSEL_truncr_xd_l_fppop(extfl, JIT_R1, fpr0);
877         jit_fixnum_l(dest, JIT_R1);
878         no_alloc = 1;
879         break;
880       case ARITH_SQRT:
881         jit_FPSEL_sqrt_xd_fppop(extfl, fpr0, fpr0);
882         break;
883 #ifdef CAN_INLINE_ALLOC
884 # ifdef JIT_USE_FP_OPS
885       case ARITH_FLUNOP: /* flfloor, flsin, etc. */
886         {
887           call_fp_proc f;
888 #ifdef MZ_LONG_DOUBLE
889           if (extfl) {
890             if (IS_NAMED_PRIM(rator, "extflsin"))
891               f = call_long_double_sin;
892             else if (IS_NAMED_PRIM(rator, "extflcos"))
893               f = call_long_double_cos;
894             else if (IS_NAMED_PRIM(rator, "extfltan"))
895               f = call_long_double_tan;
896             else if (IS_NAMED_PRIM(rator, "extflasin"))
897               f = call_long_double_asin;
898             else if (IS_NAMED_PRIM(rator, "extflacos"))
899               f = call_long_double_acos;
900             else if (IS_NAMED_PRIM(rator, "extflatan"))
901               f = call_long_double_atan;
902             else if (IS_NAMED_PRIM(rator, "extflexp"))
903               f = call_long_double_exp;
904             else if (IS_NAMED_PRIM(rator, "extfllog"))
905               f = call_long_double_log;
906             else if (IS_NAMED_PRIM(rator, "extflfloor"))
907               f = call_long_double_floor;
908             else if (IS_NAMED_PRIM(rator, "extflceiling"))
909               f = call_long_double_ceiling;
910             else if (IS_NAMED_PRIM(rator, "extfltruncate"))
911               f = call_long_double_truncate;
912             else if (IS_NAMED_PRIM(rator, "extflround"))
913               f = call_long_double_round;
914             else {
915               scheme_signal_error("internal error: unknown extflonum function");
916               f = NULL;
917             }
918             (void)mz_fpu_tl_sti_ld_fppop(tl_scheme_jit_save_extfp, JIT_FPU_FPR0, JIT_R2);
919             mz_prepare(0);
920             (void)mz_finish(f);
921             (void)mz_fpu_tl_ldi_ld_fppush(JIT_FPU_FPR0, tl_scheme_jit_save_extfp, JIT_R2);
922           } else
923 #endif
924             {
925             if (IS_NAMED_PRIM(rator, "flsin"))
926               f = call_sin;
927             else if (IS_NAMED_PRIM(rator, "flcos"))
928               f = call_cos;
929             else if (IS_NAMED_PRIM(rator, "fltan"))
930               f = call_tan;
931             else if (IS_NAMED_PRIM(rator, "flasin"))
932               f = call_asin;
933             else if (IS_NAMED_PRIM(rator, "flacos"))
934               f = call_acos;
935             else if (IS_NAMED_PRIM(rator, "flatan"))
936               f = call_atan;
937             else if (IS_NAMED_PRIM(rator, "flexp"))
938               f = call_exp;
939             else if (IS_NAMED_PRIM(rator, "fllog"))
940               f = call_log;
941             else if (IS_NAMED_PRIM(rator, "flfloor"))
942               f = call_floor;
943             else if (IS_NAMED_PRIM(rator, "flceiling"))
944               f = call_ceiling;
945             else if (IS_NAMED_PRIM(rator, "fltruncate"))
946               f = call_truncate;
947             else if (IS_NAMED_PRIM(rator, "flround"))
948               f = call_round;
949             else if (IS_NAMED_PRIM(rator, "flsingle") || IS_NAMED_PRIM(rator, "unsafe-flsingle"))
950               f = call_single;
951             else {
952               scheme_signal_error("internal error: unknown flonum function");
953               f = NULL;
954             }
955             (void)mz_tl_sti_d_fppop(tl_scheme_jit_save_fp, JIT_FPR0, JIT_R2);
956             mz_prepare(0);
957             (void)mz_finish(f);
958             (void)mz_tl_ldi_d_fppush(JIT_FPR0, tl_scheme_jit_save_fp, JIT_R2);
959           }
960         }
961         break;
962       case ARITH_EXPT: /* flexpt */
963         {
964 #ifdef MZ_LONG_DOUBLE
965           if (extfl) {
966             if (!reversed) {
967               (void)mz_fpu_tl_sti_ld_fppop(tl_scheme_jit_save_extfp2, JIT_FPU_FPR0, JIT_R2);
968               (void)mz_fpu_tl_sti_ld_fppop(tl_scheme_jit_save_extfp, JIT_FPU_FPR1, JIT_R2);
969             } else {
970               (void)mz_fpu_tl_sti_ld_fppop(tl_scheme_jit_save_extfp, JIT_FPU_FPR0, JIT_R2);
971               (void)mz_fpu_tl_sti_ld_fppop(tl_scheme_jit_save_extfp2, JIT_FPU_FPR1, JIT_R2);
972             }
973             mz_prepare(0);
974             (void)mz_finish(call_long_double_expt);
975             (void)mz_fpu_tl_ldi_ld_fppush(JIT_FPU_FPR0, tl_scheme_jit_save_extfp, JIT_R2);
976           } else
977 #endif
978             {
979             if (!reversed) {
980               (void)mz_tl_sti_d_fppop(tl_scheme_jit_save_fp2, JIT_FPR0, JIT_R2);
981               (void)mz_tl_sti_d_fppop(tl_scheme_jit_save_fp, JIT_FPR1, JIT_R2);
982             } else {
983               (void)mz_tl_sti_d_fppop(tl_scheme_jit_save_fp, JIT_FPR0, JIT_R2);
984               (void)mz_tl_sti_d_fppop(tl_scheme_jit_save_fp2, JIT_FPR1, JIT_R2);
985             }
986             mz_prepare(0);
987             (void)mz_finish(call_expt);
988             (void)mz_tl_ldi_d_fppush(JIT_FPR0, tl_scheme_jit_save_fp, JIT_R2);
989           }
990         }
991         break;
992 # endif
993 #endif
994       default:
995         break;
996       }
997       CHECK_LIMIT();
998 
999       if (!no_alloc) {
1000         mz_rs_sync(); /* needed if arguments were unboxed */
1001         scheme_generate_alloc_X_double(jitter, 0, dest, extfl);
1002         CHECK_LIMIT();
1003 #if defined(MZ_USE_JIT_I386)
1004         if (need_post_pop && !USES_DIRECT_FPR_ACCESS)
1005           FSTPr(0);
1006 #endif
1007       } else if (unboxed_result) {
1008         jitter->unbox_depth++;
1009 #if defined(MZ_USE_JIT_I386)
1010         if (need_post_pop && !USES_DIRECT_FPR_ACCESS) {
1011           FXCHr(1);
1012           FSTPr(0);
1013         }
1014 #endif
1015       }
1016 #if defined(MZ_LONG_DOUBLE) && defined(MZ_NEED_SET_EXTFL_MODE)
1017       if (extfl && need_control_reset) {
1018         jit_set_fp_control(0x27f);
1019       }
1020 #endif
1021     } else {
1022       /* The "anti" variants below invert the branch. Unlike the "un"
1023          variants, the "anti" variants invert the comparison result
1024          after the layer where +nan.0 always generates false. */
1025       __START_SHORT_JUMPS__(branch_short);
1026       if (for_branch) {
1027         scheme_prepare_branch_jump(jitter, for_branch);
1028         CHECK_LIMIT();
1029       }
1030       R0_FP_ADJUST(_jitl.r0_can_be_tmp++);
1031       switch (cmp) {
1032       case CMP_LT:
1033         refd = jit_FPSEL_bantigtr_xd_fppop(extfl, jit_forward(), fpr0, fpr1);
1034         break;
1035       case CMP_LEQ:
1036         refd = jit_FPSEL_bantiger_xd_fppop(extfl, jit_forward(), fpr0, fpr1);
1037         break;
1038       case CMP_EQUAL:
1039         refd = jit_FPSEL_bantieqr_xd_fppop(extfl, jit_forward(), fpr0, fpr1);
1040         break;
1041       case CMP_GEQ:
1042         refd = jit_FPSEL_bantiler_xd_fppop(extfl, jit_forward(), fpr0, fpr1);
1043         break;
1044       case CMP_GT:
1045         refd = jit_FPSEL_bantiltr_xd_fppop(extfl, jit_forward(), fpr0, fpr1);
1046         break;
1047       default:
1048         refd = NULL;
1049         break;
1050       }
1051       R0_FP_ADJUST(_jitl.r0_can_be_tmp--);
1052       __END_SHORT_JUMPS__(branch_short);
1053       *_refd = refd;
1054     }
1055   }
1056 
1057   if (!unsafe_fl) {
1058     /* Jump to return result or true branch: */
1059     __START_SHORT_JUMPS__(branch_short);
1060     refdt = jit_jmpi(jit_forward());
1061     *_refdt = refdt;
1062     __END_SHORT_JUMPS__(branch_short);
1063   }
1064 
1065   if (!unsafe_fl) {
1066     /* No, they're not both doubles, or slow path is needed
1067        for some other reason. */
1068     __START_TINY_JUMPS__(1);
1069     if (!unboxed) {
1070       if (two_args) {
1071         mz_patch_branch(ref8);
1072         mz_patch_branch(ref10);
1073       }
1074       mz_patch_branch(ref9);
1075     }
1076     if (refs)
1077       mz_patch_branch(refs);
1078     if (refs2)
1079       mz_patch_branch(refs2);
1080     __END_TINY_JUMPS__(1);
1081   }
1082 #endif
1083 
1084   return 1;
1085 }
1086 
check_float_type_result(mz_jit_state * jitter,int reg,void * fail_code,Scheme_Object * rator,int type)1087 static int check_float_type_result(mz_jit_state *jitter, int reg, void *fail_code, Scheme_Object *rator, int type)
1088 /* Doesn't use R0 or R1, except for `reg' */
1089 {
1090   /* Check for flonum result */
1091   GC_CAN_IGNORE jit_insn *ref, *reffail;
1092 
1093   mz_rs_sync();
1094 
1095   __START_TINY_JUMPS__(1);
1096   ref = jit_bmci_l(jit_forward(), reg, 0x1);
1097   __END_TINY_JUMPS__(1);
1098 
1099   reffail = jit_get_ip();
1100   (void)jit_movi_p(JIT_V1, ((Scheme_Primitive_Proc *)rator)->prim_val);
1101   (void)jit_calli(fail_code);
1102 
1103   __START_TINY_JUMPS__(1);
1104   mz_patch_branch(ref);
1105   __END_TINY_JUMPS__(1);
1106 
1107   jit_ldxi_s(JIT_R2, reg, &((Scheme_Object *)0x0)->type);
1108   __START_SHORT_JUMPS__(1);
1109   (void)jit_bnei_i(reffail, JIT_R2, type);
1110   __END_SHORT_JUMPS__(1);
1111   CHECK_LIMIT();
1112 
1113   scheme_generate_unboxing(jitter, reg);
1114 
1115   return 1;
1116 }
1117 
check_flonum_result(mz_jit_state * jitter,int reg,void ** fail_code,Scheme_Object * rator,int extfl)1118 static int check_flonum_result(mz_jit_state *jitter, int reg, void **fail_code, Scheme_Object *rator, int extfl)
1119 /* Doesn't use R0 or R1, except for `reg' */
1120 {
1121   return check_float_type_result(jitter, reg, fail_code[extfl], rator,
1122                                  (extfl ? scheme_long_double_type : scheme_double_type));
1123 }
1124 
generate_modulo_setup(mz_jit_state * jitter,int branch_short,int a1,int a2)1125 static void generate_modulo_setup(mz_jit_state *jitter, int branch_short, int a1, int a2)
1126 /* r1 has two flags: bit 0 means two args have different sign; bit 1 means second arg is negative */
1127 {
1128   GC_CAN_IGNORE jit_insn *refx;
1129 
1130   jit_movi_l(JIT_R1, 0x0);
1131   __START_INNER_TINY__(branch_short);
1132   refx = jit_bgei_l(jit_forward(), a1, 0);
1133   jit_negr_l(a1, a1);
1134   jit_movi_l(JIT_R1, 0x1);
1135   mz_patch_branch(refx);
1136   refx = jit_bgei_l(jit_forward(), a2, 0);
1137   jit_xori_l(JIT_R1, JIT_R1, 0x3);
1138   jit_negr_l(a2, a2);
1139   mz_patch_branch(refx);
1140   __END_INNER_TINY__(branch_short);
1141 }
1142 
scheme_generate_arith_for(mz_jit_state * jitter,Scheme_Object * rator,Scheme_Object * rand,Scheme_Object * rand2,int orig_args,int arith,int cmp,int v,Branch_Info * for_branch,int branch_short,int unsafe_fx,int unsafe_fl,GC_CAN_IGNORE jit_insn * overflow_refslow,int dest,int extfl)1143 int scheme_generate_arith_for(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Object *rand, Scheme_Object *rand2,
1144                               int orig_args, int arith, int cmp, int v,
1145                               Branch_Info *for_branch, int branch_short,
1146                               int unsafe_fx, int unsafe_fl, GC_CAN_IGNORE jit_insn *overflow_refslow,
1147                               int dest, int extfl)
1148 /* needs de-sync */
1149 /* Operation codes are defined in jit.h.
1150    Either arith is non-zero or it's a cmp; the value of each determines the operation:
1151         arith = 1 -> + or add1 (if !rand2)
1152         arith = -1 -> - or sub1
1153         arith = 2 -> *
1154         arith = -2 -> /
1155         arith = -3 -> quotient
1156         arith = -4 -> remainder
1157         arith = -5 -> modulo
1158         arith = 3 -> bitwise-and
1159         arith = 4 -> bitwise-ior
1160         arith = 5 -> bitwise-xor
1161         arith = 6 -> arithmetic-shift, fxlshift
1162         arith = -6 -> fxrshift
1163         arith = 7 -> bitwise-not
1164         arith = 9 -> min
1165         arith = 10 -> max
1166         arith = 11 -> abs
1167         arith = 12 -> exact->inexact
1168         arith = 13 -> sqrt
1169         arith = 14 -> unary floating-point op (consult `rator')
1170         arith = 15 -> inexact->exact
1171         arith = 16 -> flexpt
1172         cmp = 0 -> = or zero?
1173         cmp = +/-1 -> >=/<=
1174         cmp = +/-2 -> >/< or positive/negative?
1175         cmp = 3 -> bitwise-bit-test?
1176         cmp = +/-4 -> even?/odd?
1177    If rand is NULL, then we're generating part of the fast path for an
1178    nary arithmatic over a binary operator; the first argument is
1179    already in R0 (fixnum or min/max) or a floating-point register
1180    (flonum) and the second argument is in R1 (fixnum or min/max) or a
1181    floating-point register (flonum).
1182    For unsafe_fx or unsafe_fl -1 means safe but specific to the type.
1183 */
1184 {
1185   GC_CAN_IGNORE jit_insn *ref, *ref2, *ref3, *ref4, *refd = NULL, *refdt = NULL;
1186   GC_CAN_IGNORE jit_insn *refslow;
1187   int reversed = 0;
1188   int has_fixnum_fast = 1, has_flonum_fast = 1;
1189   int inlined_flonum1, inlined_flonum2;
1190 
1191   LOG_IT(("inlined %s\n", rator ? ((Scheme_Primitive_Proc *)rator)->name : "???"));
1192 
1193   if (unsafe_fx < 0) {
1194     unsafe_fx = 0;
1195     has_flonum_fast = 0;
1196   }
1197 
1198   if (unsafe_fl) {
1199     if (!rand) {
1200       inlined_flonum1 = inlined_flonum2 = 1;
1201     } else {
1202       if (scheme_can_unbox_inline(rand, 5, JIT_FPUSEL_FPR_NUM(extfl)-2, unsafe_fl > 0, extfl))
1203         inlined_flonum1 = 1;
1204       else
1205         inlined_flonum1 = 0;
1206       if (!rand2 || scheme_can_unbox_inline(rand2, 5, JIT_FPUSEL_FPR_NUM(extfl)-3, unsafe_fl > 0, extfl))
1207         inlined_flonum2 = 1;
1208       else
1209         inlined_flonum2 = 0;
1210     }
1211   } else
1212     inlined_flonum1 = inlined_flonum2 = 0;
1213 
1214   if (unsafe_fl
1215 #ifndef USE_FLONUM_UNBOXING
1216       && inlined_flonum1 && inlined_flonum2
1217 #endif
1218       ) {
1219     /* Unboxed (and maybe unsafe) floating-point ops. */
1220     int args_unboxed = (((arith != ARITH_MIN) && (arith != ARITH_MAX)) || rand);
1221     int flonum_depth, fl_reversed = 0, can_direct1, can_direct2;
1222 
1223     if (inlined_flonum1 && inlined_flonum2 && (arith != ARITH_INEX_EX) && (arith != ARITH_INEX_TRUNC_EX))
1224       /* safe can be implemented as unsafe */
1225       unsafe_fl = 1;
1226 
1227     if (!args_unboxed && rand)
1228       scheme_signal_error("internal error: invalid mode");
1229 
1230     if (inlined_flonum1 && !inlined_flonum2 && can_reorder_unboxing(rand, rand2, extfl)) {
1231       GC_CAN_IGNORE Scheme_Object *tmp;
1232       reversed = !reversed;
1233       cmp = -cmp;
1234       fl_reversed = 1;
1235       tmp = rand;
1236       rand = rand2;
1237       rand2 = tmp;
1238       inlined_flonum1 = 0;
1239       inlined_flonum2 = 1;
1240     }
1241 
1242     if (inlined_flonum1)
1243       can_direct1 = 2;
1244     else
1245       can_direct1 = scheme_can_unbox_directly(rand, extfl);
1246     if (inlined_flonum2)
1247       can_direct2 = 2;
1248     else
1249       can_direct2 = scheme_can_unbox_directly(rand2, extfl);
1250 
1251     if (args_unboxed) {
1252       jitter->unbox++;
1253       MZ_FPUSEL_STMT_ONLY(extfl, jitter->unbox_extflonum++);
1254     }
1255     if (!rand) {
1256       CHECK_LIMIT();
1257       if (args_unboxed)
1258         flonum_depth = 2;
1259       else
1260         flonum_depth = 0;
1261     } else if (!rand2) {
1262       mz_runstack_skipped(jitter, 1);
1263       scheme_generate_unboxed(rand, jitter, can_direct1, (unsafe_fl > 0));
1264       CHECK_LIMIT();
1265       mz_runstack_unskipped(jitter, 1);
1266       if (!can_direct1 && (unsafe_fl <= 0)) {
1267         check_flonum_result(jitter, JIT_R0, sjc.fl1_fail_code, rator, extfl);
1268         CHECK_LIMIT();
1269       }
1270       flonum_depth = 1;
1271     } else {
1272 #ifdef USE_FLONUM_UNBOXING
1273       int flostack = 0, flopos = 0;
1274 #endif
1275       mz_runstack_skipped(jitter, 2);
1276       scheme_generate_unboxed(rand, jitter, can_direct1, (unsafe_fl > 0));
1277       CHECK_LIMIT();
1278       if (!(inlined_flonum1 && inlined_flonum2)) {
1279         if (!can_direct1 && (unsafe_fl <= 0)) {
1280           mz_pushr_p(JIT_R0);
1281         } else if (!inlined_flonum2) {
1282 #ifdef USE_FLONUM_UNBOXING
1283           flostack = scheme_mz_flostack_save(jitter, &flopos);
1284           --jitter->unbox_depth;
1285           scheme_generate_flonum_local_unboxing(jitter, 0, 0, extfl);
1286           CHECK_LIMIT();
1287 #endif
1288         }
1289       }
1290       scheme_generate_unboxed(rand2, jitter, can_direct2, (unsafe_fl > 0));
1291       CHECK_LIMIT();
1292       if (!(inlined_flonum1 && inlined_flonum2)) {
1293         if ((can_direct1 || (unsafe_fl > 0)) && !inlined_flonum2) {
1294 #ifdef USE_FLONUM_UNBOXING
1295           int fpr0 USED_ONLY_SOMETIMES;
1296           fpr0 = JIT_FPUSEL_FPR_0(extfl, jitter->unbox_depth);
1297           mz_ld_fppush(fpr0, jitter->flostack_offset, extfl);
1298           scheme_mz_flostack_restore(jitter, flostack, flopos, 1, 1);
1299           CHECK_LIMIT();
1300           jitter->unbox_depth++;
1301 #endif
1302         }
1303         if (!can_direct2 && (unsafe_fl <= 0)) {
1304           jit_movr_p(JIT_R1, JIT_R0);
1305           if (!can_direct1) {
1306             mz_popr_p(JIT_R0);
1307             check_flonum_result(jitter, JIT_R0, sjc.fl2rr_fail_code[fl_reversed], rator, extfl);
1308             CHECK_LIMIT();
1309           }
1310           check_flonum_result(jitter, JIT_R1, sjc.fl2fr_fail_code[fl_reversed], rator, extfl);
1311           CHECK_LIMIT();
1312         } else {
1313           if (!can_direct1 && (unsafe_fl <= 0)) {
1314             mz_popr_p(JIT_R0);
1315             check_flonum_result(jitter, JIT_R0, sjc.fl2rf_fail_code[fl_reversed], rator, extfl);
1316             CHECK_LIMIT();
1317           }
1318           if (!(can_direct1 || (unsafe_fl > 0)) || !inlined_flonum2) {
1319             cmp = -cmp;
1320             reversed = !reversed;
1321           }
1322         }
1323       }
1324       mz_runstack_unskipped(jitter, 2);
1325       flonum_depth = 2;
1326     }
1327     if (args_unboxed) {
1328       MZ_FPUSEL_STMT_ONLY(extfl, --jitter->unbox_extflonum);
1329       --jitter->unbox;
1330     }
1331     jitter->unbox_depth -= flonum_depth;
1332     if (!jitter->unbox && jitter->unbox_depth && rand)
1333       scheme_signal_error("internal error: broken unbox depth");
1334     if (for_branch
1335         || (arith == ARITH_INEX_EX)        /* has slow path */
1336         || (arith == ARITH_INEX_TRUNC_EX)) /* could have slow path */
1337       mz_rs_sync(); /* needed if arguments were unboxed */
1338 
1339     generate_float_point_arith(jitter, rator, arith, cmp, reversed, !!rand2, 0,
1340                                &refd, &refdt, for_branch, branch_short,
1341                                ((arith == ARITH_INEX_EX) || (arith == ARITH_INEX_TRUNC_EX)) ? (unsafe_fl > 0) : 1,
1342                                args_unboxed, jitter->unbox, dest, extfl);
1343     CHECK_LIMIT();
1344     ref3 = NULL;
1345     ref = NULL;
1346     ref4 = NULL;
1347 
1348     if (((arith == ARITH_INEX_EX) || (arith == ARITH_INEX_TRUNC_EX)) && (unsafe_fl < 1)) {
1349       /* need a slow path */
1350       if (args_unboxed) {
1351         MZ_FPUSEL_STMT(extfl,
1352                        (void)jit_calli(sjc.box_extflonum_from_reg_code),
1353                        (void)jit_calli(sjc.box_flonum_from_reg_code));
1354       }
1355       generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, branch_short, orig_args, reversed, arith, 0, 0, dest);
1356       /* assert: !ref4, since not for_branch */
1357       __START_SHORT_JUMPS__(branch_short);
1358       mz_patch_ucbranch(ref);
1359       mz_patch_ucbranch(refdt);
1360       __END_SHORT_JUMPS__(branch_short);
1361     }
1362 
1363     __START_SHORT_JUMPS__(branch_short);
1364   } else {
1365     mz_jit_unbox_state ubs;
1366 
1367     if (unsafe_fl < 0) {
1368       has_fixnum_fast = 0;
1369       unsafe_fl = 0;
1370     }
1371 
1372     /* While generating a fixnum op, don't unbox! */
1373     scheme_mz_unbox_save(jitter, &ubs);
1374 
1375     if (!rand) {
1376       /* generating for an nary operation; first arg in R0,
1377          second in R1 */
1378       reversed = 1;
1379       cmp = -cmp;
1380       refslow = overflow_refslow;
1381       refd = NULL;
1382       refdt = NULL;
1383       ref3 = NULL;
1384       ref = NULL;
1385       ref4 = NULL;
1386     } else {
1387       if (!unsafe_fl
1388           && ((!arith && (cmp != CMP_BIT))
1389               || (arith == ARITH_MIN)
1390               || (arith == ARITH_MAX)
1391               || (arith == ARITH_AND)
1392               || (arith == ARITH_IOR)
1393               || (arith == ARITH_XOR))) {
1394         /* No slow path necessary for fixnum arguments. */
1395         if (scheme_jit_is_fixnum(rand) && (!rand2 || scheme_jit_is_fixnum(rand2))) {
1396           unsafe_fx = 1;
1397         }
1398       }
1399 
1400       if (rand2) {
1401         if (SCHEME_INTP(rand2)
1402             && SCHEME_INT_SMALL_ENOUGH(rand2)
1403             && ((arith == ARITH_SH)
1404                 ? ((SCHEME_INT_VAL(rand2) <= MAX_TRY_SHIFT)
1405                    && (SCHEME_INT_VAL(rand2) >= -MAX_TRY_SHIFT))
1406                 : (((arith == ARITH_LSH) || (arith == ARITH_LSH_WRAP) || (arith == ARITH_RSH))
1407                    ? ((SCHEME_INT_VAL(rand2) <= MAX_TRY_SHIFT)
1408                       && (SCHEME_INT_VAL(rand2) >= 0))
1409                    : 1))
1410             && ((cmp != CMP_BIT)
1411                 || ((SCHEME_INT_VAL(rand2) <= MAX_TRY_SHIFT)
1412                     && (SCHEME_INT_VAL(rand2) >= 0)))) {
1413           /* Second is constant, so use constant mode.
1414              For arithmetic shift, only do this if the constant
1415              is in range. */
1416           v = SCHEME_INT_VAL(rand2);
1417           rand2 = NULL;
1418         } else if (SCHEME_INTP(rand)
1419                    && SCHEME_INT_SMALL_ENOUGH(rand)
1420                    && (arith != ARITH_SH) && (arith != ARITH_LSH) && (arith != ARITH_LSH_WRAP) && (arith != ARITH_RSH)
1421                    && (cmp != CMP_BIT)) {
1422           /* First is constant; swap argument order and use constant mode. */
1423           v = SCHEME_INT_VAL(rand);
1424           cmp = -cmp;
1425           rand = rand2;
1426           rand2 = NULL;
1427           reversed = 1;
1428         } else if ((scheme_ok_to_move_local(rand2)
1429                     || SCHEME_INTP(rand2))
1430                    && !(scheme_ok_to_move_local(rand)
1431                         || SCHEME_INTP(rand))) {
1432           /* Second expression is side-effect-free, unlike the first;
1433              swap order and use the fast path for when the first arg is
1434              side-effect free. */
1435           Scheme_Object *t = rand2;
1436           rand2 = rand;
1437           rand = t;
1438           cmp = -cmp;
1439           reversed = 1;
1440         }
1441       }
1442 
1443       if ((arith == ARITH_SUB) && (orig_args == 1) && !v) {
1444         /* Unary subtract */
1445         reversed = 1;
1446       }
1447 
1448       if (!unsafe_fl && !unsafe_fx
1449           && (scheme_jit_is_fixnum(rand) && (!rand2 || scheme_jit_is_fixnum(rand2)))) {
1450         /* Since we'll get a fix num, skip flonum variant */
1451         has_flonum_fast = 0;
1452       }
1453 
1454       if (rand2) {
1455         int dir;
1456         dir = scheme_generate_two_args(rand, rand2, jitter, 0, orig_args);
1457         CHECK_LIMIT();
1458         /* Since we want rand in R1 and rand2 in R0, direction is backwards: */
1459         if (dir > 0) {
1460           Scheme_Object *t = rand2;
1461           rand2 = rand;
1462           rand = t;
1463           cmp = -cmp;
1464           reversed = !reversed;
1465         }
1466       } else {
1467         mz_runstack_skipped(jitter, orig_args);
1468         scheme_generate_non_tail(rand, jitter, 0, 1, 0); /* sync'd later */
1469         CHECK_LIMIT();
1470         mz_runstack_unskipped(jitter, orig_args);
1471         CHECK_RUNSTACK_OVERFLOW();
1472       }
1473       /* not sync'd... */
1474 
1475       /* two arguments: rand2 in R0, and rand in R1 */
1476       /* one argument: rand in R0 */
1477 
1478       if (rand2) {
1479         int va;
1480 
1481         if (scheme_jit_is_fixnum(rand)) {
1482           if (scheme_jit_is_fixnum(rand2))
1483             va = -1; /* no check needed */
1484           else
1485             va = JIT_R0; /* check only rand2 */
1486         } else if (scheme_jit_is_fixnum(rand2)) {
1487           va = JIT_R1; /* check only rand */
1488         } else {
1489           if (!unsafe_fx && !unsafe_fl) {
1490             /* check both fixnum bits at once by ANDing into R2: */
1491             jit_andr_ul(JIT_R2, JIT_R0, JIT_R1);
1492             va = JIT_R2;
1493           } else
1494             va =  -1;
1495         }
1496 
1497         if (!unsafe_fx && !unsafe_fl) {
1498           mz_rs_sync();
1499 
1500           __START_TINY_JUMPS_IF_COMPACT__(1);
1501           if (va == -1)
1502             ref2 = jit_jmpi(jit_forward());
1503           else
1504             ref2 = jit_bmsi_ul(jit_forward(), va, 0x1);
1505           __END_TINY_JUMPS_IF_COMPACT__(1);
1506         } else {
1507           ref2 = NULL;
1508           if (for_branch) mz_rs_sync();
1509         }
1510 
1511         if (unsafe_fl || (!unsafe_fx && !SCHEME_INTP(rand)
1512                           && has_flonum_fast
1513                           && can_fast_double(arith, cmp, 1))) {
1514           /* Maybe they're both doubles... */
1515           if (unsafe_fl) mz_rs_sync();
1516           generate_float_point_arith(jitter, rator, arith, cmp, reversed, 1, 0, &refd, &refdt,
1517                                      for_branch, branch_short, unsafe_fl, 0, ubs.unbox, dest, extfl);
1518           CHECK_LIMIT();
1519         }
1520 
1521         if (!unsafe_fx && !unsafe_fl) {
1522           if (!has_fixnum_fast) {
1523             __START_TINY_JUMPS_IF_COMPACT__(1);
1524             if (va == -1)
1525               mz_patch_ucbranch(ref2);
1526             else
1527               mz_patch_branch(ref2);
1528             __END_TINY_JUMPS_IF_COMPACT__(1);
1529           }
1530 
1531           /* Slow path */
1532           refslow = generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, branch_short,
1533                                              orig_args, reversed, arith, 0, 0, dest);
1534 
1535           if (has_fixnum_fast) {
1536             __START_TINY_JUMPS_IF_COMPACT__(1);
1537             if (va == -1)
1538               mz_patch_ucbranch(ref2);
1539             else
1540               mz_patch_branch(ref2);
1541             __END_TINY_JUMPS_IF_COMPACT__(1);
1542           }
1543         } else {
1544           refslow = overflow_refslow;
1545           ref = NULL;
1546           ref4 = NULL;
1547         }
1548         CHECK_LIMIT();
1549       } else {
1550         /* Only one argument: */
1551         int is_fx;
1552 
1553         is_fx = scheme_jit_is_fixnum(rand);
1554 
1555         if (!unsafe_fx && !unsafe_fl) {
1556           mz_rs_sync();
1557           __START_TINY_JUMPS_IF_COMPACT__(1);
1558           if (is_fx)
1559             ref2 = jit_jmpi(jit_forward());
1560           else
1561             ref2 = jit_bmsi_ul(jit_forward(), JIT_R0, 0x1);
1562           __END_TINY_JUMPS_IF_COMPACT__(1);
1563         } else {
1564           if (for_branch) mz_rs_sync();
1565           ref2 = NULL;
1566         }
1567 
1568         if (unsafe_fl
1569             || ((orig_args != 2) /* <- heuristic: we could generate code when an exact argument is
1570                                     given, but the extra FP code is probably not worthwhile. */
1571                 && !unsafe_fx
1572                 && has_flonum_fast
1573                 && can_fast_double(arith, cmp, 0)
1574                 /* watch out: divide by 0 is special: */
1575                 && ((arith != ARITH_DIV) || v || reversed))) {
1576           /* Maybe it's a double... */
1577           generate_float_point_arith(jitter, rator, arith, cmp, reversed, 0, v, &refd, &refdt,
1578                                      for_branch, branch_short, unsafe_fl, 0, ubs.unbox, dest, extfl);
1579           CHECK_LIMIT();
1580         }
1581 
1582         if (!unsafe_fx && !unsafe_fl) {
1583           if (!has_fixnum_fast) {
1584             __START_TINY_JUMPS_IF_COMPACT__(1);
1585             if (is_fx)
1586               mz_patch_ucbranch(ref2);
1587             else
1588               mz_patch_branch(ref2);
1589             __END_TINY_JUMPS_IF_COMPACT__(1);
1590           }
1591 
1592           /* Slow path */
1593           refslow = generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, branch_short,
1594                                              orig_args, reversed, arith, 1, v, dest);
1595 
1596           if (has_fixnum_fast) {
1597             __START_TINY_JUMPS_IF_COMPACT__(1);
1598             if (is_fx)
1599               mz_patch_ucbranch(ref2);
1600             else
1601               mz_patch_branch(ref2);
1602             __END_TINY_JUMPS_IF_COMPACT__(1);
1603           }
1604         } else {
1605           refslow = overflow_refslow;
1606           ref = NULL;
1607           ref4 = NULL;
1608         }
1609       }
1610 
1611       CHECK_LIMIT();
1612     }
1613 
1614     __START_SHORT_JUMPS__(branch_short);
1615 
1616     if (!unsafe_fl) {
1617       if (arith) {
1618         if (((arith == ARITH_DIV) || (arith == ARITH_QUOT) || (arith == ARITH_REM) || (arith == ARITH_MOD)) && !rand2) {
1619           (void)jit_movi_p(JIT_R1, scheme_make_integer(v));
1620           rand2 = scheme_true;
1621           reversed = !reversed;
1622         }
1623 
1624         if (rand2) {
1625           /* First arg is in JIT_R1, second is in JIT_R0 */
1626           if ((arith == ARITH_ADD) || (arith == ARITH_ADD_WRAP)) {
1627             jit_andi_ul(JIT_R2, JIT_R1, (~0x1));
1628             if ((unsafe_fx && !overflow_refslow) || (arith == ARITH_ADD_WRAP))
1629               jit_addr_l(dest, JIT_R2, JIT_R0);
1630             else {
1631               (void)jit_boaddr_l(refslow, JIT_R2, JIT_R0);
1632               jit_movr_p(dest, JIT_R2);
1633             }
1634           } else if ((arith == ARITH_SUB) || (arith == ARITH_SUB_WRAP)) {
1635             if (reversed) {
1636               jit_movr_p(JIT_R2, JIT_R0);
1637               if ((unsafe_fx && !overflow_refslow) || (arith == ARITH_SUB_WRAP))
1638                 jit_subr_l(JIT_R2, JIT_R2, JIT_R1);
1639               else
1640                 (void)jit_bosubr_l(refslow, JIT_R2, JIT_R1);
1641             } else {
1642               jit_movr_p(JIT_R2, JIT_R1);
1643               if ((unsafe_fx && !overflow_refslow) || (arith == ARITH_SUB_WRAP))
1644                 (void)jit_subr_l(JIT_R2, JIT_R2, JIT_R0);
1645               else
1646                 (void)jit_bosubr_l(refslow, JIT_R2, JIT_R0);
1647             }
1648             jit_ori_ul(dest, JIT_R2, 0x1);
1649           } else if ((arith == ARITH_MUL) || (arith == ARITH_MUL_WRAP)) {
1650             jit_andi_ul(JIT_R2, JIT_R1, (~0x1));
1651             jit_rshi_l(JIT_V1, JIT_R0, 0x1);
1652             if ((unsafe_fx && !overflow_refslow) || (arith == ARITH_MUL_WRAP))
1653               jit_mulr_l(JIT_V1, JIT_V1, JIT_R2);
1654             else
1655               (void)jit_bomulr_l(refslow, JIT_V1, JIT_R2);
1656             jit_ori_ul(dest, JIT_V1, 0x1);
1657           } else if ((arith == ARITH_DIV) || (arith == ARITH_QUOT) || (arith == ARITH_REM) || (arith == ARITH_MOD)) {
1658             if (reversed) {
1659               jit_rshi_l(JIT_V1, JIT_R0, 0x1);
1660               jit_rshi_l(JIT_R2, JIT_R1, 0x1);
1661             } else {
1662               jit_rshi_l(JIT_R2, JIT_R0, 0x1);
1663               jit_rshi_l(JIT_V1, JIT_R1, 0x1);
1664             }
1665             if (!unsafe_fx || overflow_refslow)
1666               (void)jit_beqi_l(refslow, JIT_R2, 0);
1667 
1668             if (arith == ARITH_MOD) {
1669               generate_modulo_setup(jitter, branch_short, JIT_V1, JIT_R2);
1670               CHECK_LIMIT();
1671             }
1672             if ((arith == ARITH_DIV) || (arith == ARITH_QUOT))
1673               jit_divr_l(JIT_R0, JIT_V1, JIT_R2);
1674             else
1675               jit_modr_l(JIT_R0, JIT_V1, JIT_R2);
1676 
1677             if (arith == ARITH_DIV) {
1678               GC_CAN_IGNORE jit_insn *refx, *refz;
1679               __START_INNER_TINY__(branch_short);
1680               /* watch out for negation of most neg<ative fixnum,
1681                  which is a positive number too big for a fixnum */
1682               refz = jit_beqi_p(jit_forward(), JIT_R0, (void *)(((uintptr_t)1 << ((8 * JIT_WORD_SIZE) - 2))));
1683               __END_INNER_TINY__(branch_short);
1684               if (reversed)
1685                 jit_mulr_l(JIT_R2, JIT_R0, JIT_R2);
1686               else
1687                 jit_mulr_l(JIT_V1, JIT_R0, JIT_V1);
1688               __START_INNER_TINY__(branch_short);
1689               refx = jit_beqr_l(jit_forward(), JIT_R2, JIT_V1);
1690               mz_patch_branch(refz);
1691               __END_INNER_TINY__(branch_short);
1692               /* restore R0 argument: */
1693               if (reversed) {
1694                 jit_fixnum_l(JIT_R0, JIT_V1);
1695               } else {
1696                 jit_fixnum_l(JIT_R0, JIT_R2);
1697               }
1698               (void)jit_jmpi(refslow);
1699               __START_INNER_TINY__(branch_short);
1700               mz_patch_branch(refx);
1701               __END_INNER_TINY__(branch_short);
1702             } else if (arith == ARITH_MOD) {
1703               GC_CAN_IGNORE jit_insn *refx, *refy;
1704               __START_INNER_TINY__(branch_short);
1705               refy = jit_beqi_l(jit_forward(), JIT_R0, 0);
1706               refx = jit_bmci_l(jit_forward(), JIT_R1, 0x1);
1707               jit_subr_l(JIT_R0, JIT_R2, JIT_R0);
1708               mz_patch_branch(refx);
1709               refx = jit_bmci_l(jit_forward(), JIT_R1, 0x2);
1710               jit_negr_l(JIT_R0, JIT_R0);
1711               mz_patch_branch(refx);
1712               mz_patch_branch(refy);
1713               __END_INNER_TINY__(branch_short);
1714             } else if (arith == ARITH_QUOT) {
1715               /* watch out for negation of most negative fixnum,
1716                  which is a positive number too big for a fixnum */
1717               if (!unsafe_fx || overflow_refslow) {
1718                 GC_CAN_IGNORE jit_insn *refx;
1719                 __START_INNER_TINY__(branch_short);
1720                 refx = jit_bnei_p(jit_forward(), JIT_R0, (void *)(((uintptr_t)1 << ((8 * JIT_WORD_SIZE) - 2))));
1721                 __END_INNER_TINY__(branch_short);
1722                 /* first argument must have been most negative fixnum,
1723                    second argument must have been -1: */
1724                 if (reversed)
1725                   (void)jit_movi_p(JIT_R0, (void *)(((uintptr_t)1 << ((8 * JIT_WORD_SIZE) - 1)) | 0x1));
1726                 else
1727                   (void)jit_movi_p(JIT_R0, scheme_make_integer(-1));
1728                 (void)jit_jmpi(refslow);
1729                 __START_INNER_TINY__(branch_short);
1730                 mz_patch_branch(refx);
1731                 __END_INNER_TINY__(branch_short);
1732               }
1733             }
1734             jit_fixnum_l(dest, JIT_R0);
1735           } else if (arith == ARITH_AND) {
1736             /* and */
1737             jit_andr_ul(dest, JIT_R1, JIT_R0);
1738           } else if (arith == ARITH_IOR) {
1739             /* ior */
1740             jit_orr_ul(dest, JIT_R1, JIT_R0);
1741           } else if (arith == ARITH_XOR) {
1742             /* xor */
1743             jit_andi_ul(JIT_R0, JIT_R0, (~0x1));
1744             jit_xorr_ul(dest, JIT_R1, JIT_R0);
1745           } else if ((arith == ARITH_SH) || (arith == ARITH_LSH) || (arith == ARITH_LSH_WRAP) || (arith == ARITH_RSH)) {
1746             /* arithmetic-shift
1747                This is a lot of code, but if you're using
1748                arithmetic-shift, then you probably want it. */
1749             int v1 = (reversed ? JIT_R0 : JIT_R1);
1750             int v2 = (reversed ? JIT_R1 : JIT_R0);
1751             GC_CAN_IGNORE jit_insn *refi, *refc;
1752 
1753             if (arith == ARITH_SH)
1754               refi = jit_bgei_l(jit_forward(), v2, (intptr_t)scheme_make_integer(0));
1755             else {
1756               refi = NULL;
1757               if (!unsafe_fx || overflow_refslow)
1758                 (void)jit_blti_l(refslow, v2, (intptr_t)scheme_make_integer(0));
1759             }
1760 
1761             if ((arith == ARITH_SH) || (arith == ARITH_RSH)) {
1762               /* Right shift */
1763               if (!unsafe_fx || overflow_refslow) {
1764                 /* check for a small enough shift */
1765                 if (arith == ARITH_RSH) {
1766                   (void)jit_bgti_l(refslow, v2, (intptr_t)scheme_make_integer(MAX_TRY_SHIFT));
1767                 } else {
1768                   (void)jit_blti_l(refslow, v2, (intptr_t)scheme_make_integer(-MAX_TRY_SHIFT));
1769                 }
1770               }
1771               if (arith == ARITH_RSH)
1772                 jit_rshi_l(JIT_V1, v2, 0x1);
1773               else {
1774                 jit_notr_l(JIT_V1, v2);
1775                 jit_rshi_l(JIT_V1, JIT_V1, 0x1);
1776                 jit_addi_l(JIT_V1, JIT_V1, 0x1);
1777               }
1778               CHECK_LIMIT();
1779 #ifdef MZ_USE_JIT_I386
1780               /* Can't shift from _ECX */
1781               jit_movr_l(JIT_R2, v1);
1782               jit_rshr_l(JIT_R2, JIT_R2, JIT_V1);
1783 #else
1784               jit_rshr_l(JIT_R2, v1, JIT_V1);
1785 #endif
1786               jit_ori_l(dest, JIT_R2, 0x1);
1787               if (!unsafe_fx || overflow_refslow)
1788                 refc = jit_jmpi(jit_forward());
1789               else
1790                 refc = NULL;
1791               CHECK_LIMIT();
1792             } else
1793               refc = NULL;
1794 
1795             /* Left shift */
1796             if ((arith == ARITH_SH) || (arith == ARITH_LSH) || (arith == ARITH_LSH_WRAP)) {
1797               if (refi)
1798                 mz_patch_branch(refi);
1799               if (!unsafe_fx || overflow_refslow)
1800                 (void)jit_bgti_l(refslow, v2, (intptr_t)scheme_make_integer(MAX_TRY_SHIFT));
1801               jit_rshi_l(JIT_V1, v2, 0x1);
1802               jit_andi_l(v1, v1, (~0x1));
1803 #ifdef MZ_USE_JIT_I386
1804               /* Can't shift from _ECX */
1805               jit_movr_l(JIT_R2, v1);
1806               jit_lshr_l(JIT_R2, JIT_R2, JIT_V1);
1807 #else
1808               jit_lshr_l(JIT_R2, v1, JIT_V1);
1809 #endif
1810               CHECK_LIMIT();
1811               if ((!unsafe_fx || overflow_refslow) && (arith != ARITH_LSH_WRAP)) {
1812                 /* If shifting back right produces a different result, that's overflow... */
1813                 jit_rshr_l(JIT_V1, JIT_R2, JIT_V1);
1814                 /* !! In case we go refslow, it needs to add back tag to v1 !! */
1815                 (void)jit_bner_p(refslow, JIT_V1, v1);
1816               }
1817               /* No overflow (or we don't care) */
1818               jit_ori_l(dest, JIT_R2, 0x1);
1819             }
1820 
1821             if (refc)
1822               mz_patch_ucbranch(refc);
1823           } else if (arith == ARITH_MIN) {
1824             /* min */
1825             GC_CAN_IGNORE jit_insn *refc;
1826             __START_INNER_TINY__(branch_short);
1827             refc = jit_bltr_l(jit_forward(), JIT_R0, JIT_R1);
1828             jit_movr_l(JIT_R0, JIT_R1);
1829             mz_patch_branch(refc);
1830             jit_movr_p(dest, JIT_R0);
1831             __END_INNER_TINY__(branch_short);
1832           } else if (arith == ARITH_MAX) {
1833             /* max */
1834             GC_CAN_IGNORE jit_insn *refc;
1835             __START_INNER_TINY__(branch_short);
1836             refc = jit_bgtr_l(jit_forward(), JIT_R0, JIT_R1);
1837             jit_movr_l(JIT_R0, JIT_R1);
1838             mz_patch_branch(refc);
1839             jit_movr_p(dest, JIT_R0);
1840             __END_INNER_TINY__(branch_short);
1841           }
1842         } else {
1843           /* Non-constant arg is in JIT_R0 */
1844           if ((arith == ARITH_ADD) || (arith == ARITH_ADD_WRAP)) {
1845             if ((unsafe_fx && !overflow_refslow) || (arith == ARITH_ADD_WRAP))
1846               jit_addi_l(dest, JIT_R0, (uintptr_t)v << 1);
1847             else {
1848               jit_movr_p(JIT_R2, JIT_R0);
1849               (void)jit_boaddi_l(refslow, JIT_R2, (uintptr_t)v << 1);
1850               jit_movr_p(dest, JIT_R2);
1851             }
1852           } else if ((arith == ARITH_SUB) || (arith == ARITH_SUB_WRAP)) {
1853             if (reversed) {
1854               (void)jit_movi_p(JIT_R2, scheme_make_integer(v));
1855               if ((unsafe_fx && !overflow_refslow) || (arith == ARITH_SUB_WRAP))
1856                 jit_subr_l(JIT_R2, JIT_R2, JIT_R0);
1857               else
1858                 (void)jit_bosubr_l(refslow, JIT_R2, JIT_R0);
1859               jit_addi_ul(dest, JIT_R2, 0x1);
1860             } else {
1861               if ((unsafe_fx && !overflow_refslow) || (arith == ARITH_SUB_WRAP))
1862                 jit_subi_l(dest, JIT_R0, (uintptr_t)v << 1);
1863               else {
1864                 jit_movr_p(JIT_R2, JIT_R0);
1865                 (void)jit_bosubi_l(refslow, JIT_R2, (uintptr_t)v << 1);
1866                 jit_movr_p(dest, JIT_R2);
1867               }
1868             }
1869           } else if ((arith == ARITH_MUL) || (arith == ARITH_MUL_WRAP)) {
1870             if (v == 1) {
1871               /* R0 already is the answer */
1872               jit_movr_p(dest, JIT_R0);
1873             } else if (v == 0) {
1874               (void)jit_movi_p(dest, scheme_make_integer(0));
1875             } else {
1876               (void)jit_movi_l(JIT_R2, ((intptr_t)scheme_make_integer(v) & (~0x1)));
1877               jit_rshi_l(JIT_V1, JIT_R0, 0x1);
1878               if ((unsafe_fx && !overflow_refslow) || (arith == ARITH_MUL_WRAP))
1879                 jit_mulr_l(JIT_V1, JIT_V1, JIT_R2);
1880               else {
1881                 (void)jit_movi_p(JIT_R1, scheme_make_integer(v)); /* for slow path */
1882                 (void)jit_bomulr_l(refslow, JIT_V1, JIT_R2);
1883               }
1884               jit_ori_ul(dest, JIT_V1, 0x1);
1885             }
1886           } else {
1887             if (arith == ARITH_AND) {
1888               /* and */
1889               intptr_t l = (intptr_t)scheme_make_integer(v);
1890               jit_andi_ul(dest, JIT_R0, l);
1891             } else if (arith == ARITH_IOR) {
1892               /* ior */
1893               intptr_t l = (intptr_t)scheme_make_integer(v);
1894               jit_ori_ul(dest, JIT_R0, l);
1895             } else if (arith == ARITH_XOR) {
1896               /* xor */
1897               jit_xori_ul(dest, JIT_R0, (uintptr_t)v << 1);
1898             } else if ((arith == ARITH_SH) || (arith == ARITH_LSH) || (arith == ARITH_LSH_WRAP) || (arith == ARITH_RSH)) {
1899               /* arithmetic-shift */
1900               /* We only get here when v is in range, such as between -MAX_TRY_SHIFT and
1901                  MAX_TRY_SHIFT inclusive for ARITH_SH. */
1902               if ((v <= 0) || (arith == ARITH_RSH)) {
1903                 int amt = v;
1904                 if (arith != ARITH_RSH)
1905                   amt = -amt;
1906                 jit_rshi_l(JIT_R0, JIT_R0, amt);
1907                 jit_ori_l(dest, JIT_R0, 0x1);
1908               } else {
1909                 jit_andi_l(JIT_R0, JIT_R0, (~0x1));
1910                 jit_lshi_l(JIT_R2, JIT_R0, v);
1911                 if ((!unsafe_fx || overflow_refslow) && (arith != ARITH_LSH_WRAP)) {
1912                   /* If shifting back right produces a different result, that's overflow... */
1913                   jit_rshi_l(JIT_V1, JIT_R2, v);
1914                   /* !! In case we go refslow, it nseed to add back tag to JIT_R0 !! */
1915                   (void)jit_bner_p(refslow, JIT_V1, JIT_R0);
1916                 }
1917                 /* No overflow (or we don't care) */
1918                 jit_ori_l(dest, JIT_R2, 0x1);
1919               }
1920             } else if (arith == ARITH_NOT) {
1921               jit_notr_ul(JIT_R0, JIT_R0);
1922               jit_ori_ul(dest, JIT_R0, 0x1);
1923             } else if (arith == ARITH_MIN) {
1924               /* min */
1925               GC_CAN_IGNORE jit_insn *refc;
1926               __START_INNER_TINY__(branch_short);
1927               refc = jit_blti_l(jit_forward(), JIT_R0, (intptr_t)scheme_make_integer(v));
1928               jit_movi_l(JIT_R0, (intptr_t)scheme_make_integer(v));
1929               mz_patch_branch(refc);
1930               jit_movr_p(dest, JIT_R0);
1931               __END_INNER_TINY__(branch_short);
1932             } else if (arith == ARITH_MAX) {
1933               /* max */
1934               GC_CAN_IGNORE jit_insn *refc;
1935               __START_INNER_TINY__(branch_short);
1936               refc = jit_bgti_l(jit_forward(), JIT_R0, (intptr_t)scheme_make_integer(v));
1937               jit_movi_l(JIT_R0, (intptr_t)scheme_make_integer(v));
1938               mz_patch_branch(refc);
1939               jit_movr_p(dest, JIT_R0);
1940               __END_INNER_TINY__(branch_short);
1941             } else if (arith == ARITH_ABS) {
1942               /* abs */
1943               GC_CAN_IGNORE jit_insn *refc;
1944               __START_INNER_TINY__(branch_short);
1945               refc = jit_bgei_l(jit_forward(), JIT_R0, (intptr_t)scheme_make_integer(0));
1946               __END_INNER_TINY__(branch_short);
1947               /* watch out for most negative fixnum! */
1948               if (!unsafe_fx || overflow_refslow)
1949                 (void)jit_beqi_p(refslow, JIT_R0, (void *)(((uintptr_t)1 << ((8 * JIT_WORD_SIZE) - 1)) | 0x1));
1950               (void)jit_movi_p(JIT_R1, scheme_make_integer(0));
1951               jit_subr_l(JIT_R0, JIT_R1, JIT_R0);
1952               jit_ori_l(JIT_R0, JIT_R0, 0x1);
1953               __START_INNER_TINY__(branch_short);
1954               mz_patch_branch(refc);
1955               jit_movr_p(dest, JIT_R0);
1956               __END_INNER_TINY__(branch_short);
1957               CHECK_LIMIT();
1958             } else if (arith == ARITH_EX_INEX) {
1959               /* exact->inexact */
1960               int fpr0 USED_ONLY_SOMETIMES;
1961               fpr0 = JIT_FPUSEL_FPR_0(extfl, jitter->unbox_depth);
1962               jit_rshi_l(JIT_R0, JIT_R0, 1);
1963               jit_FPSEL_extr_l_xd_fppush(extfl, fpr0, JIT_R0);
1964               CHECK_LIMIT();
1965               if (!ubs.unbox) {
1966                 mz_rs_sync(); /* needed for unsafe op before allocation */
1967                 __END_SHORT_JUMPS__(branch_short);
1968                 scheme_generate_alloc_X_double(jitter, 0, dest, extfl);
1969                 __START_SHORT_JUMPS__(branch_short);
1970               } else {
1971                 jitter->unbox_depth++;
1972               }
1973               CHECK_LIMIT();
1974             } else if (arith == ARITH_INEX_EX) {
1975               /* inexact->exact */
1976               /* no work to do, since fixnum is already exact */
1977               jit_movr_p(dest, JIT_R0);
1978             }
1979           }
1980         }
1981         if (refdt)
1982           mz_patch_ucbranch(refdt);
1983         if (!unsafe_fx && !unsafe_fl)
1984           mz_patch_ucbranch(ref);
1985         ref3 = NULL;
1986       } else {
1987         /* If second is constant, first arg is in JIT_R0. */
1988         /* Otherwise, first arg is in JIT_R1, second is in JIT_R0 */
1989         /* Jump to ref3 to produce false */
1990         int rs_valid, rs_can_keep = 0;
1991 
1992         switch (cmp) {
1993         case -CMP_BIT:
1994           if (rand2) {
1995             if (!unsafe_fx || overflow_refslow) {
1996               (void)jit_blti_l(refslow, JIT_R1, 0);
1997               (void)jit_bgti_l(refslow, JIT_R1, (intptr_t)scheme_make_integer(MAX_TRY_SHIFT));
1998             }
1999           }
2000           break;
2001         case CMP_BIT:
2002           if (rand2) {
2003             if (!unsafe_fx || overflow_refslow) {
2004               (void)jit_blti_l(refslow, JIT_R0, 0);
2005               (void)jit_bgti_l(refslow, JIT_R0, (intptr_t)scheme_make_integer(MAX_TRY_SHIFT));
2006             }
2007           }
2008           break;
2009         }
2010 
2011         /* Don't use refslow from here on */
2012 
2013         if (for_branch) {
2014           scheme_prepare_branch_jump(jitter, for_branch);
2015           CHECK_LIMIT();
2016         }
2017 
2018         rs_valid = mz_CURRENT_REG_STATUS_VALID();
2019 
2020         switch (cmp) {
2021         case CMP_ODDP:
2022           ref3 = jit_bmci_l(jit_forward(), JIT_R0, 0x2);
2023           rs_can_keep = 1;
2024           break;
2025         case -CMP_BIT:
2026           if (rand2) {
2027             jit_rshi_l(JIT_R1, JIT_R1, 1);
2028             jit_addi_l(JIT_V1, JIT_R1, 1);
2029             jit_movi_l(JIT_R2, 1);
2030             jit_lshr_l(JIT_R2, JIT_R2, JIT_V1);
2031             ref3 = jit_bmcr_l(jit_forward(), JIT_R0, JIT_R2);
2032           } else {
2033             /* shouldn't get here */
2034             scheme_signal_error("internal error: bitwise-bit-test? constant in wrong position");
2035             ref3 = NULL;
2036           }
2037           break;
2038         case CMP_LT:
2039           if (rand2) {
2040             ref3 = jit_bger_l(jit_forward(), JIT_R1, JIT_R0);
2041           } else {
2042             ref3 = jit_bgei_l(jit_forward(), JIT_R0, (intptr_t)scheme_make_integer(v));
2043           }
2044           rs_can_keep = 1;
2045           break;
2046         case CMP_LEQ:
2047           if (rand2) {
2048             ref3 = jit_bgtr_l(jit_forward(), JIT_R1, JIT_R0);
2049           } else {
2050             ref3 = jit_bgti_l(jit_forward(), JIT_R0, (intptr_t)scheme_make_integer(v));
2051           }
2052           rs_can_keep = 1;
2053           break;
2054         case CMP_EQUAL:
2055           if (rand2) {
2056             ref3 = jit_bner_l(jit_forward(), JIT_R1, JIT_R0);
2057           } else {
2058             ref3 = jit_bnei_l(jit_forward(), JIT_R0, (intptr_t)scheme_make_integer(v));
2059           }
2060           rs_can_keep = 1;
2061           break;
2062         case CMP_GEQ:
2063           if (rand2) {
2064             ref3 = jit_bltr_l(jit_forward(), JIT_R1, JIT_R0);
2065           } else {
2066             ref3 = jit_blti_l(jit_forward(), JIT_R0, (intptr_t)scheme_make_integer(v));
2067           }
2068           rs_can_keep = 1;
2069           break;
2070         case CMP_GT:
2071           if (rand2) {
2072             ref3 = jit_bler_l(jit_forward(), JIT_R1, JIT_R0);
2073           } else {
2074             ref3 = jit_blei_l(jit_forward(), JIT_R0, (intptr_t)scheme_make_integer(v));
2075           }
2076           rs_can_keep = 1;
2077           break;
2078         default:
2079         case CMP_BIT:
2080           if (rand2) {
2081             jit_rshi_l(JIT_R0, JIT_R0, 1);
2082             jit_addi_l(JIT_R0, JIT_R0, 1);
2083             jit_movi_l(JIT_V1, 1);
2084             jit_lshr_l(JIT_R0, JIT_V1, JIT_R0);
2085             ref3 = jit_bmcr_l(jit_forward(), JIT_R1, JIT_R0);
2086           } else {
2087             ref3 = jit_bmci_l(jit_forward(), JIT_R0, (uintptr_t)1 << (v+1));
2088             rs_can_keep = 1;
2089           }
2090           break;
2091         case CMP_EVENP:
2092           ref3 = jit_bmsi_l(jit_forward(), JIT_R0, 0x2);
2093           rs_can_keep = 1;
2094           break;
2095         }
2096 
2097         mz_SET_REG_STATUS_VALID(rs_valid && rs_can_keep);
2098       }
2099     } else {
2100       ref3 = NULL;
2101     }
2102 
2103     scheme_mz_unbox_restore(jitter, &ubs);
2104   }
2105 
2106   if (!arith) {
2107     if (for_branch) {
2108       if (refdt) {
2109         scheme_add_or_patch_branch_true_uc(jitter, for_branch, refdt);
2110         CHECK_LIMIT();
2111       }
2112       if (ref4) {
2113         scheme_add_or_patch_branch_true_movi(jitter, for_branch, ref4);
2114         CHECK_LIMIT();
2115       }
2116       scheme_add_branch_false(for_branch, ref3);
2117       scheme_add_branch_false(for_branch, refd);
2118       scheme_add_branch_false_movi(for_branch, ref);
2119       scheme_branch_for_true(jitter, for_branch);
2120       CHECK_LIMIT();
2121     } else {
2122       if (refdt)
2123         mz_patch_ucbranch(refdt);
2124 
2125       (void)jit_movi_p(dest, scheme_true);
2126       __START_INNER_TINY__(branch_short);
2127       ref2 = jit_jmpi(jit_forward());
2128       __END_INNER_TINY__(branch_short);
2129       if (ref3)
2130         mz_patch_branch(ref3);
2131       if (refd)
2132         mz_patch_branch(refd);
2133       (void)jit_movi_p(dest, scheme_false);
2134       __START_INNER_TINY__(branch_short);
2135       mz_patch_ucbranch(ref2);
2136       __END_INNER_TINY__(branch_short);
2137       if (!unsafe_fx && !unsafe_fl)
2138         mz_patch_ucbranch(ref);
2139     }
2140   }
2141 
2142   __END_SHORT_JUMPS__(branch_short);
2143 
2144   return 1;
2145 }
2146 
scheme_generate_arith(mz_jit_state * jitter,Scheme_Object * rator,Scheme_Object * rand,Scheme_Object * rand2,int orig_args,int arith,int cmp,int v,Branch_Info * for_branch,int branch_short,int unsafe_fx,int unsafe_fl,GC_CAN_IGNORE jit_insn * overflow_refslow,int dest)2147 int scheme_generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Object *rand, Scheme_Object *rand2,
2148 			  int orig_args, int arith, int cmp, int v,
2149                           Branch_Info *for_branch, int branch_short,
2150                           int unsafe_fx, int unsafe_fl, GC_CAN_IGNORE jit_insn *overflow_refslow,
2151                           int dest)
2152 {
2153   return scheme_generate_arith_for(jitter, rator, rand, rand2,
2154                                    orig_args, arith, cmp, v,
2155                                    for_branch, branch_short,
2156                                    unsafe_fx, unsafe_fl, overflow_refslow,
2157                                    dest, 0);
2158 }
2159 
scheme_generate_extflonum_arith(mz_jit_state * jitter,Scheme_Object * rator,Scheme_Object * rand,Scheme_Object * rand2,int orig_args,int arith,int cmp,int v,Branch_Info * for_branch,int branch_short,int unsafe_fx,int unsafe_extfl,GC_CAN_IGNORE jit_insn * overflow_refslow,int dest)2160 int scheme_generate_extflonum_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Object *rand, Scheme_Object *rand2,
2161                                     int orig_args, int arith, int cmp, int v,
2162                                     Branch_Info *for_branch, int branch_short,
2163                                     int unsafe_fx, int unsafe_extfl, GC_CAN_IGNORE jit_insn *overflow_refslow,
2164                                     int dest)
2165 {
2166   return scheme_generate_arith_for(jitter, rator, rand, rand2,
2167                                    orig_args, arith, cmp, v,
2168                                    for_branch, branch_short,
2169                                    unsafe_fx, unsafe_extfl, overflow_refslow,
2170                                    dest, 1);
2171 }
2172 
2173 
2174 #define MAX_NON_SIMPLE_ARGS 6
2175 
extract_nary_arg(int reg,int n,mz_jit_state * jitter,Scheme_App_Rec * app,Scheme_Object ** alt_args,int old_short_jumps)2176 static int extract_nary_arg(int reg, int n, mz_jit_state *jitter, Scheme_App_Rec *app,
2177                             Scheme_Object **alt_args, int old_short_jumps)
2178 {
2179   if (!alt_args) {
2180     jit_ldxi_p(reg, JIT_RUNSTACK, WORDS_TO_BYTES(n));
2181     if (jitter->unbox)
2182       scheme_generate_unboxing(jitter, JIT_R0);
2183   } else if (scheme_is_constant_and_avoids_r1(app->args[n+1])) {
2184     __END_SHORT_JUMPS__(old_short_jumps);
2185     scheme_generate(app->args[n+1], jitter, 0, 0, 0, reg, NULL, NULL);
2186     CHECK_LIMIT();
2187     __START_SHORT_JUMPS__(old_short_jumps);
2188   } else {
2189     int i, j = 0;
2190     for (i = 0; i < n; i++) {
2191       if (!scheme_is_constant_and_avoids_r1(app->args[i+1]))
2192         j++;
2193     }
2194     jit_ldxi_p(reg, JIT_RUNSTACK, WORDS_TO_BYTES(j));
2195     if (jitter->unbox)
2196       scheme_generate_unboxing(jitter, JIT_R0);
2197   }
2198   CHECK_LIMIT();
2199   return 1;
2200 }
2201 
init_nary_branches(Branch_Info * for_nary_branch,Branch_Info_Addr * addrs)2202 static void init_nary_branches(Branch_Info *for_nary_branch, Branch_Info_Addr *addrs)
2203 {
2204   memset(for_nary_branch, 0, sizeof(Branch_Info));
2205   for_nary_branch->addrs_size = 3;
2206   for_nary_branch->addrs = addrs;
2207 }
2208 
patch_nary_branches(mz_jit_state * jitter,Branch_Info * for_nary_branch,GC_CAN_IGNORE jit_insn * reffalse)2209 static void patch_nary_branches(mz_jit_state *jitter, Branch_Info *for_nary_branch, GC_CAN_IGNORE jit_insn *reffalse)
2210 {
2211   int i;
2212 
2213   for (i = for_nary_branch->addrs_count; i--; ) {
2214     if (for_nary_branch->addrs[i].mode == BRANCH_ADDR_FALSE) {
2215       if (for_nary_branch->addrs[i].kind == BRANCH_ADDR_BRANCH)
2216         mz_patch_branch_at(for_nary_branch->addrs[i].addr, reffalse);
2217       else if (for_nary_branch->addrs[i].kind == BRANCH_ADDR_MOVI)
2218         jit_patch_movi(for_nary_branch->addrs[i].addr, reffalse);
2219       else
2220         break;
2221     } else
2222       break;
2223   }
2224 
2225   if (i != -1)
2226     scheme_signal_error("internal error: unexpected branch addresses");
2227 }
2228 
scheme_generate_nary_arith(mz_jit_state * jitter,Scheme_App_Rec * app,int arith,int cmp,Branch_Info * for_branch,int branch_short,int unsafe_fx,int unsafe_fl,int dest)2229 int scheme_generate_nary_arith(mz_jit_state *jitter, Scheme_App_Rec *app,
2230                                int arith, int cmp, Branch_Info *for_branch, int branch_short,
2231                                int unsafe_fx, int unsafe_fl,
2232                                int dest)
2233 {
2234   int c, i, non_simple_c = 0, stack_c, use_fx = !unsafe_fl, trigger_arg = 0, use_short;
2235   Scheme_Object *non_simples[MAX_NON_SIMPLE_ARGS], **alt_args, *v;
2236   Branch_Info for_nary_branch;
2237   Branch_Info_Addr nary_addrs[3];
2238   GC_CAN_IGNORE jit_insn *refslow, *reffx, *refdone;
2239   GC_CAN_IGNORE jit_insn *reffalse = NULL, *refdone3 = NULL;
2240 #ifdef INLINE_FP_COMP
2241   int args_unboxed;
2242   GC_CAN_IGNORE jit_insn *reffl, *refdone2;
2243   int use_fl = !unsafe_fx;
2244 # define mzSET_USE_FL(x) x
2245 #else
2246 # define mzSET_USE_FL(x) /* empty */
2247 #endif
2248 
2249   if ((arith == ARITH_AND)
2250       || (arith == ARITH_IOR)
2251       || (arith == ARITH_XOR)) {
2252     /* bitwise operators are fixnum, only */
2253     mzSET_USE_FL(use_fl = 0);
2254   }
2255 
2256 #ifdef INLINE_FP_COMP
2257 # ifndef INLINE_FP_OPS
2258   if ((arith == ARITH_ADD)
2259       || (arith == ARITH_SUB)
2260       || (arith == ARITH_MUL)
2261       || (arith == ARITH_DIV)) {
2262     /* assert: unsafe_fl < 1 */
2263     use_fl = 0;
2264   }
2265 # endif
2266 #endif
2267 
2268   c = app->num_args;
2269   if (!c) {
2270     /* Constant folding would normally prevent us from getting here, but just in case */
2271     if ((arith == ARITH_ADD) || (arith == ARITH_IOR) || (arith == ARITH_XOR)) {
2272       if (!unsafe_fl)
2273         (void)jit_movi_p(dest, scheme_make_integer(0));
2274       else
2275         (void)jit_movi_p(dest, scheme_zerod);
2276       return 1;
2277     } else if (arith == ARITH_AND) {
2278       (void)jit_movi_p(dest, scheme_make_integer(-1));
2279       return 1;
2280     } else if (arith == ARITH_MUL) {
2281       if (!unsafe_fl)
2282         (void)jit_movi_p(dest, scheme_make_integer(1));
2283       else
2284         scheme_mz_load_retained(jitter, dest, scheme_make_double(1.0));
2285       return 1;
2286     }
2287   }
2288 
2289   for (i = 0; i < c; i++) {
2290     v = app->args[i+1];
2291     if (!scheme_is_constant_and_avoids_r1(v)) {
2292       if (non_simple_c < (MAX_NON_SIMPLE_ARGS-1))
2293         non_simples[1+non_simple_c] = v;
2294       non_simple_c++;
2295     }
2296     if (SCHEME_INTP(v)) {
2297       mzSET_USE_FL(use_fl = 0);
2298       if (trigger_arg == i)
2299         trigger_arg++;
2300     } else if (SCHEME_FLOATP(v)) {
2301       use_fx = 0;
2302       if (trigger_arg == i)
2303         trigger_arg++;
2304     } else if (SCHEME_TYPE(v) >= _scheme_ir_values_types_) {
2305       use_fx = 0;
2306       mzSET_USE_FL(use_fl = 0);
2307     }
2308   }
2309 
2310   if ((non_simple_c <= (MAX_NON_SIMPLE_ARGS-1)) && (non_simple_c < c)) {
2311     stack_c = non_simple_c;
2312     alt_args = non_simples;
2313     non_simples[0] = app->args[0];
2314     mz_runstack_skipped(jitter, c - stack_c);
2315   } else {
2316     stack_c = c;
2317     alt_args = NULL;
2318   }
2319 
2320   if (stack_c)
2321     scheme_generate_app(app, alt_args, stack_c, stack_c, jitter, 0, 0, 0, 2);
2322   CHECK_LIMIT();
2323   mz_rs_sync();
2324 
2325   use_short = c < 100;
2326   __START_SHORT_JUMPS__(use_short);
2327 
2328   if (trigger_arg >= c) {
2329     /* we don't expect this to happen, since constant-folding normally
2330        would have collapsed it --- but a division by zero, for example,
2331        might block constant folding */
2332     trigger_arg = 0;
2333   }
2334 
2335   extract_nary_arg(JIT_R0, trigger_arg, jitter, app, alt_args, use_short);
2336   CHECK_LIMIT();
2337 
2338   if ((unsafe_fl < 1) && (unsafe_fx < 1)) {
2339     /* trigger argument a fixnum? */
2340     reffx = jit_bmsi_ul(jit_forward(), JIT_R0, 0x1);
2341   } else
2342     reffx = NULL;
2343 
2344 #ifdef INLINE_FP_COMP
2345   if (use_fl && (unsafe_fl < 1)) {
2346     /* First argument a flonum? */
2347     jit_ldxi_s(JIT_R0, JIT_R0, &((Scheme_Object *)0x0)->type);
2348     reffl = jit_beqi_i(jit_forward(), JIT_R0, scheme_double_type);
2349     CHECK_LIMIT();
2350   } else {
2351     reffl = NULL;
2352   }
2353 #endif
2354 
2355   if (!use_fx && reffx) {
2356     mz_patch_branch(reffx);
2357   }
2358 
2359   if ((unsafe_fl < 1) && (unsafe_fx < 1)) {
2360     refslow = jit_get_ip();
2361     /* slow path */
2362     if (alt_args) {
2363       /* get all args on runstack */
2364       int delta = stack_c - c;
2365       for (i = 0; i < c; i++) {
2366         if (delta) {
2367           extract_nary_arg(JIT_R0, i, jitter, app, alt_args, use_short);
2368           CHECK_LIMIT();
2369           jit_stxi_p(WORDS_TO_BYTES(i+delta), JIT_RUNSTACK, JIT_R0);
2370         } else
2371           break;
2372       }
2373       jit_subi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(c - stack_c));
2374     }
2375     (void)jit_movi_p(JIT_V1, ((Scheme_Primitive_Proc *)app->args[0])->prim_val);
2376     (void)jit_movi_i(JIT_R1, c);
2377     (void)jit_calli(sjc.call_original_nary_arith_code);
2378     if (alt_args) {
2379       jit_addi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(c - stack_c));
2380     }
2381     refdone = jit_jmpi(jit_forward());
2382   } else {
2383     refdone = NULL;
2384     refslow = NULL;
2385   }
2386 
2387   if (!arith) {
2388     GC_CAN_IGNORE jit_insn *refskip;
2389     if ((unsafe_fx > 0) || (unsafe_fl > 0)) {
2390       /* No dispatch so far, so jump to fast path to skip #f result */
2391       __START_INNER_TINY__(use_short);
2392       refskip = jit_jmpi(jit_forward());
2393       __END_INNER_TINY__(use_short);
2394     } else
2395       refskip = NULL;
2396 
2397     reffalse = jit_get_ip();
2398     (void)jit_movi_p(JIT_R0, scheme_false);
2399     refdone3 = jit_jmpi(jit_forward());
2400 
2401     if (refskip) {
2402       __START_INNER_TINY__(use_short);
2403       mz_patch_ucbranch(refskip);
2404       __END_INNER_TINY__(use_short);
2405     }
2406   } else {
2407     reffalse = NULL;
2408   }
2409 
2410 #ifdef INLINE_FP_COMP
2411   if (use_fl) {
2412     /* Flonum branch: */
2413     if (unsafe_fl < 1) {
2414       mz_patch_branch(reffl);
2415       for (i = 0; i < c; i++) {
2416         if (i != trigger_arg) {
2417           v = app->args[i+1];
2418           if (!SCHEME_FLOATP(v)) {
2419             extract_nary_arg(JIT_R0, i, jitter, app, alt_args, use_short);
2420             (void)jit_bmsi_ul(refslow, JIT_R0, 0x1);
2421             jit_ldxi_s(JIT_R0, JIT_R0, &((Scheme_Object *)0x0)->type);
2422             (void)jit_bnei_i(refslow, JIT_R0, scheme_double_type);
2423             CHECK_LIMIT();
2424           }
2425         }
2426       }
2427     }
2428     /* All flonums, so inline fast flonum combination */
2429     args_unboxed = ((arith != ARITH_MIN) && (arith != ARITH_MAX)); /* no unboxing for min & max */
2430     if (args_unboxed)
2431       jitter->unbox++;
2432     extract_nary_arg(JIT_R0, 0, jitter, app, alt_args, use_short);
2433     CHECK_LIMIT();
2434     for (i = 1; i < c; i++) {
2435       if (!arith && (i > 1))
2436         extract_nary_arg(JIT_R0, i - 1, jitter, app, alt_args, use_short);
2437       extract_nary_arg((args_unboxed ? JIT_R0 : JIT_R1), i, jitter, app, alt_args, use_short);
2438       if ((i == c - 1) && args_unboxed) --jitter->unbox; /* box last result */
2439       if (!arith) init_nary_branches(&for_nary_branch, nary_addrs);
2440       __END_SHORT_JUMPS__(use_short);
2441       scheme_generate_arith(jitter, NULL, NULL, scheme_void, 2, arith, cmp, 0,
2442                             !arith ? &for_nary_branch : NULL, use_short, 0, 1, NULL,
2443                             JIT_R0);
2444       __START_SHORT_JUMPS__(use_short);
2445       if (!arith) patch_nary_branches(jitter, &for_nary_branch, reffalse);
2446       CHECK_LIMIT();
2447     }
2448     if (use_fx) {
2449       refdone2 = jit_jmpi(jit_forward());
2450     } else {
2451       refdone2 = NULL;
2452     }
2453   } else {
2454     refdone2 = NULL;
2455   }
2456 #endif
2457 
2458   if (use_fx) {
2459     /* Fixnum branch */
2460     if (unsafe_fx < 1) {
2461       mz_patch_branch(reffx);
2462       for (i = 0; i < c; i++) {
2463         if (i != trigger_arg) {
2464           v = app->args[i+1];
2465           if (!SCHEME_INTP(v)) {
2466             extract_nary_arg(JIT_R0, i, jitter, app, alt_args, use_short);
2467             CHECK_LIMIT();
2468             (void)jit_bmci_ul(refslow, JIT_R0, 0x1);
2469             CHECK_LIMIT();
2470           }
2471         }
2472       }
2473     }
2474     /* All fixnums, so inline fast fixnum combination;
2475        on overflow, bail out to refslow. */
2476     extract_nary_arg(JIT_R0, 0, jitter, app, alt_args, use_short);
2477     for (i = 1; i < c; i++) {
2478       if (!arith && (i > 1))
2479         extract_nary_arg(JIT_R0, i - 1, jitter, app, alt_args, use_short);
2480       extract_nary_arg(JIT_R1, i, jitter, app, alt_args, use_short);
2481       CHECK_LIMIT();
2482       if (!arith) init_nary_branches(&for_nary_branch, nary_addrs);
2483       __END_SHORT_JUMPS__(use_short);
2484       scheme_generate_arith(jitter, NULL, NULL, scheme_void, 2, arith, cmp, 0,
2485                             !arith ? &for_nary_branch : NULL, use_short, 1, 0, refslow,
2486                             JIT_R0);
2487       __START_SHORT_JUMPS__(use_short);
2488       if (!arith) patch_nary_branches(jitter, &for_nary_branch, reffalse);
2489       CHECK_LIMIT();
2490     }
2491   }
2492 
2493 #ifdef INLINE_FP_COMP
2494   if (use_fl && use_fx) {
2495     mz_patch_ucbranch(refdone2);
2496   }
2497 #endif
2498   if (!arith) {
2499     (void)jit_movi_p(JIT_R0, scheme_true);
2500   }
2501   if (refdone)
2502     mz_patch_ucbranch(refdone);
2503   if (refdone3)
2504     mz_patch_ucbranch(refdone3);
2505 
2506   __END_SHORT_JUMPS__(use_short);
2507 
2508   if (stack_c) {
2509     mz_rs_inc(stack_c); /* no sync */
2510     mz_runstack_popped(jitter, stack_c);
2511   }
2512   if (c > stack_c)
2513     mz_runstack_unskipped(jitter, c - stack_c);
2514 
2515   if (!arith && for_branch) {
2516     GC_CAN_IGNORE jit_insn *refx;
2517     scheme_prepare_branch_jump(jitter, for_branch);
2518     CHECK_LIMIT();
2519     __START_SHORT_JUMPS__(branch_short);
2520     refx = jit_beqi_p(jit_forward(), JIT_R0, scheme_false);
2521     scheme_add_branch_false(for_branch, refx);
2522     scheme_branch_for_true(jitter, for_branch);
2523     __END_SHORT_JUMPS__(branch_short);
2524     CHECK_LIMIT();
2525   }
2526 
2527   if (!for_branch && !jitter->unbox)
2528     jit_movr_p(dest, JIT_R0);
2529 
2530   return 1;
2531 }
2532 
2533 #endif
2534