1 #include "schpriv.h"
2 #include "schmach.h"
3 #include "future.h"
4
5 #ifdef MZ_USE_JIT
6
7 #include "jit.h"
8
9 #define JITARITH_TS_PROCS
10 #include "jit_ts.c"
11
scheme_jit_is_fixnum(Scheme_Object * rand)12 int scheme_jit_is_fixnum(Scheme_Object *rand)
13 {
14 if (SCHEME_INTP(rand)
15 || (SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)
16 && (SCHEME_GET_LOCAL_TYPE(rand) == SCHEME_LOCAL_TYPE_FIXNUM)))
17 return 1;
18 else if (scheme_expr_produces_local_type(rand, NULL) == SCHEME_LOCAL_TYPE_FIXNUM)
19 return 1;
20 else
21 return 0;
22 }
23
can_reorder_unboxing(Scheme_Object * rand,Scheme_Object * rand2,int extfl)24 static int can_reorder_unboxing(Scheme_Object *rand, Scheme_Object *rand2, int extfl)
25 {
26 /* Can we reorder `rand' and `rand2', given that we want floating-point
27 results (so it's ok for `rand' to be a floating-point local)? */
28 return scheme_is_relatively_constant_and_avoids_r1_maybe_fp(rand, rand2, 1, extfl);
29 }
30
is_inline_unboxable_op(Scheme_Object * obj,int flag,int unsafely,int just_checking_result,int extfl)31 static int is_inline_unboxable_op(Scheme_Object *obj, int flag, int unsafely, int just_checking_result, int extfl)
32 /* If unsafely, a result of 2 means that arguments should be checked safely. */
33 {
34 if (!SCHEME_PRIMP(obj))
35 return 0;
36 if (!(SCHEME_PRIM_PROC_OPT_FLAGS(obj) & flag))
37 return 0;
38
39 /* We have a table here for now, instead of flags accessed via
40 SCHEME_PRIM_PROC_OPT_FLAGS(), because this function reports
41 properties of the JIT rather than inherent properties of the
42 functions. */
43
44 if (!extfl) {
45 if (IS_NAMED_PRIM(obj, "unsafe-fl+")) return 1;
46 if (IS_NAMED_PRIM(obj, "unsafe-fl-")) return 1;
47 if (IS_NAMED_PRIM(obj, "unsafe-fl*")) return 1;
48 if (IS_NAMED_PRIM(obj, "unsafe-fl/")) return 1;
49 if (IS_NAMED_PRIM(obj, "unsafe-flabs")) return 1;
50 if (IS_NAMED_PRIM(obj, "unsafe-flsqrt")) return 1;
51 if (IS_NAMED_PRIM(obj, "unsafe-flmin")) return 1;
52 if (IS_NAMED_PRIM(obj, "unsafe-flmax")) return 1;
53 if (IS_NAMED_PRIM(obj, "unsafe-fx->fl")) return 1;
54 if (IS_NAMED_PRIM(obj, "unsafe-f64vector-ref")) return 1;
55 if (IS_NAMED_PRIM(obj, "unsafe-flvector-ref")) return 1;
56 if (IS_NAMED_PRIM(obj, "unsafe-flimag-part")) return 1;
57 if (IS_NAMED_PRIM(obj, "unsafe-flreal-part")) return 1;
58
59 if (unsafely) {
60 /* These are inline-unboxable when their args are
61 safely inline-unboxable: */
62 if (IS_NAMED_PRIM(obj, "fl+")) return 2;
63 if (IS_NAMED_PRIM(obj, "fl-")) return 2;
64 if (IS_NAMED_PRIM(obj, "fl*")) return 2;
65 if (IS_NAMED_PRIM(obj, "fl/")) return 2;
66 if (IS_NAMED_PRIM(obj, "flabs")) return 2;
67 if (IS_NAMED_PRIM(obj, "flsqrt")) return 2;
68 if (IS_NAMED_PRIM(obj, "flmin")) return 2;
69 if (IS_NAMED_PRIM(obj, "flmax")) return 2;
70 if (IS_NAMED_PRIM(obj, "flimag-part")) return 2;
71 if (IS_NAMED_PRIM(obj, "flreal-part")) return 2;
72
73 if (just_checking_result) {
74 if (IS_NAMED_PRIM(obj, "flfloor")) return 1;
75 if (IS_NAMED_PRIM(obj, "flceiling")) return 1;
76 if (IS_NAMED_PRIM(obj, "fltruncate")) return 1;
77 if (IS_NAMED_PRIM(obj, "flround")) return 1;
78 if (IS_NAMED_PRIM(obj, "flsingle")) return 1;
79 if (IS_NAMED_PRIM(obj, "unsafe-flsingle")) return 1;
80 if (IS_NAMED_PRIM(obj, "flsin")) return 1;
81 if (IS_NAMED_PRIM(obj, "flcos")) return 1;
82 if (IS_NAMED_PRIM(obj, "fltan")) return 1;
83 if (IS_NAMED_PRIM(obj, "flasin")) return 1;
84 if (IS_NAMED_PRIM(obj, "flacos")) return 1;
85 if (IS_NAMED_PRIM(obj, "flatan")) return 1;
86 if (IS_NAMED_PRIM(obj, "fllog")) return 1;
87 if (IS_NAMED_PRIM(obj, "flexp")) return 1;
88 if (IS_NAMED_PRIM(obj, "flexpt")) return 1;
89 }
90 }
91 }
92
93 #ifdef MZ_LONG_DOUBLE
94 if (extfl) {
95 if (IS_NAMED_PRIM(obj, "unsafe-extfl+")) return 1;
96 if (IS_NAMED_PRIM(obj, "unsafe-extfl-")) return 1;
97 if (IS_NAMED_PRIM(obj, "unsafe-extfl*")) return 1;
98 if (IS_NAMED_PRIM(obj, "unsafe-extfl/")) return 1;
99 if (IS_NAMED_PRIM(obj, "unsafe-extflabs")) return 1;
100 if (IS_NAMED_PRIM(obj, "unsafe-extflsqrt")) return 1;
101 if (IS_NAMED_PRIM(obj, "unsafe-extflmin")) return 1;
102 if (IS_NAMED_PRIM(obj, "unsafe-extflmax")) return 1;
103 if (IS_NAMED_PRIM(obj, "unsafe-fx->extfl")) return 1;
104 if (IS_NAMED_PRIM(obj, "unsafe-f80vector-ref")) return 1;
105 if (IS_NAMED_PRIM(obj, "unsafe-extflvector-ref")) return 1;
106
107 if (unsafely) {
108 /* These are inline-unboxable when their args are
109 safely inline-unboxable: */
110 if (IS_NAMED_PRIM(obj, "extfl+")) return 2;
111 if (IS_NAMED_PRIM(obj, "extfl-")) return 2;
112 if (IS_NAMED_PRIM(obj, "extfl*")) return 2;
113 if (IS_NAMED_PRIM(obj, "extfl/")) return 2;
114 if (IS_NAMED_PRIM(obj, "extflabs")) return 2;
115 if (IS_NAMED_PRIM(obj, "extflsqrt")) return 2;
116 if (IS_NAMED_PRIM(obj, "extflmin")) return 2;
117 if (IS_NAMED_PRIM(obj, "extflmax")) return 2;
118
119 if (just_checking_result) {
120 if (IS_NAMED_PRIM(obj, "extflfloor")) return 1;
121 if (IS_NAMED_PRIM(obj, "extflceiling")) return 1;
122 if (IS_NAMED_PRIM(obj, "extfltruncate")) return 1;
123 if (IS_NAMED_PRIM(obj, "extflround")) return 1;
124 if (IS_NAMED_PRIM(obj, "extflsin")) return 1;
125 if (IS_NAMED_PRIM(obj, "extflcos")) return 1;
126 if (IS_NAMED_PRIM(obj, "extfltan")) return 1;
127 if (IS_NAMED_PRIM(obj, "extflasin")) return 1;
128 if (IS_NAMED_PRIM(obj, "extflacos")) return 1;
129 if (IS_NAMED_PRIM(obj, "extflatan")) return 1;
130 if (IS_NAMED_PRIM(obj, "extfllog")) return 1;
131 if (IS_NAMED_PRIM(obj, "extflexp")) return 1;
132 if (IS_NAMED_PRIM(obj, "extflexpt")) return 1;
133 }
134 }
135 }
136 #endif
137
138 return 0;
139 }
140
scheme_generate_pop_unboxed(mz_jit_state * jitter)141 int scheme_generate_pop_unboxed(mz_jit_state *jitter)
142 {
143 #if defined(MZ_USE_JIT_I386)
144 # if 1
145 if (jitter->unbox_depth) {
146 scheme_signal_error("internal error: scheme_generate_pop_unboxed() isn't right");
147 }
148 /* The code below doesn't work right because it's emitted *before*
149 the test for failure. Adding it after the failure test means
150 moving it to (something like) sjc.unbound_global_code. Meanwhile,
151 the JIT doesn't currently actually try to reference globals when it has
152 values on the FP stack. */
153 # else
154 /* If we have some arguments pushed on the FP stack, we need
155 to pop them off before escaping. */
156 int i;
157 for (i = jitter->unbox_depth; i--; ) {
158 FSTPr(0);
159 }
160 CHECK_LIMIT();
161 # endif
162 #endif
163 return 1;
164 }
165
is_unboxing_immediate(Scheme_Object * obj,int unsafely,int extfl)166 static int is_unboxing_immediate(Scheme_Object *obj, int unsafely, int extfl)
167 {
168 Scheme_Type t;
169
170 t = SCHEME_TYPE(obj);
171 switch (t) {
172 case scheme_local_type:
173 if (!extfl) {
174 if (SCHEME_GET_LOCAL_TYPE(obj) == SCHEME_LOCAL_TYPE_FLONUM)
175 return 1;
176 }
177 #ifdef MZ_LONG_DOUBLE
178 if (extfl) {
179 if (SCHEME_GET_LOCAL_TYPE(obj) == SCHEME_LOCAL_TYPE_EXTFLONUM)
180 return 1;
181 }
182 #endif
183 return unsafely;
184 case scheme_toplevel_type:
185 case scheme_static_toplevel_type:
186 /* Can generalize to allow any toplevel if scheme_generate_pop_unboxed() is fixed */
187 if ((SCHEME_TOPLEVEL_FLAGS(obj) & SCHEME_TOPLEVEL_FLAGS_MASK) < SCHEME_TOPLEVEL_READY)
188 return 0;
189 return unsafely;
190 break;
191 case scheme_local_unbox_type:
192 return unsafely;
193 break;
194 default:
195 if (!unsafely) {
196 if (!extfl)
197 return SCHEME_FLOATP(obj);
198 #ifdef MZ_LONG_DOUBLE
199 if (extfl)
200 return SCHEME_LONG_DBLP(obj);
201 #endif
202 return 0;
203 }
204 return (t > _scheme_values_types_);
205 }
206 }
207
scheme_can_unbox_inline(Scheme_Object * obj,int fuel,int regs,int unsafely,int extfl)208 int scheme_can_unbox_inline(Scheme_Object *obj, int fuel, int regs, int unsafely, int extfl)
209 /* Assuming that `arg' is [unsafely] assumed to produce a flonum, can we
210 just unbox it without using more than `regs' registers? There
211 cannot be any errors or function calls, unless we've specifically
212 instrumented them to save/pop floating-point values before
213 jumping. If the result is true, then arguments must be evaluated in
214 order. */
215 {
216 Scheme_Type t;
217
218 if (!fuel) return 0;
219 if (!regs) return 0;
220
221 t = SCHEME_TYPE(obj);
222 switch (t) {
223 case scheme_application2_type:
224 {
225 Scheme_App2_Rec *app = (Scheme_App2_Rec *)obj;
226 int ok_op;
227 ok_op = is_inline_unboxable_op(app->rator, SCHEME_PRIM_IS_UNARY_INLINED, unsafely, 0, extfl);
228 if (!ok_op)
229 return 0;
230 else if (ok_op == 2)
231 unsafely = 0;
232 return scheme_can_unbox_inline(app->rand, fuel - 1, regs, unsafely, extfl);
233 }
234 case scheme_application3_type:
235 {
236 Scheme_App3_Rec *app = (Scheme_App3_Rec *)obj;
237 int ok_op;
238 ok_op = is_inline_unboxable_op(app->rator, SCHEME_PRIM_IS_BINARY_INLINED, unsafely, 0, extfl);
239 if (!ok_op)
240 return 0;
241 else if (ok_op == 2)
242 unsafely = 0;
243 if ((SCHEME_PRIM_PROC_OPT_FLAGS(app->rator) & SCHEME_PRIM_IS_BINARY_INLINED)
244 && (IS_NAMED_PRIM(app->rator, "unsafe-f64vector-ref")
245 || IS_NAMED_PRIM(app->rator, "unsafe-flvector-ref"))) {
246 if (is_unboxing_immediate(app->rand1, 1, extfl)
247 && is_unboxing_immediate(app->rand2, 1, extfl)) {
248 return 1;
249 }
250 }
251 #ifdef MZ_LONG_DOUBLE
252 if ((SCHEME_PRIM_PROC_OPT_FLAGS(app->rator) & SCHEME_PRIM_IS_BINARY_INLINED)
253 && (IS_NAMED_PRIM(app->rator, "unsafe-f80vector-ref")
254 || IS_NAMED_PRIM(app->rator, "unsafe-extflvector-ref"))) {
255 if (is_unboxing_immediate(app->rand1, 1, extfl)
256 && is_unboxing_immediate(app->rand2, 1, extfl)) {
257 return 1;
258 }
259 }
260 #endif
261 if (!scheme_can_unbox_inline(app->rand1, fuel - 1, regs, unsafely, extfl))
262 return 0;
263 return scheme_can_unbox_inline(app->rand2, fuel - 1, regs - 1, unsafely, extfl);
264 }
265 default:
266 return is_unboxing_immediate(obj, unsafely, extfl);
267 }
268 }
269
can_unbox_directly(Scheme_Object * obj,int extfl,int bfuel)270 int can_unbox_directly(Scheme_Object *obj, int extfl, int bfuel)
271 /* Used only when !can_unbox_inline(). Detects safe operations that
272 produce flonums when they don't raise an exception, and that the JIT
273 supports directly unboxing. */
274 {
275 Scheme_Type t;
276
277 while (1) {
278 t = SCHEME_TYPE(obj);
279 switch (t) {
280 case scheme_application2_type:
281 {
282 Scheme_App2_Rec *app = (Scheme_App2_Rec *)obj;
283 if (is_inline_unboxable_op(app->rator, SCHEME_PRIM_IS_UNARY_INLINED, 1, 1, extfl))
284 return 1;
285 if (SCHEME_PRIMP(app->rator)
286 && (SCHEME_PRIM_PROC_OPT_FLAGS(app->rator) & SCHEME_PRIM_IS_UNARY_INLINED)) {
287 if (!extfl) {
288 if (IS_NAMED_PRIM(app->rator, "->fl")
289 || IS_NAMED_PRIM(app->rator, "fx->fl")
290 || IS_NAMED_PRIM(app->rator, "unsafe-flrandom"))
291 return 1;
292 }
293 #ifdef MZ_LONG_DOUBLE
294 if (extfl) {
295 if (IS_NAMED_PRIM(app->rator, "->extfl")
296 || IS_NAMED_PRIM(app->rator, "fx->extfl"))
297 return 1;
298 }
299 #endif
300 }
301 return 0;
302 }
303 break;
304 case scheme_application3_type:
305 {
306 Scheme_App3_Rec *app = (Scheme_App3_Rec *)obj;
307 if (is_inline_unboxable_op(app->rator, SCHEME_PRIM_IS_BINARY_INLINED, 1, 1, extfl))
308 return 1;
309 if (SCHEME_PRIMP(app->rator)
310 && (SCHEME_PRIM_PROC_OPT_FLAGS(app->rator) & SCHEME_PRIM_IS_BINARY_INLINED)) {
311 if (!extfl) {
312 if (IS_NAMED_PRIM(app->rator, "flvector-ref")) return 1;
313 }
314 #ifdef MZ_LONG_DOUBLE
315 if (extfl) {
316 if (IS_NAMED_PRIM(app->rator, "extflvector-ref")) return 1;
317 }
318 #endif
319 }
320 return 0;
321 }
322 break;
323 case scheme_let_value_type:
324 obj = ((Scheme_Let_Value *)obj)->body;
325 break;
326 case scheme_let_one_type:
327 obj = ((Scheme_Let_One *)obj)->body;
328 break;
329 case scheme_let_void_type:
330 obj = ((Scheme_Let_Void *)obj)->body;
331 break;
332 case scheme_letrec_type:
333 obj = ((Scheme_Letrec *)obj)->body;
334 break;
335 case scheme_branch_type:
336 if (!bfuel)
337 return 0;
338 bfuel--;
339 if (!can_unbox_directly(((Scheme_Branch_Rec *)obj)->tbranch, extfl, bfuel))
340 return 0;
341 obj = ((Scheme_Branch_Rec *)obj)->fbranch;
342 break;
343 case scheme_sequence_type:
344 obj = ((Scheme_Sequence *)obj)->array[((Scheme_Sequence *)obj)->count - 1];
345 break;
346 default:
347 return is_unboxing_immediate(obj, 0, extfl);
348 }
349 }
350 }
351
scheme_can_unbox_directly(Scheme_Object * obj,int extfl)352 int scheme_can_unbox_directly(Scheme_Object *obj, int extfl)
353 {
354 return can_unbox_directly(obj, extfl, 3);
355 }
356
generate_arith_slow_path(mz_jit_state * jitter,Scheme_Object * rator,jit_insn ** _ref,jit_insn ** _ref4,Branch_Info * for_branch,int branch_short,int orig_args,int reversed,int arith,int use_v,int v,int dest)357 static jit_insn *generate_arith_slow_path(mz_jit_state *jitter, Scheme_Object *rator,
358 jit_insn **_ref, jit_insn **_ref4,
359 Branch_Info *for_branch, int branch_short,
360 int orig_args, int reversed, int arith, int use_v, int v,
361 int dest)
362 /* *_ref4 is place to set for where to jump (for true case, if for_branch) after completing;
363 *_ref is place to set for where to jump for false if for_branch, result if !for_branch;
364 result is place to jump to start slow path if fixnum attempt fails */
365 {
366 GC_CAN_IGNORE jit_insn *ref, *ref4, *refslow;
367
368 refslow = jit_get_ip();
369
370 (void)jit_movi_p(JIT_R2, ((Scheme_Primitive_Proc *)rator)->prim_val);
371 if (for_branch) {
372 scheme_prepare_branch_jump(jitter, for_branch);
373 CHECK_LIMIT();
374 ref4 = jit_patchable_movi_p(JIT_V1, jit_forward());
375 mz_set_local_p(JIT_V1, JIT_LOCAL2);
376 ref = jit_patchable_movi_p(JIT_V1, jit_forward());
377 } else {
378 ref4 = NULL;
379 ref = NULL;
380 }
381
382 if (orig_args == 1) {
383 if (for_branch) {
384 (void)jit_jmpi(sjc.call_original_unary_arith_for_branch_code);
385 } else {
386 (void)jit_calli(sjc.call_original_unary_arith_code);
387 }
388 } else {
389 if (use_v) {
390 (void)jit_movi_p(JIT_R1, scheme_make_integer(v));
391 reversed = !reversed;
392 }
393
394 if (for_branch) {
395 if (reversed) {
396 (void)jit_jmpi(sjc.call_original_binary_rev_arith_for_branch_code);
397 } else {
398 (void)jit_jmpi(sjc.call_original_binary_arith_for_branch_code);
399 }
400 } else {
401 if (reversed) {
402 (void)jit_calli(sjc.call_original_binary_rev_arith_code);
403 } else {
404 (void)jit_calli(sjc.call_original_binary_arith_code);
405 }
406 }
407 }
408
409 if (!for_branch) {
410 jit_movr_p(dest, JIT_R0);
411 __START_SHORT_JUMPS__(branch_short);
412 ref = jit_jmpi(jit_forward());
413 __END_SHORT_JUMPS__(branch_short);
414 }
415
416 *_ref = ref;
417 *_ref4 = ref4;
418
419 if ((arith == ARITH_SH) || (arith == ARITH_LSH) || (arith == ARITH_LSH_WRAP)) {
420 /* Add tag back to first arg, just in case. See arithmetic-shift branch to refslow. */
421 ref = jit_get_ip();
422
423 if (reversed || use_v) {
424 jit_ori_l(JIT_R0, JIT_R0, 0x1);
425 } else {
426 jit_ori_l(JIT_R1, JIT_R1, 0x1);
427 }
428
429 __START_TINY_JUMPS__(1);
430 (void)jit_jmpi(refslow);
431 __END_TINY_JUMPS__(1);
432
433 return ref;
434 } else {
435 return refslow;
436 }
437 }
438
439 #ifdef SIXTY_FOUR_BIT_INTEGERS
440 # define SCHEME_INT_SMALL_ENOUGH(rand2) ((((intptr_t)rand2 & 0x7FFFFFFF) == (intptr_t)rand2) || (((intptr_t)rand2 & 0xFFFFFFFFF8000000) == 0xFFFFFFFFF8000000))
441 #else
442 # define SCHEME_INT_SMALL_ENOUGH(rand2) 1
443 #endif
444
can_fast_double(int arith,int cmp,int two_args)445 static int can_fast_double(int arith, int cmp, int two_args)
446 {
447 #ifdef INLINE_FP_OPS
448 if ((arith == ARITH_ADD)
449 || (arith == ARITH_SUB)
450 || (arith == ARITH_MUL)
451 || (arith == ARITH_DIV)
452 || (arith == ARITH_ABS)
453 || (arith == ARITH_EX_INEX)
454 || (arith == ARITH_SQRT)
455 || (arith == ARITH_FLUNOP)
456 || (arith == ARITH_INEX_EX)
457 || (arith == ARITH_INEX_TRUNC_EX))
458 return 1;
459 #endif
460 #ifdef INLINE_FP_COMP
461 if ((!arith && (cmp != CMP_EVENP) && (cmp != CMP_ODDP))
462 || ((arith == ARITH_MIN) && two_args)
463 || ((arith == ARITH_MAX) && two_args))
464 return 1;
465 #endif
466
467 return 0;
468 }
469
470 #ifdef CAN_INLINE_ALLOC
471 # ifdef JIT_USE_FP_OPS
472 # define DECL_FLONUM_GLUE(op) static void call_ ## op(void) XFORM_SKIP_PROC { \
473 scheme_jit_save_fp = scheme_double_ ## op(scheme_jit_save_fp); }
474 # ifdef MZ_LONG_DOUBLE
475 # define DECL_EXTNUM_GLUE(op) static void call_long_double_ ## op(void) XFORM_SKIP_PROC { \
476 scheme_jit_save_extfp = scheme_long_double_ ## op(scheme_jit_save_extfp); }
477 # define DECL_FP_GLUE(op) DECL_FLONUM_GLUE(op) DECL_EXTNUM_GLUE(op)
478 # else
479 # define DECL_FP_GLUE(op) DECL_FLONUM_GLUE(op)
480 # endif
481 DECL_FP_GLUE(sin)
482 DECL_FP_GLUE(cos)
483 DECL_FP_GLUE(tan)
484 DECL_FP_GLUE(asin)
485 DECL_FP_GLUE(acos)
486 DECL_FP_GLUE(atan)
487 DECL_FP_GLUE(exp)
488 DECL_FP_GLUE(log)
489 DECL_FP_GLUE(floor)
490 DECL_FP_GLUE(ceiling)
491 DECL_FP_GLUE(truncate)
492 DECL_FP_GLUE(round)
493 DECL_FLONUM_GLUE(single)
494
495 typedef void (*call_fp_proc)(void);
496 # ifdef MZ_LONG_DOUBLE
497 typedef void (*call_extfp_proc)(void);
498 # endif
499
500 # define DECL_BIN_FLONUM_GLUE(op) static void call_ ## op(void) XFORM_SKIP_PROC { \
501 scheme_jit_save_fp = scheme_double_ ## op(scheme_jit_save_fp, scheme_jit_save_fp2); }
502 # ifdef MZ_LONG_DOUBLE
503 # define DECL_BIN_EXTNUM_GLUE(op) static void call_long_double_ ## op(void) XFORM_SKIP_PROC { \
504 scheme_jit_save_extfp = scheme_long_double_ ## op(scheme_jit_save_extfp, scheme_jit_save_extfp2); }
505 # define DECL_BIN_FP_GLUE(op) DECL_BIN_FLONUM_GLUE(op) DECL_BIN_EXTNUM_GLUE(op)
506 # else
507 # define DECL_BIN_FP_GLUE(op) DECL_BIN_FLONUM_GLUE(op)
508 # endif
509
510 DECL_BIN_FP_GLUE(expt)
511 typedef void (*call_fp_bin_proc)(void);
512
513 # ifdef MZ_LONG_DOUBLE
514 typedef void (*call_extfp_bin_proc)(void);
515 # endif
516 # endif
517 #endif
518
scheme_generate_unboxing(mz_jit_state * jitter,int target)519 int scheme_generate_unboxing(mz_jit_state *jitter, int target)
520 {
521 int fpr0 USED_ONLY_SOMETIMES;
522
523 #ifdef MZ_LONG_DOUBLE
524 if (jitter->unbox_extflonum) {
525 fpr0 = JIT_FPU_FPR_0(jitter->unbox_depth);
526 jit_fpu_ldxi_ld_fppush(fpr0, target, &((Scheme_Long_Double *)0x0)->long_double_val);
527 } else
528 #endif
529 {
530 fpr0 = JIT_FPR_0(jitter->unbox_depth);
531 jit_ldxi_d_fppush(fpr0, target, &((Scheme_Double *)0x0)->double_val);
532 }
533
534 jitter->unbox_depth++;
535
536 return 1;
537 }
538
scheme_generate_alloc_double(mz_jit_state * jitter,int inline_retry,int dest)539 int scheme_generate_alloc_double(mz_jit_state *jitter, int inline_retry, int dest)
540 /* value should be in JIT_FPR0; R0-R2 not saved; V1 used */
541 {
542 #ifdef INLINE_FP_OPS
543 # ifdef CAN_INLINE_ALLOC
544 scheme_inline_alloc(jitter, sizeof(Scheme_Double), scheme_double_type, 0, 0, 1, inline_retry, 0);
545 CHECK_LIMIT();
546 jit_addi_p(dest, JIT_V1, OBJHEAD_SIZE);
547 (void)jit_stxi_d_fppop(&((Scheme_Double *)0x0)->double_val, dest, JIT_FPR0);
548 # else
549 (void)mz_tl_sti_d_fppop(tl_scheme_jit_save_fp, JIT_FPR0, JIT_R0);
550 JIT_UPDATE_THREAD_RSPTR_IF_NEEDED();
551 mz_prepare(0);
552 {
553 GC_CAN_IGNORE jit_insn *refr;
554 (void)mz_finish_lwe(ts_malloc_double, refr);
555 }
556 jit_retval(dest);
557 # endif
558 #endif
559 return 1;
560 }
561
562 #ifdef MZ_LONG_DOUBLE
scheme_generate_alloc_long_double(mz_jit_state * jitter,int inline_retry,int dest)563 int scheme_generate_alloc_long_double(mz_jit_state *jitter, int inline_retry, int dest)
564 /* same as above */
565 {
566 #ifdef INLINE_FP_OPS
567 # ifdef CAN_INLINE_ALLOC
568 scheme_inline_alloc(jitter, sizeof(Scheme_Long_Double), scheme_long_double_type, 0, 0, 0, inline_retry, 1);
569 CHECK_LIMIT();
570 jit_addi_p(dest, JIT_V1, OBJHEAD_SIZE);
571 (void)jit_fpu_stxi_ld_fppop(&((Scheme_Long_Double *)0x0)->long_double_val, dest, JIT_FPU_FPR0);
572 # else
573 (void)mz_fpu_ta_tl_sti_ld_fppop(tl_scheme_jit_save_extfp, JIT_FPU_FPR0, JIT_R0);
574 JIT_UPDATE_THREAD_RSPTR_IF_NEEDED();
575 mz_prepare(0);
576 {
577 GC_CAN_IGNORE jit_insn *refr;
578 (void)mz_finish_lwe(ts_malloc_long_double, refr);
579 }
580 jit_retval(dest);
581 # endif
582 #endif
583 return 1;
584 }
585 #endif
586
scheme_generate_alloc_X_double(mz_jit_state * jitter,int inline_retry,int dest,int extfl)587 int scheme_generate_alloc_X_double(mz_jit_state *jitter, int inline_retry, int dest, int extfl)
588 {
589 MZ_FPUSEL_STMT(extfl,
590 return scheme_generate_alloc_long_double(jitter, inline_retry, dest),
591 return scheme_generate_alloc_double(jitter, inline_retry, dest));
592 }
593
generate_float_point_arith(mz_jit_state * jitter,Scheme_Object * rator,int arith,int cmp,int reversed,int two_args,int second_const,jit_insn ** _refd,jit_insn ** _refdt,Branch_Info * for_branch,int branch_short,int unsafe_fl,int unboxed,int unboxed_result,int dest,int extfl)594 static int generate_float_point_arith(mz_jit_state *jitter, Scheme_Object *rator,
595 int arith, int cmp, int reversed, int two_args, int second_const,
596 jit_insn **_refd, jit_insn **_refdt, Branch_Info *for_branch,
597 int branch_short, int unsafe_fl, int unboxed, int unboxed_result,
598 int dest, int extfl)
599 /* Unless unboxed, first arg is in JIT_R1, second in JIT_R0.
600 If unboxed in push/pop mode, first arg is pushed before second.
601 If unboxed in direct mode, first arg is in JIT_FPR0+depth
602 and second is in JIT_FPR1+depth (which is backward).
603 Unboxed implies unsafe unless arith == ARITH_INEX_EX or arith == ARITH_INEX_TRUNC_EX. */
604 {
605 #if defined(INLINE_FP_OPS) || defined(INLINE_FP_COMP)
606 GC_CAN_IGNORE jit_insn *ref8, *ref9, *ref10, *refd, *refdt, *refs = NULL, *refs2 = NULL;
607 int no_alloc = unboxed_result;
608 int need_post_pop USED_ONLY_SOMETIMES = 0;
609
610 if (!unsafe_fl && !unboxed) {
611 /* Maybe they're doubles */
612 __START_TINY_JUMPS__(1);
613 if (two_args) {
614 jit_orr_ul(JIT_R2, JIT_R0, JIT_R1);
615 ref8 = jit_bmsi_ul(jit_forward(), JIT_R2, 0x1);
616 } else
617 ref8 = NULL;
618 jit_ldxi_s(JIT_R2, JIT_R0, &((Scheme_Object *)0x0)->type);
619 if (extfl)
620 ref9 = jit_bnei_i(jit_forward(), JIT_R2, scheme_long_double_type);
621 else
622 ref9 = jit_bnei_i(jit_forward(), JIT_R2, scheme_double_type);
623 if (two_args) {
624 jit_ldxi_s(JIT_R2, JIT_R1, &((Scheme_Object *)0x0)->type);
625 if (extfl)
626 ref10 = jit_bnei_i(jit_forward(), JIT_R2, scheme_long_double_type);
627 else
628 ref10 = jit_bnei_i(jit_forward(), JIT_R2, scheme_double_type);
629 } else
630 ref10 = NULL;
631 CHECK_LIMIT();
632 __END_TINY_JUMPS__(1);
633 } else {
634 ref8 = ref9 = ref10 = NULL;
635 }
636
637 if (!two_args && !second_const && ((arith == ARITH_MUL) || (arith == ARITH_MUL_WRAP) || ((arith == ARITH_DIV) && reversed))) {
638 /* Special case: multiplication by exact 0 */
639 (void)jit_movi_p(dest, scheme_make_integer(0));
640 } else {
641 /* Yes, they're doubles. First arg is in JIT_R1, second is in JIT_R0.
642 Put the first arg in fpr0 and second (if any) into fpr1. To work
643 right with stacks, that means pushing the second argument first. */
644 int fpr1, fpr0;
645
646 fpr0 = JIT_FPUSEL_FPR_0(extfl, jitter->unbox_depth);
647 fpr1 = JIT_FPUSEL_FPR_1(extfl, 1+jitter->unbox_depth);
648
649 if (two_args) {
650 if (!unboxed) {
651 MZ_FPUSEL_STMT(extfl,
652 jit_fpu_ldxi_ld_fppush(fpr1, JIT_R1, &((Scheme_Long_Double *)0x0)->long_double_val),
653 jit_ldxi_d_fppush(fpr1, JIT_R1, &((Scheme_Double *)0x0)->double_val));
654 }
655 } else if ((arith == ARITH_SUB) && !second_const && reversed) {
656 reversed = 0;
657 } else if (arith == ARITH_ABS) {
658 /* abs needs no extra number */
659 } else if (arith == ARITH_SQRT) {
660 /* sqrt needs no extra number */
661 } else if (arith == ARITH_FLUNOP) {
662 /* flround, flsin, etc. needs no extra number */
663 } else if (arith == ARITH_EX_INEX) {
664 /* exact->inexact needs no extra number */
665 } else if (arith == ARITH_INEX_EX) {
666 /* inexact->exact needs no extra number */
667 } else if (arith == ARITH_INEX_TRUNC_EX) {
668 /* fl->fx needs no extra number */
669 } else {
670 #ifdef MZ_LONG_DOUBLE
671 long_double d;
672 d = long_double_from_intptr(second_const);
673 if (extfl) {
674 mz_fpu_movi_ld_fppush(fpr1, d, JIT_R2)
675 } else {
676 mz_movi_d_fppush(fpr1, second_const, JIT_R2);
677 }
678 #else
679 double d = second_const;
680 mz_movi_d_fppush(fpr1, d, JIT_R2);
681 #endif
682
683 reversed = !reversed;
684 cmp = -cmp;
685 }
686
687 if (!unboxed) {
688 if (arith != ARITH_EX_INEX) {
689 MZ_FPUSEL_STMT(extfl,
690 jit_fpu_ldxi_ld_fppush(fpr0, JIT_R0, &((Scheme_Long_Double *)0x0)->long_double_val),
691 jit_ldxi_d_fppush(fpr0, JIT_R0, &((Scheme_Double *)0x0)->double_val));
692 }
693 }
694
695 #ifdef DIRECT_FPR_ACCESS
696 # define USES_DIRECT_FPR_ACCESS (!extfl)
697 #else
698 # define USES_DIRECT_FPR_ACCESS 0
699 #endif
700
701 #ifdef DIRECT_FPR_ACCESS
702 if (unboxed && USES_DIRECT_FPR_ACCESS) {
703 /* arguments are backward */
704 reversed = !reversed;
705 cmp = -cmp;
706 }
707 #endif
708
709 CHECK_LIMIT();
710
711 if (arith) {
712 #if defined(MZ_LONG_DOUBLE) && defined(MZ_NEED_SET_EXTFL_MODE)
713 int need_control_reset = 0;
714 if (extfl) {
715 switch (arith) {
716 case ARITH_ADD:
717 case ARITH_MUL:
718 case ARITH_DIV:
719 case ARITH_SUB:
720 case ARITH_SQRT:
721 jit_set_fp_control(0x37f);
722 need_control_reset = 1;
723 break;
724 }
725 }
726 #endif
727 switch (arith) {
728 case ARITH_ADD:
729 jit_FPSEL_addr_xd_fppop(extfl, fpr0, fpr0, fpr1);
730 break;
731 case ARITH_MUL:
732 jit_FPSEL_mulr_xd_fppop(extfl, fpr0, fpr0, fpr1);
733 break;
734 case ARITH_DIV:
735 if (!reversed)
736 jit_FPSEL_divrr_xd_fppop(extfl, fpr0, fpr0, fpr1);
737 else
738 jit_FPSEL_divr_xd_fppop(extfl, fpr0, fpr0, fpr1);
739 break;
740 case ARITH_SUB:
741 {
742 if (!two_args && !second_const && !reversed) {
743 /* Need a special case to make sure that (- 0.0) => -0.0 */
744 jit_FPSEL_negr_xd_fppop(extfl, fpr0, fpr0);
745 } else if (reversed)
746 jit_FPSEL_subr_xd_fppop(extfl, fpr0, fpr0, fpr1);
747 else
748 jit_FPSEL_subrr_xd_fppop(extfl, fpr0, fpr0, fpr1);
749 }
750 break;
751 case ARITH_MIN:
752 case ARITH_MAX:
753 {
754 GC_CAN_IGNORE jit_insn *refc, *refn;
755 __START_TINY_JUMPS__(1);
756
757 /* If R0 is nan, then copy to R1, ensuring nan result */
758 refn = jit_FPSEL_beqr_xd(extfl, jit_forward(), fpr0, fpr0);
759 if (unboxed)
760 jit_FPSEL_movr_xd_rel(extfl, fpr1, fpr0);
761 else
762 jit_movr_p(JIT_R1, JIT_R0);
763 mz_patch_branch(refn);
764 if (arith == ARITH_MIN) {
765 if (unboxed) {
766 refc = jit_FPSEL_bltr_xd(extfl, jit_forward(), fpr0, fpr1);
767 } else {
768 refc = jit_FPSEL_bltr_xd_fppop(extfl, jit_forward(), fpr0, fpr1);
769 }
770 } else {
771 if (unboxed) {
772 refc = jit_FPSEL_bger_xd(extfl, jit_forward(), fpr0, fpr1);
773 } else {
774 refc = jit_FPSEL_bger_xd_fppop(extfl, jit_forward(), fpr0, fpr1);
775 }
776 }
777 CHECK_LIMIT();
778 if (unboxed) {
779 jit_FPSEL_movr_xd_rel(extfl, fpr0, fpr1);
780 need_post_pop = 1;
781 } else
782 jit_movr_p(JIT_R0, JIT_R1);
783
784 mz_patch_branch(refc);
785 __END_TINY_JUMPS__(1);
786 if (!unboxed) {
787 /* we've already set JIT_R0 */
788 jit_movr_p(dest, JIT_R0);
789 no_alloc = 1;
790 }
791 }
792 break;
793 case ARITH_ABS:
794 jit_FPSEL_abs_xd_fppop(extfl, fpr0, fpr0);
795 break;
796 case ARITH_EX_INEX: /* exact->inexact */
797 /* no work to do, because argument is already inexact;
798 no need to allocate, because argument is never unboxed,
799 and it therefore already resides in R0 */
800 jit_movr_p(dest, JIT_R0);
801 no_alloc = 1;
802 break;
803 case ARITH_INEX_EX: /* inexact->exact */
804 if (!unsafe_fl) {
805 jit_FPSEL_movr_xd_fppush(extfl, fpr1, fpr0);
806 }
807 jit_FPSEL_roundr_xd_l_fppop(extfl, JIT_R1, fpr0);
808 if (!unsafe_fl) {
809 /* to check whether it fits in a fixnum, we
810 need to convert back and check whether it
811 is the same */
812 if (unboxed) {
813 JIT_ASSERT(jitter->unbox_depth == 0);
814 jit_FPSEL_movr_xd_fppush(extfl, JIT_FPR2, fpr1); /* for slow path */
815 }
816 jit_FPSEL_extr_l_xd_fppush(extfl, fpr0, JIT_R1);
817 __START_TINY_JUMPS__(1);
818 refs = jit_FPSEL_bantieqr_xd_fppop(extfl, jit_forward(), fpr0, fpr1);
819 __END_TINY_JUMPS__(1);
820 /* result still may not fit in a fixnum */
821 jit_lshi_l(JIT_R2, JIT_R1, 1);
822 jit_rshi_l(JIT_R2, JIT_R2, 1);
823 __START_TINY_JUMPS__(1);
824 refs2 = jit_bner_l(jit_forward(), JIT_R1, JIT_R2);
825 __END_TINY_JUMPS__(1);
826 #if !defined(DIRECT_FPR_ACCESS) || defined(MZ_LONG_DOUBLE)
827 if (unboxed && !USES_DIRECT_FPR_ACCESS)
828 jit_FPSEL_roundr_xd_l_fppop(extfl, JIT_R1, JIT_FPR2); /* slow path won't be needed */
829 #endif
830 }
831 jit_fixnum_l(dest, JIT_R1);
832 no_alloc = 1;
833 break;
834 case ARITH_INEX_TRUNC_EX: /* fl->fx */
835 if (!unsafe_fl) {
836 #ifdef DIRECT_FPR_ACCESS
837 if (unboxed && USES_DIRECT_FPR_ACCESS) {
838 JIT_ASSERT(jitter->unbox_depth == 0);
839 jit_FPSEL_movr_xd_fppush(extfl, JIT_FPR2, fpr0); /* for slow path */
840 }
841 #endif
842 #if !defined(DIRECT_FPR_ACCESS) || defined(MZ_LONG_DOUBLE)
843 if (!USES_DIRECT_FPR_ACCESS) {
844 jit_FPSEL_movr_xd_fppush(extfl, fpr0, fpr0); /* copy for comparison */
845 }
846 #endif
847 #ifdef MZ_LONG_DOUBLE
848 if (extfl) {
849 mz_fpu_movi_ld_fppush(fpr1, scheme_extfl_too_positive_for_fixnum, JIT_R1);
850 } else
851 #endif
852 {
853 mz_movi_d_fppush(fpr1, scheme_double_too_positive_for_fixnum, JIT_R1);
854 }
855 __START_TINY_JUMPS__(1);
856 refs = jit_FPSEL_bantigtr_xd_fppop(extfl, jit_forward(), fpr1, fpr0);
857 __END_TINY_JUMPS__(1);
858
859 #if !defined(DIRECT_FPR_ACCESS) || defined(MZ_LONG_DOUBLE)
860 if (!USES_DIRECT_FPR_ACCESS) {
861 jit_FPSEL_movr_xd_fppush(extfl, fpr0, fpr0); /* copy for comparison */
862 }
863 #endif
864 #ifdef MZ_LONG_DOUBLE
865 if (extfl) {
866 mz_fpu_movi_ld_fppush(fpr1, scheme_extfl_too_negative_for_fixnum, JIT_R1);
867 } else
868 #endif
869 {
870 mz_movi_d_fppush(fpr1, scheme_double_too_negative_for_fixnum, JIT_R1);
871 }
872 __START_TINY_JUMPS__(1);
873 refs2 = jit_FPSEL_bantiltr_xd_fppop(extfl, jit_forward(), fpr1, fpr0);
874 __END_TINY_JUMPS__(1);
875 }
876 jit_FPSEL_truncr_xd_l_fppop(extfl, JIT_R1, fpr0);
877 jit_fixnum_l(dest, JIT_R1);
878 no_alloc = 1;
879 break;
880 case ARITH_SQRT:
881 jit_FPSEL_sqrt_xd_fppop(extfl, fpr0, fpr0);
882 break;
883 #ifdef CAN_INLINE_ALLOC
884 # ifdef JIT_USE_FP_OPS
885 case ARITH_FLUNOP: /* flfloor, flsin, etc. */
886 {
887 call_fp_proc f;
888 #ifdef MZ_LONG_DOUBLE
889 if (extfl) {
890 if (IS_NAMED_PRIM(rator, "extflsin"))
891 f = call_long_double_sin;
892 else if (IS_NAMED_PRIM(rator, "extflcos"))
893 f = call_long_double_cos;
894 else if (IS_NAMED_PRIM(rator, "extfltan"))
895 f = call_long_double_tan;
896 else if (IS_NAMED_PRIM(rator, "extflasin"))
897 f = call_long_double_asin;
898 else if (IS_NAMED_PRIM(rator, "extflacos"))
899 f = call_long_double_acos;
900 else if (IS_NAMED_PRIM(rator, "extflatan"))
901 f = call_long_double_atan;
902 else if (IS_NAMED_PRIM(rator, "extflexp"))
903 f = call_long_double_exp;
904 else if (IS_NAMED_PRIM(rator, "extfllog"))
905 f = call_long_double_log;
906 else if (IS_NAMED_PRIM(rator, "extflfloor"))
907 f = call_long_double_floor;
908 else if (IS_NAMED_PRIM(rator, "extflceiling"))
909 f = call_long_double_ceiling;
910 else if (IS_NAMED_PRIM(rator, "extfltruncate"))
911 f = call_long_double_truncate;
912 else if (IS_NAMED_PRIM(rator, "extflround"))
913 f = call_long_double_round;
914 else {
915 scheme_signal_error("internal error: unknown extflonum function");
916 f = NULL;
917 }
918 (void)mz_fpu_tl_sti_ld_fppop(tl_scheme_jit_save_extfp, JIT_FPU_FPR0, JIT_R2);
919 mz_prepare(0);
920 (void)mz_finish(f);
921 (void)mz_fpu_tl_ldi_ld_fppush(JIT_FPU_FPR0, tl_scheme_jit_save_extfp, JIT_R2);
922 } else
923 #endif
924 {
925 if (IS_NAMED_PRIM(rator, "flsin"))
926 f = call_sin;
927 else if (IS_NAMED_PRIM(rator, "flcos"))
928 f = call_cos;
929 else if (IS_NAMED_PRIM(rator, "fltan"))
930 f = call_tan;
931 else if (IS_NAMED_PRIM(rator, "flasin"))
932 f = call_asin;
933 else if (IS_NAMED_PRIM(rator, "flacos"))
934 f = call_acos;
935 else if (IS_NAMED_PRIM(rator, "flatan"))
936 f = call_atan;
937 else if (IS_NAMED_PRIM(rator, "flexp"))
938 f = call_exp;
939 else if (IS_NAMED_PRIM(rator, "fllog"))
940 f = call_log;
941 else if (IS_NAMED_PRIM(rator, "flfloor"))
942 f = call_floor;
943 else if (IS_NAMED_PRIM(rator, "flceiling"))
944 f = call_ceiling;
945 else if (IS_NAMED_PRIM(rator, "fltruncate"))
946 f = call_truncate;
947 else if (IS_NAMED_PRIM(rator, "flround"))
948 f = call_round;
949 else if (IS_NAMED_PRIM(rator, "flsingle") || IS_NAMED_PRIM(rator, "unsafe-flsingle"))
950 f = call_single;
951 else {
952 scheme_signal_error("internal error: unknown flonum function");
953 f = NULL;
954 }
955 (void)mz_tl_sti_d_fppop(tl_scheme_jit_save_fp, JIT_FPR0, JIT_R2);
956 mz_prepare(0);
957 (void)mz_finish(f);
958 (void)mz_tl_ldi_d_fppush(JIT_FPR0, tl_scheme_jit_save_fp, JIT_R2);
959 }
960 }
961 break;
962 case ARITH_EXPT: /* flexpt */
963 {
964 #ifdef MZ_LONG_DOUBLE
965 if (extfl) {
966 if (!reversed) {
967 (void)mz_fpu_tl_sti_ld_fppop(tl_scheme_jit_save_extfp2, JIT_FPU_FPR0, JIT_R2);
968 (void)mz_fpu_tl_sti_ld_fppop(tl_scheme_jit_save_extfp, JIT_FPU_FPR1, JIT_R2);
969 } else {
970 (void)mz_fpu_tl_sti_ld_fppop(tl_scheme_jit_save_extfp, JIT_FPU_FPR0, JIT_R2);
971 (void)mz_fpu_tl_sti_ld_fppop(tl_scheme_jit_save_extfp2, JIT_FPU_FPR1, JIT_R2);
972 }
973 mz_prepare(0);
974 (void)mz_finish(call_long_double_expt);
975 (void)mz_fpu_tl_ldi_ld_fppush(JIT_FPU_FPR0, tl_scheme_jit_save_extfp, JIT_R2);
976 } else
977 #endif
978 {
979 if (!reversed) {
980 (void)mz_tl_sti_d_fppop(tl_scheme_jit_save_fp2, JIT_FPR0, JIT_R2);
981 (void)mz_tl_sti_d_fppop(tl_scheme_jit_save_fp, JIT_FPR1, JIT_R2);
982 } else {
983 (void)mz_tl_sti_d_fppop(tl_scheme_jit_save_fp, JIT_FPR0, JIT_R2);
984 (void)mz_tl_sti_d_fppop(tl_scheme_jit_save_fp2, JIT_FPR1, JIT_R2);
985 }
986 mz_prepare(0);
987 (void)mz_finish(call_expt);
988 (void)mz_tl_ldi_d_fppush(JIT_FPR0, tl_scheme_jit_save_fp, JIT_R2);
989 }
990 }
991 break;
992 # endif
993 #endif
994 default:
995 break;
996 }
997 CHECK_LIMIT();
998
999 if (!no_alloc) {
1000 mz_rs_sync(); /* needed if arguments were unboxed */
1001 scheme_generate_alloc_X_double(jitter, 0, dest, extfl);
1002 CHECK_LIMIT();
1003 #if defined(MZ_USE_JIT_I386)
1004 if (need_post_pop && !USES_DIRECT_FPR_ACCESS)
1005 FSTPr(0);
1006 #endif
1007 } else if (unboxed_result) {
1008 jitter->unbox_depth++;
1009 #if defined(MZ_USE_JIT_I386)
1010 if (need_post_pop && !USES_DIRECT_FPR_ACCESS) {
1011 FXCHr(1);
1012 FSTPr(0);
1013 }
1014 #endif
1015 }
1016 #if defined(MZ_LONG_DOUBLE) && defined(MZ_NEED_SET_EXTFL_MODE)
1017 if (extfl && need_control_reset) {
1018 jit_set_fp_control(0x27f);
1019 }
1020 #endif
1021 } else {
1022 /* The "anti" variants below invert the branch. Unlike the "un"
1023 variants, the "anti" variants invert the comparison result
1024 after the layer where +nan.0 always generates false. */
1025 __START_SHORT_JUMPS__(branch_short);
1026 if (for_branch) {
1027 scheme_prepare_branch_jump(jitter, for_branch);
1028 CHECK_LIMIT();
1029 }
1030 R0_FP_ADJUST(_jitl.r0_can_be_tmp++);
1031 switch (cmp) {
1032 case CMP_LT:
1033 refd = jit_FPSEL_bantigtr_xd_fppop(extfl, jit_forward(), fpr0, fpr1);
1034 break;
1035 case CMP_LEQ:
1036 refd = jit_FPSEL_bantiger_xd_fppop(extfl, jit_forward(), fpr0, fpr1);
1037 break;
1038 case CMP_EQUAL:
1039 refd = jit_FPSEL_bantieqr_xd_fppop(extfl, jit_forward(), fpr0, fpr1);
1040 break;
1041 case CMP_GEQ:
1042 refd = jit_FPSEL_bantiler_xd_fppop(extfl, jit_forward(), fpr0, fpr1);
1043 break;
1044 case CMP_GT:
1045 refd = jit_FPSEL_bantiltr_xd_fppop(extfl, jit_forward(), fpr0, fpr1);
1046 break;
1047 default:
1048 refd = NULL;
1049 break;
1050 }
1051 R0_FP_ADJUST(_jitl.r0_can_be_tmp--);
1052 __END_SHORT_JUMPS__(branch_short);
1053 *_refd = refd;
1054 }
1055 }
1056
1057 if (!unsafe_fl) {
1058 /* Jump to return result or true branch: */
1059 __START_SHORT_JUMPS__(branch_short);
1060 refdt = jit_jmpi(jit_forward());
1061 *_refdt = refdt;
1062 __END_SHORT_JUMPS__(branch_short);
1063 }
1064
1065 if (!unsafe_fl) {
1066 /* No, they're not both doubles, or slow path is needed
1067 for some other reason. */
1068 __START_TINY_JUMPS__(1);
1069 if (!unboxed) {
1070 if (two_args) {
1071 mz_patch_branch(ref8);
1072 mz_patch_branch(ref10);
1073 }
1074 mz_patch_branch(ref9);
1075 }
1076 if (refs)
1077 mz_patch_branch(refs);
1078 if (refs2)
1079 mz_patch_branch(refs2);
1080 __END_TINY_JUMPS__(1);
1081 }
1082 #endif
1083
1084 return 1;
1085 }
1086
check_float_type_result(mz_jit_state * jitter,int reg,void * fail_code,Scheme_Object * rator,int type)1087 static int check_float_type_result(mz_jit_state *jitter, int reg, void *fail_code, Scheme_Object *rator, int type)
1088 /* Doesn't use R0 or R1, except for `reg' */
1089 {
1090 /* Check for flonum result */
1091 GC_CAN_IGNORE jit_insn *ref, *reffail;
1092
1093 mz_rs_sync();
1094
1095 __START_TINY_JUMPS__(1);
1096 ref = jit_bmci_l(jit_forward(), reg, 0x1);
1097 __END_TINY_JUMPS__(1);
1098
1099 reffail = jit_get_ip();
1100 (void)jit_movi_p(JIT_V1, ((Scheme_Primitive_Proc *)rator)->prim_val);
1101 (void)jit_calli(fail_code);
1102
1103 __START_TINY_JUMPS__(1);
1104 mz_patch_branch(ref);
1105 __END_TINY_JUMPS__(1);
1106
1107 jit_ldxi_s(JIT_R2, reg, &((Scheme_Object *)0x0)->type);
1108 __START_SHORT_JUMPS__(1);
1109 (void)jit_bnei_i(reffail, JIT_R2, type);
1110 __END_SHORT_JUMPS__(1);
1111 CHECK_LIMIT();
1112
1113 scheme_generate_unboxing(jitter, reg);
1114
1115 return 1;
1116 }
1117
check_flonum_result(mz_jit_state * jitter,int reg,void ** fail_code,Scheme_Object * rator,int extfl)1118 static int check_flonum_result(mz_jit_state *jitter, int reg, void **fail_code, Scheme_Object *rator, int extfl)
1119 /* Doesn't use R0 or R1, except for `reg' */
1120 {
1121 return check_float_type_result(jitter, reg, fail_code[extfl], rator,
1122 (extfl ? scheme_long_double_type : scheme_double_type));
1123 }
1124
generate_modulo_setup(mz_jit_state * jitter,int branch_short,int a1,int a2)1125 static void generate_modulo_setup(mz_jit_state *jitter, int branch_short, int a1, int a2)
1126 /* r1 has two flags: bit 0 means two args have different sign; bit 1 means second arg is negative */
1127 {
1128 GC_CAN_IGNORE jit_insn *refx;
1129
1130 jit_movi_l(JIT_R1, 0x0);
1131 __START_INNER_TINY__(branch_short);
1132 refx = jit_bgei_l(jit_forward(), a1, 0);
1133 jit_negr_l(a1, a1);
1134 jit_movi_l(JIT_R1, 0x1);
1135 mz_patch_branch(refx);
1136 refx = jit_bgei_l(jit_forward(), a2, 0);
1137 jit_xori_l(JIT_R1, JIT_R1, 0x3);
1138 jit_negr_l(a2, a2);
1139 mz_patch_branch(refx);
1140 __END_INNER_TINY__(branch_short);
1141 }
1142
scheme_generate_arith_for(mz_jit_state * jitter,Scheme_Object * rator,Scheme_Object * rand,Scheme_Object * rand2,int orig_args,int arith,int cmp,int v,Branch_Info * for_branch,int branch_short,int unsafe_fx,int unsafe_fl,GC_CAN_IGNORE jit_insn * overflow_refslow,int dest,int extfl)1143 int scheme_generate_arith_for(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Object *rand, Scheme_Object *rand2,
1144 int orig_args, int arith, int cmp, int v,
1145 Branch_Info *for_branch, int branch_short,
1146 int unsafe_fx, int unsafe_fl, GC_CAN_IGNORE jit_insn *overflow_refslow,
1147 int dest, int extfl)
1148 /* needs de-sync */
1149 /* Operation codes are defined in jit.h.
1150 Either arith is non-zero or it's a cmp; the value of each determines the operation:
1151 arith = 1 -> + or add1 (if !rand2)
1152 arith = -1 -> - or sub1
1153 arith = 2 -> *
1154 arith = -2 -> /
1155 arith = -3 -> quotient
1156 arith = -4 -> remainder
1157 arith = -5 -> modulo
1158 arith = 3 -> bitwise-and
1159 arith = 4 -> bitwise-ior
1160 arith = 5 -> bitwise-xor
1161 arith = 6 -> arithmetic-shift, fxlshift
1162 arith = -6 -> fxrshift
1163 arith = 7 -> bitwise-not
1164 arith = 9 -> min
1165 arith = 10 -> max
1166 arith = 11 -> abs
1167 arith = 12 -> exact->inexact
1168 arith = 13 -> sqrt
1169 arith = 14 -> unary floating-point op (consult `rator')
1170 arith = 15 -> inexact->exact
1171 arith = 16 -> flexpt
1172 cmp = 0 -> = or zero?
1173 cmp = +/-1 -> >=/<=
1174 cmp = +/-2 -> >/< or positive/negative?
1175 cmp = 3 -> bitwise-bit-test?
1176 cmp = +/-4 -> even?/odd?
1177 If rand is NULL, then we're generating part of the fast path for an
1178 nary arithmatic over a binary operator; the first argument is
1179 already in R0 (fixnum or min/max) or a floating-point register
1180 (flonum) and the second argument is in R1 (fixnum or min/max) or a
1181 floating-point register (flonum).
1182 For unsafe_fx or unsafe_fl -1 means safe but specific to the type.
1183 */
1184 {
1185 GC_CAN_IGNORE jit_insn *ref, *ref2, *ref3, *ref4, *refd = NULL, *refdt = NULL;
1186 GC_CAN_IGNORE jit_insn *refslow;
1187 int reversed = 0;
1188 int has_fixnum_fast = 1, has_flonum_fast = 1;
1189 int inlined_flonum1, inlined_flonum2;
1190
1191 LOG_IT(("inlined %s\n", rator ? ((Scheme_Primitive_Proc *)rator)->name : "???"));
1192
1193 if (unsafe_fx < 0) {
1194 unsafe_fx = 0;
1195 has_flonum_fast = 0;
1196 }
1197
1198 if (unsafe_fl) {
1199 if (!rand) {
1200 inlined_flonum1 = inlined_flonum2 = 1;
1201 } else {
1202 if (scheme_can_unbox_inline(rand, 5, JIT_FPUSEL_FPR_NUM(extfl)-2, unsafe_fl > 0, extfl))
1203 inlined_flonum1 = 1;
1204 else
1205 inlined_flonum1 = 0;
1206 if (!rand2 || scheme_can_unbox_inline(rand2, 5, JIT_FPUSEL_FPR_NUM(extfl)-3, unsafe_fl > 0, extfl))
1207 inlined_flonum2 = 1;
1208 else
1209 inlined_flonum2 = 0;
1210 }
1211 } else
1212 inlined_flonum1 = inlined_flonum2 = 0;
1213
1214 if (unsafe_fl
1215 #ifndef USE_FLONUM_UNBOXING
1216 && inlined_flonum1 && inlined_flonum2
1217 #endif
1218 ) {
1219 /* Unboxed (and maybe unsafe) floating-point ops. */
1220 int args_unboxed = (((arith != ARITH_MIN) && (arith != ARITH_MAX)) || rand);
1221 int flonum_depth, fl_reversed = 0, can_direct1, can_direct2;
1222
1223 if (inlined_flonum1 && inlined_flonum2 && (arith != ARITH_INEX_EX) && (arith != ARITH_INEX_TRUNC_EX))
1224 /* safe can be implemented as unsafe */
1225 unsafe_fl = 1;
1226
1227 if (!args_unboxed && rand)
1228 scheme_signal_error("internal error: invalid mode");
1229
1230 if (inlined_flonum1 && !inlined_flonum2 && can_reorder_unboxing(rand, rand2, extfl)) {
1231 GC_CAN_IGNORE Scheme_Object *tmp;
1232 reversed = !reversed;
1233 cmp = -cmp;
1234 fl_reversed = 1;
1235 tmp = rand;
1236 rand = rand2;
1237 rand2 = tmp;
1238 inlined_flonum1 = 0;
1239 inlined_flonum2 = 1;
1240 }
1241
1242 if (inlined_flonum1)
1243 can_direct1 = 2;
1244 else
1245 can_direct1 = scheme_can_unbox_directly(rand, extfl);
1246 if (inlined_flonum2)
1247 can_direct2 = 2;
1248 else
1249 can_direct2 = scheme_can_unbox_directly(rand2, extfl);
1250
1251 if (args_unboxed) {
1252 jitter->unbox++;
1253 MZ_FPUSEL_STMT_ONLY(extfl, jitter->unbox_extflonum++);
1254 }
1255 if (!rand) {
1256 CHECK_LIMIT();
1257 if (args_unboxed)
1258 flonum_depth = 2;
1259 else
1260 flonum_depth = 0;
1261 } else if (!rand2) {
1262 mz_runstack_skipped(jitter, 1);
1263 scheme_generate_unboxed(rand, jitter, can_direct1, (unsafe_fl > 0));
1264 CHECK_LIMIT();
1265 mz_runstack_unskipped(jitter, 1);
1266 if (!can_direct1 && (unsafe_fl <= 0)) {
1267 check_flonum_result(jitter, JIT_R0, sjc.fl1_fail_code, rator, extfl);
1268 CHECK_LIMIT();
1269 }
1270 flonum_depth = 1;
1271 } else {
1272 #ifdef USE_FLONUM_UNBOXING
1273 int flostack = 0, flopos = 0;
1274 #endif
1275 mz_runstack_skipped(jitter, 2);
1276 scheme_generate_unboxed(rand, jitter, can_direct1, (unsafe_fl > 0));
1277 CHECK_LIMIT();
1278 if (!(inlined_flonum1 && inlined_flonum2)) {
1279 if (!can_direct1 && (unsafe_fl <= 0)) {
1280 mz_pushr_p(JIT_R0);
1281 } else if (!inlined_flonum2) {
1282 #ifdef USE_FLONUM_UNBOXING
1283 flostack = scheme_mz_flostack_save(jitter, &flopos);
1284 --jitter->unbox_depth;
1285 scheme_generate_flonum_local_unboxing(jitter, 0, 0, extfl);
1286 CHECK_LIMIT();
1287 #endif
1288 }
1289 }
1290 scheme_generate_unboxed(rand2, jitter, can_direct2, (unsafe_fl > 0));
1291 CHECK_LIMIT();
1292 if (!(inlined_flonum1 && inlined_flonum2)) {
1293 if ((can_direct1 || (unsafe_fl > 0)) && !inlined_flonum2) {
1294 #ifdef USE_FLONUM_UNBOXING
1295 int fpr0 USED_ONLY_SOMETIMES;
1296 fpr0 = JIT_FPUSEL_FPR_0(extfl, jitter->unbox_depth);
1297 mz_ld_fppush(fpr0, jitter->flostack_offset, extfl);
1298 scheme_mz_flostack_restore(jitter, flostack, flopos, 1, 1);
1299 CHECK_LIMIT();
1300 jitter->unbox_depth++;
1301 #endif
1302 }
1303 if (!can_direct2 && (unsafe_fl <= 0)) {
1304 jit_movr_p(JIT_R1, JIT_R0);
1305 if (!can_direct1) {
1306 mz_popr_p(JIT_R0);
1307 check_flonum_result(jitter, JIT_R0, sjc.fl2rr_fail_code[fl_reversed], rator, extfl);
1308 CHECK_LIMIT();
1309 }
1310 check_flonum_result(jitter, JIT_R1, sjc.fl2fr_fail_code[fl_reversed], rator, extfl);
1311 CHECK_LIMIT();
1312 } else {
1313 if (!can_direct1 && (unsafe_fl <= 0)) {
1314 mz_popr_p(JIT_R0);
1315 check_flonum_result(jitter, JIT_R0, sjc.fl2rf_fail_code[fl_reversed], rator, extfl);
1316 CHECK_LIMIT();
1317 }
1318 if (!(can_direct1 || (unsafe_fl > 0)) || !inlined_flonum2) {
1319 cmp = -cmp;
1320 reversed = !reversed;
1321 }
1322 }
1323 }
1324 mz_runstack_unskipped(jitter, 2);
1325 flonum_depth = 2;
1326 }
1327 if (args_unboxed) {
1328 MZ_FPUSEL_STMT_ONLY(extfl, --jitter->unbox_extflonum);
1329 --jitter->unbox;
1330 }
1331 jitter->unbox_depth -= flonum_depth;
1332 if (!jitter->unbox && jitter->unbox_depth && rand)
1333 scheme_signal_error("internal error: broken unbox depth");
1334 if (for_branch
1335 || (arith == ARITH_INEX_EX) /* has slow path */
1336 || (arith == ARITH_INEX_TRUNC_EX)) /* could have slow path */
1337 mz_rs_sync(); /* needed if arguments were unboxed */
1338
1339 generate_float_point_arith(jitter, rator, arith, cmp, reversed, !!rand2, 0,
1340 &refd, &refdt, for_branch, branch_short,
1341 ((arith == ARITH_INEX_EX) || (arith == ARITH_INEX_TRUNC_EX)) ? (unsafe_fl > 0) : 1,
1342 args_unboxed, jitter->unbox, dest, extfl);
1343 CHECK_LIMIT();
1344 ref3 = NULL;
1345 ref = NULL;
1346 ref4 = NULL;
1347
1348 if (((arith == ARITH_INEX_EX) || (arith == ARITH_INEX_TRUNC_EX)) && (unsafe_fl < 1)) {
1349 /* need a slow path */
1350 if (args_unboxed) {
1351 MZ_FPUSEL_STMT(extfl,
1352 (void)jit_calli(sjc.box_extflonum_from_reg_code),
1353 (void)jit_calli(sjc.box_flonum_from_reg_code));
1354 }
1355 generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, branch_short, orig_args, reversed, arith, 0, 0, dest);
1356 /* assert: !ref4, since not for_branch */
1357 __START_SHORT_JUMPS__(branch_short);
1358 mz_patch_ucbranch(ref);
1359 mz_patch_ucbranch(refdt);
1360 __END_SHORT_JUMPS__(branch_short);
1361 }
1362
1363 __START_SHORT_JUMPS__(branch_short);
1364 } else {
1365 mz_jit_unbox_state ubs;
1366
1367 if (unsafe_fl < 0) {
1368 has_fixnum_fast = 0;
1369 unsafe_fl = 0;
1370 }
1371
1372 /* While generating a fixnum op, don't unbox! */
1373 scheme_mz_unbox_save(jitter, &ubs);
1374
1375 if (!rand) {
1376 /* generating for an nary operation; first arg in R0,
1377 second in R1 */
1378 reversed = 1;
1379 cmp = -cmp;
1380 refslow = overflow_refslow;
1381 refd = NULL;
1382 refdt = NULL;
1383 ref3 = NULL;
1384 ref = NULL;
1385 ref4 = NULL;
1386 } else {
1387 if (!unsafe_fl
1388 && ((!arith && (cmp != CMP_BIT))
1389 || (arith == ARITH_MIN)
1390 || (arith == ARITH_MAX)
1391 || (arith == ARITH_AND)
1392 || (arith == ARITH_IOR)
1393 || (arith == ARITH_XOR))) {
1394 /* No slow path necessary for fixnum arguments. */
1395 if (scheme_jit_is_fixnum(rand) && (!rand2 || scheme_jit_is_fixnum(rand2))) {
1396 unsafe_fx = 1;
1397 }
1398 }
1399
1400 if (rand2) {
1401 if (SCHEME_INTP(rand2)
1402 && SCHEME_INT_SMALL_ENOUGH(rand2)
1403 && ((arith == ARITH_SH)
1404 ? ((SCHEME_INT_VAL(rand2) <= MAX_TRY_SHIFT)
1405 && (SCHEME_INT_VAL(rand2) >= -MAX_TRY_SHIFT))
1406 : (((arith == ARITH_LSH) || (arith == ARITH_LSH_WRAP) || (arith == ARITH_RSH))
1407 ? ((SCHEME_INT_VAL(rand2) <= MAX_TRY_SHIFT)
1408 && (SCHEME_INT_VAL(rand2) >= 0))
1409 : 1))
1410 && ((cmp != CMP_BIT)
1411 || ((SCHEME_INT_VAL(rand2) <= MAX_TRY_SHIFT)
1412 && (SCHEME_INT_VAL(rand2) >= 0)))) {
1413 /* Second is constant, so use constant mode.
1414 For arithmetic shift, only do this if the constant
1415 is in range. */
1416 v = SCHEME_INT_VAL(rand2);
1417 rand2 = NULL;
1418 } else if (SCHEME_INTP(rand)
1419 && SCHEME_INT_SMALL_ENOUGH(rand)
1420 && (arith != ARITH_SH) && (arith != ARITH_LSH) && (arith != ARITH_LSH_WRAP) && (arith != ARITH_RSH)
1421 && (cmp != CMP_BIT)) {
1422 /* First is constant; swap argument order and use constant mode. */
1423 v = SCHEME_INT_VAL(rand);
1424 cmp = -cmp;
1425 rand = rand2;
1426 rand2 = NULL;
1427 reversed = 1;
1428 } else if ((scheme_ok_to_move_local(rand2)
1429 || SCHEME_INTP(rand2))
1430 && !(scheme_ok_to_move_local(rand)
1431 || SCHEME_INTP(rand))) {
1432 /* Second expression is side-effect-free, unlike the first;
1433 swap order and use the fast path for when the first arg is
1434 side-effect free. */
1435 Scheme_Object *t = rand2;
1436 rand2 = rand;
1437 rand = t;
1438 cmp = -cmp;
1439 reversed = 1;
1440 }
1441 }
1442
1443 if ((arith == ARITH_SUB) && (orig_args == 1) && !v) {
1444 /* Unary subtract */
1445 reversed = 1;
1446 }
1447
1448 if (!unsafe_fl && !unsafe_fx
1449 && (scheme_jit_is_fixnum(rand) && (!rand2 || scheme_jit_is_fixnum(rand2)))) {
1450 /* Since we'll get a fix num, skip flonum variant */
1451 has_flonum_fast = 0;
1452 }
1453
1454 if (rand2) {
1455 int dir;
1456 dir = scheme_generate_two_args(rand, rand2, jitter, 0, orig_args);
1457 CHECK_LIMIT();
1458 /* Since we want rand in R1 and rand2 in R0, direction is backwards: */
1459 if (dir > 0) {
1460 Scheme_Object *t = rand2;
1461 rand2 = rand;
1462 rand = t;
1463 cmp = -cmp;
1464 reversed = !reversed;
1465 }
1466 } else {
1467 mz_runstack_skipped(jitter, orig_args);
1468 scheme_generate_non_tail(rand, jitter, 0, 1, 0); /* sync'd later */
1469 CHECK_LIMIT();
1470 mz_runstack_unskipped(jitter, orig_args);
1471 CHECK_RUNSTACK_OVERFLOW();
1472 }
1473 /* not sync'd... */
1474
1475 /* two arguments: rand2 in R0, and rand in R1 */
1476 /* one argument: rand in R0 */
1477
1478 if (rand2) {
1479 int va;
1480
1481 if (scheme_jit_is_fixnum(rand)) {
1482 if (scheme_jit_is_fixnum(rand2))
1483 va = -1; /* no check needed */
1484 else
1485 va = JIT_R0; /* check only rand2 */
1486 } else if (scheme_jit_is_fixnum(rand2)) {
1487 va = JIT_R1; /* check only rand */
1488 } else {
1489 if (!unsafe_fx && !unsafe_fl) {
1490 /* check both fixnum bits at once by ANDing into R2: */
1491 jit_andr_ul(JIT_R2, JIT_R0, JIT_R1);
1492 va = JIT_R2;
1493 } else
1494 va = -1;
1495 }
1496
1497 if (!unsafe_fx && !unsafe_fl) {
1498 mz_rs_sync();
1499
1500 __START_TINY_JUMPS_IF_COMPACT__(1);
1501 if (va == -1)
1502 ref2 = jit_jmpi(jit_forward());
1503 else
1504 ref2 = jit_bmsi_ul(jit_forward(), va, 0x1);
1505 __END_TINY_JUMPS_IF_COMPACT__(1);
1506 } else {
1507 ref2 = NULL;
1508 if (for_branch) mz_rs_sync();
1509 }
1510
1511 if (unsafe_fl || (!unsafe_fx && !SCHEME_INTP(rand)
1512 && has_flonum_fast
1513 && can_fast_double(arith, cmp, 1))) {
1514 /* Maybe they're both doubles... */
1515 if (unsafe_fl) mz_rs_sync();
1516 generate_float_point_arith(jitter, rator, arith, cmp, reversed, 1, 0, &refd, &refdt,
1517 for_branch, branch_short, unsafe_fl, 0, ubs.unbox, dest, extfl);
1518 CHECK_LIMIT();
1519 }
1520
1521 if (!unsafe_fx && !unsafe_fl) {
1522 if (!has_fixnum_fast) {
1523 __START_TINY_JUMPS_IF_COMPACT__(1);
1524 if (va == -1)
1525 mz_patch_ucbranch(ref2);
1526 else
1527 mz_patch_branch(ref2);
1528 __END_TINY_JUMPS_IF_COMPACT__(1);
1529 }
1530
1531 /* Slow path */
1532 refslow = generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, branch_short,
1533 orig_args, reversed, arith, 0, 0, dest);
1534
1535 if (has_fixnum_fast) {
1536 __START_TINY_JUMPS_IF_COMPACT__(1);
1537 if (va == -1)
1538 mz_patch_ucbranch(ref2);
1539 else
1540 mz_patch_branch(ref2);
1541 __END_TINY_JUMPS_IF_COMPACT__(1);
1542 }
1543 } else {
1544 refslow = overflow_refslow;
1545 ref = NULL;
1546 ref4 = NULL;
1547 }
1548 CHECK_LIMIT();
1549 } else {
1550 /* Only one argument: */
1551 int is_fx;
1552
1553 is_fx = scheme_jit_is_fixnum(rand);
1554
1555 if (!unsafe_fx && !unsafe_fl) {
1556 mz_rs_sync();
1557 __START_TINY_JUMPS_IF_COMPACT__(1);
1558 if (is_fx)
1559 ref2 = jit_jmpi(jit_forward());
1560 else
1561 ref2 = jit_bmsi_ul(jit_forward(), JIT_R0, 0x1);
1562 __END_TINY_JUMPS_IF_COMPACT__(1);
1563 } else {
1564 if (for_branch) mz_rs_sync();
1565 ref2 = NULL;
1566 }
1567
1568 if (unsafe_fl
1569 || ((orig_args != 2) /* <- heuristic: we could generate code when an exact argument is
1570 given, but the extra FP code is probably not worthwhile. */
1571 && !unsafe_fx
1572 && has_flonum_fast
1573 && can_fast_double(arith, cmp, 0)
1574 /* watch out: divide by 0 is special: */
1575 && ((arith != ARITH_DIV) || v || reversed))) {
1576 /* Maybe it's a double... */
1577 generate_float_point_arith(jitter, rator, arith, cmp, reversed, 0, v, &refd, &refdt,
1578 for_branch, branch_short, unsafe_fl, 0, ubs.unbox, dest, extfl);
1579 CHECK_LIMIT();
1580 }
1581
1582 if (!unsafe_fx && !unsafe_fl) {
1583 if (!has_fixnum_fast) {
1584 __START_TINY_JUMPS_IF_COMPACT__(1);
1585 if (is_fx)
1586 mz_patch_ucbranch(ref2);
1587 else
1588 mz_patch_branch(ref2);
1589 __END_TINY_JUMPS_IF_COMPACT__(1);
1590 }
1591
1592 /* Slow path */
1593 refslow = generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, branch_short,
1594 orig_args, reversed, arith, 1, v, dest);
1595
1596 if (has_fixnum_fast) {
1597 __START_TINY_JUMPS_IF_COMPACT__(1);
1598 if (is_fx)
1599 mz_patch_ucbranch(ref2);
1600 else
1601 mz_patch_branch(ref2);
1602 __END_TINY_JUMPS_IF_COMPACT__(1);
1603 }
1604 } else {
1605 refslow = overflow_refslow;
1606 ref = NULL;
1607 ref4 = NULL;
1608 }
1609 }
1610
1611 CHECK_LIMIT();
1612 }
1613
1614 __START_SHORT_JUMPS__(branch_short);
1615
1616 if (!unsafe_fl) {
1617 if (arith) {
1618 if (((arith == ARITH_DIV) || (arith == ARITH_QUOT) || (arith == ARITH_REM) || (arith == ARITH_MOD)) && !rand2) {
1619 (void)jit_movi_p(JIT_R1, scheme_make_integer(v));
1620 rand2 = scheme_true;
1621 reversed = !reversed;
1622 }
1623
1624 if (rand2) {
1625 /* First arg is in JIT_R1, second is in JIT_R0 */
1626 if ((arith == ARITH_ADD) || (arith == ARITH_ADD_WRAP)) {
1627 jit_andi_ul(JIT_R2, JIT_R1, (~0x1));
1628 if ((unsafe_fx && !overflow_refslow) || (arith == ARITH_ADD_WRAP))
1629 jit_addr_l(dest, JIT_R2, JIT_R0);
1630 else {
1631 (void)jit_boaddr_l(refslow, JIT_R2, JIT_R0);
1632 jit_movr_p(dest, JIT_R2);
1633 }
1634 } else if ((arith == ARITH_SUB) || (arith == ARITH_SUB_WRAP)) {
1635 if (reversed) {
1636 jit_movr_p(JIT_R2, JIT_R0);
1637 if ((unsafe_fx && !overflow_refslow) || (arith == ARITH_SUB_WRAP))
1638 jit_subr_l(JIT_R2, JIT_R2, JIT_R1);
1639 else
1640 (void)jit_bosubr_l(refslow, JIT_R2, JIT_R1);
1641 } else {
1642 jit_movr_p(JIT_R2, JIT_R1);
1643 if ((unsafe_fx && !overflow_refslow) || (arith == ARITH_SUB_WRAP))
1644 (void)jit_subr_l(JIT_R2, JIT_R2, JIT_R0);
1645 else
1646 (void)jit_bosubr_l(refslow, JIT_R2, JIT_R0);
1647 }
1648 jit_ori_ul(dest, JIT_R2, 0x1);
1649 } else if ((arith == ARITH_MUL) || (arith == ARITH_MUL_WRAP)) {
1650 jit_andi_ul(JIT_R2, JIT_R1, (~0x1));
1651 jit_rshi_l(JIT_V1, JIT_R0, 0x1);
1652 if ((unsafe_fx && !overflow_refslow) || (arith == ARITH_MUL_WRAP))
1653 jit_mulr_l(JIT_V1, JIT_V1, JIT_R2);
1654 else
1655 (void)jit_bomulr_l(refslow, JIT_V1, JIT_R2);
1656 jit_ori_ul(dest, JIT_V1, 0x1);
1657 } else if ((arith == ARITH_DIV) || (arith == ARITH_QUOT) || (arith == ARITH_REM) || (arith == ARITH_MOD)) {
1658 if (reversed) {
1659 jit_rshi_l(JIT_V1, JIT_R0, 0x1);
1660 jit_rshi_l(JIT_R2, JIT_R1, 0x1);
1661 } else {
1662 jit_rshi_l(JIT_R2, JIT_R0, 0x1);
1663 jit_rshi_l(JIT_V1, JIT_R1, 0x1);
1664 }
1665 if (!unsafe_fx || overflow_refslow)
1666 (void)jit_beqi_l(refslow, JIT_R2, 0);
1667
1668 if (arith == ARITH_MOD) {
1669 generate_modulo_setup(jitter, branch_short, JIT_V1, JIT_R2);
1670 CHECK_LIMIT();
1671 }
1672 if ((arith == ARITH_DIV) || (arith == ARITH_QUOT))
1673 jit_divr_l(JIT_R0, JIT_V1, JIT_R2);
1674 else
1675 jit_modr_l(JIT_R0, JIT_V1, JIT_R2);
1676
1677 if (arith == ARITH_DIV) {
1678 GC_CAN_IGNORE jit_insn *refx, *refz;
1679 __START_INNER_TINY__(branch_short);
1680 /* watch out for negation of most neg<ative fixnum,
1681 which is a positive number too big for a fixnum */
1682 refz = jit_beqi_p(jit_forward(), JIT_R0, (void *)(((uintptr_t)1 << ((8 * JIT_WORD_SIZE) - 2))));
1683 __END_INNER_TINY__(branch_short);
1684 if (reversed)
1685 jit_mulr_l(JIT_R2, JIT_R0, JIT_R2);
1686 else
1687 jit_mulr_l(JIT_V1, JIT_R0, JIT_V1);
1688 __START_INNER_TINY__(branch_short);
1689 refx = jit_beqr_l(jit_forward(), JIT_R2, JIT_V1);
1690 mz_patch_branch(refz);
1691 __END_INNER_TINY__(branch_short);
1692 /* restore R0 argument: */
1693 if (reversed) {
1694 jit_fixnum_l(JIT_R0, JIT_V1);
1695 } else {
1696 jit_fixnum_l(JIT_R0, JIT_R2);
1697 }
1698 (void)jit_jmpi(refslow);
1699 __START_INNER_TINY__(branch_short);
1700 mz_patch_branch(refx);
1701 __END_INNER_TINY__(branch_short);
1702 } else if (arith == ARITH_MOD) {
1703 GC_CAN_IGNORE jit_insn *refx, *refy;
1704 __START_INNER_TINY__(branch_short);
1705 refy = jit_beqi_l(jit_forward(), JIT_R0, 0);
1706 refx = jit_bmci_l(jit_forward(), JIT_R1, 0x1);
1707 jit_subr_l(JIT_R0, JIT_R2, JIT_R0);
1708 mz_patch_branch(refx);
1709 refx = jit_bmci_l(jit_forward(), JIT_R1, 0x2);
1710 jit_negr_l(JIT_R0, JIT_R0);
1711 mz_patch_branch(refx);
1712 mz_patch_branch(refy);
1713 __END_INNER_TINY__(branch_short);
1714 } else if (arith == ARITH_QUOT) {
1715 /* watch out for negation of most negative fixnum,
1716 which is a positive number too big for a fixnum */
1717 if (!unsafe_fx || overflow_refslow) {
1718 GC_CAN_IGNORE jit_insn *refx;
1719 __START_INNER_TINY__(branch_short);
1720 refx = jit_bnei_p(jit_forward(), JIT_R0, (void *)(((uintptr_t)1 << ((8 * JIT_WORD_SIZE) - 2))));
1721 __END_INNER_TINY__(branch_short);
1722 /* first argument must have been most negative fixnum,
1723 second argument must have been -1: */
1724 if (reversed)
1725 (void)jit_movi_p(JIT_R0, (void *)(((uintptr_t)1 << ((8 * JIT_WORD_SIZE) - 1)) | 0x1));
1726 else
1727 (void)jit_movi_p(JIT_R0, scheme_make_integer(-1));
1728 (void)jit_jmpi(refslow);
1729 __START_INNER_TINY__(branch_short);
1730 mz_patch_branch(refx);
1731 __END_INNER_TINY__(branch_short);
1732 }
1733 }
1734 jit_fixnum_l(dest, JIT_R0);
1735 } else if (arith == ARITH_AND) {
1736 /* and */
1737 jit_andr_ul(dest, JIT_R1, JIT_R0);
1738 } else if (arith == ARITH_IOR) {
1739 /* ior */
1740 jit_orr_ul(dest, JIT_R1, JIT_R0);
1741 } else if (arith == ARITH_XOR) {
1742 /* xor */
1743 jit_andi_ul(JIT_R0, JIT_R0, (~0x1));
1744 jit_xorr_ul(dest, JIT_R1, JIT_R0);
1745 } else if ((arith == ARITH_SH) || (arith == ARITH_LSH) || (arith == ARITH_LSH_WRAP) || (arith == ARITH_RSH)) {
1746 /* arithmetic-shift
1747 This is a lot of code, but if you're using
1748 arithmetic-shift, then you probably want it. */
1749 int v1 = (reversed ? JIT_R0 : JIT_R1);
1750 int v2 = (reversed ? JIT_R1 : JIT_R0);
1751 GC_CAN_IGNORE jit_insn *refi, *refc;
1752
1753 if (arith == ARITH_SH)
1754 refi = jit_bgei_l(jit_forward(), v2, (intptr_t)scheme_make_integer(0));
1755 else {
1756 refi = NULL;
1757 if (!unsafe_fx || overflow_refslow)
1758 (void)jit_blti_l(refslow, v2, (intptr_t)scheme_make_integer(0));
1759 }
1760
1761 if ((arith == ARITH_SH) || (arith == ARITH_RSH)) {
1762 /* Right shift */
1763 if (!unsafe_fx || overflow_refslow) {
1764 /* check for a small enough shift */
1765 if (arith == ARITH_RSH) {
1766 (void)jit_bgti_l(refslow, v2, (intptr_t)scheme_make_integer(MAX_TRY_SHIFT));
1767 } else {
1768 (void)jit_blti_l(refslow, v2, (intptr_t)scheme_make_integer(-MAX_TRY_SHIFT));
1769 }
1770 }
1771 if (arith == ARITH_RSH)
1772 jit_rshi_l(JIT_V1, v2, 0x1);
1773 else {
1774 jit_notr_l(JIT_V1, v2);
1775 jit_rshi_l(JIT_V1, JIT_V1, 0x1);
1776 jit_addi_l(JIT_V1, JIT_V1, 0x1);
1777 }
1778 CHECK_LIMIT();
1779 #ifdef MZ_USE_JIT_I386
1780 /* Can't shift from _ECX */
1781 jit_movr_l(JIT_R2, v1);
1782 jit_rshr_l(JIT_R2, JIT_R2, JIT_V1);
1783 #else
1784 jit_rshr_l(JIT_R2, v1, JIT_V1);
1785 #endif
1786 jit_ori_l(dest, JIT_R2, 0x1);
1787 if (!unsafe_fx || overflow_refslow)
1788 refc = jit_jmpi(jit_forward());
1789 else
1790 refc = NULL;
1791 CHECK_LIMIT();
1792 } else
1793 refc = NULL;
1794
1795 /* Left shift */
1796 if ((arith == ARITH_SH) || (arith == ARITH_LSH) || (arith == ARITH_LSH_WRAP)) {
1797 if (refi)
1798 mz_patch_branch(refi);
1799 if (!unsafe_fx || overflow_refslow)
1800 (void)jit_bgti_l(refslow, v2, (intptr_t)scheme_make_integer(MAX_TRY_SHIFT));
1801 jit_rshi_l(JIT_V1, v2, 0x1);
1802 jit_andi_l(v1, v1, (~0x1));
1803 #ifdef MZ_USE_JIT_I386
1804 /* Can't shift from _ECX */
1805 jit_movr_l(JIT_R2, v1);
1806 jit_lshr_l(JIT_R2, JIT_R2, JIT_V1);
1807 #else
1808 jit_lshr_l(JIT_R2, v1, JIT_V1);
1809 #endif
1810 CHECK_LIMIT();
1811 if ((!unsafe_fx || overflow_refslow) && (arith != ARITH_LSH_WRAP)) {
1812 /* If shifting back right produces a different result, that's overflow... */
1813 jit_rshr_l(JIT_V1, JIT_R2, JIT_V1);
1814 /* !! In case we go refslow, it needs to add back tag to v1 !! */
1815 (void)jit_bner_p(refslow, JIT_V1, v1);
1816 }
1817 /* No overflow (or we don't care) */
1818 jit_ori_l(dest, JIT_R2, 0x1);
1819 }
1820
1821 if (refc)
1822 mz_patch_ucbranch(refc);
1823 } else if (arith == ARITH_MIN) {
1824 /* min */
1825 GC_CAN_IGNORE jit_insn *refc;
1826 __START_INNER_TINY__(branch_short);
1827 refc = jit_bltr_l(jit_forward(), JIT_R0, JIT_R1);
1828 jit_movr_l(JIT_R0, JIT_R1);
1829 mz_patch_branch(refc);
1830 jit_movr_p(dest, JIT_R0);
1831 __END_INNER_TINY__(branch_short);
1832 } else if (arith == ARITH_MAX) {
1833 /* max */
1834 GC_CAN_IGNORE jit_insn *refc;
1835 __START_INNER_TINY__(branch_short);
1836 refc = jit_bgtr_l(jit_forward(), JIT_R0, JIT_R1);
1837 jit_movr_l(JIT_R0, JIT_R1);
1838 mz_patch_branch(refc);
1839 jit_movr_p(dest, JIT_R0);
1840 __END_INNER_TINY__(branch_short);
1841 }
1842 } else {
1843 /* Non-constant arg is in JIT_R0 */
1844 if ((arith == ARITH_ADD) || (arith == ARITH_ADD_WRAP)) {
1845 if ((unsafe_fx && !overflow_refslow) || (arith == ARITH_ADD_WRAP))
1846 jit_addi_l(dest, JIT_R0, (uintptr_t)v << 1);
1847 else {
1848 jit_movr_p(JIT_R2, JIT_R0);
1849 (void)jit_boaddi_l(refslow, JIT_R2, (uintptr_t)v << 1);
1850 jit_movr_p(dest, JIT_R2);
1851 }
1852 } else if ((arith == ARITH_SUB) || (arith == ARITH_SUB_WRAP)) {
1853 if (reversed) {
1854 (void)jit_movi_p(JIT_R2, scheme_make_integer(v));
1855 if ((unsafe_fx && !overflow_refslow) || (arith == ARITH_SUB_WRAP))
1856 jit_subr_l(JIT_R2, JIT_R2, JIT_R0);
1857 else
1858 (void)jit_bosubr_l(refslow, JIT_R2, JIT_R0);
1859 jit_addi_ul(dest, JIT_R2, 0x1);
1860 } else {
1861 if ((unsafe_fx && !overflow_refslow) || (arith == ARITH_SUB_WRAP))
1862 jit_subi_l(dest, JIT_R0, (uintptr_t)v << 1);
1863 else {
1864 jit_movr_p(JIT_R2, JIT_R0);
1865 (void)jit_bosubi_l(refslow, JIT_R2, (uintptr_t)v << 1);
1866 jit_movr_p(dest, JIT_R2);
1867 }
1868 }
1869 } else if ((arith == ARITH_MUL) || (arith == ARITH_MUL_WRAP)) {
1870 if (v == 1) {
1871 /* R0 already is the answer */
1872 jit_movr_p(dest, JIT_R0);
1873 } else if (v == 0) {
1874 (void)jit_movi_p(dest, scheme_make_integer(0));
1875 } else {
1876 (void)jit_movi_l(JIT_R2, ((intptr_t)scheme_make_integer(v) & (~0x1)));
1877 jit_rshi_l(JIT_V1, JIT_R0, 0x1);
1878 if ((unsafe_fx && !overflow_refslow) || (arith == ARITH_MUL_WRAP))
1879 jit_mulr_l(JIT_V1, JIT_V1, JIT_R2);
1880 else {
1881 (void)jit_movi_p(JIT_R1, scheme_make_integer(v)); /* for slow path */
1882 (void)jit_bomulr_l(refslow, JIT_V1, JIT_R2);
1883 }
1884 jit_ori_ul(dest, JIT_V1, 0x1);
1885 }
1886 } else {
1887 if (arith == ARITH_AND) {
1888 /* and */
1889 intptr_t l = (intptr_t)scheme_make_integer(v);
1890 jit_andi_ul(dest, JIT_R0, l);
1891 } else if (arith == ARITH_IOR) {
1892 /* ior */
1893 intptr_t l = (intptr_t)scheme_make_integer(v);
1894 jit_ori_ul(dest, JIT_R0, l);
1895 } else if (arith == ARITH_XOR) {
1896 /* xor */
1897 jit_xori_ul(dest, JIT_R0, (uintptr_t)v << 1);
1898 } else if ((arith == ARITH_SH) || (arith == ARITH_LSH) || (arith == ARITH_LSH_WRAP) || (arith == ARITH_RSH)) {
1899 /* arithmetic-shift */
1900 /* We only get here when v is in range, such as between -MAX_TRY_SHIFT and
1901 MAX_TRY_SHIFT inclusive for ARITH_SH. */
1902 if ((v <= 0) || (arith == ARITH_RSH)) {
1903 int amt = v;
1904 if (arith != ARITH_RSH)
1905 amt = -amt;
1906 jit_rshi_l(JIT_R0, JIT_R0, amt);
1907 jit_ori_l(dest, JIT_R0, 0x1);
1908 } else {
1909 jit_andi_l(JIT_R0, JIT_R0, (~0x1));
1910 jit_lshi_l(JIT_R2, JIT_R0, v);
1911 if ((!unsafe_fx || overflow_refslow) && (arith != ARITH_LSH_WRAP)) {
1912 /* If shifting back right produces a different result, that's overflow... */
1913 jit_rshi_l(JIT_V1, JIT_R2, v);
1914 /* !! In case we go refslow, it nseed to add back tag to JIT_R0 !! */
1915 (void)jit_bner_p(refslow, JIT_V1, JIT_R0);
1916 }
1917 /* No overflow (or we don't care) */
1918 jit_ori_l(dest, JIT_R2, 0x1);
1919 }
1920 } else if (arith == ARITH_NOT) {
1921 jit_notr_ul(JIT_R0, JIT_R0);
1922 jit_ori_ul(dest, JIT_R0, 0x1);
1923 } else if (arith == ARITH_MIN) {
1924 /* min */
1925 GC_CAN_IGNORE jit_insn *refc;
1926 __START_INNER_TINY__(branch_short);
1927 refc = jit_blti_l(jit_forward(), JIT_R0, (intptr_t)scheme_make_integer(v));
1928 jit_movi_l(JIT_R0, (intptr_t)scheme_make_integer(v));
1929 mz_patch_branch(refc);
1930 jit_movr_p(dest, JIT_R0);
1931 __END_INNER_TINY__(branch_short);
1932 } else if (arith == ARITH_MAX) {
1933 /* max */
1934 GC_CAN_IGNORE jit_insn *refc;
1935 __START_INNER_TINY__(branch_short);
1936 refc = jit_bgti_l(jit_forward(), JIT_R0, (intptr_t)scheme_make_integer(v));
1937 jit_movi_l(JIT_R0, (intptr_t)scheme_make_integer(v));
1938 mz_patch_branch(refc);
1939 jit_movr_p(dest, JIT_R0);
1940 __END_INNER_TINY__(branch_short);
1941 } else if (arith == ARITH_ABS) {
1942 /* abs */
1943 GC_CAN_IGNORE jit_insn *refc;
1944 __START_INNER_TINY__(branch_short);
1945 refc = jit_bgei_l(jit_forward(), JIT_R0, (intptr_t)scheme_make_integer(0));
1946 __END_INNER_TINY__(branch_short);
1947 /* watch out for most negative fixnum! */
1948 if (!unsafe_fx || overflow_refslow)
1949 (void)jit_beqi_p(refslow, JIT_R0, (void *)(((uintptr_t)1 << ((8 * JIT_WORD_SIZE) - 1)) | 0x1));
1950 (void)jit_movi_p(JIT_R1, scheme_make_integer(0));
1951 jit_subr_l(JIT_R0, JIT_R1, JIT_R0);
1952 jit_ori_l(JIT_R0, JIT_R0, 0x1);
1953 __START_INNER_TINY__(branch_short);
1954 mz_patch_branch(refc);
1955 jit_movr_p(dest, JIT_R0);
1956 __END_INNER_TINY__(branch_short);
1957 CHECK_LIMIT();
1958 } else if (arith == ARITH_EX_INEX) {
1959 /* exact->inexact */
1960 int fpr0 USED_ONLY_SOMETIMES;
1961 fpr0 = JIT_FPUSEL_FPR_0(extfl, jitter->unbox_depth);
1962 jit_rshi_l(JIT_R0, JIT_R0, 1);
1963 jit_FPSEL_extr_l_xd_fppush(extfl, fpr0, JIT_R0);
1964 CHECK_LIMIT();
1965 if (!ubs.unbox) {
1966 mz_rs_sync(); /* needed for unsafe op before allocation */
1967 __END_SHORT_JUMPS__(branch_short);
1968 scheme_generate_alloc_X_double(jitter, 0, dest, extfl);
1969 __START_SHORT_JUMPS__(branch_short);
1970 } else {
1971 jitter->unbox_depth++;
1972 }
1973 CHECK_LIMIT();
1974 } else if (arith == ARITH_INEX_EX) {
1975 /* inexact->exact */
1976 /* no work to do, since fixnum is already exact */
1977 jit_movr_p(dest, JIT_R0);
1978 }
1979 }
1980 }
1981 if (refdt)
1982 mz_patch_ucbranch(refdt);
1983 if (!unsafe_fx && !unsafe_fl)
1984 mz_patch_ucbranch(ref);
1985 ref3 = NULL;
1986 } else {
1987 /* If second is constant, first arg is in JIT_R0. */
1988 /* Otherwise, first arg is in JIT_R1, second is in JIT_R0 */
1989 /* Jump to ref3 to produce false */
1990 int rs_valid, rs_can_keep = 0;
1991
1992 switch (cmp) {
1993 case -CMP_BIT:
1994 if (rand2) {
1995 if (!unsafe_fx || overflow_refslow) {
1996 (void)jit_blti_l(refslow, JIT_R1, 0);
1997 (void)jit_bgti_l(refslow, JIT_R1, (intptr_t)scheme_make_integer(MAX_TRY_SHIFT));
1998 }
1999 }
2000 break;
2001 case CMP_BIT:
2002 if (rand2) {
2003 if (!unsafe_fx || overflow_refslow) {
2004 (void)jit_blti_l(refslow, JIT_R0, 0);
2005 (void)jit_bgti_l(refslow, JIT_R0, (intptr_t)scheme_make_integer(MAX_TRY_SHIFT));
2006 }
2007 }
2008 break;
2009 }
2010
2011 /* Don't use refslow from here on */
2012
2013 if (for_branch) {
2014 scheme_prepare_branch_jump(jitter, for_branch);
2015 CHECK_LIMIT();
2016 }
2017
2018 rs_valid = mz_CURRENT_REG_STATUS_VALID();
2019
2020 switch (cmp) {
2021 case CMP_ODDP:
2022 ref3 = jit_bmci_l(jit_forward(), JIT_R0, 0x2);
2023 rs_can_keep = 1;
2024 break;
2025 case -CMP_BIT:
2026 if (rand2) {
2027 jit_rshi_l(JIT_R1, JIT_R1, 1);
2028 jit_addi_l(JIT_V1, JIT_R1, 1);
2029 jit_movi_l(JIT_R2, 1);
2030 jit_lshr_l(JIT_R2, JIT_R2, JIT_V1);
2031 ref3 = jit_bmcr_l(jit_forward(), JIT_R0, JIT_R2);
2032 } else {
2033 /* shouldn't get here */
2034 scheme_signal_error("internal error: bitwise-bit-test? constant in wrong position");
2035 ref3 = NULL;
2036 }
2037 break;
2038 case CMP_LT:
2039 if (rand2) {
2040 ref3 = jit_bger_l(jit_forward(), JIT_R1, JIT_R0);
2041 } else {
2042 ref3 = jit_bgei_l(jit_forward(), JIT_R0, (intptr_t)scheme_make_integer(v));
2043 }
2044 rs_can_keep = 1;
2045 break;
2046 case CMP_LEQ:
2047 if (rand2) {
2048 ref3 = jit_bgtr_l(jit_forward(), JIT_R1, JIT_R0);
2049 } else {
2050 ref3 = jit_bgti_l(jit_forward(), JIT_R0, (intptr_t)scheme_make_integer(v));
2051 }
2052 rs_can_keep = 1;
2053 break;
2054 case CMP_EQUAL:
2055 if (rand2) {
2056 ref3 = jit_bner_l(jit_forward(), JIT_R1, JIT_R0);
2057 } else {
2058 ref3 = jit_bnei_l(jit_forward(), JIT_R0, (intptr_t)scheme_make_integer(v));
2059 }
2060 rs_can_keep = 1;
2061 break;
2062 case CMP_GEQ:
2063 if (rand2) {
2064 ref3 = jit_bltr_l(jit_forward(), JIT_R1, JIT_R0);
2065 } else {
2066 ref3 = jit_blti_l(jit_forward(), JIT_R0, (intptr_t)scheme_make_integer(v));
2067 }
2068 rs_can_keep = 1;
2069 break;
2070 case CMP_GT:
2071 if (rand2) {
2072 ref3 = jit_bler_l(jit_forward(), JIT_R1, JIT_R0);
2073 } else {
2074 ref3 = jit_blei_l(jit_forward(), JIT_R0, (intptr_t)scheme_make_integer(v));
2075 }
2076 rs_can_keep = 1;
2077 break;
2078 default:
2079 case CMP_BIT:
2080 if (rand2) {
2081 jit_rshi_l(JIT_R0, JIT_R0, 1);
2082 jit_addi_l(JIT_R0, JIT_R0, 1);
2083 jit_movi_l(JIT_V1, 1);
2084 jit_lshr_l(JIT_R0, JIT_V1, JIT_R0);
2085 ref3 = jit_bmcr_l(jit_forward(), JIT_R1, JIT_R0);
2086 } else {
2087 ref3 = jit_bmci_l(jit_forward(), JIT_R0, (uintptr_t)1 << (v+1));
2088 rs_can_keep = 1;
2089 }
2090 break;
2091 case CMP_EVENP:
2092 ref3 = jit_bmsi_l(jit_forward(), JIT_R0, 0x2);
2093 rs_can_keep = 1;
2094 break;
2095 }
2096
2097 mz_SET_REG_STATUS_VALID(rs_valid && rs_can_keep);
2098 }
2099 } else {
2100 ref3 = NULL;
2101 }
2102
2103 scheme_mz_unbox_restore(jitter, &ubs);
2104 }
2105
2106 if (!arith) {
2107 if (for_branch) {
2108 if (refdt) {
2109 scheme_add_or_patch_branch_true_uc(jitter, for_branch, refdt);
2110 CHECK_LIMIT();
2111 }
2112 if (ref4) {
2113 scheme_add_or_patch_branch_true_movi(jitter, for_branch, ref4);
2114 CHECK_LIMIT();
2115 }
2116 scheme_add_branch_false(for_branch, ref3);
2117 scheme_add_branch_false(for_branch, refd);
2118 scheme_add_branch_false_movi(for_branch, ref);
2119 scheme_branch_for_true(jitter, for_branch);
2120 CHECK_LIMIT();
2121 } else {
2122 if (refdt)
2123 mz_patch_ucbranch(refdt);
2124
2125 (void)jit_movi_p(dest, scheme_true);
2126 __START_INNER_TINY__(branch_short);
2127 ref2 = jit_jmpi(jit_forward());
2128 __END_INNER_TINY__(branch_short);
2129 if (ref3)
2130 mz_patch_branch(ref3);
2131 if (refd)
2132 mz_patch_branch(refd);
2133 (void)jit_movi_p(dest, scheme_false);
2134 __START_INNER_TINY__(branch_short);
2135 mz_patch_ucbranch(ref2);
2136 __END_INNER_TINY__(branch_short);
2137 if (!unsafe_fx && !unsafe_fl)
2138 mz_patch_ucbranch(ref);
2139 }
2140 }
2141
2142 __END_SHORT_JUMPS__(branch_short);
2143
2144 return 1;
2145 }
2146
scheme_generate_arith(mz_jit_state * jitter,Scheme_Object * rator,Scheme_Object * rand,Scheme_Object * rand2,int orig_args,int arith,int cmp,int v,Branch_Info * for_branch,int branch_short,int unsafe_fx,int unsafe_fl,GC_CAN_IGNORE jit_insn * overflow_refslow,int dest)2147 int scheme_generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Object *rand, Scheme_Object *rand2,
2148 int orig_args, int arith, int cmp, int v,
2149 Branch_Info *for_branch, int branch_short,
2150 int unsafe_fx, int unsafe_fl, GC_CAN_IGNORE jit_insn *overflow_refslow,
2151 int dest)
2152 {
2153 return scheme_generate_arith_for(jitter, rator, rand, rand2,
2154 orig_args, arith, cmp, v,
2155 for_branch, branch_short,
2156 unsafe_fx, unsafe_fl, overflow_refslow,
2157 dest, 0);
2158 }
2159
scheme_generate_extflonum_arith(mz_jit_state * jitter,Scheme_Object * rator,Scheme_Object * rand,Scheme_Object * rand2,int orig_args,int arith,int cmp,int v,Branch_Info * for_branch,int branch_short,int unsafe_fx,int unsafe_extfl,GC_CAN_IGNORE jit_insn * overflow_refslow,int dest)2160 int scheme_generate_extflonum_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Object *rand, Scheme_Object *rand2,
2161 int orig_args, int arith, int cmp, int v,
2162 Branch_Info *for_branch, int branch_short,
2163 int unsafe_fx, int unsafe_extfl, GC_CAN_IGNORE jit_insn *overflow_refslow,
2164 int dest)
2165 {
2166 return scheme_generate_arith_for(jitter, rator, rand, rand2,
2167 orig_args, arith, cmp, v,
2168 for_branch, branch_short,
2169 unsafe_fx, unsafe_extfl, overflow_refslow,
2170 dest, 1);
2171 }
2172
2173
2174 #define MAX_NON_SIMPLE_ARGS 6
2175
extract_nary_arg(int reg,int n,mz_jit_state * jitter,Scheme_App_Rec * app,Scheme_Object ** alt_args,int old_short_jumps)2176 static int extract_nary_arg(int reg, int n, mz_jit_state *jitter, Scheme_App_Rec *app,
2177 Scheme_Object **alt_args, int old_short_jumps)
2178 {
2179 if (!alt_args) {
2180 jit_ldxi_p(reg, JIT_RUNSTACK, WORDS_TO_BYTES(n));
2181 if (jitter->unbox)
2182 scheme_generate_unboxing(jitter, JIT_R0);
2183 } else if (scheme_is_constant_and_avoids_r1(app->args[n+1])) {
2184 __END_SHORT_JUMPS__(old_short_jumps);
2185 scheme_generate(app->args[n+1], jitter, 0, 0, 0, reg, NULL, NULL);
2186 CHECK_LIMIT();
2187 __START_SHORT_JUMPS__(old_short_jumps);
2188 } else {
2189 int i, j = 0;
2190 for (i = 0; i < n; i++) {
2191 if (!scheme_is_constant_and_avoids_r1(app->args[i+1]))
2192 j++;
2193 }
2194 jit_ldxi_p(reg, JIT_RUNSTACK, WORDS_TO_BYTES(j));
2195 if (jitter->unbox)
2196 scheme_generate_unboxing(jitter, JIT_R0);
2197 }
2198 CHECK_LIMIT();
2199 return 1;
2200 }
2201
init_nary_branches(Branch_Info * for_nary_branch,Branch_Info_Addr * addrs)2202 static void init_nary_branches(Branch_Info *for_nary_branch, Branch_Info_Addr *addrs)
2203 {
2204 memset(for_nary_branch, 0, sizeof(Branch_Info));
2205 for_nary_branch->addrs_size = 3;
2206 for_nary_branch->addrs = addrs;
2207 }
2208
patch_nary_branches(mz_jit_state * jitter,Branch_Info * for_nary_branch,GC_CAN_IGNORE jit_insn * reffalse)2209 static void patch_nary_branches(mz_jit_state *jitter, Branch_Info *for_nary_branch, GC_CAN_IGNORE jit_insn *reffalse)
2210 {
2211 int i;
2212
2213 for (i = for_nary_branch->addrs_count; i--; ) {
2214 if (for_nary_branch->addrs[i].mode == BRANCH_ADDR_FALSE) {
2215 if (for_nary_branch->addrs[i].kind == BRANCH_ADDR_BRANCH)
2216 mz_patch_branch_at(for_nary_branch->addrs[i].addr, reffalse);
2217 else if (for_nary_branch->addrs[i].kind == BRANCH_ADDR_MOVI)
2218 jit_patch_movi(for_nary_branch->addrs[i].addr, reffalse);
2219 else
2220 break;
2221 } else
2222 break;
2223 }
2224
2225 if (i != -1)
2226 scheme_signal_error("internal error: unexpected branch addresses");
2227 }
2228
scheme_generate_nary_arith(mz_jit_state * jitter,Scheme_App_Rec * app,int arith,int cmp,Branch_Info * for_branch,int branch_short,int unsafe_fx,int unsafe_fl,int dest)2229 int scheme_generate_nary_arith(mz_jit_state *jitter, Scheme_App_Rec *app,
2230 int arith, int cmp, Branch_Info *for_branch, int branch_short,
2231 int unsafe_fx, int unsafe_fl,
2232 int dest)
2233 {
2234 int c, i, non_simple_c = 0, stack_c, use_fx = !unsafe_fl, trigger_arg = 0, use_short;
2235 Scheme_Object *non_simples[MAX_NON_SIMPLE_ARGS], **alt_args, *v;
2236 Branch_Info for_nary_branch;
2237 Branch_Info_Addr nary_addrs[3];
2238 GC_CAN_IGNORE jit_insn *refslow, *reffx, *refdone;
2239 GC_CAN_IGNORE jit_insn *reffalse = NULL, *refdone3 = NULL;
2240 #ifdef INLINE_FP_COMP
2241 int args_unboxed;
2242 GC_CAN_IGNORE jit_insn *reffl, *refdone2;
2243 int use_fl = !unsafe_fx;
2244 # define mzSET_USE_FL(x) x
2245 #else
2246 # define mzSET_USE_FL(x) /* empty */
2247 #endif
2248
2249 if ((arith == ARITH_AND)
2250 || (arith == ARITH_IOR)
2251 || (arith == ARITH_XOR)) {
2252 /* bitwise operators are fixnum, only */
2253 mzSET_USE_FL(use_fl = 0);
2254 }
2255
2256 #ifdef INLINE_FP_COMP
2257 # ifndef INLINE_FP_OPS
2258 if ((arith == ARITH_ADD)
2259 || (arith == ARITH_SUB)
2260 || (arith == ARITH_MUL)
2261 || (arith == ARITH_DIV)) {
2262 /* assert: unsafe_fl < 1 */
2263 use_fl = 0;
2264 }
2265 # endif
2266 #endif
2267
2268 c = app->num_args;
2269 if (!c) {
2270 /* Constant folding would normally prevent us from getting here, but just in case */
2271 if ((arith == ARITH_ADD) || (arith == ARITH_IOR) || (arith == ARITH_XOR)) {
2272 if (!unsafe_fl)
2273 (void)jit_movi_p(dest, scheme_make_integer(0));
2274 else
2275 (void)jit_movi_p(dest, scheme_zerod);
2276 return 1;
2277 } else if (arith == ARITH_AND) {
2278 (void)jit_movi_p(dest, scheme_make_integer(-1));
2279 return 1;
2280 } else if (arith == ARITH_MUL) {
2281 if (!unsafe_fl)
2282 (void)jit_movi_p(dest, scheme_make_integer(1));
2283 else
2284 scheme_mz_load_retained(jitter, dest, scheme_make_double(1.0));
2285 return 1;
2286 }
2287 }
2288
2289 for (i = 0; i < c; i++) {
2290 v = app->args[i+1];
2291 if (!scheme_is_constant_and_avoids_r1(v)) {
2292 if (non_simple_c < (MAX_NON_SIMPLE_ARGS-1))
2293 non_simples[1+non_simple_c] = v;
2294 non_simple_c++;
2295 }
2296 if (SCHEME_INTP(v)) {
2297 mzSET_USE_FL(use_fl = 0);
2298 if (trigger_arg == i)
2299 trigger_arg++;
2300 } else if (SCHEME_FLOATP(v)) {
2301 use_fx = 0;
2302 if (trigger_arg == i)
2303 trigger_arg++;
2304 } else if (SCHEME_TYPE(v) >= _scheme_ir_values_types_) {
2305 use_fx = 0;
2306 mzSET_USE_FL(use_fl = 0);
2307 }
2308 }
2309
2310 if ((non_simple_c <= (MAX_NON_SIMPLE_ARGS-1)) && (non_simple_c < c)) {
2311 stack_c = non_simple_c;
2312 alt_args = non_simples;
2313 non_simples[0] = app->args[0];
2314 mz_runstack_skipped(jitter, c - stack_c);
2315 } else {
2316 stack_c = c;
2317 alt_args = NULL;
2318 }
2319
2320 if (stack_c)
2321 scheme_generate_app(app, alt_args, stack_c, stack_c, jitter, 0, 0, 0, 2);
2322 CHECK_LIMIT();
2323 mz_rs_sync();
2324
2325 use_short = c < 100;
2326 __START_SHORT_JUMPS__(use_short);
2327
2328 if (trigger_arg >= c) {
2329 /* we don't expect this to happen, since constant-folding normally
2330 would have collapsed it --- but a division by zero, for example,
2331 might block constant folding */
2332 trigger_arg = 0;
2333 }
2334
2335 extract_nary_arg(JIT_R0, trigger_arg, jitter, app, alt_args, use_short);
2336 CHECK_LIMIT();
2337
2338 if ((unsafe_fl < 1) && (unsafe_fx < 1)) {
2339 /* trigger argument a fixnum? */
2340 reffx = jit_bmsi_ul(jit_forward(), JIT_R0, 0x1);
2341 } else
2342 reffx = NULL;
2343
2344 #ifdef INLINE_FP_COMP
2345 if (use_fl && (unsafe_fl < 1)) {
2346 /* First argument a flonum? */
2347 jit_ldxi_s(JIT_R0, JIT_R0, &((Scheme_Object *)0x0)->type);
2348 reffl = jit_beqi_i(jit_forward(), JIT_R0, scheme_double_type);
2349 CHECK_LIMIT();
2350 } else {
2351 reffl = NULL;
2352 }
2353 #endif
2354
2355 if (!use_fx && reffx) {
2356 mz_patch_branch(reffx);
2357 }
2358
2359 if ((unsafe_fl < 1) && (unsafe_fx < 1)) {
2360 refslow = jit_get_ip();
2361 /* slow path */
2362 if (alt_args) {
2363 /* get all args on runstack */
2364 int delta = stack_c - c;
2365 for (i = 0; i < c; i++) {
2366 if (delta) {
2367 extract_nary_arg(JIT_R0, i, jitter, app, alt_args, use_short);
2368 CHECK_LIMIT();
2369 jit_stxi_p(WORDS_TO_BYTES(i+delta), JIT_RUNSTACK, JIT_R0);
2370 } else
2371 break;
2372 }
2373 jit_subi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(c - stack_c));
2374 }
2375 (void)jit_movi_p(JIT_V1, ((Scheme_Primitive_Proc *)app->args[0])->prim_val);
2376 (void)jit_movi_i(JIT_R1, c);
2377 (void)jit_calli(sjc.call_original_nary_arith_code);
2378 if (alt_args) {
2379 jit_addi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(c - stack_c));
2380 }
2381 refdone = jit_jmpi(jit_forward());
2382 } else {
2383 refdone = NULL;
2384 refslow = NULL;
2385 }
2386
2387 if (!arith) {
2388 GC_CAN_IGNORE jit_insn *refskip;
2389 if ((unsafe_fx > 0) || (unsafe_fl > 0)) {
2390 /* No dispatch so far, so jump to fast path to skip #f result */
2391 __START_INNER_TINY__(use_short);
2392 refskip = jit_jmpi(jit_forward());
2393 __END_INNER_TINY__(use_short);
2394 } else
2395 refskip = NULL;
2396
2397 reffalse = jit_get_ip();
2398 (void)jit_movi_p(JIT_R0, scheme_false);
2399 refdone3 = jit_jmpi(jit_forward());
2400
2401 if (refskip) {
2402 __START_INNER_TINY__(use_short);
2403 mz_patch_ucbranch(refskip);
2404 __END_INNER_TINY__(use_short);
2405 }
2406 } else {
2407 reffalse = NULL;
2408 }
2409
2410 #ifdef INLINE_FP_COMP
2411 if (use_fl) {
2412 /* Flonum branch: */
2413 if (unsafe_fl < 1) {
2414 mz_patch_branch(reffl);
2415 for (i = 0; i < c; i++) {
2416 if (i != trigger_arg) {
2417 v = app->args[i+1];
2418 if (!SCHEME_FLOATP(v)) {
2419 extract_nary_arg(JIT_R0, i, jitter, app, alt_args, use_short);
2420 (void)jit_bmsi_ul(refslow, JIT_R0, 0x1);
2421 jit_ldxi_s(JIT_R0, JIT_R0, &((Scheme_Object *)0x0)->type);
2422 (void)jit_bnei_i(refslow, JIT_R0, scheme_double_type);
2423 CHECK_LIMIT();
2424 }
2425 }
2426 }
2427 }
2428 /* All flonums, so inline fast flonum combination */
2429 args_unboxed = ((arith != ARITH_MIN) && (arith != ARITH_MAX)); /* no unboxing for min & max */
2430 if (args_unboxed)
2431 jitter->unbox++;
2432 extract_nary_arg(JIT_R0, 0, jitter, app, alt_args, use_short);
2433 CHECK_LIMIT();
2434 for (i = 1; i < c; i++) {
2435 if (!arith && (i > 1))
2436 extract_nary_arg(JIT_R0, i - 1, jitter, app, alt_args, use_short);
2437 extract_nary_arg((args_unboxed ? JIT_R0 : JIT_R1), i, jitter, app, alt_args, use_short);
2438 if ((i == c - 1) && args_unboxed) --jitter->unbox; /* box last result */
2439 if (!arith) init_nary_branches(&for_nary_branch, nary_addrs);
2440 __END_SHORT_JUMPS__(use_short);
2441 scheme_generate_arith(jitter, NULL, NULL, scheme_void, 2, arith, cmp, 0,
2442 !arith ? &for_nary_branch : NULL, use_short, 0, 1, NULL,
2443 JIT_R0);
2444 __START_SHORT_JUMPS__(use_short);
2445 if (!arith) patch_nary_branches(jitter, &for_nary_branch, reffalse);
2446 CHECK_LIMIT();
2447 }
2448 if (use_fx) {
2449 refdone2 = jit_jmpi(jit_forward());
2450 } else {
2451 refdone2 = NULL;
2452 }
2453 } else {
2454 refdone2 = NULL;
2455 }
2456 #endif
2457
2458 if (use_fx) {
2459 /* Fixnum branch */
2460 if (unsafe_fx < 1) {
2461 mz_patch_branch(reffx);
2462 for (i = 0; i < c; i++) {
2463 if (i != trigger_arg) {
2464 v = app->args[i+1];
2465 if (!SCHEME_INTP(v)) {
2466 extract_nary_arg(JIT_R0, i, jitter, app, alt_args, use_short);
2467 CHECK_LIMIT();
2468 (void)jit_bmci_ul(refslow, JIT_R0, 0x1);
2469 CHECK_LIMIT();
2470 }
2471 }
2472 }
2473 }
2474 /* All fixnums, so inline fast fixnum combination;
2475 on overflow, bail out to refslow. */
2476 extract_nary_arg(JIT_R0, 0, jitter, app, alt_args, use_short);
2477 for (i = 1; i < c; i++) {
2478 if (!arith && (i > 1))
2479 extract_nary_arg(JIT_R0, i - 1, jitter, app, alt_args, use_short);
2480 extract_nary_arg(JIT_R1, i, jitter, app, alt_args, use_short);
2481 CHECK_LIMIT();
2482 if (!arith) init_nary_branches(&for_nary_branch, nary_addrs);
2483 __END_SHORT_JUMPS__(use_short);
2484 scheme_generate_arith(jitter, NULL, NULL, scheme_void, 2, arith, cmp, 0,
2485 !arith ? &for_nary_branch : NULL, use_short, 1, 0, refslow,
2486 JIT_R0);
2487 __START_SHORT_JUMPS__(use_short);
2488 if (!arith) patch_nary_branches(jitter, &for_nary_branch, reffalse);
2489 CHECK_LIMIT();
2490 }
2491 }
2492
2493 #ifdef INLINE_FP_COMP
2494 if (use_fl && use_fx) {
2495 mz_patch_ucbranch(refdone2);
2496 }
2497 #endif
2498 if (!arith) {
2499 (void)jit_movi_p(JIT_R0, scheme_true);
2500 }
2501 if (refdone)
2502 mz_patch_ucbranch(refdone);
2503 if (refdone3)
2504 mz_patch_ucbranch(refdone3);
2505
2506 __END_SHORT_JUMPS__(use_short);
2507
2508 if (stack_c) {
2509 mz_rs_inc(stack_c); /* no sync */
2510 mz_runstack_popped(jitter, stack_c);
2511 }
2512 if (c > stack_c)
2513 mz_runstack_unskipped(jitter, c - stack_c);
2514
2515 if (!arith && for_branch) {
2516 GC_CAN_IGNORE jit_insn *refx;
2517 scheme_prepare_branch_jump(jitter, for_branch);
2518 CHECK_LIMIT();
2519 __START_SHORT_JUMPS__(branch_short);
2520 refx = jit_beqi_p(jit_forward(), JIT_R0, scheme_false);
2521 scheme_add_branch_false(for_branch, refx);
2522 scheme_branch_for_true(jitter, for_branch);
2523 __END_SHORT_JUMPS__(branch_short);
2524 CHECK_LIMIT();
2525 }
2526
2527 if (!for_branch && !jitter->unbox)
2528 jit_movr_p(dest, JIT_R0);
2529
2530 return 1;
2531 }
2532
2533 #endif
2534