1 /*
2   JIT limitations:
3 
4   1) See "About short-jump mode" below.
5 
6   2) Use jit_patchable_movi_p() when a constant needs to be
7      visible to the GC.
8 
9   3) Immediate operands must be 32-bit values on x86_64, except with
10      jit_movi, jit_sti, jit_ldi, jit_bXi, jit_calli (in default-long
11      mode), and jit_finishi.
12 
13   4) Function calls are limited to 3 arguments (i.e., jit_prepare()
14      must never be called with a number greater than 3). This limit
15      is related to the way the x86_64 port shuffles arguments into
16      temporary registers.
17 
18   5) On non-Win64 x86_64, arguments are delivered in JIT_V2, JIT_V3,
19      JIT_R2, and JIT_R1 in that order. So don't set JIT_R2 before
20      getting the third argument, etc.
21 
22      On Win64 x86_64, arguments are delivered in JIT_R1, JIT_R2,
23      and other registers. So don't set JIT_R2 before getting the
24      second argument, etc.
25 
26 */
27 
28 #ifdef __APPLE__
29 # define _CALL_DARWIN
30 #endif
31 
32 #ifdef __GNUC__
33 #pragma GCC diagnostic ignored "-Waddress"
34 #if __GNUC__ >= 6
35 # pragma GCC diagnostic ignored "-Wtautological-compare"
36 #endif
37 #ifdef __clang__
38 #  ifdef MZ_PRECISE_GC
39 #   pragma clang diagnostic ignored "-Wtautological-compare"
40 #   pragma clang diagnostic ignored "-Wself-assign"
41 #   pragma clang diagnostic ignored "-Wconstant-logical-operand"
42 #  endif
43 # endif
44 #endif
45 
46 /* Separate JIT_PRECISE_GC lets us test some 3m support in non-3m mode: */
47 #ifdef MZ_PRECISE_GC
48 # define JIT_PRECISE_GC
49 #endif
50 
51 /* IMPORTANT! 3m arithmetic checking disabled for the whole file! */
52 #ifdef MZ_PRECISE_GC
53 END_XFORM_ARITH;
54 #endif
55 
56 #define JIT_USE_FP_OPS
57 
58 #ifdef MZ_USE_JIT_X86_64
59 # define MZ_USE_JIT_I386
60 # define JIT_X86_64
61 # ifndef MZ_NO_JIT_SSE
62 #  define JIT_X86_SSE
63 # endif
64 #endif
65 
66 #ifdef MZ_USE_JIT_I386
67 # ifndef JIT_X86_64
68 #  define JIT_X86_PLAIN
69 # endif
70 #endif
71 
72 #ifdef MZ_USE_JIT_SSE
73 # ifndef JIT_X86_SSE
74 #  define JIT_X86_SSE
75 # endif
76 #endif
77 
78 #ifdef MZ_USE_JIT_PPC
79 # define DEFINE_LIGHTNING_FUNCS_STATIC /* empty */
80 # define jit_notify_freed_code scheme_jit_notify_freed_code
81 # define jit_flush_code scheme_jit_flush_code
82 # define _jit_prolog scheme_jit_prolog
83 # define _jit_epilog scheme_jit_epilog
84 #endif
85 
86 /* The ABI for _CALL_DARWIN or JIT_X86_64 requires alignment. Even
87    when it's not required, it's better for performance when flonums
88    are stored on the stack. */
89 #define JIT_X86_ALIGN_STACK 1
90 
91 #ifndef DEFINE_LIGHTNING_FUNCS
92 # define SUPPRESS_LIGHTNING_FUNCS
93 #endif
94 
95 #include "lightning/lightning.h"
96 #define _jit (jitter->js)
97 #define _jitp (&_jit)
98 
/* Word-size parameters for generated code. JIT_LOG_WORD_SIZE is the base-2
   log of the word size in bytes: 3 when MZ_USE_JIT_X86_64 is defined and 2
   otherwise. JIT_WORD_SIZE is derived from it. WORDS_TO_BYTES(x) turns a
   word count into a byte count with a shift; x is cast to unsigned before
   shifting.
   NOTE(review): the role of MAX_TRY_SHIFT is not visible in this part of
   the file. */
99 #ifdef MZ_USE_JIT_X86_64
100 # define JIT_LOG_WORD_SIZE 3
101 #else
102 # define JIT_LOG_WORD_SIZE 2
103 #endif
104 #define JIT_WORD_SIZE (1 << JIT_LOG_WORD_SIZE)
105 #define WORDS_TO_BYTES(x) ((unsigned)(x) << JIT_LOG_WORD_SIZE)
106 #define MAX_TRY_SHIFT 30
107 
108 #ifdef USE_THREAD_LOCAL
109 # define NATIVE_ARG_COUNT 4
110 #else
111 # define NATIVE_ARG_COUNT 3
112 #endif
113 
114 #define JIT_LOG_DOUBLE_SIZE 3
115 #define JIT_DOUBLE_SIZE (1 << JIT_LOG_DOUBLE_SIZE)
116 
117 #ifdef MZ_LONG_DOUBLE
118 # ifdef MZ_USE_JIT_X86_64
119 #  define JIT_LOG_LONG_DOUBLE_SIZE 4
120 #  define JIT_LONG_DOUBLE_SIZE (1 << JIT_LOG_LONG_DOUBLE_SIZE)
121 # else
122 #  define JIT_LOG_LONG_DOUBLE_SIZE not_implemented
123 #  define JIT_LONG_DOUBLE_SIZE 12
124 #endif
125 
126 #endif
127 
128 /* a mzchar is an int: */
129 #define LOG_MZCHAR_SIZE 2
130 
131 #if defined(MZ_USE_JIT_PPC) || defined(MZ_USE_JIT_X86_64)
132 /* Both PPC and x86_64 need long jumps, sometimes */
133 # define NEED_LONG_JUMPS
134 #endif
135 #if defined(MZ_USE_JIT_PPC)
136 /* For PPC, long jumps may be needed even within a JIT-generated block */
137 # define NEED_LONG_BRANCHES
138 #endif
139 #if defined(MZ_USE_JIT_X86_64)
140 /* For x86_64, long jumps are needed only if we start allocating far away */
141 # define SET_DEFAULT_LONG_JUMPS
142 #endif
143 /* Tiny jumps seem worthwhile for x86, but they don't seem to help for x86_64: */
144 #if defined(MZ_USE_JIT_I386) && !defined(MZ_USE_JIT_X86_64)
145 # define USE_TINY_JUMPS
146 #endif
147 #if defined(MZ_USE_JIT_ARM)
148 /* For ARM, long jumps are needed for jumps longer than 2^23: */
149 # define NEED_LONG_JUMPS
150 # define LONG_JUMPS_DEFAULT(x) 1
151 #endif
152 
153 #ifdef MZ_USE_FUTURES
154 # define MZ_USE_LWC
155 #endif
156 
157 #ifdef MZ_USE_SINGLE_FLOATS
158 # define SCHEME_FLOAT_TYPE scheme_float_type
159 #else
160 # define SCHEME_FLOAT_TYPE scheme_double_type
161 #endif
162 
163 /* These flags are set post-JIT: */
164 #define NATIVE_PRESERVES_MARKS 0x1
165 #define NATIVE_IS_SINGLE_RESULT 0x2
166 /* Pre-JIT flags are in "schpriv.h" */
167 
168 #if defined(MZ_PRECISE_GC) && !defined(USE_COMPACT_3M_GC)
169 # define CAN_INLINE_ALLOC
170 #endif
171 
172 #ifdef JIT_USE_FP_OPS
173 # define INLINE_FP_COMP
174 # ifdef CAN_INLINE_ALLOC
175 #  define INLINE_FP_OPS
176 # endif
177 #endif
178 
179 #if defined(CAN_INLINE_ALLOC)
180 # if defined(MZ_USE_JIT_I386)
181 #  define USE_FLONUM_UNBOXING
182 # endif
183 # if defined(MZ_USE_JIT_ARM) && defined(__ARM_PCS_VFP)
184 #  define USE_FLONUM_UNBOXING
185 # endif
186 #endif
187 
188 #if defined(__GNUC__)
189 # define USED_ONLY_SOMETIMES __attribute__((unused))
190 #else
191 # define USED_ONLY_SOMETIMES /* empty */
192 #endif
193 
194 #if !defined(MZ_USE_FUTURES)
195 # define USED_ONLY_FOR_FUTURES USED_ONLY_SOMETIMES
196 #else
197 # define USED_ONLY_FOR_FUTURES /* empty */
198 #endif
199 
200 #if !defined(USE_FLONUM_UNBOXING)
201 # define USED_ONLY_IF_FLONUM_UNBOXING USED_ONLY_SOMETIMES
202 #else
203 # define USED_ONLY_IF_FLONUM_UNBOXING /* empty */
204 #endif
205 
206 #if !defined(MZ_LONG_DOUBLE)
207 # define USED_ONLY_IF_LONG_DOUBLE USED_ONLY_SOMETIMES
208 #else
209 # define USED_ONLY_IF_LONG_DOUBLE /* empty */
210 #endif
211 
212 #include "jitfpu.h"
213 
214 #define JIT_ASSERT(v) MZ_ASSERT(v)
215 
216 /* Tracking statistics: */
217 #if 0
218 # define NUM_CATEGORIES 23
219 extern int jit_sizes[NUM_CATEGORIES];
220 extern int jit_counts[NUM_CATEGORIES];
221 extern int jit_code_size;
222 # define START_JIT_DATA() void *__pos = jit_get_ip(); uintptr_t __total = 0
223 # define END_JIT_DATA(where) if (jitter->retain_start) { \
224                               jit_sizes[where] += __total + ((uintptr_t)jit_get_ip() - (uintptr_t)__pos); \
225                               jit_counts[where]++; }
226 # define PAUSE_JIT_DATA() __total += ((uintptr_t)jit_get_ip() - (uintptr_t)__pos)
227 # define RESUME_JIT_DATA() __pos = jit_get_ip()
228 # define RECORD_CODE_SIZE(s) jit_code_size += s
229 #else
230 # define START_JIT_DATA() /* empty */
231 # define END_JIT_DATA(where) /* empty */
232 # define PAUSE_JIT_DATA() /* empty */
233 # define RESUME_JIT_DATA() /* empty */
234 # define RECORD_CODE_SIZE(s) /* empty */
235 #endif
236 
237 extern int scheme_direct_call_count, scheme_indirect_call_count;
238 extern int scheme_jit_malloced;
239 #ifdef JIT_USE_FP_OPS
240 THREAD_LOCAL_DECL(extern double scheme_jit_save_fp);
241 THREAD_LOCAL_DECL(extern double scheme_jit_save_fp2);
242 # ifdef MZ_LONG_DOUBLE
243 THREAD_LOCAL_DECL(extern long_double scheme_jit_save_extfp);
244 THREAD_LOCAL_DECL(extern long_double scheme_jit_save_extfp2);
245 # endif
246 #endif
247 
/* Function-pointer type of the check_arity_code slot in the common record.
   It takes a Scheme object, an argument count, and an int named `dummy`,
   followed by whatever EXTRA_NATIVE_ARGUMENT_TYPE expands to, and returns
   an int.
   NOTE(review): the dummy parameter presumably exists only to line the
   signature up with other native entry points -- confirm. */
248 typedef int (*Native_Check_Arity_Proc)(Scheme_Object *o, int argc, int dummy EXTRA_NATIVE_ARGUMENT_TYPE);
/* Function-pointer type of the get_arity_code slot in the common record.
   It takes a Scheme object and two int parameters whose names mark them as
   dummies, followed by whatever EXTRA_NATIVE_ARGUMENT_TYPE expands to, and
   returns a Scheme object. */
249 typedef Scheme_Object *(*Native_Get_Arity_Proc)(Scheme_Object *o, int dumm1, int dummy2 EXTRA_NATIVE_ARGUMENT_TYPE);
/* Function-pointer type of the native_starter_code slot in the common
   record. The callee receives an opaque data pointer, an argument count
   and vector, a thread-local pointer, a native procedure to chain to, and
   a save_pos location; it returns a Scheme object.
   NOTE(review): the exact meaning of thdloc and save_pos is not visible
   here -- confirm against the code that generates the starter. */
250 typedef Scheme_Object *(*LWC_Native_Starter)(void *data,
251                                              int argc,
252                                              Scheme_Object **argv,
253                                              void *thdloc,
254                                              Scheme_Native_Proc *chain_to,
255                                              void **save_pos);
256 
/* Argument record passed by pointer to the Continuation_Apply_Indirect and
   Continuation_Apply_Finish entry points. The layout matters:
   dest_stack_pos has to remain the first field. Register save slots are
   present only in JIT_X86_64 builds (r14 and r15), with r12 and r13 added
   under _WIN64, so the offsets of the later fields differ per platform. */
257 typedef struct Apply_LWC_Args {
258   void *dest_stack_pos; /* must be first */
259   Scheme_Current_LWC *lwc;
260   void *copy_to_install;
261   intptr_t full_size, copy_size;
262 #ifdef JIT_X86_64
263   intptr_t saved_r14, saved_r15;
264 # ifdef _WIN64
265   intptr_t saved_r12, saved_r13;
266 # endif
267 #endif
268   Scheme_Object *result;
269   void *new_runstack;
270   void *new_runstack_base;
271   void *new_threadlocal;
272 } Apply_LWC_Args;
273 
/* Function-pointer types for the continuation_apply_indirect_code and
   continuation_apply_finish_code slots of the common record. Both take the
   Apply_LWC_Args record by pointer and return a Scheme object; the
   indirect form adds an intptr_t, the finish form adds a stack pointer and
   a frame pointer. */
274 typedef Scheme_Object *(*Continuation_Apply_Indirect)(Apply_LWC_Args *, intptr_t);
275 typedef Scheme_Object *(*Continuation_Apply_Finish)(Apply_LWC_Args *args, void *stack, void *frame);
276 
277 #define JIT_MAX_VECTOR_INLINE_SIZE 256
278 #define JIT_MAX_STRUCT_FIELD_INLINE_COUNT 256
279 
280 #ifdef MZ_LONG_DOUBLE
281 # define JIT_NUM_FL_KINDS 2
282 #else
283 # define JIT_NUM_FL_KINDS 1
284 #endif
285 
/* Table of code pointers shared by the JIT. Almost every field is a void *
   slot; the last few use the function-pointer typedefs declared earlier in
   this file. One instance is declared after the struct as
   scheme_jit_common (abbreviated sjc).
   NOTE(review): the slots presumably hold entry points of shared,
   JIT-generated helper code -- confirm against the code that fills them.
   The MAX_SHARED_* and SHARED_*_CASE macros are defined inside the struct
   body, next to the arrays they size, but like all macros they stay
   visible for the rest of the translation unit. Several groups of fields
   exist only under MZ_LONG_DOUBLE, CAN_INLINE_ALLOC or MZ_USE_LWC, so the
   struct layout depends on the build configuration. */
286 struct scheme_jit_common_record {
287   int skip_checks;
288 
289 #define MAX_SHARED_CALL_RANDS 25
290   void *shared_tail_code[4][MAX_SHARED_CALL_RANDS];
291 # define SHARED_SINGLE_VALUE_CASE 0
292 # define SHARED_MULTI_OK_CASE 1
293 # define SHARED_RESULT_IGNORED_CASE 2
294 # define SHARED_NUM_NONTAIL_CASES 3
295   void *shared_non_tail_code[5][MAX_SHARED_CALL_RANDS][SHARED_NUM_NONTAIL_CASES];
296   void *shared_non_tail_retry_code[SHARED_NUM_NONTAIL_CASES];
297   void *shared_non_tail_argc_code[SHARED_NUM_NONTAIL_CASES];
298   void *shared_tail_argc_code;
299 
300 #define MAX_SHARED_ARITY_CHECK 25
301   void *shared_arity_check[MAX_SHARED_ARITY_CHECK][2][2];
302 
303   void *bad_result_arity_code;
304   void *unbound_global_code;
305   void *call_original_unary_arith_code;
306   void *call_original_binary_arith_code;
307   void *call_original_binary_rev_arith_code;
308   void *call_original_unary_arith_for_branch_code;
309   void *call_original_binary_arith_for_branch_code;
310   void *call_original_binary_rev_arith_for_branch_code;
311   void *call_original_nary_arith_code;
312   void *bad_car_code, *bad_cdr_code;
313   void *bad_caar_code, *bad_cdar_code, *bad_cadr_code, *bad_cddr_code;
314   void *bad_cXr_code;
315   void *bad_mcar_code, *bad_mcdr_code;
316   void *bad_set_mcar_code, *bad_set_mcdr_code;
317   void *bad_syntax_e_code;
318   void *imag_part_code, *real_part_code, *make_rectangular_code;
319   void *bad_flimag_part_code, *bad_flreal_part_code, *bad_make_flrectangular_code;
320   void *unbox_code, *set_box_code, *unbox_star_fail_code, *set_box_star_fail_code, *box_cas_fail_code, *weak_box_value_code;
321   void *vector_cas_fail_code;
322   void *bad_vector_length_code, *bad_vector_star_length_code;
323   void *bad_flvector_length_code;
324   void *bad_fxvector_length_code;
325   void *bad_string_length_code;
326   void *bad_bytes_length_code;
327   void *vector_ref_code, *vector_ref_check_index_code, *vector_set_code, *vector_set_check_index_code;
328   void *chap_vector_ref_code, *chap_vector_ref_check_index_code, *chap_vector_set_code, *chap_vector_set_check_index_code;
329   void *vector_star_ref_code, *vector_star_ref_check_index_code, *vector_star_set_code, *vector_star_set_check_index_code;
330   void *string_ref_code, *string_ref_check_index_code, *string_set_code, *string_set_check_index_code;
331   void *bytes_ref_code, *bytes_ref_check_index_code, *bytes_set_code, *bytes_set_check_index_code;
  /* Arrays sized by JIT_NUM_FL_KINDS have one slot per flonum kind
     (2 when MZ_LONG_DOUBLE is defined, 1 otherwise). */
332   void *flvector_ref_check_index_code[JIT_NUM_FL_KINDS];
333   void *flvector_set_check_index_code[JIT_NUM_FL_KINDS], *flvector_set_flonum_check_index_code[JIT_NUM_FL_KINDS];
334   void *fxvector_ref_code, *fxvector_ref_check_index_code, *fxvector_set_code, *fxvector_set_check_index_code;
335   void *struct_raw_ref_code, *struct_raw_set_code, *struct_raw_refs_code;
336   void *on_demand_jit_arity_code, *in_progress_on_demand_jit_arity_code;
337   void *get_stack_pointer_code;
338   void *stack_cache_pop_code;
339   void *struct_pred_code, *struct_pred_tail_code, *struct_pred_multi_code;
340   void *struct_pred_branch_code;
341   void *struct_get_code, *struct_get_tail_code, *struct_get_multi_code;
342   void *struct_set_code, *struct_set_tail_code, *struct_set_multi_code;
343   void *struct_prop_get_code, *struct_prop_get_tail_code, *struct_prop_get_multi_code;
344   void *struct_prop_get_defl_code, *struct_prop_get_defl_tail_code, *struct_prop_get_defl_multi_code;
345   void *struct_prop_pred_code, *struct_prop_pred_tail_code, *struct_prop_pred_multi_code;
346   void *struct_proc_extract_code;
347   void *struct_constr_unary_code, *struct_constr_unary_tail_code, *struct_constr_unary_multi_code;
348   void *struct_constr_binary_code, *struct_constr_binary_tail_code, *struct_constr_binary_multi_code;
349   void *struct_constr_nary_code, *struct_constr_nary_tail_code, *struct_constr_nary_multi_code;
350   void *bad_app_vals_target;
351   void *app_values_slow_code, *app_values_multi_slow_code, *app_values_tail_slow_code;
352   void *bad_char_to_integer_code, *slow_integer_to_char_code;
353   void *slow_cpointer_tag_code, *slow_set_cpointer_tag_code;
354   void *values_code;
355   void *symbol_interned_p_code;
356   void *list_p_code, *list_p_branch_code;
357   void *list_length_code;
358   void *list_ref_code, *list_tail_code;
359   void *hash_ref_code;
360   void *finish_tail_call_code, *finish_tail_call_fixup_code;
361   void *linklet_run_start_code;
362   void *thread_start_child_code;
363   void *box_flonum_from_stack_code, *box_flonum_from_reg_code;
364   void *fl1_fail_code[JIT_NUM_FL_KINDS], *fl2rr_fail_code[2][JIT_NUM_FL_KINDS];
365   void *fl2fr_fail_code[2][JIT_NUM_FL_KINDS], *fl2rf_fail_code[2][JIT_NUM_FL_KINDS];
366 #ifdef MZ_LONG_DOUBLE
367   void *bad_extflvector_length_code;
368   void *box_extflonum_from_stack_code, *box_extflonum_from_reg_code;
369 #endif
370   void *wcm_code, *wcm_nontail_code, *wcm_chaperone;
371   void *with_immed_mark_code;
372   void *apply_to_list_tail_code, *apply_to_list_code, *apply_to_list_multi_ok_code;
373   void *eqv_code, *eqv_branch_code;
374   void *bad_string_eq_2_code;
375   void *bad_string_rev_eq_2_code;
376   void *bad_bytes_eq_2_code;
377   void *bad_bytes_rev_eq_2_code;
378   void *proc_arity_includes_code;
379 
380 #ifdef CAN_INLINE_ALLOC
381   void *make_list_code, *make_list_star_code;
382   void *retry_alloc_code;
383   void *retry_alloc_code_keep_r0_r1;
384   void *retry_alloc_code_keep_fpr1;
385 # ifdef MZ_LONG_DOUBLE
386   void *retry_alloc_code_keep_extfpr1;
387 # endif
388 #endif
389   void *make_rest_list_code, *make_rest_list_clear_code;
390   void *call_check_not_defined_code, *call_check_assign_not_defined_code;
391   void *force_value_same_mark_code;
392   void *slow_ptr_set_code, *slow_ptr_ref_code;
393 
  /* Typed entry points; the finish variant exists only with MZ_USE_LWC. */
394   Continuation_Apply_Indirect continuation_apply_indirect_code;
395 #ifdef MZ_USE_LWC
396   Continuation_Apply_Finish continuation_apply_finish_code;
397 #endif
398 
399   Native_Check_Arity_Proc check_arity_code;
400   Native_Get_Arity_Proc get_arity_code;
401 
402   LWC_Native_Starter native_starter_code;
403 };
404 
/* The single shared record of common code pointers; it is defined in
   another translation unit. `sjc` is a short alias used for field access,
   as in sjc.some_field. */
405 extern struct scheme_jit_common_record scheme_jit_common;
406 
407 #define sjc scheme_jit_common
408 
/* State record threaded through code generation; the macros in this file
   reach it through a variable named `jitter`. The embedded lightning state
   is the js field, which is what the _jit macro (jitter->js) names.
   Comments added to individual fields below describe only what the macros
   in this file do with them. */
409 typedef struct mz_jit_state {
410   MZTAG_IF_REQUIRED
411   GC_CAN_IGNORE jit_state js; /* lightning state; _jit expands to jitter->js */
412   char *limit;
413   int extra_pushed, max_extra_pushed;
414   int depth; /* the position of the closure's first value on the stack */
415   int max_depth, max_tail_depth;
416   int *mappings; /* For each element,
417 		    case 0x1 bit:
418 		    . 0 -> case 0x2 bit:
419                     .        0 -> case rest bits:
420                     .               0 -> save point
421                     .               1 -> shift >>2 to get orig pushed count
422                     .        1 -> shift >>4 to get arity for single orig pushed
423                     .             shift >>2 to get flags
424 		    . 1 -> case 0x2 bit:
425                     .        0 -> shift >>2 to get new (native) pushed
426                     .        1 -> shift >>2 to get flostack offset */
427   int num_mappings, mappings_size;
428   int retained, retained_double;
429   int need_set_rs; /* tested by JIT_UPDATE_THREAD_RSPTR_IF_NEEDED() */
430   void **retain_start;
431   double *retain_double_start;
432   Scheme_Native_Lambda *retaining_data; /* poke when setting retain_start for generational GC */
433   int local1_busy, pushed_marks;
434   int log_depth;
435   int self_pos, self_closure_size, self_toplevel_pos;
436   int self_to_closure_delta, closure_to_args_delta;
437   int closure_self_on_runstack;
438   int example_argc, example_argv_delta;
439   Scheme_Object **example_argv;
440   void *self_restart_code;
441   void *self_nontail_code;
442   Scheme_Native_Closure *nc; /* for extract_globals and extract_closure_local, only */
443   Scheme_Lambda *self_lam;
444   void *status_at_ptr; /* set to _jit.x.pc (or 0) by the mz_SET_*_STATUS_VALID macros */
445   int r0_status, r1_status; /* adjusted by mz_rs_dec() and mz_rs_inc() */
446   void *patch_depth;
447   int rs_virtual_offset; /* runstack adjustment, in words, not yet emitted; see mz_rs_sync() */
448   int unbox, unbox_depth;
449   int flostack_offset, flostack_space;
450 #ifdef MZ_LONG_DOUBLE
451   int unbox_extflonum;
452 #endif
453   int self_restart_offset, self_restart_space;
454 } mz_jit_state;
455 
/* Clone/unclone pair for the JIT state record.
   NOTE(review): presumably scheme_clone_jitter returns a copy of *j and
   scheme_unclone_jitter copies the saved state in j_copy back into j --
   the definitions are not visible here, so confirm before relying on it. */
456 mz_jit_state *scheme_clone_jitter(mz_jit_state *j);
457 void scheme_unclone_jitter(mz_jit_state *j, mz_jit_state *j_copy);
458 
/* Callback type for a code-generation step: it receives the JIT state and
   an opaque data pointer and returns an int.
   NOTE(review): the meaning of the return value is not visible here. */
459 typedef int (*Generate_Proc)(mz_jit_state *j, void *data);
460 
/* One recorded instruction address, stored in the addrs array of
   Branch_Info, together with two small tags.
   NOTE(review): mode and kind presumably take the BRANCH_ADDR_* values
   defined right after this struct -- confirm at the use sites. */
461 typedef struct {
462   jit_insn *addr;
463   char mode, kind;
464 } Branch_Info_Addr;
465 
/* Two groups of small integer tags.
   NOTE(review): the FALSE/TRUE pair presumably fills Branch_Info_Addr.mode
   and the BRANCH/UCBRANCH/MOVI group fills Branch_Info_Addr.kind --
   confirm at the use sites. */
466 #define BRANCH_ADDR_FALSE 0
467 #define BRANCH_ADDR_TRUE  1
468 
469 #define BRANCH_ADDR_BRANCH    0
470 #define BRANCH_ADDR_UCBRANCH  1
471 #define BRANCH_ADDR_MOVI      2
472 
/* Bookkeeping for a branch being generated: a handful of int flags and
   positions plus an array of Branch_Info_Addr entries (addrs) with its
   element count (addrs_count) and capacity (addrs_size).
   NOTE(review): count-versus-capacity is inferred from the field names;
   confirm against the code that appends to addrs. */
473 typedef struct {
474   int include_slow;
475   int non_tail, restore_depth, flostack, flostack_pos;
476   int branch_short, true_needs_jump;
477   int addrs_count, addrs_size;
478   Branch_Info_Addr *addrs;
479 } Branch_Info;
480 
/* Small record with a position, a count, and a delivered flag.
   NOTE(review): presumably describes where and how many multiple values a
   context expects and whether they were delivered -- confirm at the use
   sites, which are not visible here. */
481 typedef struct {
482   int position;
483   int count;
484   char delivered;
485 } Expected_Values_Info;
486 
/* Register-status tracking, keyed on the current emission pointer.
   mz_CURRENT_REG_STATUS_VALID() is true only while jitter->status_at_ptr
   equals _jit.x.pc. mz_SET_REG_STATUS_VALID(v) records the current pointer
   when v is true and stores 0 otherwise. mz_SET_R0_STATUS_VALID(v) does the
   same and additionally resets jitter->r1_status to -1. The argument v is
   expanded without parentheses, so pass a simple expression. */
487 #define mz_CURRENT_REG_STATUS_VALID() (jitter->status_at_ptr == _jit.x.pc)
488 #define mz_SET_REG_STATUS_VALID(v) (jitter->status_at_ptr = (v ? _jit.x.pc : 0))
489 
490 #define mz_SET_R0_STATUS_VALID(v) (jitter->status_at_ptr = (v ? _jit.x.pc : 0), \
491                                    jitter->r1_status = -1)
492 
493 /* If JIT_THREAD_LOCAL is defined, then access to global variables
494    goes through a thread_local_pointers table. Call
495    scheme_jit_fill_threadlocal_table() to fill the table in a new
496    OS-level thread. Use mz_tl_ldi_p(), etc., with `tl_MZ_RUNSTACK',
497    etc., to access variables that can be thread local. (JIT-generated
498    code accesses only a handful, so we can just enumerate them.)
499 
500    On x86, the thread-local table pointer is loaded on entry to the
501    JIT world into a C stack slot. On x86_64, it is loaded into the
502    callee-saved R14 (and the old value is saved on the C stack). */
503 #ifdef USE_THREAD_LOCAL
504 # define JIT_THREAD_LOCAL
505 #endif
506 
507 #ifdef JIT_THREAD_LOCAL
/* Thread-local build: each tl_NAME is the byte offset of variable NAME from
   BOTTOM_VARIABLE, computed by tl_delta() as a difference of addresses cast
   to uintptr_t. The offsets are used as the addr argument of the
   mz_tl_ldi_* and mz_tl_sti_* macros. The two extfp entries exist only when
   MZ_LONG_DOUBLE is defined. */
508 # define tl_delta(id) ((uintptr_t)&(id) - (uintptr_t)&BOTTOM_VARIABLE)
509 # define tl_MZ_RUNSTACK                    tl_delta(MZ_RUNSTACK)
510 # define tl_MZ_RUNSTACK_START              tl_delta(MZ_RUNSTACK_START)
511 # define tl_GC_gen0_alloc_page_ptr         tl_delta(GC_gen0_alloc_page_ptr)
512 # define tl_scheme_current_thread          tl_delta(scheme_current_thread)
513 # define tl_scheme_current_cont_mark_pos   tl_delta(scheme_current_cont_mark_pos)
514 # define tl_scheme_current_cont_mark_stack tl_delta(scheme_current_cont_mark_stack)
515 # define tl_stack_cache_stack_pos          tl_delta(stack_cache_stack_pos)
516 # define tl_retry_alloc_r1                 tl_delta(retry_alloc_r1)
517 # define tl_fixup_runstack_base            tl_delta(fixup_runstack_base)
518 # define tl_fixup_already_in_place         tl_delta(fixup_already_in_place)
519 # define tl_scheme_jit_save_fp             tl_delta(scheme_jit_save_fp)
520 # define tl_scheme_jit_save_fp2            tl_delta(scheme_jit_save_fp2)
521 #ifdef MZ_LONG_DOUBLE
522 # define tl_scheme_jit_save_extfp          tl_delta(scheme_jit_save_extfp)
523 # define tl_scheme_jit_save_extfp2         tl_delta(scheme_jit_save_extfp2)
524 #endif
525 # define tl_scheme_fuel_counter            tl_delta(scheme_fuel_counter)
526 # define tl_scheme_jit_stack_boundary      tl_delta(scheme_jit_stack_boundary)
527 # define tl_jit_future_storage             tl_delta(jit_future_storage)
528 # define tl_scheme_future_need_gc_pause    tl_delta(scheme_future_need_gc_pause)
529 # define tl_scheme_use_rtcall              tl_delta(scheme_use_rtcall)
530 # define tl_scheme_current_lwc             tl_delta(scheme_current_lwc)
531 
/* Accessor for the thread-local table used by the mz_tl_* macros; it is
   declared only in JIT_THREAD_LOCAL builds. Declared with an explicit
   (void) parameter list so that it is a real prototype and calls are
   checked for stray arguments.
   NOTE(review): presumably returns the base address from which the tl_*
   offsets are measured -- confirm at the definition. */
532 void *scheme_jit_get_threadlocal_table(void);
533 
/* Architecture-specific building blocks for thread-local access. They are
   combined into the mz_tl_sti_* and mz_tl_ldi_* operations that follow
   this conditional.

   x86_64: every access is one indexed load or store relative to JIT_R14,
   so the address-setup and cleanup helpers expand to (void)0 and the
   tmp_reg arguments go unused.

   Otherwise: THREAD_LOCAL_USES_JIT_V2 is defined unconditionally just
   below, so the inner #else branch (which goes through JIT_LOCAL4 and a
   pushed temporary) is currently dead. With JIT_V2 as the base, stores are
   indexed off JIT_V2, while loads first compute the slot address into the
   destination register with mz_tl_addr() and then load through that same
   register. */
534 # ifdef JIT_X86_64
535 #  define JIT_R10 JIT_R(10)
536 #  define JIT_R14 JIT_R(14)
537 #  define mz_tl_addr(reg, addr) (void)0
538 #  define mz_tl_addr_tmp(tmp_reg, addr) (void)0
539 #  define mz_tl_addr_untmp(tmp_reg) (void)0
540 #  define mz_tl_tmp_reg(tmp_reg) JIT_R10
541 #  define _mz_tl_str_p(addr, tmp_reg, reg) jit_stxi_p(addr, JIT_R14, reg)
542 #  define _mz_tl_str_l(addr, tmp_reg, reg) jit_stxi_l(addr, JIT_R14, reg)
543 #  define _mz_tl_str_i(addr, tmp_reg, reg) jit_stxi_i(addr, JIT_R14, reg)
544 #  define mz_tl_ldr_p(reg, addr) jit_ldxi_p(reg, JIT_R14, addr)
545 #  define mz_tl_ldr_l(reg, addr) jit_ldxi_l(reg, JIT_R14, addr)
546 #  define mz_tl_ldr_i(reg, addr) jit_ldxi_i(reg, JIT_R14, addr)
547 #  define mz_tl_str_d_fppop(tmp_reg, reg, addr) jit_stxi_d_fppop(addr, JIT_R14, reg)
548 #  define mz_tl_ldr_d_fppush(reg, tmp_reg, addr) jit_ldxi_d_fppush(reg, JIT_R14, addr)
549 #  define mz_fpu_tl_str_ld_fppop(tmp_reg, reg, addr) jit_fpu_stxi_ld_fppop(addr, JIT_R14, reg)
550 #  define mz_fpu_tl_ldr_ld_fppush(reg, tmp_reg, addr) jit_fpu_ldxi_ld_fppush(reg, JIT_R14, addr)
551 #  define mz_tl_addr_tmp_i(tmp_reg, addr) (void)0
552 #  define mz_tl_addr_untmp_i(tmp_reg) (void)0
553 #  define mz_tl_tmp_reg_i(tmp_reg) tmp_reg
554 # else
555 #  define THREAD_LOCAL_USES_JIT_V2
556 #  ifdef THREAD_LOCAL_USES_JIT_V2
557 #   define mz_tl_addr(reg, addr) (jit_addi_p(reg, JIT_V2, addr))
558 #   define mz_tl_addr_tmp(tmp_reg, addr) (void)0
559 #   define mz_tl_addr_untmp(tmp_reg) (void)0
560 #   define mz_tl_tmp_reg(tmp_reg) (void)0
561 #   define _mz_tl_str_p(addr, tmp_reg, reg) jit_stxi_p(addr, JIT_V2, reg)
562 #   define _mz_tl_str_l(addr, tmp_reg, reg) jit_stxi_l(addr, JIT_V2, reg)
563 #   define _mz_tl_str_i(addr, tmp_reg, reg) jit_stxi_i(addr, JIT_V2, reg)
564 #  else
565 #   define mz_tl_addr(reg, addr) (mz_get_local_p(reg, JIT_LOCAL4), jit_addi_p(reg, reg, addr))
566 #   define mz_tl_addr_tmp(tmp_reg, addr) (PUSHQr(tmp_reg), mz_tl_addr(tmp_reg, addr))
567 #   define mz_tl_addr_untmp(tmp_reg) POPQr(tmp_reg)
568 #   define mz_tl_tmp_reg(tmp_reg) tmp_reg
569 #   define _mz_tl_str_p(addr, tmp_reg, reg) jit_str_p(tmp_reg, reg)
570 #   define _mz_tl_str_l(addr, tmp_reg, reg) jit_str_l(tmp_reg, reg)
571 #   define _mz_tl_str_i(addr, tmp_reg, reg) jit_str_i(tmp_reg, reg)
572 #  endif
573 #  define mz_tl_addr_tmp_i(tmp_reg, addr) mz_tl_addr_tmp(tmp_reg, addr)
574 #  define mz_tl_addr_untmp_i(tmp_reg) mz_tl_addr_untmp(tmp_reg)
575 #  define mz_tl_tmp_reg_i(tmp_reg) mz_tl_tmp_reg(tmp_reg)
  /* The loads and floating-point accesses below expect the slot address
     to be in a register already (put there by mz_tl_addr), so their addr
     argument is unused. */
576 #  define mz_tl_ldr_p(reg, addr) jit_ldr_p(reg, reg)
577 #  define mz_tl_ldr_l(reg, addr) jit_ldr_l(reg, reg)
578 #  define mz_tl_ldr_i(reg, addr) jit_ldr_i(reg, reg)
579 #  define mz_tl_str_d_fppop(tmp_reg, reg, addr) jit_str_d_fppop(tmp_reg, reg)
580 #  define mz_tl_ldr_d_fppush(reg, tmp_reg, addr) jit_ldr_d_fppush(reg, tmp_reg)
581 #  define mz_fpu_tl_str_ld_fppop(tmp_reg, reg, addr) jit_fpu_str_ld_fppop(tmp_reg, reg)
582 #  define mz_fpu_tl_ldr_ld_fppush(reg, tmp_reg, addr) jit_fpu_ldr_ld_fppush(reg, tmp_reg)
583 # endif
584 
/* Thread-local store (sti) and load (ldi) operations, built from the
   architecture-specific pieces defined before this point. The integer and
   pointer stores are a three-step sequence: set up the address (possibly
   in tmp_reg), store, then undo the setup. Loads compute the address into
   the destination register and load through it. The floating-point forms
   compute the address into tmp_reg. On x86_64 the setup steps expand to
   (void)0. The long-double forms exist only with MZ_LONG_DOUBLE. */
585 /* A given tmp_reg doesn't have to be unused; it just has to be distinct from other arguments. */
586 # define mz_tl_sti_p(addr, reg, tmp_reg) (mz_tl_addr_tmp(tmp_reg, addr), _mz_tl_str_p(addr, mz_tl_tmp_reg(tmp_reg), reg), mz_tl_addr_untmp(tmp_reg))
587 # define mz_tl_sti_l(addr, reg, tmp_reg) (mz_tl_addr_tmp(tmp_reg, addr), _mz_tl_str_l(addr, mz_tl_tmp_reg(tmp_reg), reg), mz_tl_addr_untmp(tmp_reg))
588 # define mz_tl_sti_i(addr, reg, tmp_reg) (mz_tl_addr_tmp_i(tmp_reg, addr), _mz_tl_str_i(addr, mz_tl_tmp_reg_i(tmp_reg), reg), mz_tl_addr_untmp_i(tmp_reg))
589 # define mz_tl_ldi_p(reg, addr) (mz_tl_addr(reg, addr), mz_tl_ldr_p(reg, addr))
590 # define mz_tl_ldi_l(reg, addr) (mz_tl_addr(reg, addr), mz_tl_ldr_l(reg, addr))
591 # define mz_tl_ldi_i(reg, addr) (mz_tl_addr(reg, addr), mz_tl_ldr_i(reg, addr))
592 # define mz_tl_sti_d_fppop(addr, reg, tmp_reg) (mz_tl_addr(tmp_reg, addr), mz_tl_str_d_fppop(tmp_reg, reg, addr))
593 # define mz_tl_ldi_d_fppush(reg, addr, tmp_reg) (mz_tl_addr(tmp_reg, addr), mz_tl_ldr_d_fppush(reg, tmp_reg, addr))
594 # ifdef MZ_LONG_DOUBLE
595 #  define mz_fpu_tl_sti_ld_fppop(addr, reg, tmp_reg) (mz_tl_addr(tmp_reg, addr), mz_fpu_tl_str_ld_fppop(tmp_reg, reg, addr))
596 #  define mz_fpu_tl_ldi_ld_fppush(reg, addr, tmp_reg) (mz_tl_addr(tmp_reg, addr), mz_fpu_tl_ldr_ld_fppush(reg, tmp_reg, addr))
597 # endif
598 #else
/* Build without JIT_THREAD_LOCAL: the mz_tl_* operations map straight onto
   the absolute-address jit_sti and jit_ldi operations and ignore tmp_reg,
   and each tl_NAME is the address of the variable itself rather than an
   offset.
   NOTE(review): tl_jit_future_storage, tl_scheme_future_need_gc_pause,
   tl_scheme_use_rtcall and tl_scheme_current_lwc have no definition in
   this branch; presumably they are needed only in thread-local builds --
   confirm. */
599 # define mz_tl_sti_p(addr, reg, tmp_reg) jit_sti_p(addr, reg)
600 # define mz_tl_sti_l(addr, reg, tmp_reg) jit_sti_l(addr, reg)
601 # define mz_tl_sti_i(addr, reg, tmp_reg) jit_sti_i(addr, reg)
602 # define mz_tl_ldi_p(reg, addr) jit_ldi_p(reg, addr)
603 # define mz_tl_ldi_l(reg, addr) jit_ldi_l(reg, addr)
604 # define mz_tl_ldi_i(reg, addr) jit_ldi_i(reg, addr)
605 # define mz_tl_sti_d_fppop(addr, reg, tmp_reg) jit_sti_d_fppop(addr, reg)
606 # define mz_tl_ldi_d_fppush(reg, addr, tmp_reg) jit_ldi_d_fppush(reg, addr)
607 # define mz_fpu_tl_sti_ld_fppop(addr, reg, tmp_reg) jit_fpu_sti_ld_fppop(addr, reg)
608 # define mz_fpu_tl_ldi_ld_fppush(reg, addr, tmp_reg) jit_fpu_ldi_ld_fppush(reg, addr)
609 # define tl_MZ_RUNSTACK (&MZ_RUNSTACK)
610 # define tl_MZ_RUNSTACK_START (&MZ_RUNSTACK_START)
611 # define tl_GC_gen0_alloc_page_ptr (&GC_gen0_alloc_page_ptr)
612 # define tl_scheme_current_thread (&scheme_current_thread)
613 # define tl_scheme_current_cont_mark_pos (&scheme_current_cont_mark_pos)
614 # define tl_scheme_current_cont_mark_stack (&scheme_current_cont_mark_stack)
615 # define tl_stack_cache_stack_pos (&stack_cache_stack_pos)
616 # define tl_retry_alloc_r1 (&retry_alloc_r1)
617 # define tl_fixup_runstack_base (&fixup_runstack_base)
618 # define tl_fixup_already_in_place (&fixup_already_in_place)
619 # define tl_scheme_jit_save_fp (&scheme_jit_save_fp)
620 # define tl_scheme_jit_save_fp2 (&scheme_jit_save_fp2)
621 # ifdef MZ_LONG_DOUBLE
622 #  define tl_scheme_jit_save_extfp (&scheme_jit_save_extfp)
623 #  define tl_scheme_jit_save_extfp2 (&scheme_jit_save_extfp2)
624 # endif
625 # define tl_scheme_fuel_counter ((void *)&scheme_fuel_counter)
626 # define tl_scheme_jit_stack_boundary ((void *)&scheme_jit_stack_boundary)
627 #endif
628 
629 /*========================================================================*/
630 /*                           code-gen utils                               */
631 /*========================================================================*/
632 
/* Register assignment for the runstack. JIT_RUNSTACK is always JIT_V0.
   When JIT_V2 is not taken for the thread-local table
   (THREAD_LOCAL_USES_JIT_V2 undefined), JIT_V2 serves as the runstack-base
   register and the load/store helpers expand to nothing. Otherwise the base
   is kept in the JIT_LOCAL4 slot: callers name an alternative register
   through JIT_RUNSTACK_BASE_OR_ALT(alt) and move the value with
   mz_ld_runstack_base_alt() and mz_st_runstack_base_alt(). Note that
   JIT_RUNSTACK_BASE itself is not defined in that second configuration. */
633 #define JIT_RUNSTACK JIT_V0
634 
635 #ifndef THREAD_LOCAL_USES_JIT_V2
636 # define JIT_RUNSTACK_BASE JIT_V2
637 # define JIT_RUNSTACK_BASE_OR_ALT(alt) JIT_RUNSTACK_BASE
638 # define mz_ld_runstack_base_alt(reg) /* empty */
639 # define mz_st_runstack_base_alt(reg) /* empty */
640 #else
641 # define JIT_RUNSTACK_BASE_OR_ALT(alt) alt
642 # define JIT_RUNSTACK_BASE_LOCAL JIT_LOCAL4
643 # define mz_ld_runstack_base_alt(reg) mz_get_local_p(reg, JIT_RUNSTACK_BASE_LOCAL)
644 # define mz_st_runstack_base_alt(reg) mz_set_local_p(reg, JIT_RUNSTACK_BASE_LOCAL)
645 #endif
646 
/* Emit a store of the JIT_RUNSTACK register into the MZ_RUNSTACK
   thread-local slot, passing JIT_R0 as the tmp_reg argument of
   mz_tl_sti_p(). The _IF_NEEDED form emits the store only when
   jitter->need_set_rs is set and then clears the flag. The
   _FOR_BRANCH_IF_NEEDED form emits the store but leaves the flag set.
   Both conditional forms expand to a bare `if` block, so use them only as
   complete statements and never directly before an `else`. */
647 #define JIT_UPDATE_THREAD_RSPTR() mz_tl_sti_p(tl_MZ_RUNSTACK, JIT_RUNSTACK, JIT_R0)
648 #define JIT_UPDATE_THREAD_RSPTR_IF_NEEDED() \
649     if (jitter->need_set_rs) {   \
650       JIT_UPDATE_THREAD_RSPTR(); \
651       jitter->need_set_rs = 0;   \
652     }
653 #define JIT_UPDATE_THREAD_RSPTR_FOR_BRANCH_IF_NEEDED() \
654     if (jitter->need_set_rs) {   \
655       JIT_UPDATE_THREAD_RSPTR(); \
656     }
657 
/* Disabled by default (#if 0); in the normal build both macros expand to
   nothing. */
658 #if 0
659 /* Debugging: checking for runstack overflow. A CHECK_RUNSTACK_OVERFLOW() should
660    be included after each decrement of JIT_RUNSTACK. Failure is "reported" by
661    going into an immediate loop. */
/* The generated sequence saves JIT_R0 to cr_tmp, loads the runstack start,
   emits a branch that targets its own address when JIT_RUNSTACK is below
   the start, and then restores JIT_R0.
   NOTE(review): the __LINE__ value moved into JIT_R0 is overwritten by the
   load that follows it, so it is not live at the loop -- confirm whether
   that is intended. */
662 static void *top;
663 static void *cr_tmp;
664 # define CHECK_RUNSTACK_OVERFLOW_NOCL() \
665   jit_sti_l(&cr_tmp, JIT_R0); jit_movi_l(JIT_R0, __LINE__); jit_ldi_l(JIT_R0, &scheme_current_runstack_start); \
666   top = (_jit.x.pc); (void)jit_bltr_ul(top, JIT_RUNSTACK, JIT_R0); jit_ldi_l(JIT_R0, &cr_tmp)
667 # define CHECK_RUNSTACK_OVERFLOW() \
668      CHECK_LIMIT(); CHECK_RUNSTACK_OVERFLOW_NOCL()
669 #else
670 # define CHECK_RUNSTACK_OVERFLOW() /* empty */
671 # define CHECK_RUNSTACK_OVERFLOW_NOCL() /* empty */
672 #endif
673 
/* Disabled by default (#if 0); normally VALIDATE_RESULT(reg) expands to
   nothing. When enabled it emits a branch that targets its own address
   when reg equals 0, so a zero result spins in place. */
674 #if 0
675 /* Debugging: ... */
676 static void *top4;
677 # define VALIDATE_RESULT(reg) top4 = (_jit.x.pc); (void)jit_beqi_ul(top4, reg, 0)
678 #else
679 # define VALIDATE_RESULT(reg) /* empty */
680 #endif
681 
682 /* The mz_rs_... family of operations operate on a virtual
683    JIT_RUNSTACK register to perform a kind of peephole optimization.
684    The virtual register can be de-sync'd from the actual register, so
685    that multiple adjustments to the register can be collapsed; this
686    mostly improves code size, rather than speed. Functions that cause
687    the register to be de-sync'd are marked as such. Functions that can
688    accommodate a de-sync'd register on entry are marked as such. All
689    other functions can assume a sync'd register and ensure a sync'd
690    register. Note that branches and calls normally require a sync'd
691    register. */
692 
/* Enabled variant (#if 1): runstack adjustments are accumulated in
   jitter->rs_virtual_offset, counted in words, instead of being emitted
   immediately. */
693 #if 1
/* mz_rs_dec(n): lower the virtual runstack by n words. A non-negative
   r0_status or r1_status is raised by n; negative values are left alone. */
694 # define mz_rs_dec(n) (((jitter->r0_status >= 0) ? jitter->r0_status += (n) : 0), \
695                        ((jitter->r1_status >= 0) ? jitter->r1_status += (n) : 0), \
696                        jitter->rs_virtual_offset -= (n))
/* mz_rs_inc(n): raise the virtual runstack by n words. Both status values
   are lowered by n unconditionally, so they may become negative. */
697 # define mz_rs_inc(n) (jitter->r0_status -= (n), \
698                        jitter->r1_status -= (n), \
699                        jitter->rs_virtual_offset += (n))
/* Loads and stores fold the pending virtual offset into the displacement. */
700 # define mz_rs_ldxi(reg, n) jit_ldxi_p(reg, JIT_RUNSTACK, WORDS_TO_BYTES(((n) + jitter->rs_virtual_offset)))
701 # define mz_rs_ldr(reg) mz_rs_ldxi(reg, 0)
702 # define mz_rs_stxi(n, reg) jit_stxi_p(WORDS_TO_BYTES(((n) + jitter->rs_virtual_offset)), JIT_RUNSTACK, reg)
703 # define mz_rs_str(reg) mz_rs_stxi(0, reg)
/* mz_rs_sync(): if an adjustment is pending, emit one add to JIT_RUNSTACK
   and reset the virtual offset. When the register status was valid at the
   current emission pointer, status_at_ptr is moved to the new pointer after
   the add so that the status stays valid across the emitted instruction. */
704 # define mz_rs_sync() (jitter->rs_virtual_offset \
705                        ? ((jitter->status_at_ptr == _jit.x.pc) \
706                           ? (jit_addi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(jitter->rs_virtual_offset)), \
707                              jitter->status_at_ptr = _jit.x.pc, \
708                              jitter->rs_virtual_offset = 0) \
709                           : (jit_addi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(jitter->rs_virtual_offset)), \
710                              jitter->rs_virtual_offset = 0)) \
711                        : 0)
/* mz_rs_sync_0(): drop the pending offset without emitting any code. */
712 # define mz_rs_sync_0() (jitter->rs_virtual_offset = 0)
713 #else
/* Fallback variant: every operation adjusts or accesses JIT_RUNSTACK
   directly, and the sync operations are no-ops. */
714 # define mz_rs_dec(n) jit_subi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(n))
715 # define mz_rs_inc(n) jit_addi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(n))
716 # define mz_rs_ldr(reg) jit_ldr_p(reg, JIT_RUNSTACK)
717 # define mz_rs_ldxi(reg, n) jit_ldxi_p(reg, JIT_RUNSTACK, WORDS_TO_BYTES(n))
718 # define mz_rs_str(reg) jit_str_p(JIT_RUNSTACK, reg)
719 # define mz_rs_stxi(n, reg) jit_stxi_p(WORDS_TO_BYTES(n), JIT_RUNSTACK, reg)
720 # define mz_rs_sync() /* empty */
721 # define mz_rs_sync_0() /* empty */
722 #endif
723 
724 /* No need to sync if a branch just goes to an exception. */
725 # define mz_rs_sync_fail_branch() /* empty */
726 
/* Push and pop wrappers that forward to scheme_mz_pushr_p_it() and
   scheme_mz_popr_p_it() with the current jitter. mz_popr_p(x) passes 0 as
   the last argument; mz_popr_x() passes JIT_R1 and 1.
   NOTE(review): the last argument presumably means "discard the popped
   value" -- confirm at the definition of scheme_mz_popr_p_it(). */
727 /* de-sync's rs: */
728 #define mz_pushr_p(x) scheme_mz_pushr_p_it(jitter, x)
729 #define mz_popr_p(x) scheme_mz_popr_p_it(jitter, x, 0)
730 #define mz_popr_x() scheme_mz_popr_p_it(jitter, JIT_R1, 1)
731 
#define CHECK_RUNSTACK_REGISTER_UPDATE 0

#if CHECK_RUNSTACK_REGISTER_UPDATE
/* Debugging: at each _finish(), double-check that the runstack register has been
   copied into scheme_current_runstack. This code assumes that mz_finishr() is not
   used with JIT_R0.  Failure is "reported" by going into an immediate loop (the
   generated branch targets its own address), but check_location is set to the
   source line number to help indicate where the problem originated. */
static void *top;
/* Written by the generated code right before each check. Sized to match the
   jit_sti_l() store (assumed to be one machine word -- confirm against the
   lightning port in use). */
static intptr_t check_location;
/* The line number must be stored before JIT_R0 is reused for the
   tl_MZ_RUNSTACK load; otherwise it is simply overwritten and lost. */
# define CONFIRM_RUNSTACK() (jit_movi_l(JIT_R0, __LINE__), \
                             jit_sti_l(&check_location, JIT_R0), \
                             mz_tl_ldi_p(JIT_R0, tl_MZ_RUNSTACK), \
                             top = (_jit.x.pc), \
                             jit_bner_p(top, JIT_RUNSTACK, JIT_R0))
#else
/* Checking disabled: a constant expression, so that callers can still write
   (void)CONFIRM_RUNSTACK(). */
# define CONFIRM_RUNSTACK() 0
#endif
746 
/* Call-sequence wrappers around lightning's prepare/finish operations.
   mz_finish() and mz_finishr() first expand CONFIRM_RUNSTACK(); the
   "unsynced" and "nonrs" variants go straight to jit_finish(). */
#define mz_prepare(argc) jit_prepare(argc)
#define mz_finish(target) ((void)CONFIRM_RUNSTACK(), jit_finish(target))
#define mz_finishr(target_reg) ((void)CONFIRM_RUNSTACK(), jit_finishr(target_reg))
#define mz_finish_unsynced_runstack(target) jit_finish(target)

#define mz_nonrs_finish(target) jit_finish(target)

/* Forward to the jitter-state helpers using the ambient `jitter`. */
#define mz_retain(val) scheme_mz_retain_it(jitter, val)
#define mz_remap(pos) scheme_mz_remap_it(jitter, pos)
756 
/* mz_bnei_t / mz_beqi_t: branch to `target` when the `type` field of the
   Scheme_Object addressed by `obj_reg` is not-equal / equal to `tag`.
   Without a fused jit_bx{ne,eq}i_s operation, the field is first loaded
   into `tmp_reg` with jit_ldxi_s and then compared; with the fused
   operation, `tmp_reg` is unused. */
#ifndef jit_bxnei_s
# define mz_bnei_t(target, obj_reg, tag, tmp_reg) \
  (jit_ldxi_s(tmp_reg, obj_reg, &((Scheme_Object *)0x0)->type), \
   jit_bnei_i(target, tmp_reg, tag))
# define mz_beqi_t(target, obj_reg, tag, tmp_reg) \
  (jit_ldxi_s(tmp_reg, obj_reg, &((Scheme_Object *)0x0)->type), \
   jit_beqi_i(target, tmp_reg, tag))
#else
# define mz_bnei_t(target, obj_reg, tag, tmp_reg) jit_bxnei_s(target, obj_reg, tag)
# define mz_beqi_t(target, obj_reg, tag, tmp_reg) jit_bxeqi_s(target, obj_reg, tag)
#endif
768 
769 /* Stack alignment, fixed up by mz_push_locals():
770     - On PPC, jit_prolog() generates an aligned stack.
771       It also leaves room for 3 locals.
772     - On x86, jit_prolog() pushes three words after the
773       old EBP. So, for 16-byte alignment, the stack is
774       one word past proper alignment; push 3 to realign
775       (which leaves room for three locals)
776     - On x86_64, jit_prolog() pushes three words after
777       the old RBP. So, for 16-byte alignment, the stack
778       is one word past alignment. Push 1 to realign (but
779       mz_push_locals() pushes 3, because we need at least
780       two locals).
781     - On ARM, the stack should be 8-byte aligned, and
782       jit_prolog() leaves the stack in an aligned state.
783 */
784 
785 /*    LOCAL1 is used to save the value current_cont_mark_stack,
786       at least for the first time it needs to be saved in a
787       function body. If it needs to be saved again, it is
788       pushed onto the runstack. (The value of current_cont_mark_stack
789       is an integer that marks a point in the stack, as opposed
790       to being an address of a stack position.) */
791 
792 /*
793    mz_prolog() and mz_epilog() bracket an internal "function" using a
794    lighter-weight ABI that keeps all Rx and Vx registers as-is on
795    entry and exit, as well as the frame pointer. Some of those
796    functions are registered in a special way with add_symbol() so that
797    the backtrace function can follow the lightweight ABI to get back
798    to the calling code. The lightweight ABI does not support nested
799    calls (at least not on all platforms; see LOCAL2 below).
800 
801    LOCAL2 and LOCAL3 are available for temporary storage on the C
802    stack using mz_get_local() and mz_set_local() under certain
803    circumstances:
804 
805    * They can only be used within a function (normally corresponding
806      to a Racket lambda) where mz_push_locals() has been called after
807      jit_prolog(), and where mz_pop_locals() is called before
808      jit_ret().
809 
810    * On some platforms, LOCAL2 and LOCAL3 are the same.
811 
812    * On some platforms, a lightweight function created with
813      mz_prolog() and mz_epilog() uses LOCAL2 to save the return
814      address. On those platforms, though, LOCAL3 is different from
815      LOCAL2. So, LOCAL3 can always be used for temporary storage in
816      such functions (assuming that they're called from a function that
817      pushes locals, and that nothing else is using LOCAL2).
818 
819 */
820 
821 /*  x86[_64] frame (counting down from frame pointer marked with <-):
822       return address
823       prev frame <-
824       saved EBX (= JIT_RUNSTACK, when saved from native call)
825       saved R12/ESI (= JIT_V1, when saved from native call)
826       saved R13/EDI (= JIT_V2 x86_64: = RUNSTACK_BASE, when saved from native call
827                               x86: = THREAD_LOCAL or RUNSTACK_BASE, when saved from native call
828       LOCAL1 (which is a cont_mark_stack offset, if anything)
829       LOCAL2 (some pointer, never to stack or runstack)
830       LOCAL3 (temp space for misc uses; not saved across calls that might capture LWC)
831       LOCAL4 (x86_64: = saved R14 when THREAD_LOCAL
832               x86: = RUNSTACK_BASE or THREAD_LOCAL)
833       [some empty slots, maybe, depending on alignment]
834       [space for "flostack" --- local unboxed values, such as flonums]
835     Registers: JIT_V1 = RUNSTACK, JIT_V2 = x86_64: RUNSTACK_BASE
836                                            x86: RUNSTACK_BASE or THREAD_LOCAL
837                x86_64: JIT_R14 = THREAD_LOCAL
838 */
839 
840 #ifdef JIT_THREAD_LOCAL
841 # define NEED_LOCAL4
842 #endif
843 
/* Store `reg` to / load `reg` from the C-stack local at offset `slot`,
   addressed relative to JIT_FP. */
#define mz_set_local_p(reg, slot) mz_set_local_p_x(reg, slot, JIT_FP)
#define mz_get_local_p(reg, slot) mz_get_local_p_x(reg, slot, JIT_FP)
846 
847 /* --- PPC --- */
848 #if defined(MZ_USE_JIT_PPC)
/* JIT_LOCAL1, JIT_LOCAL2, and JIT_LOCAL3 are offsets in the stack frame. */
# define JIT_LOCAL1 56
# define JIT_LOCAL2 60
# define JIT_LOCAL3 64
/* Store register x to, or load register x from, the local at byte offset l
   relative to the frame register FP. */
# define mz_set_local_p_x(x, l, FP) jit_stxi_p(l, FP, x)
# define mz_get_local_p_x(x, l, FP) jit_ldxi_p(x, FP, l)
/* Retarget the branch that ends at a so that it goes to v. In long-jump
   mode the patch is applied through jit_patch_movei() on a-4 and a-3;
   otherwise the branch instruction at a-1 is patched directly. */
# define mz_patch_branch_at(a, v) (_jitl.long_jumps ? (void)jit_patch_movei(a-4, a-3, v) : (void)jit_patch_branch(a-1, v))
# define mz_patch_ucbranch_at(a, v) (_jitl.long_jumps ? (void)jit_patch_movei(a-4, a-3, v) : (void)jit_patch_ucbranch(a-1, v))
/* Lightweight-ABI entry and exit: the prolog emits MFLRr(x) and then stores
   x in LOCAL2; the epilog reloads x from LOCAL2 and jumps through it. */
# define mz_prolog(x) (MFLRr(x), mz_set_local_p(x, JIT_LOCAL2))
# define mz_epilog(x) (mz_get_local_p(x, JIT_LOCAL2), jit_jmpr(x))
# define mz_epilog_without_jmp() /* empty */
/* No register shuffling or explicit local push/pop on this platform. */
# define jit_shuffle_saved_regs() /* empty */
# define jit_unshuffle_saved_regs() /* empty */
# define mz_push_locals() /* empty */
# define mz_pop_locals() /* empty */
864 # ifdef SUPPRESS_LIGHTNING_FUNCS
865 void scheme_jit_prolog_again(mz_jit_state *jitter, int n, int ret_addr_reg);
866 # else
void scheme_jit_prolog_again(mz_jit_state *jitter, int n, int ret_addr_reg)
{
  /* The layout built here has to stay in step with _jit_prolog. */
  int regs_in_full_prolog;
  int frame_bytes;
  int save_area_ofs;

  /* Size the frame as for a prolog with n arguments: each register from
     (JIT_AUX - n) up to register 31 gets a 4-byte slot on top of the fixed
     24 + 32 + 12 bytes, and the total is rounded up to a multiple of 16
     (the stack must be quad-word aligned). */
  regs_in_full_prolog = 32 - (JIT_AUX - n);
  frame_bytes = (24 + 32 + 12 + regs_in_full_prolog * 4 + 15) & ~15;

  STWUrm(1, -frame_bytes, 1);                /* stwu  r1, -x(r1) */

  /* Only V0-V2 actually need to be saved; they are the three registers
     starting at 29, which sit at the end of the saved area. */
  save_area_ofs = frame_bytes - 3 * 4;
  STMWrm(29, save_area_ofs, 1);              /* stmw  rI, ofs(r1) */

  /* Store ret_addr_reg just above the new frame; the slot differs
     between the Darwin convention and the others. */
#ifdef _CALL_DARWIN
  STWrm(ret_addr_reg, frame_bytes + 8, 1);   /* stw   r0, x+8(r1) */
#else
  STWrm(ret_addr_reg, frame_bytes + 4, 1);   /* stw   r0, x+4(r1) */
#endif
}
894 # endif
895 # define _jit_prolog_again scheme_jit_prolog_again
896 #endif
897 
898 /* --- ARM --- */
899 #ifdef MZ_USE_JIT_ARM
/* Locals are four consecutive 4-byte slots starting at
   JIT_FRAME_EXTRA_SPACE_OFFSET. Note that JIT_FRAME_FLOSTACK_OFFSET is
   defined as that same base value. */
# define JIT_LOCAL1 JIT_FRAME_EXTRA_SPACE_OFFSET
# define JIT_LOCAL2 (JIT_FRAME_EXTRA_SPACE_OFFSET+4)
# define JIT_LOCAL3 (JIT_FRAME_EXTRA_SPACE_OFFSET+8)
# define JIT_LOCAL4 (JIT_FRAME_EXTRA_SPACE_OFFSET+12)
# define JIT_FRAME_FLOSTACK_OFFSET JIT_FRAME_EXTRA_SPACE_OFFSET
/* Store register x to, or load register x from, the local at byte offset l
   relative to the frame register FP. */
# define mz_set_local_p_x(x, l, FP) jit_stxi_p(l, FP, x)
# define mz_get_local_p_x(x, l, FP) jit_ldxi_p(x, FP, l)
# define mz_patch_branch_at(a, v) jit_patch_at(a, v)
# define mz_patch_ucbranch_at(a, v) jit_patch_at(a, v)
/* Lightweight-ABI entry and exit: JIT_LR is saved in LOCAL2 on entry and
   reloaded from it before jumping back. The argument x is not used. */
# define mz_prolog(x) (mz_set_local_p(JIT_LR, JIT_LOCAL2))
# define mz_epilog(x) (mz_get_local_p(JIT_LR, JIT_LOCAL2), jit_jmpr(JIT_LR))
# define mz_epilog_without_jmp() /* empty */
/* No register shuffling or explicit local push/pop on this platform. */
# define jit_shuffle_saved_regs() /* empty */
# define jit_unshuffle_saved_regs() /* empty */
# define mz_push_locals() /* empty */
# define mz_pop_locals() /* empty */
# define jit_base_prolog() jit_prolog(0)
917 # ifdef SUPPRESS_LIGHTNING_FUNCS
918 void scheme_jit_prolog_again(mz_jit_state *jitter, int n, int ret_addr_reg);
919 # else
/* ARM variant: copies ret_addr_reg into JIT_LR and then emits arm_prolog()
   for n. `jitter` is not named directly in the body; presumably it is
   reached through the _jitp macro -- confirm against its definition. */
void scheme_jit_prolog_again(mz_jit_state *jitter, int n, int ret_addr_reg)
{
  /* The move has to come first so that JIT_LR holds the return address
     when the prolog code is emitted. */
  jit_movr_p(JIT_LR, ret_addr_reg);
  arm_prolog(_jitp, n);
}
925 # endif
926 # define _jit_prolog_again scheme_jit_prolog_again
927 #endif
928 
929 /* --- x86[_64] --- */
930 #if defined(JIT_X86_64) || defined(JIT_X86_PLAIN)
931 /* From frame pointer, -1 is saved frame pointer, -2 is saved ESI/R12,
932    and -3 is saved EDI/R13. On entry to a procedure, prolog pushes 4
   since the call (which also pushed), so if the stack was 16-byte
   aligned before the call, the current stack pointer is 1 word
935    (either 4 or 8 bytes) below alignment (need to push 3 or 1 words to
936    re-align). Also, for a call without a prolog, the stack pointer is
937    1 word (for the return address) below alignment. */
/* LOCAL1 and LOCAL2 are byte offsets from the frame pointer: 4 and 5 words
   below it, respectively. */
# define JIT_LOCAL1 -(JIT_WORD_SIZE * 4)
# define JIT_LOCAL2 -(JIT_WORD_SIZE * 5)
/* Store register x to, or load register x from, the local at byte offset l
   relative to the frame register FP. */
# define mz_set_local_p_x(x, l, FP) jit_stxi_p((l), FP, (x))
# define mz_get_local_p_x(x, l, FP) jit_ldxi_p((x), FP, (l))
# define mz_patch_branch_at(a, v) jit_patch_branch_at(a, v)
# define mz_patch_ucbranch_at(a, v) jit_patch_ucbranch_at(a, v)
/* The rest of this group selects, per configuration:
     STACK_ALIGN_WORDS  - words the lightweight prolog subtracts from SP
     JIT_LOCAL3         - its own slot (6 words down) when aligning,
                          otherwise an alias of JIT_LOCAL2
     LOCAL_FRAME_SIZE   - words reserved by mz_push_locals()
     JIT_LOCAL4_OFFSET  - word index of LOCAL4, only with NEED_LOCAL4 */
# ifdef JIT_X86_ALIGN_STACK
   /* Maintain 16-byte stack alignment. */
#  ifdef JIT_X86_64
#   define STACK_ALIGN_WORDS 1
#  else
#   define STACK_ALIGN_WORDS 3
#  endif
#  define JIT_LOCAL3 -(JIT_WORD_SIZE * 6)
#  ifdef NEED_LOCAL4
#   ifdef JIT_X86_64
#    define LOCAL_FRAME_SIZE 5
#   else
#    define LOCAL_FRAME_SIZE 7
#   endif
#   define JIT_LOCAL4_OFFSET 7
#  else
#   define LOCAL_FRAME_SIZE 3
#  endif
   /* The prolog drops SP by STACK_ALIGN_WORDS words; the epilog adds them
      back and returns. The "without jmp" form adds one extra word instead
      of returning (presumably discarding the return address -- confirm). */
#  define _mz_prolog(x) (ADDQiBr(-(STACK_ALIGN_WORDS * JIT_WORD_SIZE), JIT_SP))
#  define _mz_epilog_without_jmp() ADDQiBr((STACK_ALIGN_WORDS + 1) * JIT_WORD_SIZE, JIT_SP)
#  define _mz_epilog(x) (ADDQiBr(STACK_ALIGN_WORDS * JIT_WORD_SIZE, JIT_SP), RET_())
# else
#  define JIT_LOCAL3 JIT_LOCAL2
#  ifdef NEED_LOCAL4
#   define LOCAL_FRAME_SIZE 3
#   define JIT_LOCAL4_OFFSET 6
#  else
#   define LOCAL_FRAME_SIZE 2
#  endif
   /* No alignment padding: the prolog emits nothing, the epilog is a plain
      return, and the "without jmp" form adds one word to SP. */
#  define _mz_prolog(x) /* empty */
#  define _mz_epilog(x) RET_()
#  define _mz_epilog_without_jmp() ADDQir(JIT_WORD_SIZE, JIT_SP)
# endif
# ifdef NEED_LOCAL4
#   define JIT_LOCAL4 -(JIT_WORD_SIZE * JIT_LOCAL4_OFFSET)
# endif
# ifdef MZ_PROLOG_CREATE_FULL_STACK_FRAME
  /* Make the internal ABI the same as the main call ABI */
#  define MZ_LOCAL_FRAME_SIZE (LOCAL_FRAME_SIZE+3)
  /* Build a full frame: push the old frame pointer, read LOCAL3 into x
     before the frame pointer is replaced, set up the new frame pointer,
     reserve MZ_LOCAL_FRAME_SIZE words, and write x into LOCAL3 of the new
     frame. x is used as a temporary and is overwritten. */
#  define mz_prolog(x) (PUSHQr(_EBP),                                  \
                        mz_get_local_p((x), JIT_LOCAL3),               \
                        MOVQrr(_ESP, _EBP),                            \
                        ADDQiBr(-(MZ_LOCAL_FRAME_SIZE * JIT_WORD_SIZE), JIT_SP), \
                        mz_set_local_p((x), JIT_LOCAL3))
  /* Both epilogs release the reserved words and pop the saved frame
     pointer; one then adds a further word to SP, the other returns. */
#  define mz_epilog_without_jmp() (ADDQiBr(MZ_LOCAL_FRAME_SIZE * JIT_WORD_SIZE, JIT_SP), POPQr(_EBP), ADDQiBr(JIT_WORD_SIZE, JIT_SP))
#  define mz_epilog(x) (ADDQiBr(MZ_LOCAL_FRAME_SIZE * JIT_WORD_SIZE, JIT_SP), POPQr(_EBP), RET_())
# else
  /* Normal internal ABI */
#  define mz_prolog(x) _mz_prolog(x)
#  define mz_epilog_without_jmp() _mz_epilog_without_jmp()
#  define mz_epilog(x) _mz_epilog(x)
# endif
/* Reserve / release LOCAL_FRAME_SIZE words on the C stack for the locals. */
# define mz_push_locals() SUBQir((LOCAL_FRAME_SIZE << JIT_LOG_WORD_SIZE), JIT_SP)
# define mz_pop_locals() ADDQir((LOCAL_FRAME_SIZE << JIT_LOG_WORD_SIZE), JIT_SP)
/* Byte offset from the frame pointer that lies LOCAL_FRAME_SIZE + 3 words
   down; used as the base for flostack addressing. */
# define JIT_FRAME_FLOSTACK_OFFSET (-(JIT_WORD_SIZE * (LOCAL_FRAME_SIZE + 3)))
/* Push ret_addr_reg and then emit the base prolog. The jitter and n
   arguments are ignored in this variant. */
# define _jit_prolog_again(jitter, n, ret_addr_reg) (PUSHQr(ret_addr_reg), jit_base_prolog())
# if defined(MZ_USE_JIT_X86_64) && !defined(_WIN64)
  /* Copy ESI/EDI into R12/R13, and back. */
#  define jit_shuffle_saved_regs() (MOVQrr(_ESI, _R12), MOVQrr(_EDI, _R13))
#  define jit_unshuffle_saved_regs() (MOVQrr(_R12, _ESI), MOVQrr(_R13, _EDI))
# else
#  define jit_shuffle_saved_regs() /* empty */
#  define jit_unshuffle_saved_regs() /* empty */
# endif
1007 #endif
1008 
/* Thread-local pointer management. Every macro is defined in every
   configuration; where a step is not needed it expands to nothing. */
#ifdef JIT_THREAD_LOCAL
# ifdef JIT_X86_64
  /* JIT_R14 holds the thread-local pointer. "push early" stores the
     incoming JIT_R14 in LOCAL4 and then loads JIT_R14 from JIT_R_ARG4;
     "pop" reloads JIT_R14 from LOCAL4; "repush" stores JIT_R14 to LOCAL4
     again. */
#  define mz_pop_threadlocal() mz_get_local_p(JIT_R14, JIT_LOCAL4)
#  define mz_push_threadlocal(in) /* empty */
#  define mz_push_threadlocal_early() (mz_set_local_p(JIT_R14, JIT_LOCAL4), jit_movr_p(JIT_R14, JIT_R_ARG4))
#  define mz_repush_threadlocal() mz_set_local_p(JIT_R14, JIT_LOCAL4)
# else
#  define mz_pop_threadlocal() /* empty */
#  ifdef THREAD_LOCAL_USES_JIT_V2
  /* The pointer is kept in JIT_V2, so nothing needs to be re-stored. */
#   define _mz_install_threadlocal(reg) jit_movr_p(JIT_V2, reg)
#   define mz_repush_threadlocal() /* empty */
#  else
  /* The pointer is kept in LOCAL4. "repush" copies LOCAL4 from the frame
     whose address is stored at [_EBP] into the current frame's LOCAL4,
     preserving JIT_R0 with a push/pop pair. */
#   define _mz_install_threadlocal(reg) mz_set_local_p(reg, JIT_LOCAL4)
#   define mz_repush_threadlocal() (PUSHQr(JIT_R0), jit_ldr_p(JIT_R0, _EBP), \
                                    jit_ldxi_p(JIT_R0, JIT_R0, JIT_LOCAL4), \
                                    jit_stxi_p(JIT_LOCAL4, _EBP, JIT_R0), \
                                    POPQr(JIT_R0))
#  endif
  /* Declare the next argument in `in`, fetch it into JIT_V2, and install
     it as the thread-local pointer. `in` is assigned by this macro. */
#  define mz_push_threadlocal(in) (in = jit_arg_p(), jit_getarg_p(JIT_V2, in), _mz_install_threadlocal(JIT_V2))
#  define mz_push_threadlocal_early() /* empty */
# endif
#else
# define mz_pop_threadlocal() /* empty */
# define mz_push_threadlocal(in) /* empty */
# define mz_push_threadlocal_early() /* empty */
# define mz_repush_threadlocal() /* empty */
#endif
1036 
/* Optional debugging check for flostack accesses (disabled by `#if 0`).
   When enabled, it temporarily moves FP to the address of slot i, skips
   ahead if that address is >= JIT_SP, and otherwise emits a load from
   address 0 (presumably to force a fault -- confirm); FP is then
   restored. When disabled, the check expands to (void)0. */
#if 0
static jit_insn *fp_tmpr;
# define check_fp_depth(i, FP) \
  (jit_addi_l(FP, FP, (JIT_FRAME_FLOSTACK_OFFSET - (i))),             \
   fp_tmpr = jit_bger_l(0, FP, JIT_SP),                               \
   jit_ldi_p(FP, 0),                                                    \
   mz_patch_branch(fp_tmpr),                                            \
   jit_subi_l(FP, FP, (JIT_FRAME_FLOSTACK_OFFSET - (i))))
#else
# define check_fp_depth(i, FP) (void)0
#endif

#define FLOSTACK_SPACE_CHUNK 16
/* Load from / store to the flostack slot at offset i, i.e. at
   (JIT_FRAME_FLOSTACK_OFFSET - i) relative to FP, after running
   check_fp_depth(). The non-_x forms use JIT_FP as the base. */
# define mz_ld_fppush_x(r, i, FP, extfl) (check_fp_depth(i, FP), jit_FPSEL_ldxi_xd_fppush(extfl, r, FP, (JIT_FRAME_FLOSTACK_OFFSET - (i))))
# define mz_ld_fppush(r, i, extfl) mz_ld_fppush_x(r, i, JIT_FP, extfl)
# define mz_st_fppop_x(i, r, FP, extfl) (check_fp_depth(i, FP), (void)jit_FPSEL_stxi_xd_fppop(extfl, (JIT_FRAME_FLOSTACK_OFFSET - (i)), FP, r))
# define mz_st_fppop(i, r, extfl) mz_st_fppop_x(i, r, JIT_FP, extfl)

/* Patch a previously emitted branch so that it targets the current
   instruction pointer. */
#define mz_patch_branch(a) mz_patch_branch_at(a, jit_get_ip())
#define mz_patch_ucbranch(a) mz_patch_ucbranch_at(a, jit_get_ip())
1057 
/* Jump-size mode brackets. These expand to statements (not expressions)
   and are meant to be used in START/END pairs with the same condition. */
#ifdef NEED_LONG_JUMPS
/* Turn long jumps off while `cond` holds; END restores the default
   given by LONG_JUMPS_DEFAULT(_jitl). */
# define __START_SHORT_JUMPS__(cond) if (cond) { _jitl.long_jumps = 0; }
# define __END_SHORT_JUMPS__(cond) if (cond) { _jitl.long_jumps = LONG_JUMPS_DEFAULT(_jitl); }
#else
# define __START_SHORT_JUMPS__(cond) /* empty */
# define __END_SHORT_JUMPS__(cond) /* empty */
#endif

#ifdef USE_TINY_JUMPS
/* A tiny jump has to be between -128 and 127 bytes. */
# define __START_TINY_JUMPS__(cond) if (cond) { __START_SHORT_JUMPS__(1); _jitl.tiny_jumps = 1; }
# define __END_TINY_JUMPS__(cond) if (cond) { _jitl.tiny_jumps = 0; __END_SHORT_JUMPS__(1); }
/* Switch from a surrounding short-jump region (entered with `cond`) into a
   tiny-jump region, and back again. */
# define __START_INNER_TINY__(cond) __END_SHORT_JUMPS__(cond); __START_TINY_JUMPS__(1);
# define __END_INNER_TINY__(cond) __END_TINY_JUMPS__(1); __START_SHORT_JUMPS__(cond);
#else
/* Without tiny-jump support, tiny regions degrade to short regions and the
   inner-tiny brackets do nothing. */
# define __START_TINY_JUMPS__(cond) __START_SHORT_JUMPS__(cond)
# define __END_TINY_JUMPS__(cond) __END_SHORT_JUMPS__(cond)
# define __START_INNER_TINY__(cond) /* empty */
# define __END_INNER_TINY__(cond) /* empty */
#endif

/* Use tiny jumps when `tcond` holds; otherwise use short jumps subject to
   `cond`. */
#define __START_TINY_OR_SHORT_JUMPS__(tcond, cond) if (tcond) { __START_TINY_JUMPS__(1); } else { __START_SHORT_JUMPS__(cond); }
#define __END_TINY_OR_SHORT_JUMPS__(tcond, cond) if (tcond) { __END_TINY_JUMPS__(1); } else { __END_SHORT_JUMPS__(cond); }

/* Tiny-jump brackets that are active only when neither JIT_X86_64 nor
   JIT_X86_SSE is defined. */
#if defined(JIT_X86_64) || defined(JIT_X86_SSE)
# define __START_TINY_JUMPS_IF_COMPACT__(cond) /* empty */
# define __END_TINY_JUMPS_IF_COMPACT__(cond) /* empty */
#else
# define __START_TINY_JUMPS_IF_COMPACT__(cond) __START_TINY_JUMPS__(cond)
# define __END_TINY_JUMPS_IF_COMPACT__(cond) __END_TINY_JUMPS__(cond)
#endif
1089 
/* jit_fixnum_l(dest_reg, src_reg): emit code that sets dest_reg to
   (src_reg << 1) | 1. The generic form is a shift followed by an OR; when
   the port provides jit_leai_l, a single jit_leai_l(dest, src, 1, 1) is
   used instead. */
#ifndef jit_leai_l
# define jit_fixnum_l(dest_reg, src_reg) \
  (jit_lshi_l(dest_reg, src_reg, 1), \
   jit_ori_l(dest_reg, dest_reg, 0x1))
#else
# define jit_fixnum_l(dest_reg, src_reg) jit_leai_l(dest_reg, src_reg, 1, 1)
#endif
1096 
1097 /*
1098  About short-jump mode:
1099 
1100    In
1101       jit_jmpi(code);
1102    or
1103       jit_blti_i(code, v);
1104    the generated instructions can depend on the relative location
1105    between the instruction address and the actual value. Do not enable
1106    short jumps if the relative offset can change between the initial
1107    sizing pass and the final pass. Of course, also don't enable short
1108    jumps if the jump is potentially long (i.e. more than +/- 2^15
1109    on PowerPC, or more than +/- 2^31 on x86_64). Otherwise, enable
1110    short-jump mode as much as possible.
1111 
1112    Tiny-jump mode is like short-jump mode, but the offset must be
1113    within +/- 2^7. Favor tiny jumps over short jumps when possible.
1114 
1115    On x86_64, short is the default, since "short" is pretty long.
1116    Short mode is never needed for jumps within a single allocated
1117    block (on the assumption that a single block of code can never get
1118    that long). Default-long mode must be enabled if allocated code
1119    blocks can be far apart.
1120 
1121    A jit_calli() is "medium": for x86_64, it is short unless
1122    default-long mode is enabled; otherwise, it is always
1123    long.
1124 
1125    All mz_finish() are long jumps. This is true even in default-short
1126    jump mode on x86_64, since the target is likely to be C code that
   is not necessarily close to JIT-allocated code.
1128 */
1129 
1130 /* A lightweight continuation is one that contains only frames from
1131    JIT-generated code. Use scheme_call_as_lightweight_continuation()
1132    to start such a continuation, and it must be exited from the JIT
1133    world by mz_finish_lwe().
1134 
1135    Use mz_finish_lwe(addr, tmp) for a call that may capture a lightweight
1136    continuation:
1137 
1138    * JIT_V1 does not contain a value that needs to change if the runstack moves.
1139      (Other JIT constraints imply that it isn't a pointer to GCable memory.)
1140 
1141    * Relevant thread-local state is confined to the C stack, runstack,
1142      mark stack, and tl_save_fp[2].
1143 
1144    * A pointer to the runstack can be used as a Scheme_Object** argument, but
1145      only when it points to MZ_RUNSTACK.
1146 
1147   The `tmp' is a `jit_insn *' that can be used by the expansion of the
1148   macro.
1149 
1150 */
1151 
#ifdef MZ_USE_LWC
/* Record JIT_RUNSTACK_BASE in the LWC record addressed by JIT_R0, when that
   register exists in this configuration. */
# ifdef JIT_RUNSTACK_BASE
#  define SAVE_RS_BASE_REG() jit_stxi_p((intptr_t)&((Scheme_Current_LWC *)0x0)->runstack_base_end, JIT_R0, JIT_RUNSTACK_BASE)
# else
#  define SAVE_RS_BASE_REG() (void)0
# endif
/* Step back from pc by jit_return_pop_insn_len() bytes. */
# define adjust_lwc_return_address(pc) ((jit_insn *)((char *)(pc) - jit_return_pop_insn_len()))
/* Call d after recording the current frame in the thread's
   Scheme_Current_LWC record: JIT_FP -> frame_end, JIT_SP -> stack_end,
   JIT_V1 -> saved_v1, the runstack base (if any), and a patchable address
   -> original_dest. After the call is emitted, that address is patched to
   the adjusted code pointer following the call. Overwrites JIT_R0 and
   JIT_R1; `refr` receives the patchable-move reference. */
# define mz_finish_lwe(d, refr) (mz_tl_ldi_p(JIT_R0, tl_scheme_current_lwc), \
                                 jit_stxi_p((intptr_t)&((Scheme_Current_LWC *)0x0)->frame_end, JIT_R0, JIT_FP), \
                                 jit_stxi_p((intptr_t)&((Scheme_Current_LWC *)0x0)->stack_end, JIT_R0, JIT_SP), \
                                 jit_stxi_p((intptr_t)&((Scheme_Current_LWC *)0x0)->saved_v1, JIT_R0, JIT_V1), \
                                 SAVE_RS_BASE_REG(),                    \
                                 refr = jit_patchable_movi_p(JIT_R1, jit_forward()), \
                                 jit_stxi_p((intptr_t)&((Scheme_Current_LWC *)0x0)->original_dest, JIT_R0, JIT_R1), \
                                 mz_finish(d),                          \
                                 jit_patch_movi(refr, adjust_lwc_return_address(_jit.x.pc)))
#else
/* Without LWC support this is a plain mz_finish(); refr is set to NULL. */
# define mz_finish_lwe(d, refr) (refr = NULL, mz_finish(d))
#endif

#define mz_nonrs_finish_lwe(d, refr) mz_finish_lwe(d, refr)
1173 
1174 #if 0
1175 # define FOR_LOG(x) x
1176 # define LOG_IT(args) if (jitter->retain_start) { if (getenv("JITLOG")) { START_XFORM_SKIP; emit_indentation(jitter); printf args; END_XFORM_SKIP; } }
1177 static void emit_indentation(mz_jit_state *jitter)
1178 {
1179   int i = jitter->log_depth;
1180   while (i--) {
1181     printf("  ");
1182   }
1183 }
1184 #else
1185 # define FOR_LOG(x) /* empty */
1186 # define LOG_IT(args) /* empty */
1187 #endif
1188 
1189 /**********************************************************************/
1190 
/* FP-generation code is written to work both with a FP
   stack (i387) and normal FP registers (everything else), through the
   double-agent operations that end in _fppop() and _fppush(). In
   FP-stack mode, the register names don't actually matter, but the
   pushes and pops must balance. The popping branch operations pop
   both arguments before branching. */
1197 
1198 #if !defined(MZ_USE_JIT_I386) || defined(JIT_X86_SSE)
1199 /* Not FP stack, so use normal variants. */
1200 #define DIRECT_FPR_ACCESS
1201 #define jit_movi_d_fppush(rd,immd)    jit_movi_d(rd,immd)
1202 #define jit_ldi_d_fppush(rd, is)      jit_ldi_d(rd, is)
1203 #define jit_ldr_d_fppush(rd, rs)      jit_ldr_d(rd, rs)
1204 #define jit_ldr_f_fppush(rd, rs)      jit_ldr_f(rd, rs)
1205 #define jit_ldxi_d_fppush(rd, rs, is) jit_ldxi_d(rd, rs, is)
1206 #define jit_ldxi_f_fppush(rd, rs, is) jit_ldxi_f(rd, rs, is)
1207 #define jit_ldxr_d_fppush(rd, rs, is) jit_ldxr_d(rd, rs, is)
1208 #define jit_addr_d_fppop(rd,s1,s2)    jit_addr_d(rd,s1,s2)
1209 #define jit_subr_d_fppop(rd,s1,s2)    jit_subr_d(rd,s1,s2)
1210 #define jit_subrr_d_fppop(rd,s1,s2)   jit_subrr_d(rd,s1,s2)
1211 #define jit_mulr_d_fppop(rd,s1,s2)    jit_mulr_d(rd,s1,s2)
1212 #define jit_divr_d_fppop(rd,s1,s2)    jit_divr_d(rd,s1,s2)
1213 #define jit_divrr_d_fppop(rd,s1,s2)   jit_divrr_d(rd,s1,s2)
1214 #define jit_negr_d_fppop(rd,rs)       jit_negr_d(rd,rs)
1215 #define jit_abs_d_fppop(rd,rs)        jit_abs_d(rd,rs)
1216 #define jit_sqrt_d_fppop(rd,rs)       jit_sqrt_d(rd,rs)
1217 #define jit_sti_d_fppop(id, rs)       jit_sti_d(id, rs)
1218 #define jit_str_d_fppop(id, rd)       jit_str_d(id, rd)
1219 #define jit_str_f_fppop(id, rd)       jit_str_f(id, rd)
1220 #define jit_stxi_d_fppop(id, rd, rs)  jit_stxi_d(id, rd, rs)
1221 #define jit_stxr_d_fppop(id, rd, rs)  jit_stxr_d(id, rd, rs)
1222 #define jit_bger_d_fppop(d, s1, s2)   jit_bger_d(d, s1, s2)
1223 #define jit_bantiger_d_fppop(d, s1, s2) jit_bantiger_d(d, s1, s2)
1224 #define jit_bler_d_fppop(d, s1, s2)   jit_bler_d(d, s1, s2)
1225 #define jit_bantiler_d_fppop(d, s1, s2) jit_bantiler_d(d, s1, s2)
1226 #define jit_bgtr_d_fppop(d, s1, s2)   jit_bgtr_d(d, s1, s2)
1227 #define jit_bantigtr_d_fppop(d, s1, s2) jit_bantigtr_d(d, s1, s2)
1228 #define jit_bltr_d_fppop(d, s1, s2)   jit_bltr_d(d, s1, s2)
1229 #define jit_bantiltr_d_fppop(d, s1, s2) jit_bantiltr_d(d, s1, s2)
1230 #define jit_beqr_d_fppop(d, s1, s2)   jit_beqr_d(d, s1, s2)
1231 #define jit_bantieqr_d_fppop(d, s1, s2) jit_bantieqr_d(d, s1, s2)
1232 #define jit_extr_l_d_fppush(rd, rs)   jit_extr_l_d(rd, rs)
1233 #define jit_roundr_d_l_fppop(rd, rs)  jit_roundr_d_l(rd, rs)
1234 #define jit_truncr_d_l_fppop(rd, rs)  jit_truncr_d_l(rd, rs)
1235 #define jit_movr_d_rel(rd, rs)        jit_movr_d(rd, rs)
1236 #define jit_movr_d_fppush(rd, rs)        jit_movr_d(rd, rs)
1237 #define R0_FP_ADJUST(x) /* empty */
1238 #define JIT_FPR_0(r) JIT_FPR(r)
1239 #define JIT_FPR_1(r) JIT_FPR(r)
1240 #else
1241 #define R0_FP_ADJUST(x) x
1242 #define JIT_FPR_0(r) JIT_FPR0
1243 #define JIT_FPR_1(r) JIT_FPR1
1244 #endif
1245 
1246 #ifdef MZ_LONG_DOUBLE
1247 #define JIT_FPU_FPR_0(r) JIT_FPU_FPR0
1248 #define JIT_FPU_FPR_1(r) JIT_FPU_FPR1
1249 #endif
1250 
/* Load the double constant immd into FP register rd. In the first
   configuration the constant is retained via scheme_mz_retain_double(),
   its address is loaded into tmp with a patchable move, and the value is
   loaded through tmp; this form is a braced statement block, not an
   expression. Otherwise tmp is unused and jit_movi_d_fppush() is used
   directly. */
#if defined(MZ_USE_JIT_I386) && (!defined(JIT_X86_64) || !defined(JIT_X86_SSE))
/* This is better than lightning's x87 or 32-bit SSE jit_movi_d[_fppush](): */
# define mz_movi_d_fppush(rd,immd,tmp)    { GC_CAN_IGNORE void *addr; \
                                            addr = scheme_mz_retain_double(jitter, immd); \
                                            (void)jit_patchable_movi_p(tmp, addr);        \
                                            jit_ldr_d_fppush(rd, tmp); }
#else
# define mz_movi_d_fppush(rd,immd,tmp)    jit_movi_d_fppush(rd,immd)
#endif

#ifdef MZ_LONG_DOUBLE
/* Long-double counterpart: always goes through a retained constant and a
   patchable address load into tmp. Also a braced statement block. */
# define mz_fpu_movi_ld_fppush(rd,immd,tmp)    { GC_CAN_IGNORE void *addr; \
                                                 addr = scheme_mz_retain_long_double(jitter, immd); \
                                                 (void)jit_patchable_movi_p(tmp, addr);        \
                                                 jit_fpu_ldr_ld_fppush(rd, tmp); }
#endif
1267 
1268 
1269 /**********************************************************************/
1270 
1271 /* Does boxing a type require registers, possibly GC, etc.? */
1272 #ifdef MZ_LONG_DOUBLE
1273 #define JIT_TYPE_NEEDS_BOXING(t) ((t) == SCHEME_LOCAL_TYPE_FLONUM \
1274                                   || (t) == SCHEME_LOCAL_TYPE_EXTFLONUM)
1275 
1276 #else
1277 #define JIT_TYPE_NEEDS_BOXING(t) ((t) == SCHEME_LOCAL_TYPE_FLONUM)
1278 #endif
1279 
1280 /**********************************************************************/
1281 
#ifdef MZ_USE_FUTURES
# define mz_prepare_direct_prim(n) mz_prepare(n)
/* Push reg as a call argument and call proc through mz_finish_lwe(). */
# define mz_finishr_direct_prim(reg, proc, refr) (jit_pusharg_p(reg), (void)mz_finish_lwe(proc, refr))
# define mz_direct_only(p) /* skip this arg, so that total count <= 3 args */
/* Inlines check of scheme_use_rtcall: */
/* Emits both call paths and picks one at run time: when
   tl_scheme_use_rtcall is 0 the code branches to the direct path
   (direct_only, first_arg, then a call through reg); otherwise it runs
   first_arg and calls prim_indirect with reg pushed as an argument. The
   argument state is saved before the first path and restored before the
   second. This is a braced statement block that declares locals and
   contains CHECK_LIMIT(), so it can return from the enclosing function.
   first_arg is expanded twice. */
# define mz_generate_direct_prim(direct_only, first_arg, reg, prim_indirect) \
  { \
     GC_CAN_IGNORE jit_insn *refdirect, *refcont, *refitsr;      \
     int argstate; \
     jit_save_argstate(argstate); \
     mz_tl_ldi_i(JIT_R0, tl_scheme_use_rtcall); \
     __START_TINY_JUMPS__(1); \
     refdirect = jit_beqi_i(jit_forward(), JIT_R0, 0); \
     first_arg; \
     mz_finishr_direct_prim(reg, prim_indirect, refitsr);       \
     refcont = jit_jmpi(jit_forward()); \
     CHECK_LIMIT(); \
     mz_patch_branch(refdirect); \
     jit_restore_argstate(argstate); \
     direct_only; \
     first_arg; \
     mz_finishr(reg); \
     mz_patch_ucbranch(refcont); \
     __END_TINY_JUMPS__(1); \
  }
/* Call prim through mz_finish_lwe() unless tl_scheme_use_rtcall is 0 at
   run time, in which case the code branches to a plain mz_finish(prim).
   Braced statement block; overwrites JIT_R0. */
# define mz_finish_prim_lwe(prim, refr) \
    { \
      GC_CAN_IGNORE jit_insn *refdirect, *refdone; \
      int argstate; \
      __START_TINY_JUMPS__(1); \
      jit_save_argstate(argstate); \
      mz_tl_ldi_i(JIT_R0, tl_scheme_use_rtcall); \
      refdirect = jit_beqi_i(jit_forward(), JIT_R0, 0); \
      (void)mz_finish_lwe(prim, refr); \
      refdone = jit_jmpi(jit_forward()); \
      jit_restore_argstate(argstate); \
      mz_patch_branch(refdirect); \
      (void)mz_finish(prim); \
      mz_patch_ucbranch(refdone); \
      __END_TINY_JUMPS__(1); \
    }
#else
/* futures not enabled */
# define mz_prepare_direct_prim(n) mz_prepare(n)
/* Note: two parameters here, versus three in the futures configuration;
   proc is ignored and the call goes through reg. */
# define mz_finishr_direct_prim(reg, proc) mz_finishr(reg)
# define mz_direct_only(p) p
/* The ts_ names map directly onto the plain functions. */
# define ts_scheme_on_demand scheme_on_demand
# define ts_prepare_retry_alloc prepare_retry_alloc
# define ts_make_fsemaphore scheme_make_fsemaphore
/* Single path, written as a comma expression: direct_only, first_arg, then
   the call through reg. */
# define mz_generate_direct_prim(direct_only, first_arg, reg, prim_indirect) \
  (mz_direct_only(direct_only), first_arg, mz_finishr_direct_prim(reg, prim_indirect))
# define mz_finish_prim_lwe(prim, refr) (void)mz_finish_lwe(prim, refr)
#endif
1335 
1336 /**********************************************************************/
1337 
/* Non-zero when the `name` field of the primitive procedure p equals the
   C string nm (compared with strcmp). Both arguments are parenthesized so
   that expression arguments (e.g. pointer arithmetic) are cast and
   compared as a whole. */
#define IS_NAMED_PRIM(p, nm) (!strcmp(((Scheme_Primitive_Proc *)(p))->name, (nm)))
1339 
1340 /**********************************************************************/
1341 /*                             jitstate                               */
1342 /**********************************************************************/
1343 
/* Slack, in bytes, beyond jitter->limit (see its use in past_limit()). */
#define JIT_BUFFER_PAD_SIZE 200

/* True once the raw instruction pointer has moved beyond jitter->limit. */
#define PAST_LIMIT() ((uintptr_t)jit_get_raw_ip() > (uintptr_t)jitter->limit)
/* Statement macro: returns past_limit(...) from the ENCLOSING function when
   the limit has been passed. It expands to a bare `if` with its own
   trailing semicolon, so it must not be used as the body of an if/else. */
#define CHECK_LIMIT() if (PAST_LIMIT()) return past_limit(jitter, __FILE__, __LINE__);
1348 #if 1
1349 # define past_limit(j, f, l) 0
1350 #else
/* Debugging version of past_limit(): aborts with a message naming the
   file and line when the instruction pointer is more than
   JIT_BUFFER_PAD_SIZE bytes beyond jitter->limit, or when
   jitter->retain_start is set. Otherwise returns 0. */
static int past_limit(mz_jit_state *jitter, const char *file, int line)
{
  int beyond_pad;

  beyond_pad = ((uintptr_t)jit_get_raw_ip()
                > (uintptr_t)jitter->limit + JIT_BUFFER_PAD_SIZE);
  if (!beyond_pad && !jitter->retain_start) {
    return 0;
  }

  printf("way past %s %d\n", file, line);
  abort();
  return 0;
}
1359 #endif
1360 
1361 /* Use CHECK_NESTED_GENERATE() after a nested call to scheme_generate_one()
1362    or after getting a shared code pointer that may be generated by another
1363    place: */
1364 #ifdef SET_DEFAULT_LONG_JUMPS
1365 extern int scheme_check_long_mode(int long_mode);
1366 # define CHECK_NESTED_GENERATE() if (scheme_check_long_mode(_jitl.long_jumps_default)) return 0;
1367 #else
1368 # define CHECK_NESTED_GENERATE() /* empty */
1369 #endif
1370 
1371 void *scheme_generate_one(mz_jit_state *old_jitter,
1372 			  Generate_Proc generate,
1373 			  void *data,
1374 			  int gcable,
1375 			  void *save_ptr,
1376 			  Scheme_Native_Lambda *ndata);
1377 int scheme_mz_is_closure(mz_jit_state *jitter, int i, int arity, int *_flags);
1378 void scheme_mz_runstack_saved(mz_jit_state *jitter);
1379 int scheme_mz_runstack_restored(mz_jit_state *jitter);
1380 void scheme_mz_flostack_restore(mz_jit_state *jitter, int space, int pos, int gen, int adj);
1381 int scheme_mz_flostack_save(mz_jit_state *jitter, int *pos);
1382 int scheme_mz_compute_runstack_restored(mz_jit_state *jitter, int adj, int skip);
1383 int scheme_mz_retain_it(mz_jit_state *jitter, void *v);
1384 double *scheme_mz_retain_double(mz_jit_state *jitter, double d);
1385 #ifdef MZ_LONG_DOUBLE
1386 long_double *scheme_mz_retain_long_double(mz_jit_state *jitter, long_double d);
1387 #endif
1388 int scheme_mz_remap_it(mz_jit_state *jitter, int i);
1389 void scheme_mz_pushr_p_it(mz_jit_state *jitter, int reg);
1390 void scheme_mz_popr_p_it(mz_jit_state *jitter, int reg, int discard);
1391 void scheme_extra_pushed(mz_jit_state *jitter, int n);
1392 void scheme_extra_popped(mz_jit_state *jitter, int n);
1393 void scheme_mz_need_space(mz_jit_state *jitter, int need_extra);
1394 int scheme_stack_safety(mz_jit_state *jitter, int cnt, int offset);
1395 #ifdef USE_FLONUM_UNBOXING
1396 int scheme_mz_flostack_pos(mz_jit_state *jitter, int i);
1397 #endif
1398 void scheme_mz_load_retained(mz_jit_state *jitter, int rs, void *o);
1399 
/* Declarations of the scheme_mz_runstack_* / scheme_mz_try_runstack_pop
   functions.  Only the prototypes are visible here; each takes the current
   mz_jit_state plus integer arguments.  All return void except
   scheme_mz_try_runstack_pop(), which returns an int. */
1400 void scheme_mz_runstack_skipped(mz_jit_state *jitter, int n);
1401 void scheme_mz_runstack_unskipped(mz_jit_state *jitter, int n);
1402 void scheme_mz_runstack_pushed(mz_jit_state *jitter, int n);
1403 void scheme_mz_runstack_closure_pushed(mz_jit_state *jitter, int a, int flags);
1404 void scheme_mz_runstack_flonum_pushed(mz_jit_state *jitter, int pos);
1405 void scheme_mz_runstack_popped(mz_jit_state *jitter, int n);
1406 int scheme_mz_try_runstack_pop(mz_jit_state *jitter, int n);
1407 
/* Short spellings: every mz_<name>(...) macro forwards its arguments,
   unchanged and in the same order, to the function scheme_mz_<name>(...). */
1408 #define mz_runstack_skipped(j, n) scheme_mz_runstack_skipped(j, n)
1409 #define mz_runstack_unskipped(j, n) scheme_mz_runstack_unskipped(j, n)
1410 #define mz_runstack_pushed(j, n) scheme_mz_runstack_pushed(j, n)
1411 #define mz_runstack_closure_pushed(j, n, f) scheme_mz_runstack_closure_pushed(j, n, f)
1412 #define mz_runstack_flonum_pushed(j, n) scheme_mz_runstack_flonum_pushed(j, n)
1413 #define mz_runstack_popped(j, n) scheme_mz_runstack_popped(j, n)
1414 #define mz_try_runstack_pop(j, n) scheme_mz_try_runstack_pop(j, n)
1415 
/* Record handed (by pointer) to scheme_mz_unbox_save() and
   scheme_mz_unbox_restore().
   NOTE(review): presumably save copies the jitter's current unboxing flags
   into this record and restore writes them back -- confirm against the
   function definitions, which are not in this header. */
1416 typedef struct {
1417   int unbox;
1418 #ifdef MZ_LONG_DOUBLE
1419   int unbox_extflonum; /* field exists only when MZ_LONG_DOUBLE is defined */
1420 #endif
1421 } mz_jit_unbox_state;
1422 
1423 void scheme_mz_unbox_save(mz_jit_state *jitter, mz_jit_unbox_state *r);
1424 void scheme_mz_unbox_restore(mz_jit_state *jitter, mz_jit_unbox_state *r);
1425 
1426 /**********************************************************************/
1427 /*                             jitinline                              */
1428 /**********************************************************************/
1429 
1430 int scheme_inlined_unary_prim(Scheme_Object *o, Scheme_Object *_app, mz_jit_state *jitter);
1431 int scheme_inlined_binary_prim(Scheme_Object *o, Scheme_Object *_app, mz_jit_state *jitter);
1432 int scheme_inlined_nary_prim(Scheme_Object *o, Scheme_Object *_app, mz_jit_state *jitter);
1433 int scheme_generate_inlined_unary(mz_jit_state *jitter, Scheme_App2_Rec *app, int is_tail, int multi_ok,
1434 				  Branch_Info *for_branch, int branch_short, int result_ignored,
1435                                   int dest);
1436 int scheme_generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, int is_tail, int multi_ok,
1437 				   Branch_Info *for_branch, int branch_short, int result_ignored,
1438                                    int dest);
1439 int scheme_generate_inlined_nary(mz_jit_state *jitter, Scheme_App_Rec *app, int is_tail, int multi_ok,
1440                                  Branch_Info *for_branch, int branch_short, int result_ignored,
1441                                  int dest);
1442 int scheme_generate_inlined_test(mz_jit_state *jitter, Scheme_Object *obj, int branch_short,
1443                                  Branch_Info *for_branch);
1444 int scheme_generate_cons_alloc(mz_jit_state *jitter, int rev, int inline_retry, int known_list, int dest);
1445 int scheme_generate_struct_alloc(mz_jit_state *jitter, int num_args,
1446                                  int inline_slow, int pop_and_jump, int check_proc,
1447                                  int is_tail, int multi_ok, int dest);
1448 int scheme_generate_two_args(Scheme_Object *rand1, Scheme_Object *rand2, mz_jit_state *jitter,
1449                              int order_matters, int skipped);
1450 
1451 /**********************************************************************/
1452 /*                             jitalloc                               */
1453 /**********************************************************************/
1454 
/* Exactly one of the two declaration sets below is visible, chosen by
   CAN_INLINE_ALLOC. */
1455 #ifdef CAN_INLINE_ALLOC
/* CAN_INLINE_ALLOC defined: functions that take the jitter.
   NOTE(review): presumably these emit allocation code inline -- confirm. */
1456 int scheme_inline_alloc(mz_jit_state *jitter, int amt, Scheme_Type ty, int flags,
1457 			int keep_r0_r1, int keep_fpr1, int inline_retry
1458                         , int keep_extfpr1);
1459 int scheme_generate_alloc_retry(mz_jit_state *jitter, int i);
1460 #else
/* CAN_INLINE_ALLOC not defined: plain helpers that take values (no jitter
   argument) and return a Scheme_Object*.
   NOTE(review): presumably called from generated code in place of inline
   allocation -- confirm. */
1461 Scheme_Object *scheme_jit_make_list(GC_CAN_IGNORE Scheme_Object **rs, intptr_t n);
1462 Scheme_Object *scheme_jit_make_list_star(GC_CAN_IGNORE Scheme_Object **rs, intptr_t n);
1463 Scheme_Object *scheme_jit_make_vector(intptr_t n);
1464 Scheme_Object *scheme_jit_make_one_element_vector(Scheme_Object *a);
1465 Scheme_Object *scheme_jit_make_two_element_vector(Scheme_Object *a, Scheme_Object *b);
1466 Scheme_Object *scheme_jit_make_ivector(intptr_t n);
1467 Scheme_Object *scheme_jit_make_one_element_ivector(Scheme_Object *a);
1468 Scheme_Object *scheme_jit_make_two_element_ivector(Scheme_Object *a, Scheme_Object *b);
1469 #endif
1470 
1471 /**********************************************************************/
1472 /*                             jitarith                               */
1473 /**********************************************************************/
1474 
1475 int scheme_jit_is_fixnum(Scheme_Object *rand);
1476 int scheme_can_unbox_inline(Scheme_Object *obj, int fuel, int regs, int unsafely, int extfl);
1477 int scheme_can_unbox_directly(Scheme_Object *obj, int extfl);
1478 int scheme_generate_unboxing(mz_jit_state *jitter, int target);
1479 int scheme_generate_pop_unboxed(mz_jit_state *jitter);
1480 int scheme_generate_nary_arith(mz_jit_state *jitter, Scheme_App_Rec *app,
1481                                int arith, int cmp, Branch_Info *for_branch, int branch_short,
1482                                int unsafe_fx, int unsafe_fl,
1483                                int dest);
1484 int scheme_generate_alloc_double(mz_jit_state *jitter, int inline_retry, int dest);
1485 int scheme_generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Object *rand, Scheme_Object *rand2,
1486 			  int orig_args, int arith, int cmp, int v,
1487                           Branch_Info *for_branch, int branch_short,
1488                           int unsafe_fx, int unsafe_fl, GC_CAN_IGNORE jit_insn *overflow_refslow,
1489                           int dest);
1490 
1491 #ifdef MZ_LONG_DOUBLE
1492 int scheme_generate_alloc_long_double(mz_jit_state *jitter, int inline_retry, int dest);
1493 int scheme_generate_extflonum_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Object *rand, Scheme_Object *rand2,
1494                                       int orig_args, int arith, int cmp, int v,
1495                                       Branch_Info *for_branch, int branch_short, int unsafe_fx, int unsafe_extfl,
1496                                       GC_CAN_IGNORE jit_insn *overflow_refslow, int dest);
1497 #endif
1498 
1499 int scheme_generate_alloc_X_double(mz_jit_state *jitter, int inline_retry, int dest, int extfl);
1500 
1501 /**********************************************************************/
1502 /*                              jitcall                               */
1503 /**********************************************************************/
1504 
1505 typedef struct jit_direct_arg jit_direct_arg;
1506 
1507 void *scheme_generate_shared_call(int num_rands, mz_jit_state *old_jitter, int multi_ok, int result_ignored,
1508                                   int is_tail, int direct_prim, int direct_native, int nontail_self, int unboxed_args);
1509 void scheme_ensure_retry_available(mz_jit_state *jitter, int multi_ok, int result_ignored);
1510 int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_rands, int num_pushes,
1511 			mz_jit_state *jitter, int is_tail, int multi_ok, int ignored_result,
1512                         int no_call);
1513 int scheme_generate_tail_call(mz_jit_state *jitter, int num_rands, int direct_native, int need_set_rs,
1514                               int is_inline, Scheme_Native_Closure *direct_to_code, jit_direct_arg *direct_arg,
1515                               Scheme_Lambda *direct_data);
1516 int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direct_native, int need_set_rs,
1517 				  int multi_ok, int result_ignored, int nontail_self, int pop_and_jump,
1518                                   int is_inlined, int unboxed_args, jit_insn *reftop);
1519 int scheme_generate_finish_tail_call(mz_jit_state *jitter, int direct_native);
1520 int scheme_generate_finish_apply(mz_jit_state *jitter);
1521 int scheme_generate_finish_multi_apply(mz_jit_state *jitter);
1522 int scheme_generate_finish_tail_apply(mz_jit_state *jitter);
1523 void scheme_jit_register_sub_func(mz_jit_state *jitter, void *code, Scheme_Object *protocol);
1524 void scheme_jit_register_helper_func(mz_jit_state *jitter, void *code, int gcable);
1525 #ifdef MZ_USE_FUTURES
1526 Scheme_Object *scheme_noncm_prim_indirect(Scheme_Prim proc, int argc);
1527 Scheme_Object *scheme_prim_indirect(Scheme_Primitive_Closure_Proc proc, int argc, Scheme_Object *self);
1528 #endif
1529 int scheme_generate_force_value_same_mark(mz_jit_state *jitter);
1530 
1531 /**********************************************************************/
1532 /*                             jitstack                               */
1533 /**********************************************************************/
1534 
1535 void scheme_jit_add_symbol(uintptr_t start, uintptr_t end, void *value, int gc_able);
1536 void *scheme_decrement_cache_stack_pos(void *p);
1537 void scheme_register_stack_cache_stack(void);
1538 #ifdef MZ_PRECISE_GC
1539 void scheme_jit_release_native_code(void *fnlized, void *p);
1540 #endif
1541 
1542 /**********************************************************************/
1543 /*                            jitcommon                               */
1544 /**********************************************************************/
1545 
1546 int scheme_do_generate_common(mz_jit_state *jitter, void *_data);
1547 int scheme_do_generate_more_common(mz_jit_state *jitter, void *_data);
1548 
1549 int scheme_save_struct_temp(mz_jit_state *jitter, int reg);
1550 int scheme_restore_struct_temp(mz_jit_state *jitter, int reg);
1551 int scheme_generate_struct_op(mz_jit_state *jitter, int kind, int for_branch,
1552                               Branch_Info *branch_info, int branch_short,
1553                               int result_ignored,
1554                               int check_proc, int check_arg_fixnum,
1555                               int type_pos, int field_pos,
1556                               int authentic, int type_unpacked,
1557                               int pop_and_jump,
1558                               jit_insn *refslow, jit_insn *refslow2,
1559                               jit_insn *bref_false, jit_insn *bref_true);
1560 
1561 /**********************************************************************/
1562 /*                               jit                                  */
1563 /**********************************************************************/
1564 
1565 int scheme_generate_non_tail(Scheme_Object *obj, mz_jit_state *jitter, int multi_ok, int need_ends, int ignored);
1566 int scheme_generate_non_tail_with_branch(Scheme_Object *obj, mz_jit_state *jitter, int multi_ok, int need_ends, int ignored,
1567                                          Branch_Info *for_branch);
1568 int scheme_generate_non_tail_for_values(Scheme_Object *obj, mz_jit_state *jitter, int multi_ok, int need_ends, int ignored,
1569                                         Expected_Values_Info *for_values);
1570 int scheme_generate(Scheme_Object *obj, mz_jit_state *jitter, int tail_ok, int wcm_may_replace, int multi_ok, int target,
1571                     Branch_Info *for_branch, Expected_Values_Info *for_values);
1572 int scheme_generate_unboxed(Scheme_Object *obj, mz_jit_state *jitter, int inlined_ok, int unbox_anyway);
1573 
1574 void scheme_generate_function_prolog(mz_jit_state *jitter);
1575 
1576 #ifdef USE_FLONUM_UNBOXING
1577 int scheme_generate_flonum_local_unboxing(mz_jit_state *jitter, int push, int no_store, int extfl);
1578 int scheme_generate_flonum_local_boxing(mz_jit_state *jitter, int pos, int offset, int target, int extfl);
1579 #endif
1580 int scheme_generate_non_tail_mark_pos_prefix(mz_jit_state *jitter);
1581 void scheme_generate_non_tail_mark_pos_suffix(mz_jit_state *jitter);
1582 
1583 Scheme_Object **scheme_on_demand(Scheme_Object **argv);
1584 Scheme_Object **scheme_on_demand_with_args(Scheme_Object **in_argv, Scheme_Object **argv, int argv_delta);
1585 
1586 void scheme_jit_allocate_values(int count, Scheme_Thread *p);
1587 Scheme_Structure *scheme_jit_allocate_structure(int argc, Scheme_Struct_Type *stype);
1588 
1589 void scheme_prepare_branch_jump(mz_jit_state *jitter, Branch_Info *for_branch);
1590 void scheme_branch_for_true(mz_jit_state *jitter, Branch_Info *for_branch);
1591 void scheme_add_or_patch_branch_true_uc(mz_jit_state *jitter, Branch_Info *for_branch, jit_insn *ref);
1592 void scheme_add_or_patch_branch_true_movi(mz_jit_state *jitter, Branch_Info *for_branch, jit_insn *ref);
1593 void scheme_add_branch_false(Branch_Info *for_branch, jit_insn *ref);
1594 void scheme_add_branch_false_movi(Branch_Info *for_branch, jit_insn *ref);
1595 
1596 int scheme_ok_to_move_local(Scheme_Object *obj);
1597 int scheme_ok_to_delay_local(Scheme_Object *obj);
1598 int scheme_can_delay_and_avoids_r1(Scheme_Object *obj);
1599 int scheme_can_delay_and_avoids_r1_r2(Scheme_Object *obj);
1600 int scheme_is_constant_and_avoids_r1(Scheme_Object *obj);
1601 int scheme_is_relatively_constant_and_avoids_r1_maybe_fp(Scheme_Object *obj, Scheme_Object *wrt,
1602                                                          int fp_ok, int extfl);
1603 int scheme_is_relatively_constant_and_avoids_r1(Scheme_Object *obj, Scheme_Object *wrt);
1604 int scheme_needs_only_target_register(Scheme_Object *obj, int and_can_reorder);
1605 int scheme_is_noncm(Scheme_Object *a, mz_jit_state *jitter, int depth, int stack_start);
1606 int scheme_is_simple(Scheme_Object *obj, int depth, int just_markless, mz_jit_state *jitter, int stack_start);
1607 #define INIT_SIMPLE_DEPTH 10
1608 int scheme_is_non_gc(Scheme_Object *obj, int depth);
1609 
1610 #ifdef USE_FLONUM_UNBOXING
/* Flonum / extflonum bit queries on a Scheme_Lambda, available only when
   USE_FLONUM_UNBOXING is defined.  Each macro calls the matching
   scheme_jit_check_closure_*_bit(data, pos, delta) function:
     - the ..._ARGUMENT_... forms pass delta = 0;
     - the ..._CONTENT_... forms pass delta = (data)->num_params.
   The CONTENT forms evaluate `data` twice, so pass an expression without
   side effects.  `data` is parenthesized before `->` so that arguments such
   as `*p` or a cast expression select the member of the intended object. */
1611 int scheme_jit_check_closure_flonum_bit(Scheme_Lambda *data, int pos, int delta);
1612 # define CLOSURE_ARGUMENT_IS_FLONUM(data, pos) scheme_jit_check_closure_flonum_bit(data, pos, 0)
1613 # define CLOSURE_CONTENT_IS_FLONUM(data, pos) scheme_jit_check_closure_flonum_bit(data, pos, (data)->num_params)
1614 int scheme_jit_check_closure_extflonum_bit(Scheme_Lambda *data, int pos, int delta);
1615 # define CLOSURE_ARGUMENT_IS_EXTFLONUM(data, pos) scheme_jit_check_closure_extflonum_bit(data, pos, 0)
1616 # define CLOSURE_CONTENT_IS_EXTFLONUM(data, pos) scheme_jit_check_closure_extflonum_bit(data, pos, (data)->num_params)
1617 #endif
1618 
1619 Scheme_Object *scheme_extract_global(Scheme_Object *o, Scheme_Native_Closure *nc, int local_only);
1620 Scheme_Object *scheme_extract_closure_local(Scheme_Object *obj, mz_jit_state *jitter, int extra_push, int get_constant);
1621 Scheme_Object *scheme_specialize_to_constant(Scheme_Object *obj, mz_jit_state *jitter, int extra_push, int extract_static);
1622 
1623 void scheme_jit_register_traversers(void);
1624 #ifdef MZ_USE_LWC
1625 Scheme_Object *scheme_jit_continuation_apply_install(Apply_LWC_Args *args);
1626 #endif
1627 
1628 
1629 /**********************************************************************/
1630 
1631 /* Arithmetic operation codes. Used in jitarith.c and jitinline.c. */
1632 
1633 /*  +, add1, fx+, unsafe-fx+, fl+, unsafe-fl+ */
1634 #define ARITH_ADD      1
1635 /*  -, sub1, fx-, unsafe-fx-, fl-, unsafe-fl- */
1636 #define ARITH_SUB     -1
1637 /*  *, fx*, unsafe-fx*, fl*, unsafe-fl* */
1638 #define ARITH_MUL      2
1639 /*  /, fl/, unsafe-fl/ */
1640 #define ARITH_DIV     -2
1641 /*  quotient, fxquotient, unsafe-fxquotient */
1642 #define ARITH_QUOT    -3
1643 /*  remainder, fxremainder, unsafe-fxremainder */
1644 #define ARITH_REM     -4
1645 /*  modulo, fxmodulo, unsafe-fxmodulo */
1646 #define ARITH_MOD     -5
1647 /*  bitwise-and, fxand, unsafe-fxand */
1648 #define ARITH_AND      3
1649 /*  bitwise-ior, fxior, unsafe-fxior */
1650 #define ARITH_IOR      4
1651 /*  bitwise-xor, fxxor, unsafe-fxxor */
1652 #define ARITH_XOR      5
1653 /*  fxlshift, unsafe-fxlshift */
1654 #define ARITH_LSH      6
1655 /*  fxrshift, unsafe-fxrshift */
1656 #define ARITH_RSH     -6
1657 /*  bitwise-not, fxnot, unsafe-fxnot */
1658 #define ARITH_NOT      7
1659 /*  min, fxmin, unsafe-fxmin, flmin, unsafe-flmin */
1660 #define ARITH_MIN      9
1661 /*  max, fxmax, unsafe-fxmax, flmax, unsafe-flmax */
1662 #define ARITH_MAX      10
1663 /*  abs, fxabs, unsafe-fxabs, flabs, unsafe-flabs */
1664 #define ARITH_ABS      11
1665 /*  exact->inexact, real->double-flonum, unsafe-fx->fl, ->fl, fx->fl */
1666 #define ARITH_EX_INEX  12
1667 /*  sqrt, flsqrt, unsafe-flsqrt */
1668 #define ARITH_SQRT     13
1669 /*  flfloor, flceiling, flround, fltruncate, flsin,  flcos, fltan, */
1670 /*  flasin, flacos, flatan, flexp, fllog */
1671 #define ARITH_FLUNOP   14
1672 /*  inexact->exact, fl->exact-integer */
1673 #define ARITH_INEX_EX  15
1674 /*  fl->fx, unsafe-fl->fx, extfl->fx, unsafe-extfl->fx */
1675 #define ARITH_INEX_TRUNC_EX  16
1676 /*  flexpt */
1677 #define ARITH_EXPT     17
1678 /*  fx+/wraparound, unsafe-fx+/wraparound */
1679 #define ARITH_ADD_WRAP      18
1680 /*  fx-/wraparound, unsafe-fx-/wraparound */
1681 #define ARITH_SUB_WRAP     -18
1682 /*  fx* /wraparound, unsafe-fx* /wraparound (written with a space so the name does not close this comment) */
1683 #define ARITH_MUL_WRAP      19
1684 /*  fxlshift/wraparound, unsafe-fxlshift/wraparound */
1685 #define ARITH_LSH_WRAP      20
1686 /*  arithmetic-shift */
1687 #define ARITH_SH            21
1688 
1689 /* Comparison codes. Used in jitarith.c and jitinline.c. */
/* Mirror-image tests share a magnitude and differ only in sign:
   CMP_GEQ/CMP_LEQ = 1/-1, CMP_GT/CMP_LT = 2/-2, CMP_EVENP/CMP_ODDP = 4/-4.
   CMP_EQUAL (0) and CMP_BIT (3) have no negated counterpart.
   NOTE(review): presumably this lets a comparison be flipped by negating
   its code -- confirm in jitarith.c before relying on it. */
1690 
1691 /*  zero?, =, fx=, unsafe-fx=, fl=, unsafe-fl= */
1692 #define CMP_EQUAL  0
1693 /*  >=, fx>=, unsafe-fx>=, fl>=, unsafe-fl>= */
1694 #define CMP_GEQ    1
1695 /*  <=, fx<=, unsafe-fx<=, fl<=, unsafe-fl<= */
1696 #define CMP_LEQ   -1
1697 /*  >, fx>, unsafe-fx>, fl>, unsafe-fl>, positive? */
1698 #define CMP_GT     2
1699 /*  <, fx<, unsafe-fx<, fl<, unsafe-fl<, negative? */
1700 #define CMP_LT    -2
1701 /*  bitwise-bit-test? */
1702 #define CMP_BIT    3
1703 /*  even? */
1704 #define CMP_EVENP  4
1705 /*  odd? */
1706 #define CMP_ODDP  -4
1707 
1708 /**********************************************************************/
1709 
/* Codes 1..7 distinguishing the kinds of struct procedure handled by the
   inliner.
   NOTE(review): presumably these are the values of the `kind` argument of
   scheme_generate_struct_op(); judging by the names: predicate, field
   getter, field setter, property getter (without/with default), property
   predicate, constructor -- confirm against the users. */
1710 #define INLINE_STRUCT_PROC_PRED 1
1711 #define INLINE_STRUCT_PROC_GET  2
1712 #define INLINE_STRUCT_PROC_SET  3
1713 #define INLINE_STRUCT_PROC_PROP_GET 4
1714 #define INLINE_STRUCT_PROC_PROP_GET_W_DEFAULT 5
1715 #define INLINE_STRUCT_PROC_PROP_PRED 6
1716 #define INLINE_STRUCT_PROC_CONSTR 7
1717