1 /*
2  * Copyright (C) 2012-2020  Free Software Foundation, Inc.
3  *
4  * This file is part of GNU lightning.
5  *
6  * GNU lightning is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; either version 3, or (at your option)
9  * any later version.
10  *
11  * GNU lightning is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14  * License for more details.
15  *
16  * Authors:
17  *      Paulo Cesar Pereira de Andrade
18  */
19 
20 #if HAVE_CONFIG_H
21 # include "config.h"
22 #endif
23 
24 #include <assert.h>
25 #include <fcntl.h>
26 #include <limits.h>
27 #include <stdio.h>
28 
29 #include "../lightening.h"
30 
/* Hard assertion: aborts in all build configurations (unlike <assert.h>,
   which is compiled out under NDEBUG).  Used for invariants that must
   hold even in release builds of the JIT.  */
#define ASSERT(x) do { if (!(x)) abort(); } while (0)

#if defined(__GNUC__)
/* Branch-prediction and unused-symbol annotations; no-ops elsewhere.  */
# define maybe_unused           __attribute__ ((unused))
# define UNLIKELY(exprn) __builtin_expect(exprn, 0)
#else
# define maybe_unused           /**/
# define UNLIKELY(exprn) exprn
#endif
40 
/* The emission cursor, viewed as pointers of each store width the
   emitters use, plus raw integer forms for alignment arithmetic.  */
union jit_pc
{
  uint8_t *uc;   /* byte stores (emit_u8) */
  uint16_t *us;  /* halfword stores (emit_u16) */
  uint32_t *ui;  /* word stores (emit_u32) */
  uint64_t *ul;  /* doubleword stores (emit_u64) */
  intptr_t w;    /* signed integer view */
  uintptr_t uw;  /* unsigned integer view */
};
50 
#ifdef JIT_NEEDS_LITERAL_POOL
/* One out-of-line constant awaiting emission: the relocation that will
   reference it, and the literal value to materialize.  */
struct jit_literal_pool_entry
{
  jit_reloc_t reloc;
  uintptr_t value;
};

/* Growable set of pending literals.  `deadline` is a byte offset from
   the buffer start by which the pool must be dumped so that the
   pending short-range relocs can still reach their entries.  */
struct jit_literal_pool
{
  uint32_t deadline;
  uint32_t size;
  uint32_t capacity;
  struct jit_literal_pool_entry entries[];  /* C99 flexible array member */
};
#endif // JIT_NEEDS_LITERAL_POOL
66 
/* All state for one assembler: the output buffer bounds, the emission
   cursor, temp-register bookkeeping, and the caller-supplied memory
   allocators used for the state object itself (and the literal pool).  */
struct jit_state
{
  union jit_pc pc;                 /* Current emission position.  */
  uint8_t *start;                  /* Buffer start; NULL between sessions.  */
  uint8_t *last_instruction_start;
  uint8_t *limit;                  /* One past the end of the buffer.  */
  uint8_t temp_gpr_saved;          /* Count of temp GPRs handed out.  */
  uint8_t temp_fpr_saved;          /* Count of temp FPRs handed out.  */
  uint8_t overflow;                /* Sticky flag: emission ran past limit.  */
  uint8_t emitting_data;           /* Inside jit_begin_data/jit_end_data.  */
  int frame_size; // Used to know when to align stack.
#ifdef JIT_NEEDS_LITERAL_POOL
  struct jit_literal_pool *pool;
#endif
  void* (*alloc)(size_t);
  void (*free)(void*);
};
84 
85 static jit_bool_t jit_get_cpu(void);
86 static jit_bool_t jit_init(jit_state_t *);
87 static void jit_flush(void *fptr, void *tptr);
88 static void jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc,
89                             jit_pointer_t addr);
90 static void* bless_function_pointer(void *ptr);
91 
92 struct abi_arg_iterator;
93 
94 #ifdef JIT_NEEDS_LITERAL_POOL
95 static struct jit_literal_pool* alloc_literal_pool(jit_state_t *_jit,
96                                                    size_t capacity);
97 static void reset_literal_pool(jit_state_t *_jit,
98                                struct jit_literal_pool *pool);
99 static jit_bool_t add_pending_literal(jit_state_t *_jit, jit_reloc_t src,
100                                       uint8_t max_offset_bits);
101 static void remove_pending_literal(jit_state_t *_jit, jit_reloc_t src);
102 static void patch_pending_literal(jit_state_t *_jit, jit_reloc_t src,
103                                   uintptr_t value);
104 enum guard_pool { GUARD_NEEDED, NO_GUARD_NEEDED };
105 static void emit_literal_pool(jit_state_t *_jit, enum guard_pool guard);
106 
107 static int32_t read_jmp_offset(uint32_t *loc);
108 static int offset_in_jmp_range(ptrdiff_t offset, int flags);
109 static void patch_jmp_offset(uint32_t *loc, ptrdiff_t offset);
110 static void patch_veneer_jmp_offset(uint32_t *loc, ptrdiff_t offset);
111 static int32_t read_jcc_offset(uint32_t *loc);
112 static int offset_in_jcc_range(ptrdiff_t offset, int flags);
113 static void patch_jcc_offset(uint32_t *loc, ptrdiff_t offset);
114 static void patch_veneer_jcc_offset(uint32_t *loc, ptrdiff_t offset);
115 static void patch_veneer(uint32_t *loc, jit_pointer_t addr);
116 static int32_t read_load_from_pool_offset(uint32_t *loc);
117 #endif
118 
119 static jit_bool_t is_fpr_arg(enum jit_operand_abi arg);
120 static jit_bool_t is_gpr_arg(enum jit_operand_abi arg);
121 static void reset_abi_arg_iterator(struct abi_arg_iterator *iter, size_t argc,
122                                    const jit_operand_t *args);
123 static void next_abi_arg(struct abi_arg_iterator *iter,
124                          jit_operand_t *arg);
125 
126 jit_bool_t
init_jit(void)127 init_jit(void)
128 {
129   return jit_get_cpu ();
130 }
131 
132 jit_state_t *
jit_new_state(void * (* alloc_fn)(size_t),void (* free_fn)(void *))133 jit_new_state(void* (*alloc_fn)(size_t), void (*free_fn)(void*))
134 {
135   if (!alloc_fn) alloc_fn = malloc;
136   if (!free_fn) free_fn = free;
137 
138   jit_state_t *_jit = alloc_fn (sizeof (*_jit));
139   if (!_jit)
140     abort ();
141 
142   memset(_jit, 0, sizeof (*_jit));
143   _jit->alloc = alloc_fn;
144   _jit->free = free_fn;
145 
146   if (!jit_init (_jit)) {
147 #ifdef JIT_NEEDS_LITERAL_POOL
148     free_fn (_jit->pool);
149 #endif
150     free_fn (_jit);
151     return NULL;
152   }
153 
154 #ifdef JIT_NEEDS_LITERAL_POOL
155   _jit->pool = alloc_literal_pool(_jit, 0);
156 #endif
157 
158   return _jit;
159 }
160 
161 void
jit_destroy_state(jit_state_t * _jit)162 jit_destroy_state(jit_state_t *_jit)
163 {
164 #ifdef JIT_NEEDS_LITERAL_POOL
165   _jit->free (_jit->pool);
166 #endif
167   _jit->free (_jit);
168 }
169 
170 jit_pointer_t
jit_address(jit_state_t * _jit)171 jit_address(jit_state_t *_jit)
172 {
173   ASSERT (_jit->start);
174   jit_pointer_t ret = _jit->pc.uc;
175   return _jit->emitting_data ? ret : jit_address_to_function_pointer (ret);
176 }
177 
178 void
jit_begin(jit_state_t * _jit,uint8_t * buf,size_t length)179 jit_begin(jit_state_t *_jit, uint8_t* buf, size_t length)
180 {
181   ASSERT (!_jit->start);
182 
183   _jit->pc.uc = _jit->start = buf;
184   _jit->limit = buf + length;
185   _jit->overflow = 0;
186   _jit->frame_size = 0;
187   _jit->emitting_data = 0;
188 #if JIT_NEEDS_LITERAL_POOL
189   ASSERT(_jit->pool->size == 0);
190   _jit->pool->deadline = length;
191 #endif
192 }
193 
194 jit_bool_t
jit_has_overflow(jit_state_t * _jit)195 jit_has_overflow(jit_state_t *_jit)
196 {
197   ASSERT (_jit->start);
198   return _jit->overflow;
199 }
200 
201 void
jit_reset(jit_state_t * _jit)202 jit_reset(jit_state_t *_jit)
203 {
204   ASSERT (_jit->start);
205   _jit->pc.uc = _jit->start = _jit->limit = NULL;
206   _jit->overflow = 0;
207   _jit->frame_size = 0;
208   _jit->emitting_data = 0;
209 #ifdef JIT_NEEDS_LITERAL_POOL
210   reset_literal_pool(_jit, _jit->pool);
211 #endif
212 }
213 
214 jit_function_pointer_t
jit_address_to_function_pointer(jit_pointer_t p)215 jit_address_to_function_pointer(jit_pointer_t p)
216 {
217   return bless_function_pointer(p);
218 }
219 
/* Finish the session started by jit_begin: dump any pending literal
   pool, flush the instruction cache over the emitted range, and return
   a callable pointer to the code.  Returns NULL (and leaves the state
   un-reset? no -- returns early, state keeps its pointers) on overflow,
   so the caller can jit_reset and retry with a larger buffer.  If
   LENGTH is non-NULL it receives the number of bytes emitted.  */
void*
jit_end(jit_state_t *_jit, size_t *length)
{
#ifdef JIT_NEEDS_LITERAL_POOL
  if (_jit->pool->size)
    emit_literal_pool(_jit, NO_GUARD_NEEDED);
#endif

  if (_jit->overflow)
    return NULL;

  uint8_t *start = _jit->start;
  uint8_t *end = _jit->pc.uc;

  ASSERT(start);
  ASSERT(start <= end);
  ASSERT(end <= _jit->limit);
  ASSERT(!_jit->emitting_data);

  /* Make the freshly written bytes visible to instruction fetch.  */
  jit_flush (start, end);

  if (length) {
    *length = end - start;
  }

  /* Clear session state so the next jit_begin starts fresh.  */
  _jit->pc.uc = _jit->start = _jit->limit = NULL;
  _jit->overflow = 0;
  _jit->frame_size = 0;
#ifdef JIT_NEEDS_LITERAL_POOL
  reset_literal_pool(_jit, _jit->pool);
#endif

  return jit_address_to_function_pointer(start);
}
254 
/* Nonzero iff X is a power of two (and therefore nonzero).  */
static int
is_power_of_two (unsigned x)
{
  if (x == 0)
    return 0;
  return (x & (x - 1)) == 0;
}
260 
261 static jit_gpr_t
get_temp_gpr(jit_state_t * _jit)262 get_temp_gpr(jit_state_t *_jit)
263 {
264   switch(_jit->temp_gpr_saved++)
265     {
266     case 0:
267       return JIT_TMP0;
268 #ifdef JIT_TMP1
269     case 1:
270       return JIT_TMP1;
271 #endif
272     default:
273       abort();
274     }
275 }
276 
277 static jit_fpr_t
get_temp_fpr(jit_state_t * _jit)278 get_temp_fpr(jit_state_t *_jit)
279 {
280   switch(_jit->temp_fpr_saved++)
281     {
282     case 0:
283       return JIT_FTMP;
284     default:
285       abort();
286     }
287 }
288 
289 static void
unget_temp_fpr(jit_state_t * _jit)290 unget_temp_fpr(jit_state_t *_jit)
291 {
292   ASSERT(_jit->temp_fpr_saved);
293   _jit->temp_fpr_saved--;
294 }
295 
296 static void
unget_temp_gpr(jit_state_t * _jit)297 unget_temp_gpr(jit_state_t *_jit)
298 {
299   ASSERT(_jit->temp_gpr_saved);
300   _jit->temp_gpr_saved--;
301 }
302 
/* Append one byte, or set the sticky overflow flag if it won't fit.  */
static inline void emit_u8(jit_state_t *_jit, uint8_t u8) {
  if (UNLIKELY(_jit->pc.uc + 1 > _jit->limit))
    _jit->overflow = 1;
  else
    *_jit->pc.uc++ = u8;
}
310 
/* Append a 16-bit value, or set the sticky overflow flag.  */
static inline void emit_u16(jit_state_t *_jit, uint16_t u16) {
  if (UNLIKELY(_jit->pc.us + 1 > (uint16_t*)_jit->limit))
    _jit->overflow = 1;
  else
    *_jit->pc.us++ = u16;
}
318 
/* Append a 32-bit value, or set the sticky overflow flag.  */
static inline void emit_u32(jit_state_t *_jit, uint32_t u32) {
  if (UNLIKELY(_jit->pc.ui + 1 > (uint32_t*)_jit->limit))
    _jit->overflow = 1;
  else
    *_jit->pc.ui++ = u32;
}
326 
#ifdef JIT_NEEDS_LITERAL_POOL
/* Instruction-sized emitters that also check the literal-pool
   deadline: once the cursor passes it, pending literals must be
   dumped now (with a jump over them) or short-range relocations
   would no longer be able to reach their pool entries.  */
static inline void emit_u16_with_pool(jit_state_t *_jit, uint16_t u16) {
  emit_u16(_jit, u16);
  uint8_t *deadline = _jit->start + _jit->pool->deadline;
  if (UNLIKELY(_jit->pc.uc >= deadline))
    emit_literal_pool(_jit, GUARD_NEEDED);
}

static inline void emit_u32_with_pool(jit_state_t *_jit, uint32_t u32) {
  emit_u32(_jit, u32);
  uint8_t *deadline = _jit->start + _jit->pool->deadline;
  if (UNLIKELY(_jit->pc.uc >= deadline))
    emit_literal_pool(_jit, GUARD_NEEDED);
}
#endif
340 
/* Append a 64-bit value, or set the sticky overflow flag.  */
static inline void emit_u64(jit_state_t *_jit, uint64_t u64) {
  if (UNLIKELY(_jit->pc.ul + 1 > (uint64_t*)_jit->limit))
    _jit->overflow = 1;
  else
    *_jit->pc.ul++ = u64;
}
348 
/* Append a word-sized value; the branch is resolved at compile time.  */
static inline void emit_uintptr(jit_state_t *_jit, uintptr_t u) {
  if (sizeof(uintptr_t) == 8)
    emit_u64 (_jit, u);
  else
    emit_u32 (_jit, u);
}
355 
/* Build a relocation record for a patchable field at LOC.
   KIND selects the patching strategy in jit_patch_there.
   INST_START_OFFSET is the distance from LOC back to the start of the
   instruction containing it; PC_BASE is the address displacements are
   measured from; encoded displacements are shifted right by RSH.  */
static inline jit_reloc_t
jit_reloc(jit_state_t *_jit, enum jit_reloc_kind kind,
          uint8_t inst_start_offset, uint8_t *loc, uint8_t *pc_base,
          uint8_t rsh)
{
  jit_reloc_t ret;

  ASSERT(rsh < __WORDSIZE);
  /* pc_base_offset is stored in a small field, so the base must lie
     within 255 bytes after the instruction start.  */
  ASSERT(pc_base >= (loc - inst_start_offset));
  ASSERT(pc_base - (loc - inst_start_offset) < 256);

  ret.kind = kind;
  ret.inst_start_offset = inst_start_offset;
  ret.pc_base_offset = pc_base - (loc - inst_start_offset);
  ret.rsh = rsh;
  /* Record position of the field relative to the buffer start, so the
     reloc stays valid if the buffer is later copied.  */
  ret.offset = loc - _jit->start;

  return ret;
}
375 
376 static inline jit_reloc_t
emit_abs_reloc(jit_state_t * _jit,uint8_t inst_start)377 emit_abs_reloc (jit_state_t *_jit, uint8_t inst_start)
378 {
379   uint8_t *loc = _jit->pc.uc;
380   emit_uintptr (_jit, 0);
381   return jit_reloc(_jit, JIT_RELOC_ABSOLUTE, inst_start, loc, _jit->pc.uc, 0);
382 }
383 
384 void
jit_patch_here(jit_state_t * _jit,jit_reloc_t reloc)385 jit_patch_here(jit_state_t *_jit, jit_reloc_t reloc)
386 {
387   jit_patch_there (_jit, reloc, jit_address (_jit));
388 }
389 
/* Resolve RELOC so the patched field refers to ADDR.  Handles plain
   absolute/relative fields directly; on targets with literal pools it
   also handles branches that may have been redirected through veneers
   and loads that go via the pool.  If the patched field is the last
   thing emitted, gives the backend a chance to shrink it.  */
void
jit_patch_there(jit_state_t* _jit, jit_reloc_t reloc, jit_pointer_t addr)
{
  if (_jit->overflow)
    return;
  union jit_pc loc;
  uint8_t *end;
  loc.uc = _jit->start + reloc.offset;
  /* Reconstruct the PC base the displacement is measured from.  */
  uint8_t *pc_base = loc.uc - reloc.inst_start_offset + reloc.pc_base_offset;
  ptrdiff_t diff = (uint8_t*)addr - pc_base;
  /* The low rsh bits must be zero: they are not encoded.  */
  ASSERT((diff & ((1 << reloc.rsh) - 1)) == 0);
  diff >>= reloc.rsh;
#ifdef JIT_NEEDS_LITERAL_POOL
  /* Backend-specific flag bits ride along above the kind mask.  */
  int flags = reloc.kind & ~JIT_RELOC_MASK;
#endif

  switch (reloc.kind & JIT_RELOC_MASK)
    {
    case JIT_RELOC_ABSOLUTE:
      /* Word-sized absolute address; branch resolved at compile time.  */
      if (sizeof(diff) == 4)
        *loc.ui = (uintptr_t)addr;
      else
        *loc.ul = (uintptr_t)addr;
      end = loc.uc + sizeof(diff);
      break;
    case JIT_RELOC_REL8:
      ASSERT (INT8_MIN <= diff && diff <= INT8_MAX);
      *loc.uc = diff;
      end = loc.uc + 1;
      break;
    case JIT_RELOC_REL16:
      ASSERT (INT16_MIN <= diff && diff <= INT16_MAX);
      *loc.us = diff;
      end = loc.uc + 2;
      break;
#ifdef JIT_NEEDS_LITERAL_POOL
    case JIT_RELOC_JMP_WITH_VENEER: {
      /* The instruction currently points either at itself (no veneer
         emitted yet) or at an already-emitted veneer.  */
      int32_t voff = read_jmp_offset(loc.ui);
      uint8_t *target = pc_base + (voff << reloc.rsh);
      if (target == loc.uc) {
        // PC still in range to reify direct branch.
        if (offset_in_jmp_range(diff, flags)) {
          // Target also in range: reify direct branch.
          patch_jmp_offset(loc.ui, diff);
          remove_pending_literal(_jit, reloc);
        } else {
          // Target out of range; branch to veneer.
          patch_pending_literal(_jit, reloc, (uintptr_t) addr);
        }
      } else {
        // Already emitted a veneer.  In this case, patch the veneer
        // directly.
        patch_veneer((uint32_t *) target, addr);
      }
      return;
    }
    case JIT_RELOC_JCC_WITH_VENEER: {
      /* Same scheme for conditional branches, whose reach is shorter.  */
      int32_t voff = read_jcc_offset(loc.ui);
      uint8_t *target = pc_base + (voff << reloc.rsh);
      if (target == loc.uc) {
        if (offset_in_jcc_range(diff, flags)) {
          patch_jcc_offset(loc.ui, diff);
          remove_pending_literal(_jit, reloc);
        } else {
          patch_pending_literal(_jit, reloc, (uintptr_t) addr);
        }
      } else {
        patch_veneer((uint32_t *) target, addr);
      }
      return;
    }
    case JIT_RELOC_LOAD_FROM_POOL: {
      /* PC-relative load: patch the pending pool entry, or if the pool
         was already dumped, store directly into its slot.  */
      int32_t voff = read_load_from_pool_offset(loc.ui);
      uint8_t *target = pc_base + (voff << reloc.rsh);
      if (target == loc.uc) {
        patch_pending_literal(_jit, reloc, (uintptr_t) addr);
      } else {
        *(uintptr_t *) target = (uintptr_t) addr;
      }
      return;
    }
#endif
    case JIT_RELOC_REL32:
      ASSERT (INT32_MIN <= diff && diff <= INT32_MAX);
      *loc.ui = diff;
      end = loc.uc + 4;
      break;
    case JIT_RELOC_REL64:
      *loc.ul = diff;
      end = loc.uc + 8;
      break;
    default:
      abort ();
    }

  /* If the patched field ends exactly at the cursor, the backend may
     rewrite the just-emitted instruction into a shorter encoding.  */
  if (end == _jit->pc.uc)
    jit_try_shorten (_jit, reloc, addr);
}
488 
489 void
jit_begin_data(jit_state_t * j,size_t max_size_or_zero)490 jit_begin_data(jit_state_t *j, size_t max_size_or_zero)
491 {
492 #ifdef JIT_NEEDS_LITERAL_POOL
493   if (j->pool->size) {
494     uint8_t *deadline = j->start + j->pool->deadline;
495     // Emit a literal pool now if the data might overwrite the deadline.
496     // Emitting data won't add entries to the pool.
497     if (max_size_or_zero == 0 || j->pc.uc + max_size_or_zero >= deadline)
498       emit_literal_pool(j, NO_GUARD_NEEDED);
499   }
500 #endif
501 
502   ASSERT(!j->emitting_data);
503   j->emitting_data = 1;
504 }
505 
506 void
jit_end_data(jit_state_t * j)507 jit_end_data(jit_state_t *j)
508 {
509   ASSERT(j->emitting_data);
510   j->emitting_data = 0;
511 }
512 
513 void
jit_emit_u8(jit_state_t * j,uint8_t u8)514 jit_emit_u8(jit_state_t *j, uint8_t u8)
515 {
516   ASSERT(j->emitting_data);
517   emit_u8(j, u8);
518 }
519 
520 void
jit_emit_u16(jit_state_t * j,uint16_t u16)521 jit_emit_u16(jit_state_t *j, uint16_t u16)
522 {
523   ASSERT(j->emitting_data);
524   emit_u16(j, u16);
525 }
526 
527 void
jit_emit_u32(jit_state_t * j,uint32_t u32)528 jit_emit_u32(jit_state_t *j, uint32_t u32)
529 {
530   ASSERT(j->emitting_data);
531   emit_u32(j, u32);
532 }
533 
534 void
jit_emit_u64(jit_state_t * j,uint64_t u64)535 jit_emit_u64(jit_state_t *j, uint64_t u64)
536 {
537   ASSERT(j->emitting_data);
538   emit_u64(j, u64);
539 }
540 
541 jit_reloc_t
jit_emit_addr(jit_state_t * j)542 jit_emit_addr(jit_state_t *j)
543 {
544   ASSERT(j->emitting_data);
545   uint8_t inst_start = 0;
546   return emit_abs_reloc(j, inst_start);
547 }
548 
549 #if defined(__i386__) || defined(__x86_64__)
550 # include "x86.c"
551 #elif defined(__mips__)
552 # include "mips.c"
553 #elif defined(__arm__)
554 # include "arm.c"
555 #elif defined(__ppc__) || defined(__powerpc__)
556 # include "ppc.c"
557 #elif defined(__aarch64__)
558 # include "aarch64.c"
559 #elif defined(__s390__) || defined(__s390x__)
560 # include "s390.c"
561 #endif
562 
/* Public API stubs.  Each arity-N template expands to a jit_NAME
   wrapper that unwraps its typed arguments and forwards to the
   backend's lower-case implementation.  */
#define JIT_IMPL_0(stem, ret) \
  ret jit_##stem (jit_state_t* _jit) \
  {                                  \
    return stem(_jit);            \
  }
#define JIT_IMPL_1(stem, ret, ta)                 \
  ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a) \
  {                                               \
    return stem(_jit, unwrap_##ta(a));         \
  }
#define JIT_IMPL_2(stem, ret, ta, tb)                             \
  ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b) \
  {                                                               \
    return stem(_jit, unwrap_##ta(a), unwrap_##tb(b));         \
  }
#define JIT_IMPL_3(stem, ret, ta, tb, tc)                               \
  ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b, jit_##tc##_t c) \
  {                                                                     \
    return stem(_jit, unwrap_##ta(a), unwrap_##tb(b), unwrap_##tc(c)); \
  }
#define JIT_IMPL_4(stem, ret, ta, tb, tc, td)                           \
  ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b, jit_##tc##_t c, jit_##td##_t d) \
  {                                                                     \
    return stem(_jit, unwrap_##ta(a), unwrap_##tb(b), unwrap_##tc(c), unwrap_##td(d)); \
  }

/* Signature codes used by FOR_EACH_INSTRUCTION: first letter is the
   return type (R = jit_reloc_t, _ = void), the rest name the argument
   types (G = gpr, F = fpr, i = imm, u = uimm, o = off, p = pointer,
   d = float64, f = float32).  */
#define JIT_IMPL_RFF__(stem) JIT_IMPL_2(stem, jit_reloc_t, fpr, fpr)
#define JIT_IMPL_RGG__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, gpr)
#define JIT_IMPL_RG___(stem) JIT_IMPL_1(stem, jit_reloc_t, gpr)
#define JIT_IMPL_RGi__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, imm)
#define JIT_IMPL_RGu__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, uimm)
#define JIT_IMPL_R____(stem) JIT_IMPL_0(stem, jit_reloc_t)
#define JIT_IMPL__FFF_(stem) JIT_IMPL_3(stem, void, fpr, fpr, fpr)
#define JIT_IMPL__FF__(stem) JIT_IMPL_2(stem, void, fpr, fpr)
#define JIT_IMPL__FGG_(stem) JIT_IMPL_3(stem, void, fpr, gpr, gpr)
#define JIT_IMPL__FG__(stem) JIT_IMPL_2(stem, void, fpr, gpr)
#define JIT_IMPL__FGo_(stem) JIT_IMPL_3(stem, void, fpr, gpr, off)
#define JIT_IMPL__F___(stem) JIT_IMPL_1(stem, void, fpr)
#define JIT_IMPL__Fd__(stem) JIT_IMPL_2(stem, void, fpr, float64)
#define JIT_IMPL__Ff__(stem) JIT_IMPL_2(stem, void, fpr, float32)
#define JIT_IMPL__Fp__(stem) JIT_IMPL_2(stem, void, fpr, pointer)
#define JIT_IMPL__GF__(stem) JIT_IMPL_2(stem, void, gpr, fpr)
#define JIT_IMPL__GGF_(stem) JIT_IMPL_3(stem, void, gpr, gpr, fpr)
#define JIT_IMPL__GGGG(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, gpr)
#define JIT_IMPL__GGG_(stem) JIT_IMPL_3(stem, void, gpr, gpr, gpr)
#define JIT_IMPL__GGGi(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, imm)
#define JIT_IMPL__GGGu(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, uimm)
#define JIT_IMPL__GG__(stem) JIT_IMPL_2(stem, void, gpr, gpr)
#define JIT_IMPL__GGi_(stem) JIT_IMPL_3(stem, void, gpr, gpr, imm)
#define JIT_IMPL__GGo_(stem) JIT_IMPL_3(stem, void, gpr, gpr, off)
#define JIT_IMPL__GGu_(stem) JIT_IMPL_3(stem, void, gpr, gpr, uimm)
#define JIT_IMPL__G___(stem) JIT_IMPL_1(stem, void, gpr)
#define JIT_IMPL__Gi__(stem) JIT_IMPL_2(stem, void, gpr, imm)
#define JIT_IMPL__Gp__(stem) JIT_IMPL_2(stem, void, gpr, pointer)
#define JIT_IMPL______(stem) JIT_IMPL_0(stem, void)
#define JIT_IMPL__i___(stem) JIT_IMPL_1(stem, void, imm)
#define JIT_IMPL__oGF_(stem) JIT_IMPL_3(stem, void, off, gpr, fpr)
#define JIT_IMPL__oGG_(stem) JIT_IMPL_3(stem, void, off, gpr, gpr)
#define JIT_IMPL__pF__(stem) JIT_IMPL_2(stem, void, pointer, fpr)
#define JIT_IMPL__pG__(stem) JIT_IMPL_2(stem, void, pointer, gpr)
#define JIT_IMPL__p___(stem) JIT_IMPL_1(stem, void, pointer)

/* Per-argument-type unwrappers: register wrappers become register
   numbers, everything else passes through.  */
#define unwrap_gpr(r) jit_gpr_regno(r)
#define unwrap_fpr(r) jit_fpr_regno(r)
#define unwrap_imm(i) i
#define unwrap_uimm(u) u
#define unwrap_off(o) o
#define unwrap_pointer(p) ((uintptr_t) p)
#define unwrap_float32(f) f
#define unwrap_float64(d) d

/* Instantiate one wrapper per instruction in the master list.  */
#define IMPL_INSTRUCTION(kind, stem) JIT_IMPL_##kind(stem)
FOR_EACH_INSTRUCTION(IMPL_INSTRUCTION)
#undef IMPL_INSTRUCTION
637 
638 void
639 jit_align(jit_state_t *_jit, unsigned align)
640 {
641   ASSERT (is_power_of_two (align));
642   uintptr_t here = _jit->pc.w;
643   uintptr_t there = (here + align - 1) & ~(align - 1);
644   if (there - here)
645     nop(_jit, there - here);
646 }
647 
648 static jit_bool_t
is_fpr_arg(enum jit_operand_abi arg)649 is_fpr_arg(enum jit_operand_abi arg)
650 {
651   switch (arg)
652     {
653     case JIT_OPERAND_ABI_UINT8:
654     case JIT_OPERAND_ABI_INT8:
655     case JIT_OPERAND_ABI_UINT16:
656     case JIT_OPERAND_ABI_INT16:
657     case JIT_OPERAND_ABI_UINT32:
658     case JIT_OPERAND_ABI_INT32:
659     case JIT_OPERAND_ABI_UINT64:
660     case JIT_OPERAND_ABI_INT64:
661     case JIT_OPERAND_ABI_POINTER:
662       return 0;
663     case JIT_OPERAND_ABI_FLOAT:
664     case JIT_OPERAND_ABI_DOUBLE:
665       return 1;
666     default:
667       abort();
668     }
669 }
670 
671 static jit_bool_t
is_gpr_arg(enum jit_operand_abi arg)672 is_gpr_arg(enum jit_operand_abi arg)
673 {
674   return !is_fpr_arg(arg);
675 }
676 
/* Load immediate IMM into DST, first asserting that the value fits the
   declared ABI type's range.  Sub-word immediates are range-checked
   but still loaded full-width (see abi_gpr_to_mem for why).  */
static void
abi_imm_to_gpr(jit_state_t *_jit, enum jit_operand_abi abi, jit_gpr_t dst,
               intptr_t imm)
{
  switch (abi) {
  case JIT_OPERAND_ABI_UINT8:
    ASSERT(0 <= imm);
    ASSERT(imm <= UINT8_MAX);
    break;
  case JIT_OPERAND_ABI_INT8:
    ASSERT(INT8_MIN <= imm);
    ASSERT(imm <= INT8_MAX);
    break;
  case JIT_OPERAND_ABI_UINT16:
    ASSERT(0 <= imm);
    ASSERT(imm <= UINT16_MAX);
    break;
  case JIT_OPERAND_ABI_INT16:
    ASSERT(INT16_MIN <= imm);
    ASSERT(imm <= INT16_MAX);
    break;
  case JIT_OPERAND_ABI_UINT32:
    ASSERT(0 <= imm);
    ASSERT(imm <= UINT32_MAX);
    break;
  case JIT_OPERAND_ABI_INT32:
    ASSERT(INT32_MIN <= imm);
    ASSERT(imm <= INT32_MAX);
    break;
#if __WORDSIZE > 32
  /* 64-bit immediates are only representable in intptr_t on 64-bit
     targets; on 32-bit they fall through to abort.  */
  case JIT_OPERAND_ABI_UINT64:
  case JIT_OPERAND_ABI_INT64:
    break;
#endif
  case JIT_OPERAND_ABI_POINTER:
    break;
  default:
    abort();
  }
  jit_movi (_jit, dst, imm);
}
718 
719 static void
abi_gpr_to_mem(jit_state_t * _jit,enum jit_operand_abi abi,jit_gpr_t base,ptrdiff_t offset,jit_gpr_t src)720 abi_gpr_to_mem(jit_state_t *_jit, enum jit_operand_abi abi,
721                jit_gpr_t base, ptrdiff_t offset, jit_gpr_t src)
722 {
723   // Invariant: GPR memory destination operand sizes are rounded up to words.
724   // True for ARM, AArch64, IA32, and X86-64.  Some ABIs expect to be able to
725   // load operands from the stack via a full-word read, so we need to make sure
726   // we don't leave garbage in the high bytes of (for example) the stack slot
727   // for a uint8_t arg.
728   switch (abi) {
729   case JIT_OPERAND_ABI_UINT8:
730   case JIT_OPERAND_ABI_INT8:
731     jit_stxi(_jit, offset, base, src);
732     break;
733   case JIT_OPERAND_ABI_UINT16:
734   case JIT_OPERAND_ABI_INT16:
735     jit_stxi(_jit, offset, base, src);
736     break;
737   case JIT_OPERAND_ABI_UINT32:
738   case JIT_OPERAND_ABI_INT32:
739 #if __WORDSIZE == 32
740   case JIT_OPERAND_ABI_POINTER:
741 #endif
742     jit_stxi(_jit, offset, base, src);
743     break;
744 #if __WORDSIZE == 64
745   case JIT_OPERAND_ABI_UINT64:
746   case JIT_OPERAND_ABI_INT64:
747   case JIT_OPERAND_ABI_POINTER:
748     jit_stxi_l(_jit, offset, base, src);
749     break;
750 #endif
751   default:
752     abort();
753   }
754 }
755 
756 static void
abi_fpr_to_mem(jit_state_t * _jit,enum jit_operand_abi abi,jit_gpr_t base,ptrdiff_t offset,jit_fpr_t src)757 abi_fpr_to_mem(jit_state_t *_jit, enum jit_operand_abi abi,
758                jit_gpr_t base, ptrdiff_t offset, jit_fpr_t src)
759 {
760   switch (abi) {
761   case JIT_OPERAND_ABI_FLOAT:
762     jit_stxi_f(_jit, offset, base, src);
763     break;
764   case JIT_OPERAND_ABI_DOUBLE:
765     jit_stxi_d(_jit, offset, base, src);
766     break;
767   default:
768     abort();
769   }
770 }
771 
/* Load DST from BASE+OFFSET with the extension appropriate to ABI
   (zero-extend unsigned, sign-extend signed).  Note UINT32 is handled
   in different arms depending on word size: on 32-bit targets it is a
   plain word load, on 64-bit it needs an explicit zero-extending load.  */
static void
abi_mem_to_gpr(jit_state_t *_jit, enum jit_operand_abi abi,
               jit_gpr_t dst, jit_gpr_t base, ptrdiff_t offset)
{
  switch (abi) {
  case JIT_OPERAND_ABI_UINT8:
    jit_ldxi_uc(_jit, dst, base, offset);
    break;
  case JIT_OPERAND_ABI_INT8:
    jit_ldxi_c(_jit, dst, base, offset);
    break;
  case JIT_OPERAND_ABI_UINT16:
    jit_ldxi_us(_jit, dst, base, offset);
    break;
  case JIT_OPERAND_ABI_INT16:
    jit_ldxi_s(_jit, dst, base, offset);
    break;
#if __WORDSIZE == 32
  case JIT_OPERAND_ABI_UINT32:
  case JIT_OPERAND_ABI_POINTER:
#endif
  case JIT_OPERAND_ABI_INT32:
    jit_ldxi_i(_jit, dst, base, offset);
    break;
#if __WORDSIZE == 64
  case JIT_OPERAND_ABI_UINT32:
    jit_ldxi_ui(_jit, dst, base, offset);
    break;
  case JIT_OPERAND_ABI_UINT64:
  case JIT_OPERAND_ABI_POINTER:
  case JIT_OPERAND_ABI_INT64:
    jit_ldxi_l(_jit, dst, base, offset);
    break;
#endif
  default:
    abort();
  }
}
810 
811 static void
abi_mem_to_fpr(jit_state_t * _jit,enum jit_operand_abi abi,jit_fpr_t dst,jit_gpr_t base,ptrdiff_t offset)812 abi_mem_to_fpr(jit_state_t *_jit, enum jit_operand_abi abi,
813                jit_fpr_t dst, jit_gpr_t base, ptrdiff_t offset)
814 {
815   switch (abi) {
816   case JIT_OPERAND_ABI_FLOAT:
817     jit_ldxi_f(_jit, dst, base, offset);
818     break;
819   case JIT_OPERAND_ABI_DOUBLE:
820     jit_ldxi_d(_jit, dst, base, offset);
821     break;
822   default:
823     abort();
824   }
825 }
826 
827 static void
abi_imm_to_mem(jit_state_t * _jit,enum jit_operand_abi abi,jit_gpr_t base,ptrdiff_t offset,jit_imm_t imm)828 abi_imm_to_mem(jit_state_t *_jit, enum jit_operand_abi abi, jit_gpr_t base,
829                ptrdiff_t offset, jit_imm_t imm)
830 {
831   ASSERT(!is_fpr_arg(abi));
832 
833   jit_gpr_t tmp = get_temp_gpr(_jit);
834   abi_imm_to_gpr(_jit, abi, tmp, imm);
835   abi_gpr_to_mem(_jit, abi, base, offset, tmp);
836   unget_temp_gpr(_jit);
837 }
838 
839 static void
abi_mem_to_mem(jit_state_t * _jit,enum jit_operand_abi abi,jit_gpr_t base,ptrdiff_t offset,jit_gpr_t src_base,ptrdiff_t src_offset)840 abi_mem_to_mem(jit_state_t *_jit, enum jit_operand_abi abi, jit_gpr_t base,
841                ptrdiff_t offset, jit_gpr_t src_base, ptrdiff_t src_offset)
842 {
843   if (is_gpr_arg (abi)) {
844     jit_gpr_t tmp = get_temp_gpr(_jit);
845     abi_mem_to_gpr(_jit, abi, tmp, src_base, src_offset);
846     abi_gpr_to_mem(_jit, abi, base, offset, tmp);
847     unget_temp_gpr(_jit);
848   } else {
849     jit_fpr_t tmp = get_temp_fpr(_jit);
850     abi_mem_to_fpr(_jit, abi, tmp, src_base, src_offset);
851     abi_fpr_to_mem(_jit, abi, base, offset, tmp);
852     unget_temp_fpr(_jit);
853   }
854 }
855 
/* Pack a (source kind, destination kind) pair into one small integer
   so moves can be dispatched with a single switch.  */
#define MOVE_KIND(a, b) ((((int) a) << 4) | ((int) b))

/* Name every supported source->destination combination; unsupported
   pairs (e.g. IMM->FPR) deliberately have no enumerator and hit the
   abort() default in move_operand.  */
#define MOVE_KIND_ENUM(a, b) \
  MOVE_##a##_TO_##b = MOVE_KIND(JIT_OPERAND_KIND_##a, JIT_OPERAND_KIND_##b)
enum move_kind {
  MOVE_KIND_ENUM(IMM, GPR),
  MOVE_KIND_ENUM(GPR, GPR),
  MOVE_KIND_ENUM(MEM, GPR),
  MOVE_KIND_ENUM(FPR, FPR),
  MOVE_KIND_ENUM(MEM, FPR),
  MOVE_KIND_ENUM(IMM, MEM),
  MOVE_KIND_ENUM(GPR, MEM),
  MOVE_KIND_ENUM(FPR, MEM),
  MOVE_KIND_ENUM(MEM, MEM)
};
#undef MOVE_KIND_ENUM
872 
/* Move SRC to DST, dispatching on the pair of operand kinds.  The
   operand's ABI type governs the width/extension used by the abi_*
   helpers.  Aborts on combinations without an enumerator (the
   register allocator never produces them).  */
static void
move_operand(jit_state_t *_jit, jit_operand_t dst, jit_operand_t src)
{
  switch (MOVE_KIND (src.kind, dst.kind)) {
  case MOVE_IMM_TO_GPR:
    return abi_imm_to_gpr(_jit, src.abi, dst.loc.gpr.gpr, src.loc.imm);

  case MOVE_GPR_TO_GPR:
    return jit_movr(_jit, dst.loc.gpr.gpr, src.loc.gpr.gpr);

  case MOVE_MEM_TO_GPR:
    return abi_mem_to_gpr(_jit, src.abi, dst.loc.gpr.gpr, src.loc.mem.base,
                          src.loc.mem.offset);

  case MOVE_FPR_TO_FPR:
    /* Register-to-register FP moves need matching precision.  */
    ASSERT(src.abi == dst.abi);
    if (src.abi == JIT_OPERAND_ABI_DOUBLE)
      return jit_movr_d(_jit, dst.loc.fpr, src.loc.fpr);
    else
      return jit_movr_f(_jit, dst.loc.fpr, src.loc.fpr);

  case MOVE_MEM_TO_FPR:
    return abi_mem_to_fpr(_jit, src.abi, dst.loc.fpr, src.loc.mem.base,
                          src.loc.mem.offset);

  case MOVE_IMM_TO_MEM:
    return abi_imm_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
                          src.loc.imm);

  case MOVE_GPR_TO_MEM:
    return abi_gpr_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
                          src.loc.gpr.gpr);

  case MOVE_FPR_TO_MEM:
    return abi_fpr_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
                          src.loc.fpr);

  case MOVE_MEM_TO_MEM:
    return abi_mem_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
                          src.loc.mem.base, src.loc.mem.offset);

  default:
    abort();
  }
}
918 
// A direct transliteration of "Tilting at windmills with Coq: formal
// verification of a compilation algorithm for parallel moves" by
// Laurence Rideau, Bernard Paul Serpette, and Xavier Leroy:
// https://xavierleroy.org/publi/parallel-move.pdf

/* Per-operand progress marker used by move_one to detect cycles.  */
enum move_status { TO_MOVE, BEING_MOVED, MOVED };
925 
926 static inline int
already_in_place(jit_operand_t src,jit_operand_t dst)927 already_in_place(jit_operand_t src, jit_operand_t dst)
928 {
929   switch (MOVE_KIND(src.kind, dst.kind)) {
930   case MOVE_GPR_TO_GPR:
931     return jit_same_gprs (src.loc.gpr.gpr, dst.loc.gpr.gpr);
932   case MOVE_FPR_TO_FPR:
933     return jit_same_fprs (src.loc.fpr, dst.loc.fpr);
934   case MOVE_MEM_TO_MEM:
935     return jit_same_gprs (src.loc.mem.base, dst.loc.mem.base) &&
936       src.loc.mem.offset == dst.loc.mem.offset;
937   default:
938     return 0;
939   }
940 }
941 
942 static inline int
write_would_clobber(jit_operand_t src,jit_operand_t dst)943 write_would_clobber(jit_operand_t src, jit_operand_t dst)
944 {
945   if (already_in_place (src, dst))
946     return 1;
947 
948   if (MOVE_KIND(src.kind, dst.kind) == MOVE_MEM_TO_GPR)
949     return jit_same_gprs(src.loc.mem.base, dst.loc.gpr.gpr);
950 
951   return 0;
952 }
953 
954 static inline ptrdiff_t
operand_addend(jit_operand_t op)955 operand_addend(jit_operand_t op)
956 {
957   switch (op.kind) {
958   case JIT_OPERAND_KIND_GPR:
959     return op.loc.gpr.addend;
960   case JIT_OPERAND_KIND_MEM:
961     return op.loc.mem.addend;
962   default:
963     abort();
964   }
965 }
966 
/* Emit the move for argument I of the parallel move, first flushing
   (recursively) any other pending move whose source would be clobbered
   by writing dst[i].  When a dependency cycle is detected — a source
   whose own move is BEING_MOVED further up the recursion — break it by
   parking that source in a temporary register and retargeting its src
   entry at the temporary.  */
static void
move_one(jit_state_t *_jit, jit_operand_t *dst, jit_operand_t *src,
         size_t argc, enum move_status *status, size_t i)
{
  int tmp_gpr = 0, tmp_fpr = 0;

  if (already_in_place(src[i], dst[i]))
    return;

  status[i] = BEING_MOVED;
  for (size_t j = 0; j < argc; j++) {
    if (write_would_clobber(src[j], dst[i])) {
      switch (status[j]) {
      case TO_MOVE:
        /* Move j out of the way before we overwrite its source.  */
        move_one(_jit, dst, src, argc, status, j);
        break;
      case BEING_MOVED: {
        /* Cycle: j is an ancestor in the recursion.  Save j's source in
           a temporary and let j's move read from there instead.  */
        jit_operand_t tmp;
        if (is_fpr_arg (src[j].kind)) {
          tmp_fpr = 1;
          tmp = jit_operand_fpr(src[j].abi, get_temp_fpr(_jit));
        } else {
          tmp_gpr = 1;
          /* Preserve addend, if any, from source operand, to be applied
             at the end.  */
          tmp = jit_operand_gpr_with_addend(src[j].abi, get_temp_gpr(_jit),
                                            operand_addend(src[j]));
        }
        move_operand (_jit, tmp, src[j]);
        src[j] = tmp;
        break;
      }
      case MOVED:
        break;
      default:
        abort ();
      }
    }
  }

  move_operand (_jit, dst[i], src[i]);
  status[i] = MOVED;
  /* At most one temporary can have been claimed on this activation.  */
  if (tmp_gpr)
    unget_temp_gpr(_jit);
  else if (tmp_fpr)
    unget_temp_fpr(_jit);
}
1014 
1015 static void
apply_addend(jit_state_t * _jit,jit_operand_t dst,jit_operand_t src)1016 apply_addend(jit_state_t *_jit, jit_operand_t dst, jit_operand_t src)
1017 {
1018   switch (MOVE_KIND(src.kind, dst.kind)) {
1019   case MOVE_GPR_TO_GPR:
1020   case MOVE_MEM_TO_GPR:
1021     if (operand_addend(src))
1022       jit_addi(_jit, dst.loc.gpr.gpr, dst.loc.gpr.gpr, operand_addend(src));
1023     break;
1024   case MOVE_GPR_TO_MEM:
1025   case MOVE_MEM_TO_MEM:
1026     if (operand_addend(src)) {
1027       jit_gpr_t tmp = get_temp_gpr(_jit);
1028       abi_mem_to_gpr(_jit, dst.abi, tmp, dst.loc.mem.base, dst.loc.mem.offset);
1029       jit_addi(_jit, tmp, tmp, operand_addend(src));
1030       abi_gpr_to_mem(_jit, dst.abi, dst.loc.mem.base, dst.loc.mem.offset, tmp);
1031       unget_temp_gpr(_jit);
1032     }
1033     break;
1034   default:
1035     break;
1036   }
1037 }
1038 
/* Preconditions: No dest operand is IMM.  No dest operand aliases
   another dest operand.  No dest MEM operand uses a base register which
   is used as a dest GPR.  No dst operand has an addend.  The registers
   returned by get_temp_gpr and get_temp_fpr do not appear in source or
   dest args.  */
void
jit_move_operands(jit_state_t *_jit, jit_operand_t *dst, jit_operand_t *src,
                  size_t argc)
{
  // Check preconditions, except the condition about tmp registers.
  {
    uint64_t src_gprs = 0;
    uint64_t dst_gprs = 0;
    uint64_t dst_fprs = 0;
    uint64_t dst_mem_base_gprs = 0;
    for (size_t i = 0; i < argc; i++) {
      switch (src[i].kind) {
      case JIT_OPERAND_KIND_GPR:
        src_gprs |= 1ULL << jit_gpr_regno(src[i].loc.gpr.gpr);
        break;
      case JIT_OPERAND_KIND_FPR:
      case JIT_OPERAND_KIND_IMM:
      case JIT_OPERAND_KIND_MEM:
        break;
      default:
        abort();
      }
      switch (dst[i].kind) {
      case JIT_OPERAND_KIND_GPR: {
        // Dest GPRs must have no addend and be pairwise distinct.
        ASSERT(dst[i].loc.gpr.addend == 0);
        uint64_t bit = 1ULL << jit_gpr_regno(dst[i].loc.gpr.gpr);
        ASSERT((dst_gprs & bit) == 0);
        dst_gprs |= bit;
        break;
      }
      case JIT_OPERAND_KIND_FPR: {
        // Dest FPRs must be pairwise distinct.
        uint64_t bit = 1ULL << jit_fpr_regno(dst[i].loc.fpr);
        ASSERT((dst_fprs & bit) == 0);
        dst_fprs |= bit;
        break;
      }
      case JIT_OPERAND_KIND_MEM: {
        ASSERT(dst[i].loc.mem.addend == 0);
        uint64_t bit = 1ULL << jit_gpr_regno(dst[i].loc.mem.base);
        dst_mem_base_gprs |= bit;
        break;
      }
      case JIT_OPERAND_KIND_IMM:
      default:
        abort();
        break;
      }
    }
    // No dest MEM base register may also be a source or dest GPR.
    ASSERT(((src_gprs | dst_gprs) & dst_mem_base_gprs) == 0);
  }

  // Run the parallel-move algorithm: each argument is emitted once,
  // with move_one recursing through clobber dependencies.
  enum move_status status[argc];
  for (size_t i = 0; i < argc; i++)
    status[i] = TO_MOVE;
  for (size_t i = 0; i < argc; i++)
    if (status[i] == TO_MOVE)
      move_one(_jit, dst, src, argc, status, i);

  // Apply addends at the end.  We could do it earlier in some cases but
  // at least at the end we know that an in-place increment of one
  // operand won't alias another.
  for (size_t i = 0; i < argc; i++)
    apply_addend(_jit, dst[i], src[i]);
}
1108 
1109 size_t
jit_align_stack(jit_state_t * _jit,size_t expand)1110 jit_align_stack(jit_state_t *_jit, size_t expand)
1111 {
1112   size_t new_size = _jit->frame_size + expand;
1113   // Align stack to double-word boundaries.  This isn't really a
1114   // principle but it does work for Aarch32, AArch64 and x86-64.
1115   size_t alignment = jit_stack_alignment ();
1116   size_t aligned_size = (new_size + alignment - 1) & ~(alignment - 1);
1117   size_t diff = aligned_size - _jit->frame_size;
1118   if (diff)
1119     jit_subi (_jit, JIT_SP, JIT_SP, diff);
1120   _jit->frame_size = aligned_size;
1121   return diff;
1122 }
1123 
1124 void
jit_shrink_stack(jit_state_t * _jit,size_t diff)1125 jit_shrink_stack(jit_state_t *_jit, size_t diff)
1126 {
1127   if (diff)
1128     jit_addi (_jit, JIT_SP, JIT_SP, diff);
1129   _jit->frame_size -= diff;
1130 }
1131 
/* Callee-save GPRs the target port requires the JIT ABI to preserve,
   beyond the user-visible JIT_Vn registers.  */
static const jit_gpr_t platform_callee_save_gprs[] = {
  JIT_PLATFORM_CALLEE_SAVE_GPRS
};
1135 
/* User-visible callee-save GPRs (JIT_V0..).  JIT_V0..JIT_V2 exist on
   all targets; higher registers are present only where the port defines
   them.  Save/restore order in jit_enter_jit_abi/jit_leave_jit_abi
   follows array order.  */
static const jit_gpr_t user_callee_save_gprs[] = {
  JIT_V0, JIT_V1, JIT_V2
#ifdef JIT_V3
  , JIT_V3
#endif
#ifdef JIT_V4
  , JIT_V4
#endif
#ifdef JIT_V5
  , JIT_V5
#endif
#ifdef JIT_V6
  , JIT_V6
#endif
#ifdef JIT_V7
  , JIT_V7
#endif
#ifdef JIT_V8
  , JIT_V8
#endif
#ifdef JIT_V9
  , JIT_V9
#endif
 };
1160 
1161 static const jit_fpr_t user_callee_save_fprs[] = {
1162 #ifdef JIT_VF0
1163   JIT_VF0
1164 #endif
1165 #ifdef JIT_VF1
1166   , JIT_VF1
1167 #endif
1168 #ifdef JIT_VF2
1169   , JIT_VF2
1170 #endif
1171 #ifdef JIT_VF3
1172   , JIT_VF3
1173 #endif
1174 #ifdef JIT_VF4
1175   , JIT_VF4
1176 #endif
1177 #ifdef JIT_VF5
1178   , JIT_VF5
1179 #endif
1180 #ifdef JIT_VF6
1181   , JIT_VF6
1182 #endif
1183 #ifdef JIT_VF7
1184   , JIT_VF7
1185 #endif
1186 };
1187 
/* Element count of a true array (not valid on pointers).  */
#define ARRAY_SIZE(X) (sizeof (X)/sizeof ((X)[0]))
/* Counts of the callee-save register tables above.  */
static const size_t pv_count = ARRAY_SIZE(platform_callee_save_gprs);
static const size_t v_count = ARRAY_SIZE(user_callee_save_gprs);
static const size_t vf_count = ARRAY_SIZE(user_callee_save_fprs);
1192 
/* Emit the prologue switching from the C ABI to the JIT ABI: reserve
   aligned stack space and store VF user FPRs, V user GPRs, then all
   platform callee-save GPRs at increasing offsets from SP.  Returns the
   number of bytes reserved; pass it to jit_leave_jit_abi as frame_size.
   NOTE(review): the frame_size parameter is not read in this function —
   presumably kept for signature symmetry with jit_leave_jit_abi;
   confirm with callers.  */
size_t
jit_enter_jit_abi(jit_state_t *_jit, size_t v, size_t vf, size_t frame_size)
{
  ASSERT(v <= v_count);
  ASSERT(vf <= vf_count);

  ASSERT(_jit->frame_size == 0);
  _jit->frame_size = jit_initial_frame_size();

  /* GPRs take a machine word each; FPRs are stored as 8-byte doubles.  */
  size_t reserved =
    jit_align_stack(_jit, (pv_count + v) * (__WORDSIZE / 8) + vf * 8);

  size_t offset = 0;
  for (size_t i = 0; i < vf; i++, offset += 8)
    jit_stxi_d(_jit, offset, JIT_SP, user_callee_save_fprs[i]);
  for (size_t i = 0; i < v; i++, offset += __WORDSIZE / 8)
    jit_stxi(_jit, offset, JIT_SP, user_callee_save_gprs[i]);
  for (size_t i = 0; i < pv_count; i++, offset += __WORDSIZE / 8)
    jit_stxi(_jit, offset, JIT_SP, platform_callee_save_gprs[i]);
  ASSERT(offset <= reserved);

  return reserved;
}
1216 
/* Emit the epilogue mirroring jit_enter_jit_abi: reload the saved FPRs
   and GPRs from the same SP-relative layout used at entry, then release
   FRAME_SIZE bytes of stack.  V, VF and FRAME_SIZE must match the
   values used at entry.  */
void
jit_leave_jit_abi(jit_state_t *_jit, size_t v, size_t vf, size_t frame_size)
{
  ASSERT(v <= v_count);
  ASSERT(vf <= vf_count);
  ASSERT((pv_count + v) * (__WORDSIZE / 8) + vf * 8 <= frame_size);

  size_t offset = 0;
  for (size_t i = 0; i < vf; i++, offset += 8)
    jit_ldxi_d(_jit, user_callee_save_fprs[i], JIT_SP, offset);
  for (size_t i = 0; i < v; i++, offset += __WORDSIZE / 8)
    jit_ldxi(_jit, user_callee_save_gprs[i], JIT_SP, offset);
  for (size_t i = 0; i < pv_count; i++, offset += __WORDSIZE / 8)
    jit_ldxi(_jit, platform_callee_save_gprs[i], JIT_SP, offset);
  ASSERT(offset <= frame_size);

  jit_shrink_stack(_jit, frame_size);
}
1235 
// Precondition: stack is already aligned.
/* Shuffle ARGS into the locations required by the target calling
   convention, spilling to the stack as the ABI dictates.  Returns the
   number of stack bytes reserved; the caller releases them with
   jit_shrink_stack after the call instruction.  */
static size_t
prepare_call_args(jit_state_t *_jit, size_t argc, jit_operand_t args[])
{
  jit_operand_t dst[argc];
  struct abi_arg_iterator iter;

  // Compute shuffle destinations and space for spilled arguments.
  reset_abi_arg_iterator(&iter, argc, args);
  for (size_t i = 0; i < argc; i++)
    next_abi_arg(&iter, &dst[i]);

  // Reserve space for spilled arguments and ensure stack alignment.
  size_t stack_size = jit_align_stack(_jit, iter.stack_size);

  // Fix up SP-relative operands.  Growing the stack moved SP, so any
  // argument expressed relative to SP must be rebased by the amount of
  // growth before the parallel move runs.
  for (size_t i = 0; i < argc; i++) {
    switch(args[i].kind) {
    case JIT_OPERAND_KIND_GPR:
      if (jit_same_gprs (args[i].loc.gpr.gpr, JIT_SP))
        args[i].loc.gpr.addend += stack_size;
      break;
    case JIT_OPERAND_KIND_MEM:
      if (jit_same_gprs (args[i].loc.mem.base, JIT_SP))
        args[i].loc.mem.offset += stack_size;
      break;
    default:
      break;
    }
  }

  jit_move_operands(_jit, dst, args, argc);

  return stack_size;
}
1271 
1272 void
jit_calli(jit_state_t * _jit,jit_pointer_t f,size_t argc,jit_operand_t args[])1273 jit_calli(jit_state_t *_jit, jit_pointer_t f, size_t argc, jit_operand_t args[])
1274 {
1275   size_t stack_bytes = prepare_call_args(_jit, argc, args);
1276 
1277   calli(_jit, (jit_word_t)f);
1278 
1279   jit_shrink_stack(_jit, stack_bytes);
1280 }
1281 
1282 void
jit_callr(jit_state_t * _jit,jit_gpr_t f,size_t argc,jit_operand_t args[])1283 jit_callr(jit_state_t *_jit, jit_gpr_t f, size_t argc, jit_operand_t args[])
1284 {
1285   size_t stack_bytes = prepare_call_args(_jit, argc, args);
1286 
1287   callr(_jit, jit_gpr_regno(f));
1288 
1289   jit_shrink_stack(_jit, stack_bytes);
1290 }
1291 
/* Rewrite ARGS in place with the ABI-assigned location of each incoming
   argument.  Stack-passed arguments are offset by the current frame
   size, since the prologue has already moved SP since function entry.  */
void
jit_locate_args(jit_state_t *_jit, size_t argc, jit_operand_t args[])
{
  struct abi_arg_iterator iter;

  reset_abi_arg_iterator(&iter, argc, args);
  iter.stack_size += _jit->frame_size;
  for (size_t i = 0; i < argc; i++)
    next_abi_arg(&iter, &args[i]);
}
1302 
/* Precondition: args are distinct locations of type GPR or FPR.  All
   addends of arg operands are zero.  No GPR arg is SP.  */
/* Move the function's incoming arguments from their ABI locations into
   the locations named by ARGS, using a parallel move so overlapping
   registers are handled safely.  */
void
jit_load_args(jit_state_t *_jit, size_t argc, jit_operand_t args[])
{
  jit_operand_t src[argc];

  /* Copy the desired destinations, then overwrite SRC with where the
     ABI actually placed each argument.  */
  memcpy(src, args, sizeof(src[0]) * argc);

  jit_locate_args(_jit, argc, src);
  jit_move_operands(_jit, args, src, argc);
}
1315 
1316 #ifdef JIT_NEEDS_LITERAL_POOL
1317 static uint32_t
literal_pool_byte_size(struct jit_literal_pool * pool)1318 literal_pool_byte_size(struct jit_literal_pool *pool)
1319 {
1320   // Assume that we might need a uint32_t to branch over a table, and up
1321   // to 7 bytes for alignment of the table.  Then we assume that no
1322   // entry will be more than two words.
1323   return sizeof(uint32_t) + 7 + pool->size * sizeof(uintptr_t) * 2;
1324 }
1325 
1326 static void
reset_literal_pool(jit_state_t * _jit,struct jit_literal_pool * pool)1327 reset_literal_pool(jit_state_t *_jit, struct jit_literal_pool *pool)
1328 {
1329   pool->deadline = _jit->limit - _jit->start;
1330   memset(pool->entries, 0, sizeof(pool->entries[0]) * pool->size);
1331   pool->size = 0;
1332 }
1333 
1334 #define INITIAL_LITERAL_POOL_CAPACITY 12
1335 static struct jit_literal_pool*
alloc_literal_pool(jit_state_t * _jit,size_t capacity)1336 alloc_literal_pool(jit_state_t *_jit, size_t capacity)
1337 {
1338   if (capacity == 0) capacity = INITIAL_LITERAL_POOL_CAPACITY;
1339 
1340   struct jit_literal_pool *ret =
1341     _jit->alloc (sizeof (struct jit_literal_pool) +
1342                  sizeof (struct jit_literal_pool_entry) * capacity);
1343   ASSERT (ret);
1344   ret->capacity = capacity;
1345   reset_literal_pool(_jit, ret);
1346   return ret;
1347 }
1348 
1349 static void
grow_literal_pool(jit_state_t * _jit)1350 grow_literal_pool(jit_state_t *_jit)
1351 {
1352   struct jit_literal_pool *new_pool =
1353     alloc_literal_pool(_jit, _jit->pool->capacity * 2);
1354 
1355   for (size_t i = 0; i < _jit->pool->size; i++)
1356     new_pool->entries[new_pool->size++] = _jit->pool->entries[i];
1357   new_pool->deadline = _jit->pool->deadline;
1358 
1359   _jit->free (_jit->pool);
1360   _jit->pool = new_pool;
1361 }
1362 
/* Queue ENTRY for emission in the literal pool.  MAX_OFFSET is the
   farthest the referencing instruction can reach.  Returns 1 when the
   entry was recorded (or the JIT already overflowed), 0 when the pool
   had to be flushed first — in which case the caller must re-emit its
   instruction and try again.  */
static jit_bool_t
add_literal_pool_entry(jit_state_t *_jit, struct jit_literal_pool_entry entry,
                       uint32_t max_offset)
{
  if (_jit->overflow)
    return 1;

  /* If the pool itself would already exceed the reachable range, flush
     it now and tell the caller to retry.  */
  if (max_offset <= literal_pool_byte_size(_jit->pool)) {
    emit_literal_pool(_jit, GUARD_NEEDED);
    return 0;
  }

  if (_jit->pool->size == _jit->pool->capacity)
    grow_literal_pool (_jit);

  /* Compute the buffer offset beyond which this entry would become
     unreachable, and tighten the pool's deadline if needed.  */
  uint32_t loc_offset = _jit->pc.uc - _jit->start;
  uint32_t inst_offset = loc_offset - entry.reloc.inst_start_offset;
  uint32_t pc_base_offset = inst_offset + entry.reloc.pc_base_offset;
  uint32_t deadline =
    pc_base_offset + (max_offset - literal_pool_byte_size(_jit->pool));
  if (deadline < _jit->pool->deadline)
    _jit->pool->deadline = deadline;

  _jit->pool->entries[_jit->pool->size++] = entry;

  return 1;
}
1390 
/* Record reloc SRC as needing a literal-pool slot, whose value will be
   supplied later via patch_pending_literal.  MAX_OFFSET_BITS is the
   width of the instruction's offset field; the reachable range is that
   many bits scaled by the reloc's right shift, minus one instruction.
   NOTE(review): `1 << (max_offset_bits + src.rsh)` assumes the sum
   stays below the width of int — confirm against the largest
   offset-field width any backend passes here.  */
static jit_bool_t
add_pending_literal(jit_state_t *_jit, jit_reloc_t src,
                    uint8_t max_offset_bits)
{
  struct jit_literal_pool_entry entry = { src, 0 };
  uint32_t max_inst_size = sizeof(uint32_t);
  uint32_t max_offset = (1 << (max_offset_bits + src.rsh)) - max_inst_size;
  return add_literal_pool_entry(_jit, entry, max_offset);
}
1400 
1401 static void
remove_pending_literal(jit_state_t * _jit,jit_reloc_t src)1402 remove_pending_literal(jit_state_t *_jit, jit_reloc_t src)
1403 {
1404   for (size_t i = _jit->pool->size; i--; ) {
1405     if (_jit->pool->entries[i].reloc.offset == src.offset) {
1406       for (size_t j = i + 1; j < _jit->pool->size; j++)
1407         _jit->pool->entries[j-1] = _jit->pool->entries[j];
1408       _jit->pool->size--;
1409       return;
1410     }
1411   }
1412   abort();
1413 }
1414 
1415 static void
patch_pending_literal(jit_state_t * _jit,jit_reloc_t src,uintptr_t value)1416 patch_pending_literal(jit_state_t *_jit, jit_reloc_t src, uintptr_t value)
1417 {
1418   for (size_t i = _jit->pool->size; i--; ) {
1419     if (_jit->pool->entries[i].reloc.offset == src.offset) {
1420       ASSERT(_jit->pool->entries[i].value == 0);
1421       _jit->pool->entries[i].value = value;
1422       return;
1423     }
1424   }
1425   abort();
1426 }
1427 
/* Flush the pending literal pool into the instruction stream: for each
   entry, patch the referencing instruction's offset to point at the
   pool slot and emit the veneer or constant word.  When GUARD is
   GUARD_NEEDED (the pool is emitted mid-stream, not at a natural
   barrier), first emit a jump over the table and patch it afterwards.  */
static void
emit_literal_pool(jit_state_t *_jit, enum guard_pool guard)
{
  if (_jit->overflow)
    return;

  if (!_jit->pool->size)
    return;

  uint32_t *patch_loc = NULL;
  if (guard == GUARD_NEEDED)
    patch_loc = jmp_without_veneer(_jit);

  // FIXME: Could de-duplicate constants.
  for (size_t i = 0; i < _jit->pool->size; i++) {
    // Align to word boundary without emitting pool.
    if (_jit->pc.w & 1) emit_u8(_jit, 0);
    if (_jit->pc.w & 2) emit_u16(_jit, 0);
    if (sizeof(uintptr_t) > 4 && (_jit->pc.w & 4))
      emit_u32(_jit, 0);
    ASSERT((_jit->pc.w & (sizeof(uintptr_t) - 1)) == 0);
    struct jit_literal_pool_entry *entry = &_jit->pool->entries[i];
    /* Distance from the instruction's PC base to the pool slot we are
       about to emit, scaled down by the reloc's shift.  */
    uint8_t *loc = _jit->start + entry->reloc.offset;
    uint8_t *pc_base =
      loc - entry->reloc.inst_start_offset + entry->reloc.pc_base_offset;
    ptrdiff_t diff = _jit->pc.uc - pc_base;
    diff >>= entry->reloc.rsh;

    /* The alignment padding above may have hit the buffer limit.  */
    if (_jit->overflow)
      return;

    switch (entry->reloc.kind & JIT_RELOC_MASK) {
    case JIT_RELOC_JMP_WITH_VENEER:
      patch_veneer_jmp_offset((uint32_t*) loc, diff);
      emit_veneer(_jit, (void*) entry->value);
      break;
    case JIT_RELOC_JCC_WITH_VENEER:
      patch_veneer_jcc_offset((uint32_t*) loc, diff);
      emit_veneer(_jit, (void*) entry->value);
      break;
    case JIT_RELOC_LOAD_FROM_POOL:
      patch_load_from_pool_offset((uint32_t*) loc, diff);
      emit_uintptr(_jit, entry->value);
      break;
    default:
      abort();
    }
  }

  if (_jit->overflow)
    return;

  if (guard == GUARD_NEEDED)
    patch_jmp_without_veneer(_jit, patch_loc);

  reset_literal_pool(_jit, _jit->pool);
}
1485 #endif
1486