1 /*
2 * Copyright (C) 2012-2020 Free Software Foundation, Inc.
3 *
4 * This file is part of GNU lightning.
5 *
6 * GNU lightning is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 3, or (at your option)
9 * any later version.
10 *
11 * GNU lightning is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 * License for more details.
15 *
16 * Authors:
17 * Paulo Cesar Pereira de Andrade
18 */
19
20 #if HAVE_CONFIG_H
21 # include "config.h"
22 #endif
23
24 #include <assert.h>
25 #include <fcntl.h>
26 #include <limits.h>
27 #include <stdio.h>
28
29 #include "../lightening.h"
30
31 #define ASSERT(x) do { if (!(x)) abort(); } while (0)
32
33 #if defined(__GNUC__)
34 # define maybe_unused __attribute__ ((unused))
35 # define UNLIKELY(exprn) __builtin_expect(exprn, 0)
36 #else
37 # define maybe_unused /**/
38 # define UNLIKELY(exprn) exprn
39 #endif
40
/* A view of the current output position as bytes, halfwords, words,
   doublewords, or a raw integer, so emitters can store at the natural
   granularity without casting.  */
union jit_pc
{
  uint8_t *uc;
  uint16_t *us;
  uint32_t *ui;
  uint64_t *ul;
  intptr_t w;
  uintptr_t uw;
};

#ifdef JIT_NEEDS_LITERAL_POOL
/* One pending out-of-line constant: the relocation for the referring
   instruction, and the value to be placed in the pool.  */
struct jit_literal_pool_entry
{
  jit_reloc_t reloc;
  uintptr_t value;
};

/* Pending literals for the current emission session.  `deadline` is an
   offset from the buffer start; once the PC reaches it the pool must be
   flushed (see emit_literal_pool) so pending entries stay in range.  */
struct jit_literal_pool
{
  uint32_t deadline;
  uint32_t size;              /* Entries in use.  */
  uint32_t capacity;          /* Entries allocated.  */
  struct jit_literal_pool_entry entries[];
};
#endif // JIT_NEEDS_LITERAL_POOL

/* All state for one assembler: buffer bounds, write cursor,
   temporary-register bookkeeping, and the allocation hooks used for
   auxiliary structures.  */
struct jit_state
{
  union jit_pc pc;            /* Write cursor into the buffer.  */
  uint8_t *start;             /* Buffer start; NULL when no session.  */
  uint8_t *last_instruction_start;
  uint8_t *limit;             /* One past the end of the buffer.  */
  uint8_t temp_gpr_saved;     /* Count of temp GPRs handed out.  */
  uint8_t temp_fpr_saved;     /* Count of temp FPRs handed out.  */
  uint8_t overflow;           /* Set when an emit would pass `limit`.  */
  uint8_t emitting_data;      /* Between jit_begin_data/jit_end_data.  */
  int frame_size; // Used to know when to align stack.
#ifdef JIT_NEEDS_LITERAL_POOL
  struct jit_literal_pool *pool;
#endif
  void* (*alloc)(size_t);
  void (*free)(void*);
};

/* Implemented by the architecture backend included at the bottom of
   this file (x86.c, arm.c, aarch64.c, ...).  */
static jit_bool_t jit_get_cpu(void);
static jit_bool_t jit_init(jit_state_t *);
static void jit_flush(void *fptr, void *tptr);
static void jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc,
                            jit_pointer_t addr);
static void* bless_function_pointer(void *ptr);

struct abi_arg_iterator;

#ifdef JIT_NEEDS_LITERAL_POOL
/* Literal-pool management; definitions below and in the backend.  */
static struct jit_literal_pool* alloc_literal_pool(jit_state_t *_jit,
                                                   size_t capacity);
static void reset_literal_pool(jit_state_t *_jit,
                               struct jit_literal_pool *pool);
static jit_bool_t add_pending_literal(jit_state_t *_jit, jit_reloc_t src,
                                      uint8_t max_offset_bits);
static void remove_pending_literal(jit_state_t *_jit, jit_reloc_t src);
static void patch_pending_literal(jit_state_t *_jit, jit_reloc_t src,
                                  uintptr_t value);
enum guard_pool { GUARD_NEEDED, NO_GUARD_NEEDED };
static void emit_literal_pool(jit_state_t *_jit, enum guard_pool guard);

/* Backend encode/decode helpers for branch and load offsets.  */
static int32_t read_jmp_offset(uint32_t *loc);
static int offset_in_jmp_range(ptrdiff_t offset, int flags);
static void patch_jmp_offset(uint32_t *loc, ptrdiff_t offset);
static void patch_veneer_jmp_offset(uint32_t *loc, ptrdiff_t offset);
static int32_t read_jcc_offset(uint32_t *loc);
static int offset_in_jcc_range(ptrdiff_t offset, int flags);
static void patch_jcc_offset(uint32_t *loc, ptrdiff_t offset);
static void patch_veneer_jcc_offset(uint32_t *loc, ptrdiff_t offset);
static void patch_veneer(uint32_t *loc, jit_pointer_t addr);
static int32_t read_load_from_pool_offset(uint32_t *loc);
#endif

/* ABI classification and calling-convention iteration (backend).  */
static jit_bool_t is_fpr_arg(enum jit_operand_abi arg);
static jit_bool_t is_gpr_arg(enum jit_operand_abi arg);
static void reset_abi_arg_iterator(struct abi_arg_iterator *iter, size_t argc,
                                   const jit_operand_t *args);
static void next_abi_arg(struct abi_arg_iterator *iter,
                         jit_operand_t *arg);
125
/* Library initialization: delegates to the backend's jit_get_cpu.
   Returns nonzero on success.  */
jit_bool_t
init_jit(void)
{
  return jit_get_cpu ();
}

/* Allocate and initialize an assembler state.  `alloc_fn`/`free_fn`
   default to malloc/free when NULL.  Aborts if allocation itself
   fails; returns NULL only if the backend's jit_init rejects the
   host.  */
jit_state_t *
jit_new_state(void* (*alloc_fn)(size_t), void (*free_fn)(void*))
{
  if (!alloc_fn) alloc_fn = malloc;
  if (!free_fn) free_fn = free;

  jit_state_t *_jit = alloc_fn (sizeof (*_jit));
  if (!_jit)
    abort ();

  memset(_jit, 0, sizeof (*_jit));
  _jit->alloc = alloc_fn;
  _jit->free = free_fn;

  if (!jit_init (_jit)) {
#ifdef JIT_NEEDS_LITERAL_POOL
    /* pool is still NULL here (state was zeroed above); NOTE(review):
       a custom free_fn must tolerate NULL like free() does.  */
    free_fn (_jit->pool);
#endif
    free_fn (_jit);
    return NULL;
  }

#ifdef JIT_NEEDS_LITERAL_POOL
  _jit->pool = alloc_literal_pool(_jit, 0);
#endif

  return _jit;
}

/* Release a state (and its literal pool) through the state's own free
   hook.  */
void
jit_destroy_state(jit_state_t *_jit)
{
#ifdef JIT_NEEDS_LITERAL_POOL
  _jit->free (_jit->pool);
#endif
  _jit->free (_jit);
}

/* Current emission address.  While emitting code (not data), it is
   returned blessed as a callable function pointer.  */
jit_pointer_t
jit_address(jit_state_t *_jit)
{
  ASSERT (_jit->start);
  jit_pointer_t ret = _jit->pc.uc;
  return _jit->emitting_data ? ret : jit_address_to_function_pointer (ret);
}
177
178 void
jit_begin(jit_state_t * _jit,uint8_t * buf,size_t length)179 jit_begin(jit_state_t *_jit, uint8_t* buf, size_t length)
180 {
181 ASSERT (!_jit->start);
182
183 _jit->pc.uc = _jit->start = buf;
184 _jit->limit = buf + length;
185 _jit->overflow = 0;
186 _jit->frame_size = 0;
187 _jit->emitting_data = 0;
188 #if JIT_NEEDS_LITERAL_POOL
189 ASSERT(_jit->pool->size == 0);
190 _jit->pool->deadline = length;
191 #endif
192 }
193
/* Whether any emission since jit_begin would have run past the buffer.
   On overflow nothing is written out of bounds; the output is simply
   incomplete and jit_end will return NULL.  */
jit_bool_t
jit_has_overflow(jit_state_t *_jit)
{
  ASSERT (_jit->start);
  return _jit->overflow;
}

/* Abandon the current session without producing code.  */
void
jit_reset(jit_state_t *_jit)
{
  ASSERT (_jit->start);
  _jit->pc.uc = _jit->start = _jit->limit = NULL;
  _jit->overflow = 0;
  _jit->frame_size = 0;
  _jit->emitting_data = 0;
#ifdef JIT_NEEDS_LITERAL_POOL
  reset_literal_pool(_jit, _jit->pool);
#endif
}

/* Convert a code address into a callable function pointer via the
   backend's bless_function_pointer.  */
jit_function_pointer_t
jit_address_to_function_pointer(jit_pointer_t p)
{
  return bless_function_pointer(p);
}

/* Finish the session: flush any pending literal pool, flush the
   instruction cache over the emitted range, optionally report the
   emitted size, reset the session state, and return a callable pointer
   to the code.  Returns NULL if emission overflowed the buffer.  */
void*
jit_end(jit_state_t *_jit, size_t *length)
{
#ifdef JIT_NEEDS_LITERAL_POOL
  if (_jit->pool->size)
    emit_literal_pool(_jit, NO_GUARD_NEEDED);
#endif

  if (_jit->overflow)
    return NULL;

  uint8_t *start = _jit->start;
  uint8_t *end = _jit->pc.uc;

  ASSERT(start);
  ASSERT(start <= end);
  ASSERT(end <= _jit->limit);
  ASSERT(!_jit->emitting_data);

  jit_flush (start, end);

  if (length) {
    *length = end - start;
  }

  _jit->pc.uc = _jit->start = _jit->limit = NULL;
  _jit->overflow = 0;
  _jit->frame_size = 0;
#ifdef JIT_NEEDS_LITERAL_POOL
  reset_literal_pool(_jit, _jit->pool);
#endif

  return jit_address_to_function_pointer(start);
}
254
/* A power of two has exactly one bit set: clearing the lowest set bit
   yields zero.  Zero itself is not a power of two.  */
static int
is_power_of_two (unsigned x)
{
  if (x == 0)
    return 0;
  return (x & (x - 1)) == 0;
}
260
/* Claim a scratch GPR for internal use.  At most one (or two, when the
   target defines JIT_TMP1) may be live at a time; deeper nesting
   aborts.  */
static jit_gpr_t
get_temp_gpr(jit_state_t *_jit)
{
  switch(_jit->temp_gpr_saved++)
    {
    case 0:
      return JIT_TMP0;
#ifdef JIT_TMP1
    case 1:
      return JIT_TMP1;
#endif
    default:
      abort();
    }
}

/* Claim the single scratch FPR; a nested claim aborts.  */
static jit_fpr_t
get_temp_fpr(jit_state_t *_jit)
{
  switch(_jit->temp_fpr_saved++)
    {
    case 0:
      return JIT_FTMP;
    default:
      abort();
    }
}

/* Release the scratch FPR claimed by get_temp_fpr.  */
static void
unget_temp_fpr(jit_state_t *_jit)
{
  ASSERT(_jit->temp_fpr_saved);
  _jit->temp_fpr_saved--;
}

/* Release a scratch GPR claimed by get_temp_gpr.  */
static void
unget_temp_gpr(jit_state_t *_jit)
{
  ASSERT(_jit->temp_gpr_saved);
  _jit->temp_gpr_saved--;
}
302
/* Primitive emitters: append a value of the given width at the current
   PC.  When there is not enough room they set `overflow` and write
   nothing, so emission can continue harmlessly and be checked once at
   jit_end.  */
static inline void emit_u8(jit_state_t *_jit, uint8_t u8) {
  if (UNLIKELY(_jit->pc.uc + 1 > _jit->limit)) {
    _jit->overflow = 1;
  } else {
    *_jit->pc.uc++ = u8;
  }
}

static inline void emit_u16(jit_state_t *_jit, uint16_t u16) {
  if (UNLIKELY(_jit->pc.us + 1 > (uint16_t*)_jit->limit)) {
    _jit->overflow = 1;
  } else {
    *_jit->pc.us++ = u16;
  }
}

static inline void emit_u32(jit_state_t *_jit, uint32_t u32) {
  if (UNLIKELY(_jit->pc.ui + 1 > (uint32_t*)_jit->limit)) {
    _jit->overflow = 1;
  } else {
    *_jit->pc.ui++ = u32;
  }
}

#ifdef JIT_NEEDS_LITERAL_POOL
/* Instruction-emitting variants: after writing, flush the literal pool
   if the PC has reached the pool's deadline.  */
static inline void emit_u16_with_pool(jit_state_t *_jit, uint16_t u16) {
  emit_u16(_jit, u16);
  if (UNLIKELY(_jit->pc.uc >= _jit->start + _jit->pool->deadline))
    emit_literal_pool(_jit, GUARD_NEEDED);
}

static inline void emit_u32_with_pool(jit_state_t *_jit, uint32_t u32) {
  emit_u32(_jit, u32);
  if (UNLIKELY(_jit->pc.uc >= _jit->start + _jit->pool->deadline))
    emit_literal_pool(_jit, GUARD_NEEDED);
}
#endif

static inline void emit_u64(jit_state_t *_jit, uint64_t u64) {
  if (UNLIKELY(_jit->pc.ul + 1 > (uint64_t*)_jit->limit)) {
    _jit->overflow = 1;
  } else {
    *_jit->pc.ul++ = u64;
  }
}

/* Emit a native-word-sized value: 32 or 64 bits by pointer width.  */
static inline void emit_uintptr(jit_state_t *_jit, uintptr_t u) {
  if (sizeof(u) == 4)
    emit_u32 (_jit, u);
  else
    emit_u64 (_jit, u);
}
355
/* Construct a relocation record describing a patchable field at `loc`.
   `inst_start_offset` is the distance from the instruction's first
   byte to `loc`; `pc_base` is the address encoded offsets are relative
   to; `rsh` is how far encoded offsets are right-shifted.  The record
   stores `loc` relative to `_jit->start`.  */
static inline jit_reloc_t
jit_reloc(jit_state_t *_jit, enum jit_reloc_kind kind,
          uint8_t inst_start_offset, uint8_t *loc, uint8_t *pc_base,
          uint8_t rsh)
{
  jit_reloc_t ret;

  ASSERT(rsh < __WORDSIZE);
  ASSERT(pc_base >= (loc - inst_start_offset));
  /* The pc_base offset must fit in one byte.  */
  ASSERT(pc_base - (loc - inst_start_offset) < 256);

  ret.kind = kind;
  ret.inst_start_offset = inst_start_offset;
  ret.pc_base_offset = pc_base - (loc - inst_start_offset);
  ret.rsh = rsh;
  ret.offset = loc - _jit->start;

  return ret;
}

/* Emit a zeroed word-sized absolute-address slot and return a
   relocation for patching it later.  */
static inline jit_reloc_t
emit_abs_reloc (jit_state_t *_jit, uint8_t inst_start)
{
  uint8_t *loc = _jit->pc.uc;
  emit_uintptr (_jit, 0);
  return jit_reloc(_jit, JIT_RELOC_ABSOLUTE, inst_start, loc, _jit->pc.uc, 0);
}

/* Resolve a relocation to the current emission address.  */
void
jit_patch_here(jit_state_t *_jit, jit_reloc_t reloc)
{
  jit_patch_there (_jit, reloc, jit_address (_jit));
}
389
/* Resolve relocation `reloc` to refer to `addr`, patching the
   already-emitted bytes in place.  For the veneer-capable kinds this
   may instead bind a pending literal-pool entry, or patch an
   already-emitted veneer or pool slot.  */
void
jit_patch_there(jit_state_t* _jit, jit_reloc_t reloc, jit_pointer_t addr)
{
  if (_jit->overflow)
    return;
  union jit_pc loc;
  uint8_t *end;
  loc.uc = _jit->start + reloc.offset;
  uint8_t *pc_base = loc.uc - reloc.inst_start_offset + reloc.pc_base_offset;
  ptrdiff_t diff = (uint8_t*)addr - pc_base;
  /* The target must be aligned to the encoding's shift granularity.  */
  ASSERT((diff & ((1 << reloc.rsh) - 1)) == 0);
  diff >>= reloc.rsh;
#ifdef JIT_NEEDS_LITERAL_POOL
  int flags = reloc.kind & ~JIT_RELOC_MASK;
#endif

  switch (reloc.kind & JIT_RELOC_MASK)
    {
    case JIT_RELOC_ABSOLUTE:
      if (sizeof(diff) == 4)
        *loc.ui = (uintptr_t)addr;
      else
        *loc.ul = (uintptr_t)addr;
      end = loc.uc + sizeof(diff);
      break;
    case JIT_RELOC_REL8:
      ASSERT (INT8_MIN <= diff && diff <= INT8_MAX);
      *loc.uc = diff;
      end = loc.uc + 1;
      break;
    case JIT_RELOC_REL16:
      ASSERT (INT16_MIN <= diff && diff <= INT16_MAX);
      *loc.us = diff;
      end = loc.uc + 2;
      break;
#ifdef JIT_NEEDS_LITERAL_POOL
    case JIT_RELOC_JMP_WITH_VENEER: {
      int32_t voff = read_jmp_offset(loc.ui);
      uint8_t *target = pc_base + (voff << reloc.rsh);
      if (target == loc.uc) {
        // PC still in range to reify direct branch.
        if (offset_in_jmp_range(diff, flags)) {
          // Target also in range: reify direct branch.
          patch_jmp_offset(loc.ui, diff);
          remove_pending_literal(_jit, reloc);
        } else {
          // Target out of range; branch to veneer.
          patch_pending_literal(_jit, reloc, (uintptr_t) addr);
        }
      } else {
        // Already emitted a veneer.  In this case, patch the veneer
        // directly.
        patch_veneer((uint32_t *) target, addr);
      }
      return;
    }
    case JIT_RELOC_JCC_WITH_VENEER: {
      // Same logic as above, for conditional branches.
      int32_t voff = read_jcc_offset(loc.ui);
      uint8_t *target = pc_base + (voff << reloc.rsh);
      if (target == loc.uc) {
        if (offset_in_jcc_range(diff, flags)) {
          patch_jcc_offset(loc.ui, diff);
          remove_pending_literal(_jit, reloc);
        } else {
          patch_pending_literal(_jit, reloc, (uintptr_t) addr);
        }
      } else {
        patch_veneer((uint32_t *) target, addr);
      }
      return;
    }
    case JIT_RELOC_LOAD_FROM_POOL: {
      int32_t voff = read_load_from_pool_offset(loc.ui);
      uint8_t *target = pc_base + (voff << reloc.rsh);
      if (target == loc.uc) {
        // Pool not emitted yet: bind the pending literal instead.
        patch_pending_literal(_jit, reloc, (uintptr_t) addr);
      } else {
        // Pool already emitted: store directly into its slot.
        *(uintptr_t *) target = (uintptr_t) addr;
      }
      return;
    }
#endif
    case JIT_RELOC_REL32:
      ASSERT (INT32_MIN <= diff && diff <= INT32_MAX);
      *loc.ui = diff;
      end = loc.uc + 4;
      break;
    case JIT_RELOC_REL64:
      *loc.ul = diff;
      end = loc.uc + 8;
      break;
    default:
      abort ();
    }

  // If the patched field ends exactly at the current PC, give the
  // backend a chance to rewrite the just-emitted instruction more
  // compactly.
  if (end == _jit->pc.uc)
    jit_try_shorten (_jit, reloc, addr);
}
488
/* Enter data-emission mode.  `max_size_or_zero` is an upper bound on
   the data about to be emitted, or 0 when unknown.  If a literal pool
   is pending and the data might run past its deadline, flush the pool
   first.  */
void
jit_begin_data(jit_state_t *j, size_t max_size_or_zero)
{
#ifdef JIT_NEEDS_LITERAL_POOL
  if (j->pool->size) {
    uint8_t *deadline = j->start + j->pool->deadline;
    // Emit a literal pool now if the data might overwrite the deadline.
    // Emitting data won't add entries to the pool.
    if (max_size_or_zero == 0 || j->pc.uc + max_size_or_zero >= deadline)
      emit_literal_pool(j, NO_GUARD_NEEDED);
  }
#endif

  ASSERT(!j->emitting_data);
  j->emitting_data = 1;
}

/* Leave data-emission mode.  */
void
jit_end_data(jit_state_t *j)
{
  ASSERT(j->emitting_data);
  j->emitting_data = 0;
}

/* Public data emitters: only valid between jit_begin_data and
   jit_end_data.  */
void
jit_emit_u8(jit_state_t *j, uint8_t u8)
{
  ASSERT(j->emitting_data);
  emit_u8(j, u8);
}

void
jit_emit_u16(jit_state_t *j, uint16_t u16)
{
  ASSERT(j->emitting_data);
  emit_u16(j, u16);
}

void
jit_emit_u32(jit_state_t *j, uint32_t u32)
{
  ASSERT(j->emitting_data);
  emit_u32(j, u32);
}

void
jit_emit_u64(jit_state_t *j, uint64_t u64)
{
  ASSERT(j->emitting_data);
  emit_u64(j, u64);
}

/* Reserve a word-sized data slot for an address, returning a
   relocation with which to fill it in later.  */
jit_reloc_t
jit_emit_addr(jit_state_t *j)
{
  ASSERT(j->emitting_data);
  uint8_t inst_start = 0;
  return emit_abs_reloc(j, inst_start);
}
548
549 #if defined(__i386__) || defined(__x86_64__)
550 # include "x86.c"
551 #elif defined(__mips__)
552 # include "mips.c"
553 #elif defined(__arm__)
554 # include "arm.c"
555 #elif defined(__ppc__) || defined(__powerpc__)
556 # include "ppc.c"
557 #elif defined(__aarch64__)
558 # include "aarch64.c"
559 #elif defined(__s390__) || defined(__s390x__)
560 # include "s390.c"
561 #endif
562
/* Generators for the public jit_NAME wrappers: each defines an entry
   point taking 0-4 typed operands, unwraps every operand, and calls
   the backend implementation `stem`.  */
#define JIT_IMPL_0(stem, ret)                                           \
  ret jit_##stem (jit_state_t* _jit)                                    \
  {                                                                     \
    return stem(_jit);                                                  \
  }
#define JIT_IMPL_1(stem, ret, ta)                                       \
  ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a)                    \
  {                                                                     \
    return stem(_jit, unwrap_##ta(a));                                  \
  }
#define JIT_IMPL_2(stem, ret, ta, tb)                                   \
  ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b)    \
  {                                                                     \
    return stem(_jit, unwrap_##ta(a), unwrap_##tb(b));                  \
  }
#define JIT_IMPL_3(stem, ret, ta, tb, tc)                               \
  ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b, jit_##tc##_t c) \
  {                                                                     \
    return stem(_jit, unwrap_##ta(a), unwrap_##tb(b), unwrap_##tc(c));  \
  }
#define JIT_IMPL_4(stem, ret, ta, tb, tc, td)                           \
  ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b, jit_##tc##_t c, jit_##td##_t d) \
  {                                                                     \
    return stem(_jit, unwrap_##ta(a), unwrap_##tb(b), unwrap_##tc(c), unwrap_##td(d)); \
  }

/* Signature codes used by FOR_EACH_INSTRUCTION: first letter is the
   return (R = jit_reloc_t, _ = void), then one letter per operand
   (G = gpr, F = fpr, i = imm, u = uimm, o = off, p = pointer,
   f = float32, d = float64).  */
#define JIT_IMPL_RFF__(stem) JIT_IMPL_2(stem, jit_reloc_t, fpr, fpr)
#define JIT_IMPL_RGG__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, gpr)
#define JIT_IMPL_RG___(stem) JIT_IMPL_1(stem, jit_reloc_t, gpr)
#define JIT_IMPL_RGi__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, imm)
#define JIT_IMPL_RGu__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, uimm)
#define JIT_IMPL_R____(stem) JIT_IMPL_0(stem, jit_reloc_t)
#define JIT_IMPL__FFF_(stem) JIT_IMPL_3(stem, void, fpr, fpr, fpr)
#define JIT_IMPL__FF__(stem) JIT_IMPL_2(stem, void, fpr, fpr)
#define JIT_IMPL__FGG_(stem) JIT_IMPL_3(stem, void, fpr, gpr, gpr)
#define JIT_IMPL__FG__(stem) JIT_IMPL_2(stem, void, fpr, gpr)
#define JIT_IMPL__FGo_(stem) JIT_IMPL_3(stem, void, fpr, gpr, off)
#define JIT_IMPL__F___(stem) JIT_IMPL_1(stem, void, fpr)
#define JIT_IMPL__Fd__(stem) JIT_IMPL_2(stem, void, fpr, float64)
#define JIT_IMPL__Ff__(stem) JIT_IMPL_2(stem, void, fpr, float32)
#define JIT_IMPL__Fp__(stem) JIT_IMPL_2(stem, void, fpr, pointer)
#define JIT_IMPL__GF__(stem) JIT_IMPL_2(stem, void, gpr, fpr)
#define JIT_IMPL__GGF_(stem) JIT_IMPL_3(stem, void, gpr, gpr, fpr)
#define JIT_IMPL__GGGG(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, gpr)
#define JIT_IMPL__GGG_(stem) JIT_IMPL_3(stem, void, gpr, gpr, gpr)
#define JIT_IMPL__GGGi(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, imm)
#define JIT_IMPL__GGGu(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, uimm)
#define JIT_IMPL__GG__(stem) JIT_IMPL_2(stem, void, gpr, gpr)
#define JIT_IMPL__GGi_(stem) JIT_IMPL_3(stem, void, gpr, gpr, imm)
#define JIT_IMPL__GGo_(stem) JIT_IMPL_3(stem, void, gpr, gpr, off)
#define JIT_IMPL__GGu_(stem) JIT_IMPL_3(stem, void, gpr, gpr, uimm)
#define JIT_IMPL__G___(stem) JIT_IMPL_1(stem, void, gpr)
#define JIT_IMPL__Gi__(stem) JIT_IMPL_2(stem, void, gpr, imm)
#define JIT_IMPL__Gp__(stem) JIT_IMPL_2(stem, void, gpr, pointer)
#define JIT_IMPL______(stem) JIT_IMPL_0(stem, void)
#define JIT_IMPL__i___(stem) JIT_IMPL_1(stem, void, imm)
#define JIT_IMPL__oGF_(stem) JIT_IMPL_3(stem, void, off, gpr, fpr)
#define JIT_IMPL__oGG_(stem) JIT_IMPL_3(stem, void, off, gpr, gpr)
#define JIT_IMPL__pF__(stem) JIT_IMPL_2(stem, void, pointer, fpr)
#define JIT_IMPL__pG__(stem) JIT_IMPL_2(stem, void, pointer, gpr)
#define JIT_IMPL__p___(stem) JIT_IMPL_1(stem, void, pointer)

/* Operand unwrappers: translate the wrapper argument types into what
   the backend functions take.  */
#define unwrap_gpr(r) jit_gpr_regno(r)
#define unwrap_fpr(r) jit_fpr_regno(r)
#define unwrap_imm(i) i
#define unwrap_uimm(u) u
#define unwrap_off(o) o
#define unwrap_pointer(p) ((uintptr_t) p)
#define unwrap_float32(f) f
#define unwrap_float64(d) d

/* Instantiate every public instruction wrapper.  */
#define IMPL_INSTRUCTION(kind, stem) JIT_IMPL_##kind(stem)
FOR_EACH_INSTRUCTION(IMPL_INSTRUCTION)
#undef IMPL_INSTRUCTION
637
638 void
639 jit_align(jit_state_t *_jit, unsigned align)
640 {
641 ASSERT (is_power_of_two (align));
642 uintptr_t here = _jit->pc.w;
643 uintptr_t there = (here + align - 1) & ~(align - 1);
644 if (there - here)
645 nop(_jit, there - here);
646 }
647
/* Whether arguments of this ABI type are passed in floating-point
   registers (float/double); integer and pointer types return 0, and an
   out-of-range ABI value aborts.  */
static jit_bool_t
is_fpr_arg(enum jit_operand_abi arg)
{
  switch (arg)
    {
    case JIT_OPERAND_ABI_UINT8:
    case JIT_OPERAND_ABI_INT8:
    case JIT_OPERAND_ABI_UINT16:
    case JIT_OPERAND_ABI_INT16:
    case JIT_OPERAND_ABI_UINT32:
    case JIT_OPERAND_ABI_INT32:
    case JIT_OPERAND_ABI_UINT64:
    case JIT_OPERAND_ABI_INT64:
    case JIT_OPERAND_ABI_POINTER:
      return 0;
    case JIT_OPERAND_ABI_FLOAT:
    case JIT_OPERAND_ABI_DOUBLE:
      return 1;
    default:
      abort();
    }
}

/* Complement of is_fpr_arg.  */
static jit_bool_t
is_gpr_arg(enum jit_operand_abi arg)
{
  return !is_fpr_arg(arg);
}
676
/* Load immediate `imm` into GPR `dst`, asserting that it fits the
   value range of ABI type `abi`.  */
static void
abi_imm_to_gpr(jit_state_t *_jit, enum jit_operand_abi abi, jit_gpr_t dst,
               intptr_t imm)
{
  switch (abi) {
  case JIT_OPERAND_ABI_UINT8:
    ASSERT(0 <= imm);
    ASSERT(imm <= UINT8_MAX);
    break;
  case JIT_OPERAND_ABI_INT8:
    ASSERT(INT8_MIN <= imm);
    ASSERT(imm <= INT8_MAX);
    break;
  case JIT_OPERAND_ABI_UINT16:
    ASSERT(0 <= imm);
    ASSERT(imm <= UINT16_MAX);
    break;
  case JIT_OPERAND_ABI_INT16:
    ASSERT(INT16_MIN <= imm);
    ASSERT(imm <= INT16_MAX);
    break;
  case JIT_OPERAND_ABI_UINT32:
    ASSERT(0 <= imm);
    ASSERT(imm <= UINT32_MAX);
    break;
  case JIT_OPERAND_ABI_INT32:
    ASSERT(INT32_MIN <= imm);
    ASSERT(imm <= INT32_MAX);
    break;
#if __WORDSIZE > 32
  case JIT_OPERAND_ABI_UINT64:
  case JIT_OPERAND_ABI_INT64:
    break;
#endif
  case JIT_OPERAND_ABI_POINTER:
    break;
  default:
    abort();
  }
  jit_movi (_jit, dst, imm);
}

/* Store GPR `src` to memory at `base + offset` with the width implied
   by `abi`.  */
static void
abi_gpr_to_mem(jit_state_t *_jit, enum jit_operand_abi abi,
               jit_gpr_t base, ptrdiff_t offset, jit_gpr_t src)
{
  // Invariant: GPR memory destination operand sizes are rounded up to words.
  // True for ARM, AArch64, IA32, and X86-64.  Some ABIs expect to be able to
  // load operands from the stack via a full-word read, so we need to make sure
  // we don't leave garbage in the high bytes of (for example) the stack slot
  // for a uint8_t arg.
  switch (abi) {
  case JIT_OPERAND_ABI_UINT8:
  case JIT_OPERAND_ABI_INT8:
    jit_stxi(_jit, offset, base, src);
    break;
  case JIT_OPERAND_ABI_UINT16:
  case JIT_OPERAND_ABI_INT16:
    jit_stxi(_jit, offset, base, src);
    break;
  case JIT_OPERAND_ABI_UINT32:
  case JIT_OPERAND_ABI_INT32:
#if __WORDSIZE == 32
  case JIT_OPERAND_ABI_POINTER:
#endif
    jit_stxi(_jit, offset, base, src);
    break;
#if __WORDSIZE == 64
  case JIT_OPERAND_ABI_UINT64:
  case JIT_OPERAND_ABI_INT64:
  case JIT_OPERAND_ABI_POINTER:
    jit_stxi_l(_jit, offset, base, src);
    break;
#endif
  default:
    abort();
  }
}

/* Store FPR `src` to memory at `base + offset`, as float or double.  */
static void
abi_fpr_to_mem(jit_state_t *_jit, enum jit_operand_abi abi,
               jit_gpr_t base, ptrdiff_t offset, jit_fpr_t src)
{
  switch (abi) {
  case JIT_OPERAND_ABI_FLOAT:
    jit_stxi_f(_jit, offset, base, src);
    break;
  case JIT_OPERAND_ABI_DOUBLE:
    jit_stxi_d(_jit, offset, base, src);
    break;
  default:
    abort();
  }
}

/* Load from memory at `base + offset` into GPR `dst`, with the width
   and sign/zero extension implied by `abi`.  */
static void
abi_mem_to_gpr(jit_state_t *_jit, enum jit_operand_abi abi,
               jit_gpr_t dst, jit_gpr_t base, ptrdiff_t offset)
{
  switch (abi) {
  case JIT_OPERAND_ABI_UINT8:
    jit_ldxi_uc(_jit, dst, base, offset);
    break;
  case JIT_OPERAND_ABI_INT8:
    jit_ldxi_c(_jit, dst, base, offset);
    break;
  case JIT_OPERAND_ABI_UINT16:
    jit_ldxi_us(_jit, dst, base, offset);
    break;
  case JIT_OPERAND_ABI_INT16:
    jit_ldxi_s(_jit, dst, base, offset);
    break;
#if __WORDSIZE == 32
  case JIT_OPERAND_ABI_UINT32:
  case JIT_OPERAND_ABI_POINTER:
#endif
  case JIT_OPERAND_ABI_INT32:
    jit_ldxi_i(_jit, dst, base, offset);
    break;
#if __WORDSIZE == 64
  case JIT_OPERAND_ABI_UINT32:
    jit_ldxi_ui(_jit, dst, base, offset);
    break;
  case JIT_OPERAND_ABI_UINT64:
  case JIT_OPERAND_ABI_POINTER:
  case JIT_OPERAND_ABI_INT64:
    jit_ldxi_l(_jit, dst, base, offset);
    break;
#endif
  default:
    abort();
  }
}

/* Load from memory at `base + offset` into FPR `dst`, as float or
   double.  */
static void
abi_mem_to_fpr(jit_state_t *_jit, enum jit_operand_abi abi,
               jit_fpr_t dst, jit_gpr_t base, ptrdiff_t offset)
{
  switch (abi) {
  case JIT_OPERAND_ABI_FLOAT:
    jit_ldxi_f(_jit, dst, base, offset);
    break;
  case JIT_OPERAND_ABI_DOUBLE:
    jit_ldxi_d(_jit, dst, base, offset);
    break;
  default:
    abort();
  }
}

/* Store immediate `imm` to memory, staging it through a temp GPR.  */
static void
abi_imm_to_mem(jit_state_t *_jit, enum jit_operand_abi abi, jit_gpr_t base,
               ptrdiff_t offset, jit_imm_t imm)
{
  ASSERT(!is_fpr_arg(abi));

  jit_gpr_t tmp = get_temp_gpr(_jit);
  abi_imm_to_gpr(_jit, abi, tmp, imm);
  abi_gpr_to_mem(_jit, abi, base, offset, tmp);
  unget_temp_gpr(_jit);
}

/* Memory-to-memory move, staged through a temp GPR or FPR depending on
   the ABI class.  */
static void
abi_mem_to_mem(jit_state_t *_jit, enum jit_operand_abi abi, jit_gpr_t base,
               ptrdiff_t offset, jit_gpr_t src_base, ptrdiff_t src_offset)
{
  if (is_gpr_arg (abi)) {
    jit_gpr_t tmp = get_temp_gpr(_jit);
    abi_mem_to_gpr(_jit, abi, tmp, src_base, src_offset);
    abi_gpr_to_mem(_jit, abi, base, offset, tmp);
    unget_temp_gpr(_jit);
  } else {
    jit_fpr_t tmp = get_temp_fpr(_jit);
    abi_mem_to_fpr(_jit, abi, tmp, src_base, src_offset);
    abi_fpr_to_mem(_jit, abi, base, offset, tmp);
    unget_temp_fpr(_jit);
  }
}
855
/* Pack a (source kind, destination kind) pair into one value so a
   single switch can dispatch on the combination.  */
#define MOVE_KIND(a, b) ((((int) a) << 4) | ((int) b))

#define MOVE_KIND_ENUM(a, b) \
  MOVE_##a##_TO_##b = MOVE_KIND(JIT_OPERAND_KIND_##a, JIT_OPERAND_KIND_##b)
/* The supported source->destination combinations; note there is no IMM
   destination and no GPR<->FPR cross move.  */
enum move_kind {
  MOVE_KIND_ENUM(IMM, GPR),
  MOVE_KIND_ENUM(GPR, GPR),
  MOVE_KIND_ENUM(MEM, GPR),
  MOVE_KIND_ENUM(FPR, FPR),
  MOVE_KIND_ENUM(MEM, FPR),
  MOVE_KIND_ENUM(IMM, MEM),
  MOVE_KIND_ENUM(GPR, MEM),
  MOVE_KIND_ENUM(FPR, MEM),
  MOVE_KIND_ENUM(MEM, MEM)
};
#undef MOVE_KIND_ENUM

/* Emit code for a single move dst := src, dispatching on the operand
   kind pair; unsupported combinations abort.  */
static void
move_operand(jit_state_t *_jit, jit_operand_t dst, jit_operand_t src)
{
  switch (MOVE_KIND (src.kind, dst.kind)) {
  case MOVE_IMM_TO_GPR:
    return abi_imm_to_gpr(_jit, src.abi, dst.loc.gpr.gpr, src.loc.imm);

  case MOVE_GPR_TO_GPR:
    return jit_movr(_jit, dst.loc.gpr.gpr, src.loc.gpr.gpr);

  case MOVE_MEM_TO_GPR:
    return abi_mem_to_gpr(_jit, src.abi, dst.loc.gpr.gpr, src.loc.mem.base,
                          src.loc.mem.offset);

  case MOVE_FPR_TO_FPR:
    ASSERT(src.abi == dst.abi);
    if (src.abi == JIT_OPERAND_ABI_DOUBLE)
      return jit_movr_d(_jit, dst.loc.fpr, src.loc.fpr);
    else
      return jit_movr_f(_jit, dst.loc.fpr, src.loc.fpr);

  case MOVE_MEM_TO_FPR:
    return abi_mem_to_fpr(_jit, src.abi, dst.loc.fpr, src.loc.mem.base,
                          src.loc.mem.offset);

  case MOVE_IMM_TO_MEM:
    return abi_imm_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
                          src.loc.imm);

  case MOVE_GPR_TO_MEM:
    return abi_gpr_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
                          src.loc.gpr.gpr);

  case MOVE_FPR_TO_MEM:
    return abi_fpr_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
                          src.loc.fpr);

  case MOVE_MEM_TO_MEM:
    return abi_mem_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
                          src.loc.mem.base, src.loc.mem.offset);

  default:
    abort();
  }
}
918
// A direct transliteration of "Tilting at windmills with Coq: formal
// verification of a compilation algorithm for parallel moves" by
// Laurence Rideau, Bernard Paul Serpette, and Xavier Leroy:
// https://xavierleroy.org/publi/parallel-move.pdf

/* Per-operand progress marker for the parallel-move algorithm.  */
enum move_status { TO_MOVE, BEING_MOVED, MOVED };

/* Whether src and dst already name the same location, so the move is a
   no-op.  */
static inline int
already_in_place(jit_operand_t src, jit_operand_t dst)
{
  switch (MOVE_KIND(src.kind, dst.kind)) {
  case MOVE_GPR_TO_GPR:
    return jit_same_gprs (src.loc.gpr.gpr, dst.loc.gpr.gpr);
  case MOVE_FPR_TO_FPR:
    return jit_same_fprs (src.loc.fpr, dst.loc.fpr);
  case MOVE_MEM_TO_MEM:
    return jit_same_gprs (src.loc.mem.base, dst.loc.mem.base) &&
      src.loc.mem.offset == dst.loc.mem.offset;
  default:
    return 0;
  }
}

/* Whether writing destination `dst` would destroy the not-yet-consumed
   source `src`: either they occupy the same location, or `dst` is the
   GPR that `src` uses as its memory base register.  */
static inline int
write_would_clobber(jit_operand_t src, jit_operand_t dst)
{
  if (already_in_place (src, dst))
    return 1;

  if (MOVE_KIND(src.kind, dst.kind) == MOVE_MEM_TO_GPR)
    return jit_same_gprs(src.loc.mem.base, dst.loc.gpr.gpr);

  return 0;
}

/* The addend carried by a GPR or MEM operand; other operand kinds have
   no addend and abort.  */
static inline ptrdiff_t
operand_addend(jit_operand_t op)
{
  switch (op.kind) {
  case JIT_OPERAND_KIND_GPR:
    return op.loc.gpr.addend;
  case JIT_OPERAND_KIND_MEM:
    return op.loc.mem.addend;
  default:
    abort();
  }
}
966
967 static void
move_one(jit_state_t * _jit,jit_operand_t * dst,jit_operand_t * src,size_t argc,enum move_status * status,size_t i)968 move_one(jit_state_t *_jit, jit_operand_t *dst, jit_operand_t *src,
969 size_t argc, enum move_status *status, size_t i)
970 {
971 int tmp_gpr = 0, tmp_fpr = 0;
972
973 if (already_in_place(src[i], dst[i]))
974 return;
975
976 status[i] = BEING_MOVED;
977 for (size_t j = 0; j < argc; j++) {
978 if (write_would_clobber(src[j], dst[i])) {
979 switch (status[j]) {
980 case TO_MOVE:
981 move_one(_jit, dst, src, argc, status, j);
982 break;
983 case BEING_MOVED: {
984 jit_operand_t tmp;
985 if (is_fpr_arg (src[j].kind)) {
986 tmp_fpr = 1;
987 tmp = jit_operand_fpr(src[j].abi, get_temp_fpr(_jit));
988 } else {
989 tmp_gpr = 1;
990 /* Preserve addend, if any, from source operand, to be applied
991 at the end. */
992 tmp = jit_operand_gpr_with_addend(src[j].abi, get_temp_gpr(_jit),
993 operand_addend(src[j]));
994 }
995 move_operand (_jit, tmp, src[j]);
996 src[j] = tmp;
997 break;
998 }
999 case MOVED:
1000 break;
1001 default:
1002 abort ();
1003 }
1004 }
1005 }
1006
1007 move_operand (_jit, dst[i], src[i]);
1008 status[i] = MOVED;
1009 if (tmp_gpr)
1010 unget_temp_gpr(_jit);
1011 else if (tmp_fpr)
1012 unget_temp_fpr(_jit);
1013 }
1014
/* After the parallel move, fold any addend carried by the source
   operand into the already-moved destination: GPR destinations get an
   in-place add, MEM destinations a load-add-store through a temp GPR.
   Other kinds carry no addend and are left alone.  */
static void
apply_addend(jit_state_t *_jit, jit_operand_t dst, jit_operand_t src)
{
  switch (MOVE_KIND(src.kind, dst.kind)) {
  case MOVE_GPR_TO_GPR:
  case MOVE_MEM_TO_GPR:
    if (operand_addend(src))
      jit_addi(_jit, dst.loc.gpr.gpr, dst.loc.gpr.gpr, operand_addend(src));
    break;
  case MOVE_GPR_TO_MEM:
  case MOVE_MEM_TO_MEM:
    if (operand_addend(src)) {
      jit_gpr_t tmp = get_temp_gpr(_jit);
      abi_mem_to_gpr(_jit, dst.abi, tmp, dst.loc.mem.base, dst.loc.mem.offset);
      jit_addi(_jit, tmp, tmp, operand_addend(src));
      abi_gpr_to_mem(_jit, dst.abi, dst.loc.mem.base, dst.loc.mem.offset, tmp);
      unget_temp_gpr(_jit);
    }
    break;
  default:
    break;
  }
}

/* Preconditions: No dest operand is IMM.  No dest operand aliases
   another dest operand.  No dest MEM operand uses a base register which
   is used as a dest GPR.  No dst operand has an addend.  The registers
   returned by get_temp_gpr and get_temp_fpr do not appear in source or
   dest args. */
void
jit_move_operands(jit_state_t *_jit, jit_operand_t *dst, jit_operand_t *src,
                  size_t argc)
{
  // Check preconditions, except the condition about tmp registers.
  {
    uint64_t src_gprs = 0;
    uint64_t dst_gprs = 0;
    uint64_t dst_fprs = 0;
    uint64_t dst_mem_base_gprs = 0;
    for (size_t i = 0; i < argc; i++) {
      switch (src[i].kind) {
      case JIT_OPERAND_KIND_GPR:
        src_gprs |= 1ULL << jit_gpr_regno(src[i].loc.gpr.gpr);
        break;
      case JIT_OPERAND_KIND_FPR:
      case JIT_OPERAND_KIND_IMM:
      case JIT_OPERAND_KIND_MEM:
        break;
      default:
        abort();
      }
      switch (dst[i].kind) {
      case JIT_OPERAND_KIND_GPR: {
        ASSERT(dst[i].loc.gpr.addend == 0);
        uint64_t bit = 1ULL << jit_gpr_regno(dst[i].loc.gpr.gpr);
        ASSERT((dst_gprs & bit) == 0);
        dst_gprs |= bit;
        break;
      }
      case JIT_OPERAND_KIND_FPR: {
        uint64_t bit = 1ULL << jit_fpr_regno(dst[i].loc.fpr);
        ASSERT((dst_fprs & bit) == 0);
        dst_fprs |= bit;
        break;
      }
      case JIT_OPERAND_KIND_MEM: {
        ASSERT(dst[i].loc.mem.addend == 0);
        uint64_t bit = 1ULL << jit_gpr_regno(dst[i].loc.mem.base);
        dst_mem_base_gprs |= bit;
        break;
      }
      case JIT_OPERAND_KIND_IMM:
      default:
        abort();
        break;
      }
    }
    ASSERT(((src_gprs | dst_gprs) & dst_mem_base_gprs) == 0);
  }

  // One status slot per operand (C99 VLA, sized by argc).
  enum move_status status[argc];
  for (size_t i = 0; i < argc; i++)
    status[i] = TO_MOVE;
  for (size_t i = 0; i < argc; i++)
    if (status[i] == TO_MOVE)
      move_one(_jit, dst, src, argc, status, i);

  // Apply addends at the end.  We could do it earlier in some cases but
  // at least at the end we know that an in-place increment of one
  // operand won't alias another.
  for (size_t i = 0; i < argc; i++)
    apply_addend(_jit, dst[i], src[i]);
}
1108
1109 size_t
jit_align_stack(jit_state_t * _jit,size_t expand)1110 jit_align_stack(jit_state_t *_jit, size_t expand)
1111 {
1112 size_t new_size = _jit->frame_size + expand;
1113 // Align stack to double-word boundaries. This isn't really a
1114 // principle but it does work for Aarch32, AArch64 and x86-64.
1115 size_t alignment = jit_stack_alignment ();
1116 size_t aligned_size = (new_size + alignment - 1) & ~(alignment - 1);
1117 size_t diff = aligned_size - _jit->frame_size;
1118 if (diff)
1119 jit_subi (_jit, JIT_SP, JIT_SP, diff);
1120 _jit->frame_size = aligned_size;
1121 return diff;
1122 }
1123
1124 void
jit_shrink_stack(jit_state_t * _jit,size_t diff)1125 jit_shrink_stack(jit_state_t *_jit, size_t diff)
1126 {
1127 if (diff)
1128 jit_addi (_jit, JIT_SP, JIT_SP, diff);
1129 _jit->frame_size -= diff;
1130 }
1131
/* GPRs the platform ABI requires the JIT itself to preserve; the list
   is provided per-backend via JIT_PLATFORM_CALLEE_SAVE_GPRS.  */
static const jit_gpr_t platform_callee_save_gprs[] = {
  JIT_PLATFORM_CALLEE_SAVE_GPRS
};

/* User-visible callee-save GPRs (JIT_Vn).  At least three exist on
   every backend; the #ifdef ladder picks up however many more the
   target defines.  */
static const jit_gpr_t user_callee_save_gprs[] = {
  JIT_V0, JIT_V1, JIT_V2
#ifdef JIT_V3
  , JIT_V3
#endif
#ifdef JIT_V4
  , JIT_V4
#endif
#ifdef JIT_V5
  , JIT_V5
#endif
#ifdef JIT_V6
  , JIT_V6
#endif
#ifdef JIT_V7
  , JIT_V7
#endif
#ifdef JIT_V8
  , JIT_V8
#endif
#ifdef JIT_V9
  , JIT_V9
#endif
};

/* User-visible callee-save FPRs (JIT_VFn); possibly none, depending on
   the backend.  */
static const jit_fpr_t user_callee_save_fprs[] = {
#ifdef JIT_VF0
  JIT_VF0
#endif
#ifdef JIT_VF1
  , JIT_VF1
#endif
#ifdef JIT_VF2
  , JIT_VF2
#endif
#ifdef JIT_VF3
  , JIT_VF3
#endif
#ifdef JIT_VF4
  , JIT_VF4
#endif
#ifdef JIT_VF5
  , JIT_VF5
#endif
#ifdef JIT_VF6
  , JIT_VF6
#endif
#ifdef JIT_VF7
  , JIT_VF7
#endif
};

/* Element count of an actual array (not valid on pointers).  */
#define ARRAY_SIZE(X) (sizeof (X)/sizeof ((X)[0]))
static const size_t pv_count = ARRAY_SIZE(platform_callee_save_gprs);
static const size_t v_count = ARRAY_SIZE(user_callee_save_gprs);
static const size_t vf_count = ARRAY_SIZE(user_callee_save_fprs);
1192
/* Emit the prologue that enters the JIT-internal ABI: reserve an
   aligned frame and save V user callee-save GPRs, VF user callee-save
   FPRs, and all platform callee-save GPRs to the stack.  Returns the
   number of bytes reserved, which the caller must later pass to
   jit_leave_jit_abi.  NOTE(review): the frame_size parameter is unused
   here — presumably kept for symmetry with jit_leave_jit_abi; confirm.  */
size_t
jit_enter_jit_abi(jit_state_t *_jit, size_t v, size_t vf, size_t frame_size)
{
  ASSERT(v <= v_count);
  ASSERT(vf <= vf_count);

  /* Must be the first frame-affecting code emitted for this function.  */
  ASSERT(_jit->frame_size == 0);
  _jit->frame_size = jit_initial_frame_size();

  /* Each FPR save takes 8 bytes; each GPR save takes one machine word.  */
  size_t reserved =
    jit_align_stack(_jit, (pv_count + v) * (__WORDSIZE / 8) + vf * 8);

  /* Save order (FPRs, user GPRs, platform GPRs) must mirror the restore
     order in jit_leave_jit_abi.  */
  size_t offset = 0;
  for (size_t i = 0; i < vf; i++, offset += 8)
    jit_stxi_d(_jit, offset, JIT_SP, user_callee_save_fprs[i]);
  for (size_t i = 0; i < v; i++, offset += __WORDSIZE / 8)
    jit_stxi(_jit, offset, JIT_SP, user_callee_save_gprs[i]);
  for (size_t i = 0; i < pv_count; i++, offset += __WORDSIZE / 8)
    jit_stxi(_jit, offset, JIT_SP, platform_callee_save_gprs[i]);
  ASSERT(offset <= reserved);

  return reserved;
}
1216
/* Emit the epilogue matching jit_enter_jit_abi: reload the saved FPRs
   and GPRs from the stack and release FRAME_SIZE bytes (the value that
   jit_enter_jit_abi returned).  V and VF must equal the values passed
   to jit_enter_jit_abi so the offsets line up.  */
void
jit_leave_jit_abi(jit_state_t *_jit, size_t v, size_t vf, size_t frame_size)
{
  ASSERT(v <= v_count);
  ASSERT(vf <= vf_count);
  /* The frame must be large enough to have held all the saves.  */
  ASSERT((pv_count + v) * (__WORDSIZE / 8) + vf * 8 <= frame_size);

  /* Restore in the same layout order used by jit_enter_jit_abi.  */
  size_t offset = 0;
  for (size_t i = 0; i < vf; i++, offset += 8)
    jit_ldxi_d(_jit, user_callee_save_fprs[i], JIT_SP, offset);
  for (size_t i = 0; i < v; i++, offset += __WORDSIZE / 8)
    jit_ldxi(_jit, user_callee_save_gprs[i], JIT_SP, offset);
  for (size_t i = 0; i < pv_count; i++, offset += __WORDSIZE / 8)
    jit_ldxi(_jit, platform_callee_save_gprs[i], JIT_SP, offset);
  ASSERT(offset <= frame_size);

  jit_shrink_stack(_jit, frame_size);
}
1235
1236 // Precondition: stack is already aligned.
1237 static size_t
prepare_call_args(jit_state_t * _jit,size_t argc,jit_operand_t args[])1238 prepare_call_args(jit_state_t *_jit, size_t argc, jit_operand_t args[])
1239 {
1240 jit_operand_t dst[argc];
1241 struct abi_arg_iterator iter;
1242
1243 // Compute shuffle destinations and space for spilled arguments.
1244 reset_abi_arg_iterator(&iter, argc, args);
1245 for (size_t i = 0; i < argc; i++)
1246 next_abi_arg(&iter, &dst[i]);
1247
1248 // Reserve space for spilled arguments and ensure stack alignment.
1249 size_t stack_size = jit_align_stack(_jit, iter.stack_size);
1250
1251 // Fix up SP-relative operands.
1252 for (size_t i = 0; i < argc; i++) {
1253 switch(args[i].kind) {
1254 case JIT_OPERAND_KIND_GPR:
1255 if (jit_same_gprs (args[i].loc.gpr.gpr, JIT_SP))
1256 args[i].loc.gpr.addend += stack_size;
1257 break;
1258 case JIT_OPERAND_KIND_MEM:
1259 if (jit_same_gprs (args[i].loc.mem.base, JIT_SP))
1260 args[i].loc.mem.offset += stack_size;
1261 break;
1262 default:
1263 break;
1264 }
1265 }
1266
1267 jit_move_operands(_jit, dst, args, argc);
1268
1269 return stack_size;
1270 }
1271
1272 void
jit_calli(jit_state_t * _jit,jit_pointer_t f,size_t argc,jit_operand_t args[])1273 jit_calli(jit_state_t *_jit, jit_pointer_t f, size_t argc, jit_operand_t args[])
1274 {
1275 size_t stack_bytes = prepare_call_args(_jit, argc, args);
1276
1277 calli(_jit, (jit_word_t)f);
1278
1279 jit_shrink_stack(_jit, stack_bytes);
1280 }
1281
1282 void
jit_callr(jit_state_t * _jit,jit_gpr_t f,size_t argc,jit_operand_t args[])1283 jit_callr(jit_state_t *_jit, jit_gpr_t f, size_t argc, jit_operand_t args[])
1284 {
1285 size_t stack_bytes = prepare_call_args(_jit, argc, args);
1286
1287 callr(_jit, jit_gpr_regno(f));
1288
1289 jit_shrink_stack(_jit, stack_bytes);
1290 }
1291
/* Rewrite ARGS in place with the ABI locations (registers or stack
   slots) at which a function with this signature receives its
   arguments.  */
void
jit_locate_args(jit_state_t *_jit, size_t argc, jit_operand_t args[])
{
  struct abi_arg_iterator iter;

  reset_abi_arg_iterator(&iter, argc, args);
  /* Incoming stack arguments live above this function's own frame, so
     bias the iterator's stack offsets by the current frame size.  */
  iter.stack_size += _jit->frame_size;
  for (size_t i = 0; i < argc; i++)
    next_abi_arg(&iter, &args[i]);
}
1302
1303 /* Precondition: args are distinct locations of type GPR or FPR. All
1304 addends of arg operands are zero. No GPR arg is SP. */
1305 void
jit_load_args(jit_state_t * _jit,size_t argc,jit_operand_t args[])1306 jit_load_args(jit_state_t *_jit, size_t argc, jit_operand_t args[])
1307 {
1308 jit_operand_t src[argc];
1309
1310 memcpy(src, args, sizeof(src[0]) * argc);
1311
1312 jit_locate_args(_jit, argc, src);
1313 jit_move_operands(_jit, args, src, argc);
1314 }
1315
1316 #ifdef JIT_NEEDS_LITERAL_POOL
1317 static uint32_t
literal_pool_byte_size(struct jit_literal_pool * pool)1318 literal_pool_byte_size(struct jit_literal_pool *pool)
1319 {
1320 // Assume that we might need a uint32_t to branch over a table, and up
1321 // to 7 bytes for alignment of the table. Then we assume that no
1322 // entry will be more than two words.
1323 return sizeof(uint32_t) + 7 + pool->size * sizeof(uintptr_t) * 2;
1324 }
1325
1326 static void
reset_literal_pool(jit_state_t * _jit,struct jit_literal_pool * pool)1327 reset_literal_pool(jit_state_t *_jit, struct jit_literal_pool *pool)
1328 {
1329 pool->deadline = _jit->limit - _jit->start;
1330 memset(pool->entries, 0, sizeof(pool->entries[0]) * pool->size);
1331 pool->size = 0;
1332 }
1333
1334 #define INITIAL_LITERAL_POOL_CAPACITY 12
1335 static struct jit_literal_pool*
alloc_literal_pool(jit_state_t * _jit,size_t capacity)1336 alloc_literal_pool(jit_state_t *_jit, size_t capacity)
1337 {
1338 if (capacity == 0) capacity = INITIAL_LITERAL_POOL_CAPACITY;
1339
1340 struct jit_literal_pool *ret =
1341 _jit->alloc (sizeof (struct jit_literal_pool) +
1342 sizeof (struct jit_literal_pool_entry) * capacity);
1343 ASSERT (ret);
1344 ret->capacity = capacity;
1345 reset_literal_pool(_jit, ret);
1346 return ret;
1347 }
1348
1349 static void
grow_literal_pool(jit_state_t * _jit)1350 grow_literal_pool(jit_state_t *_jit)
1351 {
1352 struct jit_literal_pool *new_pool =
1353 alloc_literal_pool(_jit, _jit->pool->capacity * 2);
1354
1355 for (size_t i = 0; i < _jit->pool->size; i++)
1356 new_pool->entries[new_pool->size++] = _jit->pool->entries[i];
1357 new_pool->deadline = _jit->pool->deadline;
1358
1359 _jit->free (_jit->pool);
1360 _jit->pool = new_pool;
1361 }
1362
/* Queue ENTRY for the next literal-pool flush.  MAX_OFFSET is the
   farthest the pool data may sit from the entry's PC base while still
   being encodable in the referring instruction.  Returns nonzero on
   success; returns zero after flushing the pool instead, in which case
   the caller must re-emit its instruction and retry.  */
static jit_bool_t
add_literal_pool_entry(jit_state_t *_jit, struct jit_literal_pool_entry entry,
                       uint32_t max_offset)
{
  if (_jit->overflow)
    return 1;

  /* If even a pool emitted right here could land out of range, flush
     the pending entries now and have the caller retry.  */
  if (max_offset <= literal_pool_byte_size(_jit->pool)) {
    emit_literal_pool(_jit, GUARD_NEEDED);
    return 0;
  }

  if (_jit->pool->size == _jit->pool->capacity)
    grow_literal_pool (_jit);

  /* Compute the last code offset at which the pool can be emitted and
     still be reachable from this entry's PC base; tighten the
     pool-wide deadline if this entry is the most constrained so far.  */
  uint32_t loc_offset = _jit->pc.uc - _jit->start;
  uint32_t inst_offset = loc_offset - entry.reloc.inst_start_offset;
  uint32_t pc_base_offset = inst_offset + entry.reloc.pc_base_offset;
  uint32_t deadline =
    pc_base_offset + (max_offset - literal_pool_byte_size(_jit->pool));
  if (deadline < _jit->pool->deadline)
    _jit->pool->deadline = deadline;

  _jit->pool->entries[_jit->pool->size++] = entry;

  return 1;
}
1390
/* Record SRC as needing a pool entry whose constant will be supplied
   later via patch_pending_literal.  MAX_OFFSET_BITS is the width of
   the reloc's offset field; the reachable byte range is scaled up by
   the reloc's right-shift and shrunk by one instruction of slack.
   NOTE(review): assumes max_offset_bits + src.rsh < 31 so the shift
   cannot overflow a signed int — confirm against the backends.  */
static jit_bool_t
add_pending_literal(jit_state_t *_jit, jit_reloc_t src,
                    uint8_t max_offset_bits)
{
  /* A value of 0 marks the entry as not yet patched.  */
  struct jit_literal_pool_entry entry = { src, 0 };
  uint32_t max_inst_size = sizeof(uint32_t);
  uint32_t max_offset = (1 << (max_offset_bits + src.rsh)) - max_inst_size;
  return add_literal_pool_entry(_jit, entry, max_offset);
}
1400
1401 static void
remove_pending_literal(jit_state_t * _jit,jit_reloc_t src)1402 remove_pending_literal(jit_state_t *_jit, jit_reloc_t src)
1403 {
1404 for (size_t i = _jit->pool->size; i--; ) {
1405 if (_jit->pool->entries[i].reloc.offset == src.offset) {
1406 for (size_t j = i + 1; j < _jit->pool->size; j++)
1407 _jit->pool->entries[j-1] = _jit->pool->entries[j];
1408 _jit->pool->size--;
1409 return;
1410 }
1411 }
1412 abort();
1413 }
1414
1415 static void
patch_pending_literal(jit_state_t * _jit,jit_reloc_t src,uintptr_t value)1416 patch_pending_literal(jit_state_t *_jit, jit_reloc_t src, uintptr_t value)
1417 {
1418 for (size_t i = _jit->pool->size; i--; ) {
1419 if (_jit->pool->entries[i].reloc.offset == src.offset) {
1420 ASSERT(_jit->pool->entries[i].value == 0);
1421 _jit->pool->entries[i].value = value;
1422 return;
1423 }
1424 }
1425 abort();
1426 }
1427
/* Flush all pending pool entries into the instruction stream, patching
   each referring instruction to point at its pool slot.  If GUARD is
   GUARD_NEEDED, a jump over the table is emitted first so execution
   cannot fall into the data.  No-op if the pool is empty or the buffer
   already overflowed.  */
static void
emit_literal_pool(jit_state_t *_jit, enum guard_pool guard)
{
  if (_jit->overflow)
    return;

  if (!_jit->pool->size)
    return;

  /* Emit the guard branch now; its target is patched after the table
     has been written.  */
  uint32_t *patch_loc = NULL;
  if (guard == GUARD_NEEDED)
    patch_loc = jmp_without_veneer(_jit);

  // FIXME: Could de-duplicate constants.
  for (size_t i = 0; i < _jit->pool->size; i++) {
    // Align to word boundary without emitting pool.
    if (_jit->pc.w & 1) emit_u8(_jit, 0);
    if (_jit->pc.w & 2) emit_u16(_jit, 0);
    if (sizeof(uintptr_t) > 4 && (_jit->pc.w & 4))
      emit_u32(_jit, 0);
    ASSERT((_jit->pc.w & (sizeof(uintptr_t) - 1)) == 0);
    struct jit_literal_pool_entry *entry = &_jit->pool->entries[i];
    /* Displacement from the reloc's PC base to the pool slot about to
       be written, scaled down as the instruction encodes it.  */
    uint8_t *loc = _jit->start + entry->reloc.offset;
    uint8_t *pc_base =
      loc - entry->reloc.inst_start_offset + entry->reloc.pc_base_offset;
    ptrdiff_t diff = _jit->pc.uc - pc_base;
    diff >>= entry->reloc.rsh;

    /* The alignment padding above may have hit the buffer limit.  */
    if (_jit->overflow)
      return;

    switch (entry->reloc.kind & JIT_RELOC_MASK) {
    case JIT_RELOC_JMP_WITH_VENEER:
      patch_veneer_jmp_offset((uint32_t*) loc, diff);
      emit_veneer(_jit, (void*) entry->value);
      break;
    case JIT_RELOC_JCC_WITH_VENEER:
      patch_veneer_jcc_offset((uint32_t*) loc, diff);
      emit_veneer(_jit, (void*) entry->value);
      break;
    case JIT_RELOC_LOAD_FROM_POOL:
      patch_load_from_pool_offset((uint32_t*) loc, diff);
      emit_uintptr(_jit, entry->value);
      break;
    default:
      abort();
    }
  }

  if (_jit->overflow)
    return;

  /* Point the guard branch just past the table.  */
  if (guard == GUARD_NEEDED)
    patch_jmp_without_veneer(_jit, patch_loc);

  reset_literal_pool(_jit, _jit->pool);
}
1485 #endif
1486