1 /*
2 * Copyright (C) 2012-2019 Free Software Foundation, Inc.
3 *
4 * This file is part of GNU lightning.
5 *
6 * GNU lightning is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 3, or (at your option)
9 * any later version.
10 *
11 * GNU lightning is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 * License for more details.
15 *
16 * Authors:
17 * Paulo Cesar Pereira de Andrade
18 */
19
20 #include <lightning.h>
21 #include <lightning/jit_private.h>
22
/*
 * Per-ABI calling convention parameters.
 *
 *   jit_arg_reg_p(i)	 nonzero when integer argument index i is passed
 *			 in a register
 *   jit_arg_f_reg_p(i)	 same, for floating point argument index i
 *   stack_framesize	 initial value of function->self.size, which is
 *			 where the first stack argument offset starts
 *   CVT_OFFSET		 initial function->self.aoff (sse/x87 conversion
 *			 scratch slot)
 *   REAL_WORDSIZE	 bytes consumed by one word sized stack argument
 *
 * The va_* and first_ and last_ names describe the layout of jit_va_list_t;
 * they are presumably consumed by the included jit_x86-*.c files.
 */
#if __X32
/* 32 bit: no argument index ever qualifies for register passing. */
#  define jit_arg_reg_p(i)		0
#  define jit_arg_f_reg_p(i)		0
#  define stack_framesize		20
#  define stack_adjust			12
#  define CVT_OFFSET			-12
#  define REAL_WORDSIZE			4
#  define va_gp_increment		4
#  define va_fp_increment		8
#else
/* Values shared by both 64 bit calling conventions. */
#  define va_gp_increment		8
#  define stack_adjust			8
#  define CVT_OFFSET			-8
#  define REAL_WORDSIZE			8
#  if __CYGWIN__ || _WIN32
/* Four argument slots, shared by integer and float arguments. */
#    define jit_arg_reg_p(i)		((i) >= 0 && (i) < 4)
#    define jit_arg_f_reg_p(i)		jit_arg_reg_p(i)
#    define stack_framesize		152
#    define va_fp_increment		8
#  else
/* Six integer and eight float argument registers, counted separately. */
#    define jit_arg_reg_p(i)		((i) >= 0 && (i) < 6)
#    define jit_arg_f_reg_p(i)		((i) >= 0 && (i) < 8)
#    define stack_framesize		56
#    define first_gp_argument		rdi
#    define first_gp_offset		offsetof(jit_va_list_t, rdi)
#    define first_gp_from_offset(gp)	((gp) / 8)
#    define last_gp_argument		r9
/* Size in bytes of the rdi..r9 part of the register save area. */
#    define va_gp_max_offset						\
	(offsetof(jit_va_list_t, r9) - offsetof(jit_va_list_t, rdi) + 8)
#    define first_fp_argument		xmm0
#    define first_fp_offset		offsetof(jit_va_list_t, xmm0)
#    define last_fp_argument		xmm7
/* Size in bytes of the whole rdi..xmm7 register save area. */
#    define va_fp_max_offset						\
	(offsetof(jit_va_list_t, xmm7) - offsetof(jit_va_list_t, rdi) + 16)
#    define va_fp_increment		16
#    define first_fp_from_offset(fp)	(((fp) - va_gp_max_offset) / 16)
#  endif
#endif
61
62 /*
63 * Types
64 */
65 #if __X32 || __CYGWIN__ || _WIN32
66 typedef jit_pointer_t jit_va_list_t;
67 #else
68 typedef struct jit_va_list {
69 jit_int32_t gpoff;
70 jit_int32_t fpoff;
71 jit_pointer_t over;
72 jit_pointer_t save;
73 /* Declared explicitly as int64 for the x32 abi */
74 jit_int64_t rdi;
75 jit_int64_t rsi;
76 jit_int64_t rdx;
77 jit_int64_t rcx;
78 jit_int64_t r8;
79 jit_int64_t r9;
80 jit_float64_t xmm0;
81 jit_float64_t _up0;
82 jit_float64_t xmm1;
83 jit_float64_t _up1;
84 jit_float64_t xmm2;
85 jit_float64_t _up2;
86 jit_float64_t xmm3;
87 jit_float64_t _up3;
88 jit_float64_t xmm4;
89 jit_float64_t _up4;
90 jit_float64_t xmm5;
91 jit_float64_t _up5;
92 jit_float64_t xmm6;
93 jit_float64_t _up6;
94 jit_float64_t xmm7;
95 jit_float64_t _up7;
96 } jit_va_list_t;
97 #endif
98
99 /*
100 * Prototypes
101 */
102 #define patch(instr, node) _patch(_jit, instr, node)
103 static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
104 #define sse_from_x87_f(r0, r1) _sse_from_x87_f(_jit, r0, r1)
105 static void _sse_from_x87_f(jit_state_t*,jit_int32_t,jit_int32_t);
106 #define sse_from_x87_d(r0, r1) _sse_from_x87_d(_jit, r0, r1)
107 static void _sse_from_x87_d(jit_state_t*,jit_int32_t,jit_int32_t);
108 #define x87_from_sse_f(r0, r1) _x87_from_sse_f(_jit, r0, r1)
109 static void _x87_from_sse_f(jit_state_t*,jit_int32_t,jit_int32_t);
110 #define x87_from_sse_d(r0, r1) _x87_from_sse_d(_jit, r0, r1)
111 static void _x87_from_sse_d(jit_state_t*,jit_int32_t,jit_int32_t);
112
113 #define PROTO 1
114 # include "jit_x86-cpu.c"
115 # include "jit_x86-sse.c"
116 # include "jit_x86-x87.c"
117 #undef PROTO
118
119 /*
120 * Initialization
121 */
122 jit_cpu_t jit_cpu;
123 jit_register_t _rvs[] = {
124 #if __X32
125 { rc(gpr) | rc(rg8) | 0, "%eax" },
126 { rc(gpr) | rc(rg8) | 1, "%ecx" },
127 { rc(gpr) | rc(rg8) | 2, "%edx" },
128 { rc(sav) | rc(rg8) | rc(gpr) | 3, "%ebx" },
129 { rc(sav) | rc(gpr) | 6, "%esi" },
130 { rc(sav) | rc(gpr) | 7, "%edi" },
131 { rc(sav) | 4, "%esp" },
132 { rc(sav) | 5, "%ebp" },
133 { rc(xpr) | rc(fpr) | 0, "%xmm0" },
134 { rc(xpr) | rc(fpr) | 1, "%xmm1" },
135 { rc(xpr) | rc(fpr) | 2, "%xmm2" },
136 { rc(xpr) | rc(fpr) | 3, "%xmm3" },
137 { rc(xpr) | rc(fpr) | 4, "%xmm4" },
138 { rc(xpr) | rc(fpr) | 5, "%xmm5" },
139 { rc(xpr) | rc(fpr) | 6, "%xmm6" },
140 { rc(xpr) | rc(fpr) | 7, "%xmm7" },
141 { rc(fpr) | 0, "st(0)" },
142 { rc(fpr) | 1, "st(1)" },
143 { rc(fpr) | 2, "st(2)" },
144 { rc(fpr) | 3, "st(3)" },
145 { rc(fpr) | 4, "st(4)" },
146 { rc(fpr) | 5, "st(5)" },
147 { rc(fpr) | 6, "st(6)" },
148 { rc(fpr) | 7, "st(7)" },
149 #else
150 # if __CYGWIN__ || _WIN32
151 { rc(gpr) | rc(rg8) | 0, "%rax" },
152 { rc(gpr) | rc(rg8) | rc(rg8) | 10, "%r10" },
153 { rc(gpr) | rc(rg8) | rc(rg8) | 11, "%r11" },
154 { rc(sav) | rc(rg8) | rc(gpr) | 3, "%rbx" },
155 { rc(sav) | rc(gpr) | 7, "%rdi" },
156 { rc(sav) | rc(gpr) | 6, "%rsi" },
157 { rc(sav) | rc(gpr) | 12, "%r12" },
158 { rc(sav) | rc(gpr) | 13, "%r13" },
159 { rc(sav) | rc(gpr) | 14, "%r14" },
160 { rc(sav) | rc(gpr) | 15, "%r15" },
161 { rc(arg) | rc(rg8) | rc(gpr) | 9, "%r9" },
162 { rc(arg) | rc(rg8) | rc(gpr) | 8, "%r8" },
163 { rc(arg) | rc(rg8) | rc(gpr) | 2, "%rdx" },
164 { rc(arg) | rc(rg8) | rc(gpr) | 1, "%rcx" },
165 { rc(sav) | 4, "%rsp" },
166 { rc(sav) | 5, "%rbp" },
167 { rc(xpr) | rc(fpr) | 4, "%xmm4" },
168 { rc(xpr) | rc(fpr) | 5, "%xmm5" },
169 { rc(sav) | rc(xpr) | rc(fpr) | 6, "%xmm6" },
170 { rc(sav) | rc(xpr) | rc(fpr) | 7, "%xmm7" },
171 { rc(sav) | rc(xpr) | rc(fpr) | 8, "%xmm8" },
172 { rc(sav) | rc(xpr) | rc(fpr) | 9, "%xmm9" },
173 { rc(sav) | rc(xpr) | rc(fpr) | 10, "%xmm10" },
174 { rc(sav) | rc(xpr) | rc(fpr) | 11, "%xmm11" },
175 { rc(sav) | rc(xpr) | rc(fpr) | 12, "%xmm12" },
176 { rc(sav) | rc(xpr) | rc(fpr) | 13, "%xmm13" },
177 { rc(sav) | rc(xpr) | rc(fpr) | 14, "%xmm14" },
178 { rc(sav) | rc(xpr) | rc(fpr) | 15, "%xmm15" },
179 { rc(xpr) | rc(arg) | rc(fpr) | 3, "%xmm3" },
180 { rc(xpr) | rc(arg) | rc(fpr) | 2, "%xmm2" },
181 { rc(xpr) | rc(arg) | rc(fpr) | 1, "%xmm1" },
182 { rc(xpr) | rc(arg) | rc(fpr) | 0, "%xmm0" },
183 #else
184 /* %rax is a pseudo flag argument for varargs functions */
185 { rc(arg) | rc(gpr) | rc(rg8) | 0, "%rax" },
186 { rc(gpr) | rc(rg8) | 10, "%r10" },
187 { rc(gpr) | rc(rg8) | 11, "%r11" },
188 { rc(sav) | rc(rg8) | rc(gpr) | 3, "%rbx" },
189 { rc(sav) | rc(rg8) | rc(gpr) | 13, "%r13" },
190 { rc(sav) | rc(rg8) | rc(gpr) | 14, "%r14" },
191 { rc(sav) | rc(rg8) | rc(gpr) | 15, "%r15" },
192 { rc(sav) | rc(gpr) | rc(rg8) | 12, "%r12" },
193 { rc(arg) | rc(rg8) | rc(gpr) | 9, "%r9" },
194 { rc(arg) | rc(rg8) | rc(gpr) | 8, "%r8" },
195 { rc(arg) | rc(rg8) | rc(gpr) | 1, "%rcx" },
196 { rc(arg) | rc(rg8) | rc(gpr) | 2, "%rdx" },
197 { rc(arg) | rc(rg8) | rc(gpr) | 6, "%rsi" },
198 { rc(arg) | rc(rg8) | rc(gpr) | 7, "%rdi" },
199 { rc(sav) | 4, "%rsp" },
200 { rc(sav) | 5, "%rbp" },
201 { rc(xpr) | rc(fpr) | 8, "%xmm8" },
202 { rc(xpr) | rc(fpr) | 9, "%xmm9" },
203 { rc(xpr) | rc(fpr) | 10, "%xmm10" },
204 { rc(xpr) | rc(fpr) | 11, "%xmm11" },
205 { rc(xpr) | rc(fpr) | 12, "%xmm12" },
206 { rc(xpr) | rc(fpr) | 13, "%xmm13" },
207 { rc(xpr) | rc(fpr) | 14, "%xmm14" },
208 { rc(xpr) | rc(fpr) | 15, "%xmm15" },
209 { rc(xpr) | rc(arg) | rc(fpr) | 7, "%xmm7" },
210 { rc(xpr) | rc(arg) | rc(fpr) | 6, "%xmm6" },
211 { rc(xpr) | rc(arg) | rc(fpr) | 5, "%xmm5" },
212 { rc(xpr) | rc(arg) | rc(fpr) | 4, "%xmm4" },
213 { rc(xpr) | rc(arg) | rc(fpr) | 3, "%xmm3" },
214 { rc(xpr) | rc(arg) | rc(fpr) | 2, "%xmm2" },
215 { rc(xpr) | rc(arg) | rc(fpr) | 1, "%xmm1" },
216 { rc(xpr) | rc(arg) | rc(fpr) | 0, "%xmm0" },
217 # endif
218 { rc(fpr) | 0, "st(0)" },
219 { rc(fpr) | 1, "st(1)" },
220 { rc(fpr) | 2, "st(2)" },
221 { rc(fpr) | 3, "st(3)" },
222 { rc(fpr) | 4, "st(4)" },
223 { rc(fpr) | 5, "st(5)" },
224 { rc(fpr) | 6, "st(6)" },
225 { rc(fpr) | 7, "st(7)" },
226 #endif
227 { _NOREG, "<none>" },
228 };
229
230 /*
231 * Implementation
232 */
233 void
jit_get_cpu(void)234 jit_get_cpu(void)
235 {
236 union {
237 struct {
238 jit_uint32_t sse3 : 1;
239 jit_uint32_t pclmulqdq : 1;
240 jit_uint32_t dtes64 : 1; /* amd reserved */
241 jit_uint32_t monitor : 1;
242 jit_uint32_t ds_cpl : 1; /* amd reserved */
243 jit_uint32_t vmx : 1; /* amd reserved */
244 jit_uint32_t smx : 1; /* amd reserved */
245 jit_uint32_t est : 1; /* amd reserved */
246 jit_uint32_t tm2 : 1; /* amd reserved */
247 jit_uint32_t ssse3 : 1;
248 jit_uint32_t cntx_id : 1; /* amd reserved */
249 jit_uint32_t __reserved0 : 1;
250 jit_uint32_t fma : 1;
251 jit_uint32_t cmpxchg16b : 1;
252 jit_uint32_t xtpr : 1; /* amd reserved */
253 jit_uint32_t pdcm : 1; /* amd reserved */
254 jit_uint32_t __reserved1 : 1;
255 jit_uint32_t pcid : 1; /* amd reserved */
256 jit_uint32_t dca : 1; /* amd reserved */
257 jit_uint32_t sse4_1 : 1;
258 jit_uint32_t sse4_2 : 1;
259 jit_uint32_t x2apic : 1; /* amd reserved */
260 jit_uint32_t movbe : 1; /* amd reserved */
261 jit_uint32_t popcnt : 1;
262 jit_uint32_t tsc : 1; /* amd reserved */
263 jit_uint32_t aes : 1;
264 jit_uint32_t xsave : 1;
265 jit_uint32_t osxsave : 1;
266 jit_uint32_t avx : 1;
267 jit_uint32_t __reserved2 : 1; /* amd F16C */
268 jit_uint32_t __reserved3 : 1;
269 jit_uint32_t __alwayszero : 1; /* amd RAZ */
270 } bits;
271 jit_uword_t cpuid;
272 } ecx;
273 union {
274 struct {
275 jit_uint32_t fpu : 1;
276 jit_uint32_t vme : 1;
277 jit_uint32_t de : 1;
278 jit_uint32_t pse : 1;
279 jit_uint32_t tsc : 1;
280 jit_uint32_t msr : 1;
281 jit_uint32_t pae : 1;
282 jit_uint32_t mce : 1;
283 jit_uint32_t cmpxchg8b : 1;
284 jit_uint32_t apic : 1;
285 jit_uint32_t __reserved0 : 1;
286 jit_uint32_t sep : 1;
287 jit_uint32_t mtrr : 1;
288 jit_uint32_t pge : 1;
289 jit_uint32_t mca : 1;
290 jit_uint32_t cmov : 1;
291 jit_uint32_t pat : 1;
292 jit_uint32_t pse36 : 1;
293 jit_uint32_t psn : 1; /* amd reserved */
294 jit_uint32_t clfsh : 1;
295 jit_uint32_t __reserved1 : 1;
296 jit_uint32_t ds : 1; /* amd reserved */
297 jit_uint32_t acpi : 1; /* amd reserved */
298 jit_uint32_t mmx : 1;
299 jit_uint32_t fxsr : 1;
300 jit_uint32_t sse : 1;
301 jit_uint32_t sse2 : 1;
302 jit_uint32_t ss : 1; /* amd reserved */
303 jit_uint32_t htt : 1;
304 jit_uint32_t tm : 1; /* amd reserved */
305 jit_uint32_t __reserved2 : 1;
306 jit_uint32_t pbe : 1; /* amd reserved */
307 } bits;
308 jit_uword_t cpuid;
309 } edx;
310 #if __X32
311 int ac, flags;
312 #endif
313 jit_uword_t eax, ebx;
314
315 #if __X32
316 /* adapted from glibc __sysconf */
317 __asm__ volatile ("pushfl;\n\t"
318 "popl %0;\n\t"
319 "movl $0x240000, %1;\n\t"
320 "xorl %0, %1;\n\t"
321 "pushl %1;\n\t"
322 "popfl;\n\t"
323 "pushfl;\n\t"
324 "popl %1;\n\t"
325 "xorl %0, %1;\n\t"
326 "pushl %0;\n\t"
327 "popfl"
328 : "=r" (flags), "=r" (ac));
329
330 /* i386 or i486 without cpuid */
331 if ((ac & (1 << 21)) == 0)
332 /* probably without x87 as well */
333 return;
334 #endif
335
336 /* query %eax = 1 function */
337 #if __X32 || __X64_32
338 __asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
339 #else
340 __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
341 #endif
342 : "=a" (eax), "=r" (ebx),
343 "=c" (ecx.cpuid), "=d" (edx.cpuid)
344 : "0" (1));
345
346 jit_cpu.fpu = edx.bits.fpu;
347 jit_cpu.cmpxchg8b = edx.bits.cmpxchg8b;
348 jit_cpu.cmov = edx.bits.cmov;
349 jit_cpu.mmx = edx.bits.mmx;
350 jit_cpu.sse = edx.bits.sse;
351 jit_cpu.sse2 = edx.bits.sse2;
352 jit_cpu.sse3 = ecx.bits.sse3;
353 jit_cpu.pclmulqdq = ecx.bits.pclmulqdq;
354 jit_cpu.ssse3 = ecx.bits.ssse3;
355 jit_cpu.fma = ecx.bits.fma;
356 jit_cpu.cmpxchg16b = ecx.bits.cmpxchg16b;
357 jit_cpu.sse4_1 = ecx.bits.sse4_1;
358 jit_cpu.sse4_2 = ecx.bits.sse4_2;
359 jit_cpu.movbe = ecx.bits.movbe;
360 jit_cpu.popcnt = ecx.bits.popcnt;
361 jit_cpu.aes = ecx.bits.aes;
362 jit_cpu.avx = ecx.bits.avx;
363
364 /* query %eax = 0x80000001 function */
365 #if __X64
366 # if __X64_32
367 __asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
368 # else
369 __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
370 # endif
371 : "=a" (eax), "=r" (ebx),
372 "=c" (ecx.cpuid), "=d" (edx.cpuid)
373 : "0" (0x80000001));
374 jit_cpu.lahf = ecx.cpuid & 1;
375 #endif
376 }
377
378 void
_jit_init(jit_state_t * _jit)379 _jit_init(jit_state_t *_jit)
380 {
381 #if __X32
382 jit_int32_t regno;
383 static jit_bool_t first = 1;
384 #endif
385
386 _jitc->reglen = jit_size(_rvs) - 1;
387 #if __X32
388 if (first) {
389 if (!jit_cpu.sse2) {
390 for (regno = _jitc->reglen; regno >= 0; regno--) {
391 if (_rvs[regno].spec & jit_class_xpr)
392 _rvs[regno].spec = 0;
393 }
394 }
395 first = 0;
396 }
397 #endif
398 }
399
400 void
_jit_prolog(jit_state_t * _jit)401 _jit_prolog(jit_state_t *_jit)
402 {
403 jit_int32_t offset;
404
405 if (_jitc->function)
406 jit_epilog();
407 assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
408 jit_regset_set_ui(&_jitc->regsav, 0);
409 offset = _jitc->functions.offset;
410 if (offset >= _jitc->functions.length) {
411 jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
412 _jitc->functions.length * sizeof(jit_function_t),
413 (_jitc->functions.length + 16) * sizeof(jit_function_t));
414 _jitc->functions.length += 16;
415 }
416 _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
417 _jitc->function->self.size = stack_framesize;
418 _jitc->function->self.argi = _jitc->function->self.argf =
419 _jitc->function->self.aoff = _jitc->function->self.alen = 0;
420 /* sse/x87 conversion */
421 _jitc->function->self.aoff = CVT_OFFSET;
422 _jitc->function->self.call = jit_call_default;
423 jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
424 _jitc->reglen * sizeof(jit_int32_t));
425
426 /* _no_link here does not mean the jit_link() call can be removed
427 * by rewriting as:
428 * _jitc->function->prolog = jit_new_node(jit_code_prolog);
429 */
430 _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog);
431 jit_link(_jitc->function->prolog);
432 _jitc->function->prolog->w.w = offset;
433 _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog);
434 /* u: label value
435 * v: offset in blocks vector
436 * w: offset in functions vector
437 */
438 _jitc->function->epilog->w.w = offset;
439
440 jit_regset_new(&_jitc->function->regset);
441 }
442
443 jit_int32_t
_jit_allocai(jit_state_t * _jit,jit_int32_t length)444 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
445 {
446 assert(_jitc->function);
447 switch (length) {
448 case 0: case 1: break;
449 case 2: _jitc->function->self.aoff &= -2; break;
450 case 3: case 4: _jitc->function->self.aoff &= -4; break;
451 default: _jitc->function->self.aoff &= -8; break;
452 }
453 _jitc->function->self.aoff -= length;
454
455 /* jit_allocai() may be called from jit_x86-cpu.c, and force a function
456 * generation restart on some conditions: div/rem and qmul/qdiv, due
457 * to registers constraints.
458 * The check is to prevent an assertion of a jit_xyz() being called
459 * during code generation, and attempting to add a node to the tail
460 * of the current IR generation. */
461 if (!_jitc->realize) {
462 jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
463 jit_dec_synth();
464 }
465
466 return (_jitc->function->self.aoff);
467 }
468
469 void
_jit_allocar(jit_state_t * _jit,jit_int32_t u,jit_int32_t v)470 _jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
471 {
472 jit_int32_t reg;
473 assert(_jitc->function);
474 jit_inc_synth_ww(allocar, u, v);
475 if (!_jitc->function->allocar) {
476 _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
477 _jitc->function->allocar = 1;
478 }
479 reg = jit_get_reg(jit_class_gpr);
480 jit_negr(reg, v);
481 jit_andi(reg, reg, -16);
482 jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
483 jit_addr(u, u, reg);
484 jit_addr(JIT_SP, JIT_SP, reg);
485 jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
486 jit_unget_reg(reg);
487 jit_dec_synth();
488 }
489
490 void
_jit_ret(jit_state_t * _jit)491 _jit_ret(jit_state_t *_jit)
492 {
493 jit_node_t *instr;
494 assert(_jitc->function);
495 jit_inc_synth(ret);
496 /* jump to epilog */
497 instr = jit_jmpi();
498 jit_patch_at(instr, _jitc->function->epilog);
499 jit_dec_synth();
500 }
501
502 void
_jit_retr(jit_state_t * _jit,jit_int32_t u)503 _jit_retr(jit_state_t *_jit, jit_int32_t u)
504 {
505 jit_inc_synth_w(retr, u);
506 /* movr(%ret, %ret) would be optimized out */
507 if (JIT_RET != u)
508 jit_movr(JIT_RET, u);
509 /* explicitly tell it is live */
510 jit_live(JIT_RET);
511 jit_ret();
512 jit_dec_synth();
513 }
514
515 void
_jit_reti(jit_state_t * _jit,jit_word_t u)516 _jit_reti(jit_state_t *_jit, jit_word_t u)
517 {
518 jit_inc_synth_w(reti, u);
519 jit_movi(JIT_RET, u);
520 jit_ret();
521 jit_dec_synth();
522 }
523
524 void
_jit_retr_f(jit_state_t * _jit,jit_int32_t u)525 _jit_retr_f(jit_state_t *_jit, jit_int32_t u)
526 {
527 jit_inc_synth_w(retr_f, u);
528 if (JIT_FRET != u)
529 jit_movr_f(JIT_FRET, u);
530 else
531 jit_live(JIT_FRET);
532 jit_ret();
533 jit_dec_synth();
534 }
535
536 void
_jit_reti_f(jit_state_t * _jit,jit_float32_t u)537 _jit_reti_f(jit_state_t *_jit, jit_float32_t u)
538 {
539 jit_inc_synth_f(reti_f, u);
540 jit_movi_f(JIT_FRET, u);
541 jit_ret();
542 jit_dec_synth();
543 }
544
545 void
_jit_retr_d(jit_state_t * _jit,jit_int32_t u)546 _jit_retr_d(jit_state_t *_jit, jit_int32_t u)
547 {
548 jit_inc_synth_w(retr_d, u);
549 if (JIT_FRET != u)
550 jit_movr_d(JIT_FRET, u);
551 else
552 jit_live(JIT_FRET);
553 jit_ret();
554 jit_dec_synth();
555 }
556
557 void
_jit_reti_d(jit_state_t * _jit,jit_float64_t u)558 _jit_reti_d(jit_state_t *_jit, jit_float64_t u)
559 {
560 jit_inc_synth_d(reti_d, u);
561 jit_movi_d(JIT_FRET, u);
562 jit_ret();
563 jit_dec_synth();
564 }
565
566 void
_jit_epilog(jit_state_t * _jit)567 _jit_epilog(jit_state_t *_jit)
568 {
569 assert(_jitc->function);
570 assert(_jitc->function->epilog->next == NULL);
571 jit_link(_jitc->function->epilog);
572 _jitc->function = NULL;
573 }
574
575 jit_bool_t
_jit_arg_register_p(jit_state_t * _jit,jit_node_t * u)576 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
577 {
578 if (u->code == jit_code_arg)
579 return (jit_arg_reg_p(u->u.w));
580 assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
581 return (jit_arg_f_reg_p(u->u.w));
582 }
583
584 void
_jit_ellipsis(jit_state_t * _jit)585 _jit_ellipsis(jit_state_t *_jit)
586 {
587 jit_inc_synth(ellipsis);
588 if (_jitc->prepare) {
589 jit_link_prepare();
590 /* Remember that a varargs function call is being constructed. */
591 assert(!(_jitc->function->call.call & jit_call_varargs));
592 _jitc->function->call.call |= jit_call_varargs;
593 }
594 else {
595 jit_link_prolog();
596 /* Remember the current function is varargs. */
597 assert(!(_jitc->function->self.call & jit_call_varargs));
598 _jitc->function->self.call |= jit_call_varargs;
599
600 #if __X64 && !(__CYGWIN__ || _WIN32)
601 /* Allocate va_list like object in the stack.
602 * If applicable, with enough space to save all argument
603 * registers, and use fixed offsets for them. */
604 _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t));
605
606 /* Initialize gp offset in save area. */
607 if (jit_arg_reg_p(_jitc->function->self.argi))
608 _jitc->function->vagp = _jitc->function->self.argi * 8;
609 else
610 _jitc->function->vagp = va_gp_max_offset;
611
612 /* Initialize fp offset in save area. */
613 if (jit_arg_f_reg_p(_jitc->function->self.argf))
614 _jitc->function->vafp = _jitc->function->self.argf * 16 +
615 va_gp_max_offset;
616 else
617 _jitc->function->vafp = va_fp_max_offset;
618 #endif
619 }
620 jit_dec_synth();
621 }
622
623 void
_jit_va_push(jit_state_t * _jit,jit_int32_t u)624 _jit_va_push(jit_state_t *_jit, jit_int32_t u)
625 {
626 jit_inc_synth_w(va_push, u);
627 jit_pushargr(u);
628 jit_dec_synth();
629 }
630
631 jit_node_t *
_jit_arg(jit_state_t * _jit)632 _jit_arg(jit_state_t *_jit)
633 {
634 jit_node_t *node;
635 jit_int32_t offset;
636 assert(_jitc->function);
637 assert(!(_jitc->function->self.call & jit_call_varargs));
638 #if __X64
639 if (jit_arg_reg_p(_jitc->function->self.argi)) {
640 offset = _jitc->function->self.argi++;
641 # if __CYGWIN__ || _WIN32
642 _jitc->function->self.size += sizeof(jit_word_t);
643 # endif
644 }
645 else
646 #endif
647 {
648 offset = _jitc->function->self.size;
649 _jitc->function->self.size += REAL_WORDSIZE;
650 }
651 node = jit_new_node_ww(jit_code_arg, offset,
652 ++_jitc->function->self.argn);
653 jit_link_prolog();
654 return (node);
655 }
656
657 jit_node_t *
_jit_arg_f(jit_state_t * _jit)658 _jit_arg_f(jit_state_t *_jit)
659 {
660 jit_node_t *node;
661 jit_int32_t offset;
662 assert(_jitc->function);
663 assert(!(_jitc->function->self.call & jit_call_varargs));
664 #if __X64
665 # if __CYGWIN__ || _WIN32
666 if (jit_arg_reg_p(_jitc->function->self.argi)) {
667 offset = _jitc->function->self.argi++;
668 _jitc->function->self.size += sizeof(jit_word_t);
669 }
670 # else
671 if (jit_arg_f_reg_p(_jitc->function->self.argf))
672 offset = _jitc->function->self.argf++;
673 # endif
674 else
675 #endif
676 {
677 offset = _jitc->function->self.size;
678 _jitc->function->self.size += REAL_WORDSIZE;
679 }
680 node = jit_new_node_ww(jit_code_arg_f, offset,
681 ++_jitc->function->self.argn);
682 jit_link_prolog();
683 return (node);
684 }
685
686 jit_node_t *
_jit_arg_d(jit_state_t * _jit)687 _jit_arg_d(jit_state_t *_jit)
688 {
689 jit_node_t *node;
690 jit_int32_t offset;
691 assert(_jitc->function);
692 assert(!(_jitc->function->self.call & jit_call_varargs));
693 #if __X64
694 # if __CYGWIN__ || _WIN32
695 if (jit_arg_reg_p(_jitc->function->self.argi)) {
696 offset = _jitc->function->self.argi++;
697 _jitc->function->self.size += sizeof(jit_word_t);
698 }
699 # else
700 if (jit_arg_f_reg_p(_jitc->function->self.argf))
701 offset = _jitc->function->self.argf++;
702 # endif
703 else
704 #endif
705 {
706 offset = _jitc->function->self.size;
707 _jitc->function->self.size += sizeof(jit_float64_t);
708 }
709 node = jit_new_node_ww(jit_code_arg_d, offset,
710 ++_jitc->function->self.argn);
711 jit_link_prolog();
712 return (node);
713 }
714
715 void
_jit_getarg_c(jit_state_t * _jit,jit_int32_t u,jit_node_t * v)716 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
717 {
718 assert(v->code == jit_code_arg);
719 jit_inc_synth_wp(getarg_c, u, v);
720 #if __X64
721 if (jit_arg_reg_p(v->u.w))
722 jit_extr_c(u, JIT_RA0 - v->u.w);
723 else
724 #endif
725 jit_ldxi_c(u, _RBP, v->u.w);
726 jit_dec_synth();
727 }
728
729 void
_jit_getarg_uc(jit_state_t * _jit,jit_int32_t u,jit_node_t * v)730 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
731 {
732 assert(v->code == jit_code_arg);
733 jit_inc_synth_wp(getarg_uc, u, v);
734 #if __X64
735 if (jit_arg_reg_p(v->u.w))
736 jit_extr_uc(u, JIT_RA0 - v->u.w);
737 else
738 #endif
739 jit_ldxi_uc(u, _RBP, v->u.w);
740 jit_dec_synth();
741 }
742
743 void
_jit_getarg_s(jit_state_t * _jit,jit_int32_t u,jit_node_t * v)744 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
745 {
746 assert(v->code == jit_code_arg);
747 jit_inc_synth_wp(getarg_s, u, v);
748 #if __X64
749 if (jit_arg_reg_p(v->u.w))
750 jit_extr_s(u, JIT_RA0 - v->u.w);
751 else
752 #endif
753 jit_ldxi_s(u, _RBP, v->u.w);
754 jit_dec_synth();
755 }
756
757 void
_jit_getarg_us(jit_state_t * _jit,jit_int32_t u,jit_node_t * v)758 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
759 {
760 assert(v->code == jit_code_arg);
761 jit_inc_synth_wp(getarg_us, u, v);
762 #if __X64
763 if (jit_arg_reg_p(v->u.w))
764 jit_extr_us(u, JIT_RA0 - v->u.w);
765 else
766 #endif
767 jit_ldxi_us(u, _RBP, v->u.w);
768 jit_dec_synth();
769 }
770
771 void
_jit_getarg_i(jit_state_t * _jit,jit_int32_t u,jit_node_t * v)772 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
773 {
774 assert(v->code == jit_code_arg);
775 jit_inc_synth_wp(getarg_i, u, v);
776 #if __X64
777 if (jit_arg_reg_p(v->u.w)) {
778 # if __X64_32
779 jit_movr(u, JIT_RA0 - v->u.w);
780 # else
781 jit_extr_i(u, JIT_RA0 - v->u.w);
782 # endif
783 }
784 else
785 #endif
786 jit_ldxi_i(u, _RBP, v->u.w);
787 jit_dec_synth();
788 }
789
790 #if __X64 && !__X64_32
791 void
_jit_getarg_ui(jit_state_t * _jit,jit_int32_t u,jit_node_t * v)792 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
793 {
794 assert(v->code == jit_code_arg);
795 jit_inc_synth_wp(getarg_ui, u, v);
796 if (jit_arg_reg_p(v->u.w))
797 jit_extr_ui(u, JIT_RA0 - v->u.w);
798 else
799 jit_ldxi_ui(u, _RBP, v->u.w);
800 jit_dec_synth();
801 }
802
803 void
_jit_getarg_l(jit_state_t * _jit,jit_int32_t u,jit_node_t * v)804 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
805 {
806 assert(v->code == jit_code_arg);
807 jit_inc_synth_wp(getarg_l, u, v);
808 if (jit_arg_reg_p(v->u.w))
809 jit_movr(u, JIT_RA0 - v->u.w);
810 else
811 jit_ldxi_l(u, _RBP, v->u.w);
812 jit_dec_synth();
813 }
814 #endif
815
816 void
_jit_putargr(jit_state_t * _jit,jit_int32_t u,jit_node_t * v)817 _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
818 {
819 assert(v->code == jit_code_arg);
820 jit_inc_synth_wp(putargr, u, v);
821 #if __X64
822 if (jit_arg_reg_p(v->u.w))
823 jit_movr(JIT_RA0 - v->u.w, u);
824 else
825 #endif
826 jit_stxi(v->u.w, _RBP, u);
827 jit_dec_synth();
828 }
829
830 void
_jit_putargi(jit_state_t * _jit,jit_word_t u,jit_node_t * v)831 _jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
832 {
833 jit_int32_t regno;
834 assert(v->code == jit_code_arg);
835 jit_inc_synth_wp(putargi, u, v);
836 #if __X64
837 if (jit_arg_reg_p(v->u.w))
838 jit_movi(JIT_RA0 - v->u.w, u);
839 else
840 #endif
841 {
842 regno = jit_get_reg(jit_class_gpr);
843 jit_movi(regno, u);
844 jit_stxi(v->u.w, _RBP, regno);
845 jit_unget_reg(regno);
846 }
847 jit_dec_synth();
848 }
849
850 void
_jit_getarg_f(jit_state_t * _jit,jit_int32_t u,jit_node_t * v)851 _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
852 {
853 assert(v->code == jit_code_arg_f);
854 jit_inc_synth_wp(getarg_f, u, v);
855 #if __X64
856 if (jit_arg_f_reg_p(v->u.w))
857 jit_movr_f(u, _XMM0 - v->u.w);
858 else
859 #endif
860 jit_ldxi_f(u, _RBP, v->u.w);
861 jit_dec_synth();
862 }
863
864 void
_jit_putargr_f(jit_state_t * _jit,jit_int32_t u,jit_node_t * v)865 _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
866 {
867 assert(v->code == jit_code_arg_f);
868 jit_inc_synth_wp(putargr_f, u, v);
869 #if __X64
870 if (jit_arg_reg_p(v->u.w))
871 jit_movr_f(_XMM0 - v->u.w, u);
872 else
873 #endif
874 jit_stxi_f(v->u.w, _RBP, u);
875 jit_dec_synth();
876 }
877
878 void
_jit_putargi_f(jit_state_t * _jit,jit_float32_t u,jit_node_t * v)879 _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
880 {
881 jit_int32_t regno;
882 assert(v->code == jit_code_arg_f);
883 jit_inc_synth_fp(putargi_f, u, v);
884 #if __X64
885 if (jit_arg_reg_p(v->u.w))
886 jit_movi_f(_XMM0 - v->u.w, u);
887 else
888 #endif
889 {
890 regno = jit_get_reg(jit_class_gpr);
891 jit_movi_f(regno, u);
892 jit_stxi_f(v->u.w, _RBP, regno);
893 jit_unget_reg(regno);
894 }
895 jit_dec_synth();
896 }
897
898 void
_jit_getarg_d(jit_state_t * _jit,jit_int32_t u,jit_node_t * v)899 _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
900 {
901 assert(v->code == jit_code_arg_d);
902 jit_inc_synth_wp(getarg_d, u, v);
903 #if __X64
904 if (jit_arg_f_reg_p(v->u.w))
905 jit_movr_d(u, _XMM0 - v->u.w);
906 else
907 #endif
908 jit_ldxi_d(u, _RBP, v->u.w);
909 jit_dec_synth();
910 }
911
912 void
_jit_putargr_d(jit_state_t * _jit,jit_int32_t u,jit_node_t * v)913 _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
914 {
915 assert(v->code == jit_code_arg_d);
916 jit_inc_synth_wp(putargr_d, u, v);
917 #if __X64
918 if (jit_arg_reg_p(v->u.w))
919 jit_movr_d(_XMM0 - v->u.w, u);
920 else
921 #endif
922 jit_stxi_d(v->u.w, _RBP, u);
923 jit_dec_synth();
924 }
925
926 void
_jit_putargi_d(jit_state_t * _jit,jit_float64_t u,jit_node_t * v)927 _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
928 {
929 jit_int32_t regno;
930 assert(v->code == jit_code_arg_d);
931 jit_inc_synth_dp(putargi_d, u, v);
932 #if __X64
933 if (jit_arg_reg_p(v->u.w))
934 jit_movi_d(_XMM0 - v->u.w, u);
935 else
936 #endif
937 {
938 regno = jit_get_reg(jit_class_gpr);
939 jit_movi_d(regno, u);
940 jit_stxi_d(v->u.w, _RBP, regno);
941 jit_unget_reg(regno);
942 }
943 jit_dec_synth();
944 }
945
946 void
_jit_pushargr(jit_state_t * _jit,jit_int32_t u)947 _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
948 {
949 assert(_jitc->function);
950 jit_inc_synth_w(pushargr, u);
951 jit_link_prepare();
952 #if __X64
953 if (jit_arg_reg_p(_jitc->function->call.argi)) {
954 jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
955 ++_jitc->function->call.argi;
956 # if __CYGWIN__ || _WIN32
957 if (_jitc->function->call.call & jit_call_varargs)
958 jit_stxi(_jitc->function->call.size, _RSP, u);
959 _jitc->function->call.size += sizeof(jit_word_t);
960 # endif
961 }
962 else
963 #endif
964 {
965 jit_stxi(_jitc->function->call.size, _RSP, u);
966 _jitc->function->call.size += REAL_WORDSIZE;
967 }
968 jit_dec_synth();
969 }
970
971 void
_jit_pushargi(jit_state_t * _jit,jit_word_t u)972 _jit_pushargi(jit_state_t *_jit, jit_word_t u)
973 {
974 jit_int32_t regno;
975 assert(_jitc->function);
976 jit_inc_synth_w(pushargi, u);
977 jit_link_prepare();
978 #if __X64
979 if (jit_arg_reg_p(_jitc->function->call.argi)) {
980 jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
981 # if __CYGWIN__ || _WIN32
982 if (_jitc->function->call.call & jit_call_varargs)
983 jit_stxi(_jitc->function->call.size, _RSP,
984 JIT_RA0 - _jitc->function->call.argi);
985 _jitc->function->call.size += sizeof(jit_word_t);
986 # endif
987 ++_jitc->function->call.argi;
988 }
989 else
990 #endif
991 {
992 regno = jit_get_reg(jit_class_gpr);
993 jit_movi(regno, u);
994 jit_stxi(_jitc->function->call.size, _RSP, regno);
995 _jitc->function->call.size += REAL_WORDSIZE;
996 jit_unget_reg(regno);
997 }
998 jit_dec_synth();
999 }
1000
1001 void
_jit_pushargr_f(jit_state_t * _jit,jit_int32_t u)1002 _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
1003 {
1004 assert(_jitc->function);
1005 jit_inc_synth_w(pushargr_f, u);
1006 jit_link_prepare();
1007 #if __X64
1008 # if __CYGWIN__ || _WIN32
1009 if (jit_arg_reg_p(_jitc->function->call.argi)) {
1010 jit_movr_f(_XMM0 - _jitc->function->call.argi, u);
1011 if (_jitc->function->call.call & jit_call_varargs) {
1012 jit_stxi_f(_jitc->function->call.size, _RSP,
1013 _XMM0 - _jitc->function->call.argi);
1014 jit_ldxi_i(JIT_RA0 - _jitc->function->call.argi, _RSP,
1015 _jitc->function->call.size);
1016 }
1017 ++_jitc->function->call.argi;
1018 _jitc->function->call.size += sizeof(jit_word_t);
1019 }
1020 # else
1021 if (jit_arg_f_reg_p(_jitc->function->self.argf)) {
1022 jit_movr_f(_XMM0 - _jitc->function->call.argf, u);
1023 ++_jitc->function->call.argf;
1024 }
1025 # endif
1026 else
1027 #endif
1028 {
1029 jit_stxi_f(_jitc->function->call.size, _RSP, u);
1030 _jitc->function->call.size += REAL_WORDSIZE;
1031 }
1032 jit_dec_synth();
1033 }
1034
1035 void
_jit_pushargi_f(jit_state_t * _jit,jit_float32_t u)1036 _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
1037 {
1038 jit_int32_t regno;
1039 assert(_jitc->function);
1040 jit_inc_synth_f(pushargi_f, u);
1041 jit_link_prepare();
1042 #if __X64
1043 # if __CYGWIN__ || _WIN32
1044 if (jit_arg_reg_p(_jitc->function->call.argi)) {
1045 jit_movi_f(_XMM0 - _jitc->function->call.argi, u);
1046 if (_jitc->function->call.call & jit_call_varargs) {
1047 jit_stxi_f(_jitc->function->call.size, _RSP,
1048 _XMM0 - _jitc->function->call.argi);
1049 jit_ldxi_i(JIT_RA0 - _jitc->function->call.argi, _RSP,
1050 _jitc->function->call.size);
1051 }
1052 ++_jitc->function->call.argi;
1053 _jitc->function->call.size += sizeof(jit_word_t);
1054 }
1055 # else
1056 if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
1057 jit_movi_f(_XMM0 - _jitc->function->call.argf, u);
1058 ++_jitc->function->call.argf;
1059 }
1060 # endif
1061 else
1062 #endif
1063 {
1064 regno = jit_get_reg(jit_class_fpr);
1065 jit_movi_f(regno, u);
1066 jit_stxi_f(_jitc->function->call.size, _RSP, regno);
1067 _jitc->function->call.size += REAL_WORDSIZE;
1068 jit_unget_reg(regno);
1069 }
1070 jit_dec_synth();
1071 }
1072
1073 void
_jit_pushargr_d(jit_state_t * _jit,jit_int32_t u)1074 _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
1075 {
1076 assert(_jitc->function);
1077 jit_inc_synth_w(pushargr_d, u);
1078 jit_link_prepare();
1079 #if __X64
1080 # if __CYGWIN__ || _WIN32
1081 if (jit_arg_reg_p(_jitc->function->call.argi)) {
1082 jit_movr_d(_XMM0 - _jitc->function->call.argi, u);
1083 if (_jitc->function->call.call & jit_call_varargs) {
1084 jit_stxi_d(_jitc->function->call.size, _RSP,
1085 _XMM0 - _jitc->function->call.argi);
1086 jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP,
1087 _jitc->function->call.size);
1088 }
1089 ++_jitc->function->call.argi;
1090 _jitc->function->call.size += sizeof(jit_word_t);
1091 }
1092 # else
1093 if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
1094 jit_movr_d(_XMM0 - _jitc->function->call.argf, u);
1095 ++_jitc->function->call.argf;
1096 }
1097 # endif
1098 else
1099 #endif
1100 {
1101 jit_stxi_d(_jitc->function->call.size, _RSP, u);
1102 _jitc->function->call.size += sizeof(jit_float64_t);
1103 }
1104 jit_dec_synth();
1105 }
1106
1107 void
_jit_pushargi_d(jit_state_t * _jit,jit_float64_t u)1108 _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
1109 {
1110 jit_int32_t regno;
1111 assert(_jitc->function);
1112 jit_inc_synth_d(pushargi_d, u);
1113 jit_link_prepare();
1114 #if __X64
1115 # if __CYGWIN__ || _WIN32
1116 if (jit_arg_reg_p(_jitc->function->call.argi)) {
1117 jit_movi_d(_XMM0 - _jitc->function->call.argi, u);
1118 if (_jitc->function->call.call & jit_call_varargs) {
1119 jit_stxi_d(_jitc->function->call.size, _RSP,
1120 _XMM0 - _jitc->function->call.argi);
1121 jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP,
1122 _jitc->function->call.size);
1123 }
1124 ++_jitc->function->call.argi;
1125 _jitc->function->call.size += sizeof(jit_word_t);
1126 }
1127 # else
1128 if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
1129 jit_movi_d(_XMM0 - _jitc->function->call.argf, u);
1130 ++_jitc->function->call.argf;
1131 }
1132 # endif
1133 else
1134 #endif
1135 {
1136 regno = jit_get_reg(jit_class_fpr);
1137 jit_movi_d(regno, u);
1138 jit_stxi_d(_jitc->function->call.size, _RSP, regno);
1139 _jitc->function->call.size += sizeof(jit_float64_t);
1140 jit_unget_reg(regno);
1141 }
1142 jit_dec_synth();
1143 }
1144
1145 jit_bool_t
_jit_regarg_p(jit_state_t * _jit,jit_node_t * node,jit_int32_t regno)1146 _jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
1147 {
1148 #if __X64
1149 jit_int32_t spec;
1150
1151 spec = jit_class(_rvs[regno].spec);
1152 if (spec & jit_class_arg) {
1153 if (spec & jit_class_gpr) {
1154 regno = JIT_RA0 - regno;
1155 if (regno >= 0 && regno < node->v.w)
1156 return (1);
1157 }
1158 else if (spec & jit_class_fpr) {
1159 regno = _XMM0 - regno;
1160 if (regno >= 0 && regno < node->w.w)
1161 return (1);
1162 }
1163 }
1164 #endif
1165 return (0);
1166 }
1167
1168 void
_jit_finishr(jit_state_t * _jit,jit_int32_t r0)1169 _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
1170 {
1171 jit_int32_t reg;
1172 jit_node_t *call;
1173 assert(_jitc->function);
1174 reg = r0;
1175 jit_inc_synth_w(finishr, r0);
1176 if (_jitc->function->self.alen < _jitc->function->call.size)
1177 _jitc->function->self.alen = _jitc->function->call.size;
1178 #if __X64
1179 # if !(__CYGWIN__ || _WIN32)
1180 if (_jitc->function->call.call & jit_call_varargs) {
1181 if (jit_regno(reg) == _RAX) {
1182 reg = jit_get_reg(jit_class_gpr);
1183 jit_movr(reg, _RAX);
1184 }
1185 if (_jitc->function->call.argf)
1186 jit_movi(_RAX, _jitc->function->call.argf);
1187 else
1188 jit_movi(_RAX, 0);
1189 if (reg != r0)
1190 jit_unget_reg(reg);
1191 }
1192 # endif
1193 #endif
1194 call = jit_callr(reg);
1195 call->v.w = _jitc->function->call.argi;
1196 call->w.w = _jitc->function->call.argf;
1197 _jitc->function->call.argi = _jitc->function->call.argf =
1198 _jitc->function->call.size = 0;
1199 _jitc->prepare = 0;
1200 jit_dec_synth();
1201 }
1202
1203 jit_node_t *
_jit_finishi(jit_state_t * _jit,jit_pointer_t i0)1204 _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
1205 {
1206 #if __X64
1207 jit_int32_t reg;
1208 #endif
1209 jit_node_t *node;
1210 assert(_jitc->function);
1211 jit_inc_synth_w(finishi, (jit_word_t)i0);
1212 if (_jitc->function->self.alen < _jitc->function->call.size)
1213 _jitc->function->self.alen = _jitc->function->call.size;
1214 #if __X64
1215 /* FIXME preventing %rax allocation is good enough, but for consistency
1216 * it should automatically detect %rax is dead, in case it has run out
1217 * registers, and not save/restore it, what would be wrong if using the
1218 * the return value, otherwise, just a needless noop */
1219 /* >> prevent %rax from being allocated as the function pointer */
1220 jit_regset_setbit(&_jitc->regarg, _RAX);
1221 reg = jit_get_reg(jit_class_gpr);
1222 node = jit_movi(reg, (jit_word_t)i0);
1223 jit_finishr(reg);
1224 jit_unget_reg(reg);
1225 /* << prevent %rax from being allocated as the function pointer */
1226 jit_regset_clrbit(&_jitc->regarg, _RAX);
1227 #else
1228 node = jit_calli(i0);
1229 node->v.w = _jitc->function->call.argi;
1230 node->w.w = _jitc->function->call.argf;
1231 #endif
1232 _jitc->function->call.argi = _jitc->function->call.argf =
1233 _jitc->function->call.size = 0;
1234 _jitc->prepare = 0;
1235 jit_dec_synth();
1236 return (node);
1237 }
1238
/*
 * Fetch the return value of the last call as a signed char:
 * emits extr_c from JIT_RET into `r0', bracketed as a synthesized
 * retval_c node.
 */
void
_jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
{
    jit_inc_synth_w(retval_c, r0);
    jit_extr_c(r0, JIT_RET);
    jit_dec_synth();
}
1246
/*
 * Fetch the return value of the last call as an unsigned char:
 * emits extr_uc from JIT_RET into `r0', bracketed as a synthesized
 * retval_uc node.
 */
void
_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
{
    jit_inc_synth_w(retval_uc, r0);
    jit_extr_uc(r0, JIT_RET);
    jit_dec_synth();
}
1254
/*
 * Fetch the return value of the last call as a signed short:
 * emits extr_s from JIT_RET into `r0', bracketed as a synthesized
 * retval_s node.
 */
void
_jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
{
    jit_inc_synth_w(retval_s, r0);
    jit_extr_s(r0, JIT_RET);
    jit_dec_synth();
}
1262
/*
 * Fetch the return value of the last call as an unsigned short:
 * emits extr_us from JIT_RET into `r0', bracketed as a synthesized
 * retval_us node.
 */
void
_jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
{
    jit_inc_synth_w(retval_us, r0);
    jit_extr_us(r0, JIT_RET);
    jit_dec_synth();
}
1270
1271 void
_jit_retval_i(jit_state_t * _jit,jit_int32_t r0)1272 _jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
1273 {
1274 jit_inc_synth_w(retval_i, r0);
1275 #if __X32 || __X64_32
1276 if (r0 != JIT_RET)
1277 jit_movr(r0, JIT_RET);
1278 #else
1279 jit_extr_i(r0, JIT_RET);
1280 #endif
1281 jit_dec_synth();
1282 }
1283
1284 #if __X64 && !__X64_32
/*
 * Fetch the return value of the last call as an unsigned int:
 * emits extr_ui from JIT_RET into `r0', bracketed as a synthesized
 * retval_ui node.  Only compiled when __X64 && !__X64_32.
 */
void
_jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
{
    jit_inc_synth_w(retval_ui, r0);
    jit_extr_ui(r0, JIT_RET);
    jit_dec_synth();
}
1292
1293 void
_jit_retval_l(jit_state_t * _jit,jit_int32_t r0)1294 _jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
1295 {
1296 jit_inc_synth_w(retval_l, r0);
1297 if (r0 != JIT_RET)
1298 jit_movr(r0, JIT_RET);
1299 jit_dec_synth();
1300 }
1301 #endif
1302
1303 void
_jit_retval_f(jit_state_t * _jit,jit_int32_t r0)1304 _jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
1305 {
1306 jit_inc_synth_w(retval_f, r0);
1307 #if __X64
1308 if (r0 != JIT_FRET)
1309 jit_movr_f(r0, JIT_FRET);
1310 #endif
1311 jit_dec_synth();
1312 }
1313
1314 void
_jit_retval_d(jit_state_t * _jit,jit_int32_t r0)1315 _jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
1316 {
1317 jit_inc_synth_w(retval_d, r0);
1318 #if __X64
1319 if (r0 != JIT_FRET)
1320 jit_movr_d(r0, JIT_FRET);
1321 #endif
1322 jit_dec_synth();
1323 }
1324
/*
 * Walk the node list starting at _jitc->head and emit machine code for
 * every node, then resolve the recorded forward references.
 *
 * Returns _jit->code.ptr on success, or NULL as soon as the write
 * position (_jit->pc) has reached _jitc->code.end.
 *
 * The body is a single switch on node->code.  Most cases are produced
 * by the local case_* helper macros defined below; the letters of a
 * macro name describe how the node operands are passed to the emitter:
 *   r  operand is a register, passed through rn()
 *   w  operand is passed as the raw word stored in the node
 *   f  operand is a float register; the x87_ or sse_ emitter is
 *      selected with jit_x87_reg_p()
 *   b  (leading) branch to the node in node->u.n, patched later when
 *      the target has not been emitted yet
 * For float immediates (the *fw macros taking `size') the value is
 * read through the data node referenced by node->w.n.
 */
jit_pointer_t
_emit_code(jit_state_t *_jit)
{
    jit_node_t *node;
    jit_node_t *temp;
    jit_word_t word;
    jit_int32_t value;
    jit_int32_t offset;
    /* state saved at a prolog so the function can be emitted again
     * from scratch if _jitc->again is set when its epilog is reached */
    struct {
        jit_node_t *node;
        jit_word_t word;
#if DEVEL_DISASSEMBLER
        jit_word_t prevw;
#endif
        jit_int32_t patch_offset;
    } undo;
#if DEVEL_DISASSEMBLER
    jit_word_t prevw;
#endif

    _jitc->function = NULL;

    jit_reglive_setup();

    undo.word = 0;
    undo.node = NULL;
    undo.patch_offset = 0;
#define case_rr(name, type) \
    case jit_code_##name##r##type: \
        name##r##type(rn(node->u.w), rn(node->v.w)); \
        break
#define case_rw(name, type) \
    case jit_code_##name##i##type: \
        name##i##type(rn(node->u.w), node->v.w); \
        break
#define case_rf(name, type) \
    case jit_code_##name##r##type: \
        if (jit_x87_reg_p(node->v.w)) \
            x87_##name##r##type(rn(node->u.w), rn(node->v.w)); \
        else \
            sse_##name##r##type(rn(node->u.w), rn(node->v.w)); \
        break
#define case_fr(name, type) \
    case jit_code_##name##r##type: \
        if (jit_x87_reg_p(node->u.w)) \
            x87_##name##r##type(rn(node->u.w), rn(node->v.w)); \
        else \
            sse_##name##r##type(rn(node->u.w), rn(node->v.w)); \
        break
#define case_fw(name, type) \
    case jit_code_##name##i##type: \
        if (jit_x87_reg_p(node->u.w)) \
            x87_##name##i##type(rn(node->u.w), node->v.w); \
        else \
            sse_##name##i##type(rn(node->u.w), node->v.w); \
        break
#define case_wr(name, type) \
    case jit_code_##name##i##type: \
        name##i##type(node->u.w, rn(node->v.w)); \
        break
#define case_wf(name, type) \
    case jit_code_##name##i##type: \
        if (jit_x87_reg_p(node->v.w)) \
            x87_##name##i##type(node->u.w, rn(node->v.w)); \
        else \
            sse_##name##i##type(node->u.w, rn(node->v.w)); \
        break
#define case_ff(name, type) \
    case jit_code_##name##r##type: \
        if (jit_x87_reg_p(node->u.w) && \
            jit_x87_reg_p(node->v.w)) \
            x87_##name##r##type(rn(node->u.w), rn(node->v.w)); \
        else \
            sse_##name##r##type(rn(node->u.w), rn(node->v.w)); \
        break;
#define case_rrr(name, type) \
    case jit_code_##name##r##type: \
        name##r##type(rn(node->u.w), \
                      rn(node->v.w), rn(node->w.w)); \
        break
#define case_rrrr(name, type) \
    case jit_code_##name##r##type: \
        name##r##type(rn(node->u.q.l), rn(node->u.q.h), \
                      rn(node->v.w), rn(node->w.w)); \
        break
#define case_frr(name, type) \
    case jit_code_##name##r##type: \
        if (jit_x87_reg_p(node->u.w)) \
            x87_##name##r##type(rn(node->u.w), \
                                rn(node->v.w), rn(node->w.w)); \
        else \
            sse_##name##r##type(rn(node->u.w), \
                                rn(node->v.w), rn(node->w.w)); \
        break
#define case_rrf(name, type) \
    case jit_code_##name##r##type: \
        if (jit_x87_reg_p(node->w.w)) \
            x87_##name##r##type(rn(node->u.w), \
                                rn(node->v.w), rn(node->w.w)); \
        else \
            sse_##name##r##type(rn(node->u.w), \
                                rn(node->v.w), rn(node->w.w)); \
        break
#define case_rrw(name, type) \
    case jit_code_##name##i##type: \
        name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
        break
#define case_rrrw(name, type) \
    case jit_code_##name##i##type: \
        name##i##type(rn(node->u.q.l), rn(node->u.q.h), \
                      rn(node->v.w), node->w.w); \
        break
#define case_frw(name, type) \
    case jit_code_##name##i##type: \
        if (jit_x87_reg_p(node->u.w)) \
            x87_##name##i##type(rn(node->u.w), \
                                rn(node->v.w), node->w.w); \
        else \
            sse_##name##i##type(rn(node->u.w), \
                                rn(node->v.w), node->w.w); \
        break
#define case_wrr(name, type) \
    case jit_code_##name##i##type: \
        name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
        break
#define case_wrf(name, type) \
    case jit_code_##name##i##type: \
        if (jit_x87_reg_p(node->w.w)) \
            x87_##name##i##type(node->u.w, \
                                rn(node->v.w), rn(node->w.w)); \
        else \
            sse_##name##i##type(node->u.w, \
                                rn(node->v.w), rn(node->w.w)); \
        break
#define case_brr(name, type) \
    case jit_code_##name##r##type: \
        temp = node->u.n; \
        assert(temp->code == jit_code_label || \
               temp->code == jit_code_epilog); \
        if (temp->flag & jit_flag_patch) \
            name##r##type(temp->u.w, rn(node->v.w), \
                          rn(node->w.w)); \
        else { \
            word = name##r##type(_jit->pc.w, \
                                 rn(node->v.w), rn(node->w.w)); \
            patch(word, node); \
        } \
        break
#define case_brw(name, type) \
    case jit_code_##name##i##type: \
        temp = node->u.n; \
        assert(temp->code == jit_code_label || \
               temp->code == jit_code_epilog); \
        if (temp->flag & jit_flag_patch) \
            name##i##type(temp->u.w, \
                          rn(node->v.w), node->w.w); \
        else { \
            word = name##i##type(_jit->pc.w, \
                                 rn(node->v.w), node->w.w); \
            patch(word, node); \
        } \
        break
#define case_rff(name, type) \
    case jit_code_##name##r##type: \
        if (jit_x87_reg_p(node->v.w) && \
            jit_x87_reg_p(node->w.w)) \
            x87_##name##r##type(rn(node->u.w), rn(node->v.w), \
                                rn(node->w.w)); \
        else \
            sse_##name##r##type(rn(node->u.w), rn(node->v.w), \
                                rn(node->w.w)); \
        break;
#define case_rfw(name, type, size) \
    case jit_code_##name##i##type: \
        assert(node->flag & jit_flag_data); \
        if (jit_x87_reg_p(node->v.w)) \
            x87_##name##i##type(rn(node->u.w), rn(node->v.w), \
                                (jit_float##size##_t *)node->w.n->u.w); \
        else \
            sse_##name##i##type(rn(node->u.w), rn(node->v.w), \
                                (jit_float##size##_t *)node->w.n->u.w); \
        break
#define case_fff(name, type) \
    case jit_code_##name##r##type: \
        if (jit_x87_reg_p(node->u.w) && \
            jit_x87_reg_p(node->v.w) && \
            jit_x87_reg_p(node->w.w)) \
            x87_##name##r##type(rn(node->u.w), \
                                rn(node->v.w), rn(node->w.w)); \
        else \
            sse_##name##r##type(rn(node->u.w), \
                                rn(node->v.w), rn(node->w.w)); \
        break
#define case_ffw(name, type, size) \
    case jit_code_##name##i##type: \
        assert(node->flag & jit_flag_data); \
        if (jit_x87_reg_p(node->u.w) && \
            jit_x87_reg_p(node->v.w)) \
            x87_##name##i##type(rn(node->u.w), rn(node->v.w), \
                                (jit_float##size##_t *)node->w.n->u.w); \
        else \
            sse_##name##i##type(rn(node->u.w), rn(node->v.w), \
                                (jit_float##size##_t *)node->w.n->u.w); \
        break
#define case_bff(name, type) \
    case jit_code_b##name##r##type: \
        temp = node->u.n; \
        assert(temp->code == jit_code_label || \
               temp->code == jit_code_epilog); \
        if (temp->flag & jit_flag_patch) { \
            if (jit_x87_reg_p(node->v.w) && \
                jit_x87_reg_p(node->w.w)) \
                x87_b##name##r##type(temp->u.w, \
                                     rn(node->v.w), rn(node->w.w)); \
            else \
                sse_b##name##r##type(temp->u.w, \
                                     rn(node->v.w), rn(node->w.w)); \
        } \
        else { \
            if (jit_x87_reg_p(node->v.w) && \
                jit_x87_reg_p(node->w.w)) \
                word = x87_b##name##r##type(_jit->pc.w, \
                                            rn(node->v.w), rn(node->w.w)); \
            else \
                word = sse_b##name##r##type(_jit->pc.w, \
                                            rn(node->v.w), rn(node->w.w)); \
            patch(word, node); \
        } \
        break
#define case_bfw(name, type, size) \
    case jit_code_b##name##i##type: \
        temp = node->u.n; \
        assert(temp->code == jit_code_label || \
               temp->code == jit_code_epilog); \
        if (temp->flag & jit_flag_patch) { \
            if (jit_x87_reg_p(node->v.w)) \
                x87_b##name##i##type(temp->u.w, \
                                     rn(node->v.w), \
                                     (jit_float##size##_t *)node->w.n->u.w); \
            else \
                sse_b##name##i##type(temp->u.w, \
                                     rn(node->v.w), \
                                     (jit_float##size##_t *)node->w.n->u.w); \
        } \
        else { \
            if (jit_x87_reg_p(node->v.w)) \
                word = x87_b##name##i##type(_jit->pc.w, \
                                            rn(node->v.w), \
                                            (jit_float##size##_t *)node->w.n->u.w); \
            else \
                word = sse_b##name##i##type(_jit->pc.w, \
                                            rn(node->v.w), \
                                            (jit_float##size##_t *)node->w.n->u.w); \
            patch(word, node); \
        } \
        break
#if DEVEL_DISASSEMBLER
    prevw = _jit->pc.w;
#endif
    for (node = _jitc->head; node; node = node->next) {
        /* give up once the write position has reached the end of the
         * code buffer */
        if (_jit->pc.uc >= _jitc->code.end)
            return (NULL);

#if DEVEL_DISASSEMBLER
        /* distance from the position recorded for the previous node */
        node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
        prevw = _jit->pc.w;
#endif
        value = jit_classify(node->code);
        jit_regarg_set(node, value);
        switch (node->code) {
            case jit_code_align:
                /* only power of two alignments up to the word size */
                assert(!(node->u.w & (node->u.w - 1)) &&
                       node->u.w <= sizeof(jit_word_t));
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
            case jit_code_note: case jit_code_name:
                /* record the current code address in the node */
                node->u.w = _jit->pc.w;
                break;
            case jit_code_label:
                /* pad to a word boundary when the label is linked to
                 * or flagged as used */
                if ((node->link || (node->flag & jit_flag_use)) &&
                    (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
                    nop(sizeof(jit_word_t) - word);
                /* remember label is defined */
                node->flag |= jit_flag_patch;
                node->u.w = _jit->pc.w;
                break;
                case_rrr(add,);
                case_rrw(add,);
                case_rrr(addx,);
                case_rrw(addx,);
                case_rrr(addc,);
                case_rrw(addc,);
                case_rrr(sub,);
                case_rrw(sub,);
                case_rrr(subx,);
                case_rrw(subx,);
                case_rrr(subc,);
                case_rrw(subc,);
                case_rrw(rsb,);
                case_rrr(mul,);
                case_rrw(mul,);
                case_rrrr(qmul,);
                case_rrrw(qmul,);
                case_rrrr(qmul, _u);
                case_rrrw(qmul, _u);
                case_rrr(div,);
                case_rrw(div,);
                case_rrr(div, _u);
                case_rrw(div, _u);
                case_rrrr(qdiv,);
                case_rrrw(qdiv,);
                case_rrrr(qdiv, _u);
                case_rrrw(qdiv, _u);
                case_rrr(rem,);
                case_rrw(rem,);
                case_rrr(rem, _u);
                case_rrw(rem, _u);
                case_rrr(and,);
                case_rrw(and,);
                case_rrr(or,);
                case_rrw(or,);
                case_rrr(xor,);
                case_rrw(xor,);
                case_rrr(lsh,);
                case_rrw(lsh,);
                case_rrr(rsh,);
                case_rrw(rsh,);
                case_rrr(rsh, _u);
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
                case_rrr(lt,);
                case_rrw(lt,);
                case_rrr(lt, _u);
                case_rrw(lt, _u);
                case_rrr(le,);
                case_rrw(le,);
                case_rrr(le, _u);
                case_rrw(le, _u);
                case_rrr(eq,);
                case_rrw(eq,);
                case_rrr(ge,);
                case_rrw(ge,);
                case_rrr(ge, _u);
                case_rrw(ge, _u);
                case_rrr(gt,);
                case_rrw(gt,);
                case_rrr(gt, _u);
                case_rrw(gt, _u);
                case_rrr(ne,);
                case_rrw(ne,);
                case_rr(mov,);
            case jit_code_movi:
                if (node->flag & jit_flag_node) {
                    /* the immediate is the address of another node */
                    temp = node->v.n;
                    if (temp->code == jit_code_data ||
                        (temp->code == jit_code_label &&
                         (temp->flag & jit_flag_patch)))
                        movi(rn(node->u.w), temp->u.w);
                    else {
                        /* target not emitted yet: emit a patchable
                         * load and record it for the final pass */
                        assert(temp->code == jit_code_label ||
                               temp->code == jit_code_epilog);
                        word = movi_p(rn(node->u.w), node->v.w);
                        patch(word, node);
                    }
                }
                else
                    movi(rn(node->u.w), node->v.w);
                break;
                case_rr(hton, _us);
                case_rr(hton, _ui);
#if __X64 && !__X64_32
                case_rr(hton, _ul);
#endif
                case_rr(ext, _c);
                case_rr(ext, _uc);
                case_rr(ext, _s);
                case_rr(ext, _us);
#if __X64 && !__X64_32
                case_rr(ext, _i);
                case_rr(ext, _ui);
#endif
                case_rf(trunc, _f_i);
                case_rf(trunc, _d_i);
#if __X64
                case_rf(trunc, _f_l);
                case_rf(trunc, _d_l);
#endif
                case_rr(ld, _c);
                case_rw(ld, _c);
                case_rr(ld, _uc);
                case_rw(ld, _uc);
                case_rr(ld, _s);
                case_rw(ld, _s);
                case_rr(ld, _us);
                case_rw(ld, _us);
                case_rr(ld, _i);
                case_rw(ld, _i);
#if __X64 && !__X64_32
                case_rr(ld, _ui);
                case_rw(ld, _ui);
                case_rr(ld, _l);
                case_rw(ld, _l);
#endif
                case_rrr(ldx, _c);
                case_rrw(ldx, _c);
                case_rrr(ldx, _uc);
                case_rrw(ldx, _uc);
                case_rrr(ldx, _s);
                case_rrw(ldx, _s);
                case_rrr(ldx, _us);
                case_rrw(ldx, _us);
                case_rrr(ldx, _i);
                case_rrw(ldx, _i);
#if __X64 && !__X64_32
                case_rrr(ldx, _ui);
                case_rrw(ldx, _ui);
                case_rrr(ldx, _l);
                case_rrw(ldx, _l);
#endif
                case_rr(st, _c);
                case_wr(st, _c);
                case_rr(st, _s);
                case_wr(st, _s);
                case_rr(st, _i);
                case_wr(st, _i);
#if __X64 && !__X64_32
                case_rr(st, _l);
                case_wr(st, _l);
#endif
                case_rrr(stx, _c);
                case_wrr(stx, _c);
                case_rrr(stx, _s);
                case_wrr(stx, _s);
                case_rrr(stx, _i);
                case_wrr(stx, _i);
#if __X64 && !__X64_32
                case_rrr(stx, _l);
                case_wrr(stx, _l);
#endif
                case_brr(blt,);
                case_brw(blt,);
                case_brr(blt, _u);
                case_brw(blt, _u);
                case_brr(ble,);
                case_brw(ble,);
                case_brr(ble, _u);
                case_brw(ble, _u);
                case_brr(beq,);
                case_brw(beq,);
                case_brr(bge,);
                case_brw(bge,);
                case_brr(bge, _u);
                case_brw(bge, _u);
                case_brr(bgt,);
                case_brw(bgt,);
                case_brr(bgt, _u);
                case_brw(bgt, _u);
                case_brr(bne,);
                case_brw(bne,);
                case_brr(bms,);
                case_brw(bms,);
                case_brr(bmc,);
                case_brw(bmc,);
                case_brr(boadd,);
                case_brw(boadd,);
                case_brr(boadd, _u);
                case_brw(boadd, _u);
                case_brr(bxadd,);
                case_brw(bxadd,);
                case_brr(bxadd, _u);
                case_brw(bxadd, _u);
                case_brr(bosub,);
                case_brw(bosub,);
                case_brr(bosub, _u);
                case_brw(bosub, _u);
                case_brr(bxsub,);
                case_brw(bxsub,);
                case_brr(bxsub, _u);
                case_brw(bxsub, _u);
                case_fff(add, _f);
                case_ffw(add, _f, 32);
                case_fff(sub, _f);
                case_ffw(sub, _f, 32);
                case_ffw(rsb, _f, 32);
                case_fff(mul, _f);
                case_ffw(mul, _f, 32);
                case_fff(div, _f);
                case_ffw(div, _f, 32);
                case_ff(abs, _f);
                case_ff(neg, _f);
                case_ff(sqrt, _f);
                case_fr(ext, _f);
                case_fr(ext, _d_f);
                case_rff(lt, _f);
                case_rfw(lt, _f, 32);
                case_rff(le, _f);
                case_rfw(le, _f, 32);
                case_rff(eq, _f);
                case_rfw(eq, _f, 32);
                case_rff(ge, _f);
                case_rfw(ge, _f, 32);
                case_rff(gt, _f);
                case_rfw(gt, _f, 32);
                case_rff(ne, _f);
                case_rfw(ne, _f, 32);
                case_rff(unlt, _f);
                case_rfw(unlt, _f, 32);
                case_rff(unle, _f);
                case_rfw(unle, _f, 32);
                case_rff(uneq, _f);
                case_rfw(uneq, _f, 32);
                case_rff(unge, _f);
                case_rfw(unge, _f, 32);
                case_rff(ungt, _f);
                case_rfw(ungt, _f, 32);
                case_rff(ltgt, _f);
                case_rfw(ltgt, _f, 32);
                case_rff(ord, _f);
                case_rfw(ord, _f, 32);
                case_rff(unord, _f);
                case_rfw(unord, _f, 32);
            case jit_code_movr_f:
                /* pick the move matching the register files of
                 * destination (u) and source (v) */
                if (jit_x87_reg_p(node->u.w)) {
                    if (jit_x87_reg_p(node->v.w))
                        x87_movr_f(rn(node->u.w), rn(node->v.w));
                    else
                        x87_from_sse_f(rn(node->u.w), rn(node->v.w));
                }
                else {
                    if (jit_sse_reg_p(node->v.w))
                        sse_movr_f(rn(node->u.w), rn(node->v.w));
                    else
                        sse_from_x87_f(rn(node->u.w), rn(node->v.w));
                }
                break;
            case jit_code_movi_f:
                assert(node->flag & jit_flag_data);
                if (jit_x87_reg_p(node->u.w))
                    x87_movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
                else
                    sse_movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
                break;
                case_fr(ld, _f);
                case_fw(ld, _f);
                case_frr(ldx, _f);
                case_frw(ldx, _f);
                case_rf(st, _f);
                case_wf(st, _f);
                case_rrf(stx, _f);
                case_wrf(stx, _f);
                case_bff(lt, _f);
                case_bfw(lt, _f, 32);
                case_bff(le, _f);
                case_bfw(le, _f, 32);
                case_bff(eq, _f);
                case_bfw(eq, _f, 32);
                case_bff(ge, _f);
                case_bfw(ge, _f, 32);
                case_bff(gt, _f);
                case_bfw(gt, _f, 32);
                case_bff(ne, _f);
                case_bfw(ne, _f, 32);
                case_bff(unlt, _f);
                case_bfw(unlt, _f, 32);
                case_bff(unle, _f);
                case_bfw(unle, _f, 32);
                case_bff(uneq, _f);
                case_bfw(uneq, _f, 32);
                case_bff(unge, _f);
                case_bfw(unge, _f, 32);
                case_bff(ungt, _f);
                case_bfw(ungt, _f, 32);
                case_bff(ltgt, _f);
                case_bfw(ltgt, _f, 32);
                case_bff(ord, _f);
                case_bfw(ord, _f, 32);
                case_bff(unord, _f);
                case_bfw(unord, _f, 32);
                case_fff(add, _d);
                case_ffw(add, _d, 64);
                case_fff(sub, _d);
                case_ffw(sub, _d, 64);
                case_ffw(rsb, _d, 64);
                case_fff(mul, _d);
                case_ffw(mul, _d, 64);
                case_fff(div, _d);
                case_ffw(div, _d, 64);
                case_ff(abs, _d);
                case_ff(neg, _d);
                case_ff(sqrt, _d);
                case_fr(ext, _d);
                case_fr(ext, _f_d);
                case_rff(lt, _d);
                case_rfw(lt, _d, 64);
                case_rff(le, _d);
                case_rfw(le, _d, 64);
                case_rff(eq, _d);
                case_rfw(eq, _d, 64);
                case_rff(ge, _d);
                case_rfw(ge, _d, 64);
                case_rff(gt, _d);
                case_rfw(gt, _d, 64);
                case_rff(ne, _d);
                case_rfw(ne, _d, 64);
                case_rff(unlt, _d);
                case_rfw(unlt, _d, 64);
                case_rff(unle, _d);
                case_rfw(unle, _d, 64);
                case_rff(uneq, _d);
                case_rfw(uneq, _d, 64);
                case_rff(unge, _d);
                case_rfw(unge, _d, 64);
                case_rff(ungt, _d);
                case_rfw(ungt, _d, 64);
                case_rff(ltgt, _d);
                case_rfw(ltgt, _d, 64);
                case_rff(ord, _d);
                case_rfw(ord, _d, 64);
                case_rff(unord, _d);
                case_rfw(unord, _d, 64);
            case jit_code_movr_d:
                /* pick the move matching the register files of
                 * destination (u) and source (v) */
                if (jit_x87_reg_p(node->u.w)) {
                    if (jit_x87_reg_p(node->v.w))
                        x87_movr_d(rn(node->u.w), rn(node->v.w));
                    else
                        x87_from_sse_d(rn(node->u.w), rn(node->v.w));
                }
                else {
                    if (jit_sse_reg_p(node->v.w))
                        sse_movr_d(rn(node->u.w), rn(node->v.w));
                    else
                        sse_from_x87_d(rn(node->u.w), rn(node->v.w));
                }
                break;
            case jit_code_movi_d:
                assert(node->flag & jit_flag_data);
                if (jit_x87_reg_p(node->u.w))
                    x87_movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
                else
                    sse_movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
                break;
                case_fr(ld, _d);
                case_fw(ld, _d);
                case_frr(ldx, _d);
                case_frw(ldx, _d);
                case_rf(st, _d);
                case_wf(st, _d);
                case_rrf(stx, _d);
                case_wrf(stx, _d);
                case_bff(lt, _d);
                case_bfw(lt, _d, 64);
                case_bff(le, _d);
                case_bfw(le, _d, 64);
                case_bff(eq, _d);
                case_bfw(eq, _d, 64);
                case_bff(ge, _d);
                case_bfw(ge, _d, 64);
                case_bff(gt, _d);
                case_bfw(gt, _d, 64);
                case_bff(ne, _d);
                case_bfw(ne, _d, 64);
                case_bff(unlt, _d);
                case_bfw(unlt, _d, 64);
                case_bff(unle, _d);
                case_bfw(unle, _d, 64);
                case_bff(uneq, _d);
                case_bfw(uneq, _d, 64);
                case_bff(unge, _d);
                case_bfw(unge, _d, 64);
                case_bff(ungt, _d);
                case_bfw(ungt, _d, 64);
                case_bff(ltgt, _d);
                case_bfw(ltgt, _d, 64);
                case_bff(ord, _d);
                case_bfw(ord, _d, 64);
                case_bff(unord, _d);
                case_bfw(unord, _d, 64);
            case jit_code_jmpr:
                jmpr(rn(node->u.w));
                break;
            case jit_code_jmpi:
                if (node->flag & jit_flag_node) {
                    temp = node->u.n;
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
                        /* forward jump: resolved in the final pass */
                        word = jmpi(_jit->pc.w);
                        patch(word, node);
                    }
                }
                else
                    jmpi(node->u.w);
                break;
            case jit_code_callr:
                callr(rn(node->u.w));
                break;
            case jit_code_calli:
                if (node->flag & jit_flag_node) {
                    temp = node->u.n;
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
                    word = calli(temp->u.w);
                    if (!(temp->flag & jit_flag_patch))
                        patch(word, node);
                }
                else
                    calli(node->u.w);
                break;
            case jit_code_prolog:
                _jitc->function = _jitc->functions.ptr + node->w.w;
                /* snapshot everything needed to emit this function
                 * again from its prolog (see the epilog case) */
                undo.node = node;
                undo.word = _jit->pc.w;
#if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
#endif
                undo.patch_offset = _jitc->patches.offset;
            restart_function:
                _jitc->again = 0;
                prolog(node);
                break;
            case jit_code_epilog:
                assert(_jitc->function == _jitc->functions.ptr + node->w.w);
                if (_jitc->again) {
                    /* the function must be emitted again: mark every
                     * label/epilog of it as not yet defined, rewind
                     * the code position and the patch list to the
                     * snapshot taken at the prolog, and restart */
                    for (temp = undo.node->next;
                         temp != node; temp = temp->next) {
                        if (temp->code == jit_code_label ||
                            temp->code == jit_code_epilog)
                            temp->flag &= ~jit_flag_patch;
                    }
                    /* temp is now this epilog node */
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
#if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
#endif
                    _jitc->patches.offset = undo.patch_offset;
                    goto restart_function;
                }
                if (node->link &&
                    (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
                    nop(sizeof(jit_word_t) - word);
                /* remember label is defined */
                node->flag |= jit_flag_patch;
                node->u.w = _jit->pc.w;
                epilog(node);
                _jitc->function = NULL;
                break;
            case jit_code_va_start:
                vastart(rn(node->u.w));
                break;
            case jit_code_va_arg:
                vaarg(rn(node->u.w), rn(node->v.w));
                break;
            case jit_code_va_arg_d:
                vaarg_d(rn(node->u.w), rn(node->v.w), jit_x87_reg_p(node->u.w));
                break;
            /* the codes below emit nothing in this pass */
            case jit_code_live: case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai: case jit_code_allocar:
            case jit_code_arg:
            case jit_code_arg_f: case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
            case jit_code_retr: case jit_code_reti:
            case jit_code_retr_f: case jit_code_reti_f:
            case jit_code_retr_d: case jit_code_reti_d:
            case jit_code_getarg_c: case jit_code_getarg_uc:
            case jit_code_getarg_s: case jit_code_getarg_us:
            case jit_code_getarg_i:
#if __X64 && !__X64_32
            case jit_code_getarg_ui: case jit_code_getarg_l:
#endif
            case jit_code_getarg_f: case jit_code_getarg_d:
            case jit_code_putargr: case jit_code_putargi:
            case jit_code_putargr_f: case jit_code_putargi_f:
            case jit_code_putargr_d: case jit_code_putargi_d:
            case jit_code_pushargr: case jit_code_pushargi:
            case jit_code_pushargr_f: case jit_code_pushargi_f:
            case jit_code_pushargr_d: case jit_code_pushargi_d:
            case jit_code_retval_c: case jit_code_retval_uc:
            case jit_code_retval_s: case jit_code_retval_us:
            case jit_code_retval_i:
            /* NOTE(review): every other 64 bit only guard in this
             * switch, and the definitions of _jit_retval_ui and
             * _jit_retval_l, test !__X64_32 rather than !__X32 --
             * confirm whether the difference here is intended. */
#if __X64 && !__X32
            case jit_code_retval_ui: case jit_code_retval_l:
#endif
            case jit_code_prepare:
            case jit_code_finishr: case jit_code_finishi:
                break;
            case jit_code_retval_f:
#if __X32
                /* presumably the result is on the x87 stack at this
                 * point -- TODO confirm against the calling convention */
                if (jit_sse_reg_p(node->u.w)) {
                    fstpr(_ST1_REGNO);
                    sse_from_x87_f(rn(node->u.w), _ST0_REGNO);
                }
                else
                    fstpr(rn(node->u.w) + 1);
#endif
                break;
            case jit_code_retval_d:
#if __X32
                /* same handling as retval_f, for double precision */
                if (jit_sse_reg_p(node->u.w)) {
                    fstpr(_ST1_REGNO);
                    sse_from_x87_d(rn(node->u.w), _ST0_REGNO);
                }
                else
                    fstpr(rn(node->u.w) + 1);
#endif
                break;
            default:
                /* unknown node code */
                abort();
        }
        jit_regarg_clr(node, value);
        /* nothing may remain marked or synthesized between nodes */
        assert(_jitc->regarg == 0 && _jitc->synth == 0);
        /* update register live state */
        jit_reglive(node);
    }
    /* NOTE(review): case_rw, case_wr, case_ff, case_rrrr, case_rrrw and
     * case_fff are defined above but have no matching #undef below. */
#undef case_bfw
#undef case_bff
#undef case_ffw
#undef case_rfw
#undef case_rff
#undef case_brw
#undef case_brr
#undef case_wrf
#undef case_wrr
#undef case_frw
#undef case_rrf
#undef case_rrw
#undef case_frr
#undef case_rrr
#undef case_wf
#undef case_fw
#undef case_fr
#undef case_rr

    /* resolve the references recorded with patch(): the target address
     * is the u.w of the node referenced by the instruction (held in
     * v.n for movi, in u.n for everything else) */
    for (offset = 0; offset < _jitc->patches.offset; offset++) {
        node = _jitc->patches.ptr[offset].node;
        word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
        patch_at(node, _jitc->patches.ptr[offset].inst, word);
    }

    jit_flush(_jit->code.ptr, _jit->pc.uc);

    return (_jit->code.ptr);
}
2174
2175 #define CODE 1
2176 # include "jit_x86-cpu.c"
2177 # include "jit_x86-sse.c"
2178 # include "jit_x86-x87.c"
2179 #undef CODE
2180
/*
 * Hook called after code generation with the bounds of the emitted
 * code.  This implementation performs no work; both arguments are
 * ignored.
 */
void
jit_flush(void *fptr, void *tptr)
{
    (void)fptr;
    (void)tptr;
}
2185
/*
 * Emit a word load into `r0' from the address `r1' + `i0'.
 * Thin wrapper that maps the register arguments through rn() and
 * forwards to ldxi().
 */
void
_emit_ldxi(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_word_t i0)
{
    ldxi(rn(r0), rn(r1), i0);
}
2191
2192 void
_emit_stxi(jit_state_t * _jit,jit_word_t i0,jit_gpr_t r0,jit_gpr_t r1)2193 _emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_gpr_t r1)
2194 {
2195 stxi(i0, rn(r0), rn(r1));
2196 }
2197
2198 void
_emit_ldxi_d(jit_state_t * _jit,jit_fpr_t r0,jit_gpr_t r1,jit_word_t i0)2199 _emit_ldxi_d(jit_state_t *_jit, jit_fpr_t r0, jit_gpr_t r1, jit_word_t i0)
2200 {
2201 if (jit_x87_reg_p(r0))
2202 x87_ldxi_d(rn(r0), rn(r1), i0);
2203 else
2204 sse_ldxi_d(rn(r0), rn(r1), i0);
2205 }
2206
2207 void
_emit_stxi_d(jit_state_t * _jit,jit_word_t i0,jit_gpr_t r0,jit_fpr_t r1)2208 _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_fpr_t r1)
2209 {
2210 if (jit_x87_reg_p(r1))
2211 x87_stxi_d(i0, rn(r0), rn(r1));
2212 else
2213 sse_stxi_d(i0, rn(r0), rn(r1));
2214 }
2215
2216 static void
_patch(jit_state_t * _jit,jit_word_t instr,jit_node_t * node)2217 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
2218 {
2219 jit_int32_t flag;
2220
2221 assert(node->flag & jit_flag_node);
2222 if (node->code == jit_code_movi)
2223 flag = node->v.n->flag;
2224 else
2225 flag = node->u.n->flag;
2226 assert(!(flag & jit_flag_patch));
2227 if (_jitc->patches.offset >= _jitc->patches.length) {
2228 jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
2229 _jitc->patches.length * sizeof(jit_patch_t),
2230 (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
2231 _jitc->patches.length += 1024;
2232 }
2233 _jitc->patches.ptr[_jitc->patches.offset].inst = instr;
2234 _jitc->patches.ptr[_jitc->patches.offset].node = node;
2235 ++_jitc->patches.offset;
2236 }
2237
2238 static void
_sse_from_x87_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2239 _sse_from_x87_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2240 {
2241 x87_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
2242 sse_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
2243 }
2244
2245 static void
_sse_from_x87_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2246 _sse_from_x87_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2247 {
2248 x87_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
2249 sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
2250 }
2251
2252 static void
_x87_from_sse_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2253 _x87_from_sse_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2254 {
2255 sse_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
2256 x87_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
2257 }
2258
2259 static void
_x87_from_sse_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2260 _x87_from_sse_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2261 {
2262 sse_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
2263 x87_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
2264 }
2265