1 /*
2  * Copyright (C) 2012-2017, 2019  Free Software Foundation, Inc.
3  *
4  * This file is part of GNU lightning.
5  *
6  * GNU lightning is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; either version 3, or (at your option)
9  * any later version.
10  *
11  * GNU lightning is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14  * License for more details.
15  *
16  * Authors:
17  *      Paulo Cesar Pereira de Andrade
18  */
19 
/* Register numbers of the sixteen XMM registers (XMM0 .. XMM15).  */
#define _XMM0_REGNO   0
#define _XMM1_REGNO   1
#define _XMM2_REGNO   2
#define _XMM3_REGNO   3
#define _XMM4_REGNO   4
#define _XMM5_REGNO   5
#define _XMM6_REGNO   6
#define _XMM7_REGNO   7
#define _XMM8_REGNO   8
#define _XMM9_REGNO   9
#define _XMM10_REGNO  10
#define _XMM11_REGNO  11
#define _XMM12_REGNO  12
#define _XMM13_REGNO  13
#define _XMM14_REGNO  14
#define _XMM15_REGNO  15
/* SSE opcode bytes.  The emitters in this file write them right after
   the 0x0f escape byte.  */

/* Moves.  */
#define X86_SSE_MOV     0x10
#define X86_SSE_MOV1    0x11
#define X86_SSE_MOVLP   0x12
#define X86_SSE_MOVHP   0x16
#define X86_SSE_MOVA    0x28

/* Conversions and comparisons.  */
#define X86_SSE_CVTIS   0x2a
#define X86_SSE_CVTTSI  0x2c
#define X86_SSE_CVTSI   0x2d
#define X86_SSE_UCOMI   0x2e
#define X86_SSE_COMI    0x2f
#define X86_SSE_ROUND   0x3a

/* Arithmetic and bitwise logic.  */
#define X86_SSE_SQRT    0x51
#define X86_SSE_RSQRT   0x52
#define X86_SSE_RCP     0x53
#define X86_SSE_AND     0x54
#define X86_SSE_ANDN    0x55
#define X86_SSE_OR      0x56
#define X86_SSE_XOR     0x57
#define X86_SSE_ADD     0x58
#define X86_SSE_MUL     0x59
#define X86_SSE_CVTSD   0x5a
#define X86_SSE_CVTDT   0x5b
#define X86_SSE_SUB     0x5c
#define X86_SSE_MIN     0x5d
#define X86_SSE_DIV     0x5e
#define X86_SSE_MAX     0x5f

/* Transfers to and from general registers, and packed compares.  */
#define X86_SSE_X2G     0x6e
#define X86_SSE_EQB     0x74
#define X86_SSE_EQW     0x75
#define X86_SSE_EQD     0x76
#define X86_SSE_G2X     0x7e
#define X86_SSE_MOV2    0xd6
68 
69 static void
sser(jit_state_t * _jit,int32_t c,int32_t r0,int32_t r1)70 sser(jit_state_t *_jit, int32_t c, int32_t r0, int32_t r1)
71 {
72   rex(_jit, 0, 0, r0, 0, r1);
73   ic(_jit, 0x0f);
74   ic(_jit, c);
75   mrm(_jit, 0x03, r7(r0), r7(r1));
76 }
77 
78 static void
ssexr(jit_state_t * _jit,int32_t p,int32_t c,int32_t r0,int32_t r1)79 ssexr(jit_state_t *_jit, int32_t p, int32_t c,
80       int32_t r0, int32_t r1)
81 {
82   ic(_jit, p);
83   rex(_jit, 0, 0, r0, 0, r1);
84   ic(_jit, 0x0f);
85   ic(_jit, c);
86   mrm(_jit, 0x03, r7(r0), r7(r1));
87 }
88 
89 static void
ssexi(jit_state_t * _jit,int32_t c,int32_t r0,int32_t m,int32_t i)90 ssexi(jit_state_t *_jit, int32_t c, int32_t r0,
91       int32_t m, int32_t i)
92 {
93   ic(_jit, 0x66);
94   rex(_jit, 0, 0, 0, 0, r0);
95   ic(_jit, 0x0f);
96   ic(_jit, c);
97   mrm(_jit, 0x03, r7(m), r7(r0));
98   ic(_jit, i);
99 }
100 
101 static void
sselxr(jit_state_t * _jit,int32_t p,int32_t c,int32_t r0,int32_t r1)102 sselxr(jit_state_t *_jit, int32_t p, int32_t c, int32_t r0, int32_t r1)
103 {
104   if (__X64) {
105     ic(_jit, p);
106     rex(_jit, 0, 1, r0, 0, r1);
107     ic(_jit, 0x0f);
108     ic(_jit, c);
109     mrm(_jit, 0x03, r7(r0), r7(r1));
110   } else {
111     ssexr(_jit, p, c, r0, r1);
112   }
113 }
114 
115 static void
ssexrx(jit_state_t * _jit,int32_t px,int32_t code,int32_t md,int32_t rb,int32_t ri,int32_t ms,int32_t rd)116 ssexrx(jit_state_t *_jit, int32_t px, int32_t code, int32_t md,
117        int32_t rb, int32_t ri, int32_t ms, int32_t rd)
118 {
119   ic(_jit, px);
120   rex(_jit, 0, 0, rd, ri, rb);
121   ic(_jit, 0x0f);
122   ic(_jit, code);
123   rx(_jit, rd, md, rb, ri, ms);
124 }
125 
126 static void
movdlxr(jit_state_t * _jit,int32_t r0,int32_t r1)127 movdlxr(jit_state_t *_jit, int32_t r0, int32_t r1)
128 {
129   ssexr(_jit, 0x66, X86_SSE_X2G, r0, r1);
130 }
131 
132 static void movdqxr(jit_state_t *_jit, int32_t r0, int32_t r1) maybe_unused;
133 static void
movdqxr(jit_state_t * _jit,int32_t r0,int32_t r1)134 movdqxr(jit_state_t *_jit, int32_t r0, int32_t r1)
135 {
136   sselxr(_jit, 0x66, X86_SSE_X2G, r0, r1);
137 }
138 
139 static void
movssmr(jit_state_t * _jit,int32_t md,int32_t rb,int32_t ri,int32_t ms,int32_t rd)140 movssmr(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms, int32_t rd)
141 {
142   ssexrx(_jit, 0xf3, X86_SSE_MOV, md, rb, ri, ms, rd);
143 }
144 static void
movsdmr(jit_state_t * _jit,int32_t md,int32_t rb,int32_t ri,int32_t ms,int32_t rd)145 movsdmr(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms, int32_t rd)
146 {
147   ssexrx(_jit, 0xf2, X86_SSE_MOV, md, rb, ri, ms, rd);
148 }
149 static void
movssrm(jit_state_t * _jit,int32_t rs,int32_t md,int32_t mb,int32_t mi,int32_t ms)150 movssrm(jit_state_t *_jit, int32_t rs, int32_t md, int32_t mb, int32_t mi, int32_t ms)
151 {
152   ssexrx(_jit, 0xf3, X86_SSE_MOV1, md, mb, mi, ms, rs);
153 }
154 static void
movsdrm(jit_state_t * _jit,int32_t rs,int32_t md,int32_t mb,int32_t mi,int32_t ms)155 movsdrm(jit_state_t *_jit, int32_t rs, int32_t md, int32_t mb, int32_t mi, int32_t ms)
156 {
157   ssexrx(_jit, 0xf2, X86_SSE_MOV1, md, mb, mi, ms, rs);
158 }
159 
160 static void
movr_f(jit_state_t * _jit,int32_t r0,int32_t r1)161 movr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
162 {
163   if (r0 != r1)
164     ssexr(_jit, 0xf3, X86_SSE_MOV, r0, r1);
165 }
166 
167 static void
movr_d(jit_state_t * _jit,int32_t r0,int32_t r1)168 movr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
169 {
170   if (r0 != r1)
171     ssexr(_jit, 0xf2, X86_SSE_MOV, r0, r1);
172 }
173 
174 static void
addssr(jit_state_t * _jit,int32_t r0,int32_t r1)175 addssr(jit_state_t *_jit, int32_t r0, int32_t r1)
176 {
177   ssexr(_jit, 0xf3, X86_SSE_ADD, r0, r1);
178 }
179 static void
addsdr(jit_state_t * _jit,int32_t r0,int32_t r1)180 addsdr(jit_state_t *_jit, int32_t r0, int32_t r1)
181 {
182   ssexr(_jit, 0xf2, X86_SSE_ADD, r0, r1);
183 }
184 static void
subssr(jit_state_t * _jit,int32_t r0,int32_t r1)185 subssr(jit_state_t *_jit, int32_t r0, int32_t r1)
186 {
187   ssexr(_jit, 0xf3, X86_SSE_SUB, r0, r1);
188 }
189 static void
subsdr(jit_state_t * _jit,int32_t r0,int32_t r1)190 subsdr(jit_state_t *_jit, int32_t r0, int32_t r1)
191 {
192   ssexr(_jit, 0xf2, X86_SSE_SUB, r0, r1);
193 }
194 static void
mulssr(jit_state_t * _jit,int32_t r0,int32_t r1)195 mulssr(jit_state_t *_jit, int32_t r0, int32_t r1)
196 {
197   ssexr(_jit, 0xf3, X86_SSE_MUL, r0, r1);
198 }
199 static void
mulsdr(jit_state_t * _jit,int32_t r0,int32_t r1)200 mulsdr(jit_state_t *_jit, int32_t r0, int32_t r1)
201 {
202   ssexr(_jit, 0xf2, X86_SSE_MUL, r0, r1);
203 }
204 static void
divssr(jit_state_t * _jit,int32_t r0,int32_t r1)205 divssr(jit_state_t *_jit, int32_t r0, int32_t r1)
206 {
207   ssexr(_jit, 0xf3, X86_SSE_DIV, r0, r1);
208 }
209 static void
divsdr(jit_state_t * _jit,int32_t r0,int32_t r1)210 divsdr(jit_state_t *_jit, int32_t r0, int32_t r1)
211 {
212   ssexr(_jit, 0xf2, X86_SSE_DIV, r0, r1);
213 }
214 static void
andpsr(jit_state_t * _jit,int32_t r0,int32_t r1)215 andpsr(jit_state_t *_jit, int32_t r0, int32_t r1)
216 {
217   sser(_jit,        X86_SSE_AND, r0, r1);
218 }
219 static void
andpdr(jit_state_t * _jit,int32_t r0,int32_t r1)220 andpdr(jit_state_t *_jit, int32_t r0, int32_t r1)
221 {
222   ssexr(_jit, 0x66, X86_SSE_AND, r0, r1);
223 }
224 static void
truncr_f_i(jit_state_t * _jit,int32_t r0,int32_t r1)225 truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1)
226 {
227   ssexr(_jit, 0xf3, X86_SSE_CVTTSI, r0, r1);
228 }
229 static void
truncr_d_i(jit_state_t * _jit,int32_t r0,int32_t r1)230 truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t r1)
231 {
232   ssexr(_jit, 0xf2, X86_SSE_CVTTSI, r0, r1);
233 }
234 #if __X64
235 static void
truncr_f_l(jit_state_t * _jit,int32_t r0,int32_t r1)236 truncr_f_l(jit_state_t *_jit, int32_t r0, int32_t r1)
237 {
238   sselxr(_jit, 0xf3, X86_SSE_CVTTSI, r0, r1);
239 }
240 static void
truncr_d_l(jit_state_t * _jit,int32_t r0,int32_t r1)241 truncr_d_l(jit_state_t *_jit, int32_t r0, int32_t r1)
242 {
243   sselxr(_jit, 0xf2, X86_SSE_CVTTSI, r0, r1);
244 }
245 #endif
246 static void
extr_f(jit_state_t * _jit,int32_t r0,int32_t r1)247 extr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
248 {
249   sselxr(_jit, 0xf3, X86_SSE_CVTIS, r0, r1);
250 }
251 static void
extr_d(jit_state_t * _jit,int32_t r0,int32_t r1)252 extr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
253 {
254   sselxr(_jit, 0xf2, X86_SSE_CVTIS, r0, r1);
255 }
256 
257 static void
extr_f_d(jit_state_t * _jit,int32_t r0,int32_t r1)258 extr_f_d(jit_state_t *_jit, int32_t r0, int32_t r1)
259 {
260   ssexr(_jit, 0xf3, X86_SSE_CVTSD, r0, r1);
261 }
262 static void
extr_d_f(jit_state_t * _jit,int32_t r0,int32_t r1)263 extr_d_f(jit_state_t *_jit, int32_t r0, int32_t r1)
264 {
265   ssexr(_jit, 0xf2, X86_SSE_CVTSD, r0, r1);
266 }
267 static void
ucomissr(jit_state_t * _jit,int32_t r0,int32_t r1)268 ucomissr(jit_state_t *_jit, int32_t r0, int32_t r1)
269 {
270   sser(_jit, X86_SSE_UCOMI, r0, r1);
271 }
272 static void
ucomisdr(jit_state_t * _jit,int32_t r0,int32_t r1)273 ucomisdr(jit_state_t *_jit, int32_t r0, int32_t r1)
274 {
275   ssexr(_jit, 0x66, X86_SSE_UCOMI, r0, r1);
276 }
277 static void
xorpsr(jit_state_t * _jit,int32_t r0,int32_t r1)278 xorpsr(jit_state_t *_jit, int32_t r0, int32_t r1)
279 {
280   sser(_jit, X86_SSE_XOR, r0, r1);
281 }
282 static void
xorpdr(jit_state_t * _jit,int32_t r0,int32_t r1)283 xorpdr(jit_state_t *_jit, int32_t r0, int32_t r1)
284 {
285   ssexr(_jit, 0x66, X86_SSE_XOR, r0, r1);
286 }
287 static void orpdr(jit_state_t *_jit, int32_t r0, int32_t r1) maybe_unused;
288 static void
orpdr(jit_state_t * _jit,int32_t r0,int32_t r1)289 orpdr(jit_state_t *_jit, int32_t r0, int32_t r1)
290 {
291   ssexr(_jit, 0x66, X86_SSE_OR, r0, r1);
292 }
293 static void
pcmpeqlr(jit_state_t * _jit,int32_t r0,int32_t r1)294 pcmpeqlr(jit_state_t *_jit, int32_t r0, int32_t r1)
295 {
296   ssexr(_jit, 0x66, X86_SSE_EQD, r0, r1);
297 }
298 static void
psrl(jit_state_t * _jit,int32_t r0,int32_t i0)299 psrl(jit_state_t *_jit, int32_t r0, int32_t i0)
300 {
301   ssexi(_jit, 0x72, r0, 0x02, i0);
302 }
303 static void
psrq(jit_state_t * _jit,int32_t r0,int32_t i0)304 psrq(jit_state_t *_jit, int32_t r0, int32_t i0)
305 {
306   ssexi(_jit, 0x73, r0, 0x02, i0);
307 }
308 static void
pslq(jit_state_t * _jit,int32_t r0,int32_t i0)309 pslq(jit_state_t *_jit, int32_t r0, int32_t i0)
310 {
311   ssexi(_jit, 0x73, r0, 0x06, i0);
312 }
313 static void
sqrtr_f(jit_state_t * _jit,int32_t r0,int32_t r1)314 sqrtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
315 {
316   ssexr(_jit, 0xf3, X86_SSE_SQRT, r0, r1);
317 }
318 static void
sqrtr_d(jit_state_t * _jit,int32_t r0,int32_t r1)319 sqrtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
320 {
321   ssexr(_jit, 0xf2, X86_SSE_SQRT, r0, r1);
322 }
323 static void
ldr_f(jit_state_t * _jit,int32_t r0,int32_t r1)324 ldr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
325 {
326   movssmr(_jit, 0, r1, _NOREG, _SCL1, r0);
327 }
328 static void
str_f(jit_state_t * _jit,int32_t r0,int32_t r1)329 str_f(jit_state_t *_jit, int32_t r0, int32_t r1)
330 {
331   movssrm(_jit, r1, 0, r0, _NOREG, _SCL1);
332 }
333 static void
ldr_d(jit_state_t * _jit,int32_t r0,int32_t r1)334 ldr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
335 {
336   movsdmr(_jit, 0, r1, _NOREG, _SCL1, r0);
337 }
338 static void
str_d(jit_state_t * _jit,int32_t r0,int32_t r1)339 str_d(jit_state_t *_jit, int32_t r0, int32_t r1)
340 {
341   movsdrm(_jit, r1, 0, r0, _NOREG, _SCL1);
342 }
343 
344 static void
movi_f(jit_state_t * _jit,int32_t r0,jit_float32_t i0)345 movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t i0)
346 {
347   union {
348     int32_t i;
349     jit_float32_t f;
350   } data;
351 
352   data.f = i0;
353   if (data.f == 0.0 && !(data.i & 0x80000000))
354     xorpsr(_jit, r0, r0);
355   else {
356     jit_gpr_t reg = get_temp_gpr(_jit);
357     movi(_jit, jit_gpr_regno(reg), data.i);
358     movdlxr(_jit, r0, jit_gpr_regno(reg));
359     unget_temp_gpr(_jit);
360   }
361 }
362 
363 static void
movi_d(jit_state_t * _jit,int32_t r0,jit_float64_t i0)364 movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t i0)
365 {
366   union {
367     int32_t ii[2];
368     jit_word_t w;
369     jit_float64_t d;
370   } data;
371 
372   data.d = i0;
373   if (data.d == 0.0 && !(data.ii[1] & 0x80000000))
374     xorpdr(_jit, r0, r0);
375   else {
376     jit_gpr_t ireg = get_temp_gpr(_jit);
377 #if __X64
378     movi(_jit, jit_gpr_regno(ireg), data.w);
379     movdqxr(_jit, r0, jit_gpr_regno(ireg));
380     unget_temp_gpr(_jit);
381 #else
382     jit_fpr_t freg = get_temp_fpr(_jit);
383     movi(_jit, jit_gpr_regno(ireg), data.ii[1]);
384     movdlxr(_jit, jit_fpr_regno(freg), jit_gpr_regno(ireg));
385     pslq(_jit, jit_fpr_regno(freg), 32);
386     movi(_jit, jit_gpr_regno(ireg), data.ii[0]);
387     movdlxr(_jit, r0, jit_gpr_regno(ireg));
388     orpdr(_jit, r0, jit_fpr_regno(freg));
389     unget_temp_fpr(_jit);
390     unget_temp_gpr(_jit);
391 #endif
392   }
393 }
394 
395 #if __X32
396 static void
x87rx(jit_state_t * _jit,int32_t code,int32_t md,int32_t rb,int32_t ri,int32_t ms)397 x87rx(jit_state_t *_jit, int32_t code, int32_t md,
398       int32_t rb, int32_t ri, int32_t ms)
399 {
400   rex(_jit, 0, 1, rb, ri, _NOREG);
401   ic(_jit, 0xd8 | (code >> 3));
402   rx(_jit, (code & 7), md, rb, ri, ms);
403 }
404 
405 static void
fldsm(jit_state_t * _jit,int32_t md,int32_t rb,int32_t ri,int32_t ms)406 fldsm(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms)
407 {
408   return x87rx(_jit, 010, md, rb, ri, ms);
409 }
410 
411 static void
fstsm(jit_state_t * _jit,int32_t md,int32_t rb,int32_t ri,int32_t ms)412 fstsm(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms)
413 {
414   return x87rx(_jit, 013, md, rb, ri, ms);
415 }
416 
417 static void
fldlm(jit_state_t * _jit,int32_t md,int32_t rb,int32_t ri,int32_t ms)418 fldlm(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms)
419 {
420   return x87rx(_jit, 050, md, rb, ri, ms);
421 }
422 
423 static void
fstlm(jit_state_t * _jit,int32_t md,int32_t rb,int32_t ri,int32_t ms)424 fstlm(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms)
425 {
426   return x87rx(_jit, 053, md, rb, ri, ms);
427 }
428 #endif
429 
430 static void
retval_f(jit_state_t * _jit,int32_t r0)431 retval_f(jit_state_t *_jit, int32_t r0)
432 {
433 #if __X32
434   subi(_jit, _RSP_REGNO, _RSP_REGNO, 4);
435   fstsm(_jit, 0, _RSP_REGNO, _NOREG, _SCL1);
436   ldr_f(_jit, r0, _RSP_REGNO);
437   addi(_jit, _RSP_REGNO, _RSP_REGNO, 4);
438 #else
439   movr_f(_jit, r0, _XMM0_REGNO);
440 #endif
441 }
442 
443 static void
retval_d(jit_state_t * _jit,int32_t r0)444 retval_d(jit_state_t *_jit, int32_t r0)
445 {
446 #if __X32
447   subi(_jit, _RSP_REGNO, _RSP_REGNO, 8);
448   fstlm(_jit, 0, _RSP_REGNO, _NOREG, _SCL1);
449   ldr_d(_jit, r0, _RSP_REGNO);
450   addi(_jit, _RSP_REGNO, _RSP_REGNO, 8);
451 #else
452   movr_d(_jit, r0, _XMM0_REGNO);
453 #endif
454 }
455 
456 static void
retr_f(jit_state_t * _jit,int32_t u)457 retr_f(jit_state_t *_jit, int32_t u)
458 {
459 #if __X32
460   subi(_jit, _RSP_REGNO, _RSP_REGNO, 4);
461   str_f(_jit, _RSP_REGNO, u);
462   fldsm(_jit, 0, _RSP_REGNO, _NOREG, _SCL1);
463   addi(_jit, _RSP_REGNO, _RSP_REGNO, 4);
464 #else
465   movr_f(_jit, _XMM0_REGNO, u);
466 #endif
467   ret(_jit);
468 }
469 
470 static void
retr_d(jit_state_t * _jit,int32_t u)471 retr_d(jit_state_t *_jit, int32_t u)
472 {
473 #if __X32
474   subi(_jit, _RSP_REGNO, _RSP_REGNO, 8);
475   str_d(_jit, _RSP_REGNO, u);
476   fldlm(_jit, 0, _RSP_REGNO, _NOREG, _SCL1);
477   addi(_jit, _RSP_REGNO, _RSP_REGNO, 8);
478 #else
479   movr_d(_jit, _XMM0_REGNO, u);
480 #endif
481   ret(_jit);
482 }
483 
484 static void
addr_f(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)485 addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
486 {
487   if (r0 == r1)
488     addssr(_jit, r0, r2);
489   else if (r0 == r2)
490     addssr(_jit, r0, r1);
491   else {
492     movr_f(_jit, r0, r1);
493     addssr(_jit, r0, r2);
494   }
495 }
496 
497 static void
addr_d(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)498 addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
499 {
500   if (r0 == r1)
501     addsdr(_jit, r0, r2);
502   else if (r0 == r2)
503     addsdr(_jit, r0, r1);
504   else {
505     movr_d(_jit, r0, r1);
506     addsdr(_jit, r0, r2);
507   }
508 }
509 
510 static void
subr_f(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)511 subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
512 {
513   if (r0 == r1)
514     subssr(_jit, r0, r2);
515   else if (r0 == r2) {
516     jit_fpr_t reg = get_temp_fpr(_jit);
517     movr_f(_jit, jit_fpr_regno(reg), r0);
518     movr_f(_jit, r0, r1);
519     subssr(_jit, r0, jit_fpr_regno(reg));
520     unget_temp_fpr(_jit);
521   }
522   else {
523     movr_f(_jit, r0, r1);
524     subssr(_jit, r0, r2);
525   }
526 }
527 
528 static void
subr_d(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)529 subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
530 {
531   if (r0 == r1)
532     subsdr(_jit, r0, r2);
533   else if (r0 == r2) {
534     jit_fpr_t reg = get_temp_fpr(_jit);
535     movr_d(_jit, jit_fpr_regno(reg), r0);
536     movr_d(_jit, r0, r1);
537     subsdr(_jit, r0, jit_fpr_regno(reg));
538     unget_temp_fpr(_jit);
539   }
540   else {
541     movr_d(_jit, r0, r1);
542     subsdr(_jit, r0, r2);
543   }
544 }
545 
546 static void
mulr_f(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)547 mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
548 {
549   if (r0 == r1)
550     mulssr(_jit, r0, r2);
551   else if (r0 == r2)
552     mulssr(_jit, r0, r1);
553   else {
554     movr_f(_jit, r0, r1);
555     mulssr(_jit, r0, r2);
556   }
557 }
558 
559 static void
mulr_d(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)560 mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
561 {
562   if (r0 == r1)
563     mulsdr(_jit, r0, r2);
564   else if (r0 == r2)
565     mulsdr(_jit, r0, r1);
566   else {
567     movr_d(_jit, r0, r1);
568     mulsdr(_jit, r0, r2);
569   }
570 }
571 
572 static void
divr_f(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)573 divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
574 {
575   if (r0 == r1)
576     divssr(_jit, r0, r2);
577   else if (r0 == r2) {
578     jit_fpr_t reg = get_temp_fpr(_jit);
579     movr_f(_jit, jit_fpr_regno(reg), r0);
580     movr_f(_jit, r0, r1);
581     divssr(_jit, r0, jit_fpr_regno(reg));
582     unget_temp_fpr(_jit);
583   }
584   else {
585     movr_f(_jit, r0, r1);
586     divssr(_jit, r0, r2);
587   }
588 }
589 
590 static void
divr_d(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)591 divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
592 {
593   if (r0 == r1)
594     divsdr(_jit, r0, r2);
595   else if (r0 == r2) {
596     jit_fpr_t reg = get_temp_fpr(_jit);
597     movr_d(_jit, jit_fpr_regno(reg), r0);
598     movr_d(_jit, r0, r1);
599     divsdr(_jit, r0, jit_fpr_regno(reg));
600     unget_temp_fpr(_jit);
601   }
602   else {
603     movr_d(_jit, r0, r1);
604     divsdr(_jit, r0, r2);
605   }
606 }
607 
608 static void
absr_f(jit_state_t * _jit,int32_t r0,int32_t r1)609 absr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
610 {
611   if (r0 == r1) {
612     jit_fpr_t reg = get_temp_fpr(_jit);
613     pcmpeqlr(_jit, jit_fpr_regno(reg), jit_fpr_regno(reg));
614     psrl(_jit, jit_fpr_regno(reg), 1);
615     andpsr(_jit, r0, jit_fpr_regno(reg));
616     unget_temp_fpr(_jit);
617   }
618   else {
619     pcmpeqlr(_jit, r0, r0);
620     psrl(_jit, r0, 1);
621     andpsr(_jit, r0, r1);
622   }
623 }
624 
625 static void
absr_d(jit_state_t * _jit,int32_t r0,int32_t r1)626 absr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
627 {
628   if (r0 == r1) {
629     jit_fpr_t reg = get_temp_fpr(_jit);
630     pcmpeqlr(_jit, jit_fpr_regno(reg), jit_fpr_regno(reg));
631     psrq(_jit, jit_fpr_regno(reg), 1);
632     andpdr(_jit, r0, jit_fpr_regno(reg));
633     unget_temp_fpr(_jit);
634   }
635   else {
636     pcmpeqlr(_jit, r0, r0);
637     psrq(_jit, r0, 1);
638     andpdr(_jit, r0, r1);
639   }
640 }
641 
642 static void
negr_f(jit_state_t * _jit,int32_t r0,int32_t r1)643 negr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
644 {
645   jit_gpr_t ireg = get_temp_gpr(_jit);
646   imovi(_jit, jit_gpr_regno(ireg), 0x80000000);
647   if (r0 == r1) {
648     jit_fpr_t freg = get_temp_fpr(_jit);
649     movdlxr(_jit, jit_fpr_regno(freg), jit_gpr_regno(ireg));
650     xorpsr(_jit, r0, jit_fpr_regno(freg));
651     unget_temp_fpr(_jit);
652   } else {
653     movdlxr(_jit, r0, jit_gpr_regno(ireg));
654     xorpsr(_jit, r0, r1);
655   }
656   unget_temp_gpr(_jit);
657 }
658 
659 static void
negr_d(jit_state_t * _jit,int32_t r0,int32_t r1)660 negr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
661 {
662   jit_gpr_t ireg = get_temp_gpr(_jit);
663   imovi(_jit, jit_gpr_regno(ireg), 0x80000000);
664   if (r0 == r1) {
665     jit_fpr_t freg = get_temp_fpr(_jit);
666     movdlxr(_jit, jit_fpr_regno(freg), jit_gpr_regno(ireg));
667     pslq(_jit, jit_fpr_regno(freg), 32);
668     xorpdr(_jit, r0, jit_fpr_regno(freg));
669     unget_temp_fpr(_jit);
670   } else {
671     movdlxr(_jit, r0, jit_gpr_regno(ireg));
672     pslq(_jit, r0, 32);
673     xorpdr(_jit, r0, r1);
674   }
675   unget_temp_gpr(_jit);
676 }
677 
678 static void
ldi_f(jit_state_t * _jit,int32_t r0,jit_word_t i0)679 ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0)
680 {
681   if (can_sign_extend_int_p(i0))
682     movssmr(_jit, i0, _NOREG, _NOREG, _SCL1, r0);
683   else {
684     jit_gpr_t reg = get_temp_gpr(_jit);
685     movi(_jit, jit_gpr_regno(reg), i0);
686     ldr_f(_jit, r0, jit_gpr_regno(reg));
687     unget_temp_gpr(_jit);
688   }
689 }
690 
691 static void
ldxr_f(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)692 ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
693 {
694   movssmr(_jit, 0, r1, r2, _SCL1, r0);
695 }
696 
697 static void
ldxi_f(jit_state_t * _jit,int32_t r0,int32_t r1,jit_word_t i0)698 ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
699 {
700   if (can_sign_extend_int_p(i0))
701     movssmr(_jit, i0, r1, _NOREG, _SCL1, r0);
702   else {
703     jit_gpr_t reg = get_temp_gpr(_jit);
704     movi(_jit, jit_gpr_regno(reg), i0);
705     ldxr_f(_jit, r0, r1, jit_gpr_regno(reg));
706     unget_temp_gpr(_jit);
707   }
708 }
709 
710 static void
sti_f(jit_state_t * _jit,jit_word_t i0,int32_t r0)711 sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0)
712 {
713   if (can_sign_extend_int_p(i0))
714     movssrm(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
715   else {
716     jit_gpr_t reg = get_temp_gpr(_jit);
717     movi(_jit, jit_gpr_regno(reg), i0);
718     str_f(_jit, jit_gpr_regno(reg), r0);
719     unget_temp_gpr(_jit);
720   }
721 }
722 
723 static void
stxr_f(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)724 stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
725 {
726   movssrm(_jit, r2, 0, r0, r1, _SCL1);
727 }
728 
729 static void
stxi_f(jit_state_t * _jit,jit_word_t i0,int32_t r0,int32_t r1)730 stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
731 {
732   if (can_sign_extend_int_p(i0))
733     movssrm(_jit, r1, i0, r0, _NOREG, _SCL1);
734   else {
735     jit_gpr_t reg = get_temp_gpr(_jit);
736     movi(_jit, jit_gpr_regno(reg), i0);
737     stxr_f(_jit, jit_gpr_regno(reg), r0, r1);
738     unget_temp_gpr(_jit);
739   }
740 }
741 
742 static jit_reloc_t
bltr_f(jit_state_t * _jit,int32_t r0,int32_t r1)743 bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
744 {
745   ucomissr(_jit, r1, r0);
746   return ja(_jit);
747 }
748 
749 static jit_reloc_t
bler_f(jit_state_t * _jit,int32_t r0,int32_t r1)750 bler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
751 {
752   ucomissr(_jit, r1, r0);
753   return jae(_jit);
754 }
755 
756 static jit_reloc_t
beqr_f(jit_state_t * _jit,int32_t r0,int32_t r1)757 beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
758 {
759   ucomissr(_jit, r0, r1);
760   jit_reloc_t pos = jps(_jit);
761   jit_reloc_t ret = je(_jit);
762   jit_patch_here(_jit, pos);
763   return ret;
764 }
765 
766 static jit_reloc_t
bger_f(jit_state_t * _jit,int32_t r0,int32_t r1)767 bger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
768 {
769   ucomissr(_jit, r0, r1);
770   return jae(_jit);
771 }
772 
773 static jit_reloc_t
bgtr_f(jit_state_t * _jit,int32_t r0,int32_t r1)774 bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
775 {
776   ucomissr(_jit, r0, r1);
777   return ja(_jit);
778 }
779 
780 static jit_reloc_t
bner_f(jit_state_t * _jit,int32_t r0,int32_t r1)781 bner_f(jit_state_t *_jit, int32_t r0, int32_t r1)
782 {
783   ucomissr(_jit, r0, r1);
784   jit_reloc_t pos = jps(_jit);
785   jit_reloc_t zero = jzs(_jit);
786   jit_patch_here(_jit, pos);
787   jit_reloc_t ret = jmp(_jit);
788   jit_patch_here(_jit, zero);
789   return ret;
790 }
791 
792 static jit_reloc_t
bunltr_f(jit_state_t * _jit,int32_t r0,int32_t r1)793 bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
794 {
795   ucomissr(_jit, r0, r1);
796   return jnae(_jit);
797 }
798 
799 static jit_reloc_t
bunler_f(jit_state_t * _jit,int32_t r0,int32_t r1)800 bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
801 {
802   ucomissr(_jit, r0, r1);
803   return jna(_jit);
804 }
805 
806 static jit_reloc_t
buneqr_f(jit_state_t * _jit,int32_t r0,int32_t r1)807 buneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
808 {
809   ucomissr(_jit, r0, r1);
810   return je(_jit);
811 }
812 
813 static jit_reloc_t
bunger_f(jit_state_t * _jit,int32_t r0,int32_t r1)814 bunger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
815 {
816   ucomissr(_jit, r1, r0);
817   return jna(_jit);
818 }
819 
820 static jit_reloc_t
bungtr_f(jit_state_t * _jit,int32_t r0,int32_t r1)821 bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
822 {
823   ucomissr(_jit, r1, r0);
824   return jnae(_jit);
825 }
826 
827 static jit_reloc_t
bltgtr_f(jit_state_t * _jit,int32_t r0,int32_t r1)828 bltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
829 {
830   ucomissr(_jit, r0, r1);
831   return jne(_jit);
832 }
833 
834 static jit_reloc_t
bordr_f(jit_state_t * _jit,int32_t r0,int32_t r1)835 bordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
836 {
837   ucomissr(_jit, r0, r1);
838   return jnp(_jit);
839 }
840 
841 static jit_reloc_t
bunordr_f(jit_state_t * _jit,int32_t r0,int32_t r1)842 bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
843 {
844   ucomissr(_jit, r0, r1);
845   return jp(_jit);
846 }
847 
848 static void
ldi_d(jit_state_t * _jit,int32_t r0,jit_word_t i0)849 ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0)
850 {
851   if (can_sign_extend_int_p(i0))
852     movsdmr(_jit, i0, _NOREG, _NOREG, _SCL1, r0);
853   else {
854     jit_gpr_t reg = get_temp_gpr(_jit);
855     movi(_jit, jit_gpr_regno(reg), i0);
856     ldr_d(_jit, r0, jit_gpr_regno(reg));
857     unget_temp_gpr(_jit);
858   }
859 }
860 
861 static void
ldxr_d(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)862 ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
863 {
864   movsdmr(_jit, 0, r1, r2, _SCL1, r0);
865 }
866 
867 static void
ldxi_d(jit_state_t * _jit,int32_t r0,int32_t r1,jit_word_t i0)868 ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
869 {
870   if (can_sign_extend_int_p(i0))
871     movsdmr(_jit, i0, r1, _NOREG, _SCL1, r0);
872   else {
873     jit_gpr_t reg = get_temp_gpr(_jit);
874     movi(_jit, jit_gpr_regno(reg), i0);
875     ldxr_d(_jit, r0, r1, jit_gpr_regno(reg));
876     unget_temp_gpr(_jit);
877   }
878 }
879 
880 static void
sti_d(jit_state_t * _jit,jit_word_t i0,int32_t r0)881 sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0)
882 {
883   if (can_sign_extend_int_p(i0))
884     movsdrm(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
885   else {
886     jit_gpr_t reg = get_temp_gpr(_jit);
887     movi(_jit, jit_gpr_regno(reg), i0);
888     str_d(_jit, jit_gpr_regno(reg), r0);
889     unget_temp_gpr(_jit);
890   }
891 }
892 
893 static void
stxr_d(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)894 stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
895 {
896   movsdrm(_jit, r2, 0, r0, r1, _SCL1);
897 }
898 
899 static void
stxi_d(jit_state_t * _jit,jit_word_t i0,int32_t r0,int32_t r1)900 stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
901 {
902   if (can_sign_extend_int_p(i0))
903     movsdrm(_jit, r1, i0, r0, _NOREG, _SCL1);
904   else {
905     jit_gpr_t reg = get_temp_gpr(_jit);
906     movi(_jit, jit_gpr_regno(reg), i0);
907     stxr_d(_jit, jit_gpr_regno(reg), r0, r1);
908     unget_temp_gpr(_jit);
909   }
910 }
911 
912 static jit_reloc_t
bltr_d(jit_state_t * _jit,int32_t r0,int32_t r1)913 bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
914 {
915   ucomisdr(_jit, r1, r0);
916   return ja(_jit);
917 }
918 
919 static jit_reloc_t
bler_d(jit_state_t * _jit,int32_t r0,int32_t r1)920 bler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
921 {
922   ucomisdr(_jit, r1, r0);
923   return jae(_jit);
924 }
925 
926 static jit_reloc_t
beqr_d(jit_state_t * _jit,int32_t r0,int32_t r1)927 beqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
928 {
929   ucomisdr(_jit, r0, r1);
930   jit_reloc_t pos = jps(_jit);
931   jit_reloc_t ret = je(_jit);
932   jit_patch_here(_jit, pos);
933   return ret;
934 }
935 
936 static jit_reloc_t
bger_d(jit_state_t * _jit,int32_t r0,int32_t r1)937 bger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
938 {
939   ucomisdr(_jit, r0, r1);
940   return jae(_jit);
941 }
942 
943 static jit_reloc_t
bgtr_d(jit_state_t * _jit,int32_t r0,int32_t r1)944 bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
945 {
946   ucomisdr(_jit, r0, r1);
947   return ja(_jit);
948 }
949 
950 static jit_reloc_t
bner_d(jit_state_t * _jit,int32_t r0,int32_t r1)951 bner_d(jit_state_t *_jit, int32_t r0, int32_t r1)
952 {
953   ucomisdr(_jit, r0, r1);
954   jit_reloc_t pos = jps(_jit);
955   jit_reloc_t zero = jzs(_jit);
956   jit_patch_here(_jit, pos);
957   jit_reloc_t ret = jmp(_jit);
958   jit_patch_here(_jit, zero);
959   return ret;
960 }
961 
962 static jit_reloc_t
bunltr_d(jit_state_t * _jit,int32_t r0,int32_t r1)963 bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
964 {
965   ucomisdr(_jit, r0, r1);
966   return jnae(_jit);
967 }
968 
969 static jit_reloc_t
bunler_d(jit_state_t * _jit,int32_t r0,int32_t r1)970 bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
971 {
972   ucomisdr(_jit, r0, r1);
973   return jna(_jit);
974 }
975 
976 static jit_reloc_t
buneqr_d(jit_state_t * _jit,int32_t r0,int32_t r1)977 buneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
978 {
979   ucomisdr(_jit, r0, r1);
980   return je(_jit);
981 }
982 
983 static jit_reloc_t
bunger_d(jit_state_t * _jit,int32_t r0,int32_t r1)984 bunger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
985 {
986   ucomisdr(_jit, r1, r0);
987   return jna(_jit);
988 }
989 
990 static jit_reloc_t
bungtr_d(jit_state_t * _jit,int32_t r0,int32_t r1)991 bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
992 {
993   ucomisdr(_jit, r1, r0);
994   return jnae(_jit);
995 }
996 
997 static jit_reloc_t
bltgtr_d(jit_state_t * _jit,int32_t r0,int32_t r1)998 bltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
999 {
1000   ucomisdr(_jit, r0, r1);
1001   return jne(_jit);
1002 }
1003 
1004 static jit_reloc_t
bordr_d(jit_state_t * _jit,int32_t r0,int32_t r1)1005 bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
1006 {
1007   ucomisdr(_jit, r0, r1);
1008   return jnp(_jit);
1009 }
1010 
1011 static jit_reloc_t
bunordr_d(jit_state_t * _jit,int32_t r0,int32_t r1)1012 bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
1013 {
1014   ucomisdr(_jit, r0, r1);
1015   return jp(_jit);
1016 }
1017