1 /*
2 * Copyright (C) 2012-2017, 2019 Free Software Foundation, Inc.
3 *
4 * This file is part of GNU lightning.
5 *
6 * GNU lightning is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 3, or (at your option)
9 * any later version.
10 *
11 * GNU lightning is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 * License for more details.
15 *
16 * Authors:
17 * Paulo Cesar Pereira de Andrade
18 */
19
/* Register numbers of the sixteen XMM registers, XMM0 through XMM15. */
#define _XMM0_REGNO      0
#define _XMM1_REGNO      1
#define _XMM2_REGNO      2
#define _XMM3_REGNO      3
#define _XMM4_REGNO      4
#define _XMM5_REGNO      5
#define _XMM6_REGNO      6
#define _XMM7_REGNO      7
#define _XMM8_REGNO      8
#define _XMM9_REGNO      9
#define _XMM10_REGNO    10
#define _XMM11_REGNO    11
#define _XMM12_REGNO    12
#define _XMM13_REGNO    13
#define _XMM14_REGNO    14
#define _XMM15_REGNO    15
/* SSE opcode bytes.  The emitters in this file write the selected
   opcode after a 0x0f escape byte; a prefix byte (0x66, 0xf2 or 0xf3),
   when one is needed, is supplied separately by the caller. */
#define X86_SSE_MOV     0x10
#define X86_SSE_MOV1    0x11
#define X86_SSE_MOVLP   0x12
#define X86_SSE_MOVHP   0x16
#define X86_SSE_MOVA    0x28
#define X86_SSE_CVTIS   0x2a
#define X86_SSE_CVTTSI  0x2c
#define X86_SSE_CVTSI   0x2d
#define X86_SSE_UCOMI   0x2e
#define X86_SSE_COMI    0x2f
#define X86_SSE_ROUND   0x3a
#define X86_SSE_SQRT    0x51
#define X86_SSE_RSQRT   0x52
#define X86_SSE_RCP     0x53
#define X86_SSE_AND     0x54
#define X86_SSE_ANDN    0x55
#define X86_SSE_OR      0x56
#define X86_SSE_XOR     0x57
#define X86_SSE_ADD     0x58
#define X86_SSE_MUL     0x59
#define X86_SSE_CVTSD   0x5a
#define X86_SSE_CVTDT   0x5b
#define X86_SSE_SUB     0x5c
#define X86_SSE_MIN     0x5d
#define X86_SSE_DIV     0x5e
#define X86_SSE_MAX     0x5f
#define X86_SSE_X2G     0x6e
#define X86_SSE_EQB     0x74
#define X86_SSE_EQW     0x75
#define X86_SSE_EQD     0x76
#define X86_SSE_G2X     0x7e
#define X86_SSE_MOV2    0xd6
68
69 static void
sser(jit_state_t * _jit,int32_t c,int32_t r0,int32_t r1)70 sser(jit_state_t *_jit, int32_t c, int32_t r0, int32_t r1)
71 {
72 rex(_jit, 0, 0, r0, 0, r1);
73 ic(_jit, 0x0f);
74 ic(_jit, c);
75 mrm(_jit, 0x03, r7(r0), r7(r1));
76 }
77
78 static void
ssexr(jit_state_t * _jit,int32_t p,int32_t c,int32_t r0,int32_t r1)79 ssexr(jit_state_t *_jit, int32_t p, int32_t c,
80 int32_t r0, int32_t r1)
81 {
82 ic(_jit, p);
83 rex(_jit, 0, 0, r0, 0, r1);
84 ic(_jit, 0x0f);
85 ic(_jit, c);
86 mrm(_jit, 0x03, r7(r0), r7(r1));
87 }
88
89 static void
ssexi(jit_state_t * _jit,int32_t c,int32_t r0,int32_t m,int32_t i)90 ssexi(jit_state_t *_jit, int32_t c, int32_t r0,
91 int32_t m, int32_t i)
92 {
93 ic(_jit, 0x66);
94 rex(_jit, 0, 0, 0, 0, r0);
95 ic(_jit, 0x0f);
96 ic(_jit, c);
97 mrm(_jit, 0x03, r7(m), r7(r0));
98 ic(_jit, i);
99 }
100
101 static void
sselxr(jit_state_t * _jit,int32_t p,int32_t c,int32_t r0,int32_t r1)102 sselxr(jit_state_t *_jit, int32_t p, int32_t c, int32_t r0, int32_t r1)
103 {
104 if (__X64) {
105 ic(_jit, p);
106 rex(_jit, 0, 1, r0, 0, r1);
107 ic(_jit, 0x0f);
108 ic(_jit, c);
109 mrm(_jit, 0x03, r7(r0), r7(r1));
110 } else {
111 ssexr(_jit, p, c, r0, r1);
112 }
113 }
114
115 static void
ssexrx(jit_state_t * _jit,int32_t px,int32_t code,int32_t md,int32_t rb,int32_t ri,int32_t ms,int32_t rd)116 ssexrx(jit_state_t *_jit, int32_t px, int32_t code, int32_t md,
117 int32_t rb, int32_t ri, int32_t ms, int32_t rd)
118 {
119 ic(_jit, px);
120 rex(_jit, 0, 0, rd, ri, rb);
121 ic(_jit, 0x0f);
122 ic(_jit, code);
123 rx(_jit, rd, md, rb, ri, ms);
124 }
125
126 static void
movdlxr(jit_state_t * _jit,int32_t r0,int32_t r1)127 movdlxr(jit_state_t *_jit, int32_t r0, int32_t r1)
128 {
129 ssexr(_jit, 0x66, X86_SSE_X2G, r0, r1);
130 }
131
132 static void movdqxr(jit_state_t *_jit, int32_t r0, int32_t r1) maybe_unused;
133 static void
movdqxr(jit_state_t * _jit,int32_t r0,int32_t r1)134 movdqxr(jit_state_t *_jit, int32_t r0, int32_t r1)
135 {
136 sselxr(_jit, 0x66, X86_SSE_X2G, r0, r1);
137 }
138
139 static void
movssmr(jit_state_t * _jit,int32_t md,int32_t rb,int32_t ri,int32_t ms,int32_t rd)140 movssmr(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms, int32_t rd)
141 {
142 ssexrx(_jit, 0xf3, X86_SSE_MOV, md, rb, ri, ms, rd);
143 }
144 static void
movsdmr(jit_state_t * _jit,int32_t md,int32_t rb,int32_t ri,int32_t ms,int32_t rd)145 movsdmr(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms, int32_t rd)
146 {
147 ssexrx(_jit, 0xf2, X86_SSE_MOV, md, rb, ri, ms, rd);
148 }
149 static void
movssrm(jit_state_t * _jit,int32_t rs,int32_t md,int32_t mb,int32_t mi,int32_t ms)150 movssrm(jit_state_t *_jit, int32_t rs, int32_t md, int32_t mb, int32_t mi, int32_t ms)
151 {
152 ssexrx(_jit, 0xf3, X86_SSE_MOV1, md, mb, mi, ms, rs);
153 }
154 static void
movsdrm(jit_state_t * _jit,int32_t rs,int32_t md,int32_t mb,int32_t mi,int32_t ms)155 movsdrm(jit_state_t *_jit, int32_t rs, int32_t md, int32_t mb, int32_t mi, int32_t ms)
156 {
157 ssexrx(_jit, 0xf2, X86_SSE_MOV1, md, mb, mi, ms, rs);
158 }
159
160 static void
movr_f(jit_state_t * _jit,int32_t r0,int32_t r1)161 movr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
162 {
163 if (r0 != r1)
164 ssexr(_jit, 0xf3, X86_SSE_MOV, r0, r1);
165 }
166
167 static void
movr_d(jit_state_t * _jit,int32_t r0,int32_t r1)168 movr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
169 {
170 if (r0 != r1)
171 ssexr(_jit, 0xf2, X86_SSE_MOV, r0, r1);
172 }
173
174 static void
addssr(jit_state_t * _jit,int32_t r0,int32_t r1)175 addssr(jit_state_t *_jit, int32_t r0, int32_t r1)
176 {
177 ssexr(_jit, 0xf3, X86_SSE_ADD, r0, r1);
178 }
179 static void
addsdr(jit_state_t * _jit,int32_t r0,int32_t r1)180 addsdr(jit_state_t *_jit, int32_t r0, int32_t r1)
181 {
182 ssexr(_jit, 0xf2, X86_SSE_ADD, r0, r1);
183 }
184 static void
subssr(jit_state_t * _jit,int32_t r0,int32_t r1)185 subssr(jit_state_t *_jit, int32_t r0, int32_t r1)
186 {
187 ssexr(_jit, 0xf3, X86_SSE_SUB, r0, r1);
188 }
189 static void
subsdr(jit_state_t * _jit,int32_t r0,int32_t r1)190 subsdr(jit_state_t *_jit, int32_t r0, int32_t r1)
191 {
192 ssexr(_jit, 0xf2, X86_SSE_SUB, r0, r1);
193 }
194 static void
mulssr(jit_state_t * _jit,int32_t r0,int32_t r1)195 mulssr(jit_state_t *_jit, int32_t r0, int32_t r1)
196 {
197 ssexr(_jit, 0xf3, X86_SSE_MUL, r0, r1);
198 }
199 static void
mulsdr(jit_state_t * _jit,int32_t r0,int32_t r1)200 mulsdr(jit_state_t *_jit, int32_t r0, int32_t r1)
201 {
202 ssexr(_jit, 0xf2, X86_SSE_MUL, r0, r1);
203 }
204 static void
divssr(jit_state_t * _jit,int32_t r0,int32_t r1)205 divssr(jit_state_t *_jit, int32_t r0, int32_t r1)
206 {
207 ssexr(_jit, 0xf3, X86_SSE_DIV, r0, r1);
208 }
209 static void
divsdr(jit_state_t * _jit,int32_t r0,int32_t r1)210 divsdr(jit_state_t *_jit, int32_t r0, int32_t r1)
211 {
212 ssexr(_jit, 0xf2, X86_SSE_DIV, r0, r1);
213 }
214 static void
andpsr(jit_state_t * _jit,int32_t r0,int32_t r1)215 andpsr(jit_state_t *_jit, int32_t r0, int32_t r1)
216 {
217 sser(_jit, X86_SSE_AND, r0, r1);
218 }
219 static void
andpdr(jit_state_t * _jit,int32_t r0,int32_t r1)220 andpdr(jit_state_t *_jit, int32_t r0, int32_t r1)
221 {
222 ssexr(_jit, 0x66, X86_SSE_AND, r0, r1);
223 }
224 static void
truncr_f_i(jit_state_t * _jit,int32_t r0,int32_t r1)225 truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1)
226 {
227 ssexr(_jit, 0xf3, X86_SSE_CVTTSI, r0, r1);
228 }
229 static void
truncr_d_i(jit_state_t * _jit,int32_t r0,int32_t r1)230 truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t r1)
231 {
232 ssexr(_jit, 0xf2, X86_SSE_CVTTSI, r0, r1);
233 }
234 #if __X64
235 static void
truncr_f_l(jit_state_t * _jit,int32_t r0,int32_t r1)236 truncr_f_l(jit_state_t *_jit, int32_t r0, int32_t r1)
237 {
238 sselxr(_jit, 0xf3, X86_SSE_CVTTSI, r0, r1);
239 }
240 static void
truncr_d_l(jit_state_t * _jit,int32_t r0,int32_t r1)241 truncr_d_l(jit_state_t *_jit, int32_t r0, int32_t r1)
242 {
243 sselxr(_jit, 0xf2, X86_SSE_CVTTSI, r0, r1);
244 }
245 #endif
246 static void
extr_f(jit_state_t * _jit,int32_t r0,int32_t r1)247 extr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
248 {
249 sselxr(_jit, 0xf3, X86_SSE_CVTIS, r0, r1);
250 }
251 static void
extr_d(jit_state_t * _jit,int32_t r0,int32_t r1)252 extr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
253 {
254 sselxr(_jit, 0xf2, X86_SSE_CVTIS, r0, r1);
255 }
256
257 static void
extr_f_d(jit_state_t * _jit,int32_t r0,int32_t r1)258 extr_f_d(jit_state_t *_jit, int32_t r0, int32_t r1)
259 {
260 ssexr(_jit, 0xf3, X86_SSE_CVTSD, r0, r1);
261 }
262 static void
extr_d_f(jit_state_t * _jit,int32_t r0,int32_t r1)263 extr_d_f(jit_state_t *_jit, int32_t r0, int32_t r1)
264 {
265 ssexr(_jit, 0xf2, X86_SSE_CVTSD, r0, r1);
266 }
267 static void
ucomissr(jit_state_t * _jit,int32_t r0,int32_t r1)268 ucomissr(jit_state_t *_jit, int32_t r0, int32_t r1)
269 {
270 sser(_jit, X86_SSE_UCOMI, r0, r1);
271 }
272 static void
ucomisdr(jit_state_t * _jit,int32_t r0,int32_t r1)273 ucomisdr(jit_state_t *_jit, int32_t r0, int32_t r1)
274 {
275 ssexr(_jit, 0x66, X86_SSE_UCOMI, r0, r1);
276 }
277 static void
xorpsr(jit_state_t * _jit,int32_t r0,int32_t r1)278 xorpsr(jit_state_t *_jit, int32_t r0, int32_t r1)
279 {
280 sser(_jit, X86_SSE_XOR, r0, r1);
281 }
282 static void
xorpdr(jit_state_t * _jit,int32_t r0,int32_t r1)283 xorpdr(jit_state_t *_jit, int32_t r0, int32_t r1)
284 {
285 ssexr(_jit, 0x66, X86_SSE_XOR, r0, r1);
286 }
287 static void orpdr(jit_state_t *_jit, int32_t r0, int32_t r1) maybe_unused;
288 static void
orpdr(jit_state_t * _jit,int32_t r0,int32_t r1)289 orpdr(jit_state_t *_jit, int32_t r0, int32_t r1)
290 {
291 ssexr(_jit, 0x66, X86_SSE_OR, r0, r1);
292 }
293 static void
pcmpeqlr(jit_state_t * _jit,int32_t r0,int32_t r1)294 pcmpeqlr(jit_state_t *_jit, int32_t r0, int32_t r1)
295 {
296 ssexr(_jit, 0x66, X86_SSE_EQD, r0, r1);
297 }
298 static void
psrl(jit_state_t * _jit,int32_t r0,int32_t i0)299 psrl(jit_state_t *_jit, int32_t r0, int32_t i0)
300 {
301 ssexi(_jit, 0x72, r0, 0x02, i0);
302 }
303 static void
psrq(jit_state_t * _jit,int32_t r0,int32_t i0)304 psrq(jit_state_t *_jit, int32_t r0, int32_t i0)
305 {
306 ssexi(_jit, 0x73, r0, 0x02, i0);
307 }
308 static void
pslq(jit_state_t * _jit,int32_t r0,int32_t i0)309 pslq(jit_state_t *_jit, int32_t r0, int32_t i0)
310 {
311 ssexi(_jit, 0x73, r0, 0x06, i0);
312 }
313 static void
sqrtr_f(jit_state_t * _jit,int32_t r0,int32_t r1)314 sqrtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
315 {
316 ssexr(_jit, 0xf3, X86_SSE_SQRT, r0, r1);
317 }
318 static void
sqrtr_d(jit_state_t * _jit,int32_t r0,int32_t r1)319 sqrtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
320 {
321 ssexr(_jit, 0xf2, X86_SSE_SQRT, r0, r1);
322 }
323 static void
ldr_f(jit_state_t * _jit,int32_t r0,int32_t r1)324 ldr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
325 {
326 movssmr(_jit, 0, r1, _NOREG, _SCL1, r0);
327 }
328 static void
str_f(jit_state_t * _jit,int32_t r0,int32_t r1)329 str_f(jit_state_t *_jit, int32_t r0, int32_t r1)
330 {
331 movssrm(_jit, r1, 0, r0, _NOREG, _SCL1);
332 }
333 static void
ldr_d(jit_state_t * _jit,int32_t r0,int32_t r1)334 ldr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
335 {
336 movsdmr(_jit, 0, r1, _NOREG, _SCL1, r0);
337 }
338 static void
str_d(jit_state_t * _jit,int32_t r0,int32_t r1)339 str_d(jit_state_t *_jit, int32_t r0, int32_t r1)
340 {
341 movsdrm(_jit, r1, 0, r0, _NOREG, _SCL1);
342 }
343
344 static void
movi_f(jit_state_t * _jit,int32_t r0,jit_float32_t i0)345 movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t i0)
346 {
347 union {
348 int32_t i;
349 jit_float32_t f;
350 } data;
351
352 data.f = i0;
353 if (data.f == 0.0 && !(data.i & 0x80000000))
354 xorpsr(_jit, r0, r0);
355 else {
356 jit_gpr_t reg = get_temp_gpr(_jit);
357 movi(_jit, jit_gpr_regno(reg), data.i);
358 movdlxr(_jit, r0, jit_gpr_regno(reg));
359 unget_temp_gpr(_jit);
360 }
361 }
362
363 static void
movi_d(jit_state_t * _jit,int32_t r0,jit_float64_t i0)364 movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t i0)
365 {
366 union {
367 int32_t ii[2];
368 jit_word_t w;
369 jit_float64_t d;
370 } data;
371
372 data.d = i0;
373 if (data.d == 0.0 && !(data.ii[1] & 0x80000000))
374 xorpdr(_jit, r0, r0);
375 else {
376 jit_gpr_t ireg = get_temp_gpr(_jit);
377 #if __X64
378 movi(_jit, jit_gpr_regno(ireg), data.w);
379 movdqxr(_jit, r0, jit_gpr_regno(ireg));
380 unget_temp_gpr(_jit);
381 #else
382 jit_fpr_t freg = get_temp_fpr(_jit);
383 movi(_jit, jit_gpr_regno(ireg), data.ii[1]);
384 movdlxr(_jit, jit_fpr_regno(freg), jit_gpr_regno(ireg));
385 pslq(_jit, jit_fpr_regno(freg), 32);
386 movi(_jit, jit_gpr_regno(ireg), data.ii[0]);
387 movdlxr(_jit, r0, jit_gpr_regno(ireg));
388 orpdr(_jit, r0, jit_fpr_regno(freg));
389 unget_temp_fpr(_jit);
390 unget_temp_gpr(_jit);
391 #endif
392 }
393 }
394
395 #if __X32
396 static void
x87rx(jit_state_t * _jit,int32_t code,int32_t md,int32_t rb,int32_t ri,int32_t ms)397 x87rx(jit_state_t *_jit, int32_t code, int32_t md,
398 int32_t rb, int32_t ri, int32_t ms)
399 {
400 rex(_jit, 0, 1, rb, ri, _NOREG);
401 ic(_jit, 0xd8 | (code >> 3));
402 rx(_jit, (code & 7), md, rb, ri, ms);
403 }
404
405 static void
fldsm(jit_state_t * _jit,int32_t md,int32_t rb,int32_t ri,int32_t ms)406 fldsm(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms)
407 {
408 return x87rx(_jit, 010, md, rb, ri, ms);
409 }
410
411 static void
fstsm(jit_state_t * _jit,int32_t md,int32_t rb,int32_t ri,int32_t ms)412 fstsm(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms)
413 {
414 return x87rx(_jit, 013, md, rb, ri, ms);
415 }
416
417 static void
fldlm(jit_state_t * _jit,int32_t md,int32_t rb,int32_t ri,int32_t ms)418 fldlm(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms)
419 {
420 return x87rx(_jit, 050, md, rb, ri, ms);
421 }
422
423 static void
fstlm(jit_state_t * _jit,int32_t md,int32_t rb,int32_t ri,int32_t ms)424 fstlm(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms)
425 {
426 return x87rx(_jit, 053, md, rb, ri, ms);
427 }
428 #endif
429
430 static void
retval_f(jit_state_t * _jit,int32_t r0)431 retval_f(jit_state_t *_jit, int32_t r0)
432 {
433 #if __X32
434 subi(_jit, _RSP_REGNO, _RSP_REGNO, 4);
435 fstsm(_jit, 0, _RSP_REGNO, _NOREG, _SCL1);
436 ldr_f(_jit, r0, _RSP_REGNO);
437 addi(_jit, _RSP_REGNO, _RSP_REGNO, 4);
438 #else
439 movr_f(_jit, r0, _XMM0_REGNO);
440 #endif
441 }
442
443 static void
retval_d(jit_state_t * _jit,int32_t r0)444 retval_d(jit_state_t *_jit, int32_t r0)
445 {
446 #if __X32
447 subi(_jit, _RSP_REGNO, _RSP_REGNO, 8);
448 fstlm(_jit, 0, _RSP_REGNO, _NOREG, _SCL1);
449 ldr_d(_jit, r0, _RSP_REGNO);
450 addi(_jit, _RSP_REGNO, _RSP_REGNO, 8);
451 #else
452 movr_d(_jit, r0, _XMM0_REGNO);
453 #endif
454 }
455
456 static void
retr_f(jit_state_t * _jit,int32_t u)457 retr_f(jit_state_t *_jit, int32_t u)
458 {
459 #if __X32
460 subi(_jit, _RSP_REGNO, _RSP_REGNO, 4);
461 str_f(_jit, _RSP_REGNO, u);
462 fldsm(_jit, 0, _RSP_REGNO, _NOREG, _SCL1);
463 addi(_jit, _RSP_REGNO, _RSP_REGNO, 4);
464 #else
465 movr_f(_jit, _XMM0_REGNO, u);
466 #endif
467 ret(_jit);
468 }
469
470 static void
retr_d(jit_state_t * _jit,int32_t u)471 retr_d(jit_state_t *_jit, int32_t u)
472 {
473 #if __X32
474 subi(_jit, _RSP_REGNO, _RSP_REGNO, 8);
475 str_d(_jit, _RSP_REGNO, u);
476 fldlm(_jit, 0, _RSP_REGNO, _NOREG, _SCL1);
477 addi(_jit, _RSP_REGNO, _RSP_REGNO, 8);
478 #else
479 movr_d(_jit, _XMM0_REGNO, u);
480 #endif
481 ret(_jit);
482 }
483
484 static void
addr_f(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)485 addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
486 {
487 if (r0 == r1)
488 addssr(_jit, r0, r2);
489 else if (r0 == r2)
490 addssr(_jit, r0, r1);
491 else {
492 movr_f(_jit, r0, r1);
493 addssr(_jit, r0, r2);
494 }
495 }
496
497 static void
addr_d(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)498 addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
499 {
500 if (r0 == r1)
501 addsdr(_jit, r0, r2);
502 else if (r0 == r2)
503 addsdr(_jit, r0, r1);
504 else {
505 movr_d(_jit, r0, r1);
506 addsdr(_jit, r0, r2);
507 }
508 }
509
510 static void
subr_f(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)511 subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
512 {
513 if (r0 == r1)
514 subssr(_jit, r0, r2);
515 else if (r0 == r2) {
516 jit_fpr_t reg = get_temp_fpr(_jit);
517 movr_f(_jit, jit_fpr_regno(reg), r0);
518 movr_f(_jit, r0, r1);
519 subssr(_jit, r0, jit_fpr_regno(reg));
520 unget_temp_fpr(_jit);
521 }
522 else {
523 movr_f(_jit, r0, r1);
524 subssr(_jit, r0, r2);
525 }
526 }
527
528 static void
subr_d(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)529 subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
530 {
531 if (r0 == r1)
532 subsdr(_jit, r0, r2);
533 else if (r0 == r2) {
534 jit_fpr_t reg = get_temp_fpr(_jit);
535 movr_d(_jit, jit_fpr_regno(reg), r0);
536 movr_d(_jit, r0, r1);
537 subsdr(_jit, r0, jit_fpr_regno(reg));
538 unget_temp_fpr(_jit);
539 }
540 else {
541 movr_d(_jit, r0, r1);
542 subsdr(_jit, r0, r2);
543 }
544 }
545
546 static void
mulr_f(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)547 mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
548 {
549 if (r0 == r1)
550 mulssr(_jit, r0, r2);
551 else if (r0 == r2)
552 mulssr(_jit, r0, r1);
553 else {
554 movr_f(_jit, r0, r1);
555 mulssr(_jit, r0, r2);
556 }
557 }
558
559 static void
mulr_d(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)560 mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
561 {
562 if (r0 == r1)
563 mulsdr(_jit, r0, r2);
564 else if (r0 == r2)
565 mulsdr(_jit, r0, r1);
566 else {
567 movr_d(_jit, r0, r1);
568 mulsdr(_jit, r0, r2);
569 }
570 }
571
572 static void
divr_f(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)573 divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
574 {
575 if (r0 == r1)
576 divssr(_jit, r0, r2);
577 else if (r0 == r2) {
578 jit_fpr_t reg = get_temp_fpr(_jit);
579 movr_f(_jit, jit_fpr_regno(reg), r0);
580 movr_f(_jit, r0, r1);
581 divssr(_jit, r0, jit_fpr_regno(reg));
582 unget_temp_fpr(_jit);
583 }
584 else {
585 movr_f(_jit, r0, r1);
586 divssr(_jit, r0, r2);
587 }
588 }
589
590 static void
divr_d(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)591 divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
592 {
593 if (r0 == r1)
594 divsdr(_jit, r0, r2);
595 else if (r0 == r2) {
596 jit_fpr_t reg = get_temp_fpr(_jit);
597 movr_d(_jit, jit_fpr_regno(reg), r0);
598 movr_d(_jit, r0, r1);
599 divsdr(_jit, r0, jit_fpr_regno(reg));
600 unget_temp_fpr(_jit);
601 }
602 else {
603 movr_d(_jit, r0, r1);
604 divsdr(_jit, r0, r2);
605 }
606 }
607
608 static void
absr_f(jit_state_t * _jit,int32_t r0,int32_t r1)609 absr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
610 {
611 if (r0 == r1) {
612 jit_fpr_t reg = get_temp_fpr(_jit);
613 pcmpeqlr(_jit, jit_fpr_regno(reg), jit_fpr_regno(reg));
614 psrl(_jit, jit_fpr_regno(reg), 1);
615 andpsr(_jit, r0, jit_fpr_regno(reg));
616 unget_temp_fpr(_jit);
617 }
618 else {
619 pcmpeqlr(_jit, r0, r0);
620 psrl(_jit, r0, 1);
621 andpsr(_jit, r0, r1);
622 }
623 }
624
625 static void
absr_d(jit_state_t * _jit,int32_t r0,int32_t r1)626 absr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
627 {
628 if (r0 == r1) {
629 jit_fpr_t reg = get_temp_fpr(_jit);
630 pcmpeqlr(_jit, jit_fpr_regno(reg), jit_fpr_regno(reg));
631 psrq(_jit, jit_fpr_regno(reg), 1);
632 andpdr(_jit, r0, jit_fpr_regno(reg));
633 unget_temp_fpr(_jit);
634 }
635 else {
636 pcmpeqlr(_jit, r0, r0);
637 psrq(_jit, r0, 1);
638 andpdr(_jit, r0, r1);
639 }
640 }
641
642 static void
negr_f(jit_state_t * _jit,int32_t r0,int32_t r1)643 negr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
644 {
645 jit_gpr_t ireg = get_temp_gpr(_jit);
646 imovi(_jit, jit_gpr_regno(ireg), 0x80000000);
647 if (r0 == r1) {
648 jit_fpr_t freg = get_temp_fpr(_jit);
649 movdlxr(_jit, jit_fpr_regno(freg), jit_gpr_regno(ireg));
650 xorpsr(_jit, r0, jit_fpr_regno(freg));
651 unget_temp_fpr(_jit);
652 } else {
653 movdlxr(_jit, r0, jit_gpr_regno(ireg));
654 xorpsr(_jit, r0, r1);
655 }
656 unget_temp_gpr(_jit);
657 }
658
659 static void
negr_d(jit_state_t * _jit,int32_t r0,int32_t r1)660 negr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
661 {
662 jit_gpr_t ireg = get_temp_gpr(_jit);
663 imovi(_jit, jit_gpr_regno(ireg), 0x80000000);
664 if (r0 == r1) {
665 jit_fpr_t freg = get_temp_fpr(_jit);
666 movdlxr(_jit, jit_fpr_regno(freg), jit_gpr_regno(ireg));
667 pslq(_jit, jit_fpr_regno(freg), 32);
668 xorpdr(_jit, r0, jit_fpr_regno(freg));
669 unget_temp_fpr(_jit);
670 } else {
671 movdlxr(_jit, r0, jit_gpr_regno(ireg));
672 pslq(_jit, r0, 32);
673 xorpdr(_jit, r0, r1);
674 }
675 unget_temp_gpr(_jit);
676 }
677
678 static void
ldi_f(jit_state_t * _jit,int32_t r0,jit_word_t i0)679 ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0)
680 {
681 if (can_sign_extend_int_p(i0))
682 movssmr(_jit, i0, _NOREG, _NOREG, _SCL1, r0);
683 else {
684 jit_gpr_t reg = get_temp_gpr(_jit);
685 movi(_jit, jit_gpr_regno(reg), i0);
686 ldr_f(_jit, r0, jit_gpr_regno(reg));
687 unget_temp_gpr(_jit);
688 }
689 }
690
691 static void
ldxr_f(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)692 ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
693 {
694 movssmr(_jit, 0, r1, r2, _SCL1, r0);
695 }
696
697 static void
ldxi_f(jit_state_t * _jit,int32_t r0,int32_t r1,jit_word_t i0)698 ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
699 {
700 if (can_sign_extend_int_p(i0))
701 movssmr(_jit, i0, r1, _NOREG, _SCL1, r0);
702 else {
703 jit_gpr_t reg = get_temp_gpr(_jit);
704 movi(_jit, jit_gpr_regno(reg), i0);
705 ldxr_f(_jit, r0, r1, jit_gpr_regno(reg));
706 unget_temp_gpr(_jit);
707 }
708 }
709
710 static void
sti_f(jit_state_t * _jit,jit_word_t i0,int32_t r0)711 sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0)
712 {
713 if (can_sign_extend_int_p(i0))
714 movssrm(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
715 else {
716 jit_gpr_t reg = get_temp_gpr(_jit);
717 movi(_jit, jit_gpr_regno(reg), i0);
718 str_f(_jit, jit_gpr_regno(reg), r0);
719 unget_temp_gpr(_jit);
720 }
721 }
722
723 static void
stxr_f(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)724 stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
725 {
726 movssrm(_jit, r2, 0, r0, r1, _SCL1);
727 }
728
729 static void
stxi_f(jit_state_t * _jit,jit_word_t i0,int32_t r0,int32_t r1)730 stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
731 {
732 if (can_sign_extend_int_p(i0))
733 movssrm(_jit, r1, i0, r0, _NOREG, _SCL1);
734 else {
735 jit_gpr_t reg = get_temp_gpr(_jit);
736 movi(_jit, jit_gpr_regno(reg), i0);
737 stxr_f(_jit, jit_gpr_regno(reg), r0, r1);
738 unget_temp_gpr(_jit);
739 }
740 }
741
742 static jit_reloc_t
bltr_f(jit_state_t * _jit,int32_t r0,int32_t r1)743 bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
744 {
745 ucomissr(_jit, r1, r0);
746 return ja(_jit);
747 }
748
749 static jit_reloc_t
bler_f(jit_state_t * _jit,int32_t r0,int32_t r1)750 bler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
751 {
752 ucomissr(_jit, r1, r0);
753 return jae(_jit);
754 }
755
756 static jit_reloc_t
beqr_f(jit_state_t * _jit,int32_t r0,int32_t r1)757 beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
758 {
759 ucomissr(_jit, r0, r1);
760 jit_reloc_t pos = jps(_jit);
761 jit_reloc_t ret = je(_jit);
762 jit_patch_here(_jit, pos);
763 return ret;
764 }
765
766 static jit_reloc_t
bger_f(jit_state_t * _jit,int32_t r0,int32_t r1)767 bger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
768 {
769 ucomissr(_jit, r0, r1);
770 return jae(_jit);
771 }
772
773 static jit_reloc_t
bgtr_f(jit_state_t * _jit,int32_t r0,int32_t r1)774 bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
775 {
776 ucomissr(_jit, r0, r1);
777 return ja(_jit);
778 }
779
780 static jit_reloc_t
bner_f(jit_state_t * _jit,int32_t r0,int32_t r1)781 bner_f(jit_state_t *_jit, int32_t r0, int32_t r1)
782 {
783 ucomissr(_jit, r0, r1);
784 jit_reloc_t pos = jps(_jit);
785 jit_reloc_t zero = jzs(_jit);
786 jit_patch_here(_jit, pos);
787 jit_reloc_t ret = jmp(_jit);
788 jit_patch_here(_jit, zero);
789 return ret;
790 }
791
792 static jit_reloc_t
bunltr_f(jit_state_t * _jit,int32_t r0,int32_t r1)793 bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
794 {
795 ucomissr(_jit, r0, r1);
796 return jnae(_jit);
797 }
798
799 static jit_reloc_t
bunler_f(jit_state_t * _jit,int32_t r0,int32_t r1)800 bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
801 {
802 ucomissr(_jit, r0, r1);
803 return jna(_jit);
804 }
805
806 static jit_reloc_t
buneqr_f(jit_state_t * _jit,int32_t r0,int32_t r1)807 buneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
808 {
809 ucomissr(_jit, r0, r1);
810 return je(_jit);
811 }
812
813 static jit_reloc_t
bunger_f(jit_state_t * _jit,int32_t r0,int32_t r1)814 bunger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
815 {
816 ucomissr(_jit, r1, r0);
817 return jna(_jit);
818 }
819
820 static jit_reloc_t
bungtr_f(jit_state_t * _jit,int32_t r0,int32_t r1)821 bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
822 {
823 ucomissr(_jit, r1, r0);
824 return jnae(_jit);
825 }
826
827 static jit_reloc_t
bltgtr_f(jit_state_t * _jit,int32_t r0,int32_t r1)828 bltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
829 {
830 ucomissr(_jit, r0, r1);
831 return jne(_jit);
832 }
833
834 static jit_reloc_t
bordr_f(jit_state_t * _jit,int32_t r0,int32_t r1)835 bordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
836 {
837 ucomissr(_jit, r0, r1);
838 return jnp(_jit);
839 }
840
841 static jit_reloc_t
bunordr_f(jit_state_t * _jit,int32_t r0,int32_t r1)842 bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
843 {
844 ucomissr(_jit, r0, r1);
845 return jp(_jit);
846 }
847
848 static void
ldi_d(jit_state_t * _jit,int32_t r0,jit_word_t i0)849 ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0)
850 {
851 if (can_sign_extend_int_p(i0))
852 movsdmr(_jit, i0, _NOREG, _NOREG, _SCL1, r0);
853 else {
854 jit_gpr_t reg = get_temp_gpr(_jit);
855 movi(_jit, jit_gpr_regno(reg), i0);
856 ldr_d(_jit, r0, jit_gpr_regno(reg));
857 unget_temp_gpr(_jit);
858 }
859 }
860
861 static void
ldxr_d(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)862 ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
863 {
864 movsdmr(_jit, 0, r1, r2, _SCL1, r0);
865 }
866
867 static void
ldxi_d(jit_state_t * _jit,int32_t r0,int32_t r1,jit_word_t i0)868 ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
869 {
870 if (can_sign_extend_int_p(i0))
871 movsdmr(_jit, i0, r1, _NOREG, _SCL1, r0);
872 else {
873 jit_gpr_t reg = get_temp_gpr(_jit);
874 movi(_jit, jit_gpr_regno(reg), i0);
875 ldxr_d(_jit, r0, r1, jit_gpr_regno(reg));
876 unget_temp_gpr(_jit);
877 }
878 }
879
880 static void
sti_d(jit_state_t * _jit,jit_word_t i0,int32_t r0)881 sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0)
882 {
883 if (can_sign_extend_int_p(i0))
884 movsdrm(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
885 else {
886 jit_gpr_t reg = get_temp_gpr(_jit);
887 movi(_jit, jit_gpr_regno(reg), i0);
888 str_d(_jit, jit_gpr_regno(reg), r0);
889 unget_temp_gpr(_jit);
890 }
891 }
892
893 static void
stxr_d(jit_state_t * _jit,int32_t r0,int32_t r1,int32_t r2)894 stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
895 {
896 movsdrm(_jit, r2, 0, r0, r1, _SCL1);
897 }
898
899 static void
stxi_d(jit_state_t * _jit,jit_word_t i0,int32_t r0,int32_t r1)900 stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
901 {
902 if (can_sign_extend_int_p(i0))
903 movsdrm(_jit, r1, i0, r0, _NOREG, _SCL1);
904 else {
905 jit_gpr_t reg = get_temp_gpr(_jit);
906 movi(_jit, jit_gpr_regno(reg), i0);
907 stxr_d(_jit, jit_gpr_regno(reg), r0, r1);
908 unget_temp_gpr(_jit);
909 }
910 }
911
912 static jit_reloc_t
bltr_d(jit_state_t * _jit,int32_t r0,int32_t r1)913 bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
914 {
915 ucomisdr(_jit, r1, r0);
916 return ja(_jit);
917 }
918
919 static jit_reloc_t
bler_d(jit_state_t * _jit,int32_t r0,int32_t r1)920 bler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
921 {
922 ucomisdr(_jit, r1, r0);
923 return jae(_jit);
924 }
925
926 static jit_reloc_t
beqr_d(jit_state_t * _jit,int32_t r0,int32_t r1)927 beqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
928 {
929 ucomisdr(_jit, r0, r1);
930 jit_reloc_t pos = jps(_jit);
931 jit_reloc_t ret = je(_jit);
932 jit_patch_here(_jit, pos);
933 return ret;
934 }
935
936 static jit_reloc_t
bger_d(jit_state_t * _jit,int32_t r0,int32_t r1)937 bger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
938 {
939 ucomisdr(_jit, r0, r1);
940 return jae(_jit);
941 }
942
943 static jit_reloc_t
bgtr_d(jit_state_t * _jit,int32_t r0,int32_t r1)944 bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
945 {
946 ucomisdr(_jit, r0, r1);
947 return ja(_jit);
948 }
949
950 static jit_reloc_t
bner_d(jit_state_t * _jit,int32_t r0,int32_t r1)951 bner_d(jit_state_t *_jit, int32_t r0, int32_t r1)
952 {
953 ucomisdr(_jit, r0, r1);
954 jit_reloc_t pos = jps(_jit);
955 jit_reloc_t zero = jzs(_jit);
956 jit_patch_here(_jit, pos);
957 jit_reloc_t ret = jmp(_jit);
958 jit_patch_here(_jit, zero);
959 return ret;
960 }
961
962 static jit_reloc_t
bunltr_d(jit_state_t * _jit,int32_t r0,int32_t r1)963 bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
964 {
965 ucomisdr(_jit, r0, r1);
966 return jnae(_jit);
967 }
968
969 static jit_reloc_t
bunler_d(jit_state_t * _jit,int32_t r0,int32_t r1)970 bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
971 {
972 ucomisdr(_jit, r0, r1);
973 return jna(_jit);
974 }
975
976 static jit_reloc_t
buneqr_d(jit_state_t * _jit,int32_t r0,int32_t r1)977 buneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
978 {
979 ucomisdr(_jit, r0, r1);
980 return je(_jit);
981 }
982
983 static jit_reloc_t
bunger_d(jit_state_t * _jit,int32_t r0,int32_t r1)984 bunger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
985 {
986 ucomisdr(_jit, r1, r0);
987 return jna(_jit);
988 }
989
990 static jit_reloc_t
bungtr_d(jit_state_t * _jit,int32_t r0,int32_t r1)991 bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
992 {
993 ucomisdr(_jit, r1, r0);
994 return jnae(_jit);
995 }
996
997 static jit_reloc_t
bltgtr_d(jit_state_t * _jit,int32_t r0,int32_t r1)998 bltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
999 {
1000 ucomisdr(_jit, r0, r1);
1001 return jne(_jit);
1002 }
1003
1004 static jit_reloc_t
bordr_d(jit_state_t * _jit,int32_t r0,int32_t r1)1005 bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
1006 {
1007 ucomisdr(_jit, r0, r1);
1008 return jnp(_jit);
1009 }
1010
1011 static jit_reloc_t
bunordr_d(jit_state_t * _jit,int32_t r0,int32_t r1)1012 bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
1013 {
1014 ucomisdr(_jit, r0, r1);
1015 return jp(_jit);
1016 }
1017