1 /*
2 * Copyright (C) 2012-2019 Free Software Foundation, Inc.
3 *
4 * This file is part of GNU lightning.
5 *
6 * GNU lightning is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 3, or (at your option)
9 * any later version.
10 *
11 * GNU lightning is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 * License for more details.
15 *
16 * Authors:
17 * Paulo Cesar Pereira de Andrade
18 */
19
20 #if PROTO
21 # if __X32
22 # define sse_address_p(i0) 1
23 # else
24 # if __X64_32
25 # define sse_address_p(i0) ((jit_word_t)(i0) >= 0)
26 # else
27 # define sse_address_p(i0) can_sign_extend_int_p(i0)
28 # endif
29 # endif
30 # define _XMM6_REGNO 6
31 # define _XMM7_REGNO 7
32 # define _XMM8_REGNO 8
33 # define _XMM9_REGNO 9
34 # define _XMM10_REGNO 10
35 # define _XMM11_REGNO 11
36 # define _XMM12_REGNO 12
37 # define _XMM13_REGNO 13
38 # define _XMM14_REGNO 14
39 # define _XMM15_REGNO 15
40 #define X86_SSE_MOV 0x10
41 #define X86_SSE_MOV1 0x11
42 #define X86_SSE_MOVLP 0x12
43 #define X86_SSE_MOVHP 0x16
44 #define X86_SSE_MOVA 0x28
45 #define X86_SSE_CVTIS 0x2a
46 #define X86_SSE_CVTTSI 0x2c
47 #define X86_SSE_CVTSI 0x2d
48 #define X86_SSE_UCOMI 0x2e
49 #define X86_SSE_COMI 0x2f
50 #define X86_SSE_ROUND 0x3a
51 #define X86_SSE_SQRT 0x51
52 #define X86_SSE_RSQRT 0x52
53 #define X86_SSE_RCP 0x53
54 #define X86_SSE_AND 0x54
55 #define X86_SSE_ANDN 0x55
56 #define X86_SSE_OR 0x56
57 #define X86_SSE_XOR 0x57
58 #define X86_SSE_ADD 0x58
59 #define X86_SSE_MUL 0x59
60 #define X86_SSE_CVTSD 0x5a
61 #define X86_SSE_CVTDT 0x5b
62 #define X86_SSE_SUB 0x5c
63 #define X86_SSE_MIN 0x5d
64 #define X86_SSE_DIV 0x5e
65 #define X86_SSE_MAX 0x5f
66 #define X86_SSE_X2G 0x6e
67 #define X86_SSE_EQB 0x74
68 #define X86_SSE_EQW 0x75
69 #define X86_SSE_EQD 0x76
70 #define X86_SSE_G2X 0x7e
71 #define X86_SSE_MOV2 0xd6
72 # define sser(c,r0,r1) _sser(_jit,c,r0,r1)
73 static void _sser(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
74 # define ssexr(p,c,r0,r1) _ssexr(_jit,p,c,r0,r1)
75 static void _ssexr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
76 # define ssexi(c,r0,m,i) _ssexi(_jit,c,r0,m,i)
77 static void _ssexi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
78 # define addssr(r0, r1) ssexr(0xf3, X86_SSE_ADD, r0, r1)
79 # define addsdr(r0, r1) ssexr(0xf2, X86_SSE_ADD, r0, r1)
80 # define subssr(r0, r1) ssexr(0xf3, X86_SSE_SUB, r0, r1)
81 # define subsdr(r0, r1) ssexr(0xf2, X86_SSE_SUB, r0, r1)
82 # define mulssr(r0, r1) ssexr(0xf3, X86_SSE_MUL, r0, r1)
83 # define mulsdr(r0, r1) ssexr(0xf2, X86_SSE_MUL, r0, r1)
84 # define divssr(r0, r1) ssexr(0xf3, X86_SSE_DIV, r0, r1)
85 # define divsdr(r0, r1) ssexr(0xf2, X86_SSE_DIV, r0, r1)
86 # define andpsr(r0, r1) sser( X86_SSE_AND, r0, r1)
87 # define andpdr(r0, r1) ssexr(0x66, X86_SSE_AND, r0, r1)
88 # define sse_truncr_f_i(r0, r1) ssexr(0xf3, X86_SSE_CVTTSI, r0, r1)
89 # define sse_truncr_d_i(r0, r1) ssexr(0xf2, X86_SSE_CVTTSI, r0, r1)
90 # if __X64
91 # define sse_truncr_f_l(r0, r1) sselxr(0xf3, X86_SSE_CVTTSI, r0, r1)
92 # define sse_truncr_d_l(r0, r1) sselxr(0xf2, X86_SSE_CVTTSI, r0, r1)
93 # define sse_extr_f(r0, r1) sselxr(0xf3, X86_SSE_CVTIS, r0, r1)
94 # define sse_extr_d(r0, r1) sselxr(0xf2, X86_SSE_CVTIS, r0, r1)
95 # else
96 # define sse_extr_f(r0, r1) ssexr(0xf3, X86_SSE_CVTIS, r0, r1)
97 # define sse_extr_d(r0, r1) ssexr(0xf2, X86_SSE_CVTIS, r0, r1)
98 # endif
99 # define sse_extr_f_d(r0, r1) ssexr(0xf3, X86_SSE_CVTSD, r0, r1)
100 # define sse_extr_d_f(r0, r1) ssexr(0xf2, X86_SSE_CVTSD, r0, r1)
101 # define ucomissr(r0,r1) sser(X86_SSE_UCOMI,r0,r1)
102 # define ucomisdr(r0,r1) ssexr(0x66,X86_SSE_UCOMI,r0,r1)
103 # define xorpsr(r0,r1) sser(X86_SSE_XOR,r0,r1)
104 # define xorpdr(r0,r1) ssexr(0x66,X86_SSE_XOR,r0,r1)
105 # define movdlxr(r0,r1) ssexr(0x66, X86_SSE_X2G,r0,r1)
106 # define pcmpeqlr(r0, r1) ssexr(0x66, X86_SSE_EQD, r0, r1)
107 # define psrl(r0, i0) ssexi(0x72, r0, 0x02, i0)
108 # define psrq(r0, i0) ssexi(0x73, r0, 0x02, i0)
109 # define psll(r0, i0) ssexi(0x72, r0, 0x06, i0)
110 # define pslq(r0, i0) ssexi(0x73, r0, 0x06, i0)
111 # define movdqxr(r0,r1) sselxr(0x66,X86_SSE_X2G,r0,r1)
112 # if __X64 && !__X64_32
113 # define sselxr(p,c,r0,r1) _sselxr(_jit,p,c,r0,r1)
114 static void
115 _sselxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
116 # else
117 # define sselxr(p,c,r0,r1) ssexr(p,c,r0,r1)
118 # endif
119 # define ssexrx(p,c,md,rb,ri,ms,rd) _ssexrx(_jit,p,c,md,rb,ri,ms,rd)
120 # define movssmr(md,rb,ri,ms,rd) ssexrx(0xf3,X86_SSE_MOV,md,rb,ri,ms,rd)
121 # define movsdmr(md,rb,ri,ms,rd) ssexrx(0xf2,X86_SSE_MOV,md,rb,ri,ms,rd)
122 # define movssrm(rs,md,mb,mi,ms) ssexrx(0xf3,X86_SSE_MOV1,md,mb,mi,ms,rs)
123 # define movsdrm(rs,md,mb,mi,ms) ssexrx(0xf2,X86_SSE_MOV1,md,mb,mi,ms,rs)
124 static void
125 _ssexrx(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t,
126 jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
127 # define sse_addr_f(r0, r1, r2) _sse_addr_f(_jit, r0, r1, r2)
128 static void _sse_addr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
129 # define sse_addi_f(r0, r1, i0) _sse_addi_f(_jit, r0, r1, i0)
130 static void _sse_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
131 # define sse_addr_d(r0, r1, r2) _sse_addr_d(_jit, r0, r1, r2)
132 static void _sse_addr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
133 # define sse_addi_d(r0, r1, i0) _sse_addi_d(_jit, r0, r1, i0)
134 static void _sse_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
135 # define sse_subr_f(r0, r1, r2) _sse_subr_f(_jit, r0, r1, r2)
136 static void _sse_subr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
137 # define sse_subi_f(r0, r1, i0) _sse_subi_f(_jit, r0, r1, i0)
138 static void _sse_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
139 # define sse_subr_d(r0, r1, r2) _sse_subr_d(_jit, r0, r1, r2)
140 static void _sse_subr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
141 # define sse_subi_d(r0, r1, i0) _sse_subi_d(_jit, r0, r1, i0)
142 static void _sse_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
143 # define sse_rsbr_f(r0, r1, r2) sse_subr_f(r0, r2, r1)
144 # define sse_rsbi_f(r0, r1, i0) _sse_rsbi_f(_jit, r0, r1, i0)
145 static void _sse_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
146 # define sse_rsbr_d(r0, r1, r2) sse_subr_d(r0, r2, r1)
147 # define sse_rsbi_d(r0, r1, i0) _sse_rsbi_d(_jit, r0, r1, i0)
148 static void _sse_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
149 # define sse_mulr_f(r0, r1, r2) _sse_mulr_f(_jit, r0, r1, r2)
150 static void _sse_mulr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
151 # define sse_muli_f(r0, r1, i0) _sse_muli_f(_jit, r0, r1, i0)
152 static void _sse_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
153 # define sse_mulr_d(r0, r1, r2) _sse_mulr_d(_jit, r0, r1, r2)
154 static void _sse_mulr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
155 # define sse_muli_d(r0, r1, i0) _sse_muli_d(_jit, r0, r1, i0)
156 static void _sse_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
157 # define sse_divr_f(r0, r1, r2) _sse_divr_f(_jit, r0, r1, r2)
158 static void _sse_divr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
159 # define sse_divi_f(r0, r1, i0) _sse_divi_f(_jit, r0, r1, i0)
160 static void _sse_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
161 # define sse_divr_d(r0, r1, r2) _sse_divr_d(_jit, r0, r1, r2)
162 static void _sse_divr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
163 # define sse_divi_d(r0, r1, i0) _sse_divi_d(_jit, r0, r1, i0)
164 static void _sse_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
165 # define sse_absr_f(r0, r1) _sse_absr_f(_jit, r0, r1)
166 static void _sse_absr_f(jit_state_t*,jit_int32_t,jit_int32_t);
167 # define sse_absr_d(r0, r1) _sse_absr_d(_jit, r0, r1)
168 static void _sse_absr_d(jit_state_t*,jit_int32_t,jit_int32_t);
169 # define sse_negr_f(r0, r1) _sse_negr_f(_jit, r0, r1)
170 static void _sse_negr_f(jit_state_t*,jit_int32_t,jit_int32_t);
171 # define sse_negr_d(r0, r1) _sse_negr_d(_jit, r0, r1)
172 static void _sse_negr_d(jit_state_t*,jit_int32_t,jit_int32_t);
173 # define sse_sqrtr_f(r0, r1) ssexr(0xf3, X86_SSE_SQRT, r0, r1)
174 # define sse_sqrtr_d(r0, r1) ssexr(0xf2, X86_SSE_SQRT, r0, r1)
175 # define ssecmpf(code, r0, r1, r2) _ssecmp(_jit, 0, code, r0, r1, r2)
176 # define ssecmpd(code, r0, r1, r2) _ssecmp(_jit, 1, code, r0, r1, r2)
177 static void
178 _ssecmp(jit_state_t*, jit_bool_t, jit_int32_t,
179 jit_int32_t, jit_int32_t, jit_int32_t);
180 #define sse_movr_f(r0,r1) _sse_movr_f(_jit,r0,r1)
181 static void _sse_movr_f(jit_state_t*, jit_int32_t, jit_int32_t);
182 #define sse_movi_f(r0,i0) _sse_movi_f(_jit,r0,i0)
183 static void _sse_movi_f(jit_state_t*, jit_int32_t, jit_float32_t*);
184 # define sse_lti_f(r0, r1, i0) _sse_lti_f(_jit, r0, r1, i0)
185 static void _sse_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
186 # define sse_ltr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r1, r2)
187 # define sse_lei_f(r0, r1, i0) _sse_lei_f(_jit, r0, r1, i0)
188 static void _sse_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
189 # define sse_ler_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r1, r2)
190 # define sse_eqi_f(r0, r1, i0) _sse_eqi_f(_jit, r0, r1, i0)
191 static void _sse_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
192 # define sse_eqr_f(r0, r1, r2) _sse_eqr_f(_jit, r0, r1, r2)
193 static void _sse_eqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
194 # define sse_gei_f(r0, r1, i0) _sse_gei_f(_jit, r0, r1, i0)
195 static void _sse_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
196 # define sse_ger_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r2, r1)
197 # define sse_gti_f(r0, r1, i0) _sse_gti_f(_jit, r0, r1, i0)
198 static void _sse_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
199 # define sse_gtr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r2, r1)
200 # define sse_nei_f(r0, r1, i0) _sse_nei_f(_jit, r0, r1, i0)
201 static void _sse_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
202 # define sse_ner_f(r0, r1, r2) _sse_ner_f(_jit, r0, r1, r2)
203 static void _sse_ner_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
204 # define sse_unlti_f(r0, r1, i0) _sse_unlti_f(_jit, r0, r1, i0)
205 static void _sse_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
206 # define sse_unltr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r2, r1)
207 # define sse_unlei_f(r0, r1, i0) _sse_unlei_f(_jit, r0, r1, i0)
208 static void _sse_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
209 # define sse_unler_f(r0, r1, r2) _sse_unler_f(_jit, r0, r1, r2)
210 # define sse_uneqi_f(r0, r1, i0) _sse_uneqi_f(_jit, r0, r1, i0)
211 static void _sse_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
212 static void _sse_unler_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
213 # define sse_uneqr_f(r0, r1, r2) _sse_uneqr_f(_jit, r0, r1, r2)
214 static void _sse_uneqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
215 # define sse_ungei_f(r0, r1, i0) _sse_ungei_f(_jit, r0, r1, i0)
216 static void _sse_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
217 # define sse_unger_f(r0, r1, r2) _sse_unger_f(_jit, r0, r1, r2)
218 static void _sse_unger_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
219 # define sse_ungti_f(r0, r1, i0) _sse_ungti_f(_jit, r0, r1, i0)
220 static void _sse_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
221 # define sse_ungtr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r1, r2)
222 # define sse_ltgti_f(r0, r1, i0) _sse_ltgti_f(_jit, r0, r1, i0)
223 static void _sse_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
224 # define sse_ltgtr_f(r0, r1, r2) _sse_ltgtr_f(_jit, r0, r1, r2)
225 static void _sse_ltgtr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
226 # define sse_ordi_f(r0, r1, i0) _sse_ordi_f(_jit, r0, r1, i0)
227 static void _sse_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
228 # define sse_ordr_f(r0, r1, r2) ssecmpf(X86_CC_NP, r0, r2, r1)
229 # define sse_unordi_f(r0, r1, i0) _sse_unordi_f(_jit, r0, r1, i0)
230 static void _sse_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
231 # define sse_unordr_f(r0, r1, r2) ssecmpf(X86_CC_P, r0, r2, r1)
232 # define sse_ldr_f(r0, r1) movssmr(0, r1, _NOREG, _SCL1, r0)
233 # define sse_ldi_f(r0, i0) _sse_ldi_f(_jit, r0, i0)
234 static void _sse_ldi_f(jit_state_t*, jit_int32_t, jit_word_t);
235 # define sse_ldxr_f(r0, r1, r2) _sse_ldxr_f(_jit, r0, r1, r2)
236 static void _sse_ldxr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
237 # define sse_ldxi_f(r0, r1, i0) _sse_ldxi_f(_jit, r0, r1, i0)
238 static void _sse_ldxi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
239 # define sse_str_f(r0, r1) movssrm(r1, 0, r0, _NOREG, _SCL1)
240 # define sse_sti_f(i0, r0) _sse_sti_f(_jit, i0, r0)
241 static void _sse_sti_f(jit_state_t*, jit_word_t,jit_int32_t);
242 # define sse_stxr_f(r0, r1, r2) _sse_stxr_f(_jit, r0, r1, r2)
243 static void _sse_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
244 # define sse_stxi_f(i0, r0, r1) _sse_stxi_f(_jit, i0, r0, r1)
245 static void _sse_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
246 # define sse_bltr_f(i0, r0, r1) _sse_bltr_f(_jit, i0, r0, r1)
247 static jit_word_t _sse_bltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
248 # define sse_blti_f(i0, r0, i1) _sse_blti_f(_jit, i0, r0, i1)
249 static jit_word_t
250 _sse_blti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
251 # define sse_bler_f(i0, r0, r1) _sse_bler_f(_jit, i0, r0, r1)
252 static jit_word_t _sse_bler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
253 # define sse_blei_f(i0, r0, i1) _sse_blei_f(_jit, i0, r0, i1)
254 static jit_word_t
255 _sse_blei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
256 # define sse_beqr_f(i0, r0, r1) _sse_beqr_f(_jit, i0, r0, r1)
257 static jit_word_t _sse_beqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
258 # define sse_beqi_f(i0, r0, i1) _sse_beqi_f(_jit, i0, r0, i1)
259 static jit_word_t
260 _sse_beqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
261 # define sse_bger_f(i0, r0, r1) _sse_bger_f(_jit, i0, r0, r1)
262 static jit_word_t _sse_bger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
263 # define sse_bgei_f(i0, r0, i1) _sse_bgei_f(_jit, i0, r0, i1)
264 static jit_word_t
265 _sse_bgei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
266 # define sse_bgtr_f(i0, r0, r1) _sse_bgtr_f(_jit, i0, r0, r1)
267 static jit_word_t _sse_bgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
268 # define sse_bgti_f(i0, r0, i1) _sse_bgti_f(_jit, i0, r0, i1)
269 static jit_word_t
270 _sse_bgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
271 # define sse_bner_f(i0, r0, r1) _sse_bner_f(_jit, i0, r0, r1)
272 static jit_word_t _sse_bner_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
273 # define sse_bnei_f(i0, r0, i1) _sse_bnei_f(_jit, i0, r0, i1)
274 static jit_word_t
275 _sse_bnei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
276 # define sse_bunltr_f(i0, r0, r1) _sse_bunltr_f(_jit, i0, r0, r1)
277 static jit_word_t _sse_bunltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
278 # define sse_bunlti_f(i0, r0, i1) _sse_bunlti_f(_jit, i0, r0, i1)
279 static jit_word_t
280 _sse_bunlti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
281 # define sse_bunler_f(i0, r0, r1) _sse_bunler_f(_jit, i0, r0, r1)
282 static jit_word_t _sse_bunler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
283 # define sse_bunlei_f(i0, r0, i1) _sse_bunlei_f(_jit, i0, r0, i1)
284 static jit_word_t
285 _sse_bunlei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
286 # define sse_buneqr_f(i0, r0, r1) _sse_buneqr_f(_jit, i0, r0, r1)
287 static jit_word_t _sse_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
288 # define sse_buneqi_f(i0, r0, i1) _sse_buneqi_f(_jit, i0, r0, i1)
289 static jit_word_t
290 _sse_buneqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
291 # define sse_bunger_f(i0, r0, r1) _sse_bunger_f(_jit, i0, r0, r1)
292 static jit_word_t _sse_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
293 # define sse_bungei_f(i0, r0, i1) _sse_bungei_f(_jit, i0, r0, i1)
294 static jit_word_t
295 _sse_bungei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
296 # define sse_bungtr_f(i0, r0, r1) _sse_bungtr_f(_jit, i0, r0, r1)
297 static jit_word_t _sse_bungtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
298 # define sse_bungti_f(i0, r0, i1) _sse_bungti_f(_jit, i0, r0, i1)
299 static jit_word_t
300 _sse_bungti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
301 # define sse_bltgtr_f(i0, r0, r1) _sse_bltgtr_f(_jit, i0, r0, r1)
302 static jit_word_t _sse_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
303 # define sse_bltgti_f(i0, r0, i1) _sse_bltgti_f(_jit, i0, r0, i1)
304 static jit_word_t
305 _sse_bltgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
306 # define sse_bordr_f(i0, r0, r1) _sse_bordr_f(_jit, i0, r0, r1)
307 static jit_word_t _sse_bordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
308 # define sse_bordi_f(i0, r0, i1) _sse_bordi_f(_jit, i0, r0, i1)
309 static jit_word_t
310 _sse_bordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
311 # define sse_bunordr_f(i0, r0, r1) _sse_bunordr_f(_jit, i0, r0, r1)
312 static jit_word_t _sse_bunordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
313 # define sse_bunordi_f(i0, r0, i1) _sse_bunordi_f(_jit, i0, r0, i1)
314 static jit_word_t
315 _sse_bunordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
316 #define sse_movr_d(r0,r1) _sse_movr_d(_jit,r0,r1)
317 static void _sse_movr_d(jit_state_t*, jit_int32_t, jit_int32_t);
318 #define sse_movi_d(r0,i0) _sse_movi_d(_jit,r0,i0)
319 static void _sse_movi_d(jit_state_t*, jit_int32_t, jit_float64_t*);
320 # define sse_ltr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r1, r2)
321 # define sse_lti_d(r0, r1, i0) _sse_lti_d(_jit, r0, r1, i0)
322 static void _sse_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
323 # define sse_ler_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r1, r2)
324 # define sse_lei_d(r0, r1, i0) _sse_lei_d(_jit, r0, r1, i0)
325 static void _sse_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
326 # define sse_eqr_d(r0, r1, r2) _sse_eqr_d(_jit, r0, r1, r2)
327 static void _sse_eqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
328 # define sse_eqi_d(r0, r1, i0) _sse_eqi_d(_jit, r0, r1, i0)
329 static void _sse_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
330 # define sse_ger_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r2, r1)
331 # define sse_gei_d(r0, r1, i0) _sse_gei_d(_jit, r0, r1, i0)
332 static void _sse_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
333 # define sse_gtr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r2, r1)
334 # define sse_gti_d(r0, r1, i0) _sse_gti_d(_jit, r0, r1, i0)
335 static void _sse_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
336 # define sse_ner_d(r0, r1, r2) _sse_ner_d(_jit, r0, r1, r2)
337 static void _sse_ner_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
338 # define sse_nei_d(r0, r1, i0) _sse_nei_d(_jit, r0, r1, i0)
339 static void _sse_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
340 # define sse_unltr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r2, r1)
341 # define sse_unlti_d(r0, r1, i0) _sse_unlti_d(_jit, r0, r1, i0)
342 static void _sse_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
343 # define sse_unler_d(r0, r1, r2) _sse_unler_d(_jit, r0, r1, r2)
344 static void _sse_unler_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
345 # define sse_unlei_d(r0, r1, i0) _sse_unlei_d(_jit, r0, r1, i0)
346 static void _sse_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
347 # define sse_uneqr_d(r0, r1, r2) _sse_uneqr_d(_jit, r0, r1, r2)
348 static void _sse_uneqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
349 # define sse_uneqi_d(r0, r1, i0) _sse_uneqi_d(_jit, r0, r1, i0)
350 static void _sse_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
351 # define sse_unger_d(r0, r1, r2) _sse_unger_d(_jit, r0, r1, r2)
352 static void _sse_unger_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
353 # define sse_ungei_d(r0, r1, i0) _sse_ungei_d(_jit, r0, r1, i0)
354 static void _sse_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
355 # define sse_ungtr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r1, r2)
356 # define sse_ungti_d(r0, r1, i0) _sse_ungti_d(_jit, r0, r1, i0)
357 static void _sse_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
358 # define sse_ltgtr_d(r0, r1, r2) _sse_ltgtr_d(_jit, r0, r1, r2)
359 static void _sse_ltgtr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
360 # define sse_ltgti_d(r0, r1, i0) _sse_ltgti_d(_jit, r0, r1, i0)
361 static void _sse_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
362 # define sse_ordr_d(r0, r1, r2) ssecmpd(X86_CC_NP, r0, r2, r1)
363 # define sse_ordi_d(r0, r1, i0) _sse_ordi_d(_jit, r0, r1, i0)
364 static void _sse_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
365 # define sse_unordr_d(r0, r1, r2) ssecmpd(X86_CC_P, r0, r2, r1)
366 # define sse_unordi_d(r0, r1, i0) _sse_unordi_d(_jit, r0, r1, i0)
367 static void _sse_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
368 # define sse_ldr_d(r0, r1) movsdmr(0, r1, _NOREG, _SCL1, r0)
369 # define sse_ldi_d(r0, i0) _sse_ldi_d(_jit, r0, i0)
370 static void _sse_ldi_d(jit_state_t*, jit_int32_t, jit_word_t);
371 # define sse_ldxr_d(r0, r1, r2) _sse_ldxr_d(_jit, r0, r1, r2)
372 static void _sse_ldxr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
373 # define sse_ldxi_d(r0, r1, i0) _sse_ldxi_d(_jit, r0, r1, i0)
374 static void _sse_ldxi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
375 # define sse_bltr_d(i0, r0, r1) _sse_bltr_d(_jit, i0, r0, r1)
376 # define sse_str_d(r0, r1) movsdrm(r1, 0, r0, _NOREG, _SCL1)
377 # define sse_sti_d(i0, r0) _sse_sti_d(_jit, i0, r0)
378 static void _sse_sti_d(jit_state_t*, jit_word_t,jit_int32_t);
379 # define sse_stxr_d(r0, r1, r2) _sse_stxr_d(_jit, r0, r1, r2)
380 static void _sse_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
381 # define sse_stxi_d(i0, r0, r1) _sse_stxi_d(_jit, i0, r0, r1)
382 static void _sse_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
383 static jit_word_t _sse_bltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
384 # define sse_blti_d(i0, r0, i1) _sse_blti_d(_jit, i0, r0, i1)
385 static jit_word_t
386 _sse_blti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
387 # define sse_bler_d(i0, r0, r1) _sse_bler_d(_jit, i0, r0, r1)
388 static jit_word_t _sse_bler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
389 # define sse_blei_d(i0, r0, i1) _sse_blei_d(_jit, i0, r0, i1)
390 static jit_word_t
391 _sse_blei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
392 # define sse_beqr_d(i0, r0, r1) _sse_beqr_d(_jit, i0, r0, r1)
393 static jit_word_t _sse_beqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
394 # define sse_beqi_d(i0, r0, i1) _sse_beqi_d(_jit, i0, r0, i1)
395 static jit_word_t
396 _sse_beqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
397 # define sse_bger_d(i0, r0, r1) _sse_bger_d(_jit, i0, r0, r1)
398 static jit_word_t _sse_bger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
399 # define sse_bgei_d(i0, r0, i1) _sse_bgei_d(_jit, i0, r0, i1)
400 static jit_word_t
401 _sse_bgei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
402 # define sse_bgtr_d(i0, r0, r1) _sse_bgtr_d(_jit, i0, r0, r1)
403 static jit_word_t _sse_bgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
404 # define sse_bgti_d(i0, r0, i1) _sse_bgti_d(_jit, i0, r0, i1)
405 static jit_word_t
406 _sse_bgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
407 # define sse_bner_d(i0, r0, r1) _sse_bner_d(_jit, i0, r0, r1)
408 static jit_word_t _sse_bner_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
409 # define sse_bnei_d(i0, r0, i1) _sse_bnei_d(_jit, i0, r0, i1)
410 static jit_word_t
411 _sse_bnei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
412 # define sse_bunltr_d(i0, r0, r1) _sse_bunltr_d(_jit, i0, r0, r1)
413 static jit_word_t _sse_bunltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
414 # define sse_bunlti_d(i0, r0, i1) _sse_bunlti_d(_jit, i0, r0, i1)
415 static jit_word_t
416 _sse_bunlti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
417 # define sse_bunler_d(i0, r0, r1) _sse_bunler_d(_jit, i0, r0, r1)
418 static jit_word_t _sse_bunler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
419 # define sse_bunlei_d(i0, r0, i1) _sse_bunlei_d(_jit, i0, r0, i1)
420 static jit_word_t
421 _sse_bunlei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
422 # define sse_buneqr_d(i0, r0, r1) _sse_buneqr_d(_jit, i0, r0, r1)
423 static jit_word_t _sse_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
424 # define sse_buneqi_d(i0, r0, i1) _sse_buneqi_d(_jit, i0, r0, i1)
425 static jit_word_t
426 _sse_buneqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
427 # define sse_bunger_d(i0, r0, r1) _sse_bunger_d(_jit, i0, r0, r1)
428 static jit_word_t _sse_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
429 # define sse_bungei_d(i0, r0, i1) _sse_bungei_d(_jit, i0, r0, i1)
430 static jit_word_t
431 _sse_bungei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
432 # define sse_bungtr_d(i0, r0, r1) _sse_bungtr_d(_jit, i0, r0, r1)
433 static jit_word_t _sse_bungtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
434 # define sse_bungti_d(i0, r0, i1) _sse_bungti_d(_jit, i0, r0, i1)
435 static jit_word_t
436 _sse_bungti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
437 # define sse_bltgtr_d(i0, r0, r1) _sse_bltgtr_d(_jit, i0, r0, r1)
438 static jit_word_t _sse_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
439 # define sse_bltgti_d(i0, r0, i1) _sse_bltgti_d(_jit, i0, r0, i1)
440 static jit_word_t
441 _sse_bltgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
442 # define sse_bordr_d(i0, r0, r1) _sse_bordr_d(_jit, i0, r0, r1)
443 static jit_word_t _sse_bordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
444 # define sse_bordi_d(i0, r0, i1) _sse_bordi_d(_jit, i0, r0, i1)
445 static jit_word_t
446 _sse_bordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
447 # define sse_bunordr_d(i0, r0, r1) _sse_bunordr_d(_jit, i0, r0, r1)
448 static jit_word_t _sse_bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
449 # define sse_bunordi_d(i0, r0, i1) _sse_bunordi_d(_jit, i0, r0, i1)
450 static jit_word_t
451 _sse_bunordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
452 #endif
453
454 #if CODE
455 # define fpr_opi(name, type, size) \
456 static void \
457 _sse_##name##i_##type(jit_state_t *_jit, \
458 jit_int32_t r0, jit_int32_t r1, \
459 jit_float##size##_t *i0) \
460 { \
461 jit_int32_t reg = jit_get_reg(jit_class_fpr|jit_class_xpr); \
462 assert(jit_sse_reg_p(reg)); \
463 sse_movi_##type(rn(reg), i0); \
464 sse_##name##r_##type(r0, r1, rn(reg)); \
465 jit_unget_reg(reg); \
466 }
467 # define fpr_bopi(name, type, size) \
468 static jit_word_t \
469 _sse_b##name##i_##type(jit_state_t *_jit, \
470 jit_word_t i0, jit_int32_t r0, \
471 jit_float##size##_t *i1) \
472 { \
473 jit_word_t word; \
474 jit_int32_t reg = jit_get_reg(jit_class_fpr|jit_class_xpr| \
475 jit_class_nospill); \
476 assert(jit_sse_reg_p(reg)); \
477 sse_movi_##type(rn(reg), i1); \
478 word = sse_b##name##r_##type(i0, r0, rn(reg)); \
479 jit_unget_reg(reg); \
480 return (word); \
481 }
482 # define fopi(name) fpr_opi(name, f, 32)
483 # define fbopi(name) fpr_bopi(name, f, 32)
484 # define dopi(name) fpr_opi(name, d, 64)
485 # define dbopi(name) fpr_bopi(name, d, 64)
486 static void
_sser(jit_state_t * _jit,jit_int32_t c,jit_int32_t r0,jit_int32_t r1)487 _sser(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0, jit_int32_t r1)
488 {
489 rex(0, 0, r0, 0, r1);
490 ic(0x0f);
491 ic(c);
492 mrm(0x03, r7(r0), r7(r1));
493 }
494
495 static void
_ssexr(jit_state_t * _jit,jit_int32_t p,jit_int32_t c,jit_int32_t r0,jit_int32_t r1)496 _ssexr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
497 jit_int32_t r0, jit_int32_t r1)
498 {
499 ic(p);
500 rex(0, 0, r0, 0, r1);
501 ic(0x0f);
502 ic(c);
503 mrm(0x03, r7(r0), r7(r1));
504 }
505
506 static void
_ssexi(jit_state_t * _jit,jit_int32_t c,jit_int32_t r0,jit_int32_t m,jit_int32_t i)507 _ssexi(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0,
508 jit_int32_t m, jit_int32_t i)
509 {
510 ic(0x66);
511 rex(0, 0, 0, 0, r0);
512 ic(0x0f);
513 ic(c);
514 mrm(0x03, r7(m), r7(r0));
515 ic(i);
516 }
517
/*
 * The guard matches the one around the _sselxr prototype and the sselxr
 * macro in the PROTO section: when __X64_32 is set, sselxr expands to
 * ssexr, so compiling this function there would only leave an unused
 * static without a prior declaration.
 */
#if __X64 && !__X64_32
/*
 * Variant of _ssexr() that calls rex() with 1 as its second argument
 * instead of 0 (presumably selecting the 64-bit operand size; confirm
 * against the rex() definition).  Everything else is identical:
 * prefix `p`, rex, 0x0f, opcode `c`, and the mode-0x03 mrm() byte.
 */
static void
_sselxr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
        jit_int32_t r0, jit_int32_t r1)
{
    ic(p);
    rex(0, 1, r0, 0, r1);
    ic(0x0f);
    ic(c);
    mrm(0x03, r7(r0), r7(r1));
}
#endif
530
531 static void
_ssexrx(jit_state_t * _jit,jit_int32_t px,jit_int32_t code,jit_int32_t md,jit_int32_t rb,jit_int32_t ri,jit_int32_t ms,jit_int32_t rd)532 _ssexrx(jit_state_t *_jit, jit_int32_t px, jit_int32_t code, jit_int32_t md,
533 jit_int32_t rb, jit_int32_t ri, jit_int32_t ms, jit_int32_t rd)
534 {
535 ic(px);
536 rex(0, 0, rd, ri, rb);
537 ic(0x0f);
538 ic(code);
539 rx(rd, md, rb, ri, ms);
540 }
541
542 static void
_sse_addr_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)543 _sse_addr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
544 {
545 if (r0 == r1)
546 addssr(r0, r2);
547 else if (r0 == r2)
548 addssr(r0, r1);
549 else {
550 sse_movr_f(r0, r1);
551 addssr(r0, r2);
552 }
553 }
554
fopi(add)555 fopi(add)
556
557 static void
558 _sse_addr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
559 {
560 if (r0 == r1)
561 addsdr(r0, r2);
562 else if (r0 == r2)
563 addsdr(r0, r1);
564 else {
565 sse_movr_d(r0, r1);
566 addsdr(r0, r2);
567 }
568 }
569
dopi(add)570 dopi(add)
571
572 static void
573 _sse_subr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
574 {
575 jit_int32_t reg;
576 if (r0 == r1)
577 subssr(r0, r2);
578 else if (r0 == r2) {
579 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
580 sse_movr_f(rn(reg), r0);
581 sse_movr_f(r0, r1);
582 subssr(r0, rn(reg));
583 jit_unget_reg(reg);
584 }
585 else {
586 sse_movr_f(r0, r1);
587 subssr(r0, r2);
588 }
589 }
590
fopi(sub)591 fopi(sub)
592
593 static void
594 _sse_subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
595 {
596 jit_int32_t reg;
597 if (r0 == r1)
598 subsdr(r0, r2);
599 else if (r0 == r2) {
600 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
601 sse_movr_d(rn(reg), r0);
602 sse_movr_d(r0, r1);
603 subsdr(r0, rn(reg));
604 jit_unget_reg(reg);
605 }
606 else {
607 sse_movr_d(r0, r1);
608 subsdr(r0, r2);
609 }
610 }
611
612 dopi(sub)
613
fopi(rsb)614 fopi(rsb)
615
616 dopi(rsb)
617
618 static void
619 _sse_mulr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
620 {
621 if (r0 == r1)
622 mulssr(r0, r2);
623 else if (r0 == r2)
624 mulssr(r0, r1);
625 else {
626 sse_movr_f(r0, r1);
627 mulssr(r0, r2);
628 }
629 }
630
fopi(mul)631 fopi(mul)
632
633 static void
634 _sse_mulr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
635 {
636 if (r0 == r1)
637 mulsdr(r0, r2);
638 else if (r0 == r2)
639 mulsdr(r0, r1);
640 else {
641 sse_movr_d(r0, r1);
642 mulsdr(r0, r2);
643 }
644 }
645
dopi(mul)646 dopi(mul)
647
648 static void
649 _sse_divr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
650 {
651 jit_int32_t reg;
652 if (r0 == r1)
653 divssr(r0, r2);
654 else if (r0 == r2) {
655 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
656 sse_movr_f(rn(reg), r0);
657 sse_movr_f(r0, r1);
658 divssr(r0, rn(reg));
659 jit_unget_reg(reg);
660 }
661 else {
662 sse_movr_f(r0, r1);
663 divssr(r0, r2);
664 }
665 }
666
fopi(div)667 fopi(div)
668
669 static void
670 _sse_divr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
671 {
672 jit_int32_t reg;
673 if (r0 == r1)
674 divsdr(r0, r2);
675 else if (r0 == r2) {
676 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
677 sse_movr_d(rn(reg), r0);
678 sse_movr_d(r0, r1);
679 divsdr(r0, rn(reg));
680 jit_unget_reg(reg);
681 }
682 else {
683 sse_movr_d(r0, r1);
684 divsdr(r0, r2);
685 }
686 }
687
dopi(div)688 dopi(div)
689
690 static void
691 _sse_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
692 {
693 jit_int32_t reg;
694 if (r0 == r1) {
695 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
696 pcmpeqlr(rn(reg), rn(reg));
697 psrl(rn(reg), 1);
698 andpsr(r0, rn(reg));
699 jit_unget_reg(reg);
700 }
701 else {
702 pcmpeqlr(r0, r0);
703 psrl(r0, 1);
704 andpsr(r0, r1);
705 }
706 }
707
708 static void
_sse_absr_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)709 _sse_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
710 {
711 jit_int32_t reg;
712 if (r0 == r1) {
713 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
714 pcmpeqlr(rn(reg), rn(reg));
715 psrq(rn(reg), 1);
716 andpdr(r0, rn(reg));
717 jit_unget_reg(reg);
718 }
719 else {
720 pcmpeqlr(r0, r0);
721 psrq(r0, 1);
722 andpdr(r0, r1);
723 }
724 }
725
726 static void
_sse_negr_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)727 _sse_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
728 {
729 jit_int32_t freg, ireg;
730 ireg = jit_get_reg(jit_class_gpr);
731 imovi(rn(ireg), 0x80000000);
732 if (r0 == r1) {
733 freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
734 movdlxr(rn(freg), rn(ireg));
735 xorpsr(r0, rn(freg));
736 jit_unget_reg(freg);
737 }
738 else {
739 movdlxr(r0, rn(ireg));
740 xorpsr(r0, r1);
741 }
742 jit_unget_reg(ireg);
743 }
744
745 static void
_sse_negr_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)746 _sse_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
747 {
748 jit_int32_t freg, ireg;
749 ireg = jit_get_reg(jit_class_gpr);
750 imovi(rn(ireg), 0x80000000);
751 if (r0 == r1) {
752 freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
753 movdlxr(rn(freg), rn(ireg));
754 pslq(rn(freg), 32);
755 xorpdr(r0, rn(freg));
756 jit_unget_reg(freg);
757 }
758 else {
759 movdlxr(r0, rn(ireg));
760 pslq(r0, 32);
761 xorpdr(r0, r1);
762 }
763 jit_unget_reg(ireg);
764 }
765
766 static void
_ssecmp(jit_state_t * _jit,jit_bool_t d,jit_int32_t code,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)767 _ssecmp(jit_state_t *_jit, jit_bool_t d, jit_int32_t code,
768 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
769 {
770 jit_bool_t rc;
771 jit_int32_t reg;
772 if ((rc = reg8_p(r0)))
773 reg = r0;
774 else {
775 reg = _RAX_REGNO;
776 movr(r0, reg);
777 }
778 ixorr(reg, reg);
779 if (d)
780 ucomisdr(r2, r1);
781 else
782 ucomissr(r2, r1);
783 cc(code, reg);
784 if (!rc)
785 xchgr(r0, reg);
786 }
787
788 static void
_sse_movr_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)789 _sse_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
790 {
791 if (r0 != r1)
792 ssexr(0xf3, X86_SSE_MOV, r0, r1);
793 }
794
795 static void
_sse_movi_f(jit_state_t * _jit,jit_int32_t r0,jit_float32_t * i0)796 _sse_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
797 {
798 union {
799 jit_int32_t i;
800 jit_float32_t f;
801 } data;
802 jit_int32_t reg;
803 jit_bool_t ldi;
804
805 data.f = *i0;
806 if (data.f == 0.0 && !(data.i & 0x80000000))
807 xorpsr(r0, r0);
808 else {
809 ldi = !_jitc->no_data;
810 #if __X64
811 /* if will allocate a register for offset, just use immediate */
812 if (ldi && !sse_address_p(i0))
813 ldi = 0;
814 #endif
815 if (ldi)
816 sse_ldi_f(r0, (jit_word_t)i0);
817 else {
818 reg = jit_get_reg(jit_class_gpr);
819 movi(rn(reg), data.i);
820 movdlxr(r0, rn(reg));
821 jit_unget_reg(reg);
822 }
823 }
824 }
825
826 fopi(lt)
fopi(le)827 fopi(le)
828
829 static void
830 _sse_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
831 {
832 jit_bool_t rc;
833 jit_int32_t reg;
834 jit_word_t jp_code;
835 if ((rc = reg8_p(r0)))
836 reg = r0;
837 else {
838 reg = _RAX_REGNO;
839 movr(r0, _RAX_REGNO);
840 }
841 ixorr(reg, reg);
842 ucomissr(r2, r1);
843 jpes(0);
844 jp_code = _jit->pc.w;
845 cc(X86_CC_E, reg);
846 patch_rel_char(jp_code, _jit->pc.w);
847 if (!rc)
848 xchgr(r0, reg);
849 }
850
851 fopi(eq)
fopi(ge)852 fopi(ge)
853 fopi(gt)
854
855 static void
856 _sse_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
857 {
858 jit_bool_t rc;
859 jit_int32_t reg;
860 jit_word_t jp_code;
861 if ((rc = reg8_p(r0)))
862 reg = r0;
863 else {
864 reg = _RAX_REGNO;
865 movr(r0, _RAX_REGNO);
866 }
867 imovi(reg, 1);
868 ucomissr(r2, r1);
869 jpes(0);
870 jp_code = _jit->pc.w;
871 cc(X86_CC_NE, reg);
872 patch_rel_char(jp_code, _jit->pc.w);
873 if (!rc)
874 xchgr(r0, reg);
875 }
876
877 fopi(ne)
fopi(unlt)878 fopi(unlt)
879
880 static void
881 _sse_unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
882 {
883 if (r1 == r2)
884 movi(r0, 1);
885 else
886 ssecmpf(X86_CC_NA, r0, r2, r1);
887 }
888
fopi(unle)889 fopi(unle)
890
891 static void
892 _sse_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
893 {
894 if (r1 == r2)
895 movi(r0, 1);
896 else
897 ssecmpf(X86_CC_E, r0, r1, r2);
898 }
899
fopi(uneq)900 fopi(uneq)
901
902 static void
903 _sse_unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
904 {
905 if (r1 == r2)
906 movi(r0, 1);
907 else
908 ssecmpf(X86_CC_NA, r0, r1, r2);
909 }
910
911 fopi(unge)
fopi(ungt)912 fopi(ungt)
913
914 static void
915 _sse_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
916 {
917 if (r1 == r2)
918 ixorr(r0, r0);
919 else
920 ssecmpf(X86_CC_NE, r0, r1, r2);
921 }
922
923 fopi(ltgt)
fopi(ord)924 fopi(ord)
925 fopi(unord)
926
927 static void
928 _sse_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
929 {
930 jit_int32_t reg;
931 if (sse_address_p(i0))
932 movssmr(i0, _NOREG, _NOREG, _SCL1, r0);
933 else {
934 reg = jit_get_reg(jit_class_gpr);
935 movi(rn(reg), i0);
936 sse_ldr_f(r0, rn(reg));
937 jit_unget_reg(reg);
938 }
939 }
940
941 static void
_sse_ldxr_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)942 _sse_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
943 {
944 #if __X64_32
945 jit_int32_t reg;
946 reg = jit_get_reg(jit_class_gpr);
947 addr(rn(reg), r1, r2);
948 sse_ldr_f(r0, rn(reg));
949 jit_unget_reg(reg);
950 #else
951 movssmr(0, r1, r2, _SCL1, r0);
952 #endif
953 }
954
955 static void
_sse_ldxi_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)956 _sse_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
957 {
958 jit_int32_t reg;
959 if (can_sign_extend_int_p(i0))
960 movssmr(i0, r1, _NOREG, _SCL1, r0);
961 else {
962 reg = jit_get_reg(jit_class_gpr);
963 #if __X64_32
964 addi(rn(reg), r1, i0);
965 sse_ldr_f(r0, rn(reg));
966 #else
967 movi(rn(reg), i0);
968 sse_ldxr_f(r0, r1, rn(reg));
969 #endif
970 jit_unget_reg(reg);
971 }
972 }
973
974 static void
_sse_sti_f(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0)975 _sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
976 {
977 jit_int32_t reg;
978 if (sse_address_p(i0))
979 movssrm(r0, i0, _NOREG, _NOREG, _SCL1);
980 else {
981 reg = jit_get_reg(jit_class_gpr);
982 movi(rn(reg), i0);
983 sse_str_f(rn(reg), r0);
984 jit_unget_reg(reg);
985 }
986 }
987
988 static void
_sse_stxr_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)989 _sse_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
990 {
991 #if __X64_32
992 jit_int32_t reg;
993 reg = jit_get_reg(jit_class_gpr);
994 addr(rn(reg), r0, r1);
995 sse_str_f(rn(reg), r2);
996 jit_unget_reg(reg);
997 #else
998 movssrm(r2, 0, r0, r1, _SCL1);
999 #endif
1000 }
1001
1002 static void
_sse_stxi_f(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1003 _sse_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1004 {
1005 jit_int32_t reg;
1006 if (can_sign_extend_int_p(i0))
1007 movssrm(r1, i0, r0, _NOREG, _SCL1);
1008 else {
1009 reg = jit_get_reg(jit_class_gpr);
1010 #if __X64_32
1011 addi(rn(reg), r0, i0);
1012 sse_str_f(rn(reg), r1);
1013 #else
1014 movi(rn(reg), i0);
1015 sse_stxr_f(rn(reg), r0, r1);
1016 #endif
1017 jit_unget_reg(reg);
1018 }
1019 }
1020
1021 static jit_word_t
_sse_bltr_f(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1022 _sse_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1023 {
1024 ucomissr(r1, r0);
1025 ja(i0);
1026 return (_jit->pc.w);
1027 }
fbopi(lt)1028 fbopi(lt)
1029
1030 static jit_word_t
1031 _sse_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1032 {
1033 ucomissr(r1, r0);
1034 jae(i0);
1035 return (_jit->pc.w);
1036 }
fbopi(le)1037 fbopi(le)
1038
1039 static jit_word_t
1040 _sse_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1041 {
1042 jit_word_t jp_code;
1043 ucomissr(r0, r1);
1044 jps(0);
1045 jp_code = _jit->pc.w;
1046 je(i0);
1047 patch_rel_char(jp_code, _jit->pc.w);
1048 return (_jit->pc.w);
1049 }
fbopi(eq)1050 fbopi(eq)
1051
1052 static jit_word_t
1053 _sse_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1054 {
1055 ucomissr(r0, r1);
1056 jae(i0);
1057 return (_jit->pc.w);
1058 }
fbopi(ge)1059 fbopi(ge)
1060
1061 static jit_word_t
1062 _sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1063 {
1064 ucomissr(r0, r1);
1065 ja(i0);
1066 return (_jit->pc.w);
1067 }
fbopi(gt)1068 fbopi(gt)
1069
1070 static jit_word_t
1071 _sse_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1072 {
1073 jit_word_t jp_code;
1074 jit_word_t jz_code;
1075 ucomissr(r0, r1);
1076 jps(0);
1077 jp_code = _jit->pc.w;
1078 jzs(0);
1079 jz_code = _jit->pc.w;
1080 patch_rel_char(jp_code, _jit->pc.w);
1081 jmpi(i0);
1082 patch_rel_char(jz_code, _jit->pc.w);
1083 return (_jit->pc.w);
1084 }
fbopi(ne)1085 fbopi(ne)
1086
1087 static jit_word_t
1088 _sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1089 {
1090 ucomissr(r0, r1);
1091 jnae(i0);
1092 return (_jit->pc.w);
1093 }
fbopi(unlt)1094 fbopi(unlt)
1095
1096 static jit_word_t
1097 _sse_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1098 {
1099 if (r0 == r1)
1100 jmpi(i0);
1101 else {
1102 ucomissr(r0, r1);
1103 jna(i0);
1104 }
1105 return (_jit->pc.w);
1106 }
fbopi(unle)1107 fbopi(unle)
1108
1109 static jit_word_t
1110 _sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1111 {
1112 if (r0 == r1)
1113 jmpi(i0);
1114 else {
1115 ucomissr(r0, r1);
1116 je(i0);
1117 }
1118 return (_jit->pc.w);
1119 }
fbopi(uneq)1120 fbopi(uneq)
1121
1122 static jit_word_t
1123 _sse_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1124 {
1125 if (r0 == r1)
1126 jmpi(i0);
1127 else {
1128 ucomissr(r1, r0);
1129 jna(i0);
1130 }
1131 return (_jit->pc.w);
1132 }
fbopi(unge)1133 fbopi(unge)
1134
1135 static jit_word_t
1136 _sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1137 {
1138 ucomissr(r1, r0);
1139 jnae(i0);
1140 return (_jit->pc.w);
1141 }
fbopi(ungt)1142 fbopi(ungt)
1143
1144 static jit_word_t
1145 _sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1146 {
1147 ucomissr(r0, r1);
1148 jne(i0);
1149 return (_jit->pc.w);
1150 }
fbopi(ltgt)1151 fbopi(ltgt)
1152
1153 static jit_word_t
1154 _sse_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1155 {
1156 ucomissr(r0, r1);
1157 jnp(i0);
1158 return (_jit->pc.w);
1159 }
fbopi(ord)1160 fbopi(ord)
1161
1162 static jit_word_t
1163 _sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1164 {
1165 ucomissr(r0, r1);
1166 jp(i0);
1167 return (_jit->pc.w);
1168 }
1169 fbopi(unord)
1170
dopi(lt)1171 dopi(lt)
1172 dopi(le)
1173
1174 static void
1175 _sse_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1176 {
1177 jit_bool_t rc;
1178 jit_int32_t reg;
1179 jit_word_t jp_code;
1180 if ((rc = reg8_p(r0)))
1181 reg = r0;
1182 else {
1183 reg = _RAX_REGNO;
1184 movr(r0, _RAX_REGNO);
1185 }
1186 ixorr(reg, reg);
1187 ucomisdr(r2, r1);
1188 jpes(0);
1189 jp_code = _jit->pc.w;
1190 cc(X86_CC_E, reg);
1191 patch_rel_char(jp_code, _jit->pc.w);
1192 if (!rc)
1193 xchgr(r0, reg);
1194 }
1195
1196 dopi(eq)
dopi(ge)1197 dopi(ge)
1198 dopi(gt)
1199
1200 static void
1201 _sse_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1202 {
1203 jit_bool_t rc;
1204 jit_int32_t reg;
1205 jit_word_t jp_code;
1206 if ((rc = reg8_p(r0)))
1207 reg = r0;
1208 else {
1209 reg = _RAX_REGNO;
1210 movr(r0, _RAX_REGNO);
1211 }
1212 imovi(reg, 1);
1213 ucomisdr(r2, r1);
1214 jpes(0);
1215 jp_code = _jit->pc.w;
1216 cc(X86_CC_NE, reg);
1217 patch_rel_char(jp_code, _jit->pc.w);
1218 if (!rc)
1219 xchgr(r0, reg);
1220 }
1221
1222 dopi(ne)
dopi(unlt)1223 dopi(unlt)
1224
1225 static void
1226 _sse_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1227 {
1228 if (r1 == r2)
1229 movi(r0, 1);
1230 else
1231 ssecmpd(X86_CC_NA, r0, r2, r1);
1232 }
1233
dopi(unle)1234 dopi(unle)
1235
1236 static void
1237 _sse_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1238 {
1239 if (r1 == r2)
1240 movi(r0, 1);
1241 else
1242 ssecmpd(X86_CC_E, r0, r1, r2);
1243 }
1244
dopi(uneq)1245 dopi(uneq)
1246
1247 static void
1248 _sse_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1249 {
1250 if (r1 == r2)
1251 movi(r0, 1);
1252 else
1253 ssecmpd(X86_CC_NA, r0, r1, r2);
1254 }
1255
1256 dopi(unge)
dopi(ungt)1257 dopi(ungt)
1258
1259 static void
1260 _sse_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1261 {
1262 if (r1 == r2)
1263 ixorr(r0, r0);
1264 else
1265 ssecmpd(X86_CC_NE, r0, r1, r2);
1266 }
1267
1268 dopi(ltgt)
dopi(ord)1269 dopi(ord)
1270 dopi(unord)
1271
1272 static void
1273 _sse_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1274 {
1275 if (r0 != r1)
1276 ssexr(0xf2, X86_SSE_MOV, r0, r1);
1277 }
1278
1279 static void
_sse_movi_d(jit_state_t * _jit,jit_int32_t r0,jit_float64_t * i0)1280 _sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
1281 {
1282 union {
1283 jit_int32_t ii[2];
1284 jit_word_t w;
1285 jit_float64_t d;
1286 } data;
1287 jit_int32_t reg;
1288 jit_bool_t ldi;
1289
1290 data.d = *i0;
1291 if (data.d == 0.0 && !(data.ii[1] & 0x80000000))
1292 xorpdr(r0, r0);
1293 else {
1294 ldi = !_jitc->no_data;
1295 #if __X64
1296 /* if will allocate a register for offset, just use immediate */
1297 if (ldi && !sse_address_p(i0))
1298 ldi = 0;
1299 #endif
1300 if (ldi)
1301 sse_ldi_d(r0, (jit_word_t)i0);
1302 else {
1303 reg = jit_get_reg(jit_class_gpr);
1304 #if __X64 && !__X64_32
1305 movi(rn(reg), data.w);
1306 movdqxr(r0, rn(reg));
1307 jit_unget_reg(reg);
1308 #else
1309 movi(rn(reg), data.ii[0]);
1310 stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
1311 movi(rn(reg), data.ii[1]);
1312 stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg));
1313 jit_unget_reg(reg);
1314 sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
1315 #endif
1316 }
1317 }
1318 }
1319
1320 static void
_sse_ldi_d(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)1321 _sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1322 {
1323 jit_int32_t reg;
1324 if (sse_address_p(i0))
1325 movsdmr(i0, _NOREG, _NOREG, _SCL1, r0);
1326 else {
1327 reg = jit_get_reg(jit_class_gpr);
1328 movi(rn(reg), i0);
1329 sse_ldr_d(r0, rn(reg));
1330 jit_unget_reg(reg);
1331 }
1332 }
1333
1334 static void
_sse_ldxr_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1335 _sse_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1336 {
1337 #if __X64_32
1338 jit_int32_t reg;
1339 reg = jit_get_reg(jit_class_gpr);
1340 addr(rn(reg), r1, r2);
1341 sse_ldr_d(r0, rn(reg));
1342 jit_unget_reg(reg);
1343 #else
1344 movsdmr(0, r1, r2, _SCL1, r0);
1345 #endif
1346 }
1347
1348 static void
_sse_ldxi_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1349 _sse_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1350 {
1351 jit_int32_t reg;
1352 if (can_sign_extend_int_p(i0))
1353 movsdmr(i0, r1, _NOREG, _SCL1, r0);
1354 else {
1355 reg = jit_get_reg(jit_class_gpr);
1356 #if __X64_32
1357 addi(rn(reg), r1, i0);
1358 sse_ldr_d(r0, rn(reg));
1359 #else
1360 movi(rn(reg), i0);
1361 sse_ldxr_d(r0, r1, rn(reg));
1362 #endif
1363 jit_unget_reg(reg);
1364 }
1365 }
1366
1367 static void
_sse_sti_d(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0)1368 _sse_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
1369 {
1370 jit_int32_t reg;
1371 if (sse_address_p(i0))
1372 movsdrm(r0, i0, _NOREG, _NOREG, _SCL1);
1373 else {
1374 reg = jit_get_reg(jit_class_gpr);
1375 movi(rn(reg), i0);
1376 sse_str_d(rn(reg), r0);
1377 jit_unget_reg(reg);
1378 }
1379 }
1380
1381 static void
_sse_stxr_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1382 _sse_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1383 {
1384 #if __X64_32
1385 jit_int32_t reg;
1386 reg = jit_get_reg(jit_class_gpr);
1387 addr(rn(reg), r0, r1);
1388 sse_str_d(rn(reg), r2);
1389 jit_unget_reg(reg);
1390 #else
1391 movsdrm(r2, 0, r0, r1, _SCL1);
1392 #endif
1393 }
1394
1395 static void
_sse_stxi_d(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1396 _sse_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1397 {
1398 jit_int32_t reg;
1399 if (can_sign_extend_int_p(i0))
1400 movsdrm(r1, i0, r0, _NOREG, _SCL1);
1401 else {
1402 reg = jit_get_reg(jit_class_gpr);
1403 #if __X64_32
1404 addi(rn(reg), r0, i0);
1405 sse_str_d(rn(reg), r1);
1406 #else
1407 movi(rn(reg), i0);
1408 sse_stxr_f(rn(reg), r0, r1);
1409 #endif
1410 jit_unget_reg(reg);
1411 }
1412 }
1413
1414 static jit_word_t
_sse_bltr_d(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1415 _sse_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1416 {
1417 ucomisdr(r1, r0);
1418 ja(i0);
1419 return (_jit->pc.w);
1420 }
dbopi(lt)1421 dbopi(lt)
1422
1423 static jit_word_t
1424 _sse_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1425 {
1426 ucomisdr(r1, r0);
1427 jae(i0);
1428 return (_jit->pc.w);
1429 }
dbopi(le)1430 dbopi(le)
1431
1432 static jit_word_t
1433 _sse_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1434 {
1435 jit_word_t jp_code;
1436 ucomisdr(r0, r1);
1437 jps(0);
1438 jp_code = _jit->pc.w;
1439 je(i0);
1440 patch_rel_char(jp_code, _jit->pc.w);
1441 return (_jit->pc.w);
1442 }
dbopi(eq)1443 dbopi(eq)
1444
1445 static jit_word_t
1446 _sse_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1447 {
1448 ucomisdr(r0, r1);
1449 jae(i0);
1450 return (_jit->pc.w);
1451 }
dbopi(ge)1452 dbopi(ge)
1453
1454 static jit_word_t
1455 _sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1456 {
1457 ucomisdr(r0, r1);
1458 ja(i0);
1459 return (_jit->pc.w);
1460 }
dbopi(gt)1461 dbopi(gt)
1462
1463 static jit_word_t
1464 _sse_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1465 {
1466 jit_word_t jp_code;
1467 jit_word_t jz_code;
1468 ucomisdr(r0, r1);
1469 jps(0);
1470 jp_code = _jit->pc.w;
1471 jzs(0);
1472 jz_code = _jit->pc.w;
1473 patch_rel_char(jp_code, _jit->pc.w);
1474 jmpi(i0);
1475 patch_rel_char(jz_code, _jit->pc.w);
1476 return (_jit->pc.w);
1477 }
dbopi(ne)1478 dbopi(ne)
1479
1480 static jit_word_t
1481 _sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1482 {
1483 ucomisdr(r0, r1);
1484 jnae(i0);
1485 return (_jit->pc.w);
1486 }
dbopi(unlt)1487 dbopi(unlt)
1488
1489 static jit_word_t
1490 _sse_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1491 {
1492 if (r0 == r1)
1493 jmpi(i0);
1494 else {
1495 ucomisdr(r0, r1);
1496 jna(i0);
1497 }
1498 return (_jit->pc.w);
1499 }
dbopi(unle)1500 dbopi(unle)
1501
1502 static jit_word_t
1503 _sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1504 {
1505 if (r0 == r1)
1506 jmpi(i0);
1507 else {
1508 ucomisdr(r0, r1);
1509 je(i0);
1510 }
1511 return (_jit->pc.w);
1512 }
dbopi(uneq)1513 dbopi(uneq)
1514
1515 static jit_word_t
1516 _sse_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1517 {
1518 if (r0 == r1)
1519 jmpi(i0);
1520 else {
1521 ucomisdr(r1, r0);
1522 jna(i0);
1523 }
1524 return (_jit->pc.w);
1525 }
dbopi(unge)1526 dbopi(unge)
1527
1528 static jit_word_t
1529 _sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1530 {
1531 ucomisdr(r1, r0);
1532 jnae(i0);
1533 return (_jit->pc.w);
1534 }
dbopi(ungt)1535 dbopi(ungt)
1536
1537 static jit_word_t
1538 _sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1539 {
1540 ucomisdr(r0, r1);
1541 jne(i0);
1542 return (_jit->pc.w);
1543 }
dbopi(ltgt)1544 dbopi(ltgt)
1545
1546 static jit_word_t
1547 _sse_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1548 {
1549 ucomisdr(r0, r1);
1550 jnp(i0);
1551 return (_jit->pc.w);
1552 }
dbopi(ord)1553 dbopi(ord)
1554
1555 static jit_word_t
1556 _sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1557 {
1558 ucomisdr(r0, r1);
1559 jp(i0);
1560 return (_jit->pc.w);
1561 }
1562 dbopi(unord)
1563 # undef fopi
1564 # undef fbopi
1565 # undef bopi
1566 # undef dbopi
1567 # undef fpr_bopi
1568 # undef fpr_opi
1569 #endif
1570