/*
 * Copyright (C) 2012-2019  Free Software Foundation, Inc.
 *
 * This file is part of GNU lightning.
 *
 * GNU lightning is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation; either version 3, or (at your option)
 * any later version.
 *
 * GNU lightning is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 * License for more details.
 *
 * Authors:
 *	Paulo Cesar Pereira de Andrade
 */

20 #if PROTO
21 #  if __X32
22 #    define x87_address_p(i0)		1
23 #  else
24 #    if __X64_32
25 #      define x87_address_p(i0)		((jit_word_t)(i0) >= 0)
26 #    else
27 #      define x87_address_p(i0)		can_sign_extend_int_p(i0)
28 #    endif
29 #  endif
30 #  define _ST0_REGNO			0
31 #  define _ST1_REGNO			1
32 #  define _ST2_REGNO			2
33 #  define _ST3_REGNO			3
34 #  define _ST4_REGNO			4
35 #  define _ST5_REGNO			5
36 #  define _ST6_REGNO			6
37 #  define x87rx(code, md, rb, ri, ms)	_x87rx(_jit, code, md, rb, ri, ms)
38 #  define fldcwm(md, rb, ri, ms)	x87rx(015, md, rb, ri, ms)
39 #  define fstcwm(md, rb, ri, ms)	_fstcwm(_jit, md, rb, ri, ms)
40 static void
41 _fstcwm(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
42 #  define fldsm(md, rb, ri, ms)		x87rx(010, md, rb, ri, ms)
43 #  define fstsm(md, rb, ri, ms)		x87rx(012, md, rb, ri, ms)
44 #  define fldlm(md, rb, ri, ms)		x87rx(050, md, rb, ri, ms)
45 #  define fstlm(md, rb, ri, ms)		x87rx(052, md, rb, ri, ms)
46 #  define fisttplm(md, rb, ri, ms)	x87rx(031, md, rb, ri, ms)
47 #  define fistlm(md, rb, ri, ms)	x87rx(032, md, rb, ri, ms)
48 #  define fisttpqm(md, rb, ri, ms)	x87rx(071, md, rb, ri, ms)
49 #  define fildlm(md, rb, ri, ms)	x87rx(030, md, rb,ri, ms)
50 #  define fildqm(md, rb, ri, ms)	x87rx(075, md, rb,ri, ms)
51 static void
52 _x87rx(jit_state_t*, jit_int32_t, jit_int32_t,
53        jit_int32_t, jit_int32_t, jit_int32_t);
54 #  define x87ri(cc,r0)			_x87ri(_jit,cc,r0)
55 #  define fchs_()			x87ri(014, 0)
56 #  define fabs_()			x87ri(014, 1)
57 #  define fld1()			x87ri(015, 0)
58 #  define fldl2t()			x87ri(015, 1)
59 #  define fldl2e()			x87ri(015, 2)
60 #  define fldpi()			x87ri(015, 3)
61 #  define fldlg2()			x87ri(015, 4)
62 #  define fldln2()			x87ri(015, 5)
63 #  define fldz()			x87ri(015, 6)
64 #  define fsqrt_()			x87ri(017, 2)
65 #  define fldr(r0)			x87ri(010, r0)
66 #  define fxchr(r0)			x87ri(011, r0)
67 #  define fstr(r0)			x87ri(052, r0)
68 #  define fstpr(r0)			x87ri(053, r0)
69 #  define fucomir(r0)			x87ri(035, r0)
70 #  define fucomipr(r0)			x87ri(075, r0)
71 static void _x87ri(jit_state_t*, jit_int32_t, jit_int32_t);
72 #  define faddr(r0, r1)			x87rri(000, r0, r1)
73 #  define fmulr(r0, r1)			x87rri(001, r0, r1)
74 #  define fsubr(r0, r1)			x87rri(004, r0, r1)
75 #  define fsubrr(r0, r1)		x87rri(005, r0, r1)
76 #  define fdivr(r0, r1)			x87rri(006, r0, r1)
77 #  define fdivrr(r0, r1)		x87rri(007, r0, r1)
78 #  define x87rri(cc, r0, r1)		_x87rri(_jit, cc, r0, r1)
79 static void _x87rri(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
80 #  define x87_addr_f(r0, r1, r2)	_x87_addr_d(_jit, r0, r1, r2)
81 #  define x87_addi_f(r0, r1, i0)	_x87_addi_f(_jit, r0, r1, i0)
82 static void _x87_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
83 #  define x87_addr_d(r0, r1, r2)	_x87_addr_d(_jit, r0, r1, r2)
84 static void _x87_addr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
85 #  define x87_addi_d(r0, r1, i0)	_x87_addi_d(_jit, r0, r1, i0)
86 static void _x87_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
87 #  define x87_subr_f(r0, r1, r2)	_x87_subr_d(_jit, r0, r1, r2)
88 #  define x87_subi_f(r0, r1, i0)	_x87_subi_f(_jit, r0, r1, i0)
89 static void _x87_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
90 #  define x87_subr_d(r0, r1, r2)	_x87_subr_d(_jit, r0, r1, r2)
91 static void _x87_subr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
92 #  define x87_subi_d(r0, r1, i0)	_x87_subi_d(_jit, r0, r1, i0)
93 static void _x87_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
94 #  define x87_rsbr_f(r0, r1, r2)	x87_subr_f(r0, r2, r1)
95 #  define x87_rsbi_f(r0, r1, i0)	_x87_rsbi_f(_jit, r0, r1, i0)
96 static void _x87_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
97 #  define x87_rsbr_d(r0, r1, r2)	x87_subr_d(r0, r2, r1)
98 #  define x87_rsbi_d(r0, r1, i0)	_x87_rsbi_d(_jit, r0, r1, i0)
99 static void _x87_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
100 #  define x87_mulr_f(r0, r1, r2)	_x87_mulr_d(_jit, r0, r1, r2)
101 #  define x87_muli_f(r0, r1, i0)	_x87_muli_f(_jit, r0, r1, i0)
102 static void _x87_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
103 #  define x87_mulr_d(r0, r1, r2)	_x87_mulr_d(_jit, r0, r1, r2)
104 static void _x87_mulr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
105 #  define x87_muli_d(r0, r1, i0)	_x87_muli_d(_jit, r0, r1, i0)
106 static void _x87_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
107 #  define x87_divr_f(r0, r1, r2)	_x87_divr_d(_jit, r0, r1, r2)
108 #  define x87_divi_f(r0, r1, i0)	_x87_divi_f(_jit, r0, r1, i0)
109 static void _x87_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
110 #  define x87_divr_d(r0, r1, r2)	_x87_divr_d(_jit, r0, r1, r2)
111 static void _x87_divr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
112 #  define x87_divi_d(r0, r1, i0)	_x87_divi_d(_jit, r0, r1, i0)
113 static void _x87_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
114 #  define x87_absr_f(r0, r1)		_x87_absr_d(_jit, r0, r1)
115 #  define x87_absr_d(r0, r1)		_x87_absr_d(_jit, r0, r1)
116 static void _x87_absr_d(jit_state_t*, jit_int32_t, jit_int32_t);
117 #  define x87_negr_f(r0, r1)		_x87_negr_d(_jit, r0, r1)
118 #  define x87_negr_d(r0, r1)		_x87_negr_d(_jit, r0, r1)
119 static void _x87_negr_d(jit_state_t*, jit_int32_t, jit_int32_t);
120 #  define x87_sqrtr_f(r0, r1)		_x87_sqrtr_d(_jit, r0, r1)
121 #  define x87_sqrtr_d(r0, r1)		_x87_sqrtr_d(_jit, r0, r1)
122 static void _x87_sqrtr_d(jit_state_t*, jit_int32_t, jit_int32_t);
123 #  define x87_truncr_f_i(r0, r1)	_x87_truncr_d_i(_jit, r0, r1)
124 #  define x87_truncr_d_i(r0, r1)	_x87_truncr_d_i(_jit, r0, r1)
125 static void _x87_truncr_d_i(jit_state_t*, jit_int32_t, jit_int32_t);
126 #  if __X64
127 #    define x87_truncr_f_l(r0, r1)	_x87_truncr_d_l(_jit, r0, r1)
128 #    define x87_truncr_d_l(r0, r1)	_x87_truncr_d_l(_jit, r0, r1)
129 static void _x87_truncr_d_l(jit_state_t*, jit_int32_t, jit_int32_t);
130 #  endif
131 #  define x87_extr_f(r0, r1)		_x87_extr_d(_jit, r0, r1)
132 #  define x87_extr_d(r0, r1)		_x87_extr_d(_jit, r0, r1)
133 #  define x87_extr_f_d(r0, r1)		x87_movr_d(r0, r1)
134 #  define x87_extr_d_f(r0, r1)		x87_movr_d(r0, r1)
135 static void _x87_extr_d(jit_state_t*, jit_int32_t, jit_int32_t);
136 #  define x87cmp(code, r0, r1, r2)	_x87cmp(_jit, code, r0, r1, r2)
137 static void
138 _x87cmp(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
139 #  define x87cmp2(code, r0, r1, r2)	_x87cmp2(_jit, code, r0, r1, r2)
140 static void
141 _x87cmp2(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
142 #  define x87jcc(code, i0, r0, r1)	_x87jcc(_jit, code, i0, r0, r1)
143 static jit_word_t
144 _x87jcc(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t, jit_int32_t);
145 #  define x87jcc2(code, i0, r0, r1)	_x87jcc2(_jit, code, i0, r0, r1)
146 static jit_word_t
147 _x87jcc2(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t, jit_int32_t);
148 #define x87_movi_f(r0,i0)		_x87_movi_f(_jit,r0,i0)
149 static void _x87_movi_f(jit_state_t*, jit_int32_t, jit_float32_t*);
150 #  define x87_ldr_f(r0, r1)		_x87_ldr_f(_jit, r0, r1)
151 static void _x87_ldr_f(jit_state_t*, jit_int32_t, jit_int32_t);
152 #  define x87_ldi_f(r0, i0)		_x87_ldi_f(_jit, r0, i0)
153 static void _x87_ldi_f(jit_state_t*, jit_int32_t, jit_word_t);
154 #  define x87_ldxr_f(r0, r1, r2)	_x87_ldxr_f(_jit, r0, r1, r2)
155 static void _x87_ldxr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
156 #  define x87_ldxi_f(r0, r1, i0)	_x87_ldxi_f(_jit, r0, r1, i0)
157 static void _x87_ldxi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
158 #  define x87_str_f(r0, r1)		_x87_str_f(_jit, r0, r1)
159 static void _x87_str_f(jit_state_t*,jit_int32_t,jit_int32_t);
160 #  define x87_sti_f(i0, r0)		_x87_sti_f(_jit, i0, r0)
161 static void _x87_sti_f(jit_state_t*,jit_word_t, jit_int32_t);
162 #  define x87_stxr_f(r0, r1, r2)	_x87_stxr_f(_jit, r0, r1, r2)
163 static void _x87_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
164 #  define x87_stxi_f(i0, r0, r1)	_x87_stxi_f(_jit, i0, r0, r1)
165 static void _x87_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
166 #  define x87_ltr_f(r0, r1, r2)		x87cmp(X86_CC_A, r0, r2, r1)
167 #  define x87_lti_f(r0, r1, i0)		_x87_lti_f(_jit, r0, r1, i0)
168 static void _x87_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
169 #  define x87_ler_f(r0, r1, r2)		x87cmp(X86_CC_AE, r0, r2, r1)
170 #  define x87_lei_f(r0, r1, i0)		_x87_lei_f(_jit, r0, r1, i0)
171 static void _x87_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
172 #  define x87_eqr_f(r0, r1, r2)		x87_eqr_d(r0, r2, r1)
173 #  define x87_eqi_f(r0, r1, i0)		_x87_eqi_f(_jit, r0, r1, i0)
174 static void _x87_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
175 #  define x87_ger_f(r0, r1, r2)		x87cmp(X86_CC_AE, r0, r1, r2)
176 #  define x87_gei_f(r0, r1, i0)		_x87_gei_f(_jit, r0, r1, i0)
177 static void _x87_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
178 #  define x87_gtr_f(r0, r1, r2)		x87cmp(X86_CC_A, r0, r1, r2)
179 #  define x87_gti_f(r0, r1, i0)		_x87_gti_f(_jit, r0, r1, i0)
180 static void _x87_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
181 #  define x87_ner_f(r0, r1, r2)		x87_ner_d(r0, r2, r1)
182 #  define x87_nei_f(r0, r1, i0)		_x87_nei_f(_jit, r0, r1, i0)
183 static void _x87_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
184 #  define x87_unltr_f(r0, r1, r2)	x87cmp(X86_CC_NAE, r0, r1, r2)
185 #  define x87_unlti_f(r0, r1, i0)	_x87_unlti_f(_jit, r0, r1, i0)
186 static void _x87_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
187 #  define x87_unler_f(r0, r1, r2)	x87cmp(X86_CC_NA, r0, r1, r2)
188 #  define x87_unlei_f(r0, r1, i0)	_x87_unlei_f(_jit, r0, r1, i0)
189 static void _x87_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
190 #  define x87_uneqr_f(r0, r1, r2)	x87cmp2(X86_CC_E, r0, r1, r2)
191 #  define x87_uneqi_f(r0, r1, i0)	_x87_uneqi_f(_jit, r0, r1, i0)
192 static void _x87_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
193 #  define x87_unger_f(r0, r1, r2)	x87cmp(X86_CC_NA, r0, r2, r1)
194 #  define x87_ungei_f(r0, r1, i0)	_x87_ungei_f(_jit, r0, r1, i0)
195 static void _x87_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
196 #  define x87_ungtr_f(r0, r1, r2)	x87cmp(X86_CC_NAE, r0, r2, r1)
197 #  define x87_ungti_f(r0, r1, i0)	_x87_ungti_f(_jit, r0, r1, i0)
198 static void _x87_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
199 #  define x87_ltgtr_f(r0, r1, r2)	x87_ltgtr_d(r0, r1, r2)
200 #  define x87_ltgti_f(r0, r1, i0)	_x87_ltgti_f(_jit, r0, r1, i0)
201 static void _x87_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
202 #  define x87_ordr_f(r0, r1, r2)	x87cmp2(X86_CC_NP, r0, r2, r1)
203 #  define x87_ordi_f(r0, r1, i0)	_x87_ordi_f(_jit, r0, r1, i0)
204 static void _x87_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
205 #  define x87_unordr_f(r0, r1, r2)	x87cmp2(X86_CC_P, r0, r2, r1)
206 #  define x87_unordi_f(r0, r1, i0)	_x87_unordi_f(_jit, r0, r1, i0)
207 static void _x87_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
208 #  define x87_ltr_d(r0, r1, r2)		x87cmp(X86_CC_A, r0, r2, r1)
209 #  define x87_lti_d(r0, r1, i0)		_x87_lti_d(_jit, r0, r1, i0)
210 static void _x87_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
211 #  define x87_ler_d(r0, r1, r2)		x87cmp(X86_CC_AE, r0, r2, r1)
212 #  define x87_lei_d(r0, r1, i0)		_x87_lei_d(_jit, r0, r1, i0)
213 static void _x87_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
214 #  define x87_eqr_d(r0, r1, r2)		_x87_eqr_d(_jit, r0, r2, r1)
215 static void _x87_eqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
216 #  define x87_eqi_d(r0, r1, i0)		_x87_eqi_d(_jit, r0, r1, i0)
217 static void _x87_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
218 #  define x87_ger_d(r0, r1, r2)		x87cmp(X86_CC_AE, r0, r1, r2)
219 #  define x87_gei_d(r0, r1, i0)		_x87_gei_d(_jit, r0, r1, i0)
220 static void _x87_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
221 #  define x87_gtr_d(r0, r1, r2)		x87cmp(X86_CC_A, r0, r1, r2)
222 #  define x87_gti_d(r0, r1, i0)		_x87_gti_d(_jit, r0, r1, i0)
223 static void _x87_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
224 #  define x87_ner_d(r0, r1, r2)		_x87_ner_d(_jit, r0, r2, r1)
225 static void _x87_ner_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
226 #  define x87_nei_d(r0, r1, i0)		_x87_nei_d(_jit, r0, r1, i0)
227 static void _x87_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
228 #  define x87_unltr_d(r0, r1, r2)	x87cmp(X86_CC_NAE, r0, r1, r2)
229 #  define x87_unlti_d(r0, r1, i0)	_x87_unlti_d(_jit, r0, r1, i0)
230 static void _x87_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
231 #  define x87_unler_d(r0, r1, r2)	x87cmp(X86_CC_NA, r0, r1, r2)
232 #  define x87_unlei_d(r0, r1, i0)	_x87_unlei_d(_jit, r0, r1, i0)
233 static void _x87_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
234 #  define x87_uneqr_d(r0, r1, r2)	x87cmp2(X86_CC_E, r0, r1, r2)
235 #  define x87_uneqi_d(r0, r1, i0)	_x87_uneqi_d(_jit, r0, r1, i0)
236 static void _x87_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
237 #  define x87_unger_d(r0, r1, r2)	x87cmp(X86_CC_NA, r0, r2, r1)
238 #  define x87_ungei_d(r0, r1, i0)	_x87_ungei_d(_jit, r0, r1, i0)
239 static void _x87_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
240 #  define x87_ungtr_d(r0, r1, r2)	x87cmp(X86_CC_NAE, r0, r2, r1)
241 #  define x87_ungti_d(r0, r1, i0)	_x87_ungti_d(_jit, r0, r1, i0)
242 static void _x87_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
243 #  define x87_ltgtr_d(r0, r1, r2)	_x87_ltgtr_d(_jit, r0, r1, r2)
244 static void _x87_ltgtr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
245 #  define x87_ltgti_d(r0, r1, i0)	_x87_ltgti_d(_jit, r0, r1, i0)
246 static void _x87_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
247 #  define x87_ordr_d(r0, r1, r2)	x87cmp2(X86_CC_NP, r0, r2, r1)
248 #  define x87_ordi_d(r0, r1, i0)	_x87_ordi_d(_jit, r0, r1, i0)
249 static void _x87_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
250 #  define x87_unordr_d(r0, r1, r2)	x87cmp2(X86_CC_P, r0, r2, r1)
251 #  define x87_unordi_d(r0, r1, i0)	_x87_unordi_d(_jit, r0, r1, i0)
252 static void _x87_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
253 #define x87_movr_f(r0,r1)		_x87_movr_d(_jit,r0,r1)
254 #define x87_movr_d(r0,r1)		_x87_movr_d(_jit,r0,r1)
255 static void _x87_movr_d(jit_state_t*, jit_int32_t, jit_int32_t);
256 #define x87_movi_d(r0,i0)		_x87_movi_d(_jit,r0,i0)
257 static void _x87_movi_d(jit_state_t*, jit_int32_t, jit_float64_t*);
258 #  define x87_ldr_d(r0, r1)		_x87_ldr_d(_jit, r0, r1)
259 static void _x87_ldr_d(jit_state_t*, jit_int32_t, jit_int32_t);
260 #  define x87_ldi_d(r0, i0)		_x87_ldi_d(_jit, r0, i0)
261 static void _x87_ldi_d(jit_state_t*, jit_int32_t, jit_word_t);
262 #  define x87_ldxr_d(r0, r1, r2)	_x87_ldxr_d(_jit, r0, r1, r2)
263 static void _x87_ldxr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
264 #  define x87_ldxi_d(r0, r1, i0)	_x87_ldxi_d(_jit, r0, r1, i0)
265 static void _x87_ldxi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
266 #  define x87_str_d(r0, r1)		_x87_str_d(_jit, r0, r1)
267 static void _x87_str_d(jit_state_t*,jit_int32_t,jit_int32_t);
268 #  define x87_sti_d(i0, r0)		_x87_sti_d(_jit, i0, r0)
269 static void _x87_sti_d(jit_state_t*,jit_word_t,jit_int32_t);
270 #  define x87_stxr_d(r0, r1, r2)	_x87_stxr_d(_jit, r0, r1, r2)
271 static void _x87_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
272 #  define x87_stxi_d(i0, r0, r1)	_x87_stxi_d(_jit, i0, r0, r1)
273 static void _x87_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
274 #  define x87_bltr_f(i0, r0, r1)	x87jcc(X86_CC_A, i0, r1, r0)
275 #  define x87_blti_f(i0, r0, i1)	_x87_blti_f(_jit, i0, r0, i1)
276 static jit_word_t
277 _x87_blti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
278 #  define x87_bler_f(i0, r0, r1)	x87jcc(X86_CC_AE, i0, r1, r0)
279 #  define x87_blei_f(i0, r0, i1)	_x87_blei_f(_jit, i0, r0, i1)
280 static jit_word_t
281 _x87_blei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
282 #  define x87_beqr_f(i0, r0, r1)	_x87_beqr_d(_jit, i0, r0, r1)
283 #  define x87_beqi_f(i0, r0, i1)	_x87_beqi_f(_jit, i0, r0, i1)
284 static jit_word_t
285 _x87_beqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
286 #  define x87_bger_f(i0, r0, r1)	x87jcc(X86_CC_AE, i0, r0, r1)
287 #  define x87_bgei_f(i0, r0, i1)	_x87_bgei_f(_jit, i0, r0, i1)
288 static jit_word_t
289 _x87_bgei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
290 #  define x87_bgtr_f(i0, r0, r1)	x87jcc(X86_CC_A, i0, r0, r1)
291 #  define x87_bgti_f(i0, r0, i1)	_x87_bgti_f(_jit, i0, r0, i1)
292 static jit_word_t
293 _x87_bgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
294 #  define x87_bner_f(i0, r0, r1)	_x87_bner_d(_jit, i0, r0, r1)
295 #  define x87_bnei_f(i0, r0, i1)	_x87_bnei_f(_jit, i0, r0, i1)
296 static jit_word_t
297 _x87_bnei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
298 #  define x87_bunltr_f(i0, r0, r1)	x87jcc(X86_CC_NAE, i0, r0, r1)
299 #  define x87_bunlti_f(i0, r0, i1)	_x87_bunlti_f(_jit, i0, r0, i1)
300 static jit_word_t
301 _x87_bunlti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
302 #  define x87_bunler_f(i0, r0, r1)	x87jcc(X86_CC_NA, i0, r0, r1)
303 #  define x87_bunlei_f(i0, r0, i1)	_x87_bunlei_f(_jit, i0, r0, i1)
304 static jit_word_t
305 _x87_bunlei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
306 #  define x87_buneqr_f(i0, r0, r1)	x87jcc2(X86_CC_E, i0, r0, r1)
307 #  define x87_buneqi_f(i0, r0, i1)	_x87_buneqi_f(_jit, i0, r0, i1)
308 static jit_word_t
309 _x87_buneqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
310 #  define x87_bunger_f(i0, r0, r1)	x87jcc(X86_CC_NA, i0, r1, r0)
311 #  define x87_bungei_f(i0, r0, i1)	_x87_bungei_f(_jit, i0, r0, i1)
312 static jit_word_t
313 _x87_bungei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
314 #  define x87_bungtr_f(i0, r0, r1)	x87jcc(X86_CC_NAE, i0, r1, r0)
315 #  define x87_bungti_f(i0, r0, i1)	_x87_bungti_f(_jit, i0, r0, i1)
316 static jit_word_t
317 _x87_bungti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
318 #  define x87_bltgtr_f(i0, r0, r1)	x87jcc2(X86_CC_NE, i0, r0, r1)
319 #  define x87_bltgti_f(i0, r0, i1)	_x87_bltgti_f(_jit, i0, r0, i1)
320 static jit_word_t
321 _x87_bltgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
322 #  define x87_bordr_f(i0, r0, r1)	x87jcc2(X86_CC_NP, i0, r0, r1)
323 #  define x87_bordi_f(i0, r0, i1)	_x87_bordi_f(_jit, i0, r0, i1)
324 static jit_word_t
325 _x87_bordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
326 #  define x87_bunordr_f(i0, r0, r1)	x87jcc2(X86_CC_P, i0, r0, r1)
327 #  define x87_bunordi_f(i0, r0, i1)	_x87_bunordi_f(_jit, i0, r0, i1)
328 static jit_word_t
329 _x87_bunordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
330 #  define x87_bltr_d(i0, r0, r1)	x87jcc(X86_CC_A, i0, r1, r0)
331 #  define x87_blti_d(i0, r0, i1)	_x87_blti_d(_jit, i0, r0, i1)
332 static jit_word_t
333 _x87_blti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
334 #  define x87_bler_d(i0, r0, r1)	x87jcc(X86_CC_AE, i0, r1, r0)
335 #  define x87_blei_d(i0, r0, i1)	_x87_blei_d(_jit, i0, r0, i1)
336 static jit_word_t
337 _x87_blei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
338 #  define x87_beqr_d(i0, r0, r1)	_x87_beqr_d(_jit, i0, r0, r1)
339 static jit_word_t
340 _x87_beqr_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
341 #  define x87_beqi_d(i0, r0, i1)	_x87_beqi_d(_jit, i0, r0, i1)
342 static jit_word_t
343 _x87_beqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
344 #  define x87_bger_d(i0, r0, r1)	x87jcc(X86_CC_AE, i0, r0, r1)
345 #  define x87_bgei_d(i0, r0, i1)	_x87_bgei_d(_jit, i0, r0, i1)
346 static jit_word_t
347 _x87_bgei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
348 #  define x87_bgtr_d(i0, r0, r1)	x87jcc(X86_CC_A, i0, r0, r1)
349 #  define x87_bgti_d(i0, r0, i1)	_x87_bgti_d(_jit, i0, r0, i1)
350 static jit_word_t
351 _x87_bgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
352 #  define x87_bner_d(i0, r0, r1)	_x87_bner_d(_jit, i0, r0, r1)
353 static jit_word_t
354 _x87_bner_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
355 #  define x87_bnei_d(i0, r0, i1)	_x87_bnei_d(_jit, i0, r0, i1)
356 static jit_word_t
357 _x87_bnei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
358 #  define x87_bunltr_d(i0, r0, r1)	x87jcc(X86_CC_NAE, i0, r0, r1)
359 #  define x87_bunlti_d(i0, r0, i1)	_x87_bunlti_d(_jit, i0, r0, i1)
360 static jit_word_t
361 _x87_bunlti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
362 #  define x87_bunler_d(i0, r0, r1)	x87jcc(X86_CC_NA, i0, r0, r1)
363 #  define x87_bunlei_d(i0, r0, i1)	_x87_bunlei_d(_jit, i0, r0, i1)
364 static jit_word_t
365 _x87_bunlei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
366 #  define x87_buneqr_d(i0, r0, r1)	x87jcc2(X86_CC_E, i0, r0, r1)
367 #  define x87_buneqi_d(i0, r0, i1)	_x87_buneqi_d(_jit, i0, r0, i1)
368 static jit_word_t
369 _x87_buneqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
370 #  define x87_bunger_d(i0, r0, r1)	x87jcc(X86_CC_NA, i0, r1, r0)
371 #  define x87_bungei_d(i0, r0, i1)	_x87_bungei_d(_jit, i0, r0, i1)
372 static jit_word_t
373 _x87_bungei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
374 #  define x87_bungtr_d(i0, r0, r1)	x87jcc(X86_CC_NAE, i0, r1, r0)
375 #  define x87_bungti_d(i0, r0, i1)	_x87_bungti_d(_jit, i0, r0, i1)
376 static jit_word_t
377 _x87_bungti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
378 #  define x87_bltgtr_d(i0, r0, r1)	x87jcc2(X86_CC_NE, i0, r0, r1)
379 #  define x87_bltgti_d(i0, r0, i1)	_x87_bltgti_d(_jit, i0, r0, i1)
380 static jit_word_t
381 _x87_bltgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
382 #  define x87_bordr_d(i0, r0, r1)	x87jcc2(X86_CC_NP, i0, r0, r1)
383 #  define x87_bordi_d(i0, r0, i1)	_x87_bordi_d(_jit, i0, r0, i1)
384 static jit_word_t
385 _x87_bordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
386 #  define x87_bunordr_d(i0, r0, r1)	x87jcc2(X86_CC_P, i0, r0, r1)
387 #  define x87_bunordi_d(i0, r0, i1)	_x87_bunordi_d(_jit, i0, r0, i1)
388 static jit_word_t
389 _x87_bunordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
390 #endif
391 
392 #if CODE
393 #  define fpr_opi(name, type, size)					\
394 static void								\
395 _x87_##name##i_##type(jit_state_t *_jit,				\
396 		      jit_int32_t r0, jit_int32_t r1,			\
397 		      jit_float##size##_t *i0)				\
398 {									\
399     jit_int32_t		reg = jit_get_reg(jit_class_fpr);		\
400     assert(jit_x87_reg_p(reg));						\
401     x87_movi_##type(rn(reg), i0);					\
402     x87_##name##r_##type(r0, r1, rn(reg));				\
403     jit_unget_reg(reg);							\
404 }
/*
 * fpr_bopi(name, type, size) defines
 *	_x87_b<name>i_<type>(_jit, i0, r0, jit_float<size>_t *i1)
 * the immediate form of a compare-and-branch: a scratch register of class
 * jit_class_fpr|jit_class_nospill is requested, the constant pointed to by
 * i1 is moved into it, and the value produced by the register form
 * x87_b<name>r_<type>(i0, r0, scratch) is returned after the scratch
 * register has been given back.
 */
#  define fpr_bopi(name, type, size)					\
static jit_word_t							\
_x87_b##name##i_##type(jit_state_t *_jit,				\
		       jit_word_t i0, jit_int32_t r0,			\
		       jit_float##size##_t *i1)				\
{									\
    jit_word_t		where;						\
    jit_int32_t		tmp;						\
    tmp = jit_get_reg(jit_class_fpr | jit_class_nospill);		\
    assert(jit_x87_reg_p(tmp));						\
    x87_movi_##type(rn(tmp), i1);					\
    where = x87_b##name##r_##type(i0, r0, rn(tmp));			\
    jit_unget_reg(tmp);							\
    return (where);							\
}
/*
 * Shorthands instantiating the generators above: the f* forms for
 * jit_float32_t constants, the d* forms for jit_float64_t constants.
 */
#  define fopi(name)			fpr_opi(name, f, 32)
#  define dopi(name)			fpr_opi(name, d, 64)
#  define fbopi(name)			fpr_bopi(name, f, 32)
#  define dbopi(name)			fpr_bopi(name, d, 64)

425 static void
_fstcwm(jit_state_t * _jit,jit_int32_t md,jit_int32_t rb,jit_int32_t ri,jit_int32_t ms)426 _fstcwm(jit_state_t *_jit, jit_int32_t md,
427 	jit_int32_t rb,	jit_int32_t ri, jit_int32_t ms)
428 {
429     ic(0x9b);
430     rex(0, 1, rb, ri, _NOREG);
431     x87rx(017, md, rb, ri, ms);
432 }
433 
434 static void
_x87rx(jit_state_t * _jit,jit_int32_t code,jit_int32_t md,jit_int32_t rb,jit_int32_t ri,jit_int32_t ms)435 _x87rx(jit_state_t *_jit, jit_int32_t code, jit_int32_t md,
436        jit_int32_t rb, jit_int32_t ri, jit_int32_t ms)
437 {
438     rex(0, 1, rb, ri, _NOREG);
439     ic(0xd8 | (code >> 3));
440     rx((code & 7), md, rb, ri, ms);
441 }
442 
443 static void
_x87ri(jit_state_t * _jit,jit_int32_t code,jit_int32_t r0)444 _x87ri(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
445 {
446     ic(0xd8 | (code >> 3));
447     mrm(0x03, (code & 7), r0);
448 }
449 
450 static void
_x87rri(jit_state_t * _jit,jit_int32_t code,jit_int32_t r0,jit_int32_t r1)451 _x87rri(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_int32_t r1)
452 {
453     if (r1 == _ST0_REGNO)
454 	x87ri(code | 040, r0);
455     else {
456 	assert(r0 == _ST0_REGNO);
457 	x87ri(code, r1);
458     }
459 }
460 
461 fopi(add)
fopi(sub)462 fopi(sub)
463 fopi(rsb)
464 fopi(mul)
465 fopi(div)
466 
467 static void
468 _x87_addr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
469 {
470     if (r0 == r1) {
471 	if (r2 == _ST0_REGNO)
472 	    faddr(r0, _ST0_REGNO);
473 	else if (r0 == _ST0_REGNO)
474 	    faddr(_ST0_REGNO, r2);
475 	else {
476 	    fxchr(r0);
477 	    faddr(_ST0_REGNO, r0 == r2 ? _ST0_REGNO : r2);
478 	    fxchr(r0);
479 	}
480     }
481     else if (r0 == r2) {
482 	if (r1 == _ST0_REGNO)
483 	    faddr(r0, _ST0_REGNO);
484 	else if (r0 == _ST0_REGNO)
485 	    faddr(_ST0_REGNO, r1);
486 	else {
487 	    fxchr(r0);
488 	    faddr(_ST0_REGNO, r1);
489 	    fxchr(r0);
490 	}
491     }
492     else {
493 	fldr(r1);
494 	faddr(_ST0_REGNO, r2 + 1);
495 	fstpr(r0 + 1);
496     }
497 }
498 
dopi(add)499 dopi(add)
500 
501 static void
502 _x87_subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
503 {
504     if (r0 == r1) {
505 	if (r2 == _ST0_REGNO)
506 	    fsubrr(r0, _ST0_REGNO);
507 	else if (r0 == _ST0_REGNO)
508 	    fsubr(_ST0_REGNO, r2);
509 	else {
510 	    fxchr(r0);
511 	    fsubr(_ST0_REGNO, r0 == r2 ? _ST0_REGNO : r2);
512 	    fxchr(r0);
513 	}
514     }
515     else if (r0 == r2) {
516 	if (r1 == _ST0_REGNO)
517 	    fsubr(r0, _ST0_REGNO);
518 	else if (r0 == _ST0_REGNO)
519 	    fsubrr(_ST0_REGNO, r1);
520 	else {
521 	    fxchr(r0);
522 	    fsubrr(_ST0_REGNO, r1);
523 	    fxchr(r0);
524 	}
525     }
526     else {
527 	fldr(r1);
528 	fsubr(_ST0_REGNO, r2 + 1);
529 	fstpr(r0 + 1);
530     }
531 }
532 
533 dopi(sub)
534 
dopi(rsb)535 dopi(rsb)
536 
537 static void
538 _x87_mulr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
539 {
540     if (r0 == r1) {
541 	if (r2 == _ST0_REGNO)
542 	    fmulr(r0, _ST0_REGNO);
543 	else if (r0 == _ST0_REGNO)
544 	    fmulr(_ST0_REGNO, r2);
545 	else {
546 	    fxchr(r0);
547 	    fmulr(_ST0_REGNO, r0 == r2 ? _ST0_REGNO : r2);
548 	    fxchr(r0);
549 	}
550     }
551     else if (r0 == r2) {
552 	if (r1 == _ST0_REGNO)
553 	    fmulr(r0, _ST0_REGNO);
554 	else if (r0 == _ST0_REGNO)
555 	    fmulr(_ST0_REGNO, r1);
556 	else {
557 	    fxchr(r0);
558 	    fmulr(_ST0_REGNO, r1);
559 	    fxchr(r0);
560 	}
561     }
562     else {
563 	fldr(r1);
564 	fmulr(_ST0_REGNO, r2 + 1);
565 	fstpr(r0 + 1);
566     }
567 }
568 
dopi(mul)569 dopi(mul)
570 
571 static void
572 _x87_divr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
573 {
574     if (r0 == r1) {
575 	if (r2 == _ST0_REGNO)
576 	    fdivrr(r0, _ST0_REGNO);
577 	else if (r0 == _ST0_REGNO)
578 	    fdivr(_ST0_REGNO, r2);
579 	else {
580 	    fxchr(r0);
581 	    fdivr(_ST0_REGNO, r0 == r2 ? _ST0_REGNO : r2);
582 	    fxchr(r0);
583 	}
584     }
585     else if (r0 == r2) {
586 	if (r1 == _ST0_REGNO)
587 	    fdivr(r0, _ST0_REGNO);
588 	else if (r0 == _ST0_REGNO)
589 	    fsubrr(_ST0_REGNO, r1);
590 	else {
591 	    fxchr(r0);
592 	    fdivrr(_ST0_REGNO, r1);
593 	    fxchr(r0);
594 	}
595     }
596     else {
597 	fldr(r1);
598 	fdivr(_ST0_REGNO, r2 + 1);
599 	fstpr(r0 + 1);
600     }
601 }
602 
dopi(div)603 dopi(div)
604 
605 static void
606 _x87_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
607 {
608     if (r0 == r1) {
609 	if (r1 == _ST0_REGNO)
610 	    fabs_();
611 	else {
612 	    fxchr(r0);
613 	    fabs_();
614 	    fxchr(r0);
615 	}
616     }
617     else {
618 	fldr(r1);
619 	fabs_();
620 	fstpr(r0 + 1);
621     }
622 }
623 
624 static void
_x87_negr_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)625 _x87_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
626 {
627     if (r0 == r1) {
628 	if (r1 == _ST0_REGNO)
629 	    fchs_();
630 	else {
631 	    fxchr(r0);
632 	    fchs_();
633 	    fxchr(r0);
634 	}
635     }
636     else {
637 	fldr(r1);
638 	fchs_();
639 	fstpr(r0 + 1);
640     }
641 }
642 
643 static void
_x87_sqrtr_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)644 _x87_sqrtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
645 {
646     if (r0 == r1) {
647 	if (r1 == _ST0_REGNO)
648 	    fsqrt_();
649 	else {
650 	    fxchr(r0);
651 	    fsqrt_();
652 	    fxchr(r0);
653 	}
654     }
655     else {
656 	fldr(r1);
657 	fsqrt_();
658 	fstpr(r0 + 1);
659     }
660 }
661 
662 static void
_x87_truncr_d_i(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)663 _x87_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
664 {
665 #if defined(sun)
666     /* for the sake of passing test cases in x87 mode, otherwise only sse
667      * is supported */
668     fstcwm(-4, _RBP_REGNO, _NOREG, _SCL1);
669     ldxi_s(r0, _RBP_REGNO, -4);
670     extr_uc(r0, r0);
671 #  define FPCW_CHOP	0xc00
672     ori(r0, r0, FPCW_CHOP);
673     stxi_s(-8, _RBP_REGNO, r0);
674     fldcwm(-8, _RBP_REGNO, _NOREG, _SCL1);
675     if (r1 == _ST0_REGNO)
676 	fistlm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
677     else {
678 	fxchr(r1);
679 	fistlm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
680 	fxchr(r1);
681     }
682     fldcwm(-4, _RBP_REGNO, _NOREG, _SCL1);
683     ldxi(r0, _RBP_REGNO, CVT_OFFSET);
684 #else
685     fldr(r1);
686     fisttplm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
687     ldxi_i(r0, _RBP_REGNO, CVT_OFFSET);
688 #endif
689 }
690 
#  if __X64
/*
 * Convert the x87 register r1 to a word sized integer in the general
 * purpose register r0: push a copy of r1, store-and-pop it with
 * fisttpqm() into the frame slot at CVT_OFFSET(%rbp) and reload that slot
 * with ldxi().
 */
static void
_x87_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    fldr(r1);				/* work on a copy, r1 is preserved */
    fisttpqm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
    ldxi(r0, _RBP_REGNO, CVT_OFFSET);
}
#  endif

701 static void
_x87_extr_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)702 _x87_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
703 {
704     stxi(CVT_OFFSET, _RBP_REGNO, r1);
705 #  if __X32
706     fildlm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
707 #  else
708     fildqm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
709 #  endif
710     fstpr(r0 + 1);
711 }
712 
713 static void
_x87cmp(jit_state_t * _jit,jit_int32_t code,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)714 _x87cmp(jit_state_t *_jit, jit_int32_t code,
715 	jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
716 {
717     jit_bool_t		rc;
718     jit_int32_t		reg;
719     if ((rc = reg8_p(r0)))
720 	reg = r0;
721     else {
722 	reg = _RAX_REGNO;
723 	movr(r0, reg);
724     }
725     ixorr(reg, reg);
726     if (r1 == _ST0_REGNO)
727 	fucomir(r2);
728     else {
729 	fldr(r1);
730 	fucomipr(r2 + 1);
731     }
732     cc(code, reg);
733     if (!rc)
734 	xchgr(r0, reg);
735 }
736 
737 static void
_x87cmp2(jit_state_t * _jit,jit_int32_t code,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)738 _x87cmp2(jit_state_t *_jit, jit_int32_t code,
739 	 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
740 {
741     jit_bool_t			rc;
742     jit_int32_t			reg;
743     jit_int32_t			f1, f2;
744     if (r2 == _ST0_REGNO)	f1 = r2, f2 = r1;
745     else			f1 = r1, f2 = r2;
746     if ((rc = reg8_p(r0)))
747 	reg = r0;
748     else {
749 	reg = _RAX_REGNO;
750 	movr(r0, reg);
751     }
752     ixorr(reg, reg);
753     if (f1 == _ST0_REGNO)
754 	fucomir(f2);
755     else {
756 	fldr(f1);
757 	fucomipr(f2 + 1);
758     }
759     cc(code, reg);
760     if (!rc)
761 	xchgr(r0, reg);
762 }
763 
764 static jit_word_t
_x87jcc(jit_state_t * _jit,jit_int32_t code,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)765 _x87jcc(jit_state_t *_jit, jit_int32_t code,
766 	jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
767 {
768     if (r0 == _ST0_REGNO)
769 	fucomir(r1);
770     else {
771 	fldr(r0);
772 	fucomipr(r1 + 1);
773     }
774     jcc(code, i0);
775     return (_jit->pc.w);
776 }
777 
778 static jit_word_t
_x87jcc2(jit_state_t * _jit,jit_int32_t code,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)779 _x87jcc2(jit_state_t *_jit, jit_int32_t code,
780 	 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
781 {
782     jit_int32_t			f0, f1;
783     if (r1 == _ST0_REGNO)	f0 = r1, f1 = r0;
784     else			f0 = r0, f1 = r1;
785     if (f0 == _ST0_REGNO)
786 	fucomir(f1);
787     else {
788 	fldr(f0);
789 	fucomipr(f1 + 1);
790     }
791     jcc(code, i0);
792     return (_jit->pc.w);
793 }
794 
/*
 * NOTE(review): fopi() and fbopi() are macros defined earlier in this
 * file.  Presumably every use below instantiates the single precision,
 * immediate operand form of the named comparison (fopi) or branch
 * (fbopi) -- confirm against the macro definitions.
 */
fopi(lt)
fopi(le)
fopi(eq)
fopi(ge)
fopi(gt)
fopi(ne)
fopi(unlt)
fopi(unle)
fopi(uneq)
fopi(unge)
fopi(ungt)
fopi(ltgt)
fopi(ord)
fopi(unord)
fbopi(lt)
fbopi(le)
fbopi(eq)
fbopi(ge)
fbopi(gt)
fbopi(ne)
fbopi(unlt)
fbopi(unle)
fbopi(uneq)
fbopi(unge)
fbopi(ungt)
fbopi(ltgt)
fbopi(ord)
fbopi(unord)
823 
824 static void
825 _x87_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
826 {
827     union {
828 	jit_int32_t	 i;
829 	jit_float32_t	 f;
830     } data;
831     jit_int32_t		 reg;
832 
833     data.f = *i0;
834     if (data.f == 0.0 && !(data.i & 0x80000000))
835 	fldz();
836     else if (data.f == 1.0)
837 	fld1();
838     else if (data.f == 3.3219280948873623478703195458468f)
839 	fldl2t();
840     else if (data.f == 1.4426950408889634073599246886656f)
841 	fldl2e();
842     else if (data.f == 3.1415926535897932384626421096161f)
843 	fldpi();
844     else if (data.f == 0.3010299956639811952137387498515f)
845 	fldlg2();
846     else if (data.f == 0.6931471805599453094172323683399f)
847 	fldln2();
848     else {
849 	if (_jitc->no_data) {
850 	    reg = jit_get_reg(jit_class_gpr);
851 	    movi(rn(reg), data.i);
852 	    stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
853 	    jit_unget_reg(reg);
854 	    x87_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
855 	}
856 	else
857 	    x87_ldi_f(r0, (jit_word_t)i0);
858 	return;
859     }
860     fstpr(r0 + 1);
861 }
862 
/*
 * Emit the load of the single precision value at the address held in
 * general purpose register r1 into x87 register r0.
 */
static void
_x87_ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    fldsm(0, r1, _NOREG, _SCL1);
    /* The load pushed one entry, hence the + 1 when popping into r0. */
    fstpr(r0 + 1);
}
869 
870 static void
_x87_ldi_f(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)871 _x87_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
872 {
873     jit_int32_t		reg;
874     if (x87_address_p(i0)) {
875 	fldsm(i0, _NOREG, _NOREG, _SCL1);
876 	fstpr(r0 + 1);
877     }
878     else {
879 	reg = jit_get_reg(jit_class_gpr);
880 	movi(rn(reg), i0);
881 	x87_ldr_f(r0, rn(reg));
882 	jit_unget_reg(reg);
883     }
884 }
885 
/*
 * Emit the load of the single precision value at address r1 + r2 (both
 * general purpose registers) into x87 register r0.
 */
static void
_x87_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
#if __X64_32
    /* Here the sum is computed into a scratch register first instead of
     * using base + index addressing. */
    jit_int32_t		reg;
    reg = jit_get_reg(jit_class_gpr);
    addr(rn(reg), r1, r2);
    x87_ldr_f(r0, rn(reg));
    jit_unget_reg(reg);
#else
    fldsm(0, r1, r2, _SCL1);
    /* The load pushed one entry, hence the + 1 when popping into r0. */
    fstpr(r0 + 1);
#endif
}
900 
901 static void
_x87_ldxi_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)902 _x87_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
903 {
904     jit_int32_t		reg;
905     if (can_sign_extend_int_p(i0)) {
906 	fldsm(i0, r1, _NOREG, _SCL1);
907 	fstpr(r0 + 1);
908     }
909     else {
910 	reg = jit_get_reg(jit_class_gpr);
911 #if __X64_32
912 	addi(rn(reg), r1, i0);
913 	x87_ldr_f(r0, rn(reg));
914 #else
915 	movi(rn(reg), i0);
916 	x87_ldxr_f(r0, r1, rn(reg));
917 #endif
918 	jit_unget_reg(reg);
919     }
920 }
921 
922 static void
_x87_str_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)923 _x87_str_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
924 {
925     if (r1 == _ST0_REGNO)
926 	fstsm(0, r0, _NOREG, _SCL1);
927     else {
928 	fxchr(r1);
929 	fstsm(0, r0, _NOREG, _SCL1);
930 	fxchr(r1);
931     }
932 }
933 
934 static void
_x87_sti_f(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0)935 _x87_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
936 {
937     jit_int32_t		reg;
938     if (!x87_address_p(i0)) {
939 	reg = jit_get_reg(jit_class_gpr);
940 	movi(rn(reg), i0);
941 	x87_str_f(rn(reg), r0);
942 	jit_unget_reg(reg);
943     }
944     else if (r0 == _ST0_REGNO)
945 	fstsm(i0, _NOREG, _NOREG, _SCL1);
946     else {
947 	fxchr(r0);
948 	fstsm(i0, _NOREG, _NOREG, _SCL1);
949 	fxchr(r0);
950     }
951 }
952 
953 static void
_x87_stxr_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)954 _x87_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
955 {
956 #if __X64_32
957     jit_int32_t		reg;
958     reg = jit_get_reg(jit_class_gpr);
959     addr(rn(reg), r0, r1);
960     x87_str_f(rn(reg), r2);
961     jit_unget_reg(reg);
962 #else
963     if (r2 == _ST0_REGNO)
964 	fstsm(0, r0, r1, _SCL1);
965     else {
966 	fxchr(r2);
967 	fstsm(0, r0, r1, _SCL1);
968 	fxchr(r2);
969     }
970 #endif
971 }
972 
973 static void
_x87_stxi_f(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)974 _x87_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
975 {
976     jit_int32_t		reg;
977     if (!can_sign_extend_int_p(i0)) {
978 	reg = jit_get_reg(jit_class_gpr);
979 #if __X64_32
980 	addi(rn(reg), r0, i0);
981 	x87_str_f(rn(reg), r1);
982 #else
983 	movi(rn(reg), i0);
984 	x87_stxr_f(rn(reg), r0, r1);
985 #endif
986 	jit_unget_reg(reg);
987     }
988     else if (r1 == _ST0_REGNO)
989 	fstsm(i0, r0, _NOREG, _SCL1);
990     else {
991 	fxchr(r1);
992 	fstsm(i0, r0, _NOREG, _SCL1);
993 	fxchr(r1);
994     }
995 }
996 
997 static void
_x87_movr_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)998 _x87_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
999 {
1000     if (r0 != r1) {
1001 	if (r1 == _ST0)
1002 	    fstr(r0);
1003 	else if (r0 == _ST0) {
1004 	    fxchr(r1);
1005 	    fstr(r1);
1006 	}
1007 	else {
1008 	    fldr(r1);
1009 	    fstpr(r0 + 1);
1010 	}
1011     }
1012 }
1013 
1014 static void
_x87_movi_d(jit_state_t * _jit,jit_int32_t r0,jit_float64_t * i0)1015 _x87_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
1016 {
1017     union {
1018 	jit_int32_t	 ii[2];
1019 	jit_word_t	 w;
1020 	jit_float64_t	 d;
1021     } data;
1022     jit_int32_t		 reg;
1023 
1024     data.d = *i0;
1025     if (data.d == 0.0 && !(data.ii[1] & 0x80000000))
1026 	fldz();
1027     else if (data.d == 1.0)
1028 	fld1();
1029     else if (data.d == 3.3219280948873623478703195458468)
1030 	fldl2t();
1031     else if (data.d == 1.4426950408889634073599246886656)
1032 	fldl2e();
1033     else if (data.d == 3.1415926535897932384626421096161)
1034 	fldpi();
1035     else if (data.d == 0.3010299956639811952137387498515)
1036 	fldlg2();
1037     else if (data.d == 0.6931471805599453094172323683399)
1038 	fldln2();
1039     else {
1040 	if (_jitc->no_data) {
1041 	    reg = jit_get_reg(jit_class_gpr);
1042 #if __X32 || __X64_32
1043 	    movi(rn(reg), data.ii[0]);
1044 	    stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
1045 	    movi(rn(reg), data.ii[1]);
1046 	    stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg));
1047 #else
1048 	    movi(rn(reg), data.w);
1049 	    stxi_l(CVT_OFFSET, _RBP_REGNO, rn(reg));
1050 #endif
1051 	    jit_unget_reg(reg);
1052 	    x87_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
1053 	}
1054 	else
1055 	    x87_ldi_d(r0, (jit_word_t)i0);
1056 	return;
1057     }
1058     fstpr(r0 + 1);
1059 }
1060 
/* NOTE(review): dopi() is defined earlier in this file; presumably it
 * instantiates the double precision immediate operand form of the named
 * comparison -- confirm against the macro definition. */
dopi(lt)
dopi(le)
1063 
1064 static void
1065 _x87_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1066 {
1067     jit_bool_t			rc;
1068     jit_word_t			jp_code;
1069     jit_int32_t			reg, f1, f2;
1070     if (r2 == _ST0_REGNO)	f1 = r2, f2 = r1;
1071     else			f1 = r1, f2 = r2;
1072     if ((rc = reg8_p(r0)))
1073 	reg = r0;
1074     else {
1075 	reg = _RAX_REGNO;
1076 	movr(r0, reg);
1077     }
1078     ixorr(reg, reg);
1079     if (f1 == _ST0_REGNO)
1080 	fucomir(f2);
1081     else {
1082 	fldr(f1);
1083 	fucomipr(f2 + 1);
1084     }
1085     jpes(0);
1086     jp_code = _jit->pc.w;
1087     cc(X86_CC_E, reg);
1088     patch_rel_char(jp_code, _jit->pc.w);
1089     if (!rc)
1090 	xchgr(r0, reg);
1091 }
1092 
/* NOTE(review): dopi() is defined earlier in this file; presumably it
 * instantiates the double precision immediate operand form of the named
 * comparison -- confirm against the macro definition. */
dopi(eq)
dopi(ge)
dopi(gt)
1096 
1097 static void
1098 _x87_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1099 {
1100     jit_bool_t			rc;
1101     jit_word_t			jp_code;
1102     jit_int32_t			reg, f1, f2;
1103     if (r2 == _ST0_REGNO)	f1 = r2, f2 = r1;
1104     else			f1 = r1, f2 = r2;
1105     if ((rc = reg8_p(r0)))
1106 	reg = r0;
1107     else {
1108 	reg = _RAX_REGNO;
1109 	movr(r0, reg);
1110     }
1111     imovi(reg, 1);
1112     if (f1 == _ST0_REGNO)
1113 	fucomir(f2);
1114     else {
1115 	fldr(f1);
1116 	fucomipr(f2 + 1);
1117     }
1118     jpes(0);
1119     jp_code = _jit->pc.w;
1120     cc(X86_CC_NE, reg);
1121     patch_rel_char(jp_code, _jit->pc.w);
1122     if (!rc)
1123 	xchgr(r0, reg);
1124 }
1125 
/* NOTE(review): dopi() is defined earlier in this file; presumably it
 * instantiates the double precision immediate operand form of the named
 * comparison -- confirm against the macro definition. */
dopi(ne)
dopi(unlt)
dopi(unle)
dopi(uneq)
dopi(unge)
dopi(ungt)
1132 
1133 static void
1134 _x87_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1135 {
1136     if (r1 == r2)
1137 	movi(r0, 1);
1138     else
1139 	x87cmp2(X86_CC_NE, r0, r1, r2);
1140 }
1141 
/* NOTE(review): dopi() is defined earlier in this file; presumably it
 * instantiates the double precision immediate operand form of the named
 * comparison -- confirm against the macro definition. */
dopi(ltgt)
dopi(ord)
dopi(unord)
1145 
/*
 * Emit the load of the double precision value at the address held in
 * general purpose register r1 into x87 register r0.
 */
static void
_x87_ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    fldlm(0, r1, _NOREG, _SCL1);
    /* The load pushed one entry, hence the + 1 when popping into r0. */
    fstpr(r0 + 1);
}
1152 
1153 static void
_x87_ldi_d(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)1154 _x87_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1155 {
1156     jit_int32_t		reg;
1157     if (x87_address_p(i0)) {
1158 	fldlm(i0, _NOREG, _NOREG, _SCL1);
1159 	fstpr(r0 + 1);
1160     }
1161     else {
1162 	reg = jit_get_reg(jit_class_gpr);
1163 	movi(rn(reg), i0);
1164 	x87_ldr_d(r0, rn(reg));
1165 	jit_unget_reg(reg);
1166     }
1167 }
1168 
/*
 * Emit the load of the double precision value at address r1 + r2 (both
 * general purpose registers) into x87 register r0.
 */
static void
_x87_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
#if __X64_32
    /* Here the sum is computed into a scratch register first instead of
     * using base + index addressing. */
    jit_int32_t		reg;
    reg = jit_get_reg(jit_class_gpr);
    addr(rn(reg), r1, r2);
    x87_ldr_d(r0, rn(reg));
    jit_unget_reg(reg);
#else
    fldlm(0, r1, r2, _SCL1);
    /* The load pushed one entry, hence the + 1 when popping into r0. */
    fstpr(r0 + 1);
#endif
}
1183 
1184 static void
_x87_ldxi_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1185 _x87_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1186 {
1187     jit_int32_t		reg;
1188     if (can_sign_extend_int_p(i0)) {
1189 	fldlm(i0, r1, _NOREG, _SCL1);
1190 	fstpr(r0 + 1);
1191     }
1192     else {
1193 	reg = jit_get_reg(jit_class_gpr);
1194 #if __X64_32
1195 	addi(rn(reg), r1, i0);
1196 	x87_ldr_d(r0, rn(reg));
1197 #else
1198 	movi(rn(reg), i0);
1199 	x87_ldxr_d(r0, r1, rn(reg));
1200 #endif
1201 	jit_unget_reg(reg);
1202     }
1203 }
1204 
1205 static void
_x87_str_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1206 _x87_str_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1207 {
1208     if (r1 == _ST0_REGNO)
1209 	fstlm(0, r0, _NOREG, _SCL1);
1210     else {
1211 	fxchr(r1);
1212 	fstlm(0, r0, _NOREG, _SCL1);
1213 	fxchr(r1);
1214     }
1215 }
1216 
1217 static void
_x87_sti_d(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0)1218 _x87_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
1219 {
1220     jit_int32_t		reg;
1221     if (!x87_address_p(i0)) {
1222 	reg = jit_get_reg(jit_class_gpr);
1223 	movi(rn(reg), i0);
1224 	x87_str_d(rn(reg), r0);
1225 	jit_unget_reg(reg);
1226     }
1227     else if (r0 == _ST0_REGNO)
1228 	fstlm(i0, _NOREG, _NOREG, _SCL1);
1229     else {
1230 	fxchr(r0);
1231 	fstlm(i0, _NOREG, _NOREG, _SCL1);
1232 	fxchr(r0);
1233     }
1234 }
1235 
1236 static void
_x87_stxr_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1237 _x87_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1238 {
1239 #if __X64_32
1240     jit_int32_t		reg;
1241     reg = jit_get_reg(jit_class_gpr);
1242     addr(rn(reg), r0, r1);
1243     x87_str_d(rn(reg), r2);
1244     jit_unget_reg(reg);
1245 #else
1246     if (r2 == _ST0_REGNO)
1247 	fstlm(0, r0, r1, _SCL1);
1248     else {
1249 	fxchr(r2);
1250 	fstlm(0, r0, r1, _SCL1);
1251 	fxchr(r2);
1252     }
1253 #endif
1254 }
1255 
1256 static void
_x87_stxi_d(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1257 _x87_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1258 {
1259     jit_int32_t		reg;
1260     if (!can_sign_extend_int_p(i0)) {
1261 	reg = jit_get_reg(jit_class_gpr);
1262 #if __X64_32
1263 	addi(rn(reg), r0, i0);
1264 	x87_str_d(rn(reg), r1);
1265 #else
1266 	movi(rn(reg), i0);
1267 	x87_stxr_d(rn(reg), r0, r1);
1268 #endif
1269 	jit_unget_reg(reg);
1270     }
1271     else if (r1 == _ST0_REGNO)
1272 	fstlm(i0, r0, _NOREG, _SCL1);
1273     else {
1274 	fxchr(r1);
1275 	fstlm(i0, r0, _NOREG, _SCL1);
1276 	fxchr(r1);
1277     }
1278 }
1279 
/* NOTE(review): dbopi() is defined earlier in this file; presumably it
 * instantiates the double precision immediate operand form of the named
 * branch -- confirm against the macro definition. */
dbopi(lt)
dbopi(le)
1282 
1283 static jit_word_t
1284 _x87_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1285 {
1286     jit_int32_t			f0, f1;
1287     jit_word_t			jp_code;
1288     if (r1 == _ST0_REGNO)	f0 = r1, f1 = r0;
1289     else			f0 = r0, f1 = r1;
1290     if (f0 == _ST0_REGNO)
1291 	fucomir(f1);
1292     else {
1293 	fldr(f0);
1294 	fucomipr(f1 + 1);
1295     }
1296     jpes(0);
1297     jp_code = _jit->pc.w;
1298     jcc(X86_CC_E, i0);
1299     patch_rel_char(jp_code, _jit->pc.w);
1300     return (_jit->pc.w);
1301 }
/* NOTE(review): dbopi() is defined earlier in this file; presumably it
 * instantiates the double precision immediate operand form of the named
 * branch -- confirm against the macro definition. */
dbopi(eq)
dbopi(ge)
dbopi(gt)
1305 
1306 static jit_word_t
1307 _x87_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1308 {
1309     jit_int32_t			f0, f1;
1310     jit_word_t			jp_code;
1311     jit_word_t			jz_code;
1312     if (r1 == _ST0_REGNO)	f0 = r1, f1 = r0;
1313     else			f0 = r0, f1 = r1;
1314     if (f0 == _ST0_REGNO)
1315 	fucomir(f1);
1316     else {
1317 	fldr(f0);
1318 	fucomipr(f1 + 1);
1319     }
1320     jpes(0);
1321     jp_code = _jit->pc.w;
1322     jzs(0);
1323     jz_code = _jit->pc.w;
1324     patch_rel_char(jp_code, _jit->pc.w);
1325     jmpi(i0);
1326     patch_rel_char(jz_code, _jit->pc.w);
1327     return (_jit->pc.w);
1328 }
/* NOTE(review): dbopi() is defined earlier in this file; presumably it
 * instantiates the double precision immediate operand form of the named
 * branch -- confirm against the macro definition. */
dbopi(ne)
dbopi(unlt)
dbopi(unle)
dbopi(uneq)
dbopi(unge)
dbopi(ungt)
dbopi(ltgt)
dbopi(ord)
dbopi(unord)
/* The instantiation helpers are not used past this point. */
#  undef fopi
#  undef fbopi
#  undef dopi
#  undef dbopi
#  undef fpr_bopi
#  undef fpr_opi
1344 #endif
1345