1 /*
2  * Copyright (C) 2012-2019  Free Software Foundation, Inc.
3  *
4  * This file is part of GNU lightning.
5  *
6  * GNU lightning is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; either version 3, or (at your option)
9  * any later version.
10  *
11  * GNU lightning is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14  * License for more details.
15  *
16  * Authors:
17  *	Paulo Cesar Pereira de Andrade
18  */
19 
/*
 * Gates the declaration of the incr()/decr() helpers (see the
 * "#if USE_INC_DEC" guard in the prototype section).  Kept at 0 to
 * avoid using them, due to partial stalls.
 */
#define USE_INC_DEC			0
22 
23 #if PROTO
#  if !(__X32 || __X64_32)
/*
 * Neither __X32 nor __X64_32 is set: the generic word-sized load/store
 * names map to the "_l" variants and immediates must be range checked.
 */
#    define WIDE			1
#    define ldi(r0, i0)			ldi_l(r0, i0)
#    define ldr(r0, r1)			ldr_l(r0, r1)
#    define ldxr(r0, r1, r2)		ldxr_l(r0, r1, r2)
#    define ldxi(r0, r1, i0)		ldxi_l(r0, r1, i0)
#    define sti(i0, r0)			sti_l(i0, r0)
#    define stxi(i0, r0, r1)		stxi_l(i0, r0, r1)
/*
 * Nonzero when im is within [-0x7fffffff, 0x7fffffff].  The negative
 * bound is a strict comparison, so -0x80000000 itself is rejected.
 * NOTE(review): confirm that excluding -0x80000000 is intentional.
 */
#    define can_sign_extend_int_p(im)					\
	((im) >= 0 ? (long long)(im) <=  0x7fffffffLL			\
		   : (long long)(im) >  -0x80000000LL)
/* Nonzero when im is within [0, 0x7fffffff]. */
#    define can_zero_extend_int_p(im)					\
	(!((im) < 0 || (im) >= 0x80000000LL))
/* Nonzero when none of the upper 32 bits of im is set. */
#    define fits_uint32_p(im)		(!((im) & 0xffffffff00000000LL))
#  else
/*
 * __X32 or __X64_32: the generic word-sized load/store names map to the
 * "_i" variants and every immediate predicate is unconditionally true.
 */
#    define WIDE			0
#    define ldi(r0, i0)			ldi_i(r0, i0)
#    define ldr(r0, r1)			ldr_i(r0, r1)
#    define ldxr(r0, r1, r2)		ldxr_i(r0, r1, r2)
#    define ldxi(r0, r1, i0)		ldxi_i(r0, r1, i0)
#    define sti(i0, r0)			sti_i(i0, r0)
#    define stxi(i0, r0, r1)		stxi_i(i0, r0, r1)
#    define can_sign_extend_int_p(im)	1
#    define can_zero_extend_int_p(im)	1
#    define fits_uint32_p(im)		1
#  endif
/* Numeric identifiers of the sixteen integer registers. */
#  define _RAX_REGNO			0
#  define _RCX_REGNO			1
#  define _RDX_REGNO			2
#  define _RBX_REGNO			3
#  define _RSP_REGNO			4
#  define _RBP_REGNO			5
#  define _RSI_REGNO			6
#  define _RDI_REGNO			7
#  define _R8_REGNO			8
#  define _R9_REGNO			9
#  define _R10_REGNO			10
#  define _R11_REGNO			11
#  define _R12_REGNO			12
#  define _R13_REGNO			13
#  define _R14_REGNO			14
#  define _R15_REGNO			15
/*
 * reg8_p(rn): on __X32, __CYGWIN__, __X64_32 and _WIN32 builds it is
 * true only for register numbers _RAX_REGNO.._RBX_REGNO; on every other
 * build it is always 1.
 * NOTE(review): presumably selects registers usable as 8-bit operands;
 * confirm against the callers.
 */
#  if __X32 || __CYGWIN__ || __X64_32 || _WIN32
#      define reg8_p(rn)						\
      (!((rn) < _RAX_REGNO || (rn) > _RBX_REGNO))
#  else
#      define reg8_p(rn)		1
#  endif
/* Keep only the low three (r7) or low four (r8) bits of a register number. */
#  define r7(reg)			((reg) & 0x07)
#  define r8(reg)			((reg) & 0x0f)
/*
 * Two-bit selector values 0..3.
 * NOTE(review): presumably the scale field passed as "sc" to sib()
 * (factors 1, 2, 4, 8); confirm against the callers.
 */
#  define _SCL1				0
#  define _SCL2				1
#  define _SCL4				2
#  define _SCL8				3
/*
 * Operation selectors passed as "code" to alur()/alui(); each value is
 * an index 0..7 already shifted left by three bits.
 *
 * Every expansion is fully parenthesized so that the constant behaves
 * as a single operand wherever it is used: without the parentheses an
 * expression such as X86_OR + 1 would parse as 1 << (3 + 1).
 */
#  define X86_ADD			(0 << 3)
#  define X86_OR			(1 << 3)
#  define X86_ADC			(2 << 3)
#  define X86_SBB			(3 << 3)
#  define X86_AND			(4 << 3)
#  define X86_SUB			(5 << 3)
#  define X86_XOR			(6 << 3)
#  define X86_CMP			(7 << 3)
/*
 * Selectors passed as "code" to the rotate/shift helpers
 * (irotshr, rotshr, irotshi, rotshi).  Value 6 is not used.
 */
#  define X86_ROL			0x0
#  define X86_ROR			0x1
#  define X86_RCL			0x2
#  define X86_RCR			0x3
#  define X86_SHL			0x4
#  define X86_SHR			0x5
#  define X86_SAR			0x7
/*
 * Selectors passed as "code" to unr() (see inegr, icomr, umulr,
 * umulr_u, idivr and idivr_u).
 */
#  define X86_NOT			0x2
#  define X86_NEG			0x3
#  define X86_MUL			0x4
#  define X86_IMUL			0x5
#  define X86_DIV			0x6
#  define X86_IDIV			0x7
/*
 * Four-bit condition codes used by cc(), jcc() and jccs().  Several
 * names are aliases for the same value; aliases are listed together.
 */
#  define X86_CC_O			0x00
#  define X86_CC_NO			0x01
#  define X86_CC_NAE			0x02
#  define X86_CC_B			0x02
#  define X86_CC_C			0x02
#  define X86_CC_AE			0x03
#  define X86_CC_NB			0x03
#  define X86_CC_NC			0x03
#  define X86_CC_E			0x04
#  define X86_CC_Z			0x04
#  define X86_CC_NE			0x05
#  define X86_CC_NZ			0x05
#  define X86_CC_BE			0x06
#  define X86_CC_NA			0x06
#  define X86_CC_A			0x07
#  define X86_CC_NBE			0x07
#  define X86_CC_S			0x08
#  define X86_CC_NS			0x09
#  define X86_CC_P			0x0a
#  define X86_CC_PE			0x0a
#  define X86_CC_NP			0x0b
#  define X86_CC_PO			0x0b
#  define X86_CC_L			0x0c
#  define X86_CC_NGE			0x0c
#  define X86_CC_GE			0x0d
#  define X86_CC_NL			0x0d
#  define X86_CC_LE			0x0e
#  define X86_CC_NG			0x0e
#  define X86_CC_G			0x0f
#  define X86_CC_NLE			0x0f
/*
 * Raw emitters.  Each one stores a value at the current output position
 * held in _jit->pc and advances that position by the size of the stored
 * object.  All macro arguments and whole expansions are parenthesized so
 * that compound argument expressions (e.g. "a | b") are combined as a
 * unit instead of being split by operator precedence.
 *
 *   mrm(md, r, m)  one byte: (md << 6) | (r << 3) | m
 *   sib(sc, i, b)  one byte: (sc << 6) | (i << 3) | b
 *   ic(c)          one element through pc.uc
 *   is(s)          one element through pc.us
 *   ii(i)          one element through pc.ui
 *   il(l)          one element through pc.ul when __X64 && !__X64_32,
 *                  otherwise the same as ii(l)
 */
#  define mrm(md, r, m)							\
	(*_jit->pc.uc++ = ((md) << 6) | ((r) << 3) | (m))
#  define sib(sc, i, b)							\
	(*_jit->pc.uc++ = ((sc) << 6) | ((i) << 3) | (b))
#  define ic(c)				(*_jit->pc.uc++ = (c))
#  define is(s)				(*_jit->pc.us++ = (s))
#  define ii(i)				(*_jit->pc.ui++ = (i))
#  if __X64 && !__X64_32
#    define il(l)			(*_jit->pc.ul++ = (l))
#  else
#    define il(l)			ii(l)
#  endif
/*
 * Patch helpers.  "instr" is an integer address; the field that is
 * overwritten ends exactly at that address.
 *
 *   patch_abs       stores label into the jit_word_t before instr
 *   patch_rel       stores (label - instr) into the jit_int32_t before instr
 *   patch_rel_char  stores (label - instr) into the jit_int8_t before instr
 */
#  define patch_abs(instr, label)					\
	(*(jit_word_t *)((instr) - sizeof(jit_word_t)) = (label))
#  define patch_rel(instr, label)					\
	(*(jit_int32_t *)((instr) - 4) = (label) - (instr))
#  define patch_rel_char(instr, label)					\
	(*(jit_int8_t *)((instr) - 1) = (label) - (instr))
145 #  define rex(l, w, r, x, b)		_rex(_jit, l, w, r, x, b)
146 static void
147 _rex(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
148 #  define rx(rd, md, rb, ri, ms)	_rx(_jit, rd, md, rb, ri, ms)
149 static void
150 _rx(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
151 #  define nop(n)			_nop(_jit, n)
152 static void _nop(jit_state_t*, jit_int32_t);
153 #  define emms()			is(0x770f)
154 #  define lea(md, rb, ri, ms, rd)	_lea(_jit, md, rb, ri, ms, rd)
155 static void
156 _lea(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
157 #  define pushr(r0)			_pushr(_jit, r0)
158 static void _pushr(jit_state_t*, jit_int32_t) maybe_unused;
159 #  define popr(r0)			_popr(_jit, r0)
160 static void _popr(jit_state_t*, jit_int32_t) maybe_unused;
161 #  define xchgr(r0, r1)			_xchgr(_jit, r0, r1)
162 static void _xchgr(jit_state_t*, jit_int32_t, jit_int32_t);
163 #  define testr(r0, r1)			_testr(_jit, r0, r1)
164 static void _testr(jit_state_t*, jit_int32_t, jit_int32_t);
165 #  define testi(r0, i0)			_testi(_jit, r0, i0)
166 static void _testi(jit_state_t*, jit_int32_t, jit_word_t);
167 #  define cc(code, r0)			_cc(_jit, code, r0)
168 static void _cc(jit_state_t*, jit_int32_t, jit_int32_t);
169 #  define icmpr(r0, r1)			alur(X86_CMP, r0, r1)
170 #  define alur(code, r0, r1)		_alur(_jit, code, r0, r1)
171 static void _alur(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
172 #  define icmpi(r0, i0)			alui(X86_CMP, r0, i0)
173 #  define alui(code, r0, i0)		_alui(_jit, code, r0, i0)
174 static void _alui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
175 #  define iaddr(r0, r1)			alur(X86_ADD, r0, r1)
176 #  define save(r0)			_save(_jit, r0)
177 static void _save(jit_state_t*, jit_int32_t);
178 #  define load(r0)			_load(_jit, r0)
179 static void _load(jit_state_t*, jit_int32_t);
180 #  define addr(r0, r1, r2)		_addr(_jit, r0, r1, r2)
181 static void _addr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
182 #  define iaddi(r0, i0)			alui(X86_ADD, r0, i0)
183 #  define addi(r0, r1, i0)		_addi(_jit, r0, r1, i0)
184 static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
185 #define addcr(r0, r1, r2)		_addcr(_jit, r0, r1, r2)
186 static void _addcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
187 #define addci(r0, r1, i0)		_addci(_jit, r0, r1, i0)
188 static void _addci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
189 #  define iaddxr(r0, r1)		alur(X86_ADC, r0, r1)
190 #  define addxr(r0, r1, r2)		_addxr(_jit, r0, r1, r2)
191 static void _addxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
192 #  define iaddxi(r0, i0)		alui(X86_ADC, r0, i0)
193 #  define addxi(r0, r1, i0)		_addxi(_jit, r0, r1, i0)
194 static void _addxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
195 #  define isubr(r0, r1)			alur(X86_SUB, r0, r1)
196 #  define subr(r0, r1, r2)		_subr(_jit, r0, r1, r2)
197 static void _subr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
198 #  define isubi(r0, i0)			alui(X86_SUB, r0, i0)
199 #  define subi(r0, r1, i0)		_subi(_jit, r0, r1, i0)
200 static void _subi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
201 #  define subcr(r0, r1, r2)		_subcr(_jit, r0, r1, r2)
202 static void _subcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
203 #  define subci(r0, r1, i0)		_subci(_jit, r0, r1, i0)
204 static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
205 #  define isubxr(r0, r1)		alur(X86_SBB, r0, r1)
206 #  define subxr(r0, r1, r2)		_subxr(_jit, r0, r1, r2)
207 static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
208 #  define isubxi(r0, i0)		alui(X86_SBB, r0, i0)
209 #  define subxi(r0, r1, i0)		_subxi(_jit, r0, r1, i0)
210 static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
211 #  define rsbi(r0, r1, i0)		_rsbi(_jit, r0, r1, i0)
212 static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
213 #  define imulr(r0, r1)			_imulr(_jit, r0, r1)
214 static void _imulr(jit_state_t*, jit_int32_t, jit_int32_t);
215 #  define imuli(r0, r1, i0)		_imuli(_jit, r0, r1, i0)
216 static void _imuli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
217 #  define mulr(r0, r1, r2)		_mulr(_jit, r0, r1, r2)
218 static void _mulr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
219 #  define muli(r0, r1, i0)		_muli(_jit, r0, r1, i0)
220 static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
221 #  define umulr(r0)			unr(X86_IMUL, r0)
222 #  define umulr_u(r0)			unr(X86_MUL, r0)
223 #  define qmulr(r0, r1, r2, r3)		_iqmulr(_jit, r0, r1, r2, r3, 1)
224 #  define qmulr_u(r0, r1, r2, r3)	_iqmulr(_jit, r0, r1, r2, r3, 0)
225 #  define iqmulr(r0, r1, r2, r3, sign)	_iqmulr(_jit, r0, r1, r2, r3, sign)
226 static void _iqmulr(jit_state_t*, jit_int32_t, jit_int32_t,
227 		    jit_int32_t,jit_int32_t, jit_bool_t);
228 #  define qmuli(r0, r1, r2, i0)		_iqmuli(_jit, r0, r1, r2, i0, 1)
229 #  define qmuli_u(r0, r1, r2, i0)	_iqmuli(_jit, r0, r1, r2, i0, 0)
230 #  define iqmuli(r0, r1, r2, i0, sign)	_iqmuli(_jit, r0, r1, r2, i0, sign)
231 static void _iqmuli(jit_state_t*, jit_int32_t, jit_int32_t,
232 		    jit_int32_t,jit_word_t, jit_bool_t);
233 #  define sign_extend_rdx_rax()		_sign_extend_rdx_rax(_jit)
234 static void _sign_extend_rdx_rax(jit_state_t*);
235 #  define idivr(r0)			unr(X86_IDIV, r0)
236 #  define idivr_u(r0)			unr(X86_DIV, r0)
237 #  define divremr(r0, r1, r2, i0, i1)	_divremr(_jit, r0, r1, r2, i0, i1)
238 static void
239 _divremr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,
240 	 jit_bool_t,jit_bool_t);
241 #  define divremi(r0, r1, i0, i1, i2)	_divremi(_jit, r0, r1, i0, i1, i2)
242 static void
243 _divremi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_bool_t,jit_bool_t);
244 #  define divr(r0, r1, r2)		divremr(r0, r1, r2, 1, 1)
245 #  define divi(r0, r1, i0)		divremi(r0, r1, i0, 1, 1)
246 #  define divr_u(r0, r1, r2)		divremr(r0, r1, r2, 0, 1)
247 #  define divi_u(r0, r1, i0)		divremi(r0, r1, i0, 0, 1)
248 #  define qdivr(r0, r1, r2, r3)		_iqdivr(_jit, r0, r1, r2, r3, 1)
249 #  define qdivr_u(r0, r1, r2, r3)	_iqdivr(_jit, r0, r1, r2, r3, 0)
250 #  define iqdivr(r0, r1, r2, r3, sign)	_iqdivr(_jit, r0, r1, r2, r3, sign)
251 static void _iqdivr(jit_state_t*, jit_int32_t, jit_int32_t,
252 		    jit_int32_t,jit_int32_t, jit_bool_t);
253 #  define qdivi(r0, r1, r2, i0)		_iqdivi(_jit, r0, r1, r2, i0, 1)
254 #  define qdivi_u(r0, r1, r2, i0)	_iqdivi(_jit, r0, r1, r2, i0, 0)
255 #  define iqdivi(r0, r1, r2, i0, sign)	_iqdivi(_jit, r0, r1, r2, i0, sign)
256 static void _iqdivi(jit_state_t*, jit_int32_t, jit_int32_t,
257 		    jit_int32_t,jit_word_t, jit_bool_t);
258 #  define remr(r0, r1, r2)		divremr(r0, r1, r2, 1, 0)
259 #  define remi(r0, r1, i0)		divremi(r0, r1, i0, 1, 0)
260 #  define remr_u(r0, r1, r2)		divremr(r0, r1, r2, 0, 0)
261 #  define remi_u(r0, r1, i0)		divremi(r0, r1, i0, 0, 0)
262 #  define iandr(r0, r1)			alur(X86_AND, r0, r1)
263 #  define andr(r0, r1, r2)		_andr(_jit, r0, r1, r2)
264 static void _andr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
265 #  define iandi(r0, i0)			alui(X86_AND, r0, i0)
266 #  define andi(r0, r1, i0)		_andi(_jit, r0, r1, i0)
267 static void _andi(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
268 #  define iorr(r0, r1)			alur(X86_OR, r0, r1)
269 #  define orr(r0, r1, r2)		_orr(_jit, r0, r1, r2)
270 static void _orr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t);
271 #  define iori(r0, i0)			alui(X86_OR, r0, i0)
272 #  define ori(r0, r1, i0)		_ori(_jit, r0, r1, i0)
273 static void _ori(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
274 #  define ixorr(r0, r1)			alur(X86_XOR, r0, r1)
275 #  define xorr(r0, r1, r2)		_xorr(_jit, r0, r1, r2)
276 static void _xorr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t);
277 #  define ixori(r0, i0)			alui(X86_XOR, r0, i0)
278 #  define xori(r0, r1, i0)		_xori(_jit, r0, r1, i0)
279 static void _xori(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
280 #  define irotshr(code, r0)		_irotshr(_jit, code, r0)
281 static void _irotshr(jit_state_t*, jit_int32_t, jit_int32_t);
282 #  define rotshr(code, r0, r1, r2)	_rotshr(_jit, code, r0, r1, r2)
283 static void
284 _rotshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
285 #  define irotshi(code, r0, i0)		_irotshi(_jit, code, r0, i0)
286 static void _irotshi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
287 #  define rotshi(code, r0, r1, i0)	_rotshi(_jit, code, r0, r1, i0)
288 static void
289 _rotshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
290 #  define lshr(r0, r1, r2)		rotshr(X86_SHL, r0, r1, r2)
291 #  define lshi(r0, r1, i0)		_lshi(_jit, r0, r1, i0)
292 static void _lshi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
293 #  define rshr(r0, r1, r2)		rotshr(X86_SAR, r0, r1, r2)
294 #  define rshi(r0, r1, i0)		rotshi(X86_SAR, r0, r1, i0)
295 #  define rshr_u(r0, r1, r2)		rotshr(X86_SHR, r0, r1, r2)
296 #  define rshi_u(r0, r1, i0)		rotshi(X86_SHR, r0, r1, i0)
297 #  define unr(code, r0)			_unr(_jit, code, r0)
298 static void _unr(jit_state_t*, jit_int32_t, jit_int32_t);
299 #  define inegr(r0)			unr(X86_NEG, r0)
300 #  define negr(r0, r1)			_negr(_jit, r0, r1)
301 static void _negr(jit_state_t*, jit_int32_t, jit_int32_t);
302 #  define icomr(r0)			unr(X86_NOT, r0)
303 #  define comr(r0, r1)			_comr(_jit, r0, r1)
304 static void _comr(jit_state_t*, jit_int32_t, jit_int32_t);
305 #  if USE_INC_DEC
306 #    define incr(r0, r1)		_incr(_jit, r0, r1)
307 static void _incr(jit_state_t*, jit_int32_t, jit_int32_t);
308 #    define decr(r0, r1)		_decr(_jit, r0, r1)
309 static void _decr(jit_state_t*, jit_int32_t, jit_int32_t);
310 #  endif
311 #  define cr(code, r0, r1, r2)		_cr(_jit, code, r0, r1, r2)
312 static void
313 _cr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
314 #  define ci(code, r0, r1, i0)		_ci(_jit, code, r0, r1, i0)
315 static void
316 _ci(jit_state_t *_jit, jit_int32_t, jit_int32_t, jit_int32_t, jit_word_t);
317 #  define ci0(code, r0, r1)		_ci0(_jit, code, r0, r1)
318 static void _ci0(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
319 #  define ltr(r0, r1, r2)		_ltr(_jit, r0, r1, r2)
320 static void _ltr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
321 #  define lti(r0, r1, i0)			_lti(_jit, r0, r1, i0)
322 static void _lti(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
323 #  define ltr_u(r0, r1, r2)		_ltr_u(_jit, r0, r1, r2)
324 static void _ltr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
325 #  define lti_u(r0, r1, i0)		ci(X86_CC_B, r0, r1, i0)
326 #  define ler(r0, r1, r2)		_ler(_jit, r0, r1, r2)
327 static void _ler(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
328 #  define lei(r0, r1, i0)		ci(X86_CC_LE, r0, r1, i0)
329 #  define ler_u(r0, r1, r2)		_ler_u(_jit, r0, r1, r2)
330 static void _ler_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
331 #  define lei_u(r0, r1, i0)		_lei_u(_jit, r0, r1, i0)
332 static void _lei_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
333 #  define eqr(r0, r1, r2)		_eqr(_jit, r0, r1, r2)
334 static void _eqr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
335 #  define eqi(r0, r1, i0)		_eqi(_jit, r0, r1, i0)
336 static void _eqi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
337 #  define ger(r0, r1, r2)		_ger(_jit, r0, r1, r2)
338 static void _ger(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
339 #  define gei(r0, r1, i0)		_gei(_jit, r0, r1, i0)
340 static void _gei(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
341 #  define ger_u(r0, r1, r2)		_ger_u(_jit, r0, r1, r2)
342 static void _ger_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
343 #  define gei_u(r0, r1, i0)		_gei_u(_jit, r0, r1, i0)
344 static void _gei_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
345 #  define gtr(r0, r1, r2)		_gtr(_jit, r0, r1, r2)
346 static void _gtr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
347 #  define gti(r0, r1, i0)		_ci(_jit, X86_CC_G, r0, r1, i0)
348 #  define gtr_u(r0, r1, r2)		_gtr_u(_jit, r0, r1, r2)
349 static void _gtr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
350 #  define gti_u(r0, r1, i0)		_gti_u(_jit, r0, r1, i0)
351 static void _gti_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
352 #  define ner(r0, r1, r2)		_ner(_jit, r0, r1, r2)
353 static void _ner(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
354 #  define nei(r0, r1, i0)		_nei(_jit, r0, r1, i0)
355 static void _nei(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
356 #  define movr(r0, r1)			_movr(_jit, r0, r1)
357 static void _movr(jit_state_t*, jit_int32_t, jit_int32_t);
358 #  define imovi(r0, i0)			_imovi(_jit, r0, i0)
359 static void _imovi(jit_state_t*, jit_int32_t, jit_word_t);
360 #  define movi(r0, i0)			_movi(_jit, r0, i0)
361 static void _movi(jit_state_t*, jit_int32_t, jit_word_t);
362 #  define movi_p(r0, i0)		_movi_p(_jit, r0, i0)
363 static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t);
364 #  define movcr(r0, r1)			_movcr(_jit, r0, r1)
365 static void _movcr(jit_state_t*,jit_int32_t,jit_int32_t);
366 #  define movcr_u(r0, r1)		_movcr_u(_jit, r0, r1)
367 static void _movcr_u(jit_state_t*,jit_int32_t,jit_int32_t);
368 #  define movsr(r0, r1)			_movsr(_jit, r0, r1)
369 static void _movsr(jit_state_t*,jit_int32_t,jit_int32_t);
370 #  define movsr_u(r0, r1)		_movsr_u(_jit, r0, r1)
371 static void _movsr_u(jit_state_t*,jit_int32_t,jit_int32_t);
372 #  if __X64 && !__X64_32
373 #    define movir(r0, r1)		_movir(_jit, r0, r1)
374 static void _movir(jit_state_t*,jit_int32_t,jit_int32_t);
375 #    define movir_u(r0, r1)		_movir_u(_jit, r0, r1)
376 static void _movir_u(jit_state_t*,jit_int32_t,jit_int32_t);
377 #  endif
378 #  define htonr_us(r0, r1)		_htonr_us(_jit, r0, r1)
379 static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
380 #  define htonr_ui(r0, r1)		_htonr_ui(_jit, r0, r1)
381 static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
382 #  if __X64 && !__X64_32
383 #define htonr_ul(r0, r1)		_htonr_ul(_jit, r0, r1)
384 static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
385 #endif
386 #  define extr_c(r0, r1)		_extr_c(_jit, r0, r1)
387 static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
388 #  define extr_uc(r0, r1)		_extr_uc(_jit, r0, r1)
389 static void _extr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
390 #  define extr_s(r0, r1)		movsr(r0, r1)
391 #  define extr_us(r0, r1)		movsr_u(r0, r1)
392 #  if __X64 && !__X64_32
393 #    define extr_i(r0, r1)		movir(r0, r1)
394 #    define extr_ui(r0, r1)		movir_u(r0, r1)
395 #  endif
396 #  define ldr_c(r0, r1)			_ldr_c(_jit, r0, r1)
397 static void _ldr_c(jit_state_t*, jit_int32_t, jit_int32_t);
398 #  define ldi_c(r0, i0)			_ldi_c(_jit, r0, i0)
399 static void _ldi_c(jit_state_t*, jit_int32_t, jit_word_t);
400 #  define ldr_uc(r0, r1)		_ldr_uc(_jit, r0, r1)
401 static void _ldr_uc(jit_state_t*, jit_int32_t, jit_int32_t);
402 #  define ldi_uc(r0, i0)		_ldi_uc(_jit, r0, i0)
403 static void _ldi_uc(jit_state_t*, jit_int32_t, jit_word_t);
404 #  define ldr_s(r0, r1)			_ldr_s(_jit, r0, r1)
405 static void _ldr_s(jit_state_t*, jit_int32_t, jit_int32_t);
406 #  define ldi_s(r0, i0)			_ldi_s(_jit, r0, i0)
407 static void _ldi_s(jit_state_t*, jit_int32_t, jit_word_t);
408 #  define ldr_us(r0, r1)		_ldr_us(_jit, r0, r1)
409 static void _ldr_us(jit_state_t*, jit_int32_t, jit_int32_t);
410 #  define ldi_us(r0, i0)		_ldi_us(_jit, r0, i0)
411 static void _ldi_us(jit_state_t*, jit_int32_t, jit_word_t);
412 #  if __X32 || !__X64_32
413 #    define ldr_i(r0, r1)		_ldr_i(_jit, r0, r1)
414 static void _ldr_i(jit_state_t*, jit_int32_t, jit_int32_t);
415 #    define ldi_i(r0, i0)		_ldi_i(_jit, r0, i0)
416 static void _ldi_i(jit_state_t*, jit_int32_t, jit_word_t);
417 #  endif
418 #  if __X64
419 #    if __X64_32
420 #      define ldr_i(r0, r1)		_ldr_ui(_jit, r0, r1)
421 #      define ldi_i(r0, i0)		_ldi_ui(_jit, r0, i0)
422 #    else
423 #      define ldr_ui(r0, r1)		_ldr_ui(_jit, r0, r1)
424 #      define ldi_ui(r0, i0)		_ldi_ui(_jit, r0, i0)
425 #    endif
426 static void _ldr_ui(jit_state_t*, jit_int32_t, jit_int32_t);
427 static void _ldi_ui(jit_state_t*, jit_int32_t, jit_word_t);
428 #    if !__X64_32
429 #      define ldr_l(r0, r1)		_ldr_l(_jit, r0, r1)
430 static void _ldr_l(jit_state_t*, jit_int32_t, jit_int32_t);
431 #      define ldi_l(r0, i0)		_ldi_l(_jit, r0, i0)
432 static void _ldi_l(jit_state_t*, jit_int32_t, jit_word_t);
433 #    endif
434 #  endif
435 #  define ldxr_c(r0, r1, r2)		_ldxr_c(_jit, r0, r1, r2)
436 static void _ldxr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
437 #  define ldxi_c(r0, r1, i0)		_ldxi_c(_jit, r0, r1, i0)
438 static void _ldxi_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
439 #  define ldxr_uc(r0, r1, r2)		_ldxr_uc(_jit, r0, r1, r2)
440 static void _ldxr_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
441 #  define ldxi_uc(r0, r1, i0)		_ldxi_uc(_jit, r0, r1, i0)
442 static void _ldxi_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
443 #  define ldxr_s(r0, r1, r2)		_ldxr_s(_jit, r0, r1, r2)
444 static void _ldxr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
445 #  define ldxi_s(r0, r1, i0)		_ldxi_s(_jit, r0, r1, i0)
446 static void _ldxi_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
447 #  define ldxr_us(r0, r1, r2)		_ldxr_us(_jit, r0, r1, r2)
448 static void _ldxr_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
449 #  define ldxi_us(r0, r1, i0)		_ldxi_us(_jit, r0, r1, i0)
450 static void _ldxi_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
451 #  if __X32 || !__X64_32
452 #    define ldxr_i(r0, r1, r2)		_ldxr_i(_jit, r0, r1, r2)
453 static void _ldxr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
454 #    define ldxi_i(r0, r1, i0)		_ldxi_i(_jit, r0, r1, i0)
455 static void _ldxi_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
456 #  endif
457 #  if __X64
458 #    if __X64_32
459 #      define ldxr_i(r0, r1, r2)	_ldxr_ui(_jit, r0, r1, r2)
460 #      define ldxi_i(r0, r1, i0)	_ldxi_ui(_jit, r0, r1, i0)
461 #    else
462 #      define ldxr_ui(r0, r1, r2)	_ldxr_ui(_jit, r0, r1, r2)
463 #      define ldxi_ui(r0, r1, i0)	_ldxi_ui(_jit, r0, r1, i0)
464 #    endif
465 static void _ldxr_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
466 static void _ldxi_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
467 #    if !__X64_32
468 #      define ldxr_l(r0, r1, r2)	_ldxr_l(_jit, r0, r1, r2)
469 static void _ldxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
470 #      define ldxi_l(r0, r1, i0)	_ldxi_l(_jit, r0, r1, i0)
471 static void _ldxi_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
472 #    endif
473 #  endif
474 #  define str_c(r0, r1)			_str_c(_jit, r0, r1)
475 static void _str_c(jit_state_t*, jit_int32_t, jit_int32_t);
476 #  define sti_c(i0, r0)			_sti_c(_jit, i0, r0)
477 static void _sti_c(jit_state_t*, jit_word_t, jit_int32_t);
478 #  define str_s(r0, r1)			_str_s(_jit, r0, r1)
479 static void _str_s(jit_state_t*, jit_int32_t, jit_int32_t);
480 #  define sti_s(i0, r0)			_sti_s(_jit, i0, r0)
481 static void _sti_s(jit_state_t*, jit_word_t, jit_int32_t);
482 #  define str_i(r0, r1)			_str_i(_jit, r0, r1)
483 static void _str_i(jit_state_t*, jit_int32_t, jit_int32_t);
484 #  define sti_i(i0, r0)			_sti_i(_jit, i0, r0)
485 static void _sti_i(jit_state_t*, jit_word_t, jit_int32_t);
486 #  if __X64 && !__X64_32
487 #    define str_l(r0, r1)		_str_l(_jit, r0, r1)
488 static void _str_l(jit_state_t*, jit_int32_t, jit_int32_t);
489 #    define sti_l(i0, r0)		_sti_l(_jit, i0, r0)
490 static void _sti_l(jit_state_t*, jit_word_t, jit_int32_t);
491 #  endif
492 #  define stxr_c(r0, r1, r2)		_stxr_c(_jit, r0, r1, r2)
493 static void _stxr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
494 #  define stxi_c(i0, r0, r1)		_stxi_c(_jit, i0, r0, r1)
495 static void _stxi_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
496 #  define stxr_s(r0, r1, r2)		_stxr_s(_jit, r0, r1, r2)
497 static void _stxr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
498 #  define stxi_s(i0, r0, r1)		_stxi_s(_jit, i0, r0, r1)
499 static void _stxi_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
500 #  define stxr_i(r0, r1, r2)		_stxr_i(_jit, r0, r1, r2)
501 static void _stxr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
502 #  define stxi_i(i0, r0, r1)		_stxi_i(_jit, i0, r0, r1)
503 static void _stxi_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
504 #  if __X64 && !__X64_32
505 #    define stxr_l(r0, r1, r2)		_stxr_l(_jit, r0, r1, r2)
506 static void _stxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
507 #    define stxi_l(i0, r0, r1)		_stxi_l(_jit, i0, r0, r1)
508 static void _stxi_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
509 #  endif
510 #  define jcc(code, i0)			_jcc(_jit, code, i0)
511 #  define jo(i0)			jcc(X86_CC_O, i0)
512 #  define jno(i0)			jcc(X86_CC_NO, i0)
513 #  define jnae(i0)			jcc(X86_CC_NAE, i0)
514 #  define jb(i0)			jcc(X86_CC_B, i0)
515 #  define jc(i0)			jcc(X86_CC_C, i0)
516 #  define jae(i0)			jcc(X86_CC_AE, i0)
517 #  define jnb(i0)			jcc(X86_CC_NB, i0)
518 #  define jnc(i0)			jcc(X86_CC_NC, i0)
519 #  define je(i0)			jcc(X86_CC_E, i0)
520 #  define jz(i0)			jcc(X86_CC_Z, i0)
521 #  define jne(i0)			jcc(X86_CC_NE, i0)
522 #  define jnz(i0)			jcc(X86_CC_NZ, i0)
523 #  define jbe(i0)			jcc(X86_CC_BE, i0)
524 #  define jna(i0)			jcc(X86_CC_NA, i0)
525 #  define ja(i0)			jcc(X86_CC_A, i0)
526 #  define jnbe(i0)			jcc(X86_CC_NBE, i0)
527 #  define js(i0)			jcc(X86_CC_S, i0)
528 #  define jns(i0)			jcc(X86_CC_NS, i0)
529 #  define jp(i0)			jcc(X86_CC_P, i0)
530 #  define jpe(i0)			jcc(X86_CC_PE, i0)
531 #  define jnp(i0)			jcc(X86_CC_NP, i0)
532 #  define jpo(i0)			jcc(X86_CC_PO, i0)
533 #  define jl(i0)			jcc(X86_CC_L, i0)
534 #  define jnge(i0)			jcc(X86_CC_NGE, i0)
535 #  define jge(i0)			jcc(X86_CC_GE, i0)
536 #  define jnl(i0)			jcc(X86_CC_NL, i0)
537 #  define jle(i0)			jcc(X86_CC_LE, i0)
538 #  define jng(i0)			jcc(X86_CC_NG, i0)
539 #  define jg(i0)			jcc(X86_CC_G, i0)
540 #  define jnle(i0)			jcc(X86_CC_NLE, i0)
541 static void _jcc(jit_state_t*, jit_int32_t, jit_word_t);
542 #  define jccs(code, i0)		_jccs(_jit, code, i0)
543 #  define jos(i0)			jccs(X86_CC_O, i0)
544 #  define jnos(i0)			jccs(X86_CC_NO, i0)
545 #  define jnaes(i0)			jccs(X86_CC_NAE, i0)
546 #  define jbs(i0)			jccs(X86_CC_B, i0)
547 #  define jcs(i0)			jccs(X86_CC_C, i0)
548 #  define jaes(i0)			jccs(X86_CC_AE, i0)
549 #  define jnbs(i0)			jccs(X86_CC_NB, i0)
550 #  define jncs(i0)			jccs(X86_CC_NC, i0)
551 #  define jes(i0)			jccs(X86_CC_E, i0)
552 #  define jzs(i0)			jccs(X86_CC_Z, i0)
553 #  define jnes(i0)			jccs(X86_CC_NE, i0)
554 #  define jnzs(i0)			jccs(X86_CC_NZ, i0)
555 #  define jbes(i0)			jccs(X86_CC_BE, i0)
556 #  define jnas(i0)			jccs(X86_CC_NA, i0)
557 #  define jas(i0)			jccs(X86_CC_A, i0)
558 #  define jnbes(i0)			jccs(X86_CC_NBE, i0)
559 #  define jss(i0)			jccs(X86_CC_S, i0)
560 #  define jnss(i0)			jccs(X86_CC_NS, i0)
561 #  define jps(i0)			jccs(X86_CC_P, i0)
562 #  define jpes(i0)			jccs(X86_CC_PE, i0)
563 #  define jnps(i0)			jccs(X86_CC_NP, i0)
564 #  define jpos(i0)			jccs(X86_CC_PO, i0)
565 #  define jls(i0)			jccs(X86_CC_L, i0)
566 #  define jnges(i0)			jccs(X86_CC_NGE, i0)
567 #  define jges(i0)			jccs(X86_CC_GE, i0)
568 #  define jnls(i0)			jccs(X86_CC_NL, i0)
569 #  define jles(i0)			jccs(X86_CC_LE, i0)
570 #  define jngs(i0)			jccs(X86_CC_NG, i0)
571 #  define jgs(i0)			jccs(X86_CC_G, i0)
572 #  define jnles(i0)			jccs(X86_CC_NLE, i0)
573 static void _jccs(jit_state_t*, jit_int32_t, jit_word_t);
574 #  define jcr(code, i0, r0, r1)		_jcr(_jit, code, i0, r0, r1)
575 static void _jcr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
576 #  define jci(code, i0, r0, i1)		_jci(_jit, code, i0, r0, i1)
577 static void _jci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
578 #  define jci0(code, i0, r0)		_jci0(_jit, code, i0, r0)
579 static void _jci0(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t);
580 #  define bltr(i0, r0, r1)		_bltr(_jit, i0, r0, r1)
581 static jit_word_t _bltr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
582 #  define blti(i0, r0, i1)		_blti(_jit, i0, r0, i1)
583 static jit_word_t _blti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
584 #  define bltr_u(i0, r0, r1)		_bltr_u(_jit, i0, r0, r1)
585 static jit_word_t _bltr_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
586 #  define blti_u(i0, r0, i1)		_blti_u(_jit, i0, r0, i1)
587 static jit_word_t _blti_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
588 #  define bler(i0, r0, r1)		_bler(_jit, i0, r0, r1)
589 static jit_word_t _bler(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
590 #  define blei(i0, r0, i1)		_blei(_jit, i0, r0, i1)
591 static jit_word_t _blei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
592 #  define bler_u(i0, r0, r1)		_bler_u(_jit, i0, r0, r1)
593 static jit_word_t _bler_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
594 #  define blei_u(i0, r0, i1)		_blei_u(_jit, i0, r0, i1)
595 static jit_word_t _blei_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
596 #  define beqr(i0, r0, r1)		_beqr(_jit, i0, r0, r1)
597 static jit_word_t _beqr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
598 #  define beqi(i0, r0, i1)		_beqi(_jit, i0, r0, i1)
599 static jit_word_t _beqi(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
600 #  define bger(i0, r0, r1)		_bger(_jit, i0, r0, r1)
601 static jit_word_t _bger(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
602 #  define bgei(i0, r0, i1)		_bgei(_jit, i0, r0, i1)
603 static jit_word_t _bgei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
604 #  define bger_u(i0, r0, r1)		_bger_u(_jit, i0, r0, r1)
605 static jit_word_t _bger_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
606 #  define bgei_u(i0, r0, i1)		_bgei_u(_jit, i0, r0, i1)
607 static jit_word_t _bgei_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
608 #  define bgtr(i0, r0, r1)		_bgtr(_jit, i0, r0, r1)
609 static jit_word_t _bgtr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
610 #  define bgti(i0, r0, i1)		_bgti(_jit, i0, r0, i1)
611 static jit_word_t _bgti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
612 #  define bgtr_u(i0, r0, r1)		_bgtr_u(_jit, i0, r0, r1)
613 static jit_word_t _bgtr_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
614 #  define bgti_u(i0, r0, i1)		_bgti_u(_jit, i0, r0, i1)
615 static jit_word_t _bgti_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
616 #  define bner(i0, r0, r1)		_bner(_jit, i0, r0, r1)
617 static jit_word_t _bner(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
618 #  define bnei(i0, r0, i1)		_bnei(_jit, i0, r0, i1)
619 static jit_word_t _bnei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
620 #  define bmsr(i0, r0, r1)		_bmsr(_jit, i0, r0, r1)
621 static jit_word_t _bmsr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
622 #  define bmsi(i0, r0, i1)		_bmsi(_jit, i0, r0, i1)
623 static jit_word_t _bmsi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
624 #  define bmcr(i0, r0, r1)		_bmcr(_jit, i0, r0, r1)
625 static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
626 #  define bmci(i0, r0, i1)		_bmci(_jit, i0, r0, i1)
627 static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
628 #  define boaddr(i0, r0, r1)		_boaddr(_jit, i0, r0, r1)
629 static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
630 #  define boaddi(i0, r0, i1)		_boaddi(_jit, i0, r0, i1)
631 static jit_word_t _boaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
632 #  define boaddr_u(i0, r0, r1)		_boaddr_u(_jit, i0, r0, r1)
633 static jit_word_t _boaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
634 #  define boaddi_u(i0, r0, i1)		_boaddi_u(_jit, i0, r0, i1)
635 static jit_word_t _boaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
636 #  define bxaddr(i0, r0, r1)		_bxaddr(_jit, i0, r0, r1)
637 static jit_word_t _bxaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
638 #  define bxaddi(i0, r0, i1)		_bxaddi(_jit, i0, r0, i1)
639 static jit_word_t _bxaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
640 #  define bxaddr_u(i0, r0, r1)		_bxaddr_u(_jit, i0, r0, r1)
641 static jit_word_t _bxaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
642 #  define bxaddi_u(i0, r0, i1)		_bxaddi_u(_jit, i0, r0, i1)
643 static jit_word_t _bxaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
644 #  define bosubr(i0, r0, r1)		_bosubr(_jit, i0, r0, r1)
645 static jit_word_t _bosubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
646 #  define bosubi(i0, r0, i1)		_bosubi(_jit, i0, r0, i1)
647 static jit_word_t _bosubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
648 #  define bosubr_u(i0, r0, r1)		_bosubr_u(_jit, i0, r0, r1)
649 static jit_word_t _bosubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
650 #  define bosubi_u(i0, r0, i1)		_bosubi_u(_jit, i0, r0, i1)
651 static jit_word_t _bosubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
652 #  define bxsubr(i0, r0, r1)		_bxsubr(_jit, i0, r0, r1)
653 static jit_word_t _bxsubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
654 #  define bxsubi(i0, r0, i1)		_bxsubi(_jit, i0, r0, i1)
655 static jit_word_t _bxsubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
656 #  define bxsubr_u(i0, r0, r1)		_bxsubr_u(_jit, i0, r0, r1)
657 static jit_word_t _bxsubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
658 #  define bxsubi_u(i0, r0, i1)		_bxsubi_u(_jit, i0, r0, i1)
659 static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
660 #  define callr(r0)			_callr(_jit, r0)
661 static void _callr(jit_state_t*, jit_int32_t);
662 #  define calli(i0)			_calli(_jit, i0)
663 static jit_word_t _calli(jit_state_t*, jit_word_t);
664 #  define jmpr(r0)			_jmpr(_jit, r0)
665 static void _jmpr(jit_state_t*, jit_int32_t);
666 #  define jmpi(i0)			_jmpi(_jit, i0)
667 static jit_word_t _jmpi(jit_state_t*, jit_word_t);
668 #  define jmpsi(i0)			_jmpsi(_jit, i0)
669 static void _jmpsi(jit_state_t*, jit_uint8_t);
670 #  define prolog(node)			_prolog(_jit, node)
671 static void _prolog(jit_state_t*, jit_node_t*);
672 #  define epilog(node)			_epilog(_jit, node)
673 static void _epilog(jit_state_t*, jit_node_t*);
674 #  define vastart(r0)			_vastart(_jit, r0)
675 static void _vastart(jit_state_t*, jit_int32_t);
676 #  define vaarg(r0, r1)			_vaarg(_jit, r0, r1)
677 static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
678 #  define vaarg_d(r0, r1, i0)		_vaarg_d(_jit, r0, r1, i0)
679 static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_bool_t);
680 #  define patch_at(node, instr, label)	_patch_at(_jit, node, instr, label)
681 static void _patch_at(jit_state_t*, jit_node_t*, jit_word_t, jit_word_t);
/*
 * Fallback for C libraries that do not provide ffsl().  The code in this
 * file calls ffsl() on jit_word_t immediates.  __builtin_ffsl() takes a
 * `long`, which is only 32 bits wide on LLP64 targets (e.g. 64-bit
 * Windows), so a word-sized value with only high bits set would be
 * truncated and reported as having no bit set.  __builtin_ffsll() takes a
 * `long long` and therefore covers every word size handled here.
 */
#  if !defined(HAVE_FFSL)
#    if __X32
#      define ffsl(i)			__builtin_ffs(i)
#    else
#      define ffsl(l)			__builtin_ffsll(l)
#    endif
#  endif
689 #endif
690 
691 #if CODE
692 static void
_rex(jit_state_t * _jit,jit_int32_t l,jit_int32_t w,jit_int32_t r,jit_int32_t x,jit_int32_t b)693 _rex(jit_state_t *_jit, jit_int32_t l, jit_int32_t w,
694      jit_int32_t r, jit_int32_t x, jit_int32_t b)
695 {
696 #if __X64
697     jit_int32_t	v = 0x40 | (w << 3);
698 
699     if (r != _NOREG)
700 	v |= (r & 8) >> 1;
701     if (x != _NOREG)
702 	v |= (x & 8) >> 2;
703     if (b != _NOREG)
704 	v |= (b & 8) >> 3;
705     if (l || v != 0x40)
706 	ic(v);
707 #endif
708 }
709 
710 static void
_rx(jit_state_t * _jit,jit_int32_t rd,jit_int32_t md,jit_int32_t rb,jit_int32_t ri,jit_int32_t ms)711 _rx(jit_state_t *_jit, jit_int32_t rd, jit_int32_t md,
712     jit_int32_t rb, jit_int32_t ri, jit_int32_t ms)
713 {
714     if (ri == _NOREG) {
715 	if (rb == _NOREG) {
716 #if __X32
717 	    mrm(0x00, r7(rd), 0x05);
718 #else
719 	    mrm(0x00, r7(rd), 0x04);
720 	    sib(_SCL1, 0x04, 0x05);
721 #endif
722 	    ii(md);
723 	}
724 	else if (r7(rb) == _RSP_REGNO) {
725 	    if (md == 0) {
726 		mrm(0x00, r7(rd), 0x04);
727 		sib(ms, 0x04, 0x04);
728 	    }
729 	    else if ((jit_int8_t)md == md) {
730 		mrm(0x01, r7(rd), 0x04);
731 		sib(ms, 0x04, 0x04);
732 		ic(md);
733 	    }
734 	    else {
735 		mrm(0x02, r7(rd), 0x04);
736 		sib(ms, 0x04, 0x04);
737 		ii(md);
738 	    }
739 	}
740 	else {
741 	    if (md == 0 && r7(rb) != _RBP_REGNO)
742 		mrm(0x00, r7(rd), r7(rb));
743 	    else if ((jit_int8_t)md == md) {
744 		mrm(0x01, r7(rd), r7(rb));
745 		ic(md);
746 	    }
747 	    else {
748 		mrm(0x02, r7(rd), r7(rb));
749 		ii(md);
750 	    }
751 	}
752     }
753     else if (rb == _NOREG) {
754 	mrm(0x00, r7(rd), 0x04);
755 	sib(ms, r7(ri), 0x05);
756 	ii(md);
757     }
758     else if (r8(ri) != _RSP_REGNO) {
759 	if (md == 0 && r7(rb) != _RBP_REGNO) {
760 	    mrm(0x00, r7(rd), 0x04);
761 	    sib(ms, r7(ri), r7(rb));
762 	}
763 	else if ((jit_int8_t)md == md) {
764 	    mrm(0x01, r7(rd), 0x04);
765 	    sib(ms, r7(ri), r7(rb));
766 	    ic(md);
767 	}
768 	else {
769 	    mrm(0x02, r7(rd), 0x04);
770 	    sib(ms, r7(ri), r7(rb));
771 	    ic(md);
772 	}
773     }
774     else {
775 	fprintf(stderr, "illegal index register");
776 	abort();
777     }
778 }
779 
780 static void
_nop(jit_state_t * _jit,jit_int32_t count)781 _nop(jit_state_t *_jit, jit_int32_t count)
782 {
783     switch (count) {
784 	case 0:
785 	    break;
786 	case 1:		/* NOP */
787 	    ic(0x90);	break;
788 	case 2:		/* 66 NOP */
789 	    ic(0x66);	ic(0x90);
790 	    break;
791 	case 3:		/* NOP DWORD ptr [EAX] */
792 	    ic(0x0f);	ic(0x1f);	ic(0x00);
793 	    break;
794 	case 4:		/* NOP DWORD ptr [EAX + 00H] */
795 	    ic(0x0f);	ic(0x1f);	ic(0x40);	ic(0x00);
796 	    break;
797 	case 5:		/* NOP DWORD ptr [EAX + EAX*1 + 00H] */
798 	    ic(0x0f);	ic(0x1f);	ic(0x44);	ic(0x00);
799 	    ic(0x00);
800 	    break;
801 	case 6:		/* 66 NOP DWORD ptr [EAX + EAX*1 + 00H] */
802 	    ic(0x66);	ic(0x0f);	ic(0x1f);	ic(0x44);
803 	    ic(0x00);	ic(0x00);
804 	    break;
805 	case 7:		/* NOP DWORD ptr [EAX + 00000000H] */
806 	    ic(0x0f);	ic(0x1f);	ic(0x80);	ii(0x0000);
807 	    break;
808 	case 8:		/* NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
809 	    ic(0x0f);	ic(0x1f);	ic(0x84);	ic(0x00);
810 	    ii(0x0000);
811 	    break;
812 	case 9:		/* 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
813 	    ic(0x66);	ic(0x0f);	ic(0x1f);	ic(0x84);
814 	    ic(0x00);	ii(0x0000);
815 	    break;
816 	default:
817 	    abort();
818     }
819 }
820 
821 static void
_lea(jit_state_t * _jit,jit_int32_t md,jit_int32_t rb,jit_int32_t ri,jit_int32_t ms,jit_int32_t rd)822 _lea(jit_state_t *_jit, jit_int32_t md, jit_int32_t rb,
823      jit_int32_t ri, jit_int32_t ms, jit_int32_t rd)
824 {
825     rex(0, WIDE, rd, ri, rb);
826     ic(0x8d);
827     rx(rd, md, rb, ri, ms);
828 }
829 
830 static void
_pushr(jit_state_t * _jit,jit_int32_t r0)831 _pushr(jit_state_t *_jit, jit_int32_t r0)
832 {
833     rex(0, WIDE, 0, 0, r0);
834     ic(0x50 | r7(r0));
835 }
836 
837 static void
_popr(jit_state_t * _jit,jit_int32_t r0)838 _popr(jit_state_t *_jit, jit_int32_t r0)
839 {
840     rex(0, WIDE, 0, 0, r0);
841     ic(0x58 | r7(r0));
842 }
843 
844 static void
_xchgr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)845 _xchgr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
846 {
847     rex(0, WIDE, r1, _NOREG, r0);
848     ic(0x87);
849     mrm(0x03, r7(r1), r7(r0));
850 }
851 
852 static void
_testr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)853 _testr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
854 {
855     rex(0, WIDE, r1, _NOREG, r0);
856     ic(0x85);
857     mrm(0x03, r7(r1), r7(r0));
858 }
859 
860 static void
_testi(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)861 _testi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
862 {
863     rex(0, WIDE, _NOREG, _NOREG, r0);
864     if (r0 == _RAX_REGNO)
865 	ic(0xa9);
866     else {
867 	ic(0xf7);
868 	mrm(0x03, 0x00, r7(r0));
869     }
870     ii(i0);
871 }
872 
873 static void
_cc(jit_state_t * _jit,jit_int32_t code,jit_int32_t r0)874 _cc(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
875 {
876     rex(0, 0, _NOREG, _NOREG, r0);
877     ic(0x0f);
878     ic(0x90 | code);
879     mrm(0x03, 0x00, r7(r0));
880 }
881 
882 static void
_alur(jit_state_t * _jit,jit_int32_t code,jit_int32_t r0,jit_int32_t r1)883 _alur(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_int32_t r1)
884 {
885     rex(0, WIDE, r1, _NOREG, r0);
886     ic(code | 0x01);
887     mrm(0x03, r7(r1), r7(r0));
888 }
889 
890 static void
_alui(jit_state_t * _jit,jit_int32_t code,jit_int32_t r0,jit_word_t i0)891 _alui(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_word_t i0)
892 {
893     jit_int32_t		reg;
894     if (can_sign_extend_int_p(i0)) {
895 	rex(0, WIDE, _NOREG, _NOREG, r0);
896 	if ((jit_int8_t)i0 == i0) {
897 	    ic(0x83);
898 	    ic(0xc0 | code | r7(r0));
899 	    ic(i0);
900 	}
901 	else {
902 	    if (r0 == _RAX_REGNO)
903 		ic(code | 0x05);
904 	    else {
905 		ic(0x81);
906 		ic(0xc0 | code | r7(r0));
907 	    }
908 	    ii(i0);
909 	}
910     }
911     else {
912 	reg = jit_get_reg(jit_class_gpr);
913 	movi(rn(reg), i0);
914 	alur(code, r0, rn(reg));
915 	jit_unget_reg(reg);
916     }
917 }
918 
919 static void
_save(jit_state_t * _jit,jit_int32_t r0)920 _save(jit_state_t *_jit, jit_int32_t r0)
921 {
922     if (!_jitc->function->regoff[r0]) {
923 	_jitc->function->regoff[r0] = jit_allocai(sizeof(jit_word_t));
924 	_jitc->again = 1;
925     }
926     assert(!jit_regset_tstbit(&_jitc->regsav, r0));
927     jit_regset_setbit(&_jitc->regsav, r0);
928     stxi(_jitc->function->regoff[r0], _RBP_REGNO, r0);
929 }
930 
931 static void
_load(jit_state_t * _jit,jit_int32_t r0)932 _load(jit_state_t *_jit, jit_int32_t r0)
933 {
934     assert(_jitc->function->regoff[r0]);
935     assert(jit_regset_tstbit(&_jitc->regsav, r0));
936     jit_regset_clrbit(&_jitc->regsav, r0);
937     ldxi(r0, _RBP_REGNO, _jitc->function->regoff[r0]);
938 }
939 
940 static void
_addr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)941 _addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
942 {
943     if (r0 == r1)
944 	iaddr(r0, r2);
945     else if (r0 == r2)
946 	iaddr(r0, r1);
947     else
948 	lea(0, r1, r2, _SCL1, r0);
949 }
950 
951 static void
_addi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)952 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
953 {
954     jit_int32_t		reg;
955     if (i0 == 0)
956 	movr(r0, r1);
957 #if USE_INC_DEC
958     else if (i0 == 1)
959 	incr(r0, r1);
960     else if (i0 == -1)
961 	decr(r0, r1);
962 #endif
963     else if (can_sign_extend_int_p(i0)) {
964 	if (r0 == r1)
965 	    iaddi(r0, i0);
966 	else
967 	    lea(i0, r1, _NOREG, _SCL1, r0);
968     }
969     else if (r0 != r1) {
970 	movi(r0, i0);
971 	iaddr(r0, r1);
972     }
973     else {
974 	reg = jit_get_reg(jit_class_gpr);
975 	movi(rn(reg), i0);
976 	iaddr(r0, rn(reg));
977 	jit_unget_reg(reg);
978     }
979 }
980 
981 static void
_addcr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)982 _addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
983 {
984     if (r0 == r2)
985 	iaddr(r0, r1);
986     else {
987 	movr(r0, r1);
988 	iaddr(r0, r2);
989     }
990 }
991 
992 static void
_addci(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)993 _addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
994 {
995     jit_int32_t		reg;
996     if (can_sign_extend_int_p(i0)) {
997 	movr(r0, r1);
998 	iaddi(r0, i0);
999     }
1000     else if (r0 == r1) {
1001 	reg = jit_get_reg(jit_class_gpr);
1002 	movi(rn(reg), i0);
1003 	iaddr(r0, rn(reg));
1004 	jit_unget_reg(reg);
1005     }
1006     else {
1007 	movi(r0, i0);
1008 	iaddr(r0, r1);
1009     }
1010 }
1011 
1012 static void
_addxr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1013 _addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1014 {
1015     if (r0 == r2)
1016 	iaddxr(r0, r1);
1017     else {
1018 	movr(r0, r1);
1019 	iaddxr(r0, r2);
1020     }
1021 }
1022 
1023 static void
_addxi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1024 _addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1025 {
1026     jit_int32_t		reg;
1027     if (can_sign_extend_int_p(i0)) {
1028 	movr(r0, r1);
1029 	iaddxi(r0, i0);
1030     }
1031     else if (r0 == r1) {
1032 	reg = jit_get_reg(jit_class_gpr);
1033 	movi(rn(reg), i0);
1034 	iaddxr(r0, rn(reg));
1035 	jit_unget_reg(reg);
1036     }
1037     else {
1038 	movi(r0, i0);
1039 	iaddxr(r0, r1);
1040     }
1041 }
1042 
1043 static void
_subr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1044 _subr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1045 {
1046     if (r1 == r2)
1047 	ixorr(r0, r0);
1048     else if (r0 == r2) {
1049 	isubr(r0, r1);
1050 	inegr(r0);
1051     }
1052     else {
1053 	movr(r0, r1);
1054 	isubr(r0, r2);
1055     }
1056 }
1057 
1058 static void
_subi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1059 _subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1060 {
1061     jit_int32_t		reg;
1062     if (i0 == 0)
1063 	movr(r0, r1);
1064 #if USE_INC_DEC
1065     else if (i0 == 1)
1066 	decr(r0, r1);
1067     else if (i0 == -1)
1068 	incr(r0, r1);
1069 #endif
1070     else if (can_sign_extend_int_p(i0)) {
1071 	if (r0 == r1)
1072 	    isubi(r0, i0);
1073 	else
1074 	    lea(-i0, r1, _NOREG, _SCL1, r0);
1075     }
1076     else if (r0 != r1) {
1077 	movi(r0, -i0);
1078 	iaddr(r0, r1);
1079     }
1080     else {
1081 	reg = jit_get_reg(jit_class_gpr);
1082 	movi(rn(reg), i0);
1083 	isubr(r0, rn(reg));
1084 	jit_unget_reg(reg);
1085     }
1086 }
1087 
1088 static void
_subcr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1089 _subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1090 {
1091     jit_int32_t		reg;
1092     if (r0 == r2 && r0 != r1) {
1093 	reg = jit_get_reg(jit_class_gpr);
1094 	movr(rn(reg), r0);
1095 	movr(r0, r1);
1096 	isubr(r0, rn(reg));
1097 	jit_unget_reg(reg);
1098     }
1099     else {
1100 	movr(r0, r1);
1101 	isubr(r0, r2);
1102     }
1103 }
1104 
1105 static void
_subci(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1106 _subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1107 {
1108     jit_int32_t		reg;
1109     movr(r0, r1);
1110     if (can_sign_extend_int_p(i0))
1111 	isubi(r0, i0);
1112     else {
1113 	reg = jit_get_reg(jit_class_gpr);
1114 	movi(rn(reg), i0);
1115 	isubr(r0, rn(reg));
1116 	jit_unget_reg(reg);
1117     }
1118 }
1119 
1120 static void
_subxr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1121 _subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1122 {
1123     jit_int32_t		reg;
1124     if (r0 == r2 && r0 != r1) {
1125 	reg = jit_get_reg(jit_class_gpr);
1126 	movr(rn(reg), r0);
1127 	movr(r0, r1);
1128 	isubxr(r0, rn(reg));
1129 	jit_unget_reg(reg);
1130     }
1131     else {
1132 	movr(r0, r1);
1133 	isubxr(r0, r2);
1134     }
1135 }
1136 
1137 static void
_subxi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1138 _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1139 {
1140     jit_int32_t		reg;
1141     movr(r0, r1);
1142     if (can_sign_extend_int_p(i0))
1143 	isubxi(r0, i0);
1144     else {
1145 	reg = jit_get_reg(jit_class_gpr);
1146 	imovi(rn(reg), i0);
1147 	isubxr(r0, rn(reg));
1148 	jit_unget_reg(reg);
1149     }
1150 }
1151 
1152 static void
_rsbi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1153 _rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1154 {
1155     subi(r0, r1, i0);
1156     negr(r0, r0);
1157 }
1158 
1159 static void
_imulr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1160 _imulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1161 {
1162     rex(0, WIDE, r0, _NOREG, r1);
1163     ic(0x0f);
1164     ic(0xaf);
1165     mrm(0x03, r7(r0), r7(r1));
1166 }
1167 
1168 static void
_imuli(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1169 _imuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1170 {
1171     jit_int32_t		reg;
1172     if (can_sign_extend_int_p(i0)) {
1173 	rex(0, WIDE, r0, _NOREG, r1);
1174 	if ((jit_int8_t)i0 == i0) {
1175 	    ic(0x6b);
1176 	    mrm(0x03, r7(r0), r7(r1));
1177 	    ic(i0);
1178 	}
1179 	else {
1180 	    ic(0x69);
1181 	    mrm(0x03, r7(r0), r7(r1));
1182 	    ii(i0);
1183 	}
1184     }
1185     else {
1186 	reg = jit_get_reg(jit_class_gpr);
1187 	movi(rn(reg), i0);
1188 	imulr(r0, rn(reg));
1189 	jit_unget_reg(reg);
1190     }
1191 }
1192 
1193 static void
_mulr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1194 _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1195 {
1196     if (r0 == r1)
1197 	imulr(r0, r2);
1198     else if (r0 == r2)
1199 	imulr(r0, r1);
1200     else {
1201 	movr(r0, r1);
1202 	imulr(r0, r2);
1203     }
1204 }
1205 
1206 static void
_muli(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1207 _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1208 {
1209     switch (i0) {
1210 	case 0:
1211 	    ixorr(r0, r0);
1212 	    break;
1213 	case 1:
1214 	    movr(r0, r1);
1215 	    break;
1216 	case -1:
1217 	    negr(r0, r1);
1218 	    break;
1219 	case 2:
1220 	    lea(0, _NOREG, r1, _SCL2, r0);
1221 	    break;
1222 	case 4:
1223 	    lea(0, _NOREG, r1, _SCL4, r0);
1224 	    break;
1225 	case 8:
1226 	    lea(0, _NOREG, r1, _SCL8, r0);
1227 	    break;
1228 	default:
1229 	    if (i0 > 0 && !(i0 & (i0 - 1)))
1230 		lshi(r0, r1, ffsl(i0) - 1);
1231 	    else if (can_sign_extend_int_p(i0))
1232 		imuli(r0, r1, i0);
1233 	    else if (r0 != r1) {
1234 		movi(r0, i0);
1235 		imulr(r0, r1);
1236 	    }
1237 	    else
1238 		imuli(r0, r0, i0);
1239 	    break;
1240     }
1241 }
1242 
/*
 * Bookkeeping helpers shared by the multiply/divide emitters that need
 * fixed registers.  They operate on the caller's locals: `sav` and `set`
 * (bit masks indexed by register number) and the operand registers
 * r0..r3.
 *
 *   savset(rn)   three-operand form: rn must be saved unless it is r0, and
 *                must be reserved unless it is r1 or r2
 *   isavset(rn)  immediate form: like savset() but only r1 is a source
 *   qsavset(rn)  four-operand form: r0 and r1 are results, r2 and r3 sources
 *   allocr(rn, rv)  reserve rv if flagged in `set`; if flagged in `sav`,
 *                either spill it with save() or drop the flag when rv is
 *                already saved or not live
 *   clear(rn, rv)   undo allocr(): release the reservation, reload a spill
 *
 * Each macro is wrapped in do { } while (0) so that it behaves as a single
 * statement (safe under an unbraced if/else), and its arguments are
 * parenthesized.
 */
#define savset(rn)							\
    do {								\
	if (r0 != (rn)) {						\
	    sav |= 1 << (rn);						\
	    if (r1 != (rn) && r2 != (rn))				\
		set |= 1 << (rn);					\
	}								\
    } while (0)
#define isavset(rn)							\
    do {								\
	if (r0 != (rn)) {						\
	    sav |= 1 << (rn);						\
	    if (r1 != (rn))						\
		set |= 1 << (rn);					\
	}								\
    } while (0)
#define qsavset(rn)							\
    do {								\
	if (r0 != (rn) && r1 != (rn)) {					\
	    sav |= 1 << (rn);						\
	    if (r2 != (rn) && r3 != (rn))				\
		set |= 1 << (rn);					\
	}								\
    } while (0)
#define allocr(rn, rv)							\
    do {								\
	if (set & (1 << (rn)))						\
	    (void)jit_get_reg((rv)|jit_class_gpr|jit_class_named);	\
	if (sav & (1 << (rn))) {					\
	    if ( jit_regset_tstbit(&_jitc->regsav, (rv)) ||		\
		!jit_regset_tstbit(&_jitc->reglive, (rv)))		\
		sav &= ~(1 << (rn));					\
	    else							\
		save(rv);						\
	}								\
    } while (0)
#define clear(rn, rv)							\
    do {								\
	if (set & (1 << (rn)))						\
	    jit_unget_reg(rv);						\
	if (sav & (1 << (rn)))						\
	    load(rv);							\
    } while (0)
1276 static void
_iqmulr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2,jit_int32_t r3,jit_bool_t sign)1277 _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1278 	jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
1279 {
1280     jit_int32_t		mul;
1281     jit_int32_t		sav;
1282     jit_int32_t		set;
1283 
1284     sav = set = 0;
1285     qsavset(_RDX_REGNO);
1286     qsavset(_RAX_REGNO);
1287     allocr(_RDX_REGNO, _RDX);
1288     allocr(_RAX_REGNO, _RAX);
1289 
1290     if (r3 == _RAX_REGNO)
1291 	mul = r2;
1292     else {
1293 	mul = r3;
1294 	movr(_RAX_REGNO, r2);
1295     }
1296     if (sign)
1297 	umulr(mul);
1298     else
1299 	umulr_u(mul);
1300 
1301     if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)
1302 	xchgr(_RAX_REGNO, _RDX_REGNO);
1303     else {
1304 	if (r0 != _RDX_REGNO)
1305 	    movr(r0, _RAX_REGNO);
1306 	movr(r1, _RDX_REGNO);
1307 	if (r0 == _RDX_REGNO)
1308 	    movr(r0, _RAX_REGNO);
1309     }
1310 
1311     clear(_RDX_REGNO, _RDX);
1312     clear(_RAX_REGNO, _RAX);
1313 }
1314 
1315 static void
_iqmuli(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2,jit_word_t i0,jit_bool_t sign)1316 _iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1317 	jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
1318 {
1319     jit_int32_t		reg;
1320 
1321     if (i0 == 0) {
1322 	ixorr(r0, r0);
1323 	ixorr(r1, r1);
1324     }
1325     else {
1326 	reg = jit_get_reg(jit_class_gpr);
1327 	movi(rn(reg), i0);
1328 	if (sign)
1329 	    qmulr(r0, r1, r2, rn(reg));
1330 	else
1331 	    qmulr_u(r0, r1, r2, rn(reg));
1332 	jit_unget_reg(reg);
1333     }
1334 }
1335 
1336 static void
_sign_extend_rdx_rax(jit_state_t * _jit)1337 _sign_extend_rdx_rax(jit_state_t *_jit)
1338 {
1339     rex(0, WIDE, 0, 0, 0);
1340     ic(0x99);
1341 }
1342 
1343 static void
_divremr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2,jit_bool_t sign,jit_bool_t divide)1344 _divremr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2,
1345 	 jit_bool_t sign, jit_bool_t divide)
1346 {
1347     jit_int32_t		div;
1348     jit_int32_t		reg;
1349     jit_int32_t		set;
1350     jit_int32_t		sav;
1351     jit_int32_t		use;
1352 
1353     sav = set = use = 0;
1354     savset(_RDX_REGNO);
1355     savset(_RAX_REGNO);
1356     allocr(_RDX_REGNO, _RDX);
1357     allocr(_RAX_REGNO, _RAX);
1358 
1359     if (r2 == _RAX_REGNO) {
1360 	if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
1361 	    if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1362 		reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1363 				  jit_class_gpr|jit_class_named);
1364 	    use = 1;
1365 	    div = rn(reg);
1366 	    movr(div, _RAX_REGNO);
1367 	    if (r1 != _RAX_REGNO)
1368 		movr(_RAX_REGNO, r1);
1369 	}
1370 	else {
1371 	    if (r0 == r1)
1372 		xchgr(r0, _RAX_REGNO);
1373 	    else {
1374 		if (r0 != _RAX_REGNO)
1375 		    movr(r0, _RAX_REGNO);
1376 		if (r1 != _RAX_REGNO)
1377 		    movr(_RAX_REGNO, r1);
1378 	    }
1379 	    div = r0;
1380 	}
1381     }
1382     else if (r2 == _RDX_REGNO) {
1383 	if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
1384 	    if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1385 		reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1386 				  jit_class_gpr|jit_class_named);
1387 	    use = 1;
1388 	    div = rn(reg);
1389 	    movr(div, _RDX_REGNO);
1390 	    if (r1 != _RAX_REGNO)
1391 		movr(_RAX_REGNO, r1);
1392 	}
1393 	else {
1394 	    if (r1 != _RAX_REGNO)
1395 		movr(_RAX_REGNO, r1);
1396 	    movr(r0, _RDX_REGNO);
1397 	    div = r0;
1398 	}
1399     }
1400     else {
1401 	if (r1 != _RAX_REGNO)
1402 	    movr(_RAX_REGNO, r1);
1403 	div = r2;
1404     }
1405 
1406     if (sign) {
1407 	sign_extend_rdx_rax();
1408 	idivr(div);
1409     }
1410     else {
1411 	ixorr(_RDX_REGNO, _RDX_REGNO);
1412 	idivr_u(div);
1413     }
1414 
1415     if (use)
1416 	jit_unget_reg(reg);
1417 
1418     if (divide)
1419 	movr(r0, _RAX_REGNO);
1420     else
1421 	movr(r0, _RDX_REGNO);
1422 
1423     clear(_RDX_REGNO, _RDX);
1424     clear(_RAX_REGNO, _RAX);
1425 }
1426 
1427 static void
_divremi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0,jit_bool_t sign,jit_bool_t divide)1428 _divremi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0,
1429 	 jit_bool_t sign, jit_bool_t divide)
1430 {
1431     jit_int32_t		reg;
1432     jit_int32_t		div;
1433     jit_int32_t		sav;
1434     jit_int32_t		set;
1435     jit_int32_t		use;
1436 
1437     if (divide) {
1438 	switch (i0) {
1439 	    case 1:
1440 		movr(r0, r1);
1441 		return;
1442 	    case -1:
1443 		if (sign) {
1444 		    negr(r0, r1);
1445 		    return;
1446 		}
1447 		break;
1448 	    default:
1449 		if (i0 > 0 && !(i0 & (i0 - 1))) {
1450 		    movr(r0, r1);
1451 		    if (sign)
1452 			rshi(r0, r0, ffsl(i0) - 1);
1453 		    else
1454 			rshi_u(r0, r0, ffsl(i0) - 1);
1455 		    return;
1456 		}
1457 		break;
1458 	}
1459     }
1460     else if (i0 == 1 || (sign && i0 == -1)) {
1461 	ixorr(r0, r0);
1462 	return;
1463     }
1464     else if (!sign && i0 > 0 && !(i0 & (i0 - 1))) {
1465 	if (can_sign_extend_int_p(i0)) {
1466 	    movr(r0, r1);
1467 	    iandi(r0, i0 - 1);
1468 	}
1469 	else if (r0 != r1) {
1470 	    movi(r0, i0 - 1);
1471 	    iandr(r0, r1);
1472 	}
1473 	else {
1474 	    reg = jit_get_reg(jit_class_gpr);
1475 	    movi(rn(reg), i0 - 1);
1476 	    iandr(r0, rn(reg));
1477 	    jit_unget_reg(reg);
1478 	}
1479 	return;
1480     }
1481 
1482     sav = set = use = 0;
1483     isavset(_RDX_REGNO);
1484     isavset(_RAX_REGNO);
1485     allocr(_RDX_REGNO, _RDX);
1486     allocr(_RAX_REGNO, _RAX);
1487 
1488     if (r0 == _RAX_REGNO || r0 == _RDX_REGNO || r0 == r1) {
1489 	if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1490 	    reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1491 			      jit_class_gpr|jit_class_named);
1492 	use = 1;
1493 	div = rn(reg);
1494     }
1495     else
1496 	div = r0;
1497 
1498     movi(div, i0);
1499     movr(_RAX_REGNO, r1);
1500 
1501     if (sign) {
1502 	sign_extend_rdx_rax();
1503 	idivr(div);
1504     }
1505     else {
1506 	ixorr(_RDX_REGNO, _RDX_REGNO);
1507 	idivr_u(div);
1508     }
1509 
1510     if (use)
1511 	jit_unget_reg(reg);
1512 
1513     if (divide)
1514 	movr(r0, _RAX_REGNO);
1515     else
1516 	movr(r0, _RDX_REGNO);
1517 
1518     clear(_RDX_REGNO, _RDX);
1519     clear(_RAX_REGNO, _RAX);
1520 }
1521 
1522 static void
_iqdivr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2,jit_int32_t r3,jit_bool_t sign)1523 _iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1524 	jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
1525 {
1526     jit_int32_t		div;
1527     jit_int32_t		reg;
1528     jit_int32_t		sav;
1529     jit_int32_t		set;
1530     jit_int32_t		use;
1531 
1532     sav = set = use = 0;
1533     qsavset(_RDX_REGNO);
1534     qsavset(_RAX_REGNO);
1535     allocr(_RDX_REGNO, _RDX);
1536     allocr(_RAX_REGNO, _RAX);
1537     if (r3 == _RAX_REGNO) {
1538 	if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
1539 	    if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1540 		reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1541 				  jit_class_gpr|jit_class_named);
1542 	    use = 1;
1543 	    div = rn(reg);
1544 	    movr(div, _RAX_REGNO);
1545 	    if (r2 != _RAX_REGNO)
1546 		movr(_RAX_REGNO, r2);
1547 	}
1548 	else {
1549 	    if (r0 == r2)
1550 		xchgr(r0, _RAX_REGNO);
1551 	    else {
1552 		if (r0 != _RAX_REGNO)
1553 		    movr(r0, _RAX_REGNO);
1554 		if (r2 != _RAX_REGNO)
1555 		    movr(_RAX_REGNO, r2);
1556 	    }
1557 	    div = r0;
1558 	}
1559     }
1560     else if (r3 == _RDX_REGNO) {
1561 	if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
1562 	    if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1563 		reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1564 				  jit_class_gpr|jit_class_named);
1565 	    use = 1;
1566 	    div = rn(reg);
1567 	    movr(div, _RDX_REGNO);
1568 	    if (r2 != _RAX_REGNO)
1569 		movr(_RAX_REGNO, r2);
1570 	}
1571 	else {
1572 	    if (r2 != _RAX_REGNO)
1573 		movr(_RAX_REGNO, r2);
1574 	    movr(r0, _RDX_REGNO);
1575 	    div = r0;
1576 	}
1577     }
1578     else {
1579 	if (r2 != _RAX_REGNO)
1580 	    movr(_RAX_REGNO, r2);
1581 	div = r3;
1582     }
1583     if (sign) {
1584 	sign_extend_rdx_rax();
1585 	idivr(div);
1586     }
1587     else {
1588 	ixorr(_RDX_REGNO, _RDX_REGNO);
1589 	idivr_u(div);
1590     }
1591     if (use)
1592 	jit_unget_reg(reg);
1593 
1594     if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)
1595 	xchgr(_RAX_REGNO, _RDX_REGNO);
1596     else {
1597 	if (r0 != _RDX_REGNO)
1598 	    movr(r0, _RAX_REGNO);
1599 	movr(r1, _RDX_REGNO);
1600 	if (r0 == _RDX_REGNO)
1601 	    movr(r0, _RAX_REGNO);
1602     }
1603 
1604     clear(_RDX_REGNO, _RDX);
1605     clear(_RAX_REGNO, _RAX);
1606 }
1607 
1608 static void
_iqdivi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2,jit_word_t i0,jit_bool_t sign)1609 _iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1610 	jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
1611 {
1612     jit_int32_t		reg;
1613 
1614     reg = jit_get_reg(jit_class_gpr);
1615     movi(rn(reg), i0);
1616     if (sign)
1617 	qdivr(r0, r1, r2, rn(reg));
1618     else
1619 	qdivr_u(r0, r1, r2, rn(reg));
1620     jit_unget_reg(reg);
1621 }
1622 #undef clear
1623 #undef allocr
1624 #undef savset
1625 
1626 static void
_andr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1627 _andr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1628 {
1629     if (r1 == r2)
1630 	movr(r0, r1);
1631     else if (r0 == r1)
1632 	iandr(r0, r2);
1633     else if (r0 == r2)
1634 	iandr(r0, r1);
1635     else {
1636 	movr(r0, r1);
1637 	iandr(r0, r2);
1638     }
1639 }
1640 
1641 static void
_andi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1642 _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1643 {
1644     jit_int32_t		reg;
1645 
1646     if (i0 == 0)
1647 	ixorr(r0, r0);
1648     else if (i0 == -1)
1649 	movr(r0, r1);
1650     else if (r0 == r1) {
1651 	if (can_sign_extend_int_p(i0))
1652 	    iandi(r0, i0);
1653 	else {
1654 	    reg = jit_get_reg(jit_class_gpr);
1655 	    movi(rn(reg), i0);
1656 	    iandr(r0, rn(reg));
1657 	    jit_unget_reg(reg);
1658 	}
1659     }
1660     else {
1661 	movi(r0, i0);
1662 	iandr(r0, r1);
1663     }
1664 }
1665 
1666 static void
_orr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1667 _orr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1668 {
1669     if (r1 == r2)
1670 	movr(r0, r1);
1671     else if (r0 == r1)
1672 	iorr(r0, r2);
1673     else if (r0 == r2)
1674 	iorr(r0, r1);
1675     else {
1676 	movr(r0, r1);
1677 	iorr(r0, r2);
1678     }
1679 }
1680 
1681 static void
_ori(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1682 _ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1683 {
1684     jit_int32_t		reg;
1685     if (i0 == 0)
1686 	movr(r0, r1);
1687     else if (i0 == -1)
1688 	movi(r0, -1);
1689     else if (can_sign_extend_int_p(i0)) {
1690 	movr(r0, r1);
1691 	iori(r0, i0);
1692     }
1693     else if (r0 != r1) {
1694 	movi(r0, i0);
1695 	iorr(r0, r1);
1696     }
1697     else {
1698 	reg = jit_get_reg(jit_class_gpr);
1699 	movi(rn(reg), i0);
1700 	iorr(r0, rn(reg));
1701 	jit_unget_reg(reg);
1702     }
1703 }
1704 
1705 static void
_xorr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1706 _xorr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1707 {
1708     if (r1 == r2)
1709 	ixorr(r0, r0);
1710     else if (r0 == r1)
1711 	ixorr(r0, r2);
1712     else if (r0 == r2)
1713 	ixorr(r0, r1);
1714     else {
1715 	movr(r0, r1);
1716 	ixorr(r0, r2);
1717     }
1718 }
1719 
1720 static void
_xori(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1721 _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1722 {
1723     jit_int32_t		reg;
1724     if (i0 == 0)
1725 	movr(r0, r1);
1726     else if (i0 == -1)
1727 	comr(r0, r1);
1728     else if (can_sign_extend_int_p(i0)) {
1729 	movr(r0, r1);
1730 	ixori(r0, i0);
1731     }
1732     else if (r0 != r1) {
1733 	movi(r0, i0);
1734 	ixorr(r0, r1);
1735     }
1736     else {
1737 	reg = jit_get_reg(jit_class_gpr);
1738 	movi(rn(reg), i0);
1739 	ixorr(r0, rn(reg));
1740 	jit_unget_reg(reg);
1741     }
1742 }
1743 
1744 static void
_irotshr(jit_state_t * _jit,jit_int32_t code,jit_int32_t r0)1745 _irotshr(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
1746 {
1747     rex(0, WIDE, _RCX_REGNO, _NOREG, r0);
1748     ic(0xd3);
1749     mrm(0x03, code, r7(r0));
1750 }
1751 
1752 static void
_rotshr(jit_state_t * _jit,jit_int32_t code,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1753 _rotshr(jit_state_t *_jit, jit_int32_t code,
1754 	jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1755 {
1756     jit_int32_t		reg;
1757     jit_int32_t		use;
1758 
1759     if (r0 == _RCX_REGNO) {
1760 	reg = jit_get_reg(jit_class_gpr);
1761 	movr(rn(reg), r1);
1762 	if (r2 != _RCX_REGNO)
1763 	    movr(_RCX_REGNO, r2);
1764 	irotshr(code, rn(reg));
1765 	movr(_RCX_REGNO, rn(reg));
1766 	jit_unget_reg(reg);
1767     }
1768     else if (r2 != _RCX_REGNO) {
1769 	use = !jit_reg_free_p(_RCX);
1770 	if (use) {
1771 	    reg = jit_get_reg(jit_class_gpr);
1772 	    movr(rn(reg), _RCX_REGNO);
1773 	}
1774 	else
1775 	    reg = 0;
1776 	if (r1 == _RCX_REGNO) {
1777 	    if (r0 == r2)
1778 		xchgr(r0, _RCX_REGNO);
1779 	    else {
1780 		movr(r0, r1);
1781 		movr(_RCX_REGNO, r2);
1782 	    }
1783 	}
1784 	else {
1785 	    movr(_RCX_REGNO, r2);
1786 	    movr(r0, r1);
1787 	}
1788 	irotshr(code, r0);
1789 	if (use) {
1790 	    movr(_RCX_REGNO, rn(reg));
1791 	    jit_unget_reg(reg);
1792 	}
1793     }
1794     else {
1795 	movr(r0, r1);
1796 	irotshr(code, r0);
1797     }
1798 }
1799 
1800 static void
_irotshi(jit_state_t * _jit,jit_int32_t code,jit_int32_t r0,jit_word_t i0)1801 _irotshi(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_word_t i0)
1802 {
1803     rex(0, WIDE, _NOREG, _NOREG, r0);
1804     if (i0 == 1) {
1805 	ic(0xd1);
1806 	mrm(0x03, code, r7(r0));
1807     }
1808     else {
1809 	ic(0xc1);
1810 	mrm(0x03, code, r7(r0));
1811 	ic(i0);
1812     }
1813 }
1814 
1815 static void
_rotshi(jit_state_t * _jit,jit_int32_t code,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1816 _rotshi(jit_state_t *_jit, jit_int32_t code,
1817 	jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1818 {
1819     movr(r0, r1);
1820     if (i0)
1821 	irotshi(code, r0, i0);
1822 }
1823 
1824 static void
_lshi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1825 _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1826 {
1827     if (i0 == 0)
1828 	movr(r0, r1);
1829     else if (i0 <= 3)
1830 	lea(0, _NOREG, r1, i0 == 1 ? _SCL2 : i0 == 2 ? _SCL4 : _SCL8, r0);
1831     else
1832 	rotshi(X86_SHL, r0, r1, i0);
1833 }
1834 
1835 static void
_unr(jit_state_t * _jit,jit_int32_t code,jit_int32_t r0)1836 _unr(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
1837 {
1838     rex(0, WIDE, _NOREG, _NOREG, r0);
1839     ic(0xf7);
1840     mrm(0x03, code, r7(r0));
1841 }
1842 
1843 static void
_negr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1844 _negr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1845 {
1846     if (r0 == r1)
1847 	inegr(r0);
1848     else {
1849 	ixorr(r0, r0);
1850 	isubr(r0, r1);
1851     }
1852 }
1853 
1854 static void
_comr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1855 _comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1856 {
1857     movr(r0, r1);
1858     icomr(r0);
1859 }
1860 
1861 #if USE_INC_DEC
1862 static void
_incr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1863 _incr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1864 {
1865     movr(r0, r1);
1866 #  if __X64
1867     rex(0, WIDE, _NOREG, _NOREG, r0);
1868     ic(0xff);
1869     ic(0xc0 | r7(r0));
1870 #  else
1871     ic(0x40 | r7(r0));
1872 #  endif
1873 }
1874 
1875 static void
_decr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1876 _decr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1877 {
1878     movr(r0, r1);
1879 #  if __X64
1880     rex(0, WIDE, _NOREG, _NOREG, r0);
1881     ic(0xff);
1882     ic(0xc8 | r7(r0));
1883 #  else
1884     ic(0x48 | r7(r0));
1885 #  endif
1886 }
1887 #endif
1888 
1889 static void
_cr(jit_state_t * _jit,jit_int32_t code,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1890 _cr(jit_state_t *_jit,
1891     jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1892 {
1893     jit_int32_t		reg;
1894     jit_bool_t		same;
1895     if (reg8_p(r0)) {
1896 	same = r0 == r1 || r0 == r2;
1897 	if (!same)
1898 	    ixorr(r0, r0);
1899 	icmpr(r1, r2);
1900 	if (same)
1901 	    imovi(r0, 0);
1902 	cc(code, r0);
1903     }
1904     else {
1905 	reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
1906 	ixorr(rn(reg), rn(reg));
1907 	icmpr(r1, r2);
1908 	cc(code, rn(reg));
1909 	movr(r0, rn(reg));
1910 	jit_unget_reg(reg);
1911     }
1912 }
1913 
1914 static void
_ci(jit_state_t * _jit,jit_int32_t code,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1915 _ci(jit_state_t *_jit,
1916     jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1917 {
1918     jit_int32_t		reg;
1919     jit_bool_t		same;
1920     if (reg8_p(r0)) {
1921 	same = r0 == r1;
1922 	if (!same)
1923 	    ixorr(r0, r0);
1924 	icmpi(r1, i0);
1925 	if (same)
1926 	    imovi(r0, 0);
1927 	cc(code, r0);
1928     }
1929     else {
1930 	reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
1931 	ixorr(rn(reg), rn(reg));
1932 	icmpi(r1, i0);
1933 	cc(code, rn(reg));
1934 	movr(r0, rn(reg));
1935 	jit_unget_reg(reg);
1936     }
1937 }
1938 
1939 static void
_ci0(jit_state_t * _jit,jit_int32_t code,jit_int32_t r0,jit_int32_t r1)1940 _ci0(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_int32_t r1)
1941 {
1942     jit_int32_t		reg;
1943     jit_bool_t		same;
1944     if (reg8_p(r0)) {
1945 	same = r0 == r1;
1946 	if (!same)
1947 	    ixorr(r0, r0);
1948 	testr(r1, r1);
1949 	if (same)
1950 	    imovi(r0, 0);
1951 	cc(code, r0);
1952     }
1953     else {
1954 	reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
1955 	ixorr(rn(reg), rn(reg));
1956 	testr(r1, r1);
1957 	cc(code, rn(reg));
1958 	movr(r0, rn(reg));
1959 	jit_unget_reg(reg);
1960     }
1961 }
1962 
1963 static void
_ltr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1964 _ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1965 {
1966     if (r1 == r2)
1967 	movi(r0, 0);
1968     else
1969 	cr(X86_CC_L, r0, r1, r2);
1970 }
1971 
1972 static void
_lti(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1973 _lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1974 {
1975     if (i0)
1976 	ci(X86_CC_L, r0, r1, i0);
1977     else
1978 	ci0(X86_CC_S, r0, r1);
1979 }
1980 
1981 static void
_ltr_u(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1982 _ltr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1983 {
1984     if (r1 == r2)
1985 	movi(r0, 0);
1986     else
1987 	cr(X86_CC_B, r0, r1, r2);
1988 }
1989 
1990 static void
_ler(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1991 _ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1992 {
1993     if (r1 == r2)
1994 	movi(r0, 1);
1995     else
1996 	cr(X86_CC_LE, r0, r1, r2);
1997 }
1998 
1999 static void
_ler_u(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2000 _ler_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2001 {
2002     if (r1 == r2)
2003 	movi(r0, 1);
2004     else
2005 	cr(X86_CC_BE, r0, r1, r2);
2006 }
2007 
2008 static void
_lei_u(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)2009 _lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2010 {
2011     if (i0)
2012 	ci(X86_CC_BE, r0, r1, i0);
2013     else
2014 	ci0(X86_CC_E, r0, r1);
2015 }
2016 
2017 static void
_eqr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2018 _eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2019 {
2020     if (r1 == r2)
2021 	movi(r0, 1);
2022     else
2023 	cr(X86_CC_E, r0, r1, r2);
2024 }
2025 
2026 static void
_eqi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)2027 _eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2028 {
2029     if (i0)
2030 	ci(X86_CC_E, r0, r1, i0);
2031     else
2032 	ci0(X86_CC_E, r0, r1);
2033 }
2034 
2035 static void
_ger(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2036 _ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2037 {
2038     if (r1 == r2)
2039 	movi(r0, 1);
2040     else
2041 	cr(X86_CC_GE, r0, r1, r2);
2042 }
2043 
2044 static void
_gei(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)2045 _gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2046 {
2047     if (i0)
2048 	ci(X86_CC_GE, r0, r1, i0);
2049     else
2050 	ci0(X86_CC_NS, r0, r1);
2051 }
2052 
2053 static void
_ger_u(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2054 _ger_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2055 {
2056     if (r1 == r2)
2057 	movi(r0, 1);
2058     else
2059 	cr(X86_CC_AE, r0, r1, r2);
2060 }
2061 
2062 static void
_gei_u(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)2063 _gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2064 {
2065     if (i0)
2066 	ci(X86_CC_AE, r0, r1, i0);
2067     else
2068 	ci0(X86_CC_NB, r0, r1);
2069 }
2070 
2071 static void
_gtr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2072 _gtr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2073 {
2074     if (r1 == r2)
2075 	movi(r0, 0);
2076     else
2077 	cr(X86_CC_G, r0, r1, r2);
2078 }
2079 
2080 static void
_gtr_u(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2081 _gtr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2082 {
2083     if (r1 == r2)
2084 	movi(r0, 0);
2085     else
2086 	cr(X86_CC_A, r0, r1, r2);
2087 }
2088 
2089 static void
_gti_u(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)2090 _gti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2091 {
2092     if (i0)
2093 	ci(X86_CC_A, r0, r1, i0);
2094     else
2095 	ci0(X86_CC_NE, r0, r1);
2096 }
2097 
2098 static void
_ner(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2099 _ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2100 {
2101     if (r1 == r2)
2102 	movi(r0, 0);
2103     else
2104 	cr(X86_CC_NE, r0, r1, r2);
2105 }
2106 
2107 static void
_nei(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)2108 _nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2109 {
2110     if (i0)
2111 	ci(X86_CC_NE, r0, r1, i0);
2112     else
2113 	ci0(X86_CC_NE, r0, r1);
2114 }
2115 
2116 static void
_movr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2117 _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2118 {
2119     if (r0 != r1) {
2120 	rex(0, 1, r1, _NOREG, r0);
2121 	ic(0x89);
2122 	ic(0xc0 | (r1 << 3) | r7(r0));
2123     }
2124 }
2125 
2126 static void
_imovi(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)2127 _imovi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2128 {
2129 #if __X64
2130 #  if !__X64_32
2131     if (fits_uint32_p(i0)) {
2132 #  endif
2133 	rex(0, 0, _NOREG, _NOREG, r0);
2134 	ic(0xb8 | r7(r0));
2135 	ii(i0);
2136 #  if !__X64_32
2137     }
2138     else {
2139 	rex(0, 1, _NOREG, _NOREG, r0);
2140 	ic(0xb8 | r7(r0));
2141 	il(i0);
2142     }
2143 #  endif
2144 #else
2145     ic(0xb8 | r7(r0));
2146     ii(i0);
2147 #endif
2148 }
2149 
2150 static void
_movi(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)2151 _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2152 {
2153     if (i0)
2154 	imovi(r0, i0);
2155     else
2156 	ixorr(r0, r0);
2157 }
2158 
2159 static jit_word_t
_movi_p(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)2160 _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2161 {
2162     rex(0, WIDE, _NOREG, _NOREG, r0);
2163     ic(0xb8 | r7(r0));
2164     il(i0);
2165     return (_jit->pc.w);
2166 }
2167 
2168 static void
_movcr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2169 _movcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2170 {
2171     rex(0, WIDE, r0, _NOREG, r1);
2172     ic(0x0f);
2173     ic(0xbe);
2174     mrm(0x03, r7(r0), r7(r1));
2175 }
2176 
2177 static void
_movcr_u(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2178 _movcr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2179 {
2180     rex(0, WIDE, r0, _NOREG, r1);
2181     ic(0x0f);
2182     ic(0xb6);
2183     mrm(0x03, r7(r0), r7(r1));
2184 }
2185 
2186 static void
_movsr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2187 _movsr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2188 {
2189     rex(0, WIDE, r0, _NOREG, r1);
2190     ic(0x0f);
2191     ic(0xbf);
2192     mrm(0x03, r7(r0), r7(r1));
2193 }
2194 
2195 static void
_movsr_u(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2196 _movsr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2197 {
2198     rex(0, WIDE, r0, _NOREG, r1);
2199     ic(0x0f);
2200     ic(0xb7);
2201     mrm(0x03, r7(r0), r7(r1));
2202 }
2203 
2204 #if __X64
2205 static void
_movir(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2206 _movir(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2207 {
2208     rex(0, 1, r0, _NOREG, r1);
2209     ic(0x63);
2210     mrm(0x03, r7(r0), r7(r1));
2211 }
2212 
2213 static void
_movir_u(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2214 _movir_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2215 {
2216     rex(0, 0, r1, _NOREG, r0);
2217     ic(0x89);
2218     ic(0xc0 | (r1 << 3) | r7(r0));
2219 }
2220 #endif
2221 
2222 static void
_htonr_us(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2223 _htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2224 {
2225     extr_us(r0, r1);
2226     ic(0x66);
2227     rex(0, 0, _NOREG, _NOREG, r0);
2228     ic(0xc1);
2229     mrm(0x03, X86_ROR, r7(r0));
2230     ic(8);
2231 }
2232 
2233 static void
_htonr_ui(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2234 _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2235 {
2236     movr(r0, r1);
2237     rex(0, 0, _NOREG, _NOREG, r0);
2238     ic(0x0f);
2239     ic(0xc8 | r7(r0));
2240 }
2241 
2242 #if __X64 && !__X64_32
2243 static void
_htonr_ul(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2244 _htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2245 {
2246     movr(r0, r1);
2247     rex(0, 1, _NOREG, _NOREG, r0);
2248     ic(0x0f);
2249     ic(0xc8 | r7(r0));
2250 }
2251 #endif
2252 
2253 static void
_extr_c(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2254 _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2255 {
2256     jit_int32_t		reg;
2257     if (reg8_p(r1))
2258 	movcr(r0, r1);
2259     else {
2260 	reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
2261 	movr(rn(reg), r1);
2262 	movcr(r0, rn(reg));
2263 	jit_unget_reg(reg);
2264     }
2265 }
2266 
2267 static void
_extr_uc(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2268 _extr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2269 {
2270     jit_int32_t		reg;
2271     if (reg8_p(r1))
2272 	movcr_u(r0, r1);
2273     else {
2274 	reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
2275 	movr(rn(reg), r1);
2276 	movcr_u(r0, rn(reg));
2277 	jit_unget_reg(reg);
2278     }
2279 }
2280 
2281 static void
_ldr_c(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2282 _ldr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2283 {
2284     rex(0, WIDE, r0, _NOREG, r1);
2285     ic(0x0f);
2286     ic(0xbe);
2287     rx(r0, 0, r1, _NOREG, _SCL1);
2288 }
2289 
2290 static void
_ldi_c(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)2291 _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2292 {
2293     jit_int32_t		reg;
2294     if (can_sign_extend_int_p(i0)) {
2295 	rex(0, WIDE, r0, _NOREG, _NOREG);
2296 	ic(0x0f);
2297 	ic(0xbe);
2298 	rx(r0, i0, _NOREG, _NOREG, _SCL1);
2299     }
2300     else {
2301 	reg = jit_get_reg(jit_class_gpr);
2302 	movi(rn(reg), i0);
2303 	ldr_c(r0, rn(reg));
2304 	jit_unget_reg(reg);
2305     }
2306 }
2307 
2308 static void
_ldr_uc(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2309 _ldr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2310 {
2311     rex(0, WIDE, r0, _NOREG, r1);
2312     ic(0x0f);
2313     ic(0xb6);
2314     rx(r0, 0, r1, _NOREG, _SCL1);
2315 }
2316 
2317 static void
_ldi_uc(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)2318 _ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2319 {
2320     jit_int32_t		reg;
2321     if (can_sign_extend_int_p(i0)) {
2322 	rex(0, WIDE, r0, _NOREG, _NOREG);
2323 	ic(0x0f);
2324 	ic(0xb6);
2325 	rx(r0, i0, _NOREG, _NOREG, _SCL1);
2326     }
2327     else {
2328 	reg = jit_get_reg(jit_class_gpr);
2329 	movi(rn(reg), i0);
2330 	ldr_uc(r0, rn(reg));
2331 	jit_unget_reg(reg);
2332     }
2333 }
2334 
2335 static void
_ldr_s(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2336 _ldr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2337 {
2338     rex(0, WIDE, r0, _NOREG, r1);
2339     ic(0x0f);
2340     ic(0xbf);
2341     rx(r0, 0, r1, _NOREG, _SCL1);
2342 }
2343 
2344 static void
_ldi_s(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)2345 _ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2346 {
2347     jit_int32_t		reg;
2348     if (can_sign_extend_int_p(i0)) {
2349 	rex(0, WIDE, r0, _NOREG, _NOREG);
2350 	ic(0x0f);
2351 	ic(0xbf);
2352 	rx(r0, i0, _NOREG, _NOREG, _SCL1);
2353     }
2354     else {
2355 	reg = jit_get_reg(jit_class_gpr);
2356 	movi(rn(reg), i0);
2357 	ldr_s(r0, rn(reg));
2358 	jit_unget_reg(reg);
2359     }
2360 }
2361 
2362 static void
_ldr_us(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2363 _ldr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2364 {
2365     rex(0, WIDE, r0, _NOREG, r1);
2366     ic(0x0f);
2367     ic(0xb7);
2368     rx(r0, 0, r1, _NOREG, _SCL1);
2369 }
2370 
2371 static void
_ldi_us(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)2372 _ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2373 {
2374     jit_int32_t		reg;
2375     if (can_sign_extend_int_p(i0)) {
2376 	rex(0, WIDE, r0, _NOREG, _NOREG);
2377 	ic(0x0f);
2378 	ic(0xb7);
2379 	rx(r0, i0, _NOREG, _NOREG, _SCL1);
2380     }
2381     else {
2382 	reg = jit_get_reg(jit_class_gpr);
2383 	movi(rn(reg), i0);
2384 	ldr_us(r0, rn(reg));
2385 	jit_unget_reg(reg);
2386     }
2387 }
2388 
2389 #if __X32 || !__X64_32
2390 static void
_ldr_i(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2391 _ldr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2392 {
2393 #if __X64
2394     rex(0, WIDE, r0, _NOREG, r1);
2395     ic(0x63);
2396 #else
2397     ic(0x8b);
2398 #endif
2399     rx(r0, 0, r1, _NOREG, _SCL1);
2400 }
2401 
2402 static void
_ldi_i(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)2403 _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2404 {
2405     jit_int32_t		reg;
2406     if (can_sign_extend_int_p(i0)) {
2407 #if __X64
2408 	rex(0, WIDE, r0, _NOREG, _NOREG);
2409 	ic(0x63);
2410 #else
2411 	ic(0x8b);
2412 #endif
2413 	rx(r0, i0, _NOREG, _NOREG, _SCL1);
2414     }
2415     else {
2416 	reg = jit_get_reg(jit_class_gpr);
2417 	movi(rn(reg), i0);
2418 	ldr_i(r0, rn(reg));
2419 	jit_unget_reg(reg);
2420     }
2421 }
2422 #endif
2423 
2424 #if __X64
2425 static void
_ldr_ui(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2426 _ldr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2427 {
2428     rex(0, 0, r0, _NOREG, r1);
2429     ic(0x63);
2430     rx(r0, 0, r1, _NOREG, _SCL1);
2431 }
2432 
2433 static void
_ldi_ui(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)2434 _ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2435 {
2436     jit_int32_t		reg;
2437     if (can_sign_extend_int_p(i0)) {
2438 	rex(0, 0, r0, _NOREG, _NOREG);
2439 	ic(0x63);
2440 	rx(r0, i0, _NOREG, _NOREG, _SCL1);
2441     }
2442     else {
2443 	reg = jit_get_reg(jit_class_gpr);
2444 	movi(rn(reg), i0);
2445 	ldr_ui(r0, rn(reg));
2446 	jit_unget_reg(reg);
2447     }
2448 }
2449 
2450 #  if !__X64_32
2451 static void
_ldr_l(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2452 _ldr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2453 {
2454     rex(0, 1, r0, _NOREG, r1);
2455     ic(0x8b);
2456     rx(r0, 0, r1, _NOREG, _SCL1);
2457 }
2458 
2459 static void
_ldi_l(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)2460 _ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2461 {
2462     jit_int32_t		reg;
2463     if (can_sign_extend_int_p(i0)) {
2464 	rex(0, 1, r0, _NOREG, _NOREG);
2465 	ic(0x8b);
2466 	rx(r0, i0, _NOREG, _NOREG, _SCL1);
2467     }
2468     else {
2469 	reg = jit_get_reg(jit_class_gpr);
2470 	movi(rn(reg), i0);
2471 	ldr_l(r0, rn(reg));
2472 	jit_unget_reg(reg);
2473     }
2474 }
2475 #  endif
2476 #endif
2477 
2478 static void
_ldxr_c(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2479 _ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2480 {
2481 #if __X64_32
2482     addr(r0, r1, r2);
2483     ldr_c(r0, r0);
2484 #else
2485     rex(0, WIDE, r0, r1, r2);
2486     ic(0x0f);
2487     ic(0xbe);
2488     rx(r0, 0, r2, r1, _SCL1);
2489 #endif
2490 }
2491 
2492 static void
_ldxi_c(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)2493 _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2494 {
2495     jit_int32_t		reg;
2496     if (can_sign_extend_int_p(i0)) {
2497 	rex(0, WIDE, r0, _NOREG, r1);
2498 	ic(0x0f);
2499 	ic(0xbe);
2500 	rx(r0, i0, r1, _NOREG, _SCL1);
2501     }
2502     else {
2503 	reg = jit_get_reg(jit_class_gpr);
2504 	movi(rn(reg), i0);
2505 	ldxr_c(r0, r1, rn(reg));
2506 	jit_unget_reg(reg);
2507     }
2508 }
2509 
2510 static void
_ldxr_uc(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2511 _ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2512 {
2513 #if __X64_32
2514     addr(r0, r1, r2);
2515     ldr_uc(r0, r0);
2516 #else
2517     rex(0, WIDE, r0, r1, r2);
2518     ic(0x0f);
2519     ic(0xb6);
2520     rx(r0, 0, r2, r1, _SCL1);
2521 #endif
2522 }
2523 
2524 static void
_ldxi_uc(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)2525 _ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2526 {
2527     jit_int32_t		reg;
2528     if (can_sign_extend_int_p(i0)) {
2529 	rex(0, WIDE, r0, _NOREG, r1);
2530 	ic(0x0f);
2531 	ic(0xb6);
2532 	rx(r0, i0, r1, _NOREG, _SCL1);
2533     }
2534     else {
2535 	reg = jit_get_reg(jit_class_gpr);
2536 	movi(rn(reg), i0);
2537 	ldxr_uc(r0, r1, rn(reg));
2538 	jit_unget_reg(reg);
2539     }
2540 }
2541 
2542 static void
_ldxr_s(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2543 _ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2544 {
2545 #if __X64_32
2546     addr(r0, r1, r2);
2547     ldr_s(r0, r0);
2548 #else
2549     rex(0, WIDE, r0, r1, r2);
2550     ic(0x0f);
2551     ic(0xbf);
2552     rx(r0, 0, r2, r1, _SCL1);
2553 #endif
2554 }
2555 
2556 static void
_ldxi_s(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)2557 _ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2558 {
2559     jit_int32_t		reg;
2560     if (can_sign_extend_int_p(i0)) {
2561 	rex(0, WIDE, r0, _NOREG, r1);
2562 	ic(0x0f);
2563 	ic(0xbf);
2564 	rx(r0, i0, r1, _NOREG, _SCL1);
2565     }
2566     else {
2567 	reg = jit_get_reg(jit_class_gpr);
2568 	movi(rn(reg), i0);
2569 	ldxr_s(r0, r1, rn(reg));
2570 	jit_unget_reg(reg);
2571     }
2572 }
2573 
2574 static void
_ldxr_us(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2575 _ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2576 {
2577 #if __X64_32
2578     addr(r0, r1, r2);
2579     ldr_us(r0, r0);
2580 #else
2581     rex(0, WIDE, r0, r1, r2);
2582     ic(0x0f);
2583     ic(0xb7);
2584     rx(r0, 0, r2, r1, _SCL1);
2585 #endif
2586 }
2587 
2588 static void
_ldxi_us(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)2589 _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2590 {
2591     jit_int32_t		reg;
2592     if (can_sign_extend_int_p(i0)) {
2593 	rex(0, WIDE, r0, _NOREG, r1);
2594 	ic(0x0f);
2595 	ic(0xb7);
2596 	rx(r0, i0, r1, _NOREG, _SCL1);
2597     }
2598     else {
2599 	reg = jit_get_reg(jit_class_gpr);
2600 	movi(rn(reg), i0);
2601 	ldxr_us(r0, r1, rn(reg));
2602 	jit_unget_reg(reg);
2603     }
2604 }
2605 
2606 #if __X64 || !__X64_32
2607 static void
_ldxr_i(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2608 _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2609 {
2610 #if __X64
2611     rex(0, WIDE, r0, r1, r2);
2612     ic(0x63);
2613 #else
2614     ic(0x8b);
2615 #endif
2616     rx(r0, 0, r2, r1, _SCL1);
2617 }
2618 
2619 static void
_ldxi_i(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)2620 _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2621 {
2622     jit_int32_t		reg;
2623     if (can_sign_extend_int_p(i0)) {
2624 #if __X64
2625 	rex(0, WIDE, r0, _NOREG, r1);
2626 	ic(0x63);
2627 #else
2628 	ic(0x8b);
2629 #endif
2630 	rx(r0, i0, r1, _NOREG, _SCL1);
2631     }
2632     else {
2633 	reg = jit_get_reg(jit_class_gpr);
2634 	movi(rn(reg), i0);
2635 	ldxr_i(r0, r1, rn(reg));
2636 	jit_unget_reg(reg);
2637     }
2638 }
2639 #endif
2640 
2641 #if __X64
2642 static void
_ldxr_ui(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2643 _ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2644 {
2645 #if __X64_32
2646     addr(r0, r1, r2);
2647     /* to avoid confusion with macro renames */
2648     _ldr_ui(_jit, r0, r0);
2649 #else
2650     rex(0, 0, r0, r1, r2);
2651     ic(0x8b);
2652     rx(r0, 0, r2, r1, _SCL1);
2653 #endif
2654 }
2655 
2656 static void
_ldxi_ui(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)2657 _ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2658 {
2659     jit_int32_t		reg;
2660     if (can_sign_extend_int_p(i0)) {
2661 	rex(0, 0, r0, _NOREG, r1);
2662 	ic(0x8b);
2663 	rx(r0, i0, r1, _NOREG, _SCL1);
2664     }
2665     else {
2666 	reg = jit_get_reg(jit_class_gpr);
2667 	movi(rn(reg), i0);
2668 	ldxr_ui(r0, r1, rn(reg));
2669 	jit_unget_reg(reg);
2670     }
2671 }
2672 
2673 #  if !__X64_32
2674 static void
_ldxr_l(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2675 _ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2676 {
2677     rex(0, 1, r0, r1, r2);
2678     ic(0x8b);
2679     rx(r0, 0, r2, r1, _SCL1);
2680 }
2681 
2682 static void
_ldxi_l(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)2683 _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2684 {
2685     jit_int32_t		reg;
2686     if (can_sign_extend_int_p(i0)) {
2687 	rex(0, 1, r0, _NOREG, r1);
2688 	ic(0x8b);
2689 	rx(r0, i0, r1, _NOREG, _SCL1);
2690     }
2691     else {
2692 	reg = jit_get_reg(jit_class_gpr);
2693 	movi(rn(reg), i0);
2694 	ldxr_l(r0, r1, rn(reg));
2695 	jit_unget_reg(reg);
2696     }
2697 }
2698 #  endif
2699 #endif
2700 
2701 static void
_str_c(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2702 _str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2703 {
2704     jit_int32_t		reg;
2705     if (reg8_p(r1)) {
2706 	rex(0, 0, r1, _NOREG, r0);
2707 	ic(0x88);
2708 	rx(r1, 0, r0, _NOREG, _SCL1);
2709     }
2710     else {
2711 	reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
2712 	movr(rn(reg), r1);
2713 	rex(0, 0, rn(reg), _NOREG, r0);
2714 	ic(0x88);
2715 	rx(rn(reg), 0, r0, _NOREG, _SCL1);
2716 	jit_unget_reg(reg);
2717     }
2718 }
2719 
2720 static void
_sti_c(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0)2721 _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2722 {
2723     jit_int32_t		reg;
2724     if (can_sign_extend_int_p(i0)) {
2725 	if (reg8_p(r0)) {
2726 	    rex(0, 0, r0, _NOREG, _NOREG);
2727 	    ic(0x88);
2728 	    rx(r0, i0, _NOREG, _NOREG, _SCL1);
2729 	}
2730 	else {
2731 	    reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
2732 	    movr(rn(reg), r0);
2733 	    rex(0, 0, rn(reg), _NOREG, _NOREG);
2734 	    ic(0x88);
2735 	    rx(rn(reg), i0, _NOREG, _NOREG, _SCL1);
2736 	    jit_unget_reg(reg);
2737 	}
2738     }
2739     else {
2740 	reg = jit_get_reg(jit_class_gpr);
2741 	movi(rn(reg), i0);
2742 	str_c(rn(reg), r0);
2743 	jit_unget_reg(reg);
2744     }
2745 }
2746 
2747 static void
_str_s(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2748 _str_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2749 {
2750     ic(0x66);
2751     rex(0, 0, r1, _NOREG, r0);
2752     ic(0x89);
2753     rx(r1, 0, r0, _NOREG, _SCL1);
2754 }
2755 
2756 static void
_sti_s(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0)2757 _sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2758 {
2759     jit_int32_t		reg;
2760     if (can_sign_extend_int_p(i0)) {
2761 	ic(0x66);
2762 	rex(0, 0, r0, _NOREG, _NOREG);
2763 	ic(0x89);
2764 	rx(r0, i0, _NOREG, _NOREG, _SCL1);
2765     }
2766     else {
2767 	reg = jit_get_reg(jit_class_gpr);
2768 	movi(rn(reg), i0);
2769 	str_s(rn(reg), r0);
2770 	jit_unget_reg(reg);
2771     }
2772 }
2773 
2774 static void
_str_i(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2775 _str_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2776 {
2777     rex(0, 0, r1, _NOREG, r0);
2778     ic(0x89);
2779     rx(r1, 0, r0, _NOREG, _SCL1);
2780 }
2781 
2782 static void
_sti_i(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0)2783 _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2784 {
2785     jit_int32_t		reg;
2786     if (can_sign_extend_int_p(i0)) {
2787 	rex(0, 0, r0, _NOREG, _NOREG);
2788 	ic(0x89);
2789 	rx(r0, i0, _NOREG, _NOREG, _SCL1);
2790     }
2791     else {
2792 	reg = jit_get_reg(jit_class_gpr);
2793 	movi(rn(reg), i0);
2794 	str_i(rn(reg), r0);
2795 	jit_unget_reg(reg);
2796     }
2797 }
2798 
2799 #if __X64 && !__X64_32
2800 static void
_str_l(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2801 _str_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2802 {
2803     rex(0, 1, r1, _NOREG, r0);
2804     ic(0x89);
2805     rx(r1, 0, r0, _NOREG, _SCL1);
2806 }
2807 
2808 static void
_sti_l(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0)2809 _sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2810 {
2811     jit_int32_t		reg;
2812     if (can_sign_extend_int_p(i0)) {
2813 	rex(0, 1, r0, _NOREG, _NOREG);
2814 	ic(0x89);
2815 	rx(r0, i0, _NOREG, _NOREG, _SCL1);
2816     }
2817     else {
2818 	reg = jit_get_reg(jit_class_gpr);
2819 	movi(rn(reg), i0);
2820 	str_l(rn(reg), r0);
2821 	jit_unget_reg(reg);
2822     }
2823 }
2824 #endif
2825 
2826 static void
_stxr_c(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2827 _stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2828 {
2829     jit_int32_t		reg;
2830 #if __X64_32
2831     reg = jit_get_reg(jit_class_gpr);
2832     addr(rn(reg), r0, r1);
2833     str_c(rn(reg), r2);
2834     jit_unget_reg(reg);
2835 #else
2836     if (reg8_p(r2)) {
2837 	rex(0, 0, r2, r1, r0);
2838 	ic(0x88);
2839 	rx(r2, 0, r0, r1, _SCL1);
2840     }
2841     else {
2842 	reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
2843 	movr(rn(reg), r2);
2844 	rex(0, 0, rn(reg), r1, r0);
2845 	ic(0x88);
2846 	rx(rn(reg), 0, r0, r1, _SCL1);
2847 	jit_unget_reg(reg);
2848     }
2849 #endif
2850 }
2851 
2852 static void
_stxi_c(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)2853 _stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2854 {
2855     jit_int32_t		reg;
2856     if (can_sign_extend_int_p(i0)) {
2857 	if (reg8_p(r1)) {
2858 	    rex(0, 0, r1, _NOREG, r0);
2859 	    ic(0x88);
2860 	    rx(r1, i0, r0, _NOREG, _SCL1);
2861 	}
2862 	else {
2863 	    reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
2864 	    movr(rn(reg), r1);
2865 	    rex(0, 0, rn(reg), _NOREG, r0);
2866 	    ic(0x88);
2867 	    rx(rn(reg), i0, r0, _NOREG, _SCL1);
2868 	    jit_unget_reg(reg);
2869 	}
2870     }
2871     else {
2872 	reg = jit_get_reg(jit_class_gpr);
2873 	movi(rn(reg), i0);
2874 	stxr_c(rn(reg), r0, r1);
2875 	jit_unget_reg(reg);
2876     }
2877 }
2878 
2879 static void
_stxr_s(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2880 _stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2881 {
2882 #if __X64_32
2883     jit_int32_t		reg;
2884     reg = jit_get_reg(jit_class_gpr);
2885     addr(rn(reg), r0, r1);
2886     str_s(rn(reg), r2);
2887     jit_unget_reg(reg);
2888 #else
2889     ic(0x66);
2890     rex(0, 0, r2, r1, r0);
2891     ic(0x89);
2892     rx(r2, 0, r0, r1, _SCL1);
2893 #endif
2894 }
2895 
2896 static void
_stxi_s(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)2897 _stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2898 {
2899     jit_int32_t		reg;
2900     if (can_sign_extend_int_p(i0)) {
2901 	ic(0x66);
2902 	rex(0, 0, r1, _NOREG, r0);
2903 	ic(0x89);
2904 	rx(r1, i0, r0, _NOREG, _SCL1);
2905     }
2906     else {
2907 	reg = jit_get_reg(jit_class_gpr);
2908 	movi(rn(reg), i0);
2909 	stxr_s(rn(reg), r0, r1);
2910 	jit_unget_reg(reg);
2911     }
2912 }
2913 
2914 static void
_stxr_i(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2915 _stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2916 {
2917 #if __X64_32
2918     jit_int32_t		reg;
2919     reg = jit_get_reg(jit_class_gpr);
2920     addr(rn(reg), r0, r1);
2921     str_i(rn(reg), r2);
2922     jit_unget_reg(reg);
2923 #else
2924     rex(0, 0, r2, r1, r0);
2925     ic(0x89);
2926     rx(r2, 0, r0, r1, _SCL1);
2927 #endif
2928 }
2929 
2930 static void
_stxi_i(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)2931 _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2932 {
2933     jit_int32_t		reg;
2934     if (can_sign_extend_int_p(i0)) {
2935 	rex(0, 0, r1, _NOREG, r0);
2936 	ic(0x89);
2937 	rx(r1, i0, r0, _NOREG, _SCL1);
2938     }
2939     else {
2940 	reg = jit_get_reg(jit_class_gpr);
2941 	movi(rn(reg), i0);
2942 	stxr_i(rn(reg), r0, r1);
2943 	jit_unget_reg(reg);
2944     }
2945 }
2946 
2947 #if __X64 && !__X64_32
2948 static void
_stxr_l(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2949 _stxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2950 {
2951     rex(0, 1, r2, r1, r0);
2952     ic(0x89);
2953     rx(r2, 0, r0, r1, _SCL1);
2954 }
2955 
2956 static void
_stxi_l(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)2957 _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2958 {
2959     jit_int32_t		reg;
2960     if (can_sign_extend_int_p(i0)) {
2961 	rex(0, 1, r1, _NOREG, r0);
2962 	ic(0x89);
2963 	rx(r1, i0, r0, _NOREG, _SCL1);
2964     }
2965     else {
2966 	reg = jit_get_reg(jit_class_gpr);
2967 	movi(rn(reg), i0);
2968 	stxr_l(rn(reg), r0, r1);
2969 	jit_unget_reg(reg);
2970     }
2971 }
2972 #endif
2973 
2974 static void
_jccs(jit_state_t * _jit,jit_int32_t code,jit_word_t i0)2975 _jccs(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
2976 {
2977     jit_word_t		w;
2978     ic(0x70 | code);
2979     w = i0 - (_jit->pc.w + 1);
2980     ic(w);
2981 }
2982 
2983 static void
_jcc(jit_state_t * _jit,jit_int32_t code,jit_word_t i0)2984 _jcc(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
2985 {
2986     jit_word_t		w;
2987     ic(0x0f);
2988     ic(0x80 | code);
2989     w = i0 - (_jit->pc.w + 4);
2990     ii(w);
2991 }
2992 
2993 static void
_jcr(jit_state_t * _jit,jit_int32_t code,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)2994 _jcr(jit_state_t *_jit,
2995      jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2996 {
2997     alur(X86_CMP, r0, r1);
2998     jcc(code, i0);
2999 }
3000 
3001 static void
_jci(jit_state_t * _jit,jit_int32_t code,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3002 _jci(jit_state_t *_jit,
3003      jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3004 {
3005     alui(X86_CMP, r0, i1);
3006     jcc(code, i0);
3007 }
3008 
3009 static void
_jci0(jit_state_t * _jit,jit_int32_t code,jit_word_t i0,jit_int32_t r0)3010 _jci0(jit_state_t *_jit, jit_int32_t code, jit_word_t i0, jit_int32_t r0)
3011 {
3012     testr(r0, r0);
3013     jcc(code, i0);
3014 }
3015 
3016 static jit_word_t
_bltr(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3017 _bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3018 {
3019     jcr(X86_CC_L, i0, r0, r1);
3020     return (_jit->pc.w);
3021 }
3022 
3023 static jit_word_t
_blti(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3024 _blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3025 {
3026     if (i1)		jci (X86_CC_L, i0, r0, i1);
3027     else		jci0(X86_CC_S, i0, r0);
3028     return (_jit->pc.w);
3029 }
3030 
3031 static jit_word_t
_bltr_u(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3032 _bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3033 {
3034     jcr(X86_CC_B, i0, r0, r1);
3035     return (_jit->pc.w);
3036 }
3037 
3038 static jit_word_t
_blti_u(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3039 _blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3040 {
3041     if (i1)		jci (X86_CC_B, i0, r0, i1);
3042     else		jci0(X86_CC_B, i0, r0);
3043     return (_jit->pc.w);
3044 }
3045 
3046 static jit_word_t
_bler(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3047 _bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3048 {
3049     if (r0 == r1)	jmpi(i0);
3050     else		jcr (X86_CC_LE, i0, r0, r1);
3051     return (_jit->pc.w);
3052 }
3053 
3054 static jit_word_t
_blei(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3055 _blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3056 {
3057     if (i1)		jci (X86_CC_LE, i0, r0, i1);
3058     else		jci0(X86_CC_LE, i0, r0);
3059     return (_jit->pc.w);
3060 }
3061 
3062 static jit_word_t
_bler_u(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3063 _bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3064 {
3065     if (r0 == r1)	jmpi(i0);
3066     else		jcr (X86_CC_BE, i0, r0, r1);
3067     return (_jit->pc.w);
3068 }
3069 
3070 static jit_word_t
_blei_u(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3071 _blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3072 {
3073     if (i1)		jci (X86_CC_BE, i0, r0, i1);
3074     else		jci0(X86_CC_BE, i0, r0);
3075     return (_jit->pc.w);
3076 }
3077 
3078 static jit_word_t
_beqr(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3079 _beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3080 {
3081     if (r0 == r1)	jmpi(i0);
3082     else		jcr (X86_CC_E, i0, r0, r1);
3083     return (_jit->pc.w);
3084 }
3085 
3086 static jit_word_t
_beqi(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3087 _beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3088 {
3089     if (i1)		jci (X86_CC_E, i0, r0, i1);
3090     else		jci0(X86_CC_E, i0, r0);
3091     return (_jit->pc.w);
3092 }
3093 
3094 static jit_word_t
_bger(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3095 _bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3096 {
3097     if (r0 == r1)	jmpi(i0);
3098     else		jcr (X86_CC_GE, i0, r0, r1);
3099     return (_jit->pc.w);
3100 }
3101 
3102 static jit_word_t
_bgei(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3103 _bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3104 {
3105     if (i1)		jci (X86_CC_GE, i0, r0, i1);
3106     else		jci0(X86_CC_NS, i0, r0);
3107     return (_jit->pc.w);
3108 }
3109 
3110 static jit_word_t
_bger_u(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3111 _bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3112 {
3113     if (r0 == r1)	jmpi(i0);
3114     else		jcr (X86_CC_AE, i0, r0, r1);
3115     return (_jit->pc.w);
3116 }
3117 
3118 static jit_word_t
_bgei_u(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3119 _bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3120 {
3121     if (i1)		jci (X86_CC_AE, i0, r0, i1);
3122     else		jmpi(i0);
3123     return (_jit->pc.w);
3124 }
3125 
3126 static jit_word_t
_bgtr(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3127 _bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3128 {
3129     jcr(X86_CC_G, i0, r0, r1);
3130     return (_jit->pc.w);
3131 }
3132 
3133 static jit_word_t
_bgti(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3134 _bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3135 {
3136     jci(X86_CC_G, i0, r0, i1);
3137     return (_jit->pc.w);
3138 }
3139 
3140 static jit_word_t
_bgtr_u(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3141 _bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3142 {
3143     jcr(X86_CC_A, i0, r0, r1);
3144     return (_jit->pc.w);
3145 }
3146 
3147 static jit_word_t
_bgti_u(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3148 _bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3149 {
3150     if (i1)		jci (X86_CC_A, i0, r0, i1);
3151     else		jci0(X86_CC_NE, i0, r0);
3152     return (_jit->pc.w);
3153 }
3154 
3155 static jit_word_t
_bner(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3156 _bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3157 {
3158     jcr(X86_CC_NE, i0, r0, r1);
3159     return (_jit->pc.w);
3160 }
3161 
3162 static jit_word_t
_bnei(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3163 _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3164 {
3165     if (i1)		jci (X86_CC_NE, i0, r0, i1);
3166     else		jci0(X86_CC_NE, i0, r0);
3167     return (_jit->pc.w);
3168 }
3169 
3170 static jit_word_t
_bmsr(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3171 _bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3172 {
3173     testr(r0, r1);
3174     jnz(i0);
3175     return (_jit->pc.w);
3176 }
3177 
3178 static jit_word_t
_bmsi(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3179 _bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3180 {
3181     jit_int32_t		reg;
3182     if (can_zero_extend_int_p(i1))
3183 	testi(r0, i1);
3184     else {
3185 	reg = jit_get_reg(jit_class_gpr);
3186 	movi(rn(reg), i1);
3187 	testr(r0, rn(reg));
3188 	jit_unget_reg(reg);
3189     }
3190     jnz(i0);
3191     return (_jit->pc.w);
3192 }
3193 
3194 static jit_word_t
_bmcr(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3195 _bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3196 {
3197     testr(r0, r1);
3198     jz(i0);
3199     return (_jit->pc.w);
3200 }
3201 
3202 static jit_word_t
_bmci(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3203 _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3204 {
3205     jit_int32_t		reg;
3206     if (can_zero_extend_int_p(i1))
3207 	testi(r0, i1);
3208     else {
3209 	reg = jit_get_reg(jit_class_gpr);
3210 	movi(rn(reg), i1);
3211 	testr(r0, rn(reg));
3212 	jit_unget_reg(reg);
3213     }
3214     jz(i0);
3215     return (_jit->pc.w);
3216 }
3217 
3218 static jit_word_t
_boaddr(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3219 _boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3220 {
3221     iaddr(r0, r1);
3222     jo(i0);
3223     return (_jit->pc.w);
3224 }
3225 
3226 static jit_word_t
_boaddi(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3227 _boaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3228 {
3229     jit_int32_t		reg;
3230     if (can_sign_extend_int_p(i1)) {
3231 	iaddi(r0, i1);
3232 	jo(i0);
3233 	return (_jit->pc.w);
3234     }
3235     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
3236     movi(rn(reg), i1);
3237     jit_unget_reg(reg);
3238     return (boaddr(i0, r0, rn(reg)));
3239 }
3240 
3241 static jit_word_t
_boaddr_u(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3242 _boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3243 {
3244     iaddr(r0, r1);
3245     jc(i0);
3246     return (_jit->pc.w);
3247 }
3248 
3249 static jit_word_t
_boaddi_u(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3250 _boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3251 {
3252     jit_int32_t		reg;
3253     if (can_sign_extend_int_p(i1)) {
3254 	iaddi(r0, i1);
3255 	jc(i0);
3256 	return (_jit->pc.w);
3257     }
3258     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
3259     movi(rn(reg), i1);
3260     jit_unget_reg(reg);
3261     return (boaddr_u(i0, r0, rn(reg)));
3262 }
3263 
3264 static jit_word_t
_bxaddr(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3265 _bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3266 {
3267     iaddr(r0, r1);
3268     jno(i0);
3269     return (_jit->pc.w);
3270 }
3271 
3272 static jit_word_t
_bxaddi(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3273 _bxaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3274 {
3275     jit_int32_t		reg;
3276     if (can_sign_extend_int_p(i1)) {
3277 	iaddi(r0, i1);
3278 	jno(i0);
3279 	return (_jit->pc.w);
3280     }
3281     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
3282     movi(rn(reg), i1);
3283     jit_unget_reg(reg);
3284     return (bxaddr(i0, r0, rn(reg)));
3285 }
3286 
3287 static jit_word_t
_bxaddr_u(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3288 _bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3289 {
3290     iaddr(r0, r1);
3291     jnc(i0);
3292     return (_jit->pc.w);
3293 }
3294 
3295 static jit_word_t
_bxaddi_u(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3296 _bxaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3297 {
3298     jit_int32_t		reg;
3299     if (can_sign_extend_int_p(i1)) {
3300 	iaddi(r0, i1);
3301 	jnc(i0);
3302 	return (_jit->pc.w);
3303     }
3304     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
3305     movi(rn(reg), i1);
3306     jit_unget_reg(reg);
3307     return (bxaddr_u(i0, r0, rn(reg)));
3308 }
3309 
3310 static jit_word_t
_bosubr(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3311 _bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3312 {
3313     isubr(r0, r1);
3314     jo(i0);
3315     return (_jit->pc.w);
3316 }
3317 
3318 static jit_word_t
_bosubi(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3319 _bosubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3320 {
3321     jit_int32_t		reg;
3322     if (can_sign_extend_int_p(i1)) {
3323 	isubi(r0, i1);
3324 	jo(i0);
3325 	return (_jit->pc.w);
3326     }
3327     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
3328     movi(rn(reg), i1);
3329     jit_unget_reg(reg);
3330     return (bosubr(i0, r0, rn(reg)));
3331 }
3332 
3333 static jit_word_t
_bosubr_u(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3334 _bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3335 {
3336     isubr(r0, r1);
3337     jc(i0);
3338     return (_jit->pc.w);
3339 }
3340 
3341 static jit_word_t
_bosubi_u(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3342 _bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3343 {
3344     jit_int32_t		reg;
3345     if (can_sign_extend_int_p(i1)) {
3346 	isubi(r0, i1);
3347 	jc(i0);
3348 	return (_jit->pc.w);
3349     }
3350     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
3351     movi(rn(reg), i1);
3352     jit_unget_reg(reg);
3353     return (bosubr_u(i0, r0, rn(reg)));
3354 }
3355 
3356 static jit_word_t
_bxsubr(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3357 _bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3358 {
3359     isubr(r0, r1);
3360     jno(i0);
3361     return (_jit->pc.w);
3362 }
3363 
3364 static jit_word_t
_bxsubi(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3365 _bxsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3366 {
3367     jit_int32_t		reg;
3368     if (can_sign_extend_int_p(i1)) {
3369 	isubi(r0, i1);
3370 	jno(i0);
3371 	return (_jit->pc.w);
3372     }
3373     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
3374     movi(rn(reg), i1);
3375     jit_unget_reg(reg);
3376     return (bxsubr(i0, r0, rn(reg)));
3377 }
3378 
3379 static jit_word_t
_bxsubr_u(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)3380 _bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3381 {
3382     isubr(r0, r1);
3383     jnc(i0);
3384     return (_jit->pc.w);
3385 }
3386 
3387 static jit_word_t
_bxsubi_u(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)3388 _bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3389 {
3390     jit_int32_t		reg;
3391     if (can_sign_extend_int_p(i1)) {
3392 	isubi(r0, i1);
3393 	jnc(i0);
3394 	return (_jit->pc.w);
3395     }
3396     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
3397     movi(rn(reg), i1);
3398     jit_unget_reg(reg);
3399     return (bxsubr_u(i0, r0, rn(reg)));
3400 }
3401 
3402 static void
_callr(jit_state_t * _jit,jit_int32_t r0)3403 _callr(jit_state_t *_jit, jit_int32_t r0)
3404 {
3405     rex(0, 0, _NOREG, _NOREG, r0);
3406     ic(0xff);
3407     mrm(0x03, 0x02, r7(r0));
3408 }
3409 
3410 static jit_word_t
_calli(jit_state_t * _jit,jit_word_t i0)3411 _calli(jit_state_t *_jit, jit_word_t i0)
3412 {
3413     jit_word_t		word;
3414 #if __X64
3415     jit_int32_t		reg;
3416 
3417     reg = jit_get_reg(jit_class_gpr);
3418     word = movi_p(rn(reg), i0);
3419     callr(rn(reg));
3420     jit_unget_reg(reg);
3421 #else
3422     jit_word_t		w;
3423     ic(0xe8);
3424     w = i0 - (_jit->pc.w + 4);
3425     ii(w);
3426     word = _jit->pc.w;
3427 #endif
3428     return (word);
3429 }
3430 
3431 static void
_jmpr(jit_state_t * _jit,jit_int32_t r0)3432 _jmpr(jit_state_t *_jit, jit_int32_t r0)
3433 {
3434     rex(0, WIDE, _NOREG, _NOREG, r0);
3435     ic(0xff);
3436     mrm(0x03, 0x04, r7(r0));
3437 }
3438 
3439 static jit_word_t
_jmpi(jit_state_t * _jit,jit_word_t i0)3440 _jmpi(jit_state_t *_jit, jit_word_t i0)
3441 {
3442     jit_word_t		w;
3443     ic(0xe9);
3444     w = i0 - (_jit->pc.w + 4);
3445     ii(w);
3446     return (_jit->pc.w);
3447 }
3448 
3449 static void
_jmpsi(jit_state_t * _jit,jit_uint8_t i0)3450 _jmpsi(jit_state_t *_jit, jit_uint8_t i0)
3451 {
3452     ic(0xeb);
3453     ic(i0);
3454 }
3455 
3456 static void
_prolog(jit_state_t * _jit,jit_node_t * node)3457 _prolog(jit_state_t *_jit, jit_node_t *node)
3458 {
3459     jit_int32_t		reg;
3460     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
3461 	jit_int32_t	frame = -_jitc->function->frame;
3462 	assert(_jitc->function->self.aoff >= frame);
3463 	if (_jitc->function->assume_frame)
3464 	    return;
3465 	_jitc->function->self.aoff = frame;
3466     }
3467     if (_jitc->function->allocar)
3468 	_jitc->function->self.aoff &= -16;
3469 #if __X64 && (__CYGWIN__ || _WIN32)
3470     _jitc->function->stack = (((/* first 32 bytes must be allocated */
3471 				(_jitc->function->self.alen > 32 ?
3472 				 _jitc->function->self.alen : 32) -
3473 				/* align stack at 16 bytes */
3474 				_jitc->function->self.aoff) + 15) & -16) +
3475 	stack_adjust;
3476 #else
3477     _jitc->function->stack = (((_jitc->function->self.alen -
3478 			       _jitc->function->self.aoff) + 15) & -16) +
3479 	stack_adjust;
3480 #endif
3481     subi(_RSP_REGNO, _RSP_REGNO, stack_framesize - REAL_WORDSIZE);
3482     /* callee save registers */
3483 #if __X32
3484     if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
3485 	stxi(12, _RSP_REGNO, _RDI_REGNO);
3486     if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
3487 	stxi( 8, _RSP_REGNO, _RSI_REGNO);
3488     if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
3489 	stxi( 4, _RSP_REGNO, _RBX_REGNO);
3490 #else
3491 #  if __CYGWIN__ || _WIN32
3492     if (jit_regset_tstbit(&_jitc->function->regset, _XMM15))
3493 	sse_stxi_d(136, _RSP_REGNO, _XMM15_REGNO);
3494     if (jit_regset_tstbit(&_jitc->function->regset, _XMM14))
3495 	sse_stxi_d(128, _RSP_REGNO, _XMM14_REGNO);
3496     if (jit_regset_tstbit(&_jitc->function->regset, _XMM13))
3497 	sse_stxi_d(120, _RSP_REGNO, _XMM13_REGNO);
3498     if (jit_regset_tstbit(&_jitc->function->regset, _XMM12))
3499 	sse_stxi_d(112, _RSP_REGNO, _XMM12_REGNO);
3500     if (jit_regset_tstbit(&_jitc->function->regset, _XMM11))
3501 	sse_stxi_d(104, _RSP_REGNO, _XMM11_REGNO);
3502     if (jit_regset_tstbit(&_jitc->function->regset, _XMM10))
3503 	sse_stxi_d(96, _RSP_REGNO, _XMM10_REGNO);
3504     if (jit_regset_tstbit(&_jitc->function->regset, _XMM9))
3505 	sse_stxi_d(88, _RSP_REGNO, _XMM9_REGNO);
3506     if (jit_regset_tstbit(&_jitc->function->regset, _XMM8))
3507 	sse_stxi_d(80, _RSP_REGNO, _XMM8_REGNO);
3508     if (jit_regset_tstbit(&_jitc->function->regset, _XMM7))
3509 	sse_stxi_d(72, _RSP_REGNO, _XMM7_REGNO);
3510     if (jit_regset_tstbit(&_jitc->function->regset, _XMM6))
3511 	sse_stxi_d(64, _RSP_REGNO, _XMM6_REGNO);
3512     if (jit_regset_tstbit(&_jitc->function->regset, _R15))
3513 	stxi(56, _RSP_REGNO, _R15_REGNO);
3514     if (jit_regset_tstbit(&_jitc->function->regset, _R14))
3515 	stxi(48, _RSP_REGNO, _R14_REGNO);
3516     if (jit_regset_tstbit(&_jitc->function->regset, _R13))
3517 	stxi(40, _RSP_REGNO, _R13_REGNO);
3518     if (jit_regset_tstbit(&_jitc->function->regset, _R12))
3519 	stxi(32, _RSP_REGNO, _R12_REGNO);
3520     if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
3521 	stxi(24, _RSP_REGNO, _RSI_REGNO);
3522     if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
3523 	stxi(16, _RSP_REGNO, _RDI_REGNO);
3524     if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
3525 	stxi( 8, _RSP_REGNO, _RBX_REGNO);
3526 #  else
3527     if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
3528 	stxi(40, _RSP_REGNO, _RBX_REGNO);
3529     if (jit_regset_tstbit(&_jitc->function->regset, _R12))
3530 	stxi(32, _RSP_REGNO, _R12_REGNO);
3531     if (jit_regset_tstbit(&_jitc->function->regset, _R13))
3532 	stxi(24, _RSP_REGNO, _R13_REGNO);
3533     if (jit_regset_tstbit(&_jitc->function->regset, _R14))
3534 	stxi(16, _RSP_REGNO, _R14_REGNO);
3535     if (jit_regset_tstbit(&_jitc->function->regset, _R15))
3536 	stxi( 8, _RSP_REGNO, _R15_REGNO);
3537 #  endif
3538 #endif
3539     stxi(0, _RSP_REGNO, _RBP_REGNO);
3540     movr(_RBP_REGNO, _RSP_REGNO);
3541 
3542     /* alloca */
3543     subi(_RSP_REGNO, _RSP_REGNO, _jitc->function->stack);
3544     if (_jitc->function->allocar) {
3545 	reg = jit_get_reg(jit_class_gpr);
3546 	movi(rn(reg), _jitc->function->self.aoff);
3547 	stxi_i(_jitc->function->aoffoff, _RBP_REGNO, rn(reg));
3548 	jit_unget_reg(reg);
3549     }
3550 
3551 #if __X64 && !(__CYGWIN__ || _WIN32)
3552     if (_jitc->function->self.call & jit_call_varargs) {
3553 	jit_word_t	nofp_code;
3554 
3555 	/* Save gp registers in the save area, if any is a vararg */
3556 	for (reg = first_gp_from_offset(_jitc->function->vagp);
3557 	     jit_arg_reg_p(reg); ++reg)
3558 	    stxi(_jitc->function->vaoff + first_gp_offset +
3559 		 reg * 8, _RBP_REGNO, rn(JIT_RA0 - reg));
3560 
3561 	reg = first_fp_from_offset(_jitc->function->vafp);
3562 	if (jit_arg_f_reg_p(reg)) {
3563 	    /* Skip over if no float registers were passed as argument */
3564 	    /* test %al, %al */
3565 	    ic(0x84);
3566 	    ic(0xc0);
3567 	    jes(0);
3568 	    nofp_code = _jit->pc.w;
3569 
3570 	    /* Save fp registers in the save area, if any is a vararg */
3571 	    /* Note that the full 16 byte xmm is not saved, because
3572 	     * lightning only handles float and double, and, while
3573 	     * attempting to provide a va_list compatible pointer as
3574 	     * jit_va_start return, does not guarantee it (on all ports). */
3575 	    for (; jit_arg_f_reg_p(reg); ++reg)
3576 		sse_stxi_d(_jitc->function->vaoff + first_fp_offset +
3577 			   reg * va_fp_increment, _RBP_REGNO, rn(_XMM0 - reg));
3578 
3579 	    patch_rel_char(nofp_code, _jit->pc.w);
3580 	}
3581     }
3582 #endif
3583 }
3584 
3585 static void
_epilog(jit_state_t * _jit,jit_node_t * node)3586 _epilog(jit_state_t *_jit, jit_node_t *node)
3587 {
3588     if (_jitc->function->assume_frame)
3589 	return;
3590     /* callee save registers */
3591     movr(_RSP_REGNO, _RBP_REGNO);
3592 #if __X32
3593     if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
3594 	ldxi(_RDI_REGNO, _RSP_REGNO, 12);
3595     if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
3596 	ldxi(_RSI_REGNO, _RSP_REGNO,  8);
3597     if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
3598 	ldxi(_RBX_REGNO, _RSP_REGNO,  4);
3599 #else
3600 #  if __CYGWIN__ || _WIN32
3601     if (jit_regset_tstbit(&_jitc->function->regset, _XMM15))
3602 	sse_ldxi_d(_XMM15_REGNO, _RSP_REGNO, 136);
3603     if (jit_regset_tstbit(&_jitc->function->regset, _XMM14))
3604 	sse_ldxi_d(_XMM14_REGNO, _RSP_REGNO, 128);
3605     if (jit_regset_tstbit(&_jitc->function->regset, _XMM13))
3606 	sse_ldxi_d(_XMM13_REGNO, _RSP_REGNO, 120);
3607     if (jit_regset_tstbit(&_jitc->function->regset, _XMM12))
3608 	sse_ldxi_d(_XMM12_REGNO, _RSP_REGNO, 112);
3609     if (jit_regset_tstbit(&_jitc->function->regset, _XMM11))
3610 	sse_ldxi_d(_XMM11_REGNO, _RSP_REGNO, 104);
3611     if (jit_regset_tstbit(&_jitc->function->regset, _XMM10))
3612 	sse_ldxi_d(_XMM10_REGNO, _RSP_REGNO, 96);
3613     if (jit_regset_tstbit(&_jitc->function->regset, _XMM9))
3614 	sse_ldxi_d(_XMM9_REGNO, _RSP_REGNO, 88);
3615     if (jit_regset_tstbit(&_jitc->function->regset, _XMM8))
3616 	sse_ldxi_d(_XMM8_REGNO, _RSP_REGNO, 80);
3617     if (jit_regset_tstbit(&_jitc->function->regset, _XMM7))
3618 	sse_ldxi_d(_XMM7_REGNO, _RSP_REGNO, 72);
3619     if (jit_regset_tstbit(&_jitc->function->regset, _XMM6))
3620 	sse_ldxi_d(_XMM6_REGNO, _RSP_REGNO, 64);
3621     if (jit_regset_tstbit(&_jitc->function->regset, _R15))
3622 	ldxi(_R15_REGNO, _RSP_REGNO, 56);
3623     if (jit_regset_tstbit(&_jitc->function->regset, _R14))
3624 	ldxi(_R14_REGNO, _RSP_REGNO, 48);
3625     if (jit_regset_tstbit(&_jitc->function->regset, _R13))
3626 	ldxi(_R13_REGNO, _RSP_REGNO, 40);
3627     if (jit_regset_tstbit(&_jitc->function->regset, _R12))
3628 	ldxi(_R12_REGNO, _RSP_REGNO, 32);
3629     if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
3630 	ldxi(_RSI_REGNO, _RSP_REGNO, 24);
3631     if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
3632 	ldxi(_RDI_REGNO, _RSP_REGNO, 16);
3633     if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
3634 	ldxi(_RBX_REGNO, _RSP_REGNO,  8);
3635 #  else
3636     if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
3637 	ldxi(_RBX_REGNO, _RSP_REGNO, 40);
3638     if (jit_regset_tstbit(&_jitc->function->regset, _R12))
3639 	ldxi(_R12_REGNO, _RSP_REGNO, 32);
3640     if (jit_regset_tstbit(&_jitc->function->regset, _R13))
3641 	ldxi(_R13_REGNO, _RSP_REGNO, 24);
3642     if (jit_regset_tstbit(&_jitc->function->regset, _R14))
3643 	ldxi(_R14_REGNO, _RSP_REGNO, 16);
3644     if (jit_regset_tstbit(&_jitc->function->regset, _R15))
3645 	ldxi(_R15_REGNO, _RSP_REGNO,  8);
3646 #  endif
3647 #endif
3648     ldxi(_RBP_REGNO, _RSP_REGNO, 0);
3649     addi(_RSP_REGNO, _RSP_REGNO, stack_framesize - REAL_WORDSIZE);
3650 
3651     ic(0xc3);
3652 }
3653 
3654 static void
_vastart(jit_state_t * _jit,jit_int32_t r0)3655 _vastart(jit_state_t *_jit, jit_int32_t r0)
3656 {
3657 #if __X32 || __CYGWIN__ || _WIN32
3658     assert(_jitc->function->self.call & jit_call_varargs);
3659     addi(r0, _RBP_REGNO, _jitc->function->self.size);
3660 #else
3661     jit_int32_t		reg;
3662 
3663     assert(_jitc->function->self.call & jit_call_varargs);
3664 
3665     /* Return jit_va_list_t in the register argument */
3666     addi(r0, _RBP_REGNO, _jitc->function->vaoff);
3667     reg = jit_get_reg(jit_class_gpr);
3668 
3669     /* Initialize gp offset in the save area. */
3670     movi(rn(reg), _jitc->function->vagp);
3671     stxi_i(offsetof(jit_va_list_t, gpoff), r0, rn(reg));
3672 
3673     /* Initialize fp offset in the save area. */
3674     movi(rn(reg), _jitc->function->vafp);
3675     stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
3676 
3677     /* Initialize overflow pointer to the first stack argument. */
3678     addi(rn(reg), _RBP_REGNO, _jitc->function->self.size);
3679     stxi(offsetof(jit_va_list_t, over), r0, rn(reg));
3680 
3681     /* Initialize register save area pointer. */
3682     addi(rn(reg), r0, first_gp_offset);
3683     stxi(offsetof(jit_va_list_t, save), r0, rn(reg));
3684 
3685     jit_unget_reg(reg);
3686 #endif
3687 }
3688 
3689 static void
_vaarg(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)3690 _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3691 {
3692 #if __X32 || __CYGWIN__ || _WIN32
3693     assert(_jitc->function->self.call & jit_call_varargs);
3694     ldr(r0, r1);
3695     addi(r1, r1, va_gp_increment);
3696 #else
3697     jit_int32_t		rg0;
3698     jit_int32_t		rg1;
3699     jit_word_t		ge_code;
3700     jit_word_t		lt_code;
3701 
3702     assert(_jitc->function->self.call & jit_call_varargs);
3703 
3704     rg0 = jit_get_reg(jit_class_gpr);
3705     rg1 = jit_get_reg(jit_class_gpr);
3706 
3707     /* Load the gp offset in save area in the first temporary. */
3708     ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, gpoff));
3709 
3710     /* Jump over if there are no remaining arguments in the save area. */
3711     icmpi(rn(rg0), va_gp_max_offset);
3712     jaes(0);
3713     ge_code = _jit->pc.w;
3714 
3715     /* Load the save area pointer in the second temporary. */
3716     ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
3717 
3718     /* Load the vararg argument in the first argument. */
3719     ldxr(r0, rn(rg1), rn(rg0));
3720 
3721     /* Update the gp offset. */
3722     addi(rn(rg0), rn(rg0), 8);
3723     stxi_i(offsetof(jit_va_list_t, gpoff), r1, rn(rg0));
3724 
3725     /* Will only need one temporary register below. */
3726     jit_unget_reg(rg1);
3727 
3728     /* Jump over overflow code. */
3729     jmpsi(0);
3730     lt_code = _jit->pc.w;
3731 
3732     /* Where to land if argument is in overflow area. */
3733     patch_rel_char(ge_code, _jit->pc.w);
3734 
3735     /* Load overflow pointer. */
3736     ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
3737 
3738     /* Load argument. */
3739     ldr(r0, rn(rg0));
3740 
3741     /* Update overflow pointer. */
3742     addi(rn(rg0), rn(rg0), va_gp_increment);
3743     stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
3744 
3745     /* Where to land if argument is in save area. */
3746     patch_rel_char(lt_code, _jit->pc.w);
3747 
3748     jit_unget_reg(rg0);
3749 #endif
3750 }
3751 
/* The x87 boolean argument selects where the result is placed: in an
 * x87 register if true, in an SSE register otherwise. */
3754 static void
_vaarg_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_bool_t x87)3755 _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t x87)
3756 {
3757 #if __X32 || __CYGWIN__ || _WIN32
3758     assert(_jitc->function->self.call & jit_call_varargs);
3759     if (x87)
3760 	x87_ldr_d(r0, r1);
3761     else
3762 	sse_ldr_d(r0, r1);
3763     addi(r1, r1, 8);
3764 #else
3765     jit_int32_t		rg0;
3766     jit_int32_t		rg1;
3767     jit_word_t		ge_code;
3768     jit_word_t		lt_code;
3769 
3770     assert(_jitc->function->self.call & jit_call_varargs);
3771 
3772     rg0 = jit_get_reg(jit_class_gpr);
3773     rg1 = jit_get_reg(jit_class_gpr);
3774 
3775     /* Load the fp offset in save area in the first temporary. */
3776     ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, fpoff));
3777 
3778     /* Jump over if there are no remaining arguments in the save area. */
3779     icmpi(rn(rg0), va_fp_max_offset);
3780     jaes(0);
3781     ge_code = _jit->pc.w;
3782 
3783     /* Load the save area pointer in the second temporary. */
3784     ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
3785 
3786     /* Load the vararg argument in the first argument. */
3787     if (x87)
3788 	x87_ldxr_d(r0, rn(rg1), rn(rg0));
3789     else
3790 	sse_ldxr_d(r0, rn(rg1), rn(rg0));
3791 
3792     /* Update the fp offset. */
3793     addi(rn(rg0), rn(rg0), va_fp_increment);
3794     stxi_i(offsetof(jit_va_list_t, fpoff), r1, rn(rg0));
3795 
3796     /* Will only need one temporary register below. */
3797     jit_unget_reg(rg1);
3798 
3799     /* Jump over overflow code. */
3800     jmpsi(0);
3801     lt_code = _jit->pc.w;
3802 
3803     /* Where to land if argument is in overflow area. */
3804     patch_rel_char(ge_code, _jit->pc.w);
3805 
3806     /* Load overflow pointer. */
3807     ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
3808 
3809     /* Load argument. */
3810     if (x87)
3811 	x87_ldr_d(r0, rn(rg0));
3812     else
3813 	sse_ldr_d(r0, rn(rg0));
3814 
3815     /* Update overflow pointer. */
3816     addi(rn(rg0), rn(rg0), 8);
3817     stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
3818 
3819     /* Where to land if argument is in save area. */
3820     patch_rel_char(lt_code, _jit->pc.w);
3821 
3822     jit_unget_reg(rg0);
3823 #endif
3824 }
3825 
3826 static void
_patch_at(jit_state_t * _jit,jit_node_t * node,jit_word_t instr,jit_word_t label)3827 _patch_at(jit_state_t *_jit, jit_node_t *node,
3828 	  jit_word_t instr, jit_word_t label)
3829 {
3830     switch (node->code) {
3831 #  if __X64
3832 	case jit_code_calli:
3833 #  endif
3834 	case jit_code_movi:
3835 	    patch_abs(instr, label);
3836 	    break;
3837 	default:
3838 	    patch_rel(instr, label);
3839 	    break;
3840     }
3841 }
3842 #endif
3843