1 /*
2  * Copyright (C) 2013-2019  Free Software Foundation, Inc.
3  *
4  * This file is part of GNU lightning.
5  *
6  * GNU lightning is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; either version 3, or (at your option)
9  * any later version.
10  *
11  * GNU lightning is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14  * License for more details.
15  *
16  * Authors:
17  *	Paulo Cesar Pereira de Andrade
18  */
19 
20 #if PROTO
/*
 * instr_t - one 32-bit A64 instruction word viewed through named bit-fields.
 *
 * `w` is the whole word.  Every other member is a small struct whose `_`
 * bit-field is padding and whose `b` bit-field is the instruction field
 * named by the member (field names follow binutils' aarch64-opc.c).
 *
 * Two layouts are provided, selected by __BYTE_ORDER.  In the first one `_`
 * is the field's least significant bit position (lsb).  In the #else layout
 * `_` is 32 - lsb - width, i.e. the number of bits above the field, so that
 * `b` names the same bits of `w` when the compiler allocates bit-fields
 * starting from the most significant bit.
 * NOTE(review): bit-field allocation order is implementation-defined in C;
 * both layouts assume the usual ABI behaviour for the selected byte order.
 */
21 typedef union {
22 /* aarch64-opc.c */
23 #  define ui			jit_uint32_t
24 #  if __BYTE_ORDER == __LITTLE_ENDIAN
25     /* cond2: condition in truly conditional-executed inst.  */
26     struct {		ui b:  4; } cond2;
27     /* nzcv: flag bit specifier, encoded in the "nzcv" field.  */
28     struct {		ui b:  4; } nzcv;
29     /* defgh: d:e:f:g:h bits in AdvSIMD modified immediate.  */
30     struct { ui _:  5;	ui b:  5; } defgh;
31     /* abc: a:b:c bits in AdvSIMD modified immediate.  */
32     struct { ui _: 16;	ui b:  3; } abc;
33     /* imm19: e.g. in CBZ.  */
34     struct { ui _:  5;	ui b: 19; } imm19;
35     /* immhi: e.g. in ADRP.  */
36     struct { ui _:  5;	ui b: 19; } immhi;
37     /* immlo: e.g. in ADRP.  */
38     struct { ui _: 29;	ui b:  2; } immlo;
39     /* size: in most AdvSIMD and floating-point instructions.  */
40     struct { ui _: 22;	ui b:  2; } size;
41     /* vldst_size: size field in the AdvSIMD load/store inst.  */
42     struct { ui _: 10;	ui b:  2; } vldst_size;
43     /* op: in AdvSIMD modified immediate instructions.  */
44     struct { ui _: 29;	ui b:  1; } op;
45     /* Q: in most AdvSIMD instructions.  */
46     struct { ui _: 30;	ui b:  1; } Q;
47     /* Rt: in load/store instructions.  */
48     struct { 		ui b:  5; } Rt;
49     /* Rd: in many integer instructions.  */
50     struct { 		ui b:  5; } Rd;
51     /* Rn: in many integer instructions.  */
52     struct { ui _:  5;	ui b:  5; } Rn;
53     /* Rt2: in load/store pair instructions.  */
54     struct { ui _: 10;	ui b:  5; } Rt2;
55     /* Ra: in fp instructions.  */
56     struct { ui _: 10;	ui b:  5; } Ra;
57     /* op2: in the system instructions.  */
58     struct { ui _:  5;	ui b:  3; } op2;
59     /* CRm: in the system instructions.  */
60     struct { ui _:  8;	ui b:  4; } CRm;
61     /* CRn: in the system instructions.  */
62     struct { ui _: 12;	ui b:  4; } CRn;
63     /* op1: in the system instructions.  */
64     struct { ui _: 16;	ui b:  3; } op1;
65     /* op0: in the system instructions.  */
66     struct { ui _: 19;	ui b:  2; } op0;
67     /* imm3: in add/sub extended reg instructions.  */
68     struct { ui _: 10;	ui b:  3; } imm3;
69     /* cond: condition flags as a source operand.  */
70     struct { ui _: 12;	ui b:  4; } cond;
71     /* opcode: in advsimd load/store instructions.  */
72     struct { ui _: 12;	ui b:  4; } opcode;
73     /* cmode: in advsimd modified immediate instructions.  */
74     struct { ui _: 12;	ui b:  4; } cmode;
75     /* asisdlso_opcode: opcode in advsimd ld/st single element.  */
76     struct { ui _: 13;	ui b:  3; } asisdlso_opcode;
77     /* len: in advsimd tbl/tbx instructions.  */
78     struct { ui _: 13;	ui b:  2; } len;
79     /* Rm: in ld/st reg offset and some integer inst.  */
80     struct { ui _: 16;	ui b:  5; } Rm;
81     /* Rs: in load/store exclusive instructions.  */
82     struct { ui _: 16;	ui b:  5; } Rs;
83     /* option: in ld/st reg offset + add/sub extended reg inst.  */
84     struct { ui _: 13;	ui b:  3; } option;
85     /* S: in load/store reg offset instructions.  */
86     struct { ui _: 12;	ui b:  1; } S;
87     /* hw: in move wide constant instructions.  */
88     struct { ui _: 21;	ui b:  2; } hw;
89     /* opc: in load/store reg offset instructions.  */
90     struct { ui _: 22;	ui b:  2; } opc;
91     /* opc1: in load/store reg offset instructions.  */
92     struct { ui _: 23;	ui b:  1; } opc1;
93     /* shift: in add/sub reg/imm shifted instructions.  */
94     struct { ui _: 22;	ui b:  2; } shift;
95     /* type: floating point type field in fp data inst.  */
96     struct { ui _: 22;	ui b:  2; } type;
97     /* ldst_size: size field in ld/st reg offset inst.  */
98     struct { ui _: 30;	ui b:  2; } ldst_size;
99     /* imm6: in add/sub reg shifted instructions.  */
100     struct { ui _: 10;	ui b:  6; } imm6;
101     /* imm4: in advsimd ext and advsimd ins instructions.  */
102     struct { ui _: 11;	ui b:  4; } imm4;
103     /* imm5: in conditional compare (immediate) instructions.  */
104     struct { ui _: 16;	ui b:  5; } imm5;
105     /* imm7: in load/store pair pre/post index instructions.  */
106     struct { ui _: 15;	ui b:  7; } imm7;
107     /* imm8: in floating-point scalar move immediate inst.  */
108     struct { ui _: 13;	ui b:  8; } imm8;
109     /* imm9: in load/store pre/post index instructions.  */
110     struct { ui _: 12;	ui b:  9; } imm9;
111     /* imm12: in ld/st unsigned imm or add/sub shifted inst.  */
112     struct { ui _: 10;	ui b: 12; } imm12;
113     /* imm14: in test bit and branch instructions.  */
114     struct { ui _:  5;	ui b: 14; } imm14;
115     /* imm16: in exception instructions.  */
116     struct { ui _:  5;	ui b: 16; } imm16;
117     /* imm26: in unconditional branch instructions.  */
118     struct {		ui b: 26; } imm26;
119     /* imms: in bitfield and logical immediate instructions.  */
120     struct { ui _: 10;	ui b:  6; } imms;
121     /* immr: in bitfield and logical immediate instructions.  */
122     struct { ui _: 16;	ui b:  6; } immr;
123     /* immb: in advsimd shift by immediate instructions.  */
124     struct { ui _: 16;	ui b:  3; } immb;
125     /* immh: in advsimd shift by immediate instructions.  */
126     struct { ui _: 19;	ui b:  4; } immh;
127     /* N: in logical (immediate) instructions.  */
128     struct { ui _: 22;	ui b:  1; } N;
129     /* index: in ld/st inst deciding the pre/post-index.  */
130     struct { ui _: 11;	ui b:  1; } index;
131     /* index2: in ld/st pair inst deciding the pre/post-index.  */
132     struct { ui _: 24;	ui b:  1; } index2;
133     /* sf: in integer data processing instructions.  */
134     struct { ui _: 31;	ui b:  1; } sf;
135     /* H: in advsimd scalar x indexed element instructions.  */
136     struct { ui _: 11;	ui b:  1; } H;
137     /* L: in advsimd scalar x indexed element instructions.  */
138     struct { ui _: 21;	ui b:  1; } L;
139     /* M: in advsimd scalar x indexed element instructions.  */
140     struct { ui _: 20;	ui b:  1; } M;
141     /* b5: in the test bit and branch instructions.  */
142     struct { ui _: 31;	ui b:  1; } b5;
143     /* b40: in the test bit and branch instructions.  */
144     struct { ui _: 19;	ui b:  5; } b40;
145     /* scale: in the fixed-point scalar to fp converting inst.  */
146     struct { ui _: 10;	ui b:  6; } scale;
147 #  else
    /* Mirrored layout: for every field, `_` = 32 - lsb - width and `b` keeps
     * the same width as in the layout above.  */
148     struct { ui _: 28;	ui b:  4; } cond2;
149     struct { ui _: 28;	ui b:  4; } nzcv;
150     struct { ui _: 22;	ui b:  5; } defgh;
151     struct { ui _: 13;	ui b:  3; } abc;
152     struct { ui _:  8;	ui b: 19; } imm19;
153     struct { ui _:  8;	ui b: 19; } immhi;
    /* immlo is 2 bits wide at lsb 29: 32 - 29 - 2 = 1 bit of padding.  */
154     struct { ui _:  1;	ui b:  2; } immlo;
155     struct { ui _:  8;	ui b:  2; } size;
156     struct { ui _: 20;	ui b:  2; } vldst_size;
157     struct { ui _:  2;	ui b:  1; } op;
158     struct { ui _:  1;	ui b:  1; } Q;
    /* Rt and Rd are 5-bit register numbers at lsb 0: 27 bits of padding.  */
159     struct { ui _: 27;	ui b:  5; } Rt;
160     struct { ui _: 27;	ui b:  5; } Rd;
161     struct { ui _: 22;	ui b:  5; } Rn;
162     struct { ui _: 17;	ui b:  5; } Rt2;
163     struct { ui _: 17;	ui b:  5; } Ra;
164     struct { ui _: 24;	ui b:  3; } op2;
165     struct { ui _: 20;	ui b:  4; } CRm;
166     struct { ui _: 16;	ui b:  4; } CRn;
167     struct { ui _: 13;	ui b:  3; } op1;
168     struct { ui _: 11;	ui b:  2; } op0;
169     struct { ui _: 19;	ui b:  3; } imm3;
170     struct { ui _: 16;	ui b:  4; } cond;
171     struct { ui _: 16;	ui b:  4; } opcode;
172     struct { ui _: 16;	ui b:  4; } cmode;
173     struct { ui _: 16;	ui b:  3; } asisdlso_opcode;
174     struct { ui _: 17;	ui b:  2; } len;
175     struct { ui _: 11;	ui b:  5; } Rm;
176     struct { ui _: 11;	ui b:  5; } Rs;
177     struct { ui _: 16;	ui b:  3; } option;
178     struct { ui _: 19;	ui b:  1; } S;
179     struct { ui _:  9;	ui b:  2; } hw;
180     struct { ui _:  8;	ui b:  2; } opc;
181     struct { ui _:  8;	ui b:  1; } opc1;
182     struct { ui _:  8;	ui b:  2; } shift;
183     struct { ui _:  8;	ui b:  2; } type;
184     struct {		ui b:  2; } ldst_size;
185     struct { ui _: 16;	ui b:  6; } imm6;
186     struct { ui _: 17;	ui b:  4; } imm4;
187     struct { ui _: 11;	ui b:  5; } imm5;
188     struct { ui _: 10;	ui b:  7; } imm7;
189     struct { ui _: 11;	ui b:  8; } imm8;
190     struct { ui _: 11;	ui b:  9; } imm9;
191     struct { ui _: 10;	ui b: 12; } imm12;
192     struct { ui _: 13;	ui b: 14; } imm14;
193     struct { ui _: 11;	ui b: 16; } imm16;
194     struct { ui _:  6;	ui b: 26; } imm26;
195     struct { ui _: 16;	ui b:  6; } imms;
196     struct { ui _: 10;	ui b:  6; } immr;
197     struct { ui _: 13;	ui b:  3; } immb;
198     struct { ui _:  9;	ui b:  4; } immh;
199     struct { ui _:  9;	ui b:  1; } N;
200     struct { ui _: 20;	ui b:  1; } index;
201     struct { ui _:  7;	ui b:  1; } index2;
202     struct {		ui b:  1; } sf;
203     struct { ui _: 20;	ui b:  1; } H;
204     struct { ui _: 10;	ui b:  1; } L;
205     struct { ui _: 11;	ui b:  1; } M;
206     struct {		ui b:  1; } b5;
207     struct { ui _:  8;	ui b:  5; } b40;
208     struct { ui _: 16;	ui b:  6; } scale;
209 #  endif
    /* The complete instruction word; the encoders emit this value.  */
210     jit_int32_t		w;
211 #  undef ui
212 } instr_t;
/*
 * Backend-wide constants and short aliases.
 *
 * stack_framesize: fixed frame size constant.
 *   NOTE(review): presumably in bytes; confirm against the prolog code.
 * ii(i): stores one instruction word at the current code pointer
 *   (_jit->pc.ui) and advances the pointer.  The argument is expanded
 *   without parentheses, so pass a simple expression.
 * ldr/ldxr/ldxi/stxi: unsuffixed aliases that select the `_l` variants.
 */
213 #  define stack_framesize		160
214 #  define ii(i)				*_jit->pc.ui++ = i
215 #  define ldr(r0,r1)			ldr_l(r0,r1)
216 #  define ldxr(r0,r1,r2)		ldxr_l(r0,r1,r2)
217 #  define ldxi(r0,r1,i0)		ldxi_l(r0,r1,i0)
218 #  define stxi(i0,r0,r1)		stxi_l(i0,r0,r1)
/* Register numbers: 0x1d = 29, 0x1e = 30, 0x1f = 31.  SP_REGNO and the zero
 * register names (XZR/WZR) all share number 31.  */
219 #  define FP_REGNO			0x1d
220 #  define LR_REGNO			0x1e
221 #  define SP_REGNO			0x1f
222 #  define XZR_REGNO			0x1f
223 #  define WZR_REGNO			XZR_REGNO
/* Flags OR'ed into an opcode template by the instruction macros.
 * LSL_12 is 1 << 22 (the `shift` field of instr_t); MOVI_LSL_16/32/48 are
 * the values 1/2/3 placed at bit 21 (the `hw` field).  */
224 #  define LSL_12			0x00400000
225 #  define MOVI_LSL_16			0x00200000
226 #  define MOVI_LSL_32			0x00400000
227 #  define MOVI_LSL_48			0x00600000
/* XS is bit 31 (the `sf` field); DS is bit 22 (low bit of the `type`
 * field).  */
228 #  define XS				0x80000000	/* Wn -> Xn */
229 #  define DS				0x00400000	/* Sn -> Dn */
/*
 * Condition code tables.
 *
 * CC_* and BCC_* hold the same sixteen condition names; for every name the
 * two values differ only in bit 0 (CC_x == BCC_x ^ 1).  CC_* is what CSET
 * and CSINC receive in this file; BCC_* is a separate table for branches.
 * NOTE(review): BCC_* look like the architectural encodings and CC_* like
 * the inverted ones that CSINC-based CSET needs - confirm against the
 * Arm ARM before relying on this.
 */
230 #  define CC_NE				0x0
231 #  define CC_EQ				0x1
232 #  define CC_CC				0x2
233 #  define CC_LO				CC_CC
234 #  define CC_CS				0x3
235 #  define CC_HS				CC_CS
236 #  define CC_PL				0x4
237 #  define CC_MI				0x5
238 #  define CC_VC				0x6
239 #  define CC_VS				0x7
240 #  define CC_LS				0x8
241 #  define CC_HI				0x9
242 #  define CC_LT				0xa
243 #  define CC_GE				0xb
244 #  define CC_LE				0xc
245 #  define CC_GT				0xd
246 #  define CC_NV				0xe
247 #  define CC_AL				0xf
248 /* Branches need inverted condition */
249 #  define BCC_EQ			0x0
250 #  define BCC_NE			0x1
251 #  define BCC_CS			0x2
252 #  define BCC_HS			BCC_CS
253 #  define BCC_CC			0x3
254 #  define BCC_LO			BCC_CC
255 #  define BCC_MI			0x4
256 #  define BCC_PL			0x5
257 #  define BCC_VS			0x6
258 #  define BCC_VC			0x7
259 #  define BCC_HI			0x8
260 #  define BCC_LS			0x9
261 #  define BCC_GE			0xa
262 #  define BCC_LT			0xb
263 #  define BCC_GT			0xc
264 #  define BCC_LE			0xd
265 #  define BCC_AL			0xe
266 #  define BCC_NV			0xf
/*
 * Opcode templates.  Each A64_* value is an instruction word with its
 * operand fields left at zero; the instruction macros OR in flags such as
 * XS and pass the result to an encoder helper that fills the fields.
 *
 * A few templates already have an operand field preset:
 *   A64_NEG == A64_SUB  | (0x1f << 5)   (Rn field = 31)
 *   A64_CMP == A64_SUBS | 0x1f          (Rd field = 31)
 *   A64_MOV / A64_MVN carry 0x3e0       (Rn field = 31)
 * A64_UXTW has the same value as A64_MOV; the UXTW macro uses it without
 * the XS flag.
 */
267 /* adapted and cut down to only tested and required by lightning,
268  * from data in binutils/aarch64-tbl.h */
269 #  define A64_ADCS			0x3a000000
270 #  define A64_SBCS			0x7a000000
271 #  define A64_ADDI			0x11000000
272 #  define A64_ADDSI			0xb1000000
273 #  define A64_SUBI			0x51000000
274 #  define A64_SUBSI			0x71000000
275 #  define A64_ADD			0x0b000000
276 #  define A64_ADDS			0x2b000000
277 #  define A64_SUB			0x4b000000
278 #  define A64_NEG			0x4b0003e0
279 #  define A64_SUBS			0x6b000000
280 #  define A64_CMP			0x6b00001f
281 #  define A64_SBFM			0x93400000
282 #  define A64_UBFM			0x53400000
283 #  define A64_UBFX			0x53000000
284 #  define A64_B				0x14000000
285 #  define A64_BL			0x94000000
286 #  define A64_BR			0xd61f0000
287 #  define A64_BLR			0xd63f0000
288 #  define A64_RET			0xd65f0000
289 #  define A64_CBZ			0x34000000
290 #  define A64_CBNZ			0x35000000
291 #  define A64_B_C			0x54000000
292 #  define A64_CSINC			0x1a800400
293 #  define A64_REV			0xdac00c00
294 #  define A64_UDIV			0x1ac00800
295 #  define A64_SDIV			0x1ac00c00
296 #  define A64_LSL			0x1ac02000
297 #  define A64_LSR			0x1ac02400
298 #  define A64_ASR			0x1ac02800
299 #  define A64_MUL			0x1b007c00
300 #  define A64_SMULL			0x9b207c00
301 #  define A64_SMULH			0x9b407c00
302 #  define A64_UMULL			0x9ba07c00
303 #  define A64_UMULH			0x9bc07c00
/* Load/store, immediate-offset forms (used through oxxi).  */
304 #  define A64_STRBI			0x39000000
305 #  define A64_LDRBI			0x39400000
306 #  define A64_LDRSBI			0x39800000
307 #  define A64_STRI			0xf9000000
308 #  define A64_LDRI			0xf9400000
309 #  define A64_STRHI			0x79000000
310 #  define A64_LDRHI			0x79400000
311 #  define A64_LDRSHI			0x79800000
312 #  define A64_STRWI			0xb9000000
313 #  define A64_LDRWI			0xb9400000
314 #  define A64_LDRSWI			0xb9800000
/* Load/store, register-offset forms (used through oxxx).  */
315 #  define A64_STRB			0x38206800
316 #  define A64_LDRB			0x38606800
317 #  define A64_LDRSB			0x38e06800
318 #  define A64_STR			0xf8206800
319 #  define A64_LDR			0xf8606800
320 #  define A64_STRH			0x78206800
321 #  define A64_LDRH			0x78606800
322 #  define A64_LDRSH			0x78a06800
323 #  define A64_STRW			0xb8206800
324 #  define A64_LDRW			0xb8606800
325 #  define A64_LDRSW			0xb8a06800
/* Load/store, 9-bit immediate forms (used through oxx9).  */
326 #  define A64_STURB			0x38000000
327 #  define A64_LDURB			0x38400000
328 #  define A64_LDURSB			0x38800000
329 #  define A64_STUR			0xf8000000
330 #  define A64_LDUR			0xf8400000
331 #  define A64_STURH			0x78000000
332 #  define A64_LDURH			0x78400000
333 #  define A64_LDURSH			0x78800000
334 #  define A64_STURW			0xb8000000
335 #  define A64_LDURW			0xb8400000
336 #  define A64_LDURSW			0xb8800000
/* Register pair forms (used through oxxx7).  */
337 #  define A64_STP			0x29000000
338 #  define A64_LDP			0x29400000
339 #  define A64_STP_POS			0x29800000
340 #  define A64_LDP_PRE			0x28c00000
/* Logical operations, immediate and register forms, and wide moves.  */
341 #  define A64_ANDI			0x12400000
342 #  define A64_ORRI			0x32400000
343 #  define A64_EORI			0x52400000
344 #  define A64_ANDSI			0x72000000
345 #  define A64_AND			0x0a000000
346 #  define A64_ORR			0x2a000000
347 #  define A64_MOV			0x2a0003e0	/* AKA orr Rd,xzr,Rm */
348 #  define A64_MVN			0x2a2003e0
349 #  define A64_UXTW			0x2a0003e0	/* AKA MOV */
350 #  define A64_EOR			0x4a000000
351 #  define A64_ANDS			0x6a000000
352 #  define A64_MOVN			0x12800000
353 #  define A64_MOVZ			0x52800000
354 #  define A64_MOVK			0x72800000
/*
 * Instruction macros.  Each one names an instruction, combines an A64_*
 * template with optional flags (XS, LSL_12, MOVI_LSL_*) and forwards the
 * operands to one of the encoder helpers declared further down (oxxx,
 * oxxi, oxx9, ox19, oc19, o26, ox_x, o_xx, oxx_, o_x_, ox_h, oxxrs, oxxxc,
 * oxxx7).
 *
 * Macro arguments are expanded without parentheses, so callers should pass
 * simple expressions.
 */
355 #  define SBFM(Rd,Rn,ImmR,ImmS)		oxxrs(A64_SBFM|XS,Rd,Rn,ImmR,ImmS)
356 #  define UBFM(Rd,Rn,ImmR,ImmS)		oxxrs(A64_UBFM|XS,Rd,Rn,ImmR,ImmS)
357 #  define UBFX(Rd,Rn,ImmR,ImmS)		oxxrs(A64_UBFX,Rd,Rn,ImmR,ImmS)
/* Compare forms: the destination operand slot is XZR_REGNO (CMP has it
 * preset in the template), so only the flags result is kept.  */
358 #  define CMP(Rn,Rm)			oxx_(A64_CMP|XS,Rn,Rm)
359 #  define CMPI(Rn,Imm12)		oxxi(A64_SUBSI|XS,XZR_REGNO,Rn,Imm12)
360 #  define CMPI_12(Rn,Imm12)		oxxi(A64_SUBSI|XS|LSL_12,XZR_REGNO,Rn,Imm12)
361 #  define CMNI(Rn,Imm12)		oxxi(A64_ADDSI|XS,XZR_REGNO,Rn,Imm12)
362 #  define CMNI_12(Rn,Imm12)		oxxi(A64_ADDSI|XS|LSL_12,XZR_REGNO,Rn,Imm12)
363 #  define CSINC(Rd,Rn,Rm,Cc)		oxxxc(A64_CSINC|XS,Rd,Rn,Rm,Cc)
364 #  define TST(Rn,Rm)			oxxx(A64_ANDS|XS,XZR_REGNO,Rn,Rm)
365 /* actually should use oxxrs but logical_immediate returns proper encoding */
/* NOTE(review): unlike TST, ANDI, ORRI and EORI, this macro does not OR in
 * XS; confirm whether that is intended.  */
366 #  define TSTI(Rn,Imm12)		oxxi(A64_ANDSI,XZR_REGNO,Rn,Imm12)
367 #  define MOV(Rd,Rm)			ox_x(A64_MOV|XS,Rd,Rm)
368 #  define MVN(Rd,Rm)			ox_x(A64_MVN|XS,Rd,Rm)
369 #  define NEG(Rd,Rm)			ox_x(A64_NEG|XS,Rd,Rm)
/* Wide moves: the _16/_32/_48 suffix selects the matching MOVI_LSL_*
 * flag.  */
370 #  define MOVN(Rd,Imm16)		ox_h(A64_MOVN|XS,Rd,Imm16)
371 #  define MOVN_16(Rd,Imm16)		ox_h(A64_MOVN|XS|MOVI_LSL_16,Rd,Imm16)
372 #  define MOVN_32(Rd,Imm16)		ox_h(A64_MOVN|XS|MOVI_LSL_32,Rd,Imm16)
373 #  define MOVN_48(Rd,Imm16)		ox_h(A64_MOVN|XS|MOVI_LSL_48,Rd,Imm16)
374 #  define MOVZ(Rd,Imm16)		ox_h(A64_MOVZ|XS,Rd,Imm16)
375 #  define MOVZ_16(Rd,Imm16)		ox_h(A64_MOVZ|XS|MOVI_LSL_16,Rd,Imm16)
376 #  define MOVZ_32(Rd,Imm16)		ox_h(A64_MOVZ|XS|MOVI_LSL_32,Rd,Imm16)
377 #  define MOVZ_48(Rd,Imm16)		ox_h(A64_MOVZ|XS|MOVI_LSL_48,Rd,Imm16)
378 #  define MOVK(Rd,Imm16)		ox_h(A64_MOVK|XS,Rd,Imm16)
379 #  define MOVK_16(Rd,Imm16)		ox_h(A64_MOVK|XS|MOVI_LSL_16,Rd,Imm16)
380 #  define MOVK_32(Rd,Imm16)		ox_h(A64_MOVK|XS|MOVI_LSL_32,Rd,Imm16)
381 #  define MOVK_48(Rd,Imm16)		ox_h(A64_MOVK|XS|MOVI_LSL_48,Rd,Imm16)
/* Arithmetic; the _12 variants add the LSL_12 flag to the immediate
 * form.  MOV_XSP is ADDI with a zero immediate.  */
382 #  define ADD(Rd,Rn,Rm)			oxxx(A64_ADD|XS,Rd,Rn,Rm)
383 #  define ADDI(Rd,Rn,Imm12)		oxxi(A64_ADDI|XS,Rd,Rn,Imm12)
384 #  define ADDI_12(Rd,Rn,Imm12)		oxxi(A64_ADDI|XS|LSL_12,Rd,Rn,Imm12)
385 #  define MOV_XSP(Rd,Rn)		ADDI(Rd,Rn,0)
386 #  define ADDS(Rd,Rn,Rm)		oxxx(A64_ADDS|XS,Rd,Rn,Rm)
387 #  define ADDSI(Rd,Rn,Imm12)		oxxi(A64_ADDSI|XS,Rd,Rn,Imm12)
388 #  define ADDSI_12(Rd,Rn,Imm12)		oxxi(A64_ADDSI|XS|LSL_12,Rd,Rn,Imm12)
389 #  define ADCS(Rd,Rn,Rm)		oxxx(A64_ADCS|XS,Rd,Rn,Rm)
390 #  define SUB(Rd,Rn,Rm)			oxxx(A64_SUB|XS,Rd,Rn,Rm)
391 #  define SUBI(Rd,Rn,Imm12)		oxxi(A64_SUBI|XS,Rd,Rn,Imm12)
392 #  define SUBI_12(Rd,Rn,Imm12)		oxxi(A64_SUBI|XS|LSL_12,Rd,Rn,Imm12)
393 #  define SUBS(Rd,Rn,Rm)		oxxx(A64_SUBS|XS,Rd,Rn,Rm)
394 #  define SUBSI(Rd,Rn,Imm12)		oxxi(A64_SUBSI|XS,Rd,Rn,Imm12)
395 #  define SUBSI_12(Rd,Rn,Imm12)		oxxi(A64_SUBSI|XS|LSL_12,Rd,Rn,Imm12)
396 #  define SBCS(Rd,Rn,Rm)		oxxx(A64_SBCS|XS,Rd,Rn,Rm)
397 #  define MUL(Rd,Rn,Rm)			oxxx(A64_MUL|XS,Rd,Rn,Rm)
/* The four templates below already have bit 31 set, so XS is not added.  */
398 #  define SMULL(Rd,Rn,Rm)		oxxx(A64_SMULL,Rd,Rn,Rm)
399 #  define SMULH(Rd,Rn,Rm)		oxxx(A64_SMULH,Rd,Rn,Rm)
400 #  define UMULL(Rd,Rn,Rm)		oxxx(A64_UMULL,Rd,Rn,Rm)
401 #  define UMULH(Rd,Rn,Rm)		oxxx(A64_UMULH,Rd,Rn,Rm)
402 #  define SDIV(Rd,Rn,Rm)		oxxx(A64_SDIV|XS,Rd,Rn,Rm)
403 #  define UDIV(Rd,Rn,Rm)		oxxx(A64_UDIV|XS,Rd,Rn,Rm)
/* Shifts.  The immediate forms are built from the bitfield-move macros:
 * LSLI uses UBFM with ImmR = (64-i0)&63 and ImmS = 63-i0; ASRI and LSRI use
 * ImmR = i0 and ImmS = 63.  NOTE(review): i0 is presumably expected to be
 * in 0..63; the callers are responsible for that.  */
404 #  define LSL(Rd,Rn,Rm)			oxxx(A64_LSL|XS,Rd,Rn,Rm)
405 #  define LSLI(r0,r1,i0)		UBFM(r0,r1,(64-i0)&63,63-i0)
406 #  define ASR(Rd,Rn,Rm)			oxxx(A64_ASR|XS,Rd,Rn,Rm)
407 #  define ASRI(r0,r1,i0)		SBFM(r0,r1,i0,63)
408 #  define LSR(Rd,Rn,Rm)			oxxx(A64_LSR|XS,Rd,Rn,Rm)
409 #  define LSRI(r0,r1,i0)		UBFM(r0,r1,i0,63)
410 #  define AND(Rd,Rn,Rm)			oxxx(A64_AND|XS,Rd,Rn,Rm)
411 /* actually should use oxxrs but logical_immediate returns proper encoding */
412 #  define ANDI(Rd,Rn,Imm12)		oxxi(A64_ANDI|XS,Rd,Rn,Imm12)
413 #  define ORR(Rd,Rn,Rm)			oxxx(A64_ORR|XS,Rd,Rn,Rm)
414 /* actually should use oxxrs but logical_immediate returns proper encoding */
415 #  define ORRI(Rd,Rn,Imm12)		oxxi(A64_ORRI|XS,Rd,Rn,Imm12)
416 #  define EOR(Rd,Rn,Rm)			oxxx(A64_EOR|XS,Rd,Rn,Rm)
417 /* actually should use oxxrs but logical_immediate returns proper encoding */
418 #  define EORI(Rd,Rn,Imm12)		oxxi(A64_EORI|XS,Rd,Rn,Imm12)
/* Sign/zero extension: bitfield moves with ImmR = 0 and ImmS = width-1.
 * UXTW is the MOV template without XS.  */
419 #  define SXTB(Rd,Rn)			SBFM(Rd,Rn,0,7)
420 #  define SXTH(Rd,Rn)			SBFM(Rd,Rn,0,15)
421 #  define SXTW(Rd,Rn)			SBFM(Rd,Rn,0,31)
422 #  define UXTB(Rd,Rn)			UBFX(Rd,Rn,0,7)
423 #  define UXTH(Rd,Rn)			UBFX(Rd,Rn,0,15)
424 #  define UXTW(Rd,Rm)			ox_x(A64_UXTW,Rd,Rm)
425 #  define REV(Rd,Rn)			o_xx(A64_REV,Rd,Rn)
/* Loads and stores.  Three macros per access size: register offset (oxxx),
 * 12-bit immediate ("I" suffix, oxxi) and 9-bit immediate (LDUR/STUR,
 * oxx9).  */
426 #  define LDRSB(Rt,Rn,Rm)		oxxx(A64_LDRSB,Rt,Rn,Rm)
427 #  define LDRSBI(Rt,Rn,Imm12)		oxxi(A64_LDRSBI,Rt,Rn,Imm12)
428 #  define LDURSB(Rt,Rn,Imm9)		oxx9(A64_LDURSB,Rt,Rn,Imm9)
429 #  define LDRB(Rt,Rn,Rm)		oxxx(A64_LDRB,Rt,Rn,Rm)
430 #  define LDRBI(Rt,Rn,Imm12)		oxxi(A64_LDRBI,Rt,Rn,Imm12)
431 #  define LDURB(Rt,Rn,Imm9)		oxx9(A64_LDURB,Rt,Rn,Imm9)
432 #  define LDRSH(Rt,Rn,Rm)		oxxx(A64_LDRSH,Rt,Rn,Rm)
433 #  define LDRSHI(Rt,Rn,Imm12)		oxxi(A64_LDRSHI,Rt,Rn,Imm12)
434 #  define LDURSH(Rt,Rn,Imm9)		oxx9(A64_LDURSH,Rt,Rn,Imm9)
435 #  define LDRH(Rt,Rn,Rm)		oxxx(A64_LDRH,Rt,Rn,Rm)
436 #  define LDRHI(Rt,Rn,Imm12)		oxxi(A64_LDRHI,Rt,Rn,Imm12)
437 #  define LDURH(Rt,Rn,Imm9)		oxx9(A64_LDURH,Rt,Rn,Imm9)
438 #  define LDRSW(Rt,Rn,Rm)		oxxx(A64_LDRSW,Rt,Rn,Rm)
439 #  define LDRSWI(Rt,Rn,Imm12)		oxxi(A64_LDRSWI,Rt,Rn,Imm12)
440 #  define LDURSW(Rt,Rn,Imm9)		oxx9(A64_LDURSW,Rt,Rn,Imm9)
441 #  define LDRW(Rt,Rn,Rm)		oxxx(A64_LDRW,Rt,Rn,Rm)
442 #  define LDRWI(Rt,Rn,Imm12)		oxxi(A64_LDRWI,Rt,Rn,Imm12)
443 #  define LDURW(Rt,Rn,Imm9)		oxx9(A64_LDURW,Rt,Rn,Imm9)
444 #  define LDR(Rt,Rn,Rm)			oxxx(A64_LDR,Rt,Rn,Rm)
445 #  define LDRI(Rt,Rn,Imm12)		oxxi(A64_LDRI,Rt,Rn,Imm12)
446 #  define LDUR(Rt,Rn,Imm9)		oxx9(A64_LDUR,Rt,Rn,Imm9)
447 #  define STRB(Rt,Rn,Rm)		oxxx(A64_STRB,Rt,Rn,Rm)
448 #  define STRBI(Rt,Rn,Imm12)		oxxi(A64_STRBI,Rt,Rn,Imm12)
449 #  define STURB(Rt,Rn,Imm9)		oxx9(A64_STURB,Rt,Rn,Imm9)
450 #  define STRH(Rt,Rn,Rm)		oxxx(A64_STRH,Rt,Rn,Rm)
451 #  define STRHI(Rt,Rn,Imm12)		oxxi(A64_STRHI,Rt,Rn,Imm12)
452 #  define STURH(Rt,Rn,Imm9)		oxx9(A64_STURH,Rt,Rn,Imm9)
453 #  define STRW(Rt,Rn,Rm)		oxxx(A64_STRW,Rt,Rn,Rm)
454 #  define STRWI(Rt,Rn,Imm12)		oxxi(A64_STRWI,Rt,Rn,Imm12)
455 #  define STURW(Rt,Rn,Imm9)		oxx9(A64_STURW,Rt,Rn,Imm9)
456 #  define STR(Rt,Rn,Rm)			oxxx(A64_STR,Rt,Rn,Rm)
457 #  define STRI(Rt,Rn,Imm12)		oxxi(A64_STRI,Rt,Rn,Imm12)
458 #  define STUR(Rt,Rn,Imm9)		oxx9(A64_STUR,Rt,Rn,Imm9)
/* Register pairs with a signed 7-bit immediate.
 * NOTE(review): the _PRE/_POS suffixes may not match the addressing mode
 * the underlying templates actually encode (A64_LDP_PRE looks like the
 * post-index form and A64_STP_POS like the pre-index form) - confirm
 * against the Arm ARM before reusing these names.  */
459 #  define LDPI(Rt,Rt2,Rn,Simm7)		oxxx7(A64_LDP|XS,Rt,Rt2,Rn,Simm7)
460 #  define STPI(Rt,Rt2,Rn,Simm7)		oxxx7(A64_STP|XS,Rt,Rt2,Rn,Simm7)
461 #  define LDPI_PRE(Rt,Rt2,Rn,Simm7)	oxxx7(A64_LDP_PRE|XS,Rt,Rt2,Rn,Simm7)
462 #  define STPI_POS(Rt,Rt2,Rn,Simm7)	oxxx7(A64_STP_POS|XS,Rt,Rt2,Rn,Simm7)
/* CSET is CSINC with both source registers set to XZR_REGNO; Cc is taken
 * from the CC_* table.  */
463 #  define CSET(Rd,Cc)			CSINC(Rd,XZR_REGNO,XZR_REGNO,Cc)
/* Branches.  RET always uses LR_REGNO.  */
464 #  define B(Simm26)			o26(A64_B,Simm26)
465 #  define BL(Simm26)			o26(A64_BL,Simm26)
466 #  define BR(Rn)			o_x_(A64_BR,Rn)
467 #  define BLR(Rn)			o_x_(A64_BLR,Rn)
468 #  define RET()				o_x_(A64_RET,LR_REGNO)
469 #  define B_C(Cc,Simm19)		oc19(A64_B_C,Cc,Simm19)
470 #  define CBZ(Rd,Simm19)		ox19(A64_CBZ|XS,Rd,Simm19)
471 #  define CBNZ(Rd,Simm19)		ox19(A64_CBNZ|XS,Rd,Simm19)
/* NOP emits the fixed word 0xd503201f directly through ii().  */
472 #  define NOP()				ii(0xd503201f)
/*
 * Low-level encoders.  Every helper is reached through a macro of the same
 * name without the leading underscore; the macro supplies the implicit
 * `_jit` state as first argument.  The first explicit argument is the
 * opcode template, the remaining ones are the operand values named in the
 * macro parameter list.
 *
 * logical_immediate() takes a word-sized constant and returns a
 * jit_int32_t.
 * NOTE(review): presumably the immediate encoding consumed by
 * ANDI/ORRI/EORI/TSTI, with a sentinel for values that cannot be encoded -
 * confirm in the definition.
 */
473 static jit_int32_t logical_immediate(jit_word_t);
474 #  define oxxx(Op,Rd,Rn,Rm)		_oxxx(_jit,Op,Rd,Rn,Rm)
475 static void _oxxx(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
476 #  define oxxi(Op,Rd,Rn,Imm12)		_oxxi(_jit,Op,Rd,Rn,Imm12)
477 static void _oxxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
478 #  define oxx9(Op,Rd,Rn,Imm9)		_oxx9(_jit,Op,Rd,Rn,Imm9)
479 static void _oxx9(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
480 #  define ox19(Op,Rd,Simm19)		_ox19(_jit,Op,Rd,Simm19)
481 static void _ox19(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
482 #  define oc19(Op,Cc,Simm19)		_oc19(_jit,Op,Cc,Simm19)
483 static void _oc19(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
484 #  define o26(Op,Simm26)		_o26(_jit,Op,Simm26)
/* The prototype must name the function the o26 macro expands to (_o26).  */
485 static void _o26(jit_state_t*,jit_int32_t,jit_int32_t);
486 #  define ox_x(Op,Rd,Rn)		_ox_x(_jit,Op,Rd,Rn)
487 static void _ox_x(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
488 #  define o_xx(Op,Rd,Rn)		_o_xx(_jit,Op,Rd,Rn)
489 static void _o_xx(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
490 #  define oxx_(Op,Rn,Rm)		_oxx_(_jit,Op,Rn,Rm)
491 static void _oxx_(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
492 #  define o_x_(Op,Rn)			_o_x_(_jit,Op,Rn)
493 static void _o_x_(jit_state_t*,jit_int32_t,jit_int32_t);
494 #  define ox_h(Op,Rd,Imm16)		_ox_h(_jit,Op,Rd,Imm16)
495 static void _ox_h(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
496 #  define oxxrs(Op,Rd,Rn,R,S)		_oxxrs(_jit,Op,Rd,Rn,R,S)
497 static void _oxxrs(jit_state_t*,jit_int32_t,jit_int32_t,
498 		   jit_int32_t,jit_int32_t,jit_int32_t);
499 #  define oxxxc(Op,Rd,Rn,Rm,Cc)		_oxxxc(_jit,Op,Rd,Rn,Rm,Cc)
500 static void _oxxxc(jit_state_t*,jit_int32_t,jit_int32_t,
501 		   jit_int32_t,jit_int32_t,jit_int32_t);
502 #  define oxxx7(Op,Rt,Rt2,Rn,Simm7)	_oxxx7(_jit,Op,Rt,Rt2,Rn,Simm7)
503 static void _oxxx7(jit_state_t*,jit_int32_t,
504 		   jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
505 #  define nop(i0)			_nop(_jit,i0)
506 static void _nop(jit_state_t*,jit_int32_t);
/*
 * Integer arithmetic, shift and logical operations.
 *
 * Naming: a trailing `r` takes register operands and, where one instruction
 * suffices, maps straight to an instruction macro; a trailing `i` takes a
 * jit_word_t immediate as last operand and is implemented by a static
 * helper that receives `_jit` implicitly.  `_u` marks the unsigned
 * variant.
 */
507 #  define addr(r0,r1,r2)		ADD(r0,r1,r2)
508 #  define addi(r0,r1,i0)		_addi(_jit,r0,r1,i0)
509 static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
/* addc* map to ADDS and addx* to ADCS; subc* to SUBS and subx* to SBCS.  */
510 #  define addcr(r0,r1,r2)		ADDS(r0,r1,r2)
511 #  define addci(r0,r1,i0)		_addci(_jit,r0,r1,i0)
512 static void _addci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
513 #  define addxr(r0,r1,r2)		ADCS(r0,r1,r2)
514 #  define addxi(r0,r1,i0)		_addxi(_jit,r0,r1,i0)
515 static void _addxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
516 #  define subr(r0,r1,r2)		SUB(r0,r1,r2)
517 #  define subi(r0,r1,i0)		_subi(_jit,r0,r1,i0)
518 static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
519 #  define subcr(r0,r1,r2)		SUBS(r0,r1,r2)
520 #  define subci(r0,r1,i0)		_subci(_jit,r0,r1,i0)
521 static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
522 #  define subxr(r0,r1,r2)		SBCS(r0,r1,r2)
523 #  define subxi(r0,r1,i0)		_subxi(_jit,r0,r1,i0)
524 static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
525 #  define rsbi(r0, r1, i0)		_rsbi(_jit, r0, r1, i0)
526 static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
527 #  define mulr(r0,r1,r2)		MUL(r0,r1,r2)
528 #  define muli(r0,r1,i0)		_muli(_jit,r0,r1,i0)
529 static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
/* The q* operations take two destination registers (r0, r1) followed by
 * the two source operands.  */
530 #  define qmulr(r0,r1,r2,r3)		_qmulr(_jit,r0,r1,r2,r3)
531 static void _qmulr(jit_state_t*,jit_int32_t,
532 		   jit_int32_t,jit_int32_t,jit_int32_t);
533 #  define qmuli(r0,r1,r2,i0)		_qmuli(_jit,r0,r1,r2,i0)
534 static void _qmuli(jit_state_t*,jit_int32_t,
535 		   jit_int32_t,jit_int32_t,jit_word_t);
536 #  define qmulr_u(r0,r1,r2,r3)		_qmulr_u(_jit,r0,r1,r2,r3)
537 static void _qmulr_u(jit_state_t*,jit_int32_t,
538 		     jit_int32_t,jit_int32_t,jit_int32_t);
539 #  define qmuli_u(r0,r1,r2,i0)		_qmuli_u(_jit,r0,r1,r2,i0)
540 static void _qmuli_u(jit_state_t*,jit_int32_t,
541 		     jit_int32_t,jit_int32_t,jit_word_t);
542 #  define divr(r0,r1,r2)		SDIV(r0,r1,r2)
543 #  define divi(r0,r1,i0)		_divi(_jit,r0,r1,i0)
544 static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
545 #  define divr_u(r0,r1,r2)		UDIV(r0,r1,r2)
546 #  define divi_u(r0,r1,i0)		_divi_u(_jit,r0,r1,i0)
547 static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
/* qdivr and qdivr_u share one helper; the jit_bool_t argument is 1 for the
 * signed and 0 for the unsigned variant.  */
548 #  define qdivr(r0,r1,r2,r3)		_iqdivr(_jit,1,r0,r1,r2,r3)
549 #  define qdivr_u(r0,r1,r2,r3)		_iqdivr(_jit,0,r0,r1,r2,r3)
550 static void _iqdivr(jit_state_t*,jit_bool_t,
551 		    jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
552 #  define qdivi(r0,r1,r2,i0)		_qdivi(_jit,r0,r1,r2,i0)
553 static void _qdivi(jit_state_t*,jit_int32_t,
554 		   jit_int32_t,jit_int32_t,jit_word_t);
555 #  define qdivi_u(r0,r1,r2,i0)		_qdivi_u(_jit,r0,r1,r2,i0)
556 static void _qdivi_u(jit_state_t*,jit_int32_t,
557 		     jit_int32_t,jit_int32_t,jit_word_t);
/* Remainder has no single-instruction mapping here; all four forms go
 * through helpers.  */
558 #  define remr(r0,r1,r2)		_remr(_jit,r0,r1,r2)
559 static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
560 #  define remi(r0,r1,i0)		_remi(_jit,r0,r1,i0)
561 static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
562 #  define remr_u(r0,r1,r2)		_remr_u(_jit,r0,r1,r2)
563 static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
564 #  define remi_u(r0,r1,i0)		_remi_u(_jit,r0,r1,i0)
565 static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
/* Shifts: lshr -> LSL, rshr -> ASR, rshr_u -> LSR.  */
566 #  define lshr(r0,r1,r2)		LSL(r0,r1,r2)
567 #  define lshi(r0,r1,i0)		_lshi(_jit,r0,r1,i0)
568 static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
569 #  define rshr(r0,r1,r2)		ASR(r0,r1,r2)
570 #  define rshi(r0,r1,i0)		_rshi(_jit,r0,r1,i0)
571 static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
572 #  define rshr_u(r0,r1,r2)		LSR(r0,r1,r2)
573 #  define rshi_u(r0,r1,i0)		_rshi_u(_jit,r0,r1,i0)
574 static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
/* Unary and bitwise operations: negr -> NEG, comr -> MVN.  */
575 #  define negr(r0,r1)			NEG(r0,r1)
576 #  define comr(r0,r1)			MVN(r0,r1)
577 #  define andr(r0,r1,r2)		AND(r0,r1,r2)
578 #  define andi(r0,r1,i0)		_andi(_jit,r0,r1,i0)
579 static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
580 #  define orr(r0,r1,r2)			ORR(r0,r1,r2)
581 #  define ori(r0,r1,i0)			_ori(_jit,r0,r1,i0)
582 static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
583 #  define xorr(r0,r1,r2)		EOR(r0,r1,r2)
584 #  define xori(r0,r1,i0)		_xori(_jit,r0,r1,i0)
585 static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
/*
 * Loads.
 *
 * Prefix: ldr_ = address in a register, ldi_ = address given as a
 * jit_word_t, ldxr_ = two register operands after the destination,
 * ldxi_ = register plus jit_word_t.
 * Suffix: _c/_uc, _s/_us, _i/_ui, _l select the access type; the `u`
 * variants are the unsigned ones.
 *
 * The signed ldr_* forms and ldr_l map directly to an immediate-offset
 * load with offset 0; the signed ldxr_s/ldxr_i and ldxr_l map directly to
 * the register-offset load.  Everything else is implemented by a static
 * helper.
 */
586 #  define ldr_c(r0,r1)			LDRSBI(r0,r1,0)
587 #  define ldi_c(r0,i0)			_ldi_c(_jit,r0,i0)
588 static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
589 #  define ldr_uc(r0,r1)			_ldr_uc(_jit,r0,r1)
590 static void _ldr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
591 #  define ldi_uc(r0,i0)			_ldi_uc(_jit,r0,i0)
592 static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
593 #  define ldr_s(r0,r1)			LDRSHI(r0,r1,0)
594 #  define ldi_s(r0,i0)			_ldi_s(_jit,r0,i0)
595 static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
596 #  define ldr_us(r0,r1)			_ldr_us(_jit,r0,r1)
597 static void _ldr_us(jit_state_t*,jit_int32_t,jit_int32_t);
598 #  define ldi_us(r0,i0)			_ldi_us(_jit,r0,i0)
599 static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
600 #  define ldr_i(r0,r1)			LDRSWI(r0,r1,0)
601 #  define ldi_i(r0,i0)			_ldi_i(_jit,r0,i0)
602 static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
603 #  define ldr_ui(r0,r1)			_ldr_ui(_jit,r0,r1)
604 static void _ldr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
605 #  define ldi_ui(r0,i0)			_ldi_ui(_jit,r0,i0)
606 static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t);
607 #  define ldr_l(r0,r1)			LDRI(r0,r1,0)
/* NOTE(review): ldr_l expands to LDRI and never to _ldr_l, so the
 * prototype below looks like a leftover; confirm that no definition or
 * direct call of _ldr_l exists before removing it.  */
608 static void _ldr_l(jit_state_t*,jit_int32_t,jit_int32_t);
609 #  define ldi_l(r0,i0)			_ldi_l(_jit,r0,i0)
610 static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t);
611 #  define ldxr_c(r0,r1,r2)		_ldxr_c(_jit,r0,r1,r2)
612 static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
613 #  define ldxi_c(r0,r1,i0)		_ldxi_c(_jit,r0,r1,i0)
614 static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
615 #  define ldxr_uc(r0,r1,r2)		_ldxr_uc(_jit,r0,r1,r2)
616 static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
617 #  define ldxi_uc(r0,r1,i0)		_ldxi_uc(_jit,r0,r1,i0)
618 static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
619 #  define ldxr_s(r0,r1,r2)		LDRSH(r0,r1,r2)
620 #  define ldxi_s(r0,r1,i0)		_ldxi_s(_jit,r0,r1,i0)
621 static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
622 #  define ldxr_us(r0,r1,r2)		_ldxr_us(_jit,r0,r1,r2)
623 static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
624 #  define ldxi_us(r0,r1,i0)		_ldxi_us(_jit,r0,r1,i0)
625 static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
626 #  define ldxr_i(r0,r1,r2)		LDRSW(r0,r1,r2)
627 #  define ldxi_i(r0,r1,i0)		_ldxi_i(_jit,r0,r1,i0)
628 static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
629 #  define ldxr_ui(r0,r1,r2)		_ldxr_ui(_jit,r0,r1,r2)
630 static void _ldxr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
631 #  define ldxi_ui(r0,r1,i0)		_ldxi_ui(_jit,r0,r1,i0)
632 static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
633 #  define ldxr_l(r0,r1,r2)		LDR(r0,r1,r2)
634 #  define ldxi_l(r0,r1,i0)		_ldxi_l(_jit,r0,r1,i0)
635 static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
/*
 * Stores.
 *
 * Operand order is address first, value last, which is the reverse of the
 * instruction macros (value register Rt first).  Hence the swaps below:
 *   str_*(r0,r1)     -> STR*I(r1,r0,0)   Rt = r1, Rn = r0, offset 0
 *   stxr_*(r0,r1,r2) -> STR*(r2,r1,r0)   Rt = r2, Rn = r1, Rm = r0
 * The sti_* and stxi_* forms take a jit_word_t as first operand and are
 * implemented by static helpers.
 */
636 #  define str_c(r0,r1)			STRBI(r1,r0,0)
637 #  define sti_c(i0,r0)			_sti_c(_jit,i0,r0)
638 static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
639 #  define str_s(r0,r1)			STRHI(r1,r0,0)
640 #  define sti_s(i0,r0)			_sti_s(_jit,i0,r0)
641 static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
642 #  define str_i(r0,r1)			STRWI(r1,r0,0)
643 #  define sti_i(i0,r0)			_sti_i(_jit,i0,r0)
644 static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
645 #  define str_l(r0,r1)			STRI(r1,r0,0)
646 #  define sti_l(i0,r0)			_sti_l(_jit,i0,r0)
647 static void _sti_l(jit_state_t*,jit_word_t,jit_int32_t);
648 #  define stxr_c(r0,r1,r2)		STRB(r2,r1,r0)
649 #  define stxi_c(i0,r0,r1)		_stxi_c(_jit,i0,r0,r1)
650 static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
651 #  define stxr_s(r0,r1,r2)		STRH(r2,r1,r0)
652 #  define stxi_s(i0,r0,r1)		_stxi_s(_jit,i0,r0,r1)
653 static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
654 #  define stxr_i(r0,r1,r2)		STRW(r2,r1,r0)
655 #  define stxi_i(i0,r0,r1)		_stxi_i(_jit,i0,r0,r1)
656 static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
657 #  define stxr_l(r0,r1,r2)		STR(r2,r1,r0)
658 #  define stxi_l(i0,r0,r1)		_stxi_l(_jit,i0,r0,r1)
659 static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
/*
 * Host-to-network conversions.  On a little-endian host the 16 and
 * 32 bit forms are helpers and the 64 bit form is a single REV; on a
 * big-endian host they reduce to zero-extensions and a plain move.
 */
#  if __BYTE_ORDER == __LITTLE_ENDIAN
#  define htonr_us(r0,r1)		_htonr_us(_jit,r0,r1)
static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
#  define htonr_ui(r0,r1)		_htonr_ui(_jit,r0,r1)
static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
#    define htonr_ul(r0,r1)		REV(r0,r1)
#  else
#    define htonr_us(r0,r1)		extr_us(r0,r1)
#    define htonr_ui(r0,r1)		extr_ui(r0,r1)
#    define htonr_ul(r0,r1)		movr(r0,r1)
#  endif
/*
 * Register extensions map one-to-one onto the SXT* / UXT* instruction
 * macros.  Register and immediate moves are helpers; movi_p returns a
 * jit_word_t (NOTE(review): presumably the address of the emitted
 * sequence so it can be patched later -- confirm in _movi_p).
 */
#  define extr_c(r0,r1)			SXTB(r0,r1)
#  define extr_uc(r0,r1)		UXTB(r0,r1)
#  define extr_s(r0,r1)			SXTH(r0,r1)
#  define extr_us(r0,r1)		UXTH(r0,r1)
#  define extr_i(r0,r1)			SXTW(r0,r1)
#  define extr_ui(r0,r1)		UXTW(r0,r1)
#  define movr(r0,r1)			_movr(_jit,r0,r1)
static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
#  define movi(r0,i0)			_movi(_jit,r0,i0)
static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
#  define movi_p(r0,i0)			_movi_p(_jit,r0,i0)
static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
/*
 * Comparisons producing a value in r0.  Every relation is the generic
 * ccr (register operand) or cci (immediate operand) helper with a
 * condition code: signed relations use CC_LT/CC_LE/CC_GE/CC_GT, the
 * _u (unsigned) relations use CC_CC/CC_LS/CC_CS/CC_HI, and equality
 * tests use CC_EQ/CC_NE.
 */
#  define ccr(cc,r0,r1,r2)		_ccr(_jit,cc,r0,r1,r2)
static void _ccr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
#  define cci(cc,r0,r1,i0)		_cci(_jit,cc,r0,r1,i0)
static void _cci(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
#  define ltr(r0,r1,r2)			ccr(CC_LT,r0,r1,r2)
#  define lti(r0,r1,i0)			cci(CC_LT,r0,r1,i0)
#  define ltr_u(r0,r1,r2)		ccr(CC_CC,r0,r1,r2)
#  define lti_u(r0,r1,i0)		cci(CC_CC,r0,r1,i0)
#  define ler(r0,r1,r2)			ccr(CC_LE,r0,r1,r2)
#  define lei(r0,r1,i0)			cci(CC_LE,r0,r1,i0)
#  define ler_u(r0,r1,r2)		ccr(CC_LS,r0,r1,r2)
#  define lei_u(r0,r1,i0)		cci(CC_LS,r0,r1,i0)
#  define eqr(r0,r1,r2)			ccr(CC_EQ,r0,r1,r2)
#  define eqi(r0,r1,i0)			cci(CC_EQ,r0,r1,i0)
#  define ger(r0,r1,r2)			ccr(CC_GE,r0,r1,r2)
#  define gei(r0,r1,i0)			cci(CC_GE,r0,r1,i0)
#  define ger_u(r0,r1,r2)		ccr(CC_CS,r0,r1,r2)
#  define gei_u(r0,r1,i0)		cci(CC_CS,r0,r1,i0)
#  define gtr(r0,r1,r2)			ccr(CC_GT,r0,r1,r2)
#  define gti(r0,r1,i0)			cci(CC_GT,r0,r1,i0)
#  define gtr_u(r0,r1,r2)		ccr(CC_HI,r0,r1,r2)
#  define gti_u(r0,r1,i0)		cci(CC_HI,r0,r1,i0)
#  define ner(r0,r1,r2)			ccr(CC_NE,r0,r1,r2)
#  define nei(r0,r1,i0)			cci(CC_NE,r0,r1,i0)
/*
 * Compare-and-branch.  i0 is the branch target argument and the
 * helpers return a jit_word_t.  All relations go through the generic
 * bccr/bcci helpers with a BCC_* condition, except beqi and bnei,
 * which have dedicated helpers.
 */
#  define bccr(cc,i0,r0,r1)		_bccr(_jit,cc,i0,r0,r1)
static jit_word_t
_bccr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
#  define bcci(cc,i0,r0,i1)		_bcci(_jit,cc,i0,r0,i1)
static jit_word_t
_bcci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
#  define bltr(i0,r0,r1)		bccr(BCC_LT,i0,r0,r1)
#  define blti(i0,r0,i1)		bcci(BCC_LT,i0,r0,i1)
#  define bltr_u(i0,r0,r1)		bccr(BCC_CC,i0,r0,r1)
#  define blti_u(i0,r0,i1)		bcci(BCC_CC,i0,r0,i1)
#  define bler(i0,r0,r1)		bccr(BCC_LE,i0,r0,r1)
#  define blei(i0,r0,i1)		bcci(BCC_LE,i0,r0,i1)
#  define bler_u(i0,r0,r1)		bccr(BCC_LS,i0,r0,r1)
#  define blei_u(i0,r0,i1)		bcci(BCC_LS,i0,r0,i1)
#  define beqr(i0,r0,r1)		bccr(BCC_EQ,i0,r0,r1)
#  define beqi(i0,r0,i1)		_beqi(_jit,i0,r0,i1)
static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
#  define bger(i0,r0,r1)		bccr(BCC_GE,i0,r0,r1)
#  define bgei(i0,r0,i1)		bcci(BCC_GE,i0,r0,i1)
#  define bger_u(i0,r0,r1)		bccr(BCC_CS,i0,r0,r1)
#  define bgei_u(i0,r0,i1)		bcci(BCC_CS,i0,r0,i1)
#  define bgtr(i0,r0,r1)		bccr(BCC_GT,i0,r0,r1)
#  define bgti(i0,r0,i1)		bcci(BCC_GT,i0,r0,i1)
#  define bgtr_u(i0,r0,r1)		bccr(BCC_HI,i0,r0,r1)
#  define bgti_u(i0,r0,i1)		bcci(BCC_HI,i0,r0,i1)
#  define bner(i0,r0,r1)		bccr(BCC_NE,i0,r0,r1)
#  define bnei(i0,r0,i1)		_bnei(_jit,i0,r0,i1)
static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
/*
 * Add/subtract and branch on the resulting flags.  The boadd and bosub
 * macros pass BCC_VS and the bxadd and bxsub macros pass BCC_VC.  The
 * unsigned add forms pass BCC_HS (boadd*_u) and BCC_LO (bxadd*_u);
 * the unsigned subtract forms use the opposite pairing, BCC_LO for
 * bosub*_u and BCC_HS for bxsub*_u.
 */
#  define baddr(cc,i0,r0,r1)		_baddr(_jit,cc,i0,r0,r1)
static jit_word_t
_baddr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
#  define baddi(cc,i0,r0,i1)		_baddi(_jit,cc,i0,r0,i1)
static jit_word_t
_baddi(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
#  define boaddr(i0,r0,r1)		baddr(BCC_VS,i0,r0,r1)
#  define boaddi(i0,r0,i1)		baddi(BCC_VS,i0,r0,i1)
#  define boaddr_u(i0,r0,r1)		baddr(BCC_HS,i0,r0,r1)
#  define boaddi_u(i0,r0,i1)		baddi(BCC_HS,i0,r0,i1)
#  define bxaddr(i0,r0,r1)		baddr(BCC_VC,i0,r0,r1)
#  define bxaddi(i0,r0,i1)		baddi(BCC_VC,i0,r0,i1)
#  define bxaddr_u(i0,r0,r1)		baddr(BCC_LO,i0,r0,r1)
#  define bxaddi_u(i0,r0,i1)		baddi(BCC_LO,i0,r0,i1)
#  define bsubr(cc,i0,r0,r1)		_bsubr(_jit,cc,i0,r0,r1)
static jit_word_t
_bsubr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
#  define bsubi(cc,i0,r0,i1)		_bsubi(_jit,cc,i0,r0,i1)
static jit_word_t
_bsubi(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
#  define bosubr(i0,r0,r1)		bsubr(BCC_VS,i0,r0,r1)
#  define bosubi(i0,r0,i1)		bsubi(BCC_VS,i0,r0,i1)
#  define bosubr_u(i0,r0,r1)		bsubr(BCC_LO,i0,r0,r1)
#  define bosubi_u(i0,r0,i1)		bsubi(BCC_LO,i0,r0,i1)
#  define bxsubr(i0,r0,r1)		bsubr(BCC_VC,i0,r0,r1)
#  define bxsubi(i0,r0,i1)		bsubi(BCC_VC,i0,r0,i1)
#  define bxsubr_u(i0,r0,r1)		bsubr(BCC_HS,i0,r0,r1)
#  define bxsubi_u(i0,r0,i1)		bsubi(BCC_HS,i0,r0,i1)
/*
 * Mask-test branches: bms* pass BCC_NE and bmc* pass BCC_EQ to the
 * generic bmxr (register mask) / bmxi (immediate mask) helpers.  The
 * last parameter of the bmxi macro is spelled r1 although the helper
 * takes a jit_word_t in that position.
 */
#  define bmxr(cc,i0,r0,r1)		_bmxr(_jit,cc,i0,r0,r1)
static jit_word_t
_bmxr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
#  define bmxi(cc,i0,r0,r1)		_bmxi(_jit,cc,i0,r0,r1)
static jit_word_t
_bmxi(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
#  define bmsr(i0,r0,r1)		bmxr(BCC_NE,i0,r0,r1)
#  define bmsi(i0,r0,i1)		bmxi(BCC_NE,i0,r0,i1)
#  define bmcr(i0,r0,r1)		bmxr(BCC_EQ,i0,r0,r1)
#  define bmci(i0,r0,i1)		bmxi(BCC_EQ,i0,r0,i1)
/*
 * Control transfer and function framing.  Register jumps and calls are
 * single BR / BLR instructions; the immediate forms are helpers, and
 * the _p variants return a jit_word_t.  prolog/epilog take the
 * function's jit_node_t, vastart/vaarg implement varargs access, and
 * patch_at takes a jump location and a label as jit_word_t values.
 */
#  define jmpr(r0)			BR(r0)
#  define jmpi(i0)			_jmpi(_jit,i0)
static void _jmpi(jit_state_t*,jit_word_t);
#  define jmpi_p(i0)			_jmpi_p(_jit,i0)
static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
#  define callr(r0)			BLR(r0)
#  define calli(i0)			_calli(_jit,i0)
static void _calli(jit_state_t*,jit_word_t);
#  define calli_p(i0)			_calli_p(_jit,i0)
static jit_word_t _calli_p(jit_state_t*,jit_word_t);
#  define prolog(i0)			_prolog(_jit,i0)
static void _prolog(jit_state_t*,jit_node_t*);
#  define epilog(i0)			_epilog(_jit,i0)
static void _epilog(jit_state_t*,jit_node_t*);
#  define vastart(r0)			_vastart(_jit, r0)
static void _vastart(jit_state_t*, jit_int32_t);
#  define vaarg(r0, r1)			_vaarg(_jit, r0, r1)
static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
#  define patch_at(jump,label)		_patch_at(_jit,jump,label)
static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
793 #endif
794 
795 #if CODE
796 static jit_int32_t
logical_immediate(jit_word_t imm)797 logical_immediate(jit_word_t imm)
798 {
799     /* There are 5334 possible immediate values, but to avoid the
800      * need of either too complex code or large lookup tables,
801      * only check for (simply) encodable common/small values */
802     switch (imm) {
803 	case -16:	return (0xf3b);
804 	case -15:	return (0xf3c);
805 	case -13:	return (0xf3d);
806 	case -9:	return (0xf3e);
807 	case -8:	return (0xf7c);
808 	case -7:	return (0xf7d);
809 	case -5:	return (0xf7e);
810 	case -4:	return (0xfbd);
811 	case -3:	return (0xfbe);
812 	case -2:	return (0xffe);
813 	case 1:		return (0x000);
814 	case 2:		return (0xfc0);
815 	case 3:		return (0x001);
816 	case 4:		return (0xf80);
817 	case 6:		return (0xfc1);
818 	case 7:		return (0x002);
819 	case 8:		return (0xf40);
820 	case 12:	return (0xf81);
821 	case 14:	return (0xfc2);
822 	case 15:	return (0x003);
823 	case 16:	return (0xf00);
824 	default:	return (-1);
825     }
826 }
827 
828 static void
_oxxx(jit_state_t * _jit,jit_int32_t Op,jit_int32_t Rd,jit_int32_t Rn,jit_int32_t Rm)829 _oxxx(jit_state_t *_jit, jit_int32_t Op,
830       jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Rm)
831 {
832     instr_t	i;
833     assert(!(Rd &       ~0x1f));
834     assert(!(Rn &       ~0x1f));
835     assert(!(Rm &       ~0x1f));
836     assert(!(Op & ~0xffe0fc00));
837     i.w = Op;
838     i.Rd.b = Rd;
839     i.Rn.b = Rn;
840     i.Rm.b = Rm;
841     ii(i.w);
842 }
843 
844 static void
_oxxi(jit_state_t * _jit,jit_int32_t Op,jit_int32_t Rd,jit_int32_t Rn,jit_int32_t Imm12)845 _oxxi(jit_state_t *_jit, jit_int32_t Op,
846       jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Imm12)
847 {
848     instr_t	i;
849     assert(!(Rd    &       ~0x1f));
850     assert(!(Rn    &       ~0x1f));
851     assert(!(Imm12 &      ~0xfff));
852     assert(!(Op    & ~0xffe00000));
853     i.w = Op;
854     i.Rd.b = Rd;
855     i.Rn.b = Rn;
856     i.imm12.b = Imm12;
857     ii(i.w);
858 }
859 
860 static void
_oxx9(jit_state_t * _jit,jit_int32_t Op,jit_int32_t Rd,jit_int32_t Rn,jit_int32_t Imm9)861 _oxx9(jit_state_t *_jit, jit_int32_t Op,
862       jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Imm9)
863 {
864     instr_t	i;
865     assert(!(Rd   &       ~0x1f));
866     assert(!(Rn   &       ~0x1f));
867     assert(!(Imm9 &      ~0x1ff));
868     assert(!(Op   & ~0xffe00000));
869     i.w = Op;
870     i.Rd.b = Rd;
871     i.Rn.b = Rn;
872     i.imm9.b = Imm9;
873     ii(i.w);
874 }
875 
876 static void
_ox19(jit_state_t * _jit,jit_int32_t Op,jit_int32_t Rd,jit_int32_t Simm19)877 _ox19(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Simm19)
878 {
879     instr_t	i;
880     assert(!(Rd &         ~0x1f));
881     assert(Simm19 >= -262148 && Simm19 <= 262143);
882     assert(!(Op   & ~0xff000000));
883     i.w = Op;
884     i.Rd.b = Rd;
885     i.imm19.b = Simm19;
886     ii(i.w);
887 }
888 
889 static void
_oc19(jit_state_t * _jit,jit_int32_t Op,jit_int32_t Cc,jit_int32_t Simm19)890 _oc19(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Cc, jit_int32_t Simm19)
891 {
892     instr_t	i;
893     assert(!(Cc &          ~0xf));
894     assert(Simm19 >= -262148 && Simm19 <= 262143);
895     assert(!(Op   & ~0xff000000));
896     i.w = Op;
897     i.cond2.b = Cc;
898     i.imm19.b = Simm19;
899     ii(i.w);
900 }
901 
902 static void
_o26(jit_state_t * _jit,jit_int32_t Op,jit_int32_t Simm26)903 _o26(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Simm26)
904 {
905     instr_t	i;
906     assert(Simm26 >= -33554432 && Simm26 <= 33554431);
907     assert(!(Op   & ~0xfc000000));
908     i.w = Op;
909     i.imm26.b = Simm26;
910     ii(i.w);
911 }
912 
913 static void
_ox_x(jit_state_t * _jit,jit_int32_t Op,jit_int32_t Rd,jit_int32_t Rm)914 _ox_x(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Rm)
915 {
916     instr_t	i;
917     assert(!(Rd &       ~0x1f));
918     assert(!(Rm &       ~0x1f));
919     assert(!(Op & ~0xffe0ffe0));
920     i.w = Op;
921     i.Rd.b = Rd;
922     i.Rm.b = Rm;
923     ii(i.w);
924 }
925 
926 static void
_o_xx(jit_state_t * _jit,jit_int32_t Op,jit_int32_t Rd,jit_int32_t Rn)927 _o_xx(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Rn)
928 {
929     instr_t	i;
930     assert(!(Rd &       ~0x1f));
931     assert(!(Rn &       ~0x1f));
932     assert(!(Op & ~0xfffffc00));
933     i.w = Op;
934     i.Rd.b = Rd;
935     i.Rn.b = Rn;
936     ii(i.w);
937 }
938 
939 static void
_oxx_(jit_state_t * _jit,jit_int32_t Op,jit_int32_t Rn,jit_int32_t Rm)940 _oxx_(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rn, jit_int32_t Rm)
941 {
942     instr_t	i;
943     assert(!(Rn &       ~0x1f));
944     assert(!(Rm &       ~0x1f));
945     assert(!(Op & ~0xffc0fc1f));
946     i.w = Op;
947     i.Rn.b = Rn;
948     i.Rm.b = Rm;
949     ii(i.w);
950 }
951 
952 static void
_o_x_(jit_state_t * _jit,jit_int32_t Op,jit_int32_t Rn)953 _o_x_(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rn)
954 {
955     instr_t	i;
956     assert(!(Rn & ~0x1f));
957     assert(!(Op & 0x3e0));
958     i.w = Op;
959     i.Rn.b = Rn;
960     ii(i.w);
961 }
962 
963 static void
_ox_h(jit_state_t * _jit,jit_int32_t Op,jit_int32_t Rd,jit_int32_t Imm16)964 _ox_h(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Imm16)
965 {
966     instr_t	i;
967     assert(!(Rd    &       ~0x1f));
968     assert(!(Imm16 &     ~0xffff));
969     assert(!(Op    & ~0xffe00000));
970     i.w = Op;
971     i.Rd.b = Rd;
972     i.imm16.b = Imm16;
973     ii(i.w);
974 }
975 
976 static void
_oxxrs(jit_state_t * _jit,jit_int32_t Op,jit_int32_t Rd,jit_int32_t Rn,jit_int32_t R,jit_int32_t S)977 _oxxrs(jit_state_t *_jit, jit_int32_t Op,
978        jit_int32_t Rd, jit_int32_t Rn, jit_int32_t R, jit_int32_t S)
979 {
980     instr_t	i;
981     assert(!(Rd &       ~0x1f));
982     assert(!(Rn &       ~0x1f));
983     assert(!(R  &       ~0x3f));
984     assert(!(S  &       ~0x3f));
985     assert(!(Op & ~0xffc00000));
986     i.w = Op;
987     i.Rd.b = Rd;
988     i.Rn.b = Rn;
989     i.immr.b = R;
990     i.imms.b = S;
991     ii(i.w);
992 }
993 
994 static void
_oxxxc(jit_state_t * _jit,jit_int32_t Op,jit_int32_t Rd,jit_int32_t Rn,jit_int32_t Rm,jit_int32_t Cc)995 _oxxxc(jit_state_t *_jit, jit_int32_t Op,
996        jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Rm, jit_int32_t Cc)
997 {
998     instr_t	i;
999     assert(!(Rd &       ~0x1f));
1000     assert(!(Rn &       ~0x1f));
1001     assert(!(Rm &       ~0x1f));
1002     assert(!(Cc  &       ~0xf));
1003     assert(!(Op & ~0xffc00c00));
1004     i.w = Op;
1005     i.Rd.b = Rd;
1006     i.Rn.b = Rn;
1007     i.Rm.b = Rm;
1008     i.cond.b = Cc;
1009     ii(i.w);
1010 }
1011 
1012 static void
_oxxx7(jit_state_t * _jit,jit_int32_t Op,jit_int32_t Rt,jit_int32_t Rt2,jit_int32_t Rn,jit_int32_t Simm7)1013 _oxxx7(jit_state_t *_jit, jit_int32_t Op,
1014        jit_int32_t Rt, jit_int32_t Rt2, jit_int32_t Rn, jit_int32_t Simm7)
1015 {
1016     instr_t	i;
1017     assert(!(Rt  &       ~0x1f));
1018     assert(!(Rt2 &       ~0x1f));
1019     assert(!(Rn  &       ~0x1f));
1020     assert(Simm7 >= -128 && Simm7 <= 127);
1021     assert(!(Op & ~0xffc003e0));
1022     i.w = Op;
1023     i.Rt.b = Rt;
1024     i.Rt2.b = Rt2;
1025     i.Rn.b = Rn;
1026     i.imm7.b = Simm7;
1027     ii(i.w);
1028 }
1029 
1030 static void
_nop(jit_state_t * _jit,jit_int32_t i0)1031 _nop(jit_state_t *_jit, jit_int32_t i0)
1032 {
1033     for (; i0 > 0; i0 -= 4)
1034 	NOP();
1035     assert(i0 == 0);
1036 }
1037 
1038 static void
_addi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1039 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1040 {
1041     jit_int32_t		reg;
1042     jit_word_t		is =  i0 >> 12;
1043     jit_word_t		in = -i0;
1044     jit_word_t		iS =  in >> 12;
1045     if (      i0 >= 0 && i0 <= 0xfff)
1046 	ADDI   (r0, r1, i0);
1047     else if ((is << 12) == i0 && is >= 0 && is <= 0xfff)
1048 	ADDI_12(r0, r1, is);
1049     else if ( in >= 0 && in <= 0xfff)
1050 	SUBI   (r0, r1, in);
1051     else if ((iS << 12) == is && iS >= 0 && iS <= 0xfff)
1052 	SUBI_12(r0, r1, iS);
1053     else {
1054 	reg = jit_get_reg(jit_class_gpr);
1055 	movi(rn(reg), i0);
1056 	addr(r0, r1, rn(reg));
1057 	jit_unget_reg(reg);
1058     }
1059 }
1060 
1061 static void
_addci(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1062 _addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1063 {
1064     jit_int32_t		reg;
1065     jit_word_t		is =  i0 >> 12;
1066     jit_word_t		in = -i0;
1067     jit_word_t		iS =  in >> 12;
1068     if (      i0 >= 0 && i0 <= 0xfff)
1069 	ADDSI   (r0, r1, i0);
1070     else if ((is << 12) == i0 && is >= 0 && is <= 0xfff)
1071 	ADDSI_12(r0, r1, is);
1072     else if ( in >= 0 && in <= 0xfff)
1073 	SUBSI   (r0, r1, in);
1074     else if ((iS << 12) == is && iS >= 0 && iS <= 0xfff)
1075 	SUBSI_12(r0, r1, iS);
1076     else {
1077 	reg = jit_get_reg(jit_class_gpr);
1078 	movi(rn(reg), i0);
1079 	addcr(r0, r1, rn(reg));
1080 	jit_unget_reg(reg);
1081     }
1082 }
1083 
1084 static void
_addxi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1085 _addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1086 {
1087     jit_int32_t		reg;
1088     reg = jit_get_reg(jit_class_gpr);
1089     movi(rn(reg), i0);
1090     addxr(r0, r1, rn(reg));
1091     jit_unget_reg(reg);
1092 }
1093 
1094 static void
_subi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1095 _subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1096 {
1097     jit_int32_t		reg;
1098     jit_word_t		is = i0 >> 12;
1099     if (      i0 >= 0 && i0 <= 0xfff)
1100 	SUBI   (r0, r1, i0);
1101     else if ((is << 12) == i0 && is >= 0 && is <= 0xfff)
1102 	SUBI_12(r0, r1, is);
1103     else {
1104 	reg = jit_get_reg(jit_class_gpr);
1105 	movi(rn(reg), i0);
1106 	subr(r0, r1, rn(reg));
1107 	jit_unget_reg(reg);
1108     }
1109 }
1110 
1111 static void
_subci(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1112 _subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1113 {
1114     jit_int32_t		reg;
1115     jit_word_t		is = i0 >> 12;
1116     if (      i0 >= 0 && i0 <= 0xfff)
1117 	SUBSI   (r0, r1, i0);
1118     else if ((is << 12) == i0 && is >= 0 && is <= 0xfff)
1119 	SUBSI_12(r0, r1, is);
1120     else {
1121 	reg = jit_get_reg(jit_class_gpr);
1122 	movi(rn(reg), i0);
1123 	subcr(r0, r1, rn(reg));
1124 	jit_unget_reg(reg);
1125     }
1126 }
1127 
1128 static void
_subxi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1129 _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1130 {
1131     jit_int32_t		reg;
1132     reg = jit_get_reg(jit_class_gpr);
1133     movi(rn(reg), i0);
1134     subxr(r0, r1, rn(reg));
1135     jit_unget_reg(reg);
1136 }
1137 
1138 static void
_rsbi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1139 _rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1140 {
1141     subi(r0, r1, i0);
1142     negr(r0, r0);
1143 }
1144 
1145 static void
_muli(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1146 _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1147 {
1148     jit_int32_t		reg;
1149     reg = jit_get_reg(jit_class_gpr);
1150     movi(rn(reg), i0);
1151     mulr(r0, r1, rn(reg));
1152     jit_unget_reg(reg);
1153 }
1154 
1155 static void
_qmulr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2,jit_int32_t r3)1156 _qmulr(jit_state_t *_jit, jit_int32_t r0,
1157        jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1158 {
1159     jit_int32_t		reg;
1160     if (r0 == r2 || r0 == r3) {
1161 	reg = jit_get_reg(jit_class_gpr);
1162 	mulr(rn(reg), r2, r3);
1163     }
1164     else
1165 	mulr(r0, r2, r3);
1166     SMULH(r1, r2, r3);
1167     if (r0 == r2 || r0 == r3) {
1168 	movr(r0, rn(reg));
1169 	jit_unget_reg(reg);
1170     }
1171 }
1172 
1173 static void
_qmuli(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2,jit_word_t i0)1174 _qmuli(jit_state_t *_jit, jit_int32_t r0,
1175        jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
1176 {
1177     jit_int32_t		reg;
1178     reg = jit_get_reg(jit_class_gpr);
1179     movi(rn(reg), i0);
1180     qmulr(r0, r1, r2, rn(reg));
1181     jit_unget_reg(reg);
1182 }
1183 
1184 static void
_qmulr_u(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2,jit_int32_t r3)1185 _qmulr_u(jit_state_t *_jit, jit_int32_t r0,
1186 	 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1187 {
1188     jit_int32_t		reg;
1189     if (r0 == r2 || r0 == r3) {
1190 	reg = jit_get_reg(jit_class_gpr);
1191 	mulr(rn(reg), r2, r3);
1192     }
1193     else
1194 	mulr(r0, r2, r3);
1195     UMULH(r1, r2, r3);
1196     if (r0 == r2 || r0 == r3) {
1197 	movr(r0, rn(reg));
1198 	jit_unget_reg(reg);
1199     }
1200 }
1201 
1202 static void
_qmuli_u(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2,jit_word_t i0)1203 _qmuli_u(jit_state_t *_jit, jit_int32_t r0,
1204 	 jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
1205 {
1206     jit_int32_t		reg;
1207     reg = jit_get_reg(jit_class_gpr);
1208     movi(rn(reg), i0);
1209     qmulr_u(r0, r1, r2, rn(reg));
1210     jit_unget_reg(reg);
1211 }
1212 
1213 static void
_divi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1214 _divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1215 {
1216     jit_int32_t		reg;
1217     reg = jit_get_reg(jit_class_gpr);
1218     movi(rn(reg), i0);
1219     divr(r0, r1, rn(reg));
1220     jit_unget_reg(reg);
1221 }
1222 
1223 static void
_divi_u(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1224 _divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1225 {
1226     jit_int32_t		reg;
1227     reg = jit_get_reg(jit_class_gpr);
1228     movi(rn(reg), i0);
1229     divr_u(r0, r1, rn(reg));
1230     jit_unget_reg(reg);
1231 }
1232 
1233 static void
_iqdivr(jit_state_t * _jit,jit_bool_t sign,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2,jit_int32_t r3)1234 _iqdivr(jit_state_t *_jit, jit_bool_t sign,
1235 	jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1236 {
1237     jit_int32_t		sv0, rg0;
1238     jit_int32_t		sv1, rg1;
1239     if (r0 == r2 || r0 == r3) {
1240 	sv0 = jit_get_reg(jit_class_gpr);
1241 	rg0 = rn(sv0);
1242     }
1243     else
1244 	rg0 = r0;
1245     if (r1 == r2 || r1 == r3) {
1246 	sv1 = jit_get_reg(jit_class_gpr);
1247 	rg1 = rn(sv1);
1248     }
1249     else
1250 	rg1 = r1;
1251     if (sign)
1252 	divr(rg0, r2, r3);
1253     else
1254 	divr_u(rg0, r2, r3);
1255     mulr(rg1, r3, rg0);
1256     subr(rg1, r2, rg1);
1257     if (rg0 != r0) {
1258 	movr(r0, rg0);
1259 	jit_unget_reg(sv0);
1260     }
1261     if (rg1 != r1) {
1262 	movr(r1, rg1);
1263 	jit_unget_reg(sv1);
1264     }
1265 }
1266 
1267 static void
_qdivi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2,jit_word_t i0)1268 _qdivi(jit_state_t *_jit, jit_int32_t r0,
1269        jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
1270 {
1271     jit_int32_t		reg;
1272     reg = jit_get_reg(jit_class_gpr);
1273     movi(rn(reg), i0);
1274     qdivr(r0, r1, r2, rn(reg));
1275     jit_unget_reg(reg);
1276 }
1277 
1278 static void
_qdivi_u(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2,jit_word_t i0)1279 _qdivi_u(jit_state_t *_jit, jit_int32_t r0,
1280 	 jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
1281 {
1282     jit_int32_t		reg;
1283     reg = jit_get_reg(jit_class_gpr);
1284     movi(rn(reg), i0);
1285     qdivr_u(r0, r1, r2, rn(reg));
1286     jit_unget_reg(reg);
1287 }
1288 
1289 static void
_remr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1290 _remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1291 {
1292     jit_int32_t		reg;
1293     if (r0 == r1 || r0 == r2) {
1294 	reg = jit_get_reg(jit_class_gpr);
1295 	divr(rn(reg), r1, r2);
1296 	mulr(rn(reg), r2, rn(reg));
1297 	subr(r0, r1, rn(reg));
1298 	jit_unget_reg(reg);
1299     }
1300     else {
1301 	divr(r0, r1, r2);
1302 	mulr(r0, r2, r0);
1303 	subr(r0, r1, r0);
1304     }
1305 }
1306 
1307 static void
_remi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1308 _remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1309 {
1310     jit_int32_t		reg;
1311     reg = jit_get_reg(jit_class_gpr);
1312     movi(rn(reg), i0);
1313     remr(r0, r1, rn(reg));
1314     jit_unget_reg(reg);
1315 }
1316 
1317 static void
_remr_u(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1318 _remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1319 {
1320     jit_int32_t		reg;
1321     if (r0 == r1 || r0 == r2) {
1322 	reg = jit_get_reg(jit_class_gpr);
1323 	divr_u(rn(reg), r1, r2);
1324 	mulr(rn(reg), r2, rn(reg));
1325 	subr(r0, r1, rn(reg));
1326 	jit_unget_reg(reg);
1327     }
1328     else {
1329 	divr_u(r0, r1, r2);
1330 	mulr(r0, r2, r0);
1331 	subr(r0, r1, r0);
1332     }
1333 }
1334 
1335 static void
_remi_u(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1336 _remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1337 {
1338     jit_int32_t		reg;
1339     reg = jit_get_reg(jit_class_gpr);
1340     movi(rn(reg), i0);
1341     remr_u(r0, r1, rn(reg));
1342     jit_unget_reg(reg);
1343 }
1344 
1345 static void
_lshi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1346 _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1347 {
1348     if (i0 == 0)
1349 	movr(r0, r1);
1350     else {
1351 	assert(i0 > 0 && i0 < 64);
1352 	LSLI(r0, r1, i0);
1353     }
1354 }
1355 
1356 static void
_rshi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1357 _rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1358 {
1359     if (i0 == 0)
1360 	movr(r0, r1);
1361     else {
1362 	assert(i0 > 0 && i0 < 64);
1363 	ASRI(r0, r1, i0);
1364     }
1365 }
1366 
1367 static void
_rshi_u(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1368 _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1369 {
1370     if (i0 == 0)
1371 	movr(r0, r1);
1372     else {
1373 	assert(i0 > 0 && i0 < 64);
1374 	LSRI(r0, r1, i0);
1375     }
1376 }
1377 
1378 static void
_andi(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1379 _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1380 {
1381     jit_int32_t		reg;
1382     jit_int32_t		imm;
1383     if (i0 == 0)
1384 	movi(r0, 0);
1385     else if (i0 == -1)
1386 	movr(r0, r1);
1387     else {
1388 	imm = logical_immediate(i0);
1389 	if (imm != -1)
1390 	    ANDI(r0, r1, imm);
1391 	else {
1392 	    reg = jit_get_reg(jit_class_gpr);
1393 	    movi(rn(reg), i0);
1394 	    andr(r0, r1, rn(reg));
1395 	    jit_unget_reg(reg);
1396 	}
1397     }
1398 }
1399 
1400 static void
_ori(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1401 _ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1402 {
1403     jit_int32_t		reg;
1404     jit_int32_t		imm;
1405     if (i0 == 0)
1406 	movr(r0, r1);
1407     else if (i0 == -1)
1408 	movi(r0, -1);
1409     else {
1410 	imm = logical_immediate(i0);
1411 	if (imm != -1)
1412 	    ORRI(r0, r1, imm);
1413 	else {
1414 	    reg = jit_get_reg(jit_class_gpr);
1415 	    movi(rn(reg), i0);
1416 	    orr(r0, r1, rn(reg));
1417 	    jit_unget_reg(reg);
1418 	}
1419     }
1420 }
1421 
1422 static void
_xori(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1423 _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1424 {
1425     jit_int32_t		reg;
1426     jit_int32_t		imm;
1427     if (i0 == 0)
1428 	movr(r0, r1);
1429     else if (i0 == -1)
1430 	comr(r0, r1);
1431     else {
1432 	imm = logical_immediate(i0);
1433 	if (imm != -1)
1434 	    EORI(r0, r1, imm);
1435 	else {
1436 	    reg = jit_get_reg(jit_class_gpr);
1437 	    movi(rn(reg), i0);
1438 	    xorr(r0, r1, rn(reg));
1439 	    jit_unget_reg(reg);
1440 	}
1441     }
1442 }
1443 
1444 #if __BYTE_ORDER == __LITTLE_ENDIAN
1445 static void
_htonr_us(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1446 _htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1447 {
1448     htonr_ul(r0, r1);
1449     rshi_u(r0, r0, 48);
1450 }
1451 
1452 static void
_htonr_ui(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1453 _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1454 {
1455     htonr_ul(r0, r1);
1456     rshi_u(r0, r0, 32);
1457 }
1458 #endif
1459 
1460 static void
_ldi_c(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)1461 _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1462 {
1463     jit_int32_t		reg;
1464     reg = jit_get_reg(jit_class_gpr);
1465     movi(rn(reg), i0);
1466     ldr_c(r0, rn(reg));
1467     jit_unget_reg(reg);
1468 }
1469 
1470 static void
_ldr_uc(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1471 _ldr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1472 {
1473     LDRBI(r0, r1, 0);
1474 #if 0
1475     extr_uc(r0, r0);
1476 #endif
1477 }
1478 
1479 static void
_ldi_uc(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)1480 _ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1481 {
1482     jit_int32_t		reg;
1483     reg = jit_get_reg(jit_class_gpr);
1484     movi(rn(reg), i0);
1485     ldr_uc(r0, rn(reg));
1486     jit_unget_reg(reg);
1487 }
1488 
1489 static void
_ldi_s(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)1490 _ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1491 {
1492     jit_int32_t		reg;
1493     reg = jit_get_reg(jit_class_gpr);
1494     movi(rn(reg), i0);
1495     ldr_s(r0, rn(reg));
1496     jit_unget_reg(reg);
1497 }
1498 
1499 static void
_ldr_us(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1500 _ldr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1501 {
1502     LDRHI(r0, r1, 0);
1503 #if 0
1504     extr_us(r0, r0);
1505 #endif
1506 }
1507 
1508 static void
_ldi_us(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)1509 _ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1510 {
1511     jit_int32_t		reg;
1512     reg = jit_get_reg(jit_class_gpr);
1513     movi(rn(reg), i0);
1514     ldr_us(r0, rn(reg));
1515     jit_unget_reg(reg);
1516 }
1517 
1518 static void
_ldi_i(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)1519 _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1520 {
1521     jit_int32_t		reg;
1522     reg = jit_get_reg(jit_class_gpr);
1523     movi(rn(reg), i0);
1524     ldr_i(r0, rn(reg));
1525     jit_unget_reg(reg);
1526 }
1527 
1528 static void
_ldr_ui(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1529 _ldr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1530 {
1531     LDRWI(r0, r1, 0);
1532 #if 0
1533     extr_ui(r0, r0);
1534 #endif
1535 }
1536 
1537 static void
_ldi_ui(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)1538 _ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1539 {
1540     jit_int32_t		reg;
1541     reg = jit_get_reg(jit_class_gpr);
1542     movi(rn(reg), i0);
1543     ldr_ui(r0, rn(reg));
1544     jit_unget_reg(reg);
1545 }
1546 
1547 static void
_ldi_l(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)1548 _ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1549 {
1550     jit_int32_t		reg;
1551     reg = jit_get_reg(jit_class_gpr);
1552     movi(rn(reg), i0);
1553     ldr_l(r0, rn(reg));
1554     jit_unget_reg(reg);
1555 }
1556 
1557 static void
_ldxr_c(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1558 _ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1559 {
1560     LDRSB(r0, r1, r2);
1561     extr_c(r0, r0);
1562 }
1563 
1564 static void
_ldxi_c(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1565 _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1566 {
1567     jit_int32_t		reg;
1568     if (i0 >= 0 && i0 <= 4095)
1569 	LDRSBI(r0, r1, i0);
1570     else if (i0 > -256 && i0 < 0)
1571 	LDURSB(r0, r1, i0 & 0x1ff);
1572     else {
1573 	reg = jit_get_reg(jit_class_gpr);
1574 	movi(rn(reg), i0);
1575 	LDRSB(r0, r1, rn(reg));
1576 	jit_unget_reg(reg);
1577     }
1578     extr_c(r0, r0);
1579 }
1580 
1581 static void
_ldxr_uc(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1582 _ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1583 {
1584     LDRB(r0, r1, r2);
1585 #if 0
1586     extr_uc(r0, r0);
1587 #endif
1588 }
1589 
1590 static void
_ldxi_uc(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1591 _ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1592 {
1593     jit_int32_t		reg;
1594     if (i0 >= 0 && i0 <= 4095)
1595 	LDRBI(r0, r1, i0);
1596     else if (i0 > -256 && i0 < 0)
1597 	LDURB(r0, r1, i0 & 0x1ff);
1598     else {
1599 	reg = jit_get_reg(jit_class_gpr);
1600 	addi(rn(reg), r1, i0);
1601 	ldr_uc(r0, rn(reg));
1602 	jit_unget_reg(reg);
1603     }
1604 #if 0
1605     extr_uc(r0, r0);
1606 #endif
1607 }
1608 
1609 static void
_ldxi_s(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1610 _ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1611 {
1612     jit_int32_t		reg;
1613     assert(!(i0 & 1));
1614     if (i0 >= 0 && i0 <= 8191)
1615 	LDRSHI(r0, r1, i0 >> 1);
1616     else if (i0 > -256 && i0 < 0)
1617 	LDURSH(r0, r1, i0 & 0x1ff);
1618     else {
1619 	reg = jit_get_reg(jit_class_gpr);
1620 	movi(rn(reg), i0);
1621 	LDRSH(r0, r1, rn(reg));
1622 	jit_unget_reg(reg);
1623     }
1624 }
1625 
1626 static void
_ldxr_us(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1627 _ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1628 {
1629     LDRH(r0, r1, r2);
1630 #if 0
1631     extr_us(r0, r0);
1632 #endif
1633 }
1634 
1635 static void
_ldxi_us(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1636 _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1637 {
1638     jit_int32_t		reg;
1639     assert(!(i0 & 1));
1640     if (i0 >= 0 && i0 <= 8191)
1641 	LDRHI(r0, r1, i0 >> 1);
1642     else if (i0 > -256 && i0 < 0)
1643 	LDURH(r0, r1, i0 & 0x1ff);
1644     else {
1645 	reg = jit_get_reg(jit_class_gpr);
1646 	movi(rn(reg), i0);
1647 	LDRH(r0, r1, rn(reg));
1648 	jit_unget_reg(reg);
1649     }
1650 #if 0
1651     extr_us(r0, r0);
1652 #endif
1653 }
1654 
1655 static void
_ldxi_i(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1656 _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1657 {
1658     jit_int32_t		reg;
1659     assert(!(i0 & 3));
1660     if (i0 >= 0 && i0 <= 16383)
1661 	LDRSWI(r0, r1, i0 >> 2);
1662     else if (i0 > -256 && i0 < 0)
1663 	LDURSW(r0, r1, i0 & 0x1ff);
1664     else {
1665 	reg = jit_get_reg(jit_class_gpr);
1666 	addi(rn(reg), r1, i0);
1667 	ldr_i(r0, rn(reg));
1668 	jit_unget_reg(reg);
1669     }
1670 }
1671 
1672 static void
_ldxr_ui(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1673 _ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1674 {
1675     LDRW(r0, r1, r2);
1676 #if 0
1677     extr_ui(r0, r0);
1678 #endif
1679 }
1680 
1681 static void
_ldxi_ui(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1682 _ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1683 {
1684     jit_int32_t		reg;
1685     assert(!(i0 & 3));
1686     if (i0 >= 0 && i0 <= 16383)
1687 	LDRWI(r0, r1, i0 >> 2);
1688     else if (i0 > -256 && i0 < 0)
1689 	LDURW(r0, r1, i0 & 0x1ff);
1690     else {
1691 	reg = jit_get_reg(jit_class_gpr);
1692 	movi(rn(reg), i0);
1693 	LDRW(r0, r1, rn(reg));
1694 	jit_unget_reg(reg);
1695     }
1696 #if 0
1697     extr_ui(r0, r0);
1698 #endif
1699 }
1700 
1701 static void
_ldxi_l(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1702 _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1703 {
1704     jit_int32_t		reg;
1705     assert(!(i0 & 7));
1706     if (i0 >= 0 && i0 <= 32767)
1707 	LDRI(r0, r1, i0 >> 3);
1708     else if (i0 > -256 && i0 < 0)
1709 	LDUR(r0, r1, i0 & 0x1ff);
1710     else {
1711 	reg = jit_get_reg(jit_class_gpr);
1712 	addi(rn(reg), r1, i0);
1713 	ldr_l(r0, rn(reg));
1714 	jit_unget_reg(reg);
1715     }
1716 }
1717 
1718 static void
_sti_c(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0)1719 _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
1720 {
1721     jit_int32_t		reg;
1722     reg = jit_get_reg(jit_class_gpr);
1723     movi(rn(reg), i0);
1724     str_c(rn(reg), r0);
1725     jit_unget_reg(reg);
1726 }
1727 
1728 static void
_sti_s(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0)1729 _sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
1730 {
1731     jit_int32_t		reg;
1732     reg = jit_get_reg(jit_class_gpr);
1733     movi(rn(reg), i0);
1734     str_s(rn(reg), r0);
1735     jit_unget_reg(reg);
1736 }
1737 
1738 static void
_sti_i(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0)1739 _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
1740 {
1741     jit_int32_t		reg;
1742     reg = jit_get_reg(jit_class_gpr);
1743     movi(rn(reg), i0);
1744     str_i(rn(reg), r0);
1745     jit_unget_reg(reg);
1746 }
1747 
1748 static void
_sti_l(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0)1749 _sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
1750 {
1751     jit_int32_t		reg;
1752     reg = jit_get_reg(jit_class_gpr);
1753     movi(rn(reg), i0);
1754     str_l(rn(reg), r0);
1755     jit_unget_reg(reg);
1756 }
1757 
1758 static void
_stxi_c(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1759 _stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1760 {
1761     jit_int32_t		reg;
1762     if (i0 >= 0 && i0 <= 4095)
1763 	STRBI(r1, r0, i0);
1764     else if (i0 > -256 && i0 < 0)
1765 	STURB(r1, r0, i0 & 0x1ff);
1766     else {
1767 	reg = jit_get_reg(jit_class_gpr);
1768 	addi(rn(reg), r0, i0);
1769 	str_c(rn(reg), r1);
1770 	jit_unget_reg(reg);
1771     }
1772 }
1773 
1774 static void
_stxi_s(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1775 _stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1776 {
1777     jit_int32_t		reg;
1778     assert(!(i0 & 1));
1779     if (i0 >= 0 && i0 <= 8191)
1780 	STRHI(r1, r0, i0 >> 1);
1781     else if (i0 > -256 && i0 < 0)
1782 	STURH(r1, r0, i0 & 0x1ff);
1783     else {
1784 	reg = jit_get_reg(jit_class_gpr);
1785 	addi(rn(reg), r0, i0);
1786 	str_s(rn(reg), r1);
1787 	jit_unget_reg(reg);
1788     }
1789 }
1790 
1791 static void
_stxi_i(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1792 _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1793 {
1794     jit_int32_t		reg;
1795     assert(!(i0 & 3));
1796     if (i0 >= 0 && i0 <= 16383)
1797 	STRWI(r1, r0, i0 >> 2);
1798     else if (i0 > -256 && i0 < 0)
1799 	STURW(r1, r0, i0 & 0x1ff);
1800     else {
1801 	reg = jit_get_reg(jit_class_gpr);
1802 	addi(rn(reg), r0, i0);
1803 	str_i(rn(reg), r1);
1804 	jit_unget_reg(reg);
1805     }
1806 }
1807 
1808 static void
_stxi_l(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1809 _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1810 {
1811     jit_int32_t		reg;
1812     assert(!(i0 & 7));
1813     if (i0 >= 0 && i0 <= 32767)
1814 	STRI(r1, r0, i0 >> 3);
1815     else if (i0 > -256 && i0 < 0)
1816 	STUR(r1, r0, i0 & 0x1ff);
1817     else {
1818 	reg = jit_get_reg(jit_class_gpr);
1819 	addi(rn(reg), r0, i0);
1820 	str_l(rn(reg), r1);
1821 	jit_unget_reg(reg);
1822     }
1823 }
1824 
1825 static void
_movr(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1826 _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1827 {
1828     if (r0 != r1)
1829 	MOV(r0, r1);
1830 }
1831 
1832 static void
_movi(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)1833 _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1834 {
1835     jit_word_t		n0, ibit, nbit;
1836     n0 = ~i0;
1837     ibit = nbit = 0;
1838     if (i0 & 0x000000000000ffffL)	ibit |= 1;
1839     if (i0 & 0x00000000ffff0000L)	ibit |= 2;
1840     if (i0 & 0x0000ffff00000000L)	ibit |= 4;
1841     if (i0 & 0xffff000000000000L)	ibit |= 8;
1842     if (n0 & 0x000000000000ffffL)	nbit |= 1;
1843     if (n0 & 0x00000000ffff0000L)	nbit |= 2;
1844     if (n0 & 0x0000ffff00000000L)	nbit |= 4;
1845     if (n0 & 0xffff000000000000L)	nbit |= 8;
1846     switch (ibit) {
1847 	case 0:
1848 	    MOVZ   (r0,  0);
1849 	    break;
1850 	case 1:
1851 	    MOVZ   (r0,  i0        & 0xffff);
1852 	    break;
1853 	case 2:
1854 	    MOVZ_16(r0, (i0 >> 16) & 0xffff);
1855 	    break;
1856 	case 3:
1857 	    MOVZ   (r0,  i0        & 0xffff);
1858 	    MOVK_16(r0, (i0 >> 16) & 0xffff);
1859 	    break;
1860 	case 4:
1861 	    MOVZ_32(r0, (i0 >> 32) & 0xffff);
1862 	    break;
1863 	case 5:
1864 	    MOVZ   (r0,  i0        & 0xffff);
1865 	    MOVK_32(r0, (i0 >> 32) & 0xffff);
1866 	    break;
1867 	case 6:
1868 	    MOVZ_16(r0, (i0 >> 16) & 0xffff);
1869 	    MOVK_32(r0, (i0 >> 32) & 0xffff);
1870 	    break;
1871 	case 7:
1872 	    if (nbit == 8)
1873 		MOVN_48(r0, (n0 >> 48) & 0xffff);
1874 	    else {
1875 		MOVZ   (r0,  i0        & 0xffff);
1876 		MOVK_16(r0, (i0 >> 16) & 0xffff);
1877 		MOVK_32(r0, (i0 >> 32) & 0xffff);
1878 	    }
1879 	    break;
1880 	case 8:
1881 	    MOVZ_48(r0, (i0 >> 48) & 0xffff);
1882 	    break;
1883 	case 9:
1884 	    MOVZ   (r0,  i0        & 0xffff);
1885 	    MOVK_48(r0, (i0 >> 48) & 0xffff);
1886 	    break;
1887 	case 10:
1888 	    MOVZ_16(r0, (i0 >> 16) & 0xffff);
1889 	    MOVK_48(r0, (i0 >> 48) & 0xffff);
1890 	    break;
1891 	case 11:
1892 	    if (nbit == 4)
1893 		MOVN_32(r0, (n0 >> 32) & 0xffff);
1894 	    else {
1895 		MOVZ   (r0,  i0        & 0xffff);
1896 		MOVK_16(r0, (i0 >> 16) & 0xffff);
1897 		MOVK_48(r0, (i0 >> 48) & 0xffff);
1898 	    }
1899 	    break;
1900 	case 12:
1901 	    MOVZ_32(r0, (i0 >> 32) & 0xffff);
1902 	    MOVK_48(r0, (i0 >> 48) & 0xffff);
1903 	    break;
1904 	case 13:
1905 	    if (nbit == 2)
1906 		MOVN_16(r0, (n0 >> 16) & 0xffff);
1907 	    else {
1908 		MOVZ   (r0,  i0        & 0xffff);
1909 		MOVK_32(r0, (i0 >> 32) & 0xffff);
1910 		MOVK_48(r0, (i0 >> 48) & 0xffff);
1911 	    }
1912 	    break;
1913 	case 14:
1914 	    if (nbit == 1)
1915 		MOVN   (r0, (n0)       & 0xffff);
1916 	    else {
1917 		MOVZ_16(r0, (i0 >> 16) & 0xffff);
1918 		MOVK_32(r0, (i0 >> 32) & 0xffff);
1919 		MOVK_48(r0, (i0 >> 48) & 0xffff);
1920 	    }
1921 	    break;
1922 	case 15:
1923 	    if (nbit == 0)
1924 		MOVN   (r0,  0);
1925 	    else if (nbit == 1)
1926 		MOVN   (r0,  n0        & 0xffff);
1927 	    else if (nbit == 8)
1928 		MOVN_48(r0, (n0 >> 48) & 0xffff);
1929 	    else {
1930 		MOVZ   (r0,  i0        & 0xffff);
1931 		MOVK_16(r0, (i0 >> 16) & 0xffff);
1932 		MOVK_32(r0, (i0 >> 32) & 0xffff);
1933 		MOVK_48(r0, (i0 >> 48) & 0xffff);
1934 	    }
1935 	    break;
1936 	default:
1937 	    abort();
1938     }
1939 }
1940 
1941 static jit_word_t
_movi_p(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)1942 _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1943 {
1944     jit_word_t		w;
1945     w = _jit->pc.w;
1946     MOVZ   (r0,  i0        & 0xffff);
1947     MOVK_16(r0, (i0 >> 16) & 0xffff);
1948     MOVK_32(r0, (i0 >> 32) & 0xffff);
1949     MOVK_48(r0, (i0 >> 48) & 0xffff);
1950     return (w);
1951 }
1952 
1953 static void
_ccr(jit_state_t * _jit,jit_int32_t cc,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1954 _ccr(jit_state_t *_jit, jit_int32_t cc,
1955      jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1956 {
1957     CMP(r1, r2);
1958     CSET(r0, cc);
1959 }
1960 
1961 static void
_cci(jit_state_t * _jit,jit_int32_t cc,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)1962 _cci(jit_state_t *_jit, jit_int32_t cc,
1963      jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1964 {
1965     jit_int32_t		reg;
1966     jit_word_t		is =  i0 >> 12;
1967     jit_word_t		in = -i0;
1968     jit_word_t		iS =  in >> 12;
1969     if (      i0 >= 0 && i0 <= 0xfff)
1970 	CMPI   (r1, i0);
1971     else if ((is << 12) == i0 && is >= 0 && is <= 0xfff)
1972 	CMPI_12(r1, is);
1973     else if ( in >= 0 && in <= 0xfff)
1974 	CMNI   (r1, in);
1975     else if ((iS << 12) == is && iS >= 0 && iS <= 0xfff)
1976 	CMNI_12(r1, iS);
1977     else {
1978 	reg = jit_get_reg(jit_class_gpr);
1979 	movi(rn(reg), i0);
1980 	CMP(r1, rn(reg));
1981 	jit_unget_reg(reg);
1982     }
1983     CSET(r0, cc);
1984 }
1985 
1986 static jit_word_t
_bccr(jit_state_t * _jit,jit_int32_t cc,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1987 _bccr(jit_state_t *_jit, jit_int32_t cc,
1988       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1989 {
1990     jit_word_t		w, d;
1991     CMP(r0, r1);
1992     w = _jit->pc.w;
1993     d = (i0 - w) >> 2;
1994     B_C(cc, d);
1995     return (w);
1996 }
1997 
1998 static jit_word_t
_bcci(jit_state_t * _jit,jit_int32_t cc,jit_word_t i0,jit_int32_t r0,jit_word_t i1)1999 _bcci(jit_state_t *_jit, jit_int32_t cc,
2000       jit_word_t i0, jit_int32_t r0, jit_word_t i1)
2001 {
2002     jit_int32_t		reg;
2003     jit_word_t		w, d;
2004     jit_word_t		is =  i1 >> 12;
2005     jit_word_t		in = -i1;
2006     jit_word_t		iS =  in >> 12;
2007     if (      i1 >= 0 && i1 <= 0xfff)
2008 	CMPI   (r0, i1);
2009     else if ((is << 12) == i1 && is >= 0 && is <= 0xfff)
2010 	CMPI_12(r0, is);
2011     else if ( in >= 0 && in <= 0xfff)
2012 	CMNI   (r0, in);
2013     else if ((iS << 12) == is && iS >= 0 && iS <= 0xfff)
2014 	CMNI_12(r0, iS);
2015     else {
2016 	reg = jit_get_reg(jit_class_gpr);
2017 	movi(rn(reg), i1);
2018 	CMP(r0, rn(reg));
2019 	jit_unget_reg(reg);
2020     }
2021     w = _jit->pc.w;
2022     d = (i0 - w) >> 2;
2023     B_C(cc, d);
2024     return (w);
2025 }
2026 
2027 static jit_word_t
_beqi(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)2028 _beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
2029 {
2030     jit_word_t		w;
2031     if (i1 == 0) {
2032 	w = _jit->pc.w;
2033 	CBZ(r0, (i0 - w) >> 2);
2034     }
2035     else
2036 	w = bcci(BCC_EQ, i0, r0, i1);
2037     return (w);
2038 }
2039 
2040 static jit_word_t
_bnei(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_word_t i1)2041 _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
2042 {
2043     jit_word_t		w;
2044     if (i1 == 0) {
2045 	w = _jit->pc.w;
2046 	CBNZ(r0, (i0 - w) >> 2);
2047     }
2048     else
2049 	w = bcci(BCC_NE, i0, r0, i1);
2050     return (w);
2051 }
2052 
2053 static jit_word_t
_baddr(jit_state_t * _jit,jit_int32_t cc,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)2054 _baddr(jit_state_t *_jit, jit_int32_t cc,
2055        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2056 {
2057     jit_word_t		w;
2058     addcr(r0, r0, r1);
2059     w = _jit->pc.w;
2060     B_C(cc, (i0 - w) >> 2);
2061     return (w);
2062 }
2063 
2064 static jit_word_t
_baddi(jit_state_t * _jit,jit_int32_t cc,jit_word_t i0,jit_int32_t r0,jit_word_t i1)2065 _baddi(jit_state_t *_jit, jit_int32_t cc,
2066        jit_word_t i0, jit_int32_t r0, jit_word_t i1)
2067 {
2068     jit_word_t		w;
2069     addci(r0, r0, i1);
2070     w = _jit->pc.w;
2071     B_C(cc, (i0 - w) >> 2);
2072     return (w);
2073 }
2074 
2075 static jit_word_t
_bsubr(jit_state_t * _jit,jit_int32_t cc,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)2076 _bsubr(jit_state_t *_jit, jit_int32_t cc,
2077        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2078 {
2079     jit_word_t		w;
2080     subcr(r0, r0, r1);
2081     w = _jit->pc.w;
2082     B_C(cc, (i0 - w) >> 2);
2083     return (w);
2084 }
2085 
2086 static jit_word_t
_bsubi(jit_state_t * _jit,jit_int32_t cc,jit_word_t i0,jit_int32_t r0,jit_word_t i1)2087 _bsubi(jit_state_t *_jit, jit_int32_t cc,
2088        jit_word_t i0, jit_int32_t r0, jit_word_t i1)
2089 {
2090     jit_word_t		w;
2091     subci(r0, r0, i1);
2092     w = _jit->pc.w;
2093     B_C(cc, (i0 - w) >> 2);
2094     return (w);
2095 }
2096 
2097 static jit_word_t
_bmxr(jit_state_t * _jit,jit_int32_t cc,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)2098 _bmxr(jit_state_t *_jit, jit_int32_t cc,
2099       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2100 {
2101     jit_word_t		w;
2102     TST(r0, r1);
2103     w = _jit->pc.w;
2104     B_C(cc, (i0 - w) >> 2);
2105     return (w);
2106 }
2107 
2108 static jit_word_t
_bmxi(jit_state_t * _jit,jit_int32_t cc,jit_word_t i0,jit_int32_t r0,jit_word_t i1)2109 _bmxi(jit_state_t *_jit, jit_int32_t cc,
2110       jit_word_t i0, jit_int32_t r0, jit_word_t i1)
2111 {
2112     jit_word_t		w;
2113     jit_int32_t		reg;
2114     jit_int32_t		imm;
2115     imm = logical_immediate(i1);
2116     if (imm != -1)
2117 	TSTI(r0, imm);
2118     else {
2119 	reg = jit_get_reg(jit_class_gpr);
2120 	movi(rn(reg), i1);
2121 	TST(r0, rn(reg));
2122 	jit_unget_reg(reg);
2123     }
2124     w = _jit->pc.w;
2125     B_C(cc, (i0 - w) >> 2);
2126     return (w);
2127 }
2128 
2129 static void
_jmpi(jit_state_t * _jit,jit_word_t i0)2130 _jmpi(jit_state_t *_jit, jit_word_t i0)
2131 {
2132     jit_word_t		w;
2133     jit_int32_t		reg;
2134     w = (i0 - _jit->pc.w) >> 2;
2135     if (w >= -33554432 && w <= 33554431)
2136 	B(w);
2137     else {
2138 	reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
2139 	movi(rn(reg), i0);
2140 	jmpr(rn(reg));
2141 	jit_unget_reg(reg);
2142     }
2143 }
2144 
2145 static jit_word_t
_jmpi_p(jit_state_t * _jit,jit_word_t i0)2146 _jmpi_p(jit_state_t *_jit, jit_word_t i0)
2147 {
2148     jit_word_t		w;
2149     jit_int32_t		reg;
2150     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
2151     w = movi_p(rn(reg), i0);
2152     jmpr(rn(reg));
2153     jit_unget_reg(reg);
2154     return (w);
2155 }
2156 
2157 static void
_calli(jit_state_t * _jit,jit_word_t i0)2158 _calli(jit_state_t *_jit, jit_word_t i0)
2159 {
2160     jit_word_t		w;
2161     jit_int32_t		reg;
2162     w = (i0 - _jit->pc.w) >> 2;
2163     if (w >= -33554432 && w <= 33554431)
2164 	BL(w);
2165     else {
2166 	reg = jit_get_reg(jit_class_gpr);
2167 	movi(rn(reg), i0);
2168 	callr(rn(reg));
2169 	jit_unget_reg(reg);
2170     }
2171 }
2172 
2173 static jit_word_t
_calli_p(jit_state_t * _jit,jit_word_t i0)2174 _calli_p(jit_state_t *_jit, jit_word_t i0)
2175 {
2176     jit_word_t		w;
2177     jit_int32_t		reg;
2178     reg = jit_get_reg(jit_class_gpr);
2179     w = movi_p(rn(reg), i0);
2180     callr(rn(reg));
2181     jit_unget_reg(reg);
2182     return (w);
2183 }
2184 
2185 /*
2186  * prolog and epilog not as "optimized" as one would like, but the
2187  * problem of overallocating stack space to save callee save registers
2188  * exists on all ports, and is still a todo to use a variable
2189  *	stack_framesize
2190  * value, what would cause needing to patch some calls, most likely
2191  * the offset of jit_arg* of stack arguments.
2192  */
2193 static void
_prolog(jit_state_t * _jit,jit_node_t * node)2194 _prolog(jit_state_t *_jit, jit_node_t *node)
2195 {
2196     jit_int32_t		reg;
2197     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
2198 	jit_int32_t	frame = -_jitc->function->frame;
2199 	assert(_jitc->function->self.aoff >= frame);
2200 	if (_jitc->function->assume_frame)
2201 	    return;
2202 	_jitc->function->self.aoff = frame;
2203     }
2204     if (_jitc->function->allocar)
2205 	_jitc->function->self.aoff &= -16;
2206     _jitc->function->stack = ((_jitc->function->self.alen -
2207 			      /* align stack at 16 bytes */
2208 			      _jitc->function->self.aoff) + 15) & -16;
2209     STPI_POS(FP_REGNO, LR_REGNO, SP_REGNO, -(stack_framesize >> 3));
2210     MOV_XSP(FP_REGNO, SP_REGNO);
2211 #define SPILL(L, R, O)							\
2212     do {								\
2213 	if (jit_regset_tstbit(&_jitc->function->regset, _R##L)) {	\
2214 	    if (jit_regset_tstbit(&_jitc->function->regset, _R##R))	\
2215 		STPI(L, R, SP_REGNO, O);				\
2216 	    else							\
2217 		STRI(L, SP_REGNO, O);					\
2218 	}								\
2219 	else if (jit_regset_tstbit(&_jitc->function->regset, _R##R))	\
2220 	    STRI(R, SP_REGNO, O + 1);					\
2221     } while (0)
2222     SPILL(19, 20,  2);
2223     SPILL(21, 22,  4);
2224     SPILL(23, 24,  6);
2225     SPILL(25, 26,  8);
2226     SPILL(27, 28, 10);
2227 #undef SPILL
2228 #define SPILL(R, O)							\
2229     do {								\
2230 	if (jit_regset_tstbit(&_jitc->function->regset, _V##R))		\
2231 		stxi_d(O, SP_REGNO, R);					\
2232     } while (0)
2233     SPILL( 8,  96);
2234     SPILL( 9, 104);
2235     SPILL(10, 112);
2236     SPILL(11, 120);
2237     SPILL(12, 128);
2238     SPILL(13, 136);
2239     SPILL(14, 144);
2240     SPILL(15, 152);
2241 #undef SPILL
2242     if (_jitc->function->stack)
2243 	subi(SP_REGNO, SP_REGNO, _jitc->function->stack);
2244     if (_jitc->function->allocar) {
2245 	reg = jit_get_reg(jit_class_gpr);
2246 	movi(rn(reg), _jitc->function->self.aoff);
2247 	stxi_i(_jitc->function->aoffoff, FP_REGNO, rn(reg));
2248 	jit_unget_reg(reg);
2249     }
2250 
2251     if (_jitc->function->self.call & jit_call_varargs) {
2252 	/* Save gp registers in the save area, if any is a vararg */
2253 	for (reg = 8 - _jitc->function->vagp / -8;
2254 	     jit_arg_reg_p(reg); ++reg)
2255 	    stxi(_jitc->function->vaoff + offsetof(jit_va_list_t, x0) +
2256 		 reg * 8, FP_REGNO, rn(JIT_RA0 - reg));
2257 
2258 	for (reg = 8 - _jitc->function->vafp / -16;
2259 	     jit_arg_f_reg_p(reg); ++reg)
2260 	    /* Save fp registers in the save area, if any is a vararg */
2261 	    /* Note that the full 16 byte register is not saved, because
2262 	     * lightning only handles float and double, and, while
2263 	     * attempting to provide a va_list compatible pointer as
2264 	     * jit_va_start return, does not guarantee it (on all ports). */
2265 	    stxi_d(_jitc->function->vaoff + offsetof(jit_va_list_t, q0) +
2266 		   reg * 16 + offsetof(jit_qreg_t, l), FP_REGNO, rn(_V0 - reg));
2267     }
2268 }
2269 
2270 static void
_epilog(jit_state_t * _jit,jit_node_t * node)2271 _epilog(jit_state_t *_jit, jit_node_t *node)
2272 {
2273     if (_jitc->function->assume_frame)
2274 	return;
2275     if (_jitc->function->stack)
2276 	MOV_XSP(SP_REGNO, FP_REGNO);
2277 #define LOAD(L, R, O)							\
2278     do {								\
2279 	if (jit_regset_tstbit(&_jitc->function->regset, _R##L)) {	\
2280 	    if (jit_regset_tstbit(&_jitc->function->regset, _R##R))	\
2281 		LDPI(L, R, SP_REGNO, O);				\
2282 	    else							\
2283 		LDRI(L, SP_REGNO, O);					\
2284 	}								\
2285 	else if (jit_regset_tstbit(&_jitc->function->regset, _R##R))	\
2286 	    LDRI(R, SP_REGNO, O + 1);					\
2287     } while (0)
2288     LOAD(19, 20,  2);
2289     LOAD(21, 22,  4);
2290     LOAD(23, 24,  6);
2291     LOAD(25, 26,  8);
2292     LOAD(27, 28, 10);
2293 #undef LOAD
2294 #define LOAD(R, O)							\
2295     do {								\
2296 	if (jit_regset_tstbit(&_jitc->function->regset, _V##R))		\
2297 		ldxi_d(R, SP_REGNO, O);					\
2298     } while (0)
2299     LOAD( 8,  96);
2300     LOAD( 9, 104);
2301     LOAD(10, 112);
2302     LOAD(11, 120);
2303     LOAD(12, 128);
2304     LOAD(13, 136);
2305     LOAD(14, 144);
2306     LOAD(15, 152);
2307 #undef LOAD
2308     LDPI_PRE(FP_REGNO, LR_REGNO, SP_REGNO, stack_framesize >> 3);
2309     RET();
2310 }
2311 
2312 static void
_vastart(jit_state_t * _jit,jit_int32_t r0)2313 _vastart(jit_state_t *_jit, jit_int32_t r0)
2314 {
2315     jit_int32_t		reg;
2316 
2317     assert(_jitc->function->self.call & jit_call_varargs);
2318 
2319     /* Return jit_va_list_t in the register argument */
2320     addi(r0, FP_REGNO, _jitc->function->vaoff);
2321 
2322     reg = jit_get_reg(jit_class_gpr);
2323 
2324     /* Initialize stack pointer to the first stack argument. */
2325     addi(rn(reg), FP_REGNO, _jitc->function->self.size);
2326     stxi(offsetof(jit_va_list_t, stack), r0, rn(reg));
2327 
2328     /* Initialize gp top pointer to the first stack argument. */
2329     addi(rn(reg), r0, va_gp_top_offset);
2330     stxi(offsetof(jit_va_list_t, gptop), r0, rn(reg));
2331 
2332     /* Initialize fp top pointer to the first stack argument. */
2333     addi(rn(reg), r0, va_fp_top_offset);
2334     stxi(offsetof(jit_va_list_t, fptop), r0, rn(reg));
2335 
2336     /* Initialize gp offset in the save area. */
2337     movi(rn(reg), _jitc->function->vagp);
2338     stxi_i(offsetof(jit_va_list_t, gpoff), r0, rn(reg));
2339 
2340     /* Initialize fp offset in the save area. */
2341     movi(rn(reg), _jitc->function->vafp);
2342     stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
2343 
2344     jit_unget_reg(reg);
2345 }
2346 
2347 static void
_vaarg(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2348 _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2349 {
2350     jit_word_t		ge_code;
2351     jit_word_t		lt_code;
2352     jit_int32_t		rg0, rg1;
2353 
2354     assert(_jitc->function->self.call & jit_call_varargs);
2355 
2356     rg0 = jit_get_reg(jit_class_gpr);
2357     rg1 = jit_get_reg(jit_class_gpr);
2358 
2359     /* Load the gp offset in save area in the first temporary. */
2360     ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, gpoff));
2361 
2362     /* Jump over if there are no remaining arguments in the save area. */
2363     ge_code = bgei(_jit->pc.w, rn(rg0), 0);
2364 
2365     /* Load the gp save pointer in the second temporary. */
2366     ldxi(rn(rg1), r1, offsetof(jit_va_list_t, gptop));
2367 
2368     /* Load the vararg argument in the first argument. */
2369     ldxr(r0, rn(rg1), rn(rg0));
2370 
2371     /* Update the gp offset. */
2372     addi(rn(rg0), rn(rg0), 8);
2373     stxi_i(offsetof(jit_va_list_t, gpoff), r1, rn(rg0));
2374 
2375     /* Will only need one temporary register below. */
2376     jit_unget_reg(rg1);
2377 
2378     /* Jump over overflow code. */
2379     lt_code = jmpi_p(_jit->pc.w);
2380 
2381     /* Where to land if argument is in overflow area. */
2382     patch_at(ge_code, _jit->pc.w);
2383 
2384     /* Load stack pointer. */
2385     ldxi(rn(rg0), r1, offsetof(jit_va_list_t, stack));
2386 
2387     /* Load argument. */
2388     ldr(r0, rn(rg0));
2389 
2390     /* Update stack pointer. */
2391     addi(rn(rg0), rn(rg0), 8);
2392     stxi(offsetof(jit_va_list_t, stack), r1, rn(rg0));
2393 
2394     /* Where to land if argument is in gp save area. */
2395     patch_at(lt_code, _jit->pc.w);
2396 
2397     jit_unget_reg(rg0);
2398 }
2399 
2400 static void
_patch_at(jit_state_t * _jit,jit_word_t instr,jit_word_t label)2401 _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
2402 {
2403     instr_t		 i;
2404     jit_word_t		 d;
2405     jit_int32_t		 fc, ff, ffc;
2406     union {
2407 	jit_int32_t	*i;
2408 	jit_word_t	 w;
2409     } u;
2410     u.w = instr;
2411     i.w = u.i[0];
2412     fc  = i.w & 0xfc000000;
2413     ff  = i.w & 0xff000000;
2414     ffc = i.w & 0xffc00000;
2415     if (fc == A64_B || fc == A64_BL) {
2416 	d = (label - instr) >> 2;
2417 	assert(d >= -33554432 && d <= 33554431);
2418 	i.imm26.b = d;
2419 	u.i[0] = i.w;
2420     }
2421     else if (ff == A64_B_C || ff == (A64_CBZ|XS) || ff == (A64_CBNZ|XS)) {
2422 	d = (label - instr) >> 2;
2423 	assert(d >= -262148 && d <= 262143);
2424 	i.imm19.b = d;
2425 	u.i[0] = i.w;
2426     }
2427     else if (ffc == (A64_MOVZ|XS)) {
2428 	i.imm16.b = label;
2429 	u.i[0] = i.w;
2430 	i.w = u.i[1];
2431 	assert((i.w & 0xffe00000) == (A64_MOVK|XS|MOVI_LSL_16));
2432 	i.imm16.b = label >> 16;
2433 	u.i[1] = i.w;
2434 	i.w = u.i[2];
2435 	assert((i.w & 0xffe00000) == (A64_MOVK|XS|MOVI_LSL_32));
2436 	i.imm16.b = label >> 32;
2437 	u.i[2] = i.w;
2438 	i.w = u.i[3];
2439 	assert((i.w & 0xffe00000) == (A64_MOVK|XS|MOVI_LSL_48));
2440 	i.imm16.b = label >> 48;
2441 	u.i[3] = i.w;
2442     }
2443     else
2444 	abort();
2445 }
2446 #endif
2447