1 /*
2  * Copyright (C) 2012-2019  Free Software Foundation, Inc.
3  *
4  * This file is part of GNU lightning.
5  *
6  * GNU lightning is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; either version 3, or (at your option)
9  * any later version.
10  *
11  * GNU lightning is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14  * License for more details.
15  *
16  * Authors:
17  *	Paulo Cesar Pereira de Andrade
18  */
19 
20 #if PROTO
/* As per the vfp_regno macro; required due to the "support" for soft float
 * registers, or for using integer registers as arguments to float operations */
#  define _D8_REGNO			32
/* ARM_V_Q is intentionally not defined here: it is defined once, next to
 * the other ARM_V_* encoding bits (ARM_V_D, ARM_V_N, ARM_V_M, ...) later
 * in this PROTO section. */
/*
 * FPSCR bit masks, listed from the most significant bit downwards.
 */
/* condition flags */
#  define FPSCR_N	0x80000000	/* Negative flag */
#  define FPSCR_Z	0x40000000	/* Zero flag */
#  define FPSCR_C	0x20000000	/* Carry flag */
#  define FPSCR_V	0x10000000	/* Overflow flag */
/* mode control */
#  define FPSCR_QC	0x08000000	/* Cumulative saturation */
#  define FPSCR_AHP	0x04000000	/* Alt. half-precision */
#  define FPSCR_DN	0x02000000	/* Default NaN mode */
#  define FPSCR_FZ	0x01000000	/* Flush to zero */
/* rounding mode field and the values it may hold */
#  define FPSCR_RMASK	0x00c00000
#    define FPSCR_RN	0x00000000	/* Round to Nearest */
#    define FPSCR_RP	0x00400000	/* Round to Plus Infinity */
#    define FPSCR_RM	0x00800000	/* Round to Minus Infinity */
#    define FPSCR_RZ	0x00c00000	/* Round towards Zero */
/* vector stride/length fields */
#  define FPSCR_STRIDE	0x00300000
#  define FPSCR_RES1	0x00080000	/* Reserved, UNK/SBZP */
#  define FPSCR_LEN	0x00070000
/* trap bits */
#  define FPSCR_IDE	0x00008000	/* Input Denormal trap */
#  define FPSCR_IXE	0x00001000	/* Inexact trap */
#  define FPSCR_UFE	0x00000800	/* Underflow trap */
#  define FPSCR_OFE	0x00000400	/* Overflow trap */
#  define FPSCR_DZE	0x00000200	/* Division by zero trap */
#  define FPSCR_IOE	0x00000100	/* Invalid Operation trap */
/* flag bits */
#  define FPSCR_IDC	0x00000080	/* Input Denormal flag */
#  define FPSCR_RES0	0x00000060	/* Reserved, UNK/SBZP */
#  define FPSCR_IXC	0x00000010	/* Inexact flag */
#  define FPSCR_UFC	0x00000008	/* Underflow flag */
#  define FPSCR_OFC	0x00000004	/* Overflow flag */
#  define FPSCR_DZC	0x00000002	/* Division by zero flag */
#  define FPSCR_IOC	0x00000001	/* Invalid Operation flag */
/*
 * VFP opcode templates and modifier bits.
 *
 * The composite ARM_VCVT_* / ARM_VCVTR_* values are built by OR-ing
 * modifier bits into a base opcode.  Every composite replacement list is
 * wrapped in parentheses so that it behaves as a single operand wherever
 * it is used (e.g. next to `&', `^' or `==').
 */
#  define ARM_V_E			0x00000080 /* ARM_VCMP except if NaN */
#  define ARM_V_Z			0x00010000 /* ARM_VCMP with zero */
#  define ARM_V_F64			0x00000100
#  define ARM_VADD_F			0x0e300a00
#  define ARM_VSUB_F			0x0e300a40
#  define ARM_VMUL_F			0x0e200a00
#  define ARM_VDIV_F			0x0e800a00
#  define ARM_VABS_F			0x0eb00ac0
#  define ARM_VNEG_F			0x0eb10a40
#  define ARM_VSQRT_F			0x0eb10ac0
#  define ARM_VMOV_F			0x0eb00a40
#  define ARM_VMOV_A_S			0x0e100a10 /* vmov rn, sn */
#  define ARM_VMOV_S_A			0x0e000a10 /* vmov sn, rn */
#  define ARM_VMOV_AA_D			0x0c500b10 /* vmov rn,rn, dn */
#  define ARM_VMOV_D_AA			0x0c400b10 /* vmov dn, rn,rn */
#  define ARM_VCMP			0x0eb40a40
#  define ARM_VMRS			0x0ef10a10
#  define ARM_VMSR			0x0ee10a10
#  define ARM_VCVT_2I			0x00040000 /* to integer */
#  define ARM_VCVT_2S			0x00010000 /* to signed */
#  define ARM_VCVT_RS			0x00000080 /* round to zero or signed */
#  define ARM_VCVT			0x0eb80a40
#  define ARM_VCVT_S32_F32		(ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS)
#  define ARM_VCVT_U32_F32		(ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_RS)
#  define ARM_VCVT_S32_F64		(ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS|ARM_V_F64)
#  define ARM_VCVT_U32_F64		(ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_RS|ARM_V_F64)
#  define ARM_VCVT_F32_S32		(ARM_VCVT|ARM_VCVT_RS)
#  define ARM_VCVT_F32_U32		ARM_VCVT
#  define ARM_VCVT_F64_S32		(ARM_VCVT|ARM_VCVT_RS|ARM_V_F64)
#  define ARM_VCVT_F64_U32		(ARM_VCVT|ARM_V_F64)
#  define ARM_VCVT_F			0x0eb70ac0
#  define ARM_VCVT_F32_F64		ARM_VCVT_F
#  define ARM_VCVT_F64_F32		(ARM_VCVT_F|ARM_V_F64)
#  define ARM_VCVTR_S32_F32		(ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S)
#  define ARM_VCVTR_U32_F32		(ARM_VCVT|ARM_VCVT_2I)
#  define ARM_VCVTR_S32_F64		(ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_V_F64)
#  define ARM_VCVTR_U32_F64		(ARM_VCVT|ARM_VCVT_2I|ARM_V_F64)
/*
 * Advanced SIMD encoding bits and opcode templates.
 */
/* register-field and form bits */
#  define ARM_V_D		0x00400000
#  define ARM_V_N		0x00000080
#  define ARM_V_Q		0x00000040
#  define ARM_V_M		0x00000020
#  define ARM_V_U		0x01000000
/* element size selectors */
#  define ARM_V_I16		0x00100000
#  define ARM_V_I32		0x00200000
#  define ARM_V_I64		0x00300000
#  define ARM_V_S16		0x00040000
#  define ARM_V_S32		0x00080000
/* integer add */
#  define ARM_VADD_I		0x02000800
#  define ARM_VQADD_I		0x02000010	/* set flag on over/carry */
#  define ARM_VADDL_I		0x02800000	/* q=d+d */
#  define ARM_VADDW_I		0x02800100	/* q=q+d */
/* integer subtract */
#  define ARM_VSUB_I		0x03000800
#  define ARM_VQSUB_I		0x02000210	/* set flag on over/carry */
#  define ARM_VSUBL_I		0x02800200
#  define ARM_VSUBW_I		0x02800300
/* integer multiply */
#  define ARM_VMUL_I		0x02000910
#  define ARM_VMULL_I		0x02800c00
/* integer absolute value and negate */
#  define ARM_VABS_I		0x03b10300
#  define ARM_VQABS_I		0x03b00700	/* sets flag on overflow */
#  define ARM_VNEG_I		0x03b10380
#  define ARM_VQNEG_I		0x03b00780	/* sets flag on overflow */
/* bitwise */
#  define ARM_VAND		0x02000110
#  define ARM_VBIC		0x02100110
#  define ARM_VORR		0x02200110
#  define ARM_VORN		0x02300110
#  define ARM_VEOR		0x03000110
/* lengthening move */
#  define ARM_VMOVL_S8		0x00080000
#  define ARM_VMOVL_S16		0x00100000
#  define ARM_VMOVL_S32		0x00200000
#  define ARM_VMOVL_I		0x02800a10
/* immediate moves */
#  define ARM_VMOVI		0x02800010
#  define ARM_VMVNI		0x02800030
/* load/store */
#  define ARM_VLDR		0x0d100a00
#  define ARM_VSTR		0x0d000a00
#  define ARM_VM		0x0c000a00
/* moves between core registers and vector elements */
#  define ARM_VMOV_ADV_U	0x00800000	/* zero extend */
#  define ARM_VMOV_ADV_8	0x00400000
#  define ARM_VMOV_ADV_16	0x00000020
#  define ARM_VMOV_A_D		0x0e100b10
#  define ARM_VMOV_D_A		0x0e000b10
134 
135 #  define vodi(oi,r0)			_vodi(_jit,oi,r0)
136 static void _vodi(jit_state_t*,int,int) maybe_unused;
137 #  define voqi(oi,r0)			_voqi(_jit,oi,r0)
138 static void _voqi(jit_state_t*,int,int) maybe_unused;
139 #  define vo_ss(o,r0,r1)		_cc_vo_ss(_jit,ARM_CC_NV,o,r0,r1)
140 #  define cc_vo_ss(cc,o,r0,r1)		_cc_vo_ss(_jit,cc,o,r0,r1)
141 static void _cc_vo_ss(jit_state_t*,int,int,int,int);
142 #  define vo_dd(o,r0,r1)		_cc_vo_dd(_jit,ARM_CC_NV,o,r0,r1)
143 #  define cc_vo_dd(cc,o,r0,r1)		_cc_vo_dd(_jit,cc,o,r0,r1)
144 static void _cc_vo_dd(jit_state_t*,int,int,int,int);
145 #  define vo_qd(o,r0,r1)		_cc_vo_qd(_jit,ARM_CC_NV,o,r0,r1)
146 #  define cc_vo_qd(cc,o,r0,r1)		_cc_vo_qd(_jit,cc,o,r0,r1)
147 static void _cc_vo_qd(jit_state_t*,int,int,int,int) maybe_unused;
148 #  define vo_qq(o,r0,r1)		_cc_vo_qq(_jit,ARM_CC_NV,o,r0,r1)
149 #  define cc_vo_qq(cc,o,r0,r1)		_cc_vo_qq(_jit,cc,o,r0,r1)
150 static void _cc_vo_qq(jit_state_t*,int,int,int,int) maybe_unused;
151 #  define vorr_(o,r0,r1)		_cc_vorr_(_jit,ARM_CC_NV,o,r0,r1)
152 #  define cc_vorr_(cc,o,r0,r1)		_cc_vorr_(_jit,cc,o,r0,r1)
153 static void _cc_vorr_(jit_state_t*,int,int,int,int);
154 #  define vors_(o,r0,r1)		_cc_vors_(_jit,ARM_CC_NV,o,r0,r1)
155 #  define cc_vors_(cc,o,r0,r1)		_cc_vors_(_jit,cc,o,r0,r1)
156 static void _cc_vors_(jit_state_t*,int,int,int,int);
157 #  define vorv_(o,r0,r1)		_cc_vorv_(_jit,ARM_CC_NV,o,r0,r1)
158 #  define cc_vorv_(cc,o,r0,r1)		_cc_vorv_(_jit,cc,o,r0,r1)
159 static void _cc_vorv_(jit_state_t*,int,int,int,int) maybe_unused;
160 #  define vori_(o,r0,r1)		_cc_vori_(_jit,ARM_CC_NV,o,r0,r1)
161 #  define cc_vori_(cc,o,r0,r1)		_cc_vori_(_jit,cc,o,r0,r1)
162 static void _cc_vori_(jit_state_t*,int,int,int,int);
163 #  define vorrd(o,r0,r1,r2)		_cc_vorrd(_jit,ARM_CC_NV,o,r0,r1,r2)
164 #  define cc_vorrd(cc,o,r0,r1,r2)	_cc_vorrd(_jit,cc,o,r0,r1,r2)
165 static void _cc_vorrd(jit_state_t*,int,int,int,int,int);
166 #  define vosss(o,r0,r1,r2)		_cc_vosss(_jit,ARM_CC_NV,o,r0,r1,r2)
167 #  define cc_vosss(cc,o,r0,r1,r2)	_cc_vosss(_jit,cc,o,r0,r1,r2)
168 static void _cc_vosss(jit_state_t*,int,int,int,int,int);
169 #  define voddd(o,r0,r1,r2)		_cc_voddd(_jit,ARM_CC_NV,o,r0,r1,r2)
170 #  define cc_voddd(cc,o,r0,r1,r2)	_cc_voddd(_jit,cc,o,r0,r1,r2)
171 static void _cc_voddd(jit_state_t*,int,int,int,int,int);
172 #  define voqdd(o,r0,r1,r2)		_cc_voqdd(_jit,ARM_CC_NV,o,r0,r1,r2)
173 #  define cc_voqdd(cc,o,r0,r1,r2)	_cc_voqdd(_jit,cc,o,r0,r1,r2)
174 static void _cc_voqdd(jit_state_t*,int,int,int,int,int) maybe_unused;
175 #  define voqqd(o,r0,r1,r2)		_cc_voqqd(_jit,ARM_CC_NV,o,r0,r1,r2)
176 #  define cc_voqqd(cc,o,r0,r1,r2)	_cc_voqqd(_jit,cc,o,r0,r1,r2)
177 static void _cc_voqqd(jit_state_t*,int,int,int,int,int) maybe_unused;
178 #  define voqqq(o,r0,r1,r2)		_cc_voqqq(_jit,ARM_CC_NV,o,r0,r1,r2)
179 #  define cc_voqqq(cc,o,r0,r1,r2)	_cc_voqqq(_jit,cc,o,r0,r1,r2)
180 static void _cc_voqqq(jit_state_t*,int,int,int,int,int) maybe_unused;
181 #  define cc_vldst(cc,o,r0,r1,i0)	_cc_vldst(_jit,cc,o,r0,r1,i0)
182 static void _cc_vldst(jit_state_t*,int,int,int,int,int);
183 #  define cc_vorsl(cc,o,r0,r1,i0)	_cc_vorsl(_jit,cc,o,r0,r1,i0)
184 static void _cc_vorsl(jit_state_t*,int,int,int,int,int);
/*
 * VFP arithmetic and unary operations, single (F32) and double (F64)
 * precision.  CC_<op> takes an explicit condition code; <op> is the same
 * instruction with ARM_CC_AL.  F64 forms OR ARM_V_F64 into the opcode.
 */
/* three operand: add, subtract, multiply, divide */
#  define CC_VADD_F32(cc,r0,r1,r2)	cc_vosss(cc, ARM_VADD_F, r0, r1, r2)
#  define VADD_F32(r0,r1,r2)		CC_VADD_F32(ARM_CC_AL, r0, r1, r2)
#  define CC_VADD_F64(cc,r0,r1,r2)	cc_voddd(cc, ARM_VADD_F | ARM_V_F64, r0, r1, r2)
#  define VADD_F64(r0,r1,r2)		CC_VADD_F64(ARM_CC_AL, r0, r1, r2)
#  define CC_VSUB_F32(cc,r0,r1,r2)	cc_vosss(cc, ARM_VSUB_F, r0, r1, r2)
#  define VSUB_F32(r0,r1,r2)		CC_VSUB_F32(ARM_CC_AL, r0, r1, r2)
#  define CC_VSUB_F64(cc,r0,r1,r2)	cc_voddd(cc, ARM_VSUB_F | ARM_V_F64, r0, r1, r2)
#  define VSUB_F64(r0,r1,r2)		CC_VSUB_F64(ARM_CC_AL, r0, r1, r2)
#  define CC_VMUL_F32(cc,r0,r1,r2)	cc_vosss(cc, ARM_VMUL_F, r0, r1, r2)
#  define VMUL_F32(r0,r1,r2)		CC_VMUL_F32(ARM_CC_AL, r0, r1, r2)
#  define CC_VMUL_F64(cc,r0,r1,r2)	cc_voddd(cc, ARM_VMUL_F | ARM_V_F64, r0, r1, r2)
#  define VMUL_F64(r0,r1,r2)		CC_VMUL_F64(ARM_CC_AL, r0, r1, r2)
#  define CC_VDIV_F32(cc,r0,r1,r2)	cc_vosss(cc, ARM_VDIV_F, r0, r1, r2)
#  define VDIV_F32(r0,r1,r2)		CC_VDIV_F32(ARM_CC_AL, r0, r1, r2)
#  define CC_VDIV_F64(cc,r0,r1,r2)	cc_voddd(cc, ARM_VDIV_F | ARM_V_F64, r0, r1, r2)
#  define VDIV_F64(r0,r1,r2)		CC_VDIV_F64(ARM_CC_AL, r0, r1, r2)
/* two operand: absolute value, negate, square root, register move */
#  define CC_VABS_F32(cc,r0,r1)		cc_vo_ss(cc, ARM_VABS_F, r0, r1)
#  define VABS_F32(r0,r1)		CC_VABS_F32(ARM_CC_AL, r0, r1)
#  define CC_VABS_F64(cc,r0,r1)		cc_vo_dd(cc, ARM_VABS_F | ARM_V_F64, r0, r1)
#  define VABS_F64(r0,r1)		CC_VABS_F64(ARM_CC_AL, r0, r1)
#  define CC_VNEG_F32(cc,r0,r1)		cc_vo_ss(cc, ARM_VNEG_F, r0, r1)
#  define VNEG_F32(r0,r1)		CC_VNEG_F32(ARM_CC_AL, r0, r1)
#  define CC_VNEG_F64(cc,r0,r1)		cc_vo_dd(cc, ARM_VNEG_F | ARM_V_F64, r0, r1)
#  define VNEG_F64(r0,r1)		CC_VNEG_F64(ARM_CC_AL, r0, r1)
#  define CC_VSQRT_F32(cc,r0,r1)	cc_vo_ss(cc, ARM_VSQRT_F, r0, r1)
#  define VSQRT_F32(r0,r1)		CC_VSQRT_F32(ARM_CC_AL, r0, r1)
#  define CC_VSQRT_F64(cc,r0,r1)	cc_vo_dd(cc, ARM_VSQRT_F | ARM_V_F64, r0, r1)
#  define VSQRT_F64(r0,r1)		CC_VSQRT_F64(ARM_CC_AL, r0, r1)
#  define CC_VMOV_F32(cc,r0,r1)		cc_vo_ss(cc, ARM_VMOV_F, r0, r1)
#  define VMOV_F32(r0,r1)		CC_VMOV_F32(ARM_CC_AL, r0, r1)
#  define CC_VMOV_F64(cc,r0,r1)		cc_vo_dd(cc, ARM_VMOV_F | ARM_V_F64, r0, r1)
#  define VMOV_F64(r0,r1)		CC_VMOV_F64(ARM_CC_AL, r0, r1)
/*
 * Moves between core registers and VFP registers.  Note that the _D_AA
 * and _S_A forms hand their operands to the encoder in a different order
 * than they receive them.
 */
#  define CC_VMOV_AA_D(cc,r0,r1,r2)	cc_vorrd(cc, ARM_VMOV_AA_D, r0, r1, r2)
#  define VMOV_AA_D(r0,r1,r2)		CC_VMOV_AA_D(ARM_CC_AL, r0, r1, r2)
#  define CC_VMOV_D_AA(cc,r0,r1,r2)	cc_vorrd(cc, ARM_VMOV_D_AA, r1, r2, r0)
#  define VMOV_D_AA(r0,r1,r2)		CC_VMOV_D_AA(ARM_CC_AL, r0, r1, r2)
#  define CC_VMOV_A_S(cc,r0,r1)		cc_vors_(cc, ARM_VMOV_A_S, r0, r1)
#  define VMOV_A_S(r0,r1)		CC_VMOV_A_S(ARM_CC_AL, r0, r1)
#  define CC_VMOV_S_A(cc,r0,r1)		cc_vors_(cc, ARM_VMOV_S_A, r1, r0)
#  define VMOV_S_A(r0,r1)		CC_VMOV_S_A(ARM_CC_AL, r0, r1)
/*
 * Compares and FP status register transfers.  The E variants add ARM_V_E
 * and the Z variants add ARM_V_Z to the ARM_VCMP opcode; Z variants take a
 * single register and pass 0 as the second operand.
 */
#  define CC_VCMP_F32(cc,r0,r1)		cc_vo_ss(cc, ARM_VCMP, r0, r1)
#  define VCMP_F32(r0,r1)		CC_VCMP_F32(ARM_CC_AL, r0, r1)
#  define CC_VCMP_F64(cc,r0,r1)		cc_vo_dd(cc, ARM_VCMP | ARM_V_F64, r0, r1)
#  define VCMP_F64(r0,r1)		CC_VCMP_F64(ARM_CC_AL, r0, r1)
#  define CC_VCMPE_F32(cc,r0,r1)	cc_vo_ss(cc, ARM_VCMP | ARM_V_E, r0, r1)
#  define VCMPE_F32(r0,r1)		CC_VCMPE_F32(ARM_CC_AL, r0, r1)
#  define CC_VCMPE_F64(cc,r0,r1)	cc_vo_dd(cc, ARM_VCMP | ARM_V_E | ARM_V_F64, r0, r1)
#  define VCMPE_F64(r0,r1)		CC_VCMPE_F64(ARM_CC_AL, r0, r1)
#  define CC_VCMPZ_F32(cc,r0)		cc_vo_ss(cc, ARM_VCMP | ARM_V_Z, r0, 0)
#  define VCMPZ_F32(r0)			CC_VCMPZ_F32(ARM_CC_AL, r0)
#  define CC_VCMPZ_F64(cc,r0)		cc_vo_dd(cc, ARM_VCMP | ARM_V_Z | ARM_V_F64, r0, 0)
#  define VCMPZ_F64(r0)			CC_VCMPZ_F64(ARM_CC_AL, r0)
#  define CC_VCMPEZ_F32(cc,r0)		cc_vo_ss(cc, ARM_VCMP | ARM_V_Z | ARM_V_E, r0, 0)
#  define VCMPEZ_F32(r0)		CC_VCMPEZ_F32(ARM_CC_AL, r0)
#  define CC_VCMPEZ_F64(cc,r0)		cc_vo_dd(cc, ARM_VCMP | ARM_V_Z | ARM_V_E | ARM_V_F64, r0, 0)
#  define VCMPEZ_F64(r0)		CC_VCMPEZ_F64(ARM_CC_AL, r0)
/* status register transfers: one register operand, second operand is 0 */
#  define CC_VMRS(cc,r0)		cc_vorr_(cc, ARM_VMRS, r0, 0)
#  define VMRS(r0)			CC_VMRS(ARM_CC_AL, r0)
#  define CC_VMSR(cc,r0)		cc_vorr_(cc, ARM_VMSR, r0, 0)
#  define VMSR(r0)			CC_VMSR(ARM_CC_AL, r0)
/*
 * Conversions.  Every form, including the F64 ones, is emitted through
 * cc_vo_ss with the matching ARM_VCVT_* / ARM_VCVTR_* opcode.
 */
/* float to integer */
#  define CC_VCVT_S32_F32(cc,r0,r1)	cc_vo_ss(cc, ARM_VCVT_S32_F32, r0, r1)
#  define VCVT_S32_F32(r0,r1)		CC_VCVT_S32_F32(ARM_CC_AL, r0, r1)
#  define CC_VCVT_U32_F32(cc,r0,r1)	cc_vo_ss(cc, ARM_VCVT_U32_F32, r0, r1)
#  define VCVT_U32_F32(r0,r1)		CC_VCVT_U32_F32(ARM_CC_AL, r0, r1)
#  define CC_VCVT_S32_F64(cc,r0,r1)	cc_vo_ss(cc, ARM_VCVT_S32_F64, r0, r1)
#  define VCVT_S32_F64(r0,r1)		CC_VCVT_S32_F64(ARM_CC_AL, r0, r1)
#  define CC_VCVT_U32_F64(cc,r0,r1)	cc_vo_ss(cc, ARM_VCVT_U32_F64, r0, r1)
#  define VCVT_U32_F64(r0,r1)		CC_VCVT_U32_F64(ARM_CC_AL, r0, r1)
/* integer to float */
#  define CC_VCVT_F32_S32(cc,r0,r1)	cc_vo_ss(cc, ARM_VCVT_F32_S32, r0, r1)
#  define VCVT_F32_S32(r0,r1)		CC_VCVT_F32_S32(ARM_CC_AL, r0, r1)
#  define CC_VCVT_F32_U32(cc,r0,r1)	cc_vo_ss(cc, ARM_VCVT_F32_U32, r0, r1)
#  define VCVT_F32_U32(r0,r1)		CC_VCVT_F32_U32(ARM_CC_AL, r0, r1)
#  define CC_VCVT_F64_S32(cc,r0,r1)	cc_vo_ss(cc, ARM_VCVT_F64_S32, r0, r1)
#  define VCVT_F64_S32(r0,r1)		CC_VCVT_F64_S32(ARM_CC_AL, r0, r1)
#  define CC_VCVT_F64_U32(cc,r0,r1)	cc_vo_ss(cc, ARM_VCVT_F64_U32, r0, r1)
#  define VCVT_F64_U32(r0,r1)		CC_VCVT_F64_U32(ARM_CC_AL, r0, r1)
/* between precisions */
#  define CC_VCVT_F32_F64(cc,r0,r1)	cc_vo_ss(cc, ARM_VCVT_F32_F64, r0, r1)
#  define VCVT_F32_F64(r0,r1)		CC_VCVT_F32_F64(ARM_CC_AL, r0, r1)
#  define CC_VCVT_F64_F32(cc,r0,r1)	cc_vo_ss(cc, ARM_VCVT_F64_F32, r0, r1)
#  define VCVT_F64_F32(r0,r1)		CC_VCVT_F64_F32(ARM_CC_AL, r0, r1)
/* VCVTR float to integer forms */
#  define CC_VCVTR_S32_F32(cc,r0,r1)	cc_vo_ss(cc, ARM_VCVTR_S32_F32, r0, r1)
#  define VCVTR_S32_F32(r0,r1)		CC_VCVTR_S32_F32(ARM_CC_AL, r0, r1)
#  define CC_VCVTR_U32_F32(cc,r0,r1)	cc_vo_ss(cc, ARM_VCVTR_U32_F32, r0, r1)
#  define VCVTR_U32_F32(r0,r1)		CC_VCVTR_U32_F32(ARM_CC_AL, r0, r1)
#  define CC_VCVTR_S32_F64(cc,r0,r1)	cc_vo_ss(cc, ARM_VCVTR_S32_F64, r0, r1)
#  define VCVTR_S32_F64(r0,r1)		CC_VCVTR_S32_F64(ARM_CC_AL, r0, r1)
#  define CC_VCVTR_U32_F64(cc,r0,r1)	cc_vo_ss(cc, ARM_VCVTR_U32_F64, r0, r1)
#  define VCVTR_U32_F64(r0,r1)		CC_VCVTR_U32_F64(ARM_CC_AL, r0, r1)
/*
 * Load/store multiple, all emitted through cc_vorsl on top of ARM_VM.
 * Loads add ARM_M_L, IA forms add ARM_M_I, DB forms add ARM_M_B and the
 * _U forms add ARM_M_U; F64 forms add ARM_V_F64.
 */
#  define CC_VLDMIA_F32(cc,r0,r1,i0)	cc_vorsl(cc, ARM_VM | ARM_M_L | ARM_M_I, r0, r1, i0)
#  define VLDMIA_F32(r0,r1,i0)		CC_VLDMIA_F32(ARM_CC_AL, r0, r1, i0)
#  define CC_VLDMIA_F64(cc,r0,r1,i0)	cc_vorsl(cc, ARM_VM | ARM_M_L | ARM_M_I | ARM_V_F64, r0, r1, i0)
#  define VLDMIA_F64(r0,r1,i0)		CC_VLDMIA_F64(ARM_CC_AL, r0, r1, i0)
#  define CC_VSTMIA_F32(cc,r0,r1,i0)	cc_vorsl(cc, ARM_VM | ARM_M_I, r0, r1, i0)
#  define VSTMIA_F32(r0,r1,i0)		CC_VSTMIA_F32(ARM_CC_AL, r0, r1, i0)
#  define CC_VSTMIA_F64(cc,r0,r1,i0)	cc_vorsl(cc, ARM_VM | ARM_M_I | ARM_V_F64, r0, r1, i0)
#  define VSTMIA_F64(r0,r1,i0)		CC_VSTMIA_F64(ARM_CC_AL, r0, r1, i0)
#  define CC_VLDMIA_U_F32(cc,r0,r1,i0)	cc_vorsl(cc, ARM_VM | ARM_M_L | ARM_M_I | ARM_M_U, r0, r1, i0)
#  define VLDMIA_U_F32(r0,r1,i0)	CC_VLDMIA_U_F32(ARM_CC_AL, r0, r1, i0)
#  define CC_VLDMIA_U_F64(cc,r0,r1,i0)	cc_vorsl(cc, ARM_VM | ARM_M_L | ARM_M_I | ARM_M_U | ARM_V_F64, r0, r1, i0)
#  define VLDMIA_U_F64(r0,r1,i0)	CC_VLDMIA_U_F64(ARM_CC_AL, r0, r1, i0)
#  define CC_VSTMIA_U_F32(cc,r0,r1,i0)	cc_vorsl(cc, ARM_VM | ARM_M_I | ARM_M_U, r0, r1, i0)
#  define VSTMIA_U_F32(r0,r1,i0)	CC_VSTMIA_U_F32(ARM_CC_AL, r0, r1, i0)
#  define CC_VSTMIA_U_F64(cc,r0,r1,i0)	cc_vorsl(cc, ARM_VM | ARM_M_I | ARM_M_U | ARM_V_F64, r0, r1, i0)
#  define VSTMIA_U_F64(r0,r1,i0)	CC_VSTMIA_U_F64(ARM_CC_AL, r0, r1, i0)
#  define CC_VLDMDB_U_F32(cc,r0,r1,i0)	cc_vorsl(cc, ARM_VM | ARM_M_L | ARM_M_B | ARM_M_U, r0, r1, i0)
#  define VLDMDB_U_F32(r0,r1,i0)	CC_VLDMDB_U_F32(ARM_CC_AL, r0, r1, i0)
#  define CC_VLDMDB_U_F64(cc,r0,r1,i0)	cc_vorsl(cc, ARM_VM | ARM_M_L | ARM_M_B | ARM_M_U | ARM_V_F64, r0, r1, i0)
#  define VLDMDB_U_F64(r0,r1,i0)	CC_VLDMDB_U_F64(ARM_CC_AL, r0, r1, i0)
#  define CC_VSTMDB_U_F32(cc,r0,r1,i0)	cc_vorsl(cc, ARM_VM | ARM_M_B | ARM_M_U, r0, r1, i0)
#  define VSTMDB_U_F32(r0,r1,i0)	CC_VSTMDB_U_F32(ARM_CC_AL, r0, r1, i0)
#  define CC_VSTMDB_U_F64(cc,r0,r1,i0)	cc_vorsl(cc, ARM_VM | ARM_M_B | ARM_M_U | ARM_V_F64, r0, r1, i0)
#  define VSTMDB_U_F64(r0,r1,i0)	CC_VSTMDB_U_F64(ARM_CC_AL, r0, r1, i0)
/* push/pop: VSTMDB_U / VLDMIA_U with _SP_REGNO as the first operand */
#  define CC_VPUSH_F32(cc,r0,i0)	CC_VSTMDB_U_F32(cc, _SP_REGNO, r0, i0)
#  define VPUSH_F32(r0,i0)		CC_VPUSH_F32(ARM_CC_AL, r0, i0)
#  define CC_VPUSH_F64(cc,r0,i0)	CC_VSTMDB_U_F64(cc, _SP_REGNO, r0, i0)
#  define VPUSH_F64(r0,i0)		CC_VPUSH_F64(ARM_CC_AL, r0, i0)
#  define CC_VPOP_F32(cc,r0,i0)		CC_VLDMIA_U_F32(cc, _SP_REGNO, r0, i0)
#  define VPOP_F32(r0,i0)		CC_VPOP_F32(ARM_CC_AL, r0, i0)
#  define CC_VPOP_F64(cc,r0,i0)		CC_VLDMIA_U_F64(cc, _SP_REGNO, r0, i0)
#  define VPOP_F64(r0,i0)		CC_VPOP_F64(ARM_CC_AL, r0, i0)
/*
 * Moves between a core register and a vector element.  The _A_ forms are
 * built on ARM_VMOV_A_D and the _V_ forms on ARM_VMOV_D_A; the _V_ forms
 * pass their two operands to the encoder in swapped order.  8 and 16 bit
 * forms go through cc_vorv_, 32 bit forms through cc_vori_.
 */
#  define CC_VMOV_A_S8(cc,r0,r1)	cc_vorv_(cc, ARM_VMOV_A_D | ARM_VMOV_ADV_8, r0, r1)
#  define VMOV_A_S8(r0,r1)		CC_VMOV_A_S8(ARM_CC_AL, r0, r1)
#  define CC_VMOV_A_U8(cc,r0,r1)	cc_vorv_(cc, ARM_VMOV_A_D | ARM_VMOV_ADV_8 | ARM_VMOV_ADV_U, r0, r1)
#  define VMOV_A_U8(r0,r1)		CC_VMOV_A_U8(ARM_CC_AL, r0, r1)
#  define CC_VMOV_A_S16(cc,r0,r1)	cc_vorv_(cc, ARM_VMOV_A_D | ARM_VMOV_ADV_16, r0, r1)
#  define VMOV_A_S16(r0,r1)		CC_VMOV_A_S16(ARM_CC_AL, r0, r1)
#  define CC_VMOV_A_U16(cc,r0,r1)	cc_vorv_(cc, ARM_VMOV_A_D | ARM_VMOV_ADV_16 | ARM_VMOV_ADV_U, r0, r1)
#  define VMOV_A_U16(r0,r1)		CC_VMOV_A_U16(ARM_CC_AL, r0, r1)
#  define CC_VMOV_A_S32(cc,r0,r1)	cc_vori_(cc, ARM_VMOV_A_D, r0, r1)
#  define VMOV_A_S32(r0,r1)		CC_VMOV_A_S32(ARM_CC_AL, r0, r1)
#  define CC_VMOV_A_U32(cc,r0,r1)	cc_vori_(cc, ARM_VMOV_A_D | ARM_VMOV_ADV_U, r0, r1)
#  define VMOV_A_U32(r0,r1)		CC_VMOV_A_U32(ARM_CC_AL, r0, r1)
#  define CC_VMOV_V_I8(cc,r0,r1)	cc_vorv_(cc, ARM_VMOV_D_A | ARM_VMOV_ADV_8, r1, r0)
#  define VMOV_V_I8(r0,r1)		CC_VMOV_V_I8(ARM_CC_AL, r0, r1)
#  define CC_VMOV_V_I16(cc,r0,r1)	cc_vorv_(cc, ARM_VMOV_D_A | ARM_VMOV_ADV_16, r1, r0)
#  define VMOV_V_I16(r0,r1)		CC_VMOV_V_I16(ARM_CC_AL, r0, r1)
#  define CC_VMOV_V_I32(cc,r0,r1)	cc_vori_(cc, ARM_VMOV_D_A, r1, r0)
#  define VMOV_V_I32(r0,r1)		CC_VMOV_V_I32(ARM_CC_AL, r0, r1)
/*
 * Integer vector additions.  Forms with a Q in the name add ARM_V_Q and
 * use voqqq; the element size is selected with ARM_V_I16/I32/I64 (none for
 * 8 bit) and unsigned forms add ARM_V_U.
 */
/* VADD */
#  define VADD_I8(r0,r1,r2)		voddd(ARM_VADD_I, r0, r1, r2)
#  define VADDQ_I8(r0,r1,r2)		voqqq(ARM_VADD_I | ARM_V_Q, r0, r1, r2)
#  define VADD_I16(r0,r1,r2)		voddd(ARM_VADD_I | ARM_V_I16, r0, r1, r2)
#  define VADDQ_I16(r0,r1,r2)		voqqq(ARM_VADD_I | ARM_V_I16 | ARM_V_Q, r0, r1, r2)
#  define VADD_I32(r0,r1,r2)		voddd(ARM_VADD_I | ARM_V_I32, r0, r1, r2)
#  define VADDQ_I32(r0,r1,r2)		voqqq(ARM_VADD_I | ARM_V_I32 | ARM_V_Q, r0, r1, r2)
#  define VADD_I64(r0,r1,r2)		voddd(ARM_VADD_I | ARM_V_I64, r0, r1, r2)
#  define VADDQ_I64(r0,r1,r2)		voqqq(ARM_VADD_I | ARM_V_I64 | ARM_V_Q, r0, r1, r2)
/* VQADD */
#  define VQADD_S8(r0,r1,r2)		voddd(ARM_VQADD_I, r0, r1, r2)
#  define VQADDQ_S8(r0,r1,r2)		voqqq(ARM_VQADD_I | ARM_V_Q, r0, r1, r2)
#  define VQADD_U8(r0,r1,r2)		voddd(ARM_VQADD_I | ARM_V_U, r0, r1, r2)
#  define VQADDQ_U8(r0,r1,r2)		voqqq(ARM_VQADD_I | ARM_V_U | ARM_V_Q, r0, r1, r2)
#  define VQADD_S16(r0,r1,r2)		voddd(ARM_VQADD_I | ARM_V_I16, r0, r1, r2)
#  define VQADDQ_S16(r0,r1,r2)		voqqq(ARM_VQADD_I | ARM_V_I16 | ARM_V_Q, r0, r1, r2)
#  define VQADD_U16(r0,r1,r2)		voddd(ARM_VQADD_I | ARM_V_I16 | ARM_V_U, r0, r1, r2)
#  define VQADDQ_U16(r0,r1,r2)		voqqq(ARM_VQADD_I | ARM_V_I16 | ARM_V_U | ARM_V_Q, r0, r1, r2)
#  define VQADD_S32(r0,r1,r2)		voddd(ARM_VQADD_I | ARM_V_I32, r0, r1, r2)
#  define VQADDQ_S32(r0,r1,r2)		voqqq(ARM_VQADD_I | ARM_V_I32 | ARM_V_Q, r0, r1, r2)
#  define VQADD_U32(r0,r1,r2)		voddd(ARM_VQADD_I | ARM_V_I32 | ARM_V_U, r0, r1, r2)
#  define VQADDQ_U32(r0,r1,r2)		voqqq(ARM_VQADD_I | ARM_V_I32 | ARM_V_U | ARM_V_Q, r0, r1, r2)
#  define VQADD_S64(r0,r1,r2)		voddd(ARM_VQADD_I | ARM_V_I64, r0, r1, r2)
#  define VQADDQ_S64(r0,r1,r2)		voqqq(ARM_VQADD_I | ARM_V_I64 | ARM_V_Q, r0, r1, r2)
#  define VQADD_U64(r0,r1,r2)		voddd(ARM_VQADD_I | ARM_V_I64 | ARM_V_U, r0, r1, r2)
#  define VQADDQ_U64(r0,r1,r2)		voqqq(ARM_VQADD_I | ARM_V_I64 | ARM_V_U | ARM_V_Q, r0, r1, r2)
/* VADDL, emitted through voqdd */
#  define VADDL_S8(r0,r1,r2)		voqdd(ARM_VADDL_I, r0, r1, r2)
#  define VADDL_U8(r0,r1,r2)		voqdd(ARM_VADDL_I | ARM_V_U, r0, r1, r2)
#  define VADDL_S16(r0,r1,r2)		voqdd(ARM_VADDL_I | ARM_V_I16, r0, r1, r2)
#  define VADDL_U16(r0,r1,r2)		voqdd(ARM_VADDL_I | ARM_V_I16 | ARM_V_U, r0, r1, r2)
#  define VADDL_S32(r0,r1,r2)		voqdd(ARM_VADDL_I | ARM_V_I32, r0, r1, r2)
#  define VADDL_U32(r0,r1,r2)		voqdd(ARM_VADDL_I | ARM_V_I32 | ARM_V_U, r0, r1, r2)
/* VADDW, emitted through voqqd */
#  define VADDW_S8(r0,r1,r2)		voqqd(ARM_VADDW_I, r0, r1, r2)
#  define VADDW_U8(r0,r1,r2)		voqqd(ARM_VADDW_I | ARM_V_U, r0, r1, r2)
#  define VADDW_S16(r0,r1,r2)		voqqd(ARM_VADDW_I | ARM_V_I16, r0, r1, r2)
#  define VADDW_U16(r0,r1,r2)		voqqd(ARM_VADDW_I | ARM_V_I16 | ARM_V_U, r0, r1, r2)
#  define VADDW_S32(r0,r1,r2)		voqqd(ARM_VADDW_I | ARM_V_I32, r0, r1, r2)
#  define VADDW_U32(r0,r1,r2)		voqqd(ARM_VADDW_I | ARM_V_I32 | ARM_V_U, r0, r1, r2)
/*
 * Integer vector subtractions.  Forms with a Q in the name add ARM_V_Q and
 * use voqqq; the element size is selected with ARM_V_I16/I32/I64 (none for
 * 8 bit) and unsigned forms add ARM_V_U.
 */
/* VSUB */
#  define VSUB_I8(r0,r1,r2)		voddd(ARM_VSUB_I, r0, r1, r2)
#  define VSUBQ_I8(r0,r1,r2)		voqqq(ARM_VSUB_I | ARM_V_Q, r0, r1, r2)
#  define VSUB_I16(r0,r1,r2)		voddd(ARM_VSUB_I | ARM_V_I16, r0, r1, r2)
#  define VSUBQ_I16(r0,r1,r2)		voqqq(ARM_VSUB_I | ARM_V_I16 | ARM_V_Q, r0, r1, r2)
#  define VSUB_I32(r0,r1,r2)		voddd(ARM_VSUB_I | ARM_V_I32, r0, r1, r2)
#  define VSUBQ_I32(r0,r1,r2)		voqqq(ARM_VSUB_I | ARM_V_I32 | ARM_V_Q, r0, r1, r2)
#  define VSUB_I64(r0,r1,r2)		voddd(ARM_VSUB_I | ARM_V_I64, r0, r1, r2)
#  define VSUBQ_I64(r0,r1,r2)		voqqq(ARM_VSUB_I | ARM_V_I64 | ARM_V_Q, r0, r1, r2)
/* VQSUB */
#  define VQSUB_S8(r0,r1,r2)		voddd(ARM_VQSUB_I, r0, r1, r2)
#  define VQSUBQ_S8(r0,r1,r2)		voqqq(ARM_VQSUB_I | ARM_V_Q, r0, r1, r2)
#  define VQSUB_U8(r0,r1,r2)		voddd(ARM_VQSUB_I | ARM_V_U, r0, r1, r2)
#  define VQSUBQ_U8(r0,r1,r2)		voqqq(ARM_VQSUB_I | ARM_V_U | ARM_V_Q, r0, r1, r2)
#  define VQSUB_S16(r0,r1,r2)		voddd(ARM_VQSUB_I | ARM_V_I16, r0, r1, r2)
#  define VQSUBQ_S16(r0,r1,r2)		voqqq(ARM_VQSUB_I | ARM_V_I16 | ARM_V_Q, r0, r1, r2)
#  define VQSUB_U16(r0,r1,r2)		voddd(ARM_VQSUB_I | ARM_V_I16 | ARM_V_U, r0, r1, r2)
#  define VQSUBQ_U16(r0,r1,r2)		voqqq(ARM_VQSUB_I | ARM_V_I16 | ARM_V_U | ARM_V_Q, r0, r1, r2)
#  define VQSUB_S32(r0,r1,r2)		voddd(ARM_VQSUB_I | ARM_V_I32, r0, r1, r2)
#  define VQSUBQ_S32(r0,r1,r2)		voqqq(ARM_VQSUB_I | ARM_V_I32 | ARM_V_Q, r0, r1, r2)
#  define VQSUB_U32(r0,r1,r2)		voddd(ARM_VQSUB_I | ARM_V_I32 | ARM_V_U, r0, r1, r2)
#  define VQSUBQ_U32(r0,r1,r2)		voqqq(ARM_VQSUB_I | ARM_V_I32 | ARM_V_U | ARM_V_Q, r0, r1, r2)
#  define VQSUB_S64(r0,r1,r2)		voddd(ARM_VQSUB_I | ARM_V_I64, r0, r1, r2)
#  define VQSUBQ_S64(r0,r1,r2)		voqqq(ARM_VQSUB_I | ARM_V_I64 | ARM_V_Q, r0, r1, r2)
#  define VQSUB_U64(r0,r1,r2)		voddd(ARM_VQSUB_I | ARM_V_I64 | ARM_V_U, r0, r1, r2)
#  define VQSUBQ_U64(r0,r1,r2)		voqqq(ARM_VQSUB_I | ARM_V_I64 | ARM_V_U | ARM_V_Q, r0, r1, r2)
/* VSUBL, emitted through voqdd */
#  define VSUBL_S8(r0,r1,r2)		voqdd(ARM_VSUBL_I, r0, r1, r2)
#  define VSUBL_U8(r0,r1,r2)		voqdd(ARM_VSUBL_I | ARM_V_U, r0, r1, r2)
#  define VSUBL_S16(r0,r1,r2)		voqdd(ARM_VSUBL_I | ARM_V_I16, r0, r1, r2)
#  define VSUBL_U16(r0,r1,r2)		voqdd(ARM_VSUBL_I | ARM_V_I16 | ARM_V_U, r0, r1, r2)
#  define VSUBL_S32(r0,r1,r2)		voqdd(ARM_VSUBL_I | ARM_V_I32, r0, r1, r2)
#  define VSUBL_U32(r0,r1,r2)		voqdd(ARM_VSUBL_I | ARM_V_I32 | ARM_V_U, r0, r1, r2)
/* VSUBW, emitted through voqqd */
#  define VSUBW_S8(r0,r1,r2)		voqqd(ARM_VSUBW_I, r0, r1, r2)
#  define VSUBW_U8(r0,r1,r2)		voqqd(ARM_VSUBW_I | ARM_V_U, r0, r1, r2)
#  define VSUBW_S16(r0,r1,r2)		voqqd(ARM_VSUBW_I | ARM_V_I16, r0, r1, r2)
#  define VSUBW_U16(r0,r1,r2)		voqqd(ARM_VSUBW_I | ARM_V_I16 | ARM_V_U, r0, r1, r2)
#  define VSUBW_S32(r0,r1,r2)		voqqd(ARM_VSUBW_I | ARM_V_I32, r0, r1, r2)
#  define VSUBW_U32(r0,r1,r2)		voqqd(ARM_VSUBW_I | ARM_V_I32 | ARM_V_U, r0, r1, r2)
/*
 * Integer vector multiplies.
 *
 * VMUL keeps the element width: D forms use voddd, Q forms add ARM_V_Q
 * and use voqqq.
 *
 * VMULL is a long multiply (Qd = Dn * Dm), so every variant, signed or
 * unsigned, goes through the q=d,d encoder voqdd, exactly like the other
 * long operations VADDL and VSUBL.  Unsigned forms add ARM_V_U.
 */
#  define VMUL_I8(r0,r1,r2)		voddd(ARM_VMUL_I,r0,r1,r2)
#  define VMULQ_I8(r0,r1,r2)		voqqq(ARM_VMUL_I|ARM_V_Q,r0,r1,r2)
#  define VMUL_I16(r0,r1,r2)		voddd(ARM_VMUL_I|ARM_V_I16,r0,r1,r2)
#  define VMULQ_I16(r0,r1,r2)		voqqq(ARM_VMUL_I|ARM_V_Q|ARM_V_I16,r0,r1,r2)
#  define VMUL_I32(r0,r1,r2)		voddd(ARM_VMUL_I|ARM_V_I32,r0,r1,r2)
#  define VMULQ_I32(r0,r1,r2)		voqqq(ARM_VMUL_I|ARM_V_Q|ARM_V_I32,r0,r1,r2)
#  define VMULL_S8(r0,r1,r2)		voqdd(ARM_VMULL_I,r0,r1,r2)
#  define VMULL_U8(r0,r1,r2)		voqdd(ARM_VMULL_I|ARM_V_U,r0,r1,r2)
#  define VMULL_S16(r0,r1,r2)		voqdd(ARM_VMULL_I|ARM_V_I16,r0,r1,r2)
#  define VMULL_U16(r0,r1,r2)		voqdd(ARM_VMULL_I|ARM_V_U|ARM_V_I16,r0,r1,r2)
#  define VMULL_S32(r0,r1,r2)		voqdd(ARM_VMULL_I|ARM_V_I32,r0,r1,r2)
#  define VMULL_U32(r0,r1,r2)		voqdd(ARM_VMULL_I|ARM_V_U|ARM_V_I32,r0,r1,r2)
/*
 * Integer vector absolute value and negate.  D forms use vo_dd, Q forms
 * add ARM_V_Q and use vo_qq; the element size is selected with
 * ARM_V_S16/ARM_V_S32 (none for 8 bit).
 */
/* VABS */
#  define VABS_S8(r0,r1)		vo_dd(ARM_VABS_I, r0, r1)
#  define VABSQ_S8(r0,r1)		vo_qq(ARM_VABS_I | ARM_V_Q, r0, r1)
#  define VABS_S16(r0,r1)		vo_dd(ARM_VABS_I | ARM_V_S16, r0, r1)
#  define VABSQ_S16(r0,r1)		vo_qq(ARM_VABS_I | ARM_V_S16 | ARM_V_Q, r0, r1)
#  define VABS_S32(r0,r1)		vo_dd(ARM_VABS_I | ARM_V_S32, r0, r1)
#  define VABSQ_S32(r0,r1)		vo_qq(ARM_VABS_I | ARM_V_S32 | ARM_V_Q, r0, r1)
/* VQABS */
#  define VQABS_S8(r0,r1)		vo_dd(ARM_VQABS_I, r0, r1)
#  define VQABSQ_S8(r0,r1)		vo_qq(ARM_VQABS_I | ARM_V_Q, r0, r1)
#  define VQABS_S16(r0,r1)		vo_dd(ARM_VQABS_I | ARM_V_S16, r0, r1)
#  define VQABSQ_S16(r0,r1)		vo_qq(ARM_VQABS_I | ARM_V_S16 | ARM_V_Q, r0, r1)
#  define VQABS_S32(r0,r1)		vo_dd(ARM_VQABS_I | ARM_V_S32, r0, r1)
#  define VQABSQ_S32(r0,r1)		vo_qq(ARM_VQABS_I | ARM_V_S32 | ARM_V_Q, r0, r1)
/* VNEG */
#  define VNEG_S8(r0,r1)		vo_dd(ARM_VNEG_I, r0, r1)
#  define VNEGQ_S8(r0,r1)		vo_qq(ARM_VNEG_I | ARM_V_Q, r0, r1)
#  define VNEG_S16(r0,r1)		vo_dd(ARM_VNEG_I | ARM_V_S16, r0, r1)
#  define VNEGQ_S16(r0,r1)		vo_qq(ARM_VNEG_I | ARM_V_S16 | ARM_V_Q, r0, r1)
#  define VNEG_S32(r0,r1)		vo_dd(ARM_VNEG_I | ARM_V_S32, r0, r1)
#  define VNEGQ_S32(r0,r1)		vo_qq(ARM_VNEG_I | ARM_V_S32 | ARM_V_Q, r0, r1)
/* VQNEG */
#  define VQNEG_S8(r0,r1)		vo_dd(ARM_VQNEG_I, r0, r1)
#  define VQNEGQ_S8(r0,r1)		vo_qq(ARM_VQNEG_I | ARM_V_Q, r0, r1)
#  define VQNEG_S16(r0,r1)		vo_dd(ARM_VQNEG_I | ARM_V_S16, r0, r1)
#  define VQNEGQ_S16(r0,r1)		vo_qq(ARM_VQNEG_I | ARM_V_S16 | ARM_V_Q, r0, r1)
#  define VQNEG_S32(r0,r1)		vo_dd(ARM_VQNEG_I | ARM_V_S32, r0, r1)
#  define VQNEGQ_S32(r0,r1)		vo_qq(ARM_VQNEG_I | ARM_V_S32 | ARM_V_Q, r0, r1)
/*
 * Bitwise vector operations.  D forms use voddd, Q forms add ARM_V_Q and
 * use voqqq.
 */
#  define VAND(r0,r1,r2)		voddd(ARM_VAND, r0, r1, r2)
#  define VANDQ(r0,r1,r2)		voqqq(ARM_VAND | ARM_V_Q, r0, r1, r2)
#  define VBIC(r0,r1,r2)		voddd(ARM_VBIC, r0, r1, r2)
#  define VBICQ(r0,r1,r2)		voqqq(ARM_VBIC | ARM_V_Q, r0, r1, r2)
#  define VORR(r0,r1,r2)		voddd(ARM_VORR, r0, r1, r2)
#  define VORRQ(r0,r1,r2)		voqqq(ARM_VORR | ARM_V_Q, r0, r1, r2)
#  define VORN(r0,r1,r2)		voddd(ARM_VORN, r0, r1, r2)
#  define VORNQ(r0,r1,r2)		voqqq(ARM_VORN | ARM_V_Q, r0, r1, r2)
#  define VEOR(r0,r1,r2)		voddd(ARM_VEOR, r0, r1, r2)
#  define VEORQ(r0,r1,r2)		voqqq(ARM_VEOR | ARM_V_Q, r0, r1, r2)
/* a vector move is an OR of the source register with itself */
#  define VMOV(r0,r1)			VORR(r0, r1, r1)
#  define VMOVQ(r0,r1)			VORRQ(r0, r1, r1)
/*
 * VMOVL, emitted through vo_qd on top of ARM_VMOVL_I.  The source element
 * size is chosen with ARM_VMOVL_S8/S16/S32 for both the signed and the
 * unsigned forms; unsigned forms additionally set ARM_V_U.
 */
#  define VMOVL_S8(r0,r1)		vo_qd(ARM_VMOVL_I | ARM_VMOVL_S8, r0, r1)
#  define VMOVL_U8(r0,r1)		vo_qd(ARM_VMOVL_I | ARM_V_U | ARM_VMOVL_S8, r0, r1)
#  define VMOVL_S16(r0,r1)		vo_qd(ARM_VMOVL_I | ARM_VMOVL_S16, r0, r1)
#  define VMOVL_U16(r0,r1)		vo_qd(ARM_VMOVL_I | ARM_V_U | ARM_VMOVL_S16, r0, r1)
#  define VMOVL_S32(r0,r1)		vo_qd(ARM_VMOVL_I | ARM_VMOVL_S32, r0, r1)
#  define VMOVL_U32(r0,r1)		vo_qd(ARM_VMOVL_I | ARM_V_U | ARM_VMOVL_S32, r0, r1)
/* "oi" should be the result of encode_vfp_double.
 * VIMMQ ORs ARM_V_Q into "oi"; the argument is parenthesized so that any
 * expression passed as "oi" is combined as a whole. */
#  define VIMM(oi,r0)			vodi(oi,r0)
#  define VIMMQ(oi,r0)			voqi((oi)|ARM_V_Q,r0)
/*
 * Single register load/store through cc_vldst.
 * The index is multiplied by four.
 * The plain forms OR ARM_P into the opcode, the *N forms do not; F64 forms
 * add ARM_V_F64.
 */
#  define CC_VLDRN_F32(cc,r0,r1,i0)	cc_vldst(cc, ARM_VLDR, r0, r1, i0)
#  define VLDRN_F32(r0,r1,i0)		CC_VLDRN_F32(ARM_CC_AL, r0, r1, i0)
#  define CC_VLDR_F32(cc,r0,r1,i0)	cc_vldst(cc, ARM_VLDR | ARM_P, r0, r1, i0)
#  define VLDR_F32(r0,r1,i0)		CC_VLDR_F32(ARM_CC_AL, r0, r1, i0)
#  define CC_VLDRN_F64(cc,r0,r1,i0)	cc_vldst(cc, ARM_VLDR | ARM_V_F64, r0, r1, i0)
#  define VLDRN_F64(r0,r1,i0)		CC_VLDRN_F64(ARM_CC_AL, r0, r1, i0)
#  define CC_VLDR_F64(cc,r0,r1,i0)	cc_vldst(cc, ARM_VLDR | ARM_V_F64 | ARM_P, r0, r1, i0)
#  define VLDR_F64(r0,r1,i0)		CC_VLDR_F64(ARM_CC_AL, r0, r1, i0)
#  define CC_VSTRN_F32(cc,r0,r1,i0)	cc_vldst(cc, ARM_VSTR, r0, r1, i0)
#  define VSTRN_F32(r0,r1,i0)		CC_VSTRN_F32(ARM_CC_AL, r0, r1, i0)
#  define CC_VSTR_F32(cc,r0,r1,i0)	cc_vldst(cc, ARM_VSTR | ARM_P, r0, r1, i0)
#  define VSTR_F32(r0,r1,i0)		CC_VSTR_F32(ARM_CC_AL, r0, r1, i0)
#  define CC_VSTRN_F64(cc,r0,r1,i0)	cc_vldst(cc, ARM_VSTR | ARM_V_F64, r0, r1, i0)
#  define VSTRN_F64(r0,r1,i0)		CC_VSTRN_F64(ARM_CC_AL, r0, r1, i0)
#  define CC_VSTR_F64(cc,r0,r1,i0)	cc_vldst(cc, ARM_VSTR | ARM_V_F64 | ARM_P, r0, r1, i0)
#  define VSTR_F64(r0,r1,i0)		CC_VSTR_F64(ARM_CC_AL, r0, r1, i0)
469 #  define vfp_movr_f(r0,r1)		_vfp_movr_f(_jit,r0,r1)
470 static void _vfp_movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
471 #  define vfp_movr_d(r0,r1)		_vfp_movr_d(_jit,r0,r1)
472 static void _vfp_movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
473 #  define vfp_movi_f(r0,i0)		_vfp_movi_f(_jit,r0,i0)
474 static void _vfp_movi_f(jit_state_t*,jit_int32_t,jit_float32_t);
475 #  define vfp_movi_d(r0,i0)		_vfp_movi_d(_jit,r0,i0)
476 static void _vfp_movi_d(jit_state_t*,jit_int32_t,jit_float64_t);
477 #  define vfp_extr_f(r0,r1)		_vfp_extr_f(_jit,r0,r1)
478 static void _vfp_extr_f(jit_state_t*,jit_int32_t,jit_int32_t);
479 #  define vfp_extr_d(r0,r1)		_vfp_extr_d(_jit,r0,r1)
480 static void _vfp_extr_d(jit_state_t*,jit_int32_t,jit_int32_t);
481 #  define vfp_extr_d_f(r0,r1)		_vfp_extr_d_f(_jit,r0,r1)
482 static void _vfp_extr_d_f(jit_state_t*,jit_int32_t,jit_int32_t);
483 #  define vfp_extr_f_d(r0,r1)		_vfp_extr_f_d(_jit,r0,r1)
484 static void _vfp_extr_f_d(jit_state_t*,jit_int32_t,jit_int32_t);
485 #  define vfp_truncr_f_i(r0,r1)		_vfp_truncr_f_i(_jit,r0,r1)
486 static void _vfp_truncr_f_i(jit_state_t*,jit_int32_t,jit_int32_t);
487 #  define vfp_truncr_d_i(r0,r1)		_vfp_truncr_d_i(_jit,r0,r1)
488 static void _vfp_truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t);
489 #  define vfp_absr_f(r0,r1)		VABS_F32(r0,r1)
490 #  define vfp_absr_d(r0,r1)		VABS_F64(r0,r1)
491 #  define vfp_negr_f(r0,r1)		VNEG_F32(r0,r1)
492 #  define vfp_negr_d(r0,r1)		VNEG_F64(r0,r1)
493 #  define vfp_sqrtr_f(r0,r1)		VSQRT_F32(r0,r1)
494 #  define vfp_sqrtr_d(r0,r1)		VSQRT_F64(r0,r1)
/* Binary arithmetic.  The register/register forms (*r_f, *r_d) expand
 * directly to the VADD/VSUB/VMUL/VDIV instruction macros; the forms whose
 * last operand is a float/double immediate (*i_f, *i_d) are implemented by
 * the _vfp_* functions declared next to them. */
495 #  define vfp_addr_f(r0,r1,r2)		VADD_F32(r0,r1,r2)
496 #  define vfp_addi_f(r0,r1,i0)		_vfp_addi_f(_jit,r0,r1,i0)
497 static void _vfp_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
498 #  define vfp_addr_d(r0,r1,r2)		VADD_F64(r0,r1,r2)
499 #  define vfp_addi_d(r0,r1,i0)		_vfp_addi_d(_jit,r0,r1,i0)
500 static void _vfp_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
501 #  define vfp_subr_f(r0,r1,r2)		VSUB_F32(r0,r1,r2)
502 #  define vfp_subi_f(r0,r1,i0)		_vfp_subi_f(_jit,r0,r1,i0)
503 static void _vfp_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
504 #  define vfp_subr_d(r0,r1,r2)		VSUB_F64(r0,r1,r2)
505 #  define vfp_subi_d(r0,r1,i0)		_vfp_subi_d(_jit,r0,r1,i0)
506 static void _vfp_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* Reverse subtract is a plain subtract with the two source operands swapped. */
507 #  define vfp_rsbr_f(r0,r1,r2)		vfp_subr_f(r0,r2,r1)
508 #  define vfp_rsbi_f(r0,r1,i0)		_vfp_rsbi_f(_jit,r0,r1,i0)
509 static void _vfp_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
510 #  define vfp_rsbr_d(r0,r1,r2)		vfp_subr_d(r0,r2,r1)
511 #  define vfp_rsbi_d(r0,r1,i0)		_vfp_rsbi_d(_jit,r0,r1,i0)
512 static void _vfp_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
513 #  define vfp_mulr_f(r0,r1,r2)		VMUL_F32(r0,r1,r2)
514 #  define vfp_muli_f(r0,r1,i0)		_vfp_muli_f(_jit,r0,r1,i0)
515 static void _vfp_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
516 #  define vfp_mulr_d(r0,r1,r2)		VMUL_F64(r0,r1,r2)
517 #  define vfp_muli_d(r0,r1,i0)		_vfp_muli_d(_jit,r0,r1,i0)
518 static void _vfp_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
519 #  define vfp_divr_f(r0,r1,r2)		VDIV_F32(r0,r1,r2)
520 #  define vfp_divi_f(r0,r1,i0)		_vfp_divi_f(_jit,r0,r1,i0)
521 static void _vfp_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
522 #  define vfp_divr_d(r0,r1,r2)		VDIV_F64(r0,r1,r2)
523 #  define vfp_divi_d(r0,r1,i0)		_vfp_divi_d(_jit,r0,r1,i0)
524 static void _vfp_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* Raw compare of two registers, single and double precision. */
525 #  define vfp_cmp_f(r0,r1)		_vfp_cmp_f(_jit,r0,r1)
526 static void _vfp_cmp_f(jit_state_t*,jit_int32_t,jit_int32_t);
527 #  define vfp_cmp_d(r0,r1)		_vfp_cmp_d(_jit,r0,r1)
528 static void _vfp_cmp_d(jit_state_t*,jit_int32_t,jit_int32_t);
/* vcmp01_* helpers take two ARM condition codes (c0, c1) followed by the
 * register operands; vcmp01_x takes only the conditions and one register.
 * NOTE(review): presumably the result register receives 0 under c0 and 1
 * under c1 -- confirm in _vcmp01_x. */
529 #  define vcmp01_x(c0,c1,r0)		_vcmp01_x(_jit,c0,c1,r0)
530 static void _vcmp01_x(jit_state_t*,int,int,jit_int32_t);
531 #  define vcmp01_f(c0,c1,r0,r1,r2)	_vcmp01_f(_jit,c0,c1,r0,r1,r2)
532 static void _vcmp01_f(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
533 #  define vcmp01_d(c0,c1,r0,r1,r2)	_vcmp01_d(_jit,c0,c1,r0,r1,r2)
534 static void _vcmp01_d(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
/* Each register/register comparison below is vcmp01_f/vcmp01_d with a fixed
 * pair of condition codes; the immediate variants are separate functions. */
535 #  define vfp_ltr_f(r0,r1,r2)		vcmp01_f(ARM_CC_PL,ARM_CC_MI,r0,r1,r2)
536 #  define vfp_lti_f(r0,r1,i0)		_vfp_lti_f(_jit,r0,r1,i0)
537 static void _vfp_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
538 #  define vfp_ltr_d(r0,r1,r2)		vcmp01_d(ARM_CC_PL,ARM_CC_MI,r0,r1,r2)
539 #  define vfp_lti_d(r0,r1,i0)		_vfp_lti_d(_jit,r0,r1,i0)
540 static void _vfp_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
541 #  define vfp_ler_f(r0,r1,r2)		vcmp01_f(ARM_CC_HS,ARM_CC_LS,r0,r1,r2)
542 #  define vfp_lei_f(r0,r1,i0)		_vfp_lei_f(_jit,r0,r1,i0)
543 static void _vfp_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
544 #  define vfp_ler_d(r0,r1,r2)		vcmp01_d(ARM_CC_HS,ARM_CC_LS,r0,r1,r2)
545 #  define vfp_lei_d(r0,r1,i0)		_vfp_lei_d(_jit,r0,r1,i0)
546 static void _vfp_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
547 #  define vfp_eqr_f(r0,r1,r2)		vcmp01_f(ARM_CC_NE,ARM_CC_EQ,r0,r1,r2)
548 #  define vfp_eqi_f(r0,r1,i0)		_vfp_eqi_f(_jit,r0,r1,i0)
549 static void _vfp_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
550 #  define vfp_eqr_d(r0,r1,r2)		vcmp01_d(ARM_CC_NE,ARM_CC_EQ,r0,r1,r2)
551 #  define vfp_eqi_d(r0,r1,i0)		_vfp_eqi_d(_jit,r0,r1,i0)
552 static void _vfp_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
553 #  define vfp_ger_f(r0,r1,r2)		vcmp01_f(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
554 #  define vfp_gei_f(r0,r1,i0)		_vfp_gei_f(_jit,r0,r1,i0)
555 static void _vfp_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
556 #  define vfp_ger_d(r0,r1,r2)		vcmp01_d(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
557 #  define vfp_gei_d(r0,r1,i0)		_vfp_gei_d(_jit,r0,r1,i0)
558 static void _vfp_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
559 #  define vfp_gtr_f(r0,r1,r2)		vcmp01_f(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
560 #  define vfp_gti_f(r0,r1,i0)		_vfp_gti_f(_jit,r0,r1,i0)
561 static void _vfp_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
562 #  define vfp_gtr_d(r0,r1,r2)		vcmp01_d(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
563 #  define vfp_gti_d(r0,r1,i0)		_vfp_gti_d(_jit,r0,r1,i0)
564 static void _vfp_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
565 #  define vfp_ner_f(r0,r1,r2)		vcmp01_f(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
566 #  define vfp_nei_f(r0,r1,i0)		_vfp_nei_f(_jit,r0,r1,i0)
567 static void _vfp_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
568 #  define vfp_ner_d(r0,r1,r2)		vcmp01_d(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
569 #  define vfp_nei_d(r0,r1,i0)		_vfp_nei_d(_jit,r0,r1,i0)
570 static void _vfp_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* vcmp10_x / vcmp_10_* helpers take a single ARM condition code.
 * NOTE(review): the "10" presumably means the result starts at 1 and is
 * cleared to 0 under the condition -- confirm in _vcmp10_x. */
571 #  define vcmp10_x(c0,r0)		_vcmp10_x(_jit,c0,r0)
572 static void _vcmp10_x(jit_state_t*,int,jit_int32_t);
573 #  define vcmp_10_f(c0,r0,r1,r2)	_vcmp_10_f(_jit,c0,r0,r1,r2)
574 static void _vcmp_10_f(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
575 #  define vcmp_10_d(c0,r0,r1,r2)	_vcmp_10_d(_jit,c0,r0,r1,r2)
576 static void _vcmp_10_d(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
/* unlt/unle register forms: vcmp_10_* with ARM_CC_GE / ARM_CC_GT. */
577 #  define vfp_unltr_f(r0,r1,r2)		vcmp_10_f(ARM_CC_GE,r0,r1,r2)
578 #  define vfp_unlti_f(r0,r1,i0)		_vfp_unlti_f(_jit,r0,r1,i0)
579 static void _vfp_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
580 #  define vfp_unltr_d(r0,r1,r2)		vcmp_10_d(ARM_CC_GE,r0,r1,r2)
581 #  define vfp_unlti_d(r0,r1,i0)		_vfp_unlti_d(_jit,r0,r1,i0)
582 static void _vfp_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
583 #  define vfp_unler_f(r0,r1,r2)		vcmp_10_f(ARM_CC_GT,r0,r1,r2)
584 #  define vfp_unlei_f(r0,r1,i0)		_vfp_unlei_f(_jit,r0,r1,i0)
585 static void _vfp_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
586 #  define vfp_unler_d(r0,r1,r2)		vcmp_10_d(ARM_CC_GT,r0,r1,r2)
587 #  define vfp_unlei_d(r0,r1,i0)		_vfp_unlei_d(_jit,r0,r1,i0)
588 static void _vfp_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* uneq has dedicated functions (no single condition code macro form). */
589 #  define vfp_uneqr_x(r0)		_vfp_uneqr_x(_jit,r0)
590 static void _vfp_uneqr_x(jit_state_t*,jit_int32_t);
591 #  define vfp_uneqr_f(r0,r1,r2)		_vfp_uneqr_f(_jit,r0,r1,r2)
592 static void _vfp_uneqr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
593 #  define vfp_uneqi_f(r0,r1,i0)		_vfp_uneqi_f(_jit,r0,r1,i0)
594 static void _vfp_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
595 #  define vfp_uneqr_d(r0,r1,r2)		_vfp_uneqr_d(_jit,r0,r1,r2)
596 static void _vfp_uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
597 #  define vfp_uneqi_d(r0,r1,i0)		_vfp_uneqi_d(_jit,r0,r1,i0)
598 static void _vfp_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* vcmp_01_* helpers: single condition code, counterpart of vcmp_10_*.
 * NOTE(review): presumably result starts at 0 and is set to 1 under the
 * condition -- confirm in _vcmp_01_x. */
599 #  define vcmp_01_x(c0,r0)		_vcmp_01_x(_jit,c0,r0)
600 static void _vcmp_01_x(jit_state_t*,int,jit_int32_t);
601 #  define vcmp_01_f(c0,r0,r1,r2)	_vcmp_01_f(_jit,c0,r0,r1,r2)
602 static void _vcmp_01_f(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
603 #  define vcmp_01_d(c0,r0,r1,r2)	_vcmp_01_d(_jit,c0,r0,r1,r2)
604 static void _vcmp_01_d(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
/* unge/ungt register forms: vcmp_01_* with ARM_CC_CS / ARM_CC_HI. */
605 #  define vfp_unger_f(r0,r1,r2)		vcmp_01_f(ARM_CC_CS,r0,r1,r2)
606 #  define vfp_ungei_f(r0,r1,i0)		_vfp_ungei_f(_jit,r0,r1,i0)
607 static void _vfp_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
608 #  define vfp_unger_d(r0,r1,r2)		vcmp_01_d(ARM_CC_CS,r0,r1,r2)
609 #  define vfp_ungei_d(r0,r1,i0)		_vfp_ungei_d(_jit,r0,r1,i0)
610 static void _vfp_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
611 #  define vfp_ungtr_f(r0,r1,r2)		vcmp_01_f(ARM_CC_HI,r0,r1,r2)
612 #  define vfp_ungti_f(r0,r1,i0)		_vfp_ungti_f(_jit,r0,r1,i0)
613 static void _vfp_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
614 #  define vfp_ungtr_d(r0,r1,r2)		vcmp_01_d(ARM_CC_HI,r0,r1,r2)
615 #  define vfp_ungti_d(r0,r1,i0)		_vfp_ungti_d(_jit,r0,r1,i0)
616 static void _vfp_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* ltgt, ord and unord are all implemented by dedicated functions. */
617 #  define vfp_ltgtr_x(r0)		_vfp_ltgtr_x(_jit,r0)
618 static void _vfp_ltgtr_x(jit_state_t*,jit_int32_t);
619 #  define vfp_ltgtr_f(r0,r1,r2)		_vfp_ltgtr_f(_jit,r0,r1,r2)
620 static void _vfp_ltgtr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
621 #  define vfp_ltgti_f(r0,r1,i0)		_vfp_ltgti_f(_jit,r0,r1,i0)
622 static void _vfp_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
623 #  define vfp_ltgtr_d(r0,r1,r2)		_vfp_ltgtr_d(_jit,r0,r1,r2)
624 static void _vfp_ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
625 #  define vfp_ltgti_d(r0,r1,i0)		_vfp_ltgti_d(_jit,r0,r1,i0)
626 static void _vfp_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
627 #  define vfp_ordr_f(r0,r1,r2)		_vfp_ordr_f(_jit,r0,r1,r2)
628 static void _vfp_ordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
629 #  define vfp_ordi_f(r0,r1,i0)		_vfp_ordi_f(_jit,r0,r1,i0)
630 static void _vfp_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
631 #  define vfp_ordr_d(r0,r1,r2)		_vfp_ordr_d(_jit,r0,r1,r2)
632 static void _vfp_ordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
633 #  define vfp_ordi_d(r0,r1,i0)		_vfp_ordi_d(_jit,r0,r1,i0)
634 static void _vfp_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
635 #  define vfp_unordr_f(r0,r1,r2)	_vfp_unordr_f(_jit,r0,r1,r2)
636 static void _vfp_unordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
637 #  define vfp_unordi_f(r0,r1,i0)	_vfp_unordi_f(_jit,r0,r1,i0)
638 static void _vfp_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
639 #  define vfp_unordr_d(r0,r1,r2)	_vfp_unordr_d(_jit,r0,r1,r2)
640 static void _vfp_unordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
641 #  define vfp_unordi_d(r0,r1,i0)	_vfp_unordi_d(_jit,r0,r1,i0)
642 static void _vfp_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
643 #  define vbcmp_x(cc,i0)		_vbcmp_x(_jit,cc,i0)
644 static jit_word_t _vbcmp_x(jit_state_t*,int,jit_word_t);
645 #  define vbcmp_f(cc,i0,r0,r1)		_vbcmp_f(_jit,cc,i0,r0,r1)
646 static jit_word_t
647 _vbcmp_f(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
648 #  define vbcmp_x(cc,i0)		_vbcmp_x(_jit,cc,i0)
649 static jit_word_t _vbcmp_x(jit_state_t*,int,jit_word_t);
650 #  define vbcmp_d(cc,i0,r0,r1)		_vbcmp_d(_jit,cc,i0,r0,r1)
651 static jit_word_t
652 _vbcmp_d(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
653 #  define vfp_bltr_f(i0,r0,r1)		vbcmp_f(ARM_CC_MI,i0,r0,r1)
654 #  define vfp_blti_f(i0,r0,i1)		_vfp_blti_f(_jit,i0,r0,i1)
655 static jit_word_t _vfp_blti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
656 #  define vfp_bltr_d(i0,r0,r1)		vbcmp_d(ARM_CC_MI,i0,r0,r1)
657 static jit_word_t _vfp_blti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
658 #  define vfp_blti_d(i0,r0,i1)		_vfp_blti_d(_jit,i0,r0,i1)
659 #  define vfp_bler_f(i0,r0,r1)		vbcmp_f(ARM_CC_LS,i0,r0,r1)
660 #  define vfp_blei_f(i0,r0,i1)		_vfp_blei_f(_jit,i0,r0,i1)
661 static jit_word_t _vfp_blei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
662 #  define vfp_bler_d(i0,r0,r1)		vbcmp_d(ARM_CC_LS,i0,r0,r1)
663 #  define vfp_blei_d(i0,r0,i1)		_vfp_blei_d(_jit,i0,r0,i1)
664 static jit_word_t _vfp_blei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
665 #  define vfp_beqr_f(i0,r0,r1)		vbcmp_f(ARM_CC_EQ,i0,r0,r1)
666 #  define vfp_beqi_f(i0,r0,i1)		_vfp_beqi_f(_jit,i0,r0,i1)
667 static jit_word_t _vfp_beqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
668 #  define vfp_beqr_d(i0,r0,r1)		vbcmp_d(ARM_CC_EQ,i0,r0,r1)
669 #  define vfp_beqi_d(i0,r0,i1)		_vfp_beqi_d(_jit,i0,r0,i1)
670 static jit_word_t _vfp_beqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
671 #  define vfp_bger_f(i0,r0,r1)		vbcmp_f(ARM_CC_GE,i0,r0,r1)
672 #  define vfp_bgei_f(i0,r0,i1)		_vfp_bgei_f(_jit,i0,r0,i1)
673 static jit_word_t _vfp_bgei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
674 #  define vfp_bger_d(i0,r0,r1)		vbcmp_d(ARM_CC_GE,i0,r0,r1)
675 #  define vfp_bgei_d(i0,r0,i1)		_vfp_bgei_d(_jit,i0,r0,i1)
676 static jit_word_t _vfp_bgei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
677 #  define vfp_bgtr_f(i0,r0,r1)		vbcmp_f(ARM_CC_GT,i0,r0,r1)
678 #  define vfp_bgti_f(i0,r0,i1)		_vfp_bgti_f(_jit,i0,r0,i1)
679 static jit_word_t _vfp_bgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
680 #  define vfp_bgtr_d(i0,r0,r1)		vbcmp_d(ARM_CC_GT,i0,r0,r1)
681 #  define vfp_bgti_d(i0,r0,i1)		_vfp_bgti_d(_jit,i0,r0,i1)
682 static jit_word_t _vfp_bgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
683 #  define vfp_bner_f(i0,r0,r1)		vbcmp_f(ARM_CC_NE,i0,r0,r1)
684 #  define vfp_bnei_f(i0,r0,i1)		_vfp_bnei_f(_jit,i0,r0,i1)
685 static jit_word_t _vfp_bnei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
686 #  define vfp_bner_d(i0,r0,r1)		vbcmp_d(ARM_CC_NE,i0,r0,r1)
687 #  define vfp_bnei_d(i0,r0,i1)		_vfp_bnei_d(_jit,i0,r0,i1)
688 static jit_word_t _vfp_bnei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* vbncmp_* mirror the vbcmp_* helpers (same parameter lists).
 * NOTE(review): presumably they branch when the condition does NOT hold,
 * going by the name -- confirm in _vbncmp_x. */
689 #  define vbncmp_x(cc,i0)		_vbncmp_x(_jit,cc,i0)
690 static jit_word_t _vbncmp_x(jit_state_t*,int,jit_word_t);
691 #  define vbncmp_f(cc,i0,r0,r1)		_vbncmp_f(_jit,cc,i0,r0,r1)
692 static jit_word_t
693 _vbncmp_f(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
694 #  define vbncmp_d(cc,i0,r0,r1)		_vbncmp_d(_jit,cc,i0,r0,r1)
695 static jit_word_t
696 _vbncmp_d(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
/* bunlt/bunle register forms: vbncmp_* with ARM_CC_GE / ARM_CC_GT. */
697 #  define vfp_bunltr_f(i0,r0,r1)	vbncmp_f(ARM_CC_GE,i0,r0,r1)
698 #  define vfp_bunlti_f(i0,r0,i1)	_vfp_bunlti_f(_jit,i0,r0,i1)
699 static jit_word_t
700 _vfp_bunlti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
701 #  define vfp_bunltr_d(i0,r0,r1)	vbncmp_d(ARM_CC_GE,i0,r0,r1)
702 #  define vfp_bunlti_d(i0,r0,i1)	_vfp_bunlti_d(_jit,i0,r0,i1)
703 static jit_word_t
704 _vfp_bunlti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
705 #  define vfp_bunler_f(i0,r0,r1)	vbncmp_f(ARM_CC_GT,i0,r0,r1)
706 #  define vfp_bunlei_f(i0,r0,i1)	_vfp_bunlei_f(_jit,i0,r0,i1)
707 static jit_word_t
708 _vfp_bunlei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
709 #  define vfp_bunler_d(i0,r0,r1)	vbncmp_d(ARM_CC_GT,i0,r0,r1)
710 #  define vfp_bunlei_d(i0,r0,i1)	_vfp_bunlei_d(_jit,i0,r0,i1)
711 static jit_word_t
712 _vfp_bunlei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* buneq and bunge are implemented by dedicated functions; the *_x variants
 * take only the word operand. */
713 #  define vfp_buneqr_x(i0)		_vfp_buneqr_x(_jit,i0)
714 static jit_word_t _vfp_buneqr_x(jit_state_t*,jit_word_t);
715 #  define vfp_buneqr_f(i0,r0,r1)	_vfp_buneqr_f(_jit,i0,r0,r1)
716 static jit_word_t
717 _vfp_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
718 #  define vfp_buneqi_f(i0,r0,i1)	_vfp_buneqi_f(_jit,i0,r0,i1)
719 static jit_word_t
720 _vfp_buneqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
721 #  define vfp_buneqr_d(i0,r0,r1)	_vfp_buneqr_d(_jit,i0,r0,r1)
722 static jit_word_t
723 _vfp_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
724 #  define vfp_buneqi_d(i0,r0,i1)	_vfp_buneqi_d(_jit,i0,r0,i1)
725 static jit_word_t
726 _vfp_buneqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
727 #  define vfp_bunger_x(i0)		_vfp_bunger_x(_jit,i0)
728 static jit_word_t _vfp_bunger_x(jit_state_t*,jit_word_t);
729 #  define vfp_bunger_f(i0,r0,r1)	_vfp_bunger_f(_jit,i0,r0,r1)
730 static jit_word_t
731 _vfp_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
732 #  define vfp_bungei_f(i0,r0,i1)	_vfp_bungei_f(_jit,i0,r0,i1)
733 static jit_word_t
734 _vfp_bungei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
735 #  define vfp_bunger_d(i0,r0,r1)	_vfp_bunger_d(_jit,i0,r0,r1)
736 static jit_word_t
737 _vfp_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
738 #  define vfp_bungei_d(i0,r0,i1)	_vfp_bungei_d(_jit,i0,r0,i1)
739 static jit_word_t
740 _vfp_bungei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* bungt register forms use the direct vbcmp_* helpers with ARM_CC_HI. */
741 #  define vfp_bungtr_f(i0,r0,r1)	vbcmp_f(ARM_CC_HI,i0,r0,r1)
742 #  define vfp_bungti_f(i0,r0,i1)	_vfp_bungti_f(_jit,i0,r0,i1)
743 static jit_word_t
744 _vfp_bungti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
745 #  define vfp_bungtr_d(i0,r0,r1)	vbcmp_d(ARM_CC_HI,i0,r0,r1)
746 #  define vfp_bungti_d(i0,r0,i1)	_vfp_bungti_d(_jit,i0,r0,i1)
747 static jit_word_t
748 _vfp_bungti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* bltgt is implemented by dedicated functions. */
749 #  define vfp_bltgtr_x(i0)		_vfp_bltgtr_x(_jit,i0)
750 static jit_word_t _vfp_bltgtr_x(jit_state_t*,jit_word_t);
751 #  define vfp_bltgtr_f(i0,r0,r1)	_vfp_bltgtr_f(_jit,i0,r0,r1)
752 static jit_word_t
753 _vfp_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
754 #  define vfp_bltgti_f(i0,r0,i1)	_vfp_bltgti_f(_jit,i0,r0,i1)
755 static jit_word_t
756 _vfp_bltgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
757 #  define vfp_bltgtr_d(i0,r0,r1)	_vfp_bltgtr_d(_jit,i0,r0,r1)
758 static jit_word_t
759 _vfp_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
760 #  define vfp_bltgti_d(i0,r0,i1)	_vfp_bltgti_d(_jit,i0,r0,i1)
761 static jit_word_t
762 _vfp_bltgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* bord/bunord register forms test the overflow flag condition directly:
 * ARM_CC_VC for bord, ARM_CC_VS for bunord. */
763 #  define vfp_bordr_f(i0,r0,r1)		vbcmp_f(ARM_CC_VC,i0,r0,r1)
764 #  define vfp_bordi_f(i0,r0,i1)		_vfp_bordi_f(_jit,i0,r0,i1)
765 static jit_word_t
766 _vfp_bordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
767 #  define vfp_bordr_d(i0,r0,r1)		vbcmp_d(ARM_CC_VC,i0,r0,r1)
768 #  define vfp_bordi_d(i0,r0,i1)		_vfp_bordi_d(_jit,i0,r0,i1)
769 static jit_word_t
770 _vfp_bordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
771 #  define vfp_bunordr_f(i0,r0,r1)	vbcmp_f(ARM_CC_VS,i0,r0,r1)
772 #  define vfp_bunordi_f(i0,r0,i1)	_vfp_bunordi_f(_jit,i0,r0,i1)
773 static jit_word_t
774 _vfp_bunordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
775 #  define vfp_bunordr_d(i0,r0,r1)	vbcmp_d(ARM_CC_VS,i0,r0,r1)
776 #  define vfp_bunordi_d(i0,r0,i1)	_vfp_bunordi_d(_jit,i0,r0,i1)
777 static jit_word_t
778 _vfp_bunordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* Loads.  The plain register-indirect forms expand to VLDR with a zero
 * offset; the immediate-address (ldi) and indexed (ldxr/ldxi) forms are
 * implemented by functions. */
779 #  define vfp_ldr_f(r0,r1)		VLDR_F32(r0,r1,0)
780 #  define vfp_ldr_d(r0,r1)		VLDR_F64(r0,r1,0)
781 #  define vfp_ldi_f(r0,i0)		_vfp_ldi_f(_jit,r0,i0)
782 static void _vfp_ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
783 #  define vfp_ldi_d(r0,i0)		_vfp_ldi_d(_jit,r0,i0)
784 static void _vfp_ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
785 #  define vfp_ldxr_f(r0,r1,r2)		_vfp_ldxr_f(_jit,r0,r1,r2)
786 static void _vfp_ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
787 #  define vfp_ldxr_d(r0,r1,r2)		_vfp_ldxr_d(_jit,r0,r1,r2)
788 static void _vfp_ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
789 #  define vfp_ldxi_f(r0,r1,i0)		_vfp_ldxi_f(_jit,r0,r1,i0)
790 static void _vfp_ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
791 #  define vfp_ldxi_d(r0,r1,i0)		_vfp_ldxi_d(_jit,r0,r1,i0)
792 static void _vfp_ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
/* Stores.  Note the operand swap: vfp_str_*(r0,r1) passes (r1,r0) to VSTR,
 * again with a zero offset. */
793 #  define vfp_str_f(r0,r1)		VSTR_F32(r1,r0,0)
794 #  define vfp_str_d(r0,r1)		VSTR_F64(r1,r0,0)
795 #  define vfp_sti_f(i0,r0)		_vfp_sti_f(_jit,i0,r0)
796 static void _vfp_sti_f(jit_state_t*,jit_word_t,jit_int32_t);
797 #  define vfp_sti_d(i0,r0)		_vfp_sti_d(_jit,i0,r0)
798 static void _vfp_sti_d(jit_state_t*,jit_word_t,jit_int32_t);
799 #  define vfp_stxr_f(r0,r1,r2)		_vfp_stxr_f(_jit,r0,r1,r2)
800 static void _vfp_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
801 #  define vfp_stxr_d(r0,r1,r2)		_vfp_stxr_d(_jit,r0,r1,r2)
802 static void _vfp_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
803 #  define vfp_stxi_f(i0,r0,r1)		_vfp_stxi_f(_jit,i0,r0,r1)
804 static void _vfp_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
805 #  define vfp_stxi_d(i0,r0,r1)		_vfp_stxi_d(_jit,i0,r0,r1)
806 static void _vfp_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
/* Variadic-argument fetch of a double; takes two register numbers. */
807 #  define vfp_vaarg_d(r0, r1)		_vfp_vaarg_d(_jit, r0, r1)
808 static void _vfp_vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
809 #endif
810 
811 #if CODE
/* Convert a register number as used by the emitters below into the value
 * placed in an instruction register field: subtract 16, then halve.  The
 * low bit dropped by the shift is encoded separately by the callers that
 * accept odd register numbers. */
812 #  define vfp_regno(rn)		(((rn) - 16) >> 1)
813 
814 static int
encode_vfp_double(int mov,int inv,unsigned lo,unsigned hi)815 encode_vfp_double(int mov, int inv, unsigned lo, unsigned hi)
816 {
817     int		code, mode, imm, mask;
818 
819     if (hi != lo) {
820 	if (mov && !inv) {
821 	    /* (I64)
822 	     *	aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh
823 	     */
824 	    for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
825 		imm = lo & mask;
826 		if (imm != mask && imm != 0)
827 		    goto fail;
828 		imm = hi & mask;
829 		if (imm != mask && imm != 0)
830 		    goto fail;
831 	    }
832 	    mode = 0xe20;
833 	    imm = (((hi & 0x80000000) >> 24) | ((hi & 0x00800000) >> 17) |
834 		   ((hi & 0x00008000) >> 10) | ((hi & 0x00000080) >>  3) |
835 		   ((lo & 0x80000000) >> 28) | ((lo & 0x00800000) >> 21) |
836 		   ((lo & 0x00008000) >> 14) | ((lo & 0x00000080) >>  7));
837 	    goto success;
838 	}
839 	goto fail;
840     }
841     /*  (I32)
842      *  00000000 00000000 00000000 abcdefgh
843      *  00000000 00000000 abcdefgh 00000000
844      *  00000000 abcdefgh 00000000 00000000
845      *  abcdefgh 00000000 00000000 00000000 */
846     for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
847 	if ((lo & mask) == lo) {
848 	    imm = lo >> (mode << 3);
849 	    mode <<= 9;
850 	    goto success;
851 	}
852     }
853     /*  (I16)
854      *  00000000 abcdefgh 00000000 abcdefgh
855      *  abcdefgh 00000000 abcdefgh 00000000 */
856     for (mode = 0, mask = 0xff; mode < 2; mask <<= 8, mode++) {
857 	if ((lo & mask) && ((lo & (mask << 16)) >> 16) == (lo & mask)) {
858 	    imm = lo >> (mode << 3);
859 	    mode = 0x800 | (mode << 9);
860 	    goto success;
861 	}
862     }
863     if (mov) {
864 	/*  (I32)
865 	 *  00000000 00000000 abcdefgh 11111111
866 	 *  00000000 abcdefgh 11111111 11111111 */
867 	for (mode = 0, mask = 0xff; mode < 2;
868 	     mask = (mask << 8) | 0xff, mode++) {
869 	    if ((lo & mask) == mask &&
870 		!((lo & ~mask) >> 8) &&
871 		(imm = lo >> (8 + (mode << 8)))) {
872 		mode = 0xc00 | (mode << 8);
873 		goto success;
874 	    }
875 	}
876 	if (!inv) {
877 	    /* (F32)
878 	     *  aBbbbbbc defgh000 00000000 00000000
879 	     *  from the ARM Architecture Reference Manual:
880 	     *  In this entry, B = NOT(b). The bit pattern represents the
881 	     *  floating-point number (-1)^s* 2^exp * mantissa, where
882 	     *  S = UInt(a),
883 	     *  exp = UInt(NOT(b):c:d)-3 and
884 	     *  mantissa = (16+UInt(e:f:g:h))/16. */
885 	    if ((lo & 0x7ffff) == 0 &&
886 		(((lo & 0x7e000000) == 0x3e000000) ||
887 		 ((lo & 0x7e000000) == 0x40000000))) {
888 		mode = 0xf00;
889 		imm = ((lo >> 24) & 0x80) | ((lo >> 19) & 0x7f);
890 		goto success;
891 	    }
892 	}
893     }
894 
895 fail:
896     /* need another approach (load from memory, move from arm register, etc) */
897     return (-1);
898 
899 success:
900     code = inv ? ARM_VMVNI : ARM_VMOVI;
901     switch ((mode & 0xf00) >> 8) {
902 	case 0x0:	case 0x2:	case 0x4:	case 0x6:
903 	case 0x8:	case 0xa:
904 	    if (inv)	mode |= 0x20;
905 	    if (!mov)	mode |= 0x100;
906 	    break;
907 	case 0x1:	case 0x3:	case 0x5:	case 0x7:
908 	    /* should actually not reach here */
909 	    assert(!inv);
910 	case 0x9:	case 0xb:
911 	    assert(!mov);
912 	    break;
913 	case 0xc:	case 0xd:
914 	    /* should actually not reach here */
915 	    assert(inv);
916 	case 0xe:
917 	    assert(mode & 0x20);
918 	    assert(mov && !inv);
919 	    break;
920 	default:
921 	    assert(!(mode & 0x20));
922 	    break;
923     }
924     imm = ((imm & 0x80) << 17) | ((imm & 0x70) << 12) | (imm & 0x0f);
925     code |= mode | imm;
926     if (jit_thumb_p()) {
927 	if (code & 0x1000000)
928 	    code |= 0xff000000;
929 	else
930 	    code |= 0xef000000;
931     }
932     else
933 	code |= ARM_CC_NV;
934     return (code);
935 }
936 
937 static void
_vodi(jit_state_t * _jit,int oi,int r0)938 _vodi(jit_state_t *_jit, int oi, int r0)
939 {
940     jit_thumb_t	thumb;
941     assert(!(oi  & 0x0000f000));
942     assert(!(r0 & 1));	r0 = vfp_regno(r0);
943     thumb.i = oi|(_u4(r0)<<12);
944     if (jit_thumb_p())
945 	iss(thumb.s[0], thumb.s[1]);
946     else
947 	ii(thumb.i);
948 }
949 
950 static void
_voqi(jit_state_t * _jit,int oi,int r0)951 _voqi(jit_state_t *_jit, int oi, int r0)
952 {
953     jit_thumb_t	thumb;
954     assert(!(oi  & 0x0000f000));
955     assert(!(r0 & 3));	r0 = vfp_regno(r0);
956     thumb.i = oi|(_u4(r0)<<12);
957     if (jit_thumb_p())
958 	iss(thumb.s[0], thumb.s[1]);
959     else
960 	ii(thumb.i);
961 }
962 
963 static void
_cc_vo_ss(jit_state_t * _jit,int cc,int o,int r0,int r1)964 _cc_vo_ss(jit_state_t *_jit, int cc, int o, int r0, int r1)
965 {
966     jit_thumb_t	thumb;
967     assert(!(cc & 0x0fffffff));
968     assert(!(o  & 0xf000f00f));
969     if (r0 & 1)	o |= ARM_V_D;	r0 = vfp_regno(r0);
970     if (r1 & 1)	o |= ARM_V_M;	r1 = vfp_regno(r1);
971     thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
972     if (jit_thumb_p())
973 	iss(thumb.s[0], thumb.s[1]);
974     else
975 	ii(thumb.i);
976 }
977 
978 static void
_cc_vo_dd(jit_state_t * _jit,int cc,int o,int r0,int r1)979 _cc_vo_dd(jit_state_t *_jit, int cc, int o, int r0, int r1)
980 {
981     jit_thumb_t	thumb;
982     assert(!(cc & 0x0fffffff));
983     assert(!(o  & 0xf000f00f));
984     assert(!(r0 & 1) && !(r1 & 1));
985     r0 = vfp_regno(r0);	r1 = vfp_regno(r1);
986     thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
987     if (jit_thumb_p())
988 	iss(thumb.s[0], thumb.s[1]);
989     else
990 	ii(thumb.i);
991 }
992 
993 static void
_cc_vo_qd(jit_state_t * _jit,int cc,int o,int r0,int r1)994 _cc_vo_qd(jit_state_t *_jit, int cc, int o, int r0, int r1)
995 {
996     jit_thumb_t	thumb;
997     assert(!(cc & 0x0fffffff));
998     assert(!(o  & 0xf000f00f));
999     assert(!(r0 & 3) && !(r1 & 1));
1000     r0 = vfp_regno(r0);	r1 = vfp_regno(r1);
1001     thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1002     if (jit_thumb_p())
1003 	iss(thumb.s[0], thumb.s[1]);
1004     else
1005 	ii(thumb.i);
1006 }
1007 
1008 static void
_cc_vo_qq(jit_state_t * _jit,int cc,int o,int r0,int r1)1009 _cc_vo_qq(jit_state_t *_jit, int cc, int o, int r0, int r1)
1010 {
1011     jit_thumb_t	thumb;
1012     assert(!(cc & 0x0fffffff));
1013     assert(!(o  & 0xf000f00f));
1014     assert(!(r0 & 3) && !(r1 & 3));
1015     r0 = vfp_regno(r0);	r1 = vfp_regno(r1);
1016     thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1017     if (jit_thumb_p())
1018 	iss(thumb.s[0], thumb.s[1]);
1019     else
1020 	ii(thumb.i);
1021 }
1022 
1023 static void
_cc_vorr_(jit_state_t * _jit,int cc,int o,int r0,int r1)1024 _cc_vorr_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1025 {
1026     jit_thumb_t	thumb;
1027     assert(!(cc & 0x0fffffff));
1028     assert(!(o  & 0xf000f00f));
1029     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1030     if (jit_thumb_p())
1031 	iss(thumb.s[0], thumb.s[1]);
1032     else
1033 	ii(thumb.i);
1034 }
1035 
1036 static void
_cc_vors_(jit_state_t * _jit,int cc,int o,int r0,int r1)1037 _cc_vors_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1038 {
1039     jit_thumb_t	thumb;
1040     assert(!(cc & 0x0fffffff));
1041     assert(!(o  & 0xf000f00f));
1042     if (r1 & 1)	o |= ARM_V_N;	r1 = vfp_regno(r1);
1043     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1044     if (jit_thumb_p())
1045 	iss(thumb.s[0], thumb.s[1]);
1046     else
1047 	ii(thumb.i);
1048 }
1049 
1050 static void
_cc_vorv_(jit_state_t * _jit,int cc,int o,int r0,int r1)1051 _cc_vorv_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1052 {
1053     jit_thumb_t	thumb;
1054     assert(!(cc & 0x0fffffff));
1055     assert(!(o  & 0xf000f00f));
1056     if (r1 & 1)	o |= ARM_V_M;	r1 = vfp_regno(r1);
1057     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1058     if (jit_thumb_p())
1059 	iss(thumb.s[0], thumb.s[1]);
1060     else
1061 	ii(thumb.i);
1062 }
1063 
1064 static void
_cc_vori_(jit_state_t * _jit,int cc,int o,int r0,int r1)1065 _cc_vori_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1066 {
1067     jit_thumb_t	thumb;
1068     assert(!(cc & 0x0fffffff));
1069     assert(!(o  & 0xf000f00f));
1070     /* use same bit pattern, to set opc1... */
1071     if (r1 & 1)	o |= ARM_V_I32;	r1 = vfp_regno(r1);
1072     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1073     if (jit_thumb_p())
1074 	iss(thumb.s[0], thumb.s[1]);
1075     else
1076 	ii(thumb.i);
1077 }
1078 
1079 static void
_cc_vorrd(jit_state_t * _jit,int cc,int o,int r0,int r1,int r2)1080 _cc_vorrd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1081 {
1082     jit_thumb_t	thumb;
1083     assert(!(cc & 0x0fffffff));
1084     assert(!(o  & 0xf00ff00f));
1085     assert(!(r2 & 1));
1086     r2 = vfp_regno(r2);
1087     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1088     if (jit_thumb_p())
1089 	iss(thumb.s[0], thumb.s[1]);
1090     else
1091 	ii(thumb.i);
1092 }
1093 
1094 static void
_cc_vosss(jit_state_t * _jit,int cc,int o,int r0,int r1,int r2)1095 _cc_vosss(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1096 {
1097     jit_thumb_t	thumb;
1098     assert(!(cc & 0x0fffffff));
1099     assert(!(o  & 0xf00ff00f));
1100     if (r0 & 1)	o |= ARM_V_D;	r0 = vfp_regno(r0);
1101     if (r1 & 1)	o |= ARM_V_N;	r1 = vfp_regno(r1);
1102     if (r2 & 1)	o |= ARM_V_M;	r2 = vfp_regno(r2);
1103     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1104     if (jit_thumb_p())
1105 	iss(thumb.s[0], thumb.s[1]);
1106     else
1107 	ii(thumb.i);
1108 }
1109 
1110 static void
_cc_voddd(jit_state_t * _jit,int cc,int o,int r0,int r1,int r2)1111 _cc_voddd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1112 {
1113     jit_thumb_t	thumb;
1114     assert(!(cc & 0x0fffffff));
1115     assert(!(o  & 0xf00ff00f));
1116     assert(!(r0 & 1) && !(r1 & 1) && !(r2 & 1));
1117     r0 = vfp_regno(r0);	r1 = vfp_regno(r1);	r2 = vfp_regno(r2);
1118     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1119     if (jit_thumb_p())
1120 	iss(thumb.s[0], thumb.s[1]);
1121     else
1122 	ii(thumb.i);
1123 }
1124 
1125 static void
_cc_voqdd(jit_state_t * _jit,int cc,int o,int r0,int r1,int r2)1126 _cc_voqdd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1127 {
1128     jit_thumb_t	thumb;
1129     assert(!(cc & 0x0fffffff));
1130     assert(!(o  & 0xf00ff00f));
1131     assert(!(r0 & 3) && !(r1 & 1) && !(r2 & 1));
1132     r0 = vfp_regno(r0);	r1 = vfp_regno(r1);	r2 = vfp_regno(r2);
1133     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1134     if (jit_thumb_p())
1135 	iss(thumb.s[0], thumb.s[1]);
1136     else
1137 	ii(thumb.i);
1138 }
1139 
1140 static void
_cc_voqqd(jit_state_t * _jit,int cc,int o,int r0,int r1,int r2)1141 _cc_voqqd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1142 {
1143     jit_thumb_t	thumb;
1144     assert(!(cc & 0x0fffffff));
1145     assert(!(o  & 0xf00ff00f));
1146     assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 1));
1147     r0 = vfp_regno(r0);	r1 = vfp_regno(r1);	r2 = vfp_regno(r2);
1148     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1149     if (jit_thumb_p())
1150 	iss(thumb.s[0], thumb.s[1]);
1151     else
1152 	ii(thumb.i);
1153 }
1154 
1155 static void
_cc_voqqq(jit_state_t * _jit,int cc,int o,int r0,int r1,int r2)1156 _cc_voqqq(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1157 {
1158     jit_thumb_t	thumb;
1159     assert(!(cc & 0x0fffffff));
1160     assert(!(o  & 0xf00ff00f));
1161     assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 3));
1162     r0 = vfp_regno(r0);	r1 = vfp_regno(r1);	r2 = vfp_regno(r2);
1163     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1164     if (jit_thumb_p())
1165 	iss(thumb.s[0], thumb.s[1]);
1166     else
1167 	ii(thumb.i);
1168 }
1169 
1170 static void
_cc_vldst(jit_state_t * _jit,int cc,int o,int r0,int r1,int i0)1171 _cc_vldst(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0)
1172 {
1173     jit_thumb_t	thumb;
1174     /* i0 << 2 is byte offset */
1175     assert(!(cc & 0x0fffffff));
1176     assert(!(o  & 0xf00ff0ff));
1177     if (r0 & 1) {
1178 	assert(!(o & ARM_V_F64));
1179 	o |= ARM_V_D;
1180     }
1181     r0 = vfp_regno(r0);
1182     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u8(i0);
1183     if (jit_thumb_p())
1184 	iss(thumb.s[0], thumb.s[1]);
1185     else
1186 	ii(thumb.i);
1187 }
1188 
1189 static void
_cc_vorsl(jit_state_t * _jit,int cc,int o,int r0,int r1,int i0)1190 _cc_vorsl(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0)
1191 {
1192     jit_thumb_t	thumb;
1193     assert(!(cc & 0x0fffffff));
1194     assert(!(o  & 0xf00ff0ff));
1195     /* save i0 double precision registers */
1196     if (o & ARM_V_F64)		i0 <<= 1;
1197     /* if (r1 & 1) cc & ARM_V_F64 must be false */
1198     if (r1 & 1)	o |= ARM_V_D;	r1 = vfp_regno(r1);
1199     assert(i0 && !(i0 & 1) && r1 + i0 <= 32);
1200     thumb.i = cc|o|(_u4(r0)<<16)|(_u4(r1)<<12)|_u8(i0);
1201     if (jit_thumb_p())
1202 	iss(thumb.s[0], thumb.s[1]);
1203     else
1204 	ii(thumb.i);
1205 }
1206 
1207 static void
_vfp_movr_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1208 _vfp_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1209 {
1210     if (r0 != r1) {
1211 	if (jit_fpr_p(r1)) {
1212 	    if (jit_fpr_p(r0))
1213 		VMOV_F32(r0, r1);
1214 	    else
1215 		VMOV_A_S(r0, r1);
1216 	}
1217 	else if (jit_fpr_p(r0))
1218 	    VMOV_S_A(r0, r1);
1219 	else
1220 	    movr(r0, r1);
1221     }
1222 }
1223 
1224 static void
_vfp_movr_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1225 _vfp_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1226 {
1227     if (r0 != r1) {
1228 	if (jit_fpr_p(r1)) {
1229 	    if (jit_fpr_p(r0))
1230 		VMOV_F64(r0, r1);
1231 	    else
1232 		VMOV_AA_D(r0, r0 + 1, r1);
1233 	}
1234 	else if (jit_fpr_p(r0))
1235 	    VMOV_D_AA(r0, r1, r1 + 1);
1236 	else {
1237 	    /* minor consistency check */
1238 	    assert(r0 + 1 != r1 && r0 -1 != r1);
1239 	    movr(r0, r1);
1240 	    movr(r0 + 1, r1 + 1);
1241 	}
1242     }
1243 }
1244 
1245 static void
_vfp_movi_f(jit_state_t * _jit,jit_int32_t r0,jit_float32_t i0)1246 _vfp_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
1247 {
1248     union {
1249 	jit_int32_t	i;
1250 	jit_float32_t	f;
1251     } u;
1252     jit_int32_t		reg;
1253     jit_int32_t		code;
1254     u.f = i0;
1255     if (jit_fpr_p(r0)) {
1256 	/* float arguments are packed, for others,
1257 	 * lightning only address even registers */
1258 	if (!(r0 & 1) && (r0 - 16) >= 0 &&
1259 	    ((code = encode_vfp_double(1, 0, u.i, u.i)) != -1 ||
1260 	     (code = encode_vfp_double(1, 1, ~u.i, ~u.i)) != -1))
1261 	    VIMM(code, r0);
1262 	else {
1263 	    reg = jit_get_reg(jit_class_gpr);
1264 	    movi(rn(reg), u.i);
1265 	    VMOV_S_A(r0, rn(reg));
1266 	    jit_unget_reg(reg);
1267 	}
1268     }
1269     else
1270 	movi(r0, u.i);
1271 }
1272 
1273 static void
_vfp_movi_d(jit_state_t * _jit,jit_int32_t r0,jit_float64_t i0)1274 _vfp_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
1275 {
1276     union {
1277 	jit_int32_t	i[2];
1278 	jit_float64_t	d;
1279     } u;
1280     jit_int32_t		code;
1281     jit_int32_t		rg0, rg1;
1282     u.d = i0;
1283     if (jit_fpr_p(r0)) {
1284 	if ((code = encode_vfp_double(1, 0, u.i[0], u.i[1])) != -1 ||
1285 	    (code = encode_vfp_double(1, 1, ~u.i[0], ~u.i[1])) != -1)
1286 	    VIMM(code, r0);
1287 	else {
1288 	    rg0 = jit_get_reg(jit_class_gpr);
1289 	    rg1 = jit_get_reg(jit_class_gpr);
1290 	    movi(rn(rg0), u.i[0]);
1291 	    movi(rn(rg1), u.i[1]);
1292 	    VMOV_D_AA(r0, rn(rg0), rn(rg1));
1293 	    jit_unget_reg(rg1);
1294 	    jit_unget_reg(rg0);
1295 	}
1296     }
1297     else {
1298 	movi(r0, u.i[0]);
1299 	movi(r0 + 1, u.i[1]);
1300     }
1301 }
1302 
1303 static void
_vfp_extr_d_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1304 _vfp_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1305 {
1306     jit_int32_t		reg;
1307     if (jit_fpr_p(r1)) {
1308 	if (jit_fpr_p(r0))
1309 	    VCVT_F64_F32(r0, r1);
1310 	else {
1311 	    reg = jit_get_reg(jit_class_fpr);
1312 	    VCVT_F64_F32(rn(reg), r1);
1313 	    VMOV_A_S(r0, rn(reg));
1314 	    jit_unget_reg(reg);
1315 	}
1316     }
1317     else {
1318 	reg = jit_get_reg(jit_class_fpr);
1319 	VMOV_S_A(rn(reg), r1);
1320 	VCVT_F64_F32(rn(reg), rn(reg));
1321 	if (jit_fpr_p(r0))
1322 	    VMOV_F32(r0, rn(reg));
1323 	else
1324 	    VMOV_A_S(r0, rn(reg));
1325 	jit_unget_reg(reg);
1326     }
1327 }
1328 
1329 static void
_vfp_extr_f_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1330 _vfp_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1331 {
1332     jit_int32_t		reg;
1333     if (jit_fpr_p(r1)) {
1334 	if (jit_fpr_p(r0))
1335 	    VCVT_F32_F64(r0, r1);
1336 	else {
1337 	    reg = jit_get_reg(jit_class_fpr);
1338 	    VCVT_F32_F64(rn(reg), r1);
1339 	    VMOV_AA_D(r0, r0 + 1, rn(reg));
1340 	    jit_unget_reg(reg);
1341 	}
1342     }
1343     else {
1344 	reg = jit_get_reg(jit_class_fpr);
1345 	VMOV_D_AA(rn(reg), r1, r1 + 1);
1346 	VCVT_F32_F64(rn(reg), rn(reg));
1347 	if (jit_fpr_p(r0))
1348 	    VMOV_F64(r0, rn(reg));
1349 	else
1350 	    VMOV_AA_D(r0, r0 + 1, rn(reg));
1351 	jit_unget_reg(reg);
1352     }
1353 }
1354 
1355 static void
_vfp_extr_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1356 _vfp_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1357 {
1358     jit_int32_t		reg;
1359     if (jit_fpr_p(r0)) {
1360 	VMOV_V_I32(r0, r1);
1361 	VCVT_F32_S32(r0, r0);
1362     }
1363     else {
1364 	reg = jit_get_reg(jit_class_fpr);
1365 	VMOV_V_I32(rn(reg), r1);
1366 	VCVT_F32_S32(rn(reg), rn(reg));
1367 	VMOV_F32(r0, rn(reg));
1368 	jit_unget_reg(reg);
1369     }
1370 }
1371 
1372 static void
_vfp_extr_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1373 _vfp_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1374 {
1375     jit_int32_t		reg;
1376     if (jit_fpr_p(r0)) {
1377 	VMOV_V_I32(r0, r1);
1378 	VCVT_F64_S32(r0, r0);
1379     }
1380     else {
1381 	reg = jit_get_reg(jit_class_fpr);
1382 	VMOV_V_I32(rn(reg), r1);
1383 	VCVT_F64_S32(rn(reg), rn(reg));
1384 	VMOV_F64(r0, rn(reg));
1385 	jit_unget_reg(reg);
1386     }
1387 }
1388 
1389 static void
_vfp_truncr_f_i(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1390 _vfp_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1391 {
1392     jit_int32_t		reg;
1393     reg = jit_get_reg(jit_class_fpr);
1394     if (jit_fpr_p(r1))
1395 	VCVT_S32_F32(rn(reg), r1);
1396     else {
1397 	VMOV_V_I32(rn(reg), r1);
1398 	VCVT_S32_F32(rn(reg), rn(reg));
1399     }
1400     VMOV_A_S32(r0, rn(reg));
1401     jit_unget_reg(reg);
1402 }
1403 
1404 static void
_vfp_truncr_d_i(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1405 _vfp_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1406 {
1407     jit_int32_t		reg;
1408     reg = jit_get_reg(jit_class_fpr);
1409     if (jit_fpr_p(r1))
1410 	VCVT_S32_F64(rn(reg), r1);
1411     else {
1412 	VMOV_V_I32(rn(reg), r1);
1413 	VCVT_S32_F64(rn(reg), rn(reg));
1414     }
1415     VMOV_A_S32(r0, rn(reg));
1416     jit_unget_reg(reg);
1417 }
1418 
/* fopi(name): define _vfp_<name>i_f(r0, r1, i0), which loads the float
 * constant i0 into a temporary FPR and then calls
 * vfp_<name>r_f(r0, r1, temporary). */
#  define fopi(name)							\
static void								\
_vfp_##name##i_f(jit_state_t *_jit,					\
		 jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)	\
{									\
    jit_int32_t		tmp;						\
    tmp = jit_get_reg(jit_class_fpr);					\
    vfp_movi_f(rn(tmp), i0);						\
    vfp_##name##r_f(r0, r1, rn(tmp));					\
    jit_unget_reg(tmp);							\
}
/* dopi(name): define _vfp_<name>i_d(r0, r1, i0), which loads the double
 * constant i0 into a temporary FPR and then calls
 * vfp_<name>r_d(r0, r1, temporary). */
#  define dopi(name)							\
static void								\
_vfp_##name##i_d(jit_state_t *_jit,					\
		 jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)	\
{									\
    jit_int32_t		tmp;						\
    tmp = jit_get_reg(jit_class_fpr);					\
    vfp_movi_d(rn(tmp), i0);						\
    vfp_##name##r_d(r0, r1, rn(tmp));					\
    jit_unget_reg(tmp);							\
}
/* fbopi(name): define _vfp_b<name>i_f(r0, r1, i0), which loads the
 * float constant i0 into a temporary FPR (requested with
 * jit_class_nospill), calls vfp_b<name>r_f(r0, r1, temporary) and
 * returns that call's result. */
#  define fbopi(name)							\
static jit_word_t							\
_vfp_b##name##i_f(jit_state_t *_jit,					\
		  jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)	\
{									\
    jit_word_t		at;						\
    jit_int32_t		tmp;						\
    tmp = jit_get_reg(jit_class_fpr|jit_class_nospill);		\
    vfp_movi_f(rn(tmp), i0);						\
    at = vfp_b##name##r_f(r0, r1, rn(tmp));				\
    jit_unget_reg(tmp);							\
    return (at);							\
}
/* dbopi(name): define _vfp_b<name>i_d(r0, r1, i0), which loads the
 * double constant i0 into a temporary FPR (requested with
 * jit_class_nospill), calls vfp_b<name>r_d(r0, r1, temporary) and
 * returns that call's result. */
#  define dbopi(name)							\
static jit_word_t							\
_vfp_b##name##i_d(jit_state_t *_jit,					\
		  jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)	\
{									\
    jit_word_t		at;						\
    jit_int32_t		tmp;						\
    tmp = jit_get_reg(jit_class_fpr|jit_class_nospill);		\
    vfp_movi_d(rn(tmp), i0);						\
    at = vfp_b##name##r_d(r0, r1, rn(tmp));				\
    jit_unget_reg(tmp);							\
    return (at);							\
}
1465 
1466 fopi(add)
dopi(add)1467 dopi(add)
1468 fopi(sub)
1469 fopi(rsb)
1470 dopi(rsb)
1471 dopi(sub)
1472 fopi(mul)
1473 dopi(mul)
1474 fopi(div)
1475 dopi(div)
1476 
1477 static void
1478 _vfp_cmp_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1479 {
1480     jit_int32_t		rg0, rg1;
1481     if (jit_fpr_p(r0)) {
1482 	if (jit_fpr_p(r1))
1483 	    VCMP_F32(r0, r1);
1484 	else {
1485 	    rg1 = jit_get_reg(jit_class_fpr);
1486 	    VMOV_S_A(rn(rg1), r1);
1487 	    VCMP_F32(r0, rn(rg1));
1488 	    jit_unget_reg(rg1);
1489 	}
1490     }
1491     else {
1492 	rg0 = jit_get_reg(jit_class_fpr);
1493 	VMOV_S_A(rn(rg0), r0);
1494 	if (jit_fpr_p(r1))
1495 	    VCMP_F32(rn(rg0), r1);
1496 	else {
1497 	    rg1 = jit_get_reg(jit_class_fpr);
1498 	    VMOV_S_A(rn(rg1), r1);
1499 	    VCMP_F32(rn(rg0), rn(rg1));
1500 	    jit_unget_reg(rg1);
1501 	}
1502 	jit_unget_reg(rg0);
1503     }
1504 }
1505 
1506 static void
_vfp_cmp_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1507 _vfp_cmp_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1508 {
1509     jit_int32_t		rg0, rg1;
1510     if (jit_fpr_p(r0)) {
1511 	if (jit_fpr_p(r1))
1512 	    VCMP_F64(r0, r1);
1513 	else {
1514 	    rg1 = jit_get_reg(jit_class_fpr);
1515 	    VMOV_D_AA(rn(rg1), r1, r1 + 1);
1516 	    VCMP_F64(r0, rn(rg1));
1517 	    jit_unget_reg(rg1);
1518 	}
1519     }
1520     else {
1521 	rg0 = jit_get_reg(jit_class_fpr);
1522 	VMOV_D_AA(rn(rg0), r0, r0 + 1);
1523 	if (jit_fpr_p(r1))
1524 	    VCMP_F64(rn(rg0), r1);
1525 	else {
1526 	    rg1 = jit_get_reg(jit_class_fpr);
1527 	    VMOV_D_AA(rn(rg1), r1, r1 + 1);
1528 	    VCMP_F64(rn(rg0), rn(rg1));
1529 	    jit_unget_reg(rg1);
1530 	}
1531 	jit_unget_reg(rg0);
1532     }
1533 }
1534 
1535 static void
_vcmp01_x(jit_state_t * _jit,int c0,int c1,jit_int32_t r0)1536 _vcmp01_x(jit_state_t *_jit, int c0, int c1, jit_int32_t r0)
1537 {
1538     VMRS(_R15_REGNO);
1539     if (jit_thumb_p()) {
1540 	if ((c0 ^ c1) >> 28 == 1) {
1541 	    ITE(c0);
1542 	    if (r0 < 8) {
1543 		T1_MOVI(r0, 0);
1544 		T1_MOVI(r0, 1);
1545 	    }
1546 	    else {
1547 		T2_MOVI(r0, 0);
1548 		T2_MOVI(r0, 1);
1549 	    }
1550 	}
1551 	else {
1552 	    if (r0 < 8) {
1553 		IT(c0);
1554 		T1_MOVI(r0, 0);
1555 		IT(c1);
1556 		T1_MOVI(r0, 1);
1557 	    }
1558 	    else {
1559 		IT(c0);
1560 		T2_MOVI(r0, 0);
1561 		IT(c1);
1562 		T2_MOVI(r0, 1);
1563 	    }
1564 	}
1565     }
1566     else {
1567 	CC_MOVI(c0, r0, 0);
1568 	CC_MOVI(c1, r0, 1);
1569     }
1570 }
1571 
1572 static void
_vcmp01_f(jit_state_t * _jit,int c0,int c1,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1573 _vcmp01_f(jit_state_t *_jit, int c0, int c1,
1574 	  jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1575 {
1576     vfp_cmp_f(r1, r2);
1577     vcmp01_x(c0, c1, r0);
1578 }
1579 
1580 static void
_vcmp01_d(jit_state_t * _jit,int c0,int c1,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1581 _vcmp01_d(jit_state_t *_jit, int c0, int c1,
1582 	  jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1583 {
1584     vfp_cmp_d(r1, r2);
1585     vcmp01_x(c0, c1, r0);
1586 }
1587 
1588 static void
_vcmp10_x(jit_state_t * _jit,int cc,jit_int32_t r0)1589 _vcmp10_x(jit_state_t *_jit, int cc, jit_int32_t r0)
1590 {
1591     if (jit_thumb_p()) {
1592 	if (r0 < 8) {
1593 	    T1_MOVI(r0, 1);
1594 	    VMRS(_R15_REGNO);
1595 	    IT(cc);
1596 	    T1_MOVI(r0, 0);
1597 	}
1598 	else {
1599 	    T2_MOVI(r0, 1);
1600 	    VMRS(_R15_REGNO);
1601 	    IT(cc);
1602 	    T2_MOVI(r0, 0);
1603 	}
1604     }
1605     else {
1606 	VMRS(_R15_REGNO);
1607 	MOVI(r0, 1);
1608 	CC_MOVI(cc, r0, 0);
1609     }
1610 }
1611 static void
_vcmp_10_f(jit_state_t * _jit,int cc,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1612 _vcmp_10_f(jit_state_t *_jit, int cc,
1613 	   jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1614 {
1615     vfp_cmp_f(r1, r2);
1616     vcmp10_x(cc, r0);
1617 }
1618 
1619 static void
_vcmp_10_d(jit_state_t * _jit,int cc,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1620 _vcmp_10_d(jit_state_t *_jit, int cc,
1621 	   jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1622 {
1623     vfp_cmp_d(r1, r2);
1624     vcmp10_x(cc, r0);
1625 }
1626 
1627 fopi(lt)
dopi(lt)1628 dopi(lt)
1629 fopi(le)
1630 dopi(le)
1631 fopi(eq)
1632 dopi(eq)
1633 fopi(ge)
1634 dopi(ge)
1635 fopi(gt)
1636 dopi(gt)
1637 fopi(ne)
1638 dopi(ne)
1639 fopi(unlt)
1640 dopi(unlt)
1641 fopi(unle)
1642 dopi(unle)
1643 
1644 static void
1645 _vfp_uneqr_x(jit_state_t *_jit, jit_int32_t r0)
1646 {
1647     VMRS(_R15_REGNO);
1648     if (jit_thumb_p()) {
1649 	ITE(ARM_CC_NE);
1650 	if (r0 < 8) {
1651 	    T1_MOVI(r0, 0);
1652 	    T1_MOVI(r0, 1);
1653 	    IT(ARM_CC_VS);
1654 	    T1_MOVI(r0, 1);
1655 	}
1656 	else {
1657 	    T2_MOVI(r0, 0);
1658 	    T2_MOVI(r0, 1);
1659 	    IT(ARM_CC_VS);
1660 	    T2_MOVI(r0, 1);
1661 	}
1662     }
1663     else {
1664 	CC_MOVI(ARM_CC_NE, r0, 0);
1665 	CC_MOVI(ARM_CC_EQ, r0, 1);
1666 	CC_MOVI(ARM_CC_VS, r0, 1);
1667     }
1668 }
1669 
1670 static void
_vfp_uneqr_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1671 _vfp_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1672 {
1673     vfp_cmp_f(r1, r2);
1674     vfp_uneqr_x(r0);
1675 }
1676 
fopi(uneq)1677 fopi(uneq)
1678 
1679 static void
1680 _vfp_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1681 {
1682     vfp_cmp_d(r1, r2);
1683     vfp_uneqr_x(r0);
1684 }
1685 
dopi(uneq)1686 dopi(uneq)
1687 
1688 static void
1689 _vcmp_01_x(jit_state_t *_jit, int cc, jit_int32_t r0)
1690 {
1691     if (jit_thumb_p()) {
1692 	if (r0 < 8) {
1693 	    T1_MOVI(r0, 0);
1694 	    VMRS(_R15_REGNO);
1695 	    IT(cc);
1696 	    T1_MOVI(r0, 1);
1697 	}
1698 	else {
1699 	    T2_MOVI(r0, 0);
1700 	    VMRS(_R15_REGNO);
1701 	    IT(cc);
1702 	    T2_MOVI(r0, 1);
1703 	}
1704     }
1705     else {
1706 	MOVI(r0, 0);
1707 	VMRS(_R15_REGNO);
1708 	CC_MOVI(cc, r0, 1);
1709     }
1710 }
1711 
1712 static void
_vcmp_01_f(jit_state_t * _jit,int cc,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1713 _vcmp_01_f(jit_state_t *_jit, int cc,
1714 	   jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1715 {
1716     vfp_cmp_f(r1, r2);
1717     vcmp_01_x(cc, r0);
1718 }
1719 
1720 static void
_vcmp_01_d(jit_state_t * _jit,int cc,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1721 _vcmp_01_d(jit_state_t *_jit, int cc,
1722 	   jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1723 {
1724     vfp_cmp_d(r1, r2);
1725     vcmp_01_x(cc, r0);
1726 }
1727 
1728 fopi(unge)
dopi(unge)1729 dopi(unge)
1730 fopi(ungt)
1731 dopi(ungt)
1732 
1733 static void
1734 _vfp_ltgtr_x(jit_state_t *_jit, jit_int32_t r0)
1735 {
1736     VMRS(_R15_REGNO);
1737     if (jit_thumb_p()) {
1738 	ITE(ARM_CC_NE);
1739 	if (r0 < 8) {
1740 	    T1_MOVI(r0, 1);
1741 	    T1_MOVI(r0, 0);
1742 	    IT(ARM_CC_VS);
1743 	    T1_MOVI(r0, 0);
1744 	}
1745 	else {
1746 	    T2_MOVI(r0, 1);
1747 	    T2_MOVI(r0, 0);
1748 	    IT(ARM_CC_VS);
1749 	    T2_MOVI(r0, 0);
1750 	}
1751     }
1752     else {
1753 	CC_MOVI(ARM_CC_NE, r0, 1);
1754 	CC_MOVI(ARM_CC_EQ, r0, 0);
1755 	CC_MOVI(ARM_CC_VS, r0, 0);
1756     }
1757 }
1758 
1759 static void
_vfp_ltgtr_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1760 _vfp_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1761 {
1762     vfp_cmp_f(r1, r2);
1763     vfp_ltgtr_x(r0);
1764 }
1765 
fopi(ltgt)1766 fopi(ltgt)
1767 
1768 static void
1769 _vfp_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1770 {
1771     vfp_cmp_d(r1, r2);
1772     vfp_ltgtr_x(r0);
1773 }
1774 
dopi(ltgt)1775 dopi(ltgt)
1776 
1777 static void
1778 _vfp_ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1779 {
1780     vfp_cmp_f(r1, r2);
1781     vcmp10_x(ARM_CC_VS, r0);
1782 }
1783 
fopi(ord)1784 fopi(ord)
1785 
1786 static void
1787 _vfp_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1788 {
1789     vfp_cmp_d(r1, r2);
1790     vcmp10_x(ARM_CC_VS, r0);
1791 }
1792 
dopi(ord)1793 dopi(ord)
1794 
1795 static void
1796 _vfp_unordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1797 {
1798     vfp_cmp_f(r1, r2);
1799     vcmp_01_x(ARM_CC_VS, r0);
1800 }
1801 
fopi(unord)1802 fopi(unord)
1803 
1804 static void
1805 _vfp_unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1806 {
1807     vfp_cmp_d(r1, r2);
1808     vcmp_01_x(ARM_CC_VS, r0);
1809 }
1810 
dopi(unord)1811 dopi(unord)
1812 
1813 static jit_word_t
1814 _vbcmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
1815 {
1816     jit_word_t		d, w;
1817     VMRS(_R15_REGNO);
1818     w = _jit->pc.w;
1819     if (jit_thumb_p()) {
1820 	d = ((i0 - w) >> 1) - 2;
1821 	assert(_s20P(d));
1822 	T2_CC_B(cc, encode_thumb_cc_jump(d));
1823     }
1824     else {
1825 	d = ((i0 - w) >> 2) - 2;
1826 	assert(_s24P(d));
1827 	CC_B(cc, d & 0x00ffffff);
1828     }
1829     return (w);
1830 }
1831 
1832 
1833 static jit_word_t
_vbcmp_f(jit_state_t * _jit,int cc,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1834 _vbcmp_f(jit_state_t *_jit, int cc,
1835 	 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1836 {
1837     vfp_cmp_f(r0, r1);
1838     return (vbcmp_x(cc, i0));
1839 }
1840 
1841 static jit_word_t
_vbcmp_d(jit_state_t * _jit,int cc,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1842 _vbcmp_d(jit_state_t *_jit, int cc,
1843 	 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1844 {
1845     vfp_cmp_d(r0, r1);
1846     return (vbcmp_x(cc, i0));
1847 }
1848 
1849 static jit_word_t
_vbncmp_x(jit_state_t * _jit,int cc,jit_word_t i0)1850 _vbncmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
1851 {
1852     jit_word_t		d, p, w;
1853     VMRS(_R15_REGNO);
1854     p = _jit->pc.w;
1855     if (jit_thumb_p()) {
1856 	T2_CC_B(cc, 0);
1857 	w = _jit->pc.w;
1858 	d = ((i0 - w) >> 1) - 2;
1859 	assert(_s20P(d));
1860 	T2_B(encode_thumb_jump(d));
1861     }
1862     else {
1863 	CC_B(cc, 0);
1864 	w = _jit->pc.w;
1865 	d = ((i0 - w) >> 2) - 2;
1866 	assert(_s24P(d));
1867 	B(d & 0x00ffffff);
1868     }
1869     patch_at(arm_patch_jump, p, _jit->pc.w);
1870     return (w);
1871 }
1872 
1873 static jit_word_t
_vbncmp_f(jit_state_t * _jit,int cc,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1874 _vbncmp_f(jit_state_t *_jit, int cc,
1875 	  jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1876 {
1877     vfp_cmp_f(r0, r1);
1878     return (vbncmp_x(cc, i0));
1879 }
1880 
1881 static jit_word_t
_vbncmp_d(jit_state_t * _jit,int cc,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1882 _vbncmp_d(jit_state_t *_jit, int cc,
1883 	  jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1884 {
1885     vfp_cmp_d(r0, r1);
1886     return (vbncmp_x(cc, i0));
1887 }
1888 
1889 fbopi(lt)
dbopi(lt)1890 dbopi(lt)
1891 fbopi(le)
1892 dbopi(le)
1893 fbopi(eq)
1894 dbopi(eq)
1895 fbopi(ge)
1896 dbopi(ge)
1897 fbopi(gt)
1898 dbopi(gt)
1899 fbopi(ne)
1900 dbopi(ne)
1901 fbopi(unlt)
1902 dbopi(unlt)
1903 fbopi(unle)
1904 dbopi(unle)
1905 
1906 static jit_word_t
1907 _vfp_buneqr_x(jit_state_t *_jit, jit_word_t i0)
1908 {
1909     jit_word_t		d, p, q, w;
1910     VMRS(_R15_REGNO);
1911     p = _jit->pc.w;
1912     if (jit_thumb_p()) {
1913 	T2_CC_B(ARM_CC_VS, 0);
1914 	q = _jit->pc.w;
1915 	T2_CC_B(ARM_CC_NE, 0);
1916 	patch_at(arm_patch_jump, p, _jit->pc.w);
1917 	w = _jit->pc.w;
1918 	d = ((i0 - w) >> 1) - 2;
1919 	assert(_s20P(d));
1920 	T2_B(encode_thumb_jump(d));
1921     }
1922     else {
1923 	CC_B(ARM_CC_VS, 0);
1924 	q = _jit->pc.w;
1925 	CC_B(ARM_CC_NE, 0);
1926 	patch_at(arm_patch_jump, p, _jit->pc.w);
1927 	w = _jit->pc.w;
1928 	d = ((i0 - w) >> 2) - 2;
1929 	assert(_s24P(d));
1930 	B(d & 0x00ffffff);
1931     }
1932     patch_at(arm_patch_jump, q, _jit->pc.w);
1933     return (w);
1934 }
1935 
1936 static jit_word_t
_vfp_buneqr_f(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1937 _vfp_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1938 {
1939     vfp_cmp_f(r0, r1);
1940     return (vfp_buneqr_x(i0));
1941 }
1942 
fbopi(uneq)1943 fbopi(uneq)
1944 
1945 static jit_word_t
1946 _vfp_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1947 {
1948     vfp_cmp_d(r0, r1);
1949     return (vfp_buneqr_x(i0));
1950 }
1951 
dbopi(uneq)1952 dbopi(uneq)
1953 
1954 static jit_word_t
1955 _vfp_bunger_x(jit_state_t *_jit, jit_word_t i0)
1956 {
1957     jit_word_t		d, p, w;
1958     VMRS(_R15_REGNO);
1959     p = _jit->pc.w;
1960     if (jit_thumb_p()) {
1961 	T2_CC_B(ARM_CC_MI, 0);
1962 	w = _jit->pc.w;
1963 	d = ((i0 - w) >> 1) - 2;
1964 	assert(_s20P(d));
1965 	T2_CC_B(ARM_CC_HS, encode_thumb_cc_jump(d));
1966     }
1967     else {
1968 	CC_B(ARM_CC_MI, 0);
1969 	w = _jit->pc.w;
1970 	d = ((i0 - w) >> 2) - 2;
1971 	assert(_s24P(d));
1972 	CC_B(ARM_CC_HS, d & 0x00ffffff);
1973     }
1974     patch_at(arm_patch_jump, p, _jit->pc.w);
1975     return (w);
1976 }
1977 
1978 static jit_word_t
_vfp_bunger_f(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1979 _vfp_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1980 {
1981     vfp_cmp_f(r0, r1);
1982     return (vfp_bunger_x(i0));
1983 }
1984 
fbopi(unge)1985 fbopi(unge)
1986 
1987 static jit_word_t
1988 _vfp_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1989 {
1990     vfp_cmp_d(r0, r1);
1991     return (vfp_bunger_x(i0));
1992 }
1993 
dbopi(unge)1994 dbopi(unge)
1995 
1996 static jit_word_t
1997 _vfp_bltgtr_x(jit_state_t *_jit, jit_word_t i0)
1998 {
1999     jit_word_t		d, p, q, w;
2000     VMRS(_R15_REGNO);
2001     p = _jit->pc.w;
2002     if (jit_thumb_p()) {
2003 	T2_CC_B(ARM_CC_VS, 0);
2004 	q = _jit->pc.w;
2005 	T2_CC_B(ARM_CC_EQ, 0);
2006 	w = _jit->pc.w;
2007 	d = ((i0 - w) >> 1) - 2;
2008 	assert(_s20P(d));
2009 	T2_B(encode_thumb_jump(d));
2010     }
2011     else {
2012 	CC_B(ARM_CC_VS, 0);
2013 	q = _jit->pc.w;
2014 	CC_B(ARM_CC_EQ, 0);
2015 	w = _jit->pc.w;
2016 	d = ((i0 - w) >> 2) - 2;
2017 	assert(_s24P(d));
2018 	B(d & 0x00ffffff);
2019     }
2020     patch_at(arm_patch_jump, p, _jit->pc.w);
2021     patch_at(arm_patch_jump, q, _jit->pc.w);
2022     return (w);
2023 }
2024 
2025 static jit_word_t
_vfp_bltgtr_f(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)2026 _vfp_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2027 {
2028     vfp_cmp_f(r0, r1);
2029     return (vfp_bltgtr_x(i0));
2030 }
2031 
2032 fbopi(ungt)
dbopi(ungt)2033 dbopi(ungt)
2034 fbopi(ltgt)
2035 
2036 static jit_word_t
2037 _vfp_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2038 {
2039     vfp_cmp_d(r0, r1);
2040     return (vfp_bltgtr_x(i0));
2041 }
2042 
2043 dbopi(ltgt)
fbopi(ord)2044 fbopi(ord)
2045 dbopi(ord)
2046 fbopi(unord)
2047 dbopi(unord)
2048 
2049 static void
2050 _vfp_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2051 {
2052     jit_int32_t		gpr;
2053     if (jit_fpr_p(r0)) {
2054 	gpr = jit_get_reg(jit_class_gpr);
2055 	movi(rn(gpr), i0);
2056 	VLDR_F32(r0, rn(gpr), 0);
2057 	jit_unget_reg(gpr);
2058     }
2059     else
2060 	ldi_i(r0, i0);
2061 }
2062 
2063 static void
_vfp_ldi_d(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)2064 _vfp_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2065 {
2066     jit_int32_t		reg;
2067     reg = jit_get_reg(jit_class_gpr);
2068     movi(rn(reg), i0);
2069     if (jit_fpr_p(r0))
2070 	VLDR_F64(r0, rn(reg), 0);
2071     else {
2072 	ldr_i(r0, rn(reg));
2073 	ldxi_i(r0 + 1, rn(reg), 4);
2074     }
2075     jit_unget_reg(reg);
2076 }
2077 
2078 static void
_vfp_ldxr_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2079 _vfp_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2080 {
2081     jit_int32_t		reg;
2082     if (jit_fpr_p(r0)) {
2083 	reg = jit_get_reg(jit_class_gpr);
2084 	addr(rn(reg), r1, r2);
2085 	VLDR_F32(r0, rn(reg), 0);
2086 	jit_unget_reg(reg);
2087     }
2088     else
2089 	ldxr_i(r0, r1, r2);
2090 }
2091 
2092 static void
_vfp_ldxr_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2093 _vfp_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2094 {
2095     jit_int32_t		reg;
2096     reg = jit_get_reg(jit_class_gpr);
2097     addr(rn(reg), r1, r2);
2098     if (jit_fpr_p(r0))
2099 	VLDR_F64(r0, rn(reg), 0);
2100     else {
2101 	ldr_i(r0, rn(reg));
2102 	ldxi_i(r0 + 1, rn(reg), 4);
2103     }
2104     jit_unget_reg(reg);
2105 }
2106 
2107 static void
_vfp_ldxi_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)2108 _vfp_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2109 {
2110     jit_int32_t		reg;
2111     if (jit_fpr_p(r0)) {
2112 	if (i0 >= 0) {
2113 	    assert(!(i0 & 3));
2114 	    if (i0 < 1024)
2115 		VLDR_F32(r0, r1, i0 >> 2);
2116 	    else {
2117 		reg = jit_get_reg(jit_class_gpr);
2118 		addi(rn(reg), r1, i0);
2119 		VLDR_F32(r0, rn(reg), 0);
2120 		jit_unget_reg(reg);
2121 	    }
2122 	}
2123 	else {
2124 	    i0 = -i0;
2125 	    assert(!(i0 & 3));
2126 	    if (i0 < 1024)
2127 		VLDRN_F32(r0, r1, i0 >> 2);
2128 	    else {
2129 		reg = jit_get_reg(jit_class_gpr);
2130 		subi(rn(reg), r1, i0);
2131 		VLDR_F32(r0, rn(reg), 0);
2132 		jit_unget_reg(reg);
2133 	    }
2134 	}
2135     }
2136     else
2137 	ldxi_i(r0, r1, i0);
2138 }
2139 
2140 static void
_vfp_ldxi_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)2141 _vfp_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2142 {
2143     jit_int32_t		reg;
2144     if (jit_fpr_p(r0)) {
2145 	if (i0 >= 0) {
2146 	    assert(!(i0 & 3));
2147 	    if (i0 < 1024)
2148 		VLDR_F64(r0, r1, i0 >> 2);
2149 	    else {
2150 		reg = jit_get_reg(jit_class_gpr);
2151 		addi(rn(reg), r1, i0);
2152 		VLDR_F64(r0, rn(reg), 0);
2153 		jit_unget_reg(reg);
2154 	    }
2155 	}
2156 	else {
2157 	    i0 = -i0;
2158 	    assert(!(i0 & 3));
2159 	    if (i0 < 1024)
2160 		VLDRN_F64(r0, r1, i0 >> 2);
2161 	    else {
2162 		reg = jit_get_reg(jit_class_gpr);
2163 		subi(rn(reg), r1, i0);
2164 		VLDR_F64(r0, rn(reg), 0);
2165 		jit_unget_reg(reg);
2166 	    }
2167 	}
2168     }
2169     else {
2170 	reg = jit_get_reg(jit_class_gpr);
2171 	addi(rn(reg), r1, i0);
2172 	ldr_i(r0, rn(reg));
2173 	ldxi_i(r0 + 1, rn(reg), 4);
2174 	jit_unget_reg(reg);
2175     }
2176 }
2177 
2178 static void
_vfp_sti_f(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0)2179 _vfp_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2180 {
2181     jit_int32_t		reg;
2182     if (jit_fpr_p(r0)) {
2183 	reg = jit_get_reg(jit_class_gpr);
2184 	movi(rn(reg), i0);
2185 	VSTR_F32(r0, rn(reg), 0);
2186 	jit_unget_reg(reg);
2187     }
2188     else
2189 	sti_i(i0, r0);
2190 }
2191 
2192 static void
_vfp_sti_d(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0)2193 _vfp_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2194 {
2195     jit_int32_t		reg;
2196     reg = jit_get_reg(jit_class_gpr);
2197     movi(rn(reg), i0);
2198     if (jit_fpr_p(r0))
2199 	VSTR_F64(r0, rn(reg), 0);
2200     else {
2201 	str_i(rn(reg), r0);
2202 	stxi_i(4, rn(reg), r0 + 1);
2203     }
2204     jit_unget_reg(reg);
2205 }
2206 
2207 static void
_vfp_stxr_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2208 _vfp_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2209 {
2210     jit_int32_t		reg;
2211     if (jit_fpr_p(r2)) {
2212 	reg = jit_get_reg(jit_class_gpr);
2213 	addr(rn(reg), r0, r1);
2214 	VSTR_F32(r2, rn(reg), 0);
2215 	jit_unget_reg(reg);
2216     }
2217     else
2218 	stxr_i(r0, r1, r2);
2219 }
2220 
2221 static void
_vfp_stxr_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2222 _vfp_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2223 {
2224     jit_int32_t		reg;
2225     reg = jit_get_reg(jit_class_gpr);
2226     addr(rn(reg), r0, r1);
2227     if (jit_fpr_p(r2))
2228 	VSTR_F64(r2, rn(reg), 0);
2229     else {
2230 	str_i(rn(reg), r2);
2231 	stxi_i(4, rn(reg), r2 + 1);
2232     }
2233     jit_unget_reg(reg);
2234 }
2235 
2236 static void
_vfp_stxi_f(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)2237 _vfp_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2238 {
2239     jit_int32_t		reg;
2240     if (jit_fpr_p(r1)) {
2241 	if (i0 >= 0) {
2242 	    assert(!(i0 & 3));
2243 	    if (i0 < 1024)
2244 		VSTR_F32(r1, r0, i0 >> 2);
2245 	    else {
2246 		reg = jit_get_reg(jit_class_gpr);
2247 		addi(rn(reg), r0, i0);
2248 		VSTR_F32(r1, rn(reg), 0);
2249 		jit_unget_reg(reg);
2250 	    }
2251 	}
2252 	else {
2253 	    i0 = -i0;
2254 	    assert(!(i0 & 3));
2255 	    if (i0 < 1024)
2256 		VSTRN_F32(r1, r0, i0 >> 2);
2257 	    else {
2258 		reg = jit_get_reg(jit_class_gpr);
2259 		subi(rn(reg), r0, i0);
2260 		VSTR_F32(r1, rn(reg), 0);
2261 		jit_unget_reg(reg);
2262 	    }
2263 	}
2264     }
2265     else
2266 	stxi_i(i0, r0, r1);
2267 }
2268 
2269 static void
_vfp_stxi_d(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)2270 _vfp_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2271 {
2272     jit_int32_t		reg;
2273     if (jit_fpr_p(r1)) {
2274 	if (i0 >= 0) {
2275 	    assert(!(i0 & 3));
2276 	    if (i0 < 0124)
2277 		VSTR_F64(r1, r0, i0 >> 2);
2278 	    else {
2279 		reg = jit_get_reg(jit_class_gpr);
2280 		addi(rn(reg), r0, i0);
2281 		VSTR_F64(r1, rn(reg), 0);
2282 		jit_unget_reg(reg);
2283 	    }
2284 	}
2285 	else {
2286 	    i0 = -i0;
2287 	    assert(!(i0 & 3));
2288 	    if (i0 < 1024)
2289 		VSTRN_F64(r1, r0, i0 >> 2);
2290 	    else {
2291 		reg = jit_get_reg(jit_class_gpr);
2292 		subi(rn(reg), r0, i0);
2293 		VSTR_F64(r1, rn(reg), 0);
2294 		jit_unget_reg(reg);
2295 	    }
2296 	}
2297     }
2298     else {
2299 	reg = jit_get_reg(jit_class_gpr);
2300 	addi(rn(reg), r0, i0);
2301 	str_i(rn(reg), r1);
2302 	stxi_i(4, rn(reg), r1 + 1);
2303 	jit_unget_reg(reg);
2304     }
2305 }
2306 
2307 static void
_vfp_vaarg_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2308 _vfp_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2309 {
2310     jit_int32_t		reg;
2311 
2312     assert(_jitc->function->self.call & jit_call_varargs);
2313 
2314     /* Adjust pointer. */
2315     reg = jit_get_reg(jit_class_gpr);
2316     andi(rn(reg), r1, 7);
2317     addr(r1, r1, rn(reg));
2318     jit_unget_reg(reg);
2319 
2320     /* Load argument. */
2321     vfp_ldr_d(r0, r1);
2322 
2323     /* Update stack pointer. */
2324     addi(r1, r1, sizeof(jit_float64_t));
2325 }
2326 #  undef dbopi
2327 #  undef fbopi
2328 #  undef dopi
2329 #  undef fopi
2330 #endif
2331