1 /*
2 * Copyright (C) 2012-2019 Free Software Foundation, Inc.
3 *
4 * This file is part of GNU lightning.
5 *
6 * GNU lightning is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 3, or (at your option)
9 * any later version.
10 *
11 * GNU lightning is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 * License for more details.
15 *
16 * Authors:
17 * Paulo Cesar Pereira de Andrade
18 */
19
20 #if PROTO
/*
 * Base register number, the Q modifier bit and the FPSCR_* bit masks.
 *
 * _D8_REGNO is as per the vfp_regno macro; it is required because of the
 * "support" for soft float registers, i.e. using integer registers as
 * arguments to float operations.
 */
#  define _D8_REGNO             32
/* ARM_V_Q is defined again, with the same value, next to the other
 * ARM_V_* modifier bits. */
#  define ARM_V_Q               0x00000040

/* Condition flags */
#  define FPSCR_N               0x80000000      /* Negative flag */
#  define FPSCR_Z               0x40000000      /* Zero flag */
#  define FPSCR_C               0x20000000      /* Carry flag */
#  define FPSCR_V               0x10000000      /* Overflow flag */

/* Mode and status bits */
#  define FPSCR_QC              0x08000000      /* Cumulative saturation */
#  define FPSCR_AHP             0x04000000      /* Alt. half-precision */
#  define FPSCR_DN              0x02000000      /* Default NaN mode */
#  define FPSCR_FZ              0x01000000      /* Flush to zero */

/* Rounding mode: FPSCR_RMASK covers every FPSCR_R* selector value */
#  define FPSCR_RMASK           0x00c00000
#  define FPSCR_RN              0x00000000      /* Round to Nearest */
#  define FPSCR_RP              0x00400000      /* Round to Plus Infinity */
#  define FPSCR_RM              0x00800000      /* Round to Minus Infinity */
#  define FPSCR_RZ              0x00c00000      /* Round towards Zero */

/* Multi-bit fields and the reserved bit between them */
#  define FPSCR_STRIDE          0x00300000
#  define FPSCR_RES1            0x00080000      /* Reserved, UNK/SBZP */
#  define FPSCR_LEN             0x00070000

/* Trap bits */
#  define FPSCR_IDE             0x00008000      /* Input Denormal trap */
#  define FPSCR_IXE             0x00001000      /* Inexact trap */
#  define FPSCR_UFE             0x00000800      /* Underflow trap */
#  define FPSCR_OFE             0x00000400      /* Overflow trap */
#  define FPSCR_DZE             0x00000200      /* Division by zero trap */
#  define FPSCR_IOE             0x00000100      /* Invalid Operation trap */

/* Flag bits */
#  define FPSCR_IDC             0x00000080      /* Input Denormal flag */
#  define FPSCR_RES0            0x00000060      /* Reserved, UNK/SBZP */
#  define FPSCR_IXC             0x00000010      /* Inexact flag */
#  define FPSCR_UFC             0x00000008      /* Underflow flag */
#  define FPSCR_OFC             0x00000004      /* Overflow flag */
#  define FPSCR_DZC             0x00000002      /* Division by zero flag */
#  define FPSCR_IOC             0x00000001      /* Invalid Operation flag */
/*
 * VFP opcode templates and the modifier bits that are ORed into them.
 *
 * ARM_V_F64 is the bit the *_F64 composites below add to their base
 * template.  Every composite ARM_VCVT* value is wrapped in parentheses so
 * that it behaves as a single operand wherever it is expanded; without
 * them an expression such as (ARM_VCVT_F32_S32 & mask) would apply the
 * mask to the last OR-ed term only.
 */
#  define ARM_V_E               0x00000080      /* ARM_VCMP except if NaN */
#  define ARM_V_Z               0x00010000      /* ARM_VCMP with zero */
#  define ARM_V_F64             0x00000100

/* Arithmetic and unary float operations */
#  define ARM_VADD_F            0x0e300a00
#  define ARM_VSUB_F            0x0e300a40
#  define ARM_VMUL_F            0x0e200a00
#  define ARM_VDIV_F            0x0e800a00
#  define ARM_VABS_F            0x0eb00ac0
#  define ARM_VNEG_F            0x0eb10a40
#  define ARM_VSQRT_F           0x0eb10ac0
#  define ARM_VMOV_F            0x0eb00a40

/* Transfers between core and VFP registers */
#  define ARM_VMOV_A_S          0x0e100a10      /* vmov rn, sn */
#  define ARM_VMOV_S_A          0x0e000a10      /* vmov sn, rn */
#  define ARM_VMOV_AA_D         0x0c500b10      /* vmov rn,rn, dn */
#  define ARM_VMOV_D_AA         0x0c400b10      /* vmov dn, rn,rn */

/* Compare and status register access */
#  define ARM_VCMP              0x0eb40a40
#  define ARM_VMRS              0x0ef10a10
#  define ARM_VMSR              0x0ee10a10

/* Conversion: base template plus selector bits */
#  define ARM_VCVT_2I           0x00040000      /* to integer */
#  define ARM_VCVT_2S           0x00010000      /* to signed */
#  define ARM_VCVT_RS           0x00000080      /* round to zero or signed */
#  define ARM_VCVT              0x0eb80a40
#  define ARM_VCVT_S32_F32      (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS)
#  define ARM_VCVT_U32_F32      (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_RS)
#  define ARM_VCVT_S32_F64      (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS|ARM_V_F64)
#  define ARM_VCVT_U32_F64      (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_RS|ARM_V_F64)
#  define ARM_VCVT_F32_S32      (ARM_VCVT|ARM_VCVT_RS)
#  define ARM_VCVT_F32_U32      ARM_VCVT
#  define ARM_VCVT_F64_S32      (ARM_VCVT|ARM_VCVT_RS|ARM_V_F64)
#  define ARM_VCVT_F64_U32      (ARM_VCVT|ARM_V_F64)
#  define ARM_VCVT_F            0x0eb70ac0
#  define ARM_VCVT_F32_F64      ARM_VCVT_F
#  define ARM_VCVT_F64_F32      (ARM_VCVT_F|ARM_V_F64)
/* The VCVTR composites are the to-integer forms without ARM_VCVT_RS. */
#  define ARM_VCVTR_S32_F32     (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S)
#  define ARM_VCVTR_U32_F32     (ARM_VCVT|ARM_VCVT_2I)
#  define ARM_VCVTR_S32_F64     (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_V_F64)
#  define ARM_VCVTR_U32_F64     (ARM_VCVT|ARM_VCVT_2I|ARM_V_F64)
/*
 * Modifier bits and opcode templates for the integer vector operations.
 *
 * ARM_V_Q, ARM_V_U and the ARM_V_I* and ARM_V_S* element-size selectors
 * are ORed into the templates by the wrapper macros in this file.
 * NOTE(review): ARM_V_D, ARM_V_N and ARM_V_M are not used by any macro
 * visible here; presumably the encoders set them - confirm there.
 */
#  define ARM_V_D               0x00400000
#  define ARM_V_N               0x00000080
#  define ARM_V_Q               0x00000040
#  define ARM_V_M               0x00000020
#  define ARM_V_U               0x01000000
#  define ARM_V_I16             0x00100000
#  define ARM_V_I32             0x00200000
#  define ARM_V_I64             0x00300000
#  define ARM_V_S16             0x00040000
#  define ARM_V_S32             0x00080000

/* Add family */
#  define ARM_VADD_I            0x02000800
#  define ARM_VQADD_I           0x02000010      /* set flag on over/carry */
#  define ARM_VADDL_I           0x02800000      /* q=d+d */
#  define ARM_VADDW_I           0x02800100      /* q=q+d */

/* Subtract family */
#  define ARM_VSUB_I            0x03000800
#  define ARM_VQSUB_I           0x02000210      /* set flag on over/carry */
#  define ARM_VSUBL_I           0x02800200
#  define ARM_VSUBW_I           0x02800300

/* Multiply family */
#  define ARM_VMUL_I            0x02000910
#  define ARM_VMULL_I           0x02800c00

/* Absolute value and negation */
#  define ARM_VABS_I            0x03b10300
#  define ARM_VQABS_I           0x03b00700      /* sets flag on overflow */
#  define ARM_VNEG_I            0x03b10380
#  define ARM_VQNEG_I           0x03b00780      /* sets flag on overflow */

/* Bitwise logic */
#  define ARM_VAND              0x02000110
#  define ARM_VBIC              0x02100110
#  define ARM_VORR              0x02200110
#  define ARM_VORN              0x02300110
#  define ARM_VEOR              0x03000110

/* Lengthening move: element selector bits and base template */
#  define ARM_VMOVL_S8          0x00080000
#  define ARM_VMOVL_S16         0x00100000
#  define ARM_VMOVL_S32         0x00200000
#  define ARM_VMOVL_I           0x02800a10

/* Immediate moves */
#  define ARM_VMOVI             0x02800010
#  define ARM_VMVNI             0x02800030

/* Load/store templates */
#  define ARM_VLDR              0x0d100a00
#  define ARM_VSTR              0x0d000a00
#  define ARM_VM                0x0c000a00

/* Lane moves: selector bits and base templates */
#  define ARM_VMOV_ADV_U        0x00800000      /* zero extend */
#  define ARM_VMOV_ADV_8         0x00400000
#  define ARM_VMOV_ADV_16       0x00000020
#  define ARM_VMOV_A_D          0x0e100b10
#  define ARM_VMOV_D_A          0x0e000b10
134
135 # define vodi(oi,r0) _vodi(_jit,oi,r0)
136 static void _vodi(jit_state_t*,int,int) maybe_unused;
137 # define voqi(oi,r0) _voqi(_jit,oi,r0)
138 static void _voqi(jit_state_t*,int,int) maybe_unused;
139 # define vo_ss(o,r0,r1) _cc_vo_ss(_jit,ARM_CC_NV,o,r0,r1)
140 # define cc_vo_ss(cc,o,r0,r1) _cc_vo_ss(_jit,cc,o,r0,r1)
141 static void _cc_vo_ss(jit_state_t*,int,int,int,int);
142 # define vo_dd(o,r0,r1) _cc_vo_dd(_jit,ARM_CC_NV,o,r0,r1)
143 # define cc_vo_dd(cc,o,r0,r1) _cc_vo_dd(_jit,cc,o,r0,r1)
144 static void _cc_vo_dd(jit_state_t*,int,int,int,int);
145 # define vo_qd(o,r0,r1) _cc_vo_qd(_jit,ARM_CC_NV,o,r0,r1)
146 # define cc_vo_qd(cc,o,r0,r1) _cc_vo_qd(_jit,cc,o,r0,r1)
147 static void _cc_vo_qd(jit_state_t*,int,int,int,int) maybe_unused;
148 # define vo_qq(o,r0,r1) _cc_vo_qq(_jit,ARM_CC_NV,o,r0,r1)
149 # define cc_vo_qq(cc,o,r0,r1) _cc_vo_qq(_jit,cc,o,r0,r1)
150 static void _cc_vo_qq(jit_state_t*,int,int,int,int) maybe_unused;
151 # define vorr_(o,r0,r1) _cc_vorr_(_jit,ARM_CC_NV,o,r0,r1)
152 # define cc_vorr_(cc,o,r0,r1) _cc_vorr_(_jit,cc,o,r0,r1)
153 static void _cc_vorr_(jit_state_t*,int,int,int,int);
154 # define vors_(o,r0,r1) _cc_vors_(_jit,ARM_CC_NV,o,r0,r1)
155 # define cc_vors_(cc,o,r0,r1) _cc_vors_(_jit,cc,o,r0,r1)
156 static void _cc_vors_(jit_state_t*,int,int,int,int);
157 # define vorv_(o,r0,r1) _cc_vorv_(_jit,ARM_CC_NV,o,r0,r1)
158 # define cc_vorv_(cc,o,r0,r1) _cc_vorv_(_jit,cc,o,r0,r1)
159 static void _cc_vorv_(jit_state_t*,int,int,int,int) maybe_unused;
160 # define vori_(o,r0,r1) _cc_vori_(_jit,ARM_CC_NV,o,r0,r1)
161 # define cc_vori_(cc,o,r0,r1) _cc_vori_(_jit,cc,o,r0,r1)
162 static void _cc_vori_(jit_state_t*,int,int,int,int);
163 # define vorrd(o,r0,r1,r2) _cc_vorrd(_jit,ARM_CC_NV,o,r0,r1,r2)
164 # define cc_vorrd(cc,o,r0,r1,r2) _cc_vorrd(_jit,cc,o,r0,r1,r2)
165 static void _cc_vorrd(jit_state_t*,int,int,int,int,int);
166 # define vosss(o,r0,r1,r2) _cc_vosss(_jit,ARM_CC_NV,o,r0,r1,r2)
167 # define cc_vosss(cc,o,r0,r1,r2) _cc_vosss(_jit,cc,o,r0,r1,r2)
168 static void _cc_vosss(jit_state_t*,int,int,int,int,int);
169 # define voddd(o,r0,r1,r2) _cc_voddd(_jit,ARM_CC_NV,o,r0,r1,r2)
170 # define cc_voddd(cc,o,r0,r1,r2) _cc_voddd(_jit,cc,o,r0,r1,r2)
171 static void _cc_voddd(jit_state_t*,int,int,int,int,int);
172 # define voqdd(o,r0,r1,r2) _cc_voqdd(_jit,ARM_CC_NV,o,r0,r1,r2)
173 # define cc_voqdd(cc,o,r0,r1,r2) _cc_voqdd(_jit,cc,o,r0,r1,r2)
174 static void _cc_voqdd(jit_state_t*,int,int,int,int,int) maybe_unused;
175 # define voqqd(o,r0,r1,r2) _cc_voqqd(_jit,ARM_CC_NV,o,r0,r1,r2)
176 # define cc_voqqd(cc,o,r0,r1,r2) _cc_voqqd(_jit,cc,o,r0,r1,r2)
177 static void _cc_voqqd(jit_state_t*,int,int,int,int,int) maybe_unused;
178 # define voqqq(o,r0,r1,r2) _cc_voqqq(_jit,ARM_CC_NV,o,r0,r1,r2)
179 # define cc_voqqq(cc,o,r0,r1,r2) _cc_voqqq(_jit,cc,o,r0,r1,r2)
180 static void _cc_voqqq(jit_state_t*,int,int,int,int,int) maybe_unused;
181 # define cc_vldst(cc,o,r0,r1,i0) _cc_vldst(_jit,cc,o,r0,r1,i0)
182 static void _cc_vldst(jit_state_t*,int,int,int,int,int);
183 # define cc_vorsl(cc,o,r0,r1,i0) _cc_vorsl(_jit,cc,o,r0,r1,i0)
184 static void _cc_vorsl(jit_state_t*,int,int,int,int,int);
/*
 * Float arithmetic and unary operations.
 *
 * CC_* takes an explicit condition; the unprefixed macro supplies
 * ARM_CC_AL.  _F32 forms use the sss/ss emitters, _F64 forms OR ARM_V_F64
 * into the opcode and use the ddd/dd emitters.
 */

/* Three-register forms */
#  define CC_VADD_F32(cc,r0,r1,r2)      cc_vosss(cc,ARM_VADD_F,r0,r1,r2)
#  define    VADD_F32(r0,r1,r2)         CC_VADD_F32(ARM_CC_AL,r0,r1,r2)
#  define CC_VADD_F64(cc,r0,r1,r2)      cc_voddd(cc,ARM_VADD_F|ARM_V_F64,r0,r1,r2)
#  define    VADD_F64(r0,r1,r2)         CC_VADD_F64(ARM_CC_AL,r0,r1,r2)
#  define CC_VSUB_F32(cc,r0,r1,r2)      cc_vosss(cc,ARM_VSUB_F,r0,r1,r2)
#  define    VSUB_F32(r0,r1,r2)         CC_VSUB_F32(ARM_CC_AL,r0,r1,r2)
#  define CC_VSUB_F64(cc,r0,r1,r2)      cc_voddd(cc,ARM_VSUB_F|ARM_V_F64,r0,r1,r2)
#  define    VSUB_F64(r0,r1,r2)         CC_VSUB_F64(ARM_CC_AL,r0,r1,r2)
#  define CC_VMUL_F32(cc,r0,r1,r2)      cc_vosss(cc,ARM_VMUL_F,r0,r1,r2)
#  define    VMUL_F32(r0,r1,r2)         CC_VMUL_F32(ARM_CC_AL,r0,r1,r2)
#  define CC_VMUL_F64(cc,r0,r1,r2)      cc_voddd(cc,ARM_VMUL_F|ARM_V_F64,r0,r1,r2)
#  define    VMUL_F64(r0,r1,r2)         CC_VMUL_F64(ARM_CC_AL,r0,r1,r2)
#  define CC_VDIV_F32(cc,r0,r1,r2)      cc_vosss(cc,ARM_VDIV_F,r0,r1,r2)
#  define    VDIV_F32(r0,r1,r2)         CC_VDIV_F32(ARM_CC_AL,r0,r1,r2)
#  define CC_VDIV_F64(cc,r0,r1,r2)      cc_voddd(cc,ARM_VDIV_F|ARM_V_F64,r0,r1,r2)
#  define    VDIV_F64(r0,r1,r2)         CC_VDIV_F64(ARM_CC_AL,r0,r1,r2)

/* Two-register forms */
#  define CC_VABS_F32(cc,r0,r1)         cc_vo_ss(cc,ARM_VABS_F,r0,r1)
#  define    VABS_F32(r0,r1)            CC_VABS_F32(ARM_CC_AL,r0,r1)
#  define CC_VABS_F64(cc,r0,r1)         cc_vo_dd(cc,ARM_VABS_F|ARM_V_F64,r0,r1)
#  define    VABS_F64(r0,r1)            CC_VABS_F64(ARM_CC_AL,r0,r1)
#  define CC_VNEG_F32(cc,r0,r1)         cc_vo_ss(cc,ARM_VNEG_F,r0,r1)
#  define    VNEG_F32(r0,r1)            CC_VNEG_F32(ARM_CC_AL,r0,r1)
#  define CC_VNEG_F64(cc,r0,r1)         cc_vo_dd(cc,ARM_VNEG_F|ARM_V_F64,r0,r1)
#  define    VNEG_F64(r0,r1)            CC_VNEG_F64(ARM_CC_AL,r0,r1)
#  define CC_VSQRT_F32(cc,r0,r1)        cc_vo_ss(cc,ARM_VSQRT_F,r0,r1)
#  define    VSQRT_F32(r0,r1)           CC_VSQRT_F32(ARM_CC_AL,r0,r1)
#  define CC_VSQRT_F64(cc,r0,r1)        cc_vo_dd(cc,ARM_VSQRT_F|ARM_V_F64,r0,r1)
#  define    VSQRT_F64(r0,r1)           CC_VSQRT_F64(ARM_CC_AL,r0,r1)
#  define CC_VMOV_F32(cc,r0,r1)         cc_vo_ss(cc,ARM_VMOV_F,r0,r1)
#  define    VMOV_F32(r0,r1)            CC_VMOV_F32(ARM_CC_AL,r0,r1)
#  define CC_VMOV_F64(cc,r0,r1)         cc_vo_dd(cc,ARM_VMOV_F|ARM_V_F64,r0,r1)
#  define    VMOV_F64(r0,r1)            CC_VMOV_F64(ARM_CC_AL,r0,r1)
/*
 * Moves between core registers and VFP registers.
 *
 * The *_D_AA and *_S_A wrappers rotate their arguments before calling the
 * emitter, so that the emitter sees the same operand order as for the
 * opposite direction.
 */
#  define CC_VMOV_AA_D(cc,r0,r1,r2)     cc_vorrd(cc,ARM_VMOV_AA_D,r0,r1,r2)
#  define    VMOV_AA_D(r0,r1,r2)        CC_VMOV_AA_D(ARM_CC_AL,r0,r1,r2)
#  define CC_VMOV_D_AA(cc,r0,r1,r2)     cc_vorrd(cc,ARM_VMOV_D_AA,r1,r2,r0)
#  define    VMOV_D_AA(r0,r1,r2)        CC_VMOV_D_AA(ARM_CC_AL,r0,r1,r2)
#  define CC_VMOV_A_S(cc,r0,r1)         cc_vors_(cc,ARM_VMOV_A_S,r0,r1)
#  define    VMOV_A_S(r0,r1)            CC_VMOV_A_S(ARM_CC_AL,r0,r1)
#  define CC_VMOV_S_A(cc,r0,r1)         cc_vors_(cc,ARM_VMOV_S_A,r1,r0)
#  define    VMOV_S_A(r0,r1)            CC_VMOV_S_A(ARM_CC_AL,r0,r1)
/*
 * Compare and status-register wrappers.
 *
 * All compares start from ARM_VCMP; ARM_V_E, ARM_V_Z and ARM_V_F64 are
 * ORed in for the E, Z and F64 variants.  The Z variants and VMRS/VMSR
 * pass a literal 0 as the second register argument.
 */
#  define CC_VCMP_F32(cc,r0,r1)         cc_vo_ss(cc,ARM_VCMP,r0,r1)
#  define    VCMP_F32(r0,r1)            CC_VCMP_F32(ARM_CC_AL,r0,r1)
#  define CC_VCMP_F64(cc,r0,r1)         cc_vo_dd(cc,ARM_VCMP|ARM_V_F64,r0,r1)
#  define    VCMP_F64(r0,r1)            CC_VCMP_F64(ARM_CC_AL,r0,r1)
#  define CC_VCMPE_F32(cc,r0,r1)        cc_vo_ss(cc,ARM_VCMP|ARM_V_E,r0,r1)
#  define    VCMPE_F32(r0,r1)           CC_VCMPE_F32(ARM_CC_AL,r0,r1)
#  define CC_VCMPE_F64(cc,r0,r1)        cc_vo_dd(cc,ARM_VCMP|ARM_V_E|ARM_V_F64,r0,r1)
#  define    VCMPE_F64(r0,r1)           CC_VCMPE_F64(ARM_CC_AL,r0,r1)
#  define CC_VCMPZ_F32(cc,r0)           cc_vo_ss(cc,ARM_VCMP|ARM_V_Z,r0,0)
#  define    VCMPZ_F32(r0)              CC_VCMPZ_F32(ARM_CC_AL,r0)
#  define CC_VCMPZ_F64(cc,r0)           cc_vo_dd(cc,ARM_VCMP|ARM_V_Z|ARM_V_F64,r0,0)
#  define    VCMPZ_F64(r0)              CC_VCMPZ_F64(ARM_CC_AL,r0)
#  define CC_VCMPEZ_F32(cc,r0)          cc_vo_ss(cc,ARM_VCMP|ARM_V_Z|ARM_V_E,r0,0)
#  define    VCMPEZ_F32(r0)             CC_VCMPEZ_F32(ARM_CC_AL,r0)
#  define CC_VCMPEZ_F64(cc,r0)          cc_vo_dd(cc,ARM_VCMP|ARM_V_Z|ARM_V_E|ARM_V_F64,r0,0)
#  define    VCMPEZ_F64(r0)             CC_VCMPEZ_F64(ARM_CC_AL,r0)
#  define CC_VMRS(cc,r0)                cc_vorr_(cc,ARM_VMRS,r0,0)
#  define    VMRS(r0)                   CC_VMRS(ARM_CC_AL,r0)
#  define CC_VMSR(cc,r0)                cc_vorr_(cc,ARM_VMSR,r0,0)
#  define    VMSR(r0)                   CC_VMSR(ARM_CC_AL,r0)
/*
 * Conversion wrappers.
 *
 * Every conversion, the _F64 ones included, is emitted through cc_vo_ss
 * with the matching ARM_VCVT* / ARM_VCVTR* opcode.  CC_* takes an explicit
 * condition, the unprefixed macro supplies ARM_CC_AL.
 */

/* Float to integer */
#  define CC_VCVT_S32_F32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_S32_F32,r0,r1)
#  define    VCVT_S32_F32(r0,r1)        CC_VCVT_S32_F32(ARM_CC_AL,r0,r1)
#  define CC_VCVT_U32_F32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_U32_F32,r0,r1)
#  define    VCVT_U32_F32(r0,r1)        CC_VCVT_U32_F32(ARM_CC_AL,r0,r1)
#  define CC_VCVT_S32_F64(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_S32_F64,r0,r1)
#  define    VCVT_S32_F64(r0,r1)        CC_VCVT_S32_F64(ARM_CC_AL,r0,r1)
#  define CC_VCVT_U32_F64(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_U32_F64,r0,r1)
#  define    VCVT_U32_F64(r0,r1)        CC_VCVT_U32_F64(ARM_CC_AL,r0,r1)

/* Integer to float */
#  define CC_VCVT_F32_S32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_F32_S32,r0,r1)
#  define    VCVT_F32_S32(r0,r1)        CC_VCVT_F32_S32(ARM_CC_AL,r0,r1)
#  define CC_VCVT_F32_U32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_F32_U32,r0,r1)
#  define    VCVT_F32_U32(r0,r1)        CC_VCVT_F32_U32(ARM_CC_AL,r0,r1)
#  define CC_VCVT_F64_S32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_F64_S32,r0,r1)
#  define    VCVT_F64_S32(r0,r1)        CC_VCVT_F64_S32(ARM_CC_AL,r0,r1)
#  define CC_VCVT_F64_U32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_F64_U32,r0,r1)
#  define    VCVT_F64_U32(r0,r1)        CC_VCVT_F64_U32(ARM_CC_AL,r0,r1)

/* Between float widths */
#  define CC_VCVT_F32_F64(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_F32_F64,r0,r1)
#  define    VCVT_F32_F64(r0,r1)        CC_VCVT_F32_F64(ARM_CC_AL,r0,r1)
#  define CC_VCVT_F64_F32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_F64_F32,r0,r1)
#  define    VCVT_F64_F32(r0,r1)        CC_VCVT_F64_F32(ARM_CC_AL,r0,r1)

/* VCVTR float to integer */
#  define CC_VCVTR_S32_F32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVTR_S32_F32,r0,r1)
#  define    VCVTR_S32_F32(r0,r1)       CC_VCVTR_S32_F32(ARM_CC_AL,r0,r1)
#  define CC_VCVTR_U32_F32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVTR_U32_F32,r0,r1)
#  define    VCVTR_U32_F32(r0,r1)       CC_VCVTR_U32_F32(ARM_CC_AL,r0,r1)
#  define CC_VCVTR_S32_F64(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVTR_S32_F64,r0,r1)
#  define    VCVTR_S32_F64(r0,r1)       CC_VCVTR_S32_F64(ARM_CC_AL,r0,r1)
#  define CC_VCVTR_U32_F64(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVTR_U32_F64,r0,r1)
#  define    VCVTR_U32_F64(r0,r1)       CC_VCVTR_U32_F64(ARM_CC_AL,r0,r1)
/*
 * Load/store multiple, push and pop.
 *
 * Every form is ARM_VM plus a combination of ARM_M_L, ARM_M_I, ARM_M_B and
 * ARM_M_U, with ARM_V_F64 added for the _F64 variants, emitted through
 * cc_vorsl.  CC_* takes an explicit condition; the unprefixed macro
 * supplies ARM_CC_AL.
 */

/* Without ARM_M_U */
#  define CC_VLDMIA_F32(cc,r0,r1,i0)    cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I,r0,r1,i0)
#  define    VLDMIA_F32(r0,r1,i0)       CC_VLDMIA_F32(ARM_CC_AL,r0,r1,i0)
#  define CC_VLDMIA_F64(cc,r0,r1,i0)    cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_V_F64,r0,r1,i0)
#  define    VLDMIA_F64(r0,r1,i0)       CC_VLDMIA_F64(ARM_CC_AL,r0,r1,i0)
#  define CC_VSTMIA_F32(cc,r0,r1,i0)    cc_vorsl(cc,ARM_VM|ARM_M_I,r0,r1,i0)
#  define    VSTMIA_F32(r0,r1,i0)       CC_VSTMIA_F32(ARM_CC_AL,r0,r1,i0)
#  define CC_VSTMIA_F64(cc,r0,r1,i0)    cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_V_F64,r0,r1,i0)
#  define    VSTMIA_F64(r0,r1,i0)       CC_VSTMIA_F64(ARM_CC_AL,r0,r1,i0)

/* With ARM_M_U, ARM_M_I forms */
#  define CC_VLDMIA_U_F32(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U,r0,r1,i0)
#  define    VLDMIA_U_F32(r0,r1,i0)     CC_VLDMIA_U_F32(ARM_CC_AL,r0,r1,i0)
#  define CC_VLDMIA_U_F64(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0)
#  define    VLDMIA_U_F64(r0,r1,i0)     CC_VLDMIA_U_F64(ARM_CC_AL,r0,r1,i0)
#  define CC_VSTMIA_U_F32(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U,r0,r1,i0)
#  define    VSTMIA_U_F32(r0,r1,i0)     CC_VSTMIA_U_F32(ARM_CC_AL,r0,r1,i0)
#  define CC_VSTMIA_U_F64(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0)
#  define    VSTMIA_U_F64(r0,r1,i0)     CC_VSTMIA_U_F64(ARM_CC_AL,r0,r1,i0)

/* With ARM_M_U, ARM_M_B forms */
#  define CC_VLDMDB_U_F32(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U,r0,r1,i0)
#  define    VLDMDB_U_F32(r0,r1,i0)     CC_VLDMDB_U_F32(ARM_CC_AL,r0,r1,i0)
#  define CC_VLDMDB_U_F64(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0)
#  define    VLDMDB_U_F64(r0,r1,i0)     CC_VLDMDB_U_F64(ARM_CC_AL,r0,r1,i0)
#  define CC_VSTMDB_U_F32(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U,r0,r1,i0)
#  define    VSTMDB_U_F32(r0,r1,i0)     CC_VSTMDB_U_F32(ARM_CC_AL,r0,r1,i0)
#  define CC_VSTMDB_U_F64(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0)
#  define    VSTMDB_U_F64(r0,r1,i0)     CC_VSTMDB_U_F64(ARM_CC_AL,r0,r1,i0)

/* Push is VSTMDB_U and pop is VLDMIA_U, both with _SP_REGNO as first register */
#  define CC_VPUSH_F32(cc,r0,i0)        CC_VSTMDB_U_F32(cc,_SP_REGNO,r0,i0)
#  define    VPUSH_F32(r0,i0)           CC_VPUSH_F32(ARM_CC_AL,r0,i0)
#  define CC_VPUSH_F64(cc,r0,i0)        CC_VSTMDB_U_F64(cc,_SP_REGNO,r0,i0)
#  define    VPUSH_F64(r0,i0)           CC_VPUSH_F64(ARM_CC_AL,r0,i0)
#  define CC_VPOP_F32(cc,r0,i0)         CC_VLDMIA_U_F32(cc,_SP_REGNO,r0,i0)
#  define    VPOP_F32(r0,i0)            CC_VPOP_F32(ARM_CC_AL,r0,i0)
#  define CC_VPOP_F64(cc,r0,i0)         CC_VLDMIA_U_F64(cc,_SP_REGNO,r0,i0)
#  define    VPOP_F64(r0,i0)            CC_VPOP_F64(ARM_CC_AL,r0,i0)
/*
 * Moves built on ARM_VMOV_A_D and ARM_VMOV_D_A.
 *
 * The 8 and 16 bit forms add ARM_VMOV_ADV_8 / ARM_VMOV_ADV_16 and go
 * through cc_vorv_; the 32 bit forms go through cc_vori_.  The U variants
 * add ARM_VMOV_ADV_U.  The VMOV_V_* wrappers swap r0 and r1 before calling
 * the emitter.
 */
#  define CC_VMOV_A_S8(cc,r0,r1)        cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8,r0,r1)
#  define    VMOV_A_S8(r0,r1)           CC_VMOV_A_S8(ARM_CC_AL,r0,r1)
#  define CC_VMOV_A_U8(cc,r0,r1)        cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8|ARM_VMOV_ADV_U,r0,r1)
#  define    VMOV_A_U8(r0,r1)           CC_VMOV_A_U8(ARM_CC_AL,r0,r1)
#  define CC_VMOV_A_S16(cc,r0,r1)       cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_16,r0,r1)
#  define    VMOV_A_S16(r0,r1)          CC_VMOV_A_S16(ARM_CC_AL,r0,r1)
#  define CC_VMOV_A_U16(cc,r0,r1)       cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_16|ARM_VMOV_ADV_U,r0,r1)
#  define    VMOV_A_U16(r0,r1)          CC_VMOV_A_U16(ARM_CC_AL,r0,r1)
#  define CC_VMOV_A_S32(cc,r0,r1)       cc_vori_(cc,ARM_VMOV_A_D,r0,r1)
#  define    VMOV_A_S32(r0,r1)          CC_VMOV_A_S32(ARM_CC_AL,r0,r1)
#  define CC_VMOV_A_U32(cc,r0,r1)       cc_vori_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_U,r0,r1)
#  define    VMOV_A_U32(r0,r1)          CC_VMOV_A_U32(ARM_CC_AL,r0,r1)
#  define CC_VMOV_V_I8(cc,r0,r1)        cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_8,r1,r0)
#  define    VMOV_V_I8(r0,r1)           CC_VMOV_V_I8(ARM_CC_AL,r0,r1)
#  define CC_VMOV_V_I16(cc,r0,r1)       cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_16,r1,r0)
#  define    VMOV_V_I16(r0,r1)          CC_VMOV_V_I16(ARM_CC_AL,r0,r1)
#  define CC_VMOV_V_I32(cc,r0,r1)       cc_vori_(cc,ARM_VMOV_D_A,r1,r0)
#  define    VMOV_V_I32(r0,r1)          CC_VMOV_V_I32(ARM_CC_AL,r0,r1)
/*
 * Integer vector add family.
 *
 * Plain forms go through voddd; Q forms add ARM_V_Q and go through voqqq.
 * The element size is selected with ARM_V_I16/I32/I64 (none for 8 bit) and
 * unsigned variants add ARM_V_U.  VADDL uses voqdd and VADDW uses voqqd.
 */

/* VADD */
#  define VADD_I8(r0,r1,r2)             voddd(ARM_VADD_I,r0,r1,r2)
#  define VADDQ_I8(r0,r1,r2)            voqqq(ARM_VADD_I|ARM_V_Q,r0,r1,r2)
#  define VADD_I16(r0,r1,r2)            voddd(ARM_VADD_I|ARM_V_I16,r0,r1,r2)
#  define VADDQ_I16(r0,r1,r2)           voqqq(ARM_VADD_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
#  define VADD_I32(r0,r1,r2)            voddd(ARM_VADD_I|ARM_V_I32,r0,r1,r2)
#  define VADDQ_I32(r0,r1,r2)           voqqq(ARM_VADD_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
#  define VADD_I64(r0,r1,r2)            voddd(ARM_VADD_I|ARM_V_I64,r0,r1,r2)
#  define VADDQ_I64(r0,r1,r2)           voqqq(ARM_VADD_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)

/* VQADD */
#  define VQADD_S8(r0,r1,r2)            voddd(ARM_VQADD_I,r0,r1,r2)
#  define VQADDQ_S8(r0,r1,r2)           voqqq(ARM_VQADD_I|ARM_V_Q,r0,r1,r2)
#  define VQADD_U8(r0,r1,r2)            voddd(ARM_VQADD_I|ARM_V_U,r0,r1,r2)
#  define VQADDQ_U8(r0,r1,r2)           voqqq(ARM_VQADD_I|ARM_V_U|ARM_V_Q,r0,r1,r2)
#  define VQADD_S16(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_I16,r0,r1,r2)
#  define VQADDQ_S16(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
#  define VQADD_U16(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
#  define VQADDQ_U16(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_I16|ARM_V_U|ARM_V_Q,r0,r1,r2)
#  define VQADD_S32(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_I32,r0,r1,r2)
#  define VQADDQ_S32(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
#  define VQADD_U32(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
#  define VQADDQ_U32(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_I32|ARM_V_U|ARM_V_Q,r0,r1,r2)
#  define VQADD_S64(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_I64,r0,r1,r2)
#  define VQADDQ_S64(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
#  define VQADD_U64(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_I64|ARM_V_U,r0,r1,r2)
#  define VQADDQ_U64(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_I64|ARM_V_U|ARM_V_Q,r0,r1,r2)

/* VADDL */
#  define VADDL_S8(r0,r1,r2)            voqdd(ARM_VADDL_I,r0,r1,r2)
#  define VADDL_U8(r0,r1,r2)            voqdd(ARM_VADDL_I|ARM_V_U,r0,r1,r2)
#  define VADDL_S16(r0,r1,r2)           voqdd(ARM_VADDL_I|ARM_V_I16,r0,r1,r2)
#  define VADDL_U16(r0,r1,r2)           voqdd(ARM_VADDL_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
#  define VADDL_S32(r0,r1,r2)           voqdd(ARM_VADDL_I|ARM_V_I32,r0,r1,r2)
#  define VADDL_U32(r0,r1,r2)           voqdd(ARM_VADDL_I|ARM_V_I32|ARM_V_U,r0,r1,r2)

/* VADDW */
#  define VADDW_S8(r0,r1,r2)            voqqd(ARM_VADDW_I,r0,r1,r2)
#  define VADDW_U8(r0,r1,r2)            voqqd(ARM_VADDW_I|ARM_V_U,r0,r1,r2)
#  define VADDW_S16(r0,r1,r2)           voqqd(ARM_VADDW_I|ARM_V_I16,r0,r1,r2)
#  define VADDW_U16(r0,r1,r2)           voqqd(ARM_VADDW_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
#  define VADDW_S32(r0,r1,r2)           voqqd(ARM_VADDW_I|ARM_V_I32,r0,r1,r2)
#  define VADDW_U32(r0,r1,r2)           voqqd(ARM_VADDW_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
/*
 * Integer vector subtract family.
 *
 * Plain forms go through voddd; Q forms add ARM_V_Q and go through voqqq.
 * The element size is selected with ARM_V_I16/I32/I64 (none for 8 bit) and
 * unsigned variants add ARM_V_U.  VSUBL uses voqdd and VSUBW uses voqqd.
 */

/* VSUB */
#  define VSUB_I8(r0,r1,r2)             voddd(ARM_VSUB_I,r0,r1,r2)
#  define VSUBQ_I8(r0,r1,r2)            voqqq(ARM_VSUB_I|ARM_V_Q,r0,r1,r2)
#  define VSUB_I16(r0,r1,r2)            voddd(ARM_VSUB_I|ARM_V_I16,r0,r1,r2)
#  define VSUBQ_I16(r0,r1,r2)           voqqq(ARM_VSUB_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
#  define VSUB_I32(r0,r1,r2)            voddd(ARM_VSUB_I|ARM_V_I32,r0,r1,r2)
#  define VSUBQ_I32(r0,r1,r2)           voqqq(ARM_VSUB_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
#  define VSUB_I64(r0,r1,r2)            voddd(ARM_VSUB_I|ARM_V_I64,r0,r1,r2)
#  define VSUBQ_I64(r0,r1,r2)           voqqq(ARM_VSUB_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)

/* VQSUB */
#  define VQSUB_S8(r0,r1,r2)            voddd(ARM_VQSUB_I,r0,r1,r2)
#  define VQSUBQ_S8(r0,r1,r2)           voqqq(ARM_VQSUB_I|ARM_V_Q,r0,r1,r2)
#  define VQSUB_U8(r0,r1,r2)            voddd(ARM_VQSUB_I|ARM_V_U,r0,r1,r2)
#  define VQSUBQ_U8(r0,r1,r2)           voqqq(ARM_VQSUB_I|ARM_V_U|ARM_V_Q,r0,r1,r2)
#  define VQSUB_S16(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_I16,r0,r1,r2)
#  define VQSUBQ_S16(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
#  define VQSUB_U16(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
#  define VQSUBQ_U16(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_I16|ARM_V_U|ARM_V_Q,r0,r1,r2)
#  define VQSUB_S32(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_I32,r0,r1,r2)
#  define VQSUBQ_S32(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
#  define VQSUB_U32(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
#  define VQSUBQ_U32(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_I32|ARM_V_U|ARM_V_Q,r0,r1,r2)
#  define VQSUB_S64(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_I64,r0,r1,r2)
#  define VQSUBQ_S64(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
#  define VQSUB_U64(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_I64|ARM_V_U,r0,r1,r2)
#  define VQSUBQ_U64(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_I64|ARM_V_U|ARM_V_Q,r0,r1,r2)

/* VSUBL */
#  define VSUBL_S8(r0,r1,r2)            voqdd(ARM_VSUBL_I,r0,r1,r2)
#  define VSUBL_U8(r0,r1,r2)            voqdd(ARM_VSUBL_I|ARM_V_U,r0,r1,r2)
#  define VSUBL_S16(r0,r1,r2)           voqdd(ARM_VSUBL_I|ARM_V_I16,r0,r1,r2)
#  define VSUBL_U16(r0,r1,r2)           voqdd(ARM_VSUBL_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
#  define VSUBL_S32(r0,r1,r2)           voqdd(ARM_VSUBL_I|ARM_V_I32,r0,r1,r2)
#  define VSUBL_U32(r0,r1,r2)           voqdd(ARM_VSUBL_I|ARM_V_I32|ARM_V_U,r0,r1,r2)

/* VSUBW */
#  define VSUBW_S8(r0,r1,r2)            voqqd(ARM_VSUBW_I,r0,r1,r2)
#  define VSUBW_U8(r0,r1,r2)            voqqd(ARM_VSUBW_I|ARM_V_U,r0,r1,r2)
#  define VSUBW_S16(r0,r1,r2)           voqqd(ARM_VSUBW_I|ARM_V_I16,r0,r1,r2)
#  define VSUBW_U16(r0,r1,r2)           voqqd(ARM_VSUBW_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
#  define VSUBW_S32(r0,r1,r2)           voqqd(ARM_VSUBW_I|ARM_V_I32,r0,r1,r2)
#  define VSUBW_U32(r0,r1,r2)           voqqd(ARM_VSUBW_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
/*
 * Integer vector multiply: plain forms through voddd, Q forms (with
 * ARM_V_Q) through voqqq; ARM_V_I16/I32 select the wider elements.
 */
#  define VMUL_I8(r0,r1,r2)             voddd(ARM_VMUL_I,r0,r1,r2)
#  define VMULQ_I8(r0,r1,r2)            voqqq(ARM_VMUL_I|ARM_V_Q,r0,r1,r2)
#  define VMUL_I16(r0,r1,r2)            voddd(ARM_VMUL_I|ARM_V_I16,r0,r1,r2)
#  define VMULQ_I16(r0,r1,r2)           voqqq(ARM_VMUL_I|ARM_V_Q|ARM_V_I16,r0,r1,r2)
#  define VMUL_I32(r0,r1,r2)            voddd(ARM_VMUL_I|ARM_V_I32,r0,r1,r2)
#  define VMULQ_I32(r0,r1,r2)           voqqq(ARM_VMUL_I|ARM_V_Q|ARM_V_I32,r0,r1,r2)
/*
 * Long integer vector multiply (VMULL).
 *
 * VMULL writes a q register from two d registers, the same operand shape
 * as ARM_VADDL_I ("q=d+d"), so every variant is emitted through voqdd.
 * Previously the signed variants used voddd and the unsigned ones voqqq,
 * which cannot both be right for one opcode and matched neither shape.
 * r0 is therefore a q register and r1/r2 are d registers.
 * Unsigned variants add ARM_V_U; ARM_V_I16/I32 select the wider elements.
 */
#  define VMULL_S8(r0,r1,r2)            voqdd(ARM_VMULL_I,r0,r1,r2)
#  define VMULL_U8(r0,r1,r2)            voqdd(ARM_VMULL_I|ARM_V_U,r0,r1,r2)
#  define VMULL_S16(r0,r1,r2)           voqdd(ARM_VMULL_I|ARM_V_I16,r0,r1,r2)
#  define VMULL_U16(r0,r1,r2)           voqdd(ARM_VMULL_I|ARM_V_U|ARM_V_I16,r0,r1,r2)
#  define VMULL_S32(r0,r1,r2)           voqdd(ARM_VMULL_I|ARM_V_I32,r0,r1,r2)
#  define VMULL_U32(r0,r1,r2)           voqdd(ARM_VMULL_I|ARM_V_U|ARM_V_I32,r0,r1,r2)
/*
 * Integer vector absolute value and negation, plain and Q-prefixed opcodes.
 *
 * d-register forms go through vo_dd; Q-register forms add ARM_V_Q and go
 * through vo_qq.  ARM_V_S16/S32 select the wider elements.
 */

/* VABS */
#  define VABS_S8(r0,r1)                vo_dd(ARM_VABS_I,r0,r1)
#  define VABSQ_S8(r0,r1)               vo_qq(ARM_VABS_I|ARM_V_Q,r0,r1)
#  define VABS_S16(r0,r1)               vo_dd(ARM_VABS_I|ARM_V_S16,r0,r1)
#  define VABSQ_S16(r0,r1)              vo_qq(ARM_VABS_I|ARM_V_S16|ARM_V_Q,r0,r1)
#  define VABS_S32(r0,r1)               vo_dd(ARM_VABS_I|ARM_V_S32,r0,r1)
#  define VABSQ_S32(r0,r1)              vo_qq(ARM_VABS_I|ARM_V_S32|ARM_V_Q,r0,r1)

/* VQABS */
#  define VQABS_S8(r0,r1)               vo_dd(ARM_VQABS_I,r0,r1)
#  define VQABSQ_S8(r0,r1)              vo_qq(ARM_VQABS_I|ARM_V_Q,r0,r1)
#  define VQABS_S16(r0,r1)              vo_dd(ARM_VQABS_I|ARM_V_S16,r0,r1)
#  define VQABSQ_S16(r0,r1)             vo_qq(ARM_VQABS_I|ARM_V_S16|ARM_V_Q,r0,r1)
#  define VQABS_S32(r0,r1)              vo_dd(ARM_VQABS_I|ARM_V_S32,r0,r1)
#  define VQABSQ_S32(r0,r1)             vo_qq(ARM_VQABS_I|ARM_V_S32|ARM_V_Q,r0,r1)

/* VNEG */
#  define VNEG_S8(r0,r1)                vo_dd(ARM_VNEG_I,r0,r1)
#  define VNEGQ_S8(r0,r1)               vo_qq(ARM_VNEG_I|ARM_V_Q,r0,r1)
#  define VNEG_S16(r0,r1)               vo_dd(ARM_VNEG_I|ARM_V_S16,r0,r1)
#  define VNEGQ_S16(r0,r1)              vo_qq(ARM_VNEG_I|ARM_V_S16|ARM_V_Q,r0,r1)
#  define VNEG_S32(r0,r1)               vo_dd(ARM_VNEG_I|ARM_V_S32,r0,r1)
#  define VNEGQ_S32(r0,r1)              vo_qq(ARM_VNEG_I|ARM_V_S32|ARM_V_Q,r0,r1)

/* VQNEG */
#  define VQNEG_S8(r0,r1)               vo_dd(ARM_VQNEG_I,r0,r1)
#  define VQNEGQ_S8(r0,r1)              vo_qq(ARM_VQNEG_I|ARM_V_Q,r0,r1)
#  define VQNEG_S16(r0,r1)              vo_dd(ARM_VQNEG_I|ARM_V_S16,r0,r1)
#  define VQNEGQ_S16(r0,r1)             vo_qq(ARM_VQNEG_I|ARM_V_S16|ARM_V_Q,r0,r1)
#  define VQNEG_S32(r0,r1)              vo_dd(ARM_VQNEG_I|ARM_V_S32,r0,r1)
#  define VQNEGQ_S32(r0,r1)             vo_qq(ARM_VQNEG_I|ARM_V_S32|ARM_V_Q,r0,r1)
/*
 * Bitwise vector operations and vector moves.
 *
 * Logic ops go through voddd, their Q forms (with ARM_V_Q) through voqqq.
 * VMOV and VMOVQ are VORR / VORRQ with the source register passed twice.
 * VMOVL is ARM_VMOVL_I plus an ARM_VMOVL_S* selector through vo_qd; the U
 * variants add ARM_V_U to the same selector.
 */
#  define VAND(r0,r1,r2)                voddd(ARM_VAND,r0,r1,r2)
#  define VANDQ(r0,r1,r2)               voqqq(ARM_VAND|ARM_V_Q,r0,r1,r2)
#  define VBIC(r0,r1,r2)                voddd(ARM_VBIC,r0,r1,r2)
#  define VBICQ(r0,r1,r2)               voqqq(ARM_VBIC|ARM_V_Q,r0,r1,r2)
#  define VORR(r0,r1,r2)                voddd(ARM_VORR,r0,r1,r2)
#  define VORRQ(r0,r1,r2)               voqqq(ARM_VORR|ARM_V_Q,r0,r1,r2)
#  define VORN(r0,r1,r2)                voddd(ARM_VORN,r0,r1,r2)
#  define VORNQ(r0,r1,r2)               voqqq(ARM_VORN|ARM_V_Q,r0,r1,r2)
#  define VEOR(r0,r1,r2)                voddd(ARM_VEOR,r0,r1,r2)
#  define VEORQ(r0,r1,r2)               voqqq(ARM_VEOR|ARM_V_Q,r0,r1,r2)

#  define VMOV(r0,r1)                   VORR(r0,r1,r1)
#  define VMOVQ(r0,r1)                  VORRQ(r0,r1,r1)

#  define VMOVL_S8(r0,r1)               vo_qd(ARM_VMOVL_I|ARM_VMOVL_S8,r0,r1)
#  define VMOVL_U8(r0,r1)               vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S8,r0,r1)
#  define VMOVL_S16(r0,r1)              vo_qd(ARM_VMOVL_I|ARM_VMOVL_S16,r0,r1)
#  define VMOVL_U16(r0,r1)              vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S16,r0,r1)
#  define VMOVL_S32(r0,r1)              vo_qd(ARM_VMOVL_I|ARM_VMOVL_S32,r0,r1)
#  define VMOVL_U32(r0,r1)              vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S32,r0,r1)
/*
 * Vector immediate loads.  "oi" should be the result of encode_vfp_double.
 *
 * VIMMQ ORs ARM_V_Q into "oi"; the argument is parenthesized so that an
 * expression with lower precedence than '|' (for example a conditional
 * expression) is combined as a whole instead of only its last operand.
 */
#  define VIMM(oi,r0)                   vodi(oi,r0)
#  define VIMMQ(oi,r0)                  voqi((oi)|ARM_V_Q,r0)
/*
 * Single register load/store.  The index is multiplied by four.
 *
 * The VLDR/VSTR forms add ARM_P to the opcode, the VLDRN/VSTRN forms do
 * not; _F64 variants add ARM_V_F64.  All are emitted through cc_vldst.
 * CC_* takes an explicit condition, the unprefixed macro supplies
 * ARM_CC_AL.
 */

/* Loads */
#  define CC_VLDRN_F32(cc,r0,r1,i0)     cc_vldst(cc,ARM_VLDR,r0,r1,i0)
#  define    VLDRN_F32(r0,r1,i0)        CC_VLDRN_F32(ARM_CC_AL,r0,r1,i0)
#  define CC_VLDR_F32(cc,r0,r1,i0)      cc_vldst(cc,ARM_VLDR|ARM_P,r0,r1,i0)
#  define    VLDR_F32(r0,r1,i0)         CC_VLDR_F32(ARM_CC_AL,r0,r1,i0)
#  define CC_VLDRN_F64(cc,r0,r1,i0)     cc_vldst(cc,ARM_VLDR|ARM_V_F64,r0,r1,i0)
#  define    VLDRN_F64(r0,r1,i0)        CC_VLDRN_F64(ARM_CC_AL,r0,r1,i0)
#  define CC_VLDR_F64(cc,r0,r1,i0)      cc_vldst(cc,ARM_VLDR|ARM_V_F64|ARM_P,r0,r1,i0)
#  define    VLDR_F64(r0,r1,i0)         CC_VLDR_F64(ARM_CC_AL,r0,r1,i0)

/* Stores */
#  define CC_VSTRN_F32(cc,r0,r1,i0)     cc_vldst(cc,ARM_VSTR,r0,r1,i0)
#  define    VSTRN_F32(r0,r1,i0)        CC_VSTRN_F32(ARM_CC_AL,r0,r1,i0)
#  define CC_VSTR_F32(cc,r0,r1,i0)      cc_vldst(cc,ARM_VSTR|ARM_P,r0,r1,i0)
#  define    VSTR_F32(r0,r1,i0)         CC_VSTR_F32(ARM_CC_AL,r0,r1,i0)
#  define CC_VSTRN_F64(cc,r0,r1,i0)     cc_vldst(cc,ARM_VSTR|ARM_V_F64,r0,r1,i0)
#  define    VSTRN_F64(r0,r1,i0)        CC_VSTRN_F64(ARM_CC_AL,r0,r1,i0)
#  define CC_VSTR_F64(cc,r0,r1,i0)      cc_vldst(cc,ARM_VSTR|ARM_V_F64|ARM_P,r0,r1,i0)
#  define    VSTR_F64(r0,r1,i0)         CC_VSTR_F64(ARM_CC_AL,r0,r1,i0)
469 # define vfp_movr_f(r0,r1) _vfp_movr_f(_jit,r0,r1)
470 static void _vfp_movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
471 # define vfp_movr_d(r0,r1) _vfp_movr_d(_jit,r0,r1)
472 static void _vfp_movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
473 # define vfp_movi_f(r0,i0) _vfp_movi_f(_jit,r0,i0)
474 static void _vfp_movi_f(jit_state_t*,jit_int32_t,jit_float32_t);
475 # define vfp_movi_d(r0,i0) _vfp_movi_d(_jit,r0,i0)
476 static void _vfp_movi_d(jit_state_t*,jit_int32_t,jit_float64_t);
477 # define vfp_extr_f(r0,r1) _vfp_extr_f(_jit,r0,r1)
478 static void _vfp_extr_f(jit_state_t*,jit_int32_t,jit_int32_t);
479 # define vfp_extr_d(r0,r1) _vfp_extr_d(_jit,r0,r1)
480 static void _vfp_extr_d(jit_state_t*,jit_int32_t,jit_int32_t);
481 # define vfp_extr_d_f(r0,r1) _vfp_extr_d_f(_jit,r0,r1)
482 static void _vfp_extr_d_f(jit_state_t*,jit_int32_t,jit_int32_t);
483 # define vfp_extr_f_d(r0,r1) _vfp_extr_f_d(_jit,r0,r1)
484 static void _vfp_extr_f_d(jit_state_t*,jit_int32_t,jit_int32_t);
485 # define vfp_truncr_f_i(r0,r1) _vfp_truncr_f_i(_jit,r0,r1)
486 static void _vfp_truncr_f_i(jit_state_t*,jit_int32_t,jit_int32_t);
/* vfp_truncr_d_i, then unary and binary arithmetic.  The register forms
 * below expand directly to an instruction macro; every immediate (i0)
 * form is implemented by a helper that receives the constant by value. */
# define vfp_truncr_d_i(r0,r1) _vfp_truncr_d_i(_jit,r0,r1)
static void _vfp_truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t);
# define vfp_absr_f(r0,r1) VABS_F32(r0,r1)
# define vfp_absr_d(r0,r1) VABS_F64(r0,r1)
# define vfp_negr_f(r0,r1) VNEG_F32(r0,r1)
# define vfp_negr_d(r0,r1) VNEG_F64(r0,r1)
# define vfp_sqrtr_f(r0,r1) VSQRT_F32(r0,r1)
# define vfp_sqrtr_d(r0,r1) VSQRT_F64(r0,r1)
# define vfp_addr_f(r0,r1,r2) VADD_F32(r0,r1,r2)
# define vfp_addi_f(r0,r1,i0) _vfp_addi_f(_jit,r0,r1,i0)
static void _vfp_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define vfp_addr_d(r0,r1,r2) VADD_F64(r0,r1,r2)
# define vfp_addi_d(r0,r1,i0) _vfp_addi_d(_jit,r0,r1,i0)
static void _vfp_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
# define vfp_subr_f(r0,r1,r2) VSUB_F32(r0,r1,r2)
# define vfp_subi_f(r0,r1,i0) _vfp_subi_f(_jit,r0,r1,i0)
static void _vfp_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define vfp_subr_d(r0,r1,r2) VSUB_F64(r0,r1,r2)
# define vfp_subi_d(r0,r1,i0) _vfp_subi_d(_jit,r0,r1,i0)
static void _vfp_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* Reverse subtract: the register form is subr with r1 and r2 swapped. */
# define vfp_rsbr_f(r0,r1,r2) vfp_subr_f(r0,r2,r1)
# define vfp_rsbi_f(r0,r1,i0) _vfp_rsbi_f(_jit,r0,r1,i0)
static void _vfp_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define vfp_rsbr_d(r0,r1,r2) vfp_subr_d(r0,r2,r1)
# define vfp_rsbi_d(r0,r1,i0) _vfp_rsbi_d(_jit,r0,r1,i0)
static void _vfp_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
# define vfp_mulr_f(r0,r1,r2) VMUL_F32(r0,r1,r2)
# define vfp_muli_f(r0,r1,i0) _vfp_muli_f(_jit,r0,r1,i0)
static void _vfp_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define vfp_mulr_d(r0,r1,r2) VMUL_F64(r0,r1,r2)
# define vfp_muli_d(r0,r1,i0) _vfp_muli_d(_jit,r0,r1,i0)
static void _vfp_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
# define vfp_divr_f(r0,r1,r2) VDIV_F32(r0,r1,r2)
# define vfp_divi_f(r0,r1,i0) _vfp_divi_f(_jit,r0,r1,i0)
static void _vfp_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define vfp_divr_d(r0,r1,r2) VDIV_F64(r0,r1,r2)
# define vfp_divi_d(r0,r1,i0) _vfp_divi_d(_jit,r0,r1,i0)
static void _vfp_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* Comparison helpers and the lt/le/eq/ge/gt/ne predicates.  Each
 * register-form predicate hands a pair of ARM condition codes (c0, c1)
 * to vcmp01_f or vcmp01_d; the immediate forms have their own helpers. */
# define vfp_cmp_f(r0,r1) _vfp_cmp_f(_jit,r0,r1)
static void _vfp_cmp_f(jit_state_t*,jit_int32_t,jit_int32_t);
# define vfp_cmp_d(r0,r1) _vfp_cmp_d(_jit,r0,r1)
static void _vfp_cmp_d(jit_state_t*,jit_int32_t,jit_int32_t);
# define vcmp01_x(c0,c1,r0) _vcmp01_x(_jit,c0,c1,r0)
static void _vcmp01_x(jit_state_t*,int,int,jit_int32_t);
# define vcmp01_f(c0,c1,r0,r1,r2) _vcmp01_f(_jit,c0,c1,r0,r1,r2)
static void _vcmp01_f(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
# define vcmp01_d(c0,c1,r0,r1,r2) _vcmp01_d(_jit,c0,c1,r0,r1,r2)
static void _vcmp01_d(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
# define vfp_ltr_f(r0,r1,r2) vcmp01_f(ARM_CC_PL,ARM_CC_MI,r0,r1,r2)
# define vfp_lti_f(r0,r1,i0) _vfp_lti_f(_jit,r0,r1,i0)
static void _vfp_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define vfp_ltr_d(r0,r1,r2) vcmp01_d(ARM_CC_PL,ARM_CC_MI,r0,r1,r2)
# define vfp_lti_d(r0,r1,i0) _vfp_lti_d(_jit,r0,r1,i0)
static void _vfp_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
# define vfp_ler_f(r0,r1,r2) vcmp01_f(ARM_CC_HS,ARM_CC_LS,r0,r1,r2)
# define vfp_lei_f(r0,r1,i0) _vfp_lei_f(_jit,r0,r1,i0)
static void _vfp_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define vfp_ler_d(r0,r1,r2) vcmp01_d(ARM_CC_HS,ARM_CC_LS,r0,r1,r2)
# define vfp_lei_d(r0,r1,i0) _vfp_lei_d(_jit,r0,r1,i0)
static void _vfp_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
# define vfp_eqr_f(r0,r1,r2) vcmp01_f(ARM_CC_NE,ARM_CC_EQ,r0,r1,r2)
# define vfp_eqi_f(r0,r1,i0) _vfp_eqi_f(_jit,r0,r1,i0)
static void _vfp_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define vfp_eqr_d(r0,r1,r2) vcmp01_d(ARM_CC_NE,ARM_CC_EQ,r0,r1,r2)
# define vfp_eqi_d(r0,r1,i0) _vfp_eqi_d(_jit,r0,r1,i0)
static void _vfp_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
# define vfp_ger_f(r0,r1,r2) vcmp01_f(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
# define vfp_gei_f(r0,r1,i0) _vfp_gei_f(_jit,r0,r1,i0)
static void _vfp_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define vfp_ger_d(r0,r1,r2) vcmp01_d(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
# define vfp_gei_d(r0,r1,i0) _vfp_gei_d(_jit,r0,r1,i0)
static void _vfp_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
# define vfp_gtr_f(r0,r1,r2) vcmp01_f(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
# define vfp_gti_f(r0,r1,i0) _vfp_gti_f(_jit,r0,r1,i0)
static void _vfp_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define vfp_gtr_d(r0,r1,r2) vcmp01_d(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
# define vfp_gti_d(r0,r1,i0) _vfp_gti_d(_jit,r0,r1,i0)
static void _vfp_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
# define vfp_ner_f(r0,r1,r2) vcmp01_f(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
# define vfp_nei_f(r0,r1,i0) _vfp_nei_f(_jit,r0,r1,i0)
static void _vfp_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define vfp_ner_d(r0,r1,r2) vcmp01_d(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
# define vfp_nei_d(r0,r1,i0) _vfp_nei_d(_jit,r0,r1,i0)
static void _vfp_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* Predicates built on the single-condition helpers vcmp_10_* and
 * vcmp_01_* (unlt/unle and unge/ungt), followed by uneq, ltgt, ord and
 * unord, which have dedicated helper functions for both forms.
 * Note the helper spelling: vcmp10_x has no underscore before "10",
 * while vcmp_10_f/vcmp_10_d and vcmp_01_x do. */
# define vcmp10_x(c0,r0) _vcmp10_x(_jit,c0,r0)
static void _vcmp10_x(jit_state_t*,int,jit_int32_t);
# define vcmp_10_f(c0,r0,r1,r2) _vcmp_10_f(_jit,c0,r0,r1,r2)
static void _vcmp_10_f(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
# define vcmp_10_d(c0,r0,r1,r2) _vcmp_10_d(_jit,c0,r0,r1,r2)
static void _vcmp_10_d(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
# define vfp_unltr_f(r0,r1,r2) vcmp_10_f(ARM_CC_GE,r0,r1,r2)
# define vfp_unlti_f(r0,r1,i0) _vfp_unlti_f(_jit,r0,r1,i0)
static void _vfp_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define vfp_unltr_d(r0,r1,r2) vcmp_10_d(ARM_CC_GE,r0,r1,r2)
# define vfp_unlti_d(r0,r1,i0) _vfp_unlti_d(_jit,r0,r1,i0)
static void _vfp_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
# define vfp_unler_f(r0,r1,r2) vcmp_10_f(ARM_CC_GT,r0,r1,r2)
# define vfp_unlei_f(r0,r1,i0) _vfp_unlei_f(_jit,r0,r1,i0)
static void _vfp_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define vfp_unler_d(r0,r1,r2) vcmp_10_d(ARM_CC_GT,r0,r1,r2)
# define vfp_unlei_d(r0,r1,i0) _vfp_unlei_d(_jit,r0,r1,i0)
static void _vfp_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
# define vfp_uneqr_x(r0) _vfp_uneqr_x(_jit,r0)
static void _vfp_uneqr_x(jit_state_t*,jit_int32_t);
# define vfp_uneqr_f(r0,r1,r2) _vfp_uneqr_f(_jit,r0,r1,r2)
static void _vfp_uneqr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define vfp_uneqi_f(r0,r1,i0) _vfp_uneqi_f(_jit,r0,r1,i0)
static void _vfp_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define vfp_uneqr_d(r0,r1,r2) _vfp_uneqr_d(_jit,r0,r1,r2)
static void _vfp_uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define vfp_uneqi_d(r0,r1,i0) _vfp_uneqi_d(_jit,r0,r1,i0)
static void _vfp_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
# define vcmp_01_x(c0,r0) _vcmp_01_x(_jit,c0,r0)
static void _vcmp_01_x(jit_state_t*,int,jit_int32_t);
# define vcmp_01_f(c0,r0,r1,r2) _vcmp_01_f(_jit,c0,r0,r1,r2)
static void _vcmp_01_f(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
# define vcmp_01_d(c0,r0,r1,r2) _vcmp_01_d(_jit,c0,r0,r1,r2)
static void _vcmp_01_d(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
# define vfp_unger_f(r0,r1,r2) vcmp_01_f(ARM_CC_CS,r0,r1,r2)
# define vfp_ungei_f(r0,r1,i0) _vfp_ungei_f(_jit,r0,r1,i0)
static void _vfp_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define vfp_unger_d(r0,r1,r2) vcmp_01_d(ARM_CC_CS,r0,r1,r2)
# define vfp_ungei_d(r0,r1,i0) _vfp_ungei_d(_jit,r0,r1,i0)
static void _vfp_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
# define vfp_ungtr_f(r0,r1,r2) vcmp_01_f(ARM_CC_HI,r0,r1,r2)
# define vfp_ungti_f(r0,r1,i0) _vfp_ungti_f(_jit,r0,r1,i0)
static void _vfp_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define vfp_ungtr_d(r0,r1,r2) vcmp_01_d(ARM_CC_HI,r0,r1,r2)
# define vfp_ungti_d(r0,r1,i0) _vfp_ungti_d(_jit,r0,r1,i0)
static void _vfp_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
# define vfp_ltgtr_x(r0) _vfp_ltgtr_x(_jit,r0)
static void _vfp_ltgtr_x(jit_state_t*,jit_int32_t);
# define vfp_ltgtr_f(r0,r1,r2) _vfp_ltgtr_f(_jit,r0,r1,r2)
static void _vfp_ltgtr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define vfp_ltgti_f(r0,r1,i0) _vfp_ltgti_f(_jit,r0,r1,i0)
static void _vfp_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define vfp_ltgtr_d(r0,r1,r2) _vfp_ltgtr_d(_jit,r0,r1,r2)
static void _vfp_ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define vfp_ltgti_d(r0,r1,i0) _vfp_ltgti_d(_jit,r0,r1,i0)
static void _vfp_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
# define vfp_ordr_f(r0,r1,r2) _vfp_ordr_f(_jit,r0,r1,r2)
static void _vfp_ordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define vfp_ordi_f(r0,r1,i0) _vfp_ordi_f(_jit,r0,r1,i0)
static void _vfp_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define vfp_ordr_d(r0,r1,r2) _vfp_ordr_d(_jit,r0,r1,r2)
static void _vfp_ordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define vfp_ordi_d(r0,r1,i0) _vfp_ordi_d(_jit,r0,r1,i0)
static void _vfp_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
# define vfp_unordr_f(r0,r1,r2) _vfp_unordr_f(_jit,r0,r1,r2)
static void _vfp_unordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define vfp_unordi_f(r0,r1,i0) _vfp_unordi_f(_jit,r0,r1,i0)
static void _vfp_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
# define vfp_unordr_d(r0,r1,r2) _vfp_unordr_d(_jit,r0,r1,r2)
static void _vfp_unordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define vfp_unordi_d(r0,r1,i0) _vfp_unordi_d(_jit,r0,r1,i0)
static void _vfp_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
643 # define vbcmp_x(cc,i0) _vbcmp_x(_jit,cc,i0)
644 static jit_word_t _vbcmp_x(jit_state_t*,int,jit_word_t);
645 # define vbcmp_f(cc,i0,r0,r1) _vbcmp_f(_jit,cc,i0,r0,r1)
646 static jit_word_t
647 _vbcmp_f(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
648 # define vbcmp_x(cc,i0) _vbcmp_x(_jit,cc,i0)
649 static jit_word_t _vbcmp_x(jit_state_t*,int,jit_word_t);
650 # define vbcmp_d(cc,i0,r0,r1) _vbcmp_d(_jit,cc,i0,r0,r1)
651 static jit_word_t
652 _vbcmp_d(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
/* blt/ble/beq/bge/bgt/bne branches.  Register forms pass one ARM
 * condition code to vbcmp_f/vbcmp_d; immediate forms (i1 is the constant)
 * call their own helper.  All of them yield a jit_word_t. */
# define vfp_bltr_f(i0,r0,r1) vbcmp_f(ARM_CC_MI,i0,r0,r1)
# define vfp_blti_f(i0,r0,i1) _vfp_blti_f(_jit,i0,r0,i1)
static jit_word_t _vfp_blti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
# define vfp_bltr_d(i0,r0,r1) vbcmp_d(ARM_CC_MI,i0,r0,r1)
static jit_word_t _vfp_blti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
# define vfp_blti_d(i0,r0,i1) _vfp_blti_d(_jit,i0,r0,i1)
# define vfp_bler_f(i0,r0,r1) vbcmp_f(ARM_CC_LS,i0,r0,r1)
# define vfp_blei_f(i0,r0,i1) _vfp_blei_f(_jit,i0,r0,i1)
static jit_word_t _vfp_blei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
# define vfp_bler_d(i0,r0,r1) vbcmp_d(ARM_CC_LS,i0,r0,r1)
# define vfp_blei_d(i0,r0,i1) _vfp_blei_d(_jit,i0,r0,i1)
static jit_word_t _vfp_blei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
# define vfp_beqr_f(i0,r0,r1) vbcmp_f(ARM_CC_EQ,i0,r0,r1)
# define vfp_beqi_f(i0,r0,i1) _vfp_beqi_f(_jit,i0,r0,i1)
static jit_word_t _vfp_beqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
# define vfp_beqr_d(i0,r0,r1) vbcmp_d(ARM_CC_EQ,i0,r0,r1)
# define vfp_beqi_d(i0,r0,i1) _vfp_beqi_d(_jit,i0,r0,i1)
static jit_word_t _vfp_beqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
# define vfp_bger_f(i0,r0,r1) vbcmp_f(ARM_CC_GE,i0,r0,r1)
# define vfp_bgei_f(i0,r0,i1) _vfp_bgei_f(_jit,i0,r0,i1)
static jit_word_t _vfp_bgei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
# define vfp_bger_d(i0,r0,r1) vbcmp_d(ARM_CC_GE,i0,r0,r1)
# define vfp_bgei_d(i0,r0,i1) _vfp_bgei_d(_jit,i0,r0,i1)
static jit_word_t _vfp_bgei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
# define vfp_bgtr_f(i0,r0,r1) vbcmp_f(ARM_CC_GT,i0,r0,r1)
# define vfp_bgti_f(i0,r0,i1) _vfp_bgti_f(_jit,i0,r0,i1)
static jit_word_t _vfp_bgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
# define vfp_bgtr_d(i0,r0,r1) vbcmp_d(ARM_CC_GT,i0,r0,r1)
# define vfp_bgti_d(i0,r0,i1) _vfp_bgti_d(_jit,i0,r0,i1)
static jit_word_t _vfp_bgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
# define vfp_bner_f(i0,r0,r1) vbcmp_f(ARM_CC_NE,i0,r0,r1)
# define vfp_bnei_f(i0,r0,i1) _vfp_bnei_f(_jit,i0,r0,i1)
static jit_word_t _vfp_bnei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
# define vfp_bner_d(i0,r0,r1) vbcmp_d(ARM_CC_NE,i0,r0,r1)
# define vfp_bnei_d(i0,r0,i1) _vfp_bnei_d(_jit,i0,r0,i1)
static jit_word_t _vfp_bnei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* Remaining branch forms.  bunlt/bunle go through the vbncmp_* helpers;
 * buneq, bunge and bltgt have dedicated functions for both register and
 * immediate forms; bungt, bord and bunord register forms reuse
 * vbcmp_f/vbcmp_d with ARM_CC_HI, ARM_CC_VC and ARM_CC_VS. */
# define vbncmp_x(cc,i0) _vbncmp_x(_jit,cc,i0)
static jit_word_t _vbncmp_x(jit_state_t*,int,jit_word_t);
# define vbncmp_f(cc,i0,r0,r1) _vbncmp_f(_jit,cc,i0,r0,r1)
static jit_word_t
_vbncmp_f(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
# define vbncmp_d(cc,i0,r0,r1) _vbncmp_d(_jit,cc,i0,r0,r1)
static jit_word_t
_vbncmp_d(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
# define vfp_bunltr_f(i0,r0,r1) vbncmp_f(ARM_CC_GE,i0,r0,r1)
# define vfp_bunlti_f(i0,r0,i1) _vfp_bunlti_f(_jit,i0,r0,i1)
static jit_word_t
_vfp_bunlti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
# define vfp_bunltr_d(i0,r0,r1) vbncmp_d(ARM_CC_GE,i0,r0,r1)
# define vfp_bunlti_d(i0,r0,i1) _vfp_bunlti_d(_jit,i0,r0,i1)
static jit_word_t
_vfp_bunlti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
# define vfp_bunler_f(i0,r0,r1) vbncmp_f(ARM_CC_GT,i0,r0,r1)
# define vfp_bunlei_f(i0,r0,i1) _vfp_bunlei_f(_jit,i0,r0,i1)
static jit_word_t
_vfp_bunlei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
# define vfp_bunler_d(i0,r0,r1) vbncmp_d(ARM_CC_GT,i0,r0,r1)
# define vfp_bunlei_d(i0,r0,i1) _vfp_bunlei_d(_jit,i0,r0,i1)
static jit_word_t
_vfp_bunlei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
# define vfp_buneqr_x(i0) _vfp_buneqr_x(_jit,i0)
static jit_word_t _vfp_buneqr_x(jit_state_t*,jit_word_t);
# define vfp_buneqr_f(i0,r0,r1) _vfp_buneqr_f(_jit,i0,r0,r1)
static jit_word_t
_vfp_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define vfp_buneqi_f(i0,r0,i1) _vfp_buneqi_f(_jit,i0,r0,i1)
static jit_word_t
_vfp_buneqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
# define vfp_buneqr_d(i0,r0,r1) _vfp_buneqr_d(_jit,i0,r0,r1)
static jit_word_t
_vfp_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define vfp_buneqi_d(i0,r0,i1) _vfp_buneqi_d(_jit,i0,r0,i1)
static jit_word_t
_vfp_buneqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
# define vfp_bunger_x(i0) _vfp_bunger_x(_jit,i0)
static jit_word_t _vfp_bunger_x(jit_state_t*,jit_word_t);
# define vfp_bunger_f(i0,r0,r1) _vfp_bunger_f(_jit,i0,r0,r1)
static jit_word_t
_vfp_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define vfp_bungei_f(i0,r0,i1) _vfp_bungei_f(_jit,i0,r0,i1)
static jit_word_t
_vfp_bungei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
# define vfp_bunger_d(i0,r0,r1) _vfp_bunger_d(_jit,i0,r0,r1)
static jit_word_t
_vfp_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define vfp_bungei_d(i0,r0,i1) _vfp_bungei_d(_jit,i0,r0,i1)
static jit_word_t
_vfp_bungei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
# define vfp_bungtr_f(i0,r0,r1) vbcmp_f(ARM_CC_HI,i0,r0,r1)
# define vfp_bungti_f(i0,r0,i1) _vfp_bungti_f(_jit,i0,r0,i1)
static jit_word_t
_vfp_bungti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
# define vfp_bungtr_d(i0,r0,r1) vbcmp_d(ARM_CC_HI,i0,r0,r1)
# define vfp_bungti_d(i0,r0,i1) _vfp_bungti_d(_jit,i0,r0,i1)
static jit_word_t
_vfp_bungti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
# define vfp_bltgtr_x(i0) _vfp_bltgtr_x(_jit,i0)
static jit_word_t _vfp_bltgtr_x(jit_state_t*,jit_word_t);
# define vfp_bltgtr_f(i0,r0,r1) _vfp_bltgtr_f(_jit,i0,r0,r1)
static jit_word_t
_vfp_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define vfp_bltgti_f(i0,r0,i1) _vfp_bltgti_f(_jit,i0,r0,i1)
static jit_word_t
_vfp_bltgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
# define vfp_bltgtr_d(i0,r0,r1) _vfp_bltgtr_d(_jit,i0,r0,r1)
static jit_word_t
_vfp_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define vfp_bltgti_d(i0,r0,i1) _vfp_bltgti_d(_jit,i0,r0,i1)
static jit_word_t
_vfp_bltgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
# define vfp_bordr_f(i0,r0,r1) vbcmp_f(ARM_CC_VC,i0,r0,r1)
# define vfp_bordi_f(i0,r0,i1) _vfp_bordi_f(_jit,i0,r0,i1)
static jit_word_t
_vfp_bordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
# define vfp_bordr_d(i0,r0,r1) vbcmp_d(ARM_CC_VC,i0,r0,r1)
# define vfp_bordi_d(i0,r0,i1) _vfp_bordi_d(_jit,i0,r0,i1)
static jit_word_t
_vfp_bordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
# define vfp_bunordr_f(i0,r0,r1) vbcmp_f(ARM_CC_VS,i0,r0,r1)
# define vfp_bunordi_f(i0,r0,i1) _vfp_bunordi_f(_jit,i0,r0,i1)
static jit_word_t
_vfp_bunordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
# define vfp_bunordr_d(i0,r0,r1) vbcmp_d(ARM_CC_VS,i0,r0,r1)
# define vfp_bunordi_d(i0,r0,i1) _vfp_bunordi_d(_jit,i0,r0,i1)
static jit_word_t
_vfp_bunordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* Loads and stores.  ldr/str expand to VLDR/VSTR with a zero offset; note
 * that the str macros swap their arguments when forwarding to VSTR.  The
 * absolute (i), indexed-register (xr) and indexed-immediate (xi) forms
 * are helper functions, as is vfp_vaarg_d. */
# define vfp_ldr_f(r0,r1) VLDR_F32(r0,r1,0)
# define vfp_ldr_d(r0,r1) VLDR_F64(r0,r1,0)
# define vfp_ldi_f(r0,i0) _vfp_ldi_f(_jit,r0,i0)
static void _vfp_ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
# define vfp_ldi_d(r0,i0) _vfp_ldi_d(_jit,r0,i0)
static void _vfp_ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
# define vfp_ldxr_f(r0,r1,r2) _vfp_ldxr_f(_jit,r0,r1,r2)
static void _vfp_ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define vfp_ldxr_d(r0,r1,r2) _vfp_ldxr_d(_jit,r0,r1,r2)
static void _vfp_ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define vfp_ldxi_f(r0,r1,i0) _vfp_ldxi_f(_jit,r0,r1,i0)
static void _vfp_ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define vfp_ldxi_d(r0,r1,i0) _vfp_ldxi_d(_jit,r0,r1,i0)
static void _vfp_ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define vfp_str_f(r0,r1) VSTR_F32(r1,r0,0)
# define vfp_str_d(r0,r1) VSTR_F64(r1,r0,0)
# define vfp_sti_f(i0,r0) _vfp_sti_f(_jit,i0,r0)
static void _vfp_sti_f(jit_state_t*,jit_word_t,jit_int32_t);
# define vfp_sti_d(i0,r0) _vfp_sti_d(_jit,i0,r0)
static void _vfp_sti_d(jit_state_t*,jit_word_t,jit_int32_t);
# define vfp_stxr_f(r0,r1,r2) _vfp_stxr_f(_jit,r0,r1,r2)
static void _vfp_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define vfp_stxr_d(r0,r1,r2) _vfp_stxr_d(_jit,r0,r1,r2)
static void _vfp_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define vfp_stxi_f(i0,r0,r1) _vfp_stxi_f(_jit,i0,r0,r1)
static void _vfp_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define vfp_stxi_d(i0,r0,r1) _vfp_stxi_d(_jit,i0,r0,r1)
static void _vfp_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define vfp_vaarg_d(r0, r1) _vfp_vaarg_d(_jit, r0, r1)
static void _vfp_vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
809 #endif
810
811 #if CODE
/* Turn a register number rn into the value placed in an instruction's
 * 4-bit register field: subtract 16, then drop the low bit.  Emitters
 * that accept odd numbers test (rn & 1) themselves before using this. */
# define vfp_regno(rn) (((rn) - 16) >> 1)
813
814 static int
encode_vfp_double(int mov,int inv,unsigned lo,unsigned hi)815 encode_vfp_double(int mov, int inv, unsigned lo, unsigned hi)
816 {
817 int code, mode, imm, mask;
818
819 if (hi != lo) {
820 if (mov && !inv) {
821 /* (I64)
822 * aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh
823 */
824 for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
825 imm = lo & mask;
826 if (imm != mask && imm != 0)
827 goto fail;
828 imm = hi & mask;
829 if (imm != mask && imm != 0)
830 goto fail;
831 }
832 mode = 0xe20;
833 imm = (((hi & 0x80000000) >> 24) | ((hi & 0x00800000) >> 17) |
834 ((hi & 0x00008000) >> 10) | ((hi & 0x00000080) >> 3) |
835 ((lo & 0x80000000) >> 28) | ((lo & 0x00800000) >> 21) |
836 ((lo & 0x00008000) >> 14) | ((lo & 0x00000080) >> 7));
837 goto success;
838 }
839 goto fail;
840 }
841 /* (I32)
842 * 00000000 00000000 00000000 abcdefgh
843 * 00000000 00000000 abcdefgh 00000000
844 * 00000000 abcdefgh 00000000 00000000
845 * abcdefgh 00000000 00000000 00000000 */
846 for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
847 if ((lo & mask) == lo) {
848 imm = lo >> (mode << 3);
849 mode <<= 9;
850 goto success;
851 }
852 }
853 /* (I16)
854 * 00000000 abcdefgh 00000000 abcdefgh
855 * abcdefgh 00000000 abcdefgh 00000000 */
856 for (mode = 0, mask = 0xff; mode < 2; mask <<= 8, mode++) {
857 if ((lo & mask) && ((lo & (mask << 16)) >> 16) == (lo & mask)) {
858 imm = lo >> (mode << 3);
859 mode = 0x800 | (mode << 9);
860 goto success;
861 }
862 }
863 if (mov) {
864 /* (I32)
865 * 00000000 00000000 abcdefgh 11111111
866 * 00000000 abcdefgh 11111111 11111111 */
867 for (mode = 0, mask = 0xff; mode < 2;
868 mask = (mask << 8) | 0xff, mode++) {
869 if ((lo & mask) == mask &&
870 !((lo & ~mask) >> 8) &&
871 (imm = lo >> (8 + (mode << 8)))) {
872 mode = 0xc00 | (mode << 8);
873 goto success;
874 }
875 }
876 if (!inv) {
877 /* (F32)
878 * aBbbbbbc defgh000 00000000 00000000
879 * from the ARM Architecture Reference Manual:
880 * In this entry, B = NOT(b). The bit pattern represents the
881 * floating-point number (-1)^s* 2^exp * mantissa, where
882 * S = UInt(a),
883 * exp = UInt(NOT(b):c:d)-3 and
884 * mantissa = (16+UInt(e:f:g:h))/16. */
885 if ((lo & 0x7ffff) == 0 &&
886 (((lo & 0x7e000000) == 0x3e000000) ||
887 ((lo & 0x7e000000) == 0x40000000))) {
888 mode = 0xf00;
889 imm = ((lo >> 24) & 0x80) | ((lo >> 19) & 0x7f);
890 goto success;
891 }
892 }
893 }
894
895 fail:
896 /* need another approach (load from memory, move from arm register, etc) */
897 return (-1);
898
899 success:
900 code = inv ? ARM_VMVNI : ARM_VMOVI;
901 switch ((mode & 0xf00) >> 8) {
902 case 0x0: case 0x2: case 0x4: case 0x6:
903 case 0x8: case 0xa:
904 if (inv) mode |= 0x20;
905 if (!mov) mode |= 0x100;
906 break;
907 case 0x1: case 0x3: case 0x5: case 0x7:
908 /* should actually not reach here */
909 assert(!inv);
910 case 0x9: case 0xb:
911 assert(!mov);
912 break;
913 case 0xc: case 0xd:
914 /* should actually not reach here */
915 assert(inv);
916 case 0xe:
917 assert(mode & 0x20);
918 assert(mov && !inv);
919 break;
920 default:
921 assert(!(mode & 0x20));
922 break;
923 }
924 imm = ((imm & 0x80) << 17) | ((imm & 0x70) << 12) | (imm & 0x0f);
925 code |= mode | imm;
926 if (jit_thumb_p()) {
927 if (code & 0x1000000)
928 code |= 0xff000000;
929 else
930 code |= 0xef000000;
931 }
932 else
933 code |= ARM_CC_NV;
934 return (code);
935 }
936
937 static void
_vodi(jit_state_t * _jit,int oi,int r0)938 _vodi(jit_state_t *_jit, int oi, int r0)
939 {
940 jit_thumb_t thumb;
941 assert(!(oi & 0x0000f000));
942 assert(!(r0 & 1)); r0 = vfp_regno(r0);
943 thumb.i = oi|(_u4(r0)<<12);
944 if (jit_thumb_p())
945 iss(thumb.s[0], thumb.s[1]);
946 else
947 ii(thumb.i);
948 }
949
950 static void
_voqi(jit_state_t * _jit,int oi,int r0)951 _voqi(jit_state_t *_jit, int oi, int r0)
952 {
953 jit_thumb_t thumb;
954 assert(!(oi & 0x0000f000));
955 assert(!(r0 & 3)); r0 = vfp_regno(r0);
956 thumb.i = oi|(_u4(r0)<<12);
957 if (jit_thumb_p())
958 iss(thumb.s[0], thumb.s[1]);
959 else
960 ii(thumb.i);
961 }
962
963 static void
_cc_vo_ss(jit_state_t * _jit,int cc,int o,int r0,int r1)964 _cc_vo_ss(jit_state_t *_jit, int cc, int o, int r0, int r1)
965 {
966 jit_thumb_t thumb;
967 assert(!(cc & 0x0fffffff));
968 assert(!(o & 0xf000f00f));
969 if (r0 & 1) o |= ARM_V_D; r0 = vfp_regno(r0);
970 if (r1 & 1) o |= ARM_V_M; r1 = vfp_regno(r1);
971 thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
972 if (jit_thumb_p())
973 iss(thumb.s[0], thumb.s[1]);
974 else
975 ii(thumb.i);
976 }
977
978 static void
_cc_vo_dd(jit_state_t * _jit,int cc,int o,int r0,int r1)979 _cc_vo_dd(jit_state_t *_jit, int cc, int o, int r0, int r1)
980 {
981 jit_thumb_t thumb;
982 assert(!(cc & 0x0fffffff));
983 assert(!(o & 0xf000f00f));
984 assert(!(r0 & 1) && !(r1 & 1));
985 r0 = vfp_regno(r0); r1 = vfp_regno(r1);
986 thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
987 if (jit_thumb_p())
988 iss(thumb.s[0], thumb.s[1]);
989 else
990 ii(thumb.i);
991 }
992
993 static void
_cc_vo_qd(jit_state_t * _jit,int cc,int o,int r0,int r1)994 _cc_vo_qd(jit_state_t *_jit, int cc, int o, int r0, int r1)
995 {
996 jit_thumb_t thumb;
997 assert(!(cc & 0x0fffffff));
998 assert(!(o & 0xf000f00f));
999 assert(!(r0 & 3) && !(r1 & 1));
1000 r0 = vfp_regno(r0); r1 = vfp_regno(r1);
1001 thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1002 if (jit_thumb_p())
1003 iss(thumb.s[0], thumb.s[1]);
1004 else
1005 ii(thumb.i);
1006 }
1007
1008 static void
_cc_vo_qq(jit_state_t * _jit,int cc,int o,int r0,int r1)1009 _cc_vo_qq(jit_state_t *_jit, int cc, int o, int r0, int r1)
1010 {
1011 jit_thumb_t thumb;
1012 assert(!(cc & 0x0fffffff));
1013 assert(!(o & 0xf000f00f));
1014 assert(!(r0 & 3) && !(r1 & 3));
1015 r0 = vfp_regno(r0); r1 = vfp_regno(r1);
1016 thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1017 if (jit_thumb_p())
1018 iss(thumb.s[0], thumb.s[1]);
1019 else
1020 ii(thumb.i);
1021 }
1022
1023 static void
_cc_vorr_(jit_state_t * _jit,int cc,int o,int r0,int r1)1024 _cc_vorr_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1025 {
1026 jit_thumb_t thumb;
1027 assert(!(cc & 0x0fffffff));
1028 assert(!(o & 0xf000f00f));
1029 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1030 if (jit_thumb_p())
1031 iss(thumb.s[0], thumb.s[1]);
1032 else
1033 ii(thumb.i);
1034 }
1035
1036 static void
_cc_vors_(jit_state_t * _jit,int cc,int o,int r0,int r1)1037 _cc_vors_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1038 {
1039 jit_thumb_t thumb;
1040 assert(!(cc & 0x0fffffff));
1041 assert(!(o & 0xf000f00f));
1042 if (r1 & 1) o |= ARM_V_N; r1 = vfp_regno(r1);
1043 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1044 if (jit_thumb_p())
1045 iss(thumb.s[0], thumb.s[1]);
1046 else
1047 ii(thumb.i);
1048 }
1049
1050 static void
_cc_vorv_(jit_state_t * _jit,int cc,int o,int r0,int r1)1051 _cc_vorv_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1052 {
1053 jit_thumb_t thumb;
1054 assert(!(cc & 0x0fffffff));
1055 assert(!(o & 0xf000f00f));
1056 if (r1 & 1) o |= ARM_V_M; r1 = vfp_regno(r1);
1057 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1058 if (jit_thumb_p())
1059 iss(thumb.s[0], thumb.s[1]);
1060 else
1061 ii(thumb.i);
1062 }
1063
1064 static void
_cc_vori_(jit_state_t * _jit,int cc,int o,int r0,int r1)1065 _cc_vori_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1066 {
1067 jit_thumb_t thumb;
1068 assert(!(cc & 0x0fffffff));
1069 assert(!(o & 0xf000f00f));
1070 /* use same bit pattern, to set opc1... */
1071 if (r1 & 1) o |= ARM_V_I32; r1 = vfp_regno(r1);
1072 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1073 if (jit_thumb_p())
1074 iss(thumb.s[0], thumb.s[1]);
1075 else
1076 ii(thumb.i);
1077 }
1078
1079 static void
_cc_vorrd(jit_state_t * _jit,int cc,int o,int r0,int r1,int r2)1080 _cc_vorrd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1081 {
1082 jit_thumb_t thumb;
1083 assert(!(cc & 0x0fffffff));
1084 assert(!(o & 0xf00ff00f));
1085 assert(!(r2 & 1));
1086 r2 = vfp_regno(r2);
1087 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1088 if (jit_thumb_p())
1089 iss(thumb.s[0], thumb.s[1]);
1090 else
1091 ii(thumb.i);
1092 }
1093
1094 static void
_cc_vosss(jit_state_t * _jit,int cc,int o,int r0,int r1,int r2)1095 _cc_vosss(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1096 {
1097 jit_thumb_t thumb;
1098 assert(!(cc & 0x0fffffff));
1099 assert(!(o & 0xf00ff00f));
1100 if (r0 & 1) o |= ARM_V_D; r0 = vfp_regno(r0);
1101 if (r1 & 1) o |= ARM_V_N; r1 = vfp_regno(r1);
1102 if (r2 & 1) o |= ARM_V_M; r2 = vfp_regno(r2);
1103 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1104 if (jit_thumb_p())
1105 iss(thumb.s[0], thumb.s[1]);
1106 else
1107 ii(thumb.i);
1108 }
1109
1110 static void
_cc_voddd(jit_state_t * _jit,int cc,int o,int r0,int r1,int r2)1111 _cc_voddd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1112 {
1113 jit_thumb_t thumb;
1114 assert(!(cc & 0x0fffffff));
1115 assert(!(o & 0xf00ff00f));
1116 assert(!(r0 & 1) && !(r1 & 1) && !(r2 & 1));
1117 r0 = vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2);
1118 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1119 if (jit_thumb_p())
1120 iss(thumb.s[0], thumb.s[1]);
1121 else
1122 ii(thumb.i);
1123 }
1124
1125 static void
_cc_voqdd(jit_state_t * _jit,int cc,int o,int r0,int r1,int r2)1126 _cc_voqdd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1127 {
1128 jit_thumb_t thumb;
1129 assert(!(cc & 0x0fffffff));
1130 assert(!(o & 0xf00ff00f));
1131 assert(!(r0 & 3) && !(r1 & 1) && !(r2 & 1));
1132 r0 = vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2);
1133 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1134 if (jit_thumb_p())
1135 iss(thumb.s[0], thumb.s[1]);
1136 else
1137 ii(thumb.i);
1138 }
1139
1140 static void
_cc_voqqd(jit_state_t * _jit,int cc,int o,int r0,int r1,int r2)1141 _cc_voqqd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1142 {
1143 jit_thumb_t thumb;
1144 assert(!(cc & 0x0fffffff));
1145 assert(!(o & 0xf00ff00f));
1146 assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 1));
1147 r0 = vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2);
1148 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1149 if (jit_thumb_p())
1150 iss(thumb.s[0], thumb.s[1]);
1151 else
1152 ii(thumb.i);
1153 }
1154
1155 static void
_cc_voqqq(jit_state_t * _jit,int cc,int o,int r0,int r1,int r2)1156 _cc_voqqq(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1157 {
1158 jit_thumb_t thumb;
1159 assert(!(cc & 0x0fffffff));
1160 assert(!(o & 0xf00ff00f));
1161 assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 3));
1162 r0 = vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2);
1163 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1164 if (jit_thumb_p())
1165 iss(thumb.s[0], thumb.s[1]);
1166 else
1167 ii(thumb.i);
1168 }
1169
1170 static void
_cc_vldst(jit_state_t * _jit,int cc,int o,int r0,int r1,int i0)1171 _cc_vldst(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0)
1172 {
1173 jit_thumb_t thumb;
1174 /* i0 << 2 is byte offset */
1175 assert(!(cc & 0x0fffffff));
1176 assert(!(o & 0xf00ff0ff));
1177 if (r0 & 1) {
1178 assert(!(o & ARM_V_F64));
1179 o |= ARM_V_D;
1180 }
1181 r0 = vfp_regno(r0);
1182 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u8(i0);
1183 if (jit_thumb_p())
1184 iss(thumb.s[0], thumb.s[1]);
1185 else
1186 ii(thumb.i);
1187 }
1188
1189 static void
_cc_vorsl(jit_state_t * _jit,int cc,int o,int r0,int r1,int i0)1190 _cc_vorsl(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0)
1191 {
1192 jit_thumb_t thumb;
1193 assert(!(cc & 0x0fffffff));
1194 assert(!(o & 0xf00ff0ff));
1195 /* save i0 double precision registers */
1196 if (o & ARM_V_F64) i0 <<= 1;
1197 /* if (r1 & 1) cc & ARM_V_F64 must be false */
1198 if (r1 & 1) o |= ARM_V_D; r1 = vfp_regno(r1);
1199 assert(i0 && !(i0 & 1) && r1 + i0 <= 32);
1200 thumb.i = cc|o|(_u4(r0)<<16)|(_u4(r1)<<12)|_u8(i0);
1201 if (jit_thumb_p())
1202 iss(thumb.s[0], thumb.s[1]);
1203 else
1204 ii(thumb.i);
1205 }
1206
1207 static void
_vfp_movr_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1208 _vfp_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1209 {
1210 if (r0 != r1) {
1211 if (jit_fpr_p(r1)) {
1212 if (jit_fpr_p(r0))
1213 VMOV_F32(r0, r1);
1214 else
1215 VMOV_A_S(r0, r1);
1216 }
1217 else if (jit_fpr_p(r0))
1218 VMOV_S_A(r0, r1);
1219 else
1220 movr(r0, r1);
1221 }
1222 }
1223
1224 static void
_vfp_movr_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1225 _vfp_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1226 {
1227 if (r0 != r1) {
1228 if (jit_fpr_p(r1)) {
1229 if (jit_fpr_p(r0))
1230 VMOV_F64(r0, r1);
1231 else
1232 VMOV_AA_D(r0, r0 + 1, r1);
1233 }
1234 else if (jit_fpr_p(r0))
1235 VMOV_D_AA(r0, r1, r1 + 1);
1236 else {
1237 /* minor consistency check */
1238 assert(r0 + 1 != r1 && r0 -1 != r1);
1239 movr(r0, r1);
1240 movr(r0 + 1, r1 + 1);
1241 }
1242 }
1243 }
1244
1245 static void
_vfp_movi_f(jit_state_t * _jit,jit_int32_t r0,jit_float32_t i0)1246 _vfp_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
1247 {
1248 union {
1249 jit_int32_t i;
1250 jit_float32_t f;
1251 } u;
1252 jit_int32_t reg;
1253 jit_int32_t code;
1254 u.f = i0;
1255 if (jit_fpr_p(r0)) {
1256 /* float arguments are packed, for others,
1257 * lightning only address even registers */
1258 if (!(r0 & 1) && (r0 - 16) >= 0 &&
1259 ((code = encode_vfp_double(1, 0, u.i, u.i)) != -1 ||
1260 (code = encode_vfp_double(1, 1, ~u.i, ~u.i)) != -1))
1261 VIMM(code, r0);
1262 else {
1263 reg = jit_get_reg(jit_class_gpr);
1264 movi(rn(reg), u.i);
1265 VMOV_S_A(r0, rn(reg));
1266 jit_unget_reg(reg);
1267 }
1268 }
1269 else
1270 movi(r0, u.i);
1271 }
1272
1273 static void
_vfp_movi_d(jit_state_t * _jit,jit_int32_t r0,jit_float64_t i0)1274 _vfp_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
1275 {
1276 union {
1277 jit_int32_t i[2];
1278 jit_float64_t d;
1279 } u;
1280 jit_int32_t code;
1281 jit_int32_t rg0, rg1;
1282 u.d = i0;
1283 if (jit_fpr_p(r0)) {
1284 if ((code = encode_vfp_double(1, 0, u.i[0], u.i[1])) != -1 ||
1285 (code = encode_vfp_double(1, 1, ~u.i[0], ~u.i[1])) != -1)
1286 VIMM(code, r0);
1287 else {
1288 rg0 = jit_get_reg(jit_class_gpr);
1289 rg1 = jit_get_reg(jit_class_gpr);
1290 movi(rn(rg0), u.i[0]);
1291 movi(rn(rg1), u.i[1]);
1292 VMOV_D_AA(r0, rn(rg0), rn(rg1));
1293 jit_unget_reg(rg1);
1294 jit_unget_reg(rg0);
1295 }
1296 }
1297 else {
1298 movi(r0, u.i[0]);
1299 movi(r0 + 1, u.i[1]);
1300 }
1301 }
1302
1303 static void
_vfp_extr_d_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1304 _vfp_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1305 {
1306 jit_int32_t reg;
1307 if (jit_fpr_p(r1)) {
1308 if (jit_fpr_p(r0))
1309 VCVT_F64_F32(r0, r1);
1310 else {
1311 reg = jit_get_reg(jit_class_fpr);
1312 VCVT_F64_F32(rn(reg), r1);
1313 VMOV_A_S(r0, rn(reg));
1314 jit_unget_reg(reg);
1315 }
1316 }
1317 else {
1318 reg = jit_get_reg(jit_class_fpr);
1319 VMOV_S_A(rn(reg), r1);
1320 VCVT_F64_F32(rn(reg), rn(reg));
1321 if (jit_fpr_p(r0))
1322 VMOV_F32(r0, rn(reg));
1323 else
1324 VMOV_A_S(r0, rn(reg));
1325 jit_unget_reg(reg);
1326 }
1327 }
1328
1329 static void
_vfp_extr_f_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1330 _vfp_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1331 {
1332 jit_int32_t reg;
1333 if (jit_fpr_p(r1)) {
1334 if (jit_fpr_p(r0))
1335 VCVT_F32_F64(r0, r1);
1336 else {
1337 reg = jit_get_reg(jit_class_fpr);
1338 VCVT_F32_F64(rn(reg), r1);
1339 VMOV_AA_D(r0, r0 + 1, rn(reg));
1340 jit_unget_reg(reg);
1341 }
1342 }
1343 else {
1344 reg = jit_get_reg(jit_class_fpr);
1345 VMOV_D_AA(rn(reg), r1, r1 + 1);
1346 VCVT_F32_F64(rn(reg), rn(reg));
1347 if (jit_fpr_p(r0))
1348 VMOV_F64(r0, rn(reg));
1349 else
1350 VMOV_AA_D(r0, r0 + 1, rn(reg));
1351 jit_unget_reg(reg);
1352 }
1353 }
1354
1355 static void
_vfp_extr_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1356 _vfp_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1357 {
1358 jit_int32_t reg;
1359 if (jit_fpr_p(r0)) {
1360 VMOV_V_I32(r0, r1);
1361 VCVT_F32_S32(r0, r0);
1362 }
1363 else {
1364 reg = jit_get_reg(jit_class_fpr);
1365 VMOV_V_I32(rn(reg), r1);
1366 VCVT_F32_S32(rn(reg), rn(reg));
1367 VMOV_F32(r0, rn(reg));
1368 jit_unget_reg(reg);
1369 }
1370 }
1371
1372 static void
_vfp_extr_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1373 _vfp_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1374 {
1375 jit_int32_t reg;
1376 if (jit_fpr_p(r0)) {
1377 VMOV_V_I32(r0, r1);
1378 VCVT_F64_S32(r0, r0);
1379 }
1380 else {
1381 reg = jit_get_reg(jit_class_fpr);
1382 VMOV_V_I32(rn(reg), r1);
1383 VCVT_F64_S32(rn(reg), rn(reg));
1384 VMOV_F64(r0, rn(reg));
1385 jit_unget_reg(reg);
1386 }
1387 }
1388
1389 static void
_vfp_truncr_f_i(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1390 _vfp_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1391 {
1392 jit_int32_t reg;
1393 reg = jit_get_reg(jit_class_fpr);
1394 if (jit_fpr_p(r1))
1395 VCVT_S32_F32(rn(reg), r1);
1396 else {
1397 VMOV_V_I32(rn(reg), r1);
1398 VCVT_S32_F32(rn(reg), rn(reg));
1399 }
1400 VMOV_A_S32(r0, rn(reg));
1401 jit_unget_reg(reg);
1402 }
1403
1404 static void
_vfp_truncr_d_i(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1405 _vfp_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1406 {
1407 jit_int32_t reg;
1408 reg = jit_get_reg(jit_class_fpr);
1409 if (jit_fpr_p(r1))
1410 VCVT_S32_F64(rn(reg), r1);
1411 else {
1412 VMOV_V_I32(rn(reg), r1);
1413 VCVT_S32_F64(rn(reg), rn(reg));
1414 }
1415 VMOV_A_S32(r0, rn(reg));
1416 jit_unget_reg(reg);
1417 }
1418
/*
 * fopi(name): define _vfp_<name>i_f(), the form of vfp_<name>r_f()
 * whose last operand is a single-precision immediate.  The constant is
 * loaded into a temporary jit_class_fpr register with vfp_movi_f(), the
 * register form is invoked, and the temporary is released.
 */
#  define fopi(name) \
static void \
_vfp_##name##i_f(jit_state_t *_jit, \
                 jit_int32_t r0, jit_int32_t r1, jit_float32_t i0) \
{ \
    jit_int32_t imm_reg; \
    imm_reg = jit_get_reg(jit_class_fpr); \
    vfp_movi_f(rn(imm_reg), i0); \
    vfp_##name##r_f(r0, r1, rn(imm_reg)); \
    jit_unget_reg(imm_reg); \
}
/*
 * dopi(name): define _vfp_<name>i_d(), the form of vfp_<name>r_d()
 * whose last operand is a double-precision immediate.  The constant is
 * loaded into a temporary jit_class_fpr register with vfp_movi_d(), the
 * register form is invoked, and the temporary is released.
 */
#  define dopi(name) \
static void \
_vfp_##name##i_d(jit_state_t *_jit, \
                 jit_int32_t r0, jit_int32_t r1, jit_float64_t i0) \
{ \
    jit_int32_t imm_reg; \
    imm_reg = jit_get_reg(jit_class_fpr); \
    vfp_movi_d(rn(imm_reg), i0); \
    vfp_##name##r_d(r0, r1, rn(imm_reg)); \
    jit_unget_reg(imm_reg); \
}
/*
 * fbopi(name): define _vfp_b<name>i_f(), the form of vfp_b<name>r_f()
 * whose last operand is a single-precision immediate.  The constant is
 * loaded into a temporary register requested with
 * jit_class_fpr|jit_class_nospill, the register form is invoked, the
 * temporary is released, and the value returned by the register form is
 * passed back to the caller.
 */
#  define fbopi(name) \
static jit_word_t \
_vfp_b##name##i_f(jit_state_t *_jit, \
                  jit_int32_t r0, jit_int32_t r1, jit_float32_t i0) \
{ \
    jit_word_t branch; \
    jit_int32_t imm_reg; \
    imm_reg = jit_get_reg(jit_class_fpr|jit_class_nospill); \
    vfp_movi_f(rn(imm_reg), i0); \
    branch = vfp_b##name##r_f(r0, r1, rn(imm_reg)); \
    jit_unget_reg(imm_reg); \
    return (branch); \
}
/*
 * dbopi(name): define _vfp_b<name>i_d(), the form of vfp_b<name>r_d()
 * whose last operand is a double-precision immediate.  The constant is
 * loaded into a temporary register requested with
 * jit_class_fpr|jit_class_nospill, the register form is invoked, the
 * temporary is released, and the value returned by the register form is
 * passed back to the caller.
 */
#  define dbopi(name) \
static jit_word_t \
_vfp_b##name##i_d(jit_state_t *_jit, \
                  jit_int32_t r0, jit_int32_t r1, jit_float64_t i0) \
{ \
    jit_word_t branch; \
    jit_int32_t imm_reg; \
    imm_reg = jit_get_reg(jit_class_fpr|jit_class_nospill); \
    vfp_movi_d(rn(imm_reg), i0); \
    branch = vfp_b##name##r_d(r0, r1, rn(imm_reg)); \
    jit_unget_reg(imm_reg); \
    return (branch); \
}
1465
/* Immediate-operand arithmetic helpers: each fopi()/dopi() line expands
 * to _vfp_<op>i_f / _vfp_<op>i_d, which loads the constant into a
 * temporary FPR and calls vfp_<op>r_f / vfp_<op>r_d. */
fopi(add)
dopi(add)
fopi(sub)
fopi(rsb)
dopi(rsb)
dopi(sub)
fopi(mul)
dopi(mul)
fopi(div)
dopi(div)
1476
1477 static void
1478 _vfp_cmp_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1479 {
1480 jit_int32_t rg0, rg1;
1481 if (jit_fpr_p(r0)) {
1482 if (jit_fpr_p(r1))
1483 VCMP_F32(r0, r1);
1484 else {
1485 rg1 = jit_get_reg(jit_class_fpr);
1486 VMOV_S_A(rn(rg1), r1);
1487 VCMP_F32(r0, rn(rg1));
1488 jit_unget_reg(rg1);
1489 }
1490 }
1491 else {
1492 rg0 = jit_get_reg(jit_class_fpr);
1493 VMOV_S_A(rn(rg0), r0);
1494 if (jit_fpr_p(r1))
1495 VCMP_F32(rn(rg0), r1);
1496 else {
1497 rg1 = jit_get_reg(jit_class_fpr);
1498 VMOV_S_A(rn(rg1), r1);
1499 VCMP_F32(rn(rg0), rn(rg1));
1500 jit_unget_reg(rg1);
1501 }
1502 jit_unget_reg(rg0);
1503 }
1504 }
1505
1506 static void
_vfp_cmp_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)1507 _vfp_cmp_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1508 {
1509 jit_int32_t rg0, rg1;
1510 if (jit_fpr_p(r0)) {
1511 if (jit_fpr_p(r1))
1512 VCMP_F64(r0, r1);
1513 else {
1514 rg1 = jit_get_reg(jit_class_fpr);
1515 VMOV_D_AA(rn(rg1), r1, r1 + 1);
1516 VCMP_F64(r0, rn(rg1));
1517 jit_unget_reg(rg1);
1518 }
1519 }
1520 else {
1521 rg0 = jit_get_reg(jit_class_fpr);
1522 VMOV_D_AA(rn(rg0), r0, r0 + 1);
1523 if (jit_fpr_p(r1))
1524 VCMP_F64(rn(rg0), r1);
1525 else {
1526 rg1 = jit_get_reg(jit_class_fpr);
1527 VMOV_D_AA(rn(rg1), r1, r1 + 1);
1528 VCMP_F64(rn(rg0), rn(rg1));
1529 jit_unget_reg(rg1);
1530 }
1531 jit_unget_reg(rg0);
1532 }
1533 }
1534
1535 static void
_vcmp01_x(jit_state_t * _jit,int c0,int c1,jit_int32_t r0)1536 _vcmp01_x(jit_state_t *_jit, int c0, int c1, jit_int32_t r0)
1537 {
1538 VMRS(_R15_REGNO);
1539 if (jit_thumb_p()) {
1540 if ((c0 ^ c1) >> 28 == 1) {
1541 ITE(c0);
1542 if (r0 < 8) {
1543 T1_MOVI(r0, 0);
1544 T1_MOVI(r0, 1);
1545 }
1546 else {
1547 T2_MOVI(r0, 0);
1548 T2_MOVI(r0, 1);
1549 }
1550 }
1551 else {
1552 if (r0 < 8) {
1553 IT(c0);
1554 T1_MOVI(r0, 0);
1555 IT(c1);
1556 T1_MOVI(r0, 1);
1557 }
1558 else {
1559 IT(c0);
1560 T2_MOVI(r0, 0);
1561 IT(c1);
1562 T2_MOVI(r0, 1);
1563 }
1564 }
1565 }
1566 else {
1567 CC_MOVI(c0, r0, 0);
1568 CC_MOVI(c1, r0, 1);
1569 }
1570 }
1571
1572 static void
_vcmp01_f(jit_state_t * _jit,int c0,int c1,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1573 _vcmp01_f(jit_state_t *_jit, int c0, int c1,
1574 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1575 {
1576 vfp_cmp_f(r1, r2);
1577 vcmp01_x(c0, c1, r0);
1578 }
1579
1580 static void
_vcmp01_d(jit_state_t * _jit,int c0,int c1,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1581 _vcmp01_d(jit_state_t *_jit, int c0, int c1,
1582 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1583 {
1584 vfp_cmp_d(r1, r2);
1585 vcmp01_x(c0, c1, r0);
1586 }
1587
1588 static void
_vcmp10_x(jit_state_t * _jit,int cc,jit_int32_t r0)1589 _vcmp10_x(jit_state_t *_jit, int cc, jit_int32_t r0)
1590 {
1591 if (jit_thumb_p()) {
1592 if (r0 < 8) {
1593 T1_MOVI(r0, 1);
1594 VMRS(_R15_REGNO);
1595 IT(cc);
1596 T1_MOVI(r0, 0);
1597 }
1598 else {
1599 T2_MOVI(r0, 1);
1600 VMRS(_R15_REGNO);
1601 IT(cc);
1602 T2_MOVI(r0, 0);
1603 }
1604 }
1605 else {
1606 VMRS(_R15_REGNO);
1607 MOVI(r0, 1);
1608 CC_MOVI(cc, r0, 0);
1609 }
1610 }
1611 static void
_vcmp_10_f(jit_state_t * _jit,int cc,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1612 _vcmp_10_f(jit_state_t *_jit, int cc,
1613 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1614 {
1615 vfp_cmp_f(r1, r2);
1616 vcmp10_x(cc, r0);
1617 }
1618
1619 static void
_vcmp_10_d(jit_state_t * _jit,int cc,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1620 _vcmp_10_d(jit_state_t *_jit, int cc,
1621 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1622 {
1623 vfp_cmp_d(r1, r2);
1624 vcmp10_x(cc, r0);
1625 }
1626
/* Immediate-operand comparison helpers: each line expands to
 * _vfp_<op>i_f / _vfp_<op>i_d, which loads the constant into a temporary
 * FPR and calls vfp_<op>r_f / vfp_<op>r_d. */
fopi(lt)
dopi(lt)
fopi(le)
dopi(le)
fopi(eq)
dopi(eq)
fopi(ge)
dopi(ge)
fopi(gt)
dopi(gt)
fopi(ne)
dopi(ne)
fopi(unlt)
dopi(unlt)
fopi(unle)
dopi(unle)
1643
1644 static void
1645 _vfp_uneqr_x(jit_state_t *_jit, jit_int32_t r0)
1646 {
1647 VMRS(_R15_REGNO);
1648 if (jit_thumb_p()) {
1649 ITE(ARM_CC_NE);
1650 if (r0 < 8) {
1651 T1_MOVI(r0, 0);
1652 T1_MOVI(r0, 1);
1653 IT(ARM_CC_VS);
1654 T1_MOVI(r0, 1);
1655 }
1656 else {
1657 T2_MOVI(r0, 0);
1658 T2_MOVI(r0, 1);
1659 IT(ARM_CC_VS);
1660 T2_MOVI(r0, 1);
1661 }
1662 }
1663 else {
1664 CC_MOVI(ARM_CC_NE, r0, 0);
1665 CC_MOVI(ARM_CC_EQ, r0, 1);
1666 CC_MOVI(ARM_CC_VS, r0, 1);
1667 }
1668 }
1669
1670 static void
_vfp_uneqr_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1671 _vfp_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1672 {
1673 vfp_cmp_f(r1, r2);
1674 vfp_uneqr_x(r0);
1675 }
1676
/* _vfp_uneqi_f: immediate-operand form of vfp_uneqr_f */
fopi(uneq)
1678
1679 static void
1680 _vfp_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1681 {
1682 vfp_cmp_d(r1, r2);
1683 vfp_uneqr_x(r0);
1684 }
1685
/* _vfp_uneqi_d: immediate-operand form of vfp_uneqr_d */
dopi(uneq)
1687
1688 static void
1689 _vcmp_01_x(jit_state_t *_jit, int cc, jit_int32_t r0)
1690 {
1691 if (jit_thumb_p()) {
1692 if (r0 < 8) {
1693 T1_MOVI(r0, 0);
1694 VMRS(_R15_REGNO);
1695 IT(cc);
1696 T1_MOVI(r0, 1);
1697 }
1698 else {
1699 T2_MOVI(r0, 0);
1700 VMRS(_R15_REGNO);
1701 IT(cc);
1702 T2_MOVI(r0, 1);
1703 }
1704 }
1705 else {
1706 MOVI(r0, 0);
1707 VMRS(_R15_REGNO);
1708 CC_MOVI(cc, r0, 1);
1709 }
1710 }
1711
1712 static void
_vcmp_01_f(jit_state_t * _jit,int cc,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1713 _vcmp_01_f(jit_state_t *_jit, int cc,
1714 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1715 {
1716 vfp_cmp_f(r1, r2);
1717 vcmp_01_x(cc, r0);
1718 }
1719
1720 static void
_vcmp_01_d(jit_state_t * _jit,int cc,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1721 _vcmp_01_d(jit_state_t *_jit, int cc,
1722 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1723 {
1724 vfp_cmp_d(r1, r2);
1725 vcmp_01_x(cc, r0);
1726 }
1727
/* _vfp_ungei_f/_d and _vfp_ungti_f/_d: immediate-operand forms of
 * vfp_unger_* and vfp_ungtr_* */
fopi(unge)
dopi(unge)
fopi(ungt)
dopi(ungt)
1732
1733 static void
1734 _vfp_ltgtr_x(jit_state_t *_jit, jit_int32_t r0)
1735 {
1736 VMRS(_R15_REGNO);
1737 if (jit_thumb_p()) {
1738 ITE(ARM_CC_NE);
1739 if (r0 < 8) {
1740 T1_MOVI(r0, 1);
1741 T1_MOVI(r0, 0);
1742 IT(ARM_CC_VS);
1743 T1_MOVI(r0, 0);
1744 }
1745 else {
1746 T2_MOVI(r0, 1);
1747 T2_MOVI(r0, 0);
1748 IT(ARM_CC_VS);
1749 T2_MOVI(r0, 0);
1750 }
1751 }
1752 else {
1753 CC_MOVI(ARM_CC_NE, r0, 1);
1754 CC_MOVI(ARM_CC_EQ, r0, 0);
1755 CC_MOVI(ARM_CC_VS, r0, 0);
1756 }
1757 }
1758
1759 static void
_vfp_ltgtr_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)1760 _vfp_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1761 {
1762 vfp_cmp_f(r1, r2);
1763 vfp_ltgtr_x(r0);
1764 }
1765
/* _vfp_ltgti_f: immediate-operand form of vfp_ltgtr_f */
fopi(ltgt)
1767
1768 static void
1769 _vfp_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1770 {
1771 vfp_cmp_d(r1, r2);
1772 vfp_ltgtr_x(r0);
1773 }
1774
/* _vfp_ltgti_d: immediate-operand form of vfp_ltgtr_d */
dopi(ltgt)
1776
1777 static void
1778 _vfp_ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1779 {
1780 vfp_cmp_f(r1, r2);
1781 vcmp10_x(ARM_CC_VS, r0);
1782 }
1783
/* _vfp_ordi_f: immediate-operand form of vfp_ordr_f */
fopi(ord)
1785
1786 static void
1787 _vfp_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1788 {
1789 vfp_cmp_d(r1, r2);
1790 vcmp10_x(ARM_CC_VS, r0);
1791 }
1792
/* _vfp_ordi_d: immediate-operand form of vfp_ordr_d */
dopi(ord)
1794
1795 static void
1796 _vfp_unordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1797 {
1798 vfp_cmp_f(r1, r2);
1799 vcmp_01_x(ARM_CC_VS, r0);
1800 }
1801
/* _vfp_unordi_f: immediate-operand form of vfp_unordr_f */
fopi(unord)
1803
1804 static void
1805 _vfp_unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1806 {
1807 vfp_cmp_d(r1, r2);
1808 vcmp_01_x(ARM_CC_VS, r0);
1809 }
1810
/* _vfp_unordi_d: immediate-operand form of vfp_unordr_d */
dopi(unord)
1812
1813 static jit_word_t
1814 _vbcmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
1815 {
1816 jit_word_t d, w;
1817 VMRS(_R15_REGNO);
1818 w = _jit->pc.w;
1819 if (jit_thumb_p()) {
1820 d = ((i0 - w) >> 1) - 2;
1821 assert(_s20P(d));
1822 T2_CC_B(cc, encode_thumb_cc_jump(d));
1823 }
1824 else {
1825 d = ((i0 - w) >> 2) - 2;
1826 assert(_s24P(d));
1827 CC_B(cc, d & 0x00ffffff);
1828 }
1829 return (w);
1830 }
1831
1832
1833 static jit_word_t
_vbcmp_f(jit_state_t * _jit,int cc,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1834 _vbcmp_f(jit_state_t *_jit, int cc,
1835 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1836 {
1837 vfp_cmp_f(r0, r1);
1838 return (vbcmp_x(cc, i0));
1839 }
1840
1841 static jit_word_t
_vbcmp_d(jit_state_t * _jit,int cc,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1842 _vbcmp_d(jit_state_t *_jit, int cc,
1843 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1844 {
1845 vfp_cmp_d(r0, r1);
1846 return (vbcmp_x(cc, i0));
1847 }
1848
1849 static jit_word_t
_vbncmp_x(jit_state_t * _jit,int cc,jit_word_t i0)1850 _vbncmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
1851 {
1852 jit_word_t d, p, w;
1853 VMRS(_R15_REGNO);
1854 p = _jit->pc.w;
1855 if (jit_thumb_p()) {
1856 T2_CC_B(cc, 0);
1857 w = _jit->pc.w;
1858 d = ((i0 - w) >> 1) - 2;
1859 assert(_s20P(d));
1860 T2_B(encode_thumb_jump(d));
1861 }
1862 else {
1863 CC_B(cc, 0);
1864 w = _jit->pc.w;
1865 d = ((i0 - w) >> 2) - 2;
1866 assert(_s24P(d));
1867 B(d & 0x00ffffff);
1868 }
1869 patch_at(arm_patch_jump, p, _jit->pc.w);
1870 return (w);
1871 }
1872
1873 static jit_word_t
_vbncmp_f(jit_state_t * _jit,int cc,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1874 _vbncmp_f(jit_state_t *_jit, int cc,
1875 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1876 {
1877 vfp_cmp_f(r0, r1);
1878 return (vbncmp_x(cc, i0));
1879 }
1880
1881 static jit_word_t
_vbncmp_d(jit_state_t * _jit,int cc,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1882 _vbncmp_d(jit_state_t *_jit, int cc,
1883 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1884 {
1885 vfp_cmp_d(r0, r1);
1886 return (vbncmp_x(cc, i0));
1887 }
1888
/* Immediate-operand compare-and-branch helpers: each line expands to
 * _vfp_b<op>i_f / _vfp_b<op>i_d, which loads the constant into a
 * temporary FPR and calls vfp_b<op>r_f / vfp_b<op>r_d. */
fbopi(lt)
dbopi(lt)
fbopi(le)
dbopi(le)
fbopi(eq)
dbopi(eq)
fbopi(ge)
dbopi(ge)
fbopi(gt)
dbopi(gt)
fbopi(ne)
dbopi(ne)
fbopi(unlt)
dbopi(unlt)
fbopi(unle)
dbopi(unle)
1905
1906 static jit_word_t
1907 _vfp_buneqr_x(jit_state_t *_jit, jit_word_t i0)
1908 {
1909 jit_word_t d, p, q, w;
1910 VMRS(_R15_REGNO);
1911 p = _jit->pc.w;
1912 if (jit_thumb_p()) {
1913 T2_CC_B(ARM_CC_VS, 0);
1914 q = _jit->pc.w;
1915 T2_CC_B(ARM_CC_NE, 0);
1916 patch_at(arm_patch_jump, p, _jit->pc.w);
1917 w = _jit->pc.w;
1918 d = ((i0 - w) >> 1) - 2;
1919 assert(_s20P(d));
1920 T2_B(encode_thumb_jump(d));
1921 }
1922 else {
1923 CC_B(ARM_CC_VS, 0);
1924 q = _jit->pc.w;
1925 CC_B(ARM_CC_NE, 0);
1926 patch_at(arm_patch_jump, p, _jit->pc.w);
1927 w = _jit->pc.w;
1928 d = ((i0 - w) >> 2) - 2;
1929 assert(_s24P(d));
1930 B(d & 0x00ffffff);
1931 }
1932 patch_at(arm_patch_jump, q, _jit->pc.w);
1933 return (w);
1934 }
1935
1936 static jit_word_t
_vfp_buneqr_f(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1937 _vfp_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1938 {
1939 vfp_cmp_f(r0, r1);
1940 return (vfp_buneqr_x(i0));
1941 }
1942
/* _vfp_buneqi_f: immediate-operand form of vfp_buneqr_f */
fbopi(uneq)
1944
1945 static jit_word_t
1946 _vfp_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1947 {
1948 vfp_cmp_d(r0, r1);
1949 return (vfp_buneqr_x(i0));
1950 }
1951
/* _vfp_buneqi_d: immediate-operand form of vfp_buneqr_d */
dbopi(uneq)
1953
1954 static jit_word_t
1955 _vfp_bunger_x(jit_state_t *_jit, jit_word_t i0)
1956 {
1957 jit_word_t d, p, w;
1958 VMRS(_R15_REGNO);
1959 p = _jit->pc.w;
1960 if (jit_thumb_p()) {
1961 T2_CC_B(ARM_CC_MI, 0);
1962 w = _jit->pc.w;
1963 d = ((i0 - w) >> 1) - 2;
1964 assert(_s20P(d));
1965 T2_CC_B(ARM_CC_HS, encode_thumb_cc_jump(d));
1966 }
1967 else {
1968 CC_B(ARM_CC_MI, 0);
1969 w = _jit->pc.w;
1970 d = ((i0 - w) >> 2) - 2;
1971 assert(_s24P(d));
1972 CC_B(ARM_CC_HS, d & 0x00ffffff);
1973 }
1974 patch_at(arm_patch_jump, p, _jit->pc.w);
1975 return (w);
1976 }
1977
1978 static jit_word_t
_vfp_bunger_f(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)1979 _vfp_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1980 {
1981 vfp_cmp_f(r0, r1);
1982 return (vfp_bunger_x(i0));
1983 }
1984
/* _vfp_bungei_f: immediate-operand form of vfp_bunger_f */
fbopi(unge)
1986
1987 static jit_word_t
1988 _vfp_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1989 {
1990 vfp_cmp_d(r0, r1);
1991 return (vfp_bunger_x(i0));
1992 }
1993
/* _vfp_bungei_d: immediate-operand form of vfp_bunger_d */
dbopi(unge)
1995
1996 static jit_word_t
1997 _vfp_bltgtr_x(jit_state_t *_jit, jit_word_t i0)
1998 {
1999 jit_word_t d, p, q, w;
2000 VMRS(_R15_REGNO);
2001 p = _jit->pc.w;
2002 if (jit_thumb_p()) {
2003 T2_CC_B(ARM_CC_VS, 0);
2004 q = _jit->pc.w;
2005 T2_CC_B(ARM_CC_EQ, 0);
2006 w = _jit->pc.w;
2007 d = ((i0 - w) >> 1) - 2;
2008 assert(_s20P(d));
2009 T2_B(encode_thumb_jump(d));
2010 }
2011 else {
2012 CC_B(ARM_CC_VS, 0);
2013 q = _jit->pc.w;
2014 CC_B(ARM_CC_EQ, 0);
2015 w = _jit->pc.w;
2016 d = ((i0 - w) >> 2) - 2;
2017 assert(_s24P(d));
2018 B(d & 0x00ffffff);
2019 }
2020 patch_at(arm_patch_jump, p, _jit->pc.w);
2021 patch_at(arm_patch_jump, q, _jit->pc.w);
2022 return (w);
2023 }
2024
2025 static jit_word_t
_vfp_bltgtr_f(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)2026 _vfp_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2027 {
2028 vfp_cmp_f(r0, r1);
2029 return (vfp_bltgtr_x(i0));
2030 }
2031
/* _vfp_bungti_f/_d and _vfp_bltgti_f: immediate-operand forms of
 * vfp_bungtr_* and vfp_bltgtr_f */
fbopi(ungt)
dbopi(ungt)
fbopi(ltgt)
2035
2036 static jit_word_t
2037 _vfp_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2038 {
2039 vfp_cmp_d(r0, r1);
2040 return (vfp_bltgtr_x(i0));
2041 }
2042
/* _vfp_bltgti_d, _vfp_bordi_f/_d and _vfp_bunordi_f/_d: immediate-operand
 * forms of vfp_bltgtr_d, vfp_bordr_* and vfp_bunordr_* */
dbopi(ltgt)
fbopi(ord)
dbopi(ord)
fbopi(unord)
dbopi(unord)
2048
2049 static void
2050 _vfp_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2051 {
2052 jit_int32_t gpr;
2053 if (jit_fpr_p(r0)) {
2054 gpr = jit_get_reg(jit_class_gpr);
2055 movi(rn(gpr), i0);
2056 VLDR_F32(r0, rn(gpr), 0);
2057 jit_unget_reg(gpr);
2058 }
2059 else
2060 ldi_i(r0, i0);
2061 }
2062
2063 static void
_vfp_ldi_d(jit_state_t * _jit,jit_int32_t r0,jit_word_t i0)2064 _vfp_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2065 {
2066 jit_int32_t reg;
2067 reg = jit_get_reg(jit_class_gpr);
2068 movi(rn(reg), i0);
2069 if (jit_fpr_p(r0))
2070 VLDR_F64(r0, rn(reg), 0);
2071 else {
2072 ldr_i(r0, rn(reg));
2073 ldxi_i(r0 + 1, rn(reg), 4);
2074 }
2075 jit_unget_reg(reg);
2076 }
2077
2078 static void
_vfp_ldxr_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2079 _vfp_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2080 {
2081 jit_int32_t reg;
2082 if (jit_fpr_p(r0)) {
2083 reg = jit_get_reg(jit_class_gpr);
2084 addr(rn(reg), r1, r2);
2085 VLDR_F32(r0, rn(reg), 0);
2086 jit_unget_reg(reg);
2087 }
2088 else
2089 ldxr_i(r0, r1, r2);
2090 }
2091
2092 static void
_vfp_ldxr_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2093 _vfp_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2094 {
2095 jit_int32_t reg;
2096 reg = jit_get_reg(jit_class_gpr);
2097 addr(rn(reg), r1, r2);
2098 if (jit_fpr_p(r0))
2099 VLDR_F64(r0, rn(reg), 0);
2100 else {
2101 ldr_i(r0, rn(reg));
2102 ldxi_i(r0 + 1, rn(reg), 4);
2103 }
2104 jit_unget_reg(reg);
2105 }
2106
2107 static void
_vfp_ldxi_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)2108 _vfp_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2109 {
2110 jit_int32_t reg;
2111 if (jit_fpr_p(r0)) {
2112 if (i0 >= 0) {
2113 assert(!(i0 & 3));
2114 if (i0 < 1024)
2115 VLDR_F32(r0, r1, i0 >> 2);
2116 else {
2117 reg = jit_get_reg(jit_class_gpr);
2118 addi(rn(reg), r1, i0);
2119 VLDR_F32(r0, rn(reg), 0);
2120 jit_unget_reg(reg);
2121 }
2122 }
2123 else {
2124 i0 = -i0;
2125 assert(!(i0 & 3));
2126 if (i0 < 1024)
2127 VLDRN_F32(r0, r1, i0 >> 2);
2128 else {
2129 reg = jit_get_reg(jit_class_gpr);
2130 subi(rn(reg), r1, i0);
2131 VLDR_F32(r0, rn(reg), 0);
2132 jit_unget_reg(reg);
2133 }
2134 }
2135 }
2136 else
2137 ldxi_i(r0, r1, i0);
2138 }
2139
2140 static void
_vfp_ldxi_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_word_t i0)2141 _vfp_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2142 {
2143 jit_int32_t reg;
2144 if (jit_fpr_p(r0)) {
2145 if (i0 >= 0) {
2146 assert(!(i0 & 3));
2147 if (i0 < 1024)
2148 VLDR_F64(r0, r1, i0 >> 2);
2149 else {
2150 reg = jit_get_reg(jit_class_gpr);
2151 addi(rn(reg), r1, i0);
2152 VLDR_F64(r0, rn(reg), 0);
2153 jit_unget_reg(reg);
2154 }
2155 }
2156 else {
2157 i0 = -i0;
2158 assert(!(i0 & 3));
2159 if (i0 < 1024)
2160 VLDRN_F64(r0, r1, i0 >> 2);
2161 else {
2162 reg = jit_get_reg(jit_class_gpr);
2163 subi(rn(reg), r1, i0);
2164 VLDR_F64(r0, rn(reg), 0);
2165 jit_unget_reg(reg);
2166 }
2167 }
2168 }
2169 else {
2170 reg = jit_get_reg(jit_class_gpr);
2171 addi(rn(reg), r1, i0);
2172 ldr_i(r0, rn(reg));
2173 ldxi_i(r0 + 1, rn(reg), 4);
2174 jit_unget_reg(reg);
2175 }
2176 }
2177
2178 static void
_vfp_sti_f(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0)2179 _vfp_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2180 {
2181 jit_int32_t reg;
2182 if (jit_fpr_p(r0)) {
2183 reg = jit_get_reg(jit_class_gpr);
2184 movi(rn(reg), i0);
2185 VSTR_F32(r0, rn(reg), 0);
2186 jit_unget_reg(reg);
2187 }
2188 else
2189 sti_i(i0, r0);
2190 }
2191
2192 static void
_vfp_sti_d(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0)2193 _vfp_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2194 {
2195 jit_int32_t reg;
2196 reg = jit_get_reg(jit_class_gpr);
2197 movi(rn(reg), i0);
2198 if (jit_fpr_p(r0))
2199 VSTR_F64(r0, rn(reg), 0);
2200 else {
2201 str_i(rn(reg), r0);
2202 stxi_i(4, rn(reg), r0 + 1);
2203 }
2204 jit_unget_reg(reg);
2205 }
2206
2207 static void
_vfp_stxr_f(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2208 _vfp_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2209 {
2210 jit_int32_t reg;
2211 if (jit_fpr_p(r2)) {
2212 reg = jit_get_reg(jit_class_gpr);
2213 addr(rn(reg), r0, r1);
2214 VSTR_F32(r2, rn(reg), 0);
2215 jit_unget_reg(reg);
2216 }
2217 else
2218 stxr_i(r0, r1, r2);
2219 }
2220
2221 static void
_vfp_stxr_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1,jit_int32_t r2)2222 _vfp_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2223 {
2224 jit_int32_t reg;
2225 reg = jit_get_reg(jit_class_gpr);
2226 addr(rn(reg), r0, r1);
2227 if (jit_fpr_p(r2))
2228 VSTR_F64(r2, rn(reg), 0);
2229 else {
2230 str_i(rn(reg), r2);
2231 stxi_i(4, rn(reg), r2 + 1);
2232 }
2233 jit_unget_reg(reg);
2234 }
2235
2236 static void
_vfp_stxi_f(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)2237 _vfp_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2238 {
2239 jit_int32_t reg;
2240 if (jit_fpr_p(r1)) {
2241 if (i0 >= 0) {
2242 assert(!(i0 & 3));
2243 if (i0 < 1024)
2244 VSTR_F32(r1, r0, i0 >> 2);
2245 else {
2246 reg = jit_get_reg(jit_class_gpr);
2247 addi(rn(reg), r0, i0);
2248 VSTR_F32(r1, rn(reg), 0);
2249 jit_unget_reg(reg);
2250 }
2251 }
2252 else {
2253 i0 = -i0;
2254 assert(!(i0 & 3));
2255 if (i0 < 1024)
2256 VSTRN_F32(r1, r0, i0 >> 2);
2257 else {
2258 reg = jit_get_reg(jit_class_gpr);
2259 subi(rn(reg), r0, i0);
2260 VSTR_F32(r1, rn(reg), 0);
2261 jit_unget_reg(reg);
2262 }
2263 }
2264 }
2265 else
2266 stxi_i(i0, r0, r1);
2267 }
2268
2269 static void
_vfp_stxi_d(jit_state_t * _jit,jit_word_t i0,jit_int32_t r0,jit_int32_t r1)2270 _vfp_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2271 {
2272 jit_int32_t reg;
2273 if (jit_fpr_p(r1)) {
2274 if (i0 >= 0) {
2275 assert(!(i0 & 3));
2276 if (i0 < 0124)
2277 VSTR_F64(r1, r0, i0 >> 2);
2278 else {
2279 reg = jit_get_reg(jit_class_gpr);
2280 addi(rn(reg), r0, i0);
2281 VSTR_F64(r1, rn(reg), 0);
2282 jit_unget_reg(reg);
2283 }
2284 }
2285 else {
2286 i0 = -i0;
2287 assert(!(i0 & 3));
2288 if (i0 < 1024)
2289 VSTRN_F64(r1, r0, i0 >> 2);
2290 else {
2291 reg = jit_get_reg(jit_class_gpr);
2292 subi(rn(reg), r0, i0);
2293 VSTR_F64(r1, rn(reg), 0);
2294 jit_unget_reg(reg);
2295 }
2296 }
2297 }
2298 else {
2299 reg = jit_get_reg(jit_class_gpr);
2300 addi(rn(reg), r0, i0);
2301 str_i(rn(reg), r1);
2302 stxi_i(4, rn(reg), r1 + 1);
2303 jit_unget_reg(reg);
2304 }
2305 }
2306
2307 static void
_vfp_vaarg_d(jit_state_t * _jit,jit_int32_t r0,jit_int32_t r1)2308 _vfp_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2309 {
2310 jit_int32_t reg;
2311
2312 assert(_jitc->function->self.call & jit_call_varargs);
2313
2314 /* Adjust pointer. */
2315 reg = jit_get_reg(jit_class_gpr);
2316 andi(rn(reg), r1, 7);
2317 addr(r1, r1, rn(reg));
2318 jit_unget_reg(reg);
2319
2320 /* Load argument. */
2321 vfp_ldr_d(r0, r1);
2322
2323 /* Update stack pointer. */
2324 addi(r1, r1, sizeof(jit_float64_t));
2325 }
2326 # undef dbopi
2327 # undef fbopi
2328 # undef dopi
2329 # undef fopi
2330 #endif
2331