1 /*
2  *  Copyright (C) 2002-2021  The DOSBox Team
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License along
15  *  with this program; if not, write to the Free Software Foundation, Inc.,
16  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
19 /* ARMv4/ARMv7 (little endian) backend by M-HT (arm version) */
20 
21 // temporary registers
22 #define temp1 HOST_ip
23 #define temp2 HOST_v3
24 #define temp3 HOST_v4
25 
26 // register that holds function return values
27 #define FC_RETOP HOST_a1
28 
29 // register used for address calculations,
30 #define FC_ADDR HOST_v1			// has to be saved across calls, see DRC_PROTECT_ADDR_REG
31 
32 // register that holds the first parameter
33 #define FC_OP1 HOST_a1
34 
35 // register that holds the second parameter
36 #define FC_OP2 HOST_a2
37 
38 // special register that holds the third parameter for _R3 calls (byte accessible)
39 #define FC_OP3 HOST_v2
40 
41 // register that holds byte-accessible temporary values
42 #define FC_TMP_BA1 HOST_a1
43 
44 // register that holds byte-accessible temporary values
45 #define FC_TMP_BA2 HOST_a2
46 
47 // temporary register for LEA
48 #define TEMP_REG_DRC HOST_v2
49 
50 // used to hold the address of "cpu_regs" - preferably filled in function gen_run_code
51 #define FC_REGS_ADDR HOST_v7
52 
53 // used to hold the address of "Segs" - preferably filled in function gen_run_code
54 #define FC_SEGS_ADDR HOST_v8
55 
56 // used to hold the address of "core_dynrec.readdata" - filled in function gen_run_code
57 #define readdata_addr HOST_v5
58 
59 
60 // helper macro
61 #define ROTATE_SCALE(x) ( (x)?(32 - x):(0) )
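// ARM "operand2" immediates are an 8-bit value rotated right by an even
// amount. The emitters below take the 8-bit value together with the *left*
// shift the caller computed, and ROTATE_SCALE turns that left shift into the
// right-rotation the instruction encoding expects.
// A sketch (not emitted anywhere verbatim): to materialize 0x1F000 = 0x1F << 12,
//   MOV_IMM(reg, 0x1F, ROTATE_SCALE(12))   // mov reg, #0x1F000  (0x1F ror 20)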
62 
63 
64 // instruction encodings
65 
66 // move
67 // mov dst, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
68 #define MOV_IMM(dst, imm, rimm) (0xe3a00000 + ((dst) << 12) + (imm) + ((rimm) << 7) )
69 // mov dst, src, lsl #imm
70 #define MOV_REG_LSL_IMM(dst, src, imm) (0xe1a00000 + ((dst) << 12) + (src) + ((imm) << 7) )
71 // movs dst, src, lsl #imm
72 #define MOVS_REG_LSL_IMM(dst, src, imm) (0xe1b00000 + ((dst) << 12) + (src) + ((imm) << 7) )
73 // mov dst, src, lsr #imm
74 #define MOV_REG_LSR_IMM(dst, src, imm) (0xe1a00020 + ((dst) << 12) + (src) + ((imm) << 7) )
75 // mov dst, src, asr #imm
76 #define MOV_REG_ASR_IMM(dst, src, imm) (0xe1a00040 + ((dst) << 12) + (src) + ((imm) << 7) )
77 // mov dst, src, lsl rreg
78 #define MOV_REG_LSL_REG(dst, src, rreg) (0xe1a00010 + ((dst) << 12) + (src) + ((rreg) << 8) )
79 // mov dst, src, lsr rreg
80 #define MOV_REG_LSR_REG(dst, src, rreg) (0xe1a00030 + ((dst) << 12) + (src) + ((rreg) << 8) )
81 // mov dst, src, asr rreg
82 #define MOV_REG_ASR_REG(dst, src, rreg) (0xe1a00050 + ((dst) << 12) + (src) + ((rreg) << 8) )
83 // mov dst, src, ror rreg
84 #define MOV_REG_ROR_REG(dst, src, rreg) (0xe1a00070 + ((dst) << 12) + (src) + ((rreg) << 8) )
85 // mvn dst, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
86 #define MVN_IMM(dst, imm, rimm) (0xe3e00000 + ((dst) << 12) + (imm) + ((rimm) << 7) )
87 #if C_TARGETCPU == ARMV7LE
88 // movw dst, #imm		@	0 <= imm <= 65535
89 #define MOVW(dst, imm) (0xe3000000 + ((dst) << 12) + (((imm) & 0xf000) << 4) + ((imm) & 0x0fff) )
90 // movt dst, #imm		@	0 <= imm <= 65535
91 #define MOVT(dst, imm) (0xe3400000 + ((dst) << 12) + (((imm) & 0xf000) << 4) + ((imm) & 0x0fff) )
92 #endif
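// A sketch, assuming the ARMv7 target: an arbitrary 32-bit constant such as
// 0x12345678 is built from its two halves ("reg" stands for any host register):
//   cache_addd( MOVW(reg, 0x5678) );      // movw reg, #0x5678
//   cache_addd( MOVT(reg, 0x1234) );      // movt reg, #0x1234   -> reg = 0x12345678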
93 
94 // arithmetic
95 // add dst, src, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
96 #define ADD_IMM(dst, src, imm, rimm) (0xe2800000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
97 // add dst, src1, src2, lsl #imm
98 #define ADD_REG_LSL_IMM(dst, src1, src2, imm) (0xe0800000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
99 // sub dst, src, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
100 #define SUB_IMM(dst, src, imm, rimm) (0xe2400000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
101 // sub dst, src1, src2, lsl #imm
102 #define SUB_REG_LSL_IMM(dst, src1, src2, imm) (0xe0400000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
103 // rsb dst, src, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
104 #define RSB_IMM(dst, src, imm, rimm) (0xe2600000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
105 // cmp src, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
106 #define CMP_IMM(src, imm, rimm) (0xe3500000 + ((src) << 16) + (imm) + ((rimm) << 7) )
107 // nop
108 #if C_TARGETCPU == ARMV7LE
109 #define NOP (0xe320f000)
110 #else
111 #define NOP MOV_REG_LSL_IMM(HOST_r0, HOST_r0, 0)
112 #endif
113 
114 // logical
115 // tst src, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
116 #define TST_IMM(src, imm, rimm) (0xe3100000 + ((src) << 16) + (imm) + ((rimm) << 7) )
117 // and dst, src, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
118 #define AND_IMM(dst, src, imm, rimm) (0xe2000000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
119 // and dst, src1, src2, lsl #imm
120 #define AND_REG_LSL_IMM(dst, src1, src2, imm) (0xe0000000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
121 // orr dst, src, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
122 #define ORR_IMM(dst, src, imm, rimm) (0xe3800000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
123 // orr dst, src1, src2, lsl #imm
124 #define ORR_REG_LSL_IMM(dst, src1, src2, imm) (0xe1800000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
125 // orr dst, src1, src2, lsr #imm
126 #define ORR_REG_LSR_IMM(dst, src1, src2, imm) (0xe1800020 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
127 // eor dst, src1, src2, lsl #imm
128 #define EOR_REG_LSL_IMM(dst, src1, src2, imm) (0xe0200000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
129 // bic dst, src, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
130 #define BIC_IMM(dst, src, imm, rimm) (0xe3c00000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
131 // bic dst, src1, src2, lsl #imm		@	0 <= imm <= 31
132 #define BIC_REG_LSL_IMM(dst, src1, src2, imm) (0xe1c00000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
133 
134 // load
135 // ldr reg, [addr, #imm]		@	0 <= imm < 4096
136 #define LDR_IMM(reg, addr, imm) (0xe5900000 + ((reg) << 12) + ((addr) << 16) + (imm) )
137 // ldr reg, [addr, #-(imm)]		@	0 <= imm < 4096
138 #define LDR_IMM_M(reg, addr, imm) (0xe5100000 + ((reg) << 12) + ((addr) << 16) + (imm) )
139 // ldrh reg, [addr, #imm]		@	0 <= imm < 256
140 #define LDRH_IMM(reg, addr, imm) (0xe1d000b0 + ((reg) << 12) + ((addr) << 16) + (((imm) & 0xf0) << 4) + ((imm) & 0x0f) )
141 // ldrh reg, [addr, #-(imm)]		@	0 <= imm < 256
142 #define LDRH_IMM_M(reg, addr, imm) (0xe15000b0 + ((reg) << 12) + ((addr) << 16) + (((imm) & 0xf0) << 4) + ((imm) & 0x0f) )
143 // ldrb reg, [addr, #imm]		@	0 <= imm < 4096
144 #define LDRB_IMM(reg, addr, imm) (0xe5d00000 + ((reg) << 12) + ((addr) << 16) + (imm) )
145 // ldrb reg, [addr, #-(imm)]		@	0 <= imm < 4096
146 #define LDRB_IMM_M(reg, addr, imm) (0xe5500000 + ((reg) << 12) + ((addr) << 16) + (imm) )
// ldr reg, [addr1, addr2, lsl #imm]		@	0 <= imm <= 31
148 #define LDR_REG_LSL_IMM(reg, addr1, addr2, imm) (0xe7900000 + ((reg) << 12) + ((addr1) << 16) + (addr2) + ((imm) << 7) )
149 
150 // store
151 // str reg, [addr, #imm]		@	0 <= imm < 4096
152 #define STR_IMM(reg, addr, imm) (0xe5800000 + ((reg) << 12) + ((addr) << 16) + (imm) )
153 // str reg, [addr, #-(imm)]		@	0 <= imm < 4096
154 #define STR_IMM_M(reg, addr, imm) (0xe5000000 + ((reg) << 12) + ((addr) << 16) + (imm) )
155 // strh reg, [addr, #imm]		@	0 <= imm < 256
156 #define STRH_IMM(reg, addr, imm) (0xe1c000b0 + ((reg) << 12) + ((addr) << 16) + (((imm) & 0xf0) << 4) + ((imm) & 0x0f) )
157 // strh reg, [addr, #-(imm)]		@	0 <= imm < 256
158 #define STRH_IMM_M(reg, addr, imm) (0xe14000b0 + ((reg) << 12) + ((addr) << 16) + (((imm) & 0xf0) << 4) + ((imm) & 0x0f) )
159 // strb reg, [addr, #imm]		@	0 <= imm < 4096
160 #define STRB_IMM(reg, addr, imm) (0xe5c00000 + ((reg) << 12) + ((addr) << 16) + (imm) )
161 // strb reg, [addr, #-(imm)]		@	0 <= imm < 4096
162 #define STRB_IMM_M(reg, addr, imm) (0xe5400000 + ((reg) << 12) + ((addr) << 16) + (imm) )
163 
164 // branch
165 // beq pc+imm		@	0 <= imm < 32M	&	imm mod 4 = 0
166 #define BEQ_FWD(imm) (0x0a000000 + ((imm) >> 2) )
167 // bne pc+imm		@	0 <= imm < 32M	&	imm mod 4 = 0
168 #define BNE_FWD(imm) (0x1a000000 + ((imm) >> 2) )
169 // ble pc+imm		@	0 <= imm < 32M	&	imm mod 4 = 0
170 #define BLE_FWD(imm) (0xda000000 + ((imm) >> 2) )
171 // b pc+imm		@	0 <= imm < 32M	&	imm mod 4 = 0
172 #define B_FWD(imm) (0xea000000 + ((imm) >> 2) )
173 // bx reg
174 #define BX(reg) (0xe12fff10 + (reg) )
175 #if C_TARGETCPU == ARMV7LE
176 // blx reg
177 #define BLX_REG(reg) (0xe12fff30 + (reg) )
178 
179 // extend
180 // sxth dst, src, ror #rimm		@	rimm = 0 | 8 | 16 | 24
181 #define SXTH(dst, src, rimm) (0xe6bf0070 + ((dst) << 12) + (src) + (((rimm) & 24) << 7) )
182 // sxtb dst, src, ror #rimm		@	rimm = 0 | 8 | 16 | 24
183 #define SXTB(dst, src, rimm) (0xe6af0070 + ((dst) << 12) + (src) + (((rimm) & 24) << 7) )
184 // uxth dst, src, ror #rimm		@	rimm = 0 | 8 | 16 | 24
185 #define UXTH(dst, src, rimm) (0xe6ff0070 + ((dst) << 12) + (src) + (((rimm) & 24) << 7) )
186 // uxtb dst, src, ror #rimm		@	rimm = 0 | 8 | 16 | 24
187 #define UXTB(dst, src, rimm) (0xe6ef0070 + ((dst) << 12) + (src) + (((rimm) & 24) << 7) )
188 
189 // bit field
190 // bfi dst, src, #lsb, #width		@	lsb >= 0, width >= 1, lsb+width <= 32
191 #define BFI(dst, src, lsb, width) (0xe7c00010 + ((dst) << 12) + (src) + ((lsb) << 7) + (((lsb) + (width) - 1) << 16) )
192 // bfc dst, #lsb, #width		@	lsb >= 0, width >= 1, lsb+width <= 32
193 #define BFC(dst, lsb, width) (0xe7c0001f + ((dst) << 12) + ((lsb) << 7) + (((lsb) + (width) - 1) << 16) )
194 #endif
195 
196 
197 // move a full register from reg_src to reg_dst
static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) {
199 	if(reg_src == reg_dst) return;
200 	cache_addd( MOV_REG_LSL_IMM(reg_dst, reg_src, 0) );      // mov reg_dst, reg_src
201 }
202 
203 // helper function
static bool val_is_operand2(Bit32u value, Bit32u *val_shift) {
205 	Bit32u shift;
206 
207 	if (GCC_UNLIKELY(value == 0)) {
208 		*val_shift = 0;
209 		return true;
210 	}
211 
212 	shift = 0;
213 	while ((value & 3) == 0) {
214 		value>>=2;
215 		shift+=2;
216 	}
217 
218 	if ((value >> 8) != 0) return false;
219 
220 	*val_shift = shift;
221 	return true;
222 }
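// Worked example (a sketch): val_is_operand2(0x3FC00, &shift) strips five
// zero bit-pairs, leaving 0xFF with shift == 10, so the caller may emit
// MOV_IMM(reg, 0x3FC00 >> 10, ROTATE_SCALE(10)). A value such as 0x1FF needs
// nine significant bits and makes the function return false.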
223 
224 #if C_TARGETCPU != ARMV7LE
225 // helper function
static Bits get_imm_gen_len(Bit32u imm) {
227 	Bits ret;
228 	if (imm == 0) {
229 		return 1;
230 	} else {
231 		ret = 0;
232 		while (imm) {
233 			while ((imm & 3) == 0) {
234 				imm>>=2;
235 			}
236 			ret++;
237 			imm>>=8;
238 		}
239 		return ret;
240 	}
241 }
242 
243 // helper function
static Bits get_min_imm_gen_len(Bit32u imm) {
245 	Bits num1, num2;
246 
247 	num1 = get_imm_gen_len(imm);
248 	num2 = get_imm_gen_len(~imm);
249 
250 	return (num1 <= num2)?num1:num2;
251 }
252 #endif
253 
254 // move a 32bit constant value into dest_reg
static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
256 #if C_TARGETCPU == ARMV7LE
257 	Bit32u scale;
258 
259 	if ( val_is_operand2(imm, &scale) ) {
260 		cache_addd( MOV_IMM(dest_reg, imm >> scale, ROTATE_SCALE(scale)) );      // mov dest_reg, #imm
261 	} else if ( val_is_operand2(~imm, &scale) ) {
262 		cache_addd( MVN_IMM(dest_reg, (~imm) >> scale, ROTATE_SCALE(scale)) );      // mvn dest_reg, #~imm
263 	} else {
264 		cache_addd( MOVW(dest_reg, imm & 0xffff) );      // movw dest_reg, #(imm & 0xffff)
265 
266 		if (imm >= 0x10000)
267 		{
268 			cache_addd( MOVT(dest_reg, imm >> 16) );      // movt dest_reg, #(imm >> 16)
269 		}
270 	}
271 #else
272 	Bit32u imm2, first, scale;
273 
274 	scale = 0;
275 	first = 1;
276 	imm2 = ~imm;
277 
278 	if (get_imm_gen_len(imm) <= get_imm_gen_len(imm2)) {
279 		if (imm == 0) {
280 			cache_addd( MOV_IMM(dest_reg, 0, 0) );      // mov dest_reg, #0
281 		} else {
282 			while (imm) {
283 				while ((imm & 3) == 0) {
284 					imm>>=2;
285 					scale+=2;
286 				}
287 				if (first) {
288 					cache_addd( MOV_IMM(dest_reg, imm & 0xff, ROTATE_SCALE(scale)) );      // mov dest_reg, #((imm & 0xff) << scale)
289 					first = 0;
290 				} else {
291 					cache_addd( ORR_IMM(dest_reg, dest_reg, imm & 0xff, ROTATE_SCALE(scale)) );      // orr dest_reg, dest_reg, #((imm & 0xff) << scale)
292 				}
293 				imm>>=8;
294 				scale+=8;
295 			}
296 		}
297 	} else {
298 		if (imm2 == 0) {
299 			cache_addd( MVN_IMM(dest_reg, 0, 0) );      // mvn dest_reg, #0
300 		} else {
301 			while (imm2) {
302 				while ((imm2 & 3) == 0) {
303 					imm2>>=2;
304 					scale+=2;
305 				}
306 				if (first) {
307 					cache_addd( MVN_IMM(dest_reg, imm2 & 0xff, ROTATE_SCALE(scale)) );      // mvn dest_reg, #((imm2 & 0xff) << scale)
308 					first = 0;
309 				} else {
310 					cache_addd( BIC_IMM(dest_reg, dest_reg, imm2 & 0xff, ROTATE_SCALE(scale)) );      // bic dest_reg, dest_reg, #((imm2 & 0xff) << scale)
311 				}
312 				imm2>>=8;
313 				scale+=8;
314 			}
315 		}
316 	}
317 #endif
318 }
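// A sketch of the code this produces for a few constants:
//   gen_mov_dword_to_reg_imm(reg, 0x3FC00)    -> mov reg, #0x3FC00    (fits operand2)
//   gen_mov_dword_to_reg_imm(reg, 0xFFFFFF00) -> mvn reg, #0xff       (inverse is shorter)
//   gen_mov_dword_to_reg_imm(reg, 0x12345678) -> movw+movt on ARMv7, or a
//                                                mov/orr/orr/orr chain on ARMv4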
319 
320 // helper function
static bool gen_mov_memval_to_reg_helper(HostReg dest_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) {
322 	switch (size) {
323 		case 4:
324 #if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
325 			if ((data & 3) == 0)
326 #endif
327 			{
328 				if ((data >= addr_data) && (data < addr_data + 4096)) {
329 					cache_addd( LDR_IMM(dest_reg, addr_reg, data - addr_data) );      // ldr dest_reg, [addr_reg, #(data - addr_data)]
330 					return true;
331 				} else if ((data < addr_data) && (data > addr_data - 4096)) {
332 					cache_addd( LDR_IMM_M(dest_reg, addr_reg, addr_data - data) );      // ldr dest_reg, [addr_reg, #-(addr_data - data)]
333 					return true;
334 				}
335 			}
336 			break;
337 		case 2:
338 #if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
339 			if ((data & 1) == 0)
340 #endif
341 			{
342 				if ((data >= addr_data) && (data < addr_data + 256)) {
343 					cache_addd( LDRH_IMM(dest_reg, addr_reg, data - addr_data) );      // ldrh dest_reg, [addr_reg, #(data - addr_data)]
344 					return true;
345 				} else if ((data < addr_data) && (data > addr_data - 256)) {
346 					cache_addd( LDRH_IMM_M(dest_reg, addr_reg, addr_data - data) );      // ldrh dest_reg, [addr_reg, #-(addr_data - data)]
347 					return true;
348 				}
349 			}
350 			break;
351 		case 1:
352 			if ((data >= addr_data) && (data < addr_data + 4096)) {
353 				cache_addd( LDRB_IMM(dest_reg, addr_reg, data - addr_data) );      // ldrb dest_reg, [addr_reg, #(data - addr_data)]
354 				return true;
355 			} else if ((data < addr_data) && (data > addr_data - 4096)) {
356 				cache_addd( LDRB_IMM_M(dest_reg, addr_reg, addr_data - data) );      // ldrb dest_reg, [addr_reg, #-(addr_data - data)]
357 				return true;
358 			}
359 		default:
360 			break;
361 	}
362 	return false;
363 }
364 
365 // helper function
static bool gen_mov_memval_to_reg(HostReg dest_reg, void *data, Bitu size) {
367 	if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true;
368 	if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true;
369 	if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true;
370 	return false;
371 }
372 
373 // helper function for gen_mov_word_to_reg
static void gen_mov_word_to_reg_helper(HostReg dest_reg, [[maybe_unused]] void* data,bool dword,HostReg data_reg) {
375 	// alignment....
376 	if (dword) {
377 #if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
378 		if ((Bit32u)data & 3) {
379 			if ( ((Bit32u)data & 3) == 2 ) {
380 				cache_addd( LDRH_IMM(dest_reg, data_reg, 0) );      // ldrh dest_reg, [data_reg]
381 				cache_addd( LDRH_IMM(temp2, data_reg, 2) );      // ldrh temp2, [data_reg, #2]
382 				cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 16) );      // orr dest_reg, dest_reg, temp2, lsl #16
383 			} else {
384 				cache_addd( LDRB_IMM(dest_reg, data_reg, 0) );      // ldrb dest_reg, [data_reg]
385 				cache_addd( LDRH_IMM(temp2, data_reg, 1) );      // ldrh temp2, [data_reg, #1]
386 				cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 8) );      // orr dest_reg, dest_reg, temp2, lsl #8
387 				cache_addd( LDRB_IMM(temp2, data_reg, 3) );      // ldrb temp2, [data_reg, #3]
388 				cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 24) );      // orr dest_reg, dest_reg, temp2, lsl #24
389 			}
390 		} else
391 #endif
392 		{
393 			cache_addd( LDR_IMM(dest_reg, data_reg, 0) );      // ldr dest_reg, [data_reg]
394 		}
395 	} else {
396 #if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
397 		if ((Bit32u)data & 1) {
398 			cache_addd( LDRB_IMM(dest_reg, data_reg, 0) );      // ldrb dest_reg, [data_reg]
399 			cache_addd( LDRB_IMM(temp2, data_reg, 1) );      // ldrb temp2, [data_reg, #1]
400 			cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 8) );      // orr dest_reg, dest_reg, temp2, lsl #8
401 		} else
402 #endif
403 		{
404 			cache_addd( LDRH_IMM(dest_reg, data_reg, 0) );      // ldrh dest_reg, [data_reg]
405 		}
406 	}
407 }
408 
409 // move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
410 // 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) {
412 	if (!gen_mov_memval_to_reg(dest_reg, data, (dword)?4:2)) {
413 		gen_mov_dword_to_reg_imm(temp1, (Bit32u)data);
414 		gen_mov_word_to_reg_helper(dest_reg, data, dword, temp1);
415 	}
416 }
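// Usage sketch ("some_global" is a hypothetical Bit32u, not part of this
// backend): gen_mov_word_to_reg(FC_OP1, &some_global, true) first tries to
// address the variable relative to FC_REGS_ADDR, readdata_addr or
// FC_SEGS_ADDR; if it is not within reach of any of them, the absolute
// address is built in temp1 and a plain "ldr a1, [temp1]" is emitted.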
417 
418 // move a 16bit constant value into dest_reg
419 // the upper 16bit of the destination register may be destroyed
static void inline gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) {
421 	gen_mov_dword_to_reg_imm(dest_reg, (Bit32u)imm);
422 }
423 
424 // helper function
static bool gen_mov_memval_from_reg_helper(HostReg src_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) {
426 	switch (size) {
427 		case 4:
428 #if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
429 			if ((data & 3) == 0)
430 #endif
431 			{
432 				if ((data >= addr_data) && (data < addr_data + 4096)) {
433 					cache_addd( STR_IMM(src_reg, addr_reg, data - addr_data) );      // str src_reg, [addr_reg, #(data - addr_data)]
434 					return true;
435 				} else if ((data < addr_data) && (data > addr_data - 4096)) {
436 					cache_addd( STR_IMM_M(src_reg, addr_reg, addr_data - data) );      // str src_reg, [addr_reg, #-(addr_data - data)]
437 					return true;
438 				}
439 			}
440 			break;
441 		case 2:
442 #if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
443 			if ((data & 1) == 0)
444 #endif
445 			{
446 				if ((data >= addr_data) && (data < addr_data + 256)) {
447 					cache_addd( STRH_IMM(src_reg, addr_reg, data - addr_data) );      // strh src_reg, [addr_reg, #(data - addr_data)]
448 					return true;
449 				} else if ((data < addr_data) && (data > addr_data - 256)) {
450 					cache_addd( STRH_IMM_M(src_reg, addr_reg, addr_data - data) );      // strh src_reg, [addr_reg, #-(addr_data - data)]
451 					return true;
452 				}
453 			}
454 			break;
455 		case 1:
456 			if ((data >= addr_data) && (data < addr_data + 4096)) {
457 				cache_addd( STRB_IMM(src_reg, addr_reg, data - addr_data) );      // strb src_reg, [addr_reg, #(data - addr_data)]
458 				return true;
459 			} else if ((data < addr_data) && (data > addr_data - 4096)) {
460 				cache_addd( STRB_IMM_M(src_reg, addr_reg, addr_data - data) );      // strb src_reg, [addr_reg, #-(addr_data - data)]
461 				return true;
462 			}
463 		default:
464 			break;
465 	}
466 	return false;
467 }
468 
469 // helper function
static bool gen_mov_memval_from_reg(HostReg src_reg, void *dest, Bitu size) {
471 	if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true;
472 	if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true;
473 	if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true;
474 	return false;
475 }
476 
477 // helper function for gen_mov_word_from_reg
static void gen_mov_word_from_reg_helper(HostReg src_reg, [[maybe_unused]] void* dest,bool dword, HostReg data_reg) {
479 	// alignment....
480 	if (dword) {
481 #if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
482 		if ((Bit32u)dest & 3) {
483 			if ( ((Bit32u)dest & 3) == 2 ) {
484 				cache_addd( STRH_IMM(src_reg, data_reg, 0) );      // strh src_reg, [data_reg]
485 				cache_addd( MOV_REG_LSR_IMM(temp2, src_reg, 16) );      // mov temp2, src_reg, lsr #16
486 				cache_addd( STRH_IMM(temp2, data_reg, 2) );      // strh temp2, [data_reg, #2]
487 			} else {
488 				cache_addd( STRB_IMM(src_reg, data_reg, 0) );      // strb src_reg, [data_reg]
489 				cache_addd( MOV_REG_LSR_IMM(temp2, src_reg, 8) );      // mov temp2, src_reg, lsr #8
490 				cache_addd( STRH_IMM(temp2, data_reg, 1) );      // strh temp2, [data_reg, #1]
491 				cache_addd( MOV_REG_LSR_IMM(temp2, temp2, 16) );      // mov temp2, temp2, lsr #16
492 				cache_addd( STRB_IMM(temp2, data_reg, 3) );      // strb temp2, [data_reg, #3]
493 			}
494 		} else
495 #endif
496 		{
497 			cache_addd( STR_IMM(src_reg, data_reg, 0) );      // str src_reg, [data_reg]
498 		}
499 	} else {
500 #if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
501 		if ((Bit32u)dest & 1) {
502 			cache_addd( STRB_IMM(src_reg, data_reg, 0) );      // strb src_reg, [data_reg]
503 			cache_addd( MOV_REG_LSR_IMM(temp2, src_reg, 8) );      // mov temp2, src_reg, lsr #8
504 			cache_addd( STRB_IMM(temp2, data_reg, 1) );      // strb temp2, [data_reg, #1]
505 		} else
506 #endif
507 		{
508 			cache_addd( STRH_IMM(src_reg, data_reg, 0) );      // strh src_reg, [data_reg]
509 		}
510 	}
511 }
512 
513 // move 32bit (dword==true) or 16bit (dword==false) of a register into memory
static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) {
515 	if (!gen_mov_memval_from_reg(src_reg, dest, (dword)?4:2)) {
516 		gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
517 		gen_mov_word_from_reg_helper(src_reg, dest, dword, temp1);
518 	}
519 }
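// A sketch of a typical read-modify-write on a guest dword ("guest_var" is a
// hypothetical Bit32u; gen_add_imm is defined further below):
//   gen_mov_word_to_reg(FC_OP1, &guest_var, true);     // load 32 bits
//   gen_add_imm(FC_OP1, 0x10);                         // adjust in the host register
//   gen_mov_word_from_reg(FC_OP1, &guest_var, true);   // store 32 bits back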
520 
521 // move an 8bit value from memory into dest_reg
522 // the upper 24bit of the destination register can be destroyed
523 // this function does not use FC_OP1/FC_OP2 as dest_reg as these
524 // registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) {
526 	if (!gen_mov_memval_to_reg(dest_reg, data, 1)) {
527 		gen_mov_dword_to_reg_imm(temp1, (Bit32u)data);
528 		cache_addd( LDRB_IMM(dest_reg, temp1, 0) );      // ldrb dest_reg, [temp1]
529 	}
530 }
531 
532 // move an 8bit value from memory into dest_reg
533 // the upper 24bit of the destination register can be destroyed
534 // this function can use FC_OP1/FC_OP2 as dest_reg which are
535 // not directly byte-accessible on some architectures
static void inline gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
537 	gen_mov_byte_to_reg_low(dest_reg, data);
538 }
539 
540 // move an 8bit constant value into dest_reg
541 // the upper 24bit of the destination register can be destroyed
542 // this function does not use FC_OP1/FC_OP2 as dest_reg as these
543 // registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
545 	cache_addd( MOV_IMM(dest_reg, imm, 0) );      // mov dest_reg, #(imm)
546 }
547 
548 // move an 8bit constant value into dest_reg
549 // the upper 24bit of the destination register can be destroyed
550 // this function can use FC_OP1/FC_OP2 as dest_reg which are
551 // not directly byte-accessible on some architectures
static void inline gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) {
553 	gen_mov_byte_to_reg_low_imm(dest_reg, imm);
554 }
555 
556 // move the lowest 8bit of a register into memory
[[maybe_unused]] static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) {
558 	if (!gen_mov_memval_from_reg(src_reg, dest, 1)) {
559 		gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
560 		cache_addd( STRB_IMM(src_reg, temp1, 0) );      // strb src_reg, [temp1]
561 	}
562 }
563 
564 
565 
566 // convert an 8bit word to a 32bit dword
567 // the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_byte(bool sign,HostReg reg) {
569 	if (sign) {
570 #if C_TARGETCPU == ARMV7LE
571 		cache_addd( SXTB(reg, reg, 0) );      // sxtb reg, reg
572 #else
573 		cache_addd( MOV_REG_LSL_IMM(reg, reg, 24) );      // mov reg, reg, lsl #24
574 		cache_addd( MOV_REG_ASR_IMM(reg, reg, 24) );      // mov reg, reg, asr #24
575 #endif
576 	} else {
577 #if C_TARGETCPU == ARMV7LE
578 		cache_addd( UXTB(reg, reg, 0) );      // uxtb reg, reg
579 #else
580 		cache_addd( AND_IMM(reg, reg, 0xff, 0) );      // and reg, reg, #0xff
581 #endif
582 	}
583 }
584 
585 // convert a 16bit word to a 32bit dword
586 // the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_word(bool sign,HostReg reg) {
588 	if (sign) {
589 #if C_TARGETCPU == ARMV7LE
590 		cache_addd( SXTH(reg, reg, 0) );      // sxth reg, reg
591 #else
592 		cache_addd( MOV_REG_LSL_IMM(reg, reg, 16) );      // mov reg, reg, lsl #16
593 		cache_addd( MOV_REG_ASR_IMM(reg, reg, 16) );      // mov reg, reg, asr #16
594 #endif
595 	} else {
596 #if C_TARGETCPU == ARMV7LE
597 		cache_addd( UXTH(reg, reg, 0) );      // uxth reg, reg
598 #else
599 		cache_addd( MOV_REG_LSL_IMM(reg, reg, 16) );      // mov reg, reg, lsl #16
600 		cache_addd( MOV_REG_LSR_IMM(reg, reg, 16) );      // mov reg, reg, lsr #16
601 #endif
602 	}
603 }
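// A sketch: after a byte has been loaded into FC_RETOP, gen_extend_byte(true,
// FC_RETOP) sign-extends it to 32 bits - a single "sxtb" on ARMv7, or the
// "lsl #24" / "asr #24" pair on ARMv4; the unsigned variants use uxtb/uxth
// or masking/shifting instead.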
604 
605 // add a 32bit value from memory to a full register
static void gen_add(HostReg reg,void* op) {
607 	gen_mov_word_to_reg(temp3, op, 1);
608 	cache_addd( ADD_REG_LSL_IMM(reg, reg, temp3, 0) );      // add reg, reg, temp3
609 }
610 
611 // add a 32bit constant value to a full register
static void gen_add_imm(HostReg reg,Bit32u imm) {
613 	Bit32u imm2, scale;
614 
615 	if(!imm) return;
616 
617 	imm2 = (Bit32u) (-((Bit32s)imm));
618 
619 	if ( val_is_operand2(imm, &scale) ) {
620 		cache_addd( ADD_IMM(reg, reg, imm >> scale, ROTATE_SCALE(scale)) );      // add reg, reg, #imm
621 	} else if ( val_is_operand2(imm2, &scale) ) {
622 		cache_addd( SUB_IMM(reg, reg, imm2 >> scale, ROTATE_SCALE(scale)) );      // sub reg, reg, #(-imm)
623 #if C_TARGETCPU == ARMV7LE
624 	} else if (imm2 < 0x10000) {
625 		cache_addd( MOVW(temp2, imm2) );      // movw temp2, #(-imm)
626 		cache_addd( SUB_REG_LSL_IMM(reg, reg, temp2, 0) );      // sub reg, reg, temp2
627 #endif
628 	} else {
629 #if C_TARGETCPU != ARMV7LE
630 		if (get_min_imm_gen_len(imm) <= get_min_imm_gen_len(imm2)) {
631 #endif
632 			gen_mov_dword_to_reg_imm(temp2, imm);
633 			cache_addd( ADD_REG_LSL_IMM(reg, reg, temp2, 0) );      // add reg, reg, temp2
634 #if C_TARGETCPU != ARMV7LE
635 		} else {
636 			gen_mov_dword_to_reg_imm(temp2, imm2);
637 			cache_addd( SUB_REG_LSL_IMM(reg, reg, temp2, 0) );      // sub reg, reg, temp2
638 		}
639 #endif
640 	}
641 }
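// Examples of the selection above (a sketch):
//   gen_add_imm(reg, 0xFFFFFFFF) -> sub reg, reg, #1       (negated value fits operand2)
//   gen_add_imm(reg, 0x104)      -> add reg, reg, #0x104   (0x41 ror 30)
//   gen_add_imm(reg, 0x10003)    -> constant built in temp2, then "add reg, reg, temp2"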
642 
643 // and a 32bit constant value with a full register
static void gen_and_imm(HostReg reg,Bit32u imm) {
645 	Bit32u imm2, scale;
646 
647 	imm2 = ~imm;
648 	if(!imm2) return;
649 
650 	if (!imm) {
651 		cache_addd( MOV_IMM(reg, 0, 0) );      // mov reg, #0
652 	} else if ( val_is_operand2(imm, &scale) ) {
653 		cache_addd( AND_IMM(reg, reg, imm >> scale, ROTATE_SCALE(scale)) );      // and reg, reg, #imm
654 	} else if ( val_is_operand2(imm2, &scale) ) {
655 		cache_addd( BIC_IMM(reg, reg, imm2 >> scale, ROTATE_SCALE(scale)) );      // bic reg, reg, #(~imm)
656 #if C_TARGETCPU == ARMV7LE
657 	} else if (imm2 < 0x10000) {
658 		cache_addd( MOVW(temp2, imm2) );      // movw temp2, #(~imm)
659 		cache_addd( BIC_REG_LSL_IMM(reg, reg, temp2, 0) );      // bic reg, reg, temp2
660 #endif
661 	} else {
662 		gen_mov_dword_to_reg_imm(temp2, imm);
663 		cache_addd( AND_REG_LSL_IMM(reg, reg, temp2, 0) );      // and reg, reg, temp2
664 	}
665 }
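// Examples (a sketch):
//   gen_and_imm(reg, 0xFFFFFF00) -> bic reg, reg, #0xff    (clear the low byte)
//   gen_and_imm(reg, 0xFF)       -> and reg, reg, #0xff
//   gen_and_imm(reg, 0)          -> mov reg, #0
//   gen_and_imm(reg, 0xFFFFFFFF) emits nothing at all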
666 
667 
668 // move a 32bit constant value into memory
static void gen_mov_direct_dword(void* dest,Bit32u imm) {
670 	gen_mov_dword_to_reg_imm(temp3, imm);
671 	gen_mov_word_from_reg(temp3, dest, 1);
672 }
673 
674 // move an address into memory
static void inline gen_mov_direct_ptr(void* dest,Bit32u imm) {
676 	gen_mov_direct_dword(dest,imm);
677 }
678 
679 // add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value
static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) {
681 	if (!dword) imm &= 0xffff;
682 	if(!imm) return;
683 
684 	if (!gen_mov_memval_to_reg(temp3, dest, (dword)?4:2)) {
685 		gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
686 		gen_mov_word_to_reg_helper(temp3, dest, dword, temp1);
687 	}
688 	gen_add_imm(temp3, imm);
689 	if (!gen_mov_memval_from_reg(temp3, dest, (dword)?4:2)) {
690 		gen_mov_word_from_reg_helper(temp3, dest, dword, temp1);
691 	}
692 }
693 
694 // add an 8bit constant value to a dword memory value
[[maybe_unused]] static void gen_add_direct_byte(void* dest,Bit8s imm) {
696 	gen_add_direct_word(dest, (Bit32s)imm, 1);
697 }
698 
699 // subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value
static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
701 	Bit32u imm2, scale;
702 
703 	if (!dword) imm &= 0xffff;
704 	if(!imm) return;
705 
706 	if (!gen_mov_memval_to_reg(temp3, dest, (dword)?4:2)) {
707 		gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
708 		gen_mov_word_to_reg_helper(temp3, dest, dword, temp1);
709 	}
710 
711 	imm2 = (Bit32u) (-((Bit32s)imm));
712 
713 	if ( val_is_operand2(imm, &scale) ) {
714 		cache_addd( SUB_IMM(temp3, temp3, imm >> scale, ROTATE_SCALE(scale)) );      // sub temp3, temp3, #imm
715 	} else if ( val_is_operand2(imm2, &scale) ) {
716 		cache_addd( ADD_IMM(temp3, temp3, imm2 >> scale, ROTATE_SCALE(scale)) );      // add temp3, temp3, #(-imm)
717 #if C_TARGETCPU == ARMV7LE
718 	} else if (imm2 < 0x10000) {
719 		cache_addd( MOVW(temp2, imm2) );      // movw temp2, #(-imm)
720 		cache_addd( ADD_REG_LSL_IMM(temp3, temp3, temp2, 0) );      // add temp3, temp3, temp2
721 #endif
722 	} else {
723 #if C_TARGETCPU != ARMV7LE
724 		if (get_min_imm_gen_len(imm) <= get_min_imm_gen_len(imm2)) {
725 #endif
726 			gen_mov_dword_to_reg_imm(temp2, imm);
727 			cache_addd( SUB_REG_LSL_IMM(temp3, temp3, temp2, 0) );      // sub temp3, temp3, temp2
728 #if C_TARGETCPU != ARMV7LE
729 		} else {
730 			gen_mov_dword_to_reg_imm(temp2, imm2);
731 			cache_addd( ADD_REG_LSL_IMM(temp3, temp3, temp2, 0) );      // add temp3, temp3, temp2
732 		}
733 #endif
734 	}
735 
736 	if (!gen_mov_memval_from_reg(temp3, dest, (dword)?4:2)) {
737 		gen_mov_word_from_reg_helper(temp3, dest, dword, temp1);
738 	}
739 }
740 
741 // subtract an 8bit constant value from a dword memory value
[[maybe_unused]] static void gen_sub_direct_byte(void* dest,Bit8s imm) {
743 	gen_sub_direct_word(dest, (Bit32s)imm, 1);
744 }
745 
746 // effective address calculation, destination is dest_reg
747 // scale_reg is scaled by scale (scale_reg*(2^scale)) and
748 // added to dest_reg, then the immediate value is added
static inline void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) {
750 	cache_addd( ADD_REG_LSL_IMM(dest_reg, dest_reg, scale_reg, scale) );      // add dest_reg, dest_reg, scale_reg, lsl #(scale)
751 	gen_add_imm(dest_reg, imm);
752 }
753 
754 // effective address calculation, destination is dest_reg
755 // dest_reg is scaled by scale (dest_reg*(2^scale)),
756 // then the immediate value is added
static inline void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) {
758 	if (scale) {
759 		cache_addd( MOV_REG_LSL_IMM(dest_reg, dest_reg, scale) );      // mov dest_reg, dest_reg, lsl #(scale)
760 	}
761 	gen_add_imm(dest_reg, imm);
762 }
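// A sketch: an x86 address of the form  base + index*4 + 8  with the base
// already in dest_reg and the index in scale_reg is formed by
//   gen_lea(dest_reg, scale_reg, 2, 8);
// which emits "add dest_reg, dest_reg, scale_reg, lsl #2" followed by the
// gen_add_imm() sequence for the displacement (here a single "add ..., #8").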
763 
764 // generate a call to a parameterless function
static void inline gen_call_function_raw(void * func) {
766 #if C_TARGETCPU == ARMV7LE
767 	cache_addd( MOVW(temp1, ((Bit32u)func) & 0xffff) );      // movw temp1, #(func & 0xffff)
768 	cache_addd( MOVT(temp1, ((Bit32u)func) >> 16) );      // movt temp1, #(func >> 16)
769 	cache_addd( BLX_REG(temp1) );      // blx temp1
770 #else
771 	cache_addd( LDR_IMM(temp1, HOST_pc, 4) );      // ldr temp1, [pc, #4]
772 	cache_addd( ADD_IMM(HOST_lr, HOST_pc, 4, 0) );      // add lr, pc, #4
773 	cache_addd( BX(temp1) );      // bx temp1
774 	cache_addd((Bit32u)func);      // .int func
775 #endif
776 }
777 
778 // generate a call to a function with paramcount parameters
779 // note: the parameters are loaded in the architecture specific way
780 // using the gen_load_param_ functions below
781 static inline const Bit8u* gen_call_function_setup(void * func, [[maybe_unused]] Bitu paramcount, [[maybe_unused]] bool fastcall=false) {
782 	const Bit8u* proc_addr = cache.pos;
783 	gen_call_function_raw(func);
784 	return proc_addr;
785 }
786 
787 #if (1)
788 // max of 4 parameters in a1-a4
789 
790 // load an immediate value as param'th function parameter
static void inline gen_load_param_imm(Bitu imm,Bitu param) {
792 	gen_mov_dword_to_reg_imm(param, imm);
793 }
794 
795 // load an address as param'th function parameter
static void inline gen_load_param_addr(Bitu addr,Bitu param) {
797 	gen_mov_dword_to_reg_imm(param, addr);
798 }
799 
800 // load a host-register as param'th function parameter
static void inline gen_load_param_reg(Bitu reg,Bitu param) {
802 	gen_mov_regs(param, reg);
803 }
804 
805 // load a value from memory as param'th function parameter
static void inline gen_load_param_mem(Bitu mem,Bitu param) {
807 	gen_mov_word_to_reg(param, (void *)mem, 1);
808 }
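// A sketch of a two-parameter call ("some_helper" and "some_value" are
// hypothetical, not part of this backend):
//   gen_load_param_imm(some_value, FC_OP1);          // a1 = immediate parameter
//   gen_load_param_reg(FC_ADDR, FC_OP2);             // a2 = copy of a host register
//   gen_call_function_setup((void*)some_helper, 2);  // emits the call sequence above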
809 #else
810 	other arm abis
811 #endif
812 
813 // jump to an address pointed at by ptr, offset is in imm
814 static void gen_jmp_ptr(void * ptr,Bits imm=0) {
815 	gen_mov_word_to_reg(temp3, ptr, 1);
816 
817 #if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
818 // (*ptr) should be word aligned
819 	if ((imm & 0x03) == 0) {
820 #endif
821 		if ((imm >= 0) && (imm < 4096)) {
822 			cache_addd( LDR_IMM(temp1, temp3, imm) );      // ldr temp1, [temp3, #imm]
823 		} else {
824 			gen_mov_dword_to_reg_imm(temp2, imm);
825 			cache_addd( LDR_REG_LSL_IMM(temp1, temp3, temp2, 0) );      // ldr temp1, [temp3, temp2]
826 		}
827 #if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
828 	} else {
829 		gen_add_imm(temp3, imm);
830 
831 		cache_addd( LDRB_IMM(temp1, temp3, 0) );      // ldrb temp1, [temp3]
832 		cache_addd( LDRB_IMM(temp2, temp3, 1) );      // ldrb temp2, [temp3, #1]
833 		cache_addd( ORR_REG_LSL_IMM(temp1, temp1, temp2, 8) );      // orr temp1, temp1, temp2, lsl #8
834 		cache_addd( LDRB_IMM(temp2, temp3, 2) );      // ldrb temp2, [temp3, #2]
835 		cache_addd( ORR_REG_LSL_IMM(temp1, temp1, temp2, 16) );      // orr temp1, temp1, temp2, lsl #16
836 		cache_addd( LDRB_IMM(temp2, temp3, 3) );      // ldrb temp2, [temp3, #3]
837 		cache_addd( ORR_REG_LSL_IMM(temp1, temp1, temp2, 24) );      // orr temp1, temp1, temp2, lsl #24
838 	}
839 #endif
840 
841 	cache_addd( BX(temp1) );      // bx temp1
842 }
843 
844 // short conditional jump (+-127 bytes) if register is zero
845 // the destination is set by gen_fill_branch() later
static const Bit8u* gen_create_branch_on_zero(HostReg reg,bool dword) {
847 	if (dword) {
848 		cache_addd( CMP_IMM(reg, 0, 0) );      // cmp reg, #0
849 	} else {
850 		cache_addd( MOVS_REG_LSL_IMM(temp1, reg, 16) );      // movs temp1, reg, lsl #16
851 	}
852 	cache_addd( BEQ_FWD(0) );      // beq j
853 	return (cache.pos-4);
854 }
855 
856 // short conditional jump (+-127 bytes) if register is nonzero
857 // the destination is set by gen_fill_branch() later
static const Bit8u* gen_create_branch_on_nonzero(HostReg reg,bool dword) {
859 	if (dword) {
860 		cache_addd( CMP_IMM(reg, 0, 0) );      // cmp reg, #0
861 	} else {
862 		cache_addd( MOVS_REG_LSL_IMM(temp1, reg, 16) );      // movs temp1, reg, lsl #16
863 	}
864 	cache_addd( BNE_FWD(0) );      // bne j
865 	return (cache.pos-4);
866 }
867 
868 // calculate relative offset and fill it into the location pointed to by data
static void inline gen_fill_branch(const Bit8u* data) {
870 #if C_DEBUG
871 	Bits len=cache.pos-(data+8);
872 	if (len<0) len=-len;
873 	if (len>0x02000000) LOG_MSG("Big jump %d",len);
874 #endif
875 	Bitu off = (cache.pos - (data+8)) >> 2;
876 	cache_addw((Bit16u)off,data);
877 	cache_addb((Bit8u)(off>>16),data+2);
878 }
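// Usage sketch: forward branches are emitted with a zero offset and patched
// once the target is known:
//   const Bit8u* branch = gen_create_branch_on_zero(FC_RETOP, true);  // beq <filled in later>
//   ... emit the code that is skipped when FC_RETOP is zero ...
//   gen_fill_branch(branch);    // write the 24-bit offset relative to branch+8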
879 
880 // conditional jump if register is nonzero
881 // for isdword==true the 32bit of the register are tested
882 // for isdword==false the lowest 8bit of the register are tested
static const Bit8u* gen_create_branch_long_nonzero(HostReg reg,bool isdword) {
884 	if (isdword) {
885 		cache_addd( CMP_IMM(reg, 0, 0) );      // cmp reg, #0
886 	} else {
887 		cache_addd( TST_IMM(reg, 0xff, 0) );      // tst reg, #0xff
888 	}
889 	cache_addd( BNE_FWD(0) );      // bne j
890 	return (cache.pos-4);
891 }
892 
893 // compare 32bit-register against zero and jump if value less/equal than zero
static const Bit8u* gen_create_branch_long_leqzero(HostReg reg) {
895 	cache_addd( CMP_IMM(reg, 0, 0) );      // cmp reg, #0
896 	cache_addd( BLE_FWD(0) );      // ble j
897 	return (cache.pos-4);
898 }
899 
900 // calculate long relative offset and fill it into the location pointed to by data
static void inline gen_fill_branch_long(const Bit8u* data) {
902 	gen_fill_branch(data);
903 }
904 
static void gen_run_code(void) {
906 #if C_TARGETCPU == ARMV7LE
907 	cache_addd(0xe92d4df0);			// stmfd sp!, {v1-v5,v7,v8,lr}
908 
909 	cache_addd( MOVW(FC_SEGS_ADDR, ((Bit32u)&Segs) & 0xffff) );      // movw FC_SEGS_ADDR, #(&Segs & 0xffff)
910 	cache_addd( MOVT(FC_SEGS_ADDR, ((Bit32u)&Segs) >> 16) );      // movt FC_SEGS_ADDR, #(&Segs >> 16)
911 
912 	cache_addd( MOVW(FC_REGS_ADDR, ((Bit32u)&cpu_regs) & 0xffff) );      // movw FC_REGS_ADDR, #(&cpu_regs & 0xffff)
913 	cache_addd( MOVT(FC_REGS_ADDR, ((Bit32u)&cpu_regs) >> 16) );      // movt FC_REGS_ADDR, #(&cpu_regs >> 16)
914 
915 	cache_addd( MOVW(readdata_addr, ((Bitu)&core_dynrec.readdata) & 0xffff) );      // movw readdata_addr, #(&core_dynrec.readdata & 0xffff)
916 	cache_addd( MOVT(readdata_addr, ((Bitu)&core_dynrec.readdata) >> 16) );      // movt readdata_addr, #(&core_dynrec.readdata >> 16)
917 
918 	cache_addd( BX(HOST_r0) );			// bx r0
919 #else
920 	const Bit8u *pos1, *pos2, *pos3;
921 
922 	cache_addd(0xe92d4df0);			// stmfd sp!, {v1-v5,v7,v8,lr}
923 
924 	pos1 = cache.pos;
925 	cache_addd( 0 );
926 	pos2 = cache.pos;
927 	cache_addd( 0 );
928 	pos3 = cache.pos;
929 	cache_addd( 0 );
930 
931 	cache_addd( BX(HOST_r0) );			// bx r0
932 
933 	// align cache.pos to 32 bytes
934 	if ((((Bitu)cache.pos) & 0x1f) != 0) {
935 		cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f));
936 	}
937 
938 	cache_addd(LDR_IMM(FC_SEGS_ADDR, HOST_pc, cache.pos - (pos1 + 8)),pos1);      // ldr FC_SEGS_ADDR, [pc, #(&Segs)]
939 	cache_addd((Bit32u)&Segs);      // address of "Segs"
940 
941 	cache_addd(LDR_IMM(FC_REGS_ADDR, HOST_pc, cache.pos - (pos2 + 8)),pos2);      // ldr FC_REGS_ADDR, [pc, #(&cpu_regs)]
942 	cache_addd((Bit32u)&cpu_regs);  // address of "cpu_regs"
943 
944 	cache_addd(LDR_IMM(readdata_addr, HOST_pc, cache.pos - (pos3 + 8)),pos3);      // ldr readdata_addr, [pc, #(&core_dynrec.readdata)]
945 	cache_addd((Bit32u)&core_dynrec.readdata);  // address of "core_dynrec.readdata"
946 
947 	// align cache.pos to 32 bytes
948 	if ((((Bitu)cache.pos) & 0x1f) != 0) {
949 		cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f));
950 	}
951 #endif
952 }
953 
954 // return from a function
static void gen_return_function(void) {
956 	cache_addd(0xe8bd8df0);			// ldmfd sp!, {v1-v5,v7,v8,pc}
957 }
958 
959 #ifdef DRC_FLAGS_INVALIDATION
960 
961 // called when a call to a function can be replaced by a
962 // call to a simpler function
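// A sketch of the in-place patch for t_ADDd on ARMv7, where the call site
// emitted by gen_call_function_raw is three words long:
//   before: movw temp1, #lo(func) ; movt temp1, #hi(func) ; blx temp1
//   after:  nop                   ; nop                   ; add FC_RETOP, a1, a2
// On ARMv4 the call site is four words, so a fourth word is overwritten as well.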
static void gen_fill_function_ptr(const Bit8u * pos,void* fct_ptr,Bitu flags_type) {
964 #ifdef DRC_FLAGS_INVALIDATION_DCODE
965 	// try to avoid function calls but rather directly fill in code
966 	switch (flags_type) {
967 		case t_ADDb:
968 		case t_ADDw:
969 		case t_ADDd:
970 			cache_addd(NOP,pos+0);				// nop
971 			cache_addd(NOP,pos+4);				// nop
972 			cache_addd(ADD_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0),pos+8);	// add FC_RETOP, a1, a2
973 #if C_TARGETCPU != ARMV7LE
974 			cache_addd(NOP,pos+12);				// nop
975 #endif
976 			break;
977 		case t_ORb:
978 		case t_ORw:
979 		case t_ORd:
980 			cache_addd(NOP,pos+0);				// nop
981 			cache_addd(NOP,pos+4);				// nop
982 			cache_addd(ORR_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0),pos+8);	// orr FC_RETOP, a1, a2
983 #if C_TARGETCPU != ARMV7LE
984 			cache_addd(NOP,pos+12);				// nop
985 #endif
986 			break;
987 		case t_ANDb:
988 		case t_ANDw:
989 		case t_ANDd:
990 			cache_addd(NOP,pos+0);				// nop
991 			cache_addd(NOP,pos+4);				// nop
992 			cache_addd(AND_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0),pos+8);	// and FC_RETOP, a1, a2
993 #if C_TARGETCPU != ARMV7LE
994 			cache_addd(NOP,pos+12);				// nop
995 #endif
996 			break;
997 		case t_SUBb:
998 		case t_SUBw:
999 		case t_SUBd:
1000 			cache_addd(NOP,pos+0);				// nop
1001 			cache_addd(NOP,pos+4);				// nop
1002 			cache_addd(SUB_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0),pos+8);	// sub FC_RETOP, a1, a2
1003 #if C_TARGETCPU != ARMV7LE
1004 			cache_addd(NOP,pos+12);				// nop
1005 #endif
1006 			break;
1007 		case t_XORb:
1008 		case t_XORw:
1009 		case t_XORd:
1010 			cache_addd(NOP,pos+0);				// nop
1011 			cache_addd(NOP,pos+4);				// nop
1012 			cache_addd(EOR_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0),pos+8);	// eor FC_RETOP, a1, a2
1013 #if C_TARGETCPU != ARMV7LE
1014 			cache_addd(NOP,pos+12);				// nop
1015 #endif
1016 			break;
1017 		case t_CMPb:
1018 		case t_CMPw:
1019 		case t_CMPd:
1020 		case t_TESTb:
1021 		case t_TESTw:
1022 		case t_TESTd:
1023 			cache_addd(NOP,pos+0);				// nop
1024 			cache_addd(NOP,pos+4);				// nop
1025 			cache_addd(NOP,pos+8);				// nop
1026 #if C_TARGETCPU != ARMV7LE
1027 			cache_addd(NOP,pos+12);				// nop
1028 #endif
1029 			break;
1030 		case t_INCb:
1031 		case t_INCw:
1032 		case t_INCd:
1033 			cache_addd(NOP,pos+0);				// nop
1034 			cache_addd(NOP,pos+4);				// nop
1035 			cache_addd(ADD_IMM(FC_RETOP, HOST_a1, 1, 0),pos+8);	// add FC_RETOP, a1, #1
1036 #if C_TARGETCPU != ARMV7LE
1037 			cache_addd(NOP,pos+12);				// nop
1038 #endif
1039 			break;
1040 		case t_DECb:
1041 		case t_DECw:
1042 		case t_DECd:
1043 			cache_addd(NOP,pos+0);				// nop
1044 			cache_addd(NOP,pos+4);				// nop
1045 			cache_addd(SUB_IMM(FC_RETOP, HOST_a1, 1, 0),pos+8);	// sub FC_RETOP, a1, #1
1046 #if C_TARGETCPU != ARMV7LE
1047 			cache_addd(NOP,pos+12);				// nop
1048 #endif
1049 			break;
1050 		case t_SHLb:
1051 		case t_SHLw:
1052 		case t_SHLd:
1053 			cache_addd(NOP,pos+0);				// nop
1054 			cache_addd(NOP,pos+4);				// nop
1055 			cache_addd(MOV_REG_LSL_REG(FC_RETOP, HOST_a1, HOST_a2),pos+8);	// mov FC_RETOP, a1, lsl a2
1056 #if C_TARGETCPU != ARMV7LE
1057 			cache_addd(NOP,pos+12);				// nop
1058 #endif
1059 			break;
1060 		case t_SHRb:
1061 			cache_addd(NOP,pos+0);				// nop
1062 #if C_TARGETCPU == ARMV7LE
1063 			cache_addd(BFC(HOST_a1, 8, 24),pos+4);	// bfc a1, 8, 24
1064 			cache_addd(MOV_REG_LSR_REG(FC_RETOP, HOST_a1, HOST_a2),pos+8);	// mov FC_RETOP, a1, lsr a2
1065 #else
1066 			cache_addd(NOP,pos+4);				// nop
1067 			cache_addd(AND_IMM(FC_RETOP, HOST_a1, 0xff, 0),pos+8);				// and FC_RETOP, a1, #0xff
1068 			cache_addd(MOV_REG_LSR_REG(FC_RETOP, FC_RETOP, HOST_a2),pos+12);	// mov FC_RETOP, FC_RETOP, lsr a2
1069 #endif
1070 			break;
1071 		case t_SHRw:
1072 			cache_addd(NOP,pos+0);				// nop
1073 #if C_TARGETCPU == ARMV7LE
1074 			cache_addd(BFC(HOST_a1, 16, 16),pos+4);	// bfc a1, 16, 16
1075 			cache_addd(MOV_REG_LSR_REG(FC_RETOP, HOST_a1, HOST_a2),pos+8);	// mov FC_RETOP, a1, lsr a2
1076 #else
1077 			cache_addd(MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16),pos+4);			// mov FC_RETOP, a1, lsl #16
1078 			cache_addd(MOV_REG_LSR_IMM(FC_RETOP, FC_RETOP, 16),pos+8);			// mov FC_RETOP, FC_RETOP, lsr #16
1079 			cache_addd(MOV_REG_LSR_REG(FC_RETOP, FC_RETOP, HOST_a2),pos+12);	// mov FC_RETOP, FC_RETOP, lsr a2
1080 #endif
1081 			break;
1082 		case t_SHRd:
1083 			cache_addd(NOP,pos+0);				// nop
1084 			cache_addd(NOP,pos+4);				// nop
1085 			cache_addd(MOV_REG_LSR_REG(FC_RETOP, HOST_a1, HOST_a2),pos+8);	// mov FC_RETOP, a1, lsr a2
1086 #if C_TARGETCPU != ARMV7LE
1087 			cache_addd(NOP,pos+12);				// nop
1088 #endif
1089 			break;
1090 		case t_SARb:
1091 			cache_addd(NOP,pos+0);				// nop
1092 #if C_TARGETCPU == ARMV7LE
1093 			cache_addd(SXTB(FC_RETOP, HOST_a1, 0),pos+4);					// sxtb FC_RETOP, a1
1094 			cache_addd(MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2),pos+8);	// mov FC_RETOP, FC_RETOP, asr a2
1095 #else
1096 			cache_addd(MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24),pos+4);			// mov FC_RETOP, a1, lsl #24
1097 			cache_addd(MOV_REG_ASR_IMM(FC_RETOP, FC_RETOP, 24),pos+8);			// mov FC_RETOP, FC_RETOP, asr #24
1098 			cache_addd(MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2),pos+12);	// mov FC_RETOP, FC_RETOP, asr a2
1099 #endif
1100 			break;
1101 		case t_SARw:
1102 			cache_addd(NOP,pos+0);				// nop
1103 #if C_TARGETCPU == ARMV7LE
1104 			cache_addd(SXTH(FC_RETOP, HOST_a1, 0),pos+4);					// sxth FC_RETOP, a1
1105 			cache_addd(MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2),pos+8);	// mov FC_RETOP, FC_RETOP, asr a2
1106 #else
1107 			cache_addd(MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16),pos+4);			// mov FC_RETOP, a1, lsl #16
1108 			cache_addd(MOV_REG_ASR_IMM(FC_RETOP, FC_RETOP, 16),pos+8);			// mov FC_RETOP, FC_RETOP, asr #16
1109 			cache_addd(MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2),pos+12);	// mov FC_RETOP, FC_RETOP, asr a2
1110 #endif
1111 			break;
1112 		case t_SARd:
1113 			cache_addd(NOP,pos+0);				// nop
1114 			cache_addd(NOP,pos+4);				// nop
1115 			cache_addd(MOV_REG_ASR_REG(FC_RETOP, HOST_a1, HOST_a2),pos+8);	// mov FC_RETOP, a1, asr a2
1116 #if C_TARGETCPU != ARMV7LE
1117 			cache_addd(NOP,pos+12);				// nop
1118 #endif
1119 			break;
1120 		case t_RORb:
1121 #if C_TARGETCPU == ARMV7LE
1122 			cache_addd(BFI(HOST_a1, HOST_a1, 8, 8),pos+0);						// bfi a1, a1, 8, 8
1123 			cache_addd(BFI(HOST_a1, HOST_a1, 16, 16),pos+4);				// bfi a1, a1, 16, 16
1124 			cache_addd(MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2),pos+8);	// mov FC_RETOP, a1, ror a2
1125 #else
1126 			cache_addd(MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24),pos+0);					// mov FC_RETOP, a1, lsl #24
1127 			cache_addd(ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 8),pos+4);		// orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8
1128 			cache_addd(ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16),pos+8);	// orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
1129 			cache_addd(MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2),pos+12);		// mov FC_RETOP, FC_RETOP, ror a2
1130 #endif
1131 			break;
1132 		case t_RORw:
1133 			cache_addd(NOP,pos+0);				// nop
1134 #if C_TARGETCPU == ARMV7LE
1135 			cache_addd(BFI(HOST_a1, HOST_a1, 16, 16),pos+4);				// bfi a1, a1, 16, 16
1136 			cache_addd(MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2),pos+8);	// mov FC_RETOP, a1, ror a2
1137 #else
1138 			cache_addd(MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16),pos+4);				// mov FC_RETOP, a1, lsl #16
1139 			cache_addd(ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16),pos+8);	// orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
1140 			cache_addd(MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2),pos+12);		// mov FC_RETOP, FC_RETOP, ror a2
1141 #endif
1142 			break;
1143 		case t_RORd:
1144 			cache_addd(NOP,pos+0);				// nop
1145 			cache_addd(NOP,pos+4);				// nop
1146 			cache_addd(MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2),pos+8);	// mov FC_RETOP, a1, ror a2
1147 #if C_TARGETCPU != ARMV7LE
1148 			cache_addd(NOP,pos+12);				// nop
1149 #endif
1150 			break;
1151 		case t_ROLw:
1152 #if C_TARGETCPU == ARMV7LE
1153 			cache_addd(BFI(HOST_a1, HOST_a1, 16, 16),pos+0);					// bfi a1, a1, 16, 16
1154 			cache_addd(RSB_IMM(HOST_a2, HOST_a2, 32, 0),pos+4);				// rsb a2, a2, #32
1155 			cache_addd(MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2),pos+8);	// mov FC_RETOP, a1, ror a2
1156 #else
1157 			cache_addd(MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16),pos+0);					// mov FC_RETOP, a1, lsl #16
1158 			cache_addd(RSB_IMM(HOST_a2, HOST_a2, 32, 0),pos+4);						// rsb a2, a2, #32
1159 			cache_addd(ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16),pos+8);	// orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
1160 			cache_addd(MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2),pos+12);		// mov FC_RETOP, FC_RETOP, ror a2
1161 #endif
1162 			break;
1163 		case t_ROLd:
1164 			cache_addd(NOP,pos+0);				// nop
1165 #if C_TARGETCPU == ARMV7LE
1166 			cache_addd(RSB_IMM(HOST_a2, HOST_a2, 32, 0),pos+4);				// rsb a2, a2, #32
1167 			cache_addd(MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2),pos+8);	// mov FC_RETOP, a1, ror a2
1168 #else
1169 			cache_addd(NOP,pos+4);				// nop
1170 			cache_addd(RSB_IMM(HOST_a2, HOST_a2, 32, 0),pos+8);				// rsb a2, a2, #32
1171 			cache_addd(MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2),pos+12);	// mov FC_RETOP, a1, ror a2
1172 #endif
1173 			break;
1174 		case t_NEGb:
1175 		case t_NEGw:
1176 		case t_NEGd:
1177 			cache_addd(NOP,pos+0);				// nop
1178 			cache_addd(NOP,pos+4);				// nop
1179 			cache_addd(RSB_IMM(FC_RETOP, HOST_a1, 0, 0),pos+8);	// rsb FC_RETOP, a1, #0
1180 #if C_TARGETCPU != ARMV7LE
1181 			cache_addd(NOP,pos+12);				// nop
1182 #endif
1183 			break;
1184 		default:
1185 #if C_TARGETCPU == ARMV7LE
1186 			cache_addd(MOVW(temp1, ((Bit32u)fct_ptr) & 0xffff),pos+0);      // movw temp1, #(fct_ptr & 0xffff)
1187 			cache_addd(MOVT(temp1, ((Bit32u)fct_ptr) >> 16),pos+4);      // movt temp1, #(fct_ptr >> 16)
1188 #else
1189 			cache_addd((Bit32u)fct_ptr,pos+12);		// simple_func
1190 #endif
1191 			break;
1192 
1193 	}
1194 #else
1195 #if C_TARGETCPU == ARMV7LE
1196 	cache_addd(MOVW(temp1, ((Bit32u)fct_ptr) & 0xffff),pos+0);      // movw temp1, #(fct_ptr & 0xffff)
1197 	cache_addd(MOVT(temp1, ((Bit32u)fct_ptr) >> 16),pos+4);      // movt temp1, #(fct_ptr >> 16)
1198 #else
1199 	cache_addd((Bit32u)fct_ptr,pos+12);		// simple_func
1200 #endif
1201 #endif
1202 }
1203 #endif
1204 
static void cache_block_before_close(void) { }
1206 
1207 #ifdef DRC_USE_SEGS_ADDR
1208 
1209 // mov 16bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 2 must be zero)
1210 // 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index) {
1212 	cache_addd( LDRH_IMM(dest_reg, FC_SEGS_ADDR, index) );      // ldrh dest_reg, [FC_SEGS_ADDR, #index]
1213 }
1214 
1215 // mov 32bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 4 must be zero)
static void gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index) {
1217 	cache_addd( LDR_IMM(dest_reg, FC_SEGS_ADDR, index) );      // ldr dest_reg, [FC_SEGS_ADDR, #index]
1218 }
1219 
1220 // add a 32bit value from Segs[index] to a full register using FC_SEGS_ADDR (index modulo 4 must be zero)
static void gen_add_seg32_to_reg(HostReg reg,Bitu index) {
1222 	cache_addd( LDR_IMM(temp1, FC_SEGS_ADDR, index) );      // ldr temp1, [FC_SEGS_ADDR, #index]
1223 	cache_addd( ADD_REG_LSL_IMM(reg, reg, temp1, 0) );      // add reg, reg, temp1
1224 }
1225 
1226 #endif
1227 
1228 #ifdef DRC_USE_REGS_ADDR
1229 
1230 // mov 16bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 2 must be zero)
1231 // 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index) {
1233 	cache_addd( LDRH_IMM(dest_reg, FC_REGS_ADDR, index) );      // ldrh dest_reg, [FC_REGS_ADDR, #index]
1234 }
1235 
1236 // mov 32bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 4 must be zero)
static void gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index) {
1238 	cache_addd( LDR_IMM(dest_reg, FC_REGS_ADDR, index) );      // ldr dest_reg, [FC_REGS_ADDR, #index]
1239 }
1240 
1241 // move a 32bit (dword==true) or 16bit (dword==false) value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
1242 // 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword) {
1244 	if (dword) {
1245 		cache_addd( LDR_IMM(dest_reg, FC_REGS_ADDR, index) );      // ldr dest_reg, [FC_REGS_ADDR, #index]
1246 	} else {
1247 		cache_addd( LDRH_IMM(dest_reg, FC_REGS_ADDR, index) );      // ldrh dest_reg, [FC_REGS_ADDR, #index]
1248 	}
1249 }
1250 
1251 // move an 8bit value from cpu_regs[index]  into dest_reg using FC_REGS_ADDR
1252 // the upper 24bit of the destination register can be destroyed
1253 // this function does not use FC_OP1/FC_OP2 as dest_reg as these
1254 // registers might not be directly byte-accessible on some architectures
static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index) {
1256 	cache_addd( LDRB_IMM(dest_reg, FC_REGS_ADDR, index) );      // ldrb dest_reg, [FC_REGS_ADDR, #index]
1257 }
1258 
1259 // move an 8bit value from cpu_regs[index]  into dest_reg using FC_REGS_ADDR
1260 // the upper 24bit of the destination register can be destroyed
1261 // this function can use FC_OP1/FC_OP2 as dest_reg which are
1262 // not directly byte-accessible on some architectures
static void gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index) {
1264 	cache_addd( LDRB_IMM(dest_reg, FC_REGS_ADDR, index) );      // ldrb dest_reg, [FC_REGS_ADDR, #index]
1265 }
1266 
1267 
1268 // add a 32bit value from cpu_regs[index] to a full register using FC_REGS_ADDR (index modulo 4 must be zero)
static void gen_add_regval32_to_reg(HostReg reg,Bitu index) {
1270 	cache_addd( LDR_IMM(temp2, FC_REGS_ADDR, index) );      // ldr temp2, [FC_REGS_ADDR, #index]
1271 	cache_addd( ADD_REG_LSL_IMM(reg, reg, temp2, 0) );      // add reg, reg, temp2
1272 }
1273 
1274 
1275 // move 16bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 2 must be zero)
static void gen_mov_regval16_from_reg(HostReg src_reg,Bitu index) {
1277 	cache_addd( STRH_IMM(src_reg, FC_REGS_ADDR, index) );      // strh src_reg, [FC_REGS_ADDR, #index]
1278 }
1279 
1280 // move 32bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 4 must be zero)
static void gen_mov_regval32_from_reg(HostReg src_reg,Bitu index) {
1282 	cache_addd( STR_IMM(src_reg, FC_REGS_ADDR, index) );      // str src_reg, [FC_REGS_ADDR, #index]
1283 }
1284 
1285 // move 32bit (dword==true) or 16bit (dword==false) of a register into cpu_regs[index] using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
static void gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword) {
1287 	if (dword) {
1288 		cache_addd( STR_IMM(src_reg, FC_REGS_ADDR, index) );      // str src_reg, [FC_REGS_ADDR, #index]
1289 	} else {
1290 		cache_addd( STRH_IMM(src_reg, FC_REGS_ADDR, index) );      // strh src_reg, [FC_REGS_ADDR, #index]
1291 	}
1292 }
1293 
1294 // move the lowest 8bit of a register into cpu_regs[index] using FC_REGS_ADDR
static void gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index) {
1296 	cache_addd( STRB_IMM(src_reg, FC_REGS_ADDR, index) );      // strb src_reg, [FC_REGS_ADDR, #index]
1297 }
1298 
1299 #endif
1300