/*
 *  Copyright (C) 2002-2015  The DOSBox Team
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */



/* ARMv4 (little endian) backend by M-HT (thumb version) */


// temporary "lo" registers
#define templo1 HOST_v3
#define templo2 HOST_v4
#define templo3 HOST_v2

// register that holds function return values
#define FC_RETOP HOST_a1

// register used for address calculations
#define FC_ADDR HOST_v1			// has to be saved across calls, see DRC_PROTECT_ADDR_REG

// register that holds the first parameter
#define FC_OP1 HOST_a1

// register that holds the second parameter
#define FC_OP2 HOST_a2

// special register that holds the third parameter for _R3 calls (byte accessible)
#define FC_OP3 HOST_a4

// register that holds byte-accessible temporary values
#define FC_TMP_BA1 HOST_a1

// register that holds byte-accessible temporary values
#define FC_TMP_BA2 HOST_a2

// temporary register for LEA
#define TEMP_REG_DRC HOST_a4

// used to hold the address of "cpu_regs" - preferably filled in function gen_run_code
#define FC_REGS_ADDR HOST_v7

// used to hold the address of "Segs" - preferably filled in function gen_run_code
#define FC_SEGS_ADDR HOST_v8

// used to hold the address of "core_dynrec.readdata" - filled in function gen_run_code
#define readdata_addr HOST_v5


// instruction encodings

// move
// mov dst, #imm		@	0 <= imm <= 255
#define MOV_IMM(dst, imm) (0x2000 + ((dst) << 8) + (imm) )
// mov dst, src
#define MOV_REG(dst, src) ADD_IMM3(dst, src, 0)
// mov dst, src
#define MOV_LO_HI(dst, src) (0x4640 + (dst) + (((src) - HOST_r8) << 3) )
// mov dst, src
#define MOV_HI_LO(dst, src) (0x4680 + ((dst) - HOST_r8) + ((src) << 3) )
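
// Worked example of the 16-bit encodings above (register arguments are
// plain register numbers, so e.g. passing 2 selects r2):
//   MOV_IMM(2, 16) = 0x2000 + (2 << 8) + 16 = 0x2210   @ mov r2, #16
// MOV_LO_HI and MOV_HI_LO use the hi-register form of mov, which is why
// the hi register is rebased by HOST_r8 before being placed in its field.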

// arithmetic
// add dst, src, #imm		@	0 <= imm <= 7
#define ADD_IMM3(dst, src, imm) (0x1c00 + (dst) + ((src) << 3) + ((imm) << 6) )
// add dst, #imm		@	0 <= imm <= 255
#define ADD_IMM8(dst, imm) (0x3000 + ((dst) << 8) + (imm) )
// add dst, src1, src2
#define ADD_REG(dst, src1, src2) (0x1800 + (dst) + ((src1) << 3) + ((src2) << 6) )
// add dst, pc, #imm		@	0 <= imm < 1024	&	imm mod 4 = 0
#define ADD_LO_PC_IMM(dst, imm) (0xa000 + ((dst) << 8) + ((imm) >> 2) )
// sub dst, src1, src2
#define SUB_REG(dst, src1, src2) (0x1a00 + (dst) + ((src1) << 3) + ((src2) << 6) )
// sub dst, src, #imm		@	0 <= imm <= 7
#define SUB_IMM3(dst, src, imm) (0x1e00 + (dst) + ((src) << 3) + ((imm) << 6) )
// sub dst, #imm		@	0 <= imm <= 255
#define SUB_IMM8(dst, imm) (0x3800 + ((dst) << 8) + (imm) )
// neg dst, src
#define NEG(dst, src) (0x4240 + (dst) + ((src) << 3) )
// cmp dst, #imm		@	0 <= imm <= 255
#define CMP_IMM(dst, imm) (0x2800 + ((dst) << 8) + (imm) )
// nop
#define NOP (0x46c0)
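
// Worked example: ADD_IMM3(1, 2, 3) = 0x1c00 + 1 + (2 << 3) + (3 << 6)
//                                   = 0x1cd1   @ add r1, r2, #3
// ADD_LO_PC_IMM stores imm/4 in its 8-bit field, which is why the macro
// shifts the byte offset right by two.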

// logical
// and dst, src
#define AND(dst, src) (0x4000 + (dst) + ((src) << 3) )
// bic dst, src
#define BIC(dst, src) (0x4380 + (dst) + ((src) << 3) )
// eor dst, src
#define EOR(dst, src) (0x4040 + (dst) + ((src) << 3) )
// orr dst, src
#define ORR(dst, src) (0x4300 + (dst) + ((src) << 3) )
// mvn dst, src
#define MVN(dst, src) (0x43c0 + (dst) + ((src) << 3) )

// shift/rotate
// lsl dst, src, #imm
#define LSL_IMM(dst, src, imm) (0x0000 + (dst) + ((src) << 3) + ((imm) << 6) )
// lsl dst, reg
#define LSL_REG(dst, reg) (0x4080 + (dst) + ((reg) << 3) )
// lsr dst, src, #imm
#define LSR_IMM(dst, src, imm) (0x0800 + (dst) + ((src) << 3) + ((imm) << 6) )
// lsr dst, reg
#define LSR_REG(dst, reg) (0x40c0 + (dst) + ((reg) << 3) )
// asr dst, src, #imm
#define ASR_IMM(dst, src, imm) (0x1000 + (dst) + ((src) << 3) + ((imm) << 6) )
// asr dst, reg
#define ASR_REG(dst, reg) (0x4100 + (dst) + ((reg) << 3) )
// ror dst, reg
#define ROR_REG(dst, reg) (0x41c0 + (dst) + ((reg) << 3) )

// load
// ldr reg, [addr, #imm]		@	0 <= imm < 128	&	imm mod 4 = 0
#define LDR_IMM(reg, addr, imm) (0x6800 + (reg) + ((addr) << 3) + ((imm) << 4) )
// ldrh reg, [addr, #imm]		@	0 <= imm < 64	&	imm mod 2 = 0
#define LDRH_IMM(reg, addr, imm) (0x8800 + (reg) + ((addr) << 3) + ((imm) << 5) )
// ldrb reg, [addr, #imm]		@	0 <= imm < 32
#define LDRB_IMM(reg, addr, imm) (0x7800 + (reg) + ((addr) << 3) + ((imm) << 6) )
// ldr reg, [pc, #imm]		@	0 <= imm < 1024	&	imm mod 4 = 0
#define LDR_PC_IMM(reg, imm) (0x4800 + ((reg) << 8) + ((imm) >> 2) )
// ldr reg, [addr1, addr2]
#define LDR_REG(reg, addr1, addr2) (0x5800 + (reg) + ((addr1) << 3) + ((addr2) << 6) )
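
// The load immediates are byte offsets, stored scaled in the encoding
// (imm/4 for word, imm/2 for halfword), hence the different shifts above.
// Worked example: LDR_IMM(0, 1, 8) = 0x6800 + 0 + (1 << 3) + (8 << 4)
//                                  = 0x6888   @ ldr r0, [r1, #8]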

// store
// str reg, [addr, #imm]		@	0 <= imm < 128	&	imm mod 4 = 0
#define STR_IMM(reg, addr, imm) (0x6000 + (reg) + ((addr) << 3) + ((imm) << 4) )
// strh reg, [addr, #imm]		@	0 <= imm < 64	&	imm mod 2 = 0
#define STRH_IMM(reg, addr, imm) (0x8000 + (reg) + ((addr) << 3) + ((imm) << 5) )
// strb reg, [addr, #imm]		@	0 <= imm < 32
#define STRB_IMM(reg, addr, imm) (0x7000 + (reg) + ((addr) << 3) + ((imm) << 6) )

// branch
// beq pc+imm		@	0 <= imm < 256	&	imm mod 2 = 0
#define BEQ_FWD(imm) (0xd000 + ((imm) >> 1) )
// bne pc+imm		@	0 <= imm < 256	&	imm mod 2 = 0
#define BNE_FWD(imm) (0xd100 + ((imm) >> 1) )
// bgt pc+imm		@	0 <= imm < 256	&	imm mod 2 = 0
#define BGT_FWD(imm) (0xdc00 + ((imm) >> 1) )
// b pc+imm		@	0 <= imm < 2048	&	imm mod 2 = 0
#define B_FWD(imm) (0xe000 + ((imm) >> 1) )
// bx reg
#define BX(reg) (0x4700 + ((reg) << 3) )
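
// Note: for the forward-branch macros, imm is the byte offset from the pc
// value the instruction sees (address of the branch + 4), so B_FWD(2)
// emitted at address A branches to A + 6; the "(pc+N)" comments below use
// this convention.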


// arm instructions

// arithmetic
// add dst, src, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
#define ARM_ADD_IMM(dst, src, imm, rimm) (0xe2800000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )

// load
// ldr reg, [addr, #imm]		@	0 <= imm < 4096
#define ARM_LDR_IMM(reg, addr, imm) (0xe5900000 + ((reg) << 12) + ((addr) << 16) + (imm) )

// store
// str reg, [addr, #-(imm)]!		@	0 <= imm < 4096
#define ARM_STR_IMM_M_W(reg, addr, imm) (0xe5200000 + ((reg) << 12) + ((addr) << 16) + (imm) )

// branch
// bx reg
#define ARM_BX(reg) (0xe12fff10 + (reg) )


// move a full register from reg_src to reg_dst
static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) {
	if(reg_src == reg_dst) return;
	cache_addw( MOV_REG(reg_dst, reg_src) );      // mov reg_dst, reg_src
}

// helper function
static bool val_single_shift(Bit32u value, Bit32u *val_shift) {
	Bit32u shift;

	if (GCC_UNLIKELY(value == 0)) {
		*val_shift = 0;
		return true;
	}

	shift = 0;
	while ((value & 1) == 0) {
		value>>=1;
		shift+=1;
	}

	if ((value >> 8) != 0) return false;

	*val_shift = shift;
	return true;
}
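
// Example: val_single_shift(0x3f000, &s) strips 12 trailing zero bits,
// leaving 0x3f (<= 255), so it returns true with s == 12 and the caller
// can materialize the constant as "mov #0x3f" plus "lsl #12".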

// move a 32bit constant value into dest_reg
static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
	Bit32u scale;

	if (imm < 256) {
		cache_addw( MOV_IMM(dest_reg, imm) );      // mov dest_reg, #imm
	} else if ((~imm) < 256) {
		cache_addw( MOV_IMM(dest_reg, ~imm) );      // mov dest_reg, #(~imm)
		cache_addw( MVN(dest_reg, dest_reg) );      // mvn dest_reg, dest_reg
	} else if (val_single_shift(imm, &scale)) {
		cache_addw( MOV_IMM(dest_reg, imm >> scale) );      // mov dest_reg, #(imm >> scale)
		cache_addw( LSL_IMM(dest_reg, dest_reg, scale) );      // lsl dest_reg, dest_reg, #scale
	} else {
		Bit32u diff;

		diff = imm - ((Bit32u)cache.pos+4);

		if ((diff < 1024) && ((imm & 0x03) == 0)) {
			if (((Bit32u)cache.pos & 0x03) == 0) {
				cache_addw( ADD_LO_PC_IMM(dest_reg, diff) );      // add dest_reg, pc, #(diff >> 2)
			} else {
				cache_addw( NOP );      // nop
				cache_addw( ADD_LO_PC_IMM(dest_reg, diff - 2) );      // add dest_reg, pc, #((diff - 2) >> 2)
			}
		} else {
			if (((Bit32u)cache.pos & 0x03) == 0) {
				cache_addw( LDR_PC_IMM(dest_reg, 0) );      // ldr dest_reg, [pc, #0]
				cache_addw( B_FWD(2) );      // b next_code (pc+2)
				cache_addd(imm);      // .int imm
				// next_code:
			} else {
				cache_addw( LDR_PC_IMM(dest_reg, 4) );      // ldr dest_reg, [pc, #4]
				cache_addw( B_FWD(4) );      // b next_code (pc+4)
				cache_addw( NOP );      // nop
				cache_addd(imm);      // .int imm
				// next_code:
			}
		}
	}
}
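
// The final fallback above keeps the constant in an inline literal pool:
// for example imm == 0x12345678 emits a pc-relative ldr, a branch over the
// literal and the literal word itself, inserting a nop first when
// cache.pos is not word aligned (thumb pc-relative loads use a
// word-aligned pc).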

// helper function
static bool gen_mov_memval_to_reg_helper(HostReg dest_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) {
	switch (size) {
		case 4:
#if !defined(C_UNALIGNED_MEMORY)
			if ((data & 3) == 0)
#endif
			{
				if ((data >= addr_data) && (data < addr_data + 128) && (((data - addr_data) & 3) == 0)) {
					cache_addw( MOV_LO_HI(templo2, addr_reg) );      // mov templo2, addr_reg
					cache_addw( LDR_IMM(dest_reg, templo2, data - addr_data) );      // ldr dest_reg, [templo2, #(data - addr_data)]
					return true;
				}
			}
			break;
		case 2:
#if !defined(C_UNALIGNED_MEMORY)
			if ((data & 1) == 0)
#endif
			{
				if ((data >= addr_data) && (data < addr_data + 64) && (((data - addr_data) & 1) == 0)) {
					cache_addw( MOV_LO_HI(templo2, addr_reg) );      // mov templo2, addr_reg
					cache_addw( LDRH_IMM(dest_reg, templo2, data - addr_data) );      // ldrh dest_reg, [templo2, #(data - addr_data)]
					return true;
				}
			}
			break;
		case 1:
			if ((data >= addr_data) && (data < addr_data + 32)) {
				cache_addw( MOV_LO_HI(templo2, addr_reg) );      // mov templo2, addr_reg
				cache_addw( LDRB_IMM(dest_reg, templo2, data - addr_data) );      // ldrb dest_reg, [templo2, #(data - addr_data)]
				return true;
			}
		default:
			break;
	}
	return false;
}

// helper function
static bool gen_mov_memval_to_reg(HostReg dest_reg, void *data, Bitu size) {
	if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true;
	if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true;
	if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true;
	return false;
}
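
// Example: when data points into cpu_regs, the helpers above collapse the
// access to "mov templo2, FC_REGS_ADDR" plus a single ldr/ldrh/ldrb with
// the small offset (data - &cpu_regs), instead of materializing the full
// 32-bit address first.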

// helper function for gen_mov_word_to_reg
static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,HostReg data_reg) {
	// alignment....
	if (dword) {
#if !defined(C_UNALIGNED_MEMORY)
		if ((Bit32u)data & 3) {
			if ( ((Bit32u)data & 3) == 2 ) {
				cache_addw( LDRH_IMM(dest_reg, data_reg, 0) );      // ldrh dest_reg, [data_reg]
				cache_addw( LDRH_IMM(templo1, data_reg, 2) );      // ldrh templo1, [data_reg, #2]
				cache_addw( LSL_IMM(templo1, templo1, 16) );      // lsl templo1, templo1, #16
				cache_addw( ORR(dest_reg, templo1) );      // orr dest_reg, templo1
			} else {
				cache_addw( LDRB_IMM(dest_reg, data_reg, 0) );      // ldrb dest_reg, [data_reg]
				cache_addw( ADD_IMM3(templo1, data_reg, 1) );      // add templo1, data_reg, #1
				cache_addw( LDRH_IMM(templo1, templo1, 0) );      // ldrh templo1, [templo1]
				cache_addw( LSL_IMM(templo1, templo1, 8) );      // lsl templo1, templo1, #8
				cache_addw( ORR(dest_reg, templo1) );      // orr dest_reg, templo1
				cache_addw( LDRB_IMM(templo1, data_reg, 3) );      // ldrb templo1, [data_reg, #3]
				cache_addw( LSL_IMM(templo1, templo1, 24) );      // lsl templo1, templo1, #24
				cache_addw( ORR(dest_reg, templo1) );      // orr dest_reg, templo1
			}
		} else
#endif
		{
			cache_addw( LDR_IMM(dest_reg, data_reg, 0) );      // ldr dest_reg, [data_reg]
		}
	} else {
#if !defined(C_UNALIGNED_MEMORY)
		if ((Bit32u)data & 1) {
			cache_addw( LDRB_IMM(dest_reg, data_reg, 0) );      // ldrb dest_reg, [data_reg]
			cache_addw( LDRB_IMM(templo1, data_reg, 1) );      // ldrb templo1, [data_reg, #1]
			cache_addw( LSL_IMM(templo1, templo1, 8) );      // lsl templo1, templo1, #8
			cache_addw( ORR(dest_reg, templo1) );      // orr dest_reg, templo1
		} else
#endif
		{
			cache_addw( LDRH_IMM(dest_reg, data_reg, 0) );      // ldrh dest_reg, [data_reg]
		}
	}
}
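
// Note: the byte/halfword assembly above is required because ARMv4 has no
// unaligned loads: an ldr from a non-word-aligned address rotates the
// loaded word (or faults, depending on the system) instead of fetching
// the expected 32-bit value.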

// move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) {
	if (!gen_mov_memval_to_reg(dest_reg, data, (dword)?4:2)) {
		gen_mov_dword_to_reg_imm(templo2, (Bit32u)data);
		gen_mov_word_to_reg_helper(dest_reg, data, dword, templo2);
	}
}

// move a 16bit constant value into dest_reg
// the upper 16bit of the destination register may be destroyed
static void INLINE gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) {
	gen_mov_dword_to_reg_imm(dest_reg, (Bit32u)imm);
}

// helper function
static bool gen_mov_memval_from_reg_helper(HostReg src_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) {
	switch (size) {
		case 4:
#if !defined(C_UNALIGNED_MEMORY)
			if ((data & 3) == 0)
#endif
			{
				if ((data >= addr_data) && (data < addr_data + 128) && (((data - addr_data) & 3) == 0)) {
					cache_addw( MOV_LO_HI(templo2, addr_reg) );      // mov templo2, addr_reg
					cache_addw( STR_IMM(src_reg, templo2, data - addr_data) );      // str src_reg, [templo2, #(data - addr_data)]
					return true;
				}
			}
			break;
		case 2:
#if !defined(C_UNALIGNED_MEMORY)
			if ((data & 1) == 0)
#endif
			{
				if ((data >= addr_data) && (data < addr_data + 64) && (((data - addr_data) & 1) == 0)) {
					cache_addw( MOV_LO_HI(templo2, addr_reg) );      // mov templo2, addr_reg
					cache_addw( STRH_IMM(src_reg, templo2, data - addr_data) );      // strh src_reg, [templo2, #(data - addr_data)]
					return true;
				}
			}
			break;
		case 1:
			if ((data >= addr_data) && (data < addr_data + 32)) {
				cache_addw( MOV_LO_HI(templo2, addr_reg) );      // mov templo2, addr_reg
				cache_addw( STRB_IMM(src_reg, templo2, data - addr_data) );      // strb src_reg, [templo2, #(data - addr_data)]
				return true;
			}
		default:
			break;
	}
	return false;
}

// helper function
static bool gen_mov_memval_from_reg(HostReg src_reg, void *dest, Bitu size) {
	if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true;
	if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true;
	if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true;
	return false;
}

// helper function for gen_mov_word_from_reg
static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, HostReg data_reg) {
	// alignment....
	if (dword) {
#if !defined(C_UNALIGNED_MEMORY)
		if ((Bit32u)dest & 3) {
			if ( ((Bit32u)dest & 3) == 2 ) {
				cache_addw( STRH_IMM(src_reg, data_reg, 0) );      // strh src_reg, [data_reg]
				cache_addw( MOV_REG(templo1, src_reg) );      // mov templo1, src_reg
				cache_addw( LSR_IMM(templo1, templo1, 16) );      // lsr templo1, templo1, #16
				cache_addw( STRH_IMM(templo1, data_reg, 2) );      // strh templo1, [data_reg, #2]
			} else {
				cache_addw( STRB_IMM(src_reg, data_reg, 0) );      // strb src_reg, [data_reg]
				cache_addw( MOV_REG(templo1, src_reg) );      // mov templo1, src_reg
				cache_addw( LSR_IMM(templo1, templo1, 8) );      // lsr templo1, templo1, #8
				cache_addw( STRB_IMM(templo1, data_reg, 1) );      // strb templo1, [data_reg, #1]
				cache_addw( MOV_REG(templo1, src_reg) );      // mov templo1, src_reg
				cache_addw( LSR_IMM(templo1, templo1, 16) );      // lsr templo1, templo1, #16
				cache_addw( STRB_IMM(templo1, data_reg, 2) );      // strb templo1, [data_reg, #2]
				cache_addw( MOV_REG(templo1, src_reg) );      // mov templo1, src_reg
				cache_addw( LSR_IMM(templo1, templo1, 24) );      // lsr templo1, templo1, #24
				cache_addw( STRB_IMM(templo1, data_reg, 3) );      // strb templo1, [data_reg, #3]
			}
		} else
#endif
		{
			cache_addw( STR_IMM(src_reg, data_reg, 0) );      // str src_reg, [data_reg]
		}
	} else {
#if !defined(C_UNALIGNED_MEMORY)
		if ((Bit32u)dest & 1) {
			cache_addw( STRB_IMM(src_reg, data_reg, 0) );      // strb src_reg, [data_reg]
			cache_addw( MOV_REG(templo1, src_reg) );      // mov templo1, src_reg
			cache_addw( LSR_IMM(templo1, templo1, 8) );      // lsr templo1, templo1, #8
			cache_addw( STRB_IMM(templo1, data_reg, 1) );      // strb templo1, [data_reg, #1]
		} else
#endif
		{
			cache_addw( STRH_IMM(src_reg, data_reg, 0) );      // strh src_reg, [data_reg]
		}
	}
}

// move 32bit (dword==true) or 16bit (dword==false) of a register into memory
static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) {
	if (!gen_mov_memval_from_reg(src_reg, dest, (dword)?4:2)) {
		gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
		gen_mov_word_from_reg_helper(src_reg, dest, dword, templo2);
	}
}

// move an 8bit value from memory into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) {
	if (!gen_mov_memval_to_reg(dest_reg, data, 1)) {
		gen_mov_dword_to_reg_imm(templo1, (Bit32u)data);
		cache_addw( LDRB_IMM(dest_reg, templo1, 0) );      // ldrb dest_reg, [templo1]
	}
}

// move an 8bit value from memory into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void INLINE gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
	gen_mov_byte_to_reg_low(dest_reg, data);
}

// move an 8bit constant value into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
	cache_addw( MOV_IMM(dest_reg, imm) );      // mov dest_reg, #(imm)
}

// move an 8bit constant value into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void INLINE gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) {
	gen_mov_byte_to_reg_low_imm(dest_reg, imm);
}

// move the lowest 8bit of a register into memory
static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) {
	if (!gen_mov_memval_from_reg(src_reg, dest, 1)) {
		gen_mov_dword_to_reg_imm(templo1, (Bit32u)dest);
		cache_addw( STRB_IMM(src_reg, templo1, 0) );      // strb src_reg, [templo1]
	}
}



// convert an 8bit word to a 32bit dword
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_byte(bool sign,HostReg reg) {
	cache_addw( LSL_IMM(reg, reg, 24) );      // lsl reg, reg, #24

	if (sign) {
		cache_addw( ASR_IMM(reg, reg, 24) );      // asr reg, reg, #24
	} else {
		cache_addw( LSR_IMM(reg, reg, 24) );      // lsr reg, reg, #24
	}
}

// convert a 16bit word to a 32bit dword
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_word(bool sign,HostReg reg) {
	cache_addw( LSL_IMM(reg, reg, 16) );      // lsl reg, reg, #16

	if (sign) {
		cache_addw( ASR_IMM(reg, reg, 16) );      // asr reg, reg, #16
	} else {
		cache_addw( LSR_IMM(reg, reg, 16) );      // lsr reg, reg, #16
	}
}
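
// Worked example (sign == true) for gen_extend_word with reg == 0x0000abcd:
//   lsl reg, reg, #16   ->  0xabcd0000
//   asr reg, reg, #16   ->  0xffffabcd   (16-bit value sign-extended)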

// add a 32bit value from memory to a full register
static void gen_add(HostReg reg,void* op) {
	gen_mov_word_to_reg(templo3, op, 1);
	cache_addw( ADD_REG(reg, reg, templo3) );      // add reg, reg, templo3
}

// add a 32bit constant value to a full register
static void gen_add_imm(HostReg reg,Bit32u imm) {
	Bit32u imm2, scale;

	if(!imm) return;

	imm2 = (Bit32u) (-((Bit32s)imm));

	if (imm <= 255) {
		cache_addw( ADD_IMM8(reg, imm) );      // add reg, #imm
	} else if (imm2 <= 255) {
		cache_addw( SUB_IMM8(reg, imm2) );      // sub reg, #(-imm)
	} else {
		if (val_single_shift(imm2, &scale)) {
			cache_addw( MOV_IMM(templo1, imm2 >> scale) );      // mov templo1, #(-imm >> scale)
			if (scale) {
				cache_addw( LSL_IMM(templo1, templo1, scale) );      // lsl templo1, templo1, #scale
			}
			cache_addw( SUB_REG(reg, reg, templo1) );      // sub reg, reg, templo1
		} else {
			gen_mov_dword_to_reg_imm(templo1, imm);
			cache_addw( ADD_REG(reg, reg, templo1) );      // add reg, reg, templo1
		}
	}
}
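
// Example: imm == 0xffffff00 (i.e. -256). Neither imm nor -imm fits in
// 8 bits, but val_single_shift(0x100, &scale) succeeds, so this emits
// "mov templo1, #1", "lsl templo1, templo1, #8", "sub reg, reg, templo1".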

// and a 32bit constant value with a full register
static void gen_and_imm(HostReg reg,Bit32u imm) {
	Bit32u imm2, scale;

	imm2 = ~imm;
	if(!imm2) return;

	if (!imm) {
		cache_addw( MOV_IMM(reg, 0) );      // mov reg, #0
	} else {
		if (val_single_shift(imm2, &scale)) {
			cache_addw( MOV_IMM(templo1, imm2 >> scale) );      // mov templo1, #(~imm >> scale)
			if (scale) {
				cache_addw( LSL_IMM(templo1, templo1, scale) );      // lsl templo1, templo1, #scale
			}
			cache_addw( BIC(reg, templo1) );      // bic reg, templo1
		} else {
			gen_mov_dword_to_reg_imm(templo1, imm);
			cache_addw( AND(reg, templo1) );      // and reg, templo1
		}
	}
}
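
// Example: imm == 0xffffff00 clears the low byte; imm2 == 0xff fits a mov
// immediate directly (scale == 0), so this becomes "mov templo1, #0xff"
// followed by "bic reg, templo1".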


// move a 32bit constant value into memory
static void gen_mov_direct_dword(void* dest,Bit32u imm) {
	gen_mov_dword_to_reg_imm(templo3, imm);
	gen_mov_word_from_reg(templo3, dest, 1);
}

// move an address into memory
static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) {
	gen_mov_direct_dword(dest,(Bit32u)imm);
}

// add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value
static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) {
	if (!dword) imm &= 0xffff;
	if(!imm) return;

	if (!gen_mov_memval_to_reg(templo3, dest, (dword)?4:2)) {
		gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
		gen_mov_word_to_reg_helper(templo3, dest, dword, templo2);
	}
	gen_add_imm(templo3, imm);
	if (!gen_mov_memval_from_reg(templo3, dest, (dword)?4:2)) {
		gen_mov_word_from_reg_helper(templo3, dest, dword, templo2);
	}
}

// add an 8bit constant value to a dword memory value
static void gen_add_direct_byte(void* dest,Bit8s imm) {
	gen_add_direct_word(dest, (Bit32s)imm, 1);
}

// subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value
static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
	Bit32u imm2, scale;

	if (!dword) imm &= 0xffff;
	if(!imm) return;

	if (!gen_mov_memval_to_reg(templo3, dest, (dword)?4:2)) {
		gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
		gen_mov_word_to_reg_helper(templo3, dest, dword, templo2);
	}

	imm2 = (Bit32u) (-((Bit32s)imm));

	if (imm <= 255) {
		cache_addw( SUB_IMM8(templo3, imm) );      // sub templo3, #imm
	} else if (imm2 <= 255) {
		cache_addw( ADD_IMM8(templo3, imm2) );      // add templo3, #(-imm)
	} else {
		if (val_single_shift(imm2, &scale)) {
			cache_addw( MOV_IMM(templo1, imm2 >> scale) );      // mov templo1, #(-imm >> scale)
			if (scale) {
				cache_addw( LSL_IMM(templo1, templo1, scale) );      // lsl templo1, templo1, #scale
			}
			cache_addw( ADD_REG(templo3, templo3, templo1) );      // add templo3, templo3, templo1
		} else {
			gen_mov_dword_to_reg_imm(templo1, imm);
			cache_addw( SUB_REG(templo3, templo3, templo1) );      // sub templo3, templo3, templo1
		}
	}

	if (!gen_mov_memval_from_reg(templo3, dest, (dword)?4:2)) {
		gen_mov_word_from_reg_helper(templo3, dest, dword, templo2);
	}
}

// subtract an 8bit constant value from a dword memory value
static void gen_sub_direct_byte(void* dest,Bit8s imm) {
	gen_sub_direct_word(dest, (Bit32s)imm, 1);
}

// effective address calculation, destination is dest_reg
// scale_reg is scaled by scale (scale_reg*(2^scale)) and
// added to dest_reg, then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) {
	if (scale) {
		cache_addw( LSL_IMM(templo1, scale_reg, scale) );      // lsl templo1, scale_reg, #(scale)
		cache_addw( ADD_REG(dest_reg, dest_reg, templo1) );      // add dest_reg, dest_reg, templo1
	} else {
		cache_addw( ADD_REG(dest_reg, dest_reg, scale_reg) );      // add dest_reg, dest_reg, scale_reg
	}
	gen_add_imm(dest_reg, imm);
}

// effective address calculation, destination is dest_reg
// dest_reg is scaled by scale (dest_reg*(2^scale)),
// then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) {
	if (scale) {
		cache_addw( LSL_IMM(dest_reg, dest_reg, scale) );      // lsl dest_reg, dest_reg, #(scale)
	}
	gen_add_imm(dest_reg, imm);
}
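
// Example: gen_lea(reg, 2, 6) computes reg = reg*4 + 6 as
// "lsl reg, reg, #2" followed by gen_add_imm's "add reg, #6".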

// generate a call to a parameterless function
static void INLINE gen_call_function_raw(void * func) {
	if (((Bit32u)cache.pos & 0x03) == 0) {
		cache_addw( LDR_PC_IMM(templo1, 4) );      // ldr templo1, [pc, #4]
		cache_addw( ADD_LO_PC_IMM(templo2, 8) );      // adr templo2, after_call (add templo2, pc, #8)
		cache_addw( MOV_HI_LO(HOST_lr, templo2) );      // mov lr, templo2
		cache_addw( BX(templo1) );      // bx templo1     --- switch to arm state
	} else {
		cache_addw( LDR_PC_IMM(templo1, 8) );      // ldr templo1, [pc, #8]
		cache_addw( ADD_LO_PC_IMM(templo2, 8) );      // adr templo2, after_call (add templo2, pc, #8)
		cache_addw( MOV_HI_LO(HOST_lr, templo2) );      // mov lr, templo2
		cache_addw( BX(templo1) );      // bx templo1     --- switch to arm state
		cache_addw( NOP );      // nop
	}
	cache_addd((Bit32u)func);      // .int func
	// after_call:

	// switch from arm to thumb state
	cache_addd(0xe2800000 + (templo1 << 12) + (HOST_pc << 16) + (1));      // add templo1, pc, #1
	cache_addd(0xe12fff10 + (templo1));      // bx templo1

	// thumb state from now on
}
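
// The sequence above is a thumb->arm->thumb trampoline: it loads the
// callee address from the inline literal, points lr at after_call and
// switches to arm state with bx. The callee returns (bx lr) in arm state
// to after_call, where the two arm instructions compute pc+1 (setting the
// thumb bit) and bx back into thumb state. after_call must therefore be
// word aligned, which the nop padding in the unaligned case guarantees.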

// generate a call to a function with paramcount parameters
// note: the parameters are loaded in the architecture specific way
// using the gen_load_param_ functions below
static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) {
	Bit32u proc_addr = (Bit32u)cache.pos;
	gen_call_function_raw(func);
	return proc_addr;
	// if proc_addr is on word boundary ((proc_addr & 0x03) == 0)
	//   then length of generated code is 20 bytes
	//   otherwise length of generated code is 22 bytes
}

#if (1)
// max of 4 parameters in a1-a4

// load an immediate value as param'th function parameter
static void INLINE gen_load_param_imm(Bitu imm,Bitu param) {
	gen_mov_dword_to_reg_imm(param, imm);
}

// load an address as param'th function parameter
static void INLINE gen_load_param_addr(Bitu addr,Bitu param) {
	gen_mov_dword_to_reg_imm(param, addr);
}

// load a host-register as param'th function parameter
static void INLINE gen_load_param_reg(Bitu reg,Bitu param) {
	gen_mov_regs(param, reg);
}

// load a value from memory as param'th function parameter
static void INLINE gen_load_param_mem(Bitu mem,Bitu param) {
	gen_mov_word_to_reg(param, (void *)mem, 1);
}
#else
	other arm abis
#endif

// jump to an address pointed at by ptr, offset is in imm
static void gen_jmp_ptr(void * ptr,Bits imm=0) {
	gen_mov_word_to_reg(templo3, ptr, 1);

#if !defined(C_UNALIGNED_MEMORY)
// (*ptr) should be word aligned
	if ((imm & 0x03) == 0) {
#endif
		if ((imm >= 0) && (imm < 128) && ((imm & 3) == 0)) {
			cache_addw( LDR_IMM(templo2, templo3, imm) );      // ldr templo2, [templo3, #imm]
		} else {
			gen_mov_dword_to_reg_imm(templo2, imm);
			cache_addw( LDR_REG(templo2, templo3, templo2) );      // ldr templo2, [templo3, templo2]
		}
#if !defined(C_UNALIGNED_MEMORY)
	} else {
		gen_add_imm(templo3, imm);

		cache_addw( LDRB_IMM(templo2, templo3, 0) );      // ldrb templo2, [templo3]
		cache_addw( LDRB_IMM(templo1, templo3, 1) );      // ldrb templo1, [templo3, #1]
		cache_addw( LSL_IMM(templo1, templo1, 8) );      // lsl templo1, templo1, #8
		cache_addw( ORR(templo2, templo1) );      // orr templo2, templo1
		cache_addw( LDRB_IMM(templo1, templo3, 2) );      // ldrb templo1, [templo3, #2]
		cache_addw( LSL_IMM(templo1, templo1, 16) );      // lsl templo1, templo1, #16
		cache_addw( ORR(templo2, templo1) );      // orr templo2, templo1
		cache_addw( LDRB_IMM(templo1, templo3, 3) );      // ldrb templo1, [templo3, #3]
		cache_addw( LSL_IMM(templo1, templo1, 24) );      // lsl templo1, templo1, #24
		cache_addw( ORR(templo2, templo1) );      // orr templo2, templo1
	}
#endif

	// increase jmp address to keep thumb state
	cache_addw( ADD_IMM3(templo2, templo2, 1) );      // add templo2, templo2, #1

	cache_addw( BX(templo2) );      // bx templo2
}

// short conditional jump (+-127 bytes) if register is zero
// the destination is set by gen_fill_branch() later
static Bit32u gen_create_branch_on_zero(HostReg reg,bool dword) {
	if (dword) {
		cache_addw( CMP_IMM(reg, 0) );      // cmp reg, #0
	} else {
		cache_addw( LSL_IMM(templo1, reg, 16) );      // lsl templo1, reg, #16
	}
	cache_addw( BEQ_FWD(0) );      // beq j
	return ((Bit32u)cache.pos-2);
}

// short conditional jump (+-127 bytes) if register is nonzero
// the destination is set by gen_fill_branch() later
static Bit32u gen_create_branch_on_nonzero(HostReg reg,bool dword) {
	if (dword) {
		cache_addw( CMP_IMM(reg, 0) );      // cmp reg, #0
	} else {
		cache_addw( LSL_IMM(templo1, reg, 16) );      // lsl templo1, reg, #16
	}
	cache_addw( BNE_FWD(0) );      // bne j
	return ((Bit32u)cache.pos-2);
}

// calculate relative offset and fill it into the location pointed to by data
static void INLINE gen_fill_branch(DRC_PTR_SIZE_IM data) {
#if C_DEBUG
	Bits len=(Bit32u)cache.pos-(data+4);
	if (len<0) len=-len;
	if (len>252) LOG_MSG("Big jump %d",len);
#endif
	*(Bit8u*)data=(Bit8u)( ((Bit32u)cache.pos-(data+4)) >> 1 );
}
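
// Example: for a beq written at address A (data == A) with cache.pos now
// at A + 10, the stored offset byte is (A + 10 - (A + 4)) >> 1 == 3,
// i.e. "beq pc+6" in the forward-branch notation used above.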

// conditional jump if register is nonzero
// for isdword==true all 32bit of the register are tested
// for isdword==false the lowest 8bit of the register are tested
static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool isdword) {
	if (isdword) {
		cache_addw( CMP_IMM(reg, 0) );      // cmp reg, #0
	} else {
		cache_addw( LSL_IMM(templo2, reg, 24) );      // lsl templo2, reg, #24
	}
	if (((Bit32u)cache.pos & 0x03) == 0) {
		cache_addw( BEQ_FWD(8) );      // beq nobranch (pc+8)
		cache_addw( LDR_PC_IMM(templo1, 4) );      // ldr templo1, [pc, #4]
		cache_addw( BX(templo1) );      // bx templo1
		cache_addw( NOP );      // nop
	} else {
		cache_addw( BEQ_FWD(6) );      // beq nobranch (pc+6)
		cache_addw( LDR_PC_IMM(templo1, 0) );      // ldr templo1, [pc, #0]
		cache_addw( BX(templo1) );      // bx templo1
	}
	cache_addd(0);      // fill j
	// nobranch:
	return ((Bit32u)cache.pos-4);
}

// compare 32bit-register against zero and jump if value less/equal than zero
static Bit32u gen_create_branch_long_leqzero(HostReg reg) {
	cache_addw( CMP_IMM(reg, 0) );      // cmp reg, #0
	if (((Bit32u)cache.pos & 0x03) == 0) {
		cache_addw( BGT_FWD(8) );      // bgt nobranch (pc+8)
		cache_addw( LDR_PC_IMM(templo1, 4) );      // ldr templo1, [pc, #4]
		cache_addw( BX(templo1) );      // bx templo1
		cache_addw( NOP );      // nop
	} else {
		cache_addw( BGT_FWD(6) );      // bgt nobranch (pc+6)
		cache_addw( LDR_PC_IMM(templo1, 0) );      // ldr templo1, [pc, #0]
		cache_addw( BX(templo1) );      // bx templo1
	}
	cache_addd(0);      // fill j
	// nobranch:
	return ((Bit32u)cache.pos-4);
}

// calculate long relative offset and fill it into the location pointed to by data
static void INLINE gen_fill_branch_long(Bit32u data) {
	// this is an absolute branch
	*(Bit32u*)data=((Bit32u)cache.pos) + 1; // add 1 to keep processor in thumb state
}

static void gen_run_code(void) {
	Bit8u *pos1, *pos2, *pos3;

#if (__ARM_EABI__)
	// 8-byte stack alignment
	cache_addd(0xe92d4ff0);			// stmfd sp!, {v1-v8,lr}
#else
	cache_addd(0xe92d4df0);			// stmfd sp!, {v1-v5,v7,v8,lr}
#endif

	cache_addd( ARM_ADD_IMM(HOST_r0, HOST_r0, 1, 0) );      // add r0, r0, #1

	pos1 = cache.pos;
	cache_addd( 0 );
	pos2 = cache.pos;
	cache_addd( 0 );
	pos3 = cache.pos;
	cache_addd( 0 );

	cache_addd( ARM_ADD_IMM(HOST_lr, HOST_pc, 4, 0) );			// add lr, pc, #4
	cache_addd( ARM_STR_IMM_M_W(HOST_lr, HOST_sp, 4) );      // str lr, [sp, #-4]!
	cache_addd( ARM_BX(HOST_r0) );			// bx r0

#if (__ARM_EABI__)
	cache_addd(0xe8bd4ff0);			// ldmfd sp!, {v1-v8,lr}
#else
	cache_addd(0xe8bd4df0);			// ldmfd sp!, {v1-v5,v7,v8,lr}
#endif
	cache_addd( ARM_BX(HOST_lr) );			// bx lr

	// align cache.pos to 32 bytes
	if ((((Bitu)cache.pos) & 0x1f) != 0) {
		cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f));
	}

	*(Bit32u*)pos1 = ARM_LDR_IMM(FC_SEGS_ADDR, HOST_pc, cache.pos - (pos1 + 8));      // ldr FC_SEGS_ADDR, [pc, #(&Segs)]
	cache_addd((Bit32u)&Segs);      // address of "Segs"

	*(Bit32u*)pos2 = ARM_LDR_IMM(FC_REGS_ADDR, HOST_pc, cache.pos - (pos2 + 8));      // ldr FC_REGS_ADDR, [pc, #(&cpu_regs)]
	cache_addd((Bit32u)&cpu_regs);  // address of "cpu_regs"

	*(Bit32u*)pos3 = ARM_LDR_IMM(readdata_addr, HOST_pc, cache.pos - (pos3 + 8));      // ldr readdata_addr, [pc, #(&core_dynrec.readdata)]
	cache_addd((Bit32u)&core_dynrec.readdata);  // address of "core_dynrec.readdata"

	// align cache.pos to 32 bytes
	if ((((Bitu)cache.pos) & 0x1f) != 0) {
		cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f));
	}
}
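
// Entry protocol: the caller passes the address of a generated (thumb)
// code block in r0; "add r0, r0, #1" sets the thumb bit so bx r0 enters
// thumb state, and "str lr, [sp, #-4]!" pushes the arm return address
// that gen_return_function later pops.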

// return from a function
static void gen_return_function(void) {
	cache_addw(0xbc08);      // pop {r3}
	cache_addw( BX(HOST_r3) );      // bx r3
}
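
// The popped word is the arm return address pushed in gen_run_code (thumb
// bit clear), so this bx switches back to arm state and lands on the
// ldmfd/bx lr epilogue there.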

#ifdef DRC_FLAGS_INVALIDATION

// called when a call to a function can be replaced by a
// call to a simpler function
static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
#ifdef DRC_FLAGS_INVALIDATION_DCODE
	if (((Bit32u)pos & 0x03) == 0)
	{
		// try to avoid function calls but rather directly fill in code
		switch (flags_type) {
			case t_ADDb:
			case t_ADDw:
			case t_ADDd:
				*(Bit16u*)pos=ADD_REG(HOST_a1, HOST_a1, HOST_a2);	// add a1, a1, a2
				*(Bit16u*)(pos+2)=B_FWD(14);						// b after_call (pc+14)
				break;
			case t_ORb:
			case t_ORw:
			case t_ORd:
				*(Bit16u*)pos=ORR(HOST_a1, HOST_a2);				// orr a1, a2
				*(Bit16u*)(pos+2)=B_FWD(14);						// b after_call (pc+14)
				break;
			case t_ANDb:
			case t_ANDw:
			case t_ANDd:
				*(Bit16u*)pos=AND(HOST_a1, HOST_a2);				// and a1, a2
				*(Bit16u*)(pos+2)=B_FWD(14);						// b after_call (pc+14)
				break;
			case t_SUBb:
			case t_SUBw:
			case t_SUBd:
				*(Bit16u*)pos=SUB_REG(HOST_a1, HOST_a1, HOST_a2);	// sub a1, a1, a2
				*(Bit16u*)(pos+2)=B_FWD(14);						// b after_call (pc+14)
				break;
			case t_XORb:
			case t_XORw:
			case t_XORd:
				*(Bit16u*)pos=EOR(HOST_a1, HOST_a2);				// eor a1, a2
				*(Bit16u*)(pos+2)=B_FWD(14);						// b after_call (pc+14)
				break;
			case t_CMPb:
			case t_CMPw:
			case t_CMPd:
			case t_TESTb:
			case t_TESTw:
			case t_TESTd:
				*(Bit16u*)pos=B_FWD(16);							// b after_call (pc+16)
				break;
			case t_INCb:
			case t_INCw:
			case t_INCd:
				*(Bit16u*)pos=ADD_IMM3(HOST_a1, HOST_a1, 1);		// add a1, a1, #1
				*(Bit16u*)(pos+2)=B_FWD(14);						// b after_call (pc+14)
				break;
			case t_DECb:
			case t_DECw:
			case t_DECd:
				*(Bit16u*)pos=SUB_IMM3(HOST_a1, HOST_a1, 1);		// sub a1, a1, #1
				*(Bit16u*)(pos+2)=B_FWD(14);						// b after_call (pc+14)
				break;
			case t_SHLb:
			case t_SHLw:
			case t_SHLd:
				*(Bit16u*)pos=LSL_REG(HOST_a1, HOST_a2);			// lsl a1, a2
				*(Bit16u*)(pos+2)=B_FWD(14);						// b after_call (pc+14)
				break;
			case t_SHRb:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);		// lsl a1, a1, #24
				*(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 24);	// lsr a1, a1, #24
				*(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2);		// lsr a1, a2
				*(Bit16u*)(pos+6)=B_FWD(10);						// b after_call (pc+10)
				break;
			case t_SHRw:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);		// lsl a1, a1, #16
				*(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 16);	// lsr a1, a1, #16
				*(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2);		// lsr a1, a2
				*(Bit16u*)(pos+6)=B_FWD(10);						// b after_call (pc+10)
				break;
			case t_SHRd:
				*(Bit16u*)pos=LSR_REG(HOST_a1, HOST_a2);			// lsr a1, a2
				*(Bit16u*)(pos+2)=B_FWD(14);						// b after_call (pc+14)
				break;
			case t_SARb:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);		// lsl a1, a1, #24
				*(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 24);	// asr a1, a1, #24
				*(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2);		// asr a1, a2
				*(Bit16u*)(pos+6)=B_FWD(10);						// b after_call (pc+10)
				break;
			case t_SARw:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);		// lsl a1, a1, #16
				*(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 16);	// asr a1, a1, #16
				*(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2);		// asr a1, a2
				*(Bit16u*)(pos+6)=B_FWD(10);						// b after_call (pc+10)
				break;
			case t_SARd:
				*(Bit16u*)pos=ASR_REG(HOST_a1, HOST_a2);			// asr a1, a2
				*(Bit16u*)(pos+2)=B_FWD(14);						// b after_call (pc+14)
				break;
			case t_RORb:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);		// lsl a1, a1, #24
				*(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 8);		// lsr templo1, a1, #8
				*(Bit16u*)(pos+4)=ORR(HOST_a1, templo1);			// orr a1, templo1
				*(Bit16u*)(pos+6)=LSR_IMM(templo1, HOST_a1, 16);	// lsr templo1, a1, #16
				*(Bit16u*)(pos+8)=ORR(HOST_a1, templo1);			// orr a1, templo1
				*(Bit16u*)(pos+10)=ROR_REG(HOST_a1, HOST_a2);		// ror a1, a2
				*(Bit16u*)(pos+12)=B_FWD(4);						// b after_call (pc+4)
				break;
			case t_RORw:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);		// lsl a1, a1, #16
				*(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 16);	// lsr templo1, a1, #16
				*(Bit16u*)(pos+4)=ORR(HOST_a1, templo1);			// orr a1, templo1
				*(Bit16u*)(pos+6)=ROR_REG(HOST_a1, HOST_a2);		// ror a1, a2
				*(Bit16u*)(pos+8)=B_FWD(10);						// b after_call (pc+10)
				break;
			case t_RORd:
				*(Bit16u*)pos=ROR_REG(HOST_a1, HOST_a2);			// ror a1, a2
				*(Bit16u*)(pos+2)=B_FWD(14);						// b after_call (pc+14)
				break;
			case t_ROLb:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);		// lsl a1, a1, #24
				*(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2);			// neg a2, a2
				*(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 8);		// lsr templo1, a1, #8
				*(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32);			// add a2, #32
				*(Bit16u*)(pos+8)=ORR(HOST_a1, templo1);			// orr a1, templo1
				*(Bit16u*)(pos+10)=NOP;								// nop
				*(Bit16u*)(pos+12)=LSR_IMM(templo1, HOST_a1, 16);	// lsr templo1, a1, #16
				*(Bit16u*)(pos+14)=NOP;								// nop
				*(Bit16u*)(pos+16)=ORR(HOST_a1, templo1);			// orr a1, templo1
				*(Bit16u*)(pos+18)=ROR_REG(HOST_a1, HOST_a2);		// ror a1, a2
				break;
			case t_ROLw:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);		// lsl a1, a1, #16
				*(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2);			// neg a2, a2
				*(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 16);	// lsr templo1, a1, #16
				*(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32);			// add a2, #32
				*(Bit16u*)(pos+8)=ORR(HOST_a1, templo1);			// orr a1, templo1
				*(Bit16u*)(pos+10)=ROR_REG(HOST_a1, HOST_a2);		// ror a1, a2
				*(Bit16u*)(pos+12)=B_FWD(4);						// b after_call (pc+4)
				break;
			case t_ROLd:
				*(Bit16u*)pos=NEG(HOST_a2, HOST_a2);				// neg a2, a2
				*(Bit16u*)(pos+2)=ADD_IMM8(HOST_a2, 32);			// add a2, #32
				*(Bit16u*)(pos+4)=ROR_REG(HOST_a1, HOST_a2);		// ror a1, a2
				*(Bit16u*)(pos+6)=B_FWD(10);						// b after_call (pc+10)
				break;
			case t_NEGb:
			case t_NEGw:
			case t_NEGd:
				*(Bit16u*)pos=NEG(HOST_a1, HOST_a1);				// neg a1, a1
				*(Bit16u*)(pos+2)=B_FWD(14);						// b after_call (pc+14)
				break;
			default:
				*(Bit32u*)(pos+8)=(Bit32u)fct_ptr;		// simple_func
				break;
		}
	}
	else
	{
		// try to avoid function calls but rather directly fill in code
		switch (flags_type) {
			case t_ADDb:
			case t_ADDw:
			case t_ADDd:
				*(Bit16u*)pos=ADD_REG(HOST_a1, HOST_a1, HOST_a2);	// add a1, a1, a2
				*(Bit16u*)(pos+2)=B_FWD(16);						// b after_call (pc+16)
				break;
			case t_ORb:
			case t_ORw:
			case t_ORd:
				*(Bit16u*)pos=ORR(HOST_a1, HOST_a2);				// orr a1, a2
				*(Bit16u*)(pos+2)=B_FWD(16);						// b after_call (pc+16)
				break;
			case t_ANDb:
			case t_ANDw:
			case t_ANDd:
				*(Bit16u*)pos=AND(HOST_a1, HOST_a2);				// and a1, a2
				*(Bit16u*)(pos+2)=B_FWD(16);						// b after_call (pc+16)
				break;
			case t_SUBb:
			case t_SUBw:
			case t_SUBd:
				*(Bit16u*)pos=SUB_REG(HOST_a1, HOST_a1, HOST_a2);	// sub a1, a1, a2
				*(Bit16u*)(pos+2)=B_FWD(16);						// b after_call (pc+16)
				break;
			case t_XORb:
			case t_XORw:
			case t_XORd:
				*(Bit16u*)pos=EOR(HOST_a1, HOST_a2);				// eor a1, a2
				*(Bit16u*)(pos+2)=B_FWD(16);						// b after_call (pc+16)
				break;
			case t_CMPb:
			case t_CMPw:
			case t_CMPd:
			case t_TESTb:
			case t_TESTw:
			case t_TESTd:
				*(Bit16u*)pos=B_FWD(18);							// b after_call (pc+18)
				break;
			case t_INCb:
			case t_INCw:
			case t_INCd:
				*(Bit16u*)pos=ADD_IMM3(HOST_a1, HOST_a1, 1);		// add a1, a1, #1
				*(Bit16u*)(pos+2)=B_FWD(16);						// b after_call (pc+16)
				break;
			case t_DECb:
			case t_DECw:
			case t_DECd:
				*(Bit16u*)pos=SUB_IMM3(HOST_a1, HOST_a1, 1);		// sub a1, a1, #1
				*(Bit16u*)(pos+2)=B_FWD(16);						// b after_call (pc+16)
				break;
			case t_SHLb:
			case t_SHLw:
			case t_SHLd:
				*(Bit16u*)pos=LSL_REG(HOST_a1, HOST_a2);			// lsl a1, a2
				*(Bit16u*)(pos+2)=B_FWD(16);						// b after_call (pc+16)
				break;
			case t_SHRb:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);		// lsl a1, a1, #24
				*(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 24);	// lsr a1, a1, #24
				*(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2);		// lsr a1, a2
				*(Bit16u*)(pos+6)=B_FWD(12);						// b after_call (pc+12)
				break;
			case t_SHRw:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);		// lsl a1, a1, #16
				*(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 16);	// lsr a1, a1, #16
				*(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2);		// lsr a1, a2
				*(Bit16u*)(pos+6)=B_FWD(12);						// b after_call (pc+12)
				break;
			case t_SHRd:
				*(Bit16u*)pos=LSR_REG(HOST_a1, HOST_a2);			// lsr a1, a2
				*(Bit16u*)(pos+2)=B_FWD(16);						// b after_call (pc+16)
				break;
			case t_SARb:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);		// lsl a1, a1, #24
				*(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 24);	// asr a1, a1, #24
				*(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2);		// asr a1, a2
				*(Bit16u*)(pos+6)=B_FWD(12);						// b after_call (pc+12)
				break;
			case t_SARw:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);		// lsl a1, a1, #16
				*(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 16);	// asr a1, a1, #16
				*(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2);		// asr a1, a2
				*(Bit16u*)(pos+6)=B_FWD(12);						// b after_call (pc+12)
				break;
			case t_SARd:
				*(Bit16u*)pos=ASR_REG(HOST_a1, HOST_a2);			// asr a1, a2
				*(Bit16u*)(pos+2)=B_FWD(16);						// b after_call (pc+16)
				break;
			case t_RORb:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);		// lsl a1, a1, #24
				*(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 8);		// lsr templo1, a1, #8
				*(Bit16u*)(pos+4)=ORR(HOST_a1, templo1);			// orr a1, templo1
				*(Bit16u*)(pos+6)=LSR_IMM(templo1, HOST_a1, 16);	// lsr templo1, a1, #16
				*(Bit16u*)(pos+8)=ORR(HOST_a1, templo1);			// orr a1, templo1
				*(Bit16u*)(pos+10)=ROR_REG(HOST_a1, HOST_a2);		// ror a1, a2
				*(Bit16u*)(pos+12)=B_FWD(6);						// b after_call (pc+6)
				break;
			case t_RORw:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);		// lsl a1, a1, #16
				*(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 16);	// lsr templo1, a1, #16
				*(Bit16u*)(pos+4)=ORR(HOST_a1, templo1);			// orr a1, templo1
				*(Bit16u*)(pos+6)=ROR_REG(HOST_a1, HOST_a2);		// ror a1, a2
				*(Bit16u*)(pos+8)=B_FWD(10);						// b after_call (pc+10)
				break;
			case t_RORd:
				*(Bit16u*)pos=ROR_REG(HOST_a1, HOST_a2);			// ror a1, a2
				*(Bit16u*)(pos+2)=B_FWD(16);						// b after_call (pc+16)
				break;
			case t_ROLb:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);		// lsl a1, a1, #24
				*(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2);			// neg a2, a2
				*(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 8);		// lsr templo1, a1, #8
				*(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32);			// add a2, #32
				*(Bit16u*)(pos+8)=ORR(HOST_a1, templo1);			// orr a1, templo1
				*(Bit16u*)(pos+10)=NOP;								// nop
				*(Bit16u*)(pos+12)=LSR_IMM(templo1, HOST_a1, 16);	// lsr templo1, a1, #16
				*(Bit16u*)(pos+14)=NOP;								// nop
				*(Bit16u*)(pos+16)=ORR(HOST_a1, templo1);			// orr a1, templo1
				*(Bit16u*)(pos+18)=NOP;								// nop
				*(Bit16u*)(pos+20)=ROR_REG(HOST_a1, HOST_a2);		// ror a1, a2
				break;
			case t_ROLw:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);		// lsl a1, a1, #16
				*(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2);			// neg a2, a2
				*(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 16);	// lsr templo1, a1, #16
				*(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32);			// add a2, #32
				*(Bit16u*)(pos+8)=ORR(HOST_a1, templo1);			// orr a1, templo1
				*(Bit16u*)(pos+10)=ROR_REG(HOST_a1, HOST_a2);		// ror a1, a2
				*(Bit16u*)(pos+12)=B_FWD(6);						// b after_call (pc+6)
				break;
			case t_ROLd:
				*(Bit16u*)pos=NEG(HOST_a2, HOST_a2);				// neg a2, a2
				*(Bit16u*)(pos+2)=ADD_IMM8(HOST_a2, 32);			// add a2, #32
				*(Bit16u*)(pos+4)=ROR_REG(HOST_a1, HOST_a2);		// ror a1, a2
				*(Bit16u*)(pos+6)=B_FWD(12);						// b after_call (pc+12)
				break;
			case t_NEGb:
			case t_NEGw:
			case t_NEGd:
				*(Bit16u*)pos=NEG(HOST_a1, HOST_a1);				// neg a1, a1
				*(Bit16u*)(pos+2)=B_FWD(16);						// b after_call (pc+16)
				break;
			default:
				*(Bit32u*)(pos+10)=(Bit32u)fct_ptr;		// simple_func
				break;
		}

	}
#else
	if (((Bit32u)pos & 0x03) == 0)
	{
		*(Bit32u*)(pos+8)=(Bit32u)fct_ptr;		// simple_func
	}
	else
	{
		*(Bit32u*)(pos+10)=(Bit32u)fct_ptr;		// simple_func
	}
#endif
}
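
// Note: the branch distances above are tied to the fixed size of the
// sequence emitted by gen_call_function_raw: 20 bytes at a word-aligned
// call site, 22 bytes otherwise. Each inlined replacement skips the
// remainder of that sequence, and the default case instead patches the
// literal word (at pos+8 or pos+10) so the original call invokes the
// simpler function.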
#endif

static void cache_block_before_close(void) {
	if ((((Bit32u)cache.pos) & 3) != 0) {
		cache_addw( NOP );      // nop
	}
}

#ifdef DRC_USE_SEGS_ADDR

// mov 16bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 2 must be zero)
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo1, FC_SEGS_ADDR) );      // mov templo1, FC_SEGS_ADDR
	cache_addw( LDRH_IMM(dest_reg, templo1, index) );      // ldrh dest_reg, [templo1, #index]
}

// mov 32bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 4 must be zero)
static void gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo1, FC_SEGS_ADDR) );      // mov templo1, FC_SEGS_ADDR
	cache_addw( LDR_IMM(dest_reg, templo1, index) );      // ldr dest_reg, [templo1, #index]
}

// add a 32bit value from Segs[index] to a full register using FC_SEGS_ADDR (index modulo 4 must be zero)
static void gen_add_seg32_to_reg(HostReg reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo1, FC_SEGS_ADDR) );      // mov templo1, FC_SEGS_ADDR
	cache_addw( LDR_IMM(templo2, templo1, index) );      // ldr templo2, [templo1, #index]
	cache_addw( ADD_REG(reg, reg, templo2) );      // add reg, reg, templo2
}

#endif

#ifdef DRC_USE_REGS_ADDR

// mov 16bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 2 must be zero)
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
	cache_addw( LDRH_IMM(dest_reg, templo2, index) );      // ldrh dest_reg, [templo2, #index]
}

// mov 32bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 4 must be zero)
static void gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
	cache_addw( LDR_IMM(dest_reg, templo2, index) );      // ldr dest_reg, [templo2, #index]
}

// move a 32bit (dword==true) or 16bit (dword==false) value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword) {
	cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
	if (dword) {
		cache_addw( LDR_IMM(dest_reg, templo2, index) );      // ldr dest_reg, [templo2, #index]
	} else {
		cache_addw( LDRH_IMM(dest_reg, templo2, index) );      // ldrh dest_reg, [templo2, #index]
	}
}

// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
	cache_addw( LDRB_IMM(dest_reg, templo2, index) );      // ldrb dest_reg, [templo2, #index]
}

// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void INLINE gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
	cache_addw( LDRB_IMM(dest_reg, templo2, index) );      // ldrb dest_reg, [templo2, #index]
}


// add a 32bit value from cpu_regs[index] to a full register using FC_REGS_ADDR (index modulo 4 must be zero)
static void gen_add_regval32_to_reg(HostReg reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
	cache_addw( LDR_IMM(templo1, templo2, index) );      // ldr templo1, [templo2, #index]
	cache_addw( ADD_REG(reg, reg, templo1) );      // add reg, reg, templo1
}


// move 16bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 2 must be zero)
static void gen_mov_regval16_from_reg(HostReg src_reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) );      // mov templo1, FC_REGS_ADDR
	cache_addw( STRH_IMM(src_reg, templo1, index) );      // strh src_reg, [templo1, #index]
}

// move 32bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 4 must be zero)
static void gen_mov_regval32_from_reg(HostReg src_reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) );      // mov templo1, FC_REGS_ADDR
	cache_addw( STR_IMM(src_reg, templo1, index) );      // str src_reg, [templo1, #index]
}

// move 32bit (dword==true) or 16bit (dword==false) of a register into cpu_regs[index] using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
static void gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword) {
	cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) );      // mov templo1, FC_REGS_ADDR
	if (dword) {
		cache_addw( STR_IMM(src_reg, templo1, index) );      // str src_reg, [templo1, #index]
	} else {
		cache_addw( STRH_IMM(src_reg, templo1, index) );      // strh src_reg, [templo1, #index]
	}
}

// move the lowest 8bit of a register into cpu_regs[index] using FC_REGS_ADDR
static void gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) );      // mov templo1, FC_REGS_ADDR
	cache_addw( STRB_IMM(src_reg, templo1, index) );      // strb src_reg, [templo1, #index]
}

#endif