1 /*
2 * Copyright (C) 2002-2015 The DOSBox Team
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 */
18
19
20
21 /* ARMv4 (little endian) backend by M-HT (thumb version) */
22
23
// temporary "lo" registers (thumb scratch registers, freely clobbered
// by the code generators below)
#define templo1 HOST_v3
#define templo2 HOST_v4
#define templo3 HOST_v2

// register that holds function return values
#define FC_RETOP HOST_a1

// register used for address calculations
#define FC_ADDR HOST_v1			// has to be saved across calls, see DRC_PROTECT_ADDR_REG

// register that holds the first parameter
#define FC_OP1 HOST_a1

// register that holds the second parameter
#define FC_OP2 HOST_a2

// special register that holds the third parameter for _R3 calls (byte accessible)
#define FC_OP3 HOST_a4

// register that holds byte-accessible temporary values
#define FC_TMP_BA1 HOST_a1

// register that holds byte-accessible temporary values
#define FC_TMP_BA2 HOST_a2

// temporary register for LEA
#define TEMP_REG_DRC HOST_a4

// used to hold the address of "cpu_regs" - preferably filled in function gen_run_code
#define FC_REGS_ADDR HOST_v7

// used to hold the address of "Segs" - preferably filled in function gen_run_code
#define FC_SEGS_ADDR HOST_v8

// used to hold the address of "core_dynrec.readdata" - filled in function gen_run_code
#define readdata_addr HOST_v5
61
62
// instruction encodings
// (each macro expands to one 16bit thumb opcode; register fields take the
// 3bit lo-register number unless a hi register is explicitly allowed)

// move
// mov dst, #imm @ 0 <= imm <= 255
#define MOV_IMM(dst, imm) (0x2000 + ((dst) << 8) + (imm) )
// mov dst, src
// (encoded as "add dst, src, #0" since thumb has no lo-to-lo mov that
// leaves the flags usable the same way)
#define MOV_REG(dst, src) ADD_IMM3(dst, src, 0)
// mov dst, src @ src is a hi register (r8..)
#define MOV_LO_HI(dst, src) (0x4640 + (dst) + (((src) - HOST_r8) << 3) )
// mov dst, src @ dst is a hi register (r8..)
#define MOV_HI_LO(dst, src) (0x4680 + ((dst) - HOST_r8) + ((src) << 3) )

// arithmetic
// add dst, src, #imm @ 0 <= imm <= 7
#define ADD_IMM3(dst, src, imm) (0x1c00 + (dst) + ((src) << 3) + ((imm) << 6) )
// add dst, #imm @ 0 <= imm <= 255
#define ADD_IMM8(dst, imm) (0x3000 + ((dst) << 8) + (imm) )
// add dst, src1, src2
#define ADD_REG(dst, src1, src2) (0x1800 + (dst) + ((src1) << 3) + ((src2) << 6) )
// add dst, pc, #imm @ 0 <= imm < 1024 & imm mod 4 = 0
#define ADD_LO_PC_IMM(dst, imm) (0xa000 + ((dst) << 8) + ((imm) >> 2) )
// sub dst, src1, src2
#define SUB_REG(dst, src1, src2) (0x1a00 + (dst) + ((src1) << 3) + ((src2) << 6) )
// sub dst, src, #imm @ 0 <= imm <= 7
#define SUB_IMM3(dst, src, imm) (0x1e00 + (dst) + ((src) << 3) + ((imm) << 6) )
// sub dst, #imm @ 0 <= imm <= 255
#define SUB_IMM8(dst, imm) (0x3800 + ((dst) << 8) + (imm) )
// neg dst, src
#define NEG(dst, src) (0x4240 + (dst) + ((src) << 3) )
// cmp dst, #imm @ 0 <= imm <= 255
#define CMP_IMM(dst, imm) (0x2800 + ((dst) << 8) + (imm) )
// nop (encoded as "mov r8, r8")
#define NOP (0x46c0)

// logical
// and dst, src
#define AND(dst, src) (0x4000 + (dst) + ((src) << 3) )
// bic dst, src
#define BIC(dst, src) (0x4380 + (dst) + ((src) << 3) )
// eor dst, src
#define EOR(dst, src) (0x4040 + (dst) + ((src) << 3) )
// orr dst, src
#define ORR(dst, src) (0x4300 + (dst) + ((src) << 3) )
// mvn dst, src
#define MVN(dst, src) (0x43c0 + (dst) + ((src) << 3) )

// shift/rotate
// note: all thumb shift instructions update the condition flags
// lsl dst, src, #imm
#define LSL_IMM(dst, src, imm) (0x0000 + (dst) + ((src) << 3) + ((imm) << 6) )
// lsl dst, reg
#define LSL_REG(dst, reg) (0x4080 + (dst) + ((reg) << 3) )
// lsr dst, src, #imm
#define LSR_IMM(dst, src, imm) (0x0800 + (dst) + ((src) << 3) + ((imm) << 6) )
// lsr dst, reg
#define LSR_REG(dst, reg) (0x40c0 + (dst) + ((reg) << 3) )
// asr dst, src, #imm
#define ASR_IMM(dst, src, imm) (0x1000 + (dst) + ((src) << 3) + ((imm) << 6) )
// asr dst, reg
#define ASR_REG(dst, reg) (0x4100 + (dst) + ((reg) << 3) )
// ror dst, reg
#define ROR_REG(dst, reg) (0x41c0 + (dst) + ((reg) << 3) )

// load
// (the immediate offset is scaled by the access size before being placed
// in the 5bit offset field, hence the differing shifts below)
// ldr reg, [addr, #imm] @ 0 <= imm < 128 & imm mod 4 = 0
#define LDR_IMM(reg, addr, imm) (0x6800 + (reg) + ((addr) << 3) + ((imm) << 4) )
// ldrh reg, [addr, #imm] @ 0 <= imm < 64 & imm mod 2 = 0
#define LDRH_IMM(reg, addr, imm) (0x8800 + (reg) + ((addr) << 3) + ((imm) << 5) )
// ldrb reg, [addr, #imm] @ 0 <= imm < 32
#define LDRB_IMM(reg, addr, imm) (0x7800 + (reg) + ((addr) << 3) + ((imm) << 6) )
// ldr reg, [pc, #imm] @ 0 <= imm < 1024 & imm mod 4 = 0
#define LDR_PC_IMM(reg, imm) (0x4800 + ((reg) << 8) + ((imm) >> 2) )
// ldr reg, [addr1, addr2]
#define LDR_REG(reg, addr1, addr2) (0x5800 + (reg) + ((addr1) << 3) + ((addr2) << 6) )

// store
// str reg, [addr, #imm] @ 0 <= imm < 128 & imm mod 4 = 0
#define STR_IMM(reg, addr, imm) (0x6000 + (reg) + ((addr) << 3) + ((imm) << 4) )
// strh reg, [addr, #imm] @ 0 <= imm < 64 & imm mod 2 = 0
#define STRH_IMM(reg, addr, imm) (0x8000 + (reg) + ((addr) << 3) + ((imm) << 5) )
// strb reg, [addr, #imm] @ 0 <= imm < 32
#define STRB_IMM(reg, addr, imm) (0x7000 + (reg) + ((addr) << 3) + ((imm) << 6) )

// branch
// (branch offsets are halfword counts relative to pc = branch address + 4)
// beq pc+imm @ 0 <= imm < 256 & imm mod 2 = 0
#define BEQ_FWD(imm) (0xd000 + ((imm) >> 1) )
// bne pc+imm @ 0 <= imm < 256 & imm mod 2 = 0
#define BNE_FWD(imm) (0xd100 + ((imm) >> 1) )
// bgt pc+imm @ 0 <= imm < 256 & imm mod 2 = 0
#define BGT_FWD(imm) (0xdc00 + ((imm) >> 1) )
// b pc+imm @ 0 <= imm < 2048 & imm mod 2 = 0
#define B_FWD(imm) (0xe000 + ((imm) >> 1) )
// bx reg
#define BX(reg) (0x4700 + ((reg) << 3) )
156
157
// arm instructions
// (32bit ARM-state opcodes, used only around thumb<->arm state switches)

// arithmetic
// add dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define ARM_ADD_IMM(dst, src, imm, rimm) (0xe2800000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )

// load
// ldr reg, [addr, #imm] @ 0 <= imm < 4096
#define ARM_LDR_IMM(reg, addr, imm) (0xe5900000 + ((reg) << 12) + ((addr) << 16) + (imm) )

// store
// str reg, [addr, #-(imm)]! @ 0 <= imm < 4096 (pre-indexed, writeback)
#define ARM_STR_IMM_M_W(reg, addr, imm) (0xe5200000 + ((reg) << 12) + ((addr) << 16) + (imm) )

// branch
// bx reg
#define ARM_BX(reg) (0xe12fff10 + (reg) )
175
176
177 // move a full register from reg_src to reg_dst
gen_mov_regs(HostReg reg_dst,HostReg reg_src)178 static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) {
179 if(reg_src == reg_dst) return;
180 cache_addw( MOV_REG(reg_dst, reg_src) ); // mov reg_dst, reg_src
181 }
182
183 // helper function
val_single_shift(Bit32u value,Bit32u * val_shift)184 static bool val_single_shift(Bit32u value, Bit32u *val_shift) {
185 Bit32u shift;
186
187 if (GCC_UNLIKELY(value == 0)) {
188 *val_shift = 0;
189 return true;
190 }
191
192 shift = 0;
193 while ((value & 1) == 0) {
194 value>>=1;
195 shift+=1;
196 }
197
198 if ((value >> 8) != 0) return false;
199
200 *val_shift = shift;
201 return true;
202 }
203
204 // move a 32bit constant value into dest_reg
static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
	// Loads a 32bit constant using the cheapest thumb sequence available:
	// plain 8bit mov, mvn of the complement, shifted 8bit value,
	// pc-relative add, or (worst case) an inline literal pool entry.
	Bit32u scale;

	if (imm < 256) {
		cache_addw( MOV_IMM(dest_reg, imm) );	// mov dest_reg, #imm
	} else if ((~imm) < 256) {
		// complement fits one byte: load it, then invert
		cache_addw( MOV_IMM(dest_reg, ~imm) );	// mov dest_reg, #(~imm)
		cache_addw( MVN(dest_reg, dest_reg) );	// mvn dest_reg, dest_reg
	} else if (val_single_shift(imm, &scale)) {
		// an 8bit value shifted left: load then shift into place
		cache_addw( MOV_IMM(dest_reg, imm >> scale) );	// mov dest_reg, #(imm >> scale)
		cache_addw( LSL_IMM(dest_reg, dest_reg, scale) );	// lsl dest_reg, dest_reg, #scale
	} else {
		Bit32u diff;

		// distance from the pc value the next instruction will see (+4)
		diff = imm - ((Bit32u)cache.pos+4);

		if ((diff < 1024) && ((imm & 0x03) == 0)) {
			// imm is a nearby word-aligned address: "add rd, pc, #off".
			// The pc value used by this instruction is word-aligned, so
			// when emitting at a non-aligned position insert a nop and
			// shorten the offset by the 2 bytes the nop consumed.
			if (((Bit32u)cache.pos & 0x03) == 0) {
				cache_addw( ADD_LO_PC_IMM(dest_reg, diff) );	// add dest_reg, pc, #(diff >> 2)
			} else {
				cache_addw( NOP );	// nop
				cache_addw( ADD_LO_PC_IMM(dest_reg, diff - 2) );	// add dest_reg, pc, #((diff - 2) >> 2)
			}
		} else {
			// general case: pc-relative load from an inline literal that is
			// jumped over; the literal word must be 4-byte aligned, hence
			// the extra nop on the unaligned path
			if (((Bit32u)cache.pos & 0x03) == 0) {
				cache_addw( LDR_PC_IMM(dest_reg, 0) );	// ldr dest_reg, [pc, #0]
				cache_addw( B_FWD(2) );	// b next_code (pc+2)
				cache_addd(imm);	// .int imm
				// next_code:
			} else {
				cache_addw( LDR_PC_IMM(dest_reg, 4) );	// ldr dest_reg, [pc, #4]
				cache_addw( B_FWD(4) );	// b next_code (pc+4)
				cache_addw( NOP );	// nop
				cache_addd(imm);	// .int imm
				// next_code:
			}
		}
	}
}
244
245 // helper function
static bool gen_mov_memval_to_reg_helper(HostReg dest_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) {
	// If data lies within the immediate-offset range of addr_reg (a hi
	// register known to hold the runtime address addr_data), emit a single
	// base+offset load and return true; otherwise return false so the
	// caller falls back to materializing the absolute address.
	switch (size) {
		case 4:
#if !defined(C_UNALIGNED_MEMORY)
			// without unaligned-access support the value itself must be word-aligned
			if ((data & 3) == 0)
#endif
			{
				// ldr immediate offset: 0..124, multiple of 4
				if ((data >= addr_data) && (data < addr_data + 128) && (((data - addr_data) & 3) == 0)) {
					cache_addw( MOV_LO_HI(templo2, addr_reg) );	// mov templo2, addr_reg
					cache_addw( LDR_IMM(dest_reg, templo2, data - addr_data) );	// ldr dest_reg, [templo2, #(data - addr_data)]
					return true;
				}
			}
			break;
		case 2:
#if !defined(C_UNALIGNED_MEMORY)
			if ((data & 1) == 0)
#endif
			{
				// ldrh immediate offset: 0..62, multiple of 2
				if ((data >= addr_data) && (data < addr_data + 64) && (((data - addr_data) & 1) == 0)) {
					cache_addw( MOV_LO_HI(templo2, addr_reg) );	// mov templo2, addr_reg
					cache_addw( LDRH_IMM(dest_reg, templo2, data - addr_data) );	// ldrh dest_reg, [templo2, #(data - addr_data)]
					return true;
				}
			}
			break;
		case 1:
			// ldrb immediate offset: 0..31, no alignment requirement
			if ((data >= addr_data) && (data < addr_data + 32)) {
				cache_addw( MOV_LO_HI(templo2, addr_reg) );	// mov templo2, addr_reg
				cache_addw( LDRB_IMM(dest_reg, templo2, data - addr_data) );	// ldrb dest_reg, [templo2, #(data - addr_data)]
				return true;
			}
			// fall through
		default:
			break;
	}
	return false;
}
283
284 // helper function
gen_mov_memval_to_reg(HostReg dest_reg,void * data,Bitu size)285 static bool gen_mov_memval_to_reg(HostReg dest_reg, void *data, Bitu size) {
286 if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true;
287 if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true;
288 if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true;
289 return false;
290 }
291
292 // helper function for gen_mov_word_to_reg
static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,HostReg data_reg) {
	// data_reg already holds the address of data; emit the widest load the
	// address alignment allows, assembling the value from halfword/byte
	// loads when the host cannot perform unaligned accesses
	if (dword) {
#if !defined(C_UNALIGNED_MEMORY)
		if ((Bit32u)data & 3) {
			if ( ((Bit32u)data & 3) == 2 ) {
				// halfword-aligned: combine two halfword loads (little endian)
				cache_addw( LDRH_IMM(dest_reg, data_reg, 0) );	// ldrh dest_reg, [data_reg]
				cache_addw( LDRH_IMM(templo1, data_reg, 2) );	// ldrh templo1, [data_reg, #2]
				cache_addw( LSL_IMM(templo1, templo1, 16) );	// lsl templo1, templo1, #16
				cache_addw( ORR(dest_reg, templo1) );	// orr dest_reg, templo1
			} else {
				// odd address: byte + halfword (data+1 is halfword-aligned) + byte
				cache_addw( LDRB_IMM(dest_reg, data_reg, 0) );	// ldrb dest_reg, [data_reg]
				cache_addw( ADD_IMM3(templo1, data_reg, 1) );	// add templo1, data_reg, #1
				cache_addw( LDRH_IMM(templo1, templo1, 0) );	// ldrh templo1, [templo1]
				cache_addw( LSL_IMM(templo1, templo1, 8) );	// lsl templo1, templo1, #8
				cache_addw( ORR(dest_reg, templo1) );	// orr dest_reg, templo1
				cache_addw( LDRB_IMM(templo1, data_reg, 3) );	// ldrb templo1, [data_reg, #3]
				cache_addw( LSL_IMM(templo1, templo1, 24) );	// lsl templo1, templo1, #24
				cache_addw( ORR(dest_reg, templo1) );	// orr dest_reg, templo1
			}
		} else
#endif
		{
			cache_addw( LDR_IMM(dest_reg, data_reg, 0) );	// ldr dest_reg, [data_reg]
		}
	} else {
#if !defined(C_UNALIGNED_MEMORY)
		if ((Bit32u)data & 1) {
			// odd address: combine two byte loads
			cache_addw( LDRB_IMM(dest_reg, data_reg, 0) );	// ldrb dest_reg, [data_reg]
			cache_addw( LDRB_IMM(templo1, data_reg, 1) );	// ldrb templo1, [data_reg, #1]
			cache_addw( LSL_IMM(templo1, templo1, 8) );	// lsl templo1, templo1, #8
			cache_addw( ORR(dest_reg, templo1) );	// orr dest_reg, templo1
		} else
#endif
		{
			cache_addw( LDRH_IMM(dest_reg, data_reg, 0) );	// ldrh dest_reg, [data_reg]
		}
	}
}
332
333 // move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
334 // 16bit moves may destroy the upper 16bit of the destination register
gen_mov_word_to_reg(HostReg dest_reg,void * data,bool dword)335 static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) {
336 if (!gen_mov_memval_to_reg(dest_reg, data, (dword)?4:2)) {
337 gen_mov_dword_to_reg_imm(templo2, (Bit32u)data);
338 gen_mov_word_to_reg_helper(dest_reg, data, dword, templo2);
339 }
340 }
341
342 // move a 16bit constant value into dest_reg
343 // the upper 16bit of the destination register may be destroyed
static void INLINE gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) {
	// zero-extend the 16bit constant and reuse the dword loader
	gen_mov_dword_to_reg_imm(dest_reg, (Bit32u)imm);
}
347
348 // helper function
static bool gen_mov_memval_from_reg_helper(HostReg src_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) {
	// Store counterpart of gen_mov_memval_to_reg_helper: emits a single
	// base+offset store when data is within immediate-offset reach of
	// addr_reg (which holds addr_data at runtime); same ranges and
	// alignment rules as the load version.
	switch (size) {
		case 4:
#if !defined(C_UNALIGNED_MEMORY)
			if ((data & 3) == 0)
#endif
			{
				// str immediate offset: 0..124, multiple of 4
				if ((data >= addr_data) && (data < addr_data + 128) && (((data - addr_data) & 3) == 0)) {
					cache_addw( MOV_LO_HI(templo2, addr_reg) );	// mov templo2, addr_reg
					cache_addw( STR_IMM(src_reg, templo2, data - addr_data) );	// str src_reg, [templo2, #(data - addr_data)]
					return true;
				}
			}
			break;
		case 2:
#if !defined(C_UNALIGNED_MEMORY)
			if ((data & 1) == 0)
#endif
			{
				// strh immediate offset: 0..62, multiple of 2
				if ((data >= addr_data) && (data < addr_data + 64) && (((data - addr_data) & 1) == 0)) {
					cache_addw( MOV_LO_HI(templo2, addr_reg) );	// mov templo2, addr_reg
					cache_addw( STRH_IMM(src_reg, templo2, data - addr_data) );	// strh src_reg, [templo2, #(data - addr_data)]
					return true;
				}
			}
			break;
		case 1:
			// strb immediate offset: 0..31, no alignment requirement
			if ((data >= addr_data) && (data < addr_data + 32)) {
				cache_addw( MOV_LO_HI(templo2, addr_reg) );	// mov templo2, addr_reg
				cache_addw( STRB_IMM(src_reg, templo2, data - addr_data) );	// strb src_reg, [templo2, #(data - addr_data)]
				return true;
			}
			// fall through
		default:
			break;
	}
	return false;
}
386
387 // helper function
gen_mov_memval_from_reg(HostReg src_reg,void * dest,Bitu size)388 static bool gen_mov_memval_from_reg(HostReg src_reg, void *dest, Bitu size) {
389 if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true;
390 if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true;
391 if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true;
392 return false;
393 }
394
395 // helper function for gen_mov_word_from_reg
static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, HostReg data_reg) {
	// data_reg already holds the address of dest; emit the widest store the
	// address alignment allows, splitting into halfword/byte stores when
	// the host cannot perform unaligned accesses
	if (dword) {
#if !defined(C_UNALIGNED_MEMORY)
		if ((Bit32u)dest & 3) {
			if ( ((Bit32u)dest & 3) == 2 ) {
				// halfword-aligned: two halfword stores (little endian)
				cache_addw( STRH_IMM(src_reg, data_reg, 0) );	// strh src_reg, [data_reg]
				cache_addw( MOV_REG(templo1, src_reg) );	// mov templo1, src_reg
				cache_addw( LSR_IMM(templo1, templo1, 16) );	// lsr templo1, templo1, #16
				cache_addw( STRH_IMM(templo1, data_reg, 2) );	// strh templo1, [data_reg, #2]
			} else {
				// odd address: four byte stores, shifting each byte down first
				cache_addw( STRB_IMM(src_reg, data_reg, 0) );	// strb src_reg, [data_reg]
				cache_addw( MOV_REG(templo1, src_reg) );	// mov templo1, src_reg
				cache_addw( LSR_IMM(templo1, templo1, 8) );	// lsr templo1, templo1, #8
				cache_addw( STRB_IMM(templo1, data_reg, 1) );	// strb templo1, [data_reg, #1]
				cache_addw( MOV_REG(templo1, src_reg) );	// mov templo1, src_reg
				cache_addw( LSR_IMM(templo1, templo1, 16) );	// lsr templo1, templo1, #16
				cache_addw( STRB_IMM(templo1, data_reg, 2) );	// strb templo1, [data_reg, #2]
				cache_addw( MOV_REG(templo1, src_reg) );	// mov templo1, src_reg
				cache_addw( LSR_IMM(templo1, templo1, 24) );	// lsr templo1, templo1, #24
				cache_addw( STRB_IMM(templo1, data_reg, 3) );	// strb templo1, [data_reg, #3]
			}
		} else
#endif
		{
			cache_addw( STR_IMM(src_reg, data_reg, 0) );	// str src_reg, [data_reg]
		}
	} else {
#if !defined(C_UNALIGNED_MEMORY)
		if ((Bit32u)dest & 1) {
			// odd address: two byte stores
			cache_addw( STRB_IMM(src_reg, data_reg, 0) );	// strb src_reg, [data_reg]
			cache_addw( MOV_REG(templo1, src_reg) );	// mov templo1, src_reg
			cache_addw( LSR_IMM(templo1, templo1, 8) );	// lsr templo1, templo1, #8
			cache_addw( STRB_IMM(templo1, data_reg, 1) );	// strb templo1, [data_reg, #1]
		} else
#endif
		{
			cache_addw( STRH_IMM(src_reg, data_reg, 0) );	// strh src_reg, [data_reg]
		}
	}
}
437
438 // move 32bit (dword==true) or 16bit (dword==false) of a register into memory
gen_mov_word_from_reg(HostReg src_reg,void * dest,bool dword)439 static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) {
440 if (!gen_mov_memval_from_reg(src_reg, dest, (dword)?4:2)) {
441 gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
442 gen_mov_word_from_reg_helper(src_reg, dest, dword, templo2);
443 }
444 }
445
446 // move an 8bit value from memory into dest_reg
447 // the upper 24bit of the destination register can be destroyed
448 // this function does not use FC_OP1/FC_OP2 as dest_reg as these
449 // registers might not be directly byte-accessible on some architectures
gen_mov_byte_to_reg_low(HostReg dest_reg,void * data)450 static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) {
451 if (!gen_mov_memval_to_reg(dest_reg, data, 1)) {
452 gen_mov_dword_to_reg_imm(templo1, (Bit32u)data);
453 cache_addw( LDRB_IMM(dest_reg, templo1, 0) ); // ldrb dest_reg, [templo1]
454 }
455 }
456
457 // move an 8bit value from memory into dest_reg
458 // the upper 24bit of the destination register can be destroyed
459 // this function can use FC_OP1/FC_OP2 as dest_reg which are
460 // not directly byte-accessible on some architectures
static void INLINE gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
	// on this host every lo register is byte-accessible, so the
	// "canuseword" variant needs no special handling
	gen_mov_byte_to_reg_low(dest_reg, data);
}
464
465 // move an 8bit constant value into dest_reg
466 // the upper 24bit of the destination register can be destroyed
467 // this function does not use FC_OP1/FC_OP2 as dest_reg as these
468 // registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
	// an 8bit constant always fits the thumb mov-immediate encoding
	cache_addw( MOV_IMM(dest_reg, imm) );	// mov dest_reg, #(imm)
}
472
473 // move an 8bit constant value into dest_reg
474 // the upper 24bit of the destination register can be destroyed
475 // this function can use FC_OP1/FC_OP2 as dest_reg which are
476 // not directly byte-accessible on some architectures
static void INLINE gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) {
	// on this host every lo register is byte-accessible, so the
	// "canuseword" variant needs no special handling
	gen_mov_byte_to_reg_low_imm(dest_reg, imm);
}
480
481 // move the lowest 8bit of a register into memory
gen_mov_byte_from_reg_low(HostReg src_reg,void * dest)482 static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) {
483 if (!gen_mov_memval_from_reg(src_reg, dest, 1)) {
484 gen_mov_dword_to_reg_imm(templo1, (Bit32u)dest);
485 cache_addw( STRB_IMM(src_reg, templo1, 0) ); // strb src_reg, [templo1]
486 }
487 }
488
489
490
491 // convert an 8bit word to a 32bit dword
492 // the register is zero-extended (sign==false) or sign-extended (sign==true)
gen_extend_byte(bool sign,HostReg reg)493 static void gen_extend_byte(bool sign,HostReg reg) {
494 cache_addw( LSL_IMM(reg, reg, 24) ); // lsl reg, reg, #24
495
496 if (sign) {
497 cache_addw( ASR_IMM(reg, reg, 24) ); // asr reg, reg, #24
498 } else {
499 cache_addw( LSR_IMM(reg, reg, 24) ); // lsr reg, reg, #24
500 }
501 }
502
503 // convert a 16bit word to a 32bit dword
504 // the register is zero-extended (sign==false) or sign-extended (sign==true)
gen_extend_word(bool sign,HostReg reg)505 static void gen_extend_word(bool sign,HostReg reg) {
506 cache_addw( LSL_IMM(reg, reg, 16) ); // lsl reg, reg, #16
507
508 if (sign) {
509 cache_addw( ASR_IMM(reg, reg, 16) ); // asr reg, reg, #16
510 } else {
511 cache_addw( LSR_IMM(reg, reg, 16) ); // lsr reg, reg, #16
512 }
513 }
514
515 // add a 32bit value from memory to a full register
static void gen_add(HostReg reg,void* op) {
	// load the 32bit memory operand into a scratch register, then add it
	gen_mov_word_to_reg(templo3, op, 1);
	cache_addw( ADD_REG(reg, reg, templo3) );	// add reg, reg, templo3
}
520
521 // add a 32bit constant value to a full register
static void gen_add_imm(HostReg reg,Bit32u imm) {
	Bit32u imm2, scale;

	if(!imm) return;

	// negated value, used when subtracting is the cheaper encoding
	// NOTE(review): negating (Bit32s)0x80000000 is formally UB; this relies
	// on two's-complement wraparound, which holds on all supported hosts
	imm2 = (Bit32u) (-((Bit32s)imm));

	if (imm <= 255) {
		cache_addw( ADD_IMM8(reg, imm) );	// add reg, #imm
	} else if (imm2 <= 255) {
		cache_addw( SUB_IMM8(reg, imm2) );	// sub reg, #(-imm)
	} else {
		if (val_single_shift(imm2, &scale)) {
			// -imm is an 8bit value shifted left: build it, then subtract
			cache_addw( MOV_IMM(templo1, imm2 >> scale) );	// mov templo1, #(-imm >> scale)
			if (scale) {
				cache_addw( LSL_IMM(templo1, templo1, scale) );	// lsl templo1, templo1, #scale
			}
			cache_addw( SUB_REG(reg, reg, templo1) );	// sub reg, reg, templo1
		} else {
			// general case: load the full constant and add it
			gen_mov_dword_to_reg_imm(templo1, imm);
			cache_addw( ADD_REG(reg, reg, templo1) );	// add reg, reg, templo1
		}
	}
}
546
547 // and a 32bit constant value with a full register
static void gen_and_imm(HostReg reg,Bit32u imm) {
	Bit32u imm2, scale;

	// masking with all-ones is a no-op
	imm2 = ~imm;
	if(!imm2) return;

	if (!imm) {
		// masking with zero just clears the register
		cache_addw( MOV_IMM(reg, 0) );	// mov reg, #0
	} else {
		if (val_single_shift(imm2, &scale)) {
			// ~imm is an 8bit value shifted left: build it and bit-clear
			cache_addw( MOV_IMM(templo1, imm2 >> scale) );	// mov templo1, #(~imm >> scale)
			if (scale) {
				cache_addw( LSL_IMM(templo1, templo1, scale) );	// lsl templo1, templo1, #scale
			}
			cache_addw( BIC(reg, templo1) );	// bic reg, templo1
		} else {
			// general case: load the full mask and and it
			gen_mov_dword_to_reg_imm(templo1, imm);
			cache_addw( AND(reg, templo1) );	// and reg, templo1
		}
	}
}
569
570
571 // move a 32bit constant value into memory
gen_mov_direct_dword(void * dest,Bit32u imm)572 static void gen_mov_direct_dword(void* dest,Bit32u imm) {
573 gen_mov_dword_to_reg_imm(templo3, imm);
574 gen_mov_word_from_reg(templo3, dest, 1);
575 }
576
577 // move an address into memory
gen_mov_direct_ptr(void * dest,DRC_PTR_SIZE_IM imm)578 static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) {
579 gen_mov_direct_dword(dest,(Bit32u)imm);
580 }
581
582 // add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value
gen_add_direct_word(void * dest,Bit32u imm,bool dword)583 static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) {
584 if (!dword) imm &= 0xffff;
585 if(!imm) return;
586
587 if (!gen_mov_memval_to_reg(templo3, dest, (dword)?4:2)) {
588 gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
589 gen_mov_word_to_reg_helper(templo3, dest, dword, templo2);
590 }
591 gen_add_imm(templo3, imm);
592 if (!gen_mov_memval_from_reg(templo3, dest, (dword)?4:2)) {
593 gen_mov_word_from_reg_helper(templo3, dest, dword, templo2);
594 }
595 }
596
597 // add an 8bit constant value to a dword memory value
gen_add_direct_byte(void * dest,Bit8s imm)598 static void gen_add_direct_byte(void* dest,Bit8s imm) {
599 gen_add_direct_word(dest, (Bit32s)imm, 1);
600 }
601
602 // subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value
gen_sub_direct_word(void * dest,Bit32u imm,bool dword)603 static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
604 Bit32u imm2, scale;
605
606 if (!dword) imm &= 0xffff;
607 if(!imm) return;
608
609 if (!gen_mov_memval_to_reg(templo3, dest, (dword)?4:2)) {
610 gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
611 gen_mov_word_to_reg_helper(templo3, dest, dword, templo2);
612 }
613
614 imm2 = (Bit32u) (-((Bit32s)imm));
615
616 if (imm <= 255) {
617 cache_addw( SUB_IMM8(templo3, imm) ); // sub templo3, #imm
618 } else if (imm2 <= 255) {
619 cache_addw( ADD_IMM8(templo3, imm2) ); // add templo3, #(-imm)
620 } else {
621 if (val_single_shift(imm2, &scale)) {
622 cache_addw( MOV_IMM(templo1, imm2 >> scale) ); // mov templo1, #(~imm >> scale)
623 if (scale) {
624 cache_addw( LSL_IMM(templo1, templo1, scale) ); // lsl templo1, templo1, #scale
625 }
626 cache_addw( ADD_REG(templo3, templo3, templo1) ); // add templo3, templo3, templo1
627 } else {
628 gen_mov_dword_to_reg_imm(templo1, imm);
629 cache_addw( SUB_REG(templo3, templo3, templo1) ); // sub templo3, templo3, templo1
630 }
631 }
632
633 if (!gen_mov_memval_from_reg(templo3, dest, (dword)?4:2)) {
634 gen_mov_word_from_reg_helper(templo3, dest, dword, templo2);
635 }
636 }
637
638 // subtract an 8bit constant value from a dword memory value
gen_sub_direct_byte(void * dest,Bit8s imm)639 static void gen_sub_direct_byte(void* dest,Bit8s imm) {
640 gen_sub_direct_word(dest, (Bit32s)imm, 1);
641 }
642
643 // effective address calculation, destination is dest_reg
644 // scale_reg is scaled by scale (scale_reg*(2^scale)) and
645 // added to dest_reg, then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) {
	// dest_reg += (scale_reg << scale); dest_reg += imm
	if (scale) {
		cache_addw( LSL_IMM(templo1, scale_reg, scale) );	// lsl templo1, scale_reg, #(scale)
		cache_addw( ADD_REG(dest_reg, dest_reg, templo1) );	// add dest_reg, dest_reg, templo1
	} else {
		// no scaling: add the index register directly
		cache_addw( ADD_REG(dest_reg, dest_reg, scale_reg) );	// add dest_reg, dest_reg, scale_reg
	}
	gen_add_imm(dest_reg, imm);
}
655
656 // effective address calculation, destination is dest_reg
657 // dest_reg is scaled by scale (dest_reg*(2^scale)),
658 // then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) {
	// dest_reg = (dest_reg << scale) + imm
	if (scale) {
		cache_addw( LSL_IMM(dest_reg, dest_reg, scale) );	// lsl dest_reg, dest_reg, #(scale)
	}
	gen_add_imm(dest_reg, imm);
}
665
666 // generate a call to a parameterless function
static void INLINE gen_call_function_raw(void * func) {
	// Thumb cannot load an absolute address into pc/lr directly, so the
	// call is built as: load func from an inline literal, point lr at the
	// code after the literal (after_call), then bx to func -- which also
	// switches the cpu to arm state.  The callee's return (bx lr) lands on
	// two arm instructions that switch back to thumb state.
	// The literal word must be 4-byte aligned, hence the two layouts.
	if (((Bit32u)cache.pos & 0x03) == 0) {
		cache_addw( LDR_PC_IMM(templo1, 4) );	// ldr templo1, [pc, #4]
		cache_addw( ADD_LO_PC_IMM(templo2, 8) );	// adr templo2, after_call (add templo2, pc, #8)
		cache_addw( MOV_HI_LO(HOST_lr, templo2) );	// mov lr, templo2
		cache_addw( BX(templo1) );	// bx templo1 --- switch to arm state
	} else {
		cache_addw( LDR_PC_IMM(templo1, 8) );	// ldr templo1, [pc, #8]
		cache_addw( ADD_LO_PC_IMM(templo2, 8) );	// adr templo2, after_call (add templo2, pc, #8)
		cache_addw( MOV_HI_LO(HOST_lr, templo2) );	// mov lr, templo2
		cache_addw( BX(templo1) );	// bx templo1 --- switch to arm state
		cache_addw( NOP );	// nop -- keeps the literal below word-aligned
	}
	cache_addd((Bit32u)func);	// .int func
	// after_call:

	// switch from arm to thumb state: bx to (pc + 1), bit0 set selects thumb
	cache_addd(0xe2800000 + (templo1 << 12) + (HOST_pc << 16) + (1));	// add templo1, pc, #1
	cache_addd(0xe12fff10 + (templo1));	// bx templo1

	// thumb state from now on
}
689
690 // generate a call to a function with paramcount parameters
691 // note: the parameters are loaded in the architecture specific way
692 // using the gen_load_param_ functions below
693 static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) {
694 Bit32u proc_addr = (Bit32u)cache.pos;
695 gen_call_function_raw(func);
696 return proc_addr;
697 // if proc_addr is on word boundary ((proc_addr & 0x03) == 0)
698 // then length of generated code is 20 bytes
699 // otherwise length of generated code is 22 bytes
700 }
701
702 #if (1)
703 // max of 4 parameters in a1-a4
704
705 // load an immediate value as param'th function parameter
static void INLINE gen_load_param_imm(Bitu imm,Bitu param) {
	// param is the argument register number (a1..a4) per the arm calling convention
	gen_mov_dword_to_reg_imm(param, imm);
}
709
710 // load an address as param'th function parameter
static void INLINE gen_load_param_addr(Bitu addr,Bitu param) {
	// an address is just a 32bit constant on this host
	gen_mov_dword_to_reg_imm(param, addr);
}
714
715 // load a host-register as param'th function parameter
static void INLINE gen_load_param_reg(Bitu reg,Bitu param) {
	// copy the host register into the argument register (no-op when equal)
	gen_mov_regs(param, reg);
}
719
720 // load a value from memory as param'th function parameter
static void INLINE gen_load_param_mem(Bitu mem,Bitu param) {
	// load a dword from memory straight into the argument register
	gen_mov_word_to_reg(param, (void *)mem, 1);
}
724 #else
725 other arm abis
726 #endif
727
728 // jump to an address pointed at by ptr, offset is in imm
static void gen_jmp_ptr(void * ptr,Bits imm=0) {
	// jump to the code address stored at (*ptr + imm): load *ptr into
	// templo3, fetch the target dword at offset imm, then bx to it
	gen_mov_word_to_reg(templo3, ptr, 1);

#if !defined(C_UNALIGNED_MEMORY)
	// (*ptr) should be word aligned
	if ((imm & 0x03) == 0) {
#endif
		if ((imm >= 0) && (imm < 128) && ((imm & 3) == 0)) {
			// offset fits the ldr immediate field
			cache_addw( LDR_IMM(templo2, templo3, imm) );	// ldr templo2, [templo3, #imm]
		} else {
			// offset too large: use register-offset addressing
			gen_mov_dword_to_reg_imm(templo2, imm);
			cache_addw( LDR_REG(templo2, templo3, templo2) );	// ldr templo2, [templo3, templo2]
		}
#if !defined(C_UNALIGNED_MEMORY)
	} else {
		// unaligned target slot: assemble the dword from four byte loads
		gen_add_imm(templo3, imm);

		cache_addw( LDRB_IMM(templo2, templo3, 0) );	// ldrb templo2, [templo3]
		cache_addw( LDRB_IMM(templo1, templo3, 1) );	// ldrb templo1, [templo3, #1]
		cache_addw( LSL_IMM(templo1, templo1, 8) );	// lsl templo1, templo1, #8
		cache_addw( ORR(templo2, templo1) );	// orr templo2, templo1
		cache_addw( LDRB_IMM(templo1, templo3, 2) );	// ldrb templo1, [templo3, #2]
		cache_addw( LSL_IMM(templo1, templo1, 16) );	// lsl templo1, templo1, #16
		cache_addw( ORR(templo2, templo1) );	// orr templo2, templo1
		cache_addw( LDRB_IMM(templo1, templo3, 3) );	// ldrb templo1, [templo3, #3]
		cache_addw( LSL_IMM(templo1, templo1, 24) );	// lsl templo1, templo1, #24
		cache_addw( ORR(templo2, templo1) );	// orr templo2, templo1
	}
#endif

	// increase jmp address to keep thumb state (bx needs bit0 set for thumb)
	cache_addw( ADD_IMM3(templo2, templo2, 1) );	// add templo2, templo2, #1

	cache_addw( BX(templo2) );	// bx templo2
}
764
765 // short conditional jump (+-127 bytes) if register is zero
766 // the destination is set by gen_fill_branch() later
static Bit32u gen_create_branch_on_zero(HostReg reg,bool dword) {
	if (dword) {
		cache_addw( CMP_IMM(reg, 0) );	// cmp reg, #0
	} else {
		// thumb shifts update the flags: Z is set iff the low 16 bits of
		// reg are zero (the shifted result in templo1 is discarded)
		cache_addw( LSL_IMM(templo1, reg, 16) );	// lsl templo1, reg, #16
	}
	cache_addw( BEQ_FWD(0) );	// beq j -- offset patched later by gen_fill_branch
	return ((Bit32u)cache.pos-2);	// address of the branch instruction
}
776
// short conditional jump (+-127 bytes) if register is nonzero
// the destination is set by gen_fill_branch() later
// returns the address of the branch opcode so its offset byte can be patched
static Bit32u gen_create_branch_on_nonzero(HostReg reg,bool dword) {
	if (dword) {
		cache_addw( CMP_IMM(reg, 0) );      // cmp reg, #0
	} else {
		// thumb lsl-immediate updates the flags, so Z reflects whether the
		// lower 16 bits of reg are zero (reg itself is left untouched)
		cache_addw( LSL_IMM(templo1, reg, 16) );      // lsl templo1, reg, #16
	}
	cache_addw( BNE_FWD(0) );      // bne j
	return ((Bit32u)cache.pos-2);	// address of the just-emitted bne
}
788
// calculate relative offset and fill it into the location pointed to by data
// data is the address returned by gen_create_branch_on_(non)zero;
// the thumb conditional-branch offset is (target - (branch + 4)) / 2 and
// occupies the low byte of the instruction
static void INLINE gen_fill_branch(DRC_PTR_SIZE_IM data) {
#if C_DEBUG
	Bits len=(Bit32u)cache.pos-(data+4);
	if (len<0) len=-len;
	if (len>252) LOG_MSG("Big jump %d",len);	// offset would not fit into 8 bits
#endif
	*(Bit8u*)data=(Bit8u)( ((Bit32u)cache.pos-(data+4)) >> 1 );
}
798
// conditional jump if register is nonzero
// for isdword==true the 32bit of the register are tested
// for isdword==false the lowest 8bit of the register are tested
// emits a pc-relative load of an (initially zero) literal word holding the
// absolute target, then bx; returns the address of that literal so
// gen_fill_branch_long() can patch it
static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool isdword) {
	if (isdword) {
		cache_addw( CMP_IMM(reg, 0) );      // cmp reg, #0
	} else {
		// lsl-immediate sets Z from the lowest 8 bits of reg
		cache_addw( LSL_IMM(templo2, reg, 24) );      // lsl templo2, reg, #24
	}
	// the literal word read by "ldr templo1, [pc, #...]" must be 32bit
	// aligned, so the two layouts below differ by one padding nop
	if (((Bit32u)cache.pos & 0x03) == 0) {
		cache_addw( BEQ_FWD(8) );      // beq nobranch (pc+8)
		cache_addw( LDR_PC_IMM(templo1, 4) );      // ldr templo1, [pc, #4]
		cache_addw( BX(templo1) );      // bx templo1
		cache_addw( NOP );      // nop
	} else {
		cache_addw( BEQ_FWD(6) );      // beq nobranch (pc+6)
		cache_addw( LDR_PC_IMM(templo1, 0) );      // ldr templo1, [pc, #0]
		cache_addw( BX(templo1) );      // bx templo1
	}
	cache_addd(0);      // fill j
	// nobranch:
	return ((Bit32u)cache.pos-4);	// address of the literal word
}
822
// compare 32bit-register against zero and jump if value less/equal than zero
// same literal-word scheme as gen_create_branch_long_nonzero: returns the
// address of the (initially zero) word that gen_fill_branch_long() patches
static Bit32u gen_create_branch_long_leqzero(HostReg reg) {
	cache_addw( CMP_IMM(reg, 0) );      // cmp reg, #0
	// keep the pc-relative literal 32bit aligned (pad with nop if needed)
	if (((Bit32u)cache.pos & 0x03) == 0) {
		cache_addw( BGT_FWD(8) );      // bgt nobranch (pc+8)
		cache_addw( LDR_PC_IMM(templo1, 4) );      // ldr templo1, [pc, #4]
		cache_addw( BX(templo1) );      // bx templo1
		cache_addw( NOP );      // nop
	} else {
		cache_addw( BGT_FWD(6) );      // bgt nobranch (pc+6)
		cache_addw( LDR_PC_IMM(templo1, 0) );      // ldr templo1, [pc, #0]
		cache_addw( BX(templo1) );      // bx templo1
	}
	cache_addd(0);      // fill j
	// nobranch:
	return ((Bit32u)cache.pos-4);	// address of the literal word
}
840
// calculate long relative offset and fill it into the location pointed to by data
// data is the literal-word address returned by gen_create_branch_long_*
static void INLINE gen_fill_branch_long(Bit32u data) {
	// this is an absolute branch
	*(Bit32u*)data=((Bit32u)cache.pos) + 1; // add 1 to keep processor in thumb state
}
846
gen_run_code(void)847 static void gen_run_code(void) {
848 Bit8u *pos1, *pos2, *pos3;
849
850 #if (__ARM_EABI__)
851 // 8-byte stack alignment
852 cache_addd(0xe92d4ff0); // stmfd sp!, {v1-v8,lr}
853 #else
854 cache_addd(0xe92d4df0); // stmfd sp!, {v1-v5,v7,v8,lr}
855 #endif
856
857 cache_addd( ARM_ADD_IMM(HOST_r0, HOST_r0, 1, 0) ); // add r0, r0, #1
858
859 pos1 = cache.pos;
860 cache_addd( 0 );
861 pos2 = cache.pos;
862 cache_addd( 0 );
863 pos3 = cache.pos;
864 cache_addd( 0 );
865
866 cache_addd( ARM_ADD_IMM(HOST_lr, HOST_pc, 4, 0) ); // add lr, pc, #4
867 cache_addd( ARM_STR_IMM_M_W(HOST_lr, HOST_sp, 4) ); // str lr, [sp, #-4]!
868 cache_addd( ARM_BX(HOST_r0) ); // bx r0
869
870 #if (__ARM_EABI__)
871 cache_addd(0xe8bd4ff0); // ldmfd sp!, {v1-v8,lr}
872 #else
873 cache_addd(0xe8bd4df0); // ldmfd sp!, {v1-v5,v7,v8,lr}
874 #endif
875 cache_addd( ARM_BX(HOST_lr) ); // bx lr
876
877 // align cache.pos to 32 bytes
878 if ((((Bitu)cache.pos) & 0x1f) != 0) {
879 cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f));
880 }
881
882 *(Bit32u*)pos1 = ARM_LDR_IMM(FC_SEGS_ADDR, HOST_pc, cache.pos - (pos1 + 8)); // ldr FC_SEGS_ADDR, [pc, #(&Segs)]
883 cache_addd((Bit32u)&Segs); // address of "Segs"
884
885 *(Bit32u*)pos2 = ARM_LDR_IMM(FC_REGS_ADDR, HOST_pc, cache.pos - (pos2 + 8)); // ldr FC_REGS_ADDR, [pc, #(&cpu_regs)]
886 cache_addd((Bit32u)&cpu_regs); // address of "cpu_regs"
887
888 *(Bit32u*)pos3 = ARM_LDR_IMM(readdata_addr, HOST_pc, cache.pos - (pos3 + 8)); // ldr readdata_addr, [pc, #(&core_dynrec.readdata)]
889 cache_addd((Bit32u)&core_dynrec.readdata); // address of "core_dynrec.readdata"
890
891 // align cache.pos to 32 bytes
892 if ((((Bitu)cache.pos) & 0x1f) != 0) {
893 cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f));
894 }
895 }
896
// return from a function
// pops the return address pushed by gen_run_code's trampoline and jumps to it
static void gen_return_function(void) {
	cache_addw(0xbc08);      // pop {r3}
	cache_addw( BX(HOST_r3) );      // bx r3
}
902
903 #ifdef DRC_FLAGS_INVALIDATION
904
// called when a call to a function can be replaced by a
// call to a simpler function
// pos points at the previously generated call sequence to the flag-generating
// helper; for simple operations the call is overwritten in place with inline
// thumb instructions, and a B_FWD skips the remainder of the original call
// sequence ("after_call"). the branch distances differ between the two
// switches because the call-sequence layout depends on the alignment of pos
// (the literal function pointer lives at pos+8 when pos is 32bit aligned,
// at pos+10 otherwise).
static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
#ifdef DRC_FLAGS_INVALIDATION_DCODE
	if (((Bit32u)pos & 0x03) == 0)
	{
		// pos is 32bit aligned: skip offsets are relative to this layout
		// try to avoid function calls but rather directly fill in code
		switch (flags_type) {
			case t_ADDb:
			case t_ADDw:
			case t_ADDd:
				*(Bit16u*)pos=ADD_REG(HOST_a1, HOST_a1, HOST_a2);	// add a1, a1, a2
				*(Bit16u*)(pos+2)=B_FWD(14);	// b after_call (pc+14)
				break;
			case t_ORb:
			case t_ORw:
			case t_ORd:
				*(Bit16u*)pos=ORR(HOST_a1, HOST_a2);	// orr a1, a2
				*(Bit16u*)(pos+2)=B_FWD(14);	// b after_call (pc+14)
				break;
			case t_ANDb:
			case t_ANDw:
			case t_ANDd:
				*(Bit16u*)pos=AND(HOST_a1, HOST_a2);	// and a1, a2
				*(Bit16u*)(pos+2)=B_FWD(14);	// b after_call (pc+14)
				break;
			case t_SUBb:
			case t_SUBw:
			case t_SUBd:
				*(Bit16u*)pos=SUB_REG(HOST_a1, HOST_a1, HOST_a2);	// sub a1, a1, a2
				*(Bit16u*)(pos+2)=B_FWD(14);	// b after_call (pc+14)
				break;
			case t_XORb:
			case t_XORw:
			case t_XORd:
				*(Bit16u*)pos=EOR(HOST_a1, HOST_a2);	// eor a1, a2
				*(Bit16u*)(pos+2)=B_FWD(14);	// b after_call (pc+14)
				break;
			case t_CMPb:
			case t_CMPw:
			case t_CMPd:
			case t_TESTb:
			case t_TESTw:
			case t_TESTd:
				// compare/test only set flags, which the dynrec core
				// recomputes lazily anyway - nothing to do but skip the call
				*(Bit16u*)pos=B_FWD(16);	// b after_call (pc+16)
				break;
			case t_INCb:
			case t_INCw:
			case t_INCd:
				*(Bit16u*)pos=ADD_IMM3(HOST_a1, HOST_a1, 1);	// add a1, a1, #1
				*(Bit16u*)(pos+2)=B_FWD(14);	// b after_call (pc+14)
				break;
			case t_DECb:
			case t_DECw:
			case t_DECd:
				*(Bit16u*)pos=SUB_IMM3(HOST_a1, HOST_a1, 1);	// sub a1, a1, #1
				*(Bit16u*)(pos+2)=B_FWD(14);	// b after_call (pc+14)
				break;
			case t_SHLb:
			case t_SHLw:
			case t_SHLd:
				*(Bit16u*)pos=LSL_REG(HOST_a1, HOST_a2);	// lsl a1, a2
				*(Bit16u*)(pos+2)=B_FWD(14);	// b after_call (pc+14)
				break;
			case t_SHRb:
				// narrow shifts first zero-extend a1 so no stale upper bits
				// shift into the result
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);	// lsl a1, a1, #24
				*(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 24);	// lsr a1, a1, #24
				*(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2);	// lsr a1, a2
				*(Bit16u*)(pos+6)=B_FWD(10);	// b after_call (pc+10)
				break;
			case t_SHRw:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);	// lsl a1, a1, #16
				*(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 16);	// lsr a1, a1, #16
				*(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2);	// lsr a1, a2
				*(Bit16u*)(pos+6)=B_FWD(10);	// b after_call (pc+10)
				break;
			case t_SHRd:
				*(Bit16u*)pos=LSR_REG(HOST_a1, HOST_a2);	// lsr a1, a2
				*(Bit16u*)(pos+2)=B_FWD(14);	// b after_call (pc+14)
				break;
			case t_SARb:
				// arithmetic shifts sign-extend the 8/16 bit value first
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);	// lsl a1, a1, #24
				*(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 24);	// asr a1, a1, #24
				*(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2);	// asr a1, a2
				*(Bit16u*)(pos+6)=B_FWD(10);	// b after_call (pc+10)
				break;
			case t_SARw:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);	// lsl a1, a1, #16
				*(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 16);	// asr a1, a1, #16
				*(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2);	// asr a1, a2
				*(Bit16u*)(pos+6)=B_FWD(10);	// b after_call (pc+10)
				break;
			case t_SARd:
				*(Bit16u*)pos=ASR_REG(HOST_a1, HOST_a2);	// asr a1, a2
				*(Bit16u*)(pos+2)=B_FWD(14);	// b after_call (pc+14)
				break;
			case t_RORb:
				// replicate the low byte across a1 so a 32bit ror acts as an
				// 8bit rotate
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);	// lsl a1, a1, #24
				*(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 8);	// lsr templo1, a1, #8
				*(Bit16u*)(pos+4)=ORR(HOST_a1, templo1);	// orr a1, templo1
				*(Bit16u*)(pos+6)=LSR_IMM(templo1, HOST_a1, 16);	// lsr templo1, a1, #16
				*(Bit16u*)(pos+8)=ORR(HOST_a1, templo1);	// orr a1, templo1
				*(Bit16u*)(pos+10)=ROR_REG(HOST_a1, HOST_a2);	// ror a1, a2
				*(Bit16u*)(pos+12)=B_FWD(4);	// b after_call (pc+4)
				break;
			case t_RORw:
				// replicate the low halfword so a 32bit ror acts as a 16bit rotate
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);	// lsl a1, a1, #16
				*(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 16);	// lsr templo1, a1, #16
				*(Bit16u*)(pos+4)=ORR(HOST_a1, templo1);	// orr a1, templo1
				*(Bit16u*)(pos+6)=ROR_REG(HOST_a1, HOST_a2);	// ror a1, a2
				*(Bit16u*)(pos+8)=B_FWD(8);	// b after_call (pc+8)
				break;
			case t_RORd:
				*(Bit16u*)pos=ROR_REG(HOST_a1, HOST_a2);	// ror a1, a2
				*(Bit16u*)(pos+2)=B_FWD(14);	// b after_call (pc+14)
				break;
			case t_ROLb:
				// rol by n == ror by (32-n); the nops pad the sequence to the
				// full length of the replaced call
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);	// lsl a1, a1, #24
				*(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2);	// neg a2, a2
				*(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 8);	// lsr templo1, a1, #8
				*(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32);	// add a2, #32
				*(Bit16u*)(pos+8)=ORR(HOST_a1, templo1);	// orr a1, templo1
				*(Bit16u*)(pos+10)=NOP;	// nop
				*(Bit16u*)(pos+12)=LSR_IMM(templo1, HOST_a1, 16);	// lsr templo1, a1, #16
				*(Bit16u*)(pos+14)=NOP;	// nop
				*(Bit16u*)(pos+16)=ORR(HOST_a1, templo1);	// orr a1, templo1
				*(Bit16u*)(pos+18)=ROR_REG(HOST_a1, HOST_a2);	// ror a1, a2
				break;
			case t_ROLw:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);	// lsl a1, a1, #16
				*(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2);	// neg a2, a2
				*(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 16);	// lsr templo1, a1, #16
				*(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32);	// add a2, #32
				*(Bit16u*)(pos+8)=ORR(HOST_a1, templo1);	// orr a1, templo1
				*(Bit16u*)(pos+10)=ROR_REG(HOST_a1, HOST_a2);	// ror a1, a2
				*(Bit16u*)(pos+12)=B_FWD(4);	// b after_call (pc+4)
				break;
			case t_ROLd:
				*(Bit16u*)pos=NEG(HOST_a2, HOST_a2);	// neg a2, a2
				*(Bit16u*)(pos+2)=ADD_IMM8(HOST_a2, 32);	// add a2, #32
				*(Bit16u*)(pos+4)=ROR_REG(HOST_a1, HOST_a2);	// ror a1, a2
				*(Bit16u*)(pos+6)=B_FWD(10);	// b after_call (pc+10)
				break;
			case t_NEGb:
			case t_NEGw:
			case t_NEGd:
				*(Bit16u*)pos=NEG(HOST_a1, HOST_a1);	// neg a1, a1
				*(Bit16u*)(pos+2)=B_FWD(14);	// b after_call (pc+14)
				break;
			default:
				// no inline replacement: just swap in the simpler helper's
				// address at the call sequence's literal slot
				*(Bit32u*)(pos+8)=(Bit32u)fct_ptr;	// simple_func
				break;
		}
	}
	else
	{
		// pos is not 32bit aligned: the call sequence is one halfword longer,
		// so every skip branch travels 2 bytes further and the literal slot
		// sits at pos+10
		// try to avoid function calls but rather directly fill in code
		switch (flags_type) {
			case t_ADDb:
			case t_ADDw:
			case t_ADDd:
				*(Bit16u*)pos=ADD_REG(HOST_a1, HOST_a1, HOST_a2);	// add a1, a1, a2
				*(Bit16u*)(pos+2)=B_FWD(16);	// b after_call (pc+16)
				break;
			case t_ORb:
			case t_ORw:
			case t_ORd:
				*(Bit16u*)pos=ORR(HOST_a1, HOST_a2);	// orr a1, a2
				*(Bit16u*)(pos+2)=B_FWD(16);	// b after_call (pc+16)
				break;
			case t_ANDb:
			case t_ANDw:
			case t_ANDd:
				*(Bit16u*)pos=AND(HOST_a1, HOST_a2);	// and a1, a2
				*(Bit16u*)(pos+2)=B_FWD(16);	// b after_call (pc+16)
				break;
			case t_SUBb:
			case t_SUBw:
			case t_SUBd:
				*(Bit16u*)pos=SUB_REG(HOST_a1, HOST_a1, HOST_a2);	// sub a1, a1, a2
				*(Bit16u*)(pos+2)=B_FWD(16);	// b after_call (pc+16)
				break;
			case t_XORb:
			case t_XORw:
			case t_XORd:
				*(Bit16u*)pos=EOR(HOST_a1, HOST_a2);	// eor a1, a2
				*(Bit16u*)(pos+2)=B_FWD(16);	// b after_call (pc+16)
				break;
			case t_CMPb:
			case t_CMPw:
			case t_CMPd:
			case t_TESTb:
			case t_TESTw:
			case t_TESTd:
				*(Bit16u*)pos=B_FWD(18);	// b after_call (pc+18)
				break;
			case t_INCb:
			case t_INCw:
			case t_INCd:
				*(Bit16u*)pos=ADD_IMM3(HOST_a1, HOST_a1, 1);	// add a1, a1, #1
				*(Bit16u*)(pos+2)=B_FWD(16);	// b after_call (pc+16)
				break;
			case t_DECb:
			case t_DECw:
			case t_DECd:
				*(Bit16u*)pos=SUB_IMM3(HOST_a1, HOST_a1, 1);	// sub a1, a1, #1
				*(Bit16u*)(pos+2)=B_FWD(16);	// b after_call (pc+16)
				break;
			case t_SHLb:
			case t_SHLw:
			case t_SHLd:
				*(Bit16u*)pos=LSL_REG(HOST_a1, HOST_a2);	// lsl a1, a2
				*(Bit16u*)(pos+2)=B_FWD(16);	// b after_call (pc+16)
				break;
			case t_SHRb:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);	// lsl a1, a1, #24
				*(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 24);	// lsr a1, a1, #24
				*(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2);	// lsr a1, a2
				*(Bit16u*)(pos+6)=B_FWD(12);	// b after_call (pc+12)
				break;
			case t_SHRw:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);	// lsl a1, a1, #16
				*(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 16);	// lsr a1, a1, #16
				*(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2);	// lsr a1, a2
				*(Bit16u*)(pos+6)=B_FWD(12);	// b after_call (pc+12)
				break;
			case t_SHRd:
				*(Bit16u*)pos=LSR_REG(HOST_a1, HOST_a2);	// lsr a1, a2
				*(Bit16u*)(pos+2)=B_FWD(16);	// b after_call (pc+16)
				break;
			case t_SARb:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);	// lsl a1, a1, #24
				*(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 24);	// asr a1, a1, #24
				*(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2);	// asr a1, a2
				*(Bit16u*)(pos+6)=B_FWD(12);	// b after_call (pc+12)
				break;
			case t_SARw:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);	// lsl a1, a1, #16
				*(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 16);	// asr a1, a1, #16
				*(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2);	// asr a1, a2
				*(Bit16u*)(pos+6)=B_FWD(12);	// b after_call (pc+12)
				break;
			case t_SARd:
				*(Bit16u*)pos=ASR_REG(HOST_a1, HOST_a2);	// asr a1, a2
				*(Bit16u*)(pos+2)=B_FWD(16);	// b after_call (pc+16)
				break;
			case t_RORb:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);	// lsl a1, a1, #24
				*(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 8);	// lsr templo1, a1, #8
				*(Bit16u*)(pos+4)=ORR(HOST_a1, templo1);	// orr a1, templo1
				*(Bit16u*)(pos+6)=LSR_IMM(templo1, HOST_a1, 16);	// lsr templo1, a1, #16
				*(Bit16u*)(pos+8)=ORR(HOST_a1, templo1);	// orr a1, templo1
				*(Bit16u*)(pos+10)=ROR_REG(HOST_a1, HOST_a2);	// ror a1, a2
				*(Bit16u*)(pos+12)=B_FWD(6);	// b after_call (pc+6)
				break;
			case t_RORw:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);	// lsl a1, a1, #16
				*(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 16);	// lsr templo1, a1, #16
				*(Bit16u*)(pos+4)=ORR(HOST_a1, templo1);	// orr a1, templo1
				*(Bit16u*)(pos+6)=ROR_REG(HOST_a1, HOST_a2);	// ror a1, a2
				*(Bit16u*)(pos+8)=B_FWD(10);	// b after_call (pc+10)
				break;
			case t_RORd:
				*(Bit16u*)pos=ROR_REG(HOST_a1, HOST_a2);	// ror a1, a2
				*(Bit16u*)(pos+2)=B_FWD(16);	// b after_call (pc+16)
				break;
			case t_ROLb:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24);	// lsl a1, a1, #24
				*(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2);	// neg a2, a2
				*(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 8);	// lsr templo1, a1, #8
				*(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32);	// add a2, #32
				*(Bit16u*)(pos+8)=ORR(HOST_a1, templo1);	// orr a1, templo1
				*(Bit16u*)(pos+10)=NOP;	// nop
				*(Bit16u*)(pos+12)=LSR_IMM(templo1, HOST_a1, 16);	// lsr templo1, a1, #16
				*(Bit16u*)(pos+14)=NOP;	// nop
				*(Bit16u*)(pos+16)=ORR(HOST_a1, templo1);	// orr a1, templo1
				*(Bit16u*)(pos+18)=NOP;	// nop
				*(Bit16u*)(pos+20)=ROR_REG(HOST_a1, HOST_a2);	// ror a1, a2
				break;
			case t_ROLw:
				*(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16);	// lsl a1, a1, #16
				*(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2);	// neg a2, a2
				*(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 16);	// lsr templo1, a1, #16
				*(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32);	// add a2, #32
				*(Bit16u*)(pos+8)=ORR(HOST_a1, templo1);	// orr a1, templo1
				*(Bit16u*)(pos+10)=ROR_REG(HOST_a1, HOST_a2);	// ror a1, a2
				*(Bit16u*)(pos+12)=B_FWD(6);	// b after_call (pc+6)
				break;
			case t_ROLd:
				*(Bit16u*)pos=NEG(HOST_a2, HOST_a2);	// neg a2, a2
				*(Bit16u*)(pos+2)=ADD_IMM8(HOST_a2, 32);	// add a2, #32
				*(Bit16u*)(pos+4)=ROR_REG(HOST_a1, HOST_a2);	// ror a1, a2
				*(Bit16u*)(pos+6)=B_FWD(12);	// b after_call (pc+12)
				break;
			case t_NEGb:
			case t_NEGw:
			case t_NEGd:
				*(Bit16u*)pos=NEG(HOST_a1, HOST_a1);	// neg a1, a1
				*(Bit16u*)(pos+2)=B_FWD(16);	// b after_call (pc+16)
				break;
			default:
				*(Bit32u*)(pos+10)=(Bit32u)fct_ptr;	// simple_func
				break;
		}

	}
#else
	if (((Bit32u)pos & 0x03) == 0)
	{
		*(Bit32u*)(pos+8)=(Bit32u)fct_ptr;	// simple_func
	}
	else
	{
		*(Bit32u*)(pos+10)=(Bit32u)fct_ptr;	// simple_func
	}
#endif
}
1222 #endif
1223
cache_block_before_close(void)1224 static void cache_block_before_close(void) {
1225 if ((((Bit32u)cache.pos) & 3) != 0) {
1226 cache_addw( NOP ); // nop
1227 }
1228 }
1229
1230 #ifdef DRC_USE_SEGS_ADDR
1231
// mov 16bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 2 must be zero)
// 16bit moves may destroy the upper 16bit of the destination register
// clobbers templo1
static void gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo1, FC_SEGS_ADDR) );      // mov templo1, FC_SEGS_ADDR
	cache_addw( LDRH_IMM(dest_reg, templo1, index) );      // ldrh dest_reg, [templo1, #index]
}
1238
// mov 32bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 4 must be zero)
// clobbers templo1
static void gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo1, FC_SEGS_ADDR) );      // mov templo1, FC_SEGS_ADDR
	cache_addw( LDR_IMM(dest_reg, templo1, index) );      // ldr dest_reg, [templo1, #index]
}
1244
// add a 32bit value from Segs[index] to a full register using FC_SEGS_ADDR (index modulo 4 must be zero)
// clobbers templo1 and templo2
static void gen_add_seg32_to_reg(HostReg reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo1, FC_SEGS_ADDR) );      // mov templo1, FC_SEGS_ADDR
	cache_addw( LDR_IMM(templo2, templo1, index) );      // ldr templo2, [templo1, #index]
	cache_addw( ADD_REG(reg, reg, templo2) );      // add reg, reg, templo2
}
1251
1252 #endif
1253
1254 #ifdef DRC_USE_REGS_ADDR
1255
// mov 16bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 2 must be zero)
// 16bit moves may destroy the upper 16bit of the destination register
// clobbers templo2
static void gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
	cache_addw( LDRH_IMM(dest_reg, templo2, index) );      // ldrh dest_reg, [templo2, #index]
}
1262
// mov 32bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 4 must be zero)
// clobbers templo2
static void gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
	cache_addw( LDR_IMM(dest_reg, templo2, index) );      // ldr dest_reg, [templo2, #index]
}
1268
// move a 32bit (dword==true) or 16bit (dword==false) value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
// 16bit moves may destroy the upper 16bit of the destination register
// clobbers templo2
static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword) {
	cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
	if (dword) {
		cache_addw( LDR_IMM(dest_reg, templo2, index) );      // ldr dest_reg, [templo2, #index]
	} else {
		cache_addw( LDRH_IMM(dest_reg, templo2, index) );      // ldrh dest_reg, [templo2, #index]
	}
}
1279
// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
// clobbers templo2
static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
	cache_addw( LDRB_IMM(dest_reg, templo2, index) );      // ldrb dest_reg, [templo2, #index]
}
1288
// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
// clobbers templo2
static void INLINE gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
	cache_addw( LDRB_IMM(dest_reg, templo2, index) );      // ldrb dest_reg, [templo2, #index]
}
1297
1298
// add a 32bit value from cpu_regs[index] to a full register using FC_REGS_ADDR (index modulo 4 must be zero)
// clobbers templo1 and templo2
static void gen_add_regval32_to_reg(HostReg reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );      // mov templo2, FC_REGS_ADDR
	cache_addw( LDR_IMM(templo1, templo2, index) );      // ldr templo1, [templo2, #index]
	cache_addw( ADD_REG(reg, reg, templo1) );      // add reg, reg, templo1
}
1305
1306
// move 16bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 2 must be zero)
// clobbers templo1
static void gen_mov_regval16_from_reg(HostReg src_reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) );      // mov templo1, FC_REGS_ADDR
	cache_addw( STRH_IMM(src_reg, templo1, index) );      // strh src_reg, [templo1, #index]
}
1312
// move 32bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 4 must be zero)
// clobbers templo1
static void gen_mov_regval32_from_reg(HostReg src_reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) );      // mov templo1, FC_REGS_ADDR
	cache_addw( STR_IMM(src_reg, templo1, index) );      // str src_reg, [templo1, #index]
}
1318
// move 32bit (dword==true) or 16bit (dword==false) of a register into cpu_regs[index] using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
// clobbers templo1
static void gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword) {
	cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) );      // mov templo1, FC_REGS_ADDR
	if (dword) {
		cache_addw( STR_IMM(src_reg, templo1, index) );      // str src_reg, [templo1, #index]
	} else {
		cache_addw( STRH_IMM(src_reg, templo1, index) );      // strh src_reg, [templo1, #index]
	}
}
1328
// move the lowest 8bit of a register into cpu_regs[index] using FC_REGS_ADDR
// clobbers templo1
static void gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index) {
	cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) );      // mov templo1, FC_REGS_ADDR
	cache_addw( STRB_IMM(src_reg, templo1, index) );      // strb src_reg, [templo1, #index]
}
1334
1335 #endif
1336