1 /*
2 * Copyright (C) 2002-2021 The DOSBox Team
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19 /* ARMv4 (little endian) backend by M-HT (thumb version with data pool)
20 *
21 * Requires -mthumb-interwork switch during compilation.
22 */
23
// temporary "lo" registers (r0-r7 range, usable in 16-bit thumb encodings)
#define templo1 HOST_v3
#define templo2 HOST_v4
#define templo3 HOST_v2

// register that holds function return values
#define FC_RETOP HOST_a1

// register used for address calculations
#define FC_ADDR HOST_v1			// has to be saved across calls, see DRC_PROTECT_ADDR_REG

// register that holds the first parameter
#define FC_OP1 HOST_a1

// register that holds the second parameter
#define FC_OP2 HOST_a2

// special register that holds the third parameter for _R3 calls (byte accessible)
#define FC_OP3 HOST_a4

// register that holds byte-accessible temporary values
#define FC_TMP_BA1 HOST_a1

// register that holds byte-accessible temporary values
#define FC_TMP_BA2 HOST_a2

// temporary register for LEA
#define TEMP_REG_DRC HOST_a4

// used to hold the address of "cpu_regs" - preferably filled in function gen_run_code
#define FC_REGS_ADDR HOST_v7

// used to hold the address of "Segs" - preferably filled in function gen_run_code
#define FC_SEGS_ADDR HOST_v8

// used to hold the address of "core_dynrec.readdata" - filled in function gen_run_code
#define readdata_addr HOST_v5
61
62
// instruction encodings
// each macro below builds one 16-bit thumb opcode; unless a macro name
// says otherwise (MOV_LO_HI / MOV_HI_LO / BX), all register operands
// must be lo registers (r0-r7)

// move
// mov dst, #imm @ 0 <= imm <= 255
#define MOV_IMM(dst, imm) (0x2000 + ((dst) << 8) + (imm) )
// mov dst, src  (encoded as "add dst, src, #0")
#define MOV_REG(dst, src) ADD_IMM3(dst, src, 0)
// mov dst, src @ dst is a lo register, src is a hi register (r8-r15)
#define MOV_LO_HI(dst, src) (0x4640 + (dst) + (((src) - HOST_r8) << 3) )
// mov dst, src @ dst is a hi register (r8-r15), src is a lo register
#define MOV_HI_LO(dst, src) (0x4680 + ((dst) - HOST_r8) + ((src) << 3) )

// arithmetic
// add dst, src, #imm @ 0 <= imm <= 7
#define ADD_IMM3(dst, src, imm) (0x1c00 + (dst) + ((src) << 3) + ((imm) << 6) )
// add dst, #imm @ 0 <= imm <= 255
#define ADD_IMM8(dst, imm) (0x3000 + ((dst) << 8) + (imm) )
// add dst, src1, src2
#define ADD_REG(dst, src1, src2) (0x1800 + (dst) + ((src1) << 3) + ((src2) << 6) )
// add dst, pc, #imm @ 0 <= imm < 1024 & imm mod 4 = 0
#define ADD_LO_PC_IMM(dst, imm) (0xa000 + ((dst) << 8) + ((imm) >> 2) )
// sub dst, src1, src2
#define SUB_REG(dst, src1, src2) (0x1a00 + (dst) + ((src1) << 3) + ((src2) << 6) )
// sub dst, src, #imm @ 0 <= imm <= 7
#define SUB_IMM3(dst, src, imm) (0x1e00 + (dst) + ((src) << 3) + ((imm) << 6) )
// sub dst, #imm @ 0 <= imm <= 255
#define SUB_IMM8(dst, imm) (0x3800 + ((dst) << 8) + (imm) )
// neg dst, src
#define NEG(dst, src) (0x4240 + (dst) + ((src) << 3) )
// cmp dst, #imm @ 0 <= imm <= 255
#define CMP_IMM(dst, imm) (0x2800 + ((dst) << 8) + (imm) )
// nop (encoded as "mov r8, r8")
#define NOP (0x46c0)

// logical
// and dst, src
#define AND(dst, src) (0x4000 + (dst) + ((src) << 3) )
// bic dst, src
#define BIC(dst, src) (0x4380 + (dst) + ((src) << 3) )
// eor dst, src
#define EOR(dst, src) (0x4040 + (dst) + ((src) << 3) )
// orr dst, src
#define ORR(dst, src) (0x4300 + (dst) + ((src) << 3) )
// mvn dst, src
#define MVN(dst, src) (0x43c0 + (dst) + ((src) << 3) )

// shift/rotate
// lsl dst, src, #imm
#define LSL_IMM(dst, src, imm) (0x0000 + (dst) + ((src) << 3) + ((imm) << 6) )
// lsl dst, reg
#define LSL_REG(dst, reg) (0x4080 + (dst) + ((reg) << 3) )
// lsr dst, src, #imm
#define LSR_IMM(dst, src, imm) (0x0800 + (dst) + ((src) << 3) + ((imm) << 6) )
// lsr dst, reg
#define LSR_REG(dst, reg) (0x40c0 + (dst) + ((reg) << 3) )
// asr dst, src, #imm
#define ASR_IMM(dst, src, imm) (0x1000 + (dst) + ((src) << 3) + ((imm) << 6) )
// asr dst, reg
#define ASR_REG(dst, reg) (0x4100 + (dst) + ((reg) << 3) )
// ror dst, reg
#define ROR_REG(dst, reg) (0x41c0 + (dst) + ((reg) << 3) )

// load
// ldr reg, [addr, #imm] @ 0 <= imm < 128 & imm mod 4 = 0
// (offset is stored divided by 4: (imm >> 2) << 6 == imm << 4)
#define LDR_IMM(reg, addr, imm) (0x6800 + (reg) + ((addr) << 3) + ((imm) << 4) )
// ldrh reg, [addr, #imm] @ 0 <= imm < 64 & imm mod 2 = 0
// (offset is stored divided by 2: (imm >> 1) << 6 == imm << 5)
#define LDRH_IMM(reg, addr, imm) (0x8800 + (reg) + ((addr) << 3) + ((imm) << 5) )
// ldrb reg, [addr, #imm] @ 0 <= imm < 32
#define LDRB_IMM(reg, addr, imm) (0x7800 + (reg) + ((addr) << 3) + ((imm) << 6) )
// ldr reg, [pc, #imm] @ 0 <= imm < 1024 & imm mod 4 = 0
#define LDR_PC_IMM(reg, imm) (0x4800 + ((reg) << 8) + ((imm) >> 2) )
// ldr reg, [addr1, addr2]
#define LDR_REG(reg, addr1, addr2) (0x5800 + (reg) + ((addr1) << 3) + ((addr2) << 6) )

// store
// str reg, [addr, #imm] @ 0 <= imm < 128 & imm mod 4 = 0
#define STR_IMM(reg, addr, imm) (0x6000 + (reg) + ((addr) << 3) + ((imm) << 4) )
// strh reg, [addr, #imm] @ 0 <= imm < 64 & imm mod 2 = 0
#define STRH_IMM(reg, addr, imm) (0x8000 + (reg) + ((addr) << 3) + ((imm) << 5) )
// strb reg, [addr, #imm] @ 0 <= imm < 32
#define STRB_IMM(reg, addr, imm) (0x7000 + (reg) + ((addr) << 3) + ((imm) << 6) )

// branch
// beq pc+imm @ 0 <= imm < 256 & imm mod 2 = 0
#define BEQ_FWD(imm) (0xd000 + ((imm) >> 1) )
// bne pc+imm @ 0 <= imm < 256 & imm mod 2 = 0
#define BNE_FWD(imm) (0xd100 + ((imm) >> 1) )
// bgt pc+imm @ 0 <= imm < 256 & imm mod 2 = 0
#define BGT_FWD(imm) (0xdc00 + ((imm) >> 1) )
// b pc+imm @ 0 <= imm < 2048 & imm mod 2 = 0
#define B_FWD(imm) (0xe000 + ((imm) >> 1) )
// bx reg (also accepts hi registers; switches state on bit 0 of reg)
#define BX(reg) (0x4700 + ((reg) << 3) )
156
157
// arm instructions (full 32-bit encodings, emitted while the processor
// is in arm state)

// arithmetic
// add dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
// (the rotate field stores rimm/2 at bit 8, hence rimm << 7)
#define ARM_ADD_IMM(dst, src, imm, rimm) (0xe2800000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )

// load
// ldr reg, [addr, #imm] @ 0 <= imm < 4096
#define ARM_LDR_IMM(reg, addr, imm) (0xe5900000 + ((reg) << 12) + ((addr) << 16) + (imm) )

// store
// str reg, [addr, #-(imm)]! @ 0 <= imm < 4096 (pre-indexed, writeback)
#define ARM_STR_IMM_M_W(reg, addr, imm) (0xe5200000 + ((reg) << 12) + ((addr) << 16) + (imm) )

// branch
// bx reg
#define ARM_BX(reg) (0xe12fff10 + (reg) )


// data pool defines
#define CACHE_DATA_JUMP (2)		// room for the thumb branch that jumps over a data pool
#define CACHE_DATA_ALIGN (32)	// alignment and growth granularity of a data pool
#define CACHE_DATA_MIN (32)		// minimum room considered when placing a new pool
#define CACHE_DATA_MAX (288)	// maximum distance of a data pool ahead of the code
182
// data pool variables
static const Bit8u * cache_datapos = NULL;	// position of data pool in the cache block
static Bit32u cache_datasize = 0;		// total size of data pool
static Bit32u cache_dataindex = 0;		// used size of data pool = index of free data item (in bytes) in data pool


// forward declaration - cache_checkinstr uses this to branch over the data pool
static void inline gen_create_branch_short(const Bit8u * func);
191
// function to check distance to data pool
// if too close, then generate jump after data pool
static void cache_checkinstr(Bit32u size) {
	if (cache_datasize == 0) {
		// no data pool in use; if a (still empty) pool position was
		// reserved and the code is about to run into it, just drop it
		if (cache_datapos != NULL) {
			if (cache.pos + size + CACHE_DATA_JUMP >= cache_datapos) {
				cache_datapos = NULL;
			}
		}
		return;
	}

	// enough room before the pool for this instruction plus the branch
	// that will jump over the pool? then nothing to do
	if (cache.pos + size + CACHE_DATA_JUMP <= cache_datapos) return;

	{
		register const Bit8u * newcachepos;

		// emit the branch over the pool and continue code generation
		// right behind it
		newcachepos = cache_datapos + cache_datasize;
		gen_create_branch_short(newcachepos);
		cache.pos = newcachepos;
	}

	// pick the position of the next data pool:
	// at the very end of the current cache block, if a maximum-size pool
	// would no longer fit ahead but a minimum-size one still fits there ...
	if (cache.pos + CACHE_DATA_MAX + CACHE_DATA_ALIGN >= cache.block.active->cache.start + cache.block.active->cache.size &&
		cache.pos + CACHE_DATA_MIN + CACHE_DATA_ALIGN + (CACHE_DATA_ALIGN - CACHE_ALIGN) < cache.block.active->cache.start + cache.block.active->cache.size)
	{
		cache_datapos = (const Bit8u *) (((Bitu)cache.block.active->cache.start + cache.block.active->cache.size - CACHE_DATA_ALIGN) & ~(CACHE_DATA_ALIGN - 1));
	} else {
		register Bit32u cachemodsize;

		cachemodsize = (cache.pos - cache.block.active->cache.start) & (CACHE_MAXSIZE - 1);

		// ... otherwise CACHE_DATA_MAX bytes ahead, unless that would
		// straddle a CACHE_MAXSIZE boundary - then place it just below
		// the boundary
		if (cachemodsize + CACHE_DATA_MAX + CACHE_DATA_ALIGN <= CACHE_MAXSIZE ||
			cachemodsize + CACHE_DATA_MIN + CACHE_DATA_ALIGN + (CACHE_DATA_ALIGN - CACHE_ALIGN) > CACHE_MAXSIZE)
		{
			cache_datapos = (const Bit8u *) (((Bitu)cache.pos + CACHE_DATA_MAX) & ~(CACHE_DATA_ALIGN - 1));
		} else {
			cache_datapos = (const Bit8u *) (((Bitu)cache.pos + (CACHE_MAXSIZE - CACHE_DATA_ALIGN) - cachemodsize) & ~(CACHE_DATA_ALIGN - 1));
		}
	}

	// the new pool starts out empty
	cache_datasize = 0;
	cache_dataindex = 0;
}
235
// function to reserve one 4-byte item in the data pool
// returns the address of the reserved item
static const Bit8u * cache_reservedata(void) {
	// if data pool not yet initialized, then initialize data pool
	if (GCC_UNLIKELY(cache_datapos == NULL)) {
		if (cache.pos + CACHE_DATA_MIN + CACHE_DATA_ALIGN < cache.block.active->cache.start + CACHE_DATA_MAX) {
			cache_datapos = (const Bit8u *) (((Bitu)cache.block.active->cache.start + CACHE_DATA_MAX) & ~(CACHE_DATA_ALIGN - 1));
		}
	}

	// if data pool not yet used, then set data pool
	if (cache_datasize == 0) {
		// if the pool position is too close to (or behind) cache.pos,
		// pick a new position for the pool
		if (cache.pos + CACHE_DATA_MIN + CACHE_DATA_JUMP /*+ CACHE_DATA_ALIGN*/ > cache_datapos) {
			// prefer the very end of the current cache block if a
			// minimum-size pool still fits there (same placement logic
			// as in cache_checkinstr)
			if (cache.pos + CACHE_DATA_MAX + CACHE_DATA_ALIGN >= cache.block.active->cache.start + cache.block.active->cache.size &&
				cache.pos + CACHE_DATA_MIN + CACHE_DATA_ALIGN + (CACHE_DATA_ALIGN - CACHE_ALIGN) < cache.block.active->cache.start + cache.block.active->cache.size)
			{
				cache_datapos = (const Bit8u *) (((Bitu)cache.block.active->cache.start + cache.block.active->cache.size - CACHE_DATA_ALIGN) & ~(CACHE_DATA_ALIGN - 1));
			} else {
				register Bit32u cachemodsize;

				cachemodsize = (cache.pos - cache.block.active->cache.start) & (CACHE_MAXSIZE - 1);

				// otherwise CACHE_DATA_MAX bytes ahead, unless that
				// would straddle a CACHE_MAXSIZE boundary - then place
				// it just below the boundary
				if (cachemodsize + CACHE_DATA_MAX + CACHE_DATA_ALIGN <= CACHE_MAXSIZE ||
					cachemodsize + CACHE_DATA_MIN + CACHE_DATA_ALIGN + (CACHE_DATA_ALIGN - CACHE_ALIGN) > CACHE_MAXSIZE)
				{
					cache_datapos = (const Bit8u *) (((Bitu)cache.pos + CACHE_DATA_MAX) & ~(CACHE_DATA_ALIGN - 1));
				} else {
					cache_datapos = (const Bit8u *) (((Bitu)cache.pos + (CACHE_MAXSIZE - CACHE_DATA_ALIGN) - cachemodsize) & ~(CACHE_DATA_ALIGN - 1));
				}
			}
		}
		// set initial data pool size
		cache_datasize = CACHE_DATA_ALIGN;
	}

	// if data pool is full, then enlarge data pool
	if (cache_dataindex == cache_datasize) {
		cache_datasize += CACHE_DATA_ALIGN;
	}

	// hand out the next free 4-byte slot
	cache_dataindex += 4;
	return (cache_datapos + (cache_dataindex - 4));
}
280
cache_block_before_close(void)281 static void cache_block_before_close(void) {
282 // if data pool in use, then resize cache block to include the data pool
283 if (cache_datasize != 0)
284 {
285 cache.pos = cache_datapos + cache_dataindex;
286 }
287
288 // clear the values before next use
289 cache_datapos = NULL;
290 cache_datasize = 0;
291 cache_dataindex = 0;
292 }
293
294
295 // move a full register from reg_src to reg_dst
gen_mov_regs(HostReg reg_dst,HostReg reg_src)296 static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) {
297 if(reg_src == reg_dst) return;
298 cache_checkinstr(2);
299 cache_addw( MOV_REG(reg_dst, reg_src) ); // mov reg_dst, reg_src
300 }
301
302 // helper function
val_single_shift(Bit32u value,Bit32u * val_shift)303 static bool val_single_shift(Bit32u value, Bit32u *val_shift) {
304 Bit32u shift;
305
306 if (GCC_UNLIKELY(value == 0)) {
307 *val_shift = 0;
308 return true;
309 }
310
311 shift = 0;
312 while ((value & 1) == 0) {
313 value>>=1;
314 shift+=1;
315 }
316
317 if ((value >> 8) != 0) return false;
318
319 *val_shift = shift;
320 return true;
321 }
322
// move a 32bit constant value into dest_reg
static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
	Bit32u scale;

	if (imm < 256) {
		// fits directly into a thumb 8-bit immediate
		cache_checkinstr(2);
		cache_addw( MOV_IMM(dest_reg, imm) );	// mov dest_reg, #(imm)
	} else if ((~imm) < 256) {
		// complement fits: load ~imm, then invert it
		cache_checkinstr(4);
		cache_addw( MOV_IMM(dest_reg, ~imm) );	// mov dest_reg, #(~imm)
		cache_addw( MVN(dest_reg, dest_reg) );	// mvn dest_reg, dest_reg
	} else if (val_single_shift(imm, &scale)) {
		// an 8-bit value shifted left: load and shift
		cache_checkinstr(4);
		cache_addw( MOV_IMM(dest_reg, imm >> scale) );	// mov dest_reg, #(imm >> scale)
		cache_addw( LSL_IMM(dest_reg, dest_reg, scale) );	// lsl dest_reg, dest_reg, #scale
	} else {
		Bit32u diff;

		cache_checkinstr(4);

		// distance from the current code position; pc reads as the
		// instruction address + 4 here, see the offsets below
		diff = imm - ((Bit32u)cache.pos+4);

		if ((diff < 1024) && ((imm & 0x03) == 0)) {
			// constant is a nearby word-aligned address: synthesize it
			// pc-relative with a single add; a nop keeps the add at a
			// word-aligned position when cache.pos is only
			// halfword-aligned (pc is read word-aligned)
			if (((Bit32u)cache.pos & 0x03) == 0) {
				cache_addw( ADD_LO_PC_IMM(dest_reg, diff >> 2) );	// add dest_reg, pc, #(diff >> 2)
			} else {
				cache_addw( NOP );	// nop
				cache_addw( ADD_LO_PC_IMM(dest_reg, (diff - 2) >> 2) );	// add dest_reg, pc, #((diff - 2) >> 2)
			}
		} else {
			// general case: put the constant into the data pool and
			// load it pc-relative
			const Bit8u *datapos;

			datapos = cache_reservedata();
			cache_addd(imm,datapos);

			if (((Bit32u)cache.pos & 0x03) == 0) {
				cache_addw( LDR_PC_IMM(dest_reg, datapos - (cache.pos + 4)) );	// ldr dest_reg, [pc, datapos]
			} else {
				cache_addw( LDR_PC_IMM(dest_reg, datapos - (cache.pos + 2)) );	// ldr dest_reg, [pc, datapos]
			}
		}
	}
}
366
367 // helper function
gen_mov_memval_to_reg_helper(HostReg dest_reg,Bit32u data,Bitu size,HostReg addr_reg,Bit32u addr_data)368 static bool gen_mov_memval_to_reg_helper(HostReg dest_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) {
369 switch (size) {
370 case 4:
371 #if !defined(C_UNALIGNED_MEMORY)
372 if ((data & 3) == 0)
373 #endif
374 {
375 if ((data >= addr_data) && (data < addr_data + 128) && (((data - addr_data) & 3) == 0)) {
376 cache_checkinstr(4);
377 cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg
378 cache_addw( LDR_IMM(dest_reg, templo2, data - addr_data) ); // ldr dest_reg, [templo2, #(data - addr_data)]
379 return true;
380 }
381 }
382 break;
383 case 2:
384 #if !defined(C_UNALIGNED_MEMORY)
385 if ((data & 1) == 0)
386 #endif
387 {
388 if ((data >= addr_data) && (data < addr_data + 64) && (((data - addr_data) & 1) == 0)) {
389 cache_checkinstr(4);
390 cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg
391 cache_addw( LDRH_IMM(dest_reg, templo2, data - addr_data) ); // ldrh dest_reg, [templo2, #(data - addr_data)]
392 return true;
393 }
394 }
395 break;
396 case 1:
397 if ((data >= addr_data) && (data < addr_data + 32)) {
398 cache_checkinstr(4);
399 cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg
400 cache_addw( LDRB_IMM(dest_reg, templo2, data - addr_data) ); // ldrb dest_reg, [templo2, #(data - addr_data)]
401 return true;
402 }
403 default:
404 break;
405 }
406 return false;
407 }
408
409 // helper function
gen_mov_memval_to_reg(HostReg dest_reg,void * data,Bitu size)410 static bool gen_mov_memval_to_reg(HostReg dest_reg, void *data, Bitu size) {
411 if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true;
412 if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true;
413 if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true;
414 return false;
415 }
416
// helper function for gen_mov_word_to_reg
// loads through data_reg, which must already hold the address 'data';
// 'data' itself is only inspected for its alignment
static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,HostReg data_reg) {
	// alignment....
	if (dword) {
#if !defined(C_UNALIGNED_MEMORY)
		// host cannot do unaligned accesses: assemble the dword from
		// naturally aligned halfword/byte loads
		if ((Bit32u)data & 3) {
			if ( ((Bit32u)data & 3) == 2 ) {
				// halfword-aligned: two halfword loads
				cache_checkinstr(8);
				cache_addw( LDRH_IMM(dest_reg, data_reg, 0) );	// ldrh dest_reg, [data_reg]
				cache_addw( LDRH_IMM(templo1, data_reg, 2) );	// ldrh templo1, [data_reg, #2]
				cache_addw( LSL_IMM(templo1, templo1, 16) );	// lsl templo1, templo1, #16
				cache_addw( ORR(dest_reg, templo1) );	// orr dest_reg, templo1
			} else {
				// odd address: byte + (aligned) halfword + byte
				cache_checkinstr(16);
				cache_addw( LDRB_IMM(dest_reg, data_reg, 0) );	// ldrb dest_reg, [data_reg]
				cache_addw( ADD_IMM3(templo1, data_reg, 1) );	// add templo1, data_reg, #1
				cache_addw( LDRH_IMM(templo1, templo1, 0) );	// ldrh templo1, [templo1]
				cache_addw( LSL_IMM(templo1, templo1, 8) );	// lsl templo1, templo1, #8
				cache_addw( ORR(dest_reg, templo1) );	// orr dest_reg, templo1
				cache_addw( LDRB_IMM(templo1, data_reg, 3) );	// ldrb templo1, [data_reg, #3]
				cache_addw( LSL_IMM(templo1, templo1, 24) );	// lsl templo1, templo1, #24
				cache_addw( ORR(dest_reg, templo1) );	// orr dest_reg, templo1
			}
		} else
#endif
		{
			cache_checkinstr(2);
			cache_addw( LDR_IMM(dest_reg, data_reg, 0) );	// ldr dest_reg, [data_reg]
		}
	} else {
#if !defined(C_UNALIGNED_MEMORY)
		if ((Bit32u)data & 1) {
			// odd address: two byte loads
			cache_checkinstr(8);
			cache_addw( LDRB_IMM(dest_reg, data_reg, 0) );	// ldrb dest_reg, [data_reg]
			cache_addw( LDRB_IMM(templo1, data_reg, 1) );	// ldrb templo1, [data_reg, #1]
			cache_addw( LSL_IMM(templo1, templo1, 8) );	// lsl templo1, templo1, #8
			cache_addw( ORR(dest_reg, templo1) );	// orr dest_reg, templo1
		} else
#endif
		{
			cache_checkinstr(2);
			cache_addw( LDRH_IMM(dest_reg, data_reg, 0) );	// ldrh dest_reg, [data_reg]
		}
	}
}
462
463 // move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
464 // 16bit moves may destroy the upper 16bit of the destination register
gen_mov_word_to_reg(HostReg dest_reg,void * data,bool dword)465 static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) {
466 if (!gen_mov_memval_to_reg(dest_reg, data, (dword)?4:2)) {
467 gen_mov_dword_to_reg_imm(templo2, (Bit32u)data);
468 gen_mov_word_to_reg_helper(dest_reg, data, dword, templo2);
469 }
470 }
471
// move a 16bit constant value into dest_reg
// the upper 16bit of the destination register may be destroyed
// (zero-extends imm and reuses the generic 32-bit constant loader)
static void inline gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) {
	gen_mov_dword_to_reg_imm(dest_reg, (Bit32u)imm);
}
477
478 // helper function
gen_mov_memval_from_reg_helper(HostReg src_reg,Bit32u data,Bitu size,HostReg addr_reg,Bit32u addr_data)479 static bool gen_mov_memval_from_reg_helper(HostReg src_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) {
480 switch (size) {
481 case 4:
482 #if !defined(C_UNALIGNED_MEMORY)
483 if ((data & 3) == 0)
484 #endif
485 {
486 if ((data >= addr_data) && (data < addr_data + 128) && (((data - addr_data) & 3) == 0)) {
487 cache_checkinstr(4);
488 cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg
489 cache_addw( STR_IMM(src_reg, templo2, data - addr_data) ); // str src_reg, [templo2, #(data - addr_data)]
490 return true;
491 }
492 }
493 break;
494 case 2:
495 #if !defined(C_UNALIGNED_MEMORY)
496 if ((data & 1) == 0)
497 #endif
498 {
499 if ((data >= addr_data) && (data < addr_data + 64) && (((data - addr_data) & 1) == 0)) {
500 cache_checkinstr(4);
501 cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg
502 cache_addw( STRH_IMM(src_reg, templo2, data - addr_data) ); // strh src_reg, [templo2, #(data - addr_data)]
503 return true;
504 }
505 }
506 break;
507 case 1:
508 if ((data >= addr_data) && (data < addr_data + 32)) {
509 cache_checkinstr(4);
510 cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg
511 cache_addw( STRB_IMM(src_reg, templo2, data - addr_data) ); // strb src_reg, [templo2, #(data - addr_data)]
512 return true;
513 }
514 default:
515 break;
516 }
517 return false;
518 }
519
520 // helper function
gen_mov_memval_from_reg(HostReg src_reg,void * dest,Bitu size)521 static bool gen_mov_memval_from_reg(HostReg src_reg, void *dest, Bitu size) {
522 if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true;
523 if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true;
524 if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true;
525 return false;
526 }
527
// helper function for gen_mov_word_from_reg
// stores through data_reg, which must already hold the address 'dest';
// 'dest' itself is only inspected for its alignment
static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, HostReg data_reg) {
	// alignment....
	if (dword) {
#if !defined(C_UNALIGNED_MEMORY)
		// host cannot do unaligned accesses: split the dword into
		// naturally aligned halfword/byte stores
		if ((Bit32u)dest & 3) {
			if ( ((Bit32u)dest & 3) == 2 ) {
				// halfword-aligned: two halfword stores
				cache_checkinstr(8);
				cache_addw( STRH_IMM(src_reg, data_reg, 0) );	// strh src_reg, [data_reg]
				cache_addw( MOV_REG(templo1, src_reg) );	// mov templo1, src_reg
				cache_addw( LSR_IMM(templo1, templo1, 16) );	// lsr templo1, templo1, #16
				cache_addw( STRH_IMM(templo1, data_reg, 2) );	// strh templo1, [data_reg, #2]
			} else {
				// odd address: four byte stores
				cache_checkinstr(20);
				cache_addw( STRB_IMM(src_reg, data_reg, 0) );	// strb src_reg, [data_reg]
				cache_addw( MOV_REG(templo1, src_reg) );	// mov templo1, src_reg
				cache_addw( LSR_IMM(templo1, templo1, 8) );	// lsr templo1, templo1, #8
				cache_addw( STRB_IMM(templo1, data_reg, 1) );	// strb templo1, [data_reg, #1]
				cache_addw( MOV_REG(templo1, src_reg) );	// mov templo1, src_reg
				cache_addw( LSR_IMM(templo1, templo1, 16) );	// lsr templo1, templo1, #16
				cache_addw( STRB_IMM(templo1, data_reg, 2) );	// strb templo1, [data_reg, #2]
				cache_addw( MOV_REG(templo1, src_reg) );	// mov templo1, src_reg
				cache_addw( LSR_IMM(templo1, templo1, 24) );	// lsr templo1, templo1, #24
				cache_addw( STRB_IMM(templo1, data_reg, 3) );	// strb templo1, [data_reg, #3]
			}
		} else
#endif
		{
			cache_checkinstr(2);
			cache_addw( STR_IMM(src_reg, data_reg, 0) );	// str src_reg, [data_reg]
		}
	} else {
#if !defined(C_UNALIGNED_MEMORY)
		if ((Bit32u)dest & 1) {
			// odd address: two byte stores
			cache_checkinstr(8);
			cache_addw( STRB_IMM(src_reg, data_reg, 0) );	// strb src_reg, [data_reg]
			cache_addw( MOV_REG(templo1, src_reg) );	// mov templo1, src_reg
			cache_addw( LSR_IMM(templo1, templo1, 8) );	// lsr templo1, templo1, #8
			cache_addw( STRB_IMM(templo1, data_reg, 1) );	// strb templo1, [data_reg, #1]
		} else
#endif
		{
			cache_checkinstr(2);
			cache_addw( STRH_IMM(src_reg, data_reg, 0) );	// strh src_reg, [data_reg]
		}
	}
}
575
576 // move 32bit (dword==true) or 16bit (dword==false) of a register into memory
gen_mov_word_from_reg(HostReg src_reg,void * dest,bool dword)577 static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) {
578 if (!gen_mov_memval_from_reg(src_reg, dest, (dword)?4:2)) {
579 gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
580 gen_mov_word_from_reg_helper(src_reg, dest, dword, templo2);
581 }
582 }
583
584 // move an 8bit value from memory into dest_reg
585 // the upper 24bit of the destination register can be destroyed
586 // this function does not use FC_OP1/FC_OP2 as dest_reg as these
587 // registers might not be directly byte-accessible on some architectures
gen_mov_byte_to_reg_low(HostReg dest_reg,void * data)588 static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) {
589 if (!gen_mov_memval_to_reg(dest_reg, data, 1)) {
590 gen_mov_dword_to_reg_imm(templo1, (Bit32u)data);
591 cache_checkinstr(2);
592 cache_addw( LDRB_IMM(dest_reg, templo1, 0) ); // ldrb dest_reg, [templo1]
593 }
594 }
595
// move an 8bit value from memory into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
// (no distinction is needed on this backend, so it simply forwards)
static void inline gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
	gen_mov_byte_to_reg_low(dest_reg, data);
}
603
// move an 8bit constant value into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
	cache_checkinstr(2);
	cache_addw( MOV_IMM(dest_reg, imm) );	// mov dest_reg, #(imm)
}
612
// move an 8bit constant value into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
// (no distinction is needed on this backend, so it simply forwards)
static void inline gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) {
	gen_mov_byte_to_reg_low_imm(dest_reg, imm);
}
620
621 // move the lowest 8bit of a register into memory
gen_mov_byte_from_reg_low(HostReg src_reg,void * dest)622 static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) {
623 if (!gen_mov_memval_from_reg(src_reg, dest, 1)) {
624 gen_mov_dword_to_reg_imm(templo1, (Bit32u)dest);
625 cache_checkinstr(2);
626 cache_addw( STRB_IMM(src_reg, templo1, 0) ); // strb src_reg, [templo1]
627 }
628 }
629
630
631
632 // convert an 8bit word to a 32bit dword
633 // the register is zero-extended (sign==false) or sign-extended (sign==true)
gen_extend_byte(bool sign,HostReg reg)634 static void gen_extend_byte(bool sign,HostReg reg) {
635 cache_checkinstr(4);
636 cache_addw( LSL_IMM(reg, reg, 24) ); // lsl reg, reg, #24
637
638 if (sign) {
639 cache_addw( ASR_IMM(reg, reg, 24) ); // asr reg, reg, #24
640 } else {
641 cache_addw( LSR_IMM(reg, reg, 24) ); // lsr reg, reg, #24
642 }
643 }
644
645 // convert a 16bit word to a 32bit dword
646 // the register is zero-extended (sign==false) or sign-extended (sign==true)
gen_extend_word(bool sign,HostReg reg)647 static void gen_extend_word(bool sign,HostReg reg) {
648 cache_checkinstr(4);
649 cache_addw( LSL_IMM(reg, reg, 16) ); // lsl reg, reg, #16
650
651 if (sign) {
652 cache_addw( ASR_IMM(reg, reg, 16) ); // asr reg, reg, #16
653 } else {
654 cache_addw( LSR_IMM(reg, reg, 16) ); // lsr reg, reg, #16
655 }
656 }
657
658 // add a 32bit value from memory to a full register
gen_add(HostReg reg,void * op)659 static void gen_add(HostReg reg,void* op) {
660 gen_mov_word_to_reg(templo3, op, 1);
661 cache_checkinstr(2);
662 cache_addw( ADD_REG(reg, reg, templo3) ); // add reg, reg, templo3
663 }
664
// add a 32bit constant value to a full register
static void gen_add_imm(HostReg reg,Bit32u imm) {
	Bit32u imm2, scale;

	if(!imm) return;	// adding zero: nothing to emit

	imm2 = (Bit32u) (-((Bit32s)imm));	// negated constant: adding imm == subtracting imm2

	if (imm <= 255) {
		cache_checkinstr(2);
		cache_addw( ADD_IMM8(reg, imm) );	// add reg, #imm
	} else if (imm2 <= 255) {
		cache_checkinstr(2);
		cache_addw( SUB_IMM8(reg, imm2) );	// sub reg, #(-imm)
	} else {
		if (val_single_shift(imm2, &scale)) {
			// -imm is an 8-bit value shifted left by scale:
			// build it in templo1 and subtract it
			cache_checkinstr((scale)?6:4);
			cache_addw( MOV_IMM(templo1, imm2 >> scale) );	// mov templo1, #(-imm >> scale)
			if (scale) {
				cache_addw( LSL_IMM(templo1, templo1, scale) );	// lsl templo1, templo1, #scale
			}
			cache_addw( SUB_REG(reg, reg, templo1) );	// sub reg, reg, templo1
		} else {
			// general case: load the full constant and add it
			gen_mov_dword_to_reg_imm(templo1, imm);
			cache_checkinstr(2);
			cache_addw( ADD_REG(reg, reg, templo1) );	// add reg, reg, templo1
		}
	}
}
694
695 // and a 32bit constant value with a full register
gen_and_imm(HostReg reg,Bit32u imm)696 static void gen_and_imm(HostReg reg,Bit32u imm) {
697 Bit32u imm2, scale;
698
699 imm2 = ~imm;
700 if(!imm2) return;
701
702 if (!imm) {
703 cache_checkinstr(2);
704 cache_addw( MOV_IMM(reg, 0) ); // mov reg, #0
705 } else {
706 if (val_single_shift(imm2, &scale)) {
707 cache_checkinstr((scale)?6:4);
708 cache_addw( MOV_IMM(templo1, imm2 >> scale) ); // mov templo1, #(~imm >> scale)
709 if (scale) {
710 cache_addw( LSL_IMM(templo1, templo1, scale) ); // lsl templo1, templo1, #scale
711 }
712 cache_addw( BIC(reg, templo1) ); // bic reg, templo1
713 } else {
714 gen_mov_dword_to_reg_imm(templo1, imm);
715 cache_checkinstr(2);
716 cache_addw( AND(reg, templo1) ); // and reg, templo1
717 }
718 }
719 }
720
721
722 // move a 32bit constant value into memory
gen_mov_direct_dword(void * dest,Bit32u imm)723 static void gen_mov_direct_dword(void* dest,Bit32u imm) {
724 gen_mov_dword_to_reg_imm(templo3, imm);
725 gen_mov_word_from_reg(templo3, dest, 1);
726 }
727
// move an address into memory
// (pointers are 32 bits wide on this target, so this is identical to
// storing a 32bit constant)
static void inline gen_mov_direct_ptr(void* dest,Bit32u imm) {
	gen_mov_direct_dword(dest,imm);
}
732
// add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value
static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) {
	if (!dword) imm &= 0xffff;	// only the low 16 bits matter for word-sized adds
	if(!imm) return;	// adding zero: nothing to emit

	// read-modify-write: load the current value into templo3 ...
	if (!gen_mov_memval_to_reg(templo3, dest, (dword)?4:2)) {
		gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
		gen_mov_word_to_reg_helper(templo3, dest, dword, templo2);
	}
	// ... add the constant ...
	gen_add_imm(templo3, imm);
	// ... and store it back (templo2 still holds the address if one was needed)
	if (!gen_mov_memval_from_reg(templo3, dest, (dword)?4:2)) {
		gen_mov_word_from_reg_helper(templo3, dest, dword, templo2);
	}
}
747
// add an 8bit constant value to a dword memory value
// (imm is sign-extended to 32 bits before the add)
static void gen_add_direct_byte(void* dest,Bit8s imm) {
	gen_add_direct_word(dest, (Bit32s)imm, 1);
}
752
// subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value
static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
	Bit32u imm2, scale;

	if (!dword) imm &= 0xffff;	// only the low 16 bits matter for word-sized subtracts
	if(!imm) return;	// subtracting zero: nothing to emit

	// read-modify-write: load the current value into templo3
	if (!gen_mov_memval_to_reg(templo3, dest, (dword)?4:2)) {
		gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
		gen_mov_word_to_reg_helper(templo3, dest, dword, templo2);
	}

	imm2 = (Bit32u) (-((Bit32s)imm));	// negated constant: subtracting imm == adding imm2

	if (imm <= 255) {
		cache_checkinstr(2);
		cache_addw( SUB_IMM8(templo3, imm) );	// sub templo3, #imm
	} else if (imm2 <= 255) {
		cache_checkinstr(2);
		cache_addw( ADD_IMM8(templo3, imm2) );	// add templo3, #(-imm)
	} else {
		if (val_single_shift(imm2, &scale)) {
			// -imm is an 8-bit value shifted left by scale:
			// build it in templo1 and add it
			cache_checkinstr((scale)?6:4);
			cache_addw( MOV_IMM(templo1, imm2 >> scale) );	// mov templo1, #(-imm >> scale)
			if (scale) {
				cache_addw( LSL_IMM(templo1, templo1, scale) );	// lsl templo1, templo1, #scale
			}
			cache_addw( ADD_REG(templo3, templo3, templo1) );	// add templo3, templo3, templo1
		} else {
			// general case: load the full constant and subtract it
			gen_mov_dword_to_reg_imm(templo1, imm);
			cache_checkinstr(2);
			cache_addw( SUB_REG(templo3, templo3, templo1) );	// sub templo3, templo3, templo1
		}
	}

	// store the result back (templo2 still holds the address if one was needed)
	if (!gen_mov_memval_from_reg(templo3, dest, (dword)?4:2)) {
		gen_mov_word_from_reg_helper(templo3, dest, dword, templo2);
	}
}
792
// subtract an 8bit constant value from a dword memory value
// (imm is sign-extended to 32 bits before the subtract)
static void gen_sub_direct_byte(void* dest,Bit8s imm) {
	gen_sub_direct_word(dest, (Bit32s)imm, 1);
}
797
798 // effective address calculation, destination is dest_reg
799 // scale_reg is scaled by scale (scale_reg*(2^scale)) and
800 // added to dest_reg, then the immediate value is added
static inline void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) {
	if (scale) {
		// scaled index: shift into a scratch register first
		cache_checkinstr(4);
		cache_addw( LSL_IMM(templo1, scale_reg, scale) );	// lsl templo1, scale_reg, #(scale)
		cache_addw( ADD_REG(dest_reg, dest_reg, templo1) );	// add dest_reg, dest_reg, templo1
	} else {
		cache_checkinstr(2);
		cache_addw( ADD_REG(dest_reg, dest_reg, scale_reg) );	// add dest_reg, dest_reg, scale_reg
	}
	// finally add the displacement
	gen_add_imm(dest_reg, imm);
}
812
813 // effective address calculation, destination is dest_reg
814 // dest_reg is scaled by scale (dest_reg*(2^scale)),
815 // then the immediate value is added
static inline void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) {
	if (scale) {
		// scale dest_reg in place (dest_reg <<= scale)
		cache_checkinstr(2);
		cache_addw( LSL_IMM(dest_reg, dest_reg, scale) );	// lsl dest_reg, dest_reg, #(scale)
	}
	// finally add the displacement
	gen_add_imm(dest_reg, imm);
}
823
824 // helper function for gen_call_function_raw and gen_call_function_setup
// helper function for gen_call_function_raw and gen_call_function_setup
static void gen_call_function_helper(void * func) {
	const Bit8u *datapos;

	// store the target address in the data pool, load it pc-relative below
	datapos = cache_reservedata();
	cache_addd((Bit32u)func,datapos);

	// the pc-relative offsets differ with the alignment of cache.pos;
	// the aligned variant pads with a nop so the sequence is 12 bytes
	// (10 bytes unaligned, see gen_call_function_setup)
	if (((Bit32u)cache.pos & 0x03) == 0) {
		cache_addw( LDR_PC_IMM(templo1, datapos - (cache.pos + 4)) );	// ldr templo1, [pc, datapos]
		cache_addw( ADD_LO_PC_IMM(templo2, 8) );	// adr templo2, after_call (add templo2, pc, #8)
		cache_addw( ADD_IMM8(templo2, 1) );	// add templo2, #1 --- set bit0 so the callee returns to thumb state
		cache_addw( MOV_HI_LO(HOST_lr, templo2) );	// mov lr, templo2
		cache_addw( BX(templo1) );	// bx templo1 --- switch to arm state
		cache_addw( NOP );	// nop
	} else {
		cache_addw( LDR_PC_IMM(templo1, datapos - (cache.pos + 2)) );	// ldr templo1, [pc, datapos]
		cache_addw( ADD_LO_PC_IMM(templo2, 4) );	// adr templo2, after_call (add templo2, pc, #4)
		cache_addw( ADD_IMM8(templo2, 1) );	// add templo2, #1 --- set bit0 so the callee returns to thumb state
		cache_addw( MOV_HI_LO(HOST_lr, templo2) );	// mov lr, templo2
		cache_addw( BX(templo1) );	// bx templo1 --- switch to arm state
	}
	// after_call:

	// thumb state from now on
}
849
850 // generate a call to a parameterless function
gen_call_function_raw(void * func)851 static void inline gen_call_function_raw(void * func) {
852 cache_checkinstr(12);
853 gen_call_function_helper(func);
854 }
855
856 // generate a call to a function with paramcount parameters
857 // note: the parameters are loaded in the architecture specific way
858 // using the gen_load_param_ functions below
859 static inline const Bit8u* gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) {
860 cache_checkinstr(12);
861 const Bit8u* proc_addr = cache.pos;
862 gen_call_function_helper(func);
863 return proc_addr;
864 // if proc_addr is on word boundary ((proc_addr & 0x03) == 0)
865 // then length of generated code is 12 bytes
866 // otherwise length of generated code is 10 bytes
867 }
868
869 #if (1)
870 // max of 4 parameters in a1-a4
871
872 // load an immediate value as param'th function parameter
gen_load_param_imm(Bitu imm,Bitu param)873 static void inline gen_load_param_imm(Bitu imm,Bitu param) {
874 gen_mov_dword_to_reg_imm(param, imm);
875 }
876
877 // load an address as param'th function parameter
gen_load_param_addr(Bitu addr,Bitu param)878 static void inline gen_load_param_addr(Bitu addr,Bitu param) {
879 gen_mov_dword_to_reg_imm(param, addr);
880 }
881
882 // load a host-register as param'th function parameter
gen_load_param_reg(Bitu reg,Bitu param)883 static void inline gen_load_param_reg(Bitu reg,Bitu param) {
884 gen_mov_regs(param, reg);
885 }
886
887 // load a value from memory as param'th function parameter
gen_load_param_mem(Bitu mem,Bitu param)888 static void inline gen_load_param_mem(Bitu mem,Bitu param) {
889 gen_mov_word_to_reg(param, (void *)mem, 1);
890 }
891 #else
892 other arm abis
893 #endif
894
895 // jump to an address pointed at by ptr, offset is in imm
// jump to an address pointed at by ptr, offset is in imm
static void gen_jmp_ptr(void * ptr,Bits imm=0) {
	// templo3 = *(Bit32u*)ptr; the jump target is then read from templo3+imm
	gen_mov_word_to_reg(templo3, ptr, 1);

#if !defined(C_UNALIGNED_MEMORY)
	// (*ptr) should be word aligned
	if ((imm & 0x03) == 0) {
#endif
		if ((imm >= 0) && (imm < 128) && ((imm & 3) == 0)) {
			// offset fits the thumb ldr immediate form (word-aligned, 0..124)
			cache_checkinstr(6);	// ldr + the add/bx emitted at the end
			cache_addw( LDR_IMM(templo2, templo3, imm) );	// ldr templo2, [templo3, #imm]
		} else {
			// large/odd offset: use a register offset
			gen_mov_dword_to_reg_imm(templo2, imm);
			cache_checkinstr(6);	// ldr + the add/bx emitted at the end
			cache_addw( LDR_REG(templo2, templo3, templo2) );	// ldr templo2, [templo3, templo2]
		}
#if !defined(C_UNALIGNED_MEMORY)
	} else {
		// unaligned source: assemble the 32bit target byte by byte (little endian)
		gen_add_imm(templo3, imm);

		cache_checkinstr(24);	// 9 halfwords below + the add/bx at the end
		cache_addw( LDRB_IMM(templo2, templo3, 0) );	// ldrb templo2, [templo3]
		cache_addw( LDRB_IMM(templo1, templo3, 1) );	// ldrb templo1, [templo3, #1]
		cache_addw( LSL_IMM(templo1, templo1, 8) );	// lsl templo1, templo1, #8
		cache_addw( ORR(templo2, templo1) );	// orr templo2, templo1
		cache_addw( LDRB_IMM(templo1, templo3, 2) );	// ldrb templo1, [templo3, #2]
		cache_addw( LSL_IMM(templo1, templo1, 16) );	// lsl templo1, templo1, #16
		cache_addw( ORR(templo2, templo1) );	// orr templo2, templo1
		cache_addw( LDRB_IMM(templo1, templo3, 3) );	// ldrb templo1, [templo3, #3]
		cache_addw( LSL_IMM(templo1, templo1, 24) );	// lsl templo1, templo1, #24
		cache_addw( ORR(templo2, templo1) );	// orr templo2, templo1
	}
#endif

	// increase jmp address to keep thumb state
	cache_addw( ADD_IMM3(templo2, templo2, 1) );	// add templo2, templo2, #1

	cache_addw( BX(templo2) );	// bx templo2
}
934
935 // short conditional jump (+-127 bytes) if register is zero
936 // the destination is set by gen_fill_branch() later
gen_create_branch_on_zero(HostReg reg,bool dword)937 static const Bit8u* gen_create_branch_on_zero(HostReg reg,bool dword) {
938 cache_checkinstr(4);
939 if (dword) {
940 cache_addw( CMP_IMM(reg, 0) ); // cmp reg, #0
941 } else {
942 cache_addw( LSL_IMM(templo1, reg, 16) ); // lsl templo1, reg, #16
943 }
944 cache_addw( BEQ_FWD(0) ); // beq j
945 return (cache.pos-2);
946 }
947
948 // short conditional jump (+-127 bytes) if register is nonzero
949 // the destination is set by gen_fill_branch() later
gen_create_branch_on_nonzero(HostReg reg,bool dword)950 static const Bit8u* gen_create_branch_on_nonzero(HostReg reg,bool dword) {
951 cache_checkinstr(4);
952 if (dword) {
953 cache_addw( CMP_IMM(reg, 0) ); // cmp reg, #0
954 } else {
955 cache_addw( LSL_IMM(templo1, reg, 16) ); // lsl templo1, reg, #16
956 }
957 cache_addw( BNE_FWD(0) ); // bne j
958 return (cache.pos-2);
959 }
960
961 // calculate relative offset and fill it into the location pointed to by data
static void inline gen_fill_branch(const Bit8u* data) {
#if C_DEBUG
	Bits len=cache.pos-(data+4);
	if (len<0) len=-len;
	if (len>252) LOG_MSG("Big jump %d",len);
#endif
	// patch the offset byte of the conditional branch at data:
	// the offset is encoded in half-words, relative to the branch's pc (data+4)
	cache_addb((Bit8u)((cache.pos-(data+4))>>1),data);
}
970
971
972 // conditional jump if register is nonzero
973 // for isdword==true the 32bit of the register are tested
974 // for isdword==false the lowest 8bit of the register are tested
static const Bit8u* gen_create_branch_long_nonzero(HostReg reg,bool isdword) {
	const Bit8u *datapos;

	cache_checkinstr(8);
	datapos = cache_reservedata();	// pool slot that will receive the target address

	if (isdword) {
		cache_addw( CMP_IMM(reg, 0) );	// cmp reg, #0
	} else {
		// 8bit test: the thumb shift updates the Z flag, so shifting the
		// low byte up tests exactly those 8 bits against zero
		cache_addw( LSL_IMM(templo2, reg, 24) );	// lsl templo2, reg, #24
	}
	cache_addw( BEQ_FWD(2) );	// beq nobranch (pc+2)
	// pc-relative load of the target; offset depends on pc alignment
	if (((Bit32u)cache.pos & 0x03) == 0) {
		cache_addw( LDR_PC_IMM(templo1, datapos - (cache.pos + 4)) );	// ldr templo1, [pc, datapos]
	} else {
		cache_addw( LDR_PC_IMM(templo1, datapos - (cache.pos + 2)) );	// ldr templo1, [pc, datapos]
	}
	cache_addw( BX(templo1) );	// bx templo1
	// nobranch:
	return (datapos);	// caller fills the target via gen_fill_branch_long
}
996
997 // compare 32bit-register against zero and jump if value less/equal than zero
static const Bit8u* gen_create_branch_long_leqzero(HostReg reg) {
	const Bit8u *datapos;

	cache_checkinstr(8);
	datapos = cache_reservedata();	// pool slot that will receive the target address

	cache_addw( CMP_IMM(reg, 0) );	// cmp reg, #0
	cache_addw( BGT_FWD(2) );	// bgt nobranch (pc+2) --- skip the jump when reg > 0
	// pc-relative load of the target; offset depends on pc alignment
	if (((Bit32u)cache.pos & 0x03) == 0) {
		cache_addw( LDR_PC_IMM(templo1, datapos - (cache.pos + 4)) );	// ldr templo1, [pc, datapos]
	} else {
		cache_addw( LDR_PC_IMM(templo1, datapos - (cache.pos + 2)) );	// ldr templo1, [pc, datapos]
	}
	cache_addw( BX(templo1) );	// bx templo1
	// nobranch:
	return (datapos);	// caller fills the target via gen_fill_branch_long
}
1015
1016 // calculate long relative offset and fill it into the location pointed to by data
static void inline gen_fill_branch_long(const Bit8u* data) {
	// this is an absolute branch: data is the pool slot reserved by
	// gen_create_branch_long_*, loaded there via ldr/bx
	cache_addd((Bit32u)cache.pos+1,data); // add 1 to keep processor in thumb state
}
1021
static void gen_run_code(void) {
	const Bit8u *pos1, *pos2, *pos3;

#if (__ARM_EABI__)
	// 8-byte stack alignment
	cache_addd(0xe92d4ff0);			// stmfd sp!, {v1-v8,lr}
#else
	cache_addd(0xe92d4df0);			// stmfd sp!, {v1-v5,v7,v8,lr}
#endif

	// r0 holds the entry point; set bit0 so "bx r0" below enters it in thumb state
	cache_addd( ARM_ADD_IMM(HOST_r0, HOST_r0, 1, 0) );      // add r0, r0, #1

	// reserve three instruction slots; they are patched below (once the
	// literal words exist) with pc-relative loads of the global addresses
	pos1 = cache.pos;
	cache_addd( 0 );
	pos2 = cache.pos;
	cache_addd( 0 );
	pos3 = cache.pos;
	cache_addd( 0 );

	// push the return address and jump into the translated code
	cache_addd( ARM_ADD_IMM(HOST_lr, HOST_pc, 4, 0) );			// add lr, pc, #4
	cache_addd( ARM_STR_IMM_M_W(HOST_lr, HOST_sp, 4) );      // str lr, [sp, #-4]!
	cache_addd( ARM_BX(HOST_r0) );			// bx r0

#if (__ARM_EABI__)
	cache_addd(0xe8bd4ff0);			// ldmfd sp!, {v1-v8,lr}
#else
	cache_addd(0xe8bd4df0);			// ldmfd sp!, {v1-v5,v7,v8,lr}
#endif
	cache_addd( ARM_BX(HOST_lr) );			// bx lr

	// align cache.pos to 32 bytes
	if ((((Bitu)cache.pos) & 0x1f) != 0) {
		cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f));
	}

	// patch the reserved slots: each becomes an ldr of the literal word
	// appended right after the patch (offset is pc-relative, pc = slot+8)
	cache_addd(ARM_LDR_IMM(FC_SEGS_ADDR, HOST_pc, cache.pos - (pos1 + 8)),pos1);	// ldr FC_SEGS_ADDR, [pc, #(&Segs)]
	cache_addd((Bit32u)&Segs);	// address of "Segs"

	cache_addd(ARM_LDR_IMM(FC_REGS_ADDR, HOST_pc, cache.pos - (pos2 + 8)),pos2);	// ldr FC_REGS_ADDR, [pc, #(&cpu_regs)]
	cache_addd((Bit32u)&cpu_regs);	// address of "cpu_regs"

	cache_addd(ARM_LDR_IMM(readdata_addr, HOST_pc, cache.pos - (pos3 + 8)),pos3);	// ldr readdata_addr, [pc, #(&core_dynrec.readdata)]
	cache_addd((Bit32u)&core_dynrec.readdata);	// address of "core_dynrec.readdata"

	// align cache.pos to 32 bytes
	if ((((Bitu)cache.pos) & 0x1f) != 0) {
		cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f));
	}
}
1071
1072 // return from a function
// return from a function
static void gen_return_function(void) {
	cache_checkinstr(4);
	// the return address was pushed by gen_run_code (str lr, [sp, #-4]!);
	// 0xbc08 is the raw thumb encoding of "pop {r3}"
	cache_addw(0xbc08);	// pop {r3}
	cache_addw( BX(HOST_r3) );	// bx r3
}
1078
1079
1080 // short unconditional jump (over data pool)
1081 // must emit at most CACHE_DATA_JUMP bytes
static void inline gen_create_branch_short(const Bit8u * func) {
	// unconditional thumb branch; offset is relative to pc (cache.pos+4)
	cache_addw( B_FWD(func - (cache.pos + 4)) );	// b func
}
1085
1086
1087 #ifdef DRC_FLAGS_INVALIDATION
1088
1089 // called when a call to a function can be replaced by a
1090 // call to a simpler function
static void gen_fill_function_ptr(const Bit8u * pos,void* fct_ptr,Bitu flags_type) {
	// if pos points at a forward thumb branch (0xe000 family, emitted to
	// skip over a data pool), resolve it so the real call sequence behind
	// the pool gets patched instead
	if ((*(const Bit16u*)pos & 0xf000) == 0xe000) {
		if ((*(const Bit16u*)pos & 0x0fff) >= ((CACHE_DATA_ALIGN / 2) - 1) &&
			(*(const Bit16u*)pos & 0x0fff) < 0x0800)
		{
			pos = (const Bit8u *) ( ( ( (Bit32u)(*(const Bit16u*)pos & 0x0fff) ) << 1 ) + ((Bit32u)pos + 4) );
		}
	}

#ifdef DRC_FLAGS_INVALIDATION_DCODE
	// the original call sequence is 12 bytes when pos is word-aligned and
	// 10 bytes otherwise (see gen_call_function_setup); replacements either
	// branch over the remainder or pad with nops to the exact length
	if (((Bit32u)pos & 0x03) == 0)
	{
		// try to avoid function calls but rather directly fill in code
		switch (flags_type) {
			case t_ADDb:
			case t_ADDw:
			case t_ADDd:
				cache_addw(ADD_REG(HOST_a1, HOST_a1, HOST_a2),pos+0);	// add a1, a1, a2
				cache_addw(B_FWD(6),pos+2);	// b after_call (pc+6)
				break;
			case t_ORb:
			case t_ORw:
			case t_ORd:
				cache_addw(ORR(HOST_a1, HOST_a2),pos+0);	// orr a1, a2
				cache_addw(B_FWD(6),pos+2);	// b after_call (pc+6)
				break;
			case t_ANDb:
			case t_ANDw:
			case t_ANDd:
				cache_addw(AND(HOST_a1, HOST_a2),pos+0);	// and a1, a2
				cache_addw(B_FWD(6),pos+2);	// b after_call (pc+6)
				break;
			case t_SUBb:
			case t_SUBw:
			case t_SUBd:
				cache_addw(SUB_REG(HOST_a1, HOST_a1, HOST_a2),pos+0);	// sub a1, a1, a2
				cache_addw(B_FWD(6),pos+2);	// b after_call (pc+6)
				break;
			case t_XORb:
			case t_XORw:
			case t_XORd:
				cache_addw(EOR(HOST_a1, HOST_a2),pos+0);	// eor a1, a2
				cache_addw(B_FWD(6),pos+2);	// b after_call (pc+6)
				break;
			case t_CMPb:
			case t_CMPw:
			case t_CMPd:
			case t_TESTb:
			case t_TESTw:
			case t_TESTd:
				// result is unused, only flags were wanted: skip the call entirely
				cache_addw(B_FWD(8),pos+0);	// b after_call (pc+8)
				break;
			case t_INCb:
			case t_INCw:
			case t_INCd:
				cache_addw(ADD_IMM3(HOST_a1, HOST_a1, 1),pos+0);	// add a1, a1, #1
				cache_addw(B_FWD(6),pos+2);	// b after_call (pc+6)
				break;
			case t_DECb:
			case t_DECw:
			case t_DECd:
				cache_addw(SUB_IMM3(HOST_a1, HOST_a1, 1),pos+0);	// sub a1, a1, #1
				cache_addw(B_FWD(6),pos+2);	// b after_call (pc+6)
				break;
			case t_SHLb:
			case t_SHLw:
			case t_SHLd:
				cache_addw(LSL_REG(HOST_a1, HOST_a2),pos+0);	// lsl a1, a2
				cache_addw(B_FWD(6),pos+2);	// b after_call (pc+6)
				break;
			case t_SHRb:
				// clear the upper 24 bits first so the 32bit shift gives the 8bit result
				cache_addw(LSL_IMM(HOST_a1, HOST_a1, 24),pos+0);	// lsl a1, a1, #24
				cache_addw(NOP,pos+2);	// nop
				cache_addw(LSR_IMM(HOST_a1, HOST_a1, 24),pos+4);	// lsr a1, a1, #24
				cache_addw(NOP,pos+6);	// nop
				cache_addw(LSR_REG(HOST_a1, HOST_a2),pos+8);	// lsr a1, a2
				cache_addw(NOP,pos+10);	// nop
				break;
			case t_SHRw:
				// clear the upper 16 bits first so the 32bit shift gives the 16bit result
				cache_addw(LSL_IMM(HOST_a1, HOST_a1, 16),pos+0);	// lsl a1, a1, #16
				cache_addw(NOP,pos+2);	// nop
				cache_addw(LSR_IMM(HOST_a1, HOST_a1, 16),pos+4);	// lsr a1, a1, #16
				cache_addw(NOP,pos+6);	// nop
				cache_addw(LSR_REG(HOST_a1, HOST_a2),pos+8);	// lsr a1, a2
				cache_addw(NOP,pos+10);	// nop
				break;
			case t_SHRd:
				cache_addw(LSR_REG(HOST_a1, HOST_a2),pos+0);	// lsr a1, a2
				cache_addw(B_FWD(6),pos+2);	// b after_call (pc+6)
				break;
			case t_SARb:
				// sign-extend the byte first so the 32bit shift gives the 8bit result
				cache_addw(LSL_IMM(HOST_a1, HOST_a1, 24),pos+0);	// lsl a1, a1, #24
				cache_addw(NOP,pos+2);	// nop
				cache_addw(ASR_IMM(HOST_a1, HOST_a1, 24),pos+4);	// asr a1, a1, #24
				cache_addw(NOP,pos+6);	// nop
				cache_addw(ASR_REG(HOST_a1, HOST_a2),pos+8);	// asr a1, a2
				cache_addw(NOP,pos+10);	// nop
				break;
			case t_SARw:
				// sign-extend the half word first so the 32bit shift gives the 16bit result
				cache_addw(LSL_IMM(HOST_a1, HOST_a1, 16),pos+0);	// lsl a1, a1, #16
				cache_addw(NOP,pos+2);	// nop
				cache_addw(ASR_IMM(HOST_a1, HOST_a1, 16),pos+4);	// asr a1, a1, #16
				cache_addw(NOP,pos+6);	// nop
				cache_addw(ASR_REG(HOST_a1, HOST_a2),pos+8);	// asr a1, a2
				cache_addw(NOP,pos+10);	// nop
				break;
			case t_SARd:
				cache_addw(ASR_REG(HOST_a1, HOST_a2),pos+0);	// asr a1, a2
				cache_addw(B_FWD(6),pos+2);	// b after_call (pc+6)
				break;
			case t_RORb:
				// replicate the low byte across all four bytes, then rotate
				cache_addw(LSL_IMM(HOST_a1, HOST_a1, 24),pos+0);	// lsl a1, a1, #24
				cache_addw(LSR_IMM(templo1, HOST_a1, 8),pos+2);	// lsr templo1, a1, #8
				cache_addw(ORR(HOST_a1, templo1),pos+4);	// orr a1, templo1
				cache_addw(LSR_IMM(templo1, HOST_a1, 16),pos+6);	// lsr templo1, a1, #16
				cache_addw(ORR(HOST_a1, templo1),pos+8);	// orr a1, templo1
				cache_addw(ROR_REG(HOST_a1, HOST_a2),pos+10);	// ror a1, a2
				break;
			case t_RORw:
				// replicate the low half word into both halves, then rotate
				cache_addw(LSL_IMM(HOST_a1, HOST_a1, 16),pos+0);	// lsl a1, a1, #16
				cache_addw(LSR_IMM(templo1, HOST_a1, 16),pos+2);	// lsr templo1, a1, #16
				cache_addw(NOP,pos+4);	// nop
				cache_addw(ORR(HOST_a1, templo1),pos+6);	// orr a1, templo1
				cache_addw(NOP,pos+8);	// nop
				cache_addw(ROR_REG(HOST_a1, HOST_a2),pos+10);	// ror a1, a2
				break;
			case t_RORd:
				cache_addw(ROR_REG(HOST_a1, HOST_a2),pos+0);	// ror a1, a2
				cache_addw(B_FWD(6),pos+2);	// b after_call (pc+6)
				break;
			case t_ROLw:
				// rol by n == ror by (32-n); replicate the half word, then rotate
				cache_addw(LSL_IMM(HOST_a1, HOST_a1, 16),pos+0);	// lsl a1, a1, #16
				cache_addw(NEG(HOST_a2, HOST_a2),pos+2);	// neg a2, a2
				cache_addw(LSR_IMM(templo1, HOST_a1, 16),pos+4);	// lsr templo1, a1, #16
				cache_addw(ADD_IMM8(HOST_a2, 32),pos+6);	// add a2, #32
				cache_addw(ORR(HOST_a1, templo1),pos+8);	// orr a1, templo1
				cache_addw(ROR_REG(HOST_a1, HOST_a2),pos+10);	// ror a1, a2
				break;
			case t_ROLd:
				// rol by n == ror by (32-n)
				cache_addw(NEG(HOST_a2, HOST_a2),pos+0);	// neg a2, a2
				cache_addw(NOP,pos+2);	// nop
				cache_addw(ADD_IMM8(HOST_a2, 32),pos+4);	// add a2, #32
				cache_addw(NOP,pos+6);	// nop
				cache_addw(ROR_REG(HOST_a1, HOST_a2),pos+8);	// ror a1, a2
				cache_addw(NOP,pos+10);	// nop
				break;
			case t_NEGb:
			case t_NEGw:
			case t_NEGd:
				cache_addw(NEG(HOST_a1, HOST_a1),pos+0);	// neg a1, a1
				cache_addw(B_FWD(6),pos+2);	// b after_call (pc+6)
				break;
			default:
				// keep the call, only swap the target in the data pool;
				// pos[0] is the imm8 of the leading "ldr templo1, [pc, #imm8*4]",
				// whose literal sits at pos+4+imm8*4 (aligned pc)
				cache_addd((Bit32u)fct_ptr,pos+4+pos[0]*4);	// simple_func
				break;
		}
	}
	else
	{
		// try to avoid function calls but rather directly fill in code
		switch (flags_type) {
			case t_ADDb:
			case t_ADDw:
			case t_ADDd:
				cache_addw(ADD_REG(HOST_a1, HOST_a1, HOST_a2),pos+0);	// add a1, a1, a2
				cache_addw(B_FWD(4),pos+2);	// b after_call (pc+4)
				break;
			case t_ORb:
			case t_ORw:
			case t_ORd:
				cache_addw(ORR(HOST_a1, HOST_a2),pos+0);	// orr a1, a2
				cache_addw(B_FWD(4),pos+2);	// b after_call (pc+4)
				break;
			case t_ANDb:
			case t_ANDw:
			case t_ANDd:
				cache_addw(AND(HOST_a1, HOST_a2),pos+0);	// and a1, a2
				cache_addw(B_FWD(4),pos+2);	// b after_call (pc+4)
				break;
			case t_SUBb:
			case t_SUBw:
			case t_SUBd:
				cache_addw(SUB_REG(HOST_a1, HOST_a1, HOST_a2),pos+0);	// sub a1, a1, a2
				cache_addw(B_FWD(4),pos+2);	// b after_call (pc+4)
				break;
			case t_XORb:
			case t_XORw:
			case t_XORd:
				cache_addw(EOR(HOST_a1, HOST_a2),pos+0);	// eor a1, a2
				cache_addw(B_FWD(4),pos+2);	// b after_call (pc+4)
				break;
			case t_CMPb:
			case t_CMPw:
			case t_CMPd:
			case t_TESTb:
			case t_TESTw:
			case t_TESTd:
				// result is unused, only flags were wanted: skip the call entirely
				cache_addw(B_FWD(6),pos+0);	// b after_call (pc+6)
				break;
			case t_INCb:
			case t_INCw:
			case t_INCd:
				cache_addw(ADD_IMM3(HOST_a1, HOST_a1, 1),pos+0);	// add a1, a1, #1
				cache_addw(B_FWD(4),pos+2);	// b after_call (pc+4)
				break;
			case t_DECb:
			case t_DECw:
			case t_DECd:
				cache_addw(SUB_IMM3(HOST_a1, HOST_a1, 1),pos+0);	// sub a1, a1, #1
				cache_addw(B_FWD(4),pos+2);	// b after_call (pc+4)
				break;
			case t_SHLb:
			case t_SHLw:
			case t_SHLd:
				cache_addw(LSL_REG(HOST_a1, HOST_a2),pos+0);	// lsl a1, a2
				cache_addw(B_FWD(4),pos+2);	// b after_call (pc+4)
				break;
			case t_SHRb:
				// clear the upper 24 bits first so the 32bit shift gives the 8bit result
				cache_addw(LSL_IMM(HOST_a1, HOST_a1, 24),pos+0);	// lsl a1, a1, #24
				cache_addw(NOP,pos+2);	// nop
				cache_addw(LSR_IMM(HOST_a1, HOST_a1, 24),pos+4);	// lsr a1, a1, #24
				cache_addw(NOP,pos+6);	// nop
				cache_addw(LSR_REG(HOST_a1, HOST_a2),pos+8);	// lsr a1, a2
				break;
			case t_SHRw:
				// clear the upper 16 bits first so the 32bit shift gives the 16bit result
				cache_addw(LSL_IMM(HOST_a1, HOST_a1, 16),pos+0);	// lsl a1, a1, #16
				cache_addw(NOP,pos+2);	// nop
				cache_addw(LSR_IMM(HOST_a1, HOST_a1, 16),pos+4);	// lsr a1, a1, #16
				cache_addw(NOP,pos+6);	// nop
				cache_addw(LSR_REG(HOST_a1, HOST_a2),pos+8);	// lsr a1, a2
				break;
			case t_SHRd:
				cache_addw(LSR_REG(HOST_a1, HOST_a2),pos+0);	// lsr a1, a2
				cache_addw(B_FWD(4),pos+2);	// b after_call (pc+4)
				break;
			case t_SARb:
				// sign-extend the byte first so the 32bit shift gives the 8bit result
				cache_addw(LSL_IMM(HOST_a1, HOST_a1, 24),pos+0);	// lsl a1, a1, #24
				cache_addw(NOP,pos+2);	// nop
				cache_addw(ASR_IMM(HOST_a1, HOST_a1, 24),pos+4);	// asr a1, a1, #24
				cache_addw(NOP,pos+6);	// nop
				cache_addw(ASR_REG(HOST_a1, HOST_a2),pos+8);	// asr a1, a2
				break;
			case t_SARw:
				// sign-extend the half word first so the 32bit shift gives the 16bit result
				cache_addw(LSL_IMM(HOST_a1, HOST_a1, 16),pos+0);	// lsl a1, a1, #16
				cache_addw(NOP,pos+2);	// nop
				cache_addw(ASR_IMM(HOST_a1, HOST_a1, 16),pos+4);	// asr a1, a1, #16
				cache_addw(NOP,pos+6);	// nop
				cache_addw(ASR_REG(HOST_a1, HOST_a2),pos+8);	// asr a1, a2
				break;
			case t_SARd:
				cache_addw(ASR_REG(HOST_a1, HOST_a2),pos+0);	// asr a1, a2
				cache_addw(B_FWD(4),pos+2);	// b after_call (pc+4)
				break;
			case t_RORw:
				// replicate the low half word into both halves, then rotate
				cache_addw(LSL_IMM(HOST_a1, HOST_a1, 16),pos+0);	// lsl a1, a1, #16
				cache_addw(LSR_IMM(templo1, HOST_a1, 16),pos+2);	// lsr templo1, a1, #16
				cache_addw(NOP,pos+4);	// nop
				cache_addw(ORR(HOST_a1, templo1),pos+6);	// orr a1, templo1
				cache_addw(ROR_REG(HOST_a1, HOST_a2),pos+8);	// ror a1, a2
				break;
			case t_RORd:
				cache_addw(ROR_REG(HOST_a1, HOST_a2),pos+0);	// ror a1, a2
				cache_addw(B_FWD(4),pos+2);	// b after_call (pc+4)
				break;
			case t_ROLd:
				// rol by n == ror by (32-n)
				cache_addw(NEG(HOST_a2, HOST_a2),pos+0);	// neg a2, a2
				cache_addw(NOP,pos+2);	// nop
				cache_addw(ADD_IMM8(HOST_a2, 32),pos+4);	// add a2, #32
				cache_addw(NOP,pos+6);	// nop
				cache_addw(ROR_REG(HOST_a1, HOST_a2),pos+8);	// ror a1, a2
				break;
			case t_NEGb:
			case t_NEGw:
			case t_NEGd:
				cache_addw(NEG(HOST_a1, HOST_a1),pos+0);	// neg a1, a1
				cache_addw(B_FWD(4),pos+2);	// b after_call (pc+4)
				break;
			default:
				// keep the call, only swap the target in the data pool;
				// pos[0] is the imm8 of the leading "ldr templo1, [pc, #imm8*4]",
				// whose literal sits at pos+2+imm8*4 (pc aligned down)
				cache_addd((Bit32u)fct_ptr,pos+2+pos[0]*4);	// simple_func
				break;
		}

	}
#else
	if (((Bit32u)pos & 0x03) == 0)
	{
		cache_addd((Bit32u)fct_ptr,pos+4+pos[0]*4);	// simple_func
	}
	else
	{
		cache_addd((Bit32u)fct_ptr,pos+2+pos[0]*4);	// simple_func
	}
#endif
}
1385 #endif
1386
1387 #ifdef DRC_USE_SEGS_ADDR
1388
1389 // mov 16bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 2 must be zero)
1390 // 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index) {
	cache_checkinstr(4);
	// copy FC_SEGS_ADDR down into a "lo" register so it can serve as a load base
	cache_addw( MOV_LO_HI(templo1, FC_SEGS_ADDR) );	// mov templo1, FC_SEGS_ADDR
	cache_addw( LDRH_IMM(dest_reg, templo1, index) );	// ldrh dest_reg, [templo1, #index]
}
1396
1397 // mov 32bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 4 must be zero)
static void gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index) {
	cache_checkinstr(4);
	// copy FC_SEGS_ADDR down into a "lo" register so it can serve as a load base
	cache_addw( MOV_LO_HI(templo1, FC_SEGS_ADDR) );	// mov templo1, FC_SEGS_ADDR
	cache_addw( LDR_IMM(dest_reg, templo1, index) );	// ldr dest_reg, [templo1, #index]
}
1403
1404 // add a 32bit value from Segs[index] to a full register using FC_SEGS_ADDR (index modulo 4 must be zero)
static void gen_add_seg32_to_reg(HostReg reg,Bitu index) {
	cache_checkinstr(6);
	// load Segs[index] via a scratch base, then accumulate into reg
	cache_addw( MOV_LO_HI(templo1, FC_SEGS_ADDR) );	// mov templo1, FC_SEGS_ADDR
	cache_addw( LDR_IMM(templo2, templo1, index) );	// ldr templo2, [templo1, #index]
	cache_addw( ADD_REG(reg, reg, templo2) );	// add reg, reg, templo2
}
1411
1412 #endif
1413
1414 #ifdef DRC_USE_REGS_ADDR
1415
1416 // mov 16bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 2 must be zero)
1417 // 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index) {
	cache_checkinstr(4);
	// copy FC_REGS_ADDR down into a "lo" register so it can serve as a load base
	cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );	// mov templo2, FC_REGS_ADDR
	cache_addw( LDRH_IMM(dest_reg, templo2, index) );	// ldrh dest_reg, [templo2, #index]
}
1423
1424 // mov 32bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 4 must be zero)
static void gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index) {
	cache_checkinstr(4);
	// copy FC_REGS_ADDR down into a "lo" register so it can serve as a load base
	cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );	// mov templo2, FC_REGS_ADDR
	cache_addw( LDR_IMM(dest_reg, templo2, index) );	// ldr dest_reg, [templo2, #index]
}
1430
1431 // move a 32bit (dword==true) or 16bit (dword==false) value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
1432 // 16bit moves may destroy the upper 16bit of the destination register
gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword)1433 static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword) {
1434 cache_checkinstr(4);
1435 cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) ); // mov templo2, FC_REGS_ADDR
1436 if (dword) {
1437 cache_addw( LDR_IMM(dest_reg, templo2, index) ); // ldr dest_reg, [templo2, #index]
1438 } else {
1439 cache_addw( LDRH_IMM(dest_reg, templo2, index) ); // ldrh dest_reg, [templo2, #index]
1440 }
1441 }
1442
1443 // move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
1444 // the upper 24bit of the destination register can be destroyed
1445 // this function does not use FC_OP1/FC_OP2 as dest_reg as these
1446 // registers might not be directly byte-accessible on some architectures
static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index) {
	cache_checkinstr(4);
	// copy FC_REGS_ADDR down into a "lo" register so it can serve as a load base
	cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );	// mov templo2, FC_REGS_ADDR
	cache_addw( LDRB_IMM(dest_reg, templo2, index) );	// ldrb dest_reg, [templo2, #index]
}
1452
1453 // move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
1454 // the upper 24bit of the destination register can be destroyed
1455 // this function can use FC_OP1/FC_OP2 as dest_reg which are
1456 // not directly byte-accessible on some architectures
static void inline gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index) {
	cache_checkinstr(4);
	// every "lo" register is byte-accessible on ARM, so this is
	// identical to gen_mov_regbyte_to_reg_low
	cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );	// mov templo2, FC_REGS_ADDR
	cache_addw( LDRB_IMM(dest_reg, templo2, index) );	// ldrb dest_reg, [templo2, #index]
}
1462
1463
1464 // add a 32bit value from cpu_regs[index] to a full register using FC_REGS_ADDR (index modulo 4 must be zero)
static void gen_add_regval32_to_reg(HostReg reg,Bitu index) {
	cache_checkinstr(6);
	// load cpu_regs[index] via a scratch base, then accumulate into reg
	cache_addw( MOV_LO_HI(templo2, FC_REGS_ADDR) );	// mov templo2, FC_REGS_ADDR
	cache_addw( LDR_IMM(templo1, templo2, index) );	// ldr templo1, [templo2, #index]
	cache_addw( ADD_REG(reg, reg, templo1) );	// add reg, reg, templo1
}
1471
1472
1473 // move 16bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 2 must be zero)
static void gen_mov_regval16_from_reg(HostReg src_reg,Bitu index) {
	cache_checkinstr(4);
	// copy FC_REGS_ADDR down into a "lo" register so it can serve as a store base
	cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) );	// mov templo1, FC_REGS_ADDR
	cache_addw( STRH_IMM(src_reg, templo1, index) );	// strh src_reg, [templo1, #index]
}
1479
1480 // move 32bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 4 must be zero)
static void gen_mov_regval32_from_reg(HostReg src_reg,Bitu index) {
	cache_checkinstr(4);
	// copy FC_REGS_ADDR down into a "lo" register so it can serve as a store base
	cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) );	// mov templo1, FC_REGS_ADDR
	cache_addw( STR_IMM(src_reg, templo1, index) );	// str src_reg, [templo1, #index]
}
1486
1487 // move 32bit (dword==true) or 16bit (dword==false) of a register into cpu_regs[index] using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword)1488 static void gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword) {
1489 cache_checkinstr(4);
1490 cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) ); // mov templo1, FC_REGS_ADDR
1491 if (dword) {
1492 cache_addw( STR_IMM(src_reg, templo1, index) ); // str src_reg, [templo1, #index]
1493 } else {
1494 cache_addw( STRH_IMM(src_reg, templo1, index) ); // strh src_reg, [templo1, #index]
1495 }
1496 }
1497
1498 // move the lowest 8bit of a register into cpu_regs[index] using FC_REGS_ADDR
static void gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index) {
	cache_checkinstr(4);
	// copy FC_REGS_ADDR down into a "lo" register so it can serve as a store base
	cache_addw( MOV_LO_HI(templo1, FC_REGS_ADDR) );	// mov templo1, FC_REGS_ADDR
	cache_addw( STRB_IMM(src_reg, templo1, index) );	// strb src_reg, [templo1, #index]
}
1504
1505 #endif
1506