1 /*
2  *  SPDX-License-Identifier: GPL-2.0-or-later
3  *
4  *  Copyright (C) 2020-2021  The DOSBox Staging Team
5  *  Copyright (C) 2002-2019  The DOSBox Team
6  *
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License along
18  *  with this program; if not, write to the Free Software Foundation, Inc.,
19  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20  */
21 
22 /* PowerPC (big endian, 32-bit) backend */
23 
// Configuration switches describing the capabilities of this architecture
// and selecting how the recompiler generates code for it.

// Define this to protect FC_ADDR over function calls if necessary
// (left disabled in this backend).
//#define DRC_PROTECT_ADDR_REG

// Prefer functions that do not generate flags whenever possible
#define DRC_FLAGS_INVALIDATION
// Try to replace calls to the _simple functions by inline code
#define DRC_FLAGS_INVALIDATION_DCODE

// Integer type that has the same size as a pointer
#define DRC_PTR_SIZE_IM Bit32u

// Calling convention modifiers (both expand to nothing here)
#define DRC_FC
#define DRC_CALL_CONV

#define DRC_USE_REGS_ADDR
#define DRC_USE_SEGS_ADDR

#ifdef _CALL_SYSV
// Disable this if your toolchain does not provide a _SDA_BASE_ symbol
// (the constant value held in r13)
#define USE_SDA_BASE
#endif
49 
// Host register numbering. The enumerator value is the register number that
// is placed into the instruction encodings built by IMM/EXT/RLW.
enum HostReg {
	HOST_R0  = 0,
	HOST_R1  = 1,
	HOST_R2  = 2,
	HOST_R3  = 3,
	HOST_R4  = 4,
	HOST_R5  = 5,
	HOST_R6  = 6,
	HOST_R7  = 7,
	HOST_R8  = 8,
	HOST_R9  = 9,
	HOST_R10 = 10,
	HOST_R11 = 11,
	HOST_R12 = 12,
	HOST_R13 = 13,
	HOST_R14 = 14,
	HOST_R15 = 15,
	HOST_R16 = 16,
	HOST_R17 = 17,
	HOST_R18 = 18,
	HOST_R19 = 19,
	HOST_R20 = 20,
	HOST_R21 = 21,
	HOST_R22 = 22,
	HOST_R23 = 23,
	HOST_R24 = 24,
	HOST_R25 = 25,
	HOST_R26 = 26, // generic non-volatile (used for inline adc/sbb)
	HOST_R27 = 27, // points to current CacheBlock (decode.block)
	HOST_R28 = 28, // points to fpu
	HOST_R29 = 29, // FC_ADDR
	HOST_R30 = 30, // points to Segs
	HOST_R31 = 31, // points to cpu_regs

	HOST_NONE = 32 // one past the last real register
};
87 
88 static const HostReg RegParams[] = {
89 	HOST_R3, HOST_R4, HOST_R5, HOST_R6,
90 	HOST_R7, HOST_R8, HOST_R9, HOST_R10
91 };
92 
93 #if C_FPU
94 #include "fpu.h"
95 extern struct FPU_rec fpu;
96 #endif
97 
98 #if defined(USE_SDA_BASE)
99 extern Bit32u _SDA_BASE_[];
100 #endif
101 
// Register in which called functions deliver their return value
#define FC_RETOP HOST_R3

// Register used for address calculations. If the ABI does not guarantee
// that this register survives function calls, DRC_PROTECT_ADDR_REG has to
// be defined above.
#define FC_ADDR HOST_R29

// Register holding the address of Segs[]
#define FC_SEGS_ADDR HOST_R30
// Register holding the address of cpu_regs[]
#define FC_REGS_ADDR HOST_R31

// Register carrying the first function parameter
#define FC_OP1 RegParams[0]

// Register carrying the second function parameter
#define FC_OP2 RegParams[1]

// Special register carrying the third parameter for _R3 calls
// (byte accessible)
#define FC_OP3 RegParams[2]

// First register for byte-accessible temporary values
#define FC_TMP_BA1 FC_OP2

// Second register for byte-accessible temporary values
#define FC_TMP_BA2 FC_OP1

// Temporary register for LEA
#define TEMP_REG_DRC HOST_R10
132 
// Instruction word builders. Each macro packs its fields into one 32-bit
// instruction word:
//   IMM: primary opcode, two register fields and a 16-bit immediate
//   EXT: primary opcode 31, three register fields, extended opcode, Rc bit
//   RLW: primary opcode, two register fields, shift, mask begin/end, Rc bit
#define IMM(opcd, rt, ra, value)              (((opcd)<<26)|((rt)<<21)|((ra)<<16)|             (((Bit32u)(value))&0xFFFF))
#define EXT(rt, ra, rb, xo, rcbit)            (  (31<<26)|((rt)<<21)|((ra)<<16)|((rb)<<11)|          ((xo)<<1)|(rcbit))
#define RLW(opcd, rs, ra, shift, mbeg, mend, rcbit) (((opcd)<<26)|((rs) <<21)|((ra)<<16)|  ((shift)<<11)|((mbeg)<<6)|((mend)<<1)|(rcbit))

// Same builders, but the resulting word is appended to the code cache
#define IMM_OP(opcd, rt, ra, value)              cache_addd(IMM(opcd, rt, ra, value))
#define EXT_OP(rt, ra, rb, xo, rcbit)            cache_addd(EXT(rt, ra, rb, xo, rcbit))
#define RLW_OP(opcd, rs, ra, shift, mbeg, mend, rcbit) cache_addd(RLW(opcd, rs, ra, shift, mbeg, mend, rcbit))
140 
141 // move a full register from reg_src to reg_dst
gen_mov_regs(HostReg reg_dst,HostReg reg_src)142 static void gen_mov_regs(HostReg reg_dst,HostReg reg_src)
143 {
144 	if (reg_dst != reg_src)
145 		EXT_OP(reg_src,reg_dst,reg_src,444,0); // or dst,src,src (mr dst,src)
146 }
147 
148 // move a 16bit constant value into dest_reg
149 // the upper 16bit of the destination register may be destroyed
gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm)150 static void gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm)
151 {
152 	IMM_OP(14, dest_reg, 0, imm); // li dest,imm
153 }
154 
155 DRC_PTR_SIZE_IM block_ptr;
156 
157 // Helper for loading addresses
gen_addr(Bit32s & addr,HostReg dest)158 static HostReg inline gen_addr(Bit32s &addr, HostReg dest)
159 {
160 	Bit32s off;
161 
162 	if ((Bit16s)addr == addr)
163 		return HOST_R0;
164 
165 	off = addr - (Bit32s)&Segs;
166 	if ((Bit16s)off == off)
167 	{
168 		addr = off;
169 		return FC_SEGS_ADDR;
170 	}
171 
172 	off = addr - (Bit32s)&cpu_regs;
173 	if ((Bit16s)off == off)
174 	{
175 		addr = off;
176 		return FC_REGS_ADDR;
177 	}
178 
179 	off = addr - (Bit32s)block_ptr;
180 	if ((Bit16s)off == off)
181 	{
182 		addr = off;
183 		return HOST_R27;
184 	}
185 
186 #if C_FPU
187 	off = addr - (Bit32s)&fpu;
188 	if ((Bit16s)off == off)
189 	{
190 		addr = off;
191 		return HOST_R28;
192 	}
193 #endif
194 
195 #if defined(USE_SDA_BASE)
196 	off = addr - (Bit32s)_SDA_BASE_;
197 	if ((Bit16s)off == off)
198 	{
199 		addr = off;
200 		return HOST_R13;
201 	}
202 #endif
203 
204 	IMM_OP(15, dest, 0, (addr+0x8000)>>16); // lis dest, addr@ha
205 	addr = (Bit16s)addr;
206 	return dest;
207 }
208 
209 // move a 32bit constant value into dest_reg
gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm)210 static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm)
211 {
212 	HostReg ld = gen_addr((Bit32s&)imm, dest_reg);
213 	if (imm || ld != dest_reg)
214 		IMM_OP(14, dest_reg, ld, imm);   // addi dest_reg, ldr, imm@l
215 }
216 
217 // move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
218 // 16bit moves may destroy the upper 16bit of the destination register
gen_mov_word_to_reg(HostReg dest_reg,void * data,bool dword)219 static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword)
220 {
221 	Bit32s addr = (Bit32s)data;
222 	HostReg ld = gen_addr(addr, dest_reg);
223 	IMM_OP(dword ? 32:40, dest_reg, ld, addr);  // lwz/lhz dest, addr@l(ld)
224 }
225 
226 // move a 32bit (dword==true) or 16bit (dword==false) value from host memory into dest_reg
gen_mov_LE_word_to_reg(HostReg dest_reg,void * data,bool dword)227 static void gen_mov_LE_word_to_reg(HostReg dest_reg,void* data, bool dword) {
228 	Bit32u addr = (Bit32u)data;
229 	gen_mov_dword_to_reg_imm(dest_reg, addr);
230 	EXT_OP(dest_reg, 0, dest_reg, dword ? 534 : 790, 0); // lwbrx/lhbrx dest, 0, dest
231 }
232 
233 // move an 8bit constant value into dest_reg
234 // the upper 24bit of the destination register can be destroyed
235 // this function does not use FC_OP1/FC_OP2 as dest_reg as these
236 // registers might not be directly byte-accessible on some architectures
gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm)237 static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
238 	gen_mov_word_to_reg_imm(dest_reg, imm);
239 }
240 
241 // move an 8bit constant value into dest_reg
242 // the upper 24bit of the destination register can be destroyed
243 // this function can use FC_OP1/FC_OP2 as dest_reg which are
244 // not directly byte-accessible on some architectures
gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm)245 static void gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) {
246 	gen_mov_word_to_reg_imm(dest_reg, imm);
247 }
248 
249 // move 32bit (dword==true) or 16bit (dword==false) of a register into memory
gen_mov_word_from_reg(HostReg src_reg,void * dest,bool dword)250 static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword)
251 {
252 	Bit32s addr = (Bit32s)dest;
253 	HostReg ld = gen_addr(addr, HOST_R8);
254 	IMM_OP(dword ? 36 : 44, src_reg, ld, addr);  // stw/sth src,addr@l(ld)
255 }
256 
257 // move an 8bit value from memory into dest_reg
258 // the upper 24bit of the destination register can be destroyed
259 // this function does not use FC_OP1/FC_OP2 as dest_reg as these
260 // registers might not be directly byte-accessible on some architectures
gen_mov_byte_to_reg_low(HostReg dest_reg,void * data)261 static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data)
262 {
263 	Bit32s addr = (Bit32s)data;
264 	HostReg ld = gen_addr(addr, dest_reg);
265 	IMM_OP(34, dest_reg, ld, addr);  // lbz dest,addr@l(ld)
266 }
267 
268 // move an 8bit value from memory into dest_reg
269 // the upper 24bit of the destination register can be destroyed
270 // this function can use FC_OP1/FC_OP2 as dest_reg which are
271 // not directly byte-accessible on some architectures
gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void * data)272 static void gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
273 	gen_mov_byte_to_reg_low(dest_reg, data);
274 }
275 
276 // move the lowest 8bit of a register into memory
gen_mov_byte_from_reg_low(HostReg src_reg,void * dest)277 static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest)
278 {
279 	Bit32s addr = (Bit32s)dest;
280 	HostReg ld = gen_addr(addr, HOST_R8);
281 	IMM_OP(38, src_reg, ld, addr);  // stb src_reg,addr@l(ld)
282 }
283 
284 // convert an 8bit word to a 32bit dword
285 // the register is zero-extended (sign==false) or sign-extended (sign==true)
gen_extend_byte(bool sign,HostReg reg)286 static void gen_extend_byte(bool sign,HostReg reg)
287 {
288 	if (sign)
289 		EXT_OP(reg, reg, 0, 954, 0); // extsb reg, src
290 	else
291 		RLW_OP(21, reg, reg, 0, 24, 31, 0); // rlwinm reg, src, 0, 24, 31
292 }
293 
294 // convert a 16bit word to a 32bit dword
295 // the register is zero-extended (sign==false) or sign-extended (sign==true)
gen_extend_word(bool sign,HostReg reg)296 static void gen_extend_word(bool sign,HostReg reg)
297 {
298 	if (sign)
299 		EXT_OP(reg, reg, 0, 922, 0); // extsh reg, reg
300 	else
301 		RLW_OP(21, reg, reg, 0, 16, 31, 0); // rlwinm reg, reg, 0, 16, 31
302 }
303 
304 // add a 32bit value from memory to a full register
gen_add(HostReg reg,void * op)305 static void gen_add(HostReg reg,void* op)
306 {
307 	gen_mov_word_to_reg(HOST_R8, op, true); // r8 = *(Bit32u*)op
308 	EXT_OP(reg,reg,HOST_R8,266,0);          // add reg,reg,r8
309 }
310 
311 // add a 32bit value from host memory to a full register
gen_add_LE(HostReg reg,void * op)312 static void gen_add_LE(HostReg reg,void* op)
313 {
314 	gen_mov_LE_word_to_reg(HOST_R8, op, true); // r8 = op[0]|(op[1]<<8)|(op[2]<<16)|(op[3]<<24);
315 	EXT_OP(reg,reg,HOST_R8,266,0);       // add reg,reg,r8
316 }
317 
318 // add a 32bit constant value to a full register
gen_add_imm(HostReg reg,Bit32u imm)319 static void gen_add_imm(HostReg reg,Bit32u imm)
320 {
321 	if ((Bit16s)imm != (Bit32s)imm)
322 		IMM_OP(15, reg, reg, (imm+0x8000)>>16); // addis reg,reg,imm@ha
323 	if ((Bit16s)imm)
324 		IMM_OP(14, reg, reg, imm);              // addi reg, reg, imm@l
325 }
326 
327 // and a 32bit constant value with a full register
gen_and_imm(HostReg reg,Bit32u imm)328 static void gen_and_imm(HostReg reg,Bit32u imm) {
329 	Bits sbit,ebit,tbit,bbit,abit,i;
330 
331 	// sbit = number of leading 0 bits
332 	// ebit = number of trailing 0 bits
333 	// tbit = number of total 0 bits
334 	// bbit = number of leading 1 bits
335 	// abit = number of trailing 1 bits
336 
337 	if (imm == 0xFFFFFFFF)
338 		return;
339 
340 	if (!imm)
341 		return gen_mov_word_to_reg_imm(reg, 0);
342 
343 	sbit = ebit = tbit = bbit = abit = 0;
344 	for (i=0; i < 32; i++)
345 	{
346 		if (!(imm & (1<<(31-i))))
347 		{
348 			abit = 0;
349 			tbit++;
350 			if (sbit == i)
351 				sbit++;
352 			ebit++;
353 		}
354 		else
355 		{
356 			ebit = 0;
357 			if (bbit == i)
358 				bbit++;
359 			abit++;
360 		}
361 	}
362 
363 	if (sbit + ebit == tbit)
364 	{
365 		RLW_OP(21,reg,reg,0,sbit,31-ebit,0); // rlwinm reg,reg,0,sbit,31-ebit
366 		return;
367 	}
368 
369 	if (sbit >= 16)
370 	{
371 		IMM_OP(28,reg,reg,imm); // andi. reg,reg,imm
372 		return;
373 	}
374 	if (ebit >= 16)
375 	{
376 		IMM_OP(29,reg,reg,imm>>16); // andis. reg,reg,(imm>>16)
377 		return;
378 	}
379 
380 	if (bbit + abit == (32 - tbit))
381 	{
382 		RLW_OP(21,reg,reg,0,32-abit,bbit-1,0); // rlwinm reg,reg,0,32-abit,bbit-1
383 		return;
384 	}
385 
386 	IMM_OP(28, reg, HOST_R0, imm); // andi. r0, reg, imm@l
387 	IMM_OP(29, reg, reg, imm>16);  // andis. reg, reg, imm@h
388 	EXT_OP(reg, reg, HOST_R0, 444, 0); // or reg, reg, r0
389 }
390 
391 // move a 32bit constant value into memory
gen_mov_direct_dword(void * dest,Bit32u imm)392 static void gen_mov_direct_dword(void* dest,Bit32u imm) {
393 	gen_mov_dword_to_reg_imm(HOST_R9, imm);
394 	gen_mov_word_from_reg(HOST_R9, dest, 1);
395 }
396 
397 // move an address into memory (assumes address != NULL)
gen_mov_direct_ptr(void * dest,DRC_PTR_SIZE_IM imm)398 static void inline gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm)
399 {
400 	block_ptr = 0;
401 	gen_mov_dword_to_reg_imm(HOST_R27, imm);
402 	// this will be used to look-up the linked blocks
403 	block_ptr = imm;
404 	gen_mov_word_from_reg(HOST_R27, dest, 1);
405 }
406 
407 // add a 32bit (dword==true) or 16bit (dword==false) constant value to a 32bit memory value
gen_add_direct_word(void * dest,Bit32u imm,bool dword)408 static void gen_add_direct_word(void* dest,Bit32u imm,bool dword)
409 {
410 	HostReg ld;
411 	Bit32s addr = (Bit32s)dest;
412 
413 	if (!dword)
414 	{
415 		imm &= 0xFFFF;
416 		addr += 2;
417 	}
418 
419 	if (!imm)
420 		return;
421 
422 	ld = gen_addr(addr, HOST_R8);
423 	IMM_OP(dword ? 32 : 40, HOST_R9, ld, addr); // lwz/lhz r9, addr@l(ld)
424 	if (dword && (Bit16s)imm != (Bit32s)imm)
425 		IMM_OP(15, HOST_R9, HOST_R9, (imm+0x8000)>>16); // addis r9,r9,imm@ha
426 	if (!dword || (Bit16s)imm)
427 		IMM_OP(14, HOST_R9, HOST_R9, imm);      // addi r9,r9,imm@l
428 	IMM_OP(dword ? 36 : 44, HOST_R9, ld, addr); // stw/sth r9, addr@l(ld)
429 }
430 
431 // subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a 32-bit memory value
gen_sub_direct_word(void * dest,Bit32u imm,bool dword)432 static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
433 	gen_add_direct_word(dest, -(Bit32s)imm, dword);
434 }
435 
436 // effective address calculation, destination is dest_reg
437 // scale_reg is scaled by scale (scale_reg*(2^scale)) and
438 // added to dest_reg, then the immediate value is added
gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm)439 static inline void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm)
440 {
441 	if (scale)
442 	{
443 		RLW_OP(21, scale_reg, HOST_R8, scale, 0, 31-scale, 0); // slwi scale_reg,r8,scale
444 		scale_reg = HOST_R8;
445 	}
446 
447 	gen_add_imm(dest_reg, imm);
448 	EXT_OP(dest_reg, dest_reg, scale_reg, 266, 0); // add dest,dest,scaled
449 }
450 
451 // effective address calculation, destination is dest_reg
452 // dest_reg is scaled by scale (dest_reg*(2^scale)),
453 // then the immediate value is added
gen_lea(HostReg dest_reg,Bitu scale,Bits imm)454 static inline void gen_lea(HostReg dest_reg,Bitu scale,Bits imm)
455 {
456 	if (scale)
457 	{
458 		RLW_OP(21, dest_reg, dest_reg, scale, 0, 31-scale, 0); // slwi dest,dest,scale
459 	}
460 
461 	gen_add_imm(dest_reg, imm);
462 }
463 
464 // helper function to choose direct or indirect call
do_gen_call(void * func,Bit32u * pos,bool pad)465 static int inline do_gen_call(void *func, Bit32u *pos, bool pad)
466 {
467 	Bit32s f = (Bit32s)func;
468 	Bit32s off = f - (Bit32s)pos;
469 
470 	// relative branches are limited to +/- ~32MB
471 	if (off < 0x02000000 && off >= -0x02000000)
472 	{
473 		pos[0] = 0x48000001 | (off & 0x03FFFFFC); // bl func
474 		if (pad)
475 		{
476 			pos[1] = 0x4800000C;       // b 12+
477 			pos[2] = pos[3] = IMM(24, 0, 0, 0); // nop
478 			return 16;
479 		}
480 		return 4;
481 	}
482 
483 	pos[0] = IMM(15, HOST_R8, 0, f>>16);      // lis r8,imm@h
484 	pos[1] = IMM(24, HOST_R8, HOST_R8, f);    // ori r8,r8,imm@l
485 	pos[2] = EXT(HOST_R8, 9, 0, 467, 0);      // mtctr r8
486 	pos[3] = IMM(19, 0x14, 0, (528<<1)|1); // bctrl
487 	return 16;
488 }
489 
490 // generate a call to a parameterless function
491 static void inline gen_call_function_raw(void * func,bool fastcall=true)
492 {
493 	cache.pos += do_gen_call(func, (Bit32u*)cache.pos, fastcall);
494 }
495 
496 // generate a call to a function with paramcount parameters
497 // note: the parameters are loaded in the architecture specific way
498 // using the gen_load_param_ functions below
499 static Bit32u inline gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false)
500 {
501 	Bit32u proc_addr=(Bit32u)cache.pos;
502 	gen_call_function_raw(func,fastcall);
503 	return proc_addr;
504 }
505 
506 // load an immediate value as param'th function parameter
gen_load_param_imm(Bitu imm,Bitu param)507 static void inline gen_load_param_imm(Bitu imm,Bitu param) {
508 	gen_mov_dword_to_reg_imm(RegParams[param], imm);
509 }
510 
511 // load an address as param'th function parameter
gen_load_param_addr(Bitu addr,Bitu param)512 static void inline gen_load_param_addr(Bitu addr,Bitu param) {
513 	gen_load_param_imm(addr, param);
514 }
515 
516 // load a host-register as param'th function parameter
gen_load_param_reg(Bitu reg,Bitu param)517 static void inline gen_load_param_reg(Bitu reg,Bitu param) {
518 	gen_mov_regs(RegParams[param], (HostReg)reg);
519 }
520 
521 // load a value from memory as param'th function parameter
gen_load_param_mem(Bitu mem,Bitu param)522 static void inline gen_load_param_mem(Bitu mem,Bitu param) {
523 	gen_mov_word_to_reg(RegParams[param], (void*)mem, true);
524 }
525 
526 // jump to an address pointed at by ptr, offset is in imm
527 static void gen_jmp_ptr(void * ptr,Bits imm=0) {
528 	gen_mov_word_to_reg(HOST_R8,ptr,true);                // r8 = *(Bit32u*)ptr
529 	if ((Bit16s)imm != (Bit32s)imm)
530 		IMM_OP(15, HOST_R8, HOST_R8, (imm + 0x8000)>>16); // addis r8, r8, imm@ha
531 	IMM_OP(32, HOST_R8, HOST_R8, imm);                    // lwz r8, imm@l(r8)
532 	EXT_OP(HOST_R8, 9, 0, 467, 0);                        // mtctr r8
533 	IMM_OP(19, 0x14, 0, 528<<1);                       // bctr
534 }
535 
536 // short conditional jump (+-127 bytes) if register is zero
537 // the destination is set by gen_fill_branch() later
gen_create_branch_on_zero(HostReg reg,bool dword)538 static Bit32u gen_create_branch_on_zero(HostReg reg,bool dword)
539 {
540 	if (!dword)
541 		IMM_OP(28,reg,HOST_R0,0xFFFF); // andi. r0,reg,0xFFFF
542 	else
543 		IMM_OP(11, 0, reg, 0);         // cmpwi cr0, reg, 0
544 
545 	IMM_OP(16, 0x0C, 2, 0); // bc 12,CR0[Z] (beq)
546 	return ((Bit32u)cache.pos-4);
547 }
548 
549 // short conditional jump (+-127 bytes) if register is nonzero
550 // the destination is set by gen_fill_branch() later
gen_create_branch_on_nonzero(HostReg reg,bool dword)551 static Bit32u gen_create_branch_on_nonzero(HostReg reg,bool dword)
552 {
553 	if (!dword)
554 		IMM_OP(28,reg,HOST_R0,0xFFFF); // andi. r0,reg,0xFFFF
555 	else
556 		IMM_OP(11, 0, reg, 0);         // cmpwi cr0, reg, 0
557 
558 	IMM_OP(16, 0x04, 2, 0); // bc 4,CR0[Z] (bne)
559 	return ((Bit32u)cache.pos-4);
560 }
561 
562 // calculate relative offset and fill it into the location pointed to by data
gen_fill_branch(DRC_PTR_SIZE_IM data)563 static void gen_fill_branch(DRC_PTR_SIZE_IM data)
564 {
565 #if C_DEBUG
566 	Bits len=(Bit32u)cache.pos-data;
567 	if (len<0) len=-len;
568 	if (len >= 0x8000) LOG_MSG("Big jump %d",len);
569 #endif
570 
571 	((Bit16u*)data)[1] =((Bit32u)cache.pos-data) & 0xFFFC;
572 }
573 
574 
575 // conditional jump if register is nonzero
576 // for isdword==true the 32bit of the register are tested
577 // for isdword==false the lowest 8bit of the register are tested
gen_create_branch_long_nonzero(HostReg reg,bool dword)578 static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool dword)
579 {
580 	if (!dword)
581 		IMM_OP(28,reg,HOST_R0,0xFF); // andi. r0,reg,0xFF
582 	else
583 		IMM_OP(11, 0, reg, 0);       // cmpwi cr0, reg, 0
584 
585 	IMM_OP(16, 0x04, 2, 0); // bne
586 	return ((Bit32u)cache.pos-4);
587 }
588 
589 // compare 32bit-register against zero and jump if value less/equal than zero
gen_create_branch_long_leqzero(HostReg reg)590 static Bit32u gen_create_branch_long_leqzero(HostReg reg)
591 {
592 	IMM_OP(11, 0, reg, 0); // cmpwi cr0, reg, 0
593 
594 	IMM_OP(16, 0x04, 1, 0); // ble
595 	return ((Bit32u)cache.pos-4);
596 }
597 
598 // calculate long relative offset and fill it into the location pointed to by data
gen_fill_branch_long(Bit32u data)599 static void gen_fill_branch_long(Bit32u data) {
600 	return gen_fill_branch((DRC_PTR_SIZE_IM)data);
601 }
602 
cache_block_closing(const uint8_t * block_start,Bitu block_size)603 static void cache_block_closing(const uint8_t *block_start, Bitu block_size)
604 {
605 #if defined(__GNUC__)
606 	Bit8u* start = (Bit8u*)((Bit32u)block_start & -32);
607 
608 	while (start < block_start + block_size)
609 	{
610 		asm volatile("dcbst %y0\n\t icbi %y0" :: "Z"(*start));
611 		start += 32;
612 	}
613 	asm volatile("sync\n\t isync");
614 #else
615 	#error "Don't know how to flush/invalidate CacheBlock with this compiler"
616 #endif
617 }
618 
// Hook invoked before a cache block is closed; nothing to do on this backend.
static void cache_block_before_close(void)
{
}
620 
gen_function(void * func)621 static void gen_function(void* func)
622 {
623 	Bit32s off = (Bit32s)func - (Bit32s)cache.pos;
624 
625 	// relative branches are limited to +/- 32MB
626 	if (off < 0x02000000 && off >= -0x02000000) {
627 		cache_addd(0x48000000 | (off & 0x03FFFFFC)); // b func
628 		return;
629 	}
630 
631 	gen_mov_dword_to_reg_imm(HOST_R8, (Bit32u)func); // r8 = func
632 	EXT_OP(HOST_R8, 9, 0, 467, 0);  // mtctr r8
633 	IMM_OP(19, 0x14, 0, 528<<1); // bctr
634 }
635 
636 // gen_run_code is assumed to be called exactly once, gen_return_function() jumps back to it
637 static void* epilog_addr;
638 static Bit8u *getCF_glue;
gen_run_code(void)639 static void gen_run_code(void)
640 {
641 	// prolog
642 	IMM_OP(37, HOST_R1, HOST_R1, -256); // stwu sp,-256(sp)
643 	EXT_OP(FC_OP1, 9, 0, 467, 0); // mtctr FC_OP1
644 	EXT_OP(HOST_R0, 8, 0, 339, 0); // mflr r0
645 
646 	IMM_OP(47, HOST_R26, HOST_R1, 128); // stmw r26, 128(sp)
647 
648 	IMM_OP(15, FC_SEGS_ADDR, 0, ((Bit32u)&Segs)>>16);  // lis FC_SEGS_ADDR, Segs@h
649 	IMM_OP(24, FC_SEGS_ADDR, FC_SEGS_ADDR, &Segs);     // ori FC_SEGS_ADDR, FC_SEGS_ADDR, Segs@l
650 
651 	IMM_OP(15, FC_REGS_ADDR, 0, ((Bit32u)&cpu_regs)>>16);  // lis FC_REGS_ADDR, cpu_regs@h
652 	IMM_OP(24, FC_REGS_ADDR, FC_REGS_ADDR, &cpu_regs);     // ori FC_REGS_ADDR, FC_REGS_ADDR, cpu_regs@l
653 
654 #if C_FPU
655 	IMM_OP(15, HOST_R28, 0, ((Bit32u)&fpu)>>16);  // lis r28, fpu@h
656 	IMM_OP(24, HOST_R28, HOST_R28, &fpu);         // ori r28, r28, fpu@l
657 #endif
658 
659 	IMM_OP(36, HOST_R0, HOST_R1, 256+4); // stw r0,256+4(sp)
660 	IMM_OP(19, 0x14, 0, 528<<1);     // bctr
661 
662 	// epilog
663 	epilog_addr = cache.pos;
664 	IMM_OP(32, HOST_R0, HOST_R1, 256+4); // lwz r0,256+4(sp)
665 	IMM_OP(46, HOST_R26, HOST_R1, 128);   // lmw r26, 128(sp)
666 	EXT_OP(HOST_R0, 8, 0, 467, 0);       // mtlr r0
667 	IMM_OP(14, HOST_R1, HOST_R1, 256);   // addi sp, sp, 256
668 	IMM_OP(19, 0x14, 0, 16<<1);       // blr
669 
670 	// trampoline to call get_CF()
671 	getCF_glue = cache.pos;
672 	gen_function((void*)get_CF);
673 }
674 
675 // return from a function
gen_return_function(void)676 static void gen_return_function(void)
677 {
678 	gen_function(epilog_addr);
679 }
680 
681 // called when a call to a function can be replaced by a
682 // call to a simpler function
gen_fill_function_ptr(Bit8u * pos,void * fct_ptr,Bitu flags_type)683 static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type)
684 {
685 	Bit32u *op = (Bit32u*)pos;
686 	Bit32u *end = op+4;
687 
688 	switch (flags_type) {
689 #if defined(DRC_FLAGS_INVALIDATION_DCODE)
690 		// try to avoid function calls but rather directly fill in code
691 		case t_ADDb:
692 		case t_ADDw:
693 		case t_ADDd:
694 			*op++ = EXT(FC_RETOP, FC_OP1, FC_OP2, 266, 0); // add FC_RETOP, FC_OP1, FC_OP2
695 			break;
696 		case t_ORb:
697 		case t_ORw:
698 		case t_ORd:
699 			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_OP1, FC_OP2
700 			break;
701 		case t_ADCb:
702 		case t_ADCw:
703 		case t_ADCd:
704 			op[0] = EXT(HOST_R26, FC_OP1, FC_OP2, 266, 0); // r26 = FC_OP1 + FC_OP2
705 			op[1] = 0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF
706 			op[2] = IMM(12, HOST_R0, FC_RETOP, -1);        // addic r0, FC_RETOP, 0xFFFFFFFF (XER[CA] = !!CF)
707 			op[3] = EXT(FC_RETOP, HOST_R26, 0, 202, 0);    // addze; FC_RETOP = r26 + !!CF
708 			return;
709 		case t_SBBb:
710 		case t_SBBw:
711 		case t_SBBd:
712 			op[0] = EXT(HOST_R26, FC_OP2, FC_OP1, 40, 0);  // r26 = FC_OP1 - FC_OP2
713 			op[1] = 0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF
714 			op[2] = IMM(8, HOST_R0, FC_RETOP, 0);          // subfic r0, FC_RETOP, 0 (XER[CA] = !CF)
715 			op[3] = EXT(FC_RETOP, HOST_R26, 0, 234, 0);    // addme; FC_RETOP = r26 - 1 + !CF
716 			return;
717 		case t_ANDb:
718 		case t_ANDw:
719 		case t_ANDd:
720 			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 28, 0); // and FC_RETOP, FC_OP1, FC_OP2
721 			break;
722 		case t_SUBb:
723 		case t_SUBw:
724 		case t_SUBd:
725 			*op++ = EXT(FC_RETOP, FC_OP2, FC_OP1, 40, 0); // subf FC_RETOP, FC_OP2, FC_OP1
726 			break;
727 		case t_XORb:
728 		case t_XORw:
729 		case t_XORd:
730 			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 316, 0); // xor FC_RETOP, FC_OP1, FC_OP2
731 			break;
732 		case t_CMPb:
733 		case t_CMPw:
734 		case t_CMPd:
735 		case t_TESTb:
736 		case t_TESTw:
737 		case t_TESTd:
738 			break;
739 		case t_INCb:
740 		case t_INCw:
741 		case t_INCd:
742 			*op++ = IMM(14, FC_RETOP, FC_OP1, 1); // addi FC_RETOP, FC_OP1, #1
743 			break;
744 		case t_DECb:
745 		case t_DECw:
746 		case t_DECd:
747 			*op++ = IMM(14, FC_RETOP, FC_OP1, -1); // addi FC_RETOP, FC_OP1, #-1
748 			break;
749 		case t_NEGb:
750 		case t_NEGw:
751 		case t_NEGd:
752 			*op++ = EXT(FC_RETOP, FC_OP1, 0, 104, 0); // neg FC_RETOP, FC_OP1
753 			break;
754 		case t_SHLb:
755 		case t_SHLw:
756 		case t_SHLd:
757 			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP2
758 			break;
759 		case t_SHRb:
760 		case t_SHRw:
761 		case t_SHRd:
762 			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP2
763 			break;
764 		case t_SARb:
765 			*op++ = EXT(FC_OP1, FC_RETOP, 0, 954, 0); // extsb FC_RETOP, FC_OP1
766 		case t_SARw:
767 			if (flags_type == t_SARw)
768 				*op++ = EXT(FC_OP1, FC_RETOP, 0, 922, 0); // extsh FC_RETOP, FC_OP1
769 		case t_SARd:
770 			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 792, 0); // sraw FC_RETOP, FC_OP1, FC_OP2
771 			break;
772 
773 		case t_ROLb:
774 			*op++ = RLW(20, FC_OP1, FC_OP1, 24, 0, 7, 0); // rlwimi FC_OP1, FC_OP1, 24, 0, 7
775 		case t_ROLw:
776 			if (flags_type == t_ROLw)
777 				*op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 16, 0, 15
778 		case t_ROLd:
779 			*op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2
780 			break;
781 
782 		case t_RORb:
783 			*op++ = RLW(20, FC_OP1, FC_OP1, 8, 16, 23, 0); // rlwimi FC_OP1, FC_OP1, 8, 16, 23
784 		case t_RORw:
785 			if (flags_type == t_RORw)
786 				*op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 16, 0, 15
787 		case t_RORd:
788 			*op++ = IMM(8, FC_OP2, FC_OP2, 32); // subfic FC_OP2, FC_OP2, 32 (FC_OP2 = 32 - FC_OP2)
789 			*op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2
790 			break;
791 
792 		case t_DSHLw: // technically not correct for FC_OP3 > 16
793 			*op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 5
794 			*op++ = RLW(23, FC_RETOP, FC_RETOP, FC_OP3, 0, 31, 0); // rotlw FC_RETOP, FC_RETOP, FC_OP3
795 			break;
796 		case t_DSHLd:
797 			op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP3
798 			op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP3 = 32 - FC_OP3)
799 			op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 536, 0); // srw FC_OP2, FC_OP2, FC_OP3
800 			op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2
801 			return;
802 		case t_DSHRw: // technically not correct for FC_OP3 > 16
803 			*op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 5
804 			*op++ = EXT(FC_RETOP, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_RETOP, FC_OP3
805 			break;
806 		case t_DSHRd:
807 			op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP3
808 			op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP32 = 32 - FC_OP3)
809 			op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 24, 0); // slw FC_OP2, FC_OP2, FC_OP3
810 			op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2
811 			return;
812 #endif
813 		default:
814 			do_gen_call(fct_ptr, op, true);
815 			return;
816 	}
817 
818 	*op = 0x48000000 + 4*(end-op); // b end
819 }
820 
821 // mov 16bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 2 must be zero)
822 // 16bit moves may destroy the upper 16bit of the destination register
gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index)823 static void gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index) {
824 	gen_mov_word_to_reg(dest_reg, (Bit8u*)&Segs + index, false);
825 }
826 
827 // mov 32bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 4 must be zero)
gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index)828 static void gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index) {
829 	gen_mov_word_to_reg(dest_reg, (Bit8u*)&Segs + index, true);
830 }
831 
832 // add a 32bit value from Segs[index] to a full register using FC_SEGS_ADDR (index modulo 4 must be zero)
gen_add_seg32_to_reg(HostReg reg,Bitu index)833 static void gen_add_seg32_to_reg(HostReg reg,Bitu index) {
834 	gen_add(reg, (Bit8u*)&Segs + index);
835 }
836 
837 // mov 16bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 2 must be zero)
838 // 16bit moves may destroy the upper 16bit of the destination register
gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index)839 static void gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index)
840 {
841 	gen_mov_word_to_reg(dest_reg, (Bit8u*)&cpu_regs + index, false);
842 }
843 
844 // mov 32bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 4 must be zero)
gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index)845 static void gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index)
846 {
847 	gen_mov_word_to_reg(dest_reg, (Bit8u*)&cpu_regs + index, true);
848 }
849 
850 // move an 8bit value from cpu_regs[index]  into dest_reg using FC_REGS_ADDR
851 // the upper 24bit of the destination register can be destroyed
852 // this function does not use FC_OP1/FC_OP2 as dest_reg as these
853 // registers might not be directly byte-accessible on some architectures
gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index)854 static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index)
855 {
856 	gen_mov_byte_to_reg_low(dest_reg, (Bit8u*)&cpu_regs + index);
857 }
858 
859 // move an 8bit value from cpu_regs[index]  into dest_reg using FC_REGS_ADDR
860 // the upper 24bit of the destination register can be destroyed
861 // this function can use FC_OP1/FC_OP2 as dest_reg which are
862 // not directly byte-accessible on some architectures
gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index)863 static void inline gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index) {
864 	gen_mov_byte_to_reg_low_canuseword(dest_reg, (Bit8u*)&cpu_regs + index);
865 }
866 
867 // move 16bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 2 must be zero)
gen_mov_regval16_from_reg(HostReg src_reg,Bitu index)868 static void gen_mov_regval16_from_reg(HostReg src_reg,Bitu index)
869 {
870 	gen_mov_word_from_reg(src_reg, (Bit8u*)&cpu_regs + index, false);
871 }
872 
873 // move 32bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 4 must be zero)
gen_mov_regval32_from_reg(HostReg src_reg,Bitu index)874 static void gen_mov_regval32_from_reg(HostReg src_reg,Bitu index)
875 {
876 	gen_mov_word_from_reg(src_reg, (Bit8u*)&cpu_regs + index, true);
877 }
878 
879 // move the lowest 8bit of a register into cpu_regs[index] using FC_REGS_ADDR
gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index)880 static void gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index)
881 {
882 	gen_mov_byte_from_reg_low(src_reg, (Bit8u*)&cpu_regs + index);
883 }
884 
885 // add a 32bit value from cpu_regs[index] to a full register using FC_REGS_ADDR (index modulo 4 must be zero)
gen_add_regval32_to_reg(HostReg reg,Bitu index)886 static void gen_add_regval32_to_reg(HostReg reg,Bitu index)
887 {
888 	gen_add(reg, (Bit8u*)&cpu_regs + index);
889 }
890 
891 // move 32bit (dword==true) or 16bit (dword==false) of a register into cpu_regs[index] using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword)892 static void gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword) {
893 	if (dword)
894 		gen_mov_regval32_from_reg(src_reg, index);
895 	else
896 		gen_mov_regval16_from_reg(src_reg, index);
897 }
898 
899 // move a 32bit (dword==true) or 16bit (dword==false) value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
900 // 16bit moves may destroy the upper 16bit of the destination register
gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword)901 static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword) {
902 	if (dword)
903 		gen_mov_regval32_to_reg(dest_reg, index);
904 	else
905 		gen_mov_regval16_to_reg(dest_reg, index);
906 }
907