/*
 *  SPDX-License-Identifier: GPL-2.0-or-later
 *
 *  Copyright (C) 2020-2021  The DOSBox Staging Team
 *  Copyright (C) 2002-2019  The DOSBox Team
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

/* PPC64LE/OpenPOWER (little endian) backend */

// debugging
#define __ASSERT(x,...) \
    { if(!(x)) { fprintf(stderr, "ASSERT:" __VA_ARGS__); asm("trap\n"); } }

// some configuring defines that specify the capabilities of this architecture
// or aspects of the recompiling

// protect FC_ADDR over function calls if necessary
//#define DRC_PROTECT_ADDR_REG

// try to use non-flags generating functions if possible
#define DRC_FLAGS_INVALIDATION
// try to replace _simple functions by code
#define DRC_FLAGS_INVALIDATION_DCODE

// type with the same size as a pointer
#define DRC_PTR_SIZE_IM Bit64u

// calling convention modifier
#define DRC_FC /* nothing */
#define DRC_CALL_CONV /* nothing */

#define DRC_USE_REGS_ADDR
#define DRC_USE_SEGS_ADDR

// register mapping
enum HostReg {
	HOST_R0 = 0,
	HOST_R1,
	HOST_R2,
	HOST_R3,
	HOST_R4,
	HOST_R5,
	HOST_R6,
	HOST_R7,
	HOST_R8,
	HOST_R9,
	HOST_R10,
	HOST_R11,
	HOST_R12, // end of volatile registers. use for CTR calls
	HOST_R13,
	HOST_R14,
	HOST_R15,
	HOST_R16,
	HOST_R17,
	HOST_R18,
	HOST_R19,
	HOST_R20,
	HOST_R21,
	HOST_R22,
	HOST_R23,
	HOST_R24,
	HOST_R25,
	HOST_R26, // generic non-volatile (used for inline adc/sbb)
	HOST_R27, // points to current CacheBlock (decode.block)
	HOST_R28, // points to fpu
	HOST_R29, // FC_ADDR
	HOST_R30, // points to Segs
	HOST_R31, // points to cpu_regs

	HOST_NONE
};

static const HostReg RegParams[] = {
	HOST_R3, HOST_R4, HOST_R5, HOST_R6,
	HOST_R7, HOST_R8, HOST_R9, HOST_R10
};
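
// ELFv2 ABI: r3..r10 carry the first eight integer/pointer arguments,
// and r3 also holds the integer return value (see FC_RETOP below)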

#if C_FPU
#include "fpu.h"
extern FPU_rec fpu;
#endif

// register that holds function return values
#define FC_RETOP HOST_R3

// register used for address calculations, if the ABI does not
// state that this register is preserved across function calls
// then define DRC_PROTECT_ADDR_REG above
#define FC_ADDR HOST_R29

// register that points to Segs[]
#define FC_SEGS_ADDR HOST_R30
// register that points to cpu_regs[]
#define FC_REGS_ADDR HOST_R31

// register that holds the first parameter
#define FC_OP1 RegParams[0]

// register that holds the second parameter
#define FC_OP2 RegParams[1]

// special register that holds the third parameter for _R3 calls (byte accessible)
#define FC_OP3 RegParams[2]

// register that holds byte-accessible temporary values
#define FC_TMP_BA1 FC_OP2

// register that holds byte-accessible temporary values
#define FC_TMP_BA2 FC_OP1

// temporary register for LEA
#define TEMP_REG_DRC HOST_R10

// op comes right out of the PowerISA 3.0 documentation
#define IMM(op, regsd, rega, imm)            (Bit32u)(((op)<<26)|((regsd)<<21)|((rega)<<16)|             (((Bit64u)(imm))&0xFFFF))
#define DSF(op, regs, rega, ds, bb)          (Bit32u)(((op)<<26)|((regs) <<21)|((rega)<<16)|             (((Bit64u)(ds))&0xFFFC)|(bb))
#define EXT(regsd, rega, regb, op, rc)       (Bit32u)(  (31<<26)|((regsd)<<21)|((rega)<<16)| ((regb)<<11)|                       ((op)<<1)              |(rc))
#define RLW(op, regs, rega, sh, mb, me, rc)  (Bit32u)(((op)<<26)|((regs) <<21)|((rega)<<16)|   ((sh)<<11)|((mb   )<<6)|((me)<<1)                        |(rc))
#define RLD(op, regs, rega, sh, mx, opb, rc) (Bit32u)(((op)<<26)|((regs) <<21)|((rega)<<16)|((sh&31)<<11)|((mx&31)<<6)|(mx&32)  |((opb)<<2)|((sh&32)>>4)|(rc))
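// e.g. IMM(14, HOST_R3, 0, 0x1234) encodes "addi r3,0,0x1234" (li r3,0x1234),
// and EXT(HOST_R3, HOST_R4, HOST_R5, 266, 0) encodes "add r3,r4,r5"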

#define IMM_OP(op, regsd, rega, imm)                cache_addd(IMM(op, regsd, rega, imm))
#define DSF_OP(op, regs, rega, ds, bb)              cache_addd(DSF(op, regs, rega, ds, bb))
#define EXT_OP(regsd, rega, regb, op, rc)           cache_addd(EXT(regsd, rega, regb, op, rc))
#define RLW_OP(op, regs, rega, sh, mb, me, rc)      cache_addd(RLW(op, regs, rega, sh, mb, me, rc))
#define RLD_OP(op, regs, rega, sh, mx, opb, rc)     cache_addd(RLD(op, regs, rega, sh, mx, opb, rc))

#define NOP                                         IMM(24, 0, 0, 0) // ori 0,0,0
#define NOP_OP()                                    cache_addd(NOP)
#define TRAP()                                      cache_addd(EXT(31, 0, 0, 4, 0)) // tw 31,0,0

// move a full register from reg_src to reg_dst
// truncate to 32-bits (matches x86_64, which uses 32-bit mov)
static void gen_mov_regs(HostReg reg_dst, HostReg reg_src)
{
// rld* etc. are backwards: rS is first in the encoding
// always move, even if reg_src == reg_dst, because we may need truncation
	RLD_OP(30, reg_src, reg_dst, 0, 32, 0, 0); // clrldi dst, src, 32
}

// move a 16bit constant value into dest_reg
// the upper 16bit of the destination register may be destroyed
static void gen_mov_word_to_reg_imm(HostReg dest_reg, Bit16u imm)
{
	IMM_OP(14, dest_reg, 0, imm); // li dest,imm
}

DRC_PTR_SIZE_IM block_ptr;

// Helper for loading addresses
// Emits relevant code to load the upper 48 bits if needed
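// The address is rewritten in place to a 16-bit displacement and the base
// register to use is returned: FC_SEGS_ADDR, FC_REGS_ADDR, the block pointer
// (r27) or the fpu pointer (r28) when the target is within signed-16-bit
// reach of one of them, HOST_R0 (i.e. no base) for small absolute addresses,
// or dest after the upper bits have been materialised there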
static HostReg inline gen_addr(Bit64s &addr, HostReg dest)
{
	Bit64s off;

	if ((Bit16s)addr == addr)
		return HOST_R0; // lower to immediate

	off = addr - (Bit64s)&Segs;
	if ((Bit16s)off == off)
	{
		addr = off;
		return FC_SEGS_ADDR;
	}

	off = addr - (Bit64s)&cpu_regs;
	if ((Bit16s)off == off)
	{
		addr = off;
		return FC_REGS_ADDR;
	}

	off = addr - (Bit64s)block_ptr;
	if ((Bit16s)off == off)
	{
		addr = off;
		return HOST_R27;
	}

#if C_FPU
	off = addr - (Bit64s)&fpu;
	if ((Bit16s)off == off)
	{
		addr = off;
		return HOST_R28;
	}
#endif

	if (addr & 0xffffffff00000000) {
		IMM_OP(15, dest, 0, (addr & 0xffff000000000000) >> 48); // lis dest, upper
		if (addr & 0x0000ffff00000000)
			IMM_OP(24, dest, dest, (addr & 0x0000ffff00000000) >> 32); // ori dest, dest, ...
		RLD_OP(30, dest, dest, 32, 31, 1, 0); // rldicr dest, dest, 32, 31
		if (addr & 0x00000000ffff0000)
			IMM_OP(25, dest, dest, (addr & 0x00000000ffff0000) >> 16); // oris dest, dest, ...
	} else {
		IMM_OP(15, dest, 0, (addr & 0x00000000ffff0000) >> 16); // lis dest, lower
	}
	// watch unexpected sign extension with following instructions
	if (addr & 0x8000) {
		// make the displacement in the following instruction 0 for safety
		IMM_OP(24, dest, dest, (addr & 0x000000000000ffff));
		addr = 0;
	} else {
		addr = (Bit16s)addr;
	}
	return dest;
}

// move a 64bit constant value into dest_reg
static void gen_mov_qword_to_reg_imm(HostReg dest_reg, Bit64u imm)
{
	if (imm & 0xffffffff00000000) {
		IMM_OP(15, dest_reg, 0, (imm & 0xffff000000000000) >> 48); // lis dest, upper
		if (imm & 0x0000ffff00000000)
			IMM_OP(24, dest_reg, dest_reg, (imm & 0x0000ffff00000000) >> 32); // ori dest, dest, ...
		RLD_OP(30, dest_reg, dest_reg, 32, 31, 1, 0); // rldicr dest, dest, 32, 31
		if (imm & 0x00000000ffff0000)
			IMM_OP(25, dest_reg, dest_reg, (imm & 0x00000000ffff0000) >> 16); // oris dest, dest, ...
	} else {
		IMM_OP(15, dest_reg, 0, (imm & 0x00000000ffff0000) >> 16); // lis dest, lower
	}
	if (imm & 0xffff)
		IMM_OP(24, dest_reg, dest_reg, (imm & 0x000000000000ffff)); // ori dest, dest, ...
}

// move a 32bit constant value into dest_reg
static void gen_mov_dword_to_reg_imm(HostReg dest_reg, uint32_t imm)
{
	if (static_cast<int16_t>(imm) != static_cast<int32_t>(imm)) {
		IMM_OP(15, dest_reg, 0, (imm & 0xffff0000) >> 16); // lis
		if (imm & 0x0000ffff)
			IMM_OP(24, dest_reg, dest_reg, (imm & 0x0000ffff)); // ori
	} else {
		IMM_OP(14, dest_reg, 0, imm); // li
	}
}

// move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_word_to_reg(HostReg dest_reg, void *data, bool dword)
{
	Bit64s addr = (Bit64s)data;
	HostReg ld = gen_addr(addr, dest_reg);
	IMM_OP(dword ? 32:40, dest_reg, ld, addr);  // lwz/lhz dest, addr@l(ld)
}

// move an 8bit constant value into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg, Bit8u imm)
{
	gen_mov_word_to_reg_imm(dest_reg, imm);
}

// move an 8bit constant value into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg, Bit8u imm)
{
	gen_mov_word_to_reg_imm(dest_reg, imm);
}

// move 32bit (dword==true) or 16bit (dword==false) of a register into memory
static void gen_mov_word_from_reg(HostReg src_reg, void *dest, bool dword)
{
	Bit64s addr = (Bit64s)dest;
	HostReg ld = gen_addr(addr, HOST_R8);
	IMM_OP(dword ? 36 : 44, src_reg, ld, addr);  // stw/sth src,addr@l(ld)
}

// move an 8bit value from memory into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low(HostReg dest_reg, void *data)
{
	Bit64s addr = (Bit64s)data;
	HostReg ld = gen_addr(addr, dest_reg);
	IMM_OP(34, dest_reg, ld, addr);  // lbz dest,addr@l(ld)
}

// move an 8bit value from memory into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg, void *data)
{
	gen_mov_byte_to_reg_low(dest_reg, data);
}

// move the lowest 8bit of a register into memory
static void gen_mov_byte_from_reg_low(HostReg src_reg, void *dest)
{
	Bit64s addr = (Bit64s)dest;
	HostReg ld = gen_addr(addr, HOST_R8);
	IMM_OP(38, src_reg, ld, addr);  // stb src_reg,addr@l(ld)
}

// convert an 8bit word to a 32bit dword
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_byte(bool sign, HostReg reg)
{
	if (sign)
		EXT_OP(reg, reg, 0, 954, 0); // extsb reg, src
	else
		RLW_OP(21, reg, reg, 0, 24, 31, 0); // rlwinm reg, src, 0, 24, 31
}

// convert a 16bit word to a 32bit dword
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_word(bool sign, HostReg reg)
{
	if (sign)
		EXT_OP(reg, reg, 0, 922, 0); // extsh reg, reg
	else
		RLW_OP(21, reg, reg, 0, 16, 31, 0); // rlwinm reg, reg, 0, 16, 31
}

// add a 32bit value from memory to a full register
static void gen_add(HostReg reg, void *op)
{
	gen_mov_word_to_reg(HOST_R8, op, true); // r8 = *(Bit32u*)op
	EXT_OP(reg,reg,HOST_R8,266,0);          // add reg,reg,r8
}

// add a 32bit constant value to a full register
static void gen_add_imm(HostReg reg, Bit32u imm)
{
	if (!imm)
		return;
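	// the constant is split into addis (imm@ha) and addi (imm@l); addi
	// sign-extends its 16-bit operand, so the high half is computed as
	// (imm + 0x8000) >> 16 to compensate when bit 15 of imm is set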
	if ((Bit16s)imm != (Bit32s)imm)
		IMM_OP(15, reg, reg, (imm+0x8000)>>16); // addis reg,reg,imm@ha
	if ((Bit16s)imm)
		IMM_OP(14, reg, reg, imm);              // addi reg, reg, imm@l
}

// and a 32bit constant value with a full register
static void gen_and_imm(HostReg reg, Bit32u imm)
{
	Bits sbit,ebit,tbit,bbit,abit,i;

	// sbit = number of leading 0 bits
	// ebit = number of trailing 0 bits
	// tbit = number of total 0 bits
	// bbit = number of leading 1 bits
	// abit = number of trailing 1 bits
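	// e.g. imm == 0x00FFFF00: sbit = ebit = 8, tbit = 16, so the ones form a
	// single contiguous run and one "rlwinm reg,reg,0,8,23" does the job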

	if (imm == 0xFFFFFFFF)
		return;

	if (!imm)
		return gen_mov_word_to_reg_imm(reg, 0);

	sbit = ebit = tbit = bbit = abit = 0;
	for (i=0; i < 32; i++)
	{
		if (!(imm & (1<<(31-i))))
		{
			abit = 0;
			tbit++;
			if (sbit == i)
				sbit++;
			ebit++;
		}
		else
		{
			ebit = 0;
			if (bbit == i)
				bbit++;
			abit++;
		}
	}

	if (sbit + ebit == tbit)
	{
		RLW_OP(21,reg,reg,0,sbit,31-ebit,0); // rlwinm reg,reg,0,sbit,31-ebit
		return;
	}

	if (sbit >= 16)
	{
		IMM_OP(28,reg,reg,imm); // andi. reg,reg,imm
		return;
	}
	if (ebit >= 16)
	{
		IMM_OP(29,reg,reg,imm>>16); // andis. reg,reg,(imm>>16)
		return;
	}

	if (bbit + abit == (32 - tbit))
	{
		RLW_OP(21,reg,reg,0,32-abit,bbit-1,0); // rlwinm reg,reg,0,32-abit,bbit-1
		return;
	}

	IMM_OP(28, reg, HOST_R0, imm); // andi. r0, reg, imm@l
	IMM_OP(29, reg, reg, imm>>16); // andis. reg, reg, imm@h
	EXT_OP(reg, reg, HOST_R0, 444, 0); // or reg, reg, r0
}

// move a 32bit constant value into memory
static void gen_mov_direct_dword(void *dest, Bit32u imm)
{
	gen_mov_dword_to_reg_imm(HOST_R9, imm);
	gen_mov_word_from_reg(HOST_R9, dest, 1);
}

// move an address into memory (assumes address != NULL)
static void inline gen_mov_direct_ptr(void *dest, DRC_PTR_SIZE_IM imm)
{
	block_ptr = 0;
	gen_mov_qword_to_reg_imm(HOST_R27, imm);
	// this will be used to look-up the linked blocks
	block_ptr = imm;
	// "gen_mov_qword_from_reg(HOST_R27, dest, 1);"
	Bit64s addr = (Bit64s)dest;
	HostReg ld = gen_addr(addr, HOST_R8);
	DSF_OP(62, HOST_R27, ld, addr, 0); // std r27, addr@l(ld)
}

// add a 32bit (dword==true) or 16bit (dword==false) constant value
// to a 32bit memory value
static void gen_add_direct_word(void *dest, Bit32u imm, bool dword)
{
	HostReg ld;
	Bit64s addr = (Bit64s)dest;

	if (!dword)
	{
		imm &= 0xFFFF;
		//addr += 2; // ENDIAN!!!
	}

	if (!imm)
		return;

	ld = gen_addr(addr, HOST_R8);
	IMM_OP(dword ? 32 : 40, HOST_R9, ld, addr); // lwz/lhz r9, addr@l(ld)
	if (dword && (Bit16s)imm != (Bit32s)imm)
		IMM_OP(15, HOST_R9, HOST_R9, (imm+0x8000)>>16); // addis r9,r9,imm@ha
	if (!dword || (Bit16s)imm)
		IMM_OP(14, HOST_R9, HOST_R9, imm);      // addi r9,r9,imm@l
	IMM_OP(dword ? 36 : 44, HOST_R9, ld, addr); // stw/sth r9, addr@l(ld)
}

// subtract a 32bit (dword==true) or 16bit (dword==false) constant value
// from a 32-bit memory value
static void gen_sub_direct_word(void *dest, Bit32u imm, bool dword)
{
	gen_add_direct_word(dest, -(Bit32s)imm, dword);
}

// effective address calculation, destination is dest_reg
// scale_reg is scaled by scale (scale_reg*(2^scale)) and
// added to dest_reg, then the immediate value is added
static inline void gen_lea(HostReg dest_reg, HostReg scale_reg, Bitu scale, Bits imm)
{
	if (scale)
	{
		RLW_OP(21, scale_reg, HOST_R8, scale, 0, 31-scale, 0); // slwi r8,scale_reg,scale
		scale_reg = HOST_R8;
	}

	gen_add_imm(dest_reg, imm);
	EXT_OP(dest_reg, dest_reg, scale_reg, 266, 0); // add dest,dest,scaled
}

// effective address calculation, destination is dest_reg
// dest_reg is scaled by scale (dest_reg*(2^scale)),
// then the immediate value is added
static inline void gen_lea(HostReg dest_reg, Bitu scale, Bits imm)
{
	if (scale)
	{
		RLW_OP(21, dest_reg, dest_reg, scale, 0, 31-scale, 0); // slwi dest,dest,scale
	}

	gen_add_imm(dest_reg, imm);
}

// helper function to choose direct or indirect call
static int inline do_gen_call(void *func, Bit64u *npos, bool pad)
{
	Bit64s f = (Bit64s)func;
	Bit64s off = f - (Bit64s)npos;
	Bit32u *pos = (Bit32u *)npos;

	// the length of this branch stanza must match the assumptions in
	// gen_fill_function_ptr

	// relative branches are limited to +/- ~32MB
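	// (0x48000001 is opcode 18, "bl", with LK=1; the 24-bit LI field of the
	// I-form branch, shifted left by 2, gives the +/- 32 MiB range)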
	if (off < 0x02000000 && off >= -0x02000000)
	{
		pos[0] = 0x48000001 | (off & 0x03FFFFFC); // bl func // "lis"
		if (pad)
		{
			// keep this patchable
			pos[1] = NOP; // nop "ori"
			pos[2] = NOP; // nop "rldicr"
			pos[3] = NOP; // nop "oris"
			pos[4] = NOP; // nop "ori"
			pos[5] = NOP; // nop "mtctr"
			pos[6] = NOP; // nop "bctrl"
			return 28;
		}
		return 4;
	}

	// for ppc64le ELF ABI, use r12 to branch
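	// (the ELFv2 ABI expects the target address of an indirect call in r12 so
	// the callee can derive its TOC pointer from it)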
	pos[0] = IMM(15, HOST_R12, 0,        (f & 0xffff000000000000)>>48); // lis
	pos[1] = IMM(24, HOST_R12, HOST_R12, (f & 0x0000ffff00000000)>>32); // ori
	pos[2] = RLD(30, HOST_R12, HOST_R12, 32, 31, 1, 0); // rldicr
	pos[3] = IMM(25, HOST_R12, HOST_R12, (f & 0x00000000ffff0000)>>16); // oris
	pos[4] = IMM(24, HOST_R12, HOST_R12, (f & 0x000000000000ffff)    ); // ori
	pos[5] = EXT(HOST_R12, 9, 0, 467, 0);  // mtctr r12
	pos[6] = IMM(19, 0x14, 0, (528<<1)|1); // bctrl
	return 28;
}

// generate a call to a parameterless function
static void inline gen_call_function_raw(void *func, bool fastcall = true)
{
	cache.pos += do_gen_call(func, (Bit64u*)cache.pos, fastcall);
}

// generate a call to a function with paramcount parameters
// note: the parameters are loaded in the architecture specific way
// using the gen_load_param_ functions below
static Bit64u inline gen_call_function_setup(void *func,
                                             Bitu paramcount,
                                             bool fastcall = false)
{
	Bit64u proc_addr=(Bit64u)cache.pos;
	gen_call_function_raw(func,fastcall);
	return proc_addr;
}

// load an immediate value as param'th function parameter
// these are 32-bit (see risc_x64.h)
static void inline gen_load_param_imm(Bitu imm, Bitu param)
{
	gen_mov_dword_to_reg_imm(RegParams[param], imm);
}

// load an address as param'th function parameter
// 32-bit
static void inline gen_load_param_addr(Bitu addr, Bitu param)
{
	gen_load_param_imm(addr, param);
}

// load a host-register as param'th function parameter
// 32-bit
static void inline gen_load_param_reg(Bitu reg, Bitu param)
{
	gen_mov_regs(RegParams[param], (HostReg)reg);
}

// load a value from memory as param'th function parameter
// 32-bit
static void inline gen_load_param_mem(Bitu mem, Bitu param)
{
	gen_mov_word_to_reg(RegParams[param], (void*)mem, true);
}

// jump to an address pointed at by ptr, offset is in imm
// use r12 for ppc64le ABI compatibility
static void gen_jmp_ptr(void *ptr, Bits imm = 0)
{
	// "gen_mov_qword_to_reg"
	gen_mov_qword_to_reg_imm(HOST_R12,(Bit64u)ptr);         // r12 = ptr
	DSF_OP(58, HOST_R12, HOST_R12, 0, 0);                   // ld r12,0(r12): r12 = *(Bit64u*)ptr

	if ((Bit16s)imm != (Bit32s)imm) {
		// FIXME: this is not tested. I've left it as a quasi-assertion.
		fprintf(stderr, "large gen_jmp_ptr offset\n");
		__asm__("trap\n");
		IMM_OP(15, HOST_R12, HOST_R12, (imm + 0x8000)>>16); // addis r12, r12, imm@ha
	}
	DSF_OP(58, HOST_R12, HOST_R12, (Bit16s)imm, 0);         // ld r12, imm@l(r12)
	EXT_OP(HOST_R12, 9, 0, 467, 0);                         // mtctr r12
	IMM_OP(19, 0x14, 0, 528<<1);                            // bctr
}

// short conditional jump (+-127 bytes) if register is zero
// the destination is set by gen_fill_branch() later
static Bit64u gen_create_branch_on_zero(HostReg reg, bool dword)
{
	if (!dword)
		IMM_OP(28,reg,HOST_R0,0xFFFF); // andi. r0,reg,0xFFFF
	else
		IMM_OP(11, 0, reg, 0);         // cmpwi cr0, reg, 0

	IMM_OP(16, 0x0C, 2, 0); // bc 12,CR0[Z] (beq)
	return ((Bit64u)cache.pos-4);
}

// short conditional jump (+-127 bytes) if register is nonzero
// the destination is set by gen_fill_branch() later
static Bit64u gen_create_branch_on_nonzero(HostReg reg, bool dword)
{
	if (!dword)
		IMM_OP(28,reg,HOST_R0,0xFFFF); // andi. r0,reg,0xFFFF
	else
		IMM_OP(11, 0, reg, 0);         // cmpwi cr0, reg, 0

	IMM_OP(16, 0x04, 2, 0); // bc 4,CR0[Z] (bne)
	return ((Bit64u)cache.pos-4);
}

// calculate relative offset and fill it into the location pointed to by data
static void gen_fill_branch(DRC_PTR_SIZE_IM data)
{
#if C_DEBUG
	Bits len=(Bit64u)cache.pos-data;
	if (len<0) len=-len;
	if (len >= 0x8000) LOG_MSG("Big jump %d",len);
#endif

	// FIXME: assert???
	((Bit16u*)data)[0] = ((Bit64u)cache.pos-data) & 0xFFFC; // ENDIAN!!!
}


// conditional jump if register is nonzero
// for isdword==true the 32bit of the register are tested
// for isdword==false the lowest 8bit of the register are tested
static Bit64u gen_create_branch_long_nonzero(HostReg reg, bool dword)
{
	if (!dword)
		IMM_OP(28,reg,HOST_R0,0xFF); // andi. r0,reg,0xFF
	else
		IMM_OP(11, 0, reg, 0);       // cmpwi cr0, reg, 0

	IMM_OP(16, 0x04, 2, 0); // bne
	return ((Bit64u)cache.pos-4);
}

// compare 32bit-register against zero and jump if value less/equal than zero
static Bit64u gen_create_branch_long_leqzero(HostReg reg)
{
	IMM_OP(11, 0, reg, 0); // cmpwi cr0, reg, 0

	IMM_OP(16, 0x04, 1, 0); // ble
	return ((Bit64u)cache.pos-4);
}

// calculate long relative offset and fill it into the location pointed to by data
static void gen_fill_branch_long(Bit64u data)
{
	return gen_fill_branch((DRC_PTR_SIZE_IM)data);
}

static void cache_block_closing(const uint8_t *block_start, Bitu block_size)
{
	// the i-cache and d-cache have to be maintained separately: PowerPC does
	// not keep the instruction cache coherent with data-side stores, so the
	// freshly written code must be flushed from the d-cache and invalidated
	// in the i-cache before it may be executed
	Bit8u* dstart = (Bit8u*)((Bit64u)block_start & -128); // align down to a cache-line boundary
	Bit8u* istart = dstart;

	while (dstart < block_start + block_size)
	{
		asm volatile("dcbf %y0" :: "Z"(*dstart));
		// cache line size for POWER8 and POWER9 is 128 bytes
		dstart += 128;
	}
	asm volatile("sync");

	while (istart < block_start + block_size)
	{
		asm volatile("icbi %y0" :: "Z"(*istart));
		istart += 128;
	}
	asm volatile("isync");
}

static void cache_block_before_close(void) {}

static void gen_function(void *func)
{
	Bit64s off = (Bit64s)func - (Bit64s)cache.pos;

	// relative branches are limited to +/- 32MB
	if (off < 0x02000000 && off >= -0x02000000) {
		cache_addd(0x48000000 | (off & 0x03FFFFFC)); // b func
		return;
	}

	gen_mov_qword_to_reg_imm(HOST_R12, (Bit64u)func); // r12 = func
	EXT_OP(HOST_R12, 9, 0, 467, 0);  // mtctr r12
	IMM_OP(19, 0x14, 0, 528<<1); // bctr
}

// gen_run_code is assumed to be called exactly once, gen_return_function() jumps back to it
static void* epilog_addr;
static Bit8u *getCF_glue;
static void gen_run_code(void)
{
	// prolog
	DSF_OP(62, HOST_R1, HOST_R1, -256, 1); // stdu sp,-256(sp)
	EXT_OP(FC_OP1, 9, 0, 467, 0); // mtctr FC_OP1
	EXT_OP(HOST_R0, 8, 0, 339, 0); // mflr r0
	// we don't clobber any CR fields we need to restore, so no need to save
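	// ELFv2 stack frame: the stdu above allocates 256 bytes; the saved LR goes
	// into the caller's LR save slot (sp+256+16 after the update, see the
	// std r0 below) and the non-volatiles r26..r31 are kept at sp+208..sp+248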

	// put at the very end of the stack frame, since we have no floats
	// to save
	DSF_OP(62, HOST_R26, HOST_R1, 208+0 , 0); // std r26, 208(sp)
	DSF_OP(62, HOST_R27, HOST_R1, 208+8 , 0); // std r27, 216(sp)
	DSF_OP(62, HOST_R28, HOST_R1, 208+16, 0); // :
	DSF_OP(62, HOST_R29, HOST_R1, 208+24, 0); // :
	DSF_OP(62, HOST_R30, HOST_R1, 208+32, 0); // :
	DSF_OP(62, HOST_R31, HOST_R1, 208+40, 0); // std r31, 248(sp)

#if C_FPU
	gen_mov_qword_to_reg_imm(HOST_R28, ((Bit64u)&fpu));
#endif
	gen_mov_qword_to_reg_imm(FC_SEGS_ADDR, ((Bit64u)&Segs));
	gen_mov_qword_to_reg_imm(FC_REGS_ADDR, ((Bit64u)&cpu_regs));
	DSF_OP(62, HOST_R0,  HOST_R1, 256+16, 0); // std r0,256+16(sp)
	//TRAP();
	IMM_OP(19, 0x14, 0, 528<<1);     // bctr

	// epilog
	epilog_addr = cache.pos;
	//TRAP();
	DSF_OP(58, HOST_R0, HOST_R1, 256+16, 0);  // ld r0,256+16(sp)
	EXT_OP(HOST_R0, 8, 0, 467, 0);            // mtlr r0
	DSF_OP(58, HOST_R31, HOST_R1, 208+40, 0); // ld r31, 248(sp)
	DSF_OP(58, HOST_R30, HOST_R1, 208+32, 0); // etc.
	DSF_OP(58, HOST_R29, HOST_R1, 208+24, 0);
	DSF_OP(58, HOST_R28, HOST_R1, 208+16, 0);
	DSF_OP(58, HOST_R27, HOST_R1, 208+8 , 0);
	DSF_OP(58, HOST_R26, HOST_R1, 208+0 , 0);

	IMM_OP(14, HOST_R1, HOST_R1, 256);   // addi sp, sp, 256
	IMM_OP(19, 0x14, 0, 16<<1);          // blr

	// trampoline to call get_CF()
	getCF_glue = cache.pos;
	gen_function((void*)get_CF);
}

// return from a function
static void gen_return_function(void)
{
	gen_function(epilog_addr);
}

// called when a call to a function can be replaced by a
// call to a simpler function
// these must equal the length of a branch stanza (see
// do_gen_call)
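// (the stanza is 7 instructions / 28 bytes; slots not needed by the inlined
// sequence below are left as NOPs)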
static void gen_fill_function_ptr(Bit8u *pos, void *fct_ptr, Bitu flags_type)
{
	Bit32u *op = (Bit32u*)pos;

	// blank the entire old stanza
	op[1] = NOP;
	op[2] = NOP;
	op[3] = NOP;
	op[4] = NOP;
	op[5] = NOP;
	op[6] = NOP;

	switch (flags_type) {
#if defined(DRC_FLAGS_INVALIDATION_DCODE)
		// try to avoid function calls but rather directly fill in code
		case t_ADDb:
		case t_ADDw:
		case t_ADDd:
			*op++ = EXT(FC_RETOP, FC_OP1, FC_OP2, 266, 0); // add FC_RETOP, FC_OP1, FC_OP2
			break;
		case t_ORb:
		case t_ORw:
		case t_ORd:
			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_OP1, FC_OP2
			break;
		case t_ADCb:
		case t_ADCw:
		case t_ADCd:
			op[0] = EXT(HOST_R26, FC_OP1, FC_OP2, 266, 0); // r26 = FC_OP1 + FC_OP2
			op[1] = 0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF
			op[2] = IMM(12, HOST_R0, FC_RETOP, -1);        // addic r0, FC_RETOP, 0xFFFFFFFF (XER[CA] = !!CF)
			op[3] = EXT(FC_RETOP, HOST_R26, 0, 202, 0);    // addze; FC_RETOP = r26 + !!CF
			return;
		case t_SBBb:
		case t_SBBw:
		case t_SBBd:
			op[0] = EXT(HOST_R26, FC_OP2, FC_OP1, 40, 0);  // r26 = FC_OP1 - FC_OP2
			op[1] = 0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF
			op[2] = IMM(8, HOST_R0, FC_RETOP, 0);          // subfic r0, FC_RETOP, 0 (XER[CA] = !CF)
			op[3] = EXT(FC_RETOP, HOST_R26, 0, 234, 0);    // addme; FC_RETOP = r26 - 1 + !CF
			return;
		case t_ANDb:
		case t_ANDw:
		case t_ANDd:
			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 28, 0); // and FC_RETOP, FC_OP1, FC_OP2
			break;
		case t_SUBb:
		case t_SUBw:
		case t_SUBd:
			*op++ = EXT(FC_RETOP, FC_OP2, FC_OP1, 40, 0); // subf FC_RETOP, FC_OP2, FC_OP1
			break;
		case t_XORb:
		case t_XORw:
		case t_XORd:
			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 316, 0); // xor FC_RETOP, FC_OP1, FC_OP2
			break;
		case t_CMPb:
		case t_CMPw:
		case t_CMPd:
		case t_TESTb:
		case t_TESTw:
		case t_TESTd:
			break;
		case t_INCb:
		case t_INCw:
		case t_INCd:
			*op++ = IMM(14, FC_RETOP, FC_OP1, 1); // addi FC_RETOP, FC_OP1, #1
			break;
		case t_DECb:
		case t_DECw:
		case t_DECd:
			*op++ = IMM(14, FC_RETOP, FC_OP1, -1); // addi FC_RETOP, FC_OP1, #-1
			break;
		case t_NEGb:
		case t_NEGw:
		case t_NEGd:
			*op++ = EXT(FC_RETOP, FC_OP1, 0, 104, 0); // neg FC_RETOP, FC_OP1
			break;
		case t_SHLb:
		case t_SHLw:
		case t_SHLd:
			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP2
			break;
		case t_SHRb:
		case t_SHRw:
		case t_SHRd:
			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP2
			break;
		case t_SARb:
			*op++ = EXT(FC_OP1, FC_RETOP, 0, 954, 0); // extsb FC_RETOP, FC_OP1
		case t_SARw:
			if (flags_type == t_SARw)
				*op++ = EXT(FC_OP1, FC_RETOP, 0, 922, 0); // extsh FC_RETOP, FC_OP1
		case t_SARd:
			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 792, 0); // sraw FC_RETOP, FC_OP1, FC_OP2
			break;

		case t_ROLb:
			*op++ = RLW(20, FC_OP1, FC_OP1, 24, 0, 7, 0); // rlwimi FC_OP1, FC_OP1, 24, 0, 7
		case t_ROLw:
			if (flags_type == t_ROLw)
				*op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 16, 0, 15
		case t_ROLd:
			*op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2
			break;

		case t_RORb:
			*op++ = RLW(20, FC_OP1, FC_OP1, 8, 16, 23, 0); // rlwimi FC_OP1, FC_OP1, 8, 16, 23
		case t_RORw:
			if (flags_type == t_RORw)
				*op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 16, 0, 15
		case t_RORd:
			*op++ = IMM(8, FC_OP2, FC_OP2, 32); // subfic FC_OP2, FC_OP2, 32 (FC_OP2 = 32 - FC_OP2)
			*op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2
			break;

		case t_DSHLw: // technically not correct for FC_OP3 > 16
			*op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 15
			*op++ = RLW(23, FC_RETOP, FC_RETOP, FC_OP3, 0, 31, 0); // rotlw FC_RETOP, FC_RETOP, FC_OP3
			break;
		case t_DSHLd:
			op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP3
			op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP3 = 32 - FC_OP3)
			op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 536, 0); // srw FC_OP2, FC_OP2, FC_OP3
			op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2
			return;
		case t_DSHRw: // technically not correct for FC_OP3 > 16
			*op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 15
			*op++ = EXT(FC_RETOP, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_RETOP, FC_OP3
			break;
		case t_DSHRd:
			op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP3
			op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP3 = 32 - FC_OP3)
			op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 24, 0); // slw FC_OP2, FC_OP2, FC_OP3
			op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2
			return;
#endif
		default:
			do_gen_call(fct_ptr, (Bit64u*)op, true);
			return;
	}
}

// mov 16bit value from Segs[index] into dest_reg using FC_SEGS_ADDR
// (index modulo 2 must be zero)
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_seg16_to_reg(HostReg dest_reg, Bitu index)
{
	gen_mov_word_to_reg(dest_reg, (Bit8u*)&Segs + index, false);
}

// mov 32bit value from Segs[index] into dest_reg using FC_SEGS_ADDR
// (index modulo 4 must be zero)
static void gen_mov_seg32_to_reg(HostReg dest_reg, Bitu index)
{
	gen_mov_word_to_reg(dest_reg, (Bit8u*)&Segs + index, true);
}

// add a 32bit value from Segs[index] to a full register using FC_SEGS_ADDR
// (index modulo 4 must be zero)
static void gen_add_seg32_to_reg(HostReg reg, Bitu index)
{
	gen_add(reg, (Bit8u*)&Segs + index);
}

// mov 16bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
// (index modulo 2 must be zero)
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_regval16_to_reg(HostReg dest_reg, Bitu index)
{
	gen_mov_word_to_reg(dest_reg, (Bit8u*)&cpu_regs + index, false);
}

// mov 32bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
// (index modulo 4 must be zero)
static void gen_mov_regval32_to_reg(HostReg dest_reg, Bitu index)
{
	gen_mov_word_to_reg(dest_reg, (Bit8u*)&cpu_regs + index, true);
}

// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_regbyte_to_reg_low(HostReg dest_reg, Bitu index)
{
	gen_mov_byte_to_reg_low(dest_reg, (Bit8u*)&cpu_regs + index);
}

// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void inline gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg, Bitu index)
{
	gen_mov_byte_to_reg_low_canuseword(dest_reg, (Bit8u*)&cpu_regs + index);
}

// move 16bit of register into cpu_regs[index] using FC_REGS_ADDR
// (index modulo 2 must be zero)
static void gen_mov_regval16_from_reg(HostReg src_reg, Bitu index)
{
	gen_mov_word_from_reg(src_reg, (Bit8u*)&cpu_regs + index, false);
}

// move 32bit of register into cpu_regs[index] using FC_REGS_ADDR
// (index modulo 4 must be zero)
static void gen_mov_regval32_from_reg(HostReg src_reg, Bitu index)
{
	gen_mov_word_from_reg(src_reg, (Bit8u*)&cpu_regs + index, true);
}

// move the lowest 8bit of a register into cpu_regs[index] using FC_REGS_ADDR
static void gen_mov_regbyte_from_reg_low(HostReg src_reg, Bitu index)
{
	gen_mov_byte_from_reg_low(src_reg, (Bit8u*)&cpu_regs + index);
}

// add a 32bit value from cpu_regs[index] to a full register using FC_REGS_ADDR
// (index modulo 4 must be zero)
static void gen_add_regval32_to_reg(HostReg reg, Bitu index)
{
	gen_add(reg, (Bit8u*)&cpu_regs + index);
}

// move 32bit (dword==true) or 16bit (dword==false) of a register into
// cpu_regs[index] using FC_REGS_ADDR (if dword==true index modulo 4 must be
// zero) (if dword==false index modulo 2 must be zero)
static void gen_mov_regword_from_reg(HostReg src_reg, Bitu index, bool dword)
{
	if (dword)
		gen_mov_regval32_from_reg(src_reg, index);
	else
		gen_mov_regval16_from_reg(src_reg, index);
}

// move a 32bit (dword==true) or 16bit (dword==false) value from cpu_regs[index]
// into dest_reg using FC_REGS_ADDR (if dword==true index modulo 4 must be zero)
// (if dword==false index modulo 2 must be zero)
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_regword_to_reg(HostReg dest_reg, Bitu index, bool dword)
{
	if (dword)
		gen_mov_regval32_to_reg(dest_reg, index);
	else
		gen_mov_regval16_to_reg(dest_reg, index);
}