1 /*
2 ===========================================================================
3 Copyright (C) 2008 Przemyslaw Iskra <sparky@pld-linux.org>
4 
5 This file is part of Quake III Arena source code.
6 
7 Quake III Arena source code is free software; you can redistribute it
8 and/or modify it under the terms of the GNU General Public License as
9 published by the Free Software Foundation; either version 2 of the License,
10 or (at your option) any later version.
11 
12 Quake III Arena source code is distributed in the hope that it will be
13 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with Quake III Arena source code; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20 ===========================================================================
21 */
22 
23 #include <sys/types.h> /* needed by sys/mman.h on OSX */
24 #include <sys/mman.h>
25 #include <sys/time.h>
26 #include <time.h>
27 #include <stddef.h>
28 
29 #ifndef MAP_ANONYMOUS
30 # define MAP_ANONYMOUS MAP_ANON
31 #endif
32 
33 #include "vm_local.h"
34 #include "vm_powerpc_asm.h"
35 
36 /*
37  * VM_TIMES enables showing information about time spent inside
38  * and outside generated code
39  */
40 //#define VM_TIMES
41 #ifdef VM_TIMES
42 #include <sys/times.h>
43 static clock_t time_outside_vm = 0;
44 static clock_t time_total_vm = 0;
45 #endif
46 
47 /* exit() won't be called but use it because it is marked with noreturn */
48 #define DIE( reason ) Com_Error( ERR_DROP, "vm_powerpc compiler error: " reason )
49 
50 /*
51  * vm_powerpc uses large quantities of memory during compilation,
52  * Z_Malloc memory may not be enough for some big qvm files
53  */
54 
55 //#define VM_SYSTEM_MALLOC
56 #ifdef VM_SYSTEM_MALLOC
static inline void *
PPC_Malloc( size_t size )
{
	/* allocate from the system heap; abort compilation on failure */
	void *block = malloc( size );

	if ( block == NULL )
		DIE( "Not enough memory" );

	return block;
}
66 # define PPC_Free free
67 #else
68 # define PPC_Malloc Z_Malloc
69 # define PPC_Free Z_Free
70 #endif
71 
72 /*
73  * optimizations:
74  * - hole: bubble optimization (OP_CONST+instruction)
75  * - copy: inline OP_BLOCK_COPY for lengths under 16/32 bytes
76  * - mask: use rlwinm instruction as dataMask
77  */
78 
79 #ifdef __OPTIMIZE__
80 # define OPTIMIZE_HOLE 1
81 # define OPTIMIZE_COPY 1
82 # define OPTIMIZE_MASK 1
83 #else
84 # define OPTIMIZE_HOLE 0
85 # define OPTIMIZE_COPY 0
86 # define OPTIMIZE_MASK 0
87 #endif
88 
89 /*
90  * SUPPORTED TARGETS:
91  * - Linux 32 bits
92  *   ( http://refspecs.freestandards.org/elf/elfspec_ppc.pdf )
93  *   * LR at r0 + 4
94  *   * Local variable space not needed
95  *     -> store caller safe regs at 16+
96  *
97  * - Linux 64 bits (not fully conformant)
98  *   ( http://www.ibm.com/developerworks/linux/library/l-powasm4.html )
99  *   * needs "official procedure descriptors" (only first function has one)
100  *   * LR at r0 + 16
101  *   * local variable space required, min 64 bytes, starts at 48
102  *     -> store caller safe regs at 128+
103  *
104  * - OS X 32 bits
105  *   ( http://developer.apple.com/documentation/DeveloperTools/Conceptual/LowLevelABI/Articles/32bitPowerPC.html )
106  *   * LR at r0 + 8
107  *   * local variable space required, min 32 bytes (?), starts at 24
108  *     -> store caller safe regs at 64+
109  *
110  * - OS X 64 bits (completely untested)
111  *   ( http://developer.apple.com/documentation/DeveloperTools/Conceptual/LowLevelABI/Articles/64bitPowerPC.html )
112  *   * LR at r0 + 16
113  *   * local variable space required, min 64 bytes (?), starts at 48
114  *     -> store caller safe regs at 128+
115  */
116 
117 /* Select Length - first value on 32 bits, second on 64 */
118 #ifdef __PPC64__
119 #  define SL( a, b ) (b)
120 #else
121 #  define SL( a, b ) (a)
122 #endif
123 
124 /* Select ABI - first for ELF, second for OS X */
125 #ifdef __ELF__
126 #  define SA( a, b ) (a)
127 #else
128 #  define SA( a, b ) (b)
129 #endif
130 
131 #define ELF32	SL( SA( 1, 0 ), 0 )
132 #define ELF64	SL( 0, SA( 1, 0 ) )
133 #define OSX32	SL( SA( 0, 1 ), 0 )
134 #define OSX64	SL( 0, SA( 0, 1 ) )
135 
136 /* native length load/store instructions ( L stands for long ) */
137 #define iSTLU	SL( iSTWU, iSTDU )
138 #define iSTL	SL( iSTW, iSTD )
139 #define iLL	SL( iLWZ, iLD )
140 #define iLLX	SL( iLWZX, iLDX )
141 
142 /* register length */
143 #define GPRLEN	SL( 4, 8 )
144 #define FPRLEN	(8)
/* shift that many bits to obtain value multiplied by GPRLEN */
146 #define GPRLEN_SHIFT	SL( 2, 3 )
147 
148 /* Link register position */
149 #define STACK_LR	SL( SA( 4, 8 ), 16 )
150 /* register save position */
151 #define STACK_SAVE	SL( SA( 16, 64 ), 128 )
152 /* temporary space, for float<->int exchange */
153 #define STACK_TEMP	SL( SA( 8, 24 ), 48 )
154 /* red zone temporary space, used instead of STACK_TEMP if stack isn't
155  * prepared properly */
156 #define STACK_RTEMP	(-16)
157 
158 #if ELF64
159 /*
160  * Official Procedure Descriptor
161  *  we need to prepare one for generated code if we want to call it
162  * as function
163  */
typedef struct {
	// entry point of the generated code
	void *function;
	// TOC base — per the 64-bit PowerPC ELF ABI (see link above)
	void *toc;
	// environment pointer slot required by the ABI; unused here
	void *env;
} opd_t;
169 #endif
170 
171 
172 /*
173  * opcode information table:
174  * - length of immediate value
175  * - returned register type
176  * - required register(s) type
177  */
178 #define opImm0	0x0000 /* no immediate */
179 #define opImm1	0x0001 /* 1 byte immadiate value after opcode */
180 #define opImm4	0x0002 /* 4 bytes immediate value after opcode */
181 
182 #define opRet0	0x0000 /* returns nothing */
183 #define opRetI	0x0004 /* returns integer */
184 #define opRetF	0x0008 /* returns float */
185 #define opRetIF	(opRetI | opRetF) /* returns integer or float */
186 
187 #define opArg0	0x0000 /* requires nothing */
188 #define opArgI	0x0010 /* requires integer(s) */
189 #define opArgF	0x0020 /* requires float(s) */
190 #define opArgIF	(opArgI | opArgF) /* requires integer or float */
191 
192 #define opArg2I	0x0040 /* requires second argument, integer */
193 #define opArg2F	0x0080 /* requires second argument, float */
194 #define opArg2IF (opArg2I | opArg2F) /* requires second argument, integer or float */
195 
static const unsigned char vm_opInfo[256] =
{
	// per-opcode flags: immediate size (opImm*), result type pushed on
	// the opstack (opRet*) and operand type(s) consumed (opArg*, opArg2*)
	[OP_UNDEF]	= opImm0,
	[OP_IGNORE]	= opImm0,
	[OP_BREAK]	= opImm0,
	[OP_ENTER]	= opImm4,
			/* OP_LEAVE has to accept floats, they will be converted to ints */
	[OP_LEAVE]	= opImm4 | opRet0 | opArgIF,
			/* only STORE4 and POP use values from OP_CALL,
			 * no need to convert floats back */
	[OP_CALL]	= opImm0 | opRetI | opArgI,
	[OP_PUSH]	= opImm0 | opRetIF,
	[OP_POP]	= opImm0 | opRet0 | opArgIF,
	[OP_CONST]	= opImm4 | opRetIF,
	[OP_LOCAL]	= opImm4 | opRetI,
	[OP_JUMP]	= opImm0 | opRet0 | opArgI,

	// comparisons consume two operands of matching type and branch
	[OP_EQ]		= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_NE]		= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_LTI]	= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_LEI]	= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_GTI]	= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_GEI]	= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_LTU]	= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_LEU]	= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_GTU]	= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_GEU]	= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_EQF]	= opImm4 | opRet0 | opArgF | opArg2F,
	[OP_NEF]	= opImm4 | opRet0 | opArgF | opArg2F,
	[OP_LTF]	= opImm4 | opRet0 | opArgF | opArg2F,
	[OP_LEF]	= opImm4 | opRet0 | opArgF | opArg2F,
	[OP_GTF]	= opImm4 | opRet0 | opArgF | opArg2F,
	[OP_GEF]	= opImm4 | opRet0 | opArgF | opArg2F,

	// memory access
	[OP_LOAD1]	= opImm0 | opRetI | opArgI,
	[OP_LOAD2]	= opImm0 | opRetI | opArgI,
	[OP_LOAD4]	= opImm0 | opRetIF| opArgI,
	[OP_STORE1]	= opImm0 | opRet0 | opArgI | opArg2I,
	[OP_STORE2]	= opImm0 | opRet0 | opArgI | opArg2I,
	[OP_STORE4]	= opImm0 | opRet0 | opArgIF| opArg2I,
	[OP_ARG]	= opImm1 | opRet0 | opArgIF,
	[OP_BLOCK_COPY]	= opImm4 | opRet0 | opArgI | opArg2I,

	// integer arithmetic
	[OP_SEX8]	= opImm0 | opRetI | opArgI,
	[OP_SEX16]	= opImm0 | opRetI | opArgI,
	[OP_NEGI]	= opImm0 | opRetI | opArgI,
	[OP_ADD]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_SUB]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_DIVI]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_DIVU]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_MODI]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_MODU]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_MULI]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_MULU]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_BAND]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_BOR]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_BXOR]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_BCOM]	= opImm0 | opRetI | opArgI,
	[OP_LSH]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_RSHI]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_RSHU]	= opImm0 | opRetI | opArgI | opArg2I,
	// float arithmetic and int<->float conversion
	[OP_NEGF]	= opImm0 | opRetF | opArgF,
	[OP_ADDF]	= opImm0 | opRetF | opArgF | opArg2F,
	[OP_SUBF]	= opImm0 | opRetF | opArgF | opArg2F,
	[OP_DIVF]	= opImm0 | opRetF | opArgF | opArg2F,
	[OP_MULF]	= opImm0 | opRetF | opArgF | opArg2F,
	[OP_CVIF]	= opImm0 | opRetF | opArgI,
	[OP_CVFI]	= opImm0 | opRetI | opArgF,
};
265 
266 /*
267  * source instruction data
268  */
typedef struct source_instruction_s source_instruction_t;
struct source_instruction_s {
	// opcode (OP_* value, indexes vm_opInfo)
	unsigned long int op;

	// number of instruction (position in the source bytecode)
	unsigned long int i_count;

	// immediate value (if any), viewable at several widths/signs
	union {
		unsigned int i;
		signed int si;
		signed short ss[2];
		unsigned short us[2];
		unsigned char b;
	} arg;

	// required and returned registers (rTYPE_* flag bits,
	// filled in by VM_AnalyzeFunction)
	unsigned char regA1;	// type of the first consumed operand
	unsigned char regA2;	// type of the second consumed operand
	unsigned char regR;	// type of the produced value
	unsigned char regPos;	// opstack depth -> index into gpr/fpr list

	// next instruction
	source_instruction_t *next;
};
295 
296 
297 
298 /*
299  * read-only data needed by the generated code
300  */
typedef struct VM_Data {
	// length of this struct + data
	size_t dataLength;
	// compiled code size (in bytes)
	// it only is code size, without the data
	size_t codeLength;

	// function pointers, no use to waste registers for them
	long int (* AsmCall)( int, int );
	void (* BlockCopy )( unsigned int, unsigned int, size_t );

	// instruction pointers, rarely used so don't waste register
	ppc_instruction_t *iPointers;

	// data mask for load and store, not used if optimized
	// (OPTIMIZE_MASK uses rlwinm with fastMaskHi/Lo instead)
	unsigned int dataMask;

	// fixed number used to convert from integer to float
	unsigned int floatBase; // 0x59800004

#if ELF64
	// official procedure descriptor
	opd_t opd;
#endif

	// additional constants, for floating point OP_CONST
	// this data has dynamic length, thus '0' here
	// (GNU zero-length array; the C99 spelling would be 'data[]')
	unsigned int data[0];
} vm_data_t;
330 
331 #ifdef offsetof
332 # define VM_Data_Offset( field )	offsetof( vm_data_t, field )
333 #else
334 # define OFFSET( structName, field ) \
335 	( (void *)&(((structName *)NULL)->field) - NULL )
336 # define VM_Data_Offset( field )	OFFSET( vm_data_t, field )
337 #endif
338 
339 
340 /*
341  * functions used by generated code
342  */
343 static long int
VM_AsmCall(int callSyscallInvNum,int callProgramStack)344 VM_AsmCall( int callSyscallInvNum, int callProgramStack )
345 {
346 	vm_t *savedVM = currentVM;
347 	long int i, ret;
348 #ifdef VM_TIMES
349 	struct tms start_time, stop_time;
350 	clock_t saved_time = time_outside_vm;
351 	times( &start_time );
352 #endif
353 
354 	// save the stack to allow recursive VM entry
355 	currentVM->programStack = callProgramStack - 4;
356 
357 	// we need to convert ints to longs on 64bit powerpcs
358 	if ( sizeof( intptr_t ) == sizeof( int ) ) {
359 		intptr_t *argPosition = (intptr_t *)((byte *)currentVM->dataBase + callProgramStack + 4);
360 
361 		// generated code does not invert syscall number
362 		argPosition[ 0 ] = -1 - callSyscallInvNum;
363 
364 		ret = currentVM->systemCall( argPosition );
365 	} else {
366 		intptr_t args[MAX_VMSYSCALL_ARGS];
367 
368 		// generated code does not invert syscall number
369 		args[0] = -1 - callSyscallInvNum;
370 
371 		int *argPosition = (int *)((byte *)currentVM->dataBase + callProgramStack + 4);
372 		for( i = 1; i < ARRAY_LEN(args); i++ )
373 			args[ i ] = argPosition[ i ];
374 
375 		ret = currentVM->systemCall( args );
376 	}
377 
378 	currentVM = savedVM;
379 
380 #ifdef VM_TIMES
381 	times( &stop_time );
382 	time_outside_vm = saved_time + ( stop_time.tms_utime - start_time.tms_utime );
383 #endif
384 
385 	return ret;
386 }
387 
388 /*
389  * code-block descriptors
390  */
typedef struct dest_instruction dest_instruction_t;
typedef struct symbolic_jump symbolic_jump_t;

struct symbolic_jump {
	// number of source instruction it has to jump to
	unsigned long int jump_to;

	// jump condition true/false, (4*cr7+(eq|gt..))
	long int bo, bi;

	// extensions / modifiers (branch-link)
	unsigned long ext;

	// dest_instruction referring to this jump
	dest_instruction_t *parent;

	// next jump
	symbolic_jump_t *nextJump;
};

struct dest_instruction {
	// position in the output chain
	unsigned long int count;

	// source instruction number
	unsigned long int i_count;

	// exact (for instructions), or maximum (for jump) length
	unsigned short length;

	dest_instruction_t *next;

	// if the instruction is a jump then jump will be non NULL
	symbolic_jump_t *jump;

	// if jump is NULL then all the instructions will be here
	// (GNU zero-length array, sized at allocation time)
	ppc_instruction_t code[0];
};
429 
430 // first and last instruction,
431 // di_first is a dummy instruction
432 static dest_instruction_t *di_first = NULL, *di_last = NULL;
433 // number of instructions
434 static unsigned long int di_count = 0;
435 // pointers needed to compute local jumps, those aren't pointers to
436 // actual instructions, just used to check how long the jump is going
437 // to be and whether it is positive or negative
438 static dest_instruction_t **di_pointers = NULL;
439 
440 // output instructions which does not come from source code
441 // use false i_count value
442 #define FALSE_ICOUNT 0xffffffff
443 
444 
445 /*
446  * append specified instructions at the end of instruction chain
447  */
448 static void
PPC_Append(dest_instruction_t * di_now,unsigned long int i_count)449 PPC_Append(
450 		dest_instruction_t *di_now,
451 		unsigned long int i_count
452   	  )
453 {
454 	di_now->count = di_count++;
455 	di_now->i_count = i_count;
456 	di_now->next = NULL;
457 
458 	di_last->next = di_now;
459 	di_last = di_now;
460 
461 	if ( i_count != FALSE_ICOUNT ) {
462 		if ( ! di_pointers[ i_count ] )
463 			di_pointers[ i_count ] = di_now;
464 	}
465 }
466 
467 /*
468  * make space for instructions and append
469  */
470 static void
PPC_AppendInstructions(unsigned long int i_count,size_t num_instructions,const ppc_instruction_t * is)471 PPC_AppendInstructions(
472 		unsigned long int i_count,
473 		size_t num_instructions,
474 		const ppc_instruction_t *is
475 	)
476 {
477 	if ( num_instructions < 0 )
478 		num_instructions = 0;
479 	size_t iBytes = sizeof( ppc_instruction_t ) * num_instructions;
480 	dest_instruction_t *di_now = PPC_Malloc( sizeof( dest_instruction_t ) + iBytes );
481 
482 	di_now->length = num_instructions;
483 	di_now->jump = NULL;
484 
485 	if ( iBytes > 0 )
486 		memcpy( &(di_now->code[0]), is, iBytes );
487 
488 	PPC_Append( di_now, i_count );
489 }
490 
491 /*
492  * create symbolic jump and append
493  */
494 static symbolic_jump_t *sj_first = NULL, *sj_last = NULL;
495 static void
PPC_PrepareJump(unsigned long int i_count,unsigned long int dest,long int bo,long int bi,unsigned long int ext)496 PPC_PrepareJump(
497 		unsigned long int i_count,
498 		unsigned long int dest,
499 		long int bo,
500 		long int bi,
501 		unsigned long int ext
502 	)
503 {
504 	dest_instruction_t *di_now = PPC_Malloc( sizeof( dest_instruction_t ) );
505 	symbolic_jump_t *sj = PPC_Malloc( sizeof( symbolic_jump_t ) );
506 
507 	sj->jump_to = dest;
508 	sj->bo = bo;
509 	sj->bi = bi;
510 	sj->ext = ext;
511 	sj->parent = di_now;
512 	sj->nextJump = NULL;
513 
514 	sj_last->nextJump = sj;
515 	sj_last = sj;
516 
517 	di_now->length = (bo == branchAlways ? 1 : 2);
518 	di_now->jump = sj;
519 
520 	PPC_Append( di_now, i_count );
521 }
522 
523 /*
 * simplify instruction emission
525  */
/* open an emission scope for source instruction i_cnt; declares the
 * local buffer that pushIn()/in() fill until the next emitEnd() */
#define emitStart( i_cnt ) \
	unsigned long int i_count = i_cnt; \
	size_t num_instructions = 0; \
	long int force_emit = 0; \
	ppc_instruction_t instructions[50];

/* append one already-encoded instruction to the local buffer */
#define pushIn( inst ) \
	(instructions[ num_instructions++ ] = inst)
/* encode an instruction with IN() and append it */
#define in( inst, args... ) pushIn( IN( inst, args ) )

/* flush the buffered instructions to the output chain
 * (force_emit makes even an empty buffer produce an entry) */
#define emitEnd() \
	do{ \
		if ( num_instructions || force_emit ) \
			PPC_AppendInstructions( i_count, num_instructions, instructions );\
		num_instructions = 0; \
	} while(0)

/* flush the buffer, then emit a symbolic jump to source instruction dest */
#define emitJump( dest, bo, bi, ext ) \
	do { \
		emitEnd(); \
		PPC_PrepareJump( i_count, dest, bo, bi, ext ); \
	} while(0)
548 
549 
550 /*
551  * definitions for creating .data section,
552  * used in cases where constant float is needed
553  */
#define LOCAL_DATA_CHUNK 50
typedef struct local_data_s local_data_t;
struct local_data_s {
	// number of data in this structure
	long int count;

	// data placeholder
	unsigned int data[ LOCAL_DATA_CHUNK ];

	// next chunk, if this one wasn't enough
	local_data_t *next;
};

// first data chunk (head of the chunk list, allocated in PPC_CompileInit)
static local_data_t *data_first = NULL;
// total number of data words accumulated so far
static long int data_acc = 0;
571 
572 /*
573  * append the data and return its offset
574  */
575 static size_t
PPC_PushData(unsigned int datum)576 PPC_PushData( unsigned int datum )
577 {
578 	local_data_t *d_now = data_first;
579 	long int accumulated = 0;
580 
581 	// check whether we have this one already
582 	do {
583 		long int i;
584 		for ( i = 0; i < d_now->count; i++ ) {
585 			if ( d_now->data[ i ] == datum ) {
586 				accumulated += i;
587 				return VM_Data_Offset( data[ accumulated ] );
588 			}
589 		}
590 		if ( !d_now->next )
591 			break;
592 
593 		accumulated += d_now->count;
594 		d_now = d_now->next;
595 	} while (1);
596 
597 	// not found, need to append
598 	accumulated += d_now->count;
599 
600 	// last chunk is full, create new one
601 	if ( d_now->count >= LOCAL_DATA_CHUNK ) {
602 		d_now->next = PPC_Malloc( sizeof( local_data_t ) );
603 		d_now = d_now->next;
604 		d_now->count = 0;
605 		d_now->next = NULL;
606 	}
607 
608 	d_now->data[ d_now->count ] = datum;
609 	d_now->count += 1;
610 
611 	data_acc = accumulated + 1;
612 
613 	return VM_Data_Offset( data[ accumulated ] );
614 }
615 
616 /*
617  * find leading zeros in dataMask to implement it with
618  * "rotate and mask" instruction
619  */
static long int fastMaskHi = 0, fastMaskLo = 31;
/*
 * locate the contiguous run of set bits in mask so it can be applied
 * with a single "rotate and mask" (rlwinm) instruction:
 * fastMaskHi = index of the first set bit counted from the MSB,
 * fastMaskLo = index of the last set bit counted from the MSB
 * (mask must be non-zero)
 */
static void
PPC_MakeFastMask( int mask )
{
#if defined( __GNUC__ ) && ( __GNUC__ >= 4 || ( __GNUC__ == 3 && __GNUC_MINOR__ >= 4 ) )
	/* count leading zeros */
	fastMaskHi = __builtin_clz( mask );

	/* count trailing zeros */
	fastMaskLo = 31 - __builtin_ctz( mask );
#else
	long int bit;

	/* scan down from the most significant bit */
	for ( bit = 0; ( mask & ( 0x80000000 >> bit ) ) == 0; bit++ )
		;
	fastMaskHi = bit;

	/* scan up from the least significant bit */
	for ( bit = 31; ( mask & ( 0x80000000 >> bit ) ) == 0; bit-- )
		;
	fastMaskLo = bit;
#endif
}
640 
641 
642 /*
643  * register definitions
644  */
645 
646 /* registers which are global for generated code */
647 
648 // pointer to VM_Data (constant)
649 #define rVMDATA r14
650 // vm->dataBase (constant)
651 #define rDATABASE r15
652 // programStack (variable)
653 #define rPSTACK r16
654 
655 /*
656  * function local registers,
657  *
658  * normally only volatile registers are used, but if there aren't enough
 * or function has to preserve some value while calling another one
660  * then caller safe registers are used as well
661  */
// allocation order for general purpose registers;
// indices below gpr_vstart are caller safe and must be saved/restored
static const long int gpr_list[] = {
	/* caller safe registers, normally only one is used */
	r24, r23, r22, r21,
	r20, r19, r18, r17,
	/* volatile registers (preferred),
	 * normally no more than 5 is used */
	r3, r4, r5, r6,
	r7, r8, r9, r10,
};
static const long int gpr_vstart = 8; /* position of first volatile register */
static const long int gpr_total = ARRAY_LEN( gpr_list );

// allocation order for floating point registers, same layout as above
static const long int fpr_list[] = {
	/* static registers, normally none is used */
	f20, f21, f19, f18,
	f17, f16, f15, f14,
	/* volatile registers (preferred),
	 * normally no more than 7 is used */
	f0, f1, f2, f3,
	f4, f5, f6, f7,
	f8, f9, f10, f11,
	f12, f13,
};
static const long int fpr_vstart = 8; /* position of first volatile register */
static const long int fpr_total = ARRAY_LEN( fpr_list );
687 
688 /*
689  * prepare some dummy structures and emit init code
690  */
static void
PPC_CompileInit( void )
{
	// dummy list heads keep the append functions free of NULL checks
	di_first = di_last = PPC_Malloc( sizeof( dest_instruction_t ) );
	di_first->count = 0;
	di_first->next = NULL;
	di_first->jump = NULL;

	sj_first = sj_last = PPC_Malloc( sizeof( symbolic_jump_t ) );
	sj_first->nextJump = NULL;

	data_first = PPC_Malloc( sizeof( local_data_t ) );
	data_first->count = 0;
	data_first->next = NULL;

	/*
	 * init function:
	 * saves old values of global registers and sets our values
	 * function prototype is:
	 *  int begin( void *data, int programStack, void *vm->dataBase )
	 */

	/* first instruction must not be placed on instruction list */
	emitStart( FALSE_ICOUNT );

	// frame: register save area plus room for three globals and padding
	long int stack = STACK_SAVE + 4 * GPRLEN;

	in( iMFLR, r0 );		// fetch caller's return address
	in( iSTLU, r1, -stack, r1 );	// open our stack frame
	in( iSTL, rVMDATA, STACK_SAVE + 0 * GPRLEN, r1 );	// save globals
	in( iSTL, rPSTACK, STACK_SAVE + 1 * GPRLEN, r1 );
	in( iSTL, rDATABASE, STACK_SAVE + 2 * GPRLEN, r1 );
	in( iSTL, r0, stack + STACK_LR, r1 );	// save return address
	in( iMR, rVMDATA, r3 );		// install our global registers
	in( iMR, rPSTACK, r4 );		//  from the function arguments
	in( iMR, rDATABASE, r5 );
	// NOTE: the +4*8 branch offset hardcodes the instruction count of
	// this prologue — keep the sequence length in sync if editing
	in( iBL, +4*8 ); // LINK JUMP: first generated instruction | XXX jump !
	in( iLL, rVMDATA, STACK_SAVE + 0 * GPRLEN, r1 );	// restore globals
	in( iLL, rPSTACK, STACK_SAVE + 1 * GPRLEN, r1 );
	in( iLL, rDATABASE, STACK_SAVE + 2 * GPRLEN, r1 );
	in( iLL, r0, stack + STACK_LR, r1 );
	in( iMTLR, r0 );		// restore return address
	in( iADDI, r1, r1, stack );	// close the frame
	in( iBLR );			// return to caller

	emitEnd();
}
738 
739 // rFIRST is the copy of the top value on the opstack
740 #define rFIRST		(gpr_list[ gpr_pos - 1])
741 // second value on the opstack
742 #define rSECOND		(gpr_list[ gpr_pos - 2 ])
743 // temporary registers, not on the opstack
744 #define rTEMP(x)	(gpr_list[ gpr_pos + x ])
745 #define rTMP		rTEMP(0)
746 
747 #define fFIRST		(fpr_list[ fpr_pos - 1 ])
748 #define fSECOND		(fpr_list[ fpr_pos - 2 ])
749 #define fTEMP(x)	(fpr_list[ fpr_pos + x ])
750 #define fTMP		fTEMP(0)
751 
752 // register types
753 #define rTYPE_STATIC	0x01
754 #define rTYPE_FLOAT	0x02
755 
756 // what type should this opcode return
757 #define RET_INT		( !(i_now->regR & rTYPE_FLOAT) )
758 #define RET_FLOAT	( i_now->regR & rTYPE_FLOAT )
759 // what type should it accept
760 #define ARG_INT		( ! i_now->regA1 )
761 #define ARG_FLOAT	( i_now->regA1 )
762 #define ARG2_INT	( ! i_now->regA2 )
763 #define ARG2_FLOAT	( i_now->regA2 )
764 
765 /*
766  * emit OP_CONST, called if nothing has used the const value directly
767  */
static void
PPC_EmitConst( source_instruction_t * const i_const )
{
	emitStart( i_const->i_count );

	if ( !(i_const->regR & rTYPE_FLOAT) ) {
		// gpr_pos needed for "rFIRST" to work
		long int gpr_pos = i_const->regPos;

		// pick the shortest sequence able to materialize the value
		if ( i_const->arg.si >= -0x8000 && i_const->arg.si < 0x8000 ) {
			// fits in a sign-extended 16-bit immediate
			in( iLI, rFIRST, i_const->arg.si );
		} else if ( i_const->arg.i < 0x10000 ) {
			// unsigned 16-bit value: clear, then OR in the low half
			in( iLI, rFIRST, 0 );
			in( iORI, rFIRST, rFIRST, i_const->arg.i );
		} else {
			// general case: load high halfword, OR in low halfword
			// (ss[0]/us[1] index the big-endian halves of arg)
			in( iLIS, rFIRST, i_const->arg.ss[ 0 ] );
			if ( i_const->arg.us[ 1 ] != 0 )
				in( iORI, rFIRST, rFIRST, i_const->arg.us[ 1 ] );
		}

	} else {
		// fpr_pos needed for "fFIRST" to work
		long int fpr_pos = i_const->regPos;

		// there's no good way to generate the data,
		// just read it from data section
		in( iLFS, fFIRST, PPC_PushData( i_const->arg.i ), rVMDATA );
	}

	emitEnd();
}
#define MAYBE_EMIT_CONST() if ( i_const ) PPC_EmitConst( i_const )
800 
801 /*
802  * emit empty instruction, just sets the needed pointers
803  */
static inline void
PPC_EmitNull( source_instruction_t * const i_null )
{
	// emits no machine code, but records the source->output mapping
	// (di_pointers) so jumps to this source instruction still resolve
	PPC_AppendInstructions( i_null->i_count, 0, NULL );
}
#define EMIT_FALSE_CONST() PPC_EmitNull( i_const )
810 
811 
812 /*
 * analyze function for register usage and whether it needs stack (r1) prepared
814  */
/*
 * Two-pass analysis of one function:
 *  pass 1 tags every value-producing instruction with the register type
 *  (int/float, volatile/static) its consumer requires;
 *  pass 2 counts how many registers of each class are needed.
 * Outputs:
 *  *prepareStack   - set to 1 if a stack frame must be built
 *  *gpr_start_pos  - first gpr_list index this function may use
 *  *fpr_start_pos  - first fpr_list index this function may use
 */
static void
VM_AnalyzeFunction(
		source_instruction_t * const i_first,
		long int *prepareStack,
		long int *gpr_start_pos,
		long int *fpr_start_pos
		)
{
	source_instruction_t *i_now = i_first;

	// stack of the instructions that produced each opstack slot
	// NOTE(review): fixed depth of 20 with no overflow check — assumes
	// the qvm opstack never gets deeper; verify against bytecode limits
	source_instruction_t *value_provider[20] = { NULL };
	unsigned long int opstack_depth = 0;

	/*
	 * first step:
	 *  remember what codes returned some value and mark the value type
	 *  when we get to know what it should be
	 */
	while ( (i_now = i_now->next) ) {
		unsigned long int op = i_now->op;
		unsigned long int opi = vm_opInfo[ op ];

		if ( opi & opArgIF ) {
			assert( opstack_depth > 0 );

			opstack_depth--;
			source_instruction_t *vp = value_provider[ opstack_depth ];
			unsigned long int vpopi = vm_opInfo[ vp->op ];

			if ( (opi & opArgI) && (vpopi & opRetI) ) {
				// instruction accepts integer, provider returns integer
				//vp->regR |= rTYPE_INT;
				//i_now->regA1 = rTYPE_INT;
			} else if ( (opi & opArgF) && (vpopi & opRetF) ) {
				// instruction accepts float, provider returns float
				vp->regR |= rTYPE_FLOAT; // use OR here - could be marked as static
				i_now->regA1 = rTYPE_FLOAT;
			} else {
				// instruction arg type does not agree with
				// provider return type
				DIE( "unrecognized instruction combination" );
			}

		}
		if ( opi & opArg2IF ) {
			assert( opstack_depth > 0 );

			opstack_depth--;
			source_instruction_t *vp = value_provider[ opstack_depth ];
			unsigned long int vpopi = vm_opInfo[ vp->op ];

			if ( (opi & opArg2I) && (vpopi & opRetI) ) {
				// instruction accepts integer, provider returns integer
				//vp->regR |= rTYPE_INT;
				//i_now->regA2 = rTYPE_INT;
			} else if ( (opi & opArg2F) && (vpopi & opRetF) ) {
				// instruction accepts float, provider returns float
				vp->regR |= rTYPE_FLOAT; // use OR here - could be marked as static
				i_now->regA2 = rTYPE_FLOAT;
			} else {
				// instruction arg type does not agree with
				// provider return type
				DIE( "unrecognized instruction combination" );
			}
		}


		// calls (and large block copies compiled as calls) clobber
		// volatile registers, so every live opstack value must sit
		// in a caller safe register across them
		if (
			( op == OP_CALL )
				||
			( op == OP_BLOCK_COPY && ( i_now->arg.i > SL( 16, 32 ) || !OPTIMIZE_COPY ) )
		) {
			long int i;
			*prepareStack = 1;
			// force caller safe registers so we won't have to save them
			for ( i = 0; i < opstack_depth; i++ ) {
				source_instruction_t *vp = value_provider[ i ];
				vp->regR |= rTYPE_STATIC;
			}
		}


		if ( opi & opRetIF ) {
			value_provider[ opstack_depth ] = i_now;
			opstack_depth++;
		}
	}

	/*
	 * second step:
	 *  now that we know register types; compute exactly how many registers
	 *  of each type we need
	 */

	i_now = i_first;
	// needed_reg/max_reg indices, derived from regR bits
	// (rTYPE_STATIC = 1, rTYPE_FLOAT = 2):
	//  [0] all ints, [1] static ints, [2] all floats, [3] static floats
	long int needed_reg[4] = {0,0,0,0}, max_reg[4] = {0,0,0,0};
	opstack_depth = 0;
	while ( (i_now = i_now->next) ) {
		unsigned long int op = i_now->op;
		unsigned long int opi = vm_opInfo[ op ];

		if ( opi & opArgIF ) {
			assert( opstack_depth > 0 );
			opstack_depth--;
			source_instruction_t *vp = value_provider[ opstack_depth ];

			// value consumed: release its register(s)
			needed_reg[ ( vp->regR & 2 ) ] -= 1;
			if ( vp->regR & 1 ) // static
				needed_reg[ ( vp->regR & 3 ) ] -= 1;
		}
		if ( opi & opArg2IF ) {
			assert( opstack_depth > 0 );
			opstack_depth--;
			source_instruction_t *vp = value_provider[ opstack_depth ];

			needed_reg[ ( vp->regR & 2 ) ] -= 1;
			if ( vp->regR & 1 ) // static
				needed_reg[ ( vp->regR & 3 ) ] -= 1;
		}

		if ( opi & opRetIF ) {
			long int i;
			value_provider[ opstack_depth ] = i_now;
			opstack_depth++;

			// value produced: track the high-water mark per class
			i = i_now->regR & 2;
			needed_reg[ i ] += 1;
			if ( max_reg[ i ] < needed_reg[ i ] )
				max_reg[ i ] = needed_reg[ i ];

			i = i_now->regR & 3;
			if ( i & 1 ) {
				needed_reg[ i ] += 1;
				if ( max_reg[ i ] < needed_reg[ i ] )
					max_reg[ i ] = needed_reg[ i ];
			}
		}
	}

	// extend the usable gpr range into caller safe registers if the
	// volatile ones don't suffice or statics are required
	long int gpr_start = gpr_vstart;
	const long int gpr_volatile = gpr_total - gpr_vstart;
	if ( max_reg[ 1 ] > 0 || max_reg[ 0 ] > gpr_volatile ) {
		// max_reg[ 0 ] - all gprs needed
		// max_reg[ 1 ] - static gprs needed
		long int max = max_reg[ 0 ] - gpr_volatile;
		if ( max_reg[ 1 ] > max )
			max = max_reg[ 1 ];
		if ( max > gpr_vstart ) {
			/* error */
			DIE( "Need more GPRs" );
		}

		gpr_start -= max;

		// need stack to save caller safe registers
		*prepareStack = 1;
	}
	*gpr_start_pos = gpr_start;

	// same computation for the float registers
	long int fpr_start = fpr_vstart;
	const long int fpr_volatile = fpr_total - fpr_vstart;
	if ( max_reg[ 3 ] > 0 || max_reg[ 2 ] > fpr_volatile ) {
		// max_reg[ 2 ] - all fprs needed
		// max_reg[ 3 ] - static fprs needed
		long int max = max_reg[ 2 ] - fpr_volatile;
		if ( max_reg[ 3 ] > max )
			max = max_reg[ 3 ];
		if ( max > fpr_vstart ) {
			/* error */
			DIE( "Need more FPRs" );
		}

		fpr_start -= max;

		// need stack to save caller safe registers
		*prepareStack = 1;
	}
	*fpr_start_pos = fpr_start;
}
994 
995 /*
996  * translate opcodes to ppc instructions,
997  * it works on functions, not on whole code at once
998  */
999 static void
VM_CompileFunction(source_instruction_t * const i_first)1000 VM_CompileFunction( source_instruction_t * const i_first )
1001 {
1002 	long int prepareStack = 0;
1003 	long int gpr_start_pos, fpr_start_pos;
1004 
1005 	VM_AnalyzeFunction( i_first, &prepareStack, &gpr_start_pos, &fpr_start_pos );
1006 
1007 	long int gpr_pos = gpr_start_pos, fpr_pos = fpr_start_pos;
1008 
1009 	// OP_CONST combines well with many opcodes so we treat it in a special way
1010 	source_instruction_t *i_const = NULL;
1011 	source_instruction_t *i_now = i_first;
1012 
1013 	// how big the stack has to be
1014 	long int save_space = STACK_SAVE;
1015 	{
1016 		if ( gpr_start_pos < gpr_vstart )
1017 			save_space += (gpr_vstart - gpr_start_pos) * GPRLEN;
1018 		save_space = ( save_space + 15 ) & ~0x0f;
1019 
1020 		if ( fpr_start_pos < fpr_vstart )
1021 			save_space += (fpr_vstart - fpr_start_pos) * FPRLEN;
1022 		save_space = ( save_space + 15 ) & ~0x0f;
1023 	}
1024 
1025 	long int stack_temp = prepareStack ? STACK_TEMP : STACK_RTEMP;
1026 
1027 	while ( (i_now = i_now->next) ) {
1028 		emitStart( i_now->i_count );
1029 
1030 		switch ( i_now->op )
1031 		{
1032 			default:
1033 			case OP_UNDEF:
1034 			case OP_IGNORE:
1035 				MAYBE_EMIT_CONST();
1036 				in( iNOP );
1037 				break;
1038 
1039 			case OP_BREAK:
1040 				MAYBE_EMIT_CONST();
1041 				// force SEGV
1042 				in( iLWZ, r0, 0, r0 );
1043 				break;
1044 
1045 			case OP_ENTER:
1046 				if ( i_const )
1047 					DIE( "Weird opcode order" );
1048 
1049 				// don't prepare stack if not needed
1050 				if ( prepareStack ) {
1051 					long int i, save_pos = STACK_SAVE;
1052 
1053 					in( iMFLR, r0 );
1054 					in( iSTLU, r1, -save_space, r1 );
1055 					in( iSTL, r0, save_space + STACK_LR, r1 );
1056 
1057 					/* save registers */
1058 					for ( i = gpr_start_pos; i < gpr_vstart; i++ ) {
1059 						in( iSTL, gpr_list[ i ], save_pos, r1 );
1060 						save_pos += GPRLEN;
1061 					}
1062 					save_pos = ( save_pos + 15 ) & ~0x0f;
1063 
1064 					for ( i = fpr_start_pos; i < fpr_vstart; i++ ) {
1065 						in( iSTFD, fpr_list[ i ], save_pos, r1 );
1066 						save_pos += FPRLEN;
1067 					}
1068 					prepareStack = 2;
1069 				}
1070 
1071 				in( iADDI, rPSTACK, rPSTACK, - i_now->arg.si );
1072 				break;
1073 
1074 			case OP_LEAVE:
1075 				if ( i_const ) {
1076 					EMIT_FALSE_CONST();
1077 
1078 					if ( i_const->regR & rTYPE_FLOAT)
1079 						DIE( "constant float in OP_LEAVE" );
1080 
1081 					if ( i_const->arg.si >= -0x8000 && i_const->arg.si < 0x8000 ) {
1082 						in( iLI, r3, i_const->arg.si );
1083 					} else if ( i_const->arg.i < 0x10000 ) {
1084 						in( iLI, r3, 0 );
1085 						in( iORI, r3, r3, i_const->arg.i );
1086 					} else {
1087 						in( iLIS, r3, i_const->arg.ss[ 0 ] );
1088 						if ( i_const->arg.us[ 1 ] != 0 )
1089 							in( iORI, r3, r3, i_const->arg.us[ 1 ] );
1090 					}
1091 					gpr_pos--;
1092 				} else {
1093 					MAYBE_EMIT_CONST();
1094 
1095 					/* place return value in r3 */
1096 					if ( ARG_INT ) {
1097 						if ( rFIRST != r3 )
1098 							in( iMR, r3, rFIRST );
1099 						gpr_pos--;
1100 					} else {
1101 						in( iSTFS, fFIRST, stack_temp, r1 );
1102 						in( iLWZ, r3, stack_temp, r1 );
1103 						fpr_pos--;
1104 					}
1105 				}
1106 
1107 				// don't undo stack if not prepared
1108 				if ( prepareStack >= 2 ) {
1109 					long int i, save_pos = STACK_SAVE;
1110 
1111 					in( iLL, r0, save_space + STACK_LR, r1 );
1112 
1113 
1114 					/* restore registers */
1115 					for ( i = gpr_start_pos; i < gpr_vstart; i++ ) {
1116 						in( iLL, gpr_list[ i ], save_pos, r1 );
1117 						save_pos += GPRLEN;
1118 					}
1119 					save_pos = ( save_pos + 15 ) & ~0x0f;
1120 					for ( i = fpr_start_pos; i < fpr_vstart; i++ ) {
1121 						in( iLFD, fpr_list[ i ], save_pos, r1 );
1122 						save_pos += FPRLEN;
1123 					}
1124 
1125 					in( iMTLR, r0 );
1126 					in( iADDI, r1, r1, save_space );
1127 				}
1128 				in( iADDI, rPSTACK, rPSTACK, i_now->arg.si);
1129 				in( iBLR );
1130 				assert( gpr_pos == gpr_start_pos );
1131 				assert( fpr_pos == fpr_start_pos );
1132 				break;
1133 
1134 			case OP_CALL:
1135 				if ( i_const ) {
1136 					EMIT_FALSE_CONST();
1137 
1138 					if ( i_const->arg.si >= 0 ) {
1139 						emitJump(
1140 							i_const->arg.i,
1141 							branchAlways, 0, branchExtLink
1142 						);
1143 					} else {
1144 						/* syscall */
1145 						in( iLL, r0, VM_Data_Offset( AsmCall ), rVMDATA );
1146 
1147 						in( iLI, r3, i_const->arg.si ); // negative value
1148 						in( iMR, r4, rPSTACK ); // push PSTACK on argument list
1149 
1150 						in( iMTCTR, r0 );
1151 						in( iBCTRL );
1152 					}
1153 					if ( rFIRST != r3 )
1154 						in( iMR, rFIRST, r3 );
1155 				} else {
1156 					MAYBE_EMIT_CONST();
1157 
1158 					in( iCMPWI, cr7, rFIRST, 0 );
1159 					in( iBLTm, cr7, +4*5 /* syscall */ ); // XXX jump !
1160 					/* instruction call */
1161 
1162 					// get instruction address
1163 					in( iLL, r0, VM_Data_Offset( iPointers ), rVMDATA );
1164 					in( iRLWINM, rFIRST, rFIRST, GPRLEN_SHIFT, 0, 31-GPRLEN_SHIFT ); // mul * GPRLEN
1165 					in( iLLX, r0, rFIRST, r0 ); // load pointer
1166 
1167 					in( iB, +4*(3 + (rFIRST != r3 ? 1 : 0) ) ); // XXX jump !
1168 
1169 					/* syscall */
1170 					in( iLL, r0, VM_Data_Offset( AsmCall ), rVMDATA ); // get asmCall pointer
1171 					/* rFIRST can be r3 or some static register */
1172 					if ( rFIRST != r3 )
1173 						in( iMR, r3, rFIRST ); // push OPSTACK top value on argument list
1174 					in( iMR, r4, rPSTACK ); // push PSTACK on argument list
1175 
1176 					/* common code */
1177 					in( iMTCTR, r0 );
1178 					in( iBCTRL );
1179 
1180 					if ( rFIRST != r3 )
1181 						in( iMR, rFIRST, r3 ); // push return value on the top of the opstack
1182 				}
1183 				break;
1184 
1185 			case OP_PUSH:
1186 				MAYBE_EMIT_CONST();
1187 				if ( RET_INT )
1188 					gpr_pos++;
1189 				else
1190 					fpr_pos++;
1191 				/* no instructions here */
1192 				force_emit = 1;
1193 				break;
1194 
1195 			case OP_POP:
1196 				MAYBE_EMIT_CONST();
1197 				if ( ARG_INT )
1198 					gpr_pos--;
1199 				else
1200 					fpr_pos--;
1201 				/* no instructions here */
1202 				force_emit = 1;
1203 				break;
1204 
1205 			case OP_CONST:
1206 				MAYBE_EMIT_CONST();
1207 				/* nothing here */
1208 				break;
1209 
1210 			case OP_LOCAL:
1211 				MAYBE_EMIT_CONST();
1212 				{
1213 					signed long int hi, lo;
1214 					hi = i_now->arg.ss[ 0 ];
1215 					lo = i_now->arg.ss[ 1 ];
1216 					if ( lo < 0 )
1217 						hi += 1;
1218 
1219 					gpr_pos++;
1220 					if ( hi == 0 ) {
1221 						in( iADDI, rFIRST, rPSTACK, lo );
1222 					} else {
1223 						in( iADDIS, rFIRST, rPSTACK, hi );
1224 						if ( lo != 0 )
1225 							in( iADDI, rFIRST, rFIRST, lo );
1226 					}
1227 				}
1228 				break;
1229 
1230 			case OP_JUMP:
1231 				if ( i_const ) {
1232 					EMIT_FALSE_CONST();
1233 
1234 					emitJump(
1235 						i_const->arg.i,
1236 						branchAlways, 0, 0
1237 					);
1238 				} else {
1239 					MAYBE_EMIT_CONST();
1240 
1241 					in( iLL, r0, VM_Data_Offset( iPointers ), rVMDATA );
1242 					in( iRLWINM, rFIRST, rFIRST, GPRLEN_SHIFT, 0, 31-GPRLEN_SHIFT ); // mul * GPRLEN
1243 					in( iLLX, r0, rFIRST, r0 ); // load pointer
1244 					in( iMTCTR, r0 );
1245 					in( iBCTR );
1246 				}
1247 				gpr_pos--;
1248 				break;
1249 
1250 			case OP_EQ:
1251 			case OP_NE:
1252 				if ( i_const && i_const->arg.si >= -0x8000 && i_const->arg.si < 0x10000 ) {
1253 					EMIT_FALSE_CONST();
1254 					if ( i_const->arg.si >= 0x8000 )
1255 						in( iCMPLWI, cr7, rSECOND, i_const->arg.i );
1256 					else
1257 						in( iCMPWI, cr7, rSECOND, i_const->arg.si );
1258 				} else {
1259 					MAYBE_EMIT_CONST();
1260 					in( iCMPW, cr7, rSECOND, rFIRST );
1261 				}
1262 				emitJump(
1263 					i_now->arg.i,
1264 					(i_now->op == OP_EQ ? branchTrue : branchFalse),
1265 					4*cr7+eq, 0
1266 				);
1267 				gpr_pos -= 2;
1268 				break;
1269 
1270 			case OP_LTI:
1271 			case OP_GEI:
1272 				if ( i_const && i_const->arg.si >= -0x8000 && i_const->arg.si < 0x8000 ) {
1273 					EMIT_FALSE_CONST();
1274 					in( iCMPWI, cr7, rSECOND, i_const->arg.si );
1275 				} else {
1276 					MAYBE_EMIT_CONST();
1277 					in( iCMPW, cr7, rSECOND, rFIRST );
1278 				}
1279 				emitJump(
1280 					i_now->arg.i,
1281 					( i_now->op == OP_LTI ? branchTrue : branchFalse ),
1282 					4*cr7+lt, 0
1283 				);
1284 				gpr_pos -= 2;
1285 				break;
1286 
1287 			case OP_GTI:
1288 			case OP_LEI:
1289 				if ( i_const && i_const->arg.si >= -0x8000 && i_const->arg.si < 0x8000 ) {
1290 					EMIT_FALSE_CONST();
1291 					in( iCMPWI, cr7, rSECOND, i_const->arg.si );
1292 				} else {
1293 					MAYBE_EMIT_CONST();
1294 					in( iCMPW, cr7, rSECOND, rFIRST );
1295 				}
1296 				emitJump(
1297 					i_now->arg.i,
1298 					( i_now->op == OP_GTI ? branchTrue : branchFalse ),
1299 					4*cr7+gt, 0
1300 				);
1301 				gpr_pos -= 2;
1302 				break;
1303 
1304 			case OP_LTU:
1305 			case OP_GEU:
1306 				if ( i_const && i_const->arg.i < 0x10000 ) {
1307 					EMIT_FALSE_CONST();
1308 					in( iCMPLWI, cr7, rSECOND, i_const->arg.i );
1309 				} else {
1310 					MAYBE_EMIT_CONST();
1311 					in( iCMPLW, cr7, rSECOND, rFIRST );
1312 				}
1313 				emitJump(
1314 					i_now->arg.i,
1315 					( i_now->op == OP_LTU ? branchTrue : branchFalse ),
1316 					4*cr7+lt, 0
1317 				);
1318 				gpr_pos -= 2;
1319 				break;
1320 
1321 			case OP_GTU:
1322 			case OP_LEU:
1323 				if ( i_const && i_const->arg.i < 0x10000 ) {
1324 					EMIT_FALSE_CONST();
1325 					in( iCMPLWI, cr7, rSECOND, i_const->arg.i );
1326 				} else {
1327 					MAYBE_EMIT_CONST();
1328 					in( iCMPLW, cr7, rSECOND, rFIRST );
1329 				}
1330 				emitJump(
1331 					i_now->arg.i,
1332 					( i_now->op == OP_GTU ? branchTrue : branchFalse ),
1333 					4*cr7+gt, 0
1334 				);
1335 				gpr_pos -= 2;
1336 				break;
1337 
1338 			case OP_EQF:
1339 			case OP_NEF:
1340 				MAYBE_EMIT_CONST();
1341 				in( iFCMPU, cr7, fSECOND, fFIRST );
1342 				emitJump(
1343 					i_now->arg.i,
1344 					( i_now->op == OP_EQF ? branchTrue : branchFalse ),
1345 					4*cr7+eq, 0
1346 				);
1347 				fpr_pos -= 2;
1348 				break;
1349 
1350 			case OP_LTF:
1351 			case OP_GEF:
1352 				MAYBE_EMIT_CONST();
1353 				in( iFCMPU, cr7, fSECOND, fFIRST );
1354 				emitJump(
1355 					i_now->arg.i,
1356 					( i_now->op == OP_LTF ? branchTrue : branchFalse ),
1357 					4*cr7+lt, 0
1358 				);
1359 				fpr_pos -= 2;
1360 				break;
1361 
1362 			case OP_GTF:
1363 			case OP_LEF:
1364 				MAYBE_EMIT_CONST();
1365 				in( iFCMPU, cr7, fSECOND, fFIRST );
1366 				emitJump(
1367 					i_now->arg.i,
1368 					( i_now->op == OP_GTF ? branchTrue : branchFalse ),
1369 					4*cr7+gt, 0
1370 				);
1371 				fpr_pos -= 2;
1372 				break;
1373 
1374 			case OP_LOAD1:
1375 				MAYBE_EMIT_CONST();
1376 #if OPTIMIZE_MASK
1377 				in( iRLWINM, rFIRST, rFIRST, 0, fastMaskHi, fastMaskLo );
1378 #else
1379 				in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
1380 				in( iAND, rFIRST, rFIRST, r0 );
1381 #endif
1382 				in( iLBZX, rFIRST, rFIRST, rDATABASE );
1383 				break;
1384 
1385 			case OP_LOAD2:
1386 				MAYBE_EMIT_CONST();
1387 #if OPTIMIZE_MASK
1388 				in( iRLWINM, rFIRST, rFIRST, 0, fastMaskHi, fastMaskLo );
1389 #else
1390 				in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
1391 				in( iAND, rFIRST, rFIRST, r0 );
1392 #endif
1393 				in( iLHZX, rFIRST, rFIRST, rDATABASE );
1394 				break;
1395 
1396 			case OP_LOAD4:
1397 				MAYBE_EMIT_CONST();
1398 #if OPTIMIZE_MASK
1399 				in( iRLWINM, rFIRST, rFIRST, 0, fastMaskHi, fastMaskLo );
1400 #else
1401 				in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
1402 				in( iAND, rFIRST, rFIRST, r0 );
1403 #endif
1404 				if ( RET_INT ) {
1405 					in( iLWZX, rFIRST, rFIRST, rDATABASE );
1406 				} else {
1407 					fpr_pos++;
1408 					in( iLFSX, fFIRST, rFIRST, rDATABASE );
1409 					gpr_pos--;
1410 				}
1411 				break;
1412 
1413 			case OP_STORE1:
1414 				MAYBE_EMIT_CONST();
1415 #if OPTIMIZE_MASK
1416 				in( iRLWINM, rSECOND, rSECOND, 0, fastMaskHi, fastMaskLo );
1417 #else
1418 				in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
1419 				in( iAND, rSECOND, rSECOND, r0 );
1420 #endif
1421 				in( iSTBX, rFIRST, rSECOND, rDATABASE );
1422 				gpr_pos -= 2;
1423 				break;
1424 
1425 			case OP_STORE2:
1426 				MAYBE_EMIT_CONST();
1427 #if OPTIMIZE_MASK
1428 				in( iRLWINM, rSECOND, rSECOND, 0, fastMaskHi, fastMaskLo );
1429 #else
1430 				in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
1431 				in( iAND, rSECOND, rSECOND, r0 );
1432 #endif
1433 				in( iSTHX, rFIRST, rSECOND, rDATABASE );
1434 				gpr_pos -= 2;
1435 				break;
1436 
1437 			case OP_STORE4:
1438 				MAYBE_EMIT_CONST();
1439 				if ( ARG_INT ) {
1440 #if OPTIMIZE_MASK
1441 					in( iRLWINM, rSECOND, rSECOND, 0, fastMaskHi, fastMaskLo );
1442 #else
1443 					in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
1444 					in( iAND, rSECOND, rSECOND, r0 );
1445 #endif
1446 
1447 					in( iSTWX, rFIRST, rSECOND, rDATABASE );
1448 					gpr_pos--;
1449 				} else {
1450 #if OPTIMIZE_MASK
1451 					in( iRLWINM, rFIRST, rFIRST, 0, fastMaskHi, fastMaskLo );
1452 #else
1453 					in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
1454 					in( iAND, rFIRST, rFIRST, r0 );
1455 #endif
1456 
1457 					in( iSTFSX, fFIRST, rFIRST, rDATABASE );
1458 					fpr_pos--;
1459 				}
1460 				gpr_pos--;
1461 				break;
1462 
1463 			case OP_ARG:
1464 				MAYBE_EMIT_CONST();
1465 				in( iADDI, r0, rPSTACK, i_now->arg.b );
1466 				if ( ARG_INT ) {
1467 					in( iSTWX, rFIRST, rDATABASE, r0 );
1468 					gpr_pos--;
1469 				} else {
1470 					in( iSTFSX, fFIRST, rDATABASE, r0 );
1471 					fpr_pos--;
1472 				}
1473 				break;
1474 
1475 			case OP_BLOCK_COPY:
1476 				MAYBE_EMIT_CONST();
1477 #if OPTIMIZE_COPY
1478 				if ( i_now->arg.i <= SL( 16, 32 ) ) {
1479 					/* block is very short so copy it in-place */
1480 
1481 					unsigned int len = i_now->arg.i;
1482 					unsigned int copied = 0, left = len;
1483 
1484 					in( iADD, rFIRST, rFIRST, rDATABASE );
1485 					in( iADD, rSECOND, rSECOND, rDATABASE );
1486 
1487 					if ( len >= GPRLEN ) {
1488 						long int i, words = len / GPRLEN;
1489 						in( iLL, r0, 0, rFIRST );
1490 						for ( i = 1; i < words; i++ )
1491 							in( iLL, rTEMP( i - 1 ), GPRLEN * i, rFIRST );
1492 
1493 						in( iSTL, r0, 0, rSECOND );
1494 						for ( i = 1; i < words; i++ )
1495 							in( iSTL, rTEMP( i - 1 ), GPRLEN * i, rSECOND );
1496 
1497 						copied += words * GPRLEN;
1498 						left -= copied;
1499 					}
1500 
1501 					if ( SL( 0, left >= 4 ) ) {
1502 						in( iLWZ, r0, copied+0, rFIRST );
1503 						in( iSTW, r0, copied+0, rSECOND );
1504 						copied += 4;
1505 						left -= 4;
1506 					}
1507 					if ( left >= 4 ) {
1508 						DIE("Bug in OP_BLOCK_COPY");
1509 					}
1510 					if ( left == 3 ) {
1511 						in( iLHZ, r0,	copied+0, rFIRST );
1512 						in( iLBZ, rTMP,	copied+2, rFIRST );
1513 						in( iSTH, r0,	copied+0, rSECOND );
1514 						in( iSTB, rTMP,	copied+2, rSECOND );
1515 					} else if ( left == 2 ) {
1516 						in( iLHZ, r0, copied+0, rFIRST );
1517 						in( iSTH, r0, copied+0, rSECOND );
1518 					} else if ( left == 1 ) {
1519 						in( iLBZ, r0, copied+0, rFIRST );
1520 						in( iSTB, r0, copied+0, rSECOND );
1521 					}
1522 				} else
1523 #endif
1524 				{
1525 					unsigned long int r5_ori = 0;
1526 					if ( i_now->arg.si >= -0x8000 && i_now->arg.si < 0x8000 ) {
1527 						in( iLI, r5, i_now->arg.si );
1528 					} else if ( i_now->arg.i < 0x10000 ) {
1529 						in( iLI, r5, 0 );
1530 						r5_ori = i_now->arg.i;
1531 					} else {
1532 						in( iLIS, r5, i_now->arg.ss[ 0 ] );
1533 						r5_ori = i_now->arg.us[ 1 ];
1534 					}
1535 
1536 					in( iLL, r0, VM_Data_Offset( BlockCopy ), rVMDATA ); // get blockCopy pointer
1537 
1538 					if ( r5_ori )
1539 						in( iORI, r5, r5, r5_ori );
1540 
1541 					in( iMTCTR, r0 );
1542 
1543 					if ( rFIRST != r4 )
1544 						in( iMR, r4, rFIRST );
1545 					if ( rSECOND != r3 )
1546 						in( iMR, r3, rSECOND );
1547 
1548 					in( iBCTRL );
1549 				}
1550 
1551 				gpr_pos -= 2;
1552 				break;
1553 
1554 			case OP_SEX8:
1555 				MAYBE_EMIT_CONST();
1556 				in( iEXTSB, rFIRST, rFIRST );
1557 				break;
1558 
1559 			case OP_SEX16:
1560 				MAYBE_EMIT_CONST();
1561 				in( iEXTSH, rFIRST, rFIRST );
1562 				break;
1563 
1564 			case OP_NEGI:
1565 				MAYBE_EMIT_CONST();
1566 				in( iNEG, rFIRST, rFIRST );
1567 				break;
1568 
1569 			case OP_ADD:
1570 				if ( i_const ) {
1571 					EMIT_FALSE_CONST();
1572 
1573 					signed short int hi, lo;
1574 					hi = i_const->arg.ss[ 0 ];
1575 					lo = i_const->arg.ss[ 1 ];
1576 					if ( lo < 0 )
1577 						hi += 1;
1578 
1579 					if ( hi != 0 )
1580 						in( iADDIS, rSECOND, rSECOND, hi );
1581 					if ( lo != 0 )
1582 						in( iADDI, rSECOND, rSECOND, lo );
1583 
1584 					// if both are zero no instruction will be written
1585 					if ( hi == 0 && lo == 0 )
1586 						force_emit = 1;
1587 				} else {
1588 					MAYBE_EMIT_CONST();
1589 					in( iADD, rSECOND, rSECOND, rFIRST );
1590 				}
1591 				gpr_pos--;
1592 				break;
1593 
1594 			case OP_SUB:
1595 				MAYBE_EMIT_CONST();
1596 				in( iSUB, rSECOND, rSECOND, rFIRST );
1597 				gpr_pos--;
1598 				break;
1599 
1600 			case OP_DIVI:
1601 				MAYBE_EMIT_CONST();
1602 				in( iDIVW, rSECOND, rSECOND, rFIRST );
1603 				gpr_pos--;
1604 				break;
1605 
1606 			case OP_DIVU:
1607 				MAYBE_EMIT_CONST();
1608 				in( iDIVWU, rSECOND, rSECOND, rFIRST );
1609 				gpr_pos--;
1610 				break;
1611 
1612 			case OP_MODI:
1613 				MAYBE_EMIT_CONST();
1614 				in( iDIVW, r0, rSECOND, rFIRST );
1615 				in( iMULLW, r0, r0, rFIRST );
1616 				in( iSUB, rSECOND, rSECOND, r0 );
1617 				gpr_pos--;
1618 				break;
1619 
1620 			case OP_MODU:
1621 				MAYBE_EMIT_CONST();
1622 				in( iDIVWU, r0, rSECOND, rFIRST );
1623 				in( iMULLW, r0, r0, rFIRST );
1624 				in( iSUB, rSECOND, rSECOND, r0 );
1625 				gpr_pos--;
1626 				break;
1627 
1628 			case OP_MULI:
1629 			case OP_MULU:
1630 				MAYBE_EMIT_CONST();
1631 				in( iMULLW, rSECOND, rSECOND, rFIRST );
1632 				gpr_pos--;
1633 				break;
1634 
1635 			case OP_BAND:
1636 				MAYBE_EMIT_CONST();
1637 				in( iAND, rSECOND, rSECOND, rFIRST );
1638 				gpr_pos--;
1639 				break;
1640 
1641 			case OP_BOR:
1642 				MAYBE_EMIT_CONST();
1643 				in( iOR, rSECOND, rSECOND, rFIRST );
1644 				gpr_pos--;
1645 				break;
1646 
1647 			case OP_BXOR:
1648 				MAYBE_EMIT_CONST();
1649 				in( iXOR, rSECOND, rSECOND, rFIRST );
1650 				gpr_pos--;
1651 				break;
1652 
1653 			case OP_BCOM:
1654 				MAYBE_EMIT_CONST();
1655 				in( iNOT, rFIRST, rFIRST );
1656 				break;
1657 
1658 			case OP_LSH:
1659 				MAYBE_EMIT_CONST();
1660 				in( iSLW, rSECOND, rSECOND, rFIRST );
1661 				gpr_pos--;
1662 				break;
1663 
1664 			case OP_RSHI:
1665 				MAYBE_EMIT_CONST();
1666 				in( iSRAW, rSECOND, rSECOND, rFIRST );
1667 				gpr_pos--;
1668 				break;
1669 
1670 			case OP_RSHU:
1671 				MAYBE_EMIT_CONST();
1672 				in( iSRW, rSECOND, rSECOND, rFIRST );
1673 				gpr_pos--;
1674 				break;
1675 
1676 			case OP_NEGF:
1677 				MAYBE_EMIT_CONST();
1678 				in( iFNEG, fFIRST, fFIRST );
1679 				break;
1680 
1681 			case OP_ADDF:
1682 				MAYBE_EMIT_CONST();
1683 				in( iFADDS, fSECOND, fSECOND, fFIRST );
1684 				fpr_pos--;
1685 				break;
1686 
1687 			case OP_SUBF:
1688 				MAYBE_EMIT_CONST();
1689 				in( iFSUBS, fSECOND, fSECOND, fFIRST );
1690 				fpr_pos--;
1691 				break;
1692 
1693 			case OP_DIVF:
1694 				MAYBE_EMIT_CONST();
1695 				in( iFDIVS, fSECOND, fSECOND, fFIRST );
1696 				fpr_pos--;
1697 				break;
1698 
1699 			case OP_MULF:
1700 				MAYBE_EMIT_CONST();
1701 				in( iFMULS, fSECOND, fSECOND, fFIRST );
1702 				fpr_pos--;
1703 				break;
1704 
1705 			case OP_CVIF:
1706 				MAYBE_EMIT_CONST();
1707 				fpr_pos++;
1708 				in( iXORIS, rFIRST, rFIRST, 0x8000 );
1709 				in( iLIS, r0, 0x4330 );
1710 				in( iSTW, rFIRST, stack_temp + 4, r1 );
1711 				in( iSTW, r0, stack_temp, r1 );
1712 				in( iLFS, fTMP, VM_Data_Offset( floatBase ), rVMDATA );
1713 				in( iLFD, fFIRST, stack_temp, r1 );
1714 				in( iFSUB, fFIRST, fFIRST, fTMP );
1715 				in( iFRSP, fFIRST, fFIRST );
1716 				gpr_pos--;
1717 				break;
1718 
1719 			case OP_CVFI:
1720 				MAYBE_EMIT_CONST();
1721 				gpr_pos++;
1722 				in( iFCTIWZ, fFIRST, fFIRST );
1723 				in( iSTFD, fFIRST, stack_temp, r1 );
1724 				in( iLWZ, rFIRST, stack_temp + 4, r1 );
1725 				fpr_pos--;
1726 				break;
1727 		}
1728 
1729 		i_const = NULL;
1730 
1731 		if ( i_now->op != OP_CONST ) {
1732 			// emit the instructions if it isn't OP_CONST
1733 			emitEnd();
1734 		} else {
1735 			// mark in what register the value should be saved
1736 			if ( RET_INT )
1737 				i_now->regPos = ++gpr_pos;
1738 			else
1739 				i_now->regPos = ++fpr_pos;
1740 
1741 #if OPTIMIZE_HOLE
1742 			i_const = i_now;
1743 #else
1744 			PPC_EmitConst( i_now );
1745 #endif
1746 		}
1747 	}
1748 	if ( i_const )
1749 		DIE( "left (unused) OP_CONST" );
1750 
1751 	{
1752 		// free opcode information, don't free first dummy one
1753 		source_instruction_t *i_next = i_first->next;
1754 		while ( i_next ) {
1755 			i_now = i_next;
1756 			i_next = i_now->next;
1757 			PPC_Free( i_now );
1758 		}
1759 	}
1760 }
1761 
1762 
1763 /*
1764  * check which jumps are short enough to use signed 16bit immediate branch
1765  */
1766 static void
PPC_ShrinkJumps(void)1767 PPC_ShrinkJumps( void )
1768 {
1769 	symbolic_jump_t *sj_now = sj_first;
1770 	while ( (sj_now = sj_now->nextJump) ) {
1771 		if ( sj_now->bo == branchAlways )
1772 			// non-conditional branch has 26bit immediate
1773 			sj_now->parent->length = 1;
1774 
1775 		else {
1776 			dest_instruction_t *di = di_pointers[ sj_now->jump_to ];
1777 			dest_instruction_t *ji = sj_now->parent;
1778 			long int jump_length = 0;
1779 			if ( ! di )
1780 				DIE( "No instruction to jump to" );
1781 			if ( ji->count > di->count ) {
1782 				do {
1783 					jump_length += di->length;
1784 				} while ( ( di = di->next ) != ji );
1785 			} else {
1786 				jump_length = 1;
1787 				while ( ( ji = ji->next ) != di )
1788 					jump_length += ji->length;
1789 			}
1790 			if ( jump_length < 0x2000 )
1791 				// jump is short, use normal instruction
1792 				sj_now->parent->length = 1;
1793 		}
1794 	}
1795 }
1796 
1797 /*
1798  * puts all the data in one place, it consists of many different tasks
1799  */
1800 static void
PPC_ComputeCode(vm_t * vm)1801 PPC_ComputeCode( vm_t *vm )
1802 {
1803 	dest_instruction_t *di_now = di_first;
1804 
1805 	unsigned long int codeInstructions = 0;
1806 	// count total instruciton number
1807 	while ( (di_now = di_now->next ) )
1808 		codeInstructions += di_now->length;
1809 
1810 	size_t codeLength = sizeof( vm_data_t )
1811 		+ sizeof( unsigned int ) * data_acc
1812 		+ sizeof( ppc_instruction_t ) * codeInstructions;
1813 
1814 	// get the memory for the generated code, smarter ppcs need the
1815 	// mem to be marked as executable (whill change later)
1816 	unsigned char *dataAndCode = mmap( NULL, codeLength,
1817 		PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0 );
1818 
1819 	if (dataAndCode == MAP_FAILED)
1820 		DIE( "Not enough memory" );
1821 
1822 	ppc_instruction_t *codeNow, *codeBegin;
1823 	codeNow = codeBegin = (ppc_instruction_t *)( dataAndCode + VM_Data_Offset( data[ data_acc ] ) );
1824 
1825 	ppc_instruction_t nop = IN( iNOP );
1826 
1827 	// copy instructions to the destination
1828 	// fills the jump instructions with nops
1829 	// saves pointers of all instructions
1830 	di_now = di_first;
1831 	while ( (di_now = di_now->next ) ) {
1832 		unsigned long int i_count = di_now->i_count;
1833 		if ( i_count != FALSE_ICOUNT ) {
1834 			if ( ! di_pointers[ i_count ] )
1835 				di_pointers[ i_count ] = (void *) codeNow;
1836 		}
1837 
1838 		if ( di_now->jump == NULL ) {
1839 			memcpy( codeNow, &(di_now->code[0]), di_now->length * sizeof( ppc_instruction_t ) );
1840 			codeNow += di_now->length;
1841 		} else {
1842 			long int i;
1843 			symbolic_jump_t *sj;
1844 			for ( i = 0; i < di_now->length; i++ )
1845 				codeNow[ i ] = nop;
1846 			codeNow += di_now->length;
1847 
1848 			sj = di_now->jump;
1849 			// save position of jumping instruction
1850 			sj->parent = (void *)(codeNow - 1);
1851 		}
1852 	}
1853 
1854 	// compute the jumps and write corresponding instructions
1855 	symbolic_jump_t *sj_now = sj_first;
1856 	while ( (sj_now = sj_now->nextJump ) ) {
1857 		ppc_instruction_t *jumpFrom = (void *) sj_now->parent;
1858 		ppc_instruction_t *jumpTo = (void *) di_pointers[ sj_now->jump_to ];
1859 		signed long int jumpLength = jumpTo - jumpFrom;
1860 
1861 		// if jump is short, just write it
1862 		if ( jumpLength >= - 8192 && jumpLength < 8192 ) {
1863 			powerpc_iname_t branchConditional = sj_now->ext & branchExtLink ? iBCL : iBC;
1864 			*jumpFrom = IN( branchConditional, sj_now->bo, sj_now->bi, jumpLength * 4 );
1865 			continue;
1866 		}
1867 
1868 		// jump isn't short so write it as two instructions
1869 		//
1870 		// the letter one is a non-conditional branch instruction which
1871 		// accepts immediate values big enough (26 bits)
1872 		*jumpFrom = IN( (sj_now->ext & branchExtLink ? iBL : iB), jumpLength * 4 );
1873 		if ( sj_now->bo == branchAlways )
1874 			continue;
1875 
1876 		// there should have been additional space prepared for this case
1877 		if ( jumpFrom[ -1 ] != nop )
1878 			DIE( "additional space for long jump not prepared" );
1879 
1880 		// invert instruction condition
1881 		long int bo = 0;
1882 		switch ( sj_now->bo ) {
1883 			case branchTrue:
1884 				bo = branchFalse;
1885 				break;
1886 			case branchFalse:
1887 				bo = branchTrue;
1888 				break;
1889 			default:
1890 				DIE( "unrecognized branch type" );
1891 				break;
1892 		}
1893 
1894 		// the former instruction is an inverted conditional branch which
1895 		// jumps over the non-conditional one
1896 		jumpFrom[ -1 ] = IN( iBC, bo, sj_now->bi, +2*4 );
1897 	}
1898 
1899 	vm->codeBase = dataAndCode;
1900 	vm->codeLength = codeLength;
1901 
1902 	vm_data_t *data = (vm_data_t *)dataAndCode;
1903 
1904 #if ELF64
1905 	// prepare Official Procedure Descriptor for the generated code
1906 	// and retrieve real function pointer for helper functions
1907 
1908 	opd_t *ac = (void *)VM_AsmCall, *bc = (void *)VM_BlockCopy;
1909 	data->opd.function = codeBegin;
1910 	// trick it into using the same TOC
1911 	// this way we won't have to switch TOC before calling AsmCall or BlockCopy
1912 	data->opd.toc = ac->toc;
1913 	data->opd.env = ac->env;
1914 
1915 	data->AsmCall = ac->function;
1916 	data->BlockCopy = bc->function;
1917 #else
1918 	data->AsmCall = VM_AsmCall;
1919 	data->BlockCopy = VM_BlockCopy;
1920 #endif
1921 
1922 	data->dataMask = vm->dataMask;
1923 	data->iPointers = (ppc_instruction_t *)vm->instructionPointers;
1924 	data->dataLength = VM_Data_Offset( data[ data_acc ] );
1925 	data->codeLength = ( codeNow - codeBegin ) * sizeof( ppc_instruction_t );
1926 	data->floatBase = 0x59800004;
1927 
1928 
1929 	/* write dynamic data (float constants) */
1930 	{
1931 		local_data_t *d_next, *d_now = data_first;
1932 		long int accumulated = 0;
1933 
1934 		do {
1935 			long int i;
1936 			for ( i = 0; i < d_now->count; i++ )
1937 				data->data[ accumulated + i ] = d_now->data[ i ];
1938 
1939 			accumulated += d_now->count;
1940 			d_next = d_now->next;
1941 			PPC_Free( d_now );
1942 
1943 			if ( !d_next )
1944 				break;
1945 			d_now = d_next;
1946 		} while (1);
1947 		data_first = NULL;
1948 	}
1949 
1950 	/* free most of the compilation memory */
1951 	{
1952 		di_now = di_first->next;
1953 		PPC_Free( di_first );
1954 		PPC_Free( sj_first );
1955 
1956 		while ( di_now ) {
1957 			di_first = di_now->next;
1958 			if ( di_now->jump )
1959 				PPC_Free( di_now->jump );
1960 			PPC_Free( di_now );
1961 			di_now = di_first;
1962 		}
1963 	}
1964 }
1965 
1966 static void
VM_Destroy_Compiled(vm_t * self)1967 VM_Destroy_Compiled( vm_t *self )
1968 {
1969 	if ( self->codeBase ) {
1970 		if ( munmap( self->codeBase, self->codeLength ) )
1971 			Com_Printf( S_COLOR_RED "Memory unmap failed, possible memory leak\n" );
1972 	}
1973 	self->codeBase = NULL;
1974 }
1975 
/*
 * Compile a whole QVM image to native PowerPC code.
 *
 * Reads the bytecode from the header, splits it into functions at each
 * OP_ENTER, compiles every function, resolves jumps, then maps the
 * result executable. Sets vm->compiled / vm->destroy on success;
 * aborts via DIE()/Com_Error on unrecoverable failures.
 */
void
VM_Compile( vm_t *vm, vmHeader_t *header )
{
	long int pc = 0;
	unsigned long int i_count;
	char* code;
	struct timeval tvstart = {0, 0};
	// i_first is a dummy list head reused for every function
	source_instruction_t *i_first /* dummy */, *i_last = NULL, *i_now;

	vm->compiled = qfalse;

	gettimeofday(&tvstart, NULL);

	PPC_MakeFastMask( vm->dataMask );

	i_first = PPC_Malloc( sizeof( source_instruction_t ) );
	i_first->next = NULL;

	// realloc instructionPointers with correct size
	// use Z_Malloc so vm.c will be able to free the memory
	if ( sizeof( void * ) != sizeof( int ) ) {
		Z_Free( vm->instructionPointers );
		vm->instructionPointers = Z_Malloc( header->instructionCount * sizeof( void * ) );
	}
	di_pointers = (void *)vm->instructionPointers;
	memset( di_pointers, 0, header->instructionCount * sizeof( void * ) );


	PPC_CompileInit();

	/*
	 * read the input program
	 * divide it into functions and send each function to compiler
	 */
	code = (char *)header + header->codeOffset;
	for ( i_count = 0; i_count < header->instructionCount; ++i_count )
	{
		unsigned char op = code[ pc++ ];

		// OP_ENTER starts a new function: flush the previous one first
		if ( op == OP_ENTER ) {
			if ( i_first->next )
				VM_CompileFunction( i_first );
			i_first->next = NULL;
			i_last = i_first;
		}

		i_now = PPC_Malloc( sizeof( source_instruction_t ) );
		i_now->op = op;
		i_now->i_count = i_count;
		i_now->arg.i = 0;
		i_now->regA1 = 0;
		i_now->regA2 = 0;
		i_now->regR = 0;
		i_now->regPos = 0;
		i_now->next = NULL;

		if ( vm_opInfo[op] & opImm4 ) {
			// QVM stores 4-byte immediates little-endian; reverse the
			// bytes for the big-endian host through this union
			union {
				unsigned char b[4];
				unsigned int i;
			} c = { { code[ pc + 3 ], code[ pc + 2 ], code[ pc + 1 ], code[ pc + 0 ] }, };

			i_now->arg.i = c.i;
			pc += 4;
		} else if ( vm_opInfo[op] & opImm1 ) {
			i_now->arg.b = code[ pc++ ];
		}

		i_last->next = i_now;
		i_last = i_now;
	}
	// compile the final function and release the dummy head
	VM_CompileFunction( i_first );
	PPC_Free( i_first );

	PPC_ShrinkJumps();
	// clear the table again; PPC_ComputeCode refills it with real addresses
	memset( di_pointers, 0, header->instructionCount * sizeof( void * ) );
	PPC_ComputeCode( vm );

	/* check for uninitialized pointers */
#ifdef DEBUG_VM
	long int i;
	for ( i = 0; i < header->instructionCount; i++ )
		if ( di_pointers[ i ] == 0 )
			Com_Printf( S_COLOR_RED "Pointer %ld not initialized !\n", i );
#endif

	/* mark memory as executable and not writeable */
	if ( mprotect( vm->codeBase, vm->codeLength, PROT_READ|PROT_EXEC ) ) {

		// it has failed, make sure memory is unmapped before throwing the error
		VM_Destroy_Compiled( vm );
		DIE( "mprotect failed" );
	}

	vm->destroy = VM_Destroy_Compiled;
	vm->compiled = qtrue;

	{
		struct timeval tvdone = {0, 0};
		struct timeval dur = {0, 0};
		Com_Printf( "VM file %s compiled to %i bytes of code (%p - %p)\n",
			vm->name, vm->codeLength, vm->codeBase, vm->codeBase+vm->codeLength );

		gettimeofday(&tvdone, NULL);
		timersub(&tvdone, &tvstart, &dur);
		Com_Printf( "compilation took %lu.%06lu seconds\n",
			(long unsigned int)dur.tv_sec, (long unsigned int)dur.tv_usec );
	}
}
2085 
/*
 * Invoke the compiled QVM entry point with the given argument list.
 *
 * Builds a call frame on the QVM program stack, jumps into the
 * generated code and returns its integer result. Restores the VM's
 * program stack on exit.
 */
int
VM_CallCompiled( vm_t *vm, int *args )
{
	int retVal;
	int *argPointer;

	// codeBase starts with the vm_data_t header written by PPC_ComputeCode
	vm_data_t *vm_dataAndCode = (void *)( vm->codeBase );
	int programStack = vm->programStack;
	int stackOnEntry = programStack;

	byte *image = vm->dataBase;

	currentVM = vm;

	vm->currentlyInterpreting = qtrue;

	// reserve the call frame: two header words + the vmMain arguments
	programStack -= ( 8 + 4 * MAX_VMMAIN_ARGS );
	argPointer = (int *)&image[ programStack + 8 ];
	memcpy( argPointer, args, 4 * MAX_VMMAIN_ARGS );
	// NOTE(review): frame header words -- presumably return address 0 and
	// a -1 "program entry" sentinel, matching the other VM backends; confirm
	argPointer[ -1 ] = 0;
	argPointer[ -2 ] = -1;

#ifdef VM_TIMES
	struct tms start_time, stop_time;
	clock_t time_diff;

	times( &start_time );
	time_outside_vm = 0;
#endif

	/* call generated code */
	{
		int ( *entry )( void *, int, void * );
#ifdef __PPC64__
		// 64bit ELF calls go through the Official Procedure Descriptor
		entry = (void *)&(vm_dataAndCode->opd);
#else
		// 32bit: code begins directly after the data block
		entry = (void *)(vm->codeBase + vm_dataAndCode->dataLength);
#endif
		retVal = entry( vm->codeBase, programStack, vm->dataBase );
	}

#ifdef VM_TIMES
	times( &stop_time );
	time_diff = stop_time.tms_utime - start_time.tms_utime;
	time_total_vm += time_diff - time_outside_vm;
	if ( time_diff > 100 ) {
		printf( "App clock: %ld, vm total: %ld, vm this: %ld, vm real: %ld, vm out: %ld\n"
			"Inside VM %f%% of app time\n",
			stop_time.tms_utime,
			time_total_vm,
			time_diff,
			time_diff - time_outside_vm,
			time_outside_vm,
			(double)100 * time_total_vm / stop_time.tms_utime );
	}
#endif

	vm->programStack = stackOnEntry;
	vm->currentlyInterpreting = qfalse;

	return retVal;
}
2148