1 /*
2 ===========================================================================
3 Copyright (C) 2008 Przemyslaw Iskra <sparky@pld-linux.org>
4
5 This file is part of Quake III Arena source code.
6
7 Quake III Arena source code is free software; you can redistribute it
8 and/or modify it under the terms of the GNU General Public License as
9 published by the Free Software Foundation; either version 2 of the License,
10 or (at your option) any later version.
11
12 Quake III Arena source code is distributed in the hope that it will be
13 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Quake III Arena source code; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 ===========================================================================
21 */
22
23 #include <sys/types.h> /* needed by sys/mman.h on OSX */
24 #include <sys/mman.h>
25 #include <sys/time.h>
26 #include <time.h>
27 #include <stddef.h>
28
29 #ifndef MAP_ANONYMOUS
30 # define MAP_ANONYMOUS MAP_ANON
31 #endif
32
33 #include "vm_local.h"
34 #include "vm_powerpc_asm.h"
35
36 /*
37 * VM_TIMES enables showing information about time spent inside
38 * and outside generated code
39 */
40 //#define VM_TIMES
41 #ifdef VM_TIMES
42 #include <sys/times.h>
43 static clock_t time_outside_vm = 0;
44 static clock_t time_total_vm = 0;
45 #endif
46
47 /* exit() won't be called but use it because it is marked with noreturn */
48 #define DIE( reason ) Com_Error( ERR_DROP, "vm_powerpc compiler error: " reason )
49
50 /*
51 * vm_powerpc uses large quantities of memory during compilation,
52 * Z_Malloc memory may not be enough for some big qvm files
53 */
54
55 //#define VM_SYSTEM_MALLOC
56 #ifdef VM_SYSTEM_MALLOC
static inline void *
PPC_Malloc( size_t size )
{
	void *mem = malloc( size );

	// allocation failure is fatal for the compiler
	if ( mem == NULL )
		DIE( "Not enough memory" );

	return mem;
}
66 # define PPC_Free free
67 #else
68 # define PPC_Malloc Z_Malloc
69 # define PPC_Free Z_Free
70 #endif
71
72 /*
73 * optimizations:
74 * - hole: bubble optimization (OP_CONST+instruction)
75 * - copy: inline OP_BLOCK_COPY for lengths under 16/32 bytes
76 * - mask: use rlwinm instruction as dataMask
77 */
78
79 #ifdef __OPTIMIZE__
80 # define OPTIMIZE_HOLE 1
81 # define OPTIMIZE_COPY 1
82 # define OPTIMIZE_MASK 1
83 #else
84 # define OPTIMIZE_HOLE 0
85 # define OPTIMIZE_COPY 0
86 # define OPTIMIZE_MASK 0
87 #endif
88
89 /*
90 * SUPPORTED TARGETS:
91 * - Linux 32 bits
92 * ( http://refspecs.freestandards.org/elf/elfspec_ppc.pdf )
93 * * LR at r0 + 4
94 * * Local variable space not needed
95 * -> store caller safe regs at 16+
96 *
97 * - Linux 64 bits (not fully conformant)
98 * ( http://www.ibm.com/developerworks/linux/library/l-powasm4.html )
99 * * needs "official procedure descriptors" (only first function has one)
100 * * LR at r0 + 16
101 * * local variable space required, min 64 bytes, starts at 48
102 * -> store caller safe regs at 128+
103 *
104 * - OS X 32 bits
105 * ( http://developer.apple.com/documentation/DeveloperTools/Conceptual/LowLevelABI/Articles/32bitPowerPC.html )
106 * * LR at r0 + 8
107 * * local variable space required, min 32 bytes (?), starts at 24
108 * -> store caller safe regs at 64+
109 *
110 * - OS X 64 bits (completely untested)
111 * ( http://developer.apple.com/documentation/DeveloperTools/Conceptual/LowLevelABI/Articles/64bitPowerPC.html )
112 * * LR at r0 + 16
113 * * local variable space required, min 64 bytes (?), starts at 48
114 * -> store caller safe regs at 128+
115 */
116
/* Select Length - first value on 32 bits, second on 64 */
#ifdef __PPC64__
# define SL( a, b ) (b)
#else
# define SL( a, b ) (a)
#endif

/* Select ABI - first for ELF, second for OS X */
#ifdef __ELF__
# define SA( a, b ) (a)
#else
# define SA( a, b ) (b)
#endif

/* exactly one of these evaluates to 1, identifying the compile target */
#define ELF32 SL( SA( 1, 0 ), 0 )
#define ELF64 SL( 0, SA( 1, 0 ) )
#define OSX32 SL( SA( 0, 1 ), 0 )
#define OSX64 SL( 0, SA( 0, 1 ) )

/* native length load/store instructions ( L stands for long ) */
#define iSTLU SL( iSTWU, iSTDU )
#define iSTL SL( iSTW, iSTD )
#define iLL SL( iLWZ, iLD )
#define iLLX SL( iLWZX, iLDX )

/* register length */
#define GPRLEN SL( 4, 8 )
#define FPRLEN (8)
/* shift that many bits to obtain value multiplied by GPRLEN */
#define GPRLEN_SHIFT SL( 2, 3 )

/* Link register position, relative to the stack pointer (see ABI notes above) */
#define STACK_LR SL( SA( 4, 8 ), 16 )
/* register save position */
#define STACK_SAVE SL( SA( 16, 64 ), 128 )
/* temporary space, for float<->int exchange */
#define STACK_TEMP SL( SA( 8, 24 ), 48 )
/* red zone temporary space, used instead of STACK_TEMP if stack isn't
 * prepared properly */
#define STACK_RTEMP (-16)
157
#if ELF64
/*
 * Official Procedure Descriptor
 * we need to prepare one for generated code if we want to call it
 * as function
 */
typedef struct {
	// address of the code to execute
	void *function;
	// TOC pointer; layout follows the 64-bit PowerPC ELF ABI descriptor
	void *toc;
	// environment pointer, unused here
	void *env;
} opd_t;
#endif
170
171
172 /*
173 * opcode information table:
174 * - length of immediate value
175 * - returned register type
176 * - required register(s) type
177 */
#define opImm0 0x0000 /* no immediate */
#define opImm1 0x0001 /* 1 byte immediate value after opcode */
#define opImm4 0x0002 /* 4 bytes immediate value after opcode */

#define opRet0 0x0000 /* returns nothing */
#define opRetI 0x0004 /* returns integer */
#define opRetF 0x0008 /* returns float */
#define opRetIF (opRetI | opRetF) /* returns integer or float */

#define opArg0 0x0000 /* requires nothing */
#define opArgI 0x0010 /* requires integer(s) */
#define opArgF 0x0020 /* requires float(s) */
#define opArgIF (opArgI | opArgF) /* requires integer or float */

#define opArg2I 0x0040 /* requires second argument, integer */
#define opArg2F 0x0080 /* requires second argument, float */
#define opArg2IF (opArg2I | opArg2F) /* requires second argument, integer or float */

/* per-opcode flags table, indexed by OP_* value; entries not listed are 0 */
static const unsigned char vm_opInfo[256] =
{
	[OP_UNDEF] = opImm0,
	[OP_IGNORE] = opImm0,
	[OP_BREAK] = opImm0,
	[OP_ENTER] = opImm4,
	/* OP_LEAVE has to accept floats, they will be converted to ints */
	[OP_LEAVE] = opImm4 | opRet0 | opArgIF,
	/* only STORE4 and POP use values from OP_CALL,
	 * no need to convert floats back */
	[OP_CALL] = opImm0 | opRetI | opArgI,
	[OP_PUSH] = opImm0 | opRetIF,
	[OP_POP] = opImm0 | opRet0 | opArgIF,
	[OP_CONST] = opImm4 | opRetIF,
	[OP_LOCAL] = opImm4 | opRetI,
	[OP_JUMP] = opImm0 | opRet0 | opArgI,

	/* conditional branches consume two values of the same type */
	[OP_EQ] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_NE] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_LTI] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_LEI] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_GTI] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_GEI] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_LTU] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_LEU] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_GTU] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_GEU] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_EQF] = opImm4 | opRet0 | opArgF | opArg2F,
	[OP_NEF] = opImm4 | opRet0 | opArgF | opArg2F,
	[OP_LTF] = opImm4 | opRet0 | opArgF | opArg2F,
	[OP_LEF] = opImm4 | opRet0 | opArgF | opArg2F,
	[OP_GTF] = opImm4 | opRet0 | opArgF | opArg2F,
	[OP_GEF] = opImm4 | opRet0 | opArgF | opArg2F,

	/* memory access */
	[OP_LOAD1] = opImm0 | opRetI | opArgI,
	[OP_LOAD2] = opImm0 | opRetI | opArgI,
	[OP_LOAD4] = opImm0 | opRetIF| opArgI,
	[OP_STORE1] = opImm0 | opRet0 | opArgI | opArg2I,
	[OP_STORE2] = opImm0 | opRet0 | opArgI | opArg2I,
	[OP_STORE4] = opImm0 | opRet0 | opArgIF| opArg2I,
	[OP_ARG] = opImm1 | opRet0 | opArgIF,
	[OP_BLOCK_COPY] = opImm4 | opRet0 | opArgI | opArg2I,

	/* integer and float arithmetic */
	[OP_SEX8] = opImm0 | opRetI | opArgI,
	[OP_SEX16] = opImm0 | opRetI | opArgI,
	[OP_NEGI] = opImm0 | opRetI | opArgI,
	[OP_ADD] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_SUB] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_DIVI] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_DIVU] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_MODI] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_MODU] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_MULI] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_MULU] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_BAND] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_BOR] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_BXOR] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_BCOM] = opImm0 | opRetI | opArgI,
	[OP_LSH] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_RSHI] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_RSHU] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_NEGF] = opImm0 | opRetF | opArgF,
	[OP_ADDF] = opImm0 | opRetF | opArgF | opArg2F,
	[OP_SUBF] = opImm0 | opRetF | opArgF | opArg2F,
	[OP_DIVF] = opImm0 | opRetF | opArgF | opArg2F,
	[OP_MULF] = opImm0 | opRetF | opArgF | opArg2F,
	[OP_CVIF] = opImm0 | opRetF | opArgI,
	[OP_CVFI] = opImm0 | opRetI | opArgF,
};
265
266 /*
267 * source instruction data
268 */
typedef struct source_instruction_s source_instruction_t;
struct source_instruction_s {
	// opcode
	unsigned long int op;

	// number of instruction
	unsigned long int i_count;

	// immediate value (if any), viewable as any of these types
	union {
		unsigned int i;
		signed int si;
		signed short ss[2];
		unsigned short us[2];
		unsigned char b;
	} arg;

	// required and returned registers
	unsigned char regA1;	// first argument type: 0 = int, rTYPE_FLOAT = float
	unsigned char regA2;	// second argument type, same encoding
	unsigned char regR;	// returned value flags (rTYPE_STATIC / rTYPE_FLOAT bits)
	unsigned char regPos;	// opstack position used to pick the register

	// next instruction
	source_instruction_t *next;
};
295
296
297
298 /*
299 * read-only data needed by the generated code
300 */
typedef struct VM_Data {
	// length of this struct + data
	size_t dataLength;
	// compiled code size (in bytes)
	// it only is code size, without the data
	size_t codeLength;

	// function pointers, no use to waste registers for them
	long int (* AsmCall)( int, int );
	void (* BlockCopy )( unsigned int, unsigned int, size_t );

	// instruction pointers, rarely used so don't waste register
	ppc_instruction_t *iPointers;

	// data mask for load and store, not used if optimized
	unsigned int dataMask;

	// fixed number used to convert from integer to float
	unsigned int floatBase; // 0x59800004

#if ELF64
	// official procedure descriptor
	opd_t opd;
#endif

	// additional constants, for floating point OP_CONST
	// this data has dynamic length, thus '0' here
	// (zero-length array is a GCC extension; C99 would use a flexible array member)
	unsigned int data[0];
} vm_data_t;
330
331 #ifdef offsetof
332 # define VM_Data_Offset( field ) offsetof( vm_data_t, field )
333 #else
334 # define OFFSET( structName, field ) \
335 ( (void *)&(((structName *)NULL)->field) - NULL )
336 # define VM_Data_Offset( field ) OFFSET( vm_data_t, field )
337 #endif
338
339
340 /*
341 * functions used by generated code
342 */
/*
 * Syscall trampoline invoked from generated code (installed as VM_Data.AsmCall).
 * callSyscallInvNum is the non-inverted syscall number from the qvm;
 * callProgramStack is the vm program stack pointer at call time.
 * Returns the value produced by the engine's systemCall handler.
 */
static long int
VM_AsmCall( int callSyscallInvNum, int callProgramStack )
{
	vm_t *savedVM = currentVM;
	long int i, ret;
#ifdef VM_TIMES
	struct tms start_time, stop_time;
	clock_t saved_time = time_outside_vm;
	times( &start_time );
#endif

	// save the stack to allow recursive VM entry
	currentVM->programStack = callProgramStack - 4;

	// we need to convert ints to longs on 64bit powerpcs
	if ( sizeof( intptr_t ) == sizeof( int ) ) {
		// 32 bit: args already have pointer width, use them in place
		intptr_t *argPosition = (intptr_t *)((byte *)currentVM->dataBase + callProgramStack + 4);

		// generated code does not invert syscall number
		argPosition[ 0 ] = -1 - callSyscallInvNum;

		ret = currentVM->systemCall( argPosition );
	} else {
		// 64 bit: widen each 32 bit vm argument into an intptr_t slot
		intptr_t args[MAX_VMSYSCALL_ARGS];

		// generated code does not invert syscall number
		args[0] = -1 - callSyscallInvNum;

		int *argPosition = (int *)((byte *)currentVM->dataBase + callProgramStack + 4);
		for( i = 1; i < ARRAY_LEN(args); i++ )
			args[ i ] = argPosition[ i ];

		ret = currentVM->systemCall( args );
	}

	// restore currentVM - the syscall may have entered another vm
	currentVM = savedVM;

#ifdef VM_TIMES
	times( &stop_time );
	time_outside_vm = saved_time + ( stop_time.tms_utime - start_time.tms_utime );
#endif

	return ret;
}
387
388 /*
389 * code-block descriptors
390 */
typedef struct dest_instruction dest_instruction_t;
typedef struct symbolic_jump symbolic_jump_t;

/* a branch whose target address is not known yet */
struct symbolic_jump {
	// number of source instruction it has to jump to
	unsigned long int jump_to;

	// jump condition true/false, (4*cr7+(eq|gt..))
	long int bo, bi;

	// extensions / modifiers (branch-link)
	unsigned long ext;

	// dest_instruction referring to this jump
	dest_instruction_t *parent;

	// next jump
	symbolic_jump_t *nextJump;
};

/* one node of the output (generated ppc code) chain */
struct dest_instruction {
	// position in the output chain
	unsigned long int count;

	// source instruction number
	unsigned long int i_count;

	// exact (for instructions), or maximum (for jump) length
	unsigned short length;

	dest_instruction_t *next;

	// if the instruction is a jump then jump will be non NULL
	symbolic_jump_t *jump;

	// if jump is NULL then all the instructions will be here
	// (zero-length array, GCC extension)
	ppc_instruction_t code[0];
};
429
// first and last instruction,
// di_first is a dummy instruction
static dest_instruction_t *di_first = NULL, *di_last = NULL;
// number of instructions
static unsigned long int di_count = 0;
// pointers needed to compute local jumps, those aren't pointers to
// actual instructions, just used to check how long the jump is going
// to be and whether it is positive or negative
static dest_instruction_t **di_pointers = NULL;

// output instructions which do not come from source code
// use this false i_count value
#define FALSE_ICOUNT 0xffffffff
443
444
445 /*
446 * append specified instructions at the end of instruction chain
447 */
448 static void
PPC_Append(dest_instruction_t * di_now,unsigned long int i_count)449 PPC_Append(
450 dest_instruction_t *di_now,
451 unsigned long int i_count
452 )
453 {
454 di_now->count = di_count++;
455 di_now->i_count = i_count;
456 di_now->next = NULL;
457
458 di_last->next = di_now;
459 di_last = di_now;
460
461 if ( i_count != FALSE_ICOUNT ) {
462 if ( ! di_pointers[ i_count ] )
463 di_pointers[ i_count ] = di_now;
464 }
465 }
466
467 /*
468 * make space for instructions and append
469 */
470 static void
PPC_AppendInstructions(unsigned long int i_count,size_t num_instructions,const ppc_instruction_t * is)471 PPC_AppendInstructions(
472 unsigned long int i_count,
473 size_t num_instructions,
474 const ppc_instruction_t *is
475 )
476 {
477 if ( num_instructions < 0 )
478 num_instructions = 0;
479 size_t iBytes = sizeof( ppc_instruction_t ) * num_instructions;
480 dest_instruction_t *di_now = PPC_Malloc( sizeof( dest_instruction_t ) + iBytes );
481
482 di_now->length = num_instructions;
483 di_now->jump = NULL;
484
485 if ( iBytes > 0 )
486 memcpy( &(di_now->code[0]), is, iBytes );
487
488 PPC_Append( di_now, i_count );
489 }
490
491 /*
492 * create symbolic jump and append
493 */
494 static symbolic_jump_t *sj_first = NULL, *sj_last = NULL;
495 static void
PPC_PrepareJump(unsigned long int i_count,unsigned long int dest,long int bo,long int bi,unsigned long int ext)496 PPC_PrepareJump(
497 unsigned long int i_count,
498 unsigned long int dest,
499 long int bo,
500 long int bi,
501 unsigned long int ext
502 )
503 {
504 dest_instruction_t *di_now = PPC_Malloc( sizeof( dest_instruction_t ) );
505 symbolic_jump_t *sj = PPC_Malloc( sizeof( symbolic_jump_t ) );
506
507 sj->jump_to = dest;
508 sj->bo = bo;
509 sj->bi = bi;
510 sj->ext = ext;
511 sj->parent = di_now;
512 sj->nextJump = NULL;
513
514 sj_last->nextJump = sj;
515 sj_last = sj;
516
517 di_now->length = (bo == branchAlways ? 1 : 2);
518 di_now->jump = sj;
519
520 PPC_Append( di_now, i_count );
521 }
522
523 /*
 * simplify instruction emission
525 */
/* begin emission for source instruction i_cnt;
 * declares a local buffer of up to 50 encoded instructions */
#define emitStart( i_cnt ) \
	unsigned long int i_count = i_cnt; \
	size_t num_instructions = 0; \
	long int force_emit = 0; \
	ppc_instruction_t instructions[50];

/* push one already-encoded instruction into the local buffer */
#define pushIn( inst ) \
	(instructions[ num_instructions++ ] = inst)
/* encode an instruction (IN) and push it */
#define in( inst, args... ) pushIn( IN( inst, args ) )

/* flush the buffered instructions to the output chain */
#define emitEnd() \
	do{ \
		if ( num_instructions || force_emit ) \
			PPC_AppendInstructions( i_count, num_instructions, instructions );\
		num_instructions = 0; \
	} while(0)

/* flush the buffer, then append a symbolic jump to source instruction dest */
#define emitJump( dest, bo, bi, ext ) \
	do { \
		emitEnd(); \
		PPC_PrepareJump( i_count, dest, bo, bi, ext ); \
	} while(0)
548
549
550 /*
551 * definitions for creating .data section,
552 * used in cases where constant float is needed
553 */
#define LOCAL_DATA_CHUNK 50
typedef struct local_data_s local_data_t;
/* one chunk of the chained constant pool */
struct local_data_s {
	// number of data in this structure
	long int count;

	// data placeholder
	unsigned int data[ LOCAL_DATA_CHUNK ];

	// next chunk, if this one wasn't enough
	local_data_t *next;
};

// first data chunk
static local_data_t *data_first = NULL;
// total number of data
static long int data_acc = 0;
571
572 /*
573 * append the data and return its offset
574 */
575 static size_t
PPC_PushData(unsigned int datum)576 PPC_PushData( unsigned int datum )
577 {
578 local_data_t *d_now = data_first;
579 long int accumulated = 0;
580
581 // check whether we have this one already
582 do {
583 long int i;
584 for ( i = 0; i < d_now->count; i++ ) {
585 if ( d_now->data[ i ] == datum ) {
586 accumulated += i;
587 return VM_Data_Offset( data[ accumulated ] );
588 }
589 }
590 if ( !d_now->next )
591 break;
592
593 accumulated += d_now->count;
594 d_now = d_now->next;
595 } while (1);
596
597 // not found, need to append
598 accumulated += d_now->count;
599
600 // last chunk is full, create new one
601 if ( d_now->count >= LOCAL_DATA_CHUNK ) {
602 d_now->next = PPC_Malloc( sizeof( local_data_t ) );
603 d_now = d_now->next;
604 d_now->count = 0;
605 d_now->next = NULL;
606 }
607
608 d_now->data[ d_now->count ] = datum;
609 d_now->count += 1;
610
611 data_acc = accumulated + 1;
612
613 return VM_Data_Offset( data[ accumulated ] );
614 }
615
616 /*
617 * find leading zeros in dataMask to implement it with
618 * "rotate and mask" instruction
619 */
static long int fastMaskHi = 0, fastMaskLo = 31;
/*
 * compute the rlwinm mask-begin/mask-end bit positions for dataMask;
 * mask must be non-zero (otherwise __builtin_clz/ctz are undefined and
 * the fallback loops would not terminate)
 */
static void
PPC_MakeFastMask( int mask )
{
#if defined( __GNUC__ ) && ( __GNUC__ >= 4 || ( __GNUC__ == 3 && __GNUC_MINOR__ >= 4 ) )
	/* index of the highest set bit, counted from the MSB */
	fastMaskHi = __builtin_clz( mask );

	/* index of the lowest set bit, counted from the MSB */
	fastMaskLo = 31 - __builtin_ctz( mask );
#else
	for ( fastMaskHi = 0; ( mask & ( 0x80000000 >> fastMaskHi ) ) == 0; fastMaskHi++ )
		;

	for ( fastMaskLo = 31; ( mask & ( 0x80000000 >> fastMaskLo ) ) == 0; fastMaskLo-- )
		;
#endif
}
640
641
/*
 * register definitions
 */

/* registers which are global for generated code */

// pointer to VM_Data (constant)
#define rVMDATA r14
// vm->dataBase (constant)
#define rDATABASE r15
// programStack (variable)
#define rPSTACK r16

/*
 * function local registers,
 *
 * normally only volatile registers are used, but if there aren't enough
 * or function has to preserve some value while calling another one
 * then caller safe registers are used as well
 */
static const long int gpr_list[] = {
	/* caller safe registers, normally only one is used */
	r24, r23, r22, r21,
	r20, r19, r18, r17,
	/* volatile registers (preferred),
	 * normally no more than 5 is used */
	r3, r4, r5, r6,
	r7, r8, r9, r10,
};
static const long int gpr_vstart = 8; /* position of first volatile register */
static const long int gpr_total = ARRAY_LEN( gpr_list );

static const long int fpr_list[] = {
	/* static registers, normally none is used */
	f20, f21, f19, f18,
	f17, f16, f15, f14,
	/* volatile registers (preferred),
	 * normally no more than 7 is used */
	f0, f1, f2, f3,
	f4, f5, f6, f7,
	f8, f9, f10, f11,
	f12, f13,
};
static const long int fpr_vstart = 8; /* position of first volatile register */
static const long int fpr_total = ARRAY_LEN( fpr_list );
687
688 /*
689 * prepare some dummy structures and emit init code
690 */
static void
PPC_CompileInit( void )
{
	// dummy head nodes for the output chain, jump list and data chunks
	di_first = di_last = PPC_Malloc( sizeof( dest_instruction_t ) );
	di_first->count = 0;
	di_first->next = NULL;
	di_first->jump = NULL;

	sj_first = sj_last = PPC_Malloc( sizeof( symbolic_jump_t ) );
	sj_first->nextJump = NULL;

	data_first = PPC_Malloc( sizeof( local_data_t ) );
	data_first->count = 0;
	data_first->next = NULL;

	/*
	 * init function:
	 * saves old values of global registers and sets our values
	 * function prototype is:
	 * int begin( void *data, int programStack, void *vm->dataBase )
	 */

	/* first instruction must not be placed on instruction list */
	emitStart( FALSE_ICOUNT );

	long int stack = STACK_SAVE + 4 * GPRLEN;

	// prologue: save LR, allocate the frame, save the three global registers
	in( iMFLR, r0 );
	in( iSTLU, r1, -stack, r1 );
	in( iSTL, rVMDATA, STACK_SAVE + 0 * GPRLEN, r1 );
	in( iSTL, rPSTACK, STACK_SAVE + 1 * GPRLEN, r1 );
	in( iSTL, rDATABASE, STACK_SAVE + 2 * GPRLEN, r1 );
	in( iSTL, r0, stack + STACK_LR, r1 );
	// load the three incoming arguments into the global registers
	in( iMR, rVMDATA, r3 );
	in( iMR, rPSTACK, r4 );
	in( iMR, rDATABASE, r5 );
	in( iBL, +4*8 ); // LINK JUMP: first generated instruction | XXX jump !
	// epilogue: restore globals and LR, release the frame, return
	in( iLL, rVMDATA, STACK_SAVE + 0 * GPRLEN, r1 );
	in( iLL, rPSTACK, STACK_SAVE + 1 * GPRLEN, r1 );
	in( iLL, rDATABASE, STACK_SAVE + 2 * GPRLEN, r1 );
	in( iLL, r0, stack + STACK_LR, r1 );
	in( iMTLR, r0 );
	in( iADDI, r1, r1, stack );
	in( iBLR );

	emitEnd();
}
738
// these macros require a local gpr_pos / fpr_pos variable in scope

// rFIRST is the copy of the top value on the opstack
#define rFIRST (gpr_list[ gpr_pos - 1])
// second value on the opstack
#define rSECOND (gpr_list[ gpr_pos - 2 ])
// temporary registers, not on the opstack
#define rTEMP(x) (gpr_list[ gpr_pos + x ])
#define rTMP rTEMP(0)

#define fFIRST (fpr_list[ fpr_pos - 1 ])
#define fSECOND (fpr_list[ fpr_pos - 2 ])
#define fTEMP(x) (fpr_list[ fpr_pos + x ])
#define fTMP fTEMP(0)

// register types (bits stored in regR / regA1 / regA2)
#define rTYPE_STATIC 0x01
#define rTYPE_FLOAT 0x02

// what type should this opcode return
#define RET_INT ( !(i_now->regR & rTYPE_FLOAT) )
#define RET_FLOAT ( i_now->regR & rTYPE_FLOAT )
// what type should it accept
#define ARG_INT ( ! i_now->regA1 )
#define ARG_FLOAT ( i_now->regA1 )
#define ARG2_INT ( ! i_now->regA2 )
#define ARG2_FLOAT ( i_now->regA2 )
764
765 /*
766 * emit OP_CONST, called if nothing has used the const value directly
767 */
static void
PPC_EmitConst( source_instruction_t * const i_const )
{
	emitStart( i_const->i_count );

	if ( !(i_const->regR & rTYPE_FLOAT) ) {
		// gpr_pos needed for "rFIRST" to work
		long int gpr_pos = i_const->regPos;

		if ( i_const->arg.si >= -0x8000 && i_const->arg.si < 0x8000 ) {
			// fits in a signed 16bit immediate
			in( iLI, rFIRST, i_const->arg.si );
		} else if ( i_const->arg.i < 0x10000 ) {
			// fits in an unsigned 16bit immediate: zero then OR it in
			in( iLI, rFIRST, 0 );
			in( iORI, rFIRST, rFIRST, i_const->arg.i );
		} else {
			// load upper and lower halves separately
			// (ss[0]/us[1] are the high/low 16 bits on big-endian ppc)
			in( iLIS, rFIRST, i_const->arg.ss[ 0 ] );
			if ( i_const->arg.us[ 1 ] != 0 )
				in( iORI, rFIRST, rFIRST, i_const->arg.us[ 1 ] );
		}

	} else {
		// fpr_pos needed for "fFIRST" to work
		long int fpr_pos = i_const->regPos;

		// there's no good way to generate the data,
		// just read it from data section
		in( iLFS, fFIRST, PPC_PushData( i_const->arg.i ), rVMDATA );
	}

	emitEnd();
}
#define MAYBE_EMIT_CONST() if ( i_const ) PPC_EmitConst( i_const )
800
801 /*
802 * emit empty instruction, just sets the needed pointers
803 */
static inline void
PPC_EmitNull( source_instruction_t * const i_null )
{
	// emit a zero-length output block so di_pointers gets an entry
	PPC_AppendInstructions( i_null->i_count, 0, NULL );
}
#define EMIT_FALSE_CONST() PPC_EmitNull( i_const )
810
811
812 /*
 * analyze function for register usage and whether it needs stack (r1) prepared
814 */
static void
VM_AnalyzeFunction(
	source_instruction_t * const i_first,
	long int *prepareStack,
	long int *gpr_start_pos,
	long int *fpr_start_pos
)
{
	source_instruction_t *i_now = i_first;

	// simulated opstack: who provided each value currently on it
	// (assumes opstack depth never exceeds 20 entries - TODO confirm qvm limit)
	source_instruction_t *value_provider[20] = { NULL };
	unsigned long int opstack_depth = 0;

	/*
	 * first step:
	 * remember what codes returned some value and mark the value type
	 * when we get to know what it should be
	 */
	while ( (i_now = i_now->next) ) {
		unsigned long int op = i_now->op;
		unsigned long int opi = vm_opInfo[ op ];

		if ( opi & opArgIF ) {
			assert( opstack_depth > 0 );

			opstack_depth--;
			source_instruction_t *vp = value_provider[ opstack_depth ];
			unsigned long int vpopi = vm_opInfo[ vp->op ];

			if ( (opi & opArgI) && (vpopi & opRetI) ) {
				// instruction accepts integer, provider returns integer
				//vp->regR |= rTYPE_INT;
				//i_now->regA1 = rTYPE_INT;
			} else if ( (opi & opArgF) && (vpopi & opRetF) ) {
				// instruction accepts float, provider returns float
				vp->regR |= rTYPE_FLOAT; // use OR here - could be marked as static
				i_now->regA1 = rTYPE_FLOAT;
			} else {
				// instruction arg type does not agree with
				// provider return type
				DIE( "unrecognized instruction combination" );
			}

		}
		if ( opi & opArg2IF ) {
			assert( opstack_depth > 0 );

			opstack_depth--;
			source_instruction_t *vp = value_provider[ opstack_depth ];
			unsigned long int vpopi = vm_opInfo[ vp->op ];

			if ( (opi & opArg2I) && (vpopi & opRetI) ) {
				// instruction accepts integer, provider returns integer
				//vp->regR |= rTYPE_INT;
				//i_now->regA2 = rTYPE_INT;
			} else if ( (opi & opArg2F) && (vpopi & opRetF) ) {
				// instruction accepts float, provider returns float
				vp->regR |= rTYPE_FLOAT; // use OR here - could be marked as static
				i_now->regA2 = rTYPE_FLOAT;
			} else {
				// instruction arg type does not agree with
				// provider return type
				DIE( "unrecognized instruction combination" );
			}
		}


		if (
			( op == OP_CALL )
			||
			( op == OP_BLOCK_COPY && ( i_now->arg.i > SL( 16, 32 ) || !OPTIMIZE_COPY ) )
		) {
			long int i;
			*prepareStack = 1;
			// force caller safe registers so we won't have to save them
			for ( i = 0; i < opstack_depth; i++ ) {
				source_instruction_t *vp = value_provider[ i ];
				vp->regR |= rTYPE_STATIC;
			}
		}


		if ( opi & opRetIF ) {
			value_provider[ opstack_depth ] = i_now;
			opstack_depth++;
		}
	}

	/*
	 * second step:
	 * now that we know register types; compute exactly how many registers
	 * of each type we need
	 */

	// needed_reg/max_reg indices: (regR & 2) selects 0=gpr / 2=fpr totals,
	// (regR & 3) with the static bit set selects 1=gpr / 3=fpr static counts
	i_now = i_first;
	long int needed_reg[4] = {0,0,0,0}, max_reg[4] = {0,0,0,0};
	opstack_depth = 0;
	while ( (i_now = i_now->next) ) {
		unsigned long int op = i_now->op;
		unsigned long int opi = vm_opInfo[ op ];

		if ( opi & opArgIF ) {
			assert( opstack_depth > 0 );
			opstack_depth--;
			source_instruction_t *vp = value_provider[ opstack_depth ];

			needed_reg[ ( vp->regR & 2 ) ] -= 1;
			if ( vp->regR & 1 ) // static
				needed_reg[ ( vp->regR & 3 ) ] -= 1;
		}
		if ( opi & opArg2IF ) {
			assert( opstack_depth > 0 );
			opstack_depth--;
			source_instruction_t *vp = value_provider[ opstack_depth ];

			needed_reg[ ( vp->regR & 2 ) ] -= 1;
			if ( vp->regR & 1 ) // static
				needed_reg[ ( vp->regR & 3 ) ] -= 1;
		}

		if ( opi & opRetIF ) {
			long int i;
			value_provider[ opstack_depth ] = i_now;
			opstack_depth++;

			i = i_now->regR & 2;
			needed_reg[ i ] += 1;
			if ( max_reg[ i ] < needed_reg[ i ] )
				max_reg[ i ] = needed_reg[ i ];

			i = i_now->regR & 3;
			if ( i & 1 ) {
				needed_reg[ i ] += 1;
				if ( max_reg[ i ] < needed_reg[ i ] )
					max_reg[ i ] = needed_reg[ i ];
			}
		}
	}

	// decide where in gpr_list this function starts allocating;
	// moving the start below gpr_vstart brings caller safe registers into use
	long int gpr_start = gpr_vstart;
	const long int gpr_volatile = gpr_total - gpr_vstart;
	if ( max_reg[ 1 ] > 0 || max_reg[ 0 ] > gpr_volatile ) {
		// max_reg[ 0 ] - all gprs needed
		// max_reg[ 1 ] - static gprs needed
		long int max = max_reg[ 0 ] - gpr_volatile;
		if ( max_reg[ 1 ] > max )
			max = max_reg[ 1 ];
		if ( max > gpr_vstart ) {
			/* error */
			DIE( "Need more GPRs" );
		}

		gpr_start -= max;

		// need stack to save caller safe registers
		*prepareStack = 1;
	}
	*gpr_start_pos = gpr_start;

	// same decision for the float registers
	long int fpr_start = fpr_vstart;
	const long int fpr_volatile = fpr_total - fpr_vstart;
	if ( max_reg[ 3 ] > 0 || max_reg[ 2 ] > fpr_volatile ) {
		// max_reg[ 2 ] - all fprs needed
		// max_reg[ 3 ] - static fprs needed
		long int max = max_reg[ 2 ] - fpr_volatile;
		if ( max_reg[ 3 ] > max )
			max = max_reg[ 3 ];
		if ( max > fpr_vstart ) {
			/* error */
			DIE( "Need more FPRs" );
		}

		fpr_start -= max;

		// need stack to save caller safe registers
		*prepareStack = 1;
	}
	*fpr_start_pos = fpr_start;
}
994
995 /*
996 * translate opcodes to ppc instructions,
997 * it works on functions, not on whole code at once
998 */
/*
 * Translate the opcodes of one QVM function into PowerPC instructions.
 * Works on a single function (dummy head node i_first followed by its
 * opcodes), not on the whole program at once.  VM_AnalyzeFunction must
 * fill in the stack/register requirements first.
 *
 * The virtual opstack is mapped onto real registers: gpr_pos/fpr_pos
 * track the current top, and the rFIRST/rSECOND (fFIRST/fSECOND) macros
 * presumably resolve against them -- so every push/pos++ and pop/pos--
 * below is part of the code generation contract, not mere bookkeeping.
 */
static void
VM_CompileFunction( source_instruction_t * const i_first )
{
	// stack frame preparation flag/size; stays 0 for leaf functions
	long int prepareStack = 0;
	long int gpr_start_pos, fpr_start_pos;

	VM_AnalyzeFunction( i_first, &prepareStack, &gpr_start_pos, &fpr_start_pos );

	long int gpr_pos = gpr_start_pos, fpr_pos = fpr_start_pos;

	// OP_CONST combines well with many opcodes so we treat it in a special way
	source_instruction_t *i_const = NULL;
	source_instruction_t *i_now = i_first;

	// how big the stack has to be
	long int save_space = STACK_SAVE;
	{
		// room for the callee-saved GPRs and FPRs this function uses
		// beyond the volatile set, 16-byte aligned after each group
		if ( gpr_start_pos < gpr_vstart )
			save_space += (gpr_vstart - gpr_start_pos) * GPRLEN;
		save_space = ( save_space + 15 ) & ~0x0f;

		if ( fpr_start_pos < fpr_vstart )
			save_space += (fpr_vstart - fpr_start_pos) * FPRLEN;
		save_space = ( save_space + 15 ) & ~0x0f;
	}

	// scratch stack slot used to move values between GPRs and FPRs
	// through memory (see OP_CVIF / OP_CVFI below)
	long int stack_temp = prepareStack ? STACK_TEMP : STACK_RTEMP;

	while ( (i_now = i_now->next) ) {
		emitStart( i_now->i_count );

		switch ( i_now->op )
		{
			default:
			case OP_UNDEF:
			case OP_IGNORE:
				MAYBE_EMIT_CONST();
				in( iNOP );
				break;

			case OP_BREAK:
				MAYBE_EMIT_CONST();
				// force SEGV
				in( iLWZ, r0, 0, r0 );
				break;

			case OP_ENTER:
				if ( i_const )
					DIE( "Weird opcode order" );

				// don't prepare stack if not needed
				if ( prepareStack ) {
					long int i, save_pos = STACK_SAVE;

					in( iMFLR, r0 );
					in( iSTLU, r1, -save_space, r1 );
					in( iSTL, r0, save_space + STACK_LR, r1 );

					/* save registers */
					for ( i = gpr_start_pos; i < gpr_vstart; i++ ) {
						in( iSTL, gpr_list[ i ], save_pos, r1 );
						save_pos += GPRLEN;
					}
					save_pos = ( save_pos + 15 ) & ~0x0f;

					for ( i = fpr_start_pos; i < fpr_vstart; i++ ) {
						in( iSTFD, fpr_list[ i ], save_pos, r1 );
						save_pos += FPRLEN;
					}
					// remember the frame was really created so
					// OP_LEAVE knows it has to tear it down
					prepareStack = 2;
				}

				in( iADDI, rPSTACK, rPSTACK, - i_now->arg.si );
				break;

			case OP_LEAVE:
				if ( i_const ) {
					EMIT_FALSE_CONST();

					if ( i_const->regR & rTYPE_FLOAT)
						DIE( "constant float in OP_LEAVE" );

					// materialize the constant return value in r3,
					// choosing the shortest load sequence
					if ( i_const->arg.si >= -0x8000 && i_const->arg.si < 0x8000 ) {
						in( iLI, r3, i_const->arg.si );
					} else if ( i_const->arg.i < 0x10000 ) {
						in( iLI, r3, 0 );
						in( iORI, r3, r3, i_const->arg.i );
					} else {
						in( iLIS, r3, i_const->arg.ss[ 0 ] );
						if ( i_const->arg.us[ 1 ] != 0 )
							in( iORI, r3, r3, i_const->arg.us[ 1 ] );
					}
					gpr_pos--;
				} else {
					MAYBE_EMIT_CONST();

					/* place return value in r3 */
					if ( ARG_INT ) {
						if ( rFIRST != r3 )
							in( iMR, r3, rFIRST );
						gpr_pos--;
					} else {
						// float result: bounce through the stack
						// to move the raw bits into a GPR
						in( iSTFS, fFIRST, stack_temp, r1 );
						in( iLWZ, r3, stack_temp, r1 );
						fpr_pos--;
					}
				}

				// don't undo stack if not prepared
				if ( prepareStack >= 2 ) {
					long int i, save_pos = STACK_SAVE;

					in( iLL, r0, save_space + STACK_LR, r1 );

					/* restore registers */
					for ( i = gpr_start_pos; i < gpr_vstart; i++ ) {
						in( iLL, gpr_list[ i ], save_pos, r1 );
						save_pos += GPRLEN;
					}
					save_pos = ( save_pos + 15 ) & ~0x0f;
					for ( i = fpr_start_pos; i < fpr_vstart; i++ ) {
						in( iLFD, fpr_list[ i ], save_pos, r1 );
						save_pos += FPRLEN;
					}

					in( iMTLR, r0 );
					in( iADDI, r1, r1, save_space );
				}
				in( iADDI, rPSTACK, rPSTACK, i_now->arg.si);
				in( iBLR );
				// the opstack must be balanced at every function exit
				assert( gpr_pos == gpr_start_pos );
				assert( fpr_pos == fpr_start_pos );
				break;

			case OP_CALL:
				if ( i_const ) {
					EMIT_FALSE_CONST();

					if ( i_const->arg.si >= 0 ) {
						// call target known at compile time: direct branch
						emitJump(
							i_const->arg.i,
							branchAlways, 0, branchExtLink
						);
					} else {
						/* syscall */
						in( iLL, r0, VM_Data_Offset( AsmCall ), rVMDATA );

						in( iLI, r3, i_const->arg.si ); // negative value
						in( iMR, r4, rPSTACK ); // push PSTACK on argument list

						in( iMTCTR, r0 );
						in( iBCTRL );
					}
					if ( rFIRST != r3 )
						in( iMR, rFIRST, r3 );
				} else {
					MAYBE_EMIT_CONST();

					// call target only known at run time; negative
					// values select a syscall, others an instruction
					in( iCMPWI, cr7, rFIRST, 0 );
					// NOTE: the +4*N offsets below are hand-counted over the
					// instructions emitted between here and the branch target;
					// inserting or removing an instruction breaks them
					in( iBLTm, cr7, +4*5 /* syscall */ ); // XXX jump !
					/* instruction call */

					// get instruction address
					in( iLL, r0, VM_Data_Offset( iPointers ), rVMDATA );
					in( iRLWINM, rFIRST, rFIRST, GPRLEN_SHIFT, 0, 31-GPRLEN_SHIFT ); // mul * GPRLEN
					in( iLLX, r0, rFIRST, r0 ); // load pointer

					in( iB, +4*(3 + (rFIRST != r3 ? 1 : 0) ) ); // XXX jump !

					/* syscall */
					in( iLL, r0, VM_Data_Offset( AsmCall ), rVMDATA ); // get asmCall pointer
					/* rFIRST can be r3 or some static register */
					if ( rFIRST != r3 )
						in( iMR, r3, rFIRST ); // push OPSTACK top value on argument list
					in( iMR, r4, rPSTACK ); // push PSTACK on argument list

					/* common code */
					in( iMTCTR, r0 );
					in( iBCTRL );

					if ( rFIRST != r3 )
						in( iMR, rFIRST, r3 ); // push return value on the top of the opstack
				}
				break;

			case OP_PUSH:
				MAYBE_EMIT_CONST();
				if ( RET_INT )
					gpr_pos++;
				else
					fpr_pos++;
				/* no instructions here */
				force_emit = 1;
				break;

			case OP_POP:
				MAYBE_EMIT_CONST();
				if ( ARG_INT )
					gpr_pos--;
				else
					fpr_pos--;
				/* no instructions here */
				force_emit = 1;
				break;

			case OP_CONST:
				MAYBE_EMIT_CONST();
				/* nothing here */
				break;

			case OP_LOCAL:
				MAYBE_EMIT_CONST();
				{
					// split the offset for addis/addi; the low half is
					// sign-extended by addi, so bump hi to compensate
					signed long int hi, lo;
					hi = i_now->arg.ss[ 0 ];
					lo = i_now->arg.ss[ 1 ];
					if ( lo < 0 )
						hi += 1;

					gpr_pos++;
					if ( hi == 0 ) {
						in( iADDI, rFIRST, rPSTACK, lo );
					} else {
						in( iADDIS, rFIRST, rPSTACK, hi );
						if ( lo != 0 )
							in( iADDI, rFIRST, rFIRST, lo );
					}
				}
				break;

			case OP_JUMP:
				if ( i_const ) {
					EMIT_FALSE_CONST();

					emitJump(
						i_const->arg.i,
						branchAlways, 0, 0
					);
				} else {
					MAYBE_EMIT_CONST();

					// computed jump through the instruction pointer table
					in( iLL, r0, VM_Data_Offset( iPointers ), rVMDATA );
					in( iRLWINM, rFIRST, rFIRST, GPRLEN_SHIFT, 0, 31-GPRLEN_SHIFT ); // mul * GPRLEN
					in( iLLX, r0, rFIRST, r0 ); // load pointer
					in( iMTCTR, r0 );
					in( iBCTR );
				}
				gpr_pos--;
				break;

			case OP_EQ:
			case OP_NE:
				// eq/ne can use either the signed or unsigned compare
				// immediate, so a wider constant range is accepted here
				if ( i_const && i_const->arg.si >= -0x8000 && i_const->arg.si < 0x10000 ) {
					EMIT_FALSE_CONST();
					if ( i_const->arg.si >= 0x8000 )
						in( iCMPLWI, cr7, rSECOND, i_const->arg.i );
					else
						in( iCMPWI, cr7, rSECOND, i_const->arg.si );
				} else {
					MAYBE_EMIT_CONST();
					in( iCMPW, cr7, rSECOND, rFIRST );
				}
				emitJump(
					i_now->arg.i,
					(i_now->op == OP_EQ ? branchTrue : branchFalse),
					4*cr7+eq, 0
				);
				gpr_pos -= 2;
				break;

			case OP_LTI:
			case OP_GEI:
				if ( i_const && i_const->arg.si >= -0x8000 && i_const->arg.si < 0x8000 ) {
					EMIT_FALSE_CONST();
					in( iCMPWI, cr7, rSECOND, i_const->arg.si );
				} else {
					MAYBE_EMIT_CONST();
					in( iCMPW, cr7, rSECOND, rFIRST );
				}
				// GEI is implemented as "branch when LT is false"
				emitJump(
					i_now->arg.i,
					( i_now->op == OP_LTI ? branchTrue : branchFalse ),
					4*cr7+lt, 0
				);
				gpr_pos -= 2;
				break;

			case OP_GTI:
			case OP_LEI:
				if ( i_const && i_const->arg.si >= -0x8000 && i_const->arg.si < 0x8000 ) {
					EMIT_FALSE_CONST();
					in( iCMPWI, cr7, rSECOND, i_const->arg.si );
				} else {
					MAYBE_EMIT_CONST();
					in( iCMPW, cr7, rSECOND, rFIRST );
				}
				emitJump(
					i_now->arg.i,
					( i_now->op == OP_GTI ? branchTrue : branchFalse ),
					4*cr7+gt, 0
				);
				gpr_pos -= 2;
				break;

			case OP_LTU:
			case OP_GEU:
				if ( i_const && i_const->arg.i < 0x10000 ) {
					EMIT_FALSE_CONST();
					in( iCMPLWI, cr7, rSECOND, i_const->arg.i );
				} else {
					MAYBE_EMIT_CONST();
					in( iCMPLW, cr7, rSECOND, rFIRST );
				}
				emitJump(
					i_now->arg.i,
					( i_now->op == OP_LTU ? branchTrue : branchFalse ),
					4*cr7+lt, 0
				);
				gpr_pos -= 2;
				break;

			case OP_GTU:
			case OP_LEU:
				if ( i_const && i_const->arg.i < 0x10000 ) {
					EMIT_FALSE_CONST();
					in( iCMPLWI, cr7, rSECOND, i_const->arg.i );
				} else {
					MAYBE_EMIT_CONST();
					in( iCMPLW, cr7, rSECOND, rFIRST );
				}
				emitJump(
					i_now->arg.i,
					( i_now->op == OP_GTU ? branchTrue : branchFalse ),
					4*cr7+gt, 0
				);
				gpr_pos -= 2;
				break;

			case OP_EQF:
			case OP_NEF:
				MAYBE_EMIT_CONST();
				in( iFCMPU, cr7, fSECOND, fFIRST );
				emitJump(
					i_now->arg.i,
					( i_now->op == OP_EQF ? branchTrue : branchFalse ),
					4*cr7+eq, 0
				);
				fpr_pos -= 2;
				break;

			case OP_LTF:
			case OP_GEF:
				MAYBE_EMIT_CONST();
				in( iFCMPU, cr7, fSECOND, fFIRST );
				emitJump(
					i_now->arg.i,
					( i_now->op == OP_LTF ? branchTrue : branchFalse ),
					4*cr7+lt, 0
				);
				fpr_pos -= 2;
				break;

			case OP_GTF:
			case OP_LEF:
				MAYBE_EMIT_CONST();
				in( iFCMPU, cr7, fSECOND, fFIRST );
				emitJump(
					i_now->arg.i,
					( i_now->op == OP_GTF ? branchTrue : branchFalse ),
					4*cr7+gt, 0
				);
				fpr_pos -= 2;
				break;

			case OP_LOAD1:
				MAYBE_EMIT_CONST();
				// clamp the address into the VM data segment, either with
				// a precomputed rlwinm mask or with the full dataMask
#if OPTIMIZE_MASK
				in( iRLWINM, rFIRST, rFIRST, 0, fastMaskHi, fastMaskLo );
#else
				in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
				in( iAND, rFIRST, rFIRST, r0 );
#endif
				in( iLBZX, rFIRST, rFIRST, rDATABASE );
				break;

			case OP_LOAD2:
				MAYBE_EMIT_CONST();
#if OPTIMIZE_MASK
				in( iRLWINM, rFIRST, rFIRST, 0, fastMaskHi, fastMaskLo );
#else
				in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
				in( iAND, rFIRST, rFIRST, r0 );
#endif
				in( iLHZX, rFIRST, rFIRST, rDATABASE );
				break;

			case OP_LOAD4:
				MAYBE_EMIT_CONST();
#if OPTIMIZE_MASK
				in( iRLWINM, rFIRST, rFIRST, 0, fastMaskHi, fastMaskLo );
#else
				in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
				in( iAND, rFIRST, rFIRST, r0 );
#endif
				// a 4-byte load may produce either an int or a float,
				// depending on how the following opcode consumes it
				if ( RET_INT ) {
					in( iLWZX, rFIRST, rFIRST, rDATABASE );
				} else {
					fpr_pos++;
					in( iLFSX, fFIRST, rFIRST, rDATABASE );
					gpr_pos--;
				}
				break;

			case OP_STORE1:
				MAYBE_EMIT_CONST();
#if OPTIMIZE_MASK
				in( iRLWINM, rSECOND, rSECOND, 0, fastMaskHi, fastMaskLo );
#else
				in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
				in( iAND, rSECOND, rSECOND, r0 );
#endif
				in( iSTBX, rFIRST, rSECOND, rDATABASE );
				gpr_pos -= 2;
				break;

			case OP_STORE2:
				MAYBE_EMIT_CONST();
#if OPTIMIZE_MASK
				in( iRLWINM, rSECOND, rSECOND, 0, fastMaskHi, fastMaskLo );
#else
				in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
				in( iAND, rSECOND, rSECOND, r0 );
#endif
				in( iSTHX, rFIRST, rSECOND, rDATABASE );
				gpr_pos -= 2;
				break;

			case OP_STORE4:
				MAYBE_EMIT_CONST();
				if ( ARG_INT ) {
#if OPTIMIZE_MASK
					in( iRLWINM, rSECOND, rSECOND, 0, fastMaskHi, fastMaskLo );
#else
					in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
					in( iAND, rSECOND, rSECOND, r0 );
#endif

					in( iSTWX, rFIRST, rSECOND, rDATABASE );
					gpr_pos--;
				} else {
					// float store: value is in fFIRST, address in rFIRST
#if OPTIMIZE_MASK
					in( iRLWINM, rFIRST, rFIRST, 0, fastMaskHi, fastMaskLo );
#else
					in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
					in( iAND, rFIRST, rFIRST, r0 );
#endif

					in( iSTFSX, fFIRST, rFIRST, rDATABASE );
					fpr_pos--;
				}
				gpr_pos--;
				break;

			case OP_ARG:
				MAYBE_EMIT_CONST();
				// store the opstack top into the callee's argument area
				// at PSTACK + arg.b
				in( iADDI, r0, rPSTACK, i_now->arg.b );
				if ( ARG_INT ) {
					in( iSTWX, rFIRST, rDATABASE, r0 );
					gpr_pos--;
				} else {
					in( iSTFSX, fFIRST, rDATABASE, r0 );
					fpr_pos--;
				}
				break;

			case OP_BLOCK_COPY:
				MAYBE_EMIT_CONST();
#if OPTIMIZE_COPY
				if ( i_now->arg.i <= SL( 16, 32 ) ) {
					/* block is very short so copy it in-place */

					unsigned int len = i_now->arg.i;
					unsigned int copied = 0, left = len;

					in( iADD, rFIRST, rFIRST, rDATABASE );
					in( iADD, rSECOND, rSECOND, rDATABASE );

					// copy whole machine words first, using r0 and the
					// temporary registers as a small load/store burst
					if ( len >= GPRLEN ) {
						long int i, words = len / GPRLEN;
						in( iLL, r0, 0, rFIRST );
						for ( i = 1; i < words; i++ )
							in( iLL, rTEMP( i - 1 ), GPRLEN * i, rFIRST );

						in( iSTL, r0, 0, rSECOND );
						for ( i = 1; i < words; i++ )
							in( iSTL, rTEMP( i - 1 ), GPRLEN * i, rSECOND );

						copied += words * GPRLEN;
						left -= copied;
					}

					// on 64bit a 4-byte remainder may be left over
					if ( SL( 0, left >= 4 ) ) {
						in( iLWZ, r0, copied+0, rFIRST );
						in( iSTW, r0, copied+0, rSECOND );
						copied += 4;
						left -= 4;
					}
					if ( left >= 4 ) {
						DIE("Bug in OP_BLOCK_COPY");
					}
					// mop up the last 1-3 bytes
					if ( left == 3 ) {
						in( iLHZ, r0, copied+0, rFIRST );
						in( iLBZ, rTMP, copied+2, rFIRST );
						in( iSTH, r0, copied+0, rSECOND );
						in( iSTB, rTMP, copied+2, rSECOND );
					} else if ( left == 2 ) {
						in( iLHZ, r0, copied+0, rFIRST );
						in( iSTH, r0, copied+0, rSECOND );
					} else if ( left == 1 ) {
						in( iLBZ, r0, copied+0, rFIRST );
						in( iSTB, r0, copied+0, rSECOND );
					}
				} else
#endif
				{
					// long block: call the VM_BlockCopy helper with
					// (dest, src, len) in (r3, r4, r5)
					unsigned long int r5_ori = 0;
					if ( i_now->arg.si >= -0x8000 && i_now->arg.si < 0x8000 ) {
						in( iLI, r5, i_now->arg.si );
					} else if ( i_now->arg.i < 0x10000 ) {
						in( iLI, r5, 0 );
						r5_ori = i_now->arg.i;
					} else {
						in( iLIS, r5, i_now->arg.ss[ 0 ] );
						r5_ori = i_now->arg.us[ 1 ];
					}

					in( iLL, r0, VM_Data_Offset( BlockCopy ), rVMDATA ); // get blockCopy pointer

					// the ori is interleaved here to fill a pipeline slot
					if ( r5_ori )
						in( iORI, r5, r5, r5_ori );

					in( iMTCTR, r0 );

					if ( rFIRST != r4 )
						in( iMR, r4, rFIRST );
					if ( rSECOND != r3 )
						in( iMR, r3, rSECOND );

					in( iBCTRL );
				}

				gpr_pos -= 2;
				break;

			case OP_SEX8:
				MAYBE_EMIT_CONST();
				in( iEXTSB, rFIRST, rFIRST );
				break;

			case OP_SEX16:
				MAYBE_EMIT_CONST();
				in( iEXTSH, rFIRST, rFIRST );
				break;

			case OP_NEGI:
				MAYBE_EMIT_CONST();
				in( iNEG, rFIRST, rFIRST );
				break;

			case OP_ADD:
				if ( i_const ) {
					EMIT_FALSE_CONST();

					// fold the constant into addis/addi immediates;
					// bump hi when the low half will sign-extend negative
					signed short int hi, lo;
					hi = i_const->arg.ss[ 0 ];
					lo = i_const->arg.ss[ 1 ];
					if ( lo < 0 )
						hi += 1;

					if ( hi != 0 )
						in( iADDIS, rSECOND, rSECOND, hi );
					if ( lo != 0 )
						in( iADDI, rSECOND, rSECOND, lo );

					// if both are zero no instruction will be written
					if ( hi == 0 && lo == 0 )
						force_emit = 1;
				} else {
					MAYBE_EMIT_CONST();
					in( iADD, rSECOND, rSECOND, rFIRST );
				}
				gpr_pos--;
				break;

			case OP_SUB:
				MAYBE_EMIT_CONST();
				in( iSUB, rSECOND, rSECOND, rFIRST );
				gpr_pos--;
				break;

			case OP_DIVI:
				MAYBE_EMIT_CONST();
				in( iDIVW, rSECOND, rSECOND, rFIRST );
				gpr_pos--;
				break;

			case OP_DIVU:
				MAYBE_EMIT_CONST();
				in( iDIVWU, rSECOND, rSECOND, rFIRST );
				gpr_pos--;
				break;

			case OP_MODI:
				MAYBE_EMIT_CONST();
				// a % b computed as a - (a / b) * b (PPC has no modulo)
				in( iDIVW, r0, rSECOND, rFIRST );
				in( iMULLW, r0, r0, rFIRST );
				in( iSUB, rSECOND, rSECOND, r0 );
				gpr_pos--;
				break;

			case OP_MODU:
				MAYBE_EMIT_CONST();
				in( iDIVWU, r0, rSECOND, rFIRST );
				in( iMULLW, r0, r0, rFIRST );
				in( iSUB, rSECOND, rSECOND, r0 );
				gpr_pos--;
				break;

			case OP_MULI:
			case OP_MULU:
				MAYBE_EMIT_CONST();
				// low 32 bits are identical for signed and unsigned multiply
				in( iMULLW, rSECOND, rSECOND, rFIRST );
				gpr_pos--;
				break;

			case OP_BAND:
				MAYBE_EMIT_CONST();
				in( iAND, rSECOND, rSECOND, rFIRST );
				gpr_pos--;
				break;

			case OP_BOR:
				MAYBE_EMIT_CONST();
				in( iOR, rSECOND, rSECOND, rFIRST );
				gpr_pos--;
				break;

			case OP_BXOR:
				MAYBE_EMIT_CONST();
				in( iXOR, rSECOND, rSECOND, rFIRST );
				gpr_pos--;
				break;

			case OP_BCOM:
				MAYBE_EMIT_CONST();
				in( iNOT, rFIRST, rFIRST );
				break;

			case OP_LSH:
				MAYBE_EMIT_CONST();
				in( iSLW, rSECOND, rSECOND, rFIRST );
				gpr_pos--;
				break;

			case OP_RSHI:
				MAYBE_EMIT_CONST();
				in( iSRAW, rSECOND, rSECOND, rFIRST );
				gpr_pos--;
				break;

			case OP_RSHU:
				MAYBE_EMIT_CONST();
				in( iSRW, rSECOND, rSECOND, rFIRST );
				gpr_pos--;
				break;

			case OP_NEGF:
				MAYBE_EMIT_CONST();
				in( iFNEG, fFIRST, fFIRST );
				break;

			case OP_ADDF:
				MAYBE_EMIT_CONST();
				in( iFADDS, fSECOND, fSECOND, fFIRST );
				fpr_pos--;
				break;

			case OP_SUBF:
				MAYBE_EMIT_CONST();
				in( iFSUBS, fSECOND, fSECOND, fFIRST );
				fpr_pos--;
				break;

			case OP_DIVF:
				MAYBE_EMIT_CONST();
				in( iFDIVS, fSECOND, fSECOND, fFIRST );
				fpr_pos--;
				break;

			case OP_MULF:
				MAYBE_EMIT_CONST();
				in( iFMULS, fSECOND, fSECOND, fFIRST );
				fpr_pos--;
				break;

			case OP_CVIF:
				MAYBE_EMIT_CONST();
				// int -> float via the classic double-precision trick:
				// build the double 2^52 + 2^31 + (int ^ 0x80000000) in
				// memory, subtract the constant 2^52 + 2^31 (floatBase),
				// then round the result to single precision
				fpr_pos++;
				in( iXORIS, rFIRST, rFIRST, 0x8000 );
				in( iLIS, r0, 0x4330 );
				in( iSTW, rFIRST, stack_temp + 4, r1 );
				in( iSTW, r0, stack_temp, r1 );
				in( iLFS, fTMP, VM_Data_Offset( floatBase ), rVMDATA );
				in( iLFD, fFIRST, stack_temp, r1 );
				in( iFSUB, fFIRST, fFIRST, fTMP );
				in( iFRSP, fFIRST, fFIRST );
				gpr_pos--;
				break;

			case OP_CVFI:
				MAYBE_EMIT_CONST();
				// float -> int: convert to integer with round-toward-zero,
				// then move the low word back through the stack
				gpr_pos++;
				in( iFCTIWZ, fFIRST, fFIRST );
				in( iSTFD, fFIRST, stack_temp, r1 );
				in( iLWZ, rFIRST, stack_temp + 4, r1 );
				fpr_pos--;
				break;
		}

		i_const = NULL;

		if ( i_now->op != OP_CONST ) {
			// emit the instructions if it isn't OP_CONST
			emitEnd();
		} else {
			// mark in what register the value should be saved
			if ( RET_INT )
				i_now->regPos = ++gpr_pos;
			else
				i_now->regPos = ++fpr_pos;

#if OPTIMIZE_HOLE
			// defer emission: the next opcode may fold the constant
			i_const = i_now;
#else
			PPC_EmitConst( i_now );
#endif
		}
	}
	if ( i_const )
		DIE( "left (unused) OP_CONST" );

	{
		// free opcode information, don't free first dummy one
		source_instruction_t *i_next = i_first->next;
		while ( i_next ) {
			i_now = i_next;
			i_next = i_now->next;
			PPC_Free( i_now );
		}
	}
}
1761
1762
1763 /*
1764 * check which jumps are short enough to use signed 16bit immediate branch
1765 */
1766 static void
PPC_ShrinkJumps(void)1767 PPC_ShrinkJumps( void )
1768 {
1769 symbolic_jump_t *sj_now = sj_first;
1770 while ( (sj_now = sj_now->nextJump) ) {
1771 if ( sj_now->bo == branchAlways )
1772 // non-conditional branch has 26bit immediate
1773 sj_now->parent->length = 1;
1774
1775 else {
1776 dest_instruction_t *di = di_pointers[ sj_now->jump_to ];
1777 dest_instruction_t *ji = sj_now->parent;
1778 long int jump_length = 0;
1779 if ( ! di )
1780 DIE( "No instruction to jump to" );
1781 if ( ji->count > di->count ) {
1782 do {
1783 jump_length += di->length;
1784 } while ( ( di = di->next ) != ji );
1785 } else {
1786 jump_length = 1;
1787 while ( ( ji = ji->next ) != di )
1788 jump_length += ji->length;
1789 }
1790 if ( jump_length < 0x2000 )
1791 // jump is short, use normal instruction
1792 sj_now->parent->length = 1;
1793 }
1794 }
1795 }
1796
1797 /*
1798 * puts all the data in one place, it consists of many different tasks
1799 */
/*
 * Put all the data in one place; it consists of many different tasks:
 * size and mmap the output block, copy the generated instructions into
 * it, resolve symbolic jumps, fill in the vm_data_t header, write the
 * float constant pool, and free the intermediate compilation lists.
 *
 * Layout of the mmapped block: vm_data_t header, then data_acc words of
 * dynamic data, then the instruction stream (see VM_Data_Offset).
 */
static void
PPC_ComputeCode( vm_t *vm )
{
	dest_instruction_t *di_now = di_first;

	unsigned long int codeInstructions = 0;
	// count total instruction number
	while ( (di_now = di_now->next ) )
		codeInstructions += di_now->length;

	size_t codeLength = sizeof( vm_data_t )
		+ sizeof( unsigned int ) * data_acc
		+ sizeof( ppc_instruction_t ) * codeInstructions;

	// get the memory for the generated code, smarter ppcs need the
	// mem to be marked as executable (will change later)
	unsigned char *dataAndCode = mmap( NULL, codeLength,
		PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0 );

	if (dataAndCode == MAP_FAILED)
		DIE( "Not enough memory" );

	ppc_instruction_t *codeNow, *codeBegin;
	codeNow = codeBegin = (ppc_instruction_t *)( dataAndCode + VM_Data_Offset( data[ data_acc ] ) );

	ppc_instruction_t nop = IN( iNOP );

	// copy instructions to the destination
	// fills the jump instructions with nops
	// saves pointers of all instructions
	di_now = di_first;
	while ( (di_now = di_now->next ) ) {
		unsigned long int i_count = di_now->i_count;
		if ( i_count != FALSE_ICOUNT ) {
			// record where each qvm instruction starts; only the first
			// dest instruction of an i_count wins
			if ( ! di_pointers[ i_count ] )
				di_pointers[ i_count ] = (void *) codeNow;
		}

		if ( di_now->jump == NULL ) {
			memcpy( codeNow, &(di_now->code[0]), di_now->length * sizeof( ppc_instruction_t ) );
			codeNow += di_now->length;
		} else {
			// jumps are emitted as nops for now and patched below,
			// once every target address is known
			long int i;
			symbolic_jump_t *sj;
			for ( i = 0; i < di_now->length; i++ )
				codeNow[ i ] = nop;
			codeNow += di_now->length;

			sj = di_now->jump;
			// save position of jumping instruction
			sj->parent = (void *)(codeNow - 1);
		}
	}

	// compute the jumps and write corresponding instructions
	symbolic_jump_t *sj_now = sj_first;
	while ( (sj_now = sj_now->nextJump ) ) {
		ppc_instruction_t *jumpFrom = (void *) sj_now->parent;
		ppc_instruction_t *jumpTo = (void *) di_pointers[ sj_now->jump_to ];
		signed long int jumpLength = jumpTo - jumpFrom;

		// if jump is short, just write it
		if ( jumpLength >= - 8192 && jumpLength < 8192 ) {
			powerpc_iname_t branchConditional = sj_now->ext & branchExtLink ? iBCL : iBC;
			*jumpFrom = IN( branchConditional, sj_now->bo, sj_now->bi, jumpLength * 4 );
			continue;
		}

		// jump isn't short so write it as two instructions
		//
		// the latter one is a non-conditional branch instruction which
		// accepts immediate values big enough (26 bits)
		*jumpFrom = IN( (sj_now->ext & branchExtLink ? iBL : iB), jumpLength * 4 );
		if ( sj_now->bo == branchAlways )
			continue;

		// there should have been additional space prepared for this case
		if ( jumpFrom[ -1 ] != nop )
			DIE( "additional space for long jump not prepared" );

		// invert instruction condition
		long int bo = 0;
		switch ( sj_now->bo ) {
			case branchTrue:
				bo = branchFalse;
				break;
			case branchFalse:
				bo = branchTrue;
				break;
			default:
				DIE( "unrecognized branch type" );
				break;
		}

		// the former instruction is an inverted conditional branch which
		// jumps over the non-conditional one
		jumpFrom[ -1 ] = IN( iBC, bo, sj_now->bi, +2*4 );
	}

	vm->codeBase = dataAndCode;
	vm->codeLength = codeLength;

	vm_data_t *data = (vm_data_t *)dataAndCode;

#if ELF64
	// prepare Official Procedure Descriptor for the generated code
	// and retrieve real function pointer for helper functions

	opd_t *ac = (void *)VM_AsmCall, *bc = (void *)VM_BlockCopy;
	data->opd.function = codeBegin;
	// trick it into using the same TOC
	// this way we won't have to switch TOC before calling AsmCall or BlockCopy
	data->opd.toc = ac->toc;
	data->opd.env = ac->env;

	data->AsmCall = ac->function;
	data->BlockCopy = bc->function;
#else
	data->AsmCall = VM_AsmCall;
	data->BlockCopy = VM_BlockCopy;
#endif

	data->dataMask = vm->dataMask;
	data->iPointers = (ppc_instruction_t *)vm->instructionPointers;
	data->dataLength = VM_Data_Offset( data[ data_acc ] );
	data->codeLength = ( codeNow - codeBegin ) * sizeof( ppc_instruction_t );
	// bit pattern of the float constant used by OP_CVIF
	data->floatBase = 0x59800004;


	/* write dynamic data (float constants) */
	{
		local_data_t *d_next, *d_now = data_first;
		long int accumulated = 0;

		do {
			long int i;
			for ( i = 0; i < d_now->count; i++ )
				data->data[ accumulated + i ] = d_now->data[ i ];

			accumulated += d_now->count;
			d_next = d_now->next;
			PPC_Free( d_now );

			if ( !d_next )
				break;
			d_now = d_next;
		} while (1);
		data_first = NULL;
	}

	/* free most of the compilation memory */
	{
		di_now = di_first->next;
		PPC_Free( di_first );
		PPC_Free( sj_first );

		// NOTE: symbolic jumps other than sj_first are freed through
		// di_now->jump below
		while ( di_now ) {
			di_first = di_now->next;
			if ( di_now->jump )
				PPC_Free( di_now->jump );
			PPC_Free( di_now );
			di_now = di_first;
		}
	}
}
1965
1966 static void
VM_Destroy_Compiled(vm_t * self)1967 VM_Destroy_Compiled( vm_t *self )
1968 {
1969 if ( self->codeBase ) {
1970 if ( munmap( self->codeBase, self->codeLength ) )
1971 Com_Printf( S_COLOR_RED "Memory unmap failed, possible memory leak\n" );
1972 }
1973 self->codeBase = NULL;
1974 }
1975
/*
 * Compile the whole qvm image: parse the bytecode, split it into
 * functions at every OP_ENTER, compile each function, shrink the jumps,
 * emit the final executable block and mark it read+execute.
 * On success sets vm->compiled and installs vm->destroy.
 */
void
VM_Compile( vm_t *vm, vmHeader_t *header )
{
	long int pc = 0;
	unsigned long int i_count;
	char* code;
	struct timeval tvstart = {0, 0};
	// i_first is a dummy list head; i_last tracks the list tail
	source_instruction_t *i_first /* dummy */, *i_last = NULL, *i_now;

	vm->compiled = qfalse;

	gettimeofday(&tvstart, NULL);

	PPC_MakeFastMask( vm->dataMask );

	i_first = PPC_Malloc( sizeof( source_instruction_t ) );
	i_first->next = NULL;

	// realloc instructionPointers with correct size
	// use Z_Malloc so vm.c will be able to free the memory
	if ( sizeof( void * ) != sizeof( int ) ) {
		Z_Free( vm->instructionPointers );
		vm->instructionPointers = Z_Malloc( header->instructionCount * sizeof( void * ) );
	}
	di_pointers = (void *)vm->instructionPointers;
	memset( di_pointers, 0, header->instructionCount * sizeof( void * ) );


	PPC_CompileInit();

	/*
	 * read the input program
	 * divide it into functions and send each function to compiler
	 */
	code = (char *)header + header->codeOffset;
	for ( i_count = 0; i_count < header->instructionCount; ++i_count )
	{
		unsigned char op = code[ pc++ ];

		if ( op == OP_ENTER ) {
			// a new function begins: compile the previous one (if any)
			// and restart the list at the dummy head
			if ( i_first->next )
				VM_CompileFunction( i_first );
			i_first->next = NULL;
			i_last = i_first;
		}

		i_now = PPC_Malloc( sizeof( source_instruction_t ) );
		i_now->op = op;
		i_now->i_count = i_count;
		i_now->arg.i = 0;
		i_now->regA1 = 0;
		i_now->regA2 = 0;
		i_now->regR = 0;
		i_now->regPos = 0;
		i_now->next = NULL;

		if ( vm_opInfo[op] & opImm4 ) {
			// 4-byte immediate: bytes are reversed into host order here
			// (qvm immediates appear to be little-endian — the compile
			// target is big-endian PPC; verify against the qvm spec)
			union {
				unsigned char b[4];
				unsigned int i;
			} c = { { code[ pc + 3 ], code[ pc + 2 ], code[ pc + 1 ], code[ pc + 0 ] }, };

			i_now->arg.i = c.i;
			pc += 4;
		} else if ( vm_opInfo[op] & opImm1 ) {
			i_now->arg.b = code[ pc++ ];
		}

		i_last->next = i_now;
		i_last = i_now;
	}
	// compile the last function and free the dummy head
	VM_CompileFunction( i_first );
	PPC_Free( i_first );

	PPC_ShrinkJumps();
	// di_pointers is reused by PPC_ComputeCode to hold the final
	// machine addresses, so clear the intermediate values first
	memset( di_pointers, 0, header->instructionCount * sizeof( void * ) );
	PPC_ComputeCode( vm );

	/* check for uninitialized pointers */
#ifdef DEBUG_VM
	long int i;
	for ( i = 0; i < header->instructionCount; i++ )
		if ( di_pointers[ i ] == 0 )
			Com_Printf( S_COLOR_RED "Pointer %ld not initialized !\n", i );
#endif

	/* mark memory as executable and not writeable */
	if ( mprotect( vm->codeBase, vm->codeLength, PROT_READ|PROT_EXEC ) ) {

		// it has failed, make sure memory is unmapped before throwing the error
		VM_Destroy_Compiled( vm );
		DIE( "mprotect failed" );
	}

	vm->destroy = VM_Destroy_Compiled;
	vm->compiled = qtrue;

	{
		struct timeval tvdone = {0, 0};
		struct timeval dur = {0, 0};
		Com_Printf( "VM file %s compiled to %i bytes of code (%p - %p)\n",
			vm->name, vm->codeLength, vm->codeBase, vm->codeBase+vm->codeLength );

		gettimeofday(&tvdone, NULL);
		timersub(&tvdone, &tvstart, &dur);
		Com_Printf( "compilation took %lu.%06lu seconds\n",
			(long unsigned int)dur.tv_sec, (long unsigned int)dur.tv_usec );
	}
}
2085
2086 int
VM_CallCompiled(vm_t * vm,int * args)2087 VM_CallCompiled( vm_t *vm, int *args )
2088 {
2089 int retVal;
2090 int *argPointer;
2091
2092 vm_data_t *vm_dataAndCode = (void *)( vm->codeBase );
2093 int programStack = vm->programStack;
2094 int stackOnEntry = programStack;
2095
2096 byte *image = vm->dataBase;
2097
2098 currentVM = vm;
2099
2100 vm->currentlyInterpreting = qtrue;
2101
2102 programStack -= ( 8 + 4 * MAX_VMMAIN_ARGS );
2103 argPointer = (int *)&image[ programStack + 8 ];
2104 memcpy( argPointer, args, 4 * MAX_VMMAIN_ARGS );
2105 argPointer[ -1 ] = 0;
2106 argPointer[ -2 ] = -1;
2107
2108 #ifdef VM_TIMES
2109 struct tms start_time, stop_time;
2110 clock_t time_diff;
2111
2112 times( &start_time );
2113 time_outside_vm = 0;
2114 #endif
2115
2116 /* call generated code */
2117 {
2118 int ( *entry )( void *, int, void * );
2119 #ifdef __PPC64__
2120 entry = (void *)&(vm_dataAndCode->opd);
2121 #else
2122 entry = (void *)(vm->codeBase + vm_dataAndCode->dataLength);
2123 #endif
2124 retVal = entry( vm->codeBase, programStack, vm->dataBase );
2125 }
2126
2127 #ifdef VM_TIMES
2128 times( &stop_time );
2129 time_diff = stop_time.tms_utime - start_time.tms_utime;
2130 time_total_vm += time_diff - time_outside_vm;
2131 if ( time_diff > 100 ) {
2132 printf( "App clock: %ld, vm total: %ld, vm this: %ld, vm real: %ld, vm out: %ld\n"
2133 "Inside VM %f%% of app time\n",
2134 stop_time.tms_utime,
2135 time_total_vm,
2136 time_diff,
2137 time_diff - time_outside_vm,
2138 time_outside_vm,
2139 (double)100 * time_total_vm / stop_time.tms_utime );
2140 }
2141 #endif
2142
2143 vm->programStack = stackOnEntry;
2144 vm->currentlyInterpreting = qfalse;
2145
2146 return retVal;
2147 }
2148