1 /*
2 ===========================================================================
3 Copyright (C) 2009 David S. Miller <davem@davemloft.net>
4 Copyright (C) 2013,2014 SUSE Linux Products GmbH
5 
6 This file is part of Quake III Arena source code.
7 
8 Quake III Arena source code is free software; you can redistribute it
9 and/or modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2 of the License,
11 or (at your option) any later version.
12 
13 Quake III Arena source code is distributed in the hope that it will be
14 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with Quake III Arena source code; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21 ===========================================================================
22 
23 ARMv7l VM by Ludwig Nussel <ludwig.nussel@suse.de>
24 
25 TODO: optimization
26 
27 Docu:
28 http://www.coranac.com/tonc/text/asm.htm
29 http://www.heyrick.co.uk/armwiki/Category:Opcodes
30 ARMv7-A_ARMv7-R_DDI0406_2007.pdf
31 */
32 
33 #include <sys/types.h>
34 #include <sys/mman.h>
35 #include <sys/time.h>
36 #include <time.h>
37 #include <stddef.h>
38 
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <fcntl.h>
42 
43 #include "vm_local.h"
44 #define R0	0
45 #define R1	1
46 #define R2	2
47 #define R3	3
48 #define R4	4
49 
50 #define R12	12
51 
52 #define FP	11
53 #define SP	13
54 #define LR	14
55 #define PC	15
56 
57 #define APSR_nzcv	15
58 
59 #define S14     14
60 #define S15     15
61 
62 #define rOPSTACK	5
63 #define rOPSTACKBASE	6
64 #define rCODEBASE	7
65 #define rPSTACK		8
66 #define rDATABASE	9
67 #define rDATAMASK	10
68 
/* Bit mask for bit number x. Argument is parenthesized so compound
 * expressions like bit(a|b) expand correctly. */
#define bit(x) (1<<(x))
70 
71 /* arm eabi, builtin gcc functions */
72 int __aeabi_idiv (int, int);
73 unsigned __aeabi_uidiv (unsigned, unsigned);
74 void __aeabi_idivmod(void);
75 void __aeabi_uidivmod(void);
76 
77 /* exit() won't be called but use it because it is marked with noreturn */
78 #define DIE( reason, args... ) \
79 	do { \
80 		Com_Error(ERR_DROP, "vm_arm compiler error: " reason, ##args); \
81 		exit(1); \
82 	} while(0)
83 
84 /*
85  * opcode information table:
86  * - length of immediate value
87  * - returned register type
88  * - required register(s) type
89  */
90 #define opImm0	0x0000 /* no immediate */
#define opImm1	0x0001 /* 1 byte immediate value after opcode */
92 #define opImm4	0x0002 /* 4 bytes immediate value after opcode */
93 
94 #define opRet0	0x0000 /* returns nothing */
95 #define opRetI	0x0004 /* returns integer */
96 #define opRetF	0x0008 /* returns float */
97 #define opRetIF	(opRetI | opRetF) /* returns integer or float */
98 
99 #define opArg0	0x0000 /* requires nothing */
100 #define opArgI	0x0010 /* requires integer(s) */
101 #define opArgF	0x0020 /* requires float(s) */
102 #define opArgIF	(opArgI | opArgF) /* requires integer or float */
103 
104 #define opArg2I	0x0040 /* requires second argument, integer */
105 #define opArg2F	0x0080 /* requires second argument, float */
106 #define opArg2IF (opArg2I | opArg2F) /* requires second argument, integer or float */
107 
/* Per-opcode flags; entries not listed are zero-initialized, i.e. no
 * immediate, no result, no operands required. */
static const unsigned char vm_opInfo[256] =
{
	[OP_UNDEF]	= opImm0,
	[OP_IGNORE]	= opImm0,
	[OP_BREAK]	= opImm0,
	[OP_ENTER]	= opImm4,
			/* OP_LEAVE has to accept floats, they will be converted to ints */
	[OP_LEAVE]	= opImm4 | opRet0 | opArgIF,
			/* only STORE4 and POP use values from OP_CALL,
			 * no need to convert floats back */
	[OP_CALL]	= opImm0 | opRetI | opArgI,
	[OP_PUSH]	= opImm0 | opRetIF,
	[OP_POP]	= opImm0 | opRet0 | opArgIF,
	[OP_CONST]	= opImm4 | opRetIF,
	[OP_LOCAL]	= opImm4 | opRetI,
	[OP_JUMP]	= opImm0 | opRet0 | opArgI,

	/* comparison/branch opcodes: 4-byte jump target, consume two operands */
	[OP_EQ]		= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_NE]		= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_LTI]	= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_LEI]	= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_GTI]	= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_GEI]	= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_LTU]	= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_LEU]	= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_GTU]	= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_GEU]	= opImm4 | opRet0 | opArgI | opArg2I,
	[OP_EQF]	= opImm4 | opRet0 | opArgF | opArg2F,
	[OP_NEF]	= opImm4 | opRet0 | opArgF | opArg2F,
	[OP_LTF]	= opImm4 | opRet0 | opArgF | opArg2F,
	[OP_LEF]	= opImm4 | opRet0 | opArgF | opArg2F,
	[OP_GTF]	= opImm4 | opRet0 | opArgF | opArg2F,
	[OP_GEF]	= opImm4 | opRet0 | opArgF | opArg2F,

	[OP_LOAD1]	= opImm0 | opRetI | opArgI,
	[OP_LOAD2]	= opImm0 | opRetI | opArgI,
	[OP_LOAD4]	= opImm0 | opRetIF| opArgI,
	[OP_STORE1]	= opImm0 | opRet0 | opArgI | opArg2I,
	[OP_STORE2]	= opImm0 | opRet0 | opArgI | opArg2I,
	[OP_STORE4]	= opImm0 | opRet0 | opArgIF| opArg2I,
	[OP_ARG]	= opImm1 | opRet0 | opArgIF,
	[OP_BLOCK_COPY]	= opImm4 | opRet0 | opArgI | opArg2I,

	[OP_SEX8]	= opImm0 | opRetI | opArgI,
	[OP_SEX16]	= opImm0 | opRetI | opArgI,
	[OP_NEGI]	= opImm0 | opRetI | opArgI,
	[OP_ADD]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_SUB]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_DIVI]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_DIVU]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_MODI]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_MODU]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_MULI]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_MULU]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_BAND]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_BOR]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_BXOR]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_BCOM]	= opImm0 | opRetI | opArgI,
	[OP_LSH]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_RSHI]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_RSHU]	= opImm0 | opRetI | opArgI | opArg2I,
	[OP_NEGF]	= opImm0 | opRetF | opArgF,
	[OP_ADDF]	= opImm0 | opRetF | opArgF | opArg2F,
	[OP_SUBF]	= opImm0 | opRetF | opArgF | opArg2F,
	[OP_DIVF]	= opImm0 | opRetF | opArgF | opArg2F,
	[OP_MULF]	= opImm0 | opRetF | opArgF | opArg2F,
	[OP_CVIF]	= opImm0 | opRetF | opArgI,
	[OP_CVFI]	= opImm0 | opRetI | opArgF,
};
177 
#ifdef DEBUG_VM
/* Human-readable opcode names for debug output; indexed by opcode value. */
static const char *opnames[256] = {
	"OP_UNDEF", "OP_IGNORE", "OP_BREAK", "OP_ENTER", "OP_LEAVE", "OP_CALL",
	"OP_PUSH", "OP_POP", "OP_CONST", "OP_LOCAL", "OP_JUMP",
	"OP_EQ", "OP_NE", "OP_LTI", "OP_LEI", "OP_GTI", "OP_GEI",
	"OP_LTU", "OP_LEU", "OP_GTU", "OP_GEU", "OP_EQF", "OP_NEF",
	"OP_LTF", "OP_LEF", "OP_GTF", "OP_GEF",
	"OP_LOAD1", "OP_LOAD2", "OP_LOAD4", "OP_STORE1", "OP_STORE2",
	"OP_STORE4", "OP_ARG", "OP_BLOCK_COPY",
	"OP_SEX8", "OP_SEX16",
	"OP_NEGI", "OP_ADD", "OP_SUB", "OP_DIVI", "OP_DIVU",
	"OP_MODI", "OP_MODU", "OP_MULI", "OP_MULU", "OP_BAND",
	"OP_BOR", "OP_BXOR", "OP_BCOM", "OP_LSH", "OP_RSHI", "OP_RSHU",
	"OP_NEGF", "OP_ADDF", "OP_SUBF", "OP_DIVF", "OP_MULF",
	"OP_CVIF", "OP_CVFI",
};

/* Debug build: drop with the symbolic name of the unimplemented opcode. */
#define NOTIMPL(x) \
	do { Com_Error(ERR_DROP, "instruction not implemented: %s", opnames[x]); } while(0)
#else
/* Release build: print the raw opcode, mark the VM as not compiled and
 * return from the enclosing function (expanded inside VM_Compile). */
#define NOTIMPL(x) \
	do { Com_Printf(S_COLOR_RED "instruction not implemented: %x\n", x); vm->compiled = qfalse; return; } while(0)
#endif
201 
/* Release the mmap()ed buffer holding the JIT-generated code, if one
 * exists, and clear the pointer so the VM no longer references it. */
static void VM_Destroy_Compiled(vm_t *vm)
{
	void *base = vm->codeBase;

	vm->codeBase = NULL;
	if (base != NULL && munmap(base, vm->codeLength) != 0)
		Com_Printf(S_COLOR_RED "Memory unmap failed, possible memory leak\n");
}
210 
211 /*
212 =================
213 ErrJump
214 Error handler for jump/call to invalid instruction number
215 =================
216 */
217 
/* num is the offending VM instruction number; Com_Error aborts the VM,
 * so this never returns (hence the noreturn attribute). */
static void __attribute__((__noreturn__)) ErrJump(unsigned num)
{
	Com_Error(ERR_DROP, "program tried to execute code outside VM (%x)", num);
}
222 
/*
 * Trampoline from JIT-generated code into the engine's syscall handler.
 * call   - syscall id as pushed by the VM; mapped to the engine's
 *          numbering via -1 - call below
 * pstack - the VM program stack pointer at the call site
 * Returns the syscall result, which the generated code then pushes
 * onto the opstack.
 */
static int asmcall(int call, int pstack)
{
	// save currentVM so as to allow for recursive VM entry
	vm_t *savedVM = currentVM;
	int i, ret;

	// modify VM stack pointer for recursive VM entry
	currentVM->programStack = pstack - 4;

	if (sizeof(intptr_t) == sizeof(int)) {
		/* 32 bit host: the arguments already sit in VM memory with the
		 * right layout; overwrite slot 0 with the syscall id in place. */
		intptr_t *argPosition = (intptr_t *)((byte *)currentVM->dataBase + pstack + 4);
		argPosition[0] = -1 - call;
		ret = currentVM->systemCall(argPosition);
	} else {
		/* wider host pointers: widen each 32 bit VM argument into a
		 * local intptr_t array before dispatching */
		intptr_t args[MAX_VMSYSCALL_ARGS];

		args[0] = -1 - call;
		int *argPosition = (int *)((byte *)currentVM->dataBase + pstack + 4);
		for( i = 1; i < ARRAY_LEN(args); i++ )
			args[i] = argPosition[i];

		ret = currentVM->systemCall(args);
	}

	currentVM = savedVM;

	return ret;
}
251 
/* Append one 32-bit ARM instruction word to the code buffer.
 * On pass 0 only the length is accumulated (sizing pass); on pass 1 the
 * word is actually written at the current offset. */
void _emit(vm_t *vm, unsigned isn, int pass)
{
#if 0
	static int fd = -2;
	if (fd == -2)
		fd = open("code.bin", O_TRUNC|O_WRONLY|O_CREAT, 0644);
	if (fd > 0)
		write(fd, &isn, 4);
#endif

	if (pass) {
		memcpy(&vm->codeBase[vm->codeLength], &isn, 4);
	}
	vm->codeLength += 4;
}
266 
267 #define emit(isn) _emit(vm, isn, pass)
268 
/* Convert a byte offset into the scaled 8-bit word offset used by the
 * VFP load/store encodings: must be word aligned and at most 1020. */
static unsigned char off8(unsigned val)
{
	if ((val % 4u) != 0)
		DIE("offset must be multiple of four");
	if (val > 1020u)
		DIE("offset too large");
	return (unsigned char)(val / 4u);
}
277 
278 // ARM is really crazy ...
/* Encode val as an ARM modified immediate (8-bit value + even rotate).
 * Dies if the value has no such encoding. */
static unsigned short rimm(unsigned val)
{
	unsigned rot;

	if (val < 256)
		return val;

	/* rotate right two bits at a time until the value fits in 8 bits */
	for (rot = 0; rot < 16 && (val > 255 || (val & 3) == 0); ++rot)
		val = (val & 3) << 30 | val >> 2;

	if (rot > 15 || val > 255) {
		DIE("immediate cannot be encoded (%d, %d)\n", rot, val);
	}

	return (16 - rot) << 8 | val;
}
294 
295 // same as rimm but doesn't die, returns 0 if not encodable so don't call with zero as argument!
/* Non-fatal variant of rimm(): returns the encoding, or 0 when val has
 * no modified-immediate form. Must not be called with zero (0 is the
 * failure sentinel), hence the argument check. */
static unsigned short can_encode(unsigned val)
{
	unsigned rot;

	if (val == 0)
		DIE("can_encode: invalid argument");

	if (val < 256)
		return val;

	/* rotate right two bits at a time until the value fits in 8 bits */
	for (rot = 0; rot < 16 && (val > 255 || (val & 3) == 0); ++rot)
		val = (val & 3) << 30 | val >> 2;

	if (rot > 15 || val > 255)
		return 0;

	return (16 - rot) << 8 | val;
}
313 
314 #define PREINDEX (1<<24)
315 
316 #define rASR(i, reg) (0b10<<5 | ((i&31)<<7) | reg)
317 #define rLSL(i, reg) (0b00<<5 | ((i&31)<<7) | reg)
318 #define rLSR(i, reg) (0b01<<5 | ((i&31)<<7) | reg)
319 #define rROR(i, reg) (0b11<<5 | ((i&31)<<7) | reg)
320 
321 // conditions
322 #define EQ (0b0000<<28)
323 #define NE (0b0001<<28)
324 #define CS (0b0010<<28)
325 #define HS CS
326 #define CC (0b0011<<28)
327 #define LO CC
328 #define MI (0b0100<<28)
329 #define PL (0b0101<<28)
330 #define VS (0b0110<<28)
331 #define VC (0b0111<<28)
332 #define HI (0b1000<<28)
333 #define LS (0b1001<<28)
334 #define GE (0b1010<<28)
335 #define LT (0b1011<<28)
336 #define GT (0b1100<<28)
337 #define LE (0b1101<<28)
338 #define AL (0b1110<<28)
339 #define cond(what, op) (what | (op&~AL))
340 
341 // XXX: v not correctly computed
342 #define BKPT(v) (AL | 0b10010<<20 | ((v&~0xF)<<4) | 0b0111<<4 | (v&0xF))
343 
344 #define YIELD (0b110010<<20 | 0b1111<<12 | 1)
345 #define NOP cond(AL, YIELD)
346 
347 // immediate value must fit in 0xFF!
348 #define ANDi(dst, src, i) (AL | (0b001<<25) | (0b00000<<20) | (src<<16) | (dst<<12) | rimm(i))
349 #define EORi(dst, src, i) (AL | (0b001<<25) | (0b00010<<20) | (src<<16) | (dst<<12) | rimm(i))
350 #define SUBi(dst, src, i) (AL | (0b001<<25) | (0b00100<<20) | (src<<16) | (dst<<12) | rimm(i))
351 #define RSBi(dst, src, i) (AL | (0b001<<25) | (0b00110<<20) | (src<<16) | (dst<<12) | rimm(i))
352 #define ADDi(dst, src, i) (AL | (0b001<<25) | (0b01000<<20) | (src<<16) | (dst<<12) | rimm(i))
353 #define ADCi(dst, src, i) (AL | (0b001<<25) | (0b01010<<20) | (src<<16) | (dst<<12) | rimm(i))
354 #define SBCi(dst, src, i) (AL | (0b001<<25) | (0b01100<<20) | (src<<16) | (dst<<12) | rimm(i))
355 #define RSCi(dst, src, i) (AL | (0b001<<25) | (0b01110<<20) | (src<<16) | (dst<<12) | rimm(i))
356 
357 #define ORRi(dst, src, i) (AL | (0b001<<25) | (0b11000<<20) | (src<<16) | (dst<<12) | rimm(i))
358 #define MOVi(dst,      i) (AL | (0b001<<25) | (0b11010<<20) |             (dst<<12) | rimm(i))
359 #define BICi(dst, src, i) (AL | (0b001<<25) | (0b11100<<20) | (src<<16) | (dst<<12) | rimm(i))
360 #define MVNi(dst,      i) (AL | (0b001<<25) | (0b11110<<20) |             (dst<<12) | rimm(i))
361 
362 #define MOVW(dst,      i) (AL |  (0b11<<24)                 | ((((i)>>12)&0xF)<<16) | (dst<<12) | ((i)&((1<<12)-1)))
363 #define MOVT(dst,      i) (AL |  (0b11<<24) |  (0b0100<<20) | ((((i)>>12)&0xF)<<16) | (dst<<12) | ((i)&((1<<12)-1)))
364 
365 #define TSTi(     src, i) (AL | (0b001<<25) | (0b10001<<20) | (src<<16) |             rimm(i))
366 #define TEQi(     src, i) (AL | (0b001<<25) | (0b10011<<20) | (src<<16) |             rimm(i))
367 #define CMPi(     src, i) (AL | (0b001<<25) | (0b10101<<20) | (src<<16) |             rimm(i))
368 #define CMNi(     src, i) (AL | (0b001<<25) | (0b10111<<20) | (src<<16) |             rimm(i))
369 
370 #define ANDSi(dst, src, i) (ANDi(dst, src, i) | (1<<20))
371 #define EORSi(dst, src, i) (EORi(dst, src, i) | (1<<20))
372 #define SUBSi(dst, src, i) (SUBi(dst, src, i) | (1<<20))
373 #define RSBSi(dst, src, i) (RSBi(dst, src, i) | (1<<20))
374 #define ADDSi(dst, src, i) (ADDi(dst, src, i) | (1<<20))
375 #define ADCSi(dst, src, i) (ADCi(dst, src, i) | (1<<20))
376 #define SBCSi(dst, src, i) (SBCi(dst, src, i) | (1<<20))
377 #define RSCSi(dst, src, i) (RSCi(dst, src, i) | (1<<20))
378 
379 #define ORRSi(dst, src, i) (ORRi(dst, src, i) | (1<<20))
380 #define MOVSi(dst,      i) (MOVi(dst,      i) | (1<<20))
381 #define BICSi(dst, src, i) (BICi(dst, src, i) | (1<<20))
/* S-flag variant of MVNi; original expansion wrongly passed a third,
 * undefined argument `src` to the two-parameter MVNi macro, which would
 * be a preprocessor error if this macro were ever used. */
#define MVNSi(dst,      i) (MVNi(dst,      i) | (1<<20))
383 
384 #define AND(dst, src, reg) (AL | (0b000<<25) | (0b00000<<20) | (src<<16) | (dst<<12) | reg)
385 #define EOR(dst, src, reg) (AL | (0b000<<25) | (0b00010<<20) | (src<<16) | (dst<<12) | reg)
386 #define SUB(dst, src, reg) (AL | (0b000<<25) | (0b00100<<20) | (src<<16) | (dst<<12) | reg)
387 #define RSB(dst, src, reg) (AL | (0b000<<25) | (0b00110<<20) | (src<<16) | (dst<<12) | reg)
388 #define ADD(dst, src, reg) (AL | (0b000<<25) | (0b01000<<20) | (src<<16) | (dst<<12) | reg)
389 #define ADC(dst, src, reg) (AL | (0b000<<25) | (0b01010<<20) | (src<<16) | (dst<<12) | reg)
390 #define SBC(dst, src, reg) (AL | (0b000<<25) | (0b01100<<20) | (src<<16) | (dst<<12) | reg)
391 #define RSC(dst, src, reg) (AL | (0b000<<25) | (0b01110<<20) | (src<<16) | (dst<<12) | reg)
392 
393 #define ORR(dst, src, reg) (AL | (0b000<<25) | (0b11000<<20) | (src<<16) | (dst<<12) | reg)
394 #define MOV(dst,      src) (AL | (0b000<<25) | (0b11010<<20) |             (dst<<12) | src)
395 
396 #define LSL(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8)     | (0b0001<<4) | src)
397 #define LSR(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8)     | (0b0011<<4) | src)
398 #define ASR(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8)     | (0b0101<<4) | src)
399 #define ROR(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8)     | (0b0111<<4) | src)
400 
401 #define LSLi(dst, src, i)  (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b000<<4) | src)
402 #define LSRi(dst, src, i)  (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b010<<4) | src)
403 #define ASRi(dst, src, i)  (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b100<<4) | src)
404 #define RORi(dst, src, i)  (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b110<<4) | src)
405 #define RRX(dst, src)      (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) |                 (0b110<<4) | src)
406 
407 #define BIC(dst, src, reg) (AL | (0b000<<25) | (0b11100<<20) | (src<<16) | (dst<<12) | reg)
408 #define MVN(dst,      reg) (AL | (0b000<<25) | (0b11110<<20) |             (dst<<12) | reg)
409 
410 #define TST(     src, reg) (AL | (0b000<<25) | (0b10001<<20) | (src<<16) |             reg)
411 #define TEQ(     src, reg) (AL | (0b000<<25) | (0b10011<<20) | (src<<16) |             reg)
412 #define CMP(     src, reg) (AL | (0b000<<25) | (0b10101<<20) | (src<<16) |             reg)
413 #define CMN(     src, reg) (AL | (0b000<<25) | (0b10111<<20) | (src<<16) |             reg)
414 
415 #define LDRa(dst, base, off)   (AL | (0b011<<25) | (0b1100<<21) | (1<<20) | base<<16 | dst<<12 | off)
416 #define LDRx(dst, base, off)   (AL | (0b011<<25) | (0b1000<<21) | (1<<20) | base<<16 | dst<<12 | off)
417 
418 #define LDRai(dst, base, off)  (AL | (0b010<<25) | (0b1100<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
419 #define LDRxi(dst, base, off)  (AL | (0b010<<25) | (0b1000<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
420 #define LDRxiw(dst, base, off) (AL | (0b010<<25) | (0b1001<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
421 
422 #define LDRTa(dst, base, off)  (AL | (0b011<<25) | (0b0101<<21) | (1<<20) | base<<16 | dst<<12 | off)
423 #define LDRTx(dst, base, off)  (AL | (0b011<<25) | (0b0001<<21) | (1<<20) | base<<16 | dst<<12 | off)
424 #define LDRTai(dst, base, off) (AL | (0b010<<25) | (0b0101<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
425 #define LDRTxi(dst, base, off) (AL | (0b010<<25) | (0b0001<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
426 
427 #define LDRBa(dst, base, off)  (AL | (0b011<<25) | (0b1110<<21) | (1<<20) | base<<16 | dst<<12 | off)
428 #define LDRSBai(dst, base, off) (AL | (0b000<<25) | (0b0110<<21) | (1<<20) | base<<16 | dst<<12 | ((off&0xF0)<<4)|0b1101<<4|(off&0x0F))
429 #define STRBa(dst, base, off)  (AL | (0b011<<25) | (0b1110<<21) | (0<<20) | base<<16 | dst<<12 | off)
430 
431 #define LDRHa(dst, base, off)   (AL | (0b000<<25) | (0b1100<<21) | (1<<20) | base<<16 | dst<<12 | (0b1011<<4) | off)
432 #define LDRSHai(dst, base, off) (AL | (0b000<<25) | (0b1110<<21) | (1<<20) | base<<16 | dst<<12 | ((off&0xF0)<<4)|0b1111<<4|(off&0x0F))
433 #define STRHa(dst, base, off)   (AL | (0b000<<25) | (0b1100<<21) | (0<<20) | base<<16 | dst<<12 | (0b1011<<4) | off)
434 
435 #define STRa(dst, base, off)   (AL | (0b011<<25) | (0b1100<<21) | (0<<20) | base<<16 | dst<<12 | off)
436 #define STRx(dst, base, off)   (AL | (0b011<<25) | (0b1000<<21) | (0<<20) | base<<16 | dst<<12 | off)
437 #define STRai(dst, base, off)  (AL | (0b010<<25) | (0b1100<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
438 #define STRxi(dst, base, off)  (AL | (0b010<<25) | (0b1000<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
439 #define STRaiw(dst, base, off) (AL | (0b010<<25) | (0b1101<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
440 #define STRxiw(dst, base, off) (AL | (0b010<<25) | (0b1001<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
441 
442 // load with post-increment
443 #define POP1(reg)              (AL | (0b010<<25) | (0b0100<<21) | (1<<20) |   SP<<16 | reg<<12 | reg)
444 // store with post-increment
445 #define PUSH1(reg)             (AL | (0b010<<25) | (0b1001<<21) | (0<<20) |   SP<<16 | reg<<12 | 4)
446 
447 // branch to target address (for small jumps)
448 #define Bi(i) \
449 	(AL | (0b10)<<26 | (1<<25) /*I*/ | (0<<24) /*L*/ | (i))
450 // call subroutine
451 #define BLi(i) \
452 	(AL | (0b10)<<26 | (1<<25) /*I*/ | (1<<24) /*L*/ | (i))
453 // branch and exchange (register)
454 #define BX(reg) \
455 	(AL | 0b00010010<<20 | 0b1111<<16 | 0b1111<<12 | 0b1111<<8| 0b0001<<4 | reg)
456 // call subroutine (register)
457 #define BLX(reg) \
458 	(AL | 0b00010010<<20 | 0b1111<<16 | 0b1111<<12 | 0b1111<<8| 0b0011<<4 | reg)
459 
460 #define PUSH(mask)    (AL | (0b100100<<22) | (0b10<<20) | (0b1101<<16) |  mask)
461 #define PUSH2(r1, r2) (AL | (0b100100<<22) | (0b10<<20) | (0b1101<<16) |  1<<r1 | 1<<r2)
462 //#define PUSH1(reg) STRxiw(SP, reg, 4)
463 
464 #define POP(mask)     (0xe8bd0000|mask)
465 
466 #define STM(base, regs) \
467 	(AL | 0b100<<25 | 0<<24/*P*/| 0<<24/*U*/| 0<<24/*S*/| 0<<24/*W*/ | (base<<16) | (regs&~(1<<16)))
468 
469 // note: op1 and op2 must not be the same
470 #define MUL(op1, op2, op3) \
471 	(AL | 0b0000000<<21 | (1<<20) /*S*/ | (op1<<16) | (op3<<8) | 0b1001<<4 | (op2))
472 
473 // puts integer in R0
474 #define emit_MOVR0i(arg) emit_MOVRxi(R0, arg)
475 
476 // puts integer arg in register reg
477 #define emit_MOVRxi(reg, arg) do { \
478 	emit(MOVW(reg, (arg&0xFFFF))); \
479 	if (arg > 0xFFFF) \
480 		emit(MOVT(reg, (((arg>>16)&0xFFFF)))); \
481 	} while(0)
482 
483 // puts integer arg in register reg. adds nop if only one instr is needed to
484 // make size constant
485 #define emit_MOVRxi_or_NOP(reg, arg) do { \
486 	emit(MOVW(reg, (arg&0xFFFF))); \
487 	if (arg > 0xFFFF) \
488 		emit(MOVT(reg, (((arg>>16)&0xFFFF)))); \
489 	else \
490 		emit(NOP); \
491 	} while(0)
492 
// arm core register -> single precision register
494 #define VMOVass(Vn, Rt) (AL|(0b1110<<24)|(0b000<<21)|(0<<20)| ((Vn>>1)<<16) | (Rt<<12) | (0b1010<<8) | ((Vn&1)<<7) | (1<<4))
// single precision register -> arm core register
496 #define VMOVssa(Rt, Vn) (AL|(0b1110<<24)|(0b000<<21)|(1<<20)| ((Vn>>1)<<16) | (Rt<<12) | (0b1010<<8) | ((Vn&1)<<7) | (1<<4))
497 
498 #define _VCVT_F(Vd, Vm, opc2, op) \
499 	(AL|(0b11101<<23)|((Vd&1)<<22)|(0b111<<19)|(opc2<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|(op<<7)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
500 #define VCVT_F32_U32(Sd, Sm) _VCVT_F(Sd, Sm, 0b000, 0 /* unsigned */)
501 #define VCVT_U32_F32(Sd, Sm) _VCVT_F(Sd, Sm, 0b100, 1 /* round zero */)
#define VCVT_F32_S32(Sd, Sm) _VCVT_F(Sd, Sm, 0b000, 1 /* signed */)
503 #define VCVT_S32_F32(Sd, Sm) _VCVT_F(Sd, Sm, 0b101, 1 /* round zero */)
504 
505 #define VLDRa(Vd, Rn, i) (AL|(0b1101<<24)|1<<23|((Vd&1)<<22)|1<<20|(Rn<<16)|((Vd>>1)<<12)|(0b1010<<8)|off8(i))
506 #define VSTRa(Vd, Rn, i) (AL|(0b1101<<24)|1<<23|((Vd&1)<<22)|0<<20|(Rn<<16)|((Vd>>1)<<12)|(0b1010<<8)|off8(i))
507 
508 #define VNEG_F32(Vd, Vm) \
509 	(AL|(0b11101<<23)|((Vd&1)<<22)|(0b11<<20)|(1<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
510 
511 #define VADD_F32(Vd, Vn, Vm) \
512 	(AL|(0b11100<<23)|((Vd&1)<<22)|(0b11<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|((Vn&1)<<7)|(0<<6)|((Vm&1)<<5)|(Vm>>1))
513 #define VSUB_F32(Vd, Vn, Vm) \
514 	(AL|(0b11100<<23)|((Vd&1)<<22)|(0b11<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|((Vn&1)<<7)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
515 #define VMUL_F32(Vd, Vn, Vm) \
516 	(AL|(0b11100<<23)|((Vd&1)<<22)|(0b10<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101)<<9|(0<<8)|((Vn&1)<<7)|(0<<6)|((Vm&1)<<5)|(Vm>>1))
517 #define VDIV_F32(Vd, Vn, Vm) \
518 	(AL|(0b11101<<23)|((Vd&1)<<22)|(0b00<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|((Vn&1)<<7)|(0<<6)|((Vm&1)<<5)|(Vm>>1))
519 
520 #define _VCMP_F32(Vd, Vm, E) \
521 	(AL|(0b11101<<23)|((Vd&1)<<22)|(0b11<<20)|((0b0100)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|(E<<7)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
522 #define VCMP_F32(Vd, Vm) _VCMP_F32(Vd, Vm, 0)
523 
524 #define VMRS(Rt) \
525 	(AL|(0b11101111<<20)|(0b0001<<16)|(Rt<<12)|(0b1010<<8)|(1<<4))
526 
527 // check if instruction in R0 is within range. Clobbers R1, R12
528 #define CHECK_JUMP do { \
529 	static int bytes_to_skip = -1; \
530 	static unsigned branch = -1; \
531 	emit_MOVRxi(R1, (unsigned)vm->instructionCount); \
532 	emit(CMP(R0, R1)); \
533 	if (branch == -1) \
534 		branch = vm->codeLength; \
535 	emit(cond(LT, Bi(j_rel(bytes_to_skip)))); \
536 	emit_MOVRxi_or_NOP(R12, (unsigned)ErrJump); \
537 	emit(BLX(R12)); \
538 	if (bytes_to_skip == -1) \
539 		bytes_to_skip = vm->codeLength - branch; \
540 } while(0)
541 
542 //#define CONST_OPTIMIZE
543 #ifdef CONST_OPTIMIZE
544 #define MAYBE_EMIT_CONST() \
545 	if (got_const) \
546 	{ \
547 		got_const = 0; \
548 		vm->instructionPointers[instruction-1] = assembler_get_code_size(); \
549 		STACK_PUSH(4); \
550 		emit("movl $%d, (%%r9, %%rbx, 4)", const_value); \
551 	}
552 #else
553 #define MAYBE_EMIT_CONST()
554 #endif
555 
556 // optimize: use load multiple
557 #define IJ(comparator) do { \
558 	MAYBE_EMIT_CONST(); \
559 	emit_MOVRxi(R0, arg.i); \
560 	CHECK_JUMP; \
561 	emit(LDRTxi(R0, rOPSTACK, 4)); \
562 	emit(LDRTxi(R1, rOPSTACK, 4));  \
563 	emit(CMP(R1, R0)); \
564 	emit(cond(comparator, Bi(j_rel(vm->instructionPointers[arg.i]-vm->codeLength)))); \
565 } while (0)
566 
567 #define FJ(comparator) do { \
568 	emit_MOVRxi(R0, arg.i); \
569 	CHECK_JUMP; \
570 	emit(SUBi(rOPSTACK, rOPSTACK, 8)); \
571 	emit(VLDRa(S15, rOPSTACK, 4)); \
572 	emit(VLDRa(S14, rOPSTACK, 8)); \
573 	emit(VCMP_F32(S15, S14)); \
574 	emit(VMRS(APSR_nzcv)); \
575 	emit(cond(comparator, Bi(j_rel(vm->instructionPointers[arg.i]-vm->codeLength)))); \
576 } while (0)
577 
578 #define printreg(reg) emit(PUSH1(R3)); emit(BLX(reg)); emit(POP1(R3));
579 
/* Convert a byte displacement into the signed 24-bit word offset of an
 * ARM branch instruction (offset counted from PC+8, hence the -2).
 * Dies when the displacement is unaligned or out of range. */
static inline unsigned _j_rel(int x, int pc)
{
	if ((x & 3) == 0) {
		x = (x >> 2) - 2;
		if (x < 0) {
			/* negative offsets must sign-extend from bit 23 */
			if ((x & (0xFF << 24)) == 0xFF << 24)
				return x & ~(0xFF << 24);
		} else if ((x & (0xFF << 24)) == 0) {
			return x;
		}
	}
	DIE("jump %d out of range at %d", x, pc);
}
596 
VM_Compile(vm_t * vm,vmHeader_t * header)597 void VM_Compile(vm_t *vm, vmHeader_t *header)
598 {
599 	unsigned char *code;
600 	int i_count, pc = 0;
601 	int pass;
602 	int codeoffsets[2]; // was 1024 but it's only used for OFF_CODE and OFF_IMMEDIATES
603 
604 #define j_rel(x) (pass?_j_rel(x, pc):0xBAD)
605 #define OFFSET(i) (pass?(j_rel(codeoffsets[i]-vm->codeLength)):(0xF000000F))
606 //#define new_offset() (offsidx++)
607 #define get_offset(i) (codeoffsets[i])
608 #define save_offset(i) (codeoffsets[i] = vm->codeLength)
609 #define OFF_CODE 0
610 #define OFF_IMMEDIATES 1
611 
612 	vm->compiled = qfalse;
613 
614 	vm->codeBase = NULL;
615 	vm->codeLength = 0;
616 
617 	for (pass = 0; pass < 2; ++pass) {
618 
619 //	int offsidx = 0;
620 
621 #ifdef CONST_OPTIMIZE
622 	// const optimization
623 	unsigned got_const = 0, const_value = 0;
624 #endif
625 
626 	if(pass)
627 	{
628 		vm->codeBase = mmap(NULL, vm->codeLength, PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
629 		if(vm->codeBase == MAP_FAILED)
630 			Com_Error(ERR_FATAL, "VM_CompileARM: can't mmap memory");
631 		vm->codeLength = 0;
632 	}
633 
634 	//int (*entry)(vm_t*, int*, int*);
635 	emit(PUSH((((1<<8)-1)<<4)|(1<<14))); // push R4-R11, LR
636 	emit(SUBi(SP, SP, 12)); // align stack!
637 	emit(LDRai(rCODEBASE, R0, offsetof(vm_t, codeBase)));
638 	emit(LDRai(rDATABASE, R0, offsetof(vm_t, dataBase)));
639 	emit(LDRai(rDATAMASK, R0, offsetof(vm_t, dataMask)));
640 	emit(LDRai(rPSTACK, R1, 0));
641 	emit(MOV(rOPSTACK, R2)); // TODO: reverse opstack to avoid writing to return address
642 	emit(MOV(rOPSTACKBASE, rOPSTACK));
643 
644 	emit(BLi(OFFSET(OFF_CODE)));
645 
646 	// save return value in r0
647 	emit(LDRTxi(R0, rOPSTACK, 4));  // r0 = *opstack; rOPSTACK -= 4
648 
649 	emit(ADDi(SP, SP, 12)); // align stack!
650 	emit(POP((((1<<8)-1)<<4)|(1<<15))); // pop R4-R11, LR -> PC
651 
652 	/* save some immediates here */
653 	emit(BKPT(0));
654 	emit(BKPT(0));
655 	save_offset(OFF_IMMEDIATES);
656 //	emit((unsigned)whatever);
657 	emit(BKPT(0));
658 	emit(BKPT(0));
659 
660 	save_offset(OFF_CODE);
661 //	offsidx = OFF_IMMEDIATES+1;
662 
663 	code = (unsigned char *) header + header->codeOffset;
664 	pc = 0;
665 
666 	for (i_count = 0; i_count < header->instructionCount; i_count++) {
667 		union {
668 			unsigned char b[4];
669 			unsigned int i;
670 		} arg;
671 		unsigned char op = code[pc++];
672 
673 		vm->instructionPointers[i_count] = vm->codeLength;
674 
675 		if (vm_opInfo[op] & opImm4)
676 		{
677 			memcpy(arg.b, &code[pc], 4);
678 			pc += 4;
679 #ifdef EXCESSIVE_DEBUG
680 			Com_Printf("%d: instruction %d (%s %d), offset %d\n", pass, i_count, opnames[op], arg.i, vm->codeLength);
681 #endif
682 		}
683 		else if (vm_opInfo[op] & opImm1)
684 		{
685 			arg.b[0] = code[pc];
686 			++pc;
687 #ifdef EXCESSIVE_DEBUG
688 			Com_Printf("%d: instruction %d (%s %hhd), offset %d\n", pass, i_count, opnames[op], arg.i, vm->codeLength);
689 #endif
690 		}
691 		else
692 		{
693 #ifdef EXCESSIVE_DEBUG
694 			Com_Printf("%d: instruction %d (%s), offset %d\n", pass, i_count, opnames[op], vm->codeLength);
695 #endif
696 		}
697 
698 		// TODO: for debug only
699 		//emit_MOVRxi(R4, i_count);
700 
701 		switch ( op )
702 		{
703 			case OP_UNDEF:
704 				break;
705 
706 			case OP_IGNORE:
707 				NOTIMPL(op);
708 				break;
709 
710 			case OP_BREAK:
711 				emit(BKPT(0));
712 				break;
713 
714 			case OP_ENTER:
715 				MAYBE_EMIT_CONST();
716 				emit(PUSH1(LR));
717 				emit(SUBi(SP, SP, 12)); // align stack
718 				if (arg.i == 0 || can_encode(arg.i))
719 				{
720 					emit(SUBi(rPSTACK, rPSTACK, arg.i)); // pstack -= arg
721 				}
722 				else
723 				{
724 					emit_MOVR0i(arg.i);
725 					emit(SUB(rPSTACK, rPSTACK, R0)); // pstack -= arg
726 				}
727 				break;
728 
729 			case OP_LEAVE:
730 				if (arg.i == 0 || can_encode(arg.i))
731 				{
732 					emit(ADDi(rPSTACK, rPSTACK, arg.i)); // pstack += arg
733 				}
734 				else
735 				{
736 					emit_MOVR0i(arg.i);
737 					emit(ADD(rPSTACK, rPSTACK, R0)); // pstack += arg
738 				}
739 				emit(ADDi(SP, SP, 12));
740 				emit(0xe49df004); // pop pc
741 				break;
742 
743 			case OP_CALL:
744 #if 0
745 				// save next instruction
746 				emit_MOVR0i(i_count);
747 				emit(STRa(R0, rDATABASE, rPSTACK));      // dataBase[pstack] = r0
748 #endif
749 #ifdef CONST_OPTIMIZE
750 				if (got_const)
751 				{
752 					NOTIMPL(op);
753 				}
754 				else
755 #endif
756 				{
757 					static int bytes_to_skip = -1;
758 					static unsigned start_block = -1;
759 					MAYBE_EMIT_CONST();
760 					// get instruction nr from stack
761 					emit(LDRTxi(R0, rOPSTACK, 4));  // r0 = *opstack; rOPSTACK -= 4
762 					emit(CMPi(R0, 0)); // check if syscall
763 					if (start_block == -1)
764 						start_block = vm->codeLength;
765 					emit(cond(LT, Bi(j_rel(bytes_to_skip))));
766 						CHECK_JUMP;
767 						emit_MOVRxi_or_NOP(R1, (unsigned)vm->instructionPointers);
768 						emit(LDRa(R0, R1, rLSL(2, R0))); // r0 = ((int*)r1)[r0]
769 						emit(ADD(R0, rCODEBASE, R0)); // r0 = codeBase+r0
770 						emit(BLX(R0));
771 						emit(Bi(j_rel(vm->instructionPointers[i_count+1]-vm->codeLength)));
772 					if (bytes_to_skip == -1)
773 						bytes_to_skip = vm->codeLength - start_block;
774 					emit(MOV(R1, rPSTACK));
775 					emit_MOVRxi(R12, (unsigned)asmcall);
776 					emit(BLX(R12));
777 					// store return value
778 					emit(STRaiw(R0, rOPSTACK, 4));      // opstack+=4; *opstack = r0
779 				}
780 				break;
781 
782 			case OP_PUSH:
783 				MAYBE_EMIT_CONST();
784 				emit(ADDi(rOPSTACK, rOPSTACK, 4));
785 				break;
786 
787 			case OP_POP:
788 				MAYBE_EMIT_CONST();
789 				emit(SUBi(rOPSTACK, rOPSTACK, 4));
790 				break;
791 
792 			case OP_CONST:
793 				MAYBE_EMIT_CONST();
794 				emit_MOVR0i(arg.i);
795 				emit(STRaiw(R0, rOPSTACK, 4));      // opstack+=4; *opstack = r0
796 				break;
797 
798 			case OP_LOCAL:
799 				MAYBE_EMIT_CONST();
800 				if (arg.i == 0 || can_encode(arg.i))
801 				{
802 					emit(ADDi(R0, rPSTACK, arg.i));     // r0 = pstack+arg
803 				}
804 				else
805 				{
806 					emit_MOVR0i(arg.i);
807 					emit(ADD(R0, rPSTACK, R0));     // r0 = pstack+arg
808 				}
809 				emit(STRaiw(R0, rOPSTACK, 4));      // opstack+=4; *opstack = r0
810 				break;
811 
			case OP_JUMP:
				// computed jump: pop a VM instruction index and branch to the
				// corresponding native code address
#ifdef CONST_OPTIMIZE
				if (got_const)
				{
					NOTIMPL(op);
				}
				else
#endif
				{
					emit(LDRTxi(R0, rOPSTACK, 4));  // r0 = *opstack; rOPSTACK -= 4
					CHECK_JUMP;
					emit_MOVRxi(R1, (unsigned)vm->instructionPointers);
					emit(LDRa(R0, R1, rLSL(2, R0))); // r0 = ((int*)r1)[r0]
					emit(ADD(R0, rCODEBASE, R0)); // r0 = codeBase+r0
					emit(BLX(R0));
				}
				break;
829 
			// integer compare-and-jump: the IJ() macro (defined earlier) pops
			// two operands, compares them and emits a conditional branch to
			// the jump target encoded in the bytecode
			case OP_EQ:
				IJ(EQ);
				break;

			case OP_NE:
				IJ(NE);
				break;

			case OP_LTI:
				IJ(LT);
				break;

			case OP_LEI:
				IJ(LE);
				break;

			case OP_GTI:
				IJ(GT);
				break;

			case OP_GEI:
				IJ(GE);
				break;

			// unsigned comparisons use the carry-based condition codes
			case OP_LTU:
				IJ(LO);
				break;

			case OP_LEU:
				IJ(LS);
				break;

			case OP_GTU:
				IJ(HI);
				break;

			case OP_GEU:
				IJ(HS);
				break;

			// float compare-and-jump: FJ() does the same via the VFP compare
			case OP_EQF:
				FJ(EQ);
				break;

			case OP_NEF:
				FJ(NE);
				break;

			case OP_LTF:
				FJ(LT);
				break;

			case OP_LEF:
				FJ(LE);
				break;

			case OP_GTF:
				FJ(GT);
				break;

			case OP_GEF:
				FJ(GE);
				break;
893 
			case OP_LOAD1:
				// load byte: address on top of opstack is masked into the
				// VM data segment (sandboxing), result replaces it
				MAYBE_EMIT_CONST();
				emit(LDRai(R0, rOPSTACK, 0));   // r0 = *opstack
				emit(AND(R0, rDATAMASK, R0));    // r0 = r0 & rDATAMASK
				emit(LDRBa(R0, rDATABASE, R0));  // r0 = (unsigned char)dataBase[r0]
				emit(STRai(R0, rOPSTACK, 0));   // *opstack = r0
				break;

			case OP_LOAD2:
				// load halfword, zero-extended
				MAYBE_EMIT_CONST();
				emit(LDRai(R0, rOPSTACK, 0));   // r0 = *opstack
				emit(AND(R0, rDATAMASK, R0));    // r0 = r0 & rDATAMASK
				emit(LDRHa(R0, rDATABASE, R0));  // r0 = (unsigned short)dataBase[r0]
				emit(STRai(R0, rOPSTACK, 0));   // *opstack = r0
				break;

			case OP_LOAD4:
				// load word
				MAYBE_EMIT_CONST();
				emit(LDRai(R0, rOPSTACK, 0));   // r0 = *opstack
				emit(AND(R0, rDATAMASK, R0));    // r0 = r0 & rDATAMASK
				emit(LDRa(R0, rDATABASE, R0));  // r0 = dataBase[r0]
				emit(STRai(R0, rOPSTACK, 0));   // *opstack = r0
				break;
917 
			case OP_STORE1:
				// store byte: value on top, destination address below it;
				// the address is masked into the VM data segment
				MAYBE_EMIT_CONST();
				emit(LDRTxi(R0, rOPSTACK, 4));  // r0 = *opstack; rOPSTACK -= 4
				emit(LDRTxi(R1, rOPSTACK, 4));  // r1 = *opstack; rOPSTACK -= 4
				emit(AND(R1, rDATAMASK, R1));    // r1 = r1 & rDATAMASK
				emit(STRBa(R0, rDATABASE, R1)); // database[r1] = r0
				break;

			case OP_STORE2:
				// store halfword
				MAYBE_EMIT_CONST();
				emit(LDRTxi(R0, rOPSTACK, 4));  // r0 = *opstack; rOPSTACK -= 4
				emit(LDRTxi(R1, rOPSTACK, 4));  // r1 = *opstack; rOPSTACK -= 4
				emit(AND(R1, rDATAMASK, R1));    // r1 = r1 & rDATAMASK
				emit(STRHa(R0, rDATABASE, R1)); // database[r1] = r0
				break;

			case OP_STORE4:
				MAYBE_EMIT_CONST();
				// optimize: use load multiple
				// value
				emit(LDRTxi(R0, rOPSTACK, 4));  // r0 = *opstack; rOPSTACK -= 4
				// pointer
				emit(LDRTxi(R1, rOPSTACK, 4));  // r1 = *opstack; rOPSTACK -= 4
				emit(AND(R1, rDATAMASK, R1));    // r1 = r1 & rDATAMASK
				// store value at pointer
				emit(STRa(R0, rDATABASE, R1)); // database[r1] = r0
				break;
945 
			case OP_ARG:
				// pop a value and place it in the callee's argument area at
				// programStack + arg.b[0] (8-bit frame offset)
				MAYBE_EMIT_CONST();
				emit(LDRTxi(R0, rOPSTACK, 4));      // r0 = *opstack; rOPSTACK -= 4
				emit(ADDi(R1, rPSTACK, arg.b[0]));  // r1 = programStack+arg
				emit(AND(R1, rDATAMASK, R1));       // r1 = r1 & rDATAMASK
				emit(STRa(R0, rDATABASE, R1));      // dataBase[r1] = r0
				break;

			case OP_BLOCK_COPY:
				// memcpy within the VM data segment, done by the helper:
				// VM_BlockCopy(dest=r0, src=r1, n=r2)
				MAYBE_EMIT_CONST();
				emit(LDRTxi(R1, rOPSTACK, 4));  // r1 = *opstack; rOPSTACK -= 4
				emit(LDRTxi(R0, rOPSTACK, 4));
				emit_MOVRxi(R2, arg.i);
				emit_MOVRxi(R12, (unsigned)VM_BlockCopy);
				emit(BLX(R12));
				break;
962 
			case OP_SEX8:
				// sign-extend the low byte of the top opstack entry
				MAYBE_EMIT_CONST();
				emit(LDRSBai(R0, rOPSTACK, 0));      // sign extend *opstack
				emit(STRai(R0, rOPSTACK, 0));  // *opstack = r0
				break;

			case OP_SEX16:
				// sign-extend the low halfword of the top opstack entry
				MAYBE_EMIT_CONST();
				emit(LDRSHai(R0, rOPSTACK, 0));      // sign extend *opstack
				emit(STRai(R0, rOPSTACK, 0));  // *opstack = r0
				break;

			case OP_NEGI:
				MAYBE_EMIT_CONST();
				emit(LDRai(R0, rOPSTACK, 0));  // r0 = *opstack
				emit(RSBi(R0, R0, 0));         // r0 = -r0
				emit(STRai(R0, rOPSTACK, 0));  // *opstack = r0
				break;

			// binary ops: r0 = top of stack (rhs), r1 = next (lhs);
			// result replaces the lhs slot
			case OP_ADD:
				MAYBE_EMIT_CONST();
				emit(LDRai(R0, rOPSTACK, 0));  // r0 = *opstack
				emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
				emit(ADD(R0, R1, R0));         // r0 = r1 + r0
				emit(STRai(R0, rOPSTACK, 0));  // *opstack = r0
				break;

			case OP_SUB:
				MAYBE_EMIT_CONST();
				emit(LDRai(R0, rOPSTACK, 0));  // r0 = *opstack
				emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
				emit(SUB(R0, R1, R0));         // r0 = r1 - r0
				emit(STRai(R0, rOPSTACK, 0));  // *opstack = r0
				break;
997 
			case OP_DIVI:
			case OP_DIVU:
				MAYBE_EMIT_CONST();
				// EABI division helpers: dividend in r0, divisor in r1
				emit(LDRai(R1, rOPSTACK, 0));  // r1 = *opstack (divisor)
				emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack (dividend)
				if ( op == OP_DIVI )
					emit_MOVRxi(R12, (unsigned)__aeabi_idiv);
				else
					emit_MOVRxi(R12, (unsigned)__aeabi_uidiv);
				emit(BLX(R12));
				emit(STRai(R0, rOPSTACK, 0));  // *opstack = r0 (quotient)
				break;

			case OP_MODI:
			case OP_MODU:
				MAYBE_EMIT_CONST();
				emit(LDRai(R1, rOPSTACK, 0));  // r1 = *opstack (divisor)
				emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack (dividend)
				if ( op == OP_MODI )
					emit_MOVRxi(R12, (unsigned)__aeabi_idivmod);
				else
					emit_MOVRxi(R12, (unsigned)__aeabi_uidivmod);
				emit(BLX(R12));
				// __aeabi_*idivmod returns the remainder in r1
				emit(STRai(R1, rOPSTACK, 0));  // *opstack = r1
				break;

			case OP_MULI:
			case OP_MULU:
				// low 32 bits are identical for signed and unsigned multiply
				MAYBE_EMIT_CONST();
				emit(LDRai(R0, rOPSTACK, 0));  // r0 = *opstack
				emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
				emit(MUL(R0, R1, R0));         // r0 = r1 * r0
				emit(STRai(R0, rOPSTACK, 0));  // *opstack = r0
				break;
1032 
			// bitwise and shift binary ops: r0 = top of stack (rhs),
			// r1 = next (lhs); result replaces the lhs slot
			case OP_BAND:
				MAYBE_EMIT_CONST();
				emit(LDRai(R0, rOPSTACK, 0));  // r0 = *opstack
				emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
				emit(AND(R0, R1, R0));         // r0 = r1 & r0
				emit(STRai(R0, rOPSTACK, 0));  // *opstack = r0
				break;

			case OP_BOR:
				MAYBE_EMIT_CONST();
				emit(LDRai(R0, rOPSTACK, 0));  // r0 = *opstack
				emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
				emit(ORR(R0, R1, R0));         // r0 = r1 | r0
				emit(STRai(R0, rOPSTACK, 0));  // *opstack = r0
				break;

			case OP_BXOR:
				MAYBE_EMIT_CONST();
				emit(LDRai(R0, rOPSTACK, 0));  // r0 = *opstack
				emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
				emit(EOR(R0, R1, R0));         // r0 = r1 ^ r0
				emit(STRai(R0, rOPSTACK, 0));  // *opstack = r0
				break;

			case OP_BCOM:
				MAYBE_EMIT_CONST();
				emit(LDRai(R0, rOPSTACK, 0));  // r0 = *opstack
				emit(MVN(R0, R0));             // r0 = ~r0
				emit(STRai(R0, rOPSTACK, 0));  // *opstack = r0
				break;

			case OP_LSH:
				MAYBE_EMIT_CONST();
				emit(LDRai(R0, rOPSTACK, 0));  // r0 = *opstack
				emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
				emit(LSL(R0, R1, R0));         // r0 = r1 << r0
				emit(STRai(R0, rOPSTACK, 0));  // *opstack = r0
				break;

			case OP_RSHI:
				// arithmetic (sign-propagating) right shift
				MAYBE_EMIT_CONST();
				emit(LDRai(R0, rOPSTACK, 0));  // r0 = *opstack
				emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
				emit(ASR(R0, R1, R0));         // r0 = r1 >> r0
				emit(STRai(R0, rOPSTACK, 0));  // *opstack = r0
				break;

			case OP_RSHU:
				// logical (zero-filling) right shift
				MAYBE_EMIT_CONST();
				emit(LDRai(R0, rOPSTACK, 0));  // r0 = *opstack
				emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
				emit(LSR(R0, R1, R0));         // r0 = (unsigned)r1 >> r0
				emit(STRai(R0, rOPSTACK, 0));  // *opstack = r0
				break;
1087 
			case OP_NEGF:
				MAYBE_EMIT_CONST();
				emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
				emit(VNEG_F32(S14, S14));      // s14 = -s14
				emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
				break;

			case OP_ADDF:
				MAYBE_EMIT_CONST();
				emit(VLDRa(S14, rOPSTACK, 0));   // s14 = *((float*)opstack)
				// vldr can't modify rOPSTACK so
				// we'd either need to change it
				// with sub or use regular ldr+vmov
				emit(LDRxiw(R0, rOPSTACK, 4));   // opstack-=4; r0 = *opstack
				emit(VMOVass(S15,R0));           // s15 = r0
				emit(VADD_F32(S14, S15, S14));   // s14 = s15 + s14
				emit(VSTRa(S14, rOPSTACK, 0));   // *((float*)opstack) = s14
				break;
1106 
1107 			case OP_SUBF:
1108 				emit(VLDRa(S14, rOPSTACK, 0));   // s14 = *((float*)opstack)
1109 				// see OP_ADDF
1110 				emit(LDRxiw(R0, rOPSTACK, 4));   // opstack-=4; r1 = *opstack
1111 				emit(VMOVass(S15,R0));           // s15 = r0
1112 				emit(VSUB_F32(S14, S15, S14));   // s14 = s14 - s15
1113 				emit(VSTRa(S14, rOPSTACK, 0));   // *((float*)opstack) = s15
1114 				break;
1115 
1116 			case OP_DIVF:
1117 				emit(VLDRa(S14, rOPSTACK, 0));   // s14 = *((float*)opstack)
1118 				// see OP_ADDF
1119 				emit(LDRxiw(R0, rOPSTACK, 4));   // opstack-=4; r1 = *opstack
1120 				emit(VMOVass(S15,R0));           // s15 = r0
1121 				emit(VDIV_F32(S14, S15, S14));   // s14 = s14 / s15
1122 				emit(VSTRa(S14, rOPSTACK, 0));   // *((float*)opstack) = s15
1123 				break;
1124 
1125 			case OP_MULF:
1126 				emit(VLDRa(S14, rOPSTACK, 0));   // s14 = *((float*)opstack)
1127 				// see OP_ADDF
1128 				emit(LDRxiw(R0, rOPSTACK, 4));   // opstack-=4; r1 = *opstack
1129 				emit(VMOVass(S15,R0));           // s15 = r0
1130 				emit(VMUL_F32(S14, S15, S14));   // s14 = s14 * s15
1131 				emit(VSTRa(S14, rOPSTACK, 0));   // *((float*)opstack) = s15
1132 				break;
1133 
			case OP_CVIF:
				// int -> float conversion in place on top of the opstack
				MAYBE_EMIT_CONST();
				emit(LDRai(R0, rOPSTACK, 0));  // r0 = *opstack
				emit(VMOVass(S14,R0));         // s14 = r0
				emit(VCVT_F32_S32(S14, S14));  // s14 = (float)s14
				emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
				break;

			case OP_CVFI:
				// float -> int conversion in place on top of the opstack
				MAYBE_EMIT_CONST();
				emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
				emit(VCVT_S32_F32(S14, S14));  // s14 = (int)s14
				emit(VMOVssa(R0,S14));         // r0 = s14
				emit(STRai(R0, rOPSTACK, 0));  // *opstack = r0
				break;
1149 		}
1150 	}
1151 
1152 	// never reached
1153 	emit(BKPT(0));
1154 	} // pass
1155 
1156 	if (mprotect(vm->codeBase, vm->codeLength, PROT_READ|PROT_EXEC/* |PROT_WRITE */)) {
1157 		VM_Destroy_Compiled(vm);
1158 		DIE("mprotect failed");
1159 	}
1160 
1161 	// clear icache, http://blogs.arm.com/software-enablement/141-caches-and-self-modifying-code/
1162 	__clear_cache(vm->codeBase, vm->codeBase+vm->codeLength);
1163 
1164 	vm->destroy = VM_Destroy_Compiled;
1165 	vm->compiled = qtrue;
1166 }
1167 
/*
 * VM_CallCompiled
 *
 * Entry point into the JIT-compiled bytecode. Builds an argument frame in
 * the VM's data segment, sets up a sentinel-guarded operand stack, calls
 * the generated code, and validates both stacks afterwards.
 * Returns the value the VM program left on the operand stack.
 */
int VM_CallCompiled(vm_t *vm, int *args)
{
	byte	opStackMem[OPSTACK_SIZE + 15];
	byte	*image;
	int	*opStack;
	int	*argFrame;
	int	stackOnEntry;
	int	programStack;
	int	retVal;

	currentVM = vm;
	vm->currentlyInterpreting = qtrue;

	// carve the argument frame out of the VM stack: 8 bytes of header
	// followed by MAX_VMMAIN_ARGS 32-bit arguments
	// NOTE(review): the [-1]/[-2] slots mirror the interpreter's frame
	// layout — confirm against VM_CallInterpreted
	image        = vm->dataBase;
	stackOnEntry = vm->programStack;
	programStack = stackOnEntry - (8 + 4 * MAX_VMMAIN_ARGS);
	argFrame     = (int *)&image[programStack + 8];
	memcpy(argFrame, args, 4 * MAX_VMMAIN_ARGS);
	argFrame[-1] = 0;
	argFrame[-2] = -1;

	// 16-byte-aligned opstack; the sentinel detects over/underflow
	opStack  = PADP(opStackMem, 16);
	*opStack = 0xDEADBEEF;

	/* call generated code */
	{
		int (*entry)(vm_t*, int*, int*);

		entry  = (void *)(vm->codeBase);
		retVal = entry(vm, &programStack, opStack);
	}

	if (*opStack != 0xDEADBEEF)
	{
		Com_Error(ERR_DROP, "opStack corrupted in compiled code");
	}

	if (programStack != stackOnEntry - (8 + 4 * MAX_VMMAIN_ARGS))
		Com_Error(ERR_DROP, "programStack corrupted in compiled code");

	vm->programStack = stackOnEntry;
	vm->currentlyInterpreting = qfalse;

	return retVal;
}
1223