1 /*
2 ===========================================================================
3 Copyright (C) 2009 David S. Miller <davem@davemloft.net>
4 Copyright (C) 2013,2014 SUSE Linux Products GmbH
5
6 This file is part of Quake III Arena source code.
7
8 Quake III Arena source code is free software; you can redistribute it
9 and/or modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2 of the License,
11 or (at your option) any later version.
12
13 Quake III Arena source code is distributed in the hope that it will be
14 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with Quake III Arena source code; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 ===========================================================================
22
23 ARMv7l VM by Ludwig Nussel <ludwig.nussel@suse.de>
24
25 TODO: optimization
26
27 Docu:
28 http://www.coranac.com/tonc/text/asm.htm
29 http://www.heyrick.co.uk/armwiki/Category:Opcodes
30 ARMv7-A_ARMv7-R_DDI0406_2007.pdf
31 */
32
33 #include <sys/types.h>
34 #include <sys/mman.h>
35 #include <sys/time.h>
36 #include <time.h>
37 #include <stddef.h>
38
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <fcntl.h>
42
43 #include "vm_local.h"
44 #define R0 0
45 #define R1 1
46 #define R2 2
47 #define R3 3
48 #define R4 4
49
50 #define R12 12
51
52 #define FP 11
53 #define SP 13
54 #define LR 14
55 #define PC 15
56
57 #define APSR_nzcv 15
58
59 #define S14 14
60 #define S15 15
61
62 #define rOPSTACK 5
63 #define rOPSTACKBASE 6
64 #define rCODEBASE 7
65 #define rPSTACK 8
66 #define rDATABASE 9
67 #define rDATAMASK 10
68
/* bit mask for bit number x; the argument is parenthesized so that
 * compound expressions like bit(a|b) expand correctly */
#define bit(x) (1<<(x))
70
71 /* arm eabi, builtin gcc functions */
72 int __aeabi_idiv (int, int);
73 unsigned __aeabi_uidiv (unsigned, unsigned);
74 void __aeabi_idivmod(void);
75 void __aeabi_uidivmod(void);
76
77 /* exit() won't be called but use it because it is marked with noreturn */
78 #define DIE( reason, args... ) \
79 do { \
80 Com_Error(ERR_DROP, "vm_arm compiler error: " reason, ##args); \
81 exit(1); \
82 } while(0)
83
84 /*
85 * opcode information table:
86 * - length of immediate value
87 * - returned register type
88 * - required register(s) type
89 */
90 #define opImm0 0x0000 /* no immediate */
91 #define opImm1 0x0001 /* 1 byte immadiate value after opcode */
92 #define opImm4 0x0002 /* 4 bytes immediate value after opcode */
93
94 #define opRet0 0x0000 /* returns nothing */
95 #define opRetI 0x0004 /* returns integer */
96 #define opRetF 0x0008 /* returns float */
97 #define opRetIF (opRetI | opRetF) /* returns integer or float */
98
99 #define opArg0 0x0000 /* requires nothing */
100 #define opArgI 0x0010 /* requires integer(s) */
101 #define opArgF 0x0020 /* requires float(s) */
102 #define opArgIF (opArgI | opArgF) /* requires integer or float */
103
104 #define opArg2I 0x0040 /* requires second argument, integer */
105 #define opArg2F 0x0080 /* requires second argument, float */
106 #define opArg2IF (opArg2I | opArg2F) /* requires second argument, integer or float */
107
/* per-opcode metadata, indexed by bytecode opcode value: immediate size
 * (opImm*), result type pushed on the opstack (opRet*), and operand
 * type(s) consumed from the opstack (opArg*, opArg2*).  Entries not
 * listed default to 0 (== opImm0). */
static const unsigned char vm_opInfo[256] =
{
	[OP_UNDEF] = opImm0,
	[OP_IGNORE] = opImm0,
	[OP_BREAK] = opImm0,
	[OP_ENTER] = opImm4,
	/* OP_LEAVE has to accept floats, they will be converted to ints */
	[OP_LEAVE] = opImm4 | opRet0 | opArgIF,
	/* only STORE4 and POP use values from OP_CALL,
	 * no need to convert floats back */
	[OP_CALL] = opImm0 | opRetI | opArgI,
	[OP_PUSH] = opImm0 | opRetIF,
	[OP_POP] = opImm0 | opRet0 | opArgIF,
	[OP_CONST] = opImm4 | opRetIF,
	[OP_LOCAL] = opImm4 | opRetI,
	[OP_JUMP] = opImm0 | opRet0 | opArgI,

	/* conditional branches: 4-byte immediate is the branch target
	 * instruction number, two operands are compared */
	[OP_EQ] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_NE] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_LTI] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_LEI] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_GTI] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_GEI] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_LTU] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_LEU] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_GTU] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_GEU] = opImm4 | opRet0 | opArgI | opArg2I,
	[OP_EQF] = opImm4 | opRet0 | opArgF | opArg2F,
	[OP_NEF] = opImm4 | opRet0 | opArgF | opArg2F,
	[OP_LTF] = opImm4 | opRet0 | opArgF | opArg2F,
	[OP_LEF] = opImm4 | opRet0 | opArgF | opArg2F,
	[OP_GTF] = opImm4 | opRet0 | opArgF | opArg2F,
	[OP_GEF] = opImm4 | opRet0 | opArgF | opArg2F,

	/* memory access; addresses are integers on the opstack */
	[OP_LOAD1] = opImm0 | opRetI | opArgI,
	[OP_LOAD2] = opImm0 | opRetI | opArgI,
	[OP_LOAD4] = opImm0 | opRetIF| opArgI,
	[OP_STORE1] = opImm0 | opRet0 | opArgI | opArg2I,
	[OP_STORE2] = opImm0 | opRet0 | opArgI | opArg2I,
	[OP_STORE4] = opImm0 | opRet0 | opArgIF| opArg2I,
	[OP_ARG] = opImm1 | opRet0 | opArgIF,
	[OP_BLOCK_COPY] = opImm4 | opRet0 | opArgI | opArg2I,

	/* arithmetic and conversions */
	[OP_SEX8] = opImm0 | opRetI | opArgI,
	[OP_SEX16] = opImm0 | opRetI | opArgI,
	[OP_NEGI] = opImm0 | opRetI | opArgI,
	[OP_ADD] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_SUB] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_DIVI] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_DIVU] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_MODI] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_MODU] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_MULI] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_MULU] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_BAND] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_BOR] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_BXOR] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_BCOM] = opImm0 | opRetI | opArgI,
	[OP_LSH] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_RSHI] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_RSHU] = opImm0 | opRetI | opArgI | opArg2I,
	[OP_NEGF] = opImm0 | opRetF | opArgF,
	[OP_ADDF] = opImm0 | opRetF | opArgF | opArg2F,
	[OP_SUBF] = opImm0 | opRetF | opArgF | opArg2F,
	[OP_DIVF] = opImm0 | opRetF | opArgF | opArg2F,
	[OP_MULF] = opImm0 | opRetF | opArgF | opArg2F,
	[OP_CVIF] = opImm0 | opRetF | opArgI,
	[OP_CVFI] = opImm0 | opRetI | opArgF,
};
177
#ifdef DEBUG_VM
/* printable opcode names for debug messages; positional initializers, so
 * the order is assumed to match the opcode enum in vm_local.h — verify
 * there if ops are ever added or reordered */
static const char *opnames[256] = {
	"OP_UNDEF", "OP_IGNORE", "OP_BREAK", "OP_ENTER", "OP_LEAVE", "OP_CALL",
	"OP_PUSH", "OP_POP", "OP_CONST", "OP_LOCAL", "OP_JUMP",
	"OP_EQ", "OP_NE", "OP_LTI", "OP_LEI", "OP_GTI", "OP_GEI",
	"OP_LTU", "OP_LEU", "OP_GTU", "OP_GEU", "OP_EQF", "OP_NEF",
	"OP_LTF", "OP_LEF", "OP_GTF", "OP_GEF",
	"OP_LOAD1", "OP_LOAD2", "OP_LOAD4", "OP_STORE1", "OP_STORE2",
	"OP_STORE4", "OP_ARG", "OP_BLOCK_COPY",
	"OP_SEX8", "OP_SEX16",
	"OP_NEGI", "OP_ADD", "OP_SUB", "OP_DIVI", "OP_DIVU",
	"OP_MODI", "OP_MODU", "OP_MULI", "OP_MULU", "OP_BAND",
	"OP_BOR", "OP_BXOR", "OP_BCOM", "OP_LSH", "OP_RSHI", "OP_RSHU",
	"OP_NEGF", "OP_ADDF", "OP_SUBF", "OP_DIVF", "OP_MULF",
	"OP_CVIF", "OP_CVFI",
};

/* debug build: drop to the error handler with the symbolic opcode name */
#define NOTIMPL(x) \
	do { Com_Error(ERR_DROP, "instruction not implemented: %s", opnames[x]); } while(0)
#else
/* release build: print and mark the VM as not compiled so the caller can
 * fall back to the interpreter; note this returns from the *enclosing*
 * function (only usable inside VM_Compile) */
#define NOTIMPL(x) \
	do { Com_Printf(S_COLOR_RED "instruction not implemented: %x\n", x); vm->compiled = qfalse; return; } while(0)
#endif
201
static void VM_Destroy_Compiled(vm_t *vm)
{
	/* Release the mmap()ed JIT code region, if one was ever created. */
	if (vm->codeBase && munmap(vm->codeBase, vm->codeLength))
		Com_Printf(S_COLOR_RED "Memory unmap failed, possible memory leak\n");
	vm->codeBase = NULL;
}
210
211 /*
212 =================
213 ErrJump
214 Error handler for jump/call to invalid instruction number
215 =================
216 */
217
/* num is the offending instruction number the VM code tried to reach;
 * Com_Error(ERR_DROP) longjmps back to the engine, so this never
 * returns — hence the noreturn attribute */
static void __attribute__((__noreturn__)) ErrJump(unsigned num)
{
	Com_Error(ERR_DROP, "program tried to execute code outside VM (%x)", num);
}
222
/*
 * Trampoline invoked from generated code for negative instruction
 * numbers (syscalls).  call is the syscall number as pushed by the VM
 * (already negative, biased by -1), pstack is the VM program stack
 * pointer.  Returns the syscall's result, which the generated code
 * pushes back onto the opstack.
 */
static int asmcall(int call, int pstack)
{
	// save currentVM so as to allow for recursive VM entry
	vm_t *savedVM = currentVM;
	int i, ret;

	// modify VM stack pointer for recursive VM entry
	currentVM->programStack = pstack - 4;

	if (sizeof(intptr_t) == sizeof(int)) {
		/* 32-bit host: the VM's argument area can be passed to the
		 * syscall handler directly, only slot 0 (the syscall number)
		 * needs to be patched in */
		intptr_t *argPosition = (intptr_t *)((byte *)currentVM->dataBase + pstack + 4);
		argPosition[0] = -1 - call;
		ret = currentVM->systemCall(argPosition);
	} else {
		/* 64-bit host: widen each 32-bit VM argument into a local
		 * intptr_t array before handing it to the syscall handler */
		intptr_t args[MAX_VMSYSCALL_ARGS];

		args[0] = -1 - call;
		int *argPosition = (int *)((byte *)currentVM->dataBase + pstack + 4);
		for( i = 1; i < ARRAY_LEN(args); i++ )
			args[i] = argPosition[i];

		ret = currentVM->systemCall(args);
	}

	currentVM = savedVM;

	return ret;
}
251
/* Append one 32-bit ARM instruction word to the JIT buffer. */
void _emit(vm_t *vm, unsigned isn, int pass)
{
#if 0
	/* debugging aid: dump the raw instruction stream to code.bin */
	static int fd = -2;
	if (fd == -2)
		fd = open("code.bin", O_TRUNC|O_WRONLY|O_CREAT, 0644);
	if (fd > 0)
		write(fd, &isn, 4);
#endif

	/* pass 0 only measures the code size; pass 1 writes the word */
	if (pass)
		memcpy(vm->codeBase + vm->codeLength, &isn, sizeof(isn));
	vm->codeLength += sizeof(isn);
}
266
267 #define emit(isn) _emit(vm, isn, pass)
268
/* Encode a byte offset for VFP VLDR/VSTR: the instruction stores the
 * offset as an 8-bit word count, so it must be a multiple of four and
 * at most 255*4 bytes. */
static unsigned char off8(unsigned val)
{
	if (val % 4 != 0)
		DIE("offset must be multiple of four");
	if (val > 255 * 4)
		DIE("offset too large");
	return val / 4;
}
277
// ARM is really crazy ...
/* Encode val as an ARM modified immediate: an 8-bit value rotated right
 * by an even amount.  Returns the 12-bit rotate|value field; dies if the
 * value cannot be represented. */
static unsigned short rimm(unsigned val)
{
	unsigned rot = 0;

	if (val < 256)
		return val;
	/* rotate left two bits at a time until the value fits in 8 bits */
	while (rot < 16 && (val > 255 || !(val & 3))) {
		val = (val & 3) << 30 | val >> 2;
		++rot;
	}
	if (rot > 15 || val > 255)
		DIE("immediate cannot be encoded (%d, %d)\n", rot, val);
	return (16 - rot) << 8 | val;
}
294
// same as rimm but doesn't die, returns 0 if not encodable so don't call with zero as argument!
static unsigned short can_encode(unsigned val)
{
	unsigned rot = 0;

	if (!val)
		DIE("can_encode: invalid argument");
	if (val < 256)
		return val;
	/* rotate left two bits at a time until the value fits in 8 bits */
	while (rot < 16 && (val > 255 || !(val & 3))) {
		val = (val & 3) << 30 | val >> 2;
		++rot;
	}
	if (rot > 15 || val > 255)
		return 0;
	return (16 - rot) << 8 | val;
}
313
314 #define PREINDEX (1<<24)
315
316 #define rASR(i, reg) (0b10<<5 | ((i&31)<<7) | reg)
317 #define rLSL(i, reg) (0b00<<5 | ((i&31)<<7) | reg)
318 #define rLSR(i, reg) (0b01<<5 | ((i&31)<<7) | reg)
319 #define rROR(i, reg) (0b11<<5 | ((i&31)<<7) | reg)
320
321 // conditions
322 #define EQ (0b0000<<28)
323 #define NE (0b0001<<28)
324 #define CS (0b0010<<28)
325 #define HS CS
326 #define CC (0b0011<<28)
327 #define LO CC
328 #define MI (0b0100<<28)
329 #define PL (0b0101<<28)
330 #define VS (0b0110<<28)
331 #define VC (0b0111<<28)
332 #define HI (0b1000<<28)
333 #define LS (0b1001<<28)
334 #define GE (0b1010<<28)
335 #define LT (0b1011<<28)
336 #define GT (0b1100<<28)
337 #define LE (0b1101<<28)
338 #define AL (0b1110<<28)
339 #define cond(what, op) (what | (op&~AL))
340
341 // XXX: v not correctly computed
342 #define BKPT(v) (AL | 0b10010<<20 | ((v&~0xF)<<4) | 0b0111<<4 | (v&0xF))
343
344 #define YIELD (0b110010<<20 | 0b1111<<12 | 1)
345 #define NOP cond(AL, YIELD)
346
347 // immediate value must fit in 0xFF!
348 #define ANDi(dst, src, i) (AL | (0b001<<25) | (0b00000<<20) | (src<<16) | (dst<<12) | rimm(i))
349 #define EORi(dst, src, i) (AL | (0b001<<25) | (0b00010<<20) | (src<<16) | (dst<<12) | rimm(i))
350 #define SUBi(dst, src, i) (AL | (0b001<<25) | (0b00100<<20) | (src<<16) | (dst<<12) | rimm(i))
351 #define RSBi(dst, src, i) (AL | (0b001<<25) | (0b00110<<20) | (src<<16) | (dst<<12) | rimm(i))
352 #define ADDi(dst, src, i) (AL | (0b001<<25) | (0b01000<<20) | (src<<16) | (dst<<12) | rimm(i))
353 #define ADCi(dst, src, i) (AL | (0b001<<25) | (0b01010<<20) | (src<<16) | (dst<<12) | rimm(i))
354 #define SBCi(dst, src, i) (AL | (0b001<<25) | (0b01100<<20) | (src<<16) | (dst<<12) | rimm(i))
355 #define RSCi(dst, src, i) (AL | (0b001<<25) | (0b01110<<20) | (src<<16) | (dst<<12) | rimm(i))
356
357 #define ORRi(dst, src, i) (AL | (0b001<<25) | (0b11000<<20) | (src<<16) | (dst<<12) | rimm(i))
358 #define MOVi(dst, i) (AL | (0b001<<25) | (0b11010<<20) | (dst<<12) | rimm(i))
359 #define BICi(dst, src, i) (AL | (0b001<<25) | (0b11100<<20) | (src<<16) | (dst<<12) | rimm(i))
360 #define MVNi(dst, i) (AL | (0b001<<25) | (0b11110<<20) | (dst<<12) | rimm(i))
361
362 #define MOVW(dst, i) (AL | (0b11<<24) | ((((i)>>12)&0xF)<<16) | (dst<<12) | ((i)&((1<<12)-1)))
363 #define MOVT(dst, i) (AL | (0b11<<24) | (0b0100<<20) | ((((i)>>12)&0xF)<<16) | (dst<<12) | ((i)&((1<<12)-1)))
364
365 #define TSTi( src, i) (AL | (0b001<<25) | (0b10001<<20) | (src<<16) | rimm(i))
366 #define TEQi( src, i) (AL | (0b001<<25) | (0b10011<<20) | (src<<16) | rimm(i))
367 #define CMPi( src, i) (AL | (0b001<<25) | (0b10101<<20) | (src<<16) | rimm(i))
368 #define CMNi( src, i) (AL | (0b001<<25) | (0b10111<<20) | (src<<16) | rimm(i))
369
370 #define ANDSi(dst, src, i) (ANDi(dst, src, i) | (1<<20))
371 #define EORSi(dst, src, i) (EORi(dst, src, i) | (1<<20))
372 #define SUBSi(dst, src, i) (SUBi(dst, src, i) | (1<<20))
373 #define RSBSi(dst, src, i) (RSBi(dst, src, i) | (1<<20))
374 #define ADDSi(dst, src, i) (ADDi(dst, src, i) | (1<<20))
375 #define ADCSi(dst, src, i) (ADCi(dst, src, i) | (1<<20))
376 #define SBCSi(dst, src, i) (SBCi(dst, src, i) | (1<<20))
377 #define RSCSi(dst, src, i) (RSCi(dst, src, i) | (1<<20))
378
379 #define ORRSi(dst, src, i) (ORRi(dst, src, i) | (1<<20))
380 #define MOVSi(dst, i) (MOVi(dst, i) | (1<<20))
381 #define BICSi(dst, src, i) (BICi(dst, src, i) | (1<<20))
/* fixed: original expanded to MVNi(dst, src, i) — `src` is not a
 * parameter of MVNSi and MVNi only takes (dst, i), so any use of this
 * macro failed to compile */
#define MVNSi(dst, i) (MVNi(dst, i) | (1<<20))
383
384 #define AND(dst, src, reg) (AL | (0b000<<25) | (0b00000<<20) | (src<<16) | (dst<<12) | reg)
385 #define EOR(dst, src, reg) (AL | (0b000<<25) | (0b00010<<20) | (src<<16) | (dst<<12) | reg)
386 #define SUB(dst, src, reg) (AL | (0b000<<25) | (0b00100<<20) | (src<<16) | (dst<<12) | reg)
387 #define RSB(dst, src, reg) (AL | (0b000<<25) | (0b00110<<20) | (src<<16) | (dst<<12) | reg)
388 #define ADD(dst, src, reg) (AL | (0b000<<25) | (0b01000<<20) | (src<<16) | (dst<<12) | reg)
389 #define ADC(dst, src, reg) (AL | (0b000<<25) | (0b01010<<20) | (src<<16) | (dst<<12) | reg)
390 #define SBC(dst, src, reg) (AL | (0b000<<25) | (0b01100<<20) | (src<<16) | (dst<<12) | reg)
391 #define RSC(dst, src, reg) (AL | (0b000<<25) | (0b01110<<20) | (src<<16) | (dst<<12) | reg)
392
393 #define ORR(dst, src, reg) (AL | (0b000<<25) | (0b11000<<20) | (src<<16) | (dst<<12) | reg)
394 #define MOV(dst, src) (AL | (0b000<<25) | (0b11010<<20) | (dst<<12) | src)
395
396 #define LSL(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8) | (0b0001<<4) | src)
397 #define LSR(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8) | (0b0011<<4) | src)
398 #define ASR(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8) | (0b0101<<4) | src)
399 #define ROR(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8) | (0b0111<<4) | src)
400
401 #define LSLi(dst, src, i) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b000<<4) | src)
402 #define LSRi(dst, src, i) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b010<<4) | src)
403 #define ASRi(dst, src, i) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b100<<4) | src)
404 #define RORi(dst, src, i) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b110<<4) | src)
405 #define RRX(dst, src) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (0b110<<4) | src)
406
407 #define BIC(dst, src, reg) (AL | (0b000<<25) | (0b11100<<20) | (src<<16) | (dst<<12) | reg)
408 #define MVN(dst, reg) (AL | (0b000<<25) | (0b11110<<20) | (dst<<12) | reg)
409
410 #define TST( src, reg) (AL | (0b000<<25) | (0b10001<<20) | (src<<16) | reg)
411 #define TEQ( src, reg) (AL | (0b000<<25) | (0b10011<<20) | (src<<16) | reg)
412 #define CMP( src, reg) (AL | (0b000<<25) | (0b10101<<20) | (src<<16) | reg)
413 #define CMN( src, reg) (AL | (0b000<<25) | (0b10111<<20) | (src<<16) | reg)
414
415 #define LDRa(dst, base, off) (AL | (0b011<<25) | (0b1100<<21) | (1<<20) | base<<16 | dst<<12 | off)
416 #define LDRx(dst, base, off) (AL | (0b011<<25) | (0b1000<<21) | (1<<20) | base<<16 | dst<<12 | off)
417
418 #define LDRai(dst, base, off) (AL | (0b010<<25) | (0b1100<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
419 #define LDRxi(dst, base, off) (AL | (0b010<<25) | (0b1000<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
420 #define LDRxiw(dst, base, off) (AL | (0b010<<25) | (0b1001<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
421
422 #define LDRTa(dst, base, off) (AL | (0b011<<25) | (0b0101<<21) | (1<<20) | base<<16 | dst<<12 | off)
423 #define LDRTx(dst, base, off) (AL | (0b011<<25) | (0b0001<<21) | (1<<20) | base<<16 | dst<<12 | off)
424 #define LDRTai(dst, base, off) (AL | (0b010<<25) | (0b0101<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
425 #define LDRTxi(dst, base, off) (AL | (0b010<<25) | (0b0001<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
426
427 #define LDRBa(dst, base, off) (AL | (0b011<<25) | (0b1110<<21) | (1<<20) | base<<16 | dst<<12 | off)
428 #define LDRSBai(dst, base, off) (AL | (0b000<<25) | (0b0110<<21) | (1<<20) | base<<16 | dst<<12 | ((off&0xF0)<<4)|0b1101<<4|(off&0x0F))
429 #define STRBa(dst, base, off) (AL | (0b011<<25) | (0b1110<<21) | (0<<20) | base<<16 | dst<<12 | off)
430
431 #define LDRHa(dst, base, off) (AL | (0b000<<25) | (0b1100<<21) | (1<<20) | base<<16 | dst<<12 | (0b1011<<4) | off)
432 #define LDRSHai(dst, base, off) (AL | (0b000<<25) | (0b1110<<21) | (1<<20) | base<<16 | dst<<12 | ((off&0xF0)<<4)|0b1111<<4|(off&0x0F))
433 #define STRHa(dst, base, off) (AL | (0b000<<25) | (0b1100<<21) | (0<<20) | base<<16 | dst<<12 | (0b1011<<4) | off)
434
435 #define STRa(dst, base, off) (AL | (0b011<<25) | (0b1100<<21) | (0<<20) | base<<16 | dst<<12 | off)
436 #define STRx(dst, base, off) (AL | (0b011<<25) | (0b1000<<21) | (0<<20) | base<<16 | dst<<12 | off)
437 #define STRai(dst, base, off) (AL | (0b010<<25) | (0b1100<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
438 #define STRxi(dst, base, off) (AL | (0b010<<25) | (0b1000<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
439 #define STRaiw(dst, base, off) (AL | (0b010<<25) | (0b1101<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
440 #define STRxiw(dst, base, off) (AL | (0b010<<25) | (0b1001<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
441
// pop a single register: LDR reg, [SP], #4 (post-increment SP by 4)
// fixed: the post-increment immediate was `reg` (the register number)
// instead of 4, so e.g. POP1(R3) only advanced SP by 3 — compare PUSH1
// below and the hardcoded "pop pc" word 0xe49df004 used for OP_LEAVE
#define POP1(reg) (AL | (0b010<<25) | (0b0100<<21) | (1<<20) | SP<<16 | reg<<12 | 4)
// push a single register: STR reg, [SP, #-4]! (pre-decrement with writeback)
#define PUSH1(reg) (AL | (0b010<<25) | (0b1001<<21) | (0<<20) | SP<<16 | reg<<12 | 4)
446
447 // branch to target address (for small jumps)
448 #define Bi(i) \
449 (AL | (0b10)<<26 | (1<<25) /*I*/ | (0<<24) /*L*/ | (i))
450 // call subroutine
451 #define BLi(i) \
452 (AL | (0b10)<<26 | (1<<25) /*I*/ | (1<<24) /*L*/ | (i))
453 // branch and exchange (register)
454 #define BX(reg) \
455 (AL | 0b00010010<<20 | 0b1111<<16 | 0b1111<<12 | 0b1111<<8| 0b0001<<4 | reg)
456 // call subroutine (register)
457 #define BLX(reg) \
458 (AL | 0b00010010<<20 | 0b1111<<16 | 0b1111<<12 | 0b1111<<8| 0b0011<<4 | reg)
459
460 #define PUSH(mask) (AL | (0b100100<<22) | (0b10<<20) | (0b1101<<16) | mask)
461 #define PUSH2(r1, r2) (AL | (0b100100<<22) | (0b10<<20) | (0b1101<<16) | 1<<r1 | 1<<r2)
462 //#define PUSH1(reg) STRxiw(SP, reg, 4)
463
464 #define POP(mask) (0xe8bd0000|mask)
465
466 #define STM(base, regs) \
467 (AL | 0b100<<25 | 0<<24/*P*/| 0<<24/*U*/| 0<<24/*S*/| 0<<24/*W*/ | (base<<16) | (regs&~(1<<16)))
468
469 // note: op1 and op2 must not be the same
470 #define MUL(op1, op2, op3) \
471 (AL | 0b0000000<<21 | (1<<20) /*S*/ | (op1<<16) | (op3<<8) | 0b1001<<4 | (op2))
472
473 // puts integer in R0
474 #define emit_MOVR0i(arg) emit_MOVRxi(R0, arg)
475
476 // puts integer arg in register reg
477 #define emit_MOVRxi(reg, arg) do { \
478 emit(MOVW(reg, (arg&0xFFFF))); \
479 if (arg > 0xFFFF) \
480 emit(MOVT(reg, (((arg>>16)&0xFFFF)))); \
481 } while(0)
482
483 // puts integer arg in register reg. adds nop if only one instr is needed to
484 // make size constant
485 #define emit_MOVRxi_or_NOP(reg, arg) do { \
486 emit(MOVW(reg, (arg&0xFFFF))); \
487 if (arg > 0xFFFF) \
488 emit(MOVT(reg, (((arg>>16)&0xFFFF)))); \
489 else \
490 emit(NOP); \
491 } while(0)
492
493 // arm core register -> singe precision register
494 #define VMOVass(Vn, Rt) (AL|(0b1110<<24)|(0b000<<21)|(0<<20)| ((Vn>>1)<<16) | (Rt<<12) | (0b1010<<8) | ((Vn&1)<<7) | (1<<4))
495 // singe precision register -> arm core register
496 #define VMOVssa(Rt, Vn) (AL|(0b1110<<24)|(0b000<<21)|(1<<20)| ((Vn>>1)<<16) | (Rt<<12) | (0b1010<<8) | ((Vn&1)<<7) | (1<<4))
497
498 #define _VCVT_F(Vd, Vm, opc2, op) \
499 (AL|(0b11101<<23)|((Vd&1)<<22)|(0b111<<19)|(opc2<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|(op<<7)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
500 #define VCVT_F32_U32(Sd, Sm) _VCVT_F(Sd, Sm, 0b000, 0 /* unsigned */)
501 #define VCVT_U32_F32(Sd, Sm) _VCVT_F(Sd, Sm, 0b100, 1 /* round zero */)
502 #define VCVT_F32_S32(Sd, Sm) _VCVT_F(Sd, Sm, 0b000, 1 /* unsigned */)
503 #define VCVT_S32_F32(Sd, Sm) _VCVT_F(Sd, Sm, 0b101, 1 /* round zero */)
504
505 #define VLDRa(Vd, Rn, i) (AL|(0b1101<<24)|1<<23|((Vd&1)<<22)|1<<20|(Rn<<16)|((Vd>>1)<<12)|(0b1010<<8)|off8(i))
506 #define VSTRa(Vd, Rn, i) (AL|(0b1101<<24)|1<<23|((Vd&1)<<22)|0<<20|(Rn<<16)|((Vd>>1)<<12)|(0b1010<<8)|off8(i))
507
508 #define VNEG_F32(Vd, Vm) \
509 (AL|(0b11101<<23)|((Vd&1)<<22)|(0b11<<20)|(1<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
510
511 #define VADD_F32(Vd, Vn, Vm) \
512 (AL|(0b11100<<23)|((Vd&1)<<22)|(0b11<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|((Vn&1)<<7)|(0<<6)|((Vm&1)<<5)|(Vm>>1))
513 #define VSUB_F32(Vd, Vn, Vm) \
514 (AL|(0b11100<<23)|((Vd&1)<<22)|(0b11<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|((Vn&1)<<7)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
515 #define VMUL_F32(Vd, Vn, Vm) \
516 (AL|(0b11100<<23)|((Vd&1)<<22)|(0b10<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101)<<9|(0<<8)|((Vn&1)<<7)|(0<<6)|((Vm&1)<<5)|(Vm>>1))
517 #define VDIV_F32(Vd, Vn, Vm) \
518 (AL|(0b11101<<23)|((Vd&1)<<22)|(0b00<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|((Vn&1)<<7)|(0<<6)|((Vm&1)<<5)|(Vm>>1))
519
520 #define _VCMP_F32(Vd, Vm, E) \
521 (AL|(0b11101<<23)|((Vd&1)<<22)|(0b11<<20)|((0b0100)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|(E<<7)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
522 #define VCMP_F32(Vd, Vm) _VCMP_F32(Vd, Vm, 0)
523
524 #define VMRS(Rt) \
525 (AL|(0b11101111<<20)|(0b0001<<16)|(Rt<<12)|(0b1010<<8)|(1<<4))
526
527 // check if instruction in R0 is within range. Clobbers R1, R12
528 #define CHECK_JUMP do { \
529 static int bytes_to_skip = -1; \
530 static unsigned branch = -1; \
531 emit_MOVRxi(R1, (unsigned)vm->instructionCount); \
532 emit(CMP(R0, R1)); \
533 if (branch == -1) \
534 branch = vm->codeLength; \
535 emit(cond(LT, Bi(j_rel(bytes_to_skip)))); \
536 emit_MOVRxi_or_NOP(R12, (unsigned)ErrJump); \
537 emit(BLX(R12)); \
538 if (bytes_to_skip == -1) \
539 bytes_to_skip = vm->codeLength - branch; \
540 } while(0)
541
542 //#define CONST_OPTIMIZE
543 #ifdef CONST_OPTIMIZE
544 #define MAYBE_EMIT_CONST() \
545 if (got_const) \
546 { \
547 got_const = 0; \
548 vm->instructionPointers[instruction-1] = assembler_get_code_size(); \
549 STACK_PUSH(4); \
550 emit("movl $%d, (%%r9, %%rbx, 4)", const_value); \
551 }
552 #else
553 #define MAYBE_EMIT_CONST()
554 #endif
555
556 // optimize: use load multiple
557 #define IJ(comparator) do { \
558 MAYBE_EMIT_CONST(); \
559 emit_MOVRxi(R0, arg.i); \
560 CHECK_JUMP; \
561 emit(LDRTxi(R0, rOPSTACK, 4)); \
562 emit(LDRTxi(R1, rOPSTACK, 4)); \
563 emit(CMP(R1, R0)); \
564 emit(cond(comparator, Bi(j_rel(vm->instructionPointers[arg.i]-vm->codeLength)))); \
565 } while (0)
566
567 #define FJ(comparator) do { \
568 emit_MOVRxi(R0, arg.i); \
569 CHECK_JUMP; \
570 emit(SUBi(rOPSTACK, rOPSTACK, 8)); \
571 emit(VLDRa(S15, rOPSTACK, 4)); \
572 emit(VLDRa(S14, rOPSTACK, 8)); \
573 emit(VCMP_F32(S15, S14)); \
574 emit(VMRS(APSR_nzcv)); \
575 emit(cond(comparator, Bi(j_rel(vm->instructionPointers[arg.i]-vm->codeLength)))); \
576 } while (0)
577
578 #define printreg(reg) emit(PUSH1(R3)); emit(BLX(reg)); emit(POP1(R3));
579
/*
 * Convert a byte offset x into the signed 24-bit word offset field of an
 * ARM B/BL instruction.  The -2 accounts for the PC reading 8 bytes
 * ahead due to pipelining.  pc is only used for the error message.
 * Dies if x is unaligned or out of the representable range.
 *
 * Fixed: 0xFF<<24 left-shifted into the sign bit of int, which is
 * undefined behavior in C11; unsigned literals give the same bit
 * patterns without UB.  (x>>2 on negative x relies on arithmetic shift,
 * which GCC guarantees on ARM.)
 */
static inline unsigned _j_rel(int x, int pc)
{
	if (x & 3)
		goto err; /* branch targets must be word aligned */
	x = (x >> 2) - 2;
	if (x < 0)
	{
		/* negative offset: top 8 bits must all be sign bits ... */
		if (((unsigned)x & (0xFFu << 24)) != 0xFFu << 24)
			goto err;
		/* ... and are dropped to leave the 24-bit field */
		x &= ~(0xFFu << 24);
	}
	else if ((unsigned)x & (0xFFu << 24))
		goto err; /* positive offset too large for 24 bits */
	return x;
err:
	DIE("jump %d out of range at %d", x, pc);
}
596
VM_Compile(vm_t * vm,vmHeader_t * header)597 void VM_Compile(vm_t *vm, vmHeader_t *header)
598 {
599 unsigned char *code;
600 int i_count, pc = 0;
601 int pass;
602 int codeoffsets[2]; // was 1024 but it's only used for OFF_CODE and OFF_IMMEDIATES
603
604 #define j_rel(x) (pass?_j_rel(x, pc):0xBAD)
605 #define OFFSET(i) (pass?(j_rel(codeoffsets[i]-vm->codeLength)):(0xF000000F))
606 //#define new_offset() (offsidx++)
607 #define get_offset(i) (codeoffsets[i])
608 #define save_offset(i) (codeoffsets[i] = vm->codeLength)
609 #define OFF_CODE 0
610 #define OFF_IMMEDIATES 1
611
612 vm->compiled = qfalse;
613
614 vm->codeBase = NULL;
615 vm->codeLength = 0;
616
617 for (pass = 0; pass < 2; ++pass) {
618
619 // int offsidx = 0;
620
621 #ifdef CONST_OPTIMIZE
622 // const optimization
623 unsigned got_const = 0, const_value = 0;
624 #endif
625
626 if(pass)
627 {
628 vm->codeBase = mmap(NULL, vm->codeLength, PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
629 if(vm->codeBase == MAP_FAILED)
630 Com_Error(ERR_FATAL, "VM_CompileARM: can't mmap memory");
631 vm->codeLength = 0;
632 }
633
634 //int (*entry)(vm_t*, int*, int*);
635 emit(PUSH((((1<<8)-1)<<4)|(1<<14))); // push R4-R11, LR
636 emit(SUBi(SP, SP, 12)); // align stack!
637 emit(LDRai(rCODEBASE, R0, offsetof(vm_t, codeBase)));
638 emit(LDRai(rDATABASE, R0, offsetof(vm_t, dataBase)));
639 emit(LDRai(rDATAMASK, R0, offsetof(vm_t, dataMask)));
640 emit(LDRai(rPSTACK, R1, 0));
641 emit(MOV(rOPSTACK, R2)); // TODO: reverse opstack to avoid writing to return address
642 emit(MOV(rOPSTACKBASE, rOPSTACK));
643
644 emit(BLi(OFFSET(OFF_CODE)));
645
646 // save return value in r0
647 emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
648
649 emit(ADDi(SP, SP, 12)); // align stack!
650 emit(POP((((1<<8)-1)<<4)|(1<<15))); // pop R4-R11, LR -> PC
651
652 /* save some immediates here */
653 emit(BKPT(0));
654 emit(BKPT(0));
655 save_offset(OFF_IMMEDIATES);
656 // emit((unsigned)whatever);
657 emit(BKPT(0));
658 emit(BKPT(0));
659
660 save_offset(OFF_CODE);
661 // offsidx = OFF_IMMEDIATES+1;
662
663 code = (unsigned char *) header + header->codeOffset;
664 pc = 0;
665
666 for (i_count = 0; i_count < header->instructionCount; i_count++) {
667 union {
668 unsigned char b[4];
669 unsigned int i;
670 } arg;
671 unsigned char op = code[pc++];
672
673 vm->instructionPointers[i_count] = vm->codeLength;
674
675 if (vm_opInfo[op] & opImm4)
676 {
677 memcpy(arg.b, &code[pc], 4);
678 pc += 4;
679 #ifdef EXCESSIVE_DEBUG
680 Com_Printf("%d: instruction %d (%s %d), offset %d\n", pass, i_count, opnames[op], arg.i, vm->codeLength);
681 #endif
682 }
683 else if (vm_opInfo[op] & opImm1)
684 {
685 arg.b[0] = code[pc];
686 ++pc;
687 #ifdef EXCESSIVE_DEBUG
688 Com_Printf("%d: instruction %d (%s %hhd), offset %d\n", pass, i_count, opnames[op], arg.i, vm->codeLength);
689 #endif
690 }
691 else
692 {
693 #ifdef EXCESSIVE_DEBUG
694 Com_Printf("%d: instruction %d (%s), offset %d\n", pass, i_count, opnames[op], vm->codeLength);
695 #endif
696 }
697
698 // TODO: for debug only
699 //emit_MOVRxi(R4, i_count);
700
701 switch ( op )
702 {
703 case OP_UNDEF:
704 break;
705
706 case OP_IGNORE:
707 NOTIMPL(op);
708 break;
709
710 case OP_BREAK:
711 emit(BKPT(0));
712 break;
713
714 case OP_ENTER:
715 MAYBE_EMIT_CONST();
716 emit(PUSH1(LR));
717 emit(SUBi(SP, SP, 12)); // align stack
718 if (arg.i == 0 || can_encode(arg.i))
719 {
720 emit(SUBi(rPSTACK, rPSTACK, arg.i)); // pstack -= arg
721 }
722 else
723 {
724 emit_MOVR0i(arg.i);
725 emit(SUB(rPSTACK, rPSTACK, R0)); // pstack -= arg
726 }
727 break;
728
729 case OP_LEAVE:
730 if (arg.i == 0 || can_encode(arg.i))
731 {
732 emit(ADDi(rPSTACK, rPSTACK, arg.i)); // pstack += arg
733 }
734 else
735 {
736 emit_MOVR0i(arg.i);
737 emit(ADD(rPSTACK, rPSTACK, R0)); // pstack += arg
738 }
739 emit(ADDi(SP, SP, 12));
740 emit(0xe49df004); // pop pc
741 break;
742
743 case OP_CALL:
744 #if 0
745 // save next instruction
746 emit_MOVR0i(i_count);
747 emit(STRa(R0, rDATABASE, rPSTACK)); // dataBase[pstack] = r0
748 #endif
749 #ifdef CONST_OPTIMIZE
750 if (got_const)
751 {
752 NOTIMPL(op);
753 }
754 else
755 #endif
756 {
757 static int bytes_to_skip = -1;
758 static unsigned start_block = -1;
759 MAYBE_EMIT_CONST();
760 // get instruction nr from stack
761 emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
762 emit(CMPi(R0, 0)); // check if syscall
763 if (start_block == -1)
764 start_block = vm->codeLength;
765 emit(cond(LT, Bi(j_rel(bytes_to_skip))));
766 CHECK_JUMP;
767 emit_MOVRxi_or_NOP(R1, (unsigned)vm->instructionPointers);
768 emit(LDRa(R0, R1, rLSL(2, R0))); // r0 = ((int*)r1)[r0]
769 emit(ADD(R0, rCODEBASE, R0)); // r0 = codeBase+r0
770 emit(BLX(R0));
771 emit(Bi(j_rel(vm->instructionPointers[i_count+1]-vm->codeLength)));
772 if (bytes_to_skip == -1)
773 bytes_to_skip = vm->codeLength - start_block;
774 emit(MOV(R1, rPSTACK));
775 emit_MOVRxi(R12, (unsigned)asmcall);
776 emit(BLX(R12));
777 // store return value
778 emit(STRaiw(R0, rOPSTACK, 4)); // opstack+=4; *opstack = r0
779 }
780 break;
781
782 case OP_PUSH:
783 MAYBE_EMIT_CONST();
784 emit(ADDi(rOPSTACK, rOPSTACK, 4));
785 break;
786
787 case OP_POP:
788 MAYBE_EMIT_CONST();
789 emit(SUBi(rOPSTACK, rOPSTACK, 4));
790 break;
791
792 case OP_CONST:
793 MAYBE_EMIT_CONST();
794 emit_MOVR0i(arg.i);
795 emit(STRaiw(R0, rOPSTACK, 4)); // opstack+=4; *opstack = r0
796 break;
797
798 case OP_LOCAL:
799 MAYBE_EMIT_CONST();
800 if (arg.i == 0 || can_encode(arg.i))
801 {
802 emit(ADDi(R0, rPSTACK, arg.i)); // r0 = pstack+arg
803 }
804 else
805 {
806 emit_MOVR0i(arg.i);
807 emit(ADD(R0, rPSTACK, R0)); // r0 = pstack+arg
808 }
809 emit(STRaiw(R0, rOPSTACK, 4)); // opstack+=4; *opstack = r0
810 break;
811
812 case OP_JUMP:
813 #ifdef CONST_OPTIMIZE
814 if (got_const)
815 {
816 NOTIMPL(op);
817 }
818 else
819 #endif
820 {
821 emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
822 CHECK_JUMP;
823 emit_MOVRxi(R1, (unsigned)vm->instructionPointers);
824 emit(LDRa(R0, R1, rLSL(2, R0))); // r0 = ((int*)r1)[r0]
825 emit(ADD(R0, rCODEBASE, R0)); // r0 = codeBase+r0
826 emit(BLX(R0));
827 }
828 break;
829
830 case OP_EQ:
831 IJ(EQ);
832 break;
833
834 case OP_NE:
835 IJ(NE);
836 break;
837
838 case OP_LTI:
839 IJ(LT);
840 break;
841
842 case OP_LEI:
843 IJ(LE);
844 break;
845
846 case OP_GTI:
847 IJ(GT);
848 break;
849
850 case OP_GEI:
851 IJ(GE);
852 break;
853
854 case OP_LTU:
855 IJ(LO);
856 break;
857
858 case OP_LEU:
859 IJ(LS);
860 break;
861
862 case OP_GTU:
863 IJ(HI);
864 break;
865
866 case OP_GEU:
867 IJ(HS);
868 break;
869
870 case OP_EQF:
871 FJ(EQ);
872 break;
873
874 case OP_NEF:
875 FJ(NE);
876 break;
877
878 case OP_LTF:
879 FJ(LT);
880 break;
881
882 case OP_LEF:
883 FJ(LE);
884 break;
885
886 case OP_GTF:
887 FJ(GT);
888 break;
889
890 case OP_GEF:
891 FJ(GE);
892 break;
893
894 case OP_LOAD1:
895 MAYBE_EMIT_CONST();
896 emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
897 emit(AND(R0, rDATAMASK, R0)); // r0 = r0 & rDATAMASK
898 emit(LDRBa(R0, rDATABASE, R0)); // r0 = (unsigned char)dataBase[r0]
899 emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
900 break;
901
902 case OP_LOAD2:
903 MAYBE_EMIT_CONST();
904 emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
905 emit(AND(R0, rDATAMASK, R0)); // r0 = r0 & rDATAMASK
906 emit(LDRHa(R0, rDATABASE, R0)); // r0 = (unsigned short)dataBase[r0]
907 emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
908 break;
909
910 case OP_LOAD4:
911 MAYBE_EMIT_CONST();
912 emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
913 emit(AND(R0, rDATAMASK, R0)); // r0 = r0 & rDATAMASK
914 emit(LDRa(R0, rDATABASE, R0)); // r0 = dataBase[r0]
915 emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
916 break;
917
918 case OP_STORE1:
919 MAYBE_EMIT_CONST();
920 emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
921 emit(LDRTxi(R1, rOPSTACK, 4)); // r1 = *opstack; rOPSTACK -= 4
922 emit(AND(R1, rDATAMASK, R1)); // r1 = r1 & rDATAMASK
923 emit(STRBa(R0, rDATABASE, R1)); // database[r1] = r0
924 break;
925
926 case OP_STORE2:
927 MAYBE_EMIT_CONST();
928 emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
929 emit(LDRTxi(R1, rOPSTACK, 4)); // r1 = *opstack; rOPSTACK -= 4
930 emit(AND(R1, rDATAMASK, R1)); // r1 = r1 & rDATAMASK
931 emit(STRHa(R0, rDATABASE, R1)); // database[r1] = r0
932 break;
933
934 case OP_STORE4:
935 MAYBE_EMIT_CONST();
936 // optimize: use load multiple
937 // value
938 emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
939 // pointer
940 emit(LDRTxi(R1, rOPSTACK, 4)); // r1 = *opstack; rOPSTACK -= 4
941 emit(AND(R1, rDATAMASK, R1)); // r1 = r1 & rDATAMASK
942 // store value at pointer
943 emit(STRa(R0, rDATABASE, R1)); // database[r1] = r0
944 break;
945
946 case OP_ARG:
947 MAYBE_EMIT_CONST();
948 emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
949 emit(ADDi(R1, rPSTACK, arg.b[0])); // r1 = programStack+arg
950 emit(AND(R1, rDATAMASK, R1)); // r1 = r1 & rDATAMASK
951 emit(STRa(R0, rDATABASE, R1)); // dataBase[r1] = r0
952 break;
953
954 case OP_BLOCK_COPY:
955 MAYBE_EMIT_CONST();
956 emit(LDRTxi(R1, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
957 emit(LDRTxi(R0, rOPSTACK, 4));
958 emit_MOVRxi(R2, arg.i);
959 emit_MOVRxi(R12, (unsigned)VM_BlockCopy);
960 emit(BLX(R12));
961 break;
962
963 case OP_SEX8:
964 MAYBE_EMIT_CONST();
965 emit(LDRSBai(R0, rOPSTACK, 0)); // sign extend *opstack
966 emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
967 break;
968
969 case OP_SEX16:
970 MAYBE_EMIT_CONST();
971 emit(LDRSHai(R0, rOPSTACK, 0)); // sign extend *opstack
972 emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
973 break;
974
975 case OP_NEGI:
976 MAYBE_EMIT_CONST();
977 emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
978 emit(RSBi(R0, R0, 0)); // r0 = -r0
979 emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
980 break;
981
982 case OP_ADD:
983 MAYBE_EMIT_CONST();
984 emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
985 emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
986 emit(ADD(R0, R1, R0)); // r0 = r1 + r0
987 emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
988 break;
989
990 case OP_SUB:
991 MAYBE_EMIT_CONST();
992 emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
993 emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
994 emit(SUB(R0, R1, R0)); // r0 = r1 - r0
995 emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
996 break;
997
998 case OP_DIVI:
999 case OP_DIVU:
1000 MAYBE_EMIT_CONST();
1001 emit(LDRai(R1, rOPSTACK, 0)); // r1 = *opstack
1002 emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
1003 if ( op == OP_DIVI )
1004 emit_MOVRxi(R12, (unsigned)__aeabi_idiv);
1005 else
1006 emit_MOVRxi(R12, (unsigned)__aeabi_uidiv);
1007 emit(BLX(R12));
1008 emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
1009 break;
1010
1011 case OP_MODI:
1012 case OP_MODU:
1013 MAYBE_EMIT_CONST();
1014 emit(LDRai(R1, rOPSTACK, 0)); // r1 = *opstack
1015 emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
1016 if ( op == OP_MODI )
1017 emit_MOVRxi(R12, (unsigned)__aeabi_idivmod);
1018 else
1019 emit_MOVRxi(R12, (unsigned)__aeabi_uidivmod);
1020 emit(BLX(R12));
1021 emit(STRai(R1, rOPSTACK, 0)); // *opstack = r1
1022 break;
1023
1024 case OP_MULI:
1025 case OP_MULU:
1026 MAYBE_EMIT_CONST();
1027 emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
1028 emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
1029 emit(MUL(R0, R1, R0)); // r0 = r1 * r0
1030 emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
1031 break;
1032
1033 case OP_BAND:
1034 MAYBE_EMIT_CONST();
1035 emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
1036 emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
1037 emit(AND(R0, R1, R0)); // r0 = r1 & r0
1038 emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
1039 break;
1040
1041 case OP_BOR:
1042 MAYBE_EMIT_CONST();
1043 emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
1044 emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
1045 emit(ORR(R0, R1, R0)); // r0 = r1 | r0
1046 emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
1047 break;
1048
1049 case OP_BXOR:
1050 MAYBE_EMIT_CONST();
1051 emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
1052 emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
1053 emit(EOR(R0, R1, R0)); // r0 = r1 ^ r0
1054 emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
1055 break;
1056
1057 case OP_BCOM:
1058 MAYBE_EMIT_CONST();
1059 emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
1060 emit(MVN(R0, R0)); // r0 = ~r0
1061 emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
1062 break;
1063
1064 case OP_LSH:
1065 MAYBE_EMIT_CONST();
1066 emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
1067 emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
1068 emit(LSL(R0, R1, R0)); // r0 = r1 << r0
1069 emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
1070 break;
1071
1072 case OP_RSHI:
1073 MAYBE_EMIT_CONST();
1074 emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
1075 emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
1076 emit(ASR(R0, R1, R0)); // r0 = r1 >> r0
1077 emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
1078 break;
1079
1080 case OP_RSHU:
1081 MAYBE_EMIT_CONST();
1082 emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
1083 emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
1084 emit(LSR(R0, R1, R0)); // r0 = (unsigned)r1 >> r0
1085 emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
1086 break;
1087
1088 case OP_NEGF:
1089 MAYBE_EMIT_CONST();
1090 emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
1091 emit(VNEG_F32(S14, S14)); // s15 = -s14
1092 emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s15
1093 break;
1094
1095 case OP_ADDF:
1096 MAYBE_EMIT_CONST();
1097 emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
1098 // vldr can't modify rOPSTACK so
1099 // we'd either need to change it
1100 // with sub or use regular ldr+vmov
1101 emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
1102 emit(VMOVass(S15,R0)); // s15 = r0
1103 emit(VADD_F32(S14, S15, S14)); // s14 = s14 + s15
1104 emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s15
1105 break;
1106
1107 case OP_SUBF:
1108 emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
1109 // see OP_ADDF
1110 emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
1111 emit(VMOVass(S15,R0)); // s15 = r0
1112 emit(VSUB_F32(S14, S15, S14)); // s14 = s14 - s15
1113 emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s15
1114 break;
1115
1116 case OP_DIVF:
1117 emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
1118 // see OP_ADDF
1119 emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
1120 emit(VMOVass(S15,R0)); // s15 = r0
1121 emit(VDIV_F32(S14, S15, S14)); // s14 = s14 / s15
1122 emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s15
1123 break;
1124
1125 case OP_MULF:
1126 emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
1127 // see OP_ADDF
1128 emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
1129 emit(VMOVass(S15,R0)); // s15 = r0
1130 emit(VMUL_F32(S14, S15, S14)); // s14 = s14 * s15
1131 emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s15
1132 break;
1133
1134 case OP_CVIF:
1135 MAYBE_EMIT_CONST();
1136 emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
1137 emit(VMOVass(S14,R0)); // s14 = r0
1138 emit(VCVT_F32_S32(S14, S14)); // s15 = (float)s14
1139 emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s15
1140 break;
1141
1142 case OP_CVFI:
1143 MAYBE_EMIT_CONST();
1144 emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
1145 emit(VCVT_S32_F32(S14, S14)); // s15 = (int)s14
1146 emit(VMOVssa(R0,S14)); // s14 = r0
1147 emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
1148 break;
1149 }
1150 }
1151
1152 // never reached
1153 emit(BKPT(0));
1154 } // pass
1155
1156 if (mprotect(vm->codeBase, vm->codeLength, PROT_READ|PROT_EXEC/* |PROT_WRITE */)) {
1157 VM_Destroy_Compiled(vm);
1158 DIE("mprotect failed");
1159 }
1160
1161 // clear icache, http://blogs.arm.com/software-enablement/141-caches-and-self-modifying-code/
1162 __clear_cache(vm->codeBase, vm->codeBase+vm->codeLength);
1163
1164 vm->destroy = VM_Destroy_Compiled;
1165 vm->compiled = qtrue;
1166 }
1167
/*
 * VM_CallCompiled
 *
 * Entry point into the JIT-generated code produced by VM_Compile.
 * Marshals the caller's arguments onto the VM's program stack, sets up
 * a 16-byte-aligned operand stack seeded with a canary, jumps into the
 * translated code, and validates both stacks on return.
 *
 * vm   - virtual machine whose codeBase holds the generated image
 * args - MAX_VMMAIN_ARGS ints forwarded to the VM's vmMain
 *
 * Returns the value produced by the VM entry point.
 */
int VM_CallCompiled(vm_t *vm, int *args)
{
	byte stack[OPSTACK_SIZE + 15];	// raw space; aligned below via PADP
	int (*entry)(vm_t *, int *, int *);
	int *opStack;
	int *vmArgs;
	byte *dataImage = vm->dataBase;
	int programStack = vm->programStack;
	int savedStack = programStack;
	int result;

	currentVM = vm;
	vm->currentlyInterpreting = qtrue;

	// Reserve the call frame (8 bytes) plus the vmMain argument area,
	// then copy the arguments into VM data memory.
	programStack -= ( 8 + 4 * MAX_VMMAIN_ARGS );
	vmArgs = (int *)&dataImage[ programStack + 8 ];
	memcpy( vmArgs, args, 4 * MAX_VMMAIN_ARGS );
	vmArgs[-1] = 0;		// return address slot inside the VM
	vmArgs[-2] = -1;	// sentinel frame marker

	// Canary lets us detect opstack corruption by the generated code.
	opStack = PADP(stack, 16);
	*opStack = 0xDEADBEEF;

	/* jump into the generated code */
	entry = (void *)(vm->codeBase);
	result = entry(vm, &programStack, opStack);

	if (*opStack != 0xDEADBEEF)
	{
		Com_Error(ERR_DROP, "opStack corrupted in compiled code");
	}

	if (programStack != savedStack - (8 + 4 * MAX_VMMAIN_ARGS))
		Com_Error(ERR_DROP, "programStack corrupted in compiled code");

	vm->programStack = savedStack;
	vm->currentlyInterpreting = qfalse;

	return result;
}
1223