1 /*
2 * code.c - compiled code builder/handler
3 *
4 * Copyright (c) 2005-2020 Shiro Kawai <shiro@acm.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the authors nor the names of its contributors
18 * may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #define LIBGAUCHE_BODY
35 #include "gauche.h"
36 #include "gauche/class.h"
37 #include "gauche/code.h"
38 #include "gauche/vminsn.h"
39 #include "gauche/priv/codeP.h"
40 #include "gauche/priv/identifierP.h"
41 #include "gauche/priv/builtin-syms.h"
42
43 /*===============================================================
44 * NVM related stuff
45 */
46
47 /* Debug information:
48 *
49 * debug info is kept as an assoc-list with insn offset
50 * as a key.
51 */
52
Scm_CompiledCodeFullName(ScmCompiledCode * cc)53 ScmObj Scm_CompiledCodeFullName(ScmCompiledCode *cc)
54 {
55 if (SCM_COMPILED_CODE_P(cc->parent)
56 && !SCM_EQ(SCM_COMPILED_CODE(cc->parent)->name, SCM_SYM_TOPLEVEL)) {
57 ScmObj h = SCM_NIL, t = SCM_NIL;
58 for (;;) {
59 SCM_APPEND1(h, t, cc->name);
60 if (!SCM_COMPILED_CODE_P(cc->parent)) break;
61 cc = SCM_COMPILED_CODE(cc->parent);
62 if (SCM_EQ(cc->name, SCM_SYM_TOPLEVEL)) break;
63 }
64 return Scm_ReverseX(h);
65 } else {
66 return cc->name;
67 }
68 }
69
make_compiled_code(void)70 static ScmCompiledCode *make_compiled_code(void)
71 {
72 ScmCompiledCode *cc = SCM_NEW(ScmCompiledCode);
73 SCM_SET_CLASS(cc, SCM_CLASS_COMPILED_CODE);
74 cc->code = NULL;
75 cc->constants = NULL;
76 cc->maxstack = -1;
77 cc->debugInfo = SCM_NIL;
78 cc->signatureInfo = SCM_FALSE;
79 cc->name = SCM_FALSE;
80 cc->parent = SCM_FALSE;
81 cc->builder = NULL;
82 return cc;
83 }
84
85 /* Copy the source compiled-code into the destination. This is used in
86 the external optimizer to 'edit' code vector---such routines can
87 create a new compiled-code, then copy it to the original so that
88 the identity of compiled-code is kept. */
Scm_CompiledCodeCopyX(ScmCompiledCode * dest,const ScmCompiledCode * src)89 void Scm_CompiledCodeCopyX(ScmCompiledCode *dest,
90 const ScmCompiledCode *src)
91 {
92 SCM_ASSERT(dest->builder == NULL);
93 SCM_ASSERT(src->builder == NULL);
94
95 memcpy(dest, src, sizeof(ScmCompiledCode));
96 }
97
98 /*----------------------------------------------------------------------
99 * An API to execute statically compiled toplevel code. *PROVISIONAL*
100 */
101 static ScmSubrProc execute_toplevels;
102
Scm_VMExecuteToplevels(ScmCompiledCode * cs[])103 void Scm_VMExecuteToplevels(ScmCompiledCode *cs[])
104 {
105 ScmObj proc = Scm_MakeSubr(execute_toplevels, cs, 0, 0, SCM_FALSE);
106 Scm_ApplyRec(proc, SCM_NIL);
107 }
108
execute_toplevels_cc(ScmObj result SCM_UNUSED,void ** data)109 static ScmObj execute_toplevels_cc(ScmObj result SCM_UNUSED, void **data)
110 {
111 ScmCompiledCode **cs = (ScmCompiledCode **)data[0];
112 if (cs[0] == NULL) return SCM_UNDEFINED;
113 data[0] = cs+1;
114 ScmVM *vm = Scm_VM();
115 Scm_VMPushCC(execute_toplevels_cc, data, 1);
116 vm->base = cs[0];
117 vm->pc = vm->base->code;
118 return SCM_UNDEFINED;
119 }
120
execute_toplevels(ScmObj * args SCM_UNUSED,int nargs SCM_UNUSED,void * cv)121 static ScmObj execute_toplevels(ScmObj *args SCM_UNUSED,
122 int nargs SCM_UNUSED,
123 void *cv)
124 {
125 Scm_VMPushCC(execute_toplevels_cc, &cv, 1);
126 return SCM_UNDEFINED;
127 }
128
129 /*----------------------------------------------------------------------
130 * Disassembler
131 */
132 static ScmObj check_lifted_closure(ScmWord *p, ScmObj lifted);
133 static void print_header(const char *prefix, ScmObj name, ScmCompiledCode *cc);
134
Scm_CompiledCodeDump(ScmCompiledCode * cc)135 void Scm_CompiledCodeDump(ScmCompiledCode *cc)
136 {
137 ScmObj closures = SCM_NIL, lifted = SCM_NIL, shown_lifted = SCM_NIL;
138 int clonum = 0, more = FALSE;
139
140 print_header("main_code", SCM_MAKE_STR(""), cc);
141 do {
142 ScmWord *p = cc->code;
143 Scm_Printf(SCM_CUROUT, "signatureInfo: %S\n", cc->signatureInfo);
144 for (int i=0; i < cc->codeSize; i++) {
145 ScmWord insn = p[i];
146 ScmPort *out = SCM_PORT(Scm_MakeOutputStringPort(TRUE));
147 ScmObj info = Scm_Assq(SCM_MAKE_INT(i), cc->debugInfo);
148 u_int code = SCM_VM_INSN_CODE(insn);
149 const char *insn_name = Scm_VMInsnName(code);
150
151 switch (Scm_VMInsnNumParams(code)) {
152 case 0:
153 Scm_Printf(out, " %4d %s ", i, insn_name);
154 break;
155 case 1:
156 Scm_Printf(out, " %4d %s(%d) ", i, insn_name,
157 SCM_VM_INSN_ARG(insn));
158 break;
159 case 2:
160 Scm_Printf(out, " %4d %s(%d,%d) ", i, insn_name,
161 SCM_VM_INSN_ARG0(insn),SCM_VM_INSN_ARG1(insn));
162 break;
163 }
164 switch (Scm_VMInsnOperandType(code)) {
165 case SCM_VM_OPERAND_ADDR:
166 Scm_Printf(out, "%d", (ScmWord*)p[i+1] - cc->code);
167 i++;
168 break;
169 case SCM_VM_OPERAND_OBJ:
170 /* Check if we're referring to a lifted closure. */
171 lifted = check_lifted_closure(p+i, lifted);
172 Scm_Printf(out, "%S", p[i+1]);
173 i++;
174 break;
175 case SCM_VM_OPERAND_OBJ_ADDR:
176 Scm_Printf(out, "%S, %d", p[i+1], (ScmWord*)p[i+2] - cc->code);
177 i += 2;
178 break;
179 case SCM_VM_OPERAND_CODE:
180 Scm_Printf(out, "#<lambda %d>", clonum);
181 closures = Scm_Acons(SCM_OBJ(p[i+1]), SCM_MAKE_INT(clonum),
182 closures);
183 clonum++;
184 i++;
185 break;
186 case SCM_VM_OPERAND_CODES: {
187 Scm_Printf(out, "(");
188 ScmObj cp;
189 SCM_FOR_EACH(cp, SCM_OBJ(p[i+1])) {
190 if (SCM_COMPILED_CODE_P(SCM_CAR(cp))) {
191 closures = Scm_Acons(SCM_CAR(cp),
192 SCM_MAKE_INT(clonum),
193 closures);
194 Scm_Printf(out, "#<lambda %d>", clonum);
195 clonum++;
196 }
197 }
198 Scm_Printf(out, ")");
199 i++;
200 break;
201 }
202 default:
203 /*nothing*/;
204 }
205
206 /* Show info */
207 ScmObj s = Scm_GetOutputStringUnsafe(out, 0);
208 if (!SCM_PAIRP(info)) {
209 Scm_Puts(SCM_STRING(s), SCM_CUROUT);
210 Scm_Putc('\n', SCM_CUROUT);
211 } else {
212 int len = SCM_STRING_BODY_SIZE(SCM_STRING_BODY(s));
213 ScmObj srcinfo = Scm_Assq(SCM_SYM_SOURCE_INFO, info);
214 ScmObj bindinfo = Scm_Assq(SCM_SYM_BIND_INFO, info);
215 Scm_Puts(SCM_STRING(s), SCM_CUROUT);
216 Scm_Flush(SCM_CUROUT);
217 for (; len<32; len++) {
218 Scm_Putc(' ', SCM_CUROUT);
219 }
220 if (SCM_FALSEP(srcinfo)) {
221 Scm_Printf(SCM_CUROUT, "; lambda %#40.1S\n",
222 SCM_CDR(bindinfo));
223 } else {
224 Scm_Printf(SCM_CUROUT, "; %#40.1S\n",
225 Scm_UnwrapSyntax2(SCM_CDR(srcinfo), FALSE));
226 }
227 }
228 }
229 more = FALSE;
230 if (!SCM_NULLP(closures)) {
231 cc = SCM_COMPILED_CODE(SCM_CAAR(closures));
232 print_header("closure:", SCM_CDAR(closures), cc);
233 closures = SCM_CDR(closures);
234 more = TRUE;
235 } else if (!SCM_NULLP(lifted)) {
236 while (!SCM_NULLP(lifted)) {
237 if (SCM_FALSEP(Scm_Memq(SCM_CAAR(lifted), shown_lifted))) {
238 cc = SCM_COMPILED_CODE(SCM_CAAR(lifted));
239 print_header("lifted:", SCM_CDAR(lifted), cc);
240 shown_lifted = Scm_Cons(SCM_CAAR(lifted), shown_lifted);
241 lifted = SCM_CDR(lifted);
242 more = TRUE;
243 break;
244 } else {
245 lifted = SCM_CDR(lifted);
246 continue;
247 }
248 }
249 }
250 } while (more);
251 }
252
print_header(const char * prefix,ScmObj name,ScmCompiledCode * cc)253 static void print_header(const char *prefix, ScmObj name, ScmCompiledCode *cc)
254 {
255 Scm_Printf(SCM_CUROUT, "=== %s%A (name=%S, cc=%p, codevec=%p, size=%d, const=%d stack=%d):\n",
256 prefix, name, cc->name, cc, cc->code,
257 cc->codeSize, cc->constantSize, cc->maxstack);
258 }
259
260 /* The compiler may have lifted an internal closure to a global procedure.
261 We can tell so if the opcode is GREF_x, and the operand is an identifier,
262 whose name is an uninterned symbol and it is globally bound to a procedure.
263
264 If we indeed have a lifted closure, we chain the closure's code and
265 the identifier into the lifted list, returns the updated list.
266 Otherwise, we return lifted list as is.
267 */
check_lifted_closure(ScmWord * p,ScmObj lifted)268 static ScmObj check_lifted_closure(ScmWord *p, ScmObj lifted)
269 {
270 ScmWord code = SCM_VM_INSN_CODE(p[0]);
271 static ScmWord gref_insns[] = {
272 SCM_VM_GREF,
273 SCM_VM_GREF_PUSH,
274 SCM_VM_GREF_CALL,
275 SCM_VM_GREF_TAIL_CALL,
276 SCM_VM_PUSH_GREF,
277 SCM_VM_PUSH_GREF_CALL,
278 SCM_VM_PUSH_GREF_TAIL_CALL
279 };
280
281 if (!SCM_IDENTIFIERP(p[1])) return lifted;
282 ScmIdentifier *id = Scm_OutermostIdentifier(SCM_IDENTIFIER(p[1]));
283 if (SCM_SYMBOL_INTERNED(id->name)) return lifted;
284
285 for (u_int i=0; i < sizeof(gref_insns)/sizeof(ScmWord); i++) {
286 if (code == gref_insns[i]) {
287 ScmObj g = Scm_GlobalVariableRef(id->module, SCM_SYMBOL(id->name),
288 SCM_BINDING_STAY_IN_MODULE);
289 if (SCM_CLOSUREP(g)) {
290 if (SCM_FALSEP(Scm_Assq(SCM_CLOSURE(g)->code, lifted))) {
291 return Scm_Acons(SCM_CLOSURE(g)->code,
292 SCM_OBJ(id->name),
293 lifted);
294 } else {
295 return lifted;
296 }
297 }
298 }
299 }
300 return lifted;
301 }
302
303 /*------------------------------------------------------------------
304 * Builder - used by the new compiler
305 */
306
307 #define CC_BUILDER_CHUNK_BITS 5
308 #define CC_BUILDER_CHUNK_SIZE (1L<<CC_BUILDER_CHUNK_BITS)
309 #define CC_BUILDER_CHUNK_MASK (CC_BUILDER_CHUNK_SIZE-1)
310
311 typedef struct cc_builder_chunk {
312 struct cc_builder_chunk *prev;
313 ScmWord code[CC_BUILDER_CHUNK_SIZE];
314 } cc_builder_chunk;
315
316 /* To perform instruction combination, the builder buffers one insn/operand.
317 * currentInsn == SCM_WORD(-1) indicates there's no buffered insn.
318 */
319 typedef struct cc_builder_rec {
320 cc_builder_chunk *chunks;
321 int numChunks;
322 ScmObj constants; /* list of constants */
323 int currentIndex;
324 ScmWord currentInsn; /* buffer for instruction combining.
325 this can be a special value either
326 CC_BUILDER_BUFFER_EMPTY or
327 CC_BUILDER_BUFFER_TRANS. see below. */
328 int prevOpcode; /* previous saved insn opcode */
329 int currentOpcode; /* saved insn opcode */
330 int currentArg0; /* ditto */
331 int currentArg1; /* ditto */
332 ScmObj currentOperand; /* ditto */
333 ScmObj currentInfo; /* ditto */
334 int currentState; /* index to the current state of
335 combiner STN */
336 ScmObj labelDefs; /* alist of (name . offset) */
337 ScmObj labelRefs; /* alist of (name . offset-to-fill) */
338 int labelCount; /* counter to generate unique labels */
339 ScmObj debugInfo; /* alist of (offset (source-info obj)) */
340 } cc_builder;
341
342 /* Indicates that there's no pending instruction. */
343 #define CC_BUILDER_BUFFER_EMPTY SCM_WORD(-1)
344
345 /* Indicates that the instruction combiner is in the transitional
346 state. In ordinary circumstances this state will be resolved
347 as the code generation goes. However, if the instruction combination
348 is "cut off", for example by emitting a jump destination label, we
349 have to complete the instruction. It is done by seeking for
350 the default arc of the current state. */
351 #define CC_BUILDER_BUFFER_TRANS SCM_WORD(-2)
352
353 /* Some internal stuff */
354
355 #define CC_BUILDER_GET(b, cc) \
356 do { \
357 if (cc->builder == NULL) { \
358 Scm_Error("[internal error] CompiledCode is already frozen"); \
359 } \
360 (b) = (cc_builder*)cc->builder; \
361 } while (0)
362
make_cc_builder(void)363 static cc_builder *make_cc_builder(void)
364 {
365 cc_builder *b = SCM_NEW(cc_builder);
366 b->chunks = NULL;
367 b->numChunks = 0;
368 b->constants = SCM_NIL;
369 b->currentIndex = 0;
370 b->currentInsn = CC_BUILDER_BUFFER_EMPTY;
371 b->currentOpcode = b->prevOpcode = -1;
372 b->currentOperand = b->currentInfo = SCM_FALSE;
373 b->currentState = -1;
374 b->labelDefs = b->labelRefs = SCM_NIL;
375 b->labelCount = 0;
376 b->debugInfo = SCM_NIL;
377 return b;
378 }
379
cc_builder_add_word(cc_builder * b,ScmWord w)380 static void cc_builder_add_word(cc_builder *b, ScmWord w)
381 {
382 int ni = b->currentIndex & CC_BUILDER_CHUNK_MASK;
383 if (ni == 0) {
384 cc_builder_chunk *newchunk = SCM_NEW(cc_builder_chunk);
385 newchunk->prev = b->chunks;
386 b->chunks = newchunk;
387 b->numChunks++;
388 }
389 b->chunks->code[ni] = w;
390 b->currentIndex++;
391 }
392
cc_builder_add_constant(cc_builder * b,ScmObj obj)393 static void cc_builder_add_constant(cc_builder *b, ScmObj obj)
394 {
395 if (!SCM_PTRP(obj)) return;
396 if (!SCM_FALSEP(Scm_Memq(obj, b->constants))) return;
397 b->constants = Scm_Cons(obj, b->constants);
398 }
399
cc_builder_add_info(cc_builder * b)400 static void cc_builder_add_info(cc_builder *b)
401 {
402 if (SCM_FALSEP(b->currentInfo)) return;
403 b->debugInfo = Scm_Acons(SCM_MAKE_INT(b->currentIndex),
404 SCM_LIST1(Scm_Cons(SCM_SYM_SOURCE_INFO,
405 b->currentInfo)),
406 b->debugInfo);
407 b->currentInfo = SCM_FALSE;
408 }
409
410 /* Returns label offset of the given label, if the label is already defined.
411 Otherwise, returns -1. */
cc_builder_label_def(cc_builder * b,ScmObj label)412 static int cc_builder_label_def(cc_builder *b, ScmObj label)
413 {
414 ScmObj p = Scm_Assq(label, b->labelDefs);
415 if (SCM_PAIRP(p)) {
416 return SCM_INT_VALUE(SCM_CDR(p));
417 } else {
418 return -1;
419 }
420 }
421
422 static void finish_transition(cc_builder *b);
423
424 /* Flush the currentInsn buffer. */
cc_builder_flush(cc_builder * b)425 static void cc_builder_flush(cc_builder *b)
426 {
427 if ((b)->currentInsn == CC_BUILDER_BUFFER_EMPTY) return;
428 if ((b)->currentInsn == CC_BUILDER_BUFFER_TRANS) {
429 finish_transition(b);
430 }
431 cc_builder_add_info(b);
432 cc_builder_add_word(b, b->currentInsn);
433
434 u_int code = SCM_VM_INSN_CODE(b->currentInsn);
435 switch (Scm_VMInsnOperandType(code)) {
436 case SCM_VM_OPERAND_ADDR:
437 /* Addr should be a label. We just push the label reference
438 into labelRefs, and emit a dummy address for the time being.
439 (we can't emit the actual number even if we're referring to
440 the label that has already appeared, since the number should
441 be calculated after the code vector is allocated.) */
442 b->labelRefs = Scm_Acons(b->currentOperand,
443 SCM_MAKE_INT(b->currentIndex),
444 b->labelRefs);
445 cc_builder_add_word(b, SCM_WORD(0)); /* dummy */
446 break;
447 case SCM_VM_OPERAND_OBJ:;
448 case SCM_VM_OPERAND_CODES:
449 cc_builder_add_word(b, SCM_WORD(b->currentOperand));
450 cc_builder_add_constant(b, b->currentOperand);
451 break;
452 case SCM_VM_OPERAND_OBJ_ADDR:
453 /* operand would be given as a list of (OBJ LABEL). */
454 SCM_ASSERT(SCM_PAIRP(b->currentOperand)
455 && SCM_PAIRP(SCM_CDR(b->currentOperand)));
456 cc_builder_add_word(b, SCM_WORD(SCM_CAR(b->currentOperand)));
457 cc_builder_add_constant(b, SCM_CAR(b->currentOperand));
458 b->labelRefs = Scm_Acons(SCM_CADR(b->currentOperand),
459 SCM_MAKE_INT(b->currentIndex),
460 b->labelRefs);
461 cc_builder_add_word(b, SCM_WORD(0)); /* dummy */
462 break;
463 case SCM_VM_OPERAND_CODE:
464 if (!SCM_COMPILED_CODE_P(b->currentOperand)) goto badoperand;
465 cc_builder_add_word(b, SCM_WORD(b->currentOperand));
466 cc_builder_add_constant(b, b->currentOperand);
467 default:
468 break;
469 }
470 b->currentInsn = CC_BUILDER_BUFFER_EMPTY;
471 b->currentState = -1;
472 b->currentOpcode = -1;
473 return;
474 badoperand:
475 b->currentInsn = CC_BUILDER_BUFFER_EMPTY;
476 b->currentState = -1;
477 Scm_Error("[internal error] bad operand: %S", b->currentOperand);
478 return;
479 }
480
481 /* a peephole optimization; rewrite jump destination for cascaded jump
482 *
483 * - if the destination of JUMP-like insn (including conditional jump
484 * and PRE-CALL) is another JUMP, rewrite the destination.
485 * - if the destination of BF is another BF (this pattern appears frequently,
486 * e.g. 'or' is used in the test clause of 'cond'), rewrite the destination.
487 */
cc_builder_jumpopt(ScmCompiledCode * cc)488 static void cc_builder_jumpopt(ScmCompiledCode *cc)
489 {
490 ScmWord *cp = cc->code;
491
492 for (u_int i=0; i<(u_int)cc->codeSize; i++) {
493 u_int code = SCM_VM_INSN_CODE(*cp); cp++;
494 switch (Scm_VMInsnOperandType(code)) {
495 case SCM_VM_OPERAND_OBJ:;
496 case SCM_VM_OPERAND_CODE:;
497 case SCM_VM_OPERAND_CODES:;
498 i++; cp++;
499 break;
500 case SCM_VM_OPERAND_OBJ_ADDR:
501 i++; cp++;
502 /*FALLTHROUGH*/
503 case SCM_VM_OPERAND_ADDR: {
504 ScmWord *target = (ScmWord*)*cp;
505 while (SCM_VM_INSN_CODE(*target) == SCM_VM_JUMP
506 || (code == SCM_VM_BF
507 && SCM_VM_INSN_CODE(*target) == SCM_VM_BF)) {
508 target = (ScmWord*)target[1];
509 }
510 if (target != (ScmWord*)*cp) {
511 *cp = SCM_WORD(target);
512 }
513 i++; cp++;
514 break;
515 }
516 default:
517 break;
518 }
519 }
520 }
521
522 /* Creates and returns a new empty compiled-code object for building
523 new code chunk. */
Scm_MakeCompiledCodeBuilder(int reqargs,int optargs,ScmObj name,ScmObj parent,ScmObj intForm)524 ScmObj Scm_MakeCompiledCodeBuilder(int reqargs, int optargs,
525 ScmObj name, ScmObj parent, ScmObj intForm)
526 {
527 ScmCompiledCode *cc = make_compiled_code();
528 cc->builder = make_cc_builder();
529 cc->requiredArgs = reqargs;
530 cc->optionalArgs = optargs;
531 cc->name = name;
532 cc->parent = parent;
533 cc->intermediateForm = intForm;
534 return SCM_OBJ(cc);
535 }
536
537 /* Returns a label identifier (integer) unique to this code block */
Scm_CompiledCodeNewLabel(ScmCompiledCode * cc)538 ScmObj Scm_CompiledCodeNewLabel(ScmCompiledCode *cc)
539 {
540 cc_builder *b;
541 CC_BUILDER_GET(b, cc);
542 ScmObj label = SCM_MAKE_INT(b->labelCount);
543 b->labelCount++;
544 return label;
545 }
546
547 /* Set label to the current instruction position. */
Scm_CompiledCodeSetLabel(ScmCompiledCode * cc,ScmObj label)548 void Scm_CompiledCodeSetLabel(ScmCompiledCode *cc, ScmObj label)
549 {
550 cc_builder *b;
551
552 CC_BUILDER_GET(b, cc);
553
554 /* Flush buffered insn first. */
555 cc_builder_flush(b);
556
557 /* NB: should check duplicate labels */
558 b->labelDefs = Scm_Acons(label, SCM_MAKE_INT(b->currentIndex),
559 b->labelDefs);
560 }
561
562 /* Push arbitrary debug-info into the builder.
563 <debug-info> : (<insn-offset> <item> ...)
564 <insn-offset> : <integer> or 'definition
565 Currently supported item:
566 <item> : (<source-info> . source)
567 */
Scm_CompiledCodePushInfo(ScmCompiledCode * cc,ScmObj info)568 void Scm_CompiledCodePushInfo(ScmCompiledCode *cc, ScmObj info)
569 {
570 cc_builder *b;
571 CC_BUILDER_GET(b, cc);
572 b->debugInfo = Scm_Cons(info, b->debugInfo);
573 }
574
575 /* Pack the code accumulated in the builder into a code vector.
576 Perform label resolution and jump optimization. */
Scm_CompiledCodeFinishBuilder(ScmCompiledCode * cc,int maxstack)577 void Scm_CompiledCodeFinishBuilder(ScmCompiledCode *cc, int maxstack)
578 {
579 cc_builder *b;
580 CC_BUILDER_GET(b, cc);
581 cc_builder_flush(b);
582 cc->code = SCM_NEW_ATOMIC2(ScmWord *, b->currentIndex * sizeof(ScmWord));
583 cc->codeSize = b->currentIndex;
584
585 /* reverse chunks, leaving the first chunk in bcprev. */
586 cc_builder_chunk *bc, *bcprev = NULL;
587 for (bc = b->chunks; bc;) {
588 cc_builder_chunk *next = bc->prev;
589 bc->prev = bcprev;
590 bcprev = bc;
591 bc = next;
592 }
593
594 /* pack words */
595 bc = bcprev;
596 for (int i=0, j=0; i<b->currentIndex; i++, j++) {
597 if (j >= CC_BUILDER_CHUNK_SIZE) {
598 bc = bc->prev;
599 j = 0;
600 }
601 cc->code[i] = bc->code[j];
602 }
603
604 /* pack constants */
605 int numConstants = Scm_Length(b->constants);
606 if (numConstants > 0) {
607 cc->constants = SCM_NEW_ARRAY(ScmObj, numConstants);
608 ScmObj cp = b->constants;
609 for (int i=0; i<numConstants; i++, cp=SCM_CDR(cp)) {
610 cc->constants[i] = SCM_CAR(cp);
611 }
612 }
613 cc->constantSize = numConstants;
614
615 /* resolve labels */
616 ScmObj cp;
617 SCM_FOR_EACH(cp, b->labelRefs) {
618 int destAddr = cc_builder_label_def(b, SCM_CAAR(cp));
619 int operandAddr;
620 if (destAddr < 0) {
621 Scm_Error("[internal error] undefined label in compiled code: %S",
622 SCM_CAAR(cp));
623 }
624 operandAddr = SCM_INT_VALUE(SCM_CDAR(cp));
625 SCM_ASSERT(operandAddr >= 0 && operandAddr < cc->codeSize);
626 cc->code[operandAddr] = SCM_WORD(cc->code + destAddr);
627 }
628
629 /* jump destination optimization */
630 cc_builder_jumpopt(cc);
631
632 /* record debug info */
633 cc->debugInfo = b->debugInfo;
634
635 /* set max stack depth */
636 cc->maxstack = maxstack;
637
638 /* make sure this code is 'fixed'---no more building */
639 cc->builder = NULL;
640 }
641
642 /*----------------------------------------------------------------
643 * Emitting instruction and operand, performing instruction combination
644 */
645
646 /* This is originally implemented in Scheme, but moved here for efficiency,
647 * since this routine is the most frequently called one during compilation.
648 */
649
650 /* The state transition table */
651 struct stn_arc {
652 int input; /* input insn, or -1 for wildcard */
653 int action; /* EMIT, KEEP, NEXT */
654 int next; /* emitting insn / next state */
655 };
656
657 /* State transition actions */
658 enum {
659 NEXT,
660 EMIT,
661 KEEP
662 };
663
664 /* Include STN generated from vminsn.scm */
665 static struct stn_arc stn[] = {
666 #define STATE_TABLE
667 #include "vminsn.c"
668 #undef STATE_TABLE
669 };
670
671 /* Save the args/operand if necessary */
save_params(cc_builder * b,int code,int arg0,int arg1,ScmObj operand,ScmObj info)672 static inline void save_params(cc_builder *b, int code,
673 int arg0, int arg1, ScmObj operand,
674 ScmObj info)
675 {
676 b->prevOpcode = b->currentOpcode;
677 b->currentOpcode = code;
678 switch (Scm_VMInsnNumParams(code)) {
679 case 2: b->currentArg1 = arg1;
680 /* FALLTHROUGH */
681 case 1: b->currentArg0 = arg0;
682 /* FALLTHROUGH */
683 case 0:;
684 }
685 if (Scm_VMInsnOperandType(code) != SCM_VM_OPERAND_NONE) {
686 b->currentOperand = operand;
687 }
688 if (!SCM_FALSEP(info)) {
689 b->currentInfo = info;
690 }
691 }
692
693 static int vm_insn_flags(u_int code);
694
695 /* Fill the current insn word */
fill_current_insn(cc_builder * b,int code)696 static inline void fill_current_insn(cc_builder *b, int code)
697 {
698 /* A special handling of fold-lref insn.
699 Fold-lref insn is a combined insn LREF-XXXX(depth,offset). What's
700 special about it is that we 'fold' specialized LREF insn
701 (e.g. LREF10) into generic LREF.
702 */
703 #define SET_LREF_ARGS(dep, off) \
704 b->currentArg0 = (dep); b->currentArg1 = (off); break
705 if (vm_insn_flags(code) & SCM_VM_INSN_FOLD_LREF) {
706 switch (b->prevOpcode) {
707 case SCM_VM_LREF0: SET_LREF_ARGS(0, 0);
708 case SCM_VM_LREF1: SET_LREF_ARGS(0, 1);
709 case SCM_VM_LREF2: SET_LREF_ARGS(0, 2);
710 case SCM_VM_LREF3: SET_LREF_ARGS(0, 3);
711 case SCM_VM_LREF10: SET_LREF_ARGS(1, 0);
712 case SCM_VM_LREF11: SET_LREF_ARGS(1, 1);
713 case SCM_VM_LREF12: SET_LREF_ARGS(1, 2);
714 case SCM_VM_LREF20: SET_LREF_ARGS(2, 0);
715 case SCM_VM_LREF21: SET_LREF_ARGS(2, 1);
716 case SCM_VM_LREF30: SET_LREF_ARGS(3, 0);
717 case SCM_VM_LREF: /* args are already set */ break;
718 default: Scm_Error("[internal] Compiler internal error: FOLD_LREF insn needs to be combined with LREF*, but prevOpcode = %d", b->prevOpcode);
719 }
720 }
721 #undef SET_LREF_ARGS
722
723 /* Compose insn word */
724 switch (Scm_VMInsnNumParams(code)) {
725 case 0: b->currentInsn = SCM_VM_INSN(code); break;
726 case 1: b->currentInsn = SCM_VM_INSN1(code, b->currentArg0); break;
727 case 2: b->currentInsn = SCM_VM_INSN2(code,
728 b->currentArg0,
729 b->currentArg1); break;
730 }
731 }
732
733 /* Called by cc_builder_flush to finish the current transition forcibly.
734 We look for the default arc (-1) of the current state and use that
735 insn to represent the current state. We can assume parameters and
736 operands are set properly in the cc_builder. */
finish_transition(cc_builder * b)737 static void finish_transition(cc_builder *b)
738 {
739 int i = b->currentState;
740 SCM_ASSERT(i >= 0 && i < (int)(sizeof(stn)/sizeof(struct stn_arc[1])));
741 for (;; i++) {
742 if (stn[i].input < 0) {
743 fill_current_insn(b, stn[i].next);
744 break;
745 }
746 SCM_ASSERT(i < (int)(sizeof(stn)/sizeof(struct stn_arc[1])));
747 }
748 }
749
Scm_CompiledCodeEmit(ScmCompiledCode * cc,int code,int arg0,int arg1,ScmObj operand,ScmObj info)750 void Scm_CompiledCodeEmit(ScmCompiledCode *cc,
751 int code, /* instruction code number */
752 int arg0, /* instruction code parameter 0 */
753 int arg1, /* instruction code parameter 1 */
754 ScmObj operand,
755 ScmObj info) /* debug info */
756 {
757 cc_builder *b;
758 struct stn_arc *arc;
759 CC_BUILDER_GET(b, cc);
760
761 if (SCM_VM_COMPILER_FLAG_IS_SET(Scm_VM(), SCM_COMPILE_NOCOMBINE)) {
762 save_params(b, code, arg0, arg1, operand, info);
763 fill_current_insn(b, code);
764 cc_builder_flush(b);
765 return;
766 }
767
768 restart:
769 /* Some insn needs special treatment. */
770 if (code == SCM_VM_LREF) {
771 static const int lrefs[4][4] = {
772 { SCM_VM_LREF0, SCM_VM_LREF1, SCM_VM_LREF2, SCM_VM_LREF3 },
773 { SCM_VM_LREF10, SCM_VM_LREF11, SCM_VM_LREF12, -1 },
774 { SCM_VM_LREF20, SCM_VM_LREF21, -1, -1 },
775 { SCM_VM_LREF30, -1, -1, -1 }
776 };
777 if (arg0 < 4 && arg1 < 4) {
778 int insn = lrefs[arg0][arg1];
779 if (insn >= 0) code = insn;
780 }
781 } else if (code == SCM_VM_CONST) {
782 if (SCM_NULLP(operand)) {
783 code = SCM_VM_CONSTN;
784 } else if (SCM_FALSEP(operand)) {
785 code = SCM_VM_CONSTF;
786 } else if (SCM_UNDEFINEDP(operand)) {
787 code = SCM_VM_CONSTU;
788 } else if (SCM_INTP(operand)) {
789 long v = SCM_INT_VALUE(operand);
790 if (SCM_VM_INSN_ARG_FITS(v)) {
791 code = SCM_VM_CONSTI;
792 arg0 = v;
793 }
794 }
795 }
796
797 /* Look up the state */
798 if (b->currentState < 0) {
799 arc = stn + code;
800 } else {
801 int i = b->currentState;
802 for (;;i++) {
803 if (stn[i].input == code || stn[i].input == -1) {
804 arc = stn + i;
805 break;
806 }
807 }
808 }
809
810 switch (arc->action) {
811 case EMIT:
812 save_params(b, code, arg0, arg1, operand, info);
813 fill_current_insn(b, arc->next);
814 cc_builder_flush(b);
815 b->currentState = -1;
816 break;
817 case KEEP:
818 fill_current_insn(b, arc->next);
819 cc_builder_flush(b);
820 b->currentState = -1;
821 goto restart;
822 case NEXT:
823 save_params(b, code, arg0, arg1, operand, info);
824 b->currentState = arc->next;
825 b->currentInsn = CC_BUILDER_BUFFER_TRANS;
826 break;
827 }
828 }
829
830 /*----------------------------------------------------------------
831 * CompiledCode - introspection
832 */
833
834 /* Converts the code vector into a list.
835 Instruction -> (<insn-symbol> [<arg0> <arg1>])
836 Obj/Code operand -> as is
837 Addr operand -> integer offset from the beginning of the code */
Scm_CompiledCodeToList(ScmCompiledCode * cc)838 ScmObj Scm_CompiledCodeToList(ScmCompiledCode *cc)
839 {
840 ScmObj h = SCM_NIL, t = SCM_NIL;
841
842 for (u_int i=0; i<(u_int)cc->codeSize; i++) {
843 ScmWord insn = cc->code[i];
844 u_int code = SCM_VM_INSN_CODE(insn);
845 const char *name = Scm_VMInsnName(code);
846
847 switch (Scm_VMInsnNumParams(code)) {
848 case 0:
849 SCM_APPEND1(h, t, SCM_LIST1(SCM_INTERN(name)));
850 break;
851 case 1:
852 SCM_APPEND1(h, t, SCM_LIST2(SCM_INTERN(name),
853 SCM_MAKE_INT(SCM_VM_INSN_ARG(insn))));
854 break;
855 case 2:
856 SCM_APPEND1(h, t, SCM_LIST3(SCM_INTERN(name),
857 SCM_MAKE_INT(SCM_VM_INSN_ARG0(insn)),
858 SCM_MAKE_INT(SCM_VM_INSN_ARG1(insn))));
859 break;
860 }
861
862 switch (Scm_VMInsnOperandType(code)) {
863 case SCM_VM_OPERAND_OBJ:;
864 case SCM_VM_OPERAND_CODE:;
865 case SCM_VM_OPERAND_CODES:;
866 SCM_APPEND1(h, t, SCM_OBJ(cc->code[++i]));
867 break;
868 case SCM_VM_OPERAND_ADDR: {
869 u_int off = (u_int)((ScmWord*)cc->code[++i] - cc->code);
870 SCM_APPEND1(h, t, SCM_MAKE_INT(off));
871 break;
872 }
873 case SCM_VM_OPERAND_OBJ_ADDR: {
874 u_int off = (u_int)((ScmWord*)cc->code[i+2] - cc->code);
875 SCM_APPEND(h, t, SCM_LIST2(SCM_OBJ(cc->code[i+1]),
876 SCM_MAKE_INT(off)));
877 i += 2;
878 break;
879 }
880 }
881 }
882 return h;
883 }
884
885 /*===========================================================
886 * VM Instruction introspection
887 */
888
889 static struct {
890 const char *name; /* name */
891 int nparams; /* # of parameters */
892 int operandType; /* operand type */
893 int flags; /* flags */
894 } insn_table[] = {
895 #define DEFINSN(sym, nam, np, type, flags) \
896 { nam, np, SCM_CPP_CAT(SCM_VM_OPERAND_, type), flags },
897 #include "vminsn.c"
898 #undef DEFINSN
899 };
900
901 #define CHECK_CODE(code) \
902 do { \
903 if (code >= SCM_VM_NUM_INSNS) { \
904 Scm_Error("invalid VM instruction code: %d", code); \
905 } \
906 } while (0)
907
Scm_VMInsnName(u_int code)908 const char *Scm_VMInsnName(u_int code)
909 {
910 CHECK_CODE(code);
911 return insn_table[code].name;
912 }
913
Scm_VMInsnNumParams(u_int code)914 int Scm_VMInsnNumParams(u_int code)
915 {
916 CHECK_CODE(code);
917 return insn_table[code].nparams;
918 }
919
vm_insn_flags(u_int code)920 int vm_insn_flags(u_int code) /* private for the time being */
921 {
922 CHECK_CODE(code);
923 return insn_table[code].flags;
924 }
925
Scm_VMInsnOperandType(u_int code)926 int Scm_VMInsnOperandType(u_int code)
927 {
928 CHECK_CODE(code);
929 return insn_table[code].operandType;
930 }
931
Scm_VMInsnNameToCode(ScmObj name)932 int Scm_VMInsnNameToCode(ScmObj name)
933 {
934 if (SCM_SYMBOLP(name)) name = SCM_OBJ(SCM_SYMBOL_NAME(name));
935 else if (!SCM_STRINGP(name)) {
936 Scm_Error("vm-insn-name->code: requires a symbol or a string, but got %S", name);
937 }
938 const char *n = Scm_GetStringConst(SCM_STRING(name));
939 for (int i=0; i<SCM_VM_NUM_INSNS; i++) {
940 if (strcmp(insn_table[i].name, n) == 0) {
941 return i;
942 }
943 }
944 Scm_Error("vm-insn-name->code: no such instruction: %A", name);
945 return -1; /* dummy */
946 }
947
948 /* (kind of) inversion of VMInsnInspect. */
Scm_VMInsnBuild(ScmObj obj)949 ScmWord Scm_VMInsnBuild(ScmObj obj)
950 {
951 int len = Scm_Length(obj);
952
953 if (len < 1 || len > 3 || !SCM_SYMBOLP(SCM_CAR(obj))) goto badspec;
954 int code = Scm_VMInsnNameToCode(SCM_CAR(obj));
955
956 switch (Scm_VMInsnNumParams(code)) {
957 case 0:
958 if (len != 1) {
959 Scm_Error("VM instruction %S takes no parameters, but got %S",
960 SCM_CAR(obj), obj);
961 }
962 return SCM_VM_INSN(code);
963 case 1: {
964 if (len != 2) {
965 Scm_Error("VM instruction %S takes one parameter, but got %S",
966 SCM_CAR(obj), obj);
967 }
968 if (!SCM_INTP(SCM_CADR(obj))) goto badspec;
969 int arg0 = SCM_INT_VALUE(SCM_CADR(obj));
970 return SCM_VM_INSN1(code, arg0);
971 }
972 case 2: {
973 if (len != 3) {
974 Scm_Error("VM instruction %S takes two parameters, but got %S",
975 SCM_CAR(obj), obj);
976 }
977 if (!SCM_INTP(SCM_CADR(obj))) goto badspec;
978 if (!SCM_INTP(SCM_CAR(SCM_CDDR(obj)))) goto badspec;
979 int arg0 = SCM_INT_VALUE(SCM_CADR(obj));
980 int arg1 = SCM_INT_VALUE(SCM_CAR(SCM_CDDR(obj)));
981 return SCM_VM_INSN2(code, arg0, arg1);
982 }
983 }
984 /*FALLTHROUGH*/
985 badspec:
986 Scm_Error("Bad VM insn spec: %S", obj);
987 return 0; /* dummy */
988 }
989
990