1 /*
2  * code.c - compiled code builder/handler
3  *
4  *   Copyright (c) 2005-2020  Shiro Kawai  <shiro@acm.org>
5  *
6  *   Redistribution and use in source and binary forms, with or without
7  *   modification, are permitted provided that the following conditions
8  *   are met:
9  *
10  *   1. Redistributions of source code must retain the above copyright
11  *      notice, this list of conditions and the following disclaimer.
12  *
13  *   2. Redistributions in binary form must reproduce the above copyright
14  *      notice, this list of conditions and the following disclaimer in the
15  *      documentation and/or other materials provided with the distribution.
16  *
17  *   3. Neither the name of the authors nor the names of its contributors
18  *      may be used to endorse or promote products derived from this
19  *      software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27  *   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  *   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  *   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  *   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  *   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #define LIBGAUCHE_BODY
35 #include "gauche.h"
36 #include "gauche/class.h"
37 #include "gauche/code.h"
38 #include "gauche/vminsn.h"
39 #include "gauche/priv/codeP.h"
40 #include "gauche/priv/identifierP.h"
41 #include "gauche/priv/builtin-syms.h"
42 
43 /*===============================================================
44  * NVM related stuff
45  */
46 
47 /* Debug information:
48  *
49  *  debug info is kept as an assoc-list with insn offset
50  *  as a key.
51  */
52 
Scm_CompiledCodeFullName(ScmCompiledCode * cc)53 ScmObj Scm_CompiledCodeFullName(ScmCompiledCode *cc)
54 {
55     if (SCM_COMPILED_CODE_P(cc->parent)
56         && !SCM_EQ(SCM_COMPILED_CODE(cc->parent)->name, SCM_SYM_TOPLEVEL)) {
57         ScmObj h = SCM_NIL, t = SCM_NIL;
58         for (;;) {
59             SCM_APPEND1(h, t, cc->name);
60             if (!SCM_COMPILED_CODE_P(cc->parent)) break;
61             cc = SCM_COMPILED_CODE(cc->parent);
62             if (SCM_EQ(cc->name, SCM_SYM_TOPLEVEL)) break;
63         }
64         return Scm_ReverseX(h);
65     } else {
66         return cc->name;
67     }
68 }
69 
make_compiled_code(void)70 static ScmCompiledCode *make_compiled_code(void)
71 {
72     ScmCompiledCode *cc = SCM_NEW(ScmCompiledCode);
73     SCM_SET_CLASS(cc, SCM_CLASS_COMPILED_CODE);
74     cc->code = NULL;
75     cc->constants = NULL;
76     cc->maxstack = -1;
77     cc->debugInfo = SCM_NIL;
78     cc->signatureInfo = SCM_FALSE;
79     cc->name = SCM_FALSE;
80     cc->parent = SCM_FALSE;
81     cc->builder = NULL;
82     return cc;
83 }
84 
85 /* Copy the source compiled-code into the destination.  This is used in
86    the external optimizer to 'edit' code vector---such routines can
87    create a new compiled-code, then copy it to the original so that
88    the identity of compiled-code is kept. */
Scm_CompiledCodeCopyX(ScmCompiledCode * dest,const ScmCompiledCode * src)89 void Scm_CompiledCodeCopyX(ScmCompiledCode *dest,
90                            const ScmCompiledCode *src)
91 {
92     SCM_ASSERT(dest->builder == NULL);
93     SCM_ASSERT(src->builder == NULL);
94 
95     memcpy(dest, src, sizeof(ScmCompiledCode));
96 }
97 
98 /*----------------------------------------------------------------------
99  * An API to execute statically compiled toplevel code.  *PROVISIONAL*
100  */
101 static ScmSubrProc execute_toplevels;
102 
Scm_VMExecuteToplevels(ScmCompiledCode * cs[])103 void Scm_VMExecuteToplevels(ScmCompiledCode *cs[])
104 {
105     ScmObj proc = Scm_MakeSubr(execute_toplevels, cs, 0, 0, SCM_FALSE);
106     Scm_ApplyRec(proc, SCM_NIL);
107 }
108 
execute_toplevels_cc(ScmObj result SCM_UNUSED,void ** data)109 static ScmObj execute_toplevels_cc(ScmObj result SCM_UNUSED, void **data)
110 {
111     ScmCompiledCode **cs = (ScmCompiledCode **)data[0];
112     if (cs[0] == NULL) return SCM_UNDEFINED;
113     data[0] = cs+1;
114     ScmVM *vm = Scm_VM();
115     Scm_VMPushCC(execute_toplevels_cc, data, 1);
116     vm->base = cs[0];
117     vm->pc = vm->base->code;
118     return SCM_UNDEFINED;
119 }
120 
execute_toplevels(ScmObj * args SCM_UNUSED,int nargs SCM_UNUSED,void * cv)121 static ScmObj execute_toplevels(ScmObj *args SCM_UNUSED,
122                                 int nargs SCM_UNUSED,
123                                 void *cv)
124 {
125     Scm_VMPushCC(execute_toplevels_cc, &cv, 1);
126     return SCM_UNDEFINED;
127 }
128 
129 /*----------------------------------------------------------------------
130  * Disassembler
131  */
132 static ScmObj check_lifted_closure(ScmWord *p, ScmObj lifted);
133 static void print_header(const char *prefix, ScmObj name, ScmCompiledCode *cc);
134 
Scm_CompiledCodeDump(ScmCompiledCode * cc)135 void Scm_CompiledCodeDump(ScmCompiledCode *cc)
136 {
137     ScmObj closures = SCM_NIL, lifted = SCM_NIL, shown_lifted = SCM_NIL;
138     int clonum = 0, more = FALSE;
139 
140     print_header("main_code", SCM_MAKE_STR(""), cc);
141     do {
142         ScmWord *p = cc->code;
143         Scm_Printf(SCM_CUROUT, "signatureInfo: %S\n", cc->signatureInfo);
144         for (int i=0; i < cc->codeSize; i++) {
145             ScmWord insn = p[i];
146             ScmPort *out = SCM_PORT(Scm_MakeOutputStringPort(TRUE));
147             ScmObj info = Scm_Assq(SCM_MAKE_INT(i), cc->debugInfo);
148             u_int code = SCM_VM_INSN_CODE(insn);
149             const char *insn_name = Scm_VMInsnName(code);
150 
151             switch (Scm_VMInsnNumParams(code)) {
152             case 0:
153                 Scm_Printf(out, "  %4d %s ", i, insn_name);
154                 break;
155             case 1:
156                 Scm_Printf(out, "  %4d %s(%d) ", i, insn_name,
157                            SCM_VM_INSN_ARG(insn));
158                 break;
159             case 2:
160                 Scm_Printf(out, "  %4d %s(%d,%d) ", i, insn_name,
161                            SCM_VM_INSN_ARG0(insn),SCM_VM_INSN_ARG1(insn));
162                 break;
163             }
164             switch (Scm_VMInsnOperandType(code)) {
165             case SCM_VM_OPERAND_ADDR:
166                 Scm_Printf(out, "%d", (ScmWord*)p[i+1] - cc->code);
167                 i++;
168                 break;
169             case SCM_VM_OPERAND_OBJ:
170                 /* Check if we're referring to a lifted closure. */
171                 lifted = check_lifted_closure(p+i, lifted);
172                 Scm_Printf(out, "%S", p[i+1]);
173                 i++;
174                 break;
175             case SCM_VM_OPERAND_OBJ_ADDR:
176                 Scm_Printf(out, "%S, %d", p[i+1], (ScmWord*)p[i+2] - cc->code);
177                 i += 2;
178                 break;
179             case SCM_VM_OPERAND_CODE:
180                 Scm_Printf(out, "#<lambda %d>", clonum);
181                 closures = Scm_Acons(SCM_OBJ(p[i+1]), SCM_MAKE_INT(clonum),
182                                      closures);
183                 clonum++;
184                 i++;
185                 break;
186             case SCM_VM_OPERAND_CODES: {
187                 Scm_Printf(out, "(");
188                 ScmObj cp;
189                 SCM_FOR_EACH(cp, SCM_OBJ(p[i+1])) {
190                     if (SCM_COMPILED_CODE_P(SCM_CAR(cp))) {
191                         closures = Scm_Acons(SCM_CAR(cp),
192                                              SCM_MAKE_INT(clonum),
193                                              closures);
194                         Scm_Printf(out, "#<lambda %d>", clonum);
195                         clonum++;
196                     }
197                 }
198                 Scm_Printf(out, ")");
199                 i++;
200                 break;
201             }
202             default:
203                 /*nothing*/;
204             }
205 
206             /* Show info */
207             ScmObj s = Scm_GetOutputStringUnsafe(out, 0);
208             if (!SCM_PAIRP(info)) {
209                 Scm_Puts(SCM_STRING(s), SCM_CUROUT);
210                 Scm_Putc('\n', SCM_CUROUT);
211             } else {
212                 int len = SCM_STRING_BODY_SIZE(SCM_STRING_BODY(s));
213                 ScmObj srcinfo = Scm_Assq(SCM_SYM_SOURCE_INFO, info);
214                 ScmObj bindinfo = Scm_Assq(SCM_SYM_BIND_INFO, info);
215                 Scm_Puts(SCM_STRING(s), SCM_CUROUT);
216                 Scm_Flush(SCM_CUROUT);
217                 for (; len<32; len++) {
218                     Scm_Putc(' ', SCM_CUROUT);
219                 }
220                 if (SCM_FALSEP(srcinfo)) {
221                     Scm_Printf(SCM_CUROUT, "; lambda %#40.1S\n",
222                                SCM_CDR(bindinfo));
223                 } else {
224                     Scm_Printf(SCM_CUROUT, "; %#40.1S\n",
225                                Scm_UnwrapSyntax2(SCM_CDR(srcinfo), FALSE));
226                 }
227             }
228         }
229         more = FALSE;
230         if (!SCM_NULLP(closures)) {
231             cc = SCM_COMPILED_CODE(SCM_CAAR(closures));
232             print_header("closure:", SCM_CDAR(closures), cc);
233             closures = SCM_CDR(closures);
234             more = TRUE;
235         } else if (!SCM_NULLP(lifted)) {
236             while (!SCM_NULLP(lifted)) {
237                 if (SCM_FALSEP(Scm_Memq(SCM_CAAR(lifted), shown_lifted))) {
238                     cc = SCM_COMPILED_CODE(SCM_CAAR(lifted));
239                     print_header("lifted:", SCM_CDAR(lifted), cc);
240                     shown_lifted = Scm_Cons(SCM_CAAR(lifted), shown_lifted);
241                     lifted = SCM_CDR(lifted);
242                     more = TRUE;
243                     break;
244                 } else {
245                     lifted = SCM_CDR(lifted);
246                     continue;
247                 }
248             }
249         }
250     } while (more);
251 }
252 
print_header(const char * prefix,ScmObj name,ScmCompiledCode * cc)253 static void print_header(const char *prefix, ScmObj name, ScmCompiledCode *cc)
254 {
255     Scm_Printf(SCM_CUROUT, "=== %s%A (name=%S, cc=%p, codevec=%p, size=%d, const=%d stack=%d):\n",
256                prefix, name, cc->name, cc, cc->code,
257                cc->codeSize, cc->constantSize, cc->maxstack);
258 }
259 
260 /* The compiler may have lifted an internal closure to a global procedure.
261    We can tell so if the opcode is GREF_x, and the operand is an identifier,
262    whose name is an uninterned symbol and it is globally bound to a procedure.
263 
264    If we indeed have a lifted closure, we chain the closure's code and
265    the identifier into the lifted list, returns the updated list.
266    Otherwise, we return lifted list as is.
267  */
check_lifted_closure(ScmWord * p,ScmObj lifted)268 static ScmObj check_lifted_closure(ScmWord *p, ScmObj lifted)
269 {
270     ScmWord code = SCM_VM_INSN_CODE(p[0]);
271     static ScmWord gref_insns[] = {
272         SCM_VM_GREF,
273         SCM_VM_GREF_PUSH,
274         SCM_VM_GREF_CALL,
275         SCM_VM_GREF_TAIL_CALL,
276         SCM_VM_PUSH_GREF,
277         SCM_VM_PUSH_GREF_CALL,
278         SCM_VM_PUSH_GREF_TAIL_CALL
279     };
280 
281     if (!SCM_IDENTIFIERP(p[1])) return lifted;
282     ScmIdentifier *id = Scm_OutermostIdentifier(SCM_IDENTIFIER(p[1]));
283     if (SCM_SYMBOL_INTERNED(id->name)) return lifted;
284 
285     for (u_int i=0; i < sizeof(gref_insns)/sizeof(ScmWord); i++) {
286         if (code == gref_insns[i]) {
287             ScmObj g = Scm_GlobalVariableRef(id->module, SCM_SYMBOL(id->name),
288                                              SCM_BINDING_STAY_IN_MODULE);
289             if (SCM_CLOSUREP(g)) {
290                 if (SCM_FALSEP(Scm_Assq(SCM_CLOSURE(g)->code, lifted))) {
291                     return Scm_Acons(SCM_CLOSURE(g)->code,
292                                      SCM_OBJ(id->name),
293                                      lifted);
294                 } else {
295                     return lifted;
296                 }
297             }
298         }
299     }
300     return lifted;
301 }
302 
303 /*------------------------------------------------------------------
304  * Builder - used by the new compiler
305  */
306 
307 #define CC_BUILDER_CHUNK_BITS  5
308 #define CC_BUILDER_CHUNK_SIZE  (1L<<CC_BUILDER_CHUNK_BITS)
309 #define CC_BUILDER_CHUNK_MASK  (CC_BUILDER_CHUNK_SIZE-1)
310 
311 typedef struct cc_builder_chunk {
312     struct cc_builder_chunk *prev;
313     ScmWord code[CC_BUILDER_CHUNK_SIZE];
314 } cc_builder_chunk;
315 
316 /* To perform instruction combination, the builder buffers one insn/operand.
317  * currentInsn == SCM_WORD(-1) indicates there's no buffered insn.
318  */
319 typedef struct cc_builder_rec {
320     cc_builder_chunk *chunks;
321     int numChunks;
322     ScmObj constants;           /* list of constants */
323     int currentIndex;
324     ScmWord currentInsn;        /* buffer for instruction combining.
325                                    this can be a special value either
326                                    CC_BUILDER_BUFFER_EMPTY or
327                                    CC_BUILDER_BUFFER_TRANS.  see below. */
328     int    prevOpcode;          /* previous saved insn opcode */
329     int    currentOpcode;       /* saved insn opcode */
330     int    currentArg0;         /* ditto */
331     int    currentArg1;         /* ditto */
332     ScmObj currentOperand;      /* ditto */
333     ScmObj currentInfo;         /* ditto */
334     int    currentState;        /* index to the current state of
335                                    combiner STN */
336     ScmObj labelDefs;           /* alist of (name . offset) */
337     ScmObj labelRefs;           /* alist of (name . offset-to-fill) */
338     int labelCount;             /* counter to generate unique labels */
339     ScmObj debugInfo;           /* alist of (offset (source-info obj)) */
340 } cc_builder;
341 
342 /* Indicates that there's no pending instruction. */
343 #define CC_BUILDER_BUFFER_EMPTY       SCM_WORD(-1)
344 
345 /* Indicates that the instruction combiner is in the transitional
346    state.  In ordinary circumstances this state will be resolved
347    as the code generation goes.  However, if the instruction combination
348    is "cut off", for example by emitting a jump destination label, we
349    have to complete the instruction.  It is done by seeking for
350    the default arc of the current state.  */
351 #define CC_BUILDER_BUFFER_TRANS       SCM_WORD(-2)
352 
353 /* Some internal stuff */
354 
355 #define CC_BUILDER_GET(b, cc)                                           \
356     do {                                                                \
357         if (cc->builder == NULL) {                                      \
358             Scm_Error("[internal error] CompiledCode is already frozen"); \
359         }                                                               \
360         (b) = (cc_builder*)cc->builder;                                 \
361     } while (0)
362 
make_cc_builder(void)363 static cc_builder *make_cc_builder(void)
364 {
365     cc_builder *b = SCM_NEW(cc_builder);
366     b->chunks = NULL;
367     b->numChunks = 0;
368     b->constants = SCM_NIL;
369     b->currentIndex = 0;
370     b->currentInsn = CC_BUILDER_BUFFER_EMPTY;
371     b->currentOpcode = b->prevOpcode = -1;
372     b->currentOperand = b->currentInfo = SCM_FALSE;
373     b->currentState = -1;
374     b->labelDefs = b->labelRefs = SCM_NIL;
375     b->labelCount = 0;
376     b->debugInfo = SCM_NIL;
377     return b;
378 }
379 
cc_builder_add_word(cc_builder * b,ScmWord w)380 static void cc_builder_add_word(cc_builder *b, ScmWord w)
381 {
382     int ni = b->currentIndex & CC_BUILDER_CHUNK_MASK;
383     if (ni == 0) {
384         cc_builder_chunk *newchunk = SCM_NEW(cc_builder_chunk);
385         newchunk->prev = b->chunks;
386         b->chunks = newchunk;
387         b->numChunks++;
388     }
389     b->chunks->code[ni] = w;
390     b->currentIndex++;
391 }
392 
cc_builder_add_constant(cc_builder * b,ScmObj obj)393 static void cc_builder_add_constant(cc_builder *b, ScmObj obj)
394 {
395     if (!SCM_PTRP(obj)) return;
396     if (!SCM_FALSEP(Scm_Memq(obj, b->constants))) return;
397     b->constants = Scm_Cons(obj, b->constants);
398 }
399 
cc_builder_add_info(cc_builder * b)400 static void cc_builder_add_info(cc_builder *b)
401 {
402     if (SCM_FALSEP(b->currentInfo)) return;
403     b->debugInfo = Scm_Acons(SCM_MAKE_INT(b->currentIndex),
404                              SCM_LIST1(Scm_Cons(SCM_SYM_SOURCE_INFO,
405                                                 b->currentInfo)),
406                              b->debugInfo);
407     b->currentInfo = SCM_FALSE;
408 }
409 
410 /* Returns label offset of the given label, if the label is already defined.
411    Otherwise, returns -1. */
cc_builder_label_def(cc_builder * b,ScmObj label)412 static int cc_builder_label_def(cc_builder *b, ScmObj label)
413 {
414     ScmObj p = Scm_Assq(label, b->labelDefs);
415     if (SCM_PAIRP(p)) {
416         return SCM_INT_VALUE(SCM_CDR(p));
417     } else {
418         return -1;
419     }
420 }
421 
422 static void finish_transition(cc_builder *b);
423 
424 /* Flush the currentInsn buffer. */
cc_builder_flush(cc_builder * b)425 static void cc_builder_flush(cc_builder *b)
426 {
427     if ((b)->currentInsn == CC_BUILDER_BUFFER_EMPTY) return;
428     if ((b)->currentInsn == CC_BUILDER_BUFFER_TRANS) {
429         finish_transition(b);
430     }
431     cc_builder_add_info(b);
432     cc_builder_add_word(b, b->currentInsn);
433 
434     u_int code = SCM_VM_INSN_CODE(b->currentInsn);
435     switch (Scm_VMInsnOperandType(code)) {
436     case SCM_VM_OPERAND_ADDR:
437         /* Addr should be a label.  We just push the label reference
438            into labelRefs, and emit a dummy address for the time being.
439            (we can't emit the actual number even if we're referring to
440            the label that has already appeared, since the number should
441            be calculated after the code vector is allocated.) */
442         b->labelRefs = Scm_Acons(b->currentOperand,
443                                  SCM_MAKE_INT(b->currentIndex),
444                                  b->labelRefs);
445         cc_builder_add_word(b, SCM_WORD(0)); /* dummy */
446         break;
447     case SCM_VM_OPERAND_OBJ:;
448     case SCM_VM_OPERAND_CODES:
449         cc_builder_add_word(b, SCM_WORD(b->currentOperand));
450         cc_builder_add_constant(b, b->currentOperand);
451         break;
452     case SCM_VM_OPERAND_OBJ_ADDR:
453         /* operand would be given as a list of (OBJ LABEL). */
454         SCM_ASSERT(SCM_PAIRP(b->currentOperand)
455                    && SCM_PAIRP(SCM_CDR(b->currentOperand)));
456         cc_builder_add_word(b, SCM_WORD(SCM_CAR(b->currentOperand)));
457         cc_builder_add_constant(b, SCM_CAR(b->currentOperand));
458         b->labelRefs = Scm_Acons(SCM_CADR(b->currentOperand),
459                                  SCM_MAKE_INT(b->currentIndex),
460                                  b->labelRefs);
461         cc_builder_add_word(b, SCM_WORD(0)); /* dummy */
462         break;
463     case SCM_VM_OPERAND_CODE:
464         if (!SCM_COMPILED_CODE_P(b->currentOperand)) goto badoperand;
465         cc_builder_add_word(b, SCM_WORD(b->currentOperand));
466         cc_builder_add_constant(b, b->currentOperand);
467     default:
468         break;
469     }
470     b->currentInsn = CC_BUILDER_BUFFER_EMPTY;
471     b->currentState = -1;
472     b->currentOpcode = -1;
473     return;
474   badoperand:
475     b->currentInsn = CC_BUILDER_BUFFER_EMPTY;
476     b->currentState = -1;
477     Scm_Error("[internal error] bad operand: %S", b->currentOperand);
478     return;
479 }
480 
481 /* a peephole optimization; rewrite jump destination for cascaded jump
482  *
483  * - if the destination of JUMP-like insn (including conditional jump
484  *   and PRE-CALL) is another JUMP, rewrite the destination.
485  * - if the destination of BF is another BF (this pattern appears frequently,
486  *   e.g. 'or' is used in the test clause of 'cond'), rewrite the destination.
487  */
cc_builder_jumpopt(ScmCompiledCode * cc)488 static void cc_builder_jumpopt(ScmCompiledCode *cc)
489 {
490     ScmWord *cp = cc->code;
491 
492     for (u_int i=0; i<(u_int)cc->codeSize; i++) {
493         u_int code = SCM_VM_INSN_CODE(*cp); cp++;
494         switch (Scm_VMInsnOperandType(code)) {
495         case SCM_VM_OPERAND_OBJ:;
496         case SCM_VM_OPERAND_CODE:;
497         case SCM_VM_OPERAND_CODES:;
498             i++; cp++;
499             break;
500         case SCM_VM_OPERAND_OBJ_ADDR:
501             i++; cp++;
502             /*FALLTHROUGH*/
503         case SCM_VM_OPERAND_ADDR: {
504             ScmWord *target = (ScmWord*)*cp;
505             while (SCM_VM_INSN_CODE(*target) == SCM_VM_JUMP
506                    || (code == SCM_VM_BF
507                        && SCM_VM_INSN_CODE(*target) == SCM_VM_BF)) {
508                 target = (ScmWord*)target[1];
509             }
510             if (target != (ScmWord*)*cp) {
511                 *cp = SCM_WORD(target);
512             }
513             i++; cp++;
514             break;
515         }
516         default:
517             break;
518         }
519     }
520 }
521 
522 /* Creates and returns a new empty compiled-code object for building
523    new code chunk. */
Scm_MakeCompiledCodeBuilder(int reqargs,int optargs,ScmObj name,ScmObj parent,ScmObj intForm)524 ScmObj Scm_MakeCompiledCodeBuilder(int reqargs, int optargs,
525                                    ScmObj name, ScmObj parent, ScmObj intForm)
526 {
527     ScmCompiledCode *cc = make_compiled_code();
528     cc->builder = make_cc_builder();
529     cc->requiredArgs = reqargs;
530     cc->optionalArgs = optargs;
531     cc->name = name;
532     cc->parent = parent;
533     cc->intermediateForm = intForm;
534     return SCM_OBJ(cc);
535 }
536 
537 /* Returns a label identifier (integer) unique to this code block */
Scm_CompiledCodeNewLabel(ScmCompiledCode * cc)538 ScmObj Scm_CompiledCodeNewLabel(ScmCompiledCode *cc)
539 {
540     cc_builder *b;
541     CC_BUILDER_GET(b, cc);
542     ScmObj label = SCM_MAKE_INT(b->labelCount);
543     b->labelCount++;
544     return label;
545 }
546 
547 /* Set label to the current instruction position. */
Scm_CompiledCodeSetLabel(ScmCompiledCode * cc,ScmObj label)548 void Scm_CompiledCodeSetLabel(ScmCompiledCode *cc, ScmObj label)
549 {
550     cc_builder *b;
551 
552     CC_BUILDER_GET(b, cc);
553 
554     /* Flush buffered insn first. */
555     cc_builder_flush(b);
556 
557     /* NB: should check duplicate labels */
558     b->labelDefs = Scm_Acons(label, SCM_MAKE_INT(b->currentIndex),
559                              b->labelDefs);
560 }
561 
562 /* Push arbitrary debug-info into the builder.
563    <debug-info> : (<insn-offset> <item> ...)
564    <insn-offset> : <integer> or 'definition
565    Currently supported item:
566    <item> : (<source-info> . source)
567 */
Scm_CompiledCodePushInfo(ScmCompiledCode * cc,ScmObj info)568 void Scm_CompiledCodePushInfo(ScmCompiledCode *cc, ScmObj info)
569 {
570     cc_builder *b;
571     CC_BUILDER_GET(b, cc);
572     b->debugInfo = Scm_Cons(info, b->debugInfo);
573 }
574 
575 /* Pack the code accumulated in the builder into a code vector.
576    Perform label resolution and jump optimization. */
Scm_CompiledCodeFinishBuilder(ScmCompiledCode * cc,int maxstack)577 void Scm_CompiledCodeFinishBuilder(ScmCompiledCode *cc, int maxstack)
578 {
579     cc_builder *b;
580     CC_BUILDER_GET(b, cc);
581     cc_builder_flush(b);
582     cc->code = SCM_NEW_ATOMIC2(ScmWord *, b->currentIndex * sizeof(ScmWord));
583     cc->codeSize = b->currentIndex;
584 
585     /* reverse chunks, leaving the first chunk in bcprev. */
586     cc_builder_chunk *bc, *bcprev = NULL;
587     for (bc = b->chunks; bc;) {
588         cc_builder_chunk *next = bc->prev;
589         bc->prev = bcprev;
590         bcprev = bc;
591         bc = next;
592     }
593 
594     /* pack words */
595     bc = bcprev;
596     for (int i=0, j=0; i<b->currentIndex; i++, j++) {
597         if (j >= CC_BUILDER_CHUNK_SIZE) {
598             bc = bc->prev;
599             j = 0;
600         }
601         cc->code[i] = bc->code[j];
602     }
603 
604     /* pack constants */
605     int numConstants = Scm_Length(b->constants);
606     if (numConstants > 0) {
607         cc->constants = SCM_NEW_ARRAY(ScmObj, numConstants);
608         ScmObj cp = b->constants;
609         for (int i=0; i<numConstants; i++, cp=SCM_CDR(cp)) {
610             cc->constants[i] = SCM_CAR(cp);
611         }
612     }
613     cc->constantSize = numConstants;
614 
615     /* resolve labels */
616     ScmObj cp;
617     SCM_FOR_EACH(cp, b->labelRefs) {
618         int destAddr = cc_builder_label_def(b, SCM_CAAR(cp));
619         int operandAddr;
620         if (destAddr < 0) {
621             Scm_Error("[internal error] undefined label in compiled code: %S",
622                       SCM_CAAR(cp));
623         }
624         operandAddr = SCM_INT_VALUE(SCM_CDAR(cp));
625         SCM_ASSERT(operandAddr >= 0 && operandAddr < cc->codeSize);
626         cc->code[operandAddr] = SCM_WORD(cc->code + destAddr);
627     }
628 
629     /* jump destination optimization */
630     cc_builder_jumpopt(cc);
631 
632     /* record debug info */
633     cc->debugInfo = b->debugInfo;
634 
635     /* set max stack depth */
636     cc->maxstack = maxstack;
637 
638     /* make sure this code is 'fixed'---no more building */
639     cc->builder = NULL;
640 }
641 
642 /*----------------------------------------------------------------
643  * Emitting instruction and operand, performing instruction combination
644  */
645 
646 /* This is originally implemented in Scheme, but moved here for efficiency,
647  * since this routine is the most frequently called one during compilation.
648  */
649 
650 /* The state transition table */
651 struct stn_arc {
652     int input;                  /* input insn, or -1 for wildcard */
653     int action;                 /* EMIT, KEEP, NEXT */
654     int next;                   /* emitting insn / next state */
655 };
656 
657 /* State transition actions */
658 enum {
659     NEXT,
660     EMIT,
661     KEEP
662 };
663 
664 /* Include STN generated from vminsn.scm */
665 static struct stn_arc stn[] = {
666 #define STATE_TABLE
667 #include "vminsn.c"
668 #undef STATE_TABLE
669 };
670 
671 /* Save the args/operand if necessary */
save_params(cc_builder * b,int code,int arg0,int arg1,ScmObj operand,ScmObj info)672 static inline void save_params(cc_builder *b, int code,
673                                int arg0, int arg1, ScmObj operand,
674                                ScmObj info)
675 {
676     b->prevOpcode = b->currentOpcode;
677     b->currentOpcode = code;
678     switch (Scm_VMInsnNumParams(code)) {
679     case 2: b->currentArg1 = arg1;
680         /* FALLTHROUGH */
681     case 1: b->currentArg0 = arg0;
682         /* FALLTHROUGH */
683     case 0:;
684     }
685     if (Scm_VMInsnOperandType(code) != SCM_VM_OPERAND_NONE) {
686         b->currentOperand = operand;
687     }
688     if (!SCM_FALSEP(info)) {
689         b->currentInfo = info;
690     }
691 }
692 
693 static int vm_insn_flags(u_int code);
694 
695 /* Fill the current insn word */
fill_current_insn(cc_builder * b,int code)696 static inline void fill_current_insn(cc_builder *b, int code)
697 {
698     /* A special handling of fold-lref insn.
699        Fold-lref insn is a combined insn LREF-XXXX(depth,offset).  What's
700        special about it is that we 'fold' specialized LREF insn
701        (e.g. LREF10) into generic LREF.
702      */
703 #define SET_LREF_ARGS(dep, off) \
704     b->currentArg0 = (dep); b->currentArg1 = (off); break
705     if (vm_insn_flags(code) & SCM_VM_INSN_FOLD_LREF) {
706         switch (b->prevOpcode) {
707         case SCM_VM_LREF0:  SET_LREF_ARGS(0, 0);
708         case SCM_VM_LREF1:  SET_LREF_ARGS(0, 1);
709         case SCM_VM_LREF2:  SET_LREF_ARGS(0, 2);
710         case SCM_VM_LREF3:  SET_LREF_ARGS(0, 3);
711         case SCM_VM_LREF10: SET_LREF_ARGS(1, 0);
712         case SCM_VM_LREF11: SET_LREF_ARGS(1, 1);
713         case SCM_VM_LREF12: SET_LREF_ARGS(1, 2);
714         case SCM_VM_LREF20: SET_LREF_ARGS(2, 0);
715         case SCM_VM_LREF21: SET_LREF_ARGS(2, 1);
716         case SCM_VM_LREF30: SET_LREF_ARGS(3, 0);
717         case SCM_VM_LREF: /* args are already set */ break;
718         default: Scm_Error("[internal] Compiler internal error: FOLD_LREF insn needs to be combined with LREF*, but prevOpcode = %d", b->prevOpcode);
719         }
720     }
721 #undef SET_LREF_ARGS
722 
723     /* Compose insn word */
724     switch (Scm_VMInsnNumParams(code)) {
725     case 0: b->currentInsn = SCM_VM_INSN(code); break;
726     case 1: b->currentInsn = SCM_VM_INSN1(code, b->currentArg0); break;
727     case 2: b->currentInsn = SCM_VM_INSN2(code,
728                                           b->currentArg0,
729                                           b->currentArg1); break;
730     }
731 }
732 
733 /* Called by cc_builder_flush to finish the current transition forcibly.
734    We look for the default arc (-1) of the current state and use that
735    insn to represent the current state.  We can assume parameters and
736    operands are set properly in the cc_builder. */
finish_transition(cc_builder * b)737 static void finish_transition(cc_builder *b)
738 {
739     int i = b->currentState;
740     SCM_ASSERT(i >= 0 && i < (int)(sizeof(stn)/sizeof(struct stn_arc[1])));
741     for (;; i++) {
742         if (stn[i].input < 0) {
743             fill_current_insn(b, stn[i].next);
744             break;
745         }
746         SCM_ASSERT(i < (int)(sizeof(stn)/sizeof(struct stn_arc[1])));
747     }
748 }
749 
Scm_CompiledCodeEmit(ScmCompiledCode * cc,int code,int arg0,int arg1,ScmObj operand,ScmObj info)750 void Scm_CompiledCodeEmit(ScmCompiledCode *cc,
751                           int code, /* instruction code number */
752                           int arg0, /* instruction code parameter 0 */
753                           int arg1, /* instruction code parameter 1 */
754                           ScmObj operand,
755                           ScmObj info) /* debug info */
756 {
757     cc_builder *b;
758     struct stn_arc *arc;
759     CC_BUILDER_GET(b, cc);
760 
761     if (SCM_VM_COMPILER_FLAG_IS_SET(Scm_VM(), SCM_COMPILE_NOCOMBINE)) {
762         save_params(b, code, arg0, arg1, operand, info);
763         fill_current_insn(b, code);
764         cc_builder_flush(b);
765         return;
766     }
767 
768   restart:
769     /* Some insn needs special treatment. */
770     if (code == SCM_VM_LREF) {
771         static const int lrefs[4][4] = {
772             { SCM_VM_LREF0,  SCM_VM_LREF1,  SCM_VM_LREF2,  SCM_VM_LREF3 },
773             { SCM_VM_LREF10, SCM_VM_LREF11, SCM_VM_LREF12, -1 },
774             { SCM_VM_LREF20, SCM_VM_LREF21, -1, -1 },
775             { SCM_VM_LREF30, -1, -1, -1 }
776         };
777         if (arg0 < 4 && arg1 < 4) {
778             int insn = lrefs[arg0][arg1];
779             if (insn >= 0) code = insn;
780         }
781     } else if (code == SCM_VM_CONST) {
782         if (SCM_NULLP(operand)) {
783             code = SCM_VM_CONSTN;
784         } else if (SCM_FALSEP(operand)) {
785             code = SCM_VM_CONSTF;
786         } else if (SCM_UNDEFINEDP(operand)) {
787             code = SCM_VM_CONSTU;
788         } else if (SCM_INTP(operand)) {
789             long v = SCM_INT_VALUE(operand);
790             if (SCM_VM_INSN_ARG_FITS(v)) {
791                 code = SCM_VM_CONSTI;
792                 arg0 = v;
793             }
794         }
795     }
796 
797     /* Look up the state */
798     if (b->currentState < 0) {
799         arc = stn + code;
800     } else {
801         int i = b->currentState;
802         for (;;i++) {
803             if (stn[i].input == code || stn[i].input == -1) {
804                 arc = stn + i;
805                 break;
806             }
807         }
808     }
809 
810     switch (arc->action) {
811     case EMIT:
812         save_params(b, code, arg0, arg1, operand, info);
813         fill_current_insn(b, arc->next);
814         cc_builder_flush(b);
815         b->currentState = -1;
816         break;
817     case KEEP:
818         fill_current_insn(b, arc->next);
819         cc_builder_flush(b);
820         b->currentState = -1;
821         goto restart;
822     case NEXT:
823         save_params(b, code, arg0, arg1, operand, info);
824         b->currentState = arc->next;
825         b->currentInsn = CC_BUILDER_BUFFER_TRANS;
826         break;
827     }
828 }
829 
830 /*----------------------------------------------------------------
831  * CompiledCode - introspection
832  */
833 
834 /* Converts the code vector into a list.
835    Instruction -> (<insn-symbol> [<arg0> <arg1>])
836    Obj/Code operand -> as is
837    Addr operand -> integer offset from the beginning of the code */
Scm_CompiledCodeToList(ScmCompiledCode * cc)838 ScmObj Scm_CompiledCodeToList(ScmCompiledCode *cc)
839 {
840     ScmObj h = SCM_NIL, t = SCM_NIL;
841 
842     for (u_int i=0; i<(u_int)cc->codeSize; i++) {
843         ScmWord insn = cc->code[i];
844         u_int code = SCM_VM_INSN_CODE(insn);
845         const char *name = Scm_VMInsnName(code);
846 
847         switch (Scm_VMInsnNumParams(code)) {
848         case 0:
849             SCM_APPEND1(h, t, SCM_LIST1(SCM_INTERN(name)));
850             break;
851         case 1:
852             SCM_APPEND1(h, t, SCM_LIST2(SCM_INTERN(name),
853                                         SCM_MAKE_INT(SCM_VM_INSN_ARG(insn))));
854             break;
855         case 2:
856             SCM_APPEND1(h, t, SCM_LIST3(SCM_INTERN(name),
857                                         SCM_MAKE_INT(SCM_VM_INSN_ARG0(insn)),
858                                         SCM_MAKE_INT(SCM_VM_INSN_ARG1(insn))));
859             break;
860         }
861 
862         switch (Scm_VMInsnOperandType(code)) {
863         case SCM_VM_OPERAND_OBJ:;
864         case SCM_VM_OPERAND_CODE:;
865         case SCM_VM_OPERAND_CODES:;
866             SCM_APPEND1(h, t, SCM_OBJ(cc->code[++i]));
867             break;
868         case SCM_VM_OPERAND_ADDR: {
869             u_int off = (u_int)((ScmWord*)cc->code[++i] - cc->code);
870             SCM_APPEND1(h, t, SCM_MAKE_INT(off));
871             break;
872         }
873         case SCM_VM_OPERAND_OBJ_ADDR: {
874             u_int off = (u_int)((ScmWord*)cc->code[i+2] - cc->code);
875             SCM_APPEND(h, t, SCM_LIST2(SCM_OBJ(cc->code[i+1]),
876                                        SCM_MAKE_INT(off)));
877             i += 2;
878             break;
879         }
880         }
881     }
882     return h;
883 }
884 
885 /*===========================================================
886  * VM Instruction introspection
887  */
888 
889 static struct {
890     const char *name;           /* name */
891     int nparams;                /* # of parameters */
892     int operandType;            /* operand type */
893     int flags;                  /* flags */
894 } insn_table[] = {
895 #define DEFINSN(sym, nam, np, type, flags)                     \
896     { nam, np, SCM_CPP_CAT(SCM_VM_OPERAND_, type), flags },
897 #include "vminsn.c"
898 #undef DEFINSN
899 };
900 
901 #define CHECK_CODE(code)                                        \
902     do {                                                        \
903         if (code >= SCM_VM_NUM_INSNS) {                         \
904             Scm_Error("invalid VM instruction code: %d", code); \
905         }                                                       \
906     } while (0)
907 
Scm_VMInsnName(u_int code)908 const char *Scm_VMInsnName(u_int code)
909 {
910     CHECK_CODE(code);
911     return insn_table[code].name;
912 }
913 
Scm_VMInsnNumParams(u_int code)914 int Scm_VMInsnNumParams(u_int code)
915 {
916     CHECK_CODE(code);
917     return insn_table[code].nparams;
918 }
919 
vm_insn_flags(u_int code)920 int vm_insn_flags(u_int code)   /* private for the time being */
921 {
922     CHECK_CODE(code);
923     return insn_table[code].flags;
924 }
925 
Scm_VMInsnOperandType(u_int code)926 int Scm_VMInsnOperandType(u_int code)
927 {
928     CHECK_CODE(code);
929     return insn_table[code].operandType;
930 }
931 
Scm_VMInsnNameToCode(ScmObj name)932 int Scm_VMInsnNameToCode(ScmObj name)
933 {
934     if (SCM_SYMBOLP(name))  name = SCM_OBJ(SCM_SYMBOL_NAME(name));
935     else if (!SCM_STRINGP(name)) {
936         Scm_Error("vm-insn-name->code: requires a symbol or a string, but got %S", name);
937     }
938     const char *n = Scm_GetStringConst(SCM_STRING(name));
939     for (int i=0; i<SCM_VM_NUM_INSNS; i++) {
940         if (strcmp(insn_table[i].name, n) == 0) {
941             return i;
942         }
943     }
944     Scm_Error("vm-insn-name->code: no such instruction: %A", name);
945     return -1;                  /* dummy */
946 }
947 
948 /* (kind of) inversion of VMInsnInspect. */
Scm_VMInsnBuild(ScmObj obj)949 ScmWord Scm_VMInsnBuild(ScmObj obj)
950 {
951     int len = Scm_Length(obj);
952 
953     if (len < 1 || len > 3 || !SCM_SYMBOLP(SCM_CAR(obj))) goto badspec;
954     int code = Scm_VMInsnNameToCode(SCM_CAR(obj));
955 
956     switch (Scm_VMInsnNumParams(code)) {
957     case 0:
958         if (len != 1) {
959             Scm_Error("VM instruction %S takes no parameters, but got %S",
960                       SCM_CAR(obj), obj);
961         }
962         return SCM_VM_INSN(code);
963     case 1: {
964         if (len != 2) {
965             Scm_Error("VM instruction %S takes one parameter, but got %S",
966                       SCM_CAR(obj), obj);
967         }
968         if (!SCM_INTP(SCM_CADR(obj))) goto badspec;
969         int arg0 = SCM_INT_VALUE(SCM_CADR(obj));
970         return SCM_VM_INSN1(code, arg0);
971     }
972     case 2: {
973         if (len != 3) {
974             Scm_Error("VM instruction %S takes two parameters, but got %S",
975                       SCM_CAR(obj), obj);
976         }
977         if (!SCM_INTP(SCM_CADR(obj))) goto badspec;
978         if (!SCM_INTP(SCM_CAR(SCM_CDDR(obj)))) goto badspec;
979         int arg0 = SCM_INT_VALUE(SCM_CADR(obj));
980         int arg1 = SCM_INT_VALUE(SCM_CAR(SCM_CDDR(obj)));
981         return SCM_VM_INSN2(code, arg0, arg1);
982     }
983     }
984     /*FALLTHROUGH*/
985   badspec:
986     Scm_Error("Bad VM insn spec: %S", obj);
987     return 0;       /* dummy */
988 }
989 
990