1 /*
2  * compaux.c - C API bridge for the compiler
3  *
4  *   Copyright (c) 2000-2020  Shiro Kawai  <shiro@acm.org>
5  *
6  *   Redistribution and use in source and binary forms, with or without
7  *   modification, are permitted provided that the following conditions
8  *   are met:
9  *
10  *   1. Redistributions of source code must retain the above copyright
11  *      notice, this list of conditions and the following disclaimer.
12  *
13  *   2. Redistributions in binary form must reproduce the above copyright
14  *      notice, this list of conditions and the following disclaimer in the
15  *      documentation and/or other materials provided with the distribution.
16  *
17  *   3. Neither the name of the authors nor the names of its contributors
18  *      may be used to endorse or promote products derived from this
19  *      software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27  *   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  *   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  *   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  *   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  *   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /* This file serves as a bridge to the compiler, which is implemented
35    in Scheme (see compile.scm) */
36 
37 #include <stdlib.h>
38 #define LIBGAUCHE_BODY
39 #include "gauche.h"
40 #include "gauche/vm.h"
41 #include "gauche/vminsn.h"
42 #include "gauche/class.h"
43 #include "gauche/code.h"
44 #include "gauche/priv/identifierP.h"
45 #include "gauche/priv/builtin-syms.h"
46 #include "gauche/priv/readerP.h" /* for Scm_MakeReadReference */
47 
48 /*
49  * Syntax
50  */
51 
52 
53 /*
54  * Compiler Entry
55  */
56 
57 static ScmGloc *compile_gloc = NULL;
58 static ScmGloc *compile_partial_gloc = NULL;
59 static ScmGloc *compile_finish_gloc = NULL;
60 static ScmGloc *init_compiler_gloc = NULL;
61 
62 static ScmInternalMutex compile_finish_mutex;
63 
Scm_Compile(ScmObj program,ScmObj env)64 ScmObj Scm_Compile(ScmObj program, ScmObj env)
65 {
66     return Scm_ApplyRec2(SCM_GLOC_GET(compile_gloc), program, env);
67 }
68 
Scm_CompilePartial(ScmObj program,ScmObj env)69 ScmObj Scm_CompilePartial(ScmObj program, ScmObj env)
70 {
71     return Scm_ApplyRec2(SCM_GLOC_GET(compile_partial_gloc), program, env);
72 }
73 
Scm_CompileFinish(ScmCompiledCode * cc)74 void Scm_CompileFinish(ScmCompiledCode *cc)
75 {
76     if (cc->code == NULL) {
77         SCM_INTERNAL_MUTEX_LOCK(compile_finish_mutex);
78         SCM_UNWIND_PROTECT {
79             if (cc->code == NULL) {
80                 Scm_ApplyRec1(SCM_GLOC_GET(compile_finish_gloc), SCM_OBJ(cc));
81             }
82         }
83         SCM_WHEN_ERROR {
84             SCM_INTERNAL_MUTEX_UNLOCK(compile_finish_mutex);
85             SCM_NEXT_HANDLER;
86         }
87         SCM_END_PROTECT;
88         SCM_INTERNAL_MUTEX_UNLOCK(compile_finish_mutex);
89     }
90 }
91 
92 /*-------------------------------------------------------------
93  * Syntactic closure object
94  */
95 
synclo_print(ScmObj obj,ScmPort * port,ScmWriteContext * ctx SCM_UNUSED)96 static void synclo_print(ScmObj obj, ScmPort *port,
97                          ScmWriteContext *ctx SCM_UNUSED)
98 {
99     Scm_Printf(port, "#<syntactic-closure %S>",
100                SCM_SYNTACTIC_CLOSURE(obj)->expr);
101 }
102 
103 SCM_DEFINE_BUILTIN_CLASS_SIMPLE(Scm_SyntacticClosureClass, synclo_print);
104 
Scm_MakeSyntacticClosure(ScmObj env,ScmObj literals,ScmObj expr)105 ScmObj Scm_MakeSyntacticClosure(ScmObj env, ScmObj literals, ScmObj expr)
106 {
107     ScmSyntacticClosure *s = SCM_NEW(ScmSyntacticClosure);
108     SCM_SET_CLASS(s, SCM_CLASS_SYNTACTIC_CLOSURE);
109     s->env = env;
110     s->literals = literals;
111     s->expr = expr;
112     return SCM_OBJ(s);
113 }
114 
synclo_env_get(ScmObj obj)115 static ScmObj synclo_env_get(ScmObj obj)
116 {
117     return SCM_SYNTACTIC_CLOSURE(obj)->env;
118 }
119 
synclo_literals_get(ScmObj obj)120 static ScmObj synclo_literals_get(ScmObj obj)
121 {
122     return SCM_SYNTACTIC_CLOSURE(obj)->literals;
123 }
124 
synclo_expr_get(ScmObj obj)125 static ScmObj synclo_expr_get(ScmObj obj)
126 {
127     return SCM_SYNTACTIC_CLOSURE(obj)->expr;
128 }
129 
130 static ScmClassStaticSlotSpec synclo_slots[] = {
131     SCM_CLASS_SLOT_SPEC("env", synclo_env_get, NULL),
132     SCM_CLASS_SLOT_SPEC("literals", synclo_literals_get, NULL),
133     SCM_CLASS_SLOT_SPEC("expr", synclo_expr_get, NULL),
134     SCM_CLASS_SLOT_SPEC_END()
135 };
136 
137 /*-------------------------------------------------------------
138  * Identifier object
139  */
140 
141 /*
142  * About identifier's ENV slot.
143  * We close identifier's binding environment (list of frames), so that later
144  * we can look up its bindings hygienically.  We truncate the frames up to
145  * where the binding occur, for the efficient lookup and comparison.
146  * Notably, the identifiers that are unbound or refer to toplevel variable
147  * has () in its env.
148  * A caveat--we can't truncate frames at the time of construction, since
149  * the entire frame structure may not be fixed while we're processing internal
150  * defines.  The identifier may refer to another identifier that will be
151  * inserted later.  Thus, we delay the truncation operation until it is
152  * needed.
153  * The ENV slot itself now contains (<flag> . <frames>), where <flag> is #f
154  * if truncation hasn't be done, #t otherwise.  ENV should be treated as
155  * an opaque data for the others; you should always get it with
156  * Scm_IdentifierEnv(), and never directly access ENV slot itself.
157  */
158 
identifier_print(ScmObj obj,ScmPort * port,ScmWriteContext * ctx SCM_UNUSED)159 static void identifier_print(ScmObj obj, ScmPort *port,
160                              ScmWriteContext *ctx SCM_UNUSED)
161 {
162     ScmIdentifier *id = SCM_IDENTIFIER(obj);
163     /* We may want to have an external identifier syntax, so that an
164        identifier can be written out and then read back.  It will be
165        convenient if we can embed a reference to other module's global
166        binding directly in the program.  However, it can also breaches
167        module-based sandbox implementation, so further consideration is
168        required.
169     */
170     Scm_Printf(port, "#<identifier %S#%S.%x>",
171                id->module->name, id->name, SCM_WORD(id));
172 }
173 
174 SCM_DEFINE_BUILTIN_CLASS_SIMPLE(Scm_IdentifierClass, identifier_print);
175 
176 /* Truncate local frames */
get_binding_frame(ScmObj var,ScmObj env)177 static ScmObj get_binding_frame(ScmObj var, ScmObj env)
178 {
179     ScmObj frame, fp;
180     SCM_FOR_EACH(frame, env) {
181         if (!SCM_PAIRP(SCM_CAR(frame))) continue;
182         SCM_FOR_EACH(fp, SCM_CDAR(frame)) {
183             if (SCM_CAAR(fp) == var) {
184                 SCM_ASSERT(SCM_LISTP(frame));
185                 return frame;
186             }
187         }
188     }
189     return SCM_NIL;
190 }
191 
Scm_MakeIdentifier(ScmObj name,ScmModule * mod,ScmObj env)192 ScmObj Scm_MakeIdentifier(ScmObj name, ScmModule *mod, ScmObj env)
193 {
194     ScmIdentifier *id = SCM_NEW(ScmIdentifier);
195     SCM_SET_CLASS(id, SCM_CLASS_IDENTIFIER);
196     id->name = name;
197     id->module = mod? mod : SCM_CURRENT_MODULE();
198     id->frames = Scm_Cons(SCM_FALSE, env); /* see the above comment */
199     return SCM_OBJ(id);
200 }
201 
Scm_IdentifierEnv(ScmIdentifier * id)202 ScmObj Scm_IdentifierEnv(ScmIdentifier *id)
203 {
204     SCM_ASSERT(SCM_PAIRP(id->frames));
205     if (SCM_FALSEP(SCM_CAR(id->frames))) {
206         /* MT safety: This operation is idempotent, so it's ok if more than
207            one thread execute here. */
208         ScmObj f = get_binding_frame(id->name, SCM_CDR(id->frames));
209         SCM_SET_CDR_UNCHECKED(id->frames, f);
210         SCM_SET_CAR_UNCHECKED(id->frames, SCM_TRUE);
211     }
212     return SCM_CDR(id->frames);
213 }
214 
Scm_OutermostIdentifier(ScmIdentifier * id)215 ScmIdentifier *Scm_OutermostIdentifier(ScmIdentifier *id)
216 {
217     while (SCM_IDENTIFIERP(id->name)) {
218         id = SCM_IDENTIFIER(SCM_IDENTIFIER(id)->name);
219     }
220     return id;
221 }
222 
Scm_UnwrapIdentifier(ScmIdentifier * id)223 ScmSymbol *Scm_UnwrapIdentifier(ScmIdentifier *id)
224 {
225     ScmObj z = Scm_OutermostIdentifier(id)->name;
226     SCM_ASSERT(SCM_SYMBOLP(z));
227     return SCM_SYMBOL(z);
228 }
229 
230 /* returns global binding of the identifier */
Scm_IdentifierGlobalBinding(ScmIdentifier * id)231 ScmGloc *Scm_IdentifierGlobalBinding(ScmIdentifier *id)
232 {
233     ScmIdentifier *z = Scm_OutermostIdentifier(id);
234     return Scm_FindBinding(z->module, SCM_SYMBOL(z->name), 0);
235 }
236 
237 /* returns true if SYM has the same binding with ID in ENV. */
Scm_IdentifierBindingEqv(ScmIdentifier * id,ScmSymbol * sym,ScmObj env)238 int Scm_IdentifierBindingEqv(ScmIdentifier *id, ScmSymbol *sym, ScmObj env)
239 {
240     ScmObj env1 = Scm_IdentifierEnv(id);
241     ScmObj env2 = get_binding_frame(SCM_OBJ(sym), env);
242     return (env1 == env2);
243 }
244 
Scm_WrapIdentifier(ScmIdentifier * orig)245 ScmObj Scm_WrapIdentifier(ScmIdentifier *orig)
246 {
247     ScmIdentifier *id = SCM_NEW(ScmIdentifier);
248     SCM_SET_CLASS(id, SCM_CLASS_IDENTIFIER);
249     id->name = SCM_OBJ(orig);
250     id->module = orig->module;
251     id->frames = orig->frames;
252     return SCM_OBJ(id);
253 }
254 
identifier_name_get(ScmObj obj)255 static ScmObj identifier_name_get(ScmObj obj)
256 {
257     return SCM_OBJ(SCM_IDENTIFIER(obj)->name);
258 }
259 
identifier_name_set(ScmObj obj,ScmObj val)260 static void   identifier_name_set(ScmObj obj, ScmObj val)
261 {
262     if (!SCM_SYMBOLP(val) && !SCM_IDENTIFIERP(val)) {
263         Scm_Error("symbol or identifier required, but got %S", val);
264     }
265     SCM_IDENTIFIER(obj)->name = val;
266 }
267 
identifier_module_get(ScmObj obj)268 static ScmObj identifier_module_get(ScmObj obj)
269 {
270     return SCM_OBJ(SCM_IDENTIFIER(obj)->module);
271 }
272 
identifier_env_get(ScmObj obj)273 static ScmObj identifier_env_get(ScmObj obj)
274 {
275     return Scm_IdentifierEnv(SCM_IDENTIFIER(obj));
276 }
277 
278 /* Identifier name can be mutated during macro expansion to avoid
279    conflicts on macro-inserted toplevel identifiers.  See
280    %rename-toplevel-identifier! in compiler pass1.
281    Other than that, identifiers must be treated as immutable objects. */
282 static ScmClassStaticSlotSpec identifier_slots[] = {
283     SCM_CLASS_SLOT_SPEC("name", identifier_name_get, identifier_name_set),
284     SCM_CLASS_SLOT_SPEC("module", identifier_module_get, NULL),
285     SCM_CLASS_SLOT_SPEC("env", identifier_env_get, NULL),
286     SCM_CLASS_SLOT_SPEC_END()
287 };
288 
289 /*------------------------------------------------------------------
290  * Unwrapping syntax
291  *
292  * Traverses a form and replaces identifiers for bare symbols.
293  * It's complicated because the form may have cycles.
294  * We use Scm_ReadReference as the temporary placeholder to handle
295  * circular structure.  All the read references are replaced before
296  * Scm_UnwrapSyntax returns.
297  */
298 
299 typedef struct unwrap_ctx_rec {
300     ScmHashCore history;        /* object -> #f | R() | R(v)
301                                    where
302                                      SCM_UNBOUND - the object has been visited
303                                          only once
304                                      value - the object is realized
305                                      R() - empty ReadReference.  The object
306                                          has been visited more than once,
307                                          but its final replacement hasn't
308                                          been materialized yet.
309                                      R(v) - ReadReference with value.
310                                          The object should correspond to
311                                          the value.
312                                  */
313     ScmHashCore refs;           /* location -> read reference*/
314     int immutable;              /* flag */
315 } unwrap_ctx;
316 
register_location(unwrap_ctx * ctx,ScmObj * loc,ScmObj ref)317 static void register_location(unwrap_ctx *ctx,
318                               ScmObj *loc,
319                               ScmObj ref)
320 {
321     if (!SCM_READ_REFERENCE_P(ref)) return;
322     if (SCM_READ_REFERENCE_REALIZED(ref)) {
323         *loc = SCM_READ_REFERENCE(ref)->value;
324     } else {
325         ScmDictEntry *e =
326             Scm_HashCoreSearch(&ctx->refs, (intptr_t)loc, SCM_DICT_CREATE);
327         e->value = (intptr_t)ref;
328     }
329 }
330 
fill_history(ScmDictEntry * e,ScmObj value)331 static void fill_history(ScmDictEntry *e, ScmObj value)
332 {
333     if (e->value) {
334         if (SCM_READ_REFERENCE_P(e->value)) {
335             SCM_READ_REFERENCE(e->value)->value = value;
336         }
337     } else {
338         e->value = (intptr_t)value;
339     }
340 }
341 
patch_locations(unwrap_ctx * ctx)342 static void patch_locations(unwrap_ctx *ctx)
343 {
344     ScmHashIter iter;
345     Scm_HashIterInit(&iter, &ctx->refs);
346     for (;;) {
347         ScmDictEntry *e = Scm_HashIterNext(&iter);
348         if (e == NULL) break;
349         ScmObj *loc = (ScmObj*)SCM_DICT_KEY(e);
350         ScmReadReference *ref = SCM_READ_REFERENCE(SCM_DICT_VALUE(e));
351         if (SCM_READ_REFERENCE_P(ref)) {
352             SCM_ASSERT(SCM_READ_REFERENCE_REALIZED(ref));
353             *loc = SCM_READ_REFERENCE(ref)->value;
354         }
355     }
356 }
357 
358 /* Returns either original form, or converted form, or a read reference. */
unwrap_rec(ScmObj form,unwrap_ctx * ctx)359 static ScmObj unwrap_rec(ScmObj form, unwrap_ctx *ctx)
360 {
361     if (!SCM_PTRP(form)) return form;
362     ScmDictEntry *e = Scm_HashCoreSearch(&ctx->history,
363                                          (intptr_t)form,
364                                          SCM_DICT_GET);
365     if (e) {
366         /* We've visited FORM before.  If this is the second time,
367            we allocate ReadReference to hold the value.  It will be
368            filled later by the caller. */
369         if (!e->value) {
370             e->value = (intptr_t)Scm_MakeReadReference();
371         }
372         return SCM_DICT_VALUE(e);
373     }
374 
375     if (SCM_PAIRP(form)) {
376         e = Scm_HashCoreSearch(&ctx->history, (intptr_t)form, SCM_DICT_CREATE);
377         ScmObj ca = unwrap_rec(SCM_CAR(form), ctx);
378         ScmObj cd = unwrap_rec(SCM_CDR(form), ctx);
379         if (ca == SCM_CAR(form) && cd == SCM_CDR(form)
380             && (!ctx->immutable || Scm_ImmutablePairP(form))) {
381             fill_history(e, form);
382             return form;
383         }
384         ScmObj p = (ctx->immutable
385                     ? Scm_MakeImmutablePair(ca, cd)
386                     : Scm_Cons(ca, cd));
387         fill_history(e, p);
388         register_location(ctx, &SCM_PAIR(p)->car, ca);
389         register_location(ctx, &SCM_PAIR(p)->cdr, cd);
390         return p;
391     }
392     if (SCM_IDENTIFIERP(form)) {
393         return SCM_OBJ(Scm_UnwrapIdentifier(SCM_IDENTIFIER(form)));
394     }
395     if (SCM_VECTORP(form)) {
396         int len = SCM_VECTOR_SIZE(form);
397         ScmObj *pelt = SCM_VECTOR_ELEMENTS(form);
398         e = Scm_HashCoreSearch(&ctx->history, (intptr_t)form, SCM_DICT_CREATE);
399         for (int i=0; i<len; i++, pelt++) {
400             ScmObj elt = unwrap_rec(*pelt, ctx);
401             if (elt != *pelt
402                 || (ctx->immutable && !SCM_VECTOR_IMMUTABLE_P(form))) {
403                 ScmObj newvec = Scm_MakeVector(len, SCM_FALSE);
404                 pelt = SCM_VECTOR_ELEMENTS(form);
405                 int j;
406                 for (j=0; j<i; j++, pelt++) {
407                     SCM_VECTOR_ELEMENT(newvec, j) = *pelt;
408                 }
409                 register_location(ctx, &SCM_VECTOR_ELEMENT(newvec, i), elt);
410                 SCM_VECTOR_ELEMENT(newvec, i) = elt;
411                 for (j=i+1, pelt++; j<len; j++, pelt++) {
412                     elt = unwrap_rec(*pelt, ctx);
413                     register_location(ctx, &SCM_VECTOR_ELEMENT(newvec, i), elt);
414                     SCM_VECTOR_ELEMENT(newvec, j) = elt;
415                 }
416                 if (ctx->immutable) {
417                     SCM_VECTOR_IMMUTABLE_SET(newvec, TRUE);
418                 }
419                 fill_history(e, newvec);
420                 return newvec;
421             }
422         }
423         fill_history(e, form);
424         return form;
425     }
426     return form;
427 }
428 
429 #if GAUCHE_API_VERSION < 1000
Scm_UnwrapSyntax(ScmObj form)430 ScmObj Scm_UnwrapSyntax(ScmObj form)
431 {
432     return Scm_UnwrapSyntax2(form, FALSE);
433 }
Scm_UnwrapSyntax2(ScmObj form,int immutablep)434 ScmObj Scm_UnwrapSyntax2(ScmObj form, int immutablep)
435 #else  /* GAUCHE_API_VERSION >= 1000 */
436 ScmObj Scm_UnwrapSyntax(ScmObj form, int immutablep)
437 #endif /* GAUCHE_API_VERSION >= 1000 */
438 {
439     unwrap_ctx ctx;
440     Scm_HashCoreInitSimple(&ctx.history, SCM_HASH_EQ, 0, NULL);
441     Scm_HashCoreInitSimple(&ctx.refs, SCM_HASH_EQ, 0, NULL);
442     ctx.immutable = immutablep;
443     ScmObj r = unwrap_rec(form, &ctx);
444     patch_locations(&ctx);
445     return r;
446 }
447 
448 /*===================================================================
449  * Initializer
450  */
451 
452 #define INIT_GLOC(gloc, name, mod)                                      \
453     do {                                                                \
454         gloc = Scm_FindBinding(mod, SCM_SYMBOL(SCM_INTERN(name)),       \
455                                SCM_BINDING_STAY_IN_MODULE);             \
456         if (gloc == NULL) {                                             \
457             Scm_Panic("no " name " procedure in gauche.internal");      \
458         }                                                               \
459     } while (0)
460 
Scm__InitCompaux(void)461 void Scm__InitCompaux(void)
462 {
463     ScmModule *g = Scm_GaucheModule();
464     ScmModule *gi = Scm_GaucheInternalModule();
465 
466     Scm_InitStaticClass(SCM_CLASS_SYNTACTIC_CLOSURE, "<syntactic-closure>", g,
467                         synclo_slots, 0);
468     Scm_InitStaticClass(SCM_CLASS_IDENTIFIER, "<identifier>", g,
469                         identifier_slots, 0);
470 
471     SCM_INTERNAL_MUTEX_INIT(compile_finish_mutex);
472 
473     /* Grab the entry points of compile.scm */
474     INIT_GLOC(init_compiler_gloc,   "init-compiler", gi);
475     INIT_GLOC(compile_gloc,         "compile",       gi);
476     INIT_GLOC(compile_partial_gloc, "compile-partial", gi);
477     INIT_GLOC(compile_finish_gloc,  "compile-finish",  gi);
478 
479     Scm_ApplyRec0(SCM_GLOC_GET(init_compiler_gloc));
480 }
481