1 /*
2 * compaux.c - C API bridge for the compiler
3 *
4 * Copyright (c) 2000-2020 Shiro Kawai <shiro@acm.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the authors nor the names of its contributors
18 * may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 /* This file serves as a bridge to the compiler, which is implemented
35 in Scheme (see compile.scm) */
36
37 #include <stdlib.h>
38 #define LIBGAUCHE_BODY
39 #include "gauche.h"
40 #include "gauche/vm.h"
41 #include "gauche/vminsn.h"
42 #include "gauche/class.h"
43 #include "gauche/code.h"
44 #include "gauche/priv/identifierP.h"
45 #include "gauche/priv/builtin-syms.h"
46 #include "gauche/priv/readerP.h" /* for Scm_MakeReadReference */
47
48 /*
49 * Syntax
50 */
51
52
53 /*
54 * Compiler Entry
55 */
56
57 static ScmGloc *compile_gloc = NULL;
58 static ScmGloc *compile_partial_gloc = NULL;
59 static ScmGloc *compile_finish_gloc = NULL;
60 static ScmGloc *init_compiler_gloc = NULL;
61
62 static ScmInternalMutex compile_finish_mutex;
63
Scm_Compile(ScmObj program,ScmObj env)64 ScmObj Scm_Compile(ScmObj program, ScmObj env)
65 {
66 return Scm_ApplyRec2(SCM_GLOC_GET(compile_gloc), program, env);
67 }
68
Scm_CompilePartial(ScmObj program,ScmObj env)69 ScmObj Scm_CompilePartial(ScmObj program, ScmObj env)
70 {
71 return Scm_ApplyRec2(SCM_GLOC_GET(compile_partial_gloc), program, env);
72 }
73
Scm_CompileFinish(ScmCompiledCode * cc)74 void Scm_CompileFinish(ScmCompiledCode *cc)
75 {
76 if (cc->code == NULL) {
77 SCM_INTERNAL_MUTEX_LOCK(compile_finish_mutex);
78 SCM_UNWIND_PROTECT {
79 if (cc->code == NULL) {
80 Scm_ApplyRec1(SCM_GLOC_GET(compile_finish_gloc), SCM_OBJ(cc));
81 }
82 }
83 SCM_WHEN_ERROR {
84 SCM_INTERNAL_MUTEX_UNLOCK(compile_finish_mutex);
85 SCM_NEXT_HANDLER;
86 }
87 SCM_END_PROTECT;
88 SCM_INTERNAL_MUTEX_UNLOCK(compile_finish_mutex);
89 }
90 }
91
92 /*-------------------------------------------------------------
93 * Syntactic closure object
94 */
95
synclo_print(ScmObj obj,ScmPort * port,ScmWriteContext * ctx SCM_UNUSED)96 static void synclo_print(ScmObj obj, ScmPort *port,
97 ScmWriteContext *ctx SCM_UNUSED)
98 {
99 Scm_Printf(port, "#<syntactic-closure %S>",
100 SCM_SYNTACTIC_CLOSURE(obj)->expr);
101 }
102
103 SCM_DEFINE_BUILTIN_CLASS_SIMPLE(Scm_SyntacticClosureClass, synclo_print);
104
Scm_MakeSyntacticClosure(ScmObj env,ScmObj literals,ScmObj expr)105 ScmObj Scm_MakeSyntacticClosure(ScmObj env, ScmObj literals, ScmObj expr)
106 {
107 ScmSyntacticClosure *s = SCM_NEW(ScmSyntacticClosure);
108 SCM_SET_CLASS(s, SCM_CLASS_SYNTACTIC_CLOSURE);
109 s->env = env;
110 s->literals = literals;
111 s->expr = expr;
112 return SCM_OBJ(s);
113 }
114
synclo_env_get(ScmObj obj)115 static ScmObj synclo_env_get(ScmObj obj)
116 {
117 return SCM_SYNTACTIC_CLOSURE(obj)->env;
118 }
119
synclo_literals_get(ScmObj obj)120 static ScmObj synclo_literals_get(ScmObj obj)
121 {
122 return SCM_SYNTACTIC_CLOSURE(obj)->literals;
123 }
124
synclo_expr_get(ScmObj obj)125 static ScmObj synclo_expr_get(ScmObj obj)
126 {
127 return SCM_SYNTACTIC_CLOSURE(obj)->expr;
128 }
129
130 static ScmClassStaticSlotSpec synclo_slots[] = {
131 SCM_CLASS_SLOT_SPEC("env", synclo_env_get, NULL),
132 SCM_CLASS_SLOT_SPEC("literals", synclo_literals_get, NULL),
133 SCM_CLASS_SLOT_SPEC("expr", synclo_expr_get, NULL),
134 SCM_CLASS_SLOT_SPEC_END()
135 };
136
137 /*-------------------------------------------------------------
138 * Identifier object
139 */
140
141 /*
142 * About identifier's ENV slot.
143 * We close identifier's binding environment (list of frames), so that later
144 * we can look up its bindings hygienically. We truncate the frames up to
145 * where the binding occur, for the efficient lookup and comparison.
146 * Notably, the identifiers that are unbound or refer to toplevel variable
147 * has () in its env.
148 * A caveat--we can't truncate frames at the time of construction, since
149 * the entire frame structure may not be fixed while we're processing internal
150 * defines. The identifier may refer to another identifier that will be
151 * inserted later. Thus, we delay the truncation operation until it is
152 * needed.
153 * The ENV slot itself now contains (<flag> . <frames>), where <flag> is #f
154 * if truncation hasn't be done, #t otherwise. ENV should be treated as
155 * an opaque data for the others; you should always get it with
156 * Scm_IdentifierEnv(), and never directly access ENV slot itself.
157 */
158
identifier_print(ScmObj obj,ScmPort * port,ScmWriteContext * ctx SCM_UNUSED)159 static void identifier_print(ScmObj obj, ScmPort *port,
160 ScmWriteContext *ctx SCM_UNUSED)
161 {
162 ScmIdentifier *id = SCM_IDENTIFIER(obj);
163 /* We may want to have an external identifier syntax, so that an
164 identifier can be written out and then read back. It will be
165 convenient if we can embed a reference to other module's global
166 binding directly in the program. However, it can also breaches
167 module-based sandbox implementation, so further consideration is
168 required.
169 */
170 Scm_Printf(port, "#<identifier %S#%S.%x>",
171 id->module->name, id->name, SCM_WORD(id));
172 }
173
174 SCM_DEFINE_BUILTIN_CLASS_SIMPLE(Scm_IdentifierClass, identifier_print);
175
176 /* Truncate local frames */
get_binding_frame(ScmObj var,ScmObj env)177 static ScmObj get_binding_frame(ScmObj var, ScmObj env)
178 {
179 ScmObj frame, fp;
180 SCM_FOR_EACH(frame, env) {
181 if (!SCM_PAIRP(SCM_CAR(frame))) continue;
182 SCM_FOR_EACH(fp, SCM_CDAR(frame)) {
183 if (SCM_CAAR(fp) == var) {
184 SCM_ASSERT(SCM_LISTP(frame));
185 return frame;
186 }
187 }
188 }
189 return SCM_NIL;
190 }
191
Scm_MakeIdentifier(ScmObj name,ScmModule * mod,ScmObj env)192 ScmObj Scm_MakeIdentifier(ScmObj name, ScmModule *mod, ScmObj env)
193 {
194 ScmIdentifier *id = SCM_NEW(ScmIdentifier);
195 SCM_SET_CLASS(id, SCM_CLASS_IDENTIFIER);
196 id->name = name;
197 id->module = mod? mod : SCM_CURRENT_MODULE();
198 id->frames = Scm_Cons(SCM_FALSE, env); /* see the above comment */
199 return SCM_OBJ(id);
200 }
201
Scm_IdentifierEnv(ScmIdentifier * id)202 ScmObj Scm_IdentifierEnv(ScmIdentifier *id)
203 {
204 SCM_ASSERT(SCM_PAIRP(id->frames));
205 if (SCM_FALSEP(SCM_CAR(id->frames))) {
206 /* MT safety: This operation is idempotent, so it's ok if more than
207 one thread execute here. */
208 ScmObj f = get_binding_frame(id->name, SCM_CDR(id->frames));
209 SCM_SET_CDR_UNCHECKED(id->frames, f);
210 SCM_SET_CAR_UNCHECKED(id->frames, SCM_TRUE);
211 }
212 return SCM_CDR(id->frames);
213 }
214
Scm_OutermostIdentifier(ScmIdentifier * id)215 ScmIdentifier *Scm_OutermostIdentifier(ScmIdentifier *id)
216 {
217 while (SCM_IDENTIFIERP(id->name)) {
218 id = SCM_IDENTIFIER(SCM_IDENTIFIER(id)->name);
219 }
220 return id;
221 }
222
Scm_UnwrapIdentifier(ScmIdentifier * id)223 ScmSymbol *Scm_UnwrapIdentifier(ScmIdentifier *id)
224 {
225 ScmObj z = Scm_OutermostIdentifier(id)->name;
226 SCM_ASSERT(SCM_SYMBOLP(z));
227 return SCM_SYMBOL(z);
228 }
229
230 /* returns global binding of the identifier */
Scm_IdentifierGlobalBinding(ScmIdentifier * id)231 ScmGloc *Scm_IdentifierGlobalBinding(ScmIdentifier *id)
232 {
233 ScmIdentifier *z = Scm_OutermostIdentifier(id);
234 return Scm_FindBinding(z->module, SCM_SYMBOL(z->name), 0);
235 }
236
237 /* returns true if SYM has the same binding with ID in ENV. */
Scm_IdentifierBindingEqv(ScmIdentifier * id,ScmSymbol * sym,ScmObj env)238 int Scm_IdentifierBindingEqv(ScmIdentifier *id, ScmSymbol *sym, ScmObj env)
239 {
240 ScmObj env1 = Scm_IdentifierEnv(id);
241 ScmObj env2 = get_binding_frame(SCM_OBJ(sym), env);
242 return (env1 == env2);
243 }
244
Scm_WrapIdentifier(ScmIdentifier * orig)245 ScmObj Scm_WrapIdentifier(ScmIdentifier *orig)
246 {
247 ScmIdentifier *id = SCM_NEW(ScmIdentifier);
248 SCM_SET_CLASS(id, SCM_CLASS_IDENTIFIER);
249 id->name = SCM_OBJ(orig);
250 id->module = orig->module;
251 id->frames = orig->frames;
252 return SCM_OBJ(id);
253 }
254
identifier_name_get(ScmObj obj)255 static ScmObj identifier_name_get(ScmObj obj)
256 {
257 return SCM_OBJ(SCM_IDENTIFIER(obj)->name);
258 }
259
identifier_name_set(ScmObj obj,ScmObj val)260 static void identifier_name_set(ScmObj obj, ScmObj val)
261 {
262 if (!SCM_SYMBOLP(val) && !SCM_IDENTIFIERP(val)) {
263 Scm_Error("symbol or identifier required, but got %S", val);
264 }
265 SCM_IDENTIFIER(obj)->name = val;
266 }
267
identifier_module_get(ScmObj obj)268 static ScmObj identifier_module_get(ScmObj obj)
269 {
270 return SCM_OBJ(SCM_IDENTIFIER(obj)->module);
271 }
272
identifier_env_get(ScmObj obj)273 static ScmObj identifier_env_get(ScmObj obj)
274 {
275 return Scm_IdentifierEnv(SCM_IDENTIFIER(obj));
276 }
277
278 /* Identifier name can be mutated during macro expansion to avoid
279 conflicts on macro-inserted toplevel identifiers. See
280 %rename-toplevel-identifier! in compiler pass1.
281 Other than that, identifiers must be treated as immutable objects. */
282 static ScmClassStaticSlotSpec identifier_slots[] = {
283 SCM_CLASS_SLOT_SPEC("name", identifier_name_get, identifier_name_set),
284 SCM_CLASS_SLOT_SPEC("module", identifier_module_get, NULL),
285 SCM_CLASS_SLOT_SPEC("env", identifier_env_get, NULL),
286 SCM_CLASS_SLOT_SPEC_END()
287 };
288
289 /*------------------------------------------------------------------
290 * Unwrapping syntax
291 *
292 * Traverses a form and replaces identifiers for bare symbols.
293 * It's complicated because the form may have cycles.
294 * We use Scm_ReadReference as the temporary placeholder to handle
295 * circular structure. All the read references are replaced before
296 * Scm_UnwrapSyntax returns.
297 */
298
299 typedef struct unwrap_ctx_rec {
300 ScmHashCore history; /* object -> #f | R() | R(v)
301 where
302 SCM_UNBOUND - the object has been visited
303 only once
304 value - the object is realized
305 R() - empty ReadReference. The object
306 has been visited more than once,
307 but its final replacement hasn't
308 been materialized yet.
309 R(v) - ReadReference with value.
310 The object should correspond to
311 the value.
312 */
313 ScmHashCore refs; /* location -> read reference*/
314 int immutable; /* flag */
315 } unwrap_ctx;
316
register_location(unwrap_ctx * ctx,ScmObj * loc,ScmObj ref)317 static void register_location(unwrap_ctx *ctx,
318 ScmObj *loc,
319 ScmObj ref)
320 {
321 if (!SCM_READ_REFERENCE_P(ref)) return;
322 if (SCM_READ_REFERENCE_REALIZED(ref)) {
323 *loc = SCM_READ_REFERENCE(ref)->value;
324 } else {
325 ScmDictEntry *e =
326 Scm_HashCoreSearch(&ctx->refs, (intptr_t)loc, SCM_DICT_CREATE);
327 e->value = (intptr_t)ref;
328 }
329 }
330
fill_history(ScmDictEntry * e,ScmObj value)331 static void fill_history(ScmDictEntry *e, ScmObj value)
332 {
333 if (e->value) {
334 if (SCM_READ_REFERENCE_P(e->value)) {
335 SCM_READ_REFERENCE(e->value)->value = value;
336 }
337 } else {
338 e->value = (intptr_t)value;
339 }
340 }
341
patch_locations(unwrap_ctx * ctx)342 static void patch_locations(unwrap_ctx *ctx)
343 {
344 ScmHashIter iter;
345 Scm_HashIterInit(&iter, &ctx->refs);
346 for (;;) {
347 ScmDictEntry *e = Scm_HashIterNext(&iter);
348 if (e == NULL) break;
349 ScmObj *loc = (ScmObj*)SCM_DICT_KEY(e);
350 ScmReadReference *ref = SCM_READ_REFERENCE(SCM_DICT_VALUE(e));
351 if (SCM_READ_REFERENCE_P(ref)) {
352 SCM_ASSERT(SCM_READ_REFERENCE_REALIZED(ref));
353 *loc = SCM_READ_REFERENCE(ref)->value;
354 }
355 }
356 }
357
358 /* Returns either original form, or converted form, or a read reference. */
unwrap_rec(ScmObj form,unwrap_ctx * ctx)359 static ScmObj unwrap_rec(ScmObj form, unwrap_ctx *ctx)
360 {
361 if (!SCM_PTRP(form)) return form;
362 ScmDictEntry *e = Scm_HashCoreSearch(&ctx->history,
363 (intptr_t)form,
364 SCM_DICT_GET);
365 if (e) {
366 /* We've visited FORM before. If this is the second time,
367 we allocate ReadReference to hold the value. It will be
368 filled later by the caller. */
369 if (!e->value) {
370 e->value = (intptr_t)Scm_MakeReadReference();
371 }
372 return SCM_DICT_VALUE(e);
373 }
374
375 if (SCM_PAIRP(form)) {
376 e = Scm_HashCoreSearch(&ctx->history, (intptr_t)form, SCM_DICT_CREATE);
377 ScmObj ca = unwrap_rec(SCM_CAR(form), ctx);
378 ScmObj cd = unwrap_rec(SCM_CDR(form), ctx);
379 if (ca == SCM_CAR(form) && cd == SCM_CDR(form)
380 && (!ctx->immutable || Scm_ImmutablePairP(form))) {
381 fill_history(e, form);
382 return form;
383 }
384 ScmObj p = (ctx->immutable
385 ? Scm_MakeImmutablePair(ca, cd)
386 : Scm_Cons(ca, cd));
387 fill_history(e, p);
388 register_location(ctx, &SCM_PAIR(p)->car, ca);
389 register_location(ctx, &SCM_PAIR(p)->cdr, cd);
390 return p;
391 }
392 if (SCM_IDENTIFIERP(form)) {
393 return SCM_OBJ(Scm_UnwrapIdentifier(SCM_IDENTIFIER(form)));
394 }
395 if (SCM_VECTORP(form)) {
396 int len = SCM_VECTOR_SIZE(form);
397 ScmObj *pelt = SCM_VECTOR_ELEMENTS(form);
398 e = Scm_HashCoreSearch(&ctx->history, (intptr_t)form, SCM_DICT_CREATE);
399 for (int i=0; i<len; i++, pelt++) {
400 ScmObj elt = unwrap_rec(*pelt, ctx);
401 if (elt != *pelt
402 || (ctx->immutable && !SCM_VECTOR_IMMUTABLE_P(form))) {
403 ScmObj newvec = Scm_MakeVector(len, SCM_FALSE);
404 pelt = SCM_VECTOR_ELEMENTS(form);
405 int j;
406 for (j=0; j<i; j++, pelt++) {
407 SCM_VECTOR_ELEMENT(newvec, j) = *pelt;
408 }
409 register_location(ctx, &SCM_VECTOR_ELEMENT(newvec, i), elt);
410 SCM_VECTOR_ELEMENT(newvec, i) = elt;
411 for (j=i+1, pelt++; j<len; j++, pelt++) {
412 elt = unwrap_rec(*pelt, ctx);
413 register_location(ctx, &SCM_VECTOR_ELEMENT(newvec, i), elt);
414 SCM_VECTOR_ELEMENT(newvec, j) = elt;
415 }
416 if (ctx->immutable) {
417 SCM_VECTOR_IMMUTABLE_SET(newvec, TRUE);
418 }
419 fill_history(e, newvec);
420 return newvec;
421 }
422 }
423 fill_history(e, form);
424 return form;
425 }
426 return form;
427 }
428
429 #if GAUCHE_API_VERSION < 1000
Scm_UnwrapSyntax(ScmObj form)430 ScmObj Scm_UnwrapSyntax(ScmObj form)
431 {
432 return Scm_UnwrapSyntax2(form, FALSE);
433 }
Scm_UnwrapSyntax2(ScmObj form,int immutablep)434 ScmObj Scm_UnwrapSyntax2(ScmObj form, int immutablep)
435 #else /* GAUCHE_API_VERSION >= 1000 */
436 ScmObj Scm_UnwrapSyntax(ScmObj form, int immutablep)
437 #endif /* GAUCHE_API_VERSION >= 1000 */
438 {
439 unwrap_ctx ctx;
440 Scm_HashCoreInitSimple(&ctx.history, SCM_HASH_EQ, 0, NULL);
441 Scm_HashCoreInitSimple(&ctx.refs, SCM_HASH_EQ, 0, NULL);
442 ctx.immutable = immutablep;
443 ScmObj r = unwrap_rec(form, &ctx);
444 patch_locations(&ctx);
445 return r;
446 }
447
448 /*===================================================================
449 * Initializer
450 */
451
452 #define INIT_GLOC(gloc, name, mod) \
453 do { \
454 gloc = Scm_FindBinding(mod, SCM_SYMBOL(SCM_INTERN(name)), \
455 SCM_BINDING_STAY_IN_MODULE); \
456 if (gloc == NULL) { \
457 Scm_Panic("no " name " procedure in gauche.internal"); \
458 } \
459 } while (0)
460
Scm__InitCompaux(void)461 void Scm__InitCompaux(void)
462 {
463 ScmModule *g = Scm_GaucheModule();
464 ScmModule *gi = Scm_GaucheInternalModule();
465
466 Scm_InitStaticClass(SCM_CLASS_SYNTACTIC_CLOSURE, "<syntactic-closure>", g,
467 synclo_slots, 0);
468 Scm_InitStaticClass(SCM_CLASS_IDENTIFIER, "<identifier>", g,
469 identifier_slots, 0);
470
471 SCM_INTERNAL_MUTEX_INIT(compile_finish_mutex);
472
473 /* Grab the entry points of compile.scm */
474 INIT_GLOC(init_compiler_gloc, "init-compiler", gi);
475 INIT_GLOC(compile_gloc, "compile", gi);
476 INIT_GLOC(compile_partial_gloc, "compile-partial", gi);
477 INIT_GLOC(compile_finish_gloc, "compile-finish", gi);
478
479 Scm_ApplyRec0(SCM_GLOC_GET(init_compiler_gloc));
480 }
481