1 #ifndef _GNU_SOURCE
2 #define _GNU_SOURCE // for strdup
3 #endif
4 #include <assert.h>
5 #include <math.h>
6 #include <string.h>
7 #include <stdlib.h>
8 #include <unistd.h>
9 #include "compile.h"
10 #include "bytecode.h"
11 #include "locfile.h"
12 #include "jv_alloc.h"
13 #include "linker.h"
14
15 /*
16 The intermediate representation for jq filters is as a sequence of
17 struct inst, which form a doubly-linked list via the next and prev
18 pointers.
19
20 A "block" represents a sequence of "struct inst", which may be
21 empty.
22
23 Blocks are generated by the parser bottom-up, so may have free
24 variables (refer to things not defined). See inst.bound_by and
25 inst.symbol.
26 */
27 struct inst {
28 struct inst* next;
29 struct inst* prev;
30
31 opcode op;
32
33 struct {
34 uint16_t intval;
35 struct inst* target;
36 jv constant;
37 const struct cfunction* cfunc;
38 } imm;
39
40 struct locfile* locfile;
41 location source;
42
43 // Binding
44 // An instruction requiring binding (for parameters/variables/functions)
45 // is in one of three states:
46 // inst->bound_by = NULL - Unbound free variable
47 // inst->bound_by = inst - This instruction binds a variable
48 // inst->bound_by = other - Uses variable bound by other instruction
49 // Unbound instructions (references to other things that may or may not
50 // exist) are created by "gen_foo_unbound", and bindings are created by
51 // block_bind(definition, body), which binds all instructions in
52 // body which are unboudn and refer to "definition" by name.
53 struct inst* bound_by;
54 char* symbol;
55
56 int nformals;
57 int nactuals;
58
59 block subfn; // used by CLOSURE_CREATE (body of function)
60 block arglist; // used by CLOSURE_CREATE (formals) and CALL_JQ (arguments)
61
62 // This instruction is compiled as part of which function?
63 // (only used during block_compile)
64 struct bytecode* compiled;
65
66 int bytecode_pos; // position just after this insn
67 };
68
inst_new(opcode op)69 static inst* inst_new(opcode op) {
70 inst* i = jv_mem_alloc(sizeof(inst));
71 i->next = i->prev = 0;
72 i->op = op;
73 i->bytecode_pos = -1;
74 i->bound_by = 0;
75 i->symbol = 0;
76 i->nformals = -1;
77 i->nactuals = -1;
78 i->subfn = gen_noop();
79 i->arglist = gen_noop();
80 i->source = UNKNOWN_LOCATION;
81 i->locfile = 0;
82 return i;
83 }
84
inst_free(struct inst * i)85 static void inst_free(struct inst* i) {
86 jv_mem_free(i->symbol);
87 block_free(i->subfn);
88 block_free(i->arglist);
89 if (i->locfile)
90 locfile_free(i->locfile);
91 if (opcode_describe(i->op)->flags & OP_HAS_CONSTANT) {
92 jv_free(i->imm.constant);
93 }
94 jv_mem_free(i);
95 }
96
inst_block(inst * i)97 static block inst_block(inst* i) {
98 block b = {i,i};
99 return b;
100 }
101
block_is_single(block b)102 int block_is_single(block b) {
103 return b.first && b.first == b.last;
104 }
105
block_take(block * b)106 static inst* block_take(block* b) {
107 if (b->first == 0) return 0;
108 inst* i = b->first;
109 if (i->next) {
110 i->next->prev = 0;
111 b->first = i->next;
112 i->next = 0;
113 } else {
114 b->first = 0;
115 b->last = 0;
116 }
117 return i;
118 }
119
gen_location(location loc,struct locfile * l,block b)120 block gen_location(location loc, struct locfile* l, block b) {
121 for (inst* i = b.first; i; i = i->next) {
122 if (i->source.start == UNKNOWN_LOCATION.start &&
123 i->source.end == UNKNOWN_LOCATION.end) {
124 i->source = loc;
125 i->locfile = locfile_retain(l);
126 }
127 }
128 return b;
129 }
130
gen_noop()131 block gen_noop() {
132 block b = {0,0};
133 return b;
134 }
135
block_is_noop(block b)136 int block_is_noop(block b) {
137 return (b.first == 0 && b.last == 0);
138 }
139
gen_op_simple(opcode op)140 block gen_op_simple(opcode op) {
141 assert(opcode_describe(op)->length == 1);
142 return inst_block(inst_new(op));
143 }
144
145
gen_const(jv constant)146 block gen_const(jv constant) {
147 assert(opcode_describe(LOADK)->flags & OP_HAS_CONSTANT);
148 inst* i = inst_new(LOADK);
149 i->imm.constant = constant;
150 return inst_block(i);
151 }
152
gen_const_global(jv constant,const char * name)153 block gen_const_global(jv constant, const char *name) {
154 assert((opcode_describe(STORE_GLOBAL)->flags & (OP_HAS_CONSTANT | OP_HAS_VARIABLE | OP_HAS_BINDING)) ==
155 (OP_HAS_CONSTANT | OP_HAS_VARIABLE | OP_HAS_BINDING));
156 inst* i = inst_new(STORE_GLOBAL);
157 i->imm.constant = constant;
158 i->symbol = strdup(name);
159 return inst_block(i);
160 }
161
gen_op_pushk_under(jv constant)162 block gen_op_pushk_under(jv constant) {
163 assert(opcode_describe(PUSHK_UNDER)->flags & OP_HAS_CONSTANT);
164 inst* i = inst_new(PUSHK_UNDER);
165 i->imm.constant = constant;
166 return inst_block(i);
167 }
168
block_is_const(block b)169 int block_is_const(block b) {
170 return (block_is_single(b) && (b.first->op == LOADK || b.first->op == PUSHK_UNDER));
171 }
172
block_is_const_inf(block b)173 int block_is_const_inf(block b) {
174 return (block_is_single(b) && b.first->op == LOADK &&
175 jv_get_kind(b.first->imm.constant) == JV_KIND_NUMBER &&
176 isinf(jv_number_value(b.first->imm.constant)));
177 }
178
block_const_kind(block b)179 jv_kind block_const_kind(block b) {
180 assert(block_is_const(b));
181 return jv_get_kind(b.first->imm.constant);
182 }
183
block_const(block b)184 jv block_const(block b) {
185 assert(block_is_const(b));
186 return jv_copy(b.first->imm.constant);
187 }
188
gen_op_target(opcode op,block target)189 block gen_op_target(opcode op, block target) {
190 assert(opcode_describe(op)->flags & OP_HAS_BRANCH);
191 assert(target.last);
192 inst* i = inst_new(op);
193 i->imm.target = target.last;
194 return inst_block(i);
195 }
196
gen_op_targetlater(opcode op)197 block gen_op_targetlater(opcode op) {
198 assert(opcode_describe(op)->flags & OP_HAS_BRANCH);
199 inst* i = inst_new(op);
200 i->imm.target = 0;
201 return inst_block(i);
202 }
inst_set_target(block b,block target)203 void inst_set_target(block b, block target) {
204 assert(block_is_single(b));
205 assert(opcode_describe(b.first->op)->flags & OP_HAS_BRANCH);
206 assert(target.last);
207 b.first->imm.target = target.last;
208 }
209
gen_op_unbound(opcode op,const char * name)210 block gen_op_unbound(opcode op, const char* name) {
211 assert(opcode_describe(op)->flags & OP_HAS_BINDING);
212 inst* i = inst_new(op);
213 i->symbol = strdup(name);
214 return inst_block(i);
215 }
216
gen_op_var_fresh(opcode op,const char * name)217 block gen_op_var_fresh(opcode op, const char* name) {
218 assert(opcode_describe(op)->flags & OP_HAS_VARIABLE);
219 return block_bind(gen_op_unbound(op, name),
220 gen_noop(), OP_HAS_VARIABLE);
221 }
222
gen_op_bound(opcode op,block binder)223 block gen_op_bound(opcode op, block binder) {
224 assert(block_is_single(binder));
225 block b = gen_op_unbound(op, binder.first->symbol);
226 b.first->bound_by = binder.first;
227 return b;
228 }
229
gen_dictpair(block k,block v)230 block gen_dictpair(block k, block v) {
231 return BLOCK(gen_subexp(k), gen_subexp(v), gen_op_simple(INSERT));
232 }
233
234
inst_join(inst * a,inst * b)235 static void inst_join(inst* a, inst* b) {
236 assert(a && b);
237 assert(!a->next);
238 assert(!b->prev);
239 a->next = b;
240 b->prev = a;
241 }
242
block_append(block * b,block b2)243 void block_append(block* b, block b2) {
244 if (b2.first) {
245 if (b->last) {
246 inst_join(b->last, b2.first);
247 } else {
248 b->first = b2.first;
249 }
250 b->last = b2.last;
251 }
252 }
253
block_join(block a,block b)254 block block_join(block a, block b) {
255 block c = a;
256 block_append(&c, b);
257 return c;
258 }
259
block_has_only_binders_and_imports(block binders,int bindflags)260 int block_has_only_binders_and_imports(block binders, int bindflags) {
261 bindflags |= OP_HAS_BINDING;
262 for (inst* curr = binders.first; curr; curr = curr->next) {
263 if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != DEPS && curr->op != MODULEMETA) {
264 return 0;
265 }
266 }
267 return 1;
268 }
269
inst_is_binder(inst * i,int bindflags)270 static int inst_is_binder(inst *i, int bindflags) {
271 return !((opcode_describe(i->op)->flags & bindflags) != bindflags && i->op != MODULEMETA);
272 }
273
block_has_only_binders(block binders,int bindflags)274 int block_has_only_binders(block binders, int bindflags) {
275 bindflags |= OP_HAS_BINDING;
276 bindflags &= ~OP_BIND_WILDCARD;
277 for (inst* curr = binders.first; curr; curr = curr->next) {
278 if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != MODULEMETA) {
279 return 0;
280 }
281 }
282 return 1;
283 }
284
285 // Count a binder's (function) formal params
block_count_formals(block b)286 static int block_count_formals(block b) {
287 int args = 0;
288 if (b.first->op == CLOSURE_CREATE_C)
289 return b.first->imm.cfunc->nargs - 1;
290 for (inst* i = b.first->arglist.first; i; i = i->next) {
291 assert(i->op == CLOSURE_PARAM);
292 args++;
293 }
294 return args;
295 }
296
297 // Count a call site's actual params
block_count_actuals(block b)298 static int block_count_actuals(block b) {
299 int args = 0;
300 for (inst* i = b.first; i; i = i->next) {
301 switch (i->op) {
302 default: assert(0 && "Unknown function type"); break;
303 case CLOSURE_CREATE:
304 case CLOSURE_PARAM:
305 case CLOSURE_CREATE_C:
306 args++;
307 break;
308 }
309 }
310 return args;
311 }
312
block_count_refs(block binder,block body)313 static int block_count_refs(block binder, block body) {
314 int nrefs = 0;
315 for (inst* i = body.first; i; i = i->next) {
316 if (i != binder.first && i->bound_by == binder.first) {
317 nrefs++;
318 }
319 // counting recurses into closures
320 nrefs += block_count_refs(binder, i->subfn);
321 // counting recurses into argument list
322 nrefs += block_count_refs(binder, i->arglist);
323 }
324 return nrefs;
325 }
326
block_bind_subblock(block binder,block body,int bindflags,int break_distance)327 static int block_bind_subblock(block binder, block body, int bindflags, int break_distance) {
328 assert(block_is_single(binder));
329 assert((opcode_describe(binder.first->op)->flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD));
330 assert(binder.first->symbol);
331 assert(binder.first->bound_by == 0 || binder.first->bound_by == binder.first);
332 assert(break_distance >= 0);
333
334 binder.first->bound_by = binder.first;
335 if (binder.first->nformals == -1)
336 binder.first->nformals = block_count_formals(binder);
337 int nrefs = 0;
338 for (inst* i = body.first; i; i = i->next) {
339 int flags = opcode_describe(i->op)->flags;
340 if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by == 0 &&
341 (!strcmp(i->symbol, binder.first->symbol) ||
342 // Check for break/break2/break3; see parser.y
343 ((bindflags & OP_BIND_WILDCARD) && i->symbol[0] == '*' &&
344 break_distance <= 3 && (i->symbol[1] == '1' + break_distance) &&
345 i->symbol[2] == '\0'))) {
346 // bind this instruction
347 if (i->op == CALL_JQ && i->nactuals == -1)
348 i->nactuals = block_count_actuals(i->arglist);
349 if (i->nactuals == -1 || i->nactuals == binder.first->nformals) {
350 i->bound_by = binder.first;
351 nrefs++;
352 }
353 } else if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by != 0 &&
354 !strncmp(binder.first->symbol, "*anonlabel", sizeof("*anonlabel") - 1) &&
355 !strncmp(i->symbol, "*anonlabel", sizeof("*anonlabel") - 1)) {
356 // Increment the break distance required for this binder to match
357 // a break whenever we come across a STOREV of *anonlabel...
358 break_distance++;
359 }
360 // binding recurses into closures
361 nrefs += block_bind_subblock(binder, i->subfn, bindflags, break_distance);
362 // binding recurses into argument list
363 nrefs += block_bind_subblock(binder, i->arglist, bindflags, break_distance);
364 }
365 return nrefs;
366 }
367
block_bind_each(block binder,block body,int bindflags)368 static int block_bind_each(block binder, block body, int bindflags) {
369 assert(block_has_only_binders(binder, bindflags));
370 bindflags |= OP_HAS_BINDING;
371 int nrefs = 0;
372 for (inst* curr = binder.first; curr; curr = curr->next) {
373 nrefs += block_bind_subblock(inst_block(curr), body, bindflags, 0);
374 }
375 return nrefs;
376 }
377
block_bind(block binder,block body,int bindflags)378 block block_bind(block binder, block body, int bindflags) {
379 block_bind_each(binder, body, bindflags);
380 return block_join(binder, body);
381 }
382
block_bind_library(block binder,block body,int bindflags,const char * libname)383 block block_bind_library(block binder, block body, int bindflags, const char *libname) {
384 bindflags |= OP_HAS_BINDING;
385 int nrefs = 0;
386 int matchlen = (libname == NULL) ? 0 : strlen(libname);
387 char *matchname = jv_mem_alloc(matchlen+2+1);
388 matchname[0] = '\0';
389 if (libname != NULL && libname[0] != '\0') {
390 strcpy(matchname,libname);
391 strcpy(matchname+matchlen, "::");
392 matchlen += 2;
393 }
394 assert(block_has_only_binders(binder, bindflags));
395 for (inst *curr = binder.first; curr; curr = curr->next) {
396 int bindflags2 = bindflags;
397 char* cname = curr->symbol;
398 char* tname = jv_mem_alloc(strlen(curr->symbol)+matchlen+1);
399 strcpy(tname, matchname);
400 strcpy(tname+matchlen, curr->symbol);
401
402 // Ew
403 if ((opcode_describe(curr->op)->flags & (OP_HAS_VARIABLE | OP_HAS_CONSTANT)))
404 bindflags2 = OP_HAS_VARIABLE | OP_HAS_BINDING;
405
406 // This mutation is ugly, even if we undo it
407 curr->symbol = tname;
408 nrefs += block_bind_subblock(inst_block(curr), body, bindflags2, 0);
409 curr->symbol = cname;
410 free(tname);
411 }
412 free(matchname);
413 return body; // We don't return a join because we don't want those sticking around...
414 }
415
416 // Bind binder to body and throw away any defs in binder not referenced
417 // (directly or indirectly) from body.
block_bind_referenced(block binder,block body,int bindflags)418 block block_bind_referenced(block binder, block body, int bindflags) {
419 assert(block_has_only_binders(binder, bindflags));
420 bindflags |= OP_HAS_BINDING;
421 block refd = gen_noop();
422 block unrefd = gen_noop();
423 int nrefs;
424 for (int last_kept = 0, kept = 0; ; ) {
425 for (inst* curr; (curr = block_take(&binder));) {
426 block b = inst_block(curr);
427 nrefs = block_bind_each(b, body, bindflags);
428 // Check if this binder is referenced from any of the ones we
429 // already know are referenced by body.
430 nrefs += block_count_refs(b, refd);
431 nrefs += block_count_refs(b, body);
432 if (nrefs) {
433 refd = BLOCK(refd, b);
434 kept++;
435 } else {
436 unrefd = BLOCK(unrefd, b);
437 }
438 }
439 if (kept == last_kept)
440 break;
441 last_kept = kept;
442 binder = unrefd;
443 unrefd = gen_noop();
444 }
445 block_free(unrefd);
446 return block_join(refd, body);
447 }
448
block_drop_unreferenced(block body)449 block block_drop_unreferenced(block body) {
450 inst* curr;
451 block refd = gen_noop();
452 block unrefd = gen_noop();
453 int drop;
454 do {
455 drop = 0;
456 while ((curr = block_take(&body)) && curr->op != TOP) {
457 block b = inst_block(curr);
458 if (block_count_refs(b,refd) + block_count_refs(b,body) == 0) {
459 unrefd = BLOCK(unrefd, b);
460 drop++;
461 } else {
462 refd = BLOCK(refd, b);
463 }
464 }
465 if (curr && curr->op == TOP) {
466 body = BLOCK(inst_block(curr),body);
467 }
468 body = BLOCK(refd, body);
469 refd = gen_noop();
470 } while (drop != 0);
471 block_free(unrefd);
472 return body;
473 }
474
block_take_imports(block * body)475 jv block_take_imports(block* body) {
476 jv imports = jv_array();
477
478 inst* top = NULL;
479 if (body->first && body->first->op == TOP) {
480 top = block_take(body);
481 }
482 while (body->first && (body->first->op == MODULEMETA || body->first->op == DEPS)) {
483 inst* dep = block_take(body);
484 if (dep->op == DEPS) {
485 imports = jv_array_append(imports, jv_copy(dep->imm.constant));
486 }
487 inst_free(dep);
488 }
489 if (top) {
490 *body = block_join(inst_block(top),*body);
491 }
492 return imports;
493 }
494
block_list_funcs(block body,int omit_underscores)495 jv block_list_funcs(block body, int omit_underscores) {
496 jv funcs = jv_object(); // Use the keys for set semantics.
497 for (inst *pos = body.first; pos != NULL; pos = pos->next) {
498 if (pos->op == CLOSURE_CREATE || pos->op == CLOSURE_CREATE_C) {
499 if (pos->symbol != NULL && (!omit_underscores || pos->symbol[0] != '_')) {
500 funcs = jv_object_set(funcs, jv_string_fmt("%s/%i", pos->symbol, pos->nformals), jv_null());
501 }
502 }
503 }
504 return jv_keys_unsorted(funcs);
505 }
506
gen_module(block metadata)507 block gen_module(block metadata) {
508 inst* i = inst_new(MODULEMETA);
509 i->imm.constant = block_const(metadata);
510 if (jv_get_kind(i->imm.constant) != JV_KIND_OBJECT)
511 i->imm.constant = jv_object_set(jv_object(), jv_string("metadata"), i->imm.constant);
512 block_free(metadata);
513 return inst_block(i);
514 }
515
block_module_meta(block b)516 jv block_module_meta(block b) {
517 if (b.first != NULL && b.first->op == MODULEMETA)
518 return jv_copy(b.first->imm.constant);
519 return jv_null();
520 }
521
gen_import(const char * name,const char * as,int is_data)522 block gen_import(const char* name, const char* as, int is_data) {
523 inst* i = inst_new(DEPS);
524 jv meta = jv_object();
525 if (as != NULL)
526 meta = jv_object_set(meta, jv_string("as"), jv_string(as));
527 meta = jv_object_set(meta, jv_string("is_data"), is_data ? jv_true() : jv_false());
528 meta = jv_object_set(meta, jv_string("relpath"), jv_string(name));
529 i->imm.constant = meta;
530 return inst_block(i);
531 }
532
gen_import_meta(block import,block metadata)533 block gen_import_meta(block import, block metadata) {
534 assert(block_is_single(import) && import.first->op == DEPS);
535 assert(block_is_const(metadata) && block_const_kind(metadata) == JV_KIND_OBJECT);
536 inst *i = import.first;
537 i->imm.constant = jv_object_merge(block_const(metadata), i->imm.constant);
538 block_free(metadata);
539 return import;
540 }
541
gen_function(const char * name,block formals,block body)542 block gen_function(const char* name, block formals, block body) {
543 inst* i = inst_new(CLOSURE_CREATE);
544 for (inst* i = formals.last; i; i = i->prev) {
545 if (i->op == CLOSURE_PARAM_REGULAR) {
546 i->op = CLOSURE_PARAM;
547 body = gen_var_binding(gen_call(i->symbol, gen_noop()), i->symbol, body);
548 }
549 block_bind_subblock(inst_block(i), body, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0);
550 }
551 i->subfn = body;
552 i->symbol = strdup(name);
553 i->arglist = formals;
554 block b = inst_block(i);
555 block_bind_subblock(b, b, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0);
556 return b;
557 }
558
gen_param_regular(const char * name)559 block gen_param_regular(const char* name) {
560 return gen_op_unbound(CLOSURE_PARAM_REGULAR, name);
561 }
562
gen_param(const char * name)563 block gen_param(const char* name) {
564 return gen_op_unbound(CLOSURE_PARAM, name);
565 }
566
gen_lambda(block body)567 block gen_lambda(block body) {
568 return gen_function("@lambda", gen_noop(), body);
569 }
570
gen_call(const char * name,block args)571 block gen_call(const char* name, block args) {
572 block b = gen_op_unbound(CALL_JQ, name);
573 b.first->arglist = args;
574 return b;
575 }
576
gen_subexp(block a)577 block gen_subexp(block a) {
578 if (block_is_noop(a)) {
579 return gen_op_simple(DUP);
580 }
581 if (block_is_single(a) && a.first->op == LOADK) {
582 jv c = block_const(a);
583 block_free(a);
584 return gen_op_pushk_under(c);
585 }
586 return BLOCK(gen_op_simple(SUBEXP_BEGIN), a, gen_op_simple(SUBEXP_END));
587 }
588
gen_both(block a,block b)589 block gen_both(block a, block b) {
590 block jump = gen_op_targetlater(JUMP);
591 block fork = gen_op_target(FORK, jump);
592 block c = BLOCK(fork, a, jump, b);
593 inst_set_target(jump, c);
594 return c;
595 }
596
gen_const_object(block expr)597 block gen_const_object(block expr) {
598 int is_const = 1;
599 jv o = jv_object();
600 jv k = jv_null();
601 jv v = jv_null();
602 for (inst *i = expr.first; i; i = i->next) {
603 if (i->op == PUSHK_UNDER) {
604 k = jv_copy(i->imm.constant);
605 i = i->next;
606 } else if (i->op != SUBEXP_BEGIN ||
607 i->next == NULL ||
608 i->next->op != LOADK ||
609 i->next->next == NULL ||
610 i->next->next->op != SUBEXP_END) {
611 is_const = 0;
612 break;
613 } else {
614 k = jv_copy(i->next->imm.constant);
615 i = i->next->next->next;
616 }
617 if (i != NULL && i->op == PUSHK_UNDER) {
618 v = jv_copy(i->imm.constant);
619 i = i->next;
620 } else if (i == NULL ||
621 i->op != SUBEXP_BEGIN ||
622 i->next == NULL ||
623 i->next->op != LOADK ||
624 i->next->next == NULL ||
625 i->next->next->op != SUBEXP_END) {
626 is_const = 0;
627 break;
628 } else {
629 v = jv_copy(i->next->imm.constant);
630 i = i->next->next->next;
631 }
632 if (i == NULL || i->op != INSERT) {
633 is_const = 0;
634 break;
635 }
636 if (jv_get_kind(k) != JV_KIND_STRING) {
637 is_const = 0;
638 break;
639 }
640 o = jv_object_set(o, k, v);
641 k = jv_null();
642 v = jv_null();
643 }
644 if (!is_const) {
645 jv_free(o);
646 jv_free(k);
647 jv_free(v);
648 block b = {0,0};
649 return b;
650 }
651 block_free(expr);
652 return gen_const(o);
653 }
654
gen_const_array(block expr)655 static block gen_const_array(block expr) {
656 /*
657 * An expr of all constant elements looks like this:
658 *
659 * 0009 FORK 0027
660 * 0011 FORK 0023
661 * 0013 FORK 0019
662 * 0015 LOADK 1
663 * 0017 JUMP 0021
664 * 0019 LOADK 2
665 * 0021 JUMP 0025
666 * 0023 LOADK 3
667 * 0025 JUMP 0029
668 * 0027 LOADK 4
669 *
670 * That's: N-1 commas for N elements, N LOADKs, and a JUMP between
671 * every LOADK. The sequence ends in a LOADK. Any deviation and it's
672 * not a list of constants.
673 *
674 * Here we check for this pattern almost exactly. We don't check that
675 * the targets of the FORK and JUMP instructions are in the right
676 * sequence.
677 */
678 int all_const = 1;
679 int commas = 0;
680 int normal = 1;
681 jv a = jv_array();
682 for (inst *i = expr.first; i; i = i->next) {
683 if (i->op == FORK) {
684 commas++;
685 if (i->imm.target == NULL || i->imm.target->op != JUMP ||
686 jv_array_length(jv_copy(a)) > 0) {
687 normal = 0;
688 break;
689 }
690 } else if (all_const && i->op == LOADK) {
691 if (i->next != NULL && i->next->op != JUMP) {
692 normal = 0;
693 break;
694 }
695 a = jv_array_append(a, jv_copy(i->imm.constant));
696 } else if (i->op != JUMP || i->imm.target == NULL ||
697 i->imm.target->op != LOADK) {
698 all_const = 0;
699 }
700 }
701
702 if (all_const && normal &&
703 (expr.last == NULL || expr.last->op == LOADK) &&
704 jv_array_length(jv_copy(a)) == commas + 1) {
705 block_free(expr);
706 return gen_const(a);
707 }
708
709 jv_free(a);
710 block b = {0,0};
711 return b;
712 }
713
gen_collect(block expr)714 block gen_collect(block expr) {
715 block const_array = gen_const_array(expr);
716 if (const_array.first != NULL)
717 return const_array;
718
719 block array_var = gen_op_var_fresh(STOREV, "collect");
720 block c = BLOCK(gen_op_simple(DUP), gen_const(jv_array()), array_var);
721
722 block tail = BLOCK(gen_op_bound(APPEND, array_var),
723 gen_op_simple(BACKTRACK));
724
725 return BLOCK(c,
726 gen_op_target(FORK, tail),
727 expr,
728 tail,
729 gen_op_bound(LOADVN, array_var));
730 }
731
bind_matcher(block matcher,block body)732 static block bind_matcher(block matcher, block body) {
733 // cannot call block_bind(matcher, body) because that requires
734 // block_has_only_binders(matcher), which is not true here as matchers
735 // may also contain code to extract the correct elements
736 for (inst* i = matcher.first; i; i = i->next) {
737 if ((i->op == STOREV || i->op == STOREVN) && !i->bound_by)
738 block_bind_subblock(inst_block(i), body, OP_HAS_VARIABLE, 0);
739 }
740 return BLOCK(matcher, body);
741 }
742
743
744 // Extract destructuring var names from the block
745 // *vars should be a jv_object (for set semantics)
block_get_unbound_vars(block b,jv * vars)746 static void block_get_unbound_vars(block b, jv *vars) {
747 assert(vars != NULL);
748 assert(jv_get_kind(*vars) == JV_KIND_OBJECT);
749 for (inst* i = b.first; i; i = i->next) {
750 if (i->subfn.first) {
751 block_get_unbound_vars(i->subfn, vars);
752 continue;
753 }
754 if ((i->op == STOREV || i->op == STOREVN) && i->bound_by == NULL) {
755 *vars = jv_object_set(*vars, jv_string(i->symbol), jv_true());
756 }
757 }
758 }
759
760 /* Build wrappers around destructuring matchers so that we can chain them
761 * when we have errors. The approach is as follows:
762 * DESTRUCTURE_ALT NEXT_MATCHER (unless last matcher)
763 * existing_matcher_block
764 * JUMP BODY
765 */
bind_alternation_matchers(block matchers,block body)766 static block bind_alternation_matchers(block matchers, block body) {
767 block preamble = {0};
768 block altmatchers = {0};
769 block mb = {0};
770 block final_matcher = matchers;
771
772 // Pass through the matchers to find all destructured names.
773 while (final_matcher.first && final_matcher.first->op == DESTRUCTURE_ALT) {
774 block_append(&altmatchers, inst_block(block_take(&final_matcher)));
775 }
776
777 // We don't have any alternations here, so we can use the simplest case.
778 if (altmatchers.first == NULL) {
779 return bind_matcher(final_matcher, body);
780 }
781
782 // Collect var names
783 jv all_vars = jv_object();
784 block_get_unbound_vars(altmatchers, &all_vars);
785 block_get_unbound_vars(final_matcher, &all_vars);
786
787 // We need a preamble of STOREVs to which to bind the matchers and the body.
788 jv_object_keys_foreach(all_vars, key) {
789 preamble = BLOCK(preamble,
790 gen_op_simple(DUP),
791 gen_const(jv_null()),
792 gen_op_unbound(STOREV, jv_string_value(key)));
793 jv_free(key);
794 }
795 jv_free(all_vars);
796
797 // Now we build each matcher in turn
798 for (inst *i = altmatchers.first; i; i = i->next) {
799 block submatcher = i->subfn;
800
801 // If we're successful, jump to the end of the matchers
802 submatcher = BLOCK(submatcher, gen_op_target(JUMP, final_matcher));
803
804 // DESTRUCTURE_ALT to the end of this submatcher so we can skip to the next one on error
805 mb = BLOCK(mb, gen_op_target(DESTRUCTURE_ALT, submatcher), submatcher);
806
807 // We're done with this inst and we don't want it anymore
808 // But we can't let it free the submatcher block.
809 i->subfn.first = i->subfn.last = NULL;
810 }
811 // We're done with these insts now.
812 block_free(altmatchers);
813
814 return bind_matcher(preamble, BLOCK(mb, final_matcher, body));
815 }
816
gen_reduce(block source,block matcher,block init,block body)817 block gen_reduce(block source, block matcher, block init, block body) {
818 block res_var = gen_op_var_fresh(STOREV, "reduce");
819 block loop = BLOCK(gen_op_simple(DUPN),
820 source,
821 bind_alternation_matchers(matcher,
822 BLOCK(gen_op_bound(LOADVN, res_var),
823 body,
824 gen_op_bound(STOREV, res_var))),
825 gen_op_simple(BACKTRACK));
826 return BLOCK(gen_op_simple(DUP),
827 init,
828 res_var,
829 gen_op_target(FORK, loop),
830 loop,
831 gen_op_bound(LOADVN, res_var));
832 }
833
gen_foreach(block source,block matcher,block init,block update,block extract)834 block gen_foreach(block source, block matcher, block init, block update, block extract) {
835 block output = gen_op_targetlater(JUMP);
836 block state_var = gen_op_var_fresh(STOREV, "foreach");
837 block loop = BLOCK(gen_op_simple(DUPN),
838 // get a value from the source expression:
839 source,
840 // destructure the value into variable(s) for all the code
841 // in the body to see
842 bind_alternation_matchers(matcher,
843 // load the loop state variable
844 BLOCK(gen_op_bound(LOADVN, state_var),
845 // generate updated state
846 update,
847 // save the updated state for value extraction
848 gen_op_simple(DUP),
849 // save new state
850 gen_op_bound(STOREV, state_var),
851 // extract an output...
852 extract,
853 // ...and output it by jumping
854 // past the BACKTRACK that comes
855 // right after the loop body,
856 // which in turn is there
857 // because...
858 //
859 // (Incidentally, extract can also
860 // backtrack, e.g., if it calls
861 // empty, in which case we don't
862 // get here.)
863 output)));
864 block foreach = BLOCK(gen_op_simple(DUP),
865 init,
866 state_var,
867 gen_op_target(FORK, loop),
868 loop,
869 // ...at this point `foreach`'s original input
870 // will be on top of the stack, and we don't
871 // want to output it, so we backtrack.
872 gen_op_simple(BACKTRACK));
873 inst_set_target(output, foreach); // make that JUMP go bast the BACKTRACK at the end of the loop
874 return foreach;
875 }
876
gen_definedor(block a,block b)877 block gen_definedor(block a, block b) {
878 // var found := false
879 block found_var = gen_op_var_fresh(STOREV, "found");
880 block init = BLOCK(gen_op_simple(DUP), gen_const(jv_false()), found_var);
881
882 // if found, backtrack. Otherwise execute b
883 block backtrack = gen_op_simple(BACKTRACK);
884 block tail = BLOCK(gen_op_simple(DUP),
885 gen_op_bound(LOADV, found_var),
886 gen_op_target(JUMP_F, backtrack),
887 backtrack,
888 gen_op_simple(POP),
889 b);
890
891 // try again
892 block if_notfound = gen_op_simple(BACKTRACK);
893
894 // found := true, produce result
895 block if_found = BLOCK(gen_op_simple(DUP),
896 gen_const(jv_true()),
897 gen_op_bound(STOREV, found_var),
898 gen_op_target(JUMP, tail));
899
900 return BLOCK(init,
901 gen_op_target(FORK, if_notfound),
902 a,
903 gen_op_target(JUMP_F, if_found),
904 if_found,
905 if_notfound,
906 tail);
907 }
908
block_has_main(block top)909 int block_has_main(block top) {
910 for (inst *c = top.first; c; c = c->next) {
911 if (c->op == TOP)
912 return 1;
913 }
914 return 0;
915 }
916
block_is_funcdef(block b)917 int block_is_funcdef(block b) {
918 if (b.first != NULL && b.first->op == CLOSURE_CREATE)
919 return 1;
920 return 0;
921 }
922
gen_condbranch(block iftrue,block iffalse)923 block gen_condbranch(block iftrue, block iffalse) {
924 iftrue = BLOCK(iftrue, gen_op_target(JUMP, iffalse));
925 return BLOCK(gen_op_target(JUMP_F, iftrue), iftrue, iffalse);
926 }
927
gen_and(block a,block b)928 block gen_and(block a, block b) {
929 // a and b = if a then (if b then true else false) else false
930 return BLOCK(gen_op_simple(DUP), a,
931 gen_condbranch(BLOCK(gen_op_simple(POP),
932 b,
933 gen_condbranch(gen_const(jv_true()),
934 gen_const(jv_false()))),
935 BLOCK(gen_op_simple(POP), gen_const(jv_false()))));
936 }
937
gen_or(block a,block b)938 block gen_or(block a, block b) {
939 // a or b = if a then true else (if b then true else false)
940 return BLOCK(gen_op_simple(DUP), a,
941 gen_condbranch(BLOCK(gen_op_simple(POP), gen_const(jv_true())),
942 BLOCK(gen_op_simple(POP),
943 b,
944 gen_condbranch(gen_const(jv_true()),
945 gen_const(jv_false())))));
946 }
947
gen_destructure_alt(block matcher)948 block gen_destructure_alt(block matcher) {
949 for (inst *i = matcher.first; i; i = i->next) {
950 if (i->op == STOREV) {
951 i->op = STOREVN;
952 }
953 }
954 inst* i = inst_new(DESTRUCTURE_ALT);
955 i->subfn = matcher;
956 return inst_block(i);
957 }
958
gen_var_binding(block var,const char * name,block body)959 block gen_var_binding(block var, const char* name, block body) {
960 return gen_destructure(var, gen_op_unbound(STOREV, name), body);
961 }
962
gen_array_matcher(block left,block curr)963 block gen_array_matcher(block left, block curr) {
964 int index;
965 if (block_is_noop(left))
966 index = 0;
967 else {
968 // `left` was returned by this function, so the third inst is the
969 // constant containing the previously used index
970 assert(left.first->op == DUP);
971 assert(left.first->next != NULL);
972 inst *i = NULL;
973 if (left.first->next->op == PUSHK_UNDER) {
974 i = left.first->next;
975 } else {
976 assert(left.first->next->op == SUBEXP_BEGIN);
977 assert(left.first->next->next->op == LOADK);
978 i = left.first->next->next;
979 }
980 index = 1 + (int) jv_number_value(i->imm.constant);
981 }
982
983 // `left` goes at the end so that the const index is in a predictable place
984 return BLOCK(gen_op_simple(DUP), gen_subexp(gen_const(jv_number(index))),
985 gen_op_simple(INDEX), curr, left);
986 }
987
gen_object_matcher(block name,block curr)988 block gen_object_matcher(block name, block curr) {
989 return BLOCK(gen_op_simple(DUP), gen_subexp(name), gen_op_simple(INDEX),
990 curr);
991 }
992
gen_destructure(block var,block matchers,block body)993 block gen_destructure(block var, block matchers, block body) {
994 // var bindings can be added after coding the program; leave the TOP first.
995 block top = gen_noop();
996 if (body.first && body.first->op == TOP)
997 top = inst_block(block_take(&body));
998
999 if (matchers.first && matchers.first->op == DESTRUCTURE_ALT) {
1000 block_append(&var, gen_op_simple(DUP));
1001 } else {
1002 top = BLOCK(top, gen_op_simple(DUP));
1003 }
1004
1005 return BLOCK(top, gen_subexp(var), gen_op_simple(POP), bind_alternation_matchers(matchers, body));
1006 }
1007
1008 // Like gen_var_binding(), but bind `break`'s wildcard unbound variable
gen_wildvar_binding(block var,const char * name,block body)1009 static block gen_wildvar_binding(block var, const char* name, block body) {
1010 return BLOCK(gen_op_simple(DUP), var,
1011 block_bind(gen_op_unbound(STOREV, name),
1012 body, OP_HAS_VARIABLE | OP_BIND_WILDCARD));
1013 }
1014
gen_cond(block cond,block iftrue,block iffalse)1015 block gen_cond(block cond, block iftrue, block iffalse) {
1016 return BLOCK(gen_op_simple(DUP), BLOCK(gen_subexp(cond), gen_op_simple(POP)),
1017 gen_condbranch(BLOCK(gen_op_simple(POP), iftrue),
1018 BLOCK(gen_op_simple(POP), iffalse)));
1019 }
1020
gen_try_handler(block handler)1021 block gen_try_handler(block handler) {
1022 // Quite a pain just to hide jq's internal errors.
1023 return gen_cond(// `if type=="object" and .__jq
1024 gen_and(gen_call("_equal",
1025 BLOCK(gen_lambda(gen_const(jv_string("object"))),
1026 gen_lambda(gen_noop()))),
1027 BLOCK(gen_subexp(gen_const(jv_string("__jq"))),
1028 gen_noop(),
1029 gen_op_simple(INDEX))),
1030 // `then error`
1031 gen_call("error", gen_noop()),
1032 // `else HANDLER end`
1033 handler);
1034 }
1035
gen_try(block exp,block handler)1036 block gen_try(block exp, block handler) {
1037 /*
1038 * Produce something like:
1039 * FORK_OPT <address of handler>
1040 * <exp>
1041 * JUMP <end of handler>
1042 * <handler>
1043 *
1044 * If this is not an internal try/catch, then catch and re-raise
1045 * internal errors to prevent them from leaking.
1046 *
1047 * The handler will only execute if we backtrack to the FORK_OPT with
1048 * an error (exception). If <exp> produces no value then FORK_OPT
1049 * will backtrack (propagate the `empty`, as it were. If <exp>
1050 * produces a value then we'll execute whatever bytecode follows this
1051 * sequence.
1052 */
1053 if (!handler.first && !handler.last)
1054 // A hack to deal with `.` as the handler; we could use a real NOOP here
1055 handler = BLOCK(gen_op_simple(DUP), gen_op_simple(POP), handler);
1056 exp = BLOCK(exp, gen_op_target(JUMP, handler));
1057 return BLOCK(gen_op_target(FORK_OPT, exp), exp, handler);
1058 }
1059
gen_label(const char * label,block exp)1060 block gen_label(const char *label, block exp) {
1061 block cond = gen_call("_equal",
1062 BLOCK(gen_lambda(gen_noop()),
1063 gen_lambda(gen_op_unbound(LOADV, label))));
1064 return gen_wildvar_binding(gen_op_simple(GENLABEL), label,
1065 BLOCK(gen_op_simple(POP),
1066 // try exp catch if . == $label
1067 // then empty
1068 // else error end
1069 //
1070 // Can't use gen_binop(), as that's firmly
1071 // stuck in parser.y as it refers to things
1072 // like EQ.
1073 gen_try(exp,
1074 gen_cond(cond,
1075 gen_op_simple(BACKTRACK),
1076 gen_call("error", gen_noop())))));
1077 }
1078
gen_cbinding(const struct cfunction * cfunctions,int ncfunctions,block code)1079 block gen_cbinding(const struct cfunction* cfunctions, int ncfunctions, block code) {
1080 for (int cfunc=0; cfunc<ncfunctions; cfunc++) {
1081 inst* i = inst_new(CLOSURE_CREATE_C);
1082 i->imm.cfunc = &cfunctions[cfunc];
1083 i->symbol = strdup(i->imm.cfunc->name);
1084 code = block_bind(inst_block(i), code, OP_IS_CALL_PSEUDO);
1085 }
1086 return code;
1087 }
1088
nesting_level(struct bytecode * bc,inst * target)1089 static uint16_t nesting_level(struct bytecode* bc, inst* target) {
1090 uint16_t level = 0;
1091 assert(bc && target && target->compiled);
1092 while (bc && target->compiled != bc) {
1093 level++;
1094 bc = bc->parent;
1095 }
1096 assert(bc && bc == target->compiled);
1097 return level;
1098 }
1099
count_cfunctions(block b)1100 static int count_cfunctions(block b) {
1101 int n = 0;
1102 for (inst* i = b.first; i; i = i->next) {
1103 if (i->op == CLOSURE_CREATE_C) n++;
1104 n += count_cfunctions(i->subfn);
1105 }
1106 return n;
1107 }
1108
1109 #ifndef WIN32
1110 extern char **environ;
1111 #endif
1112
1113 static jv
make_env(jv env)1114 make_env(jv env)
1115 {
1116 if (jv_is_valid(env))
1117 return jv_copy(env);
1118 jv r = jv_object();
1119 if (environ == NULL)
1120 return r;
1121 for (size_t i = 0; environ[i] != NULL; i++) {
1122 const char *eq;
1123
1124 if ((eq = strchr(environ[i], '=')) == NULL)
1125 r = jv_object_delete(r, jv_string(environ[i]));
1126 else
1127 r = jv_object_set(r, jv_string_sized(environ[i], eq - environ[i]), jv_string(eq + 1));
1128 }
1129 return jv_copy(r);
1130 }
1131
1132 // Expands call instructions into a calling sequence
expand_call_arglist(block * b,jv args,jv * env)1133 static int expand_call_arglist(block* b, jv args, jv *env) {
1134 int errors = 0;
1135 block ret = gen_noop();
1136 for (inst* curr; (curr = block_take(b));) {
1137 if (opcode_describe(curr->op)->flags & OP_HAS_BINDING) {
1138 if (!curr->bound_by && curr->op == LOADV && strcmp(curr->symbol, "ENV") == 0) {
1139 curr->op = LOADK;
1140 *env = curr->imm.constant = make_env(*env);
1141 } else if (!curr->bound_by && curr->op == LOADV && jv_object_has(jv_copy(args), jv_string(curr->symbol))) {
1142 curr->op = LOADK;
1143 curr->imm.constant = jv_object_get(jv_copy(args), jv_string(curr->symbol));
1144 } else if (!curr->bound_by) {
1145 if (curr->symbol[0] == '*' && curr->symbol[1] >= '1' && curr->symbol[1] <= '3' && curr->symbol[2] == '\0')
1146 locfile_locate(curr->locfile, curr->source, "jq: error: break used outside labeled control structure");
1147 else if (curr->op == LOADV)
1148 locfile_locate(curr->locfile, curr->source, "jq: error: $%s is not defined", curr->symbol);
1149 else
1150 locfile_locate(curr->locfile, curr->source, "jq: error: %s/%d is not defined", curr->symbol, block_count_actuals(curr->arglist));
1151 errors++;
1152 // don't process this instruction if it's not well-defined
1153 ret = BLOCK(ret, inst_block(curr));
1154 continue;
1155 }
1156 }
1157
1158 block prelude = gen_noop();
1159 if (curr->op == CALL_JQ) {
1160 int actual_args = 0, desired_args = 0;
1161 // We expand the argument list as a series of instructions
1162 switch (curr->bound_by->op) {
1163 default: assert(0 && "Unknown function type"); break;
1164 case CLOSURE_CREATE:
1165 case CLOSURE_PARAM: {
1166 block callargs = gen_noop();
1167 for (inst* i; (i = block_take(&curr->arglist));) {
1168 assert(opcode_describe(i->op)->flags & OP_IS_CALL_PSEUDO);
1169 block b = inst_block(i);
1170 switch (i->op) {
1171 default: assert(0 && "Unknown type of parameter"); break;
1172 case CLOSURE_REF:
1173 block_append(&callargs, b);
1174 break;
1175 case CLOSURE_CREATE:
1176 block_append(&prelude, b);
1177 block_append(&callargs, gen_op_bound(CLOSURE_REF, b));
1178 break;
1179 }
1180 actual_args++;
1181 }
1182 curr->imm.intval = actual_args;
1183 curr->arglist = callargs;
1184
1185 if (curr->bound_by->op == CLOSURE_CREATE) {
1186 for (inst* i = curr->bound_by->arglist.first; i; i = i->next) {
1187 assert(i->op == CLOSURE_PARAM);
1188 desired_args++;
1189 }
1190 }
1191 break;
1192 }
1193
1194 case CLOSURE_CREATE_C: {
1195 for (inst* i; (i = block_take(&curr->arglist)); ) {
1196 assert(i->op == CLOSURE_CREATE); // FIXME
1197 block body = i->subfn;
1198 i->subfn = gen_noop();
1199 inst_free(i);
1200 // arguments should be pushed in reverse order, prepend them to prelude
1201 errors += expand_call_arglist(&body, args, env);
1202 prelude = BLOCK(gen_subexp(body), prelude);
1203 actual_args++;
1204 }
1205 assert(curr->op == CALL_JQ);
1206 curr->op = CALL_BUILTIN;
1207 curr->imm.intval = actual_args + 1 /* include the implicit input in arg count */;
1208 assert(curr->bound_by->op == CLOSURE_CREATE_C);
1209 desired_args = curr->bound_by->imm.cfunc->nargs - 1;
1210 assert(!curr->arglist.first);
1211 break;
1212 }
1213 }
1214
1215 assert(actual_args == desired_args); // because now handle this above
1216 }
1217 ret = BLOCK(ret, prelude, inst_block(curr));
1218 }
1219 *b = ret;
1220 return errors;
1221 }
1222
compile(struct bytecode * bc,block b,struct locfile * lf,jv args,jv * env)1223 static int compile(struct bytecode* bc, block b, struct locfile* lf, jv args, jv *env) {
1224 int errors = 0;
1225 int pos = 0;
1226 int var_frame_idx = 0;
1227 bc->nsubfunctions = 0;
1228 errors += expand_call_arglist(&b, args, env);
1229 b = BLOCK(b, gen_op_simple(RET));
1230 jv localnames = jv_array();
1231 for (inst* curr = b.first; curr; curr = curr->next) {
1232 if (!curr->next) assert(curr == b.last);
1233 int length = opcode_describe(curr->op)->length;
1234 if (curr->op == CALL_JQ) {
1235 for (inst* arg = curr->arglist.first; arg; arg = arg->next) {
1236 length += 2;
1237 }
1238 }
1239 pos += length;
1240 curr->bytecode_pos = pos;
1241 curr->compiled = bc;
1242
1243 assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM);
1244
1245 if ((opcode_describe(curr->op)->flags & OP_HAS_VARIABLE) &&
1246 curr->bound_by == curr) {
1247 curr->imm.intval = var_frame_idx++;
1248 localnames = jv_array_append(localnames, jv_string(curr->symbol));
1249 }
1250
1251 if (curr->op == CLOSURE_CREATE) {
1252 assert(curr->bound_by == curr);
1253 curr->imm.intval = bc->nsubfunctions++;
1254 }
1255 if (curr->op == CLOSURE_CREATE_C) {
1256 assert(curr->bound_by == curr);
1257 int idx = bc->globals->ncfunctions++;
1258 bc->globals->cfunc_names = jv_array_append(bc->globals->cfunc_names,
1259 jv_string(curr->symbol));
1260 bc->globals->cfunctions[idx] = *curr->imm.cfunc;
1261 curr->imm.intval = idx;
1262 }
1263 }
1264 if (pos > 0xFFFF) {
1265 // too long for program counter to fit in uint16_t
1266 locfile_locate(lf, UNKNOWN_LOCATION,
1267 "function compiled to %d bytes which is too long", pos);
1268 errors++;
1269 }
1270 bc->codelen = pos;
1271 bc->debuginfo = jv_object_set(bc->debuginfo, jv_string("locals"), localnames);
1272 if (bc->nsubfunctions) {
1273 bc->subfunctions = jv_mem_calloc(sizeof(struct bytecode*), bc->nsubfunctions);
1274 for (inst* curr = b.first; curr; curr = curr->next) {
1275 if (curr->op == CLOSURE_CREATE) {
1276 struct bytecode* subfn = jv_mem_alloc(sizeof(struct bytecode));
1277 bc->subfunctions[curr->imm.intval] = subfn;
1278 subfn->globals = bc->globals;
1279 subfn->parent = bc;
1280 subfn->nclosures = 0;
1281 subfn->debuginfo = jv_object_set(jv_object(), jv_string("name"), jv_string(curr->symbol));
1282 jv params = jv_array();
1283 for (inst* param = curr->arglist.first; param; param = param->next) {
1284 assert(param->op == CLOSURE_PARAM);
1285 assert(param->bound_by == param);
1286 param->imm.intval = subfn->nclosures++;
1287 param->compiled = subfn;
1288 params = jv_array_append(params, jv_string(param->symbol));
1289 }
1290 subfn->debuginfo = jv_object_set(subfn->debuginfo, jv_string("params"), params);
1291 errors += compile(subfn, curr->subfn, lf, args, env);
1292 curr->subfn = gen_noop();
1293 }
1294 }
1295 } else {
1296 bc->subfunctions = 0;
1297 }
1298 uint16_t* code = jv_mem_calloc(sizeof(uint16_t), bc->codelen);
1299 bc->code = code;
1300 pos = 0;
1301 jv constant_pool = jv_array();
1302 int maxvar = -1;
1303 if (!errors) for (inst* curr = b.first; curr; curr = curr->next) {
1304 const struct opcode_description* op = opcode_describe(curr->op);
1305 if (op->length == 0)
1306 continue;
1307 code[pos++] = curr->op;
1308 assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM);
1309 if (curr->op == CALL_BUILTIN) {
1310 assert(curr->bound_by->op == CLOSURE_CREATE_C);
1311 assert(!curr->arglist.first);
1312 code[pos++] = (uint16_t)curr->imm.intval;
1313 code[pos++] = curr->bound_by->imm.intval;
1314 } else if (curr->op == CALL_JQ) {
1315 assert(curr->bound_by->op == CLOSURE_CREATE ||
1316 curr->bound_by->op == CLOSURE_PARAM);
1317 code[pos++] = (uint16_t)curr->imm.intval;
1318 code[pos++] = nesting_level(bc, curr->bound_by);
1319 code[pos++] = curr->bound_by->imm.intval |
1320 (curr->bound_by->op == CLOSURE_CREATE ? ARG_NEWCLOSURE : 0);
1321 for (inst* arg = curr->arglist.first; arg; arg = arg->next) {
1322 assert(arg->op == CLOSURE_REF && arg->bound_by->op == CLOSURE_CREATE);
1323 code[pos++] = nesting_level(bc, arg->bound_by);
1324 code[pos++] = arg->bound_by->imm.intval | ARG_NEWCLOSURE;
1325 }
1326 } else if ((op->flags & OP_HAS_CONSTANT) && (op->flags & OP_HAS_VARIABLE)) {
1327 // STORE_GLOBAL: constant global, basically
1328 code[pos++] = jv_array_length(jv_copy(constant_pool));
1329 constant_pool = jv_array_append(constant_pool, jv_copy(curr->imm.constant));
1330 code[pos++] = nesting_level(bc, curr->bound_by);
1331 uint16_t var = (uint16_t)curr->bound_by->imm.intval;
1332 code[pos++] = var;
1333 } else if (op->flags & OP_HAS_CONSTANT) {
1334 code[pos++] = jv_array_length(jv_copy(constant_pool));
1335 constant_pool = jv_array_append(constant_pool, jv_copy(curr->imm.constant));
1336 } else if (op->flags & OP_HAS_VARIABLE) {
1337 code[pos++] = nesting_level(bc, curr->bound_by);
1338 uint16_t var = (uint16_t)curr->bound_by->imm.intval;
1339 code[pos++] = var;
1340 if (var > maxvar) maxvar = var;
1341 } else if (op->flags & OP_HAS_BRANCH) {
1342 assert(curr->imm.target->bytecode_pos != -1);
1343 assert(curr->imm.target->bytecode_pos > pos); // only forward branches
1344 code[pos] = curr->imm.target->bytecode_pos - (pos + 1);
1345 pos++;
1346 } else if (op->length > 1) {
1347 assert(0 && "codegen not implemented for this operation");
1348 }
1349 }
1350 bc->constants = constant_pool;
1351 bc->nlocals = maxvar + 2; // FIXME: frames of size zero?
1352 block_free(b);
1353 return errors;
1354 }
1355
block_compile(block b,struct bytecode ** out,struct locfile * lf,jv args)1356 int block_compile(block b, struct bytecode** out, struct locfile* lf, jv args) {
1357 struct bytecode* bc = jv_mem_alloc(sizeof(struct bytecode));
1358 bc->parent = 0;
1359 bc->nclosures = 0;
1360 bc->globals = jv_mem_alloc(sizeof(struct symbol_table));
1361 int ncfunc = count_cfunctions(b);
1362 bc->globals->ncfunctions = 0;
1363 bc->globals->cfunctions = jv_mem_calloc(sizeof(struct cfunction), ncfunc);
1364 bc->globals->cfunc_names = jv_array();
1365 bc->debuginfo = jv_object_set(jv_object(), jv_string("name"), jv_null());
1366 jv env = jv_invalid();
1367 int nerrors = compile(bc, b, lf, args, &env);
1368 jv_free(args);
1369 jv_free(env);
1370 assert(bc->globals->ncfunctions == ncfunc);
1371 if (nerrors > 0) {
1372 bytecode_free(bc);
1373 *out = 0;
1374 } else {
1375 *out = bc;
1376 }
1377 return nerrors;
1378 }
1379
block_free(block b)1380 void block_free(block b) {
1381 struct inst* next;
1382 for (struct inst* curr = b.first; curr; curr = next) {
1383 next = curr->next;
1384 inst_free(curr);
1385 }
1386 }
1387