1 #ifndef _GNU_SOURCE
2 #define _GNU_SOURCE // for strdup
3 #endif
4 #include <assert.h>
5 #include <math.h>
6 #include <string.h>
7 #include <stdlib.h>
8 #include <unistd.h>
9 #include "compile.h"
10 #include "bytecode.h"
11 #include "locfile.h"
12 #include "jv_alloc.h"
13 #include "linker.h"
14 
15 /*
16   The intermediate representation for jq filters is as a sequence of
17   struct inst, which form a doubly-linked list via the next and prev
18   pointers.
19 
20   A "block" represents a sequence of "struct inst", which may be
21   empty.
22 
23   Blocks are generated by the parser bottom-up, so may have free
24   variables (refer to things not defined). See inst.bound_by and
25   inst.symbol.
26  */
27 struct inst {
28   struct inst* next;
29   struct inst* prev;
30 
31   opcode op;
32 
33   struct {
34     uint16_t intval;
35     struct inst* target;
36     jv constant;
37     const struct cfunction* cfunc;
38   } imm;
39 
40   struct locfile* locfile;
41   location source;
42 
43   // Binding
44   // An instruction requiring binding (for parameters/variables/functions)
45   // is in one of three states:
46   //   inst->bound_by = NULL  - Unbound free variable
47   //   inst->bound_by = inst  - This instruction binds a variable
48   //   inst->bound_by = other - Uses variable bound by other instruction
49   // Unbound instructions (references to other things that may or may not
50   // exist) are created by "gen_foo_unbound", and bindings are created by
51   // block_bind(definition, body), which binds all instructions in
52   // body which are unboudn and refer to "definition" by name.
53   struct inst* bound_by;
54   char* symbol;
55 
56   int nformals;
57   int nactuals;
58 
59   block subfn;   // used by CLOSURE_CREATE (body of function)
60   block arglist; // used by CLOSURE_CREATE (formals) and CALL_JQ (arguments)
61 
62   // This instruction is compiled as part of which function?
63   // (only used during block_compile)
64   struct bytecode* compiled;
65 
66   int bytecode_pos; // position just after this insn
67 };
68 
inst_new(opcode op)69 static inst* inst_new(opcode op) {
70   inst* i = jv_mem_alloc(sizeof(inst));
71   i->next = i->prev = 0;
72   i->op = op;
73   i->bytecode_pos = -1;
74   i->bound_by = 0;
75   i->symbol = 0;
76   i->nformals = -1;
77   i->nactuals = -1;
78   i->subfn = gen_noop();
79   i->arglist = gen_noop();
80   i->source = UNKNOWN_LOCATION;
81   i->locfile = 0;
82   return i;
83 }
84 
inst_free(struct inst * i)85 static void inst_free(struct inst* i) {
86   jv_mem_free(i->symbol);
87   block_free(i->subfn);
88   block_free(i->arglist);
89   if (i->locfile)
90     locfile_free(i->locfile);
91   if (opcode_describe(i->op)->flags & OP_HAS_CONSTANT) {
92     jv_free(i->imm.constant);
93   }
94   jv_mem_free(i);
95 }
96 
inst_block(inst * i)97 static block inst_block(inst* i) {
98   block b = {i,i};
99   return b;
100 }
101 
block_is_single(block b)102 int block_is_single(block b) {
103   return b.first && b.first == b.last;
104 }
105 
block_take(block * b)106 static inst* block_take(block* b) {
107   if (b->first == 0) return 0;
108   inst* i = b->first;
109   if (i->next) {
110     i->next->prev = 0;
111     b->first = i->next;
112     i->next = 0;
113   } else {
114     b->first = 0;
115     b->last = 0;
116   }
117   return i;
118 }
119 
gen_location(location loc,struct locfile * l,block b)120 block gen_location(location loc, struct locfile* l, block b) {
121   for (inst* i = b.first; i; i = i->next) {
122     if (i->source.start == UNKNOWN_LOCATION.start &&
123         i->source.end == UNKNOWN_LOCATION.end) {
124       i->source = loc;
125       i->locfile = locfile_retain(l);
126     }
127   }
128   return b;
129 }
130 
gen_noop()131 block gen_noop() {
132   block b = {0,0};
133   return b;
134 }
135 
block_is_noop(block b)136 int block_is_noop(block b) {
137   return (b.first == 0 && b.last == 0);
138 }
139 
gen_op_simple(opcode op)140 block gen_op_simple(opcode op) {
141   assert(opcode_describe(op)->length == 1);
142   return inst_block(inst_new(op));
143 }
144 
145 
gen_const(jv constant)146 block gen_const(jv constant) {
147   assert(opcode_describe(LOADK)->flags & OP_HAS_CONSTANT);
148   inst* i = inst_new(LOADK);
149   i->imm.constant = constant;
150   return inst_block(i);
151 }
152 
gen_const_global(jv constant,const char * name)153 block gen_const_global(jv constant, const char *name) {
154   assert((opcode_describe(STORE_GLOBAL)->flags & (OP_HAS_CONSTANT | OP_HAS_VARIABLE | OP_HAS_BINDING)) ==
155          (OP_HAS_CONSTANT | OP_HAS_VARIABLE | OP_HAS_BINDING));
156   inst* i = inst_new(STORE_GLOBAL);
157   i->imm.constant = constant;
158   i->symbol = strdup(name);
159   return inst_block(i);
160 }
161 
gen_op_pushk_under(jv constant)162 block gen_op_pushk_under(jv constant) {
163   assert(opcode_describe(PUSHK_UNDER)->flags & OP_HAS_CONSTANT);
164   inst* i = inst_new(PUSHK_UNDER);
165   i->imm.constant = constant;
166   return inst_block(i);
167 }
168 
block_is_const(block b)169 int block_is_const(block b) {
170   return (block_is_single(b) && (b.first->op == LOADK || b.first->op == PUSHK_UNDER));
171 }
172 
block_is_const_inf(block b)173 int block_is_const_inf(block b) {
174   return (block_is_single(b) && b.first->op == LOADK &&
175           jv_get_kind(b.first->imm.constant) == JV_KIND_NUMBER &&
176           isinf(jv_number_value(b.first->imm.constant)));
177 }
178 
block_const_kind(block b)179 jv_kind block_const_kind(block b) {
180   assert(block_is_const(b));
181   return jv_get_kind(b.first->imm.constant);
182 }
183 
block_const(block b)184 jv block_const(block b) {
185   assert(block_is_const(b));
186   return jv_copy(b.first->imm.constant);
187 }
188 
gen_op_target(opcode op,block target)189 block gen_op_target(opcode op, block target) {
190   assert(opcode_describe(op)->flags & OP_HAS_BRANCH);
191   assert(target.last);
192   inst* i = inst_new(op);
193   i->imm.target = target.last;
194   return inst_block(i);
195 }
196 
gen_op_targetlater(opcode op)197 block gen_op_targetlater(opcode op) {
198   assert(opcode_describe(op)->flags & OP_HAS_BRANCH);
199   inst* i = inst_new(op);
200   i->imm.target = 0;
201   return inst_block(i);
202 }
inst_set_target(block b,block target)203 void inst_set_target(block b, block target) {
204   assert(block_is_single(b));
205   assert(opcode_describe(b.first->op)->flags & OP_HAS_BRANCH);
206   assert(target.last);
207   b.first->imm.target = target.last;
208 }
209 
gen_op_unbound(opcode op,const char * name)210 block gen_op_unbound(opcode op, const char* name) {
211   assert(opcode_describe(op)->flags & OP_HAS_BINDING);
212   inst* i = inst_new(op);
213   i->symbol = strdup(name);
214   return inst_block(i);
215 }
216 
gen_op_var_fresh(opcode op,const char * name)217 block gen_op_var_fresh(opcode op, const char* name) {
218   assert(opcode_describe(op)->flags & OP_HAS_VARIABLE);
219   return block_bind(gen_op_unbound(op, name),
220                     gen_noop(), OP_HAS_VARIABLE);
221 }
222 
gen_op_bound(opcode op,block binder)223 block gen_op_bound(opcode op, block binder) {
224   assert(block_is_single(binder));
225   block b = gen_op_unbound(op, binder.first->symbol);
226   b.first->bound_by = binder.first;
227   return b;
228 }
229 
gen_dictpair(block k,block v)230 block gen_dictpair(block k, block v) {
231   return BLOCK(gen_subexp(k), gen_subexp(v), gen_op_simple(INSERT));
232 }
233 
234 
inst_join(inst * a,inst * b)235 static void inst_join(inst* a, inst* b) {
236   assert(a && b);
237   assert(!a->next);
238   assert(!b->prev);
239   a->next = b;
240   b->prev = a;
241 }
242 
block_append(block * b,block b2)243 void block_append(block* b, block b2) {
244   if (b2.first) {
245     if (b->last) {
246       inst_join(b->last, b2.first);
247     } else {
248       b->first = b2.first;
249     }
250     b->last = b2.last;
251   }
252 }
253 
block_join(block a,block b)254 block block_join(block a, block b) {
255   block c = a;
256   block_append(&c, b);
257   return c;
258 }
259 
block_has_only_binders_and_imports(block binders,int bindflags)260 int block_has_only_binders_and_imports(block binders, int bindflags) {
261   bindflags |= OP_HAS_BINDING;
262   for (inst* curr = binders.first; curr; curr = curr->next) {
263     if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != DEPS && curr->op != MODULEMETA) {
264       return 0;
265     }
266   }
267   return 1;
268 }
269 
inst_is_binder(inst * i,int bindflags)270 static int inst_is_binder(inst *i, int bindflags) {
271   return !((opcode_describe(i->op)->flags & bindflags) != bindflags && i->op != MODULEMETA);
272 }
273 
block_has_only_binders(block binders,int bindflags)274 int block_has_only_binders(block binders, int bindflags) {
275   bindflags |= OP_HAS_BINDING;
276   bindflags &= ~OP_BIND_WILDCARD;
277   for (inst* curr = binders.first; curr; curr = curr->next) {
278     if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != MODULEMETA) {
279       return 0;
280     }
281   }
282   return 1;
283 }
284 
285 // Count a binder's (function) formal params
block_count_formals(block b)286 static int block_count_formals(block b) {
287   int args = 0;
288   if (b.first->op == CLOSURE_CREATE_C)
289     return b.first->imm.cfunc->nargs - 1;
290   for (inst* i = b.first->arglist.first; i; i = i->next) {
291     assert(i->op == CLOSURE_PARAM);
292     args++;
293   }
294   return args;
295 }
296 
297 // Count a call site's actual params
block_count_actuals(block b)298 static int block_count_actuals(block b) {
299   int args = 0;
300   for (inst* i = b.first; i; i = i->next) {
301     switch (i->op) {
302     default: assert(0 && "Unknown function type"); break;
303     case CLOSURE_CREATE:
304     case CLOSURE_PARAM:
305     case CLOSURE_CREATE_C:
306       args++;
307       break;
308     }
309   }
310   return args;
311 }
312 
block_count_refs(block binder,block body)313 static int block_count_refs(block binder, block body) {
314   int nrefs = 0;
315   for (inst* i = body.first; i; i = i->next) {
316     if (i != binder.first && i->bound_by == binder.first) {
317       nrefs++;
318     }
319     // counting recurses into closures
320     nrefs += block_count_refs(binder, i->subfn);
321     // counting recurses into argument list
322     nrefs += block_count_refs(binder, i->arglist);
323   }
324   return nrefs;
325 }
326 
block_bind_subblock(block binder,block body,int bindflags,int break_distance)327 static int block_bind_subblock(block binder, block body, int bindflags, int break_distance) {
328   assert(block_is_single(binder));
329   assert((opcode_describe(binder.first->op)->flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD));
330   assert(binder.first->symbol);
331   assert(binder.first->bound_by == 0 || binder.first->bound_by == binder.first);
332   assert(break_distance >= 0);
333 
334   binder.first->bound_by = binder.first;
335   if (binder.first->nformals == -1)
336     binder.first->nformals = block_count_formals(binder);
337   int nrefs = 0;
338   for (inst* i = body.first; i; i = i->next) {
339     int flags = opcode_describe(i->op)->flags;
340     if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by == 0 &&
341         (!strcmp(i->symbol, binder.first->symbol) ||
342          // Check for break/break2/break3; see parser.y
343          ((bindflags & OP_BIND_WILDCARD) && i->symbol[0] == '*' &&
344           break_distance <= 3 && (i->symbol[1] == '1' + break_distance) &&
345           i->symbol[2] == '\0'))) {
346       // bind this instruction
347       if (i->op == CALL_JQ && i->nactuals == -1)
348         i->nactuals = block_count_actuals(i->arglist);
349       if (i->nactuals == -1 || i->nactuals == binder.first->nformals) {
350         i->bound_by = binder.first;
351         nrefs++;
352       }
353     } else if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by != 0 &&
354                !strncmp(binder.first->symbol, "*anonlabel", sizeof("*anonlabel") - 1) &&
355                !strncmp(i->symbol, "*anonlabel", sizeof("*anonlabel") - 1)) {
356       // Increment the break distance required for this binder to match
357       // a break whenever we come across a STOREV of *anonlabel...
358       break_distance++;
359     }
360     // binding recurses into closures
361     nrefs += block_bind_subblock(binder, i->subfn, bindflags, break_distance);
362     // binding recurses into argument list
363     nrefs += block_bind_subblock(binder, i->arglist, bindflags, break_distance);
364   }
365   return nrefs;
366 }
367 
block_bind_each(block binder,block body,int bindflags)368 static int block_bind_each(block binder, block body, int bindflags) {
369   assert(block_has_only_binders(binder, bindflags));
370   bindflags |= OP_HAS_BINDING;
371   int nrefs = 0;
372   for (inst* curr = binder.first; curr; curr = curr->next) {
373     nrefs += block_bind_subblock(inst_block(curr), body, bindflags, 0);
374   }
375   return nrefs;
376 }
377 
block_bind(block binder,block body,int bindflags)378 block block_bind(block binder, block body, int bindflags) {
379   block_bind_each(binder, body, bindflags);
380   return block_join(binder, body);
381 }
382 
block_bind_library(block binder,block body,int bindflags,const char * libname)383 block block_bind_library(block binder, block body, int bindflags, const char *libname) {
384   bindflags |= OP_HAS_BINDING;
385   int nrefs = 0;
386   int matchlen = (libname == NULL) ? 0 : strlen(libname);
387   char *matchname = jv_mem_alloc(matchlen+2+1);
388   matchname[0] = '\0';
389   if (libname != NULL && libname[0] != '\0') {
390     strcpy(matchname,libname);
391     strcpy(matchname+matchlen, "::");
392     matchlen += 2;
393   }
394   assert(block_has_only_binders(binder, bindflags));
395   for (inst *curr = binder.first; curr; curr = curr->next) {
396     int bindflags2 = bindflags;
397     char* cname = curr->symbol;
398     char* tname = jv_mem_alloc(strlen(curr->symbol)+matchlen+1);
399     strcpy(tname, matchname);
400     strcpy(tname+matchlen, curr->symbol);
401 
402     // Ew
403     if ((opcode_describe(curr->op)->flags & (OP_HAS_VARIABLE | OP_HAS_CONSTANT)))
404       bindflags2 = OP_HAS_VARIABLE | OP_HAS_BINDING;
405 
406     // This mutation is ugly, even if we undo it
407     curr->symbol = tname;
408     nrefs += block_bind_subblock(inst_block(curr), body, bindflags2, 0);
409     curr->symbol = cname;
410     free(tname);
411   }
412   free(matchname);
413   return body; // We don't return a join because we don't want those sticking around...
414 }
415 
416 // Bind binder to body and throw away any defs in binder not referenced
417 // (directly or indirectly) from body.
block_bind_referenced(block binder,block body,int bindflags)418 block block_bind_referenced(block binder, block body, int bindflags) {
419   assert(block_has_only_binders(binder, bindflags));
420   bindflags |= OP_HAS_BINDING;
421   block refd = gen_noop();
422   block unrefd = gen_noop();
423   int nrefs;
424   for (int last_kept = 0, kept = 0; ; ) {
425     for (inst* curr; (curr = block_take(&binder));) {
426       block b = inst_block(curr);
427       nrefs = block_bind_each(b, body, bindflags);
428       // Check if this binder is referenced from any of the ones we
429       // already know are referenced by body.
430       nrefs += block_count_refs(b, refd);
431       nrefs += block_count_refs(b, body);
432       if (nrefs) {
433         refd = BLOCK(refd, b);
434         kept++;
435       } else {
436         unrefd = BLOCK(unrefd, b);
437       }
438     }
439     if (kept == last_kept)
440       break;
441     last_kept = kept;
442     binder = unrefd;
443     unrefd = gen_noop();
444   }
445   block_free(unrefd);
446   return block_join(refd, body);
447 }
448 
block_drop_unreferenced(block body)449 block block_drop_unreferenced(block body) {
450   inst* curr;
451   block refd = gen_noop();
452   block unrefd = gen_noop();
453   int drop;
454   do {
455     drop = 0;
456     while ((curr = block_take(&body)) && curr->op != TOP) {
457       block b = inst_block(curr);
458       if (block_count_refs(b,refd) + block_count_refs(b,body) == 0) {
459         unrefd = BLOCK(unrefd, b);
460         drop++;
461       } else {
462         refd = BLOCK(refd, b);
463       }
464     }
465     if (curr && curr->op == TOP) {
466       body = BLOCK(inst_block(curr),body);
467     }
468     body = BLOCK(refd, body);
469     refd = gen_noop();
470   } while (drop != 0);
471   block_free(unrefd);
472   return body;
473 }
474 
block_take_imports(block * body)475 jv block_take_imports(block* body) {
476   jv imports = jv_array();
477 
478   inst* top = NULL;
479   if (body->first && body->first->op == TOP) {
480     top = block_take(body);
481   }
482   while (body->first && (body->first->op == MODULEMETA || body->first->op == DEPS)) {
483     inst* dep = block_take(body);
484     if (dep->op == DEPS) {
485       imports = jv_array_append(imports, jv_copy(dep->imm.constant));
486     }
487     inst_free(dep);
488   }
489   if (top) {
490     *body = block_join(inst_block(top),*body);
491   }
492   return imports;
493 }
494 
block_list_funcs(block body,int omit_underscores)495 jv block_list_funcs(block body, int omit_underscores) {
496   jv funcs = jv_object(); // Use the keys for set semantics.
497   for (inst *pos = body.first; pos != NULL; pos = pos->next) {
498     if (pos->op == CLOSURE_CREATE || pos->op == CLOSURE_CREATE_C) {
499       if (pos->symbol != NULL && (!omit_underscores || pos->symbol[0] != '_')) {
500         funcs = jv_object_set(funcs, jv_string_fmt("%s/%i", pos->symbol, pos->nformals), jv_null());
501       }
502     }
503   }
504   return jv_keys_unsorted(funcs);
505 }
506 
gen_module(block metadata)507 block gen_module(block metadata) {
508   inst* i = inst_new(MODULEMETA);
509   i->imm.constant = block_const(metadata);
510   if (jv_get_kind(i->imm.constant) != JV_KIND_OBJECT)
511     i->imm.constant = jv_object_set(jv_object(), jv_string("metadata"), i->imm.constant);
512   block_free(metadata);
513   return inst_block(i);
514 }
515 
block_module_meta(block b)516 jv block_module_meta(block b) {
517   if (b.first != NULL && b.first->op == MODULEMETA)
518     return jv_copy(b.first->imm.constant);
519   return jv_null();
520 }
521 
gen_import(const char * name,const char * as,int is_data)522 block gen_import(const char* name, const char* as, int is_data) {
523   inst* i = inst_new(DEPS);
524   jv meta = jv_object();
525   if (as != NULL)
526     meta = jv_object_set(meta, jv_string("as"), jv_string(as));
527   meta = jv_object_set(meta, jv_string("is_data"), is_data ? jv_true() : jv_false());
528   meta = jv_object_set(meta, jv_string("relpath"), jv_string(name));
529   i->imm.constant = meta;
530   return inst_block(i);
531 }
532 
gen_import_meta(block import,block metadata)533 block gen_import_meta(block import, block metadata) {
534   assert(block_is_single(import) && import.first->op == DEPS);
535   assert(block_is_const(metadata) && block_const_kind(metadata) == JV_KIND_OBJECT);
536   inst *i = import.first;
537   i->imm.constant = jv_object_merge(block_const(metadata), i->imm.constant);
538   block_free(metadata);
539   return import;
540 }
541 
gen_function(const char * name,block formals,block body)542 block gen_function(const char* name, block formals, block body) {
543   inst* i = inst_new(CLOSURE_CREATE);
544   for (inst* i = formals.last; i; i = i->prev) {
545     if (i->op == CLOSURE_PARAM_REGULAR) {
546       i->op = CLOSURE_PARAM;
547       body = gen_var_binding(gen_call(i->symbol, gen_noop()), i->symbol, body);
548     }
549     block_bind_subblock(inst_block(i), body, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0);
550   }
551   i->subfn = body;
552   i->symbol = strdup(name);
553   i->arglist = formals;
554   block b = inst_block(i);
555   block_bind_subblock(b, b, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0);
556   return b;
557 }
558 
gen_param_regular(const char * name)559 block gen_param_regular(const char* name) {
560   return gen_op_unbound(CLOSURE_PARAM_REGULAR, name);
561 }
562 
gen_param(const char * name)563 block gen_param(const char* name) {
564   return gen_op_unbound(CLOSURE_PARAM, name);
565 }
566 
gen_lambda(block body)567 block gen_lambda(block body) {
568   return gen_function("@lambda", gen_noop(), body);
569 }
570 
gen_call(const char * name,block args)571 block gen_call(const char* name, block args) {
572   block b = gen_op_unbound(CALL_JQ, name);
573   b.first->arglist = args;
574   return b;
575 }
576 
gen_subexp(block a)577 block gen_subexp(block a) {
578   if (block_is_noop(a)) {
579     return gen_op_simple(DUP);
580   }
581   if (block_is_single(a) && a.first->op == LOADK) {
582     jv c = block_const(a);
583     block_free(a);
584     return gen_op_pushk_under(c);
585   }
586   return BLOCK(gen_op_simple(SUBEXP_BEGIN), a, gen_op_simple(SUBEXP_END));
587 }
588 
gen_both(block a,block b)589 block gen_both(block a, block b) {
590   block jump = gen_op_targetlater(JUMP);
591   block fork = gen_op_target(FORK, jump);
592   block c = BLOCK(fork, a, jump, b);
593   inst_set_target(jump, c);
594   return c;
595 }
596 
gen_const_object(block expr)597 block gen_const_object(block expr) {
598   int is_const = 1;
599   jv o = jv_object();
600   jv k = jv_null();
601   jv v = jv_null();
602   for (inst *i = expr.first; i; i = i->next) {
603     if (i->op == PUSHK_UNDER) {
604       k = jv_copy(i->imm.constant);
605       i = i->next;
606     } else if (i->op != SUBEXP_BEGIN ||
607         i->next == NULL ||
608         i->next->op != LOADK ||
609         i->next->next == NULL ||
610         i->next->next->op != SUBEXP_END) {
611       is_const = 0;
612       break;
613     } else {
614       k = jv_copy(i->next->imm.constant);
615       i = i->next->next->next;
616     }
617     if (i != NULL && i->op == PUSHK_UNDER) {
618       v = jv_copy(i->imm.constant);
619       i = i->next;
620     } else if (i == NULL ||
621         i->op != SUBEXP_BEGIN ||
622         i->next == NULL ||
623         i->next->op != LOADK ||
624         i->next->next == NULL ||
625         i->next->next->op != SUBEXP_END) {
626       is_const = 0;
627       break;
628     } else {
629       v = jv_copy(i->next->imm.constant);
630       i = i->next->next->next;
631     }
632     if (i == NULL || i->op != INSERT) {
633       is_const = 0;
634       break;
635     }
636     if (jv_get_kind(k) != JV_KIND_STRING) {
637       is_const = 0;
638       break;
639     }
640     o = jv_object_set(o, k, v);
641     k = jv_null();
642     v = jv_null();
643   }
644   if (!is_const) {
645     jv_free(o);
646     jv_free(k);
647     jv_free(v);
648     block b = {0,0};
649     return b;
650   }
651   block_free(expr);
652   return gen_const(o);
653 }
654 
gen_const_array(block expr)655 static block gen_const_array(block expr) {
656   /*
657    * An expr of all constant elements looks like this:
658    *
659    * 0009 FORK 0027
660    * 0011 FORK 0023
661    * 0013 FORK 0019
662    * 0015 LOADK 1
663    * 0017 JUMP 0021
664    * 0019 LOADK 2
665    * 0021 JUMP 0025
666    * 0023 LOADK 3
667    * 0025 JUMP 0029
668    * 0027 LOADK 4
669    *
670    * That's: N-1 commas for N elements, N LOADKs, and a JUMP between
671    * every LOADK.  The sequence ends in a LOADK.  Any deviation and it's
672    * not a list of constants.
673    *
674    * Here we check for this pattern almost exactly.  We don't check that
675    * the targets of the FORK and JUMP instructions are in the right
676    * sequence.
677    */
678   int all_const = 1;
679   int commas = 0;
680   int normal = 1;
681   jv a = jv_array();
682   for (inst *i = expr.first; i; i = i->next) {
683     if (i->op == FORK) {
684       commas++;
685       if (i->imm.target == NULL || i->imm.target->op != JUMP ||
686           jv_array_length(jv_copy(a)) > 0) {
687         normal = 0;
688         break;
689       }
690     } else if (all_const && i->op == LOADK) {
691       if (i->next != NULL && i->next->op != JUMP) {
692         normal = 0;
693         break;
694       }
695       a = jv_array_append(a, jv_copy(i->imm.constant));
696     } else if (i->op != JUMP || i->imm.target == NULL ||
697                i->imm.target->op != LOADK) {
698       all_const = 0;
699     }
700   }
701 
702   if (all_const && normal &&
703       (expr.last == NULL || expr.last->op == LOADK) &&
704       jv_array_length(jv_copy(a)) == commas + 1) {
705     block_free(expr);
706     return gen_const(a);
707   }
708 
709   jv_free(a);
710   block b = {0,0};
711   return b;
712 }
713 
gen_collect(block expr)714 block gen_collect(block expr) {
715   block const_array = gen_const_array(expr);
716   if (const_array.first != NULL)
717     return const_array;
718 
719   block array_var = gen_op_var_fresh(STOREV, "collect");
720   block c = BLOCK(gen_op_simple(DUP), gen_const(jv_array()), array_var);
721 
722   block tail = BLOCK(gen_op_bound(APPEND, array_var),
723                      gen_op_simple(BACKTRACK));
724 
725   return BLOCK(c,
726                gen_op_target(FORK, tail),
727                expr,
728                tail,
729                gen_op_bound(LOADVN, array_var));
730 }
731 
bind_matcher(block matcher,block body)732 static block bind_matcher(block matcher, block body) {
733   // cannot call block_bind(matcher, body) because that requires
734   // block_has_only_binders(matcher), which is not true here as matchers
735   // may also contain code to extract the correct elements
736   for (inst* i = matcher.first; i; i = i->next) {
737     if ((i->op == STOREV || i->op == STOREVN) && !i->bound_by)
738       block_bind_subblock(inst_block(i), body, OP_HAS_VARIABLE, 0);
739   }
740   return BLOCK(matcher, body);
741 }
742 
743 
744 // Extract destructuring var names from the block
745 // *vars should be a jv_object (for set semantics)
block_get_unbound_vars(block b,jv * vars)746 static void block_get_unbound_vars(block b, jv *vars) {
747   assert(vars != NULL);
748   assert(jv_get_kind(*vars) == JV_KIND_OBJECT);
749   for (inst* i = b.first; i; i = i->next) {
750     if (i->subfn.first) {
751       block_get_unbound_vars(i->subfn, vars);
752       continue;
753     }
754     if ((i->op == STOREV || i->op == STOREVN) && i->bound_by == NULL) {
755       *vars = jv_object_set(*vars, jv_string(i->symbol), jv_true());
756     }
757   }
758 }
759 
760 /* Build wrappers around destructuring matchers so that we can chain them
761  * when we have errors.  The approach is as follows:
762  * DESTRUCTURE_ALT NEXT_MATCHER (unless last matcher)
763  * existing_matcher_block
764  * JUMP BODY
765  */
bind_alternation_matchers(block matchers,block body)766 static block bind_alternation_matchers(block matchers, block body) {
767   block preamble = {0};
768   block altmatchers = {0};
769   block mb = {0};
770   block final_matcher = matchers;
771 
772   // Pass through the matchers to find all destructured names.
773   while (final_matcher.first && final_matcher.first->op == DESTRUCTURE_ALT) {
774     block_append(&altmatchers, inst_block(block_take(&final_matcher)));
775   }
776 
777   // We don't have any alternations here, so we can use the simplest case.
778   if (altmatchers.first == NULL) {
779     return bind_matcher(final_matcher, body);
780   }
781 
782   // Collect var names
783   jv all_vars = jv_object();
784   block_get_unbound_vars(altmatchers, &all_vars);
785   block_get_unbound_vars(final_matcher, &all_vars);
786 
787   // We need a preamble of STOREVs to which to bind the matchers and the body.
788   jv_object_keys_foreach(all_vars, key) {
789     preamble = BLOCK(preamble,
790                      gen_op_simple(DUP),
791                      gen_const(jv_null()),
792                      gen_op_unbound(STOREV, jv_string_value(key)));
793     jv_free(key);
794   }
795   jv_free(all_vars);
796 
797   // Now we build each matcher in turn
798   for (inst *i = altmatchers.first; i; i = i->next) {
799     block submatcher = i->subfn;
800 
801     // If we're successful, jump to the end of the matchers
802     submatcher = BLOCK(submatcher, gen_op_target(JUMP, final_matcher));
803 
804     // DESTRUCTURE_ALT to the end of this submatcher so we can skip to the next one on error
805     mb = BLOCK(mb, gen_op_target(DESTRUCTURE_ALT, submatcher), submatcher);
806 
807     // We're done with this inst and we don't want it anymore
808     // But we can't let it free the submatcher block.
809     i->subfn.first = i->subfn.last = NULL;
810   }
811   // We're done with these insts now.
812   block_free(altmatchers);
813 
814   return bind_matcher(preamble, BLOCK(mb, final_matcher, body));
815 }
816 
gen_reduce(block source,block matcher,block init,block body)817 block gen_reduce(block source, block matcher, block init, block body) {
818   block res_var = gen_op_var_fresh(STOREV, "reduce");
819   block loop = BLOCK(gen_op_simple(DUPN),
820                      source,
821                      bind_alternation_matchers(matcher,
822                                   BLOCK(gen_op_bound(LOADVN, res_var),
823                                         body,
824                                         gen_op_bound(STOREV, res_var))),
825                      gen_op_simple(BACKTRACK));
826   return BLOCK(gen_op_simple(DUP),
827                init,
828                res_var,
829                gen_op_target(FORK, loop),
830                loop,
831                gen_op_bound(LOADVN, res_var));
832 }
833 
gen_foreach(block source,block matcher,block init,block update,block extract)834 block gen_foreach(block source, block matcher, block init, block update, block extract) {
835   block output = gen_op_targetlater(JUMP);
836   block state_var = gen_op_var_fresh(STOREV, "foreach");
837   block loop = BLOCK(gen_op_simple(DUPN),
838                      // get a value from the source expression:
839                      source,
840                      // destructure the value into variable(s) for all the code
841                      // in the body to see
842                      bind_alternation_matchers(matcher,
843                                   // load the loop state variable
844                                   BLOCK(gen_op_bound(LOADVN, state_var),
845                                         // generate updated state
846                                         update,
847                                         // save the updated state for value extraction
848                                         gen_op_simple(DUP),
849                                         // save new state
850                                         gen_op_bound(STOREV, state_var),
851                                         // extract an output...
852                                         extract,
853                                         // ...and output it by jumping
854                                         // past the BACKTRACK that comes
855                                         // right after the loop body,
856                                         // which in turn is there
857                                         // because...
858                                         //
859                                         // (Incidentally, extract can also
860                                         // backtrack, e.g., if it calls
861                                         // empty, in which case we don't
862                                         // get here.)
863                                         output)));
864   block foreach = BLOCK(gen_op_simple(DUP),
865                         init,
866                         state_var,
867                         gen_op_target(FORK, loop),
868                         loop,
869                         // ...at this point `foreach`'s original input
870                         // will be on top of the stack, and we don't
871                         // want to output it, so we backtrack.
872                         gen_op_simple(BACKTRACK));
873   inst_set_target(output, foreach); // make that JUMP go bast the BACKTRACK at the end of the loop
874   return foreach;
875 }
876 
gen_definedor(block a,block b)877 block gen_definedor(block a, block b) {
878   // var found := false
879   block found_var = gen_op_var_fresh(STOREV, "found");
880   block init = BLOCK(gen_op_simple(DUP), gen_const(jv_false()), found_var);
881 
882   // if found, backtrack. Otherwise execute b
883   block backtrack = gen_op_simple(BACKTRACK);
884   block tail = BLOCK(gen_op_simple(DUP),
885                      gen_op_bound(LOADV, found_var),
886                      gen_op_target(JUMP_F, backtrack),
887                      backtrack,
888                      gen_op_simple(POP),
889                      b);
890 
891   // try again
892   block if_notfound = gen_op_simple(BACKTRACK);
893 
894   // found := true, produce result
895   block if_found = BLOCK(gen_op_simple(DUP),
896                          gen_const(jv_true()),
897                          gen_op_bound(STOREV, found_var),
898                          gen_op_target(JUMP, tail));
899 
900   return BLOCK(init,
901                gen_op_target(FORK, if_notfound),
902                a,
903                gen_op_target(JUMP_F, if_found),
904                if_found,
905                if_notfound,
906                tail);
907 }
908 
block_has_main(block top)909 int block_has_main(block top) {
910   for (inst *c = top.first; c; c = c->next) {
911     if (c->op == TOP)
912       return 1;
913   }
914   return 0;
915 }
916 
block_is_funcdef(block b)917 int block_is_funcdef(block b) {
918   if (b.first != NULL && b.first->op == CLOSURE_CREATE)
919     return 1;
920   return 0;
921 }
922 
gen_condbranch(block iftrue,block iffalse)923 block gen_condbranch(block iftrue, block iffalse) {
924   iftrue = BLOCK(iftrue, gen_op_target(JUMP, iffalse));
925   return BLOCK(gen_op_target(JUMP_F, iftrue), iftrue, iffalse);
926 }
927 
gen_and(block a,block b)928 block gen_and(block a, block b) {
929   // a and b = if a then (if b then true else false) else false
930   return BLOCK(gen_op_simple(DUP), a,
931                gen_condbranch(BLOCK(gen_op_simple(POP),
932                                     b,
933                                     gen_condbranch(gen_const(jv_true()),
934                                                    gen_const(jv_false()))),
935                               BLOCK(gen_op_simple(POP), gen_const(jv_false()))));
936 }
937 
gen_or(block a,block b)938 block gen_or(block a, block b) {
939   // a or b = if a then true else (if b then true else false)
940   return BLOCK(gen_op_simple(DUP), a,
941                gen_condbranch(BLOCK(gen_op_simple(POP), gen_const(jv_true())),
942                               BLOCK(gen_op_simple(POP),
943                                     b,
944                                     gen_condbranch(gen_const(jv_true()),
945                                                    gen_const(jv_false())))));
946 }
947 
gen_destructure_alt(block matcher)948 block gen_destructure_alt(block matcher) {
949   for (inst *i = matcher.first; i; i = i->next) {
950     if (i->op == STOREV) {
951       i->op = STOREVN;
952     }
953   }
954   inst* i = inst_new(DESTRUCTURE_ALT);
955   i->subfn = matcher;
956   return inst_block(i);
957 }
958 
gen_var_binding(block var,const char * name,block body)959 block gen_var_binding(block var, const char* name, block body) {
960   return gen_destructure(var, gen_op_unbound(STOREV, name), body);
961 }
962 
gen_array_matcher(block left,block curr)963 block gen_array_matcher(block left, block curr) {
964   int index;
965   if (block_is_noop(left))
966     index = 0;
967   else {
968     // `left` was returned by this function, so the third inst is the
969     // constant containing the previously used index
970     assert(left.first->op == DUP);
971     assert(left.first->next != NULL);
972     inst *i = NULL;
973     if (left.first->next->op == PUSHK_UNDER) {
974       i = left.first->next;
975     } else {
976       assert(left.first->next->op == SUBEXP_BEGIN);
977       assert(left.first->next->next->op == LOADK);
978       i = left.first->next->next;
979     }
980     index = 1 + (int) jv_number_value(i->imm.constant);
981   }
982 
983   // `left` goes at the end so that the const index is in a predictable place
984   return BLOCK(gen_op_simple(DUP), gen_subexp(gen_const(jv_number(index))),
985                gen_op_simple(INDEX), curr, left);
986 }
987 
gen_object_matcher(block name,block curr)988 block gen_object_matcher(block name, block curr) {
989   return BLOCK(gen_op_simple(DUP), gen_subexp(name), gen_op_simple(INDEX),
990                curr);
991 }
992 
gen_destructure(block var,block matchers,block body)993 block gen_destructure(block var, block matchers, block body) {
994   // var bindings can be added after coding the program; leave the TOP first.
995   block top = gen_noop();
996   if (body.first && body.first->op == TOP)
997     top = inst_block(block_take(&body));
998 
999   if (matchers.first && matchers.first->op == DESTRUCTURE_ALT) {
1000     block_append(&var, gen_op_simple(DUP));
1001   } else {
1002     top = BLOCK(top, gen_op_simple(DUP));
1003   }
1004 
1005   return BLOCK(top, gen_subexp(var), gen_op_simple(POP), bind_alternation_matchers(matchers, body));
1006 }
1007 
1008 // Like gen_var_binding(), but bind `break`'s wildcard unbound variable
gen_wildvar_binding(block var,const char * name,block body)1009 static block gen_wildvar_binding(block var, const char* name, block body) {
1010   return BLOCK(gen_op_simple(DUP), var,
1011                block_bind(gen_op_unbound(STOREV, name),
1012                           body, OP_HAS_VARIABLE | OP_BIND_WILDCARD));
1013 }
1014 
gen_cond(block cond,block iftrue,block iffalse)1015 block gen_cond(block cond, block iftrue, block iffalse) {
1016   return BLOCK(gen_op_simple(DUP), BLOCK(gen_subexp(cond), gen_op_simple(POP)),
1017                gen_condbranch(BLOCK(gen_op_simple(POP), iftrue),
1018                               BLOCK(gen_op_simple(POP), iffalse)));
1019 }
1020 
gen_try_handler(block handler)1021 block gen_try_handler(block handler) {
1022   // Quite a pain just to hide jq's internal errors.
1023   return gen_cond(// `if type=="object" and .__jq
1024                   gen_and(gen_call("_equal",
1025                                    BLOCK(gen_lambda(gen_const(jv_string("object"))),
1026                                          gen_lambda(gen_noop()))),
1027                           BLOCK(gen_subexp(gen_const(jv_string("__jq"))),
1028                                 gen_noop(),
1029                                 gen_op_simple(INDEX))),
1030                   // `then error`
1031                   gen_call("error", gen_noop()),
1032                   // `else HANDLER end`
1033                   handler);
1034 }
1035 
gen_try(block exp,block handler)1036 block gen_try(block exp, block handler) {
1037   /*
1038    * Produce something like:
1039    *  FORK_OPT <address of handler>
1040    *  <exp>
1041    *  JUMP <end of handler>
1042    *  <handler>
1043    *
1044    * If this is not an internal try/catch, then catch and re-raise
1045    * internal errors to prevent them from leaking.
1046    *
1047    * The handler will only execute if we backtrack to the FORK_OPT with
1048    * an error (exception).  If <exp> produces no value then FORK_OPT
1049    * will backtrack (propagate the `empty`, as it were.  If <exp>
1050    * produces a value then we'll execute whatever bytecode follows this
1051    * sequence.
1052    */
1053   if (!handler.first && !handler.last)
1054     // A hack to deal with `.` as the handler; we could use a real NOOP here
1055     handler = BLOCK(gen_op_simple(DUP), gen_op_simple(POP), handler);
1056   exp = BLOCK(exp, gen_op_target(JUMP, handler));
1057   return BLOCK(gen_op_target(FORK_OPT, exp), exp, handler);
1058 }
1059 
gen_label(const char * label,block exp)1060 block gen_label(const char *label, block exp) {
1061   block cond = gen_call("_equal",
1062                         BLOCK(gen_lambda(gen_noop()),
1063                               gen_lambda(gen_op_unbound(LOADV, label))));
1064   return gen_wildvar_binding(gen_op_simple(GENLABEL), label,
1065                              BLOCK(gen_op_simple(POP),
1066                                    // try exp catch if . == $label
1067                                    //               then empty
1068                                    //               else error end
1069                                    //
1070                                    // Can't use gen_binop(), as that's firmly
1071                                    // stuck in parser.y as it refers to things
1072                                    // like EQ.
1073                                    gen_try(exp,
1074                                            gen_cond(cond,
1075                                                     gen_op_simple(BACKTRACK),
1076                                                     gen_call("error", gen_noop())))));
1077 }
1078 
gen_cbinding(const struct cfunction * cfunctions,int ncfunctions,block code)1079 block gen_cbinding(const struct cfunction* cfunctions, int ncfunctions, block code) {
1080   for (int cfunc=0; cfunc<ncfunctions; cfunc++) {
1081     inst* i = inst_new(CLOSURE_CREATE_C);
1082     i->imm.cfunc = &cfunctions[cfunc];
1083     i->symbol = strdup(i->imm.cfunc->name);
1084     code = block_bind(inst_block(i), code, OP_IS_CALL_PSEUDO);
1085   }
1086   return code;
1087 }
1088 
nesting_level(struct bytecode * bc,inst * target)1089 static uint16_t nesting_level(struct bytecode* bc, inst* target) {
1090   uint16_t level = 0;
1091   assert(bc && target && target->compiled);
1092   while (bc && target->compiled != bc) {
1093     level++;
1094     bc = bc->parent;
1095   }
1096   assert(bc && bc == target->compiled);
1097   return level;
1098 }
1099 
count_cfunctions(block b)1100 static int count_cfunctions(block b) {
1101   int n = 0;
1102   for (inst* i = b.first; i; i = i->next) {
1103     if (i->op == CLOSURE_CREATE_C) n++;
1104     n += count_cfunctions(i->subfn);
1105   }
1106   return n;
1107 }
1108 
1109 #ifndef WIN32
1110 extern char **environ;
1111 #endif
1112 
1113 static jv
make_env(jv env)1114 make_env(jv env)
1115 {
1116   if (jv_is_valid(env))
1117     return jv_copy(env);
1118   jv r = jv_object();
1119   if (environ == NULL)
1120     return r;
1121   for (size_t i = 0; environ[i] != NULL; i++) {
1122     const char *eq;
1123 
1124     if ((eq = strchr(environ[i], '=')) == NULL)
1125       r = jv_object_delete(r, jv_string(environ[i]));
1126     else
1127       r = jv_object_set(r, jv_string_sized(environ[i], eq - environ[i]), jv_string(eq + 1));
1128   }
1129   return jv_copy(r);
1130 }
1131 
1132 // Expands call instructions into a calling sequence
expand_call_arglist(block * b,jv args,jv * env)1133 static int expand_call_arglist(block* b, jv args, jv *env) {
1134   int errors = 0;
1135   block ret = gen_noop();
1136   for (inst* curr; (curr = block_take(b));) {
1137     if (opcode_describe(curr->op)->flags & OP_HAS_BINDING) {
1138       if (!curr->bound_by && curr->op == LOADV && strcmp(curr->symbol, "ENV") == 0) {
1139         curr->op = LOADK;
1140         *env = curr->imm.constant = make_env(*env);
1141       } else if (!curr->bound_by && curr->op == LOADV && jv_object_has(jv_copy(args), jv_string(curr->symbol))) {
1142         curr->op = LOADK;
1143         curr->imm.constant = jv_object_get(jv_copy(args), jv_string(curr->symbol));
1144       } else if (!curr->bound_by) {
1145         if (curr->symbol[0] == '*' && curr->symbol[1] >= '1' && curr->symbol[1] <= '3' && curr->symbol[2] == '\0')
1146           locfile_locate(curr->locfile, curr->source, "jq: error: break used outside labeled control structure");
1147         else if (curr->op == LOADV)
1148           locfile_locate(curr->locfile, curr->source, "jq: error: $%s is not defined", curr->symbol);
1149         else
1150           locfile_locate(curr->locfile, curr->source, "jq: error: %s/%d is not defined", curr->symbol, block_count_actuals(curr->arglist));
1151         errors++;
1152         // don't process this instruction if it's not well-defined
1153         ret = BLOCK(ret, inst_block(curr));
1154         continue;
1155       }
1156     }
1157 
1158     block prelude = gen_noop();
1159     if (curr->op == CALL_JQ) {
1160       int actual_args = 0, desired_args = 0;
1161       // We expand the argument list as a series of instructions
1162       switch (curr->bound_by->op) {
1163       default: assert(0 && "Unknown function type"); break;
1164       case CLOSURE_CREATE:
1165       case CLOSURE_PARAM: {
1166         block callargs = gen_noop();
1167         for (inst* i; (i = block_take(&curr->arglist));) {
1168           assert(opcode_describe(i->op)->flags & OP_IS_CALL_PSEUDO);
1169           block b = inst_block(i);
1170           switch (i->op) {
1171           default: assert(0 && "Unknown type of parameter"); break;
1172           case CLOSURE_REF:
1173             block_append(&callargs, b);
1174             break;
1175           case CLOSURE_CREATE:
1176             block_append(&prelude, b);
1177             block_append(&callargs, gen_op_bound(CLOSURE_REF, b));
1178             break;
1179           }
1180           actual_args++;
1181         }
1182         curr->imm.intval = actual_args;
1183         curr->arglist = callargs;
1184 
1185         if (curr->bound_by->op == CLOSURE_CREATE) {
1186           for (inst* i = curr->bound_by->arglist.first; i; i = i->next) {
1187             assert(i->op == CLOSURE_PARAM);
1188             desired_args++;
1189           }
1190         }
1191         break;
1192       }
1193 
1194       case CLOSURE_CREATE_C: {
1195         for (inst* i; (i = block_take(&curr->arglist)); ) {
1196           assert(i->op == CLOSURE_CREATE); // FIXME
1197           block body = i->subfn;
1198           i->subfn = gen_noop();
1199           inst_free(i);
1200           // arguments should be pushed in reverse order, prepend them to prelude
1201           errors += expand_call_arglist(&body, args, env);
1202           prelude = BLOCK(gen_subexp(body), prelude);
1203           actual_args++;
1204         }
1205         assert(curr->op == CALL_JQ);
1206         curr->op = CALL_BUILTIN;
1207         curr->imm.intval = actual_args + 1 /* include the implicit input in arg count */;
1208         assert(curr->bound_by->op == CLOSURE_CREATE_C);
1209         desired_args = curr->bound_by->imm.cfunc->nargs - 1;
1210         assert(!curr->arglist.first);
1211         break;
1212       }
1213       }
1214 
1215       assert(actual_args == desired_args); // because now handle this above
1216     }
1217     ret = BLOCK(ret, prelude, inst_block(curr));
1218   }
1219   *b = ret;
1220   return errors;
1221 }
1222 
compile(struct bytecode * bc,block b,struct locfile * lf,jv args,jv * env)1223 static int compile(struct bytecode* bc, block b, struct locfile* lf, jv args, jv *env) {
1224   int errors = 0;
1225   int pos = 0;
1226   int var_frame_idx = 0;
1227   bc->nsubfunctions = 0;
1228   errors += expand_call_arglist(&b, args, env);
1229   b = BLOCK(b, gen_op_simple(RET));
1230   jv localnames = jv_array();
1231   for (inst* curr = b.first; curr; curr = curr->next) {
1232     if (!curr->next) assert(curr == b.last);
1233     int length = opcode_describe(curr->op)->length;
1234     if (curr->op == CALL_JQ) {
1235       for (inst* arg = curr->arglist.first; arg; arg = arg->next) {
1236         length += 2;
1237       }
1238     }
1239     pos += length;
1240     curr->bytecode_pos = pos;
1241     curr->compiled = bc;
1242 
1243     assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM);
1244 
1245     if ((opcode_describe(curr->op)->flags & OP_HAS_VARIABLE) &&
1246         curr->bound_by == curr) {
1247       curr->imm.intval = var_frame_idx++;
1248       localnames = jv_array_append(localnames, jv_string(curr->symbol));
1249     }
1250 
1251     if (curr->op == CLOSURE_CREATE) {
1252       assert(curr->bound_by == curr);
1253       curr->imm.intval = bc->nsubfunctions++;
1254     }
1255     if (curr->op == CLOSURE_CREATE_C) {
1256       assert(curr->bound_by == curr);
1257       int idx = bc->globals->ncfunctions++;
1258       bc->globals->cfunc_names = jv_array_append(bc->globals->cfunc_names,
1259                                                  jv_string(curr->symbol));
1260       bc->globals->cfunctions[idx] = *curr->imm.cfunc;
1261       curr->imm.intval = idx;
1262     }
1263   }
1264   if (pos > 0xFFFF) {
1265     // too long for program counter to fit in uint16_t
1266     locfile_locate(lf, UNKNOWN_LOCATION,
1267         "function compiled to %d bytes which is too long", pos);
1268     errors++;
1269   }
1270   bc->codelen = pos;
1271   bc->debuginfo = jv_object_set(bc->debuginfo, jv_string("locals"), localnames);
1272   if (bc->nsubfunctions) {
1273     bc->subfunctions = jv_mem_calloc(sizeof(struct bytecode*), bc->nsubfunctions);
1274     for (inst* curr = b.first; curr; curr = curr->next) {
1275       if (curr->op == CLOSURE_CREATE) {
1276         struct bytecode* subfn = jv_mem_alloc(sizeof(struct bytecode));
1277         bc->subfunctions[curr->imm.intval] = subfn;
1278         subfn->globals = bc->globals;
1279         subfn->parent = bc;
1280         subfn->nclosures = 0;
1281         subfn->debuginfo = jv_object_set(jv_object(), jv_string("name"), jv_string(curr->symbol));
1282         jv params = jv_array();
1283         for (inst* param = curr->arglist.first; param; param = param->next) {
1284           assert(param->op == CLOSURE_PARAM);
1285           assert(param->bound_by == param);
1286           param->imm.intval = subfn->nclosures++;
1287           param->compiled = subfn;
1288           params = jv_array_append(params, jv_string(param->symbol));
1289         }
1290         subfn->debuginfo = jv_object_set(subfn->debuginfo, jv_string("params"), params);
1291         errors += compile(subfn, curr->subfn, lf, args, env);
1292         curr->subfn = gen_noop();
1293       }
1294     }
1295   } else {
1296     bc->subfunctions = 0;
1297   }
1298   uint16_t* code = jv_mem_calloc(sizeof(uint16_t), bc->codelen);
1299   bc->code = code;
1300   pos = 0;
1301   jv constant_pool = jv_array();
1302   int maxvar = -1;
1303   if (!errors) for (inst* curr = b.first; curr; curr = curr->next) {
1304     const struct opcode_description* op = opcode_describe(curr->op);
1305     if (op->length == 0)
1306       continue;
1307     code[pos++] = curr->op;
1308     assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM);
1309     if (curr->op == CALL_BUILTIN) {
1310       assert(curr->bound_by->op == CLOSURE_CREATE_C);
1311       assert(!curr->arglist.first);
1312       code[pos++] = (uint16_t)curr->imm.intval;
1313       code[pos++] = curr->bound_by->imm.intval;
1314     } else if (curr->op == CALL_JQ) {
1315       assert(curr->bound_by->op == CLOSURE_CREATE ||
1316              curr->bound_by->op == CLOSURE_PARAM);
1317       code[pos++] = (uint16_t)curr->imm.intval;
1318       code[pos++] = nesting_level(bc, curr->bound_by);
1319       code[pos++] = curr->bound_by->imm.intval |
1320         (curr->bound_by->op == CLOSURE_CREATE ? ARG_NEWCLOSURE : 0);
1321       for (inst* arg = curr->arglist.first; arg; arg = arg->next) {
1322         assert(arg->op == CLOSURE_REF && arg->bound_by->op == CLOSURE_CREATE);
1323         code[pos++] = nesting_level(bc, arg->bound_by);
1324         code[pos++] = arg->bound_by->imm.intval | ARG_NEWCLOSURE;
1325       }
1326     } else if ((op->flags & OP_HAS_CONSTANT) && (op->flags & OP_HAS_VARIABLE)) {
1327       // STORE_GLOBAL: constant global, basically
1328       code[pos++] = jv_array_length(jv_copy(constant_pool));
1329       constant_pool = jv_array_append(constant_pool, jv_copy(curr->imm.constant));
1330       code[pos++] = nesting_level(bc, curr->bound_by);
1331       uint16_t var = (uint16_t)curr->bound_by->imm.intval;
1332       code[pos++] = var;
1333     } else if (op->flags & OP_HAS_CONSTANT) {
1334       code[pos++] = jv_array_length(jv_copy(constant_pool));
1335       constant_pool = jv_array_append(constant_pool, jv_copy(curr->imm.constant));
1336     } else if (op->flags & OP_HAS_VARIABLE) {
1337       code[pos++] = nesting_level(bc, curr->bound_by);
1338       uint16_t var = (uint16_t)curr->bound_by->imm.intval;
1339       code[pos++] = var;
1340       if (var > maxvar) maxvar = var;
1341     } else if (op->flags & OP_HAS_BRANCH) {
1342       assert(curr->imm.target->bytecode_pos != -1);
1343       assert(curr->imm.target->bytecode_pos > pos); // only forward branches
1344       code[pos] = curr->imm.target->bytecode_pos - (pos + 1);
1345       pos++;
1346     } else if (op->length > 1) {
1347       assert(0 && "codegen not implemented for this operation");
1348     }
1349   }
1350   bc->constants = constant_pool;
1351   bc->nlocals = maxvar + 2; // FIXME: frames of size zero?
1352   block_free(b);
1353   return errors;
1354 }
1355 
block_compile(block b,struct bytecode ** out,struct locfile * lf,jv args)1356 int block_compile(block b, struct bytecode** out, struct locfile* lf, jv args) {
1357   struct bytecode* bc = jv_mem_alloc(sizeof(struct bytecode));
1358   bc->parent = 0;
1359   bc->nclosures = 0;
1360   bc->globals = jv_mem_alloc(sizeof(struct symbol_table));
1361   int ncfunc = count_cfunctions(b);
1362   bc->globals->ncfunctions = 0;
1363   bc->globals->cfunctions = jv_mem_calloc(sizeof(struct cfunction), ncfunc);
1364   bc->globals->cfunc_names = jv_array();
1365   bc->debuginfo = jv_object_set(jv_object(), jv_string("name"), jv_null());
1366   jv env = jv_invalid();
1367   int nerrors = compile(bc, b, lf, args, &env);
1368   jv_free(args);
1369   jv_free(env);
1370   assert(bc->globals->ncfunctions == ncfunc);
1371   if (nerrors > 0) {
1372     bytecode_free(bc);
1373     *out = 0;
1374   } else {
1375     *out = bc;
1376   }
1377   return nerrors;
1378 }
1379 
block_free(block b)1380 void block_free(block b) {
1381   struct inst* next;
1382   for (struct inst* curr = b.first; curr; curr = next) {
1383     next = curr->next;
1384     inst_free(curr);
1385   }
1386 }
1387