1 
2 #include <limits.h>  /* for INT_MAX */
3 #include <stdio.h>   /* for fprintf etc */
4 #include <stdlib.h>  /* for free etc */
5 #include <string.h>  /* for strlen */
6 #include "header.h"
7 
8 /* Define this to get warning messages when optimisations can't be used. */
9 /* #define OPTIMISATION_WARNINGS */
10 
11 /* recursive use: */
12 
13 static void generate(struct generator * g, struct node * p);
14 
new_label(struct generator * g)15 static int new_label(struct generator * g) {
16     return g->next_label++;
17 }
18 
19 /* Write routines for simple entities */
20 
21 /* Write a space if the preceding character was not whitespace */
ws_opt_space(struct generator * g,const char * s)22 static void ws_opt_space(struct generator * g, const char * s) {
23     int ch = str_back(g->outbuf);
24     if (ch != ' ' && ch != '\n' && ch != '\t' && ch != -1)
25         write_char(g, ' ');
26     write_string(g, s);
27 }
28 
wi3(struct generator * g,int i)29 static void wi3(struct generator * g, int i) {
30     if (i < 100) write_char(g, ' ');
31     if (i < 10)  write_char(g, ' ');
32     write_int(g, i); /* integer (width 3) */
33 }
34 
35 
36 /* Write routines for items from the syntax tree */
37 
write_varname(struct generator * g,struct name * p)38 static void write_varname(struct generator * g, struct name * p) {
39 
40     int ch = "SIIrxg"[p->type];
41     switch (p->type) {
42         case t_external:
43             write_string(g, g->options->externals_prefix); break;
44         case t_string:
45         case t_boolean:
46         case t_integer: {
47             int count = p->count;
48             if (count < 0) {
49                 fprintf(stderr, "Reference to optimised out variable ");
50                 report_b(stderr, p->b);
51                 fprintf(stderr, " attempted\n");
52                 exit(1);
53             }
54             if (p->type == t_boolean) {
55                 /* We use a single array for booleans and integers, with the
56                  * integers first.
57                  */
58                 count += g->analyser->name_count[t_integer];
59             }
60             write_char(g, ch);
61             write_char(g, '[');
62             write_int(g, count);
63             write_char(g, ']');
64             return;
65         }
66         default:
67             write_char(g, ch); write_char(g, '_');
68     }
69     write_b(g, p->b);
70 }
71 
write_varref(struct generator * g,struct name * p)72 static void write_varref(struct generator * g, struct name * p) {  /* reference to variable */
73     if (p->type < t_routine) write_string(g, "z->");
74     write_varname(g, p);
75 }
76 
write_hexdigit(struct generator * g,int i)77 static void write_hexdigit(struct generator * g, int i) {
78     str_append_ch(g->outbuf, "0123456789ABCDEF"[i & 0xF]); /* hexchar */
79 }
80 
write_hex(struct generator * g,int i)81 static void write_hex(struct generator * g, int i) {
82     if (i >> 4) write_hex(g, i >> 4);
83     write_hexdigit(g, i); /* hex integer */
84 }
85 
86 /* write character literal */
wlitch(struct generator * g,int ch)87 static void wlitch(struct generator * g, int ch) {
88     if (32 <= ch && ch < 127) {
89         write_char(g, '\'');
90         if (ch == '\'' || ch == '\\') {
91             write_char(g, '\\');
92         }
93         write_char(g, ch);
94         write_char(g, '\'');
95     } else {
96         write_string(g, "0x"); write_hex(g, ch);
97     }
98 }
99 
wlitarray(struct generator * g,symbol * p)100 static void wlitarray(struct generator * g, symbol * p) {  /* write literal array */
101 
102     write_string(g, "{ ");
103     {
104         int i;
105         for (i = 0; i < SIZE(p); i++) {
106             wlitch(g, p[i]);
107             if (i < SIZE(p) - 1) write_string(g, ", ");
108         }
109     }
110     write_string(g, " }");
111 }
112 
wlitref(struct generator * g,symbol * p)113 static void wlitref(struct generator * g, symbol * p) {  /* write ref to literal array */
114 
115     if (SIZE(p) == 0) {
116         write_char(g, '0');
117     } else {
118         struct str * s = g->outbuf;
119         g->outbuf = g->declarations;
120         write_string(g, "static const symbol s_"); write_int(g, g->literalstring_count); write_string(g, "[] = ");
121         wlitarray(g, p);
122         write_string(g, ";\n");
123         g->outbuf = s;
124         write_string(g, "s_"); write_int(g, g->literalstring_count);
125         g->literalstring_count++;
126     }
127 }
128 
write_margin(struct generator * g)129 static void write_margin(struct generator * g) {
130     int i;
131     for (i = 0; i < g->margin; i++) write_string(g, "    ");
132 }
133 
write_comment_content(struct generator * g,struct node * p)134 void write_comment_content(struct generator * g, struct node * p) {
135     switch (p->type) {
136         case c_mathassign:
137         case c_plusassign:
138         case c_minusassign:
139         case c_multiplyassign:
140         case c_divideassign:
141             if (p->name) {
142                 write_char(g, '$');
143                 write_b(g, p->name->b);
144                 write_char(g, ' ');
145             }
146             write_string(g, name_of_token(p->type));
147             write_string(g, " <integer expression>");
148             break;
149         case c_eq:
150         case c_ne:
151         case c_gr:
152         case c_ge:
153         case c_ls:
154         case c_le:
155             write_string(g, "$(<integer expression> ");
156             write_string(g, name_of_token(p->type));
157             write_string(g, " <integer expression>)");
158             break;
159         default:
160             write_string(g, name_of_token(p->type));
161             if (p->name) {
162                 write_char(g, ' ');
163                 write_b(g, p->name->b);
164             }
165     }
166     write_string(g, ", line ");
167     write_int(g, p->line_number);
168 }
169 
write_comment(struct generator * g,struct node * p)170 static void write_comment(struct generator * g, struct node * p) {
171     if (g->options->comments) {
172         ws_opt_space(g, "/* ");
173         write_comment_content(g, p);
174         write_string(g, " */");
175     }
176     write_newline(g);
177 }
178 
wms(struct generator * g,const char * s)179 static void wms(struct generator * g, const char * s) {
180     write_margin(g); write_string(g, s);   } /* margin + string */
181 
write_block_start(struct generator * g)182 static void write_block_start(struct generator * g) { /* block start */
183     wms(g, "{   ");
184     g->margin++;
185 }
186 
write_block_end(struct generator * g)187 static void write_block_end(struct generator * g) {    /* block end */
188 
189     if (g->line_labelled == g->line_count) { wms(g, ";"); write_newline(g); }
190     g->margin--;
191     wms(g, "}"); write_newline(g);
192 }
193 
194 static void w(struct generator * g, const char * s);
195 
196 /* keep c */
wk(struct generator * g,struct node * p,int keep_limit)197 static void wk(struct generator * g, struct node * p, int keep_limit) {
198     ++g->keep_count;
199     if (p->mode == m_forward) {
200         write_string(g, "int c");
201         write_int(g, g->keep_count);
202         write_string(g, " = z->c");
203         if (keep_limit) {
204             write_string(g, ", mlimit");
205             write_int(g, g->keep_count);
206         }
207         write_char(g, ';');
208     } else {
209         write_string(g, "int m");
210         write_int(g, g->keep_count);
211         write_string(g, " = z->l - z->c");
212         if (keep_limit) {
213             write_string(g, ", mlimit");
214             write_int(g, g->keep_count);
215         }
216         write_string(g, "; (void)m");
217         write_int(g, g->keep_count);
218         write_char(g, ';');
219     }
220 }
221 
wrestore(struct generator * g,struct node * p,int keep_token)222 static void wrestore(struct generator * g, struct node * p, int keep_token) {     /* restore c */
223     if (p->mode == m_forward) {
224         write_string(g, "z->c = c");
225     } else {
226         write_string(g, "z->c = z->l - m");
227     }
228     write_int(g, keep_token); write_char(g, ';');
229 }
230 
wrestorelimit(struct generator * g,struct node * p,int keep_token)231 static void wrestorelimit(struct generator * g, struct node * p, int keep_token) {     /* restore limit */
232     if (p->mode == m_forward) {
233         w(g, "z->l += mlimit");
234     } else {
235         w(g, "z->lb = mlimit");
236     }
237     write_int(g, keep_token); write_string(g, ";");
238 }
239 
winc(struct generator * g,struct node * p)240 static void winc(struct generator * g, struct node * p) {     /* increment c */
241     write_string(g, p->mode == m_forward ? "z->c++;" :
242                                  "z->c--;");
243 }
244 
wsetl(struct generator * g,int n)245 static void wsetl(struct generator * g, int n) {
246 
247     g->margin--;
248     wms(g, "lab"); write_int(g, n); write_char(g, ':'); write_newline(g);
249     g->line_labelled = g->line_count;
250     g->margin++;
251 }
252 
wgotol(struct generator * g,int n)253 static void wgotol(struct generator * g, int n) {
254     wms(g, "goto lab"); write_int(g, n); write_char(g, ';'); write_newline(g);
255 }
256 
write_failure(struct generator * g,struct node * p)257 static void write_failure(struct generator * g, struct node * p) {          /* fail */
258     if (g->failure_keep_count != 0) {
259         write_string(g, "{ ");
260         if (g->failure_keep_count > 0) {
261             wrestore(g, p, g->failure_keep_count);
262         } else {
263             wrestorelimit(g, p, -g->failure_keep_count);
264         }
265         write_char(g, ' ');
266     }
267     switch (g->failure_label) {
268         case x_return:
269             write_string(g, "return 0;");
270             break;
271         default:
272             write_string(g, "goto lab");
273             write_int(g, g->failure_label);
274             write_char(g, ';');
275             g->label_used = 1;
276     }
277     if (g->failure_keep_count != 0) write_string(g, " }");
278 }
279 
280 
281 /* if at limit fail */
write_check_limit(struct generator * g,struct node * p)282 static void write_check_limit(struct generator * g, struct node * p) {
283 
284     write_string(g, p->mode == m_forward ? "if (z->c >= z->l) " :
285                                  "if (z->c <= z->lb) ");
286     write_failure(g, p);
287 }
288 
write_data_address(struct generator * g,struct node * p)289 static void write_data_address(struct generator * g, struct node * p) {
290     symbol * b = p->literalstring;
291     if (b != 0) {
292         write_int(g, SIZE(b)); w(g, ", ");
293         wlitref(g, b);
294     } else {
295         write_varref(g, p->name);
296     }
297 }
298 
299 /* Formatted write. */
writef(struct generator * g,const char * input,struct node * p)300 static void writef(struct generator * g, const char * input, struct node * p) {
301     int i = 0;
302     int l = strlen(input);
303 
304     while (i < l) {
305         int ch = input[i++];
306         if (ch != '~') {
307             write_char(g, ch);
308             continue;
309         }
310         switch (input[i++]) {
311             default: write_char(g, input[i - 1]); continue;
312             case 'C': write_comment(g, p); continue;
313             case 'k': wk(g, p, false); continue;
314             case 'K': wk(g, p, true); continue;
315             case 'i': winc(g, p); continue;
316             case 'l': write_check_limit(g, p); continue;
317             case 'f': write_failure(g, p); continue;
318             case 'M': write_margin(g); continue;
319             case 'N': write_newline(g); continue;
320             case '{': write_block_start(g); continue;
321             case '}': write_block_end(g); continue;
322             case 'S': write_string(g, g->S[input[i++] - '0']); continue;
323             case 'I': write_int(g, g->I[input[i++] - '0']); continue;
324             case 'J': wi3(g, g->I[input[i++] - '0']); continue;
325             case 'V': write_varref(g, g->V[input[i++] - '0']); continue;
326             case 'W': write_varname(g, g->V[input[i++] - '0']); continue;
327             case 'L': wlitref(g, g->L[input[i++] - '0']); continue;
328             case 'A': wlitarray(g, g->L[input[i++] - '0']); continue;
329             case 'c': wlitch(g, g->I[input[i++] - '0']); continue;
330             case 'a': write_data_address(g, p); continue;
331             case '+': g->margin++; continue;
332             case '-': g->margin--; continue;
333             case '$': /* insert_s, insert_v etc */
334                 write_char(g, p->literalstring == 0 ? 'v' : 's');
335                 continue;
336             case 'p': write_string(g, g->options->externals_prefix); continue;
337         }
338     }
339 }
340 
w(struct generator * g,const char * s)341 static void w(struct generator * g, const char * s) {
342     writef(g, s, 0);
343 }
344 
generate_AE(struct generator * g,struct node * p)345 static void generate_AE(struct generator * g, struct node * p) {
346     const char * s;
347     switch (p->type) {
348         case c_name:
349             write_varref(g, p->name); break;
350         case c_number:
351             write_int(g, p->number); break;
352         case c_maxint:
353             write_string(g, "MAXINT"); break;
354         case c_minint:
355             write_string(g, "MININT"); break;
356         case c_neg:
357             write_char(g, '-'); generate_AE(g, p->right); break;
358         case c_multiply:
359             s = " * "; goto label0;
360         case c_plus:
361             s = " + "; goto label0;
362         case c_minus:
363             s = " - "; goto label0;
364         case c_divide:
365             s = " / ";
366         label0:
367             write_char(g, '('); generate_AE(g, p->left);
368             write_string(g, s); generate_AE(g, p->right); write_char(g, ')'); break;
369         case c_cursor:
370             w(g, "z->c"); break;
371         case c_limit:
372             w(g, p->mode == m_forward ? "z->l" : "z->lb"); break;
373         case c_len:
374             if (g->options->encoding == ENC_UTF8) {
375                 w(g, "len_utf8(z->p)");
376                 break;
377             }
378             /* FALLTHRU */
379         case c_size:
380             w(g, "SIZE(z->p)");
381             break;
382         case c_lenof:
383             if (g->options->encoding == ENC_UTF8) {
384                 g->V[0] = p->name;
385                 w(g, "len_utf8(~V0)");
386                 break;
387             }
388             /* FALLTHRU */
389         case c_sizeof:
390             g->V[0] = p->name;
391             w(g, "SIZE(~V0)");
392             break;
393     }
394 }
395 
396 /* K_needed() tests to see if we really need to keep c. Not true when the
397    command does not touch the cursor. This and repeat_score() could be
398    elaborated almost indefinitely.
399 */
400 
K_needed_(struct generator * g,struct node * p,int call_depth)401 static int K_needed_(struct generator * g, struct node * p, int call_depth) {
402     while (p) {
403         switch (p->type) {
404             case c_atlimit:
405             case c_do:
406             case c_dollar:
407             case c_leftslice:
408             case c_rightslice:
409             case c_mathassign:
410             case c_plusassign:
411             case c_minusassign:
412             case c_multiplyassign:
413             case c_divideassign:
414             case c_eq:
415             case c_ne:
416             case c_gr:
417             case c_ge:
418             case c_ls:
419             case c_le:
420             case c_sliceto:
421             case c_booltest:
422             case c_set:
423             case c_unset:
424             case c_true:
425             case c_false:
426             case c_debug:
427                 break;
428 
429             case c_call:
430                 /* Recursive functions aren't typical in snowball programs, so
431                  * make the pessimistic assumption that keep is needed if we
432                  * hit a generous limit on recursion.  It's not likely to make
433                  * a difference to any real world program, but means we won't
434                  * recurse until we run out of stack for pathological cases.
435                  */
436                 if (call_depth >= 100) return true;
437                 if (K_needed_(g, p->name->definition, call_depth + 1))
438                     return true;
439                 break;
440 
441             case c_bra:
442                 if (K_needed_(g, p->left, call_depth)) return true;
443                 break;
444 
445             default: return true;
446         }
447         p = p->right;
448     }
449     return false;
450 }
451 
K_needed(struct generator * g,struct node * p)452 extern int K_needed(struct generator * g, struct node * p) {
453     return K_needed_(g, p, 0);
454 }
455 
repeat_score(struct generator * g,struct node * p,int call_depth)456 static int repeat_score(struct generator * g, struct node * p, int call_depth) {
457     int score = 0;
458     while (p) {
459         switch (p->type) {
460             case c_dollar:
461             case c_leftslice:
462             case c_rightslice:
463             case c_mathassign:
464             case c_plusassign:
465             case c_minusassign:
466             case c_multiplyassign:
467             case c_divideassign:
468             case c_eq:
469             case c_ne:
470             case c_gr:
471             case c_ge:
472             case c_ls:
473             case c_le:
474             case c_sliceto:   /* case c_not: must not be included here! */
475             case c_debug:
476                 break;
477 
478             case c_call:
479                 /* Recursive functions aren't typical in snowball programs, so
480                  * make the pessimistic assumption that repeat requires cursor
481                  * reinstatement if we hit a generous limit on recursion.  It's
482                  * not likely to make a difference to any real world program,
483                  * but means we won't recurse until we run out of stack for
484                  * pathological cases.
485                  */
486                 if (call_depth >= 100) {
487                     return 2;
488                 }
489                 score += repeat_score(g, p->name->definition, call_depth + 1);
490                 if (score >= 2)
491                     return score;
492                 break;
493 
494             case c_bra:
495                 score += repeat_score(g, p->left, call_depth);
496                 if (score >= 2)
497                     return score;
498                 break;
499 
500             case c_name:
501             case c_literalstring:
502             case c_next:
503             case c_grouping:
504             case c_non:
505             case c_hop:
506                 if (++score >= 2)
507                     return score;
508                 break;
509 
510             default:
511                 return 2;
512         }
513         p = p->right;
514     }
515     return score;
516 }
517 
518 /* tests if an expression requires cursor reinstatement in a repeat */
519 
repeat_restore(struct generator * g,struct node * p)520 extern int repeat_restore(struct generator * g, struct node * p) {
521     return repeat_score(g, p, 0) >= 2;
522 }
523 
generate_bra(struct generator * g,struct node * p)524 static void generate_bra(struct generator * g, struct node * p) {
525     p = p->left;
526     while (p) {
527         generate(g, p);
528         p = p->right;
529     }
530 }
531 
generate_and(struct generator * g,struct node * p)532 static void generate_and(struct generator * g, struct node * p) {
533     int keep_c = 0;
534     if (K_needed(g, p->left)) {
535         writef(g, "~{~k~C", p);
536         keep_c = g->keep_count;
537     } else {
538         writef(g, "~M~C", p);
539     }
540     p = p->left;
541     while (p) {
542         generate(g, p);
543         if (keep_c && p->right != 0) {
544             w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
545         }
546         p = p->right;
547     }
548     if (keep_c) w(g, "~}");
549 }
550 
generate_or(struct generator * g,struct node * p)551 static void generate_or(struct generator * g, struct node * p) {
552     int keep_c = 0;
553 
554     int used = g->label_used;
555     int a0 = g->failure_label;
556     int a1 = g->failure_keep_count;
557 
558     int out_lab = new_label(g);
559 
560     if (K_needed(g, p->left)) {
561         writef(g, "~{~k~C", p);
562         keep_c = g->keep_count;
563     } else {
564         writef(g, "~M~C", p);
565     }
566     p = p->left;
567     g->failure_keep_count = 0;
568     while (p->right) {
569         g->failure_label = new_label(g);
570         g->label_used = 0;
571         generate(g, p);
572         wgotol(g, out_lab);
573         if (g->label_used)
574             wsetl(g, g->failure_label);
575         if (keep_c) {
576             w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
577         }
578         p = p->right;
579     }
580     g->label_used = used;
581     g->failure_label = a0;
582     g->failure_keep_count = a1;
583 
584     generate(g, p);
585     if (keep_c) w(g, "~}");
586     wsetl(g, out_lab);
587 }
588 
generate_backwards(struct generator * g,struct node * p)589 static void generate_backwards(struct generator * g, struct node * p) {
590 
591     writef(g, "~Mz->lb = z->c; z->c = z->l;~C~N", p);
592     generate(g, p->left);
593     w(g, "~Mz->c = z->lb;~N");
594 }
595 
596 
generate_not(struct generator * g,struct node * p)597 static void generate_not(struct generator * g, struct node * p) {
598     int keep_c = 0;
599 
600     int used = g->label_used;
601     int a0 = g->failure_label;
602     int a1 = g->failure_keep_count;
603 
604     if (K_needed(g, p->left)) {
605         writef(g, "~{~k~C", p);
606         keep_c = g->keep_count;
607     } else {
608         writef(g, "~M~C", p);
609     }
610 
611     g->failure_label = new_label(g);
612     g->label_used = 0;
613     g->failure_keep_count = 0;
614     generate(g, p->left);
615 
616     {
617         int l = g->failure_label;
618         int u = g->label_used;
619 
620         g->label_used = used;
621         g->failure_label = a0;
622         g->failure_keep_count = a1;
623 
624         writef(g, "~M~f~N", p);
625         if (u)
626             wsetl(g, l);
627     }
628     if (keep_c) {
629         w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N~}");
630     }
631 }
632 
633 
generate_try(struct generator * g,struct node * p)634 static void generate_try(struct generator * g, struct node * p) {
635     int keep_c = 0;
636     if (K_needed(g, p->left)) {
637         writef(g, "~{~k~C", p);
638         keep_c = g->keep_count;
639     } else {
640         writef(g, "~M~C", p);
641     }
642     g->failure_keep_count = keep_c;
643 
644     g->failure_label = new_label(g);
645     g->label_used = 0;
646     generate(g, p->left);
647 
648     if (g->label_used)
649         wsetl(g, g->failure_label);
650 
651     if (keep_c) w(g, "~}");
652 }
653 
generate_set(struct generator * g,struct node * p)654 static void generate_set(struct generator * g, struct node * p) {
655     g->V[0] = p->name; writef(g, "~M~V0 = 1;~C", p);
656 }
657 
generate_unset(struct generator * g,struct node * p)658 static void generate_unset(struct generator * g, struct node * p) {
659     g->V[0] = p->name; writef(g, "~M~V0 = 0;~C", p);
660 }
661 
generate_fail(struct generator * g,struct node * p)662 static void generate_fail(struct generator * g, struct node * p) {
663     generate(g, p->left);
664     writef(g, "~M~f~C", p);
665 }
666 
667 /* generate_test() also implements 'reverse' */
668 
generate_test(struct generator * g,struct node * p)669 static void generate_test(struct generator * g, struct node * p) {
670     int keep_c = 0;
671     if (K_needed(g, p->left)) {
672         keep_c = ++g->keep_count;
673         w(g, p->mode == m_forward ? "~{int c_test" :
674                                     "~{int m_test");
675         write_int(g, keep_c);
676         w(g, p->mode == m_forward ? " = z->c;" :
677                                     " = z->l - z->c;");
678         writef(g, "~C", p);
679     } else writef(g, "~M~C", p);
680 
681     generate(g, p->left);
682 
683     if (keep_c) {
684         w(g, p->mode == m_forward ? "~Mz->c = c_test" :
685                                     "~Mz->c = z->l - m_test");
686         write_int(g, keep_c);
687         writef(g, ";~N~}", p);
688     }
689 }
690 
generate_do(struct generator * g,struct node * p)691 static void generate_do(struct generator * g, struct node * p) {
692     int keep_c = 0;
693     if (K_needed(g, p->left)) {
694         writef(g, "~{~k~C", p);
695         keep_c = g->keep_count;
696     } else {
697         writef(g, "~M~C", p);
698     }
699 
700     if (p->left->type == c_call) {
701         /* Optimise do <call> */
702         g->V[0] = p->left->name;
703         writef(g, "~{int ret = ~V0(z);~C", p->left);
704         w(g, "~Mif (ret < 0) return ret;~N~}");
705     } else {
706         g->failure_label = new_label(g);
707         g->label_used = 0;
708         g->failure_keep_count = 0;
709         generate(g, p->left);
710 
711         if (g->label_used)
712             wsetl(g, g->failure_label);
713     }
714     if (keep_c) {
715         w(g, "~M"); wrestore(g, p, keep_c);
716         w(g, "~N~}");
717     }
718 }
719 
generate_next(struct generator * g,struct node * p)720 static void generate_next(struct generator * g, struct node * p) {
721     if (g->options->encoding == ENC_UTF8) {
722         if (p->mode == m_forward)
723             w(g, "~{int ret = skip_utf8(z->p, z->c, z->l, 1");
724         else
725             w(g, "~{int ret = skip_b_utf8(z->p, z->c, z->lb, 1");
726         writef(g, ");~N"
727               "~Mif (ret < 0) ~f~N"
728               "~Mz->c = ret;~C"
729               "~}", p);
730     } else
731         writef(g, "~M~l~N"
732               "~M~i~C", p);
733 }
734 
generate_GO_grouping(struct generator * g,struct node * p,int is_goto,int complement)735 static void generate_GO_grouping(struct generator * g, struct node * p, int is_goto, int complement) {
736 
737     struct grouping * q = p->name->grouping;
738     g->S[0] = p->mode == m_forward ? "" : "_b";
739     g->S[1] = complement ? "in" : "out";
740     g->S[2] = g->options->encoding == ENC_UTF8 ? "_U" : "";
741     g->V[0] = p->name;
742     g->I[0] = q->smallest_ch;
743     g->I[1] = q->largest_ch;
744     if (is_goto) {
745         writef(g, "~Mif (~S1_grouping~S0~S2(z, ~V0, ~I0, ~I1, 1) < 0) ~f~C", p);
746     } else {
747         writef(g, "~{~C"
748               "~Mint ret = ~S1_grouping~S0~S2(z, ~V0, ~I0, ~I1, 1);~N"
749               "~Mif (ret < 0) ~f~N", p);
750         if (p->mode == m_forward)
751             w(g, "~Mz->c += ret;~N");
752         else
753             w(g, "~Mz->c -= ret;~N");
754         w(g, "~}");
755     }
756 }
757 
generate_GO(struct generator * g,struct node * p,int style)758 static void generate_GO(struct generator * g, struct node * p, int style) {
759     int keep_c = 0;
760 
761     int used = g->label_used;
762     int a0 = g->failure_label;
763     int a1 = g->failure_keep_count;
764 
765     if (p->left->type == c_grouping || p->left->type == c_non) {
766         /* Special case for "goto" or "gopast" when used on a grouping or an
767          * inverted grouping - the movement of c by the matching action is
768          * exactly what we want! */
769 #ifdef OPTIMISATION_WARNINGS
770         printf("Optimising %s %s\n", style ? "goto" : "gopast", p->left->type == c_non ? "non" : "grouping");
771 #endif
772         if (g->options->comments) {
773             writef(g, "~M~C", p);
774         }
775         generate_GO_grouping(g, p->left, style, p->left->type == c_non);
776         return;
777     }
778 
779     w(g, "~Mwhile(1) {"); writef(g, "~C~+", p);
780 
781     if (style == 1 || repeat_restore(g, p->left)) {
782         writef(g, "~M~k~N", p);
783         keep_c = g->keep_count;
784     }
785 
786     g->failure_label = new_label(g);
787     g->label_used = 0;
788     g->failure_keep_count = 0;
789     generate(g, p->left);
790 
791     if (style == 1) {
792         /* include for goto; omit for gopast */
793         w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
794     }
795     w(g, "~Mbreak;~N");
796     if (g->label_used)
797         wsetl(g, g->failure_label);
798     if (keep_c) {
799         w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
800     }
801 
802     g->label_used = used;
803     g->failure_label = a0;
804     g->failure_keep_count = a1;
805 
806     generate_next(g, p);
807     w(g, "~}");
808 }
809 
generate_loop(struct generator * g,struct node * p)810 static void generate_loop(struct generator * g, struct node * p) {
811     w(g, "~{int i; for (i = "); generate_AE(g, p->AE); writef(g, "; i > 0; i--)~C"
812             "~{", p);
813 
814     generate(g, p->left);
815 
816     w(g,    "~}"
817          "~}");
818 }
819 
generate_repeat_or_atleast(struct generator * g,struct node * p,int atleast_case)820 static void generate_repeat_or_atleast(struct generator * g, struct node * p, int atleast_case) {
821     int keep_c = 0;
822     if (atleast_case) {
823         writef(g, "~Mwhile(1) {~+~N", p);
824     } else {
825         writef(g, "~Mwhile(1) {~+~C", p);
826     }
827 
828     if (repeat_restore(g, p->left)) {
829         writef(g, "~M~k~N", p);
830         keep_c = g->keep_count;
831     }
832 
833     g->failure_label = new_label(g);
834     g->label_used = 0;
835     g->failure_keep_count = 0;
836     generate(g, p->left);
837 
838     if (atleast_case) w(g, "~Mi--;~N");
839 
840     w(g, "~Mcontinue;~N");
841     if (g->label_used)
842         wsetl(g, g->failure_label);
843 
844     if (keep_c) {
845         w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
846     }
847 
848     w(g, "~Mbreak;~N"
849       "~}");
850 }
851 
generate_repeat(struct generator * g,struct node * p)852 static void generate_repeat(struct generator * g, struct node * p) {
853     generate_repeat_or_atleast(g, p, false);
854 }
855 
generate_atleast(struct generator * g,struct node * p)856 static void generate_atleast(struct generator * g, struct node * p) {
857     w(g, "~{int i = "); generate_AE(g, p->AE); w(g, ";~C");
858     {
859         int used = g->label_used;
860         int a0 = g->failure_label;
861         int a1 = g->failure_keep_count;
862 
863         generate_repeat_or_atleast(g, p, true);
864 
865         g->label_used = used;
866         g->failure_label = a0;
867         g->failure_keep_count = a1;
868     }
869     writef(g, "~Mif (i > 0) ~f~N"
870        "~}", p);
871 }
872 
generate_setmark(struct generator * g,struct node * p)873 static void generate_setmark(struct generator * g, struct node * p) {
874     g->V[0] = p->name;
875     writef(g, "~M~V0 = z->c;~C", p);
876 }
877 
generate_tomark(struct generator * g,struct node * p)878 static void generate_tomark(struct generator * g, struct node * p) {
879     g->S[0] = p->mode == m_forward ? ">" : "<";
880 
881     w(g, "~Mif (z->c ~S0 "); generate_AE(g, p->AE); writef(g, ") ~f~N", p);
882     w(g, "~Mz->c = "); generate_AE(g, p->AE); writef(g, ";~C", p);
883 }
884 
generate_atmark(struct generator * g,struct node * p)885 static void generate_atmark(struct generator * g, struct node * p) {
886 
887     w(g, "~Mif (z->c != "); generate_AE(g, p->AE); writef(g, ") ~f~C", p);
888 }
889 
generate_hop(struct generator * g,struct node * p)890 static void generate_hop(struct generator * g, struct node * p) {
891     if (g->options->encoding == ENC_UTF8) {
892         g->S[0] = p->mode == m_forward ? "" : "_b";
893         g->S[1] = p->mode == m_forward ? "z->l" : "z->lb";
894         w(g, "~{int ret = skip~S0_utf8(z->p, z->c, ~S1, ");
895         generate_AE(g, p->AE);
896         writef(g, ");~C", p);
897         writef(g, "~Mif (ret < 0) ~f~N", p);
898         writef(g, "~Mz->c = ret;~N"
899                "~}", p);
900     } else {
901         // Fixed-width characters.
902         g->S[0] = p->mode == m_forward ? "+" : "-";
903         if (p->AE->type == c_number) {
904             // Constant distance hop.
905             //
906             // No need to check for negative hop as that's converted to false by
907             // the analyser.
908             //
909             // Note that if we signal f then z->c will be reset when this is
910             // handled - we rely on this here and unconditionally update z->c.
911             w(g, "z->c = z->c ~S0 ");
912             generate_AE(g, p->AE);
913             w(g, ";~C");
914             if (p->mode == m_forward) {
915                 writef(g, "~Mif (z->c > z->l) ~f~N", p);
916             } else {
917                 writef(g, "~Mif (z->c < z->lb) ~f~N", p);
918             }
919         } else {
920             w(g, "~{int ret = z->c ~S0 ");
921             generate_AE(g, p->AE);
922             writef(g, ";~C", p);
923             if (p->mode == m_forward) {
924                 writef(g, "~Mif (ret > z->l || ret < z->c) ~f~N", p);
925             } else {
926                 writef(g, "~Mif (ret < z->lb || ret > z->c) ~f~N", p);
927             }
928             writef(g, "~Mz->c = ret;~N"
929                       "~}", p);
930         }
931     }
932 }
933 
generate_delete(struct generator * g,struct node * p)934 static void generate_delete(struct generator * g, struct node * p) {
935     writef(g, "~{int ret = slice_del(z);~C", p);
936     writef(g, "~Mif (ret < 0) return ret;~N"
937           "~}", p);
938 }
939 
generate_tolimit(struct generator * g,struct node * p)940 static void generate_tolimit(struct generator * g, struct node * p) {
941     g->S[0] = p->mode == m_forward ? "" : "b";
942     writef(g, "~Mz->c = z->l~S0;~C", p);
943 }
944 
generate_atlimit(struct generator * g,struct node * p)945 static void generate_atlimit(struct generator * g, struct node * p) {
946     g->S[0] = p->mode == m_forward ? "" : "b";
947     g->S[1] = p->mode == m_forward ? "<" : ">";
948     writef(g, "~Mif (z->c ~S1 z->l~S0) ~f~C", p);
949 }
950 
generate_leftslice(struct generator * g,struct node * p)951 static void generate_leftslice(struct generator * g, struct node * p) {
952     g->S[0] = p->mode == m_forward ? "bra" : "ket";
953     writef(g, "~Mz->~S0 = z->c;~C", p);
954 }
955 
generate_rightslice(struct generator * g,struct node * p)956 static void generate_rightslice(struct generator * g, struct node * p) {
957     g->S[0] = p->mode == m_forward ? "ket" : "bra";
958     writef(g, "~Mz->~S0 = z->c;~C", p);
959 }
960 
generate_assignto(struct generator * g,struct node * p)961 static void generate_assignto(struct generator * g, struct node * p) {
962     g->V[0] = p->name;
963     writef(g, "~M~V0 = assign_to(z, ~V0);~C"
964           "~Mif (~V0 == 0) return -1;~C", p);
965 }
966 
generate_sliceto(struct generator * g,struct node * p)967 static void generate_sliceto(struct generator * g, struct node * p) {
968     g->V[0] = p->name;
969     writef(g, "~M~V0 = slice_to(z, ~V0);~C"
970           "~Mif (~V0 == 0) return -1;~N", p);
971 }
972 
generate_insert(struct generator * g,struct node * p,int style)973 static void generate_insert(struct generator * g, struct node * p, int style) {
974 
975     int keep_c = style == c_attach;
976     if (p->mode == m_backward) keep_c = !keep_c;
977     writef(g, "~{int ret;~N", p);
978     if (keep_c) w(g, "~{int saved_c = z->c;~N");
979     writef(g, "~Mret = insert_~$(z, z->c, z->c, ~a);~C", p);
980     if (keep_c) w(g, "~Mz->c = saved_c;~N~}");
981     writef(g, "~Mif (ret < 0) return ret;~N"
982           "~}", p);
983 }
984 
generate_assignfrom(struct generator * g,struct node * p)985 static void generate_assignfrom(struct generator * g, struct node * p) {
986 
987     int keep_c = p->mode == m_forward; /* like 'attach' */
988     writef(g, "~{int ret;~N", p);
989     if (keep_c) writef(g, "~{int saved_c = z->c;~N", p);
990     w(g, "~Mret = ");
991     writef(g, keep_c ? "insert_~$(z, z->c, z->l, ~a);~C" : "insert_~$(z, z->lb, z->c, ~a);~C", p);
992     if (keep_c) w(g, "~Mz->c = saved_c;~N~}");
993     writef(g, "~Mif (ret < 0) return ret;~N"
994           "~}", p);
995 }
996 
generate_slicefrom(struct generator * g,struct node * p)997 static void generate_slicefrom(struct generator * g, struct node * p) {
998     writef(g, "~{int ret = slice_from_~$(z, ~a);~C", p);
999     writef(g, "~Mif (ret < 0) return ret;~N"
1000           "~}", p);
1001 }
1002 
generate_setlimit(struct generator * g,struct node * p)1003 static void generate_setlimit(struct generator * g, struct node * p) {
1004     int keep_c;
1005     if (p->left && p->left->type == c_tomark) {
1006         /* Special case for:
1007          *
1008          *   setlimit tomark AE for C
1009          *
1010          * All uses of setlimit in the current stemmers we ship follow this
1011          * pattern, and by special-casing we can avoid having to save and
1012          * restore c.
1013          */
1014         struct node * q = p->left;
1015 
1016         ++g->keep_count;
1017         writef(g, "~N~{int mlimit", p);
1018         write_int(g, g->keep_count);
1019         writef(g, ";~C", p);
1020         keep_c = g->keep_count;
1021 
1022         g->S[0] = q->mode == m_forward ? ">" : "<";
1023 
1024         w(g, "~Mif (z->c ~S0 "); generate_AE(g, q->AE); writef(g, ") ~f~N", q);
1025         w(g, "~Mmlimit");
1026         write_int(g, keep_c);
1027         if (p->mode == m_forward) {
1028             w(g, " = z->l - z->c; z->l = ");
1029         } else {
1030             w(g, " = z->lb; z->lb = ");
1031         }
1032         generate_AE(g, q->AE);
1033         w(g, ";~N");
1034     } else {
1035         writef(g, "~{~K~C", p);
1036         keep_c = g->keep_count;
1037         generate(g, p->left);
1038 
1039         w(g, "~Mmlimit");
1040         write_int(g, keep_c);
1041         if (p->mode == m_forward)
1042             w(g, " = z->l - z->c; z->l = z->c;~N");
1043         else
1044             w(g, " = z->lb; z->lb = z->c;~N");
1045         w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
1046     }
1047 
1048     g->failure_keep_count = -keep_c;
1049     generate(g, p->aux);
1050     w(g, "~M");
1051     wrestorelimit(g, p, -g->failure_keep_count);
1052     w(g, "~N"
1053       "~}");
1054 }
1055 
1056 /* dollar sets snowball up to operate on a string variable as if it were the
1057  * current string */
generate_dollar(struct generator * g,struct node * p)1058 static void generate_dollar(struct generator * g, struct node * p) {
1059 
1060     int used = g->label_used;
1061     int a0 = g->failure_label;
1062     int a1 = g->failure_keep_count;
1063     int keep_token;
1064     g->failure_label = new_label(g);
1065     g->label_used = 0;
1066     g->failure_keep_count = 0;
1067 
1068     keep_token = ++g->keep_count;
1069     g->I[0] = keep_token;
1070     writef(g, "~{struct SN_env env~I0 = * z;~C", p);
1071     g->V[0] = p->name;
1072     /* Assume failure. */
1073     writef(g, "~Mint failure = 1;~N"
1074           "~Mz->p = ~V0;~N"
1075           "~Mz->lb = z->c = 0;~N"
1076           "~Mz->l = SIZE(z->p);~N", p);
1077     generate(g, p->left);
1078     /* Mark success. */
1079     w(g, "~Mfailure = 0;~N");
1080     if (g->label_used)
1081         wsetl(g, g->failure_label);
1082     g->V[0] = p->name; /* necessary */
1083 
1084     g->label_used = used;
1085     g->failure_label = a0;
1086     g->failure_keep_count = a1;
1087 
1088     g->I[0] = keep_token;
1089     writef(g, "~M~V0 = z->p;~N"
1090           "~M* z = env~I0;~N"
1091           "~Mif (failure) ~f~N~}", p);
1092 }
1093 
generate_integer_assign(struct generator * g,struct node * p,char * s)1094 static void generate_integer_assign(struct generator * g, struct node * p, char * s) {
1095 
1096     g->V[0] = p->name;
1097     g->S[0] = s;
1098     w(g, "~M~V0 ~S0 "); generate_AE(g, p->AE); writef(g, ";~C", p);
1099 }
1100 
generate_integer_test(struct generator * g,struct node * p,char * s)1101 static void generate_integer_test(struct generator * g, struct node * p, char * s) {
1102 
1103     w(g, "~Mif (!(");
1104     generate_AE(g, p->left);
1105     write_char(g, ' ');
1106     write_string(g, s);
1107     write_char(g, ' ');
1108     generate_AE(g, p->AE);
1109     writef(g, ")) ~f~C", p);
1110 }
1111 
generate_call(struct generator * g,struct node * p)1112 static void generate_call(struct generator * g, struct node * p) {
1113 
1114     g->V[0] = p->name;
1115     writef(g, "~{int ret = ~V0(z);~C", p);
1116     if (g->failure_keep_count == 0 && g->failure_label == x_return) {
1117         /* Combine the two tests in this special case for better optimisation
1118          * and clearer generated code. */
1119         writef(g, "~Mif (ret <= 0) return ret;~N~}", p);
1120     } else {
1121         writef(g, "~Mif (ret == 0) ~f~N"
1122               "~Mif (ret < 0) return ret;~N~}", p);
1123     }
1124 }
1125 
generate_grouping(struct generator * g,struct node * p,int complement)1126 static void generate_grouping(struct generator * g, struct node * p, int complement) {
1127 
1128     struct grouping * q = p->name->grouping;
1129     g->S[0] = p->mode == m_forward ? "" : "_b";
1130     g->S[1] = complement ? "out" : "in";
1131     g->S[2] = g->options->encoding == ENC_UTF8 ? "_U" : "";
1132     g->V[0] = p->name;
1133     g->I[0] = q->smallest_ch;
1134     g->I[1] = q->largest_ch;
1135     writef(g, "~Mif (~S1_grouping~S0~S2(z, ~V0, ~I0, ~I1, 0)) ~f~C", p);
1136 }
1137 
generate_namedstring(struct generator * g,struct node * p)1138 static void generate_namedstring(struct generator * g, struct node * p) {
1139 
1140     g->S[0] = p->mode == m_forward ? "" : "_b";
1141     g->V[0] = p->name;
1142     writef(g, "~Mif (!(eq_v~S0(z, ~V0))) ~f~C", p);
1143 }
1144 
generate_literalstring(struct generator * g,struct node * p)1145 static void generate_literalstring(struct generator * g, struct node * p) {
1146     symbol * b = p->literalstring;
1147     if (SIZE(b) == 1) {
1148         /* It's quite common to compare with a single character literal string,
1149          * so just inline the simpler code for this case rather than making a
1150          * function call.  In UTF-8 mode, only do this for the ASCII subset,
1151          * since multi-byte characters are more complex to test against.
1152          */
1153         if (g->options->encoding == ENC_UTF8 && *b >= 128) {
1154             printf("single byte %d\n", *b);
1155             exit(1);
1156         }
1157         g->I[0] = *b;
1158         if (p->mode == m_forward) {
1159             writef(g, "~Mif (z->c == z->l || z->p[z->c] != ~c0) ~f~C"
1160                   "~Mz->c++;~N", p);
1161         } else {
1162             writef(g, "~Mif (z->c <= z->lb || z->p[z->c - 1] != ~c0) ~f~C"
1163                   "~Mz->c--;~N", p);
1164         }
1165     } else {
1166         g->S[0] = p->mode == m_forward ? "" : "_b";
1167         g->I[0] = SIZE(b);
1168         g->L[0] = b;
1169 
1170         writef(g, "~Mif (!(eq_s~S0(z, ~I0, ~L0))) ~f~C", p);
1171     }
1172 }
1173 
generate_define(struct generator * g,struct node * p)1174 static void generate_define(struct generator * g, struct node * p) {
1175     struct name * q = p->name;
1176     g->next_label = 0;
1177 
1178     g->S[0] = q->type == t_routine ? "static" : "extern";
1179     g->V[0] = q;
1180 
1181     w(g, "~N~S0 int ~V0(struct SN_env * z) {");
1182     if (g->options->comments) {
1183         write_string(g, p->mode == m_forward ? " /* forwardmode */" : " /* backwardmode */");
1184     }
1185     w(g, "~N~+");
1186     if (p->amongvar_needed) w(g, "~Mint among_var;~N");
1187     g->failure_keep_count = 0;
1188     g->failure_label = x_return;
1189     g->label_used = 0;
1190     g->keep_count = 0;
1191     generate(g, p->left);
1192     w(g, "~Mreturn 1;~N~}");
1193 }
1194 
generate_substring(struct generator * g,struct node * p)1195 static void generate_substring(struct generator * g, struct node * p) {
1196 
1197     struct among * x = p->among;
1198     int block = -1;
1199     unsigned int bitmap = 0;
1200     struct amongvec * among_cases = x->b;
1201     int c;
1202     int empty_case = -1;
1203     int n_cases = 0;
1204     symbol cases[2];
1205     int shortest_size = INT_MAX;
1206     int shown_comment = 0;
1207 
1208     g->S[0] = p->mode == m_forward ? "" : "_b";
1209     g->I[0] = x->number;
1210     g->I[1] = x->literalstring_count;
1211 
1212     /* In forward mode with non-ASCII UTF-8 characters, the first byte
1213      * of the string will often be the same, so instead look at the last
1214      * common byte position.
1215      *
1216      * In backward mode, we can't match if there are fewer characters before
1217      * the current position than the minimum length.
1218      */
1219     for (c = 0; c < x->literalstring_count; ++c) {
1220         int size = among_cases[c].size;
1221         if (size != 0 && size < shortest_size) {
1222             shortest_size = size;
1223         }
1224     }
1225 
1226     for (c = 0; c < x->literalstring_count; ++c) {
1227         symbol ch;
1228         if (among_cases[c].size == 0) {
1229             empty_case = c;
1230             continue;
1231         }
1232         if (p->mode == m_forward) {
1233             ch = among_cases[c].b[shortest_size - 1];
1234         } else {
1235             ch = among_cases[c].b[among_cases[c].size - 1];
1236         }
1237         if (n_cases == 0) {
1238             block = ch >> 5;
1239         } else if (ch >> 5 != block) {
1240             block = -1;
1241             if (n_cases > 2) break;
1242         }
1243         if (block == -1) {
1244             if (n_cases > 0 && ch == cases[0]) continue;
1245             if (n_cases < 2) {
1246                 cases[n_cases++] = ch;
1247             } else if (ch != cases[1]) {
1248                 ++n_cases;
1249                 break;
1250             }
1251         } else {
1252             if ((bitmap & (1u << (ch & 0x1f))) == 0) {
1253                 bitmap |= 1u << (ch & 0x1f);
1254                 if (n_cases < 2)
1255                     cases[n_cases] = ch;
1256                 ++n_cases;
1257             }
1258         }
1259     }
1260 
1261     if (block != -1 || n_cases <= 2) {
1262         char buf[64];
1263         g->I[2] = block;
1264         g->I[3] = bitmap;
1265         g->I[4] = shortest_size - 1;
1266         if (p->mode == m_forward) {
1267             sprintf(buf, "z->p[z->c + %d]", shortest_size - 1);
1268             g->S[1] = buf;
1269             if (shortest_size == 1) {
1270                 writef(g, "~Mif (z->c >= z->l", p);
1271             } else {
1272                 writef(g, "~Mif (z->c + ~I4 >= z->l", p);
1273             }
1274         } else {
1275             g->S[1] = "z->p[z->c - 1]";
1276             if (shortest_size == 1) {
1277                 writef(g, "~Mif (z->c <= z->lb", p);
1278             } else {
1279                 writef(g, "~Mif (z->c - ~I4 <= z->lb", p);
1280             }
1281         }
1282         if (n_cases == 0) {
1283             /* We get this for the degenerate case: among ( '' )
1284              * This doesn't seem to be a useful construct, but it is
1285              * syntactically valid.
1286              */
1287         } else if (n_cases == 1) {
1288             g->I[4] = cases[0];
1289             writef(g, " || ~S1 != ~I4", p);
1290         } else if (n_cases == 2) {
1291             g->I[4] = cases[0];
1292             g->I[5] = cases[1];
1293             writef(g, " || (~S1 != ~I4 && ~S1 != ~I5)", p);
1294         } else {
1295             writef(g, " || ~S1 >> 5 != ~I2 || !((~I3 >> (~S1 & 0x1f)) & 1)", p);
1296         }
1297         write_string(g, ") ");
1298         if (empty_case != -1) {
1299             /* If the among includes the empty string, it can never fail
1300              * so not matching the bitmap means we match the empty string.
1301              */
1302             g->I[4] = among_cases[empty_case].result;
1303             writef(g, "among_var = ~I4; else~C", p);
1304         } else {
1305             writef(g, "~f~C", p);
1306         }
1307         shown_comment = 1;
1308     } else {
1309 #ifdef OPTIMISATION_WARNINGS
1310         printf("Couldn't shortcut among %d\n", x->number);
1311 #endif
1312     }
1313 
1314     if (!x->amongvar_needed) {
1315         writef(g, "~Mif (!(find_among~S0(z, a_~I0, ~I1))) ~f", p);
1316         writef(g, shown_comment ? "~N" : "~C", p);
1317     } else {
1318         writef(g, "~Mamong_var = find_among~S0(z, a_~I0, ~I1);", p);
1319         writef(g, shown_comment ? "~N" : "~C", p);
1320         writef(g, "~Mif (!(among_var)) ~f~N", p);
1321     }
1322 }
1323 
generate_among(struct generator * g,struct node * p)1324 static void generate_among(struct generator * g, struct node * p) {
1325 
1326     struct among * x = p->among;
1327 
1328     if (x->substring == 0) generate_substring(g, p);
1329 
1330     if (x->starter != 0) generate(g, x->starter);
1331 
1332     if (x->command_count == 1 && x->nocommand_count == 0) {
1333         /* Only one outcome ("no match" already handled). */
1334         generate(g, x->commands[0]);
1335     } else if (x->command_count > 0) {
1336         int i;
1337         writef(g, "~Mswitch (among_var) {~C~+", p);
1338         for (i = 1; i <= x->command_count; i++) {
1339             g->I[0] = i;
1340             w(g, "~Mcase ~I0:~N~+");
1341             generate(g, x->commands[i - 1]);
1342             w(g, "~Mbreak;~N~-");
1343         }
1344         w(g, "~}");
1345     }
1346 }
1347 
generate_booltest(struct generator * g,struct node * p)1348 static void generate_booltest(struct generator * g, struct node * p) {
1349 
1350     g->V[0] = p->name;
1351     writef(g, "~Mif (!(~V0)) ~f~C", p);
1352 }
1353 
generate_false(struct generator * g,struct node * p)1354 static void generate_false(struct generator * g, struct node * p) {
1355 
1356     writef(g, "~M~f~C", p);
1357 }
1358 
generate_debug(struct generator * g,struct node * p)1359 static void generate_debug(struct generator * g, struct node * p) {
1360 
1361     g->I[0] = g->debug_count++;
1362     g->I[1] = p->line_number;
1363     writef(g, "~Mdebug(z, ~I0, ~I1);~C", p);
1364 
1365 }
1366 
generate(struct generator * g,struct node * p)1367 static void generate(struct generator * g, struct node * p) {
1368 
1369     int used = g->label_used;
1370     int a0 = g->failure_label;
1371     int a1 = g->failure_keep_count;
1372 
1373     switch (p->type) {
1374         case c_define:        generate_define(g, p); break;
1375         case c_bra:           generate_bra(g, p); break;
1376         case c_and:           generate_and(g, p); break;
1377         case c_or:            generate_or(g, p); break;
1378         case c_backwards:     generate_backwards(g, p); break;
1379         case c_not:           generate_not(g, p); break;
1380         case c_set:           generate_set(g, p); break;
1381         case c_unset:         generate_unset(g, p); break;
1382         case c_try:           generate_try(g, p); break;
1383         case c_fail:          generate_fail(g, p); break;
1384         case c_reverse:
1385         case c_test:          generate_test(g, p); break;
1386         case c_do:            generate_do(g, p); break;
1387         case c_goto:          generate_GO(g, p, 1); break;
1388         case c_gopast:        generate_GO(g, p, 0); break;
1389         case c_repeat:        generate_repeat(g, p); break;
1390         case c_loop:          generate_loop(g, p); break;
1391         case c_atleast:       generate_atleast(g, p); break;
1392         case c_setmark:       generate_setmark(g, p); break;
1393         case c_tomark:        generate_tomark(g, p); break;
1394         case c_atmark:        generate_atmark(g, p); break;
1395         case c_hop:           generate_hop(g, p); break;
1396         case c_delete:        generate_delete(g, p); break;
1397         case c_next:          generate_next(g, p); break;
1398         case c_tolimit:       generate_tolimit(g, p); break;
1399         case c_atlimit:       generate_atlimit(g, p); break;
1400         case c_leftslice:     generate_leftslice(g, p); break;
1401         case c_rightslice:    generate_rightslice(g, p); break;
1402         case c_assignto:      generate_assignto(g, p); break;
1403         case c_sliceto:       generate_sliceto(g, p); break;
1404         case c_assign:        generate_assignfrom(g, p); break;
1405         case c_insert:
1406         case c_attach:        generate_insert(g, p, p->type); break;
1407         case c_slicefrom:     generate_slicefrom(g, p); break;
1408         case c_setlimit:      generate_setlimit(g, p); break;
1409         case c_dollar:        generate_dollar(g, p); break;
1410         case c_mathassign:    generate_integer_assign(g, p, "="); break;
1411         case c_plusassign:    generate_integer_assign(g, p, "+="); break;
1412         case c_minusassign:   generate_integer_assign(g, p, "-="); break;
1413         case c_multiplyassign:generate_integer_assign(g, p, "*="); break;
1414         case c_divideassign:  generate_integer_assign(g, p, "/="); break;
1415         case c_eq:            generate_integer_test(g, p, "=="); break;
1416         case c_ne:            generate_integer_test(g, p, "!="); break;
1417         case c_gr:            generate_integer_test(g, p, ">"); break;
1418         case c_ge:            generate_integer_test(g, p, ">="); break;
1419         case c_ls:            generate_integer_test(g, p, "<"); break;
1420         case c_le:            generate_integer_test(g, p, "<="); break;
1421         case c_call:          generate_call(g, p); break;
1422         case c_grouping:      generate_grouping(g, p, false); break;
1423         case c_non:           generate_grouping(g, p, true); break;
1424         case c_name:          generate_namedstring(g, p); break;
1425         case c_literalstring: generate_literalstring(g, p); break;
1426         case c_among:         generate_among(g, p); break;
1427         case c_substring:     generate_substring(g, p); break;
1428         case c_booltest:      generate_booltest(g, p); break;
1429         case c_false:         generate_false(g, p); break;
1430         case c_true:          break;
1431         case c_debug:         generate_debug(g, p); break;
1432         default: fprintf(stderr, "%d encountered\n", p->type);
1433                  exit(1);
1434     }
1435 
1436     if (g->failure_label != a0)
1437         g->label_used = used;
1438     g->failure_label = a0;
1439     g->failure_keep_count = a1;
1440 }
1441 
write_generated_comment_content(struct generator * g)1442 void write_generated_comment_content(struct generator * g) {
1443     w(g, "Generated by Snowball " SNOWBALL_VERSION
1444          " - https://snowballstem.org/");
1445 }
1446 
write_start_comment(struct generator * g,const char * comment_start,const char * comment_end)1447 void write_start_comment(struct generator * g,
1448                          const char * comment_start,
1449                          const char * comment_end) {
1450     write_margin(g);
1451     w(g, comment_start);
1452     write_generated_comment_content(g);
1453     if (comment_end) {
1454         w(g, comment_end);
1455     }
1456     w(g, "~N~N");
1457 }
1458 
generate_head(struct generator * g)1459 static void generate_head(struct generator * g) {
1460 
1461     w(g, "#include \"");
1462     if (g->options->runtime_path) {
1463         write_string(g, g->options->runtime_path);
1464         if (g->options->runtime_path[strlen(g->options->runtime_path) - 1] != '/')
1465             write_char(g, '/');
1466     }
1467     w(g, "header.h\"~N~N");
1468 }
1469 
generate_routine_headers(struct generator * g)1470 static void generate_routine_headers(struct generator * g) {
1471     struct name * q;
1472     for (q = g->analyser->names; q; q = q->next) {
1473         g->V[0] = q;
1474         switch (q->type) {
1475             case t_routine:
1476                 w(g, "static int ~W0(struct SN_env * z);~N");
1477                 break;
1478             case t_external:
1479                 w(g,
1480                   "#ifdef __cplusplus~N"
1481                   "extern \"C\" {~N"
1482                   "#endif~N"
1483                   "extern int ~W0(struct SN_env * z);~N"
1484                   "#ifdef __cplusplus~N"
1485                   "}~N"
1486                   "#endif~N"
1487                   );
1488                 break;
1489         }
1490     }
1491 }
1492 
generate_among_table(struct generator * g,struct among * x)1493 static void generate_among_table(struct generator * g, struct among * x) {
1494 
1495     struct amongvec * v = x->b;
1496 
1497     g->I[0] = x->number;
1498     {
1499         int i;
1500         for (i = 0; i < x->literalstring_count; i++) {
1501             g->I[1] = i;
1502             g->I[2] = v->size;
1503             g->L[0] = v->b;
1504             if (v->size)
1505                 w(g, "static const symbol s_~I0_~I1[~I2] = ~A0;~N");
1506             v++;
1507         }
1508     }
1509 
1510     g->I[1] = x->literalstring_count;
1511     w(g, "~N~Mstatic const struct among a_~I0[~I1] =~N{~N");
1512 
1513     v = x->b;
1514     {
1515         int i;
1516         for (i = 0; i < x->literalstring_count; i++) {
1517             g->I[1] = i;
1518             g->I[2] = v->size;
1519             g->I[3] = v->i;
1520             g->I[4] = v->result;
1521             g->S[0] = i < x->literalstring_count - 1 ? "," : "";
1522 
1523             if (g->options->comments) {
1524                 w(g, "/*~J1 */ ");
1525             }
1526             w(g, "{ ~I2, ");
1527             if (v->size == 0) {
1528                 w(g, "0,");
1529             } else {
1530                 w(g, "s_~I0_~I1,");
1531             }
1532             w(g, " ~I3, ~I4, ");
1533             if (v->function == 0) {
1534                 write_char(g, '0');
1535             } else {
1536                 write_varname(g, v->function);
1537             }
1538             w(g, "}~S0~N");
1539             v++;
1540         }
1541     }
1542     w(g, "};~N~N");
1543 }
1544 
generate_amongs(struct generator * g)1545 static void generate_amongs(struct generator * g) {
1546     struct among * x;
1547     for (x = g->analyser->amongs; x; x = x->next) {
1548         generate_among_table(g, x);
1549     }
1550 }
1551 
set_bit(symbol * b,int i)1552 static void set_bit(symbol * b, int i) { b[i/8] |= 1 << i%8; }
1553 
generate_grouping_table(struct generator * g,struct grouping * q)1554 static void generate_grouping_table(struct generator * g, struct grouping * q) {
1555 
1556     int range = q->largest_ch - q->smallest_ch + 1;
1557     int size = (range + 7)/ 8;  /* assume 8 bits per symbol */
1558     symbol * b = q->b;
1559     symbol * map = create_b(size);
1560     int i;
1561     for (i = 0; i < size; i++) map[i] = 0;
1562 
1563     for (i = 0; i < SIZE(b); i++) set_bit(map, b[i] - q->smallest_ch);
1564 
1565     g->V[0] = q->name;
1566 
1567     w(g, "static const unsigned char ~V0[] = { ");
1568     for (i = 0; i < size; i++) {
1569         write_int(g, map[i]);
1570         if (i < size - 1) w(g, ", ");
1571     }
1572     w(g, " };~N~N");
1573     lose_b(map);
1574 }
1575 
generate_groupings(struct generator * g)1576 static void generate_groupings(struct generator * g) {
1577     struct grouping * q;
1578     for (q = g->analyser->groupings; q; q = q->next) {
1579         if (q->name->used)
1580             generate_grouping_table(g, q);
1581     }
1582 }
1583 
generate_create(struct generator * g)1584 static void generate_create(struct generator * g) {
1585 
1586     int * p = g->analyser->name_count;
1587     g->I[0] = p[t_string];
1588     g->I[1] = p[t_integer] + p[t_boolean];
1589     w(g, "~N"
1590          "extern struct SN_env * ~pcreate_env(void) { return SN_create_env(~I0, ~I1); }"
1591          "~N");
1592 }
1593 
generate_close(struct generator * g)1594 static void generate_close(struct generator * g) {
1595 
1596     int * p = g->analyser->name_count;
1597     g->I[0] = p[t_string];
1598     w(g, "~Nextern void ~pclose_env(struct SN_env * z) { SN_close_env(z, ~I0); }~N~N");
1599 }
1600 
generate_create_and_close_templates(struct generator * g)1601 static void generate_create_and_close_templates(struct generator * g) {
1602     w(g, "~N"
1603          "extern struct SN_env * ~pcreate_env(void);~N"
1604          "extern void ~pclose_env(struct SN_env * z);~N"
1605          "~N");
1606 }
1607 
generate_header_file(struct generator * g)1608 static void generate_header_file(struct generator * g) {
1609 
1610     struct name * q;
1611     const char * vp = g->options->variables_prefix;
1612     g->S[0] = vp;
1613 
1614     w(g, "#ifdef __cplusplus~N"
1615          "extern \"C\" {~N"
1616          "#endif~N");            /* for C++ */
1617 
1618     generate_create_and_close_templates(g);
1619     for (q = g->analyser->names; q; q = q->next) {
1620         g->V[0] = q;
1621         switch (q->type) {
1622             case t_external:
1623                 w(g, "extern int ~W0(struct SN_env * z);~N");
1624                 break;
1625             case t_string:
1626             case t_integer:
1627             case t_boolean:
1628                 if (vp) {
1629                     int count = q->count;
1630                     if (count < 0) {
1631                         /* Unused variables should get removed from `names`. */
1632                         fprintf(stderr, "Optimised out variable ");
1633                         report_b(stderr, q->b);
1634                         fprintf(stderr, " still in names list\n");
1635                         exit(1);
1636                     }
1637                     if (q->type == t_boolean) {
1638                         /* We use a single array for booleans and integers,
1639                          * with the integers first.
1640                          */
1641                         count += g->analyser->name_count[t_integer];
1642                     }
1643                     g->I[0] = count;
1644                     g->I[1] = "SIIrxg"[q->type];
1645                     w(g, "#define ~S0");
1646                     write_b(g, q->b);
1647                     w(g, " (~c1[~I0])~N");
1648                 }
1649                 break;
1650         }
1651     }
1652 
1653     w(g, "~N"
1654          "#ifdef __cplusplus~N"
1655          "}~N"
1656          "#endif~N");            /* for C++ */
1657 
1658     w(g, "~N");
1659 }
1660 
generate_program_c(struct generator * g)1661 extern void generate_program_c(struct generator * g) {
1662 
1663     g->outbuf = str_new();
1664     write_start_comment(g, "/* ", " */");
1665     generate_head(g);
1666     generate_routine_headers(g);
1667     w(g, "#ifdef __cplusplus~N"
1668          "extern \"C\" {~N"
1669          "#endif~N"
1670          "~N");
1671     generate_create_and_close_templates(g);
1672     w(g, "~N"
1673          "#ifdef __cplusplus~N"
1674          "}~N"
1675          "#endif~N");
1676     generate_amongs(g);
1677     generate_groupings(g);
1678     g->declarations = g->outbuf;
1679     g->outbuf = str_new();
1680     g->literalstring_count = 0;
1681     {
1682         struct node * p = g->analyser->program;
1683         while (p) { generate(g, p); p = p->right; }
1684     }
1685     generate_create(g);
1686     generate_close(g);
1687     output_str(g->options->output_src, g->declarations);
1688     str_delete(g->declarations);
1689     output_str(g->options->output_src, g->outbuf);
1690     str_clear(g->outbuf);
1691 
1692     write_start_comment(g, "/* ", " */");
1693     generate_header_file(g);
1694     output_str(g->options->output_h, g->outbuf);
1695     str_delete(g->outbuf);
1696 }
1697 
1698 /* Generator functions common to multiple languages. */
1699 
create_generator(struct analyser * a,struct options * o)1700 extern struct generator * create_generator(struct analyser * a, struct options * o) {
1701     NEW(generator, g);
1702     g->analyser = a;
1703     g->options = o;
1704     g->margin = 0;
1705     g->debug_count = 0;
1706     g->copy_from_count = 0;
1707     g->line_count = 0;
1708     g->line_labelled = 0;
1709     g->failure_label = -1;
1710     g->unreachable = false;
1711 #ifndef DISABLE_PYTHON
1712     g->max_label = 0;
1713 #endif
1714     return g;
1715 }
1716 
close_generator(struct generator * g)1717 extern void close_generator(struct generator * g) {
1718     FREE(g);
1719 }
1720 
1721 /* Write routines for simple entities */
1722 
write_char(struct generator * g,int ch)1723 extern void write_char(struct generator * g, int ch) {
1724     str_append_ch(g->outbuf, ch); /* character */
1725 }
1726 
write_newline(struct generator * g)1727 extern void write_newline(struct generator * g) {
1728     str_append_ch(g->outbuf, '\n'); /* newline */
1729     g->line_count++;
1730 }
1731 
write_string(struct generator * g,const char * s)1732 extern void write_string(struct generator * g, const char * s) {
1733     str_append_string(g->outbuf, s);
1734 }
1735 
write_int(struct generator * g,int i)1736 extern void write_int(struct generator * g, int i) {
1737     str_append_int(g->outbuf, i);
1738 }
1739 
write_b(struct generator * g,symbol * b)1740 extern void write_b(struct generator * g, symbol * b) {
1741 
1742     str_append_b(g->outbuf, b);
1743 }
1744 
write_str(struct generator * g,struct str * str)1745 extern void write_str(struct generator * g, struct str * str) {
1746 
1747     str_append(g->outbuf, str);
1748 }
1749