1
2 #include <limits.h> /* for INT_MAX */
3 #include <stdio.h> /* for fprintf etc */
4 #include <stdlib.h> /* for free etc */
5 #include <string.h> /* for strlen */
6 #include "header.h"
7
8 /* Define this to get warning messages when optimisations can't be used. */
9 /* #define OPTIMISATION_WARNINGS */
10
11 /* recursive use: */
12
13 static void generate(struct generator * g, struct node * p);
14
new_label(struct generator * g)15 static int new_label(struct generator * g) {
16 return g->next_label++;
17 }
18
19 /* Write routines for simple entities */
20
21 /* Write a space if the preceding character was not whitespace */
ws_opt_space(struct generator * g,const char * s)22 static void ws_opt_space(struct generator * g, const char * s) {
23 int ch = str_back(g->outbuf);
24 if (ch != ' ' && ch != '\n' && ch != '\t' && ch != -1)
25 write_char(g, ' ');
26 write_string(g, s);
27 }
28
wi3(struct generator * g,int i)29 static void wi3(struct generator * g, int i) {
30 if (i < 100) write_char(g, ' ');
31 if (i < 10) write_char(g, ' ');
32 write_int(g, i); /* integer (width 3) */
33 }
34
35
36 /* Write routines for items from the syntax tree */
37
write_varname(struct generator * g,struct name * p)38 static void write_varname(struct generator * g, struct name * p) {
39
40 int ch = "SIIrxg"[p->type];
41 switch (p->type) {
42 case t_external:
43 write_string(g, g->options->externals_prefix); break;
44 case t_string:
45 case t_boolean:
46 case t_integer: {
47 int count = p->count;
48 if (count < 0) {
49 fprintf(stderr, "Reference to optimised out variable ");
50 report_b(stderr, p->b);
51 fprintf(stderr, " attempted\n");
52 exit(1);
53 }
54 if (p->type == t_boolean) {
55 /* We use a single array for booleans and integers, with the
56 * integers first.
57 */
58 count += g->analyser->name_count[t_integer];
59 }
60 write_char(g, ch);
61 write_char(g, '[');
62 write_int(g, count);
63 write_char(g, ']');
64 return;
65 }
66 default:
67 write_char(g, ch); write_char(g, '_');
68 }
69 write_b(g, p->b);
70 }
71
write_varref(struct generator * g,struct name * p)72 static void write_varref(struct generator * g, struct name * p) { /* reference to variable */
73 if (p->type < t_routine) write_string(g, "z->");
74 write_varname(g, p);
75 }
76
write_hexdigit(struct generator * g,int i)77 static void write_hexdigit(struct generator * g, int i) {
78 str_append_ch(g->outbuf, "0123456789ABCDEF"[i & 0xF]); /* hexchar */
79 }
80
write_hex(struct generator * g,int i)81 static void write_hex(struct generator * g, int i) {
82 if (i >> 4) write_hex(g, i >> 4);
83 write_hexdigit(g, i); /* hex integer */
84 }
85
86 /* write character literal */
wlitch(struct generator * g,int ch)87 static void wlitch(struct generator * g, int ch) {
88 if (32 <= ch && ch < 127) {
89 write_char(g, '\'');
90 if (ch == '\'' || ch == '\\') {
91 write_char(g, '\\');
92 }
93 write_char(g, ch);
94 write_char(g, '\'');
95 } else {
96 write_string(g, "0x"); write_hex(g, ch);
97 }
98 }
99
wlitarray(struct generator * g,symbol * p)100 static void wlitarray(struct generator * g, symbol * p) { /* write literal array */
101
102 write_string(g, "{ ");
103 {
104 int i;
105 for (i = 0; i < SIZE(p); i++) {
106 wlitch(g, p[i]);
107 if (i < SIZE(p) - 1) write_string(g, ", ");
108 }
109 }
110 write_string(g, " }");
111 }
112
wlitref(struct generator * g,symbol * p)113 static void wlitref(struct generator * g, symbol * p) { /* write ref to literal array */
114
115 if (SIZE(p) == 0) {
116 write_char(g, '0');
117 } else {
118 struct str * s = g->outbuf;
119 g->outbuf = g->declarations;
120 write_string(g, "static const symbol s_"); write_int(g, g->literalstring_count); write_string(g, "[] = ");
121 wlitarray(g, p);
122 write_string(g, ";\n");
123 g->outbuf = s;
124 write_string(g, "s_"); write_int(g, g->literalstring_count);
125 g->literalstring_count++;
126 }
127 }
128
write_margin(struct generator * g)129 static void write_margin(struct generator * g) {
130 int i;
131 for (i = 0; i < g->margin; i++) write_string(g, " ");
132 }
133
write_comment_content(struct generator * g,struct node * p)134 void write_comment_content(struct generator * g, struct node * p) {
135 switch (p->type) {
136 case c_mathassign:
137 case c_plusassign:
138 case c_minusassign:
139 case c_multiplyassign:
140 case c_divideassign:
141 if (p->name) {
142 write_char(g, '$');
143 write_b(g, p->name->b);
144 write_char(g, ' ');
145 }
146 write_string(g, name_of_token(p->type));
147 write_string(g, " <integer expression>");
148 break;
149 case c_eq:
150 case c_ne:
151 case c_gr:
152 case c_ge:
153 case c_ls:
154 case c_le:
155 write_string(g, "$(<integer expression> ");
156 write_string(g, name_of_token(p->type));
157 write_string(g, " <integer expression>)");
158 break;
159 default:
160 write_string(g, name_of_token(p->type));
161 if (p->name) {
162 write_char(g, ' ');
163 write_b(g, p->name->b);
164 }
165 }
166 write_string(g, ", line ");
167 write_int(g, p->line_number);
168 }
169
write_comment(struct generator * g,struct node * p)170 static void write_comment(struct generator * g, struct node * p) {
171 if (g->options->comments) {
172 ws_opt_space(g, "/* ");
173 write_comment_content(g, p);
174 write_string(g, " */");
175 }
176 write_newline(g);
177 }
178
wms(struct generator * g,const char * s)179 static void wms(struct generator * g, const char * s) {
180 write_margin(g); write_string(g, s); } /* margin + string */
181
write_block_start(struct generator * g)182 static void write_block_start(struct generator * g) { /* block start */
183 wms(g, "{ ");
184 g->margin++;
185 }
186
write_block_end(struct generator * g)187 static void write_block_end(struct generator * g) { /* block end */
188
189 if (g->line_labelled == g->line_count) { wms(g, ";"); write_newline(g); }
190 g->margin--;
191 wms(g, "}"); write_newline(g);
192 }
193
194 static void w(struct generator * g, const char * s);
195
196 /* keep c */
wk(struct generator * g,struct node * p,int keep_limit)197 static void wk(struct generator * g, struct node * p, int keep_limit) {
198 ++g->keep_count;
199 if (p->mode == m_forward) {
200 write_string(g, "int c");
201 write_int(g, g->keep_count);
202 write_string(g, " = z->c");
203 if (keep_limit) {
204 write_string(g, ", mlimit");
205 write_int(g, g->keep_count);
206 }
207 write_char(g, ';');
208 } else {
209 write_string(g, "int m");
210 write_int(g, g->keep_count);
211 write_string(g, " = z->l - z->c");
212 if (keep_limit) {
213 write_string(g, ", mlimit");
214 write_int(g, g->keep_count);
215 }
216 write_string(g, "; (void)m");
217 write_int(g, g->keep_count);
218 write_char(g, ';');
219 }
220 }
221
wrestore(struct generator * g,struct node * p,int keep_token)222 static void wrestore(struct generator * g, struct node * p, int keep_token) { /* restore c */
223 if (p->mode == m_forward) {
224 write_string(g, "z->c = c");
225 } else {
226 write_string(g, "z->c = z->l - m");
227 }
228 write_int(g, keep_token); write_char(g, ';');
229 }
230
wrestorelimit(struct generator * g,struct node * p,int keep_token)231 static void wrestorelimit(struct generator * g, struct node * p, int keep_token) { /* restore limit */
232 if (p->mode == m_forward) {
233 w(g, "z->l += mlimit");
234 } else {
235 w(g, "z->lb = mlimit");
236 }
237 write_int(g, keep_token); write_string(g, ";");
238 }
239
winc(struct generator * g,struct node * p)240 static void winc(struct generator * g, struct node * p) { /* increment c */
241 write_string(g, p->mode == m_forward ? "z->c++;" :
242 "z->c--;");
243 }
244
wsetl(struct generator * g,int n)245 static void wsetl(struct generator * g, int n) {
246
247 g->margin--;
248 wms(g, "lab"); write_int(g, n); write_char(g, ':'); write_newline(g);
249 g->line_labelled = g->line_count;
250 g->margin++;
251 }
252
wgotol(struct generator * g,int n)253 static void wgotol(struct generator * g, int n) {
254 wms(g, "goto lab"); write_int(g, n); write_char(g, ';'); write_newline(g);
255 }
256
write_failure(struct generator * g,struct node * p)257 static void write_failure(struct generator * g, struct node * p) { /* fail */
258 if (g->failure_keep_count != 0) {
259 write_string(g, "{ ");
260 if (g->failure_keep_count > 0) {
261 wrestore(g, p, g->failure_keep_count);
262 } else {
263 wrestorelimit(g, p, -g->failure_keep_count);
264 }
265 write_char(g, ' ');
266 }
267 switch (g->failure_label) {
268 case x_return:
269 write_string(g, "return 0;");
270 break;
271 default:
272 write_string(g, "goto lab");
273 write_int(g, g->failure_label);
274 write_char(g, ';');
275 g->label_used = 1;
276 }
277 if (g->failure_keep_count != 0) write_string(g, " }");
278 }
279
280
281 /* if at limit fail */
write_check_limit(struct generator * g,struct node * p)282 static void write_check_limit(struct generator * g, struct node * p) {
283
284 write_string(g, p->mode == m_forward ? "if (z->c >= z->l) " :
285 "if (z->c <= z->lb) ");
286 write_failure(g, p);
287 }
288
write_data_address(struct generator * g,struct node * p)289 static void write_data_address(struct generator * g, struct node * p) {
290 symbol * b = p->literalstring;
291 if (b != 0) {
292 write_int(g, SIZE(b)); w(g, ", ");
293 wlitref(g, b);
294 } else {
295 write_varref(g, p->name);
296 }
297 }
298
299 /* Formatted write. */
writef(struct generator * g,const char * input,struct node * p)300 static void writef(struct generator * g, const char * input, struct node * p) {
301 int i = 0;
302 int l = strlen(input);
303
304 while (i < l) {
305 int ch = input[i++];
306 if (ch != '~') {
307 write_char(g, ch);
308 continue;
309 }
310 switch (input[i++]) {
311 default: write_char(g, input[i - 1]); continue;
312 case 'C': write_comment(g, p); continue;
313 case 'k': wk(g, p, false); continue;
314 case 'K': wk(g, p, true); continue;
315 case 'i': winc(g, p); continue;
316 case 'l': write_check_limit(g, p); continue;
317 case 'f': write_failure(g, p); continue;
318 case 'M': write_margin(g); continue;
319 case 'N': write_newline(g); continue;
320 case '{': write_block_start(g); continue;
321 case '}': write_block_end(g); continue;
322 case 'S': write_string(g, g->S[input[i++] - '0']); continue;
323 case 'I': write_int(g, g->I[input[i++] - '0']); continue;
324 case 'J': wi3(g, g->I[input[i++] - '0']); continue;
325 case 'V': write_varref(g, g->V[input[i++] - '0']); continue;
326 case 'W': write_varname(g, g->V[input[i++] - '0']); continue;
327 case 'L': wlitref(g, g->L[input[i++] - '0']); continue;
328 case 'A': wlitarray(g, g->L[input[i++] - '0']); continue;
329 case 'c': wlitch(g, g->I[input[i++] - '0']); continue;
330 case 'a': write_data_address(g, p); continue;
331 case '+': g->margin++; continue;
332 case '-': g->margin--; continue;
333 case '$': /* insert_s, insert_v etc */
334 write_char(g, p->literalstring == 0 ? 'v' : 's');
335 continue;
336 case 'p': write_string(g, g->options->externals_prefix); continue;
337 }
338 }
339 }
340
w(struct generator * g,const char * s)341 static void w(struct generator * g, const char * s) {
342 writef(g, s, 0);
343 }
344
generate_AE(struct generator * g,struct node * p)345 static void generate_AE(struct generator * g, struct node * p) {
346 const char * s;
347 switch (p->type) {
348 case c_name:
349 write_varref(g, p->name); break;
350 case c_number:
351 write_int(g, p->number); break;
352 case c_maxint:
353 write_string(g, "MAXINT"); break;
354 case c_minint:
355 write_string(g, "MININT"); break;
356 case c_neg:
357 write_char(g, '-'); generate_AE(g, p->right); break;
358 case c_multiply:
359 s = " * "; goto label0;
360 case c_plus:
361 s = " + "; goto label0;
362 case c_minus:
363 s = " - "; goto label0;
364 case c_divide:
365 s = " / ";
366 label0:
367 write_char(g, '('); generate_AE(g, p->left);
368 write_string(g, s); generate_AE(g, p->right); write_char(g, ')'); break;
369 case c_cursor:
370 w(g, "z->c"); break;
371 case c_limit:
372 w(g, p->mode == m_forward ? "z->l" : "z->lb"); break;
373 case c_len:
374 if (g->options->encoding == ENC_UTF8) {
375 w(g, "len_utf8(z->p)");
376 break;
377 }
378 /* FALLTHRU */
379 case c_size:
380 w(g, "SIZE(z->p)");
381 break;
382 case c_lenof:
383 if (g->options->encoding == ENC_UTF8) {
384 g->V[0] = p->name;
385 w(g, "len_utf8(~V0)");
386 break;
387 }
388 /* FALLTHRU */
389 case c_sizeof:
390 g->V[0] = p->name;
391 w(g, "SIZE(~V0)");
392 break;
393 }
394 }
395
396 /* K_needed() tests to see if we really need to keep c. Not true when the
397 command does not touch the cursor. This and repeat_score() could be
398 elaborated almost indefinitely.
399 */
400
K_needed_(struct generator * g,struct node * p,int call_depth)401 static int K_needed_(struct generator * g, struct node * p, int call_depth) {
402 while (p) {
403 switch (p->type) {
404 case c_atlimit:
405 case c_do:
406 case c_dollar:
407 case c_leftslice:
408 case c_rightslice:
409 case c_mathassign:
410 case c_plusassign:
411 case c_minusassign:
412 case c_multiplyassign:
413 case c_divideassign:
414 case c_eq:
415 case c_ne:
416 case c_gr:
417 case c_ge:
418 case c_ls:
419 case c_le:
420 case c_sliceto:
421 case c_booltest:
422 case c_set:
423 case c_unset:
424 case c_true:
425 case c_false:
426 case c_debug:
427 break;
428
429 case c_call:
430 /* Recursive functions aren't typical in snowball programs, so
431 * make the pessimistic assumption that keep is needed if we
432 * hit a generous limit on recursion. It's not likely to make
433 * a difference to any real world program, but means we won't
434 * recurse until we run out of stack for pathological cases.
435 */
436 if (call_depth >= 100) return true;
437 if (K_needed_(g, p->name->definition, call_depth + 1))
438 return true;
439 break;
440
441 case c_bra:
442 if (K_needed_(g, p->left, call_depth)) return true;
443 break;
444
445 default: return true;
446 }
447 p = p->right;
448 }
449 return false;
450 }
451
K_needed(struct generator * g,struct node * p)452 extern int K_needed(struct generator * g, struct node * p) {
453 return K_needed_(g, p, 0);
454 }
455
repeat_score(struct generator * g,struct node * p,int call_depth)456 static int repeat_score(struct generator * g, struct node * p, int call_depth) {
457 int score = 0;
458 while (p) {
459 switch (p->type) {
460 case c_dollar:
461 case c_leftslice:
462 case c_rightslice:
463 case c_mathassign:
464 case c_plusassign:
465 case c_minusassign:
466 case c_multiplyassign:
467 case c_divideassign:
468 case c_eq:
469 case c_ne:
470 case c_gr:
471 case c_ge:
472 case c_ls:
473 case c_le:
474 case c_sliceto: /* case c_not: must not be included here! */
475 case c_debug:
476 break;
477
478 case c_call:
479 /* Recursive functions aren't typical in snowball programs, so
480 * make the pessimistic assumption that repeat requires cursor
481 * reinstatement if we hit a generous limit on recursion. It's
482 * not likely to make a difference to any real world program,
483 * but means we won't recurse until we run out of stack for
484 * pathological cases.
485 */
486 if (call_depth >= 100) {
487 return 2;
488 }
489 score += repeat_score(g, p->name->definition, call_depth + 1);
490 if (score >= 2)
491 return score;
492 break;
493
494 case c_bra:
495 score += repeat_score(g, p->left, call_depth);
496 if (score >= 2)
497 return score;
498 break;
499
500 case c_name:
501 case c_literalstring:
502 case c_next:
503 case c_grouping:
504 case c_non:
505 case c_hop:
506 if (++score >= 2)
507 return score;
508 break;
509
510 default:
511 return 2;
512 }
513 p = p->right;
514 }
515 return score;
516 }
517
518 /* tests if an expression requires cursor reinstatement in a repeat */
519
repeat_restore(struct generator * g,struct node * p)520 extern int repeat_restore(struct generator * g, struct node * p) {
521 return repeat_score(g, p, 0) >= 2;
522 }
523
generate_bra(struct generator * g,struct node * p)524 static void generate_bra(struct generator * g, struct node * p) {
525 p = p->left;
526 while (p) {
527 generate(g, p);
528 p = p->right;
529 }
530 }
531
generate_and(struct generator * g,struct node * p)532 static void generate_and(struct generator * g, struct node * p) {
533 int keep_c = 0;
534 if (K_needed(g, p->left)) {
535 writef(g, "~{~k~C", p);
536 keep_c = g->keep_count;
537 } else {
538 writef(g, "~M~C", p);
539 }
540 p = p->left;
541 while (p) {
542 generate(g, p);
543 if (keep_c && p->right != 0) {
544 w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
545 }
546 p = p->right;
547 }
548 if (keep_c) w(g, "~}");
549 }
550
generate_or(struct generator * g,struct node * p)551 static void generate_or(struct generator * g, struct node * p) {
552 int keep_c = 0;
553
554 int used = g->label_used;
555 int a0 = g->failure_label;
556 int a1 = g->failure_keep_count;
557
558 int out_lab = new_label(g);
559
560 if (K_needed(g, p->left)) {
561 writef(g, "~{~k~C", p);
562 keep_c = g->keep_count;
563 } else {
564 writef(g, "~M~C", p);
565 }
566 p = p->left;
567 g->failure_keep_count = 0;
568 while (p->right) {
569 g->failure_label = new_label(g);
570 g->label_used = 0;
571 generate(g, p);
572 wgotol(g, out_lab);
573 if (g->label_used)
574 wsetl(g, g->failure_label);
575 if (keep_c) {
576 w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
577 }
578 p = p->right;
579 }
580 g->label_used = used;
581 g->failure_label = a0;
582 g->failure_keep_count = a1;
583
584 generate(g, p);
585 if (keep_c) w(g, "~}");
586 wsetl(g, out_lab);
587 }
588
generate_backwards(struct generator * g,struct node * p)589 static void generate_backwards(struct generator * g, struct node * p) {
590
591 writef(g, "~Mz->lb = z->c; z->c = z->l;~C~N", p);
592 generate(g, p->left);
593 w(g, "~Mz->c = z->lb;~N");
594 }
595
596
generate_not(struct generator * g,struct node * p)597 static void generate_not(struct generator * g, struct node * p) {
598 int keep_c = 0;
599
600 int used = g->label_used;
601 int a0 = g->failure_label;
602 int a1 = g->failure_keep_count;
603
604 if (K_needed(g, p->left)) {
605 writef(g, "~{~k~C", p);
606 keep_c = g->keep_count;
607 } else {
608 writef(g, "~M~C", p);
609 }
610
611 g->failure_label = new_label(g);
612 g->label_used = 0;
613 g->failure_keep_count = 0;
614 generate(g, p->left);
615
616 {
617 int l = g->failure_label;
618 int u = g->label_used;
619
620 g->label_used = used;
621 g->failure_label = a0;
622 g->failure_keep_count = a1;
623
624 writef(g, "~M~f~N", p);
625 if (u)
626 wsetl(g, l);
627 }
628 if (keep_c) {
629 w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N~}");
630 }
631 }
632
633
generate_try(struct generator * g,struct node * p)634 static void generate_try(struct generator * g, struct node * p) {
635 int keep_c = 0;
636 if (K_needed(g, p->left)) {
637 writef(g, "~{~k~C", p);
638 keep_c = g->keep_count;
639 } else {
640 writef(g, "~M~C", p);
641 }
642 g->failure_keep_count = keep_c;
643
644 g->failure_label = new_label(g);
645 g->label_used = 0;
646 generate(g, p->left);
647
648 if (g->label_used)
649 wsetl(g, g->failure_label);
650
651 if (keep_c) w(g, "~}");
652 }
653
generate_set(struct generator * g,struct node * p)654 static void generate_set(struct generator * g, struct node * p) {
655 g->V[0] = p->name; writef(g, "~M~V0 = 1;~C", p);
656 }
657
generate_unset(struct generator * g,struct node * p)658 static void generate_unset(struct generator * g, struct node * p) {
659 g->V[0] = p->name; writef(g, "~M~V0 = 0;~C", p);
660 }
661
generate_fail(struct generator * g,struct node * p)662 static void generate_fail(struct generator * g, struct node * p) {
663 generate(g, p->left);
664 writef(g, "~M~f~C", p);
665 }
666
667 /* generate_test() also implements 'reverse' */
668
generate_test(struct generator * g,struct node * p)669 static void generate_test(struct generator * g, struct node * p) {
670 int keep_c = 0;
671 if (K_needed(g, p->left)) {
672 keep_c = ++g->keep_count;
673 w(g, p->mode == m_forward ? "~{int c_test" :
674 "~{int m_test");
675 write_int(g, keep_c);
676 w(g, p->mode == m_forward ? " = z->c;" :
677 " = z->l - z->c;");
678 writef(g, "~C", p);
679 } else writef(g, "~M~C", p);
680
681 generate(g, p->left);
682
683 if (keep_c) {
684 w(g, p->mode == m_forward ? "~Mz->c = c_test" :
685 "~Mz->c = z->l - m_test");
686 write_int(g, keep_c);
687 writef(g, ";~N~}", p);
688 }
689 }
690
generate_do(struct generator * g,struct node * p)691 static void generate_do(struct generator * g, struct node * p) {
692 int keep_c = 0;
693 if (K_needed(g, p->left)) {
694 writef(g, "~{~k~C", p);
695 keep_c = g->keep_count;
696 } else {
697 writef(g, "~M~C", p);
698 }
699
700 if (p->left->type == c_call) {
701 /* Optimise do <call> */
702 g->V[0] = p->left->name;
703 writef(g, "~{int ret = ~V0(z);~C", p->left);
704 w(g, "~Mif (ret < 0) return ret;~N~}");
705 } else {
706 g->failure_label = new_label(g);
707 g->label_used = 0;
708 g->failure_keep_count = 0;
709 generate(g, p->left);
710
711 if (g->label_used)
712 wsetl(g, g->failure_label);
713 }
714 if (keep_c) {
715 w(g, "~M"); wrestore(g, p, keep_c);
716 w(g, "~N~}");
717 }
718 }
719
generate_next(struct generator * g,struct node * p)720 static void generate_next(struct generator * g, struct node * p) {
721 if (g->options->encoding == ENC_UTF8) {
722 if (p->mode == m_forward)
723 w(g, "~{int ret = skip_utf8(z->p, z->c, z->l, 1");
724 else
725 w(g, "~{int ret = skip_b_utf8(z->p, z->c, z->lb, 1");
726 writef(g, ");~N"
727 "~Mif (ret < 0) ~f~N"
728 "~Mz->c = ret;~C"
729 "~}", p);
730 } else
731 writef(g, "~M~l~N"
732 "~M~i~C", p);
733 }
734
generate_GO_grouping(struct generator * g,struct node * p,int is_goto,int complement)735 static void generate_GO_grouping(struct generator * g, struct node * p, int is_goto, int complement) {
736
737 struct grouping * q = p->name->grouping;
738 g->S[0] = p->mode == m_forward ? "" : "_b";
739 g->S[1] = complement ? "in" : "out";
740 g->S[2] = g->options->encoding == ENC_UTF8 ? "_U" : "";
741 g->V[0] = p->name;
742 g->I[0] = q->smallest_ch;
743 g->I[1] = q->largest_ch;
744 if (is_goto) {
745 writef(g, "~Mif (~S1_grouping~S0~S2(z, ~V0, ~I0, ~I1, 1) < 0) ~f~C", p);
746 } else {
747 writef(g, "~{~C"
748 "~Mint ret = ~S1_grouping~S0~S2(z, ~V0, ~I0, ~I1, 1);~N"
749 "~Mif (ret < 0) ~f~N", p);
750 if (p->mode == m_forward)
751 w(g, "~Mz->c += ret;~N");
752 else
753 w(g, "~Mz->c -= ret;~N");
754 w(g, "~}");
755 }
756 }
757
generate_GO(struct generator * g,struct node * p,int style)758 static void generate_GO(struct generator * g, struct node * p, int style) {
759 int keep_c = 0;
760
761 int used = g->label_used;
762 int a0 = g->failure_label;
763 int a1 = g->failure_keep_count;
764
765 if (p->left->type == c_grouping || p->left->type == c_non) {
766 /* Special case for "goto" or "gopast" when used on a grouping or an
767 * inverted grouping - the movement of c by the matching action is
768 * exactly what we want! */
769 #ifdef OPTIMISATION_WARNINGS
770 printf("Optimising %s %s\n", style ? "goto" : "gopast", p->left->type == c_non ? "non" : "grouping");
771 #endif
772 if (g->options->comments) {
773 writef(g, "~M~C", p);
774 }
775 generate_GO_grouping(g, p->left, style, p->left->type == c_non);
776 return;
777 }
778
779 w(g, "~Mwhile(1) {"); writef(g, "~C~+", p);
780
781 if (style == 1 || repeat_restore(g, p->left)) {
782 writef(g, "~M~k~N", p);
783 keep_c = g->keep_count;
784 }
785
786 g->failure_label = new_label(g);
787 g->label_used = 0;
788 g->failure_keep_count = 0;
789 generate(g, p->left);
790
791 if (style == 1) {
792 /* include for goto; omit for gopast */
793 w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
794 }
795 w(g, "~Mbreak;~N");
796 if (g->label_used)
797 wsetl(g, g->failure_label);
798 if (keep_c) {
799 w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
800 }
801
802 g->label_used = used;
803 g->failure_label = a0;
804 g->failure_keep_count = a1;
805
806 generate_next(g, p);
807 w(g, "~}");
808 }
809
generate_loop(struct generator * g,struct node * p)810 static void generate_loop(struct generator * g, struct node * p) {
811 w(g, "~{int i; for (i = "); generate_AE(g, p->AE); writef(g, "; i > 0; i--)~C"
812 "~{", p);
813
814 generate(g, p->left);
815
816 w(g, "~}"
817 "~}");
818 }
819
generate_repeat_or_atleast(struct generator * g,struct node * p,int atleast_case)820 static void generate_repeat_or_atleast(struct generator * g, struct node * p, int atleast_case) {
821 int keep_c = 0;
822 if (atleast_case) {
823 writef(g, "~Mwhile(1) {~+~N", p);
824 } else {
825 writef(g, "~Mwhile(1) {~+~C", p);
826 }
827
828 if (repeat_restore(g, p->left)) {
829 writef(g, "~M~k~N", p);
830 keep_c = g->keep_count;
831 }
832
833 g->failure_label = new_label(g);
834 g->label_used = 0;
835 g->failure_keep_count = 0;
836 generate(g, p->left);
837
838 if (atleast_case) w(g, "~Mi--;~N");
839
840 w(g, "~Mcontinue;~N");
841 if (g->label_used)
842 wsetl(g, g->failure_label);
843
844 if (keep_c) {
845 w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
846 }
847
848 w(g, "~Mbreak;~N"
849 "~}");
850 }
851
generate_repeat(struct generator * g,struct node * p)852 static void generate_repeat(struct generator * g, struct node * p) {
853 generate_repeat_or_atleast(g, p, false);
854 }
855
generate_atleast(struct generator * g,struct node * p)856 static void generate_atleast(struct generator * g, struct node * p) {
857 w(g, "~{int i = "); generate_AE(g, p->AE); w(g, ";~C");
858 {
859 int used = g->label_used;
860 int a0 = g->failure_label;
861 int a1 = g->failure_keep_count;
862
863 generate_repeat_or_atleast(g, p, true);
864
865 g->label_used = used;
866 g->failure_label = a0;
867 g->failure_keep_count = a1;
868 }
869 writef(g, "~Mif (i > 0) ~f~N"
870 "~}", p);
871 }
872
generate_setmark(struct generator * g,struct node * p)873 static void generate_setmark(struct generator * g, struct node * p) {
874 g->V[0] = p->name;
875 writef(g, "~M~V0 = z->c;~C", p);
876 }
877
generate_tomark(struct generator * g,struct node * p)878 static void generate_tomark(struct generator * g, struct node * p) {
879 g->S[0] = p->mode == m_forward ? ">" : "<";
880
881 w(g, "~Mif (z->c ~S0 "); generate_AE(g, p->AE); writef(g, ") ~f~N", p);
882 w(g, "~Mz->c = "); generate_AE(g, p->AE); writef(g, ";~C", p);
883 }
884
generate_atmark(struct generator * g,struct node * p)885 static void generate_atmark(struct generator * g, struct node * p) {
886
887 w(g, "~Mif (z->c != "); generate_AE(g, p->AE); writef(g, ") ~f~C", p);
888 }
889
generate_hop(struct generator * g,struct node * p)890 static void generate_hop(struct generator * g, struct node * p) {
891 if (g->options->encoding == ENC_UTF8) {
892 g->S[0] = p->mode == m_forward ? "" : "_b";
893 g->S[1] = p->mode == m_forward ? "z->l" : "z->lb";
894 w(g, "~{int ret = skip~S0_utf8(z->p, z->c, ~S1, ");
895 generate_AE(g, p->AE);
896 writef(g, ");~C", p);
897 writef(g, "~Mif (ret < 0) ~f~N", p);
898 writef(g, "~Mz->c = ret;~N"
899 "~}", p);
900 } else {
901 // Fixed-width characters.
902 g->S[0] = p->mode == m_forward ? "+" : "-";
903 if (p->AE->type == c_number) {
904 // Constant distance hop.
905 //
906 // No need to check for negative hop as that's converted to false by
907 // the analyser.
908 //
909 // Note that if we signal f then z->c will be reset when this is
910 // handled - we rely on this here and unconditionally update z->c.
911 w(g, "z->c = z->c ~S0 ");
912 generate_AE(g, p->AE);
913 w(g, ";~C");
914 if (p->mode == m_forward) {
915 writef(g, "~Mif (z->c > z->l) ~f~N", p);
916 } else {
917 writef(g, "~Mif (z->c < z->lb) ~f~N", p);
918 }
919 } else {
920 w(g, "~{int ret = z->c ~S0 ");
921 generate_AE(g, p->AE);
922 writef(g, ";~C", p);
923 if (p->mode == m_forward) {
924 writef(g, "~Mif (ret > z->l || ret < z->c) ~f~N", p);
925 } else {
926 writef(g, "~Mif (ret < z->lb || ret > z->c) ~f~N", p);
927 }
928 writef(g, "~Mz->c = ret;~N"
929 "~}", p);
930 }
931 }
932 }
933
generate_delete(struct generator * g,struct node * p)934 static void generate_delete(struct generator * g, struct node * p) {
935 writef(g, "~{int ret = slice_del(z);~C", p);
936 writef(g, "~Mif (ret < 0) return ret;~N"
937 "~}", p);
938 }
939
generate_tolimit(struct generator * g,struct node * p)940 static void generate_tolimit(struct generator * g, struct node * p) {
941 g->S[0] = p->mode == m_forward ? "" : "b";
942 writef(g, "~Mz->c = z->l~S0;~C", p);
943 }
944
generate_atlimit(struct generator * g,struct node * p)945 static void generate_atlimit(struct generator * g, struct node * p) {
946 g->S[0] = p->mode == m_forward ? "" : "b";
947 g->S[1] = p->mode == m_forward ? "<" : ">";
948 writef(g, "~Mif (z->c ~S1 z->l~S0) ~f~C", p);
949 }
950
generate_leftslice(struct generator * g,struct node * p)951 static void generate_leftslice(struct generator * g, struct node * p) {
952 g->S[0] = p->mode == m_forward ? "bra" : "ket";
953 writef(g, "~Mz->~S0 = z->c;~C", p);
954 }
955
generate_rightslice(struct generator * g,struct node * p)956 static void generate_rightslice(struct generator * g, struct node * p) {
957 g->S[0] = p->mode == m_forward ? "ket" : "bra";
958 writef(g, "~Mz->~S0 = z->c;~C", p);
959 }
960
generate_assignto(struct generator * g,struct node * p)961 static void generate_assignto(struct generator * g, struct node * p) {
962 g->V[0] = p->name;
963 writef(g, "~M~V0 = assign_to(z, ~V0);~C"
964 "~Mif (~V0 == 0) return -1;~C", p);
965 }
966
generate_sliceto(struct generator * g,struct node * p)967 static void generate_sliceto(struct generator * g, struct node * p) {
968 g->V[0] = p->name;
969 writef(g, "~M~V0 = slice_to(z, ~V0);~C"
970 "~Mif (~V0 == 0) return -1;~N", p);
971 }
972
generate_insert(struct generator * g,struct node * p,int style)973 static void generate_insert(struct generator * g, struct node * p, int style) {
974
975 int keep_c = style == c_attach;
976 if (p->mode == m_backward) keep_c = !keep_c;
977 writef(g, "~{int ret;~N", p);
978 if (keep_c) w(g, "~{int saved_c = z->c;~N");
979 writef(g, "~Mret = insert_~$(z, z->c, z->c, ~a);~C", p);
980 if (keep_c) w(g, "~Mz->c = saved_c;~N~}");
981 writef(g, "~Mif (ret < 0) return ret;~N"
982 "~}", p);
983 }
984
generate_assignfrom(struct generator * g,struct node * p)985 static void generate_assignfrom(struct generator * g, struct node * p) {
986
987 int keep_c = p->mode == m_forward; /* like 'attach' */
988 writef(g, "~{int ret;~N", p);
989 if (keep_c) writef(g, "~{int saved_c = z->c;~N", p);
990 w(g, "~Mret = ");
991 writef(g, keep_c ? "insert_~$(z, z->c, z->l, ~a);~C" : "insert_~$(z, z->lb, z->c, ~a);~C", p);
992 if (keep_c) w(g, "~Mz->c = saved_c;~N~}");
993 writef(g, "~Mif (ret < 0) return ret;~N"
994 "~}", p);
995 }
996
generate_slicefrom(struct generator * g,struct node * p)997 static void generate_slicefrom(struct generator * g, struct node * p) {
998 writef(g, "~{int ret = slice_from_~$(z, ~a);~C", p);
999 writef(g, "~Mif (ret < 0) return ret;~N"
1000 "~}", p);
1001 }
1002
generate_setlimit(struct generator * g,struct node * p)1003 static void generate_setlimit(struct generator * g, struct node * p) {
1004 int keep_c;
1005 if (p->left && p->left->type == c_tomark) {
1006 /* Special case for:
1007 *
1008 * setlimit tomark AE for C
1009 *
1010 * All uses of setlimit in the current stemmers we ship follow this
1011 * pattern, and by special-casing we can avoid having to save and
1012 * restore c.
1013 */
1014 struct node * q = p->left;
1015
1016 ++g->keep_count;
1017 writef(g, "~N~{int mlimit", p);
1018 write_int(g, g->keep_count);
1019 writef(g, ";~C", p);
1020 keep_c = g->keep_count;
1021
1022 g->S[0] = q->mode == m_forward ? ">" : "<";
1023
1024 w(g, "~Mif (z->c ~S0 "); generate_AE(g, q->AE); writef(g, ") ~f~N", q);
1025 w(g, "~Mmlimit");
1026 write_int(g, keep_c);
1027 if (p->mode == m_forward) {
1028 w(g, " = z->l - z->c; z->l = ");
1029 } else {
1030 w(g, " = z->lb; z->lb = ");
1031 }
1032 generate_AE(g, q->AE);
1033 w(g, ";~N");
1034 } else {
1035 writef(g, "~{~K~C", p);
1036 keep_c = g->keep_count;
1037 generate(g, p->left);
1038
1039 w(g, "~Mmlimit");
1040 write_int(g, keep_c);
1041 if (p->mode == m_forward)
1042 w(g, " = z->l - z->c; z->l = z->c;~N");
1043 else
1044 w(g, " = z->lb; z->lb = z->c;~N");
1045 w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
1046 }
1047
1048 g->failure_keep_count = -keep_c;
1049 generate(g, p->aux);
1050 w(g, "~M");
1051 wrestorelimit(g, p, -g->failure_keep_count);
1052 w(g, "~N"
1053 "~}");
1054 }
1055
1056 /* dollar sets snowball up to operate on a string variable as if it were the
1057 * current string */
generate_dollar(struct generator * g,struct node * p)1058 static void generate_dollar(struct generator * g, struct node * p) {
1059
1060 int used = g->label_used;
1061 int a0 = g->failure_label;
1062 int a1 = g->failure_keep_count;
1063 int keep_token;
1064 g->failure_label = new_label(g);
1065 g->label_used = 0;
1066 g->failure_keep_count = 0;
1067
1068 keep_token = ++g->keep_count;
1069 g->I[0] = keep_token;
1070 writef(g, "~{struct SN_env env~I0 = * z;~C", p);
1071 g->V[0] = p->name;
1072 /* Assume failure. */
1073 writef(g, "~Mint failure = 1;~N"
1074 "~Mz->p = ~V0;~N"
1075 "~Mz->lb = z->c = 0;~N"
1076 "~Mz->l = SIZE(z->p);~N", p);
1077 generate(g, p->left);
1078 /* Mark success. */
1079 w(g, "~Mfailure = 0;~N");
1080 if (g->label_used)
1081 wsetl(g, g->failure_label);
1082 g->V[0] = p->name; /* necessary */
1083
1084 g->label_used = used;
1085 g->failure_label = a0;
1086 g->failure_keep_count = a1;
1087
1088 g->I[0] = keep_token;
1089 writef(g, "~M~V0 = z->p;~N"
1090 "~M* z = env~I0;~N"
1091 "~Mif (failure) ~f~N~}", p);
1092 }
1093
generate_integer_assign(struct generator * g,struct node * p,char * s)1094 static void generate_integer_assign(struct generator * g, struct node * p, char * s) {
1095
1096 g->V[0] = p->name;
1097 g->S[0] = s;
1098 w(g, "~M~V0 ~S0 "); generate_AE(g, p->AE); writef(g, ";~C", p);
1099 }
1100
generate_integer_test(struct generator * g,struct node * p,char * s)1101 static void generate_integer_test(struct generator * g, struct node * p, char * s) {
1102
1103 w(g, "~Mif (!(");
1104 generate_AE(g, p->left);
1105 write_char(g, ' ');
1106 write_string(g, s);
1107 write_char(g, ' ');
1108 generate_AE(g, p->AE);
1109 writef(g, ")) ~f~C", p);
1110 }
1111
generate_call(struct generator * g,struct node * p)1112 static void generate_call(struct generator * g, struct node * p) {
1113
1114 g->V[0] = p->name;
1115 writef(g, "~{int ret = ~V0(z);~C", p);
1116 if (g->failure_keep_count == 0 && g->failure_label == x_return) {
1117 /* Combine the two tests in this special case for better optimisation
1118 * and clearer generated code. */
1119 writef(g, "~Mif (ret <= 0) return ret;~N~}", p);
1120 } else {
1121 writef(g, "~Mif (ret == 0) ~f~N"
1122 "~Mif (ret < 0) return ret;~N~}", p);
1123 }
1124 }
1125
generate_grouping(struct generator * g,struct node * p,int complement)1126 static void generate_grouping(struct generator * g, struct node * p, int complement) {
1127
1128 struct grouping * q = p->name->grouping;
1129 g->S[0] = p->mode == m_forward ? "" : "_b";
1130 g->S[1] = complement ? "out" : "in";
1131 g->S[2] = g->options->encoding == ENC_UTF8 ? "_U" : "";
1132 g->V[0] = p->name;
1133 g->I[0] = q->smallest_ch;
1134 g->I[1] = q->largest_ch;
1135 writef(g, "~Mif (~S1_grouping~S0~S2(z, ~V0, ~I0, ~I1, 0)) ~f~C", p);
1136 }
1137
generate_namedstring(struct generator * g,struct node * p)1138 static void generate_namedstring(struct generator * g, struct node * p) {
1139
1140 g->S[0] = p->mode == m_forward ? "" : "_b";
1141 g->V[0] = p->name;
1142 writef(g, "~Mif (!(eq_v~S0(z, ~V0))) ~f~C", p);
1143 }
1144
generate_literalstring(struct generator * g,struct node * p)1145 static void generate_literalstring(struct generator * g, struct node * p) {
1146 symbol * b = p->literalstring;
1147 if (SIZE(b) == 1) {
1148 /* It's quite common to compare with a single character literal string,
1149 * so just inline the simpler code for this case rather than making a
1150 * function call. In UTF-8 mode, only do this for the ASCII subset,
1151 * since multi-byte characters are more complex to test against.
1152 */
1153 if (g->options->encoding == ENC_UTF8 && *b >= 128) {
1154 printf("single byte %d\n", *b);
1155 exit(1);
1156 }
1157 g->I[0] = *b;
1158 if (p->mode == m_forward) {
1159 writef(g, "~Mif (z->c == z->l || z->p[z->c] != ~c0) ~f~C"
1160 "~Mz->c++;~N", p);
1161 } else {
1162 writef(g, "~Mif (z->c <= z->lb || z->p[z->c - 1] != ~c0) ~f~C"
1163 "~Mz->c--;~N", p);
1164 }
1165 } else {
1166 g->S[0] = p->mode == m_forward ? "" : "_b";
1167 g->I[0] = SIZE(b);
1168 g->L[0] = b;
1169
1170 writef(g, "~Mif (!(eq_s~S0(z, ~I0, ~L0))) ~f~C", p);
1171 }
1172 }
1173
generate_define(struct generator * g,struct node * p)1174 static void generate_define(struct generator * g, struct node * p) {
1175 struct name * q = p->name;
1176 g->next_label = 0;
1177
1178 g->S[0] = q->type == t_routine ? "static" : "extern";
1179 g->V[0] = q;
1180
1181 w(g, "~N~S0 int ~V0(struct SN_env * z) {");
1182 if (g->options->comments) {
1183 write_string(g, p->mode == m_forward ? " /* forwardmode */" : " /* backwardmode */");
1184 }
1185 w(g, "~N~+");
1186 if (p->amongvar_needed) w(g, "~Mint among_var;~N");
1187 g->failure_keep_count = 0;
1188 g->failure_label = x_return;
1189 g->label_used = 0;
1190 g->keep_count = 0;
1191 generate(g, p->left);
1192 w(g, "~Mreturn 1;~N~}");
1193 }
1194
generate_substring(struct generator * g,struct node * p)1195 static void generate_substring(struct generator * g, struct node * p) {
1196
1197 struct among * x = p->among;
1198 int block = -1;
1199 unsigned int bitmap = 0;
1200 struct amongvec * among_cases = x->b;
1201 int c;
1202 int empty_case = -1;
1203 int n_cases = 0;
1204 symbol cases[2];
1205 int shortest_size = INT_MAX;
1206 int shown_comment = 0;
1207
1208 g->S[0] = p->mode == m_forward ? "" : "_b";
1209 g->I[0] = x->number;
1210 g->I[1] = x->literalstring_count;
1211
1212 /* In forward mode with non-ASCII UTF-8 characters, the first byte
1213 * of the string will often be the same, so instead look at the last
1214 * common byte position.
1215 *
1216 * In backward mode, we can't match if there are fewer characters before
1217 * the current position than the minimum length.
1218 */
1219 for (c = 0; c < x->literalstring_count; ++c) {
1220 int size = among_cases[c].size;
1221 if (size != 0 && size < shortest_size) {
1222 shortest_size = size;
1223 }
1224 }
1225
1226 for (c = 0; c < x->literalstring_count; ++c) {
1227 symbol ch;
1228 if (among_cases[c].size == 0) {
1229 empty_case = c;
1230 continue;
1231 }
1232 if (p->mode == m_forward) {
1233 ch = among_cases[c].b[shortest_size - 1];
1234 } else {
1235 ch = among_cases[c].b[among_cases[c].size - 1];
1236 }
1237 if (n_cases == 0) {
1238 block = ch >> 5;
1239 } else if (ch >> 5 != block) {
1240 block = -1;
1241 if (n_cases > 2) break;
1242 }
1243 if (block == -1) {
1244 if (n_cases > 0 && ch == cases[0]) continue;
1245 if (n_cases < 2) {
1246 cases[n_cases++] = ch;
1247 } else if (ch != cases[1]) {
1248 ++n_cases;
1249 break;
1250 }
1251 } else {
1252 if ((bitmap & (1u << (ch & 0x1f))) == 0) {
1253 bitmap |= 1u << (ch & 0x1f);
1254 if (n_cases < 2)
1255 cases[n_cases] = ch;
1256 ++n_cases;
1257 }
1258 }
1259 }
1260
1261 if (block != -1 || n_cases <= 2) {
1262 char buf[64];
1263 g->I[2] = block;
1264 g->I[3] = bitmap;
1265 g->I[4] = shortest_size - 1;
1266 if (p->mode == m_forward) {
1267 sprintf(buf, "z->p[z->c + %d]", shortest_size - 1);
1268 g->S[1] = buf;
1269 if (shortest_size == 1) {
1270 writef(g, "~Mif (z->c >= z->l", p);
1271 } else {
1272 writef(g, "~Mif (z->c + ~I4 >= z->l", p);
1273 }
1274 } else {
1275 g->S[1] = "z->p[z->c - 1]";
1276 if (shortest_size == 1) {
1277 writef(g, "~Mif (z->c <= z->lb", p);
1278 } else {
1279 writef(g, "~Mif (z->c - ~I4 <= z->lb", p);
1280 }
1281 }
1282 if (n_cases == 0) {
1283 /* We get this for the degenerate case: among ( '' )
1284 * This doesn't seem to be a useful construct, but it is
1285 * syntactically valid.
1286 */
1287 } else if (n_cases == 1) {
1288 g->I[4] = cases[0];
1289 writef(g, " || ~S1 != ~I4", p);
1290 } else if (n_cases == 2) {
1291 g->I[4] = cases[0];
1292 g->I[5] = cases[1];
1293 writef(g, " || (~S1 != ~I4 && ~S1 != ~I5)", p);
1294 } else {
1295 writef(g, " || ~S1 >> 5 != ~I2 || !((~I3 >> (~S1 & 0x1f)) & 1)", p);
1296 }
1297 write_string(g, ") ");
1298 if (empty_case != -1) {
1299 /* If the among includes the empty string, it can never fail
1300 * so not matching the bitmap means we match the empty string.
1301 */
1302 g->I[4] = among_cases[empty_case].result;
1303 writef(g, "among_var = ~I4; else~C", p);
1304 } else {
1305 writef(g, "~f~C", p);
1306 }
1307 shown_comment = 1;
1308 } else {
1309 #ifdef OPTIMISATION_WARNINGS
1310 printf("Couldn't shortcut among %d\n", x->number);
1311 #endif
1312 }
1313
1314 if (!x->amongvar_needed) {
1315 writef(g, "~Mif (!(find_among~S0(z, a_~I0, ~I1))) ~f", p);
1316 writef(g, shown_comment ? "~N" : "~C", p);
1317 } else {
1318 writef(g, "~Mamong_var = find_among~S0(z, a_~I0, ~I1);", p);
1319 writef(g, shown_comment ? "~N" : "~C", p);
1320 writef(g, "~Mif (!(among_var)) ~f~N", p);
1321 }
1322 }
1323
generate_among(struct generator * g,struct node * p)1324 static void generate_among(struct generator * g, struct node * p) {
1325
1326 struct among * x = p->among;
1327
1328 if (x->substring == 0) generate_substring(g, p);
1329
1330 if (x->starter != 0) generate(g, x->starter);
1331
1332 if (x->command_count == 1 && x->nocommand_count == 0) {
1333 /* Only one outcome ("no match" already handled). */
1334 generate(g, x->commands[0]);
1335 } else if (x->command_count > 0) {
1336 int i;
1337 writef(g, "~Mswitch (among_var) {~C~+", p);
1338 for (i = 1; i <= x->command_count; i++) {
1339 g->I[0] = i;
1340 w(g, "~Mcase ~I0:~N~+");
1341 generate(g, x->commands[i - 1]);
1342 w(g, "~Mbreak;~N~-");
1343 }
1344 w(g, "~}");
1345 }
1346 }
1347
generate_booltest(struct generator * g,struct node * p)1348 static void generate_booltest(struct generator * g, struct node * p) {
1349
1350 g->V[0] = p->name;
1351 writef(g, "~Mif (!(~V0)) ~f~C", p);
1352 }
1353
generate_false(struct generator * g,struct node * p)1354 static void generate_false(struct generator * g, struct node * p) {
1355
1356 writef(g, "~M~f~C", p);
1357 }
1358
generate_debug(struct generator * g,struct node * p)1359 static void generate_debug(struct generator * g, struct node * p) {
1360
1361 g->I[0] = g->debug_count++;
1362 g->I[1] = p->line_number;
1363 writef(g, "~Mdebug(z, ~I0, ~I1);~C", p);
1364
1365 }
1366
generate(struct generator * g,struct node * p)1367 static void generate(struct generator * g, struct node * p) {
1368
1369 int used = g->label_used;
1370 int a0 = g->failure_label;
1371 int a1 = g->failure_keep_count;
1372
1373 switch (p->type) {
1374 case c_define: generate_define(g, p); break;
1375 case c_bra: generate_bra(g, p); break;
1376 case c_and: generate_and(g, p); break;
1377 case c_or: generate_or(g, p); break;
1378 case c_backwards: generate_backwards(g, p); break;
1379 case c_not: generate_not(g, p); break;
1380 case c_set: generate_set(g, p); break;
1381 case c_unset: generate_unset(g, p); break;
1382 case c_try: generate_try(g, p); break;
1383 case c_fail: generate_fail(g, p); break;
1384 case c_reverse:
1385 case c_test: generate_test(g, p); break;
1386 case c_do: generate_do(g, p); break;
1387 case c_goto: generate_GO(g, p, 1); break;
1388 case c_gopast: generate_GO(g, p, 0); break;
1389 case c_repeat: generate_repeat(g, p); break;
1390 case c_loop: generate_loop(g, p); break;
1391 case c_atleast: generate_atleast(g, p); break;
1392 case c_setmark: generate_setmark(g, p); break;
1393 case c_tomark: generate_tomark(g, p); break;
1394 case c_atmark: generate_atmark(g, p); break;
1395 case c_hop: generate_hop(g, p); break;
1396 case c_delete: generate_delete(g, p); break;
1397 case c_next: generate_next(g, p); break;
1398 case c_tolimit: generate_tolimit(g, p); break;
1399 case c_atlimit: generate_atlimit(g, p); break;
1400 case c_leftslice: generate_leftslice(g, p); break;
1401 case c_rightslice: generate_rightslice(g, p); break;
1402 case c_assignto: generate_assignto(g, p); break;
1403 case c_sliceto: generate_sliceto(g, p); break;
1404 case c_assign: generate_assignfrom(g, p); break;
1405 case c_insert:
1406 case c_attach: generate_insert(g, p, p->type); break;
1407 case c_slicefrom: generate_slicefrom(g, p); break;
1408 case c_setlimit: generate_setlimit(g, p); break;
1409 case c_dollar: generate_dollar(g, p); break;
1410 case c_mathassign: generate_integer_assign(g, p, "="); break;
1411 case c_plusassign: generate_integer_assign(g, p, "+="); break;
1412 case c_minusassign: generate_integer_assign(g, p, "-="); break;
1413 case c_multiplyassign:generate_integer_assign(g, p, "*="); break;
1414 case c_divideassign: generate_integer_assign(g, p, "/="); break;
1415 case c_eq: generate_integer_test(g, p, "=="); break;
1416 case c_ne: generate_integer_test(g, p, "!="); break;
1417 case c_gr: generate_integer_test(g, p, ">"); break;
1418 case c_ge: generate_integer_test(g, p, ">="); break;
1419 case c_ls: generate_integer_test(g, p, "<"); break;
1420 case c_le: generate_integer_test(g, p, "<="); break;
1421 case c_call: generate_call(g, p); break;
1422 case c_grouping: generate_grouping(g, p, false); break;
1423 case c_non: generate_grouping(g, p, true); break;
1424 case c_name: generate_namedstring(g, p); break;
1425 case c_literalstring: generate_literalstring(g, p); break;
1426 case c_among: generate_among(g, p); break;
1427 case c_substring: generate_substring(g, p); break;
1428 case c_booltest: generate_booltest(g, p); break;
1429 case c_false: generate_false(g, p); break;
1430 case c_true: break;
1431 case c_debug: generate_debug(g, p); break;
1432 default: fprintf(stderr, "%d encountered\n", p->type);
1433 exit(1);
1434 }
1435
1436 if (g->failure_label != a0)
1437 g->label_used = used;
1438 g->failure_label = a0;
1439 g->failure_keep_count = a1;
1440 }
1441
write_generated_comment_content(struct generator * g)1442 void write_generated_comment_content(struct generator * g) {
1443 w(g, "Generated by Snowball " SNOWBALL_VERSION
1444 " - https://snowballstem.org/");
1445 }
1446
write_start_comment(struct generator * g,const char * comment_start,const char * comment_end)1447 void write_start_comment(struct generator * g,
1448 const char * comment_start,
1449 const char * comment_end) {
1450 write_margin(g);
1451 w(g, comment_start);
1452 write_generated_comment_content(g);
1453 if (comment_end) {
1454 w(g, comment_end);
1455 }
1456 w(g, "~N~N");
1457 }
1458
generate_head(struct generator * g)1459 static void generate_head(struct generator * g) {
1460
1461 w(g, "#include \"");
1462 if (g->options->runtime_path) {
1463 write_string(g, g->options->runtime_path);
1464 if (g->options->runtime_path[strlen(g->options->runtime_path) - 1] != '/')
1465 write_char(g, '/');
1466 }
1467 w(g, "header.h\"~N~N");
1468 }
1469
generate_routine_headers(struct generator * g)1470 static void generate_routine_headers(struct generator * g) {
1471 struct name * q;
1472 for (q = g->analyser->names; q; q = q->next) {
1473 g->V[0] = q;
1474 switch (q->type) {
1475 case t_routine:
1476 w(g, "static int ~W0(struct SN_env * z);~N");
1477 break;
1478 case t_external:
1479 w(g,
1480 "#ifdef __cplusplus~N"
1481 "extern \"C\" {~N"
1482 "#endif~N"
1483 "extern int ~W0(struct SN_env * z);~N"
1484 "#ifdef __cplusplus~N"
1485 "}~N"
1486 "#endif~N"
1487 );
1488 break;
1489 }
1490 }
1491 }
1492
generate_among_table(struct generator * g,struct among * x)1493 static void generate_among_table(struct generator * g, struct among * x) {
1494
1495 struct amongvec * v = x->b;
1496
1497 g->I[0] = x->number;
1498 {
1499 int i;
1500 for (i = 0; i < x->literalstring_count; i++) {
1501 g->I[1] = i;
1502 g->I[2] = v->size;
1503 g->L[0] = v->b;
1504 if (v->size)
1505 w(g, "static const symbol s_~I0_~I1[~I2] = ~A0;~N");
1506 v++;
1507 }
1508 }
1509
1510 g->I[1] = x->literalstring_count;
1511 w(g, "~N~Mstatic const struct among a_~I0[~I1] =~N{~N");
1512
1513 v = x->b;
1514 {
1515 int i;
1516 for (i = 0; i < x->literalstring_count; i++) {
1517 g->I[1] = i;
1518 g->I[2] = v->size;
1519 g->I[3] = v->i;
1520 g->I[4] = v->result;
1521 g->S[0] = i < x->literalstring_count - 1 ? "," : "";
1522
1523 if (g->options->comments) {
1524 w(g, "/*~J1 */ ");
1525 }
1526 w(g, "{ ~I2, ");
1527 if (v->size == 0) {
1528 w(g, "0,");
1529 } else {
1530 w(g, "s_~I0_~I1,");
1531 }
1532 w(g, " ~I3, ~I4, ");
1533 if (v->function == 0) {
1534 write_char(g, '0');
1535 } else {
1536 write_varname(g, v->function);
1537 }
1538 w(g, "}~S0~N");
1539 v++;
1540 }
1541 }
1542 w(g, "};~N~N");
1543 }
1544
generate_amongs(struct generator * g)1545 static void generate_amongs(struct generator * g) {
1546 struct among * x;
1547 for (x = g->analyser->amongs; x; x = x->next) {
1548 generate_among_table(g, x);
1549 }
1550 }
1551
set_bit(symbol * b,int i)1552 static void set_bit(symbol * b, int i) { b[i/8] |= 1 << i%8; }
1553
generate_grouping_table(struct generator * g,struct grouping * q)1554 static void generate_grouping_table(struct generator * g, struct grouping * q) {
1555
1556 int range = q->largest_ch - q->smallest_ch + 1;
1557 int size = (range + 7)/ 8; /* assume 8 bits per symbol */
1558 symbol * b = q->b;
1559 symbol * map = create_b(size);
1560 int i;
1561 for (i = 0; i < size; i++) map[i] = 0;
1562
1563 for (i = 0; i < SIZE(b); i++) set_bit(map, b[i] - q->smallest_ch);
1564
1565 g->V[0] = q->name;
1566
1567 w(g, "static const unsigned char ~V0[] = { ");
1568 for (i = 0; i < size; i++) {
1569 write_int(g, map[i]);
1570 if (i < size - 1) w(g, ", ");
1571 }
1572 w(g, " };~N~N");
1573 lose_b(map);
1574 }
1575
generate_groupings(struct generator * g)1576 static void generate_groupings(struct generator * g) {
1577 struct grouping * q;
1578 for (q = g->analyser->groupings; q; q = q->next) {
1579 if (q->name->used)
1580 generate_grouping_table(g, q);
1581 }
1582 }
1583
generate_create(struct generator * g)1584 static void generate_create(struct generator * g) {
1585
1586 int * p = g->analyser->name_count;
1587 g->I[0] = p[t_string];
1588 g->I[1] = p[t_integer] + p[t_boolean];
1589 w(g, "~N"
1590 "extern struct SN_env * ~pcreate_env(void) { return SN_create_env(~I0, ~I1); }"
1591 "~N");
1592 }
1593
generate_close(struct generator * g)1594 static void generate_close(struct generator * g) {
1595
1596 int * p = g->analyser->name_count;
1597 g->I[0] = p[t_string];
1598 w(g, "~Nextern void ~pclose_env(struct SN_env * z) { SN_close_env(z, ~I0); }~N~N");
1599 }
1600
generate_create_and_close_templates(struct generator * g)1601 static void generate_create_and_close_templates(struct generator * g) {
1602 w(g, "~N"
1603 "extern struct SN_env * ~pcreate_env(void);~N"
1604 "extern void ~pclose_env(struct SN_env * z);~N"
1605 "~N");
1606 }
1607
generate_header_file(struct generator * g)1608 static void generate_header_file(struct generator * g) {
1609
1610 struct name * q;
1611 const char * vp = g->options->variables_prefix;
1612 g->S[0] = vp;
1613
1614 w(g, "#ifdef __cplusplus~N"
1615 "extern \"C\" {~N"
1616 "#endif~N"); /* for C++ */
1617
1618 generate_create_and_close_templates(g);
1619 for (q = g->analyser->names; q; q = q->next) {
1620 g->V[0] = q;
1621 switch (q->type) {
1622 case t_external:
1623 w(g, "extern int ~W0(struct SN_env * z);~N");
1624 break;
1625 case t_string:
1626 case t_integer:
1627 case t_boolean:
1628 if (vp) {
1629 int count = q->count;
1630 if (count < 0) {
1631 /* Unused variables should get removed from `names`. */
1632 fprintf(stderr, "Optimised out variable ");
1633 report_b(stderr, q->b);
1634 fprintf(stderr, " still in names list\n");
1635 exit(1);
1636 }
1637 if (q->type == t_boolean) {
1638 /* We use a single array for booleans and integers,
1639 * with the integers first.
1640 */
1641 count += g->analyser->name_count[t_integer];
1642 }
1643 g->I[0] = count;
1644 g->I[1] = "SIIrxg"[q->type];
1645 w(g, "#define ~S0");
1646 write_b(g, q->b);
1647 w(g, " (~c1[~I0])~N");
1648 }
1649 break;
1650 }
1651 }
1652
1653 w(g, "~N"
1654 "#ifdef __cplusplus~N"
1655 "}~N"
1656 "#endif~N"); /* for C++ */
1657
1658 w(g, "~N");
1659 }
1660
generate_program_c(struct generator * g)1661 extern void generate_program_c(struct generator * g) {
1662
1663 g->outbuf = str_new();
1664 write_start_comment(g, "/* ", " */");
1665 generate_head(g);
1666 generate_routine_headers(g);
1667 w(g, "#ifdef __cplusplus~N"
1668 "extern \"C\" {~N"
1669 "#endif~N"
1670 "~N");
1671 generate_create_and_close_templates(g);
1672 w(g, "~N"
1673 "#ifdef __cplusplus~N"
1674 "}~N"
1675 "#endif~N");
1676 generate_amongs(g);
1677 generate_groupings(g);
1678 g->declarations = g->outbuf;
1679 g->outbuf = str_new();
1680 g->literalstring_count = 0;
1681 {
1682 struct node * p = g->analyser->program;
1683 while (p) { generate(g, p); p = p->right; }
1684 }
1685 generate_create(g);
1686 generate_close(g);
1687 output_str(g->options->output_src, g->declarations);
1688 str_delete(g->declarations);
1689 output_str(g->options->output_src, g->outbuf);
1690 str_clear(g->outbuf);
1691
1692 write_start_comment(g, "/* ", " */");
1693 generate_header_file(g);
1694 output_str(g->options->output_h, g->outbuf);
1695 str_delete(g->outbuf);
1696 }
1697
1698 /* Generator functions common to multiple languages. */
1699
create_generator(struct analyser * a,struct options * o)1700 extern struct generator * create_generator(struct analyser * a, struct options * o) {
1701 NEW(generator, g);
1702 g->analyser = a;
1703 g->options = o;
1704 g->margin = 0;
1705 g->debug_count = 0;
1706 g->copy_from_count = 0;
1707 g->line_count = 0;
1708 g->line_labelled = 0;
1709 g->failure_label = -1;
1710 g->unreachable = false;
1711 #ifndef DISABLE_PYTHON
1712 g->max_label = 0;
1713 #endif
1714 return g;
1715 }
1716
close_generator(struct generator * g)1717 extern void close_generator(struct generator * g) {
1718 FREE(g);
1719 }
1720
1721 /* Write routines for simple entities */
1722
write_char(struct generator * g,int ch)1723 extern void write_char(struct generator * g, int ch) {
1724 str_append_ch(g->outbuf, ch); /* character */
1725 }
1726
write_newline(struct generator * g)1727 extern void write_newline(struct generator * g) {
1728 str_append_ch(g->outbuf, '\n'); /* newline */
1729 g->line_count++;
1730 }
1731
write_string(struct generator * g,const char * s)1732 extern void write_string(struct generator * g, const char * s) {
1733 str_append_string(g->outbuf, s);
1734 }
1735
write_int(struct generator * g,int i)1736 extern void write_int(struct generator * g, int i) {
1737 str_append_int(g->outbuf, i);
1738 }
1739
write_b(struct generator * g,symbol * b)1740 extern void write_b(struct generator * g, symbol * b) {
1741
1742 str_append_b(g->outbuf, b);
1743 }
1744
write_str(struct generator * g,struct str * str)1745 extern void write_str(struct generator * g, struct str * str) {
1746
1747 str_append(g->outbuf, str);
1748 }
1749