1 %{
2 /* GNU Mailutils -- a suite of utilities for electronic mail
3 Copyright (C) 1999-2021 Free Software Foundation, Inc.
4
5 GNU Mailutils is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GNU Mailutils is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include <mh.h>
19 #include <mh_format.h>
20 #include <sys/stat.h>
21
22 int yyerror (const char *s);
23 int yylex (void);
24
25 static mu_opool_t tokpool; /* Temporary token storage */
26
27
28 /* Lexical context */
29 enum context
30 {
31 ctx_init, /* Normal text */
32 ctx_if, /* After %< or %? */
33 ctx_expr, /* Expression within cond */
34 ctx_func, /* after (func */
35 };
36
/* Stack of lexical contexts.  The current context is the topmost entry. */
static enum context *ctx_stack;
size_t ctx_tos;      /* Number of entries currently on the stack */
size_t ctx_max;      /* Number of entries allocated for ctx_stack */
40
41 static inline void
ctx_push(enum context ctx)42 ctx_push (enum context ctx)
43 {
44 if (ctx_tos == ctx_max)
45 ctx_stack = mu_2nrealloc (ctx_stack, &ctx_max, sizeof (ctx_stack[0]));
46 ctx_stack[ctx_tos++] = ctx;
47 }
48
49 static inline void
ctx_pop(void)50 ctx_pop (void)
51 {
52 if (ctx_tos == 0)
53 {
54 yyerror ("out of context");
55 abort ();
56 }
57 ctx_tos--;
58 }
59
60 static inline enum context
ctx_get(void)61 ctx_get (void)
62 {
63 return ctx_stack[ctx_tos-1];
64 }
65
/* Kinds of nodes in the parse tree */
enum node_type
{
  fmtnode_print,     /* Print the value of the argument node */
  fmtnode_literal,   /* String literal */
  fmtnode_number,    /* Numeric literal */
  fmtnode_body,      /* The "body" component */
  fmtnode_comp,      /* Any other named component */
  fmtnode_funcall,   /* Call to a built-in function */
  fmtnode_cntl,      /* Control escape (conditional) */
  fmtnode_typecast,  /* Conversion of the argument to another data type */
};
77
/* Parse tree node.  Nodes that form a statement list are linked
   through the prev and next members. */
struct node
{
  enum node_type nodetype;      /* Kind of the node; selects the member of v */
  enum mh_type datatype;        /* Type of the value the node produces */
  int printflag;                /* MHA_* print flags (see the escape rule) */
  struct node *prev, *next;     /* Links to neighbours in a statement list */
  union
  {
    char *str;                  /* fmtnode_literal, fmtnode_comp */
    long num;                   /* fmtnode_number */
    struct node *arg;           /* fmtnode_typecast: node being converted */
    struct
    {
      int fmtspec;              /* Format flags and width; 0 if none */
      struct node *arg;         /* What to print */
    } prt;                      /* fmtnode_print */
    struct
    {
      mh_builtin_t *builtin;    /* Function being called */
      struct node *arg;         /* Its argument or NULL */
    } funcall;                  /* fmtnode_funcall */
    struct
    {
      struct node *cond;        /* Condition */
      struct node *iftrue;      /* Statements for the true branch */
      struct node *iffalse;     /* Statements for the false branch or NULL */
    } cntl;                     /* fmtnode_cntl */
  } v;
};
107
108 static struct node *parse_tree;
109 static struct node *new_node (enum node_type nodetype, enum mh_type datatype);
110
111 static struct node *printelim (struct node *root);
112 static void codegen (mh_format_t *fmt, int tree);
113 static struct node *typecast (struct node *node, enum mh_type type);
114
115 %}
116
117 %union {
118 char *str;
119 char const *mesg;
120 long num;
121 struct {
122 struct node *head, *tail;
123 } nodelist;
124 struct node *nodeptr;
125 mh_builtin_t *builtin;
126 int fmtspec;
127 struct {
128 enum mh_type type;
129 union
130 {
131 char *str;
132 long num;
133 } v;
134 } arg;
135 };
136
137 %token <num> NUMBER "number"
138 %token <str> STRING "string" COMPONENT "component"
139 %token <arg> ARGUMENT "argument"
140 %token <builtin> FUNCTION "function name"
141 %token IF "%<" ELIF "%?" ELSE "%|" FI "%>"
142 %token <fmtspec> FMTSPEC "format specifier"
143 %token BOGUS
144 %token EOFN ")"
145
146 %type <nodelist> list zlist elif_list
147 %type <nodeptr> item escape component funcall cntl argument
148 %type <nodeptr> cond cond_expr elif_part else_part printable
149 %type <builtin> function
150 %type <fmtspec> fmtspec
151
152 %error-verbose
153
154 %%
155
/* The whole format: a non-empty list of items. */
input     : list
            {
              /* Publish the head of the top-level list for codegen. */
              parse_tree = $1.head;
            }
          ;

/* A doubly-linked list of items, tracked by its head and tail. */
list      : item
            {
              $$.head = $$.tail = $1;
            }
          | list item
            {
              /* Append the new item after the current tail. */
              $2->prev = $1.tail;
              $1.tail->next = $2;
              $1.tail = $2;
              $$ = $1;
            }
          ;

/* An item is either literal text, which is wrapped into a print node,
   or an escape. */
item      : STRING
            {
              struct node *n = new_node (fmtnode_literal, mhtype_str);
              n->v.str = $1;
              $$ = new_node (fmtnode_print, mhtype_str);
              $$->v.prt.arg = n;
            }
          | escape
          ;
184
/* An escape is a control construct or a component/function escape with
   an optional format specifier. */
escape    : cntl
          | fmtspec printable
            {
              /* Nodes flagged MHA_NOPRINT are used as is; everything else
                 is wrapped into a print node. */
              if ($2->printflag & MHA_NOPRINT)
                $$ = $2;
              else
                {
                  $$ = new_node (fmtnode_print, $2->datatype);
                  /* MHA_IGNOREFMT discards the format specifier. */
                  $$->v.prt.fmtspec = ($2->printflag & MHA_IGNOREFMT) ? 0 : $1;
                  $$->v.prt.arg = $2;
                }
            }
          ;

printable : component
          | funcall
          ;

/* A component escape.  The name "body" (case-insensitive) yields
   a dedicated body node; any other name is stored in a component node. */
component : COMPONENT
            {
              if (mu_c_strcasecmp ($1, "body") == 0)
                $$ = new_node (fmtnode_body, mhtype_str);
              else
                {
                  $$ = new_node (fmtnode_comp, mhtype_str);
                  $$->v.str = $1;
                }
            }
          ;
214
/* A function escape: function name, optional argument, closing
   parenthesis.  The action validates the argument against the
   declaration of the built-in and builds the resulting node. */
funcall   : function argument EOFN
            {
              struct node *arg;

              /* Leave the function context entered by the `function'
                 rule. */
              ctx_pop ();

              arg = $2;
              if ($1->argtype == mhtype_none)
                {
                  if (arg)
                    {
                      yyerror ("function doesn't take arguments");
                      YYABORT;
                    }
                }
              else if (arg == NULL)
                {
                  if ($1->flags & MHA_OPTARG_NIL)
                    {
                      /* Omitted argument is replaced with an empty
                         value of the declared type. */
                      switch ($1->argtype)
                        {
                        case mhtype_str:
                          arg = new_node (fmtnode_literal, mhtype_str);
                          arg->v.str = "";
                          break;

                        case mhtype_num:
                          arg = new_node (fmtnode_number, mhtype_num);
                          arg->v.num = 0;
                          break;

                        default:
                          abort ();
                        }
                    }
                  else if ($1->flags & MHA_OPTARG)
                    {
                      /* ok - ignore */;
                    }
                  else
                    {
                      yyerror ("required argument missing");
                      YYABORT;
                    }
                }
              else if ($1->flags & MHA_LITERAL)
                {
                  /* The function accepts only literal arguments. */
                  switch ($1->argtype)
                    {
                    case mhtype_num:
                      if (arg->nodetype == fmtnode_number)
                        /* ok */;
                      else
                        {
                          yyerror ("argument must be a number");
                          YYABORT;
                        }
                      break;

                    case mhtype_str:
                      if (arg->nodetype == fmtnode_literal)
                        /* ok */;
                      else if (arg->nodetype == fmtnode_number)
                        {
                          /* Convert the numeric literal to its string
                             representation in place. */
                          char *s;
                          mu_asprintf (&s, "%ld", arg->v.num);
                          arg->nodetype = fmtnode_literal;
                          arg->datatype = mhtype_str;
                          arg->v.str = s;
                        }
                      else
                        {
                          yyerror ("argument must be literal");
                          YYABORT;
                        }
                      break;

                    default:
                      break;
                    }
                }

              if ($1->flags & MHA_VOID)
                {
                  /* The argument node itself becomes the result and is
                     marked as not printable.
                     NOTE(review): $2 is dereferenced unconditionally;
                     it is NULL if the argument was omitted -- confirm
                     that no MHA_VOID built-in allows that. */
                  $2->printflag = MHA_NOPRINT;
                  $$ = $2;
                }
              else
                {
                  $$ = new_node (fmtnode_funcall, $1->type);
                  $$->v.funcall.builtin = $1;
                  /* Convert the argument to the declared type if needed. */
                  $$->v.funcall.arg = typecast (arg, $1->argtype);
                  $$->printflag = $1->flags & MHA_PRINT_MASK;
                  /* A function that returns nothing is never printed. */
                  if ($1->type == mhtype_none)
                    $$->printflag = MHA_NOPRINT;
                }
            }
          ;
313
/* Optional format specifier; 0 stands for "none". */
fmtspec   : /* empty */
            {
              $$ = 0;
            }
          | FMTSPEC
          ;

/* Function name.  Switches the lexer to the function context, which
   is left again in the action of the `funcall' rule. */
function  : FUNCTION
            {
              ctx_push (ctx_func);
            }
          ;

/* Optional function argument: a literal produced by the lexer or
   a nested escape.  An omitted argument is represented by NULL. */
argument  : /* empty */
            {
              $$ = NULL;
            }
          | ARGUMENT
            {
              /* Build a literal node of the type detected by the lexer. */
              switch ($1.type)
                {
                case mhtype_none:
                  $$ = NULL;
                  break;

                case mhtype_str:
                  $$ = new_node (fmtnode_literal, mhtype_str);
                  $$->v.str = $1.v.str;
                  break;

                case mhtype_num:
                  $$ = new_node (fmtnode_number, mhtype_num);
                  $$->v.num = $1.v.num;
                }
            }
          | escape
            {
              /* A nested escape supplies a value; drop its print
                 wrapper, if any. */
              $$ = printelim ($1);
            }
          ;
354
/* Control escape (conditional).  The numbers mark the positions of
   the right-hand side symbols referenced in the action. */
/*          1  2    3     4         5 */
cntl      : if cond zlist elif_part fi
            {
              $$ = new_node(fmtnode_cntl, mhtype_num);
              $$->v.cntl.cond = $2;
              $$->v.cntl.iftrue = $3.head;
              $$->v.cntl.iffalse = $4;
            }
          ;

/* Possibly empty list of items. */
zlist     : /* empty */
            {
              $$.head = $$.tail = NULL;
            }
          | list
          ;

/* Start of a conditional: the lexer now expects a condition. */
if        : IF
            {
              ctx_push (ctx_if);
            }
          ;

/* End of a conditional: restore the enclosing lexical context. */
fi        : FI
            {
              ctx_pop ();
            }
          ;

/* Start of an "else if" part: replace the current context with the
   one expecting a condition. */
elif      : ELIF
            {
              ctx_pop ();
              ctx_push (ctx_if);
            }
          ;

/* Condition.  After it has been read, the lexer switches from the
   condition context to the expression context. */
cond      : cond_expr
            {
              ctx_pop ();
              ctx_push (ctx_expr);
              $$ = printelim ($1);
            }
          ;

cond_expr : component
          | funcall
          ;
402
/* What follows the true branch of a conditional: nothing, an else
   part, or a chain of "else if" parts. */
elif_part : /* empty */
            {
              $$ = NULL;
            }
          | else_part
          | elif_list
            {
              $$ = $1.head;
            }
          ;

/* Chain of "else if" parts.  Each part is a control node hooked into
   the iffalse member of the preceding one; head is the first node of
   the chain and tail the last one added. */
elif_list : elif cond zlist
            {
              struct node *np = new_node (fmtnode_cntl, mhtype_num);
              np->v.cntl.cond = $2;
              np->v.cntl.iftrue = $3.head;
              np->v.cntl.iffalse = NULL;
              $$.head = $$.tail = np;
            }
          | elif_list elif cond zlist
            {
              struct node *np = new_node(fmtnode_cntl, mhtype_num);
              np->v.cntl.cond = $3;
              np->v.cntl.iftrue = $4.head;
              np->v.cntl.iffalse = NULL;

              $1.tail->v.cntl.iffalse = np;
              $1.tail = np;

              $$ = $1;
            }
          | elif_list else_part
            {
              /* NOTE(review): after this action the tail is the head of
                 the else branch, which can be NULL or a non-control
                 node.  The previous alternative would then access
                 tail->v.cntl on it if another "else if" follows --
                 confirm whether that input should be rejected. */
              $1.tail->v.cntl.iffalse = $2;
              $1.tail = $2;
              $$ = $1;
            }
          ;

/* The else part: a possibly empty list of items. */
else_part : ELSE zlist
            {
              $$ = $2.head;
            }
          ;
447
448 %%
449
/* Lexer state */
static char *start;                   /* Start of the format string */
static char *tok_start;               /* Start of the current token */
static char *curp;                    /* Current input position */
static mu_linetrack_t trk;            /* Source location tracker */
static struct mu_locus_range yylloc;  /* Location of the current token */
455
456 static inline size_t
457 token_leng (void)
458 {
459 return curp - tok_start;
460 }
461
462 static inline void
463 mark (void)
464 {
465 if (curp > tok_start)
466 mu_linetrack_advance (trk, &yylloc, tok_start, token_leng ());
467 tok_start = curp;
468 }
469
470 static inline int
471 input (void)
472 {
473 if (*curp == 0)
474 return 0;
475 return *curp++;
476 }
477
/* Consume N input characters, marking token boundaries before and
   after them. */
static inline void
eatinput (size_t n)
{
  mark ();
  for (; n > 0; n--)
    input ();
  mark ();
}
486
487 static inline int
488 peek (void)
489 {
490 return *curp;
491 }
492
493 static inline int
494 unput (int c)
495 {
496 if (curp == start)
497 {
498 mu_error (_("%s:%d: INTERNAL ERROR: out of unput space: please report"),
499 __FILE__, __LINE__);
500 abort ();
501 }
502 return *--curp = c;
503 }
504
505 static int
506 skip (int class)
507 {
508 curp = mu_str_skip_class (curp, class);
509 return *curp;
510 }
511
512 static int
513 skipeol (void)
514 {
515 int c;
516
517 do
518 {
519 c = input ();
520 if (c == '\\' && (c = input ()) == '\n')
521 c = input ();
522 }
523 while (c && c != '\n');
524 return *curp;
525 }
526
527
528 static inline int
529 bogus (const char *mesg)
530 {
531 yylval.mesg = mesg;
532 return BOGUS;
533 }
534
535 static char *
536 find_bol (unsigned line)
537 {
538 char *p = start;
539
540 while (--line)
541 {
542 while (*p != '\n')
543 {
544 if (*p == 0)
545 return p;
546 p++;
547 }
548 p++;
549 }
550 return p;
551 }
552
/* Report the error message S at the current location, then echo the
   offending source line followed by a line with '^' markers under the
   beginning and, unless both coincide, the end of the location.

   Nothing is printed when the lookahead token is BOGUS; presumably
   because yylex has already reported that lexical error itself.

   Always returns 0. */
int
yyerror (const char *s)
{
  if (yychar != BOGUS)
    {
      char *bol;
      size_t len;
      static char tab[] = " ";  /* Text printed in place of a tab */
      size_t b = 0, e = 0;      /* Extra columns added by tab replacement
                                   before the begin and end columns */
      size_t i;

      bol = find_bol (yylloc.beg.mu_line);
      len = strcspn (bol, "\n");

      mu_diag_at_locus_range (MU_DIAG_ERROR, &yylloc, "%s", s);
      /* Echo the source line, replacing each tab with the contents
         of tab. */
      for (i = 0; i < len; i++)
        /* How ... tribal! */
        {
          if (bol[i] == '\t')
            {
              mu_stream_write (mu_strerr, tab, strlen (tab), NULL);
              if (yylloc.beg.mu_col > i)
                b += strlen (tab) - 1;
              if (yylloc.end.mu_col > i)
                e += strlen (tab) - 1;
            }
          else
            mu_stream_write (mu_strerr, bol + i, 1, NULL);
        }
      mu_stream_write (mu_strerr, "\n", 1, NULL);
      /* Print the marker line: padding followed by '^'. */
      if (mu_locus_point_eq (&yylloc.beg, &yylloc.end))
        mu_error ("%*.*s^",
                  (int) (b + yylloc.beg.mu_col - 1),
                  (int) (b + yylloc.beg.mu_col - 1), "");
      else
        mu_error ("%*.*s^%*.*s^",
                  (int)(b + yylloc.beg.mu_col - 1),
                  (int)(b + yylloc.beg.mu_col - 1), "",
                  (int)(e + yylloc.end.mu_col - yylloc.beg.mu_col - b - 1),
                  (int)(e + yylloc.end.mu_col - yylloc.beg.mu_col - b - 1),
                  "");
    }
  return 0;
}
597
static int backslash(int c);

/* Description of a lexical context */
struct lexer_tab
{
  char *ctx_name;        /* Context name, used in debugging output */
  int (*lexer) (void);   /* Lexer function for that context */
};

static int yylex_initial (void);
static int yylex_cond (void);
static int yylex_expr (void);
static int yylex_func (void);

/* Lexers for each context, indexed by enum context */
static struct lexer_tab lexer_tab[] = {
  [ctx_init] = { "initial",    yylex_initial },
  [ctx_if]   = { "condition",  yylex_cond },
  [ctx_expr] = { "expression", yylex_expr },
  [ctx_func] = { "function",   yylex_func }
};
617
618 int
619 yylex (void)
620 {
621 int tok;
622
623 do
624 {
625 mark ();
626 if (yydebug)
627 fprintf (stderr, "lex: [%s] at %-10.10s...]\n",
628 lexer_tab[ctx_get ()].ctx_name, curp);
629 tok = lexer_tab[ctx_get ()].lexer ();
630 }
631 while (tok == STRING && yylval.str[0] == 0);
632
633 mark ();
634 if (tok == BOGUS)
635 yyerror (yylval.mesg);
636 return tok;
637 }
638
639 static int
640 token_fmtspec (int flags)
641 {
642 int num = 0;
643
644 if (peek () == '0')
645 {
646 flags |= MH_FMT_ZEROPAD;
647 input ();
648 }
649 else if (!mu_isdigit (peek ()))
650 {
651 return bogus ("expected digit");
652 }
653 mark ();
654 while (*curp && mu_isdigit (peek ()))
655 num = num * 10 + input () - '0';
656 yylval.fmtspec = flags | num;
657 unput ('%');
658 return FMTSPEC;
659 }
660
661 static int
662 token_function (void)
663 {
664 eatinput (1);
665 skip (MU_CTYPE_IDENT);
666 if (token_leng () == 0 || !strchr (" \t(){%", peek ()))
667 {
668 return bogus ("expected function name");
669 }
670
671 yylval.builtin = mh_lookup_builtin (tok_start, token_leng ());
672
673 if (!yylval.builtin)
674 {
675 return bogus ("unknown function");
676 }
677 if (!yylval.builtin->fun
678 && !(yylval.builtin->flags & (MHA_SPECIAL|MHA_VOID)))
679 {
680 mu_error ("INTERNAL ERROR at %s:%d: \"%s\" has no associated function"
681 " and is not marked as MHA_SPECIAL",
682 __FILE__, __LINE__, yylval.builtin->name);
683 abort ();
684 }
685
686 return FUNCTION;
687 }
688
689 static int
690 token_component (void)
691 {
692 eatinput (1);
693 if (!mu_isalpha (peek ()))
694 {
695 return bogus ("component name expected");
696 }
697 mark ();
698 if (skip (MU_CTYPE_HEADR) != '}')
699 {
700 return bogus ("component name expected");
701 }
702 mu_opool_append (tokpool, tok_start, token_leng ());
703 mu_opool_append_char (tokpool, 0);
704 yylval.str = mu_opool_finish (tokpool, NULL);
705 eatinput (1);
706 return COMPONENT;
707 }
708
709 int
710 yylex_initial (void)
711 {
712 int c;
713
714 again:
715 mark ();
716 if (peek () == '%')
717 {
718 input ();
719
720 switch (c = input ())
721 {
722 case ';':
723 skipeol ();
724 goto again;
725 case '<':
726 return IF;
727 case '%':
728 unput (c);
729 unput (c);
730 break;
731 case '(':
732 unput (c);
733 return token_function ();
734 case '{':
735 unput (c);
736 return token_component ();
737 case '-':
738 return token_fmtspec (MH_FMT_RALIGN);
739 case '0': case '1': case '2': case '3': case '4':
740 case '5': case '6': case '7': case '8': case '9':
741 unput (c);
742 return token_fmtspec (MH_FMT_DEFAULT);
743 default:
744 return bogus ("component or function name expected");
745 }
746 }
747
748 c = peek ();
749
750 if (c == 0)
751 return 0;
752
753 while ((c = input ()) != 0)
754 {
755 if (c == '%')
756 {
757 if (peek () == '%')
758 mu_opool_append_char (tokpool, input ());
759 else
760 {
761 unput (c);
762 break;
763 }
764 }
765 else if (c == '\\')
766 {
767 if ((c = input ()) == 0)
768 {
769 return bogus ("unexpected end of file");
770 }
771 if (c != '\n')
772 mu_opool_append_char (tokpool, backslash (c));
773 }
774 else
775 mu_opool_append_char (tokpool, c);
776 }
777
778 mu_opool_append_char (tokpool, 0);
779 yylval.str = mu_opool_finish (tokpool, NULL);
780 return STRING;
781 }
782
/* Lexer for the condition context (right after %< or %?).  Only
   a function or a component escape is allowed here, optionally
   preceded by backslash-newline sequences. */
int
yylex_cond (void)
{
  for (;;)
    {
      int c = peek ();

      if (c == '(')
        return token_function ();
      if (c == '{')
        return token_component ();
      if (c == '\\')
        {
          input ();
          if (input () == '\n')
            continue;
          /* A backslash not followed by newline is an error. */
        }
      return bogus ("'(' or '{' expected");
    }
}
803
804 int
805 yylex_expr (void)
806 {
807 int c;
808
809 if ((c = input ()) == '%')
810 {
811 switch (c = input ())
812 {
813 case '?':
814 return ELIF;
815 case '|':
816 return ELSE;
817 case '>':
818 return FI;
819 }
820 unput (c);
821 unput ('%');
822 }
823 else
824 unput (c);
825 return yylex_initial ();
826 }
827
/* Lexer for the function context (after a function name).  Returns
   a nested function or component escape, the start of a conditional,
   the closing parenthesis (EOFN), or a literal ARGUMENT extending up
   to the closing parenthesis.  A literal that is entirely a number
   acceptable to strtol is returned with numeric type, otherwise with
   string type. */
int
yylex_func (void)
{
  int c;

  /* Expected argument or closing parenthesis */
 again:
  mark ();
  switch (peek ())
    {
    case '(':
      return token_function ();

    case ')':
      eatinput (1);
      return EOFN;

    case '{':
      return token_component ();

    case '%':
      input ();
      switch (peek ())
        {
        case '<':
          input ();
          return IF;

        case '%':
          /* Doubled percent sign: the second one starts a literal. */
          break;

        default:
          return bogus ("expected '%' or '<'");
        }
      break;

    case ' ':
    case '\t':
      skip (MU_CTYPE_SPACE);
      if (peek () == '%')
        goto again;
      /* Anything else after whitespace starts a literal argument. */
      break;

    default:
      /* Any other character is returned as is (0 at end of input). */
      return input ();
    }

  mark ();

  /* Collect the literal up to the closing parenthesis, translating
     backslash escapes. */
  while ((c = input ()) != ')')
    {
      if (c == 0)
        {
          return bogus ("expected ')'");
        }

      if (c == '\\')
        {
          if ((c = input ()) == 0)
            {
              return bogus ("unexpected end of file");
            }
          mu_opool_append_char (tokpool, backslash (c));
        }
      else
        mu_opool_append_char (tokpool, c);
    }
  mu_opool_append_char (tokpool, 0);

  yylval.arg.v.str = mu_opool_finish (tokpool, NULL);
  yylval.arg.type = mhtype_str;
  /* Return the closing parenthesis to the input; it is scanned as
     EOFN by the next call. */
  unput (c);

  /* If the literal looks like a number, try to convert it. */
  if (mu_isdigit (yylval.arg.v.str[0])
      || (yylval.arg.v.str[0] == '-' && mu_isdigit (yylval.arg.v.str[1])))
    {
      long n;
      char *p;
      errno = 0;
      n = strtol (yylval.arg.v.str, &p, 0);
      if (errno == 0 && *p == 0)
        {
          yylval.arg.type = mhtype_num;
          yylval.arg.v.num = n;
        }
    }

  if (peek () != ')')
    {
      return bogus ("expected ')'");
    }

  return ARGUMENT;
}
922
923 static int
924 format_parse (mh_format_t *fmtptr, char *format_str,
925 struct mu_locus_point const *locus,
926 int flags)
927 {
928 int rc;
929 char *p = getenv ("MHFORMAT_DEBUG");
930
931 if (p || (flags & MH_FMT_PARSE_DEBUG))
932 yydebug = 1;
933 start = tok_start = curp = format_str;
934 mu_opool_create (&tokpool, MU_OPOOL_ENOMEMABRT);
935
936 ctx_tos = ctx_max = 0;
937 ctx_stack = NULL;
938 ctx_push (ctx_init);
939 mu_linetrack_create (&trk, "input", 2);
940 if (locus && locus->mu_file)
941 mu_linetrack_rebase (trk, locus);
942 mu_locus_range_init (&yylloc);
943
944 rc = yyparse ();
945 if (rc == 0)
946 codegen (fmtptr, flags & MH_FMT_PARSE_TREE);
947 else
948 mu_opool_destroy (&tokpool);
949
950 mu_locus_range_deinit (&yylloc);
951 mu_linetrack_destroy (&trk);
952 free (ctx_stack);
953
954 parse_tree = NULL;
955 tokpool = NULL;
956 return rc;
957 }
958
959 int
960 mh_format_string_parse (mh_format_t *retfmt, char const *format_str,
961 struct mu_locus_point const *locus,
962 int flags)
963 {
964 char *fmts = mu_strdup (format_str);
965 int rc = format_parse (retfmt, fmts, locus, flags);
966 free (fmts);
967 return rc;
968 }
969
/* Read the format file NAME, located by mh_find_file, into a newly
   allocated null-terminated string and store it in *PFORMAT.  A single
   trailing newline is removed.  The caller is responsible for freeing
   the string.

   An empty file yields an empty string.

   Returns 0 on success.  On error, prints a diagnostic message and
   returns -1; *PFORMAT is left untouched. */
int
mh_read_formfile (char const *name, char **pformat)
{
  FILE *fp;
  struct stat st;
  char *format_str;
  char *file_name;
  size_t size;
  int rc;

  rc = mh_find_file (name, &file_name);
  if (rc)
    {
      mu_error (_("cannot access format file %s: %s"), name, strerror (rc));
      return -1;
    }

  if (stat (file_name, &st))
    {
      mu_error (_("cannot stat format file %s: %s"), file_name,
                strerror (errno));
      free (file_name);
      return -1;
    }

  fp = fopen (file_name, "r");
  if (!fp)
    {
      mu_error (_("cannot open format file %s: %s"), file_name,
                strerror (errno));
      free (file_name);
      return -1;
    }

  size = st.st_size;
  format_str = mu_alloc (size + 1);
  /* fread returns 0 when asked to read an object of size 0, so an
     empty file must not be read at all. */
  if (size > 0 && fread (format_str, size, 1, fp) != 1)
    {
      /* Report first: the cleanup calls below can change errno. */
      mu_error (_("error reading format file %s: %s"), file_name,
                strerror (errno));
      free (format_str);
      fclose (fp);
      free (file_name);
      return -1;
    }
  fclose (fp);
  free (file_name);

  format_str[size] = 0;
  if (size > 0 && format_str[size - 1] == '\n')
    format_str[size - 1] = 0;
  *pformat = format_str;
  return 0;
}
1020
1021 int
1022 mh_format_file_parse (mh_format_t *retfmt, char const *formfile, int flags)
1023 {
1024 char *fmts;
1025 int rc;
1026
1027 rc = mh_read_formfile (formfile, &fmts);
1028 if (rc == 0)
1029 {
1030 struct mu_locus_point loc;
1031 loc.mu_file = formfile;
1032 loc.mu_line = 1;
1033 loc.mu_col = 0;
1034 rc = format_parse (retfmt, fmts, &loc, flags);
1035 free (fmts);
1036 }
1037 return rc;
1038 }
1039
/* Return the character denoted by the escape sequence \C.  The
   letters b, f, n, r and t translate to the corresponding control
   characters; any other character stands for itself. */
int
backslash (int c)
{
  switch (c)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    }
  return c;
}
1053
1054 static struct node *
1055 new_node (enum node_type nodetype, enum mh_type datatype)
1056 {
1057 struct node *np = mu_zalloc (sizeof *np);
1058 np->nodetype = nodetype;
1059 np->datatype = datatype;
1060 return np;
1061 }
1062
1063 static void node_list_free (struct node *node);
1064
1065 static void
1066 node_free (struct node *node)
1067 {
1068 if (!node)
1069 return;
1070 switch (node->nodetype)
1071 {
1072 case fmtnode_print:
1073 node_free (node->v.prt.arg);
1074 break;
1075
1076 case fmtnode_literal:
1077 break;
1078
1079 case fmtnode_number:
1080 break;
1081
1082 case fmtnode_body:
1083 break;
1084
1085 case fmtnode_comp:
1086 break;
1087
1088 case fmtnode_funcall:
1089 node_free (node->v.funcall.arg);
1090 break;
1091
1092 case fmtnode_cntl:
1093 node_list_free (node->v.cntl.cond);
1094 node_list_free (node->v.cntl.iftrue);
1095 node_list_free (node->v.cntl.iffalse);
1096 break;
1097
1098 default:
1099 abort ();
1100 }
1101 free (node);
1102 }
1103
1104 static void
1105 node_list_free (struct node *node)
1106 {
1107 while (node)
1108 {
1109 struct node *next = node->next;
1110 node_free (node);
1111 node = next;
1112 }
1113 }
1114
1115 static struct node *
1116 typecast (struct node *node, enum mh_type type)
1117 {
1118 if (!node)
1119 /* FIXME: when passing optional argument, the caller must know the
1120 type of value returned by the previous expression */
1121 return node;
1122
1123 if (node->datatype == type)
1124 return node;
1125 switch (node->nodetype)
1126 {
1127 case fmtnode_cntl:
1128 node->v.cntl.iftrue = typecast (node->v.cntl.iftrue, type);
1129 node->v.cntl.iffalse = typecast (node->v.cntl.iffalse, type);
1130 node->datatype = type;
1131 break;
1132
1133 default:
1134 {
1135 struct node *arg = new_node (fmtnode_typecast, type);
1136 arg->v.arg = node;
1137 node = arg;
1138 }
1139 }
1140 return node;
1141 }
1142
1143 #define INLINE -1
1144
/* Print the indentation for nesting LEVEL: two spaces per level. */
static inline void
indent (int level)
{
  int width = 2*level;

  printf ("%*.*s", width, width, "");
}
1150
1151 static inline void
1152 delim (int level, char const *dstr)
1153 {
1154 if (level == INLINE)
1155 printf ("%s", dstr);
1156 else
1157 {
1158 printf ("\n");
1159 indent (level);
1160 }
1161 }
1162
1163 static void dump_statement (struct node *node, int level);
1164
1165 void
1166 mh_print_fmtspec (int fmtspec)
1167 {
1168 if (!(fmtspec & (MH_FMT_RALIGN|MH_FMT_ZEROPAD|MH_FMT_COMPWS)))
1169 printf ("NONE");
1170 else
1171 {
1172 if (!(fmtspec & MH_FMT_RALIGN))
1173 printf ("NO");
1174 printf ("RALIGN|");
1175 if (!(fmtspec & MH_FMT_ZEROPAD))
1176 printf ("NO");
1177 printf ("ZEROPAD|");
1178 if (!(fmtspec & MH_FMT_COMPWS))
1179 printf ("NO");
1180 printf ("COMPWS");
1181 }
1182 }
1183
1184 static char *typename[] = { "NONE", "NUM", "STR" };
1185
1186 static void
1187 dump_node_pretty (struct node *node, int level)
1188 {
1189 if (!node)
1190 return;
1191 switch (node->nodetype)
1192 {
1193 case fmtnode_print:
1194 if (node->v.prt.fmtspec)
1195 {
1196 printf ("FORMAT(");
1197 mh_print_fmtspec (node->v.prt.fmtspec);
1198 printf(", %d, ", node->v.prt.fmtspec & MH_WIDTH_MASK);
1199 }
1200 else
1201 printf ("PRINT(");
1202 dump_statement (node->v.prt.arg, INLINE);
1203 printf (")");
1204 break;
1205
1206 case fmtnode_literal:
1207 {
1208 char const *p = node->v.str;
1209 putchar ('"');
1210 while (*p)
1211 {
1212 if (*p == '\\' || *p == '"')
1213 {
1214 putchar ('\\');
1215 putchar (*p);
1216 }
1217 else if (*p == '\n')
1218 {
1219 putchar ('\\');
1220 putchar ('n');
1221 }
1222 else
1223 putchar (*p);
1224 p++;
1225 }
1226 putchar ('"');
1227 }
1228 break;
1229
1230 case fmtnode_number:
1231 printf ("%ld", node->v.num);
1232 break;
1233
1234 case fmtnode_body:
1235 printf ("BODY");
1236 break;
1237
1238 case fmtnode_comp:
1239 printf ("COMPONENT.%s", node->v.str);
1240 break;
1241
1242 case fmtnode_funcall:
1243 printf ("%s(", node->v.funcall.builtin->name);
1244 dump_statement (node->v.funcall.arg, INLINE);
1245 printf (")");
1246 break;
1247
1248 case fmtnode_cntl:
1249 printf ("IF (");
1250 dump_node_pretty (node->v.cntl.cond, INLINE);
1251 printf (") THEN");
1252
1253 if (level != INLINE)
1254 level++;
1255
1256 delim (level, "; ");
1257
1258 dump_statement (node->v.cntl.iftrue, level);
1259
1260 if (node->v.cntl.iffalse)
1261 {
1262 delim (level == INLINE ? level : level - 1, "; ");
1263 printf ("ELSE");
1264 delim (level, " ");
1265 dump_statement (node->v.cntl.iffalse, level);
1266 }
1267
1268 if (level != INLINE)
1269 level--;
1270 delim (level, "; ");
1271 printf ("FI");
1272 break;
1273
1274 case fmtnode_typecast:
1275 printf ("%s(", typename[node->datatype]);
1276 dump_node_pretty (node->v.arg, INLINE);
1277 printf (")");
1278 break;
1279
1280 default:
1281 abort ();
1282 }
1283 }
1284
1285 static void
1286 dump_statement (struct node *node, int level)
1287 {
1288 while (node)
1289 {
1290 dump_node_pretty (node, level);
1291 node = node->next;
1292 if (node)
1293 delim (level, "; ");
1294 }
1295 }
1296
1297 void
1298 mh_format_dump_code (mh_format_t fmt)
1299 {
1300 dump_statement (fmt->tree, 0);
1301 printf ("\n");
1302 }
1303
1304 void
1305 mh_format_free_tree (mh_format_t fmt)
1306 {
1307 if (fmt)
1308 {
1309 node_list_free (fmt->tree);
1310 fmt->tree = NULL;
1311 mu_opool_destroy (&fmt->pool);
1312 }
1313 }
1314
1315 void
1316 mh_format_free (mh_format_t fmt)
1317 {
1318 if (!fmt)
1319 return;
1320
1321 mh_format_free_tree (fmt);
1322
1323 if (fmt->prog)
1324 free (fmt->prog);
1325 fmt->progmax = fmt->progcnt = 0;
1326 fmt->prog = NULL;
1327 }
1328
1329 void
1330 mh_format_destroy (mh_format_t *fmt)
1331 {
1332 if (fmt)
1333 {
1334 mh_format_free (*fmt);
1335 *fmt = NULL;
1336 }
1337 }
1338
1339 static struct node *
1340 printelim (struct node *node)
1341 {
1342 if (node->nodetype == fmtnode_print)
1343 {
1344 struct node *arg = node->v.prt.arg;
1345 arg->next = node->next;
1346 free (node);
1347 node = arg;
1348 }
1349 return node;
1350 }
1351
1352 #define PROG_MIN_ALLOC 8
1353
1354 static inline void
1355 ensure_space (struct mh_format *fmt, size_t n)
1356 {
1357 while (fmt->progcnt + n >= fmt->progmax)
1358 {
1359 if (fmt->progmax == 0)
1360 fmt->progmax = n < PROG_MIN_ALLOC ? PROG_MIN_ALLOC : n;
1361 fmt->prog = mu_2nrealloc (fmt->prog, &fmt->progmax, sizeof fmt->prog[0]);
1362 }
1363 }
1364
1365 static void
1366 emit_instr (struct mh_format *fmt, mh_instr_t instr)
1367 {
1368 ensure_space (fmt, 1);
1369 fmt->prog[fmt->progcnt++] = instr;
1370 }
1371
1372 static inline void
1373 emit_opcode (struct mh_format *fmt, mh_opcode_t op)
1374 {
1375 emit_instr (fmt, (mh_instr_t) op);
1376 }
1377
1378 static void
1379 emit_string (struct mh_format *fmt, char const *str)
1380 {
1381 size_t length = strlen (str) + 1;
1382 size_t count = (length + sizeof (mh_instr_t)) / sizeof (mh_instr_t) + 1;
1383
1384 ensure_space (fmt, count);
1385 emit_instr (fmt, (mh_instr_t) count);
1386 memcpy (MHI_STR (fmt->prog[fmt->progcnt]), str, length);
1387 fmt->progcnt += count;
1388 }
1389
1390 static void codegen_node (struct mh_format *fmt, struct node *node);
1391 static void codegen_nodelist (struct mh_format *fmt, struct node *node);
1392
1393 static void
1394 emit_opcode_typed (struct mh_format *fmt, enum mh_type type,
1395 enum mh_opcode opnum, enum mh_opcode opstr)
1396 {
1397 switch (type)
1398 {
1399 case mhtype_num:
1400 emit_opcode (fmt, opnum);
1401 break;
1402
1403 case mhtype_str:
1404 emit_opcode (fmt, opstr);
1405 break;
1406
1407 default:
1408 abort ();
1409 }
1410 }
1411
1412 static void
1413 emit_special (struct mh_format *fmt, mh_builtin_t *builtin, struct node *arg)
1414 {
1415 if (arg)
1416 {
1417 if (builtin->flags & MHA_LITERAL)
1418 {
1419 switch (arg->nodetype)
1420 {
1421 case fmtnode_literal:
1422 emit_opcode (fmt, mhop_sets);
1423 emit_instr (fmt, (mh_instr_t) (long) R_REG);
1424 emit_string (fmt, arg->v.str);
1425 break;
1426
1427 case fmtnode_number:
1428 emit_opcode (fmt, mhop_setn);
1429 emit_instr (fmt, (mh_instr_t) (long) R_REG);
1430 emit_instr (fmt, (mh_instr_t) (long) arg->v.num);
1431 break;
1432
1433 default:
1434 abort ();
1435 }
1436 }
1437 else
1438 codegen_node (fmt, arg);
1439 }
1440 }
1441
1442 static void
1443 emit_funcall (struct mh_format *fmt, mh_builtin_t *builtin, struct node *arg)
1444 {
1445 if (builtin->flags & MHA_ACC)
1446 {
1447 emit_opcode (fmt, mhop_movs);
1448 emit_instr (fmt, (mh_instr_t) (long) R_ACC);
1449 emit_instr (fmt, (mh_instr_t) (long) R_REG);
1450 }
1451
1452 if (builtin->flags & MHA_SPECIAL)
1453 {
1454 emit_special (fmt, builtin, arg);
1455 return;
1456 }
1457
1458 if (arg)
1459 {
1460 if (builtin->flags & MHA_LITERAL)
1461 {
1462 switch (arg->nodetype)
1463 {
1464 case fmtnode_literal:
1465 emit_opcode (fmt, mhop_sets);
1466 emit_instr (fmt, (mh_instr_t) (long) R_ARG);
1467 emit_string (fmt, arg->v.str);
1468 break;
1469
1470 case fmtnode_number:
1471 emit_opcode (fmt, mhop_setn);
1472 emit_instr (fmt, (mh_instr_t) (long) R_ARG);
1473 emit_instr (fmt, (mh_instr_t) (long) arg->v.num);
1474 break;
1475
1476 default:
1477 abort ();
1478 }
1479 }
1480 else
1481 {
1482 codegen_node (fmt, arg);
1483 emit_opcode_typed (fmt, arg->datatype, mhop_movn, mhop_movs);
1484 emit_instr (fmt, (mh_instr_t) (long) R_ARG);
1485 emit_instr (fmt, (mh_instr_t) (long) R_REG);
1486 }
1487 }
1488 else if (builtin->argtype != mhtype_none)
1489 {
1490 emit_opcode_typed (fmt, builtin->argtype, mhop_movn, mhop_movs);
1491 emit_instr (fmt, (mh_instr_t) (long) R_ARG);
1492 emit_instr (fmt, (mh_instr_t) (long) R_REG);
1493 }
1494
1495 emit_opcode (fmt, mhop_call);
1496 emit_instr (fmt, (mh_instr_t) builtin->fun);
1497 }
1498
/* Generate code for the single parse tree node NODE, appending it to
   the program of FMT.  A NULL node generates nothing.  An unknown
   node type is an internal error. */
static void
codegen_node (struct mh_format *fmt, struct node *node)
{
  if (!node)
    return;
  switch (node->nodetype)
    {
    case fmtnode_print:
      if (node->v.prt.arg->nodetype == fmtnode_literal)
        {
          emit_opcode (fmt, mhop_printlit);
          emit_string (fmt, node->v.prt.arg->v.str);
        }
      else if (node->v.prt.arg->nodetype == fmtnode_number)
        {
          /* A numeric literal is printed as its string
             representation. */
          char *s;
          emit_opcode (fmt, mhop_printlit);
          mu_asprintf (&s, "%ld", node->v.prt.arg->v.num);
          emit_string (fmt, s);
          free (s);
        }
      else
        {
          /* Evaluate the argument, apply the format specifier, if
             any, and print the result according to its type. */
          codegen_node (fmt, node->v.prt.arg);
          if (node->v.prt.fmtspec)
            {
              emit_opcode (fmt, mhop_fmtspec);
              emit_instr (fmt, (mh_instr_t) (long) node->v.prt.fmtspec);
            }

          if (node->v.prt.arg->datatype != mhtype_none)
            emit_opcode_typed (fmt, node->v.prt.arg->datatype,
                               mhop_printn, mhop_prints);
        }
      break;

    case fmtnode_literal:
      emit_opcode (fmt, mhop_sets);
      emit_instr (fmt, (mh_instr_t) (long) R_REG);
      emit_string (fmt, node->v.str);
      break;

    case fmtnode_number:
      emit_opcode (fmt, mhop_setn);
      emit_instr (fmt, (mh_instr_t) (long) R_REG);
      emit_instr (fmt, (mh_instr_t) (long) node->v.num);
      break;

    case fmtnode_body:
      emit_opcode (fmt, mhop_ldbody);
      emit_instr (fmt, (mh_instr_t) (long) R_REG);
      break;

    case fmtnode_comp:
      emit_opcode (fmt, mhop_ldcomp);
      emit_instr (fmt, (mh_instr_t) (long) R_REG);
      emit_string (fmt, node->v.str);
      break;

    case fmtnode_funcall:
      emit_funcall (fmt, node->v.funcall.builtin, node->v.funcall.arg);
      break;

    case fmtnode_cntl:
      {
        /* Positions of the branch operands to be patched later:
           pc[0] - the conditional branch taken when the condition is
                   false;
           pc[1] - the unconditional branch over the false part. */
        long pc[2];

        /* Implementation of control escapes is a bit tricky. According to
           the spec:

             "[f]unction escapes write their return value in 'num' for
              functions returning integer or boolean values"

           That means that after "%<(gt 1024)" the value of 'num' would be
           1 or 0, depending on its value prior to entering the conditional.
           However this would defeat the purpose of the conditional itself,
           because then the following construct would be meaningless:

               %<(gt 1024)...%?(gt 512)...%|...%>

           Indeed, in MH implementation the value of 'num' propagates into
           the conditional expression, because any function escape serving
           as condition is evaluated in a separate context.

           To ensure this behavior, the virtual machine of GNU MH holds the
           value of the 'num' register on stack while evaluating the condition
           and restores it afterward.

           On the other hand, the spec says that:

             "[c]ontrol escapes return a boolean value, setting num to 1
              if the last explicit condition evaluated by a `%<' or `%?'
              control succeeded, and 0 otherwise."

           To ensure this, the value on top of stack is exchanged with the
           value of the 'num' register upon entering the 'if' branch, and
           the tos value is popped into the 'num' upon leaving it. Any
           'else if' branches are handled the same way.

           Before leaving the 'else' branch, the 'num' is set to 0 explicitly.
        */
        emit_opcode (fmt, mhop_pushn);
        codegen_node (fmt, node->v.cntl.cond);
        emit_opcode_typed (fmt, node->v.cntl.cond->datatype,
                           mhop_brzn, mhop_brzs);
        /* Reserve a cell for the branch offset. */
        pc[0] = fmt->progcnt;
        emit_instr (fmt, (mh_instr_t) NULL);
        if (node->v.cntl.iftrue)
          {
            emit_opcode (fmt, mhop_xchgn);
            codegen_nodelist (fmt, node->v.cntl.iftrue);
          }
        emit_opcode (fmt, mhop_popn);

        if (node->v.cntl.iffalse)
          {
            /* The true part ends with a jump over the false part. */
            emit_opcode (fmt, mhop_branch);
            pc[1] = fmt->progcnt;
            emit_instr (fmt, (mh_instr_t) NULL);

            /* The false part begins here: patch the conditional
               branch.  Offsets are relative to the operand cell. */
            fmt->prog[pc[0]].num = fmt->progcnt - pc[0];
            emit_opcode (fmt, mhop_popn);
            codegen_nodelist (fmt, node->v.cntl.iffalse);
            /* A plain 'else' branch sets 'num' to 0; an 'else if' is
               itself a control node and sets it on its own. */
            if (node->v.cntl.iffalse->nodetype != fmtnode_cntl)
              {
                emit_opcode (fmt, mhop_setn);
                emit_instr (fmt, (mh_instr_t) (long) R_REG);
                emit_instr (fmt, (mh_instr_t) (long) 0);
              }
            fmt->prog[pc[1]].num = fmt->progcnt - pc[1];
          }
        else
          fmt->prog[pc[0]].num = fmt->progcnt - pc[0];
      }
      break;

    case fmtnode_typecast:
      /* Evaluate the argument, then convert the result. */
      codegen_node (fmt, node->v.arg);
      switch (node->datatype)
        {
        case mhtype_num:
          emit_opcode (fmt, mhop_atoi);
          break;

        case mhtype_str:
          emit_opcode (fmt, mhop_itoa);
          break;

        default:
          abort ();
        }
      break;

    default:
      abort ();
    }
}
1656
1657 static void
1658 codegen_nodelist (struct mh_format *fmt, struct node *node)
1659 {
1660 while (node)
1661 {
1662 codegen_node (fmt, node);
1663 node = node->next;
1664 }
1665 }
1666
1667 static void
1668 codegen (mh_format_t *fmtptr, int tree)
1669 {
1670 struct mh_format *fmt;
1671
1672 fmt = mu_zalloc (sizeof *fmt);
1673
1674 *fmtptr = fmt;
1675 emit_opcode (fmt, mhop_stop);
1676 codegen_nodelist (fmt, parse_tree);
1677 emit_opcode (fmt, mhop_stop);
1678
1679 if (tree)
1680 {
1681 fmt->tree = parse_tree;
1682 fmt->pool = tokpool;
1683 }
1684 else
1685 {
1686 node_list_free (parse_tree);
1687 mu_opool_destroy (&tokpool);
1688 }
1689 }
1690
1691
1692