1 %{
2 /* GNU Mailutils -- a suite of utilities for electronic mail
3    Copyright (C) 1999-2021 Free Software Foundation, Inc.
4 
5    GNU Mailutils is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3, or (at your option)
8    any later version.
9 
10    GNU Mailutils is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with GNU Mailutils.  If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include <mh.h>
19 #include <mh_format.h>
20 #include <sys/stat.h>
21 
22 int yyerror (const char *s);
23 int yylex (void);
24 
25 static mu_opool_t tokpool;     /* Temporary token storage */
26 
27 
/* Lexical context.  The value on top of the context stack selects the
   scanner routine that yylex dispatches to. */
enum context
  {
    ctx_init = 0, /* Normal text */
    ctx_if,       /* After %< or %? */
    ctx_expr,     /* Expression within cond */
    ctx_func,     /* after (func */
  };

/* Context stack: CTX_STACK holds CTX_MAX allocated slots, of which the
   first CTX_TOS are in use. */
static enum context *ctx_stack;
size_t ctx_tos;
size_t ctx_max;
40 
41 static inline void
ctx_push(enum context ctx)42 ctx_push (enum context ctx)
43 {
44   if (ctx_tos == ctx_max)
45     ctx_stack = mu_2nrealloc (ctx_stack, &ctx_max, sizeof (ctx_stack[0]));
46   ctx_stack[ctx_tos++] = ctx;
47 }
48 
49 static inline void
ctx_pop(void)50 ctx_pop (void)
51 {
52   if (ctx_tos == 0)
53     {
54       yyerror ("out of context");
55       abort ();
56     }
57   ctx_tos--;
58 }
59 
60 static inline enum context
ctx_get(void)61 ctx_get (void)
62 {
63   return ctx_stack[ctx_tos-1];
64 }
65 
/* Kinds of parse tree nodes.  Each kind uses the member of the `v'
   union in struct node noted below. */
enum node_type
{
  fmtnode_print = 0,   /* print/format an argument (v.prt) */
  fmtnode_literal,     /* string literal (v.str) */
  fmtnode_number,      /* numeric literal (v.num) */
  fmtnode_body,        /* the "body" component (no payload) */
  fmtnode_comp,        /* named component (v.str) */
  fmtnode_funcall,     /* builtin function call (v.funcall) */
  fmtnode_cntl,        /* conditional (v.cntl) */
  fmtnode_typecast,    /* conversion of v.arg to the node's datatype */
};
77 
78 struct node
79 {
80   enum node_type nodetype;
81   enum mh_type datatype;
82   int printflag;
83   struct node *prev, *next;
84   union
85   {
86     char *str;
87     long num;
88     struct node *arg;
89     struct
90     {
91       int fmtspec;
92       struct node *arg;
93     } prt;
94     struct
95     {
96       mh_builtin_t *builtin;
97       struct node *arg;
98     } funcall;
99     struct
100     {
101       struct node *cond;
102       struct node *iftrue;
103       struct node *iffalse;
104     } cntl;
105   } v;
106 };
107 
108 static struct node *parse_tree;
109 static struct node *new_node (enum node_type nodetype, enum mh_type datatype);
110 
111 static struct node *printelim (struct node *root);
112 static void codegen (mh_format_t *fmt, int tree);
113 static struct node *typecast (struct node *node, enum mh_type type);
114 
115 %}
116 
/* Semantic values of tokens and nonterminals. */
%union {
  char *str;              /* STRING, COMPONENT */
  char const *mesg;       /* Diagnostic text set by bogus() for BOGUS */
  long num;               /* NUMBER */
  struct {
    struct node *head;
    struct node *tail;
  } nodelist;             /* Statement list */
  struct node *nodeptr;   /* Single parse tree node */
  mh_builtin_t *builtin;  /* FUNCTION */
  int fmtspec;            /* FMTSPEC */
  struct {
    enum mh_type type;    /* Tells which member of V is set */
    union
    {
      char *str;
      long num;
    } v;
  } arg;                  /* ARGUMENT */
};
136 
137 %token <num> NUMBER "number"
138 %token <str> STRING "string" COMPONENT "component"
139 %token <arg> ARGUMENT "argument"
140 %token <builtin> FUNCTION "function name"
141 %token IF "%<" ELIF "%?" ELSE "%|" FI "%>"
142 %token <fmtspec> FMTSPEC "format specifier"
143 %token BOGUS
144 %token EOFN ")"
145 
146 %type <nodelist> list zlist elif_list
147 %type <nodeptr> item escape component funcall cntl argument
148 %type <nodeptr> cond cond_expr elif_part else_part printable
149 %type <builtin> function
150 %type <fmtspec> fmtspec
151 
152 %error-verbose
153 
154 %%
155 
/* The whole format: the head of the item list becomes the parse tree. */
input     : list
            {
              parse_tree = $1.head;
            }
          ;

/* Non-empty, doubly-linked list of items. */
list      : item
            {
              $$.head = $1;
              $$.tail = $1;
            }
          | list item
            {
              struct node *last = $1.tail;

              last->next = $2;
              $2->prev = last;
              $$.head = $1.head;
              $$.tail = $2;
            }
          ;
174 
/* A list item: literal text, wrapped into a print node, or an escape. */
item      : STRING
            {
              struct node *lit = new_node (fmtnode_literal, mhtype_str);
              struct node *prt = new_node (fmtnode_print, mhtype_str);

              lit->v.str = $1;
              prt->v.prt.arg = lit;
              $$ = prt;
            }
          | escape
          ;
184 
/* An escape is a conditional or an optionally formatted printable.
   A printable flagged MHA_NOPRINT is passed through as is; any other
   is wrapped into a print node, whose format specifier is zeroed when
   the printable is flagged MHA_IGNOREFMT. */
escape    : cntl
          | fmtspec printable
            {
              struct node *arg = $2;

              if (!(arg->printflag & MHA_NOPRINT))
                {
                  struct node *prt = new_node (fmtnode_print, arg->datatype);

                  if (arg->printflag & MHA_IGNOREFMT)
                    prt->v.prt.fmtspec = 0;
                  else
                    prt->v.prt.fmtspec = $1;
                  prt->v.prt.arg = arg;
                  $$ = prt;
                }
              else
                $$ = arg;
            }
          ;
198 
/* Something that yields a value: a component or a function call. */
printable : component
          | funcall
          ;

/* A component reference.  The name "body" (compared case-insensitively)
   gets a dedicated node type; any other name is stored in the node. */
component : COMPONENT
            {
              int is_body = mu_c_strcasecmp ($1, "body") == 0;

              if (is_body)
                $$ = new_node (fmtnode_body, mhtype_str);
              else
                {
                  struct node *np = new_node (fmtnode_comp, mhtype_str);

                  np->v.str = $1;
                  $$ = np;
                }
            }
          ;
214 
/* A function call: validates the argument against the builtin's
   declaration and builds the call node.

   - A builtin with argtype mhtype_none must not be given an argument.
   - A missing argument is replaced by "" or 0 for MHA_OPTARG_NIL
     builtins, accepted for MHA_OPTARG ones and rejected otherwise.
   - MHA_LITERAL builtins accept only literal arguments; a number
     passed where a string is expected is converted to its decimal
     representation.
   - For MHA_VOID builtins the argument itself, flagged MHA_NOPRINT,
     becomes the value of the rule. */
funcall   : function argument EOFN
            {
              struct node *arg;

              ctx_pop ();

              arg = $2;
              if ($1->argtype == mhtype_none)
                {
                  if (arg)
                    {
                      yyerror ("function doesn't take arguments");
                      YYABORT;
                    }
                }
              else if (arg == NULL)
                {
                  if ($1->flags & MHA_OPTARG_NIL)
                    {
                      switch ($1->argtype)
                        {
                        case mhtype_str:
                          arg = new_node (fmtnode_literal, mhtype_str);
                          arg->v.str = "";
                          break;

                        case mhtype_num:
                          arg = new_node (fmtnode_number, mhtype_num);
                          arg->v.num = 0;
                          break;

                        default:
                          abort ();
                        }
                    }
                  else if ($1->flags & MHA_OPTARG)
                    {
                      /* ok - ignore */;
                    }
                  else
                    {
                      yyerror ("required argument missing");
                      YYABORT;
                    }
                }
              else if ($1->flags & MHA_LITERAL)
                {
                  switch ($1->argtype)
                    {
                    case mhtype_num:
                      if (arg->nodetype == fmtnode_number)
                        /* ok */;
                      else
                        {
                          yyerror ("argument must be a number");
                          YYABORT;
                        }
                      break;

                    case mhtype_str:
                      if (arg->nodetype == fmtnode_literal)
                        /* ok */;
                      else if (arg->nodetype == fmtnode_number)
                        {
                          /* node_free never releases v.str, so a
                             malloc'ed string stored here would leak.
                             Copy the text into tokpool, where the
                             lexer keeps its literal strings, and free
                             the temporary. */
                          char *s;

                          mu_asprintf (&s, "%ld", arg->v.num);
                          mu_opool_append (tokpool, s, strlen (s) + 1);
                          free (s);
                          arg->nodetype = fmtnode_literal;
                          arg->datatype = mhtype_str;
                          arg->v.str = mu_opool_finish (tokpool, NULL);
                        }
                      else
                        {
                          yyerror ("argument must be literal");
                          YYABORT;
                        }
                      break;

                    default:
                      break;
                    }
                }

              if ($1->flags & MHA_VOID)
                {
                  /* Use ARG rather than $2: ARG also covers the default
                     supplied above for MHA_OPTARG_NIL, in which case
                     $2 is NULL. */
                  arg->printflag = MHA_NOPRINT;
                  $$ = arg;
                }
              else
                {
                  $$ = new_node (fmtnode_funcall, $1->type);
                  $$->v.funcall.builtin = $1;
                  $$->v.funcall.arg = typecast (arg, $1->argtype);
                  $$->printflag = $1->flags & MHA_PRINT_MASK;
                  if ($1->type == mhtype_none)
                    $$->printflag = MHA_NOPRINT;
                }
            }
          ;
313 
/* Optional format specifier; 0 when absent. */
fmtspec   : /* empty */
            {
              $$ = 0;
            }
          | FMTSPEC
            {
              $$ = $1;
            }
          ;

/* Function name.  Switches the lexer to the function context, which
   is left again in the funcall rule. */
function  : FUNCTION
            {
              ctx_push (ctx_func);
              $$ = $1;
            }
          ;
326 
/* Function argument: nothing, a literal string or number, or a nested
   escape stripped of its print node. */
argument  : /* empty */
            {
              $$ = NULL;
            }
          | ARGUMENT
            {
              struct node *np = NULL;

              switch ($1.type)
                {
                case mhtype_none:
                  break;

                case mhtype_str:
                  np = new_node (fmtnode_literal, mhtype_str);
                  np->v.str = $1.v.str;
                  break;

                case mhtype_num:
                  np = new_node (fmtnode_number, mhtype_num);
                  np->v.num = $1.v.num;
                  break;
                }
              $$ = np;
            }
          | escape
            {
              $$ = printelim ($1);
            }
          ;
354 
/* Conditional.  The elif/else part, if any, becomes the false branch.
             1   2    3       4     5    */
cntl      : if cond zlist elif_part fi
            {
              struct node *np = new_node (fmtnode_cntl, mhtype_num);

              np->v.cntl.cond = $2;
              np->v.cntl.iftrue = $3.head;
              np->v.cntl.iffalse = $4;
              $$ = np;
            }
          ;
364 
/* Possibly empty list of items. */
zlist     : /* empty */
            {
              $$.head = NULL;
              $$.tail = NULL;
            }
          | list
            {
              $$ = $1;
            }
          ;
371 
/* %< opens a conditional: the lexer is switched to condition context. */
if        : IF
            {
              ctx_push (ctx_if);
            }
          ;

/* %> closes a conditional: the context pushed for it is dropped. */
fi        : FI
            {
              ctx_pop ();
            }
          ;

/* %? starts another condition: the current context is replaced by the
   condition context. */
elif      : ELIF
            {
              ctx_pop ();
              ctx_push (ctx_if);
            }
          ;
390 
/* Condition.  Once it is parsed, the condition context is replaced by
   the expression context. */
cond      : cond_expr
            {
              struct node *np;

              ctx_pop ();
              ctx_push (ctx_expr);
              np = printelim ($1);
              $$ = np;
            }
          ;

/* Only a component or a function call can serve as a condition. */
cond_expr : component
          | funcall
          ;
402 
/* What follows the true branch: nothing, an else branch, or a chain of
   elif branches.  The value is the node to use as the false branch. */
elif_part : /* empty */
            {
              $$ = NULL;
            }
          | else_part
            {
              $$ = $1;
            }
          | elif_list
            {
              $$ = $1.head;
            }
          ;
413 
/* Chain of elif branches.  Each branch is a conditional node hooked
   into the false branch of the preceding one; HEAD is the first
   conditional of the chain and TAIL the most recently attached node. */
elif_list : elif cond zlist
            {
              struct node *first = new_node (fmtnode_cntl, mhtype_num);

              first->v.cntl.cond = $2;
              first->v.cntl.iftrue = $3.head;
              first->v.cntl.iffalse = NULL;
              $$.head = first;
              $$.tail = first;
            }
          | elif_list elif cond zlist
            {
              struct node *branch = new_node (fmtnode_cntl, mhtype_num);

              branch->v.cntl.cond = $3;
              branch->v.cntl.iftrue = $4.head;
              branch->v.cntl.iffalse = NULL;

              $1.tail->v.cntl.iffalse = branch;

              $$.head = $1.head;
              $$.tail = branch;
            }
          | elif_list else_part
            {
              $1.tail->v.cntl.iffalse = $2;

              $$.head = $1.head;
              $$.tail = $2;
            }
          ;
441 
/* Else branch: the head of its (possibly empty) statement list. */
else_part : ELSE zlist
            {
              struct node *first = $2.head;

              $$ = first;
            }
          ;
447 
448 %%
449 
450 static char *start;
451 static char *tok_start;
452 static char *curp;
453 static mu_linetrack_t trk;
454 static struct mu_locus_range yylloc;
455 
456 static inline size_t
457 token_leng (void)
458 {
459   return curp - tok_start;
460 }
461 
462 static inline void
463 mark (void)
464 {
465   if (curp > tok_start)
466     mu_linetrack_advance (trk, &yylloc, tok_start, token_leng ());
467   tok_start = curp;
468 }
469 
470 static inline int
471 input (void)
472 {
473   if (*curp == 0)
474     return 0;
475   return *curp++;
476 }
477 
/* Consume N input characters as a token of their own: the token mark
   is set both before and after them. */
static inline void
eatinput (size_t n)
{
  mark ();
  for (; n > 0; n--)
    input ();
  mark ();
}
486 
487 static inline int
488 peek (void)
489 {
490   return *curp;
491 }
492 
493 static inline int
494 unput (int c)
495 {
496   if (curp == start)
497     {
498       mu_error (_("%s:%d: INTERNAL ERROR: out of unput space: please report"),
499 		__FILE__, __LINE__);
500       abort ();
501     }
502   return *--curp = c;
503 }
504 
505 static int
506 skip (int class)
507 {
508   curp = mu_str_skip_class (curp, class);
509   return *curp;
510 }
511 
512 static int
513 skipeol (void)
514 {
515   int c;
516 
517   do
518     {
519       c = input ();
520       if (c == '\\' && (c = input ()) == '\n')
521 	c = input ();
522     }
523   while (c && c != '\n');
524   return *curp;
525 }
526 
527 
528 static inline int
529 bogus (const char *mesg)
530 {
531   yylval.mesg = mesg;
532   return BOGUS;
533 }
534 
535 static char *
536 find_bol (unsigned line)
537 {
538   char *p = start;
539 
540   while (--line)
541     {
542       while (*p != '\n')
543 	{
544 	  if (*p == 0)
545 	    return p;
546 	  p++;
547 	}
548       p++;
549     }
550   return p;
551 }
552 
553 int
554 yyerror (const char *s)
555 {
556   if (yychar != BOGUS)
557     {
558       char *bol;
559       size_t len;
560       static char tab[] = "        ";
561       size_t b = 0, e = 0;
562       size_t i;
563 
564       bol = find_bol (yylloc.beg.mu_line);
565       len = strcspn (bol, "\n");
566 
567       mu_diag_at_locus_range (MU_DIAG_ERROR, &yylloc, "%s", s);
568       for (i = 0; i < len; i++)
569 	/* How ... tribal! */
570 	{
571 	  if (bol[i] == '\t')
572 	    {
573 	      mu_stream_write (mu_strerr, tab, strlen (tab), NULL);
574 	      if (yylloc.beg.mu_col > i)
575 		b += strlen (tab) - 1;
576 	      if (yylloc.end.mu_col > i)
577 		e += strlen (tab) - 1;
578 	    }
579 	  else
580 	    mu_stream_write (mu_strerr, bol + i, 1, NULL);
581 	}
582       mu_stream_write (mu_strerr, "\n", 1, NULL);
583       if (mu_locus_point_eq (&yylloc.beg, &yylloc.end))
584 	mu_error ("%*.*s^",
585 		  (int) (b + yylloc.beg.mu_col - 1),
586 		  (int) (b + yylloc.beg.mu_col - 1), "");
587       else
588 	mu_error ("%*.*s^%*.*s^",
589 		  (int)(b + yylloc.beg.mu_col - 1),
590 		  (int)(b + yylloc.beg.mu_col - 1), "",
591 		  (int)(e + yylloc.end.mu_col - yylloc.beg.mu_col - b - 1),
592 		  (int)(e + yylloc.end.mu_col - yylloc.beg.mu_col - b - 1),
593 		  "");
594     }
595   return 0;
596 }
597 
598 static int backslash(int c);
599 
600 struct lexer_tab
601 {
602   char *ctx_name;
603   int (*lexer) (void);
604 };
605 
606 static int yylex_initial (void);
607 static int yylex_cond (void);
608 static int yylex_expr (void);
609 static int yylex_func (void);
610 
611 static struct lexer_tab lexer_tab[] = {
612   [ctx_init] = { "initial",    yylex_initial },
613   [ctx_if]   = { "condition",  yylex_cond },
614   [ctx_expr] = { "expression", yylex_expr },
615   [ctx_func] = { "function",   yylex_func }
616 };
617 
618 int
619 yylex (void)
620 {
621   int tok;
622 
623   do
624     {
625       mark ();
626       if (yydebug)
627 	fprintf (stderr, "lex: [%s] at %-10.10s...]\n",
628 		 lexer_tab[ctx_get ()].ctx_name, curp);
629       tok = lexer_tab[ctx_get ()].lexer ();
630     }
631   while (tok == STRING && yylval.str[0] == 0);
632 
633   mark ();
634   if (tok == BOGUS)
635     yyerror (yylval.mesg);
636   return tok;
637 }
638 
639 static int
640 token_fmtspec (int flags)
641 {
642   int num = 0;
643 
644   if (peek () == '0')
645     {
646       flags |= MH_FMT_ZEROPAD;
647       input ();
648     }
649   else if (!mu_isdigit (peek ()))
650     {
651       return bogus ("expected digit");
652     }
653   mark ();
654   while (*curp && mu_isdigit (peek ()))
655     num = num * 10 + input () - '0';
656   yylval.fmtspec = flags | num;
657   unput ('%');
658   return FMTSPEC;
659 }
660 
661 static int
662 token_function (void)
663 {
664   eatinput (1);
665   skip (MU_CTYPE_IDENT);
666   if (token_leng () == 0 || !strchr (" \t(){%", peek ()))
667     {
668       return bogus ("expected function name");
669     }
670 
671   yylval.builtin = mh_lookup_builtin (tok_start, token_leng ());
672 
673   if (!yylval.builtin)
674     {
675       return bogus ("unknown function");
676     }
677   if (!yylval.builtin->fun
678       && !(yylval.builtin->flags & (MHA_SPECIAL|MHA_VOID)))
679     {
680       mu_error ("INTERNAL ERROR at %s:%d: \"%s\" has no associated function"
681 		" and is not marked as MHA_SPECIAL",
682 		__FILE__, __LINE__, yylval.builtin->name);
683       abort ();
684     }
685 
686   return FUNCTION;
687 }
688 
689 static int
690 token_component (void)
691 {
692   eatinput (1);
693   if (!mu_isalpha (peek ()))
694     {
695       return bogus ("component name expected");
696     }
697   mark ();
698   if (skip (MU_CTYPE_HEADR) != '}')
699     {
700       return bogus ("component name expected");
701     }
702   mu_opool_append (tokpool, tok_start, token_leng ());
703   mu_opool_append_char (tokpool, 0);
704   yylval.str = mu_opool_finish (tokpool, NULL);
705   eatinput (1);
706   return COMPONENT;
707 }
708 
709 int
710 yylex_initial (void)
711 {
712   int c;
713 
714  again:
715   mark ();
716   if (peek () == '%')
717     {
718       input ();
719 
720       switch (c = input ())
721 	{
722 	case ';':
723 	  skipeol ();
724 	  goto again;
725 	case '<':
726 	  return IF;
727 	case '%':
728 	  unput (c);
729 	  unput (c);
730 	  break;
731 	case '(':
732 	  unput (c);
733 	  return token_function ();
734 	case '{':
735 	  unput (c);
736 	  return token_component ();
737 	case '-':
738 	  return token_fmtspec (MH_FMT_RALIGN);
739 	case '0': case '1': case '2': case '3': case '4':
740 	case '5': case '6': case '7': case '8': case '9':
741 	  unput (c);
742 	  return token_fmtspec (MH_FMT_DEFAULT);
743 	default:
744 	  return bogus ("component or function name expected");
745       }
746     }
747 
748   c = peek ();
749 
750   if (c == 0)
751     return 0;
752 
753   while ((c = input ()) != 0)
754     {
755       if (c == '%')
756 	{
757 	  if (peek () == '%')
758 	    mu_opool_append_char (tokpool, input ());
759 	  else
760 	    {
761 	      unput (c);
762 	      break;
763 	    }
764 	}
765       else if (c == '\\')
766 	{
767 	  if ((c = input ()) == 0)
768 	    {
769 	      return bogus ("unexpected end of file");
770 	    }
771 	  if (c != '\n')
772 	    mu_opool_append_char (tokpool, backslash (c));
773 	}
774       else
775 	mu_opool_append_char (tokpool, c);
776     }
777 
778   mu_opool_append_char (tokpool, 0);
779   yylval.str = mu_opool_finish (tokpool, NULL);
780   return STRING;
781 }
782 
/* Scanner for the condition context (after %< or %?).  Expects a
   function call or a component, optionally preceded by
   backslash-newline pairs, which are skipped.  Anything else yields
   BOGUS. */
int
yylex_cond (void)
{
  for (;;)
    {
      int c = peek ();

      if (c == '(')
        return token_function ();
      if (c == '{')
        return token_component ();
      if (c == '\\')
        {
          input ();
          if (input () == '\n')
            continue;
        }
      return bogus ("'(' or '{' expected");
    }
}
803 
804 int
805 yylex_expr (void)
806 {
807   int c;
808 
809   if ((c = input ()) == '%')
810     {
811       switch (c = input ())
812 	{
813 	case '?':
814 	  return ELIF;
815 	case '|':
816 	  return ELSE;
817 	case '>':
818 	  return FI;
819 	}
820       unput (c);
821       unput ('%');
822     }
823   else
824     unput (c);
825   return yylex_initial ();
826 }
827 
828 int
829 yylex_func (void)
830 {
831   int c;
832 
833   /* Expected argument or closing parenthesis */
834  again:
835   mark ();
836   switch (peek ())
837     {
838     case '(':
839       return token_function ();
840 
841     case ')':
842       eatinput (1);
843       return EOFN;
844 
845     case '{':
846       return token_component ();
847 
848     case '%':
849       input ();
850       switch (peek ())
851 	{
852 	case '<':
853 	  input ();
854 	  return IF;
855 
856 	case '%':
857 	  break;
858 
859 	default:
860 	  return bogus ("expected '%' or '<'");
861 	}
862       break;
863 
864     case ' ':
865     case '\t':
866       skip (MU_CTYPE_SPACE);
867       if (peek () == '%')
868 	goto again;
869       break;
870 
871     default:
872       return input ();
873     }
874 
875   mark ();
876 
877   while ((c = input ()) != ')')
878     {
879       if (c == 0)
880 	{
881 	  return bogus ("expected ')'");
882 	}
883 
884       if (c == '\\')
885 	{
886 	  if ((c = input ()) == 0)
887 	    {
888 	      return bogus ("unexpected end of file");
889 	    }
890 	  mu_opool_append_char (tokpool, backslash (c));
891 	}
892       else
893 	mu_opool_append_char (tokpool, c);
894     }
895   mu_opool_append_char (tokpool, 0);
896 
897   yylval.arg.v.str = mu_opool_finish (tokpool, NULL);
898   yylval.arg.type = mhtype_str;
899   unput (c);
900 
901   if (mu_isdigit (yylval.arg.v.str[0])
902       || (yylval.arg.v.str[0] == '-' && mu_isdigit (yylval.arg.v.str[1])))
903     {
904       long n;
905       char *p;
906       errno = 0;
907       n = strtol (yylval.arg.v.str, &p, 0);
908       if (errno == 0 && *p == 0)
909 	{
910 	  yylval.arg.type = mhtype_num;
911 	  yylval.arg.v.num = n;
912 	}
913     }
914 
915   if (peek () != ')')
916     {
917       return bogus ("expected ')'");
918     }
919 
920   return ARGUMENT;
921 }
922 
923 static int
924 format_parse (mh_format_t *fmtptr, char *format_str,
925 	      struct mu_locus_point const *locus,
926 	      int flags)
927 {
928   int rc;
929   char *p = getenv ("MHFORMAT_DEBUG");
930 
931   if (p || (flags & MH_FMT_PARSE_DEBUG))
932     yydebug = 1;
933   start = tok_start = curp = format_str;
934   mu_opool_create (&tokpool, MU_OPOOL_ENOMEMABRT);
935 
936   ctx_tos = ctx_max = 0;
937   ctx_stack = NULL;
938   ctx_push (ctx_init);
939   mu_linetrack_create (&trk, "input", 2);
940   if (locus && locus->mu_file)
941     mu_linetrack_rebase (trk, locus);
942   mu_locus_range_init (&yylloc);
943 
944   rc = yyparse ();
945   if (rc == 0)
946     codegen (fmtptr, flags & MH_FMT_PARSE_TREE);
947   else
948     mu_opool_destroy (&tokpool);
949 
950   mu_locus_range_deinit (&yylloc);
951   mu_linetrack_destroy (&trk);
952   free (ctx_stack);
953 
954   parse_tree = NULL;
955   tokpool = NULL;
956   return rc;
957 }
958 
959 int
960 mh_format_string_parse (mh_format_t *retfmt, char const *format_str,
961 			struct mu_locus_point const *locus,
962 			int flags)
963 {
964   char *fmts = mu_strdup (format_str);
965   int rc = format_parse (retfmt, fmts, locus, flags);
966   free (fmts);
967   return rc;
968 }
969 
/* Read the format file NAME, located by mh_find_file, into a newly
   allocated null-terminated string.  A single trailing newline, if
   present, is removed.

   On success store the string in *PFORMAT and return 0; the caller is
   responsible for freeing it.  On failure print a diagnostic and
   return -1, leaving *PFORMAT untouched.

   An empty file yields an empty string.  All resources acquired here
   are released on every path through the common exit below. */
int
mh_read_formfile (char const *name, char **pformat)
{
  FILE *fp = NULL;
  struct stat st;
  char *format_str = NULL;
  char *file_name;
  size_t size;
  size_t nread;
  int result = -1;
  int rc;

  rc = mh_find_file (name, &file_name);
  if (rc)
    {
      mu_error (_("cannot access format file %s: %s"), name, strerror (rc));
      return -1;
    }

  if (stat (file_name, &st))
    {
      mu_error (_("cannot stat format file %s: %s"), file_name,
                strerror (errno));
      goto end;
    }

  fp = fopen (file_name, "r");
  if (!fp)
    {
      mu_error (_("cannot open format file %s: %s"), file_name,
                strerror (errno));
      goto end;
    }

  size = st.st_size;
  format_str = mu_alloc (size + 1);
  /* Read byte-wise, so that the number of bytes actually obtained is
     known and an empty file is not mistaken for a failure. */
  nread = fread (format_str, 1, size, fp);
  if (nread < size && ferror (fp))
    {
      mu_error (_("error reading format file %s: %s"), file_name,
                strerror (errno));
      goto end;
    }

  format_str[nread] = 0;
  if (nread > 0 && format_str[nread - 1] == '\n')
    format_str[nread - 1] = 0;

  *pformat = format_str;
  format_str = NULL;            /* now owned by the caller */
  result = 0;

 end:
  if (fp)
    fclose (fp);
  free (format_str);
  free (file_name);
  return result;
}
1020 
1021 int
1022 mh_format_file_parse (mh_format_t *retfmt, char const *formfile, int flags)
1023 {
1024   char *fmts;
1025   int rc;
1026 
1027   rc = mh_read_formfile (formfile, &fmts);
1028   if (rc == 0)
1029     {
1030       struct mu_locus_point loc;
1031       loc.mu_file = formfile;
1032       loc.mu_line = 1;
1033       loc.mu_col = 0;
1034       rc = format_parse (retfmt, fmts, &loc, flags);
1035       free (fmts);
1036     }
1037   return rc;
1038 }
1039 
/* Translate C, the character following a backslash, into the character
   the escape sequence stands for.  The sequences \b, \f, \n, \r and \t
   are recognized; any other character is returned unchanged. */
int
backslash (int c)
{
  switch (c)
    {
    case 'b':
      return '\b';

    case 'f':
      return '\f';

    case 'n':
      return '\n';

    case 'r':
      return '\r';

    case 't':
      return '\t';

    default:
      return c;
    }
}
1053 
1054 static struct node *
1055 new_node (enum node_type nodetype, enum mh_type datatype)
1056 {
1057   struct node *np = mu_zalloc (sizeof *np);
1058   np->nodetype = nodetype;
1059   np->datatype = datatype;
1060   return np;
1061 }
1062 
1063 static void node_list_free (struct node *node);
1064 
1065 static void
1066 node_free (struct node *node)
1067 {
1068   if (!node)
1069     return;
1070   switch (node->nodetype)
1071     {
1072     case fmtnode_print:
1073       node_free (node->v.prt.arg);
1074       break;
1075 
1076     case fmtnode_literal:
1077       break;
1078 
1079     case fmtnode_number:
1080       break;
1081 
1082     case fmtnode_body:
1083       break;
1084 
1085     case fmtnode_comp:
1086       break;
1087 
1088     case fmtnode_funcall:
1089       node_free (node->v.funcall.arg);
1090       break;
1091 
1092     case fmtnode_cntl:
1093       node_list_free (node->v.cntl.cond);
1094       node_list_free (node->v.cntl.iftrue);
1095       node_list_free (node->v.cntl.iffalse);
1096       break;
1097 
1098     default:
1099       abort ();
1100     }
1101   free (node);
1102 }
1103 
1104 static void
1105 node_list_free (struct node *node)
1106 {
1107   while (node)
1108     {
1109       struct node *next = node->next;
1110       node_free (node);
1111       node = next;
1112     }
1113 }
1114 
1115 static struct node *
1116 typecast (struct node *node, enum mh_type type)
1117 {
1118   if (!node)
1119     /* FIXME: when passing optional argument, the caller must know the
1120        type of value returned by the previous expression */
1121     return node;
1122 
1123   if (node->datatype == type)
1124     return node;
1125   switch (node->nodetype)
1126     {
1127     case fmtnode_cntl:
1128       node->v.cntl.iftrue = typecast (node->v.cntl.iftrue, type);
1129       node->v.cntl.iffalse = typecast (node->v.cntl.iffalse, type);
1130       node->datatype = type;
1131       break;
1132 
1133     default:
1134       {
1135 	struct node *arg = new_node (fmtnode_typecast, type);
1136 	arg->v.arg = node;
1137 	node = arg;
1138       }
1139     }
1140   return node;
1141 }
1142 
1143 #define INLINE -1
1144 
/* Print the indentation for nesting LEVEL: two spaces per level. */
static inline void
indent (int level)
{
  int width = 2 * level;

  printf ("%*.*s", width, width, "");
}
1150 
1151 static inline void
1152 delim (int level, char const *dstr)
1153 {
1154   if (level == INLINE)
1155     printf ("%s", dstr);
1156   else
1157     {
1158       printf ("\n");
1159       indent (level);
1160     }
1161 }
1162 
1163 static void dump_statement (struct node *node, int level);
1164 
1165 void
1166 mh_print_fmtspec (int fmtspec)
1167 {
1168   if (!(fmtspec & (MH_FMT_RALIGN|MH_FMT_ZEROPAD|MH_FMT_COMPWS)))
1169     printf ("NONE");
1170   else
1171     {
1172       if (!(fmtspec & MH_FMT_RALIGN))
1173 	printf ("NO");
1174       printf ("RALIGN|");
1175       if (!(fmtspec & MH_FMT_ZEROPAD))
1176 	printf ("NO");
1177       printf ("ZEROPAD|");
1178       if (!(fmtspec & MH_FMT_COMPWS))
1179 	printf ("NO");
1180       printf ("COMPWS");
1181     }
1182 }
1183 
1184 static char *typename[] = { "NONE", "NUM", "STR" };
1185 
1186 static void
1187 dump_node_pretty (struct node *node, int level)
1188 {
1189   if (!node)
1190     return;
1191   switch (node->nodetype)
1192     {
1193     case fmtnode_print:
1194       if (node->v.prt.fmtspec)
1195 	{
1196 	  printf ("FORMAT(");
1197 	  mh_print_fmtspec (node->v.prt.fmtspec);
1198 	  printf(", %d, ", node->v.prt.fmtspec & MH_WIDTH_MASK);
1199 	}
1200       else
1201 	printf ("PRINT(");
1202       dump_statement (node->v.prt.arg, INLINE);
1203       printf (")");
1204       break;
1205 
1206     case fmtnode_literal:
1207       {
1208 	char const *p = node->v.str;
1209 	putchar ('"');
1210 	while (*p)
1211 	  {
1212 	    if (*p == '\\' || *p == '"')
1213 	      {
1214 		putchar ('\\');
1215 		putchar (*p);
1216 	      }
1217 	    else if (*p == '\n')
1218 	      {
1219 		putchar ('\\');
1220 		putchar ('n');
1221 	      }
1222 	    else
1223 	      putchar (*p);
1224 	    p++;
1225 	  }
1226 	putchar ('"');
1227       }
1228       break;
1229 
1230     case fmtnode_number:
1231       printf ("%ld", node->v.num);
1232       break;
1233 
1234     case fmtnode_body:
1235       printf ("BODY");
1236       break;
1237 
1238     case fmtnode_comp:
1239       printf ("COMPONENT.%s", node->v.str);
1240       break;
1241 
1242     case fmtnode_funcall:
1243       printf ("%s(", node->v.funcall.builtin->name);
1244       dump_statement (node->v.funcall.arg, INLINE);
1245       printf (")");
1246       break;
1247 
1248     case fmtnode_cntl:
1249       printf ("IF (");
1250       dump_node_pretty (node->v.cntl.cond, INLINE);
1251       printf (") THEN");
1252 
1253       if (level != INLINE)
1254 	level++;
1255 
1256       delim (level, "; ");
1257 
1258       dump_statement (node->v.cntl.iftrue, level);
1259 
1260       if (node->v.cntl.iffalse)
1261 	{
1262 	  delim (level == INLINE ? level : level - 1, "; ");
1263 	  printf ("ELSE");
1264 	  delim (level, " ");
1265 	  dump_statement (node->v.cntl.iffalse, level);
1266 	}
1267 
1268       if (level != INLINE)
1269 	level--;
1270       delim (level, "; ");
1271       printf ("FI");
1272       break;
1273 
1274     case fmtnode_typecast:
1275       printf ("%s(", typename[node->datatype]);
1276       dump_node_pretty (node->v.arg, INLINE);
1277       printf (")");
1278       break;
1279 
1280     default:
1281       abort ();
1282     }
1283 }
1284 
1285 static void
1286 dump_statement (struct node *node, int level)
1287 {
1288   while (node)
1289     {
1290       dump_node_pretty (node, level);
1291       node = node->next;
1292       if (node)
1293 	delim (level, "; ");
1294     }
1295 }
1296 
1297 void
1298 mh_format_dump_code (mh_format_t fmt)
1299 {
1300   dump_statement (fmt->tree, 0);
1301   printf ("\n");
1302 }
1303 
1304 void
1305 mh_format_free_tree (mh_format_t fmt)
1306 {
1307   if (fmt)
1308     {
1309       node_list_free (fmt->tree);
1310       fmt->tree = NULL;
1311       mu_opool_destroy (&fmt->pool);
1312     }
1313 }
1314 
1315 void
1316 mh_format_free (mh_format_t fmt)
1317 {
1318   if (!fmt)
1319     return;
1320 
1321   mh_format_free_tree (fmt);
1322 
1323   if (fmt->prog)
1324     free (fmt->prog);
1325   fmt->progmax = fmt->progcnt = 0;
1326   fmt->prog = NULL;
1327 }
1328 
1329 void
1330 mh_format_destroy (mh_format_t *fmt)
1331 {
1332   if (fmt)
1333     {
1334       mh_format_free (*fmt);
1335       *fmt = NULL;
1336     }
1337 }
1338 
1339 static struct node *
1340 printelim (struct node *node)
1341 {
1342   if (node->nodetype == fmtnode_print)
1343     {
1344       struct node *arg = node->v.prt.arg;
1345       arg->next = node->next;
1346       free (node);
1347       node = arg;
1348     }
1349   return node;
1350 }
1351 
1352 #define PROG_MIN_ALLOC 8
1353 
1354 static inline void
1355 ensure_space (struct mh_format *fmt, size_t n)
1356 {
1357   while (fmt->progcnt + n >= fmt->progmax)
1358     {
1359       if (fmt->progmax == 0)
1360 	fmt->progmax = n < PROG_MIN_ALLOC ? PROG_MIN_ALLOC : n;
1361       fmt->prog = mu_2nrealloc (fmt->prog, &fmt->progmax, sizeof fmt->prog[0]);
1362     }
1363 }
1364 
1365 static void
1366 emit_instr (struct mh_format *fmt, mh_instr_t instr)
1367 {
1368   ensure_space (fmt, 1);
1369   fmt->prog[fmt->progcnt++] = instr;
1370 }
1371 
1372 static inline void
1373 emit_opcode (struct mh_format *fmt, mh_opcode_t op)
1374 {
1375   emit_instr (fmt, (mh_instr_t) op);
1376 }
1377 
1378 static void
1379 emit_string (struct mh_format *fmt, char const *str)
1380 {
1381   size_t length = strlen (str) + 1;
1382   size_t count = (length + sizeof (mh_instr_t)) / sizeof (mh_instr_t) + 1;
1383 
1384   ensure_space (fmt, count);
1385   emit_instr (fmt, (mh_instr_t) count);
1386   memcpy (MHI_STR (fmt->prog[fmt->progcnt]), str, length);
1387   fmt->progcnt += count;
1388 }
1389 
1390 static void codegen_node (struct mh_format *fmt, struct node *node);
1391 static void codegen_nodelist (struct mh_format *fmt, struct node *node);
1392 
1393 static void
1394 emit_opcode_typed (struct mh_format *fmt, enum mh_type type,
1395 		   enum mh_opcode opnum, enum mh_opcode opstr)
1396 {
1397   switch (type)
1398     {
1399     case mhtype_num:
1400       emit_opcode (fmt, opnum);
1401       break;
1402 
1403     case mhtype_str:
1404       emit_opcode (fmt, opstr);
1405       break;
1406 
1407     default:
1408       abort ();
1409     }
1410 }
1411 
1412 static void
1413 emit_special (struct mh_format *fmt, mh_builtin_t *builtin, struct node *arg)
1414 {
1415   if (arg)
1416     {
1417       if (builtin->flags & MHA_LITERAL)
1418 	{
1419 	  switch (arg->nodetype)
1420 	    {
1421 	    case fmtnode_literal:
1422 	      emit_opcode (fmt, mhop_sets);
1423 	      emit_instr (fmt, (mh_instr_t) (long) R_REG);
1424 	      emit_string (fmt, arg->v.str);
1425 	      break;
1426 
1427 	    case fmtnode_number:
1428 	      emit_opcode (fmt, mhop_setn);
1429 	      emit_instr (fmt, (mh_instr_t) (long) R_REG);
1430 	      emit_instr (fmt, (mh_instr_t) (long) arg->v.num);
1431 	      break;
1432 
1433 	    default:
1434 	      abort ();
1435 	    }
1436 	}
1437       else
1438 	codegen_node (fmt, arg);
1439     }
1440 }
1441 
1442 static void
1443 emit_funcall (struct mh_format *fmt, mh_builtin_t *builtin, struct node *arg)
1444 {
1445   if (builtin->flags & MHA_ACC)
1446     {
1447       emit_opcode (fmt, mhop_movs);
1448       emit_instr (fmt, (mh_instr_t) (long) R_ACC);
1449       emit_instr (fmt, (mh_instr_t) (long) R_REG);
1450     }
1451 
1452   if (builtin->flags & MHA_SPECIAL)
1453     {
1454       emit_special (fmt, builtin, arg);
1455       return;
1456     }
1457 
1458   if (arg)
1459     {
1460       if (builtin->flags & MHA_LITERAL)
1461 	{
1462 	  switch (arg->nodetype)
1463 	    {
1464 	    case fmtnode_literal:
1465 	      emit_opcode (fmt, mhop_sets);
1466 	      emit_instr (fmt, (mh_instr_t) (long) R_ARG);
1467 	      emit_string (fmt, arg->v.str);
1468 	      break;
1469 
1470 	    case fmtnode_number:
1471 	      emit_opcode (fmt, mhop_setn);
1472 	      emit_instr (fmt, (mh_instr_t) (long) R_ARG);
1473 	      emit_instr (fmt, (mh_instr_t) (long) arg->v.num);
1474 	      break;
1475 
1476 	    default:
1477 	      abort ();
1478 	    }
1479 	}
1480       else
1481 	{
1482 	  codegen_node (fmt, arg);
1483 	  emit_opcode_typed (fmt, arg->datatype, mhop_movn, mhop_movs);
1484 	  emit_instr (fmt, (mh_instr_t) (long) R_ARG);
1485 	  emit_instr (fmt, (mh_instr_t) (long) R_REG);
1486 	}
1487     }
1488   else if (builtin->argtype != mhtype_none)
1489     {
1490       emit_opcode_typed (fmt, builtin->argtype, mhop_movn, mhop_movs);
1491       emit_instr (fmt, (mh_instr_t) (long) R_ARG);
1492       emit_instr (fmt, (mh_instr_t) (long) R_REG);
1493     }
1494 
1495   emit_opcode (fmt, mhop_call);
1496   emit_instr (fmt, (mh_instr_t) builtin->fun);
1497 }
1498 
1499 static void
1500 codegen_node (struct mh_format *fmt, struct node *node)
1501 {
1502   if (!node)
1503     return;
1504   switch (node->nodetype)
1505     {
1506     case fmtnode_print:
1507       if (node->v.prt.arg->nodetype == fmtnode_literal)
1508 	{
1509 	  emit_opcode (fmt, mhop_printlit);
1510 	  emit_string (fmt, node->v.prt.arg->v.str);
1511 	}
1512       else if (node->v.prt.arg->nodetype == fmtnode_number)
1513 	{
1514 	  char *s;
1515 	  emit_opcode (fmt, mhop_printlit);
1516 	  mu_asprintf (&s, "%ld", node->v.prt.arg->v.num);
1517 	  emit_string (fmt, s);
1518 	  free (s);
1519 	}
1520       else
1521 	{
1522 	  codegen_node (fmt, node->v.prt.arg);
1523 	  if (node->v.prt.fmtspec)
1524 	    {
1525 	      emit_opcode (fmt, mhop_fmtspec);
1526 	      emit_instr (fmt, (mh_instr_t) (long) node->v.prt.fmtspec);
1527 	    }
1528 
1529 	  if (node->v.prt.arg->datatype != mhtype_none)
1530 	    emit_opcode_typed (fmt, node->v.prt.arg->datatype,
1531 			       mhop_printn, mhop_prints);
1532 	}
1533       break;
1534 
1535     case fmtnode_literal:
1536       emit_opcode (fmt, mhop_sets);
1537       emit_instr (fmt, (mh_instr_t) (long) R_REG);
1538       emit_string (fmt, node->v.str);
1539       break;
1540 
1541     case fmtnode_number:
1542       emit_opcode (fmt, mhop_setn);
1543       emit_instr (fmt, (mh_instr_t) (long) R_REG);
1544       emit_instr (fmt, (mh_instr_t) (long) node->v.num);
1545       break;
1546 
1547     case fmtnode_body:
1548       emit_opcode (fmt, mhop_ldbody);
1549       emit_instr (fmt, (mh_instr_t) (long) R_REG);
1550       break;
1551 
1552     case fmtnode_comp:
1553       emit_opcode (fmt, mhop_ldcomp);
1554       emit_instr (fmt, (mh_instr_t) (long) R_REG);
1555       emit_string (fmt, node->v.str);
1556       break;
1557 
1558     case fmtnode_funcall:
1559       emit_funcall (fmt, node->v.funcall.builtin, node->v.funcall.arg);
1560       break;
1561 
1562     case fmtnode_cntl:
1563       {
1564 	long pc[2];
1565 
1566 	/* Implementation of control escapes is a bit tricky. According to
1567 	   the spec:
1568 
1569 	     "[f]unction escapes write their return value in 'num' for
1570 	      functions returning integer or boolean values"
1571 
1572 	   That means that after "%<(gt 1024)" the value of 'num' would be
1573 	   1 or 0, depending on its value prior to entering the conditional.
1574 	   However this would defeat the purpose of the conditional itself,
1575 	   because then the following construct would be meaningless:
1576 
1577 	       %<(gt 1024)...%?(gt 512)...%|...%>
1578 
1579 	   Indeed, in MH implementation the value of 'num' propagates into
1580 	   the conditional expression, because any function escape serving
1581 	   as condition is evaluated in a separate context.
1582 
1583 	   To ensure this behavior, the virtual machine of GNU MH holds the
1584 	   value of the 'num' register on stack while evaluating the condition
1585 	   and restores it afterward.
1586 
1587 	   On the other hand, the spec says that:
1588 
1589  	     "[c]ontrol escapes return a boolean value, setting num to 1
1590 	     if the last explicit condition evaluated by a `%<'  or `%?'
1591 	     control succeeded, and 0 otherwise."
1592 
1593 	   To ensure this, the value on top of stack is exchanged with the
1594 	   value of the 'num' register upon entering the 'if' branch, and
1595 	   the tos value is popped into the 'num' upon leaving it. Any
1596 	   'else if' branches are handled the same way.
1597 
1598 	   Before leaving the 'else' branch, the 'num' is set to 0 explicitly.
1599 	*/
1600 	emit_opcode (fmt, mhop_pushn);
1601 	codegen_node (fmt, node->v.cntl.cond);
1602 	emit_opcode_typed (fmt, node->v.cntl.cond->datatype,
1603 			   mhop_brzn, mhop_brzs);
1604 	pc[0] = fmt->progcnt;
1605 	emit_instr (fmt, (mh_instr_t) NULL);
1606 	if (node->v.cntl.iftrue)
1607 	  {
1608 	    emit_opcode (fmt, mhop_xchgn);
1609 	    codegen_nodelist (fmt, node->v.cntl.iftrue);
1610 	  }
1611 	emit_opcode (fmt, mhop_popn);
1612 
1613 	if (node->v.cntl.iffalse)
1614 	  {
1615 	    emit_opcode (fmt, mhop_branch);
1616 	    pc[1] = fmt->progcnt;
1617 	    emit_instr (fmt, (mh_instr_t) NULL);
1618 
1619 	    fmt->prog[pc[0]].num = fmt->progcnt - pc[0];
1620 	    emit_opcode (fmt, mhop_popn);
1621 	    codegen_nodelist (fmt, node->v.cntl.iffalse);
1622 	    if (node->v.cntl.iffalse->nodetype != fmtnode_cntl)
1623 	      {
1624 		emit_opcode (fmt, mhop_setn);
1625 		emit_instr (fmt, (mh_instr_t) (long) R_REG);
1626 		emit_instr (fmt, (mh_instr_t) (long) 0);
1627 	      }
1628 	    fmt->prog[pc[1]].num = fmt->progcnt - pc[1];
1629 	  }
1630 	else
1631 	  fmt->prog[pc[0]].num = fmt->progcnt - pc[0];
1632       }
1633       break;
1634 
1635     case fmtnode_typecast:
1636       codegen_node (fmt, node->v.arg);
1637       switch (node->datatype)
1638 	{
1639 	case mhtype_num:
1640 	  emit_opcode (fmt, mhop_atoi);
1641 	  break;
1642 
1643 	case mhtype_str:
1644 	  emit_opcode (fmt, mhop_itoa);
1645 	  break;
1646 
1647 	default:
1648 	  abort ();
1649 	}
1650       break;
1651 
1652     default:
1653       abort ();
1654     }
1655 }
1656 
1657 static void
1658 codegen_nodelist (struct mh_format *fmt, struct node *node)
1659 {
1660   while (node)
1661     {
1662       codegen_node (fmt, node);
1663       node = node->next;
1664     }
1665 }
1666 
1667 static void
1668 codegen (mh_format_t *fmtptr, int tree)
1669 {
1670   struct mh_format *fmt;
1671 
1672   fmt = mu_zalloc (sizeof *fmt);
1673 
1674   *fmtptr = fmt;
1675   emit_opcode (fmt, mhop_stop);
1676   codegen_nodelist (fmt, parse_tree);
1677   emit_opcode (fmt, mhop_stop);
1678 
1679   if (tree)
1680     {
1681       fmt->tree = parse_tree;
1682       fmt->pool = tokpool;
1683     }
1684   else
1685     {
1686       node_list_free (parse_tree);
1687       mu_opool_destroy (&tokpool);
1688     }
1689 }
1690 
1691 
1692