1 /* GNU m4 -- A simple macro processor
2 
3    Copyright (C) 1989-1994, 2000, 2004, 2006-2014, 2016-2017, 2020-2021
4    Free Software Foundation, Inc.
5 
6    This file is part of GNU M4.
7 
8    GNU M4 is free software: you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation, either version 3 of the License, or
11    (at your option) any later version.
12 
13    GNU M4 is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program.  If not, see <https://www.gnu.org/licenses/>.
20 */
21 
22 /* Code for all builtin macros, initialization of symbol table, and
23    expansion of user defined macros.  */
24 
25 #include "m4.h"
26 
27 #include "execute.h"
28 #include "memchr2.h"
29 #include "progname.h"
30 #include "regex.h"
31 #include "spawn-pipe.h"
32 #include "wait-process.h"
33 
/* Text of argument I of the macro call being processed, or the empty
   string when the call supplied no such argument.  Expands to code
   that refers to ARGC and ARGV, so both must be in scope wherever
   this is used.  */
#define ARG(i) ((i) < argc ? TOKEN_DATA_TEXT (argv[i]) : "")
35 
36 /* Initialization of builtin and predefined macros.  The table
37    "builtin_tab" is both used for initialization, and by the "builtin"
38    builtin.  */
39 
40 #define DECLARE(name) \
41   static void name (struct obstack *, int, token_data **)
42 
43 DECLARE (m4___file__);
44 DECLARE (m4___line__);
45 DECLARE (m4___program__);
46 DECLARE (m4_builtin);
47 DECLARE (m4_changecom);
48 DECLARE (m4_changequote);
49 #ifdef ENABLE_CHANGEWORD
50 DECLARE (m4_changeword);
51 #endif
52 DECLARE (m4_debugmode);
53 DECLARE (m4_debugfile);
54 DECLARE (m4_decr);
55 DECLARE (m4_define);
56 DECLARE (m4_defn);
57 DECLARE (m4_divert);
58 DECLARE (m4_divnum);
59 DECLARE (m4_dnl);
60 DECLARE (m4_dumpdef);
61 DECLARE (m4_errprint);
62 DECLARE (m4_esyscmd);
63 DECLARE (m4_eval);
64 DECLARE (m4_format);
65 DECLARE (m4_ifdef);
66 DECLARE (m4_ifelse);
67 DECLARE (m4_include);
68 DECLARE (m4_incr);
69 DECLARE (m4_index);
70 DECLARE (m4_indir);
71 DECLARE (m4_len);
72 DECLARE (m4_m4exit);
73 DECLARE (m4_m4wrap);
74 DECLARE (m4_maketemp);
75 DECLARE (m4_mkstemp);
76 DECLARE (m4_patsubst);
77 DECLARE (m4_popdef);
78 DECLARE (m4_pushdef);
79 DECLARE (m4_regexp);
80 DECLARE (m4_shift);
81 DECLARE (m4_sinclude);
82 DECLARE (m4_substr);
83 DECLARE (m4_syscmd);
84 DECLARE (m4_sysval);
85 DECLARE (m4_traceoff);
86 DECLARE (m4_traceon);
87 DECLARE (m4_translit);
88 DECLARE (m4_undefine);
89 DECLARE (m4_undivert);
90 
91 #undef DECLARE
92 
93 static builtin const builtin_tab[] =
94 {
95 
96   /* name               GNUext  macros  blind   function */
97 
98   { "__file__",         true,   false,  false,  m4___file__ },
99   { "__line__",         true,   false,  false,  m4___line__ },
100   { "__program__",      true,   false,  false,  m4___program__ },
101   { "builtin",          true,   true,   true,   m4_builtin },
102   { "changecom",        false,  false,  false,  m4_changecom },
103   { "changequote",      false,  false,  false,  m4_changequote },
104 #ifdef ENABLE_CHANGEWORD
105   { "changeword",       true,   false,  true,   m4_changeword },
106 #endif
107   { "debugmode",        true,   false,  false,  m4_debugmode },
108   { "debugfile",        true,   false,  false,  m4_debugfile },
109   { "decr",             false,  false,  true,   m4_decr },
110   { "define",           false,  true,   true,   m4_define },
111   { "defn",             false,  false,  true,   m4_defn },
112   { "divert",           false,  false,  false,  m4_divert },
113   { "divnum",           false,  false,  false,  m4_divnum },
114   { "dnl",              false,  false,  false,  m4_dnl },
115   { "dumpdef",          false,  false,  false,  m4_dumpdef },
116   { "errprint",         false,  false,  true,   m4_errprint },
117   { "esyscmd",          true,   false,  true,   m4_esyscmd },
118   { "eval",             false,  false,  true,   m4_eval },
119   { "format",           true,   false,  true,   m4_format },
120   { "ifdef",            false,  false,  true,   m4_ifdef },
121   { "ifelse",           false,  false,  true,   m4_ifelse },
122   { "include",          false,  false,  true,   m4_include },
123   { "incr",             false,  false,  true,   m4_incr },
124   { "index",            false,  false,  true,   m4_index },
125   { "indir",            true,   true,   true,   m4_indir },
126   { "len",              false,  false,  true,   m4_len },
127   { "m4exit",           false,  false,  false,  m4_m4exit },
128   { "m4wrap",           false,  false,  true,   m4_m4wrap },
129   { "maketemp",         false,  false,  true,   m4_maketemp },
130   { "mkstemp",          false,  false,  true,   m4_mkstemp },
131   { "patsubst",         true,   false,  true,   m4_patsubst },
132   { "popdef",           false,  false,  true,   m4_popdef },
133   { "pushdef",          false,  true,   true,   m4_pushdef },
134   { "regexp",           true,   false,  true,   m4_regexp },
135   { "shift",            false,  false,  true,   m4_shift },
136   { "sinclude",         false,  false,  true,   m4_sinclude },
137   { "substr",           false,  false,  true,   m4_substr },
138   { "syscmd",           false,  false,  true,   m4_syscmd },
139   { "sysval",           false,  false,  false,  m4_sysval },
140   { "traceoff",         false,  false,  false,  m4_traceoff },
141   { "traceon",          false,  false,  false,  m4_traceon },
142   { "translit",         false,  false,  true,   m4_translit },
143   { "undefine",         false,  false,  true,   m4_undefine },
144   { "undivert",         false,  false,  false,  m4_undivert },
145 
146   { 0,                  false,  false,  false,  0 },
147 
148   /* placeholder is intentionally stuck after the table end delimiter,
149      so that we can easily find it, while not treating it as a real
150      builtin.  */
151   { "placeholder",      true,   false,  false,  m4_placeholder },
152 };
153 
154 static predefined const predefined_tab[] =
155 {
156 #if UNIX
157   { "unix",     "__unix__",     "" },
158 #endif
159 #if W32_NATIVE
160   { "windows",  "__windows__",  "" },
161 #endif
162 #if OS2
163   { "os2",      "__os2__",      "" },
164 #endif
165 #if !UNIX && !W32_NATIVE && !OS2
166 # warning Platform macro not provided
167 #endif
168   { NULL,       "__gnu__",      "" },
169 
170   { NULL,       NULL,           NULL },
171 };
172 
173 /*----------------------------------------.
174 | Find the builtin, which lives on ADDR.  |
175 `----------------------------------------*/
176 
177 const builtin * ATTRIBUTE_PURE
find_builtin_by_addr(builtin_func * func)178 find_builtin_by_addr (builtin_func *func)
179 {
180   const builtin *bp;
181 
182   for (bp = &builtin_tab[0]; bp->name != NULL; bp++)
183     if (bp->func == func)
184       return bp;
185   if (func == m4_placeholder)
186     return bp + 1;
187   return NULL;
188 }
189 
190 /*----------------------------------------------------------.
191 | Find the builtin, which has NAME.  On failure, return the |
192 | placeholder builtin.                                      |
193 `----------------------------------------------------------*/
194 
195 const builtin * ATTRIBUTE_PURE
find_builtin_by_name(const char * name)196 find_builtin_by_name (const char *name)
197 {
198   const builtin *bp;
199 
200   for (bp = &builtin_tab[0]; bp->name != NULL; bp++)
201     if (STREQ (bp->name, name))
202       return bp;
203   return bp + 1;
204 }
205 
206 /*----------------------------------------------------------------.
207 | Install a builtin macro with name NAME, bound to the C function |
208 | given in BP.  MODE is SYMBOL_INSERT or SYMBOL_PUSHDEF.          |
209 `----------------------------------------------------------------*/
210 
211 void
define_builtin(const char * name,const builtin * bp,symbol_lookup mode)212 define_builtin (const char *name, const builtin *bp, symbol_lookup mode)
213 {
214   symbol *sym;
215 
216   sym = lookup_symbol (name, mode);
217   SYMBOL_TYPE (sym) = TOKEN_FUNC;
218   SYMBOL_MACRO_ARGS (sym) = bp->groks_macro_args;
219   SYMBOL_BLIND_NO_ARGS (sym) = bp->blind_if_no_args;
220   SYMBOL_FUNC (sym) = bp->func;
221 }
222 
223 /* Storage for the compiled regular expression of
224    --warn-macro-sequence.  */
225 static struct re_pattern_buffer macro_sequence_buf;
226 
227 /* Storage for the matches of --warn-macro-sequence.  */
228 static struct re_registers macro_sequence_regs;
229 
230 /* True if --warn-macro-sequence is in effect.  */
231 static bool macro_sequence_inuse;
232 
233 /*----------------------------------------.
234 | Clean up regular expression variables.  |
235 `----------------------------------------*/
236 
237 static void
free_pattern_buffer(struct re_pattern_buffer * buf,struct re_registers * regs)238 free_pattern_buffer (struct re_pattern_buffer *buf, struct re_registers *regs)
239 {
240   regfree (buf);
241   free (regs->start);
242   free (regs->end);
243 }
244 
245 /*-----------------------------------------------------------------.
246 | Set the regular expression of --warn-macro-sequence that will be |
247 | checked during define and pushdef.  Exit on failure.             |
248 `-----------------------------------------------------------------*/
249 void
set_macro_sequence(const char * regexp)250 set_macro_sequence (const char *regexp)
251 {
252   const char *msg;
253 
254   if (! regexp)
255     regexp = DEFAULT_MACRO_SEQUENCE;
256   else if (regexp[0] == '\0')
257     {
258       macro_sequence_inuse = false;
259       return;
260     }
261 
262   msg = re_compile_pattern (regexp, strlen (regexp), &macro_sequence_buf);
263   if (msg != NULL)
264     m4_failure (0, _("--warn-macro-sequence: bad regular expression `%s': %s"),
265                 regexp, msg);
266   re_set_registers (&macro_sequence_buf, &macro_sequence_regs,
267                     macro_sequence_regs.num_regs,
268                     macro_sequence_regs.start, macro_sequence_regs.end);
269   macro_sequence_inuse = true;
270 }
271 
272 /*-----------------------------------------------------------.
273 | Free dynamic memory utilized by the macro sequence regular |
274 | expression during the define builtin.                      |
275 `-----------------------------------------------------------*/
276 void
free_macro_sequence(void)277 free_macro_sequence (void)
278 {
279   free_pattern_buffer (&macro_sequence_buf, &macro_sequence_regs);
280 }
281 
282 /*-----------------------------------------------------------------.
283 | Define a predefined or user-defined macro, with name NAME, and   |
284 | expansion TEXT.  MODE destinguishes between the "define" and the |
285 | "pushdef" case.  It is also used from main.                      |
286 `-----------------------------------------------------------------*/
287 
288 void
define_user_macro(const char * name,const char * text,symbol_lookup mode)289 define_user_macro (const char *name, const char *text, symbol_lookup mode)
290 {
291   symbol *s;
292   char *defn = xstrdup (text ? text : "");
293 
294   s = lookup_symbol (name, mode);
295   if (SYMBOL_TYPE (s) == TOKEN_TEXT)
296     free (SYMBOL_TEXT (s));
297 
298   SYMBOL_TYPE (s) = TOKEN_TEXT;
299   SYMBOL_TEXT (s) = defn;
300 
301   /* Implement --warn-macro-sequence.  */
302   if (macro_sequence_inuse && text)
303     {
304       regoff_t offset = 0;
305       size_t len = strlen (defn);
306 
307       while ((offset = re_search (&macro_sequence_buf, defn, len, offset,
308                                   len - offset, &macro_sequence_regs)) >= 0)
309         {
310           /* Skip empty matches.  */
311           if (macro_sequence_regs.start[0] == macro_sequence_regs.end[0])
312             offset++;
313           else
314             {
315               char tmp;
316               offset = macro_sequence_regs.end[0];
317               tmp = defn[offset];
318               defn[offset] = '\0';
319               M4ERROR ((warning_status, 0,
320                         _("Warning: definition of `%s' contains sequence `%s'"),
321                         name, defn + macro_sequence_regs.start[0]));
322               defn[offset] = tmp;
323             }
324         }
325       if (offset == -2)
326         M4ERROR ((warning_status, 0,
327                   _("error checking --warn-macro-sequence for macro `%s'"),
328                   name));
329     }
330 }
331 
332 /*-----------------------------------------------.
333 | Initialize all builtin and predefined macros.  |
334 `-----------------------------------------------*/
335 
336 void
builtin_init(void)337 builtin_init (void)
338 {
339   const builtin *bp;
340   const predefined *pp;
341   char *string;
342 
343   for (bp = &builtin_tab[0]; bp->name != NULL; bp++)
344     if (!no_gnu_extensions || !bp->gnu_extension)
345       {
346         if (prefix_all_builtins)
347           {
348             string = (char *) xmalloc (strlen (bp->name) + 4);
349             strcpy (string, "m4_");
350             strcat (string, bp->name);
351             define_builtin (string, bp, SYMBOL_INSERT);
352             free (string);
353           }
354         else
355           define_builtin (bp->name, bp, SYMBOL_INSERT);
356       }
357 
358   for (pp = &predefined_tab[0]; pp->func != NULL; pp++)
359     if (no_gnu_extensions)
360       {
361         if (pp->unix_name != NULL)
362           define_user_macro (pp->unix_name, pp->func, SYMBOL_INSERT);
363       }
364     else
365       {
366         if (pp->gnu_name != NULL)
367           define_user_macro (pp->gnu_name, pp->func, SYMBOL_INSERT);
368       }
369 }
370 
371 /*-------------------------------------------------------------------.
372 | Give friendly warnings if a builtin macro is passed an             |
373 | inappropriate number of arguments.  NAME is the macro name for     |
374 | messages, ARGC is actual number of arguments, MIN is the minimum   |
375 | number of acceptable arguments, negative if not applicable, MAX is |
376 | the maximum number, negative if not applicable.                    |
377 `-------------------------------------------------------------------*/
378 
379 static bool
bad_argc(token_data * name,int argc,int min,int max)380 bad_argc (token_data *name, int argc, int min, int max)
381 {
382   bool isbad = false;
383 
384   if (min > 0 && argc < min)
385     {
386       if (!suppress_warnings)
387         M4ERROR ((warning_status, 0,
388                   _("Warning: too few arguments to builtin `%s'"),
389                   TOKEN_DATA_TEXT (name)));
390       isbad = true;
391     }
392   else if (max > 0 && argc > max && !suppress_warnings)
393     M4ERROR ((warning_status, 0,
394               _("Warning: excess arguments to builtin `%s' ignored"),
395               TOKEN_DATA_TEXT (name)));
396 
397   return isbad;
398 }
399 
400 /*-----------------------------------------------------------------.
401 | The function numeric_arg () converts ARG to an int pointed to by |
402 | VALUEP.  If the conversion fails, print error message for macro  |
403 | MACRO.  Return true iff conversion succeeds.                     |
404 `-----------------------------------------------------------------*/
405 
406 static bool
numeric_arg(token_data * macro,const char * arg,int * valuep)407 numeric_arg (token_data *macro, const char *arg, int *valuep)
408 {
409   char *endp;
410 
411   if (*arg == '\0')
412     {
413       *valuep = 0;
414       M4ERROR ((warning_status, 0,
415                 _("empty string treated as 0 in builtin `%s'"),
416                 TOKEN_DATA_TEXT (macro)));
417     }
418   else
419     {
420       errno = 0;
421       *valuep = strtol (arg, &endp, 10);
422       if (*endp != '\0')
423         {
424           M4ERROR ((warning_status, 0,
425                     _("non-numeric argument to builtin `%s'"),
426                     TOKEN_DATA_TEXT (macro)));
427           return false;
428         }
429       if (c_isspace (*arg))
430         M4ERROR ((warning_status, 0,
431                   _("leading whitespace ignored in builtin `%s'"),
432                   TOKEN_DATA_TEXT (macro)));
433       else if (errno == ERANGE)
434         M4ERROR ((warning_status, 0,
435                   _("numeric overflow detected in builtin `%s'"),
436                   TOKEN_DATA_TEXT (macro)));
437     }
438   return true;
439 }
440 
/*-------------------------------------------------------------.
| Return the text of VALUE written in base RADIX, with a       |
| leading '-' for negative values.  Digits above 9 are the     |
| lowercase letters, so RADIX may be at most 36; RADIX is not  |
| checked here.  The result points into a static buffer that   |
| the next call overwrites.                                    |
`-------------------------------------------------------------*/

/* Digits for number to ASCII conversions.  */
static char const digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";

const char *
ntoa (int32_t value, int radix)
{
  static char str[256];
  char *cursor = str + sizeof str - 1;
  const bool negative = value < 0;
  /* Negate in unsigned arithmetic, which is well defined even for
     the most negative value.  */
  uint32_t magnitude = negative ? -(uint32_t) value : (uint32_t) value;

  /* The text is built backwards from the end of the buffer.  */
  *cursor = '\0';
  do
    {
      *--cursor = digits[magnitude % radix];
      magnitude /= radix;
    }
  while (magnitude != 0);

  if (negative)
    *--cursor = '-';

  return cursor;
}
481 
/*-------------------------------------------------------------.
| Append the decimal text of VAL to the obstack OBS.  Used by  |
| the macros that expand to a number.                          |
`-------------------------------------------------------------*/

static void
shipout_int (struct obstack *obs, int val)
{
  const char *text = ntoa ((int32_t) val, 10);

  obstack_grow (obs, text, strlen (text));
}
495 
496 /*-------------------------------------------------------------------.
497 | Print ARGC arguments from the table ARGV to obstack OBS, separated |
498 | by SEP, and quoted by the current quotes if QUOTED is true.        |
499 `-------------------------------------------------------------------*/
500 
501 static void
dump_args(struct obstack * obs,int argc,token_data ** argv,const char * sep,bool quoted)502 dump_args (struct obstack *obs, int argc, token_data **argv,
503            const char *sep, bool quoted)
504 {
505   int i;
506   size_t len = strlen (sep);
507 
508   for (i = 1; i < argc; i++)
509     {
510       if (i > 1)
511         obstack_grow (obs, sep, len);
512       if (quoted)
513         obstack_grow (obs, lquote.string, lquote.length);
514       obstack_grow (obs, TOKEN_DATA_TEXT (argv[i]),
515                     strlen (TOKEN_DATA_TEXT (argv[i])));
516       if (quoted)
517         obstack_grow (obs, rquote.string, rquote.length);
518     }
519 }
520 
/* The rest of this file is code for builtins and expansion of user
   defined macros.  All the functions for builtins have a prototype as:

        void m4_MACRONAME (struct obstack *obs, int argc, token_data **argv);

   The functions are expected to leave their expansion on the obstack OBS,
   as an unfinished object.  ARGV is a table of ARGC pointers to the
   individual arguments to the macro.  Please note that in general
   argv[argc] != NULL.  */
530 
/* The first section is macros for defining, undefining, examining,
   changing, ... other macros.  */
533 
534 /*-------------------------------------------------------------------.
535 | The function define_macro is common for the builtins "define",     |
536 | "undefine", "pushdef" and "popdef".  ARGC and ARGV is as for the   |
537 | caller, and MODE argument determines how the macro name is entered |
538 | into the symbol table.                                             |
539 `-------------------------------------------------------------------*/
540 
541 static void
define_macro(int argc,token_data ** argv,symbol_lookup mode)542 define_macro (int argc, token_data **argv, symbol_lookup mode)
543 {
544   const builtin *bp;
545 
546   if (bad_argc (argv[0], argc, 2, 3))
547     return;
548 
549   if (TOKEN_DATA_TYPE (argv[1]) != TOKEN_TEXT)
550     {
551       M4ERROR ((warning_status, 0,
552                 _("Warning: %s: invalid macro name ignored"), ARG (0)));
553       return;
554     }
555 
556   if (argc == 2)
557     {
558       define_user_macro (ARG (1), "", mode);
559       return;
560     }
561 
562   switch (TOKEN_DATA_TYPE (argv[2]))
563     {
564     case TOKEN_TEXT:
565       define_user_macro (ARG (1), ARG (2), mode);
566       break;
567 
568     case TOKEN_FUNC:
569       bp = find_builtin_by_addr (TOKEN_DATA_FUNC (argv[2]));
570       if (bp == NULL)
571         return;
572       else
573         define_builtin (ARG (1), bp, mode);
574       break;
575 
576     case TOKEN_VOID:
577     default:
578       M4ERROR ((warning_status, 0,
579                 "INTERNAL ERROR: bad token data type in define_macro ()"));
580       abort ();
581     }
582 }
583 
584 static void
m4_define(struct obstack * obs MAYBE_UNUSED,int argc,token_data ** argv)585 m4_define (struct obstack *obs MAYBE_UNUSED, int argc, token_data **argv)
586 {
587   define_macro (argc, argv, SYMBOL_INSERT);
588 }
589 
590 static void
m4_undefine(struct obstack * obs MAYBE_UNUSED,int argc,token_data ** argv)591 m4_undefine (struct obstack *obs MAYBE_UNUSED, int argc, token_data **argv)
592 {
593   int i;
594   if (bad_argc (argv[0], argc, 2, -1))
595     return;
596   for (i = 1; i < argc; i++)
597     lookup_symbol (ARG (i), SYMBOL_DELETE);
598 }
599 
600 static void
m4_pushdef(struct obstack * obs MAYBE_UNUSED,int argc,token_data ** argv)601 m4_pushdef (struct obstack *obs MAYBE_UNUSED, int argc, token_data **argv)
602 {
603   define_macro (argc, argv,  SYMBOL_PUSHDEF);
604 }
605 
606 static void
m4_popdef(struct obstack * obs MAYBE_UNUSED,int argc,token_data ** argv)607 m4_popdef (struct obstack *obs MAYBE_UNUSED, int argc, token_data **argv)
608 {
609   int i;
610   if (bad_argc (argv[0], argc, 2, -1))
611     return;
612   for (i = 1; i < argc; i++)
613     lookup_symbol (ARG (i), SYMBOL_POPDEF);
614 }
615 
616 /*---------------------.
617 | Conditionals of m4.  |
618 `---------------------*/
619 
620 static void
m4_ifdef(struct obstack * obs,int argc,token_data ** argv)621 m4_ifdef (struct obstack *obs, int argc, token_data **argv)
622 {
623   symbol *s;
624   const char *result;
625 
626   if (bad_argc (argv[0], argc, 3, 4))
627     return;
628   s = lookup_symbol (ARG (1), SYMBOL_LOOKUP);
629 
630   if (s != NULL && SYMBOL_TYPE (s) != TOKEN_VOID)
631     result = ARG (2);
632   else if (argc >= 4)
633     result = ARG (3);
634   else
635     result = NULL;
636 
637   if (result != NULL)
638     obstack_grow (obs, result, strlen (result));
639 }
640 
641 static void
m4_ifelse(struct obstack * obs,int argc,token_data ** argv)642 m4_ifelse (struct obstack *obs, int argc, token_data **argv)
643 {
644   const char *result;
645   token_data *me = argv[0];
646 
647   if (argc == 2)
648     return;
649 
650   if (bad_argc (me, argc, 4, -1))
651     return;
652   else
653     /* Diagnose excess arguments if 5, 8, 11, etc., actual arguments.  */
654     bad_argc (me, (argc + 2) % 3, -1, 1);
655 
656   argv++;
657   argc--;
658 
659   result = NULL;
660   while (result == NULL)
661 
662     if (STREQ (ARG (0), ARG (1)))
663       result = ARG (2);
664 
665     else
666       switch (argc)
667         {
668         case 3:
669           return;
670 
671         case 4:
672         case 5:
673           result = ARG (3);
674           break;
675 
676         default:
677           argc -= 3;
678           argv += 3;
679         }
680 
681   obstack_grow (obs, result, strlen (result));
682 }
683 
684 /*-------------------------------------------------------------------.
685 | The function dump_symbol () is for use by "dumpdef".  It builds up |
686 | a table of all defined, un-shadowed, symbols.                      |
687 `-------------------------------------------------------------------*/
688 
689 /* The structure dump_symbol_data is used to pass the information needed
690    from call to call to dump_symbol.  */
691 
692 struct dump_symbol_data
693 {
694   struct obstack *obs;          /* obstack for table */
695   symbol **base;                /* base of table */
696   int size;                     /* size of table */
697 };
698 
699 static void
dump_symbol(symbol * sym,void * arg)700 dump_symbol (symbol *sym, void *arg)
701 {
702   struct dump_symbol_data *data = (struct dump_symbol_data *) arg;
703   if (SYMBOL_TYPE (sym) != TOKEN_VOID)
704     {
705       obstack_blank (data->obs, sizeof (symbol *));
706       data->base = (symbol **) obstack_base (data->obs);
707       data->base[data->size++] = sym;
708     }
709 }
710 
711 /*------------------------------------------------------------------------.
712 | qsort comparison routine, for sorting the table made in m4_dumpdef ().  |
713 `------------------------------------------------------------------------*/
714 
715 static int
dumpdef_cmp(const void * s1,const void * s2)716 dumpdef_cmp (const void *s1, const void *s2)
717 {
718   return strcmp (SYMBOL_NAME (* (symbol *const *) s1),
719                  SYMBOL_NAME (* (symbol *const *) s2));
720 }
721 
722 /*-------------------------------------------------------------.
723 | Implementation of "dumpdef" itself.  It builds up a table of |
724 | pointers to symbols, sorts it and prints the sorted table.   |
725 `-------------------------------------------------------------*/
726 
727 static void
m4_dumpdef(struct obstack * obs,int argc,token_data ** argv)728 m4_dumpdef (struct obstack *obs, int argc, token_data **argv)
729 {
730   symbol *s;
731   int i;
732   struct dump_symbol_data data;
733   const builtin *bp;
734 
735   data.obs = obs;
736   data.base = (symbol **) obstack_base (obs);
737   data.size = 0;
738 
739   if (argc == 1)
740     {
741       hack_all_symbols (dump_symbol, &data);
742     }
743   else
744     {
745       for (i = 1; i < argc; i++)
746         {
747           s = lookup_symbol (TOKEN_DATA_TEXT (argv[i]), SYMBOL_LOOKUP);
748           if (s != NULL && SYMBOL_TYPE (s) != TOKEN_VOID)
749             dump_symbol (s, &data);
750           else
751             M4ERROR ((warning_status, 0,
752                       _("undefined macro `%s'"), TOKEN_DATA_TEXT (argv[i])));
753         }
754     }
755 
756   /* Make table of symbols invisible to expand_macro ().  */
757 
758   obstack_finish (obs);
759 
760   qsort (data.base, data.size, sizeof (symbol *), dumpdef_cmp);
761 
762   for (; data.size > 0; --data.size, data.base++)
763     {
764       DEBUG_PRINT1 ("%s:\t", SYMBOL_NAME (data.base[0]));
765 
766       switch (SYMBOL_TYPE (data.base[0]))
767         {
768         case TOKEN_TEXT:
769           if (debug_level & DEBUG_TRACE_QUOTE)
770             DEBUG_PRINT3 ("%s%s%s\n",
771                           lquote.string, SYMBOL_TEXT (data.base[0]), rquote.string);
772           else
773             DEBUG_PRINT1 ("%s\n", SYMBOL_TEXT (data.base[0]));
774           break;
775 
776         case TOKEN_FUNC:
777           bp = find_builtin_by_addr (SYMBOL_FUNC (data.base[0]));
778           if (bp == NULL)
779             {
780               M4ERROR ((warning_status, 0, "\
781 INTERNAL ERROR: builtin not found in builtin table"));
782               abort ();
783             }
784           DEBUG_PRINT1 ("<%s>\n", bp->name);
785           break;
786 
787         case TOKEN_VOID:
788         default:
789           M4ERROR ((warning_status, 0,
790                     "INTERNAL ERROR: bad token data type in m4_dumpdef ()"));
791           abort ();
792           break;
793         }
794     }
795 }
796 
797 /*-----------------------------------------------------------------.
798 | The builtin "builtin" allows calls to builtin macros, even if    |
799 | their definition has been overridden or shadowed.  It is thus    |
800 | possible to redefine builtins, and still access their original   |
801 | definition.  This macro is not available in compatibility mode.  |
802 `-----------------------------------------------------------------*/
803 
804 static void
m4_builtin(struct obstack * obs,int argc,token_data ** argv)805 m4_builtin (struct obstack *obs, int argc, token_data **argv)
806 {
807   const builtin *bp;
808   const char *name;
809 
810   if (bad_argc (argv[0], argc, 2, -1))
811     return;
812   if (TOKEN_DATA_TYPE (argv[1]) != TOKEN_TEXT)
813     {
814       M4ERROR ((warning_status, 0,
815                 _("Warning: %s: invalid macro name ignored"), ARG (0)));
816       return;
817     }
818 
819   name = ARG (1);
820   bp = find_builtin_by_name (name);
821   if (bp->func == m4_placeholder)
822     M4ERROR ((warning_status, 0,
823               _("undefined builtin `%s'"), name));
824   else
825     {
826       int i;
827       if (! bp->groks_macro_args)
828         for (i = 2; i < argc; i++)
829           if (TOKEN_DATA_TYPE (argv[i]) != TOKEN_TEXT)
830             {
831               TOKEN_DATA_TYPE (argv[i]) = TOKEN_TEXT;
832               TOKEN_DATA_TEXT (argv[i]) = (char *) "";
833             }
834       bp->func (obs, argc - 1, argv + 1);
835     }
836 }
837 
838 /*-------------------------------------------------------------------.
839 | The builtin "indir" allows indirect calls to macros, even if their |
840 | name is not a proper macro name.  It is thus possible to define    |
841 | macros with ill-formed names for internal use in larger macro      |
842 | packages.  This macro is not available in compatibility mode.      |
843 `-------------------------------------------------------------------*/
844 
845 static void
m4_indir(struct obstack * obs,int argc,token_data ** argv)846 m4_indir (struct obstack *obs, int argc, token_data **argv)
847 {
848   symbol *s;
849   const char *name;
850 
851   if (bad_argc (argv[0], argc, 2, -1))
852     return;
853   if (TOKEN_DATA_TYPE (argv[1]) != TOKEN_TEXT)
854     {
855       M4ERROR ((warning_status, 0,
856                 _("Warning: %s: invalid macro name ignored"), ARG (0)));
857       return;
858     }
859 
860   name = ARG (1);
861   s = lookup_symbol (name, SYMBOL_LOOKUP);
862   if (s == NULL || SYMBOL_TYPE (s) == TOKEN_VOID)
863     M4ERROR ((warning_status, 0,
864               _("undefined macro `%s'"), name));
865   else
866     {
867       int i;
868       if (! SYMBOL_MACRO_ARGS (s))
869         for (i = 2; i < argc; i++)
870           if (TOKEN_DATA_TYPE (argv[i]) != TOKEN_TEXT)
871             {
872               TOKEN_DATA_TYPE (argv[i]) = TOKEN_TEXT;
873               TOKEN_DATA_TEXT (argv[i]) = (char *) "";
874             }
875       call_macro (s, argc - 1, argv + 1, obs);
876     }
877 }
878 
879 /*------------------------------------------------------------------.
880 | The macro "defn" returns the quoted definition of the macro named |
881 | by the first argument.  If the macro is builtin, it will push a   |
882 | special macro-definition token on the input stack.                |
883 `------------------------------------------------------------------*/
884 
885 static void
m4_defn(struct obstack * obs,int argc,token_data ** argv)886 m4_defn (struct obstack *obs, int argc, token_data **argv)
887 {
888   symbol *s;
889   builtin_func *b;
890   unsigned int i;
891 
892   if (bad_argc (argv[0], argc, 2, -1))
893     return;
894 
895   assert (0 < argc);
896   for (i = 1; i < (unsigned) argc; i++)
897     {
898       const char *arg = ARG((int) i);
899       s = lookup_symbol (arg, SYMBOL_LOOKUP);
900       if (s == NULL)
901         continue;
902 
903       switch (SYMBOL_TYPE (s))
904         {
905         case TOKEN_TEXT:
906           obstack_grow (obs, lquote.string, lquote.length);
907           obstack_grow (obs, SYMBOL_TEXT (s), strlen (SYMBOL_TEXT (s)));
908           obstack_grow (obs, rquote.string, rquote.length);
909           break;
910 
911         case TOKEN_FUNC:
912           b = SYMBOL_FUNC (s);
913           if (b == m4_placeholder)
914             M4ERROR ((warning_status, 0, _("\
915 builtin `%s' requested by frozen file is not supported"), arg));
916           else if (argc != 2)
917             M4ERROR ((warning_status, 0,
918                       _("Warning: cannot concatenate builtin `%s'"),
919                       arg));
920           else
921             push_macro (b);
922           break;
923 
924         case TOKEN_VOID:
925           /* Nothing to do for traced but undefined macro.  */
926           break;
927 
928         default:
929           M4ERROR ((warning_status, 0,
930                     "INTERNAL ERROR: bad symbol type in m4_defn ()"));
931           abort ();
932         }
933     }
934 }
935 
936 /*--------------------------------------------------------------.
937 | This section contains macros to handle the builtins "syscmd", |
938 | "esyscmd" and "sysval".  "esyscmd" is GNU specific.           |
939 `--------------------------------------------------------------*/
940 
/* Status of the most recent "syscmd" or "esyscmd" command, as expanded
   by "sysval".  Both builtins store either the command's status value,
   or the reported signal status shifted left by 8 bits; an empty
   command stores 0.  */
static int sysval;
943 
944 static void
m4_syscmd(struct obstack * obs MAYBE_UNUSED,int argc,token_data ** argv)945 m4_syscmd (struct obstack *obs MAYBE_UNUSED, int argc, token_data **argv)
946 {
947   const char *cmd = ARG (1);
948   int status;
949   int sig_status;
950   const char *prog_args[4] = { "sh", "-c" };
951   if (bad_argc (argv[0], argc, 2, 2) || !*cmd)
952     {
953       /* The empty command is successful.  */
954       sysval = 0;
955       return;
956     }
957 
958   debug_flush_files ();
959 #if W32_NATIVE
960   if (strstr (SYSCMD_SHELL, "cmd"))
961     {
962       prog_args[0] = "cmd";
963       prog_args[1] = "/c";
964     }
965 #endif
966   prog_args[2] = cmd;
967   errno = 0;
968   status = execute (ARG (0), SYSCMD_SHELL, prog_args, NULL, false,
969                     false, false, false, true, false, &sig_status);
970   if (sig_status)
971     {
972       assert (status == 127);
973       sysval = sig_status << 8;
974     }
975   else
976     {
977       if (status == 127 && errno)
978         M4ERROR ((warning_status, errno, _("cannot run command `%s'"), cmd));
979       sysval = status;
980     }
981 }
982 
983 static void
m4_esyscmd(struct obstack * obs,int argc,token_data ** argv)984 m4_esyscmd (struct obstack *obs, int argc, token_data **argv)
985 {
986   const char *cmd = ARG (1);
987   const char *prog_args[4] = { "sh", "-c" };
988   pid_t child;
989   int fd;
990   FILE *pin;
991   int status;
992   int sig_status;
993 
994   if (bad_argc (argv[0], argc, 2, 2) || !*cmd)
995     {
996       /* The empty command is successful.  */
997       sysval = 0;
998       return;
999     }
1000 
1001   debug_flush_files ();
1002 #if W32_NATIVE
1003   if (strstr (SYSCMD_SHELL, "cmd"))
1004     {
1005       prog_args[0] = "cmd";
1006       prog_args[1] = "/c";
1007     }
1008 #endif
1009   prog_args[2] = cmd;
1010   errno = 0;
1011   child = create_pipe_in (ARG (0), SYSCMD_SHELL, prog_args, NULL,
1012                           NULL, false, true, false, &fd);
1013   if (child == -1)
1014     {
1015       M4ERROR ((warning_status, errno, _("cannot run command `%s'"), cmd));
1016       sysval = 127;
1017       return;
1018     }
1019 #if OS2
1020   /* On OS/2 kLIBC, fdopen() creates a stream in a mode of a file descriptor.
1021      So include "t" to open a stream in a text mode explicitly on OS/2. */
1022   pin = fdopen (fd, "rt");
1023 #else
1024   pin = fdopen (fd, "r");
1025 #endif
1026   if (pin == NULL)
1027     {
1028       M4ERROR ((warning_status, errno, _("cannot run command `%s'"), cmd));
1029       sysval = 127;
1030       close (fd);
1031       return;
1032     }
1033   while (1)
1034     {
1035       size_t avail = obstack_room (obs);
1036       size_t len;
1037       if (!avail)
1038         {
1039           int ch = getc (pin);
1040           if (ch == EOF)
1041             break;
1042           obstack_1grow (obs, ch);
1043           continue;
1044         }
1045       len = fread (obstack_next_free (obs), 1, avail, pin);
1046       if (len <= 0)
1047         break;
1048       obstack_blank_fast (obs, len);
1049     }
1050   if (ferror (pin) || fclose (pin))
1051     m4_failure (errno, _("cannot read pipe"));
1052   errno = 0;
1053   status = wait_subprocess (child, ARG (0), false, true, true, false,
1054                             &sig_status);
1055   if (sig_status)
1056     {
1057       assert (status == 127);
1058       sysval = sig_status << 8;
1059     }
1060   else
1061     {
1062       if (status == 127 && errno)
1063         M4ERROR ((warning_status, errno, _("cannot run command `%s'"), cmd));
1064       sysval = status;
1065     }
1066 }
1067 
1068 static void
m4_sysval(struct obstack * obs,int argc MAYBE_UNUSED,token_data ** argv MAYBE_UNUSED)1069 m4_sysval (struct obstack *obs, int argc MAYBE_UNUSED,
1070            token_data **argv MAYBE_UNUSED)
1071 {
1072   shipout_int (obs, sysval);
1073 }
1074 
1075 /*------------------------------------------------------------------.
1076 | This section contains the top level code for the "eval" builtin.  |
1077 | The actual work is done in the function evaluate (), which lives  |
1078 | in eval.c.                                                        |
1079 `------------------------------------------------------------------*/
1080 
1081 static void
m4_eval(struct obstack * obs,int argc,token_data ** argv)1082 m4_eval (struct obstack *obs, int argc, token_data **argv)
1083 {
1084   int32_t value = 0;
1085   int radix = 10;
1086   int min = 1;
1087   const char *s;
1088 
1089   if (bad_argc (argv[0], argc, 2, 4))
1090     return;
1091 
1092   if (*ARG (2) && !numeric_arg (argv[0], ARG (2), &radix))
1093     return;
1094 
1095   if (radix < 1 || radix > (int) strlen (digits))
1096     {
1097       M4ERROR ((warning_status, 0,
1098                 _("radix %d in builtin `%s' out of range"),
1099                 radix, ARG (0)));
1100       return;
1101     }
1102 
1103   if (argc >= 4 && !numeric_arg (argv[0], ARG (3), &min))
1104     return;
1105   if (min < 0)
1106     {
1107       M4ERROR ((warning_status, 0,
1108                 _("negative width to builtin `%s'"), ARG (0)));
1109       return;
1110     }
1111 
1112   if (!*ARG (1))
1113     M4ERROR ((warning_status, 0,
1114               _("empty string treated as 0 in builtin `%s'"), ARG (0)));
1115   else if (evaluate (ARG (1), &value))
1116     return;
1117 
1118   if (radix == 1)
1119     {
1120       if (value < 0)
1121         {
1122           obstack_1grow (obs, '-');
1123           value = -value;
1124         }
1125       /* This assumes 2's-complement for correctly handling INT_MIN.  */
1126       while (min-- - value > 0)
1127         obstack_1grow (obs, '0');
1128       while (value-- != 0)
1129         obstack_1grow (obs, '1');
1130       obstack_1grow (obs, '\0');
1131       return;
1132     }
1133 
1134   s = ntoa (value, radix);
1135 
1136   if (*s == '-')
1137     {
1138       obstack_1grow (obs, '-');
1139       s++;
1140     }
1141   for (min -= strlen (s); --min >= 0;)
1142     obstack_1grow (obs, '0');
1143 
1144   obstack_grow (obs, s, strlen (s));
1145 }
1146 
1147 static void
m4_incr(struct obstack * obs,int argc,token_data ** argv)1148 m4_incr (struct obstack *obs, int argc, token_data **argv)
1149 {
1150   int value;
1151 
1152   if (bad_argc (argv[0], argc, 2, 2))
1153     return;
1154 
1155   if (!numeric_arg (argv[0], ARG (1), &value))
1156     return;
1157 
1158   /* Minimize undefined C behavior on overflow.  This code assumes
1159      that the implementation-defined overflow when casting unsigned to
1160      signed is a silent twos-complement wrap-around.  */
1161   uint32_t v = value;
1162   int32_t w = v + 1;
1163   shipout_int (obs, w);
1164 }
1165 
1166 static void
m4_decr(struct obstack * obs,int argc,token_data ** argv)1167 m4_decr (struct obstack *obs, int argc, token_data **argv)
1168 {
1169   int value;
1170 
1171   if (bad_argc (argv[0], argc, 2, 2))
1172     return;
1173 
1174   if (!numeric_arg (argv[0], ARG (1), &value))
1175     return;
1176 
1177   /* Minimize undefined C behavior on overflow.  This code assumes
1178      that the implementation-defined overflow when casting unsigned to
1179      signed is a silent twos-complement wrap-around.  */
1180   uint32_t v = value;
1181   int32_t w = v - 1;
1182   shipout_int (obs, w);
1183 }
1184 
1185 /* This section contains the macros "divert", "undivert" and "divnum" for
   handling diversion.  The utility functions used live in output.c.  */
1187 
1188 /*-----------------------------------------------------------------.
1189 | Divert further output to the diversion given by ARGV[1].  Out of |
1190 | range means discard further output.                              |
1191 `-----------------------------------------------------------------*/
1192 
1193 static void
m4_divert(struct obstack * obs MAYBE_UNUSED,int argc,token_data ** argv)1194 m4_divert (struct obstack *obs MAYBE_UNUSED, int argc, token_data **argv)
1195 {
1196   int i = 0;
1197 
1198   if (bad_argc (argv[0], argc, 1, 2))
1199     return;
1200 
1201   if (argc >= 2 && !numeric_arg (argv[0], ARG (1), &i))
1202     return;
1203 
1204   make_diversion (i);
1205 }
1206 
1207 /*-----------------------------------------------------.
1208 | Expand to the current diversion number, -1 if none.  |
1209 `-----------------------------------------------------*/
1210 
1211 static void
m4_divnum(struct obstack * obs,int argc,token_data ** argv)1212 m4_divnum (struct obstack *obs, int argc, token_data **argv)
1213 {
1214   if (bad_argc (argv[0], argc, 1, 1))
1215     return;
1216   shipout_int (obs, current_diversion);
1217 }
1218 
1219 /*------------------------------------------------------------------.
1220 | Bring back the diversion given by the argument list.  If none is  |
1221 | specified, bring back all diversions.  GNU specific is the option |
1222 | of undiverting named files, by passing a non-numeric argument to  |
1223 | undivert ().                                                      |
1224 `------------------------------------------------------------------*/
1225 
1226 static void
m4_undivert(struct obstack * obs MAYBE_UNUSED,int argc,token_data ** argv)1227 m4_undivert (struct obstack *obs MAYBE_UNUSED, int argc, token_data **argv)
1228 {
1229   int i, file;
1230   FILE *fp;
1231   char *endp;
1232 
1233   if (argc == 1)
1234     undivert_all ();
1235   else
1236     for (i = 1; i < argc; i++)
1237       {
1238         file = strtol (ARG (i), &endp, 10);
1239         if (*endp == '\0' && !c_isspace (*ARG (i)))
1240           insert_diversion (file);
1241         else if (no_gnu_extensions)
1242           M4ERROR ((warning_status, 0,
1243                     _("non-numeric argument to builtin `%s'"), ARG (0)));
1244         else
1245           {
1246             fp = m4_path_search (ARG (i), NULL);
1247             if (fp != NULL)
1248               {
1249                 insert_file (fp);
1250                 if (fclose (fp) == EOF)
1251                   M4ERROR ((warning_status, errno,
1252                             _("error undiverting `%s'"), ARG (i)));
1253               }
1254             else
1255               M4ERROR ((warning_status, errno,
1256                         _("cannot undivert `%s'"), ARG (i)));
1257           }
1258       }
1259 }
1260 
/* This section contains various macros, which do not fall into any
1262    specific group.  These are "dnl", "shift", "changequote", "changecom"
1263    and "changeword".  */
1264 
1265 /*-----------------------------------------------------------.
1266 | Delete all subsequent whitespace from input.  The function |
1267 | skip_line () lives in input.c.                             |
1268 `-----------------------------------------------------------*/
1269 
1270 static void
m4_dnl(struct obstack * obs MAYBE_UNUSED,int argc,token_data ** argv)1271 m4_dnl (struct obstack *obs MAYBE_UNUSED, int argc, token_data **argv)
1272 {
1273   if (bad_argc (argv[0], argc, 1, 1))
1274     return;
1275 
1276   skip_line ();
1277 }
1278 
1279 /*--------------------------------------------------------------------.
1280 | Shift all arguments one to the left, discarding the first           |
1281 | argument.  Each output argument is quoted with the current quotes.  |
1282 `--------------------------------------------------------------------*/
1283 
1284 static void
m4_shift(struct obstack * obs,int argc,token_data ** argv)1285 m4_shift (struct obstack *obs, int argc, token_data **argv)
1286 {
1287   if (bad_argc (argv[0], argc, 2, -1))
1288     return;
1289   dump_args (obs, argc - 1, argv + 1, ",", true);
1290 }
1291 
1292 /*--------------------------------------------------------------------------.
1293 | Change the current quotes.  The function set_quotes () lives in input.c.  |
1294 `--------------------------------------------------------------------------*/
1295 
1296 static void
m4_changequote(struct obstack * obs MAYBE_UNUSED,int argc,token_data ** argv)1297 m4_changequote (struct obstack *obs MAYBE_UNUSED, int argc,
1298                 token_data **argv)
1299 {
1300   if (bad_argc (argv[0], argc, 1, 3))
1301     return;
1302 
1303   /* Explicit NULL distinguishes between empty and missing argument.  */
1304   set_quotes ((argc >= 2) ? TOKEN_DATA_TEXT (argv[1]) : NULL,
1305              (argc >= 3) ? TOKEN_DATA_TEXT (argv[2]) : NULL);
1306 }
1307 
1308 /*-----------------------------------------------------------------.
1309 | Change the current comment delimiters.  The function set_comment |
1310 | () lives in input.c.                                             |
1311 `-----------------------------------------------------------------*/
1312 
1313 static void
m4_changecom(struct obstack * obs MAYBE_UNUSED,int argc,token_data ** argv)1314 m4_changecom (struct obstack *obs MAYBE_UNUSED, int argc, token_data **argv)
1315 {
1316   if (bad_argc (argv[0], argc, 1, 3))
1317     return;
1318 
1319   /* Explicit NULL distinguishes between empty and missing argument.  */
1320   set_comment ((argc >= 2) ? TOKEN_DATA_TEXT (argv[1]) : NULL,
1321                (argc >= 3) ? TOKEN_DATA_TEXT (argv[2]) : NULL);
1322 }
1323 
1324 #ifdef ENABLE_CHANGEWORD
1325 
1326 /*---------------------------------------------------------------.
1327 | Change the regular expression used for breaking the input into |
1328 | words.  The function set_word_regexp () lives in input.c.      |
1329 `---------------------------------------------------------------*/
1330 
1331 static void
m4_changeword(struct obstack * obs MAYBE_UNUSED,int argc,token_data ** argv)1332 m4_changeword (struct obstack *obs MAYBE_UNUSED, int argc, token_data **argv)
1333 {
1334   if (bad_argc (argv[0], argc, 2, 2))
1335     return;
1336 
1337   set_word_regexp (TOKEN_DATA_TEXT (argv[1]));
1338 }
1339 
1340 #endif /* ENABLE_CHANGEWORD */
1341 
1342 /* This section contains macros for inclusion of other files -- "include"
1343    and "sinclude".  This differs from bringing back diversions, in that
1344    the input is scanned before being copied to the output.  */
1345 
1346 /*---------------------------------------------------------------.
1347 | Generic include function.  Include the file given by the first |
1348 | argument, if it exists.  Complain about inaccessible files iff |
1349 | SILENT is false.                                               |
1350 `---------------------------------------------------------------*/
1351 
1352 static void
include(int argc,token_data ** argv,bool silent)1353 include (int argc, token_data **argv, bool silent)
1354 {
1355   FILE *fp;
1356   char *name;
1357 
1358   if (bad_argc (argv[0], argc, 2, 2))
1359     return;
1360 
1361   fp = m4_path_search (ARG (1), &name);
1362   if (fp == NULL)
1363     {
1364       if (!silent)
1365         {
1366           M4ERROR ((warning_status, errno, _("cannot open `%s'"), ARG (1)));
1367           retcode = EXIT_FAILURE;
1368         }
1369       return;
1370     }
1371 
1372   push_file (fp, name, true);
1373   free (name);
1374 }
1375 
1376 /*------------------------------------------------.
1377 | Include a file, complaining in case of errors.  |
1378 `------------------------------------------------*/
1379 
1380 static void
m4_include(struct obstack * obs MAYBE_UNUSED,int argc,token_data ** argv)1381 m4_include (struct obstack *obs MAYBE_UNUSED, int argc, token_data **argv)
1382 {
1383   include (argc, argv, false);
1384 }
1385 
1386 /*----------------------------------.
1387 | Include a file, ignoring errors.  |
1388 `----------------------------------*/
1389 
1390 static void
m4_sinclude(struct obstack * obs MAYBE_UNUSED,int argc,token_data ** argv)1391 m4_sinclude (struct obstack *obs MAYBE_UNUSED, int argc, token_data **argv)
1392 {
1393   include (argc, argv, true);
1394 }
1395 
1396 /* More miscellaneous builtins -- "maketemp", "errprint", "__file__",
1397    "__line__", and "__program__".  The last three are GNU specific.  */
1398 
1399 /*------------------------------------------------------------------.
| Use the first argument as a template for a temporary file name.   |
1401 `------------------------------------------------------------------*/
1402 
1403 /* Add trailing 'X' to PATTERN of length LEN as necessary, then
1404    securely create the file, and place the quoted new file name on
1405    OBS.  Report errors on behalf of ME.  */
1406 static void
mkstemp_helper(struct obstack * obs,const char * me,const char * pattern,size_t len)1407 mkstemp_helper (struct obstack *obs, const char *me, const char *pattern,
1408                 size_t len)
1409 {
1410   int fd;
1411   size_t i;
1412   char *name;
1413 
1414   /* Guarantee that there are six trailing 'X' characters, even if the
1415      user forgot to supply them.  Output must be quoted if
1416      successful.  */
1417   obstack_grow (obs, lquote.string, lquote.length);
1418   obstack_grow (obs, pattern, len);
1419   for (i = 0; len > 0 && i < 6; i++)
1420     if (pattern[len - i - 1] != 'X')
1421       break;
1422   obstack_grow0 (obs, "XXXXXX", 6 - i);
1423   name = (char *) obstack_base (obs) + lquote.length;
1424 
1425   errno = 0;
1426   fd = mkstemp (name);
1427   if (fd < 0)
1428     {
1429       M4ERROR ((0, errno, _("%s: cannot create tempfile `%s'"), me, pattern));
1430       obstack_free (obs, obstack_finish (obs));
1431     }
1432   else
1433     {
1434       close (fd);
1435       /* Remove NUL, then finish quote.  */
1436       obstack_blank_fast (obs, -1);
1437       obstack_grow (obs, rquote.string, rquote.length);
1438     }
1439 }
1440 
1441 static void
m4_maketemp(struct obstack * obs,int argc,token_data ** argv)1442 m4_maketemp (struct obstack *obs, int argc, token_data **argv)
1443 {
1444   if (bad_argc (argv[0], argc, 2, 2))
1445     return;
1446   if (no_gnu_extensions)
1447     {
1448       /* POSIX states "any trailing 'X' characters [are] replaced with
1449          the current process ID as a string", without referencing the
1450          file system.  Horribly insecure, but we have to do it when we
1451          are in traditional mode.
1452 
1453          For reference, Solaris m4 does:
1454            maketemp() -> `'
1455            maketemp(X) -> `X'
1456            maketemp(XX) -> `Xn', where n is last digit of pid
1457            maketemp(XXXXXXXX) -> `X00nnnnn', where nnnnn is 16-bit pid
1458       */
1459       const char *str = ARG (1);
1460       int len = strlen (str);
1461       int i;
1462       int len2;
1463 
1464       M4ERROR ((warning_status, 0, _("recommend using mkstemp instead")));
1465       for (i = len; i > 1; i--)
1466         if (str[i - 1] != 'X')
1467           break;
1468       obstack_grow (obs, str, i);
1469       str = ntoa ((int32_t) getpid (), 10);
1470       len2 = strlen (str);
1471       if (len2 > len - i)
1472         obstack_grow0 (obs, str + len2 - (len - i), len - i);
1473       else
1474         {
1475           while (i++ < len - len2)
1476             obstack_1grow (obs, '0');
1477           obstack_grow0 (obs, str, len2);
1478         }
1479     }
1480   else
1481     mkstemp_helper (obs, ARG (0), ARG (1), strlen (ARG (1)));
1482 }
1483 
1484 static void
m4_mkstemp(struct obstack * obs,int argc,token_data ** argv)1485 m4_mkstemp (struct obstack *obs, int argc, token_data **argv)
1486 {
1487   if (bad_argc (argv[0], argc, 2, 2))
1488     return;
1489   mkstemp_helper (obs, ARG (0), ARG (1), strlen (ARG (1)));
1490 }
1491 
1492 /*----------------------------------------.
1493 | Print all arguments on standard error.  |
1494 `----------------------------------------*/
1495 
1496 static void
m4_errprint(struct obstack * obs,int argc,token_data ** argv)1497 m4_errprint (struct obstack *obs, int argc, token_data **argv)
1498 {
1499   if (bad_argc (argv[0], argc, 2, -1))
1500     return;
1501   dump_args (obs, argc, argv, " ", false);
1502   obstack_1grow (obs, '\0');
1503   debug_flush_files ();
1504   xfprintf (stderr, "%s", (char *) obstack_finish (obs));
1505   fflush (stderr);
1506 }
1507 
1508 static void
m4___file__(struct obstack * obs,int argc,token_data ** argv)1509 m4___file__ (struct obstack *obs, int argc, token_data **argv)
1510 {
1511   if (bad_argc (argv[0], argc, 1, 1))
1512     return;
1513   obstack_grow (obs, lquote.string, lquote.length);
1514   obstack_grow (obs, current_file, strlen (current_file));
1515   obstack_grow (obs, rquote.string, rquote.length);
1516 }
1517 
1518 static void
m4___line__(struct obstack * obs,int argc,token_data ** argv)1519 m4___line__ (struct obstack *obs, int argc, token_data **argv)
1520 {
1521   if (bad_argc (argv[0], argc, 1, 1))
1522     return;
1523   shipout_int (obs, current_line);
1524 }
1525 
1526 static void
m4___program__(struct obstack * obs,int argc,token_data ** argv)1527 m4___program__ (struct obstack *obs, int argc, token_data **argv)
1528 {
1529   if (bad_argc (argv[0], argc, 1, 1))
1530     return;
1531   obstack_grow (obs, lquote.string, lquote.length);
1532   obstack_grow (obs, program_name, strlen (program_name));
1533   obstack_grow (obs, rquote.string, rquote.length);
1534 }
1535 
1536 /* This section contains various macros for exiting, saving input until
1537    EOF is seen, and tracing macro calls.  That is: "m4exit", "m4wrap",
1538    "traceon" and "traceoff".  */
1539 
1540 /*----------------------------------------------------------.
1541 | Exit immediately, with exit status specified by the first |
1542 | argument, or 0 if no arguments are present.               |
1543 `----------------------------------------------------------*/
1544 
1545 static void
m4_m4exit(struct obstack * obs MAYBE_UNUSED,int argc,token_data ** argv)1546 m4_m4exit (struct obstack *obs MAYBE_UNUSED, int argc, token_data **argv)
1547 {
1548   int exit_code = EXIT_SUCCESS;
1549 
1550   /* Warn on bad arguments, but still exit.  */
1551   bad_argc (argv[0], argc, 1, 2);
1552   if (argc >= 2 && !numeric_arg (argv[0], ARG (1), &exit_code))
1553     exit_code = EXIT_FAILURE;
1554   if (exit_code < 0 || exit_code > 255)
1555     {
1556       M4ERROR ((warning_status, 0,
1557                 _("exit status out of range: `%d'"), exit_code));
1558       exit_code = EXIT_FAILURE;
1559     }
1560   /* Change debug stream back to stderr, to force flushing debug stream and
1561      detect any errors it might have encountered.  */
1562   debug_set_output (NULL);
1563   debug_flush_files ();
1564   if (exit_code == EXIT_SUCCESS && retcode != EXIT_SUCCESS)
1565     exit_code = retcode;
1566   /* Propagate non-zero status to atexit handlers.  */
1567   if (exit_code != EXIT_SUCCESS)
1568     exit_failure = exit_code;
1569   exit (exit_code);
1570 }
1571 
1572 /*------------------------------------------------------------------.
1573 | Save the argument text until EOF has been seen, allowing for user |
1574 | specified cleanup action.  GNU version saves all arguments, the   |
1575 | standard version only the first.                                  |
1576 `------------------------------------------------------------------*/
1577 
1578 static void
m4_m4wrap(struct obstack * obs,int argc,token_data ** argv)1579 m4_m4wrap (struct obstack *obs, int argc, token_data **argv)
1580 {
1581   if (bad_argc (argv[0], argc, 2, -1))
1582     return;
1583   if (no_gnu_extensions)
1584     obstack_grow (obs, ARG (1), strlen (ARG (1)));
1585   else
1586     dump_args (obs, argc, argv, " ", false);
1587   obstack_1grow (obs, '\0');
1588   push_wrapup ((char *) obstack_finish (obs));
1589 }
1590 
1591 /* Enable tracing of all specified macros, or all, if none is specified.
1592    Tracing is disabled by default, when a macro is defined.  This can be
1593    overridden by the "t" debug flag.  */
1594 
1595 /*------------------------------------------------------------------.
1596 | Set_trace () is used by "traceon" and "traceoff" to enable and    |
1597 | disable tracing of a macro.  It disables tracing if DATA is NULL, |
1598 | otherwise it enables tracing.                                     |
1599 `------------------------------------------------------------------*/
1600 
1601 static void
set_trace(symbol * sym,void * data)1602 set_trace (symbol *sym, void *data)
1603 {
1604   SYMBOL_TRACED (sym) = data != NULL;
1605   /* Remove placeholder from table if macro is undefined and untraced.  */
1606   if (SYMBOL_TYPE (sym) == TOKEN_VOID && data == NULL)
1607     lookup_symbol (SYMBOL_NAME (sym), SYMBOL_POPDEF);
1608 }
1609 
1610 static void
m4_traceon(struct obstack * obs,int argc,token_data ** argv)1611 m4_traceon (struct obstack *obs, int argc, token_data **argv)
1612 {
1613   symbol *s;
1614   int i;
1615 
1616   if (argc == 1)
1617     hack_all_symbols (set_trace, obs);
1618   else
1619     for (i = 1; i < argc; i++)
1620       {
1621         s = lookup_symbol (ARG (i), SYMBOL_LOOKUP);
1622         if (!s)
1623           s = lookup_symbol (ARG (i), SYMBOL_INSERT);
1624         set_trace (s, obs);
1625       }
1626 }
1627 
1628 /*------------------------------------------------------------------------.
1629 | Disable tracing of all specified macros, or all, if none is specified.  |
1630 `------------------------------------------------------------------------*/
1631 
1632 static void
m4_traceoff(struct obstack * obs MAYBE_UNUSED,int argc,token_data ** argv)1633 m4_traceoff (struct obstack *obs MAYBE_UNUSED, int argc, token_data **argv)
1634 {
1635   symbol *s;
1636   int i;
1637 
1638   if (argc == 1)
1639     hack_all_symbols (set_trace, NULL);
1640   else
1641     for (i = 1; i < argc; i++)
1642       {
1643         s = lookup_symbol (TOKEN_DATA_TEXT (argv[i]), SYMBOL_LOOKUP);
1644         if (s != NULL)
1645           set_trace (s, NULL);
1646       }
1647 }
1648 
1649 /*------------------------------------------------------------------.
1650 | On-the-fly control of the format of the tracing output.  It takes |
1651 | one argument, which is a character string like given to the -d    |
1652 | option, or none in which case the debug_level is zeroed.          |
1653 `------------------------------------------------------------------*/
1654 
1655 static void
m4_debugmode(struct obstack * obs MAYBE_UNUSED,int argc,token_data ** argv)1656 m4_debugmode (struct obstack *obs MAYBE_UNUSED, int argc, token_data **argv)
1657 {
1658   int new_debug_level;
1659   int change_flag;
1660 
1661   if (bad_argc (argv[0], argc, 1, 2))
1662     return;
1663 
1664   if (argc == 1)
1665     debug_level = 0;
1666   else
1667     {
1668       if (ARG (1)[0] == '+' || ARG (1)[0] == '-')
1669         {
1670           change_flag = ARG (1)[0];
1671           new_debug_level = debug_decode (ARG (1) + 1);
1672         }
1673       else
1674         {
1675           change_flag = 0;
1676           new_debug_level = debug_decode (ARG (1));
1677         }
1678 
1679       if (new_debug_level < 0)
1680         M4ERROR ((warning_status, 0,
1681                   _("Debugmode: bad debug flags: `%s'"), ARG (1)));
1682       else
1683         {
1684           switch (change_flag)
1685             {
1686             case 0:
1687               debug_level = new_debug_level;
1688               break;
1689 
1690             case '+':
1691               debug_level |= new_debug_level;
1692               break;
1693 
1694             case '-':
1695               debug_level &= ~new_debug_level;
1696               break;
1697 
1698             default:
1699               M4ERROR ((warning_status, 0,
1700                         "INTERNAL ERROR: bad flag in m4_debugmode ()"));
1701               abort ();
1702             }
1703         }
1704     }
1705 }
1706 
1707 /*-------------------------------------------------------------------------.
1708 | Specify the destination of the debugging output.  With one argument, the |
1709 | argument is taken as a file name, with no arguments, revert to stderr.   |
1710 `-------------------------------------------------------------------------*/
1711 
1712 static void
m4_debugfile(struct obstack * obs MAYBE_UNUSED,int argc,token_data ** argv)1713 m4_debugfile (struct obstack *obs MAYBE_UNUSED, int argc, token_data **argv)
1714 {
1715   if (bad_argc (argv[0], argc, 1, 2))
1716     return;
1717 
1718   if (argc == 1)
1719     debug_set_output (NULL);
1720   else if (!debug_set_output (ARG (1)))
1721     M4ERROR ((warning_status, errno,
1722               _("cannot set debug file `%s'"), ARG (1)));
1723 }
1724 
1725 /* This section contains text processing macros: "len", "index",
1726    "substr", "translit", "format", "regexp" and "patsubst".  The last
1727    three are GNU specific.  */
1728 
1729 /*---------------------------------------------.
1730 | Expand to the length of the first argument.  |
1731 `---------------------------------------------*/
1732 
1733 static void
m4_len(struct obstack * obs,int argc,token_data ** argv)1734 m4_len (struct obstack *obs, int argc, token_data **argv)
1735 {
1736   if (bad_argc (argv[0], argc, 2, 2))
1737     return;
1738   shipout_int (obs, strlen (ARG (1)));
1739 }
1740 
1741 /*-------------------------------------------------------------------.
1742 | The macro expands to the first index of the second argument in the |
1743 | first argument.                                                    |
1744 `-------------------------------------------------------------------*/
1745 
1746 static void
m4_index(struct obstack * obs,int argc,token_data ** argv)1747 m4_index (struct obstack *obs, int argc, token_data **argv)
1748 {
1749   const char *haystack;
1750   const char *result;
1751   int retval;
1752 
1753   if (bad_argc (argv[0], argc, 3, 3))
1754     {
1755       /* builtin(`index') is blank, but index(`abc') is 0.  */
1756       if (argc == 2)
1757         shipout_int (obs, 0);
1758       return;
1759     }
1760 
1761   haystack = ARG (1);
1762   result = strstr (haystack, ARG (2));
1763   retval = result ? result - haystack : -1;
1764 
1765   shipout_int (obs, retval);
1766 }
1767 
1768 /*-----------------------------------------------------------------.
1769 | The macro "substr" extracts substrings from the first argument,  |
1770 | starting from the index given by the second argument, extending  |
1771 | for a length given by the third argument.  If the third argument |
1772 | is missing, the substring extends to the end of the first        |
1773 | argument.                                                        |
1774 `-----------------------------------------------------------------*/
1775 
1776 static void
m4_substr(struct obstack * obs,int argc,token_data ** argv)1777 m4_substr (struct obstack *obs, int argc, token_data **argv)
1778 {
1779   int start = 0;
1780   int length, avail;
1781 
1782   if (bad_argc (argv[0], argc, 3, 4))
1783     {
1784       /* builtin(`substr') is blank, but substr(`abc') is abc.  */
1785       if (argc == 2)
1786         obstack_grow (obs, ARG (1), strlen (ARG (1)));
1787       return;
1788     }
1789 
1790   length = avail = strlen (ARG (1));
1791   if (!numeric_arg (argv[0], ARG (2), &start))
1792     return;
1793 
1794   if (argc >= 4 && !numeric_arg (argv[0], ARG (3), &length))
1795     return;
1796 
1797   if (start < 0 || length <= 0 || start >= avail)
1798     return;
1799 
1800   if (start + length > avail)
1801     length = avail - start;
1802   obstack_grow (obs, ARG (1) + start, length);
1803 }
1804 
/*-------------------------------------------------------------------.
| For "translit", ranges such as a-z are allowed in the second and   |
| third arguments.  This function expands every range in S and       |
| returns the expanded string, built on OBS.  A dash that is the     |
| first or last character of S stands for itself.  When the start of |
| a range sorts after its end, the range runs backwards, so 9-0      |
| yields 9876543210.                                                 |
`-------------------------------------------------------------------*/

static const char *
expand_ranges (const char *s, struct obstack *obs)
{
  unsigned char prev = '\0';    /* byte preceding *S, NUL at the start */

  while (*s != '\0')
    {
      if (*s != '-' || prev == '\0')
        {
          /* Ordinary character, or a dash with nothing before it.  */
          obstack_1grow (obs, *s);
        }
      else
        {
          unsigned char last = to_uchar (*++s);
          int c;

          if (last == '\0')
            {
              /* trailing dash */
              obstack_1grow (obs, '-');
              break;
            }

          /* PREV itself has already been emitted, so only the bytes
             after it, up to and including LAST, are added here.  */
          if (prev <= last)
            {
              for (c = prev + 1; c <= last; c++)
                obstack_1grow (obs, c);
            }
          else
            {
              for (c = prev - 1; c >= last; c--)
                obstack_1grow (obs, c);
            }
        }

      /* The byte just handled (the range end, after a range) becomes
         the possible start of the next range.  */
      prev = to_uchar (*s++);
    }

  obstack_1grow (obs, '\0');
  return (char *) obstack_finish (obs);
}
1850 
1851 /*-----------------------------------------------------------------.
1852 | The macro "translit" translates all characters in the first      |
1853 | argument, which are present in the second argument, into the     |
1854 | corresponding character from the third argument.  If the third   |
1855 | argument is shorter than the second, the extra characters in the |
1856 | second argument are deleted from the first.                      |
1857 `-----------------------------------------------------------------*/
1858 
1859 static void
m4_translit(struct obstack * obs,int argc,token_data ** argv)1860 m4_translit (struct obstack *obs, int argc, token_data **argv)
1861 {
1862   const char *data = ARG (1);
1863   const char *from = ARG (2);
1864   const char *to;
1865   char map[UCHAR_MAX + 1];
1866   char found[UCHAR_MAX + 1];
1867   unsigned char ch;
1868 
1869   if (bad_argc (argv[0], argc, 3, 4) || !*data || !*from)
1870     {
1871       /* builtin(`translit') is blank, but translit(`abc') is abc.  */
1872       if (2 <= argc)
1873         obstack_grow (obs, data, strlen (data));
1874       return;
1875     }
1876 
1877   to = ARG (3);
1878   if (strchr (to, '-') != NULL)
1879     {
1880       to = expand_ranges (to, obs);
1881       assert (to && *to);
1882     }
1883 
1884   /* If there are only one or two bytes to replace, it is faster to
1885      use memchr2.  Using expand_ranges does nothing unless there are
1886      at least three bytes.  */
1887   if (!from[1] || !from[2])
1888     {
1889       const char *p;
1890       size_t len = strlen (data);
1891       while ((p = (char *) memchr2 (data, from[0], from[1], len)))
1892         {
1893           obstack_grow (obs, data, p - data);
1894           len -= p - data;
1895           if (!len)
1896             return;
1897           data = p + 1;
1898           len--;
1899           if (*p == from[0] && to[0])
1900             obstack_1grow (obs, to[0]);
1901           else if (*p == from[1] && to[0] && to[1])
1902             obstack_1grow (obs, to[1]);
1903         }
1904       obstack_grow (obs, data, len);
1905       return;
1906     }
1907 
1908   if (strchr (from, '-') != NULL)
1909     {
1910       from = expand_ranges (from, obs);
1911       assert (from && *from);
1912     }
1913 
1914   /* Calling strchr(from) for each character in data is quadratic,
1915      since both strings can be arbitrarily long.  Instead, create a
1916      from-to mapping in one pass of from, then use that map in one
1917      pass of data, for linear behavior.  Traditional behavior is that
1918      only the first instance of a character in from is consulted,
1919      hence the found map.  */
1920   memset (map, 0, sizeof map);
1921   memset (found, 0, sizeof found);
1922   for ( ; (ch = *from) != '\0'; from++)
1923     {
1924       if (! found[ch])
1925         {
1926           found[ch] = 1;
1927           map[ch] = *to;
1928         }
1929       if (*to != '\0')
1930         to++;
1931     }
1932 
1933   for (data = ARG (1); (ch = *data) != '\0'; data++)
1934     {
1935       if (! found[ch])
1936         obstack_1grow (obs, ch);
1937       else if (map[ch])
1938         obstack_1grow (obs, map[ch]);
1939     }
1940 }
1941 
1942 /*-------------------------------------------------------------------.
1943 | Frontend for printf like formatting.  The function format () lives |
1944 | in the file format.c.                                              |
1945 `-------------------------------------------------------------------*/
1946 
1947 static void
m4_format(struct obstack * obs,int argc,token_data ** argv)1948 m4_format (struct obstack *obs, int argc, token_data **argv)
1949 {
1950   if (bad_argc (argv[0], argc, 2, -1))
1951     return;
1952   expand_format (obs, argc - 1, argv + 1);
1953 }
1954 
1955 /*------------------------------------------------------------------.
1956 | Function to perform substitution by regular expressions.  Used by |
1957 | the builtins regexp and patsubst.  The changed text is placed on  |
1958 | the obstack.  The substitution is REPL, with \& substituted by    |
1959 | this part of VICTIM matched by the last whole regular expression, |
1960 | taken from REGS[0], and \N substituted by the text matched by the |
1961 | Nth parenthesized sub-expression, taken from REGS[N].             |
1962 `------------------------------------------------------------------*/
1963 
1964 static int substitute_warned = 0;
1965 
1966 static void
substitute(struct obstack * obs,const char * victim,const char * repl,struct re_registers * regs)1967 substitute (struct obstack *obs, const char *victim, const char *repl,
1968             struct re_registers *regs)
1969 {
1970   int ch;
1971   __re_size_t ind;
1972   while (1)
1973     {
1974       const char *backslash = strchr (repl, '\\');
1975       if (!backslash)
1976         {
1977           obstack_grow (obs, repl, strlen (repl));
1978           return;
1979         }
1980       obstack_grow (obs, repl, backslash - repl);
1981       repl = backslash;
1982       ch = *++repl;
1983       switch (ch)
1984         {
1985         case '0':
1986           if (!substitute_warned)
1987             {
1988               M4ERROR ((warning_status, 0, _("\
1989 Warning: \\0 will disappear, use \\& instead in replacements")));
1990               substitute_warned = 1;
1991             }
1992           FALLTHROUGH;
1993         case '&':
1994           obstack_grow (obs, victim + regs->start[0],
1995                         regs->end[0] - regs->start[0]);
1996           repl++;
1997           break;
1998 
1999         case '1': case '2': case '3': case '4': case '5': case '6':
2000         case '7': case '8': case '9':
2001           ind = ch -= '0';
2002           if (regs->num_regs - 1 <= ind)
2003             M4ERROR ((warning_status, 0,
2004                       _("Warning: sub-expression %d not present"), ch));
2005           else if (regs->end[ch] > 0)
2006             obstack_grow (obs, victim + regs->start[ch],
2007                           regs->end[ch] - regs->start[ch]);
2008           repl++;
2009           break;
2010 
2011         case '\0':
2012           M4ERROR ((warning_status, 0,
2013                     _("Warning: trailing \\ ignored in replacement")));
2014           return;
2015 
2016         default:
2017           obstack_1grow (obs, ch);
2018           repl++;
2019           break;
2020         }
2021     }
2022 }
2023 
2024 /*------------------------------------------.
2025 | Initialize regular expression variables.  |
2026 `------------------------------------------*/
2027 
2028 void
init_pattern_buffer(struct re_pattern_buffer * buf,struct re_registers * regs)2029 init_pattern_buffer (struct re_pattern_buffer *buf, struct re_registers *regs)
2030 {
2031   buf->translate = NULL;
2032   buf->fastmap = NULL;
2033   buf->buffer = NULL;
2034   buf->allocated = 0;
2035   if (regs)
2036     {
2037       regs->start = NULL;
2038       regs->end = NULL;
2039     }
2040 }
2041 
2042 /*------------------------------------------------------------------.
2043 | Regular expression version of index.  Given two arguments, expand |
2044 | to the index of the first match of the second argument (a regexp) |
2045 | in the first.  Expand to -1 if here is no match.  Given a third   |
2046 | argument, it changes the expansion to this argument.              |
2047 `------------------------------------------------------------------*/
2048 
2049 static void
m4_regexp(struct obstack * obs,int argc,token_data ** argv)2050 m4_regexp (struct obstack *obs, int argc, token_data **argv)
2051 {
2052   const char *victim;           /* first argument */
2053   const char *regexp;           /* regular expression */
2054   const char *repl;             /* replacement string */
2055 
2056   struct re_pattern_buffer buf; /* compiled regular expression */
2057   struct re_registers regs;     /* for subexpression matches */
2058   const char *msg;              /* error message from re_compile_pattern */
2059   int startpos;                 /* start position of match */
2060   int length;                   /* length of first argument */
2061 
2062   if (bad_argc (argv[0], argc, 3, 4))
2063     {
2064       /* builtin(`regexp') is blank, but regexp(`abc') is 0.  */
2065       if (argc == 2)
2066         shipout_int (obs, 0);
2067       return;
2068     }
2069 
2070   victim = TOKEN_DATA_TEXT (argv[1]);
2071   regexp = TOKEN_DATA_TEXT (argv[2]);
2072 
2073   init_pattern_buffer (&buf, &regs);
2074   msg = re_compile_pattern (regexp, strlen (regexp), &buf);
2075 
2076   if (msg != NULL)
2077     {
2078       M4ERROR ((warning_status, 0,
2079                 _("bad regular expression: `%s': %s"), regexp, msg));
2080       free_pattern_buffer (&buf, &regs);
2081       return;
2082     }
2083 
2084   length = strlen (victim);
2085   /* Avoid overhead of allocating regs if we won't use it.  */
2086   startpos = re_search (&buf, victim, length, 0, length,
2087                         argc == 3 ? NULL : &regs);
2088 
2089   if (startpos == -2)
2090     M4ERROR ((warning_status, 0,
2091               _("error matching regular expression `%s'"), regexp));
2092   else if (argc == 3)
2093     shipout_int (obs, startpos);
2094   else if (startpos >= 0)
2095     {
2096       repl = TOKEN_DATA_TEXT (argv[3]);
2097       substitute (obs, victim, repl, &regs);
2098     }
2099 
2100   free_pattern_buffer (&buf, &regs);
2101 }
2102 
2103 /*--------------------------------------------------------------------------.
2104 | Substitute all matches of a regexp occuring in a string.  Each match of   |
2105 | the second argument (a regexp) in the first argument is changed to the    |
2106 | third argument, with \& substituted by the matched text, and \N           |
2107 | substituted by the text matched by the Nth parenthesized sub-expression.  |
2108 `--------------------------------------------------------------------------*/
2109 
2110 static void
m4_patsubst(struct obstack * obs,int argc,token_data ** argv)2111 m4_patsubst (struct obstack *obs, int argc, token_data **argv)
2112 {
2113   const char *victim;           /* first argument */
2114   const char *regexp;           /* regular expression */
2115 
2116   struct re_pattern_buffer buf; /* compiled regular expression */
2117   struct re_registers regs;     /* for subexpression matches */
2118   const char *msg;              /* error message from re_compile_pattern */
2119   int matchpos;                 /* start position of match */
2120   int offset;                   /* current match offset */
2121   int length;                   /* length of first argument */
2122 
2123   if (bad_argc (argv[0], argc, 3, 4))
2124     {
2125       /* builtin(`patsubst') is blank, but patsubst(`abc') is abc.  */
2126       if (argc == 2)
2127         obstack_grow (obs, ARG (1), strlen (ARG (1)));
2128       return;
2129     }
2130 
2131   regexp = TOKEN_DATA_TEXT (argv[2]);
2132 
2133   init_pattern_buffer (&buf, &regs);
2134   msg = re_compile_pattern (regexp, strlen (regexp), &buf);
2135 
2136   if (msg != NULL)
2137     {
2138       M4ERROR ((warning_status, 0,
2139                 _("bad regular expression `%s': %s"), regexp, msg));
2140       free (buf.buffer);
2141       return;
2142     }
2143 
2144   victim = TOKEN_DATA_TEXT (argv[1]);
2145   length = strlen (victim);
2146 
2147   offset = 0;
2148   while (offset <= length)
2149     {
2150       matchpos = re_search (&buf, victim, length,
2151                             offset, length - offset, &regs);
2152       if (matchpos < 0)
2153         {
2154 
2155           /* Match failed -- either error or there is no match in the
2156              rest of the string, in which case the rest of the string is
2157              copied verbatim.  */
2158 
2159           if (matchpos == -2)
2160             M4ERROR ((warning_status, 0,
2161                       _("error matching regular expression `%s'"), regexp));
2162           else if (offset < length)
2163             obstack_grow (obs, victim + offset, length - offset);
2164           break;
2165         }
2166 
2167       /* Copy the part of the string that was skipped by re_search ().  */
2168 
2169       if (matchpos > offset)
2170         obstack_grow (obs, victim + offset, matchpos - offset);
2171 
2172       /* Handle the part of the string that was covered by the match.  */
2173 
2174       substitute (obs, victim, ARG (3), &regs);
2175 
2176       /* Update the offset to the end of the match.  If the regexp
2177          matched a null string, advance offset one more, to avoid
2178          infinite loops.  */
2179 
2180       offset = regs.end[0];
2181       if (regs.start[0] == regs.end[0])
2182         obstack_1grow (obs, victim[offset++]);
2183     }
2184   obstack_1grow (obs, '\0');
2185 
2186   free_pattern_buffer (&buf, &regs);
2187 }
2188 
2189 /* Finally, a placeholder builtin.  This builtin is not installed by
2190    default, but when reading back frozen files, this is associated
2191    with any builtin we don't recognize (for example, if the frozen
2192    file was created with a changeword capable m4, but is then loaded
2193    by a different m4 that does not support changeword).  This way, we
2194    can keep 'm4 -R' quiet in the common case that the user did not
2195    know or care about the builtin when the frozen file was created,
2196    while still flagging it as a potential error if an attempt is made
2197    to actually use the builtin.  */
2198 
2199 /*--------------------------------------------------------------------.
2200 | Issue a warning that this macro is a placeholder for an unsupported |
2201 | builtin that was requested while reloading a frozen file.           |
2202 `--------------------------------------------------------------------*/
2203 
2204 void
m4_placeholder(struct obstack * obs MAYBE_UNUSED,int argc,token_data ** argv)2205 m4_placeholder (struct obstack *obs MAYBE_UNUSED, int argc,
2206                 token_data **argv)
2207 {
2208   M4ERROR ((warning_status, 0, _("\
2209 builtin `%s' requested by frozen file is not supported"), ARG (0)));
2210 }
2211 
2212 /*-------------------------------------------------------------------.
2213 | This function handles all expansion of user defined and predefined |
2214 | macros.  It is called with an obstack OBS, where the macros        |
2215 | expansion will be placed, as an unfinished object.  SYM points to  |
2216 | the macro definition, giving the expansion text.  ARGC and ARGV    |
2217 | are the arguments, as usual.                                       |
2218 `-------------------------------------------------------------------*/
2219 
2220 void
expand_user_macro(struct obstack * obs,symbol * sym,int argc,token_data ** argv)2221 expand_user_macro (struct obstack *obs, symbol *sym,
2222                    int argc, token_data **argv)
2223 {
2224   const char *text = SYMBOL_TEXT (sym);
2225   int i;
2226   while (1)
2227     {
2228       const char *dollar = strchr (text, '$');
2229       if (!dollar)
2230         {
2231           obstack_grow (obs, text, strlen (text));
2232           return;
2233         }
2234       obstack_grow (obs, text, dollar - text);
2235       text = dollar;
2236       switch (*++text)
2237         {
2238         case '0': case '1': case '2': case '3': case '4':
2239         case '5': case '6': case '7': case '8': case '9':
2240           if (no_gnu_extensions)
2241             {
2242               i = *text++ - '0';
2243             }
2244           else
2245             {
2246               for (i = 0; c_isdigit (*text); text++)
2247                 i = i*10 + (*text - '0');
2248             }
2249           if (i < argc)
2250             obstack_grow (obs, TOKEN_DATA_TEXT (argv[i]),
2251                           strlen (TOKEN_DATA_TEXT (argv[i])));
2252           break;
2253 
2254         case '#': /* number of arguments */
2255           shipout_int (obs, argc - 1);
2256           text++;
2257           break;
2258 
2259         case '*': /* all arguments */
2260         case '@': /* ... same, but quoted */
2261           dump_args (obs, argc, argv, ",", *text == '@');
2262           text++;
2263           break;
2264 
2265         default:
2266           obstack_1grow (obs, '$');
2267           break;
2268         }
2269     }
2270 }
2271