1 /* GNU m4 -- A simple macro processor
2 
3    Copyright (C) 1989-1994, 2004-2014, 2016-2017, 2020-2021 Free Software
4    Foundation, Inc.
5 
6    This file is part of GNU M4.
7 
8    GNU M4 is free software: you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation, either version 3 of the License, or
11    (at your option) any later version.
12 
13    GNU M4 is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program.  If not, see <https://www.gnu.org/licenses/>.
20 */
21 
22 /* We use <config.h> instead of "config.h" so that a compilation
23    using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
24    (which it would do because it found this file in $srcdir).  */
25 
26 #include <config.h>
27 
28 #include <assert.h>
29 #include <c-ctype.h>
30 #include <errno.h>
31 #include <limits.h>
32 #include <locale.h>
33 #include <stdbool.h>
34 #include <stdint.h>
35 #include <string.h>
36 #include <sys/stat.h>
37 #include <sys/types.h>
38 
39 #include "attribute.h"
40 #include "binary-io.h"
41 #include "clean-temp.h"
42 #include "cloexec.h"
43 #include "close-stream.h"
44 #include "closein.h"
45 #include "dirname.h"
46 #include "error.h"
47 #include "exitfail.h"
48 #include "filenamecat.h"
49 #include "obstack.h"
50 #include "stdio--.h"
51 #include "stdlib--.h"
52 #include "unistd--.h"
53 #include "verify.h"
54 #include "verror.h"
55 #include "xalloc.h"
56 #include "xprintf.h"
57 #include "xvasprintf.h"
58 
/* Fold the many compiler-specific spellings of "this is a Unix-like
   system" into the single macro UNIX.  */
#if (defined (unix) || defined (__unix) || defined (__unix__)           \
     || defined (_POSIX_VERSION) || defined (_POSIX2_VERSION)           \
     || defined (__NetBSD__) || defined (__OpenBSD__)                   \
     || defined (__APPLE__) || defined (__APPLE_CC__))
# define UNIX 1
#endif

/* Native Windows (either spelling of the Win32 macro), excluding
   Cygwin, is reported through W32_NATIVE.  */
#if !defined (__CYGWIN__) && (defined (_WIN32) || defined (__WIN32__))
# define W32_NATIVE 1
#endif

/* EMX is reported as OS2; UNIX is withdrawn in that case even if one
   of the tests above had set it.  */
#if defined (__EMX__)
# define OS2 1
# undef UNIX
#endif
77 
78 /* Used if any programmer error is detected (not possible, right?)  */
79 #define EXIT_INTERNAL_ERROR 2
80 
81 /* Used for version mismatch, when -R detects a frozen file it can't parse.  */
82 #define EXIT_MISMATCH 63
83 
84 /* NLS.  */
85 
86 #include "gettext.h"
87 #if ! ENABLE_NLS
88 # undef textdomain
89 # define textdomain(Domainname) /* empty */
90 # undef bindtextdomain
91 # define bindtextdomain(Domainname, Dirname) /* empty */
92 #endif
93 
94 #define _(msgid) gettext (msgid)
95 
96 /* Various declarations.  */
97 
/* A run of characters together with its explicit length.  */
typedef struct string
{
  char *string;         /* characters of the string */
  size_t length;        /* length of the string */
} STRING;
104 
105 /* Memory allocation.  */
106 #define obstack_chunk_alloc     xmalloc
107 #define obstack_chunk_free      free
108 
109 /* Those must come first.  */
110 typedef struct token_data token_data;
111 typedef void builtin_func (struct obstack *, int, token_data **);
112 
/* Type to use for one-bit flag members of structures.  Gnulib's
   stdbool doesn't work with bool bitfields, so unsigned int is the
   portable choice; bool is used only when __GNUC__ is greater than 2,
   where it is known to work and gives nicer debugging.  */
#if defined __GNUC__ && 2 < __GNUC__
typedef bool bool_bitfield;
#else
typedef unsigned int bool_bitfield;
#endif /* __GNUC__ newer than 2 */
121 
122 /* File: m4.c  --- global definitions.  */
123 
124 /* Option flags.  */
125 extern int sync_output;                 /* -s */
126 extern int debug_level;                 /* -d */
127 extern size_t hash_table_size;          /* -H */
128 extern int no_gnu_extensions;           /* -G */
129 extern int prefix_all_builtins;         /* -P */
130 extern int max_debug_argument_length;   /* -l */
131 extern int suppress_warnings;           /* -Q */
132 extern int warning_status;              /* -E */
133 extern int nesting_limit;               /* -L */
134 #ifdef ENABLE_CHANGEWORD
135 extern const char *user_word_regexp;    /* -W */
136 #endif
137 
138 /* Error handling.  */
139 extern int retcode;
140 
141 extern void m4_error (int, int, const char *, ...)
142   ATTRIBUTE_FORMAT ((__printf__, 3, 4));
143 extern void m4_error_at_line (int, int, const char *, int, const char *, ...)
144   ATTRIBUTE_FORMAT ((__printf__, 5, 6));
145 extern _Noreturn void m4_failure (int, const char *, ...)
146   ATTRIBUTE_FORMAT ((__printf__, 2, 3));
147 extern _Noreturn void m4_failure_at_line (int, const char *, int,
148                                           const char *, ...)
149   ATTRIBUTE_FORMAT ((__printf__, 4, 5));
150 
151 #define M4ERROR(Arglist) (m4_error Arglist)
152 #define M4ERROR_AT_LINE(Arglist) (m4_error_at_line Arglist)
153 
154 
155 /* File: debug.c  --- debugging and tracing function.  */
156 
157 extern FILE *debug;
158 
/* debug_level is a bitmask built from the flags below; the letter in
   each comment is the flag character that selects the bit.  */

#define DEBUG_TRACE_ARGS        (1 << 0) /* a: show arglist in trace output */
#define DEBUG_TRACE_EXPANSION   (1 << 1) /* e: show expansion in trace output */
#define DEBUG_TRACE_QUOTE       (1 << 2) /* q: quote args and expansion in trace output */
#define DEBUG_TRACE_ALL         (1 << 3) /* t: trace all macros -- overrides trace{on,off} */
#define DEBUG_TRACE_LINE        (1 << 4) /* l: add line numbers to trace output */
#define DEBUG_TRACE_FILE        (1 << 5) /* f: add file name to trace output */
#define DEBUG_TRACE_PATH        (1 << 6) /* p: trace path search of include files */
#define DEBUG_TRACE_CALL        (1 << 7) /* c: show macro call before args collection */
#define DEBUG_TRACE_INPUT       (1 << 8) /* i: trace changes of input files */
#define DEBUG_TRACE_CALLID      (1 << 9) /* x: add call id to trace output */

/* V: very verbose -- print everything (all ten bits above, i.e. 1023).  */
#define DEBUG_TRACE_VERBOSE                                             \
  (DEBUG_TRACE_ARGS | DEBUG_TRACE_EXPANSION | DEBUG_TRACE_QUOTE         \
   | DEBUG_TRACE_ALL | DEBUG_TRACE_LINE | DEBUG_TRACE_FILE              \
   | DEBUG_TRACE_PATH | DEBUG_TRACE_CALL | DEBUG_TRACE_INPUT            \
   | DEBUG_TRACE_CALLID)

/* default flags -- equiv: aeq (i.e. 7).  */
#define DEBUG_TRACE_DEFAULT                                             \
  (DEBUG_TRACE_ARGS | DEBUG_TRACE_EXPANSION | DEBUG_TRACE_QUOTE)
186 
/* Format Arg1 according to Fmt onto the debug stream.  Does nothing
   when debug is NULL.  */
#define DEBUG_PRINT1(Fmt, Arg1)                                 \
  do {                                                          \
    if (debug)                                                  \
      xfprintf (debug, Fmt, Arg1);                              \
  } while (0)
194 
/* Format Arg1, Arg2 and Arg3 according to Fmt onto the debug stream.
   Does nothing when debug is NULL.  */
#define DEBUG_PRINT3(Fmt, Arg1, Arg2, Arg3)                     \
  do {                                                          \
    if (debug)                                                  \
      xfprintf (debug, Fmt, Arg1, Arg2, Arg3);                  \
  } while (0)
202 
/* Emit one complete debug line: the prefix written by
   debug_message_prefix, then Fmt, then a newline.  Does nothing when
   debug is NULL.  Fmt is handed to xfprintf as the format string with
   no further arguments.  */
#define DEBUG_MESSAGE(Fmt)                                      \
  do {                                                          \
    if (debug)                                                  \
      {                                                         \
        debug_message_prefix ();                                \
        xfprintf (debug, Fmt);                                  \
        putc ('\n', debug);                                     \
      }                                                         \
  } while (0)
214 
/* Emit one complete debug line: the prefix written by
   debug_message_prefix, then Arg1 formatted according to Fmt, then a
   newline.  Does nothing when debug is NULL.  */
#define DEBUG_MESSAGE1(Fmt, Arg1)                               \
  do {                                                          \
    if (debug)                                                  \
      {                                                         \
        debug_message_prefix ();                                \
        xfprintf (debug, Fmt, Arg1);                            \
        putc ('\n', debug);                                     \
      }                                                         \
  } while (0)
226 
/* Emit one complete debug line: the prefix written by
   debug_message_prefix, then Arg1 and Arg2 formatted according to Fmt,
   then a newline.  Does nothing when debug is NULL.  */
#define DEBUG_MESSAGE2(Fmt, Arg1, Arg2)                         \
  do {                                                          \
    if (debug)                                                  \
      {                                                         \
        debug_message_prefix ();                                \
        xfprintf (debug, Fmt, Arg1, Arg2);                      \
        putc ('\n', debug);                                     \
      }                                                         \
  } while (0)
238 
239 extern void debug_init (void);
240 extern int debug_decode (const char *);
241 extern void debug_flush_files (void);
242 extern bool debug_set_output (const char *);
243 extern void debug_message_prefix (void);
244 
245 extern void trace_prepre (const char *, int);
246 extern void trace_pre (const char *, int, int, token_data **);
247 extern void trace_post (const char *, int, int, const char *);
248 
249 /* File: input.c  --- lexical definitions.  */
250 
/* The kinds of token.  Values are consecutive, starting at zero.  */
enum token_type
{
  TOKEN_EOF    = 0,     /* end of file */
  TOKEN_STRING = 1,     /* a quoted string or comment */
  TOKEN_WORD   = 2,     /* an identifier */
  TOKEN_OPEN   = 3,     /* ( */
  TOKEN_COMMA  = 4,     /* , */
  TOKEN_CLOSE  = 5,     /* ) */
  TOKEN_SIMPLE = 6,     /* any other single character */
  TOKEN_MACDEF = 7      /* a macro's definition (see "defn") */
};
263 
/* Tag describing the data held for a token, a macro argument, or a
   macro definition.  Values are consecutive, starting at zero.  */
enum token_data_type
{
  TOKEN_VOID = 0,
  TOKEN_TEXT = 1,
  TOKEN_FUNC = 2
};
271 
272 struct token_data
273 {
274   enum token_data_type type;
275   union
276     {
277       struct
278         {
279           char *text;
280 #ifdef ENABLE_CHANGEWORD
281           char *original_text;
282 #endif
283         }
284       u_t;
285       builtin_func *func;
286     }
287   u;
288 };
289 
290 #define TOKEN_DATA_TYPE(Td)             ((Td)->type)
291 #define TOKEN_DATA_TEXT(Td)             ((Td)->u.u_t.text)
292 #ifdef ENABLE_CHANGEWORD
293 # define TOKEN_DATA_ORIG_TEXT(Td)       ((Td)->u.u_t.original_text)
294 #endif
295 #define TOKEN_DATA_FUNC(Td)             ((Td)->u.func)
296 
297 typedef enum token_type token_type;
298 typedef enum token_data_type token_data_type;
299 
300 extern void input_init (void);
301 extern token_type peek_token (void);
302 extern token_type next_token (token_data *, int *);
303 extern void skip_line (void);
304 
305 /* push back input */
306 extern void push_file (FILE *, const char *, bool);
307 extern void push_macro (builtin_func *);
308 extern struct obstack *push_string_init (void);
309 extern const char *push_string_finish (void);
310 extern void push_wrapup (const char *);
311 extern bool pop_wrapup (void);
312 
313 /* current input file, and line */
314 extern const char *current_file;
315 extern int current_line;
316 
317 /* left and right quote, begin and end comment */
318 extern STRING bcomm;
319 extern STRING ecomm;
320 extern STRING lquote;
321 extern STRING rquote;
322 
323 #define DEF_LQUOTE "`"
324 #define DEF_RQUOTE "\'"
325 #define DEF_BCOMM "#"
326 #define DEF_ECOMM "\n"
327 
328 extern void set_quotes (const char *, const char *);
329 extern void set_comment (const char *, const char *);
330 #ifdef ENABLE_CHANGEWORD
331 extern void set_word_regexp (const char *);
332 #endif
333 
334 /* File: output.c --- output functions.  */
335 extern int current_diversion;
336 extern int output_current_line;
337 
338 extern void output_init (void);
339 extern void output_exit (void);
340 extern void output_text (const char *, int);
341 extern void shipout_text (struct obstack *, const char *, int, int);
342 extern void make_diversion (int);
343 extern void insert_diversion (int);
344 extern void insert_file (FILE *);
345 extern void freeze_diversions (FILE *);
346 
/* File: symtab.c  --- symbol table definitions.  */
348 
/* Operation modes for lookup_symbol ().  Values are consecutive,
   starting at zero.  */
enum symbol_lookup
{
  SYMBOL_LOOKUP  = 0,
  SYMBOL_INSERT  = 1,
  SYMBOL_DELETE  = 2,
  SYMBOL_PUSHDEF = 3,
  SYMBOL_POPDEF  = 4
};
358 
359 /* Symbol table entry.  */
360 struct symbol
361 {
362   struct symbol *stack; /* pushdef stack */
363   struct symbol *next; /* hash bucket chain */
364   bool_bitfield traced : 1;
365   bool_bitfield macro_args : 1;
366   bool_bitfield blind_no_args : 1;
367   bool_bitfield deleted : 1;
368   int pending_expansions;
369 
370   size_t hash;
371   char *name;
372   token_data data;
373 };
374 
375 #define SYMBOL_STACK(S)         ((S)->stack)
376 #define SYMBOL_TRACED(S)        ((S)->traced)
377 #define SYMBOL_MACRO_ARGS(S)    ((S)->macro_args)
378 #define SYMBOL_BLIND_NO_ARGS(S) ((S)->blind_no_args)
379 #define SYMBOL_DELETED(S)       ((S)->deleted)
380 #define SYMBOL_PENDING_EXPANSIONS(S) ((S)->pending_expansions)
381 #define SYMBOL_NAME(S)          ((S)->name)
382 #define SYMBOL_TYPE(S)          (TOKEN_DATA_TYPE (&(S)->data))
383 #define SYMBOL_TEXT(S)          (TOKEN_DATA_TEXT (&(S)->data))
384 #define SYMBOL_FUNC(S)          (TOKEN_DATA_FUNC (&(S)->data))
385 
386 typedef enum symbol_lookup symbol_lookup;
387 typedef struct symbol symbol;
388 typedef void hack_symbol (symbol *, void *);
389 
390 #define HASHMAX 65537             /* default, overridden by -Hsize */
391 
392 extern void free_symbol (symbol *sym);
393 extern void symtab_init (void);
394 extern symbol *lookup_symbol (const char *, symbol_lookup);
395 extern void hack_all_symbols (hack_symbol *, void *);
396 
397 /* File: macro.c  --- macro expansion.  */
398 
399 extern int expansion_level;
400 
401 extern void expand_input (void);
402 extern void call_macro (symbol *, int, token_data **, struct obstack *);
403 
404 /* File: builtin.c  --- builtins.  */
405 
406 struct builtin
407 {
408   const char *name;
409   bool_bitfield gnu_extension : 1;
410   bool_bitfield groks_macro_args : 1;
411   bool_bitfield blind_if_no_args : 1;
412   builtin_func *func;
413 };
414 
/* Three strings describing one predefined entry.
   NOTE(review): presumably unix_name and gnu_name are the two
   alternative macro names and func is the associated text -- confirm
   against the table in builtin.c.  */
struct predefined
{
  const char *unix_name;
  const char *gnu_name;
  const char *func;
};
421 
422 typedef struct builtin builtin;
423 typedef struct predefined predefined;
424 struct re_pattern_buffer;
425 struct re_registers;
426 
427 /* The default sequence detects multi-digit parameters (obsolete after
428    1.4.x), and any use of extended arguments with the default ${}
429    syntax (new in 2.0).  */
430 #define DEFAULT_MACRO_SEQUENCE "\\$\\({[^}]*}\\|[0-9][0-9]+\\)"
431 
432 extern void builtin_init (void);
433 extern void define_builtin (const char *, const builtin *, symbol_lookup);
434 extern void set_macro_sequence (const char *);
435 extern void free_macro_sequence (void);
436 extern void define_user_macro (const char *, const char *, symbol_lookup);
437 extern void undivert_all (void);
438 extern void expand_user_macro (struct obstack *, symbol *, int, token_data **);
439 extern void m4_placeholder (struct obstack *, int, token_data **);
440 extern void init_pattern_buffer (struct re_pattern_buffer *,
441                                  struct re_registers *);
442 extern const char *ntoa (int32_t, int);
443 
444 extern const builtin *find_builtin_by_addr (builtin_func *);
445 extern const builtin *find_builtin_by_name (const char *);
446 
447 /* File: path.c  --- path search for include files.  */
448 
449 extern void include_init (void);
450 extern void include_env_init (void);
451 extern void add_include_directory (const char *);
452 extern FILE *m4_path_search (const char *, char **);
453 
454 /* File: eval.c  --- expression evaluation.  */
455 
456 extern bool evaluate (const char *, int32_t *);
457 
458 /* File: format.c  --- printf like formatting.  */
459 
460 extern void expand_format (struct obstack *, int, token_data **);
461 
462 /* File: freeze.c --- frozen state files.  */
463 
464 extern void produce_frozen_state (const char *);
465 extern void reload_frozen_state (const char *);
466 
467 /* Debugging the memory allocator.  */
468 
469 #ifdef WITH_DMALLOC
470 # define DMALLOC_FUNC_CHECK
471 # include <dmalloc.h>
472 #endif
473 
474 /* Other debug stuff.  */
475 
476 #ifdef DEBUG
477 # define DEBUG_INCL   1
478 # define DEBUG_INPUT  1
479 # define DEBUG_MACRO  1
480 # define DEBUG_OUTPUT 1
481 # define DEBUG_STKOVF 1
482 # define DEBUG_SYM    1
483 #endif
484 
/* Convert a possibly-signed character to an unsigned character.  This is
   a bit safer than casting to unsigned char, since it catches some type
   errors that the cast doesn't.  When HAVE_INLINE is not set, a macro
   performing the plain cast is used instead.  */
#if HAVE_INLINE
static inline unsigned char
to_uchar (char ch)
{
  /* The implicit conversion on return does the work.  */
  return ch;
}
#else
# define to_uchar(C) ((unsigned char) (C))
#endif
493 
/* Avoid negative logic when comparing two strings: the result is
   nonzero exactly when strcmp reports A and B as equal.  */
#define STREQ(a, b) (strcmp ((a), (b)) == 0)
496