1 /* GNU m4 -- A simple macro processor
2
3 Copyright (C) 1989-1994, 2004-2014, 2016-2017, 2020-2021 Free Software
4 Foundation, Inc.
5
6 This file is part of GNU M4.
7
8 GNU M4 is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 GNU M4 is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <https://www.gnu.org/licenses/>.
20 */
21
22 /* We use <config.h> instead of "config.h" so that a compilation
23 using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
24 (which it would do because it found this file in $srcdir). */
25
26 #include <config.h>
27
28 #include <assert.h>
29 #include <c-ctype.h>
30 #include <errno.h>
31 #include <limits.h>
32 #include <locale.h>
33 #include <stdbool.h>
34 #include <stdint.h>
35 #include <string.h>
36 #include <sys/stat.h>
37 #include <sys/types.h>
38
39 #include "attribute.h"
40 #include "binary-io.h"
41 #include "clean-temp.h"
42 #include "cloexec.h"
43 #include "close-stream.h"
44 #include "closein.h"
45 #include "dirname.h"
46 #include "error.h"
47 #include "exitfail.h"
48 #include "filenamecat.h"
49 #include "obstack.h"
50 #include "stdio--.h"
51 #include "stdlib--.h"
52 #include "unistd--.h"
53 #include "verify.h"
54 #include "verror.h"
55 #include "xalloc.h"
56 #include "xprintf.h"
57 #include "xvasprintf.h"
58
/* Canonicalize UNIX recognition macros: UNIX becomes 1 when any of the
   Unix-like platform macros or POSIX version macros below is defined.  */
#if (defined (unix) || defined (__unix) || defined (__unix__) \
     || defined (_POSIX_VERSION) || defined (_POSIX2_VERSION) \
     || defined (__NetBSD__) || defined (__OpenBSD__) \
     || defined (__APPLE__) || defined (__APPLE_CC__))
# define UNIX 1
#endif

/* Canonicalize Windows recognition macros: W32_NATIVE becomes 1 for a
   Windows target that is not Cygwin.  */
#if !defined (__CYGWIN__) && (defined (_WIN32) || defined (__WIN32__))
# define W32_NATIVE 1
#endif

/* Canonicalize OS/2 recognition macro: under EMX, OS2 becomes 1 and any
   UNIX definition made above is withdrawn.  */
#if defined (__EMX__)
# define OS2 1
# undef UNIX
#endif
77
/* Status used if any programmer error is detected (not possible,
   right?).  */
#define EXIT_INTERNAL_ERROR 2

/* Status used for a version mismatch, when -R detects a frozen file it
   can't parse.  */
#define EXIT_MISMATCH 63
83
84 /* NLS. */
85
86 #include "gettext.h"
#if !ENABLE_NLS
/* Without NLS, the text-domain setup calls expand to nothing.  */
# undef bindtextdomain
# define bindtextdomain(Domainname, Dirname) /* empty */
# undef textdomain
# define textdomain(Domainname) /* empty */
#endif

/* Shorthand: _(msgid) expands to gettext (msgid).  */
#define _(msgid) gettext (msgid)
95
96 /* Various declarations. */
97
/* A character buffer paired with its length.  */
typedef struct string
{
  char *string;         /* characters of the string */
  size_t length;        /* length of the string */
} STRING;
104
/* Memory allocation: obstack chunks are obtained through xmalloc and
   released through free.  */
#define obstack_chunk_free free
#define obstack_chunk_alloc xmalloc

/* These must come first, since later declarations refer to them.  */
struct token_data;
typedef struct token_data token_data;
typedef void builtin_func (struct obstack *, int, token_data **);
112
/* Gnulib's stdbool doesn't work with bool bitfields.  Bit-fields are
   declared with bool where that is known to work (GCC 3 and later),
   which makes for nicer debugging; everywhere else the more portable
   unsigned int is used.  */
#if !(__GNUC__ > 2)
typedef unsigned int bool_bitfield;
#else
typedef bool bool_bitfield;
#endif /* __GNUC__ > 2 */
121
122 /* File: m4.c --- global definitions. */
123
124 /* Option flags. */
125 extern int sync_output; /* -s */
126 extern int debug_level; /* -d */
127 extern size_t hash_table_size; /* -H */
128 extern int no_gnu_extensions; /* -G */
129 extern int prefix_all_builtins; /* -P */
130 extern int max_debug_argument_length; /* -l */
131 extern int suppress_warnings; /* -Q */
132 extern int warning_status; /* -E */
133 extern int nesting_limit; /* -L */
134 #ifdef ENABLE_CHANGEWORD
135 extern const char *user_word_regexp; /* -W */
136 #endif
137
138 /* Error handling. */
139 extern int retcode;
140
141 extern void m4_error (int, int, const char *, ...)
142 ATTRIBUTE_FORMAT ((__printf__, 3, 4));
143 extern void m4_error_at_line (int, int, const char *, int, const char *, ...)
144 ATTRIBUTE_FORMAT ((__printf__, 5, 6));
145 extern _Noreturn void m4_failure (int, const char *, ...)
146 ATTRIBUTE_FORMAT ((__printf__, 2, 3));
147 extern _Noreturn void m4_failure_at_line (int, const char *, int,
148 const char *, ...)
149 ATTRIBUTE_FORMAT ((__printf__, 4, 5));
150
151 #define M4ERROR(Arglist) (m4_error Arglist)
152 #define M4ERROR_AT_LINE(Arglist) (m4_error_at_line Arglist)
153
154
/* File: debug.c --- debugging and tracing functions. */
156
/* Stream that the DEBUG_* output macros print to when it is not NULL.  */
extern FILE *debug;

/* The value of debug_level is a bitmask of the following.  */

#define DEBUG_TRACE_ARGS        0x001   /* a: show arglist in trace output */
#define DEBUG_TRACE_EXPANSION   0x002   /* e: show expansion in trace output */
#define DEBUG_TRACE_QUOTE       0x004   /* q: quote args and expansion in
                                           trace output */
#define DEBUG_TRACE_ALL         0x008   /* t: trace all macros -- overrides
                                           trace{on,off} */
#define DEBUG_TRACE_LINE        0x010   /* l: add line numbers to trace
                                           output */
#define DEBUG_TRACE_FILE        0x020   /* f: add file name to trace output */
#define DEBUG_TRACE_PATH        0x040   /* p: trace path search of include
                                           files */
#define DEBUG_TRACE_CALL        0x080   /* c: show macro call before args
                                           collection */
#define DEBUG_TRACE_INPUT       0x100   /* i: trace changes of input files */
#define DEBUG_TRACE_CALLID      0x200   /* x: add call id to trace output */

/* V: very verbose -- print everything (all ten flags, 1023).  */
#define DEBUG_TRACE_VERBOSE                                             \
  (DEBUG_TRACE_ARGS | DEBUG_TRACE_EXPANSION | DEBUG_TRACE_QUOTE         \
   | DEBUG_TRACE_ALL | DEBUG_TRACE_LINE | DEBUG_TRACE_FILE              \
   | DEBUG_TRACE_PATH | DEBUG_TRACE_CALL | DEBUG_TRACE_INPUT            \
   | DEBUG_TRACE_CALLID)

/* default flags -- equiv: aeq (7).  */
#define DEBUG_TRACE_DEFAULT \
  (DEBUG_TRACE_ARGS | DEBUG_TRACE_EXPANSION | DEBUG_TRACE_QUOTE)
186
/* Shared body of the DEBUG_PRINT* macros: pass the format and its
   arguments to xfprintf on the debug stream, unless that stream is
   NULL.  */
#define DEBUG_PRINT_ARGS(...)                   \
  do                                            \
    {                                           \
      if (debug != NULL)                        \
        xfprintf (debug, __VA_ARGS__);          \
    }                                           \
  while (0)

/* Shared body of the DEBUG_MESSAGE* macros: when the debug stream is
   not NULL, emit the message prefix, the formatted text and a
   terminating newline.  */
#define DEBUG_MESSAGE_ARGS(...)                 \
  do                                            \
    {                                           \
      if (debug != NULL)                        \
        {                                       \
          debug_message_prefix ();              \
          xfprintf (debug, __VA_ARGS__);        \
          putc ('\n', debug);                   \
        }                                       \
    }                                           \
  while (0)

/* Formatted output with one or three arguments, no prefix or newline.  */
#define DEBUG_PRINT1(Fmt, Arg1) \
  DEBUG_PRINT_ARGS (Fmt, Arg1)
#define DEBUG_PRINT3(Fmt, Arg1, Arg2, Arg3) \
  DEBUG_PRINT_ARGS (Fmt, Arg1, Arg2, Arg3)

/* Prefixed, newline-terminated messages with zero, one or two
   arguments.  */
#define DEBUG_MESSAGE(Fmt) \
  DEBUG_MESSAGE_ARGS (Fmt)
#define DEBUG_MESSAGE1(Fmt, Arg1) \
  DEBUG_MESSAGE_ARGS (Fmt, Arg1)
#define DEBUG_MESSAGE2(Fmt, Arg1, Arg2) \
  DEBUG_MESSAGE_ARGS (Fmt, Arg1, Arg2)
238
239 extern void debug_init (void);
240 extern int debug_decode (const char *);
241 extern void debug_flush_files (void);
242 extern bool debug_set_output (const char *);
243 extern void debug_message_prefix (void);
244
245 extern void trace_prepre (const char *, int);
246 extern void trace_pre (const char *, int, int, token_data **);
247 extern void trace_post (const char *, int, int, const char *);
248
249 /* File: input.c --- lexical definitions. */
250
/* The different kinds of token.  */
enum token_type
{
  TOKEN_EOF = 0,        /* end of file */
  TOKEN_STRING = 1,     /* a quoted string or comment */
  TOKEN_WORD = 2,       /* an identifier */
  TOKEN_OPEN = 3,       /* ( */
  TOKEN_COMMA = 4,      /* , */
  TOKEN_CLOSE = 5,      /* ) */
  TOKEN_SIMPLE = 6,     /* any other single character */
  TOKEN_MACDEF = 7      /* a macro's definition (see "defn") */
};
263
/* Kind of data held for a token, a macro argument, or a macro
   definition; stored in the `type' member of struct token_data.  */
enum token_data_type
{
  TOKEN_VOID = 0,
  TOKEN_TEXT = 1,
  TOKEN_FUNC = 2
};
271
272 struct token_data
273 {
274 enum token_data_type type;
275 union
276 {
277 struct
278 {
279 char *text;
280 #ifdef ENABLE_CHANGEWORD
281 char *original_text;
282 #endif
283 }
284 u_t;
285 builtin_func *func;
286 }
287 u;
288 };
289
290 #define TOKEN_DATA_TYPE(Td) ((Td)->type)
291 #define TOKEN_DATA_TEXT(Td) ((Td)->u.u_t.text)
292 #ifdef ENABLE_CHANGEWORD
293 # define TOKEN_DATA_ORIG_TEXT(Td) ((Td)->u.u_t.original_text)
294 #endif
295 #define TOKEN_DATA_FUNC(Td) ((Td)->u.func)
296
297 typedef enum token_type token_type;
298 typedef enum token_data_type token_data_type;
299
300 extern void input_init (void);
301 extern token_type peek_token (void);
302 extern token_type next_token (token_data *, int *);
303 extern void skip_line (void);
304
305 /* push back input */
306 extern void push_file (FILE *, const char *, bool);
307 extern void push_macro (builtin_func *);
308 extern struct obstack *push_string_init (void);
309 extern const char *push_string_finish (void);
310 extern void push_wrapup (const char *);
311 extern bool pop_wrapup (void);
312
313 /* current input file, and line */
314 extern const char *current_file;
315 extern int current_line;
316
317 /* left and right quote, begin and end comment */
318 extern STRING bcomm;
319 extern STRING ecomm;
320 extern STRING lquote;
321 extern STRING rquote;
322
323 #define DEF_LQUOTE "`"
324 #define DEF_RQUOTE "\'"
325 #define DEF_BCOMM "#"
326 #define DEF_ECOMM "\n"
327
328 extern void set_quotes (const char *, const char *);
329 extern void set_comment (const char *, const char *);
330 #ifdef ENABLE_CHANGEWORD
331 extern void set_word_regexp (const char *);
332 #endif
333
/* File: output.c --- output functions. */

/* Output state shared with other files.  */
extern int current_diversion;
extern int output_current_line;

void output_init (void);
void output_exit (void);
void output_text (const char *, int);
void shipout_text (struct obstack *, const char *, int, int);

/* Diversions and file insertion.  */
void make_diversion (int);
void insert_diversion (int);
void insert_file (FILE *);
void freeze_diversions (FILE *);
346
/* File: symtab.c --- symbol table definitions. */
348
/* Operation modes accepted by lookup_symbol ().  */
enum symbol_lookup
{
  SYMBOL_LOOKUP = 0,
  SYMBOL_INSERT = 1,
  SYMBOL_DELETE = 2,
  SYMBOL_PUSHDEF = 3,
  SYMBOL_POPDEF = 4
};
358
359 /* Symbol table entry. */
360 struct symbol
361 {
362 struct symbol *stack; /* pushdef stack */
363 struct symbol *next; /* hash bucket chain */
364 bool_bitfield traced : 1;
365 bool_bitfield macro_args : 1;
366 bool_bitfield blind_no_args : 1;
367 bool_bitfield deleted : 1;
368 int pending_expansions;
369
370 size_t hash;
371 char *name;
372 token_data data;
373 };
374
375 #define SYMBOL_STACK(S) ((S)->stack)
376 #define SYMBOL_TRACED(S) ((S)->traced)
377 #define SYMBOL_MACRO_ARGS(S) ((S)->macro_args)
378 #define SYMBOL_BLIND_NO_ARGS(S) ((S)->blind_no_args)
379 #define SYMBOL_DELETED(S) ((S)->deleted)
380 #define SYMBOL_PENDING_EXPANSIONS(S) ((S)->pending_expansions)
381 #define SYMBOL_NAME(S) ((S)->name)
382 #define SYMBOL_TYPE(S) (TOKEN_DATA_TYPE (&(S)->data))
383 #define SYMBOL_TEXT(S) (TOKEN_DATA_TEXT (&(S)->data))
384 #define SYMBOL_FUNC(S) (TOKEN_DATA_FUNC (&(S)->data))
385
386 typedef enum symbol_lookup symbol_lookup;
387 typedef struct symbol symbol;
388 typedef void hack_symbol (symbol *, void *);
389
390 #define HASHMAX 65537 /* default, overridden by -Hsize */
391
392 extern void free_symbol (symbol *sym);
393 extern void symtab_init (void);
394 extern symbol *lookup_symbol (const char *, symbol_lookup);
395 extern void hack_all_symbols (hack_symbol *, void *);
396
397 /* File: macro.c --- macro expansion. */
398
399 extern int expansion_level;
400
401 extern void expand_input (void);
402 extern void call_macro (symbol *, int, token_data **, struct obstack *);
403
404 /* File: builtin.c --- builtins. */
405
406 struct builtin
407 {
408 const char *name;
409 bool_bitfield gnu_extension : 1;
410 bool_bitfield groks_macro_args : 1;
411 bool_bitfield blind_if_no_args : 1;
412 builtin_func *func;
413 };
414
/* Three strings describing a predefined macro.
   NOTE(review): presumably the name used in traditional mode, the name
   used with GNU extensions, and the definition text -- confirm against
   builtin.c.  */
struct predefined
{
  const char *unix_name;
  const char *gnu_name;
  const char *func;
};

typedef struct predefined predefined;
typedef struct builtin builtin;

/* Forward declarations, sufficient for the pointer parameters of
   init_pattern_buffer ().  */
struct re_registers;
struct re_pattern_buffer;
426
427 /* The default sequence detects multi-digit parameters (obsolete after
428 1.4.x), and any use of extended arguments with the default ${}
429 syntax (new in 2.0). */
430 #define DEFAULT_MACRO_SEQUENCE "\\$\\({[^}]*}\\|[0-9][0-9]+\\)"
431
432 extern void builtin_init (void);
433 extern void define_builtin (const char *, const builtin *, symbol_lookup);
434 extern void set_macro_sequence (const char *);
435 extern void free_macro_sequence (void);
436 extern void define_user_macro (const char *, const char *, symbol_lookup);
437 extern void undivert_all (void);
438 extern void expand_user_macro (struct obstack *, symbol *, int, token_data **);
439 extern void m4_placeholder (struct obstack *, int, token_data **);
440 extern void init_pattern_buffer (struct re_pattern_buffer *,
441 struct re_registers *);
442 extern const char *ntoa (int32_t, int);
443
444 extern const builtin *find_builtin_by_addr (builtin_func *);
445 extern const builtin *find_builtin_by_name (const char *);
446
447 /* File: path.c --- path search for include files. */
448
449 extern void include_init (void);
450 extern void include_env_init (void);
451 extern void add_include_directory (const char *);
452 extern FILE *m4_path_search (const char *, char **);
453
454 /* File: eval.c --- expression evaluation. */
455
456 extern bool evaluate (const char *, int32_t *);
457
458 /* File: format.c --- printf like formatting. */
459
460 extern void expand_format (struct obstack *, int, token_data **);
461
462 /* File: freeze.c --- frozen state files. */
463
464 extern void produce_frozen_state (const char *);
465 extern void reload_frozen_state (const char *);
466
/* Debugging the memory allocator: with WITH_DMALLOC, pull in dmalloc
   with DMALLOC_FUNC_CHECK defined first.  */

#if defined (WITH_DMALLOC)
# define DMALLOC_FUNC_CHECK
# include <dmalloc.h>
#endif

/* Other debug stuff: defining DEBUG switches on every per-area
   DEBUG_* macro below.  */

#if defined (DEBUG)
# define DEBUG_INCL 1
# define DEBUG_INPUT 1
# define DEBUG_MACRO 1
# define DEBUG_OUTPUT 1
# define DEBUG_STKOVF 1
# define DEBUG_SYM 1
#endif
484
/* Convert a possibly-signed character to an unsigned character.  This is
   a bit safer than casting to unsigned char, since it catches some type
   errors that the cast doesn't.  When HAVE_INLINE is not set, to_uchar
   falls back to a macro that performs the plain cast.  */
#if HAVE_INLINE
static inline unsigned char
to_uchar (char ch)
{
  return ch;
}
#else
# define to_uchar(C) ((unsigned char) (C))
#endif
493
/* Avoid negative logic when comparing two strings: STREQ yields 1 when
   strcmp reports A and B equal, 0 otherwise.  */
#define STREQ(a, b) (0 == strcmp ((a), (b)))
496