/* CPP Library - traditional lexical analysis and macro expansion.
   Copyright (C) 2002-2020 Free Software Foundation, Inc.
   Contributed by Neil Booth, May 2002

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "cpplib.h"
#include "internal.h"

24 /* The replacement text of a function-like macro is stored as a
25 contiguous sequence of aligned blocks, each representing the text
26 between subsequent parameters.
27
28 Each block comprises the text between its surrounding parameters,
29 the length of that text, and the one-based index of the following
30 parameter. The final block in the replacement text is easily
31 recognizable as it has an argument index of zero. */
32
33 struct block
34 {
35 unsigned int text_len;
36 unsigned short arg_index;
37 uchar text[1];
38 };
39
/* Size of the fixed header of a struct block, i.e. the offset of its
   text member.  */
#define BLOCK_HEADER_LEN offsetof (struct block, text)

/* Size of a whole block holding TEXT_LEN bytes of text: header plus
   text, passed through CPP_ALIGN.  */
#define BLOCK_LEN(TEXT_LEN) CPP_ALIGN (BLOCK_HEADER_LEN + (TEXT_LEN))

43 /* Structure holding information about a function-like macro
44 invocation. */
45 struct fun_macro
46 {
47 /* Memory buffer holding the trad_arg array. */
48 _cpp_buff *buff;
49
50 /* An array of size the number of macro parameters + 1, containing
51 the offsets of the start of each macro argument in the output
52 buffer. The argument continues until the character before the
53 start of the next one. */
54 size_t *args;
55
56 /* The hashnode of the macro. */
57 cpp_hashnode *node;
58
59 /* The offset of the macro name in the output buffer. */
60 size_t offset;
61
62 /* The line the macro name appeared on. */
63 location_t line;
64
65 /* Number of parameters. */
66 unsigned int paramc;
67
68 /* Zero-based index of argument being currently lexed. */
69 unsigned int argc;
70 };
71
/* Lexing state.  It is mostly used to prevent macro expansion.  */
enum ls {ls_none = 0,           /* Normal state.  */
         ls_fun_open,           /* When looking for '('.  */
         ls_fun_close,          /* When looking for ')'.  */
         ls_defined,            /* After defined.  */
         ls_defined_close,      /* Looking for ')' of defined().  */
         ls_hash,               /* After # in preprocessor conditional.  */
         ls_predicate,          /* After the predicate, maybe paren?  */
         ls_answer              /* In answer to predicate.  */
};

83 /* Lexing TODO: Maybe handle space in escaped newlines. Stop lex.c
84 from recognizing comments and directives during its lexing pass. */
85
86 static const uchar *skip_whitespace (cpp_reader *, const uchar *, int);
87 static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
88 static const uchar *copy_comment (cpp_reader *, const uchar *, int);
89 static void check_output_buffer (cpp_reader *, size_t);
90 static void push_replacement_text (cpp_reader *, cpp_hashnode *);
91 static bool scan_parameters (cpp_reader *, unsigned *);
92 static bool recursive_macro (cpp_reader *, cpp_hashnode *);
93 static void save_replacement_text (cpp_reader *, cpp_macro *, unsigned int);
94 static void maybe_start_funlike (cpp_reader *, cpp_hashnode *, const uchar *,
95 struct fun_macro *);
96 static void save_argument (struct fun_macro *, size_t);
97 static void replace_args_and_push (cpp_reader *, struct fun_macro *);
98 static size_t canonicalize_text (uchar *, const uchar *, size_t, uchar *);
99
100 /* Ensures we have N bytes' space in the output buffer, and
101 reallocates it if not. */
102 static void
check_output_buffer(cpp_reader * pfile,size_t n)103 check_output_buffer (cpp_reader *pfile, size_t n)
104 {
105 /* We might need two bytes to terminate an unterminated comment, and
106 one more to terminate the line with a NUL. */
107 n += 2 + 1;
108
109 if (n > (size_t) (pfile->out.limit - pfile->out.cur))
110 {
111 size_t size = pfile->out.cur - pfile->out.base;
112 size_t new_size = (size + n) * 3 / 2;
113
114 pfile->out.base = XRESIZEVEC (unsigned char, pfile->out.base, new_size);
115 pfile->out.limit = pfile->out.base + new_size;
116 pfile->out.cur = pfile->out.base + size;
117 }
118 }
119
120 /* Skip a C-style block comment in a macro as a result of -CC.
121 PFILE->buffer->cur points to the initial asterisk of the comment,
122 change it to point to after the '*' and '/' characters that terminate it.
123 Return true if the macro has not been termined, in that case set
124 PFILE->buffer->cur to the end of the buffer. */
125 static bool
skip_macro_block_comment(cpp_reader * pfile)126 skip_macro_block_comment (cpp_reader *pfile)
127 {
128 const uchar *cur = pfile->buffer->cur;
129
130 cur++;
131 if (*cur == '/')
132 cur++;
133
134 /* People like decorating comments with '*', so check for '/'
135 instead for efficiency. */
136 while (! (*cur++ == '/' && cur[-2] == '*'))
137 if (cur[-1] == '\n')
138 {
139 pfile->buffer->cur = cur - 1;
140 return true;
141 }
142
143 pfile->buffer->cur = cur;
144 return false;
145 }
146
147 /* CUR points to the asterisk introducing a comment in the current
148 context. IN_DEFINE is true if we are in the replacement text of a
149 macro.
150
151 The asterisk and following comment is copied to the buffer pointed
152 to by pfile->out.cur, which must be of sufficient size.
153 Unterminated comments are diagnosed, and correctly terminated in
154 the output. pfile->out.cur is updated depending upon IN_DEFINE,
155 -C, -CC and pfile->state.in_directive.
156
157 Returns a pointer to the first character after the comment in the
158 input buffer. */
159 static const uchar *
copy_comment(cpp_reader * pfile,const uchar * cur,int in_define)160 copy_comment (cpp_reader *pfile, const uchar *cur, int in_define)
161 {
162 bool unterminated, copy = false;
163 location_t src_loc = pfile->line_table->highest_line;
164 cpp_buffer *buffer = pfile->buffer;
165
166 buffer->cur = cur;
167 if (pfile->context->prev)
168 unterminated = skip_macro_block_comment (pfile);
169 else
170 unterminated = _cpp_skip_block_comment (pfile);
171
172 if (unterminated)
173 cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
174 "unterminated comment");
175
176 /* Comments in directives become spaces so that tokens are properly
177 separated when the ISO preprocessor re-lexes the line. The
178 exception is #define. */
179 if (pfile->state.in_directive)
180 {
181 if (in_define)
182 {
183 if (CPP_OPTION (pfile, discard_comments_in_macro_exp))
184 pfile->out.cur--;
185 else
186 copy = true;
187 }
188 else
189 pfile->out.cur[-1] = ' ';
190 }
191 else if (CPP_OPTION (pfile, discard_comments))
192 pfile->out.cur--;
193 else
194 copy = true;
195
196 if (copy)
197 {
198 size_t len = (size_t) (buffer->cur - cur);
199 memcpy (pfile->out.cur, cur, len);
200 pfile->out.cur += len;
201 if (unterminated)
202 {
203 *pfile->out.cur++ = '*';
204 *pfile->out.cur++ = '/';
205 }
206 }
207
208 return buffer->cur;
209 }
210
211 /* CUR points to any character in the input buffer. Skips over all
212 contiguous horizontal white space and NULs, including comments if
213 SKIP_COMMENTS, until reaching the first non-horizontal-whitespace
214 character or the end of the current context. Escaped newlines are
215 removed.
216
217 The whitespace is copied verbatim to the output buffer, except that
218 comments are handled as described in copy_comment().
219 pfile->out.cur is updated.
220
221 Returns a pointer to the first character after the whitespace in
222 the input buffer. */
223 static const uchar *
skip_whitespace(cpp_reader * pfile,const uchar * cur,int skip_comments)224 skip_whitespace (cpp_reader *pfile, const uchar *cur, int skip_comments)
225 {
226 uchar *out = pfile->out.cur;
227
228 for (;;)
229 {
230 unsigned int c = *cur++;
231 *out++ = c;
232
233 if (is_nvspace (c))
234 continue;
235
236 if (c == '/' && *cur == '*' && skip_comments)
237 {
238 pfile->out.cur = out;
239 cur = copy_comment (pfile, cur, false /* in_define */);
240 out = pfile->out.cur;
241 continue;
242 }
243
244 out--;
245 break;
246 }
247
248 pfile->out.cur = out;
249 return cur - 1;
250 }
251
252 /* Lexes and outputs an identifier starting at CUR, which is assumed
253 to point to a valid first character of an identifier. Returns
254 the hashnode, and updates out.cur. */
255 static cpp_hashnode *
lex_identifier(cpp_reader * pfile,const uchar * cur)256 lex_identifier (cpp_reader *pfile, const uchar *cur)
257 {
258 size_t len;
259 uchar *out = pfile->out.cur;
260 cpp_hashnode *result;
261
262 do
263 *out++ = *cur++;
264 while (is_numchar (*cur));
265
266 CUR (pfile->context) = cur;
267 len = out - pfile->out.cur;
268 result = CPP_HASHNODE (ht_lookup (pfile->hash_table, pfile->out.cur,
269 len, HT_ALLOC));
270 pfile->out.cur = out;
271 return result;
272 }
273
274 /* Overlays the true file buffer temporarily with text of length LEN
275 starting at START. The true buffer is restored upon calling
276 restore_buff(). */
277 void
_cpp_overlay_buffer(cpp_reader * pfile,const uchar * start,size_t len)278 _cpp_overlay_buffer (cpp_reader *pfile, const uchar *start, size_t len)
279 {
280 cpp_buffer *buffer = pfile->buffer;
281
282 pfile->overlaid_buffer = buffer;
283 pfile->saved_cur = buffer->cur;
284 pfile->saved_rlimit = buffer->rlimit;
285 pfile->saved_line_base = buffer->next_line;
286 buffer->need_line = false;
287
288 buffer->cur = start;
289 buffer->line_base = start;
290 buffer->rlimit = start + len;
291 }
292
293 /* Restores a buffer overlaid by _cpp_overlay_buffer(). */
294 void
_cpp_remove_overlay(cpp_reader * pfile)295 _cpp_remove_overlay (cpp_reader *pfile)
296 {
297 cpp_buffer *buffer = pfile->overlaid_buffer;
298
299 buffer->cur = pfile->saved_cur;
300 buffer->rlimit = pfile->saved_rlimit;
301 buffer->line_base = pfile->saved_line_base;
302 buffer->need_line = true;
303
304 pfile->overlaid_buffer = NULL;
305 }
306
307 /* Reads a logical line into the output buffer. Returns TRUE if there
308 is more text left in the buffer. */
309 bool
_cpp_read_logical_line_trad(cpp_reader * pfile)310 _cpp_read_logical_line_trad (cpp_reader *pfile)
311 {
312 do
313 {
314 if (pfile->buffer->need_line && !_cpp_get_fresh_line (pfile))
315 return false;
316 }
317 while (!_cpp_scan_out_logical_line (pfile, NULL, false)
318 || pfile->state.skipping);
319
320 return pfile->buffer != NULL;
321 }
322
323 /* Return true if NODE is a fun_like macro. */
324 static inline bool
fun_like_macro(cpp_hashnode * node)325 fun_like_macro (cpp_hashnode *node)
326 {
327 if (cpp_builtin_macro_p (node))
328 return (node->value.builtin == BT_HAS_ATTRIBUTE
329 || node->value.builtin == BT_HAS_BUILTIN
330 || node->value.builtin == BT_HAS_INCLUDE
331 || node->value.builtin == BT_HAS_INCLUDE_NEXT);
332 return node->value.macro->fun_like;
333 }
334
335 /* Set up state for finding the opening '(' of a function-like
336 macro. */
337 static void
maybe_start_funlike(cpp_reader * pfile,cpp_hashnode * node,const uchar * start,struct fun_macro * macro)338 maybe_start_funlike (cpp_reader *pfile, cpp_hashnode *node, const uchar *start,
339 struct fun_macro *macro)
340 {
341 unsigned int n;
342 if (cpp_builtin_macro_p (node))
343 n = 1;
344 else
345 n = node->value.macro->paramc;
346
347 if (macro->buff)
348 _cpp_release_buff (pfile, macro->buff);
349 macro->buff = _cpp_get_buff (pfile, (n + 1) * sizeof (size_t));
350 macro->args = (size_t *) BUFF_FRONT (macro->buff);
351 macro->node = node;
352 macro->offset = start - pfile->out.base;
353 macro->paramc = n;
354 macro->argc = 0;
355 }
356
357 /* Save the OFFSET of the start of the next argument to MACRO. */
358 static void
save_argument(struct fun_macro * macro,size_t offset)359 save_argument (struct fun_macro *macro, size_t offset)
360 {
361 macro->argc++;
362 if (macro->argc <= macro->paramc)
363 macro->args[macro->argc] = offset;
364 }
365
366 /* Copies the next logical line in the current buffer (starting at
367 buffer->cur) to the output buffer. The output is guaranteed to
368 terminate with a NUL character. buffer->cur is updated.
369
370 If MACRO is non-NULL, then we are scanning the replacement list of
371 MACRO, and we call save_replacement_text() every time we meet an
372 argument.
373
374 If BUILTIN_MACRO_ARG is true, this is called to macro expand
375 arguments of builtin function-like macros. */
376 bool
_cpp_scan_out_logical_line(cpp_reader * pfile,cpp_macro * macro,bool builtin_macro_arg)377 _cpp_scan_out_logical_line (cpp_reader *pfile, cpp_macro *macro,
378 bool builtin_macro_arg)
379 {
380 bool result = true;
381 cpp_context *context;
382 const uchar *cur;
383 uchar *out;
384 struct fun_macro fmacro;
385 unsigned int c, paren_depth = 0, quote;
386 enum ls lex_state = ls_none;
387 bool header_ok;
388 const uchar *start_of_input_line;
389
390 fmacro.buff = NULL;
391 fmacro.args = NULL;
392 fmacro.node = NULL;
393 fmacro.offset = 0;
394 fmacro.line = 0;
395 fmacro.paramc = 0;
396 fmacro.argc = 0;
397
398 quote = 0;
399 header_ok = pfile->state.angled_headers;
400 CUR (pfile->context) = pfile->buffer->cur;
401 RLIMIT (pfile->context) = pfile->buffer->rlimit;
402 if (!builtin_macro_arg)
403 {
404 pfile->out.cur = pfile->out.base;
405 pfile->out.first_line = pfile->line_table->highest_line;
406 }
407 /* start_of_input_line is needed to make sure that directives really,
408 really start at the first character of the line. */
409 start_of_input_line = pfile->buffer->cur;
410 new_context:
411 context = pfile->context;
412 cur = CUR (context);
413 check_output_buffer (pfile, RLIMIT (context) - cur);
414 out = pfile->out.cur;
415
416 for (;;)
417 {
418 if (!context->prev
419 && !builtin_macro_arg
420 && cur >= pfile->buffer->notes[pfile->buffer->cur_note].pos)
421 {
422 pfile->buffer->cur = cur;
423 _cpp_process_line_notes (pfile, false);
424 }
425 c = *cur++;
426 *out++ = c;
427
428 /* Whitespace should "continue" out of the switch,
429 non-whitespace should "break" out of it. */
430 switch (c)
431 {
432 case ' ':
433 case '\t':
434 case '\f':
435 case '\v':
436 case '\0':
437 continue;
438
439 case '\n':
440 /* If this is a macro's expansion, pop it. */
441 if (context->prev)
442 {
443 pfile->out.cur = out - 1;
444 _cpp_pop_context (pfile);
445 goto new_context;
446 }
447
448 /* Omit the newline from the output buffer. */
449 pfile->out.cur = out - 1;
450 pfile->buffer->cur = cur;
451 if (builtin_macro_arg)
452 goto done;
453 pfile->buffer->need_line = true;
454 CPP_INCREMENT_LINE (pfile, 0);
455
456 if ((lex_state == ls_fun_open || lex_state == ls_fun_close)
457 && !pfile->state.in_directive
458 && _cpp_get_fresh_line (pfile))
459 {
460 /* Newlines in arguments become a space, but we don't
461 clear any in-progress quote. */
462 if (lex_state == ls_fun_close)
463 out[-1] = ' ';
464 cur = pfile->buffer->cur;
465 continue;
466 }
467 goto done;
468
469 case '<':
470 if (header_ok)
471 quote = '>';
472 break;
473 case '>':
474 if (c == quote)
475 quote = 0;
476 break;
477
478 case '"':
479 case '\'':
480 if (c == quote)
481 quote = 0;
482 else if (!quote)
483 quote = c;
484 break;
485
486 case '\\':
487 /* Skip escaped quotes here, it's easier than above. */
488 if (*cur == '\\' || *cur == '"' || *cur == '\'')
489 *out++ = *cur++;
490 break;
491
492 case '/':
493 /* Traditional CPP does not recognize comments within
494 literals. */
495 if (!quote && *cur == '*')
496 {
497 pfile->out.cur = out;
498 cur = copy_comment (pfile, cur, macro != 0);
499 out = pfile->out.cur;
500 continue;
501 }
502 break;
503
504 case '_':
505 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
506 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
507 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
508 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
509 case 'y': case 'z':
510 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
511 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
512 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
513 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
514 case 'Y': case 'Z':
515 if (!pfile->state.skipping && (quote == 0 || macro))
516 {
517 cpp_hashnode *node;
518 uchar *out_start = out - 1;
519
520 pfile->out.cur = out_start;
521 node = lex_identifier (pfile, cur - 1);
522 out = pfile->out.cur;
523 cur = CUR (context);
524
525 if (cpp_macro_p (node)
526 /* Should we expand for ls_answer? */
527 && (lex_state == ls_none || lex_state == ls_fun_open)
528 && !pfile->state.prevent_expansion)
529 {
530 /* Macros invalidate MI optimization. */
531 pfile->mi_valid = false;
532 if (fun_like_macro (node))
533 {
534 maybe_start_funlike (pfile, node, out_start, &fmacro);
535 lex_state = ls_fun_open;
536 fmacro.line = pfile->line_table->highest_line;
537 continue;
538 }
539 else if (!recursive_macro (pfile, node))
540 {
541 /* Remove the object-like macro's name from the
542 output, and push its replacement text. */
543 pfile->out.cur = out_start;
544 push_replacement_text (pfile, node);
545 lex_state = ls_none;
546 goto new_context;
547 }
548 }
549 else if (macro && node->type == NT_MACRO_ARG)
550 {
551 /* Found a parameter in the replacement text of a
552 #define. Remove its name from the output. */
553 pfile->out.cur = out_start;
554 save_replacement_text (pfile, macro, node->value.arg_index);
555 out = pfile->out.base;
556 }
557 else if (lex_state == ls_hash)
558 {
559 lex_state = ls_predicate;
560 continue;
561 }
562 else if (pfile->state.in_expression
563 && node == pfile->spec_nodes.n_defined)
564 {
565 lex_state = ls_defined;
566 continue;
567 }
568 }
569 break;
570
571 case '(':
572 if (quote == 0)
573 {
574 paren_depth++;
575 if (lex_state == ls_fun_open)
576 {
577 if (recursive_macro (pfile, fmacro.node))
578 lex_state = ls_none;
579 else
580 {
581 lex_state = ls_fun_close;
582 paren_depth = 1;
583 out = pfile->out.base + fmacro.offset;
584 fmacro.args[0] = fmacro.offset;
585 }
586 }
587 else if (lex_state == ls_predicate)
588 lex_state = ls_answer;
589 else if (lex_state == ls_defined)
590 lex_state = ls_defined_close;
591 }
592 break;
593
594 case ',':
595 if (quote == 0 && lex_state == ls_fun_close && paren_depth == 1)
596 save_argument (&fmacro, out - pfile->out.base);
597 break;
598
599 case ')':
600 if (quote == 0)
601 {
602 paren_depth--;
603 if (lex_state == ls_fun_close && paren_depth == 0)
604 {
605 if (cpp_builtin_macro_p (fmacro.node))
606 {
607 /* Handle builtin function-like macros like
608 __has_attribute. The already parsed arguments
609 are put into a buffer, which is then preprocessed
610 and the result is fed to _cpp_push_text_context
611 with disabled expansion, where the ISO preprocessor
612 parses it. While in traditional preprocessing
613 macro arguments aren't immediately expanded, they in
614 the end are because the macro with replaced arguments
615 is preprocessed again. For the builtin function-like
616 macros we need the argument immediately though,
617 if we don't preprocess them, they would behave
618 very differently from ISO preprocessor handling
619 of those builtin macros. So, this handling is
620 more similar to traditional preprocessing of
621 #if directives, where we also keep preprocessing
622 until everything is expanded, and then feed the
623 result with disabled expansion to ISO preprocessor
624 for handling the directives. */
625 lex_state = ls_none;
626 save_argument (&fmacro, out - pfile->out.base);
627 cpp_macro m;
628 memset (&m, '\0', sizeof (m));
629 m.paramc = fmacro.paramc;
630 if (_cpp_arguments_ok (pfile, &m, fmacro.node,
631 fmacro.argc))
632 {
633 size_t len = fmacro.args[1] - fmacro.args[0];
634 uchar *buf;
635
636 /* Remove the macro's invocation from the
637 output, and push its replacement text. */
638 pfile->out.cur = pfile->out.base + fmacro.offset;
639 CUR (context) = cur;
640 buf = _cpp_unaligned_alloc (pfile, len + 2);
641 buf[0] = '(';
642 memcpy (buf + 1, pfile->out.base + fmacro.args[0],
643 len);
644 buf[len + 1] = '\n';
645
646 const unsigned char *ctx_rlimit = RLIMIT (context);
647 const unsigned char *saved_cur = pfile->buffer->cur;
648 const unsigned char *saved_rlimit
649 = pfile->buffer->rlimit;
650 const unsigned char *saved_line_base
651 = pfile->buffer->line_base;
652 bool saved_need_line = pfile->buffer->need_line;
653 cpp_buffer *saved_overlaid_buffer
654 = pfile->overlaid_buffer;
655 pfile->buffer->cur = buf;
656 pfile->buffer->line_base = buf;
657 pfile->buffer->rlimit = buf + len + 1;
658 pfile->buffer->need_line = false;
659 pfile->overlaid_buffer = pfile->buffer;
660 bool saved_in_directive = pfile->state.in_directive;
661 pfile->state.in_directive = true;
662 cpp_context *saved_prev_context = context->prev;
663 context->prev = NULL;
664
665 _cpp_scan_out_logical_line (pfile, NULL, true);
666
667 pfile->state.in_directive = saved_in_directive;
668 check_output_buffer (pfile, 1);
669 *pfile->out.cur = '\n';
670 pfile->buffer->cur = pfile->out.base + fmacro.offset;
671 pfile->buffer->line_base = pfile->buffer->cur;
672 pfile->buffer->rlimit = pfile->out.cur;
673 CUR (context) = pfile->buffer->cur;
674 RLIMIT (context) = pfile->buffer->rlimit;
675
676 pfile->state.prevent_expansion++;
677 const uchar *text
678 = _cpp_builtin_macro_text (pfile, fmacro.node);
679 pfile->state.prevent_expansion--;
680
681 context->prev = saved_prev_context;
682 pfile->buffer->cur = saved_cur;
683 pfile->buffer->rlimit = saved_rlimit;
684 pfile->buffer->line_base = saved_line_base;
685 pfile->buffer->need_line = saved_need_line;
686 pfile->overlaid_buffer = saved_overlaid_buffer;
687 pfile->out.cur = pfile->out.base + fmacro.offset;
688 CUR (context) = cur;
689 RLIMIT (context) = ctx_rlimit;
690 len = ustrlen (text);
691 buf = _cpp_unaligned_alloc (pfile, len + 1);
692 memcpy (buf, text, len);
693 buf[len] = '\n';
694 text = buf;
695 _cpp_push_text_context (pfile, fmacro.node,
696 text, len);
697 goto new_context;
698 }
699 break;
700 }
701
702 cpp_macro *m = fmacro.node->value.macro;
703
704 m->used = 1;
705 lex_state = ls_none;
706 save_argument (&fmacro, out - pfile->out.base);
707
708 /* A single zero-length argument is no argument. */
709 if (fmacro.argc == 1
710 && m->paramc == 0
711 && out == pfile->out.base + fmacro.offset + 1)
712 fmacro.argc = 0;
713
714 if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
715 {
716 /* Remove the macro's invocation from the
717 output, and push its replacement text. */
718 pfile->out.cur = pfile->out.base + fmacro.offset;
719 CUR (context) = cur;
720 replace_args_and_push (pfile, &fmacro);
721 goto new_context;
722 }
723 }
724 else if (lex_state == ls_answer || lex_state == ls_defined_close)
725 lex_state = ls_none;
726 }
727 break;
728
729 case '#':
730 if (cur - 1 == start_of_input_line
731 /* A '#' from a macro doesn't start a directive. */
732 && !pfile->context->prev
733 && !pfile->state.in_directive)
734 {
735 /* A directive. With the way _cpp_handle_directive
736 currently works, we only want to call it if either we
737 know the directive is OK, or we want it to fail and
738 be removed from the output. If we want it to be
739 passed through (the assembler case) then we must not
740 call _cpp_handle_directive. */
741 pfile->out.cur = out;
742 cur = skip_whitespace (pfile, cur, true /* skip_comments */);
743 out = pfile->out.cur;
744
745 if (*cur == '\n')
746 {
747 /* Null directive. Ignore it and don't invalidate
748 the MI optimization. */
749 pfile->buffer->need_line = true;
750 CPP_INCREMENT_LINE (pfile, 0);
751 result = false;
752 goto done;
753 }
754 else
755 {
756 bool do_it = false;
757
758 if (is_numstart (*cur)
759 && CPP_OPTION (pfile, lang) != CLK_ASM)
760 do_it = true;
761 else if (is_idstart (*cur))
762 /* Check whether we know this directive, but don't
763 advance. */
764 do_it = lex_identifier (pfile, cur)->is_directive;
765
766 if (do_it || CPP_OPTION (pfile, lang) != CLK_ASM)
767 {
768 /* This is a kludge. We want to have the ISO
769 preprocessor lex the next token. */
770 pfile->buffer->cur = cur;
771 _cpp_handle_directive (pfile, false /* indented */);
772 result = false;
773 goto done;
774 }
775 }
776 }
777
778 if (pfile->state.in_expression)
779 {
780 lex_state = ls_hash;
781 continue;
782 }
783 break;
784
785 default:
786 break;
787 }
788
789 /* Non-whitespace disables MI optimization and stops treating
790 '<' as a quote in #include. */
791 header_ok = false;
792 if (!pfile->state.in_directive)
793 pfile->mi_valid = false;
794
795 if (lex_state == ls_none)
796 continue;
797
798 /* Some of these transitions of state are syntax errors. The
799 ISO preprocessor will issue errors later. */
800 if (lex_state == ls_fun_open)
801 /* Missing '('. */
802 lex_state = ls_none;
803 else if (lex_state == ls_hash
804 || lex_state == ls_predicate
805 || lex_state == ls_defined)
806 lex_state = ls_none;
807
808 /* ls_answer and ls_defined_close keep going until ')'. */
809 }
810
811 done:
812 if (fmacro.buff)
813 _cpp_release_buff (pfile, fmacro.buff);
814
815 if (lex_state == ls_fun_close)
816 cpp_error_with_line (pfile, CPP_DL_ERROR, fmacro.line, 0,
817 "unterminated argument list invoking macro \"%s\"",
818 NODE_NAME (fmacro.node));
819 return result;
820 }
821
822 /* Push a context holding the replacement text of the macro NODE on
823 the context stack. NODE is either object-like, or a function-like
824 macro with no arguments. */
825 static void
push_replacement_text(cpp_reader * pfile,cpp_hashnode * node)826 push_replacement_text (cpp_reader *pfile, cpp_hashnode *node)
827 {
828 size_t len;
829 const uchar *text;
830 uchar *buf;
831
832 if (cpp_builtin_macro_p (node))
833 {
834 text = _cpp_builtin_macro_text (pfile, node);
835 len = ustrlen (text);
836 buf = _cpp_unaligned_alloc (pfile, len + 1);
837 memcpy (buf, text, len);
838 buf[len] = '\n';
839 text = buf;
840 }
841 else
842 {
843 cpp_macro *macro = node->value.macro;
844 macro->used = 1;
845 text = macro->exp.text;
846 len = macro->count;
847 }
848
849 _cpp_push_text_context (pfile, node, text, len);
850 }
851
852 /* Returns TRUE if traditional macro recursion is detected. */
853 static bool
recursive_macro(cpp_reader * pfile,cpp_hashnode * node)854 recursive_macro (cpp_reader *pfile, cpp_hashnode *node)
855 {
856 bool recursing = !!(node->flags & NODE_DISABLED);
857
858 /* Object-like macros that are already expanding are necessarily
859 recursive.
860
861 However, it is possible to have traditional function-like macros
862 that are not infinitely recursive but recurse to any given depth.
863 Further, it is easy to construct examples that get ever longer
864 until the point they stop recursing. So there is no easy way to
865 detect true recursion; instead we assume any expansion more than
866 20 deep since the first invocation of this macro must be
867 recursing. */
868 if (recursing && fun_like_macro (node))
869 {
870 size_t depth = 0;
871 cpp_context *context = pfile->context;
872
873 do
874 {
875 depth++;
876 if (context->c.macro == node && depth > 20)
877 break;
878 context = context->prev;
879 }
880 while (context);
881 recursing = context != NULL;
882 }
883
884 if (recursing)
885 cpp_error (pfile, CPP_DL_ERROR,
886 "detected recursion whilst expanding macro \"%s\"",
887 NODE_NAME (node));
888
889 return recursing;
890 }
891
892 /* Return the length of the replacement text of a function-like or
893 object-like non-builtin macro. */
894 size_t
_cpp_replacement_text_len(const cpp_macro * macro)895 _cpp_replacement_text_len (const cpp_macro *macro)
896 {
897 size_t len;
898
899 if (macro->fun_like && (macro->paramc != 0))
900 {
901 const uchar *exp;
902
903 len = 0;
904 for (exp = macro->exp.text;;)
905 {
906 struct block *b = (struct block *) exp;
907
908 len += b->text_len;
909 if (b->arg_index == 0)
910 break;
911 len += NODE_LEN (macro->parm.params[b->arg_index - 1]);
912 exp += BLOCK_LEN (b->text_len);
913 }
914 }
915 else
916 len = macro->count;
917
918 return len;
919 }
920
921 /* Copy the replacement text of MACRO to DEST, which must be of
922 sufficient size. It is not NUL-terminated. The next character is
923 returned. */
924 uchar *
_cpp_copy_replacement_text(const cpp_macro * macro,uchar * dest)925 _cpp_copy_replacement_text (const cpp_macro *macro, uchar *dest)
926 {
927 if (macro->fun_like && (macro->paramc != 0))
928 {
929 const uchar *exp;
930
931 for (exp = macro->exp.text;;)
932 {
933 struct block *b = (struct block *) exp;
934 cpp_hashnode *param;
935
936 memcpy (dest, b->text, b->text_len);
937 dest += b->text_len;
938 if (b->arg_index == 0)
939 break;
940 param = macro->parm.params[b->arg_index - 1];
941 memcpy (dest, NODE_NAME (param), NODE_LEN (param));
942 dest += NODE_LEN (param);
943 exp += BLOCK_LEN (b->text_len);
944 }
945 }
946 else
947 {
948 memcpy (dest, macro->exp.text, macro->count);
949 dest += macro->count;
950 }
951
952 return dest;
953 }
954
955 /* Push a context holding the replacement text of the macro NODE on
956 the context stack. NODE is either object-like, or a function-like
957 macro with no arguments. */
958 static void
replace_args_and_push(cpp_reader * pfile,struct fun_macro * fmacro)959 replace_args_and_push (cpp_reader *pfile, struct fun_macro *fmacro)
960 {
961 cpp_macro *macro = fmacro->node->value.macro;
962
963 if (macro->paramc == 0)
964 push_replacement_text (pfile, fmacro->node);
965 else
966 {
967 const uchar *exp;
968 uchar *p;
969 _cpp_buff *buff;
970 size_t len = 0;
971 int cxtquote = 0;
972
973 /* Get an estimate of the length of the argument-replaced text.
974 This is a worst case estimate, assuming that every replacement
975 text character needs quoting. */
976 for (exp = macro->exp.text;;)
977 {
978 struct block *b = (struct block *) exp;
979
980 len += b->text_len;
981 if (b->arg_index == 0)
982 break;
983 len += 2 * (fmacro->args[b->arg_index]
984 - fmacro->args[b->arg_index - 1] - 1);
985 exp += BLOCK_LEN (b->text_len);
986 }
987
988 /* Allocate room for the expansion plus \n. */
989 buff = _cpp_get_buff (pfile, len + 1);
990
991 /* Copy the expansion and replace arguments. */
992 /* Accumulate actual length, including quoting as necessary */
993 p = BUFF_FRONT (buff);
994 len = 0;
995 for (exp = macro->exp.text;;)
996 {
997 struct block *b = (struct block *) exp;
998 size_t arglen;
999 int argquote;
1000 uchar *base;
1001 uchar *in;
1002
1003 len += b->text_len;
1004 /* Copy the non-argument text literally, keeping
1005 track of whether matching quotes have been seen. */
1006 for (arglen = b->text_len, in = b->text; arglen > 0; arglen--)
1007 {
1008 if (*in == '"')
1009 cxtquote = ! cxtquote;
1010 *p++ = *in++;
1011 }
1012 /* Done if no more arguments */
1013 if (b->arg_index == 0)
1014 break;
1015 arglen = (fmacro->args[b->arg_index]
1016 - fmacro->args[b->arg_index - 1] - 1);
1017 base = pfile->out.base + fmacro->args[b->arg_index - 1];
1018 in = base;
1019 #if 0
1020 /* Skip leading whitespace in the text for the argument to
1021 be substituted. To be compatible with gcc 2.95, we would
1022 also need to trim trailing whitespace. Gcc 2.95 trims
1023 leading and trailing whitespace, which may be a bug. The
1024 current gcc testsuite explicitly checks that this leading
1025 and trailing whitespace in actual arguments is
1026 preserved. */
1027 while (arglen > 0 && is_space (*in))
1028 {
1029 in++;
1030 arglen--;
1031 }
1032 #endif
1033 for (argquote = 0; arglen > 0; arglen--)
1034 {
1035 if (cxtquote && *in == '"')
1036 {
1037 if (in > base && *(in-1) != '\\')
1038 argquote = ! argquote;
1039 /* Always add backslash before double quote if argument
1040 is expanded in a quoted context */
1041 *p++ = '\\';
1042 len++;
1043 }
1044 else if (cxtquote && argquote && *in == '\\')
1045 {
1046 /* Always add backslash before a backslash in an argument
1047 that is expanded in a quoted context and also in the
1048 range of a quoted context in the argument itself. */
1049 *p++ = '\\';
1050 len++;
1051 }
1052 *p++ = *in++;
1053 len++;
1054 }
1055 exp += BLOCK_LEN (b->text_len);
1056 }
1057
1058 /* \n-terminate. */
1059 *p = '\n';
1060 _cpp_push_text_context (pfile, fmacro->node, BUFF_FRONT (buff), len);
1061
1062 /* So we free buffer allocation when macro is left. */
1063 pfile->context->buff = buff;
1064 }
1065 }
1066
1067 /* Read and record the parameters, if any, of a function-like macro
1068 definition. Destroys pfile->out.cur.
1069
1070 Returns true on success, false on failure (syntax error or a
1071 duplicate parameter). On success, CUR (pfile->context) is just
1072 past the closing parenthesis. */
1073 static bool
scan_parameters(cpp_reader * pfile,unsigned * n_ptr)1074 scan_parameters (cpp_reader *pfile, unsigned *n_ptr)
1075 {
1076 const uchar *cur = CUR (pfile->context) + 1;
1077 bool ok;
1078
1079 unsigned nparms = 0;
1080 for (;;)
1081 {
1082 cur = skip_whitespace (pfile, cur, true /* skip_comments */);
1083
1084 if (is_idstart (*cur))
1085 {
1086 struct cpp_hashnode *id = lex_identifier (pfile, cur);
1087 ok = false;
1088 if (!_cpp_save_parameter (pfile, nparms, id, id))
1089 break;
1090 nparms++;
1091 cur = skip_whitespace (pfile, CUR (pfile->context),
1092 true /* skip_comments */);
1093 if (*cur == ',')
1094 {
1095 cur++;
1096 continue;
1097 }
1098 ok = (*cur == ')');
1099 break;
1100 }
1101
1102 ok = (*cur == ')' && !nparms);
1103 break;
1104 }
1105
1106 *n_ptr = nparms;
1107
1108 if (!ok)
1109 cpp_error (pfile, CPP_DL_ERROR, "syntax error in macro parameter list");
1110
1111 CUR (pfile->context) = cur + (*cur == ')');
1112
1113 return ok;
1114 }
1115
1116 /* Save the text from pfile->out.base to pfile->out.cur as
1117 the replacement text for the current macro, followed by argument
1118 ARG_INDEX, with zero indicating the end of the replacement
1119 text. */
1120 static void
save_replacement_text(cpp_reader * pfile,cpp_macro * macro,unsigned int arg_index)1121 save_replacement_text (cpp_reader *pfile, cpp_macro *macro,
1122 unsigned int arg_index)
1123 {
1124 size_t len = pfile->out.cur - pfile->out.base;
1125 uchar *exp;
1126
1127 if (macro->paramc == 0)
1128 {
1129 /* Object-like and function-like macros without parameters
1130 simply store their \n-terminated replacement text. */
1131 exp = _cpp_unaligned_alloc (pfile, len + 1);
1132 memcpy (exp, pfile->out.base, len);
1133 exp[len] = '\n';
1134 macro->exp.text = exp;
1135 macro->count = len;
1136 }
1137 else
1138 {
1139 /* Store the text's length (unsigned int), the argument index
1140 (unsigned short, base 1) and then the text. */
1141 size_t blen = BLOCK_LEN (len);
1142 struct block *block;
1143
1144 if (macro->count + blen > BUFF_ROOM (pfile->a_buff))
1145 _cpp_extend_buff (pfile, &pfile->a_buff, macro->count + blen);
1146
1147 exp = BUFF_FRONT (pfile->a_buff);
1148 block = (struct block *) (exp + macro->count);
1149 macro->exp.text = exp;
1150
1151 /* Write out the block information. */
1152 block->text_len = len;
1153 block->arg_index = arg_index;
1154 memcpy (block->text, pfile->out.base, len);
1155
1156 /* Lex the rest into the start of the output buffer. */
1157 pfile->out.cur = pfile->out.base;
1158
1159 macro->count += blen;
1160
1161 /* If we've finished, commit the memory. */
1162 if (arg_index == 0)
1163 BUFF_FRONT (pfile->a_buff) += macro->count;
1164 }
1165 }
1166
1167 /* Analyze and save the replacement text of a macro. Returns true on
1168 success. */
1169 cpp_macro *
_cpp_create_trad_definition(cpp_reader * pfile)1170 _cpp_create_trad_definition (cpp_reader *pfile)
1171 {
1172 const uchar *cur;
1173 uchar *limit;
1174 cpp_context *context = pfile->context;
1175 unsigned nparms = 0;
1176 int fun_like = 0;
1177 cpp_hashnode **params = NULL;
1178
1179 /* The context has not been set up for command line defines, and CUR
1180 has not been updated for the macro name for in-file defines. */
1181 pfile->out.cur = pfile->out.base;
1182 CUR (context) = pfile->buffer->cur;
1183 RLIMIT (context) = pfile->buffer->rlimit;
1184 check_output_buffer (pfile, RLIMIT (context) - CUR (context));
1185
1186 /* Is this a function-like macro? */
1187 if (* CUR (context) == '(')
1188 {
1189 fun_like = +1;
1190 if (scan_parameters (pfile, &nparms))
1191 params = (cpp_hashnode **)_cpp_commit_buff
1192 (pfile, sizeof (cpp_hashnode *) * nparms);
1193 else
1194 fun_like = -1;
1195 }
1196
1197 cpp_macro *macro = NULL;
1198
1199 if (fun_like >= 0)
1200 {
1201 macro = _cpp_new_macro (pfile, cmk_traditional,
1202 _cpp_aligned_alloc (pfile, sizeof (cpp_macro)));
1203 macro->parm.params = params;
1204 macro->paramc = nparms;
1205 macro->fun_like = fun_like != 0;
1206 }
1207
1208 /* Skip leading whitespace in the replacement text. */
1209 pfile->buffer->cur
1210 = skip_whitespace (pfile, CUR (context),
1211 CPP_OPTION (pfile, discard_comments_in_macro_exp));
1212
1213 pfile->state.prevent_expansion++;
1214 _cpp_scan_out_logical_line (pfile, macro, false);
1215 pfile->state.prevent_expansion--;
1216
1217 _cpp_unsave_parameters (pfile, nparms);
1218
1219 if (macro)
1220 {
1221 /* Skip trailing white space. */
1222 cur = pfile->out.base;
1223 limit = pfile->out.cur;
1224 while (limit > cur && is_space (limit[-1]))
1225 limit--;
1226 pfile->out.cur = limit;
1227 save_replacement_text (pfile, macro, 0);
1228 }
1229
1230 return macro;
1231 }
1232
1233 /* Copy SRC of length LEN to DEST, but convert all contiguous
1234 whitespace to a single space, provided it is not in quotes. The
1235 quote currently in effect is pointed to by PQUOTE, and is updated
1236 by the function. Returns the number of bytes copied. */
1237 static size_t
canonicalize_text(uchar * dest,const uchar * src,size_t len,uchar * pquote)1238 canonicalize_text (uchar *dest, const uchar *src, size_t len, uchar *pquote)
1239 {
1240 uchar *orig_dest = dest;
1241 uchar quote = *pquote;
1242
1243 while (len)
1244 {
1245 if (is_space (*src) && !quote)
1246 {
1247 do
1248 src++, len--;
1249 while (len && is_space (*src));
1250 *dest++ = ' ';
1251 }
1252 else
1253 {
1254 if (*src == '\'' || *src == '"')
1255 {
1256 if (!quote)
1257 quote = *src;
1258 else if (quote == *src)
1259 quote = 0;
1260 }
1261 *dest++ = *src++, len--;
1262 }
1263 }
1264
1265 *pquote = quote;
1266 return dest - orig_dest;
1267 }
1268
1269 /* Returns true if MACRO1 and MACRO2 have expansions different other
1270 than in the form of their whitespace. */
1271 bool
_cpp_expansions_different_trad(const cpp_macro * macro1,const cpp_macro * macro2)1272 _cpp_expansions_different_trad (const cpp_macro *macro1,
1273 const cpp_macro *macro2)
1274 {
1275 uchar *p1 = XNEWVEC (uchar, macro1->count + macro2->count);
1276 uchar *p2 = p1 + macro1->count;
1277 uchar quote1 = 0, quote2 = 0;
1278 bool mismatch;
1279 size_t len1, len2;
1280
1281 if (macro1->paramc > 0)
1282 {
1283 const uchar *exp1 = macro1->exp.text, *exp2 = macro2->exp.text;
1284
1285 mismatch = true;
1286 for (;;)
1287 {
1288 struct block *b1 = (struct block *) exp1;
1289 struct block *b2 = (struct block *) exp2;
1290
1291 if (b1->arg_index != b2->arg_index)
1292 break;
1293
1294 len1 = canonicalize_text (p1, b1->text, b1->text_len, "e1);
1295 len2 = canonicalize_text (p2, b2->text, b2->text_len, "e2);
1296 if (len1 != len2 || memcmp (p1, p2, len1))
1297 break;
1298 if (b1->arg_index == 0)
1299 {
1300 mismatch = false;
1301 break;
1302 }
1303 exp1 += BLOCK_LEN (b1->text_len);
1304 exp2 += BLOCK_LEN (b2->text_len);
1305 }
1306 }
1307 else
1308 {
1309 len1 = canonicalize_text (p1, macro1->exp.text, macro1->count, "e1);
1310 len2 = canonicalize_text (p2, macro2->exp.text, macro2->count, "e2);
1311 mismatch = (len1 != len2 || memcmp (p1, p2, len1));
1312 }
1313
1314 free (p1);
1315 return mismatch;
1316 }
1317