1 /* CPP Library - traditional lexical analysis and macro expansion.
2    Copyright (C) 2002-2021 Free Software Foundation, Inc.
3    Contributed by Neil Booth, May 2002
4 
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
8 later version.
9 
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU General Public License for more details.
14 
15 You should have received a copy of the GNU General Public License
16 along with this program; see the file COPYING3.  If not see
17 <http://www.gnu.org/licenses/>.  */
18 
19 #include "config.h"
20 #include "system.h"
21 #include "cpplib.h"
22 #include "internal.h"
23 
/* The replacement text of a function-like macro is stored as a
   contiguous sequence of aligned blocks, each representing the text
   between subsequent parameters.

   Each block comprises the text between its surrounding parameters,
   the length of that text, and the one-based index of the following
   parameter.  The final block in the replacement text is easily
   recognizable as it has an argument index of zero.  */

struct block
{
  /* Number of bytes of literal text held in TEXT.  */
  unsigned int text_len;
  /* One-based index of the parameter that follows TEXT, or zero if
     this is the last block.  */
  unsigned short arg_index;
  /* The literal text.  Only the first byte is declared; readers copy
     TEXT_LEN bytes starting here and step to the next block with
     BLOCK_LEN (TEXT_LEN).  */
  uchar text[1];
};

/* Size of the fixed part of a block, i.e. the offset of TEXT.  */
#define BLOCK_HEADER_LEN offsetof (struct block, text)
/* Aligned size of a whole block that carries TEXT_LEN bytes of text.  */
#define BLOCK_LEN(TEXT_LEN) CPP_ALIGN (BLOCK_HEADER_LEN + (TEXT_LEN))
42 
/* Structure holding information about a function-like macro
   invocation.  */
struct fun_macro
{
  /* Memory buffer holding the ARGS array.  */
  _cpp_buff *buff;

  /* An array of size the number of macro parameters + 1, containing
     the offsets of the start of each macro argument in the output
     buffer.  The argument continues until the character before the
     start of the next one.  */
  size_t *args;

  /* The hashnode of the macro.  */
  cpp_hashnode *node;

  /* The offset of the macro name in the output buffer.  */
  size_t offset;

  /* The line the macro name appeared on.  */
  location_t line;

  /* Number of parameters.  maybe_start_funlike records 1 here for
     builtin function-like macros.  */
  unsigned int paramc;

  /* Zero-based index of argument being currently lexed.  */
  unsigned int argc;
};
71 
/* State of the traditional scanner.  Apart from the normal state,
   each value records a construct that is still pending; it is mostly
   used to prevent macro expansion.  */
enum ls
{
  ls_none = 0,          /* Normal state.  */
  ls_fun_open = 1,      /* Seen a function-like macro name; looking
                           for its '('.  */
  ls_fun_close = 2,     /* Collecting arguments; looking for ')'.  */
  ls_defined = 3,       /* Just after "defined".  */
  ls_defined_close = 4, /* Looking for the ')' of defined().  */
  ls_hash = 5,          /* Just after '#' in a preprocessor
                           conditional.  */
  ls_predicate = 6,     /* After the predicate; a '(' may follow.  */
  ls_answer = 7         /* Inside the answer to a predicate.  */
};
82 
83 /* Lexing TODO: Maybe handle space in escaped newlines.  Stop lex.c
84    from recognizing comments and directives during its lexing pass.  */
85 
/* Forward declarations of this file's static helper functions.  */
static const uchar *skip_whitespace (cpp_reader *, const uchar *, int);
static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
static const uchar *copy_comment (cpp_reader *, const uchar *, int);
static void check_output_buffer (cpp_reader *, size_t);
static void push_replacement_text (cpp_reader *, cpp_hashnode *);
static bool scan_parameters (cpp_reader *, unsigned *);
static bool recursive_macro (cpp_reader *, cpp_hashnode *);
static void save_replacement_text (cpp_reader *, cpp_macro *, unsigned int);
static void maybe_start_funlike (cpp_reader *, cpp_hashnode *, const uchar *,
				 struct fun_macro *);
static void save_argument (struct fun_macro *, size_t);
static void replace_args_and_push (cpp_reader *, struct fun_macro *);
static size_t canonicalize_text (uchar *, const uchar *, size_t, uchar *);
99 
100 /* Ensures we have N bytes' space in the output buffer, and
101    reallocates it if not.  */
102 static void
check_output_buffer(cpp_reader * pfile,size_t n)103 check_output_buffer (cpp_reader *pfile, size_t n)
104 {
105   /* We might need two bytes to terminate an unterminated comment, and
106      one more to terminate the line with a NUL.  */
107   n += 2 + 1;
108 
109   if (n > (size_t) (pfile->out.limit - pfile->out.cur))
110     {
111       size_t size = pfile->out.cur - pfile->out.base;
112       size_t new_size = (size + n) * 3 / 2;
113 
114       pfile->out.base = XRESIZEVEC (unsigned char, pfile->out.base, new_size);
115       pfile->out.limit = pfile->out.base + new_size;
116       pfile->out.cur = pfile->out.base + size;
117     }
118 }
119 
120 /* Skip a C-style block comment in a macro as a result of -CC.
121    PFILE->buffer->cur points to the initial asterisk of the comment,
122    change it to point to after the '*' and '/' characters that terminate it.
123    Return true if the macro has not been termined, in that case set
124    PFILE->buffer->cur to the end of the buffer.  */
125 static bool
skip_macro_block_comment(cpp_reader * pfile)126 skip_macro_block_comment (cpp_reader *pfile)
127 {
128   const uchar *cur = pfile->buffer->cur;
129 
130   cur++;
131   if (*cur == '/')
132     cur++;
133 
134   /* People like decorating comments with '*', so check for '/'
135      instead for efficiency.  */
136   while (! (*cur++ == '/' && cur[-2] == '*'))
137     if (cur[-1] == '\n')
138       {
139 	pfile->buffer->cur = cur - 1;
140 	return true;
141       }
142 
143   pfile->buffer->cur = cur;
144   return false;
145 }
146 
147 /* CUR points to the asterisk introducing a comment in the current
148    context.  IN_DEFINE is true if we are in the replacement text of a
149    macro.
150 
151    The asterisk and following comment is copied to the buffer pointed
152    to by pfile->out.cur, which must be of sufficient size.
153    Unterminated comments are diagnosed, and correctly terminated in
154    the output.  pfile->out.cur is updated depending upon IN_DEFINE,
155    -C, -CC and pfile->state.in_directive.
156 
157    Returns a pointer to the first character after the comment in the
158    input buffer.  */
159 static const uchar *
copy_comment(cpp_reader * pfile,const uchar * cur,int in_define)160 copy_comment (cpp_reader *pfile, const uchar *cur, int in_define)
161 {
162   bool unterminated, copy = false;
163   location_t src_loc = pfile->line_table->highest_line;
164   cpp_buffer *buffer = pfile->buffer;
165 
166   buffer->cur = cur;
167   if (pfile->context->prev)
168     unterminated = skip_macro_block_comment (pfile);
169   else
170     unterminated = _cpp_skip_block_comment (pfile);
171 
172   if (unterminated)
173     cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
174 			 "unterminated comment");
175 
176   /* Comments in directives become spaces so that tokens are properly
177      separated when the ISO preprocessor re-lexes the line.  The
178      exception is #define.  */
179   if (pfile->state.in_directive)
180     {
181       if (in_define)
182 	{
183 	  if (CPP_OPTION (pfile, discard_comments_in_macro_exp))
184 	    pfile->out.cur--;
185 	  else
186 	    copy = true;
187 	}
188       else
189 	pfile->out.cur[-1] = ' ';
190     }
191   else if (CPP_OPTION (pfile, discard_comments))
192     pfile->out.cur--;
193   else
194     copy = true;
195 
196   if (copy)
197     {
198       size_t len = (size_t) (buffer->cur - cur);
199       memcpy (pfile->out.cur, cur, len);
200       pfile->out.cur += len;
201       if (unterminated)
202 	{
203 	  *pfile->out.cur++ = '*';
204 	  *pfile->out.cur++ = '/';
205 	}
206     }
207 
208   return buffer->cur;
209 }
210 
211 /* CUR points to any character in the input buffer.  Skips over all
212    contiguous horizontal white space and NULs, including comments if
213    SKIP_COMMENTS, until reaching the first non-horizontal-whitespace
214    character or the end of the current context.  Escaped newlines are
215    removed.
216 
217    The whitespace is copied verbatim to the output buffer, except that
218    comments are handled as described in copy_comment().
219    pfile->out.cur is updated.
220 
221    Returns a pointer to the first character after the whitespace in
222    the input buffer.  */
223 static const uchar *
skip_whitespace(cpp_reader * pfile,const uchar * cur,int skip_comments)224 skip_whitespace (cpp_reader *pfile, const uchar *cur, int skip_comments)
225 {
226   uchar *out = pfile->out.cur;
227 
228   for (;;)
229     {
230       unsigned int c = *cur++;
231       *out++ = c;
232 
233       if (is_nvspace (c))
234 	continue;
235 
236       if (c == '/' && *cur == '*' && skip_comments)
237 	{
238 	  pfile->out.cur = out;
239 	  cur = copy_comment (pfile, cur, false /* in_define */);
240 	  out = pfile->out.cur;
241 	  continue;
242 	}
243 
244       out--;
245       break;
246     }
247 
248   pfile->out.cur = out;
249   return cur - 1;
250 }
251 
252 /* Lexes and outputs an identifier starting at CUR, which is assumed
253    to point to a valid first character of an identifier.  Returns
254    the hashnode, and updates out.cur.  */
255 static cpp_hashnode *
lex_identifier(cpp_reader * pfile,const uchar * cur)256 lex_identifier (cpp_reader *pfile, const uchar *cur)
257 {
258   size_t len;
259   uchar *out = pfile->out.cur;
260   cpp_hashnode *result;
261 
262   do
263     *out++ = *cur++;
264   while (is_numchar (*cur));
265 
266   CUR (pfile->context) = cur;
267   len = out - pfile->out.cur;
268   result = CPP_HASHNODE (ht_lookup (pfile->hash_table, pfile->out.cur,
269 				    len, HT_ALLOC));
270   pfile->out.cur = out;
271   return result;
272 }
273 
274 /* Overlays the true file buffer temporarily with text of length LEN
275    starting at START.  The true buffer is restored upon calling
276    restore_buff().  */
277 void
_cpp_overlay_buffer(cpp_reader * pfile,const uchar * start,size_t len)278 _cpp_overlay_buffer (cpp_reader *pfile, const uchar *start, size_t len)
279 {
280   cpp_buffer *buffer = pfile->buffer;
281 
282   pfile->overlaid_buffer = buffer;
283   pfile->saved_cur = buffer->cur;
284   pfile->saved_rlimit = buffer->rlimit;
285   pfile->saved_line_base = buffer->next_line;
286   buffer->need_line = false;
287 
288   buffer->cur = start;
289   buffer->line_base = start;
290   buffer->rlimit = start + len;
291 }
292 
293 /* Restores a buffer overlaid by _cpp_overlay_buffer().  */
294 void
_cpp_remove_overlay(cpp_reader * pfile)295 _cpp_remove_overlay (cpp_reader *pfile)
296 {
297   cpp_buffer *buffer = pfile->overlaid_buffer;
298 
299   buffer->cur = pfile->saved_cur;
300   buffer->rlimit = pfile->saved_rlimit;
301   buffer->line_base = pfile->saved_line_base;
302   buffer->need_line = true;
303 
304   pfile->overlaid_buffer = NULL;
305 }
306 
307 /* Reads a logical line into the output buffer.  Returns TRUE if there
308    is more text left in the buffer.  */
309 bool
_cpp_read_logical_line_trad(cpp_reader * pfile)310 _cpp_read_logical_line_trad (cpp_reader *pfile)
311 {
312   do
313     {
314       if (pfile->buffer->need_line && !_cpp_get_fresh_line (pfile))
315 	{
316 	  /* Now pop the buffer that _cpp_get_fresh_line did not.  */
317 	  _cpp_pop_buffer (pfile);
318 	  return false;
319 	}
320     }
321   while (!_cpp_scan_out_logical_line (pfile, NULL, false)
322 	 || pfile->state.skipping);
323 
324   return pfile->buffer != NULL;
325 }
326 
327 /* Return true if NODE is a fun_like macro.  */
328 static inline bool
fun_like_macro(cpp_hashnode * node)329 fun_like_macro (cpp_hashnode *node)
330 {
331   if (cpp_builtin_macro_p (node))
332     return (node->value.builtin == BT_HAS_ATTRIBUTE
333 	    || node->value.builtin == BT_HAS_STD_ATTRIBUTE
334 	    || node->value.builtin == BT_HAS_BUILTIN
335 	    || node->value.builtin == BT_HAS_INCLUDE
336 	    || node->value.builtin == BT_HAS_INCLUDE_NEXT);
337   return node->value.macro->fun_like;
338 }
339 
340 /* Set up state for finding the opening '(' of a function-like
341    macro.  */
342 static void
maybe_start_funlike(cpp_reader * pfile,cpp_hashnode * node,const uchar * start,struct fun_macro * macro)343 maybe_start_funlike (cpp_reader *pfile, cpp_hashnode *node, const uchar *start,
344 		     struct fun_macro *macro)
345 {
346   unsigned int n;
347   if (cpp_builtin_macro_p (node))
348     n = 1;
349   else
350     n = node->value.macro->paramc;
351 
352   if (macro->buff)
353     _cpp_release_buff (pfile, macro->buff);
354   macro->buff = _cpp_get_buff (pfile, (n + 1) * sizeof (size_t));
355   macro->args = (size_t *) BUFF_FRONT (macro->buff);
356   macro->node = node;
357   macro->offset = start - pfile->out.base;
358   macro->paramc = n;
359   macro->argc = 0;
360 }
361 
362 /* Save the OFFSET of the start of the next argument to MACRO.  */
363 static void
save_argument(struct fun_macro * macro,size_t offset)364 save_argument (struct fun_macro *macro, size_t offset)
365 {
366   macro->argc++;
367   if (macro->argc <= macro->paramc)
368     macro->args[macro->argc] = offset;
369 }
370 
371 /* Copies the next logical line in the current buffer (starting at
372    buffer->cur) to the output buffer.  The output is guaranteed to
373    terminate with a NUL character.  buffer->cur is updated.
374 
375    If MACRO is non-NULL, then we are scanning the replacement list of
376    MACRO, and we call save_replacement_text() every time we meet an
377    argument.
378 
379    If BUILTIN_MACRO_ARG is true, this is called to macro expand
380    arguments of builtin function-like macros.  */
381 bool
_cpp_scan_out_logical_line(cpp_reader * pfile,cpp_macro * macro,bool builtin_macro_arg)382 _cpp_scan_out_logical_line (cpp_reader *pfile, cpp_macro *macro,
383 			    bool builtin_macro_arg)
384 {
385   bool result = true;
386   cpp_context *context;
387   const uchar *cur;
388   uchar *out;
389   struct fun_macro fmacro;
390   unsigned int c, paren_depth = 0, quote;
391   enum ls lex_state = ls_none;
392   bool header_ok;
393   const uchar *start_of_input_line;
394 
395   fmacro.buff = NULL;
396   fmacro.args = NULL;
397   fmacro.node = NULL;
398   fmacro.offset = 0;
399   fmacro.line = 0;
400   fmacro.paramc = 0;
401   fmacro.argc = 0;
402 
403   quote = 0;
404   header_ok = pfile->state.angled_headers;
405   CUR (pfile->context) = pfile->buffer->cur;
406   RLIMIT (pfile->context) = pfile->buffer->rlimit;
407   if (!builtin_macro_arg)
408     {
409       pfile->out.cur = pfile->out.base;
410       pfile->out.first_line = pfile->line_table->highest_line;
411     }
412   /* start_of_input_line is needed to make sure that directives really,
413      really start at the first character of the line.  */
414   start_of_input_line = pfile->buffer->cur;
415  new_context:
416   context = pfile->context;
417   cur = CUR (context);
418   check_output_buffer (pfile, RLIMIT (context) - cur);
419   out = pfile->out.cur;
420 
421   for (;;)
422     {
423       if (!context->prev
424 	  && !builtin_macro_arg
425 	  && cur >= pfile->buffer->notes[pfile->buffer->cur_note].pos)
426 	{
427 	  pfile->buffer->cur = cur;
428 	  _cpp_process_line_notes (pfile, false);
429 	}
430       c = *cur++;
431       *out++ = c;
432 
433       /* Whitespace should "continue" out of the switch,
434 	 non-whitespace should "break" out of it.  */
435       switch (c)
436 	{
437 	case ' ':
438 	case '\t':
439 	case '\f':
440 	case '\v':
441 	case '\0':
442 	  continue;
443 
444 	case '\n':
445 	  /* If this is a macro's expansion, pop it.  */
446 	  if (context->prev)
447 	    {
448 	      pfile->out.cur = out - 1;
449 	      _cpp_pop_context (pfile);
450 	      goto new_context;
451 	    }
452 
453 	  /* Omit the newline from the output buffer.  */
454 	  pfile->out.cur = out - 1;
455 	  pfile->buffer->cur = cur;
456 	  if (builtin_macro_arg)
457 	    goto done;
458 	  pfile->buffer->need_line = true;
459 	  CPP_INCREMENT_LINE (pfile, 0);
460 
461 	  if ((lex_state == ls_fun_open || lex_state == ls_fun_close)
462 	      && !pfile->state.in_directive
463 	      && _cpp_get_fresh_line (pfile))
464 	    {
465 	      /* Newlines in arguments become a space, but we don't
466 		 clear any in-progress quote.  */
467 	      if (lex_state == ls_fun_close)
468 		out[-1] = ' ';
469 	      cur = pfile->buffer->cur;
470 	      continue;
471 	    }
472 	  goto done;
473 
474 	case '<':
475 	  if (header_ok)
476 	    quote = '>';
477 	  break;
478 	case '>':
479 	  if (c == quote)
480 	    quote = 0;
481 	  break;
482 
483 	case '"':
484 	case '\'':
485 	  if (c == quote)
486 	    quote = 0;
487 	  else if (!quote)
488 	    quote = c;
489 	  break;
490 
491 	case '\\':
492 	  /* Skip escaped quotes here, it's easier than above.  */
493 	  if (*cur == '\\' || *cur == '"' || *cur == '\'')
494 	    *out++ = *cur++;
495 	  break;
496 
497 	case '/':
498 	  /* Traditional CPP does not recognize comments within
499 	     literals.  */
500 	  if (!quote && *cur == '*')
501 	    {
502 	      pfile->out.cur = out;
503 	      cur = copy_comment (pfile, cur, macro != 0);
504 	      out = pfile->out.cur;
505 	      continue;
506 	    }
507 	  break;
508 
509 	case '_':
510 	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
511 	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
512 	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
513 	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
514 	case 'y': case 'z':
515 	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
516 	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
517 	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
518 	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
519 	case 'Y': case 'Z':
520 	  if (!pfile->state.skipping && (quote == 0 || macro))
521 	    {
522 	      cpp_hashnode *node;
523 	      uchar *out_start = out - 1;
524 
525 	      pfile->out.cur = out_start;
526 	      node = lex_identifier (pfile, cur - 1);
527 	      out = pfile->out.cur;
528 	      cur = CUR (context);
529 
530 	      if (cpp_macro_p (node)
531 		  /* Should we expand for ls_answer?  */
532 		  && (lex_state == ls_none || lex_state == ls_fun_open)
533 		  && !pfile->state.prevent_expansion)
534 		{
535 		  /* Macros invalidate MI optimization.  */
536 		  pfile->mi_valid = false;
537 		  if (fun_like_macro (node))
538 		    {
539 		      maybe_start_funlike (pfile, node, out_start, &fmacro);
540 		      lex_state = ls_fun_open;
541 		      fmacro.line = pfile->line_table->highest_line;
542 		      continue;
543 		    }
544 		  else if (!recursive_macro (pfile, node))
545 		    {
546 		      /* Remove the object-like macro's name from the
547 			 output, and push its replacement text.  */
548 		      pfile->out.cur = out_start;
549 		      push_replacement_text (pfile, node);
550 		      lex_state = ls_none;
551 		      goto new_context;
552 		    }
553 		}
554 	      else if (macro && node->type == NT_MACRO_ARG)
555 		{
556 		  /* Found a parameter in the replacement text of a
557 		     #define.  Remove its name from the output.  */
558 		  pfile->out.cur = out_start;
559 		  save_replacement_text (pfile, macro, node->value.arg_index);
560 		  out = pfile->out.base;
561 		}
562 	      else if (lex_state == ls_hash)
563 		{
564 		  lex_state = ls_predicate;
565 		  continue;
566 		}
567 	      else if (pfile->state.in_expression
568 		       && node == pfile->spec_nodes.n_defined)
569 		{
570 		  lex_state = ls_defined;
571 		  continue;
572 		}
573 	    }
574 	  break;
575 
576 	case '(':
577 	  if (quote == 0)
578 	    {
579 	      paren_depth++;
580 	      if (lex_state == ls_fun_open)
581 		{
582 		  if (recursive_macro (pfile, fmacro.node))
583 		    lex_state = ls_none;
584 		  else
585 		    {
586 		      lex_state = ls_fun_close;
587 		      paren_depth = 1;
588 		      out = pfile->out.base + fmacro.offset;
589 		      fmacro.args[0] = fmacro.offset;
590 		    }
591 		}
592 	      else if (lex_state == ls_predicate)
593 		lex_state = ls_answer;
594 	      else if (lex_state == ls_defined)
595 		lex_state = ls_defined_close;
596 	    }
597 	  break;
598 
599 	case ',':
600 	  if (quote == 0 && lex_state == ls_fun_close && paren_depth == 1)
601 	    save_argument (&fmacro, out - pfile->out.base);
602 	  break;
603 
604 	case ')':
605 	  if (quote == 0)
606 	    {
607 	      paren_depth--;
608 	      if (lex_state == ls_fun_close && paren_depth == 0)
609 		{
610 		  if (cpp_builtin_macro_p (fmacro.node))
611 		    {
612 		      /* Handle builtin function-like macros like
613 			 __has_attribute.  The already parsed arguments
614 			 are put into a buffer, which is then preprocessed
615 			 and the result is fed to _cpp_push_text_context
616 			 with disabled expansion, where the ISO preprocessor
617 			 parses it.  While in traditional preprocessing
618 			 macro arguments aren't immediately expanded, they in
619 			 the end are because the macro with replaced arguments
620 			 is preprocessed again.  For the builtin function-like
621 			 macros we need the argument immediately though,
622 			 if we don't preprocess them, they would behave
623 			 very differently from ISO preprocessor handling
624 			 of those builtin macros.  So, this handling is
625 			 more similar to traditional preprocessing of
626 			 #if directives, where we also keep preprocessing
627 			 until everything is expanded, and then feed the
628 			 result with disabled expansion to ISO preprocessor
629 			 for handling the directives.  */
630 		      lex_state = ls_none;
631 		      save_argument (&fmacro, out - pfile->out.base);
632 		      cpp_macro m;
633 		      memset (&m, '\0', sizeof (m));
634 		      m.paramc = fmacro.paramc;
635 		      if (_cpp_arguments_ok (pfile, &m, fmacro.node,
636 					     fmacro.argc))
637 			{
638 			  size_t len = fmacro.args[1] - fmacro.args[0];
639 			  uchar *buf;
640 
641 			  /* Remove the macro's invocation from the
642 			     output, and push its replacement text.  */
643 			  pfile->out.cur = pfile->out.base + fmacro.offset;
644 			  CUR (context) = cur;
645 			  buf = _cpp_unaligned_alloc (pfile, len + 2);
646 			  buf[0] = '(';
647 			  memcpy (buf + 1, pfile->out.base + fmacro.args[0],
648 				  len);
649 			  buf[len + 1] = '\n';
650 
651 			  const unsigned char *ctx_rlimit = RLIMIT (context);
652 			  const unsigned char *saved_cur = pfile->buffer->cur;
653 			  const unsigned char *saved_rlimit
654 			    = pfile->buffer->rlimit;
655 			  const unsigned char *saved_line_base
656 			    = pfile->buffer->line_base;
657 			  bool saved_need_line = pfile->buffer->need_line;
658 			  cpp_buffer *saved_overlaid_buffer
659 			    = pfile->overlaid_buffer;
660 			  pfile->buffer->cur = buf;
661 			  pfile->buffer->line_base = buf;
662 			  pfile->buffer->rlimit = buf + len + 1;
663 			  pfile->buffer->need_line = false;
664 			  pfile->overlaid_buffer = pfile->buffer;
665 			  bool saved_in_directive = pfile->state.in_directive;
666 			  pfile->state.in_directive = true;
667 			  cpp_context *saved_prev_context = context->prev;
668 			  context->prev = NULL;
669 
670 			  _cpp_scan_out_logical_line (pfile, NULL, true);
671 
672 			  pfile->state.in_directive = saved_in_directive;
673 			  check_output_buffer (pfile, 1);
674 			  *pfile->out.cur = '\n';
675 			  pfile->buffer->cur = pfile->out.base + fmacro.offset;
676 			  pfile->buffer->line_base = pfile->buffer->cur;
677 			  pfile->buffer->rlimit = pfile->out.cur;
678 			  CUR (context) = pfile->buffer->cur;
679 			  RLIMIT (context) = pfile->buffer->rlimit;
680 
681 			  pfile->state.prevent_expansion++;
682 			  const uchar *text
683 			    = _cpp_builtin_macro_text (pfile, fmacro.node);
684 			  pfile->state.prevent_expansion--;
685 
686 			  context->prev = saved_prev_context;
687 			  pfile->buffer->cur = saved_cur;
688 			  pfile->buffer->rlimit = saved_rlimit;
689 			  pfile->buffer->line_base = saved_line_base;
690 			  pfile->buffer->need_line = saved_need_line;
691 			  pfile->overlaid_buffer = saved_overlaid_buffer;
692 			  pfile->out.cur = pfile->out.base + fmacro.offset;
693 			  CUR (context) = cur;
694 			  RLIMIT (context) = ctx_rlimit;
695 			  len = ustrlen (text);
696 			  buf = _cpp_unaligned_alloc (pfile, len + 1);
697 			  memcpy (buf, text, len);
698 			  buf[len] = '\n';
699 			  text = buf;
700 			  _cpp_push_text_context (pfile, fmacro.node,
701 						  text, len);
702 			  goto new_context;
703 			}
704 		      break;
705 		    }
706 
707 		  cpp_macro *m = fmacro.node->value.macro;
708 
709 		  m->used = 1;
710 		  lex_state = ls_none;
711 		  save_argument (&fmacro, out - pfile->out.base);
712 
713 		  /* A single zero-length argument is no argument.  */
714 		  if (fmacro.argc == 1
715 		      && m->paramc == 0
716 		      && out == pfile->out.base + fmacro.offset + 1)
717 		    fmacro.argc = 0;
718 
719 		  if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
720 		    {
721 		      /* Remove the macro's invocation from the
722 			 output, and push its replacement text.  */
723 		      pfile->out.cur = pfile->out.base + fmacro.offset;
724 		      CUR (context) = cur;
725 		      replace_args_and_push (pfile, &fmacro);
726 		      goto new_context;
727 		    }
728 		}
729 	      else if (lex_state == ls_answer || lex_state == ls_defined_close)
730 		lex_state = ls_none;
731 	    }
732 	  break;
733 
734 	case '#':
735 	  if (cur - 1 == start_of_input_line
736 	      /* A '#' from a macro doesn't start a directive.  */
737 	      && !pfile->context->prev
738 	      && !pfile->state.in_directive)
739 	    {
740 	      /* A directive.  With the way _cpp_handle_directive
741 		 currently works, we only want to call it if either we
742 		 know the directive is OK, or we want it to fail and
743 		 be removed from the output.  If we want it to be
744 		 passed through (the assembler case) then we must not
745 		 call _cpp_handle_directive.  */
746 	      pfile->out.cur = out;
747 	      cur = skip_whitespace (pfile, cur, true /* skip_comments */);
748 	      out = pfile->out.cur;
749 
750 	      if (*cur == '\n')
751 		{
752 		  /* Null directive.  Ignore it and don't invalidate
753 		     the MI optimization.  */
754 		  pfile->buffer->need_line = true;
755 		  CPP_INCREMENT_LINE (pfile, 0);
756 		  result = false;
757 		  goto done;
758 		}
759 	      else
760 		{
761 		  bool do_it = false;
762 
763 		  if (is_numstart (*cur)
764 		      && CPP_OPTION (pfile, lang) != CLK_ASM)
765 		    do_it = true;
766 		  else if (is_idstart (*cur))
767 		    /* Check whether we know this directive, but don't
768 		       advance.  */
769 		    do_it = lex_identifier (pfile, cur)->is_directive;
770 
771 		  if (do_it || CPP_OPTION (pfile, lang) != CLK_ASM)
772 		    {
773 		      /* This is a kludge.  We want to have the ISO
774 			 preprocessor lex the next token.  */
775 		      pfile->buffer->cur = cur;
776 		      _cpp_handle_directive (pfile, false /* indented */);
777 		      result = false;
778 		      goto done;
779 		    }
780 		}
781 	    }
782 
783 	  if (pfile->state.in_expression)
784 	    {
785 	      lex_state = ls_hash;
786 	      continue;
787 	    }
788 	  break;
789 
790 	default:
791 	  break;
792 	}
793 
794       /* Non-whitespace disables MI optimization and stops treating
795 	 '<' as a quote in #include.  */
796       header_ok = false;
797       if (!pfile->state.in_directive)
798 	pfile->mi_valid = false;
799 
800       if (lex_state == ls_none)
801 	continue;
802 
803       /* Some of these transitions of state are syntax errors.  The
804 	 ISO preprocessor will issue errors later.  */
805       if (lex_state == ls_fun_open)
806 	/* Missing '('.  */
807 	lex_state = ls_none;
808       else if (lex_state == ls_hash
809 	       || lex_state == ls_predicate
810 	       || lex_state == ls_defined)
811 	lex_state = ls_none;
812 
813       /* ls_answer and ls_defined_close keep going until ')'.  */
814     }
815 
816  done:
817   if (fmacro.buff)
818     _cpp_release_buff (pfile, fmacro.buff);
819 
820   if (lex_state == ls_fun_close)
821     cpp_error_with_line (pfile, CPP_DL_ERROR, fmacro.line, 0,
822 			 "unterminated argument list invoking macro \"%s\"",
823 			 NODE_NAME (fmacro.node));
824   return result;
825 }
826 
827 /* Push a context holding the replacement text of the macro NODE on
828    the context stack.  NODE is either object-like, or a function-like
829    macro with no arguments.  */
830 static void
push_replacement_text(cpp_reader * pfile,cpp_hashnode * node)831 push_replacement_text (cpp_reader *pfile, cpp_hashnode *node)
832 {
833   size_t len;
834   const uchar *text;
835   uchar *buf;
836 
837   if (cpp_builtin_macro_p (node))
838     {
839       text = _cpp_builtin_macro_text (pfile, node);
840       len = ustrlen (text);
841       buf = _cpp_unaligned_alloc (pfile, len + 1);
842       memcpy (buf, text, len);
843       buf[len] = '\n';
844       text = buf;
845     }
846   else
847     {
848       cpp_macro *macro = node->value.macro;
849       macro->used = 1;
850       text = macro->exp.text;
851       len = macro->count;
852     }
853 
854   _cpp_push_text_context (pfile, node, text, len);
855 }
856 
857 /* Returns TRUE if traditional macro recursion is detected.  */
858 static bool
recursive_macro(cpp_reader * pfile,cpp_hashnode * node)859 recursive_macro (cpp_reader *pfile, cpp_hashnode *node)
860 {
861   bool recursing = !!(node->flags & NODE_DISABLED);
862 
863   /* Object-like macros that are already expanding are necessarily
864      recursive.
865 
866      However, it is possible to have traditional function-like macros
867      that are not infinitely recursive but recurse to any given depth.
868      Further, it is easy to construct examples that get ever longer
869      until the point they stop recursing.  So there is no easy way to
870      detect true recursion; instead we assume any expansion more than
871      20 deep since the first invocation of this macro must be
872      recursing.  */
873   if (recursing && fun_like_macro (node))
874     {
875       size_t depth = 0;
876       cpp_context *context = pfile->context;
877 
878       do
879 	{
880 	  depth++;
881 	  if (context->c.macro == node && depth > 20)
882 	    break;
883 	  context = context->prev;
884 	}
885       while (context);
886       recursing = context != NULL;
887     }
888 
889   if (recursing)
890     cpp_error (pfile, CPP_DL_ERROR,
891 	       "detected recursion whilst expanding macro \"%s\"",
892 	       NODE_NAME (node));
893 
894   return recursing;
895 }
896 
897 /* Return the length of the replacement text of a function-like or
898    object-like non-builtin macro.  */
899 size_t
_cpp_replacement_text_len(const cpp_macro * macro)900 _cpp_replacement_text_len (const cpp_macro *macro)
901 {
902   size_t len;
903 
904   if (macro->fun_like && (macro->paramc != 0))
905     {
906       const uchar *exp;
907 
908       len = 0;
909       for (exp = macro->exp.text;;)
910 	{
911 	  struct block *b = (struct block *) exp;
912 
913 	  len += b->text_len;
914 	  if (b->arg_index == 0)
915 	    break;
916 	  len += NODE_LEN (macro->parm.params[b->arg_index - 1]);
917 	  exp += BLOCK_LEN (b->text_len);
918 	}
919     }
920   else
921     len = macro->count;
922 
923   return len;
924 }
925 
926 /* Copy the replacement text of MACRO to DEST, which must be of
927    sufficient size.  It is not NUL-terminated.  The next character is
928    returned.  */
929 uchar *
_cpp_copy_replacement_text(const cpp_macro * macro,uchar * dest)930 _cpp_copy_replacement_text (const cpp_macro *macro, uchar *dest)
931 {
932   if (macro->fun_like && (macro->paramc != 0))
933     {
934       const uchar *exp;
935 
936       for (exp = macro->exp.text;;)
937 	{
938 	  struct block *b = (struct block *) exp;
939 	  cpp_hashnode *param;
940 
941 	  memcpy (dest, b->text, b->text_len);
942 	  dest += b->text_len;
943 	  if (b->arg_index == 0)
944 	    break;
945 	  param = macro->parm.params[b->arg_index - 1];
946 	  memcpy (dest, NODE_NAME (param), NODE_LEN (param));
947 	  dest += NODE_LEN (param);
948 	  exp += BLOCK_LEN (b->text_len);
949 	}
950     }
951   else
952     {
953       memcpy (dest, macro->exp.text, macro->count);
954       dest += macro->count;
955     }
956 
957   return dest;
958 }
959 
960 /* Push a context holding the replacement text of the macro NODE on
961    the context stack.  NODE is either object-like, or a function-like
962    macro with no arguments.  */
963 static void
replace_args_and_push(cpp_reader * pfile,struct fun_macro * fmacro)964 replace_args_and_push (cpp_reader *pfile, struct fun_macro *fmacro)
965 {
966   cpp_macro *macro = fmacro->node->value.macro;
967 
968   if (macro->paramc == 0)
969     push_replacement_text (pfile, fmacro->node);
970   else
971     {
972       const uchar *exp;
973       uchar *p;
974       _cpp_buff *buff;
975       size_t len = 0;
976       int cxtquote = 0;
977 
978       /* Get an estimate of the length of the argument-replaced text.
979 	 This is a worst case estimate, assuming that every replacement
980 	 text character needs quoting.  */
981       for (exp = macro->exp.text;;)
982 	{
983 	  struct block *b = (struct block *) exp;
984 
985 	  len += b->text_len;
986 	  if (b->arg_index == 0)
987 	    break;
988 	  len += 2 * (fmacro->args[b->arg_index]
989 		      - fmacro->args[b->arg_index - 1] - 1);
990 	  exp += BLOCK_LEN (b->text_len);
991 	}
992 
993       /* Allocate room for the expansion plus \n.  */
994       buff = _cpp_get_buff (pfile, len + 1);
995 
996       /* Copy the expansion and replace arguments.  */
997       /* Accumulate actual length, including quoting as necessary */
998       p = BUFF_FRONT (buff);
999       len = 0;
1000       for (exp = macro->exp.text;;)
1001 	{
1002 	  struct block *b = (struct block *) exp;
1003 	  size_t arglen;
1004 	  int argquote;
1005 	  uchar *base;
1006 	  uchar *in;
1007 
1008 	  len += b->text_len;
1009 	  /* Copy the non-argument text literally, keeping
1010 	     track of whether matching quotes have been seen. */
1011 	  for (arglen = b->text_len, in = b->text; arglen > 0; arglen--)
1012 	    {
1013 	      if (*in == '"')
1014 		cxtquote = ! cxtquote;
1015 	      *p++ = *in++;
1016 	    }
1017 	  /* Done if no more arguments */
1018 	  if (b->arg_index == 0)
1019 	    break;
1020 	  arglen = (fmacro->args[b->arg_index]
1021 		    - fmacro->args[b->arg_index - 1] - 1);
1022 	  base = pfile->out.base + fmacro->args[b->arg_index - 1];
1023 	  in = base;
1024 #if 0
1025 	  /* Skip leading whitespace in the text for the argument to
1026 	     be substituted. To be compatible with gcc 2.95, we would
1027 	     also need to trim trailing whitespace. Gcc 2.95 trims
1028 	     leading and trailing whitespace, which may be a bug.  The
1029 	     current gcc testsuite explicitly checks that this leading
1030 	     and trailing whitespace in actual arguments is
1031 	     preserved. */
1032 	  while (arglen > 0 && is_space (*in))
1033 	    {
1034 	      in++;
1035 	      arglen--;
1036 	    }
1037 #endif
1038 	  for (argquote = 0; arglen > 0; arglen--)
1039 	    {
1040 	      if (cxtquote && *in == '"')
1041 		{
1042 		  if (in > base && *(in-1) != '\\')
1043 		    argquote = ! argquote;
1044 		  /* Always add backslash before double quote if argument
1045 		     is expanded in a quoted context */
1046 		  *p++ = '\\';
1047 		  len++;
1048 		}
1049 	      else if (cxtquote && argquote && *in == '\\')
1050 		{
1051 		  /* Always add backslash before a backslash in an argument
1052 		     that is expanded in a quoted context and also in the
1053 		     range of a quoted context in the argument itself. */
1054 		  *p++ = '\\';
1055 		  len++;
1056 		}
1057 	      *p++ = *in++;
1058 	      len++;
1059 	    }
1060 	  exp += BLOCK_LEN (b->text_len);
1061 	}
1062 
1063       /* \n-terminate.  */
1064       *p = '\n';
1065       _cpp_push_text_context (pfile, fmacro->node, BUFF_FRONT (buff), len);
1066 
1067       /* So we free buffer allocation when macro is left.  */
1068       pfile->context->buff = buff;
1069     }
1070 }
1071 
1072 /* Read and record the parameters, if any, of a function-like macro
1073    definition.  Destroys pfile->out.cur.
1074 
1075    Returns true on success, false on failure (syntax error or a
1076    duplicate parameter).  On success, CUR (pfile->context) is just
1077    past the closing parenthesis.  */
1078 static bool
scan_parameters(cpp_reader * pfile,unsigned * n_ptr)1079 scan_parameters (cpp_reader *pfile, unsigned *n_ptr)
1080 {
1081   const uchar *cur = CUR (pfile->context) + 1;
1082   bool ok;
1083 
1084   unsigned nparms = 0;
1085   for (;;)
1086     {
1087       cur = skip_whitespace (pfile, cur, true /* skip_comments */);
1088 
1089       if (is_idstart (*cur))
1090 	{
1091 	  struct cpp_hashnode *id = lex_identifier (pfile, cur);
1092 	  ok = false;
1093 	  if (!_cpp_save_parameter (pfile, nparms, id, id))
1094 	    break;
1095 	  nparms++;
1096 	  cur = skip_whitespace (pfile, CUR (pfile->context),
1097 				 true /* skip_comments */);
1098 	  if (*cur == ',')
1099 	    {
1100 	      cur++;
1101 	      continue;
1102 	    }
1103 	  ok = (*cur == ')');
1104 	  break;
1105 	}
1106 
1107       ok = (*cur == ')' && !nparms);
1108       break;
1109     }
1110 
1111   *n_ptr = nparms;
1112 
1113   if (!ok)
1114     cpp_error (pfile, CPP_DL_ERROR, "syntax error in macro parameter list");
1115 
1116   CUR (pfile->context) = cur + (*cur == ')');
1117 
1118   return ok;
1119 }
1120 
1121 /* Save the text from pfile->out.base to pfile->out.cur as
1122    the replacement text for the current macro, followed by argument
1123    ARG_INDEX, with zero indicating the end of the replacement
1124    text.  */
1125 static void
save_replacement_text(cpp_reader * pfile,cpp_macro * macro,unsigned int arg_index)1126 save_replacement_text (cpp_reader *pfile, cpp_macro *macro,
1127 		       unsigned int arg_index)
1128 {
1129   size_t len = pfile->out.cur - pfile->out.base;
1130   uchar *exp;
1131 
1132   if (macro->paramc == 0)
1133     {
1134       /* Object-like and function-like macros without parameters
1135 	 simply store their \n-terminated replacement text.  */
1136       exp = _cpp_unaligned_alloc (pfile, len + 1);
1137       memcpy (exp, pfile->out.base, len);
1138       exp[len] = '\n';
1139       macro->exp.text = exp;
1140       macro->count = len;
1141     }
1142   else
1143     {
1144       /* Store the text's length (unsigned int), the argument index
1145 	 (unsigned short, base 1) and then the text.  */
1146       size_t blen = BLOCK_LEN (len);
1147       struct block *block;
1148 
1149       if (macro->count + blen > BUFF_ROOM (pfile->a_buff))
1150 	_cpp_extend_buff (pfile, &pfile->a_buff, macro->count + blen);
1151 
1152       exp = BUFF_FRONT (pfile->a_buff);
1153       block = (struct block *) (exp + macro->count);
1154       macro->exp.text = exp;
1155 
1156       /* Write out the block information.  */
1157       block->text_len = len;
1158       block->arg_index = arg_index;
1159       memcpy (block->text, pfile->out.base, len);
1160 
1161       /* Lex the rest into the start of the output buffer.  */
1162       pfile->out.cur = pfile->out.base;
1163 
1164       macro->count += blen;
1165 
1166       /* If we've finished, commit the memory.  */
1167       if (arg_index == 0)
1168 	BUFF_FRONT (pfile->a_buff) += macro->count;
1169     }
1170 }
1171 
1172 /* Analyze and save the replacement text of a macro.  Returns true on
1173    success.  */
1174 cpp_macro *
_cpp_create_trad_definition(cpp_reader * pfile)1175 _cpp_create_trad_definition (cpp_reader *pfile)
1176 {
1177   const uchar *cur;
1178   uchar *limit;
1179   cpp_context *context = pfile->context;
1180   unsigned nparms = 0;
1181   int fun_like = 0;
1182   cpp_hashnode **params = NULL;
1183 
1184   /* The context has not been set up for command line defines, and CUR
1185      has not been updated for the macro name for in-file defines.  */
1186   pfile->out.cur = pfile->out.base;
1187   CUR (context) = pfile->buffer->cur;
1188   RLIMIT (context) = pfile->buffer->rlimit;
1189   check_output_buffer (pfile, RLIMIT (context) - CUR (context));
1190 
1191   /* Is this a function-like macro?  */
1192   if (* CUR (context) == '(')
1193     {
1194       fun_like = +1;
1195       if (scan_parameters (pfile, &nparms))
1196 	params = (cpp_hashnode **)_cpp_commit_buff
1197 	  (pfile, sizeof (cpp_hashnode *) * nparms);
1198       else
1199 	fun_like = -1;
1200     }
1201 
1202   cpp_macro *macro = NULL;
1203 
1204   if (fun_like >= 0)
1205     {
1206       macro = _cpp_new_macro (pfile, cmk_traditional,
1207 			      _cpp_aligned_alloc (pfile, sizeof (cpp_macro)));
1208       macro->parm.params = params;
1209       macro->paramc = nparms;
1210       macro->fun_like = fun_like != 0;
1211     }
1212 
1213   /* Skip leading whitespace in the replacement text.  */
1214   pfile->buffer->cur
1215     = skip_whitespace (pfile, CUR (context),
1216 		       CPP_OPTION (pfile, discard_comments_in_macro_exp));
1217 
1218   pfile->state.prevent_expansion++;
1219   _cpp_scan_out_logical_line (pfile, macro, false);
1220   pfile->state.prevent_expansion--;
1221 
1222   _cpp_unsave_parameters (pfile, nparms);
1223 
1224   if (macro)
1225     {
1226       /* Skip trailing white space.  */
1227       cur = pfile->out.base;
1228       limit = pfile->out.cur;
1229       while (limit > cur && is_space (limit[-1]))
1230 	limit--;
1231       pfile->out.cur = limit;
1232       save_replacement_text (pfile, macro, 0);
1233     }
1234 
1235   return macro;
1236 }
1237 
1238 /* Copy SRC of length LEN to DEST, but convert all contiguous
1239    whitespace to a single space, provided it is not in quotes.  The
1240    quote currently in effect is pointed to by PQUOTE, and is updated
1241    by the function.  Returns the number of bytes copied.  */
1242 static size_t
canonicalize_text(uchar * dest,const uchar * src,size_t len,uchar * pquote)1243 canonicalize_text (uchar *dest, const uchar *src, size_t len, uchar *pquote)
1244 {
1245   uchar *orig_dest = dest;
1246   uchar quote = *pquote;
1247 
1248   while (len)
1249     {
1250       if (is_space (*src) && !quote)
1251 	{
1252 	  do
1253 	    src++, len--;
1254 	  while (len && is_space (*src));
1255 	  *dest++ = ' ';
1256 	}
1257       else
1258 	{
1259 	  if (*src == '\'' || *src == '"')
1260 	    {
1261 	      if (!quote)
1262 		quote = *src;
1263 	      else if (quote == *src)
1264 		quote = 0;
1265 	    }
1266 	  *dest++ = *src++, len--;
1267 	}
1268     }
1269 
1270   *pquote = quote;
1271   return dest - orig_dest;
1272 }
1273 
1274 /* Returns true if MACRO1 and MACRO2 have expansions different other
1275    than in the form of their whitespace.  */
1276 bool
_cpp_expansions_different_trad(const cpp_macro * macro1,const cpp_macro * macro2)1277 _cpp_expansions_different_trad (const cpp_macro *macro1,
1278 				const cpp_macro *macro2)
1279 {
1280   uchar *p1 = XNEWVEC (uchar, macro1->count + macro2->count);
1281   uchar *p2 = p1 + macro1->count;
1282   uchar quote1 = 0, quote2 = 0;
1283   bool mismatch;
1284   size_t len1, len2;
1285 
1286   if (macro1->paramc > 0)
1287     {
1288       const uchar *exp1 = macro1->exp.text, *exp2 = macro2->exp.text;
1289 
1290       mismatch = true;
1291       for (;;)
1292 	{
1293 	  struct block *b1 = (struct block *) exp1;
1294 	  struct block *b2 = (struct block *) exp2;
1295 
1296 	  if (b1->arg_index != b2->arg_index)
1297 	    break;
1298 
1299 	  len1 = canonicalize_text (p1, b1->text, b1->text_len, &quote1);
1300 	  len2 = canonicalize_text (p2, b2->text, b2->text_len, &quote2);
1301 	  if (len1 != len2 || memcmp (p1, p2, len1))
1302 	    break;
1303 	  if (b1->arg_index == 0)
1304 	    {
1305 	      mismatch = false;
1306 	      break;
1307 	    }
1308 	  exp1 += BLOCK_LEN (b1->text_len);
1309 	  exp2 += BLOCK_LEN (b2->text_len);
1310 	}
1311     }
1312   else
1313     {
1314       len1 = canonicalize_text (p1, macro1->exp.text, macro1->count, &quote1);
1315       len2 = canonicalize_text (p2, macro2->exp.text, macro2->count, &quote2);
1316       mismatch = (len1 != len2 || memcmp (p1, p2, len1));
1317     }
1318 
1319   free (p1);
1320   return mismatch;
1321 }
1322