1 /* CPP Library - traditional lexical analysis and macro expansion.
2    Copyright (C) 2002-2020 Free Software Foundation, Inc.
3    Contributed by Neil Booth, May 2002
4 
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
8 later version.
9 
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU General Public License for more details.
14 
15 You should have received a copy of the GNU General Public License
16 along with this program; see the file COPYING3.  If not see
17 <http://www.gnu.org/licenses/>.  */
18 
19 #include "config.h"
20 #include "system.h"
21 #include "cpplib.h"
22 #include "internal.h"
23 
24 /* The replacement text of a function-like macro is stored as a
25    contiguous sequence of aligned blocks, each representing the text
26    between subsequent parameters.
27 
28    Each block comprises the text between its surrounding parameters,
29    the length of that text, and the one-based index of the following
30    parameter.  The final block in the replacement text is easily
31    recognizable as it has an argument index of zero.  */
32 
struct block
{
  /* Number of bytes of literal replacement text held in TEXT.  */
  unsigned int text_len;
  /* One-based index of the parameter that follows TEXT, or zero when
     this is the final block of the replacement text.  */
  unsigned short arg_index;
  /* The literal text.  Declared with a single element; users of the
     structure read TEXT_LEN bytes from here and step to the next
     block with BLOCK_LEN (TEXT_LEN).  */
  uchar text[1];
};

/* Offset of the TEXT member, i.e. the size of the fixed part of a
   block that precedes its text.  */
#define BLOCK_HEADER_LEN offsetof (struct block, text)
/* Size of a whole block whose text is TEXT_LEN bytes long: header plus
   text, passed through CPP_ALIGN.  */
#define BLOCK_LEN(TEXT_LEN) CPP_ALIGN (BLOCK_HEADER_LEN + (TEXT_LEN))
42 
/* Structure holding information about a function-like macro
   invocation.  It is filled in by maybe_start_funlike() when the macro
   name is seen and updated by save_argument() as arguments are
   delimited.  */
struct fun_macro
{
  /* Memory buffer holding the ARGS array.  */
  _cpp_buff *buff;

  /* An array of size the number of macro parameters + 1, containing
     the offsets of the start of each macro argument in the output
     buffer.  The argument continues until the character before the
     start of the next one.  */
  size_t *args;

  /* The hashnode of the macro.  */
  cpp_hashnode *node;

  /* The offset of the macro name in the output buffer.  */
  size_t offset;

  /* The line the macro name appeared on.  */
  location_t line;

  /* Number of parameters.  */
  unsigned int paramc;

  /* Zero-based index of argument being currently lexed.  It is
     incremented each time an argument is completed, so it may exceed
     PARAMC when too many arguments are supplied.  */
  unsigned int argc;
};
71 
/* Lexing state.  It is mostly used to prevent macro expansion: macros
   are only expanded in the ls_none and ls_fun_open states.  */
enum ls {ls_none = 0,		/* Normal state.  */
	 ls_fun_open,		/* Seen a function-like macro name; looking for '('.  */
	 ls_fun_close,		/* Collecting arguments; looking for ')'.  */
	 ls_defined,		/* After defined.  */
	 ls_defined_close,	/* Looking for ')' of defined().  */
	 ls_hash,		/* After # in preprocessor conditional.  */
	 ls_predicate,		/* After the predicate, maybe paren?  */
	 ls_answer		/* In answer to predicate.  */
};
82 
83 /* Lexing TODO: Maybe handle space in escaped newlines.  Stop lex.c
84    from recognizing comments and directives during its lexing pass.  */
85 
/* Forward declarations of the file-local helpers.  */
static const uchar *skip_whitespace (cpp_reader *, const uchar *, int);
static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
static const uchar *copy_comment (cpp_reader *, const uchar *, int);
static void check_output_buffer (cpp_reader *, size_t);
static void push_replacement_text (cpp_reader *, cpp_hashnode *);
static bool scan_parameters (cpp_reader *, unsigned *);
static bool recursive_macro (cpp_reader *, cpp_hashnode *);
static void save_replacement_text (cpp_reader *, cpp_macro *, unsigned int);
static void maybe_start_funlike (cpp_reader *, cpp_hashnode *, const uchar *,
				 struct fun_macro *);
static void save_argument (struct fun_macro *, size_t);
static void replace_args_and_push (cpp_reader *, struct fun_macro *);
static size_t canonicalize_text (uchar *, const uchar *, size_t, uchar *);
99 
100 /* Ensures we have N bytes' space in the output buffer, and
101    reallocates it if not.  */
102 static void
check_output_buffer(cpp_reader * pfile,size_t n)103 check_output_buffer (cpp_reader *pfile, size_t n)
104 {
105   /* We might need two bytes to terminate an unterminated comment, and
106      one more to terminate the line with a NUL.  */
107   n += 2 + 1;
108 
109   if (n > (size_t) (pfile->out.limit - pfile->out.cur))
110     {
111       size_t size = pfile->out.cur - pfile->out.base;
112       size_t new_size = (size + n) * 3 / 2;
113 
114       pfile->out.base = XRESIZEVEC (unsigned char, pfile->out.base, new_size);
115       pfile->out.limit = pfile->out.base + new_size;
116       pfile->out.cur = pfile->out.base + size;
117     }
118 }
119 
120 /* Skip a C-style block comment in a macro as a result of -CC.
121    PFILE->buffer->cur points to the initial asterisk of the comment,
122    change it to point to after the '*' and '/' characters that terminate it.
123    Return true if the macro has not been termined, in that case set
124    PFILE->buffer->cur to the end of the buffer.  */
125 static bool
skip_macro_block_comment(cpp_reader * pfile)126 skip_macro_block_comment (cpp_reader *pfile)
127 {
128   const uchar *cur = pfile->buffer->cur;
129 
130   cur++;
131   if (*cur == '/')
132     cur++;
133 
134   /* People like decorating comments with '*', so check for '/'
135      instead for efficiency.  */
136   while (! (*cur++ == '/' && cur[-2] == '*'))
137     if (cur[-1] == '\n')
138       {
139 	pfile->buffer->cur = cur - 1;
140 	return true;
141       }
142 
143   pfile->buffer->cur = cur;
144   return false;
145 }
146 
147 /* CUR points to the asterisk introducing a comment in the current
148    context.  IN_DEFINE is true if we are in the replacement text of a
149    macro.
150 
151    The asterisk and following comment is copied to the buffer pointed
152    to by pfile->out.cur, which must be of sufficient size.
153    Unterminated comments are diagnosed, and correctly terminated in
154    the output.  pfile->out.cur is updated depending upon IN_DEFINE,
155    -C, -CC and pfile->state.in_directive.
156 
157    Returns a pointer to the first character after the comment in the
158    input buffer.  */
159 static const uchar *
copy_comment(cpp_reader * pfile,const uchar * cur,int in_define)160 copy_comment (cpp_reader *pfile, const uchar *cur, int in_define)
161 {
162   bool unterminated, copy = false;
163   location_t src_loc = pfile->line_table->highest_line;
164   cpp_buffer *buffer = pfile->buffer;
165 
166   buffer->cur = cur;
167   if (pfile->context->prev)
168     unterminated = skip_macro_block_comment (pfile);
169   else
170     unterminated = _cpp_skip_block_comment (pfile);
171 
172   if (unterminated)
173     cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
174 			 "unterminated comment");
175 
176   /* Comments in directives become spaces so that tokens are properly
177      separated when the ISO preprocessor re-lexes the line.  The
178      exception is #define.  */
179   if (pfile->state.in_directive)
180     {
181       if (in_define)
182 	{
183 	  if (CPP_OPTION (pfile, discard_comments_in_macro_exp))
184 	    pfile->out.cur--;
185 	  else
186 	    copy = true;
187 	}
188       else
189 	pfile->out.cur[-1] = ' ';
190     }
191   else if (CPP_OPTION (pfile, discard_comments))
192     pfile->out.cur--;
193   else
194     copy = true;
195 
196   if (copy)
197     {
198       size_t len = (size_t) (buffer->cur - cur);
199       memcpy (pfile->out.cur, cur, len);
200       pfile->out.cur += len;
201       if (unterminated)
202 	{
203 	  *pfile->out.cur++ = '*';
204 	  *pfile->out.cur++ = '/';
205 	}
206     }
207 
208   return buffer->cur;
209 }
210 
211 /* CUR points to any character in the input buffer.  Skips over all
212    contiguous horizontal white space and NULs, including comments if
213    SKIP_COMMENTS, until reaching the first non-horizontal-whitespace
214    character or the end of the current context.  Escaped newlines are
215    removed.
216 
217    The whitespace is copied verbatim to the output buffer, except that
218    comments are handled as described in copy_comment().
219    pfile->out.cur is updated.
220 
221    Returns a pointer to the first character after the whitespace in
222    the input buffer.  */
223 static const uchar *
skip_whitespace(cpp_reader * pfile,const uchar * cur,int skip_comments)224 skip_whitespace (cpp_reader *pfile, const uchar *cur, int skip_comments)
225 {
226   uchar *out = pfile->out.cur;
227 
228   for (;;)
229     {
230       unsigned int c = *cur++;
231       *out++ = c;
232 
233       if (is_nvspace (c))
234 	continue;
235 
236       if (c == '/' && *cur == '*' && skip_comments)
237 	{
238 	  pfile->out.cur = out;
239 	  cur = copy_comment (pfile, cur, false /* in_define */);
240 	  out = pfile->out.cur;
241 	  continue;
242 	}
243 
244       out--;
245       break;
246     }
247 
248   pfile->out.cur = out;
249   return cur - 1;
250 }
251 
252 /* Lexes and outputs an identifier starting at CUR, which is assumed
253    to point to a valid first character of an identifier.  Returns
254    the hashnode, and updates out.cur.  */
255 static cpp_hashnode *
lex_identifier(cpp_reader * pfile,const uchar * cur)256 lex_identifier (cpp_reader *pfile, const uchar *cur)
257 {
258   size_t len;
259   uchar *out = pfile->out.cur;
260   cpp_hashnode *result;
261 
262   do
263     *out++ = *cur++;
264   while (is_numchar (*cur));
265 
266   CUR (pfile->context) = cur;
267   len = out - pfile->out.cur;
268   result = CPP_HASHNODE (ht_lookup (pfile->hash_table, pfile->out.cur,
269 				    len, HT_ALLOC));
270   pfile->out.cur = out;
271   return result;
272 }
273 
274 /* Overlays the true file buffer temporarily with text of length LEN
275    starting at START.  The true buffer is restored upon calling
276    restore_buff().  */
277 void
_cpp_overlay_buffer(cpp_reader * pfile,const uchar * start,size_t len)278 _cpp_overlay_buffer (cpp_reader *pfile, const uchar *start, size_t len)
279 {
280   cpp_buffer *buffer = pfile->buffer;
281 
282   pfile->overlaid_buffer = buffer;
283   pfile->saved_cur = buffer->cur;
284   pfile->saved_rlimit = buffer->rlimit;
285   pfile->saved_line_base = buffer->next_line;
286   buffer->need_line = false;
287 
288   buffer->cur = start;
289   buffer->line_base = start;
290   buffer->rlimit = start + len;
291 }
292 
293 /* Restores a buffer overlaid by _cpp_overlay_buffer().  */
294 void
_cpp_remove_overlay(cpp_reader * pfile)295 _cpp_remove_overlay (cpp_reader *pfile)
296 {
297   cpp_buffer *buffer = pfile->overlaid_buffer;
298 
299   buffer->cur = pfile->saved_cur;
300   buffer->rlimit = pfile->saved_rlimit;
301   buffer->line_base = pfile->saved_line_base;
302   buffer->need_line = true;
303 
304   pfile->overlaid_buffer = NULL;
305 }
306 
307 /* Reads a logical line into the output buffer.  Returns TRUE if there
308    is more text left in the buffer.  */
309 bool
_cpp_read_logical_line_trad(cpp_reader * pfile)310 _cpp_read_logical_line_trad (cpp_reader *pfile)
311 {
312   do
313     {
314       if (pfile->buffer->need_line && !_cpp_get_fresh_line (pfile))
315 	return false;
316     }
317   while (!_cpp_scan_out_logical_line (pfile, NULL, false)
318 	 || pfile->state.skipping);
319 
320   return pfile->buffer != NULL;
321 }
322 
323 /* Return true if NODE is a fun_like macro.  */
324 static inline bool
fun_like_macro(cpp_hashnode * node)325 fun_like_macro (cpp_hashnode *node)
326 {
327   if (cpp_builtin_macro_p (node))
328     return (node->value.builtin == BT_HAS_ATTRIBUTE
329 	    || node->value.builtin == BT_HAS_BUILTIN
330 	    || node->value.builtin == BT_HAS_INCLUDE
331 	    || node->value.builtin == BT_HAS_INCLUDE_NEXT);
332   return node->value.macro->fun_like;
333 }
334 
335 /* Set up state for finding the opening '(' of a function-like
336    macro.  */
337 static void
maybe_start_funlike(cpp_reader * pfile,cpp_hashnode * node,const uchar * start,struct fun_macro * macro)338 maybe_start_funlike (cpp_reader *pfile, cpp_hashnode *node, const uchar *start,
339 		     struct fun_macro *macro)
340 {
341   unsigned int n;
342   if (cpp_builtin_macro_p (node))
343     n = 1;
344   else
345     n = node->value.macro->paramc;
346 
347   if (macro->buff)
348     _cpp_release_buff (pfile, macro->buff);
349   macro->buff = _cpp_get_buff (pfile, (n + 1) * sizeof (size_t));
350   macro->args = (size_t *) BUFF_FRONT (macro->buff);
351   macro->node = node;
352   macro->offset = start - pfile->out.base;
353   macro->paramc = n;
354   macro->argc = 0;
355 }
356 
357 /* Save the OFFSET of the start of the next argument to MACRO.  */
358 static void
save_argument(struct fun_macro * macro,size_t offset)359 save_argument (struct fun_macro *macro, size_t offset)
360 {
361   macro->argc++;
362   if (macro->argc <= macro->paramc)
363     macro->args[macro->argc] = offset;
364 }
365 
/* Copies the next logical line in the current buffer (starting at
   buffer->cur) to the output buffer.  The output is guaranteed to
   terminate with a NUL character.  buffer->cur is updated.

   If MACRO is non-NULL, then we are scanning the replacement list of
   MACRO, and we call save_replacement_text() every time we meet an
   argument.

   If BUILTIN_MACRO_ARG is true, this is called to macro expand
   arguments of builtin function-like macros.

   Returns false if the line turned out to be a directive (either a
   null directive or one handed to _cpp_handle_directive), and true
   otherwise.  */
bool
_cpp_scan_out_logical_line (cpp_reader *pfile, cpp_macro *macro,
			    bool builtin_macro_arg)
{
  bool result = true;
  cpp_context *context;
  const uchar *cur;
  uchar *out;
  struct fun_macro fmacro;
  unsigned int c, paren_depth = 0, quote;
  enum ls lex_state = ls_none;
  bool header_ok;
  const uchar *start_of_input_line;

  /* No function-like macro invocation is in progress yet.  */
  fmacro.buff = NULL;
  fmacro.args = NULL;
  fmacro.node = NULL;
  fmacro.offset = 0;
  fmacro.line = 0;
  fmacro.paramc = 0;
  fmacro.argc = 0;

  quote = 0;
  header_ok = pfile->state.angled_headers;
  CUR (pfile->context) = pfile->buffer->cur;
  RLIMIT (pfile->context) = pfile->buffer->rlimit;
  /* When expanding a builtin macro's argument the output is appended
     to what is already there; otherwise start a fresh output line.  */
  if (!builtin_macro_arg)
    {
      pfile->out.cur = pfile->out.base;
      pfile->out.first_line = pfile->line_table->highest_line;
    }
  /* start_of_input_line is needed to make sure that directives really,
     really start at the first character of the line.  */
  start_of_input_line = pfile->buffer->cur;
  /* Re-entered whenever the context stack has changed, i.e. after a
     macro expansion has been pushed or popped.  */
 new_context:
  context = pfile->context;
  cur = CUR (context);
  check_output_buffer (pfile, RLIMIT (context) - cur);
  out = pfile->out.cur;

  for (;;)
    {
      if (!context->prev
	  && !builtin_macro_arg
	  && cur >= pfile->buffer->notes[pfile->buffer->cur_note].pos)
	{
	  pfile->buffer->cur = cur;
	  _cpp_process_line_notes (pfile, false);
	}
      /* Every character is copied to the output first; the cases
	 below back it out again where it must not appear.  */
      c = *cur++;
      *out++ = c;

      /* Whitespace should "continue" out of the switch,
	 non-whitespace should "break" out of it.  */
      switch (c)
	{
	case ' ':
	case '\t':
	case '\f':
	case '\v':
	case '\0':
	  continue;

	case '\n':
	  /* If this is a macro's expansion, pop it.  */
	  if (context->prev)
	    {
	      pfile->out.cur = out - 1;
	      _cpp_pop_context (pfile);
	      goto new_context;
	    }

	  /* Omit the newline from the output buffer.  */
	  pfile->out.cur = out - 1;
	  pfile->buffer->cur = cur;
	  if (builtin_macro_arg)
	    goto done;
	  pfile->buffer->need_line = true;
	  CPP_INCREMENT_LINE (pfile, 0);

	  /* Outside directives, a macro invocation may continue on the
	     following line, so fetch it and carry on scanning.  */
	  if ((lex_state == ls_fun_open || lex_state == ls_fun_close)
	      && !pfile->state.in_directive
	      && _cpp_get_fresh_line (pfile))
	    {
	      /* Newlines in arguments become a space, but we don't
		 clear any in-progress quote.  */
	      if (lex_state == ls_fun_close)
		out[-1] = ' ';
	      cur = pfile->buffer->cur;
	      continue;
	    }
	  goto done;

	case '<':
	  if (header_ok)
	    quote = '>';
	  break;
	case '>':
	  if (c == quote)
	    quote = 0;
	  break;

	case '"':
	case '\'':
	  if (c == quote)
	    quote = 0;
	  else if (!quote)
	    quote = c;
	  break;

	case '\\':
	  /* Skip escaped quotes here, it's easier than above.  */
	  if (*cur == '\\' || *cur == '"' || *cur == '\'')
	    *out++ = *cur++;
	  break;

	case '/':
	  /* Traditional CPP does not recognize comments within
	     literals.  */
	  if (!quote && *cur == '*')
	    {
	      pfile->out.cur = out;
	      cur = copy_comment (pfile, cur, macro != 0);
	      out = pfile->out.cur;
	      continue;
	    }
	  break;

	case '_':
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
	case 'y': case 'z':
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
	case 'Y': case 'Z':
	  /* Identifiers are looked up outside quotes, and also inside
	     quotes while scanning a macro definition.  */
	  if (!pfile->state.skipping && (quote == 0 || macro))
	    {
	      cpp_hashnode *node;
	      uchar *out_start = out - 1;

	      pfile->out.cur = out_start;
	      node = lex_identifier (pfile, cur - 1);
	      out = pfile->out.cur;
	      cur = CUR (context);

	      if (cpp_macro_p (node)
		  /* Should we expand for ls_answer?  */
		  && (lex_state == ls_none || lex_state == ls_fun_open)
		  && !pfile->state.prevent_expansion)
		{
		  /* Macros invalidate MI optimization.  */
		  pfile->mi_valid = false;
		  if (fun_like_macro (node))
		    {
		      maybe_start_funlike (pfile, node, out_start, &fmacro);
		      lex_state = ls_fun_open;
		      fmacro.line = pfile->line_table->highest_line;
		      continue;
		    }
		  else if (!recursive_macro (pfile, node))
		    {
		      /* Remove the object-like macro's name from the
			 output, and push its replacement text.  */
		      pfile->out.cur = out_start;
		      push_replacement_text (pfile, node);
		      lex_state = ls_none;
		      goto new_context;
		    }
		}
	      else if (macro && node->type == NT_MACRO_ARG)
		{
		  /* Found a parameter in the replacement text of a
		     #define.  Remove its name from the output.  */
		  pfile->out.cur = out_start;
		  save_replacement_text (pfile, macro, node->value.arg_index);
		  out = pfile->out.base;
		}
	      else if (lex_state == ls_hash)
		{
		  lex_state = ls_predicate;
		  continue;
		}
	      else if (pfile->state.in_expression
		       && node == pfile->spec_nodes.n_defined)
		{
		  lex_state = ls_defined;
		  continue;
		}
	    }
	  break;

	case '(':
	  if (quote == 0)
	    {
	      paren_depth++;
	      if (lex_state == ls_fun_open)
		{
		  if (recursive_macro (pfile, fmacro.node))
		    lex_state = ls_none;
		  else
		    {
		      /* Start collecting arguments.  The output is
			 rewound to where the macro name began, so the
			 first argument starts at that offset.  */
		      lex_state = ls_fun_close;
		      paren_depth = 1;
		      out = pfile->out.base + fmacro.offset;
		      fmacro.args[0] = fmacro.offset;
		    }
		}
	      else if (lex_state == ls_predicate)
		lex_state = ls_answer;
	      else if (lex_state == ls_defined)
		lex_state = ls_defined_close;
	    }
	  break;

	case ',':
	  /* Only a top-level, unquoted comma separates arguments.  */
	  if (quote == 0 && lex_state == ls_fun_close && paren_depth == 1)
	    save_argument (&fmacro, out - pfile->out.base);
	  break;

	case ')':
	  if (quote == 0)
	    {
	      paren_depth--;
	      if (lex_state == ls_fun_close && paren_depth == 0)
		{
		  if (cpp_builtin_macro_p (fmacro.node))
		    {
		      /* Handle builtin function-like macros like
			 __has_attribute.  The already parsed arguments
			 are put into a buffer, which is then preprocessed
			 and the result is fed to _cpp_push_text_context
			 with disabled expansion, where the ISO preprocessor
			 parses it.  While in traditional preprocessing
			 macro arguments aren't immediately expanded, they in
			 the end are because the macro with replaced arguments
			 is preprocessed again.  For the builtin function-like
			 macros we need the argument immediately though,
			 if we don't preprocess them, they would behave
			 very differently from ISO preprocessor handling
			 of those builtin macros.  So, this handling is
			 more similar to traditional preprocessing of
			 #if directives, where we also keep preprocessing
			 until everything is expanded, and then feed the
			 result with disabled expansion to ISO preprocessor
			 for handling the directives.  */
		      lex_state = ls_none;
		      save_argument (&fmacro, out - pfile->out.base);
		      cpp_macro m;
		      memset (&m, '\0', sizeof (m));
		      m.paramc = fmacro.paramc;
		      if (_cpp_arguments_ok (pfile, &m, fmacro.node,
					     fmacro.argc))
			{
			  size_t len = fmacro.args[1] - fmacro.args[0];
			  uchar *buf;

			  /* Remove the macro's invocation from the
			     output, and push its replacement text.  */
			  pfile->out.cur = pfile->out.base + fmacro.offset;
			  CUR (context) = cur;
			  /* Build "(" + argument text + newline in a
			     scratch buffer to be scanned below.  */
			  buf = _cpp_unaligned_alloc (pfile, len + 2);
			  buf[0] = '(';
			  memcpy (buf + 1, pfile->out.base + fmacro.args[0],
				  len);
			  buf[len + 1] = '\n';

			  /* Save the buffer and context state that is
			     temporarily overridden, then point the
			     buffer at the scratch text.  */
			  const unsigned char *ctx_rlimit = RLIMIT (context);
			  const unsigned char *saved_cur = pfile->buffer->cur;
			  const unsigned char *saved_rlimit
			    = pfile->buffer->rlimit;
			  const unsigned char *saved_line_base
			    = pfile->buffer->line_base;
			  bool saved_need_line = pfile->buffer->need_line;
			  cpp_buffer *saved_overlaid_buffer
			    = pfile->overlaid_buffer;
			  pfile->buffer->cur = buf;
			  pfile->buffer->line_base = buf;
			  pfile->buffer->rlimit = buf + len + 1;
			  pfile->buffer->need_line = false;
			  pfile->overlaid_buffer = pfile->buffer;
			  bool saved_in_directive = pfile->state.in_directive;
			  pfile->state.in_directive = true;
			  cpp_context *saved_prev_context = context->prev;
			  context->prev = NULL;

			  /* Expand the argument text; the result is
			     appended to the output buffer.  */
			  _cpp_scan_out_logical_line (pfile, NULL, true);

			  /* Terminate the expanded text with a newline
			     and present it as the buffer contents while
			     the builtin is evaluated.  */
			  pfile->state.in_directive = saved_in_directive;
			  check_output_buffer (pfile, 1);
			  *pfile->out.cur = '\n';
			  pfile->buffer->cur = pfile->out.base + fmacro.offset;
			  pfile->buffer->line_base = pfile->buffer->cur;
			  pfile->buffer->rlimit = pfile->out.cur;
			  CUR (context) = pfile->buffer->cur;
			  RLIMIT (context) = pfile->buffer->rlimit;

			  pfile->state.prevent_expansion++;
			  const uchar *text
			    = _cpp_builtin_macro_text (pfile, fmacro.node);
			  pfile->state.prevent_expansion--;

			  /* Put everything back as it was before the
			     argument was expanded.  */
			  context->prev = saved_prev_context;
			  pfile->buffer->cur = saved_cur;
			  pfile->buffer->rlimit = saved_rlimit;
			  pfile->buffer->line_base = saved_line_base;
			  pfile->buffer->need_line = saved_need_line;
			  pfile->overlaid_buffer = saved_overlaid_buffer;
			  pfile->out.cur = pfile->out.base + fmacro.offset;
			  CUR (context) = cur;
			  RLIMIT (context) = ctx_rlimit;
			  /* Copy the builtin's text, newline-terminated,
			     and push it as a new context.  */
			  len = ustrlen (text);
			  buf = _cpp_unaligned_alloc (pfile, len + 1);
			  memcpy (buf, text, len);
			  buf[len] = '\n';
			  text = buf;
			  _cpp_push_text_context (pfile, fmacro.node,
						  text, len);
			  goto new_context;
			}
		      break;
		    }

		  cpp_macro *m = fmacro.node->value.macro;

		  m->used = 1;
		  lex_state = ls_none;
		  save_argument (&fmacro, out - pfile->out.base);

		  /* A single zero-length argument is no argument.  */
		  if (fmacro.argc == 1
		      && m->paramc == 0
		      && out == pfile->out.base + fmacro.offset + 1)
		    fmacro.argc = 0;

		  if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
		    {
		      /* Remove the macro's invocation from the
			 output, and push its replacement text.  */
		      pfile->out.cur = pfile->out.base + fmacro.offset;
		      CUR (context) = cur;
		      replace_args_and_push (pfile, &fmacro);
		      goto new_context;
		    }
		}
	      else if (lex_state == ls_answer || lex_state == ls_defined_close)
		lex_state = ls_none;
	    }
	  break;

	case '#':
	  if (cur - 1 == start_of_input_line
	      /* A '#' from a macro doesn't start a directive.  */
	      && !pfile->context->prev
	      && !pfile->state.in_directive)
	    {
	      /* A directive.  With the way _cpp_handle_directive
		 currently works, we only want to call it if either we
		 know the directive is OK, or we want it to fail and
		 be removed from the output.  If we want it to be
		 passed through (the assembler case) then we must not
		 call _cpp_handle_directive.  */
	      pfile->out.cur = out;
	      cur = skip_whitespace (pfile, cur, true /* skip_comments */);
	      out = pfile->out.cur;

	      if (*cur == '\n')
		{
		  /* Null directive.  Ignore it and don't invalidate
		     the MI optimization.  */
		  pfile->buffer->need_line = true;
		  CPP_INCREMENT_LINE (pfile, 0);
		  result = false;
		  goto done;
		}
	      else
		{
		  bool do_it = false;

		  if (is_numstart (*cur)
		      && CPP_OPTION (pfile, lang) != CLK_ASM)
		    do_it = true;
		  else if (is_idstart (*cur))
		    /* Check whether we know this directive, but don't
		       advance.  */
		    do_it = lex_identifier (pfile, cur)->is_directive;

		  if (do_it || CPP_OPTION (pfile, lang) != CLK_ASM)
		    {
		      /* This is a kludge.  We want to have the ISO
			 preprocessor lex the next token.  */
		      pfile->buffer->cur = cur;
		      _cpp_handle_directive (pfile, false /* indented */);
		      result = false;
		      goto done;
		    }
		}
	    }

	  if (pfile->state.in_expression)
	    {
	      lex_state = ls_hash;
	      continue;
	    }
	  break;

	default:
	  break;
	}

      /* Non-whitespace disables MI optimization and stops treating
	 '<' as a quote in #include.  */
      header_ok = false;
      if (!pfile->state.in_directive)
	pfile->mi_valid = false;

      if (lex_state == ls_none)
	continue;

      /* Some of these transitions of state are syntax errors.  The
	 ISO preprocessor will issue errors later.  */
      if (lex_state == ls_fun_open)
	/* Missing '('.  */
	lex_state = ls_none;
      else if (lex_state == ls_hash
	       || lex_state == ls_predicate
	       || lex_state == ls_defined)
	lex_state = ls_none;

      /* ls_answer and ls_defined_close keep going until ')'.  */
    }

 done:
  /* Release the argument-offset buffer, if one was ever obtained.  */
  if (fmacro.buff)
    _cpp_release_buff (pfile, fmacro.buff);

  /* The line ended while macro arguments were still being collected.  */
  if (lex_state == ls_fun_close)
    cpp_error_with_line (pfile, CPP_DL_ERROR, fmacro.line, 0,
			 "unterminated argument list invoking macro \"%s\"",
			 NODE_NAME (fmacro.node));
  return result;
}
821 
822 /* Push a context holding the replacement text of the macro NODE on
823    the context stack.  NODE is either object-like, or a function-like
824    macro with no arguments.  */
825 static void
push_replacement_text(cpp_reader * pfile,cpp_hashnode * node)826 push_replacement_text (cpp_reader *pfile, cpp_hashnode *node)
827 {
828   size_t len;
829   const uchar *text;
830   uchar *buf;
831 
832   if (cpp_builtin_macro_p (node))
833     {
834       text = _cpp_builtin_macro_text (pfile, node);
835       len = ustrlen (text);
836       buf = _cpp_unaligned_alloc (pfile, len + 1);
837       memcpy (buf, text, len);
838       buf[len] = '\n';
839       text = buf;
840     }
841   else
842     {
843       cpp_macro *macro = node->value.macro;
844       macro->used = 1;
845       text = macro->exp.text;
846       len = macro->count;
847     }
848 
849   _cpp_push_text_context (pfile, node, text, len);
850 }
851 
852 /* Returns TRUE if traditional macro recursion is detected.  */
853 static bool
recursive_macro(cpp_reader * pfile,cpp_hashnode * node)854 recursive_macro (cpp_reader *pfile, cpp_hashnode *node)
855 {
856   bool recursing = !!(node->flags & NODE_DISABLED);
857 
858   /* Object-like macros that are already expanding are necessarily
859      recursive.
860 
861      However, it is possible to have traditional function-like macros
862      that are not infinitely recursive but recurse to any given depth.
863      Further, it is easy to construct examples that get ever longer
864      until the point they stop recursing.  So there is no easy way to
865      detect true recursion; instead we assume any expansion more than
866      20 deep since the first invocation of this macro must be
867      recursing.  */
868   if (recursing && fun_like_macro (node))
869     {
870       size_t depth = 0;
871       cpp_context *context = pfile->context;
872 
873       do
874 	{
875 	  depth++;
876 	  if (context->c.macro == node && depth > 20)
877 	    break;
878 	  context = context->prev;
879 	}
880       while (context);
881       recursing = context != NULL;
882     }
883 
884   if (recursing)
885     cpp_error (pfile, CPP_DL_ERROR,
886 	       "detected recursion whilst expanding macro \"%s\"",
887 	       NODE_NAME (node));
888 
889   return recursing;
890 }
891 
892 /* Return the length of the replacement text of a function-like or
893    object-like non-builtin macro.  */
894 size_t
_cpp_replacement_text_len(const cpp_macro * macro)895 _cpp_replacement_text_len (const cpp_macro *macro)
896 {
897   size_t len;
898 
899   if (macro->fun_like && (macro->paramc != 0))
900     {
901       const uchar *exp;
902 
903       len = 0;
904       for (exp = macro->exp.text;;)
905 	{
906 	  struct block *b = (struct block *) exp;
907 
908 	  len += b->text_len;
909 	  if (b->arg_index == 0)
910 	    break;
911 	  len += NODE_LEN (macro->parm.params[b->arg_index - 1]);
912 	  exp += BLOCK_LEN (b->text_len);
913 	}
914     }
915   else
916     len = macro->count;
917 
918   return len;
919 }
920 
921 /* Copy the replacement text of MACRO to DEST, which must be of
922    sufficient size.  It is not NUL-terminated.  The next character is
923    returned.  */
924 uchar *
_cpp_copy_replacement_text(const cpp_macro * macro,uchar * dest)925 _cpp_copy_replacement_text (const cpp_macro *macro, uchar *dest)
926 {
927   if (macro->fun_like && (macro->paramc != 0))
928     {
929       const uchar *exp;
930 
931       for (exp = macro->exp.text;;)
932 	{
933 	  struct block *b = (struct block *) exp;
934 	  cpp_hashnode *param;
935 
936 	  memcpy (dest, b->text, b->text_len);
937 	  dest += b->text_len;
938 	  if (b->arg_index == 0)
939 	    break;
940 	  param = macro->parm.params[b->arg_index - 1];
941 	  memcpy (dest, NODE_NAME (param), NODE_LEN (param));
942 	  dest += NODE_LEN (param);
943 	  exp += BLOCK_LEN (b->text_len);
944 	}
945     }
946   else
947     {
948       memcpy (dest, macro->exp.text, macro->count);
949       dest += macro->count;
950     }
951 
952   return dest;
953 }
954 
955 /* Push a context holding the replacement text of the macro NODE on
956    the context stack.  NODE is either object-like, or a function-like
957    macro with no arguments.  */
958 static void
replace_args_and_push(cpp_reader * pfile,struct fun_macro * fmacro)959 replace_args_and_push (cpp_reader *pfile, struct fun_macro *fmacro)
960 {
961   cpp_macro *macro = fmacro->node->value.macro;
962 
963   if (macro->paramc == 0)
964     push_replacement_text (pfile, fmacro->node);
965   else
966     {
967       const uchar *exp;
968       uchar *p;
969       _cpp_buff *buff;
970       size_t len = 0;
971       int cxtquote = 0;
972 
973       /* Get an estimate of the length of the argument-replaced text.
974 	 This is a worst case estimate, assuming that every replacement
975 	 text character needs quoting.  */
976       for (exp = macro->exp.text;;)
977 	{
978 	  struct block *b = (struct block *) exp;
979 
980 	  len += b->text_len;
981 	  if (b->arg_index == 0)
982 	    break;
983 	  len += 2 * (fmacro->args[b->arg_index]
984 		      - fmacro->args[b->arg_index - 1] - 1);
985 	  exp += BLOCK_LEN (b->text_len);
986 	}
987 
988       /* Allocate room for the expansion plus \n.  */
989       buff = _cpp_get_buff (pfile, len + 1);
990 
991       /* Copy the expansion and replace arguments.  */
992       /* Accumulate actual length, including quoting as necessary */
993       p = BUFF_FRONT (buff);
994       len = 0;
995       for (exp = macro->exp.text;;)
996 	{
997 	  struct block *b = (struct block *) exp;
998 	  size_t arglen;
999 	  int argquote;
1000 	  uchar *base;
1001 	  uchar *in;
1002 
1003 	  len += b->text_len;
1004 	  /* Copy the non-argument text literally, keeping
1005 	     track of whether matching quotes have been seen. */
1006 	  for (arglen = b->text_len, in = b->text; arglen > 0; arglen--)
1007 	    {
1008 	      if (*in == '"')
1009 		cxtquote = ! cxtquote;
1010 	      *p++ = *in++;
1011 	    }
1012 	  /* Done if no more arguments */
1013 	  if (b->arg_index == 0)
1014 	    break;
1015 	  arglen = (fmacro->args[b->arg_index]
1016 		    - fmacro->args[b->arg_index - 1] - 1);
1017 	  base = pfile->out.base + fmacro->args[b->arg_index - 1];
1018 	  in = base;
1019 #if 0
1020 	  /* Skip leading whitespace in the text for the argument to
1021 	     be substituted. To be compatible with gcc 2.95, we would
1022 	     also need to trim trailing whitespace. Gcc 2.95 trims
1023 	     leading and trailing whitespace, which may be a bug.  The
1024 	     current gcc testsuite explicitly checks that this leading
1025 	     and trailing whitespace in actual arguments is
1026 	     preserved. */
1027 	  while (arglen > 0 && is_space (*in))
1028 	    {
1029 	      in++;
1030 	      arglen--;
1031 	    }
1032 #endif
1033 	  for (argquote = 0; arglen > 0; arglen--)
1034 	    {
1035 	      if (cxtquote && *in == '"')
1036 		{
1037 		  if (in > base && *(in-1) != '\\')
1038 		    argquote = ! argquote;
1039 		  /* Always add backslash before double quote if argument
1040 		     is expanded in a quoted context */
1041 		  *p++ = '\\';
1042 		  len++;
1043 		}
1044 	      else if (cxtquote && argquote && *in == '\\')
1045 		{
1046 		  /* Always add backslash before a backslash in an argument
1047 		     that is expanded in a quoted context and also in the
1048 		     range of a quoted context in the argument itself. */
1049 		  *p++ = '\\';
1050 		  len++;
1051 		}
1052 	      *p++ = *in++;
1053 	      len++;
1054 	    }
1055 	  exp += BLOCK_LEN (b->text_len);
1056 	}
1057 
1058       /* \n-terminate.  */
1059       *p = '\n';
1060       _cpp_push_text_context (pfile, fmacro->node, BUFF_FRONT (buff), len);
1061 
1062       /* So we free buffer allocation when macro is left.  */
1063       pfile->context->buff = buff;
1064     }
1065 }
1066 
1067 /* Read and record the parameters, if any, of a function-like macro
1068    definition.  Destroys pfile->out.cur.
1069 
1070    Returns true on success, false on failure (syntax error or a
1071    duplicate parameter).  On success, CUR (pfile->context) is just
1072    past the closing parenthesis.  */
1073 static bool
scan_parameters(cpp_reader * pfile,unsigned * n_ptr)1074 scan_parameters (cpp_reader *pfile, unsigned *n_ptr)
1075 {
1076   const uchar *cur = CUR (pfile->context) + 1;
1077   bool ok;
1078 
1079   unsigned nparms = 0;
1080   for (;;)
1081     {
1082       cur = skip_whitespace (pfile, cur, true /* skip_comments */);
1083 
1084       if (is_idstart (*cur))
1085 	{
1086 	  struct cpp_hashnode *id = lex_identifier (pfile, cur);
1087 	  ok = false;
1088 	  if (!_cpp_save_parameter (pfile, nparms, id, id))
1089 	    break;
1090 	  nparms++;
1091 	  cur = skip_whitespace (pfile, CUR (pfile->context),
1092 				 true /* skip_comments */);
1093 	  if (*cur == ',')
1094 	    {
1095 	      cur++;
1096 	      continue;
1097 	    }
1098 	  ok = (*cur == ')');
1099 	  break;
1100 	}
1101 
1102       ok = (*cur == ')' && !nparms);
1103       break;
1104     }
1105 
1106   *n_ptr = nparms;
1107 
1108   if (!ok)
1109     cpp_error (pfile, CPP_DL_ERROR, "syntax error in macro parameter list");
1110 
1111   CUR (pfile->context) = cur + (*cur == ')');
1112 
1113   return ok;
1114 }
1115 
1116 /* Save the text from pfile->out.base to pfile->out.cur as
1117    the replacement text for the current macro, followed by argument
1118    ARG_INDEX, with zero indicating the end of the replacement
1119    text.  */
1120 static void
save_replacement_text(cpp_reader * pfile,cpp_macro * macro,unsigned int arg_index)1121 save_replacement_text (cpp_reader *pfile, cpp_macro *macro,
1122 		       unsigned int arg_index)
1123 {
1124   size_t len = pfile->out.cur - pfile->out.base;
1125   uchar *exp;
1126 
1127   if (macro->paramc == 0)
1128     {
1129       /* Object-like and function-like macros without parameters
1130 	 simply store their \n-terminated replacement text.  */
1131       exp = _cpp_unaligned_alloc (pfile, len + 1);
1132       memcpy (exp, pfile->out.base, len);
1133       exp[len] = '\n';
1134       macro->exp.text = exp;
1135       macro->count = len;
1136     }
1137   else
1138     {
1139       /* Store the text's length (unsigned int), the argument index
1140 	 (unsigned short, base 1) and then the text.  */
1141       size_t blen = BLOCK_LEN (len);
1142       struct block *block;
1143 
1144       if (macro->count + blen > BUFF_ROOM (pfile->a_buff))
1145 	_cpp_extend_buff (pfile, &pfile->a_buff, macro->count + blen);
1146 
1147       exp = BUFF_FRONT (pfile->a_buff);
1148       block = (struct block *) (exp + macro->count);
1149       macro->exp.text = exp;
1150 
1151       /* Write out the block information.  */
1152       block->text_len = len;
1153       block->arg_index = arg_index;
1154       memcpy (block->text, pfile->out.base, len);
1155 
1156       /* Lex the rest into the start of the output buffer.  */
1157       pfile->out.cur = pfile->out.base;
1158 
1159       macro->count += blen;
1160 
1161       /* If we've finished, commit the memory.  */
1162       if (arg_index == 0)
1163 	BUFF_FRONT (pfile->a_buff) += macro->count;
1164     }
1165 }
1166 
1167 /* Analyze and save the replacement text of a macro.  Returns true on
1168    success.  */
1169 cpp_macro *
_cpp_create_trad_definition(cpp_reader * pfile)1170 _cpp_create_trad_definition (cpp_reader *pfile)
1171 {
1172   const uchar *cur;
1173   uchar *limit;
1174   cpp_context *context = pfile->context;
1175   unsigned nparms = 0;
1176   int fun_like = 0;
1177   cpp_hashnode **params = NULL;
1178 
1179   /* The context has not been set up for command line defines, and CUR
1180      has not been updated for the macro name for in-file defines.  */
1181   pfile->out.cur = pfile->out.base;
1182   CUR (context) = pfile->buffer->cur;
1183   RLIMIT (context) = pfile->buffer->rlimit;
1184   check_output_buffer (pfile, RLIMIT (context) - CUR (context));
1185 
1186   /* Is this a function-like macro?  */
1187   if (* CUR (context) == '(')
1188     {
1189       fun_like = +1;
1190       if (scan_parameters (pfile, &nparms))
1191 	params = (cpp_hashnode **)_cpp_commit_buff
1192 	  (pfile, sizeof (cpp_hashnode *) * nparms);
1193       else
1194 	fun_like = -1;
1195     }
1196 
1197   cpp_macro *macro = NULL;
1198 
1199   if (fun_like >= 0)
1200     {
1201       macro = _cpp_new_macro (pfile, cmk_traditional,
1202 			      _cpp_aligned_alloc (pfile, sizeof (cpp_macro)));
1203       macro->parm.params = params;
1204       macro->paramc = nparms;
1205       macro->fun_like = fun_like != 0;
1206     }
1207 
1208   /* Skip leading whitespace in the replacement text.  */
1209   pfile->buffer->cur
1210     = skip_whitespace (pfile, CUR (context),
1211 		       CPP_OPTION (pfile, discard_comments_in_macro_exp));
1212 
1213   pfile->state.prevent_expansion++;
1214   _cpp_scan_out_logical_line (pfile, macro, false);
1215   pfile->state.prevent_expansion--;
1216 
1217   _cpp_unsave_parameters (pfile, nparms);
1218 
1219   if (macro)
1220     {
1221       /* Skip trailing white space.  */
1222       cur = pfile->out.base;
1223       limit = pfile->out.cur;
1224       while (limit > cur && is_space (limit[-1]))
1225 	limit--;
1226       pfile->out.cur = limit;
1227       save_replacement_text (pfile, macro, 0);
1228     }
1229 
1230   return macro;
1231 }
1232 
1233 /* Copy SRC of length LEN to DEST, but convert all contiguous
1234    whitespace to a single space, provided it is not in quotes.  The
1235    quote currently in effect is pointed to by PQUOTE, and is updated
1236    by the function.  Returns the number of bytes copied.  */
1237 static size_t
canonicalize_text(uchar * dest,const uchar * src,size_t len,uchar * pquote)1238 canonicalize_text (uchar *dest, const uchar *src, size_t len, uchar *pquote)
1239 {
1240   uchar *orig_dest = dest;
1241   uchar quote = *pquote;
1242 
1243   while (len)
1244     {
1245       if (is_space (*src) && !quote)
1246 	{
1247 	  do
1248 	    src++, len--;
1249 	  while (len && is_space (*src));
1250 	  *dest++ = ' ';
1251 	}
1252       else
1253 	{
1254 	  if (*src == '\'' || *src == '"')
1255 	    {
1256 	      if (!quote)
1257 		quote = *src;
1258 	      else if (quote == *src)
1259 		quote = 0;
1260 	    }
1261 	  *dest++ = *src++, len--;
1262 	}
1263     }
1264 
1265   *pquote = quote;
1266   return dest - orig_dest;
1267 }
1268 
1269 /* Returns true if MACRO1 and MACRO2 have expansions different other
1270    than in the form of their whitespace.  */
1271 bool
_cpp_expansions_different_trad(const cpp_macro * macro1,const cpp_macro * macro2)1272 _cpp_expansions_different_trad (const cpp_macro *macro1,
1273 				const cpp_macro *macro2)
1274 {
1275   uchar *p1 = XNEWVEC (uchar, macro1->count + macro2->count);
1276   uchar *p2 = p1 + macro1->count;
1277   uchar quote1 = 0, quote2 = 0;
1278   bool mismatch;
1279   size_t len1, len2;
1280 
1281   if (macro1->paramc > 0)
1282     {
1283       const uchar *exp1 = macro1->exp.text, *exp2 = macro2->exp.text;
1284 
1285       mismatch = true;
1286       for (;;)
1287 	{
1288 	  struct block *b1 = (struct block *) exp1;
1289 	  struct block *b2 = (struct block *) exp2;
1290 
1291 	  if (b1->arg_index != b2->arg_index)
1292 	    break;
1293 
1294 	  len1 = canonicalize_text (p1, b1->text, b1->text_len, &quote1);
1295 	  len2 = canonicalize_text (p2, b2->text, b2->text_len, &quote2);
1296 	  if (len1 != len2 || memcmp (p1, p2, len1))
1297 	    break;
1298 	  if (b1->arg_index == 0)
1299 	    {
1300 	      mismatch = false;
1301 	      break;
1302 	    }
1303 	  exp1 += BLOCK_LEN (b1->text_len);
1304 	  exp2 += BLOCK_LEN (b2->text_len);
1305 	}
1306     }
1307   else
1308     {
1309       len1 = canonicalize_text (p1, macro1->exp.text, macro1->count, &quote1);
1310       len2 = canonicalize_text (p2, macro2->exp.text, macro2->count, &quote2);
1311       mismatch = (len1 != len2 || memcmp (p1, p2, len1));
1312     }
1313 
1314   free (p1);
1315   return mismatch;
1316 }
1317