1 /* CPP Library - traditional lexical analysis and macro expansion.
2    Copyright (C) 2002-2018 Free Software Foundation, Inc.
3    Contributed by Neil Booth, May 2002
4 
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
8 later version.
9 
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU General Public License for more details.
14 
15 You should have received a copy of the GNU General Public License
16 along with this program; see the file COPYING3.  If not see
17 <http://www.gnu.org/licenses/>.  */
18 
19 #include "config.h"
20 #include "system.h"
21 #include "cpplib.h"
22 #include "internal.h"
23 
24 /* The replacement text of a function-like macro is stored as a
25    contiguous sequence of aligned blocks, each representing the text
26    between subsequent parameters.
27 
28    Each block comprises the text between its surrounding parameters,
29    the length of that text, and the one-based index of the following
30    parameter.  The final block in the replacement text is easily
31    recognizable as it has an argument index of zero.  */
32 
33 struct block
34 {
  /* Number of bytes of replacement text stored in TEXT.  */
35   unsigned int text_len;
  /* One-based index of the parameter that follows this text; zero
     marks the final block of the replacement text.  */
36   unsigned short arg_index;
  /* The text itself.  Declared with a single element, but blocks are
     sized with BLOCK_LEN so that TEXT_LEN bytes follow the header.  */
37   uchar text[1];
38 };
39 
/* Size of the fixed part of a block, i.e. the offset of its text.  */
40 #define BLOCK_HEADER_LEN offsetof (struct block, text)
/* Aligned size of a whole block carrying TEXT_LEN bytes of text; this is
   the stride used to step from one block to the next.  */
41 #define BLOCK_LEN(TEXT_LEN) CPP_ALIGN (BLOCK_HEADER_LEN + (TEXT_LEN))
42 
43 /* Structure holding information about a function-like macro
44    invocation.  */
45 struct fun_macro
46 {
47   /* Memory buffer holding the trad_arg array.  */
  /* maybe_start_funlike allocates it and points ARGS at its front;
     it is NULL while no buffer has been obtained.  */
48   _cpp_buff *buff;
49 
50   /* An array of size the number of macro parameters + 1, containing
51      the offsets of the start of each macro argument in the output
52      buffer.  The argument continues until the character before the
53      start of the next one.  */
54   size_t *args;
55 
56   /* The hashnode of the macro.  */
57   cpp_hashnode *node;
58 
59   /* The offset of the macro name in the output buffer.  */
60   size_t offset;
61 
62   /* The line the macro name appeared on.  */
63   source_location line;
64 
65   /* Number of parameters.  */
  /* For builtin function-like macros maybe_start_funlike sets this
     to 1.  */
66   unsigned int paramc;
67 
68   /* Zero-based index of argument being currently lexed.  */
  /* save_argument increments this before recording each offset, and
     stops recording once it exceeds PARAMC.  */
69   unsigned int argc;
70 };
71 
72 /* Lexing state.  It is mostly used to prevent macro expansion.  */
/* These are the states of the scanner in _cpp_scan_out_logical_line.
   An identifier naming a macro is only considered for expansion while
   the state is ls_none or ls_fun_open.  */
73 enum ls {ls_none = 0,		/* Normal state.  */
74 	 ls_fun_open,		/* When looking for '('.  */
75 	 ls_fun_close,		/* When looking for ')'.  */
76 	 ls_defined,		/* After defined.  */
77 	 ls_defined_close,	/* Looking for ')' of defined().  */
78 	 ls_hash,		/* After # in preprocessor conditional.  */
79 	 ls_predicate,		/* After the predicate, maybe paren?  */
80 	 ls_answer,		/* In answer to predicate.  */
81 	 ls_has_include,	/* After __has_include__.  */
82 	 ls_has_include_close};	/* Looking for ')' of __has_include__.  */
83 
84 /* Lexing TODO: Maybe handle space in escaped newlines.  Stop lex.c
85    from recognizing comments and directives during its lexing pass.  */
86 
/* Forward declarations of the static helpers used by the scanner.  */
87 static const uchar *skip_whitespace (cpp_reader *, const uchar *, int);
88 static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
89 static const uchar *copy_comment (cpp_reader *, const uchar *, int);
90 static void check_output_buffer (cpp_reader *, size_t);
91 static void push_replacement_text (cpp_reader *, cpp_hashnode *);
92 static bool scan_parameters (cpp_reader *, cpp_macro *);
93 static bool recursive_macro (cpp_reader *, cpp_hashnode *);
94 static void save_replacement_text (cpp_reader *, cpp_macro *, unsigned int);
95 static void maybe_start_funlike (cpp_reader *, cpp_hashnode *, const uchar *,
96 				 struct fun_macro *);
97 static void save_argument (struct fun_macro *, size_t);
98 static void replace_args_and_push (cpp_reader *, struct fun_macro *);
99 static size_t canonicalize_text (uchar *, const uchar *, size_t, uchar *);
100 
101 /* Ensures we have N bytes' space in the output buffer, and
102    reallocates it if not.  */
103 static void
check_output_buffer(cpp_reader * pfile,size_t n)104 check_output_buffer (cpp_reader *pfile, size_t n)
105 {
106   /* We might need two bytes to terminate an unterminated comment, and
107      one more to terminate the line with a NUL.  */
108   n += 2 + 1;
109 
110   if (n > (size_t) (pfile->out.limit - pfile->out.cur))
111     {
112       size_t size = pfile->out.cur - pfile->out.base;
113       size_t new_size = (size + n) * 3 / 2;
114 
115       pfile->out.base = XRESIZEVEC (unsigned char, pfile->out.base, new_size);
116       pfile->out.limit = pfile->out.base + new_size;
117       pfile->out.cur = pfile->out.base + size;
118     }
119 }
120 
121 /* Skip a C-style block comment in a macro as a result of -CC.
122    PFILE->buffer->cur points to the initial asterisk of the comment,
123    change it to point to after the '*' and '/' characters that terminate it.
124    Return true if the macro has not been termined, in that case set
125    PFILE->buffer->cur to the end of the buffer.  */
126 static bool
skip_macro_block_comment(cpp_reader * pfile)127 skip_macro_block_comment (cpp_reader *pfile)
128 {
129   const uchar *cur = pfile->buffer->cur;
130 
131   cur++;
132   if (*cur == '/')
133     cur++;
134 
135   /* People like decorating comments with '*', so check for '/'
136      instead for efficiency.  */
137   while (! (*cur++ == '/' && cur[-2] == '*'))
138     if (cur[-1] == '\n')
139       {
140 	pfile->buffer->cur = cur - 1;
141 	return true;
142       }
143 
144   pfile->buffer->cur = cur;
145   return false;
146 }
147 
148 /* CUR points to the asterisk introducing a comment in the current
149    context.  IN_DEFINE is true if we are in the replacement text of a
150    macro.
151 
152    The asterisk and following comment is copied to the buffer pointed
153    to by pfile->out.cur, which must be of sufficient size.
154    Unterminated comments are diagnosed, and correctly terminated in
155    the output.  pfile->out.cur is updated depending upon IN_DEFINE,
156    -C, -CC and pfile->state.in_directive.
157 
158    Returns a pointer to the first character after the comment in the
159    input buffer.  */
160 static const uchar *
copy_comment(cpp_reader * pfile,const uchar * cur,int in_define)161 copy_comment (cpp_reader *pfile, const uchar *cur, int in_define)
162 {
163   bool unterminated, copy = false;
164   source_location src_loc = pfile->line_table->highest_line;
165   cpp_buffer *buffer = pfile->buffer;
166 
167   buffer->cur = cur;
168   if (pfile->context->prev)
169     unterminated = skip_macro_block_comment (pfile);
170   else
171     unterminated = _cpp_skip_block_comment (pfile);
172 
173   if (unterminated)
174     cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
175 			 "unterminated comment");
176 
177   /* Comments in directives become spaces so that tokens are properly
178      separated when the ISO preprocessor re-lexes the line.  The
179      exception is #define.  */
180   if (pfile->state.in_directive)
181     {
182       if (in_define)
183 	{
184 	  if (CPP_OPTION (pfile, discard_comments_in_macro_exp))
185 	    pfile->out.cur--;
186 	  else
187 	    copy = true;
188 	}
189       else
190 	pfile->out.cur[-1] = ' ';
191     }
192   else if (CPP_OPTION (pfile, discard_comments))
193     pfile->out.cur--;
194   else
195     copy = true;
196 
197   if (copy)
198     {
199       size_t len = (size_t) (buffer->cur - cur);
200       memcpy (pfile->out.cur, cur, len);
201       pfile->out.cur += len;
202       if (unterminated)
203 	{
204 	  *pfile->out.cur++ = '*';
205 	  *pfile->out.cur++ = '/';
206 	}
207     }
208 
209   return buffer->cur;
210 }
211 
212 /* CUR points to any character in the input buffer.  Skips over all
213    contiguous horizontal white space and NULs, including comments if
214    SKIP_COMMENTS, until reaching the first non-horizontal-whitespace
215    character or the end of the current context.  Escaped newlines are
216    removed.
217 
218    The whitespace is copied verbatim to the output buffer, except that
219    comments are handled as described in copy_comment().
220    pfile->out.cur is updated.
221 
222    Returns a pointer to the first character after the whitespace in
223    the input buffer.  */
224 static const uchar *
skip_whitespace(cpp_reader * pfile,const uchar * cur,int skip_comments)225 skip_whitespace (cpp_reader *pfile, const uchar *cur, int skip_comments)
226 {
227   uchar *out = pfile->out.cur;
228 
229   for (;;)
230     {
231       unsigned int c = *cur++;
232       *out++ = c;
233 
234       if (is_nvspace (c))
235 	continue;
236 
237       if (c == '/' && *cur == '*' && skip_comments)
238 	{
239 	  pfile->out.cur = out;
240 	  cur = copy_comment (pfile, cur, false /* in_define */);
241 	  out = pfile->out.cur;
242 	  continue;
243 	}
244 
245       out--;
246       break;
247     }
248 
249   pfile->out.cur = out;
250   return cur - 1;
251 }
252 
253 /* Lexes and outputs an identifier starting at CUR, which is assumed
254    to point to a valid first character of an identifier.  Returns
255    the hashnode, and updates out.cur.  */
256 static cpp_hashnode *
lex_identifier(cpp_reader * pfile,const uchar * cur)257 lex_identifier (cpp_reader *pfile, const uchar *cur)
258 {
259   size_t len;
260   uchar *out = pfile->out.cur;
261   cpp_hashnode *result;
262 
263   do
264     *out++ = *cur++;
265   while (is_numchar (*cur));
266 
267   CUR (pfile->context) = cur;
268   len = out - pfile->out.cur;
269   result = CPP_HASHNODE (ht_lookup (pfile->hash_table, pfile->out.cur,
270 				    len, HT_ALLOC));
271   pfile->out.cur = out;
272   return result;
273 }
274 
275 /* Overlays the true file buffer temporarily with text of length LEN
276    starting at START.  The true buffer is restored upon calling
277    restore_buff().  */
278 void
_cpp_overlay_buffer(cpp_reader * pfile,const uchar * start,size_t len)279 _cpp_overlay_buffer (cpp_reader *pfile, const uchar *start, size_t len)
280 {
281   cpp_buffer *buffer = pfile->buffer;
282 
283   pfile->overlaid_buffer = buffer;
284   pfile->saved_cur = buffer->cur;
285   pfile->saved_rlimit = buffer->rlimit;
286   pfile->saved_line_base = buffer->next_line;
287   buffer->need_line = false;
288 
289   buffer->cur = start;
290   buffer->line_base = start;
291   buffer->rlimit = start + len;
292 }
293 
294 /* Restores a buffer overlaid by _cpp_overlay_buffer().  */
295 void
_cpp_remove_overlay(cpp_reader * pfile)296 _cpp_remove_overlay (cpp_reader *pfile)
297 {
298   cpp_buffer *buffer = pfile->overlaid_buffer;
299 
300   buffer->cur = pfile->saved_cur;
301   buffer->rlimit = pfile->saved_rlimit;
302   buffer->line_base = pfile->saved_line_base;
303   buffer->need_line = true;
304 
305   pfile->overlaid_buffer = NULL;
306 }
307 
308 /* Reads a logical line into the output buffer.  Returns TRUE if there
309    is more text left in the buffer.  */
310 bool
_cpp_read_logical_line_trad(cpp_reader * pfile)311 _cpp_read_logical_line_trad (cpp_reader *pfile)
312 {
313   do
314     {
315       if (pfile->buffer->need_line && !_cpp_get_fresh_line (pfile))
316 	return false;
317     }
318   while (!_cpp_scan_out_logical_line (pfile, NULL, false)
319 	 || pfile->state.skipping);
320 
321   return pfile->buffer != NULL;
322 }
323 
324 /* Return true if NODE is a fun_like macro.  */
325 static inline bool
fun_like_macro(cpp_hashnode * node)326 fun_like_macro (cpp_hashnode *node)
327 {
328   if (node->flags & NODE_BUILTIN)
329     return node->value.builtin == BT_HAS_ATTRIBUTE;
330   else
331     return node->value.macro->fun_like;
332 }
333 
334 /* Set up state for finding the opening '(' of a function-like
335    macro.  */
336 static void
maybe_start_funlike(cpp_reader * pfile,cpp_hashnode * node,const uchar * start,struct fun_macro * macro)337 maybe_start_funlike (cpp_reader *pfile, cpp_hashnode *node, const uchar *start,
338 		     struct fun_macro *macro)
339 {
340   unsigned int n;
341   if (node->flags & NODE_BUILTIN)
342     n = 1;
343   else
344     n = node->value.macro->paramc;
345 
346   if (macro->buff)
347     _cpp_release_buff (pfile, macro->buff);
348   macro->buff = _cpp_get_buff (pfile, (n + 1) * sizeof (size_t));
349   macro->args = (size_t *) BUFF_FRONT (macro->buff);
350   macro->node = node;
351   macro->offset = start - pfile->out.base;
352   macro->paramc = n;
353   macro->argc = 0;
354 }
355 
356 /* Save the OFFSET of the start of the next argument to MACRO.  */
357 static void
save_argument(struct fun_macro * macro,size_t offset)358 save_argument (struct fun_macro *macro, size_t offset)
359 {
360   macro->argc++;
361   if (macro->argc <= macro->paramc)
362     macro->args[macro->argc] = offset;
363 }
364 
365 /* Copies the next logical line in the current buffer (starting at
366    buffer->cur) to the output buffer.  The output is guaranteed to
367    terminate with a NUL character.  buffer->cur is updated.
368 
369    If MACRO is non-NULL, then we are scanning the replacement list of
370    MACRO, and we call save_replacement_text() every time we meet an
371    argument.
372 
373    If BUILTIN_MACRO_ARG is true, this is called to macro expand
374    arguments of builtin function-like macros.  */
/* Returns true when an ordinary logical line has been copied to the
   output buffer, and false when the line turned out to be a directive
   (either a null directive or one handed to _cpp_handle_directive).
   When BUILTIN_MACRO_ARG is false the output buffer is restarted from
   its base; when it is true the text is appended at the current output
   position and scanning stops at the first newline outside a macro
   expansion.  An error is issued on exit if the scan ended while the
   arguments of a function-like macro were still being collected.  */
375 bool
_cpp_scan_out_logical_line(cpp_reader * pfile,cpp_macro * macro,bool builtin_macro_arg)376 _cpp_scan_out_logical_line (cpp_reader *pfile, cpp_macro *macro,
377 			    bool builtin_macro_arg)
378 {
379   bool result = true;
380   cpp_context *context;
381   const uchar *cur;
382   uchar *out;
383   struct fun_macro fmacro;
384   unsigned int c, paren_depth = 0, quote;
385   enum ls lex_state = ls_none;
386   bool header_ok;
387   const uchar *start_of_input_line;
388 
  /* No function-like macro invocation is being collected yet.  */
389   fmacro.buff = NULL;
390   fmacro.args = NULL;
391   fmacro.node = NULL;
392   fmacro.offset = 0;
393   fmacro.line = 0;
394   fmacro.paramc = 0;
395   fmacro.argc = 0;
396 
397   quote = 0;
398   header_ok = pfile->state.angled_headers;
399   CUR (pfile->context) = pfile->buffer->cur;
400   RLIMIT (pfile->context) = pfile->buffer->rlimit;
401   if (!builtin_macro_arg)
402     {
403       pfile->out.cur = pfile->out.base;
404       pfile->out.first_line = pfile->line_table->highest_line;
405     }
406   /* start_of_input_line is needed to make sure that directives really,
407      really start at the first character of the line.  */
408   start_of_input_line = pfile->buffer->cur;
  /* Control comes back here whenever a context has been pushed or
     popped: the scan pointers are reloaded from the now-current context
     and the output buffer is checked against what is left of it.  */
409  new_context:
410   context = pfile->context;
411   cur = CUR (context);
412   check_output_buffer (pfile, RLIMIT (context) - cur);
413   out = pfile->out.cur;
414 
415   for (;;)
416     {
417       if (!context->prev
418 	  && !builtin_macro_arg
419 	  && cur >= pfile->buffer->notes[pfile->buffer->cur_note].pos)
420 	{
421 	  pfile->buffer->cur = cur;
422 	  _cpp_process_line_notes (pfile, false);
423 	}
      /* Every character is copied to the output first; the cases below
	 undo or overwrite that copy where needed.  */
424       c = *cur++;
425       *out++ = c;
426 
427       /* Whitespace should "continue" out of the switch,
428 	 non-whitespace should "break" out of it.  */
429       switch (c)
430 	{
431 	case ' ':
432 	case '\t':
433 	case '\f':
434 	case '\v':
435 	case '\0':
436 	  continue;
437 
438 	case '\n':
439 	  /* If this is a macro's expansion, pop it.  */
440 	  if (context->prev)
441 	    {
442 	      pfile->out.cur = out - 1;
443 	      _cpp_pop_context (pfile);
444 	      goto new_context;
445 	    }
446 
447 	  /* Omit the newline from the output buffer.  */
448 	  pfile->out.cur = out - 1;
449 	  pfile->buffer->cur = cur;
450 	  if (builtin_macro_arg)
451 	    goto done;
452 	  pfile->buffer->need_line = true;
453 	  CPP_INCREMENT_LINE (pfile, 0);
454 
455 	  if ((lex_state == ls_fun_open || lex_state == ls_fun_close)
456 	      && !pfile->state.in_directive
457 	      && _cpp_get_fresh_line (pfile))
458 	    {
459 	      /* Newlines in arguments become a space, but we don't
460 		 clear any in-progress quote.  */
461 	      if (lex_state == ls_fun_close)
462 		out[-1] = ' ';
463 	      cur = pfile->buffer->cur;
464 	      continue;
465 	    }
466 	  goto done;
467 
468 	case '<':
469 	  if (header_ok)
470 	    quote = '>';
471 	  break;
472 	case '>':
473 	  if (c == quote)
474 	    quote = 0;
475 	  break;
476 
477 	case '"':
478 	case '\'':
479 	  if (c == quote)
480 	    quote = 0;
481 	  else if (!quote)
482 	    quote = c;
483 	  break;
484 
485 	case '\\':
486 	  /* Skip escaped quotes here, it's easier than above.  */
487 	  if (*cur == '\\' || *cur == '"' || *cur == '\'')
488 	    *out++ = *cur++;
489 	  break;
490 
491 	case '/':
492 	  /* Traditional CPP does not recognize comments within
493 	     literals.  */
494 	  if (!quote && *cur == '*')
495 	    {
496 	      pfile->out.cur = out;
497 	      cur = copy_comment (pfile, cur, macro != 0);
498 	      out = pfile->out.cur;
499 	      continue;
500 	    }
501 	  break;
502 
503 	case '_':
504 	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
505 	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
506 	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
507 	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
508 	case 'y': case 'z':
509 	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
510 	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
511 	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
512 	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
513 	case 'Y': case 'Z':
	  /* Identifiers inside a literal are only looked at while the
	     replacement text of a #define is being scanned.  */
514 	  if (!pfile->state.skipping && (quote == 0 || macro))
515 	    {
516 	      cpp_hashnode *node;
517 	      uchar *out_start = out - 1;
518 
	      /* Re-lex the whole identifier from its first character,
		 overwriting the copy made above.  */
519 	      pfile->out.cur = out_start;
520 	      node = lex_identifier (pfile, cur - 1);
521 	      out = pfile->out.cur;
522 	      cur = CUR (context);
523 
524 	      if (node->type == NT_MACRO
525 		  /* Should we expand for ls_answer?  */
526 		  && (lex_state == ls_none || lex_state == ls_fun_open)
527 		  && !pfile->state.prevent_expansion)
528 		{
529 		  /* Macros invalidate MI optimization.  */
530 		  pfile->mi_valid = false;
531 		  if (fun_like_macro (node))
532 		    {
533 		      maybe_start_funlike (pfile, node, out_start, &fmacro);
534 		      lex_state = ls_fun_open;
535 		      fmacro.line = pfile->line_table->highest_line;
536 		      continue;
537 		    }
538 		  else if (!recursive_macro (pfile, node))
539 		    {
540 		      /* Remove the object-like macro's name from the
541 			 output, and push its replacement text.  */
542 		      pfile->out.cur = out_start;
543 		      push_replacement_text (pfile, node);
544 		      lex_state = ls_none;
545 		      goto new_context;
546 		    }
547 		}
548 	      else if (macro && (node->flags & NODE_MACRO_ARG) != 0)
549 		{
550 		  /* Found a parameter in the replacement text of a
551 		     #define.  Remove its name from the output.  */
552 		  pfile->out.cur = out_start;
553 		  save_replacement_text (pfile, macro, node->value.arg_index);
		  /* NOTE(review): presumably save_replacement_text has
		     consumed the text gathered so far, which is why the
		     output restarts at the buffer base -- confirm.  */
554 		  out = pfile->out.base;
555 		}
556 	      else if (lex_state == ls_hash)
557 		{
558 		  lex_state = ls_predicate;
559 		  continue;
560 		}
561 	      else if (pfile->state.in_expression
562 		       && node == pfile->spec_nodes.n_defined)
563 		{
564 		  lex_state = ls_defined;
565 		  continue;
566 		}
567 	      else if (pfile->state.in_expression
568 		       && (node == pfile->spec_nodes.n__has_include__
569 			|| node == pfile->spec_nodes.n__has_include_next__))
570 		{
571 		  lex_state = ls_has_include;
572 		  continue;
573 		}
574 	    }
575 	  break;
576 
577 	case '(':
578 	  if (quote == 0)
579 	    {
580 	      paren_depth++;
581 	      if (lex_state == ls_fun_open)
582 		{
583 		  if (recursive_macro (pfile, fmacro.node))
584 		    lex_state = ls_none;
585 		  else
586 		    {
		      /* Start collecting arguments.  The output pointer
			 is moved back to where the macro name began, and
			 that position is recorded as the start of the
			 first argument.  */
587 		      lex_state = ls_fun_close;
588 		      paren_depth = 1;
589 		      out = pfile->out.base + fmacro.offset;
590 		      fmacro.args[0] = fmacro.offset;
591 		    }
592 		}
593 	      else if (lex_state == ls_predicate)
594 		lex_state = ls_answer;
595 	      else if (lex_state == ls_defined)
596 		lex_state = ls_defined_close;
597 	      else if (lex_state == ls_has_include)
598 		lex_state = ls_has_include_close;
599 	    }
600 	  break;
601 
602 	case ',':
	  /* Only a comma at the outermost nesting level of the invocation
	     separates arguments.  */
603 	  if (quote == 0 && lex_state == ls_fun_close && paren_depth == 1)
604 	    save_argument (&fmacro, out - pfile->out.base);
605 	  break;
606 
607 	case ')':
608 	  if (quote == 0)
609 	    {
	      /* NOTE(review): paren_depth is unsigned and is decremented
		 unconditionally, so an unmatched ')' wraps it around --
		 confirm this is harmless.  */
610 	      paren_depth--;
611 	      if (lex_state == ls_fun_close && paren_depth == 0)
612 		{
613 		  if (fmacro.node->flags & NODE_BUILTIN)
614 		    {
615 		      /* Handle builtin function-like macros like
616 			 __has_attribute.  The already parsed arguments
617 			 are put into a buffer, which is then preprocessed
618 			 and the result is fed to _cpp_push_text_context
619 			 with disabled expansion, where the ISO preprocessor
620 			 parses it.  While in traditional preprocessing
621 			 macro arguments aren't immediately expanded, they in
622 			 the end are because the macro with replaced arguments
623 			 is preprocessed again.  For the builtin function-like
624 			 macros we need the argument immediately though,
625 			 if we don't preprocess them, they would behave
626 			 very differently from ISO preprocessor handling
627 			 of those builtin macros.  So, this handling is
628 			 more similar to traditional preprocessing of
629 			 #if directives, where we also keep preprocessing
630 			 until everything is expanded, and then feed the
631 			 result with disabled expansion to ISO preprocessor
632 			 for handling the directives.  */
633 		      lex_state = ls_none;
634 		      save_argument (&fmacro, out - pfile->out.base);
		      /* A zeroed stand-in macro carrying only the
			 parameter count, for the argument-count check.  */
635 		      cpp_macro m;
636 		      memset (&m, '\0', sizeof (m));
637 		      m.paramc = fmacro.paramc;
638 		      if (_cpp_arguments_ok (pfile, &m, fmacro.node,
639 					     fmacro.argc))
640 			{
641 			  size_t len = fmacro.args[1] - fmacro.args[0];
642 			  uchar *buf;
643 
644 			  /* Remove the macro's invocation from the
645 			     output, and push its replacement text.  */
646 			  pfile->out.cur = pfile->out.base + fmacro.offset;
647 			  CUR (context) = cur;
			  /* Copy the collected argument text into BUF as
			     "(" + text + newline so it can be scanned as a
			     line of its own.  */
648 			  buf = _cpp_unaligned_alloc (pfile, len + 2);
649 			  buf[0] = '(';
650 			  memcpy (buf + 1, pfile->out.base + fmacro.args[0],
651 				  len);
652 			  buf[len + 1] = '\n';
653 
			  /* Save the buffer and context state that is
			     about to be overridden; it is restored once
			     the builtin's text has been computed.  */
654 			  const unsigned char *ctx_rlimit = RLIMIT (context);
655 			  const unsigned char *saved_cur = pfile->buffer->cur;
656 			  const unsigned char *saved_rlimit
657 			    = pfile->buffer->rlimit;
658 			  const unsigned char *saved_line_base
659 			    = pfile->buffer->line_base;
660 			  bool saved_need_line = pfile->buffer->need_line;
661 			  cpp_buffer *saved_overlaid_buffer
662 			    = pfile->overlaid_buffer;
663 			  pfile->buffer->cur = buf;
664 			  pfile->buffer->line_base = buf;
665 			  pfile->buffer->rlimit = buf + len + 1;
666 			  pfile->buffer->need_line = false;
667 			  pfile->overlaid_buffer = pfile->buffer;
668 			  bool saved_in_directive = pfile->state.in_directive;
669 			  pfile->state.in_directive = true;
670 			  cpp_context *saved_prev_context = context->prev;
671 			  context->prev = NULL;
672 
			  /* Recursive scan of BUF; with BUILTIN_MACRO_ARG
			     set it appends to the output buffer instead of
			     restarting it.  */
673 			  _cpp_scan_out_logical_line (pfile, NULL, true);
674 
675 			  pfile->state.in_directive = saved_in_directive;
676 			  check_output_buffer (pfile, 1);
677 			  *pfile->out.cur = '\n';
			  /* Make the buffer and context read the text just
			     scanned out while the builtin's text is
			     computed.  */
678 			  pfile->buffer->cur = pfile->out.base + fmacro.offset;
679 			  pfile->buffer->line_base = pfile->buffer->cur;
680 			  pfile->buffer->rlimit = pfile->out.cur;
681 			  CUR (context) = pfile->buffer->cur;
682 			  RLIMIT (context) = pfile->buffer->rlimit;
683 
684 			  pfile->state.prevent_expansion++;
685 			  const uchar *text
686 			    = _cpp_builtin_macro_text (pfile, fmacro.node);
687 			  pfile->state.prevent_expansion--;
688 
			  /* Put everything saved above back in place.  */
689 			  context->prev = saved_prev_context;
690 			  pfile->buffer->cur = saved_cur;
691 			  pfile->buffer->rlimit = saved_rlimit;
692 			  pfile->buffer->line_base = saved_line_base;
693 			  pfile->buffer->need_line = saved_need_line;
694 			  pfile->overlaid_buffer = saved_overlaid_buffer;
695 			  pfile->out.cur = pfile->out.base + fmacro.offset;
696 			  CUR (context) = cur;
697 			  RLIMIT (context) = ctx_rlimit;
			  /* Push a newline-terminated copy of the builtin's
			     text as a new context.  */
698 			  len = ustrlen (text);
699 			  buf = _cpp_unaligned_alloc (pfile, len + 1);
700 			  memcpy (buf, text, len);
701 			  buf[len] = '\n';
702 			  text = buf;
703 			  _cpp_push_text_context (pfile, fmacro.node,
704 						  text, len);
705 			  goto new_context;
706 			}
707 		      break;
708 		    }
709 
710 		  cpp_macro *m = fmacro.node->value.macro;
711 
712 		  m->used = 1;
713 		  lex_state = ls_none;
714 		  save_argument (&fmacro, out - pfile->out.base);
715 
716 		  /* A single zero-length argument is no argument.  */
717 		  if (fmacro.argc == 1
718 		      && m->paramc == 0
719 		      && out == pfile->out.base + fmacro.offset + 1)
720 		    fmacro.argc = 0;
721 
722 		  if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
723 		    {
724 		      /* Remove the macro's invocation from the
725 			 output, and push its replacement text.  */
726 		      pfile->out.cur = pfile->out.base + fmacro.offset;
727 		      CUR (context) = cur;
728 		      replace_args_and_push (pfile, &fmacro);
729 		      goto new_context;
730 		    }
731 		}
732 	      else if (lex_state == ls_answer || lex_state == ls_defined_close
733 			|| lex_state == ls_has_include_close)
734 		lex_state = ls_none;
735 	    }
736 	  break;
737 
738 	case '#':
739 	  if (cur - 1 == start_of_input_line
740 	      /* A '#' from a macro doesn't start a directive.  */
741 	      && !pfile->context->prev
742 	      && !pfile->state.in_directive)
743 	    {
744 	      /* A directive.  With the way _cpp_handle_directive
745 		 currently works, we only want to call it if either we
746 		 know the directive is OK, or we want it to fail and
747 		 be removed from the output.  If we want it to be
748 		 passed through (the assembler case) then we must not
749 		 call _cpp_handle_directive.  */
750 	      pfile->out.cur = out;
751 	      cur = skip_whitespace (pfile, cur, true /* skip_comments */);
752 	      out = pfile->out.cur;
753 
754 	      if (*cur == '\n')
755 		{
756 		  /* Null directive.  Ignore it and don't invalidate
757 		     the MI optimization.  */
758 		  pfile->buffer->need_line = true;
759 		  CPP_INCREMENT_LINE (pfile, 0);
760 		  result = false;
761 		  goto done;
762 		}
763 	      else
764 		{
765 		  bool do_it = false;
766 
767 		  if (is_numstart (*cur)
768 		      && CPP_OPTION (pfile, lang) != CLK_ASM)
769 		    do_it = true;
770 		  else if (is_idstart (*cur))
771 		    /* Check whether we know this directive, but don't
772 		       advance.  */
773 		    do_it = lex_identifier (pfile, cur)->is_directive;
774 
775 		  if (do_it || CPP_OPTION (pfile, lang) != CLK_ASM)
776 		    {
777 		      /* This is a kludge.  We want to have the ISO
778 			 preprocessor lex the next token.  */
779 		      pfile->buffer->cur = cur;
780 		      _cpp_handle_directive (pfile, false /* indented */);
781 		      result = false;
782 		      goto done;
783 		    }
784 		}
785 	    }
786 
787 	  if (pfile->state.in_expression)
788 	    {
789 	      lex_state = ls_hash;
790 	      continue;
791 	    }
792 	  break;
793 
794 	default:
795 	  break;
796 	}
797 
798       /* Non-whitespace disables MI optimization and stops treating
799 	 '<' as a quote in #include.  */
800       header_ok = false;
801       if (!pfile->state.in_directive)
802 	pfile->mi_valid = false;
803 
804       if (lex_state == ls_none)
805 	continue;
806 
807       /* Some of these transitions of state are syntax errors.  The
808 	 ISO preprocessor will issue errors later.  */
809       if (lex_state == ls_fun_open)
810 	/* Missing '('.  */
811 	lex_state = ls_none;
812       else if (lex_state == ls_hash
813 	       || lex_state == ls_predicate
814 	       || lex_state == ls_defined
815 	       || lex_state == ls_has_include)
816 	lex_state = ls_none;
817 
818       /* ls_answer and ls_defined_close keep going until ')'.  */
819     }
820 
  /* Common exit: release the argument-offset buffer if one was obtained,
     and complain if the closing ')' of an invocation was never seen.  */
821  done:
822   if (fmacro.buff)
823     _cpp_release_buff (pfile, fmacro.buff);
824 
825   if (lex_state == ls_fun_close)
826     cpp_error_with_line (pfile, CPP_DL_ERROR, fmacro.line, 0,
827 			 "unterminated argument list invoking macro \"%s\"",
828 			 NODE_NAME (fmacro.node));
829   return result;
830 }
831 
832 /* Push a context holding the replacement text of the macro NODE on
833    the context stack.  NODE is either object-like, or a function-like
834    macro with no arguments.  */
835 static void
push_replacement_text(cpp_reader * pfile,cpp_hashnode * node)836 push_replacement_text (cpp_reader *pfile, cpp_hashnode *node)
837 {
838   size_t len;
839   const uchar *text;
840   uchar *buf;
841 
842   if (node->flags & NODE_BUILTIN)
843     {
844       text = _cpp_builtin_macro_text (pfile, node);
845       len = ustrlen (text);
846       buf = _cpp_unaligned_alloc (pfile, len + 1);
847       memcpy (buf, text, len);
848       buf[len] = '\n';
849       text = buf;
850     }
851   else
852     {
853       cpp_macro *macro = node->value.macro;
854       macro->used = 1;
855       text = macro->exp.text;
856       macro->traditional = 1;
857       len = macro->count;
858     }
859 
860   _cpp_push_text_context (pfile, node, text, len);
861 }
862 
863 /* Returns TRUE if traditional macro recursion is detected.  */
864 static bool
recursive_macro(cpp_reader * pfile,cpp_hashnode * node)865 recursive_macro (cpp_reader *pfile, cpp_hashnode *node)
866 {
867   bool recursing = !!(node->flags & NODE_DISABLED);
868 
869   /* Object-like macros that are already expanding are necessarily
870      recursive.
871 
872      However, it is possible to have traditional function-like macros
873      that are not infinitely recursive but recurse to any given depth.
874      Further, it is easy to construct examples that get ever longer
875      until the point they stop recursing.  So there is no easy way to
876      detect true recursion; instead we assume any expansion more than
877      20 deep since the first invocation of this macro must be
878      recursing.  */
879   if (recursing && fun_like_macro (node))
880     {
881       size_t depth = 0;
882       cpp_context *context = pfile->context;
883 
884       do
885 	{
886 	  depth++;
887 	  if (context->c.macro == node && depth > 20)
888 	    break;
889 	  context = context->prev;
890 	}
891       while (context);
892       recursing = context != NULL;
893     }
894 
895   if (recursing)
896     cpp_error (pfile, CPP_DL_ERROR,
897 	       "detected recursion whilst expanding macro \"%s\"",
898 	       NODE_NAME (node));
899 
900   return recursing;
901 }
902 
903 /* Return the length of the replacement text of a function-like or
904    object-like non-builtin macro.  */
905 size_t
_cpp_replacement_text_len(const cpp_macro * macro)906 _cpp_replacement_text_len (const cpp_macro *macro)
907 {
908   size_t len;
909 
910   if (macro->fun_like && (macro->paramc != 0))
911     {
912       const uchar *exp;
913 
914       len = 0;
915       for (exp = macro->exp.text;;)
916 	{
917 	  struct block *b = (struct block *) exp;
918 
919 	  len += b->text_len;
920 	  if (b->arg_index == 0)
921 	    break;
922 	  len += NODE_LEN (macro->params[b->arg_index - 1]);
923 	  exp += BLOCK_LEN (b->text_len);
924 	}
925     }
926   else
927     len = macro->count;
928 
929   return len;
930 }
931 
932 /* Copy the replacement text of MACRO to DEST, which must be of
933    sufficient size.  It is not NUL-terminated.  The next character is
934    returned.  */
935 uchar *
_cpp_copy_replacement_text(const cpp_macro * macro,uchar * dest)936 _cpp_copy_replacement_text (const cpp_macro *macro, uchar *dest)
937 {
938   if (macro->fun_like && (macro->paramc != 0))
939     {
940       const uchar *exp;
941 
942       for (exp = macro->exp.text;;)
943 	{
944 	  struct block *b = (struct block *) exp;
945 	  cpp_hashnode *param;
946 
947 	  memcpy (dest, b->text, b->text_len);
948 	  dest += b->text_len;
949 	  if (b->arg_index == 0)
950 	    break;
951 	  param = macro->params[b->arg_index - 1];
952 	  memcpy (dest, NODE_NAME (param), NODE_LEN (param));
953 	  dest += NODE_LEN (param);
954 	  exp += BLOCK_LEN (b->text_len);
955 	}
956     }
957   else
958     {
959       memcpy (dest, macro->exp.text, macro->count);
960       dest += macro->count;
961     }
962 
963   return dest;
964 }
965 
966 /* Push a context holding the replacement text of the macro NODE on
967    the context stack.  NODE is either object-like, or a function-like
968    macro with no arguments.  */
969 static void
replace_args_and_push(cpp_reader * pfile,struct fun_macro * fmacro)970 replace_args_and_push (cpp_reader *pfile, struct fun_macro *fmacro)
971 {
972   cpp_macro *macro = fmacro->node->value.macro;
973 
974   if (macro->paramc == 0)
975     push_replacement_text (pfile, fmacro->node);
976   else
977     {
978       const uchar *exp;
979       uchar *p;
980       _cpp_buff *buff;
981       size_t len = 0;
982       int cxtquote = 0;
983 
984       /* Get an estimate of the length of the argument-replaced text.
985 	 This is a worst case estimate, assuming that every replacement
986 	 text character needs quoting.  */
987       for (exp = macro->exp.text;;)
988 	{
989 	  struct block *b = (struct block *) exp;
990 
991 	  len += b->text_len;
992 	  if (b->arg_index == 0)
993 	    break;
994 	  len += 2 * (fmacro->args[b->arg_index]
995 		      - fmacro->args[b->arg_index - 1] - 1);
996 	  exp += BLOCK_LEN (b->text_len);
997 	}
998 
999       /* Allocate room for the expansion plus \n.  */
1000       buff = _cpp_get_buff (pfile, len + 1);
1001 
1002       /* Copy the expansion and replace arguments.  */
1003       /* Accumulate actual length, including quoting as necessary */
1004       p = BUFF_FRONT (buff);
1005       len = 0;
1006       for (exp = macro->exp.text;;)
1007 	{
1008 	  struct block *b = (struct block *) exp;
1009 	  size_t arglen;
1010 	  int argquote;
1011 	  uchar *base;
1012 	  uchar *in;
1013 
1014 	  len += b->text_len;
1015 	  /* Copy the non-argument text literally, keeping
1016 	     track of whether matching quotes have been seen. */
1017 	  for (arglen = b->text_len, in = b->text; arglen > 0; arglen--)
1018 	    {
1019 	      if (*in == '"')
1020 		cxtquote = ! cxtquote;
1021 	      *p++ = *in++;
1022 	    }
1023 	  /* Done if no more arguments */
1024 	  if (b->arg_index == 0)
1025 	    break;
1026 	  arglen = (fmacro->args[b->arg_index]
1027 		    - fmacro->args[b->arg_index - 1] - 1);
1028 	  base = pfile->out.base + fmacro->args[b->arg_index - 1];
1029 	  in = base;
1030 #if 0
1031 	  /* Skip leading whitespace in the text for the argument to
1032 	     be substituted. To be compatible with gcc 2.95, we would
1033 	     also need to trim trailing whitespace. Gcc 2.95 trims
1034 	     leading and trailing whitespace, which may be a bug.  The
1035 	     current gcc testsuite explicitly checks that this leading
1036 	     and trailing whitespace in actual arguments is
1037 	     preserved. */
1038 	  while (arglen > 0 && is_space (*in))
1039 	    {
1040 	      in++;
1041 	      arglen--;
1042 	    }
1043 #endif
1044 	  for (argquote = 0; arglen > 0; arglen--)
1045 	    {
1046 	      if (cxtquote && *in == '"')
1047 		{
1048 		  if (in > base && *(in-1) != '\\')
1049 		    argquote = ! argquote;
1050 		  /* Always add backslash before double quote if argument
1051 		     is expanded in a quoted context */
1052 		  *p++ = '\\';
1053 		  len++;
1054 		}
1055 	      else if (cxtquote && argquote && *in == '\\')
1056 		{
1057 		  /* Always add backslash before a backslash in an argument
1058 		     that is expanded in a quoted context and also in the
1059 		     range of a quoted context in the argument itself. */
1060 		  *p++ = '\\';
1061 		  len++;
1062 		}
1063 	      *p++ = *in++;
1064 	      len++;
1065 	    }
1066 	  exp += BLOCK_LEN (b->text_len);
1067 	}
1068 
1069       /* \n-terminate.  */
1070       *p = '\n';
1071       _cpp_push_text_context (pfile, fmacro->node, BUFF_FRONT (buff), len);
1072 
1073       /* So we free buffer allocation when macro is left.  */
1074       pfile->context->buff = buff;
1075     }
1076 }
1077 
1078 /* Read and record the parameters, if any, of a function-like macro
1079    definition.  Destroys pfile->out.cur.
1080 
1081    Returns true on success, false on failure (syntax error or a
1082    duplicate parameter).  On success, CUR (pfile->context) is just
1083    past the closing parenthesis.  */
1084 static bool
scan_parameters(cpp_reader * pfile,cpp_macro * macro)1085 scan_parameters (cpp_reader *pfile, cpp_macro *macro)
1086 {
1087   const uchar *cur = CUR (pfile->context) + 1;
1088   bool ok;
1089 
1090   for (;;)
1091     {
1092       cur = skip_whitespace (pfile, cur, true /* skip_comments */);
1093 
1094       if (is_idstart (*cur))
1095 	{
1096 	  struct cpp_hashnode *id = lex_identifier (pfile, cur);
1097 	  ok = false;
1098 	  if (_cpp_save_parameter (pfile, macro, id, id))
1099 	    break;
1100 	  cur = skip_whitespace (pfile, CUR (pfile->context),
1101 				 true /* skip_comments */);
1102 	  if (*cur == ',')
1103 	    {
1104 	      cur++;
1105 	      continue;
1106 	    }
1107 	  ok = (*cur == ')');
1108 	  break;
1109 	}
1110 
1111       ok = (*cur == ')' && macro->paramc == 0);
1112       break;
1113     }
1114 
1115   if (!ok)
1116     cpp_error (pfile, CPP_DL_ERROR, "syntax error in macro parameter list");
1117 
1118   CUR (pfile->context) = cur + (*cur == ')');
1119 
1120   return ok;
1121 }
1122 
1123 /* Save the text from pfile->out.base to pfile->out.cur as
1124    the replacement text for the current macro, followed by argument
1125    ARG_INDEX, with zero indicating the end of the replacement
1126    text.  */
1127 static void
save_replacement_text(cpp_reader * pfile,cpp_macro * macro,unsigned int arg_index)1128 save_replacement_text (cpp_reader *pfile, cpp_macro *macro,
1129 		       unsigned int arg_index)
1130 {
1131   size_t len = pfile->out.cur - pfile->out.base;
1132   uchar *exp;
1133 
1134   if (macro->paramc == 0)
1135     {
1136       /* Object-like and function-like macros without parameters
1137 	 simply store their \n-terminated replacement text.  */
1138       exp = _cpp_unaligned_alloc (pfile, len + 1);
1139       memcpy (exp, pfile->out.base, len);
1140       exp[len] = '\n';
1141       macro->exp.text = exp;
1142       macro->traditional = 1;
1143       macro->count = len;
1144     }
1145   else
1146     {
1147       /* Store the text's length (unsigned int), the argument index
1148 	 (unsigned short, base 1) and then the text.  */
1149       size_t blen = BLOCK_LEN (len);
1150       struct block *block;
1151 
1152       if (macro->count + blen > BUFF_ROOM (pfile->a_buff))
1153 	_cpp_extend_buff (pfile, &pfile->a_buff, macro->count + blen);
1154 
1155       exp = BUFF_FRONT (pfile->a_buff);
1156       block = (struct block *) (exp + macro->count);
1157       macro->exp.text = exp;
1158       macro->traditional = 1;
1159 
1160       /* Write out the block information.  */
1161       block->text_len = len;
1162       block->arg_index = arg_index;
1163       memcpy (block->text, pfile->out.base, len);
1164 
1165       /* Lex the rest into the start of the output buffer.  */
1166       pfile->out.cur = pfile->out.base;
1167 
1168       macro->count += blen;
1169 
1170       /* If we've finished, commit the memory.  */
1171       if (arg_index == 0)
1172 	BUFF_FRONT (pfile->a_buff) += macro->count;
1173     }
1174 }
1175 
1176 /* Analyze and save the replacement text of a macro.  Returns true on
1177    success.  */
1178 bool
_cpp_create_trad_definition(cpp_reader * pfile,cpp_macro * macro)1179 _cpp_create_trad_definition (cpp_reader *pfile, cpp_macro *macro)
1180 {
1181   const uchar *cur;
1182   uchar *limit;
1183   cpp_context *context = pfile->context;
1184 
1185   /* The context has not been set up for command line defines, and CUR
1186      has not been updated for the macro name for in-file defines.  */
1187   pfile->out.cur = pfile->out.base;
1188   CUR (context) = pfile->buffer->cur;
1189   RLIMIT (context) = pfile->buffer->rlimit;
1190   check_output_buffer (pfile, RLIMIT (context) - CUR (context));
1191 
1192   /* Is this a function-like macro?  */
1193   if (* CUR (context) == '(')
1194     {
1195       bool ok = scan_parameters (pfile, macro);
1196 
1197       /* Remember the params so we can clear NODE_MACRO_ARG flags.  */
1198       macro->params = (cpp_hashnode **) BUFF_FRONT (pfile->a_buff);
1199 
1200       /* Setting macro to NULL indicates an error occurred, and
1201 	 prevents unnecessary work in _cpp_scan_out_logical_line.  */
1202       if (!ok)
1203 	macro = NULL;
1204       else
1205 	{
1206 	  BUFF_FRONT (pfile->a_buff) = (uchar *) &macro->params[macro->paramc];
1207 	  macro->fun_like = 1;
1208 	}
1209     }
1210 
1211   /* Skip leading whitespace in the replacement text.  */
1212   pfile->buffer->cur
1213     = skip_whitespace (pfile, CUR (context),
1214 		       CPP_OPTION (pfile, discard_comments_in_macro_exp));
1215 
1216   pfile->state.prevent_expansion++;
1217   _cpp_scan_out_logical_line (pfile, macro, false);
1218   pfile->state.prevent_expansion--;
1219 
1220   if (!macro)
1221     return false;
1222 
1223   /* Skip trailing white space.  */
1224   cur = pfile->out.base;
1225   limit = pfile->out.cur;
1226   while (limit > cur && is_space (limit[-1]))
1227     limit--;
1228   pfile->out.cur = limit;
1229   save_replacement_text (pfile, macro, 0);
1230 
1231   return true;
1232 }
1233 
1234 /* Copy SRC of length LEN to DEST, but convert all contiguous
1235    whitespace to a single space, provided it is not in quotes.  The
1236    quote currently in effect is pointed to by PQUOTE, and is updated
1237    by the function.  Returns the number of bytes copied.  */
1238 static size_t
canonicalize_text(uchar * dest,const uchar * src,size_t len,uchar * pquote)1239 canonicalize_text (uchar *dest, const uchar *src, size_t len, uchar *pquote)
1240 {
1241   uchar *orig_dest = dest;
1242   uchar quote = *pquote;
1243 
1244   while (len)
1245     {
1246       if (is_space (*src) && !quote)
1247 	{
1248 	  do
1249 	    src++, len--;
1250 	  while (len && is_space (*src));
1251 	  *dest++ = ' ';
1252 	}
1253       else
1254 	{
1255 	  if (*src == '\'' || *src == '"')
1256 	    {
1257 	      if (!quote)
1258 		quote = *src;
1259 	      else if (quote == *src)
1260 		quote = 0;
1261 	    }
1262 	  *dest++ = *src++, len--;
1263 	}
1264     }
1265 
1266   *pquote = quote;
1267   return dest - orig_dest;
1268 }
1269 
1270 /* Returns true if MACRO1 and MACRO2 have expansions different other
1271    than in the form of their whitespace.  */
1272 bool
_cpp_expansions_different_trad(const cpp_macro * macro1,const cpp_macro * macro2)1273 _cpp_expansions_different_trad (const cpp_macro *macro1,
1274 				const cpp_macro *macro2)
1275 {
1276   uchar *p1 = XNEWVEC (uchar, macro1->count + macro2->count);
1277   uchar *p2 = p1 + macro1->count;
1278   uchar quote1 = 0, quote2 = 0;
1279   bool mismatch;
1280   size_t len1, len2;
1281 
1282   if (macro1->paramc > 0)
1283     {
1284       const uchar *exp1 = macro1->exp.text, *exp2 = macro2->exp.text;
1285 
1286       mismatch = true;
1287       for (;;)
1288 	{
1289 	  struct block *b1 = (struct block *) exp1;
1290 	  struct block *b2 = (struct block *) exp2;
1291 
1292 	  if (b1->arg_index != b2->arg_index)
1293 	    break;
1294 
1295 	  len1 = canonicalize_text (p1, b1->text, b1->text_len, &quote1);
1296 	  len2 = canonicalize_text (p2, b2->text, b2->text_len, &quote2);
1297 	  if (len1 != len2 || memcmp (p1, p2, len1))
1298 	    break;
1299 	  if (b1->arg_index == 0)
1300 	    {
1301 	      mismatch = false;
1302 	      break;
1303 	    }
1304 	  exp1 += BLOCK_LEN (b1->text_len);
1305 	  exp2 += BLOCK_LEN (b2->text_len);
1306 	}
1307     }
1308   else
1309     {
1310       len1 = canonicalize_text (p1, macro1->exp.text, macro1->count, &quote1);
1311       len2 = canonicalize_text (p2, macro2->exp.text, macro2->count, &quote2);
1312       mismatch = (len1 != len2 || memcmp (p1, p2, len1));
1313     }
1314 
1315   free (p1);
1316   return mismatch;
1317 }
1318