1 /* CPP Library - traditional lexical analysis and macro expansion.
2    Copyright (C) 2002-2014 Free Software Foundation, Inc.
3    Contributed by Neil Booth, May 2002
4 
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
8 later version.
9 
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU General Public License for more details.
14 
15 You should have received a copy of the GNU General Public License
16 along with this program; see the file COPYING3.  If not see
17 <http://www.gnu.org/licenses/>.  */
18 
19 #include "config.h"
20 #include "system.h"
21 #include "cpplib.h"
22 #include "internal.h"
23 
/* The replacement text of a function-like macro is stored as a
   contiguous sequence of aligned blocks, each representing the text
   between subsequent parameters.

   Each block comprises the text between its surrounding parameters,
   the length of that text, and the one-based index of the following
   parameter.  The final block in the replacement text is easily
   recognizable as it has an argument index of zero.  */

struct block
{
  /* Number of bytes of literal text held in TEXT.  */
  unsigned int text_len;
  /* One-based index of the parameter that follows the text, or zero
     for the last block of a replacement list.  */
  unsigned short arg_index;
  /* First byte of the text.  The remaining TEXT_LEN - 1 bytes follow
     the structure directly; whole blocks are sized with BLOCK_LEN.  */
  uchar text[1];
};
39 
/* Number of bytes in a struct block that precede its text.  */
#define BLOCK_HEADER_LEN offsetof (struct block, text)
/* Total size of a block holding TEXT_LEN bytes of text: header plus
   text, passed through CPP_ALIGN so that consecutive blocks stay
   aligned.  */
#define BLOCK_LEN(TEXT_LEN) CPP_ALIGN (BLOCK_HEADER_LEN + (TEXT_LEN))
42 
/* Structure holding information about a function-like macro
   invocation.  It is filled in by maybe_start_funlike() when the
   macro name is seen and extended by save_argument() as the
   arguments are scanned.  */
struct fun_macro
{
  /* Memory buffer holding the ARGS array.  */
  _cpp_buff *buff;

  /* An array of size the number of macro parameters + 1, containing
     the offsets of the start of each macro argument in the output
     buffer.  The argument continues until the character before the
     start of the next one.  */
  size_t *args;

  /* The hashnode of the macro.  */
  cpp_hashnode *node;

  /* The offset of the macro name in the output buffer.  */
  size_t offset;

  /* The line the macro name appeared on.  */
  source_location line;

  /* Zero-based index of argument being currently lexed.  */
  unsigned int argc;
};
68 
/* Lexing state tracked by _cpp_scan_out_logical_line().  It is mostly
   used to prevent macro expansion: identifiers are only considered
   for expansion in the ls_none and ls_fun_open states.  */
enum ls {ls_none = 0,		/* Normal state.  */
	 ls_fun_open,		/* When looking for '('.  */
	 ls_fun_close,		/* When looking for ')'.  */
	 ls_defined,		/* After defined.  */
	 ls_defined_close,	/* Looking for ')' of defined().  */
	 ls_hash,		/* After # in preprocessor conditional.  */
	 ls_predicate,		/* After the predicate, maybe paren?  */
	 ls_answer,		/* In answer to predicate.  */
	 ls_has_include,	/* After __has_include__.  */
	 ls_has_include_close};	/* Looking for ')' of __has_include__.  */
80 
81 /* Lexing TODO: Maybe handle space in escaped newlines.  Stop lex.c
82    from recognizing comments and directives during its lexing pass.  */
83 
/* Forward declarations of the file-local helpers defined below.  */
static const uchar *skip_whitespace (cpp_reader *, const uchar *, int);
static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
static const uchar *copy_comment (cpp_reader *, const uchar *, int);
static void check_output_buffer (cpp_reader *, size_t);
static void push_replacement_text (cpp_reader *, cpp_hashnode *);
static bool scan_parameters (cpp_reader *, cpp_macro *);
static bool recursive_macro (cpp_reader *, cpp_hashnode *);
static void save_replacement_text (cpp_reader *, cpp_macro *, unsigned int);
static void maybe_start_funlike (cpp_reader *, cpp_hashnode *, const uchar *,
				 struct fun_macro *);
static void save_argument (struct fun_macro *, size_t);
static void replace_args_and_push (cpp_reader *, struct fun_macro *);
static size_t canonicalize_text (uchar *, const uchar *, size_t, uchar *);
97 
98 /* Ensures we have N bytes' space in the output buffer, and
99    reallocates it if not.  */
100 static void
check_output_buffer(cpp_reader * pfile,size_t n)101 check_output_buffer (cpp_reader *pfile, size_t n)
102 {
103   /* We might need two bytes to terminate an unterminated comment, and
104      one more to terminate the line with a NUL.  */
105   n += 2 + 1;
106 
107   if (n > (size_t) (pfile->out.limit - pfile->out.cur))
108     {
109       size_t size = pfile->out.cur - pfile->out.base;
110       size_t new_size = (size + n) * 3 / 2;
111 
112       pfile->out.base = XRESIZEVEC (unsigned char, pfile->out.base, new_size);
113       pfile->out.limit = pfile->out.base + new_size;
114       pfile->out.cur = pfile->out.base + size;
115     }
116 }
117 
118 /* Skip a C-style block comment in a macro as a result of -CC.
119    Buffer->cur points to the initial asterisk of the comment.  */
120 static void
skip_macro_block_comment(cpp_reader * pfile)121 skip_macro_block_comment (cpp_reader *pfile)
122 {
123   const uchar *cur = pfile->buffer->cur;
124 
125   cur++;
126   if (*cur == '/')
127     cur++;
128 
129   /* People like decorating comments with '*', so check for '/'
130      instead for efficiency.  */
131   while(! (*cur++ == '/' && cur[-2] == '*') )
132     ;
133 
134   pfile->buffer->cur = cur;
135 }
136 
137 /* CUR points to the asterisk introducing a comment in the current
138    context.  IN_DEFINE is true if we are in the replacement text of a
139    macro.
140 
141    The asterisk and following comment is copied to the buffer pointed
142    to by pfile->out.cur, which must be of sufficient size.
143    Unterminated comments are diagnosed, and correctly terminated in
144    the output.  pfile->out.cur is updated depending upon IN_DEFINE,
145    -C, -CC and pfile->state.in_directive.
146 
147    Returns a pointer to the first character after the comment in the
148    input buffer.  */
149 static const uchar *
copy_comment(cpp_reader * pfile,const uchar * cur,int in_define)150 copy_comment (cpp_reader *pfile, const uchar *cur, int in_define)
151 {
152   bool unterminated, copy = false;
153   source_location src_loc = pfile->line_table->highest_line;
154   cpp_buffer *buffer = pfile->buffer;
155 
156   buffer->cur = cur;
157   if (pfile->context->prev)
158     unterminated = false, skip_macro_block_comment (pfile);
159   else
160     unterminated = _cpp_skip_block_comment (pfile);
161 
162   if (unterminated)
163     cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
164 			 "unterminated comment");
165 
166   /* Comments in directives become spaces so that tokens are properly
167      separated when the ISO preprocessor re-lexes the line.  The
168      exception is #define.  */
169   if (pfile->state.in_directive)
170     {
171       if (in_define)
172 	{
173 	  if (CPP_OPTION (pfile, discard_comments_in_macro_exp))
174 	    pfile->out.cur--;
175 	  else
176 	    copy = true;
177 	}
178       else
179 	pfile->out.cur[-1] = ' ';
180     }
181   else if (CPP_OPTION (pfile, discard_comments))
182     pfile->out.cur--;
183   else
184     copy = true;
185 
186   if (copy)
187     {
188       size_t len = (size_t) (buffer->cur - cur);
189       memcpy (pfile->out.cur, cur, len);
190       pfile->out.cur += len;
191       if (unterminated)
192 	{
193 	  *pfile->out.cur++ = '*';
194 	  *pfile->out.cur++ = '/';
195 	}
196     }
197 
198   return buffer->cur;
199 }
200 
201 /* CUR points to any character in the input buffer.  Skips over all
202    contiguous horizontal white space and NULs, including comments if
203    SKIP_COMMENTS, until reaching the first non-horizontal-whitespace
204    character or the end of the current context.  Escaped newlines are
205    removed.
206 
207    The whitespace is copied verbatim to the output buffer, except that
208    comments are handled as described in copy_comment().
209    pfile->out.cur is updated.
210 
211    Returns a pointer to the first character after the whitespace in
212    the input buffer.  */
213 static const uchar *
skip_whitespace(cpp_reader * pfile,const uchar * cur,int skip_comments)214 skip_whitespace (cpp_reader *pfile, const uchar *cur, int skip_comments)
215 {
216   uchar *out = pfile->out.cur;
217 
218   for (;;)
219     {
220       unsigned int c = *cur++;
221       *out++ = c;
222 
223       if (is_nvspace (c))
224 	continue;
225 
226       if (c == '/' && *cur == '*' && skip_comments)
227 	{
228 	  pfile->out.cur = out;
229 	  cur = copy_comment (pfile, cur, false /* in_define */);
230 	  out = pfile->out.cur;
231 	  continue;
232 	}
233 
234       out--;
235       break;
236     }
237 
238   pfile->out.cur = out;
239   return cur - 1;
240 }
241 
242 /* Lexes and outputs an identifier starting at CUR, which is assumed
243    to point to a valid first character of an identifier.  Returns
244    the hashnode, and updates out.cur.  */
245 static cpp_hashnode *
lex_identifier(cpp_reader * pfile,const uchar * cur)246 lex_identifier (cpp_reader *pfile, const uchar *cur)
247 {
248   size_t len;
249   uchar *out = pfile->out.cur;
250   cpp_hashnode *result;
251 
252   do
253     *out++ = *cur++;
254   while (is_numchar (*cur));
255 
256   CUR (pfile->context) = cur;
257   len = out - pfile->out.cur;
258   result = CPP_HASHNODE (ht_lookup (pfile->hash_table, pfile->out.cur,
259 				    len, HT_ALLOC));
260   pfile->out.cur = out;
261   return result;
262 }
263 
264 /* Overlays the true file buffer temporarily with text of length LEN
265    starting at START.  The true buffer is restored upon calling
266    restore_buff().  */
267 void
_cpp_overlay_buffer(cpp_reader * pfile,const uchar * start,size_t len)268 _cpp_overlay_buffer (cpp_reader *pfile, const uchar *start, size_t len)
269 {
270   cpp_buffer *buffer = pfile->buffer;
271 
272   pfile->overlaid_buffer = buffer;
273   pfile->saved_cur = buffer->cur;
274   pfile->saved_rlimit = buffer->rlimit;
275   pfile->saved_line_base = buffer->next_line;
276   buffer->need_line = false;
277 
278   buffer->cur = start;
279   buffer->line_base = start;
280   buffer->rlimit = start + len;
281 }
282 
283 /* Restores a buffer overlaid by _cpp_overlay_buffer().  */
284 void
_cpp_remove_overlay(cpp_reader * pfile)285 _cpp_remove_overlay (cpp_reader *pfile)
286 {
287   cpp_buffer *buffer = pfile->overlaid_buffer;
288 
289   buffer->cur = pfile->saved_cur;
290   buffer->rlimit = pfile->saved_rlimit;
291   buffer->line_base = pfile->saved_line_base;
292   buffer->need_line = true;
293 
294   pfile->overlaid_buffer = NULL;
295 }
296 
297 /* Reads a logical line into the output buffer.  Returns TRUE if there
298    is more text left in the buffer.  */
299 bool
_cpp_read_logical_line_trad(cpp_reader * pfile)300 _cpp_read_logical_line_trad (cpp_reader *pfile)
301 {
302   do
303     {
304       if (pfile->buffer->need_line && !_cpp_get_fresh_line (pfile))
305 	return false;
306     }
307   while (!_cpp_scan_out_logical_line (pfile, NULL) || pfile->state.skipping);
308 
309   return pfile->buffer != NULL;
310 }
311 
312 /* Set up state for finding the opening '(' of a function-like
313    macro.  */
314 static void
maybe_start_funlike(cpp_reader * pfile,cpp_hashnode * node,const uchar * start,struct fun_macro * macro)315 maybe_start_funlike (cpp_reader *pfile, cpp_hashnode *node, const uchar *start, struct fun_macro *macro)
316 {
317   unsigned int n = node->value.macro->paramc + 1;
318 
319   if (macro->buff)
320     _cpp_release_buff (pfile, macro->buff);
321   macro->buff = _cpp_get_buff (pfile, n * sizeof (size_t));
322   macro->args = (size_t *) BUFF_FRONT (macro->buff);
323   macro->node = node;
324   macro->offset = start - pfile->out.base;
325   macro->argc = 0;
326 }
327 
328 /* Save the OFFSET of the start of the next argument to MACRO.  */
329 static void
save_argument(struct fun_macro * macro,size_t offset)330 save_argument (struct fun_macro *macro, size_t offset)
331 {
332   macro->argc++;
333   if (macro->argc <= macro->node->value.macro->paramc)
334     macro->args[macro->argc] = offset;
335 }
336 
/* Copies the next logical line in the current buffer (starting at
   buffer->cur) to the output buffer.  The output is guaranteed to
   terminate with a NUL character.  buffer->cur is updated.
   (NOTE(review): no explicit NUL store is visible in this function;
   presumably the terminator is supplied elsewhere -- confirm.)

   If MACRO is non-NULL, then we are scanning the replacement list of
   MACRO, and we call save_replacement_text() every time we meet an
   argument.

   Macros met on the line are expanded by pushing their replacement
   text as a new context and rescanning from the "new_context" label.

   Returns false if the line was a directive (either a null directive
   or one passed to _cpp_handle_directive()), and true otherwise.  */
bool
_cpp_scan_out_logical_line (cpp_reader *pfile, cpp_macro *macro)
{
  bool result = true;
  cpp_context *context;
  const uchar *cur;
  uchar *out;
  struct fun_macro fmacro;
  unsigned int c, paren_depth = 0, quote;
  enum ls lex_state = ls_none;
  bool header_ok;
  const uchar *start_of_input_line;

  fmacro.buff = NULL;
  fmacro.args = NULL;
  fmacro.node = NULL;
  fmacro.offset = 0;
  fmacro.line = 0;
  fmacro.argc = 0;

  /* QUOTE holds the character that will close the literal we are
     currently inside, or zero when outside any literal.  */
  quote = 0;
  header_ok = pfile->state.angled_headers;
  CUR (pfile->context) = pfile->buffer->cur;
  RLIMIT (pfile->context) = pfile->buffer->rlimit;
  pfile->out.cur = pfile->out.base;
  pfile->out.first_line = pfile->line_table->highest_line;
  /* start_of_input_line is needed to make sure that directives really,
     really start at the first character of the line.  */
  start_of_input_line = pfile->buffer->cur;
  /* Re-entered whenever a context is pushed or popped: reload the
     input position and make room for the rest of that context.  */
 new_context:
  context = pfile->context;
  cur = CUR (context);
  check_output_buffer (pfile, RLIMIT (context) - cur);
  out = pfile->out.cur;

  for (;;)
    {
      /* Line notes are only processed for the file buffer itself,
	 not for pushed macro contexts.  */
      if (!context->prev
	  && cur >= pfile->buffer->notes[pfile->buffer->cur_note].pos)
	{
	  pfile->buffer->cur = cur;
	  _cpp_process_line_notes (pfile, false);
	}
      c = *cur++;
      *out++ = c;

      /* Whitespace should "continue" out of the switch,
	 non-whitespace should "break" out of it.  */
      switch (c)
	{
	case ' ':
	case '\t':
	case '\f':
	case '\v':
	case '\0':
	  continue;

	case '\n':
	  /* If this is a macro's expansion, pop it.  */
	  if (context->prev)
	    {
	      pfile->out.cur = out - 1;
	      _cpp_pop_context (pfile);
	      goto new_context;
	    }

	  /* Omit the newline from the output buffer.  */
	  pfile->out.cur = out - 1;
	  pfile->buffer->cur = cur;
	  pfile->buffer->need_line = true;
	  CPP_INCREMENT_LINE (pfile, 0);

	  /* Outside directives, a function-like macro invocation may
	     continue on the next line, so fetch it and keep going.  */
	  if ((lex_state == ls_fun_open || lex_state == ls_fun_close)
	      && !pfile->state.in_directive
	      && _cpp_get_fresh_line (pfile))
	    {
	      /* Newlines in arguments become a space, but we don't
		 clear any in-progress quote.  */
	      if (lex_state == ls_fun_close)
		out[-1] = ' ';
	      cur = pfile->buffer->cur;
	      continue;
	    }
	  goto done;

	case '<':
	  /* While an angled header name is still acceptable, '<'
	     opens a literal that is closed by '>'.  */
	  if (header_ok)
	    quote = '>';
	  break;
	case '>':
	  if (c == quote)
	    quote = 0;
	  break;

	case '"':
	case '\'':
	  if (c == quote)
	    quote = 0;
	  else if (!quote)
	    quote = c;
	  break;

	case '\\':
	  /* Skip escaped quotes here, it's easier than above.  */
	  if (*cur == '\\' || *cur == '"' || *cur == '\'')
	    *out++ = *cur++;
	  break;

	case '/':
	  /* Traditional CPP does not recognize comments within
	     literals.  */
	  if (!quote && *cur == '*')
	    {
	      pfile->out.cur = out;
	      cur = copy_comment (pfile, cur, macro != 0);
	      out = pfile->out.cur;
	      continue;
	    }
	  break;

	case '_':
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
	case 'y': case 'z':
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
	case 'Y': case 'Z':
	  /* Identifiers are looked at outside literals, and also
	     inside them while a macro definition is being scanned.  */
	  if (!pfile->state.skipping && (quote == 0 || macro))
	    {
	      cpp_hashnode *node;
	      uchar *out_start = out - 1;

	      /* Re-lex the whole identifier from its first character,
		 which has already been copied to OUT_START.  */
	      pfile->out.cur = out_start;
	      node = lex_identifier (pfile, cur - 1);
	      out = pfile->out.cur;
	      cur = CUR (context);

	      if (node->type == NT_MACRO
		  /* Should we expand for ls_answer?  */
		  && (lex_state == ls_none || lex_state == ls_fun_open)
		  && !pfile->state.prevent_expansion)
		{
		  /* Macros invalidate MI optimization.  */
		  pfile->mi_valid = false;
		  if (! (node->flags & NODE_BUILTIN)
		      && node->value.macro->fun_like)
		    {
		      maybe_start_funlike (pfile, node, out_start, &fmacro);
		      lex_state = ls_fun_open;
		      fmacro.line = pfile->line_table->highest_line;
		      continue;
		    }
		  else if (!recursive_macro (pfile, node))
		    {
		      /* Remove the object-like macro's name from the
			 output, and push its replacement text.  */
		      pfile->out.cur = out_start;
		      push_replacement_text (pfile, node);
		      lex_state = ls_none;
		      goto new_context;
		    }
		}
	      else if (macro && (node->flags & NODE_MACRO_ARG) != 0)
		{
		  /* Found a parameter in the replacement text of a
		     #define.  Remove its name from the output.  */
		  pfile->out.cur = out_start;
		  save_replacement_text (pfile, macro, node->value.arg_index);
		  /* The text so far has been saved; collect the next
		     stretch from the start of the output buffer.  */
		  out = pfile->out.base;
		}
	      else if (lex_state == ls_hash)
		{
		  lex_state = ls_predicate;
		  continue;
		}
	      else if (pfile->state.in_expression
		       && node == pfile->spec_nodes.n_defined)
		{
		  lex_state = ls_defined;
		  continue;
		}
	      else if (pfile->state.in_expression
		       && (node == pfile->spec_nodes.n__has_include__
			|| node == pfile->spec_nodes.n__has_include_next__))
		{
		  lex_state = ls_has_include;
		  continue;
		}
	    }
	  break;

	case '(':
	  if (quote == 0)
	    {
	      paren_depth++;
	      if (lex_state == ls_fun_open)
		{
		  if (recursive_macro (pfile, fmacro.node))
		    lex_state = ls_none;
		  else
		    {
		      /* Start collecting arguments.  The output is
			 rewound to where the macro name began, so the
			 first argument starts at that offset.  */
		      lex_state = ls_fun_close;
		      paren_depth = 1;
		      out = pfile->out.base + fmacro.offset;
		      fmacro.args[0] = fmacro.offset;
		    }
		}
	      else if (lex_state == ls_predicate)
		lex_state = ls_answer;
	      else if (lex_state == ls_defined)
		lex_state = ls_defined_close;
	      else if (lex_state == ls_has_include)
		lex_state = ls_has_include_close;
	    }
	  break;

	case ',':
	  /* Only a comma at the outermost parenthesis level of the
	     invocation separates arguments.  */
	  if (quote == 0 && lex_state == ls_fun_close && paren_depth == 1)
	    save_argument (&fmacro, out - pfile->out.base);
	  break;

	case ')':
	  if (quote == 0)
	    {
	      paren_depth--;
	      if (lex_state == ls_fun_close && paren_depth == 0)
		{
		  cpp_macro *m = fmacro.node->value.macro;

		  m->used = 1;
		  lex_state = ls_none;
		  save_argument (&fmacro, out - pfile->out.base);

		  /* A single zero-length argument is no argument.  */
		  if (fmacro.argc == 1
		      && m->paramc == 0
		      && out == pfile->out.base + fmacro.offset + 1)
		    fmacro.argc = 0;

		  if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
		    {
		      /* Remove the macro's invocation from the
			 output, and push its replacement text.  */
		      pfile->out.cur = (pfile->out.base
					     + fmacro.offset);
		      CUR (context) = cur;
		      replace_args_and_push (pfile, &fmacro);
		      goto new_context;
		    }
		}
	      else if (lex_state == ls_answer || lex_state == ls_defined_close
			|| lex_state == ls_has_include_close)
		lex_state = ls_none;
	    }
	  break;

	case '#':
	  if (cur - 1 == start_of_input_line
	      /* A '#' from a macro doesn't start a directive.  */
	      && !pfile->context->prev
	      && !pfile->state.in_directive)
	    {
	      /* A directive.  With the way _cpp_handle_directive
		 currently works, we only want to call it if either we
		 know the directive is OK, or we want it to fail and
		 be removed from the output.  If we want it to be
		 passed through (the assembler case) then we must not
		 call _cpp_handle_directive.  */
	      pfile->out.cur = out;
	      cur = skip_whitespace (pfile, cur, true /* skip_comments */);
	      out = pfile->out.cur;

	      if (*cur == '\n')
		{
		  /* Null directive.  Ignore it and don't invalidate
		     the MI optimization.  */
		  pfile->buffer->need_line = true;
		  CPP_INCREMENT_LINE (pfile, 0);
		  result = false;
		  goto done;
		}
	      else
		{
		  bool do_it = false;

		  if (is_numstart (*cur)
		      && CPP_OPTION (pfile, lang) != CLK_ASM)
		    do_it = true;
		  else if (is_idstart (*cur))
		    /* Check whether we know this directive, but don't
		       advance.  */
		    do_it = lex_identifier (pfile, cur)->is_directive;

		  if (do_it || CPP_OPTION (pfile, lang) != CLK_ASM)
		    {
		      /* This is a kludge.  We want to have the ISO
			 preprocessor lex the next token.  */
		      pfile->buffer->cur = cur;
		      _cpp_handle_directive (pfile, false /* indented */);
		      result = false;
		      goto done;
		    }
		}
	    }

	  if (pfile->state.in_expression)
	    {
	      lex_state = ls_hash;
	      continue;
	    }
	  break;

	default:
	  break;
	}

      /* Non-whitespace disables MI optimization and stops treating
	 '<' as a quote in #include.  */
      header_ok = false;
      if (!pfile->state.in_directive)
	pfile->mi_valid = false;

      if (lex_state == ls_none)
	continue;

      /* Some of these transitions of state are syntax errors.  The
	 ISO preprocessor will issue errors later.  */
      if (lex_state == ls_fun_open)
	/* Missing '('.  */
	lex_state = ls_none;
      else if (lex_state == ls_hash
	       || lex_state == ls_predicate
	       || lex_state == ls_defined
	       || lex_state == ls_has_include)
	lex_state = ls_none;

      /* ls_answer, ls_defined_close and ls_has_include_close keep
	 going until ')'; ls_fun_close keeps collecting arguments.  */
    }

 done:
  if (fmacro.buff)
    _cpp_release_buff (pfile, fmacro.buff);

  /* The line ended while still inside a macro's argument list.  */
  if (lex_state == ls_fun_close)
    cpp_error_with_line (pfile, CPP_DL_ERROR, fmacro.line, 0,
			 "unterminated argument list invoking macro \"%s\"",
			 NODE_NAME (fmacro.node));
  return result;
}
697 
698 /* Push a context holding the replacement text of the macro NODE on
699    the context stack.  NODE is either object-like, or a function-like
700    macro with no arguments.  */
701 static void
push_replacement_text(cpp_reader * pfile,cpp_hashnode * node)702 push_replacement_text (cpp_reader *pfile, cpp_hashnode *node)
703 {
704   size_t len;
705   const uchar *text;
706   uchar *buf;
707 
708   if (node->flags & NODE_BUILTIN)
709     {
710       text = _cpp_builtin_macro_text (pfile, node);
711       len = ustrlen (text);
712       buf = _cpp_unaligned_alloc (pfile, len + 1);
713       memcpy (buf, text, len);
714       buf[len]='\n';
715       text = buf;
716     }
717   else
718     {
719       cpp_macro *macro = node->value.macro;
720       macro->used = 1;
721       text = macro->exp.text;
722       macro->traditional = 1;
723       len = macro->count;
724     }
725 
726   _cpp_push_text_context (pfile, node, text, len);
727 }
728 
729 /* Returns TRUE if traditional macro recursion is detected.  */
730 static bool
recursive_macro(cpp_reader * pfile,cpp_hashnode * node)731 recursive_macro (cpp_reader *pfile, cpp_hashnode *node)
732 {
733   bool recursing = !!(node->flags & NODE_DISABLED);
734 
735   /* Object-like macros that are already expanding are necessarily
736      recursive.
737 
738      However, it is possible to have traditional function-like macros
739      that are not infinitely recursive but recurse to any given depth.
740      Further, it is easy to construct examples that get ever longer
741      until the point they stop recursing.  So there is no easy way to
742      detect true recursion; instead we assume any expansion more than
743      20 deep since the first invocation of this macro must be
744      recursing.  */
745   if (recursing && node->value.macro->fun_like)
746     {
747       size_t depth = 0;
748       cpp_context *context = pfile->context;
749 
750       do
751 	{
752 	  depth++;
753 	  if (context->c.macro == node && depth > 20)
754 	    break;
755 	  context = context->prev;
756 	}
757       while (context);
758       recursing = context != NULL;
759     }
760 
761   if (recursing)
762     cpp_error (pfile, CPP_DL_ERROR,
763 	       "detected recursion whilst expanding macro \"%s\"",
764 	       NODE_NAME (node));
765 
766   return recursing;
767 }
768 
769 /* Return the length of the replacement text of a function-like or
770    object-like non-builtin macro.  */
771 size_t
_cpp_replacement_text_len(const cpp_macro * macro)772 _cpp_replacement_text_len (const cpp_macro *macro)
773 {
774   size_t len;
775 
776   if (macro->fun_like && (macro->paramc != 0))
777     {
778       const uchar *exp;
779 
780       len = 0;
781       for (exp = macro->exp.text;;)
782 	{
783 	  struct block *b = (struct block *) exp;
784 
785 	  len += b->text_len;
786 	  if (b->arg_index == 0)
787 	    break;
788 	  len += NODE_LEN (macro->params[b->arg_index - 1]);
789 	  exp += BLOCK_LEN (b->text_len);
790 	}
791     }
792   else
793     len = macro->count;
794 
795   return len;
796 }
797 
798 /* Copy the replacement text of MACRO to DEST, which must be of
799    sufficient size.  It is not NUL-terminated.  The next character is
800    returned.  */
801 uchar *
_cpp_copy_replacement_text(const cpp_macro * macro,uchar * dest)802 _cpp_copy_replacement_text (const cpp_macro *macro, uchar *dest)
803 {
804   if (macro->fun_like && (macro->paramc != 0))
805     {
806       const uchar *exp;
807 
808       for (exp = macro->exp.text;;)
809 	{
810 	  struct block *b = (struct block *) exp;
811 	  cpp_hashnode *param;
812 
813 	  memcpy (dest, b->text, b->text_len);
814 	  dest += b->text_len;
815 	  if (b->arg_index == 0)
816 	    break;
817 	  param = macro->params[b->arg_index - 1];
818 	  memcpy (dest, NODE_NAME (param), NODE_LEN (param));
819 	  dest += NODE_LEN (param);
820 	  exp += BLOCK_LEN (b->text_len);
821 	}
822     }
823   else
824     {
825       memcpy (dest, macro->exp.text, macro->count);
826       dest += macro->count;
827     }
828 
829   return dest;
830 }
831 
832 /* Push a context holding the replacement text of the macro NODE on
833    the context stack.  NODE is either object-like, or a function-like
834    macro with no arguments.  */
835 static void
replace_args_and_push(cpp_reader * pfile,struct fun_macro * fmacro)836 replace_args_and_push (cpp_reader *pfile, struct fun_macro *fmacro)
837 {
838   cpp_macro *macro = fmacro->node->value.macro;
839 
840   if (macro->paramc == 0)
841     push_replacement_text (pfile, fmacro->node);
842   else
843     {
844       const uchar *exp;
845       uchar *p;
846       _cpp_buff *buff;
847       size_t len = 0;
848       int cxtquote = 0;
849 
850       /* Get an estimate of the length of the argument-replaced text.
851 	 This is a worst case estimate, assuming that every replacement
852 	 text character needs quoting.  */
853       for (exp = macro->exp.text;;)
854 	{
855 	  struct block *b = (struct block *) exp;
856 
857 	  len += b->text_len;
858 	  if (b->arg_index == 0)
859 	    break;
860 	  len += 2 * (fmacro->args[b->arg_index]
861 		      - fmacro->args[b->arg_index - 1] - 1);
862 	  exp += BLOCK_LEN (b->text_len);
863 	}
864 
865       /* Allocate room for the expansion plus \n.  */
866       buff = _cpp_get_buff (pfile, len + 1);
867 
868       /* Copy the expansion and replace arguments.  */
869       /* Accumulate actual length, including quoting as necessary */
870       p = BUFF_FRONT (buff);
871       len = 0;
872       for (exp = macro->exp.text;;)
873 	{
874 	  struct block *b = (struct block *) exp;
875 	  size_t arglen;
876 	  int argquote;
877 	  uchar *base;
878 	  uchar *in;
879 
880 	  len += b->text_len;
881 	  /* Copy the non-argument text literally, keeping
882 	     track of whether matching quotes have been seen. */
883 	  for (arglen = b->text_len, in = b->text; arglen > 0; arglen--)
884 	    {
885 	      if (*in == '"')
886 		cxtquote = ! cxtquote;
887 	      *p++ = *in++;
888 	    }
889 	  /* Done if no more arguments */
890 	  if (b->arg_index == 0)
891 	    break;
892 	  arglen = (fmacro->args[b->arg_index]
893 		    - fmacro->args[b->arg_index - 1] - 1);
894 	  base = pfile->out.base + fmacro->args[b->arg_index - 1];
895 	  in = base;
896 #if 0
897 	  /* Skip leading whitespace in the text for the argument to
898 	     be substituted. To be compatible with gcc 2.95, we would
899 	     also need to trim trailing whitespace. Gcc 2.95 trims
900 	     leading and trailing whitespace, which may be a bug.  The
901 	     current gcc testsuite explicitly checks that this leading
902 	     and trailing whitespace in actual arguments is
903 	     preserved. */
904 	  while (arglen > 0 && is_space (*in))
905 	    {
906 	      in++;
907 	      arglen--;
908 	    }
909 #endif
910 	  for (argquote = 0; arglen > 0; arglen--)
911 	    {
912 	      if (cxtquote && *in == '"')
913 		{
914 		  if (in > base && *(in-1) != '\\')
915 		    argquote = ! argquote;
916 		  /* Always add backslash before double quote if argument
917 		     is expanded in a quoted context */
918 		  *p++ = '\\';
919 		  len++;
920 		}
921 	      else if (cxtquote && argquote && *in == '\\')
922 		{
923 		  /* Always add backslash before a backslash in an argument
924 		     that is expanded in a quoted context and also in the
925 		     range of a quoted context in the argument itself. */
926 		  *p++ = '\\';
927 		  len++;
928 		}
929 	      *p++ = *in++;
930 	      len++;
931 	    }
932 	  exp += BLOCK_LEN (b->text_len);
933 	}
934 
935       /* \n-terminate.  */
936       *p = '\n';
937       _cpp_push_text_context (pfile, fmacro->node, BUFF_FRONT (buff), len);
938 
939       /* So we free buffer allocation when macro is left.  */
940       pfile->context->buff = buff;
941     }
942 }
943 
944 /* Read and record the parameters, if any, of a function-like macro
945    definition.  Destroys pfile->out.cur.
946 
947    Returns true on success, false on failure (syntax error or a
948    duplicate parameter).  On success, CUR (pfile->context) is just
949    past the closing parenthesis.  */
950 static bool
scan_parameters(cpp_reader * pfile,cpp_macro * macro)951 scan_parameters (cpp_reader *pfile, cpp_macro *macro)
952 {
953   const uchar *cur = CUR (pfile->context) + 1;
954   bool ok;
955 
956   for (;;)
957     {
958       cur = skip_whitespace (pfile, cur, true /* skip_comments */);
959 
960       if (is_idstart (*cur))
961 	{
962 	  ok = false;
963 	  if (_cpp_save_parameter (pfile, macro, lex_identifier (pfile, cur)))
964 	    break;
965 	  cur = skip_whitespace (pfile, CUR (pfile->context),
966 				 true /* skip_comments */);
967 	  if (*cur == ',')
968 	    {
969 	      cur++;
970 	      continue;
971 	    }
972 	  ok = (*cur == ')');
973 	  break;
974 	}
975 
976       ok = (*cur == ')' && macro->paramc == 0);
977       break;
978     }
979 
980   if (!ok)
981     cpp_error (pfile, CPP_DL_ERROR, "syntax error in macro parameter list");
982 
983   CUR (pfile->context) = cur + (*cur == ')');
984 
985   return ok;
986 }
987 
988 /* Save the text from pfile->out.base to pfile->out.cur as
989    the replacement text for the current macro, followed by argument
990    ARG_INDEX, with zero indicating the end of the replacement
991    text.  */
992 static void
save_replacement_text(cpp_reader * pfile,cpp_macro * macro,unsigned int arg_index)993 save_replacement_text (cpp_reader *pfile, cpp_macro *macro,
994 		       unsigned int arg_index)
995 {
996   size_t len = pfile->out.cur - pfile->out.base;
997   uchar *exp;
998 
999   if (macro->paramc == 0)
1000     {
1001       /* Object-like and function-like macros without parameters
1002 	 simply store their \n-terminated replacement text.  */
1003       exp = _cpp_unaligned_alloc (pfile, len + 1);
1004       memcpy (exp, pfile->out.base, len);
1005       exp[len] = '\n';
1006       macro->exp.text = exp;
1007       macro->traditional = 1;
1008       macro->count = len;
1009     }
1010   else
1011     {
1012       /* Store the text's length (unsigned int), the argument index
1013 	 (unsigned short, base 1) and then the text.  */
1014       size_t blen = BLOCK_LEN (len);
1015       struct block *block;
1016 
1017       if (macro->count + blen > BUFF_ROOM (pfile->a_buff))
1018 	_cpp_extend_buff (pfile, &pfile->a_buff, macro->count + blen);
1019 
1020       exp = BUFF_FRONT (pfile->a_buff);
1021       block = (struct block *) (exp + macro->count);
1022       macro->exp.text = exp;
1023       macro->traditional = 1;
1024 
1025       /* Write out the block information.  */
1026       block->text_len = len;
1027       block->arg_index = arg_index;
1028       memcpy (block->text, pfile->out.base, len);
1029 
1030       /* Lex the rest into the start of the output buffer.  */
1031       pfile->out.cur = pfile->out.base;
1032 
1033       macro->count += blen;
1034 
1035       /* If we've finished, commit the memory.  */
1036       if (arg_index == 0)
1037 	BUFF_FRONT (pfile->a_buff) += macro->count;
1038     }
1039 }
1040 
1041 /* Analyze and save the replacement text of a macro.  Returns true on
1042    success.  */
1043 bool
_cpp_create_trad_definition(cpp_reader * pfile,cpp_macro * macro)1044 _cpp_create_trad_definition (cpp_reader *pfile, cpp_macro *macro)
1045 {
1046   const uchar *cur;
1047   uchar *limit;
1048   cpp_context *context = pfile->context;
1049 
1050   /* The context has not been set up for command line defines, and CUR
1051      has not been updated for the macro name for in-file defines.  */
1052   pfile->out.cur = pfile->out.base;
1053   CUR (context) = pfile->buffer->cur;
1054   RLIMIT (context) = pfile->buffer->rlimit;
1055   check_output_buffer (pfile, RLIMIT (context) - CUR (context));
1056 
1057   /* Is this a function-like macro?  */
1058   if (* CUR (context) == '(')
1059     {
1060       bool ok = scan_parameters (pfile, macro);
1061 
1062       /* Remember the params so we can clear NODE_MACRO_ARG flags.  */
1063       macro->params = (cpp_hashnode **) BUFF_FRONT (pfile->a_buff);
1064 
1065       /* Setting macro to NULL indicates an error occurred, and
1066 	 prevents unnecessary work in _cpp_scan_out_logical_line.  */
1067       if (!ok)
1068 	macro = NULL;
1069       else
1070 	{
1071 	  BUFF_FRONT (pfile->a_buff) = (uchar *) &macro->params[macro->paramc];
1072 	  macro->fun_like = 1;
1073 	}
1074     }
1075 
1076   /* Skip leading whitespace in the replacement text.  */
1077   pfile->buffer->cur
1078     = skip_whitespace (pfile, CUR (context),
1079 		       CPP_OPTION (pfile, discard_comments_in_macro_exp));
1080 
1081   pfile->state.prevent_expansion++;
1082   _cpp_scan_out_logical_line (pfile, macro);
1083   pfile->state.prevent_expansion--;
1084 
1085   if (!macro)
1086     return false;
1087 
1088   /* Skip trailing white space.  */
1089   cur = pfile->out.base;
1090   limit = pfile->out.cur;
1091   while (limit > cur && is_space (limit[-1]))
1092     limit--;
1093   pfile->out.cur = limit;
1094   save_replacement_text (pfile, macro, 0);
1095 
1096   return true;
1097 }
1098 
1099 /* Copy SRC of length LEN to DEST, but convert all contiguous
1100    whitespace to a single space, provided it is not in quotes.  The
1101    quote currently in effect is pointed to by PQUOTE, and is updated
1102    by the function.  Returns the number of bytes copied.  */
1103 static size_t
canonicalize_text(uchar * dest,const uchar * src,size_t len,uchar * pquote)1104 canonicalize_text (uchar *dest, const uchar *src, size_t len, uchar *pquote)
1105 {
1106   uchar *orig_dest = dest;
1107   uchar quote = *pquote;
1108 
1109   while (len)
1110     {
1111       if (is_space (*src) && !quote)
1112 	{
1113 	  do
1114 	    src++, len--;
1115 	  while (len && is_space (*src));
1116 	  *dest++ = ' ';
1117 	}
1118       else
1119 	{
1120 	  if (*src == '\'' || *src == '"')
1121 	    {
1122 	      if (!quote)
1123 		quote = *src;
1124 	      else if (quote == *src)
1125 		quote = 0;
1126 	    }
1127 	  *dest++ = *src++, len--;
1128 	}
1129     }
1130 
1131   *pquote = quote;
1132   return dest - orig_dest;
1133 }
1134 
1135 /* Returns true if MACRO1 and MACRO2 have expansions different other
1136    than in the form of their whitespace.  */
1137 bool
_cpp_expansions_different_trad(const cpp_macro * macro1,const cpp_macro * macro2)1138 _cpp_expansions_different_trad (const cpp_macro *macro1,
1139 				const cpp_macro *macro2)
1140 {
1141   uchar *p1 = XNEWVEC (uchar, macro1->count + macro2->count);
1142   uchar *p2 = p1 + macro1->count;
1143   uchar quote1 = 0, quote2 = 0;
1144   bool mismatch;
1145   size_t len1, len2;
1146 
1147   if (macro1->paramc > 0)
1148     {
1149       const uchar *exp1 = macro1->exp.text, *exp2 = macro2->exp.text;
1150 
1151       mismatch = true;
1152       for (;;)
1153 	{
1154 	  struct block *b1 = (struct block *) exp1;
1155 	  struct block *b2 = (struct block *) exp2;
1156 
1157 	  if (b1->arg_index != b2->arg_index)
1158 	    break;
1159 
1160 	  len1 = canonicalize_text (p1, b1->text, b1->text_len, &quote1);
1161 	  len2 = canonicalize_text (p2, b2->text, b2->text_len, &quote2);
1162 	  if (len1 != len2 || memcmp (p1, p2, len1))
1163 	    break;
1164 	  if (b1->arg_index == 0)
1165 	    {
1166 	      mismatch = false;
1167 	      break;
1168 	    }
1169 	  exp1 += BLOCK_LEN (b1->text_len);
1170 	  exp2 += BLOCK_LEN (b2->text_len);
1171 	}
1172     }
1173   else
1174     {
1175       len1 = canonicalize_text (p1, macro1->exp.text, macro1->count, &quote1);
1176       len2 = canonicalize_text (p2, macro2->exp.text, macro2->count, &quote2);
1177       mismatch = (len1 != len2 || memcmp (p1, p2, len1));
1178     }
1179 
1180   free (p1);
1181   return mismatch;
1182 }
1183