1 /* CPP Library - traditional lexical analysis and macro expansion.
2    Copyright (C) 2002-2013 Free Software Foundation, Inc.
3    Contributed by Neil Booth, May 2002
4 
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
8 later version.
9 
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU General Public License for more details.
14 
15 You should have received a copy of the GNU General Public License
16 along with this program; see the file COPYING3.  If not see
17 <http://www.gnu.org/licenses/>.  */
18 
19 #include "config.h"
20 #include "system.h"
21 #include "cpplib.h"
22 #include "internal.h"
23 
24 /* The replacement text of a function-like macro is stored as a
25    contiguous sequence of aligned blocks, each representing the text
26    between subsequent parameters.
27 
28    Each block comprises the text between its surrounding parameters,
29    the length of that text, and the one-based index of the following
30    parameter.  The final block in the replacement text is easily
31    recognizable as it has an argument index of zero.  */
32 
33 struct block
34 {
35   unsigned int text_len;
36   unsigned short arg_index;
37   uchar text[1];
38 };
39 
40 #define BLOCK_HEADER_LEN offsetof (struct block, text)
41 #define BLOCK_LEN(TEXT_LEN) CPP_ALIGN (BLOCK_HEADER_LEN + (TEXT_LEN))
42 
43 /* Structure holding information about a function-like macro
44    invocation.  */
45 struct fun_macro
46 {
47   /* Memory buffer holding the trad_arg array.  */
48   _cpp_buff *buff;
49 
50   /* An array of size the number of macro parameters + 1, containing
51      the offsets of the start of each macro argument in the output
52      buffer.  The argument continues until the character before the
53      start of the next one.  */
54   size_t *args;
55 
56   /* The hashnode of the macro.  */
57   cpp_hashnode *node;
58 
59   /* The offset of the macro name in the output buffer.  */
60   size_t offset;
61 
62   /* The line the macro name appeared on.  */
63   source_location line;
64 
65   /* Zero-based index of argument being currently lexed.  */
66   unsigned int argc;
67 };
68 
/* State of the logical-line scanner.  It mostly exists to keep macro
   expansion from happening where it must not.  */
enum ls
{
  ls_none = 0,          /* Normal state.  */
  ls_fun_open,          /* When looking for '('.  */
  ls_fun_close,         /* When looking for ')'.  */
  ls_defined,           /* After defined.  */
  ls_defined_close,     /* Looking for ')' of defined().  */
  ls_hash,              /* After # in preprocessor conditional.  */
  ls_predicate,         /* After the predicate, maybe paren?  */
  ls_answer             /* In answer to predicate.  */
};
78 
79 /* Lexing TODO: Maybe handle space in escaped newlines.  Stop lex.c
80    from recognizing comments and directives during its lexing pass.  */
81 
82 static const uchar *skip_whitespace (cpp_reader *, const uchar *, int);
83 static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
84 static const uchar *copy_comment (cpp_reader *, const uchar *, int);
85 static void check_output_buffer (cpp_reader *, size_t);
86 static void push_replacement_text (cpp_reader *, cpp_hashnode *);
87 static bool scan_parameters (cpp_reader *, cpp_macro *);
88 static bool recursive_macro (cpp_reader *, cpp_hashnode *);
89 static void save_replacement_text (cpp_reader *, cpp_macro *, unsigned int);
90 static void maybe_start_funlike (cpp_reader *, cpp_hashnode *, const uchar *,
91 				 struct fun_macro *);
92 static void save_argument (struct fun_macro *, size_t);
93 static void replace_args_and_push (cpp_reader *, struct fun_macro *);
94 static size_t canonicalize_text (uchar *, const uchar *, size_t, uchar *);
95 
96 /* Ensures we have N bytes' space in the output buffer, and
97    reallocates it if not.  */
98 static void
check_output_buffer(cpp_reader * pfile,size_t n)99 check_output_buffer (cpp_reader *pfile, size_t n)
100 {
101   /* We might need two bytes to terminate an unterminated comment, and
102      one more to terminate the line with a NUL.  */
103   n += 2 + 1;
104 
105   if (n > (size_t) (pfile->out.limit - pfile->out.cur))
106     {
107       size_t size = pfile->out.cur - pfile->out.base;
108       size_t new_size = (size + n) * 3 / 2;
109 
110       pfile->out.base = XRESIZEVEC (unsigned char, pfile->out.base, new_size);
111       pfile->out.limit = pfile->out.base + new_size;
112       pfile->out.cur = pfile->out.base + size;
113     }
114 }
115 
116 /* Skip a C-style block comment in a macro as a result of -CC.
117    Buffer->cur points to the initial asterisk of the comment.  */
118 static void
skip_macro_block_comment(cpp_reader * pfile)119 skip_macro_block_comment (cpp_reader *pfile)
120 {
121   const uchar *cur = pfile->buffer->cur;
122 
123   cur++;
124   if (*cur == '/')
125     cur++;
126 
127   /* People like decorating comments with '*', so check for '/'
128      instead for efficiency.  */
129   while(! (*cur++ == '/' && cur[-2] == '*') )
130     ;
131 
132   pfile->buffer->cur = cur;
133 }
134 
135 /* CUR points to the asterisk introducing a comment in the current
136    context.  IN_DEFINE is true if we are in the replacement text of a
137    macro.
138 
139    The asterisk and following comment is copied to the buffer pointed
140    to by pfile->out.cur, which must be of sufficient size.
141    Unterminated comments are diagnosed, and correctly terminated in
142    the output.  pfile->out.cur is updated depending upon IN_DEFINE,
143    -C, -CC and pfile->state.in_directive.
144 
145    Returns a pointer to the first character after the comment in the
146    input buffer.  */
147 static const uchar *
copy_comment(cpp_reader * pfile,const uchar * cur,int in_define)148 copy_comment (cpp_reader *pfile, const uchar *cur, int in_define)
149 {
150   bool unterminated, copy = false;
151   source_location src_loc = pfile->line_table->highest_line;
152   cpp_buffer *buffer = pfile->buffer;
153 
154   buffer->cur = cur;
155   if (pfile->context->prev)
156     unterminated = false, skip_macro_block_comment (pfile);
157   else
158     unterminated = _cpp_skip_block_comment (pfile);
159 
160   if (unterminated)
161     cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
162 			 "unterminated comment");
163 
164   /* Comments in directives become spaces so that tokens are properly
165      separated when the ISO preprocessor re-lexes the line.  The
166      exception is #define.  */
167   if (pfile->state.in_directive)
168     {
169       if (in_define)
170 	{
171 	  if (CPP_OPTION (pfile, discard_comments_in_macro_exp))
172 	    pfile->out.cur--;
173 	  else
174 	    copy = true;
175 	}
176       else
177 	pfile->out.cur[-1] = ' ';
178     }
179   else if (CPP_OPTION (pfile, discard_comments))
180     pfile->out.cur--;
181   else
182     copy = true;
183 
184   if (copy)
185     {
186       size_t len = (size_t) (buffer->cur - cur);
187       memcpy (pfile->out.cur, cur, len);
188       pfile->out.cur += len;
189       if (unterminated)
190 	{
191 	  *pfile->out.cur++ = '*';
192 	  *pfile->out.cur++ = '/';
193 	}
194     }
195 
196   return buffer->cur;
197 }
198 
199 /* CUR points to any character in the input buffer.  Skips over all
200    contiguous horizontal white space and NULs, including comments if
201    SKIP_COMMENTS, until reaching the first non-horizontal-whitespace
202    character or the end of the current context.  Escaped newlines are
203    removed.
204 
205    The whitespace is copied verbatim to the output buffer, except that
206    comments are handled as described in copy_comment().
207    pfile->out.cur is updated.
208 
209    Returns a pointer to the first character after the whitespace in
210    the input buffer.  */
211 static const uchar *
skip_whitespace(cpp_reader * pfile,const uchar * cur,int skip_comments)212 skip_whitespace (cpp_reader *pfile, const uchar *cur, int skip_comments)
213 {
214   uchar *out = pfile->out.cur;
215 
216   for (;;)
217     {
218       unsigned int c = *cur++;
219       *out++ = c;
220 
221       if (is_nvspace (c))
222 	continue;
223 
224       if (c == '/' && *cur == '*' && skip_comments)
225 	{
226 	  pfile->out.cur = out;
227 	  cur = copy_comment (pfile, cur, false /* in_define */);
228 	  out = pfile->out.cur;
229 	  continue;
230 	}
231 
232       out--;
233       break;
234     }
235 
236   pfile->out.cur = out;
237   return cur - 1;
238 }
239 
240 /* Lexes and outputs an identifier starting at CUR, which is assumed
241    to point to a valid first character of an identifier.  Returns
242    the hashnode, and updates out.cur.  */
243 static cpp_hashnode *
lex_identifier(cpp_reader * pfile,const uchar * cur)244 lex_identifier (cpp_reader *pfile, const uchar *cur)
245 {
246   size_t len;
247   uchar *out = pfile->out.cur;
248   cpp_hashnode *result;
249 
250   do
251     *out++ = *cur++;
252   while (is_numchar (*cur));
253 
254   CUR (pfile->context) = cur;
255   len = out - pfile->out.cur;
256   result = CPP_HASHNODE (ht_lookup (pfile->hash_table, pfile->out.cur,
257 				    len, HT_ALLOC));
258   pfile->out.cur = out;
259   return result;
260 }
261 
262 /* Overlays the true file buffer temporarily with text of length LEN
263    starting at START.  The true buffer is restored upon calling
264    restore_buff().  */
265 void
_cpp_overlay_buffer(cpp_reader * pfile,const uchar * start,size_t len)266 _cpp_overlay_buffer (cpp_reader *pfile, const uchar *start, size_t len)
267 {
268   cpp_buffer *buffer = pfile->buffer;
269 
270   pfile->overlaid_buffer = buffer;
271   pfile->saved_cur = buffer->cur;
272   pfile->saved_rlimit = buffer->rlimit;
273   pfile->saved_line_base = buffer->next_line;
274   buffer->need_line = false;
275 
276   buffer->cur = start;
277   buffer->line_base = start;
278   buffer->rlimit = start + len;
279 }
280 
281 /* Restores a buffer overlaid by _cpp_overlay_buffer().  */
282 void
_cpp_remove_overlay(cpp_reader * pfile)283 _cpp_remove_overlay (cpp_reader *pfile)
284 {
285   cpp_buffer *buffer = pfile->overlaid_buffer;
286 
287   buffer->cur = pfile->saved_cur;
288   buffer->rlimit = pfile->saved_rlimit;
289   buffer->line_base = pfile->saved_line_base;
290   buffer->need_line = true;
291 
292   pfile->overlaid_buffer = NULL;
293 }
294 
295 /* Reads a logical line into the output buffer.  Returns TRUE if there
296    is more text left in the buffer.  */
297 bool
_cpp_read_logical_line_trad(cpp_reader * pfile)298 _cpp_read_logical_line_trad (cpp_reader *pfile)
299 {
300   do
301     {
302       if (pfile->buffer->need_line && !_cpp_get_fresh_line (pfile))
303 	return false;
304     }
305   while (!_cpp_scan_out_logical_line (pfile, NULL) || pfile->state.skipping);
306 
307   return pfile->buffer != NULL;
308 }
309 
310 /* Set up state for finding the opening '(' of a function-like
311    macro.  */
312 static void
maybe_start_funlike(cpp_reader * pfile,cpp_hashnode * node,const uchar * start,struct fun_macro * macro)313 maybe_start_funlike (cpp_reader *pfile, cpp_hashnode *node, const uchar *start, struct fun_macro *macro)
314 {
315   unsigned int n = node->value.macro->paramc + 1;
316 
317   if (macro->buff)
318     _cpp_release_buff (pfile, macro->buff);
319   macro->buff = _cpp_get_buff (pfile, n * sizeof (size_t));
320   macro->args = (size_t *) BUFF_FRONT (macro->buff);
321   macro->node = node;
322   macro->offset = start - pfile->out.base;
323   macro->argc = 0;
324 }
325 
326 /* Save the OFFSET of the start of the next argument to MACRO.  */
327 static void
save_argument(struct fun_macro * macro,size_t offset)328 save_argument (struct fun_macro *macro, size_t offset)
329 {
330   macro->argc++;
331   if (macro->argc <= macro->node->value.macro->paramc)
332     macro->args[macro->argc] = offset;
333 }
334 
335 /* Copies the next logical line in the current buffer (starting at
336    buffer->cur) to the output buffer.  The output is guaranteed to
337    terminate with a NUL character.  buffer->cur is updated.
338 
339    If MACRO is non-NULL, then we are scanning the replacement list of
340    MACRO, and we call save_replacement_text() every time we meet an
341    argument.  */
342 bool
_cpp_scan_out_logical_line(cpp_reader * pfile,cpp_macro * macro)343 _cpp_scan_out_logical_line (cpp_reader *pfile, cpp_macro *macro)
344 {
345   bool result = true;
346   cpp_context *context;
347   const uchar *cur;
348   uchar *out;
349   struct fun_macro fmacro;
350   unsigned int c, paren_depth = 0, quote;
351   enum ls lex_state = ls_none;
352   bool header_ok;
353   const uchar *start_of_input_line;
354 
355   fmacro.buff = NULL;
356   fmacro.args = NULL;
357   fmacro.node = NULL;
358   fmacro.offset = 0;
359   fmacro.line = 0;
360   fmacro.argc = 0;
361 
362   quote = 0;
363   header_ok = pfile->state.angled_headers;
364   CUR (pfile->context) = pfile->buffer->cur;
365   RLIMIT (pfile->context) = pfile->buffer->rlimit;
366   pfile->out.cur = pfile->out.base;
367   pfile->out.first_line = pfile->line_table->highest_line;
368   /* start_of_input_line is needed to make sure that directives really,
369      really start at the first character of the line.  */
370   start_of_input_line = pfile->buffer->cur;
371  new_context:
372   context = pfile->context;
373   cur = CUR (context);
374   check_output_buffer (pfile, RLIMIT (context) - cur);
375   out = pfile->out.cur;
376 
377   for (;;)
378     {
379       if (!context->prev
380 	  && cur >= pfile->buffer->notes[pfile->buffer->cur_note].pos)
381 	{
382 	  pfile->buffer->cur = cur;
383 	  _cpp_process_line_notes (pfile, false);
384 	}
385       c = *cur++;
386       *out++ = c;
387 
388       /* Whitespace should "continue" out of the switch,
389 	 non-whitespace should "break" out of it.  */
390       switch (c)
391 	{
392 	case ' ':
393 	case '\t':
394 	case '\f':
395 	case '\v':
396 	case '\0':
397 	  continue;
398 
399 	case '\n':
400 	  /* If this is a macro's expansion, pop it.  */
401 	  if (context->prev)
402 	    {
403 	      pfile->out.cur = out - 1;
404 	      _cpp_pop_context (pfile);
405 	      goto new_context;
406 	    }
407 
408 	  /* Omit the newline from the output buffer.  */
409 	  pfile->out.cur = out - 1;
410 	  pfile->buffer->cur = cur;
411 	  pfile->buffer->need_line = true;
412 	  CPP_INCREMENT_LINE (pfile, 0);
413 
414 	  if ((lex_state == ls_fun_open || lex_state == ls_fun_close)
415 	      && !pfile->state.in_directive
416 	      && _cpp_get_fresh_line (pfile))
417 	    {
418 	      /* Newlines in arguments become a space, but we don't
419 		 clear any in-progress quote.  */
420 	      if (lex_state == ls_fun_close)
421 		out[-1] = ' ';
422 	      cur = pfile->buffer->cur;
423 	      continue;
424 	    }
425 	  goto done;
426 
427 	case '<':
428 	  if (header_ok)
429 	    quote = '>';
430 	  break;
431 	case '>':
432 	  if (c == quote)
433 	    quote = 0;
434 	  break;
435 
436 	case '"':
437 	case '\'':
438 	  if (c == quote)
439 	    quote = 0;
440 	  else if (!quote)
441 	    quote = c;
442 	  break;
443 
444 	case '\\':
445 	  /* Skip escaped quotes here, it's easier than above.  */
446 	  if (*cur == '\\' || *cur == '"' || *cur == '\'')
447 	    *out++ = *cur++;
448 	  break;
449 
450 	case '/':
451 	  /* Traditional CPP does not recognize comments within
452 	     literals.  */
453 	  if (!quote && *cur == '*')
454 	    {
455 	      pfile->out.cur = out;
456 	      cur = copy_comment (pfile, cur, macro != 0);
457 	      out = pfile->out.cur;
458 	      continue;
459 	    }
460 	  break;
461 
462 	case '_':
463 	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
464 	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
465 	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
466 	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
467 	case 'y': case 'z':
468 	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
469 	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
470 	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
471 	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
472 	case 'Y': case 'Z':
473 	  if (!pfile->state.skipping && (quote == 0 || macro))
474 	    {
475 	      cpp_hashnode *node;
476 	      uchar *out_start = out - 1;
477 
478 	      pfile->out.cur = out_start;
479 	      node = lex_identifier (pfile, cur - 1);
480 	      out = pfile->out.cur;
481 	      cur = CUR (context);
482 
483 	      if (node->type == NT_MACRO
484 		  /* Should we expand for ls_answer?  */
485 		  && (lex_state == ls_none || lex_state == ls_fun_open)
486 		  && !pfile->state.prevent_expansion)
487 		{
488 		  /* Macros invalidate MI optimization.  */
489 		  pfile->mi_valid = false;
490 		  if (! (node->flags & NODE_BUILTIN)
491 		      && node->value.macro->fun_like)
492 		    {
493 		      maybe_start_funlike (pfile, node, out_start, &fmacro);
494 		      lex_state = ls_fun_open;
495 		      fmacro.line = pfile->line_table->highest_line;
496 		      continue;
497 		    }
498 		  else if (!recursive_macro (pfile, node))
499 		    {
500 		      /* Remove the object-like macro's name from the
501 			 output, and push its replacement text.  */
502 		      pfile->out.cur = out_start;
503 		      push_replacement_text (pfile, node);
504 		      lex_state = ls_none;
505 		      goto new_context;
506 		    }
507 		}
508 	      else if (macro && (node->flags & NODE_MACRO_ARG) != 0)
509 		{
510 		  /* Found a parameter in the replacement text of a
511 		     #define.  Remove its name from the output.  */
512 		  pfile->out.cur = out_start;
513 		  save_replacement_text (pfile, macro, node->value.arg_index);
514 		  out = pfile->out.base;
515 		}
516 	      else if (lex_state == ls_hash)
517 		{
518 		  lex_state = ls_predicate;
519 		  continue;
520 		}
521 	      else if (pfile->state.in_expression
522 		       && node == pfile->spec_nodes.n_defined)
523 		{
524 		  lex_state = ls_defined;
525 		  continue;
526 		}
527 	    }
528 	  break;
529 
530 	case '(':
531 	  if (quote == 0)
532 	    {
533 	      paren_depth++;
534 	      if (lex_state == ls_fun_open)
535 		{
536 		  if (recursive_macro (pfile, fmacro.node))
537 		    lex_state = ls_none;
538 		  else
539 		    {
540 		      lex_state = ls_fun_close;
541 		      paren_depth = 1;
542 		      out = pfile->out.base + fmacro.offset;
543 		      fmacro.args[0] = fmacro.offset;
544 		    }
545 		}
546 	      else if (lex_state == ls_predicate)
547 		lex_state = ls_answer;
548 	      else if (lex_state == ls_defined)
549 		lex_state = ls_defined_close;
550 	    }
551 	  break;
552 
553 	case ',':
554 	  if (quote == 0 && lex_state == ls_fun_close && paren_depth == 1)
555 	    save_argument (&fmacro, out - pfile->out.base);
556 	  break;
557 
558 	case ')':
559 	  if (quote == 0)
560 	    {
561 	      paren_depth--;
562 	      if (lex_state == ls_fun_close && paren_depth == 0)
563 		{
564 		  cpp_macro *m = fmacro.node->value.macro;
565 
566 		  m->used = 1;
567 		  lex_state = ls_none;
568 		  save_argument (&fmacro, out - pfile->out.base);
569 
570 		  /* A single zero-length argument is no argument.  */
571 		  if (fmacro.argc == 1
572 		      && m->paramc == 0
573 		      && out == pfile->out.base + fmacro.offset + 1)
574 		    fmacro.argc = 0;
575 
576 		  if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
577 		    {
578 		      /* Remove the macro's invocation from the
579 			 output, and push its replacement text.  */
580 		      pfile->out.cur = (pfile->out.base
581 					     + fmacro.offset);
582 		      CUR (context) = cur;
583 		      replace_args_and_push (pfile, &fmacro);
584 		      goto new_context;
585 		    }
586 		}
587 	      else if (lex_state == ls_answer || lex_state == ls_defined_close)
588 		lex_state = ls_none;
589 	    }
590 	  break;
591 
592 	case '#':
593 	  if (cur - 1 == start_of_input_line
594 	      /* A '#' from a macro doesn't start a directive.  */
595 	      && !pfile->context->prev
596 	      && !pfile->state.in_directive)
597 	    {
598 	      /* A directive.  With the way _cpp_handle_directive
599 		 currently works, we only want to call it if either we
600 		 know the directive is OK, or we want it to fail and
601 		 be removed from the output.  If we want it to be
602 		 passed through (the assembler case) then we must not
603 		 call _cpp_handle_directive.  */
604 	      pfile->out.cur = out;
605 	      cur = skip_whitespace (pfile, cur, true /* skip_comments */);
606 	      out = pfile->out.cur;
607 
608 	      if (*cur == '\n')
609 		{
610 		  /* Null directive.  Ignore it and don't invalidate
611 		     the MI optimization.  */
612 		  pfile->buffer->need_line = true;
613 		  CPP_INCREMENT_LINE (pfile, 0);
614 		  result = false;
615 		  goto done;
616 		}
617 	      else
618 		{
619 		  bool do_it = false;
620 
621 		  if (is_numstart (*cur)
622 		      && CPP_OPTION (pfile, lang) != CLK_ASM)
623 		    do_it = true;
624 		  else if (is_idstart (*cur))
625 		    /* Check whether we know this directive, but don't
626 		       advance.  */
627 		    do_it = lex_identifier (pfile, cur)->is_directive;
628 
629 		  if (do_it || CPP_OPTION (pfile, lang) != CLK_ASM)
630 		    {
631 		      /* This is a kludge.  We want to have the ISO
632 			 preprocessor lex the next token.  */
633 		      pfile->buffer->cur = cur;
634 		      _cpp_handle_directive (pfile, false /* indented */);
635 		      result = false;
636 		      goto done;
637 		    }
638 		}
639 	    }
640 
641 	  if (pfile->state.in_expression)
642 	    {
643 	      lex_state = ls_hash;
644 	      continue;
645 	    }
646 	  break;
647 
648 	default:
649 	  break;
650 	}
651 
652       /* Non-whitespace disables MI optimization and stops treating
653 	 '<' as a quote in #include.  */
654       header_ok = false;
655       if (!pfile->state.in_directive)
656 	pfile->mi_valid = false;
657 
658       if (lex_state == ls_none)
659 	continue;
660 
661       /* Some of these transitions of state are syntax errors.  The
662 	 ISO preprocessor will issue errors later.  */
663       if (lex_state == ls_fun_open)
664 	/* Missing '('.  */
665 	lex_state = ls_none;
666       else if (lex_state == ls_hash
667 	       || lex_state == ls_predicate
668 	       || lex_state == ls_defined)
669 	lex_state = ls_none;
670 
671       /* ls_answer and ls_defined_close keep going until ')'.  */
672     }
673 
674  done:
675   if (fmacro.buff)
676     _cpp_release_buff (pfile, fmacro.buff);
677 
678   if (lex_state == ls_fun_close)
679     cpp_error_with_line (pfile, CPP_DL_ERROR, fmacro.line, 0,
680 			 "unterminated argument list invoking macro \"%s\"",
681 			 NODE_NAME (fmacro.node));
682   return result;
683 }
684 
685 /* Push a context holding the replacement text of the macro NODE on
686    the context stack.  NODE is either object-like, or a function-like
687    macro with no arguments.  */
688 static void
push_replacement_text(cpp_reader * pfile,cpp_hashnode * node)689 push_replacement_text (cpp_reader *pfile, cpp_hashnode *node)
690 {
691   size_t len;
692   const uchar *text;
693   uchar *buf;
694 
695   if (node->flags & NODE_BUILTIN)
696     {
697       text = _cpp_builtin_macro_text (pfile, node);
698       len = ustrlen (text);
699       buf = _cpp_unaligned_alloc (pfile, len + 1);
700       memcpy (buf, text, len);
701       buf[len]='\n';
702       text = buf;
703     }
704   else
705     {
706       cpp_macro *macro = node->value.macro;
707       macro->used = 1;
708       text = macro->exp.text;
709       macro->traditional = 1;
710       len = macro->count;
711     }
712 
713   _cpp_push_text_context (pfile, node, text, len);
714 }
715 
716 /* Returns TRUE if traditional macro recursion is detected.  */
717 static bool
recursive_macro(cpp_reader * pfile,cpp_hashnode * node)718 recursive_macro (cpp_reader *pfile, cpp_hashnode *node)
719 {
720   bool recursing = !!(node->flags & NODE_DISABLED);
721 
722   /* Object-like macros that are already expanding are necessarily
723      recursive.
724 
725      However, it is possible to have traditional function-like macros
726      that are not infinitely recursive but recurse to any given depth.
727      Further, it is easy to construct examples that get ever longer
728      until the point they stop recursing.  So there is no easy way to
729      detect true recursion; instead we assume any expansion more than
730      20 deep since the first invocation of this macro must be
731      recursing.  */
732   if (recursing && node->value.macro->fun_like)
733     {
734       size_t depth = 0;
735       cpp_context *context = pfile->context;
736 
737       do
738 	{
739 	  depth++;
740 	  if (context->c.macro == node && depth > 20)
741 	    break;
742 	  context = context->prev;
743 	}
744       while (context);
745       recursing = context != NULL;
746     }
747 
748   if (recursing)
749     cpp_error (pfile, CPP_DL_ERROR,
750 	       "detected recursion whilst expanding macro \"%s\"",
751 	       NODE_NAME (node));
752 
753   return recursing;
754 }
755 
756 /* Return the length of the replacement text of a function-like or
757    object-like non-builtin macro.  */
758 size_t
_cpp_replacement_text_len(const cpp_macro * macro)759 _cpp_replacement_text_len (const cpp_macro *macro)
760 {
761   size_t len;
762 
763   if (macro->fun_like && (macro->paramc != 0))
764     {
765       const uchar *exp;
766 
767       len = 0;
768       for (exp = macro->exp.text;;)
769 	{
770 	  struct block *b = (struct block *) exp;
771 
772 	  len += b->text_len;
773 	  if (b->arg_index == 0)
774 	    break;
775 	  len += NODE_LEN (macro->params[b->arg_index - 1]);
776 	  exp += BLOCK_LEN (b->text_len);
777 	}
778     }
779   else
780     len = macro->count;
781 
782   return len;
783 }
784 
785 /* Copy the replacement text of MACRO to DEST, which must be of
786    sufficient size.  It is not NUL-terminated.  The next character is
787    returned.  */
788 uchar *
_cpp_copy_replacement_text(const cpp_macro * macro,uchar * dest)789 _cpp_copy_replacement_text (const cpp_macro *macro, uchar *dest)
790 {
791   if (macro->fun_like && (macro->paramc != 0))
792     {
793       const uchar *exp;
794 
795       for (exp = macro->exp.text;;)
796 	{
797 	  struct block *b = (struct block *) exp;
798 	  cpp_hashnode *param;
799 
800 	  memcpy (dest, b->text, b->text_len);
801 	  dest += b->text_len;
802 	  if (b->arg_index == 0)
803 	    break;
804 	  param = macro->params[b->arg_index - 1];
805 	  memcpy (dest, NODE_NAME (param), NODE_LEN (param));
806 	  dest += NODE_LEN (param);
807 	  exp += BLOCK_LEN (b->text_len);
808 	}
809     }
810   else
811     {
812       memcpy (dest, macro->exp.text, macro->count);
813       dest += macro->count;
814     }
815 
816   return dest;
817 }
818 
819 /* Push a context holding the replacement text of the macro NODE on
820    the context stack.  NODE is either object-like, or a function-like
821    macro with no arguments.  */
822 static void
replace_args_and_push(cpp_reader * pfile,struct fun_macro * fmacro)823 replace_args_and_push (cpp_reader *pfile, struct fun_macro *fmacro)
824 {
825   cpp_macro *macro = fmacro->node->value.macro;
826 
827   if (macro->paramc == 0)
828     push_replacement_text (pfile, fmacro->node);
829   else
830     {
831       const uchar *exp;
832       uchar *p;
833       _cpp_buff *buff;
834       size_t len = 0;
835       int cxtquote = 0;
836 
837       /* Get an estimate of the length of the argument-replaced text.
838 	 This is a worst case estimate, assuming that every replacement
839 	 text character needs quoting.  */
840       for (exp = macro->exp.text;;)
841 	{
842 	  struct block *b = (struct block *) exp;
843 
844 	  len += b->text_len;
845 	  if (b->arg_index == 0)
846 	    break;
847 	  len += 2 * (fmacro->args[b->arg_index]
848 		      - fmacro->args[b->arg_index - 1] - 1);
849 	  exp += BLOCK_LEN (b->text_len);
850 	}
851 
852       /* Allocate room for the expansion plus \n.  */
853       buff = _cpp_get_buff (pfile, len + 1);
854 
855       /* Copy the expansion and replace arguments.  */
856       /* Accumulate actual length, including quoting as necessary */
857       p = BUFF_FRONT (buff);
858       len = 0;
859       for (exp = macro->exp.text;;)
860 	{
861 	  struct block *b = (struct block *) exp;
862 	  size_t arglen;
863 	  int argquote;
864 	  uchar *base;
865 	  uchar *in;
866 
867 	  len += b->text_len;
868 	  /* Copy the non-argument text literally, keeping
869 	     track of whether matching quotes have been seen. */
870 	  for (arglen = b->text_len, in = b->text; arglen > 0; arglen--)
871 	    {
872 	      if (*in == '"')
873 		cxtquote = ! cxtquote;
874 	      *p++ = *in++;
875 	    }
876 	  /* Done if no more arguments */
877 	  if (b->arg_index == 0)
878 	    break;
879 	  arglen = (fmacro->args[b->arg_index]
880 		    - fmacro->args[b->arg_index - 1] - 1);
881 	  base = pfile->out.base + fmacro->args[b->arg_index - 1];
882 	  in = base;
883 #if 0
884 	  /* Skip leading whitespace in the text for the argument to
885 	     be substituted. To be compatible with gcc 2.95, we would
886 	     also need to trim trailing whitespace. Gcc 2.95 trims
887 	     leading and trailing whitespace, which may be a bug.  The
888 	     current gcc testsuite explicitly checks that this leading
889 	     and trailing whitespace in actual arguments is
890 	     preserved. */
891 	  while (arglen > 0 && is_space (*in))
892 	    {
893 	      in++;
894 	      arglen--;
895 	    }
896 #endif
897 	  for (argquote = 0; arglen > 0; arglen--)
898 	    {
899 	      if (cxtquote && *in == '"')
900 		{
901 		  if (in > base && *(in-1) != '\\')
902 		    argquote = ! argquote;
903 		  /* Always add backslash before double quote if argument
904 		     is expanded in a quoted context */
905 		  *p++ = '\\';
906 		  len++;
907 		}
908 	      else if (cxtquote && argquote && *in == '\\')
909 		{
910 		  /* Always add backslash before a backslash in an argument
911 		     that is expanded in a quoted context and also in the
912 		     range of a quoted context in the argument itself. */
913 		  *p++ = '\\';
914 		  len++;
915 		}
916 	      *p++ = *in++;
917 	      len++;
918 	    }
919 	  exp += BLOCK_LEN (b->text_len);
920 	}
921 
922       /* \n-terminate.  */
923       *p = '\n';
924       _cpp_push_text_context (pfile, fmacro->node, BUFF_FRONT (buff), len);
925 
926       /* So we free buffer allocation when macro is left.  */
927       pfile->context->buff = buff;
928     }
929 }
930 
931 /* Read and record the parameters, if any, of a function-like macro
932    definition.  Destroys pfile->out.cur.
933 
934    Returns true on success, false on failure (syntax error or a
935    duplicate parameter).  On success, CUR (pfile->context) is just
936    past the closing parenthesis.  */
937 static bool
scan_parameters(cpp_reader * pfile,cpp_macro * macro)938 scan_parameters (cpp_reader *pfile, cpp_macro *macro)
939 {
940   const uchar *cur = CUR (pfile->context) + 1;
941   bool ok;
942 
943   for (;;)
944     {
945       cur = skip_whitespace (pfile, cur, true /* skip_comments */);
946 
947       if (is_idstart (*cur))
948 	{
949 	  ok = false;
950 	  if (_cpp_save_parameter (pfile, macro, lex_identifier (pfile, cur)))
951 	    break;
952 	  cur = skip_whitespace (pfile, CUR (pfile->context),
953 				 true /* skip_comments */);
954 	  if (*cur == ',')
955 	    {
956 	      cur++;
957 	      continue;
958 	    }
959 	  ok = (*cur == ')');
960 	  break;
961 	}
962 
963       ok = (*cur == ')' && macro->paramc == 0);
964       break;
965     }
966 
967   if (!ok)
968     cpp_error (pfile, CPP_DL_ERROR, "syntax error in macro parameter list");
969 
970   CUR (pfile->context) = cur + (*cur == ')');
971 
972   return ok;
973 }
974 
975 /* Save the text from pfile->out.base to pfile->out.cur as
976    the replacement text for the current macro, followed by argument
977    ARG_INDEX, with zero indicating the end of the replacement
978    text.  */
979 static void
save_replacement_text(cpp_reader * pfile,cpp_macro * macro,unsigned int arg_index)980 save_replacement_text (cpp_reader *pfile, cpp_macro *macro,
981 		       unsigned int arg_index)
982 {
983   size_t len = pfile->out.cur - pfile->out.base;
984   uchar *exp;
985 
986   if (macro->paramc == 0)
987     {
988       /* Object-like and function-like macros without parameters
989 	 simply store their \n-terminated replacement text.  */
990       exp = _cpp_unaligned_alloc (pfile, len + 1);
991       memcpy (exp, pfile->out.base, len);
992       exp[len] = '\n';
993       macro->exp.text = exp;
994       macro->traditional = 1;
995       macro->count = len;
996     }
997   else
998     {
999       /* Store the text's length (unsigned int), the argument index
1000 	 (unsigned short, base 1) and then the text.  */
1001       size_t blen = BLOCK_LEN (len);
1002       struct block *block;
1003 
1004       if (macro->count + blen > BUFF_ROOM (pfile->a_buff))
1005 	_cpp_extend_buff (pfile, &pfile->a_buff, macro->count + blen);
1006 
1007       exp = BUFF_FRONT (pfile->a_buff);
1008       block = (struct block *) (exp + macro->count);
1009       macro->exp.text = exp;
1010       macro->traditional = 1;
1011 
1012       /* Write out the block information.  */
1013       block->text_len = len;
1014       block->arg_index = arg_index;
1015       memcpy (block->text, pfile->out.base, len);
1016 
1017       /* Lex the rest into the start of the output buffer.  */
1018       pfile->out.cur = pfile->out.base;
1019 
1020       macro->count += blen;
1021 
1022       /* If we've finished, commit the memory.  */
1023       if (arg_index == 0)
1024 	BUFF_FRONT (pfile->a_buff) += macro->count;
1025     }
1026 }
1027 
1028 /* Analyze and save the replacement text of a macro.  Returns true on
1029    success.  */
1030 bool
_cpp_create_trad_definition(cpp_reader * pfile,cpp_macro * macro)1031 _cpp_create_trad_definition (cpp_reader *pfile, cpp_macro *macro)
1032 {
1033   const uchar *cur;
1034   uchar *limit;
1035   cpp_context *context = pfile->context;
1036 
1037   /* The context has not been set up for command line defines, and CUR
1038      has not been updated for the macro name for in-file defines.  */
1039   pfile->out.cur = pfile->out.base;
1040   CUR (context) = pfile->buffer->cur;
1041   RLIMIT (context) = pfile->buffer->rlimit;
1042   check_output_buffer (pfile, RLIMIT (context) - CUR (context));
1043 
1044   /* Is this a function-like macro?  */
1045   if (* CUR (context) == '(')
1046     {
1047       bool ok = scan_parameters (pfile, macro);
1048 
1049       /* Remember the params so we can clear NODE_MACRO_ARG flags.  */
1050       macro->params = (cpp_hashnode **) BUFF_FRONT (pfile->a_buff);
1051 
1052       /* Setting macro to NULL indicates an error occurred, and
1053 	 prevents unnecessary work in _cpp_scan_out_logical_line.  */
1054       if (!ok)
1055 	macro = NULL;
1056       else
1057 	{
1058 	  BUFF_FRONT (pfile->a_buff) = (uchar *) &macro->params[macro->paramc];
1059 	  macro->fun_like = 1;
1060 	}
1061     }
1062 
1063   /* Skip leading whitespace in the replacement text.  */
1064   pfile->buffer->cur
1065     = skip_whitespace (pfile, CUR (context),
1066 		       CPP_OPTION (pfile, discard_comments_in_macro_exp));
1067 
1068   pfile->state.prevent_expansion++;
1069   _cpp_scan_out_logical_line (pfile, macro);
1070   pfile->state.prevent_expansion--;
1071 
1072   if (!macro)
1073     return false;
1074 
1075   /* Skip trailing white space.  */
1076   cur = pfile->out.base;
1077   limit = pfile->out.cur;
1078   while (limit > cur && is_space (limit[-1]))
1079     limit--;
1080   pfile->out.cur = limit;
1081   save_replacement_text (pfile, macro, 0);
1082 
1083   return true;
1084 }
1085 
1086 /* Copy SRC of length LEN to DEST, but convert all contiguous
1087    whitespace to a single space, provided it is not in quotes.  The
1088    quote currently in effect is pointed to by PQUOTE, and is updated
1089    by the function.  Returns the number of bytes copied.  */
1090 static size_t
canonicalize_text(uchar * dest,const uchar * src,size_t len,uchar * pquote)1091 canonicalize_text (uchar *dest, const uchar *src, size_t len, uchar *pquote)
1092 {
1093   uchar *orig_dest = dest;
1094   uchar quote = *pquote;
1095 
1096   while (len)
1097     {
1098       if (is_space (*src) && !quote)
1099 	{
1100 	  do
1101 	    src++, len--;
1102 	  while (len && is_space (*src));
1103 	  *dest++ = ' ';
1104 	}
1105       else
1106 	{
1107 	  if (*src == '\'' || *src == '"')
1108 	    {
1109 	      if (!quote)
1110 		quote = *src;
1111 	      else if (quote == *src)
1112 		quote = 0;
1113 	    }
1114 	  *dest++ = *src++, len--;
1115 	}
1116     }
1117 
1118   *pquote = quote;
1119   return dest - orig_dest;
1120 }
1121 
1122 /* Returns true if MACRO1 and MACRO2 have expansions different other
1123    than in the form of their whitespace.  */
1124 bool
_cpp_expansions_different_trad(const cpp_macro * macro1,const cpp_macro * macro2)1125 _cpp_expansions_different_trad (const cpp_macro *macro1,
1126 				const cpp_macro *macro2)
1127 {
1128   uchar *p1 = XNEWVEC (uchar, macro1->count + macro2->count);
1129   uchar *p2 = p1 + macro1->count;
1130   uchar quote1 = 0, quote2 = 0;
1131   bool mismatch;
1132   size_t len1, len2;
1133 
1134   if (macro1->paramc > 0)
1135     {
1136       const uchar *exp1 = macro1->exp.text, *exp2 = macro2->exp.text;
1137 
1138       mismatch = true;
1139       for (;;)
1140 	{
1141 	  struct block *b1 = (struct block *) exp1;
1142 	  struct block *b2 = (struct block *) exp2;
1143 
1144 	  if (b1->arg_index != b2->arg_index)
1145 	    break;
1146 
1147 	  len1 = canonicalize_text (p1, b1->text, b1->text_len, &quote1);
1148 	  len2 = canonicalize_text (p2, b2->text, b2->text_len, &quote2);
1149 	  if (len1 != len2 || memcmp (p1, p2, len1))
1150 	    break;
1151 	  if (b1->arg_index == 0)
1152 	    {
1153 	      mismatch = false;
1154 	      break;
1155 	    }
1156 	  exp1 += BLOCK_LEN (b1->text_len);
1157 	  exp2 += BLOCK_LEN (b2->text_len);
1158 	}
1159     }
1160   else
1161     {
1162       len1 = canonicalize_text (p1, macro1->exp.text, macro1->count, &quote1);
1163       len2 = canonicalize_text (p2, macro2->exp.text, macro2->count, &quote2);
1164       mismatch = (len1 != len2 || memcmp (p1, p2, len1));
1165     }
1166 
1167   free (p1);
1168   return mismatch;
1169 }
1170