1 /* Data and functions related to line maps and input files.
2    Copyright (C) 2004-2019 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic.h"
25 #include "diagnostic-core.h"
26 #include "selftest.h"
27 #include "cpplib.h"
28 
/* Give HAVE_ICONV a value of 0 when the configuration did not define
   it, so that it can be tested as an ordinary expression.  */
#if !defined (HAVE_ICONV)
# define HAVE_ICONV 0
#endif
32 
33 /* This is a cache used by get_next_line to store the content of a
34    file to be searched for file lines.  */
35 struct fcache
36 {
37   /* These are information used to store a line boundary.  */
38   struct line_info
39   {
40     /* The line number.  It starts from 1.  */
41     size_t line_num;
42 
43     /* The position (byte count) of the beginning of the line,
44        relative to the file data pointer.  This starts at zero.  */
45     size_t start_pos;
46 
47     /* The position (byte count) of the last byte of the line.  This
48        normally points to the '\n' character, or to one byte after the
49        last byte of the file, if the file doesn't contain a '\n'
50        character.  */
51     size_t end_pos;
52 
line_infofcache::line_info53     line_info (size_t l, size_t s, size_t e)
54       : line_num (l), start_pos (s), end_pos (e)
55     {}
56 
line_infofcache::line_info57     line_info ()
58       :line_num (0), start_pos (0), end_pos (0)
59     {}
60   };
61 
62   /* The number of time this file has been accessed.  This is used
63      to designate which file cache to evict from the cache
64      array.  */
65   unsigned use_count;
66 
67   /* The file_path is the key for identifying a particular file in
68      the cache.
69      For libcpp-using code, the underlying buffer for this field is
70      owned by the corresponding _cpp_file within the cpp_reader.  */
71   const char *file_path;
72 
73   FILE *fp;
74 
75   /* This points to the content of the file that we've read so
76      far.  */
77   char *data;
78 
79   /*  The size of the DATA array above.*/
80   size_t size;
81 
82   /* The number of bytes read from the underlying file so far.  This
83      must be less (or equal) than SIZE above.  */
84   size_t nb_read;
85 
86   /* The index of the beginning of the current line.  */
87   size_t line_start_idx;
88 
89   /* The number of the previous line read.  This starts at 1.  Zero
90      means we've read no line so far.  */
91   size_t line_num;
92 
93   /* This is the total number of lines of the current file.  At the
94      moment, we try to get this information from the line map
95      subsystem.  Note that this is just a hint.  When using the C++
96      front-end, this hint is correct because the input file is then
97      completely tokenized before parsing starts; so the line map knows
98      the number of lines before compilation really starts.  For e.g,
99      the C front-end, it can happen that we start emitting diagnostics
100      before the line map has seen the end of the file.  */
101   size_t total_lines;
102 
103   /* Could this file be missing a trailing newline on its final line?
104      Initially true (to cope with empty files), set to true/false
105      as each line is read.  */
106   bool missing_trailing_newline;
107 
108   /* This is a record of the beginning and end of the lines we've seen
109      while reading the file.  This is useful to avoid walking the data
110      from the beginning when we are asked to read a line that is
111      before LINE_START_IDX above.  Note that the maximum size of this
112      record is fcache_line_record_size, so that the memory consumption
113      doesn't explode.  We thus scale total_lines down to
114      fcache_line_record_size.  */
115   vec<line_info, va_heap> line_record;
116 
117   fcache ();
118   ~fcache ();
119 };
120 
121 /* Current position in real source file.  */
122 
123 location_t input_location = UNKNOWN_LOCATION;
124 
125 struct line_maps *line_table;
126 
127 /* A stashed copy of "line_table" for use by selftest::line_table_test.
128    This needs to be a global so that it can be a GC root, and thus
129    prevent the stashed copy from being garbage-collected if the GC runs
130    during a line_table_test.  */
131 
132 struct line_maps *saved_line_table;
133 
134 static fcache *fcache_tab;
135 static const size_t fcache_tab_size = 16;
136 static const size_t fcache_buffer_size = 4 * 1024;
137 static const size_t fcache_line_record_size = 100;
138 
139 /* Expand the source location LOC into a human readable location.  If
140    LOC resolves to a builtin location, the file name of the readable
141    location is set to the string "<built-in>". If EXPANSION_POINT_P is
142    TRUE and LOC is virtual, then it is resolved to the expansion
143    point of the involved macro.  Otherwise, it is resolved to the
144    spelling location of the token.
145 
146    When resolving to the spelling location of the token, if the
147    resulting location is for a built-in location (that is, it has no
148    associated line/column) in the context of a macro expansion, the
149    returned location is the first one (while unwinding the macro
150    location towards its expansion point) that is in real source
151    code.
152 
153    ASPECT controls which part of the location to use.  */
154 
155 static expanded_location
expand_location_1(location_t loc,bool expansion_point_p,enum location_aspect aspect)156 expand_location_1 (location_t loc,
157 		   bool expansion_point_p,
158 		   enum location_aspect aspect)
159 {
160   expanded_location xloc;
161   const line_map_ordinary *map;
162   enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
163   tree block = NULL;
164 
165   if (IS_ADHOC_LOC (loc))
166     {
167       block = LOCATION_BLOCK (loc);
168       loc = LOCATION_LOCUS (loc);
169     }
170 
171   memset (&xloc, 0, sizeof (xloc));
172 
173   if (loc >= RESERVED_LOCATION_COUNT)
174     {
175       if (!expansion_point_p)
176 	{
177 	  /* We want to resolve LOC to its spelling location.
178 
179 	     But if that spelling location is a reserved location that
180 	     appears in the context of a macro expansion (like for a
181 	     location for a built-in token), let's consider the first
182 	     location (toward the expansion point) that is not reserved;
183 	     that is, the first location that is in real source code.  */
184 	  loc = linemap_unwind_to_first_non_reserved_loc (line_table,
185 							  loc, NULL);
186 	  lrk = LRK_SPELLING_LOCATION;
187 	}
188       loc = linemap_resolve_location (line_table, loc, lrk, &map);
189 
190       /* loc is now either in an ordinary map, or is a reserved location.
191 	 If it is a compound location, the caret is in a spelling location,
192 	 but the start/finish might still be a virtual location.
193 	 Depending of what the caller asked for, we may need to recurse
194 	 one level in order to resolve any virtual locations in the
195 	 end-points.  */
196       switch (aspect)
197 	{
198 	default:
199 	  gcc_unreachable ();
200 	  /* Fall through.  */
201 	case LOCATION_ASPECT_CARET:
202 	  break;
203 	case LOCATION_ASPECT_START:
204 	  {
205 	    location_t start = get_start (loc);
206 	    if (start != loc)
207 	      return expand_location_1 (start, expansion_point_p, aspect);
208 	  }
209 	  break;
210 	case LOCATION_ASPECT_FINISH:
211 	  {
212 	    location_t finish = get_finish (loc);
213 	    if (finish != loc)
214 	      return expand_location_1 (finish, expansion_point_p, aspect);
215 	  }
216 	  break;
217 	}
218       xloc = linemap_expand_location (line_table, map, loc);
219     }
220 
221   xloc.data = block;
222   if (loc <= BUILTINS_LOCATION)
223     xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
224 
225   return xloc;
226 }
227 
228 /* Initialize the set of cache used for files accessed by caret
229    diagnostic.  */
230 
231 static void
diagnostic_file_cache_init(void)232 diagnostic_file_cache_init (void)
233 {
234   if (fcache_tab == NULL)
235     fcache_tab = new fcache[fcache_tab_size];
236 }
237 
238 /* Free the resources used by the set of cache used for files accessed
239    by caret diagnostic.  */
240 
241 void
diagnostic_file_cache_fini(void)242 diagnostic_file_cache_fini (void)
243 {
244   if (fcache_tab)
245     {
246       delete [] (fcache_tab);
247       fcache_tab = NULL;
248     }
249 }
250 
251 /* Return the total lines number that have been read so far by the
252    line map (in the preprocessor) so far.  For languages like C++ that
253    entirely preprocess the input file before starting to parse, this
254    equals the actual number of lines of the file.  */
255 
256 static size_t
total_lines_num(const char * file_path)257 total_lines_num (const char *file_path)
258 {
259   size_t r = 0;
260   location_t l = 0;
261   if (linemap_get_file_highest_location (line_table, file_path, &l))
262     {
263       gcc_assert (l >= RESERVED_LOCATION_COUNT);
264       expanded_location xloc = expand_location (l);
265       r = xloc.line;
266     }
267   return r;
268 }
269 
270 /* Lookup the cache used for the content of a given file accessed by
271    caret diagnostic.  Return the found cached file, or NULL if no
272    cached file was found.  */
273 
274 static fcache*
lookup_file_in_cache_tab(const char * file_path)275 lookup_file_in_cache_tab (const char *file_path)
276 {
277   if (file_path == NULL)
278     return NULL;
279 
280   diagnostic_file_cache_init ();
281 
282   /* This will contain the found cached file.  */
283   fcache *r = NULL;
284   for (unsigned i = 0; i < fcache_tab_size; ++i)
285     {
286       fcache *c = &fcache_tab[i];
287       if (c->file_path && !strcmp (c->file_path, file_path))
288 	{
289 	  ++c->use_count;
290 	  r = c;
291 	}
292     }
293 
294   if (r)
295     ++r->use_count;
296 
297   return r;
298 }
299 
300 /* Purge any mention of FILENAME from the cache of files used for
301    printing source code.  For use in selftests when working
302    with tempfiles.  */
303 
304 void
diagnostics_file_cache_forcibly_evict_file(const char * file_path)305 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
306 {
307   gcc_assert (file_path);
308 
309   fcache *r = lookup_file_in_cache_tab (file_path);
310   if (!r)
311     /* Not found.  */
312     return;
313 
314   r->file_path = NULL;
315   if (r->fp)
316     fclose (r->fp);
317   r->fp = NULL;
318   r->nb_read = 0;
319   r->line_start_idx = 0;
320   r->line_num = 0;
321   r->line_record.truncate (0);
322   r->use_count = 0;
323   r->total_lines = 0;
324   r->missing_trailing_newline = true;
325 }
326 
327 /* Return the file cache that has been less used, recently, or the
328    first empty one.  If HIGHEST_USE_COUNT is non-null,
329    *HIGHEST_USE_COUNT is set to the highest use count of the entries
330    in the cache table.  */
331 
332 static fcache*
evicted_cache_tab_entry(unsigned * highest_use_count)333 evicted_cache_tab_entry (unsigned *highest_use_count)
334 {
335   diagnostic_file_cache_init ();
336 
337   fcache *to_evict = &fcache_tab[0];
338   unsigned huc = to_evict->use_count;
339   for (unsigned i = 1; i < fcache_tab_size; ++i)
340     {
341       fcache *c = &fcache_tab[i];
342       bool c_is_empty = (c->file_path == NULL);
343 
344       if (c->use_count < to_evict->use_count
345 	  || (to_evict->file_path && c_is_empty))
346 	/* We evict C because it's either an entry with a lower use
347 	   count or one that is empty.  */
348 	to_evict = c;
349 
350       if (huc < c->use_count)
351 	huc = c->use_count;
352 
353       if (c_is_empty)
354 	/* We've reached the end of the cache; subsequent elements are
355 	   all empty.  */
356 	break;
357     }
358 
359   if (highest_use_count)
360     *highest_use_count = huc;
361 
362   return to_evict;
363 }
364 
365 /* Create the cache used for the content of a given file to be
366    accessed by caret diagnostic.  This cache is added to an array of
367    cache and can be retrieved by lookup_file_in_cache_tab.  This
368    function returns the created cache.  Note that only the last
369    fcache_tab_size files are cached.  */
370 
371 static fcache*
add_file_to_cache_tab(const char * file_path)372 add_file_to_cache_tab (const char *file_path)
373 {
374 
375   FILE *fp = fopen (file_path, "r");
376   if (fp == NULL)
377     return NULL;
378 
379   unsigned highest_use_count = 0;
380   fcache *r = evicted_cache_tab_entry (&highest_use_count);
381   r->file_path = file_path;
382   if (r->fp)
383     fclose (r->fp);
384   r->fp = fp;
385   r->nb_read = 0;
386   r->line_start_idx = 0;
387   r->line_num = 0;
388   r->line_record.truncate (0);
389   /* Ensure that this cache entry doesn't get evicted next time
390      add_file_to_cache_tab is called.  */
391   r->use_count = ++highest_use_count;
392   r->total_lines = total_lines_num (file_path);
393   r->missing_trailing_newline = true;
394 
395   return r;
396 }
397 
398 /* Lookup the cache used for the content of a given file accessed by
399    caret diagnostic.  If no cached file was found, create a new cache
400    for this file, add it to the array of cached file and return
401    it.  */
402 
403 static fcache*
lookup_or_add_file_to_cache_tab(const char * file_path)404 lookup_or_add_file_to_cache_tab (const char *file_path)
405 {
406   fcache *r = lookup_file_in_cache_tab (file_path);
407   if (r == NULL)
408     r = add_file_to_cache_tab (file_path);
409   return r;
410 }
411 
412 /* Default constructor for a cache of file used by caret
413    diagnostic.  */
414 
fcache()415 fcache::fcache ()
416 : use_count (0), file_path (NULL), fp (NULL), data (0),
417   size (0), nb_read (0), line_start_idx (0), line_num (0),
418   total_lines (0), missing_trailing_newline (true)
419 {
420   line_record.create (0);
421 }
422 
423 /* Destructor for a cache of file used by caret diagnostic.  */
424 
~fcache()425 fcache::~fcache ()
426 {
427   if (fp)
428     {
429       fclose (fp);
430       fp = NULL;
431     }
432   if (data)
433     {
434       XDELETEVEC (data);
435       data = 0;
436     }
437   line_record.release ();
438 }
439 
440 /* Returns TRUE iff the cache would need to be filled with data coming
441    from the file.  That is, either the cache is empty or full or the
442    current line is empty.  Note that if the cache is full, it would
443    need to be extended and filled again.  */
444 
445 static bool
needs_read(fcache * c)446 needs_read (fcache *c)
447 {
448   return (c->nb_read == 0
449 	  || c->nb_read == c->size
450 	  || (c->line_start_idx >= c->nb_read - 1));
451 }
452 
453 /*  Return TRUE iff the cache is full and thus needs to be
454     extended.  */
455 
456 static bool
needs_grow(fcache * c)457 needs_grow (fcache *c)
458 {
459   return c->nb_read == c->size;
460 }
461 
462 /* Grow the cache if it needs to be extended.  */
463 
464 static void
maybe_grow(fcache * c)465 maybe_grow (fcache *c)
466 {
467   if (!needs_grow (c))
468     return;
469 
470   size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
471   c->data = XRESIZEVEC (char, c->data, size);
472   c->size = size;
473 }
474 
475 /*  Read more data into the cache.  Extends the cache if need be.
476     Returns TRUE iff new data could be read.  */
477 
478 static bool
read_data(fcache * c)479 read_data (fcache *c)
480 {
481   if (feof (c->fp) || ferror (c->fp))
482     return false;
483 
484   maybe_grow (c);
485 
486   char * from = c->data + c->nb_read;
487   size_t to_read = c->size - c->nb_read;
488   size_t nb_read = fread (from, 1, to_read, c->fp);
489 
490   if (ferror (c->fp))
491     return false;
492 
493   c->nb_read += nb_read;
494   return !!nb_read;
495 }
496 
497 /* Read new data iff the cache needs to be filled with more data
498    coming from the file FP.  Return TRUE iff the cache was filled with
499    mode data.  */
500 
501 static bool
maybe_read_data(fcache * c)502 maybe_read_data (fcache *c)
503 {
504   if (!needs_read (c))
505     return false;
506   return read_data (c);
507 }
508 
509 /* Read a new line from file FP, using C as a cache for the data
510    coming from the file.  Upon successful completion, *LINE is set to
511    the beginning of the line found.  *LINE points directly in the
512    line cache and is only valid until the next call of get_next_line.
513    *LINE_LEN is set to the length of the line.  Note that the line
514    does not contain any terminal delimiter.  This function returns
515    true if some data was read or process from the cache, false
516    otherwise.  Note that subsequent calls to get_next_line might
517    make the content of *LINE invalid.  */
518 
519 static bool
get_next_line(fcache * c,char ** line,ssize_t * line_len)520 get_next_line (fcache *c, char **line, ssize_t *line_len)
521 {
522   /* Fill the cache with data to process.  */
523   maybe_read_data (c);
524 
525   size_t remaining_size = c->nb_read - c->line_start_idx;
526   if (remaining_size == 0)
527     /* There is no more data to process.  */
528     return false;
529 
530   char *line_start = c->data + c->line_start_idx;
531 
532   char *next_line_start = NULL;
533   size_t len = 0;
534   char *line_end = (char *) memchr (line_start, '\n', remaining_size);
535   if (line_end == NULL)
536     {
537       /* We haven't found the end-of-line delimiter in the cache.
538 	 Fill the cache with more data from the file and look for the
539 	 '\n'.  */
540       while (maybe_read_data (c))
541 	{
542 	  line_start = c->data + c->line_start_idx;
543 	  remaining_size = c->nb_read - c->line_start_idx;
544 	  line_end = (char *) memchr (line_start, '\n', remaining_size);
545 	  if (line_end != NULL)
546 	    {
547 	      next_line_start = line_end + 1;
548 	      break;
549 	    }
550 	}
551       if (line_end == NULL)
552 	{
553 	  /* We've loadded all the file into the cache and still no
554 	     '\n'.  Let's say the line ends up at one byte passed the
555 	     end of the file.  This is to stay consistent with the case
556 	     of when the line ends up with a '\n' and line_end points to
557 	     that terminal '\n'.  That consistency is useful below in
558 	     the len calculation.  */
559 	  line_end = c->data + c->nb_read ;
560 	  c->missing_trailing_newline = true;
561 	}
562       else
563 	c->missing_trailing_newline = false;
564     }
565   else
566     {
567       next_line_start = line_end + 1;
568       c->missing_trailing_newline = false;
569     }
570 
571   if (ferror (c->fp))
572     return false;
573 
574   /* At this point, we've found the end of the of line.  It either
575      points to the '\n' or to one byte after the last byte of the
576      file.  */
577   gcc_assert (line_end != NULL);
578 
579   len = line_end - line_start;
580 
581   if (c->line_start_idx < c->nb_read)
582     *line = line_start;
583 
584   ++c->line_num;
585 
586   /* Before we update our line record, make sure the hint about the
587      total number of lines of the file is correct.  If it's not, then
588      we give up recording line boundaries from now on.  */
589   bool update_line_record = true;
590   if (c->line_num > c->total_lines)
591     update_line_record = false;
592 
593     /* Now update our line record so that re-reading lines from the
594      before c->line_start_idx is faster.  */
595   if (update_line_record
596       && c->line_record.length () < fcache_line_record_size)
597     {
598       /* If the file lines fits in the line record, we just record all
599 	 its lines ...*/
600       if (c->total_lines <= fcache_line_record_size
601 	  && c->line_num > c->line_record.length ())
602 	c->line_record.safe_push (fcache::line_info (c->line_num,
603 						 c->line_start_idx,
604 						 line_end - c->data));
605       else if (c->total_lines > fcache_line_record_size)
606 	{
607 	  /* ... otherwise, we just scale total_lines down to
608 	     (fcache_line_record_size lines.  */
609 	  size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
610 	  if (c->line_record.length () == 0
611 	      || n >= c->line_record.length ())
612 	    c->line_record.safe_push (fcache::line_info (c->line_num,
613 						     c->line_start_idx,
614 						     line_end - c->data));
615 	}
616     }
617 
618   /* Update c->line_start_idx so that it points to the next line to be
619      read.  */
620   if (next_line_start)
621     c->line_start_idx = next_line_start - c->data;
622   else
623     /* We didn't find any terminal '\n'.  Let's consider that the end
624        of line is the end of the data in the cache.  The next
625        invocation of get_next_line will either read more data from the
626        underlying file or return false early because we've reached the
627        end of the file.  */
628     c->line_start_idx = c->nb_read;
629 
630   *line_len = len;
631 
632   return true;
633 }
634 
635 /* Consume the next bytes coming from the cache (or from its
636    underlying file if there are remaining unread bytes in the file)
637    until we reach the next end-of-line (or end-of-file).  There is no
638    copying from the cache involved.  Return TRUE upon successful
639    completion.  */
640 
641 static bool
goto_next_line(fcache * cache)642 goto_next_line (fcache *cache)
643 {
644   char *l;
645   ssize_t len;
646 
647   return get_next_line (cache, &l, &len);
648 }
649 
650 /* Read an arbitrary line number LINE_NUM from the file cached in C.
651    If the line was read successfully, *LINE points to the beginning
652    of the line in the file cache and *LINE_LEN is the length of the
653    line.  *LINE is not nul-terminated, but may contain zero bytes.
654    *LINE is only valid until the next call of read_line_num.
655    This function returns bool if a line was read.  */
656 
657 static bool
read_line_num(fcache * c,size_t line_num,char ** line,ssize_t * line_len)658 read_line_num (fcache *c, size_t line_num,
659 	       char **line, ssize_t *line_len)
660 {
661   gcc_assert (line_num > 0);
662 
663   if (line_num <= c->line_num)
664     {
665       /* We've been asked to read lines that are before c->line_num.
666 	 So lets use our line record (if it's not empty) to try to
667 	 avoid re-reading the file from the beginning again.  */
668 
669       if (c->line_record.is_empty ())
670 	{
671 	  c->line_start_idx = 0;
672 	  c->line_num = 0;
673 	}
674       else
675 	{
676 	  fcache::line_info *i = NULL;
677 	  if (c->total_lines <= fcache_line_record_size)
678 	    {
679 	      /* In languages where the input file is not totally
680 		 preprocessed up front, the c->total_lines hint
681 		 can be smaller than the number of lines of the
682 		 file.  In that case, only the first
683 		 c->total_lines have been recorded.
684 
685 		 Otherwise, the first c->total_lines we've read have
686 		 their start/end recorded here.  */
687 	      i = (line_num <= c->total_lines)
688 		? &c->line_record[line_num - 1]
689 		: &c->line_record[c->total_lines - 1];
690 	      gcc_assert (i->line_num <= line_num);
691 	    }
692 	  else
693 	    {
694 	      /*  So the file had more lines than our line record
695 		  size.  Thus the number of lines we've recorded has
696 		  been scaled down to fcache_line_reacord_size.  Let's
697 		  pick the start/end of the recorded line that is
698 		  closest to line_num.  */
699 	      size_t n = (line_num <= c->total_lines)
700 		? line_num * fcache_line_record_size / c->total_lines
701 		: c ->line_record.length () - 1;
702 	      if (n < c->line_record.length ())
703 		{
704 		  i = &c->line_record[n];
705 		  gcc_assert (i->line_num <= line_num);
706 		}
707 	    }
708 
709 	  if (i && i->line_num == line_num)
710 	    {
711 	      /* We have the start/end of the line.  */
712 	      *line = c->data + i->start_pos;
713 	      *line_len = i->end_pos - i->start_pos;
714 	      return true;
715 	    }
716 
717 	  if (i)
718 	    {
719 	      c->line_start_idx = i->start_pos;
720 	      c->line_num = i->line_num - 1;
721 	    }
722 	  else
723 	    {
724 	      c->line_start_idx = 0;
725 	      c->line_num = 0;
726 	    }
727 	}
728     }
729 
730   /*  Let's walk from line c->line_num up to line_num - 1, without
731       copying any line.  */
732   while (c->line_num < line_num - 1)
733     if (!goto_next_line (c))
734       return false;
735 
736   /* The line we want is the next one.  Let's read and copy it back to
737      the caller.  */
738   return get_next_line (c, line, line_len);
739 }
740 
741 /* Return the physical source line that corresponds to FILE_PATH/LINE.
742    The line is not nul-terminated.  The returned pointer is only
743    valid until the next call of location_get_source_line.
744    Note that the line can contain several null characters,
745    so the returned value's length has the actual length of the line.
746    If the function fails, a NULL char_span is returned.  */
747 
748 char_span
location_get_source_line(const char * file_path,int line)749 location_get_source_line (const char *file_path, int line)
750 {
751   char *buffer = NULL;
752   ssize_t len;
753 
754   if (line == 0)
755     return char_span (NULL, 0);
756 
757   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
758   if (c == NULL)
759     return char_span (NULL, 0);
760 
761   bool read = read_line_num (c, line, &buffer, &len);
762   if (!read)
763     return char_span (NULL, 0);
764 
765   return char_span (buffer, len);
766 }
767 
768 /* Determine if FILE_PATH missing a trailing newline on its final line.
769    Only valid to call once all of the file has been loaded, by
770    requesting a line number beyond the end of the file.  */
771 
772 bool
location_missing_trailing_newline(const char * file_path)773 location_missing_trailing_newline (const char *file_path)
774 {
775   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
776   if (c == NULL)
777     return false;
778 
779   return c->missing_trailing_newline;
780 }
781 
782 /* Test if the location originates from the spelling location of a
783    builtin-tokens.  That is, return TRUE if LOC is a (possibly
784    virtual) location of a built-in token that appears in the expansion
785    list of a macro.  Please note that this function also works on
786    tokens that result from built-in tokens.  For instance, the
787    function would return true if passed a token "4" that is the result
788    of the expansion of the built-in __LINE__ macro.  */
789 bool
is_location_from_builtin_token(location_t loc)790 is_location_from_builtin_token (location_t loc)
791 {
792   const line_map_ordinary *map = NULL;
793   loc = linemap_resolve_location (line_table, loc,
794 				  LRK_SPELLING_LOCATION, &map);
795   return loc == BUILTINS_LOCATION;
796 }
797 
798 /* Expand the source location LOC into a human readable location.  If
799    LOC is virtual, it resolves to the expansion point of the involved
800    macro.  If LOC resolves to a builtin location, the file name of the
801    readable location is set to the string "<built-in>".  */
802 
803 expanded_location
expand_location(location_t loc)804 expand_location (location_t loc)
805 {
806   return expand_location_1 (loc, /*expansion_point_p=*/true,
807 			    LOCATION_ASPECT_CARET);
808 }
809 
810 /* Expand the source location LOC into a human readable location.  If
811    LOC is virtual, it resolves to the expansion location of the
812    relevant macro.  If LOC resolves to a builtin location, the file
813    name of the readable location is set to the string
814    "<built-in>".  */
815 
816 expanded_location
expand_location_to_spelling_point(location_t loc,enum location_aspect aspect)817 expand_location_to_spelling_point (location_t loc,
818 				   enum location_aspect aspect)
819 {
820   return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
821 }
822 
823 /* The rich_location class within libcpp requires a way to expand
824    location_t instances, and relies on the client code
825    providing a symbol named
826      linemap_client_expand_location_to_spelling_point
827    to do this.
828 
829    This is the implementation for libcommon.a (all host binaries),
830    which simply calls into expand_location_1.  */
831 
832 expanded_location
linemap_client_expand_location_to_spelling_point(location_t loc,enum location_aspect aspect)833 linemap_client_expand_location_to_spelling_point (location_t loc,
834 						  enum location_aspect aspect)
835 {
836   return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
837 }
838 
839 
840 /* If LOCATION is in a system header and if it is a virtual location for
841    a token coming from the expansion of a macro, unwind it to the
842    location of the expansion point of the macro.  Otherwise, just return
843    LOCATION.
844 
845    This is used for instance when we want to emit diagnostics about a
846    token that may be located in a macro that is itself defined in a
847    system header, for example, for the NULL macro.  In such a case, if
848    LOCATION were passed directly to diagnostic functions such as
849    warning_at, the diagnostic would be suppressed (unless
850    -Wsystem-headers).  */
851 
852 location_t
expansion_point_location_if_in_system_header(location_t location)853 expansion_point_location_if_in_system_header (location_t location)
854 {
855   if (in_system_header_at (location))
856     location = linemap_resolve_location (line_table, location,
857 					 LRK_MACRO_EXPANSION_POINT,
858 					 NULL);
859   return location;
860 }
861 
862 /* If LOCATION is a virtual location for a token coming from the expansion
863    of a macro, unwind to the location of the expansion point of the macro.  */
864 
865 location_t
expansion_point_location(location_t location)866 expansion_point_location (location_t location)
867 {
868   return linemap_resolve_location (line_table, location,
869 				   LRK_MACRO_EXPANSION_POINT, NULL);
870 }
871 
872 /* Construct a location with caret at CARET, ranging from START to
873    finish e.g.
874 
875                  11111111112
876         12345678901234567890
877      522
878      523   return foo + bar;
879                   ~~~~^~~~~
880      524
881 
882    The location's caret is at the "+", line 523 column 15, but starts
883    earlier, at the "f" of "foo" at column 11.  The finish is at the "r"
884    of "bar" at column 19.  */
885 
886 location_t
make_location(location_t caret,location_t start,location_t finish)887 make_location (location_t caret, location_t start, location_t finish)
888 {
889   location_t pure_loc = get_pure_location (caret);
890   source_range src_range;
891   src_range.m_start = get_start (start);
892   src_range.m_finish = get_finish (finish);
893   location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
894 						   pure_loc,
895 						   src_range,
896 						   NULL);
897   return combined_loc;
898 }
899 
900 /* Same as above, but taking a source range rather than two locations.  */
901 
902 location_t
make_location(location_t caret,source_range src_range)903 make_location (location_t caret, source_range src_range)
904 {
905   location_t pure_loc = get_pure_location (caret);
906   return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
907 }
908 
909 /* Dump statistics to stderr about the memory usage of the line_table
910    set of line maps.  This also displays some statistics about macro
911    expansion.  */
912 
913 void
dump_line_table_statistics(void)914 dump_line_table_statistics (void)
915 {
916   struct linemap_stats s;
917   long total_used_map_size,
918     macro_maps_size,
919     total_allocated_map_size;
920 
921   memset (&s, 0, sizeof (s));
922 
923   linemap_get_statistics (line_table, &s);
924 
925   macro_maps_size = s.macro_maps_used_size
926     + s.macro_maps_locations_size;
927 
928   total_allocated_map_size = s.ordinary_maps_allocated_size
929     + s.macro_maps_allocated_size
930     + s.macro_maps_locations_size;
931 
932   total_used_map_size = s.ordinary_maps_used_size
933     + s.macro_maps_used_size
934     + s.macro_maps_locations_size;
935 
936   fprintf (stderr, "Number of expanded macros:                     %5ld\n",
937            s.num_expanded_macros);
938   if (s.num_expanded_macros != 0)
939     fprintf (stderr, "Average number of tokens per macro expansion:  %5ld\n",
940              s.num_macro_tokens / s.num_expanded_macros);
941   fprintf (stderr,
942            "\nLine Table allocations during the "
943 	   "compilation process\n");
944   fprintf (stderr, "Number of ordinary maps used:        " PRsa (5) "\n",
945 	   SIZE_AMOUNT (s.num_ordinary_maps_used));
946   fprintf (stderr, "Ordinary map used size:              " PRsa (5) "\n",
947 	   SIZE_AMOUNT (s.ordinary_maps_used_size));
948   fprintf (stderr, "Number of ordinary maps allocated:   " PRsa (5) "\n",
949 	   SIZE_AMOUNT (s.num_ordinary_maps_allocated));
950   fprintf (stderr, "Ordinary maps allocated size:        " PRsa (5) "\n",
951 	   SIZE_AMOUNT (s.ordinary_maps_allocated_size));
952   fprintf (stderr, "Number of macro maps used:           " PRsa (5) "\n",
953 	   SIZE_AMOUNT (s.num_macro_maps_used));
954   fprintf (stderr, "Macro maps used size:                " PRsa (5) "\n",
955 	   SIZE_AMOUNT (s.macro_maps_used_size));
956   fprintf (stderr, "Macro maps locations size:           " PRsa (5) "\n",
957 	   SIZE_AMOUNT (s.macro_maps_locations_size));
958   fprintf (stderr, "Macro maps size:                     " PRsa (5) "\n",
959 	   SIZE_AMOUNT (macro_maps_size));
960   fprintf (stderr, "Duplicated maps locations size:      " PRsa (5) "\n",
961 	   SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
962   fprintf (stderr, "Total allocated maps size:           " PRsa (5) "\n",
963 	   SIZE_AMOUNT (total_allocated_map_size));
964   fprintf (stderr, "Total used maps size:                " PRsa (5) "\n",
965 	   SIZE_AMOUNT (total_used_map_size));
966   fprintf (stderr, "Ad-hoc table size:                   " PRsa (5) "\n",
967 	   SIZE_AMOUNT (s.adhoc_table_size));
968   fprintf (stderr, "Ad-hoc table entries used:           " PRsa (5) "\n",
969 	   SIZE_AMOUNT (s.adhoc_table_entries_used));
970   fprintf (stderr, "optimized_ranges:                    " PRsa (5) "\n",
971 	   SIZE_AMOUNT (line_table->num_optimized_ranges));
972   fprintf (stderr, "unoptimized_ranges:                  " PRsa (5) "\n",
973 	   SIZE_AMOUNT (line_table->num_unoptimized_ranges));
974 
975   fprintf (stderr, "\n");
976 }
977 
978 /* Get location one beyond the final location in ordinary map IDX.  */
979 
980 static location_t
get_end_location(struct line_maps * set,unsigned int idx)981 get_end_location (struct line_maps *set, unsigned int idx)
982 {
983   if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
984     return set->highest_location;
985 
986   struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
987   return MAP_START_LOCATION (next_map);
988 }
989 
/* Helper function for write_digit_row.
   Write the decimal units digit of DIGIT (i.e. DIGIT % 10) to STREAM
   as a single character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
997 
998 /* Helper function for dump_location_info.
999    Write a row of numbers to STREAM, numbering a source line,
1000    giving the units, tens, hundreds etc of the column number.  */
1001 
1002 static void
write_digit_row(FILE * stream,int indent,const line_map_ordinary * map,location_t loc,int max_col,int divisor)1003 write_digit_row (FILE *stream, int indent,
1004 		 const line_map_ordinary *map,
1005 		 location_t loc, int max_col, int divisor)
1006 {
1007   fprintf (stream, "%*c", indent, ' ');
1008   fprintf (stream, "|");
1009   for (int column = 1; column < max_col; column++)
1010     {
1011       location_t column_loc = loc + (column << map->m_range_bits);
1012       write_digit (stream, column_loc / divisor);
1013     }
1014   fprintf (stream, "\n");
1015 }
1016 
1017 /* Write a half-closed (START) / half-open (END) interval of
1018    location_t to STREAM.  */
1019 
1020 static void
dump_location_range(FILE * stream,location_t start,location_t end)1021 dump_location_range (FILE *stream,
1022 		     location_t start, location_t end)
1023 {
1024   fprintf (stream,
1025 	   "  location_t interval: %u <= loc < %u\n",
1026 	   start, end);
1027 }
1028 
1029 /* Write a labelled description of a half-closed (START) / half-open (END)
1030    interval of location_t to STREAM.  */
1031 
1032 static void
dump_labelled_location_range(FILE * stream,const char * name,location_t start,location_t end)1033 dump_labelled_location_range (FILE *stream,
1034 			      const char *name,
1035 			      location_t start, location_t end)
1036 {
1037   fprintf (stream, "%s\n", name);
1038   dump_location_range (stream, start, end);
1039   fprintf (stream, "\n");
1040 }
1041 
1042 /* Write a visualization of the locations in the line_table to STREAM.  */
1043 
1044 void
dump_location_info(FILE * stream)1045 dump_location_info (FILE *stream)
1046 {
1047   /* Visualize the reserved locations.  */
1048   dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1049 				0, RESERVED_LOCATION_COUNT);
1050 
1051   /* Visualize the ordinary line_map instances, rendering the sources. */
1052   for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1053     {
1054       location_t end_location = get_end_location (line_table, idx);
1055       /* half-closed: doesn't include this one. */
1056 
1057       const line_map_ordinary *map
1058 	= LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1059       fprintf (stream, "ORDINARY MAP: %i\n", idx);
1060       dump_location_range (stream,
1061 			   MAP_START_LOCATION (map), end_location);
1062       fprintf (stream, "  file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1063       fprintf (stream, "  starting at line: %i\n",
1064 	       ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1065       fprintf (stream, "  column and range bits: %i\n",
1066 	       map->m_column_and_range_bits);
1067       fprintf (stream, "  column bits: %i\n",
1068 	       map->m_column_and_range_bits - map->m_range_bits);
1069       fprintf (stream, "  range bits: %i\n",
1070 	       map->m_range_bits);
1071       const char * reason;
1072       switch (map->reason) {
1073       case LC_ENTER:
1074 	reason = "LC_ENTER";
1075 	break;
1076       case LC_LEAVE:
1077 	reason = "LC_LEAVE";
1078 	break;
1079       case LC_RENAME:
1080 	reason = "LC_RENAME";
1081 	break;
1082       case LC_RENAME_VERBATIM:
1083 	reason = "LC_RENAME_VERBATIM";
1084 	break;
1085       case LC_ENTER_MACRO:
1086 	reason = "LC_RENAME_MACRO";
1087 	break;
1088       default:
1089 	reason = "Unknown";
1090       }
1091       fprintf (stream, "  reason: %d (%s)\n", map->reason, reason);
1092 
1093       const line_map_ordinary *includer_map
1094 	= linemap_included_from_linemap (line_table, map);
1095       fprintf (stream, "  included from location: %d",
1096 	       linemap_included_from (map));
1097       if (includer_map) {
1098 	fprintf (stream, " (in ordinary map %d)",
1099 		 int (includer_map - line_table->info_ordinary.maps));
1100       }
1101       fprintf (stream, "\n");
1102 
1103       /* Render the span of source lines that this "map" covers.  */
1104       for (location_t loc = MAP_START_LOCATION (map);
1105 	   loc < end_location;
1106 	   loc += (1 << map->m_range_bits) )
1107 	{
1108 	  gcc_assert (pure_location_p (line_table, loc) );
1109 
1110 	  expanded_location exploc
1111 	    = linemap_expand_location (line_table, map, loc);
1112 
1113 	  if (exploc.column == 0)
1114 	    {
1115 	      /* Beginning of a new source line: draw the line.  */
1116 
1117 	      char_span line_text = location_get_source_line (exploc.file,
1118 							      exploc.line);
1119 	      if (!line_text)
1120 		break;
1121 	      fprintf (stream,
1122 		       "%s:%3i|loc:%5i|%.*s\n",
1123 		       exploc.file, exploc.line,
1124 		       loc,
1125 		       (int)line_text.length (), line_text.get_buffer ());
1126 
1127 	      /* "loc" is at column 0, which means "the whole line".
1128 		 Render the locations *within* the line, by underlining
1129 		 it, showing the location_t numeric values
1130 		 at each column.  */
1131 	      size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1132 	      if (max_col > line_text.length ())
1133 		max_col = line_text.length () + 1;
1134 
1135 	      int len_lnum = num_digits (exploc.line);
1136 	      if (len_lnum < 3)
1137 		len_lnum = 3;
1138 	      int len_loc = num_digits (loc);
1139 	      if (len_loc < 5)
1140 		len_loc = 5;
1141 
1142 	      int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1143 
1144 	      /* Thousands.  */
1145 	      if (end_location > 999)
1146 		write_digit_row (stream, indent, map, loc, max_col, 1000);
1147 
1148 	      /* Hundreds.  */
1149 	      if (end_location > 99)
1150 		write_digit_row (stream, indent, map, loc, max_col, 100);
1151 
1152 	      /* Tens.  */
1153 	      write_digit_row (stream, indent, map, loc, max_col, 10);
1154 
1155 	      /* Units.  */
1156 	      write_digit_row (stream, indent, map, loc, max_col, 1);
1157 	    }
1158 	}
1159       fprintf (stream, "\n");
1160     }
1161 
1162   /* Visualize unallocated values.  */
1163   dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1164 				line_table->highest_location,
1165 				LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1166 
1167   /* Visualize the macro line_map instances, rendering the sources. */
1168   for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1169     {
1170       /* Each macro map that is allocated owns location_t values
1171 	 that are *lower* that the one before them.
1172 	 Hence it's meaningful to view them either in order of ascending
1173 	 source locations, or in order of ascending macro map index.  */
1174       const bool ascending_location_ts = true;
1175       unsigned int idx = (ascending_location_ts
1176 			  ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1177 			  : i);
1178       const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1179       fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1180 	       idx,
1181 	       linemap_map_get_macro_name (map),
1182 	       MACRO_MAP_NUM_MACRO_TOKENS (map));
1183       dump_location_range (stream,
1184 			   map->start_location,
1185 			   (map->start_location
1186 			    + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1187       inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1188 	      "expansion point is location %i",
1189 	      MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1190       fprintf (stream, "  map->start_location: %u\n",
1191 	       map->start_location);
1192 
1193       fprintf (stream, "  macro_locations:\n");
1194       for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1195 	{
1196 	  location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1197 	  location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1198 
1199 	  /* linemap_add_macro_token encodes token numbers in an expansion
1200 	     by putting them after MAP_START_LOCATION. */
1201 
1202 	  /* I'm typically seeing 4 uninitialized entries at the end of
1203 	     0xafafafaf.
1204 	     This appears to be due to macro.c:replace_args
1205 	     adding 2 extra args for padding tokens; presumably there may
1206 	     be a leading and/or trailing padding token injected,
1207 	     each for 2 more location slots.
1208 	     This would explain there being up to 4 location_ts slots
1209 	     that may be uninitialized.  */
1210 
1211 	  fprintf (stream, "    %u: %u, %u\n",
1212 		   i,
1213 		   x,
1214 		   y);
1215 	  if (x == y)
1216 	    {
1217 	      if (x < MAP_START_LOCATION (map))
1218 		inform (x, "token %u has x-location == y-location == %u", i, x);
1219 	      else
1220 		fprintf (stream,
1221 			 "x-location == y-location == %u encodes token # %u\n",
1222 			 x, x - MAP_START_LOCATION (map));
1223 		}
1224 	  else
1225 	    {
1226 	      inform (x, "token %u has x-location == %u", i, x);
1227 	      inform (x, "token %u has y-location == %u", i, y);
1228 	    }
1229 	}
1230       fprintf (stream, "\n");
1231     }
1232 
1233   /* It appears that MAX_LOCATION_T itself is never assigned to a
1234      macro map, presumably due to an off-by-one error somewhere
1235      between the logic in linemap_enter_macro and
1236      LINEMAPS_MACRO_LOWEST_LOCATION.  */
1237   dump_labelled_location_range (stream, "MAX_LOCATION_T",
1238 				MAX_LOCATION_T,
1239 				MAX_LOCATION_T + 1);
1240 
1241   /* Visualize ad-hoc values.  */
1242   dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1243 				MAX_LOCATION_T + 1, UINT_MAX);
1244 }
1245 
1246 /* string_concat's constructor.  */
1247 
string_concat(int num,location_t * locs)1248 string_concat::string_concat (int num, location_t *locs)
1249   : m_num (num)
1250 {
1251   m_locs = ggc_vec_alloc <location_t> (num);
1252   for (int i = 0; i < num; i++)
1253     m_locs[i] = locs[i];
1254 }
1255 
1256 /* string_concat_db's constructor.  */
1257 
string_concat_db()1258 string_concat_db::string_concat_db ()
1259 {
1260   m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1261 }
1262 
1263 /* Record that a string concatenation occurred, covering NUM
1264    string literal tokens.  LOCS is an array of size NUM, containing the
1265    locations of the tokens.  A copy of LOCS is taken.  */
1266 
1267 void
record_string_concatenation(int num,location_t * locs)1268 string_concat_db::record_string_concatenation (int num, location_t *locs)
1269 {
1270   gcc_assert (num > 1);
1271   gcc_assert (locs);
1272 
1273   location_t key_loc = get_key_loc (locs[0]);
1274 
1275   string_concat *concat
1276     = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1277   m_table->put (key_loc, concat);
1278 }
1279 
1280 /* Determine if LOC was the location of the the initial token of a
1281    concatenation of string literal tokens.
1282    If so, *OUT_NUM is written to with the number of tokens, and
1283    *OUT_LOCS with the location of an array of locations of the
1284    tokens, and return true.  *OUT_LOCS is a borrowed pointer to
1285    storage owned by the string_concat_db.
1286    Otherwise, return false.  */
1287 
1288 bool
get_string_concatenation(location_t loc,int * out_num,location_t ** out_locs)1289 string_concat_db::get_string_concatenation (location_t loc,
1290 					    int *out_num,
1291 					    location_t **out_locs)
1292 {
1293   gcc_assert (out_num);
1294   gcc_assert (out_locs);
1295 
1296   location_t key_loc = get_key_loc (loc);
1297 
1298   string_concat **concat = m_table->get (key_loc);
1299   if (!concat)
1300     return false;
1301 
1302   *out_num = (*concat)->m_num;
1303   *out_locs =(*concat)->m_locs;
1304   return true;
1305 }
1306 
1307 /* Internal function.  Canonicalize LOC into a form suitable for
1308    use as a key within the database, stripping away macro expansion,
1309    ad-hoc information, and range information, using the location of
1310    the start of LOC within an ordinary linemap.  */
1311 
1312 location_t
get_key_loc(location_t loc)1313 string_concat_db::get_key_loc (location_t loc)
1314 {
1315   loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1316 				  NULL);
1317 
1318   loc = get_range_from_loc (line_table, loc).m_start;
1319 
1320   return loc;
1321 }
1322 
1323 /* Helper class for use within get_substring_ranges_for_loc.
1324    An vec of cpp_string with responsibility for releasing all of the
1325    str->text for each str in the vector.  */
1326 
1327 class auto_cpp_string_vec :  public auto_vec <cpp_string>
1328 {
1329  public:
auto_cpp_string_vec(int alloc)1330   auto_cpp_string_vec (int alloc)
1331     : auto_vec <cpp_string> (alloc) {}
1332 
~auto_cpp_string_vec()1333   ~auto_cpp_string_vec ()
1334   {
1335     /* Clean up the copies within this vec.  */
1336     int i;
1337     cpp_string *str;
1338     FOR_EACH_VEC_ELT (*this, i, str)
1339       free (const_cast <unsigned char *> (str->text));
1340   }
1341 };
1342 
/* Attempt to populate RANGES with source location information on the
   individual characters within the string literal found at STRLOC.
   If CONCATS is non-NULL, then any string literals that the token at
   STRLOC  was concatenated with are also added to RANGES.

   PFILE must be non-NULL; it supplies the preprocessor options and is
   passed on to cpp_interpret_string_ranges, as is TYPE.

   Return NULL if successful, or an error message if any errors occurred (in
   which case RANGES may be only partially populated and should not
   be used).  The checks are made in a fixed order, so the message
   returned is that of the first check that fails.

   This is implemented by re-parsing the relevant source line(s).  */

static const char *
get_substring_ranges_for_loc (cpp_reader *pfile,
			      string_concat_db *concats,
			      location_t strloc,
			      enum cpp_ttype type,
			      cpp_substring_ranges &ranges)
{
  gcc_assert (pfile);

  if (strloc == UNKNOWN_LOCATION)
    return "unknown location";

  /* Reparsing the strings requires accurate location information.
     If -ftrack-macro-expansion has been overridden from its default
     of 2, then we might have a location of a macro expansion point,
     rather than the location of the literal itself.
     Avoid this by requiring that we have full macro expansion tracking
     for substring locations to be available.  */
  if (cpp_get_options (pfile)->track_macro_expansion != 2)
    return "track_macro_expansion != 2";

  /* If #line or # 44 "file"-style directives are present, then there's
     no guarantee that the line numbers we have can be used to locate
     the strings.  For example, we might have a .i file with # directives
     pointing back to lines within a .c file, but the .c file might
     have been edited since the .i file was created.
     In such a case, the safest course is to disable on-demand substring
     locations.  */
  if (line_table->seen_line_directive)
    return "seen line directive";

  /* If string concatenation has occurred at STRLOC, get the locations
     of all of the literal tokens making up the compound string.
     Otherwise, just use STRLOC.
     The result of the lookup is not checked: when nothing is recorded
     for STRLOC, get_string_concatenation returns before writing to its
     out-parameters, so the single-token defaults set up here remain.  */
  int num_locs = 1;
  location_t *strlocs = &strloc;
  if (concats)
    concats->get_string_concatenation (strloc, &num_locs, &strlocs);

  /* Parallel vectors with one entry per literal token: a copy of the
     token's text, and a reader positioned at the token's start.  */
  auto_cpp_string_vec strs (num_locs);
  auto_vec <cpp_string_location_reader> loc_readers (num_locs);
  for (int i = 0; i < num_locs; i++)
    {
      /* Get range of strloc.  We will use it to locate the start and finish
	 of the literal token within the line.  */
      source_range src_range = get_range_from_loc (line_table, strlocs[i]);

      if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
	{
	  /* If the string token was within a macro expansion, then we can
	     cope with it for the simple case where we have a single token.
	     Otherwise, bail out.  */
	  if (src_range.m_start != src_range.m_finish)
	    return "macro expansion";
	}
      else
	{
	  if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
	    /* If so, we can't reliably determine where the token started within
	       its line.  */
	    return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";

	  if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
	    /* If so, we can't reliably determine where the token finished
	       within its line.  */
	    return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
	}

      /* Expand both endpoints to file/line/column, and require them to
	 describe a forwards span within a single line of a single
	 file.  */
      expanded_location start
	= expand_location_to_spelling_point (src_range.m_start,
					     LOCATION_ASPECT_START);
      expanded_location finish
	= expand_location_to_spelling_point (src_range.m_finish,
					     LOCATION_ASPECT_FINISH);
      if (start.file != finish.file)
	return "range endpoints are in different files";
      if (start.line != finish.line)
	return "range endpoints are on different lines";
      if (start.column > finish.column)
	return "range endpoints are reversed";

      char_span line = location_get_source_line (start.file, start.line);
      if (!line)
	return "unable to read source line";

      /* Determine the location of the literal (including quotes
	 and leading prefix chars, such as the 'u' in a u""
	 token).  Both endpoint columns are included, hence the + 1.  */
      size_t literal_length = finish.column - start.column + 1;

      /* Ensure that we don't crash if we got the wrong location.  */
      if (line.length () < (start.column - 1 + literal_length))
	return "line is not wide enough";

      /* The column is converted to an offset into the line by
	 subtracting one.  */
      char_span literal = line.subspan (start.column - 1, literal_length);

      cpp_string from;
      from.len = literal_length;
      /* Make a copy of the literal, to avoid having to rely on
	 the lifetime of the copy of the line within the cache.
	 This will be released by the auto_cpp_string_vec dtor.  */
      from.text = (unsigned char *)literal.xstrdup ();
      /* From here on STRS owns the copy, so the error returns below
	 don't need to free it themselves.  */
      strs.safe_push (from);

      /* For very long lines, a new linemap could have started
	 halfway through the token.
	 Ensure that the loc_reader uses the linemap of the
	 *end* of the token for its start location.  */
      const line_map_ordinary *start_ord_map;
      linemap_resolve_location (line_table, src_range.m_start,
				LRK_SPELLING_LOCATION, &start_ord_map);
      const line_map_ordinary *final_ord_map;
      linemap_resolve_location (line_table, src_range.m_finish,
				LRK_SPELLING_LOCATION, &final_ord_map);
      if (start_ord_map == NULL || final_ord_map == NULL)
	return "failed to get ordinary maps";
      /* Bulletproofing.  We ought to only have different ordinary maps
	 for start vs finish due to line-length jumps.  */
      if (start_ord_map != final_ord_map
	  && start_ord_map->to_file != final_ord_map->to_file)
	return "start and finish are spelled in different ordinary maps";
      /* The file from linemap_resolve_location ought to match that from
	 expand_location_to_spelling_point.  */
      if (start_ord_map->to_file != start.file)
	return "mismatching file after resolving linemap";

      /* Re-express the start of the token as a location within the
	 map of the token's *end*.  */
      location_t start_loc
	= linemap_position_for_line_and_column (line_table, final_ord_map,
						start.line, start.column);

      cpp_string_location_reader loc_reader (start_loc, line_table);
      loc_readers.safe_push (loc_reader);
    }

  /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
  const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
						 loc_readers.address (),
						 num_locs, &ranges, type);
  if (err)
    return err;

  /* Success: "ranges" should now contain information on the string.  */
  return NULL;
}
1498 
1499 /* Attempt to populate *OUT_LOC with source location information on the
1500    given characters within the string literal found at STRLOC.
1501    CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1502    character set.
1503 
1504    For example, given CARET_IDX = 4, START_IDX = 3, END_IDX  = 7
1505    and string literal "012345\n789"
1506    *OUT_LOC is written to with:
1507      "012345\n789"
1508          ~^~~~~
1509 
1510    If CONCATS is non-NULL, then any string literals that the token at
1511    STRLOC was concatenated with are also considered.
1512 
1513    This is implemented by re-parsing the relevant source line(s).
1514 
1515    Return NULL if successful, or an error message if any errors occurred.
1516    Error messages are intended for GCC developers (to help debugging) rather
1517    than for end-users.  */
1518 
1519 const char *
get_location_within_string(cpp_reader * pfile,string_concat_db * concats,location_t strloc,enum cpp_ttype type,int caret_idx,int start_idx,int end_idx,location_t * out_loc)1520 get_location_within_string (cpp_reader *pfile,
1521 			    string_concat_db *concats,
1522 			    location_t strloc,
1523 			    enum cpp_ttype type,
1524 			    int caret_idx, int start_idx, int end_idx,
1525 			    location_t *out_loc)
1526 {
1527   gcc_checking_assert (caret_idx >= 0);
1528   gcc_checking_assert (start_idx >= 0);
1529   gcc_checking_assert (end_idx >= 0);
1530   gcc_assert (out_loc);
1531 
1532   cpp_substring_ranges ranges;
1533   const char *err
1534     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1535   if (err)
1536     return err;
1537 
1538   if (caret_idx >= ranges.get_num_ranges ())
1539     return "caret_idx out of range";
1540   if (start_idx >= ranges.get_num_ranges ())
1541     return "start_idx out of range";
1542   if (end_idx >= ranges.get_num_ranges ())
1543     return "end_idx out of range";
1544 
1545   *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1546 			    ranges.get_range (start_idx).m_start,
1547 			    ranges.get_range (end_idx).m_finish);
1548   return NULL;
1549 }
1550 
1551 #if CHECKING_P
1552 
1553 namespace selftest {
1554 
1555 /* Selftests of location handling.  */
1556 
1557 /* Attempt to populate *OUT_RANGE with source location information on the
1558    given character within the string literal found at STRLOC.
1559    CHAR_IDX refers to an offset within the execution character set.
1560    If CONCATS is non-NULL, then any string literals that the token at
1561    STRLOC was concatenated with are also considered.
1562 
1563    This is implemented by re-parsing the relevant source line(s).
1564 
1565    Return NULL if successful, or an error message if any errors occurred.
1566    Error messages are intended for GCC developers (to help debugging) rather
1567    than for end-users.  */
1568 
1569 static const char *
get_source_range_for_char(cpp_reader * pfile,string_concat_db * concats,location_t strloc,enum cpp_ttype type,int char_idx,source_range * out_range)1570 get_source_range_for_char (cpp_reader *pfile,
1571 			   string_concat_db *concats,
1572 			   location_t strloc,
1573 			   enum cpp_ttype type,
1574 			   int char_idx,
1575 			   source_range *out_range)
1576 {
1577   gcc_checking_assert (char_idx >= 0);
1578   gcc_assert (out_range);
1579 
1580   cpp_substring_ranges ranges;
1581   const char *err
1582     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1583   if (err)
1584     return err;
1585 
1586   if (char_idx >= ranges.get_num_ranges ())
1587     return "char_idx out of range";
1588 
1589   *out_range = ranges.get_range (char_idx);
1590   return NULL;
1591 }
1592 
1593 /* As get_source_range_for_char, but write to *OUT the number
1594    of ranges that are available.  */
1595 
1596 static const char *
get_num_source_ranges_for_substring(cpp_reader * pfile,string_concat_db * concats,location_t strloc,enum cpp_ttype type,int * out)1597 get_num_source_ranges_for_substring (cpp_reader *pfile,
1598 				     string_concat_db *concats,
1599 				     location_t strloc,
1600 				     enum cpp_ttype type,
1601 				     int *out)
1602 {
1603   gcc_assert (out);
1604 
1605   cpp_substring_ranges ranges;
1606   const char *err
1607     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1608 
1609   if (err)
1610     return err;
1611 
1612   *out = ranges.get_num_ranges ();
1613   return NULL;
1614 }
1615 
1616 /* Selftests of location handling.  */
1617 
1618 /* Verify that compare() on linenum_type handles comparisons over the full
1619    range of the type.  */
1620 
1621 static void
test_linenum_comparisons()1622 test_linenum_comparisons ()
1623 {
1624   linenum_type min_line (0);
1625   linenum_type max_line (0xffffffff);
1626   ASSERT_EQ (0, compare (min_line, min_line));
1627   ASSERT_EQ (0, compare (max_line, max_line));
1628 
1629   ASSERT_GT (compare (max_line, min_line), 0);
1630   ASSERT_LT (compare (min_line, max_line), 0);
1631 }
1632 
1633 /* Helper function for verifying location data: when location_t
1634    values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1635    as having column 0.  */
1636 
1637 static bool
should_have_column_data_p(location_t loc)1638 should_have_column_data_p (location_t loc)
1639 {
1640   if (IS_ADHOC_LOC (loc))
1641     loc = get_location_from_adhoc_loc (line_table, loc);
1642   if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1643     return false;
1644   return true;
1645 }
1646 
1647 /* Selftest for should_have_column_data_p.  */
1648 
1649 static void
test_should_have_column_data_p()1650 test_should_have_column_data_p ()
1651 {
1652   ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1653   ASSERT_TRUE
1654     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1655   ASSERT_FALSE
1656     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1657 }
1658 
1659 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1660    on LOC.  */
1661 
1662 static void
assert_loceq(const char * exp_filename,int exp_linenum,int exp_colnum,location_t loc)1663 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1664 	      location_t loc)
1665 {
1666   ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1667   ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1668   /* If location_t values are sufficiently high, then column numbers
1669      will be unavailable and LOCATION_COLUMN (loc) will be 0.
1670      When close to the threshold, column numbers *may* be present: if
1671      the final linemap before the threshold contains a line that straddles
1672      the threshold, locations in that line have column information.  */
1673   if (should_have_column_data_p (loc))
1674     ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1675 }
1676 
/* Many of the selftests build a line table holding one or more line
   maps.

   To get the most out of them we want to run each under a range of
   conditions:
   - line_table->default_range_bits: some frontends use a non-zero
   value, others use zero
   - the fallback modes within line-map.c: there are threshold values
   of location_t beyond which line-map.c changes behavior (disabling
   the range-packing optimization, disabling column-tracking).  These
   can be exercised by starting the line_table at interesting values at
   or near the thresholds.

   One instance of the following struct describes one cell of that test
   matrix.  */

struct line_table_case
{
  /* Describe a case using DEFAULT_RANGE_BITS for the table's
     default_range_bits and BASE_LOCATION as the starting location.  */
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* Value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* Location at which to start the line_table; zero means that the
     table is left where linemap_init put it.  */
  int m_base_location;
};
1703 
1704 /* Constructor.  Store the old value of line_table, and create a new
1705    one, using sane defaults.  */
1706 
line_table_test()1707 line_table_test::line_table_test ()
1708 {
1709   gcc_assert (saved_line_table == NULL);
1710   saved_line_table = line_table;
1711   line_table = ggc_alloc<line_maps> ();
1712   linemap_init (line_table, BUILTINS_LOCATION);
1713   gcc_assert (saved_line_table->reallocator);
1714   line_table->reallocator = saved_line_table->reallocator;
1715   gcc_assert (saved_line_table->round_alloc_size);
1716   line_table->round_alloc_size = saved_line_table->round_alloc_size;
1717   line_table->default_range_bits = 0;
1718 }
1719 
1720 /* Constructor.  Store the old value of line_table, and create a new
1721    one, using the sitation described in CASE_.  */
1722 
line_table_test(const line_table_case & case_)1723 line_table_test::line_table_test (const line_table_case &case_)
1724 {
1725   gcc_assert (saved_line_table == NULL);
1726   saved_line_table = line_table;
1727   line_table = ggc_alloc<line_maps> ();
1728   linemap_init (line_table, BUILTINS_LOCATION);
1729   gcc_assert (saved_line_table->reallocator);
1730   line_table->reallocator = saved_line_table->reallocator;
1731   gcc_assert (saved_line_table->round_alloc_size);
1732   line_table->round_alloc_size = saved_line_table->round_alloc_size;
1733   line_table->default_range_bits = case_.m_default_range_bits;
1734   if (case_.m_base_location)
1735     {
1736       line_table->highest_location = case_.m_base_location;
1737       line_table->highest_line = case_.m_base_location;
1738     }
1739 }
1740 
1741 /* Destructor.  Restore the old value of line_table.  */
1742 
~line_table_test()1743 line_table_test::~line_table_test ()
1744 {
1745   gcc_assert (saved_line_table != NULL);
1746   line_table = saved_line_table;
1747   saved_line_table = NULL;
1748 }
1749 
/* Verify basic operation of ordinary linemaps: build maps for two files
   containing short, long and over-long lines within the line table
   described by CASE_, then check that the file, line and column of each
   recorded location can be recovered.  */

static void
test_accessing_ordinary_linemaps (const line_table_case &case_)
{
  line_table_test ltt (case_);

  /* Build a simple linemap describing some locations. */
  linemap_add (line_table, LC_ENTER, false, "foo.c", 0);

  linemap_line_start (line_table, 1, 100);
  location_t loc_a = linemap_position_for_column (line_table, 1);
  location_t loc_b = linemap_position_for_column (line_table, 23);

  linemap_line_start (line_table, 2, 100);
  location_t loc_c = linemap_position_for_column (line_table, 1);
  location_t loc_d = linemap_position_for_column (line_table, 17);

  /* Example of a very long line.  */
  linemap_line_start (line_table, 3, 2000);
  location_t loc_e = linemap_position_for_column (line_table, 700);

  /* Transitioning back to a short line.  */
  linemap_line_start (line_table, 4, 0);
  location_t loc_back_to_short = linemap_position_for_column (line_table, 100);

  if (should_have_column_data_p (loc_back_to_short))
    {
      /* Verify that we switched to short lines in the linemap: the bits
	 left over for the column, once the range bits are subtracted,
	 should number 7.  */
      line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
      ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
    }

  /* Example of a line that will eventually be seen to be longer
     than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
     below that.  */
  linemap_line_start (line_table, 5, 2000);

  location_t loc_start_of_very_long_line
    = linemap_position_for_column (line_table, 2000);
  location_t loc_too_wide
    = linemap_position_for_column (line_table, 4097);
  location_t loc_too_wide_2
    = linemap_position_for_column (line_table, 4098);

  /* ...and back to a sane line length.  */
  linemap_line_start (line_table, 6, 100);
  location_t loc_sane_again = linemap_position_for_column (line_table, 10);

  linemap_add (line_table, LC_LEAVE, false, NULL, 0);

  /* Multiple files.  */
  linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
  linemap_line_start (line_table, 1, 200);
  location_t loc_f = linemap_position_for_column (line_table, 150);
  linemap_add (line_table, LC_LEAVE, false, NULL, 0);

  /* Verify that we can recover the location info.  assert_loceq only
     checks the column when column data is expected for the location.  */
  assert_loceq ("foo.c", 1, 1, loc_a);
  assert_loceq ("foo.c", 1, 23, loc_b);
  assert_loceq ("foo.c", 2, 1, loc_c);
  assert_loceq ("foo.c", 2, 17, loc_d);
  assert_loceq ("foo.c", 3, 700, loc_e);
  assert_loceq ("foo.c", 4, 100, loc_back_to_short);

  /* In the very wide line, the initial location should be fully tracked.  */
  assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
  /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
     be disabled.  */
  assert_loceq ("foo.c", 5, 0, loc_too_wide);
  assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
  /*...and column-tracking should be re-enabled for subsequent lines.  */
  assert_loceq ("foo.c", 6, 10, loc_sane_again);

  assert_loceq ("bar.c", 1, 150, loc_f);

  ASSERT_FALSE (is_location_from_builtin_token (loc_a));
  ASSERT_TRUE (pure_location_p (line_table, loc_a));

  /* Verify using make_location to build a range, and extracting data
     back from it.  Here loc_c is the caret, loc_b the start and loc_d
     the finish.  */
  location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
  ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
  ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
  source_range src_range = get_range_from_loc (line_table, range_c_b_d);
  ASSERT_EQ (loc_b, src_range.m_start);
  ASSERT_EQ (loc_d, src_range.m_finish);
}
1838 
/* Verify various properties of UNKNOWN_LOCATION: it has no file, and
   both its line and its column are zero.  */

static void
test_unknown_location ()
{
  ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
  ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
  ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
}
1848 
/* Verify various properties of BUILTINS_LOCATION: it reports the
   (translated) "<built-in>" pseudo-filename at line 0, column 0, and is
   recognized by is_location_from_builtin_token.  */

static void
test_builtins ()
{
  assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
  ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
}
1857 
/* Regression test for make_location.
   Ensure that we use pure locations for the start/finish of the range,
   rather than storing a packed or ad-hoc range as the start/finish.
   CASE_ describes the line table to run the test against.  */

static void
test_make_location_nonpure_range_endpoints (const line_table_case &case_)
{
  /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
     with C++ frontend.
     ....................0000000001111111111222.
     ....................1234567890123456789012.  */
  const char *content = "     r += !aaa == bbb;\n";
  temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
  line_table_test ltt (case_);
  linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);

  /* Pure locations for the columns of interest within the line above.  */
  const location_t c11 = linemap_position_for_column (line_table, 11);
  const location_t c12 = linemap_position_for_column (line_table, 12);
  const location_t c13 = linemap_position_for_column (line_table, 13);
  const location_t c14 = linemap_position_for_column (line_table, 14);
  const location_t c21 = linemap_position_for_column (line_table, 21);

  /* The checks below compare individual columns, so give up once the
     locations are beyond LINE_MAP_MAX_LOCATION_WITH_COLS.  */
  if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
    return;

  /* Use column 13 for the caret location, arbitrarily, to verify that we
     handle start != caret.  */
  const location_t aaa = make_location (c13, c12, c14);
  ASSERT_EQ (c13, get_pure_location (aaa));
  ASSERT_EQ (c12, get_start (aaa));
  ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
  ASSERT_EQ (c14, get_finish (aaa));
  ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));

  /* Make a location using a location with a range as the start-point.  */
  const location_t not_aaa = make_location (c11, aaa, c14);
  ASSERT_EQ (c11, get_pure_location (not_aaa));
  /* It should use the start location of the range, not store the range
     itself.  */
  ASSERT_EQ (c12, get_start (not_aaa));
  ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
  ASSERT_EQ (c14, get_finish (not_aaa));
  ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));

  /* Similarly, make a location with a range as the end-point.  */
  const location_t aaa_eq_bbb = make_location (c12, c12, c21);
  ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
  ASSERT_EQ (c12, get_start (aaa_eq_bbb));
  ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
  ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
  ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
  const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
  /* It should use the finish location of the range, not store the range
     itself.  */
  ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
  ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
  ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
  ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
  ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
}
1918 
/* Verify reading of input files (e.g. for caret-based diagnostics):
   write a three-line file, read individual lines back by number, and
   check that asking for a line past the end yields an empty result.  */

static void
test_reading_source_line ()
{
  /* Create a tempfile and write some text to it.  Note that the final
     line has no trailing newline.  */
  temp_source_file tmp (SELFTEST_LOCATION, ".txt",
			"01234567890123456789\n"
			"This is the test text\n"
			"This is the 3rd line");

  /* Read back a specific line from the tempfile.  */
  char_span source_line = location_get_source_line (tmp.get_filename (), 3);
  ASSERT_TRUE (source_line);
  ASSERT_TRUE (source_line.get_buffer () != NULL);
  ASSERT_EQ (20, source_line.length ());
  ASSERT_TRUE (!strncmp ("This is the 3rd line",
			 source_line.get_buffer (), source_line.length ()));

  /* Reading an earlier line afterwards must work too; the expected
     length does not count the terminating newline.  */
  source_line = location_get_source_line (tmp.get_filename (), 2);
  ASSERT_TRUE (source_line);
  ASSERT_TRUE (source_line.get_buffer () != NULL);
  ASSERT_EQ (21, source_line.length ());
  ASSERT_TRUE (!strncmp ("This is the test text",
			 source_line.get_buffer (), source_line.length ()));

  /* The file only has three lines, so line 4 must not be found.  */
  source_line = location_get_source_line (tmp.get_filename (), 4);
  ASSERT_FALSE (source_line);
  ASSERT_TRUE (source_line.get_buffer () == NULL);
}
1949 
1950 /* Tests of lexing.  */
1951 
/* Verify that token TOK from PARSER has cpp_token_as_text
   equal to EXPECTED_TEXT.  The comparison is done with ASSERT_STREQ,
   after casting the unsigned char text to const char *.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)		\
  SELFTEST_BEGIN_STMT							\
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));	\
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);		\
  SELFTEST_END_STMT
1960 
/* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
   and ranges from EXP_START_COL to EXP_FINISH_COL.
   The column checks are skipped when column data is not expected for
   the token's location.
   Use LOC as the effective location of the selftest.  */

static void
assert_token_loc_eq (const location &loc,
		     const cpp_token *tok,
		     const char *exp_filename, int exp_linenum,
		     int exp_start_col, int exp_finish_col)
{
  location_t tok_loc = tok->src_loc;
  ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
  ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));

  /* If location_t values are sufficiently high, then column numbers
     will be unavailable.  */
  if (!should_have_column_data_p (tok_loc))
    return;

  /* The token's own location and the start of its range are both
     expected to be at EXP_START_COL.  */
  ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
  source_range tok_range = get_range_from_loc (line_table, tok_loc);
  ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
  ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
}
1985 
/* Use assert_token_loc_eq to verify the TOK->src_loc, using
   SELFTEST_LOCATION as the effective location of the selftest, so that
   a failure is reported at the line that used this macro.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
			    EXP_START_COL, EXP_FINISH_COL) \
  assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
		       (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
1993 
/* Test of lexing a file using libcpp, verifying tokens and their
   location information.  CASE_ describes the line table to run the
   test against.  */

static void
test_lexer (const line_table_case &case_)
{
  /* Create a tempfile and write some text to it.  */
  const char *content =
    /*00000000011111111112222222222333333.3333444444444.455555555556
      12345678901234567890123456789012345.6789012345678.901234567890.  */
    ("test_name /* c-style comment */\n"
     "                                  \"test literal\"\n"
     " // test c++-style comment\n"
     "   42\n");
  temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);

  line_table_test ltt (case_);

  cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);

  const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
  ASSERT_NE (fname, NULL);

  /* Verify that we get the expected tokens back, with the correct
     location information.  */

  location_t loc;
  const cpp_token *tok;
  /* Line 1: the identifier; the C-style comment yields no token.  */
  tok = cpp_get_token_with_location (parser, &loc);
  ASSERT_NE (tok, NULL);
  ASSERT_EQ (tok->type, CPP_NAME);
  ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
  ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);

  /* Line 2: the string literal, whose text includes its quotes.  */
  tok = cpp_get_token_with_location (parser, &loc);
  ASSERT_NE (tok, NULL);
  ASSERT_EQ (tok->type, CPP_STRING);
  ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
  ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);

  /* Line 3 holds only a C++-style comment, so the next token is the
     number on line 4.  */
  tok = cpp_get_token_with_location (parser, &loc);
  ASSERT_NE (tok, NULL);
  ASSERT_EQ (tok->type, CPP_NUMBER);
  ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
  ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);

  tok = cpp_get_token_with_location (parser, &loc);
  ASSERT_NE (tok, NULL);
  ASSERT_EQ (tok->type, CPP_EOF);

  cpp_finish (parser, NULL);
  cpp_destroy (parser);
}
2047 
2048 /* Forward decls.  */
2049 
2050 struct lexer_test;
2051 class lexer_test_options;
2052 
2053 /* A class for specifying options of a lexer_test.
2054    The "apply" vfunc is called during the lexer_test constructor.  */
2055 
2056 class lexer_test_options
2057 {
2058  public:
2059   virtual void apply (lexer_test &) = 0;
2060 };
2061 
2062 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2063    in its dtor.
2064 
2065    This is needed by struct lexer_test to ensure that the cleanup of the
2066    cpp_reader happens *after* the cleanup of the temp_source_file.  */
2067 
2068 class cpp_reader_ptr
2069 {
2070  public:
cpp_reader_ptr(cpp_reader * ptr)2071   cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2072 
~cpp_reader_ptr()2073   ~cpp_reader_ptr ()
2074   {
2075     cpp_finish (m_ptr, NULL);
2076     cpp_destroy (m_ptr);
2077   }
2078 
2079   operator cpp_reader * () const { return m_ptr; }
2080 
2081  private:
2082   cpp_reader *m_ptr;
2083 };
2084 
/* A struct for writing lexer tests.  It bundles the replacement line
   table, the cpp_reader, the temporary source file and the string
   concatenation database used by such a test.  */

struct lexer_test
{
  /* CASE_ selects the line table configuration, CONTENT is the text to
     lex, and OPTIONS (which may be NULL) can adjust the reader.  */
  lexer_test (const line_table_case &case_, const char *content,
	      lexer_test_options *options);
  ~lexer_test ();

  /* Get the next token from m_parser.  */
  const cpp_token *get_token ();

  /* The ordering of these fields matters.
     The line_table_test must be first, since the cpp_reader_ptr
     uses it.
     The cpp_reader must be cleaned up *after* the temp_source_file
     since the filenames in input.c's input cache are owned by the
     cpp_reader; in particular, when ~temp_source_file evicts the
     filename the filenames must still be alive.
     (Members are destroyed in the reverse of their declaration order.)  */
  line_table_test m_ltt;
  cpp_reader_ptr m_parser;
  temp_source_file m_tempfile;
  string_concat_db m_concats;
  /* When true, the destructor verifies that the next token is CPP_EOF.  */
  bool m_implicitly_expect_EOF;
};
2108 
2109 /* Use an EBCDIC encoding for the execution charset, specifically
2110    IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2111 
2112    This exercises iconv integration within libcpp.
2113    Not every build of iconv supports the given charset,
2114    so we need to flag this error and handle it gracefully.  */
2115 
2116 class ebcdic_execution_charset : public lexer_test_options
2117 {
2118  public:
ebcdic_execution_charset()2119   ebcdic_execution_charset () : m_num_iconv_errors (0)
2120     {
2121       gcc_assert (s_singleton == NULL);
2122       s_singleton = this;
2123     }
~ebcdic_execution_charset()2124   ~ebcdic_execution_charset ()
2125     {
2126       gcc_assert (s_singleton == this);
2127       s_singleton = NULL;
2128     }
2129 
apply(lexer_test & test)2130   void apply (lexer_test &test) FINAL OVERRIDE
2131   {
2132     cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2133     cpp_opts->narrow_charset = "IBM1047";
2134 
2135     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2136     callbacks->diagnostic = on_diagnostic;
2137   }
2138 
on_diagnostic(cpp_reader * pfile ATTRIBUTE_UNUSED,enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,enum cpp_warning_reason reason ATTRIBUTE_UNUSED,rich_location * richloc ATTRIBUTE_UNUSED,const char * msgid,va_list * ap ATTRIBUTE_UNUSED)2139   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2140 			     enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2141 			     enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2142 			     rich_location *richloc ATTRIBUTE_UNUSED,
2143 			     const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2144     ATTRIBUTE_FPTR_PRINTF(5,0)
2145   {
2146     gcc_assert (s_singleton);
2147     /* Avoid exgettext from picking this up, it is translated in libcpp.  */
2148     const char *msg = "conversion from %s to %s not supported by iconv";
2149 #ifdef ENABLE_NLS
2150     msg = dgettext ("cpplib", msg);
2151 #endif
2152     /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2153        when the local iconv build doesn't support the conversion.  */
2154     if (strcmp (msgid, msg) == 0)
2155       {
2156 	s_singleton->m_num_iconv_errors++;
2157 	return true;
2158       }
2159 
2160     /* Otherwise, we have an unexpected error.  */
2161     abort ();
2162   }
2163 
iconv_errors_occurred_p()2164   bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2165 
2166  private:
2167   static ebcdic_execution_charset *s_singleton;
2168   int m_num_iconv_errors;
2169 };
2170 
2171 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2172 
2173 /* A lexer_test_options subclass that records a list of diagnostic
2174    messages emitted by the lexer.  */
2175 
2176 class lexer_diagnostic_sink : public lexer_test_options
2177 {
2178  public:
lexer_diagnostic_sink()2179   lexer_diagnostic_sink ()
2180   {
2181     gcc_assert (s_singleton == NULL);
2182     s_singleton = this;
2183   }
~lexer_diagnostic_sink()2184   ~lexer_diagnostic_sink ()
2185   {
2186     gcc_assert (s_singleton == this);
2187     s_singleton = NULL;
2188 
2189     int i;
2190     char *str;
2191     FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2192       free (str);
2193   }
2194 
apply(lexer_test & test)2195   void apply (lexer_test &test) FINAL OVERRIDE
2196   {
2197     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2198     callbacks->diagnostic = on_diagnostic;
2199   }
2200 
on_diagnostic(cpp_reader * pfile ATTRIBUTE_UNUSED,enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,enum cpp_warning_reason reason ATTRIBUTE_UNUSED,rich_location * richloc ATTRIBUTE_UNUSED,const char * msgid,va_list * ap)2201   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2202 			     enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2203 			     enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2204 			     rich_location *richloc ATTRIBUTE_UNUSED,
2205 			     const char *msgid, va_list *ap)
2206     ATTRIBUTE_FPTR_PRINTF(5,0)
2207   {
2208     char *msg = xvasprintf (msgid, *ap);
2209     s_singleton->m_diagnostics.safe_push (msg);
2210     return true;
2211   }
2212 
2213   auto_vec<char *> m_diagnostics;
2214 
2215  private:
2216   static lexer_diagnostic_sink *s_singleton;
2217 };
2218 
2219 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2220 
2221 /* Constructor.  Override line_table with a new instance based on CASE_,
2222    and write CONTENT to a tempfile.  Create a cpp_reader, and use it to
2223    start parsing the tempfile.  */
2224 
lexer_test(const line_table_case & case_,const char * content,lexer_test_options * options)2225 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2226 			lexer_test_options *options)
2227 : m_ltt (case_),
2228   m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2229   /* Create a tempfile and write the text to it.  */
2230   m_tempfile (SELFTEST_LOCATION, ".c", content),
2231   m_concats (),
2232   m_implicitly_expect_EOF (true)
2233 {
2234   if (options)
2235     options->apply (*this);
2236 
2237   cpp_init_iconv (m_parser);
2238 
2239   /* Parse the file.  */
2240   const char *fname = cpp_read_main_file (m_parser,
2241 					  m_tempfile.get_filename ());
2242   ASSERT_NE (fname, NULL);
2243 }
2244 
2245 /* Destructor.  By default, verify that the next token in m_parser is EOF.  */
2246 
~lexer_test()2247 lexer_test::~lexer_test ()
2248 {
2249   location_t loc;
2250   const cpp_token *tok;
2251 
2252   if (m_implicitly_expect_EOF)
2253     {
2254       tok = cpp_get_token_with_location (m_parser, &loc);
2255       ASSERT_NE (tok, NULL);
2256       ASSERT_EQ (tok->type, CPP_EOF);
2257     }
2258 }
2259 
2260 /* Get the next token from m_parser.  */
2261 
2262 const cpp_token *
get_token()2263 lexer_test::get_token ()
2264 {
2265   location_t loc;
2266   const cpp_token *tok;
2267 
2268   tok = cpp_get_token_with_location (m_parser, &loc);
2269   ASSERT_NE (tok, NULL);
2270   return tok;
2271 }
2272 
2273 /* Verify that locations within string literals are correctly handled.  */
2274 
2275 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2276    using the string concatenation database for TEST.
2277 
2278    Assert that the character at index IDX is on EXPECTED_LINE,
2279    and that it begins at column EXPECTED_START_COL and ends at
2280    EXPECTED_FINISH_COL (unless the locations are beyond
2281    LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2282    columns).  */
2283 
2284 static void
assert_char_at_range(const location & loc,lexer_test & test,location_t strloc,enum cpp_ttype type,int idx,int expected_line,int expected_start_col,int expected_finish_col)2285 assert_char_at_range (const location &loc,
2286 		      lexer_test& test,
2287 		      location_t strloc, enum cpp_ttype type, int idx,
2288 		      int expected_line, int expected_start_col,
2289 		      int expected_finish_col)
2290 {
2291   cpp_reader *pfile = test.m_parser;
2292   string_concat_db *concats = &test.m_concats;
2293 
2294   source_range actual_range = source_range();
2295   const char *err
2296     = get_source_range_for_char (pfile, concats, strloc, type, idx,
2297 				 &actual_range);
2298   if (should_have_column_data_p (strloc))
2299     ASSERT_EQ_AT (loc, NULL, err);
2300   else
2301     {
2302       ASSERT_STREQ_AT (loc,
2303 		       "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2304 		       err);
2305       return;
2306     }
2307 
2308   int actual_start_line = LOCATION_LINE (actual_range.m_start);
2309   ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2310   int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2311   ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2312 
2313   if (should_have_column_data_p (actual_range.m_start))
2314     {
2315       int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2316       ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2317     }
2318   if (should_have_column_data_p (actual_range.m_finish))
2319     {
2320       int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2321       ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2322     }
2323 }
2324 
/* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
   the effective location of any errors, so that a failure is reported
   at the line that used this macro.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
			     EXPECTED_START_COL, EXPECTED_FINISH_COL)	\
  assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
			(IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
			(EXPECTED_FINISH_COL))
2333 
2334 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2335    using the string concatenation database for TEST.
2336 
2337    Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES.  */
2338 
2339 static void
assert_num_substring_ranges(const location & loc,lexer_test & test,location_t strloc,enum cpp_ttype type,int expected_num_ranges)2340 assert_num_substring_ranges (const location &loc,
2341 			     lexer_test& test,
2342 			     location_t strloc,
2343 			     enum cpp_ttype type,
2344 			     int expected_num_ranges)
2345 {
2346   cpp_reader *pfile = test.m_parser;
2347   string_concat_db *concats = &test.m_concats;
2348 
2349   int actual_num_ranges = -1;
2350   const char *err
2351     = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2352 					   &actual_num_ranges);
2353   if (should_have_column_data_p (strloc))
2354     ASSERT_EQ_AT (loc, NULL, err);
2355   else
2356     {
2357       ASSERT_STREQ_AT (loc,
2358 		       "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2359 		       err);
2360       return;
2361     }
2362   ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2363 }
2364 
/* Macro for calling assert_num_substring_ranges, supplying
   SELFTEST_LOCATION for the effective location of any errors, so that a
   failure is reported at the line that used this macro.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
				    EXPECTED_NUM_RANGES)		\
  assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
			       (TYPE), (EXPECTED_NUM_RANGES))
2372 
2373 
2374 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2375    returns an error (using the string concatenation database for TEST).  */
2376 
2377 static void
assert_has_no_substring_ranges(const location & loc,lexer_test & test,location_t strloc,enum cpp_ttype type,const char * expected_err)2378 assert_has_no_substring_ranges (const location &loc,
2379 				lexer_test& test,
2380 				location_t strloc,
2381 				enum cpp_ttype type,
2382 				const char *expected_err)
2383 {
2384   cpp_reader *pfile = test.m_parser;
2385   string_concat_db *concats = &test.m_concats;
2386   cpp_substring_ranges ranges;
2387   const char *actual_err
2388     = get_substring_ranges_for_loc (pfile, concats, strloc,
2389 				    type, ranges);
2390   if (should_have_column_data_p (strloc))
2391     ASSERT_STREQ_AT (loc, expected_err, actual_err);
2392   else
2393     ASSERT_STREQ_AT (loc,
2394 		     "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2395 		     actual_err);
2396 }
2397 
/* Macro for calling assert_has_no_substring_ranges, supplying
   SELFTEST_LOCATION for the effective location of any errors.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)    \
    assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
				    (STRLOC), (TYPE), (ERR))
2401 
/* Lex a simple string literal.  Verify the substring location data, before
   and after running cpp_interpret_string on it.  CASE_ describes the line
   table to run the test against.  */

static void
test_lexer_string_locations_simple (const line_table_case &case_)
{
  /* Digits 0-9 (with 0 at column 10), the simple way.
     ....................000000000.11111111112.2222222223333333333
     ....................123456789.01234567890.1234567890123456789
     We add a trailing comment to ensure that we correctly locate
     the end of the string literal token.  */
  const char *content = "        \"0123456789\" /* not a string */\n";
  lexer_test test (case_, content, NULL);

  /* Verify that we get the expected token back, with the correct
     location information.  */
  const cpp_token *tok = test.get_token ();
  ASSERT_EQ (tok->type, CPP_STRING);
  ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
  ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);

  /* At this point in lexing, the quote characters are treated as part of
     the string (they are stripped off by cpp_interpret_string).  Hence
     the length is that of the ten digits plus the two quotes.  */

  ASSERT_EQ (tok->val.str.len, 12);

  /* Verify that cpp_interpret_string works.  */
  cpp_string dst_string;
  const enum cpp_ttype type = CPP_STRING;
  bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
				      &dst_string, type);
  ASSERT_TRUE (result);
  ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
  free (const_cast <unsigned char *> (dst_string.text));

  /* Verify ranges of individual characters.  This no longer includes the
     opening quote, but does include the closing quote.  Index I is
     expected to occupy exactly column 10 + I.  */
  for (int i = 0; i <= 10; i++)
    ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
			  10 + i, 10 + i);

  /* Ten digits plus the closing quote.  */
  ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
}
2445 
/* As test_lexer_string_locations_simple, but use an EBCDIC execution
   encoding.  The test is skipped when iconv is unavailable, and cut
   short when the local iconv cannot perform the conversion.  */

static void
test_lexer_string_locations_ebcdic (const line_table_case &case_)
{
  /* EBCDIC support requires iconv.  */
  if (!HAVE_ICONV)
    return;

  /* Digits 0-9 (with 0 at column 10), the simple way.
     ....................000000000.11111111112.2222222223333333333
     ....................123456789.01234567890.1234567890123456789
     We add a trailing comment to ensure that we correctly locate
     the end of the string literal token.  */
  const char *content = "        \"0123456789\" /* not a string */\n";
  /* use_ebcdic is declared before the lexer_test, so it is still alive
     while the lexer_test is destroyed.  */
  ebcdic_execution_charset use_ebcdic;
  lexer_test test (case_, content, &use_ebcdic);

  /* Verify that we get the expected token back, with the correct
     location information.  */
  const cpp_token *tok = test.get_token ();
  ASSERT_EQ (tok->type, CPP_STRING);
  ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
  ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);

  /* At this point in lexing, the quote characters are treated as part of
     the string (they are stripped off by cpp_interpret_string).  */

  ASSERT_EQ (tok->val.str.len, 12);

  /* The remainder of the test requires an iconv implementation that
     can convert from UTF-8 to the EBCDIC encoding requested above.  */
  if (use_ebcdic.iconv_errors_occurred_p ())
    return;

  /* Verify that cpp_interpret_string works.  */
  cpp_string dst_string;
  const enum cpp_ttype type = CPP_STRING;
  bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
				      &dst_string, type);
  ASSERT_TRUE (result);
  /* We should now have EBCDIC-encoded text, specifically
     IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
     The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9.  */
  ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
		(const char *)dst_string.text);
  free (const_cast <unsigned char *> (dst_string.text));

  /* Verify that we don't attempt to record substring location information
     for such cases.  */
  ASSERT_HAS_NO_SUBSTRING_RANGES
    (test, tok->src_loc, type,
     "execution character set != source character set");
}
2501 
2502 /* Lex a string literal containing a hex-escaped character.
2503    Verify the substring location data, before and after running
2504    cpp_interpret_string on it.  */
2505 
2506 static void
test_lexer_string_locations_hex(const line_table_case & case_)2507 test_lexer_string_locations_hex (const line_table_case &case_)
2508 {
2509   /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2510      and with a space in place of digit 6, to terminate the escaped
2511      hex code.
2512      ....................000000000.111111.11112222.
2513      ....................123456789.012345.67890123.  */
2514   const char *content = "        \"01234\\x35 789\"\n";
2515   lexer_test test (case_, content, NULL);
2516 
2517   /* Verify that we get the expected token back, with the correct
2518      location information.  */
2519   const cpp_token *tok = test.get_token ();
2520   ASSERT_EQ (tok->type, CPP_STRING);
2521   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2522   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2523 
2524   /* At this point in lexing, the quote characters are treated as part of
2525      the string (they are stripped off by cpp_interpret_string).  */
2526   ASSERT_EQ (tok->val.str.len, 15);
2527 
2528   /* Verify that cpp_interpret_string works.  */
2529   cpp_string dst_string;
2530   const enum cpp_ttype type = CPP_STRING;
2531   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2532 				      &dst_string, type);
2533   ASSERT_TRUE (result);
2534   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2535   free (const_cast <unsigned char *> (dst_string.text));
2536 
2537   /* Verify ranges of individual characters.  This no longer includes the
2538      opening quote, but does include the closing quote.  */
2539   for (int i = 0; i <= 4; i++)
2540     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2541   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2542   for (int i = 6; i <= 10; i++)
2543     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2544 
2545   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2546 }
2547 
2548 /* Lex a string literal containing an octal-escaped character.
2549    Verify the substring location data after running cpp_interpret_string
2550    on it.  */
2551 
2552 static void
test_lexer_string_locations_oct(const line_table_case & case_)2553 test_lexer_string_locations_oct (const line_table_case &case_)
2554 {
2555   /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2556      and with a space in place of digit 6, to terminate the escaped
2557      octal code.
2558      ....................000000000.111111.11112222.2222223333333333444
2559      ....................123456789.012345.67890123.4567890123456789012  */
2560   const char *content = "        \"01234\\065 789\" /* not a string */\n";
2561   lexer_test test (case_, content, NULL);
2562 
2563   /* Verify that we get the expected token back, with the correct
2564      location information.  */
2565   const cpp_token *tok = test.get_token ();
2566   ASSERT_EQ (tok->type, CPP_STRING);
2567   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2568 
2569   /* Verify that cpp_interpret_string works.  */
2570   cpp_string dst_string;
2571   const enum cpp_ttype type = CPP_STRING;
2572   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2573 				      &dst_string, type);
2574   ASSERT_TRUE (result);
2575   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2576   free (const_cast <unsigned char *> (dst_string.text));
2577 
2578   /* Verify ranges of individual characters.  This no longer includes the
2579      opening quote, but does include the closing quote.  */
2580   for (int i = 0; i < 5; i++)
2581     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2582   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2583   for (int i = 6; i <= 10; i++)
2584     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2585 
2586   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2587 }
2588 
2589 /* Test of string literal containing letter escapes.  */
2590 
2591 static void
test_lexer_string_locations_letter_escape_1(const line_table_case & case_)2592 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2593 {
2594   /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2595      .....................000000000.1.11111.1.1.11222.22222223333333
2596      .....................123456789.0.12345.6.7.89012.34567890123456.  */
2597   const char *content = ("        \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2598   lexer_test test (case_, content, NULL);
2599 
2600   /* Verify that we get the expected tokens back.  */
2601   const cpp_token *tok = test.get_token ();
2602   ASSERT_EQ (tok->type, CPP_STRING);
2603   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2604 
2605   /* Verify ranges of individual characters. */
2606   /* "\t".  */
2607   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2608 			0, 1, 10, 11);
2609   /* "foo". */
2610   for (int i = 1; i <= 3; i++)
2611     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2612 			  i, 1, 11 + i, 11 + i);
2613   /* "\\" and "\n".  */
2614   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2615 			4, 1, 15, 16);
2616   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2617 			5, 1, 17, 18);
2618 
2619   /* "bar" and closing quote for nul-terminator.  */
2620   for (int i = 6; i <= 9; i++)
2621     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2622 			  i, 1, 13 + i, 13 + i);
2623 
2624   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2625 }
2626 
2627 /* Another test of a string literal containing a letter escape.
2628    Based on string seen in
2629      printf ("%-%\n");
2630    in gcc.dg/format/c90-printf-1.c.  */
2631 
2632 static void
test_lexer_string_locations_letter_escape_2(const line_table_case & case_)2633 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2634 {
2635   /* .....................000000000.1111.11.1111.22222222223.
2636      .....................123456789.0123.45.6789.01234567890.  */
2637   const char *content = ("        \"%-%\\n\" /* non-str */\n");
2638   lexer_test test (case_, content, NULL);
2639 
2640   /* Verify that we get the expected tokens back.  */
2641   const cpp_token *tok = test.get_token ();
2642   ASSERT_EQ (tok->type, CPP_STRING);
2643   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2644 
2645   /* Verify ranges of individual characters. */
2646   /* "%-%".  */
2647   for (int i = 0; i < 3; i++)
2648     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2649 			  i, 1, 10 + i, 10 + i);
2650   /* "\n".  */
2651   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2652 			3, 1, 13, 14);
2653 
2654   /* Closing quote for nul-terminator.  */
2655   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2656 			4, 1, 15, 15);
2657 
2658   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2659 }
2660 
2661 /* Lex a string literal containing UCN 4 characters.
2662    Verify the substring location data after running cpp_interpret_string
2663    on it.  */
2664 
2665 static void
test_lexer_string_locations_ucn4(const line_table_case & case_)2666 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2667 {
2668   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2669      as UCN 4.
2670      ....................000000000.111111.111122.222222223.33333333344444
2671      ....................123456789.012345.678901.234567890.12345678901234  */
2672   const char *content = "        \"01234\\u2174\\u2175789\" /* non-str */\n";
2673   lexer_test test (case_, content, NULL);
2674 
2675   /* Verify that we get the expected token back, with the correct
2676      location information.  */
2677   const cpp_token *tok = test.get_token ();
2678   ASSERT_EQ (tok->type, CPP_STRING);
2679   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2680 
2681   /* Verify that cpp_interpret_string works.
2682      The string should be encoded in the execution character
2683      set.  Assuming that that is UTF-8, we should have the following:
2684      -----------  ----  -----  -------  ----------------
2685      Byte offset  Byte  Octal  Unicode  Source Column(s)
2686      -----------  ----  -----  -------  ----------------
2687      0            0x30         '0'      10
2688      1            0x31         '1'      11
2689      2            0x32         '2'      12
2690      3            0x33         '3'      13
2691      4            0x34         '4'      14
2692      5            0xE2  \342   U+2174   15-20
2693      6            0x85  \205    (cont)  15-20
2694      7            0xB4  \264    (cont)  15-20
2695      8            0xE2  \342   U+2175   21-26
2696      9            0x85  \205    (cont)  21-26
2697      10           0xB5  \265    (cont)  21-26
2698      11           0x37         '7'      27
2699      12           0x38         '8'      28
2700      13           0x39         '9'      29
2701      14           0x00                  30 (closing quote)
2702      -----------  ----  -----  -------  ---------------.  */
2703 
2704   cpp_string dst_string;
2705   const enum cpp_ttype type = CPP_STRING;
2706   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2707 				      &dst_string, type);
2708   ASSERT_TRUE (result);
2709   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2710 		(const char *)dst_string.text);
2711   free (const_cast <unsigned char *> (dst_string.text));
2712 
2713   /* Verify ranges of individual characters.  This no longer includes the
2714      opening quote, but does include the closing quote.
2715      '01234'.  */
2716   for (int i = 0; i <= 4; i++)
2717     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2718   /* U+2174.  */
2719   for (int i = 5; i <= 7; i++)
2720     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2721   /* U+2175.  */
2722   for (int i = 8; i <= 10; i++)
2723     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2724   /* '789' and nul terminator  */
2725   for (int i = 11; i <= 14; i++)
2726     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2727 
2728   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2729 }
2730 
2731 /* Lex a string literal containing UCN 8 characters.
2732    Verify the substring location data after running cpp_interpret_string
2733    on it.  */
2734 
2735 static void
test_lexer_string_locations_ucn8(const line_table_case & case_)2736 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2737 {
2738   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2739      ....................000000000.111111.1111222222.2222333333333.344444
2740      ....................123456789.012345.6789012345.6789012345678.901234  */
2741   const char *content = "        \"01234\\U00002174\\U00002175789\" /* */\n";
2742   lexer_test test (case_, content, NULL);
2743 
2744   /* Verify that we get the expected token back, with the correct
2745      location information.  */
2746   const cpp_token *tok = test.get_token ();
2747   ASSERT_EQ (tok->type, CPP_STRING);
2748   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2749 			   "\"01234\\U00002174\\U00002175789\"");
2750 
2751   /* Verify that cpp_interpret_string works.
2752      The UTF-8 encoding of the string is identical to that from
2753      the ucn4 testcase above; the only difference is the column
2754      locations.  */
2755   cpp_string dst_string;
2756   const enum cpp_ttype type = CPP_STRING;
2757   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2758 				      &dst_string, type);
2759   ASSERT_TRUE (result);
2760   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2761 		(const char *)dst_string.text);
2762   free (const_cast <unsigned char *> (dst_string.text));
2763 
2764   /* Verify ranges of individual characters.  This no longer includes the
2765      opening quote, but does include the closing quote.
2766      '01234'.  */
2767   for (int i = 0; i <= 4; i++)
2768     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2769   /* U+2174.  */
2770   for (int i = 5; i <= 7; i++)
2771     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2772   /* U+2175.  */
2773   for (int i = 8; i <= 10; i++)
2774     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2775   /* '789' at columns 35-37  */
2776   for (int i = 11; i <= 13; i++)
2777     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2778   /* Closing quote/nul-terminator at column 38.  */
2779   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2780 
2781   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2782 }
2783 
/* Read the 4 bytes at PTR_BE_VALUE as a big-endian 32-bit quantity and
   return it as a host-endian value.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t host_value = 0;

  /* Most significant byte first: shift in one byte per iteration.  */
  for (int idx = 0; idx < 4; idx++)
    host_value = (host_value << 8) | (uint32_t) bytes[idx];

  return host_value;
}
2795 
2796 /* Lex a wide string literal and verify that attempts to read substring
2797    location data from it fail gracefully.  */
2798 
2799 static void
test_lexer_string_locations_wide_string(const line_table_case & case_)2800 test_lexer_string_locations_wide_string (const line_table_case &case_)
2801 {
2802   /* Digits 0-9.
2803      ....................000000000.11111111112.22222222233333
2804      ....................123456789.01234567890.12345678901234  */
2805   const char *content = "       L\"0123456789\" /* non-str */\n";
2806   lexer_test test (case_, content, NULL);
2807 
2808   /* Verify that we get the expected token back, with the correct
2809      location information.  */
2810   const cpp_token *tok = test.get_token ();
2811   ASSERT_EQ (tok->type, CPP_WSTRING);
2812   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2813 
2814   /* Verify that cpp_interpret_string works, using CPP_WSTRING.  */
2815   cpp_string dst_string;
2816   const enum cpp_ttype type = CPP_WSTRING;
2817   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2818 				      &dst_string, type);
2819   ASSERT_TRUE (result);
2820   /* The cpp_reader defaults to big-endian with
2821      CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2822      now be encoded as UTF-32BE.  */
2823   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2824   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2825   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2826   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2827   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2828   free (const_cast <unsigned char *> (dst_string.text));
2829 
2830   /* We don't yet support generating substring location information
2831      for L"" strings.  */
2832   ASSERT_HAS_NO_SUBSTRING_RANGES
2833     (test, tok->src_loc, type,
2834      "execution character set != source character set");
2835 }
2836 
/* Read the 2 bytes at PTR_BE_VALUE as a big-endian 16-bit quantity and
   return it as a host-endian value.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const unsigned int high_byte = bytes[0];
  const unsigned int low_byte = bytes[1];

  return (uint16_t) ((high_byte << 8) | low_byte);
}
2845 
2846 /* Lex a u"" string literal and verify that attempts to read substring
2847    location data from it fail gracefully.  */
2848 
2849 static void
test_lexer_string_locations_string16(const line_table_case & case_)2850 test_lexer_string_locations_string16 (const line_table_case &case_)
2851 {
2852   /* Digits 0-9.
2853      ....................000000000.11111111112.22222222233333
2854      ....................123456789.01234567890.12345678901234  */
2855   const char *content = "       u\"0123456789\" /* non-str */\n";
2856   lexer_test test (case_, content, NULL);
2857 
2858   /* Verify that we get the expected token back, with the correct
2859      location information.  */
2860   const cpp_token *tok = test.get_token ();
2861   ASSERT_EQ (tok->type, CPP_STRING16);
2862   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2863 
2864   /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
2865   cpp_string dst_string;
2866   const enum cpp_ttype type = CPP_STRING16;
2867   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2868 				      &dst_string, type);
2869   ASSERT_TRUE (result);
2870 
2871   /* The cpp_reader defaults to big-endian, so dst_string should
2872      now be encoded as UTF-16BE.  */
2873   const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2874   ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2875   ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2876   ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2877   ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2878   free (const_cast <unsigned char *> (dst_string.text));
2879 
2880   /* We don't yet support generating substring location information
2881      for L"" strings.  */
2882   ASSERT_HAS_NO_SUBSTRING_RANGES
2883     (test, tok->src_loc, type,
2884      "execution character set != source character set");
2885 }
2886 
2887 /* Lex a U"" string literal and verify that attempts to read substring
2888    location data from it fail gracefully.  */
2889 
2890 static void
test_lexer_string_locations_string32(const line_table_case & case_)2891 test_lexer_string_locations_string32 (const line_table_case &case_)
2892 {
2893   /* Digits 0-9.
2894      ....................000000000.11111111112.22222222233333
2895      ....................123456789.01234567890.12345678901234  */
2896   const char *content = "       U\"0123456789\" /* non-str */\n";
2897   lexer_test test (case_, content, NULL);
2898 
2899   /* Verify that we get the expected token back, with the correct
2900      location information.  */
2901   const cpp_token *tok = test.get_token ();
2902   ASSERT_EQ (tok->type, CPP_STRING32);
2903   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2904 
2905   /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
2906   cpp_string dst_string;
2907   const enum cpp_ttype type = CPP_STRING32;
2908   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2909 				      &dst_string, type);
2910   ASSERT_TRUE (result);
2911 
2912   /* The cpp_reader defaults to big-endian, so dst_string should
2913      now be encoded as UTF-32BE.  */
2914   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2915   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2916   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2917   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2918   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2919   free (const_cast <unsigned char *> (dst_string.text));
2920 
2921   /* We don't yet support generating substring location information
2922      for L"" strings.  */
2923   ASSERT_HAS_NO_SUBSTRING_RANGES
2924     (test, tok->src_loc, type,
2925      "execution character set != source character set");
2926 }
2927 
2928 /* Lex a u8-string literal.
2929    Verify the substring location data after running cpp_interpret_string
2930    on it.  */
2931 
2932 static void
test_lexer_string_locations_u8(const line_table_case & case_)2933 test_lexer_string_locations_u8 (const line_table_case &case_)
2934 {
2935   /* Digits 0-9.
2936      ....................000000000.11111111112.22222222233333
2937      ....................123456789.01234567890.12345678901234  */
2938   const char *content = "      u8\"0123456789\" /* non-str */\n";
2939   lexer_test test (case_, content, NULL);
2940 
2941   /* Verify that we get the expected token back, with the correct
2942      location information.  */
2943   const cpp_token *tok = test.get_token ();
2944   ASSERT_EQ (tok->type, CPP_UTF8STRING);
2945   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2946 
2947   /* Verify that cpp_interpret_string works.  */
2948   cpp_string dst_string;
2949   const enum cpp_ttype type = CPP_STRING;
2950   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2951 				      &dst_string, type);
2952   ASSERT_TRUE (result);
2953   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2954   free (const_cast <unsigned char *> (dst_string.text));
2955 
2956   /* Verify ranges of individual characters.  This no longer includes the
2957      opening quote, but does include the closing quote.  */
2958   for (int i = 0; i <= 10; i++)
2959     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2960 }
2961 
2962 /* Lex a string literal containing UTF-8 source characters.
2963    Verify the substring location data after running cpp_interpret_string
2964    on it.  */
2965 
2966 static void
test_lexer_string_locations_utf8_source(const line_table_case & case_)2967 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2968 {
2969  /* This string literal is written out to the source file as UTF-8,
2970     and is of the form "before mojibake after", where "mojibake"
2971     is written as the following four unicode code points:
2972        U+6587 CJK UNIFIED IDEOGRAPH-6587
2973        U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2974        U+5316 CJK UNIFIED IDEOGRAPH-5316
2975        U+3051 HIRAGANA LETTER KE.
2976      Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2977      "before" and "after" are 1 byte per unicode character.
2978 
2979      The numbering shown are "columns", which are *byte* numbers within
2980      the line, rather than unicode character numbers.
2981 
2982      .................... 000000000.1111111.
2983      .................... 123456789.0123456.  */
2984   const char *content = ("        \"before "
2985 			 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2986 			      UTF-8: 0xE6 0x96 0x87
2987 			      C octal escaped UTF-8: \346\226\207
2988 			    "column" numbers: 17-19.  */
2989 			 "\346\226\207"
2990 
2991 			 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2992 			      UTF-8: 0xE5 0xAD 0x97
2993 			      C octal escaped UTF-8: \345\255\227
2994 			    "column" numbers: 20-22.  */
2995 			 "\345\255\227"
2996 
2997 			 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2998 			      UTF-8: 0xE5 0x8C 0x96
2999 			      C octal escaped UTF-8: \345\214\226
3000 			    "column" numbers: 23-25.  */
3001 			 "\345\214\226"
3002 
3003 			 /* U+3051 HIRAGANA LETTER KE
3004 			      UTF-8: 0xE3 0x81 0x91
3005 			      C octal escaped UTF-8: \343\201\221
3006 			    "column" numbers: 26-28.  */
3007 			 "\343\201\221"
3008 
3009 			 /* column numbers 29 onwards
3010 			  2333333.33334444444444
3011 			  9012345.67890123456789. */
3012 			 " after\" /* non-str */\n");
3013   lexer_test test (case_, content, NULL);
3014 
3015   /* Verify that we get the expected token back, with the correct
3016      location information.  */
3017   const cpp_token *tok = test.get_token ();
3018   ASSERT_EQ (tok->type, CPP_STRING);
3019   ASSERT_TOKEN_AS_TEXT_EQ
3020     (test.m_parser, tok,
3021      "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3022 
3023   /* Verify that cpp_interpret_string works.  */
3024   cpp_string dst_string;
3025   const enum cpp_ttype type = CPP_STRING;
3026   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3027 				      &dst_string, type);
3028   ASSERT_TRUE (result);
3029   ASSERT_STREQ
3030     ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3031      (const char *)dst_string.text);
3032   free (const_cast <unsigned char *> (dst_string.text));
3033 
3034   /* Verify ranges of individual characters.  This no longer includes the
3035      opening quote, but does include the closing quote.
3036      Assuming that both source and execution encodings are UTF-8, we have
3037      a run of 25 octets in each, plus the NUL terminator.  */
3038   for (int i = 0; i < 25; i++)
3039     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3040   /* NUL-terminator should use the closing quote at column 35.  */
3041   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3042 
3043   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3044 }
3045 
3046 /* Test of string literal concatenation.  */
3047 
3048 static void
test_lexer_string_locations_concatenation_1(const line_table_case & case_)3049 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3050 {
3051   /* Digits 0-9.
3052      .....................000000000.111111.11112222222222
3053      .....................123456789.012345.67890123456789.  */
3054   const char *content = ("        \"01234\" /* non-str */\n"
3055 			 "        \"56789\" /* non-str */\n");
3056   lexer_test test (case_, content, NULL);
3057 
3058   location_t input_locs[2];
3059 
3060   /* Verify that we get the expected tokens back.  */
3061   auto_vec <cpp_string> input_strings;
3062   const cpp_token *tok_a = test.get_token ();
3063   ASSERT_EQ (tok_a->type, CPP_STRING);
3064   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3065   input_strings.safe_push (tok_a->val.str);
3066   input_locs[0] = tok_a->src_loc;
3067 
3068   const cpp_token *tok_b = test.get_token ();
3069   ASSERT_EQ (tok_b->type, CPP_STRING);
3070   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3071   input_strings.safe_push (tok_b->val.str);
3072   input_locs[1] = tok_b->src_loc;
3073 
3074   /* Verify that cpp_interpret_string works.  */
3075   cpp_string dst_string;
3076   const enum cpp_ttype type = CPP_STRING;
3077   bool result = cpp_interpret_string (test.m_parser,
3078 				      input_strings.address (), 2,
3079 				      &dst_string, type);
3080   ASSERT_TRUE (result);
3081   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3082   free (const_cast <unsigned char *> (dst_string.text));
3083 
3084   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3085   test.m_concats.record_string_concatenation (2, input_locs);
3086 
3087   location_t initial_loc = input_locs[0];
3088 
3089   /* "01234" on line 1.  */
3090   for (int i = 0; i <= 4; i++)
3091     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3092   /* "56789" in line 2, plus its closing quote for the nul terminator.  */
3093   for (int i = 5; i <= 10; i++)
3094     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3095 
3096   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3097 }
3098 
3099 /* Another test of string literal concatenation.  */
3100 
3101 static void
test_lexer_string_locations_concatenation_2(const line_table_case & case_)3102 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3103 {
3104   /* Digits 0-9.
3105      .....................000000000.111.11111112222222
3106      .....................123456789.012.34567890123456.  */
3107   const char *content = ("        \"01\" /* non-str */\n"
3108 			 "        \"23\" /* non-str */\n"
3109 			 "        \"45\" /* non-str */\n"
3110 			 "        \"67\" /* non-str */\n"
3111 			 "        \"89\" /* non-str */\n");
3112   lexer_test test (case_, content, NULL);
3113 
3114   auto_vec <cpp_string> input_strings;
3115   location_t input_locs[5];
3116 
3117   /* Verify that we get the expected tokens back.  */
3118   for (int i = 0; i < 5; i++)
3119     {
3120       const cpp_token *tok = test.get_token ();
3121       ASSERT_EQ (tok->type, CPP_STRING);
3122       input_strings.safe_push (tok->val.str);
3123       input_locs[i] = tok->src_loc;
3124     }
3125 
3126   /* Verify that cpp_interpret_string works.  */
3127   cpp_string dst_string;
3128   const enum cpp_ttype type = CPP_STRING;
3129   bool result = cpp_interpret_string (test.m_parser,
3130 				      input_strings.address (), 5,
3131 				      &dst_string, type);
3132   ASSERT_TRUE (result);
3133   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3134   free (const_cast <unsigned char *> (dst_string.text));
3135 
3136   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3137   test.m_concats.record_string_concatenation (5, input_locs);
3138 
3139   location_t initial_loc = input_locs[0];
3140 
3141   /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3142      detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3143      and expect get_source_range_for_substring to fail.
3144      However, for a string concatenation test, we can have a case
3145      where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3146      but subsequent strings can be after it.
3147      Attempting to detect this within assert_char_at_range
3148      would overcomplicate the logic for the common test cases, so
3149      we detect it here.  */
3150   if (should_have_column_data_p (input_locs[0])
3151       && !should_have_column_data_p (input_locs[4]))
3152     {
3153       /* Verify that get_source_range_for_substring gracefully rejects
3154 	 this case.  */
3155       source_range actual_range;
3156       const char *err
3157 	= get_source_range_for_char (test.m_parser, &test.m_concats,
3158 				     initial_loc, type, 0, &actual_range);
3159       ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3160       return;
3161     }
3162 
3163   for (int i = 0; i < 5; i++)
3164     for (int j = 0; j < 2; j++)
3165       ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3166 			    i + 1, 10 + j, 10 + j);
3167 
3168   /* NUL-terminator should use the final closing quote at line 5 column 12.  */
3169   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3170 
3171   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3172 }
3173 
3174 /* Another test of string literal concatenation, this time combined with
3175    various kinds of escaped characters.  */
3176 
3177 static void
test_lexer_string_locations_concatenation_3(const line_table_case & case_)3178 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3179 {
3180   /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3181      digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
3182   const char *content
3183     /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3184        .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3185     = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n");
3186   lexer_test test (case_, content, NULL);
3187 
3188   auto_vec <cpp_string> input_strings;
3189   location_t input_locs[4];
3190 
3191   /* Verify that we get the expected tokens back.  */
3192   for (int i = 0; i < 4; i++)
3193     {
3194       const cpp_token *tok = test.get_token ();
3195       ASSERT_EQ (tok->type, CPP_STRING);
3196       input_strings.safe_push (tok->val.str);
3197       input_locs[i] = tok->src_loc;
3198     }
3199 
3200   /* Verify that cpp_interpret_string works.  */
3201   cpp_string dst_string;
3202   const enum cpp_ttype type = CPP_STRING;
3203   bool result = cpp_interpret_string (test.m_parser,
3204 				      input_strings.address (), 4,
3205 				      &dst_string, type);
3206   ASSERT_TRUE (result);
3207   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3208   free (const_cast <unsigned char *> (dst_string.text));
3209 
3210   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3211   test.m_concats.record_string_concatenation (4, input_locs);
3212 
3213   location_t initial_loc = input_locs[0];
3214 
3215   for (int i = 0; i <= 4; i++)
3216     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3217   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3218   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3219   for (int i = 7; i <= 9; i++)
3220     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3221 
3222   /* NUL-terminator should use the location of the final closing quote.  */
3223   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3224 
3225   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3226 }
3227 
3228 /* Test of string literal in a macro.  */
3229 
3230 static void
test_lexer_string_locations_macro(const line_table_case & case_)3231 test_lexer_string_locations_macro (const line_table_case &case_)
3232 {
3233   /* Digits 0-9.
3234      .....................0000000001111111111.22222222223.
3235      .....................1234567890123456789.01234567890.  */
3236   const char *content = ("#define MACRO     \"0123456789\" /* non-str */\n"
3237 			 "  MACRO");
3238   lexer_test test (case_, content, NULL);
3239 
3240   /* Verify that we get the expected tokens back.  */
3241   const cpp_token *tok = test.get_token ();
3242   ASSERT_EQ (tok->type, CPP_PADDING);
3243 
3244   tok = test.get_token ();
3245   ASSERT_EQ (tok->type, CPP_STRING);
3246   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3247 
3248   /* Verify ranges of individual characters.  We ought to
3249      see columns within the macro definition.  */
3250   for (int i = 0; i <= 10; i++)
3251     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3252 			  i, 1, 20 + i, 20 + i);
3253 
3254   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3255 
3256   tok = test.get_token ();
3257   ASSERT_EQ (tok->type, CPP_PADDING);
3258 }
3259 
3260 /* Test of stringification of a macro argument.  */
3261 
3262 static void
test_lexer_string_locations_stringified_macro_argument(const line_table_case & case_)3263 test_lexer_string_locations_stringified_macro_argument
3264   (const line_table_case &case_)
3265 {
3266   /* .....................000000000111111111122222222223.
3267      .....................123456789012345678901234567890.  */
3268   const char *content = ("#define MACRO(X) #X /* non-str */\n"
3269 			 "MACRO(foo)\n");
3270   lexer_test test (case_, content, NULL);
3271 
3272   /* Verify that we get the expected token back.  */
3273   const cpp_token *tok = test.get_token ();
3274   ASSERT_EQ (tok->type, CPP_PADDING);
3275 
3276   tok = test.get_token ();
3277   ASSERT_EQ (tok->type, CPP_STRING);
3278   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3279 
3280   /* We don't support getting the location of a stringified macro
3281      argument.  Verify that it fails gracefully.  */
3282   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3283 				  "cpp_interpret_string_1 failed");
3284 
3285   tok = test.get_token ();
3286   ASSERT_EQ (tok->type, CPP_PADDING);
3287 
3288   tok = test.get_token ();
3289   ASSERT_EQ (tok->type, CPP_PADDING);
3290 }
3291 
3292 /* Ensure that we are fail gracefully if something attempts to pass
3293    in a location that isn't a string literal token.  Seen on this code:
3294 
3295      const char a[] = " %d ";
3296      __builtin_printf (a, 0.5);
3297                        ^
3298 
3299    when c-format.c erroneously used the indicated one-character
3300    location as the format string location, leading to a read past the
3301    end of a string buffer in cpp_interpret_string_1.  */
3302 
3303 static void
test_lexer_string_locations_non_string(const line_table_case & case_)3304 test_lexer_string_locations_non_string (const line_table_case &case_)
3305 {
3306   /* .....................000000000111111111122222222223.
3307      .....................123456789012345678901234567890.  */
3308   const char *content = ("         a\n");
3309   lexer_test test (case_, content, NULL);
3310 
3311   /* Verify that we get the expected token back.  */
3312   const cpp_token *tok = test.get_token ();
3313   ASSERT_EQ (tok->type, CPP_NAME);
3314   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3315 
3316   /* At this point, libcpp is attempting to interpret the name as a
3317      string literal, despite it not starting with a quote.  We don't detect
3318      that, but we should at least fail gracefully.  */
3319   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3320 				  "cpp_interpret_string_1 failed");
3321 }
3322 
3323 /* Ensure that we can read substring information for a token which
3324    starts in one linemap and ends in another .  Adapted from
3325    gcc.dg/cpp/pr69985.c.  */
3326 
3327 static void
test_lexer_string_locations_long_line(const line_table_case & case_)3328 test_lexer_string_locations_long_line (const line_table_case &case_)
3329 {
3330   /* .....................000000.000111111111
3331      .....................123456.789012346789.  */
3332   const char *content = ("/* A very long line, so that we start a new line map.  */\n"
3333 			 "     \"0123456789012345678901234567890123456789"
3334 			 "0123456789012345678901234567890123456789"
3335 			 "0123456789012345678901234567890123456789"
3336 			 "0123456789\"\n");
3337 
3338   lexer_test test (case_, content, NULL);
3339 
3340   /* Verify that we get the expected token back.  */
3341   const cpp_token *tok = test.get_token ();
3342   ASSERT_EQ (tok->type, CPP_STRING);
3343 
3344   if (!should_have_column_data_p (line_table->highest_location))
3345     return;
3346 
3347   /* Verify ranges of individual characters.  */
3348   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3349   for (int i = 0; i < 131; i++)
3350     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3351 			  i, 2, 7 + i, 7 + i);
3352 }
3353 
3354 /* Test of locations within a raw string that doesn't contain a newline.  */
3355 
3356 static void
test_lexer_string_locations_raw_string_one_line(const line_table_case & case_)3357 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3358 {
3359   /* .....................00.0000000111111111122.
3360      .....................12.3456789012345678901.  */
3361   const char *content = ("R\"foo(0123456789)foo\"\n");
3362   lexer_test test (case_, content, NULL);
3363 
3364   /* Verify that we get the expected token back.  */
3365   const cpp_token *tok = test.get_token ();
3366   ASSERT_EQ (tok->type, CPP_STRING);
3367 
3368   /* Verify that cpp_interpret_string works.  */
3369   cpp_string dst_string;
3370   const enum cpp_ttype type = CPP_STRING;
3371   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3372 				      &dst_string, type);
3373   ASSERT_TRUE (result);
3374   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3375   free (const_cast <unsigned char *> (dst_string.text));
3376 
3377   if (!should_have_column_data_p (line_table->highest_location))
3378     return;
3379 
3380   /* 0-9, plus the nil terminator.  */
3381   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3382   for (int i = 0; i < 11; i++)
3383     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3384 			  i, 1, 7 + i, 7 + i);
3385 }
3386 
3387 /* Test of locations within a raw string that contains a newline.  */
3388 
3389 static void
test_lexer_string_locations_raw_string_multiline(const line_table_case & case_)3390 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3391 {
3392   /* .....................00.0000.
3393      .....................12.3456.  */
3394   const char *content = ("R\"foo(\n"
3395   /* .....................00000.
3396      .....................12345.  */
3397 			 "hello\n"
3398 			 "world\n"
3399   /* .....................00000.
3400      .....................12345.  */
3401 			 ")foo\"\n");
3402   lexer_test test (case_, content, NULL);
3403 
3404   /* Verify that we get the expected token back.  */
3405   const cpp_token *tok = test.get_token ();
3406   ASSERT_EQ (tok->type, CPP_STRING);
3407 
3408   /* Verify that cpp_interpret_string works.  */
3409   cpp_string dst_string;
3410   const enum cpp_ttype type = CPP_STRING;
3411   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3412 				      &dst_string, type);
3413   ASSERT_TRUE (result);
3414   ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3415   free (const_cast <unsigned char *> (dst_string.text));
3416 
3417   if (!should_have_column_data_p (line_table->highest_location))
3418     return;
3419 
3420   /* Currently we don't support locations within raw strings that
3421      contain newlines.  */
3422   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3423 				  "range endpoints are on different lines");
3424 }
3425 
3426 /* Test of parsing an unterminated raw string.  */
3427 
3428 static void
test_lexer_string_locations_raw_string_unterminated(const line_table_case & case_)3429 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3430 {
3431   const char *content = "R\"ouch()ouCh\" /* etc */";
3432 
3433   lexer_diagnostic_sink diagnostics;
3434   lexer_test test (case_, content, &diagnostics);
3435   test.m_implicitly_expect_EOF = false;
3436 
3437   /* Attempt to parse the raw string.  */
3438   const cpp_token *tok = test.get_token ();
3439   ASSERT_EQ (tok->type, CPP_EOF);
3440 
3441   ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3442   /* We expect the message "unterminated raw string"
3443      in the "cpplib" translation domain.
3444      It's not clear that dgettext is available on all supported hosts,
3445      so this assertion is commented-out for now.
3446        ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3447                      diagnostics.m_diagnostics[0]);
3448   */
3449 }
3450 
3451 /* Test of lexing char constants.  */
3452 
3453 static void
test_lexer_char_constants(const line_table_case & case_)3454 test_lexer_char_constants (const line_table_case &case_)
3455 {
3456   /* Various char constants.
3457      .....................0000000001111111111.22222222223.
3458      .....................1234567890123456789.01234567890.  */
3459   const char *content = ("         'a'\n"
3460 			 "        u'a'\n"
3461 			 "        U'a'\n"
3462 			 "        L'a'\n"
3463 			 "         'abc'\n");
3464   lexer_test test (case_, content, NULL);
3465 
3466   /* Verify that we get the expected tokens back.  */
3467   /* 'a'.  */
3468   const cpp_token *tok = test.get_token ();
3469   ASSERT_EQ (tok->type, CPP_CHAR);
3470   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3471 
3472   unsigned int chars_seen;
3473   int unsignedp;
3474   cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3475 					  &chars_seen, &unsignedp);
3476   ASSERT_EQ (cc, 'a');
3477   ASSERT_EQ (chars_seen, 1);
3478 
3479   /* u'a'.  */
3480   tok = test.get_token ();
3481   ASSERT_EQ (tok->type, CPP_CHAR16);
3482   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3483 
3484   /* U'a'.  */
3485   tok = test.get_token ();
3486   ASSERT_EQ (tok->type, CPP_CHAR32);
3487   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3488 
3489   /* L'a'.  */
3490   tok = test.get_token ();
3491   ASSERT_EQ (tok->type, CPP_WCHAR);
3492   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3493 
3494   /* 'abc' (c-char-sequence).  */
3495   tok = test.get_token ();
3496   ASSERT_EQ (tok->type, CPP_CHAR);
3497   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3498 }
3499 /* A table of interesting location_t values, giving one axis of our test
3500    matrix.  */
3501 
3502 static const location_t boundary_locations[] = {
3503   /* Zero means "don't override the default values for a new line_table".  */
3504   0,
3505 
3506   /* An arbitrary non-zero value that isn't close to one of
3507      the boundary values below.  */
3508   0x10000,
3509 
3510   /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.  */
3511   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3512   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3513   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3514   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3515   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3516 
3517   /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS.  */
3518   LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3519   LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3520   LINE_MAP_MAX_LOCATION_WITH_COLS,
3521   LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3522   LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3523 };
3524 
3525 /* Run TESTCASE multiple times, once for each case in our test matrix.  */
3526 
3527 void
for_each_line_table_case(void (* testcase)(const line_table_case &))3528 for_each_line_table_case (void (*testcase) (const line_table_case &))
3529 {
3530   /* As noted above in the description of struct line_table_case,
3531      we want to explore a test matrix of interesting line_table
3532      situations, running various selftests for each case within the
3533      matrix.  */
3534 
3535   /* Run all tests with:
3536      (a) line_table->default_range_bits == 0, and
3537      (b) line_table->default_range_bits == 5.  */
3538   int num_cases_tested = 0;
3539   for (int default_range_bits = 0; default_range_bits <= 5;
3540        default_range_bits += 5)
3541     {
3542       /* ...and use each of the "interesting" location values as
3543 	 the starting location within line_table.  */
3544       const int num_boundary_locations
3545 	= sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3546       for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3547 	{
3548 	  line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3549 
3550 	  testcase (c);
3551 
3552 	  num_cases_tested++;
3553 	}
3554     }
3555 
3556   /* Verify that we fully covered the test matrix.  */
3557   ASSERT_EQ (num_cases_tested, 2 * 12);
3558 }
3559 
3560 /* Verify that when presented with a consecutive pair of locations with
3561    a very large line offset, we don't attempt to consolidate them into
3562    a single ordinary linemap where the line offsets within the line map
3563    would lead to overflow (PR lto/88147).  */
3564 
3565 static void
test_line_offset_overflow()3566 test_line_offset_overflow ()
3567 {
3568   line_table_test ltt (line_table_case (5, 0));
3569 
3570   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3571   linemap_line_start (line_table, 1, 100);
3572   location_t loc_a = linemap_line_start (line_table, 2578, 255);
3573   assert_loceq ("foo.c", 2578, 0, loc_a);
3574 
3575   const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3576   ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3577   ASSERT_EQ (ordmap_a->m_range_bits, 5);
3578 
3579   location_t loc_b = linemap_line_start (line_table, 404198, 512);
3580   assert_loceq ("foo.c", 404198, 0, loc_b);
3581 
3582   /* We should have started a new linemap, rather than attempting to store
3583      a very large line offset.  */
3584   const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3585   ASSERT_NE (ordmap_a, ordmap_b);
3586 }
3587 
3588 /* Run all of the selftests within this file.  */
3589 
3590 void
input_c_tests()3591 input_c_tests ()
3592 {
3593   test_linenum_comparisons ();
3594   test_should_have_column_data_p ();
3595   test_unknown_location ();
3596   test_builtins ();
3597   for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3598 
3599   for_each_line_table_case (test_accessing_ordinary_linemaps);
3600   for_each_line_table_case (test_lexer);
3601   for_each_line_table_case (test_lexer_string_locations_simple);
3602   for_each_line_table_case (test_lexer_string_locations_ebcdic);
3603   for_each_line_table_case (test_lexer_string_locations_hex);
3604   for_each_line_table_case (test_lexer_string_locations_oct);
3605   for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3606   for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3607   for_each_line_table_case (test_lexer_string_locations_ucn4);
3608   for_each_line_table_case (test_lexer_string_locations_ucn8);
3609   for_each_line_table_case (test_lexer_string_locations_wide_string);
3610   for_each_line_table_case (test_lexer_string_locations_string16);
3611   for_each_line_table_case (test_lexer_string_locations_string32);
3612   for_each_line_table_case (test_lexer_string_locations_u8);
3613   for_each_line_table_case (test_lexer_string_locations_utf8_source);
3614   for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3615   for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3616   for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3617   for_each_line_table_case (test_lexer_string_locations_macro);
3618   for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3619   for_each_line_table_case (test_lexer_string_locations_non_string);
3620   for_each_line_table_case (test_lexer_string_locations_long_line);
3621   for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3622   for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3623   for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3624   for_each_line_table_case (test_lexer_char_constants);
3625 
3626   test_reading_source_line ();
3627 
3628   test_line_offset_overflow ();
3629 }
3630 
3631 } // namespace selftest
3632 
3633 #endif /* CHECKING_P */
3634