1 /* Copyright (C) 2016-2022 Free Software Foundation, Inc.
2    Contributed by Martin Sebor <msebor@redhat.com>.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This file implements the printf-return-value pass.  The pass does
21    two things: 1) it analyzes calls to formatted output functions like
22    sprintf looking for possible buffer overflows and calls to bounded
23    functions like snprintf for early truncation (and under the control
24    of the -Wformat-length option issues warnings), and 2) under the
25    control of the -fprintf-return-value option it folds the return
26    value of safe calls into constants, making it possible to eliminate
27    code that depends on the value of those constants.
28 
29    For all functions (bounded or not) the pass uses the size of the
30    destination object.  That means that it will diagnose calls to
31    snprintf not on the basis of the size specified by the function's
32    second argument but rather on the basis of the size the first
33    argument points to (if possible).  For bound-checking built-ins
34    like __builtin___snprintf_chk the pass uses the size typically
35    determined by __builtin_object_size and passed to the built-in
36    by the Glibc inline wrapper.
37 
38    The pass handles all forms standard sprintf format directives,
39    including character, integer, floating point, pointer, and strings,
40    with the standard C flags, widths, and precisions.  For integers
41    and strings it computes the length of output itself.  For floating
42    point it uses MPFR to format known constants with up and down
43    rounding and uses the resulting range of output lengths.  For
44    strings it uses the length of string literals and the sizes of
45    character arrays that a character pointer may point to as a bound
46    on the longest string.  */
47 
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "backend.h"
52 #include "tree.h"
53 #include "gimple.h"
54 #include "tree-pass.h"
55 #include "ssa.h"
56 #include "gimple-fold.h"
57 #include "gimple-pretty-print.h"
58 #include "diagnostic-core.h"
59 #include "fold-const.h"
60 #include "gimple-iterator.h"
61 #include "tree-ssa.h"
62 #include "tree-object-size.h"
63 #include "tree-cfg.h"
64 #include "tree-ssa-propagate.h"
65 #include "calls.h"
66 #include "cfgloop.h"
67 #include "tree-scalar-evolution.h"
68 #include "tree-ssa-loop.h"
69 #include "intl.h"
70 #include "langhooks.h"
71 
72 #include "attribs.h"
73 #include "builtins.h"
74 #include "pointer-query.h"
75 #include "stor-layout.h"
76 
77 #include "realmpfr.h"
78 #include "target.h"
79 
80 #include "cpplib.h"
81 #include "input.h"
82 #include "toplev.h"
83 #include "substring-locations.h"
84 #include "diagnostic.h"
85 #include "domwalk.h"
86 #include "alloc-pool.h"
87 #include "vr-values.h"
88 #include "tree-ssa-strlen.h"
89 #include "tree-dfa.h"
90 
/* The likely worst case value of MB_LEN_MAX for the target, large enough
   for UTF-8.  Ideally, this would be obtained by a target hook if it were
   to be used for optimization but it's good enough as is for warnings.
   Written as a function-like macro so uses read like the target_xxx ()
   helper functions.  */
#define target_mb_len_max()   6

/* The maximum number of bytes a single non-string directive can result
   in.  This is the result of printf("%.*Lf", INT_MAX, -LDBL_MAX) for
   LDBL_MAX_10_EXP of 4932.  The expansion calls target_int_max (), so
   the macro can only be used after that function has been declared.
   NOTE(review): the trailing "+ 2" presumably accounts for the sign and
   the decimal point -- confirm.  */
#define IEEE_MAX_10_EXP    4932
#define target_dir_max()   (target_int_max () + IEEE_MAX_10_EXP + 2)
101 
102 namespace {
103 
/* Set to the warning level for the current function which is equal
   either to warn_format_trunc for bounded functions or to
   warn_format_overflow otherwise.  Levels above 1 select the more
   pessimistic heuristics in
   fmtresult::adjust_for_width_or_precision.  */

static int warn_level;
109 
/* The minimum, maximum, likely, and unlikely maximum number of bytes
   of output either a formatting function or an individual directive
   can result in.  All four counters are unsigned; values at or above
   HOST_WIDE_INT_MAX are treated as "unknown/invalid" by the code that
   consumes them.  */

struct result_range
{
  /* The absolute minimum number of bytes.  The result of a successful
     conversion is guaranteed to be no less than this.  (An erroneous
     conversion can be indicated by MIN > HOST_WIDE_INT_MAX.)  */
  unsigned HOST_WIDE_INT min;
  /* The likely maximum result that is used in diagnostics.  In most
     cases MAX is the same as the worst case UNLIKELY result.  */
  unsigned HOST_WIDE_INT max;
  /* The likely result used to trigger diagnostics.  For conversions
     that result in a range of bytes [MIN, MAX], LIKELY is somewhere
     in that range.  */
  unsigned HOST_WIDE_INT likely;
  /* In rare cases (e.g., for multibyte characters) UNLIKELY gives
     the worst-case maximum result of a directive.  In most cases
     UNLIKELY == MAX.  UNLIKELY is used to control the return value
     optimization but not in diagnostics.  */
  unsigned HOST_WIDE_INT unlikely;
};
133 
134 /* Return the value of INT_MIN for the target.  */
135 
136 static inline HOST_WIDE_INT
target_int_min()137 target_int_min ()
138 {
139   return tree_to_shwi (TYPE_MIN_VALUE (integer_type_node));
140 }
141 
142 /* Return the value of INT_MAX for the target.  */
143 
144 static inline unsigned HOST_WIDE_INT
target_int_max()145 target_int_max ()
146 {
147   return tree_to_uhwi (TYPE_MAX_VALUE (integer_type_node));
148 }
149 
150 /* Return the value of SIZE_MAX for the target.  */
151 
152 static inline unsigned HOST_WIDE_INT
target_size_max()153 target_size_max ()
154 {
155   return tree_to_uhwi (TYPE_MAX_VALUE (size_type_node));
156 }
157 
/* A straightforward mapping from the execution character set to the host
   character set indexed by execution character.  Element 0 is not a
   character mapping: init_target_to_host_charmap sets it to 1 when the
   two character sets agree on every character it maps and leaves it 0
   otherwise.  The table is all zeros until it has been initialized.  */

static char target_to_host_charmap[256];
162 
163 /* Initialize a mapping from the execution character set to the host
164    character set.  */
165 
166 static bool
init_target_to_host_charmap()167 init_target_to_host_charmap ()
168 {
169   /* If the percent sign is non-zero the mapping has already been
170      initialized.  */
171   if (target_to_host_charmap['%'])
172     return true;
173 
174   /* Initialize the target_percent character (done elsewhere).  */
175   if (!init_target_chars ())
176     return false;
177 
178   /* The subset of the source character set used by printf conversion
179      specifications (strictly speaking, not all letters are used but
180      they are included here for the sake of simplicity).  The dollar
181      sign must be included even though it's not in the basic source
182      character set.  */
183   const char srcset[] = " 0123456789!\"#%&'()*+,-./:;<=>?[\\]^_{|}~$"
184     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
185 
186   /* Set the mapping for all characters to some ordinary value (i,e.,
187      not none used in printf conversion specifications) and overwrite
188      those that are used by conversion specifications with their
189      corresponding values.  */
190   memset (target_to_host_charmap + 1, '?', sizeof target_to_host_charmap - 1);
191 
192   /* Are the two sets of characters the same?  */
193   bool all_same_p = true;
194 
195   for (const char *pc = srcset; *pc; ++pc)
196     {
197       /* Slice off the high end bits in case target characters are
198 	 signed.  All values are expected to be non-nul, otherwise
199 	 there's a problem.  */
200       if (unsigned char tc = lang_hooks.to_target_charset (*pc))
201 	{
202 	  target_to_host_charmap[tc] = *pc;
203 	  if (tc != *pc)
204 	    all_same_p = false;
205 	}
206       else
207 	return false;
208 
209     }
210 
211   /* Set the first element to a non-zero value if the mapping
212      is 1-to-1, otherwise leave it clear (NUL is assumed to be
213      the same in both character sets).  */
214   target_to_host_charmap[0] = all_same_p;
215 
216   return true;
217 }
218 
219 /* Return the host source character corresponding to the character
220    CH in the execution character set if one exists, or some innocuous
221    (non-special, non-nul) source character otherwise.  */
222 
223 static inline unsigned char
target_to_host(unsigned char ch)224 target_to_host (unsigned char ch)
225 {
226   return target_to_host_charmap[ch];
227 }
228 
229 /* Convert an initial substring of the string TARGSTR consisting of
230    characters in the execution character set into a string in the
231    source character set on the host and store up to HOSTSZ characters
232    in the buffer pointed to by HOSTR.  Return HOSTR.  */
233 
234 static const char*
target_to_host(char * hostr,size_t hostsz,const char * targstr)235 target_to_host (char *hostr, size_t hostsz, const char *targstr)
236 {
237   /* Make sure the buffer is reasonably big.  */
238   gcc_assert (hostsz > 4);
239 
240   /* The interesting subset of source and execution characters are
241      the same so no conversion is necessary.  However, truncate
242      overlong strings just like the translated strings are.  */
243   if (target_to_host_charmap['\0'] == 1)
244     {
245       size_t len = strlen (targstr);
246       if (len >= hostsz)
247 	{
248 	  memcpy (hostr, targstr, hostsz - 4);
249 	  strcpy (hostr + hostsz - 4, "...");
250 	}
251       else
252 	memcpy (hostr, targstr, len + 1);
253       return hostr;
254     }
255 
256   /* Convert the initial substring of TARGSTR to the corresponding
257      characters in the host set, appending "..." if TARGSTR is too
258      long to fit.  Using the static buffer assumes the function is
259      not called in between sequence points (which it isn't).  */
260   for (char *ph = hostr; ; ++targstr)
261     {
262       *ph++ = target_to_host (*targstr);
263       if (!*targstr)
264 	break;
265 
266       if (size_t (ph - hostr) == hostsz)
267 	{
268 	  strcpy (ph - 4, "...");
269 	  break;
270 	}
271     }
272 
273   return hostr;
274 }
275 
276 /* Convert the sequence of decimal digits in the execution character
277    starting at *PS to a HOST_WIDE_INT, analogously to strtol.  Return
278    the result and set *PS to one past the last converted character.
279    On range error set ERANGE to the digit that caused it.  */
280 
281 static inline HOST_WIDE_INT
target_strtowi(const char ** ps,const char ** erange)282 target_strtowi (const char **ps, const char **erange)
283 {
284   unsigned HOST_WIDE_INT val = 0;
285   for ( ; ; ++*ps)
286     {
287       unsigned char c = target_to_host (**ps);
288       if (ISDIGIT (c))
289 	{
290 	  c -= '0';
291 
292 	  /* Check for overflow.  */
293 	  if (val > ((unsigned HOST_WIDE_INT) HOST_WIDE_INT_MAX - c) / 10LU)
294 	    {
295 	      val = HOST_WIDE_INT_MAX;
296 	      *erange = *ps;
297 
298 	      /* Skip the remaining digits.  */
299 	      do
300 		c = target_to_host (*++*ps);
301 	      while (ISDIGIT (c));
302 	      break;
303 	    }
304 	  else
305 	    val = val * 10 + c;
306 	}
307       else
308 	break;
309     }
310 
311   return val;
312 }
313 
314 /* Given FORMAT, set *PLOC to the source location of the format string
315    and return the format string if it is known or null otherwise.  */
316 
317 static const char*
get_format_string(tree format,location_t * ploc)318 get_format_string (tree format, location_t *ploc)
319 {
320   *ploc = EXPR_LOC_OR_LOC (format, input_location);
321 
322   return c_getstr (format);
323 }
324 
325 /* For convenience and brevity, shorter named entrypoints of
326    format_string_diagnostic_t::emit_warning_va and
327    format_string_diagnostic_t::emit_warning_n_va.
328    These have to be functions with the attribute so that exgettext
329    works properly.  */
330 
331 static bool
332 ATTRIBUTE_GCC_DIAG (5, 6)
fmtwarn(const substring_loc & fmt_loc,location_t param_loc,const char * corrected_substring,opt_code opt,const char * gmsgid,...)333 fmtwarn (const substring_loc &fmt_loc, location_t param_loc,
334 	 const char *corrected_substring, opt_code opt,
335 	 const char *gmsgid, ...)
336 {
337   format_string_diagnostic_t diag (fmt_loc, NULL, param_loc, NULL,
338 				   corrected_substring);
339   va_list ap;
340   va_start (ap, gmsgid);
341   bool warned = diag.emit_warning_va (opt, gmsgid, &ap);
342   va_end (ap);
343 
344   return warned;
345 }
346 
347 static bool
348 ATTRIBUTE_GCC_DIAG (6, 8) ATTRIBUTE_GCC_DIAG (7, 8)
fmtwarn_n(const substring_loc & fmt_loc,location_t param_loc,const char * corrected_substring,opt_code opt,unsigned HOST_WIDE_INT n,const char * singular_gmsgid,const char * plural_gmsgid,...)349 fmtwarn_n (const substring_loc &fmt_loc, location_t param_loc,
350 	   const char *corrected_substring, opt_code opt,
351 	   unsigned HOST_WIDE_INT n,
352 	   const char *singular_gmsgid, const char *plural_gmsgid, ...)
353 {
354   format_string_diagnostic_t diag (fmt_loc, NULL, param_loc, NULL,
355 				   corrected_substring);
356   va_list ap;
357   va_start (ap, plural_gmsgid);
358   bool warned = diag.emit_warning_n_va (opt, n, singular_gmsgid, plural_gmsgid,
359 					&ap);
360   va_end (ap);
361 
362   return warned;
363 }
364 
/* Format length modifiers.  Each enumerator names the length modifier
   that may precede a conversion specifier in a directive; the comment
   gives the argument type the modifier selects.  FMT_LEN_none is the
   first enumerator (value zero), so a value-initialized modifier means
   "no modifier".  */

enum format_lengths
{
  FMT_LEN_none,
  FMT_LEN_hh,    // char argument
  FMT_LEN_h,     // short
  FMT_LEN_l,     // long
  FMT_LEN_ll,    // long long
  FMT_LEN_L,     // long double (and GNU long long)
  FMT_LEN_z,     // size_t
  FMT_LEN_t,     // ptrdiff_t
  FMT_LEN_j      // intmax_t
};
379 
380 
381 /* Description of the result of conversion either of a single directive
382    or the whole format string.  */
383 
384 class fmtresult
385 {
386 public:
387   /* Construct a FMTRESULT object with all counters initialized
388      to MIN.  KNOWNRANGE is set when MIN is valid.  */
fmtresult(unsigned HOST_WIDE_INT min=HOST_WIDE_INT_MAX)389   fmtresult (unsigned HOST_WIDE_INT min = HOST_WIDE_INT_MAX)
390   : argmin (), argmax (), dst_offset (HOST_WIDE_INT_MIN), nonstr (),
391     knownrange (min < HOST_WIDE_INT_MAX),
392     mayfail (), nullp ()
393   {
394     range.min = min;
395     range.max = min;
396     range.likely = min;
397     range.unlikely = min;
398   }
399 
400   /* Construct a FMTRESULT object with MIN, MAX, and LIKELY counters.
401      KNOWNRANGE is set when both MIN and MAX are valid.   */
fmtresult(unsigned HOST_WIDE_INT min,unsigned HOST_WIDE_INT max,unsigned HOST_WIDE_INT likely=HOST_WIDE_INT_MAX)402   fmtresult (unsigned HOST_WIDE_INT min, unsigned HOST_WIDE_INT max,
403 	     unsigned HOST_WIDE_INT likely = HOST_WIDE_INT_MAX)
404   : argmin (), argmax (), dst_offset (HOST_WIDE_INT_MIN), nonstr (),
405     knownrange (min < HOST_WIDE_INT_MAX && max < HOST_WIDE_INT_MAX),
406     mayfail (), nullp ()
407   {
408     range.min = min;
409     range.max = max;
410     range.likely = max < likely ? min : likely;
411     range.unlikely = max;
412   }
413 
414   /* Adjust result upward to reflect the RANGE of values the specified
415      width or precision is known to be in.  */
416   fmtresult& adjust_for_width_or_precision (const HOST_WIDE_INT[2],
417 					    tree = NULL_TREE,
418 					    unsigned = 0, unsigned = 0);
419 
420   /* Return the maximum number of decimal digits a value of TYPE
421      formats as on output.  */
422   static unsigned type_max_digits (tree, int);
423 
424   /* The range a directive's argument is in.  */
425   tree argmin, argmax;
426 
427   /* The starting offset into the destination of the formatted function
428      call of the %s argument that points into (aliases with) the same
429      destination array.  */
430   HOST_WIDE_INT dst_offset;
431 
432   /* The minimum and maximum number of bytes that a directive
433      results in on output for an argument in the range above.  */
434   result_range range;
435 
436   /* Non-nul when the argument of a string directive is not a nul
437      terminated string.  */
438   tree nonstr;
439 
440   /* True when the range above is obtained from a known value of
441      a directive's argument or its bounds and not the result of
442      heuristics that depend on warning levels.  */
443   bool knownrange;
444 
445   /* True for a directive that may fail (such as wide character
446      directives).  */
447   bool mayfail;
448 
449   /* True when the argument is a null pointer.  */
450   bool nullp;
451 };
452 
453 /* Adjust result upward to reflect the range ADJUST of values the
454    specified width or precision is known to be in.  When non-null,
455    TYPE denotes the type of the directive whose result is being
456    adjusted, BASE gives the base of the directive (octal, decimal,
457    or hex), and ADJ denotes the additional adjustment to the LIKELY
458    counter that may need to be added when ADJUST is a range.  */
459 
460 fmtresult&
adjust_for_width_or_precision(const HOST_WIDE_INT adjust[2],tree type,unsigned base,unsigned adj)461 fmtresult::adjust_for_width_or_precision (const HOST_WIDE_INT adjust[2],
462 					  tree type /* = NULL_TREE */,
463 					  unsigned base /* = 0 */,
464 					  unsigned adj /* = 0 */)
465 {
466   bool minadjusted = false;
467 
468   /* Adjust the minimum and likely counters.  */
469   if (adjust[0] >= 0)
470     {
471       if (range.min < (unsigned HOST_WIDE_INT)adjust[0])
472 	{
473 	  range.min = adjust[0];
474 	  minadjusted = true;
475 	}
476 
477       /* Adjust the likely counter.  */
478       if (range.likely < range.min)
479 	range.likely = range.min;
480     }
481   else if (adjust[0] == target_int_min ()
482 	   && (unsigned HOST_WIDE_INT)adjust[1] == target_int_max ())
483     knownrange = false;
484 
485   /* Adjust the maximum counter.  */
486   if (adjust[1] > 0)
487     {
488       if (range.max < (unsigned HOST_WIDE_INT)adjust[1])
489 	{
490 	  range.max = adjust[1];
491 
492 	  /* Set KNOWNRANGE if both the minimum and maximum have been
493 	     adjusted.  Otherwise leave it at what it was before.  */
494 	  knownrange = minadjusted;
495 	}
496     }
497 
498   if (warn_level > 1 && type)
499     {
500       /* For large non-constant width or precision whose range spans
501 	 the maximum number of digits produced by the directive for
502 	 any argument, set the likely number of bytes to be at most
503 	 the number digits plus other adjustment determined by the
504 	 caller (one for sign or two for the hexadecimal "0x"
505 	 prefix).  */
506       unsigned dirdigs = type_max_digits (type, base);
507       if (adjust[0] < dirdigs && dirdigs < adjust[1]
508 	  && range.likely < dirdigs)
509 	range.likely = dirdigs + adj;
510     }
511   else if (range.likely < (range.min ? range.min : 1))
512     {
513       /* Conservatively, set LIKELY to at least MIN but no less than
514 	 1 unless MAX is zero.  */
515       range.likely = (range.min
516 		      ? range.min
517 		      : range.max && (range.max < HOST_WIDE_INT_MAX
518 				      || warn_level > 1) ? 1 : 0);
519     }
520 
521   /* Finally adjust the unlikely counter to be at least as large as
522      the maximum.  */
523   if (range.unlikely < range.max)
524     range.unlikely = range.max;
525 
526   return *this;
527 }
528 
529 /* Return the maximum number of digits a value of TYPE formats in
530    BASE on output, not counting base prefix .  */
531 
532 unsigned
type_max_digits(tree type,int base)533 fmtresult::type_max_digits (tree type, int base)
534 {
535   unsigned prec = TYPE_PRECISION (type);
536   switch (base)
537     {
538     case 8:
539       return (prec + 2) / 3;
540     case 10:
541       /* Decimal approximation: yields 3, 5, 10, and 20 for precision
542 	 of 8, 16, 32, and 64 bits.  */
543       return prec * 301 / 1000 + 1;
544     case 16:
545       return prec / 4;
546     }
547 
548   gcc_unreachable ();
549 }
550 
/* Forward declaration of a file-local helper whose definition is not
   part of this section.  */
static bool
get_int_range (tree, gimple *, HOST_WIDE_INT *, HOST_WIDE_INT *,
	       bool, HOST_WIDE_INT, range_query *);

/* Forward declaration so that struct directive below can refer to
   the call it belongs to by pointer.  */
struct call_info;
556 
557 /* Description of a format directive.  A directive is either a plain
558    string or a conversion specification that starts with '%'.  */
559 
560 struct directive
561 {
directive__anonc39a57990111::directive562   directive (const call_info *inf, unsigned dno)
563     : info (inf), dirno (dno), argno (), beg (), len (), flags (),
564     width (), prec (),  modifier (), specifier (), arg (), fmtfunc ()
565   { }
566 
567   /* Reference to the info structure describing the call that this
568      directive is a part of.  */
569   const call_info *info;
570 
571   /* The 1-based directive number (for debugging).  */
572   unsigned dirno;
573 
574   /* The zero-based argument number of the directive's argument ARG in
575      the function's argument list.  */
576   unsigned argno;
577 
578   /* The first character of the directive and its length.  */
579   const char *beg;
580   size_t len;
581 
582   /* A bitmap of flags, one for each character.  */
583   unsigned flags[256 / sizeof (int)];
584 
585   /* The range of values of the specified width, or -1 if not specified.  */
586   HOST_WIDE_INT width[2];
587   /* The range of values of the specified precision, or -1 if not
588      specified.  */
589   HOST_WIDE_INT prec[2];
590 
591   /* Length modifier.  */
592   format_lengths modifier;
593 
594   /* Format specifier character.  */
595   char specifier;
596 
597   /* The argument of the directive or null when the directive doesn't
598      take one or when none is available (such as for vararg functions).  */
599   tree arg;
600 
601   /* Format conversion function that given a directive and an argument
602      returns the formatting result.  */
603   fmtresult (*fmtfunc) (const directive &, tree, pointer_query &);
604 
605   /* Return True when the format flag CHR has been used.  */
get_flag__anonc39a57990111::directive606   bool get_flag (char chr) const
607   {
608     unsigned char c = chr & 0xff;
609     return (flags[c / (CHAR_BIT * sizeof *flags)]
610 	    & (1U << (c % (CHAR_BIT * sizeof *flags))));
611   }
612 
613   /* Make a record of the format flag CHR having been used.  */
set_flag__anonc39a57990111::directive614   void set_flag (char chr)
615   {
616     unsigned char c = chr & 0xff;
617     flags[c / (CHAR_BIT * sizeof *flags)]
618       |= (1U << (c % (CHAR_BIT * sizeof *flags)));
619   }
620 
621   /* Reset the format flag CHR.  */
clear_flag__anonc39a57990111::directive622   void clear_flag (char chr)
623   {
624     unsigned char c = chr & 0xff;
625     flags[c / (CHAR_BIT * sizeof *flags)]
626       &= ~(1U << (c % (CHAR_BIT * sizeof *flags)));
627   }
628 
629   /* Set both bounds of the width range to VAL.  */
set_width__anonc39a57990111::directive630   void set_width (HOST_WIDE_INT val)
631   {
632     width[0] = width[1] = val;
633   }
634 
635   /* Set the width range according to ARG, with both bounds being
636      no less than 0.  For a constant ARG set both bounds to its value
637      or 0, whichever is greater.  For a non-constant ARG in some range
638      set width to its range adjusting each bound to -1 if it's less.
639      For an indeterminate ARG set width to [0, INT_MAX].  */
640   void set_width (tree arg, range_query *);
641 
642   /* Set both bounds of the precision range to VAL.  */
set_precision__anonc39a57990111::directive643   void set_precision (HOST_WIDE_INT val)
644   {
645     prec[0] = prec[1] = val;
646   }
647 
648   /* Set the precision range according to ARG, with both bounds being
649      no less than -1.  For a constant ARG set both bounds to its value
650      or -1 whichever is greater.  For a non-constant ARG in some range
651      set precision to its range adjusting each bound to -1 if it's less.
652      For an indeterminate ARG set precision to [-1, INT_MAX].  */
653   void set_precision (tree arg, range_query *query);
654 
655   /* Return true if both width and precision are known to be
656      either constant or in some range, false otherwise.  */
known_width_and_precision__anonc39a57990111::directive657   bool known_width_and_precision () const
658   {
659     return ((width[1] < 0
660 	     || (unsigned HOST_WIDE_INT)width[1] <= target_int_max ())
661 	    && (prec[1] < 0
662 		|| (unsigned HOST_WIDE_INT)prec[1] < target_int_max ()));
663   }
664 };
665 
666 /* The result of a call to a formatted function.  */
667 
668 struct format_result
669 {
format_result__anonc39a57990111::format_result670   format_result ()
671     : range (), aliases (), alias_count (), knownrange (), posunder4k (),
672     floating (), warned () { /* No-op.  */ }
673 
~format_result__anonc39a57990111::format_result674   ~format_result ()
675   {
676     XDELETEVEC (aliases);
677   }
678 
679   /* Range of characters written by the formatted function.
680      Setting the minimum to HOST_WIDE_INT_MAX disables all
681      length tracking for the remainder of the format string.  */
682   result_range range;
683 
684   struct alias_info
685   {
686     directive dir;          /* The directive that aliases the destination.  */
687     HOST_WIDE_INT offset;   /* The offset at which it aliases it.  */
688     result_range range;     /* The raw result of the directive.  */
689   };
690 
691   /* An array of directives whose pointer argument aliases a part
692      of the destination object of the formatted function.  */
693   alias_info *aliases;
694   unsigned alias_count;
695 
696   /* True when the range above is obtained from known values of
697      directive arguments, or bounds on the amount of output such
698      as width and precision, and not the result of  heuristics that
699      depend on warning levels.  It's used to issue stricter diagnostics
700      in cases where strings of unknown lengths are bounded by the arrays
701      they are determined to refer to.  KNOWNRANGE must not be used for
702      the return value optimization.  */
703   bool knownrange;
704 
705   /* True if no individual directive could fail or result in more than
706      4095 bytes of output (the total NUMBER_CHARS_{MIN,MAX} might be
707      greater).  Implementations are not required to handle directives
708      that produce more than 4K bytes (leading to undefined behavior)
709      and so when one is found it disables the return value optimization.
710      Similarly, directives that can fail (such as wide character
711      directives) disable the optimization.  */
712   bool posunder4k;
713 
714   /* True when a floating point directive has been seen in the format
715      string.  */
716   bool floating;
717 
718   /* True when an intermediate result has caused a warning.  Used to
719      avoid issuing duplicate warnings while finishing the processing
720      of a call.  WARNED also disables the return value optimization.  */
721   bool warned;
722 
723   /* Preincrement the number of output characters by 1.  */
operator ++__anonc39a57990111::format_result724   format_result& operator++ ()
725   {
726     return *this += 1;
727   }
728 
729   /* Postincrement the number of output characters by 1.  */
operator ++__anonc39a57990111::format_result730   format_result operator++ (int)
731   {
732     format_result prev (*this);
733     *this += 1;
734     return prev;
735   }
736 
737   /* Increment the number of output characters by N.  */
738   format_result& operator+= (unsigned HOST_WIDE_INT);
739 
740   /* Add a directive to the sequence of those with potentially aliasing
741      arguments.  */
742   void append_alias (const directive &, HOST_WIDE_INT, const result_range &);
743 
744 private:
745   /* Not copyable or assignable.  */
746   format_result (format_result&);
747   void operator= (format_result&);
748 };
749 
750 format_result&
operator +=(unsigned HOST_WIDE_INT n)751 format_result::operator+= (unsigned HOST_WIDE_INT n)
752 {
753   gcc_assert (n < HOST_WIDE_INT_MAX);
754 
755   if (range.min < HOST_WIDE_INT_MAX)
756     range.min += n;
757 
758   if (range.max < HOST_WIDE_INT_MAX)
759     range.max += n;
760 
761   if (range.likely < HOST_WIDE_INT_MAX)
762     range.likely += n;
763 
764   if (range.unlikely < HOST_WIDE_INT_MAX)
765     range.unlikely += n;
766 
767   return *this;
768 }
769 
770 void
append_alias(const directive & d,HOST_WIDE_INT off,const result_range & resrng)771 format_result::append_alias (const directive &d, HOST_WIDE_INT off,
772 			     const result_range &resrng)
773 {
774   unsigned cnt = alias_count + 1;
775   alias_info *ar = XNEWVEC (alias_info, cnt);
776 
777   for (unsigned i = 0; i != alias_count; ++i)
778     ar[i] = aliases[i];
779 
780   ar[alias_count].dir = d;
781   ar[alias_count].offset = off;
782   ar[alias_count].range = resrng;
783 
784   XDELETEVEC (aliases);
785 
786   alias_count = cnt;
787   aliases = ar;
788 }
789 
790 /* Return the logarithm of X in BASE.  */
791 
792 static int
ilog(unsigned HOST_WIDE_INT x,int base)793 ilog (unsigned HOST_WIDE_INT x, int base)
794 {
795   int res = 0;
796   do
797     {
798       ++res;
799       x /= base;
800     } while (x);
801   return res;
802 }
803 
804 /* Return the number of bytes resulting from converting into a string
805    the INTEGER_CST tree node X in BASE with a minimum of PREC digits.
806    PLUS indicates whether 1 for a plus sign should be added for positive
807    numbers, and PREFIX whether the length of an octal ('O') or hexadecimal
808    ('0x') prefix should be added for nonzero numbers.  Return -1 if X cannot
809    be represented.  */
810 
811 static HOST_WIDE_INT
tree_digits(tree x,int base,HOST_WIDE_INT prec,bool plus,bool prefix)812 tree_digits (tree x, int base, HOST_WIDE_INT prec, bool plus, bool prefix)
813 {
814   unsigned HOST_WIDE_INT absval;
815 
816   HOST_WIDE_INT res;
817 
818   if (TYPE_UNSIGNED (TREE_TYPE (x)))
819     {
820       if (tree_fits_uhwi_p (x))
821 	{
822 	  absval = tree_to_uhwi (x);
823 	  res = plus;
824 	}
825       else
826 	return -1;
827     }
828   else
829     {
830       if (tree_fits_shwi_p (x))
831 	{
832 	  HOST_WIDE_INT i = tree_to_shwi (x);
833          if (HOST_WIDE_INT_MIN == i)
834            {
835              /* Avoid undefined behavior due to negating a minimum.  */
836              absval = HOST_WIDE_INT_MAX;
837              res = 1;
838            }
839          else if (i < 0)
840 	   {
841 	     absval = -i;
842 	     res = 1;
843 	   }
844 	 else
845 	   {
846 	     absval = i;
847 	     res = plus;
848 	   }
849 	}
850       else
851 	return -1;
852     }
853 
854   int ndigs = ilog (absval, base);
855 
856   res += prec < ndigs ? ndigs : prec;
857 
858   /* Adjust a non-zero value for the base prefix, either hexadecimal,
859      or, unless precision has resulted in a leading zero, also octal.  */
860   if (prefix && absval && (base == 16 || prec <= ndigs))
861     {
862       if (base == 8)
863 	res += 1;
864       else if (base == 16)
865 	res += 2;
866     }
867 
868   return res;
869 }
870 
871 /* Description of a call to a formatted function.  */
872 
873 struct call_info
874 {
875   /* Function call statement.  */
876   gimple *callstmt;
877 
878   /* Function called.  */
879   tree func;
880 
881   /* Called built-in function code.  */
882   built_in_function fncode;
883 
884   /* The "origin" of the destination pointer argument, which is either
885      the DECL of the destination buffer being written into or a pointer
886      that points to it, plus some offset.  */
887   tree dst_origin;
888 
889   /* For a destination pointing to a struct array member, the offset of
890      the member.  */
891   HOST_WIDE_INT dst_field;
892 
893   /* The offset into the destination buffer.  */
894   HOST_WIDE_INT dst_offset;
895 
896   /* Format argument and format string extracted from it.  */
897   tree format;
898   const char *fmtstr;
899 
900   /* The location of the format argument.  */
901   location_t fmtloc;
902 
903   /* The destination object size for __builtin___xxx_chk functions
904      typically determined by __builtin_object_size, or -1 if unknown.  */
905   unsigned HOST_WIDE_INT objsize;
906 
907   /* Number of the first variable argument.  */
908   unsigned HOST_WIDE_INT argidx;
909 
910   /* True for functions like snprintf that specify the size of
911      the destination, false for others like sprintf that don't.  */
912   bool bounded;
913 
914   /* True for bounded functions like snprintf that specify a zero-size
915      buffer as a request to compute the size of output without actually
916      writing any.  NOWRITE is cleared in response to the %n directive
917      which has side-effects similar to writing output.  */
918   bool nowrite;
919 
920   /* Return true if the called function's return value is used.  */
retval_used__anonc39a57990111::call_info921   bool retval_used () const
922   {
923     return gimple_get_lhs (callstmt);
924   }
925 
926   /* Return the warning option corresponding to the called function.  */
warnopt__anonc39a57990111::call_info927   opt_code warnopt () const
928   {
929     return bounded ? OPT_Wformat_truncation_ : OPT_Wformat_overflow_;
930   }
931 
932   /* Return true for calls to file formatted functions.  */
is_file_func__anonc39a57990111::call_info933   bool is_file_func () const
934   {
935     return (fncode == BUILT_IN_FPRINTF
936 	    || fncode == BUILT_IN_FPRINTF_CHK
937 	    || fncode == BUILT_IN_FPRINTF_UNLOCKED
938 	    || fncode == BUILT_IN_VFPRINTF
939 	    || fncode == BUILT_IN_VFPRINTF_CHK);
940   }
941 
942   /* Return true for calls to string formatted functions.  */
is_string_func__anonc39a57990111::call_info943   bool is_string_func () const
944   {
945     return (fncode == BUILT_IN_SPRINTF
946 	    || fncode == BUILT_IN_SPRINTF_CHK
947 	    || fncode == BUILT_IN_SNPRINTF
948 	    || fncode == BUILT_IN_SNPRINTF_CHK
949 	    || fncode == BUILT_IN_VSPRINTF
950 	    || fncode == BUILT_IN_VSPRINTF_CHK
951 	    || fncode == BUILT_IN_VSNPRINTF
952 	    || fncode == BUILT_IN_VSNPRINTF_CHK);
953   }
954 };
955 
956 void
set_width(tree arg,range_query * query)957 directive::set_width (tree arg, range_query *query)
958 {
959   get_int_range (arg, info->callstmt, width, width + 1, true, 0, query);
960 }
961 
962 void
set_precision(tree arg,range_query * query)963 directive::set_precision (tree arg, range_query *query)
964 {
965   get_int_range (arg, info->callstmt, prec, prec + 1, false, -1, query);
966 }
967 
968 /* Return the result of formatting a no-op directive (such as '%n').  */
969 
970 static fmtresult
format_none(const directive &,tree,pointer_query &)971 format_none (const directive &, tree, pointer_query &)
972 {
973   fmtresult res (0);
974   return res;
975 }
976 
977 /* Return the result of formatting the '%%' directive.  */
978 
979 static fmtresult
format_percent(const directive &,tree,pointer_query &)980 format_percent (const directive &, tree, pointer_query &)
981 {
982   fmtresult res (1);
983   return res;
984 }
985 
986 
987 /* Compute intmax_type_node and uintmax_type_node similarly to how
988    tree.cc builds size_type_node.  */
989 
990 static void
build_intmax_type_nodes(tree * pintmax,tree * puintmax)991 build_intmax_type_nodes (tree *pintmax, tree *puintmax)
992 {
993   if (strcmp (UINTMAX_TYPE, "unsigned int") == 0)
994     {
995       *pintmax = integer_type_node;
996       *puintmax = unsigned_type_node;
997     }
998   else if (strcmp (UINTMAX_TYPE, "long unsigned int") == 0)
999     {
1000       *pintmax = long_integer_type_node;
1001       *puintmax = long_unsigned_type_node;
1002     }
1003   else if (strcmp (UINTMAX_TYPE, "long long unsigned int") == 0)
1004     {
1005       *pintmax = long_long_integer_type_node;
1006       *puintmax = long_long_unsigned_type_node;
1007     }
1008   else
1009     {
1010       for (int i = 0; i < NUM_INT_N_ENTS; i++)
1011 	if (int_n_enabled_p[i])
1012 	  {
1013 	    char name[50], altname[50];
1014 	    sprintf (name, "__int%d unsigned", int_n_data[i].bitsize);
1015 	    sprintf (altname, "__int%d__ unsigned", int_n_data[i].bitsize);
1016 
1017 	    if (strcmp (name, UINTMAX_TYPE) == 0
1018 		|| strcmp (altname, UINTMAX_TYPE) == 0)
1019 	      {
1020 	        *pintmax = int_n_trees[i].signed_type;
1021 	        *puintmax = int_n_trees[i].unsigned_type;
1022 		return;
1023 	      }
1024 	  }
1025       gcc_unreachable ();
1026     }
1027 }
1028 
1029 /* Determine the range [*PMIN, *PMAX] that the expression ARG is
1030    in and that is representable in type int.
1031    Return true when the range is a subrange of that of int.
1032    When ARG is null it is as if it had the full range of int.
1033    When ABSOLUTE is true the range reflects the absolute value of
1034    the argument.  When ABSOLUTE is false, negative bounds of
1035    the determined range are replaced with NEGBOUND.  */
1036 
1037 static bool
get_int_range(tree arg,gimple * stmt,HOST_WIDE_INT * pmin,HOST_WIDE_INT * pmax,bool absolute,HOST_WIDE_INT negbound,range_query * query)1038 get_int_range (tree arg, gimple *stmt,
1039 	       HOST_WIDE_INT *pmin, HOST_WIDE_INT *pmax,
1040 	       bool absolute, HOST_WIDE_INT negbound,
1041 	       range_query *query)
1042 {
1043   /* The type of the result.  */
1044   const_tree type = integer_type_node;
1045 
1046   bool knownrange = false;
1047 
1048   if (!arg)
1049     {
1050       *pmin = tree_to_shwi (TYPE_MIN_VALUE (type));
1051       *pmax = tree_to_shwi (TYPE_MAX_VALUE (type));
1052     }
1053   else if (TREE_CODE (arg) == INTEGER_CST
1054 	   && TYPE_PRECISION (TREE_TYPE (arg)) <= TYPE_PRECISION (type))
1055     {
1056       /* For a constant argument return its value adjusted as specified
1057 	 by NEGATIVE and NEGBOUND and return true to indicate that the
1058 	 result is known.  */
1059       *pmin = tree_fits_shwi_p (arg) ? tree_to_shwi (arg) : tree_to_uhwi (arg);
1060       *pmax = *pmin;
1061       knownrange = true;
1062     }
1063   else
1064     {
1065       /* True if the argument's range cannot be determined.  */
1066       bool unknown = true;
1067 
1068       tree argtype = TREE_TYPE (arg);
1069 
1070       /* Ignore invalid arguments with greater precision that that
1071 	 of the expected type (e.g., in sprintf("%*i", 12LL, i)).
1072 	 They will have been detected and diagnosed by -Wformat and
1073 	 so it's not important to complicate this code to try to deal
1074 	 with them again.  */
1075       if (TREE_CODE (arg) == SSA_NAME
1076 	  && INTEGRAL_TYPE_P (argtype)
1077 	  && TYPE_PRECISION (argtype) <= TYPE_PRECISION (type))
1078 	{
1079 	  /* Try to determine the range of values of the integer argument.  */
1080 	  value_range vr;
1081 	  query->range_of_expr (vr, arg, stmt);
1082 
1083 	  if (!vr.undefined_p () && !vr.varying_p ())
1084 	    {
1085 	      HOST_WIDE_INT type_min
1086 		= (TYPE_UNSIGNED (argtype)
1087 		   ? tree_to_uhwi (TYPE_MIN_VALUE (argtype))
1088 		   : tree_to_shwi (TYPE_MIN_VALUE (argtype)));
1089 
1090 	      HOST_WIDE_INT type_max = tree_to_uhwi (TYPE_MAX_VALUE (argtype));
1091 
1092 	      tree type = TREE_TYPE (arg);
1093 	      tree tmin = wide_int_to_tree (type, vr.lower_bound ());
1094 	      tree tmax = wide_int_to_tree (type, vr.upper_bound ());
1095 	      *pmin = TREE_INT_CST_LOW (tmin);
1096 	      *pmax = TREE_INT_CST_LOW (tmax);
1097 
1098 	      if (*pmin < *pmax)
1099 		{
1100 		  /* Return true if the adjusted range is a subrange of
1101 		     the full range of the argument's type.  *PMAX may
1102 		     be less than *PMIN when the argument is unsigned
1103 		     and its upper bound is in excess of TYPE_MAX.  In
1104 		     that (invalid) case disregard the range and use that
1105 		     of the expected type instead.  */
1106 		  knownrange = type_min < *pmin || *pmax < type_max;
1107 
1108 		  unknown = false;
1109 		}
1110 	    }
1111 	}
1112 
1113       /* Handle an argument with an unknown range as if none had been
1114 	 provided.  */
1115       if (unknown)
1116 	return get_int_range (NULL_TREE, NULL, pmin, pmax, absolute,
1117 			      negbound, query);
1118     }
1119 
1120   /* Adjust each bound as specified by ABSOLUTE and NEGBOUND.  */
1121   if (absolute)
1122     {
1123       if (*pmin < 0)
1124 	{
1125 	  if (*pmin == *pmax)
1126 	    *pmin = *pmax = -*pmin;
1127 	  else
1128 	    {
1129 	      /* Make sure signed overlow is avoided.  */
1130 	      gcc_assert (*pmin != HOST_WIDE_INT_MIN);
1131 
1132 	      HOST_WIDE_INT tmp = -*pmin;
1133 	      *pmin = 0;
1134 	      if (*pmax < tmp)
1135 		*pmax = tmp;
1136 	    }
1137 	}
1138     }
1139   else if (*pmin < negbound)
1140     *pmin = negbound;
1141 
1142   return knownrange;
1143 }
1144 
1145 /* With the range [*ARGMIN, *ARGMAX] of an integer directive's actual
1146    argument, due to the conversion from either *ARGMIN or *ARGMAX to
1147    the type of the directive's formal argument it's possible for both
1148    to result in the same number of bytes or a range of bytes that's
1149    less than the number of bytes that would result from formatting
1150    some other value in the range [*ARGMIN, *ARGMAX].  This can be
1151    determined by checking for the actual argument being in the range
1152    of the type of the directive.  If it isn't it must be assumed to
1153    take on the full range of the directive's type.
1154    Return true when the range has been adjusted to the full range
1155    of DIRTYPE, and false otherwise.  */
1156 
1157 static bool
adjust_range_for_overflow(tree dirtype,tree * argmin,tree * argmax)1158 adjust_range_for_overflow (tree dirtype, tree *argmin, tree *argmax)
1159 {
1160   tree argtype = TREE_TYPE (*argmin);
1161   unsigned argprec = TYPE_PRECISION (argtype);
1162   unsigned dirprec = TYPE_PRECISION (dirtype);
1163 
1164   /* If the actual argument and the directive's argument have the same
1165      precision and sign there can be no overflow and so there is nothing
1166      to adjust.  */
1167   if (argprec == dirprec && TYPE_SIGN (argtype) == TYPE_SIGN (dirtype))
1168     return false;
1169 
1170   /* The logic below was inspired/lifted from the CONVERT_EXPR_CODE_P
1171      branch in the extract_range_from_unary_expr function in tree-vrp.cc.  */
1172 
1173   if (TREE_CODE (*argmin) == INTEGER_CST
1174       && TREE_CODE (*argmax) == INTEGER_CST
1175       && (dirprec >= argprec
1176 	  || integer_zerop (int_const_binop (RSHIFT_EXPR,
1177 					     int_const_binop (MINUS_EXPR,
1178 							      *argmax,
1179 							      *argmin),
1180 					     size_int (dirprec)))))
1181     {
1182       *argmin = force_fit_type (dirtype, wi::to_widest (*argmin), 0, false);
1183       *argmax = force_fit_type (dirtype, wi::to_widest (*argmax), 0, false);
1184 
1185       /* If *ARGMIN is still less than *ARGMAX the conversion above
1186 	 is safe.  Otherwise, it has overflowed and would be unsafe.  */
1187       if (tree_int_cst_le (*argmin, *argmax))
1188 	return false;
1189     }
1190 
1191   *argmin = TYPE_MIN_VALUE (dirtype);
1192   *argmax = TYPE_MAX_VALUE (dirtype);
1193   return true;
1194 }
1195 
1196 /* Return a range representing the minimum and maximum number of bytes
1197    that the format directive DIR will output for any argument given
1198    the WIDTH and PRECISION (extracted from DIR).  This function is
1199    used when the directive argument or its value isn't known.  */
1200 
1201 static fmtresult
format_integer(const directive & dir,tree arg,pointer_query & ptr_qry)1202 format_integer (const directive &dir, tree arg, pointer_query &ptr_qry)
1203 {
1204   tree intmax_type_node;
1205   tree uintmax_type_node;
1206 
1207   /* Base to format the number in.  */
1208   int base;
1209 
1210   /* True when a conversion is preceded by a prefix indicating the base
1211      of the argument (octal or hexadecimal).  */
1212   bool maybebase = dir.get_flag ('#');
1213 
1214   /* True when a signed conversion is preceded by a sign or space.  */
1215   bool maybesign = false;
1216 
1217   /* True for signed conversions (i.e., 'd' and 'i').  */
1218   bool sign = false;
1219 
1220   switch (dir.specifier)
1221     {
1222     case 'd':
1223     case 'i':
1224       /* Space and '+' are  only meaningful for signed conversions.  */
1225       maybesign = dir.get_flag (' ') | dir.get_flag ('+');
1226       sign = true;
1227       base = 10;
1228       break;
1229     case 'u':
1230       base = 10;
1231       break;
1232     case 'o':
1233       base = 8;
1234       break;
1235     case 'X':
1236     case 'x':
1237       base = 16;
1238       break;
1239     default:
1240       gcc_unreachable ();
1241     }
1242 
1243   /* The type of the "formal" argument expected by the directive.  */
1244   tree dirtype = NULL_TREE;
1245 
1246   /* Determine the expected type of the argument from the length
1247      modifier.  */
1248   switch (dir.modifier)
1249     {
1250     case FMT_LEN_none:
1251       if (dir.specifier == 'p')
1252 	dirtype = ptr_type_node;
1253       else
1254 	dirtype = sign ? integer_type_node : unsigned_type_node;
1255       break;
1256 
1257     case FMT_LEN_h:
1258       dirtype = sign ? short_integer_type_node : short_unsigned_type_node;
1259       break;
1260 
1261     case FMT_LEN_hh:
1262       dirtype = sign ? signed_char_type_node : unsigned_char_type_node;
1263       break;
1264 
1265     case FMT_LEN_l:
1266       dirtype = sign ? long_integer_type_node : long_unsigned_type_node;
1267       break;
1268 
1269     case FMT_LEN_L:
1270     case FMT_LEN_ll:
1271       dirtype = (sign
1272 		 ? long_long_integer_type_node
1273 		 : long_long_unsigned_type_node);
1274       break;
1275 
1276     case FMT_LEN_z:
1277       dirtype = signed_or_unsigned_type_for (!sign, size_type_node);
1278       break;
1279 
1280     case FMT_LEN_t:
1281       dirtype = signed_or_unsigned_type_for (!sign, ptrdiff_type_node);
1282       break;
1283 
1284     case FMT_LEN_j:
1285       build_intmax_type_nodes (&intmax_type_node, &uintmax_type_node);
1286       dirtype = sign ? intmax_type_node : uintmax_type_node;
1287       break;
1288 
1289     default:
1290       return fmtresult ();
1291     }
1292 
1293   /* The type of the argument to the directive, either deduced from
1294      the actual non-constant argument if one is known, or from
1295      the directive itself when none has been provided because it's
1296      a va_list.  */
1297   tree argtype = NULL_TREE;
1298 
1299   if (!arg)
1300     {
1301       /* When the argument has not been provided, use the type of
1302 	 the directive's argument as an approximation.  This will
1303 	 result in false positives for directives like %i with
1304 	 arguments with smaller precision (such as short or char).  */
1305       argtype = dirtype;
1306     }
1307   else if (TREE_CODE (arg) == INTEGER_CST)
1308     {
1309       /* When a constant argument has been provided use its value
1310 	 rather than type to determine the length of the output.  */
1311       fmtresult res;
1312 
1313       if ((dir.prec[0] <= 0 && dir.prec[1] >= 0) && integer_zerop (arg))
1314 	{
1315 	  /* As a special case, a precision of zero with a zero argument
1316 	     results in zero bytes except in base 8 when the '#' flag is
1317 	     specified, and for signed conversions in base 8 and 10 when
1318 	     either the space or '+' flag has been specified and it results
1319 	     in just one byte (with width having the normal effect).  This
1320 	     must extend to the case of a specified precision with
1321 	     an unknown value because it can be zero.  */
1322 	  res.range.min = ((base == 8 && dir.get_flag ('#')) || maybesign);
1323 	  if (res.range.min == 0 && dir.prec[0] != dir.prec[1])
1324 	    {
1325 	      res.range.max = 1;
1326 	      res.range.likely = 1;
1327 	    }
1328 	  else
1329 	    {
1330 	      res.range.max = res.range.min;
1331 	      res.range.likely = res.range.min;
1332 	    }
1333 	}
1334       else
1335 	{
1336 	  /* Convert the argument to the type of the directive.  */
1337 	  arg = fold_convert (dirtype, arg);
1338 
1339 	  res.range.min = tree_digits (arg, base, dir.prec[0],
1340 				       maybesign, maybebase);
1341 	  if (dir.prec[0] == dir.prec[1])
1342 	    res.range.max = res.range.min;
1343 	  else
1344 	    res.range.max = tree_digits (arg, base, dir.prec[1],
1345 					 maybesign, maybebase);
1346 	  res.range.likely = res.range.min;
1347 	  res.knownrange = true;
1348 	}
1349 
1350       res.range.unlikely = res.range.max;
1351 
1352       /* Bump up the counters if WIDTH is greater than LEN.  */
1353       res.adjust_for_width_or_precision (dir.width, dirtype, base,
1354 					 (sign | maybebase) + (base == 16));
1355       /* Bump up the counters again if PRECision is greater still.  */
1356       res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1357 					 (sign | maybebase) + (base == 16));
1358 
1359       return res;
1360     }
1361   else if (INTEGRAL_TYPE_P (TREE_TYPE (arg))
1362 	   || TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)
1363     /* Determine the type of the provided non-constant argument.  */
1364     argtype = TREE_TYPE (arg);
1365   else
1366     /* Don't bother with invalid arguments since they likely would
1367        have already been diagnosed, and disable any further checking
1368        of the format string by returning [-1, -1].  */
1369     return fmtresult ();
1370 
1371   fmtresult res;
1372 
1373   /* Using either the range the non-constant argument is in, or its
1374      type (either "formal" or actual), create a range of values that
1375      constrain the length of output given the warning level.  */
1376   tree argmin = NULL_TREE;
1377   tree argmax = NULL_TREE;
1378 
1379   if (arg
1380       && TREE_CODE (arg) == SSA_NAME
1381       && INTEGRAL_TYPE_P (argtype))
1382     {
1383       /* Try to determine the range of values of the integer argument
1384 	 (range information is not available for pointers).  */
1385       value_range vr;
1386       ptr_qry.rvals->range_of_expr (vr, arg, dir.info->callstmt);
1387 
1388       if (!vr.varying_p () && !vr.undefined_p ())
1389 	{
1390 	  argmin = wide_int_to_tree (TREE_TYPE (arg), vr.lower_bound ());
1391 	  argmax = wide_int_to_tree (TREE_TYPE (arg), vr.upper_bound ());
1392 
1393 	  /* Set KNOWNRANGE if the argument is in a known subrange
1394 	     of the directive's type and neither width nor precision
1395 	     is unknown.  (KNOWNRANGE may be reset below).  */
1396 	  res.knownrange
1397 	    = ((!tree_int_cst_equal (TYPE_MIN_VALUE (dirtype), argmin)
1398 		|| !tree_int_cst_equal (TYPE_MAX_VALUE (dirtype), argmax))
1399 	       && dir.known_width_and_precision ());
1400 
1401 	  res.argmin = argmin;
1402 	  res.argmax = argmax;
1403 	}
1404       else
1405 	{
1406 	  /* The argument here may be the result of promoting the actual
1407 	     argument to int.  Try to determine the type of the actual
1408 	     argument before promotion and narrow down its range that
1409 	     way.  */
1410 	  gimple *def = SSA_NAME_DEF_STMT (arg);
1411 	  if (is_gimple_assign (def))
1412 	    {
1413 	      tree_code code = gimple_assign_rhs_code (def);
1414 	      if (code == INTEGER_CST)
1415 		{
1416 		  arg = gimple_assign_rhs1 (def);
1417 		  return format_integer (dir, arg, ptr_qry);
1418 		}
1419 
1420 	      if (code == NOP_EXPR)
1421 		{
1422 		  tree type = TREE_TYPE (gimple_assign_rhs1 (def));
1423 		  if (INTEGRAL_TYPE_P (type)
1424 		      || TREE_CODE (type) == POINTER_TYPE)
1425 		    argtype = type;
1426 		}
1427 	    }
1428 	}
1429     }
1430 
1431   if (!argmin)
1432     {
1433       if (TREE_CODE (argtype) == POINTER_TYPE)
1434 	{
1435 	  argmin = build_int_cst (pointer_sized_int_node, 0);
1436 	  argmax = build_all_ones_cst (pointer_sized_int_node);
1437 	}
1438       else
1439 	{
1440 	  argmin = TYPE_MIN_VALUE (argtype);
1441 	  argmax = TYPE_MAX_VALUE (argtype);
1442 	}
1443     }
1444 
1445   /* Clear KNOWNRANGE if the range has been adjusted to the maximum
1446      of the directive.  If it has been cleared then since ARGMIN and/or
1447      ARGMAX have been adjusted also adjust the corresponding ARGMIN and
1448      ARGMAX in the result to include in diagnostics.  */
1449   if (adjust_range_for_overflow (dirtype, &argmin, &argmax))
1450     {
1451       res.knownrange = false;
1452       res.argmin = argmin;
1453       res.argmax = argmax;
1454     }
1455 
1456   /* Recursively compute the minimum and maximum from the known range.  */
1457   if (TYPE_UNSIGNED (dirtype) || tree_int_cst_sgn (argmin) >= 0)
1458     {
1459       /* For unsigned conversions/directives or signed when
1460 	 the minimum is positive, use the minimum and maximum to compute
1461 	 the shortest and longest output, respectively.  */
1462       res.range.min = format_integer (dir, argmin, ptr_qry).range.min;
1463       res.range.max = format_integer (dir, argmax, ptr_qry).range.max;
1464     }
1465   else if (tree_int_cst_sgn (argmax) < 0)
1466     {
1467       /* For signed conversions/directives if maximum is negative,
1468 	 use the minimum as the longest output and maximum as the
1469 	 shortest output.  */
1470       res.range.min = format_integer (dir, argmax, ptr_qry).range.min;
1471       res.range.max = format_integer (dir, argmin, ptr_qry).range.max;
1472     }
1473   else
1474     {
1475       /* Otherwise, 0 is inside of the range and minimum negative.  Use 0
1476 	 as the shortest output and for the longest output compute the
1477 	 length of the output of both minimum and maximum and pick the
1478 	 longer.  */
1479       unsigned HOST_WIDE_INT max1
1480 	= format_integer (dir, argmin, ptr_qry).range.max;
1481       unsigned HOST_WIDE_INT max2
1482 	= format_integer (dir, argmax, ptr_qry).range.max;
1483       res.range.min
1484 	= format_integer (dir, integer_zero_node, ptr_qry).range.min;
1485       res.range.max = MAX (max1, max2);
1486     }
1487 
1488   /* If the range is known, use the maximum as the likely length.  */
1489   if (res.knownrange)
1490     res.range.likely = res.range.max;
1491   else
1492     {
1493       /* Otherwise, use the minimum.  Except for the case where for %#x or
1494          %#o the minimum is just for a single value in the range (0) and
1495          for all other values it is something longer, like 0x1 or 01.
1496 	  Use the length for value 1 in that case instead as the likely
1497 	  length.  */
1498       res.range.likely = res.range.min;
1499       if (maybebase
1500 	  && base != 10
1501 	  && (tree_int_cst_sgn (argmin) < 0 || tree_int_cst_sgn (argmax) > 0))
1502 	{
1503 	  if (res.range.min == 1)
1504 	    res.range.likely += base == 8 ? 1 : 2;
1505 	  else if (res.range.min == 2
1506 		   && base == 16
1507 		   && (dir.width[0] == 2 || dir.prec[0] == 2))
1508 	    ++res.range.likely;
1509 	}
1510     }
1511 
1512   res.range.unlikely = res.range.max;
1513   res.adjust_for_width_or_precision (dir.width, dirtype, base,
1514 				     (sign | maybebase) + (base == 16));
1515   res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1516 				     (sign | maybebase) + (base == 16));
1517 
1518   return res;
1519 }
1520 
1521 /* Return the number of bytes that a format directive consisting of FLAGS,
1522    PRECision, format SPECification, and MPFR rounding specifier RNDSPEC,
1523    would result for argument X under ideal conditions (i.e., if PREC
1524    weren't excessive).  MPFR 3.1 allocates large amounts of memory for
1525    values of PREC with large magnitude and can fail (see MPFR bug #21056).
1526    This function works around those problems.  */
1527 
1528 static unsigned HOST_WIDE_INT
get_mpfr_format_length(mpfr_ptr x,const char * flags,HOST_WIDE_INT prec,char spec,char rndspec)1529 get_mpfr_format_length (mpfr_ptr x, const char *flags, HOST_WIDE_INT prec,
1530 			char spec, char rndspec)
1531 {
1532   char fmtstr[40];
1533 
1534   HOST_WIDE_INT len = strlen (flags);
1535 
1536   fmtstr[0] = '%';
1537   memcpy (fmtstr + 1, flags, len);
1538   memcpy (fmtstr + 1 + len, ".*R", 3);
1539   fmtstr[len + 4] = rndspec;
1540   fmtstr[len + 5] = spec;
1541   fmtstr[len + 6] = '\0';
1542 
1543   spec = TOUPPER (spec);
1544   if (spec == 'E' || spec == 'F')
1545     {
1546       /* For %e, specify the precision explicitly since mpfr_sprintf
1547 	 does its own thing just to be different (see MPFR bug 21088).  */
1548       if (prec < 0)
1549 	prec = 6;
1550     }
1551   else
1552     {
1553       /* Avoid passing negative precisions with larger magnitude to MPFR
1554 	 to avoid exposing its bugs.  (A negative precision is supposed
1555 	 to be ignored.)  */
1556       if (prec < 0)
1557 	prec = -1;
1558     }
1559 
1560   HOST_WIDE_INT p = prec;
1561 
1562   if (spec == 'G' && !strchr (flags, '#'))
1563     {
1564       /* For G/g without the pound flag, precision gives the maximum number
1565 	 of significant digits which is bounded by LDBL_MAX_10_EXP, or, for
1566 	 a 128 bit IEEE extended precision, 4932.  Using twice as much here
1567 	 should be more than sufficient for any real format.  */
1568       if ((IEEE_MAX_10_EXP * 2) < prec)
1569 	prec = IEEE_MAX_10_EXP * 2;
1570       p = prec;
1571     }
1572   else
1573     {
1574       /* Cap precision arbitrarily at 1KB and add the difference
1575 	 (if any) to the MPFR result.  */
1576       if (prec > 1024)
1577 	p = 1024;
1578     }
1579 
1580   len = mpfr_snprintf (NULL, 0, fmtstr, (int)p, x);
1581 
1582   /* Handle the unlikely (impossible?) error by returning more than
1583      the maximum dictated by the function's return type.  */
1584   if (len < 0)
1585     return target_dir_max () + 1;
1586 
1587   /* Adjust the return value by the difference.  */
1588   if (p < prec)
1589     len += prec - p;
1590 
1591   return len;
1592 }
1593 
1594 /* Return the number of bytes to format using the format specifier
1595    SPEC and the precision PREC the largest value in the real floating
1596    TYPE.  */
1597 
1598 static unsigned HOST_WIDE_INT
format_floating_max(tree type,char spec,HOST_WIDE_INT prec)1599 format_floating_max (tree type, char spec, HOST_WIDE_INT prec)
1600 {
1601   machine_mode mode = TYPE_MODE (type);
1602 
1603   /* IBM Extended mode.  */
1604   if (MODE_COMPOSITE_P (mode))
1605     mode = DFmode;
1606 
1607   /* Get the real type format description for the target.  */
1608   const real_format *rfmt = REAL_MODE_FORMAT (mode);
1609   REAL_VALUE_TYPE rv;
1610 
1611   real_maxval (&rv, 0, mode);
1612 
1613   /* Convert the GCC real value representation with the precision
1614      of the real type to the mpfr_t format with the GCC default
1615      round-to-nearest mode.  */
1616   mpfr_t x;
1617   mpfr_init2 (x, rfmt->p);
1618   mpfr_from_real (x, &rv, MPFR_RNDN);
1619 
1620   /* Return a value one greater to account for the leading minus sign.  */
1621   unsigned HOST_WIDE_INT r
1622     = 1 + get_mpfr_format_length (x, "", prec, spec, 'D');
1623   mpfr_clear (x);
1624   return r;
1625 }
1626 
1627 /* Return a range representing the minimum and maximum number of bytes
1628    that the directive DIR will output for any argument.  PREC gives
1629    the adjusted precision range to account for negative precisions
1630    meaning the default 6.  This function is used when the directive
1631    argument or its value isn't known.  */
1632 
1633 static fmtresult
format_floating(const directive & dir,const HOST_WIDE_INT prec[2])1634 format_floating (const directive &dir, const HOST_WIDE_INT prec[2])
1635 {
1636   tree type;
1637 
1638   switch (dir.modifier)
1639     {
1640     case FMT_LEN_l:
1641     case FMT_LEN_none:
1642       type = double_type_node;
1643       break;
1644 
1645     case FMT_LEN_L:
1646       type = long_double_type_node;
1647       break;
1648 
1649     case FMT_LEN_ll:
1650       type = long_double_type_node;
1651       break;
1652 
1653     default:
1654       return fmtresult ();
1655     }
1656 
1657   /* The minimum and maximum number of bytes produced by the directive.  */
1658   fmtresult res;
1659 
1660   /* The minimum output as determined by flags.  It's always at least 1.
1661      When plus or space are set the output is preceded by either a sign
1662      or a space.  */
1663   unsigned flagmin = (1 /* for the first digit */
1664 		      + (dir.get_flag ('+') | dir.get_flag (' ')));
1665 
1666   /* The minimum is 3 for "inf" and "nan" for all specifiers, plus 1
1667      for the plus sign/space with the '+' and ' ' flags, respectively,
1668      unless reduced below.  */
1669   res.range.min = 2 + flagmin;
1670 
1671   /* When the pound flag is set the decimal point is included in output
1672      regardless of precision.  Whether or not a decimal point is included
1673      otherwise depends on the specification and precision.  */
1674   bool radix = dir.get_flag ('#');
1675 
1676   switch (dir.specifier)
1677     {
1678     case 'A':
1679     case 'a':
1680       {
1681 	HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1682 	if (dir.prec[0] <= 0)
1683 	  minprec = 0;
1684 	else if (dir.prec[0] > 0)
1685 	  minprec = dir.prec[0] + !radix /* decimal point */;
1686 
1687 	res.range.likely = (2 /* 0x */
1688 			    + flagmin
1689 			    + radix
1690 			    + minprec
1691 			    + 3 /* p+0 */);
1692 
1693 	res.range.max = format_floating_max (type, 'a', prec[1]);
1694 
1695 	/* The unlikely maximum accounts for the longest multibyte
1696 	   decimal point character.  */
1697 	res.range.unlikely = res.range.max;
1698 	if (dir.prec[1] > 0)
1699 	  res.range.unlikely += target_mb_len_max () - 1;
1700 
1701 	break;
1702       }
1703 
1704     case 'E':
1705     case 'e':
1706       {
1707 	/* Minimum output attributable to precision and, when it's
1708 	   non-zero, decimal point.  */
1709 	HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1710 
1711 	/* The likely minimum output is "[-+]1.234567e+00" regardless
1712 	   of the value of the actual argument.  */
1713 	res.range.likely = (flagmin
1714 			    + radix
1715 			    + minprec
1716 			    + 2 /* e+ */ + 2);
1717 
1718 	res.range.max = format_floating_max (type, 'e', prec[1]);
1719 
1720 	/* The unlikely maximum accounts for the longest multibyte
1721 	   decimal point character.  */
1722 	if (dir.prec[0] != dir.prec[1]
1723 	    || dir.prec[0] == -1 || dir.prec[0] > 0)
1724 	  res.range.unlikely = res.range.max + target_mb_len_max () -1;
1725 	else
1726 	  res.range.unlikely = res.range.max;
1727 	break;
1728       }
1729 
1730     case 'F':
1731     case 'f':
1732       {
1733 	/* Minimum output attributable to precision and, when it's non-zero,
1734 	   decimal point.  */
1735 	HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1736 
1737 	/* For finite numbers (i.e., not infinity or NaN) the lower bound
1738 	   when precision isn't specified is 8 bytes ("1.23456" since
1739 	   precision is taken to be 6).  When precision is zero, the lower
1740 	   bound is 1 byte (e.g., "1").  Otherwise, when precision is greater
1741 	   than zero, then the lower bound is 2 plus precision (plus flags).
1742 	   But in all cases, the lower bound is no greater than 3.  */
1743 	unsigned HOST_WIDE_INT min = flagmin + radix + minprec;
1744 	if (min < res.range.min)
1745 	  res.range.min = min;
1746 
1747 	/* Compute the upper bound for -TYPE_MAX.  */
1748 	res.range.max = format_floating_max (type, 'f', prec[1]);
1749 
1750 	/* The minimum output with unknown precision is a single byte
1751 	   (e.g., "0") but the more likely output is 3 bytes ("0.0").  */
1752 	if (dir.prec[0] < 0 && dir.prec[1] > 0)
1753 	  res.range.likely = 3;
1754 	else
1755 	  res.range.likely = min;
1756 
1757 	/* The unlikely maximum accounts for the longest multibyte
1758 	   decimal point character.  */
1759 	if (dir.prec[0] != dir.prec[1]
1760 	    || dir.prec[0] == -1 || dir.prec[0] > 0)
1761 	  res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1762 	break;
1763       }
1764 
1765     case 'G':
1766     case 'g':
1767       {
1768 	/* The %g output depends on precision and the exponent of
1769 	   the argument.  Since the value of the argument isn't known
1770 	   the lower bound on the range of bytes (not counting flags
1771 	   or width) is 1 plus radix (i.e., either "0" or "0." for
1772 	   "%g" and "%#g", respectively, with a zero argument).  */
1773 	unsigned HOST_WIDE_INT min = flagmin + radix;
1774 	if (min < res.range.min)
1775 	  res.range.min = min;
1776 
1777 	char spec = 'g';
1778 	HOST_WIDE_INT maxprec = dir.prec[1];
1779 	if (radix && maxprec)
1780 	  {
1781 	    /* When the pound flag (radix) is set, trailing zeros aren't
1782 	       trimmed and so the longest output is the same as for %e,
1783 	       except with precision minus 1 (as specified in C11).  */
1784 	    spec = 'e';
1785 	    if (maxprec > 0)
1786 	      --maxprec;
1787 	    else if (maxprec < 0)
1788 	      maxprec = 5;
1789 	  }
1790 	else
1791 	  maxprec = prec[1];
1792 
1793 	res.range.max = format_floating_max (type, spec, maxprec);
1794 
1795 	/* The likely output is either the maximum computed above
1796 	   minus 1 (assuming the maximum is positive) when precision
1797 	   is known (or unspecified), or the same minimum as for %e
1798 	   (which is computed for a non-negative argument).  Unlike
1799 	   for the other specifiers above the likely output isn't
1800 	   the minimum because for %g that's 1 which is unlikely.  */
1801 	if (dir.prec[1] < 0
1802 	    || (unsigned HOST_WIDE_INT)dir.prec[1] < target_int_max ())
1803 	  res.range.likely = res.range.max - 1;
1804 	else
1805 	  {
1806 	    HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1807 	    res.range.likely = (flagmin
1808 				+ radix
1809 				+ minprec
1810 				+ 2 /* e+ */ + 2);
1811 	  }
1812 
1813 	/* The unlikely maximum accounts for the longest multibyte
1814 	   decimal point character.  */
1815 	res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1816 	break;
1817       }
1818 
1819     default:
1820       return fmtresult ();
1821     }
1822 
1823   /* Bump up the byte counters if WIDTH is greater.  */
1824   res.adjust_for_width_or_precision (dir.width);
1825   return res;
1826 }
1827 
/* Return a range representing the minimum and maximum number of bytes
   that the directive DIR will write on output for the floating argument
   ARG.  When ARG is null, is not a REAL_CST, or the conversion between
   its type and the type selected by DIR's length modifier is not
   useless (per useless_type_conversion_p), defer to the
   format_floating (DIR, PREC) overload, passing it the normalized
   precision range.  Otherwise format the constant with MPFR, once
   rounding down and once rounding up, to obtain the two bounds.
   The unnamed POINTER_QUERY argument is unused.  */

static fmtresult
format_floating (const directive &dir, tree arg, pointer_query &)
{
  /* Working copy of the precision range; normalized below while
     DIR.PREC is left untouched and consulted again later.  */
  HOST_WIDE_INT prec[] = { dir.prec[0], dir.prec[1] };
  /* The L and ll length modifiers select long double, all others
     double.  */
  tree type = (dir.modifier == FMT_LEN_L || dir.modifier == FMT_LEN_ll
	       ? long_double_type_node : double_type_node);

  /* Normalize negative (unspecified or indeterminate) precision bounds.
     The %a and %A directives are handled separately because the
     precision substituted for them depends on the floating format.  */
  if (TOUPPER (dir.specifier) == 'A')
    {
      /* Get the precision of the format of TYPE's mode (e.g., 53 for
	 IEEE 754 double).  */
      unsigned fmtprec
	= REAL_MODE_FORMAT (TYPE_MODE (type))->p;

      /* The precision of the IEEE 754 double format is 53.
	 The precision of all other GCC binary double formats
	 is 56 or less.  */
      unsigned maxprec = fmtprec <= 56 ? 13 : 15;

      /* For %a, leave the minimum precision unspecified to let
	 MPFR trim trailing zeros (as it and many other systems
	 including Glibc happen to do) and set the maximum
	 precision to reflect what it would be with trailing zeros
	 present (as Solaris and derived systems do).  */
      if (dir.prec[1] < 0)
	{
	  /* Both bounds are negative implies that precision has
	     not been specified.  */
	  prec[0] = maxprec;
	  prec[1] = -1;
	}
      else if (dir.prec[0] < 0)
	{
	  /* With a negative lower bound and a non-negative upper
	     bound set the minimum precision to zero and the maximum
	     to the greater of the maximum precision (i.e., with
	     trailing zeros present) and the specified upper bound.  */
	  prec[0] = 0;
	  prec[1] = dir.prec[1] < maxprec ? maxprec : dir.prec[1];
	}
    }
  else if (dir.prec[0] < 0)
    {
      if (dir.prec[1] < 0)
	{
	  /* A precision in a strictly negative range is ignored and
	     the default of 6 is used instead.  */
	  prec[0] = prec[1] = 6;
	}
      else
	{
	  /* For a precision in a partly negative range, the lower bound
	     must be assumed to be zero and the new upper bound is the
	     greater of 6 (the default precision used when the specified
	     precision is negative) and the upper bound of the specified
	     range.  */
	  prec[0] = 0;
	  prec[1] = dir.prec[1] < 6 ? 6 : dir.prec[1];
	}
    }

  /* Without a usable floating constant only the precision range
     is available to bound the output.  */
  if (!arg
      || TREE_CODE (arg) != REAL_CST
      || !useless_type_conversion_p (type, TREE_TYPE (arg)))
    return format_floating (dir, prec);

  /* The minimum and maximum number of bytes produced by the directive.  */
  fmtresult res;

  /* Get the real type format description for the target.  */
  const REAL_VALUE_TYPE *rvp = TREE_REAL_CST_PTR (arg);
  const real_format *rfmt = REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (arg)));

  if (!real_isfinite (rvp))
    {
      /* The format for Infinity and NaN is "[-]inf"/"[-]infinity"
	 and "[-]nan" with the choice being implementation-defined
	 but not locale dependent.  The minimum is therefore 3 bytes
	 plus one for the sign when the '+' flag is set or the value
	 is negative.  */
      bool sign = dir.get_flag ('+') || real_isneg (rvp);
      res.range.min = 3 + sign;

      res.range.likely = res.range.min;
      res.range.max = res.range.min;
      /* The unlikely maximum is "[-/+]infinity" or "[-/+][qs]nan".
	 For NaN, the C/POSIX standards specify two formats:
	   "[-/+]nan"
	 and
	   "[-/+]nan(n-char-sequence)"
	 No known printf implementation outputs the latter format but AIX
	 outputs QNaN and SNaN for quiet and signalling NaN, respectively,
	 so the unlikely maximum reflects that.  */
      res.range.unlikely = sign + (real_isinf (rvp) ? 8 : 4);

      /* The range for infinity and NaN is known unless either width
	 or precision is unknown.  Width has the same effect regardless
	 of whether the argument is finite.  Precision is either ignored
	 (e.g., Glibc) or can have an effect on the short vs long format
	 such as inf/infinity (e.g., Solaris).  */
      res.knownrange = dir.known_width_and_precision ();

      /* Adjust the range for width but ignore precision.  */
      res.adjust_for_width_or_precision (dir.width);

      return res;
    }

  /* Buffer for the flag characters of the directive.  Only the flags
     are stored here; the remainder of the format is presumably put
     together by get_mpfr_format_length from the precision, specifier,
     and rounding arguments passed to it below (confirm there).  */
  char fmtstr [40];
  char *pfmt = fmtstr;

  /* Append flags.  */
  for (const char *pf = "-+ #0"; *pf; ++pf)
    if (dir.get_flag (*pf))
      *pfmt++ = *pf;

  *pfmt = '\0';

  {
    /* Set up an array to easily iterate over.  */
    unsigned HOST_WIDE_INT* const minmax[] = {
      &res.range.min, &res.range.max
    };

    /* Iteration 0 fills in RES.RANGE.MIN using PREC[0], iteration 1
       fills in RES.RANGE.MAX using PREC[1].  */
    for (int i = 0; i != sizeof minmax / sizeof *minmax; ++i)
      {
	/* Convert the GCC real value representation with the precision
	   of the real type to the mpfr_t format rounding down in the
	   first iteration that computes the minimum and up in the second
	   that computes the maximum.  This order is arbitrary because
	   rounding in either direction can result in longer output.  */
	mpfr_t mpfrval;
	mpfr_init2 (mpfrval, rfmt->p);
	mpfr_from_real (mpfrval, rvp, i ? MPFR_RNDU : MPFR_RNDD);

	/* Use the MPFR rounding specifier to round down in the first
	   iteration and then up.  In most but not all cases this will
	   result in the same number of bytes.  */
	char rndspec = "DU"[i];

	/* Format it and store the result in the corresponding member
	   of the result struct.  */
	*minmax[i] = get_mpfr_format_length (mpfrval, fmtstr, prec[i],
					     dir.specifier, rndspec);
	mpfr_clear (mpfrval);
      }
  }

  /* Make sure the minimum is less than the maximum (MPFR rounding
     in the call to mpfr_snprintf can result in the reverse).  */
  if (res.range.max < res.range.min)
    {
      unsigned HOST_WIDE_INT tmp = res.range.min;
      res.range.min = res.range.max;
      res.range.max = tmp;
    }

  /* The range is known unless either width or precision is unknown.  */
  res.knownrange = dir.known_width_and_precision ();

  /* For the same floating point constant, unless width or precision
     is unknown, use the longer output as the likely maximum since
     with round to nearest either is equally likely.  Otherwise, when
     precision is unknown, use the greater of the minimum and 3 as
     the likely output (for "0.0" since zero precision is unlikely).
     Note that these tests use the original DIR.PREC bounds rather
     than the normalized PREC.  */
  if (res.knownrange)
    res.range.likely = res.range.max;
  else if (res.range.min < 3
	   && dir.prec[0] < 0
	   && (unsigned HOST_WIDE_INT)dir.prec[1] == target_int_max ())
    res.range.likely = 3;
  else
    res.range.likely = res.range.min;

  res.range.unlikely = res.range.max;

  if (res.range.max > 2 && (prec[0] != 0 || prec[1] != 0))
    {
      /* Unless the precision is zero output longer than 2 bytes may
	 include the decimal point which must be a single character
	 up to MB_LEN_MAX in length.  This is overly conservative
	 since in some conversions some constants result in no decimal
	 point (e.g., in %g).  */
      res.range.unlikely += target_mb_len_max () - 1;
    }

  /* Bump up the byte counters if WIDTH is greater.  */
  res.adjust_for_width_or_precision (dir.width);
  return res;
}
2021 
2022 /* Return a FMTRESULT struct set to the lengths of the shortest and longest
2023    strings referenced by the expression STR, or (-1, -1) when not known.
2024    Used by the format_string function below.  */
2025 
2026 static fmtresult
get_string_length(tree str,gimple * stmt,unsigned HOST_WIDE_INT max_size,unsigned eltsize,pointer_query & ptr_qry)2027 get_string_length (tree str, gimple *stmt, unsigned HOST_WIDE_INT max_size,
2028 		   unsigned eltsize, pointer_query &ptr_qry)
2029 {
2030   if (!str)
2031     return fmtresult ();
2032 
2033   /* Try to determine the dynamic string length first.
2034      Set MAXBOUND to an arbitrary non-null non-integer node as a request
2035      to have it set to the length of the longest string in a PHI.  */
2036   c_strlen_data lendata = { };
2037   lendata.maxbound = str;
2038   if (eltsize == 1)
2039     get_range_strlen_dynamic (str, stmt, &lendata, ptr_qry);
2040   else
2041     {
2042       /* Determine the length of the shortest and longest string referenced
2043 	 by STR.  Strings of unknown lengths are bounded by the sizes of
2044 	 arrays that subexpressions of STR may refer to.  Pointers that
2045 	 aren't known to point any such arrays result in LENDATA.MAXLEN
2046 	 set to SIZE_MAX.  */
2047       get_range_strlen (str, &lendata, eltsize);
2048     }
2049 
2050   /* If LENDATA.MAXBOUND is not equal to .MINLEN it corresponds to the bound
2051      of the largest array STR refers to, if known, or it's set to SIZE_MAX
2052      otherwise.  */
2053 
2054   /* Return the default result when nothing is known about the string.  */
2055   if ((lendata.maxbound && !tree_fits_uhwi_p (lendata.maxbound))
2056       || !tree_fits_uhwi_p (lendata.maxlen))
2057     {
2058       fmtresult res;
2059       res.nonstr = lendata.decl;
2060       return res;
2061     }
2062 
2063   unsigned HOST_WIDE_INT lenmax = tree_to_uhwi (max_object_size ()) - 2;
2064   if (integer_zerop (lendata.minlen)
2065       && (!lendata.maxbound || lenmax <= tree_to_uhwi (lendata.maxbound))
2066       && lenmax <= tree_to_uhwi (lendata.maxlen))
2067     {
2068       if (max_size > 0 && max_size < HOST_WIDE_INT_MAX)
2069 	{
2070 	  /* Adjust the conservative unknown/unbounded result if MAX_SIZE
2071 	     is valid.  Set UNLIKELY to maximum in case MAX_SIZE refers
2072 	     to a subobject.
2073 	     TODO: This is overly conservative.  Set UNLIKELY to the size
2074 	     of the outermost enclosing declared object.  */
2075 	  fmtresult res (0, max_size - 1);
2076 	  res.nonstr = lendata.decl;
2077 	  res.range.likely = res.range.max;
2078 	  res.range.unlikely = HOST_WIDE_INT_MAX;
2079 	  return res;
2080 	}
2081 
2082       fmtresult res;
2083       res.nonstr = lendata.decl;
2084       return res;
2085     }
2086 
2087   /* The minimum length of the string.  */
2088   HOST_WIDE_INT min
2089     = (tree_fits_uhwi_p (lendata.minlen)
2090        ? tree_to_uhwi (lendata.minlen)
2091        : 0);
2092 
2093   /* The maximum length of the string; initially set to MAXBOUND which
2094      may be less than MAXLEN, but may be adjusted up below.  */
2095   HOST_WIDE_INT max
2096     = (lendata.maxbound && tree_fits_uhwi_p (lendata.maxbound)
2097        ? tree_to_uhwi (lendata.maxbound)
2098        : HOST_WIDE_INT_M1U);
2099 
2100   /* True if either the maximum length is unknown or (conservatively)
2101      the array bound is less than the maximum length.  That can happen
2102      when the length of the string is unknown but the array in which
2103      the string is stored is a member of a struct.  The warning uses
2104      the size of the member as the upper bound but the optimization
2105      doesn't.  The optimization could still use the size of
2106      enclosing object as the upper bound but that's not done here.  */
2107   const bool unbounded
2108     = (integer_all_onesp (lendata.maxlen)
2109        || (lendata.maxbound
2110 	   && tree_int_cst_lt (lendata.maxbound, lendata.maxlen)));
2111 
2112   /* Set the max/likely counters to unbounded when a minimum is known
2113      but the maximum length isn't bounded.  This implies that STR is
2114      a conditional expression involving a string of known length and
2115      an expression of unknown/unbounded length.  */
2116   if (min
2117       && (unsigned HOST_WIDE_INT)min < HOST_WIDE_INT_M1U
2118       && unbounded)
2119     max = HOST_WIDE_INT_M1U;
2120 
2121   /* get_range_strlen() returns the target value of SIZE_MAX for
2122      strings of unknown length.  Bump it up to HOST_WIDE_INT_M1U
2123      which may be bigger.  */
2124   if ((unsigned HOST_WIDE_INT)min == target_size_max ())
2125     min = HOST_WIDE_INT_M1U;
2126   if ((unsigned HOST_WIDE_INT)max == target_size_max ())
2127     max = HOST_WIDE_INT_M1U;
2128 
2129   fmtresult res (min, max);
2130   res.nonstr = lendata.decl;
2131 
2132   /* Set RES.KNOWNRANGE to true if and only if all strings referenced
2133      by STR are known to be bounded (though not necessarily by their
2134      actual length but perhaps by their maximum possible length).  */
2135   if (res.range.max < target_int_max ())
2136     {
2137       res.knownrange = true;
2138       /* When the length of the longest string is known and not
2139 	 excessive use it as the likely length of the string(s).  */
2140       res.range.likely = res.range.max;
2141     }
2142   else
2143     {
2144       /* When the upper bound is unknown (it can be zero or excessive)
2145 	 set the likely length to the greater of 1.  If MAXBOUND is
2146 	 known, also reset the length of the lower bound to zero.  */
2147       res.range.likely = res.range.min ? res.range.min : warn_level > 1;
2148       if (lendata.maxbound && !integer_all_onesp (lendata.maxbound))
2149 	res.range.min = 0;
2150     }
2151 
2152   res.range.unlikely = unbounded ? HOST_WIDE_INT_MAX : res.range.max;
2153 
2154   return res;
2155 }
2156 
2157 /* Return the minimum and maximum number of characters formatted
2158    by the '%c' format directives and its wide character form for
2159    the argument ARG.  ARG can be null (for functions such as
2160    vsprinf).  */
2161 
2162 static fmtresult
format_character(const directive & dir,tree arg,pointer_query & ptr_qry)2163 format_character (const directive &dir, tree arg, pointer_query &ptr_qry)
2164 {
2165   fmtresult res;
2166 
2167   res.knownrange = true;
2168 
2169   if (dir.specifier == 'C'
2170       || dir.modifier == FMT_LEN_l)
2171     {
2172       /* A wide character can result in as few as zero bytes.  */
2173       res.range.min = 0;
2174 
2175       HOST_WIDE_INT min, max;
2176       if (get_int_range (arg, dir.info->callstmt, &min, &max, false, 0,
2177 			 ptr_qry.rvals))
2178 	{
2179 	  if (min == 0 && max == 0)
2180 	    {
2181 	      /* The NUL wide character results in no bytes.  */
2182 	      res.range.max = 0;
2183 	      res.range.likely = 0;
2184 	      res.range.unlikely = 0;
2185 	    }
2186 	  else if (min >= 0 && min < 128)
2187 	    {
2188 	      /* Be conservative if the target execution character set
2189 		 is not a 1-to-1 mapping to the source character set or
2190 		 if the source set is not ASCII.  */
2191 	      bool one_2_one_ascii
2192 		= (target_to_host_charmap[0] == 1 && target_to_host ('a') == 97);
2193 
2194 	      /* A wide character in the ASCII range most likely results
2195 		 in a single byte, and only unlikely in up to MB_LEN_MAX.  */
2196 	      res.range.max = one_2_one_ascii ? 1 : target_mb_len_max ();;
2197 	      res.range.likely = 1;
2198 	      res.range.unlikely = target_mb_len_max ();
2199 	      res.mayfail = !one_2_one_ascii;
2200 	    }
2201 	  else
2202 	    {
2203 	      /* A wide character outside the ASCII range likely results
2204 		 in up to two bytes, and only unlikely in up to MB_LEN_MAX.  */
2205 	      res.range.max = target_mb_len_max ();
2206 	      res.range.likely = 2;
2207 	      res.range.unlikely = res.range.max;
2208 	      /* Converting such a character may fail.  */
2209 	      res.mayfail = true;
2210 	    }
2211 	}
2212       else
2213 	{
2214 	  /* An unknown wide character is treated the same as a wide
2215 	     character outside the ASCII range.  */
2216 	  res.range.max = target_mb_len_max ();
2217 	  res.range.likely = 2;
2218 	  res.range.unlikely = res.range.max;
2219 	  res.mayfail = true;
2220 	}
2221     }
2222   else
2223     {
2224       /* A plain '%c' directive.  Its output is exactly 1.  */
2225       res.range.min = res.range.max = 1;
2226       res.range.likely = res.range.unlikely = 1;
2227       res.knownrange = true;
2228     }
2229 
2230   /* Bump up the byte counters if WIDTH is greater.  */
2231   return res.adjust_for_width_or_precision (dir.width);
2232 }
2233 
2234 /* If TYPE is an array or struct or union, increment *FLDOFF by the starting
2235    offset of the member that *OFF points into if one can be determined and
2236    set *FLDSIZE to its size in bytes and decrement *OFF by the same.
2237    Otherwise do nothing.  */
2238 
2239 static void
set_aggregate_size_and_offset(tree type,HOST_WIDE_INT * fldoff,HOST_WIDE_INT * fldsize,HOST_WIDE_INT * off)2240 set_aggregate_size_and_offset (tree type, HOST_WIDE_INT *fldoff,
2241 			       HOST_WIDE_INT *fldsize, HOST_WIDE_INT *off)
2242 {
2243   /* The byte offset of the most basic struct member the byte
2244      offset *OFF corresponds to, or for a (multidimensional)
2245      array member, the byte offset of the array element.  */
2246   if (TREE_CODE (type) == ARRAY_TYPE
2247       && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
2248     {
2249       HOST_WIDE_INT index = 0, arrsize = 0;
2250       if (array_elt_at_offset (type, *off, &index, &arrsize))
2251 	{
2252 	  *fldoff += index;
2253 	  *fldsize = arrsize;
2254 	}
2255       /* Otherwise leave *FLDOFF et al. unchanged.  */
2256     }
2257   else if (RECORD_OR_UNION_TYPE_P (type))
2258     {
2259       HOST_WIDE_INT index = 0;
2260       tree sub = field_at_offset (type, NULL_TREE, *off, &index);
2261       if (sub)
2262 	{
2263 	  tree subsize = DECL_SIZE_UNIT (sub);
2264 	  if (*fldsize < HOST_WIDE_INT_MAX
2265 	      && subsize
2266 	      && tree_fits_uhwi_p (subsize))
2267 	    *fldsize = tree_to_uhwi (subsize);
2268 	  else
2269 	    *fldsize = HOST_WIDE_INT_MAX;
2270 	  *fldoff += index;
2271 	  *off -= index;
2272 	}
2273       /* Otherwise leave *FLDOFF et al. unchanged.  */
2274     }
2275 }
2276 
/* For an expression X of pointer type, recursively try to find its origin
   (either object DECL or pointer such as PARM_DECL) Y and return such a Y.
   When X refers to an array element or struct member, set *FLDOFF to
   the offset of the element or member from the beginning of the "most
   derived" object and *FLDSIZE to its size.  When nonnull, set *OFF to
   the overall offset from the beginning of the object so that
   *FLDOFF <= *OFF.  FLDSIZE may be null, in which case the size is
   computed into a local that is discarded.  When no origin can be
   found the (possibly partially resolved) expression is returned.  */

static tree
get_origin_and_offset_r (tree x, HOST_WIDE_INT *fldoff, HOST_WIDE_INT *fldsize,
			 HOST_WIDE_INT *off)
{
  /* Scratch destination for the size when the caller isn't interested
     in it.  A negative *FLDSIZE means "not set yet".  */
  HOST_WIDE_INT sizebuf = -1;
  if (!fldsize)
    fldsize = &sizebuf;

  if (DECL_P (x))
    {
      /* A declaration is the origin: the recursion ends here.  */
      /* Set the size if it hasn't been set yet.  */
      if (tree size = DECL_SIZE_UNIT (x))
	if (*fldsize < 0 && tree_fits_shwi_p (size))
	  *fldsize = tree_to_shwi (size);
      return x;
    }

  switch (TREE_CODE (x))
    {
    case ADDR_EXPR:
      /* Look through the address-of to its operand.  */
      x = TREE_OPERAND (x, 0);
      return get_origin_and_offset_r (x, fldoff, fldsize, off);

    case ARRAY_REF:
      {
	/* The index and the element size, each set to HOST_WIDE_INT_MAX
	   when it isn't a suitable constant.  */
	tree sub = TREE_OPERAND (x, 1);
	unsigned HOST_WIDE_INT idx =
	  tree_fits_uhwi_p (sub) ? tree_to_uhwi (sub) : HOST_WIDE_INT_MAX;

	tree elsz = array_ref_element_size (x);
	unsigned HOST_WIDE_INT elbytes =
	  tree_fits_shwi_p (elsz) ? tree_to_shwi (elsz) : HOST_WIDE_INT_MAX;

	unsigned HOST_WIDE_INT byteoff = idx * elbytes;

	/* Use BYTEOFF only if the element size is known, the product
	   didn't wrap around (verified by dividing it back), and
	   the result is less than HOST_WIDE_INT_MAX.  */
	if (byteoff < HOST_WIDE_INT_MAX
	    && elbytes < HOST_WIDE_INT_MAX
	    && (elbytes == 0 || byteoff / elbytes == idx))
	  {
	    /* For in-bounds constant offsets into constant-sized arrays
	       bump up *OFF, and for what's likely arrays or structs of
	       arrays, also *FLDOFF, as necessary.  */
	    if (off)
	      *off += byteoff;
	    if (elbytes > 1)
	      *fldoff += byteoff;
	  }
	else
	  /* The offset is unknown; mark *FLDOFF accordingly.  */
	  *fldoff = HOST_WIDE_INT_MAX;

	x = TREE_OPERAND (x, 0);
	return get_origin_and_offset_r (x, fldoff, fldsize, off);
      }

    case MEM_REF:
      if (off)
	{
	  /* Set (rather than add to) *OFF from the constant offset
	     operand, or to HOST_WIDE_INT_MAX when it isn't one.  */
	  tree offset = TREE_OPERAND (x, 1);
	  *off = (tree_fits_uhwi_p (offset)
		  ? tree_to_uhwi (offset) : HOST_WIDE_INT_MAX);
	}

      x = TREE_OPERAND (x, 0);

      if (off)
	{
	  /* The type of the object the base refers to: the type of
	     the operand of an ADDR_EXPR, or the pointed-to type
	     otherwise.  */
	  tree xtype
	    = (TREE_CODE (x) == ADDR_EXPR
	       ? TREE_TYPE (TREE_OPERAND (x, 0)) : TREE_TYPE (TREE_TYPE (x)));

	  set_aggregate_size_and_offset (xtype, fldoff, fldsize, off);
	}

      /* Pass a null OFF so that the rest of the recursion leaves
	 the offset computed above alone.  */
      return get_origin_and_offset_r (x, fldoff, fldsize, nullptr);

    case COMPONENT_REF:
      {
	tree foff = component_ref_field_offset (x);
	tree fld = TREE_OPERAND (x, 1);
	/* Give up and return the reference itself if the position
	   of the member isn't constant.  */
	if (!tree_fits_shwi_p (foff)
	    || !tree_fits_shwi_p (DECL_FIELD_BIT_OFFSET (fld)))
	  return x;
	/* Add the byte offset of the member, including the whole bytes
	   of its bit offset.  */
	*fldoff += (tree_to_shwi (foff)
		    + (tree_to_shwi (DECL_FIELD_BIT_OFFSET (fld))
		       / BITS_PER_UNIT));

	/* Recurse on the member first to record its size in *FLDSIZE
	   (the result is ignored), then continue with the containing
	   object, passing a null FLDSIZE so that the size of the member
	   isn't replaced by that of an enclosing object.  */
	get_origin_and_offset_r (fld, fldoff, fldsize, off);
	x = TREE_OPERAND (x, 0);
	return get_origin_and_offset_r (x, fldoff, nullptr, off);
      }

    case SSA_NAME:
      {
	/* Follow the definition of the SSA_NAME if it's an assignment
	   of one of the handled forms.  */
	gimple *def = SSA_NAME_DEF_STMT (x);
	if (is_gimple_assign (def))
	  {
	    tree_code code = gimple_assign_rhs_code (def);
	    if (code == ADDR_EXPR)
	      {
		x = gimple_assign_rhs1 (def);
		return get_origin_and_offset_r (x, fldoff, fldsize, off);
	      }

	    if (code == POINTER_PLUS_EXPR)
	      {
		/* Set *OFF to a constant offset before recursing on
		   the pointer operand.  A non-constant offset makes
		   *OFF unknown (HOST_WIDE_INT_MAX) regardless of what
		   the recursive call computes.  */
		tree offset = gimple_assign_rhs2 (def);
		if (off && tree_fits_uhwi_p (offset))
		  *off = tree_to_uhwi (offset);

		x = gimple_assign_rhs1 (def);
		x = get_origin_and_offset_r (x, fldoff, fldsize, off);
		if (off && !tree_fits_uhwi_p (offset))
		  *off = HOST_WIDE_INT_MAX;
		if (off)
		  {
		    tree xtype = TREE_TYPE (x);
		    set_aggregate_size_and_offset (xtype, fldoff, fldsize, off);
		  }
		return x;
	      }
	    else if (code == VAR_DECL)
	      {
		/* A copy of a variable: continue with the variable.  */
		x = gimple_assign_rhs1 (def);
		return get_origin_and_offset_r (x, fldoff, fldsize, off);
	      }
	  }
	else if (gimple_nop_p (def) && SSA_NAME_VAR (x))
	  /* No defining statement: use the underlying variable
	     of the SSA_NAME as the origin.  */
	  x = SSA_NAME_VAR (x);

	/* Refine the member offset and size using the type X points
	   to (or the type of X itself if it's not a pointer).  */
	tree xtype = TREE_TYPE (x);
	if (POINTER_TYPE_P (xtype))
	  xtype = TREE_TYPE (xtype);

	if (off)
	  set_aggregate_size_and_offset (xtype, fldoff, fldsize, off);
      }
      /* There is no break here: control falls through to the default
	 case below, which only breaks out of the switch.  */

    default:
      break;
    }

  return x;
}
2428 
2429 /* Nonrecursive version of the above.
2430    The function never returns null unless X is null to begin with.  */
2431 
2432 static tree
get_origin_and_offset(tree x,HOST_WIDE_INT * fldoff,HOST_WIDE_INT * off,HOST_WIDE_INT * fldsize=nullptr)2433 get_origin_and_offset (tree x, HOST_WIDE_INT *fldoff, HOST_WIDE_INT *off,
2434 		       HOST_WIDE_INT *fldsize = nullptr)
2435 {
2436   if (!x)
2437     return NULL_TREE;
2438 
2439   HOST_WIDE_INT sizebuf;
2440   if (!fldsize)
2441     fldsize = &sizebuf;
2442 
2443   /* Invalidate *FLDSIZE.  */
2444   *fldsize = -1;
2445   *fldoff = *off = 0;
2446 
2447   return get_origin_and_offset_r (x, fldoff, fldsize, off);
2448 }
2449 
2450 /* If ARG refers to the same (sub)object or array element as described
2451    by DST and DST_FLD, return the byte offset into the struct member or
2452    array element referenced by ARG and set *ARG_SIZE to the size of
2453    the (sub)object.  Otherwise return HOST_WIDE_INT_MIN to indicate
2454    that ARG and DST do not refer to the same object.  */
2455 
2456 static HOST_WIDE_INT
alias_offset(tree arg,HOST_WIDE_INT * arg_size,tree dst,HOST_WIDE_INT dst_fld)2457 alias_offset (tree arg, HOST_WIDE_INT *arg_size,
2458 	      tree dst, HOST_WIDE_INT dst_fld)
2459 {
2460   /* See if the argument refers to the same base object as the destination
2461      of the formatted function call, and if so, try to determine if they
2462      can alias.  */
2463   if (!arg || !dst || !ptr_derefs_may_alias_p (arg, dst))
2464     return HOST_WIDE_INT_MIN;
2465 
2466   /* The two arguments may refer to the same object.  If they both refer
2467      to a struct member, see if the members are one and the same.  If so,
2468      return the offset into the member.  */
2469   HOST_WIDE_INT arg_off = 0, arg_fld = 0;
2470 
2471   tree arg_orig = get_origin_and_offset (arg, &arg_fld, &arg_off, arg_size);
2472 
2473   if (arg_orig == dst && arg_fld == dst_fld)
2474     return arg_off;
2475 
2476   return HOST_WIDE_INT_MIN;
2477 }
2478 
2479 /* Return the minimum and maximum number of characters formatted
2480    by the '%s' format directive and its wide character form for
2481    the argument ARG.  ARG can be null (for functions such as
2482    vsprinf).  */
2483 
2484 static fmtresult
format_string(const directive & dir,tree arg,pointer_query & ptr_qry)2485 format_string (const directive &dir, tree arg, pointer_query &ptr_qry)
2486 {
2487   fmtresult res;
2488 
2489   /* The size of the (sub)object ARG refers to.  Used to adjust
2490      the conservative get_string_length() result.  */
2491   HOST_WIDE_INT arg_size = 0;
2492 
2493   if (warn_restrict)
2494     {
2495       /* See if ARG might alias the destination of the call with
2496 	 DST_ORIGIN and DST_FIELD.  If so, store the starting offset
2497 	 so that the overlap can be determined for certain later,
2498 	 when the amount of output of the call (including subsequent
2499 	 directives) has been computed.  Otherwise, store HWI_MIN.  */
2500       res.dst_offset = alias_offset (arg, &arg_size, dir.info->dst_origin,
2501 				     dir.info->dst_field);
2502       if (res.dst_offset >= 0 && res.dst_offset <= arg_size)
2503 	arg_size -= res.dst_offset;
2504       else
2505 	arg_size = 0;
2506     }
2507 
2508   /* Compute the range the argument's length can be in.  */
2509   int count_by = 1;
2510   if (dir.specifier == 'S' || dir.modifier == FMT_LEN_l)
2511     {
2512       /* Get a node for a C type that will be the same size
2513 	 as a wchar_t on the target.  */
2514       tree node = get_typenode_from_name (MODIFIED_WCHAR_TYPE);
2515 
2516       /* Now that we have a suitable node, get the number of
2517 	 bytes it occupies.  */
2518       count_by = int_size_in_bytes (node);
2519       gcc_checking_assert (count_by == 2 || count_by == 4);
2520     }
2521 
2522   fmtresult slen =
2523     get_string_length (arg, dir.info->callstmt, arg_size, count_by, ptr_qry);
2524   if (slen.range.min == slen.range.max
2525       && slen.range.min < HOST_WIDE_INT_MAX)
2526     {
2527       /* The argument is either a string constant or it refers
2528 	 to one of a number of strings of the same length.  */
2529 
2530       /* A '%s' directive with a string argument with constant length.  */
2531       res.range = slen.range;
2532 
2533       if (dir.specifier == 'S'
2534 	  || dir.modifier == FMT_LEN_l)
2535 	{
2536 	  /* In the worst case the length of output of a wide string S
2537 	     is bounded by MB_LEN_MAX * wcslen (S).  */
2538 	  res.range.max *= target_mb_len_max ();
2539 	  res.range.unlikely = res.range.max;
2540 	  /* It's likely that the total length is not more that
2541 	     2 * wcslen (S).*/
2542 	  res.range.likely = res.range.min * 2;
2543 
2544 	  if (dir.prec[1] >= 0
2545 	      && (unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2546 	    {
2547 	      res.range.max = dir.prec[1];
2548 	      res.range.likely = dir.prec[1];
2549 	      res.range.unlikely = dir.prec[1];
2550 	    }
2551 
2552 	  if (dir.prec[0] < 0 && dir.prec[1] > -1)
2553 	    res.range.min = 0;
2554 	  else if (dir.prec[0] >= 0)
2555 	    res.range.likely = dir.prec[0];
2556 
2557 	  /* Even a non-empty wide character string need not convert into
2558 	     any bytes.  */
2559 	  res.range.min = 0;
2560 
2561 	  /* A non-empty wide character conversion may fail.  */
2562 	  if (slen.range.max > 0)
2563 	    res.mayfail = true;
2564 	}
2565       else
2566 	{
2567 	  res.knownrange = true;
2568 
2569 	  if (dir.prec[0] < 0 && dir.prec[1] > -1)
2570 	    res.range.min = 0;
2571 	  else if ((unsigned HOST_WIDE_INT)dir.prec[0] < res.range.min)
2572 	    res.range.min = dir.prec[0];
2573 
2574 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2575 	    {
2576 	      res.range.max = dir.prec[1];
2577 	      res.range.likely = dir.prec[1];
2578 	      res.range.unlikely = dir.prec[1];
2579 	    }
2580 	}
2581     }
2582   else if (arg && integer_zerop (arg))
2583     {
2584       /* Handle null pointer argument.  */
2585 
2586       fmtresult res (0);
2587       res.nullp = true;
2588       return res;
2589     }
2590   else
2591     {
      /* For a '%s' and '%ls' directive with a non-constant string (either
	 one of a number of strings of known length or an unknown string)
	 the minimum number of characters is the lesser of PRECISION[0] and
	 the length of the shortest known string or zero, and the maximum
	 is the lesser of the length of the longest known string or
	 PTRDIFF_MAX and PRECISION[1].  The likely length is the minimum
	 at level 1 and the greater of the minimum and 1 at level 2.
	 This result is adjusted upward for width (if it's
	 specified).  */
2601 
2602       if (dir.specifier == 'S'
2603 	  || dir.modifier == FMT_LEN_l)
2604 	{
2605 	  /* A wide character converts to as few as zero bytes.  */
2606 	  slen.range.min = 0;
2607 	  if (slen.range.max < target_int_max ())
2608 	    slen.range.max *= target_mb_len_max ();
2609 
2610 	  if (slen.range.likely < target_int_max ())
2611 	    slen.range.likely *= 2;
2612 
2613 	  if (slen.range.likely < target_int_max ())
2614 	    slen.range.unlikely *= target_mb_len_max ();
2615 
2616 	  /* A non-empty wide character conversion may fail.  */
2617 	  if (slen.range.max > 0)
2618 	    res.mayfail = true;
2619 	}
2620 
2621       res.range = slen.range;
2622 
2623       if (dir.prec[0] >= 0)
2624 	{
2625 	  /* Adjust the minimum to zero if the string length is unknown,
2626 	     or at most the lower bound of the precision otherwise.  */
2627 	  if (slen.range.min >= target_int_max ())
2628 	    res.range.min = 0;
2629 	  else if ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.min)
2630 	    res.range.min = dir.prec[0];
2631 
2632 	  /* Make both maxima no greater than the upper bound of precision.  */
2633 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max
2634 	      || slen.range.max >= target_int_max ())
2635 	    {
2636 	      res.range.max = dir.prec[1];
2637 	      res.range.unlikely = dir.prec[1];
2638 	    }
2639 
2640 	  /* If precision is constant, set the likely counter to the lesser
2641 	     of it and the maximum string length.  Otherwise, if the lower
2642 	     bound of precision is greater than zero, set the likely counter
2643 	     to the minimum.  Otherwise set it to zero or one based on
2644 	     the warning level.  */
2645 	  if (dir.prec[0] == dir.prec[1])
2646 	    res.range.likely
2647 	      = ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.max
2648 		 ? dir.prec[0] : slen.range.max);
2649 	  else if (dir.prec[0] > 0)
2650 	    res.range.likely = res.range.min;
2651 	  else
2652 	    res.range.likely = warn_level > 1;
2653 	}
2654       else if (dir.prec[1] >= 0)
2655 	{
2656 	  res.range.min = 0;
2657 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max)
2658 	    res.range.max = dir.prec[1];
2659 	  res.range.likely = dir.prec[1] ? warn_level > 1 : 0;
2660 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.unlikely)
2661 	    res.range.unlikely = dir.prec[1];
2662 	}
2663       else if (slen.range.min >= target_int_max ())
2664 	{
2665 	  res.range.min = 0;
2666 	  res.range.max = HOST_WIDE_INT_MAX;
	  /* At level 1 strings of unknown length are assumed to be
	     empty, while at level 2 and above they are assumed to be
	     one byte long.  */
2670 	  res.range.likely = warn_level > 1;
2671 	  res.range.unlikely = HOST_WIDE_INT_MAX;
2672 	}
2673       else
2674 	{
2675 	  /* A string of unknown length unconstrained by precision is
2676 	     assumed to be empty at level 1 and just one character long
2677 	     at higher levels.  */
2678 	  if (res.range.likely >= target_int_max ())
2679 	    res.range.likely = warn_level > 1;
2680 	}
2681     }
2682 
2683   /* If the argument isn't a nul-terminated string and the number
2684      of bytes on output isn't bounded by precision, set NONSTR.  */
2685   if (slen.nonstr && slen.range.min < (unsigned HOST_WIDE_INT)dir.prec[0])
2686     res.nonstr = slen.nonstr;
2687 
2688   /* Bump up the byte counters if WIDTH is greater.  */
2689   return res.adjust_for_width_or_precision (dir.width);
2690 }
2691 
2692 /* Format plain string (part of the format string itself).  */
2693 
2694 static fmtresult
format_plain(const directive & dir,tree,pointer_query &)2695 format_plain (const directive &dir, tree, pointer_query &)
2696 {
2697   fmtresult res (dir.len);
2698   return res;
2699 }
2700 
2701 /* Return true if the RESULT of a directive in a call describe by INFO
2702    should be diagnosed given the AVAILable space in the destination.  */
2703 
2704 static bool
should_warn_p(const call_info & info,const result_range & avail,const result_range & result)2705 should_warn_p (const call_info &info,
2706 	       const result_range &avail, const result_range &result)
2707 {
2708   if (result.max <= avail.min)
2709     {
2710       /* The least amount of space remaining in the destination is big
2711 	 enough for the longest output.  */
2712       return false;
2713     }
2714 
2715   if (info.bounded)
2716     {
2717       if (warn_format_trunc == 1 && result.min <= avail.max
2718 	  && info.retval_used ())
2719 	{
2720 	  /* The likely amount of space remaining in the destination is big
2721 	     enough for the least output and the return value is used.  */
2722 	  return false;
2723 	}
2724 
2725       if (warn_format_trunc == 1 && result.likely <= avail.likely
2726 	  && !info.retval_used ())
2727 	{
2728 	  /* The likely amount of space remaining in the destination is big
2729 	     enough for the likely output and the return value is unused.  */
2730 	  return false;
2731 	}
2732 
2733       if (warn_format_trunc == 2
2734 	  && result.likely <= avail.min
2735 	  && (result.max <= avail.min
2736 	      || result.max > HOST_WIDE_INT_MAX))
2737 	{
2738 	  /* The minimum amount of space remaining in the destination is big
2739 	     enough for the longest output.  */
2740 	  return false;
2741 	}
2742     }
2743   else
2744     {
2745       if (warn_level == 1 && result.likely <= avail.likely)
2746 	{
2747 	  /* The likely amount of space remaining in the destination is big
2748 	     enough for the likely output.  */
2749 	  return false;
2750 	}
2751 
2752       if (warn_level == 2
2753 	  && result.likely <= avail.min
2754 	  && (result.max <= avail.min
2755 	      || result.max > HOST_WIDE_INT_MAX))
2756 	{
2757 	  /* The minimum amount of space remaining in the destination is big
2758 	     enough for the longest output.  */
2759 	  return false;
2760 	}
2761     }
2762 
2763   return true;
2764 }
2765 
2766 /* At format string location describe by DIRLOC in a call described
2767    by INFO, issue a warning for a directive DIR whose output may be
2768    in excess of the available space AVAIL_RANGE in the destination
2769    given the formatting result FMTRES.  This function does nothing
2770    except decide whether to issue a warning for a possible write
2771    past the end or truncation and, if so, format the warning.
2772    Return true if a warning has been issued.  */
2773 
2774 static bool
maybe_warn(substring_loc & dirloc,location_t argloc,const call_info & info,const result_range & avail_range,const result_range & res,const directive & dir)2775 maybe_warn (substring_loc &dirloc, location_t argloc,
2776 	    const call_info &info,
2777 	    const result_range &avail_range, const result_range &res,
2778 	    const directive &dir)
2779 {
2780   if (!should_warn_p (info, avail_range, res))
2781     return false;
2782 
2783   /* A warning will definitely be issued below.  */
2784 
2785   /* The maximum byte count to reference in the warning.  Larger counts
2786      imply that the upper bound is unknown (and could be anywhere between
2787      RES.MIN + 1 and SIZE_MAX / 2) are printed as "N or more bytes" rather
2788      than "between N and X" where X is some huge number.  */
2789   unsigned HOST_WIDE_INT maxbytes = target_dir_max ();
2790 
2791   /* True when there is enough room in the destination for the least
2792      amount of a directive's output but not enough for its likely or
2793      maximum output.  */
2794   bool maybe = (res.min <= avail_range.max
2795 		&& (avail_range.min < res.likely
2796 		    || (res.max < HOST_WIDE_INT_MAX
2797 			&& avail_range.min < res.max)));
2798 
2799   /* Buffer for the directive in the host character set (used when
2800      the source character set is different).  */
2801   char hostdir[32];
2802 
2803   if (avail_range.min == avail_range.max)
2804     {
2805       /* The size of the destination region is exact.  */
2806       unsigned HOST_WIDE_INT navail = avail_range.max;
2807 
2808       if (target_to_host (*dir.beg) != '%')
2809 	{
2810 	  /* For plain character directives (i.e., the format string itself)
2811 	     but not others, point the caret at the first character that's
2812 	     past the end of the destination.  */
2813 	  if (navail < dir.len)
2814 	    dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
2815 	}
2816 
2817       if (*dir.beg == '\0')
2818 	{
2819 	  /* This is the terminating nul.  */
2820 	  gcc_assert (res.min == 1 && res.min == res.max);
2821 
2822 	  return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
2823 			  info.bounded
2824 			  ? (maybe
2825 			     ? G_("%qE output may be truncated before the "
2826 				  "last format character")
2827 			     : G_("%qE output truncated before the last "
2828 				  "format character"))
2829 			  : (maybe
2830 			     ? G_("%qE may write a terminating nul past the "
2831 				  "end of the destination")
2832 			     : G_("%qE writing a terminating nul past the "
2833 				  "end of the destination")),
2834 			  info.func);
2835 	}
2836 
2837       if (res.min == res.max)
2838 	{
2839 	  const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
2840 	  if (!info.bounded)
2841 	    return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2842 			      "%<%.*s%> directive writing %wu byte into a "
2843 			      "region of size %wu",
2844 			      "%<%.*s%> directive writing %wu bytes into a "
2845 			      "region of size %wu",
2846 			      (int) dir.len, d, res.min, navail);
2847 	  else if (maybe)
2848 	    return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2849 			      "%<%.*s%> directive output may be truncated "
2850 			      "writing %wu byte into a region of size %wu",
2851 			      "%<%.*s%> directive output may be truncated "
2852 			      "writing %wu bytes into a region of size %wu",
2853 			      (int) dir.len, d, res.min, navail);
2854 	  else
2855 	    return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2856 			      "%<%.*s%> directive output truncated writing "
2857 			      "%wu byte into a region of size %wu",
2858 			      "%<%.*s%> directive output truncated writing "
2859 			      "%wu bytes into a region of size %wu",
2860 			      (int) dir.len, d, res.min, navail);
2861 	}
2862       if (res.min == 0 && res.max < maxbytes)
2863 	return fmtwarn (dirloc, argloc, NULL,
2864 			info.warnopt (),
2865 			info.bounded
2866 			? (maybe
2867 			   ? G_("%<%.*s%> directive output may be truncated "
2868 				"writing up to %wu bytes into a region of "
2869 				"size %wu")
2870 			   : G_("%<%.*s%> directive output truncated writing "
2871 				"up to %wu bytes into a region of size %wu"))
2872 			: G_("%<%.*s%> directive writing up to %wu bytes "
2873 			     "into a region of size %wu"), (int) dir.len,
2874 			target_to_host (hostdir, sizeof hostdir, dir.beg),
2875 			res.max, navail);
2876 
2877       if (res.min == 0 && maxbytes <= res.max)
2878 	/* This is a special case to avoid issuing the potentially
2879 	   confusing warning:
2880 	     writing 0 or more bytes into a region of size 0.  */
2881 	return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2882 			info.bounded
2883 			? (maybe
2884 			   ? G_("%<%.*s%> directive output may be truncated "
2885 				"writing likely %wu or more bytes into a "
2886 				"region of size %wu")
2887 			   : G_("%<%.*s%> directive output truncated writing "
2888 				"likely %wu or more bytes into a region of "
2889 				"size %wu"))
2890 			: G_("%<%.*s%> directive writing likely %wu or more "
2891 			     "bytes into a region of size %wu"), (int) dir.len,
2892 			target_to_host (hostdir, sizeof hostdir, dir.beg),
2893 			res.likely, navail);
2894 
2895       if (res.max < maxbytes)
2896 	return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2897 			info.bounded
2898 			? (maybe
2899 			   ? G_("%<%.*s%> directive output may be truncated "
2900 				"writing between %wu and %wu bytes into a "
2901 				"region of size %wu")
2902 			   : G_("%<%.*s%> directive output truncated "
2903 				"writing between %wu and %wu bytes into a "
2904 				"region of size %wu"))
2905 			: G_("%<%.*s%> directive writing between %wu and "
2906 			     "%wu bytes into a region of size %wu"),
2907 			(int) dir.len,
2908 			target_to_host (hostdir, sizeof hostdir, dir.beg),
2909 			res.min, res.max, navail);
2910 
2911       return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2912 		      info.bounded
2913 		      ? (maybe
2914 			 ? G_("%<%.*s%> directive output may be truncated "
2915 			      "writing %wu or more bytes into a region of "
2916 			      "size %wu")
2917 			 : G_("%<%.*s%> directive output truncated writing "
2918 			      "%wu or more bytes into a region of size %wu"))
2919 		      : G_("%<%.*s%> directive writing %wu or more bytes "
2920 			   "into a region of size %wu"), (int) dir.len,
2921 		      target_to_host (hostdir, sizeof hostdir, dir.beg),
2922 		      res.min, navail);
2923     }
2924 
2925   /* The size of the destination region is a range.  */
2926 
2927   if (target_to_host (*dir.beg) != '%')
2928     {
2929       unsigned HOST_WIDE_INT navail = avail_range.max;
2930 
2931       /* For plain character directives (i.e., the format string itself)
2932 	 but not others, point the caret at the first character that's
2933 	 past the end of the destination.  */
2934       if (navail < dir.len)
2935 	dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
2936     }
2937 
2938   if (*dir.beg == '\0')
2939     {
2940       gcc_assert (res.min == 1 && res.min == res.max);
2941 
2942       return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
2943 		      info.bounded
2944 		      ? (maybe
2945 			 ? G_("%qE output may be truncated before the last "
2946 			      "format character")
2947 			 : G_("%qE output truncated before the last format "
2948 			      "character"))
2949 		      : (maybe
2950 			 ? G_("%qE may write a terminating nul past the end "
2951 			      "of the destination")
2952 			 : G_("%qE writing a terminating nul past the end "
2953 			      "of the destination")), info.func);
2954     }
2955 
2956   if (res.min == res.max)
2957     {
2958       const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
2959       if (!info.bounded)
2960 	return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2961 			  "%<%.*s%> directive writing %wu byte into a region "
2962 			  "of size between %wu and %wu",
2963 			  "%<%.*s%> directive writing %wu bytes into a region "
2964 			  "of size between %wu and %wu", (int) dir.len, d,
2965 			  res.min, avail_range.min, avail_range.max);
2966       else if (maybe)
2967 	return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2968 			  "%<%.*s%> directive output may be truncated writing "
2969 			  "%wu byte into a region of size between %wu and %wu",
2970 			  "%<%.*s%> directive output may be truncated writing "
2971 			  "%wu bytes into a region of size between %wu and "
2972 			  "%wu", (int) dir.len, d, res.min, avail_range.min,
2973 			  avail_range.max);
2974       else
2975 	return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2976 			  "%<%.*s%> directive output truncated writing %wu "
2977 			  "byte into a region of size between %wu and %wu",
2978 			  "%<%.*s%> directive output truncated writing %wu "
2979 			  "bytes into a region of size between %wu and %wu",
2980 			  (int) dir.len, d, res.min, avail_range.min,
2981 			  avail_range.max);
2982     }
2983 
2984   if (res.min == 0 && res.max < maxbytes)
2985     return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2986 		    info.bounded
2987 		    ? (maybe
2988 		       ? G_("%<%.*s%> directive output may be truncated "
2989 			    "writing up to %wu bytes into a region of size "
2990 			    "between %wu and %wu")
2991 		       : G_("%<%.*s%> directive output truncated writing "
2992 			    "up to %wu bytes into a region of size between "
2993 			    "%wu and %wu"))
2994 		    : G_("%<%.*s%> directive writing up to %wu bytes "
2995 			 "into a region of size between %wu and %wu"),
2996 		    (int) dir.len,
2997 		    target_to_host (hostdir, sizeof hostdir, dir.beg),
2998 		    res.max, avail_range.min, avail_range.max);
2999 
3000   if (res.min == 0 && maxbytes <= res.max)
3001     /* This is a special case to avoid issuing the potentially confusing
3002        warning:
3003 	 writing 0 or more bytes into a region of size between 0 and N.  */
3004     return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3005 		    info.bounded
3006 		    ? (maybe
3007 		       ? G_("%<%.*s%> directive output may be truncated "
3008 			    "writing likely %wu or more bytes into a region "
3009 			    "of size between %wu and %wu")
3010 		       : G_("%<%.*s%> directive output truncated writing "
3011 			    "likely %wu or more bytes into a region of size "
3012 			    "between %wu and %wu"))
3013 		    : G_("%<%.*s%> directive writing likely %wu or more bytes "
3014 			 "into a region of size between %wu and %wu"),
3015 		    (int) dir.len,
3016 		    target_to_host (hostdir, sizeof hostdir, dir.beg),
3017 		    res.likely, avail_range.min, avail_range.max);
3018 
3019   if (res.max < maxbytes)
3020     return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3021 		    info.bounded
3022 		    ? (maybe
3023 		       ? G_("%<%.*s%> directive output may be truncated "
3024 			    "writing between %wu and %wu bytes into a region "
3025 			    "of size between %wu and %wu")
3026 		       : G_("%<%.*s%> directive output truncated writing "
3027 			    "between %wu and %wu bytes into a region of size "
3028 			    "between %wu and %wu"))
3029 		    : G_("%<%.*s%> directive writing between %wu and "
3030 			 "%wu bytes into a region of size between %wu and "
3031 			 "%wu"), (int) dir.len,
3032 		    target_to_host (hostdir, sizeof hostdir, dir.beg),
3033 		    res.min, res.max, avail_range.min, avail_range.max);
3034 
3035   return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3036 		  info.bounded
3037 		  ? (maybe
3038 		     ? G_("%<%.*s%> directive output may be truncated writing "
3039 			  "%wu or more bytes into a region of size between "
3040 			  "%wu and %wu")
3041 		     : G_("%<%.*s%> directive output truncated writing "
3042 			  "%wu or more bytes into a region of size between "
3043 			  "%wu and %wu"))
3044 		  : G_("%<%.*s%> directive writing %wu or more bytes "
3045 		       "into a region of size between %wu and %wu"),
3046 		  (int) dir.len,
3047 		  target_to_host (hostdir, sizeof hostdir, dir.beg),
3048 		  res.min, avail_range.min, avail_range.max);
3049 }
3050 
3051 /* Given the formatting result described by RES and NAVAIL, the number
3052    of available bytes in the destination, return the range of bytes
3053    remaining in the destination.  */
3054 
3055 static inline result_range
bytes_remaining(unsigned HOST_WIDE_INT navail,const format_result & res)3056 bytes_remaining (unsigned HOST_WIDE_INT navail, const format_result &res)
3057 {
3058   result_range range;
3059 
3060   if (HOST_WIDE_INT_MAX <= navail)
3061     {
3062       range.min = range.max = range.likely = range.unlikely = navail;
3063       return range;
3064     }
3065 
3066   /* The lower bound of the available range is the available size
3067      minus the maximum output size, and the upper bound is the size
3068      minus the minimum.  */
3069   range.max = res.range.min < navail ? navail - res.range.min : 0;
3070 
3071   range.likely = res.range.likely < navail ? navail - res.range.likely : 0;
3072 
3073   if (res.range.max < HOST_WIDE_INT_MAX)
3074     range.min = res.range.max < navail ? navail - res.range.max : 0;
3075   else
3076     range.min = range.likely;
3077 
3078   range.unlikely = (res.range.unlikely < navail
3079 		    ? navail - res.range.unlikely : 0);
3080 
3081   return range;
3082 }
3083 
3084 /* Compute the length of the output resulting from the directive DIR
3085    in a call described by INFO and update the overall result of the call
3086    in *RES.  Return true if the directive has been handled.  */
3087 
3088 static bool
format_directive(const call_info & info,format_result * res,const directive & dir,pointer_query & ptr_qry)3089 format_directive (const call_info &info,
3090 		  format_result *res, const directive &dir,
3091 		  pointer_query &ptr_qry)
3092 {
3093   /* Offset of the beginning of the directive from the beginning
3094      of the format string.  */
3095   size_t offset = dir.beg - info.fmtstr;
3096   size_t start = offset;
3097   size_t length = offset + dir.len - !!dir.len;
3098 
3099   /* Create a location for the whole directive from the % to the format
3100      specifier.  */
3101   substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3102 			offset, start, length);
3103 
3104   /* Also get the location of the argument if possible.
3105      This doesn't work for integer literals or function calls.  */
3106   location_t argloc = UNKNOWN_LOCATION;
3107   if (dir.arg)
3108     argloc = EXPR_LOCATION (dir.arg);
3109 
3110   /* Bail when there is no function to compute the output length,
3111      or when minimum length checking has been disabled.   */
3112   if (!dir.fmtfunc || res->range.min >= HOST_WIDE_INT_MAX)
3113     return false;
3114 
3115   /* Compute the range of lengths of the formatted output.  */
3116   fmtresult fmtres = dir.fmtfunc (dir, dir.arg, ptr_qry);
3117 
3118   /* Record whether the output of all directives is known to be
3119      bounded by some maximum, implying that their arguments are
3120      either known exactly or determined to be in a known range
3121      or, for strings, limited by the upper bounds of the arrays
3122      they refer to.  */
3123   res->knownrange &= fmtres.knownrange;
3124 
3125   if (!fmtres.knownrange)
3126     {
3127       /* Only when the range is known, check it against the host value
3128 	 of INT_MAX + (the number of bytes of the "%.*Lf" directive with
3129 	 INT_MAX precision, which is the longest possible output of any
3130 	 single directive).  That's the largest valid byte count (though
3131 	 not valid call to a printf-like function because it can never
3132 	 return such a count).  Otherwise, the range doesn't correspond
3133 	 to known values of the argument.  */
3134       if (fmtres.range.max > target_dir_max ())
3135 	{
3136 	  /* Normalize the MAX counter to avoid having to deal with it
3137 	     later.  The counter can be less than HOST_WIDE_INT_M1U
3138 	     when compiling for an ILP32 target on an LP64 host.  */
3139 	  fmtres.range.max = HOST_WIDE_INT_M1U;
3140 	  /* Disable exact and maximum length checking after a failure
3141 	     to determine the maximum number of characters (for example
3142 	     for wide characters or wide character strings) but continue
3143 	     tracking the minimum number of characters.  */
3144 	  res->range.max = HOST_WIDE_INT_M1U;
3145 	}
3146 
3147       if (fmtres.range.min > target_dir_max ())
3148 	{
3149 	  /* Disable exact length checking after a failure to determine
3150 	     even the minimum number of characters (it shouldn't happen
3151 	     except in an error) but keep tracking the minimum and maximum
3152 	     number of characters.  */
3153 	  return true;
3154 	}
3155     }
3156 
3157   /* Buffer for the directive in the host character set (used when
3158      the source character set is different).  */
3159   char hostdir[32];
3160 
3161   int dirlen = dir.len;
3162 
3163   if (fmtres.nullp)
3164     {
3165       fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3166 	       "%<%.*s%> directive argument is null",
3167 	       dirlen, target_to_host (hostdir, sizeof hostdir, dir.beg));
3168 
3169       /* Don't bother processing the rest of the format string.  */
3170       res->warned = true;
3171       res->range.min = HOST_WIDE_INT_M1U;
3172       res->range.max = HOST_WIDE_INT_M1U;
3173       return false;
3174     }
3175 
3176   /* Compute the number of available bytes in the destination.  There
3177      must always be at least one byte of space for the terminating
3178      NUL that's appended after the format string has been processed.  */
3179   result_range avail_range = bytes_remaining (info.objsize, *res);
3180 
3181   /* If the argument aliases a part of the destination of the formatted
3182      call at offset FMTRES.DST_OFFSET append the directive and its result
3183      to the set of aliases for later processing.  */
3184   if (fmtres.dst_offset != HOST_WIDE_INT_MIN)
3185     res->append_alias (dir, fmtres.dst_offset, fmtres.range);
3186 
3187   bool warned = res->warned;
3188 
3189   if (!warned)
3190     warned = maybe_warn (dirloc, argloc, info, avail_range,
3191 			 fmtres.range, dir);
3192 
3193   /* Bump up the total maximum if it isn't too big.  */
3194   if (res->range.max < HOST_WIDE_INT_MAX
3195       && fmtres.range.max < HOST_WIDE_INT_MAX)
3196     res->range.max += fmtres.range.max;
3197 
3198   /* Raise the total unlikely maximum by the larger of the maximum
3199      and the unlikely maximum.  */
3200   unsigned HOST_WIDE_INT save = res->range.unlikely;
3201   if (fmtres.range.max < fmtres.range.unlikely)
3202     res->range.unlikely += fmtres.range.unlikely;
3203   else
3204     res->range.unlikely += fmtres.range.max;
3205 
3206   if (res->range.unlikely < save)
3207     res->range.unlikely = HOST_WIDE_INT_M1U;
3208 
3209   res->range.min += fmtres.range.min;
3210   res->range.likely += fmtres.range.likely;
3211 
3212   /* Has the minimum directive output length exceeded the maximum
3213      of 4095 bytes required to be supported?  */
3214   bool minunder4k = fmtres.range.min < 4096;
3215   bool maxunder4k = fmtres.range.max < 4096;
3216   /* Clear POSUNDER4K in the overall result if the maximum has exceeded
3217      the 4k (this is necessary to avoid the return value optimization
3218      that may not be safe in the maximum case).  */
3219   if (!maxunder4k)
3220     res->posunder4k = false;
3221   /* Also clear POSUNDER4K if the directive may fail.  */
3222   if (fmtres.mayfail)
3223     res->posunder4k = false;
3224 
3225   if (!warned
3226       /* Only warn at level 2.  */
3227       && warn_level > 1
3228       /* Only warn for string functions.  */
3229       && info.is_string_func ()
3230       && (!minunder4k
3231 	  || (!maxunder4k && fmtres.range.max < HOST_WIDE_INT_MAX)))
3232     {
3233       /* The directive output may be longer than the maximum required
3234 	 to be handled by an implementation according to 7.21.6.1, p15
3235 	 of C11.  Warn on this only at level 2 but remember this and
3236 	 prevent folding the return value when done.  This allows for
3237 	 the possibility of the actual libc call failing due to ENOMEM
3238 	 (like Glibc does with very large precision or width).
3239 	 Issue the "may exceed" warning only for string functions and
3240 	 not for fprintf or printf.  */
3241 
3242       if (fmtres.range.min == fmtres.range.max)
3243 	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3244 			  "%<%.*s%> directive output of %wu bytes exceeds "
3245 			  "minimum required size of 4095", dirlen,
3246 			  target_to_host (hostdir, sizeof hostdir, dir.beg),
3247 			  fmtres.range.min);
3248       else if (!minunder4k)
3249 	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3250 			  "%<%.*s%> directive output between %wu and %wu "
3251 			  "bytes exceeds minimum required size of 4095",
3252 			  dirlen,
3253 			  target_to_host (hostdir, sizeof hostdir, dir.beg),
3254 			  fmtres.range.min, fmtres.range.max);
3255       else if (!info.retval_used () && info.is_string_func ())
3256 	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3257 			  "%<%.*s%> directive output between %wu and %wu "
3258 			  "bytes may exceed minimum required size of "
3259 			  "4095",
3260 			  dirlen,
3261 			  target_to_host (hostdir, sizeof hostdir, dir.beg),
3262 			  fmtres.range.min, fmtres.range.max);
3263     }
3264 
3265   /* Has the likely and maximum directive output exceeded INT_MAX?  */
3266   bool likelyximax = *dir.beg && res->range.likely > target_int_max ();
3267   /* Don't consider the maximum to be in excess when it's the result
3268      of a string of unknown length (i.e., whose maximum has been set
3269      to be greater than or equal to HOST_WIDE_INT_MAX.  */
3270   bool maxximax = (*dir.beg
3271 		   && res->range.max > target_int_max ()
3272 		   && res->range.max < HOST_WIDE_INT_MAX);
3273 
3274   if (!warned
3275       /* Warn for the likely output size at level 1.  */
3276       && (likelyximax
3277 	  /* But only warn for the maximum at level 2.  */
3278 	  || (warn_level > 1
3279 	      && maxximax
3280 	      && fmtres.range.max < HOST_WIDE_INT_MAX)))
3281     {
3282       if (fmtres.range.min > target_int_max ())
3283 	{
3284 	  /* The directive output exceeds INT_MAX bytes.  */
3285 	  if (fmtres.range.min == fmtres.range.max)
3286 	    warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3287 			      "%<%.*s%> directive output of %wu bytes exceeds "
3288 			      "%<INT_MAX%>", dirlen,
3289 			      target_to_host (hostdir, sizeof hostdir, dir.beg),
3290 			      fmtres.range.min);
3291 	  else
3292 	    warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3293 			      "%<%.*s%> directive output between %wu and "
3294 			      "%wu bytes exceeds %<INT_MAX%>", dirlen,
3295 			      target_to_host (hostdir, sizeof hostdir, dir.beg),
3296 			      fmtres.range.min, fmtres.range.max);
3297 	}
3298       else if (res->range.min > target_int_max ())
3299 	{
3300 	  /* The directive output is under INT_MAX but causes the result
3301 	     to exceed INT_MAX bytes.  */
3302 	  if (fmtres.range.min == fmtres.range.max)
3303 	    warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3304 			      "%<%.*s%> directive output of %wu bytes causes "
3305 			      "result to exceed %<INT_MAX%>", dirlen,
3306 			      target_to_host (hostdir, sizeof hostdir, dir.beg),
3307 			      fmtres.range.min);
3308 	  else
3309 	    warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3310 			      "%<%.*s%> directive output between %wu and "
3311 			      "%wu bytes causes result to exceed %<INT_MAX%>",
3312 			      dirlen,
3313 			      target_to_host (hostdir, sizeof hostdir, dir.beg),
3314 			      fmtres.range.min, fmtres.range.max);
3315 	}
3316       else if ((!info.retval_used () || !info.bounded)
3317 	       && (info.is_string_func ()))
3318 	/* Warn for calls to string functions that either aren't bounded
3319 	   (sprintf) or whose return value isn't used.  */
3320 	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3321 			  "%<%.*s%> directive output between %wu and "
3322 			  "%wu bytes may cause result to exceed "
3323 			  "%<INT_MAX%>", dirlen,
3324 			  target_to_host (hostdir, sizeof hostdir, dir.beg),
3325 			  fmtres.range.min, fmtres.range.max);
3326     }
3327 
3328   if (!warned && fmtres.nonstr)
3329     {
3330       warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3331 			"%<%.*s%> directive argument is not a nul-terminated "
3332 			"string",
3333 			dirlen,
3334 			target_to_host (hostdir, sizeof hostdir, dir.beg));
3335       if (warned && DECL_P (fmtres.nonstr))
3336 	inform (DECL_SOURCE_LOCATION (fmtres.nonstr),
3337 		"referenced argument declared here");
3338       return false;
3339     }
3340 
3341   if (warned && fmtres.range.min < fmtres.range.likely
3342       && fmtres.range.likely < fmtres.range.max)
3343     inform_n (info.fmtloc, fmtres.range.likely,
3344 	      "assuming directive output of %wu byte",
3345 	      "assuming directive output of %wu bytes",
3346 	      fmtres.range.likely);
3347 
3348   if (warned && fmtres.argmin)
3349     {
3350       if (fmtres.argmin == fmtres.argmax)
3351 	inform (info.fmtloc, "directive argument %qE", fmtres.argmin);
3352       else if (fmtres.knownrange)
3353 	inform (info.fmtloc, "directive argument in the range [%E, %E]",
3354 		fmtres.argmin, fmtres.argmax);
3355       else
3356 	inform (info.fmtloc,
3357 		"using the range [%E, %E] for directive argument",
3358 		fmtres.argmin, fmtres.argmax);
3359     }
3360 
3361   res->warned |= warned;
3362 
3363   if (!dir.beg[0] && res->warned)
3364     {
3365       location_t callloc = gimple_location (info.callstmt);
3366 
3367       unsigned HOST_WIDE_INT min = res->range.min;
3368       unsigned HOST_WIDE_INT max = res->range.max;
3369 
3370       if (info.objsize < HOST_WIDE_INT_MAX)
3371 	{
3372 	  /* If a warning has been issued for buffer overflow or truncation
3373 	     help the user figure out how big a buffer they need.  */
3374 
3375 	  if (min == max)
3376 	    inform_n (callloc, min,
3377 		      "%qE output %wu byte into a destination of size %wu",
3378 		      "%qE output %wu bytes into a destination of size %wu",
3379 		      info.func, min, info.objsize);
3380 	  else if (max < HOST_WIDE_INT_MAX)
3381 	    inform (callloc,
3382 		    "%qE output between %wu and %wu bytes into "
3383 		    "a destination of size %wu",
3384 		    info.func, min, max, info.objsize);
3385 	  else if (min < res->range.likely && res->range.likely < max)
3386 	    inform (callloc,
3387 		    "%qE output %wu or more bytes (assuming %wu) into "
3388 		    "a destination of size %wu",
3389 		    info.func, min, res->range.likely, info.objsize);
3390 	  else
3391 	    inform (callloc,
3392 		    "%qE output %wu or more bytes into a destination of size "
3393 		    "%wu",
3394 		    info.func, min, info.objsize);
3395 	}
3396       else if (!info.is_string_func ())
3397 	{
	  /* If the warning is for a file function like fprintf
	     or printf with no destination size just print the computed
	     result.  */
3401 	  if (min == max)
3402 	    inform_n (callloc, min,
3403 		      "%qE output %wu byte", "%qE output %wu bytes",
3404 		      info.func, min);
3405 	  else if (max < HOST_WIDE_INT_MAX)
3406 	    inform (callloc,
3407 		    "%qE output between %wu and %wu bytes",
3408 		    info.func, min, max);
3409 	  else if (min < res->range.likely && res->range.likely < max)
3410 	    inform (callloc,
3411 		    "%qE output %wu or more bytes (assuming %wu)",
3412 		    info.func, min, res->range.likely);
3413 	  else
3414 	    inform (callloc,
3415 		    "%qE output %wu or more bytes",
3416 		    info.func, min);
3417 	}
3418     }
3419 
3420   if (dump_file && *dir.beg)
3421     {
3422       fprintf (dump_file,
3423 	       "    Result: "
3424 	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
3425 	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC " ("
3426 	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
3427 	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ")\n",
3428 	       fmtres.range.min, fmtres.range.likely,
3429 	       fmtres.range.max, fmtres.range.unlikely,
3430 	       res->range.min, res->range.likely,
3431 	       res->range.max, res->range.unlikely);
3432     }
3433 
3434   return true;
3435 }
3436 
3437 /* Parse a format directive in function call described by INFO starting
3438    at STR and populate DIR structure.  Bump up *ARGNO by the number of
3439    arguments extracted for the directive.  Return the length of
3440    the directive.  */
3441 
3442 static size_t
parse_directive(call_info & info,directive & dir,format_result * res,const char * str,unsigned * argno,range_query * query)3443 parse_directive (call_info &info,
3444 		 directive &dir, format_result *res,
3445 		 const char *str, unsigned *argno,
3446 		 range_query *query)
3447 {
3448   const char *pcnt = strchr (str, target_percent);
3449   dir.beg = str;
3450 
3451   if (size_t len = pcnt ? pcnt - str : *str ? strlen (str) : 1)
3452     {
3453       /* This directive is either a plain string or the terminating nul
3454 	 (which isn't really a directive but it simplifies things to
3455 	 handle it as if it were).  */
3456       dir.len = len;
3457       dir.fmtfunc = format_plain;
3458 
3459       if (dump_file)
3460 	{
3461 	  fprintf (dump_file, "  Directive %u at offset "
3462 		   HOST_WIDE_INT_PRINT_UNSIGNED ": \"%.*s\", "
3463 		   "length = " HOST_WIDE_INT_PRINT_UNSIGNED "\n",
3464 		   dir.dirno,
3465 		   (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3466 		   (int)dir.len, dir.beg, (unsigned HOST_WIDE_INT) dir.len);
3467 	}
3468 
3469       return len - !*str;
3470     }
3471 
3472   /* Set the directive argument's number to correspond to its position
3473      in the formatted function call's argument list.  */
3474   dir.argno = *argno;
3475 
3476   const char *pf = pcnt + 1;
3477 
3478     /* POSIX numbered argument index or zero when none.  */
3479   HOST_WIDE_INT dollar = 0;
3480 
3481   /* With and precision.  -1 when not specified, HOST_WIDE_INT_MIN
3482      when given by a va_list argument, and a non-negative value
3483      when specified in the format string itself.  */
3484   HOST_WIDE_INT width = -1;
3485   HOST_WIDE_INT precision = -1;
3486 
3487   /* Pointers to the beginning of the width and precision decimal
3488      string (if any) within the directive.  */
3489   const char *pwidth = 0;
3490   const char *pprec = 0;
3491 
3492   /* When the value of the decimal string that specifies width or
3493      precision is out of range, points to the digit that causes
3494      the value to exceed the limit.  */
3495   const char *werange = NULL;
3496   const char *perange = NULL;
3497 
3498   /* Width specified via the asterisk.  Need not be INTEGER_CST.
3499      For vararg functions set to void_node.  */
3500   tree star_width = NULL_TREE;
3501 
3502   /* Width specified via the asterisk.  Need not be INTEGER_CST.
3503      For vararg functions set to void_node.  */
3504   tree star_precision = NULL_TREE;
3505 
3506   if (ISDIGIT (target_to_host (*pf)))
3507     {
3508       /* This could be either a POSIX positional argument, the '0'
3509 	 flag, or a width, depending on what follows.  Store it as
3510 	 width and sort it out later after the next character has
3511 	 been seen.  */
3512       pwidth = pf;
3513       width = target_strtowi (&pf, &werange);
3514     }
3515   else if (target_to_host (*pf) == '*')
3516     {
3517       /* Similarly to the block above, this could be either a POSIX
3518 	 positional argument or a width, depending on what follows.  */
3519       if (*argno < gimple_call_num_args (info.callstmt))
3520 	star_width = gimple_call_arg (info.callstmt, (*argno)++);
3521       else
3522 	star_width = void_node;
3523       ++pf;
3524     }
3525 
3526   if (target_to_host (*pf) == '$')
3527     {
3528       /* Handle the POSIX dollar sign which references the 1-based
3529 	 positional argument number.  */
3530       if (width != -1)
3531 	dollar = width + info.argidx;
3532       else if (star_width
3533 	       && TREE_CODE (star_width) == INTEGER_CST
3534 	       && (TYPE_PRECISION (TREE_TYPE (star_width))
3535 		   <= TYPE_PRECISION (integer_type_node)))
3536 	dollar = width + tree_to_shwi (star_width);
3537 
3538       /* Bail when the numbered argument is out of range (it will
3539 	 have already been diagnosed by -Wformat).  */
3540       if (dollar == 0
3541 	  || dollar == (int)info.argidx
3542 	  || dollar > gimple_call_num_args (info.callstmt))
3543 	return false;
3544 
3545       --dollar;
3546 
3547       star_width = NULL_TREE;
3548       width = -1;
3549       ++pf;
3550     }
3551 
3552   if (dollar || !star_width)
3553     {
3554       if (width != -1)
3555 	{
3556 	  if (width == 0)
3557 	    {
3558 	      /* The '0' that has been interpreted as a width above is
3559 		 actually a flag.  Reset HAVE_WIDTH, set the '0' flag,
3560 		 and continue processing other flags.  */
3561 	      width = -1;
3562 	      dir.set_flag ('0');
3563 	    }
3564 	  else if (!dollar)
3565 	    {
3566 	      /* (Non-zero) width has been seen.  The next character
3567 		 is either a period or a digit.  */
3568 	      goto start_precision;
3569 	    }
3570 	}
3571       /* When either '$' has been seen, or width has not been seen,
3572 	 the next field is the optional flags followed by an optional
3573 	 width.  */
3574       for ( ; ; ) {
3575 	switch (target_to_host (*pf))
3576 	  {
3577 	  case ' ':
3578 	  case '0':
3579 	  case '+':
3580 	  case '-':
3581 	  case '#':
3582 	    dir.set_flag (target_to_host (*pf++));
3583 	    break;
3584 
3585 	  default:
3586 	    goto start_width;
3587 	  }
3588       }
3589 
3590     start_width:
3591       if (ISDIGIT (target_to_host (*pf)))
3592 	{
3593 	  werange = 0;
3594 	  pwidth = pf;
3595 	  width = target_strtowi (&pf, &werange);
3596 	}
3597       else if (target_to_host (*pf) == '*')
3598 	{
3599 	  if (*argno < gimple_call_num_args (info.callstmt))
3600 	    star_width = gimple_call_arg (info.callstmt, (*argno)++);
3601 	  else
3602 	    {
3603 	      /* This is (likely) a va_list.  It could also be an invalid
3604 		 call with insufficient arguments.  */
3605 	      star_width = void_node;
3606 	    }
3607 	  ++pf;
3608 	}
3609       else if (target_to_host (*pf) == '\'')
3610 	{
3611 	  /* The POSIX apostrophe indicating a numeric grouping
3612 	     in the current locale.  Even though it's possible to
3613 	     estimate the upper bound on the size of the output
3614 	     based on the number of digits it probably isn't worth
3615 	     continuing.  */
3616 	  return 0;
3617 	}
3618     }
3619 
3620  start_precision:
3621   if (target_to_host (*pf) == '.')
3622     {
3623       ++pf;
3624 
3625       if (ISDIGIT (target_to_host (*pf)))
3626 	{
3627 	  pprec = pf;
3628 	  precision = target_strtowi (&pf, &perange);
3629 	}
3630       else if (target_to_host (*pf) == '*')
3631 	{
3632 	  if (*argno < gimple_call_num_args (info.callstmt))
3633 	    star_precision = gimple_call_arg (info.callstmt, (*argno)++);
3634 	  else
3635 	    {
3636 	      /* This is (likely) a va_list.  It could also be an invalid
3637 		 call with insufficient arguments.  */
3638 	      star_precision = void_node;
3639 	    }
3640 	  ++pf;
3641 	}
3642       else
3643 	{
3644 	  /* The decimal precision or the asterisk are optional.
3645 	     When neither is specified it's taken to be zero.  */
3646 	  precision = 0;
3647 	}
3648     }
3649 
3650   switch (target_to_host (*pf))
3651     {
3652     case 'h':
3653       if (target_to_host (pf[1]) == 'h')
3654 	{
3655 	  ++pf;
3656 	  dir.modifier = FMT_LEN_hh;
3657 	}
3658       else
3659 	dir.modifier = FMT_LEN_h;
3660       ++pf;
3661       break;
3662 
3663     case 'j':
3664       dir.modifier = FMT_LEN_j;
3665       ++pf;
3666       break;
3667 
3668     case 'L':
3669       dir.modifier = FMT_LEN_L;
3670       ++pf;
3671       break;
3672 
3673     case 'l':
3674       if (target_to_host (pf[1]) == 'l')
3675 	{
3676 	  ++pf;
3677 	  dir.modifier = FMT_LEN_ll;
3678 	}
3679       else
3680 	dir.modifier = FMT_LEN_l;
3681       ++pf;
3682       break;
3683 
3684     case 't':
3685       dir.modifier = FMT_LEN_t;
3686       ++pf;
3687       break;
3688 
3689     case 'z':
3690       dir.modifier = FMT_LEN_z;
3691       ++pf;
3692       break;
3693     }
3694 
3695   switch (target_to_host (*pf))
3696     {
3697       /* Handle a sole '%' character the same as "%%" but since it's
3698 	 undefined prevent the result from being folded.  */
3699     case '\0':
3700       --pf;
3701       res->range.min = res->range.max = HOST_WIDE_INT_M1U;
3702       /* FALLTHRU */
3703     case '%':
3704       dir.fmtfunc = format_percent;
3705       break;
3706 
3707     case 'a':
3708     case 'A':
3709     case 'e':
3710     case 'E':
3711     case 'f':
3712     case 'F':
3713     case 'g':
3714     case 'G':
3715       res->floating = true;
3716       dir.fmtfunc = format_floating;
3717       break;
3718 
3719     case 'd':
3720     case 'i':
3721     case 'o':
3722     case 'u':
3723     case 'x':
3724     case 'X':
3725       dir.fmtfunc = format_integer;
3726       break;
3727 
3728     case 'p':
3729       /* The %p output is implementation-defined.  It's possible
3730 	 to determine this format but due to extensions (especially
3731 	 those of the Linux kernel -- see bug 78512) the first %p
3732 	 in the format string disables any further processing.  */
3733       return false;
3734 
3735     case 'n':
3736       /* %n has side-effects even when nothing is actually printed to
3737 	 any buffer.  */
3738       info.nowrite = false;
3739       dir.fmtfunc = format_none;
3740       break;
3741 
3742     case 'C':
3743     case 'c':
3744       /* POSIX wide character and C/POSIX narrow character.  */
3745       dir.fmtfunc = format_character;
3746       break;
3747 
3748     case 'S':
3749     case 's':
3750       /* POSIX wide string and C/POSIX narrow character string.  */
3751       dir.fmtfunc = format_string;
3752       break;
3753 
3754     default:
3755       /* Unknown conversion specification.  */
3756       return 0;
3757     }
3758 
3759   dir.specifier = target_to_host (*pf++);
3760 
3761   /* Store the length of the format directive.  */
3762   dir.len = pf - pcnt;
3763 
3764   /* Buffer for the directive in the host character set (used when
3765      the source character set is different).  */
3766   char hostdir[32];
3767 
3768   if (star_width)
3769     {
3770       if (INTEGRAL_TYPE_P (TREE_TYPE (star_width)))
3771 	dir.set_width (star_width, query);
3772       else
3773 	{
3774 	  /* Width specified by a va_list takes on the range [0, -INT_MIN]
3775 	     (width is the absolute value of that specified).  */
3776 	  dir.width[0] = 0;
3777 	  dir.width[1] = target_int_max () + 1;
3778 	}
3779     }
3780   else
3781     {
3782       if (width == HOST_WIDE_INT_MAX && werange)
3783 	{
3784 	  size_t begin = dir.beg - info.fmtstr + (pwidth - pcnt);
3785 	  size_t caret = begin + (werange - pcnt);
3786 	  size_t end = pf - info.fmtstr - 1;
3787 
3788 	  /* Create a location for the width part of the directive,
3789 	     pointing the caret at the first out-of-range digit.  */
3790 	  substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3791 				caret, begin, end);
3792 
3793 	  fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3794 		   "%<%.*s%> directive width out of range", (int) dir.len,
3795 		   target_to_host (hostdir, sizeof hostdir, dir.beg));
3796 	}
3797 
3798       dir.set_width (width);
3799     }
3800 
3801   if (star_precision)
3802     {
3803       if (INTEGRAL_TYPE_P (TREE_TYPE (star_precision)))
3804 	dir.set_precision (star_precision, query);
3805       else
3806 	{
3807 	  /* Precision specified by a va_list takes on the range [-1, INT_MAX]
3808 	     (unlike width, negative precision is ignored).  */
3809 	  dir.prec[0] = -1;
3810 	  dir.prec[1] = target_int_max ();
3811 	}
3812     }
3813   else
3814     {
3815       if (precision == HOST_WIDE_INT_MAX && perange)
3816 	{
3817 	  size_t begin = dir.beg - info.fmtstr + (pprec - pcnt) - 1;
3818 	  size_t caret = dir.beg - info.fmtstr + (perange - pcnt) - 1;
3819 	  size_t end = pf - info.fmtstr - 2;
3820 
3821 	  /* Create a location for the precision part of the directive,
3822 	     including the leading period, pointing the caret at the first
3823 	     out-of-range digit .  */
3824 	  substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3825 				caret, begin, end);
3826 
3827 	  fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3828 		   "%<%.*s%> directive precision out of range", (int) dir.len,
3829 		   target_to_host (hostdir, sizeof hostdir, dir.beg));
3830 	}
3831 
3832       dir.set_precision (precision);
3833     }
3834 
3835   /* Extract the argument if the directive takes one and if it's
3836      available (e.g., the function doesn't take a va_list).  Treat
3837      missing arguments the same as va_list, even though they will
3838      have likely already been diagnosed by -Wformat.  */
3839   if (dir.specifier != '%'
3840       && *argno < gimple_call_num_args (info.callstmt))
3841     dir.arg = gimple_call_arg (info.callstmt, dollar ? dollar : (*argno)++);
3842 
3843   if (dump_file)
3844     {
3845       fprintf (dump_file,
3846 	       "  Directive %u at offset " HOST_WIDE_INT_PRINT_UNSIGNED
3847 	       ": \"%.*s\"",
3848 	       dir.dirno,
3849 	       (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3850 	       (int)dir.len, dir.beg);
3851       if (star_width)
3852 	{
3853 	  if (dir.width[0] == dir.width[1])
3854 	    fprintf (dump_file, ", width = " HOST_WIDE_INT_PRINT_DEC,
3855 		     dir.width[0]);
3856 	  else
3857 	    fprintf (dump_file,
3858 		     ", width in range [" HOST_WIDE_INT_PRINT_DEC
3859 		     ", " HOST_WIDE_INT_PRINT_DEC "]",
3860 		     dir.width[0], dir.width[1]);
3861 	}
3862 
3863       if (star_precision)
3864 	{
3865 	  if (dir.prec[0] == dir.prec[1])
3866 	    fprintf (dump_file, ", precision = " HOST_WIDE_INT_PRINT_DEC,
3867 		     dir.prec[0]);
3868 	  else
3869 	    fprintf (dump_file,
3870 		     ", precision in range [" HOST_WIDE_INT_PRINT_DEC
3871 		     HOST_WIDE_INT_PRINT_DEC "]",
3872 		     dir.prec[0], dir.prec[1]);
3873 	}
3874       fputc ('\n', dump_file);
3875     }
3876 
3877   return dir.len;
3878 }
3879 
3880 /* Diagnose overlap between destination and %s directive arguments.  */
3881 
3882 static void
maybe_warn_overlap(call_info & info,format_result * res)3883 maybe_warn_overlap (call_info &info, format_result *res)
3884 {
3885   /* Two vectors of 1-based indices corresponding to either certainly
3886      or possibly aliasing arguments.  */
3887   auto_vec<int, 16> aliasarg[2];
3888 
3889   /* Go through the array of potentially aliasing directives and collect
3890      argument numbers of those that do or may overlap the destination
3891      object given the full result.  */
3892   for (unsigned i = 0; i != res->alias_count; ++i)
3893     {
3894       const format_result::alias_info &alias = res->aliases[i];
3895 
3896       enum { possible = -1, none = 0, certain = 1 } overlap = none;
3897 
3898       /* If the precision is zero there is no overlap.  (This only
3899 	 considers %s directives and ignores %n.)  */
3900       if (alias.dir.prec[0] == 0 && alias.dir.prec[1] == 0)
3901 	continue;
3902 
3903       if (alias.offset == HOST_WIDE_INT_MAX
3904 	  || info.dst_offset == HOST_WIDE_INT_MAX)
3905 	overlap = possible;
3906       else if (alias.offset == info.dst_offset)
3907 	overlap = alias.dir.prec[0] == 0 ? possible : certain;
3908       else
3909 	{
3910 	  /* Determine overlap from the range of output and offsets
3911 	     into the same destination as the source, and rule out
3912 	     impossible overlap.  */
3913 	  unsigned HOST_WIDE_INT albeg = alias.offset;
3914 	  unsigned HOST_WIDE_INT dstbeg = info.dst_offset;
3915 
3916 	  unsigned HOST_WIDE_INT alend = albeg + alias.range.min;
3917 	  unsigned HOST_WIDE_INT dstend = dstbeg + res->range.min - 1;
3918 
3919 	  if ((albeg <= dstbeg && alend > dstbeg)
3920 	      || (albeg >= dstbeg && albeg < dstend))
3921 	    overlap = certain;
3922 	  else
3923 	    {
3924 	      alend = albeg + alias.range.max;
3925 	      if (alend < albeg)
3926 		alend = HOST_WIDE_INT_M1U;
3927 
3928 	      dstend = dstbeg + res->range.max - 1;
3929 	      if (dstend < dstbeg)
3930 		dstend = HOST_WIDE_INT_M1U;
3931 
3932 	      if ((albeg >= dstbeg && albeg <= dstend)
3933 		  || (alend >= dstbeg && alend <= dstend))
3934 		overlap = possible;
3935 	    }
3936 	}
3937 
3938       if (overlap == none)
3939 	continue;
3940 
3941       /* Append the 1-based argument number.  */
3942       aliasarg[overlap != certain].safe_push (alias.dir.argno + 1);
3943 
3944       /* Disable any kind of optimization.  */
3945       res->range.unlikely = HOST_WIDE_INT_M1U;
3946     }
3947 
3948   tree arg0 = gimple_call_arg (info.callstmt, 0);
3949   location_t loc = gimple_location (info.callstmt);
3950 
3951   bool aliaswarn = false;
3952 
3953   unsigned ncertain = aliasarg[0].length ();
3954   unsigned npossible = aliasarg[1].length ();
3955   if (ncertain && npossible)
3956     {
3957       /* If there are multiple arguments that overlap, some certainly
3958 	 and some possibly, handle both sets in a single diagnostic.  */
3959       aliaswarn
3960 	= warning_at (loc, OPT_Wrestrict,
3961 		      "%qE arguments %Z and maybe %Z overlap destination "
3962 		      "object %qE",
3963 		      info.func, aliasarg[0].address (), ncertain,
3964 		      aliasarg[1].address (), npossible,
3965 		      info.dst_origin);
3966     }
3967   else if (ncertain)
3968     {
3969       /* There is only one set of two or more arguments and they all
3970 	 certainly overlap the destination.  */
3971       aliaswarn
3972 	= warning_n (loc, OPT_Wrestrict, ncertain,
3973 		     "%qE argument %Z overlaps destination object %qE",
3974 		     "%qE arguments %Z overlap destination object %qE",
3975 		     info.func, aliasarg[0].address (), ncertain,
3976 		     info.dst_origin);
3977     }
3978   else if (npossible)
3979     {
3980       /* There is only one set of two or more arguments and they all
3981 	 may overlap (but need not).  */
3982       aliaswarn
3983 	= warning_n (loc, OPT_Wrestrict, npossible,
3984 		     "%qE argument %Z may overlap destination object %qE",
3985 		     "%qE arguments %Z may overlap destination object %qE",
3986 		     info.func, aliasarg[1].address (), npossible,
3987 		     info.dst_origin);
3988     }
3989 
3990   if (aliaswarn)
3991     {
3992       res->warned = true;
3993 
3994       if (info.dst_origin != arg0)
3995 	{
3996 	  /* If its location is different from the first argument of the call
3997 	     point either at the destination object itself or at the expression
3998 	     that was used to determine the overlap.  */
3999 	  loc = (DECL_P (info.dst_origin)
4000 		 ? DECL_SOURCE_LOCATION (info.dst_origin)
4001 		 : EXPR_LOCATION (info.dst_origin));
4002 	  if (loc != UNKNOWN_LOCATION)
4003 	    inform (loc,
4004 		    "destination object referenced by %<restrict%>-qualified "
4005 		    "argument 1 was declared here");
4006 	}
4007     }
4008 }
4009 
4010 /* Compute the length of the output resulting from the call to a formatted
4011    output function described by INFO and store the result of the call in
4012    *RES.  Issue warnings for detected past the end writes.  Return true
4013    if the complete format string has been processed and *RES can be relied
4014    on, false otherwise (e.g., when a unknown or unhandled directive was seen
4015    that caused the processing to be terminated early).  */
4016 
4017 static bool
compute_format_length(call_info & info,format_result * res,pointer_query & ptr_qry)4018 compute_format_length (call_info &info, format_result *res,
4019 		       pointer_query &ptr_qry)
4020 {
4021   if (dump_file)
4022     {
4023       location_t callloc = gimple_location (info.callstmt);
4024       fprintf (dump_file, "%s:%i: ",
4025 	       LOCATION_FILE (callloc), LOCATION_LINE (callloc));
4026       print_generic_expr (dump_file, info.func, dump_flags);
4027 
4028       fprintf (dump_file,
4029 	       ": objsize = " HOST_WIDE_INT_PRINT_UNSIGNED
4030 	       ", fmtstr = \"%s\"\n",
4031 	       info.objsize, info.fmtstr);
4032     }
4033 
4034   /* Reset the minimum and maximum byte counters.  */
4035   res->range.min = res->range.max = 0;
4036 
4037   /* No directive has been seen yet so the length of output is bounded
4038      by the known range [0, 0] (with no conversion resulting in a failure
4039      or producing more than 4K bytes) until determined otherwise.  */
4040   res->knownrange = true;
4041   res->floating = false;
4042   res->warned = false;
4043 
4044   /* 1-based directive counter.  */
4045   unsigned dirno = 1;
4046 
4047   /* The variadic argument counter.  */
4048   unsigned argno = info.argidx;
4049 
4050   bool success = true;
4051 
4052   for (const char *pf = info.fmtstr; ; ++dirno)
4053     {
4054       directive dir (&info, dirno);
4055 
4056       size_t n = parse_directive (info, dir, res, pf, &argno, ptr_qry.rvals);
4057 
4058       /* Return failure if the format function fails.  */
4059       if (!format_directive (info, res, dir, ptr_qry))
4060 	return false;
4061 
4062       /* Return success when the directive is zero bytes long and it's
4063 	 the last thing in the format string (i.e., it's the terminating
4064 	 nul, which isn't really a directive but handling it as one makes
4065 	 things simpler).  */
4066       if (!n)
4067 	{
4068 	  success = *pf == '\0';
4069 	  break;
4070 	}
4071 
4072       pf += n;
4073     }
4074 
4075   maybe_warn_overlap (info, res);
4076 
4077   /* The complete format string was processed (with or without warnings).  */
4078   return success;
4079 }
4080 
4081 /* Return the size of the object referenced by the expression DEST in
4082    statement STMT, if available, or the maximum possible size otherwise.  */
4083 
4084 static unsigned HOST_WIDE_INT
get_destination_size(tree dest,gimple * stmt,pointer_query & ptr_qry)4085 get_destination_size (tree dest, gimple *stmt, pointer_query &ptr_qry)
4086 {
4087   /* When there is no destination return the maximum.  */
4088   if (!dest)
4089     return HOST_WIDE_INT_MAX;
4090 
4091   /* Use compute_objsize to determine the size of the destination object.  */
4092   access_ref aref;
4093   if (!ptr_qry.get_ref (dest, stmt, &aref))
4094     return HOST_WIDE_INT_MAX;
4095 
4096   offset_int remsize = aref.size_remaining ();
4097   if (!wi::fits_uhwi_p (remsize))
4098     return HOST_WIDE_INT_MAX;
4099 
4100   return remsize.to_uhwi ();
4101 }
4102 
4103 /* Return true if the call described by INFO with result RES safe to
4104    optimize (i.e., no undefined behavior), and set RETVAL to the range
4105    of its return values.  */
4106 
4107 static bool
is_call_safe(const call_info & info,const format_result & res,bool under4k,unsigned HOST_WIDE_INT retval[2])4108 is_call_safe (const call_info &info,
4109 	      const format_result &res, bool under4k,
4110 	      unsigned HOST_WIDE_INT retval[2])
4111 {
4112   if (under4k && !res.posunder4k)
4113     return false;
4114 
4115   /* The minimum return value.  */
4116   retval[0] = res.range.min;
4117 
4118   /* The maximum return value is in most cases bounded by RES.RANGE.MAX
4119      but in cases involving multibyte characters could be as large as
4120      RES.RANGE.UNLIKELY.  */
4121   retval[1]
4122     = res.range.unlikely < res.range.max ? res.range.max : res.range.unlikely;
4123 
4124   /* Adjust the number of bytes which includes the terminating nul
4125      to reflect the return value of the function which does not.
4126      Because the valid range of the function is [INT_MIN, INT_MAX],
4127      a valid range before the adjustment below is [0, INT_MAX + 1]
4128      (the functions only return negative values on error or undefined
4129      behavior).  */
4130   if (retval[0] <= target_int_max () + 1)
4131     --retval[0];
4132   if (retval[1] <= target_int_max () + 1)
4133     --retval[1];
4134 
4135   /* Avoid the return value optimization when the behavior of the call
4136      is undefined either because any directive may have produced 4K or
4137      more of output, or the return value exceeds INT_MAX, or because
4138      the output overflows the destination object (but leave it enabled
4139      when the function is bounded because then the behavior is well-
4140      defined).  */
4141   if (retval[0] == retval[1]
4142       && (info.bounded || retval[0] < info.objsize)
4143       && retval[0] <= target_int_max ())
4144     return true;
4145 
4146   if ((info.bounded || retval[1] < info.objsize)
4147       && (retval[0] < target_int_max ()
4148 	  && retval[1] < target_int_max ()))
4149     return true;
4150 
4151   if (!under4k && (info.bounded || retval[0] < info.objsize))
4152     return true;
4153 
4154   return false;
4155 }
4156 
4157 /* Given a suitable result RES of a call to a formatted output function
4158    described by INFO, substitute the result for the return value of
4159    the call.  The result is suitable if the number of bytes it represents
4160    is known and exact.  A result that isn't suitable for substitution may
4161    have its range set to the range of return values, if that is known.
4162    Return true if the call is removed and gsi_next should not be performed
4163    in the caller.  */
4164 
4165 static bool
try_substitute_return_value(gimple_stmt_iterator * gsi,const call_info & info,const format_result & res)4166 try_substitute_return_value (gimple_stmt_iterator *gsi,
4167 			     const call_info &info,
4168 			     const format_result &res)
4169 {
4170   tree lhs = gimple_get_lhs (info.callstmt);
4171 
4172   /* Set to true when the entire call has been removed.  */
4173   bool removed = false;
4174 
4175   /* The minimum and maximum return value.  */
4176   unsigned HOST_WIDE_INT retval[2] = {0};
4177   bool safe = is_call_safe (info, res, true, retval);
4178 
4179   if (safe
4180       && retval[0] == retval[1]
4181       /* Not prepared to handle possibly throwing calls here; they shouldn't
4182 	 appear in non-artificial testcases, except when the __*_chk routines
4183 	 are badly declared.  */
4184       && !stmt_ends_bb_p (info.callstmt))
4185     {
4186       tree cst = build_int_cst (lhs ? TREE_TYPE (lhs) : integer_type_node,
4187 				retval[0]);
4188 
4189       if (lhs == NULL_TREE && info.nowrite)
4190 	{
4191 	  /* Remove the call to the bounded function with a zero size
4192 	     (e.g., snprintf(0, 0, "%i", 123)) if there is no lhs.  */
4193 	  unlink_stmt_vdef (info.callstmt);
4194 	  gsi_remove (gsi, true);
4195 	  removed = true;
4196 	}
4197       else if (info.nowrite)
4198 	{
4199 	  /* Replace the call to the bounded function with a zero size
4200 	     (e.g., snprintf(0, 0, "%i", 123) with the constant result
4201 	     of the function.  */
4202 	  gimplify_and_update_call_from_tree (gsi, cst);
4203 	  gimple *callstmt = gsi_stmt (*gsi);
4204 	  update_stmt (callstmt);
4205 	}
4206       else if (lhs)
4207 	{
4208 	  /* Replace the left-hand side of the call with the constant
4209 	     result of the formatted function.  */
4210 	  gimple_call_set_lhs (info.callstmt, NULL_TREE);
4211 	  gimple *g = gimple_build_assign (lhs, cst);
4212 	  gsi_insert_after (gsi, g, GSI_NEW_STMT);
4213 	  update_stmt (info.callstmt);
4214 	}
4215 
4216       if (dump_file)
4217 	{
4218 	  if (removed)
4219 	    fprintf (dump_file, "  Removing call statement.");
4220 	  else
4221 	    {
4222 	      fprintf (dump_file, "  Substituting ");
4223 	      print_generic_expr (dump_file, cst, dump_flags);
4224 	      fprintf (dump_file, " for %s.\n",
4225 		       info.nowrite ? "statement" : "return value");
4226 	    }
4227 	}
4228     }
4229   else if (lhs && types_compatible_p (TREE_TYPE (lhs), integer_type_node))
4230     {
4231       bool setrange = false;
4232 
4233       if (safe
4234 	  && (info.bounded || retval[1] < info.objsize)
4235 	  && (retval[0] < target_int_max ()
4236 	      && retval[1] < target_int_max ()))
4237 	{
4238 	  /* If the result is in a valid range bounded by the size of
4239 	     the destination set it so that it can be used for subsequent
4240 	     optimizations.  */
4241 	  int prec = TYPE_PRECISION (integer_type_node);
4242 
4243 	  wide_int min = wi::shwi (retval[0], prec);
4244 	  wide_int max = wi::shwi (retval[1], prec);
4245 	  set_range_info (lhs, VR_RANGE, min, max);
4246 
4247 	  setrange = true;
4248 	}
4249 
4250       if (dump_file)
4251 	{
4252 	  const char *inbounds
4253 	    = (retval[0] < info.objsize
4254 	       ? (retval[1] < info.objsize
4255 		  ? "in" : "potentially out-of")
4256 	       : "out-of");
4257 
4258 	  const char *what = setrange ? "Setting" : "Discarding";
4259 	  if (retval[0] != retval[1])
4260 	    fprintf (dump_file,
4261 		     "  %s %s-bounds return value range ["
4262 		     HOST_WIDE_INT_PRINT_UNSIGNED ", "
4263 		     HOST_WIDE_INT_PRINT_UNSIGNED "].\n",
4264 		     what, inbounds, retval[0], retval[1]);
4265 	  else
4266 	    fprintf (dump_file, "  %s %s-bounds return value "
4267 		     HOST_WIDE_INT_PRINT_UNSIGNED ".\n",
4268 		     what, inbounds, retval[0]);
4269 	}
4270     }
4271 
4272   if (dump_file)
4273     fputc ('\n', dump_file);
4274 
4275   return removed;
4276 }
4277 
4278 /* Try to simplify a s{,n}printf call described by INFO with result
4279    RES by replacing it with a simpler and presumably more efficient
4280    call (such as strcpy).  */
4281 
4282 static bool
try_simplify_call(gimple_stmt_iterator * gsi,const call_info & info,const format_result & res)4283 try_simplify_call (gimple_stmt_iterator *gsi,
4284 		   const call_info &info,
4285 		   const format_result &res)
4286 {
4287   unsigned HOST_WIDE_INT dummy[2];
4288   if (!is_call_safe (info, res, info.retval_used (), dummy))
4289     return false;
4290 
4291   switch (info.fncode)
4292     {
4293     case BUILT_IN_SNPRINTF:
4294       return gimple_fold_builtin_snprintf (gsi);
4295 
4296     case BUILT_IN_SPRINTF:
4297       return gimple_fold_builtin_sprintf (gsi);
4298 
4299     default:
4300       ;
4301     }
4302 
4303   return false;
4304 }
4305 
4306 /* Return the zero-based index of the format string argument of a printf
4307    like function and set *IDX_ARGS to the first format argument.  When
4308    no such index exists return UINT_MAX.  */
4309 
4310 static unsigned
get_user_idx_format(tree fndecl,unsigned * idx_args)4311 get_user_idx_format (tree fndecl, unsigned *idx_args)
4312 {
4313   tree attrs = lookup_attribute ("format", DECL_ATTRIBUTES (fndecl));
4314   if (!attrs)
4315     attrs = lookup_attribute ("format", TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
4316 
4317   if (!attrs)
4318     return UINT_MAX;
4319 
4320   attrs = TREE_VALUE (attrs);
4321 
4322   tree archetype = TREE_VALUE (attrs);
4323   if (strcmp ("printf", IDENTIFIER_POINTER (archetype)))
4324     return UINT_MAX;
4325 
4326   attrs = TREE_CHAIN (attrs);
4327   tree fmtarg = TREE_VALUE (attrs);
4328 
4329   attrs = TREE_CHAIN (attrs);
4330   tree elliparg = TREE_VALUE (attrs);
4331 
4332   /* Attribute argument indices are 1-based but we use zero-based.  */
4333   *idx_args = tree_to_uhwi (elliparg) - 1;
4334   return tree_to_uhwi (fmtarg) - 1;
4335 }
4336 
4337 }   /* Unnamed namespace.  */
4338 
/* Determine if a GIMPLE call at *GSI is to one of the printf-like
   built-in functions (the fprintf, printf, sprintf, and snprintf
   families, including their _chk, _unlocked, and va_list forms) or
   to a user-defined function declared with attribute format (printf),
   and if so, handle it: diagnose problems with the destination size
   and the format string, compute the length of the formatted output
   and, when optimizing, try to substitute the computed result for
   the return value of the call or to simplify the call.  PTR_QRY is
   passed on to the helpers that determine the size of the destination
   and is used to query the range of a non-constant size argument.
   Return true if the call is removed and gsi_next should not be
   performed in the caller.  */

bool
handle_printf_call (gimple_stmt_iterator *gsi, pointer_query &ptr_qry)
{
  init_target_to_host_charmap ();

  call_info info = call_info ();

  info.callstmt = gsi_stmt (*gsi);
  info.func = gimple_call_fndecl (info.callstmt);
  if (!info.func)
    return false;

  /* Format string argument number (valid for all functions).  */
  unsigned idx_format = UINT_MAX;
  if (gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL))
    info.fncode = DECL_FUNCTION_CODE (info.func);
  else
    {
      /* Not a built-in: only handle the call if the callee is declared
	 with attribute format (printf) whose indices are consistent
	 with the arguments of the call and whose format argument is
	 a pointer.  */
      unsigned idx_args;
      idx_format = get_user_idx_format (info.func, &idx_args);
      if (idx_format == UINT_MAX
	  || idx_format >= gimple_call_num_args (info.callstmt)
	  || idx_args > gimple_call_num_args (info.callstmt)
	  || !POINTER_TYPE_P (TREE_TYPE (gimple_call_arg (info.callstmt,
							  idx_format))))
	return false;
      info.fncode = BUILT_IN_NONE;
      info.argidx = idx_args;
    }

  /* The size of the destination as in snprintf(dest, size, ...).  */
  unsigned HOST_WIDE_INT dstsize = HOST_WIDE_INT_M1U;

  /* The size of the destination determined by __builtin_object_size.  */
  unsigned HOST_WIDE_INT objsize = HOST_WIDE_INT_M1U;

  /* Zero-based buffer size argument number (snprintf and vsnprintf).  */
  unsigned idx_dstsize = UINT_MAX;

  /* Object size argument number (snprintf_chk and vsnprintf_chk).  */
  unsigned idx_objsize = UINT_MAX;

  /* Destination argument number (valid for sprintf functions only).  */
  unsigned idx_dstptr = 0;

  /* Set the argument numbers above according to the signature of
     the callee.  Assigning -1 to the unsigned IDX_DSTPTR yields
     UINT_MAX and denotes a function with no destination argument.
     INFO.ARGIDX is set to -1 for the va_list forms, presumably because
     their variable arguments are not available -- TODO confirm.  */
  switch (info.fncode)
    {
    case BUILT_IN_NONE:
      // User-defined function with attribute format (printf).
      idx_dstptr = -1;
      break;

    case BUILT_IN_FPRINTF:
      // Signature:
      //   __builtin_fprintf (FILE*, format, ...)
      idx_format = 1;
      info.argidx = 2;
      idx_dstptr = -1;
      break;

    case BUILT_IN_FPRINTF_CHK:
      // Signature:
      //   __builtin_fprintf_chk (FILE*, ost, format, ...)
      idx_format = 2;
      info.argidx = 3;
      idx_dstptr = -1;
      break;

    case BUILT_IN_FPRINTF_UNLOCKED:
      // Signature:
      //   __builtin_fprintf_unlocked (FILE*, format, ...)
      idx_format = 1;
      info.argidx = 2;
      idx_dstptr = -1;
      break;

    case BUILT_IN_PRINTF:
      // Signature:
      //   __builtin_printf (format, ...)
      idx_format = 0;
      info.argidx = 1;
      idx_dstptr = -1;
      break;

    case BUILT_IN_PRINTF_CHK:
      // Signature:
      //   __builtin_printf_chk (ost, format, ...)
      idx_format = 1;
      info.argidx = 2;
      idx_dstptr = -1;
      break;

    case BUILT_IN_PRINTF_UNLOCKED:
      // Signature:
      //   __builtin_printf_unlocked (format, ...)
      idx_format = 0;
      info.argidx = 1;
      idx_dstptr = -1;
      break;

    case BUILT_IN_SPRINTF:
      // Signature:
      //   __builtin_sprintf (dst, format, ...)
      idx_format = 1;
      info.argidx = 2;
      break;

    case BUILT_IN_SPRINTF_CHK:
      // Signature:
      //   __builtin___sprintf_chk (dst, ost, objsize, format, ...)
      idx_objsize = 2;
      idx_format = 3;
      info.argidx = 4;
      break;

    case BUILT_IN_SNPRINTF:
      // Signature:
      //   __builtin_snprintf (dst, size, format, ...)
      idx_dstsize = 1;
      idx_format = 2;
      info.argidx = 3;
      info.bounded = true;
      break;

    case BUILT_IN_SNPRINTF_CHK:
      // Signature:
      //   __builtin___snprintf_chk (dst, size, ost, objsize, format, ...)
      idx_dstsize = 1;
      idx_objsize = 3;
      idx_format = 4;
      info.argidx = 5;
      info.bounded = true;
      break;

    case BUILT_IN_VFPRINTF:
      // Signature:
      //   __builtin_vfprintf (FILE*, format, va_list)
      idx_format = 1;
      info.argidx = -1;
      idx_dstptr = -1;
      break;

    case BUILT_IN_VFPRINTF_CHK:
      // Signature:
      //   __builtin___vfprintf_chk (FILE*, ost, format, va_list)
      idx_format = 2;
      info.argidx = -1;
      idx_dstptr = -1;
      break;

    case BUILT_IN_VPRINTF:
      // Signature:
      //   __builtin_vprintf (format, va_list)
      idx_format = 0;
      info.argidx = -1;
      idx_dstptr = -1;
      break;

    case BUILT_IN_VPRINTF_CHK:
      // Signature:
      //   __builtin___vprintf_chk (ost, format, va_list)
      idx_format = 1;
      info.argidx = -1;
      idx_dstptr = -1;
      break;

    case BUILT_IN_VSNPRINTF:
      // Signature:
      //   __builtin_vsnprintf (dst, size, format, va)
      idx_dstsize = 1;
      idx_format = 2;
      info.argidx = -1;
      info.bounded = true;
      break;

    case BUILT_IN_VSNPRINTF_CHK:
      // Signature:
      //   __builtin___vsnprintf_chk (dst, size, ost, objsize, format, va)
      idx_dstsize = 1;
      idx_objsize = 3;
      idx_format = 4;
      info.argidx = -1;
      info.bounded = true;
      break;

    case BUILT_IN_VSPRINTF:
      // Signature:
      //   __builtin_vsprintf (dst, format, va)
      idx_format = 1;
      info.argidx = -1;
      break;

    case BUILT_IN_VSPRINTF_CHK:
      // Signature:
      //   __builtin___vsprintf_chk (dst, ost, objsize, format, va)
      idx_format = 3;
      idx_objsize = 2;
      info.argidx = -1;
      break;

    default:
      // Any other built-in is not handled.
      return false;
    }

  /* Set the global warning level for this function.  */
  warn_level = info.bounded ? warn_format_trunc : warn_format_overflow;

  /* For all string functions the first argument is a pointer to
     the destination.  For functions with no destination IDX_DSTPTR
     is UINT_MAX, the test below fails, and DSTPTR is null.  */
  tree dstptr = (idx_dstptr < gimple_call_num_args (info.callstmt)
		 ? gimple_call_arg (info.callstmt, 0) : NULL_TREE);

  info.format = gimple_call_arg (info.callstmt, idx_format);

  /* True when the destination size is constant as opposed to the lower
     or upper bound of a range.  */
  bool dstsize_cst_p = true;

  /* Cleared when the specified bound exceeds, or may exceed, INT_MAX.
     Used to initialize RES.POSUNDER4K below.  */
  bool posunder4k = true;

  if (idx_dstsize == UINT_MAX)
    {
      /* For non-bounded functions like sprintf, determine the size
	 of the destination from the object or pointer passed to it
	 as the first argument.  */
      dstsize = get_destination_size (dstptr, info.callstmt, ptr_qry);
    }
  else if (tree size = gimple_call_arg (info.callstmt, idx_dstsize))
    {
      /* For bounded functions try to get the size argument.  */

      if (TREE_CODE (size) == INTEGER_CST)
	{
	  dstsize = tree_to_uhwi (size);
	  /* No object can be larger than SIZE_MAX / 2 bytes (half the
	     address space) on the target.
	     The functions are defined only for output of at most INT_MAX
	     bytes.  Specifying a bound in excess of that limit effectively
	     defeats the bounds checking (and on some implementations such
	     as Solaris cause the function to fail with EINVAL).  */
	  if (dstsize > target_size_max () / 2)
	    {
	      /* Avoid warning if -Wstringop-overflow is specified since
		 it also warns for the same thing though only for the
		 checking built-ins.  */
	      if ((idx_objsize == UINT_MAX
		   || !warn_stringop_overflow))
		warning_at (gimple_location (info.callstmt), info.warnopt (),
			    "specified bound %wu exceeds maximum object size "
			    "%wu",
			    dstsize, target_size_max () / 2);
	      /* POSIX requires snprintf to fail if DSTSIZE is greater
		 than INT_MAX.  Even though not all POSIX implementations
		 conform to the requirement, avoid folding in this case.  */
	      posunder4k = false;
	    }
	  else if (dstsize > target_int_max ())
	    {
	      warning_at (gimple_location (info.callstmt), info.warnopt (),
			  "specified bound %wu exceeds %<INT_MAX%>",
			  dstsize);
	      /* POSIX requires snprintf to fail if DSTSIZE is greater
		 than INT_MAX.  Avoid folding in that case.  */
	      posunder4k = false;
	    }
	}
      else if (TREE_CODE (size) == SSA_NAME)
	{
	  /* Try to determine the range of values of the argument
	     and use the greater of the two at level 1 and the smaller
	     of them at level 2.  */
	  value_range vr;
	  ptr_qry.rvals->range_of_expr (vr, size, info.callstmt);

	  if (!vr.undefined_p ())
	    {
	      tree type = TREE_TYPE (size);
	      tree tmin = wide_int_to_tree (type, vr.lower_bound ());
	      tree tmax = wide_int_to_tree (type, vr.upper_bound ());
	      unsigned HOST_WIDE_INT minsize = TREE_INT_CST_LOW (tmin);
	      unsigned HOST_WIDE_INT maxsize = TREE_INT_CST_LOW (tmax);
	      dstsize = warn_level < 2 ? maxsize : minsize;

	      /* Warn only when even the smallest bound in the range
		 is excessive.  */
	      if (minsize > target_int_max ())
		warning_at (gimple_location (info.callstmt), info.warnopt (),
			    "specified bound range [%wu, %wu] exceeds "
			    "%<INT_MAX%>",
			    minsize, maxsize);

	      /* POSIX requires snprintf to fail if DSTSIZE is greater
		 than INT_MAX.  Avoid folding if that's possible.  */
	      if (maxsize > target_int_max ())
		posunder4k = false;
	    }

	  /* The destination size is not constant.  If the function is
	     bounded (e.g., snprintf) a lower bound of zero doesn't
	     necessarily imply it can be eliminated.  */
	  dstsize_cst_p = false;
	}
    }

  /* For the checking built-ins, use the object size argument when
     it is a constant that fits in an unsigned HOST_WIDE_INT.  */
  if (idx_objsize != UINT_MAX)
    if (tree size = gimple_call_arg (info.callstmt, idx_objsize))
      if (tree_fits_uhwi_p (size))
	objsize = tree_to_uhwi (size);

  if (info.bounded && !dstsize)
    {
      /* As a special case, when the explicitly specified destination
	 size argument (to a bounded function like snprintf) is zero
	 it is a request to determine the number of bytes on output
	 without actually producing any.  Pretend the size is
	 unlimited in this case.  */
      info.objsize = HOST_WIDE_INT_MAX;
      info.nowrite = dstsize_cst_p;
    }
  else
    {
      /* For calls to non-bounded functions or to those of bounded
	 functions with a non-zero size, warn if the destination
	 pointer is null.  */
      if (dstptr && integer_zerop (dstptr))
	{
	  /* This is diagnosed with -Wformat only when the null is a constant
	     pointer.  The warning here diagnoses instances where the pointer
	     is not constant.  */
	  location_t loc = gimple_location (info.callstmt);
	  warning_at (EXPR_LOC_OR_LOC (dstptr, loc),
		      info.warnopt (), "null destination pointer");
	  return false;
	}

      /* Set the object size to the smaller of the two arguments
	 if both have been specified and they're not equal.  */
      info.objsize = dstsize < objsize ? dstsize : objsize;

      if (info.bounded
	  && dstsize < target_size_max () / 2 && objsize < dstsize
	  /* Avoid warning if -Wstringop-overflow is specified since
	     it also warns for the same thing though only for the
	     checking built-ins.  */
	  && (idx_objsize == UINT_MAX
	      || !warn_stringop_overflow))
	{
	  warning_at (gimple_location (info.callstmt), info.warnopt (),
		      "specified bound %wu exceeds the size %wu "
		      "of the destination object", dstsize, objsize);
	}
    }

  /* Warn about a null format argument in a call to a built-in
     function and give up on the call.  */
  if (integer_zerop (info.format)
      && gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL))
    {
      location_t loc = gimple_location (info.callstmt);
      warning_at (EXPR_LOC_OR_LOC (info.format, loc),
		  info.warnopt (), "null format string");
      return false;
    }

  /* Nothing more can be done unless the format string can be
     determined.  */
  info.fmtstr = get_format_string (info.format, &info.fmtloc);
  if (!info.fmtstr)
    return false;

  if (warn_restrict)
    {
      /* Compute the origin of the destination pointer and its offset
	 from the base object/pointer if possible.  */
      info.dst_offset = 0;
      info.dst_origin = get_origin_and_offset (dstptr, &info.dst_field,
					       &info.dst_offset);
    }

  /* The result is the number of bytes output by the formatted function,
     including the terminating NUL.  */
  format_result res;

  /* I/O functions with no destination argument (i.e., all forms of fprintf
     and printf) may fail under any conditions.  Others (i.e., all forms of
     sprintf) may only fail under specific conditions determined for each
     directive.  Clear POSUNDER4K for the former set of functions and set
     it to true for the latter (it can only be cleared later, but it is
     never set to true again).  */
  res.posunder4k = posunder4k && dstptr;

  bool success = compute_format_length (info, &res, ptr_qry);
  /* Once a warning has been issued for the call, suppress the same
     warning option on the statement.  */
  if (res.warned)
    suppress_warning (info.callstmt, info.warnopt ());

  /* When optimizing and the printf return value optimization is enabled,
     attempt to substitute the computed result for the return value of
     the call.  Avoid this optimization when -frounding-math is in effect
     and the format string contains a floating point directive.  If the
     call has not been removed, attempt to simplify it instead.  */
  bool call_removed = false;
  if (success && optimize > 0)
    {
      /* Save a copy of the iterator pointing at the call.  The iterator
	 may change to point past the call in try_substitute_return_value
	 but the original value is needed in try_simplify_call.  */
      gimple_stmt_iterator gsi_call = *gsi;

      if (flag_printf_return_value
	  && (!flag_rounding_math || !res.floating))
	call_removed = try_substitute_return_value (gsi, info, res);

      if (!call_removed)
	try_simplify_call (&gsi_call, info, res);
    }

  return call_removed;
}
4755