1 /* Copyright (C) 2016-2021 Free Software Foundation, Inc.
2    Contributed by Martin Sebor <msebor@redhat.com>.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
/* This file implements the printf-return-value pass.  The pass does
   two things: 1) it analyzes calls to formatted output functions like
   sprintf looking for possible buffer overflows and calls to bounded
   functions like snprintf for early truncation (and under the control
   of the -Wformat-overflow and -Wformat-truncation options issues
   warnings), and 2) under the control of the -fprintf-return-value
   option it folds the return value of safe calls into constants,
   making it possible to eliminate code that depends on the value of
   those constants.
28 
29    For all functions (bounded or not) the pass uses the size of the
30    destination object.  That means that it will diagnose calls to
31    snprintf not on the basis of the size specified by the function's
32    second argument but rather on the basis of the size the first
33    argument points to (if possible).  For bound-checking built-ins
34    like __builtin___snprintf_chk the pass uses the size typically
35    determined by __builtin_object_size and passed to the built-in
36    by the Glibc inline wrapper.
37 
   The pass handles all forms of standard sprintf format directives,
39    including character, integer, floating point, pointer, and strings,
40    with the standard C flags, widths, and precisions.  For integers
41    and strings it computes the length of output itself.  For floating
42    point it uses MPFR to format known constants with up and down
43    rounding and uses the resulting range of output lengths.  For
44    strings it uses the length of string literals and the sizes of
45    character arrays that a character pointer may point to as a bound
46    on the longest string.  */
47 
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "backend.h"
52 #include "tree.h"
53 #include "gimple.h"
54 #include "tree-pass.h"
55 #include "ssa.h"
56 #include "gimple-fold.h"
57 #include "gimple-pretty-print.h"
58 #include "diagnostic-core.h"
59 #include "fold-const.h"
60 #include "gimple-iterator.h"
61 #include "tree-ssa.h"
62 #include "tree-object-size.h"
63 #include "tree-cfg.h"
64 #include "tree-ssa-propagate.h"
65 #include "calls.h"
66 #include "cfgloop.h"
67 #include "tree-scalar-evolution.h"
68 #include "tree-ssa-loop.h"
69 #include "intl.h"
70 #include "langhooks.h"
71 
72 #include "attribs.h"
73 #include "builtins.h"
74 #include "pointer-query.h"
75 #include "stor-layout.h"
76 
77 #include "realmpfr.h"
78 #include "target.h"
79 
80 #include "cpplib.h"
81 #include "input.h"
82 #include "toplev.h"
83 #include "substring-locations.h"
84 #include "diagnostic.h"
85 #include "domwalk.h"
86 #include "alloc-pool.h"
87 #include "vr-values.h"
88 #include "tree-ssa-strlen.h"
89 #include "tree-dfa.h"
90 
/* The likely worst case value of MB_LEN_MAX for the target, large enough
   for UTF-8.  Ideally, this would be obtained by a target hook if it were
   to be used for optimization but it's good enough as is for warnings.  */
#define target_mb_len_max()   6

/* The maximum number of bytes a single non-string directive can result
   in.  This is the result of printf("%.*Lf", INT_MAX, -LDBL_MAX) for
   LDBL_MAX_10_EXP of 4932.  The macro expands to a call to
   target_int_max () and so can only be used where that function is
   visible.  */
#define IEEE_MAX_10_EXP    4932
#define target_dir_max()   (target_int_max () + IEEE_MAX_10_EXP + 2)
101 
102 namespace {
103 
/* Set to the warning level for the current function which is equal
   either to warn_format_trunc for bounded functions or to
   warn_format_overflow otherwise.  Levels above 1 change how
   fmtresult::adjust_for_width_or_precision chooses the LIKELY
   counter.  */

static int warn_level;
109 
/* The minimum, maximum, likely, and unlikely maximum number of bytes
   of output either a formatting function or an individual directive
   can result in.  All four counters are unsigned; values at or above
   HOST_WIDE_INT_MAX are treated as "unknown or invalid" by the code
   that updates them.  */

struct result_range
{
  /* The absolute minimum number of bytes.  The result of a successful
     conversion is guaranteed to be no less than this.  (An erroneous
     conversion can be indicated by MIN > HOST_WIDE_INT_MAX.)  */
  unsigned HOST_WIDE_INT min;
  /* The likely maximum result that is used in diagnostics.  In most
     cases MAX is the same as the worst case UNLIKELY result.  */
  unsigned HOST_WIDE_INT max;
  /* The likely result used to trigger diagnostics.  For conversions
     that result in a range of bytes [MIN, MAX], LIKELY is somewhere
     in that range.  */
  unsigned HOST_WIDE_INT likely;
  /* In rare cases (e.g., for multibyte characters) UNLIKELY gives
     the worst case maximum result of a directive.  In most cases
     UNLIKELY == MAX.  UNLIKELY is used to control the return value
     optimization but not in diagnostics.  */
  unsigned HOST_WIDE_INT unlikely;
};
133 
134 /* Return the value of INT_MIN for the target.  */
135 
136 static inline HOST_WIDE_INT
target_int_min()137 target_int_min ()
138 {
139   return tree_to_shwi (TYPE_MIN_VALUE (integer_type_node));
140 }
141 
142 /* Return the value of INT_MAX for the target.  */
143 
144 static inline unsigned HOST_WIDE_INT
target_int_max()145 target_int_max ()
146 {
147   return tree_to_uhwi (TYPE_MAX_VALUE (integer_type_node));
148 }
149 
150 /* Return the value of SIZE_MAX for the target.  */
151 
152 static inline unsigned HOST_WIDE_INT
target_size_max()153 target_size_max ()
154 {
155   return tree_to_uhwi (TYPE_MAX_VALUE (size_type_node));
156 }
157 
/* A straightforward mapping from the execution character set to the host
   character set indexed by execution character.  Element 0 is special:
   init_target_to_host_charmap stores in it whether the mapping is the
   identity for the characters of interest, and the string overload of
   target_to_host tests it to skip the conversion.  */

static char target_to_host_charmap[256];
162 
163 /* Initialize a mapping from the execution character set to the host
164    character set.  */
165 
166 static bool
init_target_to_host_charmap()167 init_target_to_host_charmap ()
168 {
169   /* If the percent sign is non-zero the mapping has already been
170      initialized.  */
171   if (target_to_host_charmap['%'])
172     return true;
173 
174   /* Initialize the target_percent character (done elsewhere).  */
175   if (!init_target_chars ())
176     return false;
177 
178   /* The subset of the source character set used by printf conversion
179      specifications (strictly speaking, not all letters are used but
180      they are included here for the sake of simplicity).  The dollar
181      sign must be included even though it's not in the basic source
182      character set.  */
183   const char srcset[] = " 0123456789!\"#%&'()*+,-./:;<=>?[\\]^_{|}~$"
184     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
185 
186   /* Set the mapping for all characters to some ordinary value (i,e.,
187      not none used in printf conversion specifications) and overwrite
188      those that are used by conversion specifications with their
189      corresponding values.  */
190   memset (target_to_host_charmap + 1, '?', sizeof target_to_host_charmap - 1);
191 
192   /* Are the two sets of characters the same?  */
193   bool all_same_p = true;
194 
195   for (const char *pc = srcset; *pc; ++pc)
196     {
197       /* Slice off the high end bits in case target characters are
198 	 signed.  All values are expected to be non-nul, otherwise
199 	 there's a problem.  */
200       if (unsigned char tc = lang_hooks.to_target_charset (*pc))
201 	{
202 	  target_to_host_charmap[tc] = *pc;
203 	  if (tc != *pc)
204 	    all_same_p = false;
205 	}
206       else
207 	return false;
208 
209     }
210 
211   /* Set the first element to a non-zero value if the mapping
212      is 1-to-1, otherwise leave it clear (NUL is assumed to be
213      the same in both character sets).  */
214   target_to_host_charmap[0] = all_same_p;
215 
216   return true;
217 }
218 
219 /* Return the host source character corresponding to the character
220    CH in the execution character set if one exists, or some innocuous
221    (non-special, non-nul) source character otherwise.  */
222 
223 static inline unsigned char
target_to_host(unsigned char ch)224 target_to_host (unsigned char ch)
225 {
226   return target_to_host_charmap[ch];
227 }
228 
229 /* Convert an initial substring of the string TARGSTR consisting of
230    characters in the execution character set into a string in the
231    source character set on the host and store up to HOSTSZ characters
232    in the buffer pointed to by HOSTR.  Return HOSTR.  */
233 
234 static const char*
target_to_host(char * hostr,size_t hostsz,const char * targstr)235 target_to_host (char *hostr, size_t hostsz, const char *targstr)
236 {
237   /* Make sure the buffer is reasonably big.  */
238   gcc_assert (hostsz > 4);
239 
240   /* The interesting subset of source and execution characters are
241      the same so no conversion is necessary.  However, truncate
242      overlong strings just like the translated strings are.  */
243   if (target_to_host_charmap['\0'] == 1)
244     {
245       size_t len = strlen (targstr);
246       if (len >= hostsz)
247 	{
248 	  memcpy (hostr, targstr, hostsz - 4);
249 	  strcpy (hostr + hostsz - 4, "...");
250 	}
251       else
252 	memcpy (hostr, targstr, len + 1);
253       return hostr;
254     }
255 
256   /* Convert the initial substring of TARGSTR to the corresponding
257      characters in the host set, appending "..." if TARGSTR is too
258      long to fit.  Using the static buffer assumes the function is
259      not called in between sequence points (which it isn't).  */
260   for (char *ph = hostr; ; ++targstr)
261     {
262       *ph++ = target_to_host (*targstr);
263       if (!*targstr)
264 	break;
265 
266       if (size_t (ph - hostr) == hostsz)
267 	{
268 	  strcpy (ph - 4, "...");
269 	  break;
270 	}
271     }
272 
273   return hostr;
274 }
275 
276 /* Convert the sequence of decimal digits in the execution character
277    starting at *PS to a HOST_WIDE_INT, analogously to strtol.  Return
278    the result and set *PS to one past the last converted character.
279    On range error set ERANGE to the digit that caused it.  */
280 
281 static inline HOST_WIDE_INT
target_strtowi(const char ** ps,const char ** erange)282 target_strtowi (const char **ps, const char **erange)
283 {
284   unsigned HOST_WIDE_INT val = 0;
285   for ( ; ; ++*ps)
286     {
287       unsigned char c = target_to_host (**ps);
288       if (ISDIGIT (c))
289 	{
290 	  c -= '0';
291 
292 	  /* Check for overflow.  */
293 	  if (val > ((unsigned HOST_WIDE_INT) HOST_WIDE_INT_MAX - c) / 10LU)
294 	    {
295 	      val = HOST_WIDE_INT_MAX;
296 	      *erange = *ps;
297 
298 	      /* Skip the remaining digits.  */
299 	      do
300 		c = target_to_host (*++*ps);
301 	      while (ISDIGIT (c));
302 	      break;
303 	    }
304 	  else
305 	    val = val * 10 + c;
306 	}
307       else
308 	break;
309     }
310 
311   return val;
312 }
313 
314 /* Given FORMAT, set *PLOC to the source location of the format string
315    and return the format string if it is known or null otherwise.  */
316 
317 static const char*
get_format_string(tree format,location_t * ploc)318 get_format_string (tree format, location_t *ploc)
319 {
320   *ploc = EXPR_LOC_OR_LOC (format, input_location);
321 
322   return c_getstr (format);
323 }
324 
325 /* For convenience and brevity, shorter named entrypoints of
326    format_string_diagnostic_t::emit_warning_va and
327    format_string_diagnostic_t::emit_warning_n_va.
328    These have to be functions with the attribute so that exgettext
329    works properly.  */
330 
331 static bool
332 ATTRIBUTE_GCC_DIAG (5, 6)
fmtwarn(const substring_loc & fmt_loc,location_t param_loc,const char * corrected_substring,opt_code opt,const char * gmsgid,...)333 fmtwarn (const substring_loc &fmt_loc, location_t param_loc,
334 	 const char *corrected_substring, opt_code opt,
335 	 const char *gmsgid, ...)
336 {
337   format_string_diagnostic_t diag (fmt_loc, NULL, param_loc, NULL,
338 				   corrected_substring);
339   va_list ap;
340   va_start (ap, gmsgid);
341   bool warned = diag.emit_warning_va (opt, gmsgid, &ap);
342   va_end (ap);
343 
344   return warned;
345 }
346 
347 static bool
348 ATTRIBUTE_GCC_DIAG (6, 8) ATTRIBUTE_GCC_DIAG (7, 8)
fmtwarn_n(const substring_loc & fmt_loc,location_t param_loc,const char * corrected_substring,opt_code opt,unsigned HOST_WIDE_INT n,const char * singular_gmsgid,const char * plural_gmsgid,...)349 fmtwarn_n (const substring_loc &fmt_loc, location_t param_loc,
350 	   const char *corrected_substring, opt_code opt,
351 	   unsigned HOST_WIDE_INT n,
352 	   const char *singular_gmsgid, const char *plural_gmsgid, ...)
353 {
354   format_string_diagnostic_t diag (fmt_loc, NULL, param_loc, NULL,
355 				   corrected_substring);
356   va_list ap;
357   va_start (ap, plural_gmsgid);
358   bool warned = diag.emit_warning_n_va (opt, n, singular_gmsgid, plural_gmsgid,
359 					&ap);
360   va_end (ap);
361 
362   return warned;
363 }
364 
/* Format length modifiers.  FMT_LEN_none is the zero enumerator and
   thus the value a value-initialized modifier has.  */

enum format_lengths
{
  FMT_LEN_none,
  FMT_LEN_hh,    // char argument
  FMT_LEN_h,     // short
  FMT_LEN_l,     // long
  FMT_LEN_ll,    // long long
  FMT_LEN_L,     // long double (and GNU long long)
  FMT_LEN_z,     // size_t
  FMT_LEN_t,     // ptrdiff_t
  FMT_LEN_j      // intmax_t
};
379 
380 
381 /* Description of the result of conversion either of a single directive
382    or the whole format string.  */
383 
384 class fmtresult
385 {
386 public:
387   /* Construct a FMTRESULT object with all counters initialized
388      to MIN.  KNOWNRANGE is set when MIN is valid.  */
389   fmtresult (unsigned HOST_WIDE_INT min = HOST_WIDE_INT_MAX)
argmin()390   : argmin (), argmax (), dst_offset (HOST_WIDE_INT_MIN), nonstr (),
391     knownrange (min < HOST_WIDE_INT_MAX),
392     mayfail (), nullp ()
393   {
394     range.min = min;
395     range.max = min;
396     range.likely = min;
397     range.unlikely = min;
398   }
399 
400   /* Construct a FMTRESULT object with MIN, MAX, and LIKELY counters.
401      KNOWNRANGE is set when both MIN and MAX are valid.   */
402   fmtresult (unsigned HOST_WIDE_INT min, unsigned HOST_WIDE_INT max,
403 	     unsigned HOST_WIDE_INT likely = HOST_WIDE_INT_MAX)
argmin()404   : argmin (), argmax (), dst_offset (HOST_WIDE_INT_MIN), nonstr (),
405     knownrange (min < HOST_WIDE_INT_MAX && max < HOST_WIDE_INT_MAX),
406     mayfail (), nullp ()
407   {
408     range.min = min;
409     range.max = max;
410     range.likely = max < likely ? min : likely;
411     range.unlikely = max;
412   }
413 
414   /* Adjust result upward to reflect the RANGE of values the specified
415      width or precision is known to be in.  */
416   fmtresult& adjust_for_width_or_precision (const HOST_WIDE_INT[2],
417 					    tree = NULL_TREE,
418 					    unsigned = 0, unsigned = 0);
419 
420   /* Return the maximum number of decimal digits a value of TYPE
421      formats as on output.  */
422   static unsigned type_max_digits (tree, int);
423 
424   /* The range a directive's argument is in.  */
425   tree argmin, argmax;
426 
427   /* The starting offset into the destination of the formatted function
428      call of the %s argument that points into (aliases with) the same
429      destination array.  */
430   HOST_WIDE_INT dst_offset;
431 
432   /* The minimum and maximum number of bytes that a directive
433      results in on output for an argument in the range above.  */
434   result_range range;
435 
436   /* Non-nul when the argument of a string directive is not a nul
437      terminated string.  */
438   tree nonstr;
439 
440   /* True when the range above is obtained from a known value of
441      a directive's argument or its bounds and not the result of
442      heuristics that depend on warning levels.  */
443   bool knownrange;
444 
445   /* True for a directive that may fail (such as wide character
446      directives).  */
447   bool mayfail;
448 
449   /* True when the argument is a null pointer.  */
450   bool nullp;
451 };
452 
453 /* Adjust result upward to reflect the range ADJUST of values the
454    specified width or precision is known to be in.  When non-null,
455    TYPE denotes the type of the directive whose result is being
456    adjusted, BASE gives the base of the directive (octal, decimal,
457    or hex), and ADJ denotes the additional adjustment to the LIKELY
458    counter that may need to be added when ADJUST is a range.  */
459 
460 fmtresult&
adjust_for_width_or_precision(const HOST_WIDE_INT adjust[2],tree type,unsigned base,unsigned adj)461 fmtresult::adjust_for_width_or_precision (const HOST_WIDE_INT adjust[2],
462 					  tree type /* = NULL_TREE */,
463 					  unsigned base /* = 0 */,
464 					  unsigned adj /* = 0 */)
465 {
466   bool minadjusted = false;
467 
468   /* Adjust the minimum and likely counters.  */
469   if (adjust[0] >= 0)
470     {
471       if (range.min < (unsigned HOST_WIDE_INT)adjust[0])
472 	{
473 	  range.min = adjust[0];
474 	  minadjusted = true;
475 	}
476 
477       /* Adjust the likely counter.  */
478       if (range.likely < range.min)
479 	range.likely = range.min;
480     }
481   else if (adjust[0] == target_int_min ()
482 	   && (unsigned HOST_WIDE_INT)adjust[1] == target_int_max ())
483     knownrange = false;
484 
485   /* Adjust the maximum counter.  */
486   if (adjust[1] > 0)
487     {
488       if (range.max < (unsigned HOST_WIDE_INT)adjust[1])
489 	{
490 	  range.max = adjust[1];
491 
492 	  /* Set KNOWNRANGE if both the minimum and maximum have been
493 	     adjusted.  Otherwise leave it at what it was before.  */
494 	  knownrange = minadjusted;
495 	}
496     }
497 
498   if (warn_level > 1 && type)
499     {
500       /* For large non-constant width or precision whose range spans
501 	 the maximum number of digits produced by the directive for
502 	 any argument, set the likely number of bytes to be at most
503 	 the number digits plus other adjustment determined by the
504 	 caller (one for sign or two for the hexadecimal "0x"
505 	 prefix).  */
506       unsigned dirdigs = type_max_digits (type, base);
507       if (adjust[0] < dirdigs && dirdigs < adjust[1]
508 	  && range.likely < dirdigs)
509 	range.likely = dirdigs + adj;
510     }
511   else if (range.likely < (range.min ? range.min : 1))
512     {
513       /* Conservatively, set LIKELY to at least MIN but no less than
514 	 1 unless MAX is zero.  */
515       range.likely = (range.min
516 		      ? range.min
517 		      : range.max && (range.max < HOST_WIDE_INT_MAX
518 				      || warn_level > 1) ? 1 : 0);
519     }
520 
521   /* Finally adjust the unlikely counter to be at least as large as
522      the maximum.  */
523   if (range.unlikely < range.max)
524     range.unlikely = range.max;
525 
526   return *this;
527 }
528 
529 /* Return the maximum number of digits a value of TYPE formats in
530    BASE on output, not counting base prefix .  */
531 
532 unsigned
type_max_digits(tree type,int base)533 fmtresult::type_max_digits (tree type, int base)
534 {
535   unsigned prec = TYPE_PRECISION (type);
536   switch (base)
537     {
538     case 8:
539       return (prec + 2) / 3;
540     case 10:
541       /* Decimal approximation: yields 3, 5, 10, and 20 for precision
542 	 of 8, 16, 32, and 64 bits.  */
543       return prec * 301 / 1000 + 1;
544     case 16:
545       return prec / 4;
546     }
547 
548   gcc_unreachable ();
549 }
550 
551 static bool
552 get_int_range (tree, gimple *, HOST_WIDE_INT *, HOST_WIDE_INT *,
553 	       bool, HOST_WIDE_INT, range_query *);
554 
555 struct call_info;
556 
557 /* Description of a format directive.  A directive is either a plain
558    string or a conversion specification that starts with '%'.  */
559 
560 struct directive
561 {
directivedirective562   directive (const call_info *inf, unsigned dno)
563     : info (inf), dirno (dno), argno (), beg (), len (), flags (),
564     width (), prec (),  modifier (), specifier (), arg (), fmtfunc ()
565   { }
566 
567   /* Reference to the info structure describing the call that this
568      directive is a part of.  */
569   const call_info *info;
570 
571   /* The 1-based directive number (for debugging).  */
572   unsigned dirno;
573 
574   /* The zero-based argument number of the directive's argument ARG in
575      the function's argument list.  */
576   unsigned argno;
577 
578   /* The first character of the directive and its length.  */
579   const char *beg;
580   size_t len;
581 
582   /* A bitmap of flags, one for each character.  */
583   unsigned flags[256 / sizeof (int)];
584 
585   /* The range of values of the specified width, or -1 if not specified.  */
586   HOST_WIDE_INT width[2];
587   /* The range of values of the specified precision, or -1 if not
588      specified.  */
589   HOST_WIDE_INT prec[2];
590 
591   /* Length modifier.  */
592   format_lengths modifier;
593 
594   /* Format specifier character.  */
595   char specifier;
596 
597   /* The argument of the directive or null when the directive doesn't
598      take one or when none is available (such as for vararg functions).  */
599   tree arg;
600 
601   /* Format conversion function that given a directive and an argument
602      returns the formatting result.  */
603   fmtresult (*fmtfunc) (const directive &, tree, range_query *);
604 
605   /* Return True when the format flag CHR has been used.  */
get_flagdirective606   bool get_flag (char chr) const
607   {
608     unsigned char c = chr & 0xff;
609     return (flags[c / (CHAR_BIT * sizeof *flags)]
610 	    & (1U << (c % (CHAR_BIT * sizeof *flags))));
611   }
612 
613   /* Make a record of the format flag CHR having been used.  */
set_flagdirective614   void set_flag (char chr)
615   {
616     unsigned char c = chr & 0xff;
617     flags[c / (CHAR_BIT * sizeof *flags)]
618       |= (1U << (c % (CHAR_BIT * sizeof *flags)));
619   }
620 
621   /* Reset the format flag CHR.  */
clear_flagdirective622   void clear_flag (char chr)
623   {
624     unsigned char c = chr & 0xff;
625     flags[c / (CHAR_BIT * sizeof *flags)]
626       &= ~(1U << (c % (CHAR_BIT * sizeof *flags)));
627   }
628 
629   /* Set both bounds of the width range to VAL.  */
set_widthdirective630   void set_width (HOST_WIDE_INT val)
631   {
632     width[0] = width[1] = val;
633   }
634 
635   /* Set the width range according to ARG, with both bounds being
636      no less than 0.  For a constant ARG set both bounds to its value
637      or 0, whichever is greater.  For a non-constant ARG in some range
638      set width to its range adjusting each bound to -1 if it's less.
639      For an indeterminate ARG set width to [0, INT_MAX].  */
640   void set_width (tree arg, range_query *);
641 
642   /* Set both bounds of the precision range to VAL.  */
set_precisiondirective643   void set_precision (HOST_WIDE_INT val)
644   {
645     prec[0] = prec[1] = val;
646   }
647 
648   /* Set the precision range according to ARG, with both bounds being
649      no less than -1.  For a constant ARG set both bounds to its value
650      or -1 whichever is greater.  For a non-constant ARG in some range
651      set precision to its range adjusting each bound to -1 if it's less.
652      For an indeterminate ARG set precision to [-1, INT_MAX].  */
653   void set_precision (tree arg, range_query *query);
654 
655   /* Return true if both width and precision are known to be
656      either constant or in some range, false otherwise.  */
known_width_and_precisiondirective657   bool known_width_and_precision () const
658   {
659     return ((width[1] < 0
660 	     || (unsigned HOST_WIDE_INT)width[1] <= target_int_max ())
661 	    && (prec[1] < 0
662 		|| (unsigned HOST_WIDE_INT)prec[1] < target_int_max ()));
663   }
664 };
665 
666 /* The result of a call to a formatted function.  */
667 
668 struct format_result
669 {
format_resultformat_result670   format_result ()
671     : range (), aliases (), alias_count (), knownrange (), posunder4k (),
672     floating (), warned () { /* No-op.  */ }
673 
~format_resultformat_result674   ~format_result ()
675   {
676     XDELETEVEC (aliases);
677   }
678 
679   /* Range of characters written by the formatted function.
680      Setting the minimum to HOST_WIDE_INT_MAX disables all
681      length tracking for the remainder of the format string.  */
682   result_range range;
683 
684   struct alias_info
685   {
686     directive dir;          /* The directive that aliases the destination.  */
687     HOST_WIDE_INT offset;   /* The offset at which it aliases it.  */
688     result_range range;     /* The raw result of the directive.  */
689   };
690 
691   /* An array of directives whose pointer argument aliases a part
692      of the destination object of the formatted function.  */
693   alias_info *aliases;
694   unsigned alias_count;
695 
696   /* True when the range above is obtained from known values of
697      directive arguments, or bounds on the amount of output such
698      as width and precision, and not the result of  heuristics that
699      depend on warning levels.  It's used to issue stricter diagnostics
700      in cases where strings of unknown lengths are bounded by the arrays
701      they are determined to refer to.  KNOWNRANGE must not be used for
702      the return value optimization.  */
703   bool knownrange;
704 
705   /* True if no individual directive could fail or result in more than
706      4095 bytes of output (the total NUMBER_CHARS_{MIN,MAX} might be
707      greater).  Implementations are not required to handle directives
708      that produce more than 4K bytes (leading to undefined behavior)
709      and so when one is found it disables the return value optimization.
710      Similarly, directives that can fail (such as wide character
711      directives) disable the optimization.  */
712   bool posunder4k;
713 
714   /* True when a floating point directive has been seen in the format
715      string.  */
716   bool floating;
717 
718   /* True when an intermediate result has caused a warning.  Used to
719      avoid issuing duplicate warnings while finishing the processing
720      of a call.  WARNED also disables the return value optimization.  */
721   bool warned;
722 
723   /* Preincrement the number of output characters by 1.  */
724   format_result& operator++ ()
725   {
726     return *this += 1;
727   }
728 
729   /* Postincrement the number of output characters by 1.  */
730   format_result operator++ (int)
731   {
732     format_result prev (*this);
733     *this += 1;
734     return prev;
735   }
736 
737   /* Increment the number of output characters by N.  */
738   format_result& operator+= (unsigned HOST_WIDE_INT);
739 
740   /* Add a directive to the sequence of those with potentially aliasing
741      arguments.  */
742   void append_alias (const directive &, HOST_WIDE_INT, const result_range &);
743 
744 private:
745   /* Not copyable or assignable.  */
746   format_result (format_result&);
747   void operator= (format_result&);
748 };
749 
750 format_result&
751 format_result::operator+= (unsigned HOST_WIDE_INT n)
752 {
753   gcc_assert (n < HOST_WIDE_INT_MAX);
754 
755   if (range.min < HOST_WIDE_INT_MAX)
756     range.min += n;
757 
758   if (range.max < HOST_WIDE_INT_MAX)
759     range.max += n;
760 
761   if (range.likely < HOST_WIDE_INT_MAX)
762     range.likely += n;
763 
764   if (range.unlikely < HOST_WIDE_INT_MAX)
765     range.unlikely += n;
766 
767   return *this;
768 }
769 
770 void
append_alias(const directive & d,HOST_WIDE_INT off,const result_range & resrng)771 format_result::append_alias (const directive &d, HOST_WIDE_INT off,
772 			     const result_range &resrng)
773 {
774   unsigned cnt = alias_count + 1;
775   alias_info *ar = XNEWVEC (alias_info, cnt);
776 
777   for (unsigned i = 0; i != alias_count; ++i)
778     ar[i] = aliases[i];
779 
780   ar[alias_count].dir = d;
781   ar[alias_count].offset = off;
782   ar[alias_count].range = resrng;
783 
784   XDELETEVEC (aliases);
785 
786   alias_count = cnt;
787   aliases = ar;
788 }
789 
790 /* Return the logarithm of X in BASE.  */
791 
792 static int
ilog(unsigned HOST_WIDE_INT x,int base)793 ilog (unsigned HOST_WIDE_INT x, int base)
794 {
795   int res = 0;
796   do
797     {
798       ++res;
799       x /= base;
800     } while (x);
801   return res;
802 }
803 
804 /* Return the number of bytes resulting from converting into a string
805    the INTEGER_CST tree node X in BASE with a minimum of PREC digits.
806    PLUS indicates whether 1 for a plus sign should be added for positive
807    numbers, and PREFIX whether the length of an octal ('O') or hexadecimal
808    ('0x') prefix should be added for nonzero numbers.  Return -1 if X cannot
809    be represented.  */
810 
811 static HOST_WIDE_INT
tree_digits(tree x,int base,HOST_WIDE_INT prec,bool plus,bool prefix)812 tree_digits (tree x, int base, HOST_WIDE_INT prec, bool plus, bool prefix)
813 {
814   unsigned HOST_WIDE_INT absval;
815 
816   HOST_WIDE_INT res;
817 
818   if (TYPE_UNSIGNED (TREE_TYPE (x)))
819     {
820       if (tree_fits_uhwi_p (x))
821 	{
822 	  absval = tree_to_uhwi (x);
823 	  res = plus;
824 	}
825       else
826 	return -1;
827     }
828   else
829     {
830       if (tree_fits_shwi_p (x))
831 	{
832 	  HOST_WIDE_INT i = tree_to_shwi (x);
833          if (HOST_WIDE_INT_MIN == i)
834            {
835              /* Avoid undefined behavior due to negating a minimum.  */
836              absval = HOST_WIDE_INT_MAX;
837              res = 1;
838            }
839          else if (i < 0)
840 	   {
841 	     absval = -i;
842 	     res = 1;
843 	   }
844 	 else
845 	   {
846 	     absval = i;
847 	     res = plus;
848 	   }
849 	}
850       else
851 	return -1;
852     }
853 
854   int ndigs = ilog (absval, base);
855 
856   res += prec < ndigs ? ndigs : prec;
857 
858   /* Adjust a non-zero value for the base prefix, either hexadecimal,
859      or, unless precision has resulted in a leading zero, also octal.  */
860   if (prefix && absval && (base == 16 || prec <= ndigs))
861     {
862       if (base == 8)
863 	res += 1;
864       else if (base == 16)
865 	res += 2;
866     }
867 
868   return res;
869 }
870 
871 /* Description of a call to a formatted function.  */
872 
873 struct call_info
874 {
875   /* Function call statement.  */
876   gimple *callstmt;
877 
878   /* Function called.  */
879   tree func;
880 
881   /* Called built-in function code.  */
882   built_in_function fncode;
883 
884   /* The "origin" of the destination pointer argument, which is either
885      the DECL of the destination buffer being written into or a pointer
886      that points to it, plus some offset.  */
887   tree dst_origin;
888 
889   /* For a destination pointing to a struct array member, the offset of
890      the member.  */
891   HOST_WIDE_INT dst_field;
892 
893   /* The offset into the destination buffer.  */
894   HOST_WIDE_INT dst_offset;
895 
896   /* Format argument and format string extracted from it.  */
897   tree format;
898   const char *fmtstr;
899 
900   /* The location of the format argument.  */
901   location_t fmtloc;
902 
903   /* The destination object size for __builtin___xxx_chk functions
904      typically determined by __builtin_object_size, or -1 if unknown.  */
905   unsigned HOST_WIDE_INT objsize;
906 
907   /* Number of the first variable argument.  */
908   unsigned HOST_WIDE_INT argidx;
909 
910   /* True for functions like snprintf that specify the size of
911      the destination, false for others like sprintf that don't.  */
912   bool bounded;
913 
914   /* True for bounded functions like snprintf that specify a zero-size
915      buffer as a request to compute the size of output without actually
916      writing any.  NOWRITE is cleared in response to the %n directive
917      which has side-effects similar to writing output.  */
918   bool nowrite;
919 
920   /* Return true if the called function's return value is used.  */
retval_usedcall_info921   bool retval_used () const
922   {
923     return gimple_get_lhs (callstmt);
924   }
925 
926   /* Return the warning option corresponding to the called function.  */
warnoptcall_info927   opt_code warnopt () const
928   {
929     return bounded ? OPT_Wformat_truncation_ : OPT_Wformat_overflow_;
930   }
931 
932   /* Return true for calls to file formatted functions.  */
is_file_funccall_info933   bool is_file_func () const
934   {
935     return (fncode == BUILT_IN_FPRINTF
936 	    || fncode == BUILT_IN_FPRINTF_CHK
937 	    || fncode == BUILT_IN_FPRINTF_UNLOCKED
938 	    || fncode == BUILT_IN_VFPRINTF
939 	    || fncode == BUILT_IN_VFPRINTF_CHK);
940   }
941 
942   /* Return true for calls to string formatted functions.  */
is_string_funccall_info943   bool is_string_func () const
944   {
945     return (fncode == BUILT_IN_SPRINTF
946 	    || fncode == BUILT_IN_SPRINTF_CHK
947 	    || fncode == BUILT_IN_SNPRINTF
948 	    || fncode == BUILT_IN_SNPRINTF_CHK
949 	    || fncode == BUILT_IN_VSPRINTF
950 	    || fncode == BUILT_IN_VSPRINTF_CHK
951 	    || fncode == BUILT_IN_VSNPRINTF
952 	    || fncode == BUILT_IN_VSNPRINTF_CHK);
953   }
954 };
955 
956 void
set_width(tree arg,range_query * query)957 directive::set_width (tree arg, range_query *query)
958 {
959   get_int_range (arg, info->callstmt, width, width + 1, true, 0, query);
960 }
961 
962 void
set_precision(tree arg,range_query * query)963 directive::set_precision (tree arg, range_query *query)
964 {
965   get_int_range (arg, info->callstmt, prec, prec + 1, false, -1, query);
966 }
967 
968 /* Return the result of formatting a no-op directive (such as '%n').  */
969 
970 static fmtresult
format_none(const directive &,tree,range_query *)971 format_none (const directive &, tree, range_query *)
972 {
973   fmtresult res (0);
974   return res;
975 }
976 
977 /* Return the result of formatting the '%%' directive.  */
978 
979 static fmtresult
format_percent(const directive &,tree,range_query *)980 format_percent (const directive &, tree, range_query *)
981 {
982   fmtresult res (1);
983   return res;
984 }
985 
986 
987 /* Compute intmax_type_node and uintmax_type_node similarly to how
988    tree.c builds size_type_node.  */
989 
990 static void
build_intmax_type_nodes(tree * pintmax,tree * puintmax)991 build_intmax_type_nodes (tree *pintmax, tree *puintmax)
992 {
993   if (strcmp (UINTMAX_TYPE, "unsigned int") == 0)
994     {
995       *pintmax = integer_type_node;
996       *puintmax = unsigned_type_node;
997     }
998   else if (strcmp (UINTMAX_TYPE, "long unsigned int") == 0)
999     {
1000       *pintmax = long_integer_type_node;
1001       *puintmax = long_unsigned_type_node;
1002     }
1003   else if (strcmp (UINTMAX_TYPE, "long long unsigned int") == 0)
1004     {
1005       *pintmax = long_long_integer_type_node;
1006       *puintmax = long_long_unsigned_type_node;
1007     }
1008   else
1009     {
1010       for (int i = 0; i < NUM_INT_N_ENTS; i++)
1011 	if (int_n_enabled_p[i])
1012 	  {
1013 	    char name[50], altname[50];
1014 	    sprintf (name, "__int%d unsigned", int_n_data[i].bitsize);
1015 	    sprintf (altname, "__int%d__ unsigned", int_n_data[i].bitsize);
1016 
1017 	    if (strcmp (name, UINTMAX_TYPE) == 0
1018 		|| strcmp (altname, UINTMAX_TYPE) == 0)
1019 	      {
1020 	        *pintmax = int_n_trees[i].signed_type;
1021 	        *puintmax = int_n_trees[i].unsigned_type;
1022 		return;
1023 	      }
1024 	  }
1025       gcc_unreachable ();
1026     }
1027 }
1028 
1029 /* Determine the range [*PMIN, *PMAX] that the expression ARG is
1030    in and that is representable in type int.
1031    Return true when the range is a subrange of that of int.
1032    When ARG is null it is as if it had the full range of int.
1033    When ABSOLUTE is true the range reflects the absolute value of
1034    the argument.  When ABSOLUTE is false, negative bounds of
1035    the determined range are replaced with NEGBOUND.  */
1036 
1037 static bool
get_int_range(tree arg,gimple * stmt,HOST_WIDE_INT * pmin,HOST_WIDE_INT * pmax,bool absolute,HOST_WIDE_INT negbound,range_query * query)1038 get_int_range (tree arg, gimple *stmt,
1039 	       HOST_WIDE_INT *pmin, HOST_WIDE_INT *pmax,
1040 	       bool absolute, HOST_WIDE_INT negbound,
1041 	       range_query *query)
1042 {
1043   /* The type of the result.  */
1044   const_tree type = integer_type_node;
1045 
1046   bool knownrange = false;
1047 
1048   if (!arg)
1049     {
1050       *pmin = tree_to_shwi (TYPE_MIN_VALUE (type));
1051       *pmax = tree_to_shwi (TYPE_MAX_VALUE (type));
1052     }
1053   else if (TREE_CODE (arg) == INTEGER_CST
1054 	   && TYPE_PRECISION (TREE_TYPE (arg)) <= TYPE_PRECISION (type))
1055     {
1056       /* For a constant argument return its value adjusted as specified
1057 	 by NEGATIVE and NEGBOUND and return true to indicate that the
1058 	 result is known.  */
1059       *pmin = tree_fits_shwi_p (arg) ? tree_to_shwi (arg) : tree_to_uhwi (arg);
1060       *pmax = *pmin;
1061       knownrange = true;
1062     }
1063   else
1064     {
1065       /* True if the argument's range cannot be determined.  */
1066       bool unknown = true;
1067 
1068       tree argtype = TREE_TYPE (arg);
1069 
1070       /* Ignore invalid arguments with greater precision that that
1071 	 of the expected type (e.g., in sprintf("%*i", 12LL, i)).
1072 	 They will have been detected and diagnosed by -Wformat and
1073 	 so it's not important to complicate this code to try to deal
1074 	 with them again.  */
1075       if (TREE_CODE (arg) == SSA_NAME
1076 	  && INTEGRAL_TYPE_P (argtype)
1077 	  && TYPE_PRECISION (argtype) <= TYPE_PRECISION (type))
1078 	{
1079 	  /* Try to determine the range of values of the integer argument.  */
1080 	  value_range vr;
1081 	  query->range_of_expr (vr, arg, stmt);
1082 
1083 	  if (!vr.undefined_p () && !vr.varying_p ())
1084 	    {
1085 	      HOST_WIDE_INT type_min
1086 		= (TYPE_UNSIGNED (argtype)
1087 		   ? tree_to_uhwi (TYPE_MIN_VALUE (argtype))
1088 		   : tree_to_shwi (TYPE_MIN_VALUE (argtype)));
1089 
1090 	      HOST_WIDE_INT type_max = tree_to_uhwi (TYPE_MAX_VALUE (argtype));
1091 
1092 	      tree type = TREE_TYPE (arg);
1093 	      tree tmin = wide_int_to_tree (type, vr.lower_bound ());
1094 	      tree tmax = wide_int_to_tree (type, vr.upper_bound ());
1095 	      *pmin = TREE_INT_CST_LOW (tmin);
1096 	      *pmax = TREE_INT_CST_LOW (tmax);
1097 
1098 	      if (*pmin < *pmax)
1099 		{
1100 		  /* Return true if the adjusted range is a subrange of
1101 		     the full range of the argument's type.  *PMAX may
1102 		     be less than *PMIN when the argument is unsigned
1103 		     and its upper bound is in excess of TYPE_MAX.  In
1104 		     that (invalid) case disregard the range and use that
1105 		     of the expected type instead.  */
1106 		  knownrange = type_min < *pmin || *pmax < type_max;
1107 
1108 		  unknown = false;
1109 		}
1110 	    }
1111 	}
1112 
1113       /* Handle an argument with an unknown range as if none had been
1114 	 provided.  */
1115       if (unknown)
1116 	return get_int_range (NULL_TREE, NULL, pmin, pmax, absolute,
1117 			      negbound, query);
1118     }
1119 
1120   /* Adjust each bound as specified by ABSOLUTE and NEGBOUND.  */
1121   if (absolute)
1122     {
1123       if (*pmin < 0)
1124 	{
1125 	  if (*pmin == *pmax)
1126 	    *pmin = *pmax = -*pmin;
1127 	  else
1128 	    {
1129 	      /* Make sure signed overlow is avoided.  */
1130 	      gcc_assert (*pmin != HOST_WIDE_INT_MIN);
1131 
1132 	      HOST_WIDE_INT tmp = -*pmin;
1133 	      *pmin = 0;
1134 	      if (*pmax < tmp)
1135 		*pmax = tmp;
1136 	    }
1137 	}
1138     }
1139   else if (*pmin < negbound)
1140     *pmin = negbound;
1141 
1142   return knownrange;
1143 }
1144 
1145 /* With the range [*ARGMIN, *ARGMAX] of an integer directive's actual
1146    argument, due to the conversion from either *ARGMIN or *ARGMAX to
1147    the type of the directive's formal argument it's possible for both
1148    to result in the same number of bytes or a range of bytes that's
1149    less than the number of bytes that would result from formatting
1150    some other value in the range [*ARGMIN, *ARGMAX].  This can be
1151    determined by checking for the actual argument being in the range
1152    of the type of the directive.  If it isn't it must be assumed to
1153    take on the full range of the directive's type.
1154    Return true when the range has been adjusted to the full range
1155    of DIRTYPE, and false otherwise.  */
1156 
1157 static bool
adjust_range_for_overflow(tree dirtype,tree * argmin,tree * argmax)1158 adjust_range_for_overflow (tree dirtype, tree *argmin, tree *argmax)
1159 {
1160   tree argtype = TREE_TYPE (*argmin);
1161   unsigned argprec = TYPE_PRECISION (argtype);
1162   unsigned dirprec = TYPE_PRECISION (dirtype);
1163 
1164   /* If the actual argument and the directive's argument have the same
1165      precision and sign there can be no overflow and so there is nothing
1166      to adjust.  */
1167   if (argprec == dirprec && TYPE_SIGN (argtype) == TYPE_SIGN (dirtype))
1168     return false;
1169 
1170   /* The logic below was inspired/lifted from the CONVERT_EXPR_CODE_P
1171      branch in the extract_range_from_unary_expr function in tree-vrp.c.  */
1172 
1173   if (TREE_CODE (*argmin) == INTEGER_CST
1174       && TREE_CODE (*argmax) == INTEGER_CST
1175       && (dirprec >= argprec
1176 	  || integer_zerop (int_const_binop (RSHIFT_EXPR,
1177 					     int_const_binop (MINUS_EXPR,
1178 							      *argmax,
1179 							      *argmin),
1180 					     size_int (dirprec)))))
1181     {
1182       *argmin = force_fit_type (dirtype, wi::to_widest (*argmin), 0, false);
1183       *argmax = force_fit_type (dirtype, wi::to_widest (*argmax), 0, false);
1184 
1185       /* If *ARGMIN is still less than *ARGMAX the conversion above
1186 	 is safe.  Otherwise, it has overflowed and would be unsafe.  */
1187       if (tree_int_cst_le (*argmin, *argmax))
1188 	return false;
1189     }
1190 
1191   *argmin = TYPE_MIN_VALUE (dirtype);
1192   *argmax = TYPE_MAX_VALUE (dirtype);
1193   return true;
1194 }
1195 
/* Return a range representing the minimum and maximum number of bytes
   that the integer format directive DIR will output for the argument
   ARG given the width and precision extracted from DIR.
   ARG may be null, in which case the type of the directive's formal
   argument is used as an approximation; an INTEGER_CST, in which case
   the result is computed from its value; or another expression of
   integral or pointer type, in which case its range is looked up
   with QUERY when it is an SSA_NAME of integral type, and otherwise
   derived from its type.  For a non-constant argument the function
   calls itself with the bounds of the range to compute the result.
   A default-constructed result is returned for a length modifier that
   is not handled and for an argument of any other type.  */

static fmtresult
format_integer (const directive &dir, tree arg, range_query *query)
{
  tree intmax_type_node;
  tree uintmax_type_node;

  /* Base to format the number in.  */
  int base;

  /* True when a conversion is preceded by a prefix indicating the base
     of the argument (octal or hexadecimal).  */
  bool maybebase = dir.get_flag ('#');

  /* True when a signed conversion is preceded by a sign or space.  */
  bool maybesign = false;

  /* True for signed conversions (i.e., 'd' and 'i').  */
  bool sign = false;

  switch (dir.specifier)
    {
    case 'd':
    case 'i':
      /* Space and '+' are only meaningful for signed conversions.  */
      maybesign = dir.get_flag (' ') | dir.get_flag ('+');
      sign = true;
      base = 10;
      break;
    case 'u':
      base = 10;
      break;
    case 'o':
      base = 8;
      break;
    case 'X':
    case 'x':
      base = 16;
      break;
    default:
      gcc_unreachable ();
    }

  /* The type of the "formal" argument expected by the directive.  */
  tree dirtype = NULL_TREE;

  /* Determine the expected type of the argument from the length
     modifier.  */
  switch (dir.modifier)
    {
    case FMT_LEN_none:
      /* NOTE(review): the specifier switch above only admits d, i, u,
	 o, x, and X, so the 'p' test below looks unreachable -- confirm
	 before relying on it.  */
      if (dir.specifier == 'p')
	dirtype = ptr_type_node;
      else
	dirtype = sign ? integer_type_node : unsigned_type_node;
      break;

    case FMT_LEN_h:
      dirtype = sign ? short_integer_type_node : short_unsigned_type_node;
      break;

    case FMT_LEN_hh:
      dirtype = sign ? signed_char_type_node : unsigned_char_type_node;
      break;

    case FMT_LEN_l:
      dirtype = sign ? long_integer_type_node : long_unsigned_type_node;
      break;

    case FMT_LEN_L:
    case FMT_LEN_ll:
      dirtype = (sign
		 ? long_long_integer_type_node
		 : long_long_unsigned_type_node);
      break;

    case FMT_LEN_z:
      dirtype = signed_or_unsigned_type_for (!sign, size_type_node);
      break;

    case FMT_LEN_t:
      dirtype = signed_or_unsigned_type_for (!sign, ptrdiff_type_node);
      break;

    case FMT_LEN_j:
      build_intmax_type_nodes (&intmax_type_node, &uintmax_type_node);
      dirtype = sign ? intmax_type_node : uintmax_type_node;
      break;

    default:
      /* Any other length modifier is not handled here.  */
      return fmtresult ();
    }

  /* The type of the argument to the directive, either deduced from
     the actual non-constant argument if one is known, or from
     the directive itself when none has been provided because it's
     a va_list.  */
  tree argtype = NULL_TREE;

  if (!arg)
    {
      /* When the argument has not been provided, use the type of
	 the directive's argument as an approximation.  This will
	 result in false positives for directives like %i with
	 arguments with smaller precision (such as short or char).  */
      argtype = dirtype;
    }
  else if (TREE_CODE (arg) == INTEGER_CST)
    {
      /* When a constant argument has been provided use its value
	 rather than type to determine the length of the output.
	 This branch always returns and so terminates the recursive
	 calls made for the bounds of a range further below.  */
      fmtresult res;

      if ((dir.prec[0] <= 0 && dir.prec[1] >= 0) && integer_zerop (arg))
	{
	  /* As a special case, a precision of zero with a zero argument
	     results in zero bytes except in base 8 when the '#' flag is
	     specified, and for signed conversions in base 8 and 10 when
	     either the space or '+' flag has been specified and it results
	     in just one byte (with width having the normal effect).  This
	     must extend to the case of a specified precision with
	     an unknown value because it can be zero.  */
	  res.range.min = ((base == 8 && dir.get_flag ('#')) || maybesign);
	  if (res.range.min == 0 && dir.prec[0] != dir.prec[1])
	    {
	      res.range.max = 1;
	      res.range.likely = 1;
	    }
	  else
	    {
	      res.range.max = res.range.min;
	      res.range.likely = res.range.min;
	    }
	}
      else
	{
	  /* Convert the argument to the type of the directive.  */
	  arg = fold_convert (dirtype, arg);

	  /* Compute the length for the lower bound of precision and,
	     when the upper bound differs, also for the upper bound.  */
	  res.range.min = tree_digits (arg, base, dir.prec[0],
				       maybesign, maybebase);
	  if (dir.prec[0] == dir.prec[1])
	    res.range.max = res.range.min;
	  else
	    res.range.max = tree_digits (arg, base, dir.prec[1],
					 maybesign, maybebase);
	  res.range.likely = res.range.min;
	  res.knownrange = true;
	}

      res.range.unlikely = res.range.max;

      /* Bump up the counters if WIDTH is greater than LEN.  */
      res.adjust_for_width_or_precision (dir.width, dirtype, base,
					 (sign | maybebase) + (base == 16));
      /* Bump up the counters again if PRECision is greater still.  */
      res.adjust_for_width_or_precision (dir.prec, dirtype, base,
					 (sign | maybebase) + (base == 16));

      return res;
    }
  else if (INTEGRAL_TYPE_P (TREE_TYPE (arg))
	   || TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)
    /* Determine the type of the provided non-constant argument.  */
    argtype = TREE_TYPE (arg);
  else
    /* Don't bother with invalid arguments since they likely would
       have already been diagnosed, and disable any further checking
       of the format string by returning [-1, -1].  */
    return fmtresult ();

  fmtresult res;

  /* Using either the range the non-constant argument is in, or its
     type (either "formal" or actual), create a range of values that
     constrain the length of output given the warning level.  */
  tree argmin = NULL_TREE;
  tree argmax = NULL_TREE;

  if (arg
      && TREE_CODE (arg) == SSA_NAME
      && INTEGRAL_TYPE_P (argtype))
    {
      /* Try to determine the range of values of the integer argument
	 (range information is not available for pointers).  */
      value_range vr;
      query->range_of_expr (vr, arg, dir.info->callstmt);

      if (!vr.varying_p () && !vr.undefined_p ())
	{
	  argmin = wide_int_to_tree (TREE_TYPE (arg), vr.lower_bound ());
	  argmax = wide_int_to_tree (TREE_TYPE (arg), vr.upper_bound ());

	  /* Set KNOWNRANGE if the argument is in a known subrange
	     of the directive's type and neither width nor precision
	     is unknown.  (KNOWNRANGE may be reset below).  */
	  res.knownrange
	    = ((!tree_int_cst_equal (TYPE_MIN_VALUE (dirtype), argmin)
		|| !tree_int_cst_equal (TYPE_MAX_VALUE (dirtype), argmax))
	       && dir.known_width_and_precision ());

	  res.argmin = argmin;
	  res.argmax = argmax;
	}
      else
	{
	  /* The argument here may be the result of promoting the actual
	     argument to int.  Try to determine the type of the actual
	     argument before promotion and narrow down its range that
	     way.  */
	  gimple *def = SSA_NAME_DEF_STMT (arg);
	  if (is_gimple_assign (def))
	    {
	      tree_code code = gimple_assign_rhs_code (def);
	      if (code == INTEGER_CST)
		{
		  /* The argument is assigned a constant: format that
		     constant instead.  */
		  arg = gimple_assign_rhs1 (def);
		  return format_integer (dir, arg, query);
		}

	      if (code == NOP_EXPR)
		{
		  /* Use the type of the operand of the conversion if
		     it is an integer or pointer.  */
		  tree type = TREE_TYPE (gimple_assign_rhs1 (def));
		  if (INTEGRAL_TYPE_P (type)
		      || TREE_CODE (type) == POINTER_TYPE)
		    argtype = type;
		}
	    }
	}
    }

  if (!argmin)
    {
      /* No range has been determined: fall back on the full range
	 of the argument's type.  */
      if (TREE_CODE (argtype) == POINTER_TYPE)
	{
	  argmin = build_int_cst (pointer_sized_int_node, 0);
	  argmax = build_all_ones_cst (pointer_sized_int_node);
	}
      else
	{
	  argmin = TYPE_MIN_VALUE (argtype);
	  argmax = TYPE_MAX_VALUE (argtype);
	}
    }

  /* Clear KNOWNRANGE if the range has been adjusted to the maximum
     of the directive.  If it has been cleared then since ARGMIN and/or
     ARGMAX have been adjusted also adjust the corresponding ARGMIN and
     ARGMAX in the result to include in diagnostics.  */
  if (adjust_range_for_overflow (dirtype, &argmin, &argmax))
    {
      res.knownrange = false;
      res.argmin = argmin;
      res.argmax = argmax;
    }

  /* Recursively compute the minimum and maximum from the known range.  */
  if (TYPE_UNSIGNED (dirtype) || tree_int_cst_sgn (argmin) >= 0)
    {
      /* For unsigned conversions/directives or signed when
	 the minimum is positive, use the minimum and maximum to compute
	 the shortest and longest output, respectively.  */
      res.range.min = format_integer (dir, argmin, query).range.min;
      res.range.max = format_integer (dir, argmax, query).range.max;
    }
  else if (tree_int_cst_sgn (argmax) < 0)
    {
      /* For signed conversions/directives if maximum is negative,
	 use the minimum as the longest output and maximum as the
	 shortest output.  */
      res.range.min = format_integer (dir, argmax, query).range.min;
      res.range.max = format_integer (dir, argmin, query).range.max;
    }
  else
    {
      /* Otherwise, 0 is inside of the range and minimum negative.  Use 0
	 as the shortest output and for the longest output compute the
	 length of the output of both minimum and maximum and pick the
	 longer.  */
      unsigned HOST_WIDE_INT max1
	= format_integer (dir, argmin, query).range.max;
      unsigned HOST_WIDE_INT max2
	= format_integer (dir, argmax, query).range.max;
      res.range.min
	= format_integer (dir, integer_zero_node, query).range.min;
      res.range.max = MAX (max1, max2);
    }

  /* If the range is known, use the maximum as the likely length.  */
  if (res.knownrange)
    res.range.likely = res.range.max;
  else
    {
      /* Otherwise, use the minimum.  Except for the case where for %#x or
	 %#o the minimum is just for a single value in the range (0) and
	 for all other values it is something longer, like 0x1 or 01.
	 Use the length for value 1 in that case instead as the likely
	 length.  */
      res.range.likely = res.range.min;
      if (maybebase
	  && base != 10
	  && (tree_int_cst_sgn (argmin) < 0 || tree_int_cst_sgn (argmax) > 0))
	{
	  if (res.range.min == 1)
	    res.range.likely += base == 8 ? 1 : 2;
	  else if (res.range.min == 2
		   && base == 16
		   && (dir.width[0] == 2 || dir.prec[0] == 2))
	    ++res.range.likely;
	}
    }

  /* As in the constant case, the unlikely maximum is the same as
     the maximum, and the counters are bumped up first for width and
     then for precision.  */
  res.range.unlikely = res.range.max;
  res.adjust_for_width_or_precision (dir.width, dirtype, base,
				     (sign | maybebase) + (base == 16));
  res.adjust_for_width_or_precision (dir.prec, dirtype, base,
				     (sign | maybebase) + (base == 16));

  return res;
}
1520 
1521 /* Return the number of bytes that a format directive consisting of FLAGS,
1522    PRECision, format SPECification, and MPFR rounding specifier RNDSPEC,
1523    would result for argument X under ideal conditions (i.e., if PREC
1524    weren't excessive).  MPFR 3.1 allocates large amounts of memory for
1525    values of PREC with large magnitude and can fail (see MPFR bug #21056).
1526    This function works around those problems.  */
1527 
1528 static unsigned HOST_WIDE_INT
get_mpfr_format_length(mpfr_ptr x,const char * flags,HOST_WIDE_INT prec,char spec,char rndspec)1529 get_mpfr_format_length (mpfr_ptr x, const char *flags, HOST_WIDE_INT prec,
1530 			char spec, char rndspec)
1531 {
1532   char fmtstr[40];
1533 
1534   HOST_WIDE_INT len = strlen (flags);
1535 
1536   fmtstr[0] = '%';
1537   memcpy (fmtstr + 1, flags, len);
1538   memcpy (fmtstr + 1 + len, ".*R", 3);
1539   fmtstr[len + 4] = rndspec;
1540   fmtstr[len + 5] = spec;
1541   fmtstr[len + 6] = '\0';
1542 
1543   spec = TOUPPER (spec);
1544   if (spec == 'E' || spec == 'F')
1545     {
1546       /* For %e, specify the precision explicitly since mpfr_sprintf
1547 	 does its own thing just to be different (see MPFR bug 21088).  */
1548       if (prec < 0)
1549 	prec = 6;
1550     }
1551   else
1552     {
1553       /* Avoid passing negative precisions with larger magnitude to MPFR
1554 	 to avoid exposing its bugs.  (A negative precision is supposed
1555 	 to be ignored.)  */
1556       if (prec < 0)
1557 	prec = -1;
1558     }
1559 
1560   HOST_WIDE_INT p = prec;
1561 
1562   if (spec == 'G' && !strchr (flags, '#'))
1563     {
1564       /* For G/g without the pound flag, precision gives the maximum number
1565 	 of significant digits which is bounded by LDBL_MAX_10_EXP, or, for
1566 	 a 128 bit IEEE extended precision, 4932.  Using twice as much here
1567 	 should be more than sufficient for any real format.  */
1568       if ((IEEE_MAX_10_EXP * 2) < prec)
1569 	prec = IEEE_MAX_10_EXP * 2;
1570       p = prec;
1571     }
1572   else
1573     {
1574       /* Cap precision arbitrarily at 1KB and add the difference
1575 	 (if any) to the MPFR result.  */
1576       if (prec > 1024)
1577 	p = 1024;
1578     }
1579 
1580   len = mpfr_snprintf (NULL, 0, fmtstr, (int)p, x);
1581 
1582   /* Handle the unlikely (impossible?) error by returning more than
1583      the maximum dictated by the function's return type.  */
1584   if (len < 0)
1585     return target_dir_max () + 1;
1586 
1587   /* Adjust the return value by the difference.  */
1588   if (p < prec)
1589     len += prec - p;
1590 
1591   return len;
1592 }
1593 
1594 /* Return the number of bytes to format using the format specifier
1595    SPEC and the precision PREC the largest value in the real floating
1596    TYPE.  */
1597 
1598 static unsigned HOST_WIDE_INT
format_floating_max(tree type,char spec,HOST_WIDE_INT prec)1599 format_floating_max (tree type, char spec, HOST_WIDE_INT prec)
1600 {
1601   machine_mode mode = TYPE_MODE (type);
1602 
1603   /* IBM Extended mode.  */
1604   if (MODE_COMPOSITE_P (mode))
1605     mode = DFmode;
1606 
1607   /* Get the real type format description for the target.  */
1608   const real_format *rfmt = REAL_MODE_FORMAT (mode);
1609   REAL_VALUE_TYPE rv;
1610 
1611   real_maxval (&rv, 0, mode);
1612 
1613   /* Convert the GCC real value representation with the precision
1614      of the real type to the mpfr_t format with the GCC default
1615      round-to-nearest mode.  */
1616   mpfr_t x;
1617   mpfr_init2 (x, rfmt->p);
1618   mpfr_from_real (x, &rv, MPFR_RNDN);
1619 
1620   /* Return a value one greater to account for the leading minus sign.  */
1621   unsigned HOST_WIDE_INT r
1622     = 1 + get_mpfr_format_length (x, "", prec, spec, 'D');
1623   mpfr_clear (x);
1624   return r;
1625 }
1626 
1627 /* Return a range representing the minimum and maximum number of bytes
1628    that the directive DIR will output for any argument.  PREC gives
1629    the adjusted precision range to account for negative precisions
1630    meaning the default 6.  This function is used when the directive
1631    argument or its value isn't known.  */
1632 
1633 static fmtresult
format_floating(const directive & dir,const HOST_WIDE_INT prec[2])1634 format_floating (const directive &dir, const HOST_WIDE_INT prec[2])
1635 {
1636   tree type;
1637 
1638   switch (dir.modifier)
1639     {
1640     case FMT_LEN_l:
1641     case FMT_LEN_none:
1642       type = double_type_node;
1643       break;
1644 
1645     case FMT_LEN_L:
1646       type = long_double_type_node;
1647       break;
1648 
1649     case FMT_LEN_ll:
1650       type = long_double_type_node;
1651       break;
1652 
1653     default:
1654       return fmtresult ();
1655     }
1656 
1657   /* The minimum and maximum number of bytes produced by the directive.  */
1658   fmtresult res;
1659 
1660   /* The minimum output as determined by flags.  It's always at least 1.
1661      When plus or space are set the output is preceded by either a sign
1662      or a space.  */
1663   unsigned flagmin = (1 /* for the first digit */
1664 		      + (dir.get_flag ('+') | dir.get_flag (' ')));
1665 
1666   /* The minimum is 3 for "inf" and "nan" for all specifiers, plus 1
1667      for the plus sign/space with the '+' and ' ' flags, respectively,
1668      unless reduced below.  */
1669   res.range.min = 2 + flagmin;
1670 
1671   /* When the pound flag is set the decimal point is included in output
1672      regardless of precision.  Whether or not a decimal point is included
1673      otherwise depends on the specification and precision.  */
1674   bool radix = dir.get_flag ('#');
1675 
1676   switch (dir.specifier)
1677     {
1678     case 'A':
1679     case 'a':
1680       {
1681 	HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1682 	if (dir.prec[0] <= 0)
1683 	  minprec = 0;
1684 	else if (dir.prec[0] > 0)
1685 	  minprec = dir.prec[0] + !radix /* decimal point */;
1686 
1687 	res.range.likely = (2 /* 0x */
1688 			    + flagmin
1689 			    + radix
1690 			    + minprec
1691 			    + 3 /* p+0 */);
1692 
1693 	res.range.max = format_floating_max (type, 'a', prec[1]);
1694 
1695 	/* The unlikely maximum accounts for the longest multibyte
1696 	   decimal point character.  */
1697 	res.range.unlikely = res.range.max;
1698 	if (dir.prec[1] > 0)
1699 	  res.range.unlikely += target_mb_len_max () - 1;
1700 
1701 	break;
1702       }
1703 
1704     case 'E':
1705     case 'e':
1706       {
1707 	/* Minimum output attributable to precision and, when it's
1708 	   non-zero, decimal point.  */
1709 	HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1710 
1711 	/* The likely minimum output is "[-+]1.234567e+00" regardless
1712 	   of the value of the actual argument.  */
1713 	res.range.likely = (flagmin
1714 			    + radix
1715 			    + minprec
1716 			    + 2 /* e+ */ + 2);
1717 
1718 	res.range.max = format_floating_max (type, 'e', prec[1]);
1719 
1720 	/* The unlikely maximum accounts for the longest multibyte
1721 	   decimal point character.  */
1722 	if (dir.prec[0] != dir.prec[1]
1723 	    || dir.prec[0] == -1 || dir.prec[0] > 0)
1724 	  res.range.unlikely = res.range.max + target_mb_len_max () -1;
1725 	else
1726 	  res.range.unlikely = res.range.max;
1727 	break;
1728       }
1729 
1730     case 'F':
1731     case 'f':
1732       {
1733 	/* Minimum output attributable to precision and, when it's non-zero,
1734 	   decimal point.  */
1735 	HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1736 
1737 	/* For finite numbers (i.e., not infinity or NaN) the lower bound
1738 	   when precision isn't specified is 8 bytes ("1.23456" since
1739 	   precision is taken to be 6).  When precision is zero, the lower
1740 	   bound is 1 byte (e.g., "1").  Otherwise, when precision is greater
1741 	   than zero, then the lower bound is 2 plus precision (plus flags).
1742 	   But in all cases, the lower bound is no greater than 3.  */
1743 	unsigned HOST_WIDE_INT min = flagmin + radix + minprec;
1744 	if (min < res.range.min)
1745 	  res.range.min = min;
1746 
1747 	/* Compute the upper bound for -TYPE_MAX.  */
1748 	res.range.max = format_floating_max (type, 'f', prec[1]);
1749 
1750 	/* The minimum output with unknown precision is a single byte
1751 	   (e.g., "0") but the more likely output is 3 bytes ("0.0").  */
1752 	if (dir.prec[0] < 0 && dir.prec[1] > 0)
1753 	  res.range.likely = 3;
1754 	else
1755 	  res.range.likely = min;
1756 
1757 	/* The unlikely maximum accounts for the longest multibyte
1758 	   decimal point character.  */
1759 	if (dir.prec[0] != dir.prec[1]
1760 	    || dir.prec[0] == -1 || dir.prec[0] > 0)
1761 	  res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1762 	break;
1763       }
1764 
1765     case 'G':
1766     case 'g':
1767       {
1768 	/* The %g output depends on precision and the exponent of
1769 	   the argument.  Since the value of the argument isn't known
1770 	   the lower bound on the range of bytes (not counting flags
1771 	   or width) is 1 plus radix (i.e., either "0" or "0." for
1772 	   "%g" and "%#g", respectively, with a zero argument).  */
1773 	unsigned HOST_WIDE_INT min = flagmin + radix;
1774 	if (min < res.range.min)
1775 	  res.range.min = min;
1776 
1777 	char spec = 'g';
1778 	HOST_WIDE_INT maxprec = dir.prec[1];
1779 	if (radix && maxprec)
1780 	  {
1781 	    /* When the pound flag (radix) is set, trailing zeros aren't
1782 	       trimmed and so the longest output is the same as for %e,
1783 	       except with precision minus 1 (as specified in C11).  */
1784 	    spec = 'e';
1785 	    if (maxprec > 0)
1786 	      --maxprec;
1787 	    else if (maxprec < 0)
1788 	      maxprec = 5;
1789 	  }
1790 	else
1791 	  maxprec = prec[1];
1792 
1793 	res.range.max = format_floating_max (type, spec, maxprec);
1794 
1795 	/* The likely output is either the maximum computed above
1796 	   minus 1 (assuming the maximum is positive) when precision
1797 	   is known (or unspecified), or the same minimum as for %e
1798 	   (which is computed for a non-negative argument).  Unlike
1799 	   for the other specifiers above the likely output isn't
1800 	   the minimum because for %g that's 1 which is unlikely.  */
1801 	if (dir.prec[1] < 0
1802 	    || (unsigned HOST_WIDE_INT)dir.prec[1] < target_int_max ())
1803 	  res.range.likely = res.range.max - 1;
1804 	else
1805 	  {
1806 	    HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1807 	    res.range.likely = (flagmin
1808 				+ radix
1809 				+ minprec
1810 				+ 2 /* e+ */ + 2);
1811 	  }
1812 
1813 	/* The unlikely maximum accounts for the longest multibyte
1814 	   decimal point character.  */
1815 	res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1816 	break;
1817       }
1818 
1819     default:
1820       return fmtresult ();
1821     }
1822 
1823   /* Bump up the byte counters if WIDTH is greater.  */
1824   res.adjust_for_width_or_precision (dir.width);
1825   return res;
1826 }
1827 
/* Return a range representing the minimum and maximum number of bytes
   that the directive DIR will write on output for the floating argument
   ARG.  When ARG is null, is not a REAL_CST, or its type fails
   the useless_type_conversion_p check against the type selected by
   DIR's length modifier, the result is computed from the adjusted
   precision alone by the other format_floating overload.  Otherwise
   the constant is formatted with MPFR to obtain the exact lengths.
   The range_query argument is unused.  */

static fmtresult
format_floating (const directive &dir, tree arg, range_query *)
{
  /* Working copy of the directive's precision range.  Negative bounds
     are replaced below before the copy is used.  */
  HOST_WIDE_INT prec[] = { dir.prec[0], dir.prec[1] };

  /* The L and ll length modifiers select long double; everything else
     is treated as double.  */
  tree type = (dir.modifier == FMT_LEN_L || dir.modifier == FMT_LEN_ll
	       ? long_double_type_node : double_type_node);

  if (TOUPPER (dir.specifier) == 'A')
    {
      /* Get the number of fractional decimal digits needed to represent
	 the argument without a loss of accuracy.  */
      unsigned fmtprec
	= REAL_MODE_FORMAT (TYPE_MODE (type))->p;

      /* The precision of the IEEE 754 double format is 53.
	 The precision of all other GCC binary double formats
	 is 56 or less.  */
      unsigned maxprec = fmtprec <= 56 ? 13 : 15;

      /* For %a, leave the minimum precision unspecified to let
	 MPFR trim trailing zeros (as it and many other systems
	 including Glibc happen to do) and set the maximum
	 precision to reflect what it would be with trailing zeros
	 present (as Solaris and derived systems do).  */
      if (dir.prec[1] < 0)
	{
	  /* Both bounds are negative implies that precision has
	     not been specified.
	     NOTE(review): the two assignments below store MAXPREC in
	     the lower bound and -1 in the upper bound, which looks
	     reversed relative to the comment above.  For constant
	     arguments the min/max swap after the formatting loop
	     puts the results in order; confirm the intent for
	     the non-constant path.  */
	  prec[0] = maxprec;
	  prec[1] = -1;
	}
      else if (dir.prec[0] < 0)
	{
	  /* With a negative lower bound and a non-negative upper
	     bound set the minimum precision to zero and the maximum
	     to the greater of the maximum precision (i.e., with
	     trailing zeros present) and the specified upper bound.  */
	  prec[0] = 0;
	  prec[1] = dir.prec[1] < maxprec ? maxprec : dir.prec[1];
	}
    }
  else if (dir.prec[0] < 0)
    {
      if (dir.prec[1] < 0)
	{
	  /* A precision in a strictly negative range is ignored and
	     the default of 6 is used instead.  */
	  prec[0] = prec[1] = 6;
	}
      else
	{
	  /* For a precision in a partly negative range, the lower bound
	     must be assumed to be zero and the new upper bound is the
	     greater of 6 (the default precision used when the specified
	     precision is negative) and the upper bound of the specified
	     range.  */
	  prec[0] = 0;
	  prec[1] = dir.prec[1] < 6 ? 6 : dir.prec[1];
	}
    }

  /* Without a usable floating constant only the precision-based
     estimate is possible.  */
  if (!arg
      || TREE_CODE (arg) != REAL_CST
      || !useless_type_conversion_p (type, TREE_TYPE (arg)))
    return format_floating (dir, prec);

  /* The minimum and maximum number of bytes produced by the directive.  */
  fmtresult res;

  /* Get the real type format description for the target.  */
  const REAL_VALUE_TYPE *rvp = TREE_REAL_CST_PTR (arg);
  const real_format *rfmt = REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (arg)));

  if (!real_isfinite (rvp))
    {
      /* The format for Infinity and NaN is "[-]inf"/"[-]infinity"
	 and "[-]nan" with the choice being implementation-defined
	 but not locale dependent.  */
      bool sign = dir.get_flag ('+') || real_isneg (rvp);
      res.range.min = 3 + sign;

      res.range.likely = res.range.min;
      res.range.max = res.range.min;
      /* The unlikely maximum is "[-/+]infinity" or "[-/+][qs]nan".
	 For NaN, the C/POSIX standards specify two formats:
	   "[-/+]nan"
	 and
	   "[-/+]nan(n-char-sequence)"
	 No known printf implementation outputs the latter format but AIX
	 outputs QNaN and SNaN for quiet and signalling NaN, respectively,
	 so the unlikely maximum reflects that.  */
      res.range.unlikely = sign + (real_isinf (rvp) ? 8 : 4);

      /* The range for infinity and NaN is known unless either width
	 or precision is unknown.  Width has the same effect regardless
	 of whether the argument is finite.  Precision is either ignored
	 (e.g., Glibc) or can have an effect on the short vs long format
	 such as inf/infinity (e.g., Solaris).  */
      res.knownrange = dir.known_width_and_precision ();

      /* Adjust the range for width but ignore precision.  */
      res.adjust_for_width_or_precision (dir.width);

      return res;
    }

  /* Buffer for the flag characters handed to get_mpfr_format_length.  */
  char fmtstr [40];
  char *pfmt = fmtstr;

  /* Append flags.  */
  for (const char *pf = "-+ #0"; *pf; ++pf)
    if (dir.get_flag (*pf))
      *pfmt++ = *pf;

  *pfmt = '\0';

  {
    /* Set up an array to easily iterate over.  */
    unsigned HOST_WIDE_INT* const minmax[] = {
      &res.range.min, &res.range.max
    };

    for (int i = 0; i != sizeof minmax / sizeof *minmax; ++i)
      {
	/* Convert the GCC real value representation with the precision
	   of the real type to the mpfr_t format rounding down in the
	   first iteration that computes the minimum and up in the second
	   that computes the maximum.  This order is arbitrary because
	   rounding in either direction can result in longer output.  */
	mpfr_t mpfrval;
	mpfr_init2 (mpfrval, rfmt->p);
	mpfr_from_real (mpfrval, rvp, i ? MPFR_RNDU : MPFR_RNDD);

	/* Use the MPFR rounding specifier to round down in the first
	   iteration and then up.  In most but not all cases this will
	   result in the same number of bytes.  */
	char rndspec = "DU"[i];

	/* Format it and store the result in the corresponding member
	   of the result struct.  */
	*minmax[i] = get_mpfr_format_length (mpfrval, fmtstr, prec[i],
					     dir.specifier, rndspec);
	mpfr_clear (mpfrval);
      }
  }

  /* Make sure the minimum is less than the maximum (MPFR rounding
     in the call to mpfr_snprintf can result in the reverse).  */
  if (res.range.max < res.range.min)
    {
      unsigned HOST_WIDE_INT tmp = res.range.min;
      res.range.min = res.range.max;
      res.range.max = tmp;
    }

  /* The range is known unless either width or precision is unknown.  */
  res.knownrange = dir.known_width_and_precision ();

  /* For the same floating point constant, unless width or precision
     is unknown, use the longer output as the likely maximum since
     with round to nearest either is equally likely.  Otherwise, when
     precision is unknown, use the greater of the minimum and 3 as
     the likely output (for "0.0" since zero precision is unlikely).  */
  if (res.knownrange)
    res.range.likely = res.range.max;
  else if (res.range.min < 3
	   && dir.prec[0] < 0
	   && (unsigned HOST_WIDE_INT)dir.prec[1] == target_int_max ())
    res.range.likely = 3;
  else
    res.range.likely = res.range.min;

  res.range.unlikely = res.range.max;

  if (res.range.max > 2 && (prec[0] != 0 || prec[1] != 0))
    {
      /* Unless the precision is zero output longer than 2 bytes may
	 include the decimal point which must be a single character
	 up to MB_LEN_MAX in length.  This is overly conservative
	 since in some conversions some constants result in no decimal
	 point (e.g., in %g).  */
      res.range.unlikely += target_mb_len_max () - 1;
    }

  /* Bump up the byte counters if WIDTH is greater.  */
  res.adjust_for_width_or_precision (dir.width);
  return res;
}
2021 
/* Return a FMTRESULT struct set to the lengths of the shortest and longest
   strings referenced by the expression STR, or the default-constructed
   result when STR is null or nothing is known about it.  STMT and QUERY
   are passed to get_range_strlen_dynamic for narrow (ELTSIZE == 1)
   strings; wider strings go through get_range_strlen.  MAX_SIZE, when
   positive and less than HOST_WIDE_INT_MAX, caps the result for strings
   that are otherwise unbounded.  Used by the format_string function
   below.  */

static fmtresult
get_string_length (tree str, gimple *stmt, unsigned HOST_WIDE_INT max_size,
		   unsigned eltsize, range_query *query)
{
  if (!str)
    return fmtresult ();

  /* Try to determine the dynamic string length first.
     Set MAXBOUND to an arbitrary non-null non-integer node as a request
     to have it set to the length of the longest string in a PHI.  */
  c_strlen_data lendata = { };
  lendata.maxbound = str;
  if (eltsize == 1)
    get_range_strlen_dynamic (str, stmt, &lendata, query);
  else
    {
      /* Determine the length of the shortest and longest string referenced
	 by STR.  Strings of unknown lengths are bounded by the sizes of
	 arrays that subexpressions of STR may refer to.  Pointers that
	 aren't known to point any such arrays result in LENDATA.MAXLEN
	 set to SIZE_MAX.  */
      get_range_strlen (str, &lendata, eltsize);
    }

  /* If LENDATA.MAXBOUND is not equal to .MINLEN it corresponds to the bound
     of the largest array STR refers to, if known, or it's set to SIZE_MAX
     otherwise.  */

  /* Return the default result when nothing is known about the string.  */
  if ((lendata.maxbound && !tree_fits_uhwi_p (lendata.maxbound))
      || !tree_fits_uhwi_p (lendata.maxlen))
    {
      fmtresult res;
      res.nonstr = lendata.decl;
      return res;
    }

  /* A zero minimum together with bounds at or above LENMAX is treated
     as a string about which nothing useful is known.  */
  unsigned HOST_WIDE_INT lenmax = tree_to_uhwi (max_object_size ()) - 2;
  if (integer_zerop (lendata.minlen)
      && (!lendata.maxbound || lenmax <= tree_to_uhwi (lendata.maxbound))
      && lenmax <= tree_to_uhwi (lendata.maxlen))
    {
      if (max_size > 0 && max_size < HOST_WIDE_INT_MAX)
	{
	  /* Adjust the conservative unknown/unbounded result if MAX_SIZE
	     is valid.  Set UNLIKELY to maximum in case MAX_SIZE refers
	     to a subobject.
	     TODO: This is overly conservative.  Set UNLIKELY to the size
	     of the outermost enclosing declared object.  */
	  fmtresult res (0, max_size - 1);
	  res.nonstr = lendata.decl;
	  res.range.likely = res.range.max;
	  res.range.unlikely = HOST_WIDE_INT_MAX;
	  return res;
	}

      fmtresult res;
      res.nonstr = lendata.decl;
      return res;
    }

  /* The shortest length, or zero when MINLEN isn't a usable constant.  */
  HOST_WIDE_INT min
    = (tree_fits_uhwi_p (lendata.minlen)
       ? tree_to_uhwi (lendata.minlen)
       : 0);

  /* The longest length is taken from MAXBOUND, or all ones when
     MAXBOUND is null.  */
  HOST_WIDE_INT max
    = (lendata.maxbound && tree_fits_uhwi_p (lendata.maxbound)
       ? tree_to_uhwi (lendata.maxbound)
       : HOST_WIDE_INT_M1U);

  const bool unbounded = integer_all_onesp (lendata.maxlen);

  /* Set the max/likely counters to unbounded when a minimum is known
     but the maximum length isn't bounded.  This implies that STR is
     a conditional expression involving a string of known length and
     an expression of unknown/unbounded length.  */
  if (min
      && (unsigned HOST_WIDE_INT)min < HOST_WIDE_INT_M1U
      && unbounded)
    max = HOST_WIDE_INT_M1U;

  /* get_range_strlen() returns the target value of SIZE_MAX for
     strings of unknown length.  Bump it up to HOST_WIDE_INT_M1U
     which may be bigger.  */
  if ((unsigned HOST_WIDE_INT)min == target_size_max ())
    min = HOST_WIDE_INT_M1U;
  if ((unsigned HOST_WIDE_INT)max == target_size_max ())
    max = HOST_WIDE_INT_M1U;

  fmtresult res (min, max);
  res.nonstr = lendata.decl;

  /* Set RES.KNOWNRANGE to true if and only if all strings referenced
     by STR are known to be bounded (though not necessarily by their
     actual length but perhaps by their maximum possible length).  */
  if (res.range.max < target_int_max ())
    {
      res.knownrange = true;
      /* When the length of the longest string is known and not
	 excessive use it as the likely length of the string(s).  */
      res.range.likely = res.range.max;
    }
  else
    {
      /* When the upper bound is unknown (it can be zero or excessive)
	 set the likely length to the minimum if that is nonzero, and
	 otherwise to zero or one depending on the warning level.
	 If MAXBOUND is known, also reset the lower bound to zero.  */
      res.range.likely = res.range.min ? res.range.min : warn_level > 1;
      if (lendata.maxbound && !integer_all_onesp (lendata.maxbound))
	res.range.min = 0;
    }

  res.range.unlikely = unbounded ? HOST_WIDE_INT_MAX : res.range.max;

  return res;
}
2143 
2144 /* Return the minimum and maximum number of characters formatted
2145    by the '%c' format directives and its wide character form for
2146    the argument ARG.  ARG can be null (for functions such as
2147    vsprinf).  */
2148 
2149 static fmtresult
format_character(const directive & dir,tree arg,range_query * query)2150 format_character (const directive &dir, tree arg, range_query *query)
2151 {
2152   fmtresult res;
2153 
2154   res.knownrange = true;
2155 
2156   if (dir.specifier == 'C'
2157       || dir.modifier == FMT_LEN_l)
2158     {
2159       /* A wide character can result in as few as zero bytes.  */
2160       res.range.min = 0;
2161 
2162       HOST_WIDE_INT min, max;
2163       if (get_int_range (arg, dir.info->callstmt, &min, &max, false, 0, query))
2164 	{
2165 	  if (min == 0 && max == 0)
2166 	    {
2167 	      /* The NUL wide character results in no bytes.  */
2168 	      res.range.max = 0;
2169 	      res.range.likely = 0;
2170 	      res.range.unlikely = 0;
2171 	    }
2172 	  else if (min >= 0 && min < 128)
2173 	    {
2174 	      /* Be conservative if the target execution character set
2175 		 is not a 1-to-1 mapping to the source character set or
2176 		 if the source set is not ASCII.  */
2177 	      bool one_2_one_ascii
2178 		= (target_to_host_charmap[0] == 1 && target_to_host ('a') == 97);
2179 
2180 	      /* A wide character in the ASCII range most likely results
2181 		 in a single byte, and only unlikely in up to MB_LEN_MAX.  */
2182 	      res.range.max = one_2_one_ascii ? 1 : target_mb_len_max ();;
2183 	      res.range.likely = 1;
2184 	      res.range.unlikely = target_mb_len_max ();
2185 	      res.mayfail = !one_2_one_ascii;
2186 	    }
2187 	  else
2188 	    {
2189 	      /* A wide character outside the ASCII range likely results
2190 		 in up to two bytes, and only unlikely in up to MB_LEN_MAX.  */
2191 	      res.range.max = target_mb_len_max ();
2192 	      res.range.likely = 2;
2193 	      res.range.unlikely = res.range.max;
2194 	      /* Converting such a character may fail.  */
2195 	      res.mayfail = true;
2196 	    }
2197 	}
2198       else
2199 	{
2200 	  /* An unknown wide character is treated the same as a wide
2201 	     character outside the ASCII range.  */
2202 	  res.range.max = target_mb_len_max ();
2203 	  res.range.likely = 2;
2204 	  res.range.unlikely = res.range.max;
2205 	  res.mayfail = true;
2206 	}
2207     }
2208   else
2209     {
2210       /* A plain '%c' directive.  Its output is exactly 1.  */
2211       res.range.min = res.range.max = 1;
2212       res.range.likely = res.range.unlikely = 1;
2213       res.knownrange = true;
2214     }
2215 
2216   /* Bump up the byte counters if WIDTH is greater.  */
2217   return res.adjust_for_width_or_precision (dir.width);
2218 }
2219 
2220 /* If TYPE is an array or struct or union, increment *FLDOFF by the starting
2221    offset of the member that *OFF point into and set *FLDSIZE to its size
2222    in bytes and decrement *OFF by the same.  Otherwise do nothing.  */
2223 
2224 static void
set_aggregate_size_and_offset(tree type,HOST_WIDE_INT * fldoff,HOST_WIDE_INT * fldsize,HOST_WIDE_INT * off)2225 set_aggregate_size_and_offset (tree type, HOST_WIDE_INT *fldoff,
2226 			       HOST_WIDE_INT *fldsize, HOST_WIDE_INT *off)
2227 {
2228   /* The byte offset of the most basic struct member the byte
2229      offset *OFF corresponds to, or for a (multidimensional)
2230      array member, the byte offset of the array element.  */
2231   if (TREE_CODE (type) == ARRAY_TYPE
2232       && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
2233     {
2234       HOST_WIDE_INT index = 0, arrsize = 0;
2235       if (array_elt_at_offset (type, *off, &index, &arrsize))
2236 	{
2237 	  *fldoff += index;
2238 	  *off -= index;
2239 	  *fldsize = arrsize;
2240 	}
2241     }
2242   else if (RECORD_OR_UNION_TYPE_P (type))
2243     {
2244       HOST_WIDE_INT index = 0;
2245       tree sub = field_at_offset (type, NULL_TREE, *off, &index);
2246       if (sub)
2247 	{
2248 	  tree subsize = DECL_SIZE_UNIT (sub);
2249 	  if (*fldsize < HOST_WIDE_INT_MAX
2250 	      && subsize
2251 	      && tree_fits_uhwi_p (subsize))
2252 	    *fldsize = tree_to_uhwi (subsize);
2253 	  else
2254 	    *fldsize = HOST_WIDE_INT_MAX;
2255 	  *fldoff += index;
2256 	  *off -= index;
2257 	}
2258     }
2259 }
2260 
/* For an expression X of pointer type, recursively try to find the same
   origin (object or pointer) as Y it references and return such a Y.
   When X refers to an array element or struct member, set *FLDOFF to
   the offset of the element or member from the beginning of the "most
   derived" object and *FLDSIZE to its size.  When nonnull, set *OFF to
   the overall offset from the beginning of the object so that
   *FLDOFF <= *OFF.  FLDSIZE may be null, in which case a local buffer
   is used in its place.  Return NULL_TREE when X is null.  */

static tree
get_origin_and_offset_r (tree x, HOST_WIDE_INT *fldoff, HOST_WIDE_INT *fldsize,
			 HOST_WIDE_INT *off)
{
  if (!x)
    return NULL_TREE;

  /* Stand-in for *FLDSIZE when the caller isn't interested in the size.
     The negative value means "not set yet".  */
  HOST_WIDE_INT sizebuf = -1;
  if (!fldsize)
    fldsize = &sizebuf;

  if (DECL_P (x))
    {
      /* Set the size if it hasn't been set yet.  */
      if (tree size = DECL_SIZE_UNIT (x))
	if (*fldsize < 0 && tree_fits_shwi_p (size))
	  *fldsize = tree_to_shwi (size);
      return x;
    }

  switch (TREE_CODE (x))
    {
    case ADDR_EXPR:
      /* Look through the address-of operator.  */
      x = TREE_OPERAND (x, 0);
      return get_origin_and_offset_r (x, fldoff, fldsize, off);

    case ARRAY_REF:
      {
	/* The array index, or HOST_WIDE_INT_MAX when it's not a constant
	   that fits in an unsigned HOST_WIDE_INT.  */
	tree offset = TREE_OPERAND (x, 1);
	HOST_WIDE_INT idx = (tree_fits_uhwi_p (offset)
			     ? tree_to_uhwi (offset) : HOST_WIDE_INT_MAX);

	/* An index into an array of integers is recorded as the offset
	   *OFF.  An index into an array of any other element type is
	   scaled by the element size and added to *FLDOFF, or, when
	   the index is unknown, *FLDOFF is set to HOST_WIDE_INT_MAX.  */
	tree eltype = TREE_TYPE (x);
	if (TREE_CODE (eltype) == INTEGER_TYPE)
	  {
	    if (off)
	      *off = idx;
	  }
	else if (idx < HOST_WIDE_INT_MAX)
	  *fldoff += idx * int_size_in_bytes (eltype);
	else
	  *fldoff = idx;

	/* Continue with the array operand without updating *OFF any
	   further.  */
	x = TREE_OPERAND (x, 0);
	return get_origin_and_offset_r (x, fldoff, fldsize, nullptr);
      }

    case MEM_REF:
      if (off)
	{
	  /* Record the constant MEM_REF offset, or HOST_WIDE_INT_MAX
	     when it isn't a usable constant.  */
	  tree offset = TREE_OPERAND (x, 1);
	  *off = (tree_fits_uhwi_p (offset)
		  ? tree_to_uhwi (offset) : HOST_WIDE_INT_MAX);
	}

      x = TREE_OPERAND (x, 0);

      if (off)
	{
	  /* The type of the referenced object: that of the operand of
	     an ADDR_EXPR, or else the type the pointer points to.  */
	  tree xtype
	    = (TREE_CODE (x) == ADDR_EXPR
	       ? TREE_TYPE (TREE_OPERAND (x, 0)) : TREE_TYPE (TREE_TYPE (x)));

	  set_aggregate_size_and_offset (xtype, fldoff, fldsize, off);
	}

      return get_origin_and_offset_r (x, fldoff, fldsize, nullptr);

    case COMPONENT_REF:
      {
	tree fld = TREE_OPERAND (x, 1);
	*fldoff += int_byte_position (fld);

	/* The first call handles the member declaration itself (see
	   the DECL_P case above); its return value isn't needed.
	   The second continues with the enclosing object, passing
	   a null FLDSIZE so that *FLDSIZE is left alone.  */
	get_origin_and_offset_r (fld, fldoff, fldsize, off);
	x = TREE_OPERAND (x, 0);
	return get_origin_and_offset_r (x, fldoff, nullptr, off);
      }

    case SSA_NAME:
      {
	gimple *def = SSA_NAME_DEF_STMT (x);
	if (is_gimple_assign (def))
	  {
	    tree_code code = gimple_assign_rhs_code (def);
	    if (code == ADDR_EXPR)
	      {
		/* Follow an assignment of an address.  */
		x = gimple_assign_rhs1 (def);
		return get_origin_and_offset_r (x, fldoff, fldsize, off);
	      }

	    if (code == POINTER_PLUS_EXPR)
	      {
		/* Record a constant pointer offset before recursing
		   into the base pointer.  A non-constant offset is
		   recorded as HOST_WIDE_INT_MAX after the recursive
		   call so that the call cannot overwrite it.  */
		tree offset = gimple_assign_rhs2 (def);
		if (off && tree_fits_uhwi_p (offset))
		  *off = tree_to_uhwi (offset);

		x = gimple_assign_rhs1 (def);
		x = get_origin_and_offset_r (x, fldoff, fldsize, off);
		if (off && !tree_fits_uhwi_p (offset))
		  *off = HOST_WIDE_INT_MAX;
		if (off)
		  {
		    tree xtype = TREE_TYPE (x);
		    set_aggregate_size_and_offset (xtype, fldoff, fldsize, off);
		  }
		return x;
	      }
	    else if (code == VAR_DECL)
	      {
		/* Follow an assignment from a variable.  */
		x = gimple_assign_rhs1 (def);
		return get_origin_and_offset_r (x, fldoff, fldsize, off);
	      }
	  }
	else if (gimple_nop_p (def) && SSA_NAME_VAR (x))
	  /* An SSA_NAME with no defining statement: use its underlying
	     variable.  */
	  x = SSA_NAME_VAR (x);

	/* For any other SSA_NAME refine the result using the type X
	   has, or points to.  */
	tree xtype = TREE_TYPE (x);
	if (POINTER_TYPE_P (xtype))
	  xtype = TREE_TYPE (xtype);

	if (off)
	  set_aggregate_size_and_offset (xtype, fldoff, fldsize, off);
      }
      /* Fall through to return X.  */

    default:
      break;
    }

  return x;
}
2399 
2400 /* Nonrecursive version of the above.  */
2401 
2402 static tree
2403 get_origin_and_offset (tree x, HOST_WIDE_INT *fldoff, HOST_WIDE_INT *off,
2404 		       HOST_WIDE_INT *fldsize = nullptr)
2405 {
2406   HOST_WIDE_INT sizebuf;
2407   if (!fldsize)
2408     fldsize = &sizebuf;
2409 
2410   *fldsize = -1;
2411 
2412   *fldoff = *off = *fldsize = 0;
2413   tree orig = get_origin_and_offset_r (x, fldoff, fldsize, off);
2414   if (!orig)
2415     return NULL_TREE;
2416 
2417   if (!*fldoff && *off == *fldsize)
2418     {
2419       *fldoff = *off;
2420       *off = 0;
2421     }
2422 
2423   return orig;
2424 }
2425 
2426 /* If ARG refers to the same (sub)object or array element as described
2427    by DST and DST_FLD, return the byte offset into the struct member or
2428    array element referenced by ARG and set *ARG_SIZE to the size of
2429    the (sub)object.  Otherwise return HOST_WIDE_INT_MIN to indicate
2430    that ARG and DST do not refer to the same object.  */
2431 
2432 static HOST_WIDE_INT
alias_offset(tree arg,HOST_WIDE_INT * arg_size,tree dst,HOST_WIDE_INT dst_fld)2433 alias_offset (tree arg, HOST_WIDE_INT *arg_size,
2434 	      tree dst, HOST_WIDE_INT dst_fld)
2435 {
2436   /* See if the argument refers to the same base object as the destination
2437      of the formatted function call, and if so, try to determine if they
2438      can alias.  */
2439   if (!arg || !dst || !ptr_derefs_may_alias_p (arg, dst))
2440     return HOST_WIDE_INT_MIN;
2441 
2442   /* The two arguments may refer to the same object.  If they both refer
2443      to a struct member, see if the members are one and the same.  */
2444   HOST_WIDE_INT arg_off = 0, arg_fld = 0;
2445 
2446   tree arg_orig = get_origin_and_offset (arg, &arg_fld, &arg_off, arg_size);
2447 
2448   if (arg_orig == dst && arg_fld == dst_fld)
2449     return arg_off;
2450 
2451   return HOST_WIDE_INT_MIN;
2452 }
2453 
2454 /* Return the minimum and maximum number of characters formatted
2455    by the '%s' format directive and its wide character form for
2456    the argument ARG.  ARG can be null (for functions such as
2457    vsprinf).  */
2458 
2459 static fmtresult
format_string(const directive & dir,tree arg,range_query * query)2460 format_string (const directive &dir, tree arg, range_query *query)
2461 {
2462   fmtresult res;
2463 
2464   /* The size of the (sub)object ARG refers to.  Used to adjust
2465      the conservative get_string_length() result.  */
2466   HOST_WIDE_INT arg_size = 0;
2467 
2468   if (warn_restrict)
2469     {
2470       /* See if ARG might alias the destination of the call with
2471 	 DST_ORIGIN and DST_FIELD.  If so, store the starting offset
2472 	 so that the overlap can be determined for certain later,
2473 	 when the amount of output of the call (including subsequent
2474 	 directives) has been computed.  Otherwise, store HWI_MIN.  */
2475       res.dst_offset = alias_offset (arg, &arg_size, dir.info->dst_origin,
2476 				     dir.info->dst_field);
2477       if (res.dst_offset >= 0 && res.dst_offset <= arg_size)
2478 	arg_size -= res.dst_offset;
2479       else
2480 	arg_size = 0;
2481     }
2482 
2483   /* Compute the range the argument's length can be in.  */
2484   int count_by = 1;
2485   if (dir.specifier == 'S' || dir.modifier == FMT_LEN_l)
2486     {
2487       /* Get a node for a C type that will be the same size
2488 	 as a wchar_t on the target.  */
2489       tree node = get_typenode_from_name (MODIFIED_WCHAR_TYPE);
2490 
2491       /* Now that we have a suitable node, get the number of
2492 	 bytes it occupies.  */
2493       count_by = int_size_in_bytes (node);
2494       gcc_checking_assert (count_by == 2 || count_by == 4);
2495     }
2496 
2497   fmtresult slen =
2498     get_string_length (arg, dir.info->callstmt, arg_size, count_by, query);
2499   if (slen.range.min == slen.range.max
2500       && slen.range.min < HOST_WIDE_INT_MAX)
2501     {
2502       /* The argument is either a string constant or it refers
2503 	 to one of a number of strings of the same length.  */
2504 
2505       /* A '%s' directive with a string argument with constant length.  */
2506       res.range = slen.range;
2507 
2508       if (dir.specifier == 'S'
2509 	  || dir.modifier == FMT_LEN_l)
2510 	{
2511 	  /* In the worst case the length of output of a wide string S
2512 	     is bounded by MB_LEN_MAX * wcslen (S).  */
2513 	  res.range.max *= target_mb_len_max ();
2514 	  res.range.unlikely = res.range.max;
2515 	  /* It's likely that the total length is not more that
2516 	     2 * wcslen (S).*/
2517 	  res.range.likely = res.range.min * 2;
2518 
2519 	  if (dir.prec[1] >= 0
2520 	      && (unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2521 	    {
2522 	      res.range.max = dir.prec[1];
2523 	      res.range.likely = dir.prec[1];
2524 	      res.range.unlikely = dir.prec[1];
2525 	    }
2526 
2527 	  if (dir.prec[0] < 0 && dir.prec[1] > -1)
2528 	    res.range.min = 0;
2529 	  else if (dir.prec[0] >= 0)
2530 	    res.range.likely = dir.prec[0];
2531 
2532 	  /* Even a non-empty wide character string need not convert into
2533 	     any bytes.  */
2534 	  res.range.min = 0;
2535 
2536 	  /* A non-empty wide character conversion may fail.  */
2537 	  if (slen.range.max > 0)
2538 	    res.mayfail = true;
2539 	}
2540       else
2541 	{
2542 	  res.knownrange = true;
2543 
2544 	  if (dir.prec[0] < 0 && dir.prec[1] > -1)
2545 	    res.range.min = 0;
2546 	  else if ((unsigned HOST_WIDE_INT)dir.prec[0] < res.range.min)
2547 	    res.range.min = dir.prec[0];
2548 
2549 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2550 	    {
2551 	      res.range.max = dir.prec[1];
2552 	      res.range.likely = dir.prec[1];
2553 	      res.range.unlikely = dir.prec[1];
2554 	    }
2555 	}
2556     }
2557   else if (arg && integer_zerop (arg))
2558     {
2559       /* Handle null pointer argument.  */
2560 
2561       fmtresult res (0);
2562       res.nullp = true;
2563       return res;
2564     }
2565   else
2566     {
2567       /* For a '%s' and '%ls' directive with a non-constant string (either
2568 	 one of a number of strings of known length or an unknown string)
2569 	 the minimum number of characters is lesser of PRECISION[0] and
2570 	 the length of the shortest known string or zero, and the maximum
2571 	 is the lesser of the length of the longest known string or
2572 	 PTRDIFF_MAX and PRECISION[1].  The likely length is either
2573 	 the minimum at level 1 and the greater of the minimum and 1
2574 	 at level 2.  This result is adjust upward for width (if it's
2575 	 specified).  */
2576 
2577       if (dir.specifier == 'S'
2578 	  || dir.modifier == FMT_LEN_l)
2579 	{
2580 	  /* A wide character converts to as few as zero bytes.  */
2581 	  slen.range.min = 0;
2582 	  if (slen.range.max < target_int_max ())
2583 	    slen.range.max *= target_mb_len_max ();
2584 
2585 	  if (slen.range.likely < target_int_max ())
2586 	    slen.range.likely *= 2;
2587 
2588 	  if (slen.range.likely < target_int_max ())
2589 	    slen.range.unlikely *= target_mb_len_max ();
2590 
2591 	  /* A non-empty wide character conversion may fail.  */
2592 	  if (slen.range.max > 0)
2593 	    res.mayfail = true;
2594 	}
2595 
2596       res.range = slen.range;
2597 
2598       if (dir.prec[0] >= 0)
2599 	{
2600 	  /* Adjust the minimum to zero if the string length is unknown,
2601 	     or at most the lower bound of the precision otherwise.  */
2602 	  if (slen.range.min >= target_int_max ())
2603 	    res.range.min = 0;
2604 	  else if ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.min)
2605 	    res.range.min = dir.prec[0];
2606 
2607 	  /* Make both maxima no greater than the upper bound of precision.  */
2608 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max
2609 	      || slen.range.max >= target_int_max ())
2610 	    {
2611 	      res.range.max = dir.prec[1];
2612 	      res.range.unlikely = dir.prec[1];
2613 	    }
2614 
2615 	  /* If precision is constant, set the likely counter to the lesser
2616 	     of it and the maximum string length.  Otherwise, if the lower
2617 	     bound of precision is greater than zero, set the likely counter
2618 	     to the minimum.  Otherwise set it to zero or one based on
2619 	     the warning level.  */
2620 	  if (dir.prec[0] == dir.prec[1])
2621 	    res.range.likely
2622 	      = ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.max
2623 		 ? dir.prec[0] : slen.range.max);
2624 	  else if (dir.prec[0] > 0)
2625 	    res.range.likely = res.range.min;
2626 	  else
2627 	    res.range.likely = warn_level > 1;
2628 	}
2629       else if (dir.prec[1] >= 0)
2630 	{
2631 	  res.range.min = 0;
2632 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max)
2633 	    res.range.max = dir.prec[1];
2634 	  res.range.likely = dir.prec[1] ? warn_level > 1 : 0;
2635 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.unlikely)
2636 	    res.range.unlikely = dir.prec[1];
2637 	}
2638       else if (slen.range.min >= target_int_max ())
2639 	{
2640 	  res.range.min = 0;
2641 	  res.range.max = HOST_WIDE_INT_MAX;
	  /* At level 1 strings of unknown length are assumed to be
	     empty, while at higher levels they are assumed to be one
	     byte long.  */
2645 	  res.range.likely = warn_level > 1;
2646 	  res.range.unlikely = HOST_WIDE_INT_MAX;
2647 	}
2648       else
2649 	{
2650 	  /* A string of unknown length unconstrained by precision is
2651 	     assumed to be empty at level 1 and just one character long
2652 	     at higher levels.  */
2653 	  if (res.range.likely >= target_int_max ())
2654 	    res.range.likely = warn_level > 1;
2655 	}
2656     }
2657 
2658   /* If the argument isn't a nul-terminated string and the number
2659      of bytes on output isn't bounded by precision, set NONSTR.  */
2660   if (slen.nonstr && slen.range.min < (unsigned HOST_WIDE_INT)dir.prec[0])
2661     res.nonstr = slen.nonstr;
2662 
2663   /* Bump up the byte counters if WIDTH is greater.  */
2664   return res.adjust_for_width_or_precision (dir.width);
2665 }
2666 
2667 /* Format plain string (part of the format string itself).  */
2668 
2669 static fmtresult
format_plain(const directive & dir,tree,range_query *)2670 format_plain (const directive &dir, tree, range_query *)
2671 {
2672   fmtresult res (dir.len);
2673   return res;
2674 }
2675 
2676 /* Return true if the RESULT of a directive in a call describe by INFO
2677    should be diagnosed given the AVAILable space in the destination.  */
2678 
2679 static bool
should_warn_p(const call_info & info,const result_range & avail,const result_range & result)2680 should_warn_p (const call_info &info,
2681 	       const result_range &avail, const result_range &result)
2682 {
2683   if (result.max <= avail.min)
2684     {
2685       /* The least amount of space remaining in the destination is big
2686 	 enough for the longest output.  */
2687       return false;
2688     }
2689 
2690   if (info.bounded)
2691     {
2692       if (warn_format_trunc == 1 && result.min <= avail.max
2693 	  && info.retval_used ())
2694 	{
2695 	  /* The likely amount of space remaining in the destination is big
2696 	     enough for the least output and the return value is used.  */
2697 	  return false;
2698 	}
2699 
2700       if (warn_format_trunc == 1 && result.likely <= avail.likely
2701 	  && !info.retval_used ())
2702 	{
2703 	  /* The likely amount of space remaining in the destination is big
2704 	     enough for the likely output and the return value is unused.  */
2705 	  return false;
2706 	}
2707 
2708       if (warn_format_trunc == 2
2709 	  && result.likely <= avail.min
2710 	  && (result.max <= avail.min
2711 	      || result.max > HOST_WIDE_INT_MAX))
2712 	{
2713 	  /* The minimum amount of space remaining in the destination is big
2714 	     enough for the longest output.  */
2715 	  return false;
2716 	}
2717     }
2718   else
2719     {
2720       if (warn_level == 1 && result.likely <= avail.likely)
2721 	{
2722 	  /* The likely amount of space remaining in the destination is big
2723 	     enough for the likely output.  */
2724 	  return false;
2725 	}
2726 
2727       if (warn_level == 2
2728 	  && result.likely <= avail.min
2729 	  && (result.max <= avail.min
2730 	      || result.max > HOST_WIDE_INT_MAX))
2731 	{
2732 	  /* The minimum amount of space remaining in the destination is big
2733 	     enough for the longest output.  */
2734 	  return false;
2735 	}
2736     }
2737 
2738   return true;
2739 }
2740 
/* At format string location described by DIRLOC in a call described
   by INFO, issue a warning for a directive DIR whose output may be
   in excess of the available space AVAIL_RANGE in the destination
   given the formatting result RES.  ARGLOC is passed to the diagnostic
   routines along with DIRLOC (except for the terminating nul, for
   which UNKNOWN_LOCATION is used instead).  This function does nothing
   except decide whether to issue a warning for a possible write
   past the end or truncation and, if so, format the warning.
   The wording is selected by whether the call is bounded (INFO.BOUNDED:
   "truncated" as opposed to "writing"), by whether the problem is only
   possible rather than certain (MAYBE below), and by the shape of RES
   and AVAIL_RANGE (exact value, zero-based range, bounded range, or
   open-ended range).
   Return true if a warning has been issued.  */

static bool
maybe_warn (substring_loc &dirloc, location_t argloc,
	    const call_info &info,
	    const result_range &avail_range, const result_range &res,
	    const directive &dir)
{
  if (!should_warn_p (info, avail_range, res))
    return false;

  /* A diagnostic is warranted.  The rest of the function only selects
     its wording; the value returned is that of the fmtwarn or fmtwarn_n
     call that formats it.  */

  /* The maximum byte count to reference in the warning.  Larger counts
     imply that the upper bound is unknown (and could be anywhere between
     RES.MIN + 1 and SIZE_MAX / 2) are printed as "N or more bytes" rather
     than "between N and X" where X is some huge number.  */
  unsigned HOST_WIDE_INT maxbytes = target_dir_max ();

  /* True when there is enough room in the destination for the least
     amount of a directive's output but not enough for its likely or
     maximum output.  */
  bool maybe = (res.min <= avail_range.max
		&& (avail_range.min < res.likely
		    || (res.max < HOST_WIDE_INT_MAX
			&& avail_range.min < res.max)));

  /* Buffer for the directive in the host character set (used when
     the source character set is different).  */
  char hostdir[32];

  if (avail_range.min == avail_range.max)
    {
      /* The size of the destination region is exact.  */
      unsigned HOST_WIDE_INT navail = avail_range.max;

      if (target_to_host (*dir.beg) != '%')
	{
	  /* For plain character directives (i.e., the format string itself)
	     but not others, point the caret at the first character that's
	     past the end of the destination.  */
	  if (navail < dir.len)
	    dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
	}

      if (*dir.beg == '\0')
	{
	  /* This is the terminating nul.  */
	  gcc_assert (res.min == 1 && res.min == res.max);

	  return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
			  info.bounded
			  ? (maybe
			     ? G_("%qE output may be truncated before the "
				  "last format character")
			     : G_("%qE output truncated before the last "
				  "format character"))
			  : (maybe
			     ? G_("%qE may write a terminating nul past the "
				  "end of the destination")
			     : G_("%qE writing a terminating nul past the "
				  "end of the destination")),
			  info.func);
	}

      /* The directive results in an exact number of bytes.  Fmtwarn_n
	 chooses between the singular and plural form of the message
	 based on RES.MIN.  */
      if (res.min == res.max)
	{
	  const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
	  if (!info.bounded)
	    return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
			      "%<%.*s%> directive writing %wu byte into a "
			      "region of size %wu",
			      "%<%.*s%> directive writing %wu bytes into a "
			      "region of size %wu",
			      (int) dir.len, d, res.min, navail);
	  else if (maybe)
	    return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
			      "%<%.*s%> directive output may be truncated "
			      "writing %wu byte into a region of size %wu",
			      "%<%.*s%> directive output may be truncated "
			      "writing %wu bytes into a region of size %wu",
			      (int) dir.len, d, res.min, navail);
	  else
	    return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
			      "%<%.*s%> directive output truncated writing "
			      "%wu byte into a region of size %wu",
			      "%<%.*s%> directive output truncated writing "
			      "%wu bytes into a region of size %wu",
			      (int) dir.len, d, res.min, navail);
	}
      /* The output is between zero and a maximum small enough to be
	 mentioned in the warning: "up to N bytes".  */
      if (res.min == 0 && res.max < maxbytes)
	return fmtwarn (dirloc, argloc, NULL,
			info.warnopt (),
			info.bounded
			? (maybe
			   ? G_("%<%.*s%> directive output may be truncated "
				"writing up to %wu bytes into a region of "
				"size %wu")
			   : G_("%<%.*s%> directive output truncated writing "
				"up to %wu bytes into a region of size %wu"))
			: G_("%<%.*s%> directive writing up to %wu bytes "
			     "into a region of size %wu"), (int) dir.len,
			target_to_host (hostdir, sizeof hostdir, dir.beg),
			res.max, navail);

      if (res.min == 0 && maxbytes <= res.max)
	/* This is a special case to avoid issuing the potentially
	   confusing warning:
	     writing 0 or more bytes into a region of size 0.
	   The likely counter RES.LIKELY is mentioned instead of
	   the zero minimum.  */
	return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			info.bounded
			? (maybe
			   ? G_("%<%.*s%> directive output may be truncated "
				"writing likely %wu or more bytes into a "
				"region of size %wu")
			   : G_("%<%.*s%> directive output truncated writing "
				"likely %wu or more bytes into a region of "
				"size %wu"))
			: G_("%<%.*s%> directive writing likely %wu or more "
			     "bytes into a region of size %wu"), (int) dir.len,
			target_to_host (hostdir, sizeof hostdir, dir.beg),
			res.likely, navail);

      /* The output is in a range with a nonzero minimum and a maximum
	 small enough to be mentioned: "between N and M bytes".  */
      if (res.max < maxbytes)
	return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			info.bounded
			? (maybe
			   ? G_("%<%.*s%> directive output may be truncated "
				"writing between %wu and %wu bytes into a "
				"region of size %wu")
			   : G_("%<%.*s%> directive output truncated "
				"writing between %wu and %wu bytes into a "
				"region of size %wu"))
			: G_("%<%.*s%> directive writing between %wu and "
			     "%wu bytes into a region of size %wu"),
			(int) dir.len,
			target_to_host (hostdir, sizeof hostdir, dir.beg),
			res.min, res.max, navail);

      /* Otherwise the maximum is MAXBYTES or more and only the nonzero
	 minimum is mentioned: "N or more bytes".  */
      return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
		      info.bounded
		      ? (maybe
			 ? G_("%<%.*s%> directive output may be truncated "
			      "writing %wu or more bytes into a region of "
			      "size %wu")
			 : G_("%<%.*s%> directive output truncated writing "
			      "%wu or more bytes into a region of size %wu"))
		      : G_("%<%.*s%> directive writing %wu or more bytes "
			   "into a region of size %wu"), (int) dir.len,
		      target_to_host (hostdir, sizeof hostdir, dir.beg),
		      res.min, navail);
    }

  /* The size of the destination region is a range.  The cases below
     mirror those above except that the messages mention both bounds
     of AVAIL_RANGE.  */

  if (target_to_host (*dir.beg) != '%')
    {
      unsigned HOST_WIDE_INT navail = avail_range.max;

      /* For plain character directives (i.e., the format string itself)
	 but not others, point the caret at the first character that's
	 past the end of the destination.  */
      if (navail < dir.len)
	dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
    }

  if (*dir.beg == '\0')
    {
      /* This is the terminating nul.  */
      gcc_assert (res.min == 1 && res.min == res.max);

      return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
		      info.bounded
		      ? (maybe
			 ? G_("%qE output may be truncated before the last "
			      "format character")
			 : G_("%qE output truncated before the last format "
			      "character"))
		      : (maybe
			 ? G_("%qE may write a terminating nul past the end "
			      "of the destination")
			 : G_("%qE writing a terminating nul past the end "
			      "of the destination")), info.func);
    }

  /* The directive results in an exact number of bytes.  */
  if (res.min == res.max)
    {
      const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
      if (!info.bounded)
	return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
			  "%<%.*s%> directive writing %wu byte into a region "
			  "of size between %wu and %wu",
			  "%<%.*s%> directive writing %wu bytes into a region "
			  "of size between %wu and %wu", (int) dir.len, d,
			  res.min, avail_range.min, avail_range.max);
      else if (maybe)
	return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
			  "%<%.*s%> directive output may be truncated writing "
			  "%wu byte into a region of size between %wu and %wu",
			  "%<%.*s%> directive output may be truncated writing "
			  "%wu bytes into a region of size between %wu and "
			  "%wu", (int) dir.len, d, res.min, avail_range.min,
			  avail_range.max);
      else
	return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
			  "%<%.*s%> directive output truncated writing %wu "
			  "byte into a region of size between %wu and %wu",
			  "%<%.*s%> directive output truncated writing %wu "
			  "bytes into a region of size between %wu and %wu",
			  (int) dir.len, d, res.min, avail_range.min,
			  avail_range.max);
    }

  /* The output is between zero and a maximum small enough to be
     mentioned in the warning: "up to N bytes".  */
  if (res.min == 0 && res.max < maxbytes)
    return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
		    info.bounded
		    ? (maybe
		       ? G_("%<%.*s%> directive output may be truncated "
			    "writing up to %wu bytes into a region of size "
			    "between %wu and %wu")
		       : G_("%<%.*s%> directive output truncated writing "
			    "up to %wu bytes into a region of size between "
			    "%wu and %wu"))
		    : G_("%<%.*s%> directive writing up to %wu bytes "
			 "into a region of size between %wu and %wu"),
		    (int) dir.len,
		    target_to_host (hostdir, sizeof hostdir, dir.beg),
		    res.max, avail_range.min, avail_range.max);

  if (res.min == 0 && maxbytes <= res.max)
    /* This is a special case to avoid issuing the potentially confusing
       warning:
	 writing 0 or more bytes into a region of size between 0 and N.
       The likely counter RES.LIKELY is mentioned instead of the zero
       minimum.  */
    return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
		    info.bounded
		    ? (maybe
		       ? G_("%<%.*s%> directive output may be truncated "
			    "writing likely %wu or more bytes into a region "
			    "of size between %wu and %wu")
		       : G_("%<%.*s%> directive output truncated writing "
			    "likely %wu or more bytes into a region of size "
			    "between %wu and %wu"))
		    : G_("%<%.*s%> directive writing likely %wu or more bytes "
			 "into a region of size between %wu and %wu"),
		    (int) dir.len,
		    target_to_host (hostdir, sizeof hostdir, dir.beg),
		    res.likely, avail_range.min, avail_range.max);

  /* The output is in a range with a nonzero minimum and a maximum
     small enough to be mentioned: "between N and M bytes".  */
  if (res.max < maxbytes)
    return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
		    info.bounded
		    ? (maybe
		       ? G_("%<%.*s%> directive output may be truncated "
			    "writing between %wu and %wu bytes into a region "
			    "of size between %wu and %wu")
		       : G_("%<%.*s%> directive output truncated writing "
			    "between %wu and %wu bytes into a region of size "
			    "between %wu and %wu"))
		    : G_("%<%.*s%> directive writing between %wu and "
			 "%wu bytes into a region of size between %wu and "
			 "%wu"), (int) dir.len,
		    target_to_host (hostdir, sizeof hostdir, dir.beg),
		    res.min, res.max, avail_range.min, avail_range.max);

  /* Otherwise the maximum is MAXBYTES or more and only the nonzero
     minimum is mentioned: "N or more bytes".  */
  return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
		  info.bounded
		  ? (maybe
		     ? G_("%<%.*s%> directive output may be truncated writing "
			  "%wu or more bytes into a region of size between "
			  "%wu and %wu")
		     : G_("%<%.*s%> directive output truncated writing "
			  "%wu or more bytes into a region of size between "
			  "%wu and %wu"))
		  : G_("%<%.*s%> directive writing %wu or more bytes "
		       "into a region of size between %wu and %wu"),
		  (int) dir.len,
		  target_to_host (hostdir, sizeof hostdir, dir.beg),
		  res.min, avail_range.min, avail_range.max);
}
3025 
3026 /* Given the formatting result described by RES and NAVAIL, the number
3027    of available bytes in the destination, return the range of bytes
3028    remaining in the destination.  */
3029 
3030 static inline result_range
bytes_remaining(unsigned HOST_WIDE_INT navail,const format_result & res)3031 bytes_remaining (unsigned HOST_WIDE_INT navail, const format_result &res)
3032 {
3033   result_range range;
3034 
3035   if (HOST_WIDE_INT_MAX <= navail)
3036     {
3037       range.min = range.max = range.likely = range.unlikely = navail;
3038       return range;
3039     }
3040 
3041   /* The lower bound of the available range is the available size
3042      minus the maximum output size, and the upper bound is the size
3043      minus the minimum.  */
3044   range.max = res.range.min < navail ? navail - res.range.min : 0;
3045 
3046   range.likely = res.range.likely < navail ? navail - res.range.likely : 0;
3047 
3048   if (res.range.max < HOST_WIDE_INT_MAX)
3049     range.min = res.range.max < navail ? navail - res.range.max : 0;
3050   else
3051     range.min = range.likely;
3052 
3053   range.unlikely = (res.range.unlikely < navail
3054 		    ? navail - res.range.unlikely : 0);
3055 
3056   return range;
3057 }
3058 
/* Compute the length of the output resulting from the directive DIR
   in a call described by INFO and update the overall result of the call
   in *RES, issuing any warnings the directive calls for.  QUERY is
   passed to the directive's formatting function DIR.FMTFUNC.
   Return true if the directive has been handled.  Return false when
   DIR has no formatting function, when the running minimum in *RES is
   HOST_WIDE_INT_MAX or more, when the directive's argument is null,
   or when no warning has been issued yet and the argument has been
   found not to be a nul-terminated string.  */

static bool
format_directive (const call_info &info,
		  format_result *res, const directive &dir,
		  range_query *query)
{
  /* Offset of the beginning of the directive from the beginning
     of the format string.  */
  size_t offset = dir.beg - info.fmtstr;
  size_t start = offset;
  /* Offset of the last character of the directive (same as START
     for an empty directive).  */
  size_t length = offset + dir.len - !!dir.len;

  /* Create a location for the whole directive from the % to the format
     specifier.  */
  substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
			offset, start, length);

  /* Also get the location of the argument if possible.
     This doesn't work for integer literals or function calls.  */
  location_t argloc = UNKNOWN_LOCATION;
  if (dir.arg)
    argloc = EXPR_LOCATION (dir.arg);

  /* Bail when there is no function to compute the output length,
     or when minimum length checking has been disabled.   */
  if (!dir.fmtfunc || res->range.min >= HOST_WIDE_INT_MAX)
    return false;

  /* Compute the range of lengths of the formatted output.  */
  fmtresult fmtres = dir.fmtfunc (dir, dir.arg, query);

  /* Record whether the output of all directives is known to be
     bounded by some maximum, implying that their arguments are
     either known exactly or determined to be in a known range
     or, for strings, limited by the upper bounds of the arrays
     they refer to.  */
  res->knownrange &= fmtres.knownrange;

  if (!fmtres.knownrange)
    {
      /* Only when the range is known, check it against the host value
	 of INT_MAX + (the number of bytes of the "%.*Lf" directive with
	 INT_MAX precision, which is the longest possible output of any
	 single directive).  That's the largest valid byte count (though
	 not valid call to a printf-like function because it can never
	 return such a count).  Otherwise, the range doesn't correspond
	 to known values of the argument.
	 NOTE(review): the guard above tests for an unknown range
	 (!FMTRES.KNOWNRANGE), which reads at odds with "Only when
	 the range is known" -- confirm which is intended.  */
      if (fmtres.range.max > target_dir_max ())
	{
	  /* Normalize the MAX counter to avoid having to deal with it
	     later.  The counter can be less than HOST_WIDE_INT_M1U
	     when compiling for an ILP32 target on an LP64 host.  */
	  fmtres.range.max = HOST_WIDE_INT_M1U;
	  /* Disable exact and maximum length checking after a failure
	     to determine the maximum number of characters (for example
	     for wide characters or wide character strings) but continue
	     tracking the minimum number of characters.  */
	  res->range.max = HOST_WIDE_INT_M1U;
	}

      if (fmtres.range.min > target_dir_max ())
	{
	  /* Disable exact length checking after a failure to determine
	     even the minimum number of characters (it shouldn't happen
	     except in an error) but keep tracking the minimum and maximum
	     number of characters.
	     NOTE(review): the function returns here without adding
	     this directive's counters to *RES.  */
	  return true;
	}
    }

  /* Buffer for the directive in the host character set (used when
     the source character set is different).  */
  char hostdir[32];

  /* The length of the directive as an int, for the "%.*s" directives
     in the diagnostics below.  */
  int dirlen = dir.len;

  if (fmtres.nullp)
    {
      fmtwarn (dirloc, argloc, NULL, info.warnopt (),
	       "%<%.*s%> directive argument is null",
	       dirlen, target_to_host (hostdir, sizeof hostdir, dir.beg));

      /* Don't bother processing the rest of the format string.  */
      res->warned = true;
      res->range.min = HOST_WIDE_INT_M1U;
      res->range.max = HOST_WIDE_INT_M1U;
      return false;
    }

  /* Compute the number of available bytes in the destination.  There
     must always be at least one byte of space for the terminating
     NUL that's appended after the format string has been processed.  */
  result_range avail_range = bytes_remaining (info.objsize, *res);

  /* If the argument aliases a part of the destination of the formatted
     call at offset FMTRES.DST_OFFSET append the directive and its result
     to the set of aliases for later processing.  */
  if (fmtres.dst_offset != HOST_WIDE_INT_MIN)
    res->append_alias (dir, fmtres.dst_offset, fmtres.range);

  /* Set once a warning has been issued, either for an earlier directive
     of the same call or for this one.  Each of the checks below is
     skipped once it is set.  */
  bool warned = res->warned;

  if (!warned)
    warned = maybe_warn (dirloc, argloc, info, avail_range,
			 fmtres.range, dir);

  /* Bump up the total maximum if it isn't too big.  */
  if (res->range.max < HOST_WIDE_INT_MAX
      && fmtres.range.max < HOST_WIDE_INT_MAX)
    res->range.max += fmtres.range.max;

  /* Raise the total unlikely maximum by the larger of the maximum
     and the unlikely maximum.  */
  unsigned HOST_WIDE_INT save = res->range.unlikely;
  if (fmtres.range.max < fmtres.range.unlikely)
    res->range.unlikely += fmtres.range.unlikely;
  else
    res->range.unlikely += fmtres.range.max;

  /* Saturate the counter if the addition above wrapped around.  */
  if (res->range.unlikely < save)
    res->range.unlikely = HOST_WIDE_INT_M1U;

  res->range.min += fmtres.range.min;
  res->range.likely += fmtres.range.likely;

  /* Has the minimum directive output length exceeded the maximum
     of 4095 bytes required to be supported?  */
  bool minunder4k = fmtres.range.min < 4096;
  bool maxunder4k = fmtres.range.max < 4096;
  /* Clear POSUNDER4K in the overall result if the maximum has exceeded
     the 4k (this is necessary to avoid the return value optimization
     that may not be safe in the maximum case).  */
  if (!maxunder4k)
    res->posunder4k = false;
  /* Also clear POSUNDER4K if the directive may fail.  */
  if (fmtres.mayfail)
    res->posunder4k = false;

  if (!warned
      /* Only warn at level 2.  */
      && warn_level > 1
      /* Only warn for string functions.  */
      && info.is_string_func ()
      && (!minunder4k
	  || (!maxunder4k && fmtres.range.max < HOST_WIDE_INT_MAX)))
    {
      /* The directive output may be longer than the maximum required
	 to be handled by an implementation according to 7.21.6.1, p15
	 of C11.  Warn on this only at level 2 but remember this and
	 prevent folding the return value when done.  This allows for
	 the possibility of the actual libc call failing due to ENOMEM
	 (like Glibc does with very large precision or width).
	 Issue the "may exceed" warning only for string functions and
	 not for fprintf or printf.  */

      if (fmtres.range.min == fmtres.range.max)
	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			  "%<%.*s%> directive output of %wu bytes exceeds "
			  "minimum required size of 4095", dirlen,
			  target_to_host (hostdir, sizeof hostdir, dir.beg),
			  fmtres.range.min);
      else if (!minunder4k)
	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			  "%<%.*s%> directive output between %wu and %wu "
			  "bytes exceeds minimum required size of 4095",
			  dirlen,
			  target_to_host (hostdir, sizeof hostdir, dir.beg),
			  fmtres.range.min, fmtres.range.max);
      /* INFO.IS_STRING_FUNC () has already been tested by the enclosing
	 condition.  */
      else if (!info.retval_used () && info.is_string_func ())
	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			  "%<%.*s%> directive output between %wu and %wu "
			  "bytes may exceed minimum required size of "
			  "4095",
			  dirlen,
			  target_to_host (hostdir, sizeof hostdir, dir.beg),
			  fmtres.range.min, fmtres.range.max);
    }

  /* Has the likely and maximum directive output exceeded INT_MAX?
     Neither is considered for the terminating nul (when *DIR.BEG
     is zero).  */
  bool likelyximax = *dir.beg && res->range.likely > target_int_max ();
  /* Don't consider the maximum to be in excess when it's the result
     of a string of unknown length (i.e., whose maximum has been set
     to be greater than or equal to HOST_WIDE_INT_MAX).  */
  bool maxximax = (*dir.beg
		   && res->range.max > target_int_max ()
		   && res->range.max < HOST_WIDE_INT_MAX);

  if (!warned
      /* Warn for the likely output size at level 1.  */
      && (likelyximax
	  /* But only warn for the maximum at level 2.  */
	  || (warn_level > 1
	      && maxximax
	      && fmtres.range.max < HOST_WIDE_INT_MAX)))
    {
      if (fmtres.range.min > target_int_max ())
	{
	  /* The directive output exceeds INT_MAX bytes.  */
	  if (fmtres.range.min == fmtres.range.max)
	    warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			      "%<%.*s%> directive output of %wu bytes exceeds "
			      "%<INT_MAX%>", dirlen,
			      target_to_host (hostdir, sizeof hostdir, dir.beg),
			      fmtres.range.min);
	  else
	    warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			      "%<%.*s%> directive output between %wu and "
			      "%wu bytes exceeds %<INT_MAX%>", dirlen,
			      target_to_host (hostdir, sizeof hostdir, dir.beg),
			      fmtres.range.min, fmtres.range.max);
	}
      else if (res->range.min > target_int_max ())
	{
	  /* The directive output is under INT_MAX but causes the result
	     to exceed INT_MAX bytes.  */
	  if (fmtres.range.min == fmtres.range.max)
	    warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			      "%<%.*s%> directive output of %wu bytes causes "
			      "result to exceed %<INT_MAX%>", dirlen,
			      target_to_host (hostdir, sizeof hostdir, dir.beg),
			      fmtres.range.min);
	  else
	    warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			      "%<%.*s%> directive output between %wu and "
			      "%wu bytes causes result to exceed %<INT_MAX%>",
			      dirlen,
			      target_to_host (hostdir, sizeof hostdir, dir.beg),
			      fmtres.range.min, fmtres.range.max);
	}
      else if ((!info.retval_used () || !info.bounded)
	       && (info.is_string_func ()))
	/* Warn for calls to string functions that either aren't bounded
	   (sprintf) or whose return value isn't used.  */
	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			  "%<%.*s%> directive output between %wu and "
			  "%wu bytes may cause result to exceed "
			  "%<INT_MAX%>", dirlen,
			  target_to_host (hostdir, sizeof hostdir, dir.beg),
			  fmtres.range.min, fmtres.range.max);
    }

  if (!warned && fmtres.nonstr)
    {
      warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			"%<%.*s%> directive argument is not a nul-terminated "
			"string",
			dirlen,
			target_to_host (hostdir, sizeof hostdir, dir.beg));
      if (warned && DECL_P (fmtres.nonstr))
	inform (DECL_SOURCE_LOCATION (fmtres.nonstr),
		"referenced argument declared here");
      /* NOTE(review): unlike the null argument case above, this early
	 return leaves RES->WARNED unchanged -- confirm that's intended.  */
      return false;
    }

  /* After a warning, mention the likely directive output the warning
     is based on when it's strictly between the minimum and maximum.  */
  if (warned && fmtres.range.min < fmtres.range.likely
      && fmtres.range.likely < fmtres.range.max)
    inform_n (info.fmtloc, fmtres.range.likely,
	      "assuming directive output of %wu byte",
	      "assuming directive output of %wu bytes",
	      fmtres.range.likely);

  /* Likewise, mention the value or range of the directive argument
     when the formatting function has recorded one.  */
  if (warned && fmtres.argmin)
    {
      if (fmtres.argmin == fmtres.argmax)
	inform (info.fmtloc, "directive argument %qE", fmtres.argmin);
      else if (fmtres.knownrange)
	inform (info.fmtloc, "directive argument in the range [%E, %E]",
		fmtres.argmin, fmtres.argmax);
      else
	inform (info.fmtloc,
		"using the range [%E, %E] for directive argument",
		fmtres.argmin, fmtres.argmax);
    }

  res->warned |= warned;

  /* Once the terminating nul has been reached in a call that has been
     diagnosed, follow up with a note summarizing the output of the whole
     call.  */
  if (!dir.beg[0] && res->warned)
    {
      location_t callloc = gimple_location (info.callstmt);

      unsigned HOST_WIDE_INT min = res->range.min;
      unsigned HOST_WIDE_INT max = res->range.max;

      if (info.objsize < HOST_WIDE_INT_MAX)
	{
	  /* If a warning has been issued for buffer overflow or truncation
	     help the user figure out how big a buffer they need.  */

	  if (min == max)
	    inform_n (callloc, min,
		      "%qE output %wu byte into a destination of size %wu",
		      "%qE output %wu bytes into a destination of size %wu",
		      info.func, min, info.objsize);
	  else if (max < HOST_WIDE_INT_MAX)
	    inform (callloc,
		    "%qE output between %wu and %wu bytes into "
		    "a destination of size %wu",
		    info.func, min, max, info.objsize);
	  else if (min < res->range.likely && res->range.likely < max)
	    inform (callloc,
		    "%qE output %wu or more bytes (assuming %wu) into "
		    "a destination of size %wu",
		    info.func, min, res->range.likely, info.objsize);
	  else
	    inform (callloc,
		    "%qE output %wu or more bytes into a destination of size "
		    "%wu",
		    info.func, min, info.objsize);
	}
      else if (!info.is_string_func ())
	{
	  /* If the warning is for a file function like fprintf
	     or printf with no destination size just print the computed
	     result.  */
	  if (min == max)
	    inform_n (callloc, min,
		      "%qE output %wu byte", "%qE output %wu bytes",
		      info.func, min);
	  else if (max < HOST_WIDE_INT_MAX)
	    inform (callloc,
		    "%qE output between %wu and %wu bytes",
		    info.func, min, max);
	  else if (min < res->range.likely && res->range.likely < max)
	    inform (callloc,
		    "%qE output %wu or more bytes (assuming %wu)",
		    info.func, min, res->range.likely);
	  else
	    inform (callloc,
		    "%qE output %wu or more bytes",
		    info.func, min);
	}
    }

  /* Except for the terminating nul, dump the directive's counters
     (minimum, likely, maximum, unlikely) followed in parentheses by
     the same counters of the overall result.  */
  if (dump_file && *dir.beg)
    {
      fprintf (dump_file,
	       "    Result: "
	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC " ("
	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ")\n",
	       fmtres.range.min, fmtres.range.likely,
	       fmtres.range.max, fmtres.range.unlikely,
	       res->range.min, res->range.likely,
	       res->range.max, res->range.unlikely);
    }

  return true;
}
3411 
3412 /* Parse a format directive in function call described by INFO starting
3413    at STR and populate DIR structure.  Bump up *ARGNO by the number of
3414    arguments extracted for the directive.  Return the length of
3415    the directive.  */
3416 
3417 static size_t
parse_directive(call_info & info,directive & dir,format_result * res,const char * str,unsigned * argno,range_query * query)3418 parse_directive (call_info &info,
3419 		 directive &dir, format_result *res,
3420 		 const char *str, unsigned *argno,
3421 		 range_query *query)
3422 {
3423   const char *pcnt = strchr (str, target_percent);
3424   dir.beg = str;
3425 
3426   if (size_t len = pcnt ? pcnt - str : *str ? strlen (str) : 1)
3427     {
3428       /* This directive is either a plain string or the terminating nul
3429 	 (which isn't really a directive but it simplifies things to
3430 	 handle it as if it were).  */
3431       dir.len = len;
3432       dir.fmtfunc = format_plain;
3433 
3434       if (dump_file)
3435 	{
3436 	  fprintf (dump_file, "  Directive %u at offset "
3437 		   HOST_WIDE_INT_PRINT_UNSIGNED ": \"%.*s\", "
3438 		   "length = " HOST_WIDE_INT_PRINT_UNSIGNED "\n",
3439 		   dir.dirno,
3440 		   (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3441 		   (int)dir.len, dir.beg, (unsigned HOST_WIDE_INT) dir.len);
3442 	}
3443 
3444       return len - !*str;
3445     }
3446 
3447   /* Set the directive argument's number to correspond to its position
3448      in the formatted function call's argument list.  */
3449   dir.argno = *argno;
3450 
3451   const char *pf = pcnt + 1;
3452 
3453     /* POSIX numbered argument index or zero when none.  */
3454   HOST_WIDE_INT dollar = 0;
3455 
3456   /* With and precision.  -1 when not specified, HOST_WIDE_INT_MIN
3457      when given by a va_list argument, and a non-negative value
3458      when specified in the format string itself.  */
3459   HOST_WIDE_INT width = -1;
3460   HOST_WIDE_INT precision = -1;
3461 
3462   /* Pointers to the beginning of the width and precision decimal
3463      string (if any) within the directive.  */
3464   const char *pwidth = 0;
3465   const char *pprec = 0;
3466 
3467   /* When the value of the decimal string that specifies width or
3468      precision is out of range, points to the digit that causes
3469      the value to exceed the limit.  */
3470   const char *werange = NULL;
3471   const char *perange = NULL;
3472 
3473   /* Width specified via the asterisk.  Need not be INTEGER_CST.
3474      For vararg functions set to void_node.  */
3475   tree star_width = NULL_TREE;
3476 
3477   /* Width specified via the asterisk.  Need not be INTEGER_CST.
3478      For vararg functions set to void_node.  */
3479   tree star_precision = NULL_TREE;
3480 
3481   if (ISDIGIT (target_to_host (*pf)))
3482     {
3483       /* This could be either a POSIX positional argument, the '0'
3484 	 flag, or a width, depending on what follows.  Store it as
3485 	 width and sort it out later after the next character has
3486 	 been seen.  */
3487       pwidth = pf;
3488       width = target_strtowi (&pf, &werange);
3489     }
3490   else if (target_to_host (*pf) == '*')
3491     {
3492       /* Similarly to the block above, this could be either a POSIX
3493 	 positional argument or a width, depending on what follows.  */
3494       if (*argno < gimple_call_num_args (info.callstmt))
3495 	star_width = gimple_call_arg (info.callstmt, (*argno)++);
3496       else
3497 	star_width = void_node;
3498       ++pf;
3499     }
3500 
3501   if (target_to_host (*pf) == '$')
3502     {
3503       /* Handle the POSIX dollar sign which references the 1-based
3504 	 positional argument number.  */
3505       if (width != -1)
3506 	dollar = width + info.argidx;
3507       else if (star_width
3508 	       && TREE_CODE (star_width) == INTEGER_CST
3509 	       && (TYPE_PRECISION (TREE_TYPE (star_width))
3510 		   <= TYPE_PRECISION (integer_type_node)))
3511 	dollar = width + tree_to_shwi (star_width);
3512 
3513       /* Bail when the numbered argument is out of range (it will
3514 	 have already been diagnosed by -Wformat).  */
3515       if (dollar == 0
3516 	  || dollar == (int)info.argidx
3517 	  || dollar > gimple_call_num_args (info.callstmt))
3518 	return false;
3519 
3520       --dollar;
3521 
3522       star_width = NULL_TREE;
3523       width = -1;
3524       ++pf;
3525     }
3526 
3527   if (dollar || !star_width)
3528     {
3529       if (width != -1)
3530 	{
3531 	  if (width == 0)
3532 	    {
3533 	      /* The '0' that has been interpreted as a width above is
3534 		 actually a flag.  Reset HAVE_WIDTH, set the '0' flag,
3535 		 and continue processing other flags.  */
3536 	      width = -1;
3537 	      dir.set_flag ('0');
3538 	    }
3539 	  else if (!dollar)
3540 	    {
3541 	      /* (Non-zero) width has been seen.  The next character
3542 		 is either a period or a digit.  */
3543 	      goto start_precision;
3544 	    }
3545 	}
3546       /* When either '$' has been seen, or width has not been seen,
3547 	 the next field is the optional flags followed by an optional
3548 	 width.  */
3549       for ( ; ; ) {
3550 	switch (target_to_host (*pf))
3551 	  {
3552 	  case ' ':
3553 	  case '0':
3554 	  case '+':
3555 	  case '-':
3556 	  case '#':
3557 	    dir.set_flag (target_to_host (*pf++));
3558 	    break;
3559 
3560 	  default:
3561 	    goto start_width;
3562 	  }
3563       }
3564 
3565     start_width:
3566       if (ISDIGIT (target_to_host (*pf)))
3567 	{
3568 	  werange = 0;
3569 	  pwidth = pf;
3570 	  width = target_strtowi (&pf, &werange);
3571 	}
3572       else if (target_to_host (*pf) == '*')
3573 	{
3574 	  if (*argno < gimple_call_num_args (info.callstmt))
3575 	    star_width = gimple_call_arg (info.callstmt, (*argno)++);
3576 	  else
3577 	    {
3578 	      /* This is (likely) a va_list.  It could also be an invalid
3579 		 call with insufficient arguments.  */
3580 	      star_width = void_node;
3581 	    }
3582 	  ++pf;
3583 	}
3584       else if (target_to_host (*pf) == '\'')
3585 	{
3586 	  /* The POSIX apostrophe indicating a numeric grouping
3587 	     in the current locale.  Even though it's possible to
3588 	     estimate the upper bound on the size of the output
3589 	     based on the number of digits it probably isn't worth
3590 	     continuing.  */
3591 	  return 0;
3592 	}
3593     }
3594 
3595  start_precision:
3596   if (target_to_host (*pf) == '.')
3597     {
3598       ++pf;
3599 
3600       if (ISDIGIT (target_to_host (*pf)))
3601 	{
3602 	  pprec = pf;
3603 	  precision = target_strtowi (&pf, &perange);
3604 	}
3605       else if (target_to_host (*pf) == '*')
3606 	{
3607 	  if (*argno < gimple_call_num_args (info.callstmt))
3608 	    star_precision = gimple_call_arg (info.callstmt, (*argno)++);
3609 	  else
3610 	    {
3611 	      /* This is (likely) a va_list.  It could also be an invalid
3612 		 call with insufficient arguments.  */
3613 	      star_precision = void_node;
3614 	    }
3615 	  ++pf;
3616 	}
3617       else
3618 	{
3619 	  /* The decimal precision or the asterisk are optional.
3620 	     When neither is specified it's taken to be zero.  */
3621 	  precision = 0;
3622 	}
3623     }
3624 
3625   switch (target_to_host (*pf))
3626     {
3627     case 'h':
3628       if (target_to_host (pf[1]) == 'h')
3629 	{
3630 	  ++pf;
3631 	  dir.modifier = FMT_LEN_hh;
3632 	}
3633       else
3634 	dir.modifier = FMT_LEN_h;
3635       ++pf;
3636       break;
3637 
3638     case 'j':
3639       dir.modifier = FMT_LEN_j;
3640       ++pf;
3641       break;
3642 
3643     case 'L':
3644       dir.modifier = FMT_LEN_L;
3645       ++pf;
3646       break;
3647 
3648     case 'l':
3649       if (target_to_host (pf[1]) == 'l')
3650 	{
3651 	  ++pf;
3652 	  dir.modifier = FMT_LEN_ll;
3653 	}
3654       else
3655 	dir.modifier = FMT_LEN_l;
3656       ++pf;
3657       break;
3658 
3659     case 't':
3660       dir.modifier = FMT_LEN_t;
3661       ++pf;
3662       break;
3663 
3664     case 'z':
3665       dir.modifier = FMT_LEN_z;
3666       ++pf;
3667       break;
3668     }
3669 
3670   switch (target_to_host (*pf))
3671     {
3672       /* Handle a sole '%' character the same as "%%" but since it's
3673 	 undefined prevent the result from being folded.  */
3674     case '\0':
3675       --pf;
3676       res->range.min = res->range.max = HOST_WIDE_INT_M1U;
3677       /* FALLTHRU */
3678     case '%':
3679       dir.fmtfunc = format_percent;
3680       break;
3681 
3682     case 'a':
3683     case 'A':
3684     case 'e':
3685     case 'E':
3686     case 'f':
3687     case 'F':
3688     case 'g':
3689     case 'G':
3690       res->floating = true;
3691       dir.fmtfunc = format_floating;
3692       break;
3693 
3694     case 'd':
3695     case 'i':
3696     case 'o':
3697     case 'u':
3698     case 'x':
3699     case 'X':
3700       dir.fmtfunc = format_integer;
3701       break;
3702 
3703     case 'p':
3704       /* The %p output is implementation-defined.  It's possible
3705 	 to determine this format but due to extensions (especially
3706 	 those of the Linux kernel -- see bug 78512) the first %p
3707 	 in the format string disables any further processing.  */
3708       return false;
3709 
3710     case 'n':
3711       /* %n has side-effects even when nothing is actually printed to
3712 	 any buffer.  */
3713       info.nowrite = false;
3714       dir.fmtfunc = format_none;
3715       break;
3716 
3717     case 'C':
3718     case 'c':
3719       /* POSIX wide character and C/POSIX narrow character.  */
3720       dir.fmtfunc = format_character;
3721       break;
3722 
3723     case 'S':
3724     case 's':
3725       /* POSIX wide string and C/POSIX narrow character string.  */
3726       dir.fmtfunc = format_string;
3727       break;
3728 
3729     default:
3730       /* Unknown conversion specification.  */
3731       return 0;
3732     }
3733 
3734   dir.specifier = target_to_host (*pf++);
3735 
3736   /* Store the length of the format directive.  */
3737   dir.len = pf - pcnt;
3738 
3739   /* Buffer for the directive in the host character set (used when
3740      the source character set is different).  */
3741   char hostdir[32];
3742 
3743   if (star_width)
3744     {
3745       if (INTEGRAL_TYPE_P (TREE_TYPE (star_width)))
3746 	dir.set_width (star_width, query);
3747       else
3748 	{
3749 	  /* Width specified by a va_list takes on the range [0, -INT_MIN]
3750 	     (width is the absolute value of that specified).  */
3751 	  dir.width[0] = 0;
3752 	  dir.width[1] = target_int_max () + 1;
3753 	}
3754     }
3755   else
3756     {
3757       if (width == HOST_WIDE_INT_MAX && werange)
3758 	{
3759 	  size_t begin = dir.beg - info.fmtstr + (pwidth - pcnt);
3760 	  size_t caret = begin + (werange - pcnt);
3761 	  size_t end = pf - info.fmtstr - 1;
3762 
3763 	  /* Create a location for the width part of the directive,
3764 	     pointing the caret at the first out-of-range digit.  */
3765 	  substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3766 				caret, begin, end);
3767 
3768 	  fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3769 		   "%<%.*s%> directive width out of range", (int) dir.len,
3770 		   target_to_host (hostdir, sizeof hostdir, dir.beg));
3771 	}
3772 
3773       dir.set_width (width);
3774     }
3775 
3776   if (star_precision)
3777     {
3778       if (INTEGRAL_TYPE_P (TREE_TYPE (star_precision)))
3779 	dir.set_precision (star_precision, query);
3780       else
3781 	{
3782 	  /* Precision specified by a va_list takes on the range [-1, INT_MAX]
3783 	     (unlike width, negative precision is ignored).  */
3784 	  dir.prec[0] = -1;
3785 	  dir.prec[1] = target_int_max ();
3786 	}
3787     }
3788   else
3789     {
3790       if (precision == HOST_WIDE_INT_MAX && perange)
3791 	{
3792 	  size_t begin = dir.beg - info.fmtstr + (pprec - pcnt) - 1;
3793 	  size_t caret = dir.beg - info.fmtstr + (perange - pcnt) - 1;
3794 	  size_t end = pf - info.fmtstr - 2;
3795 
3796 	  /* Create a location for the precision part of the directive,
3797 	     including the leading period, pointing the caret at the first
3798 	     out-of-range digit .  */
3799 	  substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3800 				caret, begin, end);
3801 
3802 	  fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3803 		   "%<%.*s%> directive precision out of range", (int) dir.len,
3804 		   target_to_host (hostdir, sizeof hostdir, dir.beg));
3805 	}
3806 
3807       dir.set_precision (precision);
3808     }
3809 
3810   /* Extract the argument if the directive takes one and if it's
3811      available (e.g., the function doesn't take a va_list).  Treat
3812      missing arguments the same as va_list, even though they will
3813      have likely already been diagnosed by -Wformat.  */
3814   if (dir.specifier != '%'
3815       && *argno < gimple_call_num_args (info.callstmt))
3816     dir.arg = gimple_call_arg (info.callstmt, dollar ? dollar : (*argno)++);
3817 
3818   if (dump_file)
3819     {
3820       fprintf (dump_file,
3821 	       "  Directive %u at offset " HOST_WIDE_INT_PRINT_UNSIGNED
3822 	       ": \"%.*s\"",
3823 	       dir.dirno,
3824 	       (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3825 	       (int)dir.len, dir.beg);
3826       if (star_width)
3827 	{
3828 	  if (dir.width[0] == dir.width[1])
3829 	    fprintf (dump_file, ", width = " HOST_WIDE_INT_PRINT_DEC,
3830 		     dir.width[0]);
3831 	  else
3832 	    fprintf (dump_file,
3833 		     ", width in range [" HOST_WIDE_INT_PRINT_DEC
3834 		     ", " HOST_WIDE_INT_PRINT_DEC "]",
3835 		     dir.width[0], dir.width[1]);
3836 	}
3837 
3838       if (star_precision)
3839 	{
3840 	  if (dir.prec[0] == dir.prec[1])
3841 	    fprintf (dump_file, ", precision = " HOST_WIDE_INT_PRINT_DEC,
3842 		     dir.prec[0]);
3843 	  else
3844 	    fprintf (dump_file,
3845 		     ", precision in range [" HOST_WIDE_INT_PRINT_DEC
3846 		     HOST_WIDE_INT_PRINT_DEC "]",
3847 		     dir.prec[0], dir.prec[1]);
3848 	}
3849       fputc ('\n', dump_file);
3850     }
3851 
3852   return dir.len;
3853 }
3854 
3855 /* Diagnose overlap between destination and %s directive arguments.  */
3856 
3857 static void
maybe_warn_overlap(call_info & info,format_result * res)3858 maybe_warn_overlap (call_info &info, format_result *res)
3859 {
3860   /* Two vectors of 1-based indices corresponding to either certainly
3861      or possibly aliasing arguments.  */
3862   auto_vec<int, 16> aliasarg[2];
3863 
3864   /* Go through the array of potentially aliasing directives and collect
3865      argument numbers of those that do or may overlap the destination
3866      object given the full result.  */
3867   for (unsigned i = 0; i != res->alias_count; ++i)
3868     {
3869       const format_result::alias_info &alias = res->aliases[i];
3870 
3871       enum { possible = -1, none = 0, certain = 1 } overlap = none;
3872 
3873       /* If the precision is zero there is no overlap.  (This only
3874 	 considers %s directives and ignores %n.)  */
3875       if (alias.dir.prec[0] == 0 && alias.dir.prec[1] == 0)
3876 	continue;
3877 
3878       if (alias.offset == HOST_WIDE_INT_MAX
3879 	  || info.dst_offset == HOST_WIDE_INT_MAX)
3880 	overlap = possible;
3881       else if (alias.offset == info.dst_offset)
3882 	overlap = alias.dir.prec[0] == 0 ? possible : certain;
3883       else
3884 	{
3885 	  /* Determine overlap from the range of output and offsets
3886 	     into the same destination as the source, and rule out
3887 	     impossible overlap.  */
3888 	  unsigned HOST_WIDE_INT albeg = alias.offset;
3889 	  unsigned HOST_WIDE_INT dstbeg = info.dst_offset;
3890 
3891 	  unsigned HOST_WIDE_INT alend = albeg + alias.range.min;
3892 	  unsigned HOST_WIDE_INT dstend = dstbeg + res->range.min - 1;
3893 
3894 	  if ((albeg <= dstbeg && alend > dstbeg)
3895 	      || (albeg >= dstbeg && albeg < dstend))
3896 	    overlap = certain;
3897 	  else
3898 	    {
3899 	      alend = albeg + alias.range.max;
3900 	      if (alend < albeg)
3901 		alend = HOST_WIDE_INT_M1U;
3902 
3903 	      dstend = dstbeg + res->range.max - 1;
3904 	      if (dstend < dstbeg)
3905 		dstend = HOST_WIDE_INT_M1U;
3906 
3907 	      if ((albeg >= dstbeg && albeg <= dstend)
3908 		  || (alend >= dstbeg && alend <= dstend))
3909 		overlap = possible;
3910 	    }
3911 	}
3912 
3913       if (overlap == none)
3914 	continue;
3915 
3916       /* Append the 1-based argument number.  */
3917       aliasarg[overlap != certain].safe_push (alias.dir.argno + 1);
3918 
3919       /* Disable any kind of optimization.  */
3920       res->range.unlikely = HOST_WIDE_INT_M1U;
3921     }
3922 
3923   tree arg0 = gimple_call_arg (info.callstmt, 0);
3924   location_t loc = gimple_location (info.callstmt);
3925 
3926   bool aliaswarn = false;
3927 
3928   unsigned ncertain = aliasarg[0].length ();
3929   unsigned npossible = aliasarg[1].length ();
3930   if (ncertain && npossible)
3931     {
3932       /* If there are multiple arguments that overlap, some certainly
3933 	 and some possibly, handle both sets in a single diagnostic.  */
3934       aliaswarn
3935 	= warning_at (loc, OPT_Wrestrict,
3936 		      "%qE arguments %Z and maybe %Z overlap destination "
3937 		      "object %qE",
3938 		      info.func, aliasarg[0].address (), ncertain,
3939 		      aliasarg[1].address (), npossible,
3940 		      info.dst_origin);
3941     }
3942   else if (ncertain)
3943     {
3944       /* There is only one set of two or more arguments and they all
3945 	 certainly overlap the destination.  */
3946       aliaswarn
3947 	= warning_n (loc, OPT_Wrestrict, ncertain,
3948 		     "%qE argument %Z overlaps destination object %qE",
3949 		     "%qE arguments %Z overlap destination object %qE",
3950 		     info.func, aliasarg[0].address (), ncertain,
3951 		     info.dst_origin);
3952     }
3953   else if (npossible)
3954     {
3955       /* There is only one set of two or more arguments and they all
3956 	 may overlap (but need not).  */
3957       aliaswarn
3958 	= warning_n (loc, OPT_Wrestrict, npossible,
3959 		     "%qE argument %Z may overlap destination object %qE",
3960 		     "%qE arguments %Z may overlap destination object %qE",
3961 		     info.func, aliasarg[1].address (), npossible,
3962 		     info.dst_origin);
3963     }
3964 
3965   if (aliaswarn)
3966     {
3967       res->warned = true;
3968 
3969       if (info.dst_origin != arg0)
3970 	{
3971 	  /* If its location is different from the first argument of the call
3972 	     point either at the destination object itself or at the expression
3973 	     that was used to determine the overlap.  */
3974 	  loc = (DECL_P (info.dst_origin)
3975 		 ? DECL_SOURCE_LOCATION (info.dst_origin)
3976 		 : EXPR_LOCATION (info.dst_origin));
3977 	  if (loc != UNKNOWN_LOCATION)
3978 	    inform (loc,
3979 		    "destination object referenced by %<restrict%>-qualified "
3980 		    "argument 1 was declared here");
3981 	}
3982     }
3983 }
3984 
3985 /* Compute the length of the output resulting from the call to a formatted
3986    output function described by INFO and store the result of the call in
3987    *RES.  Issue warnings for detected past the end writes.  Return true
3988    if the complete format string has been processed and *RES can be relied
3989    on, false otherwise (e.g., when a unknown or unhandled directive was seen
3990    that caused the processing to be terminated early).  */
3991 
3992 static bool
compute_format_length(call_info & info,format_result * res,range_query * query)3993 compute_format_length (call_info &info, format_result *res, range_query *query)
3994 {
3995   if (dump_file)
3996     {
3997       location_t callloc = gimple_location (info.callstmt);
3998       fprintf (dump_file, "%s:%i: ",
3999 	       LOCATION_FILE (callloc), LOCATION_LINE (callloc));
4000       print_generic_expr (dump_file, info.func, dump_flags);
4001 
4002       fprintf (dump_file,
4003 	       ": objsize = " HOST_WIDE_INT_PRINT_UNSIGNED
4004 	       ", fmtstr = \"%s\"\n",
4005 	       info.objsize, info.fmtstr);
4006     }
4007 
4008   /* Reset the minimum and maximum byte counters.  */
4009   res->range.min = res->range.max = 0;
4010 
4011   /* No directive has been seen yet so the length of output is bounded
4012      by the known range [0, 0] (with no conversion resulting in a failure
4013      or producing more than 4K bytes) until determined otherwise.  */
4014   res->knownrange = true;
4015   res->floating = false;
4016   res->warned = false;
4017 
4018   /* 1-based directive counter.  */
4019   unsigned dirno = 1;
4020 
4021   /* The variadic argument counter.  */
4022   unsigned argno = info.argidx;
4023 
4024   bool success = true;
4025 
4026   for (const char *pf = info.fmtstr; ; ++dirno)
4027     {
4028       directive dir (&info, dirno);
4029 
4030       size_t n = parse_directive (info, dir, res, pf, &argno, query);
4031 
4032       /* Return failure if the format function fails.  */
4033       if (!format_directive (info, res, dir, query))
4034 	return false;
4035 
4036       /* Return success when the directive is zero bytes long and it's
4037 	 the last thing in the format string (i.e., it's the terminating
4038 	 nul, which isn't really a directive but handling it as one makes
4039 	 things simpler).  */
4040       if (!n)
4041 	{
4042 	  success = *pf == '\0';
4043 	  break;
4044 	}
4045 
4046       pf += n;
4047     }
4048 
4049   maybe_warn_overlap (info, res);
4050 
4051   /* The complete format string was processed (with or without warnings).  */
4052   return success;
4053 }
4054 
4055 /* Return the size of the object referenced by the expression DEST in
4056    statement STMT, if available, or the maximum possible size otherwise.  */
4057 
4058 static unsigned HOST_WIDE_INT
get_destination_size(tree dest,gimple * stmt,pointer_query & ptr_qry)4059 get_destination_size (tree dest, gimple *stmt, pointer_query &ptr_qry)
4060 {
4061   /* When there is no destination return the maximum.  */
4062   if (!dest)
4063     return HOST_WIDE_INT_MAX;
4064 
4065   /* Use compute_objsize to determine the size of the destination object.  */
4066   access_ref aref;
4067   if (!ptr_qry.get_ref (dest, stmt, &aref))
4068     return HOST_WIDE_INT_MAX;
4069 
4070   offset_int remsize = aref.size_remaining ();
4071   if (!wi::fits_uhwi_p (remsize))
4072     return HOST_WIDE_INT_MAX;
4073 
4074   return remsize.to_uhwi ();
4075 }
4076 
4077 /* Return true if the call described by INFO with result RES safe to
4078    optimize (i.e., no undefined behavior), and set RETVAL to the range
4079    of its return values.  */
4080 
4081 static bool
is_call_safe(const call_info & info,const format_result & res,bool under4k,unsigned HOST_WIDE_INT retval[2])4082 is_call_safe (const call_info &info,
4083 	      const format_result &res, bool under4k,
4084 	      unsigned HOST_WIDE_INT retval[2])
4085 {
4086   if (under4k && !res.posunder4k)
4087     return false;
4088 
4089   /* The minimum return value.  */
4090   retval[0] = res.range.min;
4091 
4092   /* The maximum return value is in most cases bounded by RES.RANGE.MAX
4093      but in cases involving multibyte characters could be as large as
4094      RES.RANGE.UNLIKELY.  */
4095   retval[1]
4096     = res.range.unlikely < res.range.max ? res.range.max : res.range.unlikely;
4097 
4098   /* Adjust the number of bytes which includes the terminating nul
4099      to reflect the return value of the function which does not.
4100      Because the valid range of the function is [INT_MIN, INT_MAX],
4101      a valid range before the adjustment below is [0, INT_MAX + 1]
4102      (the functions only return negative values on error or undefined
4103      behavior).  */
4104   if (retval[0] <= target_int_max () + 1)
4105     --retval[0];
4106   if (retval[1] <= target_int_max () + 1)
4107     --retval[1];
4108 
4109   /* Avoid the return value optimization when the behavior of the call
4110      is undefined either because any directive may have produced 4K or
4111      more of output, or the return value exceeds INT_MAX, or because
4112      the output overflows the destination object (but leave it enabled
4113      when the function is bounded because then the behavior is well-
4114      defined).  */
4115   if (retval[0] == retval[1]
4116       && (info.bounded || retval[0] < info.objsize)
4117       && retval[0] <= target_int_max ())
4118     return true;
4119 
4120   if ((info.bounded || retval[1] < info.objsize)
4121       && (retval[0] < target_int_max ()
4122 	  && retval[1] < target_int_max ()))
4123     return true;
4124 
4125   if (!under4k && (info.bounded || retval[0] < info.objsize))
4126     return true;
4127 
4128   return false;
4129 }
4130 
/* Given a suitable result RES of a call to a formatted output function
   described by INFO, substitute the result for the return value of
   the call at *GSI.  The result is suitable if the number of bytes it
   represents is known and exact.  A result that isn't suitable for
   substitution may have its range set to the range of return values,
   if that is known.  When a dump file is open, describe what was done.
   Return true if the call is removed and gsi_next should not be performed
   in the caller.  */

static bool
try_substitute_return_value (gimple_stmt_iterator *gsi,
			     const call_info &info,
			     const format_result &res)
{
  tree lhs = gimple_get_lhs (info.callstmt);

  /* Set to true when the entire call has been removed.  */
  bool removed = false;

  /* The minimum and maximum return value.  IS_CALL_SAFE is asked to
     apply the under-4K restriction and may leave RETVAL at zero when
     it rejects the call on that basis.  */
  unsigned HOST_WIDE_INT retval[2] = {0};
  bool safe = is_call_safe (info, res, true, retval);

  if (safe
      && retval[0] == retval[1]
      /* Not prepared to handle possibly throwing calls here; they shouldn't
	 appear in non-artificial testcases, except when the __*_chk routines
	 are badly declared.  */
      && !stmt_ends_bb_p (info.callstmt))
    {
      /* The exact return value as a constant of the type of the LHS,
	 or of type int when the result is unused.  */
      tree cst = build_int_cst (lhs ? TREE_TYPE (lhs) : integer_type_node,
				retval[0]);

      if (lhs == NULL_TREE && info.nowrite)
	{
	  /* Remove the call to the bounded function with a zero size
	     (e.g., snprintf(0, 0, "%i", 123)) if there is no lhs.  */
	  unlink_stmt_vdef (info.callstmt);
	  gsi_remove (gsi, true);
	  removed = true;
	}
      else if (info.nowrite)
	{
	  /* Replace the call to the bounded function with a zero size
	     (e.g., snprintf(0, 0, "%i", 123) with the constant result
	     of the function.  */
	  gimplify_and_update_call_from_tree (gsi, cst);
	  gimple *callstmt = gsi_stmt (*gsi);
	  update_stmt (callstmt);
	}
      else if (lhs)
	{
	  /* Replace the left-hand side of the call with the constant
	     result of the formatted function.  The call itself stays
	     and the assignment is inserted right after it.  */
	  gimple_call_set_lhs (info.callstmt, NULL_TREE);
	  gimple *g = gimple_build_assign (lhs, cst);
	  gsi_insert_after (gsi, g, GSI_NEW_STMT);
	  update_stmt (info.callstmt);
	}
      /* Otherwise the call writes output and its result is unused:
	 nothing is changed.  */

      if (dump_file)
	{
	  if (removed)
	    fprintf (dump_file, "  Removing call statement.");
	  else
	    {
	      fprintf (dump_file, "  Substituting ");
	      print_generic_expr (dump_file, cst, dump_flags);
	      fprintf (dump_file, " for %s.\n",
		       info.nowrite ? "statement" : "return value");
	    }
	}
    }
  else if (lhs && types_compatible_p (TREE_TYPE (lhs), integer_type_node))
    {
      /* The result isn't exact (or the call isn't safe to fold) but it
	 is used and has a type compatible with int.  */
      bool setrange = false;

      if (safe
	  && (info.bounded || retval[1] < info.objsize)
	  && (retval[0] < target_int_max ()
	      && retval[1] < target_int_max ()))
	{
	  /* If the result is in a valid range bounded by the size of
	     the destination set it so that it can be used for subsequent
	     optimizations.  */
	  int prec = TYPE_PRECISION (integer_type_node);

	  wide_int min = wi::shwi (retval[0], prec);
	  wide_int max = wi::shwi (retval[1], prec);
	  set_range_info (lhs, VR_RANGE, min, max);

	  setrange = true;
	}

      if (dump_file)
	{
	  /* Describe how the range relates to the destination size:
	     entirely within it, only its lower bound within it, or
	     entirely outside of it.  */
	  const char *inbounds
	    = (retval[0] < info.objsize
	       ? (retval[1] < info.objsize
		  ? "in" : "potentially out-of")
	       : "out-of");

	  const char *what = setrange ? "Setting" : "Discarding";
	  if (retval[0] != retval[1])
	    fprintf (dump_file,
		     "  %s %s-bounds return value range ["
		     HOST_WIDE_INT_PRINT_UNSIGNED ", "
		     HOST_WIDE_INT_PRINT_UNSIGNED "].\n",
		     what, inbounds, retval[0], retval[1]);
	  else
	    fprintf (dump_file, "  %s %s-bounds return value "
		     HOST_WIDE_INT_PRINT_UNSIGNED ".\n",
		     what, inbounds, retval[0]);
	}
    }

  /* Terminate the dump entry for this call.  */
  if (dump_file)
    fputc ('\n', dump_file);

  return removed;
}
4251 
4252 /* Try to simplify a s{,n}printf call described by INFO with result
4253    RES by replacing it with a simpler and presumably more efficient
4254    call (such as strcpy).  */
4255 
4256 static bool
try_simplify_call(gimple_stmt_iterator * gsi,const call_info & info,const format_result & res)4257 try_simplify_call (gimple_stmt_iterator *gsi,
4258 		   const call_info &info,
4259 		   const format_result &res)
4260 {
4261   unsigned HOST_WIDE_INT dummy[2];
4262   if (!is_call_safe (info, res, info.retval_used (), dummy))
4263     return false;
4264 
4265   switch (info.fncode)
4266     {
4267     case BUILT_IN_SNPRINTF:
4268       return gimple_fold_builtin_snprintf (gsi);
4269 
4270     case BUILT_IN_SPRINTF:
4271       return gimple_fold_builtin_sprintf (gsi);
4272 
4273     default:
4274       ;
4275     }
4276 
4277   return false;
4278 }
4279 
4280 /* Return the zero-based index of the format string argument of a printf
4281    like function and set *IDX_ARGS to the first format argument.  When
4282    no such index exists return UINT_MAX.  */
4283 
4284 static unsigned
get_user_idx_format(tree fndecl,unsigned * idx_args)4285 get_user_idx_format (tree fndecl, unsigned *idx_args)
4286 {
4287   tree attrs = lookup_attribute ("format", DECL_ATTRIBUTES (fndecl));
4288   if (!attrs)
4289     attrs = lookup_attribute ("format", TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
4290 
4291   if (!attrs)
4292     return UINT_MAX;
4293 
4294   attrs = TREE_VALUE (attrs);
4295 
4296   tree archetype = TREE_VALUE (attrs);
4297   if (strcmp ("printf", IDENTIFIER_POINTER (archetype)))
4298     return UINT_MAX;
4299 
4300   attrs = TREE_CHAIN (attrs);
4301   tree fmtarg = TREE_VALUE (attrs);
4302 
4303   attrs = TREE_CHAIN (attrs);
4304   tree elliparg = TREE_VALUE (attrs);
4305 
4306   /* Attribute argument indices are 1-based but we use zero-based.  */
4307   *idx_args = tree_to_uhwi (elliparg) - 1;
4308   return tree_to_uhwi (fmtarg) - 1;
4309 }
4310 
4311 }   /* Unnamed namespace.  */
4312 
/* Determine if a GIMPLE call at *GSI is to one of the printf-like built-in
   functions, or to a user-defined function declared with attribute format
   (printf), and if so, handle it: diagnose problems with the destination
   size and the format string and, when optimizing, try to substitute
   the computed result for the call's return value or to simplify the call.
   PTR_QRY is used to determine the destination size and supplies the range
   query used for the size argument and the format directives.
   Return true if the call is removed and gsi_next should not be performed
   in the caller.  Return false in all other cases, including when
   the call is not analyzed at all.  */

bool
handle_printf_call (gimple_stmt_iterator *gsi, pointer_query &ptr_qry)
{
  init_target_to_host_charmap ();

  call_info info = call_info ();

  info.callstmt = gsi_stmt (*gsi);
  info.func = gimple_call_fndecl (info.callstmt);
  /* Calls for which no function declaration is available are not
     analyzed.  */
  if (!info.func)
    return false;

  /* Format string argument number (valid for all functions).  */
  unsigned idx_format = UINT_MAX;
  if (gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL))
    info.fncode = DECL_FUNCTION_CODE (info.func);
  else
    {
      /* Not a built-in: only handle the call if the function has
	 attribute format (printf) and the argument positions it names
	 are consistent with the arguments actually passed.  */
      unsigned idx_args;
      idx_format = get_user_idx_format (info.func, &idx_args);
      if (idx_format == UINT_MAX
	  || idx_format >= gimple_call_num_args (info.callstmt)
	  || idx_args > gimple_call_num_args (info.callstmt)
	  || !POINTER_TYPE_P (TREE_TYPE (gimple_call_arg (info.callstmt,
							  idx_format))))
	return false;
      info.fncode = BUILT_IN_NONE;
      info.argidx = idx_args;
    }

  /* The size of the destination as in snprintf(dest, size, ...).  */
  unsigned HOST_WIDE_INT dstsize = HOST_WIDE_INT_M1U;

  /* The size of the destination determined by __builtin_object_size.  */
  unsigned HOST_WIDE_INT objsize = HOST_WIDE_INT_M1U;

  /* Zero-based buffer size argument number (snprintf and vsnprintf).  */
  unsigned idx_dstsize = UINT_MAX;

  /* Object size argument number (the sprintf_chk, snprintf_chk,
     vsprintf_chk, and vsnprintf_chk built-ins).  */
  unsigned idx_objsize = UINT_MAX;

  /* Destination argument number (valid for sprintf functions only).
     Functions with no destination set it to -1 which, converted to
     unsigned, is never less than the number of call arguments.  */
  unsigned idx_dstptr = 0;

  /* Set the argument positions for each handled function.  For
     the va_list forms INFO.ARGIDX is set to -1.  */
  switch (info.fncode)
    {
    case BUILT_IN_NONE:
      // User-defined function with attribute format (printf).
      // IDX_FORMAT and INFO.ARGIDX have already been set above.
      idx_dstptr = -1;
      break;

    case BUILT_IN_FPRINTF:
      // Signature:
      //   __builtin_fprintf (FILE*, format, ...)
      idx_format = 1;
      info.argidx = 2;
      idx_dstptr = -1;
      break;

    case BUILT_IN_FPRINTF_CHK:
      // Signature:
      //   __builtin_fprintf_chk (FILE*, ost, format, ...)
      idx_format = 2;
      info.argidx = 3;
      idx_dstptr = -1;
      break;

    case BUILT_IN_FPRINTF_UNLOCKED:
      // Signature:
      //   __builtin_fprintf_unlocked (FILE*, format, ...)
      idx_format = 1;
      info.argidx = 2;
      idx_dstptr = -1;
      break;

    case BUILT_IN_PRINTF:
      // Signature:
      //   __builtin_printf (format, ...)
      idx_format = 0;
      info.argidx = 1;
      idx_dstptr = -1;
      break;

    case BUILT_IN_PRINTF_CHK:
      // Signature:
      //   __builtin_printf_chk (ost, format, ...)
      idx_format = 1;
      info.argidx = 2;
      idx_dstptr = -1;
      break;

    case BUILT_IN_PRINTF_UNLOCKED:
      // Signature:
      //   __builtin_printf_unlocked (format, ...)
      idx_format = 0;
      info.argidx = 1;
      idx_dstptr = -1;
      break;

    case BUILT_IN_SPRINTF:
      // Signature:
      //   __builtin_sprintf (dst, format, ...)
      idx_format = 1;
      info.argidx = 2;
      break;

    case BUILT_IN_SPRINTF_CHK:
      // Signature:
      //   __builtin___sprintf_chk (dst, ost, objsize, format, ...)
      idx_objsize = 2;
      idx_format = 3;
      info.argidx = 4;
      break;

    case BUILT_IN_SNPRINTF:
      // Signature:
      //   __builtin_snprintf (dst, size, format, ...)
      idx_dstsize = 1;
      idx_format = 2;
      info.argidx = 3;
      info.bounded = true;
      break;

    case BUILT_IN_SNPRINTF_CHK:
      // Signature:
      //   __builtin___snprintf_chk (dst, size, ost, objsize, format, ...)
      idx_dstsize = 1;
      idx_objsize = 3;
      idx_format = 4;
      info.argidx = 5;
      info.bounded = true;
      break;

    case BUILT_IN_VFPRINTF:
      // Signature:
      //   __builtin_vfprintf (FILE*, format, va_list)
      idx_format = 1;
      info.argidx = -1;
      idx_dstptr = -1;
      break;

    case BUILT_IN_VFPRINTF_CHK:
      // Signature:
      //   __builtin___vfprintf_chk (FILE*, ost, format, va_list)
      idx_format = 2;
      info.argidx = -1;
      idx_dstptr = -1;
      break;

    case BUILT_IN_VPRINTF:
      // Signature:
      //   __builtin_vprintf (format, va_list)
      idx_format = 0;
      info.argidx = -1;
      idx_dstptr = -1;
      break;

    case BUILT_IN_VPRINTF_CHK:
      // Signature:
      //   __builtin___vprintf_chk (ost, format, va_list)
      idx_format = 1;
      info.argidx = -1;
      idx_dstptr = -1;
      break;

    case BUILT_IN_VSNPRINTF:
      // Signature:
      //   __builtin_vsnprintf (dst, size, format, va)
      idx_dstsize = 1;
      idx_format = 2;
      info.argidx = -1;
      info.bounded = true;
      break;

    case BUILT_IN_VSNPRINTF_CHK:
      // Signature:
      //   __builtin___vsnprintf_chk (dst, size, ost, objsize, format, va)
      idx_dstsize = 1;
      idx_objsize = 3;
      idx_format = 4;
      info.argidx = -1;
      info.bounded = true;
      break;

    case BUILT_IN_VSPRINTF:
      // Signature:
      //   __builtin_vsprintf (dst, format, va)
      idx_format = 1;
      info.argidx = -1;
      break;

    case BUILT_IN_VSPRINTF_CHK:
      // Signature:
      //   __builtin___vsprintf_chk (dst, ost, objsize, format, va)
      idx_format = 3;
      idx_objsize = 2;
      info.argidx = -1;
      break;

    default:
      // Any other built-in is not handled by this function.
      return false;
    }

  /* Set the global warning level for this function: the truncation
     level for bounded functions and the overflow level for the rest.  */
  warn_level = info.bounded ? warn_format_trunc : warn_format_overflow;

  /* For all string functions the first argument is a pointer to
     the destination.  IDX_DSTPTR is either zero or UINT_MAX at this
     point, so DSTPTR is null for functions with no destination.  */
  tree dstptr = (idx_dstptr < gimple_call_num_args (info.callstmt)
		 ? gimple_call_arg (info.callstmt, 0) : NULL_TREE);

  info.format = gimple_call_arg (info.callstmt, idx_format);

  /* True when the destination size is constant as opposed to the lower
     or upper bound of a range.  */
  bool dstsize_cst_p = true;
  /* Cleared when the specified bound exceeds, or may exceed, INT_MAX.
     Used below to initialize RES.POSUNDER4K.  */
  bool posunder4k = true;

  if (idx_dstsize == UINT_MAX)
    {
      /* For non-bounded functions like sprintf, determine the size
	 of the destination from the object or pointer passed to it
	 as the first argument.  */
      dstsize = get_destination_size (dstptr, info.callstmt, ptr_qry);
    }
  else if (tree size = gimple_call_arg (info.callstmt, idx_dstsize))
    {
      /* For bounded functions try to get the size argument.  */

      if (TREE_CODE (size) == INTEGER_CST)
	{
	  dstsize = tree_to_uhwi (size);
	  /* No object can be larger than SIZE_MAX / 2 bytes (half
	     the address space) on the target.
	     The functions are defined only for output of at most INT_MAX
	     bytes.  Specifying a bound in excess of that limit effectively
	     defeats the bounds checking (and on some implementations such
	     as Solaris cause the function to fail with EINVAL).  */
	  if (dstsize > target_size_max () / 2)
	    {
	      /* Avoid warning if -Wstringop-overflow is specified since
		 it also warns for the same thing though only for the
		 checking built-ins.  */
	      if ((idx_objsize == UINT_MAX
		   || !warn_stringop_overflow))
		warning_at (gimple_location (info.callstmt), info.warnopt (),
			    "specified bound %wu exceeds maximum object size "
			    "%wu",
			    dstsize, target_size_max () / 2);
	      /* POSIX requires snprintf to fail if DSTSIZE is greater
		 than INT_MAX.  Even though not all POSIX implementations
		 conform to the requirement, avoid folding in this case.  */
	      posunder4k = false;
	    }
	  else if (dstsize > target_int_max ())
	    {
	      warning_at (gimple_location (info.callstmt), info.warnopt (),
			  "specified bound %wu exceeds %<INT_MAX%>",
			  dstsize);
	      /* POSIX requires snprintf to fail if DSTSIZE is greater
		 than INT_MAX.  Avoid folding in that case.  */
	      posunder4k = false;
	    }
	}
      else if (TREE_CODE (size) == SSA_NAME)
	{
	  /* Try to determine the range of values of the argument
	     and use the greater of the two at level 1 and the smaller
	     of them at level 2.  */
	  value_range vr;
	  ptr_qry.rvals->range_of_expr (vr, size, info.callstmt);

	  if (!vr.undefined_p ())
	    {
	      tree type = TREE_TYPE (size);
	      tree tmin = wide_int_to_tree (type, vr.lower_bound ());
	      tree tmax = wide_int_to_tree (type, vr.upper_bound ());
	      unsigned HOST_WIDE_INT minsize = TREE_INT_CST_LOW (tmin);
	      unsigned HOST_WIDE_INT maxsize = TREE_INT_CST_LOW (tmax);
	      dstsize = warn_level < 2 ? maxsize : minsize;

	      /* Only warn when even the smallest bound is too large.  */
	      if (minsize > target_int_max ())
		warning_at (gimple_location (info.callstmt), info.warnopt (),
			    "specified bound range [%wu, %wu] exceeds "
			    "%<INT_MAX%>",
			    minsize, maxsize);

	      /* POSIX requires snprintf to fail if DSTSIZE is greater
		 than INT_MAX.  Avoid folding if that's possible.  */
	      if (maxsize > target_int_max ())
		posunder4k = false;
	    }

	  /* The destination size is not constant.  If the function is
	     bounded (e.g., snprintf) a lower bound of zero doesn't
	     necessarily imply it can be eliminated.  */
	  dstsize_cst_p = false;
	}
    }

  /* For the checking built-ins use the object size argument when it
     is a constant that fits in an unsigned HOST_WIDE_INT.  Otherwise
     OBJSIZE keeps its initial value of HOST_WIDE_INT_M1U.  */
  if (idx_objsize != UINT_MAX)
    if (tree size = gimple_call_arg (info.callstmt, idx_objsize))
      if (tree_fits_uhwi_p (size))
	objsize = tree_to_uhwi (size);

  if (info.bounded && !dstsize)
    {
      /* As a special case, when the explicitly specified destination
	 size argument (to a bounded function like snprintf) is zero
	 it is a request to determine the number of bytes on output
	 without actually producing any.  Pretend the size is
	 unlimited in this case.  The call is only marked as not
	 writing anything when the zero size is a constant.  */
      info.objsize = HOST_WIDE_INT_MAX;
      info.nowrite = dstsize_cst_p;
    }
  else
    {
      /* For calls to non-bounded functions or to those of bounded
	 functions with a non-zero size, warn if the destination
	 pointer is null.  */
      if (dstptr && integer_zerop (dstptr))
	{
	  /* This is diagnosed with -Wformat only when the null is a constant
	     pointer.  The warning here diagnoses instances where the pointer
	     is not constant.  */
	  location_t loc = gimple_location (info.callstmt);
	  warning_at (EXPR_LOC_OR_LOC (dstptr, loc),
		      info.warnopt (), "null destination pointer");
	  return false;
	}

      /* Set the object size to the smaller of the two arguments
	 if both have been specified and they're not equal.  */
      info.objsize = dstsize < objsize ? dstsize : objsize;

      if (info.bounded
	  && dstsize < target_size_max () / 2 && objsize < dstsize
	  /* Avoid warning if -Wstringop-overflow is specified since
	     it also warns for the same thing though only for the
	     checking built-ins.  */
	  && (idx_objsize == UINT_MAX
	      || !warn_stringop_overflow))
	{
	  warning_at (gimple_location (info.callstmt), info.warnopt (),
		      "specified bound %wu exceeds the size %wu "
		      "of the destination object", dstsize, objsize);
	}
    }

  /* Warn about a null format string argument in a call to a built-in
     and give up on the call.  */
  if (integer_zerop (info.format)
      && gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL))
    {
      location_t loc = gimple_location (info.callstmt);
      warning_at (EXPR_LOC_OR_LOC (info.format, loc),
		  info.warnopt (), "null format string");
      return false;
    }

  /* Nothing more can be done unless the format string is known.  */
  info.fmtstr = get_format_string (info.format, &info.fmtloc);
  if (!info.fmtstr)
    return false;

  if (warn_restrict)
    {
      /* Compute the origin of the destination pointer and its offset
	 from the base object/pointer if possible.  */
      info.dst_offset = 0;
      info.dst_origin = get_origin_and_offset (dstptr, &info.dst_field,
					       &info.dst_offset);
    }

  /* The result is the number of bytes output by the formatted function,
     including the terminating NUL.  */
  format_result res;

  /* I/O functions with no destination argument (i.e., all forms of fprintf
     and printf) may fail under any conditions.  Others (i.e., all forms of
     sprintf) may only fail under specific conditions determined for each
     directive.  Clear POSUNDER4K for the former set of functions and set
     it to true for the latter (it can only be cleared later, but it is
     never set to true again).  */
  res.posunder4k = posunder4k && dstptr;

  bool success = compute_format_length (info, &res, ptr_qry.rvals);
  /* If a warning was issued while processing the call, suppress
     the same warning option on the call statement.  */
  if (res.warned)
    suppress_warning (info.callstmt, info.warnopt ());

  /* When optimizing and the printf return value optimization is enabled,
     attempt to substitute the computed result for the return value of
     the call.  Avoid this optimization when -frounding-math is in effect
     and the format string contains a floating point directive.  */
  bool call_removed = false;
  if (success && optimize > 0)
    {
      /* Save a copy of the iterator pointing at the call.  The iterator
	 may change to point past the call in try_substitute_return_value
	 but the original value is needed in try_simplify_call.  */
      gimple_stmt_iterator gsi_call = *gsi;

      if (flag_printf_return_value
	  && (!flag_rounding_math || !res.floating))
	call_removed = try_substitute_return_value (gsi, info, res);

      /* The result of try_simplify_call is not propagated: CALL_REMOVED
	 reflects only what try_substitute_return_value reported.  */
      if (!call_removed)
	try_simplify_call (&gsi_call, info, res);
    }

  return call_removed;
}
4729