1 /* Copyright (C) 2016-2021 Free Software Foundation, Inc.
2    Contributed by Martin Sebor <msebor@redhat.com>.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This file implements the printf-return-value pass.  The pass does
21    two things: 1) it analyzes calls to formatted output functions like
22    sprintf looking for possible buffer overflows and calls to bounded
23    functions like snprintf for early truncation (and under the control
24    of the -Wformat-length option issues warnings), and 2) under the
25    control of the -fprintf-return-value option it folds the return
26    value of safe calls into constants, making it possible to eliminate
27    code that depends on the value of those constants.
28 
29    For all functions (bounded or not) the pass uses the size of the
30    destination object.  That means that it will diagnose calls to
31    snprintf not on the basis of the size specified by the function's
32    second argument but rather on the basis of the size the first
33    argument points to (if possible).  For bound-checking built-ins
34    like __builtin___snprintf_chk the pass uses the size typically
35    determined by __builtin_object_size and passed to the built-in
36    by the Glibc inline wrapper.
37 
38    The pass handles all forms standard sprintf format directives,
39    including character, integer, floating point, pointer, and strings,
40    with the standard C flags, widths, and precisions.  For integers
41    and strings it computes the length of output itself.  For floating
42    point it uses MPFR to format known constants with up and down
43    rounding and uses the resulting range of output lengths.  For
44    strings it uses the length of string literals and the sizes of
45    character arrays that a character pointer may point to as a bound
46    on the longest string.  */
47 
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "backend.h"
52 #include "tree.h"
53 #include "gimple.h"
54 #include "tree-pass.h"
55 #include "ssa.h"
56 #include "gimple-fold.h"
57 #include "gimple-pretty-print.h"
58 #include "diagnostic-core.h"
59 #include "fold-const.h"
60 #include "gimple-iterator.h"
61 #include "tree-ssa.h"
62 #include "tree-object-size.h"
63 #include "tree-cfg.h"
64 #include "tree-ssa-propagate.h"
65 #include "calls.h"
66 #include "cfgloop.h"
67 #include "tree-scalar-evolution.h"
68 #include "tree-ssa-loop.h"
69 #include "intl.h"
70 #include "langhooks.h"
71 
72 #include "attribs.h"
73 #include "builtins.h"
74 #include "stor-layout.h"
75 
76 #include "realmpfr.h"
77 #include "target.h"
78 
79 #include "cpplib.h"
80 #include "input.h"
81 #include "toplev.h"
82 #include "substring-locations.h"
83 #include "diagnostic.h"
84 #include "domwalk.h"
85 #include "alloc-pool.h"
86 #include "vr-values.h"
87 #include "tree-ssa-strlen.h"
88 #include "tree-dfa.h"
89 
/* The likely worst case value of MB_LEN_MAX for the target, large enough
   for UTF-8.  Ideally, this would be obtained by a target hook if it were
   to be used for optimization but it's good enough as is for warnings.
   Written as a function-like macro so that uses read like the
   target_xxx () helper functions below.  */
#define target_mb_len_max()   6

/* The maximum number of bytes a single non-string directive can result
   in.  This is the result of printf("%.*Lf", INT_MAX, -LDBL_MAX) for
   LDBL_MAX_10_EXP of 4932.  Note that target_dir_max () expands to
   a call to target_int_max () and so is not a constant expression.  */
#define IEEE_MAX_10_EXP    4932
#define target_dir_max()   (target_int_max () + IEEE_MAX_10_EXP + 2)
100 
101 namespace {
102 
/* Set to the warning level for the current function which is equal
   either to warn_format_trunc for bounded functions or to
   warn_format_overflow otherwise.  Values greater than 1 change how
   fmtresult::adjust_for_width_or_precision computes the LIKELY
   counter.  */

static int warn_level;
108 
/* The minimum, maximum, likely, and unlikely maximum number of bytes
   of output either a formatting function or an individual directive
   can result in.  All counters are unsigned; values at or above
   HOST_WIDE_INT_MAX act as an "unknown/invalid" marker (for example,
   format_result::operator+= leaves such counters unchanged).  */

struct result_range
{
  /* The absolute minimum number of bytes.  The result of a successful
     conversion is guaranteed to be no less than this.  (An erroneous
     conversion can be indicated by MIN > HOST_WIDE_INT_MAX.)  */
  unsigned HOST_WIDE_INT min;
  /* The likely maximum result that is used in diagnostics.  In most
     cases MAX is the same as the worst case UNLIKELY result.  */
  unsigned HOST_WIDE_INT max;
  /* The likely result used to trigger diagnostics.  For conversions
     that result in a range of bytes [MIN, MAX], LIKELY is somewhere
     in that range.  */
  unsigned HOST_WIDE_INT likely;
  /* In rare cases (e.g., for multibyte characters) UNLIKELY gives
     the worst cases maximum result of a directive.  In most cases
     UNLIKELY == MAX.  UNLIKELY is used to control the return value
     optimization but not in diagnostics.  */
  unsigned HOST_WIDE_INT unlikely;
};
132 
133 /* Return the value of INT_MIN for the target.  */
134 
135 static inline HOST_WIDE_INT
target_int_min()136 target_int_min ()
137 {
138   return tree_to_shwi (TYPE_MIN_VALUE (integer_type_node));
139 }
140 
141 /* Return the value of INT_MAX for the target.  */
142 
143 static inline unsigned HOST_WIDE_INT
target_int_max()144 target_int_max ()
145 {
146   return tree_to_uhwi (TYPE_MAX_VALUE (integer_type_node));
147 }
148 
149 /* Return the value of SIZE_MAX for the target.  */
150 
151 static inline unsigned HOST_WIDE_INT
target_size_max()152 target_size_max ()
153 {
154   return tree_to_uhwi (TYPE_MAX_VALUE (size_type_node));
155 }
156 
/* A straightforward mapping from the execution character set to the host
   character set indexed by execution character.  The table is filled in
   by init_target_to_host_charmap.  Element 0 doubles as a flag: it is
   set to 1 when every character of interest maps to itself, in which
   case the string overload of target_to_host copies its input without
   translating it.  */

static char target_to_host_charmap[256];
161 
162 /* Initialize a mapping from the execution character set to the host
163    character set.  */
164 
165 static bool
init_target_to_host_charmap()166 init_target_to_host_charmap ()
167 {
168   /* If the percent sign is non-zero the mapping has already been
169      initialized.  */
170   if (target_to_host_charmap['%'])
171     return true;
172 
173   /* Initialize the target_percent character (done elsewhere).  */
174   if (!init_target_chars ())
175     return false;
176 
177   /* The subset of the source character set used by printf conversion
178      specifications (strictly speaking, not all letters are used but
179      they are included here for the sake of simplicity).  The dollar
180      sign must be included even though it's not in the basic source
181      character set.  */
182   const char srcset[] = " 0123456789!\"#%&'()*+,-./:;<=>?[\\]^_{|}~$"
183     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
184 
185   /* Set the mapping for all characters to some ordinary value (i,e.,
186      not none used in printf conversion specifications) and overwrite
187      those that are used by conversion specifications with their
188      corresponding values.  */
189   memset (target_to_host_charmap + 1, '?', sizeof target_to_host_charmap - 1);
190 
191   /* Are the two sets of characters the same?  */
192   bool all_same_p = true;
193 
194   for (const char *pc = srcset; *pc; ++pc)
195     {
196       /* Slice off the high end bits in case target characters are
197 	 signed.  All values are expected to be non-nul, otherwise
198 	 there's a problem.  */
199       if (unsigned char tc = lang_hooks.to_target_charset (*pc))
200 	{
201 	  target_to_host_charmap[tc] = *pc;
202 	  if (tc != *pc)
203 	    all_same_p = false;
204 	}
205       else
206 	return false;
207 
208     }
209 
210   /* Set the first element to a non-zero value if the mapping
211      is 1-to-1, otherwise leave it clear (NUL is assumed to be
212      the same in both character sets).  */
213   target_to_host_charmap[0] = all_same_p;
214 
215   return true;
216 }
217 
218 /* Return the host source character corresponding to the character
219    CH in the execution character set if one exists, or some innocuous
220    (non-special, non-nul) source character otherwise.  */
221 
222 static inline unsigned char
target_to_host(unsigned char ch)223 target_to_host (unsigned char ch)
224 {
225   return target_to_host_charmap[ch];
226 }
227 
228 /* Convert an initial substring of the string TARGSTR consisting of
229    characters in the execution character set into a string in the
230    source character set on the host and store up to HOSTSZ characters
231    in the buffer pointed to by HOSTR.  Return HOSTR.  */
232 
233 static const char*
target_to_host(char * hostr,size_t hostsz,const char * targstr)234 target_to_host (char *hostr, size_t hostsz, const char *targstr)
235 {
236   /* Make sure the buffer is reasonably big.  */
237   gcc_assert (hostsz > 4);
238 
239   /* The interesting subset of source and execution characters are
240      the same so no conversion is necessary.  However, truncate
241      overlong strings just like the translated strings are.  */
242   if (target_to_host_charmap['\0'] == 1)
243     {
244       size_t len = strlen (targstr);
245       if (len >= hostsz)
246 	{
247 	  memcpy (hostr, targstr, hostsz - 4);
248 	  strcpy (hostr + hostsz - 4, "...");
249 	}
250       else
251 	memcpy (hostr, targstr, len + 1);
252       return hostr;
253     }
254 
255   /* Convert the initial substring of TARGSTR to the corresponding
256      characters in the host set, appending "..." if TARGSTR is too
257      long to fit.  Using the static buffer assumes the function is
258      not called in between sequence points (which it isn't).  */
259   for (char *ph = hostr; ; ++targstr)
260     {
261       *ph++ = target_to_host (*targstr);
262       if (!*targstr)
263 	break;
264 
265       if (size_t (ph - hostr) == hostsz)
266 	{
267 	  strcpy (ph - 4, "...");
268 	  break;
269 	}
270     }
271 
272   return hostr;
273 }
274 
275 /* Convert the sequence of decimal digits in the execution character
276    starting at *PS to a HOST_WIDE_INT, analogously to strtol.  Return
277    the result and set *PS to one past the last converted character.
278    On range error set ERANGE to the digit that caused it.  */
279 
280 static inline HOST_WIDE_INT
target_strtowi(const char ** ps,const char ** erange)281 target_strtowi (const char **ps, const char **erange)
282 {
283   unsigned HOST_WIDE_INT val = 0;
284   for ( ; ; ++*ps)
285     {
286       unsigned char c = target_to_host (**ps);
287       if (ISDIGIT (c))
288 	{
289 	  c -= '0';
290 
291 	  /* Check for overflow.  */
292 	  if (val > ((unsigned HOST_WIDE_INT) HOST_WIDE_INT_MAX - c) / 10LU)
293 	    {
294 	      val = HOST_WIDE_INT_MAX;
295 	      *erange = *ps;
296 
297 	      /* Skip the remaining digits.  */
298 	      do
299 		c = target_to_host (*++*ps);
300 	      while (ISDIGIT (c));
301 	      break;
302 	    }
303 	  else
304 	    val = val * 10 + c;
305 	}
306       else
307 	break;
308     }
309 
310   return val;
311 }
312 
313 /* Given FORMAT, set *PLOC to the source location of the format string
314    and return the format string if it is known or null otherwise.  */
315 
316 static const char*
get_format_string(tree format,location_t * ploc)317 get_format_string (tree format, location_t *ploc)
318 {
319   *ploc = EXPR_LOC_OR_LOC (format, input_location);
320 
321   return c_getstr (format);
322 }
323 
324 /* For convenience and brevity, shorter named entrypoints of
325    format_string_diagnostic_t::emit_warning_va and
326    format_string_diagnostic_t::emit_warning_n_va.
327    These have to be functions with the attribute so that exgettext
328    works properly.  */
329 
330 static bool
331 ATTRIBUTE_GCC_DIAG (5, 6)
fmtwarn(const substring_loc & fmt_loc,location_t param_loc,const char * corrected_substring,int opt,const char * gmsgid,...)332 fmtwarn (const substring_loc &fmt_loc, location_t param_loc,
333 	 const char *corrected_substring, int opt, const char *gmsgid, ...)
334 {
335   format_string_diagnostic_t diag (fmt_loc, NULL, param_loc, NULL,
336 				   corrected_substring);
337   va_list ap;
338   va_start (ap, gmsgid);
339   bool warned = diag.emit_warning_va (opt, gmsgid, &ap);
340   va_end (ap);
341 
342   return warned;
343 }
344 
345 static bool
346 ATTRIBUTE_GCC_DIAG (6, 8) ATTRIBUTE_GCC_DIAG (7, 8)
fmtwarn_n(const substring_loc & fmt_loc,location_t param_loc,const char * corrected_substring,int opt,unsigned HOST_WIDE_INT n,const char * singular_gmsgid,const char * plural_gmsgid,...)347 fmtwarn_n (const substring_loc &fmt_loc, location_t param_loc,
348 	   const char *corrected_substring, int opt, unsigned HOST_WIDE_INT n,
349 	   const char *singular_gmsgid, const char *plural_gmsgid, ...)
350 {
351   format_string_diagnostic_t diag (fmt_loc, NULL, param_loc, NULL,
352 				   corrected_substring);
353   va_list ap;
354   va_start (ap, plural_gmsgid);
355   bool warned = diag.emit_warning_n_va (opt, n, singular_gmsgid, plural_gmsgid,
356 					&ap);
357   va_end (ap);
358 
359   return warned;
360 }
361 
/* Format length modifiers.  FMT_LEN_none, being first, has the value
   zero and so is what a value-initialized directive::modifier holds.  */

enum format_lengths
{
  FMT_LEN_none,
  FMT_LEN_hh,    // char argument
  FMT_LEN_h,     // short
  FMT_LEN_l,     // long
  FMT_LEN_ll,    // long long
  FMT_LEN_L,     // long double (and GNU long long)
  FMT_LEN_z,     // size_t
  FMT_LEN_t,     // ptrdiff_t
  FMT_LEN_j      // intmax_t
};
376 
377 
378 /* Description of the result of conversion either of a single directive
379    or the whole format string.  */
380 
381 class fmtresult
382 {
383 public:
384   /* Construct a FMTRESULT object with all counters initialized
385      to MIN.  KNOWNRANGE is set when MIN is valid.  */
386   fmtresult (unsigned HOST_WIDE_INT min = HOST_WIDE_INT_MAX)
argmin()387   : argmin (), argmax (), dst_offset (HOST_WIDE_INT_MIN), nonstr (),
388     knownrange (min < HOST_WIDE_INT_MAX),
389     mayfail (), nullp ()
390   {
391     range.min = min;
392     range.max = min;
393     range.likely = min;
394     range.unlikely = min;
395   }
396 
397   /* Construct a FMTRESULT object with MIN, MAX, and LIKELY counters.
398      KNOWNRANGE is set when both MIN and MAX are valid.   */
399   fmtresult (unsigned HOST_WIDE_INT min, unsigned HOST_WIDE_INT max,
400 	     unsigned HOST_WIDE_INT likely = HOST_WIDE_INT_MAX)
argmin()401   : argmin (), argmax (), dst_offset (HOST_WIDE_INT_MIN), nonstr (),
402     knownrange (min < HOST_WIDE_INT_MAX && max < HOST_WIDE_INT_MAX),
403     mayfail (), nullp ()
404   {
405     range.min = min;
406     range.max = max;
407     range.likely = max < likely ? min : likely;
408     range.unlikely = max;
409   }
410 
411   /* Adjust result upward to reflect the RANGE of values the specified
412      width or precision is known to be in.  */
413   fmtresult& adjust_for_width_or_precision (const HOST_WIDE_INT[2],
414 					    tree = NULL_TREE,
415 					    unsigned = 0, unsigned = 0);
416 
417   /* Return the maximum number of decimal digits a value of TYPE
418      formats as on output.  */
419   static unsigned type_max_digits (tree, int);
420 
421   /* The range a directive's argument is in.  */
422   tree argmin, argmax;
423 
424   /* The starting offset into the destination of the formatted function
425      call of the %s argument that points into (aliases with) the same
426      destination array.  */
427   HOST_WIDE_INT dst_offset;
428 
429   /* The minimum and maximum number of bytes that a directive
430      results in on output for an argument in the range above.  */
431   result_range range;
432 
433   /* Non-nul when the argument of a string directive is not a nul
434      terminated string.  */
435   tree nonstr;
436 
437   /* True when the range above is obtained from a known value of
438      a directive's argument or its bounds and not the result of
439      heuristics that depend on warning levels.  */
440   bool knownrange;
441 
442   /* True for a directive that may fail (such as wide character
443      directives).  */
444   bool mayfail;
445 
446   /* True when the argument is a null pointer.  */
447   bool nullp;
448 };
449 
450 /* Adjust result upward to reflect the range ADJUST of values the
451    specified width or precision is known to be in.  When non-null,
452    TYPE denotes the type of the directive whose result is being
453    adjusted, BASE gives the base of the directive (octal, decimal,
454    or hex), and ADJ denotes the additional adjustment to the LIKELY
455    counter that may need to be added when ADJUST is a range.  */
456 
457 fmtresult&
adjust_for_width_or_precision(const HOST_WIDE_INT adjust[2],tree type,unsigned base,unsigned adj)458 fmtresult::adjust_for_width_or_precision (const HOST_WIDE_INT adjust[2],
459 					  tree type /* = NULL_TREE */,
460 					  unsigned base /* = 0 */,
461 					  unsigned adj /* = 0 */)
462 {
463   bool minadjusted = false;
464 
465   /* Adjust the minimum and likely counters.  */
466   if (adjust[0] >= 0)
467     {
468       if (range.min < (unsigned HOST_WIDE_INT)adjust[0])
469 	{
470 	  range.min = adjust[0];
471 	  minadjusted = true;
472 	}
473 
474       /* Adjust the likely counter.  */
475       if (range.likely < range.min)
476 	range.likely = range.min;
477     }
478   else if (adjust[0] == target_int_min ()
479 	   && (unsigned HOST_WIDE_INT)adjust[1] == target_int_max ())
480     knownrange = false;
481 
482   /* Adjust the maximum counter.  */
483   if (adjust[1] > 0)
484     {
485       if (range.max < (unsigned HOST_WIDE_INT)adjust[1])
486 	{
487 	  range.max = adjust[1];
488 
489 	  /* Set KNOWNRANGE if both the minimum and maximum have been
490 	     adjusted.  Otherwise leave it at what it was before.  */
491 	  knownrange = minadjusted;
492 	}
493     }
494 
495   if (warn_level > 1 && type)
496     {
497       /* For large non-constant width or precision whose range spans
498 	 the maximum number of digits produced by the directive for
499 	 any argument, set the likely number of bytes to be at most
500 	 the number digits plus other adjustment determined by the
501 	 caller (one for sign or two for the hexadecimal "0x"
502 	 prefix).  */
503       unsigned dirdigs = type_max_digits (type, base);
504       if (adjust[0] < dirdigs && dirdigs < adjust[1]
505 	  && range.likely < dirdigs)
506 	range.likely = dirdigs + adj;
507     }
508   else if (range.likely < (range.min ? range.min : 1))
509     {
510       /* Conservatively, set LIKELY to at least MIN but no less than
511 	 1 unless MAX is zero.  */
512       range.likely = (range.min
513 		      ? range.min
514 		      : range.max && (range.max < HOST_WIDE_INT_MAX
515 				      || warn_level > 1) ? 1 : 0);
516     }
517 
518   /* Finally adjust the unlikely counter to be at least as large as
519      the maximum.  */
520   if (range.unlikely < range.max)
521     range.unlikely = range.max;
522 
523   return *this;
524 }
525 
526 /* Return the maximum number of digits a value of TYPE formats in
527    BASE on output, not counting base prefix .  */
528 
529 unsigned
type_max_digits(tree type,int base)530 fmtresult::type_max_digits (tree type, int base)
531 {
532   unsigned prec = TYPE_PRECISION (type);
533   switch (base)
534     {
535     case 8:
536       return (prec + 2) / 3;
537     case 10:
538       /* Decimal approximation: yields 3, 5, 10, and 20 for precision
539 	 of 8, 16, 32, and 64 bits.  */
540       return prec * 301 / 1000 + 1;
541     case 16:
542       return prec / 4;
543     }
544 
545   gcc_unreachable ();
546 }
547 
/* Forward declaration needed by directive::set_width, which calls it
   with the two bounds of the width range as the HOST_WIDE_INT output
   pointers.  NOTE(review): the meaning of the remaining parameters
   isn't visible at this point; see the definition.  */

static bool
get_int_range (tree, gimple *, HOST_WIDE_INT *, HOST_WIDE_INT *,
	       bool, HOST_WIDE_INT, range_query *);

/* Forward declaration: struct directive below stores a pointer to
   the call_info of the call it is a part of.  */
struct call_info;
553 
554 /* Description of a format directive.  A directive is either a plain
555    string or a conversion specification that starts with '%'.  */
556 
557 struct directive
558 {
directivedirective559   directive (const call_info *inf, unsigned dno)
560     : info (inf), dirno (dno), argno (), beg (), len (), flags (),
561     width (), prec (),  modifier (), specifier (), arg (), fmtfunc ()
562   { }
563 
564   /* Reference to the info structure describing the call that this
565      directive is a part of.  */
566   const call_info *info;
567 
568   /* The 1-based directive number (for debugging).  */
569   unsigned dirno;
570 
571   /* The zero-based argument number of the directive's argument ARG in
572      the function's argument list.  */
573   unsigned argno;
574 
575   /* The first character of the directive and its length.  */
576   const char *beg;
577   size_t len;
578 
579   /* A bitmap of flags, one for each character.  */
580   unsigned flags[256 / sizeof (int)];
581 
582   /* The range of values of the specified width, or -1 if not specified.  */
583   HOST_WIDE_INT width[2];
584   /* The range of values of the specified precision, or -1 if not
585      specified.  */
586   HOST_WIDE_INT prec[2];
587 
588   /* Length modifier.  */
589   format_lengths modifier;
590 
591   /* Format specifier character.  */
592   char specifier;
593 
594   /* The argument of the directive or null when the directive doesn't
595      take one or when none is available (such as for vararg functions).  */
596   tree arg;
597 
598   /* Format conversion function that given a directive and an argument
599      returns the formatting result.  */
600   fmtresult (*fmtfunc) (const directive &, tree, range_query *);
601 
602   /* Return True when the format flag CHR has been used.  */
get_flagdirective603   bool get_flag (char chr) const
604   {
605     unsigned char c = chr & 0xff;
606     return (flags[c / (CHAR_BIT * sizeof *flags)]
607 	    & (1U << (c % (CHAR_BIT * sizeof *flags))));
608   }
609 
610   /* Make a record of the format flag CHR having been used.  */
set_flagdirective611   void set_flag (char chr)
612   {
613     unsigned char c = chr & 0xff;
614     flags[c / (CHAR_BIT * sizeof *flags)]
615       |= (1U << (c % (CHAR_BIT * sizeof *flags)));
616   }
617 
618   /* Reset the format flag CHR.  */
clear_flagdirective619   void clear_flag (char chr)
620   {
621     unsigned char c = chr & 0xff;
622     flags[c / (CHAR_BIT * sizeof *flags)]
623       &= ~(1U << (c % (CHAR_BIT * sizeof *flags)));
624   }
625 
626   /* Set both bounds of the width range to VAL.  */
set_widthdirective627   void set_width (HOST_WIDE_INT val)
628   {
629     width[0] = width[1] = val;
630   }
631 
632   /* Set the width range according to ARG, with both bounds being
633      no less than 0.  For a constant ARG set both bounds to its value
634      or 0, whichever is greater.  For a non-constant ARG in some range
635      set width to its range adjusting each bound to -1 if it's less.
636      For an indeterminate ARG set width to [0, INT_MAX].  */
637   void set_width (tree arg, range_query *);
638 
639   /* Set both bounds of the precision range to VAL.  */
set_precisiondirective640   void set_precision (HOST_WIDE_INT val)
641   {
642     prec[0] = prec[1] = val;
643   }
644 
645   /* Set the precision range according to ARG, with both bounds being
646      no less than -1.  For a constant ARG set both bounds to its value
647      or -1 whichever is greater.  For a non-constant ARG in some range
648      set precision to its range adjusting each bound to -1 if it's less.
649      For an indeterminate ARG set precision to [-1, INT_MAX].  */
650   void set_precision (tree arg, range_query *query);
651 
652   /* Return true if both width and precision are known to be
653      either constant or in some range, false otherwise.  */
known_width_and_precisiondirective654   bool known_width_and_precision () const
655   {
656     return ((width[1] < 0
657 	     || (unsigned HOST_WIDE_INT)width[1] <= target_int_max ())
658 	    && (prec[1] < 0
659 		|| (unsigned HOST_WIDE_INT)prec[1] < target_int_max ()));
660   }
661 };
662 
663 /* The result of a call to a formatted function.  */
664 
665 struct format_result
666 {
format_resultformat_result667   format_result ()
668     : range (), aliases (), alias_count (), knownrange (), posunder4k (),
669     floating (), warned () { /* No-op.  */ }
670 
~format_resultformat_result671   ~format_result ()
672   {
673     XDELETEVEC (aliases);
674   }
675 
676   /* Range of characters written by the formatted function.
677      Setting the minimum to HOST_WIDE_INT_MAX disables all
678      length tracking for the remainder of the format string.  */
679   result_range range;
680 
681   struct alias_info
682   {
683     directive dir;          /* The directive that aliases the destination.  */
684     HOST_WIDE_INT offset;   /* The offset at which it aliases it.  */
685     result_range range;     /* The raw result of the directive.  */
686   };
687 
688   /* An array of directives whose pointer argument aliases a part
689      of the destination object of the formatted function.  */
690   alias_info *aliases;
691   unsigned alias_count;
692 
693   /* True when the range above is obtained from known values of
694      directive arguments, or bounds on the amount of output such
695      as width and precision, and not the result of  heuristics that
696      depend on warning levels.  It's used to issue stricter diagnostics
697      in cases where strings of unknown lengths are bounded by the arrays
698      they are determined to refer to.  KNOWNRANGE must not be used for
699      the return value optimization.  */
700   bool knownrange;
701 
702   /* True if no individual directive could fail or result in more than
703      4095 bytes of output (the total NUMBER_CHARS_{MIN,MAX} might be
704      greater).  Implementations are not required to handle directives
705      that produce more than 4K bytes (leading to undefined behavior)
706      and so when one is found it disables the return value optimization.
707      Similarly, directives that can fail (such as wide character
708      directives) disable the optimization.  */
709   bool posunder4k;
710 
711   /* True when a floating point directive has been seen in the format
712      string.  */
713   bool floating;
714 
715   /* True when an intermediate result has caused a warning.  Used to
716      avoid issuing duplicate warnings while finishing the processing
717      of a call.  WARNED also disables the return value optimization.  */
718   bool warned;
719 
720   /* Preincrement the number of output characters by 1.  */
721   format_result& operator++ ()
722   {
723     return *this += 1;
724   }
725 
726   /* Postincrement the number of output characters by 1.  */
727   format_result operator++ (int)
728   {
729     format_result prev (*this);
730     *this += 1;
731     return prev;
732   }
733 
734   /* Increment the number of output characters by N.  */
735   format_result& operator+= (unsigned HOST_WIDE_INT);
736 
737   /* Add a directive to the sequence of those with potentially aliasing
738      arguments.  */
739   void append_alias (const directive &, HOST_WIDE_INT, const result_range &);
740 
741 private:
742   /* Not copyable or assignable.  */
743   format_result (format_result&);
744   void operator= (format_result&);
745 };
746 
747 format_result&
748 format_result::operator+= (unsigned HOST_WIDE_INT n)
749 {
750   gcc_assert (n < HOST_WIDE_INT_MAX);
751 
752   if (range.min < HOST_WIDE_INT_MAX)
753     range.min += n;
754 
755   if (range.max < HOST_WIDE_INT_MAX)
756     range.max += n;
757 
758   if (range.likely < HOST_WIDE_INT_MAX)
759     range.likely += n;
760 
761   if (range.unlikely < HOST_WIDE_INT_MAX)
762     range.unlikely += n;
763 
764   return *this;
765 }
766 
767 void
append_alias(const directive & d,HOST_WIDE_INT off,const result_range & resrng)768 format_result::append_alias (const directive &d, HOST_WIDE_INT off,
769 			     const result_range &resrng)
770 {
771   unsigned cnt = alias_count + 1;
772   alias_info *ar = XNEWVEC (alias_info, cnt);
773 
774   for (unsigned i = 0; i != alias_count; ++i)
775     ar[i] = aliases[i];
776 
777   ar[alias_count].dir = d;
778   ar[alias_count].offset = off;
779   ar[alias_count].range = resrng;
780 
781   XDELETEVEC (aliases);
782 
783   alias_count = cnt;
784   aliases = ar;
785 }
786 
/* Return the number of digits needed to represent X in BASE, i.e.,
   one more than the integer logarithm of X in BASE for nonzero X,
   and 1 for zero.  */

static int
ilog (unsigned HOST_WIDE_INT x, int base)
{
  /* Every value, zero included, takes at least one digit; each further
     division by BASE that leaves a nonzero quotient adds another.  */
  int ndigits = 1;
  for (x /= base; x != 0; x /= base)
    ++ndigits;
  return ndigits;
}
800 
801 /* Return the number of bytes resulting from converting into a string
802    the INTEGER_CST tree node X in BASE with a minimum of PREC digits.
803    PLUS indicates whether 1 for a plus sign should be added for positive
804    numbers, and PREFIX whether the length of an octal ('O') or hexadecimal
805    ('0x') prefix should be added for nonzero numbers.  Return -1 if X cannot
806    be represented.  */
807 
808 static HOST_WIDE_INT
tree_digits(tree x,int base,HOST_WIDE_INT prec,bool plus,bool prefix)809 tree_digits (tree x, int base, HOST_WIDE_INT prec, bool plus, bool prefix)
810 {
811   unsigned HOST_WIDE_INT absval;
812 
813   HOST_WIDE_INT res;
814 
815   if (TYPE_UNSIGNED (TREE_TYPE (x)))
816     {
817       if (tree_fits_uhwi_p (x))
818 	{
819 	  absval = tree_to_uhwi (x);
820 	  res = plus;
821 	}
822       else
823 	return -1;
824     }
825   else
826     {
827       if (tree_fits_shwi_p (x))
828 	{
829 	  HOST_WIDE_INT i = tree_to_shwi (x);
830          if (HOST_WIDE_INT_MIN == i)
831            {
832              /* Avoid undefined behavior due to negating a minimum.  */
833              absval = HOST_WIDE_INT_MAX;
834              res = 1;
835            }
836          else if (i < 0)
837 	   {
838 	     absval = -i;
839 	     res = 1;
840 	   }
841 	 else
842 	   {
843 	     absval = i;
844 	     res = plus;
845 	   }
846 	}
847       else
848 	return -1;
849     }
850 
851   int ndigs = ilog (absval, base);
852 
853   res += prec < ndigs ? ndigs : prec;
854 
855   /* Adjust a non-zero value for the base prefix, either hexadecimal,
856      or, unless precision has resulted in a leading zero, also octal.  */
857   if (prefix && absval && (base == 16 || prec <= ndigs))
858     {
859       if (base == 8)
860 	res += 1;
861       else if (base == 16)
862 	res += 2;
863     }
864 
865   return res;
866 }
867 
868 /* Description of a call to a formatted function.  */
869 
870 struct call_info
871 {
872   /* Function call statement.  */
873   gimple *callstmt;
874 
875   /* Function called.  */
876   tree func;
877 
878   /* Called built-in function code.  */
879   built_in_function fncode;
880 
881   /* The "origin" of the destination pointer argument, which is either
882      the DECL of the destination buffer being written into or a pointer
883      that points to it, plus some offset.  */
884   tree dst_origin;
885 
886   /* For a destination pointing to a struct array member, the offset of
887      the member.  */
888   HOST_WIDE_INT dst_field;
889 
890   /* The offset into the destination buffer.  */
891   HOST_WIDE_INT dst_offset;
892 
893   /* Format argument and format string extracted from it.  */
894   tree format;
895   const char *fmtstr;
896 
897   /* The location of the format argument.  */
898   location_t fmtloc;
899 
900   /* The destination object size for __builtin___xxx_chk functions
901      typically determined by __builtin_object_size, or -1 if unknown.  */
902   unsigned HOST_WIDE_INT objsize;
903 
904   /* Number of the first variable argument.  */
905   unsigned HOST_WIDE_INT argidx;
906 
907   /* True for functions like snprintf that specify the size of
908      the destination, false for others like sprintf that don't.  */
909   bool bounded;
910 
911   /* True for bounded functions like snprintf that specify a zero-size
912      buffer as a request to compute the size of output without actually
913      writing any.  NOWRITE is cleared in response to the %n directive
914      which has side-effects similar to writing output.  */
915   bool nowrite;
916 
917   /* Return true if the called function's return value is used.  */
retval_usedcall_info918   bool retval_used () const
919   {
920     return gimple_get_lhs (callstmt);
921   }
922 
923   /* Return the warning option corresponding to the called function.  */
warnoptcall_info924   int warnopt () const
925   {
926     return bounded ? OPT_Wformat_truncation_ : OPT_Wformat_overflow_;
927   }
928 
929   /* Return true for calls to file formatted functions.  */
is_file_funccall_info930   bool is_file_func () const
931   {
932     return (fncode == BUILT_IN_FPRINTF
933 	    || fncode == BUILT_IN_FPRINTF_CHK
934 	    || fncode == BUILT_IN_FPRINTF_UNLOCKED
935 	    || fncode == BUILT_IN_VFPRINTF
936 	    || fncode == BUILT_IN_VFPRINTF_CHK);
937   }
938 
939   /* Return true for calls to string formatted functions.  */
is_string_funccall_info940   bool is_string_func () const
941   {
942     return (fncode == BUILT_IN_SPRINTF
943 	    || fncode == BUILT_IN_SPRINTF_CHK
944 	    || fncode == BUILT_IN_SNPRINTF
945 	    || fncode == BUILT_IN_SNPRINTF_CHK
946 	    || fncode == BUILT_IN_VSPRINTF
947 	    || fncode == BUILT_IN_VSPRINTF_CHK
948 	    || fncode == BUILT_IN_VSNPRINTF
949 	    || fncode == BUILT_IN_VSNPRINTF_CHK);
950   }
951 };
952 
953 void
set_width(tree arg,range_query * query)954 directive::set_width (tree arg, range_query *query)
955 {
956   get_int_range (arg, info->callstmt, width, width + 1, true, 0, query);
957 }
958 
959 void
set_precision(tree arg,range_query * query)960 directive::set_precision (tree arg, range_query *query)
961 {
962   get_int_range (arg, info->callstmt, prec, prec + 1, false, -1, query);
963 }
964 
965 /* Return the result of formatting a no-op directive (such as '%n').  */
966 
967 static fmtresult
format_none(const directive &,tree,range_query *)968 format_none (const directive &, tree, range_query *)
969 {
970   fmtresult res (0);
971   return res;
972 }
973 
974 /* Return the result of formatting the '%%' directive.  */
975 
976 static fmtresult
format_percent(const directive &,tree,range_query *)977 format_percent (const directive &, tree, range_query *)
978 {
979   fmtresult res (1);
980   return res;
981 }
982 
983 
984 /* Compute intmax_type_node and uintmax_type_node similarly to how
985    tree.c builds size_type_node.  */
986 
987 static void
build_intmax_type_nodes(tree * pintmax,tree * puintmax)988 build_intmax_type_nodes (tree *pintmax, tree *puintmax)
989 {
990   if (strcmp (UINTMAX_TYPE, "unsigned int") == 0)
991     {
992       *pintmax = integer_type_node;
993       *puintmax = unsigned_type_node;
994     }
995   else if (strcmp (UINTMAX_TYPE, "long unsigned int") == 0)
996     {
997       *pintmax = long_integer_type_node;
998       *puintmax = long_unsigned_type_node;
999     }
1000   else if (strcmp (UINTMAX_TYPE, "long long unsigned int") == 0)
1001     {
1002       *pintmax = long_long_integer_type_node;
1003       *puintmax = long_long_unsigned_type_node;
1004     }
1005   else
1006     {
1007       for (int i = 0; i < NUM_INT_N_ENTS; i++)
1008 	if (int_n_enabled_p[i])
1009 	  {
1010 	    char name[50], altname[50];
1011 	    sprintf (name, "__int%d unsigned", int_n_data[i].bitsize);
1012 	    sprintf (altname, "__int%d__ unsigned", int_n_data[i].bitsize);
1013 
1014 	    if (strcmp (name, UINTMAX_TYPE) == 0
1015 		|| strcmp (altname, UINTMAX_TYPE) == 0)
1016 	      {
1017 	        *pintmax = int_n_trees[i].signed_type;
1018 	        *puintmax = int_n_trees[i].unsigned_type;
1019 		return;
1020 	      }
1021 	  }
1022       gcc_unreachable ();
1023     }
1024 }
1025 
1026 /* Determine the range [*PMIN, *PMAX] that the expression ARG is
1027    in and that is representable in type int.
1028    Return true when the range is a subrange of that of int.
1029    When ARG is null it is as if it had the full range of int.
1030    When ABSOLUTE is true the range reflects the absolute value of
1031    the argument.  When ABSOLUTE is false, negative bounds of
1032    the determined range are replaced with NEGBOUND.  */
1033 
1034 static bool
get_int_range(tree arg,gimple * stmt,HOST_WIDE_INT * pmin,HOST_WIDE_INT * pmax,bool absolute,HOST_WIDE_INT negbound,range_query * query)1035 get_int_range (tree arg, gimple *stmt,
1036 	       HOST_WIDE_INT *pmin, HOST_WIDE_INT *pmax,
1037 	       bool absolute, HOST_WIDE_INT negbound,
1038 	       range_query *query)
1039 {
1040   /* The type of the result.  */
1041   const_tree type = integer_type_node;
1042 
1043   bool knownrange = false;
1044 
1045   if (!arg)
1046     {
1047       *pmin = tree_to_shwi (TYPE_MIN_VALUE (type));
1048       *pmax = tree_to_shwi (TYPE_MAX_VALUE (type));
1049     }
1050   else if (TREE_CODE (arg) == INTEGER_CST
1051 	   && TYPE_PRECISION (TREE_TYPE (arg)) <= TYPE_PRECISION (type))
1052     {
1053       /* For a constant argument return its value adjusted as specified
1054 	 by NEGATIVE and NEGBOUND and return true to indicate that the
1055 	 result is known.  */
1056       *pmin = tree_fits_shwi_p (arg) ? tree_to_shwi (arg) : tree_to_uhwi (arg);
1057       *pmax = *pmin;
1058       knownrange = true;
1059     }
1060   else
1061     {
1062       /* True if the argument's range cannot be determined.  */
1063       bool unknown = true;
1064 
1065       tree argtype = TREE_TYPE (arg);
1066 
1067       /* Ignore invalid arguments with greater precision that that
1068 	 of the expected type (e.g., in sprintf("%*i", 12LL, i)).
1069 	 They will have been detected and diagnosed by -Wformat and
1070 	 so it's not important to complicate this code to try to deal
1071 	 with them again.  */
1072       if (TREE_CODE (arg) == SSA_NAME
1073 	  && INTEGRAL_TYPE_P (argtype)
1074 	  && TYPE_PRECISION (argtype) <= TYPE_PRECISION (type))
1075 	{
1076 	  /* Try to determine the range of values of the integer argument.  */
1077 	  value_range vr;
1078 	  query->range_of_expr (vr, arg, stmt);
1079 
1080 	  if (!vr.undefined_p () && !vr.varying_p ())
1081 	    {
1082 	      HOST_WIDE_INT type_min
1083 		= (TYPE_UNSIGNED (argtype)
1084 		   ? tree_to_uhwi (TYPE_MIN_VALUE (argtype))
1085 		   : tree_to_shwi (TYPE_MIN_VALUE (argtype)));
1086 
1087 	      HOST_WIDE_INT type_max = tree_to_uhwi (TYPE_MAX_VALUE (argtype));
1088 
1089 	      tree type = TREE_TYPE (arg);
1090 	      tree tmin = wide_int_to_tree (type, vr.lower_bound ());
1091 	      tree tmax = wide_int_to_tree (type, vr.upper_bound ());
1092 	      *pmin = TREE_INT_CST_LOW (tmin);
1093 	      *pmax = TREE_INT_CST_LOW (tmax);
1094 
1095 	      if (*pmin < *pmax)
1096 		{
1097 		  /* Return true if the adjusted range is a subrange of
1098 		     the full range of the argument's type.  *PMAX may
1099 		     be less than *PMIN when the argument is unsigned
1100 		     and its upper bound is in excess of TYPE_MAX.  In
1101 		     that (invalid) case disregard the range and use that
1102 		     of the expected type instead.  */
1103 		  knownrange = type_min < *pmin || *pmax < type_max;
1104 
1105 		  unknown = false;
1106 		}
1107 	    }
1108 	}
1109 
1110       /* Handle an argument with an unknown range as if none had been
1111 	 provided.  */
1112       if (unknown)
1113 	return get_int_range (NULL_TREE, NULL, pmin, pmax, absolute,
1114 			      negbound, query);
1115     }
1116 
1117   /* Adjust each bound as specified by ABSOLUTE and NEGBOUND.  */
1118   if (absolute)
1119     {
1120       if (*pmin < 0)
1121 	{
1122 	  if (*pmin == *pmax)
1123 	    *pmin = *pmax = -*pmin;
1124 	  else
1125 	    {
1126 	      /* Make sure signed overlow is avoided.  */
1127 	      gcc_assert (*pmin != HOST_WIDE_INT_MIN);
1128 
1129 	      HOST_WIDE_INT tmp = -*pmin;
1130 	      *pmin = 0;
1131 	      if (*pmax < tmp)
1132 		*pmax = tmp;
1133 	    }
1134 	}
1135     }
1136   else if (*pmin < negbound)
1137     *pmin = negbound;
1138 
1139   return knownrange;
1140 }
1141 
1142 /* With the range [*ARGMIN, *ARGMAX] of an integer directive's actual
1143    argument, due to the conversion from either *ARGMIN or *ARGMAX to
1144    the type of the directive's formal argument it's possible for both
1145    to result in the same number of bytes or a range of bytes that's
1146    less than the number of bytes that would result from formatting
1147    some other value in the range [*ARGMIN, *ARGMAX].  This can be
1148    determined by checking for the actual argument being in the range
1149    of the type of the directive.  If it isn't it must be assumed to
1150    take on the full range of the directive's type.
1151    Return true when the range has been adjusted to the full range
1152    of DIRTYPE, and false otherwise.  */
1153 
1154 static bool
adjust_range_for_overflow(tree dirtype,tree * argmin,tree * argmax)1155 adjust_range_for_overflow (tree dirtype, tree *argmin, tree *argmax)
1156 {
1157   tree argtype = TREE_TYPE (*argmin);
1158   unsigned argprec = TYPE_PRECISION (argtype);
1159   unsigned dirprec = TYPE_PRECISION (dirtype);
1160 
1161   /* If the actual argument and the directive's argument have the same
1162      precision and sign there can be no overflow and so there is nothing
1163      to adjust.  */
1164   if (argprec == dirprec && TYPE_SIGN (argtype) == TYPE_SIGN (dirtype))
1165     return false;
1166 
1167   /* The logic below was inspired/lifted from the CONVERT_EXPR_CODE_P
1168      branch in the extract_range_from_unary_expr function in tree-vrp.c.  */
1169 
1170   if (TREE_CODE (*argmin) == INTEGER_CST
1171       && TREE_CODE (*argmax) == INTEGER_CST
1172       && (dirprec >= argprec
1173 	  || integer_zerop (int_const_binop (RSHIFT_EXPR,
1174 					     int_const_binop (MINUS_EXPR,
1175 							      *argmax,
1176 							      *argmin),
1177 					     size_int (dirprec)))))
1178     {
1179       *argmin = force_fit_type (dirtype, wi::to_widest (*argmin), 0, false);
1180       *argmax = force_fit_type (dirtype, wi::to_widest (*argmax), 0, false);
1181 
1182       /* If *ARGMIN is still less than *ARGMAX the conversion above
1183 	 is safe.  Otherwise, it has overflowed and would be unsafe.  */
1184       if (tree_int_cst_le (*argmin, *argmax))
1185 	return false;
1186     }
1187 
1188   *argmin = TYPE_MIN_VALUE (dirtype);
1189   *argmax = TYPE_MAX_VALUE (dirtype);
1190   return true;
1191 }
1192 
1193 /* Return a range representing the minimum and maximum number of bytes
1194    that the format directive DIR will output for any argument given
1195    the WIDTH and PRECISION (extracted from DIR).  This function is
1196    used when the directive argument or its value isn't known.  */
1197 
1198 static fmtresult
format_integer(const directive & dir,tree arg,range_query * query)1199 format_integer (const directive &dir, tree arg, range_query *query)
1200 {
1201   tree intmax_type_node;
1202   tree uintmax_type_node;
1203 
1204   /* Base to format the number in.  */
1205   int base;
1206 
1207   /* True when a conversion is preceded by a prefix indicating the base
1208      of the argument (octal or hexadecimal).  */
1209   bool maybebase = dir.get_flag ('#');
1210 
1211   /* True when a signed conversion is preceded by a sign or space.  */
1212   bool maybesign = false;
1213 
1214   /* True for signed conversions (i.e., 'd' and 'i').  */
1215   bool sign = false;
1216 
1217   switch (dir.specifier)
1218     {
1219     case 'd':
1220     case 'i':
1221       /* Space and '+' are  only meaningful for signed conversions.  */
1222       maybesign = dir.get_flag (' ') | dir.get_flag ('+');
1223       sign = true;
1224       base = 10;
1225       break;
1226     case 'u':
1227       base = 10;
1228       break;
1229     case 'o':
1230       base = 8;
1231       break;
1232     case 'X':
1233     case 'x':
1234       base = 16;
1235       break;
1236     default:
1237       gcc_unreachable ();
1238     }
1239 
1240   /* The type of the "formal" argument expected by the directive.  */
1241   tree dirtype = NULL_TREE;
1242 
1243   /* Determine the expected type of the argument from the length
1244      modifier.  */
1245   switch (dir.modifier)
1246     {
1247     case FMT_LEN_none:
1248       if (dir.specifier == 'p')
1249 	dirtype = ptr_type_node;
1250       else
1251 	dirtype = sign ? integer_type_node : unsigned_type_node;
1252       break;
1253 
1254     case FMT_LEN_h:
1255       dirtype = sign ? short_integer_type_node : short_unsigned_type_node;
1256       break;
1257 
1258     case FMT_LEN_hh:
1259       dirtype = sign ? signed_char_type_node : unsigned_char_type_node;
1260       break;
1261 
1262     case FMT_LEN_l:
1263       dirtype = sign ? long_integer_type_node : long_unsigned_type_node;
1264       break;
1265 
1266     case FMT_LEN_L:
1267     case FMT_LEN_ll:
1268       dirtype = (sign
1269 		 ? long_long_integer_type_node
1270 		 : long_long_unsigned_type_node);
1271       break;
1272 
1273     case FMT_LEN_z:
1274       dirtype = signed_or_unsigned_type_for (!sign, size_type_node);
1275       break;
1276 
1277     case FMT_LEN_t:
1278       dirtype = signed_or_unsigned_type_for (!sign, ptrdiff_type_node);
1279       break;
1280 
1281     case FMT_LEN_j:
1282       build_intmax_type_nodes (&intmax_type_node, &uintmax_type_node);
1283       dirtype = sign ? intmax_type_node : uintmax_type_node;
1284       break;
1285 
1286     default:
1287       return fmtresult ();
1288     }
1289 
1290   /* The type of the argument to the directive, either deduced from
1291      the actual non-constant argument if one is known, or from
1292      the directive itself when none has been provided because it's
1293      a va_list.  */
1294   tree argtype = NULL_TREE;
1295 
1296   if (!arg)
1297     {
1298       /* When the argument has not been provided, use the type of
1299 	 the directive's argument as an approximation.  This will
1300 	 result in false positives for directives like %i with
1301 	 arguments with smaller precision (such as short or char).  */
1302       argtype = dirtype;
1303     }
1304   else if (TREE_CODE (arg) == INTEGER_CST)
1305     {
1306       /* When a constant argument has been provided use its value
1307 	 rather than type to determine the length of the output.  */
1308       fmtresult res;
1309 
1310       if ((dir.prec[0] <= 0 && dir.prec[1] >= 0) && integer_zerop (arg))
1311 	{
1312 	  /* As a special case, a precision of zero with a zero argument
1313 	     results in zero bytes except in base 8 when the '#' flag is
1314 	     specified, and for signed conversions in base 8 and 10 when
1315 	     either the space or '+' flag has been specified and it results
1316 	     in just one byte (with width having the normal effect).  This
1317 	     must extend to the case of a specified precision with
1318 	     an unknown value because it can be zero.  */
1319 	  res.range.min = ((base == 8 && dir.get_flag ('#')) || maybesign);
1320 	  if (res.range.min == 0 && dir.prec[0] != dir.prec[1])
1321 	    {
1322 	      res.range.max = 1;
1323 	      res.range.likely = 1;
1324 	    }
1325 	  else
1326 	    {
1327 	      res.range.max = res.range.min;
1328 	      res.range.likely = res.range.min;
1329 	    }
1330 	}
1331       else
1332 	{
1333 	  /* Convert the argument to the type of the directive.  */
1334 	  arg = fold_convert (dirtype, arg);
1335 
1336 	  res.range.min = tree_digits (arg, base, dir.prec[0],
1337 				       maybesign, maybebase);
1338 	  if (dir.prec[0] == dir.prec[1])
1339 	    res.range.max = res.range.min;
1340 	  else
1341 	    res.range.max = tree_digits (arg, base, dir.prec[1],
1342 					 maybesign, maybebase);
1343 	  res.range.likely = res.range.min;
1344 	  res.knownrange = true;
1345 	}
1346 
1347       res.range.unlikely = res.range.max;
1348 
1349       /* Bump up the counters if WIDTH is greater than LEN.  */
1350       res.adjust_for_width_or_precision (dir.width, dirtype, base,
1351 					 (sign | maybebase) + (base == 16));
1352       /* Bump up the counters again if PRECision is greater still.  */
1353       res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1354 					 (sign | maybebase) + (base == 16));
1355 
1356       return res;
1357     }
1358   else if (INTEGRAL_TYPE_P (TREE_TYPE (arg))
1359 	   || TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)
1360     /* Determine the type of the provided non-constant argument.  */
1361     argtype = TREE_TYPE (arg);
1362   else
1363     /* Don't bother with invalid arguments since they likely would
1364        have already been diagnosed, and disable any further checking
1365        of the format string by returning [-1, -1].  */
1366     return fmtresult ();
1367 
1368   fmtresult res;
1369 
1370   /* Using either the range the non-constant argument is in, or its
1371      type (either "formal" or actual), create a range of values that
1372      constrain the length of output given the warning level.  */
1373   tree argmin = NULL_TREE;
1374   tree argmax = NULL_TREE;
1375 
1376   if (arg
1377       && TREE_CODE (arg) == SSA_NAME
1378       && INTEGRAL_TYPE_P (argtype))
1379     {
1380       /* Try to determine the range of values of the integer argument
1381 	 (range information is not available for pointers).  */
1382       value_range vr;
1383       query->range_of_expr (vr, arg, dir.info->callstmt);
1384 
1385       if (!vr.varying_p () && !vr.undefined_p ())
1386 	{
1387 	  argmin = wide_int_to_tree (TREE_TYPE (arg), vr.lower_bound ());
1388 	  argmax = wide_int_to_tree (TREE_TYPE (arg), vr.upper_bound ());
1389 
1390 	  /* Set KNOWNRANGE if the argument is in a known subrange
1391 	     of the directive's type and neither width nor precision
1392 	     is unknown.  (KNOWNRANGE may be reset below).  */
1393 	  res.knownrange
1394 	    = ((!tree_int_cst_equal (TYPE_MIN_VALUE (dirtype), argmin)
1395 		|| !tree_int_cst_equal (TYPE_MAX_VALUE (dirtype), argmax))
1396 	       && dir.known_width_and_precision ());
1397 
1398 	  res.argmin = argmin;
1399 	  res.argmax = argmax;
1400 	}
1401       else
1402 	{
1403 	  /* The argument here may be the result of promoting the actual
1404 	     argument to int.  Try to determine the type of the actual
1405 	     argument before promotion and narrow down its range that
1406 	     way.  */
1407 	  gimple *def = SSA_NAME_DEF_STMT (arg);
1408 	  if (is_gimple_assign (def))
1409 	    {
1410 	      tree_code code = gimple_assign_rhs_code (def);
1411 	      if (code == INTEGER_CST)
1412 		{
1413 		  arg = gimple_assign_rhs1 (def);
1414 		  return format_integer (dir, arg, query);
1415 		}
1416 
1417 	      if (code == NOP_EXPR)
1418 		{
1419 		  tree type = TREE_TYPE (gimple_assign_rhs1 (def));
1420 		  if (INTEGRAL_TYPE_P (type)
1421 		      || TREE_CODE (type) == POINTER_TYPE)
1422 		    argtype = type;
1423 		}
1424 	    }
1425 	}
1426     }
1427 
1428   if (!argmin)
1429     {
1430       if (TREE_CODE (argtype) == POINTER_TYPE)
1431 	{
1432 	  argmin = build_int_cst (pointer_sized_int_node, 0);
1433 	  argmax = build_all_ones_cst (pointer_sized_int_node);
1434 	}
1435       else
1436 	{
1437 	  argmin = TYPE_MIN_VALUE (argtype);
1438 	  argmax = TYPE_MAX_VALUE (argtype);
1439 	}
1440     }
1441 
1442   /* Clear KNOWNRANGE if the range has been adjusted to the maximum
1443      of the directive.  If it has been cleared then since ARGMIN and/or
1444      ARGMAX have been adjusted also adjust the corresponding ARGMIN and
1445      ARGMAX in the result to include in diagnostics.  */
1446   if (adjust_range_for_overflow (dirtype, &argmin, &argmax))
1447     {
1448       res.knownrange = false;
1449       res.argmin = argmin;
1450       res.argmax = argmax;
1451     }
1452 
1453   /* Recursively compute the minimum and maximum from the known range.  */
1454   if (TYPE_UNSIGNED (dirtype) || tree_int_cst_sgn (argmin) >= 0)
1455     {
1456       /* For unsigned conversions/directives or signed when
1457 	 the minimum is positive, use the minimum and maximum to compute
1458 	 the shortest and longest output, respectively.  */
1459       res.range.min = format_integer (dir, argmin, query).range.min;
1460       res.range.max = format_integer (dir, argmax, query).range.max;
1461     }
1462   else if (tree_int_cst_sgn (argmax) < 0)
1463     {
1464       /* For signed conversions/directives if maximum is negative,
1465 	 use the minimum as the longest output and maximum as the
1466 	 shortest output.  */
1467       res.range.min = format_integer (dir, argmax, query).range.min;
1468       res.range.max = format_integer (dir, argmin, query).range.max;
1469     }
1470   else
1471     {
1472       /* Otherwise, 0 is inside of the range and minimum negative.  Use 0
1473 	 as the shortest output and for the longest output compute the
1474 	 length of the output of both minimum and maximum and pick the
1475 	 longer.  */
1476       unsigned HOST_WIDE_INT max1
1477 	= format_integer (dir, argmin, query).range.max;
1478       unsigned HOST_WIDE_INT max2
1479 	= format_integer (dir, argmax, query).range.max;
1480       res.range.min
1481 	= format_integer (dir, integer_zero_node, query).range.min;
1482       res.range.max = MAX (max1, max2);
1483     }
1484 
1485   /* If the range is known, use the maximum as the likely length.  */
1486   if (res.knownrange)
1487     res.range.likely = res.range.max;
1488   else
1489     {
1490       /* Otherwise, use the minimum.  Except for the case where for %#x or
1491          %#o the minimum is just for a single value in the range (0) and
1492          for all other values it is something longer, like 0x1 or 01.
1493 	  Use the length for value 1 in that case instead as the likely
1494 	  length.  */
1495       res.range.likely = res.range.min;
1496       if (maybebase
1497 	  && base != 10
1498 	  && (tree_int_cst_sgn (argmin) < 0 || tree_int_cst_sgn (argmax) > 0))
1499 	{
1500 	  if (res.range.min == 1)
1501 	    res.range.likely += base == 8 ? 1 : 2;
1502 	  else if (res.range.min == 2
1503 		   && base == 16
1504 		   && (dir.width[0] == 2 || dir.prec[0] == 2))
1505 	    ++res.range.likely;
1506 	}
1507     }
1508 
1509   res.range.unlikely = res.range.max;
1510   res.adjust_for_width_or_precision (dir.width, dirtype, base,
1511 				     (sign | maybebase) + (base == 16));
1512   res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1513 				     (sign | maybebase) + (base == 16));
1514 
1515   return res;
1516 }
1517 
1518 /* Return the number of bytes that a format directive consisting of FLAGS,
1519    PRECision, format SPECification, and MPFR rounding specifier RNDSPEC,
1520    would result for argument X under ideal conditions (i.e., if PREC
1521    weren't excessive).  MPFR 3.1 allocates large amounts of memory for
1522    values of PREC with large magnitude and can fail (see MPFR bug #21056).
1523    This function works around those problems.  */
1524 
1525 static unsigned HOST_WIDE_INT
get_mpfr_format_length(mpfr_ptr x,const char * flags,HOST_WIDE_INT prec,char spec,char rndspec)1526 get_mpfr_format_length (mpfr_ptr x, const char *flags, HOST_WIDE_INT prec,
1527 			char spec, char rndspec)
1528 {
1529   char fmtstr[40];
1530 
1531   HOST_WIDE_INT len = strlen (flags);
1532 
1533   fmtstr[0] = '%';
1534   memcpy (fmtstr + 1, flags, len);
1535   memcpy (fmtstr + 1 + len, ".*R", 3);
1536   fmtstr[len + 4] = rndspec;
1537   fmtstr[len + 5] = spec;
1538   fmtstr[len + 6] = '\0';
1539 
1540   spec = TOUPPER (spec);
1541   if (spec == 'E' || spec == 'F')
1542     {
1543       /* For %e, specify the precision explicitly since mpfr_sprintf
1544 	 does its own thing just to be different (see MPFR bug 21088).  */
1545       if (prec < 0)
1546 	prec = 6;
1547     }
1548   else
1549     {
1550       /* Avoid passing negative precisions with larger magnitude to MPFR
1551 	 to avoid exposing its bugs.  (A negative precision is supposed
1552 	 to be ignored.)  */
1553       if (prec < 0)
1554 	prec = -1;
1555     }
1556 
1557   HOST_WIDE_INT p = prec;
1558 
1559   if (spec == 'G' && !strchr (flags, '#'))
1560     {
1561       /* For G/g without the pound flag, precision gives the maximum number
1562 	 of significant digits which is bounded by LDBL_MAX_10_EXP, or, for
1563 	 a 128 bit IEEE extended precision, 4932.  Using twice as much here
1564 	 should be more than sufficient for any real format.  */
1565       if ((IEEE_MAX_10_EXP * 2) < prec)
1566 	prec = IEEE_MAX_10_EXP * 2;
1567       p = prec;
1568     }
1569   else
1570     {
1571       /* Cap precision arbitrarily at 1KB and add the difference
1572 	 (if any) to the MPFR result.  */
1573       if (prec > 1024)
1574 	p = 1024;
1575     }
1576 
1577   len = mpfr_snprintf (NULL, 0, fmtstr, (int)p, x);
1578 
1579   /* Handle the unlikely (impossible?) error by returning more than
1580      the maximum dictated by the function's return type.  */
1581   if (len < 0)
1582     return target_dir_max () + 1;
1583 
1584   /* Adjust the return value by the difference.  */
1585   if (p < prec)
1586     len += prec - p;
1587 
1588   return len;
1589 }
1590 
1591 /* Return the number of bytes to format using the format specifier
1592    SPEC and the precision PREC the largest value in the real floating
1593    TYPE.  */
1594 
1595 static unsigned HOST_WIDE_INT
format_floating_max(tree type,char spec,HOST_WIDE_INT prec)1596 format_floating_max (tree type, char spec, HOST_WIDE_INT prec)
1597 {
1598   machine_mode mode = TYPE_MODE (type);
1599 
1600   /* IBM Extended mode.  */
1601   if (MODE_COMPOSITE_P (mode))
1602     mode = DFmode;
1603 
1604   /* Get the real type format description for the target.  */
1605   const real_format *rfmt = REAL_MODE_FORMAT (mode);
1606   REAL_VALUE_TYPE rv;
1607 
1608   real_maxval (&rv, 0, mode);
1609 
1610   /* Convert the GCC real value representation with the precision
1611      of the real type to the mpfr_t format with the GCC default
1612      round-to-nearest mode.  */
1613   mpfr_t x;
1614   mpfr_init2 (x, rfmt->p);
1615   mpfr_from_real (x, &rv, MPFR_RNDN);
1616 
1617   /* Return a value one greater to account for the leading minus sign.  */
1618   unsigned HOST_WIDE_INT r
1619     = 1 + get_mpfr_format_length (x, "", prec, spec, 'D');
1620   mpfr_clear (x);
1621   return r;
1622 }
1623 
1624 /* Return a range representing the minimum and maximum number of bytes
1625    that the directive DIR will output for any argument.  PREC gives
1626    the adjusted precision range to account for negative precisions
1627    meaning the default 6.  This function is used when the directive
1628    argument or its value isn't known.  */
1629 
1630 static fmtresult
format_floating(const directive & dir,const HOST_WIDE_INT prec[2])1631 format_floating (const directive &dir, const HOST_WIDE_INT prec[2])
1632 {
1633   tree type;
1634 
1635   switch (dir.modifier)
1636     {
1637     case FMT_LEN_l:
1638     case FMT_LEN_none:
1639       type = double_type_node;
1640       break;
1641 
1642     case FMT_LEN_L:
1643       type = long_double_type_node;
1644       break;
1645 
1646     case FMT_LEN_ll:
1647       type = long_double_type_node;
1648       break;
1649 
1650     default:
1651       return fmtresult ();
1652     }
1653 
1654   /* The minimum and maximum number of bytes produced by the directive.  */
1655   fmtresult res;
1656 
1657   /* The minimum output as determined by flags.  It's always at least 1.
1658      When plus or space are set the output is preceded by either a sign
1659      or a space.  */
1660   unsigned flagmin = (1 /* for the first digit */
1661 		      + (dir.get_flag ('+') | dir.get_flag (' ')));
1662 
1663   /* The minimum is 3 for "inf" and "nan" for all specifiers, plus 1
1664      for the plus sign/space with the '+' and ' ' flags, respectively,
1665      unless reduced below.  */
1666   res.range.min = 2 + flagmin;
1667 
1668   /* When the pound flag is set the decimal point is included in output
1669      regardless of precision.  Whether or not a decimal point is included
1670      otherwise depends on the specification and precision.  */
1671   bool radix = dir.get_flag ('#');
1672 
1673   switch (dir.specifier)
1674     {
1675     case 'A':
1676     case 'a':
1677       {
1678 	HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1679 	if (dir.prec[0] <= 0)
1680 	  minprec = 0;
1681 	else if (dir.prec[0] > 0)
1682 	  minprec = dir.prec[0] + !radix /* decimal point */;
1683 
1684 	res.range.likely = (2 /* 0x */
1685 			    + flagmin
1686 			    + radix
1687 			    + minprec
1688 			    + 3 /* p+0 */);
1689 
1690 	res.range.max = format_floating_max (type, 'a', prec[1]);
1691 
1692 	/* The unlikely maximum accounts for the longest multibyte
1693 	   decimal point character.  */
1694 	res.range.unlikely = res.range.max;
1695 	if (dir.prec[1] > 0)
1696 	  res.range.unlikely += target_mb_len_max () - 1;
1697 
1698 	break;
1699       }
1700 
1701     case 'E':
1702     case 'e':
1703       {
1704 	/* Minimum output attributable to precision and, when it's
1705 	   non-zero, decimal point.  */
1706 	HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1707 
1708 	/* The likely minimum output is "[-+]1.234567e+00" regardless
1709 	   of the value of the actual argument.  */
1710 	res.range.likely = (flagmin
1711 			    + radix
1712 			    + minprec
1713 			    + 2 /* e+ */ + 2);
1714 
1715 	res.range.max = format_floating_max (type, 'e', prec[1]);
1716 
1717 	/* The unlikely maximum accounts for the longest multibyte
1718 	   decimal point character.  */
1719 	if (dir.prec[0] != dir.prec[1]
1720 	    || dir.prec[0] == -1 || dir.prec[0] > 0)
1721 	  res.range.unlikely = res.range.max + target_mb_len_max () -1;
1722 	else
1723 	  res.range.unlikely = res.range.max;
1724 	break;
1725       }
1726 
1727     case 'F':
1728     case 'f':
1729       {
1730 	/* Minimum output attributable to precision and, when it's non-zero,
1731 	   decimal point.  */
1732 	HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1733 
1734 	/* For finite numbers (i.e., not infinity or NaN) the lower bound
1735 	   when precision isn't specified is 8 bytes ("1.23456" since
1736 	   precision is taken to be 6).  When precision is zero, the lower
1737 	   bound is 1 byte (e.g., "1").  Otherwise, when precision is greater
1738 	   than zero, then the lower bound is 2 plus precision (plus flags).
1739 	   But in all cases, the lower bound is no greater than 3.  */
1740 	unsigned HOST_WIDE_INT min = flagmin + radix + minprec;
1741 	if (min < res.range.min)
1742 	  res.range.min = min;
1743 
1744 	/* Compute the upper bound for -TYPE_MAX.  */
1745 	res.range.max = format_floating_max (type, 'f', prec[1]);
1746 
1747 	/* The minimum output with unknown precision is a single byte
1748 	   (e.g., "0") but the more likely output is 3 bytes ("0.0").  */
1749 	if (dir.prec[0] < 0 && dir.prec[1] > 0)
1750 	  res.range.likely = 3;
1751 	else
1752 	  res.range.likely = min;
1753 
1754 	/* The unlikely maximum accounts for the longest multibyte
1755 	   decimal point character.  */
1756 	if (dir.prec[0] != dir.prec[1]
1757 	    || dir.prec[0] == -1 || dir.prec[0] > 0)
1758 	  res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1759 	break;
1760       }
1761 
1762     case 'G':
1763     case 'g':
1764       {
1765 	/* The %g output depends on precision and the exponent of
1766 	   the argument.  Since the value of the argument isn't known
1767 	   the lower bound on the range of bytes (not counting flags
1768 	   or width) is 1 plus radix (i.e., either "0" or "0." for
1769 	   "%g" and "%#g", respectively, with a zero argument).  */
1770 	unsigned HOST_WIDE_INT min = flagmin + radix;
1771 	if (min < res.range.min)
1772 	  res.range.min = min;
1773 
1774 	char spec = 'g';
1775 	HOST_WIDE_INT maxprec = dir.prec[1];
1776 	if (radix && maxprec)
1777 	  {
1778 	    /* When the pound flag (radix) is set, trailing zeros aren't
1779 	       trimmed and so the longest output is the same as for %e,
1780 	       except with precision minus 1 (as specified in C11).  */
1781 	    spec = 'e';
1782 	    if (maxprec > 0)
1783 	      --maxprec;
1784 	    else if (maxprec < 0)
1785 	      maxprec = 5;
1786 	  }
1787 	else
1788 	  maxprec = prec[1];
1789 
1790 	res.range.max = format_floating_max (type, spec, maxprec);
1791 
1792 	/* The likely output is either the maximum computed above
1793 	   minus 1 (assuming the maximum is positive) when precision
1794 	   is known (or unspecified), or the same minimum as for %e
1795 	   (which is computed for a non-negative argument).  Unlike
1796 	   for the other specifiers above the likely output isn't
1797 	   the minimum because for %g that's 1 which is unlikely.  */
1798 	if (dir.prec[1] < 0
1799 	    || (unsigned HOST_WIDE_INT)dir.prec[1] < target_int_max ())
1800 	  res.range.likely = res.range.max - 1;
1801 	else
1802 	  {
1803 	    HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1804 	    res.range.likely = (flagmin
1805 				+ radix
1806 				+ minprec
1807 				+ 2 /* e+ */ + 2);
1808 	  }
1809 
1810 	/* The unlikely maximum accounts for the longest multibyte
1811 	   decimal point character.  */
1812 	res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1813 	break;
1814       }
1815 
1816     default:
1817       return fmtresult ();
1818     }
1819 
1820   /* Bump up the byte counters if WIDTH is greater.  */
1821   res.adjust_for_width_or_precision (dir.width);
1822   return res;
1823 }
1824 
1825 /* Return a range representing the minimum and maximum number of bytes
1826    that the directive DIR will write on output for the floating argument
1827    ARG.  */
1828 
1829 static fmtresult
format_floating(const directive & dir,tree arg,range_query *)1830 format_floating (const directive &dir, tree arg, range_query *)
1831 {
1832   HOST_WIDE_INT prec[] = { dir.prec[0], dir.prec[1] };
1833   tree type = (dir.modifier == FMT_LEN_L || dir.modifier == FMT_LEN_ll
1834 	       ? long_double_type_node : double_type_node);
1835 
1836   /* For an indeterminate precision the lower bound must be assumed
1837      to be zero.  */
1838   if (TOUPPER (dir.specifier) == 'A')
1839     {
1840       /* Get the number of fractional decimal digits needed to represent
1841 	 the argument without a loss of accuracy.  */
1842       unsigned fmtprec
1843 	= REAL_MODE_FORMAT (TYPE_MODE (type))->p;
1844 
1845       /* The precision of the IEEE 754 double format is 53.
1846 	 The precision of all other GCC binary double formats
1847 	 is 56 or less.  */
1848       unsigned maxprec = fmtprec <= 56 ? 13 : 15;
1849 
1850       /* For %a, leave the minimum precision unspecified to let
1851 	 MFPR trim trailing zeros (as it and many other systems
1852 	 including Glibc happen to do) and set the maximum
1853 	 precision to reflect what it would be with trailing zeros
1854 	 present (as Solaris and derived systems do).  */
1855       if (dir.prec[1] < 0)
1856 	{
1857 	  /* Both bounds are negative implies that precision has
1858 	     not been specified.  */
1859 	  prec[0] = maxprec;
1860 	  prec[1] = -1;
1861 	}
1862       else if (dir.prec[0] < 0)
1863 	{
1864 	  /* With a negative lower bound and a non-negative upper
1865 	     bound set the minimum precision to zero and the maximum
1866 	     to the greater of the maximum precision (i.e., with
1867 	     trailing zeros present) and the specified upper bound.  */
1868 	  prec[0] = 0;
1869 	  prec[1] = dir.prec[1] < maxprec ? maxprec : dir.prec[1];
1870 	}
1871     }
1872   else if (dir.prec[0] < 0)
1873     {
1874       if (dir.prec[1] < 0)
1875 	{
1876 	  /* A precision in a strictly negative range is ignored and
1877 	     the default of 6 is used instead.  */
1878 	  prec[0] = prec[1] = 6;
1879 	}
1880       else
1881 	{
1882 	  /* For a precision in a partly negative range, the lower bound
1883 	     must be assumed to be zero and the new upper bound is the
1884 	     greater of 6 (the default precision used when the specified
1885 	     precision is negative) and the upper bound of the specified
1886 	     range.  */
1887 	  prec[0] = 0;
1888 	  prec[1] = dir.prec[1] < 6 ? 6 : dir.prec[1];
1889 	}
1890     }
1891 
1892   if (!arg
1893       || TREE_CODE (arg) != REAL_CST
1894       || !useless_type_conversion_p (type, TREE_TYPE (arg)))
1895     return format_floating (dir, prec);
1896 
1897   /* The minimum and maximum number of bytes produced by the directive.  */
1898   fmtresult res;
1899 
1900   /* Get the real type format description for the target.  */
1901   const REAL_VALUE_TYPE *rvp = TREE_REAL_CST_PTR (arg);
1902   const real_format *rfmt = REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (arg)));
1903 
1904   if (!real_isfinite (rvp))
1905     {
1906       /* The format for Infinity and NaN is "[-]inf"/"[-]infinity"
1907 	 and "[-]nan" with the choice being implementation-defined
1908 	 but not locale dependent.  */
1909       bool sign = dir.get_flag ('+') || real_isneg (rvp);
1910       res.range.min = 3 + sign;
1911 
1912       res.range.likely = res.range.min;
1913       res.range.max = res.range.min;
1914       /* The unlikely maximum is "[-/+]infinity" or "[-/+][qs]nan".
1915 	 For NaN, the C/POSIX standards specify two formats:
1916 	   "[-/+]nan"
1917 	 and
1918 	   "[-/+]nan(n-char-sequence)"
1919 	 No known printf implementation outputs the latter format but AIX
1920 	 outputs QNaN and SNaN for quiet and signalling NaN, respectively,
1921 	 so the unlikely maximum reflects that.  */
1922       res.range.unlikely = sign + (real_isinf (rvp) ? 8 : 4);
1923 
1924       /* The range for infinity and NaN is known unless either width
1925 	 or precision is unknown.  Width has the same effect regardless
1926 	 of whether the argument is finite.  Precision is either ignored
1927 	 (e.g., Glibc) or can have an effect on the short vs long format
1928 	 such as inf/infinity (e.g., Solaris).  */
1929       res.knownrange = dir.known_width_and_precision ();
1930 
1931       /* Adjust the range for width but ignore precision.  */
1932       res.adjust_for_width_or_precision (dir.width);
1933 
1934       return res;
1935     }
1936 
1937   char fmtstr [40];
1938   char *pfmt = fmtstr;
1939 
1940   /* Append flags.  */
1941   for (const char *pf = "-+ #0"; *pf; ++pf)
1942     if (dir.get_flag (*pf))
1943       *pfmt++ = *pf;
1944 
1945   *pfmt = '\0';
1946 
1947   {
1948     /* Set up an array to easily iterate over.  */
1949     unsigned HOST_WIDE_INT* const minmax[] = {
1950       &res.range.min, &res.range.max
1951     };
1952 
1953     for (int i = 0; i != sizeof minmax / sizeof *minmax; ++i)
1954       {
1955 	/* Convert the GCC real value representation with the precision
1956 	   of the real type to the mpfr_t format rounding down in the
1957 	   first iteration that computes the minimum and up in the second
1958 	   that computes the maximum.  This order is arbitrary because
1959 	   rounding in either direction can result in longer output.  */
1960 	mpfr_t mpfrval;
1961 	mpfr_init2 (mpfrval, rfmt->p);
1962 	mpfr_from_real (mpfrval, rvp, i ? MPFR_RNDU : MPFR_RNDD);
1963 
1964 	/* Use the MPFR rounding specifier to round down in the first
1965 	   iteration and then up.  In most but not all cases this will
1966 	   result in the same number of bytes.  */
1967 	char rndspec = "DU"[i];
1968 
1969 	/* Format it and store the result in the corresponding member
1970 	   of the result struct.  */
1971 	*minmax[i] = get_mpfr_format_length (mpfrval, fmtstr, prec[i],
1972 					     dir.specifier, rndspec);
1973 	mpfr_clear (mpfrval);
1974       }
1975   }
1976 
1977   /* Make sure the minimum is less than the maximum (MPFR rounding
1978      in the call to mpfr_snprintf can result in the reverse.  */
1979   if (res.range.max < res.range.min)
1980     {
1981       unsigned HOST_WIDE_INT tmp = res.range.min;
1982       res.range.min = res.range.max;
1983       res.range.max = tmp;
1984     }
1985 
1986   /* The range is known unless either width or precision is unknown.  */
1987   res.knownrange = dir.known_width_and_precision ();
1988 
1989   /* For the same floating point constant, unless width or precision
1990      is unknown, use the longer output as the likely maximum since
1991      with round to nearest either is equally likely.  Otherwise, when
1992      precision is unknown, use the greater of the minimum and 3 as
1993      the likely output (for "0.0" since zero precision is unlikely).  */
1994   if (res.knownrange)
1995     res.range.likely = res.range.max;
1996   else if (res.range.min < 3
1997 	   && dir.prec[0] < 0
1998 	   && (unsigned HOST_WIDE_INT)dir.prec[1] == target_int_max ())
1999     res.range.likely = 3;
2000   else
2001     res.range.likely = res.range.min;
2002 
2003   res.range.unlikely = res.range.max;
2004 
2005   if (res.range.max > 2 && (prec[0] != 0 || prec[1] != 0))
2006     {
2007       /* Unless the precision is zero output longer than 2 bytes may
2008 	 include the decimal point which must be a single character
2009 	 up to MB_LEN_MAX in length.  This is overly conservative
2010 	 since in some conversions some constants result in no decimal
2011 	 point (e.g., in %g).  */
2012       res.range.unlikely += target_mb_len_max () - 1;
2013     }
2014 
2015   res.adjust_for_width_or_precision (dir.width);
2016   return res;
2017 }
2018 
2019 /* Return a FMTRESULT struct set to the lengths of the shortest and longest
2020    strings referenced by the expression STR, or (-1, -1) when not known.
2021    Used by the format_string function below.  */
2022 
2023 static fmtresult
get_string_length(tree str,gimple * stmt,unsigned eltsize,range_query * query)2024 get_string_length (tree str, gimple *stmt, unsigned eltsize,
2025 		   range_query *query)
2026 {
2027   if (!str)
2028     return fmtresult ();
2029 
2030   /* Try to determine the dynamic string length first.
2031      Set MAXBOUND to an arbitrary non-null non-integer node as a request
2032      to have it set to the length of the longest string in a PHI.  */
2033   c_strlen_data lendata = { };
2034   lendata.maxbound = str;
2035   if (eltsize == 1)
2036     get_range_strlen_dynamic (str, stmt, &lendata, query);
2037   else
2038     {
2039       /* Determine the length of the shortest and longest string referenced
2040 	 by STR.  Strings of unknown lengths are bounded by the sizes of
2041 	 arrays that subexpressions of STR may refer to.  Pointers that
2042 	 aren't known to point any such arrays result in LENDATA.MAXLEN
2043 	 set to SIZE_MAX.  */
2044       get_range_strlen (str, &lendata, eltsize);
2045     }
2046 
2047   /* If LENDATA.MAXBOUND is not equal to .MINLEN it corresponds to the bound
2048      of the largest array STR refers to, if known, or it's set to SIZE_MAX
2049      otherwise.  */
2050 
2051   /* Return the default result when nothing is known about the string.  */
2052   if ((lendata.maxbound && !tree_fits_uhwi_p (lendata.maxbound))
2053       || !tree_fits_uhwi_p (lendata.maxlen))
2054     {
2055       fmtresult res;
2056       res.nonstr = lendata.decl;
2057       return res;
2058     }
2059 
2060   unsigned HOST_WIDE_INT lenmax = tree_to_uhwi (max_object_size ()) - 2;
2061   if (integer_zerop (lendata.minlen)
2062       && (!lendata.maxbound || lenmax <= tree_to_uhwi (lendata.maxbound))
2063       && lenmax <= tree_to_uhwi (lendata.maxlen))
2064     {
2065       fmtresult res;
2066       res.nonstr = lendata.decl;
2067       return res;
2068     }
2069 
2070   HOST_WIDE_INT min
2071     = (tree_fits_uhwi_p (lendata.minlen)
2072        ? tree_to_uhwi (lendata.minlen)
2073        : 0);
2074 
2075   HOST_WIDE_INT max
2076     = (lendata.maxbound && tree_fits_uhwi_p (lendata.maxbound)
2077        ? tree_to_uhwi (lendata.maxbound)
2078        : HOST_WIDE_INT_M1U);
2079 
2080   const bool unbounded = integer_all_onesp (lendata.maxlen);
2081 
2082   /* Set the max/likely counters to unbounded when a minimum is known
2083      but the maximum length isn't bounded.  This implies that STR is
2084      a conditional expression involving a string of known length and
2085      an expression of unknown/unbounded length.  */
2086   if (min
2087       && (unsigned HOST_WIDE_INT)min < HOST_WIDE_INT_M1U
2088       && unbounded)
2089     max = HOST_WIDE_INT_M1U;
2090 
2091   /* get_range_strlen() returns the target value of SIZE_MAX for
2092      strings of unknown length.  Bump it up to HOST_WIDE_INT_M1U
2093      which may be bigger.  */
2094   if ((unsigned HOST_WIDE_INT)min == target_size_max ())
2095     min = HOST_WIDE_INT_M1U;
2096   if ((unsigned HOST_WIDE_INT)max == target_size_max ())
2097     max = HOST_WIDE_INT_M1U;
2098 
2099   fmtresult res (min, max);
2100   res.nonstr = lendata.decl;
2101 
2102   /* Set RES.KNOWNRANGE to true if and only if all strings referenced
2103      by STR are known to be bounded (though not necessarily by their
2104      actual length but perhaps by their maximum possible length).  */
2105   if (res.range.max < target_int_max ())
2106     {
2107       res.knownrange = true;
2108       /* When the length of the longest string is known and not
2109 	 excessive use it as the likely length of the string(s).  */
2110       res.range.likely = res.range.max;
2111     }
2112   else
2113     {
2114       /* When the upper bound is unknown (it can be zero or excessive)
2115 	 set the likely length to the greater of 1.  If MAXBOUND is
2116 	 known, also reset the length of the lower bound to zero.  */
2117       res.range.likely = res.range.min ? res.range.min : warn_level > 1;
2118       if (lendata.maxbound && !integer_all_onesp (lendata.maxbound))
2119 	res.range.min = 0;
2120     }
2121 
2122   res.range.unlikely = unbounded ? HOST_WIDE_INT_MAX : res.range.max;
2123 
2124   return res;
2125 }
2126 
2127 /* Return the minimum and maximum number of characters formatted
2128    by the '%c' format directives and its wide character form for
2129    the argument ARG.  ARG can be null (for functions such as
2130    vsprinf).  */
2131 
2132 static fmtresult
format_character(const directive & dir,tree arg,range_query * query)2133 format_character (const directive &dir, tree arg, range_query *query)
2134 {
2135   fmtresult res;
2136 
2137   res.knownrange = true;
2138 
2139   if (dir.specifier == 'C'
2140       || dir.modifier == FMT_LEN_l)
2141     {
2142       /* A wide character can result in as few as zero bytes.  */
2143       res.range.min = 0;
2144 
2145       HOST_WIDE_INT min, max;
2146       if (get_int_range (arg, dir.info->callstmt, &min, &max, false, 0, query))
2147 	{
2148 	  if (min == 0 && max == 0)
2149 	    {
2150 	      /* The NUL wide character results in no bytes.  */
2151 	      res.range.max = 0;
2152 	      res.range.likely = 0;
2153 	      res.range.unlikely = 0;
2154 	    }
2155 	  else if (min >= 0 && min < 128)
2156 	    {
2157 	      /* Be conservative if the target execution character set
2158 		 is not a 1-to-1 mapping to the source character set or
2159 		 if the source set is not ASCII.  */
2160 	      bool one_2_one_ascii
2161 		= (target_to_host_charmap[0] == 1 && target_to_host ('a') == 97);
2162 
2163 	      /* A wide character in the ASCII range most likely results
2164 		 in a single byte, and only unlikely in up to MB_LEN_MAX.  */
2165 	      res.range.max = one_2_one_ascii ? 1 : target_mb_len_max ();;
2166 	      res.range.likely = 1;
2167 	      res.range.unlikely = target_mb_len_max ();
2168 	      res.mayfail = !one_2_one_ascii;
2169 	    }
2170 	  else
2171 	    {
2172 	      /* A wide character outside the ASCII range likely results
2173 		 in up to two bytes, and only unlikely in up to MB_LEN_MAX.  */
2174 	      res.range.max = target_mb_len_max ();
2175 	      res.range.likely = 2;
2176 	      res.range.unlikely = res.range.max;
2177 	      /* Converting such a character may fail.  */
2178 	      res.mayfail = true;
2179 	    }
2180 	}
2181       else
2182 	{
2183 	  /* An unknown wide character is treated the same as a wide
2184 	     character outside the ASCII range.  */
2185 	  res.range.max = target_mb_len_max ();
2186 	  res.range.likely = 2;
2187 	  res.range.unlikely = res.range.max;
2188 	  res.mayfail = true;
2189 	}
2190     }
2191   else
2192     {
2193       /* A plain '%c' directive.  Its output is exactly 1.  */
2194       res.range.min = res.range.max = 1;
2195       res.range.likely = res.range.unlikely = 1;
2196       res.knownrange = true;
2197     }
2198 
2199   /* Bump up the byte counters if WIDTH is greater.  */
2200   return res.adjust_for_width_or_precision (dir.width);
2201 }
2202 
2203 /* Determine the offset *INDEX of the first byte of an array element of
2204    TYPE (possibly recursively) into which the byte offset OFF points.
2205    On success set *INDEX to the offset of the first byte and return type.
2206    Otherwise, if no such element can be found, return null.  */
2207 
2208 static tree
array_elt_at_offset(tree type,HOST_WIDE_INT off,HOST_WIDE_INT * index)2209 array_elt_at_offset (tree type, HOST_WIDE_INT off, HOST_WIDE_INT *index)
2210 {
2211   gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
2212 
2213   tree eltype = type;
2214   while (TREE_CODE (TREE_TYPE (eltype)) == ARRAY_TYPE)
2215     eltype = TREE_TYPE (eltype);
2216 
2217   if (TYPE_MODE (TREE_TYPE (eltype)) != TYPE_MODE (char_type_node))
2218     eltype = TREE_TYPE (eltype);
2219 
2220   if (eltype == type)
2221     {
2222       *index = 0;
2223       return type;
2224     }
2225 
2226   HOST_WIDE_INT typsz = int_size_in_bytes (type);
2227   HOST_WIDE_INT eltsz = int_size_in_bytes (eltype);
2228   if (off < typsz * eltsz)
2229     {
2230       *index = (off / eltsz) * eltsz;
2231       return TREE_CODE (eltype) == ARRAY_TYPE ? TREE_TYPE (eltype) : eltype;
2232     }
2233 
2234   return NULL_TREE;
2235 }
2236 
2237 /* Determine the offset *INDEX of the first byte of a struct member of TYPE
2238    (possibly recursively) into which the byte offset OFF points.  On success
2239    set *INDEX to the offset of the first byte and return true.  Otherwise,
2240    if no such member can be found, return false.  */
2241 
2242 static bool
field_at_offset(tree type,HOST_WIDE_INT off,HOST_WIDE_INT * index)2243 field_at_offset (tree type, HOST_WIDE_INT off, HOST_WIDE_INT *index)
2244 {
2245   gcc_assert (RECORD_OR_UNION_TYPE_P (type));
2246 
2247   for (tree fld = TYPE_FIELDS (type); fld; fld = TREE_CHAIN (fld))
2248     {
2249       if (TREE_CODE (fld) != FIELD_DECL || DECL_ARTIFICIAL (fld))
2250 	continue;
2251 
2252       tree fldtype = TREE_TYPE (fld);
2253       HOST_WIDE_INT fldoff = int_byte_position (fld);
2254 
2255       /* If the size is not available the field is a flexible array
2256 	 member.  Treat this case as success.  */
2257       tree typesize = TYPE_SIZE_UNIT (fldtype);
2258       HOST_WIDE_INT fldsize = (tree_fits_uhwi_p (typesize)
2259 			       ? tree_to_uhwi (typesize)
2260 			       : off);
2261 
2262       if (fldoff + fldsize < off)
2263 	continue;
2264 
2265       if (TREE_CODE (fldtype) == ARRAY_TYPE)
2266 	{
2267 	  HOST_WIDE_INT idx = 0;
2268 	  if (tree ft = array_elt_at_offset (fldtype, off, &idx))
2269 	    fldtype = ft;
2270 	  else
2271 	    break;
2272 
2273 	  *index += idx;
2274 	  fldoff -= idx;
2275 	  off -= idx;
2276 	}
2277 
2278       if (RECORD_OR_UNION_TYPE_P (fldtype))
2279 	{
2280 	  *index += fldoff;
2281 	  return field_at_offset (fldtype, off - fldoff, index);
2282 	}
2283 
2284       *index += fldoff;
2285       return true;
2286     }
2287 
2288   return false;
2289 }
2290 
2291 /* For an expression X of pointer type, recursively try to find the same
2292    origin (object or pointer) as Y it references and return such an X.
2293    When X refers to a struct member, set *FLDOFF to the offset of the
2294    member from the beginning of the "most derived" object.  */
2295 
2296 static tree
get_origin_and_offset(tree x,HOST_WIDE_INT * fldoff,HOST_WIDE_INT * off)2297 get_origin_and_offset (tree x, HOST_WIDE_INT *fldoff, HOST_WIDE_INT *off)
2298 {
2299   if (!x)
2300     return NULL_TREE;
2301 
2302   switch (TREE_CODE (x))
2303     {
2304     case ADDR_EXPR:
2305       x = TREE_OPERAND (x, 0);
2306       return get_origin_and_offset (x, fldoff, off);
2307 
2308     case ARRAY_REF:
2309       {
2310 	tree offset = TREE_OPERAND (x, 1);
2311 	HOST_WIDE_INT idx = (tree_fits_uhwi_p (offset)
2312 			     ? tree_to_uhwi (offset) : HOST_WIDE_INT_MAX);
2313 
2314 	tree eltype = TREE_TYPE (x);
2315 	if (TREE_CODE (eltype) == INTEGER_TYPE)
2316 	  {
2317 	    if (off)
2318 	      *off = idx;
2319 	  }
2320 	else if (idx < HOST_WIDE_INT_MAX)
2321 	  *fldoff += idx * int_size_in_bytes (eltype);
2322 	else
2323 	  *fldoff = idx;
2324 
2325 	x = TREE_OPERAND (x, 0);
2326 	return get_origin_and_offset (x, fldoff, NULL);
2327       }
2328 
2329     case MEM_REF:
2330       if (off)
2331 	{
2332 	  tree offset = TREE_OPERAND (x, 1);
2333 	  *off = (tree_fits_uhwi_p (offset)
2334 		  ? tree_to_uhwi (offset) : HOST_WIDE_INT_MAX);
2335 	}
2336 
2337       x = TREE_OPERAND (x, 0);
2338 
2339       if (off)
2340 	{
2341 	  tree xtype
2342 	    = (TREE_CODE (x) == ADDR_EXPR
2343 	       ? TREE_TYPE (TREE_OPERAND (x, 0)) : TREE_TYPE (TREE_TYPE (x)));
2344 
2345 	  /* The byte offset of the most basic struct member the byte
2346 	     offset *OFF corresponds to, or for a (multidimensional)
2347 	     array member, the byte offset of the array element.  */
2348 	  HOST_WIDE_INT index = 0;
2349 
2350 	  if ((RECORD_OR_UNION_TYPE_P (xtype)
2351 	       && field_at_offset (xtype, *off, &index))
2352 	      || (TREE_CODE (xtype) == ARRAY_TYPE
2353 		  && TREE_CODE (TREE_TYPE (xtype)) == ARRAY_TYPE
2354 		  && array_elt_at_offset (xtype, *off, &index)))
2355 	    {
2356 	      *fldoff += index;
2357 	      *off -= index;
2358 	    }
2359 	}
2360 
2361       return get_origin_and_offset (x, fldoff, NULL);
2362 
2363     case COMPONENT_REF:
2364       {
2365 	tree fld = TREE_OPERAND (x, 1);
2366 	*fldoff += int_byte_position (fld);
2367 
2368 	get_origin_and_offset (fld, fldoff, off);
2369 	x = TREE_OPERAND (x, 0);
2370 	return get_origin_and_offset (x, fldoff, off);
2371       }
2372 
2373     case SSA_NAME:
2374       {
2375 	gimple *def = SSA_NAME_DEF_STMT (x);
2376 	if (is_gimple_assign (def))
2377 	  {
2378 	    tree_code code = gimple_assign_rhs_code (def);
2379 	    if (code == ADDR_EXPR)
2380 	      {
2381 		x = gimple_assign_rhs1 (def);
2382 		return get_origin_and_offset (x, fldoff, off);
2383 	      }
2384 
2385 	    if (code == POINTER_PLUS_EXPR)
2386 	      {
2387 		tree offset = gimple_assign_rhs2 (def);
2388 		if (off)
2389 		  *off = (tree_fits_uhwi_p (offset)
2390 			  ? tree_to_uhwi (offset) : HOST_WIDE_INT_MAX);
2391 
2392 		x = gimple_assign_rhs1 (def);
2393 		return get_origin_and_offset (x, fldoff, NULL);
2394 	      }
2395 	    else if (code == VAR_DECL)
2396 	      {
2397 		x = gimple_assign_rhs1 (def);
2398 		return get_origin_and_offset (x, fldoff, off);
2399 	      }
2400 	  }
2401 	else if (gimple_nop_p (def) && SSA_NAME_VAR (x))
2402 	  x = SSA_NAME_VAR (x);
2403       }
2404 
2405     default:
2406       break;
2407     }
2408 
2409   return x;
2410 }
2411 
2412 /* If ARG refers to the same (sub)object or array element as described
2413    by DST and DST_FLD, return the byte offset into the struct member or
2414    array element referenced by ARG.  Otherwise return HOST_WIDE_INT_MIN
2415    to indicate that ARG and DST do not refer to the same object.  */
2416 
2417 static HOST_WIDE_INT
alias_offset(tree arg,tree dst,HOST_WIDE_INT dst_fld)2418 alias_offset (tree arg, tree dst, HOST_WIDE_INT dst_fld)
2419 {
2420   /* See if the argument refers to the same base object as the destination
2421      of the formatted function call, and if so, try to determine if they
2422      can alias.  */
2423   if (!arg || !dst || !ptr_derefs_may_alias_p (arg, dst))
2424     return HOST_WIDE_INT_MIN;
2425 
2426   /* The two arguments may refer to the same object.  If they both refer
2427      to a struct member, see if the members are one and the same.  */
2428   HOST_WIDE_INT arg_off = 0, arg_fld = 0;
2429 
2430   tree arg_orig = get_origin_and_offset (arg, &arg_fld, &arg_off);
2431 
2432   if (arg_orig == dst && arg_fld == dst_fld)
2433     return arg_off;
2434 
2435   return HOST_WIDE_INT_MIN;
2436 }
2437 
2438 /* Return the minimum and maximum number of characters formatted
2439    by the '%s' format directive and its wide character form for
2440    the argument ARG.  ARG can be null (for functions such as
2441    vsprinf).  */
2442 
2443 static fmtresult
format_string(const directive & dir,tree arg,range_query * query)2444 format_string (const directive &dir, tree arg, range_query *query)
2445 {
2446   fmtresult res;
2447 
2448   if (warn_restrict)
2449     {
2450       /* See if ARG might alias the destination of the call with
2451 	 DST_ORIGIN and DST_FIELD.  If so, store the starting offset
2452 	 so that the overlap can be determined for certain later,
2453 	 when the amount of output of the call (including subsequent
2454 	 directives) has been computed.  Otherwise, store HWI_MIN.  */
2455       res.dst_offset = alias_offset (arg, dir.info->dst_origin,
2456 				     dir.info->dst_field);
2457     }
2458 
2459   /* Compute the range the argument's length can be in.  */
2460   int count_by = 1;
2461   if (dir.specifier == 'S' || dir.modifier == FMT_LEN_l)
2462     {
2463       /* Get a node for a C type that will be the same size
2464 	 as a wchar_t on the target.  */
2465       tree node = get_typenode_from_name (MODIFIED_WCHAR_TYPE);
2466 
2467       /* Now that we have a suitable node, get the number of
2468 	 bytes it occupies.  */
2469       count_by = int_size_in_bytes (node);
2470       gcc_checking_assert (count_by == 2 || count_by == 4);
2471     }
2472 
2473   fmtresult slen = get_string_length (arg, dir.info->callstmt, count_by, query);
2474   if (slen.range.min == slen.range.max
2475       && slen.range.min < HOST_WIDE_INT_MAX)
2476     {
2477       /* The argument is either a string constant or it refers
2478 	 to one of a number of strings of the same length.  */
2479 
2480       /* A '%s' directive with a string argument with constant length.  */
2481       res.range = slen.range;
2482 
2483       if (dir.specifier == 'S'
2484 	  || dir.modifier == FMT_LEN_l)
2485 	{
2486 	  /* In the worst case the length of output of a wide string S
2487 	     is bounded by MB_LEN_MAX * wcslen (S).  */
2488 	  res.range.max *= target_mb_len_max ();
2489 	  res.range.unlikely = res.range.max;
2490 	  /* It's likely that the total length is not more that
2491 	     2 * wcslen (S).*/
2492 	  res.range.likely = res.range.min * 2;
2493 
2494 	  if (dir.prec[1] >= 0
2495 	      && (unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2496 	    {
2497 	      res.range.max = dir.prec[1];
2498 	      res.range.likely = dir.prec[1];
2499 	      res.range.unlikely = dir.prec[1];
2500 	    }
2501 
2502 	  if (dir.prec[0] < 0 && dir.prec[1] > -1)
2503 	    res.range.min = 0;
2504 	  else if (dir.prec[0] >= 0)
2505 	    res.range.likely = dir.prec[0];
2506 
2507 	  /* Even a non-empty wide character string need not convert into
2508 	     any bytes.  */
2509 	  res.range.min = 0;
2510 
2511 	  /* A non-empty wide character conversion may fail.  */
2512 	  if (slen.range.max > 0)
2513 	    res.mayfail = true;
2514 	}
2515       else
2516 	{
2517 	  res.knownrange = true;
2518 
2519 	  if (dir.prec[0] < 0 && dir.prec[1] > -1)
2520 	    res.range.min = 0;
2521 	  else if ((unsigned HOST_WIDE_INT)dir.prec[0] < res.range.min)
2522 	    res.range.min = dir.prec[0];
2523 
2524 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2525 	    {
2526 	      res.range.max = dir.prec[1];
2527 	      res.range.likely = dir.prec[1];
2528 	      res.range.unlikely = dir.prec[1];
2529 	    }
2530 	}
2531     }
2532   else if (arg && integer_zerop (arg))
2533     {
2534       /* Handle null pointer argument.  */
2535 
2536       fmtresult res (0);
2537       res.nullp = true;
2538       return res;
2539     }
2540   else
2541     {
2542       /* For a '%s' and '%ls' directive with a non-constant string (either
2543 	 one of a number of strings of known length or an unknown string)
2544 	 the minimum number of characters is lesser of PRECISION[0] and
2545 	 the length of the shortest known string or zero, and the maximum
2546 	 is the lesser of the length of the longest known string or
2547 	 PTRDIFF_MAX and PRECISION[1].  The likely length is either
2548 	 the minimum at level 1 and the greater of the minimum and 1
2549 	 at level 2.  This result is adjust upward for width (if it's
2550 	 specified).  */
2551 
2552       if (dir.specifier == 'S'
2553 	  || dir.modifier == FMT_LEN_l)
2554 	{
2555 	  /* A wide character converts to as few as zero bytes.  */
2556 	  slen.range.min = 0;
2557 	  if (slen.range.max < target_int_max ())
2558 	    slen.range.max *= target_mb_len_max ();
2559 
2560 	  if (slen.range.likely < target_int_max ())
2561 	    slen.range.likely *= 2;
2562 
2563 	  if (slen.range.likely < target_int_max ())
2564 	    slen.range.unlikely *= target_mb_len_max ();
2565 
2566 	  /* A non-empty wide character conversion may fail.  */
2567 	  if (slen.range.max > 0)
2568 	    res.mayfail = true;
2569 	}
2570 
2571       res.range = slen.range;
2572 
2573       if (dir.prec[0] >= 0)
2574 	{
2575 	  /* Adjust the minimum to zero if the string length is unknown,
2576 	     or at most the lower bound of the precision otherwise.  */
2577 	  if (slen.range.min >= target_int_max ())
2578 	    res.range.min = 0;
2579 	  else if ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.min)
2580 	    res.range.min = dir.prec[0];
2581 
2582 	  /* Make both maxima no greater than the upper bound of precision.  */
2583 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max
2584 	      || slen.range.max >= target_int_max ())
2585 	    {
2586 	      res.range.max = dir.prec[1];
2587 	      res.range.unlikely = dir.prec[1];
2588 	    }
2589 
2590 	  /* If precision is constant, set the likely counter to the lesser
2591 	     of it and the maximum string length.  Otherwise, if the lower
2592 	     bound of precision is greater than zero, set the likely counter
2593 	     to the minimum.  Otherwise set it to zero or one based on
2594 	     the warning level.  */
2595 	  if (dir.prec[0] == dir.prec[1])
2596 	    res.range.likely
2597 	      = ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.max
2598 		 ? dir.prec[0] : slen.range.max);
2599 	  else if (dir.prec[0] > 0)
2600 	    res.range.likely = res.range.min;
2601 	  else
2602 	    res.range.likely = warn_level > 1;
2603 	}
2604       else if (dir.prec[1] >= 0)
2605 	{
2606 	  res.range.min = 0;
2607 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max)
2608 	    res.range.max = dir.prec[1];
2609 	  res.range.likely = dir.prec[1] ? warn_level > 1 : 0;
2610 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.unlikely)
2611 	    res.range.unlikely = dir.prec[1];
2612 	}
2613       else if (slen.range.min >= target_int_max ())
2614 	{
2615 	  res.range.min = 0;
2616 	  res.range.max = HOST_WIDE_INT_MAX;
2617 	  /* At level 1 strings of unknown length are assumed to be
2618 	     empty, while at level 1 they are assumed to be one byte
2619 	     long.  */
2620 	  res.range.likely = warn_level > 1;
2621 	  res.range.unlikely = HOST_WIDE_INT_MAX;
2622 	}
2623       else
2624 	{
2625 	  /* A string of unknown length unconstrained by precision is
2626 	     assumed to be empty at level 1 and just one character long
2627 	     at higher levels.  */
2628 	  if (res.range.likely >= target_int_max ())
2629 	    res.range.likely = warn_level > 1;
2630 	}
2631     }
2632 
2633   /* If the argument isn't a nul-terminated string and the number
2634      of bytes on output isn't bounded by precision, set NONSTR.  */
2635   if (slen.nonstr && slen.range.min < (unsigned HOST_WIDE_INT)dir.prec[0])
2636     res.nonstr = slen.nonstr;
2637 
2638   /* Bump up the byte counters if WIDTH is greater.  */
2639   return res.adjust_for_width_or_precision (dir.width);
2640 }
2641 
2642 /* Format plain string (part of the format string itself).  */
2643 
2644 static fmtresult
format_plain(const directive & dir,tree,range_query *)2645 format_plain (const directive &dir, tree, range_query *)
2646 {
2647   fmtresult res (dir.len);
2648   return res;
2649 }
2650 
2651 /* Return true if the RESULT of a directive in a call describe by INFO
2652    should be diagnosed given the AVAILable space in the destination.  */
2653 
2654 static bool
should_warn_p(const call_info & info,const result_range & avail,const result_range & result)2655 should_warn_p (const call_info &info,
2656 	       const result_range &avail, const result_range &result)
2657 {
2658   if (result.max <= avail.min)
2659     {
2660       /* The least amount of space remaining in the destination is big
2661 	 enough for the longest output.  */
2662       return false;
2663     }
2664 
2665   if (info.bounded)
2666     {
2667       if (warn_format_trunc == 1 && result.min <= avail.max
2668 	  && info.retval_used ())
2669 	{
2670 	  /* The likely amount of space remaining in the destination is big
2671 	     enough for the least output and the return value is used.  */
2672 	  return false;
2673 	}
2674 
2675       if (warn_format_trunc == 1 && result.likely <= avail.likely
2676 	  && !info.retval_used ())
2677 	{
2678 	  /* The likely amount of space remaining in the destination is big
2679 	     enough for the likely output and the return value is unused.  */
2680 	  return false;
2681 	}
2682 
2683       if (warn_format_trunc == 2
2684 	  && result.likely <= avail.min
2685 	  && (result.max <= avail.min
2686 	      || result.max > HOST_WIDE_INT_MAX))
2687 	{
2688 	  /* The minimum amount of space remaining in the destination is big
2689 	     enough for the longest output.  */
2690 	  return false;
2691 	}
2692     }
2693   else
2694     {
2695       if (warn_level == 1 && result.likely <= avail.likely)
2696 	{
2697 	  /* The likely amount of space remaining in the destination is big
2698 	     enough for the likely output.  */
2699 	  return false;
2700 	}
2701 
2702       if (warn_level == 2
2703 	  && result.likely <= avail.min
2704 	  && (result.max <= avail.min
2705 	      || result.max > HOST_WIDE_INT_MAX))
2706 	{
2707 	  /* The minimum amount of space remaining in the destination is big
2708 	     enough for the longest output.  */
2709 	  return false;
2710 	}
2711     }
2712 
2713   return true;
2714 }
2715 
/* At the format string location described by DIRLOC in a call described
   by INFO, issue a warning for a directive DIR whose output may be
   in excess of the available space AVAIL_RANGE in the destination
   given the formatting result RES.  ARGLOC is the location of the
   directive's argument and is passed to the diagnostic for every
   directive except the terminating nul.  This function does nothing
   except decide whether to issue a warning for a possible write
   past the end or truncation and, if so, format the warning.
   Return false when should_warn_p decides against a warning, and
   otherwise the value returned by fmtwarn or fmtwarn_n.  */

static bool
maybe_warn (substring_loc &dirloc, location_t argloc,
	    const call_info &info,
	    const result_range &avail_range, const result_range &res,
	    const directive &dir)
{
  if (!should_warn_p (info, avail_range, res))
    return false;

  /* From here on every path ends in a call to fmtwarn or fmtwarn_n.  */

  /* The maximum byte count to reference in the warning.  Larger counts
     imply that the upper bound is unknown (and could be anywhere between
     RES.MIN + 1 and SIZE_MAX / 2) and are printed as "N or more bytes"
     rather than "between N and X" where X is some huge number.  */
  unsigned HOST_WIDE_INT maxbytes = target_dir_max ();

  /* True when there is enough room in the destination for the least
     amount of a directive's output but not enough for its likely or
     maximum output.  Selects the "may be/may write" forms of
     the messages below over the definitive ones.  */
  bool maybe = (res.min <= avail_range.max
		&& (avail_range.min < res.likely
		    || (res.max < HOST_WIDE_INT_MAX
			&& avail_range.min < res.max)));

  /* Buffer for the directive in the host character set (used when
     the source character set is different).  */
  char hostdir[32];

  if (avail_range.min == avail_range.max)
    {
      /* The size of the destination region is exact.  */
      unsigned HOST_WIDE_INT navail = avail_range.max;

      if (target_to_host (*dir.beg) != '%')
	{
	  /* For plain character directives (i.e., the format string itself)
	     but not others, point the caret at the first character that's
	     past the end of the destination.  */
	  if (navail < dir.len)
	    dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
	}

      if (*dir.beg == '\0')
	{
	  /* This is the terminating nul.  */
	  gcc_assert (res.min == 1 && res.min == res.max);

	  return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
			  info.bounded
			  ? (maybe
			     ? G_("%qE output may be truncated before the "
				  "last format character")
			     : G_("%qE output truncated before the last "
				  "format character"))
			  : (maybe
			     ? G_("%qE may write a terminating nul past the "
				  "end of the destination")
			     : G_("%qE writing a terminating nul past the "
				  "end of the destination")),
			  info.func);
	}

      if (res.min == res.max)
	{
	  /* The size of the directive's output is exact.  Use fmtwarn_n
	     to choose between the singular and plural form.  */
	  const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
	  if (!info.bounded)
	    return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
			      "%<%.*s%> directive writing %wu byte into a "
			      "region of size %wu",
			      "%<%.*s%> directive writing %wu bytes into a "
			      "region of size %wu",
			      (int) dir.len, d, res.min, navail);
	  else if (maybe)
	    return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
			      "%<%.*s%> directive output may be truncated "
			      "writing %wu byte into a region of size %wu",
			      "%<%.*s%> directive output may be truncated "
			      "writing %wu bytes into a region of size %wu",
			      (int) dir.len, d, res.min, navail);
	  else
	    return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
			      "%<%.*s%> directive output truncated writing "
			      "%wu byte into a region of size %wu",
			      "%<%.*s%> directive output truncated writing "
			      "%wu bytes into a region of size %wu",
			      (int) dir.len, d, res.min, navail);
	}
      /* The output may be empty and its maximum is below MAXBYTES:
	 mention just the upper bound.  */
      if (res.min == 0 && res.max < maxbytes)
	return fmtwarn (dirloc, argloc, NULL,
			info.warnopt (),
			info.bounded
			? (maybe
			   ? G_("%<%.*s%> directive output may be truncated "
				"writing up to %wu bytes into a region of "
				"size %wu")
			   : G_("%<%.*s%> directive output truncated writing "
				"up to %wu bytes into a region of size %wu"))
			: G_("%<%.*s%> directive writing up to %wu bytes "
			     "into a region of size %wu"), (int) dir.len,
			target_to_host (hostdir, sizeof hostdir, dir.beg),
			res.max, navail);

      if (res.min == 0 && maxbytes <= res.max)
	/* This is a special case to avoid issuing the potentially
	   confusing warning:
	     writing 0 or more bytes into a region of size 0.
	   The likely byte count is mentioned instead of the minimum.  */
	return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			info.bounded
			? (maybe
			   ? G_("%<%.*s%> directive output may be truncated "
				"writing likely %wu or more bytes into a "
				"region of size %wu")
			   : G_("%<%.*s%> directive output truncated writing "
				"likely %wu or more bytes into a region of "
				"size %wu"))
			: G_("%<%.*s%> directive writing likely %wu or more "
			     "bytes into a region of size %wu"), (int) dir.len,
			target_to_host (hostdir, sizeof hostdir, dir.beg),
			res.likely, navail);

      /* The minimum is nonzero and the maximum is below MAXBYTES:
	 mention both bounds.  */
      if (res.max < maxbytes)
	return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			info.bounded
			? (maybe
			   ? G_("%<%.*s%> directive output may be truncated "
				"writing between %wu and %wu bytes into a "
				"region of size %wu")
			   : G_("%<%.*s%> directive output truncated "
				"writing between %wu and %wu bytes into a "
				"region of size %wu"))
			: G_("%<%.*s%> directive writing between %wu and "
			     "%wu bytes into a region of size %wu"),
			(int) dir.len,
			target_to_host (hostdir, sizeof hostdir, dir.beg),
			res.min, res.max, navail);

      /* The minimum is nonzero and the maximum is at least MAXBYTES:
	 mention just the lower bound.  */
      return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
		      info.bounded
		      ? (maybe
			 ? G_("%<%.*s%> directive output may be truncated "
			      "writing %wu or more bytes into a region of "
			      "size %wu")
			 : G_("%<%.*s%> directive output truncated writing "
			      "%wu or more bytes into a region of size %wu"))
		      : G_("%<%.*s%> directive writing %wu or more bytes "
			   "into a region of size %wu"), (int) dir.len,
		      target_to_host (hostdir, sizeof hostdir, dir.beg),
		      res.min, navail);
    }

  /* The size of the destination region is a range.  The cases below
     mirror those above except that the messages mention both bounds
     of the region.  */

  if (target_to_host (*dir.beg) != '%')
    {
      unsigned HOST_WIDE_INT navail = avail_range.max;

      /* For plain character directives (i.e., the format string itself)
	 but not others, point the caret at the first character that's
	 past the end of the destination.  */
      if (navail < dir.len)
	dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
    }

  if (*dir.beg == '\0')
    {
      /* This is the terminating nul.  */
      gcc_assert (res.min == 1 && res.min == res.max);

      return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
		      info.bounded
		      ? (maybe
			 ? G_("%qE output may be truncated before the last "
			      "format character")
			 : G_("%qE output truncated before the last format "
			      "character"))
		      : (maybe
			 ? G_("%qE may write a terminating nul past the end "
			      "of the destination")
			 : G_("%qE writing a terminating nul past the end "
			      "of the destination")), info.func);
    }

  if (res.min == res.max)
    {
      /* The size of the directive's output is exact.  */
      const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
      if (!info.bounded)
	return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
			  "%<%.*s%> directive writing %wu byte into a region "
			  "of size between %wu and %wu",
			  "%<%.*s%> directive writing %wu bytes into a region "
			  "of size between %wu and %wu", (int) dir.len, d,
			  res.min, avail_range.min, avail_range.max);
      else if (maybe)
	return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
			  "%<%.*s%> directive output may be truncated writing "
			  "%wu byte into a region of size between %wu and %wu",
			  "%<%.*s%> directive output may be truncated writing "
			  "%wu bytes into a region of size between %wu and "
			  "%wu", (int) dir.len, d, res.min, avail_range.min,
			  avail_range.max);
      else
	return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
			  "%<%.*s%> directive output truncated writing %wu "
			  "byte into a region of size between %wu and %wu",
			  "%<%.*s%> directive output truncated writing %wu "
			  "bytes into a region of size between %wu and %wu",
			  (int) dir.len, d, res.min, avail_range.min,
			  avail_range.max);
    }

  /* The output may be empty and its maximum is below MAXBYTES.  */
  if (res.min == 0 && res.max < maxbytes)
    return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
		    info.bounded
		    ? (maybe
		       ? G_("%<%.*s%> directive output may be truncated "
			    "writing up to %wu bytes into a region of size "
			    "between %wu and %wu")
		       : G_("%<%.*s%> directive output truncated writing "
			    "up to %wu bytes into a region of size between "
			    "%wu and %wu"))
		    : G_("%<%.*s%> directive writing up to %wu bytes "
			 "into a region of size between %wu and %wu"),
		    (int) dir.len,
		    target_to_host (hostdir, sizeof hostdir, dir.beg),
		    res.max, avail_range.min, avail_range.max);

  if (res.min == 0 && maxbytes <= res.max)
    /* This is a special case to avoid issuing the potentially confusing
       warning:
	 writing 0 or more bytes into a region of size between 0 and N.  */
    return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
		    info.bounded
		    ? (maybe
		       ? G_("%<%.*s%> directive output may be truncated "
			    "writing likely %wu or more bytes into a region "
			    "of size between %wu and %wu")
		       : G_("%<%.*s%> directive output truncated writing "
			    "likely %wu or more bytes into a region of size "
			    "between %wu and %wu"))
		    : G_("%<%.*s%> directive writing likely %wu or more bytes "
			 "into a region of size between %wu and %wu"),
		    (int) dir.len,
		    target_to_host (hostdir, sizeof hostdir, dir.beg),
		    res.likely, avail_range.min, avail_range.max);

  /* The minimum is nonzero and the maximum is below MAXBYTES.  */
  if (res.max < maxbytes)
    return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
		    info.bounded
		    ? (maybe
		       ? G_("%<%.*s%> directive output may be truncated "
			    "writing between %wu and %wu bytes into a region "
			    "of size between %wu and %wu")
		       : G_("%<%.*s%> directive output truncated writing "
			    "between %wu and %wu bytes into a region of size "
			    "between %wu and %wu"))
		    : G_("%<%.*s%> directive writing between %wu and "
			 "%wu bytes into a region of size between %wu and "
			 "%wu"), (int) dir.len,
		    target_to_host (hostdir, sizeof hostdir, dir.beg),
		    res.min, res.max, avail_range.min, avail_range.max);

  /* The minimum is nonzero and the maximum is at least MAXBYTES.  */
  return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
		  info.bounded
		  ? (maybe
		     ? G_("%<%.*s%> directive output may be truncated writing "
			  "%wu or more bytes into a region of size between "
			  "%wu and %wu")
		     : G_("%<%.*s%> directive output truncated writing "
			  "%wu or more bytes into a region of size between "
			  "%wu and %wu"))
		  : G_("%<%.*s%> directive writing %wu or more bytes "
		       "into a region of size between %wu and %wu"),
		  (int) dir.len,
		  target_to_host (hostdir, sizeof hostdir, dir.beg),
		  res.min, avail_range.min, avail_range.max);
}
3000 
3001 /* Given the formatting result described by RES and NAVAIL, the number
3002    of available bytes in the destination, return the range of bytes
3003    remaining in the destination.  */
3004 
3005 static inline result_range
bytes_remaining(unsigned HOST_WIDE_INT navail,const format_result & res)3006 bytes_remaining (unsigned HOST_WIDE_INT navail, const format_result &res)
3007 {
3008   result_range range;
3009 
3010   if (HOST_WIDE_INT_MAX <= navail)
3011     {
3012       range.min = range.max = range.likely = range.unlikely = navail;
3013       return range;
3014     }
3015 
3016   /* The lower bound of the available range is the available size
3017      minus the maximum output size, and the upper bound is the size
3018      minus the minimum.  */
3019   range.max = res.range.min < navail ? navail - res.range.min : 0;
3020 
3021   range.likely = res.range.likely < navail ? navail - res.range.likely : 0;
3022 
3023   if (res.range.max < HOST_WIDE_INT_MAX)
3024     range.min = res.range.max < navail ? navail - res.range.max : 0;
3025   else
3026     range.min = range.likely;
3027 
3028   range.unlikely = (res.range.unlikely < navail
3029 		    ? navail - res.range.unlikely : 0);
3030 
3031   return range;
3032 }
3033 
/* Compute the length of the output resulting from the directive DIR
   in a call described by INFO and update the overall result of the call
   in *RES, issuing any warnings and notes the directive warrants.
   QUERY is passed through to the directive's formatting function.
   Return true if the directive has been handled.  Return false when
   DIR has no formatting function, when the minimum in *RES is already
   HOST_WIDE_INT_MAX or more, when the directive's argument is null,
   or after the check for an argument that is not a nul-terminated
   string.  */

static bool
format_directive (const call_info &info,
		  format_result *res, const directive &dir,
		  range_query *query)
{
  /* Offset of the beginning of the directive from the beginning
     of the format string.  */
  size_t offset = dir.beg - info.fmtstr;
  size_t start = offset;
  /* Offset of the last character of the directive (same as START
     when the directive is empty).  */
  size_t length = offset + dir.len - !!dir.len;

  /* Create a location for the whole directive from the % to the format
     specifier.  */
  substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
			offset, start, length);

  /* Also get the location of the argument if possible.
     This doesn't work for integer literals or function calls.  */
  location_t argloc = UNKNOWN_LOCATION;
  if (dir.arg)
    argloc = EXPR_LOCATION (dir.arg);

  /* Bail when there is no function to compute the output length,
     or when minimum length checking has been disabled.   */
  if (!dir.fmtfunc || res->range.min >= HOST_WIDE_INT_MAX)
    return false;

  /* Compute the range of lengths of the formatted output.  */
  fmtresult fmtres = dir.fmtfunc (dir, dir.arg, query);

  /* Record whether the output of all directives is known to be
     bounded by some maximum, implying that their arguments are
     either known exactly or determined to be in a known range
     or, for strings, limited by the upper bounds of the arrays
     they refer to.  */
  res->knownrange &= fmtres.knownrange;

  if (!fmtres.knownrange)
    {
      /* Only when the range is known, check it against the host value
	 of INT_MAX + (the number of bytes of the "%.*Lf" directive with
	 INT_MAX precision, which is the longest possible output of any
	 single directive).  That's the largest valid byte count (though
	 not valid call to a printf-like function because it can never
	 return such a count).  Otherwise, the range doesn't correspond
	 to known values of the argument.
	 NOTE(review): the condition above tests for the range NOT being
	 known, which reads as the opposite of the first sentence of this
	 comment; confirm which of the two is intended.  */
      if (fmtres.range.max > target_dir_max ())
	{
	  /* Normalize the MAX counter to avoid having to deal with it
	     later.  The counter can be less than HOST_WIDE_INT_M1U
	     when compiling for an ILP32 target on an LP64 host.  */
	  fmtres.range.max = HOST_WIDE_INT_M1U;
	  /* Disable exact and maximum length checking after a failure
	     to determine the maximum number of characters (for example
	     for wide characters or wide character strings) but continue
	     tracking the minimum number of characters.  */
	  res->range.max = HOST_WIDE_INT_M1U;
	}

      if (fmtres.range.min > target_dir_max ())
	{
	  /* Disable exact length checking after a failure to determine
	     even the minimum number of characters (it shouldn't happen
	     except in an error) but keep tracking the minimum and maximum
	     number of characters.
	     NOTE(review): this returns before any of the counters in
	     *RES are updated for this directive.  */
	  return true;
	}
    }

  /* Buffer for the directive in the host character set (used when
     the source character set is different).  */
  char hostdir[32];

  /* The length of the directive as an int for use with "%.*s".  */
  int dirlen = dir.len;

  if (fmtres.nullp)
    {
      fmtwarn (dirloc, argloc, NULL, info.warnopt (),
	       "%G%<%.*s%> directive argument is null",
	       info.callstmt, dirlen,
	       target_to_host (hostdir, sizeof hostdir, dir.beg));

      /* Don't bother processing the rest of the format string.  */
      res->warned = true;
      res->range.min = HOST_WIDE_INT_M1U;
      res->range.max = HOST_WIDE_INT_M1U;
      return false;
    }

  /* Compute the number of available bytes in the destination.  There
     must always be at least one byte of space for the terminating
     NUL that's appended after the format string has been processed.  */
  result_range avail_range = bytes_remaining (info.objsize, *res);

  /* If the argument aliases a part of the destination of the formatted
     call at offset FMTRES.DST_OFFSET append the directive and its result
     to the set of aliases for later processing.  */
  if (fmtres.dst_offset != HOST_WIDE_INT_MIN)
    res->append_alias (dir, fmtres.dst_offset, fmtres.range);

  /* Set once a warning has been issued for the call, either for
     an earlier directive or for this one.  Only the first warning
     of those below is issued.  */
  bool warned = res->warned;

  if (!warned)
    warned = maybe_warn (dirloc, argloc, info, avail_range,
			 fmtres.range, dir);

  /* Bump up the total maximum if it isn't too big.  */
  if (res->range.max < HOST_WIDE_INT_MAX
      && fmtres.range.max < HOST_WIDE_INT_MAX)
    res->range.max += fmtres.range.max;

  /* Raise the total unlikely maximum by the larger of the maximum
     and the unlikely maximum.  */
  unsigned HOST_WIDE_INT save = res->range.unlikely;
  if (fmtres.range.max < fmtres.range.unlikely)
    res->range.unlikely += fmtres.range.unlikely;
  else
    res->range.unlikely += fmtres.range.max;

  /* Saturate the counter if the addition above wrapped around.  */
  if (res->range.unlikely < save)
    res->range.unlikely = HOST_WIDE_INT_M1U;

  res->range.min += fmtres.range.min;
  res->range.likely += fmtres.range.likely;

  /* Has the minimum directive output length exceeded the maximum
     of 4095 bytes required to be supported?  */
  bool minunder4k = fmtres.range.min < 4096;
  bool maxunder4k = fmtres.range.max < 4096;
  /* Clear POSUNDER4K in the overall result if the maximum has exceeded
     the 4k (this is necessary to avoid the return value optimization
     that may not be safe in the maximum case).  */
  if (!maxunder4k)
    res->posunder4k = false;
  /* Also clear POSUNDER4K if the directive may fail.  */
  if (fmtres.mayfail)
    res->posunder4k = false;

  if (!warned
      /* Only warn at level 2.  */
      && warn_level > 1
      /* Only warn for string functions.  */
      && info.is_string_func ()
      && (!minunder4k
	  || (!maxunder4k && fmtres.range.max < HOST_WIDE_INT_MAX)))
    {
      /* The directive output may be longer than the maximum required
	 to be handled by an implementation according to 7.21.6.1, p15
	 of C11.  Warn on this only at level 2 but remember this and
	 prevent folding the return value when done.  This allows for
	 the possibility of the actual libc call failing due to ENOMEM
	 (like Glibc does with very large precision or width).
	 Issue the "may exceed" warning only for string functions and
	 not for fprintf or printf.  */

      if (fmtres.range.min == fmtres.range.max)
	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			  "%<%.*s%> directive output of %wu bytes exceeds "
			  "minimum required size of 4095", dirlen,
			  target_to_host (hostdir, sizeof hostdir, dir.beg),
			  fmtres.range.min);
      else if (!minunder4k)
	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			  "%<%.*s%> directive output between %wu and %wu "
			  "bytes exceeds minimum required size of 4095",
			  dirlen,
			  target_to_host (hostdir, sizeof hostdir, dir.beg),
			  fmtres.range.min, fmtres.range.max);
      else if (!info.retval_used () && info.is_string_func ())
	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			  "%<%.*s%> directive output between %wu and %wu "
			  "bytes may exceed minimum required size of "
			  "4095",
			  dirlen,
			  target_to_host (hostdir, sizeof hostdir, dir.beg),
			  fmtres.range.min, fmtres.range.max);
    }

  /* Has the likely total output exceeded INT_MAX?  (Not considered
     for the terminating nul.)  */
  bool likelyximax = *dir.beg && res->range.likely > target_int_max ();
  /* Has the maximum total output exceeded INT_MAX?  Don't consider
     the maximum to be in excess when it's the result of a string of
     unknown length (i.e., whose maximum has been set to be greater
     than or equal to HOST_WIDE_INT_MAX).  */
  bool maxximax = (*dir.beg
		   && res->range.max > target_int_max ()
		   && res->range.max < HOST_WIDE_INT_MAX);

  if (!warned
      /* Warn for the likely output size at level 1.  */
      && (likelyximax
	  /* But only warn for the maximum at level 2.  */
	  || (warn_level > 1
	      && maxximax
	      && fmtres.range.max < HOST_WIDE_INT_MAX)))
    {
      if (fmtres.range.min > target_int_max ())
	{
	  /* The directive output exceeds INT_MAX bytes.  */
	  if (fmtres.range.min == fmtres.range.max)
	    warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			      "%<%.*s%> directive output of %wu bytes exceeds "
			      "%<INT_MAX%>", dirlen,
			      target_to_host (hostdir, sizeof hostdir, dir.beg),
			      fmtres.range.min);
	  else
	    warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			      "%<%.*s%> directive output between %wu and "
			      "%wu bytes exceeds %<INT_MAX%>", dirlen,
			      target_to_host (hostdir, sizeof hostdir, dir.beg),
			      fmtres.range.min, fmtres.range.max);
	}
      else if (res->range.min > target_int_max ())
	{
	  /* The directive output is under INT_MAX but causes the result
	     to exceed INT_MAX bytes.  */
	  if (fmtres.range.min == fmtres.range.max)
	    warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			      "%<%.*s%> directive output of %wu bytes causes "
			      "result to exceed %<INT_MAX%>", dirlen,
			      target_to_host (hostdir, sizeof hostdir, dir.beg),
			      fmtres.range.min);
	  else
	    warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			      "%<%.*s%> directive output between %wu and "
			      "%wu bytes causes result to exceed %<INT_MAX%>",
			      dirlen,
			      target_to_host (hostdir, sizeof hostdir, dir.beg),
			      fmtres.range.min, fmtres.range.max);
	}
      else if ((!info.retval_used () || !info.bounded)
	       && (info.is_string_func ()))
	/* Warn for calls to string functions that either aren't bounded
	   (sprintf) or whose return value isn't used.  */
	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			  "%<%.*s%> directive output between %wu and "
			  "%wu bytes may cause result to exceed "
			  "%<INT_MAX%>", dirlen,
			  target_to_host (hostdir, sizeof hostdir, dir.beg),
			  fmtres.range.min, fmtres.range.max);
    }

  if (!warned && fmtres.nonstr)
    {
      /* The argument is not a nul-terminated string.  Point to its
	 declaration if there is one and stop; false is returned whether
	 or not the warning has actually been issued.  */
      warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
			"%<%.*s%> directive argument is not a nul-terminated "
			"string",
			dirlen,
			target_to_host (hostdir, sizeof hostdir, dir.beg));
      if (warned && DECL_P (fmtres.nonstr))
	inform (DECL_SOURCE_LOCATION (fmtres.nonstr),
		"referenced argument declared here");
      return false;
    }

  /* After a warning mention the likely output size when it lies
     strictly between the minimum and the maximum.  */
  if (warned && fmtres.range.min < fmtres.range.likely
      && fmtres.range.likely < fmtres.range.max)
    inform_n (info.fmtloc, fmtres.range.likely,
	      "assuming directive output of %wu byte",
	      "assuming directive output of %wu bytes",
	      fmtres.range.likely);

  /* Also mention the value or the range of the directive's argument
     when the formatting function has provided one.  */
  if (warned && fmtres.argmin)
    {
      if (fmtres.argmin == fmtres.argmax)
	inform (info.fmtloc, "directive argument %qE", fmtres.argmin);
      else if (fmtres.knownrange)
	inform (info.fmtloc, "directive argument in the range [%E, %E]",
		fmtres.argmin, fmtres.argmax);
      else
	inform (info.fmtloc,
		"using the range [%E, %E] for directive argument",
		fmtres.argmin, fmtres.argmax);
    }

  res->warned |= warned;

  /* When processing the terminating nul after a warning for the call
     has been issued, summarize the output of the whole call.  */
  if (!dir.beg[0] && res->warned)
    {
      location_t callloc = gimple_location (info.callstmt);

      unsigned HOST_WIDE_INT min = res->range.min;
      unsigned HOST_WIDE_INT max = res->range.max;

      if (info.objsize < HOST_WIDE_INT_MAX)
	{
	  /* If a warning has been issued for buffer overflow or truncation
	     help the user figure out how big a buffer they need.  */

	  if (min == max)
	    inform_n (callloc, min,
		      "%qE output %wu byte into a destination of size %wu",
		      "%qE output %wu bytes into a destination of size %wu",
		      info.func, min, info.objsize);
	  else if (max < HOST_WIDE_INT_MAX)
	    inform (callloc,
		    "%qE output between %wu and %wu bytes into "
		    "a destination of size %wu",
		    info.func, min, max, info.objsize);
	  else if (min < res->range.likely && res->range.likely < max)
	    inform (callloc,
		    "%qE output %wu or more bytes (assuming %wu) into "
		    "a destination of size %wu",
		    info.func, min, res->range.likely, info.objsize);
	  else
	    inform (callloc,
		    "%qE output %wu or more bytes into a destination of size "
		    "%wu",
		    info.func, min, info.objsize);
	}
      else if (!info.is_string_func ())
	{
	  /* If the warning is for a file function like fprintf
	     or printf with no destination size just print the computed
	     result.  */
	  if (min == max)
	    inform_n (callloc, min,
		      "%qE output %wu byte", "%qE output %wu bytes",
		      info.func, min);
	  else if (max < HOST_WIDE_INT_MAX)
	    inform (callloc,
		    "%qE output between %wu and %wu bytes",
		    info.func, min, max);
	  else if (min < res->range.likely && res->range.likely < max)
	    inform (callloc,
		    "%qE output %wu or more bytes (assuming %wu)",
		    info.func, min, res->range.likely);
	  else
	    inform (callloc,
		    "%qE output %wu or more bytes",
		    info.func, min);
	}
    }

  /* For every directive except the terminating nul, dump the minimum,
     likely, maximum, and unlikely counters of the directive followed
     (in parentheses) by those of the call so far.  */
  if (dump_file && *dir.beg)
    {
      fprintf (dump_file,
	       "    Result: "
	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC " ("
	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ")\n",
	       fmtres.range.min, fmtres.range.likely,
	       fmtres.range.max, fmtres.range.unlikely,
	       res->range.min, res->range.likely,
	       res->range.max, res->range.unlikely);
    }

  return true;
}
3387 
3388 /* Parse a format directive in function call described by INFO starting
3389    at STR and populate DIR structure.  Bump up *ARGNO by the number of
3390    arguments extracted for the directive.  Return the length of
3391    the directive.  */
3392 
3393 static size_t
parse_directive(call_info & info,directive & dir,format_result * res,const char * str,unsigned * argno,range_query * query)3394 parse_directive (call_info &info,
3395 		 directive &dir, format_result *res,
3396 		 const char *str, unsigned *argno,
3397 		 range_query *query)
3398 {
3399   const char *pcnt = strchr (str, target_percent);
3400   dir.beg = str;
3401 
3402   if (size_t len = pcnt ? pcnt - str : *str ? strlen (str) : 1)
3403     {
3404       /* This directive is either a plain string or the terminating nul
3405 	 (which isn't really a directive but it simplifies things to
3406 	 handle it as if it were).  */
3407       dir.len = len;
3408       dir.fmtfunc = format_plain;
3409 
3410       if (dump_file)
3411 	{
3412 	  fprintf (dump_file, "  Directive %u at offset "
3413 		   HOST_WIDE_INT_PRINT_UNSIGNED ": \"%.*s\", "
3414 		   "length = " HOST_WIDE_INT_PRINT_UNSIGNED "\n",
3415 		   dir.dirno,
3416 		   (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3417 		   (int)dir.len, dir.beg, (unsigned HOST_WIDE_INT) dir.len);
3418 	}
3419 
3420       return len - !*str;
3421     }
3422 
3423   /* Set the directive argument's number to correspond to its position
3424      in the formatted function call's argument list.  */
3425   dir.argno = *argno;
3426 
3427   const char *pf = pcnt + 1;
3428 
3429     /* POSIX numbered argument index or zero when none.  */
3430   HOST_WIDE_INT dollar = 0;
3431 
3432   /* With and precision.  -1 when not specified, HOST_WIDE_INT_MIN
3433      when given by a va_list argument, and a non-negative value
3434      when specified in the format string itself.  */
3435   HOST_WIDE_INT width = -1;
3436   HOST_WIDE_INT precision = -1;
3437 
3438   /* Pointers to the beginning of the width and precision decimal
3439      string (if any) within the directive.  */
3440   const char *pwidth = 0;
3441   const char *pprec = 0;
3442 
3443   /* When the value of the decimal string that specifies width or
3444      precision is out of range, points to the digit that causes
3445      the value to exceed the limit.  */
3446   const char *werange = NULL;
3447   const char *perange = NULL;
3448 
3449   /* Width specified via the asterisk.  Need not be INTEGER_CST.
3450      For vararg functions set to void_node.  */
3451   tree star_width = NULL_TREE;
3452 
3453   /* Width specified via the asterisk.  Need not be INTEGER_CST.
3454      For vararg functions set to void_node.  */
3455   tree star_precision = NULL_TREE;
3456 
3457   if (ISDIGIT (target_to_host (*pf)))
3458     {
3459       /* This could be either a POSIX positional argument, the '0'
3460 	 flag, or a width, depending on what follows.  Store it as
3461 	 width and sort it out later after the next character has
3462 	 been seen.  */
3463       pwidth = pf;
3464       width = target_strtowi (&pf, &werange);
3465     }
3466   else if (target_to_host (*pf) == '*')
3467     {
3468       /* Similarly to the block above, this could be either a POSIX
3469 	 positional argument or a width, depending on what follows.  */
3470       if (*argno < gimple_call_num_args (info.callstmt))
3471 	star_width = gimple_call_arg (info.callstmt, (*argno)++);
3472       else
3473 	star_width = void_node;
3474       ++pf;
3475     }
3476 
3477   if (target_to_host (*pf) == '$')
3478     {
3479       /* Handle the POSIX dollar sign which references the 1-based
3480 	 positional argument number.  */
3481       if (width != -1)
3482 	dollar = width + info.argidx;
3483       else if (star_width
3484 	       && TREE_CODE (star_width) == INTEGER_CST
3485 	       && (TYPE_PRECISION (TREE_TYPE (star_width))
3486 		   <= TYPE_PRECISION (integer_type_node)))
3487 	dollar = width + tree_to_shwi (star_width);
3488 
3489       /* Bail when the numbered argument is out of range (it will
3490 	 have already been diagnosed by -Wformat).  */
3491       if (dollar == 0
3492 	  || dollar == (int)info.argidx
3493 	  || dollar > gimple_call_num_args (info.callstmt))
3494 	return false;
3495 
3496       --dollar;
3497 
3498       star_width = NULL_TREE;
3499       width = -1;
3500       ++pf;
3501     }
3502 
3503   if (dollar || !star_width)
3504     {
3505       if (width != -1)
3506 	{
3507 	  if (width == 0)
3508 	    {
3509 	      /* The '0' that has been interpreted as a width above is
3510 		 actually a flag.  Reset HAVE_WIDTH, set the '0' flag,
3511 		 and continue processing other flags.  */
3512 	      width = -1;
3513 	      dir.set_flag ('0');
3514 	    }
3515 	  else if (!dollar)
3516 	    {
3517 	      /* (Non-zero) width has been seen.  The next character
3518 		 is either a period or a digit.  */
3519 	      goto start_precision;
3520 	    }
3521 	}
3522       /* When either '$' has been seen, or width has not been seen,
3523 	 the next field is the optional flags followed by an optional
3524 	 width.  */
3525       for ( ; ; ) {
3526 	switch (target_to_host (*pf))
3527 	  {
3528 	  case ' ':
3529 	  case '0':
3530 	  case '+':
3531 	  case '-':
3532 	  case '#':
3533 	    dir.set_flag (target_to_host (*pf++));
3534 	    break;
3535 
3536 	  default:
3537 	    goto start_width;
3538 	  }
3539       }
3540 
3541     start_width:
3542       if (ISDIGIT (target_to_host (*pf)))
3543 	{
3544 	  werange = 0;
3545 	  pwidth = pf;
3546 	  width = target_strtowi (&pf, &werange);
3547 	}
3548       else if (target_to_host (*pf) == '*')
3549 	{
3550 	  if (*argno < gimple_call_num_args (info.callstmt))
3551 	    star_width = gimple_call_arg (info.callstmt, (*argno)++);
3552 	  else
3553 	    {
3554 	      /* This is (likely) a va_list.  It could also be an invalid
3555 		 call with insufficient arguments.  */
3556 	      star_width = void_node;
3557 	    }
3558 	  ++pf;
3559 	}
3560       else if (target_to_host (*pf) == '\'')
3561 	{
3562 	  /* The POSIX apostrophe indicating a numeric grouping
3563 	     in the current locale.  Even though it's possible to
3564 	     estimate the upper bound on the size of the output
3565 	     based on the number of digits it probably isn't worth
3566 	     continuing.  */
3567 	  return 0;
3568 	}
3569     }
3570 
3571  start_precision:
3572   if (target_to_host (*pf) == '.')
3573     {
3574       ++pf;
3575 
3576       if (ISDIGIT (target_to_host (*pf)))
3577 	{
3578 	  pprec = pf;
3579 	  precision = target_strtowi (&pf, &perange);
3580 	}
3581       else if (target_to_host (*pf) == '*')
3582 	{
3583 	  if (*argno < gimple_call_num_args (info.callstmt))
3584 	    star_precision = gimple_call_arg (info.callstmt, (*argno)++);
3585 	  else
3586 	    {
3587 	      /* This is (likely) a va_list.  It could also be an invalid
3588 		 call with insufficient arguments.  */
3589 	      star_precision = void_node;
3590 	    }
3591 	  ++pf;
3592 	}
3593       else
3594 	{
3595 	  /* The decimal precision or the asterisk are optional.
3596 	     When neither is specified it's taken to be zero.  */
3597 	  precision = 0;
3598 	}
3599     }
3600 
3601   switch (target_to_host (*pf))
3602     {
3603     case 'h':
3604       if (target_to_host (pf[1]) == 'h')
3605 	{
3606 	  ++pf;
3607 	  dir.modifier = FMT_LEN_hh;
3608 	}
3609       else
3610 	dir.modifier = FMT_LEN_h;
3611       ++pf;
3612       break;
3613 
3614     case 'j':
3615       dir.modifier = FMT_LEN_j;
3616       ++pf;
3617       break;
3618 
3619     case 'L':
3620       dir.modifier = FMT_LEN_L;
3621       ++pf;
3622       break;
3623 
3624     case 'l':
3625       if (target_to_host (pf[1]) == 'l')
3626 	{
3627 	  ++pf;
3628 	  dir.modifier = FMT_LEN_ll;
3629 	}
3630       else
3631 	dir.modifier = FMT_LEN_l;
3632       ++pf;
3633       break;
3634 
3635     case 't':
3636       dir.modifier = FMT_LEN_t;
3637       ++pf;
3638       break;
3639 
3640     case 'z':
3641       dir.modifier = FMT_LEN_z;
3642       ++pf;
3643       break;
3644     }
3645 
3646   switch (target_to_host (*pf))
3647     {
3648       /* Handle a sole '%' character the same as "%%" but since it's
3649 	 undefined prevent the result from being folded.  */
3650     case '\0':
3651       --pf;
3652       res->range.min = res->range.max = HOST_WIDE_INT_M1U;
3653       /* FALLTHRU */
3654     case '%':
3655       dir.fmtfunc = format_percent;
3656       break;
3657 
3658     case 'a':
3659     case 'A':
3660     case 'e':
3661     case 'E':
3662     case 'f':
3663     case 'F':
3664     case 'g':
3665     case 'G':
3666       res->floating = true;
3667       dir.fmtfunc = format_floating;
3668       break;
3669 
3670     case 'd':
3671     case 'i':
3672     case 'o':
3673     case 'u':
3674     case 'x':
3675     case 'X':
3676       dir.fmtfunc = format_integer;
3677       break;
3678 
3679     case 'p':
3680       /* The %p output is implementation-defined.  It's possible
3681 	 to determine this format but due to extensions (especially
3682 	 those of the Linux kernel -- see bug 78512) the first %p
3683 	 in the format string disables any further processing.  */
3684       return false;
3685 
3686     case 'n':
3687       /* %n has side-effects even when nothing is actually printed to
3688 	 any buffer.  */
3689       info.nowrite = false;
3690       dir.fmtfunc = format_none;
3691       break;
3692 
3693     case 'C':
3694     case 'c':
3695       /* POSIX wide character and C/POSIX narrow character.  */
3696       dir.fmtfunc = format_character;
3697       break;
3698 
3699     case 'S':
3700     case 's':
3701       /* POSIX wide string and C/POSIX narrow character string.  */
3702       dir.fmtfunc = format_string;
3703       break;
3704 
3705     default:
3706       /* Unknown conversion specification.  */
3707       return 0;
3708     }
3709 
3710   dir.specifier = target_to_host (*pf++);
3711 
3712   /* Store the length of the format directive.  */
3713   dir.len = pf - pcnt;
3714 
3715   /* Buffer for the directive in the host character set (used when
3716      the source character set is different).  */
3717   char hostdir[32];
3718 
3719   if (star_width)
3720     {
3721       if (INTEGRAL_TYPE_P (TREE_TYPE (star_width)))
3722 	dir.set_width (star_width, query);
3723       else
3724 	{
3725 	  /* Width specified by a va_list takes on the range [0, -INT_MIN]
3726 	     (width is the absolute value of that specified).  */
3727 	  dir.width[0] = 0;
3728 	  dir.width[1] = target_int_max () + 1;
3729 	}
3730     }
3731   else
3732     {
3733       if (width == HOST_WIDE_INT_MAX && werange)
3734 	{
3735 	  size_t begin = dir.beg - info.fmtstr + (pwidth - pcnt);
3736 	  size_t caret = begin + (werange - pcnt);
3737 	  size_t end = pf - info.fmtstr - 1;
3738 
3739 	  /* Create a location for the width part of the directive,
3740 	     pointing the caret at the first out-of-range digit.  */
3741 	  substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3742 				caret, begin, end);
3743 
3744 	  fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3745 		   "%<%.*s%> directive width out of range", (int) dir.len,
3746 		   target_to_host (hostdir, sizeof hostdir, dir.beg));
3747 	}
3748 
3749       dir.set_width (width);
3750     }
3751 
3752   if (star_precision)
3753     {
3754       if (INTEGRAL_TYPE_P (TREE_TYPE (star_precision)))
3755 	dir.set_precision (star_precision, query);
3756       else
3757 	{
3758 	  /* Precision specified by a va_list takes on the range [-1, INT_MAX]
3759 	     (unlike width, negative precision is ignored).  */
3760 	  dir.prec[0] = -1;
3761 	  dir.prec[1] = target_int_max ();
3762 	}
3763     }
3764   else
3765     {
3766       if (precision == HOST_WIDE_INT_MAX && perange)
3767 	{
3768 	  size_t begin = dir.beg - info.fmtstr + (pprec - pcnt) - 1;
3769 	  size_t caret = dir.beg - info.fmtstr + (perange - pcnt) - 1;
3770 	  size_t end = pf - info.fmtstr - 2;
3771 
3772 	  /* Create a location for the precision part of the directive,
3773 	     including the leading period, pointing the caret at the first
3774 	     out-of-range digit .  */
3775 	  substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3776 				caret, begin, end);
3777 
3778 	  fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3779 		   "%<%.*s%> directive precision out of range", (int) dir.len,
3780 		   target_to_host (hostdir, sizeof hostdir, dir.beg));
3781 	}
3782 
3783       dir.set_precision (precision);
3784     }
3785 
3786   /* Extract the argument if the directive takes one and if it's
3787      available (e.g., the function doesn't take a va_list).  Treat
3788      missing arguments the same as va_list, even though they will
3789      have likely already been diagnosed by -Wformat.  */
3790   if (dir.specifier != '%'
3791       && *argno < gimple_call_num_args (info.callstmt))
3792     dir.arg = gimple_call_arg (info.callstmt, dollar ? dollar : (*argno)++);
3793 
3794   if (dump_file)
3795     {
3796       fprintf (dump_file,
3797 	       "  Directive %u at offset " HOST_WIDE_INT_PRINT_UNSIGNED
3798 	       ": \"%.*s\"",
3799 	       dir.dirno,
3800 	       (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3801 	       (int)dir.len, dir.beg);
3802       if (star_width)
3803 	{
3804 	  if (dir.width[0] == dir.width[1])
3805 	    fprintf (dump_file, ", width = " HOST_WIDE_INT_PRINT_DEC,
3806 		     dir.width[0]);
3807 	  else
3808 	    fprintf (dump_file,
3809 		     ", width in range [" HOST_WIDE_INT_PRINT_DEC
3810 		     ", " HOST_WIDE_INT_PRINT_DEC "]",
3811 		     dir.width[0], dir.width[1]);
3812 	}
3813 
3814       if (star_precision)
3815 	{
3816 	  if (dir.prec[0] == dir.prec[1])
3817 	    fprintf (dump_file, ", precision = " HOST_WIDE_INT_PRINT_DEC,
3818 		     dir.prec[0]);
3819 	  else
3820 	    fprintf (dump_file,
3821 		     ", precision in range [" HOST_WIDE_INT_PRINT_DEC
3822 		     HOST_WIDE_INT_PRINT_DEC "]",
3823 		     dir.prec[0], dir.prec[1]);
3824 	}
3825       fputc ('\n', dump_file);
3826     }
3827 
3828   return dir.len;
3829 }
3830 
3831 /* Diagnose overlap between destination and %s directive arguments.  */
3832 
3833 static void
maybe_warn_overlap(call_info & info,format_result * res)3834 maybe_warn_overlap (call_info &info, format_result *res)
3835 {
3836   /* Two vectors of 1-based indices corresponding to either certainly
3837      or possibly aliasing arguments.  */
3838   auto_vec<int, 16> aliasarg[2];
3839 
3840   /* Go through the array of potentially aliasing directives and collect
3841      argument numbers of those that do or may overlap the destination
3842      object given the full result.  */
3843   for (unsigned i = 0; i != res->alias_count; ++i)
3844     {
3845       const format_result::alias_info &alias = res->aliases[i];
3846 
3847       enum { possible = -1, none = 0, certain = 1 } overlap = none;
3848 
3849       /* If the precision is zero there is no overlap.  (This only
3850 	 considers %s directives and ignores %n.)  */
3851       if (alias.dir.prec[0] == 0 && alias.dir.prec[1] == 0)
3852 	continue;
3853 
3854       if (alias.offset == HOST_WIDE_INT_MAX
3855 	  || info.dst_offset == HOST_WIDE_INT_MAX)
3856 	overlap = possible;
3857       else if (alias.offset == info.dst_offset)
3858 	overlap = alias.dir.prec[0] == 0 ? possible : certain;
3859       else
3860 	{
3861 	  /* Determine overlap from the range of output and offsets
3862 	     into the same destination as the source, and rule out
3863 	     impossible overlap.  */
3864 	  unsigned HOST_WIDE_INT albeg = alias.offset;
3865 	  unsigned HOST_WIDE_INT dstbeg = info.dst_offset;
3866 
3867 	  unsigned HOST_WIDE_INT alend = albeg + alias.range.min;
3868 	  unsigned HOST_WIDE_INT dstend = dstbeg + res->range.min - 1;
3869 
3870 	  if ((albeg <= dstbeg && alend > dstbeg)
3871 	      || (albeg >= dstbeg && albeg < dstend))
3872 	    overlap = certain;
3873 	  else
3874 	    {
3875 	      alend = albeg + alias.range.max;
3876 	      if (alend < albeg)
3877 		alend = HOST_WIDE_INT_M1U;
3878 
3879 	      dstend = dstbeg + res->range.max - 1;
3880 	      if (dstend < dstbeg)
3881 		dstend = HOST_WIDE_INT_M1U;
3882 
3883 	      if ((albeg >= dstbeg && albeg <= dstend)
3884 		  || (alend >= dstbeg && alend <= dstend))
3885 		overlap = possible;
3886 	    }
3887 	}
3888 
3889       if (overlap == none)
3890 	continue;
3891 
3892       /* Append the 1-based argument number.  */
3893       aliasarg[overlap != certain].safe_push (alias.dir.argno + 1);
3894 
3895       /* Disable any kind of optimization.  */
3896       res->range.unlikely = HOST_WIDE_INT_M1U;
3897     }
3898 
3899   tree arg0 = gimple_call_arg (info.callstmt, 0);
3900   location_t loc = gimple_location (info.callstmt);
3901 
3902   bool aliaswarn = false;
3903 
3904   unsigned ncertain = aliasarg[0].length ();
3905   unsigned npossible = aliasarg[1].length ();
3906   if (ncertain && npossible)
3907     {
3908       /* If there are multiple arguments that overlap, some certainly
3909 	 and some possibly, handle both sets in a single diagnostic.  */
3910       aliaswarn
3911 	= warning_at (loc, OPT_Wrestrict,
3912 		      "%qE arguments %Z and maybe %Z overlap destination "
3913 		      "object %qE",
3914 		      info.func, aliasarg[0].address (), ncertain,
3915 		      aliasarg[1].address (), npossible,
3916 		      info.dst_origin);
3917     }
3918   else if (ncertain)
3919     {
3920       /* There is only one set of two or more arguments and they all
3921 	 certainly overlap the destination.  */
3922       aliaswarn
3923 	= warning_n (loc, OPT_Wrestrict, ncertain,
3924 		     "%qE argument %Z overlaps destination object %qE",
3925 		     "%qE arguments %Z overlap destination object %qE",
3926 		     info.func, aliasarg[0].address (), ncertain,
3927 		     info.dst_origin);
3928     }
3929   else if (npossible)
3930     {
3931       /* There is only one set of two or more arguments and they all
3932 	 may overlap (but need not).  */
3933       aliaswarn
3934 	= warning_n (loc, OPT_Wrestrict, npossible,
3935 		     "%qE argument %Z may overlap destination object %qE",
3936 		     "%qE arguments %Z may overlap destination object %qE",
3937 		     info.func, aliasarg[1].address (), npossible,
3938 		     info.dst_origin);
3939     }
3940 
3941   if (aliaswarn)
3942     {
3943       res->warned = true;
3944 
3945       if (info.dst_origin != arg0)
3946 	{
3947 	  /* If its location is different from the first argument of the call
3948 	     point either at the destination object itself or at the expression
3949 	     that was used to determine the overlap.  */
3950 	  loc = (DECL_P (info.dst_origin)
3951 		 ? DECL_SOURCE_LOCATION (info.dst_origin)
3952 		 : EXPR_LOCATION (info.dst_origin));
3953 	  if (loc != UNKNOWN_LOCATION)
3954 	    inform (loc,
3955 		    "destination object referenced by %<restrict%>-qualified "
3956 		    "argument 1 was declared here");
3957 	}
3958     }
3959 }
3960 
3961 /* Compute the length of the output resulting from the call to a formatted
3962    output function described by INFO and store the result of the call in
3963    *RES.  Issue warnings for detected past the end writes.  Return true
3964    if the complete format string has been processed and *RES can be relied
3965    on, false otherwise (e.g., when a unknown or unhandled directive was seen
3966    that caused the processing to be terminated early).  */
3967 
3968 static bool
compute_format_length(call_info & info,format_result * res,range_query * query)3969 compute_format_length (call_info &info, format_result *res, range_query *query)
3970 {
3971   if (dump_file)
3972     {
3973       location_t callloc = gimple_location (info.callstmt);
3974       fprintf (dump_file, "%s:%i: ",
3975 	       LOCATION_FILE (callloc), LOCATION_LINE (callloc));
3976       print_generic_expr (dump_file, info.func, dump_flags);
3977 
3978       fprintf (dump_file,
3979 	       ": objsize = " HOST_WIDE_INT_PRINT_UNSIGNED
3980 	       ", fmtstr = \"%s\"\n",
3981 	       info.objsize, info.fmtstr);
3982     }
3983 
3984   /* Reset the minimum and maximum byte counters.  */
3985   res->range.min = res->range.max = 0;
3986 
3987   /* No directive has been seen yet so the length of output is bounded
3988      by the known range [0, 0] (with no conversion resulting in a failure
3989      or producing more than 4K bytes) until determined otherwise.  */
3990   res->knownrange = true;
3991   res->floating = false;
3992   res->warned = false;
3993 
3994   /* 1-based directive counter.  */
3995   unsigned dirno = 1;
3996 
3997   /* The variadic argument counter.  */
3998   unsigned argno = info.argidx;
3999 
4000   bool success = true;
4001 
4002   for (const char *pf = info.fmtstr; ; ++dirno)
4003     {
4004       directive dir (&info, dirno);
4005 
4006       size_t n = parse_directive (info, dir, res, pf, &argno, query);
4007 
4008       /* Return failure if the format function fails.  */
4009       if (!format_directive (info, res, dir, query))
4010 	return false;
4011 
4012       /* Return success when the directive is zero bytes long and it's
4013 	 the last thing in the format string (i.e., it's the terminating
4014 	 nul, which isn't really a directive but handling it as one makes
4015 	 things simpler).  */
4016       if (!n)
4017 	{
4018 	  success = *pf == '\0';
4019 	  break;
4020 	}
4021 
4022       pf += n;
4023     }
4024 
4025   maybe_warn_overlap (info, res);
4026 
4027   /* The complete format string was processed (with or without warnings).  */
4028   return success;
4029 }
4030 
4031 /* Return the size of the object referenced by the expression DEST if
4032    available, or the maximum possible size otherwise.  */
4033 
4034 static unsigned HOST_WIDE_INT
get_destination_size(tree dest,pointer_query & ptr_qry)4035 get_destination_size (tree dest, pointer_query &ptr_qry)
4036 {
4037   /* When there is no destination return the maximum.  */
4038   if (!dest)
4039     return HOST_WIDE_INT_MAX;
4040 
4041   /* Use compute_objsize to determine the size of the destination object.  */
4042   access_ref aref;
4043   if (!ptr_qry.get_ref (dest, &aref))
4044     return HOST_WIDE_INT_MAX;
4045 
4046   offset_int remsize = aref.size_remaining ();
4047   if (!wi::fits_uhwi_p (remsize))
4048     return HOST_WIDE_INT_MAX;
4049 
4050   return remsize.to_uhwi ();
4051 }
4052 
4053 /* Return true if the call described by INFO with result RES safe to
4054    optimize (i.e., no undefined behavior), and set RETVAL to the range
4055    of its return values.  */
4056 
4057 static bool
is_call_safe(const call_info & info,const format_result & res,bool under4k,unsigned HOST_WIDE_INT retval[2])4058 is_call_safe (const call_info &info,
4059 	      const format_result &res, bool under4k,
4060 	      unsigned HOST_WIDE_INT retval[2])
4061 {
4062   if (under4k && !res.posunder4k)
4063     return false;
4064 
4065   /* The minimum return value.  */
4066   retval[0] = res.range.min;
4067 
4068   /* The maximum return value is in most cases bounded by RES.RANGE.MAX
4069      but in cases involving multibyte characters could be as large as
4070      RES.RANGE.UNLIKELY.  */
4071   retval[1]
4072     = res.range.unlikely < res.range.max ? res.range.max : res.range.unlikely;
4073 
4074   /* Adjust the number of bytes which includes the terminating nul
4075      to reflect the return value of the function which does not.
4076      Because the valid range of the function is [INT_MIN, INT_MAX],
4077      a valid range before the adjustment below is [0, INT_MAX + 1]
4078      (the functions only return negative values on error or undefined
4079      behavior).  */
4080   if (retval[0] <= target_int_max () + 1)
4081     --retval[0];
4082   if (retval[1] <= target_int_max () + 1)
4083     --retval[1];
4084 
4085   /* Avoid the return value optimization when the behavior of the call
4086      is undefined either because any directive may have produced 4K or
4087      more of output, or the return value exceeds INT_MAX, or because
4088      the output overflows the destination object (but leave it enabled
4089      when the function is bounded because then the behavior is well-
4090      defined).  */
4091   if (retval[0] == retval[1]
4092       && (info.bounded || retval[0] < info.objsize)
4093       && retval[0] <= target_int_max ())
4094     return true;
4095 
4096   if ((info.bounded || retval[1] < info.objsize)
4097       && (retval[0] < target_int_max ()
4098 	  && retval[1] < target_int_max ()))
4099     return true;
4100 
4101   if (!under4k && (info.bounded || retval[0] < info.objsize))
4102     return true;
4103 
4104   return false;
4105 }
4106 
4107 /* Given a suitable result RES of a call to a formatted output function
4108    described by INFO, substitute the result for the return value of
4109    the call.  The result is suitable if the number of bytes it represents
4110    is known and exact.  A result that isn't suitable for substitution may
4111    have its range set to the range of return values, if that is known.
4112    Return true if the call is removed and gsi_next should not be performed
4113    in the caller.  */
4114 
4115 static bool
try_substitute_return_value(gimple_stmt_iterator * gsi,const call_info & info,const format_result & res)4116 try_substitute_return_value (gimple_stmt_iterator *gsi,
4117 			     const call_info &info,
4118 			     const format_result &res)
4119 {
4120   tree lhs = gimple_get_lhs (info.callstmt);
4121 
4122   /* Set to true when the entire call has been removed.  */
4123   bool removed = false;
4124 
4125   /* The minimum and maximum return value.  */
4126   unsigned HOST_WIDE_INT retval[2] = {0};
4127   bool safe = is_call_safe (info, res, true, retval);
4128 
4129   if (safe
4130       && retval[0] == retval[1]
4131       /* Not prepared to handle possibly throwing calls here; they shouldn't
4132 	 appear in non-artificial testcases, except when the __*_chk routines
4133 	 are badly declared.  */
4134       && !stmt_ends_bb_p (info.callstmt))
4135     {
4136       tree cst = build_int_cst (lhs ? TREE_TYPE (lhs) : integer_type_node,
4137 				retval[0]);
4138 
4139       if (lhs == NULL_TREE && info.nowrite)
4140 	{
4141 	  /* Remove the call to the bounded function with a zero size
4142 	     (e.g., snprintf(0, 0, "%i", 123)) if there is no lhs.  */
4143 	  unlink_stmt_vdef (info.callstmt);
4144 	  gsi_remove (gsi, true);
4145 	  removed = true;
4146 	}
4147       else if (info.nowrite)
4148 	{
4149 	  /* Replace the call to the bounded function with a zero size
4150 	     (e.g., snprintf(0, 0, "%i", 123) with the constant result
4151 	     of the function.  */
4152 	  if (!update_call_from_tree (gsi, cst))
4153 	    gimplify_and_update_call_from_tree (gsi, cst);
4154 	  gimple *callstmt = gsi_stmt (*gsi);
4155 	  update_stmt (callstmt);
4156 	}
4157       else if (lhs)
4158 	{
4159 	  /* Replace the left-hand side of the call with the constant
4160 	     result of the formatted function.  */
4161 	  gimple_call_set_lhs (info.callstmt, NULL_TREE);
4162 	  gimple *g = gimple_build_assign (lhs, cst);
4163 	  gsi_insert_after (gsi, g, GSI_NEW_STMT);
4164 	  update_stmt (info.callstmt);
4165 	}
4166 
4167       if (dump_file)
4168 	{
4169 	  if (removed)
4170 	    fprintf (dump_file, "  Removing call statement.");
4171 	  else
4172 	    {
4173 	      fprintf (dump_file, "  Substituting ");
4174 	      print_generic_expr (dump_file, cst, dump_flags);
4175 	      fprintf (dump_file, " for %s.\n",
4176 		       info.nowrite ? "statement" : "return value");
4177 	    }
4178 	}
4179     }
4180   else if (lhs && types_compatible_p (TREE_TYPE (lhs), integer_type_node))
4181     {
4182       bool setrange = false;
4183 
4184       if (safe
4185 	  && (info.bounded || retval[1] < info.objsize)
4186 	  && (retval[0] < target_int_max ()
4187 	      && retval[1] < target_int_max ()))
4188 	{
4189 	  /* If the result is in a valid range bounded by the size of
4190 	     the destination set it so that it can be used for subsequent
4191 	     optimizations.  */
4192 	  int prec = TYPE_PRECISION (integer_type_node);
4193 
4194 	  wide_int min = wi::shwi (retval[0], prec);
4195 	  wide_int max = wi::shwi (retval[1], prec);
4196 	  set_range_info (lhs, VR_RANGE, min, max);
4197 
4198 	  setrange = true;
4199 	}
4200 
4201       if (dump_file)
4202 	{
4203 	  const char *inbounds
4204 	    = (retval[0] < info.objsize
4205 	       ? (retval[1] < info.objsize
4206 		  ? "in" : "potentially out-of")
4207 	       : "out-of");
4208 
4209 	  const char *what = setrange ? "Setting" : "Discarding";
4210 	  if (retval[0] != retval[1])
4211 	    fprintf (dump_file,
4212 		     "  %s %s-bounds return value range ["
4213 		     HOST_WIDE_INT_PRINT_UNSIGNED ", "
4214 		     HOST_WIDE_INT_PRINT_UNSIGNED "].\n",
4215 		     what, inbounds, retval[0], retval[1]);
4216 	  else
4217 	    fprintf (dump_file, "  %s %s-bounds return value "
4218 		     HOST_WIDE_INT_PRINT_UNSIGNED ".\n",
4219 		     what, inbounds, retval[0]);
4220 	}
4221     }
4222 
4223   if (dump_file)
4224     fputc ('\n', dump_file);
4225 
4226   return removed;
4227 }
4228 
4229 /* Try to simplify a s{,n}printf call described by INFO with result
4230    RES by replacing it with a simpler and presumably more efficient
4231    call (such as strcpy).  */
4232 
4233 static bool
try_simplify_call(gimple_stmt_iterator * gsi,const call_info & info,const format_result & res)4234 try_simplify_call (gimple_stmt_iterator *gsi,
4235 		   const call_info &info,
4236 		   const format_result &res)
4237 {
4238   unsigned HOST_WIDE_INT dummy[2];
4239   if (!is_call_safe (info, res, info.retval_used (), dummy))
4240     return false;
4241 
4242   switch (info.fncode)
4243     {
4244     case BUILT_IN_SNPRINTF:
4245       return gimple_fold_builtin_snprintf (gsi);
4246 
4247     case BUILT_IN_SPRINTF:
4248       return gimple_fold_builtin_sprintf (gsi);
4249 
4250     default:
4251       ;
4252     }
4253 
4254   return false;
4255 }
4256 
4257 /* Return the zero-based index of the format string argument of a printf
4258    like function and set *IDX_ARGS to the first format argument.  When
4259    no such index exists return UINT_MAX.  */
4260 
4261 static unsigned
get_user_idx_format(tree fndecl,unsigned * idx_args)4262 get_user_idx_format (tree fndecl, unsigned *idx_args)
4263 {
4264   tree attrs = lookup_attribute ("format", DECL_ATTRIBUTES (fndecl));
4265   if (!attrs)
4266     attrs = lookup_attribute ("format", TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
4267 
4268   if (!attrs)
4269     return UINT_MAX;
4270 
4271   attrs = TREE_VALUE (attrs);
4272 
4273   tree archetype = TREE_VALUE (attrs);
4274   if (strcmp ("printf", IDENTIFIER_POINTER (archetype)))
4275     return UINT_MAX;
4276 
4277   attrs = TREE_CHAIN (attrs);
4278   tree fmtarg = TREE_VALUE (attrs);
4279 
4280   attrs = TREE_CHAIN (attrs);
4281   tree elliparg = TREE_VALUE (attrs);
4282 
4283   /* Attribute argument indices are 1-based but we use zero-based.  */
4284   *idx_args = tree_to_uhwi (elliparg) - 1;
4285   return tree_to_uhwi (fmtarg) - 1;
4286 }
4287 
4288 }   /* Unnamed namespace.  */
4289 
4290 /* Determine if a GIMPLE call at *GSI is to one of the sprintf-like built-in
4291    functions and if so, handle it.  Return true if the call is removed and
4292    gsi_next should not be performed in the caller.  */
4293 
4294 bool
handle_printf_call(gimple_stmt_iterator * gsi,pointer_query & ptr_qry)4295 handle_printf_call (gimple_stmt_iterator *gsi, pointer_query &ptr_qry)
4296 {
4297   init_target_to_host_charmap ();
4298 
4299   call_info info = call_info ();
4300 
4301   info.callstmt = gsi_stmt (*gsi);
4302   info.func = gimple_call_fndecl (info.callstmt);
4303   if (!info.func)
4304     return false;
4305 
4306   /* Format string argument number (valid for all functions).  */
4307   unsigned idx_format = UINT_MAX;
4308   if (gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL))
4309     info.fncode = DECL_FUNCTION_CODE (info.func);
4310   else
4311     {
4312       unsigned idx_args;
4313       idx_format = get_user_idx_format (info.func, &idx_args);
4314       if (idx_format == UINT_MAX
4315 	  || idx_format >= gimple_call_num_args (info.callstmt)
4316 	  || idx_args > gimple_call_num_args (info.callstmt)
4317 	  || !POINTER_TYPE_P (TREE_TYPE (gimple_call_arg (info.callstmt,
4318 							  idx_format))))
4319 	return false;
4320       info.fncode = BUILT_IN_NONE;
4321       info.argidx = idx_args;
4322     }
4323 
4324   /* The size of the destination as in snprintf(dest, size, ...).  */
4325   unsigned HOST_WIDE_INT dstsize = HOST_WIDE_INT_M1U;
4326 
4327   /* The size of the destination determined by __builtin_object_size.  */
4328   unsigned HOST_WIDE_INT objsize = HOST_WIDE_INT_M1U;
4329 
4330   /* Zero-based buffer size argument number (snprintf and vsnprintf).  */
4331   unsigned idx_dstsize = UINT_MAX;
4332 
4333   /* Object size argument number (snprintf_chk and vsnprintf_chk).  */
4334   unsigned idx_objsize = UINT_MAX;
4335 
4336   /* Destinaton argument number (valid for sprintf functions only).  */
4337   unsigned idx_dstptr = 0;
4338 
4339   switch (info.fncode)
4340     {
4341     case BUILT_IN_NONE:
4342       // User-defined function with attribute format (printf).
4343       idx_dstptr = -1;
4344       break;
4345 
4346     case BUILT_IN_FPRINTF:
4347       // Signature:
4348       //   __builtin_fprintf (FILE*, format, ...)
4349       idx_format = 1;
4350       info.argidx = 2;
4351       idx_dstptr = -1;
4352       break;
4353 
4354     case BUILT_IN_FPRINTF_CHK:
4355       // Signature:
4356       //   __builtin_fprintf_chk (FILE*, ost, format, ...)
4357       idx_format = 2;
4358       info.argidx = 3;
4359       idx_dstptr = -1;
4360       break;
4361 
4362     case BUILT_IN_FPRINTF_UNLOCKED:
4363       // Signature:
4364       //   __builtin_fprintf_unnlocked (FILE*, format, ...)
4365       idx_format = 1;
4366       info.argidx = 2;
4367       idx_dstptr = -1;
4368       break;
4369 
4370     case BUILT_IN_PRINTF:
4371       // Signature:
4372       //   __builtin_printf (format, ...)
4373       idx_format = 0;
4374       info.argidx = 1;
4375       idx_dstptr = -1;
4376       break;
4377 
4378     case BUILT_IN_PRINTF_CHK:
4379       // Signature:
4380       //   __builtin_printf_chk (ost, format, ...)
4381       idx_format = 1;
4382       info.argidx = 2;
4383       idx_dstptr = -1;
4384       break;
4385 
4386     case BUILT_IN_PRINTF_UNLOCKED:
4387       // Signature:
4388       //   __builtin_printf (format, ...)
4389       idx_format = 0;
4390       info.argidx = 1;
4391       idx_dstptr = -1;
4392       break;
4393 
4394     case BUILT_IN_SPRINTF:
4395       // Signature:
4396       //   __builtin_sprintf (dst, format, ...)
4397       idx_format = 1;
4398       info.argidx = 2;
4399       break;
4400 
4401     case BUILT_IN_SPRINTF_CHK:
4402       // Signature:
4403       //   __builtin___sprintf_chk (dst, ost, objsize, format, ...)
4404       idx_objsize = 2;
4405       idx_format = 3;
4406       info.argidx = 4;
4407       break;
4408 
4409     case BUILT_IN_SNPRINTF:
4410       // Signature:
4411       //   __builtin_snprintf (dst, size, format, ...)
4412       idx_dstsize = 1;
4413       idx_format = 2;
4414       info.argidx = 3;
4415       info.bounded = true;
4416       break;
4417 
4418     case BUILT_IN_SNPRINTF_CHK:
4419       // Signature:
4420       //   __builtin___snprintf_chk (dst, size, ost, objsize, format, ...)
4421       idx_dstsize = 1;
4422       idx_objsize = 3;
4423       idx_format = 4;
4424       info.argidx = 5;
4425       info.bounded = true;
4426       break;
4427 
4428     case BUILT_IN_VFPRINTF:
4429       // Signature:
4430       //   __builtin_vprintf (FILE*, format, va_list)
4431       idx_format = 1;
4432       info.argidx = -1;
4433       idx_dstptr = -1;
4434       break;
4435 
4436     case BUILT_IN_VFPRINTF_CHK:
4437       // Signature:
4438       //   __builtin___vfprintf_chk (FILE*, ost, format, va_list)
4439       idx_format = 2;
4440       info.argidx = -1;
4441       idx_dstptr = -1;
4442       break;
4443 
4444     case BUILT_IN_VPRINTF:
4445       // Signature:
4446       //   __builtin_vprintf (format, va_list)
4447       idx_format = 0;
4448       info.argidx = -1;
4449       idx_dstptr = -1;
4450       break;
4451 
4452     case BUILT_IN_VPRINTF_CHK:
4453       // Signature:
4454       //   __builtin___vprintf_chk (ost, format, va_list)
4455       idx_format = 1;
4456       info.argidx = -1;
4457       idx_dstptr = -1;
4458       break;
4459 
4460     case BUILT_IN_VSNPRINTF:
4461       // Signature:
4462       //   __builtin_vsprintf (dst, size, format, va)
4463       idx_dstsize = 1;
4464       idx_format = 2;
4465       info.argidx = -1;
4466       info.bounded = true;
4467       break;
4468 
4469     case BUILT_IN_VSNPRINTF_CHK:
4470       // Signature:
4471       //   __builtin___vsnprintf_chk (dst, size, ost, objsize, format, va)
4472       idx_dstsize = 1;
4473       idx_objsize = 3;
4474       idx_format = 4;
4475       info.argidx = -1;
4476       info.bounded = true;
4477       break;
4478 
4479     case BUILT_IN_VSPRINTF:
4480       // Signature:
4481       //   __builtin_vsprintf (dst, format, va)
4482       idx_format = 1;
4483       info.argidx = -1;
4484       break;
4485 
4486     case BUILT_IN_VSPRINTF_CHK:
4487       // Signature:
4488       //   __builtin___vsprintf_chk (dst, ost, objsize, format, va)
4489       idx_format = 3;
4490       idx_objsize = 2;
4491       info.argidx = -1;
4492       break;
4493 
4494     default:
4495       return false;
4496     }
4497 
4498   /* Set the global warning level for this function.  */
4499   warn_level = info.bounded ? warn_format_trunc : warn_format_overflow;
4500 
4501   /* For all string functions the first argument is a pointer to
4502      the destination.  */
4503   tree dstptr = (idx_dstptr < gimple_call_num_args (info.callstmt)
4504 		 ? gimple_call_arg (info.callstmt, 0) : NULL_TREE);
4505 
4506   info.format = gimple_call_arg (info.callstmt, idx_format);
4507 
4508   /* True when the destination size is constant as opposed to the lower
4509      or upper bound of a range.  */
4510   bool dstsize_cst_p = true;
4511   bool posunder4k = true;
4512 
4513   if (idx_dstsize == UINT_MAX)
4514     {
4515       /* For non-bounded functions like sprintf, determine the size
4516 	 of the destination from the object or pointer passed to it
4517 	 as the first argument.  */
4518       dstsize = get_destination_size (dstptr, ptr_qry);
4519     }
4520   else if (tree size = gimple_call_arg (info.callstmt, idx_dstsize))
4521     {
4522       /* For bounded functions try to get the size argument.  */
4523 
4524       if (TREE_CODE (size) == INTEGER_CST)
4525 	{
4526 	  dstsize = tree_to_uhwi (size);
4527 	  /* No object can be larger than SIZE_MAX bytes (half the address
4528 	     space) on the target.
4529 	     The functions are defined only for output of at most INT_MAX
4530 	     bytes.  Specifying a bound in excess of that limit effectively
4531 	     defeats the bounds checking (and on some implementations such
4532 	     as Solaris cause the function to fail with EINVAL).  */
4533 	  if (dstsize > target_size_max () / 2)
4534 	    {
4535 	      /* Avoid warning if -Wstringop-overflow is specified since
4536 		 it also warns for the same thing though only for the
4537 		 checking built-ins.  */
4538 	      if ((idx_objsize == UINT_MAX
4539 		   || !warn_stringop_overflow))
4540 		warning_at (gimple_location (info.callstmt), info.warnopt (),
4541 			    "specified bound %wu exceeds maximum object size "
4542 			    "%wu",
4543 			    dstsize, target_size_max () / 2);
4544 	      /* POSIX requires snprintf to fail if DSTSIZE is greater
4545 		 than INT_MAX.  Even though not all POSIX implementations
4546 		 conform to the requirement, avoid folding in this case.  */
4547 	      posunder4k = false;
4548 	    }
4549 	  else if (dstsize > target_int_max ())
4550 	    {
4551 	      warning_at (gimple_location (info.callstmt), info.warnopt (),
4552 			  "specified bound %wu exceeds %<INT_MAX%>",
4553 			  dstsize);
4554 	      /* POSIX requires snprintf to fail if DSTSIZE is greater
4555 		 than INT_MAX.  Avoid folding in that case.  */
4556 	      posunder4k = false;
4557 	    }
4558 	}
4559       else if (TREE_CODE (size) == SSA_NAME)
4560 	{
4561 	  /* Try to determine the range of values of the argument
4562 	     and use the greater of the two at level 1 and the smaller
4563 	     of them at level 2.  */
4564 	  value_range vr;
4565 	  ptr_qry.rvals->range_of_expr (vr, size, info.callstmt);
4566 
4567 	  if (!vr.undefined_p ())
4568 	    {
4569 	      tree type = TREE_TYPE (size);
4570 	      tree tmin = wide_int_to_tree (type, vr.lower_bound ());
4571 	      tree tmax = wide_int_to_tree (type, vr.upper_bound ());
4572 	      unsigned HOST_WIDE_INT minsize = TREE_INT_CST_LOW (tmin);
4573 	      unsigned HOST_WIDE_INT maxsize = TREE_INT_CST_LOW (tmax);
4574 	      dstsize = warn_level < 2 ? maxsize : minsize;
4575 
4576 	      if (minsize > target_int_max ())
4577 		warning_at (gimple_location (info.callstmt), info.warnopt (),
4578 			    "specified bound range [%wu, %wu] exceeds "
4579 			    "%<INT_MAX%>",
4580 			    minsize, maxsize);
4581 
4582 	      /* POSIX requires snprintf to fail if DSTSIZE is greater
4583 		 than INT_MAX.  Avoid folding if that's possible.  */
4584 	      if (maxsize > target_int_max ())
4585 		posunder4k = false;
4586 	    }
4587 
4588 	  /* The destination size is not constant.  If the function is
4589 	     bounded (e.g., snprintf) a lower bound of zero doesn't
4590 	     necessarily imply it can be eliminated.  */
4591 	  dstsize_cst_p = false;
4592 	}
4593     }
4594 
4595   if (idx_objsize != UINT_MAX)
4596     if (tree size = gimple_call_arg (info.callstmt, idx_objsize))
4597       if (tree_fits_uhwi_p (size))
4598 	objsize = tree_to_uhwi (size);
4599 
4600   if (info.bounded && !dstsize)
4601     {
4602       /* As a special case, when the explicitly specified destination
4603 	 size argument (to a bounded function like snprintf) is zero
4604 	 it is a request to determine the number of bytes on output
4605 	 without actually producing any.  Pretend the size is
4606 	 unlimited in this case.  */
4607       info.objsize = HOST_WIDE_INT_MAX;
4608       info.nowrite = dstsize_cst_p;
4609     }
4610   else
4611     {
4612       /* For calls to non-bounded functions or to those of bounded
4613 	 functions with a non-zero size, warn if the destination
4614 	 pointer is null.  */
4615       if (dstptr && integer_zerop (dstptr))
4616 	{
4617 	  /* This is diagnosed with -Wformat only when the null is a constant
4618 	     pointer.  The warning here diagnoses instances where the pointer
4619 	     is not constant.  */
4620 	  location_t loc = gimple_location (info.callstmt);
4621 	  warning_at (EXPR_LOC_OR_LOC (dstptr, loc),
4622 		      info.warnopt (), "%Gnull destination pointer",
4623 		      info.callstmt);
4624 	  return false;
4625 	}
4626 
4627       /* Set the object size to the smaller of the two arguments
4628 	 of both have been specified and they're not equal.  */
4629       info.objsize = dstsize < objsize ? dstsize : objsize;
4630 
4631       if (info.bounded
4632 	  && dstsize < target_size_max () / 2 && objsize < dstsize
4633 	  /* Avoid warning if -Wstringop-overflow is specified since
4634 	     it also warns for the same thing though only for the
4635 	     checking built-ins.  */
4636 	  && (idx_objsize == UINT_MAX
4637 	      || !warn_stringop_overflow))
4638 	{
4639 	  warning_at (gimple_location (info.callstmt), info.warnopt (),
4640 		      "specified bound %wu exceeds the size %wu "
4641 		      "of the destination object", dstsize, objsize);
4642 	}
4643     }
4644 
4645   /* Determine if the format argument may be null and warn if not
4646      and if the argument is null.  */
4647   if (integer_zerop (info.format)
4648       && gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL))
4649     {
4650       location_t loc = gimple_location (info.callstmt);
4651       warning_at (EXPR_LOC_OR_LOC (info.format, loc),
4652 		  info.warnopt (), "%Gnull format string",
4653 		  info.callstmt);
4654       return false;
4655     }
4656 
4657   info.fmtstr = get_format_string (info.format, &info.fmtloc);
4658   if (!info.fmtstr)
4659     return false;
4660 
4661   if (warn_restrict)
4662     {
4663       /* Compute the origin of the destination pointer and its offset
4664 	 from the base object/pointer if possible.  */
4665       info.dst_offset = 0;
4666       info.dst_origin = get_origin_and_offset (dstptr, &info.dst_field,
4667 					       &info.dst_offset);
4668     }
4669 
4670   /* The result is the number of bytes output by the formatted function,
4671      including the terminating NUL.  */
4672   format_result res;
4673 
4674   /* I/O functions with no destination argument (i.e., all forms of fprintf
4675      and printf) may fail under any conditions.  Others (i.e., all forms of
4676      sprintf) may only fail under specific conditions determined for each
4677      directive.  Clear POSUNDER4K for the former set of functions and set
4678      it to true for the latter (it can only be cleared later, but it is
4679      never set to true again).  */
4680   res.posunder4k = posunder4k && dstptr;
4681 
4682   bool success = compute_format_length (info, &res, ptr_qry.rvals);
4683   if (res.warned)
4684     gimple_set_no_warning (info.callstmt, true);
4685 
4686   /* When optimizing and the printf return value optimization is enabled,
4687      attempt to substitute the computed result for the return value of
4688      the call.  Avoid this optimization when -frounding-math is in effect
4689      and the format string contains a floating point directive.  */
4690   bool call_removed = false;
4691   if (success && optimize > 0)
4692     {
4693       /* Save a copy of the iterator pointing at the call.  The iterator
4694 	 may change to point past the call in try_substitute_return_value
4695 	 but the original value is needed in try_simplify_call.  */
4696       gimple_stmt_iterator gsi_call = *gsi;
4697 
4698       if (flag_printf_return_value
4699 	  && (!flag_rounding_math || !res.floating))
4700 	call_removed = try_substitute_return_value (gsi, info, res);
4701 
4702       if (!call_removed)
4703 	try_simplify_call (&gsi_call, info, res);
4704     }
4705 
4706   return call_removed;
4707 }
4708