1 /* Copyright (C) 2016-2018 Free Software Foundation, Inc.
2    Contributed by Martin Sebor <msebor@redhat.com>.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This file implements the printf-return-value pass.  The pass does
21    two things: 1) it analyzes calls to formatted output functions like
22    sprintf looking for possible buffer overflows and calls to bounded
23    functions like snprintf for early truncation (and under the control
24    of the -Wformat-length option issues warnings), and 2) under the
25    control of the -fprintf-return-value option it folds the return
26    value of safe calls into constants, making it possible to eliminate
27    code that depends on the value of those constants.
28 
   For all functions (bounded or not) the pass uses the size of the
   destination object.  That means that it will diagnose calls to
   snprintf not on the basis of the size specified by the function's
   second argument but rather on the basis of the size the first
   argument points to (if possible).  For bound-checking built-ins
   like __builtin___snprintf_chk the pass uses the size typically
   determined by __builtin_object_size and passed to the built-in
   by the Glibc inline wrapper.
37 
   The pass handles all forms of standard sprintf format directives,
   including character, integer, floating point, pointer, and strings,
   with the standard C flags, widths, and precisions.  For integers
   and strings it computes the length of output itself.  For floating
   point it uses MPFR to format known constants with up and down
   rounding and uses the resulting range of output lengths.  For
   strings it uses the length of string literals and the sizes of
   character arrays that a character pointer may point to as a bound
   on the longest string.  */
47 
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "backend.h"
52 #include "tree.h"
53 #include "gimple.h"
54 #include "tree-pass.h"
55 #include "ssa.h"
56 #include "gimple-fold.h"
57 #include "gimple-pretty-print.h"
58 #include "diagnostic-core.h"
59 #include "fold-const.h"
60 #include "gimple-iterator.h"
61 #include "tree-ssa.h"
62 #include "tree-object-size.h"
63 #include "params.h"
64 #include "tree-cfg.h"
65 #include "tree-ssa-propagate.h"
66 #include "calls.h"
67 #include "cfgloop.h"
68 #include "intl.h"
69 #include "langhooks.h"
70 
71 #include "builtins.h"
72 #include "stor-layout.h"
73 
74 #include "realmpfr.h"
75 #include "target.h"
76 
77 #include "cpplib.h"
78 #include "input.h"
79 #include "toplev.h"
80 #include "substring-locations.h"
81 #include "diagnostic.h"
82 #include "domwalk.h"
83 #include "alloc-pool.h"
84 #include "vr-values.h"
85 #include "gimple-ssa-evrp-analyze.h"
86 
/* A likely worst-case value of MB_LEN_MAX for the target, big enough
   for UTF-8.  If the value were used for optimization it should come
   from a target hook; for warnings this approximation suffices.  */
#define target_mb_len_max()   (6)

/* The value of LDBL_MAX_10_EXP assumed by target_dir_max below.  */
#define IEEE_MAX_10_EXP    (4932)

/* The maximum number of bytes a single non-string directive can
   produce.  Modeled on printf("%.*Lf", INT_MAX, -LDBL_MAX) with
   LDBL_MAX_10_EXP equal to IEEE_MAX_10_EXP.  */
#define target_dir_max()   (target_int_max () + IEEE_MAX_10_EXP + 2)
97 
98 namespace {
99 
100 const pass_data pass_data_sprintf_length = {
101   GIMPLE_PASS,             // pass type
102   "printf-return-value",   // pass name
103   OPTGROUP_NONE,           // optinfo_flags
104   TV_NONE,                 // tv_id
105   PROP_cfg,                // properties_required
106   0,	                   // properties_provided
107   0,	                   // properties_destroyed
108   0,	                   // properties_start
109   0,	                   // properties_finish
110 };
111 
/* Defined further below; only pointers to it are needed until then.  */
struct format_result;

/* The warning level in effect for the function currently being
   processed: the value of warn_format_trunc for bounded functions
   and the value of warn_format_overflow for all others.  */

static int warn_level;
119 
120 class sprintf_dom_walker : public dom_walker
121 {
122  public:
sprintf_dom_walker()123   sprintf_dom_walker () : dom_walker (CDI_DOMINATORS) {}
~sprintf_dom_walker()124   ~sprintf_dom_walker () {}
125 
126   edge before_dom_children (basic_block) FINAL OVERRIDE;
127   void after_dom_children (basic_block) FINAL OVERRIDE;
128   bool handle_gimple_call (gimple_stmt_iterator *);
129 
130   struct call_info;
131   bool compute_format_length (call_info &, format_result *);
132   class evrp_range_analyzer evrp_range_analyzer;
133 };
134 
135 class pass_sprintf_length : public gimple_opt_pass
136 {
137   bool fold_return_value;
138 
139 public:
pass_sprintf_length(gcc::context * ctxt)140   pass_sprintf_length (gcc::context *ctxt)
141     : gimple_opt_pass (pass_data_sprintf_length, ctxt),
142     fold_return_value (false)
143   { }
144 
clone()145   opt_pass * clone () { return new pass_sprintf_length (m_ctxt); }
146 
147   virtual bool gate (function *);
148 
149   virtual unsigned int execute (function *);
150 
set_pass_param(unsigned int n,bool param)151   void set_pass_param (unsigned int n, bool param)
152     {
153       gcc_assert (n == 0);
154       fold_return_value = param;
155     }
156 
157 };
158 
159 bool
gate(function *)160 pass_sprintf_length::gate (function *)
161 {
162   /* Run the pass iff -Warn-format-overflow or -Warn-format-truncation
163      is specified and either not optimizing and the pass is being invoked
164      early, or when optimizing and the pass is being invoked during
165      optimization (i.e., "late").  */
166   return ((warn_format_overflow > 0
167 	   || warn_format_trunc > 0
168 	   || flag_printf_return_value)
169 	  && (optimize > 0) == fold_return_value);
170 }
171 
/* Byte counts describing the output of either a whole formatting
   function or a single directive: the minimum, the maximum, the
   likely, and the unlikely (worst case) maximum.  */

struct result_range
{
  /* Absolute minimum: a successful conversion never yields fewer
     bytes than this.  (A value greater than HOST_WIDE_INT_MAX can be
     used to indicate an erroneous conversion.)  */
  unsigned HOST_WIDE_INT min;

  /* Likely maximum, used in diagnostics.  Usually equal to the worst
     case UNLIKELY counter.  */
  unsigned HOST_WIDE_INT max;

  /* Likely result, used to trigger diagnostics.  For a conversion
     producing between MIN and MAX bytes it lies within that range.  */
  unsigned HOST_WIDE_INT likely;

  /* Worst case maximum of a directive, which exceeds MAX only in rare
     cases (e.g., for multibyte characters).  It controls the return
     value optimization but is not used in diagnostics.  */
  unsigned HOST_WIDE_INT unlikely;
};
195 
196 /* The result of a call to a formatted function.  */
197 
198 struct format_result
199 {
200   /* Range of characters written by the formatted function.
201      Setting the minimum to HOST_WIDE_INT_MAX disables all
202      length tracking for the remainder of the format string.  */
203   result_range range;
204 
205   /* True when the range above is obtained from known values of
206      directive arguments, or bounds on the amount of output such
207      as width and precision, and not the result of  heuristics that
208      depend on warning levels.  It's used to issue stricter diagnostics
209      in cases where strings of unknown lengths are bounded by the arrays
210      they are determined to refer to.  KNOWNRANGE must not be used for
211      the return value optimization.  */
212   bool knownrange;
213 
214   /* True if no individual directive resulted in more than 4095 bytes
215      of output (the total NUMBER_CHARS_{MIN,MAX} might be greater).
216      Implementations are not required to handle directives that produce
217      more than 4K bytes (leading to undefined behavior) and so when one
218      is found it disables the return value optimization.  */
219   bool under4k;
220 
221   /* True when a floating point directive has been seen in the format
222      string.  */
223   bool floating;
224 
225   /* True when an intermediate result has caused a warning.  Used to
226      avoid issuing duplicate warnings while finishing the processing
227      of a call.  WARNED also disables the return value optimization.  */
228   bool warned;
229 
230   /* Preincrement the number of output characters by 1.  */
231   format_result& operator++ ()
232   {
233     return *this += 1;
234   }
235 
236   /* Postincrement the number of output characters by 1.  */
237   format_result operator++ (int)
238   {
239     format_result prev (*this);
240     *this += 1;
241     return prev;
242   }
243 
244   /* Increment the number of output characters by N.  */
245   format_result& operator+= (unsigned HOST_WIDE_INT);
246 };
247 
248 format_result&
249 format_result::operator+= (unsigned HOST_WIDE_INT n)
250 {
251   gcc_assert (n < HOST_WIDE_INT_MAX);
252 
253   if (range.min < HOST_WIDE_INT_MAX)
254     range.min += n;
255 
256   if (range.max < HOST_WIDE_INT_MAX)
257     range.max += n;
258 
259   if (range.likely < HOST_WIDE_INT_MAX)
260     range.likely += n;
261 
262   if (range.unlikely < HOST_WIDE_INT_MAX)
263     range.unlikely += n;
264 
265   return *this;
266 }
267 
268 /* Return the value of INT_MIN for the target.  */
269 
270 static inline HOST_WIDE_INT
target_int_min()271 target_int_min ()
272 {
273   return tree_to_shwi (TYPE_MIN_VALUE (integer_type_node));
274 }
275 
276 /* Return the value of INT_MAX for the target.  */
277 
278 static inline unsigned HOST_WIDE_INT
target_int_max()279 target_int_max ()
280 {
281   return tree_to_uhwi (TYPE_MAX_VALUE (integer_type_node));
282 }
283 
284 /* Return the value of SIZE_MAX for the target.  */
285 
286 static inline unsigned HOST_WIDE_INT
target_size_max()287 target_size_max ()
288 {
289   return tree_to_uhwi (TYPE_MAX_VALUE (size_type_node));
290 }
291 
292 /* A straightforward mapping from the execution character set to the host
293    character set indexed by execution character.  */
294 
295 static char target_to_host_charmap[256];
296 
297 /* Initialize a mapping from the execution character set to the host
298    character set.  */
299 
300 static bool
init_target_to_host_charmap()301 init_target_to_host_charmap ()
302 {
303   /* If the percent sign is non-zero the mapping has already been
304      initialized.  */
305   if (target_to_host_charmap['%'])
306     return true;
307 
308   /* Initialize the target_percent character (done elsewhere).  */
309   if (!init_target_chars ())
310     return false;
311 
312   /* The subset of the source character set used by printf conversion
313      specifications (strictly speaking, not all letters are used but
314      they are included here for the sake of simplicity).  The dollar
315      sign must be included even though it's not in the basic source
316      character set.  */
317   const char srcset[] = " 0123456789!\"#%&'()*+,-./:;<=>?[\\]^_{|}~$"
318     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
319 
320   /* Set the mapping for all characters to some ordinary value (i,e.,
321      not none used in printf conversion specifications) and overwrite
322      those that are used by conversion specifications with their
323      corresponding values.  */
324   memset (target_to_host_charmap + 1, '?', sizeof target_to_host_charmap - 1);
325 
326   /* Are the two sets of characters the same?  */
327   bool all_same_p = true;
328 
329   for (const char *pc = srcset; *pc; ++pc)
330     {
331       /* Slice off the high end bits in case target characters are
332 	 signed.  All values are expected to be non-nul, otherwise
333 	 there's a problem.  */
334       if (unsigned char tc = lang_hooks.to_target_charset (*pc))
335 	{
336 	  target_to_host_charmap[tc] = *pc;
337 	  if (tc != *pc)
338 	    all_same_p = false;
339 	}
340       else
341 	return false;
342 
343     }
344 
345   /* Set the first element to a non-zero value if the mapping
346      is 1-to-1, otherwise leave it clear (NUL is assumed to be
347      the same in both character sets).  */
348   target_to_host_charmap[0] = all_same_p;
349 
350   return true;
351 }
352 
353 /* Return the host source character corresponding to the character
354    CH in the execution character set if one exists, or some innocuous
355    (non-special, non-nul) source character otherwise.  */
356 
357 static inline unsigned char
target_to_host(unsigned char ch)358 target_to_host (unsigned char ch)
359 {
360   return target_to_host_charmap[ch];
361 }
362 
363 /* Convert an initial substring of the string TARGSTR consisting of
364    characters in the execution character set into a string in the
365    source character set on the host and store up to HOSTSZ characters
366    in the buffer pointed to by HOSTR.  Return HOSTR.  */
367 
368 static const char*
target_to_host(char * hostr,size_t hostsz,const char * targstr)369 target_to_host (char *hostr, size_t hostsz, const char *targstr)
370 {
371   /* Make sure the buffer is reasonably big.  */
372   gcc_assert (hostsz > 4);
373 
374   /* The interesting subset of source and execution characters are
375      the same so no conversion is necessary.  However, truncate
376      overlong strings just like the translated strings are.  */
377   if (target_to_host_charmap['\0'] == 1)
378     {
379       strncpy (hostr, targstr, hostsz - 4);
380       if (strlen (targstr) >= hostsz)
381 	strcpy (hostr + hostsz - 4, "...");
382       return hostr;
383     }
384 
385   /* Convert the initial substring of TARGSTR to the corresponding
386      characters in the host set, appending "..." if TARGSTR is too
387      long to fit.  Using the static buffer assumes the function is
388      not called in between sequence points (which it isn't).  */
389   for (char *ph = hostr; ; ++targstr)
390     {
391       *ph++ = target_to_host (*targstr);
392       if (!*targstr)
393 	break;
394 
395       if (size_t (ph - hostr) == hostsz - 4)
396 	{
397 	  *ph = '\0';
398 	  strcat (ph, "...");
399 	  break;
400 	}
401     }
402 
403   return hostr;
404 }
405 
406 /* Convert the sequence of decimal digits in the execution character
407    starting at S to a long, just like strtol does.  Return the result
408    and set *END to one past the last converted character.  On range
409    error set ERANGE to the digit that caused it.  */
410 
411 static inline long
target_strtol10(const char ** ps,const char ** erange)412 target_strtol10 (const char **ps, const char **erange)
413 {
414   unsigned HOST_WIDE_INT val = 0;
415   for ( ; ; ++*ps)
416     {
417       unsigned char c = target_to_host (**ps);
418       if (ISDIGIT (c))
419 	{
420 	  c -= '0';
421 
422 	  /* Check for overflow.  */
423 	  if (val > (LONG_MAX - c) / 10LU)
424 	    {
425 	      val = LONG_MAX;
426 	      *erange = *ps;
427 
428 	      /* Skip the remaining digits.  */
429 	      do
430 		c = target_to_host (*++*ps);
431 	      while (ISDIGIT (c));
432 	      break;
433 	    }
434 	  else
435 	    val = val * 10 + c;
436 	}
437       else
438 	break;
439     }
440 
441   return val;
442 }
443 
444 /* Return the constant initial value of DECL if available or DECL
445    otherwise.  Same as the synonymous function in c/c-typeck.c.  */
446 
447 static tree
decl_constant_value(tree decl)448 decl_constant_value (tree decl)
449 {
450   if (/* Don't change a variable array bound or initial value to a constant
451 	 in a place where a variable is invalid.  Note that DECL_INITIAL
452 	 isn't valid for a PARM_DECL.  */
453       current_function_decl != 0
454       && TREE_CODE (decl) != PARM_DECL
455       && !TREE_THIS_VOLATILE (decl)
456       && TREE_READONLY (decl)
457       && DECL_INITIAL (decl) != 0
458       && TREE_CODE (DECL_INITIAL (decl)) != ERROR_MARK
459       /* This is invalid if initial value is not constant.
460 	 If it has either a function call, a memory reference,
461 	 or a variable, then re-evaluating it could give different results.  */
462       && TREE_CONSTANT (DECL_INITIAL (decl))
463       /* Check for cases where this is sub-optimal, even though valid.  */
464       && TREE_CODE (DECL_INITIAL (decl)) != CONSTRUCTOR)
465     return DECL_INITIAL (decl);
466   return decl;
467 }
468 
469 /* Given FORMAT, set *PLOC to the source location of the format string
470    and return the format string if it is known or null otherwise.  */
471 
472 static const char*
get_format_string(tree format,location_t * ploc)473 get_format_string (tree format, location_t *ploc)
474 {
475   if (VAR_P (format))
476     {
477       /* Pull out a constant value if the front end didn't.  */
478       format = decl_constant_value (format);
479       STRIP_NOPS (format);
480     }
481 
482   if (integer_zerop (format))
483     {
484       /* FIXME: Diagnose null format string if it hasn't been diagnosed
485 	 by -Wformat (the latter diagnoses only nul pointer constants,
486 	 this pass can do better).  */
487       return NULL;
488     }
489 
490   HOST_WIDE_INT offset = 0;
491 
492   if (TREE_CODE (format) == POINTER_PLUS_EXPR)
493     {
494       tree arg0 = TREE_OPERAND (format, 0);
495       tree arg1 = TREE_OPERAND (format, 1);
496       STRIP_NOPS (arg0);
497       STRIP_NOPS (arg1);
498 
499       if (TREE_CODE (arg1) != INTEGER_CST)
500 	return NULL;
501 
502       format = arg0;
503 
504       /* POINTER_PLUS_EXPR offsets are to be interpreted signed.  */
505       if (!cst_and_fits_in_hwi (arg1))
506 	return NULL;
507 
508       offset = int_cst_value (arg1);
509     }
510 
511   if (TREE_CODE (format) != ADDR_EXPR)
512     return NULL;
513 
514   *ploc = EXPR_LOC_OR_LOC (format, input_location);
515 
516   format = TREE_OPERAND (format, 0);
517 
518   if (TREE_CODE (format) == ARRAY_REF
519       && tree_fits_shwi_p (TREE_OPERAND (format, 1))
520       && (offset += tree_to_shwi (TREE_OPERAND (format, 1))) >= 0)
521     format = TREE_OPERAND (format, 0);
522 
523   if (offset < 0)
524     return NULL;
525 
526   tree array_init;
527   tree array_size = NULL_TREE;
528 
529   if (VAR_P (format)
530       && TREE_CODE (TREE_TYPE (format)) == ARRAY_TYPE
531       && (array_init = decl_constant_value (format)) != format
532       && TREE_CODE (array_init) == STRING_CST)
533     {
534       /* Extract the string constant initializer.  Note that this may
535 	 include a trailing NUL character that is not in the array (e.g.
536 	 const char a[3] = "foo";).  */
537       array_size = DECL_SIZE_UNIT (format);
538       format = array_init;
539     }
540 
541   if (TREE_CODE (format) != STRING_CST)
542     return NULL;
543 
544   tree type = TREE_TYPE (format);
545 
546   scalar_int_mode char_mode;
547   if (!is_int_mode (TYPE_MODE (TREE_TYPE (type)), &char_mode)
548       || GET_MODE_SIZE (char_mode) != 1)
549     {
550       /* Wide format string.  */
551       return NULL;
552     }
553 
554   const char *fmtstr = TREE_STRING_POINTER (format);
555   unsigned fmtlen = TREE_STRING_LENGTH (format);
556 
557   if (array_size)
558     {
559       /* Variable length arrays can't be initialized.  */
560       gcc_assert (TREE_CODE (array_size) == INTEGER_CST);
561 
562       if (tree_fits_shwi_p (array_size))
563 	{
564 	  HOST_WIDE_INT array_size_value = tree_to_shwi (array_size);
565 	  if (array_size_value > 0
566 	      && array_size_value == (int) array_size_value
567 	      && fmtlen > array_size_value)
568 	    fmtlen = array_size_value;
569 	}
570     }
571   if (offset)
572     {
573       if (offset >= fmtlen)
574 	return NULL;
575 
576       fmtstr += offset;
577       fmtlen -= offset;
578     }
579 
580   if (fmtlen < 1 || fmtstr[--fmtlen] != 0)
581     {
582       /* FIXME: Diagnose an unterminated format string if it hasn't been
583 	 diagnosed by -Wformat.  Similarly to a null format pointer,
584 	 -Wformay diagnoses only nul pointer constants, this pass can
585 	 do better).  */
586       return NULL;
587     }
588 
589   return fmtstr;
590 }
591 
592 /* For convenience and brevity, shorter named entrypoints of
593    format_warning_at_substring and format_warning_at_substring_n.
594    These have to be functions with the attribute so that exgettext
595    works properly.  */
596 
597 static bool
598 ATTRIBUTE_GCC_DIAG (5, 6)
fmtwarn(const substring_loc & fmt_loc,location_t param_loc,const char * corrected_substring,int opt,const char * gmsgid,...)599 fmtwarn (const substring_loc &fmt_loc, location_t param_loc,
600 	 const char *corrected_substring, int opt, const char *gmsgid, ...)
601 {
602   va_list ap;
603   va_start (ap, gmsgid);
604   bool warned = format_warning_va (fmt_loc, param_loc, corrected_substring,
605 				   opt, gmsgid, &ap);
606   va_end (ap);
607 
608   return warned;
609 }
610 
611 static bool
612 ATTRIBUTE_GCC_DIAG (6, 8) ATTRIBUTE_GCC_DIAG (7, 8)
fmtwarn_n(const substring_loc & fmt_loc,location_t param_loc,const char * corrected_substring,int opt,unsigned HOST_WIDE_INT n,const char * singular_gmsgid,const char * plural_gmsgid,...)613 fmtwarn_n (const substring_loc &fmt_loc, location_t param_loc,
614 	   const char *corrected_substring, int opt, unsigned HOST_WIDE_INT n,
615 	   const char *singular_gmsgid, const char *plural_gmsgid, ...)
616 {
617   va_list ap;
618   va_start (ap, plural_gmsgid);
619   bool warned = format_warning_n_va (fmt_loc, param_loc, corrected_substring,
620 				     opt, n, singular_gmsgid, plural_gmsgid,
621 				     &ap);
622   va_end (ap);
623 
624   return warned;
625 }
626 
/* The length modifiers of a conversion specification.  */

enum format_lengths
{
  FMT_LEN_none,  /* No modifier.  */
  FMT_LEN_hh,    /* "hh": char argument.  */
  FMT_LEN_h,     /* "h": short.  */
  FMT_LEN_l,     /* "l": long.  */
  FMT_LEN_ll,    /* "ll": long long.  */
  FMT_LEN_L,     /* "L": long double (and GNU long long).  */
  FMT_LEN_z,     /* "z": size_t.  */
  FMT_LEN_t,     /* "t": ptrdiff_t.  */
  FMT_LEN_j      /* "j": intmax_t.  */
};

641 
642 
643 /* Description of the result of conversion either of a single directive
644    or the whole format string.  */
645 
646 struct fmtresult
647 {
648   /* Construct a FMTRESULT object with all counters initialized
649      to MIN.  KNOWNRANGE is set when MIN is valid.  */
650   fmtresult (unsigned HOST_WIDE_INT min = HOST_WIDE_INT_MAX)
argminfmtresult651   : argmin (), argmax (),
652     knownrange (min < HOST_WIDE_INT_MAX),
653     nullp ()
654   {
655     range.min = min;
656     range.max = min;
657     range.likely = min;
658     range.unlikely = min;
659   }
660 
661   /* Construct a FMTRESULT object with MIN, MAX, and LIKELY counters.
662      KNOWNRANGE is set when both MIN and MAX are valid.   */
663   fmtresult (unsigned HOST_WIDE_INT min, unsigned HOST_WIDE_INT max,
664 	     unsigned HOST_WIDE_INT likely = HOST_WIDE_INT_MAX)
argminfmtresult665   : argmin (), argmax (),
666     knownrange (min < HOST_WIDE_INT_MAX && max < HOST_WIDE_INT_MAX),
667     nullp ()
668   {
669     range.min = min;
670     range.max = max;
671     range.likely = max < likely ? min : likely;
672     range.unlikely = max;
673   }
674 
675   /* Adjust result upward to reflect the RANGE of values the specified
676      width or precision is known to be in.  */
677   fmtresult& adjust_for_width_or_precision (const HOST_WIDE_INT[2],
678 					    tree = NULL_TREE,
679 					    unsigned = 0, unsigned = 0);
680 
681   /* Return the maximum number of decimal digits a value of TYPE
682      formats as on output.  */
683   static unsigned type_max_digits (tree, int);
684 
685   /* The range a directive's argument is in.  */
686   tree argmin, argmax;
687 
688   /* The minimum and maximum number of bytes that a directive
689      results in on output for an argument in the range above.  */
690   result_range range;
691 
692   /* True when the range above is obtained from a known value of
693      a directive's argument or its bounds and not the result of
694      heuristics that depend on warning levels.  */
695   bool knownrange;
696 
697   /* True when the argument is a null pointer.  */
698   bool nullp;
699 };
700 
701 /* Adjust result upward to reflect the range ADJUST of values the
702    specified width or precision is known to be in.  When non-null,
703    TYPE denotes the type of the directive whose result is being
704    adjusted, BASE gives the base of the directive (octal, decimal,
705    or hex), and ADJ denotes the additional adjustment to the LIKELY
706    counter that may need to be added when ADJUST is a range.  */
707 
708 fmtresult&
adjust_for_width_or_precision(const HOST_WIDE_INT adjust[2],tree type,unsigned base,unsigned adj)709 fmtresult::adjust_for_width_or_precision (const HOST_WIDE_INT adjust[2],
710 					  tree type /* = NULL_TREE */,
711 					  unsigned base /* = 0 */,
712 					  unsigned adj /* = 0 */)
713 {
714   bool minadjusted = false;
715 
716   /* Adjust the minimum and likely counters.  */
717   if (adjust[0] >= 0)
718     {
719       if (range.min < (unsigned HOST_WIDE_INT)adjust[0])
720 	{
721 	  range.min = adjust[0];
722 	  minadjusted = true;
723 	}
724 
725       /* Adjust the likely counter.  */
726       if (range.likely < range.min)
727 	range.likely = range.min;
728     }
729   else if (adjust[0] == target_int_min ()
730 	   && (unsigned HOST_WIDE_INT)adjust[1] == target_int_max ())
731     knownrange = false;
732 
733   /* Adjust the maximum counter.  */
734   if (adjust[1] > 0)
735     {
736       if (range.max < (unsigned HOST_WIDE_INT)adjust[1])
737 	{
738 	  range.max = adjust[1];
739 
740 	  /* Set KNOWNRANGE if both the minimum and maximum have been
741 	     adjusted.  Otherwise leave it at what it was before.  */
742 	  knownrange = minadjusted;
743 	}
744     }
745 
746   if (warn_level > 1 && type)
747     {
748       /* For large non-constant width or precision whose range spans
749 	 the maximum number of digits produced by the directive for
750 	 any argument, set the likely number of bytes to be at most
751 	 the number digits plus other adjustment determined by the
752 	 caller (one for sign or two for the hexadecimal "0x"
753 	 prefix).  */
754       unsigned dirdigs = type_max_digits (type, base);
755       if (adjust[0] < dirdigs && dirdigs < adjust[1]
756 	  && range.likely < dirdigs)
757 	range.likely = dirdigs + adj;
758     }
759   else if (range.likely < (range.min ? range.min : 1))
760     {
761       /* Conservatively, set LIKELY to at least MIN but no less than
762 	 1 unless MAX is zero.  */
763       range.likely = (range.min
764 		      ? range.min
765 		      : range.max && (range.max < HOST_WIDE_INT_MAX
766 				      || warn_level > 1) ? 1 : 0);
767     }
768 
769   /* Finally adjust the unlikely counter to be at least as large as
770      the maximum.  */
771   if (range.unlikely < range.max)
772     range.unlikely = range.max;
773 
774   return *this;
775 }
776 
777 /* Return the maximum number of digits a value of TYPE formats in
778    BASE on output, not counting base prefix .  */
779 
780 unsigned
type_max_digits(tree type,int base)781 fmtresult::type_max_digits (tree type, int base)
782 {
783   unsigned prec = TYPE_PRECISION (type);
784   switch (base)
785     {
786     case 8:
787       return (prec + 2) / 3;
788     case 10:
789       /* Decimal approximation: yields 3, 5, 10, and 20 for precision
790 	 of 8, 16, 32, and 64 bits.  */
791       return prec * 301 / 1000 + 1;
792     case 16:
793       return prec / 4;
794     }
795 
796   gcc_unreachable ();
797 }
798 
799 static bool
800 get_int_range (tree, HOST_WIDE_INT *, HOST_WIDE_INT *, bool, HOST_WIDE_INT,
801 	       class vr_values *vr_values);
802 
803 /* Description of a format directive.  A directive is either a plain
804    string or a conversion specification that starts with '%'.  */
805 
806 struct directive
807 {
808   /* The 1-based directive number (for debugging).  */
809   unsigned dirno;
810 
811   /* The first character of the directive and its length.  */
812   const char *beg;
813   size_t len;
814 
815   /* A bitmap of flags, one for each character.  */
816   unsigned flags[256 / sizeof (int)];
817 
818   /* The range of values of the specified width, or -1 if not specified.  */
819   HOST_WIDE_INT width[2];
820   /* The range of values of the specified precision, or -1 if not
821      specified.  */
822   HOST_WIDE_INT prec[2];
823 
824   /* Length modifier.  */
825   format_lengths modifier;
826 
827   /* Format specifier character.  */
828   char specifier;
829 
830   /* The argument of the directive or null when the directive doesn't
831      take one or when none is available (such as for vararg functions).  */
832   tree arg;
833 
834   /* Format conversion function that given a directive and an argument
835      returns the formatting result.  */
836   fmtresult (*fmtfunc) (const directive &, tree, vr_values *);
837 
838   /* Return True when a the format flag CHR has been used.  */
get_flagdirective839   bool get_flag (char chr) const
840   {
841     unsigned char c = chr & 0xff;
842     return (flags[c / (CHAR_BIT * sizeof *flags)]
843 	    & (1U << (c % (CHAR_BIT * sizeof *flags))));
844   }
845 
846   /* Make a record of the format flag CHR having been used.  */
set_flagdirective847   void set_flag (char chr)
848   {
849     unsigned char c = chr & 0xff;
850     flags[c / (CHAR_BIT * sizeof *flags)]
851       |= (1U << (c % (CHAR_BIT * sizeof *flags)));
852   }
853 
854   /* Reset the format flag CHR.  */
clear_flagdirective855   void clear_flag (char chr)
856   {
857     unsigned char c = chr & 0xff;
858     flags[c / (CHAR_BIT * sizeof *flags)]
859       &= ~(1U << (c % (CHAR_BIT * sizeof *flags)));
860   }
861 
862   /* Set both bounds of the width range to VAL.  */
set_widthdirective863   void set_width (HOST_WIDE_INT val)
864   {
865     width[0] = width[1] = val;
866   }
867 
868   /* Set the width range according to ARG, with both bounds being
869      no less than 0.  For a constant ARG set both bounds to its value
870      or 0, whichever is greater.  For a non-constant ARG in some range
871      set width to its range adjusting each bound to -1 if it's less.
872      For an indeterminate ARG set width to [0, INT_MAX].  */
set_widthdirective873   void set_width (tree arg, vr_values *vr_values)
874   {
875     get_int_range (arg, width, width + 1, true, 0, vr_values);
876   }
877 
878   /* Set both bounds of the precision range to VAL.  */
set_precisiondirective879   void set_precision (HOST_WIDE_INT val)
880   {
881     prec[0] = prec[1] = val;
882   }
883 
884   /* Set the precision range according to ARG, with both bounds being
885      no less than -1.  For a constant ARG set both bounds to its value
886      or -1 whichever is greater.  For a non-constant ARG in some range
887      set precision to its range adjusting each bound to -1 if it's less.
888      For an indeterminate ARG set precision to [-1, INT_MAX].  */
set_precisiondirective889   void set_precision (tree arg, vr_values *vr_values)
890   {
891     get_int_range (arg, prec, prec + 1, false, -1, vr_values);
892   }
893 
894   /* Return true if both width and precision are known to be
895      either constant or in some range, false otherwise.  */
known_width_and_precisiondirective896   bool known_width_and_precision () const
897   {
898     return ((width[1] < 0
899 	     || (unsigned HOST_WIDE_INT)width[1] <= target_int_max ())
900 	    && (prec[1] < 0
901 		|| (unsigned HOST_WIDE_INT)prec[1] < target_int_max ()));
902   }
903 };
904 
905 /* Return the logarithm of X in BASE.  */
906 
907 static int
ilog(unsigned HOST_WIDE_INT x,int base)908 ilog (unsigned HOST_WIDE_INT x, int base)
909 {
910   int res = 0;
911   do
912     {
913       ++res;
914       x /= base;
915     } while (x);
916   return res;
917 }
918 
919 /* Return the number of bytes resulting from converting into a string
920    the INTEGER_CST tree node X in BASE with a minimum of PREC digits.
921    PLUS indicates whether 1 for a plus sign should be added for positive
922    numbers, and PREFIX whether the length of an octal ('O') or hexadecimal
923    ('0x') prefix should be added for nonzero numbers.  Return -1 if X cannot
924    be represented.  */
925 
926 static HOST_WIDE_INT
tree_digits(tree x,int base,HOST_WIDE_INT prec,bool plus,bool prefix)927 tree_digits (tree x, int base, HOST_WIDE_INT prec, bool plus, bool prefix)
928 {
929   unsigned HOST_WIDE_INT absval;
930 
931   HOST_WIDE_INT res;
932 
933   if (TYPE_UNSIGNED (TREE_TYPE (x)))
934     {
935       if (tree_fits_uhwi_p (x))
936 	{
937 	  absval = tree_to_uhwi (x);
938 	  res = plus;
939 	}
940       else
941 	return -1;
942     }
943   else
944     {
945       if (tree_fits_shwi_p (x))
946 	{
947 	  HOST_WIDE_INT i = tree_to_shwi (x);
948          if (HOST_WIDE_INT_MIN == i)
949            {
950              /* Avoid undefined behavior due to negating a minimum.  */
951              absval = HOST_WIDE_INT_MAX;
952              res = 1;
953            }
954          else if (i < 0)
955 	   {
956 	     absval = -i;
957 	     res = 1;
958 	   }
959 	 else
960 	   {
961 	     absval = i;
962 	     res = plus;
963 	   }
964 	}
965       else
966 	return -1;
967     }
968 
969   int ndigs = ilog (absval, base);
970 
971   res += prec < ndigs ? ndigs : prec;
972 
973   /* Adjust a non-zero value for the base prefix, either hexadecimal,
974      or, unless precision has resulted in a leading zero, also octal.  */
975   if (prefix && absval && (base == 16 || prec <= ndigs))
976     {
977       if (base == 8)
978 	res += 1;
979       else if (base == 16)
980 	res += 2;
981     }
982 
983   return res;
984 }
985 
986 /* Given the formatting result described by RES and NAVAIL, the number
987    of available in the destination, return the range of bytes remaining
988    in the destination.  */
989 
990 static inline result_range
bytes_remaining(unsigned HOST_WIDE_INT navail,const format_result & res)991 bytes_remaining (unsigned HOST_WIDE_INT navail, const format_result &res)
992 {
993   result_range range;
994 
995   if (HOST_WIDE_INT_MAX <= navail)
996     {
997       range.min = range.max = range.likely = range.unlikely = navail;
998       return range;
999     }
1000 
1001   /* The lower bound of the available range is the available size
1002      minus the maximum output size, and the upper bound is the size
1003      minus the minimum.  */
1004   range.max = res.range.min < navail ? navail - res.range.min : 0;
1005 
1006   range.likely = res.range.likely < navail ? navail - res.range.likely : 0;
1007 
1008   if (res.range.max < HOST_WIDE_INT_MAX)
1009     range.min = res.range.max < navail ? navail - res.range.max : 0;
1010   else
1011     range.min = range.likely;
1012 
1013   range.unlikely = (res.range.unlikely < navail
1014 		    ? navail - res.range.unlikely : 0);
1015 
1016   return range;
1017 }
1018 
1019 /* Description of a call to a formatted function.  */
1020 
1021 struct sprintf_dom_walker::call_info
1022 {
1023   /* Function call statement.  */
1024   gimple *callstmt;
1025 
1026   /* Function called.  */
1027   tree func;
1028 
1029   /* Called built-in function code.  */
1030   built_in_function fncode;
1031 
1032   /* Format argument and format string extracted from it.  */
1033   tree format;
1034   const char *fmtstr;
1035 
1036   /* The location of the format argument.  */
1037   location_t fmtloc;
1038 
1039   /* The destination object size for __builtin___xxx_chk functions
1040      typically determined by __builtin_object_size, or -1 if unknown.  */
1041   unsigned HOST_WIDE_INT objsize;
1042 
1043   /* Number of the first variable argument.  */
1044   unsigned HOST_WIDE_INT argidx;
1045 
1046   /* True for functions like snprintf that specify the size of
1047      the destination, false for others like sprintf that don't.  */
1048   bool bounded;
1049 
1050   /* True for bounded functions like snprintf that specify a zero-size
1051      buffer as a request to compute the size of output without actually
1052      writing any.  NOWRITE is cleared in response to the %n directive
1053      which has side-effects similar to writing output.  */
1054   bool nowrite;
1055 
1056   /* Return true if the called function's return value is used.  */
retval_usedcall_info1057   bool retval_used () const
1058   {
1059     return gimple_get_lhs (callstmt);
1060   }
1061 
1062   /* Return the warning option corresponding to the called function.  */
warnoptcall_info1063   int warnopt () const
1064   {
1065     return bounded ? OPT_Wformat_truncation_ : OPT_Wformat_overflow_;
1066   }
1067 };
1068 
1069 /* Return the result of formatting a no-op directive (such as '%n').  */
1070 
1071 static fmtresult
format_none(const directive &,tree,vr_values *)1072 format_none (const directive &, tree, vr_values *)
1073 {
1074   fmtresult res (0);
1075   return res;
1076 }
1077 
1078 /* Return the result of formatting the '%%' directive.  */
1079 
1080 static fmtresult
format_percent(const directive &,tree,vr_values *)1081 format_percent (const directive &, tree, vr_values *)
1082 {
1083   fmtresult res (1);
1084   return res;
1085 }
1086 
1087 
1088 /* Compute intmax_type_node and uintmax_type_node similarly to how
1089    tree.c builds size_type_node.  */
1090 
1091 static void
build_intmax_type_nodes(tree * pintmax,tree * puintmax)1092 build_intmax_type_nodes (tree *pintmax, tree *puintmax)
1093 {
1094   if (strcmp (UINTMAX_TYPE, "unsigned int") == 0)
1095     {
1096       *pintmax = integer_type_node;
1097       *puintmax = unsigned_type_node;
1098     }
1099   else if (strcmp (UINTMAX_TYPE, "long unsigned int") == 0)
1100     {
1101       *pintmax = long_integer_type_node;
1102       *puintmax = long_unsigned_type_node;
1103     }
1104   else if (strcmp (UINTMAX_TYPE, "long long unsigned int") == 0)
1105     {
1106       *pintmax = long_long_integer_type_node;
1107       *puintmax = long_long_unsigned_type_node;
1108     }
1109   else
1110     {
1111       for (int i = 0; i < NUM_INT_N_ENTS; i++)
1112 	if (int_n_enabled_p[i])
1113 	  {
1114 	    char name[50];
1115 	    sprintf (name, "__int%d unsigned", int_n_data[i].bitsize);
1116 
1117 	    if (strcmp (name, UINTMAX_TYPE) == 0)
1118 	      {
1119 	        *pintmax = int_n_trees[i].signed_type;
1120 	        *puintmax = int_n_trees[i].unsigned_type;
1121 		return;
1122 	      }
1123 	  }
1124       gcc_unreachable ();
1125     }
1126 }
1127 
1128 /* Determine the range [*PMIN, *PMAX] that the expression ARG is
1129    in and that is representable in type int.
1130    Return true when the range is a subrange of that of int.
1131    When ARG is null it is as if it had the full range of int.
1132    When ABSOLUTE is true the range reflects the absolute value of
1133    the argument.  When ABSOLUTE is false, negative bounds of
1134    the determined range are replaced with NEGBOUND.  */
1135 
1136 static bool
get_int_range(tree arg,HOST_WIDE_INT * pmin,HOST_WIDE_INT * pmax,bool absolute,HOST_WIDE_INT negbound,class vr_values * vr_values)1137 get_int_range (tree arg, HOST_WIDE_INT *pmin, HOST_WIDE_INT *pmax,
1138 	       bool absolute, HOST_WIDE_INT negbound,
1139 	       class vr_values *vr_values)
1140 {
1141   /* The type of the result.  */
1142   const_tree type = integer_type_node;
1143 
1144   bool knownrange = false;
1145 
1146   if (!arg)
1147     {
1148       *pmin = tree_to_shwi (TYPE_MIN_VALUE (type));
1149       *pmax = tree_to_shwi (TYPE_MAX_VALUE (type));
1150     }
1151   else if (TREE_CODE (arg) == INTEGER_CST
1152 	   && TYPE_PRECISION (TREE_TYPE (arg)) <= TYPE_PRECISION (type))
1153     {
1154       /* For a constant argument return its value adjusted as specified
1155 	 by NEGATIVE and NEGBOUND and return true to indicate that the
1156 	 result is known.  */
1157       *pmin = tree_fits_shwi_p (arg) ? tree_to_shwi (arg) : tree_to_uhwi (arg);
1158       *pmax = *pmin;
1159       knownrange = true;
1160     }
1161   else
1162     {
1163       /* True if the argument's range cannot be determined.  */
1164       bool unknown = true;
1165 
1166       tree argtype = TREE_TYPE (arg);
1167 
1168       /* Ignore invalid arguments with greater precision that that
1169 	 of the expected type (e.g., in sprintf("%*i", 12LL, i)).
1170 	 They will have been detected and diagnosed by -Wformat and
1171 	 so it's not important to complicate this code to try to deal
1172 	 with them again.  */
1173       if (TREE_CODE (arg) == SSA_NAME
1174 	  && INTEGRAL_TYPE_P (argtype)
1175 	  && TYPE_PRECISION (argtype) <= TYPE_PRECISION (type))
1176 	{
1177 	  /* Try to determine the range of values of the integer argument.  */
1178 	  value_range *vr = vr_values->get_value_range (arg);
1179 	  if (vr->type == VR_RANGE
1180 	      && TREE_CODE (vr->min) == INTEGER_CST
1181 	      && TREE_CODE (vr->max) == INTEGER_CST)
1182 	    {
1183 	      HOST_WIDE_INT type_min
1184 		= (TYPE_UNSIGNED (argtype)
1185 		   ? tree_to_uhwi (TYPE_MIN_VALUE (argtype))
1186 		   : tree_to_shwi (TYPE_MIN_VALUE (argtype)));
1187 
1188 	      HOST_WIDE_INT type_max = tree_to_uhwi (TYPE_MAX_VALUE (argtype));
1189 
1190 	      *pmin = TREE_INT_CST_LOW (vr->min);
1191 	      *pmax = TREE_INT_CST_LOW (vr->max);
1192 
1193 	      if (*pmin < *pmax)
1194 		{
1195 		  /* Return true if the adjusted range is a subrange of
1196 		     the full range of the argument's type.  *PMAX may
1197 		     be less than *PMIN when the argument is unsigned
1198 		     and its upper bound is in excess of TYPE_MAX.  In
1199 		     that (invalid) case disregard the range and use that
1200 		     of the expected type instead.  */
1201 		  knownrange = type_min < *pmin || *pmax < type_max;
1202 
1203 		  unknown = false;
1204 		}
1205 	    }
1206 	}
1207 
1208       /* Handle an argument with an unknown range as if none had been
1209 	 provided.  */
1210       if (unknown)
1211 	return get_int_range (NULL_TREE, pmin, pmax, absolute,
1212 			      negbound, vr_values);
1213     }
1214 
1215   /* Adjust each bound as specified by ABSOLUTE and NEGBOUND.  */
1216   if (absolute)
1217     {
1218       if (*pmin < 0)
1219 	{
1220 	  if (*pmin == *pmax)
1221 	    *pmin = *pmax = -*pmin;
1222 	  else
1223 	    {
1224 	      /* Make sure signed overlow is avoided.  */
1225 	      gcc_assert (*pmin != HOST_WIDE_INT_MIN);
1226 
1227 	      HOST_WIDE_INT tmp = -*pmin;
1228 	      *pmin = 0;
1229 	      if (*pmax < tmp)
1230 		*pmax = tmp;
1231 	    }
1232 	}
1233     }
1234   else if (*pmin < negbound)
1235     *pmin = negbound;
1236 
1237   return knownrange;
1238 }
1239 
1240 /* With the range [*ARGMIN, *ARGMAX] of an integer directive's actual
1241    argument, due to the conversion from either *ARGMIN or *ARGMAX to
1242    the type of the directive's formal argument it's possible for both
1243    to result in the same number of bytes or a range of bytes that's
1244    less than the number of bytes that would result from formatting
1245    some other value in the range [*ARGMIN, *ARGMAX].  This can be
1246    determined by checking for the actual argument being in the range
1247    of the type of the directive.  If it isn't it must be assumed to
1248    take on the full range of the directive's type.
1249    Return true when the range has been adjusted to the full range
1250    of DIRTYPE, and false otherwise.  */
1251 
1252 static bool
adjust_range_for_overflow(tree dirtype,tree * argmin,tree * argmax)1253 adjust_range_for_overflow (tree dirtype, tree *argmin, tree *argmax)
1254 {
1255   tree argtype = TREE_TYPE (*argmin);
1256   unsigned argprec = TYPE_PRECISION (argtype);
1257   unsigned dirprec = TYPE_PRECISION (dirtype);
1258 
1259   /* If the actual argument and the directive's argument have the same
1260      precision and sign there can be no overflow and so there is nothing
1261      to adjust.  */
1262   if (argprec == dirprec && TYPE_SIGN (argtype) == TYPE_SIGN (dirtype))
1263     return false;
1264 
1265   /* The logic below was inspired/lifted from the CONVERT_EXPR_CODE_P
1266      branch in the extract_range_from_unary_expr function in tree-vrp.c.  */
1267 
1268   if (TREE_CODE (*argmin) == INTEGER_CST
1269       && TREE_CODE (*argmax) == INTEGER_CST
1270       && (dirprec >= argprec
1271 	  || integer_zerop (int_const_binop (RSHIFT_EXPR,
1272 					     int_const_binop (MINUS_EXPR,
1273 							      *argmax,
1274 							      *argmin),
1275 					     size_int (dirprec)))))
1276     {
1277       *argmin = force_fit_type (dirtype, wi::to_widest (*argmin), 0, false);
1278       *argmax = force_fit_type (dirtype, wi::to_widest (*argmax), 0, false);
1279 
1280       /* If *ARGMIN is still less than *ARGMAX the conversion above
1281 	 is safe.  Otherwise, it has overflowed and would be unsafe.  */
1282       if (tree_int_cst_le (*argmin, *argmax))
1283 	return false;
1284     }
1285 
1286   *argmin = TYPE_MIN_VALUE (dirtype);
1287   *argmax = TYPE_MAX_VALUE (dirtype);
1288   return true;
1289 }
1290 
1291 /* Return a range representing the minimum and maximum number of bytes
1292    that the format directive DIR will output for any argument given
1293    the WIDTH and PRECISION (extracted from DIR).  This function is
1294    used when the directive argument or its value isn't known.  */
1295 
1296 static fmtresult
format_integer(const directive & dir,tree arg,vr_values * vr_values)1297 format_integer (const directive &dir, tree arg, vr_values *vr_values)
1298 {
1299   tree intmax_type_node;
1300   tree uintmax_type_node;
1301 
1302   /* Base to format the number in.  */
1303   int base;
1304 
1305   /* True when a conversion is preceded by a prefix indicating the base
1306      of the argument (octal or hexadecimal).  */
1307   bool maybebase = dir.get_flag ('#');
1308 
1309   /* True when a signed conversion is preceded by a sign or space.  */
1310   bool maybesign = false;
1311 
1312   /* True for signed conversions (i.e., 'd' and 'i').  */
1313   bool sign = false;
1314 
1315   switch (dir.specifier)
1316     {
1317     case 'd':
1318     case 'i':
1319       /* Space and '+' are  only meaningful for signed conversions.  */
1320       maybesign = dir.get_flag (' ') | dir.get_flag ('+');
1321       sign = true;
1322       base = 10;
1323       break;
1324     case 'u':
1325       base = 10;
1326       break;
1327     case 'o':
1328       base = 8;
1329       break;
1330     case 'X':
1331     case 'x':
1332       base = 16;
1333       break;
1334     default:
1335       gcc_unreachable ();
1336     }
1337 
1338   /* The type of the "formal" argument expected by the directive.  */
1339   tree dirtype = NULL_TREE;
1340 
1341   /* Determine the expected type of the argument from the length
1342      modifier.  */
1343   switch (dir.modifier)
1344     {
1345     case FMT_LEN_none:
1346       if (dir.specifier == 'p')
1347 	dirtype = ptr_type_node;
1348       else
1349 	dirtype = sign ? integer_type_node : unsigned_type_node;
1350       break;
1351 
1352     case FMT_LEN_h:
1353       dirtype = sign ? short_integer_type_node : short_unsigned_type_node;
1354       break;
1355 
1356     case FMT_LEN_hh:
1357       dirtype = sign ? signed_char_type_node : unsigned_char_type_node;
1358       break;
1359 
1360     case FMT_LEN_l:
1361       dirtype = sign ? long_integer_type_node : long_unsigned_type_node;
1362       break;
1363 
1364     case FMT_LEN_L:
1365     case FMT_LEN_ll:
1366       dirtype = (sign
1367 		 ? long_long_integer_type_node
1368 		 : long_long_unsigned_type_node);
1369       break;
1370 
1371     case FMT_LEN_z:
1372       dirtype = signed_or_unsigned_type_for (!sign, size_type_node);
1373       break;
1374 
1375     case FMT_LEN_t:
1376       dirtype = signed_or_unsigned_type_for (!sign, ptrdiff_type_node);
1377       break;
1378 
1379     case FMT_LEN_j:
1380       build_intmax_type_nodes (&intmax_type_node, &uintmax_type_node);
1381       dirtype = sign ? intmax_type_node : uintmax_type_node;
1382       break;
1383 
1384     default:
1385       return fmtresult ();
1386     }
1387 
1388   /* The type of the argument to the directive, either deduced from
1389      the actual non-constant argument if one is known, or from
1390      the directive itself when none has been provided because it's
1391      a va_list.  */
1392   tree argtype = NULL_TREE;
1393 
1394   if (!arg)
1395     {
1396       /* When the argument has not been provided, use the type of
1397 	 the directive's argument as an approximation.  This will
1398 	 result in false positives for directives like %i with
1399 	 arguments with smaller precision (such as short or char).  */
1400       argtype = dirtype;
1401     }
1402   else if (TREE_CODE (arg) == INTEGER_CST)
1403     {
1404       /* When a constant argument has been provided use its value
1405 	 rather than type to determine the length of the output.  */
1406       fmtresult res;
1407 
1408       if ((dir.prec[0] <= 0 && dir.prec[1] >= 0) && integer_zerop (arg))
1409 	{
1410 	  /* As a special case, a precision of zero with a zero argument
1411 	     results in zero bytes except in base 8 when the '#' flag is
1412 	     specified, and for signed conversions in base 8 and 10 when
1413 	     either the space or '+' flag has been specified and it results
1414 	     in just one byte (with width having the normal effect).  This
1415 	     must extend to the case of a specified precision with
1416 	     an unknown value because it can be zero.  */
1417 	  res.range.min = ((base == 8 && dir.get_flag ('#')) || maybesign);
1418 	  if (res.range.min == 0 && dir.prec[0] != dir.prec[1])
1419 	    {
1420 	      res.range.max = 1;
1421 	      res.range.likely = 1;
1422 	    }
1423 	  else
1424 	    {
1425 	      res.range.max = res.range.min;
1426 	      res.range.likely = res.range.min;
1427 	    }
1428 	}
1429       else
1430 	{
1431 	  /* Convert the argument to the type of the directive.  */
1432 	  arg = fold_convert (dirtype, arg);
1433 
1434 	  res.range.min = tree_digits (arg, base, dir.prec[0],
1435 				       maybesign, maybebase);
1436 	  if (dir.prec[0] == dir.prec[1])
1437 	    res.range.max = res.range.min;
1438 	  else
1439 	    res.range.max = tree_digits (arg, base, dir.prec[1],
1440 					 maybesign, maybebase);
1441 	  res.range.likely = res.range.min;
1442 	  res.knownrange = true;
1443 	}
1444 
1445       res.range.unlikely = res.range.max;
1446 
1447       /* Bump up the counters if WIDTH is greater than LEN.  */
1448       res.adjust_for_width_or_precision (dir.width, dirtype, base,
1449 					 (sign | maybebase) + (base == 16));
1450       /* Bump up the counters again if PRECision is greater still.  */
1451       res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1452 					 (sign | maybebase) + (base == 16));
1453 
1454       return res;
1455     }
1456   else if (INTEGRAL_TYPE_P (TREE_TYPE (arg))
1457 	   || TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)
1458     /* Determine the type of the provided non-constant argument.  */
1459     argtype = TREE_TYPE (arg);
1460   else
1461     /* Don't bother with invalid arguments since they likely would
1462        have already been diagnosed, and disable any further checking
1463        of the format string by returning [-1, -1].  */
1464     return fmtresult ();
1465 
1466   fmtresult res;
1467 
1468   /* Using either the range the non-constant argument is in, or its
1469      type (either "formal" or actual), create a range of values that
1470      constrain the length of output given the warning level.  */
1471   tree argmin = NULL_TREE;
1472   tree argmax = NULL_TREE;
1473 
1474   if (arg
1475       && TREE_CODE (arg) == SSA_NAME
1476       && INTEGRAL_TYPE_P (argtype))
1477     {
1478       /* Try to determine the range of values of the integer argument
1479 	 (range information is not available for pointers).  */
1480       value_range *vr = vr_values->get_value_range (arg);
1481       if (vr->type == VR_RANGE
1482 	  && TREE_CODE (vr->min) == INTEGER_CST
1483 	  && TREE_CODE (vr->max) == INTEGER_CST)
1484 	{
1485 	  argmin = vr->min;
1486 	  argmax = vr->max;
1487 
1488 	  /* Set KNOWNRANGE if the argument is in a known subrange
1489 	     of the directive's type and neither width nor precision
1490 	     is unknown.  (KNOWNRANGE may be reset below).  */
1491 	  res.knownrange
1492 	    = ((!tree_int_cst_equal (TYPE_MIN_VALUE (dirtype), argmin)
1493 		|| !tree_int_cst_equal (TYPE_MAX_VALUE (dirtype), argmax))
1494 	       && dir.known_width_and_precision ());
1495 
1496 	  res.argmin = argmin;
1497 	  res.argmax = argmax;
1498 	}
1499       else if (vr->type == VR_ANTI_RANGE)
1500 	{
1501 	  /* Handle anti-ranges if/when bug 71690 is resolved.  */
1502 	}
1503       else if (vr->type == VR_VARYING
1504 	       || vr->type == VR_UNDEFINED)
1505 	{
1506 	  /* The argument here may be the result of promoting the actual
1507 	     argument to int.  Try to determine the type of the actual
1508 	     argument before promotion and narrow down its range that
1509 	     way.  */
1510 	  gimple *def = SSA_NAME_DEF_STMT (arg);
1511 	  if (is_gimple_assign (def))
1512 	    {
1513 	      tree_code code = gimple_assign_rhs_code (def);
1514 	      if (code == INTEGER_CST)
1515 		{
1516 		  arg = gimple_assign_rhs1 (def);
1517 		  return format_integer (dir, arg, vr_values);
1518 		}
1519 
1520 	      if (code == NOP_EXPR)
1521 		{
1522 		  tree type = TREE_TYPE (gimple_assign_rhs1 (def));
1523 		  if (INTEGRAL_TYPE_P (type)
1524 		      || TREE_CODE (type) == POINTER_TYPE)
1525 		    argtype = type;
1526 		}
1527 	    }
1528 	}
1529     }
1530 
1531   if (!argmin)
1532     {
1533       if (TREE_CODE (argtype) == POINTER_TYPE)
1534 	{
1535 	  argmin = build_int_cst (pointer_sized_int_node, 0);
1536 	  argmax = build_all_ones_cst (pointer_sized_int_node);
1537 	}
1538       else
1539 	{
1540 	  argmin = TYPE_MIN_VALUE (argtype);
1541 	  argmax = TYPE_MAX_VALUE (argtype);
1542 	}
1543     }
1544 
1545   /* Clear KNOWNRANGE if the range has been adjusted to the maximum
1546      of the directive.  If it has been cleared then since ARGMIN and/or
1547      ARGMAX have been adjusted also adjust the corresponding ARGMIN and
1548      ARGMAX in the result to include in diagnostics.  */
1549   if (adjust_range_for_overflow (dirtype, &argmin, &argmax))
1550     {
1551       res.knownrange = false;
1552       res.argmin = argmin;
1553       res.argmax = argmax;
1554     }
1555 
1556   /* Recursively compute the minimum and maximum from the known range.  */
1557   if (TYPE_UNSIGNED (dirtype) || tree_int_cst_sgn (argmin) >= 0)
1558     {
1559       /* For unsigned conversions/directives or signed when
1560 	 the minimum is positive, use the minimum and maximum to compute
1561 	 the shortest and longest output, respectively.  */
1562       res.range.min = format_integer (dir, argmin, vr_values).range.min;
1563       res.range.max = format_integer (dir, argmax, vr_values).range.max;
1564     }
1565   else if (tree_int_cst_sgn (argmax) < 0)
1566     {
1567       /* For signed conversions/directives if maximum is negative,
1568 	 use the minimum as the longest output and maximum as the
1569 	 shortest output.  */
1570       res.range.min = format_integer (dir, argmax, vr_values).range.min;
1571       res.range.max = format_integer (dir, argmin, vr_values).range.max;
1572     }
1573   else
1574     {
1575       /* Otherwise, 0 is inside of the range and minimum negative.  Use 0
1576 	 as the shortest output and for the longest output compute the
1577 	 length of the output of both minimum and maximum and pick the
1578 	 longer.  */
1579       unsigned HOST_WIDE_INT max1
1580 	= format_integer (dir, argmin, vr_values).range.max;
1581       unsigned HOST_WIDE_INT max2
1582 	= format_integer (dir, argmax, vr_values).range.max;
1583       res.range.min
1584 	= format_integer (dir, integer_zero_node, vr_values).range.min;
1585       res.range.max = MAX (max1, max2);
1586     }
1587 
1588   /* If the range is known, use the maximum as the likely length.  */
1589   if (res.knownrange)
1590     res.range.likely = res.range.max;
1591   else
1592     {
1593       /* Otherwise, use the minimum.  Except for the case where for %#x or
1594          %#o the minimum is just for a single value in the range (0) and
1595          for all other values it is something longer, like 0x1 or 01.
1596 	  Use the length for value 1 in that case instead as the likely
1597 	  length.  */
1598       res.range.likely = res.range.min;
1599       if (maybebase
1600 	  && base != 10
1601 	  && (tree_int_cst_sgn (argmin) < 0 || tree_int_cst_sgn (argmax) > 0))
1602 	{
1603 	  if (res.range.min == 1)
1604 	    res.range.likely += base == 8 ? 1 : 2;
1605 	  else if (res.range.min == 2
1606 		   && base == 16
1607 		   && (dir.width[0] == 2 || dir.prec[0] == 2))
1608 	    ++res.range.likely;
1609 	}
1610     }
1611 
1612   res.range.unlikely = res.range.max;
1613   res.adjust_for_width_or_precision (dir.width, dirtype, base,
1614 				     (sign | maybebase) + (base == 16));
1615   res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1616 				     (sign | maybebase) + (base == 16));
1617 
1618   return res;
1619 }
1620 
1621 /* Return the number of bytes that a format directive consisting of FLAGS,
1622    PRECision, format SPECification, and MPFR rounding specifier RNDSPEC,
1623    would result for argument X under ideal conditions (i.e., if PREC
1624    weren't excessive).  MPFR 3.1 allocates large amounts of memory for
1625    values of PREC with large magnitude and can fail (see MPFR bug #21056).
1626    This function works around those problems.  */
1627 
1628 static unsigned HOST_WIDE_INT
get_mpfr_format_length(mpfr_ptr x,const char * flags,HOST_WIDE_INT prec,char spec,char rndspec)1629 get_mpfr_format_length (mpfr_ptr x, const char *flags, HOST_WIDE_INT prec,
1630 			char spec, char rndspec)
1631 {
1632   char fmtstr[40];
1633 
1634   HOST_WIDE_INT len = strlen (flags);
1635 
1636   fmtstr[0] = '%';
1637   memcpy (fmtstr + 1, flags, len);
1638   memcpy (fmtstr + 1 + len, ".*R", 3);
1639   fmtstr[len + 4] = rndspec;
1640   fmtstr[len + 5] = spec;
1641   fmtstr[len + 6] = '\0';
1642 
1643   spec = TOUPPER (spec);
1644   if (spec == 'E' || spec == 'F')
1645     {
1646       /* For %e, specify the precision explicitly since mpfr_sprintf
1647 	 does its own thing just to be different (see MPFR bug 21088).  */
1648       if (prec < 0)
1649 	prec = 6;
1650     }
1651   else
1652     {
1653       /* Avoid passing negative precisions with larger magnitude to MPFR
1654 	 to avoid exposing its bugs.  (A negative precision is supposed
1655 	 to be ignored.)  */
1656       if (prec < 0)
1657 	prec = -1;
1658     }
1659 
1660   HOST_WIDE_INT p = prec;
1661 
1662   if (spec == 'G' && !strchr (flags, '#'))
1663     {
1664       /* For G/g without the pound flag, precision gives the maximum number
1665 	 of significant digits which is bounded by LDBL_MAX_10_EXP, or, for
1666 	 a 128 bit IEEE extended precision, 4932.  Using twice as much here
1667 	 should be more than sufficient for any real format.  */
1668       if ((IEEE_MAX_10_EXP * 2) < prec)
1669 	prec = IEEE_MAX_10_EXP * 2;
1670       p = prec;
1671     }
1672   else
1673     {
1674       /* Cap precision arbitrarily at 1KB and add the difference
1675 	 (if any) to the MPFR result.  */
1676       if (prec > 1024)
1677 	p = 1024;
1678     }
1679 
1680   len = mpfr_snprintf (NULL, 0, fmtstr, (int)p, x);
1681 
1682   /* Handle the unlikely (impossible?) error by returning more than
1683      the maximum dictated by the function's return type.  */
1684   if (len < 0)
1685     return target_dir_max () + 1;
1686 
1687   /* Adjust the return value by the difference.  */
1688   if (p < prec)
1689     len += prec - p;
1690 
1691   return len;
1692 }
1693 
1694 /* Return the number of bytes to format using the format specifier
1695    SPEC and the precision PREC the largest value in the real floating
1696    TYPE.  */
1697 
1698 static unsigned HOST_WIDE_INT
format_floating_max(tree type,char spec,HOST_WIDE_INT prec)1699 format_floating_max (tree type, char spec, HOST_WIDE_INT prec)
1700 {
1701   machine_mode mode = TYPE_MODE (type);
1702 
1703   /* IBM Extended mode.  */
1704   if (MODE_COMPOSITE_P (mode))
1705     mode = DFmode;
1706 
1707   /* Get the real type format desription for the target.  */
1708   const real_format *rfmt = REAL_MODE_FORMAT (mode);
1709   REAL_VALUE_TYPE rv;
1710 
1711   real_maxval (&rv, 0, mode);
1712 
1713   /* Convert the GCC real value representation with the precision
1714      of the real type to the mpfr_t format with the GCC default
1715      round-to-nearest mode.  */
1716   mpfr_t x;
1717   mpfr_init2 (x, rfmt->p);
1718   mpfr_from_real (x, &rv, GMP_RNDN);
1719 
1720   /* Return a value one greater to account for the leading minus sign.  */
1721   unsigned HOST_WIDE_INT r
1722     = 1 + get_mpfr_format_length (x, "", prec, spec, 'D');
1723   mpfr_clear (x);
1724   return r;
1725 }
1726 
1727 /* Return a range representing the minimum and maximum number of bytes
1728    that the directive DIR will output for any argument.  PREC gives
1729    the adjusted precision range to account for negative precisions
1730    meaning the default 6.  This function is used when the directive
1731    argument or its value isn't known.  */
1732 
1733 static fmtresult
format_floating(const directive & dir,const HOST_WIDE_INT prec[2])1734 format_floating (const directive &dir, const HOST_WIDE_INT prec[2])
1735 {
1736   tree type;
1737 
1738   switch (dir.modifier)
1739     {
1740     case FMT_LEN_l:
1741     case FMT_LEN_none:
1742       type = double_type_node;
1743       break;
1744 
1745     case FMT_LEN_L:
1746       type = long_double_type_node;
1747       break;
1748 
1749     case FMT_LEN_ll:
1750       type = long_double_type_node;
1751       break;
1752 
1753     default:
1754       return fmtresult ();
1755     }
1756 
1757   /* The minimum and maximum number of bytes produced by the directive.  */
1758   fmtresult res;
1759 
1760   /* The minimum output as determined by flags.  It's always at least 1.
1761      When plus or space are set the output is preceded by either a sign
1762      or a space.  */
1763   unsigned flagmin = (1 /* for the first digit */
1764 		      + (dir.get_flag ('+') | dir.get_flag (' ')));
1765 
1766   /* The minimum is 3 for "inf" and "nan" for all specifiers, plus 1
1767      for the plus sign/space with the '+' and ' ' flags, respectively,
1768      unless reduced below.  */
1769   res.range.min = 2 + flagmin;
1770 
1771   /* When the pound flag is set the decimal point is included in output
1772      regardless of precision.  Whether or not a decimal point is included
1773      otherwise depends on the specification and precision.  */
1774   bool radix = dir.get_flag ('#');
1775 
1776   switch (dir.specifier)
1777     {
1778     case 'A':
1779     case 'a':
1780       {
1781 	HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1782 	if (dir.prec[0] <= 0)
1783 	  minprec = 0;
1784 	else if (dir.prec[0] > 0)
1785 	  minprec = dir.prec[0] + !radix /* decimal point */;
1786 
1787 	res.range.likely = (2 /* 0x */
1788 			    + flagmin
1789 			    + radix
1790 			    + minprec
1791 			    + 3 /* p+0 */);
1792 
1793 	res.range.max = format_floating_max (type, 'a', prec[1]);
1794 
1795 	/* The unlikely maximum accounts for the longest multibyte
1796 	   decimal point character.  */
1797 	res.range.unlikely = res.range.max;
1798 	if (dir.prec[1] > 0)
1799 	  res.range.unlikely += target_mb_len_max () - 1;
1800 
1801 	break;
1802       }
1803 
1804     case 'E':
1805     case 'e':
1806       {
1807 	/* Minimum output attributable to precision and, when it's
1808 	   non-zero, decimal point.  */
1809 	HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1810 
1811 	/* The likely minimum output is "[-+]1.234567e+00" regardless
1812 	   of the value of the actual argument.  */
1813 	res.range.likely = (flagmin
1814 			    + radix
1815 			    + minprec
1816 			    + 2 /* e+ */ + 2);
1817 
1818 	res.range.max = format_floating_max (type, 'e', prec[1]);
1819 
1820 	/* The unlikely maximum accounts for the longest multibyte
1821 	   decimal point character.  */
1822 	if (dir.prec[0] != dir.prec[1]
1823 	    || dir.prec[0] == -1 || dir.prec[0] > 0)
1824 	  res.range.unlikely = res.range.max + target_mb_len_max () -1;
1825 	else
1826 	  res.range.unlikely = res.range.max;
1827 	break;
1828       }
1829 
1830     case 'F':
1831     case 'f':
1832       {
1833 	/* Minimum output attributable to precision and, when it's non-zero,
1834 	   decimal point.  */
1835 	HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1836 
1837 	/* For finite numbers (i.e., not infinity or NaN) the lower bound
1838 	   when precision isn't specified is 8 bytes ("1.23456" since
1839 	   precision is taken to be 6).  When precision is zero, the lower
1840 	   bound is 1 byte (e.g., "1").  Otherwise, when precision is greater
1841 	   than zero, then the lower bound is 2 plus precision (plus flags).
1842 	   But in all cases, the lower bound is no greater than 3.  */
1843 	unsigned HOST_WIDE_INT min = flagmin + radix + minprec;
1844 	if (min < res.range.min)
1845 	  res.range.min = min;
1846 
1847 	/* Compute the upper bound for -TYPE_MAX.  */
1848 	res.range.max = format_floating_max (type, 'f', prec[1]);
1849 
1850 	/* The minimum output with unknown precision is a single byte
1851 	   (e.g., "0") but the more likely output is 3 bytes ("0.0").  */
1852 	if (dir.prec[0] < 0 && dir.prec[1] > 0)
1853 	  res.range.likely = 3;
1854 	else
1855 	  res.range.likely = min;
1856 
1857 	/* The unlikely maximum accounts for the longest multibyte
1858 	   decimal point character.  */
1859 	if (dir.prec[0] != dir.prec[1]
1860 	    || dir.prec[0] == -1 || dir.prec[0] > 0)
1861 	  res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1862 	break;
1863       }
1864 
1865     case 'G':
1866     case 'g':
1867       {
1868 	/* The %g output depends on precision and the exponent of
1869 	   the argument.  Since the value of the argument isn't known
1870 	   the lower bound on the range of bytes (not counting flags
1871 	   or width) is 1 plus radix (i.e., either "0" or "0." for
1872 	   "%g" and "%#g", respectively, with a zero argument).  */
1873 	unsigned HOST_WIDE_INT min = flagmin + radix;
1874 	if (min < res.range.min)
1875 	  res.range.min = min;
1876 
1877 	char spec = 'g';
1878 	HOST_WIDE_INT maxprec = dir.prec[1];
1879 	if (radix && maxprec)
1880 	  {
1881 	    /* When the pound flag (radix) is set, trailing zeros aren't
1882 	       trimmed and so the longest output is the same as for %e,
1883 	       except with precision minus 1 (as specified in C11).  */
1884 	    spec = 'e';
1885 	    if (maxprec > 0)
1886 	      --maxprec;
1887 	    else if (maxprec < 0)
1888 	      maxprec = 5;
1889 	  }
1890 	else
1891 	  maxprec = prec[1];
1892 
1893 	res.range.max = format_floating_max (type, spec, maxprec);
1894 
1895 	/* The likely output is either the maximum computed above
1896 	   minus 1 (assuming the maximum is positive) when precision
1897 	   is known (or unspecified), or the same minimum as for %e
1898 	   (which is computed for a non-negative argument).  Unlike
1899 	   for the other specifiers above the likely output isn't
1900 	   the minimum because for %g that's 1 which is unlikely.  */
1901 	if (dir.prec[1] < 0
1902 	    || (unsigned HOST_WIDE_INT)dir.prec[1] < target_int_max ())
1903 	  res.range.likely = res.range.max - 1;
1904 	else
1905 	  {
1906 	    HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1907 	    res.range.likely = (flagmin
1908 				+ radix
1909 				+ minprec
1910 				+ 2 /* e+ */ + 2);
1911 	  }
1912 
1913 	/* The unlikely maximum accounts for the longest multibyte
1914 	   decimal point character.  */
1915 	res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1916 	break;
1917       }
1918 
1919     default:
1920       return fmtresult ();
1921     }
1922 
1923   /* Bump up the byte counters if WIDTH is greater.  */
1924   res.adjust_for_width_or_precision (dir.width);
1925   return res;
1926 }
1927 
1928 /* Return a range representing the minimum and maximum number of bytes
1929    that the directive DIR will write on output for the floating argument
1930    ARG.  */
1931 
1932 static fmtresult
format_floating(const directive & dir,tree arg,vr_values *)1933 format_floating (const directive &dir, tree arg, vr_values *)
1934 {
1935   HOST_WIDE_INT prec[] = { dir.prec[0], dir.prec[1] };
1936   tree type = (dir.modifier == FMT_LEN_L || dir.modifier == FMT_LEN_ll
1937 	       ? long_double_type_node : double_type_node);
1938 
1939   /* For an indeterminate precision the lower bound must be assumed
1940      to be zero.  */
1941   if (TOUPPER (dir.specifier) == 'A')
1942     {
1943       /* Get the number of fractional decimal digits needed to represent
1944 	 the argument without a loss of accuracy.  */
1945       unsigned fmtprec
1946 	= REAL_MODE_FORMAT (TYPE_MODE (type))->p;
1947 
1948       /* The precision of the IEEE 754 double format is 53.
1949 	 The precision of all other GCC binary double formats
1950 	 is 56 or less.  */
1951       unsigned maxprec = fmtprec <= 56 ? 13 : 15;
1952 
1953       /* For %a, leave the minimum precision unspecified to let
1954 	 MFPR trim trailing zeros (as it and many other systems
1955 	 including Glibc happen to do) and set the maximum
1956 	 precision to reflect what it would be with trailing zeros
1957 	 present (as Solaris and derived systems do).  */
1958       if (dir.prec[1] < 0)
1959 	{
1960 	  /* Both bounds are negative implies that precision has
1961 	     not been specified.  */
1962 	  prec[0] = maxprec;
1963 	  prec[1] = -1;
1964 	}
1965       else if (dir.prec[0] < 0)
1966 	{
1967 	  /* With a negative lower bound and a non-negative upper
1968 	     bound set the minimum precision to zero and the maximum
1969 	     to the greater of the maximum precision (i.e., with
1970 	     trailing zeros present) and the specified upper bound.  */
1971 	  prec[0] = 0;
1972 	  prec[1] = dir.prec[1] < maxprec ? maxprec : dir.prec[1];
1973 	}
1974     }
1975   else if (dir.prec[0] < 0)
1976     {
1977       if (dir.prec[1] < 0)
1978 	{
1979 	  /* A precision in a strictly negative range is ignored and
1980 	     the default of 6 is used instead.  */
1981 	  prec[0] = prec[1] = 6;
1982 	}
1983       else
1984 	{
1985 	  /* For a precision in a partly negative range, the lower bound
1986 	     must be assumed to be zero and the new upper bound is the
1987 	     greater of 6 (the default precision used when the specified
1988 	     precision is negative) and the upper bound of the specified
1989 	     range.  */
1990 	  prec[0] = 0;
1991 	  prec[1] = dir.prec[1] < 6 ? 6 : dir.prec[1];
1992 	}
1993     }
1994 
1995   if (!arg
1996       || TREE_CODE (arg) != REAL_CST
1997       || !useless_type_conversion_p (type, TREE_TYPE (arg)))
1998     return format_floating (dir, prec);
1999 
2000   /* The minimum and maximum number of bytes produced by the directive.  */
2001   fmtresult res;
2002 
2003   /* Get the real type format desription for the target.  */
2004   const REAL_VALUE_TYPE *rvp = TREE_REAL_CST_PTR (arg);
2005   const real_format *rfmt = REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (arg)));
2006 
2007   if (!real_isfinite (rvp))
2008     {
2009       /* The format for Infinity and NaN is "[-]inf"/"[-]infinity"
2010 	 and "[-]nan" with the choice being implementation-defined
2011 	 but not locale dependent.  */
2012       bool sign = dir.get_flag ('+') || real_isneg (rvp);
2013       res.range.min = 3 + sign;
2014 
2015       res.range.likely = res.range.min;
2016       res.range.max = res.range.min;
2017       /* The inlikely maximum is "[-/+]infinity" or "[-/+]nan".  */
2018       res.range.unlikely = sign + (real_isinf (rvp) ? 8 : 3);
2019 
2020       /* The range for infinity and NaN is known unless either width
2021 	 or precision is unknown.  Width has the same effect regardless
2022 	 of whether the argument is finite.  Precision is either ignored
2023 	 (e.g., Glibc) or can have an effect on the short vs long format
2024 	 such as inf/infinity (e.g., Solaris).  */
2025       res.knownrange = dir.known_width_and_precision ();
2026 
2027       /* Adjust the range for width but ignore precision.  */
2028       res.adjust_for_width_or_precision (dir.width);
2029 
2030       return res;
2031     }
2032 
2033   char fmtstr [40];
2034   char *pfmt = fmtstr;
2035 
2036   /* Append flags.  */
2037   for (const char *pf = "-+ #0"; *pf; ++pf)
2038     if (dir.get_flag (*pf))
2039       *pfmt++ = *pf;
2040 
2041   *pfmt = '\0';
2042 
2043   {
2044     /* Set up an array to easily iterate over.  */
2045     unsigned HOST_WIDE_INT* const minmax[] = {
2046       &res.range.min, &res.range.max
2047     };
2048 
2049     for (int i = 0; i != sizeof minmax / sizeof *minmax; ++i)
2050       {
2051 	/* Convert the GCC real value representation with the precision
2052 	   of the real type to the mpfr_t format rounding down in the
2053 	   first iteration that computes the minimm and up in the second
2054 	   that computes the maximum.  This order is arbibtrary because
2055 	   rounding in either direction can result in longer output.  */
2056 	mpfr_t mpfrval;
2057 	mpfr_init2 (mpfrval, rfmt->p);
2058 	mpfr_from_real (mpfrval, rvp, i ? GMP_RNDU : GMP_RNDD);
2059 
2060 	/* Use the MPFR rounding specifier to round down in the first
2061 	   iteration and then up.  In most but not all cases this will
2062 	   result in the same number of bytes.  */
2063 	char rndspec = "DU"[i];
2064 
2065 	/* Format it and store the result in the corresponding member
2066 	   of the result struct.  */
2067 	*minmax[i] = get_mpfr_format_length (mpfrval, fmtstr, prec[i],
2068 					     dir.specifier, rndspec);
2069 	mpfr_clear (mpfrval);
2070       }
2071   }
2072 
2073   /* Make sure the minimum is less than the maximum (MPFR rounding
2074      in the call to mpfr_snprintf can result in the reverse.  */
2075   if (res.range.max < res.range.min)
2076     {
2077       unsigned HOST_WIDE_INT tmp = res.range.min;
2078       res.range.min = res.range.max;
2079       res.range.max = tmp;
2080     }
2081 
2082   /* The range is known unless either width or precision is unknown.  */
2083   res.knownrange = dir.known_width_and_precision ();
2084 
2085   /* For the same floating point constant, unless width or precision
2086      is unknown, use the longer output as the likely maximum since
2087      with round to nearest either is equally likely.  Otheriwse, when
2088      precision is unknown, use the greater of the minimum and 3 as
2089      the likely output (for "0.0" since zero precision is unlikely).  */
2090   if (res.knownrange)
2091     res.range.likely = res.range.max;
2092   else if (res.range.min < 3
2093 	   && dir.prec[0] < 0
2094 	   && (unsigned HOST_WIDE_INT)dir.prec[1] == target_int_max ())
2095     res.range.likely = 3;
2096   else
2097     res.range.likely = res.range.min;
2098 
2099   res.range.unlikely = res.range.max;
2100 
2101   if (res.range.max > 2 && (prec[0] != 0 || prec[1] != 0))
2102     {
2103       /* Unless the precision is zero output longer than 2 bytes may
2104 	 include the decimal point which must be a single character
2105 	 up to MB_LEN_MAX in length.  This is overly conservative
2106 	 since in some conversions some constants result in no decimal
2107 	 point (e.g., in %g).  */
2108       res.range.unlikely += target_mb_len_max () - 1;
2109     }
2110 
2111   res.adjust_for_width_or_precision (dir.width);
2112   return res;
2113 }
2114 
2115 /* Return a FMTRESULT struct set to the lengths of the shortest and longest
2116    strings referenced by the expression STR, or (-1, -1) when not known.
2117    Used by the format_string function below.  */
2118 
2119 static fmtresult
get_string_length(tree str)2120 get_string_length (tree str)
2121 {
2122   if (!str)
2123     return fmtresult ();
2124 
2125   if (tree slen = c_strlen (str, 1))
2126     {
2127       /* Simply return the length of the string.  */
2128       fmtresult res (tree_to_shwi (slen));
2129       return res;
2130     }
2131 
2132   /* Determine the length of the shortest and longest string referenced
2133      by STR.  Strings of unknown lengths are bounded by the sizes of
2134      arrays that subexpressions of STR may refer to.  Pointers that
2135      aren't known to point any such arrays result in LENRANGE[1] set
2136      to SIZE_MAX.  */
2137   tree lenrange[2];
2138   bool flexarray = get_range_strlen (str, lenrange);
2139 
2140   if (lenrange [0] || lenrange [1])
2141     {
2142       HOST_WIDE_INT min
2143 	= (tree_fits_uhwi_p (lenrange[0])
2144 	   ? tree_to_uhwi (lenrange[0])
2145 	   : 0);
2146 
2147       HOST_WIDE_INT max
2148 	= (tree_fits_uhwi_p (lenrange[1])
2149 	   ? tree_to_uhwi (lenrange[1])
2150 	   : HOST_WIDE_INT_M1U);
2151 
2152       /* get_range_strlen() returns the target value of SIZE_MAX for
2153 	 strings of unknown length.  Bump it up to HOST_WIDE_INT_M1U
2154 	 which may be bigger.  */
2155       if ((unsigned HOST_WIDE_INT)min == target_size_max ())
2156 	min = HOST_WIDE_INT_M1U;
2157       if ((unsigned HOST_WIDE_INT)max == target_size_max ())
2158 	max = HOST_WIDE_INT_M1U;
2159 
2160       fmtresult res (min, max);
2161 
2162       /* Set RES.KNOWNRANGE to true if and only if all strings referenced
2163 	 by STR are known to be bounded (though not necessarily by their
2164 	 actual length but perhaps by their maximum possible length).  */
2165       if (res.range.max < target_int_max ())
2166 	{
2167 	  res.knownrange = true;
2168 	  /* When the the length of the longest string is known and not
2169 	     excessive use it as the likely length of the string(s).  */
2170 	  res.range.likely = res.range.max;
2171 	}
2172       else
2173 	{
2174 	  /* When the upper bound is unknown (it can be zero or excessive)
2175 	     set the likely length to the greater of 1 and the length of
2176 	     the shortest string and reset the lower bound to zero.  */
2177 	  res.range.likely = res.range.min ? res.range.min : warn_level > 1;
2178 	  res.range.min = 0;
2179 	}
2180 
2181       /* If the range of string length has been estimated from the size
2182 	 of an array at the end of a struct assume that it's longer than
2183 	 the array bound says it is in case it's used as a poor man's
2184 	 flexible array member, such as in struct S { char a[4]; };  */
2185       res.range.unlikely = flexarray ? HOST_WIDE_INT_MAX : res.range.max;
2186 
2187       return res;
2188     }
2189 
2190   return get_string_length (NULL_TREE);
2191 }
2192 
2193 /* Return the minimum and maximum number of characters formatted
2194    by the '%c' format directives and its wide character form for
2195    the argument ARG.  ARG can be null (for functions such as
2196    vsprinf).  */
2197 
2198 static fmtresult
format_character(const directive & dir,tree arg,vr_values * vr_values)2199 format_character (const directive &dir, tree arg, vr_values *vr_values)
2200 {
2201   fmtresult res;
2202 
2203   res.knownrange = true;
2204 
2205   if (dir.modifier == FMT_LEN_l)
2206     {
2207       /* A wide character can result in as few as zero bytes.  */
2208       res.range.min = 0;
2209 
2210       HOST_WIDE_INT min, max;
2211       if (get_int_range (arg, &min, &max, false, 0, vr_values))
2212 	{
2213 	  if (min == 0 && max == 0)
2214 	    {
2215 	      /* The NUL wide character results in no bytes.  */
2216 	      res.range.max = 0;
2217 	      res.range.likely = 0;
2218 	      res.range.unlikely = 0;
2219 	    }
2220 	  else if (min > 0 && min < 128)
2221 	    {
2222 	      /* A wide character in the ASCII range most likely results
2223 		 in a single byte, and only unlikely in up to MB_LEN_MAX.  */
2224 	      res.range.max = 1;
2225 	      res.range.likely = 1;
2226 	      res.range.unlikely = target_mb_len_max ();
2227 	    }
2228 	  else
2229 	    {
2230 	      /* A wide character outside the ASCII range likely results
2231 		 in up to two bytes, and only unlikely in up to MB_LEN_MAX.  */
2232 	      res.range.max = target_mb_len_max ();
2233 	      res.range.likely = 2;
2234 	      res.range.unlikely = res.range.max;
2235 	    }
2236 	}
2237       else
2238 	{
2239 	  /* An unknown wide character is treated the same as a wide
2240 	     character outside the ASCII range.  */
2241 	  res.range.max = target_mb_len_max ();
2242 	  res.range.likely = 2;
2243 	  res.range.unlikely = res.range.max;
2244 	}
2245     }
2246   else
2247     {
2248       /* A plain '%c' directive.  Its ouput is exactly 1.  */
2249       res.range.min = res.range.max = 1;
2250       res.range.likely = res.range.unlikely = 1;
2251       res.knownrange = true;
2252     }
2253 
2254   /* Bump up the byte counters if WIDTH is greater.  */
2255   return res.adjust_for_width_or_precision (dir.width);
2256 }
2257 
2258 /* Return the minimum and maximum number of characters formatted
2259    by the '%s' format directive and its wide character form for
2260    the argument ARG.  ARG can be null (for functions such as
2261    vsprinf).  */
2262 
2263 static fmtresult
format_string(const directive & dir,tree arg,vr_values *)2264 format_string (const directive &dir, tree arg, vr_values *)
2265 {
2266   fmtresult res;
2267 
2268   /* Compute the range the argument's length can be in.  */
2269   fmtresult slen = get_string_length (arg);
2270   if (slen.range.min == slen.range.max
2271       && slen.range.min < HOST_WIDE_INT_MAX)
2272     {
2273       /* The argument is either a string constant or it refers
2274 	 to one of a number of strings of the same length.  */
2275 
2276       /* A '%s' directive with a string argument with constant length.  */
2277       res.range = slen.range;
2278 
2279       if (dir.modifier == FMT_LEN_l)
2280 	{
2281 	  /* In the worst case the length of output of a wide string S
2282 	     is bounded by MB_LEN_MAX * wcslen (S).  */
2283 	  res.range.max *= target_mb_len_max ();
2284 	  res.range.unlikely = res.range.max;
2285 	  /* It's likely that the the total length is not more that
2286 	     2 * wcslen (S).*/
2287 	  res.range.likely = res.range.min * 2;
2288 
2289 	  if (dir.prec[1] >= 0
2290 	      && (unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2291 	    {
2292 	      res.range.max = dir.prec[1];
2293 	      res.range.likely = dir.prec[1];
2294 	      res.range.unlikely = dir.prec[1];
2295 	    }
2296 
2297 	  if (dir.prec[0] < 0 && dir.prec[1] > -1)
2298 	    res.range.min = 0;
2299 	  else if (dir.prec[0] >= 0)
2300 	    res.range.likely = dir.prec[0];
2301 
2302 	  /* Even a non-empty wide character string need not convert into
2303 	     any bytes.  */
2304 	  res.range.min = 0;
2305 	}
2306       else
2307 	{
2308 	  res.knownrange = true;
2309 
2310 	  if (dir.prec[0] < 0 && dir.prec[1] > -1)
2311 	    res.range.min = 0;
2312 	  else if ((unsigned HOST_WIDE_INT)dir.prec[0] < res.range.min)
2313 	    res.range.min = dir.prec[0];
2314 
2315 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2316 	    {
2317 	      res.range.max = dir.prec[1];
2318 	      res.range.likely = dir.prec[1];
2319 	      res.range.unlikely = dir.prec[1];
2320 	    }
2321 	}
2322     }
2323   else if (arg && integer_zerop (arg))
2324     {
2325       /* Handle null pointer argument.  */
2326 
2327       fmtresult res (0);
2328       res.nullp = true;
2329       return res;
2330     }
2331   else
2332     {
2333       /* For a '%s' and '%ls' directive with a non-constant string (either
2334 	 one of a number of strings of known length or an unknown string)
2335 	 the minimum number of characters is lesser of PRECISION[0] and
2336 	 the length of the shortest known string or zero, and the maximum
2337 	 is the lessser of the length of the longest known string or
2338 	 PTRDIFF_MAX and PRECISION[1].  The likely length is either
2339 	 the minimum at level 1 and the greater of the minimum and 1
2340 	 at level 2.  This result is adjust upward for width (if it's
2341 	 specified).  */
2342 
2343       if (dir.modifier == FMT_LEN_l)
2344 	{
2345 	  /* A wide character converts to as few as zero bytes.  */
2346 	  slen.range.min = 0;
2347 	  if (slen.range.max < target_int_max ())
2348 	    slen.range.max *= target_mb_len_max ();
2349 
2350 	  if (slen.range.likely < target_int_max ())
2351 	    slen.range.likely *= 2;
2352 
2353 	  if (slen.range.likely < target_int_max ())
2354 	    slen.range.unlikely *= target_mb_len_max ();
2355 	}
2356 
2357       res.range = slen.range;
2358 
2359       if (dir.prec[0] >= 0)
2360 	{
2361 	  /* Adjust the minimum to zero if the string length is unknown,
2362 	     or at most the lower bound of the precision otherwise.  */
2363 	  if (slen.range.min >= target_int_max ())
2364 	    res.range.min = 0;
2365 	  else if ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.min)
2366 	    res.range.min = dir.prec[0];
2367 
2368 	  /* Make both maxima no greater than the upper bound of precision.  */
2369 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max
2370 	      || slen.range.max >= target_int_max ())
2371 	    {
2372 	      res.range.max = dir.prec[1];
2373 	      res.range.unlikely = dir.prec[1];
2374 	    }
2375 
2376 	  /* If precision is constant, set the likely counter to the lesser
2377 	     of it and the maximum string length.  Otherwise, if the lower
2378 	     bound of precision is greater than zero, set the likely counter
2379 	     to the minimum.  Otherwise set it to zero or one based on
2380 	     the warning level.  */
2381 	  if (dir.prec[0] == dir.prec[1])
2382 	    res.range.likely
2383 	      = ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.max
2384 		 ? dir.prec[0] : slen.range.max);
2385 	  else if (dir.prec[0] > 0)
2386 	    res.range.likely = res.range.min;
2387 	  else
2388 	    res.range.likely = warn_level > 1;
2389 	}
2390       else if (dir.prec[1] >= 0)
2391 	{
2392 	  res.range.min = 0;
2393 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max)
2394 	    res.range.max = dir.prec[1];
2395 	  res.range.likely = dir.prec[1] ? warn_level > 1 : 0;
2396 	}
2397       else if (slen.range.min >= target_int_max ())
2398 	{
2399 	  res.range.min = 0;
2400 	  res.range.max = HOST_WIDE_INT_MAX;
2401 	  /* At level 1 strings of unknown length are assumed to be
2402 	     empty, while at level 1 they are assumed to be one byte
2403 	     long.  */
2404 	  res.range.likely = warn_level > 1;
2405 	}
2406       else
2407 	{
2408 	  /* A string of unknown length unconstrained by precision is
2409 	     assumed to be empty at level 1 and just one character long
2410 	     at higher levels.  */
2411 	  if (res.range.likely >= target_int_max ())
2412 	    res.range.likely = warn_level > 1;
2413 	}
2414 
2415       res.range.unlikely = res.range.max;
2416     }
2417 
2418   /* Bump up the byte counters if WIDTH is greater.  */
2419   return res.adjust_for_width_or_precision (dir.width);
2420 }
2421 
2422 /* Format plain string (part of the format string itself).  */
2423 
2424 static fmtresult
format_plain(const directive & dir,tree,vr_values *)2425 format_plain (const directive &dir, tree, vr_values *)
2426 {
2427   fmtresult res (dir.len);
2428   return res;
2429 }
2430 
2431 /* Return true if the RESULT of a directive in a call describe by INFO
2432    should be diagnosed given the AVAILable space in the destination.  */
2433 
2434 static bool
should_warn_p(const sprintf_dom_walker::call_info & info,const result_range & avail,const result_range & result)2435 should_warn_p (const sprintf_dom_walker::call_info &info,
2436 	       const result_range &avail, const result_range &result)
2437 {
2438   if (result.max <= avail.min)
2439     {
2440       /* The least amount of space remaining in the destination is big
2441 	 enough for the longest output.  */
2442       return false;
2443     }
2444 
2445   if (info.bounded)
2446     {
2447       if (warn_format_trunc == 1 && result.min <= avail.max
2448 	  && info.retval_used ())
2449 	{
2450 	  /* The likely amount of space remaining in the destination is big
2451 	     enough for the least output and the return value is used.  */
2452 	  return false;
2453 	}
2454 
2455       if (warn_format_trunc == 1 && result.likely <= avail.likely
2456 	  && !info.retval_used ())
2457 	{
2458 	  /* The likely amount of space remaining in the destination is big
2459 	     enough for the likely output and the return value is unused.  */
2460 	  return false;
2461 	}
2462 
2463       if (warn_format_trunc == 2
2464 	  && result.likely <= avail.min
2465 	  && (result.max <= avail.min
2466 	      || result.max > HOST_WIDE_INT_MAX))
2467 	{
2468 	  /* The minimum amount of space remaining in the destination is big
2469 	     enough for the longest output.  */
2470 	  return false;
2471 	}
2472     }
2473   else
2474     {
2475       if (warn_level == 1 && result.likely <= avail.likely)
2476 	{
2477 	  /* The likely amount of space remaining in the destination is big
2478 	     enough for the likely output.  */
2479 	  return false;
2480 	}
2481 
2482       if (warn_level == 2
2483 	  && result.likely <= avail.min
2484 	  && (result.max <= avail.min
2485 	      || result.max > HOST_WIDE_INT_MAX))
2486 	{
2487 	  /* The minimum amount of space remaining in the destination is big
2488 	     enough for the longest output.  */
2489 	  return false;
2490 	}
2491     }
2492 
2493   return true;
2494 }
2495 
/* At format string location described by DIRLOC in a call described
   by INFO, issue a warning for a directive DIR whose output may be
   in excess of the available space AVAIL_RANGE in the destination
   given the formatting result RES.  ARGLOC is the location of the
   directive's argument.  This function does nothing except decide
   whether to issue a warning for a possible write past the end or
   truncation and, if so, format the warning.
   Return true if a warning has been issued.  */
2503 
2504 static bool
maybe_warn(substring_loc & dirloc,location_t argloc,const sprintf_dom_walker::call_info & info,const result_range & avail_range,const result_range & res,const directive & dir)2505 maybe_warn (substring_loc &dirloc, location_t argloc,
2506 	    const sprintf_dom_walker::call_info &info,
2507 	    const result_range &avail_range, const result_range &res,
2508 	    const directive &dir)
2509 {
2510   if (!should_warn_p (info, avail_range, res))
2511     return false;
2512 
2513   /* A warning will definitely be issued below.  */
2514 
2515   /* The maximum byte count to reference in the warning.  Larger counts
2516      imply that the upper bound is unknown (and could be anywhere between
2517      RES.MIN + 1 and SIZE_MAX / 2) are printed as "N or more bytes" rather
2518      than "between N and X" where X is some huge number.  */
2519   unsigned HOST_WIDE_INT maxbytes = target_dir_max ();
2520 
2521   /* True when there is enough room in the destination for the least
2522      amount of a directive's output but not enough for its likely or
2523      maximum output.  */
2524   bool maybe = (res.min <= avail_range.max
2525 		&& (avail_range.min < res.likely
2526 		    || (res.max < HOST_WIDE_INT_MAX
2527 			&& avail_range.min < res.max)));
2528 
2529   /* Buffer for the directive in the host character set (used when
2530      the source character set is different).  */
2531   char hostdir[32];
2532 
2533   if (avail_range.min == avail_range.max)
2534     {
2535       /* The size of the destination region is exact.  */
2536       unsigned HOST_WIDE_INT navail = avail_range.max;
2537 
2538       if (target_to_host (*dir.beg) != '%')
2539 	{
2540 	  /* For plain character directives (i.e., the format string itself)
2541 	     but not others, point the caret at the first character that's
2542 	     past the end of the destination.  */
2543 	  if (navail < dir.len)
2544 	    dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
2545 	}
2546 
2547       if (*dir.beg == '\0')
2548 	{
2549 	  /* This is the terminating nul.  */
2550 	  gcc_assert (res.min == 1 && res.min == res.max);
2551 
2552 	  return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
2553 			  info.bounded
2554 			  ? (maybe
2555 			     ? G_("%qE output may be truncated before the "
2556 				  "last format character")
2557 			     : G_("%qE output truncated before the last "
2558 				  "format character"))
2559 			  : (maybe
2560 			     ? G_("%qE may write a terminating nul past the "
2561 				  "end of the destination")
2562 			     : G_("%qE writing a terminating nul past the "
2563 				  "end of the destination")),
2564 			  info.func);
2565 	}
2566 
2567       if (res.min == res.max)
2568 	{
2569 	  const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
2570 	  if (!info.bounded)
2571 	    return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2572 			      "%<%.*s%> directive writing %wu byte into a "
2573 			      "region of size %wu",
2574 			      "%<%.*s%> directive writing %wu bytes into a "
2575 			      "region of size %wu",
2576 			      (int) dir.len, d, res.min, navail);
2577 	  else if (maybe)
2578 	    return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2579 			      "%<%.*s%> directive output may be truncated "
2580 			      "writing %wu byte into a region of size %wu",
2581 			      "%<%.*s%> directive output may be truncated "
2582 			      "writing %wu bytes into a region of size %wu",
2583 			      (int) dir.len, d, res.min, navail);
2584 	  else
2585 	    return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2586 			      "%<%.*s%> directive output truncated writing "
2587 			      "%wu byte into a region of size %wu",
2588 			      "%<%.*s%> directive output truncated writing "
2589 			      "%wu bytes into a region of size %wu",
2590 			      (int) dir.len, d, res.min, navail);
2591 	}
2592       if (res.min == 0 && res.max < maxbytes)
2593 	return fmtwarn (dirloc, argloc, NULL,
2594 			info.warnopt (),
2595 			info.bounded
2596 			? (maybe
2597 			   ? G_("%<%.*s%> directive output may be truncated "
2598 				"writing up to %wu bytes into a region of "
2599 				"size %wu")
2600 			   : G_("%<%.*s%> directive output truncated writing "
2601 				"up to %wu bytes into a region of size %wu"))
2602 			: G_("%<%.*s%> directive writing up to %wu bytes "
2603 			     "into a region of size %wu"), (int) dir.len,
2604 			target_to_host (hostdir, sizeof hostdir, dir.beg),
2605 			res.max, navail);
2606 
2607       if (res.min == 0 && maxbytes <= res.max)
2608 	/* This is a special case to avoid issuing the potentially
2609 	   confusing warning:
2610 	     writing 0 or more bytes into a region of size 0.  */
2611 	return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2612 			info.bounded
2613 			? (maybe
2614 			   ? G_("%<%.*s%> directive output may be truncated "
2615 				"writing likely %wu or more bytes into a "
2616 				"region of size %wu")
2617 			   : G_("%<%.*s%> directive output truncated writing "
2618 				"likely %wu or more bytes into a region of "
2619 				"size %wu"))
2620 			: G_("%<%.*s%> directive writing likely %wu or more "
2621 			     "bytes into a region of size %wu"), (int) dir.len,
2622 			target_to_host (hostdir, sizeof hostdir, dir.beg),
2623 			res.likely, navail);
2624 
2625       if (res.max < maxbytes)
2626 	return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2627 			info.bounded
2628 			? (maybe
2629 			   ? G_("%<%.*s%> directive output may be truncated "
2630 				"writing between %wu and %wu bytes into a "
2631 				"region of size %wu")
2632 			   : G_("%<%.*s%> directive output truncated "
2633 				"writing between %wu and %wu bytes into a "
2634 				"region of size %wu"))
2635 			: G_("%<%.*s%> directive writing between %wu and "
2636 			     "%wu bytes into a region of size %wu"),
2637 			(int) dir.len,
2638 			target_to_host (hostdir, sizeof hostdir, dir.beg),
2639 			res.min, res.max, navail);
2640 
2641       return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2642 		      info.bounded
2643 		      ? (maybe
2644 			 ? G_("%<%.*s%> directive output may be truncated "
2645 			      "writing %wu or more bytes into a region of "
2646 			      "size %wu")
2647 			 : G_("%<%.*s%> directive output truncated writing "
2648 			      "%wu or more bytes into a region of size %wu"))
2649 		      : G_("%<%.*s%> directive writing %wu or more bytes "
2650 			   "into a region of size %wu"), (int) dir.len,
2651 		      target_to_host (hostdir, sizeof hostdir, dir.beg),
2652 		      res.min, navail);
2653     }
2654 
2655   /* The size of the destination region is a range.  */
2656 
2657   if (target_to_host (*dir.beg) != '%')
2658     {
2659       unsigned HOST_WIDE_INT navail = avail_range.max;
2660 
2661       /* For plain character directives (i.e., the format string itself)
2662 	 but not others, point the caret at the first character that's
2663 	 past the end of the destination.  */
2664       if (navail < dir.len)
2665 	dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
2666     }
2667 
2668   if (*dir.beg == '\0')
2669     {
2670       gcc_assert (res.min == 1 && res.min == res.max);
2671 
2672       return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
2673 		      info.bounded
2674 		      ? (maybe
2675 			 ? G_("%qE output may be truncated before the last "
2676 			      "format character")
2677 			 : G_("%qE output truncated before the last format "
2678 			      "character"))
2679 		      : (maybe
2680 			 ? G_("%qE may write a terminating nul past the end "
2681 			      "of the destination")
2682 			 : G_("%qE writing a terminating nul past the end "
2683 			      "of the destination")), info.func);
2684     }
2685 
2686   if (res.min == res.max)
2687     {
2688       const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
2689       if (!info.bounded)
2690 	return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2691 			  "%<%.*s%> directive writing %wu byte into a region "
2692 			  "of size between %wu and %wu",
2693 			  "%<%.*s%> directive writing %wu bytes into a region "
2694 			  "of size between %wu and %wu", (int) dir.len, d,
2695 			  res.min, avail_range.min, avail_range.max);
2696       else if (maybe)
2697 	return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2698 			  "%<%.*s%> directive output may be truncated writing "
2699 			  "%wu byte into a region of size between %wu and %wu",
2700 			  "%<%.*s%> directive output may be truncated writing "
2701 			  "%wu bytes into a region of size between %wu and "
2702 			  "%wu", (int) dir.len, d, res.min, avail_range.min,
2703 			  avail_range.max);
2704       else
2705 	return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2706 			  "%<%.*s%> directive output truncated writing %wu "
2707 			  "byte into a region of size between %wu and %wu",
2708 			  "%<%.*s%> directive output truncated writing %wu "
2709 			  "bytes into a region of size between %wu and %wu",
2710 			  (int) dir.len, d, res.min, avail_range.min,
2711 			  avail_range.max);
2712     }
2713 
2714   if (res.min == 0 && res.max < maxbytes)
2715     return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2716 		    info.bounded
2717 		    ? (maybe
2718 		       ? G_("%<%.*s%> directive output may be truncated "
2719 			    "writing up to %wu bytes into a region of size "
2720 			    "between %wu and %wu")
2721 		       : G_("%<%.*s%> directive output truncated writing "
2722 			    "up to %wu bytes into a region of size between "
2723 			    "%wu and %wu"))
2724 		    : G_("%<%.*s%> directive writing up to %wu bytes "
2725 			 "into a region of size between %wu and %wu"),
2726 		    (int) dir.len,
2727 		    target_to_host (hostdir, sizeof hostdir, dir.beg),
2728 		    res.max, avail_range.min, avail_range.max);
2729 
2730   if (res.min == 0 && maxbytes <= res.max)
2731     /* This is a special case to avoid issuing the potentially confusing
2732        warning:
2733 	 writing 0 or more bytes into a region of size between 0 and N.  */
2734     return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2735 		    info.bounded
2736 		    ? (maybe
2737 		       ? G_("%<%.*s%> directive output may be truncated "
2738 			    "writing likely %wu or more bytes into a region "
2739 			    "of size between %wu and %wu")
2740 		       : G_("%<%.*s%> directive output truncated writing "
2741 			    "likely %wu or more bytes into a region of size "
2742 			    "between %wu and %wu"))
2743 		    : G_("%<%.*s%> directive writing likely %wu or more bytes "
2744 			 "into a region of size between %wu and %wu"),
2745 		    (int) dir.len,
2746 		    target_to_host (hostdir, sizeof hostdir, dir.beg),
2747 		    res.likely, avail_range.min, avail_range.max);
2748 
2749   if (res.max < maxbytes)
2750     return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2751 		    info.bounded
2752 		    ? (maybe
2753 		       ? G_("%<%.*s%> directive output may be truncated "
2754 			    "writing between %wu and %wu bytes into a region "
2755 			    "of size between %wu and %wu")
2756 		       : G_("%<%.*s%> directive output truncated writing "
2757 			    "between %wu and %wu bytes into a region of size "
2758 			    "between %wu and %wu"))
2759 		    : G_("%<%.*s%> directive writing between %wu and "
2760 			 "%wu bytes into a region of size between %wu and "
2761 			 "%wu"), (int) dir.len,
2762 		    target_to_host (hostdir, sizeof hostdir, dir.beg),
2763 		    res.min, res.max, avail_range.min, avail_range.max);
2764 
2765   return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2766 		  info.bounded
2767 		  ? (maybe
2768 		     ? G_("%<%.*s%> directive output may be truncated writing "
2769 			  "%wu or more bytes into a region of size between "
2770 			  "%wu and %wu")
2771 		     : G_("%<%.*s%> directive output truncated writing "
2772 			  "%wu or more bytes into a region of size between "
2773 			  "%wu and %wu"))
2774 		  : G_("%<%.*s%> directive writing %wu or more bytes "
2775 		       "into a region of size between %wu and %wu"),
2776 		  (int) dir.len,
2777 		  target_to_host (hostdir, sizeof hostdir, dir.beg),
2778 		  res.min, avail_range.min, avail_range.max);
2779 }
2780 
2781 /* Compute the length of the output resulting from the directive DIR
2782    in a call described by INFO and update the overall result of the call
2783    in *RES.  Return true if the directive has been handled.  */
2784 
2785 static bool
format_directive(const sprintf_dom_walker::call_info & info,format_result * res,const directive & dir,class vr_values * vr_values)2786 format_directive (const sprintf_dom_walker::call_info &info,
2787 		  format_result *res, const directive &dir,
2788 		  class vr_values *vr_values)
2789 {
2790   /* Offset of the beginning of the directive from the beginning
2791      of the format string.  */
2792   size_t offset = dir.beg - info.fmtstr;
2793   size_t start = offset;
2794   size_t length = offset + dir.len - !!dir.len;
2795 
2796   /* Create a location for the whole directive from the % to the format
2797      specifier.  */
2798   substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
2799 			offset, start, length);
2800 
2801   /* Also get the location of the argument if possible.
2802      This doesn't work for integer literals or function calls.  */
2803   location_t argloc = UNKNOWN_LOCATION;
2804   if (dir.arg)
2805     argloc = EXPR_LOCATION (dir.arg);
2806 
2807   /* Bail when there is no function to compute the output length,
2808      or when minimum length checking has been disabled.   */
2809   if (!dir.fmtfunc || res->range.min >= HOST_WIDE_INT_MAX)
2810     return false;
2811 
2812   /* Compute the range of lengths of the formatted output.  */
2813   fmtresult fmtres = dir.fmtfunc (dir, dir.arg, vr_values);
2814 
2815   /* Record whether the output of all directives is known to be
2816      bounded by some maximum, implying that their arguments are
2817      either known exactly or determined to be in a known range
2818      or, for strings, limited by the upper bounds of the arrays
2819      they refer to.  */
2820   res->knownrange &= fmtres.knownrange;
2821 
2822   if (!fmtres.knownrange)
2823     {
2824       /* Only when the range is known, check it against the host value
2825 	 of INT_MAX + (the number of bytes of the "%.*Lf" directive with
2826 	 INT_MAX precision, which is the longest possible output of any
2827 	 single directive).  That's the largest valid byte count (though
2828 	 not valid call to a printf-like function because it can never
2829 	 return such a count).  Otherwise, the range doesn't correspond
2830 	 to known values of the argument.  */
2831       if (fmtres.range.max > target_dir_max ())
2832 	{
2833 	  /* Normalize the MAX counter to avoid having to deal with it
2834 	     later.  The counter can be less than HOST_WIDE_INT_M1U
2835 	     when compiling for an ILP32 target on an LP64 host.  */
2836 	  fmtres.range.max = HOST_WIDE_INT_M1U;
2837 	  /* Disable exact and maximum length checking after a failure
2838 	     to determine the maximum number of characters (for example
2839 	     for wide characters or wide character strings) but continue
2840 	     tracking the minimum number of characters.  */
2841 	  res->range.max = HOST_WIDE_INT_M1U;
2842 	}
2843 
2844       if (fmtres.range.min > target_dir_max ())
2845 	{
2846 	  /* Disable exact length checking after a failure to determine
2847 	     even the minimum number of characters (it shouldn't happen
2848 	     except in an error) but keep tracking the minimum and maximum
2849 	     number of characters.  */
2850 	  return true;
2851 	}
2852     }
2853 
2854   /* Buffer for the directive in the host character set (used when
2855      the source character set is different).  */
2856   char hostdir[32];
2857 
2858   int dirlen = dir.len;
2859 
2860   if (fmtres.nullp)
2861     {
2862       fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2863 	       "%<%.*s%> directive argument is null",
2864 	       dirlen, target_to_host (hostdir, sizeof hostdir, dir.beg));
2865 
2866       /* Don't bother processing the rest of the format string.  */
2867       res->warned = true;
2868       res->range.min = HOST_WIDE_INT_M1U;
2869       res->range.max = HOST_WIDE_INT_M1U;
2870       return false;
2871     }
2872 
2873   /* Compute the number of available bytes in the destination.  There
2874      must always be at least one byte of space for the terminating
2875      NUL that's appended after the format string has been processed.  */
2876   result_range avail_range = bytes_remaining (info.objsize, *res);
2877 
2878   bool warned = res->warned;
2879 
2880   if (!warned)
2881     warned = maybe_warn (dirloc, argloc, info, avail_range,
2882 			 fmtres.range, dir);
2883 
2884   /* Bump up the total maximum if it isn't too big.  */
2885   if (res->range.max < HOST_WIDE_INT_MAX
2886       && fmtres.range.max < HOST_WIDE_INT_MAX)
2887     res->range.max += fmtres.range.max;
2888 
2889   /* Raise the total unlikely maximum by the larger of the maximum
2890      and the unlikely maximum.  */
2891   unsigned HOST_WIDE_INT save = res->range.unlikely;
2892   if (fmtres.range.max < fmtres.range.unlikely)
2893     res->range.unlikely += fmtres.range.unlikely;
2894   else
2895     res->range.unlikely += fmtres.range.max;
2896 
2897   if (res->range.unlikely < save)
2898     res->range.unlikely = HOST_WIDE_INT_M1U;
2899 
2900   res->range.min += fmtres.range.min;
2901   res->range.likely += fmtres.range.likely;
2902 
2903   /* Has the minimum directive output length exceeded the maximum
2904      of 4095 bytes required to be supported?  */
2905   bool minunder4k = fmtres.range.min < 4096;
2906   bool maxunder4k = fmtres.range.max < 4096;
2907   /* Clear UNDER4K in the overall result if the maximum has exceeded
2908      the 4k (this is necessary to avoid the return valuye optimization
2909      that may not be safe in the maximum case).  */
2910   if (!maxunder4k)
2911     res->under4k = false;
2912 
2913   if (!warned
2914       /* Only warn at level 2.  */
2915       && warn_level > 1
2916       && (!minunder4k
2917 	  || (!maxunder4k && fmtres.range.max < HOST_WIDE_INT_MAX)))
2918     {
2919       /* The directive output may be longer than the maximum required
2920 	 to be handled by an implementation according to 7.21.6.1, p15
2921 	 of C11.  Warn on this only at level 2 but remember this and
2922 	 prevent folding the return value when done.  This allows for
2923 	 the possibility of the actual libc call failing due to ENOMEM
2924 	 (like Glibc does under some conditions).  */
2925 
2926       if (fmtres.range.min == fmtres.range.max)
2927 	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2928 			  "%<%.*s%> directive output of %wu bytes exceeds "
2929 			  "minimum required size of 4095", dirlen,
2930 			  target_to_host (hostdir, sizeof hostdir, dir.beg),
2931 			  fmtres.range.min);
2932       else
2933 	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2934 			  minunder4k
2935 			  ? G_("%<%.*s%> directive output between %wu and %wu "
2936 			       "bytes may exceed minimum required size of "
2937 			       "4095")
2938 			  : G_("%<%.*s%> directive output between %wu and %wu "
2939 			       "bytes exceeds minimum required size of 4095"),
2940 			  dirlen,
2941 			  target_to_host (hostdir, sizeof hostdir, dir.beg),
2942 			  fmtres.range.min, fmtres.range.max);
2943     }
2944 
2945   /* Has the likely and maximum directive output exceeded INT_MAX?  */
2946   bool likelyximax = *dir.beg && res->range.likely > target_int_max ();
2947   /* Don't consider the maximum to be in excess when it's the result
2948      of a string of unknown length (i.e., whose maximum has been set
2949      to be greater than or equal to HOST_WIDE_INT_MAX.  */
2950   bool maxximax = (*dir.beg
2951 		   && res->range.max > target_int_max ()
2952 		   && res->range.max < HOST_WIDE_INT_MAX);
2953 
2954   if (!warned
2955       /* Warn for the likely output size at level 1.  */
2956       && (likelyximax
2957 	  /* But only warn for the maximum at level 2.  */
2958 	  || (warn_level > 1
2959 	      && maxximax
2960 	      && fmtres.range.max < HOST_WIDE_INT_MAX)))
2961     {
2962       /* The directive output causes the total length of output
2963 	 to exceed INT_MAX bytes.  */
2964 
2965       if (fmtres.range.min == fmtres.range.max)
2966 	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2967 			  "%<%.*s%> directive output of %wu bytes causes "
2968 			  "result to exceed %<INT_MAX%>", dirlen,
2969 			  target_to_host (hostdir, sizeof hostdir, dir.beg),
2970 			  fmtres.range.min);
2971       else
2972 	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2973 			  fmtres.range.min > target_int_max ()
2974 			  ? G_ ("%<%.*s%> directive output between %wu and "
2975 				"%wu bytes causes result to exceed "
2976 				"%<INT_MAX%>")
2977 			  : G_ ("%<%.*s%> directive output between %wu and "
2978 				"%wu bytes may cause result to exceed "
2979 				"%<INT_MAX%>"), dirlen,
2980 			  target_to_host (hostdir, sizeof hostdir, dir.beg),
2981 			  fmtres.range.min, fmtres.range.max);
2982     }
2983 
2984   if (warned && fmtres.range.min < fmtres.range.likely
2985       && fmtres.range.likely < fmtres.range.max)
2986     inform_n (info.fmtloc, fmtres.range.likely,
2987 	      "assuming directive output of %wu byte",
2988 	      "assuming directive output of %wu bytes",
2989 	      fmtres.range.likely);
2990 
2991   if (warned && fmtres.argmin)
2992     {
2993       if (fmtres.argmin == fmtres.argmax)
2994 	inform (info.fmtloc, "directive argument %qE", fmtres.argmin);
2995       else if (fmtres.knownrange)
2996 	inform (info.fmtloc, "directive argument in the range [%E, %E]",
2997 		fmtres.argmin, fmtres.argmax);
2998       else
2999 	inform (info.fmtloc,
3000 		"using the range [%E, %E] for directive argument",
3001 		fmtres.argmin, fmtres.argmax);
3002     }
3003 
3004   res->warned |= warned;
3005 
3006   if (!dir.beg[0] && res->warned && info.objsize < HOST_WIDE_INT_MAX)
3007     {
3008       /* If a warning has been issued for buffer overflow or truncation
3009 	 (but not otherwise) help the user figure out how big a buffer
3010 	 they need.  */
3011 
3012       location_t callloc = gimple_location (info.callstmt);
3013 
3014       unsigned HOST_WIDE_INT min = res->range.min;
3015       unsigned HOST_WIDE_INT max = res->range.max;
3016 
3017       if (min == max)
3018 	inform (callloc,
3019 		(min == 1
3020 		 ? G_("%qE output %wu byte into a destination of size %wu")
3021 		 : G_("%qE output %wu bytes into a destination of size %wu")),
3022 		info.func, min, info.objsize);
3023       else if (max < HOST_WIDE_INT_MAX)
3024 	inform (callloc,
3025 		"%qE output between %wu and %wu bytes into "
3026 		"a destination of size %wu",
3027 		info.func, min, max, info.objsize);
3028       else if (min < res->range.likely && res->range.likely < max)
3029 	inform (callloc,
3030 		"%qE output %wu or more bytes (assuming %wu) into "
3031 		"a destination of size %wu",
3032 		info.func, min, res->range.likely, info.objsize);
3033       else
3034 	inform (callloc,
3035 		"%qE output %wu or more bytes into a destination of size %wu",
3036 		info.func, min, info.objsize);
3037     }
3038 
3039   if (dump_file && *dir.beg)
3040     {
3041       fprintf (dump_file,
3042 	       "    Result: "
3043 	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
3044 	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC " ("
3045 	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
3046 	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ")\n",
3047 	       fmtres.range.min, fmtres.range.likely,
3048 	       fmtres.range.max, fmtres.range.unlikely,
3049 	       res->range.min, res->range.likely,
3050 	       res->range.max, res->range.unlikely);
3051     }
3052 
3053   return true;
3054 }
3055 
3056 /* Parse a format directive in function call described by INFO starting
3057    at STR and populate DIR structure.  Bump up *ARGNO by the number of
3058    arguments extracted for the directive.  Return the length of
3059    the directive.  */
3060 
3061 static size_t
parse_directive(sprintf_dom_walker::call_info & info,directive & dir,format_result * res,const char * str,unsigned * argno,vr_values * vr_values)3062 parse_directive (sprintf_dom_walker::call_info &info,
3063 		 directive &dir, format_result *res,
3064 		 const char *str, unsigned *argno,
3065 		 vr_values *vr_values)
3066 {
3067   const char *pcnt = strchr (str, target_percent);
3068   dir.beg = str;
3069 
3070   if (size_t len = pcnt ? pcnt - str : *str ? strlen (str) : 1)
3071     {
3072       /* This directive is either a plain string or the terminating nul
3073 	 (which isn't really a directive but it simplifies things to
3074 	 handle it as if it were).  */
3075       dir.len = len;
3076       dir.fmtfunc = format_plain;
3077 
3078       if (dump_file)
3079 	{
3080 	  fprintf (dump_file, "  Directive %u at offset "
3081 		   HOST_WIDE_INT_PRINT_UNSIGNED ": \"%.*s\", "
3082 		   "length = " HOST_WIDE_INT_PRINT_UNSIGNED "\n",
3083 		   dir.dirno,
3084 		   (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3085 		   (int)dir.len, dir.beg, (unsigned HOST_WIDE_INT) dir.len);
3086 	}
3087 
3088       return len - !*str;
3089     }
3090 
3091   const char *pf = pcnt + 1;
3092 
3093     /* POSIX numbered argument index or zero when none.  */
3094   HOST_WIDE_INT dollar = 0;
3095 
3096   /* With and precision.  -1 when not specified, HOST_WIDE_INT_MIN
3097      when given by a va_list argument, and a non-negative value
3098      when specified in the format string itself.  */
3099   HOST_WIDE_INT width = -1;
3100   HOST_WIDE_INT precision = -1;
3101 
3102   /* Pointers to the beginning of the width and precision decimal
3103      string (if any) within the directive.  */
3104   const char *pwidth = 0;
3105   const char *pprec = 0;
3106 
3107   /* When the value of the decimal string that specifies width or
3108      precision is out of range, points to the digit that causes
3109      the value to exceed the limit.  */
3110   const char *werange = NULL;
3111   const char *perange = NULL;
3112 
3113   /* Width specified via the asterisk.  Need not be INTEGER_CST.
3114      For vararg functions set to void_node.  */
3115   tree star_width = NULL_TREE;
3116 
3117   /* Width specified via the asterisk.  Need not be INTEGER_CST.
3118      For vararg functions set to void_node.  */
3119   tree star_precision = NULL_TREE;
3120 
3121   if (ISDIGIT (target_to_host (*pf)))
3122     {
3123       /* This could be either a POSIX positional argument, the '0'
3124 	 flag, or a width, depending on what follows.  Store it as
3125 	 width and sort it out later after the next character has
3126 	 been seen.  */
3127       pwidth = pf;
3128       width = target_strtol10 (&pf, &werange);
3129     }
3130   else if (target_to_host (*pf) == '*')
3131     {
3132       /* Similarly to the block above, this could be either a POSIX
3133 	 positional argument or a width, depending on what follows.  */
3134       if (*argno < gimple_call_num_args (info.callstmt))
3135 	star_width = gimple_call_arg (info.callstmt, (*argno)++);
3136       else
3137 	star_width = void_node;
3138       ++pf;
3139     }
3140 
3141   if (target_to_host (*pf) == '$')
3142     {
3143       /* Handle the POSIX dollar sign which references the 1-based
3144 	 positional argument number.  */
3145       if (width != -1)
3146 	dollar = width + info.argidx;
3147       else if (star_width
3148 	       && TREE_CODE (star_width) == INTEGER_CST
3149 	       && (TYPE_PRECISION (TREE_TYPE (star_width))
3150 		   <= TYPE_PRECISION (integer_type_node)))
3151 	dollar = width + tree_to_shwi (star_width);
3152 
3153       /* Bail when the numbered argument is out of range (it will
3154 	 have already been diagnosed by -Wformat).  */
3155       if (dollar == 0
3156 	  || dollar == (int)info.argidx
3157 	  || dollar > gimple_call_num_args (info.callstmt))
3158 	return false;
3159 
3160       --dollar;
3161 
3162       star_width = NULL_TREE;
3163       width = -1;
3164       ++pf;
3165     }
3166 
3167   if (dollar || !star_width)
3168     {
3169       if (width != -1)
3170 	{
3171 	  if (width == 0)
3172 	    {
3173 	      /* The '0' that has been interpreted as a width above is
3174 		 actually a flag.  Reset HAVE_WIDTH, set the '0' flag,
3175 		 and continue processing other flags.  */
3176 	      width = -1;
3177 	      dir.set_flag ('0');
3178 	    }
3179 	  else if (!dollar)
3180 	    {
3181 	      /* (Non-zero) width has been seen.  The next character
3182 		 is either a period or a digit.  */
3183 	      goto start_precision;
3184 	    }
3185 	}
3186       /* When either '$' has been seen, or width has not been seen,
3187 	 the next field is the optional flags followed by an optional
3188 	 width.  */
3189       for ( ; ; ) {
3190 	switch (target_to_host (*pf))
3191 	  {
3192 	  case ' ':
3193 	  case '0':
3194 	  case '+':
3195 	  case '-':
3196 	  case '#':
3197 	    dir.set_flag (target_to_host (*pf++));
3198 	    break;
3199 
3200 	  default:
3201 	    goto start_width;
3202 	  }
3203       }
3204 
3205     start_width:
3206       if (ISDIGIT (target_to_host (*pf)))
3207 	{
3208 	  werange = 0;
3209 	  pwidth = pf;
3210 	  width = target_strtol10 (&pf, &werange);
3211 	}
3212       else if (target_to_host (*pf) == '*')
3213 	{
3214 	  if (*argno < gimple_call_num_args (info.callstmt))
3215 	    star_width = gimple_call_arg (info.callstmt, (*argno)++);
3216 	  else
3217 	    {
3218 	      /* This is (likely) a va_list.  It could also be an invalid
3219 		 call with insufficient arguments.  */
3220 	      star_width = void_node;
3221 	    }
3222 	  ++pf;
3223 	}
3224       else if (target_to_host (*pf) == '\'')
3225 	{
3226 	  /* The POSIX apostrophe indicating a numeric grouping
3227 	     in the current locale.  Even though it's possible to
3228 	     estimate the upper bound on the size of the output
3229 	     based on the number of digits it probably isn't worth
3230 	     continuing.  */
3231 	  return 0;
3232 	}
3233     }
3234 
3235  start_precision:
3236   if (target_to_host (*pf) == '.')
3237     {
3238       ++pf;
3239 
3240       if (ISDIGIT (target_to_host (*pf)))
3241 	{
3242 	  pprec = pf;
3243 	  precision = target_strtol10 (&pf, &perange);
3244 	}
3245       else if (target_to_host (*pf) == '*')
3246 	{
3247 	  if (*argno < gimple_call_num_args (info.callstmt))
3248 	    star_precision = gimple_call_arg (info.callstmt, (*argno)++);
3249 	  else
3250 	    {
3251 	      /* This is (likely) a va_list.  It could also be an invalid
3252 		 call with insufficient arguments.  */
3253 	      star_precision = void_node;
3254 	    }
3255 	  ++pf;
3256 	}
3257       else
3258 	{
3259 	  /* The decimal precision or the asterisk are optional.
3260 	     When neither is dirified it's taken to be zero.  */
3261 	  precision = 0;
3262 	}
3263     }
3264 
3265   switch (target_to_host (*pf))
3266     {
3267     case 'h':
3268       if (target_to_host (pf[1]) == 'h')
3269 	{
3270 	  ++pf;
3271 	  dir.modifier = FMT_LEN_hh;
3272 	}
3273       else
3274 	dir.modifier = FMT_LEN_h;
3275       ++pf;
3276       break;
3277 
3278     case 'j':
3279       dir.modifier = FMT_LEN_j;
3280       ++pf;
3281       break;
3282 
3283     case 'L':
3284       dir.modifier = FMT_LEN_L;
3285       ++pf;
3286       break;
3287 
3288     case 'l':
3289       if (target_to_host (pf[1]) == 'l')
3290 	{
3291 	  ++pf;
3292 	  dir.modifier = FMT_LEN_ll;
3293 	}
3294       else
3295 	dir.modifier = FMT_LEN_l;
3296       ++pf;
3297       break;
3298 
3299     case 't':
3300       dir.modifier = FMT_LEN_t;
3301       ++pf;
3302       break;
3303 
3304     case 'z':
3305       dir.modifier = FMT_LEN_z;
3306       ++pf;
3307       break;
3308     }
3309 
3310   switch (target_to_host (*pf))
3311     {
3312       /* Handle a sole '%' character the same as "%%" but since it's
3313 	 undefined prevent the result from being folded.  */
3314     case '\0':
3315       --pf;
3316       res->range.min = res->range.max = HOST_WIDE_INT_M1U;
3317       /* FALLTHRU */
3318     case '%':
3319       dir.fmtfunc = format_percent;
3320       break;
3321 
3322     case 'a':
3323     case 'A':
3324     case 'e':
3325     case 'E':
3326     case 'f':
3327     case 'F':
3328     case 'g':
3329     case 'G':
3330       res->floating = true;
3331       dir.fmtfunc = format_floating;
3332       break;
3333 
3334     case 'd':
3335     case 'i':
3336     case 'o':
3337     case 'u':
3338     case 'x':
3339     case 'X':
3340       dir.fmtfunc = format_integer;
3341       break;
3342 
3343     case 'p':
3344       /* The %p output is implementation-defined.  It's possible
3345 	 to determine this format but due to extensions (edirially
3346 	 those of the Linux kernel -- see bug 78512) the first %p
3347 	 in the format string disables any further processing.  */
3348       return false;
3349 
3350     case 'n':
3351       /* %n has side-effects even when nothing is actually printed to
3352 	 any buffer.  */
3353       info.nowrite = false;
3354       dir.fmtfunc = format_none;
3355       break;
3356 
3357     case 'c':
3358       dir.fmtfunc = format_character;
3359       break;
3360 
3361     case 'S':
3362     case 's':
3363       dir.fmtfunc = format_string;
3364       break;
3365 
3366     default:
3367       /* Unknown conversion specification.  */
3368       return 0;
3369     }
3370 
3371   dir.specifier = target_to_host (*pf++);
3372 
3373   /* Store the length of the format directive.  */
3374   dir.len = pf - pcnt;
3375 
3376   /* Buffer for the directive in the host character set (used when
3377      the source character set is different).  */
3378   char hostdir[32];
3379 
3380   if (star_width)
3381     {
3382       if (INTEGRAL_TYPE_P (TREE_TYPE (star_width)))
3383 	dir.set_width (star_width, vr_values);
3384       else
3385 	{
3386 	  /* Width specified by a va_list takes on the range [0, -INT_MIN]
3387 	     (width is the absolute value of that specified).  */
3388 	  dir.width[0] = 0;
3389 	  dir.width[1] = target_int_max () + 1;
3390 	}
3391     }
3392   else
3393     {
3394       if (width == LONG_MAX && werange)
3395 	{
3396 	  size_t begin = dir.beg - info.fmtstr + (pwidth - pcnt);
3397 	  size_t caret = begin + (werange - pcnt);
3398 	  size_t end = pf - info.fmtstr - 1;
3399 
3400 	  /* Create a location for the width part of the directive,
3401 	     pointing the caret at the first out-of-range digit.  */
3402 	  substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3403 				caret, begin, end);
3404 
3405 	  fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3406 		   "%<%.*s%> directive width out of range", (int) dir.len,
3407 		   target_to_host (hostdir, sizeof hostdir, dir.beg));
3408 	}
3409 
3410       dir.set_width (width);
3411     }
3412 
3413   if (star_precision)
3414     {
3415       if (INTEGRAL_TYPE_P (TREE_TYPE (star_precision)))
3416 	dir.set_precision (star_precision, vr_values);
3417       else
3418 	{
3419 	  /* Precision specified by a va_list takes on the range [-1, INT_MAX]
3420 	     (unlike width, negative precision is ignored).  */
3421 	  dir.prec[0] = -1;
3422 	  dir.prec[1] = target_int_max ();
3423 	}
3424     }
3425   else
3426     {
3427       if (precision == LONG_MAX && perange)
3428 	{
3429 	  size_t begin = dir.beg - info.fmtstr + (pprec - pcnt) - 1;
3430 	  size_t caret = dir.beg - info.fmtstr + (perange - pcnt) - 1;
3431 	  size_t end = pf - info.fmtstr - 2;
3432 
3433 	  /* Create a location for the precision part of the directive,
3434 	     including the leading period, pointing the caret at the first
3435 	     out-of-range digit .  */
3436 	  substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3437 				caret, begin, end);
3438 
3439 	  fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3440 		   "%<%.*s%> directive precision out of range", (int) dir.len,
3441 		   target_to_host (hostdir, sizeof hostdir, dir.beg));
3442 	}
3443 
3444       dir.set_precision (precision);
3445     }
3446 
3447   /* Extract the argument if the directive takes one and if it's
3448      available (e.g., the function doesn't take a va_list).  Treat
3449      missing arguments the same as va_list, even though they will
3450      have likely already been diagnosed by -Wformat.  */
3451   if (dir.specifier != '%'
3452       && *argno < gimple_call_num_args (info.callstmt))
3453     dir.arg = gimple_call_arg (info.callstmt, dollar ? dollar : (*argno)++);
3454 
3455   if (dump_file)
3456     {
3457       fprintf (dump_file,
3458 	       "  Directive %u at offset " HOST_WIDE_INT_PRINT_UNSIGNED
3459 	       ": \"%.*s\"",
3460 	       dir.dirno,
3461 	       (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3462 	       (int)dir.len, dir.beg);
3463       if (star_width)
3464 	{
3465 	  if (dir.width[0] == dir.width[1])
3466 	    fprintf (dump_file, ", width = " HOST_WIDE_INT_PRINT_DEC,
3467 		     dir.width[0]);
3468 	  else
3469 	    fprintf (dump_file,
3470 		     ", width in range [" HOST_WIDE_INT_PRINT_DEC
3471 		     ", " HOST_WIDE_INT_PRINT_DEC "]",
3472 		     dir.width[0], dir.width[1]);
3473 	}
3474 
3475       if (star_precision)
3476 	{
3477 	  if (dir.prec[0] == dir.prec[1])
3478 	    fprintf (dump_file, ", precision = " HOST_WIDE_INT_PRINT_DEC,
3479 		     dir.prec[0]);
3480 	  else
3481 	    fprintf (dump_file,
3482 		     ", precision in range [" HOST_WIDE_INT_PRINT_DEC
3483 		     HOST_WIDE_INT_PRINT_DEC "]",
3484 		     dir.prec[0], dir.prec[1]);
3485 	}
3486       fputc ('\n', dump_file);
3487     }
3488 
3489   return dir.len;
3490 }
3491 
3492 /* Compute the length of the output resulting from the call to a formatted
3493    output function described by INFO and store the result of the call in
3494    *RES.  Issue warnings for detected past the end writes.  Return true
3495    if the complete format string has been processed and *RES can be relied
3496    on, false otherwise (e.g., when a unknown or unhandled directive was seen
3497    that caused the processing to be terminated early).  */
3498 
3499 bool
compute_format_length(call_info & info,format_result * res)3500 sprintf_dom_walker::compute_format_length (call_info &info,
3501 					   format_result *res)
3502 {
3503   if (dump_file)
3504     {
3505       location_t callloc = gimple_location (info.callstmt);
3506       fprintf (dump_file, "%s:%i: ",
3507 	       LOCATION_FILE (callloc), LOCATION_LINE (callloc));
3508       print_generic_expr (dump_file, info.func, dump_flags);
3509 
3510       fprintf (dump_file,
3511 	       ": objsize = " HOST_WIDE_INT_PRINT_UNSIGNED
3512 	       ", fmtstr = \"%s\"\n",
3513 	       info.objsize, info.fmtstr);
3514     }
3515 
3516   /* Reset the minimum and maximum byte counters.  */
3517   res->range.min = res->range.max = 0;
3518 
3519   /* No directive has been seen yet so the length of output is bounded
3520      by the known range [0, 0] (with no conversion producing more than
3521      4K bytes) until determined otherwise.  */
3522   res->knownrange = true;
3523   res->under4k = true;
3524   res->floating = false;
3525   res->warned = false;
3526 
3527   /* 1-based directive counter.  */
3528   unsigned dirno = 1;
3529 
3530   /* The variadic argument counter.  */
3531   unsigned argno = info.argidx;
3532 
3533   for (const char *pf = info.fmtstr; ; ++dirno)
3534     {
3535       directive dir = directive ();
3536       dir.dirno = dirno;
3537 
3538       size_t n = parse_directive (info, dir, res, pf, &argno,
3539 				  evrp_range_analyzer.get_vr_values ());
3540 
3541       /* Return failure if the format function fails.  */
3542       if (!format_directive (info, res, dir,
3543 			     evrp_range_analyzer.get_vr_values ()))
3544 	return false;
3545 
3546       /* Return success the directive is zero bytes long and it's
3547 	 the last think in the format string (i.e., it's the terminating
3548 	 nul, which isn't really a directive but handling it as one makes
3549 	 things simpler).  */
3550       if (!n)
3551 	return *pf == '\0';
3552 
3553       pf += n;
3554     }
3555 
3556   /* The complete format string was processed (with or without warnings).  */
3557   return true;
3558 }
3559 
3560 /* Return the size of the object referenced by the expression DEST if
3561    available, or -1 otherwise.  */
3562 
3563 static unsigned HOST_WIDE_INT
get_destination_size(tree dest)3564 get_destination_size (tree dest)
3565 {
3566   /* Initialize object size info before trying to compute it.  */
3567   init_object_sizes ();
3568 
3569   /* Use __builtin_object_size to determine the size of the destination
3570      object.  When optimizing, determine the smallest object (such as
3571      a member array as opposed to the whole enclosing object), otherwise
3572      use type-zero object size to determine the size of the enclosing
3573      object (the function fails without optimization in this type).  */
3574   int ost = optimize > 0;
3575   unsigned HOST_WIDE_INT size;
3576   if (compute_builtin_object_size (dest, ost, &size))
3577     return size;
3578 
3579   return HOST_WIDE_INT_M1U;
3580 }
3581 
3582 /* Return true if the call described by INFO with result RES safe to
3583    optimize (i.e., no undefined behavior), and set RETVAL to the range
3584    of its return values.  */
3585 
3586 static bool
is_call_safe(const sprintf_dom_walker::call_info & info,const format_result & res,bool under4k,unsigned HOST_WIDE_INT retval[2])3587 is_call_safe (const sprintf_dom_walker::call_info &info,
3588 	      const format_result &res, bool under4k,
3589 	      unsigned HOST_WIDE_INT retval[2])
3590 {
3591   if (under4k && !res.under4k)
3592     return false;
3593 
3594   /* The minimum return value.  */
3595   retval[0] = res.range.min;
3596 
3597   /* The maximum return value is in most cases bounded by RES.RANGE.MAX
3598      but in cases involving multibyte characters could be as large as
3599      RES.RANGE.UNLIKELY.  */
3600   retval[1]
3601     = res.range.unlikely < res.range.max ? res.range.max : res.range.unlikely;
3602 
3603   /* Adjust the number of bytes which includes the terminating nul
3604      to reflect the return value of the function which does not.
3605      Because the valid range of the function is [INT_MIN, INT_MAX],
3606      a valid range before the adjustment below is [0, INT_MAX + 1]
3607      (the functions only return negative values on error or undefined
3608      behavior).  */
3609   if (retval[0] <= target_int_max () + 1)
3610     --retval[0];
3611   if (retval[1] <= target_int_max () + 1)
3612     --retval[1];
3613 
3614   /* Avoid the return value optimization when the behavior of the call
3615      is undefined either because any directive may have produced 4K or
3616      more of output, or the return value exceeds INT_MAX, or because
3617      the output overflows the destination object (but leave it enabled
3618      when the function is bounded because then the behavior is well-
3619      defined).  */
3620   if (retval[0] == retval[1]
3621       && (info.bounded || retval[0] < info.objsize)
3622       && retval[0] <= target_int_max ())
3623     return true;
3624 
3625   if ((info.bounded || retval[1] < info.objsize)
3626       && (retval[0] < target_int_max ()
3627 	  && retval[1] < target_int_max ()))
3628     return true;
3629 
3630   if (!under4k && (info.bounded || retval[0] < info.objsize))
3631     return true;
3632 
3633   return false;
3634 }
3635 
3636 /* Given a suitable result RES of a call to a formatted output function
3637    described by INFO, substitute the result for the return value of
3638    the call.  The result is suitable if the number of bytes it represents
3639    is known and exact.  A result that isn't suitable for substitution may
3640    have its range set to the range of return values, if that is known.
3641    Return true if the call is removed and gsi_next should not be performed
3642    in the caller.  */
3643 
3644 static bool
try_substitute_return_value(gimple_stmt_iterator * gsi,const sprintf_dom_walker::call_info & info,const format_result & res)3645 try_substitute_return_value (gimple_stmt_iterator *gsi,
3646 			     const sprintf_dom_walker::call_info &info,
3647 			     const format_result &res)
3648 {
3649   tree lhs = gimple_get_lhs (info.callstmt);
3650 
3651   /* Set to true when the entire call has been removed.  */
3652   bool removed = false;
3653 
3654   /* The minimum and maximum return value.  */
3655   unsigned HOST_WIDE_INT retval[2];
3656   bool safe = is_call_safe (info, res, true, retval);
3657 
3658   if (safe
3659       && retval[0] == retval[1]
3660       /* Not prepared to handle possibly throwing calls here; they shouldn't
3661 	 appear in non-artificial testcases, except when the __*_chk routines
3662 	 are badly declared.  */
3663       && !stmt_ends_bb_p (info.callstmt))
3664     {
3665       tree cst = build_int_cst (integer_type_node, retval[0]);
3666 
3667       if (lhs == NULL_TREE
3668 	  && info.nowrite)
3669 	{
3670 	  /* Remove the call to the bounded function with a zero size
3671 	     (e.g., snprintf(0, 0, "%i", 123)) if there is no lhs.  */
3672 	  unlink_stmt_vdef (info.callstmt);
3673 	  gsi_remove (gsi, true);
3674 	  removed = true;
3675 	}
3676       else if (info.nowrite)
3677 	{
3678 	  /* Replace the call to the bounded function with a zero size
3679 	     (e.g., snprintf(0, 0, "%i", 123) with the constant result
3680 	     of the function.  */
3681 	  if (!update_call_from_tree (gsi, cst))
3682 	    gimplify_and_update_call_from_tree (gsi, cst);
3683 	  gimple *callstmt = gsi_stmt (*gsi);
3684 	  update_stmt (callstmt);
3685 	}
3686       else if (lhs)
3687 	{
3688 	  /* Replace the left-hand side of the call with the constant
3689 	     result of the formatted function.  */
3690 	  gimple_call_set_lhs (info.callstmt, NULL_TREE);
3691 	  gimple *g = gimple_build_assign (lhs, cst);
3692 	  gsi_insert_after (gsi, g, GSI_NEW_STMT);
3693 	  update_stmt (info.callstmt);
3694 	}
3695 
3696       if (dump_file)
3697 	{
3698 	  if (removed)
3699 	    fprintf (dump_file, "  Removing call statement.");
3700 	  else
3701 	    {
3702 	      fprintf (dump_file, "  Substituting ");
3703 	      print_generic_expr (dump_file, cst, dump_flags);
3704 	      fprintf (dump_file, " for %s.\n",
3705 		       info.nowrite ? "statement" : "return value");
3706 	    }
3707 	}
3708     }
3709   else if (lhs)
3710     {
3711       bool setrange = false;
3712 
3713       if (safe
3714 	  && (info.bounded || retval[1] < info.objsize)
3715 	  && (retval[0] < target_int_max ()
3716 	      && retval[1] < target_int_max ()))
3717 	{
3718 	  /* If the result is in a valid range bounded by the size of
3719 	     the destination set it so that it can be used for subsequent
3720 	     optimizations.  */
3721 	  int prec = TYPE_PRECISION (integer_type_node);
3722 
3723 	  wide_int min = wi::shwi (retval[0], prec);
3724 	  wide_int max = wi::shwi (retval[1], prec);
3725 	  set_range_info (lhs, VR_RANGE, min, max);
3726 
3727 	  setrange = true;
3728 	}
3729 
3730       if (dump_file)
3731 	{
3732 	  const char *inbounds
3733 	    = (retval[0] < info.objsize
3734 	       ? (retval[1] < info.objsize
3735 		  ? "in" : "potentially out-of")
3736 	       : "out-of");
3737 
3738 	  const char *what = setrange ? "Setting" : "Discarding";
3739 	  if (retval[0] != retval[1])
3740 	    fprintf (dump_file,
3741 		     "  %s %s-bounds return value range ["
3742 		     HOST_WIDE_INT_PRINT_UNSIGNED ", "
3743 		     HOST_WIDE_INT_PRINT_UNSIGNED "].\n",
3744 		     what, inbounds, retval[0], retval[1]);
3745 	  else
3746 	    fprintf (dump_file, "  %s %s-bounds return value "
3747 		     HOST_WIDE_INT_PRINT_UNSIGNED ".\n",
3748 		     what, inbounds, retval[0]);
3749 	}
3750     }
3751 
3752   if (dump_file)
3753     fputc ('\n', dump_file);
3754 
3755   return removed;
3756 }
3757 
3758 /* Try to simplify a s{,n}printf call described by INFO with result
3759    RES by replacing it with a simpler and presumably more efficient
3760    call (such as strcpy).  */
3761 
3762 static bool
try_simplify_call(gimple_stmt_iterator * gsi,const sprintf_dom_walker::call_info & info,const format_result & res)3763 try_simplify_call (gimple_stmt_iterator *gsi,
3764 		   const sprintf_dom_walker::call_info &info,
3765 		   const format_result &res)
3766 {
3767   unsigned HOST_WIDE_INT dummy[2];
3768   if (!is_call_safe (info, res, info.retval_used (), dummy))
3769     return false;
3770 
3771   switch (info.fncode)
3772     {
3773     case BUILT_IN_SNPRINTF:
3774       return gimple_fold_builtin_snprintf (gsi);
3775 
3776     case BUILT_IN_SPRINTF:
3777       return gimple_fold_builtin_sprintf (gsi);
3778 
3779     default:
3780       ;
3781     }
3782 
3783   return false;
3784 }
3785 
3786 /* Determine if a GIMPLE CALL is to one of the sprintf-like built-in
3787    functions and if so, handle it.  Return true if the call is removed
3788    and gsi_next should not be performed in the caller.  */
3789 
3790 bool
handle_gimple_call(gimple_stmt_iterator * gsi)3791 sprintf_dom_walker::handle_gimple_call (gimple_stmt_iterator *gsi)
3792 {
3793   call_info info = call_info ();
3794 
3795   info.callstmt = gsi_stmt (*gsi);
3796   if (!gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL))
3797     return false;
3798 
3799   info.func = gimple_call_fndecl (info.callstmt);
3800   info.fncode = DECL_FUNCTION_CODE (info.func);
3801 
3802   /* The size of the destination as in snprintf(dest, size, ...).  */
3803   unsigned HOST_WIDE_INT dstsize = HOST_WIDE_INT_M1U;
3804 
3805   /* The size of the destination determined by __builtin_object_size.  */
3806   unsigned HOST_WIDE_INT objsize = HOST_WIDE_INT_M1U;
3807 
3808   /* Buffer size argument number (snprintf and vsnprintf).  */
3809   unsigned HOST_WIDE_INT idx_dstsize = HOST_WIDE_INT_M1U;
3810 
3811   /* Object size argument number (snprintf_chk and vsnprintf_chk).  */
3812   unsigned HOST_WIDE_INT idx_objsize = HOST_WIDE_INT_M1U;
3813 
3814   /* Format string argument number (valid for all functions).  */
3815   unsigned idx_format;
3816 
3817   switch (info.fncode)
3818     {
3819     case BUILT_IN_SPRINTF:
3820       // Signature:
3821       //   __builtin_sprintf (dst, format, ...)
3822       idx_format = 1;
3823       info.argidx = 2;
3824       break;
3825 
3826     case BUILT_IN_SPRINTF_CHK:
3827       // Signature:
3828       //   __builtin___sprintf_chk (dst, ost, objsize, format, ...)
3829       idx_objsize = 2;
3830       idx_format = 3;
3831       info.argidx = 4;
3832       break;
3833 
3834     case BUILT_IN_SNPRINTF:
3835       // Signature:
3836       //   __builtin_snprintf (dst, size, format, ...)
3837       idx_dstsize = 1;
3838       idx_format = 2;
3839       info.argidx = 3;
3840       info.bounded = true;
3841       break;
3842 
3843     case BUILT_IN_SNPRINTF_CHK:
3844       // Signature:
3845       //   __builtin___snprintf_chk (dst, size, ost, objsize, format, ...)
3846       idx_dstsize = 1;
3847       idx_objsize = 3;
3848       idx_format = 4;
3849       info.argidx = 5;
3850       info.bounded = true;
3851       break;
3852 
3853     case BUILT_IN_VSNPRINTF:
3854       // Signature:
3855       //   __builtin_vsprintf (dst, size, format, va)
3856       idx_dstsize = 1;
3857       idx_format = 2;
3858       info.argidx = -1;
3859       info.bounded = true;
3860       break;
3861 
3862     case BUILT_IN_VSNPRINTF_CHK:
3863       // Signature:
3864       //   __builtin___vsnprintf_chk (dst, size, ost, objsize, format, va)
3865       idx_dstsize = 1;
3866       idx_objsize = 3;
3867       idx_format = 4;
3868       info.argidx = -1;
3869       info.bounded = true;
3870       break;
3871 
3872     case BUILT_IN_VSPRINTF:
3873       // Signature:
3874       //   __builtin_vsprintf (dst, format, va)
3875       idx_format = 1;
3876       info.argidx = -1;
3877       break;
3878 
3879     case BUILT_IN_VSPRINTF_CHK:
3880       // Signature:
3881       //   __builtin___vsprintf_chk (dst, ost, objsize, format, va)
3882       idx_format = 3;
3883       idx_objsize = 2;
3884       info.argidx = -1;
3885       break;
3886 
3887     default:
3888       return false;
3889     }
3890 
3891   /* Set the global warning level for this function.  */
3892   warn_level = info.bounded ? warn_format_trunc : warn_format_overflow;
3893 
3894   /* The first argument is a pointer to the destination.  */
3895   tree dstptr = gimple_call_arg (info.callstmt, 0);
3896 
3897   info.format = gimple_call_arg (info.callstmt, idx_format);
3898 
3899   /* True when the destination size is constant as opposed to the lower
3900      or upper bound of a range.  */
3901   bool dstsize_cst_p = true;
3902 
3903   if (idx_dstsize == HOST_WIDE_INT_M1U)
3904     {
3905       /* For non-bounded functions like sprintf, determine the size
3906 	 of the destination from the object or pointer passed to it
3907 	 as the first argument.  */
3908       dstsize = get_destination_size (dstptr);
3909     }
3910   else if (tree size = gimple_call_arg (info.callstmt, idx_dstsize))
3911     {
3912       /* For bounded functions try to get the size argument.  */
3913 
3914       if (TREE_CODE (size) == INTEGER_CST)
3915 	{
3916 	  dstsize = tree_to_uhwi (size);
3917 	  /* No object can be larger than SIZE_MAX bytes (half the address
3918 	     space) on the target.
3919 	     The functions are defined only for output of at most INT_MAX
3920 	     bytes.  Specifying a bound in excess of that limit effectively
3921 	     defeats the bounds checking (and on some implementations such
3922 	     as Solaris cause the function to fail with EINVAL).  */
3923 	  if (dstsize > target_size_max () / 2)
3924 	    {
3925 	      /* Avoid warning if -Wstringop-overflow is specified since
3926 		 it also warns for the same thing though only for the
3927 		 checking built-ins.  */
3928 	      if ((idx_objsize == HOST_WIDE_INT_M1U
3929 		   || !warn_stringop_overflow))
3930 		warning_at (gimple_location (info.callstmt), info.warnopt (),
3931 			    "specified bound %wu exceeds maximum object size "
3932 			    "%wu",
3933 			    dstsize, target_size_max () / 2);
3934 	    }
3935 	  else if (dstsize > target_int_max ())
3936 	    warning_at (gimple_location (info.callstmt), info.warnopt (),
3937 			"specified bound %wu exceeds %<INT_MAX%>",
3938 			dstsize);
3939 	}
3940       else if (TREE_CODE (size) == SSA_NAME)
3941 	{
3942 	  /* Try to determine the range of values of the argument
3943 	     and use the greater of the two at level 1 and the smaller
3944 	     of them at level 2.  */
3945 	  value_range *vr = evrp_range_analyzer.get_value_range (size);
3946 	  if (vr->type == VR_RANGE
3947 	      && TREE_CODE (vr->min) == INTEGER_CST
3948 	      && TREE_CODE (vr->max) == INTEGER_CST)
3949 	    dstsize = (warn_level < 2
3950 		       ? TREE_INT_CST_LOW (vr->max)
3951 		       : TREE_INT_CST_LOW (vr->min));
3952 
3953 	  /* The destination size is not constant.  If the function is
3954 	     bounded (e.g., snprintf) a lower bound of zero doesn't
3955 	     necessarily imply it can be eliminated.  */
3956 	  dstsize_cst_p = false;
3957 	}
3958     }
3959 
3960   if (idx_objsize != HOST_WIDE_INT_M1U)
3961     if (tree size = gimple_call_arg (info.callstmt, idx_objsize))
3962       if (tree_fits_uhwi_p (size))
3963 	objsize = tree_to_uhwi (size);
3964 
3965   if (info.bounded && !dstsize)
3966     {
3967       /* As a special case, when the explicitly specified destination
3968 	 size argument (to a bounded function like snprintf) is zero
3969 	 it is a request to determine the number of bytes on output
3970 	 without actually producing any.  Pretend the size is
3971 	 unlimited in this case.  */
3972       info.objsize = HOST_WIDE_INT_MAX;
3973       info.nowrite = dstsize_cst_p;
3974     }
3975   else
3976     {
3977       /* For calls to non-bounded functions or to those of bounded
3978 	 functions with a non-zero size, warn if the destination
3979 	 pointer is null.  */
3980       if (integer_zerop (dstptr))
3981 	{
3982 	  /* This is diagnosed with -Wformat only when the null is a constant
3983 	     pointer.  The warning here diagnoses instances where the pointer
3984 	     is not constant.  */
3985 	  location_t loc = gimple_location (info.callstmt);
3986 	  warning_at (EXPR_LOC_OR_LOC (dstptr, loc),
3987 		      info.warnopt (), "null destination pointer");
3988 	  return false;
3989 	}
3990 
3991       /* Set the object size to the smaller of the two arguments
3992 	 of both have been specified and they're not equal.  */
3993       info.objsize = dstsize < objsize ? dstsize : objsize;
3994 
3995       if (info.bounded
3996 	  && dstsize < target_size_max () / 2 && objsize < dstsize
3997 	  /* Avoid warning if -Wstringop-overflow is specified since
3998 	     it also warns for the same thing though only for the
3999 	     checking built-ins.  */
4000 	  && (idx_objsize == HOST_WIDE_INT_M1U
4001 	      || !warn_stringop_overflow))
4002 	{
4003 	  warning_at (gimple_location (info.callstmt), info.warnopt (),
4004 		      "specified bound %wu exceeds the size %wu "
4005 		      "of the destination object", dstsize, objsize);
4006 	}
4007     }
4008 
4009   if (integer_zerop (info.format))
4010     {
4011       /* This is diagnosed with -Wformat only when the null is a constant
4012 	 pointer.  The warning here diagnoses instances where the pointer
4013 	 is not constant.  */
4014       location_t loc = gimple_location (info.callstmt);
4015       warning_at (EXPR_LOC_OR_LOC (info.format, loc),
4016 		  info.warnopt (), "null format string");
4017       return false;
4018     }
4019 
4020   info.fmtstr = get_format_string (info.format, &info.fmtloc);
4021   if (!info.fmtstr)
4022     return false;
4023 
4024   /* The result is the number of bytes output by the formatted function,
4025      including the terminating NUL.  */
4026   format_result res = format_result ();
4027 
4028   bool success = compute_format_length (info, &res);
4029 
4030   /* When optimizing and the printf return value optimization is enabled,
4031      attempt to substitute the computed result for the return value of
4032      the call.  Avoid this optimization when -frounding-math is in effect
4033      and the format string contains a floating point directive.  */
4034   bool call_removed = false;
4035   if (success && optimize > 0)
4036     {
4037       /* Save a copy of the iterator pointing at the call.  The iterator
4038 	 may change to point past the call in try_substitute_return_value
4039 	 but the original value is needed in try_simplify_call.  */
4040       gimple_stmt_iterator gsi_call = *gsi;
4041 
4042       if (flag_printf_return_value
4043 	  && (!flag_rounding_math || !res.floating))
4044 	call_removed = try_substitute_return_value (gsi, info, res);
4045 
4046       if (!call_removed)
4047 	try_simplify_call (&gsi_call, info, res);
4048     }
4049 
4050   return call_removed;
4051 }
4052 
4053 edge
before_dom_children(basic_block bb)4054 sprintf_dom_walker::before_dom_children (basic_block bb)
4055 {
4056   evrp_range_analyzer.enter (bb);
4057   for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); )
4058     {
4059       /* Iterate over statements, looking for function calls.  */
4060       gimple *stmt = gsi_stmt (si);
4061 
4062       /* First record ranges generated by this statement.  */
4063       evrp_range_analyzer.record_ranges_from_stmt (stmt, false);
4064 
4065       if (is_gimple_call (stmt) && handle_gimple_call (&si))
4066 	/* If handle_gimple_call returns true, the iterator is
4067 	   already pointing to the next statement.  */
4068 	continue;
4069 
4070       gsi_next (&si);
4071     }
4072   return NULL;
4073 }
4074 
4075 void
after_dom_children(basic_block bb)4076 sprintf_dom_walker::after_dom_children (basic_block bb)
4077 {
4078   evrp_range_analyzer.leave (bb);
4079 }
4080 
4081 /* Execute the pass for function FUN.  */
4082 
4083 unsigned int
execute(function * fun)4084 pass_sprintf_length::execute (function *fun)
4085 {
4086   init_target_to_host_charmap ();
4087 
4088   calculate_dominance_info (CDI_DOMINATORS);
4089 
4090   sprintf_dom_walker sprintf_dom_walker;
4091   sprintf_dom_walker.walk (ENTRY_BLOCK_PTR_FOR_FN (fun));
4092 
4093   /* Clean up object size info.  */
4094   fini_object_sizes ();
4095   return 0;
4096 }
4097 
4098 }   /* Unnamed namespace.  */
4099 
4100 /* Return a pointer to a pass object newly constructed from the context
4101    CTXT.  */
4102 
4103 gimple_opt_pass *
make_pass_sprintf_length(gcc::context * ctxt)4104 make_pass_sprintf_length (gcc::context *ctxt)
4105 {
4106   return new pass_sprintf_length (ctxt);
4107 }
4108