1 /* Copyright (C) 2016-2018 Free Software Foundation, Inc.
2    Contributed by Martin Sebor <msebor@redhat.com>.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
/* This file implements the printf-return-value pass.  The pass does
   two things: 1) it analyzes calls to formatted output functions like
   sprintf looking for possible buffer overflows and calls to bounded
   functions like snprintf for early truncation (and under the control
   of the -Wformat-overflow and -Wformat-truncation options issues
   warnings), and 2) under the control of the -fprintf-return-value
   option it folds the return value of safe calls into constants,
   making it possible to eliminate code that depends on the value of
   those constants.
28 
   For all functions (bounded or not) the pass uses the size of the
   destination object.  That means that it will diagnose calls to
   snprintf not on the basis of the size specified by the function's
   second argument but rather on the basis of the size the first
   argument points to (if possible).  For bound-checking built-ins
   like __builtin___snprintf_chk the pass uses the size typically
   determined by __builtin_object_size and passed to the built-in
   by the Glibc inline wrapper.
37 
   The pass handles all forms of standard sprintf format directives,
   including character, integer, floating point, pointer, and strings,
   with the standard C flags, widths, and precisions.  For integers
   and strings it computes the length of output itself.  For floating
   point it uses MPFR to format known constants with up and down
   rounding and uses the resulting range of output lengths.  For
   strings it uses the length of string literals and the sizes of
   character arrays that a character pointer may point to as a bound
   on the longest string.  */
47 
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "backend.h"
52 #include "tree.h"
53 #include "gimple.h"
54 #include "tree-pass.h"
55 #include "ssa.h"
56 #include "gimple-fold.h"
57 #include "gimple-pretty-print.h"
58 #include "diagnostic-core.h"
59 #include "fold-const.h"
60 #include "gimple-iterator.h"
61 #include "tree-ssa.h"
62 #include "tree-object-size.h"
63 #include "params.h"
64 #include "tree-cfg.h"
65 #include "tree-ssa-propagate.h"
66 #include "calls.h"
67 #include "cfgloop.h"
68 #include "intl.h"
69 #include "langhooks.h"
70 
71 #include "builtins.h"
72 #include "stor-layout.h"
73 
74 #include "realmpfr.h"
75 #include "target.h"
76 
77 #include "cpplib.h"
78 #include "input.h"
79 #include "toplev.h"
80 #include "substring-locations.h"
81 #include "diagnostic.h"
82 #include "domwalk.h"
83 #include "alloc-pool.h"
84 #include "vr-values.h"
85 #include "gimple-ssa-evrp-analyze.h"
86 
87 /* The likely worst case value of MB_LEN_MAX for the target, large enough
88    for UTF-8.  Ideally, this would be obtained by a target hook if it were
89    to be used for optimization but it's good enough as is for warnings.  */
90 #define target_mb_len_max()   6
91 
92 /* The maximum number of bytes a single non-string directive can result
93    in.  This is the result of printf("%.*Lf", INT_MAX, -LDBL_MAX) for
94    LDBL_MAX_10_EXP of 4932.  */
95 #define IEEE_MAX_10_EXP    4932
96 #define target_dir_max()   (target_int_max () + IEEE_MAX_10_EXP + 2)
97 
98 namespace {
99 
/* Static description of the pass; handed to the gimple_opt_pass
   constructor by pass_sprintf_length.  Each initializer is labeled
   with the pass_data field it sets.  */
const pass_data pass_data_sprintf_length = {
  GIMPLE_PASS,             /* type */
  "printf-return-value",   /* name */
  OPTGROUP_NONE,           /* optinfo_flags */
  TV_NONE,                 /* tv_id */
  PROP_cfg,                /* properties_required */
  0,	                   /* properties_provided */
  0,	                   /* properties_destroyed */
  0,	                   /* todo_flags_start */
  0,	                   /* todo_flags_finish */
};
111 
112 /* Set to the warning level for the current function which is equal
113    either to warn_format_trunc for bounded functions or to
114    warn_format_overflow otherwise.  */
115 
116 static int warn_level;
117 
118 struct format_result;
119 
120 class sprintf_dom_walker : public dom_walker
121 {
122  public:
sprintf_dom_walker()123   sprintf_dom_walker () : dom_walker (CDI_DOMINATORS) {}
~sprintf_dom_walker()124   ~sprintf_dom_walker () {}
125 
126   edge before_dom_children (basic_block) FINAL OVERRIDE;
127   void after_dom_children (basic_block) FINAL OVERRIDE;
128   bool handle_gimple_call (gimple_stmt_iterator *);
129 
130   struct call_info;
131   bool compute_format_length (call_info &, format_result *);
132   class evrp_range_analyzer evrp_range_analyzer;
133 };
134 
135 class pass_sprintf_length : public gimple_opt_pass
136 {
137   bool fold_return_value;
138 
139 public:
pass_sprintf_length(gcc::context * ctxt)140   pass_sprintf_length (gcc::context *ctxt)
141     : gimple_opt_pass (pass_data_sprintf_length, ctxt),
142     fold_return_value (false)
143   { }
144 
clone()145   opt_pass * clone () { return new pass_sprintf_length (m_ctxt); }
146 
147   virtual bool gate (function *);
148 
149   virtual unsigned int execute (function *);
150 
set_pass_param(unsigned int n,bool param)151   void set_pass_param (unsigned int n, bool param)
152     {
153       gcc_assert (n == 0);
154       fold_return_value = param;
155     }
156 
157 };
158 
159 bool
gate(function *)160 pass_sprintf_length::gate (function *)
161 {
162   /* Run the pass iff -Warn-format-overflow or -Warn-format-truncation
163      is specified and either not optimizing and the pass is being invoked
164      early, or when optimizing and the pass is being invoked during
165      optimization (i.e., "late").  */
166   return ((warn_format_overflow > 0
167 	   || warn_format_trunc > 0
168 	   || flag_printf_return_value)
169 	  && (optimize > 0) == fold_return_value);
170 }
171 
/* The minimum, maximum, likely, and unlikely maximum number of bytes
   of output either a formatting function or an individual directive
   can result in.  */

struct result_range
{
  /* The absolute minimum number of bytes.  The result of a successful
     conversion is guaranteed to be no less than this.  (An erroneous
     conversion can be indicated by MIN > HOST_WIDE_INT_MAX.)  */
  unsigned HOST_WIDE_INT min;
  /* The likely maximum result that is used in diagnostics.  In most
     cases MAX is the same as the worst case UNLIKELY result.  */
  unsigned HOST_WIDE_INT max;
  /* The likely result used to trigger diagnostics.  For conversions
     that result in a range of bytes [MIN, MAX], LIKELY is somewhere
     in that range.  */
  unsigned HOST_WIDE_INT likely;
  /* In rare cases (e.g., for multibyte characters) UNLIKELY gives
     the worst case maximum result of a directive.  In most cases
     UNLIKELY == MAX.  UNLIKELY is used to control the return value
     optimization but not in diagnostics.  */
  unsigned HOST_WIDE_INT unlikely;
};
195 
196 /* The result of a call to a formatted function.  */
197 
198 struct format_result
199 {
200   /* Range of characters written by the formatted function.
201      Setting the minimum to HOST_WIDE_INT_MAX disables all
202      length tracking for the remainder of the format string.  */
203   result_range range;
204 
205   /* True when the range above is obtained from known values of
206      directive arguments, or bounds on the amount of output such
207      as width and precision, and not the result of  heuristics that
208      depend on warning levels.  It's used to issue stricter diagnostics
209      in cases where strings of unknown lengths are bounded by the arrays
210      they are determined to refer to.  KNOWNRANGE must not be used for
211      the return value optimization.  */
212   bool knownrange;
213 
214   /* True if no individual directive resulted in more than 4095 bytes
215      of output (the total NUMBER_CHARS_{MIN,MAX} might be greater).
216      Implementations are not required to handle directives that produce
217      more than 4K bytes (leading to undefined behavior) and so when one
218      is found it disables the return value optimization.  */
219   bool under4k;
220 
221   /* True when a floating point directive has been seen in the format
222      string.  */
223   bool floating;
224 
225   /* True when an intermediate result has caused a warning.  Used to
226      avoid issuing duplicate warnings while finishing the processing
227      of a call.  WARNED also disables the return value optimization.  */
228   bool warned;
229 
230   /* Preincrement the number of output characters by 1.  */
231   format_result& operator++ ()
232   {
233     return *this += 1;
234   }
235 
236   /* Postincrement the number of output characters by 1.  */
237   format_result operator++ (int)
238   {
239     format_result prev (*this);
240     *this += 1;
241     return prev;
242   }
243 
244   /* Increment the number of output characters by N.  */
245   format_result& operator+= (unsigned HOST_WIDE_INT);
246 };
247 
248 format_result&
249 format_result::operator+= (unsigned HOST_WIDE_INT n)
250 {
251   gcc_assert (n < HOST_WIDE_INT_MAX);
252 
253   if (range.min < HOST_WIDE_INT_MAX)
254     range.min += n;
255 
256   if (range.max < HOST_WIDE_INT_MAX)
257     range.max += n;
258 
259   if (range.likely < HOST_WIDE_INT_MAX)
260     range.likely += n;
261 
262   if (range.unlikely < HOST_WIDE_INT_MAX)
263     range.unlikely += n;
264 
265   return *this;
266 }
267 
268 /* Return the value of INT_MIN for the target.  */
269 
270 static inline HOST_WIDE_INT
target_int_min()271 target_int_min ()
272 {
273   return tree_to_shwi (TYPE_MIN_VALUE (integer_type_node));
274 }
275 
276 /* Return the value of INT_MAX for the target.  */
277 
278 static inline unsigned HOST_WIDE_INT
target_int_max()279 target_int_max ()
280 {
281   return tree_to_uhwi (TYPE_MAX_VALUE (integer_type_node));
282 }
283 
284 /* Return the value of SIZE_MAX for the target.  */
285 
286 static inline unsigned HOST_WIDE_INT
target_size_max()287 target_size_max ()
288 {
289   return tree_to_uhwi (TYPE_MAX_VALUE (size_type_node));
290 }
291 
292 /* A straightforward mapping from the execution character set to the host
293    character set indexed by execution character.  */
294 
295 static char target_to_host_charmap[256];
296 
297 /* Initialize a mapping from the execution character set to the host
298    character set.  */
299 
300 static bool
init_target_to_host_charmap()301 init_target_to_host_charmap ()
302 {
303   /* If the percent sign is non-zero the mapping has already been
304      initialized.  */
305   if (target_to_host_charmap['%'])
306     return true;
307 
308   /* Initialize the target_percent character (done elsewhere).  */
309   if (!init_target_chars ())
310     return false;
311 
312   /* The subset of the source character set used by printf conversion
313      specifications (strictly speaking, not all letters are used but
314      they are included here for the sake of simplicity).  The dollar
315      sign must be included even though it's not in the basic source
316      character set.  */
317   const char srcset[] = " 0123456789!\"#%&'()*+,-./:;<=>?[\\]^_{|}~$"
318     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
319 
320   /* Set the mapping for all characters to some ordinary value (i,e.,
321      not none used in printf conversion specifications) and overwrite
322      those that are used by conversion specifications with their
323      corresponding values.  */
324   memset (target_to_host_charmap + 1, '?', sizeof target_to_host_charmap - 1);
325 
326   /* Are the two sets of characters the same?  */
327   bool all_same_p = true;
328 
329   for (const char *pc = srcset; *pc; ++pc)
330     {
331       /* Slice off the high end bits in case target characters are
332 	 signed.  All values are expected to be non-nul, otherwise
333 	 there's a problem.  */
334       if (unsigned char tc = lang_hooks.to_target_charset (*pc))
335 	{
336 	  target_to_host_charmap[tc] = *pc;
337 	  if (tc != *pc)
338 	    all_same_p = false;
339 	}
340       else
341 	return false;
342 
343     }
344 
345   /* Set the first element to a non-zero value if the mapping
346      is 1-to-1, otherwise leave it clear (NUL is assumed to be
347      the same in both character sets).  */
348   target_to_host_charmap[0] = all_same_p;
349 
350   return true;
351 }
352 
353 /* Return the host source character corresponding to the character
354    CH in the execution character set if one exists, or some innocuous
355    (non-special, non-nul) source character otherwise.  */
356 
357 static inline unsigned char
target_to_host(unsigned char ch)358 target_to_host (unsigned char ch)
359 {
360   return target_to_host_charmap[ch];
361 }
362 
363 /* Convert an initial substring of the string TARGSTR consisting of
364    characters in the execution character set into a string in the
365    source character set on the host and store up to HOSTSZ characters
366    in the buffer pointed to by HOSTR.  Return HOSTR.  */
367 
368 static const char*
target_to_host(char * hostr,size_t hostsz,const char * targstr)369 target_to_host (char *hostr, size_t hostsz, const char *targstr)
370 {
371   /* Make sure the buffer is reasonably big.  */
372   gcc_assert (hostsz > 4);
373 
374   /* The interesting subset of source and execution characters are
375      the same so no conversion is necessary.  However, truncate
376      overlong strings just like the translated strings are.  */
377   if (target_to_host_charmap['\0'] == 1)
378     {
379       size_t len = strlen (targstr);
380       if (len >= hostsz)
381 	{
382 	  memcpy (hostr, targstr, hostsz - 4);
383 	  strcpy (hostr + hostsz - 4, "...");
384 	}
385       else
386 	memcpy (hostr, targstr, len + 1);
387       return hostr;
388     }
389 
390   /* Convert the initial substring of TARGSTR to the corresponding
391      characters in the host set, appending "..." if TARGSTR is too
392      long to fit.  Using the static buffer assumes the function is
393      not called in between sequence points (which it isn't).  */
394   for (char *ph = hostr; ; ++targstr)
395     {
396       *ph++ = target_to_host (*targstr);
397       if (!*targstr)
398 	break;
399 
400       if (size_t (ph - hostr) == hostsz)
401 	{
402 	  strcpy (ph - 4, "...");
403 	  break;
404 	}
405     }
406 
407   return hostr;
408 }
409 
410 /* Convert the sequence of decimal digits in the execution character
411    starting at S to a long, just like strtol does.  Return the result
412    and set *END to one past the last converted character.  On range
413    error set ERANGE to the digit that caused it.  */
414 
415 static inline long
target_strtol10(const char ** ps,const char ** erange)416 target_strtol10 (const char **ps, const char **erange)
417 {
418   unsigned HOST_WIDE_INT val = 0;
419   for ( ; ; ++*ps)
420     {
421       unsigned char c = target_to_host (**ps);
422       if (ISDIGIT (c))
423 	{
424 	  c -= '0';
425 
426 	  /* Check for overflow.  */
427 	  if (val > (LONG_MAX - c) / 10LU)
428 	    {
429 	      val = LONG_MAX;
430 	      *erange = *ps;
431 
432 	      /* Skip the remaining digits.  */
433 	      do
434 		c = target_to_host (*++*ps);
435 	      while (ISDIGIT (c));
436 	      break;
437 	    }
438 	  else
439 	    val = val * 10 + c;
440 	}
441       else
442 	break;
443     }
444 
445   return val;
446 }
447 
448 /* Return the constant initial value of DECL if available or DECL
449    otherwise.  Same as the synonymous function in c/c-typeck.c.  */
450 
451 static tree
decl_constant_value(tree decl)452 decl_constant_value (tree decl)
453 {
454   if (/* Don't change a variable array bound or initial value to a constant
455 	 in a place where a variable is invalid.  Note that DECL_INITIAL
456 	 isn't valid for a PARM_DECL.  */
457       current_function_decl != 0
458       && TREE_CODE (decl) != PARM_DECL
459       && !TREE_THIS_VOLATILE (decl)
460       && TREE_READONLY (decl)
461       && DECL_INITIAL (decl) != 0
462       && TREE_CODE (DECL_INITIAL (decl)) != ERROR_MARK
463       /* This is invalid if initial value is not constant.
464 	 If it has either a function call, a memory reference,
465 	 or a variable, then re-evaluating it could give different results.  */
466       && TREE_CONSTANT (DECL_INITIAL (decl))
467       /* Check for cases where this is sub-optimal, even though valid.  */
468       && TREE_CODE (DECL_INITIAL (decl)) != CONSTRUCTOR)
469     return DECL_INITIAL (decl);
470   return decl;
471 }
472 
473 /* Given FORMAT, set *PLOC to the source location of the format string
474    and return the format string if it is known or null otherwise.  */
475 
476 static const char*
get_format_string(tree format,location_t * ploc)477 get_format_string (tree format, location_t *ploc)
478 {
479   if (VAR_P (format))
480     {
481       /* Pull out a constant value if the front end didn't.  */
482       format = decl_constant_value (format);
483       STRIP_NOPS (format);
484     }
485 
486   if (integer_zerop (format))
487     {
488       /* FIXME: Diagnose null format string if it hasn't been diagnosed
489 	 by -Wformat (the latter diagnoses only nul pointer constants,
490 	 this pass can do better).  */
491       return NULL;
492     }
493 
494   HOST_WIDE_INT offset = 0;
495 
496   if (TREE_CODE (format) == POINTER_PLUS_EXPR)
497     {
498       tree arg0 = TREE_OPERAND (format, 0);
499       tree arg1 = TREE_OPERAND (format, 1);
500       STRIP_NOPS (arg0);
501       STRIP_NOPS (arg1);
502 
503       if (TREE_CODE (arg1) != INTEGER_CST)
504 	return NULL;
505 
506       format = arg0;
507 
508       /* POINTER_PLUS_EXPR offsets are to be interpreted signed.  */
509       if (!cst_and_fits_in_hwi (arg1))
510 	return NULL;
511 
512       offset = int_cst_value (arg1);
513     }
514 
515   if (TREE_CODE (format) != ADDR_EXPR)
516     return NULL;
517 
518   *ploc = EXPR_LOC_OR_LOC (format, input_location);
519 
520   format = TREE_OPERAND (format, 0);
521 
522   if (TREE_CODE (format) == ARRAY_REF
523       && tree_fits_shwi_p (TREE_OPERAND (format, 1))
524       && (offset += tree_to_shwi (TREE_OPERAND (format, 1))) >= 0)
525     format = TREE_OPERAND (format, 0);
526 
527   if (offset < 0)
528     return NULL;
529 
530   tree array_init;
531   tree array_size = NULL_TREE;
532 
533   if (VAR_P (format)
534       && TREE_CODE (TREE_TYPE (format)) == ARRAY_TYPE
535       && (array_init = decl_constant_value (format)) != format
536       && TREE_CODE (array_init) == STRING_CST)
537     {
538       /* Extract the string constant initializer.  Note that this may
539 	 include a trailing NUL character that is not in the array (e.g.
540 	 const char a[3] = "foo";).  */
541       array_size = DECL_SIZE_UNIT (format);
542       format = array_init;
543     }
544 
545   if (TREE_CODE (format) != STRING_CST)
546     return NULL;
547 
548   tree type = TREE_TYPE (format);
549 
550   scalar_int_mode char_mode;
551   if (!is_int_mode (TYPE_MODE (TREE_TYPE (type)), &char_mode)
552       || GET_MODE_SIZE (char_mode) != 1)
553     {
554       /* Wide format string.  */
555       return NULL;
556     }
557 
558   const char *fmtstr = TREE_STRING_POINTER (format);
559   unsigned fmtlen = TREE_STRING_LENGTH (format);
560 
561   if (array_size)
562     {
563       /* Variable length arrays can't be initialized.  */
564       gcc_assert (TREE_CODE (array_size) == INTEGER_CST);
565 
566       if (tree_fits_shwi_p (array_size))
567 	{
568 	  HOST_WIDE_INT array_size_value = tree_to_shwi (array_size);
569 	  if (array_size_value > 0
570 	      && array_size_value == (int) array_size_value
571 	      && fmtlen > array_size_value)
572 	    fmtlen = array_size_value;
573 	}
574     }
575   if (offset)
576     {
577       if (offset >= fmtlen)
578 	return NULL;
579 
580       fmtstr += offset;
581       fmtlen -= offset;
582     }
583 
584   if (fmtlen < 1 || fmtstr[--fmtlen] != 0)
585     {
586       /* FIXME: Diagnose an unterminated format string if it hasn't been
587 	 diagnosed by -Wformat.  Similarly to a null format pointer,
588 	 -Wformay diagnoses only nul pointer constants, this pass can
589 	 do better).  */
590       return NULL;
591     }
592 
593   return fmtstr;
594 }
595 
596 /* For convenience and brevity, shorter named entrypoints of
597    format_warning_at_substring and format_warning_at_substring_n.
598    These have to be functions with the attribute so that exgettext
599    works properly.  */
600 
601 static bool
602 ATTRIBUTE_GCC_DIAG (5, 6)
fmtwarn(const substring_loc & fmt_loc,location_t param_loc,const char * corrected_substring,int opt,const char * gmsgid,...)603 fmtwarn (const substring_loc &fmt_loc, location_t param_loc,
604 	 const char *corrected_substring, int opt, const char *gmsgid, ...)
605 {
606   va_list ap;
607   va_start (ap, gmsgid);
608   bool warned = format_warning_va (fmt_loc, param_loc, corrected_substring,
609 				   opt, gmsgid, &ap);
610   va_end (ap);
611 
612   return warned;
613 }
614 
615 static bool
616 ATTRIBUTE_GCC_DIAG (6, 8) ATTRIBUTE_GCC_DIAG (7, 8)
fmtwarn_n(const substring_loc & fmt_loc,location_t param_loc,const char * corrected_substring,int opt,unsigned HOST_WIDE_INT n,const char * singular_gmsgid,const char * plural_gmsgid,...)617 fmtwarn_n (const substring_loc &fmt_loc, location_t param_loc,
618 	   const char *corrected_substring, int opt, unsigned HOST_WIDE_INT n,
619 	   const char *singular_gmsgid, const char *plural_gmsgid, ...)
620 {
621   va_list ap;
622   va_start (ap, plural_gmsgid);
623   bool warned = format_warning_n_va (fmt_loc, param_loc, corrected_substring,
624 				     opt, n, singular_gmsgid, plural_gmsgid,
625 				     &ap);
626   va_end (ap);
627 
628   return warned;
629 }
630 
/* Format length modifiers.  Each enumerator other than FMT_LEN_none
   is named after the modifier's spelling in a conversion
   specification; the comment gives the argument type it selects.  */

enum format_lengths
{
  FMT_LEN_none,  // no length modifier
  FMT_LEN_hh,    // char argument
  FMT_LEN_h,     // short
  FMT_LEN_l,     // long
  FMT_LEN_ll,    // long long
  FMT_LEN_L,     // long double (and GNU long long)
  FMT_LEN_z,     // size_t
  FMT_LEN_t,     // ptrdiff_t
  FMT_LEN_j      // intmax_t
};
645 
646 
647 /* Description of the result of conversion either of a single directive
648    or the whole format string.  */
649 
650 struct fmtresult
651 {
652   /* Construct a FMTRESULT object with all counters initialized
653      to MIN.  KNOWNRANGE is set when MIN is valid.  */
654   fmtresult (unsigned HOST_WIDE_INT min = HOST_WIDE_INT_MAX)
argminfmtresult655   : argmin (), argmax (),
656     knownrange (min < HOST_WIDE_INT_MAX),
657     nullp ()
658   {
659     range.min = min;
660     range.max = min;
661     range.likely = min;
662     range.unlikely = min;
663   }
664 
665   /* Construct a FMTRESULT object with MIN, MAX, and LIKELY counters.
666      KNOWNRANGE is set when both MIN and MAX are valid.   */
667   fmtresult (unsigned HOST_WIDE_INT min, unsigned HOST_WIDE_INT max,
668 	     unsigned HOST_WIDE_INT likely = HOST_WIDE_INT_MAX)
argminfmtresult669   : argmin (), argmax (),
670     knownrange (min < HOST_WIDE_INT_MAX && max < HOST_WIDE_INT_MAX),
671     nullp ()
672   {
673     range.min = min;
674     range.max = max;
675     range.likely = max < likely ? min : likely;
676     range.unlikely = max;
677   }
678 
679   /* Adjust result upward to reflect the RANGE of values the specified
680      width or precision is known to be in.  */
681   fmtresult& adjust_for_width_or_precision (const HOST_WIDE_INT[2],
682 					    tree = NULL_TREE,
683 					    unsigned = 0, unsigned = 0);
684 
685   /* Return the maximum number of decimal digits a value of TYPE
686      formats as on output.  */
687   static unsigned type_max_digits (tree, int);
688 
689   /* The range a directive's argument is in.  */
690   tree argmin, argmax;
691 
692   /* The minimum and maximum number of bytes that a directive
693      results in on output for an argument in the range above.  */
694   result_range range;
695 
696   /* True when the range above is obtained from a known value of
697      a directive's argument or its bounds and not the result of
698      heuristics that depend on warning levels.  */
699   bool knownrange;
700 
701   /* True when the argument is a null pointer.  */
702   bool nullp;
703 };
704 
705 /* Adjust result upward to reflect the range ADJUST of values the
706    specified width or precision is known to be in.  When non-null,
707    TYPE denotes the type of the directive whose result is being
708    adjusted, BASE gives the base of the directive (octal, decimal,
709    or hex), and ADJ denotes the additional adjustment to the LIKELY
710    counter that may need to be added when ADJUST is a range.  */
711 
712 fmtresult&
adjust_for_width_or_precision(const HOST_WIDE_INT adjust[2],tree type,unsigned base,unsigned adj)713 fmtresult::adjust_for_width_or_precision (const HOST_WIDE_INT adjust[2],
714 					  tree type /* = NULL_TREE */,
715 					  unsigned base /* = 0 */,
716 					  unsigned adj /* = 0 */)
717 {
718   bool minadjusted = false;
719 
720   /* Adjust the minimum and likely counters.  */
721   if (adjust[0] >= 0)
722     {
723       if (range.min < (unsigned HOST_WIDE_INT)adjust[0])
724 	{
725 	  range.min = adjust[0];
726 	  minadjusted = true;
727 	}
728 
729       /* Adjust the likely counter.  */
730       if (range.likely < range.min)
731 	range.likely = range.min;
732     }
733   else if (adjust[0] == target_int_min ()
734 	   && (unsigned HOST_WIDE_INT)adjust[1] == target_int_max ())
735     knownrange = false;
736 
737   /* Adjust the maximum counter.  */
738   if (adjust[1] > 0)
739     {
740       if (range.max < (unsigned HOST_WIDE_INT)adjust[1])
741 	{
742 	  range.max = adjust[1];
743 
744 	  /* Set KNOWNRANGE if both the minimum and maximum have been
745 	     adjusted.  Otherwise leave it at what it was before.  */
746 	  knownrange = minadjusted;
747 	}
748     }
749 
750   if (warn_level > 1 && type)
751     {
752       /* For large non-constant width or precision whose range spans
753 	 the maximum number of digits produced by the directive for
754 	 any argument, set the likely number of bytes to be at most
755 	 the number digits plus other adjustment determined by the
756 	 caller (one for sign or two for the hexadecimal "0x"
757 	 prefix).  */
758       unsigned dirdigs = type_max_digits (type, base);
759       if (adjust[0] < dirdigs && dirdigs < adjust[1]
760 	  && range.likely < dirdigs)
761 	range.likely = dirdigs + adj;
762     }
763   else if (range.likely < (range.min ? range.min : 1))
764     {
765       /* Conservatively, set LIKELY to at least MIN but no less than
766 	 1 unless MAX is zero.  */
767       range.likely = (range.min
768 		      ? range.min
769 		      : range.max && (range.max < HOST_WIDE_INT_MAX
770 				      || warn_level > 1) ? 1 : 0);
771     }
772 
773   /* Finally adjust the unlikely counter to be at least as large as
774      the maximum.  */
775   if (range.unlikely < range.max)
776     range.unlikely = range.max;
777 
778   return *this;
779 }
780 
781 /* Return the maximum number of digits a value of TYPE formats in
782    BASE on output, not counting base prefix .  */
783 
784 unsigned
type_max_digits(tree type,int base)785 fmtresult::type_max_digits (tree type, int base)
786 {
787   unsigned prec = TYPE_PRECISION (type);
788   switch (base)
789     {
790     case 8:
791       return (prec + 2) / 3;
792     case 10:
793       /* Decimal approximation: yields 3, 5, 10, and 20 for precision
794 	 of 8, 16, 32, and 64 bits.  */
795       return prec * 301 / 1000 + 1;
796     case 16:
797       return prec / 4;
798     }
799 
800   gcc_unreachable ();
801 }
802 
803 static bool
804 get_int_range (tree, HOST_WIDE_INT *, HOST_WIDE_INT *, bool, HOST_WIDE_INT,
805 	       class vr_values *vr_values);
806 
807 /* Description of a format directive.  A directive is either a plain
808    string or a conversion specification that starts with '%'.  */
809 
810 struct directive
811 {
812   /* The 1-based directive number (for debugging).  */
813   unsigned dirno;
814 
815   /* The first character of the directive and its length.  */
816   const char *beg;
817   size_t len;
818 
819   /* A bitmap of flags, one for each character.  */
820   unsigned flags[256 / sizeof (int)];
821 
822   /* The range of values of the specified width, or -1 if not specified.  */
823   HOST_WIDE_INT width[2];
824   /* The range of values of the specified precision, or -1 if not
825      specified.  */
826   HOST_WIDE_INT prec[2];
827 
828   /* Length modifier.  */
829   format_lengths modifier;
830 
831   /* Format specifier character.  */
832   char specifier;
833 
834   /* The argument of the directive or null when the directive doesn't
835      take one or when none is available (such as for vararg functions).  */
836   tree arg;
837 
838   /* Format conversion function that given a directive and an argument
839      returns the formatting result.  */
840   fmtresult (*fmtfunc) (const directive &, tree, vr_values *);
841 
842   /* Return True when a the format flag CHR has been used.  */
get_flagdirective843   bool get_flag (char chr) const
844   {
845     unsigned char c = chr & 0xff;
846     return (flags[c / (CHAR_BIT * sizeof *flags)]
847 	    & (1U << (c % (CHAR_BIT * sizeof *flags))));
848   }
849 
850   /* Make a record of the format flag CHR having been used.  */
set_flagdirective851   void set_flag (char chr)
852   {
853     unsigned char c = chr & 0xff;
854     flags[c / (CHAR_BIT * sizeof *flags)]
855       |= (1U << (c % (CHAR_BIT * sizeof *flags)));
856   }
857 
858   /* Reset the format flag CHR.  */
clear_flagdirective859   void clear_flag (char chr)
860   {
861     unsigned char c = chr & 0xff;
862     flags[c / (CHAR_BIT * sizeof *flags)]
863       &= ~(1U << (c % (CHAR_BIT * sizeof *flags)));
864   }
865 
866   /* Set both bounds of the width range to VAL.  */
set_widthdirective867   void set_width (HOST_WIDE_INT val)
868   {
869     width[0] = width[1] = val;
870   }
871 
872   /* Set the width range according to ARG, with both bounds being
873      no less than 0.  For a constant ARG set both bounds to its value
874      or 0, whichever is greater.  For a non-constant ARG in some range
875      set width to its range adjusting each bound to -1 if it's less.
876      For an indeterminate ARG set width to [0, INT_MAX].  */
set_widthdirective877   void set_width (tree arg, vr_values *vr_values)
878   {
879     get_int_range (arg, width, width + 1, true, 0, vr_values);
880   }
881 
882   /* Set both bounds of the precision range to VAL.  */
set_precisiondirective883   void set_precision (HOST_WIDE_INT val)
884   {
885     prec[0] = prec[1] = val;
886   }
887 
888   /* Set the precision range according to ARG, with both bounds being
889      no less than -1.  For a constant ARG set both bounds to its value
890      or -1 whichever is greater.  For a non-constant ARG in some range
891      set precision to its range adjusting each bound to -1 if it's less.
892      For an indeterminate ARG set precision to [-1, INT_MAX].  */
set_precisiondirective893   void set_precision (tree arg, vr_values *vr_values)
894   {
895     get_int_range (arg, prec, prec + 1, false, -1, vr_values);
896   }
897 
898   /* Return true if both width and precision are known to be
899      either constant or in some range, false otherwise.  */
known_width_and_precisiondirective900   bool known_width_and_precision () const
901   {
902     return ((width[1] < 0
903 	     || (unsigned HOST_WIDE_INT)width[1] <= target_int_max ())
904 	    && (prec[1] < 0
905 		|| (unsigned HOST_WIDE_INT)prec[1] < target_int_max ()));
906   }
907 };
908 
/* Return the number of digits needed to represent X in BASE, i.e.,
   one more than the integer logarithm of X in BASE.  Zero takes
   one digit.  */

static int
ilog (unsigned HOST_WIDE_INT x, int base)
{
  /* Every value has at least one digit.  */
  int ndigits = 1;

  /* Count one more digit for each additional division by BASE that
     leaves a nonzero quotient.  */
  for (x /= base; x != 0; x /= base)
    ++ndigits;

  return ndigits;
}
922 
923 /* Return the number of bytes resulting from converting into a string
924    the INTEGER_CST tree node X in BASE with a minimum of PREC digits.
925    PLUS indicates whether 1 for a plus sign should be added for positive
926    numbers, and PREFIX whether the length of an octal ('O') or hexadecimal
927    ('0x') prefix should be added for nonzero numbers.  Return -1 if X cannot
928    be represented.  */
929 
930 static HOST_WIDE_INT
tree_digits(tree x,int base,HOST_WIDE_INT prec,bool plus,bool prefix)931 tree_digits (tree x, int base, HOST_WIDE_INT prec, bool plus, bool prefix)
932 {
933   unsigned HOST_WIDE_INT absval;
934 
935   HOST_WIDE_INT res;
936 
937   if (TYPE_UNSIGNED (TREE_TYPE (x)))
938     {
939       if (tree_fits_uhwi_p (x))
940 	{
941 	  absval = tree_to_uhwi (x);
942 	  res = plus;
943 	}
944       else
945 	return -1;
946     }
947   else
948     {
949       if (tree_fits_shwi_p (x))
950 	{
951 	  HOST_WIDE_INT i = tree_to_shwi (x);
952          if (HOST_WIDE_INT_MIN == i)
953            {
954              /* Avoid undefined behavior due to negating a minimum.  */
955              absval = HOST_WIDE_INT_MAX;
956              res = 1;
957            }
958          else if (i < 0)
959 	   {
960 	     absval = -i;
961 	     res = 1;
962 	   }
963 	 else
964 	   {
965 	     absval = i;
966 	     res = plus;
967 	   }
968 	}
969       else
970 	return -1;
971     }
972 
973   int ndigs = ilog (absval, base);
974 
975   res += prec < ndigs ? ndigs : prec;
976 
977   /* Adjust a non-zero value for the base prefix, either hexadecimal,
978      or, unless precision has resulted in a leading zero, also octal.  */
979   if (prefix && absval && (base == 16 || prec <= ndigs))
980     {
981       if (base == 8)
982 	res += 1;
983       else if (base == 16)
984 	res += 2;
985     }
986 
987   return res;
988 }
989 
990 /* Given the formatting result described by RES and NAVAIL, the number
991    of available in the destination, return the range of bytes remaining
992    in the destination.  */
993 
994 static inline result_range
bytes_remaining(unsigned HOST_WIDE_INT navail,const format_result & res)995 bytes_remaining (unsigned HOST_WIDE_INT navail, const format_result &res)
996 {
997   result_range range;
998 
999   if (HOST_WIDE_INT_MAX <= navail)
1000     {
1001       range.min = range.max = range.likely = range.unlikely = navail;
1002       return range;
1003     }
1004 
1005   /* The lower bound of the available range is the available size
1006      minus the maximum output size, and the upper bound is the size
1007      minus the minimum.  */
1008   range.max = res.range.min < navail ? navail - res.range.min : 0;
1009 
1010   range.likely = res.range.likely < navail ? navail - res.range.likely : 0;
1011 
1012   if (res.range.max < HOST_WIDE_INT_MAX)
1013     range.min = res.range.max < navail ? navail - res.range.max : 0;
1014   else
1015     range.min = range.likely;
1016 
1017   range.unlikely = (res.range.unlikely < navail
1018 		    ? navail - res.range.unlikely : 0);
1019 
1020   return range;
1021 }
1022 
1023 /* Description of a call to a formatted function.  */
1024 
1025 struct sprintf_dom_walker::call_info
1026 {
1027   /* Function call statement.  */
1028   gimple *callstmt;
1029 
1030   /* Function called.  */
1031   tree func;
1032 
1033   /* Called built-in function code.  */
1034   built_in_function fncode;
1035 
1036   /* Format argument and format string extracted from it.  */
1037   tree format;
1038   const char *fmtstr;
1039 
1040   /* The location of the format argument.  */
1041   location_t fmtloc;
1042 
1043   /* The destination object size for __builtin___xxx_chk functions
1044      typically determined by __builtin_object_size, or -1 if unknown.  */
1045   unsigned HOST_WIDE_INT objsize;
1046 
1047   /* Number of the first variable argument.  */
1048   unsigned HOST_WIDE_INT argidx;
1049 
1050   /* True for functions like snprintf that specify the size of
1051      the destination, false for others like sprintf that don't.  */
1052   bool bounded;
1053 
1054   /* True for bounded functions like snprintf that specify a zero-size
1055      buffer as a request to compute the size of output without actually
1056      writing any.  NOWRITE is cleared in response to the %n directive
1057      which has side-effects similar to writing output.  */
1058   bool nowrite;
1059 
1060   /* Return true if the called function's return value is used.  */
retval_usedcall_info1061   bool retval_used () const
1062   {
1063     return gimple_get_lhs (callstmt);
1064   }
1065 
1066   /* Return the warning option corresponding to the called function.  */
warnoptcall_info1067   int warnopt () const
1068   {
1069     return bounded ? OPT_Wformat_truncation_ : OPT_Wformat_overflow_;
1070   }
1071 };
1072 
1073 /* Return the result of formatting a no-op directive (such as '%n').  */
1074 
1075 static fmtresult
format_none(const directive &,tree,vr_values *)1076 format_none (const directive &, tree, vr_values *)
1077 {
1078   fmtresult res (0);
1079   return res;
1080 }
1081 
1082 /* Return the result of formatting the '%%' directive.  */
1083 
1084 static fmtresult
format_percent(const directive &,tree,vr_values *)1085 format_percent (const directive &, tree, vr_values *)
1086 {
1087   fmtresult res (1);
1088   return res;
1089 }
1090 
1091 
1092 /* Compute intmax_type_node and uintmax_type_node similarly to how
1093    tree.c builds size_type_node.  */
1094 
1095 static void
build_intmax_type_nodes(tree * pintmax,tree * puintmax)1096 build_intmax_type_nodes (tree *pintmax, tree *puintmax)
1097 {
1098   if (strcmp (UINTMAX_TYPE, "unsigned int") == 0)
1099     {
1100       *pintmax = integer_type_node;
1101       *puintmax = unsigned_type_node;
1102     }
1103   else if (strcmp (UINTMAX_TYPE, "long unsigned int") == 0)
1104     {
1105       *pintmax = long_integer_type_node;
1106       *puintmax = long_unsigned_type_node;
1107     }
1108   else if (strcmp (UINTMAX_TYPE, "long long unsigned int") == 0)
1109     {
1110       *pintmax = long_long_integer_type_node;
1111       *puintmax = long_long_unsigned_type_node;
1112     }
1113   else
1114     {
1115       for (int i = 0; i < NUM_INT_N_ENTS; i++)
1116 	if (int_n_enabled_p[i])
1117 	  {
1118 	    char name[50];
1119 	    sprintf (name, "__int%d unsigned", int_n_data[i].bitsize);
1120 
1121 	    if (strcmp (name, UINTMAX_TYPE) == 0)
1122 	      {
1123 	        *pintmax = int_n_trees[i].signed_type;
1124 	        *puintmax = int_n_trees[i].unsigned_type;
1125 		return;
1126 	      }
1127 	  }
1128       gcc_unreachable ();
1129     }
1130 }
1131 
1132 /* Determine the range [*PMIN, *PMAX] that the expression ARG is
1133    in and that is representable in type int.
1134    Return true when the range is a subrange of that of int.
1135    When ARG is null it is as if it had the full range of int.
1136    When ABSOLUTE is true the range reflects the absolute value of
1137    the argument.  When ABSOLUTE is false, negative bounds of
1138    the determined range are replaced with NEGBOUND.  */
1139 
1140 static bool
get_int_range(tree arg,HOST_WIDE_INT * pmin,HOST_WIDE_INT * pmax,bool absolute,HOST_WIDE_INT negbound,class vr_values * vr_values)1141 get_int_range (tree arg, HOST_WIDE_INT *pmin, HOST_WIDE_INT *pmax,
1142 	       bool absolute, HOST_WIDE_INT negbound,
1143 	       class vr_values *vr_values)
1144 {
1145   /* The type of the result.  */
1146   const_tree type = integer_type_node;
1147 
1148   bool knownrange = false;
1149 
1150   if (!arg)
1151     {
1152       *pmin = tree_to_shwi (TYPE_MIN_VALUE (type));
1153       *pmax = tree_to_shwi (TYPE_MAX_VALUE (type));
1154     }
1155   else if (TREE_CODE (arg) == INTEGER_CST
1156 	   && TYPE_PRECISION (TREE_TYPE (arg)) <= TYPE_PRECISION (type))
1157     {
1158       /* For a constant argument return its value adjusted as specified
1159 	 by NEGATIVE and NEGBOUND and return true to indicate that the
1160 	 result is known.  */
1161       *pmin = tree_fits_shwi_p (arg) ? tree_to_shwi (arg) : tree_to_uhwi (arg);
1162       *pmax = *pmin;
1163       knownrange = true;
1164     }
1165   else
1166     {
1167       /* True if the argument's range cannot be determined.  */
1168       bool unknown = true;
1169 
1170       tree argtype = TREE_TYPE (arg);
1171 
1172       /* Ignore invalid arguments with greater precision that that
1173 	 of the expected type (e.g., in sprintf("%*i", 12LL, i)).
1174 	 They will have been detected and diagnosed by -Wformat and
1175 	 so it's not important to complicate this code to try to deal
1176 	 with them again.  */
1177       if (TREE_CODE (arg) == SSA_NAME
1178 	  && INTEGRAL_TYPE_P (argtype)
1179 	  && TYPE_PRECISION (argtype) <= TYPE_PRECISION (type))
1180 	{
1181 	  /* Try to determine the range of values of the integer argument.  */
1182 	  value_range *vr = vr_values->get_value_range (arg);
1183 	  if (vr->type == VR_RANGE
1184 	      && TREE_CODE (vr->min) == INTEGER_CST
1185 	      && TREE_CODE (vr->max) == INTEGER_CST)
1186 	    {
1187 	      HOST_WIDE_INT type_min
1188 		= (TYPE_UNSIGNED (argtype)
1189 		   ? tree_to_uhwi (TYPE_MIN_VALUE (argtype))
1190 		   : tree_to_shwi (TYPE_MIN_VALUE (argtype)));
1191 
1192 	      HOST_WIDE_INT type_max = tree_to_uhwi (TYPE_MAX_VALUE (argtype));
1193 
1194 	      *pmin = TREE_INT_CST_LOW (vr->min);
1195 	      *pmax = TREE_INT_CST_LOW (vr->max);
1196 
1197 	      if (*pmin < *pmax)
1198 		{
1199 		  /* Return true if the adjusted range is a subrange of
1200 		     the full range of the argument's type.  *PMAX may
1201 		     be less than *PMIN when the argument is unsigned
1202 		     and its upper bound is in excess of TYPE_MAX.  In
1203 		     that (invalid) case disregard the range and use that
1204 		     of the expected type instead.  */
1205 		  knownrange = type_min < *pmin || *pmax < type_max;
1206 
1207 		  unknown = false;
1208 		}
1209 	    }
1210 	}
1211 
1212       /* Handle an argument with an unknown range as if none had been
1213 	 provided.  */
1214       if (unknown)
1215 	return get_int_range (NULL_TREE, pmin, pmax, absolute,
1216 			      negbound, vr_values);
1217     }
1218 
1219   /* Adjust each bound as specified by ABSOLUTE and NEGBOUND.  */
1220   if (absolute)
1221     {
1222       if (*pmin < 0)
1223 	{
1224 	  if (*pmin == *pmax)
1225 	    *pmin = *pmax = -*pmin;
1226 	  else
1227 	    {
1228 	      /* Make sure signed overlow is avoided.  */
1229 	      gcc_assert (*pmin != HOST_WIDE_INT_MIN);
1230 
1231 	      HOST_WIDE_INT tmp = -*pmin;
1232 	      *pmin = 0;
1233 	      if (*pmax < tmp)
1234 		*pmax = tmp;
1235 	    }
1236 	}
1237     }
1238   else if (*pmin < negbound)
1239     *pmin = negbound;
1240 
1241   return knownrange;
1242 }
1243 
1244 /* With the range [*ARGMIN, *ARGMAX] of an integer directive's actual
1245    argument, due to the conversion from either *ARGMIN or *ARGMAX to
1246    the type of the directive's formal argument it's possible for both
1247    to result in the same number of bytes or a range of bytes that's
1248    less than the number of bytes that would result from formatting
1249    some other value in the range [*ARGMIN, *ARGMAX].  This can be
1250    determined by checking for the actual argument being in the range
1251    of the type of the directive.  If it isn't it must be assumed to
1252    take on the full range of the directive's type.
1253    Return true when the range has been adjusted to the full range
1254    of DIRTYPE, and false otherwise.  */
1255 
1256 static bool
adjust_range_for_overflow(tree dirtype,tree * argmin,tree * argmax)1257 adjust_range_for_overflow (tree dirtype, tree *argmin, tree *argmax)
1258 {
1259   tree argtype = TREE_TYPE (*argmin);
1260   unsigned argprec = TYPE_PRECISION (argtype);
1261   unsigned dirprec = TYPE_PRECISION (dirtype);
1262 
1263   /* If the actual argument and the directive's argument have the same
1264      precision and sign there can be no overflow and so there is nothing
1265      to adjust.  */
1266   if (argprec == dirprec && TYPE_SIGN (argtype) == TYPE_SIGN (dirtype))
1267     return false;
1268 
1269   /* The logic below was inspired/lifted from the CONVERT_EXPR_CODE_P
1270      branch in the extract_range_from_unary_expr function in tree-vrp.c.  */
1271 
1272   if (TREE_CODE (*argmin) == INTEGER_CST
1273       && TREE_CODE (*argmax) == INTEGER_CST
1274       && (dirprec >= argprec
1275 	  || integer_zerop (int_const_binop (RSHIFT_EXPR,
1276 					     int_const_binop (MINUS_EXPR,
1277 							      *argmax,
1278 							      *argmin),
1279 					     size_int (dirprec)))))
1280     {
1281       *argmin = force_fit_type (dirtype, wi::to_widest (*argmin), 0, false);
1282       *argmax = force_fit_type (dirtype, wi::to_widest (*argmax), 0, false);
1283 
1284       /* If *ARGMIN is still less than *ARGMAX the conversion above
1285 	 is safe.  Otherwise, it has overflowed and would be unsafe.  */
1286       if (tree_int_cst_le (*argmin, *argmax))
1287 	return false;
1288     }
1289 
1290   *argmin = TYPE_MIN_VALUE (dirtype);
1291   *argmax = TYPE_MAX_VALUE (dirtype);
1292   return true;
1293 }
1294 
1295 /* Return a range representing the minimum and maximum number of bytes
1296    that the format directive DIR will output for any argument given
1297    the WIDTH and PRECISION (extracted from DIR).  This function is
1298    used when the directive argument or its value isn't known.  */
1299 
1300 static fmtresult
format_integer(const directive & dir,tree arg,vr_values * vr_values)1301 format_integer (const directive &dir, tree arg, vr_values *vr_values)
1302 {
1303   tree intmax_type_node;
1304   tree uintmax_type_node;
1305 
1306   /* Base to format the number in.  */
1307   int base;
1308 
1309   /* True when a conversion is preceded by a prefix indicating the base
1310      of the argument (octal or hexadecimal).  */
1311   bool maybebase = dir.get_flag ('#');
1312 
1313   /* True when a signed conversion is preceded by a sign or space.  */
1314   bool maybesign = false;
1315 
1316   /* True for signed conversions (i.e., 'd' and 'i').  */
1317   bool sign = false;
1318 
1319   switch (dir.specifier)
1320     {
1321     case 'd':
1322     case 'i':
1323       /* Space and '+' are  only meaningful for signed conversions.  */
1324       maybesign = dir.get_flag (' ') | dir.get_flag ('+');
1325       sign = true;
1326       base = 10;
1327       break;
1328     case 'u':
1329       base = 10;
1330       break;
1331     case 'o':
1332       base = 8;
1333       break;
1334     case 'X':
1335     case 'x':
1336       base = 16;
1337       break;
1338     default:
1339       gcc_unreachable ();
1340     }
1341 
1342   /* The type of the "formal" argument expected by the directive.  */
1343   tree dirtype = NULL_TREE;
1344 
1345   /* Determine the expected type of the argument from the length
1346      modifier.  */
1347   switch (dir.modifier)
1348     {
1349     case FMT_LEN_none:
1350       if (dir.specifier == 'p')
1351 	dirtype = ptr_type_node;
1352       else
1353 	dirtype = sign ? integer_type_node : unsigned_type_node;
1354       break;
1355 
1356     case FMT_LEN_h:
1357       dirtype = sign ? short_integer_type_node : short_unsigned_type_node;
1358       break;
1359 
1360     case FMT_LEN_hh:
1361       dirtype = sign ? signed_char_type_node : unsigned_char_type_node;
1362       break;
1363 
1364     case FMT_LEN_l:
1365       dirtype = sign ? long_integer_type_node : long_unsigned_type_node;
1366       break;
1367 
1368     case FMT_LEN_L:
1369     case FMT_LEN_ll:
1370       dirtype = (sign
1371 		 ? long_long_integer_type_node
1372 		 : long_long_unsigned_type_node);
1373       break;
1374 
1375     case FMT_LEN_z:
1376       dirtype = signed_or_unsigned_type_for (!sign, size_type_node);
1377       break;
1378 
1379     case FMT_LEN_t:
1380       dirtype = signed_or_unsigned_type_for (!sign, ptrdiff_type_node);
1381       break;
1382 
1383     case FMT_LEN_j:
1384       build_intmax_type_nodes (&intmax_type_node, &uintmax_type_node);
1385       dirtype = sign ? intmax_type_node : uintmax_type_node;
1386       break;
1387 
1388     default:
1389       return fmtresult ();
1390     }
1391 
1392   /* The type of the argument to the directive, either deduced from
1393      the actual non-constant argument if one is known, or from
1394      the directive itself when none has been provided because it's
1395      a va_list.  */
1396   tree argtype = NULL_TREE;
1397 
1398   if (!arg)
1399     {
1400       /* When the argument has not been provided, use the type of
1401 	 the directive's argument as an approximation.  This will
1402 	 result in false positives for directives like %i with
1403 	 arguments with smaller precision (such as short or char).  */
1404       argtype = dirtype;
1405     }
1406   else if (TREE_CODE (arg) == INTEGER_CST)
1407     {
1408       /* When a constant argument has been provided use its value
1409 	 rather than type to determine the length of the output.  */
1410       fmtresult res;
1411 
1412       if ((dir.prec[0] <= 0 && dir.prec[1] >= 0) && integer_zerop (arg))
1413 	{
1414 	  /* As a special case, a precision of zero with a zero argument
1415 	     results in zero bytes except in base 8 when the '#' flag is
1416 	     specified, and for signed conversions in base 8 and 10 when
1417 	     either the space or '+' flag has been specified and it results
1418 	     in just one byte (with width having the normal effect).  This
1419 	     must extend to the case of a specified precision with
1420 	     an unknown value because it can be zero.  */
1421 	  res.range.min = ((base == 8 && dir.get_flag ('#')) || maybesign);
1422 	  if (res.range.min == 0 && dir.prec[0] != dir.prec[1])
1423 	    {
1424 	      res.range.max = 1;
1425 	      res.range.likely = 1;
1426 	    }
1427 	  else
1428 	    {
1429 	      res.range.max = res.range.min;
1430 	      res.range.likely = res.range.min;
1431 	    }
1432 	}
1433       else
1434 	{
1435 	  /* Convert the argument to the type of the directive.  */
1436 	  arg = fold_convert (dirtype, arg);
1437 
1438 	  res.range.min = tree_digits (arg, base, dir.prec[0],
1439 				       maybesign, maybebase);
1440 	  if (dir.prec[0] == dir.prec[1])
1441 	    res.range.max = res.range.min;
1442 	  else
1443 	    res.range.max = tree_digits (arg, base, dir.prec[1],
1444 					 maybesign, maybebase);
1445 	  res.range.likely = res.range.min;
1446 	  res.knownrange = true;
1447 	}
1448 
1449       res.range.unlikely = res.range.max;
1450 
1451       /* Bump up the counters if WIDTH is greater than LEN.  */
1452       res.adjust_for_width_or_precision (dir.width, dirtype, base,
1453 					 (sign | maybebase) + (base == 16));
1454       /* Bump up the counters again if PRECision is greater still.  */
1455       res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1456 					 (sign | maybebase) + (base == 16));
1457 
1458       return res;
1459     }
1460   else if (INTEGRAL_TYPE_P (TREE_TYPE (arg))
1461 	   || TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)
1462     /* Determine the type of the provided non-constant argument.  */
1463     argtype = TREE_TYPE (arg);
1464   else
1465     /* Don't bother with invalid arguments since they likely would
1466        have already been diagnosed, and disable any further checking
1467        of the format string by returning [-1, -1].  */
1468     return fmtresult ();
1469 
1470   fmtresult res;
1471 
1472   /* Using either the range the non-constant argument is in, or its
1473      type (either "formal" or actual), create a range of values that
1474      constrain the length of output given the warning level.  */
1475   tree argmin = NULL_TREE;
1476   tree argmax = NULL_TREE;
1477 
1478   if (arg
1479       && TREE_CODE (arg) == SSA_NAME
1480       && INTEGRAL_TYPE_P (argtype))
1481     {
1482       /* Try to determine the range of values of the integer argument
1483 	 (range information is not available for pointers).  */
1484       value_range *vr = vr_values->get_value_range (arg);
1485       if (vr->type == VR_RANGE
1486 	  && TREE_CODE (vr->min) == INTEGER_CST
1487 	  && TREE_CODE (vr->max) == INTEGER_CST)
1488 	{
1489 	  argmin = vr->min;
1490 	  argmax = vr->max;
1491 
1492 	  /* Set KNOWNRANGE if the argument is in a known subrange
1493 	     of the directive's type and neither width nor precision
1494 	     is unknown.  (KNOWNRANGE may be reset below).  */
1495 	  res.knownrange
1496 	    = ((!tree_int_cst_equal (TYPE_MIN_VALUE (dirtype), argmin)
1497 		|| !tree_int_cst_equal (TYPE_MAX_VALUE (dirtype), argmax))
1498 	       && dir.known_width_and_precision ());
1499 
1500 	  res.argmin = argmin;
1501 	  res.argmax = argmax;
1502 	}
1503       else if (vr->type == VR_ANTI_RANGE)
1504 	{
1505 	  /* Handle anti-ranges if/when bug 71690 is resolved.  */
1506 	}
1507       else if (vr->type == VR_VARYING
1508 	       || vr->type == VR_UNDEFINED)
1509 	{
1510 	  /* The argument here may be the result of promoting the actual
1511 	     argument to int.  Try to determine the type of the actual
1512 	     argument before promotion and narrow down its range that
1513 	     way.  */
1514 	  gimple *def = SSA_NAME_DEF_STMT (arg);
1515 	  if (is_gimple_assign (def))
1516 	    {
1517 	      tree_code code = gimple_assign_rhs_code (def);
1518 	      if (code == INTEGER_CST)
1519 		{
1520 		  arg = gimple_assign_rhs1 (def);
1521 		  return format_integer (dir, arg, vr_values);
1522 		}
1523 
1524 	      if (code == NOP_EXPR)
1525 		{
1526 		  tree type = TREE_TYPE (gimple_assign_rhs1 (def));
1527 		  if (INTEGRAL_TYPE_P (type)
1528 		      || TREE_CODE (type) == POINTER_TYPE)
1529 		    argtype = type;
1530 		}
1531 	    }
1532 	}
1533     }
1534 
1535   if (!argmin)
1536     {
1537       if (TREE_CODE (argtype) == POINTER_TYPE)
1538 	{
1539 	  argmin = build_int_cst (pointer_sized_int_node, 0);
1540 	  argmax = build_all_ones_cst (pointer_sized_int_node);
1541 	}
1542       else
1543 	{
1544 	  argmin = TYPE_MIN_VALUE (argtype);
1545 	  argmax = TYPE_MAX_VALUE (argtype);
1546 	}
1547     }
1548 
1549   /* Clear KNOWNRANGE if the range has been adjusted to the maximum
1550      of the directive.  If it has been cleared then since ARGMIN and/or
1551      ARGMAX have been adjusted also adjust the corresponding ARGMIN and
1552      ARGMAX in the result to include in diagnostics.  */
1553   if (adjust_range_for_overflow (dirtype, &argmin, &argmax))
1554     {
1555       res.knownrange = false;
1556       res.argmin = argmin;
1557       res.argmax = argmax;
1558     }
1559 
1560   /* Recursively compute the minimum and maximum from the known range.  */
1561   if (TYPE_UNSIGNED (dirtype) || tree_int_cst_sgn (argmin) >= 0)
1562     {
1563       /* For unsigned conversions/directives or signed when
1564 	 the minimum is positive, use the minimum and maximum to compute
1565 	 the shortest and longest output, respectively.  */
1566       res.range.min = format_integer (dir, argmin, vr_values).range.min;
1567       res.range.max = format_integer (dir, argmax, vr_values).range.max;
1568     }
1569   else if (tree_int_cst_sgn (argmax) < 0)
1570     {
1571       /* For signed conversions/directives if maximum is negative,
1572 	 use the minimum as the longest output and maximum as the
1573 	 shortest output.  */
1574       res.range.min = format_integer (dir, argmax, vr_values).range.min;
1575       res.range.max = format_integer (dir, argmin, vr_values).range.max;
1576     }
1577   else
1578     {
1579       /* Otherwise, 0 is inside of the range and minimum negative.  Use 0
1580 	 as the shortest output and for the longest output compute the
1581 	 length of the output of both minimum and maximum and pick the
1582 	 longer.  */
1583       unsigned HOST_WIDE_INT max1
1584 	= format_integer (dir, argmin, vr_values).range.max;
1585       unsigned HOST_WIDE_INT max2
1586 	= format_integer (dir, argmax, vr_values).range.max;
1587       res.range.min
1588 	= format_integer (dir, integer_zero_node, vr_values).range.min;
1589       res.range.max = MAX (max1, max2);
1590     }
1591 
1592   /* If the range is known, use the maximum as the likely length.  */
1593   if (res.knownrange)
1594     res.range.likely = res.range.max;
1595   else
1596     {
1597       /* Otherwise, use the minimum.  Except for the case where for %#x or
1598          %#o the minimum is just for a single value in the range (0) and
1599          for all other values it is something longer, like 0x1 or 01.
1600 	  Use the length for value 1 in that case instead as the likely
1601 	  length.  */
1602       res.range.likely = res.range.min;
1603       if (maybebase
1604 	  && base != 10
1605 	  && (tree_int_cst_sgn (argmin) < 0 || tree_int_cst_sgn (argmax) > 0))
1606 	{
1607 	  if (res.range.min == 1)
1608 	    res.range.likely += base == 8 ? 1 : 2;
1609 	  else if (res.range.min == 2
1610 		   && base == 16
1611 		   && (dir.width[0] == 2 || dir.prec[0] == 2))
1612 	    ++res.range.likely;
1613 	}
1614     }
1615 
1616   res.range.unlikely = res.range.max;
1617   res.adjust_for_width_or_precision (dir.width, dirtype, base,
1618 				     (sign | maybebase) + (base == 16));
1619   res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1620 				     (sign | maybebase) + (base == 16));
1621 
1622   return res;
1623 }
1624 
1625 /* Return the number of bytes that a format directive consisting of FLAGS,
1626    PRECision, format SPECification, and MPFR rounding specifier RNDSPEC,
1627    would result for argument X under ideal conditions (i.e., if PREC
1628    weren't excessive).  MPFR 3.1 allocates large amounts of memory for
1629    values of PREC with large magnitude and can fail (see MPFR bug #21056).
1630    This function works around those problems.  */
1631 
1632 static unsigned HOST_WIDE_INT
get_mpfr_format_length(mpfr_ptr x,const char * flags,HOST_WIDE_INT prec,char spec,char rndspec)1633 get_mpfr_format_length (mpfr_ptr x, const char *flags, HOST_WIDE_INT prec,
1634 			char spec, char rndspec)
1635 {
1636   char fmtstr[40];
1637 
1638   HOST_WIDE_INT len = strlen (flags);
1639 
1640   fmtstr[0] = '%';
1641   memcpy (fmtstr + 1, flags, len);
1642   memcpy (fmtstr + 1 + len, ".*R", 3);
1643   fmtstr[len + 4] = rndspec;
1644   fmtstr[len + 5] = spec;
1645   fmtstr[len + 6] = '\0';
1646 
1647   spec = TOUPPER (spec);
1648   if (spec == 'E' || spec == 'F')
1649     {
1650       /* For %e, specify the precision explicitly since mpfr_sprintf
1651 	 does its own thing just to be different (see MPFR bug 21088).  */
1652       if (prec < 0)
1653 	prec = 6;
1654     }
1655   else
1656     {
1657       /* Avoid passing negative precisions with larger magnitude to MPFR
1658 	 to avoid exposing its bugs.  (A negative precision is supposed
1659 	 to be ignored.)  */
1660       if (prec < 0)
1661 	prec = -1;
1662     }
1663 
1664   HOST_WIDE_INT p = prec;
1665 
1666   if (spec == 'G' && !strchr (flags, '#'))
1667     {
1668       /* For G/g without the pound flag, precision gives the maximum number
1669 	 of significant digits which is bounded by LDBL_MAX_10_EXP, or, for
1670 	 a 128 bit IEEE extended precision, 4932.  Using twice as much here
1671 	 should be more than sufficient for any real format.  */
1672       if ((IEEE_MAX_10_EXP * 2) < prec)
1673 	prec = IEEE_MAX_10_EXP * 2;
1674       p = prec;
1675     }
1676   else
1677     {
1678       /* Cap precision arbitrarily at 1KB and add the difference
1679 	 (if any) to the MPFR result.  */
1680       if (prec > 1024)
1681 	p = 1024;
1682     }
1683 
1684   len = mpfr_snprintf (NULL, 0, fmtstr, (int)p, x);
1685 
1686   /* Handle the unlikely (impossible?) error by returning more than
1687      the maximum dictated by the function's return type.  */
1688   if (len < 0)
1689     return target_dir_max () + 1;
1690 
1691   /* Adjust the return value by the difference.  */
1692   if (p < prec)
1693     len += prec - p;
1694 
1695   return len;
1696 }
1697 
1698 /* Return the number of bytes to format using the format specifier
1699    SPEC and the precision PREC the largest value in the real floating
1700    TYPE.  */
1701 
1702 static unsigned HOST_WIDE_INT
format_floating_max(tree type,char spec,HOST_WIDE_INT prec)1703 format_floating_max (tree type, char spec, HOST_WIDE_INT prec)
1704 {
1705   machine_mode mode = TYPE_MODE (type);
1706 
1707   /* IBM Extended mode.  */
1708   if (MODE_COMPOSITE_P (mode))
1709     mode = DFmode;
1710 
1711   /* Get the real type format desription for the target.  */
1712   const real_format *rfmt = REAL_MODE_FORMAT (mode);
1713   REAL_VALUE_TYPE rv;
1714 
1715   real_maxval (&rv, 0, mode);
1716 
1717   /* Convert the GCC real value representation with the precision
1718      of the real type to the mpfr_t format with the GCC default
1719      round-to-nearest mode.  */
1720   mpfr_t x;
1721   mpfr_init2 (x, rfmt->p);
1722   mpfr_from_real (x, &rv, GMP_RNDN);
1723 
1724   /* Return a value one greater to account for the leading minus sign.  */
1725   unsigned HOST_WIDE_INT r
1726     = 1 + get_mpfr_format_length (x, "", prec, spec, 'D');
1727   mpfr_clear (x);
1728   return r;
1729 }
1730 
1731 /* Return a range representing the minimum and maximum number of bytes
1732    that the directive DIR will output for any argument.  PREC gives
1733    the adjusted precision range to account for negative precisions
1734    meaning the default 6.  This function is used when the directive
1735    argument or its value isn't known.  */
1736 
1737 static fmtresult
format_floating(const directive & dir,const HOST_WIDE_INT prec[2])1738 format_floating (const directive &dir, const HOST_WIDE_INT prec[2])
1739 {
1740   tree type;
1741 
1742   switch (dir.modifier)
1743     {
1744     case FMT_LEN_l:
1745     case FMT_LEN_none:
1746       type = double_type_node;
1747       break;
1748 
1749     case FMT_LEN_L:
1750       type = long_double_type_node;
1751       break;
1752 
1753     case FMT_LEN_ll:
1754       type = long_double_type_node;
1755       break;
1756 
1757     default:
1758       return fmtresult ();
1759     }
1760 
1761   /* The minimum and maximum number of bytes produced by the directive.  */
1762   fmtresult res;
1763 
1764   /* The minimum output as determined by flags.  It's always at least 1.
1765      When plus or space are set the output is preceded by either a sign
1766      or a space.  */
1767   unsigned flagmin = (1 /* for the first digit */
1768 		      + (dir.get_flag ('+') | dir.get_flag (' ')));
1769 
1770   /* The minimum is 3 for "inf" and "nan" for all specifiers, plus 1
1771      for the plus sign/space with the '+' and ' ' flags, respectively,
1772      unless reduced below.  */
1773   res.range.min = 2 + flagmin;
1774 
1775   /* When the pound flag is set the decimal point is included in output
1776      regardless of precision.  Whether or not a decimal point is included
1777      otherwise depends on the specification and precision.  */
1778   bool radix = dir.get_flag ('#');
1779 
1780   switch (dir.specifier)
1781     {
1782     case 'A':
1783     case 'a':
1784       {
1785 	HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1786 	if (dir.prec[0] <= 0)
1787 	  minprec = 0;
1788 	else if (dir.prec[0] > 0)
1789 	  minprec = dir.prec[0] + !radix /* decimal point */;
1790 
1791 	res.range.likely = (2 /* 0x */
1792 			    + flagmin
1793 			    + radix
1794 			    + minprec
1795 			    + 3 /* p+0 */);
1796 
1797 	res.range.max = format_floating_max (type, 'a', prec[1]);
1798 
1799 	/* The unlikely maximum accounts for the longest multibyte
1800 	   decimal point character.  */
1801 	res.range.unlikely = res.range.max;
1802 	if (dir.prec[1] > 0)
1803 	  res.range.unlikely += target_mb_len_max () - 1;
1804 
1805 	break;
1806       }
1807 
1808     case 'E':
1809     case 'e':
1810       {
1811 	/* Minimum output attributable to precision and, when it's
1812 	   non-zero, decimal point.  */
1813 	HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1814 
1815 	/* The likely minimum output is "[-+]1.234567e+00" regardless
1816 	   of the value of the actual argument.  */
1817 	res.range.likely = (flagmin
1818 			    + radix
1819 			    + minprec
1820 			    + 2 /* e+ */ + 2);
1821 
1822 	res.range.max = format_floating_max (type, 'e', prec[1]);
1823 
1824 	/* The unlikely maximum accounts for the longest multibyte
1825 	   decimal point character.  */
1826 	if (dir.prec[0] != dir.prec[1]
1827 	    || dir.prec[0] == -1 || dir.prec[0] > 0)
1828 	  res.range.unlikely = res.range.max + target_mb_len_max () -1;
1829 	else
1830 	  res.range.unlikely = res.range.max;
1831 	break;
1832       }
1833 
1834     case 'F':
1835     case 'f':
1836       {
1837 	/* Minimum output attributable to precision and, when it's non-zero,
1838 	   decimal point.  */
1839 	HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1840 
1841 	/* For finite numbers (i.e., not infinity or NaN) the lower bound
1842 	   when precision isn't specified is 8 bytes ("1.23456" since
1843 	   precision is taken to be 6).  When precision is zero, the lower
1844 	   bound is 1 byte (e.g., "1").  Otherwise, when precision is greater
1845 	   than zero, then the lower bound is 2 plus precision (plus flags).
1846 	   But in all cases, the lower bound is no greater than 3.  */
1847 	unsigned HOST_WIDE_INT min = flagmin + radix + minprec;
1848 	if (min < res.range.min)
1849 	  res.range.min = min;
1850 
1851 	/* Compute the upper bound for -TYPE_MAX.  */
1852 	res.range.max = format_floating_max (type, 'f', prec[1]);
1853 
1854 	/* The minimum output with unknown precision is a single byte
1855 	   (e.g., "0") but the more likely output is 3 bytes ("0.0").  */
1856 	if (dir.prec[0] < 0 && dir.prec[1] > 0)
1857 	  res.range.likely = 3;
1858 	else
1859 	  res.range.likely = min;
1860 
1861 	/* The unlikely maximum accounts for the longest multibyte
1862 	   decimal point character.  */
1863 	if (dir.prec[0] != dir.prec[1]
1864 	    || dir.prec[0] == -1 || dir.prec[0] > 0)
1865 	  res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1866 	break;
1867       }
1868 
1869     case 'G':
1870     case 'g':
1871       {
1872 	/* The %g output depends on precision and the exponent of
1873 	   the argument.  Since the value of the argument isn't known
1874 	   the lower bound on the range of bytes (not counting flags
1875 	   or width) is 1 plus radix (i.e., either "0" or "0." for
1876 	   "%g" and "%#g", respectively, with a zero argument).  */
1877 	unsigned HOST_WIDE_INT min = flagmin + radix;
1878 	if (min < res.range.min)
1879 	  res.range.min = min;
1880 
1881 	char spec = 'g';
1882 	HOST_WIDE_INT maxprec = dir.prec[1];
1883 	if (radix && maxprec)
1884 	  {
1885 	    /* When the pound flag (radix) is set, trailing zeros aren't
1886 	       trimmed and so the longest output is the same as for %e,
1887 	       except with precision minus 1 (as specified in C11).  */
1888 	    spec = 'e';
1889 	    if (maxprec > 0)
1890 	      --maxprec;
1891 	    else if (maxprec < 0)
1892 	      maxprec = 5;
1893 	  }
1894 	else
1895 	  maxprec = prec[1];
1896 
1897 	res.range.max = format_floating_max (type, spec, maxprec);
1898 
1899 	/* The likely output is either the maximum computed above
1900 	   minus 1 (assuming the maximum is positive) when precision
1901 	   is known (or unspecified), or the same minimum as for %e
1902 	   (which is computed for a non-negative argument).  Unlike
1903 	   for the other specifiers above the likely output isn't
1904 	   the minimum because for %g that's 1 which is unlikely.  */
1905 	if (dir.prec[1] < 0
1906 	    || (unsigned HOST_WIDE_INT)dir.prec[1] < target_int_max ())
1907 	  res.range.likely = res.range.max - 1;
1908 	else
1909 	  {
1910 	    HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1911 	    res.range.likely = (flagmin
1912 				+ radix
1913 				+ minprec
1914 				+ 2 /* e+ */ + 2);
1915 	  }
1916 
1917 	/* The unlikely maximum accounts for the longest multibyte
1918 	   decimal point character.  */
1919 	res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1920 	break;
1921       }
1922 
1923     default:
1924       return fmtresult ();
1925     }
1926 
1927   /* Bump up the byte counters if WIDTH is greater.  */
1928   res.adjust_for_width_or_precision (dir.width);
1929   return res;
1930 }
1931 
1932 /* Return a range representing the minimum and maximum number of bytes
1933    that the directive DIR will write on output for the floating argument
1934    ARG.  */
1935 
1936 static fmtresult
format_floating(const directive & dir,tree arg,vr_values *)1937 format_floating (const directive &dir, tree arg, vr_values *)
1938 {
1939   HOST_WIDE_INT prec[] = { dir.prec[0], dir.prec[1] };
1940   tree type = (dir.modifier == FMT_LEN_L || dir.modifier == FMT_LEN_ll
1941 	       ? long_double_type_node : double_type_node);
1942 
1943   /* For an indeterminate precision the lower bound must be assumed
1944      to be zero.  */
1945   if (TOUPPER (dir.specifier) == 'A')
1946     {
1947       /* Get the number of fractional decimal digits needed to represent
1948 	 the argument without a loss of accuracy.  */
1949       unsigned fmtprec
1950 	= REAL_MODE_FORMAT (TYPE_MODE (type))->p;
1951 
1952       /* The precision of the IEEE 754 double format is 53.
1953 	 The precision of all other GCC binary double formats
1954 	 is 56 or less.  */
1955       unsigned maxprec = fmtprec <= 56 ? 13 : 15;
1956 
1957       /* For %a, leave the minimum precision unspecified to let
1958 	 MFPR trim trailing zeros (as it and many other systems
1959 	 including Glibc happen to do) and set the maximum
1960 	 precision to reflect what it would be with trailing zeros
1961 	 present (as Solaris and derived systems do).  */
1962       if (dir.prec[1] < 0)
1963 	{
1964 	  /* Both bounds are negative implies that precision has
1965 	     not been specified.  */
1966 	  prec[0] = maxprec;
1967 	  prec[1] = -1;
1968 	}
1969       else if (dir.prec[0] < 0)
1970 	{
1971 	  /* With a negative lower bound and a non-negative upper
1972 	     bound set the minimum precision to zero and the maximum
1973 	     to the greater of the maximum precision (i.e., with
1974 	     trailing zeros present) and the specified upper bound.  */
1975 	  prec[0] = 0;
1976 	  prec[1] = dir.prec[1] < maxprec ? maxprec : dir.prec[1];
1977 	}
1978     }
1979   else if (dir.prec[0] < 0)
1980     {
1981       if (dir.prec[1] < 0)
1982 	{
1983 	  /* A precision in a strictly negative range is ignored and
1984 	     the default of 6 is used instead.  */
1985 	  prec[0] = prec[1] = 6;
1986 	}
1987       else
1988 	{
1989 	  /* For a precision in a partly negative range, the lower bound
1990 	     must be assumed to be zero and the new upper bound is the
1991 	     greater of 6 (the default precision used when the specified
1992 	     precision is negative) and the upper bound of the specified
1993 	     range.  */
1994 	  prec[0] = 0;
1995 	  prec[1] = dir.prec[1] < 6 ? 6 : dir.prec[1];
1996 	}
1997     }
1998 
1999   if (!arg
2000       || TREE_CODE (arg) != REAL_CST
2001       || !useless_type_conversion_p (type, TREE_TYPE (arg)))
2002     return format_floating (dir, prec);
2003 
2004   /* The minimum and maximum number of bytes produced by the directive.  */
2005   fmtresult res;
2006 
2007   /* Get the real type format desription for the target.  */
2008   const REAL_VALUE_TYPE *rvp = TREE_REAL_CST_PTR (arg);
2009   const real_format *rfmt = REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (arg)));
2010 
2011   if (!real_isfinite (rvp))
2012     {
2013       /* The format for Infinity and NaN is "[-]inf"/"[-]infinity"
2014 	 and "[-]nan" with the choice being implementation-defined
2015 	 but not locale dependent.  */
2016       bool sign = dir.get_flag ('+') || real_isneg (rvp);
2017       res.range.min = 3 + sign;
2018 
2019       res.range.likely = res.range.min;
2020       res.range.max = res.range.min;
2021       /* The inlikely maximum is "[-/+]infinity" or "[-/+]nan".  */
2022       res.range.unlikely = sign + (real_isinf (rvp) ? 8 : 3);
2023 
2024       /* The range for infinity and NaN is known unless either width
2025 	 or precision is unknown.  Width has the same effect regardless
2026 	 of whether the argument is finite.  Precision is either ignored
2027 	 (e.g., Glibc) or can have an effect on the short vs long format
2028 	 such as inf/infinity (e.g., Solaris).  */
2029       res.knownrange = dir.known_width_and_precision ();
2030 
2031       /* Adjust the range for width but ignore precision.  */
2032       res.adjust_for_width_or_precision (dir.width);
2033 
2034       return res;
2035     }
2036 
2037   char fmtstr [40];
2038   char *pfmt = fmtstr;
2039 
2040   /* Append flags.  */
2041   for (const char *pf = "-+ #0"; *pf; ++pf)
2042     if (dir.get_flag (*pf))
2043       *pfmt++ = *pf;
2044 
2045   *pfmt = '\0';
2046 
2047   {
2048     /* Set up an array to easily iterate over.  */
2049     unsigned HOST_WIDE_INT* const minmax[] = {
2050       &res.range.min, &res.range.max
2051     };
2052 
2053     for (int i = 0; i != sizeof minmax / sizeof *minmax; ++i)
2054       {
2055 	/* Convert the GCC real value representation with the precision
2056 	   of the real type to the mpfr_t format rounding down in the
2057 	   first iteration that computes the minimm and up in the second
2058 	   that computes the maximum.  This order is arbibtrary because
2059 	   rounding in either direction can result in longer output.  */
2060 	mpfr_t mpfrval;
2061 	mpfr_init2 (mpfrval, rfmt->p);
2062 	mpfr_from_real (mpfrval, rvp, i ? GMP_RNDU : GMP_RNDD);
2063 
2064 	/* Use the MPFR rounding specifier to round down in the first
2065 	   iteration and then up.  In most but not all cases this will
2066 	   result in the same number of bytes.  */
2067 	char rndspec = "DU"[i];
2068 
2069 	/* Format it and store the result in the corresponding member
2070 	   of the result struct.  */
2071 	*minmax[i] = get_mpfr_format_length (mpfrval, fmtstr, prec[i],
2072 					     dir.specifier, rndspec);
2073 	mpfr_clear (mpfrval);
2074       }
2075   }
2076 
2077   /* Make sure the minimum is less than the maximum (MPFR rounding
2078      in the call to mpfr_snprintf can result in the reverse.  */
2079   if (res.range.max < res.range.min)
2080     {
2081       unsigned HOST_WIDE_INT tmp = res.range.min;
2082       res.range.min = res.range.max;
2083       res.range.max = tmp;
2084     }
2085 
2086   /* The range is known unless either width or precision is unknown.  */
2087   res.knownrange = dir.known_width_and_precision ();
2088 
2089   /* For the same floating point constant, unless width or precision
2090      is unknown, use the longer output as the likely maximum since
2091      with round to nearest either is equally likely.  Otheriwse, when
2092      precision is unknown, use the greater of the minimum and 3 as
2093      the likely output (for "0.0" since zero precision is unlikely).  */
2094   if (res.knownrange)
2095     res.range.likely = res.range.max;
2096   else if (res.range.min < 3
2097 	   && dir.prec[0] < 0
2098 	   && (unsigned HOST_WIDE_INT)dir.prec[1] == target_int_max ())
2099     res.range.likely = 3;
2100   else
2101     res.range.likely = res.range.min;
2102 
2103   res.range.unlikely = res.range.max;
2104 
2105   if (res.range.max > 2 && (prec[0] != 0 || prec[1] != 0))
2106     {
2107       /* Unless the precision is zero output longer than 2 bytes may
2108 	 include the decimal point which must be a single character
2109 	 up to MB_LEN_MAX in length.  This is overly conservative
2110 	 since in some conversions some constants result in no decimal
2111 	 point (e.g., in %g).  */
2112       res.range.unlikely += target_mb_len_max () - 1;
2113     }
2114 
2115   res.adjust_for_width_or_precision (dir.width);
2116   return res;
2117 }
2118 
2119 /* Return a FMTRESULT struct set to the lengths of the shortest and longest
2120    strings referenced by the expression STR, or (-1, -1) when not known.
2121    Used by the format_string function below.  */
2122 
2123 static fmtresult
get_string_length(tree str)2124 get_string_length (tree str)
2125 {
2126   if (!str)
2127     return fmtresult ();
2128 
2129   if (tree slen = c_strlen (str, 1))
2130     {
2131       /* Simply return the length of the string.  */
2132       fmtresult res (tree_to_shwi (slen));
2133       return res;
2134     }
2135 
2136   /* Determine the length of the shortest and longest string referenced
2137      by STR.  Strings of unknown lengths are bounded by the sizes of
2138      arrays that subexpressions of STR may refer to.  Pointers that
2139      aren't known to point any such arrays result in LENRANGE[1] set
2140      to SIZE_MAX.  */
2141   tree lenrange[2];
2142   bool flexarray = get_range_strlen (str, lenrange);
2143 
2144   if (lenrange [0] || lenrange [1])
2145     {
2146       HOST_WIDE_INT min
2147 	= (tree_fits_uhwi_p (lenrange[0])
2148 	   ? tree_to_uhwi (lenrange[0])
2149 	   : 0);
2150 
2151       HOST_WIDE_INT max
2152 	= (tree_fits_uhwi_p (lenrange[1])
2153 	   ? tree_to_uhwi (lenrange[1])
2154 	   : HOST_WIDE_INT_M1U);
2155 
2156       /* get_range_strlen() returns the target value of SIZE_MAX for
2157 	 strings of unknown length.  Bump it up to HOST_WIDE_INT_M1U
2158 	 which may be bigger.  */
2159       if ((unsigned HOST_WIDE_INT)min == target_size_max ())
2160 	min = HOST_WIDE_INT_M1U;
2161       if ((unsigned HOST_WIDE_INT)max == target_size_max ())
2162 	max = HOST_WIDE_INT_M1U;
2163 
2164       fmtresult res (min, max);
2165 
2166       /* Set RES.KNOWNRANGE to true if and only if all strings referenced
2167 	 by STR are known to be bounded (though not necessarily by their
2168 	 actual length but perhaps by their maximum possible length).  */
2169       if (res.range.max < target_int_max ())
2170 	{
2171 	  res.knownrange = true;
2172 	  /* When the the length of the longest string is known and not
2173 	     excessive use it as the likely length of the string(s).  */
2174 	  res.range.likely = res.range.max;
2175 	}
2176       else
2177 	{
2178 	  /* When the upper bound is unknown (it can be zero or excessive)
2179 	     set the likely length to the greater of 1 and the length of
2180 	     the shortest string and reset the lower bound to zero.  */
2181 	  res.range.likely = res.range.min ? res.range.min : warn_level > 1;
2182 	  res.range.min = 0;
2183 	}
2184 
2185       /* If the range of string length has been estimated from the size
2186 	 of an array at the end of a struct assume that it's longer than
2187 	 the array bound says it is in case it's used as a poor man's
2188 	 flexible array member, such as in struct S { char a[4]; };  */
2189       res.range.unlikely = flexarray ? HOST_WIDE_INT_MAX : res.range.max;
2190 
2191       return res;
2192     }
2193 
2194   return get_string_length (NULL_TREE);
2195 }
2196 
2197 /* Return the minimum and maximum number of characters formatted
2198    by the '%c' format directives and its wide character form for
2199    the argument ARG.  ARG can be null (for functions such as
2200    vsprinf).  */
2201 
2202 static fmtresult
format_character(const directive & dir,tree arg,vr_values * vr_values)2203 format_character (const directive &dir, tree arg, vr_values *vr_values)
2204 {
2205   fmtresult res;
2206 
2207   res.knownrange = true;
2208 
2209   if (dir.modifier == FMT_LEN_l)
2210     {
2211       /* A wide character can result in as few as zero bytes.  */
2212       res.range.min = 0;
2213 
2214       HOST_WIDE_INT min, max;
2215       if (get_int_range (arg, &min, &max, false, 0, vr_values))
2216 	{
2217 	  if (min == 0 && max == 0)
2218 	    {
2219 	      /* The NUL wide character results in no bytes.  */
2220 	      res.range.max = 0;
2221 	      res.range.likely = 0;
2222 	      res.range.unlikely = 0;
2223 	    }
2224 	  else if (min > 0 && min < 128)
2225 	    {
2226 	      /* A wide character in the ASCII range most likely results
2227 		 in a single byte, and only unlikely in up to MB_LEN_MAX.  */
2228 	      res.range.max = 1;
2229 	      res.range.likely = 1;
2230 	      res.range.unlikely = target_mb_len_max ();
2231 	    }
2232 	  else
2233 	    {
2234 	      /* A wide character outside the ASCII range likely results
2235 		 in up to two bytes, and only unlikely in up to MB_LEN_MAX.  */
2236 	      res.range.max = target_mb_len_max ();
2237 	      res.range.likely = 2;
2238 	      res.range.unlikely = res.range.max;
2239 	    }
2240 	}
2241       else
2242 	{
2243 	  /* An unknown wide character is treated the same as a wide
2244 	     character outside the ASCII range.  */
2245 	  res.range.max = target_mb_len_max ();
2246 	  res.range.likely = 2;
2247 	  res.range.unlikely = res.range.max;
2248 	}
2249     }
2250   else
2251     {
2252       /* A plain '%c' directive.  Its ouput is exactly 1.  */
2253       res.range.min = res.range.max = 1;
2254       res.range.likely = res.range.unlikely = 1;
2255       res.knownrange = true;
2256     }
2257 
2258   /* Bump up the byte counters if WIDTH is greater.  */
2259   return res.adjust_for_width_or_precision (dir.width);
2260 }
2261 
2262 /* Return the minimum and maximum number of characters formatted
2263    by the '%s' format directive and its wide character form for
2264    the argument ARG.  ARG can be null (for functions such as
2265    vsprinf).  */
2266 
2267 static fmtresult
format_string(const directive & dir,tree arg,vr_values *)2268 format_string (const directive &dir, tree arg, vr_values *)
2269 {
2270   fmtresult res;
2271 
2272   /* Compute the range the argument's length can be in.  */
2273   fmtresult slen = get_string_length (arg);
2274   if (slen.range.min == slen.range.max
2275       && slen.range.min < HOST_WIDE_INT_MAX)
2276     {
2277       /* The argument is either a string constant or it refers
2278 	 to one of a number of strings of the same length.  */
2279 
2280       /* A '%s' directive with a string argument with constant length.  */
2281       res.range = slen.range;
2282 
2283       if (dir.modifier == FMT_LEN_l)
2284 	{
2285 	  /* In the worst case the length of output of a wide string S
2286 	     is bounded by MB_LEN_MAX * wcslen (S).  */
2287 	  res.range.max *= target_mb_len_max ();
2288 	  res.range.unlikely = res.range.max;
2289 	  /* It's likely that the the total length is not more that
2290 	     2 * wcslen (S).*/
2291 	  res.range.likely = res.range.min * 2;
2292 
2293 	  if (dir.prec[1] >= 0
2294 	      && (unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2295 	    {
2296 	      res.range.max = dir.prec[1];
2297 	      res.range.likely = dir.prec[1];
2298 	      res.range.unlikely = dir.prec[1];
2299 	    }
2300 
2301 	  if (dir.prec[0] < 0 && dir.prec[1] > -1)
2302 	    res.range.min = 0;
2303 	  else if (dir.prec[0] >= 0)
2304 	    res.range.likely = dir.prec[0];
2305 
2306 	  /* Even a non-empty wide character string need not convert into
2307 	     any bytes.  */
2308 	  res.range.min = 0;
2309 	}
2310       else
2311 	{
2312 	  res.knownrange = true;
2313 
2314 	  if (dir.prec[0] < 0 && dir.prec[1] > -1)
2315 	    res.range.min = 0;
2316 	  else if ((unsigned HOST_WIDE_INT)dir.prec[0] < res.range.min)
2317 	    res.range.min = dir.prec[0];
2318 
2319 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2320 	    {
2321 	      res.range.max = dir.prec[1];
2322 	      res.range.likely = dir.prec[1];
2323 	      res.range.unlikely = dir.prec[1];
2324 	    }
2325 	}
2326     }
2327   else if (arg && integer_zerop (arg))
2328     {
2329       /* Handle null pointer argument.  */
2330 
2331       fmtresult res (0);
2332       res.nullp = true;
2333       return res;
2334     }
2335   else
2336     {
2337       /* For a '%s' and '%ls' directive with a non-constant string (either
2338 	 one of a number of strings of known length or an unknown string)
2339 	 the minimum number of characters is lesser of PRECISION[0] and
2340 	 the length of the shortest known string or zero, and the maximum
2341 	 is the lessser of the length of the longest known string or
2342 	 PTRDIFF_MAX and PRECISION[1].  The likely length is either
2343 	 the minimum at level 1 and the greater of the minimum and 1
2344 	 at level 2.  This result is adjust upward for width (if it's
2345 	 specified).  */
2346 
2347       if (dir.modifier == FMT_LEN_l)
2348 	{
2349 	  /* A wide character converts to as few as zero bytes.  */
2350 	  slen.range.min = 0;
2351 	  if (slen.range.max < target_int_max ())
2352 	    slen.range.max *= target_mb_len_max ();
2353 
2354 	  if (slen.range.likely < target_int_max ())
2355 	    slen.range.likely *= 2;
2356 
2357 	  if (slen.range.likely < target_int_max ())
2358 	    slen.range.unlikely *= target_mb_len_max ();
2359 	}
2360 
2361       res.range = slen.range;
2362 
2363       if (dir.prec[0] >= 0)
2364 	{
2365 	  /* Adjust the minimum to zero if the string length is unknown,
2366 	     or at most the lower bound of the precision otherwise.  */
2367 	  if (slen.range.min >= target_int_max ())
2368 	    res.range.min = 0;
2369 	  else if ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.min)
2370 	    res.range.min = dir.prec[0];
2371 
2372 	  /* Make both maxima no greater than the upper bound of precision.  */
2373 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max
2374 	      || slen.range.max >= target_int_max ())
2375 	    {
2376 	      res.range.max = dir.prec[1];
2377 	      res.range.unlikely = dir.prec[1];
2378 	    }
2379 
2380 	  /* If precision is constant, set the likely counter to the lesser
2381 	     of it and the maximum string length.  Otherwise, if the lower
2382 	     bound of precision is greater than zero, set the likely counter
2383 	     to the minimum.  Otherwise set it to zero or one based on
2384 	     the warning level.  */
2385 	  if (dir.prec[0] == dir.prec[1])
2386 	    res.range.likely
2387 	      = ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.max
2388 		 ? dir.prec[0] : slen.range.max);
2389 	  else if (dir.prec[0] > 0)
2390 	    res.range.likely = res.range.min;
2391 	  else
2392 	    res.range.likely = warn_level > 1;
2393 	}
2394       else if (dir.prec[1] >= 0)
2395 	{
2396 	  res.range.min = 0;
2397 	  if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max)
2398 	    res.range.max = dir.prec[1];
2399 	  res.range.likely = dir.prec[1] ? warn_level > 1 : 0;
2400 	}
2401       else if (slen.range.min >= target_int_max ())
2402 	{
2403 	  res.range.min = 0;
2404 	  res.range.max = HOST_WIDE_INT_MAX;
2405 	  /* At level 1 strings of unknown length are assumed to be
2406 	     empty, while at level 1 they are assumed to be one byte
2407 	     long.  */
2408 	  res.range.likely = warn_level > 1;
2409 	}
2410       else
2411 	{
2412 	  /* A string of unknown length unconstrained by precision is
2413 	     assumed to be empty at level 1 and just one character long
2414 	     at higher levels.  */
2415 	  if (res.range.likely >= target_int_max ())
2416 	    res.range.likely = warn_level > 1;
2417 	}
2418 
2419       res.range.unlikely = res.range.max;
2420     }
2421 
2422   /* Bump up the byte counters if WIDTH is greater.  */
2423   return res.adjust_for_width_or_precision (dir.width);
2424 }
2425 
2426 /* Format plain string (part of the format string itself).  */
2427 
2428 static fmtresult
format_plain(const directive & dir,tree,vr_values *)2429 format_plain (const directive &dir, tree, vr_values *)
2430 {
2431   fmtresult res (dir.len);
2432   return res;
2433 }
2434 
2435 /* Return true if the RESULT of a directive in a call describe by INFO
2436    should be diagnosed given the AVAILable space in the destination.  */
2437 
2438 static bool
should_warn_p(const sprintf_dom_walker::call_info & info,const result_range & avail,const result_range & result)2439 should_warn_p (const sprintf_dom_walker::call_info &info,
2440 	       const result_range &avail, const result_range &result)
2441 {
2442   if (result.max <= avail.min)
2443     {
2444       /* The least amount of space remaining in the destination is big
2445 	 enough for the longest output.  */
2446       return false;
2447     }
2448 
2449   if (info.bounded)
2450     {
2451       if (warn_format_trunc == 1 && result.min <= avail.max
2452 	  && info.retval_used ())
2453 	{
2454 	  /* The likely amount of space remaining in the destination is big
2455 	     enough for the least output and the return value is used.  */
2456 	  return false;
2457 	}
2458 
2459       if (warn_format_trunc == 1 && result.likely <= avail.likely
2460 	  && !info.retval_used ())
2461 	{
2462 	  /* The likely amount of space remaining in the destination is big
2463 	     enough for the likely output and the return value is unused.  */
2464 	  return false;
2465 	}
2466 
2467       if (warn_format_trunc == 2
2468 	  && result.likely <= avail.min
2469 	  && (result.max <= avail.min
2470 	      || result.max > HOST_WIDE_INT_MAX))
2471 	{
2472 	  /* The minimum amount of space remaining in the destination is big
2473 	     enough for the longest output.  */
2474 	  return false;
2475 	}
2476     }
2477   else
2478     {
2479       if (warn_level == 1 && result.likely <= avail.likely)
2480 	{
2481 	  /* The likely amount of space remaining in the destination is big
2482 	     enough for the likely output.  */
2483 	  return false;
2484 	}
2485 
2486       if (warn_level == 2
2487 	  && result.likely <= avail.min
2488 	  && (result.max <= avail.min
2489 	      || result.max > HOST_WIDE_INT_MAX))
2490 	{
2491 	  /* The minimum amount of space remaining in the destination is big
2492 	     enough for the longest output.  */
2493 	  return false;
2494 	}
2495     }
2496 
2497   return true;
2498 }
2499 
/* At format string location described by DIRLOC in a call described
   by INFO, issue a warning for a directive DIR whose output may be
   in excess of the available space AVAIL_RANGE in the destination
   given the formatting result RES.  This function does nothing
   except decide whether to issue a warning for a possible write
   past the end or truncation and, if so, format the warning.
   Return true if a warning has been issued.  */
2507 
2508 static bool
maybe_warn(substring_loc & dirloc,location_t argloc,const sprintf_dom_walker::call_info & info,const result_range & avail_range,const result_range & res,const directive & dir)2509 maybe_warn (substring_loc &dirloc, location_t argloc,
2510 	    const sprintf_dom_walker::call_info &info,
2511 	    const result_range &avail_range, const result_range &res,
2512 	    const directive &dir)
2513 {
2514   if (!should_warn_p (info, avail_range, res))
2515     return false;
2516 
2517   /* A warning will definitely be issued below.  */
2518 
2519   /* The maximum byte count to reference in the warning.  Larger counts
2520      imply that the upper bound is unknown (and could be anywhere between
2521      RES.MIN + 1 and SIZE_MAX / 2) are printed as "N or more bytes" rather
2522      than "between N and X" where X is some huge number.  */
2523   unsigned HOST_WIDE_INT maxbytes = target_dir_max ();
2524 
2525   /* True when there is enough room in the destination for the least
2526      amount of a directive's output but not enough for its likely or
2527      maximum output.  */
2528   bool maybe = (res.min <= avail_range.max
2529 		&& (avail_range.min < res.likely
2530 		    || (res.max < HOST_WIDE_INT_MAX
2531 			&& avail_range.min < res.max)));
2532 
2533   /* Buffer for the directive in the host character set (used when
2534      the source character set is different).  */
2535   char hostdir[32];
2536 
2537   if (avail_range.min == avail_range.max)
2538     {
2539       /* The size of the destination region is exact.  */
2540       unsigned HOST_WIDE_INT navail = avail_range.max;
2541 
2542       if (target_to_host (*dir.beg) != '%')
2543 	{
2544 	  /* For plain character directives (i.e., the format string itself)
2545 	     but not others, point the caret at the first character that's
2546 	     past the end of the destination.  */
2547 	  if (navail < dir.len)
2548 	    dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
2549 	}
2550 
2551       if (*dir.beg == '\0')
2552 	{
2553 	  /* This is the terminating nul.  */
2554 	  gcc_assert (res.min == 1 && res.min == res.max);
2555 
2556 	  return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
2557 			  info.bounded
2558 			  ? (maybe
2559 			     ? G_("%qE output may be truncated before the "
2560 				  "last format character")
2561 			     : G_("%qE output truncated before the last "
2562 				  "format character"))
2563 			  : (maybe
2564 			     ? G_("%qE may write a terminating nul past the "
2565 				  "end of the destination")
2566 			     : G_("%qE writing a terminating nul past the "
2567 				  "end of the destination")),
2568 			  info.func);
2569 	}
2570 
2571       if (res.min == res.max)
2572 	{
2573 	  const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
2574 	  if (!info.bounded)
2575 	    return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2576 			      "%<%.*s%> directive writing %wu byte into a "
2577 			      "region of size %wu",
2578 			      "%<%.*s%> directive writing %wu bytes into a "
2579 			      "region of size %wu",
2580 			      (int) dir.len, d, res.min, navail);
2581 	  else if (maybe)
2582 	    return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2583 			      "%<%.*s%> directive output may be truncated "
2584 			      "writing %wu byte into a region of size %wu",
2585 			      "%<%.*s%> directive output may be truncated "
2586 			      "writing %wu bytes into a region of size %wu",
2587 			      (int) dir.len, d, res.min, navail);
2588 	  else
2589 	    return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2590 			      "%<%.*s%> directive output truncated writing "
2591 			      "%wu byte into a region of size %wu",
2592 			      "%<%.*s%> directive output truncated writing "
2593 			      "%wu bytes into a region of size %wu",
2594 			      (int) dir.len, d, res.min, navail);
2595 	}
2596       if (res.min == 0 && res.max < maxbytes)
2597 	return fmtwarn (dirloc, argloc, NULL,
2598 			info.warnopt (),
2599 			info.bounded
2600 			? (maybe
2601 			   ? G_("%<%.*s%> directive output may be truncated "
2602 				"writing up to %wu bytes into a region of "
2603 				"size %wu")
2604 			   : G_("%<%.*s%> directive output truncated writing "
2605 				"up to %wu bytes into a region of size %wu"))
2606 			: G_("%<%.*s%> directive writing up to %wu bytes "
2607 			     "into a region of size %wu"), (int) dir.len,
2608 			target_to_host (hostdir, sizeof hostdir, dir.beg),
2609 			res.max, navail);
2610 
2611       if (res.min == 0 && maxbytes <= res.max)
2612 	/* This is a special case to avoid issuing the potentially
2613 	   confusing warning:
2614 	     writing 0 or more bytes into a region of size 0.  */
2615 	return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2616 			info.bounded
2617 			? (maybe
2618 			   ? G_("%<%.*s%> directive output may be truncated "
2619 				"writing likely %wu or more bytes into a "
2620 				"region of size %wu")
2621 			   : G_("%<%.*s%> directive output truncated writing "
2622 				"likely %wu or more bytes into a region of "
2623 				"size %wu"))
2624 			: G_("%<%.*s%> directive writing likely %wu or more "
2625 			     "bytes into a region of size %wu"), (int) dir.len,
2626 			target_to_host (hostdir, sizeof hostdir, dir.beg),
2627 			res.likely, navail);
2628 
2629       if (res.max < maxbytes)
2630 	return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2631 			info.bounded
2632 			? (maybe
2633 			   ? G_("%<%.*s%> directive output may be truncated "
2634 				"writing between %wu and %wu bytes into a "
2635 				"region of size %wu")
2636 			   : G_("%<%.*s%> directive output truncated "
2637 				"writing between %wu and %wu bytes into a "
2638 				"region of size %wu"))
2639 			: G_("%<%.*s%> directive writing between %wu and "
2640 			     "%wu bytes into a region of size %wu"),
2641 			(int) dir.len,
2642 			target_to_host (hostdir, sizeof hostdir, dir.beg),
2643 			res.min, res.max, navail);
2644 
2645       return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2646 		      info.bounded
2647 		      ? (maybe
2648 			 ? G_("%<%.*s%> directive output may be truncated "
2649 			      "writing %wu or more bytes into a region of "
2650 			      "size %wu")
2651 			 : G_("%<%.*s%> directive output truncated writing "
2652 			      "%wu or more bytes into a region of size %wu"))
2653 		      : G_("%<%.*s%> directive writing %wu or more bytes "
2654 			   "into a region of size %wu"), (int) dir.len,
2655 		      target_to_host (hostdir, sizeof hostdir, dir.beg),
2656 		      res.min, navail);
2657     }
2658 
2659   /* The size of the destination region is a range.  */
2660 
2661   if (target_to_host (*dir.beg) != '%')
2662     {
2663       unsigned HOST_WIDE_INT navail = avail_range.max;
2664 
2665       /* For plain character directives (i.e., the format string itself)
2666 	 but not others, point the caret at the first character that's
2667 	 past the end of the destination.  */
2668       if (navail < dir.len)
2669 	dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
2670     }
2671 
2672   if (*dir.beg == '\0')
2673     {
2674       gcc_assert (res.min == 1 && res.min == res.max);
2675 
2676       return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
2677 		      info.bounded
2678 		      ? (maybe
2679 			 ? G_("%qE output may be truncated before the last "
2680 			      "format character")
2681 			 : G_("%qE output truncated before the last format "
2682 			      "character"))
2683 		      : (maybe
2684 			 ? G_("%qE may write a terminating nul past the end "
2685 			      "of the destination")
2686 			 : G_("%qE writing a terminating nul past the end "
2687 			      "of the destination")), info.func);
2688     }
2689 
2690   if (res.min == res.max)
2691     {
2692       const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
2693       if (!info.bounded)
2694 	return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2695 			  "%<%.*s%> directive writing %wu byte into a region "
2696 			  "of size between %wu and %wu",
2697 			  "%<%.*s%> directive writing %wu bytes into a region "
2698 			  "of size between %wu and %wu", (int) dir.len, d,
2699 			  res.min, avail_range.min, avail_range.max);
2700       else if (maybe)
2701 	return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2702 			  "%<%.*s%> directive output may be truncated writing "
2703 			  "%wu byte into a region of size between %wu and %wu",
2704 			  "%<%.*s%> directive output may be truncated writing "
2705 			  "%wu bytes into a region of size between %wu and "
2706 			  "%wu", (int) dir.len, d, res.min, avail_range.min,
2707 			  avail_range.max);
2708       else
2709 	return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2710 			  "%<%.*s%> directive output truncated writing %wu "
2711 			  "byte into a region of size between %wu and %wu",
2712 			  "%<%.*s%> directive output truncated writing %wu "
2713 			  "bytes into a region of size between %wu and %wu",
2714 			  (int) dir.len, d, res.min, avail_range.min,
2715 			  avail_range.max);
2716     }
2717 
2718   if (res.min == 0 && res.max < maxbytes)
2719     return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2720 		    info.bounded
2721 		    ? (maybe
2722 		       ? G_("%<%.*s%> directive output may be truncated "
2723 			    "writing up to %wu bytes into a region of size "
2724 			    "between %wu and %wu")
2725 		       : G_("%<%.*s%> directive output truncated writing "
2726 			    "up to %wu bytes into a region of size between "
2727 			    "%wu and %wu"))
2728 		    : G_("%<%.*s%> directive writing up to %wu bytes "
2729 			 "into a region of size between %wu and %wu"),
2730 		    (int) dir.len,
2731 		    target_to_host (hostdir, sizeof hostdir, dir.beg),
2732 		    res.max, avail_range.min, avail_range.max);
2733 
2734   if (res.min == 0 && maxbytes <= res.max)
2735     /* This is a special case to avoid issuing the potentially confusing
2736        warning:
2737 	 writing 0 or more bytes into a region of size between 0 and N.  */
2738     return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2739 		    info.bounded
2740 		    ? (maybe
2741 		       ? G_("%<%.*s%> directive output may be truncated "
2742 			    "writing likely %wu or more bytes into a region "
2743 			    "of size between %wu and %wu")
2744 		       : G_("%<%.*s%> directive output truncated writing "
2745 			    "likely %wu or more bytes into a region of size "
2746 			    "between %wu and %wu"))
2747 		    : G_("%<%.*s%> directive writing likely %wu or more bytes "
2748 			 "into a region of size between %wu and %wu"),
2749 		    (int) dir.len,
2750 		    target_to_host (hostdir, sizeof hostdir, dir.beg),
2751 		    res.likely, avail_range.min, avail_range.max);
2752 
2753   if (res.max < maxbytes)
2754     return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2755 		    info.bounded
2756 		    ? (maybe
2757 		       ? G_("%<%.*s%> directive output may be truncated "
2758 			    "writing between %wu and %wu bytes into a region "
2759 			    "of size between %wu and %wu")
2760 		       : G_("%<%.*s%> directive output truncated writing "
2761 			    "between %wu and %wu bytes into a region of size "
2762 			    "between %wu and %wu"))
2763 		    : G_("%<%.*s%> directive writing between %wu and "
2764 			 "%wu bytes into a region of size between %wu and "
2765 			 "%wu"), (int) dir.len,
2766 		    target_to_host (hostdir, sizeof hostdir, dir.beg),
2767 		    res.min, res.max, avail_range.min, avail_range.max);
2768 
2769   return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2770 		  info.bounded
2771 		  ? (maybe
2772 		     ? G_("%<%.*s%> directive output may be truncated writing "
2773 			  "%wu or more bytes into a region of size between "
2774 			  "%wu and %wu")
2775 		     : G_("%<%.*s%> directive output truncated writing "
2776 			  "%wu or more bytes into a region of size between "
2777 			  "%wu and %wu"))
2778 		  : G_("%<%.*s%> directive writing %wu or more bytes "
2779 		       "into a region of size between %wu and %wu"),
2780 		  (int) dir.len,
2781 		  target_to_host (hostdir, sizeof hostdir, dir.beg),
2782 		  res.min, avail_range.min, avail_range.max);
2783 }
2784 
2785 /* Compute the length of the output resulting from the directive DIR
2786    in a call described by INFO and update the overall result of the call
2787    in *RES.  Return true if the directive has been handled.  */
2788 
2789 static bool
format_directive(const sprintf_dom_walker::call_info & info,format_result * res,const directive & dir,class vr_values * vr_values)2790 format_directive (const sprintf_dom_walker::call_info &info,
2791 		  format_result *res, const directive &dir,
2792 		  class vr_values *vr_values)
2793 {
2794   /* Offset of the beginning of the directive from the beginning
2795      of the format string.  */
2796   size_t offset = dir.beg - info.fmtstr;
2797   size_t start = offset;
2798   size_t length = offset + dir.len - !!dir.len;
2799 
2800   /* Create a location for the whole directive from the % to the format
2801      specifier.  */
2802   substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
2803 			offset, start, length);
2804 
2805   /* Also get the location of the argument if possible.
2806      This doesn't work for integer literals or function calls.  */
2807   location_t argloc = UNKNOWN_LOCATION;
2808   if (dir.arg)
2809     argloc = EXPR_LOCATION (dir.arg);
2810 
2811   /* Bail when there is no function to compute the output length,
2812      or when minimum length checking has been disabled.   */
2813   if (!dir.fmtfunc || res->range.min >= HOST_WIDE_INT_MAX)
2814     return false;
2815 
2816   /* Compute the range of lengths of the formatted output.  */
2817   fmtresult fmtres = dir.fmtfunc (dir, dir.arg, vr_values);
2818 
2819   /* Record whether the output of all directives is known to be
2820      bounded by some maximum, implying that their arguments are
2821      either known exactly or determined to be in a known range
2822      or, for strings, limited by the upper bounds of the arrays
2823      they refer to.  */
2824   res->knownrange &= fmtres.knownrange;
2825 
2826   if (!fmtres.knownrange)
2827     {
2828       /* Only when the range is known, check it against the host value
2829 	 of INT_MAX + (the number of bytes of the "%.*Lf" directive with
2830 	 INT_MAX precision, which is the longest possible output of any
2831 	 single directive).  That's the largest valid byte count (though
2832 	 not valid call to a printf-like function because it can never
2833 	 return such a count).  Otherwise, the range doesn't correspond
2834 	 to known values of the argument.  */
2835       if (fmtres.range.max > target_dir_max ())
2836 	{
2837 	  /* Normalize the MAX counter to avoid having to deal with it
2838 	     later.  The counter can be less than HOST_WIDE_INT_M1U
2839 	     when compiling for an ILP32 target on an LP64 host.  */
2840 	  fmtres.range.max = HOST_WIDE_INT_M1U;
2841 	  /* Disable exact and maximum length checking after a failure
2842 	     to determine the maximum number of characters (for example
2843 	     for wide characters or wide character strings) but continue
2844 	     tracking the minimum number of characters.  */
2845 	  res->range.max = HOST_WIDE_INT_M1U;
2846 	}
2847 
2848       if (fmtres.range.min > target_dir_max ())
2849 	{
2850 	  /* Disable exact length checking after a failure to determine
2851 	     even the minimum number of characters (it shouldn't happen
2852 	     except in an error) but keep tracking the minimum and maximum
2853 	     number of characters.  */
2854 	  return true;
2855 	}
2856     }
2857 
2858   /* Buffer for the directive in the host character set (used when
2859      the source character set is different).  */
2860   char hostdir[32];
2861 
2862   int dirlen = dir.len;
2863 
2864   if (fmtres.nullp)
2865     {
2866       fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2867 	       "%<%.*s%> directive argument is null",
2868 	       dirlen, target_to_host (hostdir, sizeof hostdir, dir.beg));
2869 
2870       /* Don't bother processing the rest of the format string.  */
2871       res->warned = true;
2872       res->range.min = HOST_WIDE_INT_M1U;
2873       res->range.max = HOST_WIDE_INT_M1U;
2874       return false;
2875     }
2876 
2877   /* Compute the number of available bytes in the destination.  There
2878      must always be at least one byte of space for the terminating
2879      NUL that's appended after the format string has been processed.  */
2880   result_range avail_range = bytes_remaining (info.objsize, *res);
2881 
2882   bool warned = res->warned;
2883 
2884   if (!warned)
2885     warned = maybe_warn (dirloc, argloc, info, avail_range,
2886 			 fmtres.range, dir);
2887 
2888   /* Bump up the total maximum if it isn't too big.  */
2889   if (res->range.max < HOST_WIDE_INT_MAX
2890       && fmtres.range.max < HOST_WIDE_INT_MAX)
2891     res->range.max += fmtres.range.max;
2892 
2893   /* Raise the total unlikely maximum by the larger of the maximum
2894      and the unlikely maximum.  */
2895   unsigned HOST_WIDE_INT save = res->range.unlikely;
2896   if (fmtres.range.max < fmtres.range.unlikely)
2897     res->range.unlikely += fmtres.range.unlikely;
2898   else
2899     res->range.unlikely += fmtres.range.max;
2900 
2901   if (res->range.unlikely < save)
2902     res->range.unlikely = HOST_WIDE_INT_M1U;
2903 
2904   res->range.min += fmtres.range.min;
2905   res->range.likely += fmtres.range.likely;
2906 
2907   /* Has the minimum directive output length exceeded the maximum
2908      of 4095 bytes required to be supported?  */
2909   bool minunder4k = fmtres.range.min < 4096;
2910   bool maxunder4k = fmtres.range.max < 4096;
2911   /* Clear UNDER4K in the overall result if the maximum has exceeded
2912      the 4k (this is necessary to avoid the return valuye optimization
2913      that may not be safe in the maximum case).  */
2914   if (!maxunder4k)
2915     res->under4k = false;
2916 
2917   if (!warned
2918       /* Only warn at level 2.  */
2919       && warn_level > 1
2920       && (!minunder4k
2921 	  || (!maxunder4k && fmtres.range.max < HOST_WIDE_INT_MAX)))
2922     {
2923       /* The directive output may be longer than the maximum required
2924 	 to be handled by an implementation according to 7.21.6.1, p15
2925 	 of C11.  Warn on this only at level 2 but remember this and
2926 	 prevent folding the return value when done.  This allows for
2927 	 the possibility of the actual libc call failing due to ENOMEM
2928 	 (like Glibc does under some conditions).  */
2929 
2930       if (fmtres.range.min == fmtres.range.max)
2931 	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2932 			  "%<%.*s%> directive output of %wu bytes exceeds "
2933 			  "minimum required size of 4095", dirlen,
2934 			  target_to_host (hostdir, sizeof hostdir, dir.beg),
2935 			  fmtres.range.min);
2936       else
2937 	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2938 			  minunder4k
2939 			  ? G_("%<%.*s%> directive output between %wu and %wu "
2940 			       "bytes may exceed minimum required size of "
2941 			       "4095")
2942 			  : G_("%<%.*s%> directive output between %wu and %wu "
2943 			       "bytes exceeds minimum required size of 4095"),
2944 			  dirlen,
2945 			  target_to_host (hostdir, sizeof hostdir, dir.beg),
2946 			  fmtres.range.min, fmtres.range.max);
2947     }
2948 
2949   /* Has the likely and maximum directive output exceeded INT_MAX?  */
2950   bool likelyximax = *dir.beg && res->range.likely > target_int_max ();
2951   /* Don't consider the maximum to be in excess when it's the result
2952      of a string of unknown length (i.e., whose maximum has been set
2953      to be greater than or equal to HOST_WIDE_INT_MAX.  */
2954   bool maxximax = (*dir.beg
2955 		   && res->range.max > target_int_max ()
2956 		   && res->range.max < HOST_WIDE_INT_MAX);
2957 
2958   if (!warned
2959       /* Warn for the likely output size at level 1.  */
2960       && (likelyximax
2961 	  /* But only warn for the maximum at level 2.  */
2962 	  || (warn_level > 1
2963 	      && maxximax
2964 	      && fmtres.range.max < HOST_WIDE_INT_MAX)))
2965     {
2966       /* The directive output causes the total length of output
2967 	 to exceed INT_MAX bytes.  */
2968 
2969       if (fmtres.range.min == fmtres.range.max)
2970 	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2971 			  "%<%.*s%> directive output of %wu bytes causes "
2972 			  "result to exceed %<INT_MAX%>", dirlen,
2973 			  target_to_host (hostdir, sizeof hostdir, dir.beg),
2974 			  fmtres.range.min);
2975       else
2976 	warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2977 			  fmtres.range.min > target_int_max ()
2978 			  ? G_ ("%<%.*s%> directive output between %wu and "
2979 				"%wu bytes causes result to exceed "
2980 				"%<INT_MAX%>")
2981 			  : G_ ("%<%.*s%> directive output between %wu and "
2982 				"%wu bytes may cause result to exceed "
2983 				"%<INT_MAX%>"), dirlen,
2984 			  target_to_host (hostdir, sizeof hostdir, dir.beg),
2985 			  fmtres.range.min, fmtres.range.max);
2986     }
2987 
2988   if (warned && fmtres.range.min < fmtres.range.likely
2989       && fmtres.range.likely < fmtres.range.max)
2990     inform_n (info.fmtloc, fmtres.range.likely,
2991 	      "assuming directive output of %wu byte",
2992 	      "assuming directive output of %wu bytes",
2993 	      fmtres.range.likely);
2994 
2995   if (warned && fmtres.argmin)
2996     {
2997       if (fmtres.argmin == fmtres.argmax)
2998 	inform (info.fmtloc, "directive argument %qE", fmtres.argmin);
2999       else if (fmtres.knownrange)
3000 	inform (info.fmtloc, "directive argument in the range [%E, %E]",
3001 		fmtres.argmin, fmtres.argmax);
3002       else
3003 	inform (info.fmtloc,
3004 		"using the range [%E, %E] for directive argument",
3005 		fmtres.argmin, fmtres.argmax);
3006     }
3007 
3008   res->warned |= warned;
3009 
3010   if (!dir.beg[0] && res->warned && info.objsize < HOST_WIDE_INT_MAX)
3011     {
3012       /* If a warning has been issued for buffer overflow or truncation
3013 	 (but not otherwise) help the user figure out how big a buffer
3014 	 they need.  */
3015 
3016       location_t callloc = gimple_location (info.callstmt);
3017 
3018       unsigned HOST_WIDE_INT min = res->range.min;
3019       unsigned HOST_WIDE_INT max = res->range.max;
3020 
3021       if (min == max)
3022 	inform (callloc,
3023 		(min == 1
3024 		 ? G_("%qE output %wu byte into a destination of size %wu")
3025 		 : G_("%qE output %wu bytes into a destination of size %wu")),
3026 		info.func, min, info.objsize);
3027       else if (max < HOST_WIDE_INT_MAX)
3028 	inform (callloc,
3029 		"%qE output between %wu and %wu bytes into "
3030 		"a destination of size %wu",
3031 		info.func, min, max, info.objsize);
3032       else if (min < res->range.likely && res->range.likely < max)
3033 	inform (callloc,
3034 		"%qE output %wu or more bytes (assuming %wu) into "
3035 		"a destination of size %wu",
3036 		info.func, min, res->range.likely, info.objsize);
3037       else
3038 	inform (callloc,
3039 		"%qE output %wu or more bytes into a destination of size %wu",
3040 		info.func, min, info.objsize);
3041     }
3042 
3043   if (dump_file && *dir.beg)
3044     {
3045       fprintf (dump_file,
3046 	       "    Result: "
3047 	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
3048 	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC " ("
3049 	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
3050 	       HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ")\n",
3051 	       fmtres.range.min, fmtres.range.likely,
3052 	       fmtres.range.max, fmtres.range.unlikely,
3053 	       res->range.min, res->range.likely,
3054 	       res->range.max, res->range.unlikely);
3055     }
3056 
3057   return true;
3058 }
3059 
3060 /* Parse a format directive in function call described by INFO starting
3061    at STR and populate DIR structure.  Bump up *ARGNO by the number of
3062    arguments extracted for the directive.  Return the length of
3063    the directive.  */
3064 
3065 static size_t
parse_directive(sprintf_dom_walker::call_info & info,directive & dir,format_result * res,const char * str,unsigned * argno,vr_values * vr_values)3066 parse_directive (sprintf_dom_walker::call_info &info,
3067 		 directive &dir, format_result *res,
3068 		 const char *str, unsigned *argno,
3069 		 vr_values *vr_values)
3070 {
3071   const char *pcnt = strchr (str, target_percent);
3072   dir.beg = str;
3073 
3074   if (size_t len = pcnt ? pcnt - str : *str ? strlen (str) : 1)
3075     {
3076       /* This directive is either a plain string or the terminating nul
3077 	 (which isn't really a directive but it simplifies things to
3078 	 handle it as if it were).  */
3079       dir.len = len;
3080       dir.fmtfunc = format_plain;
3081 
3082       if (dump_file)
3083 	{
3084 	  fprintf (dump_file, "  Directive %u at offset "
3085 		   HOST_WIDE_INT_PRINT_UNSIGNED ": \"%.*s\", "
3086 		   "length = " HOST_WIDE_INT_PRINT_UNSIGNED "\n",
3087 		   dir.dirno,
3088 		   (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3089 		   (int)dir.len, dir.beg, (unsigned HOST_WIDE_INT) dir.len);
3090 	}
3091 
3092       return len - !*str;
3093     }
3094 
3095   const char *pf = pcnt + 1;
3096 
3097     /* POSIX numbered argument index or zero when none.  */
3098   HOST_WIDE_INT dollar = 0;
3099 
3100   /* With and precision.  -1 when not specified, HOST_WIDE_INT_MIN
3101      when given by a va_list argument, and a non-negative value
3102      when specified in the format string itself.  */
3103   HOST_WIDE_INT width = -1;
3104   HOST_WIDE_INT precision = -1;
3105 
3106   /* Pointers to the beginning of the width and precision decimal
3107      string (if any) within the directive.  */
3108   const char *pwidth = 0;
3109   const char *pprec = 0;
3110 
3111   /* When the value of the decimal string that specifies width or
3112      precision is out of range, points to the digit that causes
3113      the value to exceed the limit.  */
3114   const char *werange = NULL;
3115   const char *perange = NULL;
3116 
3117   /* Width specified via the asterisk.  Need not be INTEGER_CST.
3118      For vararg functions set to void_node.  */
3119   tree star_width = NULL_TREE;
3120 
3121   /* Width specified via the asterisk.  Need not be INTEGER_CST.
3122      For vararg functions set to void_node.  */
3123   tree star_precision = NULL_TREE;
3124 
3125   if (ISDIGIT (target_to_host (*pf)))
3126     {
3127       /* This could be either a POSIX positional argument, the '0'
3128 	 flag, or a width, depending on what follows.  Store it as
3129 	 width and sort it out later after the next character has
3130 	 been seen.  */
3131       pwidth = pf;
3132       width = target_strtol10 (&pf, &werange);
3133     }
3134   else if (target_to_host (*pf) == '*')
3135     {
3136       /* Similarly to the block above, this could be either a POSIX
3137 	 positional argument or a width, depending on what follows.  */
3138       if (*argno < gimple_call_num_args (info.callstmt))
3139 	star_width = gimple_call_arg (info.callstmt, (*argno)++);
3140       else
3141 	star_width = void_node;
3142       ++pf;
3143     }
3144 
3145   if (target_to_host (*pf) == '$')
3146     {
3147       /* Handle the POSIX dollar sign which references the 1-based
3148 	 positional argument number.  */
3149       if (width != -1)
3150 	dollar = width + info.argidx;
3151       else if (star_width
3152 	       && TREE_CODE (star_width) == INTEGER_CST
3153 	       && (TYPE_PRECISION (TREE_TYPE (star_width))
3154 		   <= TYPE_PRECISION (integer_type_node)))
3155 	dollar = width + tree_to_shwi (star_width);
3156 
3157       /* Bail when the numbered argument is out of range (it will
3158 	 have already been diagnosed by -Wformat).  */
3159       if (dollar == 0
3160 	  || dollar == (int)info.argidx
3161 	  || dollar > gimple_call_num_args (info.callstmt))
3162 	return false;
3163 
3164       --dollar;
3165 
3166       star_width = NULL_TREE;
3167       width = -1;
3168       ++pf;
3169     }
3170 
3171   if (dollar || !star_width)
3172     {
3173       if (width != -1)
3174 	{
3175 	  if (width == 0)
3176 	    {
3177 	      /* The '0' that has been interpreted as a width above is
3178 		 actually a flag.  Reset HAVE_WIDTH, set the '0' flag,
3179 		 and continue processing other flags.  */
3180 	      width = -1;
3181 	      dir.set_flag ('0');
3182 	    }
3183 	  else if (!dollar)
3184 	    {
3185 	      /* (Non-zero) width has been seen.  The next character
3186 		 is either a period or a digit.  */
3187 	      goto start_precision;
3188 	    }
3189 	}
3190       /* When either '$' has been seen, or width has not been seen,
3191 	 the next field is the optional flags followed by an optional
3192 	 width.  */
3193       for ( ; ; ) {
3194 	switch (target_to_host (*pf))
3195 	  {
3196 	  case ' ':
3197 	  case '0':
3198 	  case '+':
3199 	  case '-':
3200 	  case '#':
3201 	    dir.set_flag (target_to_host (*pf++));
3202 	    break;
3203 
3204 	  default:
3205 	    goto start_width;
3206 	  }
3207       }
3208 
3209     start_width:
3210       if (ISDIGIT (target_to_host (*pf)))
3211 	{
3212 	  werange = 0;
3213 	  pwidth = pf;
3214 	  width = target_strtol10 (&pf, &werange);
3215 	}
3216       else if (target_to_host (*pf) == '*')
3217 	{
3218 	  if (*argno < gimple_call_num_args (info.callstmt))
3219 	    star_width = gimple_call_arg (info.callstmt, (*argno)++);
3220 	  else
3221 	    {
3222 	      /* This is (likely) a va_list.  It could also be an invalid
3223 		 call with insufficient arguments.  */
3224 	      star_width = void_node;
3225 	    }
3226 	  ++pf;
3227 	}
3228       else if (target_to_host (*pf) == '\'')
3229 	{
3230 	  /* The POSIX apostrophe indicating a numeric grouping
3231 	     in the current locale.  Even though it's possible to
3232 	     estimate the upper bound on the size of the output
3233 	     based on the number of digits it probably isn't worth
3234 	     continuing.  */
3235 	  return 0;
3236 	}
3237     }
3238 
3239  start_precision:
3240   if (target_to_host (*pf) == '.')
3241     {
3242       ++pf;
3243 
3244       if (ISDIGIT (target_to_host (*pf)))
3245 	{
3246 	  pprec = pf;
3247 	  precision = target_strtol10 (&pf, &perange);
3248 	}
3249       else if (target_to_host (*pf) == '*')
3250 	{
3251 	  if (*argno < gimple_call_num_args (info.callstmt))
3252 	    star_precision = gimple_call_arg (info.callstmt, (*argno)++);
3253 	  else
3254 	    {
3255 	      /* This is (likely) a va_list.  It could also be an invalid
3256 		 call with insufficient arguments.  */
3257 	      star_precision = void_node;
3258 	    }
3259 	  ++pf;
3260 	}
3261       else
3262 	{
3263 	  /* The decimal precision or the asterisk are optional.
3264 	     When neither is dirified it's taken to be zero.  */
3265 	  precision = 0;
3266 	}
3267     }
3268 
3269   switch (target_to_host (*pf))
3270     {
3271     case 'h':
3272       if (target_to_host (pf[1]) == 'h')
3273 	{
3274 	  ++pf;
3275 	  dir.modifier = FMT_LEN_hh;
3276 	}
3277       else
3278 	dir.modifier = FMT_LEN_h;
3279       ++pf;
3280       break;
3281 
3282     case 'j':
3283       dir.modifier = FMT_LEN_j;
3284       ++pf;
3285       break;
3286 
3287     case 'L':
3288       dir.modifier = FMT_LEN_L;
3289       ++pf;
3290       break;
3291 
3292     case 'l':
3293       if (target_to_host (pf[1]) == 'l')
3294 	{
3295 	  ++pf;
3296 	  dir.modifier = FMT_LEN_ll;
3297 	}
3298       else
3299 	dir.modifier = FMT_LEN_l;
3300       ++pf;
3301       break;
3302 
3303     case 't':
3304       dir.modifier = FMT_LEN_t;
3305       ++pf;
3306       break;
3307 
3308     case 'z':
3309       dir.modifier = FMT_LEN_z;
3310       ++pf;
3311       break;
3312     }
3313 
3314   switch (target_to_host (*pf))
3315     {
3316       /* Handle a sole '%' character the same as "%%" but since it's
3317 	 undefined prevent the result from being folded.  */
3318     case '\0':
3319       --pf;
3320       res->range.min = res->range.max = HOST_WIDE_INT_M1U;
3321       /* FALLTHRU */
3322     case '%':
3323       dir.fmtfunc = format_percent;
3324       break;
3325 
3326     case 'a':
3327     case 'A':
3328     case 'e':
3329     case 'E':
3330     case 'f':
3331     case 'F':
3332     case 'g':
3333     case 'G':
3334       res->floating = true;
3335       dir.fmtfunc = format_floating;
3336       break;
3337 
3338     case 'd':
3339     case 'i':
3340     case 'o':
3341     case 'u':
3342     case 'x':
3343     case 'X':
3344       dir.fmtfunc = format_integer;
3345       break;
3346 
3347     case 'p':
3348       /* The %p output is implementation-defined.  It's possible
3349 	 to determine this format but due to extensions (edirially
3350 	 those of the Linux kernel -- see bug 78512) the first %p
3351 	 in the format string disables any further processing.  */
3352       return false;
3353 
3354     case 'n':
3355       /* %n has side-effects even when nothing is actually printed to
3356 	 any buffer.  */
3357       info.nowrite = false;
3358       dir.fmtfunc = format_none;
3359       break;
3360 
3361     case 'c':
3362       dir.fmtfunc = format_character;
3363       break;
3364 
3365     case 'S':
3366     case 's':
3367       dir.fmtfunc = format_string;
3368       break;
3369 
3370     default:
3371       /* Unknown conversion specification.  */
3372       return 0;
3373     }
3374 
3375   dir.specifier = target_to_host (*pf++);
3376 
3377   /* Store the length of the format directive.  */
3378   dir.len = pf - pcnt;
3379 
3380   /* Buffer for the directive in the host character set (used when
3381      the source character set is different).  */
3382   char hostdir[32];
3383 
3384   if (star_width)
3385     {
3386       if (INTEGRAL_TYPE_P (TREE_TYPE (star_width)))
3387 	dir.set_width (star_width, vr_values);
3388       else
3389 	{
3390 	  /* Width specified by a va_list takes on the range [0, -INT_MIN]
3391 	     (width is the absolute value of that specified).  */
3392 	  dir.width[0] = 0;
3393 	  dir.width[1] = target_int_max () + 1;
3394 	}
3395     }
3396   else
3397     {
3398       if (width == LONG_MAX && werange)
3399 	{
3400 	  size_t begin = dir.beg - info.fmtstr + (pwidth - pcnt);
3401 	  size_t caret = begin + (werange - pcnt);
3402 	  size_t end = pf - info.fmtstr - 1;
3403 
3404 	  /* Create a location for the width part of the directive,
3405 	     pointing the caret at the first out-of-range digit.  */
3406 	  substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3407 				caret, begin, end);
3408 
3409 	  fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3410 		   "%<%.*s%> directive width out of range", (int) dir.len,
3411 		   target_to_host (hostdir, sizeof hostdir, dir.beg));
3412 	}
3413 
3414       dir.set_width (width);
3415     }
3416 
3417   if (star_precision)
3418     {
3419       if (INTEGRAL_TYPE_P (TREE_TYPE (star_precision)))
3420 	dir.set_precision (star_precision, vr_values);
3421       else
3422 	{
3423 	  /* Precision specified by a va_list takes on the range [-1, INT_MAX]
3424 	     (unlike width, negative precision is ignored).  */
3425 	  dir.prec[0] = -1;
3426 	  dir.prec[1] = target_int_max ();
3427 	}
3428     }
3429   else
3430     {
3431       if (precision == LONG_MAX && perange)
3432 	{
3433 	  size_t begin = dir.beg - info.fmtstr + (pprec - pcnt) - 1;
3434 	  size_t caret = dir.beg - info.fmtstr + (perange - pcnt) - 1;
3435 	  size_t end = pf - info.fmtstr - 2;
3436 
3437 	  /* Create a location for the precision part of the directive,
3438 	     including the leading period, pointing the caret at the first
3439 	     out-of-range digit .  */
3440 	  substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3441 				caret, begin, end);
3442 
3443 	  fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3444 		   "%<%.*s%> directive precision out of range", (int) dir.len,
3445 		   target_to_host (hostdir, sizeof hostdir, dir.beg));
3446 	}
3447 
3448       dir.set_precision (precision);
3449     }
3450 
3451   /* Extract the argument if the directive takes one and if it's
3452      available (e.g., the function doesn't take a va_list).  Treat
3453      missing arguments the same as va_list, even though they will
3454      have likely already been diagnosed by -Wformat.  */
3455   if (dir.specifier != '%'
3456       && *argno < gimple_call_num_args (info.callstmt))
3457     dir.arg = gimple_call_arg (info.callstmt, dollar ? dollar : (*argno)++);
3458 
3459   if (dump_file)
3460     {
3461       fprintf (dump_file,
3462 	       "  Directive %u at offset " HOST_WIDE_INT_PRINT_UNSIGNED
3463 	       ": \"%.*s\"",
3464 	       dir.dirno,
3465 	       (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3466 	       (int)dir.len, dir.beg);
3467       if (star_width)
3468 	{
3469 	  if (dir.width[0] == dir.width[1])
3470 	    fprintf (dump_file, ", width = " HOST_WIDE_INT_PRINT_DEC,
3471 		     dir.width[0]);
3472 	  else
3473 	    fprintf (dump_file,
3474 		     ", width in range [" HOST_WIDE_INT_PRINT_DEC
3475 		     ", " HOST_WIDE_INT_PRINT_DEC "]",
3476 		     dir.width[0], dir.width[1]);
3477 	}
3478 
3479       if (star_precision)
3480 	{
3481 	  if (dir.prec[0] == dir.prec[1])
3482 	    fprintf (dump_file, ", precision = " HOST_WIDE_INT_PRINT_DEC,
3483 		     dir.prec[0]);
3484 	  else
3485 	    fprintf (dump_file,
3486 		     ", precision in range [" HOST_WIDE_INT_PRINT_DEC
3487 		     HOST_WIDE_INT_PRINT_DEC "]",
3488 		     dir.prec[0], dir.prec[1]);
3489 	}
3490       fputc ('\n', dump_file);
3491     }
3492 
3493   return dir.len;
3494 }
3495 
3496 /* Compute the length of the output resulting from the call to a formatted
3497    output function described by INFO and store the result of the call in
3498    *RES.  Issue warnings for detected past the end writes.  Return true
3499    if the complete format string has been processed and *RES can be relied
3500    on, false otherwise (e.g., when a unknown or unhandled directive was seen
3501    that caused the processing to be terminated early).  */
3502 
3503 bool
compute_format_length(call_info & info,format_result * res)3504 sprintf_dom_walker::compute_format_length (call_info &info,
3505 					   format_result *res)
3506 {
3507   if (dump_file)
3508     {
3509       location_t callloc = gimple_location (info.callstmt);
3510       fprintf (dump_file, "%s:%i: ",
3511 	       LOCATION_FILE (callloc), LOCATION_LINE (callloc));
3512       print_generic_expr (dump_file, info.func, dump_flags);
3513 
3514       fprintf (dump_file,
3515 	       ": objsize = " HOST_WIDE_INT_PRINT_UNSIGNED
3516 	       ", fmtstr = \"%s\"\n",
3517 	       info.objsize, info.fmtstr);
3518     }
3519 
3520   /* Reset the minimum and maximum byte counters.  */
3521   res->range.min = res->range.max = 0;
3522 
3523   /* No directive has been seen yet so the length of output is bounded
3524      by the known range [0, 0] (with no conversion producing more than
3525      4K bytes) until determined otherwise.  */
3526   res->knownrange = true;
3527   res->under4k = true;
3528   res->floating = false;
3529   res->warned = false;
3530 
3531   /* 1-based directive counter.  */
3532   unsigned dirno = 1;
3533 
3534   /* The variadic argument counter.  */
3535   unsigned argno = info.argidx;
3536 
3537   for (const char *pf = info.fmtstr; ; ++dirno)
3538     {
3539       directive dir = directive ();
3540       dir.dirno = dirno;
3541 
3542       size_t n = parse_directive (info, dir, res, pf, &argno,
3543 				  evrp_range_analyzer.get_vr_values ());
3544 
3545       /* Return failure if the format function fails.  */
3546       if (!format_directive (info, res, dir,
3547 			     evrp_range_analyzer.get_vr_values ()))
3548 	return false;
3549 
3550       /* Return success the directive is zero bytes long and it's
3551 	 the last think in the format string (i.e., it's the terminating
3552 	 nul, which isn't really a directive but handling it as one makes
3553 	 things simpler).  */
3554       if (!n)
3555 	return *pf == '\0';
3556 
3557       pf += n;
3558     }
3559 
3560   /* The complete format string was processed (with or without warnings).  */
3561   return true;
3562 }
3563 
3564 /* Return the size of the object referenced by the expression DEST if
3565    available, or -1 otherwise.  */
3566 
3567 static unsigned HOST_WIDE_INT
get_destination_size(tree dest)3568 get_destination_size (tree dest)
3569 {
3570   /* Initialize object size info before trying to compute it.  */
3571   init_object_sizes ();
3572 
3573   /* Use __builtin_object_size to determine the size of the destination
3574      object.  When optimizing, determine the smallest object (such as
3575      a member array as opposed to the whole enclosing object), otherwise
3576      use type-zero object size to determine the size of the enclosing
3577      object (the function fails without optimization in this type).  */
3578   int ost = optimize > 0;
3579   unsigned HOST_WIDE_INT size;
3580   if (compute_builtin_object_size (dest, ost, &size))
3581     return size;
3582 
3583   return HOST_WIDE_INT_M1U;
3584 }
3585 
3586 /* Return true if the call described by INFO with result RES safe to
3587    optimize (i.e., no undefined behavior), and set RETVAL to the range
3588    of its return values.  */
3589 
3590 static bool
is_call_safe(const sprintf_dom_walker::call_info & info,const format_result & res,bool under4k,unsigned HOST_WIDE_INT retval[2])3591 is_call_safe (const sprintf_dom_walker::call_info &info,
3592 	      const format_result &res, bool under4k,
3593 	      unsigned HOST_WIDE_INT retval[2])
3594 {
3595   if (under4k && !res.under4k)
3596     return false;
3597 
3598   /* The minimum return value.  */
3599   retval[0] = res.range.min;
3600 
3601   /* The maximum return value is in most cases bounded by RES.RANGE.MAX
3602      but in cases involving multibyte characters could be as large as
3603      RES.RANGE.UNLIKELY.  */
3604   retval[1]
3605     = res.range.unlikely < res.range.max ? res.range.max : res.range.unlikely;
3606 
3607   /* Adjust the number of bytes which includes the terminating nul
3608      to reflect the return value of the function which does not.
3609      Because the valid range of the function is [INT_MIN, INT_MAX],
3610      a valid range before the adjustment below is [0, INT_MAX + 1]
3611      (the functions only return negative values on error or undefined
3612      behavior).  */
3613   if (retval[0] <= target_int_max () + 1)
3614     --retval[0];
3615   if (retval[1] <= target_int_max () + 1)
3616     --retval[1];
3617 
3618   /* Avoid the return value optimization when the behavior of the call
3619      is undefined either because any directive may have produced 4K or
3620      more of output, or the return value exceeds INT_MAX, or because
3621      the output overflows the destination object (but leave it enabled
3622      when the function is bounded because then the behavior is well-
3623      defined).  */
3624   if (retval[0] == retval[1]
3625       && (info.bounded || retval[0] < info.objsize)
3626       && retval[0] <= target_int_max ())
3627     return true;
3628 
3629   if ((info.bounded || retval[1] < info.objsize)
3630       && (retval[0] < target_int_max ()
3631 	  && retval[1] < target_int_max ()))
3632     return true;
3633 
3634   if (!under4k && (info.bounded || retval[0] < info.objsize))
3635     return true;
3636 
3637   return false;
3638 }
3639 
3640 /* Given a suitable result RES of a call to a formatted output function
3641    described by INFO, substitute the result for the return value of
3642    the call.  The result is suitable if the number of bytes it represents
3643    is known and exact.  A result that isn't suitable for substitution may
3644    have its range set to the range of return values, if that is known.
3645    Return true if the call is removed and gsi_next should not be performed
3646    in the caller.  */
3647 
3648 static bool
try_substitute_return_value(gimple_stmt_iterator * gsi,const sprintf_dom_walker::call_info & info,const format_result & res)3649 try_substitute_return_value (gimple_stmt_iterator *gsi,
3650 			     const sprintf_dom_walker::call_info &info,
3651 			     const format_result &res)
3652 {
3653   tree lhs = gimple_get_lhs (info.callstmt);
3654 
3655   /* Set to true when the entire call has been removed.  */
3656   bool removed = false;
3657 
3658   /* The minimum and maximum return value.  */
3659   unsigned HOST_WIDE_INT retval[2];
3660   bool safe = is_call_safe (info, res, true, retval);
3661 
3662   if (safe
3663       && retval[0] == retval[1]
3664       /* Not prepared to handle possibly throwing calls here; they shouldn't
3665 	 appear in non-artificial testcases, except when the __*_chk routines
3666 	 are badly declared.  */
3667       && !stmt_ends_bb_p (info.callstmt))
3668     {
3669       tree cst = build_int_cst (lhs ? TREE_TYPE (lhs) : integer_type_node,
3670 				retval[0]);
3671 
3672       if (lhs == NULL_TREE && info.nowrite)
3673 	{
3674 	  /* Remove the call to the bounded function with a zero size
3675 	     (e.g., snprintf(0, 0, "%i", 123)) if there is no lhs.  */
3676 	  unlink_stmt_vdef (info.callstmt);
3677 	  gsi_remove (gsi, true);
3678 	  removed = true;
3679 	}
3680       else if (info.nowrite)
3681 	{
3682 	  /* Replace the call to the bounded function with a zero size
3683 	     (e.g., snprintf(0, 0, "%i", 123) with the constant result
3684 	     of the function.  */
3685 	  if (!update_call_from_tree (gsi, cst))
3686 	    gimplify_and_update_call_from_tree (gsi, cst);
3687 	  gimple *callstmt = gsi_stmt (*gsi);
3688 	  update_stmt (callstmt);
3689 	}
3690       else if (lhs)
3691 	{
3692 	  /* Replace the left-hand side of the call with the constant
3693 	     result of the formatted function.  */
3694 	  gimple_call_set_lhs (info.callstmt, NULL_TREE);
3695 	  gimple *g = gimple_build_assign (lhs, cst);
3696 	  gsi_insert_after (gsi, g, GSI_NEW_STMT);
3697 	  update_stmt (info.callstmt);
3698 	}
3699 
3700       if (dump_file)
3701 	{
3702 	  if (removed)
3703 	    fprintf (dump_file, "  Removing call statement.");
3704 	  else
3705 	    {
3706 	      fprintf (dump_file, "  Substituting ");
3707 	      print_generic_expr (dump_file, cst, dump_flags);
3708 	      fprintf (dump_file, " for %s.\n",
3709 		       info.nowrite ? "statement" : "return value");
3710 	    }
3711 	}
3712     }
3713   else if (lhs && types_compatible_p (TREE_TYPE (lhs), integer_type_node))
3714     {
3715       bool setrange = false;
3716 
3717       if (safe
3718 	  && (info.bounded || retval[1] < info.objsize)
3719 	  && (retval[0] < target_int_max ()
3720 	      && retval[1] < target_int_max ()))
3721 	{
3722 	  /* If the result is in a valid range bounded by the size of
3723 	     the destination set it so that it can be used for subsequent
3724 	     optimizations.  */
3725 	  int prec = TYPE_PRECISION (integer_type_node);
3726 
3727 	  wide_int min = wi::shwi (retval[0], prec);
3728 	  wide_int max = wi::shwi (retval[1], prec);
3729 	  set_range_info (lhs, VR_RANGE, min, max);
3730 
3731 	  setrange = true;
3732 	}
3733 
3734       if (dump_file)
3735 	{
3736 	  const char *inbounds
3737 	    = (retval[0] < info.objsize
3738 	       ? (retval[1] < info.objsize
3739 		  ? "in" : "potentially out-of")
3740 	       : "out-of");
3741 
3742 	  const char *what = setrange ? "Setting" : "Discarding";
3743 	  if (retval[0] != retval[1])
3744 	    fprintf (dump_file,
3745 		     "  %s %s-bounds return value range ["
3746 		     HOST_WIDE_INT_PRINT_UNSIGNED ", "
3747 		     HOST_WIDE_INT_PRINT_UNSIGNED "].\n",
3748 		     what, inbounds, retval[0], retval[1]);
3749 	  else
3750 	    fprintf (dump_file, "  %s %s-bounds return value "
3751 		     HOST_WIDE_INT_PRINT_UNSIGNED ".\n",
3752 		     what, inbounds, retval[0]);
3753 	}
3754     }
3755 
3756   if (dump_file)
3757     fputc ('\n', dump_file);
3758 
3759   return removed;
3760 }
3761 
3762 /* Try to simplify a s{,n}printf call described by INFO with result
3763    RES by replacing it with a simpler and presumably more efficient
3764    call (such as strcpy).  */
3765 
3766 static bool
try_simplify_call(gimple_stmt_iterator * gsi,const sprintf_dom_walker::call_info & info,const format_result & res)3767 try_simplify_call (gimple_stmt_iterator *gsi,
3768 		   const sprintf_dom_walker::call_info &info,
3769 		   const format_result &res)
3770 {
3771   unsigned HOST_WIDE_INT dummy[2];
3772   if (!is_call_safe (info, res, info.retval_used (), dummy))
3773     return false;
3774 
3775   switch (info.fncode)
3776     {
3777     case BUILT_IN_SNPRINTF:
3778       return gimple_fold_builtin_snprintf (gsi);
3779 
3780     case BUILT_IN_SPRINTF:
3781       return gimple_fold_builtin_sprintf (gsi);
3782 
3783     default:
3784       ;
3785     }
3786 
3787   return false;
3788 }
3789 
3790 /* Determine if a GIMPLE CALL is to one of the sprintf-like built-in
3791    functions and if so, handle it.  Return true if the call is removed
3792    and gsi_next should not be performed in the caller.  */
3793 
3794 bool
handle_gimple_call(gimple_stmt_iterator * gsi)3795 sprintf_dom_walker::handle_gimple_call (gimple_stmt_iterator *gsi)
3796 {
3797   call_info info = call_info ();
3798 
3799   info.callstmt = gsi_stmt (*gsi);
3800   if (!gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL))
3801     return false;
3802 
3803   info.func = gimple_call_fndecl (info.callstmt);
3804   info.fncode = DECL_FUNCTION_CODE (info.func);
3805 
3806   /* The size of the destination as in snprintf(dest, size, ...).  */
3807   unsigned HOST_WIDE_INT dstsize = HOST_WIDE_INT_M1U;
3808 
3809   /* The size of the destination determined by __builtin_object_size.  */
3810   unsigned HOST_WIDE_INT objsize = HOST_WIDE_INT_M1U;
3811 
3812   /* Buffer size argument number (snprintf and vsnprintf).  */
3813   unsigned HOST_WIDE_INT idx_dstsize = HOST_WIDE_INT_M1U;
3814 
3815   /* Object size argument number (snprintf_chk and vsnprintf_chk).  */
3816   unsigned HOST_WIDE_INT idx_objsize = HOST_WIDE_INT_M1U;
3817 
3818   /* Format string argument number (valid for all functions).  */
3819   unsigned idx_format;
3820 
3821   switch (info.fncode)
3822     {
3823     case BUILT_IN_SPRINTF:
3824       // Signature:
3825       //   __builtin_sprintf (dst, format, ...)
3826       idx_format = 1;
3827       info.argidx = 2;
3828       break;
3829 
3830     case BUILT_IN_SPRINTF_CHK:
3831       // Signature:
3832       //   __builtin___sprintf_chk (dst, ost, objsize, format, ...)
3833       idx_objsize = 2;
3834       idx_format = 3;
3835       info.argidx = 4;
3836       break;
3837 
3838     case BUILT_IN_SNPRINTF:
3839       // Signature:
3840       //   __builtin_snprintf (dst, size, format, ...)
3841       idx_dstsize = 1;
3842       idx_format = 2;
3843       info.argidx = 3;
3844       info.bounded = true;
3845       break;
3846 
3847     case BUILT_IN_SNPRINTF_CHK:
3848       // Signature:
3849       //   __builtin___snprintf_chk (dst, size, ost, objsize, format, ...)
3850       idx_dstsize = 1;
3851       idx_objsize = 3;
3852       idx_format = 4;
3853       info.argidx = 5;
3854       info.bounded = true;
3855       break;
3856 
3857     case BUILT_IN_VSNPRINTF:
3858       // Signature:
3859       //   __builtin_vsprintf (dst, size, format, va)
3860       idx_dstsize = 1;
3861       idx_format = 2;
3862       info.argidx = -1;
3863       info.bounded = true;
3864       break;
3865 
3866     case BUILT_IN_VSNPRINTF_CHK:
3867       // Signature:
3868       //   __builtin___vsnprintf_chk (dst, size, ost, objsize, format, va)
3869       idx_dstsize = 1;
3870       idx_objsize = 3;
3871       idx_format = 4;
3872       info.argidx = -1;
3873       info.bounded = true;
3874       break;
3875 
3876     case BUILT_IN_VSPRINTF:
3877       // Signature:
3878       //   __builtin_vsprintf (dst, format, va)
3879       idx_format = 1;
3880       info.argidx = -1;
3881       break;
3882 
3883     case BUILT_IN_VSPRINTF_CHK:
3884       // Signature:
3885       //   __builtin___vsprintf_chk (dst, ost, objsize, format, va)
3886       idx_format = 3;
3887       idx_objsize = 2;
3888       info.argidx = -1;
3889       break;
3890 
3891     default:
3892       return false;
3893     }
3894 
3895   /* Set the global warning level for this function.  */
3896   warn_level = info.bounded ? warn_format_trunc : warn_format_overflow;
3897 
3898   /* The first argument is a pointer to the destination.  */
3899   tree dstptr = gimple_call_arg (info.callstmt, 0);
3900 
3901   info.format = gimple_call_arg (info.callstmt, idx_format);
3902 
3903   /* True when the destination size is constant as opposed to the lower
3904      or upper bound of a range.  */
3905   bool dstsize_cst_p = true;
3906 
3907   if (idx_dstsize == HOST_WIDE_INT_M1U)
3908     {
3909       /* For non-bounded functions like sprintf, determine the size
3910 	 of the destination from the object or pointer passed to it
3911 	 as the first argument.  */
3912       dstsize = get_destination_size (dstptr);
3913     }
3914   else if (tree size = gimple_call_arg (info.callstmt, idx_dstsize))
3915     {
3916       /* For bounded functions try to get the size argument.  */
3917 
3918       if (TREE_CODE (size) == INTEGER_CST)
3919 	{
3920 	  dstsize = tree_to_uhwi (size);
3921 	  /* No object can be larger than SIZE_MAX bytes (half the address
3922 	     space) on the target.
3923 	     The functions are defined only for output of at most INT_MAX
3924 	     bytes.  Specifying a bound in excess of that limit effectively
3925 	     defeats the bounds checking (and on some implementations such
3926 	     as Solaris cause the function to fail with EINVAL).  */
3927 	  if (dstsize > target_size_max () / 2)
3928 	    {
3929 	      /* Avoid warning if -Wstringop-overflow is specified since
3930 		 it also warns for the same thing though only for the
3931 		 checking built-ins.  */
3932 	      if ((idx_objsize == HOST_WIDE_INT_M1U
3933 		   || !warn_stringop_overflow))
3934 		warning_at (gimple_location (info.callstmt), info.warnopt (),
3935 			    "specified bound %wu exceeds maximum object size "
3936 			    "%wu",
3937 			    dstsize, target_size_max () / 2);
3938 	    }
3939 	  else if (dstsize > target_int_max ())
3940 	    warning_at (gimple_location (info.callstmt), info.warnopt (),
3941 			"specified bound %wu exceeds %<INT_MAX%>",
3942 			dstsize);
3943 	}
3944       else if (TREE_CODE (size) == SSA_NAME)
3945 	{
3946 	  /* Try to determine the range of values of the argument
3947 	     and use the greater of the two at level 1 and the smaller
3948 	     of them at level 2.  */
3949 	  value_range *vr = evrp_range_analyzer.get_value_range (size);
3950 	  if (vr->type == VR_RANGE
3951 	      && TREE_CODE (vr->min) == INTEGER_CST
3952 	      && TREE_CODE (vr->max) == INTEGER_CST)
3953 	    dstsize = (warn_level < 2
3954 		       ? TREE_INT_CST_LOW (vr->max)
3955 		       : TREE_INT_CST_LOW (vr->min));
3956 
3957 	  /* The destination size is not constant.  If the function is
3958 	     bounded (e.g., snprintf) a lower bound of zero doesn't
3959 	     necessarily imply it can be eliminated.  */
3960 	  dstsize_cst_p = false;
3961 	}
3962     }
3963 
3964   if (idx_objsize != HOST_WIDE_INT_M1U)
3965     if (tree size = gimple_call_arg (info.callstmt, idx_objsize))
3966       if (tree_fits_uhwi_p (size))
3967 	objsize = tree_to_uhwi (size);
3968 
3969   if (info.bounded && !dstsize)
3970     {
3971       /* As a special case, when the explicitly specified destination
3972 	 size argument (to a bounded function like snprintf) is zero
3973 	 it is a request to determine the number of bytes on output
3974 	 without actually producing any.  Pretend the size is
3975 	 unlimited in this case.  */
3976       info.objsize = HOST_WIDE_INT_MAX;
3977       info.nowrite = dstsize_cst_p;
3978     }
3979   else
3980     {
3981       /* For calls to non-bounded functions or to those of bounded
3982 	 functions with a non-zero size, warn if the destination
3983 	 pointer is null.  */
3984       if (integer_zerop (dstptr))
3985 	{
3986 	  /* This is diagnosed with -Wformat only when the null is a constant
3987 	     pointer.  The warning here diagnoses instances where the pointer
3988 	     is not constant.  */
3989 	  location_t loc = gimple_location (info.callstmt);
3990 	  warning_at (EXPR_LOC_OR_LOC (dstptr, loc),
3991 		      info.warnopt (), "null destination pointer");
3992 	  return false;
3993 	}
3994 
3995       /* Set the object size to the smaller of the two arguments
3996 	 of both have been specified and they're not equal.  */
3997       info.objsize = dstsize < objsize ? dstsize : objsize;
3998 
3999       if (info.bounded
4000 	  && dstsize < target_size_max () / 2 && objsize < dstsize
4001 	  /* Avoid warning if -Wstringop-overflow is specified since
4002 	     it also warns for the same thing though only for the
4003 	     checking built-ins.  */
4004 	  && (idx_objsize == HOST_WIDE_INT_M1U
4005 	      || !warn_stringop_overflow))
4006 	{
4007 	  warning_at (gimple_location (info.callstmt), info.warnopt (),
4008 		      "specified bound %wu exceeds the size %wu "
4009 		      "of the destination object", dstsize, objsize);
4010 	}
4011     }
4012 
4013   if (integer_zerop (info.format))
4014     {
4015       /* This is diagnosed with -Wformat only when the null is a constant
4016 	 pointer.  The warning here diagnoses instances where the pointer
4017 	 is not constant.  */
4018       location_t loc = gimple_location (info.callstmt);
4019       warning_at (EXPR_LOC_OR_LOC (info.format, loc),
4020 		  info.warnopt (), "null format string");
4021       return false;
4022     }
4023 
4024   info.fmtstr = get_format_string (info.format, &info.fmtloc);
4025   if (!info.fmtstr)
4026     return false;
4027 
4028   /* The result is the number of bytes output by the formatted function,
4029      including the terminating NUL.  */
4030   format_result res = format_result ();
4031 
4032   bool success = compute_format_length (info, &res);
4033 
4034   /* When optimizing and the printf return value optimization is enabled,
4035      attempt to substitute the computed result for the return value of
4036      the call.  Avoid this optimization when -frounding-math is in effect
4037      and the format string contains a floating point directive.  */
4038   bool call_removed = false;
4039   if (success && optimize > 0)
4040     {
4041       /* Save a copy of the iterator pointing at the call.  The iterator
4042 	 may change to point past the call in try_substitute_return_value
4043 	 but the original value is needed in try_simplify_call.  */
4044       gimple_stmt_iterator gsi_call = *gsi;
4045 
4046       if (flag_printf_return_value
4047 	  && (!flag_rounding_math || !res.floating))
4048 	call_removed = try_substitute_return_value (gsi, info, res);
4049 
4050       if (!call_removed)
4051 	try_simplify_call (&gsi_call, info, res);
4052     }
4053 
4054   return call_removed;
4055 }
4056 
4057 edge
before_dom_children(basic_block bb)4058 sprintf_dom_walker::before_dom_children (basic_block bb)
4059 {
4060   evrp_range_analyzer.enter (bb);
4061   for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); )
4062     {
4063       /* Iterate over statements, looking for function calls.  */
4064       gimple *stmt = gsi_stmt (si);
4065 
4066       /* First record ranges generated by this statement.  */
4067       evrp_range_analyzer.record_ranges_from_stmt (stmt, false);
4068 
4069       if (is_gimple_call (stmt) && handle_gimple_call (&si))
4070 	/* If handle_gimple_call returns true, the iterator is
4071 	   already pointing to the next statement.  */
4072 	continue;
4073 
4074       gsi_next (&si);
4075     }
4076   return NULL;
4077 }
4078 
4079 void
after_dom_children(basic_block bb)4080 sprintf_dom_walker::after_dom_children (basic_block bb)
4081 {
4082   evrp_range_analyzer.leave (bb);
4083 }
4084 
4085 /* Execute the pass for function FUN.  */
4086 
4087 unsigned int
execute(function * fun)4088 pass_sprintf_length::execute (function *fun)
4089 {
4090   init_target_to_host_charmap ();
4091 
4092   calculate_dominance_info (CDI_DOMINATORS);
4093 
4094   sprintf_dom_walker sprintf_dom_walker;
4095   sprintf_dom_walker.walk (ENTRY_BLOCK_PTR_FOR_FN (fun));
4096 
4097   /* Clean up object size info.  */
4098   fini_object_sizes ();
4099   return 0;
4100 }
4101 
4102 }   /* Unnamed namespace.  */
4103 
4104 /* Return a pointer to a pass object newly constructed from the context
4105    CTXT.  */
4106 
4107 gimple_opt_pass *
make_pass_sprintf_length(gcc::context * ctxt)4108 make_pass_sprintf_length (gcc::context *ctxt)
4109 {
4110   return new pass_sprintf_length (ctxt);
4111 }
4112