1 /* String length optimization
2    Copyright (C) 2011-2020 Free Software Foundation, Inc.
3    Contributed by Jakub Jelinek <jakub@redhat.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "alloc-pool.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "gimple-pretty-print.h"
33 #include "gimple-ssa-warn-restrict.h"
34 #include "fold-const.h"
35 #include "stor-layout.h"
36 #include "gimple-fold.h"
37 #include "tree-eh.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "expr.h"
42 #include "tree-cfg.h"
43 #include "tree-dfa.h"
44 #include "domwalk.h"
45 #include "tree-ssa-alias.h"
46 #include "tree-ssa-propagate.h"
47 #include "tree-ssa-strlen.h"
48 #include "tree-hash-traits.h"
49 #include "tree-object-size.h"
50 #include "builtins.h"
51 #include "target.h"
52 #include "diagnostic-core.h"
53 #include "diagnostic.h"
54 #include "intl.h"
55 #include "attribs.h"
56 #include "calls.h"
57 #include "cfgloop.h"
58 #include "tree-ssa-loop.h"
59 #include "tree-scalar-evolution.h"
60 #include "vr-values.h"
61 #include "gimple-ssa-evrp-analyze.h"
62 #include "tree-ssa.h"
63 
64 /* A vector indexed by SSA_NAME_VERSION.  0 means unknown, positive value
65    is an index into strinfo vector, negative value stands for
66    string length of a string literal (~strlen).  */
67 static vec<int> ssa_ver_to_stridx;
68 
69 /* Number of currently active string indexes plus one.  */
70 static int max_stridx;
71 
72 /* Set to true to optimize, false when just checking.  */
73 static bool strlen_optimize;
74 
75 /* String information record.  */
76 struct strinfo
77 {
78   /* Number of leading characters that are known to be nonzero.  This is
79      also the length of the string if FULL_STRING_P.
80 
81      The values in a list of related string pointers must be consistent;
82      that is, if strinfo B comes X bytes after strinfo A, it must be
83      the case that A->nonzero_chars == X + B->nonzero_chars.  */
84   tree nonzero_chars;
85   /* Any of the corresponding pointers for querying alias oracle.  */
86   tree ptr;
87   /* STMT is used for two things:
88 
89      - To record the statement that should be used for delayed length
90        computations.  We maintain the invariant that all related strinfos
91        have delayed lengths or none do.
92 
93      - To record the malloc or calloc call that produced this result
94        to optimize away malloc/memset sequences.  STMT is reset after
95        a calloc-allocated object has been stored a non-zero value into.  */
96   gimple *stmt;
97   /* Set to the dynamic allocation statement for the object (alloca,
98      calloc, malloc, or VLA).  Unlike STMT, once set for a strinfo
99      object, ALLOC doesn't change.  */
100   gimple *alloc;
101   /* Pointer to '\0' if known, if NULL, it can be computed as
102      ptr + length.  */
103   tree endptr;
104   /* Reference count.  Any changes to strinfo entry possibly shared
105      with dominating basic blocks need unshare_strinfo first, except
106      for dont_invalidate which affects only the immediately next
107      maybe_invalidate.  */
108   int refcount;
109   /* Copy of index.  get_strinfo (si->idx) should return si;  */
110   int idx;
111   /* These 3 fields are for chaining related string pointers together.
112      E.g. for
113      bl = strlen (b); dl = strlen (d); strcpy (a, b); c = a + bl;
114      strcpy (c, d); e = c + dl;
115      strinfo(a) -> strinfo(c) -> strinfo(e)
116      All have ->first field equal to strinfo(a)->idx and are doubly
117      chained through prev/next fields.  The later strinfos are required
118      to point into the same string with zero or more bytes after
119      the previous pointer and all bytes in between the two pointers
120      must be non-zero.  Functions like strcpy or memcpy are supposed
121      to adjust all previous strinfo lengths, but not following strinfo
122      lengths (those are uncertain, usually invalidated during
123      maybe_invalidate, except when the alias oracle knows better).
124      Functions like strcat on the other side adjust the whole
125      related strinfo chain.
126      They are updated lazily, so to use the chain the same first fields
127      and si->prev->next == si->idx needs to be verified.  */
128   int first;
129   int next;
130   int prev;
131   /* A flag whether the string is known to be written in the current
132      function.  */
133   bool writable;
134   /* A flag for the next maybe_invalidate that this strinfo shouldn't
135      be invalidated.  Always cleared by maybe_invalidate.  */
136   bool dont_invalidate;
137   /* True if the string is known to be nul-terminated after NONZERO_CHARS
138      characters.  False is useful when detecting strings that are built
139      up via successive memcpys.  */
140   bool full_string_p;
141 };
142 
143 /* Pool for allocating strinfo_struct entries.  */
144 static object_allocator<strinfo> strinfo_pool ("strinfo pool");
145 
146 /* Vector mapping positive string indexes to strinfo, for the
147    current basic block.  The first pointer in the vector is special,
148    it is either NULL, meaning the vector isn't shared, or it is
149    a basic block pointer to the owner basic_block if shared.
150    If some other bb wants to modify the vector, the vector needs
151    to be unshared first, and only the owner bb is supposed to free it.  */
152 static vec<strinfo *, va_heap, vl_embed> *stridx_to_strinfo;
153 
154 /* One OFFSET->IDX mapping.  */
155 struct stridxlist
156 {
157   struct stridxlist *next;
158   HOST_WIDE_INT offset;
159   int idx;
160 };
161 
162 /* Hash table entry, mapping a DECL to a chain of OFFSET->IDX mappings.  */
163 struct decl_stridxlist_map
164 {
165   struct tree_map_base base;
166   struct stridxlist list;
167 };
168 
169 /* Hash table for mapping decls to a chained list of offset -> idx
170    mappings.  */
171 typedef hash_map<tree_decl_hash, stridxlist> decl_to_stridxlist_htab_t;
172 static decl_to_stridxlist_htab_t *decl_to_stridxlist_htab;
173 
174 /* Hash table mapping strlen (or strnlen with constant bound and return
175    smaller than bound) calls to stridx instances describing
176    the calls' arguments.  Non-null only when warn_stringop_truncation
177    is non-zero.  */
178 typedef std::pair<int, location_t> stridx_strlenloc;
179 static hash_map<tree, stridx_strlenloc> *strlen_to_stridx;
180 
181 /* Obstack for struct stridxlist and struct decl_stridxlist_map.  */
182 static struct obstack stridx_obstack;
183 
184 /* Last memcpy statement if it could be adjusted if the trailing
185    '\0' written is immediately overwritten, or
186    *x = '\0' store that could be removed if it is immediately overwritten.  */
187 struct laststmt_struct
188 {
189   gimple *stmt;
190   tree len;
191   int stridx;
192 } laststmt;
193 
194 static int get_stridx_plus_constant (strinfo *, unsigned HOST_WIDE_INT, tree);
195 static void handle_builtin_stxncpy_strncat (bool, gimple_stmt_iterator *);
196 
197 /* Sets MINMAX to either the constant value or the range VAL is in
198    and returns either the constant value or VAL on success or null
199    when the range couldn't be determined.  Uses RVALS when nonnull
200    to determine the range, otherwise get_range_info.  */
201 
202 tree
get_range(tree val,wide_int minmax[2],const vr_values * rvals)203 get_range (tree val, wide_int minmax[2], const vr_values *rvals /* = NULL */)
204 {
205   if (TREE_CODE (val) == INTEGER_CST)
206     {
207       minmax[0] = minmax[1] = wi::to_wide (val);
208       return val;
209     }
210 
211   if (TREE_CODE (val) != SSA_NAME)
212     return NULL_TREE;
213 
214   if (rvals)
215     {
216       /* The range below may be "inaccurate" if a constant has been
217 	 substituted earlier for VAL by this pass that hasn't been
218 	 propagated through the CFG.  This shoud be fixed by the new
219 	 on-demand VRP if/when it becomes available (hopefully in
220 	 GCC 11).  */
221       const value_range *vr
222 	= (CONST_CAST (class vr_values *, rvals)->get_value_range (val));
223       value_range_kind rng = vr->kind ();
224       if (rng != VR_RANGE || !range_int_cst_p (vr))
225 	return NULL_TREE;
226 
227       minmax[0] = wi::to_wide (vr->min ());
228       minmax[1] = wi::to_wide (vr->max ());
229       return val;
230     }
231 
232   value_range_kind rng = get_range_info (val, minmax, minmax + 1);
233   if (rng == VR_RANGE)
234     return val;
235 
236   /* Do not handle anti-ranges and instead make use of the on-demand
237      VRP if/when it becomes available (hopefully in GCC 11).  */
238   return NULL_TREE;
239 }
240 
241 /* Return:
242 
243    *  +1  if SI is known to start with more than OFF nonzero characters.
244 
245    *   0  if SI is known to start with exactly OFF nonzero characters.
246 
247    *  -1  if SI either does not start with OFF nonzero characters
248 	  or the relationship between the number of leading nonzero
249 	  characters in SI and OFF is unknown.  */
250 
251 static inline int
compare_nonzero_chars(strinfo * si,unsigned HOST_WIDE_INT off)252 compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off)
253 {
254   if (si->nonzero_chars
255       && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
256     return compare_tree_int (si->nonzero_chars, off);
257   else
258     return -1;
259 }
260 
261 /* Same as above but suitable also for strings with non-constant lengths.
262    Uses RVALS to determine length range.  */
263 
264 static int
compare_nonzero_chars(strinfo * si,unsigned HOST_WIDE_INT off,const vr_values * rvals)265 compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off,
266 		       const vr_values *rvals)
267 {
268   if (!si->nonzero_chars)
269     return -1;
270 
271   if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
272     return compare_tree_int (si->nonzero_chars, off);
273 
274   if (!rvals || TREE_CODE (si->nonzero_chars) != SSA_NAME)
275     return -1;
276 
277   const value_range_equiv *vr
278     = (CONST_CAST (class vr_values *, rvals)
279        ->get_value_range (si->nonzero_chars));
280 
281   value_range_kind rng = vr->kind ();
282   if (rng != VR_RANGE || !range_int_cst_p (vr))
283     return -1;
284 
285   /* If the offset is less than the minimum length or if the bounds
286      of the length range are equal return the result of the comparison
287      same as in the constant case.  Otherwise return a conservative
288      result.  */
289   int cmpmin = compare_tree_int (vr->min (), off);
290   if (cmpmin > 0 || tree_int_cst_equal (vr->min (), vr->max ()))
291     return cmpmin;
292 
293   return -1;
294 }
295 
296 /* Return true if SI is known to be a zero-length string.  */
297 
298 static inline bool
zero_length_string_p(strinfo * si)299 zero_length_string_p (strinfo *si)
300 {
301   return si->full_string_p && integer_zerop (si->nonzero_chars);
302 }
303 
304 /* Return strinfo vector entry IDX.  */
305 
306 static inline strinfo *
get_strinfo(int idx)307 get_strinfo (int idx)
308 {
309   if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
310     return NULL;
311   return (*stridx_to_strinfo)[idx];
312 }
313 
314 /* Get the next strinfo in the chain after SI, or null if none.  */
315 
316 static inline strinfo *
get_next_strinfo(strinfo * si)317 get_next_strinfo (strinfo *si)
318 {
319   if (si->next == 0)
320     return NULL;
321   strinfo *nextsi = get_strinfo (si->next);
322   if (nextsi == NULL || nextsi->first != si->first || nextsi->prev != si->idx)
323     return NULL;
324   return nextsi;
325 }
326 
327 /* Helper function for get_stridx.  Return the strinfo index of the address
328    of EXP, which is available in PTR if nonnull.  If OFFSET_OUT, it is
329    OK to return the index for some X <= &EXP and store &EXP - X in
330    *OFFSET_OUT.  When RVALS is nonnull uses it to determine range
331    information.  */
332 
333 static int
334 get_addr_stridx (tree exp, tree ptr, unsigned HOST_WIDE_INT *offset_out,
335 		 const vr_values *rvals = NULL)
336 {
337   HOST_WIDE_INT off;
338   struct stridxlist *list, *last = NULL;
339   tree base;
340 
341   if (!decl_to_stridxlist_htab)
342     return 0;
343 
344   poly_int64 poff;
345   base = get_addr_base_and_unit_offset (exp, &poff);
346   if (base == NULL || !DECL_P (base) || !poff.is_constant (&off))
347     return 0;
348 
349   list = decl_to_stridxlist_htab->get (base);
350   if (list == NULL)
351     return 0;
352 
353   do
354     {
355       if (list->offset == off)
356 	{
357 	  if (offset_out)
358 	    *offset_out = 0;
359 	  return list->idx;
360 	}
361       if (list->offset > off)
362 	return 0;
363       last = list;
364       list = list->next;
365     }
366   while (list);
367 
368   if ((offset_out || ptr) && last && last->idx > 0)
369     {
370       unsigned HOST_WIDE_INT rel_off
371 	= (unsigned HOST_WIDE_INT) off - last->offset;
372       strinfo *si = get_strinfo (last->idx);
373       if (si && compare_nonzero_chars (si, rel_off, rvals) >= 0)
374 	{
375 	  if (offset_out)
376 	    {
377 	      *offset_out = rel_off;
378 	      return last->idx;
379 	    }
380 	  else
381 	    return get_stridx_plus_constant (si, rel_off, ptr);
382 	}
383     }
384   return 0;
385 }
386 
387 /* Returns string index for EXP.  When EXP is an SSA_NAME that refers
388    to a known strinfo with an offset and OFFRNG is non-null, sets
389    both elements of the OFFRNG array to the range of the offset and
390    returns the index of the known strinfo.  In this case the result
391    must not be used in for functions that modify the string.
392    When nonnull, uses RVALS to determine range information.  */
393 
394 static int
395 get_stridx (tree exp, wide_int offrng[2] = NULL, const vr_values *rvals = NULL)
396 {
397   if (offrng)
398     offrng[0] = offrng[1] = wi::zero (TYPE_PRECISION (ptrdiff_type_node));
399 
400   if (TREE_CODE (exp) == SSA_NAME)
401     {
402       if (ssa_ver_to_stridx[SSA_NAME_VERSION (exp)])
403 	return ssa_ver_to_stridx[SSA_NAME_VERSION (exp)];
404 
405       tree e = exp;
406       int last_idx = 0;
407       HOST_WIDE_INT offset = 0;
408       /* Follow a chain of at most 5 assignments.  */
409       for (int i = 0; i < 5; i++)
410 	{
411 	  gimple *def_stmt = SSA_NAME_DEF_STMT (e);
412 	  if (!is_gimple_assign (def_stmt))
413 	    return last_idx;
414 
415 	  tree_code rhs_code = gimple_assign_rhs_code (def_stmt);
416 	  tree ptr, off;
417 
418 	  if (rhs_code == ADDR_EXPR)
419 	    {
420 	      /* Handle indices/offsets into VLAs which are implemented
421 	         as pointers to arrays.  */
422 	      ptr = gimple_assign_rhs1 (def_stmt);
423 	      ptr = TREE_OPERAND (ptr, 0);
424 
425 	      /* Handle also VLAs of types larger than char.  */
426 	      if (tree eltsize = TYPE_SIZE_UNIT (TREE_TYPE (ptr)))
427 		{
428 		  if (TREE_CODE (ptr) == ARRAY_REF)
429 		    {
430 		      off = TREE_OPERAND (ptr, 1);
431 		      ptr = TREE_OPERAND (ptr, 0);
432 		      if (!integer_onep (eltsize))
433 			{
434 			  /* Scale the array index by the size of the element
435 			     type in the rare case that it's greater than
436 			     the typical 1 for char, making sure both operands
437 			     have the same type.  */
438 			  eltsize = fold_convert (ssizetype, eltsize);
439 			  off = fold_convert (ssizetype, off);
440 			  off = fold_build2 (MULT_EXPR, ssizetype, off, eltsize);
441 			}
442 		    }
443 		  else
444 		    off = integer_zero_node;
445 		}
446 	      else
447 		return 0;
448 
449 	      if (TREE_CODE (ptr) != MEM_REF)
450 	        return 0;
451 
452 	      /* Add the MEM_REF byte offset.  */
453 	      tree mem_off = TREE_OPERAND (ptr, 1);
454 	      off = fold_build2 (PLUS_EXPR, TREE_TYPE (off), off, mem_off);
455 	      ptr = TREE_OPERAND (ptr, 0);
456 	    }
457 	  else if (rhs_code == POINTER_PLUS_EXPR)
458 	    {
459 	      ptr = gimple_assign_rhs1 (def_stmt);
460 	      off = gimple_assign_rhs2 (def_stmt);
461 	    }
462 	  else
463 	    return 0;
464 
465 	  if (TREE_CODE (ptr) != SSA_NAME)
466 	    return 0;
467 
468 	  if (!tree_fits_shwi_p (off))
469 	    {
470 	      if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
471 		if (offrng)
472 		  {
473 		    /* Only when requested by setting OFFRNG to non-null,
474 		       return the index corresponding to the SSA_NAME.
475 		       Do this irrespective of the whether the offset
476 		       is known.  */
477 		    if (get_range (off, offrng, rvals))
478 		      {
479 			/* When the offset range is known, increment it
480 			   it by the constant offset computed in prior
481 			   iterations and store it in the OFFRNG array.  */
482  			offrng[0] += offset;
483 			offrng[1] += offset;
484 		      }
485 		    else
486 		      {
487 			/* When the offset range cannot be determined
488 			   store [0, SIZE_MAX] and let the caller decide
489 			   if the offset matters.  */
490 			offrng[1] = wi::to_wide (TYPE_MAX_VALUE (sizetype));
491 			offrng[0] = wi::zero (offrng[1].get_precision ());
492 		      }
493 		    return idx;
494 		  }
495 	      return 0;
496 	    }
497 
498 	  HOST_WIDE_INT this_off = tree_to_shwi (off);
499 	  if (offrng)
500 	    {
501 	      offrng[0] += wi::shwi (this_off, offrng->get_precision ());
502 	      offrng[1] += offrng[0];
503 	    }
504 
505 	  if (this_off < 0)
506 	    return last_idx;
507 
508 	  offset = (unsigned HOST_WIDE_INT) offset + this_off;
509 	  if (offset < 0)
510 	    return last_idx;
511 
512 	  if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
513 	    {
514 	      strinfo *si = get_strinfo (idx);
515 	      if (si)
516 		{
517 		  if (compare_nonzero_chars (si, offset) >= 0)
518 		    return get_stridx_plus_constant (si, offset, exp);
519 
520 		  if (offrng)
521 		    last_idx = idx;
522 		}
523 	    }
524 	  e = ptr;
525 	}
526 
527       return last_idx;
528     }
529 
530   if (TREE_CODE (exp) == ADDR_EXPR)
531     {
532       int idx = get_addr_stridx (TREE_OPERAND (exp, 0), exp, NULL);
533       if (idx != 0)
534 	return idx;
535     }
536 
537   const char *p = c_getstr (exp);
538   if (p)
539     return ~(int) strlen (p);
540 
541   return 0;
542 }
543 
544 /* Return true if strinfo vector is shared with the immediate dominator.  */
545 
546 static inline bool
strinfo_shared(void)547 strinfo_shared (void)
548 {
549   return vec_safe_length (stridx_to_strinfo)
550 	 && (*stridx_to_strinfo)[0] != NULL;
551 }
552 
553 /* Unshare strinfo vector that is shared with the immediate dominator.  */
554 
555 static void
unshare_strinfo_vec(void)556 unshare_strinfo_vec (void)
557 {
558   strinfo *si;
559   unsigned int i = 0;
560 
561   gcc_assert (strinfo_shared ());
562   stridx_to_strinfo = vec_safe_copy (stridx_to_strinfo);
563   for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
564     if (si != NULL)
565       si->refcount++;
566   (*stridx_to_strinfo)[0] = NULL;
567 }
568 
569 /* Attempt to create a string index for exp, ADDR_EXPR's operand.
570    Return a pointer to the location where the string index can
571    be stored (if 0) or is stored, or NULL if this can't be tracked.  */
572 
573 static int *
addr_stridxptr(tree exp)574 addr_stridxptr (tree exp)
575 {
576   HOST_WIDE_INT off;
577 
578   poly_int64 poff;
579   tree base = get_addr_base_and_unit_offset (exp, &poff);
580   if (base == NULL_TREE || !DECL_P (base) || !poff.is_constant (&off))
581     return NULL;
582 
583   if (!decl_to_stridxlist_htab)
584     {
585       decl_to_stridxlist_htab
586        	= new hash_map<tree_decl_hash, stridxlist> (64);
587       gcc_obstack_init (&stridx_obstack);
588     }
589 
590   bool existed;
591   stridxlist *list = &decl_to_stridxlist_htab->get_or_insert (base, &existed);
592   if (existed)
593     {
594       int i;
595       stridxlist *before = NULL;
596       for (i = 0; i < 32; i++)
597 	{
598 	  if (list->offset == off)
599 	    return &list->idx;
600 	  if (list->offset > off && before == NULL)
601 	    before = list;
602 	  if (list->next == NULL)
603 	    break;
604 	  list = list->next;
605 	}
606       if (i == 32)
607 	return NULL;
608       if (before)
609 	{
610 	  list = before;
611 	  before = XOBNEW (&stridx_obstack, struct stridxlist);
612 	  *before = *list;
613 	  list->next = before;
614 	  list->offset = off;
615 	  list->idx = 0;
616 	  return &list->idx;
617 	}
618       list->next = XOBNEW (&stridx_obstack, struct stridxlist);
619       list = list->next;
620     }
621 
622   list->next = NULL;
623   list->offset = off;
624   list->idx = 0;
625   return &list->idx;
626 }
627 
628 /* Create a new string index, or return 0 if reached limit.  */
629 
630 static int
new_stridx(tree exp)631 new_stridx (tree exp)
632 {
633   int idx;
634   if (max_stridx >= param_max_tracked_strlens)
635     return 0;
636   if (TREE_CODE (exp) == SSA_NAME)
637     {
638       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
639 	return 0;
640       idx = max_stridx++;
641       ssa_ver_to_stridx[SSA_NAME_VERSION (exp)] = idx;
642       return idx;
643     }
644   if (TREE_CODE (exp) == ADDR_EXPR)
645     {
646       int *pidx = addr_stridxptr (TREE_OPERAND (exp, 0));
647       if (pidx != NULL)
648 	{
649 	  gcc_assert (*pidx == 0);
650 	  *pidx = max_stridx++;
651 	  return *pidx;
652 	}
653     }
654   return 0;
655 }
656 
657 /* Like new_stridx, but for ADDR_EXPR's operand instead.  */
658 
659 static int
new_addr_stridx(tree exp)660 new_addr_stridx (tree exp)
661 {
662   int *pidx;
663   if (max_stridx >= param_max_tracked_strlens)
664     return 0;
665   pidx = addr_stridxptr (exp);
666   if (pidx != NULL)
667     {
668       gcc_assert (*pidx == 0);
669       *pidx = max_stridx++;
670       return *pidx;
671     }
672   return 0;
673 }
674 
675 /* Create a new strinfo.  */
676 
677 static strinfo *
new_strinfo(tree ptr,int idx,tree nonzero_chars,bool full_string_p)678 new_strinfo (tree ptr, int idx, tree nonzero_chars, bool full_string_p)
679 {
680   strinfo *si = strinfo_pool.allocate ();
681   si->nonzero_chars = nonzero_chars;
682   STRIP_USELESS_TYPE_CONVERSION (ptr);
683   si->ptr = ptr;
684   si->stmt = NULL;
685   si->alloc = NULL;
686   si->endptr = NULL_TREE;
687   si->refcount = 1;
688   si->idx = idx;
689   si->first = 0;
690   si->prev = 0;
691   si->next = 0;
692   si->writable = false;
693   si->dont_invalidate = false;
694   si->full_string_p = full_string_p;
695   return si;
696 }
697 
698 /* Decrease strinfo refcount and free it if not referenced anymore.  */
699 
700 static inline void
free_strinfo(strinfo * si)701 free_strinfo (strinfo *si)
702 {
703   if (si && --si->refcount == 0)
704     strinfo_pool.remove (si);
705 }
706 
707 /* Set strinfo in the vector entry IDX to SI.  */
708 
709 static inline void
set_strinfo(int idx,strinfo * si)710 set_strinfo (int idx, strinfo *si)
711 {
712   if (vec_safe_length (stridx_to_strinfo) && (*stridx_to_strinfo)[0])
713     unshare_strinfo_vec ();
714   if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
715     vec_safe_grow_cleared (stridx_to_strinfo, idx + 1);
716   (*stridx_to_strinfo)[idx] = si;
717 }
718 
719 /* Return the first strinfo in the related strinfo chain
720    if all strinfos in between belong to the chain, otherwise NULL.  */
721 
722 static strinfo *
verify_related_strinfos(strinfo * origsi)723 verify_related_strinfos (strinfo *origsi)
724 {
725   strinfo *si = origsi, *psi;
726 
727   if (origsi->first == 0)
728     return NULL;
729   for (; si->prev; si = psi)
730     {
731       if (si->first != origsi->first)
732 	return NULL;
733       psi = get_strinfo (si->prev);
734       if (psi == NULL)
735 	return NULL;
736       if (psi->next != si->idx)
737 	return NULL;
738     }
739   if (si->idx != si->first)
740     return NULL;
741   return si;
742 }
743 
744 /* Set SI's endptr to ENDPTR and compute its length based on SI->ptr.
745    Use LOC for folding.  */
746 
747 static void
set_endptr_and_length(location_t loc,strinfo * si,tree endptr)748 set_endptr_and_length (location_t loc, strinfo *si, tree endptr)
749 {
750   si->endptr = endptr;
751   si->stmt = NULL;
752   tree start_as_size = fold_convert_loc (loc, size_type_node, si->ptr);
753   tree end_as_size = fold_convert_loc (loc, size_type_node, endptr);
754   si->nonzero_chars = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
755 				       end_as_size, start_as_size);
756   si->full_string_p = true;
757 }
758 
759 /* Return the string length, or NULL if it can't be computed.
760    The length may but need not be constant.  Instead, it might be
761    the result of a strlen() call.  */
762 
763 static tree
get_string_length(strinfo * si)764 get_string_length (strinfo *si)
765 {
766   /* If the length has already been computed return it if it's exact
767      (i.e., the string is nul-terminated at NONZERO_CHARS), or return
768      null if it isn't.  */
769   if (si->nonzero_chars)
770     return si->full_string_p ? si->nonzero_chars : NULL;
771 
772   /* If the string is the result of one of the built-in calls below
773      attempt to compute the length from the call statement.  */
774   if (si->stmt)
775     {
776       gimple *stmt = si->stmt, *lenstmt;
777       tree callee, lhs, fn, tem;
778       location_t loc;
779       gimple_stmt_iterator gsi;
780 
781       gcc_assert (is_gimple_call (stmt));
782       callee = gimple_call_fndecl (stmt);
783       gcc_assert (callee && fndecl_built_in_p (callee, BUILT_IN_NORMAL));
784       lhs = gimple_call_lhs (stmt);
785       /* unshare_strinfo is intentionally not called here.  The (delayed)
786 	 transformation of strcpy or strcat into stpcpy is done at the place
787 	 of the former strcpy/strcat call and so can affect all the strinfos
788 	 with the same stmt.  If they were unshared before and transformation
789 	 has been already done, the handling of BUILT_IN_STPCPY{,_CHK} should
790 	 just compute the right length.  */
791       switch (DECL_FUNCTION_CODE (callee))
792 	{
793 	case BUILT_IN_STRCAT:
794 	case BUILT_IN_STRCAT_CHK:
795 	  gsi = gsi_for_stmt (stmt);
796 	  fn = builtin_decl_implicit (BUILT_IN_STRLEN);
797 	  gcc_assert (lhs == NULL_TREE);
798 	  tem = unshare_expr (gimple_call_arg (stmt, 0));
799 	  lenstmt = gimple_build_call (fn, 1, tem);
800 	  lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), lenstmt);
801 	  gimple_call_set_lhs (lenstmt, lhs);
802 	  gimple_set_vuse (lenstmt, gimple_vuse (stmt));
803 	  gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
804 	  tem = gimple_call_arg (stmt, 0);
805           if (!ptrofftype_p (TREE_TYPE (lhs)))
806             {
807               lhs = convert_to_ptrofftype (lhs);
808               lhs = force_gimple_operand_gsi (&gsi, lhs, true, NULL_TREE,
809                                               true, GSI_SAME_STMT);
810             }
811 	  lenstmt = gimple_build_assign
812 			(make_ssa_name (TREE_TYPE (gimple_call_arg (stmt, 0))),
813 			 POINTER_PLUS_EXPR,tem, lhs);
814 	  gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
815 	  gimple_call_set_arg (stmt, 0, gimple_assign_lhs (lenstmt));
816 	  lhs = NULL_TREE;
817 	  /* FALLTHRU */
818 	case BUILT_IN_STRCPY:
819 	case BUILT_IN_STRCPY_CHK:
820 	  gcc_assert (builtin_decl_implicit_p (BUILT_IN_STPCPY));
821 	  if (gimple_call_num_args (stmt) == 2)
822 	    fn = builtin_decl_implicit (BUILT_IN_STPCPY);
823 	  else
824 	    fn = builtin_decl_explicit (BUILT_IN_STPCPY_CHK);
825 	  gcc_assert (lhs == NULL_TREE);
826 	  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
827 	    {
828 	      fprintf (dump_file, "Optimizing: ");
829 	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
830 	    }
831 	  gimple_call_set_fndecl (stmt, fn);
832 	  lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), stmt);
833 	  gimple_call_set_lhs (stmt, lhs);
834 	  update_stmt (stmt);
835 	  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
836 	    {
837 	      fprintf (dump_file, "into: ");
838 	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
839 	    }
840 	  /* FALLTHRU */
841 	case BUILT_IN_STPCPY:
842 	case BUILT_IN_STPCPY_CHK:
843 	  gcc_assert (lhs != NULL_TREE);
844 	  loc = gimple_location (stmt);
845 	  set_endptr_and_length (loc, si, lhs);
846 	  for (strinfo *chainsi = verify_related_strinfos (si);
847 	       chainsi != NULL;
848 	       chainsi = get_next_strinfo (chainsi))
849 	    if (chainsi->nonzero_chars == NULL)
850 	      set_endptr_and_length (loc, chainsi, lhs);
851 	  break;
852 	case BUILT_IN_ALLOCA:
853 	case BUILT_IN_ALLOCA_WITH_ALIGN:
854 	case BUILT_IN_MALLOC:
855 	  break;
856 	/* BUILT_IN_CALLOC always has si->nonzero_chars set.  */
857 	default:
858 	  gcc_unreachable ();
859 	  break;
860 	}
861     }
862 
863   return si->nonzero_chars;
864 }
865 
866 /* Dump strlen data to FP for statement STMT.  When non-null, RVALS
867    points to EVRP info and is used to dump strlen range for non-constant
868    results.  */
869 
870 DEBUG_FUNCTION void
dump_strlen_info(FILE * fp,gimple * stmt,const vr_values * rvals)871 dump_strlen_info (FILE *fp, gimple *stmt, const vr_values *rvals)
872 {
873   if (stmt)
874     {
875       fprintf (fp, "\nDumping strlen pass data after ");
876       print_gimple_expr (fp, stmt, TDF_LINENO);
877       fputc ('\n', fp);
878     }
879   else
880     fprintf (fp, "\nDumping strlen pass data\n");
881 
882   fprintf (fp, "max_stridx = %i\n", max_stridx);
883   fprintf (fp, "ssa_ver_to_stridx has %u elements\n",
884 	   ssa_ver_to_stridx.length ());
885   fprintf (fp, "stridx_to_strinfo");
886   if (stridx_to_strinfo)
887     {
888       fprintf (fp, " has %u elements\n", stridx_to_strinfo->length ());
889       for (unsigned i = 0; i != stridx_to_strinfo->length (); ++i)
890 	{
891 	  if (strinfo *si = (*stridx_to_strinfo)[i])
892 	    {
893 	      if (!si->idx)
894 		continue;
895 	      fprintf (fp, "  idx = %i", si->idx);
896 	      if (si->ptr)
897 		{
898 		  fprintf (fp, ", ptr = ");
899 		  print_generic_expr (fp, si->ptr);
900 		}
901 
902 	      if (si->nonzero_chars)
903 		{
904 		  fprintf (fp, ", nonzero_chars = ");
905 		  print_generic_expr (fp, si->nonzero_chars);
906 		  if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
907 		    {
908 		      value_range_kind rng = VR_UNDEFINED;
909 		      wide_int min, max;
910 		      if (rvals)
911 			{
912 			  const value_range *vr
913 			    = CONST_CAST (class vr_values *, rvals)
914 			    ->get_value_range (si->nonzero_chars);
915 			  rng = vr->kind ();
916 			  if (range_int_cst_p (vr))
917 			    {
918 			      min = wi::to_wide (vr->min ());
919 			      max = wi::to_wide (vr->max ());
920 			    }
921 			  else
922 			    rng = VR_UNDEFINED;
923 			}
924 		      else
925 			rng = get_range_info (si->nonzero_chars, &min, &max);
926 
927 		      if (rng == VR_RANGE || rng == VR_ANTI_RANGE)
928 			{
929 			  fprintf (fp, " %s[%llu, %llu]",
930 				   rng == VR_RANGE ? "" : "~",
931 				   (long long) min.to_uhwi (),
932 				   (long long) max.to_uhwi ());
933 			}
934 		    }
935 		}
936 
937 	      fprintf (fp, ", refcount = %i", si->refcount);
938 	      if (si->stmt)
939 		{
940 		  fprintf (fp, ", stmt = ");
941 		  print_gimple_expr (fp, si->stmt, 0);
942 		}
943 	      if (si->alloc)
944 		{
945 		  fprintf (fp, ", alloc = ");
946 		  print_gimple_expr (fp, si->alloc, 0);
947 		}
948 	      if (si->writable)
949 		fprintf (fp, ", writable");
950 	      if (si->dont_invalidate)
951 		fprintf (fp, ", dont_invalidate");
952 	      if (si->full_string_p)
953 		fprintf (fp, ", full_string_p");
954 	      if (strinfo *next = get_next_strinfo (si))
955 		{
956 		  fprintf (fp, ", {");
957 		  do
958 		    fprintf (fp, "%i%s", next->idx, next->first ? ", " : "");
959 		  while ((next = get_next_strinfo (next)));
960 		  fprintf (fp, "}");
961 		}
962 	      fputs ("\n", fp);
963 	    }
964 	}
965     }
966   else
967     fprintf (fp, " = null\n");
968 
969   fprintf (fp, "decl_to_stridxlist_htab");
970   if (decl_to_stridxlist_htab)
971     {
972       fputs ("\n", fp);
973       typedef decl_to_stridxlist_htab_t::iterator iter_t;
974       for (iter_t it = decl_to_stridxlist_htab->begin ();
975 	   it != decl_to_stridxlist_htab->end (); ++it)
976 	{
977 	  tree decl = (*it).first;
978 	  stridxlist *list = &(*it).second;
979 	  fprintf (fp, "  decl = ");
980 	  print_generic_expr (fp, decl);
981 	  if (list)
982 	    {
983 	      fprintf (fp, ", offsets = {");
984 	      for (; list; list = list->next)
985 		fprintf (fp, "%lli%s", (long long) list->offset,
986 			 list->next ? ", " : "");
987 	      fputs ("}", fp);
988 	    }
989 	  fputs ("\n", fp);
990 	}
991     }
992   else
993     fprintf (fp, " = null\n");
994 
995   if (laststmt.stmt)
996     {
997       fprintf (fp, "laststmt = ");
998       print_gimple_expr (fp, laststmt.stmt, 0);
999       fprintf (fp, ", len = ");
1000       print_generic_expr (fp, laststmt.len);
1001       fprintf (fp, ", stridx = %i\n", laststmt.stridx);
1002     }
1003 }
1004 
1005 /* Attempt to determine the length of the string SRC.  On success, store
1006    the length in *PDATA and return true.  Otherwise, return false.
1007    VISITED is a bitmap of visited PHI nodes.  RVALS points to EVRP info
1008    and PSSA_DEF_MAX to an SSA_NAME assignment limit used to prevent runaway
1009    recursion.  */
1010 
1011 static bool
get_range_strlen_dynamic(tree src,c_strlen_data * pdata,bitmap * visited,const vr_values * rvals,unsigned * pssa_def_max)1012 get_range_strlen_dynamic (tree src, c_strlen_data *pdata, bitmap *visited,
1013 			  const vr_values *rvals, unsigned *pssa_def_max)
1014 {
1015   int idx = get_stridx (src);
1016   if (!idx)
1017     {
1018       if (TREE_CODE (src) == SSA_NAME)
1019 	{
1020 	  gimple *def_stmt = SSA_NAME_DEF_STMT (src);
1021 	  if (gimple_code (def_stmt) == GIMPLE_PHI)
1022 	    {
1023 	      if (!*visited)
1024 		*visited = BITMAP_ALLOC (NULL);
1025 
1026 	      if (!bitmap_set_bit (*visited, SSA_NAME_VERSION (src)))
1027 		return true;
1028 
1029 	      if (*pssa_def_max == 0)
1030 		return false;
1031 
1032 	      --*pssa_def_max;
1033 
1034 	      /* Iterate over the PHI arguments and determine the minimum
1035 		 and maximum length/size of each and incorporate them into
1036 		 the overall result.  */
1037 	      gphi *phi = as_a <gphi *> (def_stmt);
1038 	      for (unsigned i = 0; i != gimple_phi_num_args (phi); ++i)
1039 		{
1040 		  tree arg = gimple_phi_arg_def (phi, i);
1041 		  if (arg == gimple_phi_result (def_stmt))
1042 		    continue;
1043 
1044 		  c_strlen_data argdata = { };
1045 		  if (get_range_strlen_dynamic (arg, &argdata, visited, rvals,
1046 						pssa_def_max))
1047 		    {
1048 		      /* Set the DECL of an unterminated array this argument
1049 			 refers to if one hasn't been found yet.  */
1050 		      if (!pdata->decl && argdata.decl)
1051 			pdata->decl = argdata.decl;
1052 
1053 		      if (!argdata.minlen
1054 			  || (integer_zerop (argdata.minlen)
1055 			      && (!argdata.maxbound
1056 				  || integer_all_onesp (argdata.maxbound))
1057 			      && integer_all_onesp (argdata.maxlen)))
1058 			{
1059 			  /* Set the upper bound of the length to unbounded.  */
1060 			  pdata->maxlen = build_all_ones_cst (size_type_node);
1061 			  continue;
1062 			}
1063 
1064 		      /* Adjust the minimum and maximum length determined
1065 			 so far and the upper bound on the array size.  */
1066 		      if (!pdata->minlen
1067 			  || tree_int_cst_lt (argdata.minlen, pdata->minlen))
1068 			pdata->minlen = argdata.minlen;
1069 		      if (!pdata->maxlen
1070 			  || (argdata.maxlen
1071 			      && tree_int_cst_lt (pdata->maxlen, argdata.maxlen)))
1072 			pdata->maxlen = argdata.maxlen;
1073 		      if (!pdata->maxbound
1074 			  || TREE_CODE (pdata->maxbound) != INTEGER_CST
1075 			  || (argdata.maxbound
1076 			      && tree_int_cst_lt (pdata->maxbound,
1077 						  argdata.maxbound)
1078 			      && !integer_all_onesp (argdata.maxbound)))
1079 			pdata->maxbound = argdata.maxbound;
1080 		    }
1081 		  else
1082 		    pdata->maxlen = build_all_ones_cst (size_type_node);
1083 		}
1084 
1085 	      return true;
1086 	    }
1087 	}
1088 
1089       /* Return success regardless of the result and handle *PDATA
1090 	 in the caller.  */
1091       get_range_strlen (src, pdata, 1);
1092       return true;
1093     }
1094 
1095   if (idx < 0)
1096     {
1097       /* SRC is a string of constant length.  */
1098       pdata->minlen = build_int_cst (size_type_node, ~idx);
1099       pdata->maxlen = pdata->minlen;
1100       pdata->maxbound = pdata->maxlen;
1101       return true;
1102     }
1103 
1104   if (strinfo *si = get_strinfo (idx))
1105     {
1106       pdata->minlen = get_string_length (si);
1107       if (!pdata->minlen && si->nonzero_chars)
1108 	{
1109 	  if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
1110 	    pdata->minlen = si->nonzero_chars;
1111 	  else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
1112 	    {
1113 	      const value_range_equiv *vr
1114 		= CONST_CAST (class vr_values *, rvals)
1115 		->get_value_range (si->nonzero_chars);
1116 	      if (vr->kind () == VR_RANGE
1117 		  && range_int_cst_p (vr))
1118 		{
1119 		  pdata->minlen = vr->min ();
1120 		  pdata->maxlen = vr->max ();
1121 		}
1122 	      else
1123 		pdata->minlen = build_zero_cst (size_type_node);
1124 	    }
1125 	  else
1126 	    pdata->minlen = build_zero_cst (size_type_node);
1127 
1128 	  tree base = si->ptr;
1129 	  if (TREE_CODE (base) == ADDR_EXPR)
1130 	    base = TREE_OPERAND (base, 0);
1131 
1132 	  HOST_WIDE_INT off;
1133 	  poly_int64 poff;
1134 	  base = get_addr_base_and_unit_offset (base, &poff);
1135 	  if (base
1136 	      && DECL_P (base)
1137 	      && TREE_CODE (TREE_TYPE (base)) == ARRAY_TYPE
1138 	      && TYPE_SIZE_UNIT (TREE_TYPE (base))
1139 	      && poff.is_constant (&off))
1140 	    {
1141 	      tree basetype = TREE_TYPE (base);
1142 	      tree size = TYPE_SIZE_UNIT (basetype);
1143 	      if (TREE_CODE (size) == INTEGER_CST)
1144 		{
1145 		  ++off;   /* Increment for the terminating nul.  */
1146 		  tree toffset = build_int_cst (size_type_node, off);
1147 		  pdata->maxlen = fold_build2 (MINUS_EXPR, size_type_node, size,
1148 					       toffset);
1149 		  pdata->maxbound = pdata->maxlen;
1150 		}
1151 	      else
1152 		pdata->maxlen = build_all_ones_cst (size_type_node);
1153 	    }
1154 	  else
1155 	    pdata->maxlen = build_all_ones_cst (size_type_node);
1156 	}
1157       else if (pdata->minlen && TREE_CODE (pdata->minlen) == SSA_NAME)
1158 	{
1159 	  const value_range_equiv *vr
1160 	    = CONST_CAST (class vr_values *, rvals)
1161 	    ->get_value_range (si->nonzero_chars);
1162 	  if (vr->kind () == VR_RANGE
1163 	      && range_int_cst_p (vr))
1164 	    {
1165 	      pdata->minlen = vr->min ();
1166 	      pdata->maxlen = vr->max ();
1167 	      pdata->maxbound = pdata->maxlen;
1168 	    }
1169 	  else
1170 	    {
1171 	      pdata->minlen = build_zero_cst (size_type_node);
1172 	      pdata->maxlen = build_all_ones_cst (size_type_node);
1173 	    }
1174 	}
1175       else if (pdata->minlen && TREE_CODE (pdata->minlen) == INTEGER_CST)
1176 	{
1177 	  pdata->maxlen = pdata->minlen;
1178 	  pdata->maxbound = pdata->minlen;
1179 	}
1180       else
1181 	{
1182 	  /* For PDATA->MINLEN that's a non-constant expression such
1183 	     as PLUS_EXPR whose value range is unknown, set the bounds
1184 	     to zero and SIZE_MAX.  */
1185 	  pdata->minlen = build_zero_cst (size_type_node);
1186 	  pdata->maxlen = build_all_ones_cst (size_type_node);
1187 	}
1188 
1189       return true;
1190     }
1191 
1192   return false;
1193 }
1194 
1195 /* Analogous to get_range_strlen but for dynamically created strings,
1196    i.e., those created by calls to strcpy as opposed to just string
1197    constants.
1198    Try to obtain the range of the lengths of the string(s) referenced
1199    by SRC, or the size of the largest array SRC refers to if the range
1200    of lengths cannot be determined, and store all in *PDATA.  RVALS
1201    points to EVRP info.  */
1202 
1203 void
get_range_strlen_dynamic(tree src,c_strlen_data * pdata,const vr_values * rvals)1204 get_range_strlen_dynamic (tree src, c_strlen_data *pdata,
1205 			  const vr_values *rvals)
1206 {
1207   bitmap visited = NULL;
1208   tree maxbound = pdata->maxbound;
1209 
1210   unsigned limit = param_ssa_name_def_chain_limit;
1211   if (!get_range_strlen_dynamic (src, pdata, &visited, rvals, &limit))
1212     {
1213       /* On failure extend the length range to an impossible maximum
1214 	 (a valid MAXLEN must be less than PTRDIFF_MAX - 1).  Other
1215 	 members can stay unchanged regardless.  */
1216       pdata->minlen = ssize_int (0);
1217       pdata->maxlen = build_all_ones_cst (size_type_node);
1218     }
1219   else if (!pdata->minlen)
1220     pdata->minlen = ssize_int (0);
1221 
1222   /* If it's unchanged from it initial non-null value, set the conservative
1223      MAXBOUND to SIZE_MAX.  Otherwise leave it null (if it is null).  */
1224   if (maxbound && pdata->maxbound == maxbound)
1225     pdata->maxbound = build_all_ones_cst (size_type_node);
1226 
1227   if (visited)
1228     BITMAP_FREE (visited);
1229 }
1230 
1231 /* Invalidate string length information for strings whose length might
1232    change due to stores in STMT, except those marked DONT_INVALIDATE.
1233    For string-modifying statements, ZERO_WRITE is set when the statement
1234    wrote only zeros.
1235    Returns true if any STRIDX_TO_STRINFO entries were considered
1236    for invalidation.  */
1237 
1238 static bool
1239 maybe_invalidate (gimple *stmt, bool zero_write = false)
1240 {
1241   if (dump_file && (dump_flags & TDF_DETAILS))
1242     {
1243       fprintf (dump_file, "%s called for ", __func__);
1244       print_gimple_stmt (dump_file, stmt, TDF_LINENO);
1245     }
1246 
1247   strinfo *si;
1248   bool nonempty = false;
1249 
1250   for (unsigned i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
1251     {
1252       if (si == NULL || !POINTER_TYPE_P (TREE_TYPE (si->ptr)))
1253 	continue;
1254 
1255       nonempty = true;
1256 
1257       /* Unconditionally reset DONT_INVALIDATE.  */
1258       bool dont_invalidate = si->dont_invalidate;
1259       si->dont_invalidate = false;
1260 
1261       if (dont_invalidate)
1262 	continue;
1263 
1264       ao_ref r;
1265       tree size = NULL_TREE;
1266       if (si->nonzero_chars)
1267 	{
1268 	  /* Include the terminating nul in the size of the string
1269 	     to consider when determining possible clobber.  */
1270 	  tree type = TREE_TYPE (si->nonzero_chars);
1271 	  size = fold_build2 (PLUS_EXPR, type, si->nonzero_chars,
1272 			      build_int_cst (type, 1));
1273 	}
1274       ao_ref_init_from_ptr_and_size (&r, si->ptr, size);
1275       if (stmt_may_clobber_ref_p_1 (stmt, &r))
1276 	{
1277 	  if (dump_file && (dump_flags & TDF_DETAILS))
1278 	    {
1279 	      fputs ("  statement may clobber object ", dump_file);
1280 	      print_generic_expr (dump_file, si->ptr);
1281 	      if (size && tree_fits_uhwi_p (size))
1282 		fprintf (dump_file, " " HOST_WIDE_INT_PRINT_UNSIGNED
1283 			 " bytes in size", tree_to_uhwi (size));
1284 	      fputc ('\n', dump_file);
1285 	    }
1286 
1287 	  set_strinfo (i, NULL);
1288 	  free_strinfo (si);
1289 	  continue;
1290 	}
1291 
1292       if (size
1293 	  && !zero_write
1294 	  && si->stmt
1295 	  && is_gimple_call (si->stmt)
1296 	  && (DECL_FUNCTION_CODE (gimple_call_fndecl (si->stmt))
1297 	      == BUILT_IN_CALLOC))
1298 	{
1299 	  /* If the clobber test above considered the length of
1300 	     the string (including the nul), then for (potentially)
1301 	     non-zero writes that might modify storage allocated by
1302 	     calloc consider the whole object and if it might be
1303 	     clobbered by the statement reset the statement.  */
1304 	  ao_ref_init_from_ptr_and_size (&r, si->ptr, NULL_TREE);
1305 	  if (stmt_may_clobber_ref_p_1 (stmt, &r))
1306 	    si->stmt = NULL;
1307 	}
1308     }
1309 
1310   if (dump_file && (dump_flags & TDF_DETAILS))
1311     fprintf (dump_file, "%s returns %i\n", __func__, nonempty);
1312 
1313   return nonempty;
1314 }
1315 
1316 /* Unshare strinfo record SI, if it has refcount > 1 or
1317    if stridx_to_strinfo vector is shared with some other
1318    bbs.  */
1319 
1320 static strinfo *
unshare_strinfo(strinfo * si)1321 unshare_strinfo (strinfo *si)
1322 {
1323   strinfo *nsi;
1324 
1325   if (si->refcount == 1 && !strinfo_shared ())
1326     return si;
1327 
1328   nsi = new_strinfo (si->ptr, si->idx, si->nonzero_chars, si->full_string_p);
1329   nsi->stmt = si->stmt;
1330   nsi->alloc = si->alloc;
1331   nsi->endptr = si->endptr;
1332   nsi->first = si->first;
1333   nsi->prev = si->prev;
1334   nsi->next = si->next;
1335   nsi->writable = si->writable;
1336   set_strinfo (si->idx, nsi);
1337   free_strinfo (si);
1338   return nsi;
1339 }
1340 
1341 /* Attempt to create a new strinfo for BASESI + OFF, or find existing
1342    strinfo if there is any.  Return it's idx, or 0 if no strinfo has
1343    been created.  */
1344 
1345 static int
get_stridx_plus_constant(strinfo * basesi,unsigned HOST_WIDE_INT off,tree ptr)1346 get_stridx_plus_constant (strinfo *basesi, unsigned HOST_WIDE_INT off,
1347 			  tree ptr)
1348 {
1349   if (TREE_CODE (ptr) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1350     return 0;
1351 
1352   if (compare_nonzero_chars (basesi, off) < 0
1353       || !tree_fits_uhwi_p (basesi->nonzero_chars))
1354     return 0;
1355 
1356   unsigned HOST_WIDE_INT nonzero_chars
1357     = tree_to_uhwi (basesi->nonzero_chars) - off;
1358   strinfo *si = basesi, *chainsi;
1359   if (si->first || si->prev || si->next)
1360     si = verify_related_strinfos (basesi);
1361   if (si == NULL
1362       || si->nonzero_chars == NULL_TREE
1363       || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1364     return 0;
1365 
1366   if (TREE_CODE (ptr) == SSA_NAME
1367       && ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1368     ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names);
1369 
1370   gcc_checking_assert (compare_tree_int (si->nonzero_chars, off) != -1);
1371   for (chainsi = si; chainsi->next; chainsi = si)
1372     {
1373       si = get_next_strinfo (chainsi);
1374       if (si == NULL
1375 	  || si->nonzero_chars == NULL_TREE
1376 	  || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1377 	break;
1378       int r = compare_tree_int (si->nonzero_chars, nonzero_chars);
1379       if (r != 1)
1380 	{
1381 	  if (r == 0)
1382 	    {
1383 	      if (TREE_CODE (ptr) == SSA_NAME)
1384 		ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = si->idx;
1385 	      else
1386 		{
1387 		  int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1388 		  if (pidx != NULL && *pidx == 0)
1389 		    *pidx = si->idx;
1390 		}
1391 	      return si->idx;
1392 	    }
1393 	  break;
1394 	}
1395     }
1396 
1397   int idx = new_stridx (ptr);
1398   if (idx == 0)
1399     return 0;
1400   si = new_strinfo (ptr, idx, build_int_cst (size_type_node, nonzero_chars),
1401 		    basesi->full_string_p);
1402   set_strinfo (idx, si);
1403   if (strinfo *nextsi = get_strinfo (chainsi->next))
1404     {
1405       nextsi = unshare_strinfo (nextsi);
1406       si->next = nextsi->idx;
1407       nextsi->prev = idx;
1408     }
1409   chainsi = unshare_strinfo (chainsi);
1410   if (chainsi->first == 0)
1411     chainsi->first = chainsi->idx;
1412   chainsi->next = idx;
1413   if (chainsi->endptr == NULL_TREE && zero_length_string_p (si))
1414     chainsi->endptr = ptr;
1415   si->endptr = chainsi->endptr;
1416   si->prev = chainsi->idx;
1417   si->first = chainsi->first;
1418   si->writable = chainsi->writable;
1419   return si->idx;
1420 }
1421 
1422 /* Note that PTR, a pointer SSA_NAME initialized in the current stmt, points
1423    to a zero-length string and if possible chain it to a related strinfo
1424    chain whose part is or might be CHAINSI.  */
1425 
1426 static strinfo *
zero_length_string(tree ptr,strinfo * chainsi)1427 zero_length_string (tree ptr, strinfo *chainsi)
1428 {
1429   strinfo *si;
1430   int idx;
1431   if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1432     ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names);
1433   gcc_checking_assert (TREE_CODE (ptr) == SSA_NAME
1434 		       && ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] == 0);
1435 
1436   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1437     return NULL;
1438   if (chainsi != NULL)
1439     {
1440       si = verify_related_strinfos (chainsi);
1441       if (si)
1442 	{
1443 	  do
1444 	    {
1445 	      /* We shouldn't mix delayed and non-delayed lengths.  */
1446 	      gcc_assert (si->full_string_p);
1447 	      if (si->endptr == NULL_TREE)
1448 		{
1449 		  si = unshare_strinfo (si);
1450 		  si->endptr = ptr;
1451 		}
1452 	      chainsi = si;
1453 	      si = get_next_strinfo (si);
1454 	    }
1455 	  while (si != NULL);
1456 	  if (zero_length_string_p (chainsi))
1457 	    {
1458 	      if (chainsi->next)
1459 		{
1460 		  chainsi = unshare_strinfo (chainsi);
1461 		  chainsi->next = 0;
1462 		}
1463 	      ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = chainsi->idx;
1464 	      return chainsi;
1465 	    }
1466 	}
1467       else
1468 	{
1469 	  /* We shouldn't mix delayed and non-delayed lengths.  */
1470 	  gcc_assert (chainsi->full_string_p);
1471 	  if (chainsi->first || chainsi->prev || chainsi->next)
1472 	    {
1473 	      chainsi = unshare_strinfo (chainsi);
1474 	      chainsi->first = 0;
1475 	      chainsi->prev = 0;
1476 	      chainsi->next = 0;
1477 	    }
1478 	}
1479     }
1480   idx = new_stridx (ptr);
1481   if (idx == 0)
1482     return NULL;
1483   si = new_strinfo (ptr, idx, build_int_cst (size_type_node, 0), true);
1484   set_strinfo (idx, si);
1485   si->endptr = ptr;
1486   if (chainsi != NULL)
1487     {
1488       chainsi = unshare_strinfo (chainsi);
1489       if (chainsi->first == 0)
1490 	chainsi->first = chainsi->idx;
1491       chainsi->next = idx;
1492       if (chainsi->endptr == NULL_TREE)
1493 	chainsi->endptr = ptr;
1494       si->prev = chainsi->idx;
1495       si->first = chainsi->first;
1496       si->writable = chainsi->writable;
1497     }
1498   return si;
1499 }
1500 
1501 /* For strinfo ORIGSI whose length has been just updated, adjust other
1502    related strinfos so that they match the new ORIGSI.  This involves:
1503 
1504    - adding ADJ to the nonzero_chars fields
1505    - copying full_string_p from the new ORIGSI.  */
1506 
1507 static void
adjust_related_strinfos(location_t loc,strinfo * origsi,tree adj)1508 adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj)
1509 {
1510   strinfo *si = verify_related_strinfos (origsi);
1511 
1512   if (si == NULL)
1513     return;
1514 
1515   while (1)
1516     {
1517       strinfo *nsi;
1518 
1519       if (si != origsi)
1520 	{
1521 	  tree tem;
1522 
1523 	  si = unshare_strinfo (si);
1524 	  /* We shouldn't see delayed lengths here; the caller must
1525 	     have calculated the old length in order to calculate
1526 	     the adjustment.  */
1527 	  gcc_assert (si->nonzero_chars);
1528 	  tem = fold_convert_loc (loc, TREE_TYPE (si->nonzero_chars), adj);
1529 	  si->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
1530 					       TREE_TYPE (si->nonzero_chars),
1531 					       si->nonzero_chars, tem);
1532 	  si->full_string_p = origsi->full_string_p;
1533 
1534 	  si->endptr = NULL_TREE;
1535 	  si->dont_invalidate = true;
1536 	}
1537       nsi = get_next_strinfo (si);
1538       if (nsi == NULL)
1539 	return;
1540       si = nsi;
1541     }
1542 }
1543 
1544 /* Find if there are other SSA_NAME pointers equal to PTR
1545    for which we don't track their string lengths yet.  If so, use
1546    IDX for them.  */
1547 
1548 static void
find_equal_ptrs(tree ptr,int idx)1549 find_equal_ptrs (tree ptr, int idx)
1550 {
1551   if (TREE_CODE (ptr) != SSA_NAME)
1552     return;
1553   while (1)
1554     {
1555       gimple *stmt = SSA_NAME_DEF_STMT (ptr);
1556       if (!is_gimple_assign (stmt))
1557 	return;
1558       ptr = gimple_assign_rhs1 (stmt);
1559       switch (gimple_assign_rhs_code (stmt))
1560 	{
1561 	case SSA_NAME:
1562 	  break;
1563 	CASE_CONVERT:
1564 	  if (!POINTER_TYPE_P (TREE_TYPE (ptr)))
1565 	    return;
1566 	  if (TREE_CODE (ptr) == SSA_NAME)
1567 	    break;
1568 	  if (TREE_CODE (ptr) != ADDR_EXPR)
1569 	    return;
1570 	  /* FALLTHRU */
1571 	case ADDR_EXPR:
1572 	  {
1573 	    int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1574 	    if (pidx != NULL && *pidx == 0)
1575 	      *pidx = idx;
1576 	    return;
1577 	  }
1578 	default:
1579 	  return;
1580 	}
1581 
1582       /* We might find an endptr created in this pass.  Grow the
1583 	 vector in that case.  */
1584       if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1585 	ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names);
1586 
1587       if (ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] != 0)
1588 	return;
1589       ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = idx;
1590     }
1591 }
1592 
1593 /* Return true if STMT is a call to a builtin function with the right
1594    arguments and attributes that should be considered for optimization
1595    by this pass.  */
1596 
1597 static bool
valid_builtin_call(gimple * stmt)1598 valid_builtin_call (gimple *stmt)
1599 {
1600   if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
1601     return false;
1602 
1603   tree callee = gimple_call_fndecl (stmt);
1604   tree decl = builtin_decl_explicit (DECL_FUNCTION_CODE (callee));
1605   if (decl
1606       && decl != callee
1607       && !gimple_builtin_call_types_compatible_p (stmt, decl))
1608     return false;
1609 
1610   switch (DECL_FUNCTION_CODE (callee))
1611     {
1612     case BUILT_IN_MEMCMP:
1613     case BUILT_IN_MEMCMP_EQ:
1614     case BUILT_IN_STRCMP:
1615     case BUILT_IN_STRNCMP:
1616     case BUILT_IN_STRCHR:
1617     case BUILT_IN_STRLEN:
1618     case BUILT_IN_STRNLEN:
1619       /* The above functions should be pure.  Punt if they aren't.  */
1620       if (gimple_vdef (stmt) || gimple_vuse (stmt) == NULL_TREE)
1621 	return false;
1622       break;
1623 
1624     case BUILT_IN_ALLOCA:
1625     case BUILT_IN_ALLOCA_WITH_ALIGN:
1626     case BUILT_IN_CALLOC:
1627     case BUILT_IN_MALLOC:
1628     case BUILT_IN_MEMCPY:
1629     case BUILT_IN_MEMCPY_CHK:
1630     case BUILT_IN_MEMPCPY:
1631     case BUILT_IN_MEMPCPY_CHK:
1632     case BUILT_IN_MEMSET:
1633     case BUILT_IN_STPCPY:
1634     case BUILT_IN_STPCPY_CHK:
1635     case BUILT_IN_STPNCPY:
1636     case BUILT_IN_STPNCPY_CHK:
1637     case BUILT_IN_STRCAT:
1638     case BUILT_IN_STRCAT_CHK:
1639     case BUILT_IN_STRCPY:
1640     case BUILT_IN_STRCPY_CHK:
1641     case BUILT_IN_STRNCAT:
1642     case BUILT_IN_STRNCAT_CHK:
1643     case BUILT_IN_STRNCPY:
1644     case BUILT_IN_STRNCPY_CHK:
1645       /* The above functions should be neither const nor pure.  Punt if they
1646 	 aren't.  */
1647       if (gimple_vdef (stmt) == NULL_TREE || gimple_vuse (stmt) == NULL_TREE)
1648 	return false;
1649       break;
1650 
1651     default:
1652       break;
1653     }
1654 
1655   return true;
1656 }
1657 
1658 /* If the last .MEM setter statement before STMT is
1659    memcpy (x, y, strlen (y) + 1), the only .MEM use of it is STMT
1660    and STMT is known to overwrite x[strlen (x)], adjust the last memcpy to
1661    just memcpy (x, y, strlen (y)).  SI must be the zero length
1662    strinfo.  */
1663 
1664 static void
adjust_last_stmt(strinfo * si,gimple * stmt,bool is_strcat)1665 adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat)
1666 {
1667   tree vuse, callee, len;
1668   struct laststmt_struct last = laststmt;
1669   strinfo *lastsi, *firstsi;
1670   unsigned len_arg_no = 2;
1671 
1672   laststmt.stmt = NULL;
1673   laststmt.len = NULL_TREE;
1674   laststmt.stridx = 0;
1675 
1676   if (last.stmt == NULL)
1677     return;
1678 
1679   vuse = gimple_vuse (stmt);
1680   if (vuse == NULL_TREE
1681       || SSA_NAME_DEF_STMT (vuse) != last.stmt
1682       || !has_single_use (vuse))
1683     return;
1684 
1685   gcc_assert (last.stridx > 0);
1686   lastsi = get_strinfo (last.stridx);
1687   if (lastsi == NULL)
1688     return;
1689 
1690   if (lastsi != si)
1691     {
1692       if (lastsi->first == 0 || lastsi->first != si->first)
1693 	return;
1694 
1695       firstsi = verify_related_strinfos (si);
1696       if (firstsi == NULL)
1697 	return;
1698       while (firstsi != lastsi)
1699 	{
1700 	  firstsi = get_next_strinfo (firstsi);
1701 	  if (firstsi == NULL)
1702 	    return;
1703 	}
1704     }
1705 
1706   if (!is_strcat && !zero_length_string_p (si))
1707     return;
1708 
1709   if (is_gimple_assign (last.stmt))
1710     {
1711       gimple_stmt_iterator gsi;
1712 
1713       if (!integer_zerop (gimple_assign_rhs1 (last.stmt)))
1714 	return;
1715       if (stmt_could_throw_p (cfun, last.stmt))
1716 	return;
1717       gsi = gsi_for_stmt (last.stmt);
1718       unlink_stmt_vdef (last.stmt);
1719       release_defs (last.stmt);
1720       gsi_remove (&gsi, true);
1721       return;
1722     }
1723 
1724   if (!valid_builtin_call (last.stmt))
1725     return;
1726 
1727   callee = gimple_call_fndecl (last.stmt);
1728   switch (DECL_FUNCTION_CODE (callee))
1729     {
1730     case BUILT_IN_MEMCPY:
1731     case BUILT_IN_MEMCPY_CHK:
1732       break;
1733     default:
1734       return;
1735     }
1736 
1737   len = gimple_call_arg (last.stmt, len_arg_no);
1738   if (tree_fits_uhwi_p (len))
1739     {
1740       if (!tree_fits_uhwi_p (last.len)
1741 	  || integer_zerop (len)
1742 	  || tree_to_uhwi (len) != tree_to_uhwi (last.len) + 1)
1743 	return;
1744       /* Don't adjust the length if it is divisible by 4, it is more efficient
1745 	 to store the extra '\0' in that case.  */
1746       if ((tree_to_uhwi (len) & 3) == 0)
1747 	return;
1748 
1749       /* Don't fold away an out of bounds access, as this defeats proper
1750 	 warnings.  */
1751       tree dst = gimple_call_arg (last.stmt, 0);
1752       tree size = compute_objsize (dst, 0);
1753       if (size && tree_int_cst_lt (size, len))
1754 	return;
1755     }
1756   else if (TREE_CODE (len) == SSA_NAME)
1757     {
1758       gimple *def_stmt = SSA_NAME_DEF_STMT (len);
1759       if (!is_gimple_assign (def_stmt)
1760 	  || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
1761 	  || gimple_assign_rhs1 (def_stmt) != last.len
1762 	  || !integer_onep (gimple_assign_rhs2 (def_stmt)))
1763 	return;
1764     }
1765   else
1766     return;
1767 
1768   gimple_call_set_arg (last.stmt, len_arg_no, last.len);
1769   update_stmt (last.stmt);
1770 }
1771 
1772 /* For an LHS that is an SSA_NAME that is the result of a strlen()
1773    call, or when BOUND is non-null, of a strnlen() call, set LHS
1774    range info to [0, min (MAX, BOUND)] when the range includes more
1775    than one value and return LHS.  Otherwise, when the range
1776    [MIN, MAX] is such that MIN == MAX, return the tree representation
1777    of (MIN). The latter allows callers to fold suitable strnlen() calls
1778    to constants.  */
1779 
1780 tree
set_strlen_range(tree lhs,wide_int min,wide_int max,tree bound)1781 set_strlen_range (tree lhs, wide_int min, wide_int max,
1782 		  tree bound /* = NULL_TREE */)
1783 {
1784   if (TREE_CODE (lhs) != SSA_NAME
1785       || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1786     return NULL_TREE;
1787 
1788   if (bound)
1789     {
1790       /* For strnlen, adjust MIN and MAX as necessary.  If the bound
1791 	 is less than the size of the array set MAX to it.  It it's
1792 	 greater than MAX and MAX is non-zero bump MAX down to account
1793 	 for the necessary terminating nul.  Otherwise leave it alone.  */
1794       if (TREE_CODE (bound) == INTEGER_CST)
1795 	{
1796 	  wide_int wibnd = wi::to_wide (bound);
1797 	  int cmp = wi::cmpu (wibnd, max);
1798 	  if (cmp < 0)
1799 	    max = wibnd;
1800 	  else if (cmp && wi::ne_p (max, min))
1801 	    --max;
1802 	}
1803       else if (TREE_CODE (bound) == SSA_NAME)
1804 	{
1805 	  wide_int minbound, maxbound;
1806 	  value_range_kind rng = get_range_info (bound, &minbound, &maxbound);
1807 	  if (rng == VR_RANGE)
1808 	    {
1809 	      /* For a bound in a known range, adjust the range determined
1810 		 above as necessary.  For a bound in some anti-range or
1811 		 in an unknown range, use the range determined by callers.  */
1812 	      if (wi::ltu_p (minbound, min))
1813 		min = minbound;
1814 	      if (wi::ltu_p (maxbound, max))
1815 		max = maxbound;
1816 	    }
1817 	}
1818     }
1819 
1820   if (min == max)
1821     return wide_int_to_tree (size_type_node, min);
1822 
1823   set_range_info (lhs, VR_RANGE, min, max);
1824   return lhs;
1825 }
1826 
1827 /* For an LHS that is an SSA_NAME and for strlen() or strnlen() argument
1828    SRC, set LHS range info to [0, min (N, BOUND)] if SRC refers to
1829    a character array A[N] with unknown length bounded by N, and for
1830    strnlen(), by min (N, BOUND).  */
1831 
1832 static tree
maybe_set_strlen_range(tree lhs,tree src,tree bound)1833 maybe_set_strlen_range (tree lhs, tree src, tree bound)
1834 {
1835   if (TREE_CODE (lhs) != SSA_NAME
1836       || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1837     return NULL_TREE;
1838 
1839   if (TREE_CODE (src) == SSA_NAME)
1840     {
1841       gimple *def = SSA_NAME_DEF_STMT (src);
1842       if (is_gimple_assign (def)
1843 	  && gimple_assign_rhs_code (def) == ADDR_EXPR)
1844 	src = gimple_assign_rhs1 (def);
1845     }
1846 
1847   /* The longest string is PTRDIFF_MAX - 1 bytes including the final
1848      NUL so that the difference between a pointer to just past it and
1849      one to its beginning is positive.  */
1850   wide_int max = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
1851 
1852   if (TREE_CODE (src) == ADDR_EXPR)
1853     {
1854       /* The last array member of a struct can be bigger than its size
1855 	 suggests if it's treated as a poor-man's flexible array member.  */
1856       src = TREE_OPERAND (src, 0);
1857       if (TREE_CODE (src) != MEM_REF
1858 	  && !array_at_struct_end_p (src))
1859 	{
1860 	  tree type = TREE_TYPE (src);
1861 	  tree size = TYPE_SIZE_UNIT (type);
1862 	  if (size
1863 	      && TREE_CODE (size) == INTEGER_CST
1864 	      && !integer_zerop (size))
1865 	    {
1866 	      /* Even though such uses of strlen would be undefined,
1867 		 avoid relying on arrays of arrays in case some genius
1868 		 decides to call strlen on an unterminated array element
1869 		 that's followed by a terminated one.  Likewise, avoid
1870 		 assuming that a struct array member is necessarily
1871 		 nul-terminated (the nul may be in the member that
1872 		 follows).  In those cases, assume that the length
1873 		 of the string stored in such an array is bounded
1874 		 by the size of the enclosing object if one can be
1875 		 determined.  */
1876 	      tree base = get_base_address (src);
1877 	      if (VAR_P (base))
1878 		{
1879 		  if (tree size = DECL_SIZE_UNIT (base))
1880 		    if (size
1881 			&& TREE_CODE (size) == INTEGER_CST
1882 			&& TREE_CODE (TREE_TYPE (base)) != POINTER_TYPE)
1883 		      max = wi::to_wide (size);
1884 		}
1885 	    }
1886 
1887 	  /* For strlen() the upper bound above is equal to
1888 	     the longest string that can be stored in the array
1889 	     (i.e., it accounts for the terminating nul.  For
1890 	     strnlen() bump up the maximum by one since the array
1891 	     need not be nul-terminated.  */
1892 	  if (!bound && max != 0)
1893 	    --max;
1894 	}
1895     }
1896 
1897   wide_int min = wi::zero (max.get_precision ());
1898   return set_strlen_range (lhs, min, max, bound);
1899 }
1900 
1901 /* Diagnose buffer overflow by a STMT writing LEN + PLUS_ONE bytes,
1902    either into a region allocated for the object SI when non-null,
1903    or into an object designated by the LHS of STMT otherwise.
1904    When nonnull uses RVALS to determine range information.
1905    RAWMEM may be set by memcpy and other raw memory functions
1906    to allow accesses across subobject boundaries.  */
1907 
1908 static void
1909 maybe_warn_overflow (gimple *stmt, tree len,
1910 		     const vr_values *rvals = NULL,
1911 		     strinfo *si = NULL, bool plus_one = false,
1912 		     bool rawmem = false)
1913 {
1914   if (!len || gimple_no_warning_p (stmt))
1915     return;
1916 
1917   /* The DECL of the function performing the write if it is done
1918      by one.  */
1919   tree writefn = NULL_TREE;
1920   /* The destination expression involved in the store STMT.  */
1921   tree dest = NULL_TREE;
1922 
1923   if (is_gimple_assign (stmt))
1924     dest = gimple_assign_lhs (stmt);
1925   else if (is_gimple_call (stmt))
1926     {
1927       dest = gimple_call_arg (stmt, 0);
1928       writefn = gimple_call_fndecl (stmt);
1929     }
1930 
1931   if (TREE_NO_WARNING (dest))
1932     return;
1933 
1934   /* Use maximum precision to avoid overflow in the addition below.
1935      Make sure all operands have the same precision to keep wide_int
1936      from ICE'ing.  */
1937 
1938   /* Convenience constants.  */
1939   const widest_int diff_min
1940     = wi::to_widest (TYPE_MIN_VALUE (ptrdiff_type_node));
1941   const widest_int diff_max
1942     = wi::to_widest (TYPE_MAX_VALUE (ptrdiff_type_node));
1943   const widest_int size_max
1944     = wi::to_widest (TYPE_MAX_VALUE (size_type_node));
1945 
1946   /* The offset into the destination object computed below and not
1947      reflected in DESTSIZE.  */
1948   widest_int offrng[2] = { 0, 0 };
1949 
1950   if (!si)
1951     {
1952       /* If no destination STRINFO was provided try to get it from
1953 	 the DEST argument.  */
1954       tree ref = dest;
1955       if (TREE_CODE (ref) == ARRAY_REF)
1956 	{
1957 	  /* Handle stores to VLAs (represented as
1958 	     ARRAY_REF (MEM_REF (vlaptr, 0), N].  */
1959 	  tree off = TREE_OPERAND (ref, 1);
1960 	  ref = TREE_OPERAND (ref, 0);
1961 	  wide_int rng[2];
1962 	  if (get_range (off, rng, rvals))
1963 	    {
1964 	      /* Convert offsets to the maximum precision.  */
1965 	      offrng[0] = widest_int::from (rng[0], SIGNED);
1966 	      offrng[1] = widest_int::from (rng[1], SIGNED);
1967 	    }
1968 	  else
1969 	    {
1970 	      offrng[0] = diff_min;
1971 	      offrng[1] = diff_max;
1972 	    }
1973 	}
1974 
1975       if (TREE_CODE (ref) == MEM_REF)
1976 	{
1977 	  tree mem_off = TREE_OPERAND (ref, 1);
1978 	  ref = TREE_OPERAND (ref, 0);
1979 	  wide_int rng[2];
1980 	  if (get_range (mem_off, rng, rvals))
1981 	    {
1982 	      offrng[0] += widest_int::from (rng[0], SIGNED);
1983 	      offrng[1] += widest_int::from (rng[1], SIGNED);
1984 	    }
1985 	  else
1986 	    {
1987 	      offrng[0] = diff_min;
1988 	      offrng[1] = diff_max;
1989 	    }
1990 	}
1991 
1992       wide_int rng[2];
1993       if (int idx = get_stridx (ref, rng, rvals))
1994 	{
1995 	  si = get_strinfo (idx);
1996 	  offrng[0] += widest_int::from (rng[0], SIGNED);
1997 	  offrng[1] += widest_int::from (rng[1], SIGNED);
1998 	}
1999     }
2000 
2001   /* The allocation call if the destination object was allocated
2002      by one.  */
2003   gimple *alloc_call = NULL;
2004   /* The DECL of the destination object if known and not dynamically
2005      allocated.  */
2006   tree destdecl = NULL_TREE;
2007   /* The offset into the destination object set by compute_objsize
2008      but already reflected in DESTSIZE.  */
2009   tree destoff = NULL_TREE;
2010   /* The size of the destination region (which is smaller than
2011      the destination object for stores at a non-zero offset).  */
2012   tree destsize = NULL_TREE;
2013 
2014   /* Compute the range of sizes of the destination object.  The range
2015      is constant for declared objects but may be a range for allocated
2016      objects.  */
2017   widest_int sizrng[2] = { 0, 0 };
2018   if (si)
2019     {
2020       wide_int rng[2];
2021       destsize = gimple_call_alloc_size (si->alloc, rng, rvals);
2022       if (destsize)
2023 	{
2024 	  sizrng[0] = widest_int::from (rng[0], UNSIGNED);
2025 	  sizrng[1] = widest_int::from (rng[1], UNSIGNED);
2026 	}
2027       alloc_call = si->alloc;
2028     }
2029   else
2030     offrng[0] = offrng[1] = 0;
2031 
2032   if (!destsize)
2033     {
2034       /* If there is no STRINFO for DEST, fall back on compute_objsize.  */
2035       tree off = NULL_TREE;
2036       destsize = compute_objsize (dest, rawmem ? 0 : 1, &destdecl, &off, rvals);
2037       if (destsize)
2038 	{
2039 	  /* Remember OFF but clear OFFRNG that may have been set above.  */
2040 	  destoff = off;
2041 	  offrng[0] = offrng[1] = 0;
2042 
2043 	  if (destdecl && TREE_CODE (destdecl) == SSA_NAME)
2044 	    {
2045 	      gimple *stmt = SSA_NAME_DEF_STMT (destdecl);
2046 	      if (is_gimple_call (stmt))
2047 		alloc_call = stmt;
2048 	      destdecl = NULL_TREE;
2049 	    }
2050 
2051 	  wide_int rng[2];
2052 	  if (get_range (destsize, rng, rvals))
2053 	    {
2054 	      sizrng[0] = widest_int::from (rng[0], UNSIGNED);
2055 	      sizrng[1] = widest_int::from (rng[1], UNSIGNED);
2056 	    }
2057 	  else
2058 	    {
2059 	      /* On failure, rather than failing, set the maximum range
2060 		 so that overflow in allocated objects whose size depends
2061 		 on the strlen of the source can still be diagnosed
2062 		 below.  */
2063 	      sizrng[0] = 0;
2064 	      sizrng[1] = size_max;
2065 	    }
2066 	}
2067     }
2068 
2069   if (!destsize)
2070     {
2071       sizrng[0] = 0;
2072       sizrng[1] = size_max;
2073     };
2074 
2075   /* Return early if the DESTSIZE size expression is the same as LEN
2076      and the offset into the destination is zero.  This might happen
2077      in the case of a pair of malloc and memset calls to allocate
2078      an object and clear it as if by calloc.  */
2079   if (destsize == len && !plus_one && offrng[0] == 0 && offrng[0] == offrng[1])
2080     return;
2081 
2082   wide_int rng[2];
2083   if (!get_range (len, rng, rvals))
2084     return;
2085 
2086   widest_int lenrng[2] =
2087     { widest_int::from (rng[0], SIGNED), widest_int::from (rng[1], SIGNED) };
2088 
2089   if (plus_one)
2090     {
2091       lenrng[0] += 1;
2092       lenrng[1] += 1;
2093     }
2094 
2095   /* The size of the remaining space in the destination computed
2096      as the size of the latter minus the offset into it.  */
2097   widest_int spcrng[2] = { sizrng[0], sizrng[1] };
2098   if (wi::neg_p (offrng[0]) && wi::neg_p (offrng[1]))
2099     {
2100       /* When the offset is negative and the size of the destination
2101 	 object unknown there is little to do.
2102 	 FIXME: Detect offsets that are necessarily invalid regardless
2103 	 of the size of the object.  */
2104       if (!destsize)
2105 	return;
2106 
2107       /* The remaining space is necessarily zero.  */
2108       spcrng[0] = spcrng[1] = 0;
2109     }
2110   else if (wi::neg_p (offrng[0]))
2111     {
2112       /* When the lower bound of the offset is negative but the upper
2113 	 bound is not, reduce the upper bound of the remaining space
2114 	 by the upper bound of the offset but leave the lower bound
2115 	 unchanged.  If that makes the upper bound of the space less
2116 	 than the lower bound swap the two.  */
2117       spcrng[1] -= wi::ltu_p (offrng[1], spcrng[1]) ? offrng[1] : spcrng[1];
2118       if (wi::ltu_p (spcrng[1], spcrng[0]))
2119 	std::swap (spcrng[1], spcrng[0]);
2120     }
2121   else
2122     {
2123       /* When the offset is positive reduce the remaining space by
2124 	 the lower bound of the offset or clear it if the offset is
2125 	 greater.  */
2126       spcrng[0] -= wi::ltu_p (offrng[0], spcrng[0]) ? offrng[0] : spcrng[0];
2127       spcrng[1] -= wi::ltu_p (offrng[0], spcrng[1]) ? offrng[0] : spcrng[1];
2128     }
2129 
2130   if (wi::leu_p (lenrng[0], spcrng[0])
2131       && wi::leu_p (lenrng[1], spcrng[1]))
2132     return;
2133 
2134   if (lenrng[0] == spcrng[1]
2135       && (len != destsize
2136 	  || !si || !is_strlen_related_p (si->ptr, len)))
2137     return;
2138 
2139   location_t loc = gimple_or_expr_nonartificial_location (stmt, dest);
2140   bool warned = false;
2141   if (wi::leu_p (lenrng[0], spcrng[1]))
2142     {
2143       if (len != destsize
2144 	  && (!si || !is_strlen_related_p (si->ptr, len)))
2145 	return;
2146 
2147       warned = (writefn
2148 		? warning_at (loc, OPT_Wstringop_overflow_,
2149 			      "%G%qD writing one too many bytes into a region "
2150 			      "of a size that depends on %<strlen%>",
2151 			      stmt, writefn)
2152 		: warning_at (loc, OPT_Wstringop_overflow_,
2153 			      "%Gwriting one too many bytes into a region "
2154 			      "of a size that depends on %<strlen%>",
2155 			      stmt));
2156     }
2157   else if (lenrng[0] == lenrng[1])
2158     {
2159       if (spcrng[0] == spcrng[1])
2160 	warned = (writefn
2161 		  ? warning_n (loc, OPT_Wstringop_overflow_,
2162 			       lenrng[0].to_uhwi (),
2163 			       "%G%qD writing %wu byte into a region "
2164 			       "of size %wu",
2165 			       "%G%qD writing %wu bytes into a region "
2166 			       "of size %wu",
2167 			       stmt, writefn, lenrng[0].to_uhwi (),
2168 			       spcrng[0].to_uhwi ())
2169 		  : warning_n (loc, OPT_Wstringop_overflow_,
2170 			       lenrng[0].to_uhwi (),
2171 			       "%Gwriting %wu byte into a region "
2172 			       "of size %wu",
2173 			       "%Gwriting %wu bytes into a region "
2174 			       "of size %wu",
2175 			       stmt, lenrng[0].to_uhwi (),
2176 			       spcrng[0].to_uhwi ()));
2177       else
2178 	warned = (writefn
2179 		  ? warning_n (loc, OPT_Wstringop_overflow_,
2180 			       lenrng[0].to_uhwi (),
2181 			       "%G%qD writing %wu byte into a region "
2182 			       "of size between %wu and %wu",
2183 			       "%G%qD writing %wu bytes into a region "
2184 			       "of size between %wu and %wu",
2185 			       stmt, writefn, lenrng[0].to_uhwi (),
2186 			       spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2187 		  : warning_n (loc, OPT_Wstringop_overflow_,
2188 			       lenrng[0].to_uhwi (),
2189 			       "%Gwriting %wu byte into a region "
2190 			       "of size between %wu and %wu",
2191 			       "%Gwriting %wu bytes into a region "
2192 			       "of size between %wu and %wu",
2193 			       stmt, lenrng[0].to_uhwi (),
2194 			       spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2195     }
2196   else if (spcrng[0] == spcrng[1])
2197     warned = (writefn
2198 	      ? warning_at (loc, OPT_Wstringop_overflow_,
2199 			    "%G%qD writing between %wu and %wu bytes "
2200 			    "into a region of size %wu",
2201 			    stmt, writefn, lenrng[0].to_uhwi (),
2202 			    lenrng[1].to_uhwi (),
2203 			    spcrng[0].to_uhwi ())
2204 	      : warning_at (loc, OPT_Wstringop_overflow_,
2205 			    "%Gwriting between %wu and %wu bytes "
2206 			    "into a region of size %wu",
2207 			    stmt, lenrng[0].to_uhwi (),
2208 			    lenrng[1].to_uhwi (),
2209 			    spcrng[0].to_uhwi ()));
2210   else
2211     warned = (writefn
2212 	      ? warning_at (loc, OPT_Wstringop_overflow_,
2213 			    "%G%qD writing between %wu and %wu bytes "
2214 			    "into a region of size between %wu and %wu",
2215 			    stmt, writefn, lenrng[0].to_uhwi (),
2216 			    lenrng[1].to_uhwi (),
2217 			    spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2218 	      : warning_at (loc, OPT_Wstringop_overflow_,
2219 			    "%Gwriting between %wu and %wu bytes "
2220 			    "into a region of size between %wu and %wu",
2221 			    stmt, lenrng[0].to_uhwi (),
2222 			    lenrng[1].to_uhwi (),
2223 			    spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2224 
2225   if (!warned)
2226     return;
2227 
2228   gimple_set_no_warning (stmt, true);
2229 
2230   /* If DESTOFF is not null, use it to format the offset value/range.  */
2231   if (destoff)
2232     {
2233       wide_int rng[2];
2234       if (get_range (destoff, rng))
2235 	{
2236 	  offrng[0] = widest_int::from (rng[0], SIGNED);
2237 	  offrng[1] = widest_int::from (rng[1], SIGNED);
2238 	}
2239       else
2240 	offrng[0] = offrng[1] = 0;
2241     }
2242 
2243   /* Format the offset to keep the number of inform calls from growing
2244      out of control.  */
2245   char offstr[64];
2246   if (offrng[0] == offrng[1])
2247     sprintf (offstr, "%lli", (long long) offrng[0].to_shwi ());
2248   else
2249     sprintf (offstr, "[%lli, %lli]",
2250 	     (long long) offrng[0].to_shwi (), (long long) offrng[1].to_shwi ());
2251 
2252   if (destdecl)
2253     {
2254       if (tree size = DECL_SIZE_UNIT (destdecl))
2255 	inform (DECL_SOURCE_LOCATION (destdecl),
2256 		"at offset %s to object %qD with size %E declared here",
2257 		offstr, destdecl, size);
2258       else
2259 	inform (DECL_SOURCE_LOCATION (destdecl),
2260 		"at offset %s to object %qD declared here",
2261 		offstr, destdecl);
2262       return;
2263     }
2264 
2265   if (!alloc_call)
2266     return;
2267 
2268   tree allocfn = gimple_call_fndecl (alloc_call);
2269   if (!allocfn)
2270     {
2271       /* For an ALLOC_CALL via a function pointer make a small effort
2272 	 to determine the destination of the pointer.  */
2273       allocfn = gimple_call_fn (alloc_call);
2274       if (TREE_CODE (allocfn) == SSA_NAME)
2275 	{
2276 	  gimple *def = SSA_NAME_DEF_STMT (allocfn);
2277 	  if (gimple_assign_single_p (def))
2278 	    {
2279 	      tree rhs = gimple_assign_rhs1 (def);
2280 	      if (DECL_P (rhs))
2281 		allocfn = rhs;
2282 	      else if (TREE_CODE (rhs) == COMPONENT_REF)
2283 		allocfn = TREE_OPERAND (rhs, 1);
2284 	    }
2285 	}
2286     }
2287 
2288   if (gimple_call_builtin_p (alloc_call, BUILT_IN_ALLOCA_WITH_ALIGN))
2289     {
2290       if (sizrng[0] == sizrng[1])
2291 	inform (gimple_location (alloc_call),
2292 		"at offset %s to an object with size %wu declared here",
2293 		offstr, sizrng[0].to_uhwi ());
2294       else if (sizrng[0] == 0)
2295 	{
2296 	  /* Avoid printing impossible sizes.  */
2297 	  if (wi::ltu_p (sizrng[1], diff_max - 2))
2298 	    inform (gimple_location (alloc_call),
2299 		    "at offset %s to an object with size at most %wu "
2300 		    "declared here",
2301 		    offstr, sizrng[1].to_uhwi ());
2302 	  else
2303 	    inform (gimple_location (alloc_call),
2304 		    "at offset %s to an object declared here", offstr);
2305 	}
2306       else
2307 	inform (gimple_location (alloc_call),
2308 		"at offset %s to an object with size between %wu and %wu "
2309 		"declared here",
2310 		offstr, sizrng[0].to_uhwi (), sizrng[1].to_uhwi ());
2311       return;
2312     }
2313 
2314   if (sizrng[0] == sizrng[1])
2315     inform (gimple_location (alloc_call),
2316 	    "at offset %s to an object with size %wu allocated by %qE here",
2317 	    offstr, sizrng[0].to_uhwi (), allocfn);
2318   else if (sizrng[0] == 0)
2319     {
2320       /* Avoid printing impossible sizes.  */
2321       if (wi::ltu_p (sizrng[1], diff_max - 2))
2322 	inform (gimple_location (alloc_call),
2323 		"at offset %s to an object with size at most %wu allocated "
2324 		"by %qD here",
2325 		offstr, sizrng[1].to_uhwi (), allocfn);
2326       else
2327 	inform (gimple_location (alloc_call),
2328 		"at offset %s to an object allocated by %qE here",
2329 		offstr, allocfn);
2330     }
2331   else
2332     inform (gimple_location (alloc_call),
2333 	    "at offset %s to an object with size between %wu and %wu "
2334 	    "allocated by %qE here",
2335 	    offstr, sizrng[0].to_uhwi (), sizrng[1].to_uhwi (), allocfn);
2336 }
2337 
2338 /* Convenience wrapper for the above.  */
2339 
2340 static inline void
2341 maybe_warn_overflow (gimple *stmt, unsigned HOST_WIDE_INT len,
2342 		     const vr_values *rvals = NULL, strinfo *si = NULL,
2343 		     bool plus_one = false, bool rawmem = false)
2344 {
2345   maybe_warn_overflow (stmt, build_int_cst (size_type_node, len), rvals,
2346 		       si, plus_one, rawmem);
2347 }
2348 
2349 /* Handle a strlen call.  If strlen of the argument is known, replace
2350    the strlen call with the known value, otherwise remember that strlen
2351    of the argument is stored in the lhs SSA_NAME.  */
2352 
2353 static void
handle_builtin_strlen(gimple_stmt_iterator * gsi)2354 handle_builtin_strlen (gimple_stmt_iterator *gsi)
2355 {
2356   gimple *stmt = gsi_stmt (*gsi);
2357   tree lhs = gimple_call_lhs (stmt);
2358 
2359   if (lhs == NULL_TREE)
2360     return;
2361 
2362   location_t loc = gimple_location (stmt);
2363   tree callee = gimple_call_fndecl (stmt);
2364   tree src = gimple_call_arg (stmt, 0);
2365   tree bound = (DECL_FUNCTION_CODE (callee) == BUILT_IN_STRNLEN
2366 		? gimple_call_arg (stmt, 1) : NULL_TREE);
2367   int idx = get_stridx (src);
2368   if (idx || (bound && integer_zerop (bound)))
2369     {
2370       strinfo *si = NULL;
2371       tree rhs;
2372 
2373       if (idx < 0)
2374 	rhs = build_int_cst (TREE_TYPE (lhs), ~idx);
2375       else if (idx == 0)
2376 	rhs = bound;
2377       else
2378 	{
2379 	  rhs = NULL_TREE;
2380 	  si = get_strinfo (idx);
2381 	  if (si != NULL)
2382 	    {
2383 	      rhs = get_string_length (si);
2384 	      /* For strnlen, if bound is constant, even if si is not known
2385 		 to be zero terminated, if we know at least bound bytes are
2386 		 not zero, the return value will be bound.  */
2387 	      if (rhs == NULL_TREE
2388 		  && bound != NULL_TREE
2389 		  && TREE_CODE (bound) == INTEGER_CST
2390 		  && si->nonzero_chars != NULL_TREE
2391 		  && TREE_CODE (si->nonzero_chars) == INTEGER_CST
2392 		  && tree_int_cst_le (bound, si->nonzero_chars))
2393 		rhs = bound;
2394 	    }
2395 	}
2396       if (rhs != NULL_TREE)
2397 	{
2398 	  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2399 	    {
2400 	      fprintf (dump_file, "Optimizing: ");
2401 	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2402 	    }
2403 	  rhs = unshare_expr (rhs);
2404 	  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2405 	    rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2406 
2407 	  if (bound)
2408 	    rhs = fold_build2_loc (loc, MIN_EXPR, TREE_TYPE (rhs), rhs, bound);
2409 
2410 	  if (!update_call_from_tree (gsi, rhs))
2411 	    gimplify_and_update_call_from_tree (gsi, rhs);
2412 	  stmt = gsi_stmt (*gsi);
2413 	  update_stmt (stmt);
2414 	  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2415 	    {
2416 	      fprintf (dump_file, "into: ");
2417 	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2418 	    }
2419 
2420 	  if (si != NULL
2421 	      /* Don't update anything for strnlen.  */
2422 	      && bound == NULL_TREE
2423 	      && TREE_CODE (si->nonzero_chars) != SSA_NAME
2424 	      && TREE_CODE (si->nonzero_chars) != INTEGER_CST
2425 	      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2426 	    {
2427 	      si = unshare_strinfo (si);
2428 	      si->nonzero_chars = lhs;
2429 	      gcc_assert (si->full_string_p);
2430 	    }
2431 
2432 	  if (strlen_to_stridx
2433 	      && (bound == NULL_TREE
2434 		  /* For strnlen record this only if the call is proven
2435 		     to return the same value as strlen would.  */
2436 		  || (TREE_CODE (bound) == INTEGER_CST
2437 		      && TREE_CODE (rhs) == INTEGER_CST
2438 		      && tree_int_cst_lt (rhs, bound))))
2439 	    strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2440 
2441 	  return;
2442 	}
2443     }
2444   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2445     return;
2446 
2447   if (idx == 0)
2448     idx = new_stridx (src);
2449   else
2450     {
2451       strinfo *si = get_strinfo (idx);
2452       if (si != NULL)
2453 	{
2454 	  if (!si->full_string_p && !si->stmt)
2455 	    {
2456 	      /* Until now we only had a lower bound on the string length.
2457 		 Install LHS as the actual length.  */
2458 	      si = unshare_strinfo (si);
2459 	      tree old = si->nonzero_chars;
2460 	      si->nonzero_chars = lhs;
2461 	      si->full_string_p = true;
2462 	      if (old && TREE_CODE (old) == INTEGER_CST)
2463 		{
2464 		  old = fold_convert_loc (loc, TREE_TYPE (lhs), old);
2465 		  tree adj = fold_build2_loc (loc, MINUS_EXPR,
2466 					      TREE_TYPE (lhs), lhs, old);
2467 		  adjust_related_strinfos (loc, si, adj);
2468 		  /* Use the constant minimum length as the lower bound
2469 		     of the non-constant length.  */
2470 		  wide_int min = wi::to_wide (old);
2471 		  wide_int max
2472 		    = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
2473 		  set_strlen_range (lhs, min, max);
2474 		}
2475 	      else
2476 		{
2477 		  si->first = 0;
2478 		  si->prev = 0;
2479 		  si->next = 0;
2480 		}
2481 	    }
2482 	  return;
2483 	}
2484     }
2485   if (idx)
2486     {
2487       if (!bound)
2488 	{
2489 	  /* Only store the new length information for calls to strlen(),
2490 	     not for those to strnlen().  */
2491 	  strinfo *si = new_strinfo (src, idx, lhs, true);
2492 	  set_strinfo (idx, si);
2493 	  find_equal_ptrs (src, idx);
2494 	}
2495 
2496       /* For SRC that is an array of N elements, set LHS's range
2497 	 to [0, min (N, BOUND)].  A constant return value means
2498 	 the range would have consisted of a single value.  In
2499 	 that case, fold the result into the returned constant.  */
2500       if (tree ret = maybe_set_strlen_range (lhs, src, bound))
2501 	if (TREE_CODE (ret) == INTEGER_CST)
2502 	  {
2503 	    if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2504 	      {
2505 		fprintf (dump_file, "Optimizing: ");
2506 		print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2507 	      }
2508 	    if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (ret)))
2509 	      ret = fold_convert_loc (loc, TREE_TYPE (lhs), ret);
2510 	    if (!update_call_from_tree (gsi, ret))
2511 	      gimplify_and_update_call_from_tree (gsi, ret);
2512 	    stmt = gsi_stmt (*gsi);
2513 	    update_stmt (stmt);
2514 	    if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2515 	      {
2516 		fprintf (dump_file, "into: ");
2517 		print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2518 	      }
2519 	  }
2520 
2521       if (strlen_to_stridx && !bound)
2522 	strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2523     }
2524 }
2525 
2526 /* Handle a strchr call.  If strlen of the first argument is known, replace
2527    the strchr (x, 0) call with the endptr or x + strlen, otherwise remember
2528    that lhs of the call is endptr and strlen of the argument is endptr - x.  */
2529 
2530 static void
handle_builtin_strchr(gimple_stmt_iterator * gsi)2531 handle_builtin_strchr (gimple_stmt_iterator *gsi)
2532 {
2533   gimple *stmt = gsi_stmt (*gsi);
2534   tree lhs = gimple_call_lhs (stmt);
2535 
2536   if (lhs == NULL_TREE)
2537     return;
2538 
2539   if (!integer_zerop (gimple_call_arg (stmt, 1)))
2540     return;
2541 
2542   tree src = gimple_call_arg (stmt, 0);
2543 
2544   /* Avoid folding if the first argument is not a nul-terminated array.
2545      Defer warning until later.  */
2546   if (!check_nul_terminated_array (NULL_TREE, src))
2547     return;
2548 
2549   int idx = get_stridx (src);
2550   if (idx)
2551     {
2552       strinfo *si = NULL;
2553       tree rhs;
2554 
2555       if (idx < 0)
2556 	rhs = build_int_cst (size_type_node, ~idx);
2557       else
2558 	{
2559 	  rhs = NULL_TREE;
2560 	  si = get_strinfo (idx);
2561 	  if (si != NULL)
2562 	    rhs = get_string_length (si);
2563 	}
2564       if (rhs != NULL_TREE)
2565 	{
2566 	  location_t loc = gimple_location (stmt);
2567 
2568 	  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2569 	    {
2570 	      fprintf (dump_file, "Optimizing: ");
2571 	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2572 	    }
2573 	  if (si != NULL && si->endptr != NULL_TREE)
2574 	    {
2575 	      rhs = unshare_expr (si->endptr);
2576 	      if (!useless_type_conversion_p (TREE_TYPE (lhs),
2577 					      TREE_TYPE (rhs)))
2578 		rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2579 	    }
2580 	  else
2581 	    {
2582 	      rhs = fold_convert_loc (loc, sizetype, unshare_expr (rhs));
2583 	      rhs = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2584 				     TREE_TYPE (src), src, rhs);
2585 	      if (!useless_type_conversion_p (TREE_TYPE (lhs),
2586 					      TREE_TYPE (rhs)))
2587 		rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2588 	    }
2589 	  if (!update_call_from_tree (gsi, rhs))
2590 	    gimplify_and_update_call_from_tree (gsi, rhs);
2591 	  stmt = gsi_stmt (*gsi);
2592 	  update_stmt (stmt);
2593 	  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2594 	    {
2595 	      fprintf (dump_file, "into: ");
2596 	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2597 	    }
2598 	  if (si != NULL
2599 	      && si->endptr == NULL_TREE
2600 	      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2601 	    {
2602 	      si = unshare_strinfo (si);
2603 	      si->endptr = lhs;
2604 	    }
2605 	  zero_length_string (lhs, si);
2606 	  return;
2607 	}
2608     }
2609   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2610     return;
2611   if (TREE_CODE (src) != SSA_NAME || !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (src))
2612     {
2613       if (idx == 0)
2614 	idx = new_stridx (src);
2615       else if (get_strinfo (idx) != NULL)
2616 	{
2617 	  zero_length_string (lhs, NULL);
2618 	  return;
2619 	}
2620       if (idx)
2621 	{
2622 	  location_t loc = gimple_location (stmt);
2623 	  tree lhsu = fold_convert_loc (loc, size_type_node, lhs);
2624 	  tree srcu = fold_convert_loc (loc, size_type_node, src);
2625 	  tree length = fold_build2_loc (loc, MINUS_EXPR,
2626 					 size_type_node, lhsu, srcu);
2627 	  strinfo *si = new_strinfo (src, idx, length, true);
2628 	  si->endptr = lhs;
2629 	  set_strinfo (idx, si);
2630 	  find_equal_ptrs (src, idx);
2631 	  zero_length_string (lhs, si);
2632 	}
2633     }
2634   else
2635     zero_length_string (lhs, NULL);
2636 }
2637 
2638 /* Handle a strcpy-like ({st{r,p}cpy,__st{r,p}cpy_chk}) call.
2639    If strlen of the second argument is known, strlen of the first argument
2640    is the same after this call.  Furthermore, attempt to convert it to
2641    memcpy.  Uses RVALS to determine range information.  */
2642 
2643 static void
handle_builtin_strcpy(enum built_in_function bcode,gimple_stmt_iterator * gsi,const vr_values * rvals)2644 handle_builtin_strcpy (enum built_in_function bcode, gimple_stmt_iterator *gsi,
2645 		       const vr_values *rvals)
2646 {
2647   int idx, didx;
2648   tree src, dst, srclen, len, lhs, type, fn, oldlen;
2649   bool success;
2650   gimple *stmt = gsi_stmt (*gsi);
2651   strinfo *si, *dsi, *olddsi, *zsi;
2652   location_t loc;
2653 
2654   src = gimple_call_arg (stmt, 1);
2655   dst = gimple_call_arg (stmt, 0);
2656   lhs = gimple_call_lhs (stmt);
2657   idx = get_stridx (src);
2658   si = NULL;
2659   if (idx > 0)
2660     si = get_strinfo (idx);
2661 
2662   didx = get_stridx (dst);
2663   olddsi = NULL;
2664   oldlen = NULL_TREE;
2665   if (didx > 0)
2666     olddsi = get_strinfo (didx);
2667   else if (didx < 0)
2668     return;
2669 
2670   if (olddsi != NULL)
2671     adjust_last_stmt (olddsi, stmt, false);
2672 
2673   srclen = NULL_TREE;
2674   if (si != NULL)
2675     srclen = get_string_length (si);
2676   else if (idx < 0)
2677     srclen = build_int_cst (size_type_node, ~idx);
2678 
2679   maybe_warn_overflow (stmt, srclen, rvals, olddsi, true);
2680 
2681   if (olddsi != NULL)
2682     adjust_last_stmt (olddsi, stmt, false);
2683 
2684   loc = gimple_location (stmt);
2685   if (srclen == NULL_TREE)
2686     switch (bcode)
2687       {
2688       case BUILT_IN_STRCPY:
2689       case BUILT_IN_STRCPY_CHK:
2690 	if (lhs != NULL_TREE || !builtin_decl_implicit_p (BUILT_IN_STPCPY))
2691 	  return;
2692 	break;
2693       case BUILT_IN_STPCPY:
2694       case BUILT_IN_STPCPY_CHK:
2695 	if (lhs == NULL_TREE)
2696 	  return;
2697 	else
2698 	  {
2699 	    tree lhsuint = fold_convert_loc (loc, size_type_node, lhs);
2700 	    srclen = fold_convert_loc (loc, size_type_node, dst);
2701 	    srclen = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
2702 				      lhsuint, srclen);
2703 	  }
2704 	break;
2705       default:
2706 	gcc_unreachable ();
2707       }
2708 
2709   if (didx == 0)
2710     {
2711       didx = new_stridx (dst);
2712       if (didx == 0)
2713 	return;
2714     }
2715   if (olddsi != NULL)
2716     {
2717       oldlen = olddsi->nonzero_chars;
2718       dsi = unshare_strinfo (olddsi);
2719       dsi->nonzero_chars = srclen;
2720       dsi->full_string_p = (srclen != NULL_TREE);
2721       /* Break the chain, so adjust_related_strinfo on later pointers in
2722 	 the chain won't adjust this one anymore.  */
2723       dsi->next = 0;
2724       dsi->stmt = NULL;
2725       dsi->endptr = NULL_TREE;
2726     }
2727   else
2728     {
2729       dsi = new_strinfo (dst, didx, srclen, srclen != NULL_TREE);
2730       set_strinfo (didx, dsi);
2731       find_equal_ptrs (dst, didx);
2732     }
2733   dsi->writable = true;
2734   dsi->dont_invalidate = true;
2735 
2736   if (dsi->nonzero_chars == NULL_TREE)
2737     {
2738       strinfo *chainsi;
2739 
2740       /* If string length of src is unknown, use delayed length
2741 	 computation.  If string length of dst will be needed, it
2742 	 can be computed by transforming this strcpy call into
2743 	 stpcpy and subtracting dst from the return value.  */
2744 
2745       /* Look for earlier strings whose length could be determined if
2746 	 this strcpy is turned into an stpcpy.  */
2747 
2748       if (dsi->prev != 0 && (chainsi = verify_related_strinfos (dsi)) != NULL)
2749 	{
2750 	  for (; chainsi && chainsi != dsi; chainsi = get_strinfo (chainsi->next))
2751 	    {
2752 	      /* When setting a stmt for delayed length computation
2753 		 prevent all strinfos through dsi from being
2754 		 invalidated.  */
2755 	      chainsi = unshare_strinfo (chainsi);
2756 	      chainsi->stmt = stmt;
2757 	      chainsi->nonzero_chars = NULL_TREE;
2758 	      chainsi->full_string_p = false;
2759 	      chainsi->endptr = NULL_TREE;
2760 	      chainsi->dont_invalidate = true;
2761 	    }
2762 	}
2763       dsi->stmt = stmt;
2764 
2765       /* Try to detect overlap before returning.  This catches cases
2766 	 like strcpy (d, d + n) where n is non-constant whose range
2767 	 is such that (n <= strlen (d) holds).
2768 
2769 	 OLDDSI->NONZERO_chars may have been reset by this point with
2770 	 oldlen holding it original value.  */
2771       if (olddsi && oldlen)
2772 	{
2773 	  /* Add 1 for the terminating NUL.  */
2774 	  tree type = TREE_TYPE (oldlen);
2775 	  oldlen = fold_build2 (PLUS_EXPR, type, oldlen,
2776 				build_int_cst (type, 1));
2777 	  check_bounds_or_overlap (stmt, olddsi->ptr, src, oldlen, NULL_TREE);
2778 	}
2779 
2780       return;
2781     }
2782 
2783   if (olddsi != NULL)
2784     {
2785       tree adj = NULL_TREE;
2786       if (oldlen == NULL_TREE)
2787 	;
2788       else if (integer_zerop (oldlen))
2789 	adj = srclen;
2790       else if (TREE_CODE (oldlen) == INTEGER_CST
2791 	       || TREE_CODE (srclen) == INTEGER_CST)
2792 	adj = fold_build2_loc (loc, MINUS_EXPR,
2793 			       TREE_TYPE (srclen), srclen,
2794 			       fold_convert_loc (loc, TREE_TYPE (srclen),
2795 						 oldlen));
2796       if (adj != NULL_TREE)
2797 	adjust_related_strinfos (loc, dsi, adj);
2798       else
2799 	dsi->prev = 0;
2800     }
2801   /* strcpy src may not overlap dst, so src doesn't need to be
2802      invalidated either.  */
2803   if (si != NULL)
2804     si->dont_invalidate = true;
2805 
2806   fn = NULL_TREE;
2807   zsi = NULL;
2808   switch (bcode)
2809     {
2810     case BUILT_IN_STRCPY:
2811       fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
2812       if (lhs)
2813 	ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2814       break;
2815     case BUILT_IN_STRCPY_CHK:
2816       fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
2817       if (lhs)
2818 	ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2819       break;
2820     case BUILT_IN_STPCPY:
2821       /* This would need adjustment of the lhs (subtract one),
2822 	 or detection that the trailing '\0' doesn't need to be
2823 	 written, if it will be immediately overwritten.
2824       fn = builtin_decl_explicit (BUILT_IN_MEMPCPY);  */
2825       if (lhs)
2826 	{
2827 	  dsi->endptr = lhs;
2828 	  zsi = zero_length_string (lhs, dsi);
2829 	}
2830       break;
2831     case BUILT_IN_STPCPY_CHK:
2832       /* This would need adjustment of the lhs (subtract one),
2833 	 or detection that the trailing '\0' doesn't need to be
2834 	 written, if it will be immediately overwritten.
2835       fn = builtin_decl_explicit (BUILT_IN_MEMPCPY_CHK);  */
2836       if (lhs)
2837 	{
2838 	  dsi->endptr = lhs;
2839 	  zsi = zero_length_string (lhs, dsi);
2840 	}
2841       break;
2842     default:
2843       gcc_unreachable ();
2844     }
2845   if (zsi != NULL)
2846     zsi->dont_invalidate = true;
2847 
2848   if (fn)
2849     {
2850       tree args = TYPE_ARG_TYPES (TREE_TYPE (fn));
2851       type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
2852     }
2853   else
2854     type = size_type_node;
2855 
2856   len = fold_convert_loc (loc, type, unshare_expr (srclen));
2857   len = fold_build2_loc (loc, PLUS_EXPR, type, len, build_int_cst (type, 1));
2858 
2859   /* Set the no-warning bit on the transformed statement?  */
2860   bool set_no_warning = false;
2861 
2862   if (const strinfo *chksi = olddsi ? olddsi : dsi)
2863     if (si
2864 	&& check_bounds_or_overlap (stmt, chksi->ptr, si->ptr, NULL_TREE, len))
2865       {
2866 	gimple_set_no_warning (stmt, true);
2867 	set_no_warning = true;
2868       }
2869 
2870   if (fn == NULL_TREE)
2871     return;
2872 
2873   len = force_gimple_operand_gsi (gsi, len, true, NULL_TREE, true,
2874 				  GSI_SAME_STMT);
2875   if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2876     {
2877       fprintf (dump_file, "Optimizing: ");
2878       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2879     }
2880   if (gimple_call_num_args (stmt) == 2)
2881     success = update_gimple_call (gsi, fn, 3, dst, src, len);
2882   else
2883     success = update_gimple_call (gsi, fn, 4, dst, src, len,
2884 				  gimple_call_arg (stmt, 2));
2885   if (success)
2886     {
2887       stmt = gsi_stmt (*gsi);
2888       update_stmt (stmt);
2889       if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2890 	{
2891 	  fprintf (dump_file, "into: ");
2892 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2893 	}
2894       /* Allow adjust_last_stmt to decrease this memcpy's size.  */
2895       laststmt.stmt = stmt;
2896       laststmt.len = srclen;
2897       laststmt.stridx = dsi->idx;
2898     }
2899   else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2900     fprintf (dump_file, "not possible.\n");
2901 
2902   if (set_no_warning)
2903     gimple_set_no_warning (stmt, true);
2904 }
2905 
2906 /* Check the size argument to the built-in forms of stpncpy and strncpy
2907    for out-of-bounds offsets or overlapping access, and to see if the
2908    size argument is derived from a call to strlen() on the source argument,
2909    and if so, issue an appropriate warning.  */
2910 
2911 static void
handle_builtin_strncat(built_in_function,gimple_stmt_iterator * gsi)2912 handle_builtin_strncat (built_in_function, gimple_stmt_iterator *gsi)
2913 {
2914   /* Same as stxncpy().  */
2915   handle_builtin_stxncpy_strncat (true, gsi);
2916 }
2917 
2918 /* Return true if LEN depends on a call to strlen(SRC) in an interesting
2919    way.  LEN can either be an integer expression, or a pointer (to char).
2920    When it is the latter (such as in recursive calls to self) it is
2921    assumed to be the argument in some call to strlen() whose relationship
2922    to SRC is being ascertained.  */
2923 
2924 bool
is_strlen_related_p(tree src,tree len)2925 is_strlen_related_p (tree src, tree len)
2926 {
2927   if (TREE_CODE (TREE_TYPE (len)) == POINTER_TYPE
2928       && operand_equal_p (src, len, 0))
2929     return true;
2930 
2931   if (TREE_CODE (len) != SSA_NAME)
2932     return false;
2933 
2934   if (TREE_CODE (src) == SSA_NAME)
2935     {
2936       gimple *srcdef = SSA_NAME_DEF_STMT (src);
2937       if (is_gimple_assign (srcdef))
2938 	{
2939 	  /* Handle bitwise AND used in conversions from wider size_t
2940 	     to narrower unsigned types.  */
2941 	  tree_code code = gimple_assign_rhs_code (srcdef);
2942 	  if (code == BIT_AND_EXPR
2943 	      || code == NOP_EXPR)
2944 	    return is_strlen_related_p (gimple_assign_rhs1 (srcdef), len);
2945 
2946 	  return false;
2947 	}
2948 
2949       if (gimple_call_builtin_p (srcdef, BUILT_IN_NORMAL))
2950 	{
2951 	  /* If SRC is the result of a call to an allocation function
2952 	     or strlen, use the function's argument instead.  */
2953 	  tree func = gimple_call_fndecl (srcdef);
2954 	  built_in_function code = DECL_FUNCTION_CODE (func);
2955 	  if (code == BUILT_IN_ALLOCA
2956 	      || code == BUILT_IN_ALLOCA_WITH_ALIGN
2957 	      || code == BUILT_IN_MALLOC
2958 	      || code == BUILT_IN_STRLEN)
2959 	    return is_strlen_related_p (gimple_call_arg (srcdef, 0), len);
2960 
2961 	  /* FIXME: Handle other functions with attribute alloc_size.  */
2962 	  return false;
2963 	}
2964     }
2965 
2966   gimple *lendef = SSA_NAME_DEF_STMT (len);
2967   if (!lendef)
2968     return false;
2969 
2970   if (is_gimple_call (lendef))
2971     {
2972       tree func = gimple_call_fndecl (lendef);
2973       if (!valid_builtin_call (lendef)
2974 	  || DECL_FUNCTION_CODE (func) != BUILT_IN_STRLEN)
2975 	return false;
2976 
2977       tree arg = gimple_call_arg (lendef, 0);
2978       return is_strlen_related_p (src, arg);
2979     }
2980 
2981   if (!is_gimple_assign (lendef))
2982     return false;
2983 
2984   tree_code code = gimple_assign_rhs_code (lendef);
2985   tree rhs1 = gimple_assign_rhs1 (lendef);
2986   tree rhstype = TREE_TYPE (rhs1);
2987 
2988   if ((TREE_CODE (rhstype) == POINTER_TYPE && code == POINTER_PLUS_EXPR)
2989       || (INTEGRAL_TYPE_P (rhstype)
2990 	  && (code == BIT_AND_EXPR
2991 	      || code == NOP_EXPR)))
2992     {
2993       /* Pointer plus (an integer), and truncation are considered among
2994 	 the (potentially) related expressions to strlen.  */
2995       return is_strlen_related_p (src, rhs1);
2996     }
2997 
2998   if (tree rhs2 = gimple_assign_rhs2 (lendef))
2999     {
3000       /* Integer subtraction is considered strlen-related when both
3001 	 arguments are integers and second one is strlen-related.  */
3002       rhstype = TREE_TYPE (rhs2);
3003       if (INTEGRAL_TYPE_P (rhstype) && code == MINUS_EXPR)
3004 	return is_strlen_related_p (src, rhs2);
3005     }
3006 
3007   return false;
3008 }
3009 
3010 /* Called by handle_builtin_stxncpy_strncat and by
3011    gimple_fold_builtin_strncpy in gimple-fold.c.
3012    Check to see if the specified bound is a) equal to the size of
3013    the destination DST and if so, b) if it's immediately followed by
3014    DST[CNT - 1] = '\0'.  If a) holds and b) does not, warn.  Otherwise,
3015    do nothing.  Return true if diagnostic has been issued.
3016 
3017    The purpose is to diagnose calls to strncpy and stpncpy that do
3018    not nul-terminate the copy while allowing for the idiom where
3019    such a call is immediately followed by setting the last element
3020    to nul, as in:
3021      char a[32];
3022      strncpy (a, s, sizeof a);
3023      a[sizeof a - 1] = '\0';
3024 */
3025 
3026 bool
maybe_diag_stxncpy_trunc(gimple_stmt_iterator gsi,tree src,tree cnt)3027 maybe_diag_stxncpy_trunc (gimple_stmt_iterator gsi, tree src, tree cnt)
3028 {
3029   gimple *stmt = gsi_stmt (gsi);
3030   if (gimple_no_warning_p (stmt))
3031     return false;
3032 
3033   wide_int cntrange[2];
3034 
3035   if (TREE_CODE (cnt) == INTEGER_CST)
3036     cntrange[0] = cntrange[1] = wi::to_wide (cnt);
3037   else if (TREE_CODE (cnt) == SSA_NAME)
3038     {
3039       enum value_range_kind rng = get_range_info (cnt, cntrange, cntrange + 1);
3040       if (rng == VR_RANGE)
3041 	;
3042       else if (rng == VR_ANTI_RANGE)
3043 	{
3044 	  wide_int maxobjsize = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node));
3045 
3046 	  if (wi::ltu_p (cntrange[1], maxobjsize))
3047 	    {
3048 	      cntrange[0] = cntrange[1] + 1;
3049 	      cntrange[1] = maxobjsize;
3050 	    }
3051 	  else
3052 	    {
3053 	      cntrange[1] = cntrange[0] - 1;
3054 	      cntrange[0] = wi::zero (TYPE_PRECISION (TREE_TYPE (cnt)));
3055 	    }
3056 	}
3057       else
3058 	return false;
3059     }
3060   else
3061     return false;
3062 
3063   /* Negative value is the constant string length.  If it's less than
3064      the lower bound there is no truncation.  Avoid calling get_stridx()
3065      when ssa_ver_to_stridx is empty.  That implies the caller isn't
3066      running under the control of this pass and ssa_ver_to_stridx hasn't
3067      been created yet.  */
3068   int sidx = ssa_ver_to_stridx.length () ? get_stridx (src) : 0;
3069   if (sidx < 0 && wi::gtu_p (cntrange[0], ~sidx))
3070     return false;
3071 
3072   tree dst = gimple_call_arg (stmt, 0);
3073   tree dstdecl = dst;
3074   if (TREE_CODE (dstdecl) == ADDR_EXPR)
3075     dstdecl = TREE_OPERAND (dstdecl, 0);
3076 
3077   tree ref = NULL_TREE;
3078 
3079   if (!sidx)
3080     {
3081       /* If the source is a non-string return early to avoid warning
3082 	 for possible truncation (if the truncation is certain SIDX
3083 	 is non-zero).  */
3084       tree srcdecl = gimple_call_arg (stmt, 1);
3085       if (TREE_CODE (srcdecl) == ADDR_EXPR)
3086 	srcdecl = TREE_OPERAND (srcdecl, 0);
3087       if (get_attr_nonstring_decl (srcdecl, &ref))
3088 	return false;
3089     }
3090 
3091   /* Likewise, if the destination refers to an array/pointer declared
3092      nonstring return early.  */
3093   if (get_attr_nonstring_decl (dstdecl, &ref))
3094     return false;
3095 
3096   /* Look for dst[i] = '\0'; after the stxncpy() call and if found
3097      avoid the truncation warning.  */
3098   gsi_next_nondebug (&gsi);
3099   gimple *next_stmt = gsi_stmt (gsi);
3100   if (!next_stmt)
3101     {
3102       /* When there is no statement in the same basic block check
3103 	 the immediate successor block.  */
3104       if (basic_block bb = gimple_bb (stmt))
3105 	{
3106 	  if (single_succ_p (bb))
3107 	    {
3108 	      /* For simplicity, ignore blocks with multiple outgoing
3109 		 edges for now and only consider successor blocks along
3110 		 normal edges.  */
3111 	      edge e = EDGE_SUCC (bb, 0);
3112 	      if (!(e->flags & EDGE_ABNORMAL))
3113 		{
3114 		  gsi = gsi_start_bb (e->dest);
3115 		  next_stmt = gsi_stmt (gsi);
3116 		  if (next_stmt && is_gimple_debug (next_stmt))
3117 		    {
3118 		      gsi_next_nondebug (&gsi);
3119 		      next_stmt = gsi_stmt (gsi);
3120 		    }
3121 		}
3122 	    }
3123 	}
3124     }
3125 
3126   if (next_stmt && is_gimple_assign (next_stmt))
3127     {
3128       tree lhs = gimple_assign_lhs (next_stmt);
3129       tree_code code = TREE_CODE (lhs);
3130       if (code == ARRAY_REF || code == MEM_REF)
3131 	lhs = TREE_OPERAND (lhs, 0);
3132 
3133       tree func = gimple_call_fndecl (stmt);
3134       if (DECL_FUNCTION_CODE (func) == BUILT_IN_STPNCPY)
3135 	{
3136 	  tree ret = gimple_call_lhs (stmt);
3137 	  if (ret && operand_equal_p (ret, lhs, 0))
3138 	    return false;
3139 	}
3140 
3141       /* Determine the base address and offset of the reference,
3142 	 ignoring the innermost array index.  */
3143       if (TREE_CODE (ref) == ARRAY_REF)
3144 	ref = TREE_OPERAND (ref, 0);
3145 
3146       poly_int64 dstoff;
3147       tree dstbase = get_addr_base_and_unit_offset (ref, &dstoff);
3148 
3149       poly_int64 lhsoff;
3150       tree lhsbase = get_addr_base_and_unit_offset (lhs, &lhsoff);
3151       if (lhsbase
3152 	  && dstbase
3153 	  && known_eq (dstoff, lhsoff)
3154 	  && operand_equal_p (dstbase, lhsbase, 0))
3155 	return false;
3156     }
3157 
3158   int prec = TYPE_PRECISION (TREE_TYPE (cnt));
3159   wide_int lenrange[2];
3160   if (strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL)
3161     {
3162       lenrange[0] = (sisrc->nonzero_chars
3163 		     && TREE_CODE (sisrc->nonzero_chars) == INTEGER_CST
3164 		     ? wi::to_wide (sisrc->nonzero_chars)
3165 		     : wi::zero (prec));
3166       lenrange[1] = lenrange[0];
3167     }
3168   else if (sidx < 0)
3169     lenrange[0] = lenrange[1] = wi::shwi (~sidx, prec);
3170   else
3171     {
3172       c_strlen_data lendata = { };
3173       /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
3174 	 to have it set to the length of the longest string in a PHI.  */
3175       lendata.maxbound = src;
3176       get_range_strlen (src, &lendata, /* eltsize = */1);
3177       if (TREE_CODE (lendata.minlen) == INTEGER_CST
3178 	  && TREE_CODE (lendata.maxbound) == INTEGER_CST)
3179 	{
3180 	  /* When LENDATA.MAXLEN is unknown, reset LENDATA.MINLEN
3181 	     which stores the length of the shortest known string.  */
3182 	  if (integer_all_onesp (lendata.maxlen))
3183 	    lenrange[0] = wi::shwi (0, prec);
3184 	  else
3185 	    lenrange[0] = wi::to_wide (lendata.minlen, prec);
3186 	  lenrange[1] = wi::to_wide (lendata.maxbound, prec);
3187 	}
3188       else
3189 	{
3190 	  lenrange[0] = wi::shwi (0, prec);
3191 	  lenrange[1] = wi::shwi (-1, prec);
3192 	}
3193     }
3194 
3195   location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3196   tree func = gimple_call_fndecl (stmt);
3197 
3198   if (lenrange[0] != 0 || !wi::neg_p (lenrange[1]))
3199     {
3200       /* If the longest source string is shorter than the lower bound
3201 	 of the specified count the copy is definitely nul-terminated.  */
3202       if (wi::ltu_p (lenrange[1], cntrange[0]))
3203 	return false;
3204 
3205       if (wi::neg_p (lenrange[1]))
3206 	{
3207 	  /* The length of one of the strings is unknown but at least
3208 	     one has non-zero length and that length is stored in
3209 	     LENRANGE[1].  Swap the bounds to force a "may be truncated"
3210 	     warning below.  */
3211 	  lenrange[1] = lenrange[0];
3212 	  lenrange[0] = wi::shwi (0, prec);
3213 	}
3214 
3215       /* Set to true for strncat whose bound is derived from the length
3216 	 of the destination (the expected usage pattern).  */
3217       bool cat_dstlen_bounded = false;
3218       if (DECL_FUNCTION_CODE (func) == BUILT_IN_STRNCAT)
3219 	cat_dstlen_bounded = is_strlen_related_p (dst, cnt);
3220 
3221       if (lenrange[0] == cntrange[1] && cntrange[0] == cntrange[1])
3222 	return warning_n (callloc, OPT_Wstringop_truncation,
3223 			  cntrange[0].to_uhwi (),
3224 			  "%G%qD output truncated before terminating "
3225 			  "nul copying %E byte from a string of the "
3226 			  "same length",
3227 			  "%G%qD output truncated before terminating nul "
3228 			  "copying %E bytes from a string of the same "
3229 			  "length",
3230 			  stmt, func, cnt);
3231       else if (!cat_dstlen_bounded)
3232 	{
3233 	  if (wi::geu_p (lenrange[0], cntrange[1]))
3234 	    {
3235 	      /* The shortest string is longer than the upper bound of
3236 		 the count so the truncation is certain.  */
3237 	      if (cntrange[0] == cntrange[1])
3238 		return warning_n (callloc, OPT_Wstringop_truncation,
3239 				  cntrange[0].to_uhwi (),
3240 				  "%G%qD output truncated copying %E byte "
3241 				  "from a string of length %wu",
3242 				  "%G%qD output truncated copying %E bytes "
3243 				  "from a string of length %wu",
3244 				  stmt, func, cnt, lenrange[0].to_uhwi ());
3245 
3246 	      return warning_at (callloc, OPT_Wstringop_truncation,
3247 				 "%G%qD output truncated copying between %wu "
3248 				 "and %wu bytes from a string of length %wu",
3249 				 stmt, func, cntrange[0].to_uhwi (),
3250 				 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3251 	    }
3252 	  else if (wi::geu_p (lenrange[1], cntrange[1]))
3253 	    {
3254 	      /* The longest string is longer than the upper bound of
3255 		 the count so the truncation is possible.  */
3256 	      if (cntrange[0] == cntrange[1])
3257 		return warning_n (callloc, OPT_Wstringop_truncation,
3258 				  cntrange[0].to_uhwi (),
3259 				  "%G%qD output may be truncated copying %E "
3260 				  "byte from a string of length %wu",
3261 				  "%G%qD output may be truncated copying %E "
3262 				  "bytes from a string of length %wu",
3263 				  stmt, func, cnt, lenrange[1].to_uhwi ());
3264 
3265 	      return warning_at (callloc, OPT_Wstringop_truncation,
3266 				 "%G%qD output may be truncated copying between "
3267 				 "%wu and %wu bytes from a string of length %wu",
3268 				 stmt, func, cntrange[0].to_uhwi (),
3269 				 cntrange[1].to_uhwi (), lenrange[1].to_uhwi ());
3270 	    }
3271 	}
3272 
3273       if (!cat_dstlen_bounded
3274 	  && cntrange[0] != cntrange[1]
3275 	  && wi::leu_p (cntrange[0], lenrange[0])
3276 	  && wi::leu_p (cntrange[1], lenrange[0] + 1))
3277 	{
3278 	  /* If the source (including the terminating nul) is longer than
3279 	     the lower bound of the specified count but shorter than the
3280 	     upper bound the copy may (but need not) be truncated.  */
3281 	  return warning_at (callloc, OPT_Wstringop_truncation,
3282 			     "%G%qD output may be truncated copying between "
3283 			     "%wu and %wu bytes from a string of length %wu",
3284 			     stmt, func, cntrange[0].to_uhwi (),
3285 			     cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3286 	}
3287     }
3288 
3289   if (tree dstsize = compute_objsize (dst, 1))
3290     {
3291       /* The source length is unknown.  Try to determine the destination
3292 	 size and see if it matches the specified bound.  If not, bail.
3293 	 Otherwise go on to see if it should be diagnosed for possible
3294 	 truncation.  */
3295       if (!dstsize)
3296 	return false;
3297 
3298       if (wi::to_wide (dstsize) != cntrange[1])
3299 	return false;
3300 
3301       /* Avoid warning for strncpy(a, b, N) calls where the following
3302 	 equalities hold:
3303 	   N == sizeof a && N == sizeof b */
3304       if (tree srcsize = compute_objsize (src, 1))
3305 	if (wi::to_wide (srcsize) == cntrange[1])
3306 	  return false;
3307 
3308       if (cntrange[0] == cntrange[1])
3309 	return warning_at (callloc, OPT_Wstringop_truncation,
3310 			   "%G%qD specified bound %E equals destination size",
3311 			   stmt, func, cnt);
3312     }
3313 
3314   return false;
3315 }
3316 
3317 /* Check the arguments to the built-in forms of stpncpy, strncpy, and
3318    strncat, for out-of-bounds offsets or overlapping access, and to see
3319    if the size is derived from calling strlen() on the source argument,
3320    and if so, issue the appropriate warning.
3321    APPEND_P is true for strncat.  */
3322 
3323 static void
handle_builtin_stxncpy_strncat(bool append_p,gimple_stmt_iterator * gsi)3324 handle_builtin_stxncpy_strncat (bool append_p, gimple_stmt_iterator *gsi)
3325 {
3326   if (!strlen_to_stridx)
3327     return;
3328 
3329   gimple *stmt = gsi_stmt (*gsi);
3330 
3331   tree dst = gimple_call_arg (stmt, 0);
3332   tree src = gimple_call_arg (stmt, 1);
3333   tree len = gimple_call_arg (stmt, 2);
3334   tree dstsize = NULL_TREE, srcsize = NULL_TREE;
3335 
3336   int didx = get_stridx (dst);
3337   if (strinfo *sidst = didx > 0 ? get_strinfo (didx) : NULL)
3338     {
3339       /* Compute the size of the destination string including the nul
3340 	 if it is known to be nul-terminated.  */
3341       if (sidst->nonzero_chars)
3342 	{
3343 	  if (sidst->full_string_p)
3344 	    {
3345 	      /* String is known to be nul-terminated.  */
3346 	      tree type = TREE_TYPE (sidst->nonzero_chars);
3347 	      dstsize = fold_build2 (PLUS_EXPR, type, sidst->nonzero_chars,
3348 				     build_int_cst (type, 1));
3349 	    }
3350 	  else
3351 	    dstsize = sidst->nonzero_chars;
3352 	}
3353 
3354       dst = sidst->ptr;
3355     }
3356 
3357   int sidx = get_stridx (src);
3358   strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL;
3359   if (sisrc)
3360     {
3361       /* strncat() and strncpy() can modify the source string by writing
3362 	 over the terminating nul so SISRC->DONT_INVALIDATE must be left
3363 	 clear.  */
3364 
3365       /* Compute the size of the source string including the terminating
3366 	 nul if its known to be nul-terminated.  */
3367       if (sisrc->nonzero_chars)
3368 	{
3369 	  if (sisrc->full_string_p)
3370 	    {
3371 	      tree type = TREE_TYPE (sisrc->nonzero_chars);
3372 	      srcsize = fold_build2 (PLUS_EXPR, type, sisrc->nonzero_chars,
3373 				     build_int_cst (type, 1));
3374 	    }
3375 	  else
3376 	    srcsize = sisrc->nonzero_chars;
3377 	}
3378 
3379 	src = sisrc->ptr;
3380     }
3381   else
3382     srcsize = NULL_TREE;
3383 
3384   if (check_bounds_or_overlap (stmt, dst, src, dstsize, srcsize))
3385     {
3386       gimple_set_no_warning (stmt, true);
3387       return;
3388     }
3389 
3390   /* If the length argument was computed from strlen(S) for some string
3391      S retrieve the strinfo index for the string (PSS->FIRST) along with
3392      the location of the strlen() call (PSS->SECOND).  */
3393   stridx_strlenloc *pss = strlen_to_stridx->get (len);
3394   if (!pss || pss->first <= 0)
3395     {
3396       if (maybe_diag_stxncpy_trunc (*gsi, src, len))
3397 	gimple_set_no_warning (stmt, true);
3398 
3399       return;
3400     }
3401 
3402   /* Retrieve the strinfo data for the string S that LEN was computed
3403      from as some function F of strlen (S) (i.e., LEN need not be equal
3404      to strlen(S)).  */
3405   strinfo *silen = get_strinfo (pss->first);
3406 
3407   location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3408 
3409   tree func = gimple_call_fndecl (stmt);
3410 
3411   bool warned = false;
3412 
3413   /* When -Wstringop-truncation is set, try to determine truncation
3414      before diagnosing possible overflow.  Truncation is implied by
3415      the LEN argument being equal to strlen(SRC), regardless of
3416      whether its value is known.  Otherwise, issue the more generic
3417      -Wstringop-overflow which triggers for LEN arguments that in
3418      any meaningful way depend on strlen(SRC).  */
3419   if (!append_p
3420       && sisrc == silen
3421       && is_strlen_related_p (src, len)
3422       && warning_at (callloc, OPT_Wstringop_truncation,
3423 		     "%G%qD output truncated before terminating nul "
3424 		     "copying as many bytes from a string as its length",
3425 		     stmt, func))
3426     warned = true;
3427   else if (silen && is_strlen_related_p (src, silen->ptr))
3428     warned = warning_at (callloc, OPT_Wstringop_overflow_,
3429 			 "%G%qD specified bound depends on the length "
3430 			 "of the source argument",
3431 			 stmt, func);
3432   if (warned)
3433     {
3434       location_t strlenloc = pss->second;
3435       if (strlenloc != UNKNOWN_LOCATION && strlenloc != callloc)
3436 	inform (strlenloc, "length computed here");
3437     }
3438 }
3439 
3440 /* Handle a memcpy-like ({mem{,p}cpy,__mem{,p}cpy_chk}) call.
3441    If strlen of the second argument is known and length of the third argument
3442    is that plus one, strlen of the first argument is the same after this
3443    call.  Uses RVALS to determine range information.  */
3444 
3445 static void
handle_builtin_memcpy(enum built_in_function bcode,gimple_stmt_iterator * gsi,const vr_values * rvals)3446 handle_builtin_memcpy (enum built_in_function bcode, gimple_stmt_iterator *gsi,
3447 		       const vr_values *rvals)
3448 {
3449   tree lhs, oldlen, newlen;
3450   gimple *stmt = gsi_stmt (*gsi);
3451   strinfo *si, *dsi;
3452 
3453   tree len = gimple_call_arg (stmt, 2);
3454   tree src = gimple_call_arg (stmt, 1);
3455   tree dst = gimple_call_arg (stmt, 0);
3456 
3457   int didx = get_stridx (dst);
3458   strinfo *olddsi = NULL;
3459   if (didx > 0)
3460     olddsi = get_strinfo (didx);
3461   else if (didx < 0)
3462     return;
3463 
3464   if (olddsi != NULL
3465       && !integer_zerop (len))
3466     {
3467       maybe_warn_overflow (stmt, len, rvals, olddsi, false, true);
3468       adjust_last_stmt (olddsi, stmt, false);
3469     }
3470 
3471   int idx = get_stridx (src);
3472   if (idx == 0)
3473     return;
3474 
3475   bool full_string_p;
3476   if (idx > 0)
3477     {
3478       gimple *def_stmt;
3479 
3480       /* Handle memcpy (x, y, l) where l's relationship with strlen (y)
3481 	 is known.  */
3482       si = get_strinfo (idx);
3483       if (si == NULL || si->nonzero_chars == NULL_TREE)
3484 	return;
3485       if (TREE_CODE (len) == INTEGER_CST
3486 	  && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
3487 	{
3488 	  if (tree_int_cst_le (len, si->nonzero_chars))
3489 	    {
3490 	      /* Copying LEN nonzero characters, where LEN is constant.  */
3491 	      newlen = len;
3492 	      full_string_p = false;
3493 	    }
3494 	  else
3495 	    {
3496 	      /* Copying the whole of the analyzed part of SI.  */
3497 	      newlen = si->nonzero_chars;
3498 	      full_string_p = si->full_string_p;
3499 	    }
3500 	}
3501       else
3502 	{
3503 	  if (!si->full_string_p)
3504 	    return;
3505 	  if (TREE_CODE (len) != SSA_NAME)
3506 	    return;
3507 	  def_stmt = SSA_NAME_DEF_STMT (len);
3508 	  if (!is_gimple_assign (def_stmt)
3509 	      || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
3510 	      || gimple_assign_rhs1 (def_stmt) != si->nonzero_chars
3511 	      || !integer_onep (gimple_assign_rhs2 (def_stmt)))
3512 	    return;
3513 	  /* Copying variable-length string SI (and no more).  */
3514 	  newlen = si->nonzero_chars;
3515 	  full_string_p = true;
3516 	}
3517     }
3518   else
3519     {
3520       si = NULL;
3521       /* Handle memcpy (x, "abcd", 5) or
3522 	 memcpy (x, "abc\0uvw", 7).  */
3523       if (!tree_fits_uhwi_p (len))
3524 	return;
3525 
3526       unsigned HOST_WIDE_INT clen = tree_to_uhwi (len);
3527       unsigned HOST_WIDE_INT nonzero_chars = ~idx;
3528       newlen = build_int_cst (size_type_node, MIN (nonzero_chars, clen));
3529       full_string_p = clen > nonzero_chars;
3530     }
3531 
3532   if (!full_string_p
3533       && olddsi
3534       && olddsi->nonzero_chars
3535       && TREE_CODE (olddsi->nonzero_chars) == INTEGER_CST
3536       && tree_int_cst_le (newlen, olddsi->nonzero_chars))
3537     {
3538       /* The SRC substring being written strictly overlaps
3539 	 a subsequence of the existing string OLDDSI.  */
3540       newlen = olddsi->nonzero_chars;
3541       full_string_p = olddsi->full_string_p;
3542     }
3543 
3544   if (olddsi != NULL && TREE_CODE (len) == SSA_NAME)
3545     adjust_last_stmt (olddsi, stmt, false);
3546 
3547   if (didx == 0)
3548     {
3549       didx = new_stridx (dst);
3550       if (didx == 0)
3551 	return;
3552     }
3553   oldlen = NULL_TREE;
3554   if (olddsi != NULL)
3555     {
3556       dsi = unshare_strinfo (olddsi);
3557       oldlen = olddsi->nonzero_chars;
3558       dsi->nonzero_chars = newlen;
3559       dsi->full_string_p = full_string_p;
3560       /* Break the chain, so adjust_related_strinfo on later pointers in
3561 	 the chain won't adjust this one anymore.  */
3562       dsi->next = 0;
3563       dsi->stmt = NULL;
3564       dsi->endptr = NULL_TREE;
3565     }
3566   else
3567     {
3568       dsi = new_strinfo (dst, didx, newlen, full_string_p);
3569       set_strinfo (didx, dsi);
3570       find_equal_ptrs (dst, didx);
3571     }
3572   dsi->writable = true;
3573   dsi->dont_invalidate = true;
3574   if (olddsi != NULL)
3575     {
3576       tree adj = NULL_TREE;
3577       location_t loc = gimple_location (stmt);
3578       if (oldlen == NULL_TREE)
3579 	;
3580       else if (integer_zerop (oldlen))
3581 	adj = newlen;
3582       else if (TREE_CODE (oldlen) == INTEGER_CST
3583 	       || TREE_CODE (newlen) == INTEGER_CST)
3584 	adj = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (newlen), newlen,
3585 			       fold_convert_loc (loc, TREE_TYPE (newlen),
3586 						 oldlen));
3587       if (adj != NULL_TREE)
3588 	adjust_related_strinfos (loc, dsi, adj);
3589       else
3590 	dsi->prev = 0;
3591     }
3592   /* memcpy src may not overlap dst, so src doesn't need to be
3593      invalidated either.  */
3594   if (si != NULL)
3595     si->dont_invalidate = true;
3596 
3597   if (full_string_p)
3598     {
3599       lhs = gimple_call_lhs (stmt);
3600       switch (bcode)
3601 	{
3602 	case BUILT_IN_MEMCPY:
3603 	case BUILT_IN_MEMCPY_CHK:
3604 	  /* Allow adjust_last_stmt to decrease this memcpy's size.  */
3605 	  laststmt.stmt = stmt;
3606 	  laststmt.len = dsi->nonzero_chars;
3607 	  laststmt.stridx = dsi->idx;
3608 	  if (lhs)
3609 	    ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
3610 	  break;
3611 	case BUILT_IN_MEMPCPY:
3612 	case BUILT_IN_MEMPCPY_CHK:
3613 	  break;
3614 	default:
3615 	  gcc_unreachable ();
3616 	}
3617     }
3618 }
3619 
3620 /* Handle a strcat-like ({strcat,__strcat_chk}) call.
3621    If strlen of the second argument is known, strlen of the first argument
3622    is increased by the length of the second argument.  Furthermore, attempt
3623    to convert it to memcpy/strcpy if the length of the first argument
3624    is known.  */
3625 
3626 static void
handle_builtin_strcat(enum built_in_function bcode,gimple_stmt_iterator * gsi)3627 handle_builtin_strcat (enum built_in_function bcode, gimple_stmt_iterator *gsi)
3628 {
3629   int idx, didx;
3630   tree srclen, args, type, fn, objsz, endptr;
3631   bool success;
3632   gimple *stmt = gsi_stmt (*gsi);
3633   strinfo *si, *dsi;
3634   location_t loc = gimple_location (stmt);
3635 
3636   tree src = gimple_call_arg (stmt, 1);
3637   tree dst = gimple_call_arg (stmt, 0);
3638 
3639   /* Bail if the source is the same as destination.  It will be diagnosed
3640      elsewhere.  */
3641   if (operand_equal_p (src, dst, 0))
3642     return;
3643 
3644   tree lhs = gimple_call_lhs (stmt);
3645 
3646   didx = get_stridx (dst);
3647   if (didx < 0)
3648     return;
3649 
3650   dsi = NULL;
3651   if (didx > 0)
3652     dsi = get_strinfo (didx);
3653 
3654   srclen = NULL_TREE;
3655   si = NULL;
3656   idx = get_stridx (src);
3657   if (idx < 0)
3658     srclen = build_int_cst (size_type_node, ~idx);
3659   else if (idx > 0)
3660     {
3661       si = get_strinfo (idx);
3662       if (si != NULL)
3663 	srclen = get_string_length (si);
3664     }
3665 
3666   /* Set the no-warning bit on the transformed statement?  */
3667   bool set_no_warning = false;
3668 
3669   if (dsi == NULL || get_string_length (dsi) == NULL_TREE)
3670     {
3671       {
3672 	  /* The concatenation always involves copying at least one byte
3673 	     (the terminating nul), even if the source string is empty.
3674 	     If the source is unknown assume it's one character long and
3675 	     used that as both sizes.  */
3676 	tree slen = srclen;
3677 	if (slen)
3678 	  {
3679 	    tree type = TREE_TYPE (slen);
3680 	    slen = fold_build2 (PLUS_EXPR, type, slen, build_int_cst (type, 1));
3681 	  }
3682 
3683 	tree sptr = si && si->ptr ? si->ptr : src;
3684 
3685 	if (check_bounds_or_overlap (stmt, dst, sptr, NULL_TREE, slen))
3686 	  {
3687 	    gimple_set_no_warning (stmt, true);
3688 	    set_no_warning = true;
3689 	  }
3690       }
3691 
3692       /* strcat (p, q) can be transformed into
3693 	 tmp = p + strlen (p); endptr = stpcpy (tmp, q);
3694 	 with length endptr - p if we need to compute the length
3695 	 later on.  Don't do this transformation if we don't need
3696 	 it.  */
3697       if (builtin_decl_implicit_p (BUILT_IN_STPCPY) && lhs == NULL_TREE)
3698 	{
3699 	  if (didx == 0)
3700 	    {
3701 	      didx = new_stridx (dst);
3702 	      if (didx == 0)
3703 		return;
3704 	    }
3705 	  if (dsi == NULL)
3706 	    {
3707 	      dsi = new_strinfo (dst, didx, NULL_TREE, false);
3708 	      set_strinfo (didx, dsi);
3709 	      find_equal_ptrs (dst, didx);
3710 	    }
3711 	  else
3712 	    {
3713 	      dsi = unshare_strinfo (dsi);
3714 	      dsi->nonzero_chars = NULL_TREE;
3715 	      dsi->full_string_p = false;
3716 	      dsi->next = 0;
3717 	      dsi->endptr = NULL_TREE;
3718 	    }
3719 	  dsi->writable = true;
3720 	  dsi->stmt = stmt;
3721 	  dsi->dont_invalidate = true;
3722 	}
3723       return;
3724     }
3725 
3726   tree dstlen = dsi->nonzero_chars;
3727   endptr = dsi->endptr;
3728 
3729   dsi = unshare_strinfo (dsi);
3730   dsi->endptr = NULL_TREE;
3731   dsi->stmt = NULL;
3732   dsi->writable = true;
3733 
3734   if (srclen != NULL_TREE)
3735     {
3736       dsi->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
3737 					    TREE_TYPE (dsi->nonzero_chars),
3738 					    dsi->nonzero_chars, srclen);
3739       gcc_assert (dsi->full_string_p);
3740       adjust_related_strinfos (loc, dsi, srclen);
3741       dsi->dont_invalidate = true;
3742     }
3743   else
3744     {
3745       dsi->nonzero_chars = NULL;
3746       dsi->full_string_p = false;
3747       if (lhs == NULL_TREE && builtin_decl_implicit_p (BUILT_IN_STPCPY))
3748 	dsi->dont_invalidate = true;
3749     }
3750 
3751   if (si != NULL)
3752     /* strcat src may not overlap dst, so src doesn't need to be
3753        invalidated either.  */
3754     si->dont_invalidate = true;
3755 
3756   /* For now.  Could remove the lhs from the call and add
3757      lhs = dst; afterwards.  */
3758   if (lhs)
3759     return;
3760 
3761   fn = NULL_TREE;
3762   objsz = NULL_TREE;
3763   switch (bcode)
3764     {
3765     case BUILT_IN_STRCAT:
3766       if (srclen != NULL_TREE)
3767 	fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
3768       else
3769 	fn = builtin_decl_implicit (BUILT_IN_STRCPY);
3770       break;
3771     case BUILT_IN_STRCAT_CHK:
3772       if (srclen != NULL_TREE)
3773 	fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
3774       else
3775 	fn = builtin_decl_explicit (BUILT_IN_STRCPY_CHK);
3776       objsz = gimple_call_arg (stmt, 2);
3777       break;
3778     default:
3779       gcc_unreachable ();
3780     }
3781 
3782   if (fn == NULL_TREE)
3783     return;
3784 
3785   if (dsi && dstlen)
3786     {
3787       tree type = TREE_TYPE (dstlen);
3788 
3789       /* Compute the size of the source sequence, including the nul.  */
3790       tree srcsize = srclen ? srclen : size_zero_node;
3791       tree one = build_int_cst (type, 1);
3792       srcsize = fold_build2 (PLUS_EXPR, type, srcsize, one);
3793       tree dstsize = fold_build2 (PLUS_EXPR, type, dstlen, one);
3794       tree sptr = si && si->ptr ? si->ptr : src;
3795 
3796       if (check_bounds_or_overlap (stmt, dst, sptr, dstsize, srcsize))
3797 	{
3798 	  gimple_set_no_warning (stmt, true);
3799 	  set_no_warning = true;
3800 	}
3801     }
3802 
3803   tree len = NULL_TREE;
3804   if (srclen != NULL_TREE)
3805     {
3806       args = TYPE_ARG_TYPES (TREE_TYPE (fn));
3807       type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
3808 
3809       len = fold_convert_loc (loc, type, unshare_expr (srclen));
3810       len = fold_build2_loc (loc, PLUS_EXPR, type, len,
3811 			     build_int_cst (type, 1));
3812       len = force_gimple_operand_gsi (gsi, len, true, NULL_TREE, true,
3813 				      GSI_SAME_STMT);
3814     }
3815   if (endptr)
3816     dst = fold_convert_loc (loc, TREE_TYPE (dst), unshare_expr (endptr));
3817   else
3818     dst = fold_build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (dst), dst,
3819 			   fold_convert_loc (loc, sizetype,
3820 					     unshare_expr (dstlen)));
3821   dst = force_gimple_operand_gsi (gsi, dst, true, NULL_TREE, true,
3822 				  GSI_SAME_STMT);
3823   if (objsz)
3824     {
3825       objsz = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (objsz), objsz,
3826 			       fold_convert_loc (loc, TREE_TYPE (objsz),
3827 						 unshare_expr (dstlen)));
3828       objsz = force_gimple_operand_gsi (gsi, objsz, true, NULL_TREE, true,
3829 					GSI_SAME_STMT);
3830     }
3831   if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3832     {
3833       fprintf (dump_file, "Optimizing: ");
3834       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3835     }
3836   if (srclen != NULL_TREE)
3837     success = update_gimple_call (gsi, fn, 3 + (objsz != NULL_TREE),
3838 				  dst, src, len, objsz);
3839   else
3840     success = update_gimple_call (gsi, fn, 2 + (objsz != NULL_TREE),
3841 				  dst, src, objsz);
3842   if (success)
3843     {
3844       stmt = gsi_stmt (*gsi);
3845       update_stmt (stmt);
3846       if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3847 	{
3848 	  fprintf (dump_file, "into: ");
3849 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3850 	}
3851       /* If srclen == NULL, note that current string length can be
3852 	 computed by transforming this strcpy into stpcpy.  */
3853       if (srclen == NULL_TREE && dsi->dont_invalidate)
3854 	dsi->stmt = stmt;
3855       adjust_last_stmt (dsi, stmt, true);
3856       if (srclen != NULL_TREE)
3857 	{
3858 	  laststmt.stmt = stmt;
3859 	  laststmt.len = srclen;
3860 	  laststmt.stridx = dsi->idx;
3861 	}
3862     }
3863   else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3864     fprintf (dump_file, "not possible.\n");
3865 
3866   if (set_no_warning)
3867     gimple_set_no_warning (stmt, true);
3868 }
3869 
3870 /* Handle a call to an allocation function like alloca, malloc or calloc,
3871    or an ordinary allocation function declared with attribute alloc_size.  */
3872 
3873 static void
handle_alloc_call(enum built_in_function bcode,gimple_stmt_iterator * gsi)3874 handle_alloc_call (enum built_in_function bcode, gimple_stmt_iterator *gsi)
3875 {
3876   gimple *stmt = gsi_stmt (*gsi);
3877   tree lhs = gimple_call_lhs (stmt);
3878   if (lhs == NULL_TREE)
3879     return;
3880 
3881   gcc_assert (get_stridx (lhs) == 0);
3882   int idx = new_stridx (lhs);
3883   tree length = NULL_TREE;
3884   if (bcode == BUILT_IN_CALLOC)
3885     length = build_int_cst (size_type_node, 0);
3886   strinfo *si = new_strinfo (lhs, idx, length, length != NULL_TREE);
3887   if (bcode == BUILT_IN_CALLOC)
3888     {
3889       /* Only set STMT for calloc and malloc.  */
3890       si->stmt = stmt;
3891       /* Only set ENDPTR for calloc.  */
3892       si->endptr = lhs;
3893     }
3894   else if (bcode == BUILT_IN_MALLOC)
3895     si->stmt = stmt;
3896 
3897   /* Set ALLOC is set for all allocation functions.  */
3898   si->alloc = stmt;
3899   set_strinfo (idx, si);
3900   si->writable = true;
3901   si->dont_invalidate = true;
3902 }
3903 
3904 /* Handle a call to memset.
3905    After a call to calloc, memset(,0,) is unnecessary.
3906    memset(malloc(n),0,n) is calloc(n,1).
3907    return true when the call is transformed, false otherwise.
3908    When nonnull uses RVALS to determine range information.  */
3909 
3910 static bool
handle_builtin_memset(gimple_stmt_iterator * gsi,bool * zero_write,const vr_values * rvals)3911 handle_builtin_memset (gimple_stmt_iterator *gsi, bool *zero_write,
3912 		       const vr_values *rvals)
3913 {
3914   gimple *memset_stmt = gsi_stmt (*gsi);
3915   tree ptr = gimple_call_arg (memset_stmt, 0);
3916   /* Set to the non-constant offset added to PTR.  */
3917   wide_int offrng[2];
3918   int idx1 = get_stridx (ptr, offrng, rvals);
3919   if (idx1 <= 0)
3920     return false;
3921   strinfo *si1 = get_strinfo (idx1);
3922   if (!si1)
3923     return false;
3924   gimple *alloc_stmt = si1->alloc;
3925   if (!alloc_stmt || !is_gimple_call (alloc_stmt))
3926     return false;
3927   tree callee1 = gimple_call_fndecl (alloc_stmt);
3928   if (!valid_builtin_call (alloc_stmt))
3929     return false;
3930   tree alloc_size = gimple_call_arg (alloc_stmt, 0);
3931   tree memset_size = gimple_call_arg (memset_stmt, 2);
3932 
3933   /* Check for overflow.  */
3934   maybe_warn_overflow (memset_stmt, memset_size, rvals, NULL, false, true);
3935 
3936   /* Bail when there is no statement associated with the destination
3937      (the statement may be null even when SI1->ALLOC is not).  */
3938   if (!si1->stmt)
3939     return false;
3940 
3941   /* Avoid optimizing if store is at a variable offset from the beginning
3942      of the allocated object.  */
3943   if (offrng[0] != 0 || offrng[0] != offrng[1])
3944     return false;
3945 
3946   /* Bail when the call writes a non-zero value.  */
3947   if (!integer_zerop (gimple_call_arg (memset_stmt, 1)))
3948     return false;
3949 
3950   /* Let the caller know the memset call cleared the destination.  */
3951   *zero_write = true;
3952 
3953   enum built_in_function code1 = DECL_FUNCTION_CODE (callee1);
3954   if (code1 == BUILT_IN_CALLOC)
3955     /* Not touching alloc_stmt */ ;
3956   else if (code1 == BUILT_IN_MALLOC
3957 	   && operand_equal_p (memset_size, alloc_size, 0))
3958     {
3959       /* Replace the malloc + memset calls with calloc.  */
3960       gimple_stmt_iterator gsi1 = gsi_for_stmt (si1->stmt);
3961       update_gimple_call (&gsi1, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
3962 			  alloc_size, build_one_cst (size_type_node));
3963       si1->nonzero_chars = build_int_cst (size_type_node, 0);
3964       si1->full_string_p = true;
3965       si1->stmt = gsi_stmt (gsi1);
3966     }
3967   else
3968     return false;
3969   tree lhs = gimple_call_lhs (memset_stmt);
3970   unlink_stmt_vdef (memset_stmt);
3971   if (lhs)
3972     {
3973       gimple *assign = gimple_build_assign (lhs, ptr);
3974       gsi_replace (gsi, assign, false);
3975     }
3976   else
3977     {
3978       gsi_remove (gsi, true);
3979       release_defs (memset_stmt);
3980     }
3981 
3982   return true;
3983 }
3984 
3985 /* Return a pointer to the first such equality expression if RES is used
3986    only in expressions testing its equality to zero, and null otherwise.  */
3987 
3988 static gimple *
used_only_for_zero_equality(tree res)3989 used_only_for_zero_equality (tree res)
3990 {
3991   gimple *first_use = NULL;
3992 
3993   use_operand_p use_p;
3994   imm_use_iterator iter;
3995 
3996   FOR_EACH_IMM_USE_FAST (use_p, iter, res)
3997     {
3998       gimple *use_stmt = USE_STMT (use_p);
3999 
4000       if (is_gimple_debug (use_stmt))
4001         continue;
4002       if (gimple_code (use_stmt) == GIMPLE_ASSIGN)
4003 	{
4004 	  tree_code code = gimple_assign_rhs_code (use_stmt);
4005 	  if (code == COND_EXPR)
4006 	    {
4007 	      tree cond_expr = gimple_assign_rhs1 (use_stmt);
4008 	      if ((TREE_CODE (cond_expr) != EQ_EXPR
4009 		   && (TREE_CODE (cond_expr) != NE_EXPR))
4010 		  || !integer_zerop (TREE_OPERAND (cond_expr, 1)))
4011 		return NULL;
4012 	    }
4013 	  else if (code == EQ_EXPR || code == NE_EXPR)
4014 	    {
4015 	      if (!integer_zerop (gimple_assign_rhs2 (use_stmt)))
4016 		return NULL;
4017             }
4018 	  else
4019 	    return NULL;
4020 	}
4021       else if (gimple_code (use_stmt) == GIMPLE_COND)
4022 	{
4023 	  tree_code code = gimple_cond_code (use_stmt);
4024 	  if ((code != EQ_EXPR && code != NE_EXPR)
4025 	      || !integer_zerop (gimple_cond_rhs (use_stmt)))
4026 	    return NULL;
4027 	}
4028       else
4029         return NULL;
4030 
4031       if (!first_use)
4032 	first_use = use_stmt;
4033     }
4034 
4035   return first_use;
4036 }
4037 
4038 /* Handle a call to memcmp.  We try to handle small comparisons by
4039    converting them to load and compare, and replacing the call to memcmp
4040    with a __builtin_memcmp_eq call where possible.
4041    return true when call is transformed, return false otherwise.  */
4042 
4043 static bool
handle_builtin_memcmp(gimple_stmt_iterator * gsi)4044 handle_builtin_memcmp (gimple_stmt_iterator *gsi)
4045 {
4046   gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
4047   tree res = gimple_call_lhs (stmt);
4048 
4049   if (!res || !used_only_for_zero_equality (res))
4050     return false;
4051 
4052   tree arg1 = gimple_call_arg (stmt, 0);
4053   tree arg2 = gimple_call_arg (stmt, 1);
4054   tree len = gimple_call_arg (stmt, 2);
4055   unsigned HOST_WIDE_INT leni;
4056 
4057   if (tree_fits_uhwi_p (len)
4058       && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode)
4059       && pow2p_hwi (leni))
4060     {
4061       leni *= CHAR_TYPE_SIZE;
4062       unsigned align1 = get_pointer_alignment (arg1);
4063       unsigned align2 = get_pointer_alignment (arg2);
4064       unsigned align = MIN (align1, align2);
4065       scalar_int_mode mode;
4066       if (int_mode_for_size (leni, 1).exists (&mode)
4067 	  && (align >= leni || !targetm.slow_unaligned_access (mode, align)))
4068 	{
4069 	  location_t loc = gimple_location (stmt);
4070 	  tree type, off;
4071 	  type = build_nonstandard_integer_type (leni, 1);
4072 	  gcc_assert (known_eq (GET_MODE_BITSIZE (TYPE_MODE (type)), leni));
4073 	  tree ptrtype = build_pointer_type_for_mode (char_type_node,
4074 						      ptr_mode, true);
4075 	  off = build_int_cst (ptrtype, 0);
4076 	  arg1 = build2_loc (loc, MEM_REF, type, arg1, off);
4077 	  arg2 = build2_loc (loc, MEM_REF, type, arg2, off);
4078 	  tree tem1 = fold_const_aggregate_ref (arg1);
4079 	  if (tem1)
4080 	    arg1 = tem1;
4081 	  tree tem2 = fold_const_aggregate_ref (arg2);
4082 	  if (tem2)
4083 	    arg2 = tem2;
4084 	  res = fold_convert_loc (loc, TREE_TYPE (res),
4085 				  fold_build2_loc (loc, NE_EXPR,
4086 						   boolean_type_node,
4087 						   arg1, arg2));
4088 	  gimplify_and_update_call_from_tree (gsi, res);
4089 	  return true;
4090 	}
4091     }
4092 
4093   gimple_call_set_fndecl (stmt, builtin_decl_explicit (BUILT_IN_MEMCMP_EQ));
4094   return true;
4095 }
4096 
4097 /* Given strinfo IDX for ARG, sets LENRNG[] to the range of lengths
4098    of the string(s) referenced by ARG if it can be determined.
4099    If the length cannot be determined, sets *SIZE to the size of
4100    the array the string is stored in, if any.  If no such array is
4101    known, sets *SIZE to -1.  When the strings are nul-terminated sets
4102    *NULTERM to true, otherwise to false.  When nonnull uses RVALS to
4103    determine range information. Returns true on success.  */
4104 
4105 static bool
get_len_or_size(tree arg,int idx,unsigned HOST_WIDE_INT lenrng[2],unsigned HOST_WIDE_INT * size,bool * nulterm,const vr_values * rvals)4106 get_len_or_size (tree arg, int idx, unsigned HOST_WIDE_INT lenrng[2],
4107 		 unsigned HOST_WIDE_INT *size, bool *nulterm,
4108 		 const vr_values *rvals)
4109 {
4110   /* Invalidate.  */
4111   *size = HOST_WIDE_INT_M1U;
4112 
4113   if (idx < 0)
4114     {
4115       /* IDX is the inverted constant string length.  */
4116       lenrng[0] = ~idx;
4117       lenrng[1] = lenrng[0];
4118       *nulterm = true;
4119       return true;
4120     }
4121 
4122   /* Set so that both LEN and ~LEN are invalid lengths, i.e., maximum
4123      possible length + 1.  */
4124   lenrng[0] = lenrng[1] = HOST_WIDE_INT_MAX;
4125 
4126   if (strinfo *si = idx ? get_strinfo (idx) : NULL)
4127     {
4128       /* FIXME: Handle all this in_range_strlen_dynamic.  */
4129       if (!si->nonzero_chars)
4130 	;
4131       else if (tree_fits_uhwi_p (si->nonzero_chars))
4132 	{
4133 	  lenrng[0] = tree_to_uhwi (si->nonzero_chars);
4134 	  *nulterm = si->full_string_p;
4135 	  /* Set the upper bound only if the string is known to be
4136 	     nul-terminated, otherwise leave it at maximum + 1.  */
4137 	  if (*nulterm)
4138 	    lenrng[1] = lenrng[0];
4139 	}
4140       else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
4141 	{
4142 	  wide_int min, max;
4143 	  value_range_kind rng = get_range_info (si->nonzero_chars, &min, &max);
4144 	  if (rng == VR_RANGE)
4145 	    {
4146 	      lenrng[0] = min.to_uhwi ();
4147 	      lenrng[1] = max.to_uhwi ();
4148 	      *nulterm = si->full_string_p;
4149 	    }
4150 	}
4151     }
4152 
4153   if (lenrng[0] != HOST_WIDE_INT_MAX)
4154     return true;
4155 
4156   /* Compute the minimum and maximum real or possible lengths.  */
4157   c_strlen_data lendata = { };
4158   /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
4159      to have it set to the length of the longest string in a PHI.  */
4160   lendata.maxbound = arg;
4161   get_range_strlen_dynamic (arg, &lendata, rvals);
4162 
4163   unsigned HOST_WIDE_INT maxbound = HOST_WIDE_INT_M1U;
4164   if (tree_fits_uhwi_p (lendata.maxbound)
4165       && !integer_all_onesp (lendata.maxbound))
4166     maxbound = tree_to_uhwi (lendata.maxbound);
4167 
4168   if (tree_fits_uhwi_p (lendata.minlen) && tree_fits_uhwi_p (lendata.maxlen))
4169     {
4170       unsigned HOST_WIDE_INT minlen = tree_to_uhwi (lendata.minlen);
4171       unsigned HOST_WIDE_INT maxlen = tree_to_uhwi (lendata.maxlen);
4172 
4173       /* The longest string in this data model.  */
4174       const unsigned HOST_WIDE_INT lenmax
4175 	= tree_to_uhwi (max_object_size ()) - 2;
4176 
4177       if (maxbound == HOST_WIDE_INT_M1U)
4178 	{
4179 	  lenrng[0] = minlen;
4180 	  lenrng[1] = maxlen;
4181 	  *nulterm = minlen == maxlen;
4182 	}
4183       else if (maxlen < lenmax)
4184 	{
4185 	  *size = maxbound + 1;
4186 	  *nulterm = false;
4187 	}
4188       else
4189 	return false;
4190 
4191       return true;
4192     }
4193 
4194   if (maxbound != HOST_WIDE_INT_M1U
4195       && lendata.maxlen
4196       && !integer_all_onesp (lendata.maxlen))
4197     {
4198       /* Set *SIZE to LENDATA.MAXBOUND which is a conservative estimate
4199 	 of the longest string based on the sizes of the arrays referenced
4200 	 by ARG.  */
4201       *size = maxbound + 1;
4202       *nulterm = false;
4203       return true;
4204     }
4205 
4206   return false;
4207 }
4208 
4209 /* If IDX1 and IDX2 refer to strings A and B of unequal lengths, return
4210    the result of 0 == strncmp (A, B, BOUND) (which is the same as strcmp
4211    for a sufficiently large BOUND).  If the result is based on the length
4212    of one string being greater than the longest string that would fit in
4213    the array pointer to by the argument, set *PLEN and *PSIZE to
4214    the corresponding length (or its complement when the string is known
4215    to be at least as long and need not be nul-terminated) and size.
4216    Otherwise return null.  */
4217 
4218 static tree
strxcmp_eqz_result(tree arg1,int idx1,tree arg2,int idx2,unsigned HOST_WIDE_INT bound,unsigned HOST_WIDE_INT len[2],unsigned HOST_WIDE_INT * psize,const vr_values * rvals)4219 strxcmp_eqz_result (tree arg1, int idx1, tree arg2, int idx2,
4220 		    unsigned HOST_WIDE_INT bound, unsigned HOST_WIDE_INT len[2],
4221 		    unsigned HOST_WIDE_INT *psize, const vr_values *rvals)
4222 {
4223   /* Determine the range the length of each string is in and whether it's
4224      known to be nul-terminated, or the size of the array it's stored in.  */
4225   bool nul1, nul2;
4226   unsigned HOST_WIDE_INT siz1, siz2;
4227   unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4228   if (!get_len_or_size (arg1, idx1, len1rng, &siz1, &nul1, rvals)
4229       || !get_len_or_size (arg2, idx2, len2rng, &siz2, &nul2, rvals))
4230     return NULL_TREE;
4231 
4232   /* BOUND is set to HWI_M1U for strcmp and less to strncmp, and LENiRNG
4233      to HWI_MAX when invalid.  Adjust the length of each string to consider
4234      to be no more than BOUND.  */
4235   if (len1rng[0] < HOST_WIDE_INT_MAX && len1rng[0] > bound)
4236     len1rng[0] = bound;
4237   if (len1rng[1] < HOST_WIDE_INT_MAX && len1rng[1] > bound)
4238     len1rng[1] = bound;
4239   if (len2rng[0] < HOST_WIDE_INT_MAX && len2rng[0] > bound)
4240     len2rng[0] = bound;
4241   if (len2rng[1] < HOST_WIDE_INT_MAX && len2rng[1] > bound)
4242     len2rng[1] = bound;
4243 
4244   /* Two empty strings are equal.  */
4245   if (len1rng[1] == 0 && len2rng[1] == 0)
4246     return integer_one_node;
4247 
4248   /* The strings are definitely unequal when the lower bound of the length
4249      of one of them is greater than the length of the longest string that
4250      would fit into the other array.  */
4251   if (len1rng[0] == HOST_WIDE_INT_MAX
4252       && len2rng[0] != HOST_WIDE_INT_MAX
4253       && ((len2rng[0] < bound && len2rng[0] >= siz1)
4254 	  || len2rng[0] > siz1))
4255     {
4256       *psize = siz1;
4257       len[0] = len1rng[0];
4258       /* Set LEN[0] to the lower bound of ARG1's length when it's
4259 	 nul-terminated or to the complement of its minimum length
4260 	 otherwise,  */
4261       len[1] = nul2 ? len2rng[0] : ~len2rng[0];
4262       return integer_zero_node;
4263     }
4264 
4265   if (len2rng[0] == HOST_WIDE_INT_MAX
4266       && len1rng[0] != HOST_WIDE_INT_MAX
4267       && ((len1rng[0] < bound && len1rng[0] >= siz2)
4268 	  || len1rng[0] > siz2))
4269     {
4270       *psize = siz2;
4271       len[0] = nul1 ? len1rng[0] : ~len1rng[0];
4272       len[1] = len2rng[0];
4273       return integer_zero_node;
4274     }
4275 
4276   /* The strings are also definitely unequal when their lengths are unequal
4277      and at least one is nul-terminated.  */
4278   if (len1rng[0] != HOST_WIDE_INT_MAX
4279       && len2rng[0] != HOST_WIDE_INT_MAX
4280       && ((len1rng[1] < len2rng[0] && nul1)
4281 	  || (len2rng[1] < len1rng[0] && nul2)))
4282     {
4283       if (bound <= len1rng[0] || bound <= len2rng[0])
4284 	*psize = bound;
4285       else
4286 	*psize = HOST_WIDE_INT_M1U;
4287 
4288       len[0] = len1rng[0];
4289       len[1] = len2rng[0];
4290       return integer_zero_node;
4291     }
4292 
4293   /* The string lengths may be equal or unequal.  Even when equal and
4294      both strings nul-terminated, without the string contents there's
4295      no way to determine whether they are equal.  */
4296   return NULL_TREE;
4297 }
4298 
4299 /* Diagnose pointless calls to strcmp or strncmp STMT with string
4300    arguments of lengths LEN or size SIZ and (for strncmp) BOUND,
4301    whose result is used in equality expressions that evaluate to
4302    a constant due to one argument being longer than the size of
4303    the other.  */
4304 
4305 static void
maybe_warn_pointless_strcmp(gimple * stmt,HOST_WIDE_INT bound,unsigned HOST_WIDE_INT len[2],unsigned HOST_WIDE_INT siz)4306 maybe_warn_pointless_strcmp (gimple *stmt, HOST_WIDE_INT bound,
4307 			     unsigned HOST_WIDE_INT len[2],
4308 			     unsigned HOST_WIDE_INT siz)
4309 {
4310   tree lhs = gimple_call_lhs (stmt);
4311   gimple *use = used_only_for_zero_equality (lhs);
4312   if (!use)
4313     return;
4314 
4315   bool at_least = false;
4316 
4317   /* Excessive LEN[i] indicates a lower bound.  */
4318   if (len[0] > HOST_WIDE_INT_MAX)
4319     {
4320       at_least = true;
4321       len[0] = ~len[0];
4322     }
4323 
4324   if (len[1] > HOST_WIDE_INT_MAX)
4325     {
4326       at_least = true;
4327       len[1] = ~len[1];
4328     }
4329 
4330   unsigned HOST_WIDE_INT minlen = MIN (len[0], len[1]);
4331 
4332   /* FIXME: Include a note pointing to the declaration of the smaller
4333      array.  */
4334   location_t stmt_loc = gimple_or_expr_nonartificial_location (stmt, lhs);
4335 
4336   tree callee = gimple_call_fndecl (stmt);
4337   bool warned = false;
4338   if (siz <= minlen && bound == -1)
4339     warned = warning_at (stmt_loc, OPT_Wstring_compare,
4340 			 (at_least
4341 			  ? G_("%G%qD of a string of length %wu or more and "
4342 			       "an array of size %wu evaluates to nonzero")
4343 			  : G_("%G%qD of a string of length %wu and an array "
4344 			       "of size %wu evaluates to nonzero")),
4345 			 stmt, callee, minlen, siz);
4346   else if (!at_least && siz <= HOST_WIDE_INT_MAX)
4347     {
4348       if (len[0] != HOST_WIDE_INT_MAX && len[1] != HOST_WIDE_INT_MAX)
4349 	warned = warning_at (stmt_loc, OPT_Wstring_compare,
4350 			     "%G%qD of strings of length %wu and %wu "
4351 			     "and bound of %wu evaluates to nonzero",
4352 			     stmt, callee, len[0], len[1], bound);
4353       else
4354 	warned = warning_at (stmt_loc, OPT_Wstring_compare,
4355 			     "%G%qD of a string of length %wu, an array "
4356 			     "of size %wu and bound of %wu evaluates to "
4357 			     "nonzero",
4358 			     stmt, callee, minlen, siz, bound);
4359     }
4360 
4361   if (warned)
4362     {
4363       location_t use_loc = gimple_location (use);
4364       if (LOCATION_LINE (stmt_loc) != LOCATION_LINE (use_loc))
4365 	inform (use_loc, "in this expression");
4366     }
4367 }
4368 
4369 
4370 /* Optimize a call to strcmp or strncmp either by folding it to a constant
4371    when possible or by transforming the latter to the former.  Warn about
4372    calls where the length of one argument is greater than the size of
4373    the array to which the other argument points if the latter's length
4374    is not known.  Return true when the call has been transformed into
4375    another and false otherwise.  */
4376 
4377 static bool
handle_builtin_string_cmp(gimple_stmt_iterator * gsi,const vr_values * rvals)4378 handle_builtin_string_cmp (gimple_stmt_iterator *gsi, const vr_values *rvals)
4379 {
4380   gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
4381   tree lhs = gimple_call_lhs (stmt);
4382 
4383   if (!lhs)
4384     return false;
4385 
4386   tree arg1 = gimple_call_arg (stmt, 0);
4387   tree arg2 = gimple_call_arg (stmt, 1);
4388   int idx1 = get_stridx (arg1);
4389   int idx2 = get_stridx (arg2);
4390 
4391   /* For strncmp set to the value of the third argument if known.  */
4392   HOST_WIDE_INT bound = -1;
4393   tree len = NULL_TREE;
4394   /* Extract the strncmp bound.  */
4395   if (gimple_call_num_args (stmt) == 3)
4396     {
4397       len = gimple_call_arg (stmt, 2);
4398       if (tree_fits_shwi_p (len))
4399         bound = tree_to_shwi (len);
4400 
4401       /* If the bound argument is NOT known, do nothing.  */
4402       if (bound < 0)
4403 	return false;
4404     }
4405 
4406   /* Avoid folding if either argument is not a nul-terminated array.
4407      Defer warning until later.  */
4408   if (!check_nul_terminated_array (NULL_TREE, arg1, len)
4409       || !check_nul_terminated_array (NULL_TREE, arg2, len))
4410     return false;
4411 
4412   {
4413     /* Set to the length of one argument (or its complement if it's
4414        the lower bound of a range) and the size of the array storing
4415        the other if the result is based on the former being equal to
4416        or greater than the latter.  */
4417     unsigned HOST_WIDE_INT len[2] = { HOST_WIDE_INT_MAX, HOST_WIDE_INT_MAX };
4418     unsigned HOST_WIDE_INT siz = HOST_WIDE_INT_M1U;
4419 
4420     /* Try to determine if the two strings are either definitely equal
4421        or definitely unequal and if so, either fold the result to zero
4422        (when equal) or set the range of the result to ~[0, 0] otherwise.  */
4423     if (tree eqz = strxcmp_eqz_result (arg1, idx1, arg2, idx2, bound,
4424 				       len, &siz, rvals))
4425       {
4426 	if (integer_zerop (eqz))
4427 	  {
4428 	    maybe_warn_pointless_strcmp (stmt, bound, len, siz);
4429 
4430 	    /* When the lengths of the first two string arguments are
4431 	       known to be unequal set the range of the result to non-zero.
4432 	       This allows the call to be eliminated if its result is only
4433 	       used in tests for equality to zero.  */
4434 	    wide_int zero = wi::zero (TYPE_PRECISION (TREE_TYPE (lhs)));
4435 	    set_range_info (lhs, VR_ANTI_RANGE, zero, zero);
4436 	    return false;
4437 	  }
4438 	/* When the two strings are definitely equal (such as when they
4439 	   are both empty) fold the call to the constant result.  */
4440 	replace_call_with_value (gsi, integer_zero_node);
4441 	return true;
4442       }
4443   }
4444 
4445   /* Return if nothing is known about the strings pointed to by ARG1
4446      and ARG2.  */
4447   if (idx1 == 0 && idx2 == 0)
4448     return false;
4449 
4450   /* Determine either the length or the size of each of the strings,
4451      whichever is available.  */
4452   HOST_WIDE_INT cstlen1 = -1, cstlen2 = -1;
4453   HOST_WIDE_INT arysiz1 = -1, arysiz2 = -1;
4454 
4455   {
4456     unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4457     unsigned HOST_WIDE_INT arsz1, arsz2;
4458     bool nulterm[2];
4459 
4460     if (!get_len_or_size (arg1, idx1, len1rng, &arsz1, nulterm, rvals)
4461 	|| !get_len_or_size (arg2, idx2, len2rng, &arsz2, nulterm + 1, rvals))
4462       return false;
4463 
4464     if (len1rng[0] == len1rng[1] && len1rng[0] < HOST_WIDE_INT_MAX)
4465       cstlen1 = len1rng[0];
4466     else if (arsz1 < HOST_WIDE_INT_M1U)
4467       arysiz1 = arsz1;
4468 
4469     if (len2rng[0] == len2rng[1] && len2rng[0] < HOST_WIDE_INT_MAX)
4470       cstlen2 = len2rng[0];
4471     else if (arsz2 < HOST_WIDE_INT_M1U)
4472       arysiz2 = arsz2;
4473   }
4474 
4475   /* Bail if neither the string length nor the size of the array
4476      it is stored in can be determined.  */
4477   if ((cstlen1 < 0 && arysiz1 < 0)
4478       || (cstlen2 < 0 && arysiz2 < 0)
4479       || (cstlen1 < 0 && cstlen2 < 0))
4480     return false;
4481 
4482   if (cstlen1 >= 0)
4483     ++cstlen1;
4484   if (cstlen2 >= 0)
4485     ++cstlen2;
4486 
4487   /* The exact number of characters to compare.  */
4488   HOST_WIDE_INT cmpsiz = bound < 0 ? cstlen1 < 0 ? cstlen2 : cstlen1 : bound;
4489   /* The size of the array in which the unknown string is stored.  */
4490   HOST_WIDE_INT varsiz = arysiz1 < 0 ? arysiz2 : arysiz1;
4491 
4492   if (cmpsiz < varsiz && used_only_for_zero_equality (lhs))
4493     {
4494       /* If the known length is less than the size of the other array
4495 	 and the strcmp result is only used to test equality to zero,
4496 	 transform the call to the equivalent _eq call.  */
4497       if (tree fn = builtin_decl_implicit (bound < 0 ? BUILT_IN_STRCMP_EQ
4498 					   : BUILT_IN_STRNCMP_EQ))
4499 	{
4500 	  tree n = build_int_cst (size_type_node, cmpsiz);
4501 	  update_gimple_call (gsi, fn, 3, arg1, arg2, n);
4502 	  return true;
4503 	}
4504     }
4505 
4506   return false;
4507 }
4508 
4509 /* Handle a POINTER_PLUS_EXPR statement.
4510    For p = "abcd" + 2; compute associated length, or if
4511    p = q + off is pointing to a '\0' character of a string, call
4512    zero_length_string on it.  */
4513 
4514 static void
handle_pointer_plus(gimple_stmt_iterator * gsi)4515 handle_pointer_plus (gimple_stmt_iterator *gsi)
4516 {
4517   gimple *stmt = gsi_stmt (*gsi);
4518   tree lhs = gimple_assign_lhs (stmt), off;
4519   int idx = get_stridx (gimple_assign_rhs1 (stmt));
4520   strinfo *si, *zsi;
4521 
4522   if (idx == 0)
4523     return;
4524 
4525   if (idx < 0)
4526     {
4527       tree off = gimple_assign_rhs2 (stmt);
4528       if (tree_fits_uhwi_p (off)
4529 	  && tree_to_uhwi (off) <= (unsigned HOST_WIDE_INT) ~idx)
4530 	ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)]
4531 	    = ~(~idx - (int) tree_to_uhwi (off));
4532       return;
4533     }
4534 
4535   si = get_strinfo (idx);
4536   if (si == NULL || si->nonzero_chars == NULL_TREE)
4537     return;
4538 
4539   off = gimple_assign_rhs2 (stmt);
4540   zsi = NULL;
4541   if (si->full_string_p && operand_equal_p (si->nonzero_chars, off, 0))
4542     zsi = zero_length_string (lhs, si);
4543   else if (TREE_CODE (off) == SSA_NAME)
4544     {
4545       gimple *def_stmt = SSA_NAME_DEF_STMT (off);
4546       if (gimple_assign_single_p (def_stmt)
4547 	  && si->full_string_p
4548 	  && operand_equal_p (si->nonzero_chars,
4549 			      gimple_assign_rhs1 (def_stmt), 0))
4550 	zsi = zero_length_string (lhs, si);
4551     }
4552   if (zsi != NULL
4553       && si->endptr != NULL_TREE
4554       && si->endptr != lhs
4555       && TREE_CODE (si->endptr) == SSA_NAME)
4556     {
4557       enum tree_code rhs_code
4558 	= useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (si->endptr))
4559 	  ? SSA_NAME : NOP_EXPR;
4560       gimple_assign_set_rhs_with_ops (gsi, rhs_code, si->endptr);
4561       gcc_assert (gsi_stmt (*gsi) == stmt);
4562       update_stmt (stmt);
4563     }
4564 }
4565 
4566 /* Describes recursion limits used by count_nonzero_bytes.  */
4567 
4568 class ssa_name_limit_t
4569 {
4570   bitmap visited;         /* Bitmap of visited SSA_NAMEs.  */
4571   unsigned ssa_def_max;   /* Longest chain of SSA_NAMEs to follow.  */
4572 
4573   /* Not copyable or assignable.  */
4574   ssa_name_limit_t (ssa_name_limit_t&);
4575   void operator= (ssa_name_limit_t&);
4576 
4577  public:
4578 
ssa_name_limit_t()4579   ssa_name_limit_t ()
4580     : visited (NULL),
4581     ssa_def_max (param_ssa_name_def_chain_limit) { }
4582 
4583   int next_ssa_name (tree);
4584 
~ssa_name_limit_t()4585   ~ssa_name_limit_t ()
4586     {
4587       if (visited)
4588 	BITMAP_FREE (visited);
4589     }
4590 };
4591 
4592 /* If the SSA_NAME has already been "seen" return a positive value.
4593    Otherwise add it to VISITED.  If the SSA_NAME limit has been
4594    reached, return a negative value.  Otherwise return zero.  */
4595 
next_ssa_name(tree ssa_name)4596 int ssa_name_limit_t::next_ssa_name (tree ssa_name)
4597 {
4598   if (!visited)
4599     visited = BITMAP_ALLOC (NULL);
4600 
4601   /* Return a positive value if SSA_NAME has already been visited.  */
4602   if (!bitmap_set_bit (visited, SSA_NAME_VERSION (ssa_name)))
4603     return 1;
4604 
4605   /* Return a negative value to let caller avoid recursing beyond
4606      the specified limit.  */
4607   if (ssa_def_max == 0)
4608     return -1;
4609 
4610   --ssa_def_max;
4611 
4612   return 0;
4613 }
4614 
4615 static bool
4616 count_nonzero_bytes_addr (tree, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
4617 			  unsigned [3], bool *, bool *, bool *,
4618 			  const vr_values *, ssa_name_limit_t &);
4619 
4620 /* Determines the minimum and maximum number of leading non-zero bytes
4621    in the representation of EXP and set LENRANGE[0] and LENRANGE[1]
4622    to each.
4623    Sets LENRANGE[2] to the total size of the access (which may be less
4624    than LENRANGE[1] when what's being referenced by EXP is a pointer
4625    rather than an array).
4626    Sets *NULTERM if the representation contains a zero byte, and sets
4627    *ALLNUL if all the bytes are zero.
4628    OFFSET and NBYTES are the offset into the representation and
4629    the size of the access to it determined from an ADDR_EXPR (i.e.,
4630    a pointer) or MEM_REF or zero for other expressions.
4631    Uses RVALS to determine range information.
4632    Avoids recursing deeper than the limits in SNLIM allow.
4633    Returns true on success and false otherwise.  */
4634 
4635 static bool
count_nonzero_bytes(tree exp,unsigned HOST_WIDE_INT offset,unsigned HOST_WIDE_INT nbytes,unsigned lenrange[3],bool * nulterm,bool * allnul,bool * allnonnul,const vr_values * rvals,ssa_name_limit_t & snlim)4636 count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset,
4637 		     unsigned HOST_WIDE_INT nbytes,
4638 		     unsigned lenrange[3], bool *nulterm,
4639 		     bool *allnul, bool *allnonnul, const vr_values *rvals,
4640 		     ssa_name_limit_t &snlim)
4641 {
4642   if (TREE_CODE (exp) == SSA_NAME)
4643     {
4644       /* Handle non-zero single-character stores specially.  */
4645       tree type = TREE_TYPE (exp);
4646       if (TREE_CODE (type) == INTEGER_TYPE
4647 	  && TYPE_MODE (type) == TYPE_MODE (char_type_node)
4648 	  && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node)
4649 	  && tree_expr_nonzero_p (exp))
4650 	{
4651 	  /* If the character EXP is known to be non-zero (even if its
4652 	     exact value is not known) recurse once to set the range
4653 	     for an arbitrary constant.  */
4654 	  exp = build_int_cst (type, 1);
4655 	  return count_nonzero_bytes (exp, offset, 1, lenrange,
4656 				      nulterm, allnul, allnonnul, rvals, snlim);
4657 	}
4658 
4659       gimple *stmt = SSA_NAME_DEF_STMT (exp);
4660       if (gimple_assign_single_p (stmt))
4661 	{
4662 	  exp = gimple_assign_rhs1 (stmt);
4663 	  if (TREE_CODE (exp) != MEM_REF)
4664 	    return false;
4665 	  /* Handle MEM_REF below.  */
4666 	}
4667       else if (gimple_code (stmt) == GIMPLE_PHI)
4668 	{
4669 	  /* Avoid processing an SSA_NAME that has already been visited
4670 	     or if an SSA_NAME limit has been reached.  Indicate success
4671 	     if the former and failure if the latter.  */
4672 	  if (int res = snlim.next_ssa_name (exp))
4673 	    return res > 0;
4674 
4675 	  /* Determine the minimum and maximum from the PHI arguments.  */
4676 	  unsigned int n = gimple_phi_num_args (stmt);
4677 	  for (unsigned i = 0; i != n; i++)
4678 	    {
4679 	      tree def = gimple_phi_arg_def (stmt, i);
4680 	      if (!count_nonzero_bytes (def, offset, nbytes, lenrange, nulterm,
4681 					allnul, allnonnul, rvals, snlim))
4682 		return false;
4683 	    }
4684 
4685 	  return true;
4686 	}
4687     }
4688 
4689   if (TREE_CODE (exp) == MEM_REF)
4690     {
4691       if (nbytes)
4692 	return false;
4693 
4694       tree arg = TREE_OPERAND (exp, 0);
4695       tree off = TREE_OPERAND (exp, 1);
4696 
4697       if (TREE_CODE (off) != INTEGER_CST || !tree_fits_uhwi_p (off))
4698 	return false;
4699 
4700       unsigned HOST_WIDE_INT wioff = tree_to_uhwi (off);
4701       if (INT_MAX < wioff)
4702 	return false;
4703 
4704       offset += wioff;
4705       if (INT_MAX < offset)
4706 	return false;
4707 
4708       /* The size of the MEM_REF access determines the number of bytes.  */
4709       tree type = TREE_TYPE (exp);
4710       tree typesize = TYPE_SIZE_UNIT (type);
4711       if (!typesize || !tree_fits_uhwi_p (typesize))
4712 	return false;
4713       nbytes = tree_to_uhwi (typesize);
4714       if (!nbytes)
4715 	return false;
4716 
4717       /* Handle MEM_REF = SSA_NAME types of assignments.  */
4718       return count_nonzero_bytes_addr (arg, offset, nbytes, lenrange, nulterm,
4719 				       allnul, allnonnul, rvals, snlim);
4720     }
4721 
4722   if (VAR_P (exp) || TREE_CODE (exp) == CONST_DECL)
4723     {
4724       exp = ctor_for_folding (exp);
4725       if (!exp)
4726 	return false;
4727     }
4728 
4729   const char *prep = NULL;
4730   if (TREE_CODE (exp) == STRING_CST)
4731     {
4732       unsigned nchars = TREE_STRING_LENGTH (exp);
4733       if (nchars < offset)
4734 	return false;
4735 
4736       if (!nbytes)
4737 	/* If NBYTES hasn't been determined earlier, either from ADDR_EXPR
4738 	   (i.e., it's the size of a pointer), or from MEM_REF (as the size
4739 	   of the access), set it here to the size of the string, including
4740 	   all internal and trailing nuls if the string has any.  */
4741 	nbytes = nchars - offset;
4742       else if (nchars - offset < nbytes)
4743 	return false;
4744 
4745       prep = TREE_STRING_POINTER (exp) + offset;
4746     }
4747 
4748   unsigned char buf[256];
4749   if (!prep)
4750     {
4751       if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
4752 	return false;
4753       /* If the pointer to representation hasn't been set above
4754 	 for STRING_CST point it at the buffer.  */
4755       prep = reinterpret_cast <char *>(buf);
4756       /* Try to extract the representation of the constant object
4757 	 or expression starting from the offset.  */
4758       unsigned repsize = native_encode_expr (exp, buf, sizeof buf, offset);
4759       if (repsize < nbytes)
4760 	{
4761 	  /* This should only happen when REPSIZE is zero because EXP
4762 	     doesn't denote an object with a known initializer, except
4763 	     perhaps when the reference reads past its end.  */
4764 	  lenrange[0] = 0;
4765 	  prep = NULL;
4766 	}
4767       else if (!nbytes)
4768 	nbytes = repsize;
4769       else if (nbytes < repsize)
4770 	return false;
4771     }
4772 
4773   if (!nbytes)
4774     return false;
4775 
4776   /* Compute the number of leading nonzero bytes in the representation
4777      and update the minimum and maximum.  */
4778   unsigned n = prep ? strnlen (prep, nbytes) : nbytes;
4779 
4780   if (n < lenrange[0])
4781     lenrange[0] = n;
4782   if (lenrange[1] < n)
4783     lenrange[1] = n;
4784 
4785   /* Set the size of the representation.  */
4786   if (lenrange[2] < nbytes)
4787     lenrange[2] = nbytes;
4788 
4789   /* Clear NULTERM if none of the bytes is zero.  */
4790   if (n == nbytes)
4791     *nulterm = false;
4792 
4793   if (n)
4794     {
4795       /* When the initial number of non-zero bytes N is non-zero, reset
4796 	 *ALLNUL; if N is less than that the size of the representation
4797 	 also clear *ALLNONNUL.  */
4798       *allnul = false;
4799       if (n < nbytes)
4800 	*allnonnul = false;
4801     }
4802   else if (*allnul || *allnonnul)
4803     {
4804       *allnonnul = false;
4805 
4806       if (*allnul)
4807 	{
4808 	  /* When either ALLNUL is set and N is zero, also determine
4809 	     whether all subsequent bytes after the first one (which
4810 	     is nul) are zero or nonzero and clear ALLNUL if not.  */
4811 	  for (const char *p = prep; p != prep + nbytes; ++p)
4812 	    if (*p)
4813 	      {
4814 		*allnul = false;
4815 		break;
4816 	      }
4817 	}
4818     }
4819 
4820   return true;
4821 }
4822 
4823 /* Like count_nonzero_bytes, but instead of counting bytes in EXP, count
4824    bytes that are pointed to by EXP, which should be a pointer.  */
4825 
4826 static bool
count_nonzero_bytes_addr(tree exp,unsigned HOST_WIDE_INT offset,unsigned HOST_WIDE_INT nbytes,unsigned lenrange[3],bool * nulterm,bool * allnul,bool * allnonnul,const vr_values * rvals,ssa_name_limit_t & snlim)4827 count_nonzero_bytes_addr (tree exp, unsigned HOST_WIDE_INT offset,
4828 			  unsigned HOST_WIDE_INT nbytes,
4829 			  unsigned lenrange[3], bool *nulterm,
4830 			  bool *allnul, bool *allnonnul,
4831 			  const vr_values *rvals, ssa_name_limit_t &snlim)
4832 {
4833   int idx = get_stridx (exp);
4834   if (idx > 0)
4835     {
4836       strinfo *si = get_strinfo (idx);
4837       if (!si)
4838 	return false;
4839 
4840       /* Handle both constant lengths as well non-constant lengths
4841 	 in some range.  */
4842       unsigned HOST_WIDE_INT minlen, maxlen;
4843       if (tree_fits_shwi_p (si->nonzero_chars))
4844 	minlen = maxlen = tree_to_shwi (si->nonzero_chars);
4845       else if (si->nonzero_chars
4846 	       && TREE_CODE (si->nonzero_chars) == SSA_NAME)
4847 	{
4848 	  vr_values *v = CONST_CAST (vr_values *, rvals);
4849 	  const value_range_equiv *vr = v->get_value_range (si->nonzero_chars);
4850 	  if (vr->kind () != VR_RANGE || !range_int_cst_p (vr))
4851 	    return false;
4852 
4853 	  minlen = tree_to_uhwi (vr->min ());
4854 	  maxlen = tree_to_uhwi (vr->max ());
4855 	}
4856       else
4857 	return false;
4858 
4859       if (maxlen < offset)
4860 	return false;
4861 
4862       minlen = minlen < offset ? 0 : minlen - offset;
4863       maxlen -= offset;
4864       if (maxlen + 1 < nbytes)
4865 	return false;
4866 
4867       if (nbytes <= minlen)
4868 	*nulterm = false;
4869 
4870       if (nbytes < minlen)
4871 	{
4872 	  minlen = nbytes;
4873 	  if (nbytes < maxlen)
4874 	    maxlen = nbytes;
4875 	}
4876 
4877       if (minlen < lenrange[0])
4878 	lenrange[0] = minlen;
4879       if (lenrange[1] < maxlen)
4880 	lenrange[1] = maxlen;
4881 
4882       if (lenrange[2] < nbytes)
4883 	lenrange[2] = nbytes;
4884 
4885       /* Since only the length of the string are known and not its contents,
4886 	 clear ALLNUL and ALLNONNUL purely on the basis of the length.  */
4887       *allnul = false;
4888       if (minlen < nbytes)
4889 	*allnonnul = false;
4890 
4891       return true;
4892     }
4893 
4894   if (TREE_CODE (exp) == ADDR_EXPR)
4895     return count_nonzero_bytes (TREE_OPERAND (exp, 0), offset, nbytes,
4896 				lenrange, nulterm, allnul, allnonnul, rvals,
4897 				snlim);
4898 
4899   if (TREE_CODE (exp) == SSA_NAME)
4900     {
4901       gimple *stmt = SSA_NAME_DEF_STMT (exp);
4902       if (gimple_code (stmt) == GIMPLE_PHI)
4903 	{
4904 	  /* Avoid processing an SSA_NAME that has already been visited
4905 	     or if an SSA_NAME limit has been reached.  Indicate success
4906 	     if the former and failure if the latter.  */
4907 	  if (int res = snlim.next_ssa_name (exp))
4908 	    return res > 0;
4909 
4910 	  /* Determine the minimum and maximum from the PHI arguments.  */
4911 	  unsigned int n = gimple_phi_num_args (stmt);
4912 	  for (unsigned i = 0; i != n; i++)
4913 	    {
4914 	      tree def = gimple_phi_arg_def (stmt, i);
4915 	      if (!count_nonzero_bytes_addr (def, offset, nbytes, lenrange,
4916 					     nulterm, allnul, allnonnul, rvals,
4917 					     snlim))
4918 		return false;
4919 	    }
4920 
4921 	  return true;
4922 	}
4923     }
4924 
4925   /* Otherwise we don't know anything.  */
4926   lenrange[0] = 0;
4927   if (lenrange[1] < nbytes)
4928     lenrange[1] = nbytes;
4929   if (lenrange[2] < nbytes)
4930     lenrange[2] = nbytes;
4931   *nulterm = false;
4932   *allnul = false;
4933   *allnonnul = false;
4934   return true;
4935 }
4936 
4937 /* Same as above except with an implicit SSA_NAME limit.  RVALS is used
4938    to determine ranges of dynamically computed string lengths (the results
4939    of strlen).  */
4940 
4941 static bool
count_nonzero_bytes(tree exp,unsigned lenrange[3],bool * nulterm,bool * allnul,bool * allnonnul,const vr_values * rvals)4942 count_nonzero_bytes (tree exp, unsigned lenrange[3], bool *nulterm,
4943 		     bool *allnul, bool *allnonnul, const vr_values *rvals)
4944 {
4945   /* Set to optimistic values so the caller doesn't have to worry about
4946      initializing these and to what.  On success, the function will clear
4947      these if it determines their values are different but being recursive
4948      it never sets either to true.  On failure, their values are
4949      unspecified.  */
4950   *nulterm = true;
4951   *allnul = true;
4952   *allnonnul = true;
4953 
4954   ssa_name_limit_t snlim;
4955   return count_nonzero_bytes (exp, 0, 0, lenrange, nulterm, allnul, allnonnul,
4956 			      rvals, snlim);
4957 }
4958 
4959 /* Handle a single or multibyte store other than by a built-in function,
4960    either via a single character assignment or by multi-byte assignment
4961    either via MEM_REF or via a type other than char (such as in
4962    '*(int*)a = 12345').  Return true to let the caller advance *GSI to
4963    the next statement in the basic block and false otherwise.  */
4964 
4965 static bool
handle_store(gimple_stmt_iterator * gsi,bool * zero_write,const vr_values * rvals)4966 handle_store (gimple_stmt_iterator *gsi, bool *zero_write,
4967 	      const vr_values *rvals)
4968 {
4969   int idx = -1;
4970   strinfo *si = NULL;
4971   gimple *stmt = gsi_stmt (*gsi);
4972   tree ssaname = NULL_TREE, lhs = gimple_assign_lhs (stmt);
4973   tree rhs = gimple_assign_rhs1 (stmt);
4974 
4975   /* The offset of the first byte in LHS modified by the store.  */
4976   unsigned HOST_WIDE_INT offset = 0;
4977 
4978   if (TREE_CODE (lhs) == MEM_REF
4979       && TREE_CODE (TREE_OPERAND (lhs, 0)) == SSA_NAME)
4980     {
4981       tree mem_offset = TREE_OPERAND (lhs, 1);
4982       if (tree_fits_uhwi_p (mem_offset))
4983 	{
4984 	  /* Get the strinfo for the base, and use it if it starts with at
4985 	     least OFFSET nonzero characters.  This is trivially true if
4986 	     OFFSET is zero.  */
4987 	  offset = tree_to_uhwi (mem_offset);
4988 	  idx = get_stridx (TREE_OPERAND (lhs, 0));
4989 	  if (idx > 0)
4990 	    si = get_strinfo (idx);
4991 	  if (offset == 0)
4992 	    ssaname = TREE_OPERAND (lhs, 0);
4993 	  else if (si == NULL || compare_nonzero_chars (si, offset, rvals) < 0)
4994 	    {
4995 	      *zero_write = initializer_zerop (rhs);
4996 
4997 	      bool dummy;
4998 	      unsigned lenrange[] = { UINT_MAX, 0, 0 };
4999 	      if (count_nonzero_bytes (rhs, lenrange, &dummy, &dummy, &dummy,
5000 				       rvals))
5001 		maybe_warn_overflow (stmt, lenrange[2], rvals);
5002 
5003 	      return true;
5004 	    }
5005 	}
5006     }
5007   else
5008     {
5009       idx = get_addr_stridx (lhs, NULL_TREE, &offset, rvals);
5010       if (idx > 0)
5011 	si = get_strinfo (idx);
5012     }
5013 
5014   /* Minimum and maximum leading non-zero bytes and the size of the store.  */
5015   unsigned lenrange[] = { UINT_MAX, 0, 0 };
5016 
5017   /* Set to the minimum length of the string being assigned if known.  */
5018   unsigned HOST_WIDE_INT rhs_minlen;
5019 
5020   /* STORING_NONZERO_P is true iff not all stored characters are zero.
5021      STORING_ALL_NONZERO_P is true if all stored characters are zero.
5022      STORING_ALL_ZEROS_P is true iff all stored characters are zero.
5023      Both are false when it's impossible to determine which is true.  */
5024   bool storing_nonzero_p;
5025   bool storing_all_nonzero_p;
5026   bool storing_all_zeros_p;
5027   /* FULL_STRING_P is set when the stored sequence of characters form
5028      a nul-terminated string.  */
5029   bool full_string_p;
5030 
5031   const bool ranges_valid
5032     = count_nonzero_bytes (rhs, lenrange, &full_string_p,
5033 			   &storing_all_zeros_p, &storing_all_nonzero_p,
5034 			   rvals);
5035   if (ranges_valid)
5036     {
5037       rhs_minlen = lenrange[0];
5038       storing_nonzero_p = lenrange[1] > 0;
5039       *zero_write = storing_all_zeros_p;
5040 
5041       maybe_warn_overflow (stmt, lenrange[2], rvals);
5042     }
5043   else
5044     {
5045       rhs_minlen = HOST_WIDE_INT_M1U;
5046       full_string_p = false;
5047       storing_nonzero_p = false;
5048       storing_all_zeros_p = false;
5049       storing_all_nonzero_p = false;
5050     }
5051 
5052   if (si != NULL)
5053     {
5054       /* The corresponding element is set to 1 if the first and last
5055 	 element, respectively, of the sequence of characters being
5056 	 written over the string described by SI ends before
5057 	 the terminating nul (if it has one), to zero if the nul is
5058 	 being overwritten but not beyond, or negative otherwise.  */
5059       int store_before_nul[2];
5060       if (ranges_valid)
5061 	{
5062 	  /* The offset of the last stored byte.  */
5063 	  unsigned HOST_WIDE_INT endoff = offset + lenrange[2] - 1;
5064 	  store_before_nul[0] = compare_nonzero_chars (si, offset, rvals);
5065 	  if (endoff == offset)
5066 	    store_before_nul[1] = store_before_nul[0];
5067 	  else
5068 	    store_before_nul[1] = compare_nonzero_chars (si, endoff, rvals);
5069 	}
5070       else
5071 	{
5072 	  store_before_nul[0] = compare_nonzero_chars (si, offset, rvals);
5073 	  store_before_nul[1] = store_before_nul[0];
5074 	  gcc_assert (offset == 0 || store_before_nul[0] >= 0);
5075 	}
5076 
5077       if (storing_all_zeros_p
5078 	  && store_before_nul[0] == 0
5079 	  && store_before_nul[1] == 0
5080 	  && si->full_string_p)
5081 	{
5082 	  /* When overwriting a '\0' with a '\0', the store can be removed
5083 	     if we know it has been stored in the current function.  */
5084 	  if (!stmt_could_throw_p (cfun, stmt) && si->writable)
5085 	    {
5086 	      unlink_stmt_vdef (stmt);
5087 	      release_defs (stmt);
5088 	      gsi_remove (gsi, true);
5089 	      return false;
5090 	    }
5091 	  else
5092 	    {
5093 	      si->writable = true;
5094 	      gsi_next (gsi);
5095 	      return false;
5096 	    }
5097 	}
5098 
5099       if (store_before_nul[1] > 0
5100 	  && storing_nonzero_p
5101 	  && lenrange[0] == lenrange[1]
5102 	  && lenrange[0] == lenrange[2]
5103 	  && TREE_CODE (TREE_TYPE (rhs)) == INTEGER_TYPE)
5104 	{
5105 	  /* Handle a store of one or more non-nul characters that ends
5106 	     before the terminating nul of the destination and so does
5107 	     not affect its length
5108 	     If si->nonzero_chars > OFFSET, we aren't overwriting '\0',
5109 	     and if we aren't storing '\0', we know that the length of
5110 	     the string and any other zero terminated string in memory
5111 	     remains the same.  In that case we move to the next gimple
5112 	     statement and return to signal the caller that it shouldn't
5113 	     invalidate anything.
5114 
5115 	     This is beneficial for cases like:
5116 
5117 	     char p[20];
5118 	     void foo (char *q)
5119 	     {
5120 	       strcpy (p, "foobar");
5121 	       size_t len = strlen (p);     // can be folded to 6
5122 	       size_t len2 = strlen (q);    // has to be computed
5123 	       p[0] = 'X';
5124 	       size_t len3 = strlen (p);    // can be folded to 6
5125 	       size_t len4 = strlen (q);    // can be folded to len2
5126 	       bar (len, len2, len3, len4);
5127 	       } */
5128 	  gsi_next (gsi);
5129 	  return false;
5130 	}
5131 
5132       if (storing_all_zeros_p
5133 	  || storing_nonzero_p
5134 	  || (offset != 0 && store_before_nul[1] > 0))
5135 	{
5136 	  /* When STORING_NONZERO_P, we know that the string will start
5137 	     with at least OFFSET + 1 nonzero characters.  If storing
5138 	     a single character, set si->NONZERO_CHARS to the result.
5139 	     If storing multiple characters, try to determine the number
5140 	     of leading non-zero characters and set si->NONZERO_CHARS to
5141 	     the result instead.
5142 
5143 	     When STORING_ALL_ZEROS_P, we know that the string is now
5144 	     OFFSET characters long.
5145 
5146 	     Otherwise, we're storing an unknown value at offset OFFSET,
5147 	     so need to clip the nonzero_chars to OFFSET.
5148 	     Use the minimum length of the string (or individual character)
5149 	     being stored if it's known.  Otherwise, STORING_NONZERO_P
5150 	     guarantees it's at least 1.  */
5151 	  HOST_WIDE_INT len
5152 	    = storing_nonzero_p && ranges_valid ? lenrange[0] : 1;
5153 	  location_t loc = gimple_location (stmt);
5154 	  tree oldlen = si->nonzero_chars;
5155 	  if (store_before_nul[1] == 0 && si->full_string_p)
5156 	    /* We're overwriting the nul terminator with a nonzero or
5157 	       unknown character.  If the previous stmt was a memcpy,
5158 	       its length may be decreased.  */
5159 	    adjust_last_stmt (si, stmt, false);
5160 	  si = unshare_strinfo (si);
5161 	  if (storing_nonzero_p)
5162 	    {
5163 	      gcc_assert (len >= 0);
5164 	      si->nonzero_chars = build_int_cst (size_type_node, offset + len);
5165 	    }
5166 	  else
5167 	    si->nonzero_chars = build_int_cst (size_type_node, offset);
5168 
5169 	  /* Set FULL_STRING_P only if the length of the strings being
5170 	     written is the same, and clear it if the strings have
5171 	     different lengths.  In the latter case the length stored
5172 	     in si->NONZERO_CHARS becomes the lower bound.
5173 	     FIXME: Handle the upper bound of the length if possible.  */
5174 	  si->full_string_p = full_string_p && lenrange[0] == lenrange[1];
5175 
5176 	  if (storing_all_zeros_p
5177 	      && ssaname
5178 	      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5179 	    si->endptr = ssaname;
5180 	  else
5181 	    si->endptr = NULL;
5182 	  si->next = 0;
5183 	  si->stmt = NULL;
5184 	  si->writable = true;
5185 	  si->dont_invalidate = true;
5186 	  if (oldlen)
5187 	    {
5188 	      tree adj = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
5189 					  si->nonzero_chars, oldlen);
5190 	      adjust_related_strinfos (loc, si, adj);
5191 	    }
5192 	  else
5193 	    si->prev = 0;
5194 	}
5195     }
5196   else if (idx == 0 && (storing_all_zeros_p || storing_nonzero_p))
5197     {
5198       if (ssaname)
5199 	idx = new_stridx (ssaname);
5200       else
5201 	idx = new_addr_stridx (lhs);
5202       if (idx != 0)
5203 	{
5204 	  tree ptr = (ssaname ? ssaname : build_fold_addr_expr (lhs));
5205 
5206 	  HOST_WIDE_INT slen;
5207 	  if (storing_all_zeros_p)
5208 	    slen = 0;
5209 	  else if (storing_nonzero_p && ranges_valid)
5210 	    {
5211 	      /* FIXME: Handle the upper bound of the length when
5212 		 LENRANGE[0] != LENRANGE[1].  */
5213 	      slen = lenrange[0];
5214 	      if (lenrange[0] != lenrange[1])
5215 		/* Set the minimum length but ignore the maximum
5216 		   for now.  */
5217 		full_string_p = false;
5218 	    }
5219 	  else
5220 	    slen = -1;
5221 
5222 	  tree len = (slen <= 0
5223 		      ? size_zero_node
5224 		      : build_int_cst (size_type_node, slen));
5225 	  si = new_strinfo (ptr, idx, len, slen >= 0 && full_string_p);
5226 	  set_strinfo (idx, si);
5227 	  if (storing_all_zeros_p
5228 	      && ssaname
5229 	      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5230 	    si->endptr = ssaname;
5231 	  si->dont_invalidate = true;
5232 	  si->writable = true;
5233 	}
5234     }
5235   else if (idx == 0
5236 	   && rhs_minlen < HOST_WIDE_INT_M1U
5237 	   && ssaname == NULL_TREE
5238 	   && TREE_CODE (TREE_TYPE (lhs)) == ARRAY_TYPE)
5239     {
5240       HOST_WIDE_INT a = int_size_in_bytes (TREE_TYPE (lhs));
5241       if (a > 0 && (unsigned HOST_WIDE_INT) a > rhs_minlen)
5242 	{
5243 	  int idx = new_addr_stridx (lhs);
5244 	  if (idx != 0)
5245 	    {
5246 	      si = new_strinfo (build_fold_addr_expr (lhs), idx,
5247 				build_int_cst (size_type_node, rhs_minlen),
5248 				full_string_p);
5249 	      set_strinfo (idx, si);
5250 	      si->dont_invalidate = true;
5251 	    }
5252 	}
5253     }
5254 
5255   if (si != NULL && offset == 0 && storing_all_zeros_p && lenrange[2] == 1)
5256     {
5257       /* For single-byte stores only, allow adjust_last_stmt to remove
5258 	 the statement if the stored '\0' is immediately overwritten.  */
5259       laststmt.stmt = stmt;
5260       laststmt.len = build_int_cst (size_type_node, 1);
5261       laststmt.stridx = si->idx;
5262     }
5263   return true;
5264 }
5265 
5266 /* Try to fold strstr (s, t) eq/ne s to strncmp (s, t, strlen (t)) eq/ne 0.  */
5267 
5268 static void
fold_strstr_to_strncmp(tree rhs1,tree rhs2,gimple * stmt)5269 fold_strstr_to_strncmp (tree rhs1, tree rhs2, gimple *stmt)
5270 {
5271   if (TREE_CODE (rhs1) != SSA_NAME
5272       || TREE_CODE (rhs2) != SSA_NAME)
5273     return;
5274 
5275   gimple *call_stmt = NULL;
5276   for (int pass = 0; pass < 2; pass++)
5277     {
5278       gimple *g = SSA_NAME_DEF_STMT (rhs1);
5279       if (gimple_call_builtin_p (g, BUILT_IN_STRSTR)
5280 	  && has_single_use (rhs1)
5281 	  && gimple_call_arg (g, 0) == rhs2)
5282 	{
5283 	  call_stmt = g;
5284 	  break;
5285 	}
5286       std::swap (rhs1, rhs2);
5287     }
5288 
5289   if (call_stmt)
5290     {
5291       tree arg0 = gimple_call_arg (call_stmt, 0);
5292 
5293       if (arg0 == rhs2)
5294 	{
5295 	  tree arg1 = gimple_call_arg (call_stmt, 1);
5296 	  tree arg1_len = NULL_TREE;
5297 	  int idx = get_stridx (arg1);
5298 
5299 	  if (idx)
5300 	    {
5301 	      if (idx < 0)
5302 		arg1_len = build_int_cst (size_type_node, ~idx);
5303 	      else
5304 		{
5305 		  strinfo *si = get_strinfo (idx);
5306 		  if (si)
5307 		    arg1_len = get_string_length (si);
5308 		}
5309 	    }
5310 
5311 	  if (arg1_len != NULL_TREE)
5312 	    {
5313 	      gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
5314 	      tree strncmp_decl = builtin_decl_explicit (BUILT_IN_STRNCMP);
5315 
5316 	      if (!is_gimple_val (arg1_len))
5317 		{
5318 		  tree arg1_len_tmp = make_ssa_name (TREE_TYPE (arg1_len));
5319 		  gassign *arg1_stmt = gimple_build_assign (arg1_len_tmp,
5320 							    arg1_len);
5321 		  gsi_insert_before (&gsi, arg1_stmt, GSI_SAME_STMT);
5322 		  arg1_len = arg1_len_tmp;
5323 		}
5324 
5325 	      gcall *strncmp_call = gimple_build_call (strncmp_decl, 3,
5326 						      arg0, arg1, arg1_len);
5327 	      tree strncmp_lhs = make_ssa_name (integer_type_node);
5328 	      gimple_set_vuse (strncmp_call, gimple_vuse (call_stmt));
5329 	      gimple_call_set_lhs (strncmp_call, strncmp_lhs);
5330 	      gsi_remove (&gsi, true);
5331 	      gsi_insert_before (&gsi, strncmp_call, GSI_SAME_STMT);
5332 	      tree zero = build_zero_cst (TREE_TYPE (strncmp_lhs));
5333 
5334 	      if (is_gimple_assign (stmt))
5335 		{
5336 		  if (gimple_assign_rhs_code (stmt) == COND_EXPR)
5337 		    {
5338 		      tree cond = gimple_assign_rhs1 (stmt);
5339 		      TREE_OPERAND (cond, 0) = strncmp_lhs;
5340 		      TREE_OPERAND (cond, 1) = zero;
5341 		    }
5342 		  else
5343 		    {
5344 		      gimple_assign_set_rhs1 (stmt, strncmp_lhs);
5345 		      gimple_assign_set_rhs2 (stmt, zero);
5346 		    }
5347 		}
5348 	      else
5349 		{
5350 		  gcond *cond = as_a<gcond *> (stmt);
5351 		  gimple_cond_set_lhs (cond, strncmp_lhs);
5352 		  gimple_cond_set_rhs (cond, zero);
5353 		}
5354 	      update_stmt (stmt);
5355 	    }
5356 	}
5357     }
5358 }
5359 
5360 /* Return true if TYPE corresponds to a narrow character type.  */
5361 
5362 static bool
is_char_type(tree type)5363 is_char_type (tree type)
5364 {
5365   return (TREE_CODE (type) == INTEGER_TYPE
5366 	  && TYPE_MODE (type) == TYPE_MODE (char_type_node)
5367 	  && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node));
5368 }
5369 
5370 /* Check the built-in call at GSI for validity and optimize it.
5371    Uses RVALS to determine range information.
5372    Return true to let the caller advance *GSI to the next statement
5373    in the basic block and false otherwise.  */
5374 
5375 static bool
strlen_check_and_optimize_call(gimple_stmt_iterator * gsi,bool * zero_write,const vr_values * rvals)5376 strlen_check_and_optimize_call (gimple_stmt_iterator *gsi, bool *zero_write,
5377 				const vr_values *rvals)
5378 {
5379   gimple *stmt = gsi_stmt (*gsi);
5380 
5381   if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
5382     {
5383       tree fntype = gimple_call_fntype (stmt);
5384       if (fntype && lookup_attribute ("alloc_size", TYPE_ATTRIBUTES (fntype)))
5385 	handle_alloc_call (BUILT_IN_NONE, gsi);
5386     }
5387 
5388   /* When not optimizing we must be checking printf calls which
5389      we do even for user-defined functions when they are declared
5390      with attribute format.  */
5391   if (!flag_optimize_strlen
5392       || !strlen_optimize
5393       || !valid_builtin_call (stmt))
5394     return !handle_printf_call (gsi, rvals);
5395 
5396   tree callee = gimple_call_fndecl (stmt);
5397   switch (DECL_FUNCTION_CODE (callee))
5398     {
5399     case BUILT_IN_STRLEN:
5400     case BUILT_IN_STRNLEN:
5401       handle_builtin_strlen (gsi);
5402       break;
5403     case BUILT_IN_STRCHR:
5404       handle_builtin_strchr (gsi);
5405       break;
5406     case BUILT_IN_STRCPY:
5407     case BUILT_IN_STRCPY_CHK:
5408     case BUILT_IN_STPCPY:
5409     case BUILT_IN_STPCPY_CHK:
5410       handle_builtin_strcpy (DECL_FUNCTION_CODE (callee), gsi, rvals);
5411       break;
5412 
5413     case BUILT_IN_STRNCAT:
5414     case BUILT_IN_STRNCAT_CHK:
5415       handle_builtin_strncat (DECL_FUNCTION_CODE (callee), gsi);
5416       break;
5417 
5418     case BUILT_IN_STPNCPY:
5419     case BUILT_IN_STPNCPY_CHK:
5420     case BUILT_IN_STRNCPY:
5421     case BUILT_IN_STRNCPY_CHK:
5422       handle_builtin_stxncpy_strncat (false, gsi);
5423       break;
5424 
5425     case BUILT_IN_MEMCPY:
5426     case BUILT_IN_MEMCPY_CHK:
5427     case BUILT_IN_MEMPCPY:
5428     case BUILT_IN_MEMPCPY_CHK:
5429       handle_builtin_memcpy (DECL_FUNCTION_CODE (callee), gsi, rvals);
5430       break;
5431     case BUILT_IN_STRCAT:
5432     case BUILT_IN_STRCAT_CHK:
5433       handle_builtin_strcat (DECL_FUNCTION_CODE (callee), gsi);
5434       break;
5435     case BUILT_IN_ALLOCA:
5436     case BUILT_IN_ALLOCA_WITH_ALIGN:
5437     case BUILT_IN_MALLOC:
5438     case BUILT_IN_CALLOC:
5439       handle_alloc_call (DECL_FUNCTION_CODE (callee), gsi);
5440       break;
5441     case BUILT_IN_MEMSET:
5442       if (handle_builtin_memset (gsi, zero_write, rvals))
5443 	return false;
5444       break;
5445     case BUILT_IN_MEMCMP:
5446       if (handle_builtin_memcmp (gsi))
5447 	return false;
5448       break;
5449     case BUILT_IN_STRCMP:
5450     case BUILT_IN_STRNCMP:
5451       if (handle_builtin_string_cmp (gsi, rvals))
5452 	return false;
5453       break;
5454     default:
5455       if (handle_printf_call (gsi, rvals))
5456 	return false;
5457       break;
5458     }
5459 
5460   return true;
5461 }
5462 
5463 /* Handle an assignment statement at *GSI to a LHS of integral type.
5464    If GSI's basic block needs clean-up of EH, set *CLEANUP_EH to true.  */
5465 
5466 static void
handle_integral_assign(gimple_stmt_iterator * gsi,bool * cleanup_eh,const vr_values * rvals)5467 handle_integral_assign (gimple_stmt_iterator *gsi, bool *cleanup_eh,
5468 			const vr_values *rvals)
5469 {
5470   gimple *stmt = gsi_stmt (*gsi);
5471   tree lhs = gimple_assign_lhs (stmt);
5472   tree lhs_type = TREE_TYPE (lhs);
5473 
5474   enum tree_code code = gimple_assign_rhs_code (stmt);
5475   if (code == COND_EXPR)
5476     {
5477       tree cond = gimple_assign_rhs1 (stmt);
5478       enum tree_code cond_code = TREE_CODE (cond);
5479 
5480       if (cond_code == EQ_EXPR || cond_code == NE_EXPR)
5481 	fold_strstr_to_strncmp (TREE_OPERAND (cond, 0),
5482 				TREE_OPERAND (cond, 1), stmt);
5483     }
5484   else if (code == EQ_EXPR || code == NE_EXPR)
5485     fold_strstr_to_strncmp (gimple_assign_rhs1 (stmt),
5486 			    gimple_assign_rhs2 (stmt), stmt);
5487   else if (gimple_assign_load_p (stmt)
5488 	   && TREE_CODE (lhs_type) == INTEGER_TYPE
5489 	   && TYPE_MODE (lhs_type) == TYPE_MODE (char_type_node)
5490 	   && (TYPE_PRECISION (lhs_type)
5491 	       == TYPE_PRECISION (char_type_node))
5492 	   && !gimple_has_volatile_ops (stmt))
5493     {
5494       tree off = integer_zero_node;
5495       unsigned HOST_WIDE_INT coff = 0;
5496       int idx = 0;
5497       tree rhs1 = gimple_assign_rhs1 (stmt);
5498       if (code == MEM_REF)
5499 	{
5500 	  idx = get_stridx (TREE_OPERAND (rhs1, 0));
5501 	  if (idx > 0)
5502 	    {
5503 	      strinfo *si = get_strinfo (idx);
5504 	      if (si
5505 		  && si->nonzero_chars
5506 		  && TREE_CODE (si->nonzero_chars) == INTEGER_CST
5507 		  && (wi::to_widest (si->nonzero_chars)
5508 		      >= wi::to_widest (off)))
5509 		off = TREE_OPERAND (rhs1, 1);
5510 	      else
5511 		/* This case is not useful.  See if get_addr_stridx
5512 		   returns something usable.  */
5513 		idx = 0;
5514 	    }
5515 	}
5516       if (idx <= 0)
5517 	idx = get_addr_stridx (rhs1, NULL_TREE, &coff);
5518       if (idx > 0)
5519 	{
5520 	  strinfo *si = get_strinfo (idx);
5521 	  if (si
5522 	      && si->nonzero_chars
5523 	      && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
5524 	    {
5525 	      widest_int w1 = wi::to_widest (si->nonzero_chars);
5526 	      widest_int w2 = wi::to_widest (off) + coff;
5527 	      if (w1 == w2
5528 		  && si->full_string_p)
5529 		{
5530 		  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5531 		    {
5532 		      fprintf (dump_file, "Optimizing: ");
5533 		      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5534 		    }
5535 
5536 		  /* Reading the final '\0' character.  */
5537 		  tree zero = build_int_cst (lhs_type, 0);
5538 		  gimple_set_vuse (stmt, NULL_TREE);
5539 		  gimple_assign_set_rhs_from_tree (gsi, zero);
5540 		  *cleanup_eh
5541 		    |= maybe_clean_or_replace_eh_stmt (stmt,
5542 						       gsi_stmt (*gsi));
5543 		  stmt = gsi_stmt (*gsi);
5544 		  update_stmt (stmt);
5545 
5546 		  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5547 		    {
5548 		      fprintf (dump_file, "into: ");
5549 		      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5550 		    }
5551 		}
5552 	      else if (w1 > w2)
5553 		{
5554 		  /* Reading a character before the final '\0'
5555 		     character.  Just set the value range to ~[0, 0]
5556 		     if we don't have anything better.  */
5557 		  wide_int min, max;
5558 		  signop sign = TYPE_SIGN (lhs_type);
5559 		  int prec = TYPE_PRECISION (lhs_type);
5560 		  value_range_kind vr = get_range_info (lhs, &min, &max);
5561 		  if (vr == VR_VARYING
5562 		      || (vr == VR_RANGE
5563 			  && min == wi::min_value (prec, sign)
5564 			  && max == wi::max_value (prec, sign)))
5565 		    set_range_info (lhs, VR_ANTI_RANGE,
5566 				    wi::zero (prec), wi::zero (prec));
5567 		}
5568 	    }
5569 	}
5570     }
5571   else if (code == MEM_REF && TREE_CODE (lhs) == SSA_NAME)
5572     {
5573       if (int idx = new_stridx (lhs))
5574 	{
5575 	  /* Record multi-byte assignments from MEM_REFs.  */
5576 	  bool storing_all_nonzero_p;
5577 	  bool storing_all_zeros_p;
5578 	  bool full_string_p;
5579 	  unsigned lenrange[] = { UINT_MAX, 0, 0 };
5580 	  tree rhs = gimple_assign_rhs1 (stmt);
5581 	  const bool ranges_valid
5582 	    = count_nonzero_bytes (rhs, lenrange, &full_string_p,
5583 				   &storing_all_zeros_p, &storing_all_nonzero_p,
5584 				   rvals);
5585 	  if (ranges_valid)
5586 	    {
5587 	      tree length = build_int_cst (sizetype, lenrange[0]);
5588 	      strinfo *si = new_strinfo (lhs, idx, length, full_string_p);
5589 	      set_strinfo (idx, si);
5590 	      si->writable = true;
5591 	      si->dont_invalidate = true;
5592 	      maybe_warn_overflow (stmt, lenrange[2], rvals);
5593 	    }
5594 	}
5595     }
5596 
5597   if (strlen_to_stridx)
5598     {
5599       tree rhs1 = gimple_assign_rhs1 (stmt);
5600       if (stridx_strlenloc *ps = strlen_to_stridx->get (rhs1))
5601 	strlen_to_stridx->put (lhs, stridx_strlenloc (*ps));
5602     }
5603 }
5604 
5605 /* Attempt to check for validity of the performed access a single statement
5606    at *GSI using string length knowledge, and to optimize it.
5607    If the given basic block needs clean-up of EH, CLEANUP_EH is set to
5608    true.  Return true to let the caller advance *GSI to the next statement
5609    in the basic block and false otherwise.  */
5610 
5611 static bool
check_and_optimize_stmt(gimple_stmt_iterator * gsi,bool * cleanup_eh,const vr_values * rvals)5612 check_and_optimize_stmt (gimple_stmt_iterator *gsi, bool *cleanup_eh,
5613 			 const vr_values *rvals)
5614 {
5615   gimple *stmt = gsi_stmt (*gsi);
5616 
5617   /* For statements that modify a string, set to true if the write
5618      is only zeros.  */
5619   bool zero_write = false;
5620 
5621   if (is_gimple_call (stmt))
5622     {
5623       if (!strlen_check_and_optimize_call (gsi, &zero_write, rvals))
5624 	return false;
5625     }
5626   else if (!flag_optimize_strlen || !strlen_optimize)
5627     return true;
5628   else if (is_gimple_assign (stmt) && !gimple_clobber_p (stmt))
5629     {
5630       /* Handle non-clobbering assignment.  */
5631       tree lhs = gimple_assign_lhs (stmt);
5632       tree lhs_type = TREE_TYPE (lhs);
5633 
5634       if (TREE_CODE (lhs) == SSA_NAME && POINTER_TYPE_P (lhs_type))
5635 	{
5636 	  if (gimple_assign_single_p (stmt)
5637 	      || (gimple_assign_cast_p (stmt)
5638 		  && POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt)))))
5639 	    {
5640 	      int idx = get_stridx (gimple_assign_rhs1 (stmt));
5641 	      ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = idx;
5642 	    }
5643 	  else if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
5644 	    handle_pointer_plus (gsi);
5645 	}
5646       else if (TREE_CODE (lhs) == SSA_NAME && INTEGRAL_TYPE_P (lhs_type))
5647 	/* Handle assignment to a character.  */
5648 	handle_integral_assign (gsi, cleanup_eh, rvals);
5649       else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
5650 	{
5651 	  tree type = TREE_TYPE (lhs);
5652 	  if (TREE_CODE (type) == ARRAY_TYPE)
5653 	    type = TREE_TYPE (type);
5654 
5655 	bool is_char_store = is_char_type (type);
5656 	if (!is_char_store && TREE_CODE (lhs) == MEM_REF)
5657 	  {
5658 	    /* To consider stores into char objects via integer types
5659 	       other than char but not those to non-character objects,
5660 	       determine the type of the destination rather than just
5661 	       the type of the access.  */
5662 	    for (int i = 0; i != 2; ++i)
5663 	      {
5664 		tree ref = TREE_OPERAND (lhs, i);
5665 		type = TREE_TYPE (ref);
5666 		if (TREE_CODE (type) == POINTER_TYPE)
5667 		  type = TREE_TYPE (type);
5668 		if (TREE_CODE (type) == ARRAY_TYPE)
5669 		  type = TREE_TYPE (type);
5670 		if (is_char_type (type))
5671 		  {
5672 		    is_char_store = true;
5673 		    break;
5674 		  }
5675 	      }
5676 	  }
5677 
5678 	  /* Handle a single or multibyte assignment.  */
5679 	  if (is_char_store && !handle_store (gsi, &zero_write, rvals))
5680 	    return false;
5681 	}
5682     }
5683   else if (gcond *cond = dyn_cast<gcond *> (stmt))
5684     {
5685       enum tree_code code = gimple_cond_code (cond);
5686       if (code == EQ_EXPR || code == NE_EXPR)
5687 	fold_strstr_to_strncmp (gimple_cond_lhs (stmt),
5688 				gimple_cond_rhs (stmt), stmt);
5689     }
5690 
5691   if (gimple_vdef (stmt))
5692     maybe_invalidate (stmt, zero_write);
5693   return true;
5694 }
5695 
5696 /* Recursively call maybe_invalidate on stmts that might be executed
5697    in between dombb and current bb and that contain a vdef.  Stop when
5698    *count stmts are inspected, or if the whole strinfo vector has
5699    been invalidated.  */
5700 
5701 static void
do_invalidate(basic_block dombb,gimple * phi,bitmap visited,int * count)5702 do_invalidate (basic_block dombb, gimple *phi, bitmap visited, int *count)
5703 {
5704   unsigned int i, n = gimple_phi_num_args (phi);
5705 
5706   for (i = 0; i < n; i++)
5707     {
5708       tree vuse = gimple_phi_arg_def (phi, i);
5709       gimple *stmt = SSA_NAME_DEF_STMT (vuse);
5710       basic_block bb = gimple_bb (stmt);
5711       if (bb == NULL
5712 	  || bb == dombb
5713 	  || !bitmap_set_bit (visited, bb->index)
5714 	  || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5715 	continue;
5716       while (1)
5717 	{
5718 	  if (gimple_code (stmt) == GIMPLE_PHI)
5719 	    {
5720 	      do_invalidate (dombb, stmt, visited, count);
5721 	      if (*count == 0)
5722 		return;
5723 	      break;
5724 	    }
5725 	  if (--*count == 0)
5726 	    return;
5727 	  if (!maybe_invalidate (stmt))
5728 	    {
5729 	      *count = 0;
5730 	      return;
5731 	    }
5732 	  vuse = gimple_vuse (stmt);
5733 	  stmt = SSA_NAME_DEF_STMT (vuse);
5734 	  if (gimple_bb (stmt) != bb)
5735 	    {
5736 	      bb = gimple_bb (stmt);
5737 	      if (bb == NULL
5738 		  || bb == dombb
5739 		  || !bitmap_set_bit (visited, bb->index)
5740 		  || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5741 		break;
5742 	    }
5743 	}
5744     }
5745 }
5746 
5747 class strlen_dom_walker : public dom_walker
5748 {
5749 public:
strlen_dom_walker(cdi_direction direction)5750   strlen_dom_walker (cdi_direction direction)
5751     : dom_walker (direction),
5752     evrp (false),
5753     m_cleanup_cfg (false)
5754   {}
5755 
5756   virtual edge before_dom_children (basic_block);
5757   virtual void after_dom_children (basic_block);
5758 
5759   /* EVRP analyzer used for printf argument range processing, and
5760      to track strlen results across integer variable assignments.  */
5761   evrp_range_analyzer evrp;
5762 
5763   /* Flag that will trigger TODO_cleanup_cfg to be returned in strlen
5764      execute function.  */
5765   bool m_cleanup_cfg;
5766 };
5767 
5768 /* Callback for walk_dominator_tree.  Attempt to optimize various
5769    string ops by remembering string lengths pointed by pointer SSA_NAMEs.  */
5770 
5771 edge
before_dom_children(basic_block bb)5772 strlen_dom_walker::before_dom_children (basic_block bb)
5773 {
5774   evrp.enter (bb);
5775 
5776   basic_block dombb = get_immediate_dominator (CDI_DOMINATORS, bb);
5777 
5778   if (dombb == NULL)
5779     stridx_to_strinfo = NULL;
5780   else
5781     {
5782       stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) dombb->aux);
5783       if (stridx_to_strinfo)
5784 	{
5785 	  for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5786 	       gsi_next (&gsi))
5787 	    {
5788 	      gphi *phi = gsi.phi ();
5789 	      if (virtual_operand_p (gimple_phi_result (phi)))
5790 		{
5791 		  bitmap visited = BITMAP_ALLOC (NULL);
5792 		  int count_vdef = 100;
5793 		  do_invalidate (dombb, phi, visited, &count_vdef);
5794 		  BITMAP_FREE (visited);
5795 		  if (count_vdef == 0)
5796 		    {
5797 		      /* If there were too many vdefs in between immediate
5798 			 dominator and current bb, invalidate everything.
5799 			 If stridx_to_strinfo has been unshared, we need
5800 			 to free it, otherwise just set it to NULL.  */
5801 		      if (!strinfo_shared ())
5802 			{
5803 			  unsigned int i;
5804 			  strinfo *si;
5805 
5806 			  for (i = 1;
5807 			       vec_safe_iterate (stridx_to_strinfo, i, &si);
5808 			       ++i)
5809 			    {
5810 			      free_strinfo (si);
5811 			      (*stridx_to_strinfo)[i] = NULL;
5812 			    }
5813 			}
5814 		      else
5815 			stridx_to_strinfo = NULL;
5816 		    }
5817 		  break;
5818 		}
5819 	    }
5820 	}
5821     }
5822 
5823   /* If all PHI arguments have the same string index, the PHI result
5824      has it as well.  */
5825   for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5826        gsi_next (&gsi))
5827     {
5828       gphi *phi = gsi.phi ();
5829       tree result = gimple_phi_result (phi);
5830       if (!virtual_operand_p (result) && POINTER_TYPE_P (TREE_TYPE (result)))
5831 	{
5832 	  int idx = get_stridx (gimple_phi_arg_def (phi, 0));
5833 	  if (idx != 0)
5834 	    {
5835 	      unsigned int i, n = gimple_phi_num_args (phi);
5836 	      for (i = 1; i < n; i++)
5837 		if (idx != get_stridx (gimple_phi_arg_def (phi, i)))
5838 		  break;
5839 	      if (i == n)
5840 		ssa_ver_to_stridx[SSA_NAME_VERSION (result)] = idx;
5841 	    }
5842 	}
5843     }
5844 
5845   bool cleanup_eh = false;
5846 
5847   /* Attempt to optimize individual statements.  */
5848   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
5849     {
5850       gimple *stmt = gsi_stmt (gsi);
5851 
5852       /* First record ranges generated by this statement so they
5853 	 can be used by printf argument processing.  */
5854       evrp.record_ranges_from_stmt (stmt, false);
5855 
5856       if (check_and_optimize_stmt (&gsi, &cleanup_eh, evrp.get_vr_values ()))
5857 	gsi_next (&gsi);
5858     }
5859 
5860   if (cleanup_eh && gimple_purge_dead_eh_edges (bb))
5861       m_cleanup_cfg = true;
5862 
5863   bb->aux = stridx_to_strinfo;
5864   if (vec_safe_length (stridx_to_strinfo) && !strinfo_shared ())
5865     (*stridx_to_strinfo)[0] = (strinfo *) bb;
5866   return NULL;
5867 }
5868 
5869 /* Callback for walk_dominator_tree.  Free strinfo vector if it is
5870    owned by the current bb, clear bb->aux.  */
5871 
5872 void
after_dom_children(basic_block bb)5873 strlen_dom_walker::after_dom_children (basic_block bb)
5874 {
5875   evrp.leave (bb);
5876 
5877   if (bb->aux)
5878     {
5879       stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) bb->aux);
5880       if (vec_safe_length (stridx_to_strinfo)
5881 	  && (*stridx_to_strinfo)[0] == (strinfo *) bb)
5882 	{
5883 	  unsigned int i;
5884 	  strinfo *si;
5885 
5886 	  for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
5887 	    free_strinfo (si);
5888 	  vec_free (stridx_to_strinfo);
5889 	}
5890       bb->aux = NULL;
5891     }
5892 }
5893 
5894 namespace {
5895 
5896 static unsigned int
printf_strlen_execute(function * fun,bool warn_only)5897 printf_strlen_execute (function *fun, bool warn_only)
5898 {
5899   strlen_optimize = !warn_only;
5900 
5901   calculate_dominance_info (CDI_DOMINATORS);
5902 
5903   bool use_scev = optimize > 0 && flag_printf_return_value;
5904   if (use_scev)
5905     {
5906       loop_optimizer_init (LOOPS_NORMAL);
5907       scev_initialize ();
5908     }
5909 
5910   gcc_assert (!strlen_to_stridx);
5911   if (warn_stringop_overflow || warn_stringop_truncation)
5912     strlen_to_stridx = new hash_map<tree, stridx_strlenloc> ();
5913 
5914   /* This has to happen after initializing the loop optimizer
5915      and initializing SCEV as they create new SSA_NAMEs.  */
5916   ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names);
5917   max_stridx = 1;
5918 
5919   /* String length optimization is implemented as a walk of the dominator
5920      tree and a forward walk of statements within each block.  */
5921   strlen_dom_walker walker (CDI_DOMINATORS);
5922   walker.walk (ENTRY_BLOCK_PTR_FOR_FN (fun));
5923 
5924   ssa_ver_to_stridx.release ();
5925   strinfo_pool.release ();
5926   if (decl_to_stridxlist_htab)
5927     {
5928       obstack_free (&stridx_obstack, NULL);
5929       delete decl_to_stridxlist_htab;
5930       decl_to_stridxlist_htab = NULL;
5931     }
5932   laststmt.stmt = NULL;
5933   laststmt.len = NULL_TREE;
5934   laststmt.stridx = 0;
5935 
5936   if (strlen_to_stridx)
5937     {
5938       strlen_to_stridx->empty ();
5939       delete strlen_to_stridx;
5940       strlen_to_stridx = NULL;
5941     }
5942 
5943   if (use_scev)
5944     {
5945       scev_finalize ();
5946       loop_optimizer_finalize ();
5947     }
5948 
5949   /* Clean up object size info.  */
5950   fini_object_sizes ();
5951 
5952   return walker.m_cleanup_cfg ? TODO_cleanup_cfg : 0;
5953 }
5954 
5955 /* This file defines two passes: one for warnings that runs only when
5956    optimization is disabled, and another that implements optimizations
5957    and also issues warnings.  */
5958 
5959 const pass_data pass_data_warn_printf =
5960 {
5961   GIMPLE_PASS, /* type */
5962   "warn-printf", /* name */
5963   OPTGROUP_NONE, /* optinfo_flags */
5964   TV_NONE, /* tv_id */
5965   /* Normally an optimization pass would require PROP_ssa but because
5966      this pass runs early, with no optimization, to do sprintf format
5967      checking, it only requires PROP_cfg.  */
5968   PROP_cfg, /* properties_required */
5969   0, /* properties_provided */
5970   0, /* properties_destroyed */
5971   0, /* todo_flags_start */
5972   0, /* todo_flags_finish */
5973 };
5974 
5975 class pass_warn_printf : public gimple_opt_pass
5976 {
5977 public:
pass_warn_printf(gcc::context * ctxt)5978   pass_warn_printf (gcc::context *ctxt)
5979     : gimple_opt_pass (pass_data_warn_printf, ctxt)
5980   {}
5981 
5982   virtual bool gate (function *);
execute(function * fun)5983   virtual unsigned int execute (function *fun)
5984   {
5985     return printf_strlen_execute (fun, true);
5986   }
5987 };
5988 
5989 
5990 /* Return true to run the warning pass only when not optimizing and
5991    iff either -Wformat-overflow or -Wformat-truncation is specified.  */
5992 
5993 bool
gate(function *)5994 pass_warn_printf::gate (function *)
5995 {
5996   return !optimize && (warn_format_overflow > 0 || warn_format_trunc > 0);
5997 }
5998 
5999 const pass_data pass_data_strlen =
6000 {
6001   GIMPLE_PASS, /* type */
6002   "strlen", /* name */
6003   OPTGROUP_NONE, /* optinfo_flags */
6004   TV_TREE_STRLEN, /* tv_id */
6005   PROP_cfg | PROP_ssa, /* properties_required */
6006   0, /* properties_provided */
6007   0, /* properties_destroyed */
6008   0, /* todo_flags_start */
6009   0, /* todo_flags_finish */
6010 };
6011 
6012 class pass_strlen : public gimple_opt_pass
6013 {
6014 public:
pass_strlen(gcc::context * ctxt)6015   pass_strlen (gcc::context *ctxt)
6016     : gimple_opt_pass (pass_data_strlen, ctxt)
6017   {}
6018 
clone()6019   opt_pass * clone () { return new pass_strlen (m_ctxt); }
6020 
6021   virtual bool gate (function *);
execute(function * fun)6022   virtual unsigned int execute (function *fun)
6023   {
6024     return printf_strlen_execute (fun, false);
6025   }
6026 };
6027 
6028 /* Return true to run the pass only when the sprintf and/or strlen
6029    optimizations are enabled and -Wformat-overflow or -Wformat-truncation
6030    are specified.  */
6031 
6032 bool
gate(function *)6033 pass_strlen::gate (function *)
6034 {
6035   return ((warn_format_overflow > 0
6036 	   || warn_format_trunc > 0
6037 	   || warn_restrict > 0
6038 	   || flag_optimize_strlen > 0
6039 	   || flag_printf_return_value)
6040 	  && optimize > 0);
6041 }
6042 
6043 } // anon namespace
6044 
6045 gimple_opt_pass *
make_pass_warn_printf(gcc::context * ctxt)6046 make_pass_warn_printf (gcc::context *ctxt)
6047 {
6048   return new pass_warn_printf (ctxt);
6049 }
6050 
6051 gimple_opt_pass *
make_pass_strlen(gcc::context * ctxt)6052 make_pass_strlen (gcc::context *ctxt)
6053 {
6054   return new pass_strlen (ctxt);
6055 }
6056