1 /* String length optimization
2    Copyright (C) 2011-2020 Free Software Foundation, Inc.
3    Contributed by Jakub Jelinek <jakub@redhat.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "alloc-pool.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "gimple-pretty-print.h"
33 #include "gimple-ssa-warn-restrict.h"
34 #include "fold-const.h"
35 #include "stor-layout.h"
36 #include "gimple-fold.h"
37 #include "tree-eh.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "expr.h"
42 #include "tree-cfg.h"
43 #include "tree-dfa.h"
44 #include "domwalk.h"
45 #include "tree-ssa-alias.h"
46 #include "tree-ssa-propagate.h"
47 #include "tree-ssa-strlen.h"
48 #include "tree-hash-traits.h"
49 #include "tree-object-size.h"
50 #include "builtins.h"
51 #include "target.h"
52 #include "diagnostic-core.h"
53 #include "diagnostic.h"
54 #include "intl.h"
55 #include "attribs.h"
56 #include "calls.h"
57 #include "cfgloop.h"
58 #include "tree-ssa-loop.h"
59 #include "tree-scalar-evolution.h"
60 #include "vr-values.h"
61 #include "gimple-ssa-evrp-analyze.h"
62 #include "tree-ssa.h"
63 
64 /* A vector indexed by SSA_NAME_VERSION.  0 means unknown, positive value
65    is an index into strinfo vector, negative value stands for
66    string length of a string literal (~strlen).  */
67 static vec<int> ssa_ver_to_stridx;
68 
69 /* Number of currently active string indexes plus one.  */
70 static int max_stridx;
71 
72 /* Set to true to optimize, false when just checking.  */
73 static bool strlen_optimize;
74 
75 /* String information record.  */
76 struct strinfo
77 {
78   /* Number of leading characters that are known to be nonzero.  This is
79      also the length of the string if FULL_STRING_P.
80 
81      The values in a list of related string pointers must be consistent;
82      that is, if strinfo B comes X bytes after strinfo A, it must be
83      the case that A->nonzero_chars == X + B->nonzero_chars.  */
84   tree nonzero_chars;
85   /* Any of the corresponding pointers for querying alias oracle.  */
86   tree ptr;
87   /* STMT is used for two things:
88 
89      - To record the statement that should be used for delayed length
90        computations.  We maintain the invariant that all related strinfos
91        have delayed lengths or none do.
92 
93      - To record the malloc or calloc call that produced this result
94        to optimize away malloc/memset sequences.  STMT is reset after
95        a calloc-allocated object has been stored a non-zero value into.  */
96   gimple *stmt;
97   /* Set to the dynamic allocation statement for the object (alloca,
98      calloc, malloc, or VLA).  Unlike STMT, once set for a strinfo
99      object, ALLOC doesn't change.  */
100   gimple *alloc;
101   /* Pointer to '\0' if known, if NULL, it can be computed as
102      ptr + length.  */
103   tree endptr;
104   /* Reference count.  Any changes to strinfo entry possibly shared
105      with dominating basic blocks need unshare_strinfo first, except
106      for dont_invalidate which affects only the immediately next
107      maybe_invalidate.  */
108   int refcount;
109   /* Copy of index.  get_strinfo (si->idx) should return si;  */
110   int idx;
111   /* These 3 fields are for chaining related string pointers together.
112      E.g. for
113      bl = strlen (b); dl = strlen (d); strcpy (a, b); c = a + bl;
114      strcpy (c, d); e = c + dl;
115      strinfo(a) -> strinfo(c) -> strinfo(e)
116      All have ->first field equal to strinfo(a)->idx and are doubly
117      chained through prev/next fields.  The later strinfos are required
118      to point into the same string with zero or more bytes after
119      the previous pointer and all bytes in between the two pointers
120      must be non-zero.  Functions like strcpy or memcpy are supposed
121      to adjust all previous strinfo lengths, but not following strinfo
122      lengths (those are uncertain, usually invalidated during
123      maybe_invalidate, except when the alias oracle knows better).
124      Functions like strcat on the other side adjust the whole
125      related strinfo chain.
126      They are updated lazily, so to use the chain the same first fields
127      and si->prev->next == si->idx needs to be verified.  */
128   int first;
129   int next;
130   int prev;
131   /* A flag whether the string is known to be written in the current
132      function.  */
133   bool writable;
134   /* A flag for the next maybe_invalidate that this strinfo shouldn't
135      be invalidated.  Always cleared by maybe_invalidate.  */
136   bool dont_invalidate;
137   /* True if the string is known to be nul-terminated after NONZERO_CHARS
138      characters.  False is useful when detecting strings that are built
139      up via successive memcpys.  */
140   bool full_string_p;
141 };
142 
143 /* Pool for allocating strinfo_struct entries.  */
144 static object_allocator<strinfo> strinfo_pool ("strinfo pool");
145 
146 /* Vector mapping positive string indexes to strinfo, for the
147    current basic block.  The first pointer in the vector is special,
148    it is either NULL, meaning the vector isn't shared, or it is
149    a basic block pointer to the owner basic_block if shared.
150    If some other bb wants to modify the vector, the vector needs
151    to be unshared first, and only the owner bb is supposed to free it.  */
152 static vec<strinfo *, va_heap, vl_embed> *stridx_to_strinfo;
153 
154 /* One OFFSET->IDX mapping.  */
155 struct stridxlist
156 {
157   struct stridxlist *next;
158   HOST_WIDE_INT offset;
159   int idx;
160 };
161 
162 /* Hash table entry, mapping a DECL to a chain of OFFSET->IDX mappings.  */
163 struct decl_stridxlist_map
164 {
165   struct tree_map_base base;
166   struct stridxlist list;
167 };
168 
169 /* Hash table for mapping decls to a chained list of offset -> idx
170    mappings.  */
171 typedef hash_map<tree_decl_hash, stridxlist> decl_to_stridxlist_htab_t;
172 static decl_to_stridxlist_htab_t *decl_to_stridxlist_htab;
173 
174 /* Hash table mapping strlen (or strnlen with constant bound and return
175    smaller than bound) calls to stridx instances describing
176    the calls' arguments.  Non-null only when warn_stringop_truncation
177    is non-zero.  */
178 typedef std::pair<int, location_t> stridx_strlenloc;
179 static hash_map<tree, stridx_strlenloc> *strlen_to_stridx;
180 
181 /* Obstack for struct stridxlist and struct decl_stridxlist_map.  */
182 static struct obstack stridx_obstack;
183 
184 /* Last memcpy statement if it could be adjusted if the trailing
185    '\0' written is immediately overwritten, or
186    *x = '\0' store that could be removed if it is immediately overwritten.  */
187 struct laststmt_struct
188 {
189   gimple *stmt;
190   tree len;
191   int stridx;
192 } laststmt;
193 
194 static int get_stridx_plus_constant (strinfo *, unsigned HOST_WIDE_INT, tree);
195 static void handle_builtin_stxncpy_strncat (bool, gimple_stmt_iterator *);
196 
197 /* Sets MINMAX to either the constant value or the range VAL is in
198    and returns either the constant value or VAL on success or null
199    when the range couldn't be determined.  Uses RVALS when nonnull
200    to determine the range, otherwise get_range_info.  */
201 
202 tree
get_range(tree val,wide_int minmax[2],const vr_values * rvals)203 get_range (tree val, wide_int minmax[2], const vr_values *rvals /* = NULL */)
204 {
205   if (TREE_CODE (val) == INTEGER_CST)
206     {
207       minmax[0] = minmax[1] = wi::to_wide (val);
208       return val;
209     }
210 
211   if (TREE_CODE (val) != SSA_NAME)
212     return NULL_TREE;
213 
214   if (rvals)
215     {
216       /* The range below may be "inaccurate" if a constant has been
217 	 substituted earlier for VAL by this pass that hasn't been
218 	 propagated through the CFG.  This shoud be fixed by the new
219 	 on-demand VRP if/when it becomes available (hopefully in
220 	 GCC 11).  */
221       const value_range *vr
222 	= (CONST_CAST (class vr_values *, rvals)->get_value_range (val));
223       value_range_kind rng = vr->kind ();
224       if (rng != VR_RANGE || !range_int_cst_p (vr))
225 	return NULL_TREE;
226 
227       minmax[0] = wi::to_wide (vr->min ());
228       minmax[1] = wi::to_wide (vr->max ());
229       return val;
230     }
231 
232   value_range_kind rng = get_range_info (val, minmax, minmax + 1);
233   if (rng == VR_RANGE)
234     return val;
235 
236   /* Do not handle anti-ranges and instead make use of the on-demand
237      VRP if/when it becomes available (hopefully in GCC 11).  */
238   return NULL_TREE;
239 }
240 
241 /* Return:
242 
243    *  +1  if SI is known to start with more than OFF nonzero characters.
244 
245    *   0  if SI is known to start with exactly OFF nonzero characters.
246 
247    *  -1  if SI either does not start with OFF nonzero characters
248 	  or the relationship between the number of leading nonzero
249 	  characters in SI and OFF is unknown.  */
250 
251 static inline int
compare_nonzero_chars(strinfo * si,unsigned HOST_WIDE_INT off)252 compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off)
253 {
254   if (si->nonzero_chars
255       && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
256     return compare_tree_int (si->nonzero_chars, off);
257   else
258     return -1;
259 }
260 
261 /* Same as above but suitable also for strings with non-constant lengths.
262    Uses RVALS to determine length range.  */
263 
264 static int
compare_nonzero_chars(strinfo * si,unsigned HOST_WIDE_INT off,const vr_values * rvals)265 compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off,
266 		       const vr_values *rvals)
267 {
268   if (!si->nonzero_chars)
269     return -1;
270 
271   if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
272     return compare_tree_int (si->nonzero_chars, off);
273 
274   if (!rvals || TREE_CODE (si->nonzero_chars) != SSA_NAME)
275     return -1;
276 
277   const value_range_equiv *vr
278     = (CONST_CAST (class vr_values *, rvals)
279        ->get_value_range (si->nonzero_chars));
280 
281   value_range_kind rng = vr->kind ();
282   if (rng != VR_RANGE || !range_int_cst_p (vr))
283     return -1;
284 
285   /* If the offset is less than the minimum length or if the bounds
286      of the length range are equal return the result of the comparison
287      same as in the constant case.  Otherwise return a conservative
288      result.  */
289   int cmpmin = compare_tree_int (vr->min (), off);
290   if (cmpmin > 0 || tree_int_cst_equal (vr->min (), vr->max ()))
291     return cmpmin;
292 
293   return -1;
294 }
295 
296 /* Return true if SI is known to be a zero-length string.  */
297 
298 static inline bool
zero_length_string_p(strinfo * si)299 zero_length_string_p (strinfo *si)
300 {
301   return si->full_string_p && integer_zerop (si->nonzero_chars);
302 }
303 
304 /* Return strinfo vector entry IDX.  */
305 
306 static inline strinfo *
get_strinfo(int idx)307 get_strinfo (int idx)
308 {
309   if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
310     return NULL;
311   return (*stridx_to_strinfo)[idx];
312 }
313 
314 /* Get the next strinfo in the chain after SI, or null if none.  */
315 
316 static inline strinfo *
get_next_strinfo(strinfo * si)317 get_next_strinfo (strinfo *si)
318 {
319   if (si->next == 0)
320     return NULL;
321   strinfo *nextsi = get_strinfo (si->next);
322   if (nextsi == NULL || nextsi->first != si->first || nextsi->prev != si->idx)
323     return NULL;
324   return nextsi;
325 }
326 
327 /* Helper function for get_stridx.  Return the strinfo index of the address
328    of EXP, which is available in PTR if nonnull.  If OFFSET_OUT, it is
329    OK to return the index for some X <= &EXP and store &EXP - X in
330    *OFFSET_OUT.  When RVALS is nonnull uses it to determine range
331    information.  */
332 
333 static int
334 get_addr_stridx (tree exp, tree ptr, unsigned HOST_WIDE_INT *offset_out,
335 		 const vr_values *rvals = NULL)
336 {
337   HOST_WIDE_INT off;
338   struct stridxlist *list, *last = NULL;
339   tree base;
340 
341   if (!decl_to_stridxlist_htab)
342     return 0;
343 
344   poly_int64 poff;
345   base = get_addr_base_and_unit_offset (exp, &poff);
346   if (base == NULL || !DECL_P (base) || !poff.is_constant (&off))
347     return 0;
348 
349   list = decl_to_stridxlist_htab->get (base);
350   if (list == NULL)
351     return 0;
352 
353   do
354     {
355       if (list->offset == off)
356 	{
357 	  if (offset_out)
358 	    *offset_out = 0;
359 	  return list->idx;
360 	}
361       if (list->offset > off)
362 	return 0;
363       last = list;
364       list = list->next;
365     }
366   while (list);
367 
368   if ((offset_out || ptr) && last && last->idx > 0)
369     {
370       unsigned HOST_WIDE_INT rel_off
371 	= (unsigned HOST_WIDE_INT) off - last->offset;
372       strinfo *si = get_strinfo (last->idx);
373       if (si && compare_nonzero_chars (si, rel_off, rvals) >= 0)
374 	{
375 	  if (offset_out)
376 	    {
377 	      *offset_out = rel_off;
378 	      return last->idx;
379 	    }
380 	  else
381 	    return get_stridx_plus_constant (si, rel_off, ptr);
382 	}
383     }
384   return 0;
385 }
386 
387 /* Returns string index for EXP.  When EXP is an SSA_NAME that refers
388    to a known strinfo with an offset and OFFRNG is non-null, sets
389    both elements of the OFFRNG array to the range of the offset and
390    returns the index of the known strinfo.  In this case the result
391    must not be used in for functions that modify the string.
392    When nonnull, uses RVALS to determine range information.  */
393 
394 static int
395 get_stridx (tree exp, wide_int offrng[2] = NULL, const vr_values *rvals = NULL)
396 {
397   if (offrng)
398     offrng[0] = offrng[1] = wi::zero (TYPE_PRECISION (ptrdiff_type_node));
399 
400   if (TREE_CODE (exp) == SSA_NAME)
401     {
402       if (ssa_ver_to_stridx[SSA_NAME_VERSION (exp)])
403 	return ssa_ver_to_stridx[SSA_NAME_VERSION (exp)];
404 
405       tree e = exp;
406       int last_idx = 0;
407       HOST_WIDE_INT offset = 0;
408       /* Follow a chain of at most 5 assignments.  */
409       for (int i = 0; i < 5; i++)
410 	{
411 	  gimple *def_stmt = SSA_NAME_DEF_STMT (e);
412 	  if (!is_gimple_assign (def_stmt))
413 	    return last_idx;
414 
415 	  tree_code rhs_code = gimple_assign_rhs_code (def_stmt);
416 	  tree ptr, off;
417 
418 	  if (rhs_code == ADDR_EXPR)
419 	    {
420 	      /* Handle indices/offsets into VLAs which are implemented
421 	         as pointers to arrays.  */
422 	      ptr = gimple_assign_rhs1 (def_stmt);
423 	      ptr = TREE_OPERAND (ptr, 0);
424 
425 	      /* Handle also VLAs of types larger than char.  */
426 	      if (tree eltsize = TYPE_SIZE_UNIT (TREE_TYPE (ptr)))
427 		{
428 		  if (TREE_CODE (ptr) == ARRAY_REF)
429 		    {
430 		      off = TREE_OPERAND (ptr, 1);
431 		      ptr = TREE_OPERAND (ptr, 0);
432 		      if (!integer_onep (eltsize))
433 			{
434 			  /* Scale the array index by the size of the element
435 			     type in the rare case that it's greater than
436 			     the typical 1 for char, making sure both operands
437 			     have the same type.  */
438 			  eltsize = fold_convert (ssizetype, eltsize);
439 			  off = fold_convert (ssizetype, off);
440 			  off = fold_build2 (MULT_EXPR, ssizetype, off, eltsize);
441 			}
442 		    }
443 		  else
444 		    off = integer_zero_node;
445 		}
446 	      else
447 		return 0;
448 
449 	      if (TREE_CODE (ptr) != MEM_REF)
450 	        return 0;
451 
452 	      /* Add the MEM_REF byte offset.  */
453 	      tree mem_off = TREE_OPERAND (ptr, 1);
454 	      off = fold_build2 (PLUS_EXPR, TREE_TYPE (off), off, mem_off);
455 	      ptr = TREE_OPERAND (ptr, 0);
456 	    }
457 	  else if (rhs_code == POINTER_PLUS_EXPR)
458 	    {
459 	      ptr = gimple_assign_rhs1 (def_stmt);
460 	      off = gimple_assign_rhs2 (def_stmt);
461 	    }
462 	  else
463 	    return 0;
464 
465 	  if (TREE_CODE (ptr) != SSA_NAME)
466 	    return 0;
467 
468 	  if (!tree_fits_shwi_p (off))
469 	    {
470 	      if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
471 		if (offrng)
472 		  {
473 		    /* Only when requested by setting OFFRNG to non-null,
474 		       return the index corresponding to the SSA_NAME.
475 		       Do this irrespective of the whether the offset
476 		       is known.  */
477 		    if (get_range (off, offrng, rvals))
478 		      {
479 			/* When the offset range is known, increment it
480 			   it by the constant offset computed in prior
481 			   iterations and store it in the OFFRNG array.  */
482  			offrng[0] += offset;
483 			offrng[1] += offset;
484 		      }
485 		    else
486 		      {
487 			/* When the offset range cannot be determined
488 			   store [0, SIZE_MAX] and let the caller decide
489 			   if the offset matters.  */
490 			offrng[1] = wi::to_wide (TYPE_MAX_VALUE (sizetype));
491 			offrng[0] = wi::zero (offrng[1].get_precision ());
492 		      }
493 		    return idx;
494 		  }
495 	      return 0;
496 	    }
497 
498 	  HOST_WIDE_INT this_off = tree_to_shwi (off);
499 	  if (offrng)
500 	    {
501 	      offrng[0] += wi::shwi (this_off, offrng->get_precision ());
502 	      offrng[1] += offrng[0];
503 	    }
504 
505 	  if (this_off < 0)
506 	    return last_idx;
507 
508 	  offset = (unsigned HOST_WIDE_INT) offset + this_off;
509 	  if (offset < 0)
510 	    return last_idx;
511 
512 	  if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
513 	    {
514 	      strinfo *si = get_strinfo (idx);
515 	      if (si)
516 		{
517 		  if (compare_nonzero_chars (si, offset) >= 0)
518 		    return get_stridx_plus_constant (si, offset, exp);
519 
520 		  if (offrng)
521 		    last_idx = idx;
522 		}
523 	    }
524 	  e = ptr;
525 	}
526 
527       return last_idx;
528     }
529 
530   if (TREE_CODE (exp) == ADDR_EXPR)
531     {
532       int idx = get_addr_stridx (TREE_OPERAND (exp, 0), exp, NULL);
533       if (idx != 0)
534 	return idx;
535     }
536 
537   const char *p = c_getstr (exp);
538   if (p)
539     return ~(int) strlen (p);
540 
541   return 0;
542 }
543 
544 /* Return true if strinfo vector is shared with the immediate dominator.  */
545 
546 static inline bool
strinfo_shared(void)547 strinfo_shared (void)
548 {
549   return vec_safe_length (stridx_to_strinfo)
550 	 && (*stridx_to_strinfo)[0] != NULL;
551 }
552 
553 /* Unshare strinfo vector that is shared with the immediate dominator.  */
554 
555 static void
unshare_strinfo_vec(void)556 unshare_strinfo_vec (void)
557 {
558   strinfo *si;
559   unsigned int i = 0;
560 
561   gcc_assert (strinfo_shared ());
562   stridx_to_strinfo = vec_safe_copy (stridx_to_strinfo);
563   for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
564     if (si != NULL)
565       si->refcount++;
566   (*stridx_to_strinfo)[0] = NULL;
567 }
568 
569 /* Attempt to create a string index for exp, ADDR_EXPR's operand.
570    Return a pointer to the location where the string index can
571    be stored (if 0) or is stored, or NULL if this can't be tracked.  */
572 
573 static int *
addr_stridxptr(tree exp)574 addr_stridxptr (tree exp)
575 {
576   HOST_WIDE_INT off;
577 
578   poly_int64 poff;
579   tree base = get_addr_base_and_unit_offset (exp, &poff);
580   if (base == NULL_TREE || !DECL_P (base) || !poff.is_constant (&off))
581     return NULL;
582 
583   if (!decl_to_stridxlist_htab)
584     {
585       decl_to_stridxlist_htab
586        	= new hash_map<tree_decl_hash, stridxlist> (64);
587       gcc_obstack_init (&stridx_obstack);
588     }
589 
590   bool existed;
591   stridxlist *list = &decl_to_stridxlist_htab->get_or_insert (base, &existed);
592   if (existed)
593     {
594       int i;
595       stridxlist *before = NULL;
596       for (i = 0; i < 32; i++)
597 	{
598 	  if (list->offset == off)
599 	    return &list->idx;
600 	  if (list->offset > off && before == NULL)
601 	    before = list;
602 	  if (list->next == NULL)
603 	    break;
604 	  list = list->next;
605 	}
606       if (i == 32)
607 	return NULL;
608       if (before)
609 	{
610 	  list = before;
611 	  before = XOBNEW (&stridx_obstack, struct stridxlist);
612 	  *before = *list;
613 	  list->next = before;
614 	  list->offset = off;
615 	  list->idx = 0;
616 	  return &list->idx;
617 	}
618       list->next = XOBNEW (&stridx_obstack, struct stridxlist);
619       list = list->next;
620     }
621 
622   list->next = NULL;
623   list->offset = off;
624   list->idx = 0;
625   return &list->idx;
626 }
627 
628 /* Create a new string index, or return 0 if reached limit.  */
629 
630 static int
new_stridx(tree exp)631 new_stridx (tree exp)
632 {
633   int idx;
634   if (max_stridx >= param_max_tracked_strlens)
635     return 0;
636   if (TREE_CODE (exp) == SSA_NAME)
637     {
638       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
639 	return 0;
640       idx = max_stridx++;
641       ssa_ver_to_stridx[SSA_NAME_VERSION (exp)] = idx;
642       return idx;
643     }
644   if (TREE_CODE (exp) == ADDR_EXPR)
645     {
646       int *pidx = addr_stridxptr (TREE_OPERAND (exp, 0));
647       if (pidx != NULL)
648 	{
649 	  gcc_assert (*pidx == 0);
650 	  *pidx = max_stridx++;
651 	  return *pidx;
652 	}
653     }
654   return 0;
655 }
656 
657 /* Like new_stridx, but for ADDR_EXPR's operand instead.  */
658 
659 static int
new_addr_stridx(tree exp)660 new_addr_stridx (tree exp)
661 {
662   int *pidx;
663   if (max_stridx >= param_max_tracked_strlens)
664     return 0;
665   pidx = addr_stridxptr (exp);
666   if (pidx != NULL)
667     {
668       gcc_assert (*pidx == 0);
669       *pidx = max_stridx++;
670       return *pidx;
671     }
672   return 0;
673 }
674 
675 /* Create a new strinfo.  */
676 
677 static strinfo *
new_strinfo(tree ptr,int idx,tree nonzero_chars,bool full_string_p)678 new_strinfo (tree ptr, int idx, tree nonzero_chars, bool full_string_p)
679 {
680   strinfo *si = strinfo_pool.allocate ();
681   si->nonzero_chars = nonzero_chars;
682   STRIP_USELESS_TYPE_CONVERSION (ptr);
683   si->ptr = ptr;
684   si->stmt = NULL;
685   si->alloc = NULL;
686   si->endptr = NULL_TREE;
687   si->refcount = 1;
688   si->idx = idx;
689   si->first = 0;
690   si->prev = 0;
691   si->next = 0;
692   si->writable = false;
693   si->dont_invalidate = false;
694   si->full_string_p = full_string_p;
695   return si;
696 }
697 
698 /* Decrease strinfo refcount and free it if not referenced anymore.  */
699 
700 static inline void
free_strinfo(strinfo * si)701 free_strinfo (strinfo *si)
702 {
703   if (si && --si->refcount == 0)
704     strinfo_pool.remove (si);
705 }
706 
707 /* Set strinfo in the vector entry IDX to SI.  */
708 
709 static inline void
set_strinfo(int idx,strinfo * si)710 set_strinfo (int idx, strinfo *si)
711 {
712   if (vec_safe_length (stridx_to_strinfo) && (*stridx_to_strinfo)[0])
713     unshare_strinfo_vec ();
714   if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
715     vec_safe_grow_cleared (stridx_to_strinfo, idx + 1);
716   (*stridx_to_strinfo)[idx] = si;
717 }
718 
719 /* Return the first strinfo in the related strinfo chain
720    if all strinfos in between belong to the chain, otherwise NULL.  */
721 
722 static strinfo *
verify_related_strinfos(strinfo * origsi)723 verify_related_strinfos (strinfo *origsi)
724 {
725   strinfo *si = origsi, *psi;
726 
727   if (origsi->first == 0)
728     return NULL;
729   for (; si->prev; si = psi)
730     {
731       if (si->first != origsi->first)
732 	return NULL;
733       psi = get_strinfo (si->prev);
734       if (psi == NULL)
735 	return NULL;
736       if (psi->next != si->idx)
737 	return NULL;
738     }
739   if (si->idx != si->first)
740     return NULL;
741   return si;
742 }
743 
744 /* Set SI's endptr to ENDPTR and compute its length based on SI->ptr.
745    Use LOC for folding.  */
746 
747 static void
set_endptr_and_length(location_t loc,strinfo * si,tree endptr)748 set_endptr_and_length (location_t loc, strinfo *si, tree endptr)
749 {
750   si->endptr = endptr;
751   si->stmt = NULL;
752   tree start_as_size = fold_convert_loc (loc, size_type_node, si->ptr);
753   tree end_as_size = fold_convert_loc (loc, size_type_node, endptr);
754   si->nonzero_chars = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
755 				       end_as_size, start_as_size);
756   si->full_string_p = true;
757 }
758 
759 /* Return the string length, or NULL if it can't be computed.
760    The length may but need not be constant.  Instead, it might be
761    the result of a strlen() call.  */
762 
763 static tree
get_string_length(strinfo * si)764 get_string_length (strinfo *si)
765 {
766   /* If the length has already been computed return it if it's exact
767      (i.e., the string is nul-terminated at NONZERO_CHARS), or return
768      null if it isn't.  */
769   if (si->nonzero_chars)
770     return si->full_string_p ? si->nonzero_chars : NULL;
771 
772   /* If the string is the result of one of the built-in calls below
773      attempt to compute the length from the call statement.  */
774   if (si->stmt)
775     {
776       gimple *stmt = si->stmt, *lenstmt;
777       tree callee, lhs, fn, tem;
778       location_t loc;
779       gimple_stmt_iterator gsi;
780 
781       gcc_assert (is_gimple_call (stmt));
782       callee = gimple_call_fndecl (stmt);
783       gcc_assert (callee && fndecl_built_in_p (callee, BUILT_IN_NORMAL));
784       lhs = gimple_call_lhs (stmt);
785       /* unshare_strinfo is intentionally not called here.  The (delayed)
786 	 transformation of strcpy or strcat into stpcpy is done at the place
787 	 of the former strcpy/strcat call and so can affect all the strinfos
788 	 with the same stmt.  If they were unshared before and transformation
789 	 has been already done, the handling of BUILT_IN_STPCPY{,_CHK} should
790 	 just compute the right length.  */
791       switch (DECL_FUNCTION_CODE (callee))
792 	{
793 	case BUILT_IN_STRCAT:
794 	case BUILT_IN_STRCAT_CHK:
795 	  gsi = gsi_for_stmt (stmt);
796 	  fn = builtin_decl_implicit (BUILT_IN_STRLEN);
797 	  gcc_assert (lhs == NULL_TREE);
798 	  tem = unshare_expr (gimple_call_arg (stmt, 0));
799 	  lenstmt = gimple_build_call (fn, 1, tem);
800 	  lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), lenstmt);
801 	  gimple_call_set_lhs (lenstmt, lhs);
802 	  gimple_set_vuse (lenstmt, gimple_vuse (stmt));
803 	  gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
804 	  tem = gimple_call_arg (stmt, 0);
805           if (!ptrofftype_p (TREE_TYPE (lhs)))
806             {
807               lhs = convert_to_ptrofftype (lhs);
808               lhs = force_gimple_operand_gsi (&gsi, lhs, true, NULL_TREE,
809                                               true, GSI_SAME_STMT);
810             }
811 	  lenstmt = gimple_build_assign
812 			(make_ssa_name (TREE_TYPE (gimple_call_arg (stmt, 0))),
813 			 POINTER_PLUS_EXPR,tem, lhs);
814 	  gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
815 	  gimple_call_set_arg (stmt, 0, gimple_assign_lhs (lenstmt));
816 	  lhs = NULL_TREE;
817 	  /* FALLTHRU */
818 	case BUILT_IN_STRCPY:
819 	case BUILT_IN_STRCPY_CHK:
820 	  gcc_assert (builtin_decl_implicit_p (BUILT_IN_STPCPY));
821 	  if (gimple_call_num_args (stmt) == 2)
822 	    fn = builtin_decl_implicit (BUILT_IN_STPCPY);
823 	  else
824 	    fn = builtin_decl_explicit (BUILT_IN_STPCPY_CHK);
825 	  gcc_assert (lhs == NULL_TREE);
826 	  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
827 	    {
828 	      fprintf (dump_file, "Optimizing: ");
829 	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
830 	    }
831 	  gimple_call_set_fndecl (stmt, fn);
832 	  lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), stmt);
833 	  gimple_call_set_lhs (stmt, lhs);
834 	  update_stmt (stmt);
835 	  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
836 	    {
837 	      fprintf (dump_file, "into: ");
838 	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
839 	    }
840 	  /* FALLTHRU */
841 	case BUILT_IN_STPCPY:
842 	case BUILT_IN_STPCPY_CHK:
843 	  gcc_assert (lhs != NULL_TREE);
844 	  loc = gimple_location (stmt);
845 	  set_endptr_and_length (loc, si, lhs);
846 	  for (strinfo *chainsi = verify_related_strinfos (si);
847 	       chainsi != NULL;
848 	       chainsi = get_next_strinfo (chainsi))
849 	    if (chainsi->nonzero_chars == NULL)
850 	      set_endptr_and_length (loc, chainsi, lhs);
851 	  break;
852 	case BUILT_IN_ALLOCA:
853 	case BUILT_IN_ALLOCA_WITH_ALIGN:
854 	case BUILT_IN_MALLOC:
855 	  break;
856 	/* BUILT_IN_CALLOC always has si->nonzero_chars set.  */
857 	default:
858 	  gcc_unreachable ();
859 	  break;
860 	}
861     }
862 
863   return si->nonzero_chars;
864 }
865 
866 /* Dump strlen data to FP for statement STMT.  When non-null, RVALS
867    points to EVRP info and is used to dump strlen range for non-constant
868    results.  */
869 
870 DEBUG_FUNCTION void
dump_strlen_info(FILE * fp,gimple * stmt,const vr_values * rvals)871 dump_strlen_info (FILE *fp, gimple *stmt, const vr_values *rvals)
872 {
873   if (stmt)
874     {
875       fprintf (fp, "\nDumping strlen pass data after ");
876       print_gimple_expr (fp, stmt, TDF_LINENO);
877       fputc ('\n', fp);
878     }
879   else
880     fprintf (fp, "\nDumping strlen pass data\n");
881 
882   fprintf (fp, "max_stridx = %i\n", max_stridx);
883   fprintf (fp, "ssa_ver_to_stridx has %u elements\n",
884 	   ssa_ver_to_stridx.length ());
885   fprintf (fp, "stridx_to_strinfo");
886   if (stridx_to_strinfo)
887     {
888       fprintf (fp, " has %u elements\n", stridx_to_strinfo->length ());
889       for (unsigned i = 0; i != stridx_to_strinfo->length (); ++i)
890 	{
891 	  if (strinfo *si = (*stridx_to_strinfo)[i])
892 	    {
893 	      if (!si->idx)
894 		continue;
895 	      fprintf (fp, "  idx = %i", si->idx);
896 	      if (si->ptr)
897 		{
898 		  fprintf (fp, ", ptr = ");
899 		  print_generic_expr (fp, si->ptr);
900 		}
901 
902 	      if (si->nonzero_chars)
903 		{
904 		  fprintf (fp, ", nonzero_chars = ");
905 		  print_generic_expr (fp, si->nonzero_chars);
906 		  if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
907 		    {
908 		      value_range_kind rng = VR_UNDEFINED;
909 		      wide_int min, max;
910 		      if (rvals)
911 			{
912 			  const value_range *vr
913 			    = CONST_CAST (class vr_values *, rvals)
914 			    ->get_value_range (si->nonzero_chars);
915 			  rng = vr->kind ();
916 			  if (range_int_cst_p (vr))
917 			    {
918 			      min = wi::to_wide (vr->min ());
919 			      max = wi::to_wide (vr->max ());
920 			    }
921 			  else
922 			    rng = VR_UNDEFINED;
923 			}
924 		      else
925 			rng = get_range_info (si->nonzero_chars, &min, &max);
926 
927 		      if (rng == VR_RANGE || rng == VR_ANTI_RANGE)
928 			{
929 			  fprintf (fp, " %s[%llu, %llu]",
930 				   rng == VR_RANGE ? "" : "~",
931 				   (long long) min.to_uhwi (),
932 				   (long long) max.to_uhwi ());
933 			}
934 		    }
935 		}
936 
937 	      fprintf (fp, ", refcount = %i", si->refcount);
938 	      if (si->stmt)
939 		{
940 		  fprintf (fp, ", stmt = ");
941 		  print_gimple_expr (fp, si->stmt, 0);
942 		}
943 	      if (si->alloc)
944 		{
945 		  fprintf (fp, ", alloc = ");
946 		  print_gimple_expr (fp, si->alloc, 0);
947 		}
948 	      if (si->writable)
949 		fprintf (fp, ", writable");
950 	      if (si->dont_invalidate)
951 		fprintf (fp, ", dont_invalidate");
952 	      if (si->full_string_p)
953 		fprintf (fp, ", full_string_p");
954 	      if (strinfo *next = get_next_strinfo (si))
955 		{
956 		  fprintf (fp, ", {");
957 		  do
958 		    fprintf (fp, "%i%s", next->idx, next->first ? ", " : "");
959 		  while ((next = get_next_strinfo (next)));
960 		  fprintf (fp, "}");
961 		}
962 	      fputs ("\n", fp);
963 	    }
964 	}
965     }
966   else
967     fprintf (fp, " = null\n");
968 
969   fprintf (fp, "decl_to_stridxlist_htab");
970   if (decl_to_stridxlist_htab)
971     {
972       fputs ("\n", fp);
973       typedef decl_to_stridxlist_htab_t::iterator iter_t;
974       for (iter_t it = decl_to_stridxlist_htab->begin ();
975 	   it != decl_to_stridxlist_htab->end (); ++it)
976 	{
977 	  tree decl = (*it).first;
978 	  stridxlist *list = &(*it).second;
979 	  fprintf (fp, "  decl = ");
980 	  print_generic_expr (fp, decl);
981 	  if (list)
982 	    {
983 	      fprintf (fp, ", offsets = {");
984 	      for (; list; list = list->next)
985 		fprintf (fp, "%lli%s", (long long) list->offset,
986 			 list->next ? ", " : "");
987 	      fputs ("}", fp);
988 	    }
989 	  fputs ("\n", fp);
990 	}
991     }
992   else
993     fprintf (fp, " = null\n");
994 
995   if (laststmt.stmt)
996     {
997       fprintf (fp, "laststmt = ");
998       print_gimple_expr (fp, laststmt.stmt, 0);
999       fprintf (fp, ", len = ");
1000       print_generic_expr (fp, laststmt.len);
1001       fprintf (fp, ", stridx = %i\n", laststmt.stridx);
1002     }
1003 }
1004 
1005 /* Attempt to determine the length of the string SRC.  On success, store
1006    the length in *PDATA and return true.  Otherwise, return false.
1007    VISITED is a bitmap of visited PHI nodes.  RVALS points to EVRP info
1008    and PSSA_DEF_MAX to an SSA_NAME assignment limit used to prevent runaway
1009    recursion.  */
1010 
1011 static bool
get_range_strlen_dynamic(tree src,c_strlen_data * pdata,bitmap * visited,const vr_values * rvals,unsigned * pssa_def_max)1012 get_range_strlen_dynamic (tree src, c_strlen_data *pdata, bitmap *visited,
1013 			  const vr_values *rvals, unsigned *pssa_def_max)
1014 {
1015   int idx = get_stridx (src);
1016   if (!idx)
1017     {
1018       if (TREE_CODE (src) == SSA_NAME)
1019 	{
1020 	  gimple *def_stmt = SSA_NAME_DEF_STMT (src);
1021 	  if (gimple_code (def_stmt) == GIMPLE_PHI)
1022 	    {
1023 	      if (!*visited)
1024 		*visited = BITMAP_ALLOC (NULL);
1025 
1026 	      if (!bitmap_set_bit (*visited, SSA_NAME_VERSION (src)))
1027 		return true;
1028 
1029 	      if (*pssa_def_max == 0)
1030 		return false;
1031 
1032 	      --*pssa_def_max;
1033 
1034 	      /* Iterate over the PHI arguments and determine the minimum
1035 		 and maximum length/size of each and incorporate them into
1036 		 the overall result.  */
1037 	      gphi *phi = as_a <gphi *> (def_stmt);
1038 	      for (unsigned i = 0; i != gimple_phi_num_args (phi); ++i)
1039 		{
1040 		  tree arg = gimple_phi_arg_def (phi, i);
1041 		  if (arg == gimple_phi_result (def_stmt))
1042 		    continue;
1043 
1044 		  c_strlen_data argdata = { };
1045 		  if (get_range_strlen_dynamic (arg, &argdata, visited, rvals,
1046 						pssa_def_max))
1047 		    {
1048 		      /* Set the DECL of an unterminated array this argument
1049 			 refers to if one hasn't been found yet.  */
1050 		      if (!pdata->decl && argdata.decl)
1051 			pdata->decl = argdata.decl;
1052 
1053 		      if (!argdata.minlen
1054 			  || (integer_zerop (argdata.minlen)
1055 			      && (!argdata.maxbound
1056 				  || integer_all_onesp (argdata.maxbound))
1057 			      && integer_all_onesp (argdata.maxlen)))
1058 			{
1059 			  /* Set the upper bound of the length to unbounded.  */
1060 			  pdata->maxlen = build_all_ones_cst (size_type_node);
1061 			  continue;
1062 			}
1063 
1064 		      /* Adjust the minimum and maximum length determined
1065 			 so far and the upper bound on the array size.  */
1066 		      if (!pdata->minlen
1067 			  || tree_int_cst_lt (argdata.minlen, pdata->minlen))
1068 			pdata->minlen = argdata.minlen;
1069 		      if (!pdata->maxlen
1070 			  || (argdata.maxlen
1071 			      && tree_int_cst_lt (pdata->maxlen, argdata.maxlen)))
1072 			pdata->maxlen = argdata.maxlen;
1073 		      if (!pdata->maxbound
1074 			  || TREE_CODE (pdata->maxbound) != INTEGER_CST
1075 			  || (argdata.maxbound
1076 			      && tree_int_cst_lt (pdata->maxbound,
1077 						  argdata.maxbound)
1078 			      && !integer_all_onesp (argdata.maxbound)))
1079 			pdata->maxbound = argdata.maxbound;
1080 		    }
1081 		  else
1082 		    pdata->maxlen = build_all_ones_cst (size_type_node);
1083 		}
1084 
1085 	      return true;
1086 	    }
1087 	}
1088 
1089       /* Return success regardless of the result and handle *PDATA
1090 	 in the caller.  */
1091       get_range_strlen (src, pdata, 1);
1092       return true;
1093     }
1094 
1095   if (idx < 0)
1096     {
1097       /* SRC is a string of constant length.  */
1098       pdata->minlen = build_int_cst (size_type_node, ~idx);
1099       pdata->maxlen = pdata->minlen;
1100       pdata->maxbound = pdata->maxlen;
1101       return true;
1102     }
1103 
1104   if (strinfo *si = get_strinfo (idx))
1105     {
1106       pdata->minlen = get_string_length (si);
1107       if (!pdata->minlen && si->nonzero_chars)
1108 	{
1109 	  if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
1110 	    pdata->minlen = si->nonzero_chars;
1111 	  else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
1112 	    {
1113 	      const value_range_equiv *vr
1114 		= CONST_CAST (class vr_values *, rvals)
1115 		->get_value_range (si->nonzero_chars);
1116 	      if (vr->kind () == VR_RANGE
1117 		  && range_int_cst_p (vr))
1118 		{
1119 		  pdata->minlen = vr->min ();
1120 		  pdata->maxlen = vr->max ();
1121 		}
1122 	      else
1123 		pdata->minlen = build_zero_cst (size_type_node);
1124 	    }
1125 	  else
1126 	    pdata->minlen = build_zero_cst (size_type_node);
1127 
1128 	  tree base = si->ptr;
1129 	  if (TREE_CODE (base) == ADDR_EXPR)
1130 	    base = TREE_OPERAND (base, 0);
1131 
1132 	  HOST_WIDE_INT off;
1133 	  poly_int64 poff;
1134 	  base = get_addr_base_and_unit_offset (base, &poff);
1135 	  if (base
1136 	      && DECL_P (base)
1137 	      && TREE_CODE (TREE_TYPE (base)) == ARRAY_TYPE
1138 	      && TYPE_SIZE_UNIT (TREE_TYPE (base))
1139 	      && poff.is_constant (&off))
1140 	    {
1141 	      tree basetype = TREE_TYPE (base);
1142 	      tree size = TYPE_SIZE_UNIT (basetype);
1143 	      if (TREE_CODE (size) == INTEGER_CST)
1144 		{
1145 		  ++off;   /* Increment for the terminating nul.  */
1146 		  tree toffset = build_int_cst (size_type_node, off);
1147 		  pdata->maxlen = fold_build2 (MINUS_EXPR, size_type_node, size,
1148 					       toffset);
1149 		  pdata->maxbound = pdata->maxlen;
1150 		}
1151 	      else
1152 		pdata->maxlen = build_all_ones_cst (size_type_node);
1153 	    }
1154 	  else
1155 	    pdata->maxlen = build_all_ones_cst (size_type_node);
1156 	}
1157       else if (pdata->minlen && TREE_CODE (pdata->minlen) == SSA_NAME)
1158 	{
1159 	  const value_range_equiv *vr
1160 	    = CONST_CAST (class vr_values *, rvals)
1161 	    ->get_value_range (si->nonzero_chars);
1162 	  if (vr->kind () == VR_RANGE
1163 	      && range_int_cst_p (vr))
1164 	    {
1165 	      pdata->minlen = vr->min ();
1166 	      pdata->maxlen = vr->max ();
1167 	      pdata->maxbound = pdata->maxlen;
1168 	    }
1169 	  else
1170 	    {
1171 	      pdata->minlen = build_zero_cst (size_type_node);
1172 	      pdata->maxlen = build_all_ones_cst (size_type_node);
1173 	    }
1174 	}
1175       else if (pdata->minlen && TREE_CODE (pdata->minlen) == INTEGER_CST)
1176 	{
1177 	  pdata->maxlen = pdata->minlen;
1178 	  pdata->maxbound = pdata->minlen;
1179 	}
1180       else
1181 	{
1182 	  /* For PDATA->MINLEN that's a non-constant expression such
1183 	     as PLUS_EXPR whose value range is unknown, set the bounds
1184 	     to zero and SIZE_MAX.  */
1185 	  pdata->minlen = build_zero_cst (size_type_node);
1186 	  pdata->maxlen = build_all_ones_cst (size_type_node);
1187 	}
1188 
1189       return true;
1190     }
1191 
1192   return false;
1193 }
1194 
1195 /* Analogous to get_range_strlen but for dynamically created strings,
1196    i.e., those created by calls to strcpy as opposed to just string
1197    constants.
1198    Try to obtain the range of the lengths of the string(s) referenced
1199    by SRC, or the size of the largest array SRC refers to if the range
1200    of lengths cannot be determined, and store all in *PDATA.  RVALS
1201    points to EVRP info.  */
1202 
1203 void
get_range_strlen_dynamic(tree src,c_strlen_data * pdata,const vr_values * rvals)1204 get_range_strlen_dynamic (tree src, c_strlen_data *pdata,
1205 			  const vr_values *rvals)
1206 {
1207   bitmap visited = NULL;
1208   tree maxbound = pdata->maxbound;
1209 
1210   unsigned limit = param_ssa_name_def_chain_limit;
1211   if (!get_range_strlen_dynamic (src, pdata, &visited, rvals, &limit))
1212     {
1213       /* On failure extend the length range to an impossible maximum
1214 	 (a valid MAXLEN must be less than PTRDIFF_MAX - 1).  Other
1215 	 members can stay unchanged regardless.  */
1216       pdata->minlen = ssize_int (0);
1217       pdata->maxlen = build_all_ones_cst (size_type_node);
1218     }
1219   else if (!pdata->minlen)
1220     pdata->minlen = ssize_int (0);
1221 
1222   /* If it's unchanged from it initial non-null value, set the conservative
1223      MAXBOUND to SIZE_MAX.  Otherwise leave it null (if it is null).  */
1224   if (maxbound && pdata->maxbound == maxbound)
1225     pdata->maxbound = build_all_ones_cst (size_type_node);
1226 
1227   if (visited)
1228     BITMAP_FREE (visited);
1229 }
1230 
1231 /* Invalidate string length information for strings whose length might
1232    change due to stores in STMT, except those marked DONT_INVALIDATE.
1233    For string-modifying statements, ZERO_WRITE is set when the statement
1234    wrote only zeros.
1235    Returns true if any STRIDX_TO_STRINFO entries were considered
1236    for invalidation.  */
1237 
1238 static bool
1239 maybe_invalidate (gimple *stmt, bool zero_write = false)
1240 {
1241   if (dump_file && (dump_flags & TDF_DETAILS))
1242     {
1243       fprintf (dump_file, "%s called for ", __func__);
1244       print_gimple_stmt (dump_file, stmt, TDF_LINENO);
1245     }
1246 
1247   strinfo *si;
1248   bool nonempty = false;
1249 
1250   for (unsigned i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
1251     {
1252       if (si == NULL || !POINTER_TYPE_P (TREE_TYPE (si->ptr)))
1253 	continue;
1254 
1255       nonempty = true;
1256 
1257       /* Unconditionally reset DONT_INVALIDATE.  */
1258       bool dont_invalidate = si->dont_invalidate;
1259       si->dont_invalidate = false;
1260 
1261       if (dont_invalidate)
1262 	continue;
1263 
1264       ao_ref r;
1265       tree size = NULL_TREE;
1266       if (si->nonzero_chars)
1267 	{
1268 	  /* Include the terminating nul in the size of the string
1269 	     to consider when determining possible clobber.  */
1270 	  tree type = TREE_TYPE (si->nonzero_chars);
1271 	  size = fold_build2 (PLUS_EXPR, type, si->nonzero_chars,
1272 			      build_int_cst (type, 1));
1273 	}
1274       ao_ref_init_from_ptr_and_size (&r, si->ptr, size);
1275       if (stmt_may_clobber_ref_p_1 (stmt, &r))
1276 	{
1277 	  if (dump_file && (dump_flags & TDF_DETAILS))
1278 	    {
1279 	      fputs ("  statement may clobber object ", dump_file);
1280 	      print_generic_expr (dump_file, si->ptr);
1281 	      if (size && tree_fits_uhwi_p (size))
1282 		fprintf (dump_file, " " HOST_WIDE_INT_PRINT_UNSIGNED
1283 			 " bytes in size", tree_to_uhwi (size));
1284 	      fputc ('\n', dump_file);
1285 	    }
1286 
1287 	  set_strinfo (i, NULL);
1288 	  free_strinfo (si);
1289 	  continue;
1290 	}
1291 
1292       if (size
1293 	  && !zero_write
1294 	  && si->stmt
1295 	  && is_gimple_call (si->stmt)
1296 	  && (DECL_FUNCTION_CODE (gimple_call_fndecl (si->stmt))
1297 	      == BUILT_IN_CALLOC))
1298 	{
1299 	  /* If the clobber test above considered the length of
1300 	     the string (including the nul), then for (potentially)
1301 	     non-zero writes that might modify storage allocated by
1302 	     calloc consider the whole object and if it might be
1303 	     clobbered by the statement reset the statement.  */
1304 	  ao_ref_init_from_ptr_and_size (&r, si->ptr, NULL_TREE);
1305 	  if (stmt_may_clobber_ref_p_1 (stmt, &r))
1306 	    si->stmt = NULL;
1307 	}
1308     }
1309 
1310   if (dump_file && (dump_flags & TDF_DETAILS))
1311     fprintf (dump_file, "%s returns %i\n", __func__, nonempty);
1312 
1313   return nonempty;
1314 }
1315 
1316 /* Unshare strinfo record SI, if it has refcount > 1 or
1317    if stridx_to_strinfo vector is shared with some other
1318    bbs.  */
1319 
1320 static strinfo *
unshare_strinfo(strinfo * si)1321 unshare_strinfo (strinfo *si)
1322 {
1323   strinfo *nsi;
1324 
1325   if (si->refcount == 1 && !strinfo_shared ())
1326     return si;
1327 
1328   nsi = new_strinfo (si->ptr, si->idx, si->nonzero_chars, si->full_string_p);
1329   nsi->stmt = si->stmt;
1330   nsi->alloc = si->alloc;
1331   nsi->endptr = si->endptr;
1332   nsi->first = si->first;
1333   nsi->prev = si->prev;
1334   nsi->next = si->next;
1335   nsi->writable = si->writable;
1336   set_strinfo (si->idx, nsi);
1337   free_strinfo (si);
1338   return nsi;
1339 }
1340 
1341 /* Attempt to create a new strinfo for BASESI + OFF, or find existing
1342    strinfo if there is any.  Return it's idx, or 0 if no strinfo has
1343    been created.  */
1344 
1345 static int
get_stridx_plus_constant(strinfo * basesi,unsigned HOST_WIDE_INT off,tree ptr)1346 get_stridx_plus_constant (strinfo *basesi, unsigned HOST_WIDE_INT off,
1347 			  tree ptr)
1348 {
1349   if (TREE_CODE (ptr) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1350     return 0;
1351 
1352   if (compare_nonzero_chars (basesi, off) < 0
1353       || !tree_fits_uhwi_p (basesi->nonzero_chars))
1354     return 0;
1355 
1356   unsigned HOST_WIDE_INT nonzero_chars
1357     = tree_to_uhwi (basesi->nonzero_chars) - off;
1358   strinfo *si = basesi, *chainsi;
1359   if (si->first || si->prev || si->next)
1360     si = verify_related_strinfos (basesi);
1361   if (si == NULL
1362       || si->nonzero_chars == NULL_TREE
1363       || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1364     return 0;
1365 
1366   if (TREE_CODE (ptr) == SSA_NAME
1367       && ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1368     ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names);
1369 
1370   gcc_checking_assert (compare_tree_int (si->nonzero_chars, off) != -1);
1371   for (chainsi = si; chainsi->next; chainsi = si)
1372     {
1373       si = get_next_strinfo (chainsi);
1374       if (si == NULL
1375 	  || si->nonzero_chars == NULL_TREE
1376 	  || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1377 	break;
1378       int r = compare_tree_int (si->nonzero_chars, nonzero_chars);
1379       if (r != 1)
1380 	{
1381 	  if (r == 0)
1382 	    {
1383 	      if (TREE_CODE (ptr) == SSA_NAME)
1384 		ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = si->idx;
1385 	      else
1386 		{
1387 		  int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1388 		  if (pidx != NULL && *pidx == 0)
1389 		    *pidx = si->idx;
1390 		}
1391 	      return si->idx;
1392 	    }
1393 	  break;
1394 	}
1395     }
1396 
1397   int idx = new_stridx (ptr);
1398   if (idx == 0)
1399     return 0;
1400   si = new_strinfo (ptr, idx, build_int_cst (size_type_node, nonzero_chars),
1401 		    basesi->full_string_p);
1402   set_strinfo (idx, si);
1403   if (strinfo *nextsi = get_strinfo (chainsi->next))
1404     {
1405       nextsi = unshare_strinfo (nextsi);
1406       si->next = nextsi->idx;
1407       nextsi->prev = idx;
1408     }
1409   chainsi = unshare_strinfo (chainsi);
1410   if (chainsi->first == 0)
1411     chainsi->first = chainsi->idx;
1412   chainsi->next = idx;
1413   if (chainsi->endptr == NULL_TREE && zero_length_string_p (si))
1414     chainsi->endptr = ptr;
1415   si->endptr = chainsi->endptr;
1416   si->prev = chainsi->idx;
1417   si->first = chainsi->first;
1418   si->writable = chainsi->writable;
1419   return si->idx;
1420 }
1421 
1422 /* Note that PTR, a pointer SSA_NAME initialized in the current stmt, points
1423    to a zero-length string and if possible chain it to a related strinfo
1424    chain whose part is or might be CHAINSI.  */
1425 
1426 static strinfo *
zero_length_string(tree ptr,strinfo * chainsi)1427 zero_length_string (tree ptr, strinfo *chainsi)
1428 {
1429   strinfo *si;
1430   int idx;
1431   if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1432     ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names);
1433   gcc_checking_assert (TREE_CODE (ptr) == SSA_NAME
1434 		       && ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] == 0);
1435 
1436   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1437     return NULL;
1438   if (chainsi != NULL)
1439     {
1440       si = verify_related_strinfos (chainsi);
1441       if (si)
1442 	{
1443 	  do
1444 	    {
1445 	      /* We shouldn't mix delayed and non-delayed lengths.  */
1446 	      gcc_assert (si->full_string_p);
1447 	      if (si->endptr == NULL_TREE)
1448 		{
1449 		  si = unshare_strinfo (si);
1450 		  si->endptr = ptr;
1451 		}
1452 	      chainsi = si;
1453 	      si = get_next_strinfo (si);
1454 	    }
1455 	  while (si != NULL);
1456 	  if (zero_length_string_p (chainsi))
1457 	    {
1458 	      if (chainsi->next)
1459 		{
1460 		  chainsi = unshare_strinfo (chainsi);
1461 		  chainsi->next = 0;
1462 		}
1463 	      ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = chainsi->idx;
1464 	      return chainsi;
1465 	    }
1466 	}
1467       else
1468 	{
1469 	  /* We shouldn't mix delayed and non-delayed lengths.  */
1470 	  gcc_assert (chainsi->full_string_p);
1471 	  if (chainsi->first || chainsi->prev || chainsi->next)
1472 	    {
1473 	      chainsi = unshare_strinfo (chainsi);
1474 	      chainsi->first = 0;
1475 	      chainsi->prev = 0;
1476 	      chainsi->next = 0;
1477 	    }
1478 	}
1479     }
1480   idx = new_stridx (ptr);
1481   if (idx == 0)
1482     return NULL;
1483   si = new_strinfo (ptr, idx, build_int_cst (size_type_node, 0), true);
1484   set_strinfo (idx, si);
1485   si->endptr = ptr;
1486   if (chainsi != NULL)
1487     {
1488       chainsi = unshare_strinfo (chainsi);
1489       if (chainsi->first == 0)
1490 	chainsi->first = chainsi->idx;
1491       chainsi->next = idx;
1492       if (chainsi->endptr == NULL_TREE)
1493 	chainsi->endptr = ptr;
1494       si->prev = chainsi->idx;
1495       si->first = chainsi->first;
1496       si->writable = chainsi->writable;
1497     }
1498   return si;
1499 }
1500 
1501 /* For strinfo ORIGSI whose length has been just updated, adjust other
1502    related strinfos so that they match the new ORIGSI.  This involves:
1503 
1504    - adding ADJ to the nonzero_chars fields
1505    - copying full_string_p from the new ORIGSI.  */
1506 
1507 static void
adjust_related_strinfos(location_t loc,strinfo * origsi,tree adj)1508 adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj)
1509 {
1510   strinfo *si = verify_related_strinfos (origsi);
1511 
1512   if (si == NULL)
1513     return;
1514 
1515   while (1)
1516     {
1517       strinfo *nsi;
1518 
1519       if (si != origsi)
1520 	{
1521 	  tree tem;
1522 
1523 	  si = unshare_strinfo (si);
1524 	  /* We shouldn't see delayed lengths here; the caller must
1525 	     have calculated the old length in order to calculate
1526 	     the adjustment.  */
1527 	  gcc_assert (si->nonzero_chars);
1528 	  tem = fold_convert_loc (loc, TREE_TYPE (si->nonzero_chars), adj);
1529 	  si->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
1530 					       TREE_TYPE (si->nonzero_chars),
1531 					       si->nonzero_chars, tem);
1532 	  si->full_string_p = origsi->full_string_p;
1533 
1534 	  si->endptr = NULL_TREE;
1535 	  si->dont_invalidate = true;
1536 	}
1537       nsi = get_next_strinfo (si);
1538       if (nsi == NULL)
1539 	return;
1540       si = nsi;
1541     }
1542 }
1543 
1544 /* Find if there are other SSA_NAME pointers equal to PTR
1545    for which we don't track their string lengths yet.  If so, use
1546    IDX for them.  */
1547 
1548 static void
find_equal_ptrs(tree ptr,int idx)1549 find_equal_ptrs (tree ptr, int idx)
1550 {
1551   if (TREE_CODE (ptr) != SSA_NAME)
1552     return;
1553   while (1)
1554     {
1555       gimple *stmt = SSA_NAME_DEF_STMT (ptr);
1556       if (!is_gimple_assign (stmt))
1557 	return;
1558       ptr = gimple_assign_rhs1 (stmt);
1559       switch (gimple_assign_rhs_code (stmt))
1560 	{
1561 	case SSA_NAME:
1562 	  break;
1563 	CASE_CONVERT:
1564 	  if (!POINTER_TYPE_P (TREE_TYPE (ptr)))
1565 	    return;
1566 	  if (TREE_CODE (ptr) == SSA_NAME)
1567 	    break;
1568 	  if (TREE_CODE (ptr) != ADDR_EXPR)
1569 	    return;
1570 	  /* FALLTHRU */
1571 	case ADDR_EXPR:
1572 	  {
1573 	    int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1574 	    if (pidx != NULL && *pidx == 0)
1575 	      *pidx = idx;
1576 	    return;
1577 	  }
1578 	default:
1579 	  return;
1580 	}
1581 
1582       /* We might find an endptr created in this pass.  Grow the
1583 	 vector in that case.  */
1584       if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1585 	ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names);
1586 
1587       if (ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] != 0)
1588 	return;
1589       ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = idx;
1590     }
1591 }
1592 
1593 /* Return true if STMT is a call to a builtin function with the right
1594    arguments and attributes that should be considered for optimization
1595    by this pass.  */
1596 
1597 static bool
valid_builtin_call(gimple * stmt)1598 valid_builtin_call (gimple *stmt)
1599 {
1600   if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
1601     return false;
1602 
1603   tree callee = gimple_call_fndecl (stmt);
1604   tree decl = builtin_decl_explicit (DECL_FUNCTION_CODE (callee));
1605   if (decl
1606       && decl != callee
1607       && !gimple_builtin_call_types_compatible_p (stmt, decl))
1608     return false;
1609 
1610   switch (DECL_FUNCTION_CODE (callee))
1611     {
1612     case BUILT_IN_MEMCMP:
1613     case BUILT_IN_MEMCMP_EQ:
1614     case BUILT_IN_STRCMP:
1615     case BUILT_IN_STRNCMP:
1616     case BUILT_IN_STRCHR:
1617     case BUILT_IN_STRLEN:
1618     case BUILT_IN_STRNLEN:
1619       /* The above functions should be pure.  Punt if they aren't.  */
1620       if (gimple_vdef (stmt) || gimple_vuse (stmt) == NULL_TREE)
1621 	return false;
1622       break;
1623 
1624     case BUILT_IN_ALLOCA:
1625     case BUILT_IN_ALLOCA_WITH_ALIGN:
1626     case BUILT_IN_CALLOC:
1627     case BUILT_IN_MALLOC:
1628     case BUILT_IN_MEMCPY:
1629     case BUILT_IN_MEMCPY_CHK:
1630     case BUILT_IN_MEMPCPY:
1631     case BUILT_IN_MEMPCPY_CHK:
1632     case BUILT_IN_MEMSET:
1633     case BUILT_IN_STPCPY:
1634     case BUILT_IN_STPCPY_CHK:
1635     case BUILT_IN_STPNCPY:
1636     case BUILT_IN_STPNCPY_CHK:
1637     case BUILT_IN_STRCAT:
1638     case BUILT_IN_STRCAT_CHK:
1639     case BUILT_IN_STRCPY:
1640     case BUILT_IN_STRCPY_CHK:
1641     case BUILT_IN_STRNCAT:
1642     case BUILT_IN_STRNCAT_CHK:
1643     case BUILT_IN_STRNCPY:
1644     case BUILT_IN_STRNCPY_CHK:
1645       /* The above functions should be neither const nor pure.  Punt if they
1646 	 aren't.  */
1647       if (gimple_vdef (stmt) == NULL_TREE || gimple_vuse (stmt) == NULL_TREE)
1648 	return false;
1649       break;
1650 
1651     default:
1652       break;
1653     }
1654 
1655   return true;
1656 }
1657 
1658 /* If the last .MEM setter statement before STMT is
1659    memcpy (x, y, strlen (y) + 1), the only .MEM use of it is STMT
1660    and STMT is known to overwrite x[strlen (x)], adjust the last memcpy to
1661    just memcpy (x, y, strlen (y)).  SI must be the zero length
1662    strinfo.  */
1663 
1664 static void
adjust_last_stmt(strinfo * si,gimple * stmt,bool is_strcat)1665 adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat)
1666 {
1667   tree vuse, callee, len;
1668   struct laststmt_struct last = laststmt;
1669   strinfo *lastsi, *firstsi;
1670   unsigned len_arg_no = 2;
1671 
1672   laststmt.stmt = NULL;
1673   laststmt.len = NULL_TREE;
1674   laststmt.stridx = 0;
1675 
1676   if (last.stmt == NULL)
1677     return;
1678 
1679   vuse = gimple_vuse (stmt);
1680   if (vuse == NULL_TREE
1681       || SSA_NAME_DEF_STMT (vuse) != last.stmt
1682       || !has_single_use (vuse))
1683     return;
1684 
1685   gcc_assert (last.stridx > 0);
1686   lastsi = get_strinfo (last.stridx);
1687   if (lastsi == NULL)
1688     return;
1689 
1690   if (lastsi != si)
1691     {
1692       if (lastsi->first == 0 || lastsi->first != si->first)
1693 	return;
1694 
1695       firstsi = verify_related_strinfos (si);
1696       if (firstsi == NULL)
1697 	return;
1698       while (firstsi != lastsi)
1699 	{
1700 	  firstsi = get_next_strinfo (firstsi);
1701 	  if (firstsi == NULL)
1702 	    return;
1703 	}
1704     }
1705 
1706   if (!is_strcat && !zero_length_string_p (si))
1707     return;
1708 
1709   if (is_gimple_assign (last.stmt))
1710     {
1711       gimple_stmt_iterator gsi;
1712 
1713       if (!integer_zerop (gimple_assign_rhs1 (last.stmt)))
1714 	return;
1715       if (stmt_could_throw_p (cfun, last.stmt))
1716 	return;
1717       gsi = gsi_for_stmt (last.stmt);
1718       unlink_stmt_vdef (last.stmt);
1719       release_defs (last.stmt);
1720       gsi_remove (&gsi, true);
1721       return;
1722     }
1723 
1724   if (!valid_builtin_call (last.stmt))
1725     return;
1726 
1727   callee = gimple_call_fndecl (last.stmt);
1728   switch (DECL_FUNCTION_CODE (callee))
1729     {
1730     case BUILT_IN_MEMCPY:
1731     case BUILT_IN_MEMCPY_CHK:
1732       break;
1733     default:
1734       return;
1735     }
1736 
1737   len = gimple_call_arg (last.stmt, len_arg_no);
1738   if (tree_fits_uhwi_p (len))
1739     {
1740       if (!tree_fits_uhwi_p (last.len)
1741 	  || integer_zerop (len)
1742 	  || tree_to_uhwi (len) != tree_to_uhwi (last.len) + 1)
1743 	return;
1744       /* Don't adjust the length if it is divisible by 4, it is more efficient
1745 	 to store the extra '\0' in that case.  */
1746       if ((tree_to_uhwi (len) & 3) == 0)
1747 	return;
1748 
1749       /* Don't fold away an out of bounds access, as this defeats proper
1750 	 warnings.  */
1751       tree dst = gimple_call_arg (last.stmt, 0);
1752       tree size = compute_objsize (dst, 0);
1753       if (size && tree_int_cst_lt (size, len))
1754 	return;
1755     }
1756   else if (TREE_CODE (len) == SSA_NAME)
1757     {
1758       gimple *def_stmt = SSA_NAME_DEF_STMT (len);
1759       if (!is_gimple_assign (def_stmt)
1760 	  || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
1761 	  || gimple_assign_rhs1 (def_stmt) != last.len
1762 	  || !integer_onep (gimple_assign_rhs2 (def_stmt)))
1763 	return;
1764     }
1765   else
1766     return;
1767 
1768   gimple_call_set_arg (last.stmt, len_arg_no, last.len);
1769   update_stmt (last.stmt);
1770 }
1771 
1772 /* For an LHS that is an SSA_NAME that is the result of a strlen()
1773    call, or when BOUND is non-null, of a strnlen() call, set LHS
1774    range info to [0, min (MAX, BOUND)] when the range includes more
1775    than one value and return LHS.  Otherwise, when the range
1776    [MIN, MAX] is such that MIN == MAX, return the tree representation
1777    of (MIN). The latter allows callers to fold suitable strnlen() calls
1778    to constants.  */
1779 
1780 tree
set_strlen_range(tree lhs,wide_int min,wide_int max,tree bound)1781 set_strlen_range (tree lhs, wide_int min, wide_int max,
1782 		  tree bound /* = NULL_TREE */)
1783 {
1784   if (TREE_CODE (lhs) != SSA_NAME
1785       || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1786     return NULL_TREE;
1787 
1788   if (bound)
1789     {
1790       /* For strnlen, adjust MIN and MAX as necessary.  If the bound
1791 	 is less than the size of the array set MAX to it.  It it's
1792 	 greater than MAX and MAX is non-zero bump MAX down to account
1793 	 for the necessary terminating nul.  Otherwise leave it alone.  */
1794       if (TREE_CODE (bound) == INTEGER_CST)
1795 	{
1796 	  wide_int wibnd = wi::to_wide (bound);
1797 	  int cmp = wi::cmpu (wibnd, max);
1798 	  if (cmp < 0)
1799 	    max = wibnd;
1800 	  else if (cmp && wi::ne_p (max, min))
1801 	    --max;
1802 	}
1803       else if (TREE_CODE (bound) == SSA_NAME)
1804 	{
1805 	  wide_int minbound, maxbound;
1806 	  value_range_kind rng = get_range_info (bound, &minbound, &maxbound);
1807 	  if (rng == VR_RANGE)
1808 	    {
1809 	      /* For a bound in a known range, adjust the range determined
1810 		 above as necessary.  For a bound in some anti-range or
1811 		 in an unknown range, use the range determined by callers.  */
1812 	      if (wi::ltu_p (minbound, min))
1813 		min = minbound;
1814 	      if (wi::ltu_p (maxbound, max))
1815 		max = maxbound;
1816 	    }
1817 	}
1818     }
1819 
1820   if (min == max)
1821     return wide_int_to_tree (size_type_node, min);
1822 
1823   set_range_info (lhs, VR_RANGE, min, max);
1824   return lhs;
1825 }
1826 
1827 /* For an LHS that is an SSA_NAME and for strlen() or strnlen() argument
1828    SRC, set LHS range info to [0, min (N, BOUND)] if SRC refers to
1829    a character array A[N] with unknown length bounded by N, and for
1830    strnlen(), by min (N, BOUND).  */
1831 
1832 static tree
maybe_set_strlen_range(tree lhs,tree src,tree bound)1833 maybe_set_strlen_range (tree lhs, tree src, tree bound)
1834 {
1835   if (TREE_CODE (lhs) != SSA_NAME
1836       || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1837     return NULL_TREE;
1838 
1839   if (TREE_CODE (src) == SSA_NAME)
1840     {
1841       gimple *def = SSA_NAME_DEF_STMT (src);
1842       if (is_gimple_assign (def)
1843 	  && gimple_assign_rhs_code (def) == ADDR_EXPR)
1844 	src = gimple_assign_rhs1 (def);
1845     }
1846 
1847   /* The longest string is PTRDIFF_MAX - 1 bytes including the final
1848      NUL so that the difference between a pointer to just past it and
1849      one to its beginning is positive.  */
1850   wide_int max = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
1851 
1852   if (TREE_CODE (src) == ADDR_EXPR)
1853     {
1854       /* The last array member of a struct can be bigger than its size
1855 	 suggests if it's treated as a poor-man's flexible array member.  */
1856       src = TREE_OPERAND (src, 0);
1857       if (TREE_CODE (src) != MEM_REF
1858 	  && !array_at_struct_end_p (src))
1859 	{
1860 	  tree type = TREE_TYPE (src);
1861 	  tree size = TYPE_SIZE_UNIT (type);
1862 	  if (size
1863 	      && TREE_CODE (size) == INTEGER_CST
1864 	      && !integer_zerop (size))
1865 	    {
1866 	      /* Even though such uses of strlen would be undefined,
1867 		 avoid relying on arrays of arrays in case some genius
1868 		 decides to call strlen on an unterminated array element
1869 		 that's followed by a terminated one.  Likewise, avoid
1870 		 assuming that a struct array member is necessarily
1871 		 nul-terminated (the nul may be in the member that
1872 		 follows).  In those cases, assume that the length
1873 		 of the string stored in such an array is bounded
1874 		 by the size of the enclosing object if one can be
1875 		 determined.  */
1876 	      tree base = get_base_address (src);
1877 	      if (VAR_P (base))
1878 		{
1879 		  if (tree size = DECL_SIZE_UNIT (base))
1880 		    if (size
1881 			&& TREE_CODE (size) == INTEGER_CST
1882 			&& TREE_CODE (TREE_TYPE (base)) != POINTER_TYPE)
1883 		      max = wi::to_wide (size);
1884 		}
1885 	    }
1886 
1887 	  /* For strlen() the upper bound above is equal to
1888 	     the longest string that can be stored in the array
1889 	     (i.e., it accounts for the terminating nul.  For
1890 	     strnlen() bump up the maximum by one since the array
1891 	     need not be nul-terminated.  */
1892 	  if (!bound && max != 0)
1893 	    --max;
1894 	}
1895     }
1896 
1897   wide_int min = wi::zero (max.get_precision ());
1898   return set_strlen_range (lhs, min, max, bound);
1899 }
1900 
1901 /* Diagnose buffer overflow by a STMT writing LEN + PLUS_ONE bytes,
1902    either into a region allocated for the object SI when non-null,
1903    or into an object designated by the LHS of STMT otherwise.
1904    When nonnull uses RVALS to determine range information.
1905    RAWMEM may be set by memcpy and other raw memory functions
1906    to allow accesses across subobject boundaries.  */
1907 
1908 static void
1909 maybe_warn_overflow (gimple *stmt, tree len,
1910 		     const vr_values *rvals = NULL,
1911 		     strinfo *si = NULL, bool plus_one = false,
1912 		     bool rawmem = false)
1913 {
1914   if (!len || gimple_no_warning_p (stmt))
1915     return;
1916 
1917   /* The DECL of the function performing the write if it is done
1918      by one.  */
1919   tree writefn = NULL_TREE;
1920   /* The destination expression involved in the store STMT.  */
1921   tree dest = NULL_TREE;
1922 
1923   if (is_gimple_assign (stmt))
1924     dest = gimple_assign_lhs (stmt);
1925   else if (is_gimple_call (stmt))
1926     {
1927       dest = gimple_call_arg (stmt, 0);
1928       writefn = gimple_call_fndecl (stmt);
1929     }
1930 
1931   if (TREE_NO_WARNING (dest))
1932     return;
1933 
1934   /* Use maximum precision to avoid overflow in the addition below.
1935      Make sure all operands have the same precision to keep wide_int
1936      from ICE'ing.  */
1937 
1938   /* Convenience constants.  */
1939   const widest_int diff_min
1940     = wi::to_widest (TYPE_MIN_VALUE (ptrdiff_type_node));
1941   const widest_int diff_max
1942     = wi::to_widest (TYPE_MAX_VALUE (ptrdiff_type_node));
1943   const widest_int size_max
1944     = wi::to_widest (TYPE_MAX_VALUE (size_type_node));
1945 
1946   /* The offset into the destination object computed below and not
1947      reflected in DESTSIZE.  */
1948   widest_int offrng[2] = { 0, 0 };
1949 
1950   if (!si)
1951     {
1952       /* If no destination STRINFO was provided try to get it from
1953 	 the DEST argument.  */
1954       tree ref = dest;
1955       if (TREE_CODE (ref) == ARRAY_REF)
1956 	{
1957 	  /* Handle stores to VLAs (represented as
1958 	     ARRAY_REF (MEM_REF (vlaptr, 0), N].  */
1959 	  tree off = TREE_OPERAND (ref, 1);
1960 	  ref = TREE_OPERAND (ref, 0);
1961 	  wide_int rng[2];
1962 	  if (get_range (off, rng, rvals))
1963 	    {
1964 	      /* Convert offsets to the maximum precision.  */
1965 	      offrng[0] = widest_int::from (rng[0], SIGNED);
1966 	      offrng[1] = widest_int::from (rng[1], SIGNED);
1967 	    }
1968 	  else
1969 	    {
1970 	      offrng[0] = diff_min;
1971 	      offrng[1] = diff_max;
1972 	    }
1973 	}
1974 
1975       if (TREE_CODE (ref) == MEM_REF)
1976 	{
1977 	  tree mem_off = TREE_OPERAND (ref, 1);
1978 	  ref = TREE_OPERAND (ref, 0);
1979 	  wide_int rng[2];
1980 	  if (get_range (mem_off, rng, rvals))
1981 	    {
1982 	      offrng[0] += widest_int::from (rng[0], SIGNED);
1983 	      offrng[1] += widest_int::from (rng[1], SIGNED);
1984 	    }
1985 	  else
1986 	    {
1987 	      offrng[0] = diff_min;
1988 	      offrng[1] = diff_max;
1989 	    }
1990 	}
1991 
1992       wide_int rng[2];
1993       if (int idx = get_stridx (ref, rng, rvals))
1994 	{
1995 	  si = get_strinfo (idx);
1996 	  offrng[0] += widest_int::from (rng[0], SIGNED);
1997 	  offrng[1] += widest_int::from (rng[1], SIGNED);
1998 	}
1999     }
2000 
2001   /* The allocation call if the destination object was allocated
2002      by one.  */
2003   gimple *alloc_call = NULL;
2004   /* The DECL of the destination object if known and not dynamically
2005      allocated.  */
2006   tree destdecl = NULL_TREE;
2007   /* The offset into the destination object set by compute_objsize
2008      but already reflected in DESTSIZE.  */
2009   tree destoff = NULL_TREE;
2010   /* The size of the destination region (which is smaller than
2011      the destination object for stores at a non-zero offset).  */
2012   tree destsize = NULL_TREE;
2013 
2014   /* Compute the range of sizes of the destination object.  The range
2015      is constant for declared objects but may be a range for allocated
2016      objects.  */
2017   widest_int sizrng[2] = { 0, 0 };
2018   if (si)
2019     {
2020       wide_int rng[2];
2021       destsize = gimple_call_alloc_size (si->alloc, rng, rvals);
2022       if (destsize)
2023 	{
2024 	  sizrng[0] = widest_int::from (rng[0], UNSIGNED);
2025 	  sizrng[1] = widest_int::from (rng[1], UNSIGNED);
2026 	}
2027       alloc_call = si->alloc;
2028     }
2029   else
2030     offrng[0] = offrng[1] = 0;
2031 
2032   if (!destsize)
2033     {
2034       /* If there is no STRINFO for DEST, fall back on compute_objsize.  */
2035       tree off = NULL_TREE;
2036       destsize = compute_objsize (dest, rawmem ? 0 : 1, &destdecl, &off, rvals);
2037       if (destsize)
2038 	{
2039 	  /* Remember OFF but clear OFFRNG that may have been set above.  */
2040 	  destoff = off;
2041 	  offrng[0] = offrng[1] = 0;
2042 
2043 	  if (destdecl && TREE_CODE (destdecl) == SSA_NAME)
2044 	    {
2045 	      gimple *stmt = SSA_NAME_DEF_STMT (destdecl);
2046 	      if (is_gimple_call (stmt))
2047 		alloc_call = stmt;
2048 	      destdecl = NULL_TREE;
2049 	    }
2050 
2051 	  wide_int rng[2];
2052 	  if (get_range (destsize, rng, rvals))
2053 	    {
2054 	      sizrng[0] = widest_int::from (rng[0], UNSIGNED);
2055 	      sizrng[1] = widest_int::from (rng[1], UNSIGNED);
2056 	    }
2057 	  else
2058 	    {
2059 	      /* On failure, rather than failing, set the maximum range
2060 		 so that overflow in allocated objects whose size depends
2061 		 on the strlen of the source can still be diagnosed
2062 		 below.  */
2063 	      sizrng[0] = 0;
2064 	      sizrng[1] = size_max;
2065 	    }
2066 	}
2067     }
2068 
2069   if (!destsize)
2070     {
2071       sizrng[0] = 0;
2072       sizrng[1] = size_max;
2073     };
2074 
2075   /* Return early if the DESTSIZE size expression is the same as LEN
2076      and the offset into the destination is zero.  This might happen
2077      in the case of a pair of malloc and memset calls to allocate
2078      an object and clear it as if by calloc.  */
2079   if (destsize == len && !plus_one && offrng[0] == 0 && offrng[0] == offrng[1])
2080     return;
2081 
2082   wide_int rng[2];
2083   if (!get_range (len, rng, rvals))
2084     return;
2085 
2086   widest_int lenrng[2] =
2087     { widest_int::from (rng[0], SIGNED), widest_int::from (rng[1], SIGNED) };
2088 
2089   if (plus_one)
2090     {
2091       lenrng[0] += 1;
2092       lenrng[1] += 1;
2093     }
2094 
2095   /* The size of the remaining space in the destination computed
2096      as the size of the latter minus the offset into it.  */
2097   widest_int spcrng[2] = { sizrng[0], sizrng[1] };
2098   if (wi::neg_p (offrng[0]) && wi::neg_p (offrng[1]))
2099     {
2100       /* When the offset is negative and the size of the destination
2101 	 object unknown there is little to do.
2102 	 FIXME: Detect offsets that are necessarily invalid regardless
2103 	 of the size of the object.  */
2104       if (!destsize)
2105 	return;
2106 
2107       /* The remaining space is necessarily zero.  */
2108       spcrng[0] = spcrng[1] = 0;
2109     }
2110   else if (wi::neg_p (offrng[0]))
2111     {
2112       /* When the lower bound of the offset is negative but the upper
2113 	 bound is not, reduce the upper bound of the remaining space
2114 	 by the upper bound of the offset but leave the lower bound
2115 	 unchanged.  If that makes the upper bound of the space less
2116 	 than the lower bound swap the two.  */
2117       spcrng[1] -= wi::ltu_p (offrng[1], spcrng[1]) ? offrng[1] : spcrng[1];
2118       if (wi::ltu_p (spcrng[1], spcrng[0]))
2119 	std::swap (spcrng[1], spcrng[0]);
2120     }
2121   else
2122     {
2123       /* When the offset is positive reduce the remaining space by
2124 	 the lower bound of the offset or clear it if the offset is
2125 	 greater.  */
2126       spcrng[0] -= wi::ltu_p (offrng[0], spcrng[0]) ? offrng[0] : spcrng[0];
2127       spcrng[1] -= wi::ltu_p (offrng[0], spcrng[1]) ? offrng[0] : spcrng[1];
2128     }
2129 
2130   if (wi::leu_p (lenrng[0], spcrng[0])
2131       && wi::leu_p (lenrng[1], spcrng[1]))
2132     return;
2133 
2134   if (lenrng[0] == spcrng[1]
2135       && (len != destsize
2136 	  || !si || !is_strlen_related_p (si->ptr, len)))
2137     return;
2138 
2139   location_t loc = gimple_or_expr_nonartificial_location (stmt, dest);
2140   bool warned = false;
2141   if (wi::leu_p (lenrng[0], spcrng[1]))
2142     {
2143       if (len != destsize
2144 	  && (!si || !is_strlen_related_p (si->ptr, len)))
2145 	return;
2146 
2147       warned = (writefn
2148 		? warning_at (loc, OPT_Wstringop_overflow_,
2149 			      "%G%qD writing one too many bytes into a region "
2150 			      "of a size that depends on %<strlen%>",
2151 			      stmt, writefn)
2152 		: warning_at (loc, OPT_Wstringop_overflow_,
2153 			      "%Gwriting one too many bytes into a region "
2154 			      "of a size that depends on %<strlen%>",
2155 			      stmt));
2156     }
2157   else if (lenrng[0] == lenrng[1])
2158     {
2159       if (spcrng[0] == spcrng[1])
2160 	warned = (writefn
2161 		  ? warning_n (loc, OPT_Wstringop_overflow_,
2162 			       lenrng[0].to_uhwi (),
2163 			       "%G%qD writing %wu byte into a region "
2164 			       "of size %wu",
2165 			       "%G%qD writing %wu bytes into a region "
2166 			       "of size %wu",
2167 			       stmt, writefn, lenrng[0].to_uhwi (),
2168 			       spcrng[0].to_uhwi ())
2169 		  : warning_n (loc, OPT_Wstringop_overflow_,
2170 			       lenrng[0].to_uhwi (),
2171 			       "%Gwriting %wu byte into a region "
2172 			       "of size %wu",
2173 			       "%Gwriting %wu bytes into a region "
2174 			       "of size %wu",
2175 			       stmt, lenrng[0].to_uhwi (),
2176 			       spcrng[0].to_uhwi ()));
2177       else
2178 	warned = (writefn
2179 		  ? warning_n (loc, OPT_Wstringop_overflow_,
2180 			       lenrng[0].to_uhwi (),
2181 			       "%G%qD writing %wu byte into a region "
2182 			       "of size between %wu and %wu",
2183 			       "%G%qD writing %wu bytes into a region "
2184 			       "of size between %wu and %wu",
2185 			       stmt, writefn, lenrng[0].to_uhwi (),
2186 			       spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2187 		  : warning_n (loc, OPT_Wstringop_overflow_,
2188 			       lenrng[0].to_uhwi (),
2189 			       "%Gwriting %wu byte into a region "
2190 			       "of size between %wu and %wu",
2191 			       "%Gwriting %wu bytes into a region "
2192 			       "of size between %wu and %wu",
2193 			       stmt, lenrng[0].to_uhwi (),
2194 			       spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2195     }
2196   else if (spcrng[0] == spcrng[1])
2197     warned = (writefn
2198 	      ? warning_at (loc, OPT_Wstringop_overflow_,
2199 			    "%G%qD writing between %wu and %wu bytes "
2200 			    "into a region of size %wu",
2201 			    stmt, writefn, lenrng[0].to_uhwi (),
2202 			    lenrng[1].to_uhwi (),
2203 			    spcrng[0].to_uhwi ())
2204 	      : warning_at (loc, OPT_Wstringop_overflow_,
2205 			    "%Gwriting between %wu and %wu bytes "
2206 			    "into a region of size %wu",
2207 			    stmt, lenrng[0].to_uhwi (),
2208 			    lenrng[1].to_uhwi (),
2209 			    spcrng[0].to_uhwi ()));
2210   else
2211     warned = (writefn
2212 	      ? warning_at (loc, OPT_Wstringop_overflow_,
2213 			    "%G%qD writing between %wu and %wu bytes "
2214 			    "into a region of size between %wu and %wu",
2215 			    stmt, writefn, lenrng[0].to_uhwi (),
2216 			    lenrng[1].to_uhwi (),
2217 			    spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2218 	      : warning_at (loc, OPT_Wstringop_overflow_,
2219 			    "%Gwriting between %wu and %wu bytes "
2220 			    "into a region of size between %wu and %wu",
2221 			    stmt, lenrng[0].to_uhwi (),
2222 			    lenrng[1].to_uhwi (),
2223 			    spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2224 
2225   if (!warned)
2226     return;
2227 
2228   gimple_set_no_warning (stmt, true);
2229 
2230   /* If DESTOFF is not null, use it to format the offset value/range.  */
2231   if (destoff)
2232     {
2233       wide_int rng[2];
2234       if (get_range (destoff, rng))
2235 	{
2236 	  offrng[0] = widest_int::from (rng[0], SIGNED);
2237 	  offrng[1] = widest_int::from (rng[1], SIGNED);
2238 	}
2239       else
2240 	offrng[0] = offrng[1] = 0;
2241     }
2242 
2243   /* Format the offset to keep the number of inform calls from growing
2244      out of control.  */
2245   char offstr[64];
2246   if (offrng[0] == offrng[1])
2247     sprintf (offstr, "%lli", (long long) offrng[0].to_shwi ());
2248   else
2249     sprintf (offstr, "[%lli, %lli]",
2250 	     (long long) offrng[0].to_shwi (), (long long) offrng[1].to_shwi ());
2251 
2252   if (destdecl)
2253     {
2254       if (tree size = DECL_SIZE_UNIT (destdecl))
2255 	inform (DECL_SOURCE_LOCATION (destdecl),
2256 		"at offset %s to object %qD with size %E declared here",
2257 		offstr, destdecl, size);
2258       else
2259 	inform (DECL_SOURCE_LOCATION (destdecl),
2260 		"at offset %s to object %qD declared here",
2261 		offstr, destdecl);
2262       return;
2263     }
2264 
2265   if (!alloc_call)
2266     return;
2267 
2268   tree allocfn = gimple_call_fndecl (alloc_call);
2269   if (!allocfn)
2270     {
2271       /* For an ALLOC_CALL via a function pointer make a small effort
2272 	 to determine the destination of the pointer.  */
2273       allocfn = gimple_call_fn (alloc_call);
2274       if (TREE_CODE (allocfn) == SSA_NAME)
2275 	{
2276 	  gimple *def = SSA_NAME_DEF_STMT (allocfn);
2277 	  if (gimple_assign_single_p (def))
2278 	    {
2279 	      tree rhs = gimple_assign_rhs1 (def);
2280 	      if (DECL_P (rhs))
2281 		allocfn = rhs;
2282 	      else if (TREE_CODE (rhs) == COMPONENT_REF)
2283 		allocfn = TREE_OPERAND (rhs, 1);
2284 	    }
2285 	}
2286     }
2287 
2288   if (gimple_call_builtin_p (alloc_call, BUILT_IN_ALLOCA_WITH_ALIGN))
2289     {
2290       if (sizrng[0] == sizrng[1])
2291 	inform (gimple_location (alloc_call),
2292 		"at offset %s to an object with size %wu declared here",
2293 		offstr, sizrng[0].to_uhwi ());
2294       else if (sizrng[0] == 0)
2295 	{
2296 	  /* Avoid printing impossible sizes.  */
2297 	  if (wi::ltu_p (sizrng[1], diff_max - 2))
2298 	    inform (gimple_location (alloc_call),
2299 		    "at offset %s to an object with size at most %wu "
2300 		    "declared here",
2301 		    offstr, sizrng[1].to_uhwi ());
2302 	  else
2303 	    inform (gimple_location (alloc_call),
2304 		    "at offset %s to an object declared here", offstr);
2305 	}
2306       else
2307 	inform (gimple_location (alloc_call),
2308 		"at offset %s to an object with size between %wu and %wu "
2309 		"declared here",
2310 		offstr, sizrng[0].to_uhwi (), sizrng[1].to_uhwi ());
2311       return;
2312     }
2313 
2314   if (sizrng[0] == sizrng[1])
2315     inform (gimple_location (alloc_call),
2316 	    "at offset %s to an object with size %wu allocated by %qE here",
2317 	    offstr, sizrng[0].to_uhwi (), allocfn);
2318   else if (sizrng[0] == 0)
2319     {
2320       /* Avoid printing impossible sizes.  */
2321       if (wi::ltu_p (sizrng[1], diff_max - 2))
2322 	inform (gimple_location (alloc_call),
2323 		"at offset %s to an object with size at most %wu allocated "
2324 		"by %qD here",
2325 		offstr, sizrng[1].to_uhwi (), allocfn);
2326       else
2327 	inform (gimple_location (alloc_call),
2328 		"at offset %s to an object allocated by %qE here",
2329 		offstr, allocfn);
2330     }
2331   else
2332     inform (gimple_location (alloc_call),
2333 	    "at offset %s to an object with size between %wu and %wu "
2334 	    "allocated by %qE here",
2335 	    offstr, sizrng[0].to_uhwi (), sizrng[1].to_uhwi (), allocfn);
2336 }
2337 
2338 /* Convenience wrapper for the above.  */
2339 
2340 static inline void
2341 maybe_warn_overflow (gimple *stmt, unsigned HOST_WIDE_INT len,
2342 		     const vr_values *rvals = NULL, strinfo *si = NULL,
2343 		     bool plus_one = false, bool rawmem = false)
2344 {
2345   maybe_warn_overflow (stmt, build_int_cst (size_type_node, len), rvals,
2346 		       si, plus_one, rawmem);
2347 }
2348 
2349 /* Handle a strlen call.  If strlen of the argument is known, replace
2350    the strlen call with the known value, otherwise remember that strlen
2351    of the argument is stored in the lhs SSA_NAME.  */
2352 
2353 static void
handle_builtin_strlen(gimple_stmt_iterator * gsi)2354 handle_builtin_strlen (gimple_stmt_iterator *gsi)
2355 {
2356   gimple *stmt = gsi_stmt (*gsi);
2357   tree lhs = gimple_call_lhs (stmt);
2358 
2359   if (lhs == NULL_TREE)
2360     return;
2361 
2362   location_t loc = gimple_location (stmt);
2363   tree callee = gimple_call_fndecl (stmt);
2364   tree src = gimple_call_arg (stmt, 0);
2365   tree bound = (DECL_FUNCTION_CODE (callee) == BUILT_IN_STRNLEN
2366 		? gimple_call_arg (stmt, 1) : NULL_TREE);
2367   int idx = get_stridx (src);
2368   if (idx || (bound && integer_zerop (bound)))
2369     {
2370       strinfo *si = NULL;
2371       tree rhs;
2372 
2373       if (idx < 0)
2374 	rhs = build_int_cst (TREE_TYPE (lhs), ~idx);
2375       else if (idx == 0)
2376 	rhs = bound;
2377       else
2378 	{
2379 	  rhs = NULL_TREE;
2380 	  si = get_strinfo (idx);
2381 	  if (si != NULL)
2382 	    {
2383 	      rhs = get_string_length (si);
2384 	      /* For strnlen, if bound is constant, even if si is not known
2385 		 to be zero terminated, if we know at least bound bytes are
2386 		 not zero, the return value will be bound.  */
2387 	      if (rhs == NULL_TREE
2388 		  && bound != NULL_TREE
2389 		  && TREE_CODE (bound) == INTEGER_CST
2390 		  && si->nonzero_chars != NULL_TREE
2391 		  && TREE_CODE (si->nonzero_chars) == INTEGER_CST
2392 		  && tree_int_cst_le (bound, si->nonzero_chars))
2393 		rhs = bound;
2394 	    }
2395 	}
2396       if (rhs != NULL_TREE)
2397 	{
2398 	  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2399 	    {
2400 	      fprintf (dump_file, "Optimizing: ");
2401 	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2402 	    }
2403 	  rhs = unshare_expr (rhs);
2404 	  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2405 	    rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2406 
2407 	  if (bound)
2408 	    rhs = fold_build2_loc (loc, MIN_EXPR, TREE_TYPE (rhs), rhs, bound);
2409 
2410 	  if (!update_call_from_tree (gsi, rhs))
2411 	    gimplify_and_update_call_from_tree (gsi, rhs);
2412 	  stmt = gsi_stmt (*gsi);
2413 	  update_stmt (stmt);
2414 	  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2415 	    {
2416 	      fprintf (dump_file, "into: ");
2417 	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2418 	    }
2419 
2420 	  if (si != NULL
2421 	      /* Don't update anything for strnlen.  */
2422 	      && bound == NULL_TREE
2423 	      && TREE_CODE (si->nonzero_chars) != SSA_NAME
2424 	      && TREE_CODE (si->nonzero_chars) != INTEGER_CST
2425 	      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2426 	    {
2427 	      si = unshare_strinfo (si);
2428 	      si->nonzero_chars = lhs;
2429 	      gcc_assert (si->full_string_p);
2430 	    }
2431 
2432 	  if (strlen_to_stridx
2433 	      && (bound == NULL_TREE
2434 		  /* For strnlen record this only if the call is proven
2435 		     to return the same value as strlen would.  */
2436 		  || (TREE_CODE (bound) == INTEGER_CST
2437 		      && TREE_CODE (rhs) == INTEGER_CST
2438 		      && tree_int_cst_lt (rhs, bound))))
2439 	    strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2440 
2441 	  return;
2442 	}
2443     }
2444   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2445     return;
2446 
2447   if (idx == 0)
2448     idx = new_stridx (src);
2449   else
2450     {
2451       strinfo *si = get_strinfo (idx);
2452       if (si != NULL)
2453 	{
2454 	  if (!si->full_string_p && !si->stmt)
2455 	    {
2456 	      /* Until now we only had a lower bound on the string length.
2457 		 Install LHS as the actual length.  */
2458 	      si = unshare_strinfo (si);
2459 	      tree old = si->nonzero_chars;
2460 	      si->nonzero_chars = lhs;
2461 	      si->full_string_p = true;
2462 	      if (old && TREE_CODE (old) == INTEGER_CST)
2463 		{
2464 		  old = fold_convert_loc (loc, TREE_TYPE (lhs), old);
2465 		  tree adj = fold_build2_loc (loc, MINUS_EXPR,
2466 					      TREE_TYPE (lhs), lhs, old);
2467 		  adjust_related_strinfos (loc, si, adj);
2468 		  /* Use the constant minimum length as the lower bound
2469 		     of the non-constant length.  */
2470 		  wide_int min = wi::to_wide (old);
2471 		  wide_int max
2472 		    = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
2473 		  set_strlen_range (lhs, min, max);
2474 		}
2475 	      else
2476 		{
2477 		  si->first = 0;
2478 		  si->prev = 0;
2479 		  si->next = 0;
2480 		}
2481 	    }
2482 	  return;
2483 	}
2484     }
2485   if (idx)
2486     {
2487       if (!bound)
2488 	{
2489 	  /* Only store the new length information for calls to strlen(),
2490 	     not for those to strnlen().  */
2491 	  strinfo *si = new_strinfo (src, idx, lhs, true);
2492 	  set_strinfo (idx, si);
2493 	  find_equal_ptrs (src, idx);
2494 	}
2495 
2496       /* For SRC that is an array of N elements, set LHS's range
2497 	 to [0, min (N, BOUND)].  A constant return value means
2498 	 the range would have consisted of a single value.  In
2499 	 that case, fold the result into the returned constant.  */
2500       if (tree ret = maybe_set_strlen_range (lhs, src, bound))
2501 	if (TREE_CODE (ret) == INTEGER_CST)
2502 	  {
2503 	    if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2504 	      {
2505 		fprintf (dump_file, "Optimizing: ");
2506 		print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2507 	      }
2508 	    if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (ret)))
2509 	      ret = fold_convert_loc (loc, TREE_TYPE (lhs), ret);
2510 	    if (!update_call_from_tree (gsi, ret))
2511 	      gimplify_and_update_call_from_tree (gsi, ret);
2512 	    stmt = gsi_stmt (*gsi);
2513 	    update_stmt (stmt);
2514 	    if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2515 	      {
2516 		fprintf (dump_file, "into: ");
2517 		print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2518 	      }
2519 	  }
2520 
2521       if (strlen_to_stridx && !bound)
2522 	strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2523     }
2524 }
2525 
2526 /* Handle a strchr call.  If strlen of the first argument is known, replace
2527    the strchr (x, 0) call with the endptr or x + strlen, otherwise remember
2528    that lhs of the call is endptr and strlen of the argument is endptr - x.  */
2529 
2530 static void
handle_builtin_strchr(gimple_stmt_iterator * gsi)2531 handle_builtin_strchr (gimple_stmt_iterator *gsi)
2532 {
2533   gimple *stmt = gsi_stmt (*gsi);
2534   tree lhs = gimple_call_lhs (stmt);
2535 
2536   if (lhs == NULL_TREE)
2537     return;
2538 
2539   if (!integer_zerop (gimple_call_arg (stmt, 1)))
2540     return;
2541 
2542   tree src = gimple_call_arg (stmt, 0);
2543 
2544   /* Avoid folding if the first argument is not a nul-terminated array.
2545      Defer warning until later.  */
2546   if (!check_nul_terminated_array (NULL_TREE, src))
2547     return;
2548 
2549   int idx = get_stridx (src);
2550   if (idx)
2551     {
2552       strinfo *si = NULL;
2553       tree rhs;
2554 
2555       if (idx < 0)
2556 	rhs = build_int_cst (size_type_node, ~idx);
2557       else
2558 	{
2559 	  rhs = NULL_TREE;
2560 	  si = get_strinfo (idx);
2561 	  if (si != NULL)
2562 	    rhs = get_string_length (si);
2563 	}
2564       if (rhs != NULL_TREE)
2565 	{
2566 	  location_t loc = gimple_location (stmt);
2567 
2568 	  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2569 	    {
2570 	      fprintf (dump_file, "Optimizing: ");
2571 	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2572 	    }
2573 	  if (si != NULL && si->endptr != NULL_TREE)
2574 	    {
2575 	      rhs = unshare_expr (si->endptr);
2576 	      if (!useless_type_conversion_p (TREE_TYPE (lhs),
2577 					      TREE_TYPE (rhs)))
2578 		rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2579 	    }
2580 	  else
2581 	    {
2582 	      rhs = fold_convert_loc (loc, sizetype, unshare_expr (rhs));
2583 	      rhs = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2584 				     TREE_TYPE (src), src, rhs);
2585 	      if (!useless_type_conversion_p (TREE_TYPE (lhs),
2586 					      TREE_TYPE (rhs)))
2587 		rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2588 	    }
2589 	  if (!update_call_from_tree (gsi, rhs))
2590 	    gimplify_and_update_call_from_tree (gsi, rhs);
2591 	  stmt = gsi_stmt (*gsi);
2592 	  update_stmt (stmt);
2593 	  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2594 	    {
2595 	      fprintf (dump_file, "into: ");
2596 	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2597 	    }
2598 	  if (si != NULL
2599 	      && si->endptr == NULL_TREE
2600 	      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2601 	    {
2602 	      si = unshare_strinfo (si);
2603 	      si->endptr = lhs;
2604 	    }
2605 	  zero_length_string (lhs, si);
2606 	  return;
2607 	}
2608     }
2609   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2610     return;
2611   if (TREE_CODE (src) != SSA_NAME || !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (src))
2612     {
2613       if (idx == 0)
2614 	idx = new_stridx (src);
2615       else if (get_strinfo (idx) != NULL)
2616 	{
2617 	  zero_length_string (lhs, NULL);
2618 	  return;
2619 	}
2620       if (idx)
2621 	{
2622 	  location_t loc = gimple_location (stmt);
2623 	  tree lhsu = fold_convert_loc (loc, size_type_node, lhs);
2624 	  tree srcu = fold_convert_loc (loc, size_type_node, src);
2625 	  tree length = fold_build2_loc (loc, MINUS_EXPR,
2626 					 size_type_node, lhsu, srcu);
2627 	  strinfo *si = new_strinfo (src, idx, length, true);
2628 	  si->endptr = lhs;
2629 	  set_strinfo (idx, si);
2630 	  find_equal_ptrs (src, idx);
2631 	  zero_length_string (lhs, si);
2632 	}
2633     }
2634   else
2635     zero_length_string (lhs, NULL);
2636 }
2637 
2638 /* Handle a strcpy-like ({st{r,p}cpy,__st{r,p}cpy_chk}) call.
2639    If strlen of the second argument is known, strlen of the first argument
2640    is the same after this call.  Furthermore, attempt to convert it to
2641    memcpy.  Uses RVALS to determine range information.  */
2642 
2643 static void
handle_builtin_strcpy(enum built_in_function bcode,gimple_stmt_iterator * gsi,const vr_values * rvals)2644 handle_builtin_strcpy (enum built_in_function bcode, gimple_stmt_iterator *gsi,
2645 		       const vr_values *rvals)
2646 {
2647   int idx, didx;
2648   tree src, dst, srclen, len, lhs, type, fn, oldlen;
2649   bool success;
2650   gimple *stmt = gsi_stmt (*gsi);
2651   strinfo *si, *dsi, *olddsi, *zsi;
2652   location_t loc;
2653 
2654   src = gimple_call_arg (stmt, 1);
2655   dst = gimple_call_arg (stmt, 0);
2656   lhs = gimple_call_lhs (stmt);
2657   idx = get_stridx (src);
2658   si = NULL;
2659   if (idx > 0)
2660     si = get_strinfo (idx);
2661 
2662   didx = get_stridx (dst);
2663   olddsi = NULL;
2664   oldlen = NULL_TREE;
2665   if (didx > 0)
2666     olddsi = get_strinfo (didx);
2667   else if (didx < 0)
2668     return;
2669 
2670   if (olddsi != NULL)
2671     adjust_last_stmt (olddsi, stmt, false);
2672 
2673   srclen = NULL_TREE;
2674   if (si != NULL)
2675     srclen = get_string_length (si);
2676   else if (idx < 0)
2677     srclen = build_int_cst (size_type_node, ~idx);
2678 
2679   maybe_warn_overflow (stmt, srclen, rvals, olddsi, true);
2680 
2681   if (olddsi != NULL)
2682     adjust_last_stmt (olddsi, stmt, false);
2683 
2684   loc = gimple_location (stmt);
2685   if (srclen == NULL_TREE)
2686     switch (bcode)
2687       {
2688       case BUILT_IN_STRCPY:
2689       case BUILT_IN_STRCPY_CHK:
2690 	if (lhs != NULL_TREE || !builtin_decl_implicit_p (BUILT_IN_STPCPY))
2691 	  return;
2692 	break;
2693       case BUILT_IN_STPCPY:
2694       case BUILT_IN_STPCPY_CHK:
2695 	if (lhs == NULL_TREE)
2696 	  return;
2697 	else
2698 	  {
2699 	    tree lhsuint = fold_convert_loc (loc, size_type_node, lhs);
2700 	    srclen = fold_convert_loc (loc, size_type_node, dst);
2701 	    srclen = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
2702 				      lhsuint, srclen);
2703 	  }
2704 	break;
2705       default:
2706 	gcc_unreachable ();
2707       }
2708 
2709   if (didx == 0)
2710     {
2711       didx = new_stridx (dst);
2712       if (didx == 0)
2713 	return;
2714     }
2715   if (olddsi != NULL)
2716     {
2717       oldlen = olddsi->nonzero_chars;
2718       dsi = unshare_strinfo (olddsi);
2719       dsi->nonzero_chars = srclen;
2720       dsi->full_string_p = (srclen != NULL_TREE);
2721       /* Break the chain, so adjust_related_strinfo on later pointers in
2722 	 the chain won't adjust this one anymore.  */
2723       dsi->next = 0;
2724       dsi->stmt = NULL;
2725       dsi->endptr = NULL_TREE;
2726     }
2727   else
2728     {
2729       dsi = new_strinfo (dst, didx, srclen, srclen != NULL_TREE);
2730       set_strinfo (didx, dsi);
2731       find_equal_ptrs (dst, didx);
2732     }
2733   dsi->writable = true;
2734   dsi->dont_invalidate = true;
2735 
2736   if (dsi->nonzero_chars == NULL_TREE)
2737     {
2738       strinfo *chainsi;
2739 
2740       /* If string length of src is unknown, use delayed length
2741 	 computation.  If string length of dst will be needed, it
2742 	 can be computed by transforming this strcpy call into
2743 	 stpcpy and subtracting dst from the return value.  */
2744 
2745       /* Look for earlier strings whose length could be determined if
2746 	 this strcpy is turned into an stpcpy.  */
2747 
2748       if (dsi->prev != 0 && (chainsi = verify_related_strinfos (dsi)) != NULL)
2749 	{
2750 	  for (; chainsi && chainsi != dsi; chainsi = get_strinfo (chainsi->next))
2751 	    {
2752 	      /* When setting a stmt for delayed length computation
2753 		 prevent all strinfos through dsi from being
2754 		 invalidated.  */
2755 	      chainsi = unshare_strinfo (chainsi);
2756 	      chainsi->stmt = stmt;
2757 	      chainsi->nonzero_chars = NULL_TREE;
2758 	      chainsi->full_string_p = false;
2759 	      chainsi->endptr = NULL_TREE;
2760 	      chainsi->dont_invalidate = true;
2761 	    }
2762 	}
2763       dsi->stmt = stmt;
2764 
2765       /* Try to detect overlap before returning.  This catches cases
2766 	 like strcpy (d, d + n) where n is non-constant whose range
2767 	 is such that (n <= strlen (d) holds).
2768 
2769 	 OLDDSI->NONZERO_chars may have been reset by this point with
2770 	 oldlen holding it original value.  */
2771       if (olddsi && oldlen)
2772 	{
2773 	  /* Add 1 for the terminating NUL.  */
2774 	  tree type = TREE_TYPE (oldlen);
2775 	  oldlen = fold_build2 (PLUS_EXPR, type, oldlen,
2776 				build_int_cst (type, 1));
2777 	  check_bounds_or_overlap (stmt, olddsi->ptr, src, oldlen, NULL_TREE);
2778 	}
2779 
2780       return;
2781     }
2782 
2783   if (olddsi != NULL)
2784     {
2785       tree adj = NULL_TREE;
2786       if (oldlen == NULL_TREE)
2787 	;
2788       else if (integer_zerop (oldlen))
2789 	adj = srclen;
2790       else if (TREE_CODE (oldlen) == INTEGER_CST
2791 	       || TREE_CODE (srclen) == INTEGER_CST)
2792 	adj = fold_build2_loc (loc, MINUS_EXPR,
2793 			       TREE_TYPE (srclen), srclen,
2794 			       fold_convert_loc (loc, TREE_TYPE (srclen),
2795 						 oldlen));
2796       if (adj != NULL_TREE)
2797 	adjust_related_strinfos (loc, dsi, adj);
2798       else
2799 	dsi->prev = 0;
2800     }
2801   /* strcpy src may not overlap dst, so src doesn't need to be
2802      invalidated either.  */
2803   if (si != NULL)
2804     si->dont_invalidate = true;
2805 
2806   fn = NULL_TREE;
2807   zsi = NULL;
2808   switch (bcode)
2809     {
2810     case BUILT_IN_STRCPY:
2811       fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
2812       if (lhs)
2813 	ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2814       break;
2815     case BUILT_IN_STRCPY_CHK:
2816       fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
2817       if (lhs)
2818 	ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2819       break;
2820     case BUILT_IN_STPCPY:
2821       /* This would need adjustment of the lhs (subtract one),
2822 	 or detection that the trailing '\0' doesn't need to be
2823 	 written, if it will be immediately overwritten.
2824       fn = builtin_decl_explicit (BUILT_IN_MEMPCPY);  */
2825       if (lhs)
2826 	{
2827 	  dsi->endptr = lhs;
2828 	  zsi = zero_length_string (lhs, dsi);
2829 	}
2830       break;
2831     case BUILT_IN_STPCPY_CHK:
2832       /* This would need adjustment of the lhs (subtract one),
2833 	 or detection that the trailing '\0' doesn't need to be
2834 	 written, if it will be immediately overwritten.
2835       fn = builtin_decl_explicit (BUILT_IN_MEMPCPY_CHK);  */
2836       if (lhs)
2837 	{
2838 	  dsi->endptr = lhs;
2839 	  zsi = zero_length_string (lhs, dsi);
2840 	}
2841       break;
2842     default:
2843       gcc_unreachable ();
2844     }
2845   if (zsi != NULL)
2846     zsi->dont_invalidate = true;
2847 
2848   if (fn)
2849     {
2850       tree args = TYPE_ARG_TYPES (TREE_TYPE (fn));
2851       type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
2852     }
2853   else
2854     type = size_type_node;
2855 
2856   len = fold_convert_loc (loc, type, unshare_expr (srclen));
2857   len = fold_build2_loc (loc, PLUS_EXPR, type, len, build_int_cst (type, 1));
2858 
2859   /* Set the no-warning bit on the transformed statement?  */
2860   bool set_no_warning = false;
2861 
2862   if (const strinfo *chksi = olddsi ? olddsi : dsi)
2863     if (si
2864 	&& check_bounds_or_overlap (stmt, chksi->ptr, si->ptr, NULL_TREE, len))
2865       {
2866 	gimple_set_no_warning (stmt, true);
2867 	set_no_warning = true;
2868       }
2869 
2870   if (fn == NULL_TREE)
2871     return;
2872 
2873   len = force_gimple_operand_gsi (gsi, len, true, NULL_TREE, true,
2874 				  GSI_SAME_STMT);
2875   if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2876     {
2877       fprintf (dump_file, "Optimizing: ");
2878       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2879     }
2880   if (gimple_call_num_args (stmt) == 2)
2881     success = update_gimple_call (gsi, fn, 3, dst, src, len);
2882   else
2883     success = update_gimple_call (gsi, fn, 4, dst, src, len,
2884 				  gimple_call_arg (stmt, 2));
2885   if (success)
2886     {
2887       stmt = gsi_stmt (*gsi);
2888       update_stmt (stmt);
2889       if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2890 	{
2891 	  fprintf (dump_file, "into: ");
2892 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2893 	}
2894       /* Allow adjust_last_stmt to decrease this memcpy's size.  */
2895       laststmt.stmt = stmt;
2896       laststmt.len = srclen;
2897       laststmt.stridx = dsi->idx;
2898     }
2899   else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2900     fprintf (dump_file, "not possible.\n");
2901 
2902   if (set_no_warning)
2903     gimple_set_no_warning (stmt, true);
2904 }
2905 
2906 /* Check the size argument to the built-in forms of stpncpy and strncpy
2907    for out-of-bounds offsets or overlapping access, and to see if the
2908    size argument is derived from a call to strlen() on the source argument,
2909    and if so, issue an appropriate warning.  */
2910 
2911 static void
handle_builtin_strncat(built_in_function,gimple_stmt_iterator * gsi)2912 handle_builtin_strncat (built_in_function, gimple_stmt_iterator *gsi)
2913 {
2914   /* Same as stxncpy().  */
2915   handle_builtin_stxncpy_strncat (true, gsi);
2916 }
2917 
2918 /* Return true if LEN depends on a call to strlen(SRC) in an interesting
2919    way.  LEN can either be an integer expression, or a pointer (to char).
2920    When it is the latter (such as in recursive calls to self) it is
2921    assumed to be the argument in some call to strlen() whose relationship
2922    to SRC is being ascertained.  */
2923 
2924 bool
is_strlen_related_p(tree src,tree len)2925 is_strlen_related_p (tree src, tree len)
2926 {
2927   if (TREE_CODE (TREE_TYPE (len)) == POINTER_TYPE
2928       && operand_equal_p (src, len, 0))
2929     return true;
2930 
2931   if (TREE_CODE (len) != SSA_NAME)
2932     return false;
2933 
2934   if (TREE_CODE (src) == SSA_NAME)
2935     {
2936       gimple *srcdef = SSA_NAME_DEF_STMT (src);
2937       if (is_gimple_assign (srcdef))
2938 	{
2939 	  /* Handle bitwise AND used in conversions from wider size_t
2940 	     to narrower unsigned types.  */
2941 	  tree_code code = gimple_assign_rhs_code (srcdef);
2942 	  if (code == BIT_AND_EXPR
2943 	      || code == NOP_EXPR)
2944 	    return is_strlen_related_p (gimple_assign_rhs1 (srcdef), len);
2945 
2946 	  return false;
2947 	}
2948 
2949       if (gimple_call_builtin_p (srcdef, BUILT_IN_NORMAL))
2950 	{
2951 	  /* If SRC is the result of a call to an allocation function
2952 	     or strlen, use the function's argument instead.  */
2953 	  tree func = gimple_call_fndecl (srcdef);
2954 	  built_in_function code = DECL_FUNCTION_CODE (func);
2955 	  if (code == BUILT_IN_ALLOCA
2956 	      || code == BUILT_IN_ALLOCA_WITH_ALIGN
2957 	      || code == BUILT_IN_MALLOC
2958 	      || code == BUILT_IN_STRLEN)
2959 	    return is_strlen_related_p (gimple_call_arg (srcdef, 0), len);
2960 
2961 	  /* FIXME: Handle other functions with attribute alloc_size.  */
2962 	  return false;
2963 	}
2964     }
2965 
2966   gimple *lendef = SSA_NAME_DEF_STMT (len);
2967   if (!lendef)
2968     return false;
2969 
2970   if (is_gimple_call (lendef))
2971     {
2972       tree func = gimple_call_fndecl (lendef);
2973       if (!valid_builtin_call (lendef)
2974 	  || DECL_FUNCTION_CODE (func) != BUILT_IN_STRLEN)
2975 	return false;
2976 
2977       tree arg = gimple_call_arg (lendef, 0);
2978       return is_strlen_related_p (src, arg);
2979     }
2980 
2981   if (!is_gimple_assign (lendef))
2982     return false;
2983 
2984   tree_code code = gimple_assign_rhs_code (lendef);
2985   tree rhs1 = gimple_assign_rhs1 (lendef);
2986   tree rhstype = TREE_TYPE (rhs1);
2987 
2988   if ((TREE_CODE (rhstype) == POINTER_TYPE && code == POINTER_PLUS_EXPR)
2989       || (INTEGRAL_TYPE_P (rhstype)
2990 	  && (code == BIT_AND_EXPR
2991 	      || code == NOP_EXPR)))
2992     {
2993       /* Pointer plus (an integer), and truncation are considered among
2994 	 the (potentially) related expressions to strlen.  */
2995       return is_strlen_related_p (src, rhs1);
2996     }
2997 
2998   if (tree rhs2 = gimple_assign_rhs2 (lendef))
2999     {
3000       /* Integer subtraction is considered strlen-related when both
3001 	 arguments are integers and second one is strlen-related.  */
3002       rhstype = TREE_TYPE (rhs2);
3003       if (INTEGRAL_TYPE_P (rhstype) && code == MINUS_EXPR)
3004 	return is_strlen_related_p (src, rhs2);
3005     }
3006 
3007   return false;
3008 }
3009 
3010 /* Called by handle_builtin_stxncpy_strncat and by
3011    gimple_fold_builtin_strncpy in gimple-fold.c.
3012    Check to see if the specified bound is a) equal to the size of
3013    the destination DST and if so, b) if it's immediately followed by
3014    DST[CNT - 1] = '\0'.  If a) holds and b) does not, warn.  Otherwise,
3015    do nothing.  Return true if diagnostic has been issued.
3016 
3017    The purpose is to diagnose calls to strncpy and stpncpy that do
3018    not nul-terminate the copy while allowing for the idiom where
3019    such a call is immediately followed by setting the last element
3020    to nul, as in:
3021      char a[32];
3022      strncpy (a, s, sizeof a);
3023      a[sizeof a - 1] = '\0';
3024 */
3025 
3026 bool
maybe_diag_stxncpy_trunc(gimple_stmt_iterator gsi,tree src,tree cnt)3027 maybe_diag_stxncpy_trunc (gimple_stmt_iterator gsi, tree src, tree cnt)
3028 {
3029   gimple *stmt = gsi_stmt (gsi);
3030   if (gimple_no_warning_p (stmt))
3031     return false;
3032 
3033   wide_int cntrange[2];
3034 
3035   if (TREE_CODE (cnt) == INTEGER_CST)
3036     cntrange[0] = cntrange[1] = wi::to_wide (cnt);
3037   else if (TREE_CODE (cnt) == SSA_NAME)
3038     {
3039       enum value_range_kind rng = get_range_info (cnt, cntrange, cntrange + 1);
3040       if (rng == VR_RANGE)
3041 	;
3042       else if (rng == VR_ANTI_RANGE)
3043 	{
3044 	  wide_int maxobjsize = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node));
3045 
3046 	  if (wi::ltu_p (cntrange[1], maxobjsize))
3047 	    {
3048 	      cntrange[0] = cntrange[1] + 1;
3049 	      cntrange[1] = maxobjsize;
3050 	    }
3051 	  else
3052 	    {
3053 	      cntrange[1] = cntrange[0] - 1;
3054 	      cntrange[0] = wi::zero (TYPE_PRECISION (TREE_TYPE (cnt)));
3055 	    }
3056 	}
3057       else
3058 	return false;
3059     }
3060   else
3061     return false;
3062 
3063   /* Negative value is the constant string length.  If it's less than
3064      the lower bound there is no truncation.  Avoid calling get_stridx()
3065      when ssa_ver_to_stridx is empty.  That implies the caller isn't
3066      running under the control of this pass and ssa_ver_to_stridx hasn't
3067      been created yet.  */
3068   int sidx = ssa_ver_to_stridx.length () ? get_stridx (src) : 0;
3069   if (sidx < 0 && wi::gtu_p (cntrange[0], ~sidx))
3070     return false;
3071 
3072   tree dst = gimple_call_arg (stmt, 0);
3073   tree dstdecl = dst;
3074   if (TREE_CODE (dstdecl) == ADDR_EXPR)
3075     dstdecl = TREE_OPERAND (dstdecl, 0);
3076 
3077   tree ref = NULL_TREE;
3078 
3079   if (!sidx)
3080     {
3081       /* If the source is a non-string return early to avoid warning
3082 	 for possible truncation (if the truncation is certain SIDX
3083 	 is non-zero).  */
3084       tree srcdecl = gimple_call_arg (stmt, 1);
3085       if (TREE_CODE (srcdecl) == ADDR_EXPR)
3086 	srcdecl = TREE_OPERAND (srcdecl, 0);
3087       if (get_attr_nonstring_decl (srcdecl, &ref))
3088 	return false;
3089     }
3090 
3091   /* Likewise, if the destination refers to an array/pointer declared
3092      nonstring return early.  */
3093   if (get_attr_nonstring_decl (dstdecl, &ref))
3094     return false;
3095 
3096   /* Look for dst[i] = '\0'; after the stxncpy() call and if found
3097      avoid the truncation warning.  */
3098   gsi_next_nondebug (&gsi);
3099   gimple *next_stmt = gsi_stmt (gsi);
3100   if (!next_stmt)
3101     {
3102       /* When there is no statement in the same basic block check
3103 	 the immediate successor block.  */
3104       if (basic_block bb = gimple_bb (stmt))
3105 	{
3106 	  if (single_succ_p (bb))
3107 	    {
3108 	      /* For simplicity, ignore blocks with multiple outgoing
3109 		 edges for now and only consider successor blocks along
3110 		 normal edges.  */
3111 	      edge e = EDGE_SUCC (bb, 0);
3112 	      if (!(e->flags & EDGE_ABNORMAL))
3113 		{
3114 		  gsi = gsi_start_bb (e->dest);
3115 		  next_stmt = gsi_stmt (gsi);
3116 		  if (next_stmt && is_gimple_debug (next_stmt))
3117 		    {
3118 		      gsi_next_nondebug (&gsi);
3119 		      next_stmt = gsi_stmt (gsi);
3120 		    }
3121 		}
3122 	    }
3123 	}
3124     }
3125 
3126   if (next_stmt && is_gimple_assign (next_stmt))
3127     {
3128       tree lhs = gimple_assign_lhs (next_stmt);
3129       tree_code code = TREE_CODE (lhs);
3130       if (code == ARRAY_REF || code == MEM_REF)
3131 	lhs = TREE_OPERAND (lhs, 0);
3132 
3133       tree func = gimple_call_fndecl (stmt);
3134       if (DECL_FUNCTION_CODE (func) == BUILT_IN_STPNCPY)
3135 	{
3136 	  tree ret = gimple_call_lhs (stmt);
3137 	  if (ret && operand_equal_p (ret, lhs, 0))
3138 	    return false;
3139 	}
3140 
3141       /* Determine the base address and offset of the reference,
3142 	 ignoring the innermost array index.  */
3143       if (TREE_CODE (ref) == ARRAY_REF)
3144 	ref = TREE_OPERAND (ref, 0);
3145 
3146       poly_int64 dstoff;
3147       tree dstbase = get_addr_base_and_unit_offset (ref, &dstoff);
3148 
3149       poly_int64 lhsoff;
3150       tree lhsbase = get_addr_base_and_unit_offset (lhs, &lhsoff);
3151       if (lhsbase
3152 	  && dstbase
3153 	  && known_eq (dstoff, lhsoff)
3154 	  && operand_equal_p (dstbase, lhsbase, 0))
3155 	return false;
3156     }
3157 
3158   int prec = TYPE_PRECISION (TREE_TYPE (cnt));
3159   wide_int lenrange[2];
3160   if (strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL)
3161     {
3162       lenrange[0] = (sisrc->nonzero_chars
3163 		     && TREE_CODE (sisrc->nonzero_chars) == INTEGER_CST
3164 		     ? wi::to_wide (sisrc->nonzero_chars)
3165 		     : wi::zero (prec));
3166       lenrange[1] = lenrange[0];
3167     }
3168   else if (sidx < 0)
3169     lenrange[0] = lenrange[1] = wi::shwi (~sidx, prec);
3170   else
3171     {
3172       c_strlen_data lendata = { };
3173       /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
3174 	 to have it set to the length of the longest string in a PHI.  */
3175       lendata.maxbound = src;
3176       get_range_strlen (src, &lendata, /* eltsize = */1);
3177       if (TREE_CODE (lendata.minlen) == INTEGER_CST
3178 	  && TREE_CODE (lendata.maxbound) == INTEGER_CST)
3179 	{
3180 	  /* When LENDATA.MAXLEN is unknown, reset LENDATA.MINLEN
3181 	     which stores the length of the shortest known string.  */
3182 	  if (integer_all_onesp (lendata.maxlen))
3183 	    lenrange[0] = wi::shwi (0, prec);
3184 	  else
3185 	    lenrange[0] = wi::to_wide (lendata.minlen, prec);
3186 	  lenrange[1] = wi::to_wide (lendata.maxbound, prec);
3187 	}
3188       else
3189 	{
3190 	  lenrange[0] = wi::shwi (0, prec);
3191 	  lenrange[1] = wi::shwi (-1, prec);
3192 	}
3193     }
3194 
3195   location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3196   tree func = gimple_call_fndecl (stmt);
3197 
3198   if (lenrange[0] != 0 || !wi::neg_p (lenrange[1]))
3199     {
3200       /* If the longest source string is shorter than the lower bound
3201 	 of the specified count the copy is definitely nul-terminated.  */
3202       if (wi::ltu_p (lenrange[1], cntrange[0]))
3203 	return false;
3204 
3205       if (wi::neg_p (lenrange[1]))
3206 	{
3207 	  /* The length of one of the strings is unknown but at least
3208 	     one has non-zero length and that length is stored in
3209 	     LENRANGE[1].  Swap the bounds to force a "may be truncated"
3210 	     warning below.  */
3211 	  lenrange[1] = lenrange[0];
3212 	  lenrange[0] = wi::shwi (0, prec);
3213 	}
3214 
3215       /* Set to true for strncat whose bound is derived from the length
3216 	 of the destination (the expected usage pattern).  */
3217       bool cat_dstlen_bounded = false;
3218       if (DECL_FUNCTION_CODE (func) == BUILT_IN_STRNCAT)
3219 	cat_dstlen_bounded = is_strlen_related_p (dst, cnt);
3220 
3221       if (lenrange[0] == cntrange[1] && cntrange[0] == cntrange[1])
3222 	return warning_n (callloc, OPT_Wstringop_truncation,
3223 			  cntrange[0].to_uhwi (),
3224 			  "%G%qD output truncated before terminating "
3225 			  "nul copying %E byte from a string of the "
3226 			  "same length",
3227 			  "%G%qD output truncated before terminating nul "
3228 			  "copying %E bytes from a string of the same "
3229 			  "length",
3230 			  stmt, func, cnt);
3231       else if (!cat_dstlen_bounded)
3232 	{
3233 	  if (wi::geu_p (lenrange[0], cntrange[1]))
3234 	    {
3235 	      /* The shortest string is longer than the upper bound of
3236 		 the count so the truncation is certain.  */
3237 	      if (cntrange[0] == cntrange[1])
3238 		return warning_n (callloc, OPT_Wstringop_truncation,
3239 				  cntrange[0].to_uhwi (),
3240 				  "%G%qD output truncated copying %E byte "
3241 				  "from a string of length %wu",
3242 				  "%G%qD output truncated copying %E bytes "
3243 				  "from a string of length %wu",
3244 				  stmt, func, cnt, lenrange[0].to_uhwi ());
3245 
3246 	      return warning_at (callloc, OPT_Wstringop_truncation,
3247 				 "%G%qD output truncated copying between %wu "
3248 				 "and %wu bytes from a string of length %wu",
3249 				 stmt, func, cntrange[0].to_uhwi (),
3250 				 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3251 	    }
3252 	  else if (wi::geu_p (lenrange[1], cntrange[1]))
3253 	    {
3254 	      /* The longest string is longer than the upper bound of
3255 		 the count so the truncation is possible.  */
3256 	      if (cntrange[0] == cntrange[1])
3257 		return warning_n (callloc, OPT_Wstringop_truncation,
3258 				  cntrange[0].to_uhwi (),
3259 				  "%G%qD output may be truncated copying %E "
3260 				  "byte from a string of length %wu",
3261 				  "%G%qD output may be truncated copying %E "
3262 				  "bytes from a string of length %wu",
3263 				  stmt, func, cnt, lenrange[1].to_uhwi ());
3264 
3265 	      return warning_at (callloc, OPT_Wstringop_truncation,
3266 				 "%G%qD output may be truncated copying between "
3267 				 "%wu and %wu bytes from a string of length %wu",
3268 				 stmt, func, cntrange[0].to_uhwi (),
3269 				 cntrange[1].to_uhwi (), lenrange[1].to_uhwi ());
3270 	    }
3271 	}
3272 
3273       if (!cat_dstlen_bounded
3274 	  && cntrange[0] != cntrange[1]
3275 	  && wi::leu_p (cntrange[0], lenrange[0])
3276 	  && wi::leu_p (cntrange[1], lenrange[0] + 1))
3277 	{
3278 	  /* If the source (including the terminating nul) is longer than
3279 	     the lower bound of the specified count but shorter than the
3280 	     upper bound the copy may (but need not) be truncated.  */
3281 	  return warning_at (callloc, OPT_Wstringop_truncation,
3282 			     "%G%qD output may be truncated copying between "
3283 			     "%wu and %wu bytes from a string of length %wu",
3284 			     stmt, func, cntrange[0].to_uhwi (),
3285 			     cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3286 	}
3287     }
3288 
3289   if (tree dstsize = compute_objsize (dst, 1))
3290     {
3291       /* The source length is unknown.  Try to determine the destination
3292 	 size and see if it matches the specified bound.  If not, bail.
3293 	 Otherwise go on to see if it should be diagnosed for possible
3294 	 truncation.  */
3295       if (!dstsize)
3296 	return false;
3297 
3298       if (wi::to_wide (dstsize) != cntrange[1])
3299 	return false;
3300 
3301       /* Avoid warning for strncpy(a, b, N) calls where the following
3302 	 equalities hold:
3303 	   N == sizeof a && N == sizeof b */
3304       if (tree srcsize = compute_objsize (src, 1))
3305 	if (wi::to_wide (srcsize) == cntrange[1])
3306 	  return false;
3307 
3308       if (cntrange[0] == cntrange[1])
3309 	return warning_at (callloc, OPT_Wstringop_truncation,
3310 			   "%G%qD specified bound %E equals destination size",
3311 			   stmt, func, cnt);
3312     }
3313 
3314   return false;
3315 }
3316 
3317 /* Check the arguments to the built-in forms of stpncpy, strncpy, and
3318    strncat, for out-of-bounds offsets or overlapping access, and to see
3319    if the size is derived from calling strlen() on the source argument,
3320    and if so, issue the appropriate warning.
3321    APPEND_P is true for strncat.  */
3322 
3323 static void
handle_builtin_stxncpy_strncat(bool append_p,gimple_stmt_iterator * gsi)3324 handle_builtin_stxncpy_strncat (bool append_p, gimple_stmt_iterator *gsi)
3325 {
3326   if (!strlen_to_stridx)
3327     return;
3328 
3329   gimple *stmt = gsi_stmt (*gsi);
3330 
3331   tree dst = gimple_call_arg (stmt, 0);
3332   tree src = gimple_call_arg (stmt, 1);
3333   tree len = gimple_call_arg (stmt, 2);
3334   tree dstsize = NULL_TREE, srcsize = NULL_TREE;
3335 
3336   int didx = get_stridx (dst);
3337   if (strinfo *sidst = didx > 0 ? get_strinfo (didx) : NULL)
3338     {
3339       /* Compute the size of the destination string including the nul
3340 	 if it is known to be nul-terminated.  */
3341       if (sidst->nonzero_chars)
3342 	{
3343 	  if (sidst->full_string_p)
3344 	    {
3345 	      /* String is known to be nul-terminated.  */
3346 	      tree type = TREE_TYPE (sidst->nonzero_chars);
3347 	      dstsize = fold_build2 (PLUS_EXPR, type, sidst->nonzero_chars,
3348 				     build_int_cst (type, 1));
3349 	    }
3350 	  else
3351 	    dstsize = sidst->nonzero_chars;
3352 	}
3353 
3354       dst = sidst->ptr;
3355     }
3356 
3357   int sidx = get_stridx (src);
3358   strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL;
3359   if (sisrc)
3360     {
3361       /* strncat() and strncpy() can modify the source string by writing
3362 	 over the terminating nul so SISRC->DONT_INVALIDATE must be left
3363 	 clear.  */
3364 
3365       /* Compute the size of the source string including the terminating
3366 	 nul if its known to be nul-terminated.  */
3367       if (sisrc->nonzero_chars)
3368 	{
3369 	  if (sisrc->full_string_p)
3370 	    {
3371 	      tree type = TREE_TYPE (sisrc->nonzero_chars);
3372 	      srcsize = fold_build2 (PLUS_EXPR, type, sisrc->nonzero_chars,
3373 				     build_int_cst (type, 1));
3374 	    }
3375 	  else
3376 	    srcsize = sisrc->nonzero_chars;
3377 	}
3378 
3379 	src = sisrc->ptr;
3380     }
3381   else
3382     srcsize = NULL_TREE;
3383 
3384   if (check_bounds_or_overlap (stmt, dst, src, dstsize, srcsize))
3385     {
3386       gimple_set_no_warning (stmt, true);
3387       return;
3388     }
3389 
3390   /* If the length argument was computed from strlen(S) for some string
3391      S retrieve the strinfo index for the string (PSS->FIRST) along with
3392      the location of the strlen() call (PSS->SECOND).  */
3393   stridx_strlenloc *pss = strlen_to_stridx->get (len);
3394   if (!pss || pss->first <= 0)
3395     {
3396       if (maybe_diag_stxncpy_trunc (*gsi, src, len))
3397 	gimple_set_no_warning (stmt, true);
3398 
3399       return;
3400     }
3401 
3402   /* Retrieve the strinfo data for the string S that LEN was computed
3403      from as some function F of strlen (S) (i.e., LEN need not be equal
3404      to strlen(S)).  */
3405   strinfo *silen = get_strinfo (pss->first);
3406 
3407   location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3408 
3409   tree func = gimple_call_fndecl (stmt);
3410 
3411   bool warned = false;
3412 
3413   /* When -Wstringop-truncation is set, try to determine truncation
3414      before diagnosing possible overflow.  Truncation is implied by
3415      the LEN argument being equal to strlen(SRC), regardless of
3416      whether its value is known.  Otherwise, issue the more generic
3417      -Wstringop-overflow which triggers for LEN arguments that in
3418      any meaningful way depend on strlen(SRC).  */
3419   if (!append_p
3420       && sisrc == silen
3421       && is_strlen_related_p (src, len)
3422       && warning_at (callloc, OPT_Wstringop_truncation,
3423 		     "%G%qD output truncated before terminating nul "
3424 		     "copying as many bytes from a string as its length",
3425 		     stmt, func))
3426     warned = true;
3427   else if (silen && is_strlen_related_p (src, silen->ptr))
3428     warned = warning_at (callloc, OPT_Wstringop_overflow_,
3429 			 "%G%qD specified bound depends on the length "
3430 			 "of the source argument",
3431 			 stmt, func);
3432   if (warned)
3433     {
3434       location_t strlenloc = pss->second;
3435       if (strlenloc != UNKNOWN_LOCATION && strlenloc != callloc)
3436 	inform (strlenloc, "length computed here");
3437     }
3438 }
3439 
3440 /* Handle a memcpy-like ({mem{,p}cpy,__mem{,p}cpy_chk}) call.
3441    If strlen of the second argument is known and length of the third argument
3442    is that plus one, strlen of the first argument is the same after this
3443    call.  Uses RVALS to determine range information.  */
3444 
3445 static void
handle_builtin_memcpy(enum built_in_function bcode,gimple_stmt_iterator * gsi,const vr_values * rvals)3446 handle_builtin_memcpy (enum built_in_function bcode, gimple_stmt_iterator *gsi,
3447 		       const vr_values *rvals)
3448 {
3449   tree lhs, oldlen, newlen;
3450   gimple *stmt = gsi_stmt (*gsi);
3451   strinfo *si, *dsi;
3452 
3453   tree len = gimple_call_arg (stmt, 2);
3454   tree src = gimple_call_arg (stmt, 1);
3455   tree dst = gimple_call_arg (stmt, 0);
3456 
3457   int didx = get_stridx (dst);
3458   strinfo *olddsi = NULL;
3459   if (didx > 0)
3460     olddsi = get_strinfo (didx);
3461   else if (didx < 0)
3462     return;
3463 
3464   if (olddsi != NULL
3465       && !integer_zerop (len))
3466     {
3467       maybe_warn_overflow (stmt, len, rvals, olddsi, false, true);
3468       adjust_last_stmt (olddsi, stmt, false);
3469     }
3470 
3471   int idx = get_stridx (src);
3472   if (idx == 0)
3473     return;
3474 
3475   bool full_string_p;
3476   if (idx > 0)
3477     {
3478       gimple *def_stmt;
3479 
3480       /* Handle memcpy (x, y, l) where l's relationship with strlen (y)
3481 	 is known.  */
3482       si = get_strinfo (idx);
3483       if (si == NULL || si->nonzero_chars == NULL_TREE)
3484 	return;
3485       if (TREE_CODE (len) == INTEGER_CST
3486 	  && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
3487 	{
3488 	  if (tree_int_cst_le (len, si->nonzero_chars))
3489 	    {
3490 	      /* Copying LEN nonzero characters, where LEN is constant.  */
3491 	      newlen = len;
3492 	      full_string_p = false;
3493 	    }
3494 	  else
3495 	    {
3496 	      /* Copying the whole of the analyzed part of SI.  */
3497 	      newlen = si->nonzero_chars;
3498 	      full_string_p = si->full_string_p;
3499 	    }
3500 	}
3501       else
3502 	{
3503 	  if (!si->full_string_p)
3504 	    return;
3505 	  if (TREE_CODE (len) != SSA_NAME)
3506 	    return;
3507 	  def_stmt = SSA_NAME_DEF_STMT (len);
3508 	  if (!is_gimple_assign (def_stmt)
3509 	      || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
3510 	      || gimple_assign_rhs1 (def_stmt) != si->nonzero_chars
3511 	      || !integer_onep (gimple_assign_rhs2 (def_stmt)))
3512 	    return;
3513 	  /* Copying variable-length string SI (and no more).  */
3514 	  newlen = si->nonzero_chars;
3515 	  full_string_p = true;
3516 	}
3517     }
3518   else
3519     {
3520       si = NULL;
3521       /* Handle memcpy (x, "abcd", 5) or
3522 	 memcpy (x, "abc\0uvw", 7).  */
3523       if (!tree_fits_uhwi_p (len))
3524 	return;
3525 
3526       unsigned HOST_WIDE_INT clen = tree_to_uhwi (len);
3527       unsigned HOST_WIDE_INT nonzero_chars = ~idx;
3528       newlen = build_int_cst (size_type_node, MIN (nonzero_chars, clen));
3529       full_string_p = clen > nonzero_chars;
3530     }
3531 
3532   if (!full_string_p
3533       && olddsi
3534       && olddsi->nonzero_chars
3535       && TREE_CODE (olddsi->nonzero_chars) == INTEGER_CST
3536       && tree_int_cst_le (newlen, olddsi->nonzero_chars))
3537     {
3538       /* The SRC substring being written strictly overlaps
3539 	 a subsequence of the existing string OLDDSI.  */
3540       newlen = olddsi->nonzero_chars;
3541       full_string_p = olddsi->full_string_p;
3542     }
3543 
3544   if (olddsi != NULL && TREE_CODE (len) == SSA_NAME)
3545     adjust_last_stmt (olddsi, stmt, false);
3546 
3547   if (didx == 0)
3548     {
3549       didx = new_stridx (dst);
3550       if (didx == 0)
3551 	return;
3552     }
3553   oldlen = NULL_TREE;
3554   if (olddsi != NULL)
3555     {
3556       dsi = unshare_strinfo (olddsi);
3557       oldlen = olddsi->nonzero_chars;
3558       dsi->nonzero_chars = newlen;
3559       dsi->full_string_p = full_string_p;
3560       /* Break the chain, so adjust_related_strinfo on later pointers in
3561 	 the chain won't adjust this one anymore.  */
3562       dsi->next = 0;
3563       dsi->stmt = NULL;
3564       dsi->endptr = NULL_TREE;
3565     }
3566   else
3567     {
3568       dsi = new_strinfo (dst, didx, newlen, full_string_p);
3569       set_strinfo (didx, dsi);
3570       find_equal_ptrs (dst, didx);
3571     }
3572   dsi->writable = true;
3573   dsi->dont_invalidate = true;
3574   if (olddsi != NULL)
3575     {
3576       tree adj = NULL_TREE;
3577       location_t loc = gimple_location (stmt);
3578       if (oldlen == NULL_TREE)
3579 	;
3580       else if (integer_zerop (oldlen))
3581 	adj = newlen;
3582       else if (TREE_CODE (oldlen) == INTEGER_CST
3583 	       || TREE_CODE (newlen) == INTEGER_CST)
3584 	adj = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (newlen), newlen,
3585 			       fold_convert_loc (loc, TREE_TYPE (newlen),
3586 						 oldlen));
3587       if (adj != NULL_TREE)
3588 	adjust_related_strinfos (loc, dsi, adj);
3589       else
3590 	dsi->prev = 0;
3591     }
3592   /* memcpy src may not overlap dst, so src doesn't need to be
3593      invalidated either.  */
3594   if (si != NULL)
3595     si->dont_invalidate = true;
3596 
3597   if (full_string_p)
3598     {
3599       lhs = gimple_call_lhs (stmt);
3600       switch (bcode)
3601 	{
3602 	case BUILT_IN_MEMCPY:
3603 	case BUILT_IN_MEMCPY_CHK:
3604 	  /* Allow adjust_last_stmt to decrease this memcpy's size.  */
3605 	  laststmt.stmt = stmt;
3606 	  laststmt.len = dsi->nonzero_chars;
3607 	  laststmt.stridx = dsi->idx;
3608 	  if (lhs)
3609 	    ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
3610 	  break;
3611 	case BUILT_IN_MEMPCPY:
3612 	case BUILT_IN_MEMPCPY_CHK:
3613 	  break;
3614 	default:
3615 	  gcc_unreachable ();
3616 	}
3617     }
3618 }
3619 
3620 /* Handle a strcat-like ({strcat,__strcat_chk}) call.
3621    If strlen of the second argument is known, strlen of the first argument
3622    is increased by the length of the second argument.  Furthermore, attempt
3623    to convert it to memcpy/strcpy if the length of the first argument
3624    is known.  */
3625 
3626 static void
handle_builtin_strcat(enum built_in_function bcode,gimple_stmt_iterator * gsi)3627 handle_builtin_strcat (enum built_in_function bcode, gimple_stmt_iterator *gsi)
3628 {
3629   int idx, didx;
3630   tree srclen, args, type, fn, objsz, endptr;
3631   bool success;
3632   gimple *stmt = gsi_stmt (*gsi);
3633   strinfo *si, *dsi;
3634   location_t loc = gimple_location (stmt);
3635 
3636   tree src = gimple_call_arg (stmt, 1);
3637   tree dst = gimple_call_arg (stmt, 0);
3638 
3639   /* Bail if the source is the same as destination.  It will be diagnosed
3640      elsewhere.  */
3641   if (operand_equal_p (src, dst, 0))
3642     return;
3643 
3644   tree lhs = gimple_call_lhs (stmt);
3645 
3646   didx = get_stridx (dst);
3647   if (didx < 0)
3648     return;
3649 
3650   dsi = NULL;
3651   if (didx > 0)
3652     dsi = get_strinfo (didx);
3653 
3654   srclen = NULL_TREE;
3655   si = NULL;
3656   idx = get_stridx (src);
3657   if (idx < 0)
3658     srclen = build_int_cst (size_type_node, ~idx);
3659   else if (idx > 0)
3660     {
3661       si = get_strinfo (idx);
3662       if (si != NULL)
3663 	srclen = get_string_length (si);
3664     }
3665 
3666   /* Set the no-warning bit on the transformed statement?  */
3667   bool set_no_warning = false;
3668 
3669   if (dsi == NULL || get_string_length (dsi) == NULL_TREE)
3670     {
3671       {
3672 	  /* The concatenation always involves copying at least one byte
3673 	     (the terminating nul), even if the source string is empty.
3674 	     If the source is unknown assume it's one character long and
3675 	     used that as both sizes.  */
3676 	tree slen = srclen;
3677 	if (slen)
3678 	  {
3679 	    tree type = TREE_TYPE (slen);
3680 	    slen = fold_build2 (PLUS_EXPR, type, slen, build_int_cst (type, 1));
3681 	  }
3682 
3683 	tree sptr = si && si->ptr ? si->ptr : src;
3684 
3685 	if (check_bounds_or_overlap (stmt, dst, sptr, NULL_TREE, slen))
3686 	  {
3687 	    gimple_set_no_warning (stmt, true);
3688 	    set_no_warning = true;
3689 	  }
3690       }
3691 
3692       /* strcat (p, q) can be transformed into
3693 	 tmp = p + strlen (p); endptr = stpcpy (tmp, q);
3694 	 with length endptr - p if we need to compute the length
3695 	 later on.  Don't do this transformation if we don't need
3696 	 it.  */
3697       if (builtin_decl_implicit_p (BUILT_IN_STPCPY) && lhs == NULL_TREE)
3698 	{
3699 	  if (didx == 0)
3700 	    {
3701 	      didx = new_stridx (dst);
3702 	      if (didx == 0)
3703 		return;
3704 	    }
3705 	  if (dsi == NULL)
3706 	    {
3707 	      dsi = new_strinfo (dst, didx, NULL_TREE, false);
3708 	      set_strinfo (didx, dsi);
3709 	      find_equal_ptrs (dst, didx);
3710 	    }
3711 	  else
3712 	    {
3713 	      dsi = unshare_strinfo (dsi);
3714 	      dsi->nonzero_chars = NULL_TREE;
3715 	      dsi->full_string_p = false;
3716 	      dsi->next = 0;
3717 	      dsi->endptr = NULL_TREE;
3718 	    }
3719 	  dsi->writable = true;
3720 	  dsi->stmt = stmt;
3721 	  dsi->dont_invalidate = true;
3722 	}
3723       return;
3724     }
3725 
3726   tree dstlen = dsi->nonzero_chars;
3727   endptr = dsi->endptr;
3728 
3729   dsi = unshare_strinfo (dsi);
3730   dsi->endptr = NULL_TREE;
3731   dsi->stmt = NULL;
3732   dsi->writable = true;
3733 
3734   if (srclen != NULL_TREE)
3735     {
3736       dsi->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
3737 					    TREE_TYPE (dsi->nonzero_chars),
3738 					    dsi->nonzero_chars, srclen);
3739       gcc_assert (dsi->full_string_p);
3740       adjust_related_strinfos (loc, dsi, srclen);
3741       dsi->dont_invalidate = true;
3742     }
3743   else
3744     {
3745       dsi->nonzero_chars = NULL;
3746       dsi->full_string_p = false;
3747       if (lhs == NULL_TREE && builtin_decl_implicit_p (BUILT_IN_STPCPY))
3748 	dsi->dont_invalidate = true;
3749     }
3750 
3751   if (si != NULL)
3752     /* strcat src may not overlap dst, so src doesn't need to be
3753        invalidated either.  */
3754     si->dont_invalidate = true;
3755 
3756   /* For now.  Could remove the lhs from the call and add
3757      lhs = dst; afterwards.  */
3758   if (lhs)
3759     return;
3760 
3761   fn = NULL_TREE;
3762   objsz = NULL_TREE;
3763   switch (bcode)
3764     {
3765     case BUILT_IN_STRCAT:
3766       if (srclen != NULL_TREE)
3767 	fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
3768       else
3769 	fn = builtin_decl_implicit (BUILT_IN_STRCPY);
3770       break;
3771     case BUILT_IN_STRCAT_CHK:
3772       if (srclen != NULL_TREE)
3773 	fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
3774       else
3775 	fn = builtin_decl_explicit (BUILT_IN_STRCPY_CHK);
3776       objsz = gimple_call_arg (stmt, 2);
3777       break;
3778     default:
3779       gcc_unreachable ();
3780     }
3781 
3782   if (fn == NULL_TREE)
3783     return;
3784 
3785   if (dsi && dstlen)
3786     {
3787       tree type = TREE_TYPE (dstlen);
3788 
3789       /* Compute the size of the source sequence, including the nul.  */
3790       tree srcsize = srclen ? srclen : size_zero_node;
3791       tree one = build_int_cst (type, 1);
3792       srcsize = fold_build2 (PLUS_EXPR, type, srcsize, one);
3793       tree dstsize = fold_build2 (PLUS_EXPR, type, dstlen, one);
3794       tree sptr = si && si->ptr ? si->ptr : src;
3795 
3796       if (check_bounds_or_overlap (stmt, dst, sptr, dstsize, srcsize))
3797 	{
3798 	  gimple_set_no_warning (stmt, true);
3799 	  set_no_warning = true;
3800 	}
3801     }
3802 
3803   tree len = NULL_TREE;
3804   if (srclen != NULL_TREE)
3805     {
3806       args = TYPE_ARG_TYPES (TREE_TYPE (fn));
3807       type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
3808 
3809       len = fold_convert_loc (loc, type, unshare_expr (srclen));
3810       len = fold_build2_loc (loc, PLUS_EXPR, type, len,
3811 			     build_int_cst (type, 1));
3812       len = force_gimple_operand_gsi (gsi, len, true, NULL_TREE, true,
3813 				      GSI_SAME_STMT);
3814     }
3815   if (endptr)
3816     dst = fold_convert_loc (loc, TREE_TYPE (dst), unshare_expr (endptr));
3817   else
3818     dst = fold_build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (dst), dst,
3819 			   fold_convert_loc (loc, sizetype,
3820 					     unshare_expr (dstlen)));
3821   dst = force_gimple_operand_gsi (gsi, dst, true, NULL_TREE, true,
3822 				  GSI_SAME_STMT);
3823   if (objsz)
3824     {
3825       objsz = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (objsz), objsz,
3826 			       fold_convert_loc (loc, TREE_TYPE (objsz),
3827 						 unshare_expr (dstlen)));
3828       objsz = force_gimple_operand_gsi (gsi, objsz, true, NULL_TREE, true,
3829 					GSI_SAME_STMT);
3830     }
3831   if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3832     {
3833       fprintf (dump_file, "Optimizing: ");
3834       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3835     }
3836   if (srclen != NULL_TREE)
3837     success = update_gimple_call (gsi, fn, 3 + (objsz != NULL_TREE),
3838 				  dst, src, len, objsz);
3839   else
3840     success = update_gimple_call (gsi, fn, 2 + (objsz != NULL_TREE),
3841 				  dst, src, objsz);
3842   if (success)
3843     {
3844       stmt = gsi_stmt (*gsi);
3845       update_stmt (stmt);
3846       if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3847 	{
3848 	  fprintf (dump_file, "into: ");
3849 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3850 	}
3851       /* If srclen == NULL, note that current string length can be
3852 	 computed by transforming this strcpy into stpcpy.  */
3853       if (srclen == NULL_TREE && dsi->dont_invalidate)
3854 	dsi->stmt = stmt;
3855       adjust_last_stmt (dsi, stmt, true);
3856       if (srclen != NULL_TREE)
3857 	{
3858 	  laststmt.stmt = stmt;
3859 	  laststmt.len = srclen;
3860 	  laststmt.stridx = dsi->idx;
3861 	}
3862     }
3863   else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3864     fprintf (dump_file, "not possible.\n");
3865 
3866   if (set_no_warning)
3867     gimple_set_no_warning (stmt, true);
3868 }
3869 
3870 /* Handle a call to an allocation function like alloca, malloc or calloc,
3871    or an ordinary allocation function declared with attribute alloc_size.  */
3872 
3873 static void
handle_alloc_call(enum built_in_function bcode,gimple_stmt_iterator * gsi)3874 handle_alloc_call (enum built_in_function bcode, gimple_stmt_iterator *gsi)
3875 {
3876   gimple *stmt = gsi_stmt (*gsi);
3877   tree lhs = gimple_call_lhs (stmt);
3878   if (lhs == NULL_TREE)
3879     return;
3880 
3881   gcc_assert (get_stridx (lhs) == 0);
3882   int idx = new_stridx (lhs);
3883   tree length = NULL_TREE;
3884   if (bcode == BUILT_IN_CALLOC)
3885     length = build_int_cst (size_type_node, 0);
3886   strinfo *si = new_strinfo (lhs, idx, length, length != NULL_TREE);
3887   if (bcode == BUILT_IN_CALLOC)
3888     {
3889       /* Only set STMT for calloc and malloc.  */
3890       si->stmt = stmt;
3891       /* Only set ENDPTR for calloc.  */
3892       si->endptr = lhs;
3893     }
3894   else if (bcode == BUILT_IN_MALLOC)
3895     si->stmt = stmt;
3896 
3897   /* Set ALLOC is set for all allocation functions.  */
3898   si->alloc = stmt;
3899   set_strinfo (idx, si);
3900   si->writable = true;
3901   si->dont_invalidate = true;
3902 }
3903 
3904 /* Handle a call to memset.
3905    After a call to calloc, memset(,0,) is unnecessary.
3906    memset(malloc(n),0,n) is calloc(n,1).
3907    return true when the call is transformed, false otherwise.
3908    When nonnull uses RVALS to determine range information.  */
3909 
3910 static bool
handle_builtin_memset(gimple_stmt_iterator * gsi,bool * zero_write,const vr_values * rvals)3911 handle_builtin_memset (gimple_stmt_iterator *gsi, bool *zero_write,
3912 		       const vr_values *rvals)
3913 {
3914   gimple *memset_stmt = gsi_stmt (*gsi);
3915   tree ptr = gimple_call_arg (memset_stmt, 0);
3916   /* Set to the non-constant offset added to PTR.  */
3917   wide_int offrng[2];
3918   int idx1 = get_stridx (ptr, offrng, rvals);
3919   if (idx1 <= 0)
3920     return false;
3921   strinfo *si1 = get_strinfo (idx1);
3922   if (!si1)
3923     return false;
3924   gimple *alloc_stmt = si1->alloc;
3925   if (!alloc_stmt || !is_gimple_call (alloc_stmt))
3926     return false;
3927   tree callee1 = gimple_call_fndecl (alloc_stmt);
3928   if (!valid_builtin_call (alloc_stmt))
3929     return false;
3930   tree alloc_size = gimple_call_arg (alloc_stmt, 0);
3931   tree memset_size = gimple_call_arg (memset_stmt, 2);
3932 
3933   /* Check for overflow.  */
3934   maybe_warn_overflow (memset_stmt, memset_size, rvals, NULL, false, true);
3935 
3936   /* Bail when there is no statement associated with the destination
3937      (the statement may be null even when SI1->ALLOC is not).  */
3938   if (!si1->stmt)
3939     return false;
3940 
3941   /* Avoid optimizing if store is at a variable offset from the beginning
3942      of the allocated object.  */
3943   if (offrng[0] != 0 || offrng[0] != offrng[1])
3944     return false;
3945 
3946   /* Bail when the call writes a non-zero value.  */
3947   if (!integer_zerop (gimple_call_arg (memset_stmt, 1)))
3948     return false;
3949 
3950   /* Let the caller know the memset call cleared the destination.  */
3951   *zero_write = true;
3952 
3953   enum built_in_function code1 = DECL_FUNCTION_CODE (callee1);
3954   if (code1 == BUILT_IN_CALLOC)
3955     /* Not touching alloc_stmt */ ;
3956   else if (code1 == BUILT_IN_MALLOC
3957 	   && operand_equal_p (memset_size, alloc_size, 0))
3958     {
3959       /* Replace the malloc + memset calls with calloc.  */
3960       gimple_stmt_iterator gsi1 = gsi_for_stmt (si1->stmt);
3961       update_gimple_call (&gsi1, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
3962 			  alloc_size, build_one_cst (size_type_node));
3963       si1->nonzero_chars = build_int_cst (size_type_node, 0);
3964       si1->full_string_p = true;
3965       si1->stmt = gsi_stmt (gsi1);
3966     }
3967   else
3968     return false;
3969   tree lhs = gimple_call_lhs (memset_stmt);
3970   unlink_stmt_vdef (memset_stmt);
3971   if (lhs)
3972     {
3973       gimple *assign = gimple_build_assign (lhs, ptr);
3974       gsi_replace (gsi, assign, false);
3975     }
3976   else
3977     {
3978       gsi_remove (gsi, true);
3979       release_defs (memset_stmt);
3980     }
3981 
3982   return true;
3983 }
3984 
3985 /* Return a pointer to the first such equality expression if RES is used
3986    only in expressions testing its equality to zero, and null otherwise.  */
3987 
3988 static gimple *
used_only_for_zero_equality(tree res)3989 used_only_for_zero_equality (tree res)
3990 {
3991   gimple *first_use = NULL;
3992 
3993   use_operand_p use_p;
3994   imm_use_iterator iter;
3995 
3996   FOR_EACH_IMM_USE_FAST (use_p, iter, res)
3997     {
3998       gimple *use_stmt = USE_STMT (use_p);
3999 
4000       if (is_gimple_debug (use_stmt))
4001         continue;
4002       if (gimple_code (use_stmt) == GIMPLE_ASSIGN)
4003 	{
4004 	  tree_code code = gimple_assign_rhs_code (use_stmt);
4005 	  if (code == COND_EXPR)
4006 	    {
4007 	      tree cond_expr = gimple_assign_rhs1 (use_stmt);
4008 	      if ((TREE_CODE (cond_expr) != EQ_EXPR
4009 		   && (TREE_CODE (cond_expr) != NE_EXPR))
4010 		  || !integer_zerop (TREE_OPERAND (cond_expr, 1)))
4011 		return NULL;
4012 	    }
4013 	  else if (code == EQ_EXPR || code == NE_EXPR)
4014 	    {
4015 	      if (!integer_zerop (gimple_assign_rhs2 (use_stmt)))
4016 		return NULL;
4017             }
4018 	  else
4019 	    return NULL;
4020 	}
4021       else if (gimple_code (use_stmt) == GIMPLE_COND)
4022 	{
4023 	  tree_code code = gimple_cond_code (use_stmt);
4024 	  if ((code != EQ_EXPR && code != NE_EXPR)
4025 	      || !integer_zerop (gimple_cond_rhs (use_stmt)))
4026 	    return NULL;
4027 	}
4028       else
4029         return NULL;
4030 
4031       if (!first_use)
4032 	first_use = use_stmt;
4033     }
4034 
4035   return first_use;
4036 }
4037 
4038 /* Handle a call to memcmp.  We try to handle small comparisons by
4039    converting them to load and compare, and replacing the call to memcmp
4040    with a __builtin_memcmp_eq call where possible.
4041    return true when call is transformed, return false otherwise.  */
4042 
4043 static bool
handle_builtin_memcmp(gimple_stmt_iterator * gsi)4044 handle_builtin_memcmp (gimple_stmt_iterator *gsi)
4045 {
4046   gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
4047   tree res = gimple_call_lhs (stmt);
4048 
4049   if (!res || !used_only_for_zero_equality (res))
4050     return false;
4051 
4052   tree arg1 = gimple_call_arg (stmt, 0);
4053   tree arg2 = gimple_call_arg (stmt, 1);
4054   tree len = gimple_call_arg (stmt, 2);
4055   unsigned HOST_WIDE_INT leni;
4056 
4057   if (tree_fits_uhwi_p (len)
4058       && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode)
4059       && pow2p_hwi (leni))
4060     {
4061       leni *= CHAR_TYPE_SIZE;
4062       unsigned align1 = get_pointer_alignment (arg1);
4063       unsigned align2 = get_pointer_alignment (arg2);
4064       unsigned align = MIN (align1, align2);
4065       scalar_int_mode mode;
4066       if (int_mode_for_size (leni, 1).exists (&mode)
4067 	  && (align >= leni || !targetm.slow_unaligned_access (mode, align)))
4068 	{
4069 	  location_t loc = gimple_location (stmt);
4070 	  tree type, off;
4071 	  type = build_nonstandard_integer_type (leni, 1);
4072 	  gcc_assert (known_eq (GET_MODE_BITSIZE (TYPE_MODE (type)), leni));
4073 	  tree ptrtype = build_pointer_type_for_mode (char_type_node,
4074 						      ptr_mode, true);
4075 	  off = build_int_cst (ptrtype, 0);
4076 	  arg1 = build2_loc (loc, MEM_REF, type, arg1, off);
4077 	  arg2 = build2_loc (loc, MEM_REF, type, arg2, off);
4078 	  tree tem1 = fold_const_aggregate_ref (arg1);
4079 	  if (tem1)
4080 	    arg1 = tem1;
4081 	  tree tem2 = fold_const_aggregate_ref (arg2);
4082 	  if (tem2)
4083 	    arg2 = tem2;
4084 	  res = fold_convert_loc (loc, TREE_TYPE (res),
4085 				  fold_build2_loc (loc, NE_EXPR,
4086 						   boolean_type_node,
4087 						   arg1, arg2));
4088 	  gimplify_and_update_call_from_tree (gsi, res);
4089 	  return true;
4090 	}
4091     }
4092 
4093   gimple_call_set_fndecl (stmt, builtin_decl_explicit (BUILT_IN_MEMCMP_EQ));
4094   return true;
4095 }
4096 
4097 /* Given strinfo IDX for ARG, sets LENRNG[] to the range of lengths
4098    of the string(s) referenced by ARG if it can be determined.
4099    If the length cannot be determined, sets *SIZE to the size of
4100    the array the string is stored in, if any.  If no such array is
4101    known, sets *SIZE to -1.  When the strings are nul-terminated sets
4102    *NULTERM to true, otherwise to false.  When nonnull uses RVALS to
4103    determine range information. Returns true on success.  */
4104 
4105 static bool
get_len_or_size(tree arg,int idx,unsigned HOST_WIDE_INT lenrng[2],unsigned HOST_WIDE_INT * size,bool * nulterm,const vr_values * rvals)4106 get_len_or_size (tree arg, int idx, unsigned HOST_WIDE_INT lenrng[2],
4107 		 unsigned HOST_WIDE_INT *size, bool *nulterm,
4108 		 const vr_values *rvals)
4109 {
4110   /* Invalidate.  */
4111   *size = HOST_WIDE_INT_M1U;
4112 
4113   if (idx < 0)
4114     {
4115       /* IDX is the inverted constant string length.  */
4116       lenrng[0] = ~idx;
4117       lenrng[1] = lenrng[0];
4118       *nulterm = true;
4119       return true;
4120     }
4121 
4122   /* Set so that both LEN and ~LEN are invalid lengths, i.e., maximum
4123      possible length + 1.  */
4124   lenrng[0] = lenrng[1] = HOST_WIDE_INT_MAX;
4125 
4126   if (strinfo *si = idx ? get_strinfo (idx) : NULL)
4127     {
4128       /* FIXME: Handle all this in_range_strlen_dynamic.  */
4129       if (!si->nonzero_chars)
4130 	;
4131       else if (tree_fits_uhwi_p (si->nonzero_chars))
4132 	{
4133 	  lenrng[0] = tree_to_uhwi (si->nonzero_chars);
4134 	  *nulterm = si->full_string_p;
4135 	  /* Set the upper bound only if the string is known to be
4136 	     nul-terminated, otherwise leave it at maximum + 1.  */
4137 	  if (*nulterm)
4138 	    lenrng[1] = lenrng[0];
4139 	}
4140       else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
4141 	{
4142 	  wide_int min, max;
4143 	  value_range_kind rng = get_range_info (si->nonzero_chars, &min, &max);
4144 	  if (rng == VR_RANGE)
4145 	    {
4146 	      lenrng[0] = min.to_uhwi ();
4147 	      lenrng[1] = max.to_uhwi ();
4148 	      *nulterm = si->full_string_p;
4149 	    }
4150 	}
4151     }
4152 
4153   if (lenrng[0] != HOST_WIDE_INT_MAX)
4154     return true;
4155 
4156   /* Compute the minimum and maximum real or possible lengths.  */
4157   c_strlen_data lendata = { };
4158   /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
4159      to have it set to the length of the longest string in a PHI.  */
4160   lendata.maxbound = arg;
4161   get_range_strlen_dynamic (arg, &lendata, rvals);
4162 
4163   unsigned HOST_WIDE_INT maxbound = HOST_WIDE_INT_M1U;
4164   if (tree_fits_uhwi_p (lendata.maxbound)
4165       && !integer_all_onesp (lendata.maxbound))
4166     maxbound = tree_to_uhwi (lendata.maxbound);
4167 
4168   if (tree_fits_uhwi_p (lendata.minlen) && tree_fits_uhwi_p (lendata.maxlen))
4169     {
4170       unsigned HOST_WIDE_INT minlen = tree_to_uhwi (lendata.minlen);
4171       unsigned HOST_WIDE_INT maxlen = tree_to_uhwi (lendata.maxlen);
4172 
4173       /* The longest string in this data model.  */
4174       const unsigned HOST_WIDE_INT lenmax
4175 	= tree_to_uhwi (max_object_size ()) - 2;
4176 
4177       if (maxbound == HOST_WIDE_INT_M1U)
4178 	{
4179 	  lenrng[0] = minlen;
4180 	  lenrng[1] = maxlen;
4181 	  *nulterm = minlen == maxlen;
4182 	}
4183       else if (maxlen < lenmax)
4184 	{
4185 	  *size = maxbound + 1;
4186 	  *nulterm = false;
4187 	}
4188       else
4189 	return false;
4190 
4191       return true;
4192     }
4193 
4194   if (maxbound != HOST_WIDE_INT_M1U
4195       && lendata.maxlen
4196       && !integer_all_onesp (lendata.maxlen))
4197     {
4198       /* Set *SIZE to LENDATA.MAXBOUND which is a conservative estimate
4199 	 of the longest string based on the sizes of the arrays referenced
4200 	 by ARG.  */
4201       *size = maxbound + 1;
4202       *nulterm = false;
4203       return true;
4204     }
4205 
4206   return false;
4207 }
4208 
4209 /* If IDX1 and IDX2 refer to strings A and B of unequal lengths, return
4210    the result of 0 == strncmp (A, B, BOUND) (which is the same as strcmp
4211    for a sufficiently large BOUND).  If the result is based on the length
4212    of one string being greater than the longest string that would fit in
4213    the array pointer to by the argument, set *PLEN and *PSIZE to
4214    the corresponding length (or its complement when the string is known
4215    to be at least as long and need not be nul-terminated) and size.
4216    Otherwise return null.  */
4217 
4218 static tree
strxcmp_eqz_result(tree arg1,int idx1,tree arg2,int idx2,unsigned HOST_WIDE_INT bound,unsigned HOST_WIDE_INT len[2],unsigned HOST_WIDE_INT * psize,const vr_values * rvals)4219 strxcmp_eqz_result (tree arg1, int idx1, tree arg2, int idx2,
4220 		    unsigned HOST_WIDE_INT bound, unsigned HOST_WIDE_INT len[2],
4221 		    unsigned HOST_WIDE_INT *psize, const vr_values *rvals)
4222 {
4223   /* Determine the range the length of each string is in and whether it's
4224      known to be nul-terminated, or the size of the array it's stored in.  */
4225   bool nul1, nul2;
4226   unsigned HOST_WIDE_INT siz1, siz2;
4227   unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4228   if (!get_len_or_size (arg1, idx1, len1rng, &siz1, &nul1, rvals)
4229       || !get_len_or_size (arg2, idx2, len2rng, &siz2, &nul2, rvals))
4230     return NULL_TREE;
4231 
4232   /* BOUND is set to HWI_M1U for strcmp and less to strncmp, and LENiRNG
4233      to HWI_MAX when invalid.  Adjust the length of each string to consider
4234      to be no more than BOUND.  */
4235   if (len1rng[0] < HOST_WIDE_INT_MAX && len1rng[0] > bound)
4236     len1rng[0] = bound;
4237   if (len1rng[1] < HOST_WIDE_INT_MAX && len1rng[1] > bound)
4238     len1rng[1] = bound;
4239   if (len2rng[0] < HOST_WIDE_INT_MAX && len2rng[0] > bound)
4240     len2rng[0] = bound;
4241   if (len2rng[1] < HOST_WIDE_INT_MAX && len2rng[1] > bound)
4242     len2rng[1] = bound;
4243 
4244   /* Two empty strings are equal.  */
4245   if (len1rng[1] == 0 && len2rng[1] == 0)
4246     return integer_one_node;
4247 
4248   /* The strings are definitely unequal when the lower bound of the length
4249      of one of them is greater than the length of the longest string that
4250      would fit into the other array.  */
4251   if (len1rng[0] == HOST_WIDE_INT_MAX
4252       && len2rng[0] != HOST_WIDE_INT_MAX
4253       && ((len2rng[0] < bound && len2rng[0] >= siz1)
4254 	  || len2rng[0] > siz1))
4255     {
4256       *psize = siz1;
4257       len[0] = len1rng[0];
4258       /* Set LEN[0] to the lower bound of ARG1's length when it's
4259 	 nul-terminated or to the complement of its minimum length
4260 	 otherwise,  */
4261       len[1] = nul2 ? len2rng[0] : ~len2rng[0];
4262       return integer_zero_node;
4263     }
4264 
4265   if (len2rng[0] == HOST_WIDE_INT_MAX
4266       && len1rng[0] != HOST_WIDE_INT_MAX
4267       && ((len1rng[0] < bound && len1rng[0] >= siz2)
4268 	  || len1rng[0] > siz2))
4269     {
4270       *psize = siz2;
4271       len[0] = nul1 ? len1rng[0] : ~len1rng[0];
4272       len[1] = len2rng[0];
4273       return integer_zero_node;
4274     }
4275 
4276   /* The strings are also definitely unequal when their lengths are unequal
4277      and at least one is nul-terminated.  */
4278   if (len1rng[0] != HOST_WIDE_INT_MAX
4279       && len2rng[0] != HOST_WIDE_INT_MAX
4280       && ((len1rng[1] < len2rng[0] && nul1)
4281 	  || (len2rng[1] < len1rng[0] && nul2)))
4282     {
4283       if (bound <= len1rng[0] || bound <= len2rng[0])
4284 	*psize = bound;
4285       else
4286 	*psize = HOST_WIDE_INT_M1U;
4287 
4288       len[0] = len1rng[0];
4289       len[1] = len2rng[0];
4290       return integer_zero_node;
4291     }
4292 
4293   /* The string lengths may be equal or unequal.  Even when equal and
4294      both strings nul-terminated, without the string contents there's
4295      no way to determine whether they are equal.  */
4296   return NULL_TREE;
4297 }
4298 
4299 /* Diagnose pointless calls to strcmp or strncmp STMT with string
4300    arguments of lengths LEN or size SIZ and (for strncmp) BOUND,
4301    whose result is used in equality expressions that evaluate to
4302    a constant due to one argument being longer than the size of
4303    the other.  */
4304 
4305 static void
maybe_warn_pointless_strcmp(gimple * stmt,HOST_WIDE_INT bound,unsigned HOST_WIDE_INT len[2],unsigned HOST_WIDE_INT siz)4306 maybe_warn_pointless_strcmp (gimple *stmt, HOST_WIDE_INT bound,
4307 			     unsigned HOST_WIDE_INT len[2],
4308 			     unsigned HOST_WIDE_INT siz)
4309 {
4310   tree lhs = gimple_call_lhs (stmt);
4311   gimple *use = used_only_for_zero_equality (lhs);
4312   if (!use)
4313     return;
4314 
4315   bool at_least = false;
4316 
4317   /* Excessive LEN[i] indicates a lower bound.  */
4318   if (len[0] > HOST_WIDE_INT_MAX)
4319     {
4320       at_least = true;
4321       len[0] = ~len[0];
4322     }
4323 
4324   if (len[1] > HOST_WIDE_INT_MAX)
4325     {
4326       at_least = true;
4327       len[1] = ~len[1];
4328     }
4329 
4330   unsigned HOST_WIDE_INT minlen = MIN (len[0], len[1]);
4331 
4332   /* FIXME: Include a note pointing to the declaration of the smaller
4333      array.  */
4334   location_t stmt_loc = gimple_or_expr_nonartificial_location (stmt, lhs);
4335 
4336   tree callee = gimple_call_fndecl (stmt);
4337   bool warned = false;
4338   if (siz <= minlen && bound == -1)
4339     warned = warning_at (stmt_loc, OPT_Wstring_compare,
4340 			 (at_least
4341 			  ? G_("%G%qD of a string of length %wu or more and "
4342 			       "an array of size %wu evaluates to nonzero")
4343 			  : G_("%G%qD of a string of length %wu and an array "
4344 			       "of size %wu evaluates to nonzero")),
4345 			 stmt, callee, minlen, siz);
4346   else if (!at_least && siz <= HOST_WIDE_INT_MAX)
4347     {
4348       if (len[0] != HOST_WIDE_INT_MAX && len[1] != HOST_WIDE_INT_MAX)
4349 	warned = warning_at (stmt_loc, OPT_Wstring_compare,
4350 			     "%G%qD of strings of length %wu and %wu "
4351 			     "and bound of %wu evaluates to nonzero",
4352 			     stmt, callee, len[0], len[1], bound);
4353       else
4354 	warned = warning_at (stmt_loc, OPT_Wstring_compare,
4355 			     "%G%qD of a string of length %wu, an array "
4356 			     "of size %wu and bound of %wu evaluates to "
4357 			     "nonzero",
4358 			     stmt, callee, minlen, siz, bound);
4359     }
4360 
4361   if (warned)
4362     {
4363       location_t use_loc = gimple_location (use);
4364       if (LOCATION_LINE (stmt_loc) != LOCATION_LINE (use_loc))
4365 	inform (use_loc, "in this expression");
4366     }
4367 }
4368 
4369 
4370 /* Optimize a call to strcmp or strncmp either by folding it to a constant
4371    when possible or by transforming the latter to the former.  Warn about
4372    calls where the length of one argument is greater than the size of
4373    the array to which the other argument points if the latter's length
4374    is not known.  Return true when the call has been transformed into
4375    another and false otherwise.  */
4376 
4377 static bool
handle_builtin_string_cmp(gimple_stmt_iterator * gsi,const vr_values * rvals)4378 handle_builtin_string_cmp (gimple_stmt_iterator *gsi, const vr_values *rvals)
4379 {
4380   gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
4381   tree lhs = gimple_call_lhs (stmt);
4382 
4383   if (!lhs)
4384     return false;
4385 
4386   tree arg1 = gimple_call_arg (stmt, 0);
4387   tree arg2 = gimple_call_arg (stmt, 1);
4388   int idx1 = get_stridx (arg1);
4389   int idx2 = get_stridx (arg2);
4390 
4391   /* For strncmp set to the value of the third argument if known.  */
4392   HOST_WIDE_INT bound = -1;
4393   tree len = NULL_TREE;
4394   /* Extract the strncmp bound.  */
4395   if (gimple_call_num_args (stmt) == 3)
4396     {
4397       len = gimple_call_arg (stmt, 2);
4398       if (tree_fits_shwi_p (len))
4399         bound = tree_to_shwi (len);
4400 
4401       /* If the bound argument is NOT known, do nothing.  */
4402       if (bound < 0)
4403 	return false;
4404     }
4405 
4406   /* Avoid folding if either argument is not a nul-terminated array.
4407      Defer warning until later.  */
4408   if (!check_nul_terminated_array (NULL_TREE, arg1, len)
4409       || !check_nul_terminated_array (NULL_TREE, arg2, len))
4410     return false;
4411 
4412   {
4413     /* Set to the length of one argument (or its complement if it's
4414        the lower bound of a range) and the size of the array storing
4415        the other if the result is based on the former being equal to
4416        or greater than the latter.  */
4417     unsigned HOST_WIDE_INT len[2] = { HOST_WIDE_INT_MAX, HOST_WIDE_INT_MAX };
4418     unsigned HOST_WIDE_INT siz = HOST_WIDE_INT_M1U;
4419 
4420     /* Try to determine if the two strings are either definitely equal
4421        or definitely unequal and if so, either fold the result to zero
4422        (when equal) or set the range of the result to ~[0, 0] otherwise.  */
4423     if (tree eqz = strxcmp_eqz_result (arg1, idx1, arg2, idx2, bound,
4424 				       len, &siz, rvals))
4425       {
4426 	if (integer_zerop (eqz))
4427 	  {
4428 	    maybe_warn_pointless_strcmp (stmt, bound, len, siz);
4429 
4430 	    /* When the lengths of the first two string arguments are
4431 	       known to be unequal set the range of the result to non-zero.
4432 	       This allows the call to be eliminated if its result is only
4433 	       used in tests for equality to zero.  */
4434 	    wide_int zero = wi::zero (TYPE_PRECISION (TREE_TYPE (lhs)));
4435 	    set_range_info (lhs, VR_ANTI_RANGE, zero, zero);
4436 	    return false;
4437 	  }
4438 	/* When the two strings are definitely equal (such as when they
4439 	   are both empty) fold the call to the constant result.  */
4440 	replace_call_with_value (gsi, integer_zero_node);
4441 	return true;
4442       }
4443   }
4444 
4445   /* Return if nothing is known about the strings pointed to by ARG1
4446      and ARG2.  */
4447   if (idx1 == 0 && idx2 == 0)
4448     return false;
4449 
4450   /* Determine either the length or the size of each of the strings,
4451      whichever is available.  */
4452   HOST_WIDE_INT cstlen1 = -1, cstlen2 = -1;
4453   HOST_WIDE_INT arysiz1 = -1, arysiz2 = -1;
4454 
4455   {
4456     unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4457     unsigned HOST_WIDE_INT arsz1, arsz2;
4458     bool nulterm[2];
4459 
4460     if (!get_len_or_size (arg1, idx1, len1rng, &arsz1, nulterm, rvals)
4461 	|| !get_len_or_size (arg2, idx2, len2rng, &arsz2, nulterm + 1, rvals))
4462       return false;
4463 
4464     if (len1rng[0] == len1rng[1] && len1rng[0] < HOST_WIDE_INT_MAX)
4465       cstlen1 = len1rng[0];
4466     else if (arsz1 < HOST_WIDE_INT_M1U)
4467       arysiz1 = arsz1;
4468 
4469     if (len2rng[0] == len2rng[1] && len2rng[0] < HOST_WIDE_INT_MAX)
4470       cstlen2 = len2rng[0];
4471     else if (arsz2 < HOST_WIDE_INT_M1U)
4472       arysiz2 = arsz2;
4473   }
4474 
4475   /* Bail if neither the string length nor the size of the array
4476      it is stored in can be determined.  */
4477   if ((cstlen1 < 0 && arysiz1 < 0)
4478       || (cstlen2 < 0 && arysiz2 < 0)
4479       || (cstlen1 < 0 && cstlen2 < 0))
4480     return false;
4481 
4482   if (cstlen1 >= 0)
4483     ++cstlen1;
4484   if (cstlen2 >= 0)
4485     ++cstlen2;
4486 
4487   /* The exact number of characters to compare.  */
4488   HOST_WIDE_INT cmpsiz;
4489   if (cstlen1 >= 0 && cstlen2 >= 0)
4490     cmpsiz = MIN (cstlen1, cstlen2);
4491   else if (cstlen1 >= 0)
4492     cmpsiz = cstlen1;
4493   else
4494     cmpsiz = cstlen2;
4495   if (bound >= 0)
4496     cmpsiz = MIN (cmpsiz, bound);
4497   /* The size of the array in which the unknown string is stored.  */
4498   HOST_WIDE_INT varsiz = arysiz1 < 0 ? arysiz2 : arysiz1;
4499 
4500   if (cmpsiz < varsiz && used_only_for_zero_equality (lhs))
4501     {
4502       /* If the known length is less than the size of the other array
4503 	 and the strcmp result is only used to test equality to zero,
4504 	 transform the call to the equivalent _eq call.  */
4505       if (tree fn = builtin_decl_implicit (bound < 0 ? BUILT_IN_STRCMP_EQ
4506 					   : BUILT_IN_STRNCMP_EQ))
4507 	{
4508 	  tree n = build_int_cst (size_type_node, cmpsiz);
4509 	  update_gimple_call (gsi, fn, 3, arg1, arg2, n);
4510 	  return true;
4511 	}
4512     }
4513 
4514   return false;
4515 }
4516 
4517 /* Handle a POINTER_PLUS_EXPR statement.
4518    For p = "abcd" + 2; compute associated length, or if
4519    p = q + off is pointing to a '\0' character of a string, call
4520    zero_length_string on it.  */
4521 
4522 static void
handle_pointer_plus(gimple_stmt_iterator * gsi)4523 handle_pointer_plus (gimple_stmt_iterator *gsi)
4524 {
4525   gimple *stmt = gsi_stmt (*gsi);
4526   tree lhs = gimple_assign_lhs (stmt), off;
4527   int idx = get_stridx (gimple_assign_rhs1 (stmt));
4528   strinfo *si, *zsi;
4529 
4530   if (idx == 0)
4531     return;
4532 
4533   if (idx < 0)
4534     {
4535       tree off = gimple_assign_rhs2 (stmt);
4536       if (tree_fits_uhwi_p (off)
4537 	  && tree_to_uhwi (off) <= (unsigned HOST_WIDE_INT) ~idx)
4538 	ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)]
4539 	    = ~(~idx - (int) tree_to_uhwi (off));
4540       return;
4541     }
4542 
4543   si = get_strinfo (idx);
4544   if (si == NULL || si->nonzero_chars == NULL_TREE)
4545     return;
4546 
4547   off = gimple_assign_rhs2 (stmt);
4548   zsi = NULL;
4549   if (si->full_string_p && operand_equal_p (si->nonzero_chars, off, 0))
4550     zsi = zero_length_string (lhs, si);
4551   else if (TREE_CODE (off) == SSA_NAME)
4552     {
4553       gimple *def_stmt = SSA_NAME_DEF_STMT (off);
4554       if (gimple_assign_single_p (def_stmt)
4555 	  && si->full_string_p
4556 	  && operand_equal_p (si->nonzero_chars,
4557 			      gimple_assign_rhs1 (def_stmt), 0))
4558 	zsi = zero_length_string (lhs, si);
4559     }
4560   if (zsi != NULL
4561       && si->endptr != NULL_TREE
4562       && si->endptr != lhs
4563       && TREE_CODE (si->endptr) == SSA_NAME)
4564     {
4565       enum tree_code rhs_code
4566 	= useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (si->endptr))
4567 	  ? SSA_NAME : NOP_EXPR;
4568       gimple_assign_set_rhs_with_ops (gsi, rhs_code, si->endptr);
4569       gcc_assert (gsi_stmt (*gsi) == stmt);
4570       update_stmt (stmt);
4571     }
4572 }
4573 
4574 /* Describes recursion limits used by count_nonzero_bytes.  */
4575 
4576 class ssa_name_limit_t
4577 {
4578   bitmap visited;         /* Bitmap of visited SSA_NAMEs.  */
4579   unsigned ssa_def_max;   /* Longest chain of SSA_NAMEs to follow.  */
4580 
4581   /* Not copyable or assignable.  */
4582   ssa_name_limit_t (ssa_name_limit_t&);
4583   void operator= (ssa_name_limit_t&);
4584 
4585  public:
4586 
ssa_name_limit_t()4587   ssa_name_limit_t ()
4588     : visited (NULL),
4589     ssa_def_max (param_ssa_name_def_chain_limit) { }
4590 
4591   int next_ssa_name (tree);
4592 
~ssa_name_limit_t()4593   ~ssa_name_limit_t ()
4594     {
4595       if (visited)
4596 	BITMAP_FREE (visited);
4597     }
4598 };
4599 
4600 /* If the SSA_NAME has already been "seen" return a positive value.
4601    Otherwise add it to VISITED.  If the SSA_NAME limit has been
4602    reached, return a negative value.  Otherwise return zero.  */
4603 
next_ssa_name(tree ssa_name)4604 int ssa_name_limit_t::next_ssa_name (tree ssa_name)
4605 {
4606   if (!visited)
4607     visited = BITMAP_ALLOC (NULL);
4608 
4609   /* Return a positive value if SSA_NAME has already been visited.  */
4610   if (!bitmap_set_bit (visited, SSA_NAME_VERSION (ssa_name)))
4611     return 1;
4612 
4613   /* Return a negative value to let caller avoid recursing beyond
4614      the specified limit.  */
4615   if (ssa_def_max == 0)
4616     return -1;
4617 
4618   --ssa_def_max;
4619 
4620   return 0;
4621 }
4622 
4623 static bool
4624 count_nonzero_bytes_addr (tree, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
4625 			  unsigned [3], bool *, bool *, bool *,
4626 			  const vr_values *, ssa_name_limit_t &);
4627 
4628 /* Determines the minimum and maximum number of leading non-zero bytes
4629    in the representation of EXP and set LENRANGE[0] and LENRANGE[1]
4630    to each.
4631    Sets LENRANGE[2] to the total size of the access (which may be less
4632    than LENRANGE[1] when what's being referenced by EXP is a pointer
4633    rather than an array).
4634    Sets *NULTERM if the representation contains a zero byte, and sets
4635    *ALLNUL if all the bytes are zero.
4636    OFFSET and NBYTES are the offset into the representation and
4637    the size of the access to it determined from an ADDR_EXPR (i.e.,
4638    a pointer) or MEM_REF or zero for other expressions.
4639    Uses RVALS to determine range information.
4640    Avoids recursing deeper than the limits in SNLIM allow.
4641    Returns true on success and false otherwise.  */
4642 
4643 static bool
count_nonzero_bytes(tree exp,unsigned HOST_WIDE_INT offset,unsigned HOST_WIDE_INT nbytes,unsigned lenrange[3],bool * nulterm,bool * allnul,bool * allnonnul,const vr_values * rvals,ssa_name_limit_t & snlim)4644 count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset,
4645 		     unsigned HOST_WIDE_INT nbytes,
4646 		     unsigned lenrange[3], bool *nulterm,
4647 		     bool *allnul, bool *allnonnul, const vr_values *rvals,
4648 		     ssa_name_limit_t &snlim)
4649 {
4650   if (TREE_CODE (exp) == SSA_NAME)
4651     {
4652       /* Handle non-zero single-character stores specially.  */
4653       tree type = TREE_TYPE (exp);
4654       if (TREE_CODE (type) == INTEGER_TYPE
4655 	  && TYPE_MODE (type) == TYPE_MODE (char_type_node)
4656 	  && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node)
4657 	  && tree_expr_nonzero_p (exp))
4658 	{
4659 	  /* If the character EXP is known to be non-zero (even if its
4660 	     exact value is not known) recurse once to set the range
4661 	     for an arbitrary constant.  */
4662 	  exp = build_int_cst (type, 1);
4663 	  return count_nonzero_bytes (exp, offset, 1, lenrange,
4664 				      nulterm, allnul, allnonnul, rvals, snlim);
4665 	}
4666 
4667       gimple *stmt = SSA_NAME_DEF_STMT (exp);
4668       if (gimple_assign_single_p (stmt))
4669 	{
4670 	  exp = gimple_assign_rhs1 (stmt);
4671 	  if (TREE_CODE (exp) != MEM_REF)
4672 	    return false;
4673 	  /* Handle MEM_REF below.  */
4674 	}
4675       else if (gimple_code (stmt) == GIMPLE_PHI)
4676 	{
4677 	  /* Avoid processing an SSA_NAME that has already been visited
4678 	     or if an SSA_NAME limit has been reached.  Indicate success
4679 	     if the former and failure if the latter.  */
4680 	  if (int res = snlim.next_ssa_name (exp))
4681 	    return res > 0;
4682 
4683 	  /* Determine the minimum and maximum from the PHI arguments.  */
4684 	  unsigned int n = gimple_phi_num_args (stmt);
4685 	  for (unsigned i = 0; i != n; i++)
4686 	    {
4687 	      tree def = gimple_phi_arg_def (stmt, i);
4688 	      if (!count_nonzero_bytes (def, offset, nbytes, lenrange, nulterm,
4689 					allnul, allnonnul, rvals, snlim))
4690 		return false;
4691 	    }
4692 
4693 	  return true;
4694 	}
4695     }
4696 
4697   if (TREE_CODE (exp) == MEM_REF)
4698     {
4699       if (nbytes)
4700 	return false;
4701 
4702       tree arg = TREE_OPERAND (exp, 0);
4703       tree off = TREE_OPERAND (exp, 1);
4704 
4705       if (TREE_CODE (off) != INTEGER_CST || !tree_fits_uhwi_p (off))
4706 	return false;
4707 
4708       unsigned HOST_WIDE_INT wioff = tree_to_uhwi (off);
4709       if (INT_MAX < wioff)
4710 	return false;
4711 
4712       offset += wioff;
4713       if (INT_MAX < offset)
4714 	return false;
4715 
4716       /* The size of the MEM_REF access determines the number of bytes.  */
4717       tree type = TREE_TYPE (exp);
4718       tree typesize = TYPE_SIZE_UNIT (type);
4719       if (!typesize || !tree_fits_uhwi_p (typesize))
4720 	return false;
4721       nbytes = tree_to_uhwi (typesize);
4722       if (!nbytes)
4723 	return false;
4724 
4725       /* Handle MEM_REF = SSA_NAME types of assignments.  */
4726       return count_nonzero_bytes_addr (arg, offset, nbytes, lenrange, nulterm,
4727 				       allnul, allnonnul, rvals, snlim);
4728     }
4729 
4730   if (VAR_P (exp) || TREE_CODE (exp) == CONST_DECL)
4731     {
4732       exp = ctor_for_folding (exp);
4733       if (!exp)
4734 	return false;
4735     }
4736 
4737   const char *prep = NULL;
4738   if (TREE_CODE (exp) == STRING_CST)
4739     {
4740       unsigned nchars = TREE_STRING_LENGTH (exp);
4741       if (nchars < offset)
4742 	return false;
4743 
4744       if (!nbytes)
4745 	/* If NBYTES hasn't been determined earlier, either from ADDR_EXPR
4746 	   (i.e., it's the size of a pointer), or from MEM_REF (as the size
4747 	   of the access), set it here to the size of the string, including
4748 	   all internal and trailing nuls if the string has any.  */
4749 	nbytes = nchars - offset;
4750       else if (nchars - offset < nbytes)
4751 	return false;
4752 
4753       prep = TREE_STRING_POINTER (exp) + offset;
4754     }
4755 
4756   unsigned char buf[256];
4757   if (!prep)
4758     {
4759       if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
4760 	return false;
4761       /* If the pointer to representation hasn't been set above
4762 	 for STRING_CST point it at the buffer.  */
4763       prep = reinterpret_cast <char *>(buf);
4764       /* Try to extract the representation of the constant object
4765 	 or expression starting from the offset.  */
4766       unsigned repsize = native_encode_expr (exp, buf, sizeof buf, offset);
4767       if (repsize < nbytes)
4768 	{
4769 	  /* This should only happen when REPSIZE is zero because EXP
4770 	     doesn't denote an object with a known initializer, except
4771 	     perhaps when the reference reads past its end.  */
4772 	  lenrange[0] = 0;
4773 	  prep = NULL;
4774 	}
4775       else if (!nbytes)
4776 	nbytes = repsize;
4777       else if (nbytes < repsize)
4778 	return false;
4779     }
4780 
4781   if (!nbytes)
4782     return false;
4783 
4784   /* Compute the number of leading nonzero bytes in the representation
4785      and update the minimum and maximum.  */
4786   unsigned HOST_WIDE_INT n = prep ? strnlen (prep, nbytes) : nbytes;
4787 
4788   if (n < lenrange[0])
4789     lenrange[0] = n;
4790   if (lenrange[1] < n)
4791     lenrange[1] = n;
4792 
4793   /* Set the size of the representation.  */
4794   if (lenrange[2] < nbytes)
4795     lenrange[2] = nbytes;
4796 
4797   /* Clear NULTERM if none of the bytes is zero.  */
4798   if (n == nbytes)
4799     *nulterm = false;
4800 
4801   if (n)
4802     {
4803       /* When the initial number of non-zero bytes N is non-zero, reset
4804 	 *ALLNUL; if N is less than that the size of the representation
4805 	 also clear *ALLNONNUL.  */
4806       *allnul = false;
4807       if (n < nbytes)
4808 	*allnonnul = false;
4809     }
4810   else if (*allnul || *allnonnul)
4811     {
4812       *allnonnul = false;
4813 
4814       if (*allnul)
4815 	{
4816 	  /* When either ALLNUL is set and N is zero, also determine
4817 	     whether all subsequent bytes after the first one (which
4818 	     is nul) are zero or nonzero and clear ALLNUL if not.  */
4819 	  for (const char *p = prep; p != prep + nbytes; ++p)
4820 	    if (*p)
4821 	      {
4822 		*allnul = false;
4823 		break;
4824 	      }
4825 	}
4826     }
4827 
4828   return true;
4829 }
4830 
4831 /* Like count_nonzero_bytes, but instead of counting bytes in EXP, count
4832    bytes that are pointed to by EXP, which should be a pointer.  */
4833 
4834 static bool
count_nonzero_bytes_addr(tree exp,unsigned HOST_WIDE_INT offset,unsigned HOST_WIDE_INT nbytes,unsigned lenrange[3],bool * nulterm,bool * allnul,bool * allnonnul,const vr_values * rvals,ssa_name_limit_t & snlim)4835 count_nonzero_bytes_addr (tree exp, unsigned HOST_WIDE_INT offset,
4836 			  unsigned HOST_WIDE_INT nbytes,
4837 			  unsigned lenrange[3], bool *nulterm,
4838 			  bool *allnul, bool *allnonnul,
4839 			  const vr_values *rvals, ssa_name_limit_t &snlim)
4840 {
4841   int idx = get_stridx (exp);
4842   if (idx > 0)
4843     {
4844       strinfo *si = get_strinfo (idx);
4845       if (!si)
4846 	return false;
4847 
4848       /* Handle both constant lengths as well non-constant lengths
4849 	 in some range.  */
4850       unsigned HOST_WIDE_INT minlen, maxlen;
4851       if (tree_fits_shwi_p (si->nonzero_chars))
4852 	minlen = maxlen = tree_to_shwi (si->nonzero_chars);
4853       else if (si->nonzero_chars
4854 	       && TREE_CODE (si->nonzero_chars) == SSA_NAME)
4855 	{
4856 	  vr_values *v = CONST_CAST (vr_values *, rvals);
4857 	  const value_range_equiv *vr = v->get_value_range (si->nonzero_chars);
4858 	  if (vr->kind () != VR_RANGE || !range_int_cst_p (vr))
4859 	    return false;
4860 
4861 	  minlen = tree_to_uhwi (vr->min ());
4862 	  maxlen = tree_to_uhwi (vr->max ());
4863 	}
4864       else
4865 	return false;
4866 
4867       if (maxlen < offset)
4868 	return false;
4869 
4870       minlen = minlen < offset ? 0 : minlen - offset;
4871       maxlen -= offset;
4872       if (maxlen + 1 < nbytes)
4873 	return false;
4874 
4875       if (nbytes <= minlen)
4876 	*nulterm = false;
4877 
4878       if (nbytes < minlen)
4879 	{
4880 	  minlen = nbytes;
4881 	  if (nbytes < maxlen)
4882 	    maxlen = nbytes;
4883 	}
4884 
4885       if (minlen < lenrange[0])
4886 	lenrange[0] = minlen;
4887       if (lenrange[1] < maxlen)
4888 	lenrange[1] = maxlen;
4889 
4890       if (lenrange[2] < nbytes)
4891 	lenrange[2] = nbytes;
4892 
4893       /* Since only the length of the string are known and not its contents,
4894 	 clear ALLNUL and ALLNONNUL purely on the basis of the length.  */
4895       *allnul = false;
4896       if (minlen < nbytes)
4897 	*allnonnul = false;
4898 
4899       return true;
4900     }
4901 
4902   if (TREE_CODE (exp) == ADDR_EXPR)
4903     return count_nonzero_bytes (TREE_OPERAND (exp, 0), offset, nbytes,
4904 				lenrange, nulterm, allnul, allnonnul, rvals,
4905 				snlim);
4906 
4907   if (TREE_CODE (exp) == SSA_NAME)
4908     {
4909       gimple *stmt = SSA_NAME_DEF_STMT (exp);
4910       if (gimple_code (stmt) == GIMPLE_PHI)
4911 	{
4912 	  /* Avoid processing an SSA_NAME that has already been visited
4913 	     or if an SSA_NAME limit has been reached.  Indicate success
4914 	     if the former and failure if the latter.  */
4915 	  if (int res = snlim.next_ssa_name (exp))
4916 	    return res > 0;
4917 
4918 	  /* Determine the minimum and maximum from the PHI arguments.  */
4919 	  unsigned int n = gimple_phi_num_args (stmt);
4920 	  for (unsigned i = 0; i != n; i++)
4921 	    {
4922 	      tree def = gimple_phi_arg_def (stmt, i);
4923 	      if (!count_nonzero_bytes_addr (def, offset, nbytes, lenrange,
4924 					     nulterm, allnul, allnonnul, rvals,
4925 					     snlim))
4926 		return false;
4927 	    }
4928 
4929 	  return true;
4930 	}
4931     }
4932 
4933   /* Otherwise we don't know anything.  */
4934   lenrange[0] = 0;
4935   if (lenrange[1] < nbytes)
4936     lenrange[1] = nbytes;
4937   if (lenrange[2] < nbytes)
4938     lenrange[2] = nbytes;
4939   *nulterm = false;
4940   *allnul = false;
4941   *allnonnul = false;
4942   return true;
4943 }
4944 
4945 /* Same as above except with an implicit SSA_NAME limit.  RVALS is used
4946    to determine ranges of dynamically computed string lengths (the results
4947    of strlen).  */
4948 
4949 static bool
count_nonzero_bytes(tree exp,unsigned lenrange[3],bool * nulterm,bool * allnul,bool * allnonnul,const vr_values * rvals)4950 count_nonzero_bytes (tree exp, unsigned lenrange[3], bool *nulterm,
4951 		     bool *allnul, bool *allnonnul, const vr_values *rvals)
4952 {
4953   /* Set to optimistic values so the caller doesn't have to worry about
4954      initializing these and to what.  On success, the function will clear
4955      these if it determines their values are different but being recursive
4956      it never sets either to true.  On failure, their values are
4957      unspecified.  */
4958   *nulterm = true;
4959   *allnul = true;
4960   *allnonnul = true;
4961 
4962   ssa_name_limit_t snlim;
4963   return count_nonzero_bytes (exp, 0, 0, lenrange, nulterm, allnul, allnonnul,
4964 			      rvals, snlim);
4965 }
4966 
4967 /* Handle a single or multibyte store other than by a built-in function,
4968    either via a single character assignment or by multi-byte assignment
4969    either via MEM_REF or via a type other than char (such as in
4970    '*(int*)a = 12345').  Return true to let the caller advance *GSI to
4971    the next statement in the basic block and false otherwise.  */
4972 
4973 static bool
handle_store(gimple_stmt_iterator * gsi,bool * zero_write,const vr_values * rvals)4974 handle_store (gimple_stmt_iterator *gsi, bool *zero_write,
4975 	      const vr_values *rvals)
4976 {
4977   int idx = -1;
4978   strinfo *si = NULL;
4979   gimple *stmt = gsi_stmt (*gsi);
4980   tree ssaname = NULL_TREE, lhs = gimple_assign_lhs (stmt);
4981   tree rhs = gimple_assign_rhs1 (stmt);
4982 
4983   /* The offset of the first byte in LHS modified by the store.  */
4984   unsigned HOST_WIDE_INT offset = 0;
4985 
4986   if (TREE_CODE (lhs) == MEM_REF
4987       && TREE_CODE (TREE_OPERAND (lhs, 0)) == SSA_NAME)
4988     {
4989       tree mem_offset = TREE_OPERAND (lhs, 1);
4990       if (tree_fits_uhwi_p (mem_offset))
4991 	{
4992 	  /* Get the strinfo for the base, and use it if it starts with at
4993 	     least OFFSET nonzero characters.  This is trivially true if
4994 	     OFFSET is zero.  */
4995 	  offset = tree_to_uhwi (mem_offset);
4996 	  idx = get_stridx (TREE_OPERAND (lhs, 0));
4997 	  if (idx > 0)
4998 	    si = get_strinfo (idx);
4999 	  if (offset == 0)
5000 	    ssaname = TREE_OPERAND (lhs, 0);
5001 	  else if (si == NULL || compare_nonzero_chars (si, offset, rvals) < 0)
5002 	    {
5003 	      *zero_write = initializer_zerop (rhs);
5004 
5005 	      bool dummy;
5006 	      unsigned lenrange[] = { UINT_MAX, 0, 0 };
5007 	      if (count_nonzero_bytes (rhs, lenrange, &dummy, &dummy, &dummy,
5008 				       rvals))
5009 		maybe_warn_overflow (stmt, lenrange[2], rvals);
5010 
5011 	      return true;
5012 	    }
5013 	}
5014     }
5015   else
5016     {
5017       idx = get_addr_stridx (lhs, NULL_TREE, &offset, rvals);
5018       if (idx > 0)
5019 	si = get_strinfo (idx);
5020     }
5021 
5022   /* Minimum and maximum leading non-zero bytes and the size of the store.  */
5023   unsigned lenrange[] = { UINT_MAX, 0, 0 };
5024 
5025   /* Set to the minimum length of the string being assigned if known.  */
5026   unsigned HOST_WIDE_INT rhs_minlen;
5027 
5028   /* STORING_NONZERO_P is true iff not all stored characters are zero.
5029      STORING_ALL_NONZERO_P is true if all stored characters are zero.
5030      STORING_ALL_ZEROS_P is true iff all stored characters are zero.
5031      Both are false when it's impossible to determine which is true.  */
5032   bool storing_nonzero_p;
5033   bool storing_all_nonzero_p;
5034   bool storing_all_zeros_p;
5035   /* FULL_STRING_P is set when the stored sequence of characters form
5036      a nul-terminated string.  */
5037   bool full_string_p;
5038 
5039   const bool ranges_valid
5040     = count_nonzero_bytes (rhs, lenrange, &full_string_p,
5041 			   &storing_all_zeros_p, &storing_all_nonzero_p,
5042 			   rvals);
5043   if (ranges_valid)
5044     {
5045       rhs_minlen = lenrange[0];
5046       storing_nonzero_p = lenrange[1] > 0;
5047       *zero_write = storing_all_zeros_p;
5048 
5049       maybe_warn_overflow (stmt, lenrange[2], rvals);
5050     }
5051   else
5052     {
5053       rhs_minlen = HOST_WIDE_INT_M1U;
5054       full_string_p = false;
5055       storing_nonzero_p = false;
5056       storing_all_zeros_p = false;
5057       storing_all_nonzero_p = false;
5058     }
5059 
5060   if (si != NULL)
5061     {
5062       /* The corresponding element is set to 1 if the first and last
5063 	 element, respectively, of the sequence of characters being
5064 	 written over the string described by SI ends before
5065 	 the terminating nul (if it has one), to zero if the nul is
5066 	 being overwritten but not beyond, or negative otherwise.  */
5067       int store_before_nul[2];
5068       if (ranges_valid)
5069 	{
5070 	  /* The offset of the last stored byte.  */
5071 	  unsigned HOST_WIDE_INT endoff = offset + lenrange[2] - 1;
5072 	  store_before_nul[0] = compare_nonzero_chars (si, offset, rvals);
5073 	  if (endoff == offset)
5074 	    store_before_nul[1] = store_before_nul[0];
5075 	  else
5076 	    store_before_nul[1] = compare_nonzero_chars (si, endoff, rvals);
5077 	}
5078       else
5079 	{
5080 	  store_before_nul[0] = compare_nonzero_chars (si, offset, rvals);
5081 	  store_before_nul[1] = store_before_nul[0];
5082 	  gcc_assert (offset == 0 || store_before_nul[0] >= 0);
5083 	}
5084 
5085       if (storing_all_zeros_p
5086 	  && store_before_nul[0] == 0
5087 	  && store_before_nul[1] == 0
5088 	  && si->full_string_p)
5089 	{
5090 	  /* When overwriting a '\0' with a '\0', the store can be removed
5091 	     if we know it has been stored in the current function.  */
5092 	  if (!stmt_could_throw_p (cfun, stmt) && si->writable)
5093 	    {
5094 	      unlink_stmt_vdef (stmt);
5095 	      release_defs (stmt);
5096 	      gsi_remove (gsi, true);
5097 	      return false;
5098 	    }
5099 	  else
5100 	    {
5101 	      si->writable = true;
5102 	      gsi_next (gsi);
5103 	      return false;
5104 	    }
5105 	}
5106 
5107       if (store_before_nul[1] > 0
5108 	  && storing_nonzero_p
5109 	  && lenrange[0] == lenrange[1]
5110 	  && lenrange[0] == lenrange[2]
5111 	  && TREE_CODE (TREE_TYPE (rhs)) == INTEGER_TYPE)
5112 	{
5113 	  /* Handle a store of one or more non-nul characters that ends
5114 	     before the terminating nul of the destination and so does
5115 	     not affect its length
5116 	     If si->nonzero_chars > OFFSET, we aren't overwriting '\0',
5117 	     and if we aren't storing '\0', we know that the length of
5118 	     the string and any other zero terminated string in memory
5119 	     remains the same.  In that case we move to the next gimple
5120 	     statement and return to signal the caller that it shouldn't
5121 	     invalidate anything.
5122 
5123 	     This is beneficial for cases like:
5124 
5125 	     char p[20];
5126 	     void foo (char *q)
5127 	     {
5128 	       strcpy (p, "foobar");
5129 	       size_t len = strlen (p);     // can be folded to 6
5130 	       size_t len2 = strlen (q);    // has to be computed
5131 	       p[0] = 'X';
5132 	       size_t len3 = strlen (p);    // can be folded to 6
5133 	       size_t len4 = strlen (q);    // can be folded to len2
5134 	       bar (len, len2, len3, len4);
5135 	       } */
5136 	  gsi_next (gsi);
5137 	  return false;
5138 	}
5139 
5140       if (storing_all_zeros_p
5141 	  || storing_nonzero_p
5142 	  || (offset != 0 && store_before_nul[1] > 0))
5143 	{
5144 	  /* When STORING_NONZERO_P, we know that the string will start
5145 	     with at least OFFSET + 1 nonzero characters.  If storing
5146 	     a single character, set si->NONZERO_CHARS to the result.
5147 	     If storing multiple characters, try to determine the number
5148 	     of leading non-zero characters and set si->NONZERO_CHARS to
5149 	     the result instead.
5150 
5151 	     When STORING_ALL_ZEROS_P, we know that the string is now
5152 	     OFFSET characters long.
5153 
5154 	     Otherwise, we're storing an unknown value at offset OFFSET,
5155 	     so need to clip the nonzero_chars to OFFSET.
5156 	     Use the minimum length of the string (or individual character)
5157 	     being stored if it's known.  Otherwise, STORING_NONZERO_P
5158 	     guarantees it's at least 1.  */
5159 	  HOST_WIDE_INT len
5160 	    = storing_nonzero_p && ranges_valid ? lenrange[0] : 1;
5161 	  location_t loc = gimple_location (stmt);
5162 	  tree oldlen = si->nonzero_chars;
5163 	  if (store_before_nul[1] == 0 && si->full_string_p)
5164 	    /* We're overwriting the nul terminator with a nonzero or
5165 	       unknown character.  If the previous stmt was a memcpy,
5166 	       its length may be decreased.  */
5167 	    adjust_last_stmt (si, stmt, false);
5168 	  si = unshare_strinfo (si);
5169 	  if (storing_nonzero_p)
5170 	    {
5171 	      gcc_assert (len >= 0);
5172 	      si->nonzero_chars = build_int_cst (size_type_node, offset + len);
5173 	    }
5174 	  else
5175 	    si->nonzero_chars = build_int_cst (size_type_node, offset);
5176 
5177 	  /* Set FULL_STRING_P only if the length of the strings being
5178 	     written is the same, and clear it if the strings have
5179 	     different lengths.  In the latter case the length stored
5180 	     in si->NONZERO_CHARS becomes the lower bound.
5181 	     FIXME: Handle the upper bound of the length if possible.  */
5182 	  si->full_string_p = full_string_p && lenrange[0] == lenrange[1];
5183 
5184 	  if (storing_all_zeros_p
5185 	      && ssaname
5186 	      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5187 	    si->endptr = ssaname;
5188 	  else
5189 	    si->endptr = NULL;
5190 	  si->next = 0;
5191 	  si->stmt = NULL;
5192 	  si->writable = true;
5193 	  si->dont_invalidate = true;
5194 	  if (oldlen)
5195 	    {
5196 	      tree adj = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
5197 					  si->nonzero_chars, oldlen);
5198 	      adjust_related_strinfos (loc, si, adj);
5199 	    }
5200 	  else
5201 	    si->prev = 0;
5202 	}
5203     }
5204   else if (idx == 0 && (storing_all_zeros_p || storing_nonzero_p))
5205     {
5206       if (ssaname)
5207 	idx = new_stridx (ssaname);
5208       else
5209 	idx = new_addr_stridx (lhs);
5210       if (idx != 0)
5211 	{
5212 	  tree ptr = (ssaname ? ssaname : build_fold_addr_expr (lhs));
5213 
5214 	  HOST_WIDE_INT slen;
5215 	  if (storing_all_zeros_p)
5216 	    slen = 0;
5217 	  else if (storing_nonzero_p && ranges_valid)
5218 	    {
5219 	      /* FIXME: Handle the upper bound of the length when
5220 		 LENRANGE[0] != LENRANGE[1].  */
5221 	      slen = lenrange[0];
5222 	      if (lenrange[0] != lenrange[1])
5223 		/* Set the minimum length but ignore the maximum
5224 		   for now.  */
5225 		full_string_p = false;
5226 	    }
5227 	  else
5228 	    slen = -1;
5229 
5230 	  tree len = (slen <= 0
5231 		      ? size_zero_node
5232 		      : build_int_cst (size_type_node, slen));
5233 	  si = new_strinfo (ptr, idx, len, slen >= 0 && full_string_p);
5234 	  set_strinfo (idx, si);
5235 	  if (storing_all_zeros_p
5236 	      && ssaname
5237 	      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5238 	    si->endptr = ssaname;
5239 	  si->dont_invalidate = true;
5240 	  si->writable = true;
5241 	}
5242     }
5243   else if (idx == 0
5244 	   && rhs_minlen < HOST_WIDE_INT_M1U
5245 	   && ssaname == NULL_TREE
5246 	   && TREE_CODE (TREE_TYPE (lhs)) == ARRAY_TYPE)
5247     {
5248       HOST_WIDE_INT a = int_size_in_bytes (TREE_TYPE (lhs));
5249       if (a > 0 && (unsigned HOST_WIDE_INT) a > rhs_minlen)
5250 	{
5251 	  int idx = new_addr_stridx (lhs);
5252 	  if (idx != 0)
5253 	    {
5254 	      si = new_strinfo (build_fold_addr_expr (lhs), idx,
5255 				build_int_cst (size_type_node, rhs_minlen),
5256 				full_string_p);
5257 	      set_strinfo (idx, si);
5258 	      si->dont_invalidate = true;
5259 	    }
5260 	}
5261     }
5262 
5263   if (si != NULL && offset == 0 && storing_all_zeros_p && lenrange[2] == 1)
5264     {
5265       /* For single-byte stores only, allow adjust_last_stmt to remove
5266 	 the statement if the stored '\0' is immediately overwritten.  */
5267       laststmt.stmt = stmt;
5268       laststmt.len = build_int_cst (size_type_node, 1);
5269       laststmt.stridx = si->idx;
5270     }
5271   return true;
5272 }
5273 
5274 /* Try to fold strstr (s, t) eq/ne s to strncmp (s, t, strlen (t)) eq/ne 0.  */
5275 
5276 static void
fold_strstr_to_strncmp(tree rhs1,tree rhs2,gimple * stmt)5277 fold_strstr_to_strncmp (tree rhs1, tree rhs2, gimple *stmt)
5278 {
5279   if (TREE_CODE (rhs1) != SSA_NAME
5280       || TREE_CODE (rhs2) != SSA_NAME)
5281     return;
5282 
5283   gimple *call_stmt = NULL;
5284   for (int pass = 0; pass < 2; pass++)
5285     {
5286       gimple *g = SSA_NAME_DEF_STMT (rhs1);
5287       if (gimple_call_builtin_p (g, BUILT_IN_STRSTR)
5288 	  && has_single_use (rhs1)
5289 	  && gimple_call_arg (g, 0) == rhs2)
5290 	{
5291 	  call_stmt = g;
5292 	  break;
5293 	}
5294       std::swap (rhs1, rhs2);
5295     }
5296 
5297   if (call_stmt)
5298     {
5299       tree arg0 = gimple_call_arg (call_stmt, 0);
5300 
5301       if (arg0 == rhs2)
5302 	{
5303 	  tree arg1 = gimple_call_arg (call_stmt, 1);
5304 	  tree arg1_len = NULL_TREE;
5305 	  int idx = get_stridx (arg1);
5306 
5307 	  if (idx)
5308 	    {
5309 	      if (idx < 0)
5310 		arg1_len = build_int_cst (size_type_node, ~idx);
5311 	      else
5312 		{
5313 		  strinfo *si = get_strinfo (idx);
5314 		  if (si)
5315 		    arg1_len = get_string_length (si);
5316 		}
5317 	    }
5318 
5319 	  if (arg1_len != NULL_TREE)
5320 	    {
5321 	      gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
5322 	      tree strncmp_decl = builtin_decl_explicit (BUILT_IN_STRNCMP);
5323 
5324 	      if (!is_gimple_val (arg1_len))
5325 		{
5326 		  tree arg1_len_tmp = make_ssa_name (TREE_TYPE (arg1_len));
5327 		  gassign *arg1_stmt = gimple_build_assign (arg1_len_tmp,
5328 							    arg1_len);
5329 		  gsi_insert_before (&gsi, arg1_stmt, GSI_SAME_STMT);
5330 		  arg1_len = arg1_len_tmp;
5331 		}
5332 
5333 	      gcall *strncmp_call = gimple_build_call (strncmp_decl, 3,
5334 						      arg0, arg1, arg1_len);
5335 	      tree strncmp_lhs = make_ssa_name (integer_type_node);
5336 	      gimple_set_vuse (strncmp_call, gimple_vuse (call_stmt));
5337 	      gimple_call_set_lhs (strncmp_call, strncmp_lhs);
5338 	      gsi_remove (&gsi, true);
5339 	      gsi_insert_before (&gsi, strncmp_call, GSI_SAME_STMT);
5340 	      tree zero = build_zero_cst (TREE_TYPE (strncmp_lhs));
5341 
5342 	      if (is_gimple_assign (stmt))
5343 		{
5344 		  if (gimple_assign_rhs_code (stmt) == COND_EXPR)
5345 		    {
5346 		      tree cond = gimple_assign_rhs1 (stmt);
5347 		      TREE_OPERAND (cond, 0) = strncmp_lhs;
5348 		      TREE_OPERAND (cond, 1) = zero;
5349 		    }
5350 		  else
5351 		    {
5352 		      gimple_assign_set_rhs1 (stmt, strncmp_lhs);
5353 		      gimple_assign_set_rhs2 (stmt, zero);
5354 		    }
5355 		}
5356 	      else
5357 		{
5358 		  gcond *cond = as_a<gcond *> (stmt);
5359 		  gimple_cond_set_lhs (cond, strncmp_lhs);
5360 		  gimple_cond_set_rhs (cond, zero);
5361 		}
5362 	      update_stmt (stmt);
5363 	    }
5364 	}
5365     }
5366 }
5367 
5368 /* Return true if TYPE corresponds to a narrow character type.  */
5369 
5370 static bool
is_char_type(tree type)5371 is_char_type (tree type)
5372 {
5373   return (TREE_CODE (type) == INTEGER_TYPE
5374 	  && TYPE_MODE (type) == TYPE_MODE (char_type_node)
5375 	  && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node));
5376 }
5377 
5378 /* Check the built-in call at GSI for validity and optimize it.
5379    Uses RVALS to determine range information.
5380    Return true to let the caller advance *GSI to the next statement
5381    in the basic block and false otherwise.  */
5382 
5383 static bool
strlen_check_and_optimize_call(gimple_stmt_iterator * gsi,bool * zero_write,const vr_values * rvals)5384 strlen_check_and_optimize_call (gimple_stmt_iterator *gsi, bool *zero_write,
5385 				const vr_values *rvals)
5386 {
5387   gimple *stmt = gsi_stmt (*gsi);
5388 
5389   if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
5390     {
5391       tree fntype = gimple_call_fntype (stmt);
5392       if (fntype && lookup_attribute ("alloc_size", TYPE_ATTRIBUTES (fntype)))
5393 	handle_alloc_call (BUILT_IN_NONE, gsi);
5394     }
5395 
5396   /* When not optimizing we must be checking printf calls which
5397      we do even for user-defined functions when they are declared
5398      with attribute format.  */
5399   if (!flag_optimize_strlen
5400       || !strlen_optimize
5401       || !valid_builtin_call (stmt))
5402     return !handle_printf_call (gsi, rvals);
5403 
5404   tree callee = gimple_call_fndecl (stmt);
5405   switch (DECL_FUNCTION_CODE (callee))
5406     {
5407     case BUILT_IN_STRLEN:
5408     case BUILT_IN_STRNLEN:
5409       handle_builtin_strlen (gsi);
5410       break;
5411     case BUILT_IN_STRCHR:
5412       handle_builtin_strchr (gsi);
5413       break;
5414     case BUILT_IN_STRCPY:
5415     case BUILT_IN_STRCPY_CHK:
5416     case BUILT_IN_STPCPY:
5417     case BUILT_IN_STPCPY_CHK:
5418       handle_builtin_strcpy (DECL_FUNCTION_CODE (callee), gsi, rvals);
5419       break;
5420 
5421     case BUILT_IN_STRNCAT:
5422     case BUILT_IN_STRNCAT_CHK:
5423       handle_builtin_strncat (DECL_FUNCTION_CODE (callee), gsi);
5424       break;
5425 
5426     case BUILT_IN_STPNCPY:
5427     case BUILT_IN_STPNCPY_CHK:
5428     case BUILT_IN_STRNCPY:
5429     case BUILT_IN_STRNCPY_CHK:
5430       handle_builtin_stxncpy_strncat (false, gsi);
5431       break;
5432 
5433     case BUILT_IN_MEMCPY:
5434     case BUILT_IN_MEMCPY_CHK:
5435     case BUILT_IN_MEMPCPY:
5436     case BUILT_IN_MEMPCPY_CHK:
5437       handle_builtin_memcpy (DECL_FUNCTION_CODE (callee), gsi, rvals);
5438       break;
5439     case BUILT_IN_STRCAT:
5440     case BUILT_IN_STRCAT_CHK:
5441       handle_builtin_strcat (DECL_FUNCTION_CODE (callee), gsi);
5442       break;
5443     case BUILT_IN_ALLOCA:
5444     case BUILT_IN_ALLOCA_WITH_ALIGN:
5445     case BUILT_IN_MALLOC:
5446     case BUILT_IN_CALLOC:
5447       handle_alloc_call (DECL_FUNCTION_CODE (callee), gsi);
5448       break;
5449     case BUILT_IN_MEMSET:
5450       if (handle_builtin_memset (gsi, zero_write, rvals))
5451 	return false;
5452       break;
5453     case BUILT_IN_MEMCMP:
5454       if (handle_builtin_memcmp (gsi))
5455 	return false;
5456       break;
5457     case BUILT_IN_STRCMP:
5458     case BUILT_IN_STRNCMP:
5459       if (handle_builtin_string_cmp (gsi, rvals))
5460 	return false;
5461       break;
5462     default:
5463       if (handle_printf_call (gsi, rvals))
5464 	return false;
5465       break;
5466     }
5467 
5468   return true;
5469 }
5470 
5471 /* Handle an assignment statement at *GSI to a LHS of integral type.
5472    If GSI's basic block needs clean-up of EH, set *CLEANUP_EH to true.  */
5473 
5474 static void
handle_integral_assign(gimple_stmt_iterator * gsi,bool * cleanup_eh,const vr_values * rvals)5475 handle_integral_assign (gimple_stmt_iterator *gsi, bool *cleanup_eh,
5476 			const vr_values *rvals)
5477 {
5478   gimple *stmt = gsi_stmt (*gsi);
5479   tree lhs = gimple_assign_lhs (stmt);
5480   tree lhs_type = TREE_TYPE (lhs);
5481 
5482   enum tree_code code = gimple_assign_rhs_code (stmt);
5483   if (code == COND_EXPR)
5484     {
5485       tree cond = gimple_assign_rhs1 (stmt);
5486       enum tree_code cond_code = TREE_CODE (cond);
5487 
5488       if (cond_code == EQ_EXPR || cond_code == NE_EXPR)
5489 	fold_strstr_to_strncmp (TREE_OPERAND (cond, 0),
5490 				TREE_OPERAND (cond, 1), stmt);
5491     }
5492   else if (code == EQ_EXPR || code == NE_EXPR)
5493     fold_strstr_to_strncmp (gimple_assign_rhs1 (stmt),
5494 			    gimple_assign_rhs2 (stmt), stmt);
5495   else if (gimple_assign_load_p (stmt)
5496 	   && TREE_CODE (lhs_type) == INTEGER_TYPE
5497 	   && TYPE_MODE (lhs_type) == TYPE_MODE (char_type_node)
5498 	   && (TYPE_PRECISION (lhs_type)
5499 	       == TYPE_PRECISION (char_type_node))
5500 	   && !gimple_has_volatile_ops (stmt))
5501     {
5502       tree off = integer_zero_node;
5503       unsigned HOST_WIDE_INT coff = 0;
5504       int idx = 0;
5505       tree rhs1 = gimple_assign_rhs1 (stmt);
5506       if (code == MEM_REF)
5507 	{
5508 	  idx = get_stridx (TREE_OPERAND (rhs1, 0));
5509 	  if (idx > 0)
5510 	    {
5511 	      strinfo *si = get_strinfo (idx);
5512 	      if (si
5513 		  && si->nonzero_chars
5514 		  && TREE_CODE (si->nonzero_chars) == INTEGER_CST
5515 		  && (wi::to_widest (si->nonzero_chars)
5516 		      >= wi::to_widest (off)))
5517 		off = TREE_OPERAND (rhs1, 1);
5518 	      else
5519 		/* This case is not useful.  See if get_addr_stridx
5520 		   returns something usable.  */
5521 		idx = 0;
5522 	    }
5523 	}
5524       if (idx <= 0)
5525 	idx = get_addr_stridx (rhs1, NULL_TREE, &coff);
5526       if (idx > 0)
5527 	{
5528 	  strinfo *si = get_strinfo (idx);
5529 	  if (si
5530 	      && si->nonzero_chars
5531 	      && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
5532 	    {
5533 	      widest_int w1 = wi::to_widest (si->nonzero_chars);
5534 	      widest_int w2 = wi::to_widest (off) + coff;
5535 	      if (w1 == w2
5536 		  && si->full_string_p)
5537 		{
5538 		  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5539 		    {
5540 		      fprintf (dump_file, "Optimizing: ");
5541 		      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5542 		    }
5543 
5544 		  /* Reading the final '\0' character.  */
5545 		  tree zero = build_int_cst (lhs_type, 0);
5546 		  gimple_set_vuse (stmt, NULL_TREE);
5547 		  gimple_assign_set_rhs_from_tree (gsi, zero);
5548 		  *cleanup_eh
5549 		    |= maybe_clean_or_replace_eh_stmt (stmt,
5550 						       gsi_stmt (*gsi));
5551 		  stmt = gsi_stmt (*gsi);
5552 		  update_stmt (stmt);
5553 
5554 		  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5555 		    {
5556 		      fprintf (dump_file, "into: ");
5557 		      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5558 		    }
5559 		}
5560 	      else if (w1 > w2)
5561 		{
5562 		  /* Reading a character before the final '\0'
5563 		     character.  Just set the value range to ~[0, 0]
5564 		     if we don't have anything better.  */
5565 		  wide_int min, max;
5566 		  signop sign = TYPE_SIGN (lhs_type);
5567 		  int prec = TYPE_PRECISION (lhs_type);
5568 		  value_range_kind vr = get_range_info (lhs, &min, &max);
5569 		  if (vr == VR_VARYING
5570 		      || (vr == VR_RANGE
5571 			  && min == wi::min_value (prec, sign)
5572 			  && max == wi::max_value (prec, sign)))
5573 		    set_range_info (lhs, VR_ANTI_RANGE,
5574 				    wi::zero (prec), wi::zero (prec));
5575 		}
5576 	    }
5577 	}
5578     }
5579   else if (code == MEM_REF && TREE_CODE (lhs) == SSA_NAME)
5580     {
5581       if (int idx = new_stridx (lhs))
5582 	{
5583 	  /* Record multi-byte assignments from MEM_REFs.  */
5584 	  bool storing_all_nonzero_p;
5585 	  bool storing_all_zeros_p;
5586 	  bool full_string_p;
5587 	  unsigned lenrange[] = { UINT_MAX, 0, 0 };
5588 	  tree rhs = gimple_assign_rhs1 (stmt);
5589 	  const bool ranges_valid
5590 	    = count_nonzero_bytes (rhs, lenrange, &full_string_p,
5591 				   &storing_all_zeros_p, &storing_all_nonzero_p,
5592 				   rvals);
5593 	  if (ranges_valid)
5594 	    {
5595 	      tree length = build_int_cst (sizetype, lenrange[0]);
5596 	      strinfo *si = new_strinfo (lhs, idx, length, full_string_p);
5597 	      set_strinfo (idx, si);
5598 	      si->writable = true;
5599 	      si->dont_invalidate = true;
5600 	      maybe_warn_overflow (stmt, lenrange[2], rvals);
5601 	    }
5602 	}
5603     }
5604 
5605   if (strlen_to_stridx)
5606     {
5607       tree rhs1 = gimple_assign_rhs1 (stmt);
5608       if (stridx_strlenloc *ps = strlen_to_stridx->get (rhs1))
5609 	strlen_to_stridx->put (lhs, stridx_strlenloc (*ps));
5610     }
5611 }
5612 
5613 /* Attempt to check for validity of the performed access a single statement
5614    at *GSI using string length knowledge, and to optimize it.
5615    If the given basic block needs clean-up of EH, CLEANUP_EH is set to
5616    true.  Return true to let the caller advance *GSI to the next statement
5617    in the basic block and false otherwise.  */
5618 
5619 static bool
check_and_optimize_stmt(gimple_stmt_iterator * gsi,bool * cleanup_eh,const vr_values * rvals)5620 check_and_optimize_stmt (gimple_stmt_iterator *gsi, bool *cleanup_eh,
5621 			 const vr_values *rvals)
5622 {
5623   gimple *stmt = gsi_stmt (*gsi);
5624 
5625   /* For statements that modify a string, set to true if the write
5626      is only zeros.  */
5627   bool zero_write = false;
5628 
5629   if (is_gimple_call (stmt))
5630     {
5631       if (!strlen_check_and_optimize_call (gsi, &zero_write, rvals))
5632 	return false;
5633     }
5634   else if (!flag_optimize_strlen || !strlen_optimize)
5635     return true;
5636   else if (is_gimple_assign (stmt) && !gimple_clobber_p (stmt))
5637     {
5638       /* Handle non-clobbering assignment.  */
5639       tree lhs = gimple_assign_lhs (stmt);
5640       tree lhs_type = TREE_TYPE (lhs);
5641 
5642       if (TREE_CODE (lhs) == SSA_NAME && POINTER_TYPE_P (lhs_type))
5643 	{
5644 	  if (gimple_assign_single_p (stmt)
5645 	      || (gimple_assign_cast_p (stmt)
5646 		  && POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt)))))
5647 	    {
5648 	      int idx = get_stridx (gimple_assign_rhs1 (stmt));
5649 	      ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = idx;
5650 	    }
5651 	  else if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
5652 	    handle_pointer_plus (gsi);
5653 	}
5654       else if (TREE_CODE (lhs) == SSA_NAME && INTEGRAL_TYPE_P (lhs_type))
5655 	/* Handle assignment to a character.  */
5656 	handle_integral_assign (gsi, cleanup_eh, rvals);
5657       else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
5658 	{
5659 	  tree type = TREE_TYPE (lhs);
5660 	  if (TREE_CODE (type) == ARRAY_TYPE)
5661 	    type = TREE_TYPE (type);
5662 
5663 	bool is_char_store = is_char_type (type);
5664 	if (!is_char_store && TREE_CODE (lhs) == MEM_REF)
5665 	  {
5666 	    /* To consider stores into char objects via integer types
5667 	       other than char but not those to non-character objects,
5668 	       determine the type of the destination rather than just
5669 	       the type of the access.  */
5670 	    for (int i = 0; i != 2; ++i)
5671 	      {
5672 		tree ref = TREE_OPERAND (lhs, i);
5673 		type = TREE_TYPE (ref);
5674 		if (TREE_CODE (type) == POINTER_TYPE)
5675 		  type = TREE_TYPE (type);
5676 		if (TREE_CODE (type) == ARRAY_TYPE)
5677 		  type = TREE_TYPE (type);
5678 		if (is_char_type (type))
5679 		  {
5680 		    is_char_store = true;
5681 		    break;
5682 		  }
5683 	      }
5684 	  }
5685 
5686 	  /* Handle a single or multibyte assignment.  */
5687 	  if (is_char_store && !handle_store (gsi, &zero_write, rvals))
5688 	    return false;
5689 	}
5690     }
5691   else if (gcond *cond = dyn_cast<gcond *> (stmt))
5692     {
5693       enum tree_code code = gimple_cond_code (cond);
5694       if (code == EQ_EXPR || code == NE_EXPR)
5695 	fold_strstr_to_strncmp (gimple_cond_lhs (stmt),
5696 				gimple_cond_rhs (stmt), stmt);
5697     }
5698 
5699   if (gimple_vdef (stmt))
5700     maybe_invalidate (stmt, zero_write);
5701   return true;
5702 }
5703 
5704 /* Recursively call maybe_invalidate on stmts that might be executed
5705    in between dombb and current bb and that contain a vdef.  Stop when
5706    *count stmts are inspected, or if the whole strinfo vector has
5707    been invalidated.  */
5708 
5709 static void
do_invalidate(basic_block dombb,gimple * phi,bitmap visited,int * count)5710 do_invalidate (basic_block dombb, gimple *phi, bitmap visited, int *count)
5711 {
5712   unsigned int i, n = gimple_phi_num_args (phi);
5713 
5714   for (i = 0; i < n; i++)
5715     {
5716       tree vuse = gimple_phi_arg_def (phi, i);
5717       gimple *stmt = SSA_NAME_DEF_STMT (vuse);
5718       basic_block bb = gimple_bb (stmt);
5719       if (bb == NULL
5720 	  || bb == dombb
5721 	  || !bitmap_set_bit (visited, bb->index)
5722 	  || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5723 	continue;
5724       while (1)
5725 	{
5726 	  if (gimple_code (stmt) == GIMPLE_PHI)
5727 	    {
5728 	      do_invalidate (dombb, stmt, visited, count);
5729 	      if (*count == 0)
5730 		return;
5731 	      break;
5732 	    }
5733 	  if (--*count == 0)
5734 	    return;
5735 	  if (!maybe_invalidate (stmt))
5736 	    {
5737 	      *count = 0;
5738 	      return;
5739 	    }
5740 	  vuse = gimple_vuse (stmt);
5741 	  stmt = SSA_NAME_DEF_STMT (vuse);
5742 	  if (gimple_bb (stmt) != bb)
5743 	    {
5744 	      bb = gimple_bb (stmt);
5745 	      if (bb == NULL
5746 		  || bb == dombb
5747 		  || !bitmap_set_bit (visited, bb->index)
5748 		  || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5749 		break;
5750 	    }
5751 	}
5752     }
5753 }
5754 
5755 class strlen_dom_walker : public dom_walker
5756 {
5757 public:
strlen_dom_walker(cdi_direction direction)5758   strlen_dom_walker (cdi_direction direction)
5759     : dom_walker (direction),
5760     evrp (false),
5761     m_cleanup_cfg (false)
5762   {}
5763 
5764   virtual edge before_dom_children (basic_block);
5765   virtual void after_dom_children (basic_block);
5766 
5767   /* EVRP analyzer used for printf argument range processing, and
5768      to track strlen results across integer variable assignments.  */
5769   evrp_range_analyzer evrp;
5770 
5771   /* Flag that will trigger TODO_cleanup_cfg to be returned in strlen
5772      execute function.  */
5773   bool m_cleanup_cfg;
5774 };
5775 
5776 /* Callback for walk_dominator_tree.  Attempt to optimize various
5777    string ops by remembering string lengths pointed by pointer SSA_NAMEs.  */
5778 
5779 edge
before_dom_children(basic_block bb)5780 strlen_dom_walker::before_dom_children (basic_block bb)
5781 {
5782   evrp.enter (bb);
5783 
5784   basic_block dombb = get_immediate_dominator (CDI_DOMINATORS, bb);
5785 
5786   if (dombb == NULL)
5787     stridx_to_strinfo = NULL;
5788   else
5789     {
5790       stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) dombb->aux);
5791       if (stridx_to_strinfo)
5792 	{
5793 	  for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5794 	       gsi_next (&gsi))
5795 	    {
5796 	      gphi *phi = gsi.phi ();
5797 	      if (virtual_operand_p (gimple_phi_result (phi)))
5798 		{
5799 		  bitmap visited = BITMAP_ALLOC (NULL);
5800 		  int count_vdef = 100;
5801 		  do_invalidate (dombb, phi, visited, &count_vdef);
5802 		  BITMAP_FREE (visited);
5803 		  if (count_vdef == 0)
5804 		    {
5805 		      /* If there were too many vdefs in between immediate
5806 			 dominator and current bb, invalidate everything.
5807 			 If stridx_to_strinfo has been unshared, we need
5808 			 to free it, otherwise just set it to NULL.  */
5809 		      if (!strinfo_shared ())
5810 			{
5811 			  unsigned int i;
5812 			  strinfo *si;
5813 
5814 			  for (i = 1;
5815 			       vec_safe_iterate (stridx_to_strinfo, i, &si);
5816 			       ++i)
5817 			    {
5818 			      free_strinfo (si);
5819 			      (*stridx_to_strinfo)[i] = NULL;
5820 			    }
5821 			}
5822 		      else
5823 			stridx_to_strinfo = NULL;
5824 		    }
5825 		  break;
5826 		}
5827 	    }
5828 	}
5829     }
5830 
5831   /* If all PHI arguments have the same string index, the PHI result
5832      has it as well.  */
5833   for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5834        gsi_next (&gsi))
5835     {
5836       gphi *phi = gsi.phi ();
5837       tree result = gimple_phi_result (phi);
5838       if (!virtual_operand_p (result) && POINTER_TYPE_P (TREE_TYPE (result)))
5839 	{
5840 	  int idx = get_stridx (gimple_phi_arg_def (phi, 0));
5841 	  if (idx != 0)
5842 	    {
5843 	      unsigned int i, n = gimple_phi_num_args (phi);
5844 	      for (i = 1; i < n; i++)
5845 		if (idx != get_stridx (gimple_phi_arg_def (phi, i)))
5846 		  break;
5847 	      if (i == n)
5848 		ssa_ver_to_stridx[SSA_NAME_VERSION (result)] = idx;
5849 	    }
5850 	}
5851     }
5852 
5853   bool cleanup_eh = false;
5854 
5855   /* Attempt to optimize individual statements.  */
5856   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
5857     {
5858       gimple *stmt = gsi_stmt (gsi);
5859 
5860       /* First record ranges generated by this statement so they
5861 	 can be used by printf argument processing.  */
5862       evrp.record_ranges_from_stmt (stmt, false);
5863 
5864       if (check_and_optimize_stmt (&gsi, &cleanup_eh, evrp.get_vr_values ()))
5865 	gsi_next (&gsi);
5866     }
5867 
5868   if (cleanup_eh && gimple_purge_dead_eh_edges (bb))
5869       m_cleanup_cfg = true;
5870 
5871   bb->aux = stridx_to_strinfo;
5872   if (vec_safe_length (stridx_to_strinfo) && !strinfo_shared ())
5873     (*stridx_to_strinfo)[0] = (strinfo *) bb;
5874   return NULL;
5875 }
5876 
5877 /* Callback for walk_dominator_tree.  Free strinfo vector if it is
5878    owned by the current bb, clear bb->aux.  */
5879 
5880 void
after_dom_children(basic_block bb)5881 strlen_dom_walker::after_dom_children (basic_block bb)
5882 {
5883   evrp.leave (bb);
5884 
5885   if (bb->aux)
5886     {
5887       stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) bb->aux);
5888       if (vec_safe_length (stridx_to_strinfo)
5889 	  && (*stridx_to_strinfo)[0] == (strinfo *) bb)
5890 	{
5891 	  unsigned int i;
5892 	  strinfo *si;
5893 
5894 	  for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
5895 	    free_strinfo (si);
5896 	  vec_free (stridx_to_strinfo);
5897 	}
5898       bb->aux = NULL;
5899     }
5900 }
5901 
5902 namespace {
5903 
5904 static unsigned int
printf_strlen_execute(function * fun,bool warn_only)5905 printf_strlen_execute (function *fun, bool warn_only)
5906 {
5907   strlen_optimize = !warn_only;
5908 
5909   calculate_dominance_info (CDI_DOMINATORS);
5910 
5911   bool use_scev = optimize > 0 && flag_printf_return_value;
5912   if (use_scev)
5913     {
5914       loop_optimizer_init (LOOPS_NORMAL);
5915       scev_initialize ();
5916     }
5917 
5918   gcc_assert (!strlen_to_stridx);
5919   if (warn_stringop_overflow || warn_stringop_truncation)
5920     strlen_to_stridx = new hash_map<tree, stridx_strlenloc> ();
5921 
5922   /* This has to happen after initializing the loop optimizer
5923      and initializing SCEV as they create new SSA_NAMEs.  */
5924   ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names);
5925   max_stridx = 1;
5926 
5927   /* String length optimization is implemented as a walk of the dominator
5928      tree and a forward walk of statements within each block.  */
5929   strlen_dom_walker walker (CDI_DOMINATORS);
5930   walker.walk (ENTRY_BLOCK_PTR_FOR_FN (fun));
5931 
5932   ssa_ver_to_stridx.release ();
5933   strinfo_pool.release ();
5934   if (decl_to_stridxlist_htab)
5935     {
5936       obstack_free (&stridx_obstack, NULL);
5937       delete decl_to_stridxlist_htab;
5938       decl_to_stridxlist_htab = NULL;
5939     }
5940   laststmt.stmt = NULL;
5941   laststmt.len = NULL_TREE;
5942   laststmt.stridx = 0;
5943 
5944   if (strlen_to_stridx)
5945     {
5946       strlen_to_stridx->empty ();
5947       delete strlen_to_stridx;
5948       strlen_to_stridx = NULL;
5949     }
5950 
5951   if (use_scev)
5952     {
5953       scev_finalize ();
5954       loop_optimizer_finalize ();
5955     }
5956 
5957   /* Clean up object size info.  */
5958   fini_object_sizes ();
5959 
5960   return walker.m_cleanup_cfg ? TODO_cleanup_cfg : 0;
5961 }
5962 
5963 /* This file defines two passes: one for warnings that runs only when
5964    optimization is disabled, and another that implements optimizations
5965    and also issues warnings.  */
5966 
5967 const pass_data pass_data_warn_printf =
5968 {
5969   GIMPLE_PASS, /* type */
5970   "warn-printf", /* name */
5971   OPTGROUP_NONE, /* optinfo_flags */
5972   TV_NONE, /* tv_id */
5973   /* Normally an optimization pass would require PROP_ssa but because
5974      this pass runs early, with no optimization, to do sprintf format
5975      checking, it only requires PROP_cfg.  */
5976   PROP_cfg, /* properties_required */
5977   0, /* properties_provided */
5978   0, /* properties_destroyed */
5979   0, /* todo_flags_start */
5980   0, /* todo_flags_finish */
5981 };
5982 
5983 class pass_warn_printf : public gimple_opt_pass
5984 {
5985 public:
pass_warn_printf(gcc::context * ctxt)5986   pass_warn_printf (gcc::context *ctxt)
5987     : gimple_opt_pass (pass_data_warn_printf, ctxt)
5988   {}
5989 
5990   virtual bool gate (function *);
execute(function * fun)5991   virtual unsigned int execute (function *fun)
5992   {
5993     return printf_strlen_execute (fun, true);
5994   }
5995 };
5996 
5997 
5998 /* Return true to run the warning pass only when not optimizing and
5999    iff either -Wformat-overflow or -Wformat-truncation is specified.  */
6000 
6001 bool
gate(function *)6002 pass_warn_printf::gate (function *)
6003 {
6004   return !optimize && (warn_format_overflow > 0 || warn_format_trunc > 0);
6005 }
6006 
6007 const pass_data pass_data_strlen =
6008 {
6009   GIMPLE_PASS, /* type */
6010   "strlen", /* name */
6011   OPTGROUP_NONE, /* optinfo_flags */
6012   TV_TREE_STRLEN, /* tv_id */
6013   PROP_cfg | PROP_ssa, /* properties_required */
6014   0, /* properties_provided */
6015   0, /* properties_destroyed */
6016   0, /* todo_flags_start */
6017   0, /* todo_flags_finish */
6018 };
6019 
6020 class pass_strlen : public gimple_opt_pass
6021 {
6022 public:
pass_strlen(gcc::context * ctxt)6023   pass_strlen (gcc::context *ctxt)
6024     : gimple_opt_pass (pass_data_strlen, ctxt)
6025   {}
6026 
clone()6027   opt_pass * clone () { return new pass_strlen (m_ctxt); }
6028 
6029   virtual bool gate (function *);
execute(function * fun)6030   virtual unsigned int execute (function *fun)
6031   {
6032     return printf_strlen_execute (fun, false);
6033   }
6034 };
6035 
6036 /* Return true to run the pass only when the sprintf and/or strlen
6037    optimizations are enabled and -Wformat-overflow or -Wformat-truncation
6038    are specified.  */
6039 
6040 bool
gate(function *)6041 pass_strlen::gate (function *)
6042 {
6043   return ((warn_format_overflow > 0
6044 	   || warn_format_trunc > 0
6045 	   || warn_restrict > 0
6046 	   || flag_optimize_strlen > 0
6047 	   || flag_printf_return_value)
6048 	  && optimize > 0);
6049 }
6050 
6051 } // anon namespace
6052 
6053 gimple_opt_pass *
make_pass_warn_printf(gcc::context * ctxt)6054 make_pass_warn_printf (gcc::context *ctxt)
6055 {
6056   return new pass_warn_printf (ctxt);
6057 }
6058 
6059 gimple_opt_pass *
make_pass_strlen(gcc::context * ctxt)6060 make_pass_strlen (gcc::context *ctxt)
6061 {
6062   return new pass_strlen (ctxt);
6063 }
6064