1 /* String length optimization
2    Copyright (C) 2011-2021 Free Software Foundation, Inc.
3    Contributed by Jakub Jelinek <jakub@redhat.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "alloc-pool.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "gimple-pretty-print.h"
33 #include "gimple-ssa-warn-restrict.h"
34 #include "fold-const.h"
35 #include "stor-layout.h"
36 #include "gimple-fold.h"
37 #include "tree-eh.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "expr.h"
42 #include "tree-cfg.h"
43 #include "tree-dfa.h"
44 #include "domwalk.h"
45 #include "tree-ssa-alias.h"
46 #include "tree-ssa-propagate.h"
47 #include "tree-ssa-strlen.h"
48 #include "tree-hash-traits.h"
49 #include "builtins.h"
50 #include "target.h"
51 #include "diagnostic-core.h"
52 #include "diagnostic.h"
53 #include "intl.h"
54 #include "attribs.h"
55 #include "calls.h"
56 #include "cfgloop.h"
57 #include "tree-ssa-loop.h"
58 #include "tree-scalar-evolution.h"
59 #include "vr-values.h"
60 #include "gimple-ssa-evrp-analyze.h"
61 #include "tree-ssa.h"
62 
63 /* A vector indexed by SSA_NAME_VERSION.  0 means unknown, positive value
64    is an index into strinfo vector, negative value stands for
65    string length of a string literal (~strlen).  */
66 static vec<int> ssa_ver_to_stridx;
67 
68 /* Number of currently active string indexes plus one.  */
69 static int max_stridx;
70 
71 /* Set to true to optimize, false when just checking.  */
72 static bool strlen_optimize;
73 
74 /* String information record.  */
75 struct strinfo
76 {
77   /* Number of leading characters that are known to be nonzero.  This is
78      also the length of the string if FULL_STRING_P.
79 
80      The values in a list of related string pointers must be consistent;
81      that is, if strinfo B comes X bytes after strinfo A, it must be
82      the case that A->nonzero_chars == X + B->nonzero_chars.  */
83   tree nonzero_chars;
84   /* Any of the corresponding pointers for querying alias oracle.  */
85   tree ptr;
86   /* STMT is used for two things:
87 
88      - To record the statement that should be used for delayed length
89        computations.  We maintain the invariant that all related strinfos
90        have delayed lengths or none do.
91 
92      - To record the malloc or calloc call that produced this result
93        to optimize away malloc/memset sequences.  STMT is reset after
94        a calloc-allocated object has been stored a non-zero value into.  */
95   gimple *stmt;
96   /* Set to the dynamic allocation statement for the object (alloca,
97      calloc, malloc, or VLA).  Unlike STMT, once set for a strinfo
98      object, ALLOC doesn't change.  */
99   gimple *alloc;
100   /* Pointer to '\0' if known, if NULL, it can be computed as
101      ptr + length.  */
102   tree endptr;
103   /* Reference count.  Any changes to strinfo entry possibly shared
104      with dominating basic blocks need unshare_strinfo first, except
105      for dont_invalidate which affects only the immediately next
106      maybe_invalidate.  */
107   int refcount;
108   /* Copy of index.  get_strinfo (si->idx) should return si;  */
109   int idx;
110   /* These 3 fields are for chaining related string pointers together.
111      E.g. for
112      bl = strlen (b); dl = strlen (d); strcpy (a, b); c = a + bl;
113      strcpy (c, d); e = c + dl;
114      strinfo(a) -> strinfo(c) -> strinfo(e)
115      All have ->first field equal to strinfo(a)->idx and are doubly
116      chained through prev/next fields.  The later strinfos are required
117      to point into the same string with zero or more bytes after
118      the previous pointer and all bytes in between the two pointers
119      must be non-zero.  Functions like strcpy or memcpy are supposed
120      to adjust all previous strinfo lengths, but not following strinfo
121      lengths (those are uncertain, usually invalidated during
122      maybe_invalidate, except when the alias oracle knows better).
123      Functions like strcat on the other side adjust the whole
124      related strinfo chain.
125      They are updated lazily, so to use the chain the same first fields
126      and si->prev->next == si->idx needs to be verified.  */
127   int first;
128   int next;
129   int prev;
130   /* A flag whether the string is known to be written in the current
131      function.  */
132   bool writable;
133   /* A flag for the next maybe_invalidate that this strinfo shouldn't
134      be invalidated.  Always cleared by maybe_invalidate.  */
135   bool dont_invalidate;
136   /* True if the string is known to be nul-terminated after NONZERO_CHARS
137      characters.  False is useful when detecting strings that are built
138      up via successive memcpys.  */
139   bool full_string_p;
140 };
141 
142 /* Pool for allocating strinfo_struct entries.  */
143 static object_allocator<strinfo> strinfo_pool ("strinfo pool");
144 
145 /* Vector mapping positive string indexes to strinfo, for the
146    current basic block.  The first pointer in the vector is special,
147    it is either NULL, meaning the vector isn't shared, or it is
148    a basic block pointer to the owner basic_block if shared.
149    If some other bb wants to modify the vector, the vector needs
150    to be unshared first, and only the owner bb is supposed to free it.  */
151 static vec<strinfo *, va_heap, vl_embed> *stridx_to_strinfo;
152 
153 /* One OFFSET->IDX mapping.  */
154 struct stridxlist
155 {
156   struct stridxlist *next;
157   HOST_WIDE_INT offset;
158   int idx;
159 };
160 
161 /* Hash table entry, mapping a DECL to a chain of OFFSET->IDX mappings.  */
162 struct decl_stridxlist_map
163 {
164   struct tree_map_base base;
165   struct stridxlist list;
166 };
167 
168 /* Hash table for mapping decls to a chained list of offset -> idx
169    mappings.  */
170 typedef hash_map<tree_decl_hash, stridxlist> decl_to_stridxlist_htab_t;
171 static decl_to_stridxlist_htab_t *decl_to_stridxlist_htab;
172 
173 /* Hash table mapping strlen (or strnlen with constant bound and return
174    smaller than bound) calls to stridx instances describing
175    the calls' arguments.  Non-null only when warn_stringop_truncation
176    is non-zero.  */
177 typedef std::pair<int, location_t> stridx_strlenloc;
178 static hash_map<tree, stridx_strlenloc> *strlen_to_stridx;
179 
180 /* Obstack for struct stridxlist and struct decl_stridxlist_map.  */
181 static struct obstack stridx_obstack;
182 
183 /* Last memcpy statement if it could be adjusted if the trailing
184    '\0' written is immediately overwritten, or
185    *x = '\0' store that could be removed if it is immediately overwritten.  */
186 struct laststmt_struct
187 {
188   gimple *stmt;
189   tree len;
190   int stridx;
191 } laststmt;
192 
193 static int get_stridx_plus_constant (strinfo *, unsigned HOST_WIDE_INT, tree);
194 static void handle_builtin_stxncpy_strncat (bool, gimple_stmt_iterator *);
195 
196 /* Sets MINMAX to either the constant value or the range VAL is in
197    and returns either the constant value or VAL on success or null
198    when the range couldn't be determined.  Uses RVALS when nonnull
199    to determine the range, otherwise get_range_info.  */
200 
201 tree
get_range(tree val,gimple * stmt,wide_int minmax[2],range_query * rvals)202 get_range (tree val, gimple *stmt, wide_int minmax[2],
203 	   range_query *rvals /* = NULL */)
204 {
205   if (TREE_CODE (val) == INTEGER_CST)
206     {
207       minmax[0] = minmax[1] = wi::to_wide (val);
208       return val;
209     }
210 
211   if (TREE_CODE (val) != SSA_NAME)
212     return NULL_TREE;
213 
214   if (rvals && stmt)
215     {
216       value_range vr;
217       if (!rvals->range_of_expr (vr, val, stmt))
218 	return NULL_TREE;
219       value_range_kind rng = vr.kind ();
220       if (rng != VR_RANGE)
221 	return NULL_TREE;
222 
223       minmax[0] = wi::to_wide (vr.min ());
224       minmax[1] = wi::to_wide (vr.max ());
225       return val;
226     }
227 
228   value_range_kind rng = get_range_info (val, minmax, minmax + 1);
229   if (rng == VR_RANGE)
230     /* This may be an inverted range whose MINMAX[1] < MINMAX[0].  */
231     return val;
232 
233   if (rng == VR_ANTI_RANGE)
234     {
235       /* An anti-range is the same as an ordinary range with inverted
236 	 bounds (where MINMAX[1] < MINMAX[0] is true) that may result
237 	 from the conversion of a signed anti-range to unsigned.  */
238       wide_int tmp = minmax[0];
239       minmax[0] = minmax[1] + 1;
240       minmax[1] = wi::sub (tmp, 1);
241       return val;
242     }
243 
244   /* Do not handle anti-ranges and instead make use of the on-demand
245      VRP if/when it becomes available (hopefully in GCC 11).  */
246   return NULL_TREE;
247 }
248 
249 /* Return:
250 
251    *  +1  if SI is known to start with more than OFF nonzero characters.
252 
253    *   0  if SI is known to start with exactly OFF nonzero characters.
254 
255    *  -1  if SI either does not start with OFF nonzero characters
256 	  or the relationship between the number of leading nonzero
257 	  characters in SI and OFF is unknown.  */
258 
259 static inline int
compare_nonzero_chars(strinfo * si,unsigned HOST_WIDE_INT off)260 compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off)
261 {
262   if (si->nonzero_chars
263       && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
264     return compare_tree_int (si->nonzero_chars, off);
265   else
266     return -1;
267 }
268 
269 /* Same as above but suitable also for strings with non-constant lengths.
270    Uses RVALS to determine length range.  */
271 
272 static int
compare_nonzero_chars(strinfo * si,unsigned HOST_WIDE_INT off,range_query * rvals)273 compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off,
274 		       range_query *rvals)
275 {
276   if (!si->nonzero_chars)
277     return -1;
278 
279   if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
280     return compare_tree_int (si->nonzero_chars, off);
281 
282   if (!rvals || TREE_CODE (si->nonzero_chars) != SSA_NAME)
283     return -1;
284 
285   value_range vr;
286   if (!rvals->range_of_expr (vr, si->nonzero_chars, si->stmt))
287     return -1;
288   value_range_kind rng = vr.kind ();
289   if (rng != VR_RANGE)
290     return -1;
291 
292   /* If the offset is less than the minimum length or if the bounds
293      of the length range are equal return the result of the comparison
294      same as in the constant case.  Otherwise return a conservative
295      result.  */
296   int cmpmin = compare_tree_int (vr.min (), off);
297   if (cmpmin > 0 || tree_int_cst_equal (vr.min (), vr.max ()))
298     return cmpmin;
299 
300   return -1;
301 }
302 
303 /* Return true if SI is known to be a zero-length string.  */
304 
305 static inline bool
zero_length_string_p(strinfo * si)306 zero_length_string_p (strinfo *si)
307 {
308   return si->full_string_p && integer_zerop (si->nonzero_chars);
309 }
310 
311 /* Return strinfo vector entry IDX.  */
312 
313 static inline strinfo *
get_strinfo(int idx)314 get_strinfo (int idx)
315 {
316   if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
317     return NULL;
318   return (*stridx_to_strinfo)[idx];
319 }
320 
321 /* Get the next strinfo in the chain after SI, or null if none.  */
322 
323 static inline strinfo *
get_next_strinfo(strinfo * si)324 get_next_strinfo (strinfo *si)
325 {
326   if (si->next == 0)
327     return NULL;
328   strinfo *nextsi = get_strinfo (si->next);
329   if (nextsi == NULL || nextsi->first != si->first || nextsi->prev != si->idx)
330     return NULL;
331   return nextsi;
332 }
333 
334 /* Helper function for get_stridx.  Return the strinfo index of the address
335    of EXP, which is available in PTR if nonnull.  If OFFSET_OUT, it is
336    OK to return the index for some X <= &EXP and store &EXP - X in
337    *OFFSET_OUT.  When RVALS is nonnull uses it to determine range
338    information.  */
339 
340 static int
341 get_addr_stridx (tree exp, tree ptr, unsigned HOST_WIDE_INT *offset_out,
342 		 range_query *rvals = NULL)
343 {
344   HOST_WIDE_INT off;
345   struct stridxlist *list, *last = NULL;
346   tree base;
347 
348   if (!decl_to_stridxlist_htab)
349     return 0;
350 
351   poly_int64 poff;
352   base = get_addr_base_and_unit_offset (exp, &poff);
353   if (base == NULL || !DECL_P (base) || !poff.is_constant (&off))
354     return 0;
355 
356   list = decl_to_stridxlist_htab->get (base);
357   if (list == NULL)
358     return 0;
359 
360   do
361     {
362       if (list->offset == off)
363 	{
364 	  if (offset_out)
365 	    *offset_out = 0;
366 	  return list->idx;
367 	}
368       if (list->offset > off)
369 	return 0;
370       last = list;
371       list = list->next;
372     }
373   while (list);
374 
375   if ((offset_out || ptr) && last && last->idx > 0)
376     {
377       unsigned HOST_WIDE_INT rel_off
378 	= (unsigned HOST_WIDE_INT) off - last->offset;
379       strinfo *si = get_strinfo (last->idx);
380       if (si && compare_nonzero_chars (si, rel_off, rvals) >= 0)
381 	{
382 	  if (offset_out)
383 	    {
384 	      *offset_out = rel_off;
385 	      return last->idx;
386 	    }
387 	  else
388 	    return get_stridx_plus_constant (si, rel_off, ptr);
389 	}
390     }
391   return 0;
392 }
393 
394 /* Returns string index for EXP.  When EXP is an SSA_NAME that refers
395    to a known strinfo with an offset and OFFRNG is non-null, sets
396    both elements of the OFFRNG array to the range of the offset and
397    returns the index of the known strinfo.  In this case the result
398    must not be used in for functions that modify the string.
399    When nonnull, uses RVALS to determine range information.  */
400 
401 static int
402 get_stridx (tree exp, wide_int offrng[2] = NULL, range_query *rvals = NULL)
403 {
404   if (offrng)
405     offrng[0] = offrng[1] = wi::zero (TYPE_PRECISION (ptrdiff_type_node));
406 
407   if (TREE_CODE (exp) == SSA_NAME)
408     {
409       if (ssa_ver_to_stridx[SSA_NAME_VERSION (exp)])
410 	return ssa_ver_to_stridx[SSA_NAME_VERSION (exp)];
411 
412       tree e = exp;
413       int last_idx = 0;
414       HOST_WIDE_INT offset = 0;
415       /* Follow a chain of at most 5 assignments.  */
416       for (int i = 0; i < 5; i++)
417 	{
418 	  gimple *def_stmt = SSA_NAME_DEF_STMT (e);
419 	  if (!is_gimple_assign (def_stmt))
420 	    return last_idx;
421 
422 	  tree_code rhs_code = gimple_assign_rhs_code (def_stmt);
423 	  tree ptr, off;
424 
425 	  if (rhs_code == ADDR_EXPR)
426 	    {
427 	      /* Handle indices/offsets into VLAs which are implemented
428 	         as pointers to arrays.  */
429 	      ptr = gimple_assign_rhs1 (def_stmt);
430 	      ptr = TREE_OPERAND (ptr, 0);
431 
432 	      /* Handle also VLAs of types larger than char.  */
433 	      if (tree eltsize = TYPE_SIZE_UNIT (TREE_TYPE (ptr)))
434 		{
435 		  if (TREE_CODE (ptr) == ARRAY_REF)
436 		    {
437 		      off = TREE_OPERAND (ptr, 1);
438 		      ptr = TREE_OPERAND (ptr, 0);
439 		      if (!integer_onep (eltsize))
440 			{
441 			  /* Scale the array index by the size of the element
442 			     type in the rare case that it's greater than
443 			     the typical 1 for char, making sure both operands
444 			     have the same type.  */
445 			  eltsize = fold_convert (ssizetype, eltsize);
446 			  off = fold_convert (ssizetype, off);
447 			  off = fold_build2 (MULT_EXPR, ssizetype, off, eltsize);
448 			}
449 		    }
450 		  else
451 		    off = integer_zero_node;
452 		}
453 	      else
454 		return 0;
455 
456 	      if (TREE_CODE (ptr) != MEM_REF)
457 	        return 0;
458 
459 	      /* Add the MEM_REF byte offset.  */
460 	      tree mem_off = TREE_OPERAND (ptr, 1);
461 	      off = fold_build2 (PLUS_EXPR, TREE_TYPE (off), off, mem_off);
462 	      ptr = TREE_OPERAND (ptr, 0);
463 	    }
464 	  else if (rhs_code == POINTER_PLUS_EXPR)
465 	    {
466 	      ptr = gimple_assign_rhs1 (def_stmt);
467 	      off = gimple_assign_rhs2 (def_stmt);
468 	    }
469 	  else
470 	    return 0;
471 
472 	  if (TREE_CODE (ptr) != SSA_NAME)
473 	    return 0;
474 
475 	  if (!tree_fits_shwi_p (off))
476 	    {
477 	      if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
478 		if (offrng)
479 		  {
480 		    /* Only when requested by setting OFFRNG to non-null,
481 		       return the index corresponding to the SSA_NAME.
482 		       Do this irrespective of the whether the offset
483 		       is known.  */
484 		    if (get_range (off, def_stmt, offrng, rvals))
485 		      {
486 			/* When the offset range is known, increment it
487 			   it by the constant offset computed in prior
488 			   iterations and store it in the OFFRNG array.  */
489  			offrng[0] += offset;
490 			offrng[1] += offset;
491 		      }
492 		    else
493 		      {
494 			/* When the offset range cannot be determined
495 			   store [0, SIZE_MAX] and let the caller decide
496 			   if the offset matters.  */
497 			offrng[1] = wi::to_wide (TYPE_MAX_VALUE (sizetype));
498 			offrng[0] = wi::zero (offrng[1].get_precision ());
499 		      }
500 		    return idx;
501 		  }
502 	      return 0;
503 	    }
504 
505 	  HOST_WIDE_INT this_off = tree_to_shwi (off);
506 	  if (offrng)
507 	    {
508 	      offrng[0] += wi::shwi (this_off, offrng->get_precision ());
509 	      offrng[1] += offrng[0];
510 	    }
511 
512 	  if (this_off < 0)
513 	    return last_idx;
514 
515 	  offset = (unsigned HOST_WIDE_INT) offset + this_off;
516 	  if (offset < 0)
517 	    return last_idx;
518 
519 	  if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
520 	    {
521 	      strinfo *si = get_strinfo (idx);
522 	      if (si)
523 		{
524 		  if (compare_nonzero_chars (si, offset) >= 0)
525 		    return get_stridx_plus_constant (si, offset, exp);
526 
527 		  if (offrng)
528 		    last_idx = idx;
529 		}
530 	    }
531 	  e = ptr;
532 	}
533 
534       return last_idx;
535     }
536 
537   if (TREE_CODE (exp) == ADDR_EXPR)
538     {
539       int idx = get_addr_stridx (TREE_OPERAND (exp, 0), exp, NULL);
540       if (idx != 0)
541 	return idx;
542     }
543 
544   const char *p = c_getstr (exp);
545   if (p)
546     return ~(int) strlen (p);
547 
548   return 0;
549 }
550 
551 /* Return true if strinfo vector is shared with the immediate dominator.  */
552 
553 static inline bool
strinfo_shared(void)554 strinfo_shared (void)
555 {
556   return vec_safe_length (stridx_to_strinfo)
557 	 && (*stridx_to_strinfo)[0] != NULL;
558 }
559 
560 /* Unshare strinfo vector that is shared with the immediate dominator.  */
561 
562 static void
unshare_strinfo_vec(void)563 unshare_strinfo_vec (void)
564 {
565   strinfo *si;
566   unsigned int i = 0;
567 
568   gcc_assert (strinfo_shared ());
569   stridx_to_strinfo = vec_safe_copy (stridx_to_strinfo);
570   for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
571     if (si != NULL)
572       si->refcount++;
573   (*stridx_to_strinfo)[0] = NULL;
574 }
575 
576 /* Attempt to create a string index for exp, ADDR_EXPR's operand.
577    Return a pointer to the location where the string index can
578    be stored (if 0) or is stored, or NULL if this can't be tracked.  */
579 
580 static int *
addr_stridxptr(tree exp)581 addr_stridxptr (tree exp)
582 {
583   HOST_WIDE_INT off;
584 
585   poly_int64 poff;
586   tree base = get_addr_base_and_unit_offset (exp, &poff);
587   if (base == NULL_TREE || !DECL_P (base) || !poff.is_constant (&off))
588     return NULL;
589 
590   if (!decl_to_stridxlist_htab)
591     {
592       decl_to_stridxlist_htab
593        	= new hash_map<tree_decl_hash, stridxlist> (64);
594       gcc_obstack_init (&stridx_obstack);
595     }
596 
597   bool existed;
598   stridxlist *list = &decl_to_stridxlist_htab->get_or_insert (base, &existed);
599   if (existed)
600     {
601       int i;
602       stridxlist *before = NULL;
603       for (i = 0; i < 32; i++)
604 	{
605 	  if (list->offset == off)
606 	    return &list->idx;
607 	  if (list->offset > off && before == NULL)
608 	    before = list;
609 	  if (list->next == NULL)
610 	    break;
611 	  list = list->next;
612 	}
613       if (i == 32)
614 	return NULL;
615       if (before)
616 	{
617 	  list = before;
618 	  before = XOBNEW (&stridx_obstack, struct stridxlist);
619 	  *before = *list;
620 	  list->next = before;
621 	  list->offset = off;
622 	  list->idx = 0;
623 	  return &list->idx;
624 	}
625       list->next = XOBNEW (&stridx_obstack, struct stridxlist);
626       list = list->next;
627     }
628 
629   list->next = NULL;
630   list->offset = off;
631   list->idx = 0;
632   return &list->idx;
633 }
634 
635 /* Create a new string index, or return 0 if reached limit.  */
636 
637 static int
new_stridx(tree exp)638 new_stridx (tree exp)
639 {
640   int idx;
641   if (max_stridx >= param_max_tracked_strlens)
642     return 0;
643   if (TREE_CODE (exp) == SSA_NAME)
644     {
645       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
646 	return 0;
647       idx = max_stridx++;
648       ssa_ver_to_stridx[SSA_NAME_VERSION (exp)] = idx;
649       return idx;
650     }
651   if (TREE_CODE (exp) == ADDR_EXPR)
652     {
653       int *pidx = addr_stridxptr (TREE_OPERAND (exp, 0));
654       if (pidx != NULL)
655 	{
656 	  gcc_assert (*pidx == 0);
657 	  *pidx = max_stridx++;
658 	  return *pidx;
659 	}
660     }
661   return 0;
662 }
663 
664 /* Like new_stridx, but for ADDR_EXPR's operand instead.  */
665 
666 static int
new_addr_stridx(tree exp)667 new_addr_stridx (tree exp)
668 {
669   int *pidx;
670   if (max_stridx >= param_max_tracked_strlens)
671     return 0;
672   pidx = addr_stridxptr (exp);
673   if (pidx != NULL)
674     {
675       gcc_assert (*pidx == 0);
676       *pidx = max_stridx++;
677       return *pidx;
678     }
679   return 0;
680 }
681 
682 /* Create a new strinfo.  */
683 
684 static strinfo *
new_strinfo(tree ptr,int idx,tree nonzero_chars,bool full_string_p)685 new_strinfo (tree ptr, int idx, tree nonzero_chars, bool full_string_p)
686 {
687   strinfo *si = strinfo_pool.allocate ();
688   si->nonzero_chars = nonzero_chars;
689   STRIP_USELESS_TYPE_CONVERSION (ptr);
690   si->ptr = ptr;
691   si->stmt = NULL;
692   si->alloc = NULL;
693   si->endptr = NULL_TREE;
694   si->refcount = 1;
695   si->idx = idx;
696   si->first = 0;
697   si->prev = 0;
698   si->next = 0;
699   si->writable = false;
700   si->dont_invalidate = false;
701   si->full_string_p = full_string_p;
702   return si;
703 }
704 
705 /* Decrease strinfo refcount and free it if not referenced anymore.  */
706 
707 static inline void
free_strinfo(strinfo * si)708 free_strinfo (strinfo *si)
709 {
710   if (si && --si->refcount == 0)
711     strinfo_pool.remove (si);
712 }
713 
714 /* Set strinfo in the vector entry IDX to SI.  */
715 
716 static inline void
set_strinfo(int idx,strinfo * si)717 set_strinfo (int idx, strinfo *si)
718 {
719   if (vec_safe_length (stridx_to_strinfo) && (*stridx_to_strinfo)[0])
720     unshare_strinfo_vec ();
721   if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
722     vec_safe_grow_cleared (stridx_to_strinfo, idx + 1, true);
723   (*stridx_to_strinfo)[idx] = si;
724 }
725 
726 /* Return the first strinfo in the related strinfo chain
727    if all strinfos in between belong to the chain, otherwise NULL.  */
728 
729 static strinfo *
verify_related_strinfos(strinfo * origsi)730 verify_related_strinfos (strinfo *origsi)
731 {
732   strinfo *si = origsi, *psi;
733 
734   if (origsi->first == 0)
735     return NULL;
736   for (; si->prev; si = psi)
737     {
738       if (si->first != origsi->first)
739 	return NULL;
740       psi = get_strinfo (si->prev);
741       if (psi == NULL)
742 	return NULL;
743       if (psi->next != si->idx)
744 	return NULL;
745     }
746   if (si->idx != si->first)
747     return NULL;
748   return si;
749 }
750 
751 /* Set SI's endptr to ENDPTR and compute its length based on SI->ptr.
752    Use LOC for folding.  */
753 
754 static void
set_endptr_and_length(location_t loc,strinfo * si,tree endptr)755 set_endptr_and_length (location_t loc, strinfo *si, tree endptr)
756 {
757   si->endptr = endptr;
758   si->stmt = NULL;
759   tree start_as_size = fold_convert_loc (loc, size_type_node, si->ptr);
760   tree end_as_size = fold_convert_loc (loc, size_type_node, endptr);
761   si->nonzero_chars = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
762 				       end_as_size, start_as_size);
763   si->full_string_p = true;
764 }
765 
766 /* Return the string length, or NULL if it can't be computed.
767    The length may but need not be constant.  Instead, it might be
768    the result of a strlen() call.  */
769 
770 static tree
get_string_length(strinfo * si)771 get_string_length (strinfo *si)
772 {
773   /* If the length has already been computed return it if it's exact
774      (i.e., the string is nul-terminated at NONZERO_CHARS), or return
775      null if it isn't.  */
776   if (si->nonzero_chars)
777     return si->full_string_p ? si->nonzero_chars : NULL;
778 
779   /* If the string is the result of one of the built-in calls below
780      attempt to compute the length from the call statement.  */
781   if (si->stmt)
782     {
783       gimple *stmt = si->stmt, *lenstmt;
784       tree callee, lhs, fn, tem;
785       location_t loc;
786       gimple_stmt_iterator gsi;
787 
788       gcc_assert (is_gimple_call (stmt));
789       callee = gimple_call_fndecl (stmt);
790       gcc_assert (callee && fndecl_built_in_p (callee, BUILT_IN_NORMAL));
791       lhs = gimple_call_lhs (stmt);
792       /* unshare_strinfo is intentionally not called here.  The (delayed)
793 	 transformation of strcpy or strcat into stpcpy is done at the place
794 	 of the former strcpy/strcat call and so can affect all the strinfos
795 	 with the same stmt.  If they were unshared before and transformation
796 	 has been already done, the handling of BUILT_IN_STPCPY{,_CHK} should
797 	 just compute the right length.  */
798       switch (DECL_FUNCTION_CODE (callee))
799 	{
800 	case BUILT_IN_STRCAT:
801 	case BUILT_IN_STRCAT_CHK:
802 	  gsi = gsi_for_stmt (stmt);
803 	  fn = builtin_decl_implicit (BUILT_IN_STRLEN);
804 	  gcc_assert (lhs == NULL_TREE);
805 	  tem = unshare_expr (gimple_call_arg (stmt, 0));
806 	  lenstmt = gimple_build_call (fn, 1, tem);
807 	  lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), lenstmt);
808 	  gimple_call_set_lhs (lenstmt, lhs);
809 	  gimple_set_vuse (lenstmt, gimple_vuse (stmt));
810 	  gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
811 	  tem = gimple_call_arg (stmt, 0);
812           if (!ptrofftype_p (TREE_TYPE (lhs)))
813             {
814               lhs = convert_to_ptrofftype (lhs);
815               lhs = force_gimple_operand_gsi (&gsi, lhs, true, NULL_TREE,
816                                               true, GSI_SAME_STMT);
817             }
818 	  lenstmt = gimple_build_assign
819 			(make_ssa_name (TREE_TYPE (gimple_call_arg (stmt, 0))),
820 			 POINTER_PLUS_EXPR,tem, lhs);
821 	  gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
822 	  gimple_call_set_arg (stmt, 0, gimple_assign_lhs (lenstmt));
823 	  lhs = NULL_TREE;
824 	  /* FALLTHRU */
825 	case BUILT_IN_STRCPY:
826 	case BUILT_IN_STRCPY_CHK:
827 	  gcc_assert (builtin_decl_implicit_p (BUILT_IN_STPCPY));
828 	  if (gimple_call_num_args (stmt) == 2)
829 	    fn = builtin_decl_implicit (BUILT_IN_STPCPY);
830 	  else
831 	    fn = builtin_decl_explicit (BUILT_IN_STPCPY_CHK);
832 	  gcc_assert (lhs == NULL_TREE);
833 	  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
834 	    {
835 	      fprintf (dump_file, "Optimizing: ");
836 	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
837 	    }
838 	  gimple_call_set_fndecl (stmt, fn);
839 	  lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), stmt);
840 	  gimple_call_set_lhs (stmt, lhs);
841 	  update_stmt (stmt);
842 	  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
843 	    {
844 	      fprintf (dump_file, "into: ");
845 	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
846 	    }
847 	  /* FALLTHRU */
848 	case BUILT_IN_STPCPY:
849 	case BUILT_IN_STPCPY_CHK:
850 	  gcc_assert (lhs != NULL_TREE);
851 	  loc = gimple_location (stmt);
852 	  set_endptr_and_length (loc, si, lhs);
853 	  for (strinfo *chainsi = verify_related_strinfos (si);
854 	       chainsi != NULL;
855 	       chainsi = get_next_strinfo (chainsi))
856 	    if (chainsi->nonzero_chars == NULL)
857 	      set_endptr_and_length (loc, chainsi, lhs);
858 	  break;
859 	case BUILT_IN_ALLOCA:
860 	case BUILT_IN_ALLOCA_WITH_ALIGN:
861 	case BUILT_IN_MALLOC:
862 	  break;
863 	/* BUILT_IN_CALLOC always has si->nonzero_chars set.  */
864 	default:
865 	  gcc_unreachable ();
866 	  break;
867 	}
868     }
869 
870   return si->nonzero_chars;
871 }
872 
873 /* Dump strlen data to FP for statement STMT.  When non-null, RVALS
874    points to the valuation engine used to calculate ranges, and is
875    used to dump strlen range for non-constant results.  */
876 
877 DEBUG_FUNCTION void
dump_strlen_info(FILE * fp,gimple * stmt,range_query * rvals)878 dump_strlen_info (FILE *fp, gimple *stmt, range_query *rvals)
879 {
880   if (stmt)
881     {
882       fprintf (fp, "\nDumping strlen pass data after ");
883       print_gimple_expr (fp, stmt, TDF_LINENO);
884       fputc ('\n', fp);
885     }
886   else
887     fprintf (fp, "\nDumping strlen pass data\n");
888 
889   fprintf (fp, "max_stridx = %i\n", max_stridx);
890   fprintf (fp, "ssa_ver_to_stridx has %u elements\n",
891 	   ssa_ver_to_stridx.length ());
892   fprintf (fp, "stridx_to_strinfo");
893   if (stridx_to_strinfo)
894     {
895       fprintf (fp, " has %u elements\n", stridx_to_strinfo->length ());
896       for (unsigned i = 0; i != stridx_to_strinfo->length (); ++i)
897 	{
898 	  if (strinfo *si = (*stridx_to_strinfo)[i])
899 	    {
900 	      if (!si->idx)
901 		continue;
902 	      fprintf (fp, "  idx = %i", si->idx);
903 	      if (si->ptr)
904 		{
905 		  fprintf (fp, ", ptr = ");
906 		  print_generic_expr (fp, si->ptr);
907 		}
908 
909 	      if (si->nonzero_chars)
910 		{
911 		  fprintf (fp, ", nonzero_chars = ");
912 		  print_generic_expr (fp, si->nonzero_chars);
913 		  if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
914 		    {
915 		      value_range_kind rng = VR_UNDEFINED;
916 		      wide_int min, max;
917 		      if (rvals)
918 			{
919 			  value_range vr;
920 			  rvals->range_of_expr (vr, si->nonzero_chars,
921 						si->stmt);
922 			  rng = vr.kind ();
923 			  if (range_int_cst_p (&vr))
924 			    {
925 			      min = wi::to_wide (vr.min ());
926 			      max = wi::to_wide (vr.max ());
927 			    }
928 			  else
929 			    rng = VR_UNDEFINED;
930 			}
931 		      else
932 			rng = get_range_info (si->nonzero_chars, &min, &max);
933 
934 		      if (rng == VR_RANGE || rng == VR_ANTI_RANGE)
935 			{
936 			  fprintf (fp, " %s[%llu, %llu]",
937 				   rng == VR_RANGE ? "" : "~",
938 				   (long long) min.to_uhwi (),
939 				   (long long) max.to_uhwi ());
940 			}
941 		    }
942 		}
943 
944 	      fprintf (fp, ", refcount = %i", si->refcount);
945 	      if (si->stmt)
946 		{
947 		  fprintf (fp, ", stmt = ");
948 		  print_gimple_expr (fp, si->stmt, 0);
949 		}
950 	      if (si->alloc)
951 		{
952 		  fprintf (fp, ", alloc = ");
953 		  print_gimple_expr (fp, si->alloc, 0);
954 		}
955 	      if (si->writable)
956 		fprintf (fp, ", writable");
957 	      if (si->dont_invalidate)
958 		fprintf (fp, ", dont_invalidate");
959 	      if (si->full_string_p)
960 		fprintf (fp, ", full_string_p");
961 	      if (strinfo *next = get_next_strinfo (si))
962 		{
963 		  fprintf (fp, ", {");
964 		  do
965 		    fprintf (fp, "%i%s", next->idx, next->first ? ", " : "");
966 		  while ((next = get_next_strinfo (next)));
967 		  fprintf (fp, "}");
968 		}
969 	      fputs ("\n", fp);
970 	    }
971 	}
972     }
973   else
974     fprintf (fp, " = null\n");
975 
976   fprintf (fp, "decl_to_stridxlist_htab");
977   if (decl_to_stridxlist_htab)
978     {
979       fputs ("\n", fp);
980       typedef decl_to_stridxlist_htab_t::iterator iter_t;
981       for (iter_t it = decl_to_stridxlist_htab->begin ();
982 	   it != decl_to_stridxlist_htab->end (); ++it)
983 	{
984 	  tree decl = (*it).first;
985 	  stridxlist *list = &(*it).second;
986 	  fprintf (fp, "  decl = ");
987 	  print_generic_expr (fp, decl);
988 	  if (list)
989 	    {
990 	      fprintf (fp, ", offsets = {");
991 	      for (; list; list = list->next)
992 		fprintf (fp, "%lli%s", (long long) list->offset,
993 			 list->next ? ", " : "");
994 	      fputs ("}", fp);
995 	    }
996 	  fputs ("\n", fp);
997 	}
998     }
999   else
1000     fprintf (fp, " = null\n");
1001 
1002   if (laststmt.stmt)
1003     {
1004       fprintf (fp, "laststmt = ");
1005       print_gimple_expr (fp, laststmt.stmt, 0);
1006       fprintf (fp, ", len = ");
1007       print_generic_expr (fp, laststmt.len);
1008       fprintf (fp, ", stridx = %i\n", laststmt.stridx);
1009     }
1010 }
1011 
1012 /* Attempt to determine the length of the string SRC.  On success, store
1013    the length in *PDATA and return true.  Otherwise, return false.
1014    VISITED is a bitmap of visited PHI nodes.  RVALS points to the valuation
1015    engine used to calculate ranges.  PSSA_DEF_MAX to an SSA_NAME
1016    assignment limit used to prevent runaway recursion.  */
1017 
1018 static bool
get_range_strlen_dynamic(tree src,gimple * stmt,c_strlen_data * pdata,bitmap * visited,range_query * rvals,unsigned * pssa_def_max)1019 get_range_strlen_dynamic (tree src, gimple *stmt,
1020 			  c_strlen_data *pdata, bitmap *visited,
1021 			  range_query *rvals, unsigned *pssa_def_max)
1022 {
1023   int idx = get_stridx (src);
1024   if (!idx)
1025     {
1026       if (TREE_CODE (src) == SSA_NAME)
1027 	{
1028 	  gimple *def_stmt = SSA_NAME_DEF_STMT (src);
1029 	  if (gimple_code (def_stmt) == GIMPLE_PHI)
1030 	    {
1031 	      if (!*visited)
1032 		*visited = BITMAP_ALLOC (NULL);
1033 
1034 	      if (!bitmap_set_bit (*visited, SSA_NAME_VERSION (src)))
1035 		return true;
1036 
1037 	      if (*pssa_def_max == 0)
1038 		return false;
1039 
1040 	      --*pssa_def_max;
1041 
1042 	      /* Iterate over the PHI arguments and determine the minimum
1043 		 and maximum length/size of each and incorporate them into
1044 		 the overall result.  */
1045 	      gphi *phi = as_a <gphi *> (def_stmt);
1046 	      for (unsigned i = 0; i != gimple_phi_num_args (phi); ++i)
1047 		{
1048 		  tree arg = gimple_phi_arg_def (phi, i);
1049 		  if (arg == gimple_phi_result (def_stmt))
1050 		    continue;
1051 
1052 		  c_strlen_data argdata = { };
1053 		  if (get_range_strlen_dynamic (arg, phi, &argdata, visited,
1054 						rvals, pssa_def_max))
1055 		    {
1056 		      /* Set the DECL of an unterminated array this argument
1057 			 refers to if one hasn't been found yet.  */
1058 		      if (!pdata->decl && argdata.decl)
1059 			pdata->decl = argdata.decl;
1060 
1061 		      if (!argdata.minlen
1062 			  || (integer_zerop (argdata.minlen)
1063 			      && (!argdata.maxbound
1064 				  || integer_all_onesp (argdata.maxbound))
1065 			      && integer_all_onesp (argdata.maxlen)))
1066 			{
1067 			  /* Set the upper bound of the length to unbounded.  */
1068 			  pdata->maxlen = build_all_ones_cst (size_type_node);
1069 			  continue;
1070 			}
1071 
1072 		      /* Adjust the minimum and maximum length determined
1073 			 so far and the upper bound on the array size.  */
1074 		      if (!pdata->minlen
1075 			  || tree_int_cst_lt (argdata.minlen, pdata->minlen))
1076 			pdata->minlen = argdata.minlen;
1077 		      if (!pdata->maxlen
1078 			  || (argdata.maxlen
1079 			      && tree_int_cst_lt (pdata->maxlen, argdata.maxlen)))
1080 			pdata->maxlen = argdata.maxlen;
1081 		      if (!pdata->maxbound
1082 			  || TREE_CODE (pdata->maxbound) != INTEGER_CST
1083 			  || (argdata.maxbound
1084 			      && tree_int_cst_lt (pdata->maxbound,
1085 						  argdata.maxbound)
1086 			      && !integer_all_onesp (argdata.maxbound)))
1087 			pdata->maxbound = argdata.maxbound;
1088 		    }
1089 		  else
1090 		    pdata->maxlen = build_all_ones_cst (size_type_node);
1091 		}
1092 
1093 	      return true;
1094 	    }
1095 	}
1096 
1097       /* Return success regardless of the result and handle *PDATA
1098 	 in the caller.  */
1099       get_range_strlen (src, pdata, 1);
1100       return true;
1101     }
1102 
1103   if (idx < 0)
1104     {
1105       /* SRC is a string of constant length.  */
1106       pdata->minlen = build_int_cst (size_type_node, ~idx);
1107       pdata->maxlen = pdata->minlen;
1108       pdata->maxbound = pdata->maxlen;
1109       return true;
1110     }
1111 
1112   if (strinfo *si = get_strinfo (idx))
1113     {
1114       pdata->minlen = get_string_length (si);
1115       if (!pdata->minlen && si->nonzero_chars)
1116 	{
1117 	  if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
1118 	    pdata->minlen = si->nonzero_chars;
1119 	  else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
1120 	    {
1121 	      value_range vr;
1122 	      rvals->range_of_expr (vr, si->nonzero_chars, si->stmt);
1123 	      if (range_int_cst_p (&vr))
1124 		{
1125 		  pdata->minlen = vr.min ();
1126 		  pdata->maxlen = vr.max ();
1127 		}
1128 	      else
1129 		pdata->minlen = build_zero_cst (size_type_node);
1130 	    }
1131 	  else
1132 	    pdata->minlen = build_zero_cst (size_type_node);
1133 
1134 	  tree base = si->ptr;
1135 	  if (TREE_CODE (base) == ADDR_EXPR)
1136 	    base = TREE_OPERAND (base, 0);
1137 
1138 	  HOST_WIDE_INT off;
1139 	  poly_int64 poff;
1140 	  base = get_addr_base_and_unit_offset (base, &poff);
1141 	  if (base
1142 	      && DECL_P (base)
1143 	      && TREE_CODE (TREE_TYPE (base)) == ARRAY_TYPE
1144 	      && TYPE_SIZE_UNIT (TREE_TYPE (base))
1145 	      && poff.is_constant (&off))
1146 	    {
1147 	      tree basetype = TREE_TYPE (base);
1148 	      tree size = TYPE_SIZE_UNIT (basetype);
1149 	      if (TREE_CODE (size) == INTEGER_CST)
1150 		{
1151 		  ++off;   /* Increment for the terminating nul.  */
1152 		  tree toffset = build_int_cst (size_type_node, off);
1153 		  pdata->maxlen = fold_build2 (MINUS_EXPR, size_type_node, size,
1154 					       toffset);
1155 		  pdata->maxbound = pdata->maxlen;
1156 		}
1157 	      else
1158 		pdata->maxlen = build_all_ones_cst (size_type_node);
1159 	    }
1160 	  else
1161 	    pdata->maxlen = build_all_ones_cst (size_type_node);
1162 	}
1163       else if (pdata->minlen && TREE_CODE (pdata->minlen) == SSA_NAME)
1164 	{
1165 	  value_range vr;
1166 	  rvals->range_of_expr (vr, si->nonzero_chars, stmt);
1167 	  if (range_int_cst_p (&vr))
1168 	    {
1169 	      pdata->minlen = vr.min ();
1170 	      pdata->maxlen = vr.max ();
1171 	      pdata->maxbound = pdata->maxlen;
1172 	    }
1173 	  else
1174 	    {
1175 	      pdata->minlen = build_zero_cst (size_type_node);
1176 	      pdata->maxlen = build_all_ones_cst (size_type_node);
1177 	    }
1178 	}
1179       else if (pdata->minlen && TREE_CODE (pdata->minlen) == INTEGER_CST)
1180 	{
1181 	  pdata->maxlen = pdata->minlen;
1182 	  pdata->maxbound = pdata->minlen;
1183 	}
1184       else
1185 	{
1186 	  /* For PDATA->MINLEN that's a non-constant expression such
1187 	     as PLUS_EXPR whose value range is unknown, set the bounds
1188 	     to zero and SIZE_MAX.  */
1189 	  pdata->minlen = build_zero_cst (size_type_node);
1190 	  pdata->maxlen = build_all_ones_cst (size_type_node);
1191 	}
1192 
1193       return true;
1194     }
1195 
1196   return false;
1197 }
1198 
1199 /* Analogous to get_range_strlen but for dynamically created strings,
1200    i.e., those created by calls to strcpy as opposed to just string
1201    constants.
1202    Try to obtain the range of the lengths of the string(s) referenced
1203    by SRC, or the size of the largest array SRC refers to if the range
1204    of lengths cannot be determined, and store all in *PDATA.  RVALS
1205    points to the valuation engine used to calculate ranges.  */
1206 
1207 void
get_range_strlen_dynamic(tree src,gimple * stmt,c_strlen_data * pdata,range_query * rvals)1208 get_range_strlen_dynamic (tree src, gimple *stmt, c_strlen_data *pdata,
1209 			  range_query *rvals)
1210 {
1211   bitmap visited = NULL;
1212   tree maxbound = pdata->maxbound;
1213 
1214   unsigned limit = param_ssa_name_def_chain_limit;
1215   if (!get_range_strlen_dynamic (src, stmt, pdata, &visited, rvals, &limit))
1216     {
1217       /* On failure extend the length range to an impossible maximum
1218 	 (a valid MAXLEN must be less than PTRDIFF_MAX - 1).  Other
1219 	 members can stay unchanged regardless.  */
1220       pdata->minlen = ssize_int (0);
1221       pdata->maxlen = build_all_ones_cst (size_type_node);
1222     }
1223   else if (!pdata->minlen)
1224     pdata->minlen = ssize_int (0);
1225 
1226   /* If it's unchanged from it initial non-null value, set the conservative
1227      MAXBOUND to SIZE_MAX.  Otherwise leave it null (if it is null).  */
1228   if (maxbound && pdata->maxbound == maxbound)
1229     pdata->maxbound = build_all_ones_cst (size_type_node);
1230 
1231   if (visited)
1232     BITMAP_FREE (visited);
1233 }
1234 
1235 /* Invalidate string length information for strings whose length might
1236    change due to stores in STMT, except those marked DONT_INVALIDATE.
1237    For string-modifying statements, ZERO_WRITE is set when the statement
1238    wrote only zeros.
1239    Returns true if any STRIDX_TO_STRINFO entries were considered
1240    for invalidation.  */
1241 
1242 static bool
1243 maybe_invalidate (gimple *stmt, bool zero_write = false)
1244 {
1245   if (dump_file && (dump_flags & TDF_DETAILS))
1246     {
1247       fprintf (dump_file, "%s called for ", __func__);
1248       print_gimple_stmt (dump_file, stmt, TDF_LINENO);
1249     }
1250 
1251   strinfo *si;
1252   bool nonempty = false;
1253 
1254   for (unsigned i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
1255     {
1256       if (si == NULL || !POINTER_TYPE_P (TREE_TYPE (si->ptr)))
1257 	continue;
1258 
1259       nonempty = true;
1260 
1261       /* Unconditionally reset DONT_INVALIDATE.  */
1262       bool dont_invalidate = si->dont_invalidate;
1263       si->dont_invalidate = false;
1264 
1265       if (dont_invalidate)
1266 	continue;
1267 
1268       ao_ref r;
1269       tree size = NULL_TREE;
1270       if (si->nonzero_chars)
1271 	{
1272 	  /* Include the terminating nul in the size of the string
1273 	     to consider when determining possible clobber.  */
1274 	  tree type = TREE_TYPE (si->nonzero_chars);
1275 	  size = fold_build2 (PLUS_EXPR, type, si->nonzero_chars,
1276 			      build_int_cst (type, 1));
1277 	}
1278       ao_ref_init_from_ptr_and_size (&r, si->ptr, size);
1279       if (stmt_may_clobber_ref_p_1 (stmt, &r))
1280 	{
1281 	  if (dump_file && (dump_flags & TDF_DETAILS))
1282 	    {
1283 	      fputs ("  statement may clobber object ", dump_file);
1284 	      print_generic_expr (dump_file, si->ptr);
1285 	      if (size && tree_fits_uhwi_p (size))
1286 		fprintf (dump_file, " " HOST_WIDE_INT_PRINT_UNSIGNED
1287 			 " bytes in size", tree_to_uhwi (size));
1288 	      fputc ('\n', dump_file);
1289 	    }
1290 
1291 	  set_strinfo (i, NULL);
1292 	  free_strinfo (si);
1293 	  continue;
1294 	}
1295 
1296       if (size
1297 	  && !zero_write
1298 	  && si->stmt
1299 	  && is_gimple_call (si->stmt)
1300 	  && (DECL_FUNCTION_CODE (gimple_call_fndecl (si->stmt))
1301 	      == BUILT_IN_CALLOC))
1302 	{
1303 	  /* If the clobber test above considered the length of
1304 	     the string (including the nul), then for (potentially)
1305 	     non-zero writes that might modify storage allocated by
1306 	     calloc consider the whole object and if it might be
1307 	     clobbered by the statement reset the statement.  */
1308 	  ao_ref_init_from_ptr_and_size (&r, si->ptr, NULL_TREE);
1309 	  if (stmt_may_clobber_ref_p_1 (stmt, &r))
1310 	    si->stmt = NULL;
1311 	}
1312     }
1313 
1314   if (dump_file && (dump_flags & TDF_DETAILS))
1315     fprintf (dump_file, "%s returns %i\n", __func__, nonempty);
1316 
1317   return nonempty;
1318 }
1319 
1320 /* Unshare strinfo record SI, if it has refcount > 1 or
1321    if stridx_to_strinfo vector is shared with some other
1322    bbs.  */
1323 
1324 static strinfo *
unshare_strinfo(strinfo * si)1325 unshare_strinfo (strinfo *si)
1326 {
1327   strinfo *nsi;
1328 
1329   if (si->refcount == 1 && !strinfo_shared ())
1330     return si;
1331 
1332   nsi = new_strinfo (si->ptr, si->idx, si->nonzero_chars, si->full_string_p);
1333   nsi->stmt = si->stmt;
1334   nsi->alloc = si->alloc;
1335   nsi->endptr = si->endptr;
1336   nsi->first = si->first;
1337   nsi->prev = si->prev;
1338   nsi->next = si->next;
1339   nsi->writable = si->writable;
1340   set_strinfo (si->idx, nsi);
1341   free_strinfo (si);
1342   return nsi;
1343 }
1344 
1345 /* Attempt to create a new strinfo for BASESI + OFF, or find existing
1346    strinfo if there is any.  Return it's idx, or 0 if no strinfo has
1347    been created.  */
1348 
1349 static int
get_stridx_plus_constant(strinfo * basesi,unsigned HOST_WIDE_INT off,tree ptr)1350 get_stridx_plus_constant (strinfo *basesi, unsigned HOST_WIDE_INT off,
1351 			  tree ptr)
1352 {
1353   if (TREE_CODE (ptr) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1354     return 0;
1355 
1356   if (compare_nonzero_chars (basesi, off) < 0
1357       || !tree_fits_uhwi_p (basesi->nonzero_chars))
1358     return 0;
1359 
1360   unsigned HOST_WIDE_INT nonzero_chars
1361     = tree_to_uhwi (basesi->nonzero_chars) - off;
1362   strinfo *si = basesi, *chainsi;
1363   if (si->first || si->prev || si->next)
1364     si = verify_related_strinfos (basesi);
1365   if (si == NULL
1366       || si->nonzero_chars == NULL_TREE
1367       || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1368     return 0;
1369 
1370   if (TREE_CODE (ptr) == SSA_NAME
1371       && ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1372     ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1373 
1374   gcc_checking_assert (compare_tree_int (si->nonzero_chars, off) != -1);
1375   for (chainsi = si; chainsi->next; chainsi = si)
1376     {
1377       si = get_next_strinfo (chainsi);
1378       if (si == NULL
1379 	  || si->nonzero_chars == NULL_TREE
1380 	  || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1381 	break;
1382       int r = compare_tree_int (si->nonzero_chars, nonzero_chars);
1383       if (r != 1)
1384 	{
1385 	  if (r == 0)
1386 	    {
1387 	      if (TREE_CODE (ptr) == SSA_NAME)
1388 		ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = si->idx;
1389 	      else
1390 		{
1391 		  int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1392 		  if (pidx != NULL && *pidx == 0)
1393 		    *pidx = si->idx;
1394 		}
1395 	      return si->idx;
1396 	    }
1397 	  break;
1398 	}
1399     }
1400 
1401   int idx = new_stridx (ptr);
1402   if (idx == 0)
1403     return 0;
1404   si = new_strinfo (ptr, idx, build_int_cst (size_type_node, nonzero_chars),
1405 		    basesi->full_string_p);
1406   set_strinfo (idx, si);
1407   if (strinfo *nextsi = get_strinfo (chainsi->next))
1408     {
1409       nextsi = unshare_strinfo (nextsi);
1410       si->next = nextsi->idx;
1411       nextsi->prev = idx;
1412     }
1413   chainsi = unshare_strinfo (chainsi);
1414   if (chainsi->first == 0)
1415     chainsi->first = chainsi->idx;
1416   chainsi->next = idx;
1417   if (chainsi->endptr == NULL_TREE && zero_length_string_p (si))
1418     chainsi->endptr = ptr;
1419   si->endptr = chainsi->endptr;
1420   si->prev = chainsi->idx;
1421   si->first = chainsi->first;
1422   si->writable = chainsi->writable;
1423   return si->idx;
1424 }
1425 
1426 /* Note that PTR, a pointer SSA_NAME initialized in the current stmt, points
1427    to a zero-length string and if possible chain it to a related strinfo
1428    chain whose part is or might be CHAINSI.  */
1429 
1430 static strinfo *
zero_length_string(tree ptr,strinfo * chainsi)1431 zero_length_string (tree ptr, strinfo *chainsi)
1432 {
1433   strinfo *si;
1434   int idx;
1435   if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1436     ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1437   gcc_checking_assert (TREE_CODE (ptr) == SSA_NAME
1438 		       && ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] == 0);
1439 
1440   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1441     return NULL;
1442   if (chainsi != NULL)
1443     {
1444       si = verify_related_strinfos (chainsi);
1445       if (si)
1446 	{
1447 	  do
1448 	    {
1449 	      /* We shouldn't mix delayed and non-delayed lengths.  */
1450 	      gcc_assert (si->full_string_p);
1451 	      if (si->endptr == NULL_TREE)
1452 		{
1453 		  si = unshare_strinfo (si);
1454 		  si->endptr = ptr;
1455 		}
1456 	      chainsi = si;
1457 	      si = get_next_strinfo (si);
1458 	    }
1459 	  while (si != NULL);
1460 	  if (zero_length_string_p (chainsi))
1461 	    {
1462 	      if (chainsi->next)
1463 		{
1464 		  chainsi = unshare_strinfo (chainsi);
1465 		  chainsi->next = 0;
1466 		}
1467 	      ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = chainsi->idx;
1468 	      return chainsi;
1469 	    }
1470 	}
1471       else
1472 	{
1473 	  /* We shouldn't mix delayed and non-delayed lengths.  */
1474 	  gcc_assert (chainsi->full_string_p);
1475 	  if (chainsi->first || chainsi->prev || chainsi->next)
1476 	    {
1477 	      chainsi = unshare_strinfo (chainsi);
1478 	      chainsi->first = 0;
1479 	      chainsi->prev = 0;
1480 	      chainsi->next = 0;
1481 	    }
1482 	}
1483     }
1484   idx = new_stridx (ptr);
1485   if (idx == 0)
1486     return NULL;
1487   si = new_strinfo (ptr, idx, build_int_cst (size_type_node, 0), true);
1488   set_strinfo (idx, si);
1489   si->endptr = ptr;
1490   if (chainsi != NULL)
1491     {
1492       chainsi = unshare_strinfo (chainsi);
1493       if (chainsi->first == 0)
1494 	chainsi->first = chainsi->idx;
1495       chainsi->next = idx;
1496       if (chainsi->endptr == NULL_TREE)
1497 	chainsi->endptr = ptr;
1498       si->prev = chainsi->idx;
1499       si->first = chainsi->first;
1500       si->writable = chainsi->writable;
1501     }
1502   return si;
1503 }
1504 
1505 /* For strinfo ORIGSI whose length has been just updated, adjust other
1506    related strinfos so that they match the new ORIGSI.  This involves:
1507 
1508    - adding ADJ to the nonzero_chars fields
1509    - copying full_string_p from the new ORIGSI.  */
1510 
1511 static void
adjust_related_strinfos(location_t loc,strinfo * origsi,tree adj)1512 adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj)
1513 {
1514   strinfo *si = verify_related_strinfos (origsi);
1515 
1516   if (si == NULL)
1517     return;
1518 
1519   while (1)
1520     {
1521       strinfo *nsi;
1522 
1523       if (si != origsi)
1524 	{
1525 	  tree tem;
1526 
1527 	  si = unshare_strinfo (si);
1528 	  /* We shouldn't see delayed lengths here; the caller must
1529 	     have calculated the old length in order to calculate
1530 	     the adjustment.  */
1531 	  gcc_assert (si->nonzero_chars);
1532 	  tem = fold_convert_loc (loc, TREE_TYPE (si->nonzero_chars), adj);
1533 	  si->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
1534 					       TREE_TYPE (si->nonzero_chars),
1535 					       si->nonzero_chars, tem);
1536 	  si->full_string_p = origsi->full_string_p;
1537 
1538 	  si->endptr = NULL_TREE;
1539 	  si->dont_invalidate = true;
1540 	}
1541       nsi = get_next_strinfo (si);
1542       if (nsi == NULL)
1543 	return;
1544       si = nsi;
1545     }
1546 }
1547 
1548 /* Find if there are other SSA_NAME pointers equal to PTR
1549    for which we don't track their string lengths yet.  If so, use
1550    IDX for them.  */
1551 
1552 static void
find_equal_ptrs(tree ptr,int idx)1553 find_equal_ptrs (tree ptr, int idx)
1554 {
1555   if (TREE_CODE (ptr) != SSA_NAME)
1556     return;
1557   while (1)
1558     {
1559       gimple *stmt = SSA_NAME_DEF_STMT (ptr);
1560       if (!is_gimple_assign (stmt))
1561 	return;
1562       ptr = gimple_assign_rhs1 (stmt);
1563       switch (gimple_assign_rhs_code (stmt))
1564 	{
1565 	case SSA_NAME:
1566 	  break;
1567 	CASE_CONVERT:
1568 	  if (!POINTER_TYPE_P (TREE_TYPE (ptr)))
1569 	    return;
1570 	  if (TREE_CODE (ptr) == SSA_NAME)
1571 	    break;
1572 	  if (TREE_CODE (ptr) != ADDR_EXPR)
1573 	    return;
1574 	  /* FALLTHRU */
1575 	case ADDR_EXPR:
1576 	  {
1577 	    int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1578 	    if (pidx != NULL && *pidx == 0)
1579 	      *pidx = idx;
1580 	    return;
1581 	  }
1582 	default:
1583 	  return;
1584 	}
1585 
1586       /* We might find an endptr created in this pass.  Grow the
1587 	 vector in that case.  */
1588       if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1589 	ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1590 
1591       if (ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] != 0)
1592 	return;
1593       ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = idx;
1594     }
1595 }
1596 
1597 /* Return true if STMT is a call to a builtin function with the right
1598    arguments and attributes that should be considered for optimization
1599    by this pass.  */
1600 
1601 static bool
valid_builtin_call(gimple * stmt)1602 valid_builtin_call (gimple *stmt)
1603 {
1604   if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
1605     return false;
1606 
1607   tree callee = gimple_call_fndecl (stmt);
1608   tree decl = builtin_decl_explicit (DECL_FUNCTION_CODE (callee));
1609   if (decl
1610       && decl != callee
1611       && !gimple_builtin_call_types_compatible_p (stmt, decl))
1612     return false;
1613 
1614   switch (DECL_FUNCTION_CODE (callee))
1615     {
1616     case BUILT_IN_MEMCMP:
1617     case BUILT_IN_MEMCMP_EQ:
1618     case BUILT_IN_STRCMP:
1619     case BUILT_IN_STRNCMP:
1620     case BUILT_IN_STRCHR:
1621     case BUILT_IN_STRLEN:
1622     case BUILT_IN_STRNLEN:
1623       /* The above functions should be pure.  Punt if they aren't.  */
1624       if (gimple_vdef (stmt) || gimple_vuse (stmt) == NULL_TREE)
1625 	return false;
1626       break;
1627 
1628     case BUILT_IN_ALLOCA:
1629     case BUILT_IN_ALLOCA_WITH_ALIGN:
1630     case BUILT_IN_CALLOC:
1631     case BUILT_IN_MALLOC:
1632     case BUILT_IN_MEMCPY:
1633     case BUILT_IN_MEMCPY_CHK:
1634     case BUILT_IN_MEMPCPY:
1635     case BUILT_IN_MEMPCPY_CHK:
1636     case BUILT_IN_MEMSET:
1637     case BUILT_IN_STPCPY:
1638     case BUILT_IN_STPCPY_CHK:
1639     case BUILT_IN_STPNCPY:
1640     case BUILT_IN_STPNCPY_CHK:
1641     case BUILT_IN_STRCAT:
1642     case BUILT_IN_STRCAT_CHK:
1643     case BUILT_IN_STRCPY:
1644     case BUILT_IN_STRCPY_CHK:
1645     case BUILT_IN_STRNCAT:
1646     case BUILT_IN_STRNCAT_CHK:
1647     case BUILT_IN_STRNCPY:
1648     case BUILT_IN_STRNCPY_CHK:
1649       /* The above functions should be neither const nor pure.  Punt if they
1650 	 aren't.  */
1651       if (gimple_vdef (stmt) == NULL_TREE || gimple_vuse (stmt) == NULL_TREE)
1652 	return false;
1653       break;
1654 
1655     default:
1656       break;
1657     }
1658 
1659   return true;
1660 }
1661 
1662 /* If the last .MEM setter statement before STMT is
1663    memcpy (x, y, strlen (y) + 1), the only .MEM use of it is STMT
1664    and STMT is known to overwrite x[strlen (x)], adjust the last memcpy to
1665    just memcpy (x, y, strlen (y)).  SI must be the zero length
1666    strinfo.  */
1667 
1668 static void
adjust_last_stmt(strinfo * si,gimple * stmt,bool is_strcat,pointer_query & ptr_qry)1669 adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat,
1670 		  pointer_query &ptr_qry)
1671 {
1672   tree vuse, callee, len;
1673   struct laststmt_struct last = laststmt;
1674   strinfo *lastsi, *firstsi;
1675   unsigned len_arg_no = 2;
1676 
1677   laststmt.stmt = NULL;
1678   laststmt.len = NULL_TREE;
1679   laststmt.stridx = 0;
1680 
1681   if (last.stmt == NULL)
1682     return;
1683 
1684   vuse = gimple_vuse (stmt);
1685   if (vuse == NULL_TREE
1686       || SSA_NAME_DEF_STMT (vuse) != last.stmt
1687       || !has_single_use (vuse))
1688     return;
1689 
1690   gcc_assert (last.stridx > 0);
1691   lastsi = get_strinfo (last.stridx);
1692   if (lastsi == NULL)
1693     return;
1694 
1695   if (lastsi != si)
1696     {
1697       if (lastsi->first == 0 || lastsi->first != si->first)
1698 	return;
1699 
1700       firstsi = verify_related_strinfos (si);
1701       if (firstsi == NULL)
1702 	return;
1703       while (firstsi != lastsi)
1704 	{
1705 	  firstsi = get_next_strinfo (firstsi);
1706 	  if (firstsi == NULL)
1707 	    return;
1708 	}
1709     }
1710 
1711   if (!is_strcat && !zero_length_string_p (si))
1712     return;
1713 
1714   if (is_gimple_assign (last.stmt))
1715     {
1716       gimple_stmt_iterator gsi;
1717 
1718       if (!integer_zerop (gimple_assign_rhs1 (last.stmt)))
1719 	return;
1720       if (stmt_could_throw_p (cfun, last.stmt))
1721 	return;
1722       gsi = gsi_for_stmt (last.stmt);
1723       unlink_stmt_vdef (last.stmt);
1724       release_defs (last.stmt);
1725       gsi_remove (&gsi, true);
1726       return;
1727     }
1728 
1729   if (!valid_builtin_call (last.stmt))
1730     return;
1731 
1732   callee = gimple_call_fndecl (last.stmt);
1733   switch (DECL_FUNCTION_CODE (callee))
1734     {
1735     case BUILT_IN_MEMCPY:
1736     case BUILT_IN_MEMCPY_CHK:
1737       break;
1738     default:
1739       return;
1740     }
1741 
1742   len = gimple_call_arg (last.stmt, len_arg_no);
1743   if (tree_fits_uhwi_p (len))
1744     {
1745       if (!tree_fits_uhwi_p (last.len)
1746 	  || integer_zerop (len)
1747 	  || tree_to_uhwi (len) != tree_to_uhwi (last.len) + 1)
1748 	return;
1749       /* Don't adjust the length if it is divisible by 4, it is more efficient
1750 	 to store the extra '\0' in that case.  */
1751       if ((tree_to_uhwi (len) & 3) == 0)
1752 	return;
1753 
1754       /* Don't fold away an out of bounds access, as this defeats proper
1755 	 warnings.  */
1756       tree dst = gimple_call_arg (last.stmt, 0);
1757 
1758       access_ref aref;
1759       tree size = compute_objsize (dst, 1, &aref, &ptr_qry);
1760       if (size && tree_int_cst_lt (size, len))
1761 	return;
1762     }
1763   else if (TREE_CODE (len) == SSA_NAME)
1764     {
1765       gimple *def_stmt = SSA_NAME_DEF_STMT (len);
1766       if (!is_gimple_assign (def_stmt)
1767 	  || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
1768 	  || gimple_assign_rhs1 (def_stmt) != last.len
1769 	  || !integer_onep (gimple_assign_rhs2 (def_stmt)))
1770 	return;
1771     }
1772   else
1773     return;
1774 
1775   gimple_call_set_arg (last.stmt, len_arg_no, last.len);
1776   update_stmt (last.stmt);
1777 }
1778 
1779 /* For an LHS that is an SSA_NAME that is the result of a strlen()
1780    call, or when BOUND is non-null, of a strnlen() call, set LHS
1781    range info to [0, min (MAX, BOUND)] when the range includes more
1782    than one value and return LHS.  Otherwise, when the range
1783    [MIN, MAX] is such that MIN == MAX, return the tree representation
1784    of (MIN). The latter allows callers to fold suitable strnlen() calls
1785    to constants.  */
1786 
1787 tree
set_strlen_range(tree lhs,wide_int min,wide_int max,tree bound)1788 set_strlen_range (tree lhs, wide_int min, wide_int max,
1789 		  tree bound /* = NULL_TREE */)
1790 {
1791   if (TREE_CODE (lhs) != SSA_NAME
1792       || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1793     return NULL_TREE;
1794 
1795   if (bound)
1796     {
1797       /* For strnlen, adjust MIN and MAX as necessary.  If the bound
1798 	 is less than the size of the array set MAX to it.  It it's
1799 	 greater than MAX and MAX is non-zero bump MAX down to account
1800 	 for the necessary terminating nul.  Otherwise leave it alone.  */
1801       if (TREE_CODE (bound) == INTEGER_CST)
1802 	{
1803 	  wide_int wibnd = wi::to_wide (bound);
1804 	  int cmp = wi::cmpu (wibnd, max);
1805 	  if (cmp < 0)
1806 	    max = wibnd;
1807 	  else if (cmp && wi::ne_p (max, min))
1808 	    --max;
1809 	}
1810       else if (TREE_CODE (bound) == SSA_NAME)
1811 	{
1812 	  wide_int minbound, maxbound;
1813 	  // FIXME: Use range_query instead of global ranges.
1814 	  value_range_kind rng = get_range_info (bound, &minbound, &maxbound);
1815 	  if (rng == VR_RANGE)
1816 	    {
1817 	      /* For a bound in a known range, adjust the range determined
1818 		 above as necessary.  For a bound in some anti-range or
1819 		 in an unknown range, use the range determined by callers.  */
1820 	      if (wi::ltu_p (minbound, min))
1821 		min = minbound;
1822 	      if (wi::ltu_p (maxbound, max))
1823 		max = maxbound;
1824 	    }
1825 	}
1826     }
1827 
1828   if (min == max)
1829     return wide_int_to_tree (size_type_node, min);
1830 
1831   set_range_info (lhs, VR_RANGE, min, max);
1832   return lhs;
1833 }
1834 
1835 /* For an LHS that is an SSA_NAME and for strlen() or strnlen() argument
1836    SRC, set LHS range info to [0, min (N, BOUND)] if SRC refers to
1837    a character array A[N] with unknown length bounded by N, and for
1838    strnlen(), by min (N, BOUND).  */
1839 
1840 static tree
maybe_set_strlen_range(tree lhs,tree src,tree bound)1841 maybe_set_strlen_range (tree lhs, tree src, tree bound)
1842 {
1843   if (TREE_CODE (lhs) != SSA_NAME
1844       || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1845     return NULL_TREE;
1846 
1847   if (TREE_CODE (src) == SSA_NAME)
1848     {
1849       gimple *def = SSA_NAME_DEF_STMT (src);
1850       if (is_gimple_assign (def)
1851 	  && gimple_assign_rhs_code (def) == ADDR_EXPR)
1852 	src = gimple_assign_rhs1 (def);
1853     }
1854 
1855   /* The longest string is PTRDIFF_MAX - 1 bytes including the final
1856      NUL so that the difference between a pointer to just past it and
1857      one to its beginning is positive.  */
1858   wide_int max = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
1859 
1860   if (TREE_CODE (src) == ADDR_EXPR)
1861     {
1862       /* The last array member of a struct can be bigger than its size
1863 	 suggests if it's treated as a poor-man's flexible array member.  */
1864       src = TREE_OPERAND (src, 0);
1865       if (TREE_CODE (src) != MEM_REF
1866 	  && !array_at_struct_end_p (src))
1867 	{
1868 	  tree type = TREE_TYPE (src);
1869 	  tree size = TYPE_SIZE_UNIT (type);
1870 	  if (size
1871 	      && TREE_CODE (size) == INTEGER_CST
1872 	      && !integer_zerop (size))
1873 	    {
1874 	      /* Even though such uses of strlen would be undefined,
1875 		 avoid relying on arrays of arrays in case some genius
1876 		 decides to call strlen on an unterminated array element
1877 		 that's followed by a terminated one.  Likewise, avoid
1878 		 assuming that a struct array member is necessarily
1879 		 nul-terminated (the nul may be in the member that
1880 		 follows).  In those cases, assume that the length
1881 		 of the string stored in such an array is bounded
1882 		 by the size of the enclosing object if one can be
1883 		 determined.  */
1884 	      tree base = get_base_address (src);
1885 	      if (VAR_P (base))
1886 		{
1887 		  if (tree size = DECL_SIZE_UNIT (base))
1888 		    if (size
1889 			&& TREE_CODE (size) == INTEGER_CST
1890 			&& TREE_CODE (TREE_TYPE (base)) != POINTER_TYPE)
1891 		      max = wi::to_wide (size);
1892 		}
1893 	    }
1894 
1895 	  /* For strlen() the upper bound above is equal to
1896 	     the longest string that can be stored in the array
1897 	     (i.e., it accounts for the terminating nul.  For
1898 	     strnlen() bump up the maximum by one since the array
1899 	     need not be nul-terminated.  */
1900 	  if (!bound && max != 0)
1901 	    --max;
1902 	}
1903     }
1904 
1905   wide_int min = wi::zero (max.get_precision ());
1906   return set_strlen_range (lhs, min, max, bound);
1907 }
1908 
1909 /* Diagnose buffer overflow by a STMT writing LEN + PLUS_ONE bytes,
1910    either into a region allocated for the object SI when non-null,
1911    or into an object designated by the LHS of STMT otherwise.
1912    When nonnull uses RVALS to determine range information.
1913    RAWMEM may be set by memcpy and other raw memory functions
1914    to allow accesses across subobject boundaries.  */
1915 
1916 static void
1917 maybe_warn_overflow (gimple *stmt, tree len, pointer_query &ptr_qry,
1918 		     strinfo *si = NULL, bool plus_one = false,
1919 		     bool rawmem = false)
1920 {
1921   if (!len || gimple_no_warning_p (stmt))
1922     return;
1923 
1924   /* The DECL of the function performing the write if it is done
1925      by one.  */
1926   tree writefn = NULL_TREE;
1927   /* The destination expression involved in the store STMT.  */
1928   tree dest = NULL_TREE;
1929 
1930   if (is_gimple_assign (stmt))
1931     dest = gimple_assign_lhs (stmt);
1932   else if (is_gimple_call (stmt))
1933     {
1934       dest = gimple_call_arg (stmt, 0);
1935       writefn = gimple_call_fndecl (stmt);
1936     }
1937   else
1938     return;
1939 
1940   if (TREE_NO_WARNING (dest))
1941     return;
1942 
1943   const int ostype = rawmem ? 0 : 1;
1944 
1945   /* Use maximum precision to avoid overflow in the addition below.
1946      Make sure all operands have the same precision to keep wide_int
1947      from ICE'ing.  */
1948 
1949   access_ref aref;
1950   /* The size of the destination region (which is smaller than
1951      the destination object for stores at a non-zero offset).  */
1952   tree destsize = compute_objsize (dest, ostype, &aref, &ptr_qry);
1953 
1954   if (!destsize)
1955     {
1956       aref.sizrng[0] = 0;
1957       aref.sizrng[1] = wi::to_offset (max_object_size ());
1958     }
1959 
1960   /* Return early if the DESTSIZE size expression is the same as LEN
1961      and the offset into the destination is zero.  This might happen
1962      in the case of a pair of malloc and memset calls to allocate
1963      an object and clear it as if by calloc.  */
1964   if (destsize == len && !plus_one
1965       && aref.offrng[0] == 0 && aref.offrng[0] == aref.offrng[1])
1966     return;
1967 
1968   wide_int rng[2];
1969   if (!get_range (len, stmt, rng, ptr_qry.rvals))
1970     return;
1971 
1972   widest_int lenrng[2] =
1973     { widest_int::from (rng[0], SIGNED), widest_int::from (rng[1], SIGNED) };
1974 
1975   if (plus_one)
1976     {
1977       lenrng[0] += 1;
1978       lenrng[1] += 1;
1979     }
1980 
1981   /* The size of the remaining space in the destination computed
1982      as the size of the latter minus the offset into it.  */
1983   widest_int spcrng[2];
1984   {
1985     offset_int remrng[2];
1986     remrng[1] = aref.size_remaining (remrng);
1987     spcrng[0] = remrng[0] == -1 ? 0 : widest_int::from (remrng[0], UNSIGNED);
1988     spcrng[1] = widest_int::from (remrng[1], UNSIGNED);
1989   }
1990 
1991   if (wi::leu_p (lenrng[0], spcrng[0])
1992       && wi::leu_p (lenrng[1], spcrng[1]))
1993     return;
1994 
1995   location_t loc = gimple_or_expr_nonartificial_location (stmt, dest);
1996   bool warned = false;
1997   if (wi::leu_p (lenrng[0], spcrng[1]))
1998     {
1999       if (len != destsize
2000 	  && (!si || rawmem || !is_strlen_related_p (si->ptr, len)))
2001 	return;
2002 
2003       warned = (writefn
2004 		? warning_at (loc, OPT_Wstringop_overflow_,
2005 			      "%G%qD writing one too many bytes into a region "
2006 			      "of a size that depends on %<strlen%>",
2007 			      stmt, writefn)
2008 		: warning_at (loc, OPT_Wstringop_overflow_,
2009 			      "%Gwriting one too many bytes into a region "
2010 			      "of a size that depends on %<strlen%>",
2011 			      stmt));
2012     }
2013   else if (lenrng[0] == lenrng[1])
2014     {
2015       if (spcrng[0] == spcrng[1])
2016 	warned = (writefn
2017 		  ? warning_n (loc, OPT_Wstringop_overflow_,
2018 			       lenrng[0].to_uhwi (),
2019 			       "%G%qD writing %wu byte into a region "
2020 			       "of size %wu",
2021 			       "%G%qD writing %wu bytes into a region "
2022 			       "of size %wu",
2023 			       stmt, writefn, lenrng[0].to_uhwi (),
2024 			       spcrng[0].to_uhwi ())
2025 		  : warning_n (loc, OPT_Wstringop_overflow_,
2026 			       lenrng[0].to_uhwi (),
2027 			       "%Gwriting %wu byte into a region "
2028 			       "of size %wu",
2029 			       "%Gwriting %wu bytes into a region "
2030 			       "of size %wu",
2031 			       stmt, lenrng[0].to_uhwi (),
2032 			       spcrng[0].to_uhwi ()));
2033       else
2034 	warned = (writefn
2035 		  ? warning_n (loc, OPT_Wstringop_overflow_,
2036 			       lenrng[0].to_uhwi (),
2037 			       "%G%qD writing %wu byte into a region "
2038 			       "of size between %wu and %wu",
2039 			       "%G%qD writing %wu bytes into a region "
2040 			       "of size between %wu and %wu",
2041 			       stmt, writefn, lenrng[0].to_uhwi (),
2042 			       spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2043 		  : warning_n (loc, OPT_Wstringop_overflow_,
2044 			       lenrng[0].to_uhwi (),
2045 			       "%Gwriting %wu byte into a region "
2046 			       "of size between %wu and %wu",
2047 			       "%Gwriting %wu bytes into a region "
2048 			       "of size between %wu and %wu",
2049 			       stmt, lenrng[0].to_uhwi (),
2050 			       spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2051     }
2052   else if (spcrng[0] == spcrng[1])
2053     warned = (writefn
2054 	      ? warning_at (loc, OPT_Wstringop_overflow_,
2055 			    "%G%qD writing between %wu and %wu bytes "
2056 			    "into a region of size %wu",
2057 			    stmt, writefn, lenrng[0].to_uhwi (),
2058 			    lenrng[1].to_uhwi (),
2059 			    spcrng[0].to_uhwi ())
2060 	      : warning_at (loc, OPT_Wstringop_overflow_,
2061 			    "%Gwriting between %wu and %wu bytes "
2062 			    "into a region of size %wu",
2063 			    stmt, lenrng[0].to_uhwi (),
2064 			    lenrng[1].to_uhwi (),
2065 			    spcrng[0].to_uhwi ()));
2066   else
2067     warned = (writefn
2068 	      ? warning_at (loc, OPT_Wstringop_overflow_,
2069 			    "%G%qD writing between %wu and %wu bytes "
2070 			    "into a region of size between %wu and %wu",
2071 			    stmt, writefn, lenrng[0].to_uhwi (),
2072 			    lenrng[1].to_uhwi (),
2073 			    spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2074 	      : warning_at (loc, OPT_Wstringop_overflow_,
2075 			    "%Gwriting between %wu and %wu bytes "
2076 			    "into a region of size between %wu and %wu",
2077 			    stmt, lenrng[0].to_uhwi (),
2078 			    lenrng[1].to_uhwi (),
2079 			    spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2080 
2081   if (!warned)
2082     return;
2083 
2084   gimple_set_no_warning (stmt, true);
2085 
2086   aref.inform_access (access_write_only);
2087 }
2088 
2089 /* Convenience wrapper for the above.  */
2090 
2091 static inline void
2092 maybe_warn_overflow (gimple *stmt, unsigned HOST_WIDE_INT len,
2093 		     pointer_query &ptr_qry, strinfo *si = NULL,
2094 		     bool plus_one = false, bool rawmem = false)
2095 {
2096   maybe_warn_overflow (stmt, build_int_cst (size_type_node, len), ptr_qry,
2097 		       si, plus_one, rawmem);
2098 }
2099 
2100 /* Handle a strlen call.  If strlen of the argument is known, replace
2101    the strlen call with the known value, otherwise remember that strlen
2102    of the argument is stored in the lhs SSA_NAME.  */
2103 
2104 static void
handle_builtin_strlen(gimple_stmt_iterator * gsi)2105 handle_builtin_strlen (gimple_stmt_iterator *gsi)
2106 {
2107   gimple *stmt = gsi_stmt (*gsi);
2108   tree lhs = gimple_call_lhs (stmt);
2109 
2110   if (lhs == NULL_TREE)
2111     return;
2112 
2113   location_t loc = gimple_location (stmt);
2114   tree callee = gimple_call_fndecl (stmt);
2115   tree src = gimple_call_arg (stmt, 0);
2116   tree bound = (DECL_FUNCTION_CODE (callee) == BUILT_IN_STRNLEN
2117 		? gimple_call_arg (stmt, 1) : NULL_TREE);
2118   int idx = get_stridx (src);
2119   if (idx || (bound && integer_zerop (bound)))
2120     {
2121       strinfo *si = NULL;
2122       tree rhs;
2123 
2124       if (idx < 0)
2125 	rhs = build_int_cst (TREE_TYPE (lhs), ~idx);
2126       else if (idx == 0)
2127 	rhs = bound;
2128       else
2129 	{
2130 	  rhs = NULL_TREE;
2131 	  si = get_strinfo (idx);
2132 	  if (si != NULL)
2133 	    {
2134 	      rhs = get_string_length (si);
2135 	      /* For strnlen, if bound is constant, even if si is not known
2136 		 to be zero terminated, if we know at least bound bytes are
2137 		 not zero, the return value will be bound.  */
2138 	      if (rhs == NULL_TREE
2139 		  && bound != NULL_TREE
2140 		  && TREE_CODE (bound) == INTEGER_CST
2141 		  && si->nonzero_chars != NULL_TREE
2142 		  && TREE_CODE (si->nonzero_chars) == INTEGER_CST
2143 		  && tree_int_cst_le (bound, si->nonzero_chars))
2144 		rhs = bound;
2145 	    }
2146 	}
2147       if (rhs != NULL_TREE)
2148 	{
2149 	  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2150 	    {
2151 	      fprintf (dump_file, "Optimizing: ");
2152 	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2153 	    }
2154 	  rhs = unshare_expr (rhs);
2155 	  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2156 	    rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2157 
2158 	  if (bound)
2159 	    rhs = fold_build2_loc (loc, MIN_EXPR, TREE_TYPE (rhs), rhs, bound);
2160 
2161 	  if (!update_call_from_tree (gsi, rhs))
2162 	    gimplify_and_update_call_from_tree (gsi, rhs);
2163 	  stmt = gsi_stmt (*gsi);
2164 	  update_stmt (stmt);
2165 	  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2166 	    {
2167 	      fprintf (dump_file, "into: ");
2168 	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2169 	    }
2170 
2171 	  if (si != NULL
2172 	      /* Don't update anything for strnlen.  */
2173 	      && bound == NULL_TREE
2174 	      && TREE_CODE (si->nonzero_chars) != SSA_NAME
2175 	      && TREE_CODE (si->nonzero_chars) != INTEGER_CST
2176 	      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2177 	    {
2178 	      si = unshare_strinfo (si);
2179 	      si->nonzero_chars = lhs;
2180 	      gcc_assert (si->full_string_p);
2181 	    }
2182 
2183 	  if (strlen_to_stridx
2184 	      && (bound == NULL_TREE
2185 		  /* For strnlen record this only if the call is proven
2186 		     to return the same value as strlen would.  */
2187 		  || (TREE_CODE (bound) == INTEGER_CST
2188 		      && TREE_CODE (rhs) == INTEGER_CST
2189 		      && tree_int_cst_lt (rhs, bound))))
2190 	    strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2191 
2192 	  return;
2193 	}
2194     }
2195   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2196     return;
2197 
2198   if (idx == 0)
2199     idx = new_stridx (src);
2200   else
2201     {
2202       strinfo *si = get_strinfo (idx);
2203       if (si != NULL)
2204 	{
2205 	  if (!si->full_string_p && !si->stmt)
2206 	    {
2207 	      /* Until now we only had a lower bound on the string length.
2208 		 Install LHS as the actual length.  */
2209 	      si = unshare_strinfo (si);
2210 	      tree old = si->nonzero_chars;
2211 	      si->nonzero_chars = lhs;
2212 	      si->full_string_p = true;
2213 	      if (old && TREE_CODE (old) == INTEGER_CST)
2214 		{
2215 		  old = fold_convert_loc (loc, TREE_TYPE (lhs), old);
2216 		  tree adj = fold_build2_loc (loc, MINUS_EXPR,
2217 					      TREE_TYPE (lhs), lhs, old);
2218 		  adjust_related_strinfos (loc, si, adj);
2219 		  /* Use the constant minimum length as the lower bound
2220 		     of the non-constant length.  */
2221 		  wide_int min = wi::to_wide (old);
2222 		  wide_int max
2223 		    = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
2224 		  set_strlen_range (lhs, min, max);
2225 		}
2226 	      else
2227 		{
2228 		  si->first = 0;
2229 		  si->prev = 0;
2230 		  si->next = 0;
2231 		}
2232 	    }
2233 	  return;
2234 	}
2235     }
2236   if (idx)
2237     {
2238       if (!bound)
2239 	{
2240 	  /* Only store the new length information for calls to strlen(),
2241 	     not for those to strnlen().  */
2242 	  strinfo *si = new_strinfo (src, idx, lhs, true);
2243 	  set_strinfo (idx, si);
2244 	  find_equal_ptrs (src, idx);
2245 	}
2246 
2247       /* For SRC that is an array of N elements, set LHS's range
2248 	 to [0, min (N, BOUND)].  A constant return value means
2249 	 the range would have consisted of a single value.  In
2250 	 that case, fold the result into the returned constant.  */
2251       if (tree ret = maybe_set_strlen_range (lhs, src, bound))
2252 	if (TREE_CODE (ret) == INTEGER_CST)
2253 	  {
2254 	    if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2255 	      {
2256 		fprintf (dump_file, "Optimizing: ");
2257 		print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2258 	      }
2259 	    if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (ret)))
2260 	      ret = fold_convert_loc (loc, TREE_TYPE (lhs), ret);
2261 	    if (!update_call_from_tree (gsi, ret))
2262 	      gimplify_and_update_call_from_tree (gsi, ret);
2263 	    stmt = gsi_stmt (*gsi);
2264 	    update_stmt (stmt);
2265 	    if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2266 	      {
2267 		fprintf (dump_file, "into: ");
2268 		print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2269 	      }
2270 	  }
2271 
2272       if (strlen_to_stridx && !bound)
2273 	strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2274     }
2275 }
2276 
2277 /* Handle a strchr call.  If strlen of the first argument is known, replace
2278    the strchr (x, 0) call with the endptr or x + strlen, otherwise remember
2279    that lhs of the call is endptr and strlen of the argument is endptr - x.  */
2280 
2281 static void
handle_builtin_strchr(gimple_stmt_iterator * gsi)2282 handle_builtin_strchr (gimple_stmt_iterator *gsi)
2283 {
2284   gimple *stmt = gsi_stmt (*gsi);
2285   tree lhs = gimple_call_lhs (stmt);
2286 
2287   if (lhs == NULL_TREE)
2288     return;
2289 
2290   if (!integer_zerop (gimple_call_arg (stmt, 1)))
2291     return;
2292 
2293   tree src = gimple_call_arg (stmt, 0);
2294 
2295   /* Avoid folding if the first argument is not a nul-terminated array.
2296      Defer warning until later.  */
2297   if (!check_nul_terminated_array (NULL_TREE, src))
2298     return;
2299 
2300   int idx = get_stridx (src);
2301   if (idx)
2302     {
2303       strinfo *si = NULL;
2304       tree rhs;
2305 
2306       if (idx < 0)
2307 	rhs = build_int_cst (size_type_node, ~idx);
2308       else
2309 	{
2310 	  rhs = NULL_TREE;
2311 	  si = get_strinfo (idx);
2312 	  if (si != NULL)
2313 	    rhs = get_string_length (si);
2314 	}
2315       if (rhs != NULL_TREE)
2316 	{
2317 	  location_t loc = gimple_location (stmt);
2318 
2319 	  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2320 	    {
2321 	      fprintf (dump_file, "Optimizing: ");
2322 	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2323 	    }
2324 	  if (si != NULL && si->endptr != NULL_TREE)
2325 	    {
2326 	      rhs = unshare_expr (si->endptr);
2327 	      if (!useless_type_conversion_p (TREE_TYPE (lhs),
2328 					      TREE_TYPE (rhs)))
2329 		rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2330 	    }
2331 	  else
2332 	    {
2333 	      rhs = fold_convert_loc (loc, sizetype, unshare_expr (rhs));
2334 	      rhs = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2335 				     TREE_TYPE (src), src, rhs);
2336 	      if (!useless_type_conversion_p (TREE_TYPE (lhs),
2337 					      TREE_TYPE (rhs)))
2338 		rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2339 	    }
2340 	  if (!update_call_from_tree (gsi, rhs))
2341 	    gimplify_and_update_call_from_tree (gsi, rhs);
2342 	  stmt = gsi_stmt (*gsi);
2343 	  update_stmt (stmt);
2344 	  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2345 	    {
2346 	      fprintf (dump_file, "into: ");
2347 	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2348 	    }
2349 	  if (si != NULL
2350 	      && si->endptr == NULL_TREE
2351 	      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2352 	    {
2353 	      si = unshare_strinfo (si);
2354 	      si->endptr = lhs;
2355 	    }
2356 	  zero_length_string (lhs, si);
2357 	  return;
2358 	}
2359     }
2360   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2361     return;
2362   if (TREE_CODE (src) != SSA_NAME || !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (src))
2363     {
2364       if (idx == 0)
2365 	idx = new_stridx (src);
2366       else if (get_strinfo (idx) != NULL)
2367 	{
2368 	  zero_length_string (lhs, NULL);
2369 	  return;
2370 	}
2371       if (idx)
2372 	{
2373 	  location_t loc = gimple_location (stmt);
2374 	  tree lhsu = fold_convert_loc (loc, size_type_node, lhs);
2375 	  tree srcu = fold_convert_loc (loc, size_type_node, src);
2376 	  tree length = fold_build2_loc (loc, MINUS_EXPR,
2377 					 size_type_node, lhsu, srcu);
2378 	  strinfo *si = new_strinfo (src, idx, length, true);
2379 	  si->endptr = lhs;
2380 	  set_strinfo (idx, si);
2381 	  find_equal_ptrs (src, idx);
2382 	  zero_length_string (lhs, si);
2383 	}
2384     }
2385   else
2386     zero_length_string (lhs, NULL);
2387 }
2388 
2389 /* Handle a strcpy-like ({st{r,p}cpy,__st{r,p}cpy_chk}) call.
2390    If strlen of the second argument is known, strlen of the first argument
2391    is the same after this call.  Furthermore, attempt to convert it to
2392    memcpy.  Uses RVALS to determine range information.  */
2393 
2394 static void
handle_builtin_strcpy(enum built_in_function bcode,gimple_stmt_iterator * gsi,pointer_query & ptr_qry)2395 handle_builtin_strcpy (enum built_in_function bcode, gimple_stmt_iterator *gsi,
2396 		       pointer_query &ptr_qry)
2397 {
2398   int idx, didx;
2399   tree src, dst, srclen, len, lhs, type, fn, oldlen;
2400   bool success;
2401   gimple *stmt = gsi_stmt (*gsi);
2402   strinfo *si, *dsi, *olddsi, *zsi;
2403   location_t loc;
2404 
2405   src = gimple_call_arg (stmt, 1);
2406   dst = gimple_call_arg (stmt, 0);
2407   lhs = gimple_call_lhs (stmt);
2408   idx = get_stridx (src);
2409   si = NULL;
2410   if (idx > 0)
2411     si = get_strinfo (idx);
2412 
2413   didx = get_stridx (dst);
2414   olddsi = NULL;
2415   oldlen = NULL_TREE;
2416   if (didx > 0)
2417     olddsi = get_strinfo (didx);
2418   else if (didx < 0)
2419     return;
2420 
2421   if (olddsi != NULL)
2422     adjust_last_stmt (olddsi, stmt, false, ptr_qry);
2423 
2424   srclen = NULL_TREE;
2425   if (si != NULL)
2426     srclen = get_string_length (si);
2427   else if (idx < 0)
2428     srclen = build_int_cst (size_type_node, ~idx);
2429 
2430   maybe_warn_overflow (stmt, srclen, ptr_qry, olddsi, true);
2431 
2432   if (olddsi != NULL)
2433     adjust_last_stmt (olddsi, stmt, false, ptr_qry);
2434 
2435   loc = gimple_location (stmt);
2436   if (srclen == NULL_TREE)
2437     switch (bcode)
2438       {
2439       case BUILT_IN_STRCPY:
2440       case BUILT_IN_STRCPY_CHK:
2441 	if (lhs != NULL_TREE || !builtin_decl_implicit_p (BUILT_IN_STPCPY))
2442 	  return;
2443 	break;
2444       case BUILT_IN_STPCPY:
2445       case BUILT_IN_STPCPY_CHK:
2446 	if (lhs == NULL_TREE)
2447 	  return;
2448 	else
2449 	  {
2450 	    tree lhsuint = fold_convert_loc (loc, size_type_node, lhs);
2451 	    srclen = fold_convert_loc (loc, size_type_node, dst);
2452 	    srclen = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
2453 				      lhsuint, srclen);
2454 	  }
2455 	break;
2456       default:
2457 	gcc_unreachable ();
2458       }
2459 
2460   if (didx == 0)
2461     {
2462       didx = new_stridx (dst);
2463       if (didx == 0)
2464 	return;
2465     }
2466   if (olddsi != NULL)
2467     {
2468       oldlen = olddsi->nonzero_chars;
2469       dsi = unshare_strinfo (olddsi);
2470       dsi->nonzero_chars = srclen;
2471       dsi->full_string_p = (srclen != NULL_TREE);
2472       /* Break the chain, so adjust_related_strinfo on later pointers in
2473 	 the chain won't adjust this one anymore.  */
2474       dsi->next = 0;
2475       dsi->stmt = NULL;
2476       dsi->endptr = NULL_TREE;
2477     }
2478   else
2479     {
2480       dsi = new_strinfo (dst, didx, srclen, srclen != NULL_TREE);
2481       set_strinfo (didx, dsi);
2482       find_equal_ptrs (dst, didx);
2483     }
2484   dsi->writable = true;
2485   dsi->dont_invalidate = true;
2486 
2487   if (dsi->nonzero_chars == NULL_TREE)
2488     {
2489       strinfo *chainsi;
2490 
2491       /* If string length of src is unknown, use delayed length
2492 	 computation.  If string length of dst will be needed, it
2493 	 can be computed by transforming this strcpy call into
2494 	 stpcpy and subtracting dst from the return value.  */
2495 
2496       /* Look for earlier strings whose length could be determined if
2497 	 this strcpy is turned into an stpcpy.  */
2498 
2499       if (dsi->prev != 0 && (chainsi = verify_related_strinfos (dsi)) != NULL)
2500 	{
2501 	  for (; chainsi && chainsi != dsi; chainsi = get_strinfo (chainsi->next))
2502 	    {
2503 	      /* When setting a stmt for delayed length computation
2504 		 prevent all strinfos through dsi from being
2505 		 invalidated.  */
2506 	      chainsi = unshare_strinfo (chainsi);
2507 	      chainsi->stmt = stmt;
2508 	      chainsi->nonzero_chars = NULL_TREE;
2509 	      chainsi->full_string_p = false;
2510 	      chainsi->endptr = NULL_TREE;
2511 	      chainsi->dont_invalidate = true;
2512 	    }
2513 	}
2514       dsi->stmt = stmt;
2515 
2516       /* Try to detect overlap before returning.  This catches cases
2517 	 like strcpy (d, d + n) where n is non-constant whose range
2518 	 is such that (n <= strlen (d) holds).
2519 
2520 	 OLDDSI->NONZERO_chars may have been reset by this point with
2521 	 oldlen holding it original value.  */
2522       if (olddsi && oldlen)
2523 	{
2524 	  /* Add 1 for the terminating NUL.  */
2525 	  tree type = TREE_TYPE (oldlen);
2526 	  oldlen = fold_build2 (PLUS_EXPR, type, oldlen,
2527 				build_int_cst (type, 1));
2528 	  check_bounds_or_overlap (stmt, olddsi->ptr, src, oldlen, NULL_TREE);
2529 	}
2530 
2531       return;
2532     }
2533 
2534   if (olddsi != NULL)
2535     {
2536       tree adj = NULL_TREE;
2537       if (oldlen == NULL_TREE)
2538 	;
2539       else if (integer_zerop (oldlen))
2540 	adj = srclen;
2541       else if (TREE_CODE (oldlen) == INTEGER_CST
2542 	       || TREE_CODE (srclen) == INTEGER_CST)
2543 	adj = fold_build2_loc (loc, MINUS_EXPR,
2544 			       TREE_TYPE (srclen), srclen,
2545 			       fold_convert_loc (loc, TREE_TYPE (srclen),
2546 						 oldlen));
2547       if (adj != NULL_TREE)
2548 	adjust_related_strinfos (loc, dsi, adj);
2549       else
2550 	dsi->prev = 0;
2551     }
2552   /* strcpy src may not overlap dst, so src doesn't need to be
2553      invalidated either.  */
2554   if (si != NULL)
2555     si->dont_invalidate = true;
2556 
2557   fn = NULL_TREE;
2558   zsi = NULL;
2559   switch (bcode)
2560     {
2561     case BUILT_IN_STRCPY:
2562       fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
2563       if (lhs)
2564 	ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2565       break;
2566     case BUILT_IN_STRCPY_CHK:
2567       fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
2568       if (lhs)
2569 	ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2570       break;
2571     case BUILT_IN_STPCPY:
2572       /* This would need adjustment of the lhs (subtract one),
2573 	 or detection that the trailing '\0' doesn't need to be
2574 	 written, if it will be immediately overwritten.
2575       fn = builtin_decl_explicit (BUILT_IN_MEMPCPY);  */
2576       if (lhs)
2577 	{
2578 	  dsi->endptr = lhs;
2579 	  zsi = zero_length_string (lhs, dsi);
2580 	}
2581       break;
2582     case BUILT_IN_STPCPY_CHK:
2583       /* This would need adjustment of the lhs (subtract one),
2584 	 or detection that the trailing '\0' doesn't need to be
2585 	 written, if it will be immediately overwritten.
2586       fn = builtin_decl_explicit (BUILT_IN_MEMPCPY_CHK);  */
2587       if (lhs)
2588 	{
2589 	  dsi->endptr = lhs;
2590 	  zsi = zero_length_string (lhs, dsi);
2591 	}
2592       break;
2593     default:
2594       gcc_unreachable ();
2595     }
2596   if (zsi != NULL)
2597     zsi->dont_invalidate = true;
2598 
2599   if (fn)
2600     {
2601       tree args = TYPE_ARG_TYPES (TREE_TYPE (fn));
2602       type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
2603     }
2604   else
2605     type = size_type_node;
2606 
2607   len = fold_convert_loc (loc, type, unshare_expr (srclen));
2608   len = fold_build2_loc (loc, PLUS_EXPR, type, len, build_int_cst (type, 1));
2609 
2610   /* Set the no-warning bit on the transformed statement?  */
2611   bool set_no_warning = false;
2612 
2613   if (const strinfo *chksi = olddsi ? olddsi : dsi)
2614     if (si
2615 	&& check_bounds_or_overlap (stmt, chksi->ptr, si->ptr, NULL_TREE, len))
2616       {
2617 	gimple_set_no_warning (stmt, true);
2618 	set_no_warning = true;
2619       }
2620 
2621   if (fn == NULL_TREE)
2622     return;
2623 
2624   len = force_gimple_operand_gsi (gsi, len, true, NULL_TREE, true,
2625 				  GSI_SAME_STMT);
2626   if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2627     {
2628       fprintf (dump_file, "Optimizing: ");
2629       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2630     }
2631   if (gimple_call_num_args (stmt) == 2)
2632     success = update_gimple_call (gsi, fn, 3, dst, src, len);
2633   else
2634     success = update_gimple_call (gsi, fn, 4, dst, src, len,
2635 				  gimple_call_arg (stmt, 2));
2636   if (success)
2637     {
2638       stmt = gsi_stmt (*gsi);
2639       update_stmt (stmt);
2640       if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2641 	{
2642 	  fprintf (dump_file, "into: ");
2643 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2644 	}
2645       /* Allow adjust_last_stmt to decrease this memcpy's size.  */
2646       laststmt.stmt = stmt;
2647       laststmt.len = srclen;
2648       laststmt.stridx = dsi->idx;
2649     }
2650   else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2651     fprintf (dump_file, "not possible.\n");
2652 
2653   if (set_no_warning)
2654     gimple_set_no_warning (stmt, true);
2655 }
2656 
2657 /* Check the size argument to the built-in forms of stpncpy and strncpy
2658    for out-of-bounds offsets or overlapping access, and to see if the
2659    size argument is derived from a call to strlen() on the source argument,
2660    and if so, issue an appropriate warning.  */
2661 
2662 static void
handle_builtin_strncat(built_in_function,gimple_stmt_iterator * gsi)2663 handle_builtin_strncat (built_in_function, gimple_stmt_iterator *gsi)
2664 {
2665   /* Same as stxncpy().  */
2666   handle_builtin_stxncpy_strncat (true, gsi);
2667 }
2668 
2669 /* Return true if LEN depends on a call to strlen(SRC) in an interesting
2670    way.  LEN can either be an integer expression, or a pointer (to char).
2671    When it is the latter (such as in recursive calls to self) it is
2672    assumed to be the argument in some call to strlen() whose relationship
2673    to SRC is being ascertained.  */
2674 
2675 bool
is_strlen_related_p(tree src,tree len)2676 is_strlen_related_p (tree src, tree len)
2677 {
2678   if (TREE_CODE (TREE_TYPE (len)) == POINTER_TYPE
2679       && operand_equal_p (src, len, 0))
2680     return true;
2681 
2682   if (TREE_CODE (len) != SSA_NAME)
2683     return false;
2684 
2685   if (TREE_CODE (src) == SSA_NAME)
2686     {
2687       gimple *srcdef = SSA_NAME_DEF_STMT (src);
2688       if (is_gimple_assign (srcdef))
2689 	{
2690 	  /* Handle bitwise AND used in conversions from wider size_t
2691 	     to narrower unsigned types.  */
2692 	  tree_code code = gimple_assign_rhs_code (srcdef);
2693 	  if (code == BIT_AND_EXPR
2694 	      || code == NOP_EXPR)
2695 	    return is_strlen_related_p (gimple_assign_rhs1 (srcdef), len);
2696 
2697 	  return false;
2698 	}
2699 
2700       if (gimple_call_builtin_p (srcdef, BUILT_IN_NORMAL))
2701 	{
2702 	  /* If SRC is the result of a call to an allocation function
2703 	     or strlen, use the function's argument instead.  */
2704 	  tree func = gimple_call_fndecl (srcdef);
2705 	  built_in_function code = DECL_FUNCTION_CODE (func);
2706 	  if (code == BUILT_IN_ALLOCA
2707 	      || code == BUILT_IN_ALLOCA_WITH_ALIGN
2708 	      || code == BUILT_IN_MALLOC
2709 	      || code == BUILT_IN_STRLEN)
2710 	    return is_strlen_related_p (gimple_call_arg (srcdef, 0), len);
2711 
2712 	  /* FIXME: Handle other functions with attribute alloc_size.  */
2713 	  return false;
2714 	}
2715     }
2716 
2717   gimple *lendef = SSA_NAME_DEF_STMT (len);
2718   if (!lendef)
2719     return false;
2720 
2721   if (is_gimple_call (lendef))
2722     {
2723       tree func = gimple_call_fndecl (lendef);
2724       if (!valid_builtin_call (lendef)
2725 	  || DECL_FUNCTION_CODE (func) != BUILT_IN_STRLEN)
2726 	return false;
2727 
2728       tree arg = gimple_call_arg (lendef, 0);
2729       return is_strlen_related_p (src, arg);
2730     }
2731 
2732   if (!is_gimple_assign (lendef))
2733     return false;
2734 
2735   tree_code code = gimple_assign_rhs_code (lendef);
2736   tree rhs1 = gimple_assign_rhs1 (lendef);
2737   tree rhstype = TREE_TYPE (rhs1);
2738 
2739   if ((TREE_CODE (rhstype) == POINTER_TYPE && code == POINTER_PLUS_EXPR)
2740       || (INTEGRAL_TYPE_P (rhstype)
2741 	  && (code == BIT_AND_EXPR
2742 	      || code == NOP_EXPR)))
2743     {
2744       /* Pointer plus (an integer), and truncation are considered among
2745 	 the (potentially) related expressions to strlen.  */
2746       return is_strlen_related_p (src, rhs1);
2747     }
2748 
2749   if (tree rhs2 = gimple_assign_rhs2 (lendef))
2750     {
2751       /* Integer subtraction is considered strlen-related when both
2752 	 arguments are integers and second one is strlen-related.  */
2753       rhstype = TREE_TYPE (rhs2);
2754       if (INTEGRAL_TYPE_P (rhstype) && code == MINUS_EXPR)
2755 	return is_strlen_related_p (src, rhs2);
2756     }
2757 
2758   return false;
2759 }
2760 
2761 /* Called by handle_builtin_stxncpy_strncat and by
2762    gimple_fold_builtin_strncpy in gimple-fold.c.
2763    Check to see if the specified bound is a) equal to the size of
2764    the destination DST and if so, b) if it's immediately followed by
2765    DST[CNT - 1] = '\0'.  If a) holds and b) does not, warn.  Otherwise,
2766    do nothing.  Return true if diagnostic has been issued.
2767 
2768    The purpose is to diagnose calls to strncpy and stpncpy that do
2769    not nul-terminate the copy while allowing for the idiom where
2770    such a call is immediately followed by setting the last element
2771    to nul, as in:
2772      char a[32];
2773      strncpy (a, s, sizeof a);
2774      a[sizeof a - 1] = '\0';
2775 */
2776 
2777 bool
maybe_diag_stxncpy_trunc(gimple_stmt_iterator gsi,tree src,tree cnt,pointer_query * ptr_qry)2778 maybe_diag_stxncpy_trunc (gimple_stmt_iterator gsi, tree src, tree cnt,
2779 			  pointer_query *ptr_qry /* = NULL */)
2780 {
2781   gimple *stmt = gsi_stmt (gsi);
2782   if (gimple_no_warning_p (stmt))
2783     return false;
2784 
2785   wide_int cntrange[2];
2786 
2787   // FIXME: Use range_query instead of global ranges.
2788   enum value_range_kind rng = get_range_info (cnt, cntrange, cntrange + 1);
2789   if (rng == VR_RANGE)
2790     ;
2791   else if (rng == VR_ANTI_RANGE)
2792     {
2793       wide_int maxobjsize = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node));
2794 
2795       if (wi::ltu_p (cntrange[1], maxobjsize))
2796 	{
2797 	  cntrange[0] = cntrange[1] + 1;
2798 	  cntrange[1] = maxobjsize;
2799 	}
2800       else
2801 	{
2802 	  cntrange[1] = cntrange[0] - 1;
2803 	  cntrange[0] = wi::zero (TYPE_PRECISION (TREE_TYPE (cnt)));
2804 	}
2805     }
2806   else
2807     return false;
2808 
2809   /* Negative value is the constant string length.  If it's less than
2810      the lower bound there is no truncation.  Avoid calling get_stridx()
2811      when ssa_ver_to_stridx is empty.  That implies the caller isn't
2812      running under the control of this pass and ssa_ver_to_stridx hasn't
2813      been created yet.  */
2814   int sidx = ssa_ver_to_stridx.length () ? get_stridx (src) : 0;
2815   if (sidx < 0 && wi::gtu_p (cntrange[0], ~sidx))
2816     return false;
2817 
2818   tree dst = gimple_call_arg (stmt, 0);
2819   tree dstdecl = dst;
2820   if (TREE_CODE (dstdecl) == ADDR_EXPR)
2821     dstdecl = TREE_OPERAND (dstdecl, 0);
2822 
2823   tree ref = NULL_TREE;
2824 
2825   if (!sidx)
2826     {
2827       /* If the source is a non-string return early to avoid warning
2828 	 for possible truncation (if the truncation is certain SIDX
2829 	 is non-zero).  */
2830       tree srcdecl = gimple_call_arg (stmt, 1);
2831       if (TREE_CODE (srcdecl) == ADDR_EXPR)
2832 	srcdecl = TREE_OPERAND (srcdecl, 0);
2833       if (get_attr_nonstring_decl (srcdecl, &ref))
2834 	return false;
2835     }
2836 
2837   /* Likewise, if the destination refers to an array/pointer declared
2838      nonstring return early.  */
2839   if (get_attr_nonstring_decl (dstdecl, &ref))
2840     return false;
2841 
2842   /* Look for dst[i] = '\0'; after the stxncpy() call and if found
2843      avoid the truncation warning.  */
2844   gsi_next_nondebug (&gsi);
2845   gimple *next_stmt = gsi_stmt (gsi);
2846   if (!next_stmt)
2847     {
2848       /* When there is no statement in the same basic block check
2849 	 the immediate successor block.  */
2850       if (basic_block bb = gimple_bb (stmt))
2851 	{
2852 	  if (single_succ_p (bb))
2853 	    {
2854 	      /* For simplicity, ignore blocks with multiple outgoing
2855 		 edges for now and only consider successor blocks along
2856 		 normal edges.  */
2857 	      edge e = EDGE_SUCC (bb, 0);
2858 	      if (!(e->flags & EDGE_ABNORMAL))
2859 		{
2860 		  gsi = gsi_start_bb (e->dest);
2861 		  next_stmt = gsi_stmt (gsi);
2862 		  if (next_stmt && is_gimple_debug (next_stmt))
2863 		    {
2864 		      gsi_next_nondebug (&gsi);
2865 		      next_stmt = gsi_stmt (gsi);
2866 		    }
2867 		}
2868 	    }
2869 	}
2870     }
2871 
2872   if (next_stmt && is_gimple_assign (next_stmt))
2873     {
2874       tree lhs = gimple_assign_lhs (next_stmt);
2875       tree_code code = TREE_CODE (lhs);
2876       if (code == ARRAY_REF || code == MEM_REF)
2877 	lhs = TREE_OPERAND (lhs, 0);
2878 
2879       tree func = gimple_call_fndecl (stmt);
2880       if (DECL_FUNCTION_CODE (func) == BUILT_IN_STPNCPY)
2881 	{
2882 	  tree ret = gimple_call_lhs (stmt);
2883 	  if (ret && operand_equal_p (ret, lhs, 0))
2884 	    return false;
2885 	}
2886 
2887       /* Determine the base address and offset of the reference,
2888 	 ignoring the innermost array index.  */
2889       if (TREE_CODE (ref) == ARRAY_REF)
2890 	ref = TREE_OPERAND (ref, 0);
2891 
2892       poly_int64 dstoff;
2893       tree dstbase = get_addr_base_and_unit_offset (ref, &dstoff);
2894 
2895       poly_int64 lhsoff;
2896       tree lhsbase = get_addr_base_and_unit_offset (lhs, &lhsoff);
2897       if (lhsbase
2898 	  && dstbase
2899 	  && known_eq (dstoff, lhsoff)
2900 	  && operand_equal_p (dstbase, lhsbase, 0))
2901 	return false;
2902     }
2903 
2904   int prec = TYPE_PRECISION (TREE_TYPE (cnt));
2905   wide_int lenrange[2];
2906   if (strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL)
2907     {
2908       lenrange[0] = (sisrc->nonzero_chars
2909 		     && TREE_CODE (sisrc->nonzero_chars) == INTEGER_CST
2910 		     ? wi::to_wide (sisrc->nonzero_chars)
2911 		     : wi::zero (prec));
2912       lenrange[1] = lenrange[0];
2913     }
2914   else if (sidx < 0)
2915     lenrange[0] = lenrange[1] = wi::shwi (~sidx, prec);
2916   else
2917     {
2918       c_strlen_data lendata = { };
2919       /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
2920 	 to have it set to the length of the longest string in a PHI.  */
2921       lendata.maxbound = src;
2922       get_range_strlen (src, &lendata, /* eltsize = */1);
2923       if (TREE_CODE (lendata.minlen) == INTEGER_CST
2924 	  && TREE_CODE (lendata.maxbound) == INTEGER_CST)
2925 	{
2926 	  /* When LENDATA.MAXLEN is unknown, reset LENDATA.MINLEN
2927 	     which stores the length of the shortest known string.  */
2928 	  if (integer_all_onesp (lendata.maxlen))
2929 	    lenrange[0] = wi::shwi (0, prec);
2930 	  else
2931 	    lenrange[0] = wi::to_wide (lendata.minlen, prec);
2932 	  lenrange[1] = wi::to_wide (lendata.maxbound, prec);
2933 	}
2934       else
2935 	{
2936 	  lenrange[0] = wi::shwi (0, prec);
2937 	  lenrange[1] = wi::shwi (-1, prec);
2938 	}
2939     }
2940 
2941   location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
2942   tree func = gimple_call_fndecl (stmt);
2943 
2944   if (lenrange[0] != 0 || !wi::neg_p (lenrange[1]))
2945     {
2946       /* If the longest source string is shorter than the lower bound
2947 	 of the specified count the copy is definitely nul-terminated.  */
2948       if (wi::ltu_p (lenrange[1], cntrange[0]))
2949 	return false;
2950 
2951       if (wi::neg_p (lenrange[1]))
2952 	{
2953 	  /* The length of one of the strings is unknown but at least
2954 	     one has non-zero length and that length is stored in
2955 	     LENRANGE[1].  Swap the bounds to force a "may be truncated"
2956 	     warning below.  */
2957 	  lenrange[1] = lenrange[0];
2958 	  lenrange[0] = wi::shwi (0, prec);
2959 	}
2960 
2961       /* Set to true for strncat whose bound is derived from the length
2962 	 of the destination (the expected usage pattern).  */
2963       bool cat_dstlen_bounded = false;
2964       if (DECL_FUNCTION_CODE (func) == BUILT_IN_STRNCAT)
2965 	cat_dstlen_bounded = is_strlen_related_p (dst, cnt);
2966 
2967       if (lenrange[0] == cntrange[1] && cntrange[0] == cntrange[1])
2968 	return warning_n (callloc, OPT_Wstringop_truncation,
2969 			  cntrange[0].to_uhwi (),
2970 			  "%G%qD output truncated before terminating "
2971 			  "nul copying %E byte from a string of the "
2972 			  "same length",
2973 			  "%G%qD output truncated before terminating nul "
2974 			  "copying %E bytes from a string of the same "
2975 			  "length",
2976 			  stmt, func, cnt);
2977       else if (!cat_dstlen_bounded)
2978 	{
2979 	  if (wi::geu_p (lenrange[0], cntrange[1]))
2980 	    {
2981 	      /* The shortest string is longer than the upper bound of
2982 		 the count so the truncation is certain.  */
2983 	      if (cntrange[0] == cntrange[1])
2984 		return warning_n (callloc, OPT_Wstringop_truncation,
2985 				  cntrange[0].to_uhwi (),
2986 				  "%G%qD output truncated copying %E byte "
2987 				  "from a string of length %wu",
2988 				  "%G%qD output truncated copying %E bytes "
2989 				  "from a string of length %wu",
2990 				  stmt, func, cnt, lenrange[0].to_uhwi ());
2991 
2992 	      return warning_at (callloc, OPT_Wstringop_truncation,
2993 				 "%G%qD output truncated copying between %wu "
2994 				 "and %wu bytes from a string of length %wu",
2995 				 stmt, func, cntrange[0].to_uhwi (),
2996 				 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
2997 	    }
2998 	  else if (wi::geu_p (lenrange[1], cntrange[1]))
2999 	    {
3000 	      /* The longest string is longer than the upper bound of
3001 		 the count so the truncation is possible.  */
3002 	      if (cntrange[0] == cntrange[1])
3003 		return warning_n (callloc, OPT_Wstringop_truncation,
3004 				  cntrange[0].to_uhwi (),
3005 				  "%G%qD output may be truncated copying %E "
3006 				  "byte from a string of length %wu",
3007 				  "%G%qD output may be truncated copying %E "
3008 				  "bytes from a string of length %wu",
3009 				  stmt, func, cnt, lenrange[1].to_uhwi ());
3010 
3011 	      return warning_at (callloc, OPT_Wstringop_truncation,
3012 				 "%G%qD output may be truncated copying between "
3013 				 "%wu and %wu bytes from a string of length %wu",
3014 				 stmt, func, cntrange[0].to_uhwi (),
3015 				 cntrange[1].to_uhwi (), lenrange[1].to_uhwi ());
3016 	    }
3017 	}
3018 
3019       if (!cat_dstlen_bounded
3020 	  && cntrange[0] != cntrange[1]
3021 	  && wi::leu_p (cntrange[0], lenrange[0])
3022 	  && wi::leu_p (cntrange[1], lenrange[0] + 1))
3023 	{
3024 	  /* If the source (including the terminating nul) is longer than
3025 	     the lower bound of the specified count but shorter than the
3026 	     upper bound the copy may (but need not) be truncated.  */
3027 	  return warning_at (callloc, OPT_Wstringop_truncation,
3028 			     "%G%qD output may be truncated copying between "
3029 			     "%wu and %wu bytes from a string of length %wu",
3030 			     stmt, func, cntrange[0].to_uhwi (),
3031 			     cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3032 	}
3033     }
3034 
3035   access_ref aref;
3036   if (tree dstsize = compute_objsize (dst, 1, &aref, ptr_qry))
3037     {
3038       /* The source length is unknown.  Try to determine the destination
3039 	 size and see if it matches the specified bound.  If not, bail.
3040 	 Otherwise go on to see if it should be diagnosed for possible
3041 	 truncation.  */
3042       if (!dstsize)
3043 	return false;
3044 
3045       if (wi::to_wide (dstsize) != cntrange[1])
3046 	return false;
3047 
3048       /* Avoid warning for strncpy(a, b, N) calls where the following
3049 	 equalities hold:
3050 	   N == sizeof a && N == sizeof b */
3051       if (tree srcsize = compute_objsize (src, 1, &aref, ptr_qry))
3052 	if (wi::to_wide (srcsize) == cntrange[1])
3053 	  return false;
3054 
3055       if (cntrange[0] == cntrange[1])
3056 	return warning_at (callloc, OPT_Wstringop_truncation,
3057 			   "%G%qD specified bound %E equals destination size",
3058 			   stmt, func, cnt);
3059     }
3060 
3061   return false;
3062 }
3063 
3064 /* Check the arguments to the built-in forms of stpncpy, strncpy, and
3065    strncat, for out-of-bounds offsets or overlapping access, and to see
3066    if the size is derived from calling strlen() on the source argument,
3067    and if so, issue the appropriate warning.
3068    APPEND_P is true for strncat.  */
3069 
3070 static void
handle_builtin_stxncpy_strncat(bool append_p,gimple_stmt_iterator * gsi)3071 handle_builtin_stxncpy_strncat (bool append_p, gimple_stmt_iterator *gsi)
3072 {
3073   if (!strlen_to_stridx)
3074     return;
3075 
3076   gimple *stmt = gsi_stmt (*gsi);
3077 
3078   tree dst = gimple_call_arg (stmt, 0);
3079   tree src = gimple_call_arg (stmt, 1);
3080   tree len = gimple_call_arg (stmt, 2);
3081   /* An upper bound of the size of the destination.  */
3082   tree dstsize = NULL_TREE;
3083   /* The length of the destination and source strings (plus 1 for those
3084      whose FULL_STRING_P is set, i.e., whose length is exact rather than
3085      a lower bound).  */
3086   tree dstlenp1 = NULL_TREE, srclenp1 = NULL_TREE;;
3087 
3088   int didx = get_stridx (dst);
3089   if (strinfo *sidst = didx > 0 ? get_strinfo (didx) : NULL)
3090     {
3091       /* Compute the size of the destination string including the nul
3092 	 if it is known to be nul-terminated.  */
3093       if (sidst->nonzero_chars)
3094 	{
3095 	  if (sidst->full_string_p)
3096 	    {
3097 	      /* String is known to be nul-terminated.  */
3098 	      tree type = TREE_TYPE (sidst->nonzero_chars);
3099 	      dstlenp1 = fold_build2 (PLUS_EXPR, type, sidst->nonzero_chars,
3100 				     build_int_cst (type, 1));
3101 	    }
3102 	  else
3103 	    dstlenp1 = sidst->nonzero_chars;
3104 	}
3105       else if (TREE_CODE (sidst->ptr) == SSA_NAME)
3106 	{
3107 	  gimple *def_stmt = SSA_NAME_DEF_STMT (sidst->ptr);
3108 	  dstsize = gimple_call_alloc_size (def_stmt);
3109 	}
3110 
3111       dst = sidst->ptr;
3112     }
3113 
3114   int sidx = get_stridx (src);
3115   strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL;
3116   if (sisrc)
3117     {
3118       /* strncat() and strncpy() can modify the source string by writing
3119 	 over the terminating nul so SISRC->DONT_INVALIDATE must be left
3120 	 clear.  */
3121 
3122       /* Compute the size of the source string including the terminating
3123 	 nul if its known to be nul-terminated.  */
3124       if (sisrc->nonzero_chars)
3125 	{
3126 	  if (sisrc->full_string_p)
3127 	    {
3128 	      tree type = TREE_TYPE (sisrc->nonzero_chars);
3129 	      srclenp1 = fold_build2 (PLUS_EXPR, type, sisrc->nonzero_chars,
3130 				     build_int_cst (type, 1));
3131 	    }
3132 	  else
3133 	    srclenp1 = sisrc->nonzero_chars;
3134 	}
3135 
3136 	src = sisrc->ptr;
3137     }
3138   else
3139     srclenp1 = NULL_TREE;
3140 
3141   if (check_bounds_or_overlap (stmt, dst, src, dstlenp1, srclenp1))
3142     {
3143       gimple_set_no_warning (stmt, true);
3144       return;
3145     }
3146 
3147   /* If the length argument was computed from strlen(S) for some string
3148      S retrieve the strinfo index for the string (PSS->FIRST) along with
3149      the location of the strlen() call (PSS->SECOND).  */
3150   stridx_strlenloc *pss = strlen_to_stridx->get (len);
3151   if (!pss || pss->first <= 0)
3152     {
3153       if (maybe_diag_stxncpy_trunc (*gsi, src, len))
3154 	gimple_set_no_warning (stmt, true);
3155 
3156       return;
3157     }
3158 
3159   /* Retrieve the strinfo data for the string S that LEN was computed
3160      from as some function F of strlen (S) (i.e., LEN need not be equal
3161      to strlen(S)).  */
3162   strinfo *silen = get_strinfo (pss->first);
3163 
3164   location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3165 
3166   tree func = gimple_call_fndecl (stmt);
3167 
3168   bool warned = false;
3169 
3170   /* When -Wstringop-truncation is set, try to determine truncation
3171      before diagnosing possible overflow.  Truncation is implied by
3172      the LEN argument being equal to strlen(SRC), regardless of
3173      whether its value is known.  Otherwise, when appending, or
3174      when copying into a destination of known size, issue the more
3175      generic -Wstringop-overflow which triggers for LEN arguments
3176      that in any meaningful way depend on strlen(SRC).  */
3177   if (!append_p
3178       && sisrc == silen
3179       && is_strlen_related_p (src, len)
3180       && warning_at (callloc, OPT_Wstringop_truncation,
3181 		     "%G%qD output truncated before terminating nul "
3182 		     "copying as many bytes from a string as its length",
3183 		     stmt, func))
3184     warned = true;
3185   else if ((append_p || !dstsize || len == dstlenp1)
3186 	   && silen && is_strlen_related_p (src, silen->ptr))
3187     {
3188       /* Issue -Wstringop-overflow when appending or when writing into
3189 	 a destination of a known size.  Otherwise, when copying into
3190 	 a destination of an unknown size, it's truncation.  */
3191       int opt = (append_p || dstsize
3192 		 ? OPT_Wstringop_overflow_ : OPT_Wstringop_truncation);
3193       warned = warning_at (callloc, opt,
3194 			   "%G%qD specified bound depends on the length "
3195 			   "of the source argument",
3196 			   stmt, func);
3197     }
3198   if (warned)
3199     {
3200       location_t strlenloc = pss->second;
3201       if (strlenloc != UNKNOWN_LOCATION && strlenloc != callloc)
3202 	inform (strlenloc, "length computed here");
3203     }
3204 }
3205 
3206 /* Handle a memcpy-like ({mem{,p}cpy,__mem{,p}cpy_chk}) call.
3207    If strlen of the second argument is known and length of the third argument
3208    is that plus one, strlen of the first argument is the same after this
3209    call.  Uses RVALS to determine range information.  */
3210 
3211 static void
handle_builtin_memcpy(enum built_in_function bcode,gimple_stmt_iterator * gsi,pointer_query & ptr_qry)3212 handle_builtin_memcpy (enum built_in_function bcode, gimple_stmt_iterator *gsi,
3213 		       pointer_query &ptr_qry)
3214 {
3215   tree lhs, oldlen, newlen;
3216   gimple *stmt = gsi_stmt (*gsi);
3217   strinfo *si, *dsi;
3218 
3219   tree len = gimple_call_arg (stmt, 2);
3220   tree src = gimple_call_arg (stmt, 1);
3221   tree dst = gimple_call_arg (stmt, 0);
3222 
3223   int didx = get_stridx (dst);
3224   strinfo *olddsi = NULL;
3225   if (didx > 0)
3226     olddsi = get_strinfo (didx);
3227   else if (didx < 0)
3228     return;
3229 
3230   if (olddsi != NULL
3231       && !integer_zerop (len))
3232     {
3233       maybe_warn_overflow (stmt, len, ptr_qry, olddsi, false, true);
3234       adjust_last_stmt (olddsi, stmt, false, ptr_qry);
3235     }
3236 
3237   int idx = get_stridx (src);
3238   if (idx == 0)
3239     return;
3240 
3241   bool full_string_p;
3242   if (idx > 0)
3243     {
3244       gimple *def_stmt;
3245 
3246       /* Handle memcpy (x, y, l) where l's relationship with strlen (y)
3247 	 is known.  */
3248       si = get_strinfo (idx);
3249       if (si == NULL || si->nonzero_chars == NULL_TREE)
3250 	return;
3251       if (TREE_CODE (len) == INTEGER_CST
3252 	  && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
3253 	{
3254 	  if (tree_int_cst_le (len, si->nonzero_chars))
3255 	    {
3256 	      /* Copying LEN nonzero characters, where LEN is constant.  */
3257 	      newlen = len;
3258 	      full_string_p = false;
3259 	    }
3260 	  else
3261 	    {
3262 	      /* Copying the whole of the analyzed part of SI.  */
3263 	      newlen = si->nonzero_chars;
3264 	      full_string_p = si->full_string_p;
3265 	    }
3266 	}
3267       else
3268 	{
3269 	  if (!si->full_string_p)
3270 	    return;
3271 	  if (TREE_CODE (len) != SSA_NAME)
3272 	    return;
3273 	  def_stmt = SSA_NAME_DEF_STMT (len);
3274 	  if (!is_gimple_assign (def_stmt)
3275 	      || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
3276 	      || gimple_assign_rhs1 (def_stmt) != si->nonzero_chars
3277 	      || !integer_onep (gimple_assign_rhs2 (def_stmt)))
3278 	    return;
3279 	  /* Copying variable-length string SI (and no more).  */
3280 	  newlen = si->nonzero_chars;
3281 	  full_string_p = true;
3282 	}
3283     }
3284   else
3285     {
3286       si = NULL;
3287       /* Handle memcpy (x, "abcd", 5) or
3288 	 memcpy (x, "abc\0uvw", 7).  */
3289       if (!tree_fits_uhwi_p (len))
3290 	return;
3291 
3292       unsigned HOST_WIDE_INT clen = tree_to_uhwi (len);
3293       unsigned HOST_WIDE_INT nonzero_chars = ~idx;
3294       newlen = build_int_cst (size_type_node, MIN (nonzero_chars, clen));
3295       full_string_p = clen > nonzero_chars;
3296     }
3297 
3298   if (!full_string_p
3299       && olddsi
3300       && olddsi->nonzero_chars
3301       && TREE_CODE (olddsi->nonzero_chars) == INTEGER_CST
3302       && tree_int_cst_le (newlen, olddsi->nonzero_chars))
3303     {
3304       /* The SRC substring being written strictly overlaps
3305 	 a subsequence of the existing string OLDDSI.  */
3306       newlen = olddsi->nonzero_chars;
3307       full_string_p = olddsi->full_string_p;
3308     }
3309 
3310   if (olddsi != NULL && TREE_CODE (len) == SSA_NAME)
3311     adjust_last_stmt (olddsi, stmt, false, ptr_qry);
3312 
3313   if (didx == 0)
3314     {
3315       didx = new_stridx (dst);
3316       if (didx == 0)
3317 	return;
3318     }
3319   oldlen = NULL_TREE;
3320   if (olddsi != NULL)
3321     {
3322       dsi = unshare_strinfo (olddsi);
3323       oldlen = olddsi->nonzero_chars;
3324       dsi->nonzero_chars = newlen;
3325       dsi->full_string_p = full_string_p;
3326       /* Break the chain, so adjust_related_strinfo on later pointers in
3327 	 the chain won't adjust this one anymore.  */
3328       dsi->next = 0;
3329       dsi->stmt = NULL;
3330       dsi->endptr = NULL_TREE;
3331     }
3332   else
3333     {
3334       dsi = new_strinfo (dst, didx, newlen, full_string_p);
3335       set_strinfo (didx, dsi);
3336       find_equal_ptrs (dst, didx);
3337     }
3338   dsi->writable = true;
3339   dsi->dont_invalidate = true;
3340   if (olddsi != NULL)
3341     {
3342       tree adj = NULL_TREE;
3343       location_t loc = gimple_location (stmt);
3344       if (oldlen == NULL_TREE)
3345 	;
3346       else if (integer_zerop (oldlen))
3347 	adj = newlen;
3348       else if (TREE_CODE (oldlen) == INTEGER_CST
3349 	       || TREE_CODE (newlen) == INTEGER_CST)
3350 	adj = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (newlen), newlen,
3351 			       fold_convert_loc (loc, TREE_TYPE (newlen),
3352 						 oldlen));
3353       if (adj != NULL_TREE)
3354 	adjust_related_strinfos (loc, dsi, adj);
3355       else
3356 	dsi->prev = 0;
3357     }
3358   /* memcpy src may not overlap dst, so src doesn't need to be
3359      invalidated either.  */
3360   if (si != NULL)
3361     si->dont_invalidate = true;
3362 
3363   if (full_string_p)
3364     {
3365       lhs = gimple_call_lhs (stmt);
3366       switch (bcode)
3367 	{
3368 	case BUILT_IN_MEMCPY:
3369 	case BUILT_IN_MEMCPY_CHK:
3370 	  /* Allow adjust_last_stmt to decrease this memcpy's size.  */
3371 	  laststmt.stmt = stmt;
3372 	  laststmt.len = dsi->nonzero_chars;
3373 	  laststmt.stridx = dsi->idx;
3374 	  if (lhs)
3375 	    ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
3376 	  break;
3377 	case BUILT_IN_MEMPCPY:
3378 	case BUILT_IN_MEMPCPY_CHK:
3379 	  break;
3380 	default:
3381 	  gcc_unreachable ();
3382 	}
3383     }
3384 }
3385 
3386 /* Handle a strcat-like ({strcat,__strcat_chk}) call.
3387    If strlen of the second argument is known, strlen of the first argument
3388    is increased by the length of the second argument.  Furthermore, attempt
3389    to convert it to memcpy/strcpy if the length of the first argument
3390    is known.  */
3391 
3392 static void
handle_builtin_strcat(enum built_in_function bcode,gimple_stmt_iterator * gsi,pointer_query & ptr_qry)3393 handle_builtin_strcat (enum built_in_function bcode, gimple_stmt_iterator *gsi,
3394 		       pointer_query &ptr_qry)
3395 {
3396   int idx, didx;
3397   tree srclen, args, type, fn, objsz, endptr;
3398   bool success;
3399   gimple *stmt = gsi_stmt (*gsi);
3400   strinfo *si, *dsi;
3401   location_t loc = gimple_location (stmt);
3402 
3403   tree src = gimple_call_arg (stmt, 1);
3404   tree dst = gimple_call_arg (stmt, 0);
3405 
3406   /* Bail if the source is the same as destination.  It will be diagnosed
3407      elsewhere.  */
3408   if (operand_equal_p (src, dst, 0))
3409     return;
3410 
3411   tree lhs = gimple_call_lhs (stmt);
3412 
3413   didx = get_stridx (dst);
3414   if (didx < 0)
3415     return;
3416 
3417   dsi = NULL;
3418   if (didx > 0)
3419     dsi = get_strinfo (didx);
3420 
3421   srclen = NULL_TREE;
3422   si = NULL;
3423   idx = get_stridx (src);
3424   if (idx < 0)
3425     srclen = build_int_cst (size_type_node, ~idx);
3426   else if (idx > 0)
3427     {
3428       si = get_strinfo (idx);
3429       if (si != NULL)
3430 	srclen = get_string_length (si);
3431     }
3432 
3433   /* Set the no-warning bit on the transformed statement?  */
3434   bool set_no_warning = false;
3435 
3436   if (dsi == NULL || get_string_length (dsi) == NULL_TREE)
3437     {
3438       {
3439 	  /* The concatenation always involves copying at least one byte
3440 	     (the terminating nul), even if the source string is empty.
3441 	     If the source is unknown assume it's one character long and
3442 	     used that as both sizes.  */
3443 	tree slen = srclen;
3444 	if (slen)
3445 	  {
3446 	    tree type = TREE_TYPE (slen);
3447 	    slen = fold_build2 (PLUS_EXPR, type, slen, build_int_cst (type, 1));
3448 	  }
3449 
3450 	tree sptr = si && si->ptr ? si->ptr : src;
3451 
3452 	if (check_bounds_or_overlap (stmt, dst, sptr, NULL_TREE, slen))
3453 	  {
3454 	    gimple_set_no_warning (stmt, true);
3455 	    set_no_warning = true;
3456 	  }
3457       }
3458 
3459       /* strcat (p, q) can be transformed into
3460 	 tmp = p + strlen (p); endptr = stpcpy (tmp, q);
3461 	 with length endptr - p if we need to compute the length
3462 	 later on.  Don't do this transformation if we don't need
3463 	 it.  */
3464       if (builtin_decl_implicit_p (BUILT_IN_STPCPY) && lhs == NULL_TREE)
3465 	{
3466 	  if (didx == 0)
3467 	    {
3468 	      didx = new_stridx (dst);
3469 	      if (didx == 0)
3470 		return;
3471 	    }
3472 	  if (dsi == NULL)
3473 	    {
3474 	      dsi = new_strinfo (dst, didx, NULL_TREE, false);
3475 	      set_strinfo (didx, dsi);
3476 	      find_equal_ptrs (dst, didx);
3477 	    }
3478 	  else
3479 	    {
3480 	      dsi = unshare_strinfo (dsi);
3481 	      dsi->nonzero_chars = NULL_TREE;
3482 	      dsi->full_string_p = false;
3483 	      dsi->next = 0;
3484 	      dsi->endptr = NULL_TREE;
3485 	    }
3486 	  dsi->writable = true;
3487 	  dsi->stmt = stmt;
3488 	  dsi->dont_invalidate = true;
3489 	}
3490       return;
3491     }
3492 
3493   tree dstlen = dsi->nonzero_chars;
3494   endptr = dsi->endptr;
3495 
3496   dsi = unshare_strinfo (dsi);
3497   dsi->endptr = NULL_TREE;
3498   dsi->stmt = NULL;
3499   dsi->writable = true;
3500 
3501   if (srclen != NULL_TREE)
3502     {
3503       dsi->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
3504 					    TREE_TYPE (dsi->nonzero_chars),
3505 					    dsi->nonzero_chars, srclen);
3506       gcc_assert (dsi->full_string_p);
3507       adjust_related_strinfos (loc, dsi, srclen);
3508       dsi->dont_invalidate = true;
3509     }
3510   else
3511     {
3512       dsi->nonzero_chars = NULL;
3513       dsi->full_string_p = false;
3514       if (lhs == NULL_TREE && builtin_decl_implicit_p (BUILT_IN_STPCPY))
3515 	dsi->dont_invalidate = true;
3516     }
3517 
3518   if (si != NULL)
3519     /* strcat src may not overlap dst, so src doesn't need to be
3520        invalidated either.  */
3521     si->dont_invalidate = true;
3522 
3523   /* For now.  Could remove the lhs from the call and add
3524      lhs = dst; afterwards.  */
3525   if (lhs)
3526     return;
3527 
3528   fn = NULL_TREE;
3529   objsz = NULL_TREE;
3530   switch (bcode)
3531     {
3532     case BUILT_IN_STRCAT:
3533       if (srclen != NULL_TREE)
3534 	fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
3535       else
3536 	fn = builtin_decl_implicit (BUILT_IN_STRCPY);
3537       break;
3538     case BUILT_IN_STRCAT_CHK:
3539       if (srclen != NULL_TREE)
3540 	fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
3541       else
3542 	fn = builtin_decl_explicit (BUILT_IN_STRCPY_CHK);
3543       objsz = gimple_call_arg (stmt, 2);
3544       break;
3545     default:
3546       gcc_unreachable ();
3547     }
3548 
3549   if (fn == NULL_TREE)
3550     return;
3551 
3552   if (dsi && dstlen)
3553     {
3554       tree type = TREE_TYPE (dstlen);
3555 
3556       /* Compute the size of the source sequence, including the nul.  */
3557       tree srcsize = srclen ? srclen : size_zero_node;
3558       tree one = build_int_cst (type, 1);
3559       srcsize = fold_build2 (PLUS_EXPR, type, srcsize, one);
3560       tree dstsize = fold_build2 (PLUS_EXPR, type, dstlen, one);
3561       tree sptr = si && si->ptr ? si->ptr : src;
3562 
3563       if (check_bounds_or_overlap (stmt, dst, sptr, dstsize, srcsize))
3564 	{
3565 	  gimple_set_no_warning (stmt, true);
3566 	  set_no_warning = true;
3567 	}
3568     }
3569 
3570   tree len = NULL_TREE;
3571   if (srclen != NULL_TREE)
3572     {
3573       args = TYPE_ARG_TYPES (TREE_TYPE (fn));
3574       type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
3575 
3576       len = fold_convert_loc (loc, type, unshare_expr (srclen));
3577       len = fold_build2_loc (loc, PLUS_EXPR, type, len,
3578 			     build_int_cst (type, 1));
3579       len = force_gimple_operand_gsi (gsi, len, true, NULL_TREE, true,
3580 				      GSI_SAME_STMT);
3581     }
3582   if (endptr)
3583     dst = fold_convert_loc (loc, TREE_TYPE (dst), unshare_expr (endptr));
3584   else
3585     dst = fold_build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (dst), dst,
3586 			   fold_convert_loc (loc, sizetype,
3587 					     unshare_expr (dstlen)));
3588   dst = force_gimple_operand_gsi (gsi, dst, true, NULL_TREE, true,
3589 				  GSI_SAME_STMT);
3590   if (objsz)
3591     {
3592       objsz = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (objsz), objsz,
3593 			       fold_convert_loc (loc, TREE_TYPE (objsz),
3594 						 unshare_expr (dstlen)));
3595       objsz = force_gimple_operand_gsi (gsi, objsz, true, NULL_TREE, true,
3596 					GSI_SAME_STMT);
3597     }
3598   if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3599     {
3600       fprintf (dump_file, "Optimizing: ");
3601       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3602     }
3603   if (srclen != NULL_TREE)
3604     success = update_gimple_call (gsi, fn, 3 + (objsz != NULL_TREE),
3605 				  dst, src, len, objsz);
3606   else
3607     success = update_gimple_call (gsi, fn, 2 + (objsz != NULL_TREE),
3608 				  dst, src, objsz);
3609   if (success)
3610     {
3611       stmt = gsi_stmt (*gsi);
3612       update_stmt (stmt);
3613       if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3614 	{
3615 	  fprintf (dump_file, "into: ");
3616 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3617 	}
3618       /* If srclen == NULL, note that current string length can be
3619 	 computed by transforming this strcpy into stpcpy.  */
3620       if (srclen == NULL_TREE && dsi->dont_invalidate)
3621 	dsi->stmt = stmt;
3622       adjust_last_stmt (dsi, stmt, true, ptr_qry);
3623       if (srclen != NULL_TREE)
3624 	{
3625 	  laststmt.stmt = stmt;
3626 	  laststmt.len = srclen;
3627 	  laststmt.stridx = dsi->idx;
3628 	}
3629     }
3630   else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3631     fprintf (dump_file, "not possible.\n");
3632 
3633   if (set_no_warning)
3634     gimple_set_no_warning (stmt, true);
3635 }
3636 
3637 /* Handle a call to an allocation function like alloca, malloc or calloc,
3638    or an ordinary allocation function declared with attribute alloc_size.  */
3639 
3640 static void
handle_alloc_call(enum built_in_function bcode,gimple_stmt_iterator * gsi)3641 handle_alloc_call (enum built_in_function bcode, gimple_stmt_iterator *gsi)
3642 {
3643   gimple *stmt = gsi_stmt (*gsi);
3644   tree lhs = gimple_call_lhs (stmt);
3645   if (lhs == NULL_TREE)
3646     return;
3647 
3648   gcc_assert (get_stridx (lhs) == 0);
3649   int idx = new_stridx (lhs);
3650   tree length = NULL_TREE;
3651   if (bcode == BUILT_IN_CALLOC)
3652     length = build_int_cst (size_type_node, 0);
3653   strinfo *si = new_strinfo (lhs, idx, length, length != NULL_TREE);
3654   if (bcode == BUILT_IN_CALLOC)
3655     {
3656       /* Only set STMT for calloc and malloc.  */
3657       si->stmt = stmt;
3658       /* Only set ENDPTR for calloc.  */
3659       si->endptr = lhs;
3660     }
3661   else if (bcode == BUILT_IN_MALLOC)
3662     si->stmt = stmt;
3663 
3664   /* Set ALLOC is set for all allocation functions.  */
3665   si->alloc = stmt;
3666   set_strinfo (idx, si);
3667   si->writable = true;
3668   si->dont_invalidate = true;
3669 }
3670 
3671 /* Handle a call to memset.
3672    After a call to calloc, memset(,0,) is unnecessary.
3673    memset(malloc(n),0,n) is calloc(n,1).
3674    return true when the call is transformed, false otherwise.
3675    When nonnull uses RVALS to determine range information.  */
3676 
3677 static bool
handle_builtin_memset(gimple_stmt_iterator * gsi,bool * zero_write,pointer_query & ptr_qry)3678 handle_builtin_memset (gimple_stmt_iterator *gsi, bool *zero_write,
3679 		       pointer_query &ptr_qry)
3680 {
3681   gimple *memset_stmt = gsi_stmt (*gsi);
3682   tree ptr = gimple_call_arg (memset_stmt, 0);
3683   /* Set to the non-constant offset added to PTR.  */
3684   wide_int offrng[2];
3685   int idx1 = get_stridx (ptr, offrng, ptr_qry.rvals);
3686   if (idx1 <= 0)
3687     return false;
3688   strinfo *si1 = get_strinfo (idx1);
3689   if (!si1)
3690     return false;
3691   gimple *alloc_stmt = si1->alloc;
3692   if (!alloc_stmt || !is_gimple_call (alloc_stmt))
3693     return false;
3694   tree callee1 = gimple_call_fndecl (alloc_stmt);
3695   if (!valid_builtin_call (alloc_stmt))
3696     return false;
3697   tree alloc_size = gimple_call_arg (alloc_stmt, 0);
3698   tree memset_size = gimple_call_arg (memset_stmt, 2);
3699 
3700   /* Check for overflow.  */
3701   maybe_warn_overflow (memset_stmt, memset_size, ptr_qry, NULL, false, true);
3702 
3703   /* Bail when there is no statement associated with the destination
3704      (the statement may be null even when SI1->ALLOC is not).  */
3705   if (!si1->stmt)
3706     return false;
3707 
3708   /* Avoid optimizing if store is at a variable offset from the beginning
3709      of the allocated object.  */
3710   if (offrng[0] != 0 || offrng[0] != offrng[1])
3711     return false;
3712 
3713   /* Bail when the call writes a non-zero value.  */
3714   if (!integer_zerop (gimple_call_arg (memset_stmt, 1)))
3715     return false;
3716 
3717   /* Let the caller know the memset call cleared the destination.  */
3718   *zero_write = true;
3719 
3720   enum built_in_function code1 = DECL_FUNCTION_CODE (callee1);
3721   if (code1 == BUILT_IN_CALLOC)
3722     /* Not touching alloc_stmt */ ;
3723   else if (code1 == BUILT_IN_MALLOC
3724 	   && operand_equal_p (memset_size, alloc_size, 0))
3725     {
3726       /* Replace the malloc + memset calls with calloc.  */
3727       gimple_stmt_iterator gsi1 = gsi_for_stmt (si1->stmt);
3728       update_gimple_call (&gsi1, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
3729 			  alloc_size, build_one_cst (size_type_node));
3730       si1->nonzero_chars = build_int_cst (size_type_node, 0);
3731       si1->full_string_p = true;
3732       si1->stmt = gsi_stmt (gsi1);
3733     }
3734   else
3735     return false;
3736   tree lhs = gimple_call_lhs (memset_stmt);
3737   unlink_stmt_vdef (memset_stmt);
3738   if (lhs)
3739     {
3740       gimple *assign = gimple_build_assign (lhs, ptr);
3741       gsi_replace (gsi, assign, false);
3742     }
3743   else
3744     {
3745       gsi_remove (gsi, true);
3746       release_defs (memset_stmt);
3747     }
3748 
3749   return true;
3750 }
3751 
3752 /* Return first such statement if RES is used in statements testing its
3753    equality to zero, and null otherwise.  If EXCLUSIVE is true, return
3754    nonnull if and only RES is used in such expressions exclusively and
3755    in none other.  */
3756 
3757 static gimple *
3758 use_in_zero_equality (tree res, bool exclusive = true)
3759 {
3760   gimple *first_use = NULL;
3761 
3762   use_operand_p use_p;
3763   imm_use_iterator iter;
3764 
FOR_EACH_IMM_USE_FAST(use_p,iter,res)3765   FOR_EACH_IMM_USE_FAST (use_p, iter, res)
3766     {
3767       gimple *use_stmt = USE_STMT (use_p);
3768 
3769       if (is_gimple_debug (use_stmt))
3770         continue;
3771 
3772       if (gimple_code (use_stmt) == GIMPLE_ASSIGN)
3773 	{
3774 	  tree_code code = gimple_assign_rhs_code (use_stmt);
3775 	  if (code == COND_EXPR)
3776 	    {
3777 	      tree cond_expr = gimple_assign_rhs1 (use_stmt);
3778 	      if ((TREE_CODE (cond_expr) != EQ_EXPR
3779 		   && (TREE_CODE (cond_expr) != NE_EXPR))
3780 		  || !integer_zerop (TREE_OPERAND (cond_expr, 1)))
3781 		{
3782 		  if (exclusive)
3783 		    return NULL;
3784 		  continue;
3785 		}
3786 	    }
3787 	  else if (code == EQ_EXPR || code == NE_EXPR)
3788 	    {
3789 	      if (!integer_zerop (gimple_assign_rhs2 (use_stmt)))
3790 		{
3791 		  if (exclusive)
3792 		    return NULL;
3793 		  continue;
3794 		}
3795             }
3796 	  else if (exclusive)
3797 	    return NULL;
3798 	  else
3799 	    continue;
3800 	}
3801       else if (gimple_code (use_stmt) == GIMPLE_COND)
3802 	{
3803 	  tree_code code = gimple_cond_code (use_stmt);
3804 	  if ((code != EQ_EXPR && code != NE_EXPR)
3805 	      || !integer_zerop (gimple_cond_rhs (use_stmt)))
3806 	    {
3807 	      if (exclusive)
3808 		return NULL;
3809 	      continue;
3810 	    }
3811 	}
3812       else if (exclusive)
3813 	return NULL;
3814       else
3815 	continue;
3816 
3817       if (!first_use)
3818 	first_use = use_stmt;
3819     }
3820 
3821   return first_use;
3822 }
3823 
3824 /* Handle a call to memcmp.  We try to handle small comparisons by
3825    converting them to load and compare, and replacing the call to memcmp
3826    with a __builtin_memcmp_eq call where possible.
3827    return true when call is transformed, return false otherwise.  */
3828 
3829 static bool
handle_builtin_memcmp(gimple_stmt_iterator * gsi)3830 handle_builtin_memcmp (gimple_stmt_iterator *gsi)
3831 {
3832   gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
3833   tree res = gimple_call_lhs (stmt);
3834 
3835   if (!res || !use_in_zero_equality (res))
3836     return false;
3837 
3838   tree arg1 = gimple_call_arg (stmt, 0);
3839   tree arg2 = gimple_call_arg (stmt, 1);
3840   tree len = gimple_call_arg (stmt, 2);
3841   unsigned HOST_WIDE_INT leni;
3842 
3843   if (tree_fits_uhwi_p (len)
3844       && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode)
3845       && pow2p_hwi (leni))
3846     {
3847       leni *= CHAR_TYPE_SIZE;
3848       unsigned align1 = get_pointer_alignment (arg1);
3849       unsigned align2 = get_pointer_alignment (arg2);
3850       unsigned align = MIN (align1, align2);
3851       scalar_int_mode mode;
3852       if (int_mode_for_size (leni, 1).exists (&mode)
3853 	  && (align >= leni || !targetm.slow_unaligned_access (mode, align)))
3854 	{
3855 	  location_t loc = gimple_location (stmt);
3856 	  tree type, off;
3857 	  type = build_nonstandard_integer_type (leni, 1);
3858 	  gcc_assert (known_eq (GET_MODE_BITSIZE (TYPE_MODE (type)), leni));
3859 	  tree ptrtype = build_pointer_type_for_mode (char_type_node,
3860 						      ptr_mode, true);
3861 	  off = build_int_cst (ptrtype, 0);
3862 	  arg1 = build2_loc (loc, MEM_REF, type, arg1, off);
3863 	  arg2 = build2_loc (loc, MEM_REF, type, arg2, off);
3864 	  tree tem1 = fold_const_aggregate_ref (arg1);
3865 	  if (tem1)
3866 	    arg1 = tem1;
3867 	  tree tem2 = fold_const_aggregate_ref (arg2);
3868 	  if (tem2)
3869 	    arg2 = tem2;
3870 	  res = fold_convert_loc (loc, TREE_TYPE (res),
3871 				  fold_build2_loc (loc, NE_EXPR,
3872 						   boolean_type_node,
3873 						   arg1, arg2));
3874 	  gimplify_and_update_call_from_tree (gsi, res);
3875 	  return true;
3876 	}
3877     }
3878 
3879   gimple_call_set_fndecl (stmt, builtin_decl_explicit (BUILT_IN_MEMCMP_EQ));
3880   return true;
3881 }
3882 
3883 /* Given strinfo IDX for ARG, sets LENRNG[] to the range of lengths
3884    of the string(s) referenced by ARG if it can be determined.
3885    If the length cannot be determined, sets *SIZE to the size of
3886    the array the string is stored in, if any.  If no such array is
3887    known, sets *SIZE to -1.  When the strings are nul-terminated sets
3888    *NULTERM to true, otherwise to false.  When nonnull uses RVALS to
3889    determine range information. Returns true on success.  */
3890 
3891 static bool
get_len_or_size(gimple * stmt,tree arg,int idx,unsigned HOST_WIDE_INT lenrng[2],unsigned HOST_WIDE_INT * size,bool * nulterm,range_query * rvals)3892 get_len_or_size (gimple *stmt, tree arg, int idx,
3893 		 unsigned HOST_WIDE_INT lenrng[2],
3894 		 unsigned HOST_WIDE_INT *size, bool *nulterm,
3895 		 range_query *rvals)
3896 {
3897   /* Invalidate.  */
3898   *size = HOST_WIDE_INT_M1U;
3899 
3900   if (idx < 0)
3901     {
3902       /* IDX is the inverted constant string length.  */
3903       lenrng[0] = ~idx;
3904       lenrng[1] = lenrng[0];
3905       *nulterm = true;
3906       return true;
3907     }
3908 
3909   /* Set so that both LEN and ~LEN are invalid lengths, i.e., maximum
3910      possible length + 1.  */
3911   lenrng[0] = lenrng[1] = HOST_WIDE_INT_MAX;
3912 
3913   if (strinfo *si = idx ? get_strinfo (idx) : NULL)
3914     {
3915       /* FIXME: Handle all this in_range_strlen_dynamic.  */
3916       if (!si->nonzero_chars)
3917 	;
3918       else if (tree_fits_uhwi_p (si->nonzero_chars))
3919 	{
3920 	  lenrng[0] = tree_to_uhwi (si->nonzero_chars);
3921 	  *nulterm = si->full_string_p;
3922 	  /* Set the upper bound only if the string is known to be
3923 	     nul-terminated, otherwise leave it at maximum + 1.  */
3924 	  if (*nulterm)
3925 	    lenrng[1] = lenrng[0];
3926 	}
3927       else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
3928 	{
3929 	  wide_int min, max;
3930 	  // FIXME: Use range_query instead of global ranges.
3931 	  value_range_kind rng = get_range_info (si->nonzero_chars, &min, &max);
3932 	  if (rng == VR_RANGE)
3933 	    {
3934 	      lenrng[0] = min.to_uhwi ();
3935 	      lenrng[1] = max.to_uhwi ();
3936 	      *nulterm = si->full_string_p;
3937 	    }
3938 	}
3939     }
3940 
3941   if (lenrng[0] != HOST_WIDE_INT_MAX)
3942     return true;
3943 
3944   /* Compute the minimum and maximum real or possible lengths.  */
3945   c_strlen_data lendata = { };
3946   /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
3947      to have it set to the length of the longest string in a PHI.  */
3948   lendata.maxbound = arg;
3949   get_range_strlen_dynamic (arg, stmt, &lendata, rvals);
3950 
3951   unsigned HOST_WIDE_INT maxbound = HOST_WIDE_INT_M1U;
3952   if (tree_fits_uhwi_p (lendata.maxbound)
3953       && !integer_all_onesp (lendata.maxbound))
3954     maxbound = tree_to_uhwi (lendata.maxbound);
3955 
3956   if (tree_fits_uhwi_p (lendata.minlen) && tree_fits_uhwi_p (lendata.maxlen))
3957     {
3958       unsigned HOST_WIDE_INT minlen = tree_to_uhwi (lendata.minlen);
3959       unsigned HOST_WIDE_INT maxlen = tree_to_uhwi (lendata.maxlen);
3960 
3961       /* The longest string in this data model.  */
3962       const unsigned HOST_WIDE_INT lenmax
3963 	= tree_to_uhwi (max_object_size ()) - 2;
3964 
3965       if (maxbound == HOST_WIDE_INT_M1U)
3966 	{
3967 	  lenrng[0] = minlen;
3968 	  lenrng[1] = maxlen;
3969 	  *nulterm = minlen == maxlen;
3970 	}
3971       else if (maxlen < lenmax)
3972 	{
3973 	  *size = maxbound + 1;
3974 	  *nulterm = false;
3975 	}
3976       else
3977 	return false;
3978 
3979       return true;
3980     }
3981 
3982   if (maxbound != HOST_WIDE_INT_M1U
3983       && lendata.maxlen
3984       && !integer_all_onesp (lendata.maxlen))
3985     {
3986       /* Set *SIZE to LENDATA.MAXBOUND which is a conservative estimate
3987 	 of the longest string based on the sizes of the arrays referenced
3988 	 by ARG.  */
3989       *size = maxbound + 1;
3990       *nulterm = false;
3991       return true;
3992     }
3993 
3994   return false;
3995 }
3996 
3997 /* If IDX1 and IDX2 refer to strings A and B of unequal lengths, return
3998    the result of 0 == strncmp (A, B, BOUND) (which is the same as strcmp
3999    for a sufficiently large BOUND).  If the result is based on the length
4000    of one string being greater than the longest string that would fit in
4001    the array pointer to by the argument, set *PLEN and *PSIZE to
4002    the corresponding length (or its complement when the string is known
4003    to be at least as long and need not be nul-terminated) and size.
4004    Otherwise return null.  */
4005 
4006 static tree
strxcmp_eqz_result(gimple * stmt,tree arg1,int idx1,tree arg2,int idx2,unsigned HOST_WIDE_INT bound,unsigned HOST_WIDE_INT len[2],unsigned HOST_WIDE_INT * psize,range_query * rvals)4007 strxcmp_eqz_result (gimple *stmt, tree arg1, int idx1, tree arg2, int idx2,
4008 		    unsigned HOST_WIDE_INT bound, unsigned HOST_WIDE_INT len[2],
4009 		    unsigned HOST_WIDE_INT *psize, range_query *rvals)
4010 {
4011   /* Determine the range the length of each string is in and whether it's
4012      known to be nul-terminated, or the size of the array it's stored in.  */
4013   bool nul1, nul2;
4014   unsigned HOST_WIDE_INT siz1, siz2;
4015   unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4016   if (!get_len_or_size (stmt, arg1, idx1, len1rng, &siz1, &nul1, rvals)
4017       || !get_len_or_size (stmt, arg2, idx2, len2rng, &siz2, &nul2, rvals))
4018     return NULL_TREE;
4019 
4020   /* BOUND is set to HWI_M1U for strcmp and less to strncmp, and LENiRNG
4021      to HWI_MAX when invalid.  Adjust the length of each string to consider
4022      to be no more than BOUND.  */
4023   if (len1rng[0] < HOST_WIDE_INT_MAX && len1rng[0] > bound)
4024     len1rng[0] = bound;
4025   if (len1rng[1] < HOST_WIDE_INT_MAX && len1rng[1] > bound)
4026     len1rng[1] = bound;
4027   if (len2rng[0] < HOST_WIDE_INT_MAX && len2rng[0] > bound)
4028     len2rng[0] = bound;
4029   if (len2rng[1] < HOST_WIDE_INT_MAX && len2rng[1] > bound)
4030     len2rng[1] = bound;
4031 
4032   /* Two empty strings are equal.  */
4033   if (len1rng[1] == 0 && len2rng[1] == 0)
4034     return integer_one_node;
4035 
4036   /* The strings are definitely unequal when the lower bound of the length
4037      of one of them is greater than the length of the longest string that
4038      would fit into the other array.  */
4039   if (len1rng[0] == HOST_WIDE_INT_MAX
4040       && len2rng[0] != HOST_WIDE_INT_MAX
4041       && ((len2rng[0] < bound && len2rng[0] >= siz1)
4042 	  || len2rng[0] > siz1))
4043     {
4044       *psize = siz1;
4045       len[0] = len1rng[0];
4046       /* Set LEN[0] to the lower bound of ARG1's length when it's
4047 	 nul-terminated or to the complement of its minimum length
4048 	 otherwise,  */
4049       len[1] = nul2 ? len2rng[0] : ~len2rng[0];
4050       return integer_zero_node;
4051     }
4052 
4053   if (len2rng[0] == HOST_WIDE_INT_MAX
4054       && len1rng[0] != HOST_WIDE_INT_MAX
4055       && ((len1rng[0] < bound && len1rng[0] >= siz2)
4056 	  || len1rng[0] > siz2))
4057     {
4058       *psize = siz2;
4059       len[0] = nul1 ? len1rng[0] : ~len1rng[0];
4060       len[1] = len2rng[0];
4061       return integer_zero_node;
4062     }
4063 
4064   /* The strings are also definitely unequal when their lengths are unequal
4065      and at least one is nul-terminated.  */
4066   if (len1rng[0] != HOST_WIDE_INT_MAX
4067       && len2rng[0] != HOST_WIDE_INT_MAX
4068       && ((len1rng[1] < len2rng[0] && nul1)
4069 	  || (len2rng[1] < len1rng[0] && nul2)))
4070     {
4071       if (bound <= len1rng[0] || bound <= len2rng[0])
4072 	*psize = bound;
4073       else
4074 	*psize = HOST_WIDE_INT_M1U;
4075 
4076       len[0] = len1rng[0];
4077       len[1] = len2rng[0];
4078       return integer_zero_node;
4079     }
4080 
4081   /* The string lengths may be equal or unequal.  Even when equal and
4082      both strings nul-terminated, without the string contents there's
4083      no way to determine whether they are equal.  */
4084   return NULL_TREE;
4085 }
4086 
4087 /* Diagnose pointless calls to strcmp or strncmp STMT with string
4088    arguments of lengths LEN or size SIZ and (for strncmp) BOUND,
4089    whose result is used in equality expressions that evaluate to
4090    a constant due to one argument being longer than the size of
4091    the other.  */
4092 
4093 static void
maybe_warn_pointless_strcmp(gimple * stmt,HOST_WIDE_INT bound,unsigned HOST_WIDE_INT len[2],unsigned HOST_WIDE_INT siz)4094 maybe_warn_pointless_strcmp (gimple *stmt, HOST_WIDE_INT bound,
4095 			     unsigned HOST_WIDE_INT len[2],
4096 			     unsigned HOST_WIDE_INT siz)
4097 {
4098   tree lhs = gimple_call_lhs (stmt);
4099   gimple *use = use_in_zero_equality (lhs, /* exclusive = */ false);
4100   if (!use)
4101     return;
4102 
4103   bool at_least = false;
4104 
4105   /* Excessive LEN[i] indicates a lower bound.  */
4106   if (len[0] > HOST_WIDE_INT_MAX)
4107     {
4108       at_least = true;
4109       len[0] = ~len[0];
4110     }
4111 
4112   if (len[1] > HOST_WIDE_INT_MAX)
4113     {
4114       at_least = true;
4115       len[1] = ~len[1];
4116     }
4117 
4118   unsigned HOST_WIDE_INT minlen = MIN (len[0], len[1]);
4119 
4120   /* FIXME: Include a note pointing to the declaration of the smaller
4121      array.  */
4122   location_t stmt_loc = gimple_or_expr_nonartificial_location (stmt, lhs);
4123 
4124   tree callee = gimple_call_fndecl (stmt);
4125   bool warned = false;
4126   if (siz <= minlen && bound == -1)
4127     warned = warning_at (stmt_loc, OPT_Wstring_compare,
4128 			 (at_least
4129 			  ? G_("%G%qD of a string of length %wu or more and "
4130 			       "an array of size %wu evaluates to nonzero")
4131 			  : G_("%G%qD of a string of length %wu and an array "
4132 			       "of size %wu evaluates to nonzero")),
4133 			 stmt, callee, minlen, siz);
4134   else if (!at_least && siz <= HOST_WIDE_INT_MAX)
4135     {
4136       if (len[0] != HOST_WIDE_INT_MAX && len[1] != HOST_WIDE_INT_MAX)
4137 	warned = warning_at (stmt_loc, OPT_Wstring_compare,
4138 			     "%G%qD of strings of length %wu and %wu "
4139 			     "and bound of %wu evaluates to nonzero",
4140 			     stmt, callee, len[0], len[1], bound);
4141       else
4142 	warned = warning_at (stmt_loc, OPT_Wstring_compare,
4143 			     "%G%qD of a string of length %wu, an array "
4144 			     "of size %wu and bound of %wu evaluates to "
4145 			     "nonzero",
4146 			     stmt, callee, minlen, siz, bound);
4147     }
4148 
4149   if (!warned)
4150     return;
4151 
4152   location_t use_loc = gimple_location (use);
4153   if (LOCATION_LINE (stmt_loc) != LOCATION_LINE (use_loc))
4154     inform (use_loc, "in this expression");
4155 }
4156 
4157 
4158 /* Optimize a call to strcmp or strncmp either by folding it to a constant
4159    when possible or by transforming the latter to the former.  Warn about
4160    calls where the length of one argument is greater than the size of
4161    the array to which the other argument points if the latter's length
4162    is not known.  Return true when the call has been transformed into
4163    another and false otherwise.  */
4164 
4165 static bool
handle_builtin_string_cmp(gimple_stmt_iterator * gsi,range_query * rvals)4166 handle_builtin_string_cmp (gimple_stmt_iterator *gsi, range_query *rvals)
4167 {
4168   gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
4169   tree lhs = gimple_call_lhs (stmt);
4170 
4171   if (!lhs)
4172     return false;
4173 
4174   tree arg1 = gimple_call_arg (stmt, 0);
4175   tree arg2 = gimple_call_arg (stmt, 1);
4176   int idx1 = get_stridx (arg1);
4177   int idx2 = get_stridx (arg2);
4178 
4179   /* For strncmp set to the value of the third argument if known.  */
4180   HOST_WIDE_INT bound = -1;
4181   tree len = NULL_TREE;
4182   /* Extract the strncmp bound.  */
4183   if (gimple_call_num_args (stmt) == 3)
4184     {
4185       len = gimple_call_arg (stmt, 2);
4186       if (tree_fits_shwi_p (len))
4187         bound = tree_to_shwi (len);
4188 
4189       /* If the bound argument is NOT known, do nothing.  */
4190       if (bound < 0)
4191 	return false;
4192     }
4193 
4194   /* Avoid folding if either argument is not a nul-terminated array.
4195      Defer warning until later.  */
4196   if (!check_nul_terminated_array (NULL_TREE, arg1, len)
4197       || !check_nul_terminated_array (NULL_TREE, arg2, len))
4198     return false;
4199 
4200   {
4201     /* Set to the length of one argument (or its complement if it's
4202        the lower bound of a range) and the size of the array storing
4203        the other if the result is based on the former being equal to
4204        or greater than the latter.  */
4205     unsigned HOST_WIDE_INT len[2] = { HOST_WIDE_INT_MAX, HOST_WIDE_INT_MAX };
4206     unsigned HOST_WIDE_INT siz = HOST_WIDE_INT_M1U;
4207 
4208     /* Try to determine if the two strings are either definitely equal
4209        or definitely unequal and if so, either fold the result to zero
4210        (when equal) or set the range of the result to ~[0, 0] otherwise.  */
4211     if (tree eqz = strxcmp_eqz_result (stmt, arg1, idx1, arg2, idx2, bound,
4212 				       len, &siz, rvals))
4213       {
4214 	if (integer_zerop (eqz))
4215 	  {
4216 	    maybe_warn_pointless_strcmp (stmt, bound, len, siz);
4217 
4218 	    /* When the lengths of the first two string arguments are
4219 	       known to be unequal set the range of the result to non-zero.
4220 	       This allows the call to be eliminated if its result is only
4221 	       used in tests for equality to zero.  */
4222 	    wide_int zero = wi::zero (TYPE_PRECISION (TREE_TYPE (lhs)));
4223 	    set_range_info (lhs, VR_ANTI_RANGE, zero, zero);
4224 	    return false;
4225 	  }
4226 	/* When the two strings are definitely equal (such as when they
4227 	   are both empty) fold the call to the constant result.  */
4228 	replace_call_with_value (gsi, integer_zero_node);
4229 	return true;
4230       }
4231   }
4232 
4233   /* Return if nothing is known about the strings pointed to by ARG1
4234      and ARG2.  */
4235   if (idx1 == 0 && idx2 == 0)
4236     return false;
4237 
4238   /* Determine either the length or the size of each of the strings,
4239      whichever is available.  */
4240   HOST_WIDE_INT cstlen1 = -1, cstlen2 = -1;
4241   HOST_WIDE_INT arysiz1 = -1, arysiz2 = -1;
4242 
4243   {
4244     unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4245     unsigned HOST_WIDE_INT arsz1, arsz2;
4246     bool nulterm[2];
4247 
4248     if (!get_len_or_size (stmt, arg1, idx1, len1rng, &arsz1, nulterm, rvals)
4249 	|| !get_len_or_size (stmt, arg2, idx2, len2rng, &arsz2, nulterm + 1,
4250 			     rvals))
4251       return false;
4252 
4253     if (len1rng[0] == len1rng[1] && len1rng[0] < HOST_WIDE_INT_MAX)
4254       cstlen1 = len1rng[0];
4255     else if (arsz1 < HOST_WIDE_INT_M1U)
4256       arysiz1 = arsz1;
4257 
4258     if (len2rng[0] == len2rng[1] && len2rng[0] < HOST_WIDE_INT_MAX)
4259       cstlen2 = len2rng[0];
4260     else if (arsz2 < HOST_WIDE_INT_M1U)
4261       arysiz2 = arsz2;
4262   }
4263 
4264   /* Bail if neither the string length nor the size of the array
4265      it is stored in can be determined.  */
4266   if ((cstlen1 < 0 && arysiz1 < 0)
4267       || (cstlen2 < 0 && arysiz2 < 0)
4268       || (cstlen1 < 0 && cstlen2 < 0))
4269     return false;
4270 
4271   if (cstlen1 >= 0)
4272     ++cstlen1;
4273   if (cstlen2 >= 0)
4274     ++cstlen2;
4275 
4276   /* The exact number of characters to compare.  */
4277   HOST_WIDE_INT cmpsiz;
4278   if (cstlen1 >= 0 && cstlen2 >= 0)
4279     cmpsiz = MIN (cstlen1, cstlen2);
4280   else if (cstlen1 >= 0)
4281     cmpsiz = cstlen1;
4282   else
4283     cmpsiz = cstlen2;
4284   if (bound >= 0)
4285     cmpsiz = MIN (cmpsiz, bound);
4286   /* The size of the array in which the unknown string is stored.  */
4287   HOST_WIDE_INT varsiz = arysiz1 < 0 ? arysiz2 : arysiz1;
4288 
4289   if ((varsiz < 0 || cmpsiz < varsiz) && use_in_zero_equality (lhs))
4290     {
4291       /* If the known length is less than the size of the other array
4292 	 and the strcmp result is only used to test equality to zero,
4293 	 transform the call to the equivalent _eq call.  */
4294       if (tree fn = builtin_decl_implicit (bound < 0 ? BUILT_IN_STRCMP_EQ
4295 					   : BUILT_IN_STRNCMP_EQ))
4296 	{
4297 	  tree n = build_int_cst (size_type_node, cmpsiz);
4298 	  update_gimple_call (gsi, fn, 3, arg1, arg2, n);
4299 	  return true;
4300 	}
4301     }
4302 
4303   return false;
4304 }
4305 
4306 /* Handle a POINTER_PLUS_EXPR statement.
4307    For p = "abcd" + 2; compute associated length, or if
4308    p = q + off is pointing to a '\0' character of a string, call
4309    zero_length_string on it.  */
4310 
4311 static void
handle_pointer_plus(gimple_stmt_iterator * gsi)4312 handle_pointer_plus (gimple_stmt_iterator *gsi)
4313 {
4314   gimple *stmt = gsi_stmt (*gsi);
4315   tree lhs = gimple_assign_lhs (stmt), off;
4316   int idx = get_stridx (gimple_assign_rhs1 (stmt));
4317   strinfo *si, *zsi;
4318 
4319   if (idx == 0)
4320     return;
4321 
4322   if (idx < 0)
4323     {
4324       tree off = gimple_assign_rhs2 (stmt);
4325       if (tree_fits_uhwi_p (off)
4326 	  && tree_to_uhwi (off) <= (unsigned HOST_WIDE_INT) ~idx)
4327 	ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)]
4328 	    = ~(~idx - (int) tree_to_uhwi (off));
4329       return;
4330     }
4331 
4332   si = get_strinfo (idx);
4333   if (si == NULL || si->nonzero_chars == NULL_TREE)
4334     return;
4335 
4336   off = gimple_assign_rhs2 (stmt);
4337   zsi = NULL;
4338   if (si->full_string_p && operand_equal_p (si->nonzero_chars, off, 0))
4339     zsi = zero_length_string (lhs, si);
4340   else if (TREE_CODE (off) == SSA_NAME)
4341     {
4342       gimple *def_stmt = SSA_NAME_DEF_STMT (off);
4343       if (gimple_assign_single_p (def_stmt)
4344 	  && si->full_string_p
4345 	  && operand_equal_p (si->nonzero_chars,
4346 			      gimple_assign_rhs1 (def_stmt), 0))
4347 	zsi = zero_length_string (lhs, si);
4348     }
4349   if (zsi != NULL
4350       && si->endptr != NULL_TREE
4351       && si->endptr != lhs
4352       && TREE_CODE (si->endptr) == SSA_NAME)
4353     {
4354       enum tree_code rhs_code
4355 	= useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (si->endptr))
4356 	  ? SSA_NAME : NOP_EXPR;
4357       gimple_assign_set_rhs_with_ops (gsi, rhs_code, si->endptr);
4358       gcc_assert (gsi_stmt (*gsi) == stmt);
4359       update_stmt (stmt);
4360     }
4361 }
4362 
4363 static bool
4364 count_nonzero_bytes_addr (tree, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
4365 			  unsigned [3], bool *, bool *, bool *,
4366 			  range_query *, ssa_name_limit_t &);
4367 
4368 /* Determines the minimum and maximum number of leading non-zero bytes
4369    in the representation of EXP and set LENRANGE[0] and LENRANGE[1]
4370    to each.
4371    Sets LENRANGE[2] to the total size of the access (which may be less
4372    than LENRANGE[1] when what's being referenced by EXP is a pointer
4373    rather than an array).
4374    Sets *NULTERM if the representation contains a zero byte, and sets
4375    *ALLNUL if all the bytes are zero.
4376    OFFSET and NBYTES are the offset into the representation and
4377    the size of the access to it determined from an ADDR_EXPR (i.e.,
4378    a pointer) or MEM_REF or zero for other expressions.
4379    Uses RVALS to determine range information.
4380    Avoids recursing deeper than the limits in SNLIM allow.
4381    Returns true on success and false otherwise.  */
4382 
4383 static bool
count_nonzero_bytes(tree exp,unsigned HOST_WIDE_INT offset,unsigned HOST_WIDE_INT nbytes,unsigned lenrange[3],bool * nulterm,bool * allnul,bool * allnonnul,range_query * rvals,ssa_name_limit_t & snlim)4384 count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset,
4385 		     unsigned HOST_WIDE_INT nbytes,
4386 		     unsigned lenrange[3], bool *nulterm,
4387 		     bool *allnul, bool *allnonnul, range_query *rvals,
4388 		     ssa_name_limit_t &snlim)
4389 {
4390   if (TREE_CODE (exp) == SSA_NAME)
4391     {
4392       /* Handle non-zero single-character stores specially.  */
4393       tree type = TREE_TYPE (exp);
4394       if (TREE_CODE (type) == INTEGER_TYPE
4395 	  && TYPE_MODE (type) == TYPE_MODE (char_type_node)
4396 	  && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node)
4397 	  && tree_expr_nonzero_p (exp))
4398 	{
4399 	  /* If the character EXP is known to be non-zero (even if its
4400 	     exact value is not known) recurse once to set the range
4401 	     for an arbitrary constant.  */
4402 	  exp = build_int_cst (type, 1);
4403 	  return count_nonzero_bytes (exp, offset, 1, lenrange,
4404 				      nulterm, allnul, allnonnul, rvals, snlim);
4405 	}
4406 
4407       gimple *stmt = SSA_NAME_DEF_STMT (exp);
4408       if (gimple_assign_single_p (stmt))
4409 	{
4410 	  exp = gimple_assign_rhs1 (stmt);
4411 	  if (TREE_CODE (exp) != MEM_REF)
4412 	    return false;
4413 	  /* Handle MEM_REF below.  */
4414 	}
4415       else if (gimple_code (stmt) == GIMPLE_PHI)
4416 	{
4417 	  /* Avoid processing an SSA_NAME that has already been visited
4418 	     or if an SSA_NAME limit has been reached.  Indicate success
4419 	     if the former and failure if the latter.  */
4420 	  if (int res = snlim.next_phi (exp))
4421 	    return res > 0;
4422 
4423 	  /* Determine the minimum and maximum from the PHI arguments.  */
4424 	  unsigned int n = gimple_phi_num_args (stmt);
4425 	  for (unsigned i = 0; i != n; i++)
4426 	    {
4427 	      tree def = gimple_phi_arg_def (stmt, i);
4428 	      if (!count_nonzero_bytes (def, offset, nbytes, lenrange, nulterm,
4429 					allnul, allnonnul, rvals, snlim))
4430 		return false;
4431 	    }
4432 
4433 	  return true;
4434 	}
4435     }
4436 
4437   if (TREE_CODE (exp) == MEM_REF)
4438     {
4439       if (nbytes)
4440 	return false;
4441 
4442       tree arg = TREE_OPERAND (exp, 0);
4443       tree off = TREE_OPERAND (exp, 1);
4444 
4445       if (TREE_CODE (off) != INTEGER_CST || !tree_fits_uhwi_p (off))
4446 	return false;
4447 
4448       unsigned HOST_WIDE_INT wioff = tree_to_uhwi (off);
4449       if (INT_MAX < wioff)
4450 	return false;
4451 
4452       offset += wioff;
4453       if (INT_MAX < offset)
4454 	return false;
4455 
4456       /* The size of the MEM_REF access determines the number of bytes.  */
4457       tree type = TREE_TYPE (exp);
4458       tree typesize = TYPE_SIZE_UNIT (type);
4459       if (!typesize || !tree_fits_uhwi_p (typesize))
4460 	return false;
4461       nbytes = tree_to_uhwi (typesize);
4462       if (!nbytes)
4463 	return false;
4464 
4465       /* Handle MEM_REF = SSA_NAME types of assignments.  */
4466       return count_nonzero_bytes_addr (arg, offset, nbytes, lenrange, nulterm,
4467 				       allnul, allnonnul, rvals, snlim);
4468     }
4469 
4470   if (VAR_P (exp) || TREE_CODE (exp) == CONST_DECL)
4471     {
4472       exp = ctor_for_folding (exp);
4473       if (!exp)
4474 	return false;
4475     }
4476 
4477   const char *prep = NULL;
4478   if (TREE_CODE (exp) == STRING_CST)
4479     {
4480       unsigned nchars = TREE_STRING_LENGTH (exp);
4481       if (nchars < offset)
4482 	return false;
4483 
4484       if (!nbytes)
4485 	/* If NBYTES hasn't been determined earlier, either from ADDR_EXPR
4486 	   (i.e., it's the size of a pointer), or from MEM_REF (as the size
4487 	   of the access), set it here to the size of the string, including
4488 	   all internal and trailing nuls if the string has any.  */
4489 	nbytes = nchars - offset;
4490       else if (nchars - offset < nbytes)
4491 	return false;
4492 
4493       prep = TREE_STRING_POINTER (exp) + offset;
4494     }
4495 
4496   unsigned char buf[256];
4497   if (!prep)
4498     {
4499       if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
4500 	return false;
4501       /* If the pointer to representation hasn't been set above
4502 	 for STRING_CST point it at the buffer.  */
4503       prep = reinterpret_cast <char *>(buf);
4504       /* Try to extract the representation of the constant object
4505 	 or expression starting from the offset.  */
4506       unsigned repsize = native_encode_expr (exp, buf, sizeof buf, offset);
4507       if (repsize < nbytes)
4508 	{
4509 	  /* This should only happen when REPSIZE is zero because EXP
4510 	     doesn't denote an object with a known initializer, except
4511 	     perhaps when the reference reads past its end.  */
4512 	  lenrange[0] = 0;
4513 	  prep = NULL;
4514 	}
4515       else if (!nbytes)
4516 	nbytes = repsize;
4517       else if (nbytes < repsize)
4518 	return false;
4519     }
4520 
4521   if (!nbytes)
4522     return false;
4523 
4524   /* Compute the number of leading nonzero bytes in the representation
4525      and update the minimum and maximum.  */
4526   unsigned n = prep ? strnlen (prep, nbytes) : nbytes;
4527 
4528   if (n < lenrange[0])
4529     lenrange[0] = n;
4530   if (lenrange[1] < n)
4531     lenrange[1] = n;
4532 
4533   /* Set the size of the representation.  */
4534   if (lenrange[2] < nbytes)
4535     lenrange[2] = nbytes;
4536 
4537   /* Clear NULTERM if none of the bytes is zero.  */
4538   if (n == nbytes)
4539     *nulterm = false;
4540 
4541   if (n)
4542     {
4543       /* When the initial number of non-zero bytes N is non-zero, reset
4544 	 *ALLNUL; if N is less than that the size of the representation
4545 	 also clear *ALLNONNUL.  */
4546       *allnul = false;
4547       if (n < nbytes)
4548 	*allnonnul = false;
4549     }
4550   else if (*allnul || *allnonnul)
4551     {
4552       *allnonnul = false;
4553 
4554       if (*allnul)
4555 	{
4556 	  /* When either ALLNUL is set and N is zero, also determine
4557 	     whether all subsequent bytes after the first one (which
4558 	     is nul) are zero or nonzero and clear ALLNUL if not.  */
4559 	  for (const char *p = prep; p != prep + nbytes; ++p)
4560 	    if (*p)
4561 	      {
4562 		*allnul = false;
4563 		break;
4564 	      }
4565 	}
4566     }
4567 
4568   return true;
4569 }
4570 
4571 /* Like count_nonzero_bytes, but instead of counting bytes in EXP, count
4572    bytes that are pointed to by EXP, which should be a pointer.  */
4573 
4574 static bool
count_nonzero_bytes_addr(tree exp,unsigned HOST_WIDE_INT offset,unsigned HOST_WIDE_INT nbytes,unsigned lenrange[3],bool * nulterm,bool * allnul,bool * allnonnul,range_query * rvals,ssa_name_limit_t & snlim)4575 count_nonzero_bytes_addr (tree exp, unsigned HOST_WIDE_INT offset,
4576 			  unsigned HOST_WIDE_INT nbytes,
4577 			  unsigned lenrange[3], bool *nulterm,
4578 			  bool *allnul, bool *allnonnul,
4579 			  range_query *rvals, ssa_name_limit_t &snlim)
4580 {
4581   int idx = get_stridx (exp);
4582   if (idx > 0)
4583     {
4584       strinfo *si = get_strinfo (idx);
4585       if (!si)
4586 	return false;
4587 
4588       /* Handle both constant lengths as well non-constant lengths
4589 	 in some range.  */
4590       unsigned HOST_WIDE_INT minlen, maxlen;
4591       if (tree_fits_shwi_p (si->nonzero_chars))
4592 	minlen = maxlen = tree_to_shwi (si->nonzero_chars);
4593       else if (si->nonzero_chars
4594 	       && TREE_CODE (si->nonzero_chars) == SSA_NAME)
4595 	{
4596 	  value_range vr;
4597 	  rvals->range_of_expr (vr, si->nonzero_chars, si->stmt);
4598 	  if (vr.kind () != VR_RANGE)
4599 	    return false;
4600 
4601 	  minlen = tree_to_uhwi (vr.min ());
4602 	  maxlen = tree_to_uhwi (vr.max ());
4603 	}
4604       else
4605 	return false;
4606 
4607       if (maxlen < offset)
4608 	return false;
4609 
4610       minlen = minlen < offset ? 0 : minlen - offset;
4611       maxlen -= offset;
4612       if (maxlen + 1 < nbytes)
4613 	return false;
4614 
4615       if (nbytes <= minlen)
4616 	*nulterm = false;
4617 
4618       if (nbytes < minlen)
4619 	{
4620 	  minlen = nbytes;
4621 	  if (nbytes < maxlen)
4622 	    maxlen = nbytes;
4623 	}
4624 
4625       if (minlen < lenrange[0])
4626 	lenrange[0] = minlen;
4627       if (lenrange[1] < maxlen)
4628 	lenrange[1] = maxlen;
4629 
4630       if (lenrange[2] < nbytes)
4631 	lenrange[2] = nbytes;
4632 
4633       /* Since only the length of the string are known and not its contents,
4634 	 clear ALLNUL and ALLNONNUL purely on the basis of the length.  */
4635       *allnul = false;
4636       if (minlen < nbytes)
4637 	*allnonnul = false;
4638 
4639       return true;
4640     }
4641 
4642   if (TREE_CODE (exp) == ADDR_EXPR)
4643     return count_nonzero_bytes (TREE_OPERAND (exp, 0), offset, nbytes,
4644 				lenrange, nulterm, allnul, allnonnul, rvals,
4645 				snlim);
4646 
4647   if (TREE_CODE (exp) == SSA_NAME)
4648     {
4649       gimple *stmt = SSA_NAME_DEF_STMT (exp);
4650       if (gimple_code (stmt) == GIMPLE_PHI)
4651 	{
4652 	  /* Avoid processing an SSA_NAME that has already been visited
4653 	     or if an SSA_NAME limit has been reached.  Indicate success
4654 	     if the former and failure if the latter.  */
4655 	  if (int res = snlim.next_phi (exp))
4656 	    return res > 0;
4657 
4658 	  /* Determine the minimum and maximum from the PHI arguments.  */
4659 	  unsigned int n = gimple_phi_num_args (stmt);
4660 	  for (unsigned i = 0; i != n; i++)
4661 	    {
4662 	      tree def = gimple_phi_arg_def (stmt, i);
4663 	      if (!count_nonzero_bytes_addr (def, offset, nbytes, lenrange,
4664 					     nulterm, allnul, allnonnul, rvals,
4665 					     snlim))
4666 		return false;
4667 	    }
4668 
4669 	  return true;
4670 	}
4671     }
4672 
4673   /* Otherwise we don't know anything.  */
4674   lenrange[0] = 0;
4675   if (lenrange[1] < nbytes)
4676     lenrange[1] = nbytes;
4677   if (lenrange[2] < nbytes)
4678     lenrange[2] = nbytes;
4679   *nulterm = false;
4680   *allnul = false;
4681   *allnonnul = false;
4682   return true;
4683 }
4684 
4685 /* Same as above except with an implicit SSA_NAME limit.  RVALS is used
4686    to determine ranges of dynamically computed string lengths (the results
4687    of strlen).  */
4688 
4689 static bool
count_nonzero_bytes(tree exp,unsigned lenrange[3],bool * nulterm,bool * allnul,bool * allnonnul,range_query * rvals)4690 count_nonzero_bytes (tree exp, unsigned lenrange[3], bool *nulterm,
4691 		     bool *allnul, bool *allnonnul, range_query *rvals)
4692 {
4693   /* Set to optimistic values so the caller doesn't have to worry about
4694      initializing these and to what.  On success, the function will clear
4695      these if it determines their values are different but being recursive
4696      it never sets either to true.  On failure, their values are
4697      unspecified.  */
4698   *nulterm = true;
4699   *allnul = true;
4700   *allnonnul = true;
4701 
4702   ssa_name_limit_t snlim;
4703   return count_nonzero_bytes (exp, 0, 0, lenrange, nulterm, allnul, allnonnul,
4704 			      rvals, snlim);
4705 }
4706 
4707 /* Handle a single or multibyte store other than by a built-in function,
4708    either via a single character assignment or by multi-byte assignment
4709    either via MEM_REF or via a type other than char (such as in
4710    '*(int*)a = 12345').  Return true to let the caller advance *GSI to
4711    the next statement in the basic block and false otherwise.  */
4712 
4713 static bool
handle_store(gimple_stmt_iterator * gsi,bool * zero_write,pointer_query & ptr_qry)4714 handle_store (gimple_stmt_iterator *gsi, bool *zero_write,
4715 	      pointer_query &ptr_qry)
4716 {
4717   int idx = -1;
4718   strinfo *si = NULL;
4719   gimple *stmt = gsi_stmt (*gsi);
4720   tree ssaname = NULL_TREE, lhs = gimple_assign_lhs (stmt);
4721   tree rhs = gimple_assign_rhs1 (stmt);
4722 
4723   range_query *const rvals = ptr_qry.rvals;
4724 
4725   /* The offset of the first byte in LHS modified by the store.  */
4726   unsigned HOST_WIDE_INT offset = 0;
4727 
4728   if (TREE_CODE (lhs) == MEM_REF
4729       && TREE_CODE (TREE_OPERAND (lhs, 0)) == SSA_NAME)
4730     {
4731       tree mem_offset = TREE_OPERAND (lhs, 1);
4732       if (tree_fits_uhwi_p (mem_offset))
4733 	{
4734 	  /* Get the strinfo for the base, and use it if it starts with at
4735 	     least OFFSET nonzero characters.  This is trivially true if
4736 	     OFFSET is zero.  */
4737 	  offset = tree_to_uhwi (mem_offset);
4738 	  idx = get_stridx (TREE_OPERAND (lhs, 0));
4739 	  if (idx > 0)
4740 	    si = get_strinfo (idx);
4741 	  if (offset == 0)
4742 	    ssaname = TREE_OPERAND (lhs, 0);
4743 	  else if (si == NULL || compare_nonzero_chars (si, offset, rvals) < 0)
4744 	    {
4745 	      *zero_write = initializer_zerop (rhs);
4746 
4747 	      bool dummy;
4748 	      unsigned lenrange[] = { UINT_MAX, 0, 0 };
4749 	      if (count_nonzero_bytes (rhs, lenrange, &dummy, &dummy, &dummy,
4750 				       rvals))
4751 		maybe_warn_overflow (stmt, lenrange[2], ptr_qry);
4752 
4753 	      return true;
4754 	    }
4755 	}
4756     }
4757   else
4758     {
4759       idx = get_addr_stridx (lhs, NULL_TREE, &offset, rvals);
4760       if (idx > 0)
4761 	si = get_strinfo (idx);
4762     }
4763 
4764   /* Minimum and maximum leading non-zero bytes and the size of the store.  */
4765   unsigned lenrange[] = { UINT_MAX, 0, 0 };
4766 
4767   /* Set to the minimum length of the string being assigned if known.  */
4768   unsigned HOST_WIDE_INT rhs_minlen;
4769 
4770   /* STORING_NONZERO_P is true iff not all stored characters are zero.
4771      STORING_ALL_NONZERO_P is true if all stored characters are zero.
4772      STORING_ALL_ZEROS_P is true iff all stored characters are zero.
4773      Both are false when it's impossible to determine which is true.  */
4774   bool storing_nonzero_p;
4775   bool storing_all_nonzero_p;
4776   bool storing_all_zeros_p;
4777   /* FULL_STRING_P is set when the stored sequence of characters form
4778      a nul-terminated string.  */
4779   bool full_string_p;
4780 
4781   const bool ranges_valid
4782     = count_nonzero_bytes (rhs, lenrange, &full_string_p,
4783 			   &storing_all_zeros_p, &storing_all_nonzero_p,
4784 			   rvals);
4785   if (ranges_valid)
4786     {
4787       rhs_minlen = lenrange[0];
4788       storing_nonzero_p = lenrange[1] > 0;
4789       *zero_write = storing_all_zeros_p;
4790 
4791       maybe_warn_overflow (stmt, lenrange[2], ptr_qry);
4792     }
4793   else
4794     {
4795       rhs_minlen = HOST_WIDE_INT_M1U;
4796       full_string_p = false;
4797       storing_nonzero_p = false;
4798       storing_all_zeros_p = false;
4799       storing_all_nonzero_p = false;
4800     }
4801 
4802   if (si != NULL)
4803     {
4804       /* The corresponding element is set to 1 if the first and last
4805 	 element, respectively, of the sequence of characters being
4806 	 written over the string described by SI ends before
4807 	 the terminating nul (if it has one), to zero if the nul is
4808 	 being overwritten but not beyond, or negative otherwise.  */
4809       int store_before_nul[2];
4810       if (ranges_valid)
4811 	{
4812 	  /* The offset of the last stored byte.  */
4813 	  unsigned HOST_WIDE_INT endoff = offset + lenrange[2] - 1;
4814 	  store_before_nul[0] = compare_nonzero_chars (si, offset, rvals);
4815 	  if (endoff == offset)
4816 	    store_before_nul[1] = store_before_nul[0];
4817 	  else
4818 	    store_before_nul[1] = compare_nonzero_chars (si, endoff, rvals);
4819 	}
4820       else
4821 	{
4822 	  store_before_nul[0] = compare_nonzero_chars (si, offset, rvals);
4823 	  store_before_nul[1] = store_before_nul[0];
4824 	  gcc_assert (offset == 0 || store_before_nul[0] >= 0);
4825 	}
4826 
4827       if (storing_all_zeros_p
4828 	  && store_before_nul[0] == 0
4829 	  && store_before_nul[1] == 0
4830 	  && si->full_string_p)
4831 	{
4832 	  /* When overwriting a '\0' with a '\0', the store can be removed
4833 	     if we know it has been stored in the current function.  */
4834 	  if (!stmt_could_throw_p (cfun, stmt) && si->writable)
4835 	    {
4836 	      unlink_stmt_vdef (stmt);
4837 	      release_defs (stmt);
4838 	      gsi_remove (gsi, true);
4839 	      return false;
4840 	    }
4841 	  else
4842 	    {
4843 	      si->writable = true;
4844 	      gsi_next (gsi);
4845 	      return false;
4846 	    }
4847 	}
4848 
4849       if (store_before_nul[1] > 0
4850 	  && storing_nonzero_p
4851 	  && lenrange[0] == lenrange[1]
4852 	  && lenrange[0] == lenrange[2]
4853 	  && TREE_CODE (TREE_TYPE (rhs)) == INTEGER_TYPE)
4854 	{
4855 	  /* Handle a store of one or more non-nul characters that ends
4856 	     before the terminating nul of the destination and so does
4857 	     not affect its length
4858 	     If si->nonzero_chars > OFFSET, we aren't overwriting '\0',
4859 	     and if we aren't storing '\0', we know that the length of
4860 	     the string and any other zero terminated string in memory
4861 	     remains the same.  In that case we move to the next gimple
4862 	     statement and return to signal the caller that it shouldn't
4863 	     invalidate anything.
4864 
4865 	     This is beneficial for cases like:
4866 
4867 	     char p[20];
4868 	     void foo (char *q)
4869 	     {
4870 	       strcpy (p, "foobar");
4871 	       size_t len = strlen (p);     // can be folded to 6
4872 	       size_t len2 = strlen (q);    // has to be computed
4873 	       p[0] = 'X';
4874 	       size_t len3 = strlen (p);    // can be folded to 6
4875 	       size_t len4 = strlen (q);    // can be folded to len2
4876 	       bar (len, len2, len3, len4);
4877 	       } */
4878 	  gsi_next (gsi);
4879 	  return false;
4880 	}
4881 
4882       if (storing_all_zeros_p
4883 	  || storing_nonzero_p
4884 	  || (offset != 0 && store_before_nul[1] > 0))
4885 	{
4886 	  /* When STORING_NONZERO_P, we know that the string will start
4887 	     with at least OFFSET + 1 nonzero characters.  If storing
4888 	     a single character, set si->NONZERO_CHARS to the result.
4889 	     If storing multiple characters, try to determine the number
4890 	     of leading non-zero characters and set si->NONZERO_CHARS to
4891 	     the result instead.
4892 
4893 	     When STORING_ALL_ZEROS_P, we know that the string is now
4894 	     OFFSET characters long.
4895 
4896 	     Otherwise, we're storing an unknown value at offset OFFSET,
4897 	     so need to clip the nonzero_chars to OFFSET.
4898 	     Use the minimum length of the string (or individual character)
4899 	     being stored if it's known.  Otherwise, STORING_NONZERO_P
4900 	     guarantees it's at least 1.  */
4901 	  HOST_WIDE_INT len
4902 	    = storing_nonzero_p && ranges_valid ? lenrange[0] : 1;
4903 	  location_t loc = gimple_location (stmt);
4904 	  tree oldlen = si->nonzero_chars;
4905 	  if (store_before_nul[1] == 0 && si->full_string_p)
4906 	    /* We're overwriting the nul terminator with a nonzero or
4907 	       unknown character.  If the previous stmt was a memcpy,
4908 	       its length may be decreased.  */
4909 	    adjust_last_stmt (si, stmt, false, ptr_qry);
4910 	  si = unshare_strinfo (si);
4911 	  if (storing_nonzero_p)
4912 	    {
4913 	      gcc_assert (len >= 0);
4914 	      si->nonzero_chars = build_int_cst (size_type_node, offset + len);
4915 	    }
4916 	  else
4917 	    si->nonzero_chars = build_int_cst (size_type_node, offset);
4918 
4919 	  /* Set FULL_STRING_P only if the length of the strings being
4920 	     written is the same, and clear it if the strings have
4921 	     different lengths.  In the latter case the length stored
4922 	     in si->NONZERO_CHARS becomes the lower bound.
4923 	     FIXME: Handle the upper bound of the length if possible.  */
4924 	  si->full_string_p = full_string_p && lenrange[0] == lenrange[1];
4925 
4926 	  if (storing_all_zeros_p
4927 	      && ssaname
4928 	      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
4929 	    si->endptr = ssaname;
4930 	  else
4931 	    si->endptr = NULL;
4932 	  si->next = 0;
4933 	  si->stmt = NULL;
4934 	  si->writable = true;
4935 	  si->dont_invalidate = true;
4936 	  if (oldlen)
4937 	    {
4938 	      tree adj = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
4939 					  si->nonzero_chars, oldlen);
4940 	      adjust_related_strinfos (loc, si, adj);
4941 	    }
4942 	  else
4943 	    si->prev = 0;
4944 	}
4945     }
4946   else if (idx == 0 && (storing_all_zeros_p || storing_nonzero_p))
4947     {
4948       if (ssaname)
4949 	idx = new_stridx (ssaname);
4950       else
4951 	idx = new_addr_stridx (lhs);
4952       if (idx != 0)
4953 	{
4954 	  tree ptr = (ssaname ? ssaname : build_fold_addr_expr (lhs));
4955 
4956 	  HOST_WIDE_INT slen;
4957 	  if (storing_all_zeros_p)
4958 	    slen = 0;
4959 	  else if (storing_nonzero_p && ranges_valid)
4960 	    {
4961 	      /* FIXME: Handle the upper bound of the length when
4962 		 LENRANGE[0] != LENRANGE[1].  */
4963 	      slen = lenrange[0];
4964 	      if (lenrange[0] != lenrange[1])
4965 		/* Set the minimum length but ignore the maximum
4966 		   for now.  */
4967 		full_string_p = false;
4968 	    }
4969 	  else
4970 	    slen = -1;
4971 
4972 	  tree len = (slen <= 0
4973 		      ? size_zero_node
4974 		      : build_int_cst (size_type_node, slen));
4975 	  si = new_strinfo (ptr, idx, len, slen >= 0 && full_string_p);
4976 	  set_strinfo (idx, si);
4977 	  if (storing_all_zeros_p
4978 	      && ssaname
4979 	      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
4980 	    si->endptr = ssaname;
4981 	  si->dont_invalidate = true;
4982 	  si->writable = true;
4983 	}
4984     }
4985   else if (idx == 0
4986 	   && rhs_minlen < HOST_WIDE_INT_M1U
4987 	   && ssaname == NULL_TREE
4988 	   && TREE_CODE (TREE_TYPE (lhs)) == ARRAY_TYPE)
4989     {
4990       HOST_WIDE_INT a = int_size_in_bytes (TREE_TYPE (lhs));
4991       if (a > 0 && (unsigned HOST_WIDE_INT) a > rhs_minlen)
4992 	{
4993 	  int idx = new_addr_stridx (lhs);
4994 	  if (idx != 0)
4995 	    {
4996 	      si = new_strinfo (build_fold_addr_expr (lhs), idx,
4997 				build_int_cst (size_type_node, rhs_minlen),
4998 				full_string_p);
4999 	      set_strinfo (idx, si);
5000 	      si->dont_invalidate = true;
5001 	    }
5002 	}
5003     }
5004 
5005   if (si != NULL && offset == 0 && storing_all_zeros_p && lenrange[2] == 1)
5006     {
5007       /* For single-byte stores only, allow adjust_last_stmt to remove
5008 	 the statement if the stored '\0' is immediately overwritten.  */
5009       laststmt.stmt = stmt;
5010       laststmt.len = build_int_cst (size_type_node, 1);
5011       laststmt.stridx = si->idx;
5012     }
5013   return true;
5014 }
5015 
5016 /* Try to fold strstr (s, t) eq/ne s to strncmp (s, t, strlen (t)) eq/ne 0.  */
5017 
5018 static void
fold_strstr_to_strncmp(tree rhs1,tree rhs2,gimple * stmt)5019 fold_strstr_to_strncmp (tree rhs1, tree rhs2, gimple *stmt)
5020 {
5021   if (TREE_CODE (rhs1) != SSA_NAME
5022       || TREE_CODE (rhs2) != SSA_NAME)
5023     return;
5024 
5025   gimple *call_stmt = NULL;
5026   for (int pass = 0; pass < 2; pass++)
5027     {
5028       gimple *g = SSA_NAME_DEF_STMT (rhs1);
5029       if (gimple_call_builtin_p (g, BUILT_IN_STRSTR)
5030 	  && has_single_use (rhs1)
5031 	  && gimple_call_arg (g, 0) == rhs2)
5032 	{
5033 	  call_stmt = g;
5034 	  break;
5035 	}
5036       std::swap (rhs1, rhs2);
5037     }
5038 
5039   if (call_stmt)
5040     {
5041       tree arg0 = gimple_call_arg (call_stmt, 0);
5042 
5043       if (arg0 == rhs2)
5044 	{
5045 	  tree arg1 = gimple_call_arg (call_stmt, 1);
5046 	  tree arg1_len = NULL_TREE;
5047 	  int idx = get_stridx (arg1);
5048 
5049 	  if (idx)
5050 	    {
5051 	      if (idx < 0)
5052 		arg1_len = build_int_cst (size_type_node, ~idx);
5053 	      else
5054 		{
5055 		  strinfo *si = get_strinfo (idx);
5056 		  if (si)
5057 		    arg1_len = get_string_length (si);
5058 		}
5059 	    }
5060 
5061 	  if (arg1_len != NULL_TREE)
5062 	    {
5063 	      gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
5064 	      tree strncmp_decl = builtin_decl_explicit (BUILT_IN_STRNCMP);
5065 
5066 	      if (!is_gimple_val (arg1_len))
5067 		{
5068 		  tree arg1_len_tmp = make_ssa_name (TREE_TYPE (arg1_len));
5069 		  gassign *arg1_stmt = gimple_build_assign (arg1_len_tmp,
5070 							    arg1_len);
5071 		  gsi_insert_before (&gsi, arg1_stmt, GSI_SAME_STMT);
5072 		  arg1_len = arg1_len_tmp;
5073 		}
5074 
5075 	      gcall *strncmp_call = gimple_build_call (strncmp_decl, 3,
5076 						      arg0, arg1, arg1_len);
5077 	      tree strncmp_lhs = make_ssa_name (integer_type_node);
5078 	      gimple_set_vuse (strncmp_call, gimple_vuse (call_stmt));
5079 	      gimple_call_set_lhs (strncmp_call, strncmp_lhs);
5080 	      gsi_remove (&gsi, true);
5081 	      gsi_insert_before (&gsi, strncmp_call, GSI_SAME_STMT);
5082 	      tree zero = build_zero_cst (TREE_TYPE (strncmp_lhs));
5083 
5084 	      if (is_gimple_assign (stmt))
5085 		{
5086 		  if (gimple_assign_rhs_code (stmt) == COND_EXPR)
5087 		    {
5088 		      tree cond = gimple_assign_rhs1 (stmt);
5089 		      TREE_OPERAND (cond, 0) = strncmp_lhs;
5090 		      TREE_OPERAND (cond, 1) = zero;
5091 		    }
5092 		  else
5093 		    {
5094 		      gimple_assign_set_rhs1 (stmt, strncmp_lhs);
5095 		      gimple_assign_set_rhs2 (stmt, zero);
5096 		    }
5097 		}
5098 	      else
5099 		{
5100 		  gcond *cond = as_a<gcond *> (stmt);
5101 		  gimple_cond_set_lhs (cond, strncmp_lhs);
5102 		  gimple_cond_set_rhs (cond, zero);
5103 		}
5104 	      update_stmt (stmt);
5105 	    }
5106 	}
5107     }
5108 }
5109 
5110 /* Return true if TYPE corresponds to a narrow character type.  */
5111 
5112 static bool
is_char_type(tree type)5113 is_char_type (tree type)
5114 {
5115   return (TREE_CODE (type) == INTEGER_TYPE
5116 	  && TYPE_MODE (type) == TYPE_MODE (char_type_node)
5117 	  && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node));
5118 }
5119 
5120 /* Check the built-in call at GSI for validity and optimize it.
5121    Uses RVALS to determine range information.
5122    Return true to let the caller advance *GSI to the next statement
5123    in the basic block and false otherwise.  */
5124 
5125 static bool
strlen_check_and_optimize_call(gimple_stmt_iterator * gsi,bool * zero_write,pointer_query & ptr_qry)5126 strlen_check_and_optimize_call (gimple_stmt_iterator *gsi, bool *zero_write,
5127 				pointer_query &ptr_qry)
5128 {
5129   gimple *stmt = gsi_stmt (*gsi);
5130 
5131   if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
5132     {
5133       tree fntype = gimple_call_fntype (stmt);
5134       if (fntype && lookup_attribute ("alloc_size", TYPE_ATTRIBUTES (fntype)))
5135 	handle_alloc_call (BUILT_IN_NONE, gsi);
5136     }
5137 
5138   /* When not optimizing we must be checking printf calls which
5139      we do even for user-defined functions when they are declared
5140      with attribute format.  */
5141   if (!flag_optimize_strlen
5142       || !strlen_optimize
5143       || !valid_builtin_call (stmt))
5144     return !handle_printf_call (gsi, ptr_qry);
5145 
5146   tree callee = gimple_call_fndecl (stmt);
5147   switch (DECL_FUNCTION_CODE (callee))
5148     {
5149     case BUILT_IN_STRLEN:
5150     case BUILT_IN_STRNLEN:
5151       handle_builtin_strlen (gsi);
5152       break;
5153     case BUILT_IN_STRCHR:
5154       handle_builtin_strchr (gsi);
5155       break;
5156     case BUILT_IN_STRCPY:
5157     case BUILT_IN_STRCPY_CHK:
5158     case BUILT_IN_STPCPY:
5159     case BUILT_IN_STPCPY_CHK:
5160       handle_builtin_strcpy (DECL_FUNCTION_CODE (callee), gsi, ptr_qry);
5161       break;
5162 
5163     case BUILT_IN_STRNCAT:
5164     case BUILT_IN_STRNCAT_CHK:
5165       handle_builtin_strncat (DECL_FUNCTION_CODE (callee), gsi);
5166       break;
5167 
5168     case BUILT_IN_STPNCPY:
5169     case BUILT_IN_STPNCPY_CHK:
5170     case BUILT_IN_STRNCPY:
5171     case BUILT_IN_STRNCPY_CHK:
5172       handle_builtin_stxncpy_strncat (false, gsi);
5173       break;
5174 
5175     case BUILT_IN_MEMCPY:
5176     case BUILT_IN_MEMCPY_CHK:
5177     case BUILT_IN_MEMPCPY:
5178     case BUILT_IN_MEMPCPY_CHK:
5179       handle_builtin_memcpy (DECL_FUNCTION_CODE (callee), gsi, ptr_qry);
5180       break;
5181     case BUILT_IN_STRCAT:
5182     case BUILT_IN_STRCAT_CHK:
5183       handle_builtin_strcat (DECL_FUNCTION_CODE (callee), gsi, ptr_qry);
5184       break;
5185     case BUILT_IN_ALLOCA:
5186     case BUILT_IN_ALLOCA_WITH_ALIGN:
5187     case BUILT_IN_MALLOC:
5188     case BUILT_IN_CALLOC:
5189       handle_alloc_call (DECL_FUNCTION_CODE (callee), gsi);
5190       break;
5191     case BUILT_IN_MEMSET:
5192       if (handle_builtin_memset (gsi, zero_write, ptr_qry))
5193 	return false;
5194       break;
5195     case BUILT_IN_MEMCMP:
5196       if (handle_builtin_memcmp (gsi))
5197 	return false;
5198       break;
5199     case BUILT_IN_STRCMP:
5200     case BUILT_IN_STRNCMP:
5201       if (handle_builtin_string_cmp (gsi, ptr_qry.rvals))
5202 	return false;
5203       break;
5204     default:
5205       if (handle_printf_call (gsi, ptr_qry))
5206 	return false;
5207       break;
5208     }
5209 
5210   return true;
5211 }
5212 
5213 /* Handle an assignment statement at *GSI to a LHS of integral type.
5214    If GSI's basic block needs clean-up of EH, set *CLEANUP_EH to true.  */
5215 
5216 static void
handle_integral_assign(gimple_stmt_iterator * gsi,bool * cleanup_eh,range_query * rvals)5217 handle_integral_assign (gimple_stmt_iterator *gsi, bool *cleanup_eh,
5218 			range_query *rvals)
5219 {
5220   gimple *stmt = gsi_stmt (*gsi);
5221   tree lhs = gimple_assign_lhs (stmt);
5222   tree lhs_type = TREE_TYPE (lhs);
5223 
5224   enum tree_code code = gimple_assign_rhs_code (stmt);
5225   if (code == COND_EXPR)
5226     {
5227       tree cond = gimple_assign_rhs1 (stmt);
5228       enum tree_code cond_code = TREE_CODE (cond);
5229 
5230       if (cond_code == EQ_EXPR || cond_code == NE_EXPR)
5231 	fold_strstr_to_strncmp (TREE_OPERAND (cond, 0),
5232 				TREE_OPERAND (cond, 1), stmt);
5233     }
5234   else if (code == EQ_EXPR || code == NE_EXPR)
5235     fold_strstr_to_strncmp (gimple_assign_rhs1 (stmt),
5236 			    gimple_assign_rhs2 (stmt), stmt);
5237   else if (gimple_assign_load_p (stmt)
5238 	   && TREE_CODE (lhs_type) == INTEGER_TYPE
5239 	   && TYPE_MODE (lhs_type) == TYPE_MODE (char_type_node)
5240 	   && (TYPE_PRECISION (lhs_type)
5241 	       == TYPE_PRECISION (char_type_node))
5242 	   && !gimple_has_volatile_ops (stmt))
5243     {
5244       tree off = integer_zero_node;
5245       unsigned HOST_WIDE_INT coff = 0;
5246       int idx = 0;
5247       tree rhs1 = gimple_assign_rhs1 (stmt);
5248       if (code == MEM_REF)
5249 	{
5250 	  idx = get_stridx (TREE_OPERAND (rhs1, 0));
5251 	  if (idx > 0)
5252 	    {
5253 	      strinfo *si = get_strinfo (idx);
5254 	      if (si
5255 		  && si->nonzero_chars
5256 		  && TREE_CODE (si->nonzero_chars) == INTEGER_CST
5257 		  && (wi::to_widest (si->nonzero_chars)
5258 		      >= wi::to_widest (off)))
5259 		off = TREE_OPERAND (rhs1, 1);
5260 	      else
5261 		/* This case is not useful.  See if get_addr_stridx
5262 		   returns something usable.  */
5263 		idx = 0;
5264 	    }
5265 	}
5266       if (idx <= 0)
5267 	idx = get_addr_stridx (rhs1, NULL_TREE, &coff);
5268       if (idx > 0)
5269 	{
5270 	  strinfo *si = get_strinfo (idx);
5271 	  if (si
5272 	      && si->nonzero_chars
5273 	      && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
5274 	    {
5275 	      widest_int w1 = wi::to_widest (si->nonzero_chars);
5276 	      widest_int w2 = wi::to_widest (off) + coff;
5277 	      if (w1 == w2
5278 		  && si->full_string_p)
5279 		{
5280 		  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5281 		    {
5282 		      fprintf (dump_file, "Optimizing: ");
5283 		      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5284 		    }
5285 
5286 		  /* Reading the final '\0' character.  */
5287 		  tree zero = build_int_cst (lhs_type, 0);
5288 		  gimple_set_vuse (stmt, NULL_TREE);
5289 		  gimple_assign_set_rhs_from_tree (gsi, zero);
5290 		  *cleanup_eh
5291 		    |= maybe_clean_or_replace_eh_stmt (stmt,
5292 						       gsi_stmt (*gsi));
5293 		  stmt = gsi_stmt (*gsi);
5294 		  update_stmt (stmt);
5295 
5296 		  if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5297 		    {
5298 		      fprintf (dump_file, "into: ");
5299 		      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5300 		    }
5301 		}
5302 	      else if (w1 > w2)
5303 		{
5304 		  /* Reading a character before the final '\0'
5305 		     character.  Just set the value range to ~[0, 0]
5306 		     if we don't have anything better.  */
5307 		  wide_int min, max;
5308 		  signop sign = TYPE_SIGN (lhs_type);
5309 		  int prec = TYPE_PRECISION (lhs_type);
5310 		  // FIXME: Use range_query instead of global ranges.
5311 		  value_range_kind vr = get_range_info (lhs, &min, &max);
5312 		  if (vr == VR_VARYING
5313 		      || (vr == VR_RANGE
5314 			  && min == wi::min_value (prec, sign)
5315 			  && max == wi::max_value (prec, sign)))
5316 		    set_range_info (lhs, VR_ANTI_RANGE,
5317 				    wi::zero (prec), wi::zero (prec));
5318 		}
5319 	    }
5320 	}
5321     }
5322   else if (code == MEM_REF && TREE_CODE (lhs) == SSA_NAME)
5323     {
5324       if (int idx = new_stridx (lhs))
5325 	{
5326 	  /* Record multi-byte assignments from MEM_REFs.  */
5327 	  bool storing_all_nonzero_p;
5328 	  bool storing_all_zeros_p;
5329 	  bool full_string_p;
5330 	  unsigned lenrange[] = { UINT_MAX, 0, 0 };
5331 	  tree rhs = gimple_assign_rhs1 (stmt);
5332 	  const bool ranges_valid
5333 	    = count_nonzero_bytes (rhs, lenrange, &full_string_p,
5334 				   &storing_all_zeros_p, &storing_all_nonzero_p,
5335 				   rvals);
5336 	  if (ranges_valid)
5337 	    {
5338 	      tree length = build_int_cst (sizetype, lenrange[0]);
5339 	      strinfo *si = new_strinfo (lhs, idx, length, full_string_p);
5340 	      set_strinfo (idx, si);
5341 	      si->writable = true;
5342 	      si->dont_invalidate = true;
5343 	    }
5344 	}
5345     }
5346 
5347   if (strlen_to_stridx)
5348     {
5349       tree rhs1 = gimple_assign_rhs1 (stmt);
5350       if (stridx_strlenloc *ps = strlen_to_stridx->get (rhs1))
5351 	strlen_to_stridx->put (lhs, stridx_strlenloc (*ps));
5352     }
5353 }
5354 
5355 /* Attempt to check for validity of the performed access a single statement
5356    at *GSI using string length knowledge, and to optimize it.
5357    If the given basic block needs clean-up of EH, CLEANUP_EH is set to
5358    true.  Return true to let the caller advance *GSI to the next statement
5359    in the basic block and false otherwise.  */
5360 
5361 static bool
check_and_optimize_stmt(gimple_stmt_iterator * gsi,bool * cleanup_eh,pointer_query & ptr_qry)5362 check_and_optimize_stmt (gimple_stmt_iterator *gsi, bool *cleanup_eh,
5363 			 pointer_query &ptr_qry)
5364 {
5365   gimple *stmt = gsi_stmt (*gsi);
5366 
5367   /* For statements that modify a string, set to true if the write
5368      is only zeros.  */
5369   bool zero_write = false;
5370 
5371   if (is_gimple_call (stmt))
5372     {
5373       if (!strlen_check_and_optimize_call (gsi, &zero_write, ptr_qry))
5374 	return false;
5375     }
5376   else if (!flag_optimize_strlen || !strlen_optimize)
5377     return true;
5378   else if (is_gimple_assign (stmt) && !gimple_clobber_p (stmt))
5379     {
5380       /* Handle non-clobbering assignment.  */
5381       tree lhs = gimple_assign_lhs (stmt);
5382       tree lhs_type = TREE_TYPE (lhs);
5383 
5384       if (TREE_CODE (lhs) == SSA_NAME && POINTER_TYPE_P (lhs_type))
5385 	{
5386 	  if (gimple_assign_single_p (stmt)
5387 	      || (gimple_assign_cast_p (stmt)
5388 		  && POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt)))))
5389 	    {
5390 	      int idx = get_stridx (gimple_assign_rhs1 (stmt));
5391 	      ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = idx;
5392 	    }
5393 	  else if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
5394 	    handle_pointer_plus (gsi);
5395 	}
5396       else if (TREE_CODE (lhs) == SSA_NAME && INTEGRAL_TYPE_P (lhs_type))
5397 	/* Handle assignment to a character.  */
5398 	handle_integral_assign (gsi, cleanup_eh, ptr_qry.rvals);
5399       else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
5400 	{
5401 	  tree type = TREE_TYPE (lhs);
5402 	  if (TREE_CODE (type) == ARRAY_TYPE)
5403 	    type = TREE_TYPE (type);
5404 
5405 	bool is_char_store = is_char_type (type);
5406 	if (!is_char_store && TREE_CODE (lhs) == MEM_REF)
5407 	  {
5408 	    /* To consider stores into char objects via integer types
5409 	       other than char but not those to non-character objects,
5410 	       determine the type of the destination rather than just
5411 	       the type of the access.  */
5412 	    for (int i = 0; i != 2; ++i)
5413 	      {
5414 		tree ref = TREE_OPERAND (lhs, i);
5415 		type = TREE_TYPE (ref);
5416 		if (TREE_CODE (type) == POINTER_TYPE)
5417 		  type = TREE_TYPE (type);
5418 		if (TREE_CODE (type) == ARRAY_TYPE)
5419 		  type = TREE_TYPE (type);
5420 		if (is_char_type (type))
5421 		  {
5422 		    is_char_store = true;
5423 		    break;
5424 		  }
5425 	      }
5426 	  }
5427 
5428 	  /* Handle a single or multibyte assignment.  */
5429 	  if (is_char_store && !handle_store (gsi, &zero_write, ptr_qry))
5430 	    return false;
5431 	}
5432     }
5433   else if (gcond *cond = dyn_cast<gcond *> (stmt))
5434     {
5435       enum tree_code code = gimple_cond_code (cond);
5436       if (code == EQ_EXPR || code == NE_EXPR)
5437 	fold_strstr_to_strncmp (gimple_cond_lhs (stmt),
5438 				gimple_cond_rhs (stmt), stmt);
5439     }
5440 
5441   if (gimple_vdef (stmt))
5442     maybe_invalidate (stmt, zero_write);
5443   return true;
5444 }
5445 
5446 /* Recursively call maybe_invalidate on stmts that might be executed
5447    in between dombb and current bb and that contain a vdef.  Stop when
5448    *count stmts are inspected, or if the whole strinfo vector has
5449    been invalidated.  */
5450 
5451 static void
do_invalidate(basic_block dombb,gimple * phi,bitmap visited,int * count)5452 do_invalidate (basic_block dombb, gimple *phi, bitmap visited, int *count)
5453 {
5454   unsigned int i, n = gimple_phi_num_args (phi);
5455 
5456   for (i = 0; i < n; i++)
5457     {
5458       tree vuse = gimple_phi_arg_def (phi, i);
5459       gimple *stmt = SSA_NAME_DEF_STMT (vuse);
5460       basic_block bb = gimple_bb (stmt);
5461       if (bb == NULL
5462 	  || bb == dombb
5463 	  || !bitmap_set_bit (visited, bb->index)
5464 	  || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5465 	continue;
5466       while (1)
5467 	{
5468 	  if (gimple_code (stmt) == GIMPLE_PHI)
5469 	    {
5470 	      do_invalidate (dombb, stmt, visited, count);
5471 	      if (*count == 0)
5472 		return;
5473 	      break;
5474 	    }
5475 	  if (--*count == 0)
5476 	    return;
5477 	  if (!maybe_invalidate (stmt))
5478 	    {
5479 	      *count = 0;
5480 	      return;
5481 	    }
5482 	  vuse = gimple_vuse (stmt);
5483 	  stmt = SSA_NAME_DEF_STMT (vuse);
5484 	  if (gimple_bb (stmt) != bb)
5485 	    {
5486 	      bb = gimple_bb (stmt);
5487 	      if (bb == NULL
5488 		  || bb == dombb
5489 		  || !bitmap_set_bit (visited, bb->index)
5490 		  || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5491 		break;
5492 	    }
5493 	}
5494     }
5495 }
5496 
5497 class strlen_dom_walker : public dom_walker
5498 {
5499 public:
strlen_dom_walker(cdi_direction direction)5500   strlen_dom_walker (cdi_direction direction)
5501     : dom_walker (direction),
5502     evrp (false),
5503     ptr_qry (&evrp, &var_cache),
5504     var_cache (),
5505     m_cleanup_cfg (false)
5506   { }
5507 
5508   ~strlen_dom_walker ();
5509 
5510   virtual edge before_dom_children (basic_block);
5511   virtual void after_dom_children (basic_block);
5512 
5513   /* EVRP analyzer used for printf argument range processing, and
5514      to track strlen results across integer variable assignments.  */
5515   evrp_range_analyzer evrp;
5516 
5517   /* A pointer_query object and its cache to store information about
5518      pointers and their targets in.  */
5519   pointer_query ptr_qry;
5520   pointer_query::cache_type var_cache;
5521 
5522   /* Flag that will trigger TODO_cleanup_cfg to be returned in strlen
5523      execute function.  */
5524   bool m_cleanup_cfg;
5525 };
5526 
5527 /* Release pointer_query cache.  */
5528 
~strlen_dom_walker()5529 strlen_dom_walker::~strlen_dom_walker ()
5530 {
5531   ptr_qry.flush_cache ();
5532 }
5533 
5534 /* Callback for walk_dominator_tree.  Attempt to optimize various
5535    string ops by remembering string lengths pointed by pointer SSA_NAMEs.  */
5536 
5537 edge
before_dom_children(basic_block bb)5538 strlen_dom_walker::before_dom_children (basic_block bb)
5539 {
5540   evrp.enter (bb);
5541 
5542   basic_block dombb = get_immediate_dominator (CDI_DOMINATORS, bb);
5543 
5544   if (dombb == NULL)
5545     stridx_to_strinfo = NULL;
5546   else
5547     {
5548       stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) dombb->aux);
5549       if (stridx_to_strinfo)
5550 	{
5551 	  for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5552 	       gsi_next (&gsi))
5553 	    {
5554 	      gphi *phi = gsi.phi ();
5555 	      if (virtual_operand_p (gimple_phi_result (phi)))
5556 		{
5557 		  bitmap visited = BITMAP_ALLOC (NULL);
5558 		  int count_vdef = 100;
5559 		  do_invalidate (dombb, phi, visited, &count_vdef);
5560 		  BITMAP_FREE (visited);
5561 		  if (count_vdef == 0)
5562 		    {
5563 		      /* If there were too many vdefs in between immediate
5564 			 dominator and current bb, invalidate everything.
5565 			 If stridx_to_strinfo has been unshared, we need
5566 			 to free it, otherwise just set it to NULL.  */
5567 		      if (!strinfo_shared ())
5568 			{
5569 			  unsigned int i;
5570 			  strinfo *si;
5571 
5572 			  for (i = 1;
5573 			       vec_safe_iterate (stridx_to_strinfo, i, &si);
5574 			       ++i)
5575 			    {
5576 			      free_strinfo (si);
5577 			      (*stridx_to_strinfo)[i] = NULL;
5578 			    }
5579 			}
5580 		      else
5581 			stridx_to_strinfo = NULL;
5582 		    }
5583 		  break;
5584 		}
5585 	    }
5586 	}
5587     }
5588 
5589   /* If all PHI arguments have the same string index, the PHI result
5590      has it as well.  */
5591   for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5592        gsi_next (&gsi))
5593     {
5594       gphi *phi = gsi.phi ();
5595       tree result = gimple_phi_result (phi);
5596       if (!virtual_operand_p (result) && POINTER_TYPE_P (TREE_TYPE (result)))
5597 	{
5598 	  int idx = get_stridx (gimple_phi_arg_def (phi, 0));
5599 	  if (idx != 0)
5600 	    {
5601 	      unsigned int i, n = gimple_phi_num_args (phi);
5602 	      for (i = 1; i < n; i++)
5603 		if (idx != get_stridx (gimple_phi_arg_def (phi, i)))
5604 		  break;
5605 	      if (i == n)
5606 		ssa_ver_to_stridx[SSA_NAME_VERSION (result)] = idx;
5607 	    }
5608 	}
5609     }
5610 
5611   bool cleanup_eh = false;
5612 
5613   /* Attempt to optimize individual statements.  */
5614   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
5615     {
5616       gimple *stmt = gsi_stmt (gsi);
5617 
5618       /* First record ranges generated by this statement so they
5619 	 can be used by printf argument processing.  */
5620       evrp.record_ranges_from_stmt (stmt, false);
5621 
5622       /* Reset search depth preformance counter.  */
5623       ptr_qry.depth = 0;
5624 
5625       if (check_and_optimize_stmt (&gsi, &cleanup_eh, ptr_qry))
5626 	gsi_next (&gsi);
5627     }
5628 
5629   if (cleanup_eh && gimple_purge_dead_eh_edges (bb))
5630       m_cleanup_cfg = true;
5631 
5632   bb->aux = stridx_to_strinfo;
5633   if (vec_safe_length (stridx_to_strinfo) && !strinfo_shared ())
5634     (*stridx_to_strinfo)[0] = (strinfo *) bb;
5635   return NULL;
5636 }
5637 
5638 /* Callback for walk_dominator_tree.  Free strinfo vector if it is
5639    owned by the current bb, clear bb->aux.  */
5640 
5641 void
after_dom_children(basic_block bb)5642 strlen_dom_walker::after_dom_children (basic_block bb)
5643 {
5644   evrp.leave (bb);
5645 
5646   if (bb->aux)
5647     {
5648       stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) bb->aux);
5649       if (vec_safe_length (stridx_to_strinfo)
5650 	  && (*stridx_to_strinfo)[0] == (strinfo *) bb)
5651 	{
5652 	  unsigned int i;
5653 	  strinfo *si;
5654 
5655 	  for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
5656 	    free_strinfo (si);
5657 	  vec_free (stridx_to_strinfo);
5658 	}
5659       bb->aux = NULL;
5660     }
5661 }
5662 
5663 namespace {
5664 
5665 static unsigned int
printf_strlen_execute(function * fun,bool warn_only)5666 printf_strlen_execute (function *fun, bool warn_only)
5667 {
5668   strlen_optimize = !warn_only;
5669 
5670   calculate_dominance_info (CDI_DOMINATORS);
5671 
5672   bool use_scev = optimize > 0 && flag_printf_return_value;
5673   if (use_scev)
5674     {
5675       loop_optimizer_init (LOOPS_NORMAL);
5676       scev_initialize ();
5677     }
5678 
5679   gcc_assert (!strlen_to_stridx);
5680   if (warn_stringop_overflow || warn_stringop_truncation)
5681     strlen_to_stridx = new hash_map<tree, stridx_strlenloc> ();
5682 
5683   /* This has to happen after initializing the loop optimizer
5684      and initializing SCEV as they create new SSA_NAMEs.  */
5685   ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
5686   max_stridx = 1;
5687 
5688   /* String length optimization is implemented as a walk of the dominator
5689      tree and a forward walk of statements within each block.  */
5690   strlen_dom_walker walker (CDI_DOMINATORS);
5691   walker.walk (ENTRY_BLOCK_PTR_FOR_FN (fun));
5692 
5693   if (dump_file && (dump_flags & TDF_DETAILS))
5694     {
5695       unsigned nused = 0;
5696       unsigned nidxs = walker.ptr_qry.var_cache->indices.length ();
5697       for (unsigned i = 0; i != nidxs; ++i)
5698 	if (walker.ptr_qry.var_cache->indices[i])
5699 	  ++nused;
5700 
5701       fprintf (dump_file, "pointer_query counters\n"
5702 	       "  index cache size:  %u\n"
5703 	       "  utilization:       %u%%\n"
5704 	       "  access cache size: %u\n"
5705 	       "  hits:              %u\n"
5706 	       "  misses:            %u\n"
5707 	       "  failures:          %u\n"
5708 	       "  max_depth:         %u\n",
5709 	       nidxs,
5710 	       nidxs == 0 ? 0 : (nused * 100) / nidxs,
5711 	       walker.ptr_qry.var_cache->access_refs.length (),
5712 	       walker.ptr_qry.hits, walker.ptr_qry.misses,
5713 	       walker.ptr_qry.failures, walker.ptr_qry.max_depth);
5714     }
5715 
5716   ssa_ver_to_stridx.release ();
5717   strinfo_pool.release ();
5718   if (decl_to_stridxlist_htab)
5719     {
5720       obstack_free (&stridx_obstack, NULL);
5721       delete decl_to_stridxlist_htab;
5722       decl_to_stridxlist_htab = NULL;
5723     }
5724   laststmt.stmt = NULL;
5725   laststmt.len = NULL_TREE;
5726   laststmt.stridx = 0;
5727 
5728   if (strlen_to_stridx)
5729     {
5730       strlen_to_stridx->empty ();
5731       delete strlen_to_stridx;
5732       strlen_to_stridx = NULL;
5733     }
5734 
5735   if (use_scev)
5736     {
5737       scev_finalize ();
5738       loop_optimizer_finalize ();
5739     }
5740 
5741   return walker.m_cleanup_cfg ? TODO_cleanup_cfg : 0;
5742 }
5743 
5744 /* This file defines two passes: one for warnings that runs only when
5745    optimization is disabled, and another that implements optimizations
5746    and also issues warnings.  */
5747 
5748 const pass_data pass_data_warn_printf =
5749 {
5750   GIMPLE_PASS, /* type */
5751   "warn-printf", /* name */
5752   OPTGROUP_NONE, /* optinfo_flags */
5753   TV_NONE, /* tv_id */
5754   /* Normally an optimization pass would require PROP_ssa but because
5755      this pass runs early, with no optimization, to do sprintf format
5756      checking, it only requires PROP_cfg.  */
5757   PROP_cfg, /* properties_required */
5758   0, /* properties_provided */
5759   0, /* properties_destroyed */
5760   0, /* todo_flags_start */
5761   0, /* todo_flags_finish */
5762 };
5763 
5764 class pass_warn_printf : public gimple_opt_pass
5765 {
5766 public:
pass_warn_printf(gcc::context * ctxt)5767   pass_warn_printf (gcc::context *ctxt)
5768     : gimple_opt_pass (pass_data_warn_printf, ctxt)
5769   {}
5770 
5771   virtual bool gate (function *);
execute(function * fun)5772   virtual unsigned int execute (function *fun)
5773   {
5774     return printf_strlen_execute (fun, true);
5775   }
5776 };
5777 
5778 
5779 /* Return true to run the warning pass only when not optimizing and
5780    iff either -Wformat-overflow or -Wformat-truncation is specified.  */
5781 
5782 bool
gate(function *)5783 pass_warn_printf::gate (function *)
5784 {
5785   return !optimize && (warn_format_overflow > 0 || warn_format_trunc > 0);
5786 }
5787 
5788 const pass_data pass_data_strlen =
5789 {
5790   GIMPLE_PASS, /* type */
5791   "strlen", /* name */
5792   OPTGROUP_NONE, /* optinfo_flags */
5793   TV_TREE_STRLEN, /* tv_id */
5794   PROP_cfg | PROP_ssa, /* properties_required */
5795   0, /* properties_provided */
5796   0, /* properties_destroyed */
5797   0, /* todo_flags_start */
5798   0, /* todo_flags_finish */
5799 };
5800 
5801 class pass_strlen : public gimple_opt_pass
5802 {
5803 public:
pass_strlen(gcc::context * ctxt)5804   pass_strlen (gcc::context *ctxt)
5805     : gimple_opt_pass (pass_data_strlen, ctxt)
5806   {}
5807 
clone()5808   opt_pass * clone () { return new pass_strlen (m_ctxt); }
5809 
5810   virtual bool gate (function *);
execute(function * fun)5811   virtual unsigned int execute (function *fun)
5812   {
5813     return printf_strlen_execute (fun, false);
5814   }
5815 };
5816 
5817 /* Return true to run the pass only when the sprintf and/or strlen
5818    optimizations are enabled and -Wformat-overflow or -Wformat-truncation
5819    are specified.  */
5820 
5821 bool
gate(function *)5822 pass_strlen::gate (function *)
5823 {
5824   return ((warn_format_overflow > 0
5825 	   || warn_format_trunc > 0
5826 	   || warn_restrict > 0
5827 	   || flag_optimize_strlen > 0
5828 	   || flag_printf_return_value)
5829 	  && optimize > 0);
5830 }
5831 
5832 } // anon namespace
5833 
5834 gimple_opt_pass *
make_pass_warn_printf(gcc::context * ctxt)5835 make_pass_warn_printf (gcc::context *ctxt)
5836 {
5837   return new pass_warn_printf (ctxt);
5838 }
5839 
5840 gimple_opt_pass *
make_pass_strlen(gcc::context * ctxt)5841 make_pass_strlen (gcc::context *ctxt)
5842 {
5843   return new pass_strlen (ctxt);
5844 }
5845