1 /* Scalar Replacement of Aggregates (SRA) converts some structure
2    references into scalar references, exposing them to the scalar
3    optimizers.
4    Copyright (C) 2003, 2004, 2005, 2006, 2007
5    Free Software Foundation, Inc.
6    Contributed by Diego Novillo <dnovillo@redhat.com>
7 
8 This file is part of GCC.
9 
10 GCC is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by the
12 Free Software Foundation; either version 2, or (at your option) any
13 later version.
14 
15 GCC is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
18 for more details.
19 
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING.  If not, write to the Free
22 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
23 02110-1301, USA.  */
24 
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "ggc.h"
30 #include "tree.h"
31 
32 /* These RTL headers are needed for basic-block.h.  */
33 #include "rtl.h"
34 #include "tm_p.h"
35 #include "hard-reg-set.h"
36 #include "basic-block.h"
37 #include "diagnostic.h"
38 #include "langhooks.h"
39 #include "tree-inline.h"
40 #include "tree-flow.h"
41 #include "tree-gimple.h"
42 #include "tree-dump.h"
43 #include "tree-pass.h"
44 #include "timevar.h"
45 #include "flags.h"
46 #include "bitmap.h"
47 #include "obstack.h"
48 #include "target.h"
49 /* expr.h is needed for MOVE_RATIO.  */
50 #include "expr.h"
51 #include "params.h"
52 
53 
54 /* The objective of this pass is to replace a non-addressable aggregate with a
55    set of independent variables.  Most of the time, all of these variables
56    will be scalars.  But a secondary objective is to break up larger
57    aggregates into smaller aggregates.  In the process we may find that some
58    bits of the larger aggregate can be deleted as unreferenced.
59 
60    This substitution is done globally.  More localized substitutions would
61    be the purview of a load-store motion pass.
62 
63    The optimization proceeds in phases:
64 
65      (1) Identify variables that have types that are candidates for
66 	 decomposition.
67 
68      (2) Scan the function looking for the ways these variables are used.
69 	 In particular we're interested in the number of times a variable
70 	 (or member) is needed as a complete unit, and the number of times
71 	 a variable (or member) is copied.
72 
73      (3) Based on the usage profile, instantiate substitution variables.
74 
75      (4) Scan the function making replacements.
76 */
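/* A minimal illustrative sketch (not taken from this file): given

     struct pair { int x; int y; };
     struct pair p;
     p.x = 1;
     p.y = 2;
     return p.x + p.y;

   the pass may replace the non-addressable aggregate P with two
   independent scalar temporaries, written here as p$x and p$y, so that
   the later scalar optimizers see

     p$x = 1;
     p$y = 2;
     return p$x + p$y;

   The names p$x and p$y are hypothetical; see build_element_name and
   instantiate_element below for how replacements are really named.  */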
77 
78 
79 /* The set of todo flags to return from tree_sra.  */
80 static unsigned int todoflags;
81 
82 /* The set of aggregate variables that are candidates for scalarization.  */
83 static bitmap sra_candidates;
84 
85 /* Set of scalarizable PARM_DECLs that need copy-in operations at the
86    beginning of the function.  */
87 static bitmap needs_copy_in;
88 
89 /* Sets of bit pairs that cache type decomposition and instantiation.  */
90 static bitmap sra_type_decomp_cache;
91 static bitmap sra_type_inst_cache;
92 
93 /* One of these structures is created for each candidate aggregate and
94    each (accessed) member or group of members of such an aggregate.  */
95 struct sra_elt
96 {
97   /* A tree of the elements.  Used when we want to traverse everything.  */
98   struct sra_elt *parent;
99   struct sra_elt *groups;
100   struct sra_elt *children;
101   struct sra_elt *sibling;
102 
103   /* If this element is a root, then this is the VAR_DECL.  If this is
104      a sub-element, this is some token used to identify the reference.
105      In the case of COMPONENT_REF, this is the FIELD_DECL.  In the case
106      of an ARRAY_REF, this is the (constant) index.  In the case of an
107      ARRAY_RANGE_REF, this is the (constant) RANGE_EXPR.  In the case
108      of a complex number, this is a zero or one.  */
109   tree element;
110 
111   /* The type of the element.  */
112   tree type;
113 
114   /* A VAR_DECL, for any sub-element we've decided to replace.  */
115   tree replacement;
116 
117   /* The number of times the element is referenced as a whole.  I.e.
118      given "a.b.c", this would be incremented for C, but not for A or B.  */
119   unsigned int n_uses;
120 
121   /* The number of times the element is copied to or from another
122      scalarizable element.  */
123   unsigned int n_copies;
124 
125   /* True if TYPE is scalar.  */
126   bool is_scalar;
127 
128   /* True if this element is a group of members of its parent.  */
129   bool is_group;
130 
131   /* True if we saw something about this element that prevents scalarization,
132      such as non-constant indexing.  */
133   bool cannot_scalarize;
134 
135   /* True if we've decided that structure-to-structure assignment
136      should happen via memcpy and not per-element.  */
137   bool use_block_copy;
138 
139   /* True if everything under this element has been marked TREE_NO_WARNING.  */
140   bool all_no_warning;
141 
142   /* A flag for use with/after random access traversals.  */
143   bool visited;
144 };
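/* An illustrative sketch of the element tree, assuming a hypothetical
   candidate declaration

     struct { _Complex double c; int a[2]; } s;

   The root sra_elt has ELEMENT == the VAR_DECL for "s".  Its children
   would include one element whose ELEMENT is the FIELD_DECL for "c"
   (itself with children 0 and 1 for the real and imaginary parts) and,
   for each accessed constant index, one element whose ELEMENT is that
   index of "a".  Groups (RANGE_EXPRs) hang off the GROUPS list rather
   than CHILDREN.  */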
145 
146 #define IS_ELEMENT_FOR_GROUP(ELEMENT) (TREE_CODE (ELEMENT) == RANGE_EXPR)
147 
148 #define FOR_EACH_ACTUAL_CHILD(CHILD, ELT)			\
149   for ((CHILD) = (ELT)->is_group				\
150 		 ? next_child_for_group (NULL, (ELT))		\
151 		 : (ELT)->children;				\
152        (CHILD);							\
153        (CHILD) = (ELT)->is_group				\
154 		 ? next_child_for_group ((CHILD), (ELT))	\
155 		 : (CHILD)->sibling)
156 
157 /* Helper function for above macro.  Return next child in group.  */
158 static struct sra_elt *
159 next_child_for_group (struct sra_elt *child, struct sra_elt *group)
160 {
161   gcc_assert (group->is_group);
162 
163   /* Find the next child in the parent.  */
164   if (child)
165     child = child->sibling;
166   else
167     child = group->parent->children;
168 
169   /* Skip siblings that do not belong to the group.  */
170   while (child)
171     {
172       tree g_elt = group->element;
173       if (TREE_CODE (g_elt) == RANGE_EXPR)
174 	{
175 	  if (!tree_int_cst_lt (child->element, TREE_OPERAND (g_elt, 0))
176 	      && !tree_int_cst_lt (TREE_OPERAND (g_elt, 1), child->element))
177 	    break;
178 	}
179       else
180 	gcc_unreachable ();
181 
182       child = child->sibling;
183     }
184 
185   return child;
186 }
187 
188 /* Random access to the child of a parent is performed by hashing.
189    This prevents quadratic behavior, and allows SRA to function
190    reasonably on larger records.  */
191 static htab_t sra_map;
192 
193 /* All structures are allocated out of the following obstack.  */
194 static struct obstack sra_obstack;
195 
196 /* Debugging functions.  */
197 static void dump_sra_elt_name (FILE *, struct sra_elt *);
198 extern void debug_sra_elt_name (struct sra_elt *);
199 
200 /* Forward declarations.  */
201 static tree generate_element_ref (struct sra_elt *);
202 
203 /* Return true if DECL is an SRA candidate.  */
204 
205 static bool
206 is_sra_candidate_decl (tree decl)
207 {
208   return DECL_P (decl) && bitmap_bit_p (sra_candidates, DECL_UID (decl));
209 }
210 
211 /* Return true if TYPE is a scalar type.  */
212 
213 static bool
214 is_sra_scalar_type (tree type)
215 {
216   enum tree_code code = TREE_CODE (type);
217   return (code == INTEGER_TYPE || code == REAL_TYPE || code == VECTOR_TYPE
218 	  || code == ENUMERAL_TYPE || code == BOOLEAN_TYPE
219 	  || code == POINTER_TYPE || code == OFFSET_TYPE
220 	  || code == REFERENCE_TYPE);
221 }
222 
223 /* Return true if TYPE can be decomposed into a set of independent variables.
224 
225    Note that this doesn't imply that all elements of TYPE can be
226    instantiated, just that if we decide to break up the type into
227    separate pieces that it can be done.  */
228 
229 bool
230 sra_type_can_be_decomposed_p (tree type)
231 {
232   unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2;
233   tree t;
234 
235   /* Avoid searching the same type twice.  */
236   if (bitmap_bit_p (sra_type_decomp_cache, cache+0))
237     return true;
238   if (bitmap_bit_p (sra_type_decomp_cache, cache+1))
239     return false;
240 
241   /* The type must have a definite nonzero size.  */
242   if (TYPE_SIZE (type) == NULL || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
243       || integer_zerop (TYPE_SIZE (type)))
244     goto fail;
245 
246   /* The type must be a non-union aggregate.  */
247   switch (TREE_CODE (type))
248     {
249     case RECORD_TYPE:
250       {
251 	bool saw_one_field = false;
252 
253 	for (t = TYPE_FIELDS (type); t ; t = TREE_CHAIN (t))
254 	  if (TREE_CODE (t) == FIELD_DECL)
255 	    {
256 	      /* Reject incorrectly represented bit fields.  */
257 	      if (DECL_BIT_FIELD (t)
258 		  && (tree_low_cst (DECL_SIZE (t), 1)
259 		      != TYPE_PRECISION (TREE_TYPE (t))))
260 		goto fail;
261 
262 	      saw_one_field = true;
263 	    }
264 
265 	/* Record types must have at least one field.  */
266 	if (!saw_one_field)
267 	  goto fail;
268       }
269       break;
270 
271     case ARRAY_TYPE:
272       /* Array types must have a fixed lower and upper bound.  */
273       t = TYPE_DOMAIN (type);
274       if (t == NULL)
275 	goto fail;
276       if (TYPE_MIN_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MIN_VALUE (t)))
277 	goto fail;
278       if (TYPE_MAX_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MAX_VALUE (t)))
279 	goto fail;
280       break;
281 
282     case COMPLEX_TYPE:
283       break;
284 
285     default:
286       goto fail;
287     }
288 
289   bitmap_set_bit (sra_type_decomp_cache, cache+0);
290   return true;
291 
292  fail:
293   bitmap_set_bit (sra_type_decomp_cache, cache+1);
294   return false;
295 }
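/* For illustration only: a RECORD_TYPE such as "struct { int x; float y; }"
   or a fixed-bound ARRAY_TYPE such as "int[4]" would normally pass the
   checks above, while a union, a zero-sized or variable-sized type, or a
   record containing a mis-sized bit-field would take the "fail" path.  */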
296 
297 /* Return true if DECL can be decomposed into a set of independent
298    (though not necessarily scalar) variables.  */
299 
300 static bool
301 decl_can_be_decomposed_p (tree var)
302 {
303   /* Early out for scalars.  */
304   if (is_sra_scalar_type (TREE_TYPE (var)))
305     return false;
306 
307   /* The variable must not be aliased.  */
308   if (!is_gimple_non_addressable (var))
309     {
310       if (dump_file && (dump_flags & TDF_DETAILS))
311 	{
312 	  fprintf (dump_file, "Cannot scalarize variable ");
313 	  print_generic_expr (dump_file, var, dump_flags);
314 	  fprintf (dump_file, " because it must live in memory\n");
315 	}
316       return false;
317     }
318 
319   /* The variable must not be volatile.  */
320   if (TREE_THIS_VOLATILE (var))
321     {
322       if (dump_file && (dump_flags & TDF_DETAILS))
323 	{
324 	  fprintf (dump_file, "Cannot scalarize variable ");
325 	  print_generic_expr (dump_file, var, dump_flags);
326 	  fprintf (dump_file, " because it is declared volatile\n");
327 	}
328       return false;
329     }
330 
331   /* We must be able to decompose the variable's type.  */
332   if (!sra_type_can_be_decomposed_p (TREE_TYPE (var)))
333     {
334       if (dump_file && (dump_flags & TDF_DETAILS))
335 	{
336 	  fprintf (dump_file, "Cannot scalarize variable ");
337 	  print_generic_expr (dump_file, var, dump_flags);
338 	  fprintf (dump_file, " because its type cannot be decomposed\n");
339 	}
340       return false;
341     }
342 
343   return true;
344 }
345 
346 /* Return true if TYPE can be *completely* decomposed into scalars.  */
347 
348 static bool
349 type_can_instantiate_all_elements (tree type)
350 {
351   if (is_sra_scalar_type (type))
352     return true;
353   if (!sra_type_can_be_decomposed_p (type))
354     return false;
355 
356   switch (TREE_CODE (type))
357     {
358     case RECORD_TYPE:
359       {
360 	unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2;
361 	tree f;
362 
363 	if (bitmap_bit_p (sra_type_inst_cache, cache+0))
364 	  return true;
365 	if (bitmap_bit_p (sra_type_inst_cache, cache+1))
366 	  return false;
367 
368 	for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
369 	  if (TREE_CODE (f) == FIELD_DECL)
370 	    {
371 	      if (!type_can_instantiate_all_elements (TREE_TYPE (f)))
372 		{
373 		  bitmap_set_bit (sra_type_inst_cache, cache+1);
374 		  return false;
375 		}
376 	    }
377 
378 	bitmap_set_bit (sra_type_inst_cache, cache+0);
379 	return true;
380       }
381 
382     case ARRAY_TYPE:
383       return type_can_instantiate_all_elements (TREE_TYPE (type));
384 
385     case COMPLEX_TYPE:
386       return true;
387 
388     default:
389       gcc_unreachable ();
390     }
391 }
392 
393 /* Return true if ELT and all of its sub-elements can be scalarized.  */
394 
395 static bool
396 can_completely_scalarize_p (struct sra_elt *elt)
397 {
398   struct sra_elt *c;
399 
400   if (elt->cannot_scalarize)
401     return false;
402 
403   for (c = elt->children; c; c = c->sibling)
404     if (!can_completely_scalarize_p (c))
405       return false;
406 
407   for (c = elt->groups; c; c = c->sibling)
408     if (!can_completely_scalarize_p (c))
409       return false;
410 
411   return true;
412 }
413 
414 
415 /* A simplified tree hashing algorithm that only handles the types of
416    trees we expect to find in sra_elt->element.  */
417 
418 static hashval_t
419 sra_hash_tree (tree t)
420 {
421   hashval_t h;
422 
423   switch (TREE_CODE (t))
424     {
425     case VAR_DECL:
426     case PARM_DECL:
427     case RESULT_DECL:
428       h = DECL_UID (t);
429       break;
430 
431     case INTEGER_CST:
432       h = TREE_INT_CST_LOW (t) ^ TREE_INT_CST_HIGH (t);
433       break;
434 
435     case RANGE_EXPR:
436       h = iterative_hash_expr (TREE_OPERAND (t, 0), 0);
437       h = iterative_hash_expr (TREE_OPERAND (t, 1), h);
438       break;
439 
440     case FIELD_DECL:
441       /* We can have types that are compatible, but have different member
442 	 lists, so we can't hash fields by ID.  Use offsets instead.  */
443       h = iterative_hash_expr (DECL_FIELD_OFFSET (t), 0);
444       h = iterative_hash_expr (DECL_FIELD_BIT_OFFSET (t), h);
445       break;
446 
447     default:
448       gcc_unreachable ();
449     }
450 
451   return h;
452 }
453 
454 /* Hash function for struct sra_elt.  */
455 
456 static hashval_t
457 sra_elt_hash (const void *x)
458 {
459   const struct sra_elt *e = x;
460   const struct sra_elt *p;
461   hashval_t h;
462 
463   h = sra_hash_tree (e->element);
464 
465   /* Take into account everything back up the chain.  Given that chain
466      lengths are rarely very long, this should be acceptable.  If we
467      truly identify this as a performance problem, it should work to
468      hash the pointer value "e->parent".  */
469   for (p = e->parent; p ; p = p->parent)
470     h = (h * 65521) ^ sra_hash_tree (p->element);
471 
472   return h;
473 }
474 
475 /* Equality function for struct sra_elt.  */
476 
477 static int
478 sra_elt_eq (const void *x, const void *y)
479 {
480   const struct sra_elt *a = x;
481   const struct sra_elt *b = y;
482   tree ae, be;
483 
484   if (a->parent != b->parent)
485     return false;
486 
487   ae = a->element;
488   be = b->element;
489 
490   if (ae == be)
491     return true;
492   if (TREE_CODE (ae) != TREE_CODE (be))
493     return false;
494 
495   switch (TREE_CODE (ae))
496     {
497     case VAR_DECL:
498     case PARM_DECL:
499     case RESULT_DECL:
500       /* These are all pointer unique.  */
501       return false;
502 
503     case INTEGER_CST:
504       /* Integers are not pointer unique, so compare their values.  */
505       return tree_int_cst_equal (ae, be);
506 
507     case RANGE_EXPR:
508       return
509 	tree_int_cst_equal (TREE_OPERAND (ae, 0), TREE_OPERAND (be, 0))
510 	&& tree_int_cst_equal (TREE_OPERAND (ae, 1), TREE_OPERAND (be, 1));
511 
512     case FIELD_DECL:
513       /* Fields are unique within a record, but not between
514 	 compatible records.  */
515       if (DECL_FIELD_CONTEXT (ae) == DECL_FIELD_CONTEXT (be))
516 	return false;
517       return fields_compatible_p (ae, be);
518 
519     default:
520       gcc_unreachable ();
521     }
522 }
523 
524 /* Create or return the SRA_ELT structure for CHILD in PARENT.  PARENT
525    may be null, in which case CHILD must be a DECL.  */
526 
527 static struct sra_elt *
528 lookup_element (struct sra_elt *parent, tree child, tree type,
529 		enum insert_option insert)
530 {
531   struct sra_elt dummy;
532   struct sra_elt **slot;
533   struct sra_elt *elt;
534 
535   if (parent)
536     dummy.parent = parent->is_group ? parent->parent : parent;
537   else
538     dummy.parent = NULL;
539   dummy.element = child;
540 
541   slot = (struct sra_elt **) htab_find_slot (sra_map, &dummy, insert);
542   if (!slot && insert == NO_INSERT)
543     return NULL;
544 
545   elt = *slot;
546   if (!elt && insert == INSERT)
547     {
548       *slot = elt = obstack_alloc (&sra_obstack, sizeof (*elt));
549       memset (elt, 0, sizeof (*elt));
550 
551       elt->parent = parent;
552       elt->element = child;
553       elt->type = type;
554       elt->is_scalar = is_sra_scalar_type (type);
555 
556       if (parent)
557 	{
558 	  if (IS_ELEMENT_FOR_GROUP (elt->element))
559 	    {
560 	      elt->is_group = true;
561 	      elt->sibling = parent->groups;
562 	      parent->groups = elt;
563 	    }
564 	  else
565 	    {
566 	      elt->sibling = parent->children;
567 	      parent->children = elt;
568 	    }
569 	}
570 
571       /* If this is a parameter, then if we want to scalarize, we have
572 	 one copy from the true function parameter.  Count it now.  */
573       if (TREE_CODE (child) == PARM_DECL)
574 	{
575 	  elt->n_copies = 1;
576 	  bitmap_set_bit (needs_copy_in, DECL_UID (child));
577 	}
578     }
579 
580   return elt;
581 }
582 
583 /* Create or return the SRA_ELT structure for EXPR if the expression
584    refers to a scalarizable variable.  */
585 
586 static struct sra_elt *
587 maybe_lookup_element_for_expr (tree expr)
588 {
589   struct sra_elt *elt;
590   tree child;
591 
592   switch (TREE_CODE (expr))
593     {
594     case VAR_DECL:
595     case PARM_DECL:
596     case RESULT_DECL:
597       if (is_sra_candidate_decl (expr))
598 	return lookup_element (NULL, expr, TREE_TYPE (expr), INSERT);
599       return NULL;
600 
601     case ARRAY_REF:
602       /* We can't scalarize variable array indices.  */
603       if (in_array_bounds_p (expr))
604         child = TREE_OPERAND (expr, 1);
605       else
606 	return NULL;
607       break;
608 
609     case ARRAY_RANGE_REF:
610       /* We can't scalarize variable array indices.  */
611       if (range_in_array_bounds_p (expr))
612 	{
613 	  tree domain = TYPE_DOMAIN (TREE_TYPE (expr));
614 	  child = build2 (RANGE_EXPR, integer_type_node,
615 			  TYPE_MIN_VALUE (domain), TYPE_MAX_VALUE (domain));
616 	}
617       else
618 	return NULL;
619       break;
620 
621     case COMPONENT_REF:
622       /* Don't look through unions.  */
623       if (TREE_CODE (TREE_TYPE (TREE_OPERAND (expr, 0))) != RECORD_TYPE)
624 	return NULL;
625       child = TREE_OPERAND (expr, 1);
626       break;
627 
628     case REALPART_EXPR:
629       child = integer_zero_node;
630       break;
631     case IMAGPART_EXPR:
632       child = integer_one_node;
633       break;
634 
635     default:
636       return NULL;
637     }
638 
639   elt = maybe_lookup_element_for_expr (TREE_OPERAND (expr, 0));
640   if (elt)
641     return lookup_element (elt, child, TREE_TYPE (expr), INSERT);
642   return NULL;
643 }
644 
645 
646 /* Functions to walk just enough of the tree to see all scalarizable
647    references, and categorize them.  */
648 
649 /* A set of callbacks for phases 2 and 4.  They'll be invoked for the
650    various kinds of references seen.  In all cases, *BSI is an iterator
651    pointing to the statement being processed.  */
652 struct sra_walk_fns
653 {
654   /* Invoked when ELT is required as a unit.  Note that ELT might refer to
655      a leaf node, in which case this is a simple scalar reference.  *EXPR_P
656      points to the location of the expression.  IS_OUTPUT is true if this
657      is a left-hand-side reference.  USE_ALL is true if we saw something we
658      couldn't quite identify and had to force the use of the entire object.  */
659   void (*use) (struct sra_elt *elt, tree *expr_p,
660 	       block_stmt_iterator *bsi, bool is_output, bool use_all);
661 
662   /* Invoked when we have a copy between two scalarizable references.  */
663   void (*copy) (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
664 		block_stmt_iterator *bsi);
665 
666   /* Invoked when ELT is initialized from a constant.  VALUE may be NULL,
667      in which case it should be treated as an empty CONSTRUCTOR.  */
668   void (*init) (struct sra_elt *elt, tree value, block_stmt_iterator *bsi);
669 
670   /* Invoked when we have a copy between one scalarizable reference ELT
671      and one non-scalarizable reference OTHER without side-effects.
672      IS_OUTPUT is true if ELT is on the left-hand side.  */
673   void (*ldst) (struct sra_elt *elt, tree other,
674 		block_stmt_iterator *bsi, bool is_output);
675 
676   /* True during phase 2, false during phase 4.  */
677   /* ??? This is a hack.  */
678   bool initial_scan;
679 };
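/* As a rough illustrative mapping (the real classification is done by
   sra_walk_modify_expr below), assuming candidate aggregates A and B,
   a plain scalar T, and a non-candidate aggregate M:

     a.x = 5;            =>  USE of the a.x element (IS_OUTPUT true)
     t = a.x;            =>  USE of the a.x element (IS_OUTPUT false)
     a = b;              =>  COPY between the two root elements
     a = (struct s){0};  =>  INIT with the CONSTRUCTOR value
     a = m;  or  m = a;  =>  LDST, with IS_OUTPUT saying which side A is on

   These statements are hypothetical examples, not cases lifted from the
   sources.  */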
680 
681 #ifdef ENABLE_CHECKING
682 /* Invoked via walk_tree, if *TP contains a candidate decl, return it.  */
683 
684 static tree
685 sra_find_candidate_decl (tree *tp, int *walk_subtrees,
686 			 void *data ATTRIBUTE_UNUSED)
687 {
688   tree t = *tp;
689   enum tree_code code = TREE_CODE (t);
690 
691   if (code == VAR_DECL || code == PARM_DECL || code == RESULT_DECL)
692     {
693       *walk_subtrees = 0;
694       if (is_sra_candidate_decl (t))
695 	return t;
696     }
697   else if (TYPE_P (t))
698     *walk_subtrees = 0;
699 
700   return NULL;
701 }
702 #endif
703 
704 /* Walk most expressions looking for a scalarizable aggregate.
705    If we find one, invoke FNS->USE.  */
706 
707 static void
708 sra_walk_expr (tree *expr_p, block_stmt_iterator *bsi, bool is_output,
709 	       const struct sra_walk_fns *fns)
710 {
711   tree expr = *expr_p;
712   tree inner = expr;
713   bool disable_scalarization = false;
714   bool use_all_p = false;
715 
716   /* We're looking to collect a reference expression between EXPR and INNER,
717      such that INNER is a scalarizable decl and all other nodes through EXPR
718      are references that we can scalarize.  If we come across something that
719      we can't scalarize, we reset EXPR.  This has the effect of making it
720      appear that we're referring to the larger expression as a whole.  */
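  /* Purely illustrative example: for "x.a[3].b" with X a candidate, INNER
     eventually reaches X and we record a use of the element for ".a[3].b".
     For "x.a[i].b" with a non-constant I, the ARRAY_REF case below takes
     the use_all path, so the access is treated as a reference to the
     enclosing "x.a" as a whole and that element is marked
     cannot_scalarize.  */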
721 
722   while (1)
723     switch (TREE_CODE (inner))
724       {
725       case VAR_DECL:
726       case PARM_DECL:
727       case RESULT_DECL:
728 	/* If there is a scalarizable decl at the bottom, then process it.  */
729 	if (is_sra_candidate_decl (inner))
730 	  {
731 	    struct sra_elt *elt = maybe_lookup_element_for_expr (expr);
732 	    if (disable_scalarization)
733 	      elt->cannot_scalarize = true;
734 	    else
735 	      fns->use (elt, expr_p, bsi, is_output, use_all_p);
736 	  }
737 	return;
738 
739       case ARRAY_REF:
740 	/* Non-constant index means any member may be accessed.  Prevent the
741 	   expression from being scalarized.  If we were to treat this as a
742 	   reference to the whole array, we can wind up with a single dynamic
743 	   index reference inside a loop being overridden by several constant
744 	   index references during loop setup.  It's possible that this could
745 	   be avoided by using dynamic usage counts based on BB trip counts
746 	   (based on loop analysis or profiling), but that hardly seems worth
747 	   the effort.  */
748 	/* ??? Hack.  Figure out how to push this into the scan routines
749 	   without duplicating too much code.  */
750 	if (!in_array_bounds_p (inner))
751 	  {
752 	    disable_scalarization = true;
753 	    goto use_all;
754 	  }
755 	/* ??? Are we assured that non-constant bounds and stride will have
756 	   the same value everywhere?  I don't think Fortran will...  */
757 	if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3))
758 	  goto use_all;
759 	inner = TREE_OPERAND (inner, 0);
760 	break;
761 
762       case ARRAY_RANGE_REF:
763 	if (!range_in_array_bounds_p (inner))
764 	  {
765 	    disable_scalarization = true;
766 	    goto use_all;
767 	  }
768 	/* ??? See above re non-constant bounds and stride.  */
769 	if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3))
770 	  goto use_all;
771 	inner = TREE_OPERAND (inner, 0);
772 	break;
773 
774       case COMPONENT_REF:
775 	/* A reference to a union member constitutes a reference to the
776 	   entire union.  */
777 	if (TREE_CODE (TREE_TYPE (TREE_OPERAND (inner, 0))) != RECORD_TYPE)
778 	  goto use_all;
779 	/* ??? See above re non-constant stride.  */
780 	if (TREE_OPERAND (inner, 2))
781 	  goto use_all;
782 	inner = TREE_OPERAND (inner, 0);
783 	break;
784 
785       case REALPART_EXPR:
786       case IMAGPART_EXPR:
787 	inner = TREE_OPERAND (inner, 0);
788 	break;
789 
790       case BIT_FIELD_REF:
791 	/* A bit field reference (access to *multiple* fields simultaneously)
792 	   is not currently scalarized.  Consider this an access to the
793 	   complete outer element, to which walk_tree will bring us next.  */
794 	goto use_all;
795 
796       case VIEW_CONVERT_EXPR:
797       case NOP_EXPR:
798 	/* Similarly, a view/nop explicitly wants to look at an object in a
799 	   type other than the one we've scalarized.  */
800 	goto use_all;
801 
802       case WITH_SIZE_EXPR:
803 	/* This is a transparent wrapper.  The entire inner expression really
804 	   is being used.  */
805 	goto use_all;
806 
807       use_all:
808         expr_p = &TREE_OPERAND (inner, 0);
809 	inner = expr = *expr_p;
810 	use_all_p = true;
811 	break;
812 
813       default:
814 #ifdef ENABLE_CHECKING
815 	/* Validate that we're not missing any references.  */
816 	gcc_assert (!walk_tree (&inner, sra_find_candidate_decl, NULL, NULL));
817 #endif
818 	return;
819       }
820 }
821 
822 /* Walk a TREE_LIST of values looking for scalarizable aggregates.
823    If we find one, invoke FNS->USE.  */
824 
825 static void
826 sra_walk_tree_list (tree list, block_stmt_iterator *bsi, bool is_output,
827 		    const struct sra_walk_fns *fns)
828 {
829   tree op;
830   for (op = list; op ; op = TREE_CHAIN (op))
831     sra_walk_expr (&TREE_VALUE (op), bsi, is_output, fns);
832 }
833 
834 /* Walk the arguments of a CALL_EXPR looking for scalarizable aggregates.
835    If we find one, invoke FNS->USE.  */
836 
837 static void
838 sra_walk_call_expr (tree expr, block_stmt_iterator *bsi,
839 		    const struct sra_walk_fns *fns)
840 {
841   sra_walk_tree_list (TREE_OPERAND (expr, 1), bsi, false, fns);
842 }
843 
844 /* Walk the inputs and outputs of an ASM_EXPR looking for scalarizable
845    aggregates.  If we find one, invoke FNS->USE.  */
846 
847 static void
848 sra_walk_asm_expr (tree expr, block_stmt_iterator *bsi,
849 		   const struct sra_walk_fns *fns)
850 {
851   sra_walk_tree_list (ASM_INPUTS (expr), bsi, false, fns);
852   sra_walk_tree_list (ASM_OUTPUTS (expr), bsi, true, fns);
853 }
854 
855 /* Walk a MODIFY_EXPR and categorize the assignment appropriately.  */
856 
857 static void
858 sra_walk_modify_expr (tree expr, block_stmt_iterator *bsi,
859 		      const struct sra_walk_fns *fns)
860 {
861   struct sra_elt *lhs_elt, *rhs_elt;
862   tree lhs, rhs;
863 
864   lhs = TREE_OPERAND (expr, 0);
865   rhs = TREE_OPERAND (expr, 1);
866   lhs_elt = maybe_lookup_element_for_expr (lhs);
867   rhs_elt = maybe_lookup_element_for_expr (rhs);
868 
869   /* If both sides are scalarizable, this is a COPY operation.  */
870   if (lhs_elt && rhs_elt)
871     {
872       fns->copy (lhs_elt, rhs_elt, bsi);
873       return;
874     }
875 
876   /* If the RHS is scalarizable, handle it.  There are only two cases.  */
877   if (rhs_elt)
878     {
879       if (!rhs_elt->is_scalar && !TREE_SIDE_EFFECTS (lhs))
880 	fns->ldst (rhs_elt, lhs, bsi, false);
881       else
882 	fns->use (rhs_elt, &TREE_OPERAND (expr, 1), bsi, false, false);
883     }
884 
885   /* If it isn't scalarizable, there may be scalarizable variables within, so
886      check for a call or else walk the RHS to see if we need to do any
887      copy-in operations.  We need to do it before the LHS is scalarized so
888      that the statements get inserted in the proper place, before any
889      copy-out operations.  */
890   else
891     {
892       tree call = get_call_expr_in (rhs);
893       if (call)
894 	sra_walk_call_expr (call, bsi, fns);
895       else
896 	sra_walk_expr (&TREE_OPERAND (expr, 1), bsi, false, fns);
897     }
898 
899   /* Likewise, handle the LHS being scalarizable.  We have cases similar
900      to those above, but also want to handle RHS being constant.  */
901   if (lhs_elt)
902     {
903       /* If this is an assignment from a constant, or constructor, then
904 	 we have access to all of the elements individually.  Invoke INIT.  */
905       if (TREE_CODE (rhs) == COMPLEX_EXPR
906 	  || TREE_CODE (rhs) == COMPLEX_CST
907 	  || TREE_CODE (rhs) == CONSTRUCTOR)
908 	fns->init (lhs_elt, rhs, bsi);
909 
910       /* If this is an assignment from read-only memory, treat this as if
911 	 we'd been passed the constructor directly.  Invoke INIT.  */
912       else if (TREE_CODE (rhs) == VAR_DECL
913 	       && TREE_STATIC (rhs)
914 	       && TREE_READONLY (rhs)
915 	       && targetm.binds_local_p (rhs))
916 	fns->init (lhs_elt, DECL_INITIAL (rhs), bsi);
917 
918       /* If this is a copy from a non-scalarizable lvalue, invoke LDST.
919 	 The lvalue requirement prevents us from trying to directly scalarize
920 	 the result of a function call.  Which would result in trying to call
921 	 the function multiple times, and other evil things.  */
922       else if (!lhs_elt->is_scalar
923 	       && !TREE_SIDE_EFFECTS (rhs) && is_gimple_addressable (rhs))
924 	fns->ldst (lhs_elt, rhs, bsi, true);
925 
926       /* Otherwise we're being used in some context that requires the
927 	 aggregate to be seen as a whole.  Invoke USE.  */
928       else
929 	fns->use (lhs_elt, &TREE_OPERAND (expr, 0), bsi, true, false);
930     }
931 
932   /* Similarly to above, LHS_ELT being null only means that the LHS as a
933      whole is not a scalarizable reference.  There may be occurrences of
934      scalarizable variables within, which implies a USE.  */
935   else
936     sra_walk_expr (&TREE_OPERAND (expr, 0), bsi, true, fns);
937 }
938 
939 /* Entry point to the walk functions.  Search the entire function,
940    invoking the callbacks in FNS on each of the references to
941    scalarizable variables.  */
942 
943 static void
944 sra_walk_function (const struct sra_walk_fns *fns)
945 {
946   basic_block bb;
947   block_stmt_iterator si, ni;
948 
949 /* ??? Phase 4 could derive some benefit from walking the function in
950      dominator tree order.  */
951 
952   FOR_EACH_BB (bb)
953     for (si = bsi_start (bb); !bsi_end_p (si); si = ni)
954       {
955 	tree stmt, t;
956 	stmt_ann_t ann;
957 
958 	stmt = bsi_stmt (si);
959 	ann = stmt_ann (stmt);
960 
961 	ni = si;
962 	bsi_next (&ni);
963 
964 	/* If the statement has no virtual operands, then it doesn't
965 	   make any structure references that we care about.  */
966 	if (ZERO_SSA_OPERANDS (stmt, (SSA_OP_VIRTUAL_DEFS | SSA_OP_VUSE)))
967 	  continue;
968 
969 	switch (TREE_CODE (stmt))
970 	  {
971 	  case RETURN_EXPR:
972 	    /* If we have "return <retval>" then the return value is
973 	       already exposed for our pleasure.  Walk it as a USE to
974 	       force all the components back in place for the return.
975 
976 	       If we have an embedded assignment, then <retval> is of
977 	       a type that gets returned in registers in this ABI, and
978 	       we do not wish to extend their lifetimes.  Treat this
979 	       as a USE of the variable on the RHS of this assignment.  */
980 
981 	    t = TREE_OPERAND (stmt, 0);
982 	    if (TREE_CODE (t) == MODIFY_EXPR)
983 	      sra_walk_expr (&TREE_OPERAND (t, 1), &si, false, fns);
984 	    else
985 	      sra_walk_expr (&TREE_OPERAND (stmt, 0), &si, false, fns);
986 	    break;
987 
988 	  case MODIFY_EXPR:
989 	    sra_walk_modify_expr (stmt, &si, fns);
990 	    break;
991 	  case CALL_EXPR:
992 	    sra_walk_call_expr (stmt, &si, fns);
993 	    break;
994 	  case ASM_EXPR:
995 	    sra_walk_asm_expr (stmt, &si, fns);
996 	    break;
997 
998 	  default:
999 	    break;
1000 	  }
1001       }
1002 }
1003 
1004 /* Phase One: Scan all referenced variables in the program looking for
1005    structures that could be decomposed.  */
1006 
1007 static bool
1008 find_candidates_for_sra (void)
1009 {
1010   bool any_set = false;
1011   tree var;
1012   referenced_var_iterator rvi;
1013 
1014   FOR_EACH_REFERENCED_VAR (var, rvi)
1015     {
1016       if (decl_can_be_decomposed_p (var))
1017         {
1018           bitmap_set_bit (sra_candidates, DECL_UID (var));
1019           any_set = true;
1020         }
1021     }
1022 
1023   return any_set;
1024 }
1025 
1026 
1027 /* Phase Two: Scan all references to scalarizable variables.  Count the
1028    number of times they are used or copied respectively.  */
1029 
1030 /* Callbacks to fill in SRA_WALK_FNS.  Everything but USE is
1031    considered a copy, because we can decompose the reference such that
1032    the sub-elements needn't be contiguous.  */
1033 
1034 static void
1035 scan_use (struct sra_elt *elt, tree *expr_p ATTRIBUTE_UNUSED,
1036 	  block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1037 	  bool is_output ATTRIBUTE_UNUSED, bool use_all ATTRIBUTE_UNUSED)
1038 {
1039   elt->n_uses += 1;
1040 }
1041 
1042 static void
1043 scan_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
1044 	   block_stmt_iterator *bsi ATTRIBUTE_UNUSED)
1045 {
1046   lhs_elt->n_copies += 1;
1047   rhs_elt->n_copies += 1;
1048 }
1049 
1050 static void
1051 scan_init (struct sra_elt *lhs_elt, tree rhs ATTRIBUTE_UNUSED,
1052 	   block_stmt_iterator *bsi ATTRIBUTE_UNUSED)
1053 {
1054   lhs_elt->n_copies += 1;
1055 }
1056 
1057 static void
1058 scan_ldst (struct sra_elt *elt, tree other ATTRIBUTE_UNUSED,
1059 	   block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1060 	   bool is_output ATTRIBUTE_UNUSED)
1061 {
1062   elt->n_copies += 1;
1063 }
1064 
1065 /* Dump the values we collected during the scanning phase.  */
1066 
1067 static void
1068 scan_dump (struct sra_elt *elt)
1069 {
1070   struct sra_elt *c;
1071 
1072   dump_sra_elt_name (dump_file, elt);
1073   fprintf (dump_file, ": n_uses=%u n_copies=%u\n", elt->n_uses, elt->n_copies);
1074 
1075   for (c = elt->children; c ; c = c->sibling)
1076     scan_dump (c);
1077 
1078   for (c = elt->groups; c ; c = c->sibling)
1079     scan_dump (c);
1080 }
1081 
1082 /* Entry point to phase 2.  Scan the entire function, building up
1083    scalarization data structures, recording copies and uses.  */
1084 
1085 static void
1086 scan_function (void)
1087 {
1088   static const struct sra_walk_fns fns = {
1089     scan_use, scan_copy, scan_init, scan_ldst, true
1090   };
1091   bitmap_iterator bi;
1092 
1093   sra_walk_function (&fns);
1094 
1095   if (dump_file && (dump_flags & TDF_DETAILS))
1096     {
1097       unsigned i;
1098 
1099       fputs ("\nScan results:\n", dump_file);
1100       EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi)
1101 	{
1102 	  tree var = referenced_var (i);
1103 	  struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
1104 	  if (elt)
1105 	    scan_dump (elt);
1106 	}
1107       fputc ('\n', dump_file);
1108     }
1109 }
1110 
1111 /* Phase Three: Make decisions about which variables to scalarize, if any.
1112    All elements to be scalarized have replacement variables made for them.  */
1113 
1114 /* A subroutine of build_element_name.  Recursively build the element
1115    name on the obstack.  */
1116 
1117 static void
1118 build_element_name_1 (struct sra_elt *elt)
1119 {
1120   tree t;
1121   char buffer[32];
1122 
1123   if (elt->parent)
1124     {
1125       build_element_name_1 (elt->parent);
1126       obstack_1grow (&sra_obstack, '$');
1127 
1128       if (TREE_CODE (elt->parent->type) == COMPLEX_TYPE)
1129 	{
1130 	  if (elt->element == integer_zero_node)
1131 	    obstack_grow (&sra_obstack, "real", 4);
1132 	  else
1133 	    obstack_grow (&sra_obstack, "imag", 4);
1134 	  return;
1135 	}
1136     }
1137 
1138   t = elt->element;
1139   if (TREE_CODE (t) == INTEGER_CST)
1140     {
1141       /* ??? Eh.  Don't bother doing double-wide printing.  */
1142       sprintf (buffer, HOST_WIDE_INT_PRINT_DEC, TREE_INT_CST_LOW (t));
1143       obstack_grow (&sra_obstack, buffer, strlen (buffer));
1144     }
1145   else
1146     {
1147       tree name = DECL_NAME (t);
1148       if (name)
1149 	obstack_grow (&sra_obstack, IDENTIFIER_POINTER (name),
1150 		      IDENTIFIER_LENGTH (name));
1151       else
1152 	{
1153 	  sprintf (buffer, "D%u", DECL_UID (t));
1154 	  obstack_grow (&sra_obstack, buffer, strlen (buffer));
1155 	}
1156     }
1157 }
1158 
1159 /* Construct a pretty variable name for an element's replacement variable.
1160    The name is built on the obstack.  */
1161 
1162 static char *
1163 build_element_name (struct sra_elt *elt)
1164 {
1165   build_element_name_1 (elt);
1166   obstack_1grow (&sra_obstack, '\0');
1167   return XOBFINISH (&sra_obstack, char *);
1168 }
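/* Purely as an illustration: for a named candidate "foo" whose type
   contains a complex field "c", the element for the real part of foo.c
   would get the pretty name "foo$c$real", while an anonymous base
   declaration would fall back to its UID, e.g. "D1234$c$real".  The UID
   value here is made up.  */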
1169 
1170 /* Instantiate an element as an independent variable.  */
1171 
1172 static void
1173 instantiate_element (struct sra_elt *elt)
1174 {
1175   struct sra_elt *base_elt;
1176   tree var, base;
1177 
1178   for (base_elt = elt; base_elt->parent; base_elt = base_elt->parent)
1179     continue;
1180   base = base_elt->element;
1181 
1182   elt->replacement = var = make_rename_temp (elt->type, "SR");
1183   DECL_SOURCE_LOCATION (var) = DECL_SOURCE_LOCATION (base);
1184   DECL_ARTIFICIAL (var) = 1;
1185 
1186   if (TREE_THIS_VOLATILE (elt->type))
1187     {
1188       TREE_THIS_VOLATILE (var) = 1;
1189       TREE_SIDE_EFFECTS (var) = 1;
1190     }
1191 
1192   if (DECL_NAME (base) && !DECL_IGNORED_P (base))
1193     {
1194       char *pretty_name = build_element_name (elt);
1195       DECL_NAME (var) = get_identifier (pretty_name);
1196       obstack_free (&sra_obstack, pretty_name);
1197 
1198       SET_DECL_DEBUG_EXPR (var, generate_element_ref (elt));
1199       DECL_DEBUG_EXPR_IS_FROM (var) = 1;
1200 
1201       DECL_IGNORED_P (var) = 0;
1202       TREE_NO_WARNING (var) = TREE_NO_WARNING (base);
1203     }
1204   else
1205     {
1206       DECL_IGNORED_P (var) = 1;
1207       /* ??? We can't generate any warning that would be meaningful.  */
1208       TREE_NO_WARNING (var) = 1;
1209     }
1210 
1211   if (dump_file)
1212     {
1213       fputs ("  ", dump_file);
1214       dump_sra_elt_name (dump_file, elt);
1215       fputs (" -> ", dump_file);
1216       print_generic_expr (dump_file, var, dump_flags);
1217       fputc ('\n', dump_file);
1218     }
1219 }
1220 
1221 /* Make one pass across an element tree deciding whether or not it's
1222    profitable to instantiate individual leaf scalars.
1223 
1224    PARENT_USES and PARENT_COPIES are the sum of the N_USES and N_COPIES
1225    fields all the way up the tree.  */
1226 
1227 static void
1228 decide_instantiation_1 (struct sra_elt *elt, unsigned int parent_uses,
1229 			unsigned int parent_copies)
1230 {
1231   if (dump_file && !elt->parent)
1232     {
1233       fputs ("Initial instantiation for ", dump_file);
1234       dump_sra_elt_name (dump_file, elt);
1235       fputc ('\n', dump_file);
1236     }
1237 
1238   if (elt->cannot_scalarize)
1239     return;
1240 
1241   if (elt->is_scalar)
1242     {
1243       /* The decision is simple: instantiate if we're used more frequently
1244 	 than the parent needs to be seen as a complete unit.  */
1245       if (elt->n_uses + elt->n_copies + parent_copies > parent_uses)
1246 	instantiate_element (elt);
1247     }
1248   else
1249     {
1250       struct sra_elt *c, *group;
1251       unsigned int this_uses = elt->n_uses + parent_uses;
1252       unsigned int this_copies = elt->n_copies + parent_copies;
1253 
1254       /* Consider groups of sub-elements as weighing in favour of
1255 	 instantiation whatever their size.  */
1256       for (group = elt->groups; group ; group = group->sibling)
1257 	FOR_EACH_ACTUAL_CHILD (c, group)
1258 	  {
1259 	    c->n_uses += group->n_uses;
1260 	    c->n_copies += group->n_copies;
1261 	  }
1262 
1263       for (c = elt->children; c ; c = c->sibling)
1264 	decide_instantiation_1 (c, this_uses, this_copies);
1265     }
1266 }
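/* A made-up worked example of the test above: suppose "a.x" has
   n_uses == 3 and n_copies == 1, while "a" itself was used as a whole
   twice (parent_uses == 2) and never block-copied (parent_copies == 0).
   Then 3 + 1 + 0 > 2, so a.x would get its own replacement variable.
   With only one use and one copy of a.x against five whole-object uses
   of "a", 1 + 1 + 0 > 5 fails and a.x would be left alone.  */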
1267 
1268 /* Compute the size and number of all instantiated elements below ELT.
1269    We will only care about this if the size of the complete structure
1270    fits in a HOST_WIDE_INT, so we don't have to worry about overflow.  */
1271 
1272 static unsigned int
1273 sum_instantiated_sizes (struct sra_elt *elt, unsigned HOST_WIDE_INT *sizep)
1274 {
1275   if (elt->replacement)
1276     {
1277       *sizep += TREE_INT_CST_LOW (TYPE_SIZE_UNIT (elt->type));
1278       return 1;
1279     }
1280   else
1281     {
1282       struct sra_elt *c;
1283       unsigned int count = 0;
1284 
1285       for (c = elt->children; c ; c = c->sibling)
1286 	count += sum_instantiated_sizes (c, sizep);
1287 
1288       return count;
1289     }
1290 }
1291 
1292 /* Instantiate fields in ELT->TYPE that are not currently present as
1293    children of ELT.  */
1294 
1295 static void instantiate_missing_elements (struct sra_elt *elt);
1296 
1297 static void
1298 instantiate_missing_elements_1 (struct sra_elt *elt, tree child, tree type)
1299 {
1300   struct sra_elt *sub = lookup_element (elt, child, type, INSERT);
1301   if (sub->is_scalar)
1302     {
1303       if (sub->replacement == NULL)
1304 	instantiate_element (sub);
1305     }
1306   else
1307     instantiate_missing_elements (sub);
1308 }
1309 
1310 static void
1311 instantiate_missing_elements (struct sra_elt *elt)
1312 {
1313   tree type = elt->type;
1314 
1315   switch (TREE_CODE (type))
1316     {
1317     case RECORD_TYPE:
1318       {
1319 	tree f;
1320 	for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
1321 	  if (TREE_CODE (f) == FIELD_DECL)
1322 	    {
1323 	      tree field_type = TREE_TYPE (f);
1324 
1325 	      /* canonicalize_component_ref() unwidens some bit-field
1326 		 types (not marked as DECL_BIT_FIELD in C++), so we
1327 		 must do the same, lest we may introduce type
1328 		 mismatches.  */
1329 	      if (INTEGRAL_TYPE_P (field_type)
1330 		  && DECL_MODE (f) != TYPE_MODE (field_type))
1331 		field_type = TREE_TYPE (get_unwidened (build3 (COMPONENT_REF,
1332 							       field_type,
1333 							       elt->element,
1334 							       f, NULL_TREE),
1335 						       NULL_TREE));
1336 
1337 	      instantiate_missing_elements_1 (elt, f, field_type);
1338 	    }
1339 	break;
1340       }
1341 
1342     case ARRAY_TYPE:
1343       {
1344 	tree i, max, subtype;
1345 
1346 	i = TYPE_MIN_VALUE (TYPE_DOMAIN (type));
1347 	max = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
1348 	subtype = TREE_TYPE (type);
1349 
1350 	while (1)
1351 	  {
1352 	    instantiate_missing_elements_1 (elt, i, subtype);
1353 	    if (tree_int_cst_equal (i, max))
1354 	      break;
1355 	    i = int_const_binop (PLUS_EXPR, i, integer_one_node, true);
1356 	  }
1357 
1358 	break;
1359       }
1360 
1361     case COMPLEX_TYPE:
1362       type = TREE_TYPE (type);
1363       instantiate_missing_elements_1 (elt, integer_zero_node, type);
1364       instantiate_missing_elements_1 (elt, integer_one_node, type);
1365       break;
1366 
1367     default:
1368       gcc_unreachable ();
1369     }
1370 }
1371 
1372 /* Make one pass across an element tree deciding whether to perform block
1373    or element copies.  If we decide on element copies, instantiate all
1374    elements.  Return true if there are any instantiated sub-elements.  */
1375 
1376 static bool
1377 decide_block_copy (struct sra_elt *elt)
1378 {
1379   struct sra_elt *c;
1380   bool any_inst;
1381 
1382   /* We shouldn't be invoked on groups of sub-elements as they must
1383      behave like their parent as far as block copy is concerned.  */
1384   gcc_assert (!elt->is_group);
1385 
1386   /* If scalarization is disabled, respect it.  */
1387   if (elt->cannot_scalarize)
1388     {
1389       elt->use_block_copy = 1;
1390 
1391       if (dump_file)
1392 	{
1393 	  fputs ("Scalarization disabled for ", dump_file);
1394 	  dump_sra_elt_name (dump_file, elt);
1395 	  fputc ('\n', dump_file);
1396 	}
1397 
1398       /* Disable scalarization of sub-elements.  */
1399       for (c = elt->children; c; c = c->sibling)
1400 	{
1401 	  c->cannot_scalarize = 1;
1402 	  decide_block_copy (c);
1403 	}
1404 
1405       /* Groups behave like their parent.  */
1406       for (c = elt->groups; c; c = c->sibling)
1407 	{
1408 	  c->cannot_scalarize = 1;
1409 	  c->use_block_copy = 1;
1410 	}
1411 
1412       return false;
1413     }
1414 
1415   /* Don't decide if we've no uses.  */
1416   if (elt->n_uses == 0 && elt->n_copies == 0)
1417     ;
1418 
1419   else if (!elt->is_scalar)
1420     {
1421       tree size_tree = TYPE_SIZE_UNIT (elt->type);
1422       bool use_block_copy = true;
1423 
1424       /* Tradeoffs for COMPLEX types pretty much always make it better
1425 	 to go ahead and split the components.  */
1426       if (TREE_CODE (elt->type) == COMPLEX_TYPE)
1427 	use_block_copy = false;
1428 
1429       /* Don't bother trying to figure out the rest if the structure is
1430 	 so large we can't do easy arithmetic.  This also forces block
1431 	 copies for variable sized structures.  */
1432       else if (host_integerp (size_tree, 1))
1433 	{
1434 	  unsigned HOST_WIDE_INT full_size, inst_size = 0;
1435 	  unsigned int max_size, max_count, inst_count, full_count;
1436 
1437 	  /* If the sra-max-structure-size parameter is 0, then the
1438 	     user has not overridden the parameter and we can choose a
1439 	     sensible default.  */
1440 	  max_size = SRA_MAX_STRUCTURE_SIZE
1441 	    ? SRA_MAX_STRUCTURE_SIZE
1442 	    : MOVE_RATIO * UNITS_PER_WORD;
1443 	  max_count = SRA_MAX_STRUCTURE_COUNT
1444 	    ? SRA_MAX_STRUCTURE_COUNT
1445 	    : MOVE_RATIO;
1446 
1447 	  full_size = tree_low_cst (size_tree, 1);
1448 	  full_count = count_type_elements (elt->type, false);
1449 	  inst_count = sum_instantiated_sizes (elt, &inst_size);
1450 
1451 	  /* ??? What to do here.  If there are two fields, and we've only
1452 	     instantiated one, then instantiating the other is clearly a win.
1453 	     If there are a large number of fields then the size of the copy
1454 	     is much more of a factor.  */
1455 
1456 	  /* If the structure is small, and we've made copies, go ahead
1457 	     and instantiate, hoping that the copies will go away.  */
1458 	  if (full_size <= max_size
1459 	      && (full_count - inst_count) <= max_count
1460 	      && elt->n_copies > elt->n_uses)
1461 	    use_block_copy = false;
1462 	  else if (inst_count * 100 >= full_count * SRA_FIELD_STRUCTURE_RATIO
1463 		   && inst_size * 100 >= full_size * SRA_FIELD_STRUCTURE_RATIO)
1464 	    use_block_copy = false;
1465 
1466 	  /* In order to avoid block copy, we have to be able to instantiate
1467 	     all elements of the type.  See if this is possible.  */
1468 	  if (!use_block_copy
1469 	      && (!can_completely_scalarize_p (elt)
1470 		  || !type_can_instantiate_all_elements (elt->type)))
1471 	    use_block_copy = true;
1472 	}
1473 
1474       elt->use_block_copy = use_block_copy;
1475 
1476       /* Groups behave like their parent.  */
1477       for (c = elt->groups; c; c = c->sibling)
1478 	c->use_block_copy = use_block_copy;
1479 
1480       if (dump_file)
1481 	{
1482 	  fprintf (dump_file, "Using %s for ",
1483 		   use_block_copy ? "block-copy" : "element-copy");
1484 	  dump_sra_elt_name (dump_file, elt);
1485 	  fputc ('\n', dump_file);
1486 	}
1487 
1488       if (!use_block_copy)
1489 	{
1490 	  instantiate_missing_elements (elt);
1491 	  return true;
1492 	}
1493     }
1494 
1495   any_inst = elt->replacement != NULL;
1496 
1497   for (c = elt->children; c ; c = c->sibling)
1498     any_inst |= decide_block_copy (c);
1499 
1500   return any_inst;
1501 }
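/* A hypothetical worked example of the size test above: on a target with
   UNITS_PER_WORD == 8 and MOVE_RATIO == 4, and with both structure-size
   parameters left at 0, the defaults become max_size == 32 bytes and
   max_count == 4.  A 16-byte struct with two fields that is copied more
   often than it is used as a unit (n_copies > n_uses) would then be
   element-copied, whereas a 200-byte array of 25 doubles with only one
   element instantiated would fall back to block copy.  The target
   numbers are illustrative, not taken from any particular port.  */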
1502 
1503 /* Entry point to phase 3.  Instantiate scalar replacement variables.  */
1504 
1505 static void
1506 decide_instantiations (void)
1507 {
1508   unsigned int i;
1509   bool cleared_any;
1510   bitmap_head done_head;
1511   bitmap_iterator bi;
1512 
1513   /* We cannot clear bits from a bitmap we're iterating over,
1514      so save up all the bits to clear until the end.  */
1515   bitmap_initialize (&done_head, &bitmap_default_obstack);
1516   cleared_any = false;
1517 
1518   EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi)
1519     {
1520       tree var = referenced_var (i);
1521       struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
1522       if (elt)
1523 	{
1524 	  decide_instantiation_1 (elt, 0, 0);
1525 	  if (!decide_block_copy (elt))
1526 	    elt = NULL;
1527 	}
1528       if (!elt)
1529 	{
1530 	  bitmap_set_bit (&done_head, i);
1531 	  cleared_any = true;
1532 	}
1533     }
1534 
1535   if (cleared_any)
1536     {
1537       bitmap_and_compl_into (sra_candidates, &done_head);
1538       bitmap_and_compl_into (needs_copy_in, &done_head);
1539     }
1540   bitmap_clear (&done_head);
1541 
1542   if (!bitmap_empty_p (sra_candidates))
1543     todoflags |= TODO_update_smt_usage;
1544 
1545   mark_set_for_renaming (sra_candidates);
1546 
1547   if (dump_file)
1548     fputc ('\n', dump_file);
1549 }
1550 
1551 
1552 /* Phase Four: Update the function to match the replacements created.  */
1553 
1554 /* Mark all the variables in V_MAY_DEF or V_MUST_DEF operands for STMT for
1555    renaming. This becomes necessary when we modify all of a non-scalar.  */
1556 
1557 static void
1558 mark_all_v_defs_1 (tree stmt)
1559 {
1560   tree sym;
1561   ssa_op_iter iter;
1562 
1563   update_stmt_if_modified (stmt);
1564 
1565   FOR_EACH_SSA_TREE_OPERAND (sym, stmt, iter, SSA_OP_ALL_VIRTUALS)
1566     {
1567       if (TREE_CODE (sym) == SSA_NAME)
1568 	sym = SSA_NAME_VAR (sym);
1569       mark_sym_for_renaming (sym);
1570     }
1571 }
1572 
1573 
1574 /* Mark all the variables in virtual operands in all the statements in
1575    LIST for renaming.  */
1576 
1577 static void
1578 mark_all_v_defs (tree list)
1579 {
1580   if (TREE_CODE (list) != STATEMENT_LIST)
1581     mark_all_v_defs_1 (list);
1582   else
1583     {
1584       tree_stmt_iterator i;
1585       for (i = tsi_start (list); !tsi_end_p (i); tsi_next (&i))
1586 	mark_all_v_defs_1 (tsi_stmt (i));
1587     }
1588 }
1589 
1590 /* Mark every replacement under ELT with TREE_NO_WARNING.  */
1591 
1592 static void
1593 mark_no_warning (struct sra_elt *elt)
1594 {
1595   if (!elt->all_no_warning)
1596     {
1597       if (elt->replacement)
1598 	TREE_NO_WARNING (elt->replacement) = 1;
1599       else
1600 	{
1601 	  struct sra_elt *c;
1602 	  FOR_EACH_ACTUAL_CHILD (c, elt)
1603 	    mark_no_warning (c);
1604 	}
1605       elt->all_no_warning = true;
1606     }
1607 }
1608 
1609 /* Build a single level component reference to ELT rooted at BASE.  */
1610 
1611 static tree
1612 generate_one_element_ref (struct sra_elt *elt, tree base)
1613 {
1614   switch (TREE_CODE (TREE_TYPE (base)))
1615     {
1616     case RECORD_TYPE:
1617       {
1618 	tree field = elt->element;
1619 
1620 	/* Watch out for compatible records with differing field lists.  */
1621 	if (DECL_FIELD_CONTEXT (field) != TYPE_MAIN_VARIANT (TREE_TYPE (base)))
1622 	  field = find_compatible_field (TREE_TYPE (base), field);
1623 
1624         return build3 (COMPONENT_REF, elt->type, base, field, NULL);
1625       }
1626 
1627     case ARRAY_TYPE:
1628       todoflags |= TODO_update_smt_usage;
1629       if (TREE_CODE (elt->element) == RANGE_EXPR)
1630 	return build4 (ARRAY_RANGE_REF, elt->type, base,
1631 		       TREE_OPERAND (elt->element, 0), NULL, NULL);
1632       else
1633 	return build4 (ARRAY_REF, elt->type, base, elt->element, NULL, NULL);
1634 
1635     case COMPLEX_TYPE:
1636       if (elt->element == integer_zero_node)
1637 	return build1 (REALPART_EXPR, elt->type, base);
1638       else
1639 	return build1 (IMAGPART_EXPR, elt->type, base);
1640 
1641     default:
1642       gcc_unreachable ();
1643     }
1644 }
1645 
1646 /* Build a full component reference to ELT rooted at its native variable.  */
1647 
1648 static tree
1649 generate_element_ref (struct sra_elt *elt)
1650 {
1651   if (elt->parent)
1652     return generate_one_element_ref (elt, generate_element_ref (elt->parent));
1653   else
1654     return elt->element;
1655 }
1656 
1657 static tree
1658 sra_build_assignment (tree dst, tree src)
1659 {
1660   /* We need TYPE_CANONICAL to compare the types of dst and src
1661      efficiently, but that's only introduced in GCC 4.3.  */
1662   return build2 (MODIFY_EXPR, void_type_node, dst, src);
1663 }
1664 
1665 /* Generate a set of assignment statements in *LIST_P to copy all
1666    instantiated elements under ELT to or from the equivalent structure
1667    rooted at EXPR.  COPY_OUT controls the direction of the copy, with
1668    true meaning to copy out of EXPR into ELT.  */
1669 
1670 static void
1671 generate_copy_inout (struct sra_elt *elt, bool copy_out, tree expr,
1672 		     tree *list_p)
1673 {
1674   struct sra_elt *c;
1675   tree t;
1676 
1677   if (!copy_out && TREE_CODE (expr) == SSA_NAME
1678       && TREE_CODE (TREE_TYPE (expr)) == COMPLEX_TYPE)
1679     {
1680       tree r, i;
1681 
1682       c = lookup_element (elt, integer_zero_node, NULL, NO_INSERT);
1683       r = c->replacement;
1684       c = lookup_element (elt, integer_one_node, NULL, NO_INSERT);
1685       i = c->replacement;
1686 
1687       t = build2 (COMPLEX_EXPR, elt->type, r, i);
1688       t = sra_build_assignment (expr, t);
1689       SSA_NAME_DEF_STMT (expr) = t;
1690       append_to_statement_list (t, list_p);
1691     }
1692   else if (elt->replacement)
1693     {
1694       if (copy_out)
1695 	t = sra_build_assignment (elt->replacement, expr);
1696       else
1697 	t = sra_build_assignment (expr, elt->replacement);
1698       append_to_statement_list (t, list_p);
1699     }
1700   else
1701     {
1702       FOR_EACH_ACTUAL_CHILD (c, elt)
1703 	{
1704 	  t = generate_one_element_ref (c, unshare_expr (expr));
1705 	  generate_copy_inout (c, copy_out, t, list_p);
1706 	}
1707     }
1708 }
1709 
1710 /* Generate a set of assignment statements in *LIST_P to copy all instantiated
1711    elements under SRC to their counterparts under DST.  There must be a 1-1
1712    correspondence of instantiated elements.  */
1713 
1714 static void
1715 generate_element_copy (struct sra_elt *dst, struct sra_elt *src, tree *list_p)
1716 {
1717   struct sra_elt *dc, *sc;
1718 
1719   FOR_EACH_ACTUAL_CHILD (dc, dst)
1720     {
1721       sc = lookup_element (src, dc->element, NULL, NO_INSERT);
1722       gcc_assert (sc);
1723       generate_element_copy (dc, sc, list_p);
1724     }
1725 
1726   if (dst->replacement)
1727     {
1728       tree t;
1729 
1730       gcc_assert (src->replacement);
1731 
1732       t = sra_build_assignment (dst->replacement, src->replacement);
1733       append_to_statement_list (t, list_p);
1734     }
1735 }
1736 
1737 /* Generate a set of assignment statements in *LIST_P to zero all instantiated
1738    elements under ELT.  In addition, do not assign to elements that have been
1739    marked VISITED but do reset the visited flag; this allows easy coordination
1740    with generate_element_init.  */
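/* For example (hypothetical candidate, not from this file): if
   generate_element_init has already emitted "s$a = 5" for the initializer
   "{ .a = 5 }" and marked that element VISITED, this routine skips s$a,
   clears the flag, and emits only "s$b = 0" for the remaining
   instantiated element.  */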
1741 
1742 static void
1743 generate_element_zero (struct sra_elt *elt, tree *list_p)
1744 {
1745   struct sra_elt *c;
1746 
1747   if (elt->visited)
1748     {
1749       elt->visited = false;
1750       return;
1751     }
1752 
1753   FOR_EACH_ACTUAL_CHILD (c, elt)
1754     generate_element_zero (c, list_p);
1755 
1756   if (elt->replacement)
1757     {
1758       tree t;
1759 
1760       gcc_assert (elt->is_scalar);
1761       t = fold_convert (elt->type, integer_zero_node);
1762 
1763       t = sra_build_assignment (elt->replacement, t);
1764       append_to_statement_list (t, list_p);
1765     }
1766 }
1767 
1768 /* Generate an assignment VAR = INIT, where INIT may need gimplification.
1769    Add the result to *LIST_P.  */
1770 
1771 static void
1772 generate_one_element_init (tree var, tree init, tree *list_p)
1773 {
1774   /* The replacement can be almost arbitrarily complex.  Gimplify.  */
1775   tree stmt = sra_build_assignment (var, init);
1776   gimplify_and_add (stmt, list_p);
1777 }
1778 
1779 /* Generate a set of assignment statements in *LIST_P to set all instantiated
1780    elements under ELT with the contents of the initializer INIT.  In addition,
1781    mark all assigned elements VISITED; this allows easy coordination with
1782    generate_element_zero.  Return false if we found a case we couldn't
1783    handle.  */
1784 
1785 static bool
1786 generate_element_init_1 (struct sra_elt *elt, tree init, tree *list_p)
1787 {
1788   bool result = true;
1789   enum tree_code init_code;
1790   struct sra_elt *sub;
1791   tree t;
1792   unsigned HOST_WIDE_INT idx;
1793   tree value, purpose;
1794 
1795   /* We can be passed DECL_INITIAL of a static variable.  It might have a
1796      conversion, which we strip off here.  */
1797   STRIP_USELESS_TYPE_CONVERSION (init);
1798   init_code = TREE_CODE (init);
1799 
1800   if (elt->is_scalar)
1801     {
1802       if (elt->replacement)
1803 	{
1804 	  generate_one_element_init (elt->replacement, init, list_p);
1805 	  elt->visited = true;
1806 	}
1807       return result;
1808     }
1809 
1810   switch (init_code)
1811     {
1812     case COMPLEX_CST:
1813     case COMPLEX_EXPR:
1814       FOR_EACH_ACTUAL_CHILD (sub, elt)
1815 	{
1816 	  if (sub->element == integer_zero_node)
1817 	    t = (init_code == COMPLEX_EXPR
1818 		 ? TREE_OPERAND (init, 0) : TREE_REALPART (init));
1819 	  else
1820 	    t = (init_code == COMPLEX_EXPR
1821 		 ? TREE_OPERAND (init, 1) : TREE_IMAGPART (init));
1822 	  result &= generate_element_init_1 (sub, t, list_p);
1823 	}
1824       break;
1825 
1826     case CONSTRUCTOR:
1827       FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), idx, purpose, value)
1828 	{
1829 	  if (TREE_CODE (purpose) == RANGE_EXPR)
1830 	    {
1831 	      tree lower = TREE_OPERAND (purpose, 0);
1832 	      tree upper = TREE_OPERAND (purpose, 1);
1833 
1834 	      while (1)
1835 		{
1836 	  	  sub = lookup_element (elt, lower, NULL, NO_INSERT);
1837 		  if (sub != NULL)
1838 		    result &= generate_element_init_1 (sub, value, list_p);
1839 		  if (tree_int_cst_equal (lower, upper))
1840 		    break;
1841 		  lower = int_const_binop (PLUS_EXPR, lower,
1842 					   integer_one_node, true);
1843 		}
1844 	    }
1845 	  else
1846 	    {
1847 	      sub = lookup_element (elt, purpose, NULL, NO_INSERT);
1848 	      if (sub != NULL)
1849 		result &= generate_element_init_1 (sub, value, list_p);
1850 	    }
1851 	}
1852       break;
1853 
1854     default:
1855       elt->visited = true;
1856       result = false;
1857     }
1858 
1859   return result;
1860 }
1861 
1862 /* A wrapper function for generate_element_init_1 that handles cleanup after
1863    gimplification.  */
1864 
1865 static bool
1866 generate_element_init (struct sra_elt *elt, tree init, tree *list_p)
1867 {
1868   bool ret;
1869 
1870   push_gimplify_context ();
1871   ret = generate_element_init_1 (elt, init, list_p);
1872   pop_gimplify_context (NULL);
1873 
1874   /* The replacement can expose previously unreferenced variables.  */
1875   if (ret && *list_p)
1876     {
1877       tree_stmt_iterator i;
1878 
1879       for (i = tsi_start (*list_p); !tsi_end_p (i); tsi_next (&i))
1880 	find_new_referenced_vars (tsi_stmt_ptr (i));
1881     }
1882 
1883   return ret;
1884 }
1885 
1886 /* Insert STMT on all the outgoing edges out of BB.  Note that if BB
1887    has more than one edge, STMT will be replicated for each edge.  Also,
1888    abnormal edges will be ignored.  */
1889 
1890 void
1891 insert_edge_copies (tree stmt, basic_block bb)
1892 {
1893   edge e;
1894   edge_iterator ei;
1895   bool first_copy;
1896 
1897   first_copy = true;
1898   FOR_EACH_EDGE (e, ei, bb->succs)
1899     {
1900       /* We don't need to insert copies on abnormal edges.  The
1901 	 value of the scalar replacement is not guaranteed to
1902 	 be valid through an abnormal edge.  */
1903       if (!(e->flags & EDGE_ABNORMAL))
1904 	{
1905 	  if (first_copy)
1906 	    {
1907 	      bsi_insert_on_edge (e, stmt);
1908 	      first_copy = false;
1909 	    }
1910 	  else
1911 	    bsi_insert_on_edge (e, unsave_expr_now (stmt));
1912 	}
1913     }
1914 }
1915 
1916 /* Helper function to insert LIST before BSI, and set up line number info.  */
1917 
1918 void
1919 sra_insert_before (block_stmt_iterator *bsi, tree list)
1920 {
1921   tree stmt = bsi_stmt (*bsi);
1922 
1923   if (EXPR_HAS_LOCATION (stmt))
1924     annotate_all_with_locus (&list, EXPR_LOCATION (stmt));
1925   bsi_insert_before (bsi, list, BSI_SAME_STMT);
1926 }
1927 
1928 /* Similarly, but insert after BSI.  Handles insertion onto edges as well.  */
1929 
1930 void
1931 sra_insert_after (block_stmt_iterator *bsi, tree list)
1932 {
1933   tree stmt = bsi_stmt (*bsi);
1934 
1935   if (EXPR_HAS_LOCATION (stmt))
1936     annotate_all_with_locus (&list, EXPR_LOCATION (stmt));
1937 
1938   if (stmt_ends_bb_p (stmt))
1939     insert_edge_copies (list, bsi->bb);
1940   else
1941     bsi_insert_after (bsi, list, BSI_SAME_STMT);
1942 }
1943 
1944 /* Similarly, but replace the statement at BSI.  */
1945 
1946 static void
1947 sra_replace (block_stmt_iterator *bsi, tree list)
1948 {
1949   sra_insert_before (bsi, list);
1950   bsi_remove (bsi, false);
1951   if (bsi_end_p (*bsi))
1952     *bsi = bsi_last (bsi->bb);
1953   else
1954     bsi_prev (bsi);
1955 }
1956 
1957 /* Scalarize a USE.  To recap, this is either a simple reference to ELT,
1958 	   if ELT is scalar, or some occurrence of ELT that requires a complete
1959    aggregate.  IS_OUTPUT is true if ELT is being modified.  */
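/* Illustrative sketch (hypothetical names): if "s.a" has the scalar
   replacement s$a, a statement such as "x = s.a" is rewritten in place to
   "x = s$a".  If instead the whole aggregate is needed, e.g. "foo (s)"
   with s only partly scalarized, the instantiated replacements are copied
   back into "s" just before the statement; when the statement writes ELT
   as a whole, the replacements are reloaded from the structure just after
   it instead.  */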
1960 
1961 static void
1962 scalarize_use (struct sra_elt *elt, tree *expr_p, block_stmt_iterator *bsi,
1963 	       bool is_output, bool use_all)
1964 {
1965   tree list = NULL, stmt = bsi_stmt (*bsi);
1966 
1967   if (elt->replacement)
1968     {
1969       /* If we have a replacement, then updating the reference is as
1970 	 simple as modifying the existing statement in place.  */
1971       if (is_output)
1972 	mark_all_v_defs (stmt);
1973       *expr_p = elt->replacement;
1974       update_stmt (stmt);
1975     }
1976   else
1977     {
1978       /* Otherwise we need some copies.  If ELT is being read, then we want
1979 	 to store all (modified) sub-elements back into the structure before
1980 	 the reference takes place.  If ELT is being written, then we want to
1981 	 load the changed values back into our shadow variables.  */
1982       /* ??? We don't check modified for reads, we just always write all of
1983 	 the values.  We should be able to record the SSA number of the VOP
1984 	 for which the values were last read.  If that number matches the
1985 	 SSA number of the VOP in the current statement, then we needn't
1986 	 emit an assignment.  This would also eliminate double writes when
1987 	 a structure is passed as more than one argument to a function call.
1988 	 This optimization would be most effective if sra_walk_function
1989 	 processed the blocks in dominator order.  */
1990 
1991       generate_copy_inout (elt, is_output, generate_element_ref (elt), &list);
1992       if (list == NULL)
1993 	return;
1994       mark_all_v_defs (list);
1995       if (is_output)
1996 	sra_insert_after (bsi, list);
1997       else
1998 	{
1999 	  sra_insert_before (bsi, list);
2000 	  if (use_all)
2001 	    mark_no_warning (elt);
2002 	}
2003     }
2004 }
2005 
2006 /* Scalarize a COPY.  To recap, this is an assignment statement between
2007    two scalarizable references, LHS_ELT and RHS_ELT.  */
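/* Sketch of the fully-instantiated case (hypothetical fields a and b):
   the block copy "d = s" is replaced by the element-wise assignments

       d$a = s$a;
       d$b = s$b;

   whereas if either side still uses a block copy, the original statement
   is kept and bracketed by copy-in/copy-out statements, as the code below
   shows.  */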
2008 
2009 static void
2010 scalarize_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
2011 		block_stmt_iterator *bsi)
2012 {
2013   tree list, stmt;
2014 
2015   if (lhs_elt->replacement && rhs_elt->replacement)
2016     {
2017       /* If we have two scalar operands, modify the existing statement.  */
2018       stmt = bsi_stmt (*bsi);
2019 
2020       /* See the commentary in sra_walk_function concerning
2021 	 RETURN_EXPR, and why we should never see one here.  */
2022       gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
2023 
2024       TREE_OPERAND (stmt, 0) = lhs_elt->replacement;
2025       TREE_OPERAND (stmt, 1) = rhs_elt->replacement;
2026       update_stmt (stmt);
2027     }
2028   else if (lhs_elt->use_block_copy || rhs_elt->use_block_copy)
2029     {
2030       /* If either side requires a block copy, then sync the RHS back
2031 	 to the original structure, leave the original assignment
2032 	 statement (which will perform the block copy), then load the
2033 	 LHS values out of its now-updated original structure.  */
2034       /* ??? Could perform a modified pair-wise element copy.  That
2035 	 would at least allow those elements that are instantiated in
2036 	 both structures to be optimized well.  */
2037 
2038       list = NULL;
2039       generate_copy_inout (rhs_elt, false,
2040 			   generate_element_ref (rhs_elt), &list);
2041       if (list)
2042 	{
2043 	  mark_all_v_defs (list);
2044 	  sra_insert_before (bsi, list);
2045 	}
2046 
2047       list = NULL;
2048       generate_copy_inout (lhs_elt, true,
2049 			   generate_element_ref (lhs_elt), &list);
2050       if (list)
2051 	{
2052 	  mark_all_v_defs (list);
2053 	  sra_insert_after (bsi, list);
2054 	}
2055     }
2056   else
2057     {
2058       /* Otherwise both sides must be fully instantiated.  In which
2059 	 case perform pair-wise element assignments and replace the
2060 	 original block copy statement.  */
2061 
2062       stmt = bsi_stmt (*bsi);
2063       mark_all_v_defs (stmt);
2064 
2065       list = NULL;
2066       generate_element_copy (lhs_elt, rhs_elt, &list);
2067       gcc_assert (list);
2068       mark_all_v_defs (list);
2069       sra_replace (bsi, list);
2070     }
2071 }
2072 
2073 /* Scalarize an INIT.  To recap, this is an assignment to a scalarizable
2074    reference from some form of constructor: CONSTRUCTOR, COMPLEX_CST or
2075    COMPLEX_EXPR.  If RHS is NULL, it should be treated as an empty
2076    CONSTRUCTOR.  */
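/* A small sketch (hypothetical aggregate "struct S { int a; int b; } s"):
   when S is fully instantiated, the initialization
   "s = (struct S){ .a = 1 }" is replaced by

       s$a = 1;
       s$b = 0;

   with the zeroing of s$b supplied by generate_element_zero for the
   member the CONSTRUCTOR did not mention.  */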
2077 
2078 static void
2079 scalarize_init (struct sra_elt *lhs_elt, tree rhs, block_stmt_iterator *bsi)
2080 {
2081   bool result = true;
2082   tree list = NULL;
2083 
2084   /* Generate initialization statements for all members extant in the RHS.  */
2085   if (rhs)
2086     {
2087       /* Unshare the expression just in case this is from a decl's initial.  */
2088       rhs = unshare_expr (rhs);
2089       result = generate_element_init (lhs_elt, rhs, &list);
2090     }
2091 
2092   /* CONSTRUCTOR is defined such that any member not mentioned is assigned
2093      a zero value.  Initialize the rest of the instantiated elements.  */
2094   generate_element_zero (lhs_elt, &list);
2095 
2096   if (!result)
2097     {
2098       /* If we failed to convert the entire initializer, then we must
2099 	 leave the structure assignment in place and must load values
2100 	 from the structure into the slots for which we did not find
2101 	 constants.  The easiest way to do this is to generate a complete
2102 	 copy-out, and then follow that with the constant assignments
2103 	 that we were able to build.  DCE will clean things up.  */
2104       tree list0 = NULL;
2105       generate_copy_inout (lhs_elt, true, generate_element_ref (lhs_elt),
2106 			   &list0);
2107       append_to_statement_list (list, &list0);
2108       list = list0;
2109     }
2110 
2111   if (lhs_elt->use_block_copy || !result)
2112     {
2113       /* Since LHS is not fully instantiated, we must leave the structure
2114 	 assignment in place.  Treating this case differently from a USE
2115 	 exposes constants to later optimizations.  */
2116       if (list)
2117 	{
2118 	  mark_all_v_defs (list);
2119 	  sra_insert_after (bsi, list);
2120 	}
2121     }
2122   else
2123     {
2124       /* The LHS is fully instantiated.  The list of initializations
2125 	 replaces the original structure assignment.  */
2126       gcc_assert (list);
2127       mark_all_v_defs (bsi_stmt (*bsi));
2128       mark_all_v_defs (list);
2129       sra_replace (bsi, list);
2130     }
2131 }
2132 
2133 /* A subroutine of scalarize_ldst called via walk_tree.  Set TREE_THIS_NOTRAP
2134    on all INDIRECT_REFs.  */
2135 
2136 static tree
2137 mark_notrap (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
2138 {
2139   tree t = *tp;
2140 
2141   if (TREE_CODE (t) == INDIRECT_REF)
2142     {
2143       TREE_THIS_NOTRAP (t) = 1;
2144       *walk_subtrees = 0;
2145     }
2146   else if (IS_TYPE_OR_DECL_P (t))
2147     *walk_subtrees = 0;
2148 
2149   return NULL;
2150 }
2151 
2152 /* Scalarize a LDST.  To recap, this is an assignment between one scalarizable
2153    reference ELT and one non-scalarizable reference OTHER.  IS_OUTPUT is true
2154    if ELT is on the left-hand side.  */
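/* Illustrative sketch (hypothetical pointer "struct S *p" and fully
   instantiated candidate "s"): the load "s = *p" is replaced by

       s$a = p->a;
       s$b = p->b;

   and the store "*p = s" by the mirror-image assignments, so no block
   copy of the aggregate remains.  */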
2155 
2156 static void
2157 scalarize_ldst (struct sra_elt *elt, tree other,
2158 		block_stmt_iterator *bsi, bool is_output)
2159 {
2160   /* Shouldn't have gotten called for a scalar.  */
2161   gcc_assert (!elt->replacement);
2162 
2163   if (elt->use_block_copy)
2164     {
2165       /* Since ELT is not fully instantiated, we have to leave the
2166 	 block copy in place.  Treat this as a USE.  */
2167       scalarize_use (elt, NULL, bsi, is_output, false);
2168     }
2169   else
2170     {
2171       /* The interesting case is when ELT is fully instantiated.  In this
2172 	 case we can have each element stored/loaded directly to/from the
2173 	 corresponding slot in OTHER.  This avoids a block copy.  */
2174 
2175       tree list = NULL, stmt = bsi_stmt (*bsi);
2176 
2177       mark_all_v_defs (stmt);
2178       generate_copy_inout (elt, is_output, other, &list);
2179       mark_all_v_defs (list);
2180       gcc_assert (list);
2181 
2182       /* Preserve EH semantics.  */
2183       if (stmt_ends_bb_p (stmt))
2184 	{
2185 	  tree_stmt_iterator tsi;
2186 	  tree first;
2187 
2188 	  /* Extract the first statement from LIST.  */
2189 	  tsi = tsi_start (list);
2190 	  first = tsi_stmt (tsi);
2191 	  tsi_delink (&tsi);
2192 
2193 	  /* Replace the old statement with this new representative.  */
2194 	  bsi_replace (bsi, first, true);
2195 
2196 	  if (!tsi_end_p (tsi))
2197 	    {
2198 	      /* If any reference would trap, then they all would.  And more
2199 		 to the point, the first would.  Therefore none of the rest
2200 		 will trap since the first didn't.  Indicate this by
2201 		 iterating over the remaining statements and set
2202 		 iterating over the remaining statements and setting
2203 	      do
2204 		{
2205 		  walk_tree (tsi_stmt_ptr (tsi), mark_notrap, NULL, NULL);
2206 		  tsi_next (&tsi);
2207 		}
2208 	      while (!tsi_end_p (tsi));
2209 
2210 	      insert_edge_copies (list, bsi->bb);
2211 	    }
2212 	}
2213       else
2214 	sra_replace (bsi, list);
2215     }
2216 }
2217 
2218 /* Generate initializations for all scalarizable parameters.  */
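/* Sketch, assuming a hypothetical scalarized parameter
   "struct S { int a; int b; } p": the copy-in sequence

       p$a = p.a;
       p$b = p.b;

   is built here and inserted on the edges leaving the entry block, so the
   replacements are live before the first real statement of the function.  */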
2219 
2220 static void
2221 scalarize_parms (void)
2222 {
2223   tree list = NULL;
2224   unsigned i;
2225   bitmap_iterator bi;
2226 
2227   EXECUTE_IF_SET_IN_BITMAP (needs_copy_in, 0, i, bi)
2228     {
2229       tree var = referenced_var (i);
2230       struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
2231       generate_copy_inout (elt, true, var, &list);
2232     }
2233 
2234   if (list)
2235     {
2236       insert_edge_copies (list, ENTRY_BLOCK_PTR);
2237       mark_all_v_defs (list);
2238     }
2239 }
2240 
2241 /* Entry point to phase 4.  Update the function to match replacements.  */
2242 
2243 static void
2244 scalarize_function (void)
2245 {
2246   static const struct sra_walk_fns fns = {
2247     scalarize_use, scalarize_copy, scalarize_init, scalarize_ldst, false
2248   };
2249 
2250   sra_walk_function (&fns);
2251   scalarize_parms ();
2252   bsi_commit_edge_inserts ();
2253 }
2254 
2255 
2256 /* Debug helper function.  Print ELT in a nice human-readable format.  */
2257 
2258 static void
2259 dump_sra_elt_name (FILE *f, struct sra_elt *elt)
2260 {
2261   if (elt->parent && TREE_CODE (elt->parent->type) == COMPLEX_TYPE)
2262     {
2263       fputs (elt->element == integer_zero_node ? "__real__ " : "__imag__ ", f);
2264       dump_sra_elt_name (f, elt->parent);
2265     }
2266   else
2267     {
2268       if (elt->parent)
2269         dump_sra_elt_name (f, elt->parent);
2270       if (DECL_P (elt->element))
2271 	{
2272 	  if (TREE_CODE (elt->element) == FIELD_DECL)
2273 	    fputc ('.', f);
2274 	  print_generic_expr (f, elt->element, dump_flags);
2275 	}
2276       else if (TREE_CODE (elt->element) == RANGE_EXPR)
2277 	fprintf (f, "["HOST_WIDE_INT_PRINT_DEC".."HOST_WIDE_INT_PRINT_DEC"]",
2278 		 TREE_INT_CST_LOW (TREE_OPERAND (elt->element, 0)),
2279 		 TREE_INT_CST_LOW (TREE_OPERAND (elt->element, 1)));
2280       else
2281 	fprintf (f, "[" HOST_WIDE_INT_PRINT_DEC "]",
2282 		 TREE_INT_CST_LOW (elt->element));
2283     }
2284 }
2285 
2286 /* Likewise, but callable from the debugger.  */
2287 
2288 void
2289 debug_sra_elt_name (struct sra_elt *elt)
2290 {
2291   dump_sra_elt_name (stderr, elt);
2292   fputc ('\n', stderr);
2293 }
2294 
2295 void
2296 sra_init_cache (void)
2297 {
2298   if (sra_type_decomp_cache)
2299     return;
2300 
2301   sra_type_decomp_cache = BITMAP_ALLOC (NULL);
2302   sra_type_inst_cache = BITMAP_ALLOC (NULL);
2303 }
2304 
2305 /* Main entry point.  */
2306 
2307 static unsigned int
2308 tree_sra (void)
2309 {
2310   /* Initialize local variables.  */
2311   todoflags = 0;
2312   gcc_obstack_init (&sra_obstack);
2313   sra_candidates = BITMAP_ALLOC (NULL);
2314   needs_copy_in = BITMAP_ALLOC (NULL);
2315   sra_init_cache ();
2316   sra_map = htab_create (101, sra_elt_hash, sra_elt_eq, NULL);
2317 
2318   /* Scan.  If we find anything, instantiate and scalarize.  */
2319   if (find_candidates_for_sra ())
2320     {
2321       scan_function ();
2322       decide_instantiations ();
2323       scalarize_function ();
2324     }
2325 
2326   /* Free allocated memory.  */
2327   htab_delete (sra_map);
2328   sra_map = NULL;
2329   BITMAP_FREE (sra_candidates);
2330   BITMAP_FREE (needs_copy_in);
2331   BITMAP_FREE (sra_type_decomp_cache);
2332   BITMAP_FREE (sra_type_inst_cache);
2333   obstack_free (&sra_obstack, NULL);
2334   return todoflags;
2335 }
2336 
2337 static bool
2338 gate_sra (void)
2339 {
2340   return flag_tree_sra != 0;
2341 }
2342 
2343 struct tree_opt_pass pass_sra =
2344 {
2345   "sra",				/* name */
2346   gate_sra,				/* gate */
2347   tree_sra,				/* execute */
2348   NULL,					/* sub */
2349   NULL,					/* next */
2350   0,					/* static_pass_number */
2351   TV_TREE_SRA,				/* tv_id */
2352   PROP_cfg | PROP_ssa | PROP_alias,	/* properties_required */
2353   0,					/* properties_provided */
2354   PROP_smt_usage,		        /* properties_destroyed */
2355   0,					/* todo_flags_start */
2356   TODO_dump_func /* todo_flags_finish */
2357   | TODO_update_ssa
2358   | TODO_ggc_collect | TODO_verify_ssa,
2359   0					/* letter */
2360 };
2361