1 /* Scalar Replacement of Aggregates (SRA) converts some structure
2    references into scalar references, exposing them to the scalar
3    optimizers.
4    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
5    Contributed by Diego Novillo <dnovillo@redhat.com>
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING.  If not, write to the Free
21 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
22 02110-1301, USA.  */
23 
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "ggc.h"
29 #include "tree.h"
30 
31 /* These RTL headers are needed for basic-block.h.  */
32 #include "rtl.h"
33 #include "tm_p.h"
34 #include "hard-reg-set.h"
35 #include "basic-block.h"
36 #include "diagnostic.h"
37 #include "langhooks.h"
38 #include "tree-inline.h"
39 #include "tree-flow.h"
40 #include "tree-gimple.h"
41 #include "tree-dump.h"
42 #include "tree-pass.h"
43 #include "timevar.h"
44 #include "flags.h"
45 #include "bitmap.h"
46 #include "obstack.h"
47 #include "target.h"
48 /* expr.h is needed for MOVE_RATIO.  */
49 #include "expr.h"
50 #include "params.h"
51 
52 
53 /* The objective of this pass is to replace a non-addressable aggregate with a
54    set of independent variables.  Most of the time, all of these variables
55    will be scalars.  But a secondary objective is to break up larger
56    aggregates into smaller aggregates.  In the process we may find that some
57    bits of the larger aggregate can be deleted as unreferenced.
58 
59    This substitution is done globally.  More localized substitutions would
60    be the purview of a load-store motion pass.
61 
62    The optimization proceeds in phases:
63 
64      (1) Identify variables that have types that are candidates for
65 	 decomposition.
66 
67      (2) Scan the function looking for the ways these variables are used.
68 	 In particular we're interested in the number of times a variable
69 	 (or member) is needed as a complete unit, and the number of times
70 	 a variable (or member) is copied.
71 
72      (3) Based on the usage profile, instantiate substitution variables.
73 
74      (4) Scan the function making replacements.
75 */
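
/* A small before/after sketch (an illustration, not from the original
   sources): given a local

	struct point { int x; int y; } p;
	p.x = 3;
	p.y = 4;
	return p.x + p.y;

   the pass replaces the aggregate with independent scalar temporaries,
   conceptually

	int p$x, p$y;
	p$x = 3;
	p$y = 4;
	return p$x + p$y;

   after which copy propagation, constant propagation and dead code
   elimination can work on p$x and p$y directly.  The "$" names match the
   pretty names built by build_element_name below; the actual replacements
   are "SR" temporaries created by make_rename_temp.  */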
76 
77 
78 /* The set of aggregate variables that are candidates for scalarization.  */
79 static bitmap sra_candidates;
80 
81 /* Set of scalarizable PARM_DECLs that need copy-in operations at the
82    beginning of the function.  */
83 static bitmap needs_copy_in;
84 
85 /* Sets of bit pairs that cache type decomposition and instantiation.  */
86 static bitmap sra_type_decomp_cache;
87 static bitmap sra_type_inst_cache;
88 
89 /* One of these structures is created for each candidate aggregate
90    and each (accessed) member of such an aggregate.  */
91 struct sra_elt
92 {
93   /* A tree of the elements.  Used when we want to traverse everything.  */
94   struct sra_elt *parent;
95   struct sra_elt *children;
96   struct sra_elt *sibling;
97 
98   /* If this element is a root, then this is the VAR_DECL.  If this is
99      a sub-element, this is some token used to identify the reference.
100      In the case of COMPONENT_REF, this is the FIELD_DECL.  In the case
101      of an ARRAY_REF, this is the (constant) index.  In the case of a
102      complex number, this is a zero or one.  */
103   tree element;
104 
105   /* The type of the element.  */
106   tree type;
107 
108   /* A VAR_DECL, for any sub-element we've decided to replace.  */
109   tree replacement;
110 
111   /* The number of times the element is referenced as a whole.  I.e.
112      given "a.b.c", this would be incremented for C, but not for A or B.  */
113   unsigned int n_uses;
114 
115   /* The number of times the element is copied to or from another
116      scalarizable element.  */
117   unsigned int n_copies;
118 
119   /* True if TYPE is scalar.  */
120   bool is_scalar;
121 
122   /* True if we saw something about this element that prevents scalarization,
123      such as non-constant indexing.  */
124   bool cannot_scalarize;
125 
126   /* True if we've decided that structure-to-structure assignment
127      should happen via memcpy and not per-element.  */
128   bool use_block_copy;
129 
130   /* True if everything under this element has been marked TREE_NO_WARNING.  */
131   bool all_no_warning;
132 
133   /* A flag for use with/after random access traversals.  */
134   bool visited;
135 };
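
/* As an illustration (the declaration is an assumed example, not from the
   original comments): a candidate "struct { int i; _Complex float c; } v"
   whose members are all referenced yields an element tree

	v          element = VAR_DECL v
	+- i       element = FIELD_DECL i, is_scalar = true
	+- c       element = FIELD_DECL c
	   +- 0    element = integer_zero_node (real part)
	   +- 1    element = integer_one_node (imaginary part)

   Only members actually seen during scanning get child nodes;
   instantiate_missing_elements fills in the rest if we later decide on
   element-wise copies.  */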
136 
137 /* Random access to the child of a parent is performed by hashing.
138    This prevents quadratic behavior, and allows SRA to function
139    reasonably on larger records.  */
140 static htab_t sra_map;
141 
142 /* All structures are allocated out of the following obstack.  */
143 static struct obstack sra_obstack;
144 
145 /* Debugging functions.  */
146 static void dump_sra_elt_name (FILE *, struct sra_elt *);
147 extern void debug_sra_elt_name (struct sra_elt *);
148 
149 /* Forward declarations.  */
150 static tree generate_element_ref (struct sra_elt *);
151 
152 /* Return true if DECL is an SRA candidate.  */
153 
154 static bool
155 is_sra_candidate_decl (tree decl)
156 {
157   return DECL_P (decl) && bitmap_bit_p (sra_candidates, DECL_UID (decl));
158 }
159 
160 /* Return true if TYPE is a scalar type.  */
161 
162 static bool
163 is_sra_scalar_type (tree type)
164 {
165   enum tree_code code = TREE_CODE (type);
166   return (code == INTEGER_TYPE || code == REAL_TYPE || code == VECTOR_TYPE
167 	  || code == ENUMERAL_TYPE || code == BOOLEAN_TYPE
168 	  || code == CHAR_TYPE || code == POINTER_TYPE || code == OFFSET_TYPE
169 	  || code == REFERENCE_TYPE);
170 }
171 
172 /* Return true if TYPE can be decomposed into a set of independent variables.
173 
174    Note that this doesn't imply that all elements of TYPE can be
175    instantiated, just that if we decide to break up the type into
176    separate pieces that it can be done.  */
177 
178 bool
179 sra_type_can_be_decomposed_p (tree type)
180 {
181   unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2;
182   tree t;
183 
184   /* Avoid searching the same type twice.  */
185   if (bitmap_bit_p (sra_type_decomp_cache, cache+0))
186     return true;
187   if (bitmap_bit_p (sra_type_decomp_cache, cache+1))
188     return false;
189 
190   /* The type must have a definite nonzero size.  */
191   if (TYPE_SIZE (type) == NULL || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
192       || integer_zerop (TYPE_SIZE (type)))
193     goto fail;
194 
195   /* The type must be a non-union aggregate.  */
196   switch (TREE_CODE (type))
197     {
198     case RECORD_TYPE:
199       {
200 	bool saw_one_field = false;
201 
202 	for (t = TYPE_FIELDS (type); t ; t = TREE_CHAIN (t))
203 	  if (TREE_CODE (t) == FIELD_DECL)
204 	    {
205 	      /* Reject incorrectly represented bit fields.  */
206 	      if (DECL_BIT_FIELD (t)
207 		  && (tree_low_cst (DECL_SIZE (t), 1)
208 		      != TYPE_PRECISION (TREE_TYPE (t))))
209 		goto fail;
210 
211 	      saw_one_field = true;
212 	    }
213 
214 	/* Record types must have at least one field.  */
215 	if (!saw_one_field)
216 	  goto fail;
217       }
218       break;
219 
220     case ARRAY_TYPE:
221       /* Array types must have a fixed lower and upper bound.  */
222       t = TYPE_DOMAIN (type);
223       if (t == NULL)
224 	goto fail;
225       if (TYPE_MIN_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MIN_VALUE (t)))
226 	goto fail;
227       if (TYPE_MAX_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MAX_VALUE (t)))
228 	goto fail;
229       break;
230 
231     case COMPLEX_TYPE:
232       break;
233 
234     default:
235       goto fail;
236     }
237 
238   bitmap_set_bit (sra_type_decomp_cache, cache+0);
239   return true;
240 
241  fail:
242   bitmap_set_bit (sra_type_decomp_cache, cache+1);
243   return false;
244 }
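
/* Some concrete consequences of the checks above (the declarations are
   assumed examples):

     struct s { int a; float b; };	decomposable: non-union RECORD_TYPE
					with at least one FIELD_DECL
     union u { int a; float b; };	rejected by the default case
     int v[n];  (non-constant N)	rejected: the domain bounds are not
					constant
     _Complex double z;			decomposable (COMPLEX_TYPE)

   Bit fields are rejected only when the field's DECL_SIZE does not match
   the precision of its type, per the test in the RECORD_TYPE case.  */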
245 
246 /* Return true if DECL can be decomposed into a set of independent
247    (though not necessarily scalar) variables.  */
248 
249 static bool
250 decl_can_be_decomposed_p (tree var)
251 {
252   /* Early out for scalars.  */
253   if (is_sra_scalar_type (TREE_TYPE (var)))
254     return false;
255 
256   /* The variable must not be aliased.  */
257   if (!is_gimple_non_addressable (var))
258     {
259       if (dump_file && (dump_flags & TDF_DETAILS))
260 	{
261 	  fprintf (dump_file, "Cannot scalarize variable ");
262 	  print_generic_expr (dump_file, var, dump_flags);
263 	  fprintf (dump_file, " because it must live in memory\n");
264 	}
265       return false;
266     }
267 
268   /* The variable must not be volatile.  */
269   if (TREE_THIS_VOLATILE (var))
270     {
271       if (dump_file && (dump_flags & TDF_DETAILS))
272 	{
273 	  fprintf (dump_file, "Cannot scalarize variable ");
274 	  print_generic_expr (dump_file, var, dump_flags);
275 	  fprintf (dump_file, " because it is declared volatile\n");
276 	}
277       return false;
278     }
279 
280   /* We must be able to decompose the variable's type.  */
281   if (!sra_type_can_be_decomposed_p (TREE_TYPE (var)))
282     {
283       if (dump_file && (dump_flags & TDF_DETAILS))
284 	{
285 	  fprintf (dump_file, "Cannot scalarize variable ");
286 	  print_generic_expr (dump_file, var, dump_flags);
287 	  fprintf (dump_file, " because its type cannot be decomposed\n");
288 	}
289       return false;
290     }
291 
292   return true;
293 }
294 
295 /* Return true if TYPE can be *completely* decomposed into scalars.  */
296 
297 static bool
298 type_can_instantiate_all_elements (tree type)
299 {
300   if (is_sra_scalar_type (type))
301     return true;
302   if (!sra_type_can_be_decomposed_p (type))
303     return false;
304 
305   switch (TREE_CODE (type))
306     {
307     case RECORD_TYPE:
308       {
309 	unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2;
310 	tree f;
311 
312 	if (bitmap_bit_p (sra_type_inst_cache, cache+0))
313 	  return true;
314 	if (bitmap_bit_p (sra_type_inst_cache, cache+1))
315 	  return false;
316 
317 	for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
318 	  if (TREE_CODE (f) == FIELD_DECL)
319 	    {
320 	      if (!type_can_instantiate_all_elements (TREE_TYPE (f)))
321 		{
322 		  bitmap_set_bit (sra_type_inst_cache, cache+1);
323 		  return false;
324 		}
325 	    }
326 
327 	bitmap_set_bit (sra_type_inst_cache, cache+0);
328 	return true;
329       }
330 
331     case ARRAY_TYPE:
332       return type_can_instantiate_all_elements (TREE_TYPE (type));
333 
334     case COMPLEX_TYPE:
335       return true;
336 
337     default:
338       gcc_unreachable ();
339     }
340 }
341 
342 /* Test whether ELT or some sub-element cannot be scalarized.  */
343 
344 static bool
345 can_completely_scalarize_p (struct sra_elt *elt)
346 {
347   struct sra_elt *c;
348 
349   if (elt->cannot_scalarize)
350     return false;
351 
352   for (c = elt->children; c ; c = c->sibling)
353     if (!can_completely_scalarize_p (c))
354       return false;
355 
356   return true;
357 }
358 
359 
360 /* A simplified tree hashing algorithm that only handles the types of
361    trees we expect to find in sra_elt->element.  */
362 
363 static hashval_t
364 sra_hash_tree (tree t)
365 {
366   hashval_t h;
367 
368   switch (TREE_CODE (t))
369     {
370     case VAR_DECL:
371     case PARM_DECL:
372     case RESULT_DECL:
373       h = DECL_UID (t);
374       break;
375 
376     case INTEGER_CST:
377       h = TREE_INT_CST_LOW (t) ^ TREE_INT_CST_HIGH (t);
378       break;
379 
380     case FIELD_DECL:
381       /* We can have types that are compatible, but have different member
382 	 lists, so we can't hash fields by ID.  Use offsets instead.  */
383       h = iterative_hash_expr (DECL_FIELD_OFFSET (t), 0);
384       h = iterative_hash_expr (DECL_FIELD_BIT_OFFSET (t), h);
385       break;
386 
387     default:
388       gcc_unreachable ();
389     }
390 
391   return h;
392 }
393 
394 /* Hash function for type SRA_PAIR.  */
395 
396 static hashval_t
397 sra_elt_hash (const void *x)
398 {
399   const struct sra_elt *e = x;
400   const struct sra_elt *p;
401   hashval_t h;
402 
403   h = sra_hash_tree (e->element);
404 
405   /* Take into account everything back up the chain.  Given that chain
406      lengths are rarely very long, this should be acceptable.  If we
407      truly identify this as a performance problem, it should work to
408      hash the pointer value "e->parent".  */
409   for (p = e->parent; p ; p = p->parent)
410     h = (h * 65521) ^ sra_hash_tree (p->element);
411 
412   return h;
413 }
414 
415 /* Equality function for type SRA_PAIR.  */
416 
417 static int
418 sra_elt_eq (const void *x, const void *y)
419 {
420   const struct sra_elt *a = x;
421   const struct sra_elt *b = y;
422   tree ae, be;
423 
424   if (a->parent != b->parent)
425     return false;
426 
427   ae = a->element;
428   be = b->element;
429 
430   if (ae == be)
431     return true;
432   if (TREE_CODE (ae) != TREE_CODE (be))
433     return false;
434 
435   switch (TREE_CODE (ae))
436     {
437     case VAR_DECL:
438     case PARM_DECL:
439     case RESULT_DECL:
440       /* These are all pointer unique.  */
441       return false;
442 
443     case INTEGER_CST:
444       /* Integers are not pointer unique, so compare their values.  */
445       return tree_int_cst_equal (ae, be);
446 
447     case FIELD_DECL:
448       /* Fields are unique within a record, but not between
449 	 compatible records.  */
450       if (DECL_FIELD_CONTEXT (ae) == DECL_FIELD_CONTEXT (be))
451 	return false;
452       return fields_compatible_p (ae, be);
453 
454     default:
455       gcc_unreachable ();
456     }
457 }
458 
459 /* Create or return the SRA_ELT structure for CHILD in PARENT.  PARENT
460    may be null, in which case CHILD must be a DECL.  */
461 
462 static struct sra_elt *
463 lookup_element (struct sra_elt *parent, tree child, tree type,
464 		enum insert_option insert)
465 {
466   struct sra_elt dummy;
467   struct sra_elt **slot;
468   struct sra_elt *elt;
469 
470   dummy.parent = parent;
471   dummy.element = child;
472 
473   slot = (struct sra_elt **) htab_find_slot (sra_map, &dummy, insert);
474   if (!slot && insert == NO_INSERT)
475     return NULL;
476 
477   elt = *slot;
478   if (!elt && insert == INSERT)
479     {
480       *slot = elt = obstack_alloc (&sra_obstack, sizeof (*elt));
481       memset (elt, 0, sizeof (*elt));
482 
483       elt->parent = parent;
484       elt->element = child;
485       elt->type = type;
486       elt->is_scalar = is_sra_scalar_type (type);
487 
488       if (parent)
489 	{
490 	  elt->sibling = parent->children;
491 	  parent->children = elt;
492 	}
493 
494       /* If this is a parameter, then if we want to scalarize, we have
495 	 one copy from the true function parameter.  Count it now.  */
496       if (TREE_CODE (child) == PARM_DECL)
497 	{
498 	  elt->n_copies = 1;
499 	  bitmap_set_bit (needs_copy_in, DECL_UID (child));
500 	}
501     }
502 
503   return elt;
504 }
505 
506 /* Return true if the ARRAY_REF in EXPR is a constant, in-bounds access.  */
507 
508 static bool
509 is_valid_const_index (tree expr)
510 {
511   tree dom, t, index = TREE_OPERAND (expr, 1);
512 
513   if (TREE_CODE (index) != INTEGER_CST)
514     return false;
515 
516   /* Watch out for stupid user tricks, indexing outside the array.
517 
518      Careful, we're not called only on scalarizable types, so do not
519      assume constant array bounds.  We needn't do anything with such
520      cases, since they'll be referring to objects that we should have
521      already rejected for scalarization, so returning false is fine.  */
522 
523   dom = TYPE_DOMAIN (TREE_TYPE (TREE_OPERAND (expr, 0)));
524   if (dom == NULL)
525     return false;
526 
527   t = TYPE_MIN_VALUE (dom);
528   if (!t || TREE_CODE (t) != INTEGER_CST)
529     return false;
530   if (tree_int_cst_lt (index, t))
531     return false;
532 
533   t = TYPE_MAX_VALUE (dom);
534   if (!t || TREE_CODE (t) != INTEGER_CST)
535     return false;
536   if (tree_int_cst_lt (t, index))
537     return false;
538 
539   return true;
540 }
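
/* For example (assumed declaration): with "int a[4]", whose domain is
   [0, 3], "a[2]" passes all of the checks above, "a[7]" fails the
   TYPE_MAX_VALUE comparison, and "a[i]" with a variable index fails the
   INTEGER_CST test immediately.  */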
541 
542 /* Create or return the SRA_ELT structure for EXPR if the expression
543    refers to a scalarizable variable.  */
544 
545 static struct sra_elt *
546 maybe_lookup_element_for_expr (tree expr)
547 {
548   struct sra_elt *elt;
549   tree child;
550 
551   switch (TREE_CODE (expr))
552     {
553     case VAR_DECL:
554     case PARM_DECL:
555     case RESULT_DECL:
556       if (is_sra_candidate_decl (expr))
557 	return lookup_element (NULL, expr, TREE_TYPE (expr), INSERT);
558       return NULL;
559 
560     case ARRAY_REF:
561       /* We can't scalarize variable array indices.  */
562       if (is_valid_const_index (expr))
563         child = TREE_OPERAND (expr, 1);
564       else
565 	return NULL;
566       break;
567 
568     case COMPONENT_REF:
569       /* Don't look through unions.  */
570       if (TREE_CODE (TREE_TYPE (TREE_OPERAND (expr, 0))) != RECORD_TYPE)
571 	return NULL;
572       child = TREE_OPERAND (expr, 1);
573       break;
574 
575     case REALPART_EXPR:
576       child = integer_zero_node;
577       break;
578     case IMAGPART_EXPR:
579       child = integer_one_node;
580       break;
581 
582     default:
583       return NULL;
584     }
585 
586   elt = maybe_lookup_element_for_expr (TREE_OPERAND (expr, 0));
587   if (elt)
588     return lookup_element (elt, child, TREE_TYPE (expr), INSERT);
589   return NULL;
590 }
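
/* To make the recursion concrete (assumed declaration): for a candidate
   "struct { _Complex float c; int a[4]; } x", the expression "__real x.c"
   is processed as

	REALPART_EXPR -> COMPONENT_REF x.c -> VAR_DECL x

   creating elements root-first for x, then x/c (keyed by the FIELD_DECL),
   then x/c/0 (keyed by integer_zero_node).  An access such as "x.a[i]"
   with a variable index returns NULL at the ARRAY_REF case.  */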
591 
592 
593 /* Functions to walk just enough of the tree to see all scalarizable
594    references, and categorize them.  */
595 
596 /* A set of callbacks for phases 2 and 4.  They'll be invoked for the
597    various kinds of references seen.  In all cases, *BSI is an iterator
598    pointing to the statement being processed.  */
599 struct sra_walk_fns
600 {
601   /* Invoked when ELT is required as a unit.  Note that ELT might refer to
602      a leaf node, in which case this is a simple scalar reference.  *EXPR_P
603      points to the location of the expression.  IS_OUTPUT is true if this
604      is a left-hand-side reference.  USE_ALL is true if we saw something we
605      couldn't quite identify and had to force the use of the entire object.  */
606   void (*use) (struct sra_elt *elt, tree *expr_p,
607 	       block_stmt_iterator *bsi, bool is_output, bool use_all);
608 
609   /* Invoked when we have a copy between two scalarizable references.  */
610   void (*copy) (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
611 		block_stmt_iterator *bsi);
612 
613   /* Invoked when ELT is initialized from a constant.  VALUE may be NULL,
614      in which case it should be treated as an empty CONSTRUCTOR.  */
615   void (*init) (struct sra_elt *elt, tree value, block_stmt_iterator *bsi);
616 
617   /* Invoked when we have a copy between one scalarizable reference ELT
618      and one non-scalarizable reference OTHER.  IS_OUTPUT is true if ELT
619      is on the left-hand side.  */
620   void (*ldst) (struct sra_elt *elt, tree other,
621 		block_stmt_iterator *bsi, bool is_output);
622 
623   /* True during phase 2, false during phase 4.  */
624   /* ??? This is a hack.  */
625   bool initial_scan;
626 };
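
/* A rough map from assignment forms to these callbacks, following
   sra_walk_modify_expr below (the variable names are assumed examples):

	a.x = b.x;	both sides scalarizable		-> copy
	a = {};		CONSTRUCTOR (or constant) RHS	-> init
	a = *p;		aggregate loaded from memory	-> ldst, is_output
	*p = a;		aggregate stored to memory	-> ldst, !is_output
	foo (a.x);	component used as a call arg	-> use  */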
627 
628 #ifdef ENABLE_CHECKING
629 /* Invoked via walk_tree, if *TP contains a candidate decl, return it.  */
630 
631 static tree
632 sra_find_candidate_decl (tree *tp, int *walk_subtrees,
633 			 void *data ATTRIBUTE_UNUSED)
634 {
635   tree t = *tp;
636   enum tree_code code = TREE_CODE (t);
637 
638   if (code == VAR_DECL || code == PARM_DECL || code == RESULT_DECL)
639     {
640       *walk_subtrees = 0;
641       if (is_sra_candidate_decl (t))
642 	return t;
643     }
644   else if (TYPE_P (t))
645     *walk_subtrees = 0;
646 
647   return NULL;
648 }
649 #endif
650 
651 /* Walk most expressions looking for a scalarizable aggregate.
652    If we find one, invoke FNS->USE.  */
653 
654 static void
655 sra_walk_expr (tree *expr_p, block_stmt_iterator *bsi, bool is_output,
656 	       const struct sra_walk_fns *fns)
657 {
658   tree expr = *expr_p;
659   tree inner = expr;
660   bool disable_scalarization = false;
661   bool use_all_p = false;
662 
663   /* We're looking to collect a reference expression between EXPR and INNER,
664      such that INNER is a scalarizable decl and all other nodes through EXPR
665      are references that we can scalarize.  If we come across something that
666      we can't scalarize, we reset EXPR.  This has the effect of making it
667      appear that we're referring to the larger expression as a whole.  */
668 
669   while (1)
670     switch (TREE_CODE (inner))
671       {
672       case VAR_DECL:
673       case PARM_DECL:
674       case RESULT_DECL:
675 	/* If there is a scalarizable decl at the bottom, then process it.  */
676 	if (is_sra_candidate_decl (inner))
677 	  {
678 	    struct sra_elt *elt = maybe_lookup_element_for_expr (expr);
679 	    if (disable_scalarization)
680 	      elt->cannot_scalarize = true;
681 	    else
682 	      fns->use (elt, expr_p, bsi, is_output, use_all_p);
683 	  }
684 	return;
685 
686       case ARRAY_REF:
687 	/* Non-constant index means any member may be accessed.  Prevent the
688 	   expression from being scalarized.  If we were to treat this as a
689 	   reference to the whole array, we can wind up with a single dynamic
690 	   index reference inside a loop being overridden by several constant
691 	   index references during loop setup.  It's possible that this could
692 	   be avoided by using dynamic usage counts based on BB trip counts
693 	   (based on loop analysis or profiling), but that hardly seems worth
694 	   the effort.  */
695 	/* ??? Hack.  Figure out how to push this into the scan routines
696 	   without duplicating too much code.  */
697 	if (!is_valid_const_index (inner))
698 	  {
699 	    disable_scalarization = true;
700 	    goto use_all;
701 	  }
702 	/* ??? Are we assured that non-constant bounds and stride will have
703 	   the same value everywhere?  I don't think Fortran will...  */
704 	if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3))
705 	  goto use_all;
706 	inner = TREE_OPERAND (inner, 0);
707 	break;
708 
709       case COMPONENT_REF:
710 	/* A reference to a union member constitutes a reference to the
711 	   entire union.  */
712 	if (TREE_CODE (TREE_TYPE (TREE_OPERAND (inner, 0))) != RECORD_TYPE)
713 	  goto use_all;
714 	/* ??? See above re non-constant stride.  */
715 	if (TREE_OPERAND (inner, 2))
716 	  goto use_all;
717 	inner = TREE_OPERAND (inner, 0);
718 	break;
719 
720       case REALPART_EXPR:
721       case IMAGPART_EXPR:
722 	inner = TREE_OPERAND (inner, 0);
723 	break;
724 
725       case BIT_FIELD_REF:
726 	/* A bit field reference (access to *multiple* fields simultaneously)
727 	   is not currently scalarized.  Consider this an access to the
728 	   complete outer element, to which walk_tree will bring us next.  */
729 	goto use_all;
730 
731       case ARRAY_RANGE_REF:
732 	/* Similarly, a subrange reference is used to modify indexing.  Which
733 	   means that the canonical element names that we have won't work.  */
734 	goto use_all;
735 
736       case VIEW_CONVERT_EXPR:
737       case NOP_EXPR:
738 	/* Similarly, a view/nop explicitly wants to look at an object in a
739 	   type other than the one we've scalarized.  */
740 	goto use_all;
741 
742       case WITH_SIZE_EXPR:
743 	/* This is a transparent wrapper.  The entire inner expression really
744 	   is being used.  */
745 	goto use_all;
746 
747       use_all:
748         expr_p = &TREE_OPERAND (inner, 0);
749 	inner = expr = *expr_p;
750 	use_all_p = true;
751 	break;
752 
753       default:
754 #ifdef ENABLE_CHECKING
755 	/* Validate that we're not missing any references.  */
756 	gcc_assert (!walk_tree (&inner, sra_find_candidate_decl, NULL, NULL));
757 #endif
758 	return;
759       }
760 }
761 
762 /* Walk a TREE_LIST of values looking for scalarizable aggregates.
763    If we find one, invoke FNS->USE.  */
764 
765 static void
766 sra_walk_tree_list (tree list, block_stmt_iterator *bsi, bool is_output,
767 		    const struct sra_walk_fns *fns)
768 {
769   tree op;
770   for (op = list; op ; op = TREE_CHAIN (op))
771     sra_walk_expr (&TREE_VALUE (op), bsi, is_output, fns);
772 }
773 
774 /* Walk the arguments of a CALL_EXPR looking for scalarizable aggregates.
775    If we find one, invoke FNS->USE.  */
776 
777 static void
778 sra_walk_call_expr (tree expr, block_stmt_iterator *bsi,
779 		    const struct sra_walk_fns *fns)
780 {
781   sra_walk_tree_list (TREE_OPERAND (expr, 1), bsi, false, fns);
782 }
783 
784 /* Walk the inputs and outputs of an ASM_EXPR looking for scalarizable
785    aggregates.  If we find one, invoke FNS->USE.  */
786 
787 static void
788 sra_walk_asm_expr (tree expr, block_stmt_iterator *bsi,
789 		   const struct sra_walk_fns *fns)
790 {
791   sra_walk_tree_list (ASM_INPUTS (expr), bsi, false, fns);
792   sra_walk_tree_list (ASM_OUTPUTS (expr), bsi, true, fns);
793 }
794 
795 /* Walk a MODIFY_EXPR and categorize the assignment appropriately.  */
796 
797 static void
798 sra_walk_modify_expr (tree expr, block_stmt_iterator *bsi,
799 		      const struct sra_walk_fns *fns)
800 {
801   struct sra_elt *lhs_elt, *rhs_elt;
802   tree lhs, rhs;
803 
804   lhs = TREE_OPERAND (expr, 0);
805   rhs = TREE_OPERAND (expr, 1);
806   lhs_elt = maybe_lookup_element_for_expr (lhs);
807   rhs_elt = maybe_lookup_element_for_expr (rhs);
808 
809   /* If both sides are scalarizable, this is a COPY operation.  */
810   if (lhs_elt && rhs_elt)
811     {
812       fns->copy (lhs_elt, rhs_elt, bsi);
813       return;
814     }
815 
816   /* If the RHS is scalarizable, handle it.  There are only two cases.  */
817   if (rhs_elt)
818     {
819       if (!rhs_elt->is_scalar)
820 	fns->ldst (rhs_elt, lhs, bsi, false);
821       else
822 	fns->use (rhs_elt, &TREE_OPERAND (expr, 1), bsi, false, false);
823     }
824 
825   /* If it isn't scalarizable, there may be scalarizable variables within, so
826      check for a call or else walk the RHS to see if we need to do any
827      copy-in operations.  We need to do it before the LHS is scalarized so
828      that the statements get inserted in the proper place, before any
829      copy-out operations.  */
830   else
831     {
832       tree call = get_call_expr_in (rhs);
833       if (call)
834 	sra_walk_call_expr (call, bsi, fns);
835       else
836 	sra_walk_expr (&TREE_OPERAND (expr, 1), bsi, false, fns);
837     }
838 
839   /* Likewise, handle the LHS being scalarizable.  We have cases similar
840      to those above, but also want to handle RHS being constant.  */
841   if (lhs_elt)
842     {
843       /* If this is an assignment from a constant, or constructor, then
844 	 we have access to all of the elements individually.  Invoke INIT.  */
845       if (TREE_CODE (rhs) == COMPLEX_EXPR
846 	  || TREE_CODE (rhs) == COMPLEX_CST
847 	  || TREE_CODE (rhs) == CONSTRUCTOR)
848 	fns->init (lhs_elt, rhs, bsi);
849 
850       /* If this is an assignment from read-only memory, treat this as if
851 	 we'd been passed the constructor directly.  Invoke INIT.  */
852       else if (TREE_CODE (rhs) == VAR_DECL
853 	       && TREE_STATIC (rhs)
854 	       && TREE_READONLY (rhs)
855 	       && targetm.binds_local_p (rhs))
856 	fns->init (lhs_elt, DECL_INITIAL (rhs), bsi);
857 
858       /* If this is a copy from a non-scalarizable lvalue, invoke LDST.
859 	 The lvalue requirement prevents us from trying to directly scalarize
860 	 the result of a function call.  Which would result in trying to call
861 	 the function multiple times, and other evil things.  */
862       else if (!lhs_elt->is_scalar && is_gimple_addressable (rhs))
863 	fns->ldst (lhs_elt, rhs, bsi, true);
864 
865       /* Otherwise we're being used in some context that requires the
866 	 aggregate to be seen as a whole.  Invoke USE.  */
867       else
868 	fns->use (lhs_elt, &TREE_OPERAND (expr, 0), bsi, true, false);
869     }
870 
871   /* Similarly to above, LHS_ELT being null only means that the LHS as a
872      whole is not a scalarizable reference.  There may be occurrences of
873      scalarizable variables within, which implies a USE.  */
874   else
875     sra_walk_expr (&TREE_OPERAND (expr, 0), bsi, true, fns);
876 }
877 
878 /* Entry point to the walk functions.  Search the entire function,
879    invoking the callbacks in FNS on each of the references to
880    scalarizable variables.  */
881 
882 static void
883 sra_walk_function (const struct sra_walk_fns *fns)
884 {
885   basic_block bb;
886   block_stmt_iterator si, ni;
887 
888   /* ??? Phase 4 could derive some benefit from walking the function in
889      dominator tree order.  */
890 
891   FOR_EACH_BB (bb)
892     for (si = bsi_start (bb); !bsi_end_p (si); si = ni)
893       {
894 	tree stmt, t;
895 	stmt_ann_t ann;
896 
897 	stmt = bsi_stmt (si);
898 	ann = stmt_ann (stmt);
899 
900 	ni = si;
901 	bsi_next (&ni);
902 
903 	/* If the statement has no virtual operands, then it doesn't
904 	   make any structure references that we care about.  */
905 	if (ZERO_SSA_OPERANDS (stmt, (SSA_OP_VIRTUAL_DEFS | SSA_OP_VUSE)))
906 	  continue;
907 
908 	switch (TREE_CODE (stmt))
909 	  {
910 	  case RETURN_EXPR:
911 	    /* If we have "return <retval>" then the return value is
912 	       already exposed for our pleasure.  Walk it as a USE to
913 	       force all the components back in place for the return.
914 
915 	       If we have an embedded assignment, then <retval> is of
916 	       a type that gets returned in registers in this ABI, and
917 	       we do not wish to extend their lifetimes.  Treat this
918 	       as a USE of the variable on the RHS of this assignment.  */
919 
920 	    t = TREE_OPERAND (stmt, 0);
921 	    if (TREE_CODE (t) == MODIFY_EXPR)
922 	      sra_walk_expr (&TREE_OPERAND (t, 1), &si, false, fns);
923 	    else
924 	      sra_walk_expr (&TREE_OPERAND (stmt, 0), &si, false, fns);
925 	    break;
926 
927 	  case MODIFY_EXPR:
928 	    sra_walk_modify_expr (stmt, &si, fns);
929 	    break;
930 	  case CALL_EXPR:
931 	    sra_walk_call_expr (stmt, &si, fns);
932 	    break;
933 	  case ASM_EXPR:
934 	    sra_walk_asm_expr (stmt, &si, fns);
935 	    break;
936 
937 	  default:
938 	    break;
939 	  }
940       }
941 }
942 
943 /* Phase One: Scan all referenced variables in the program looking for
944    structures that could be decomposed.  */
945 
946 static bool
947 find_candidates_for_sra (void)
948 {
949   bool any_set = false;
950   tree var;
951   referenced_var_iterator rvi;
952 
953   FOR_EACH_REFERENCED_VAR (var, rvi)
954     {
955       if (decl_can_be_decomposed_p (var))
956         {
957           bitmap_set_bit (sra_candidates, DECL_UID (var));
958           any_set = true;
959         }
960     }
961 
962   return any_set;
963 }
964 
965 
966 /* Phase Two: Scan all references to scalarizable variables.  Count the
967    number of times they are used or copied respectively.  */
968 
969 /* Callbacks to fill in SRA_WALK_FNS.  Everything but USE is
970    considered a copy, because we can decompose the reference such that
971    the sub-elements needn't be contiguous.  */
972 
973 static void
974 scan_use (struct sra_elt *elt, tree *expr_p ATTRIBUTE_UNUSED,
975 	  block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
976 	  bool is_output ATTRIBUTE_UNUSED, bool use_all ATTRIBUTE_UNUSED)
977 {
978   elt->n_uses += 1;
979 }
980 
981 static void
982 scan_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
983 	   block_stmt_iterator *bsi ATTRIBUTE_UNUSED)
984 {
985   lhs_elt->n_copies += 1;
986   rhs_elt->n_copies += 1;
987 }
988 
989 static void
990 scan_init (struct sra_elt *lhs_elt, tree rhs ATTRIBUTE_UNUSED,
991 	   block_stmt_iterator *bsi ATTRIBUTE_UNUSED)
992 {
993   lhs_elt->n_copies += 1;
994 }
995 
996 static void
997 scan_ldst (struct sra_elt *elt, tree other ATTRIBUTE_UNUSED,
998 	   block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
999 	   bool is_output ATTRIBUTE_UNUSED)
1000 {
1001   elt->n_copies += 1;
1002 }
1003 
1004 /* Dump the values we collected during the scanning phase.  */
1005 
1006 static void
1007 scan_dump (struct sra_elt *elt)
1008 {
1009   struct sra_elt *c;
1010 
1011   dump_sra_elt_name (dump_file, elt);
1012   fprintf (dump_file, ": n_uses=%u n_copies=%u\n", elt->n_uses, elt->n_copies);
1013 
1014   for (c = elt->children; c ; c = c->sibling)
1015     scan_dump (c);
1016 }
1017 
1018 /* Entry point to phase 2.  Scan the entire function, building up
1019    scalarization data structures, recording copies and uses.  */
1020 
1021 static void
1022 scan_function (void)
1023 {
1024   static const struct sra_walk_fns fns = {
1025     scan_use, scan_copy, scan_init, scan_ldst, true
1026   };
1027   bitmap_iterator bi;
1028 
1029   sra_walk_function (&fns);
1030 
1031   if (dump_file && (dump_flags & TDF_DETAILS))
1032     {
1033       unsigned i;
1034 
1035       fputs ("\nScan results:\n", dump_file);
1036       EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi)
1037 	{
1038 	  tree var = referenced_var (i);
1039 	  struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
1040 	  if (elt)
1041 	    scan_dump (elt);
1042 	}
1043       fputc ('\n', dump_file);
1044     }
1045 }
1046 
1047 /* Phase Three: Make decisions about which variables to scalarize, if any.
1048    All elements to be scalarized have replacement variables made for them.  */
1049 
1050 /* A subroutine of build_element_name.  Recursively build the element
1051    name on the obstack.  */
1052 
1053 static void
1054 build_element_name_1 (struct sra_elt *elt)
1055 {
1056   tree t;
1057   char buffer[32];
1058 
1059   if (elt->parent)
1060     {
1061       build_element_name_1 (elt->parent);
1062       obstack_1grow (&sra_obstack, '$');
1063 
1064       if (TREE_CODE (elt->parent->type) == COMPLEX_TYPE)
1065 	{
1066 	  if (elt->element == integer_zero_node)
1067 	    obstack_grow (&sra_obstack, "real", 4);
1068 	  else
1069 	    obstack_grow (&sra_obstack, "imag", 4);
1070 	  return;
1071 	}
1072     }
1073 
1074   t = elt->element;
1075   if (TREE_CODE (t) == INTEGER_CST)
1076     {
1077       /* ??? Eh.  Don't bother doing double-wide printing.  */
1078       sprintf (buffer, HOST_WIDE_INT_PRINT_DEC, TREE_INT_CST_LOW (t));
1079       obstack_grow (&sra_obstack, buffer, strlen (buffer));
1080     }
1081   else
1082     {
1083       tree name = DECL_NAME (t);
1084       if (name)
1085 	obstack_grow (&sra_obstack, IDENTIFIER_POINTER (name),
1086 		      IDENTIFIER_LENGTH (name));
1087       else
1088 	{
1089 	  sprintf (buffer, "D%u", DECL_UID (t));
1090 	  obstack_grow (&sra_obstack, buffer, strlen (buffer));
1091 	}
1092     }
1093 }
1094 
1095 /* Construct a pretty variable name for an element's replacement variable.
1096    The name is built on the obstack.  */
1097 
1098 static char *
1099 build_element_name (struct sra_elt *elt)
1100 {
1101   build_element_name_1 (elt);
1102   obstack_1grow (&sra_obstack, '\0');
1103   return XOBFINISH (&sra_obstack, char *);
1104 }
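
/* Example of the names this produces (the source-level names are assumed):
   for a variable "rect" with a field "c" of type _Complex float, the
   replacement of the real part is given the pretty name "rect$c$real";
   an array element such as rect.a[3] yields "rect$a$3".  Decls without a
   DECL_NAME fall back to "D<uid>" components.  */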
1105 
1106 /* Instantiate an element as an independent variable.  */
1107 
1108 static void
1109 instantiate_element (struct sra_elt *elt)
1110 {
1111   struct sra_elt *base_elt;
1112   tree var, base;
1113 
1114   for (base_elt = elt; base_elt->parent; base_elt = base_elt->parent)
1115     continue;
1116   base = base_elt->element;
1117 
1118   elt->replacement = var = make_rename_temp (elt->type, "SR");
1119   DECL_SOURCE_LOCATION (var) = DECL_SOURCE_LOCATION (base);
1120   DECL_ARTIFICIAL (var) = 1;
1121 
1122   if (TREE_THIS_VOLATILE (elt->type))
1123     {
1124       TREE_THIS_VOLATILE (var) = 1;
1125       TREE_SIDE_EFFECTS (var) = 1;
1126     }
1127 
1128   if (DECL_NAME (base) && !DECL_IGNORED_P (base))
1129     {
1130       char *pretty_name = build_element_name (elt);
1131       DECL_NAME (var) = get_identifier (pretty_name);
1132       obstack_free (&sra_obstack, pretty_name);
1133 
1134       SET_DECL_DEBUG_EXPR (var, generate_element_ref (elt));
1135       DECL_DEBUG_EXPR_IS_FROM (var) = 1;
1136 
1137       DECL_IGNORED_P (var) = 0;
1138       TREE_NO_WARNING (var) = TREE_NO_WARNING (base);
1139     }
1140   else
1141     {
1142       DECL_IGNORED_P (var) = 1;
1143       /* ??? We can't generate any warning that would be meaningful.  */
1144       TREE_NO_WARNING (var) = 1;
1145     }
1146 
1147   if (dump_file)
1148     {
1149       fputs ("  ", dump_file);
1150       dump_sra_elt_name (dump_file, elt);
1151       fputs (" -> ", dump_file);
1152       print_generic_expr (dump_file, var, dump_flags);
1153       fputc ('\n', dump_file);
1154     }
1155 }
1156 
1157 /* Make one pass across an element tree deciding whether or not it's
1158    profitable to instantiate individual leaf scalars.
1159 
1160    PARENT_USES and PARENT_COPIES are the sum of the N_USES and N_COPIES
1161    fields all the way up the tree.  */
1162 
1163 static void
1164 decide_instantiation_1 (struct sra_elt *elt, unsigned int parent_uses,
1165 			unsigned int parent_copies)
1166 {
1167   if (dump_file && !elt->parent)
1168     {
1169       fputs ("Initial instantiation for ", dump_file);
1170       dump_sra_elt_name (dump_file, elt);
1171       fputc ('\n', dump_file);
1172     }
1173 
1174   if (elt->cannot_scalarize)
1175     return;
1176 
1177   if (elt->is_scalar)
1178     {
1179       /* The decision is simple: instantiate if we're used more frequently
1180 	 than the parent needs to be seen as a complete unit.  */
1181       if (elt->n_uses + elt->n_copies + parent_copies > parent_uses)
1182 	instantiate_element (elt);
1183     }
1184   else
1185     {
1186       struct sra_elt *c;
1187       unsigned int this_uses = elt->n_uses + parent_uses;
1188       unsigned int this_copies = elt->n_copies + parent_copies;
1189 
1190       for (c = elt->children; c ; c = c->sibling)
1191 	decide_instantiation_1 (c, this_uses, this_copies);
1192     }
1193 }
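
/* A worked example of the test above (the counts are assumed): suppose the
   aggregate "s" is used as a whole twice (parent_uses = 2) and never
   block-copied (parent_copies = 0).  A member s.f with n_uses = 3 and
   n_copies = 0 satisfies 3 + 0 + 0 > 2 and is instantiated; a member s.g
   with n_uses = 1 does not, and is left alone here (decide_block_copy
   below may still instantiate it if element copies are chosen).  */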
1194 
1195 /* Compute the size and number of all instantiated elements below ELT.
1196    We will only care about this if the size of the complete structure
1197    fits in a HOST_WIDE_INT, so we don't have to worry about overflow.  */
1198 
1199 static unsigned int
1200 sum_instantiated_sizes (struct sra_elt *elt, unsigned HOST_WIDE_INT *sizep)
1201 {
1202   if (elt->replacement)
1203     {
1204       *sizep += TREE_INT_CST_LOW (TYPE_SIZE_UNIT (elt->type));
1205       return 1;
1206     }
1207   else
1208     {
1209       struct sra_elt *c;
1210       unsigned int count = 0;
1211 
1212       for (c = elt->children; c ; c = c->sibling)
1213 	count += sum_instantiated_sizes (c, sizep);
1214 
1215       return count;
1216     }
1217 }
1218 
1219 /* Instantiate fields in ELT->TYPE that are not currently present as
1220    children of ELT.  */
1221 
1222 static void instantiate_missing_elements (struct sra_elt *elt);
1223 
1224 static void
1225 instantiate_missing_elements_1 (struct sra_elt *elt, tree child, tree type)
1226 {
1227   struct sra_elt *sub = lookup_element (elt, child, type, INSERT);
1228   if (sub->is_scalar)
1229     {
1230       if (sub->replacement == NULL)
1231 	instantiate_element (sub);
1232     }
1233   else
1234     instantiate_missing_elements (sub);
1235 }
1236 
1237 static void
1238 instantiate_missing_elements (struct sra_elt *elt)
1239 {
1240   tree type = elt->type;
1241 
1242   switch (TREE_CODE (type))
1243     {
1244     case RECORD_TYPE:
1245       {
1246 	tree f;
1247 	for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
1248 	  if (TREE_CODE (f) == FIELD_DECL)
1249 	    instantiate_missing_elements_1 (elt, f, TREE_TYPE (f));
1250 	break;
1251       }
1252 
1253     case ARRAY_TYPE:
1254       {
1255 	tree i, max, subtype;
1256 
1257 	i = TYPE_MIN_VALUE (TYPE_DOMAIN (type));
1258 	max = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
1259 	subtype = TREE_TYPE (type);
1260 
1261 	while (1)
1262 	  {
1263 	    instantiate_missing_elements_1 (elt, i, subtype);
1264 	    if (tree_int_cst_equal (i, max))
1265 	      break;
1266 	    i = int_const_binop (PLUS_EXPR, i, integer_one_node, true);
1267 	  }
1268 
1269 	break;
1270       }
1271 
1272     case COMPLEX_TYPE:
1273       type = TREE_TYPE (type);
1274       instantiate_missing_elements_1 (elt, integer_zero_node, type);
1275       instantiate_missing_elements_1 (elt, integer_one_node, type);
1276       break;
1277 
1278     default:
1279       gcc_unreachable ();
1280     }
1281 }
1282 
1283 /* Make one pass across an element tree deciding whether to perform block
1284    or element copies.  If we decide on element copies, instantiate all
1285    elements.  Return true if there are any instantiated sub-elements.  */
1286 
1287 static bool
1288 decide_block_copy (struct sra_elt *elt)
1289 {
1290   struct sra_elt *c;
1291   bool any_inst;
1292 
1293   /* If scalarization is disabled, respect it.  */
1294   if (elt->cannot_scalarize)
1295     {
1296       elt->use_block_copy = 1;
1297 
1298       if (dump_file)
1299 	{
1300 	  fputs ("Scalarization disabled for ", dump_file);
1301 	  dump_sra_elt_name (dump_file, elt);
1302 	  fputc ('\n', dump_file);
1303 	}
1304 
1305       /* Disable scalarization of sub-elements.  */
1306       for (c = elt->children; c; c = c->sibling)
1307 	{
1308 	  c->cannot_scalarize = 1;
1309 	  decide_block_copy (c);
1310 	}
1311       return false;
1312     }
1313 
1314   /* Don't decide if we've no uses.  */
1315   if (elt->n_uses == 0 && elt->n_copies == 0)
1316     ;
1317 
1318   else if (!elt->is_scalar)
1319     {
1320       tree size_tree = TYPE_SIZE_UNIT (elt->type);
1321       bool use_block_copy = true;
1322 
1323       /* Tradeoffs for COMPLEX types pretty much always make it better
1324 	 to go ahead and split the components.  */
1325       if (TREE_CODE (elt->type) == COMPLEX_TYPE)
1326 	use_block_copy = false;
1327 
1328       /* Don't bother trying to figure out the rest if the structure is
1329 	 so large we can't do easy arithmetic.  This also forces block
1330 	 copies for variable sized structures.  */
1331       else if (host_integerp (size_tree, 1))
1332 	{
1333 	  unsigned HOST_WIDE_INT full_size, inst_size = 0;
1334 	  unsigned int max_size, max_count, inst_count, full_count;
1335 
1336 	  /* If the sra-max-structure-size parameter is 0, then the
1337 	     user has not overridden the parameter and we can choose a
1338 	     sensible default.  */
1339 	  max_size = SRA_MAX_STRUCTURE_SIZE
1340 	    ? SRA_MAX_STRUCTURE_SIZE
1341 	    : MOVE_RATIO * UNITS_PER_WORD;
1342 	  max_count = SRA_MAX_STRUCTURE_COUNT
1343 	    ? SRA_MAX_STRUCTURE_COUNT
1344 	    : MOVE_RATIO;
1345 
1346 	  full_size = tree_low_cst (size_tree, 1);
1347 	  full_count = count_type_elements (elt->type, false);
1348 	  inst_count = sum_instantiated_sizes (elt, &inst_size);
1349 
1350 	  /* ??? What to do here.  If there are two fields, and we've only
1351 	     instantiated one, then instantiating the other is clearly a win.
1352 	     If there are a large number of fields then the size of the copy
1353 	     is much more of a factor.  */
1354 
1355 	  /* If the structure is small, and we've made copies, go ahead
1356 	     and instantiate, hoping that the copies will go away.  */
1357 	  if (full_size <= max_size
1358 	      && (full_count - inst_count) <= max_count
1359 	      && elt->n_copies > elt->n_uses)
1360 	    use_block_copy = false;
1361 	  else if (inst_count * 100 >= full_count * SRA_FIELD_STRUCTURE_RATIO
1362 		   && inst_size * 100 >= full_size * SRA_FIELD_STRUCTURE_RATIO)
1363 	    use_block_copy = false;
1364 
1365 	  /* In order to avoid block copy, we have to be able to instantiate
1366 	     all elements of the type.  See if this is possible.  */
1367 	  if (!use_block_copy
1368 	      && (!can_completely_scalarize_p (elt)
1369 		  || !type_can_instantiate_all_elements (elt->type)))
1370 	    use_block_copy = true;
1371 	}
1372       elt->use_block_copy = use_block_copy;
1373 
1374       if (dump_file)
1375 	{
1376 	  fprintf (dump_file, "Using %s for ",
1377 		   use_block_copy ? "block-copy" : "element-copy");
1378 	  dump_sra_elt_name (dump_file, elt);
1379 	  fputc ('\n', dump_file);
1380 	}
1381 
1382       if (!use_block_copy)
1383 	{
1384 	  instantiate_missing_elements (elt);
1385 	  return true;
1386 	}
1387     }
1388 
1389   any_inst = elt->replacement != NULL;
1390 
1391   for (c = elt->children; c ; c = c->sibling)
1392     any_inst |= decide_block_copy (c);
1393 
1394   return any_inst;
1395 }
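
/* A worked example of the size heuristic (the target numbers are assumed,
   not taken from any particular port): with both parameters left at zero,
   UNITS_PER_WORD = 8 and MOVE_RATIO = 4, we get max_size = 32 bytes and
   max_count = 4.  A 16-byte struct with two fields, one of them already
   instantiated, that is copied more often than it is used passes the first
   test and switches to element copies, provided every element can in fact
   be instantiated.  A 4096-byte array fails the size test and keeps block
   copies unless the already-instantiated portion reaches the
   SRA_FIELD_STRUCTURE_RATIO thresholds.  */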
1396 
1397 /* Entry point to phase 3.  Instantiate scalar replacement variables.  */
1398 
1399 static void
1400 decide_instantiations (void)
1401 {
1402   unsigned int i;
1403   bool cleared_any;
1404   bitmap_head done_head;
1405   bitmap_iterator bi;
1406 
1407   /* We cannot clear bits from a bitmap we're iterating over,
1408      so save up all the bits to clear until the end.  */
1409   bitmap_initialize (&done_head, &bitmap_default_obstack);
1410   cleared_any = false;
1411 
1412   EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi)
1413     {
1414       tree var = referenced_var (i);
1415       struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
1416       if (elt)
1417 	{
1418 	  decide_instantiation_1 (elt, 0, 0);
1419 	  if (!decide_block_copy (elt))
1420 	    elt = NULL;
1421 	}
1422       if (!elt)
1423 	{
1424 	  bitmap_set_bit (&done_head, i);
1425 	  cleared_any = true;
1426 	}
1427     }
1428 
1429   if (cleared_any)
1430     {
1431       bitmap_and_compl_into (sra_candidates, &done_head);
1432       bitmap_and_compl_into (needs_copy_in, &done_head);
1433     }
1434   bitmap_clear (&done_head);
1435 
1436   mark_set_for_renaming (sra_candidates);
1437 
1438   if (dump_file)
1439     fputc ('\n', dump_file);
1440 }
1441 
1442 
1443 /* Phase Four: Update the function to match the replacements created.  */
1444 
1445 /* Mark all the variables in V_MAY_DEF or V_MUST_DEF operands for STMT for
1446    renaming. This becomes necessary when we modify all of a non-scalar.  */
1447 
1448 static void
1449 mark_all_v_defs_1 (tree stmt)
1450 {
1451   tree sym;
1452   ssa_op_iter iter;
1453 
1454   update_stmt_if_modified (stmt);
1455 
1456   FOR_EACH_SSA_TREE_OPERAND (sym, stmt, iter, SSA_OP_ALL_VIRTUALS)
1457     {
1458       if (TREE_CODE (sym) == SSA_NAME)
1459 	sym = SSA_NAME_VAR (sym);
1460       mark_sym_for_renaming (sym);
1461     }
1462 }
1463 
1464 
1465 /* Mark all the variables in virtual operands in all the statements in
1466    LIST for renaming.  */
1467 
1468 static void
1469 mark_all_v_defs (tree list)
1470 {
1471   if (TREE_CODE (list) != STATEMENT_LIST)
1472     mark_all_v_defs_1 (list);
1473   else
1474     {
1475       tree_stmt_iterator i;
1476       for (i = tsi_start (list); !tsi_end_p (i); tsi_next (&i))
1477 	mark_all_v_defs_1 (tsi_stmt (i));
1478     }
1479 }
1480 
1481 /* Mark every replacement under ELT with TREE_NO_WARNING.  */
1482 
1483 static void
1484 mark_no_warning (struct sra_elt *elt)
1485 {
1486   if (!elt->all_no_warning)
1487     {
1488       if (elt->replacement)
1489 	TREE_NO_WARNING (elt->replacement) = 1;
1490       else
1491 	{
1492 	  struct sra_elt *c;
1493 	  for (c = elt->children; c ; c = c->sibling)
1494 	    mark_no_warning (c);
1495 	}
1496     }
1497 }
1498 
1499 /* Build a single level component reference to ELT rooted at BASE.  */
1500 
1501 static tree
1502 generate_one_element_ref (struct sra_elt *elt, tree base)
1503 {
1504   switch (TREE_CODE (TREE_TYPE (base)))
1505     {
1506     case RECORD_TYPE:
1507       {
1508 	tree field = elt->element;
1509 
1510 	/* Watch out for compatible records with differing field lists.  */
1511 	if (DECL_FIELD_CONTEXT (field) != TYPE_MAIN_VARIANT (TREE_TYPE (base)))
1512 	  field = find_compatible_field (TREE_TYPE (base), field);
1513 
1514         return build (COMPONENT_REF, elt->type, base, field, NULL);
1515       }
1516 
1517     case ARRAY_TYPE:
1518       return build (ARRAY_REF, elt->type, base, elt->element, NULL, NULL);
1519 
1520     case COMPLEX_TYPE:
1521       if (elt->element == integer_zero_node)
1522 	return build (REALPART_EXPR, elt->type, base);
1523       else
1524 	return build (IMAGPART_EXPR, elt->type, base);
1525 
1526     default:
1527       gcc_unreachable ();
1528     }
1529 }
1530 
1531 /* Build a full component reference to ELT rooted at its native variable.  */
1532 
1533 static tree
1534 generate_element_ref (struct sra_elt *elt)
1535 {
1536   if (elt->parent)
1537     return generate_one_element_ref (elt, generate_element_ref (elt->parent));
1538   else
1539     return elt->element;
1540 }
1541 
1542 /* Generate a set of assignment statements in *LIST_P to copy all
1543    instantiated elements under ELT to or from the equivalent structure
1544    rooted at EXPR.  COPY_OUT controls the direction of the copy, with
1545    true meaning to copy out of EXPR into ELT.  */
1546 
1547 static void
1548 generate_copy_inout (struct sra_elt *elt, bool copy_out, tree expr,
1549 		     tree *list_p)
1550 {
1551   struct sra_elt *c;
1552   tree t;
1553 
1554   if (!copy_out && TREE_CODE (expr) == SSA_NAME
1555       && TREE_CODE (TREE_TYPE (expr)) == COMPLEX_TYPE)
1556     {
1557       tree r, i;
1558 
1559       c = lookup_element (elt, integer_zero_node, NULL, NO_INSERT);
1560       r = c->replacement;
1561       c = lookup_element (elt, integer_one_node, NULL, NO_INSERT);
1562       i = c->replacement;
1563 
1564       t = build (COMPLEX_EXPR, elt->type, r, i);
1565       t = build (MODIFY_EXPR, void_type_node, expr, t);
1566       SSA_NAME_DEF_STMT (expr) = t;
1567       append_to_statement_list (t, list_p);
1568     }
1569   else if (elt->replacement)
1570     {
1571       if (copy_out)
1572 	t = build (MODIFY_EXPR, void_type_node, elt->replacement, expr);
1573       else
1574 	t = build (MODIFY_EXPR, void_type_node, expr, elt->replacement);
1575       append_to_statement_list (t, list_p);
1576     }
1577   else
1578     {
1579       for (c = elt->children; c ; c = c->sibling)
1580 	{
1581 	  t = generate_one_element_ref (c, unshare_expr (expr));
1582 	  generate_copy_inout (c, copy_out, t, list_p);
1583 	}
1584     }
1585 }
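
/* For instance (the parameter and field names are assumed): for a
   scalarized PARM_DECL "p" of type struct { int x; float y; }, calling
   this with copy_out = true and EXPR equal to the incoming parameter
   builds the copy-in sequence

	p$x = p.x;
	p$y = p.y;

   while copy_out = false builds the mirror-image assignments from the
   replacements back into the structure rooted at EXPR.  The SSA_NAME
   complex case at the top instead builds a single COMPLEX_EXPR assignment
   so the SSA definition remains one statement.  */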
1586 
1587 /* Generate a set of assignment statements in *LIST_P to copy all instantiated
1588    elements under SRC to their counterparts under DST.  There must be a 1-1
1589    correspondence of instantiated elements.  */
1590 
1591 static void
1592 generate_element_copy (struct sra_elt *dst, struct sra_elt *src, tree *list_p)
1593 {
1594   struct sra_elt *dc, *sc;
1595 
1596   for (dc = dst->children; dc ; dc = dc->sibling)
1597     {
1598       sc = lookup_element (src, dc->element, NULL, NO_INSERT);
1599       gcc_assert (sc);
1600       generate_element_copy (dc, sc, list_p);
1601     }
1602 
1603   if (dst->replacement)
1604     {
1605       tree t;
1606 
1607       gcc_assert (src->replacement);
1608 
1609       t = build (MODIFY_EXPR, void_type_node, dst->replacement,
1610 		 src->replacement);
1611       append_to_statement_list (t, list_p);
1612     }
1613 }
1614 
1615 /* Generate a set of assignment statements in *LIST_P to zero all instantiated
1616    elements under ELT.  In addition, do not assign to elements that have been
1617    marked VISITED but do reset the visited flag; this allows easy coordination
1618    with generate_element_init.  */
1619 
1620 static void
1621 generate_element_zero (struct sra_elt *elt, tree *list_p)
1622 {
1623   struct sra_elt *c;
1624 
1625   if (elt->visited)
1626     {
1627       elt->visited = false;
1628       return;
1629     }
1630 
1631   for (c = elt->children; c ; c = c->sibling)
1632     generate_element_zero (c, list_p);
1633 
1634   if (elt->replacement)
1635     {
1636       tree t;
1637 
1638       gcc_assert (elt->is_scalar);
1639       t = fold_convert (elt->type, integer_zero_node);
1640 
1641       t = build (MODIFY_EXPR, void_type_node, elt->replacement, t);
1642       append_to_statement_list (t, list_p);
1643     }
1644 }
1645 
1646 /* Generate an assignment VAR = INIT, where INIT may need gimplification.
1647    Add the result to *LIST_P.  */
1648 
1649 static void
1650 generate_one_element_init (tree var, tree init, tree *list_p)
1651 {
1652   /* The replacement can be almost arbitrarily complex.  Gimplify.  */
1653   tree stmt = build (MODIFY_EXPR, void_type_node, var, init);
1654   gimplify_and_add (stmt, list_p);
1655 }
1656 
1657 /* Generate a set of assignment statements in *LIST_P to set all instantiated
1658    elements under ELT with the contents of the initializer INIT.  In addition,
1659    mark all assigned elements VISITED; this allows easy coordination with
1660    generate_element_zero.  Return false if we found a case we couldn't
1661    handle.  */
1662 
1663 static bool
1664 generate_element_init_1 (struct sra_elt *elt, tree init, tree *list_p)
1665 {
1666   bool result = true;
1667   enum tree_code init_code;
1668   struct sra_elt *sub;
1669   tree t;
1670   unsigned HOST_WIDE_INT idx;
1671   tree value, purpose;
1672 
1673   /* We can be passed DECL_INITIAL of a static variable.  It might have a
1674      conversion, which we strip off here.  */
1675   STRIP_USELESS_TYPE_CONVERSION (init);
1676   init_code = TREE_CODE (init);
1677 
1678   if (elt->is_scalar)
1679     {
1680       if (elt->replacement)
1681 	{
1682 	  generate_one_element_init (elt->replacement, init, list_p);
1683 	  elt->visited = true;
1684 	}
1685       return result;
1686     }
1687 
1688   switch (init_code)
1689     {
1690     case COMPLEX_CST:
1691     case COMPLEX_EXPR:
1692       for (sub = elt->children; sub ; sub = sub->sibling)
1693 	{
1694 	  if (sub->element == integer_zero_node)
1695 	    t = (init_code == COMPLEX_EXPR
1696 		 ? TREE_OPERAND (init, 0) : TREE_REALPART (init));
1697 	  else
1698 	    t = (init_code == COMPLEX_EXPR
1699 		 ? TREE_OPERAND (init, 1) : TREE_IMAGPART (init));
1700 	  result &= generate_element_init_1 (sub, t, list_p);
1701 	}
1702       break;
1703 
1704     case CONSTRUCTOR:
1705       FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), idx, purpose, value)
1706 	{
1707 	  if (TREE_CODE (purpose) == RANGE_EXPR)
1708 	    {
1709 	      tree lower = TREE_OPERAND (purpose, 0);
1710 	      tree upper = TREE_OPERAND (purpose, 1);
1711 
1712 	      while (1)
1713 		{
1714 	  	  sub = lookup_element (elt, lower, NULL, NO_INSERT);
1715 		  if (sub != NULL)
1716 		    result &= generate_element_init_1 (sub, value, list_p);
1717 		  if (tree_int_cst_equal (lower, upper))
1718 		    break;
1719 		  lower = int_const_binop (PLUS_EXPR, lower,
1720 					   integer_one_node, true);
1721 		}
1722 	    }
1723 	  else
1724 	    {
1725 	      sub = lookup_element (elt, purpose, NULL, NO_INSERT);
1726 	      if (sub != NULL)
1727 		result &= generate_element_init_1 (sub, value, list_p);
1728 	    }
1729 	}
1730       break;
1731 
1732     default:
1733       elt->visited = true;
1734       result = false;
1735     }
1736 
1737   return result;
1738 }
1739 
1740 /* A wrapper function for generate_element_init_1 that handles cleanup after
1741    gimplification.  */
1742 
1743 static bool
generate_element_init(struct sra_elt * elt,tree init,tree * list_p)1744 generate_element_init (struct sra_elt *elt, tree init, tree *list_p)
1745 {
1746   bool ret;
1747 
1748   push_gimplify_context ();
1749   ret = generate_element_init_1 (elt, init, list_p);
1750   pop_gimplify_context (NULL);
1751 
1752   /* The replacement can expose previously unreferenced variables.  */
1753   if (ret && *list_p)
1754     {
1755       tree_stmt_iterator i;
1756 
1757       for (i = tsi_start (*list_p); !tsi_end_p (i); tsi_next (&i))
1758 	find_new_referenced_vars (tsi_stmt_ptr (i));
1759     }
1760 
1761   return ret;
1762 }
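
/* Illustrative sketch of what the initializer walk produces (element
   names are hypothetical): for

     struct { int a; int b; } s = { .a = 3 };

   generate_element_init yields "s$a = 3;" and marks s$a visited, leaving
   the companion generate_element_zero pass to supply "s$b = 0;".  For an
   array initializer such as

     int v[4] = { [1 ... 2] = 7 };

   the RANGE_EXPR case above walks the indices 1 and 2, initializing
   whichever of those elements were instantiated.  */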

/* Insert STMT on all the outgoing edges out of BB.  Note that if BB
   has more than one edge, STMT will be replicated for each edge.  Also,
   abnormal edges will be ignored.  */

void
insert_edge_copies (tree stmt, basic_block bb)
{
  edge e;
  edge_iterator ei;
  bool first_copy;

  first_copy = true;
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      /* We don't need to insert copies on abnormal edges.  The
	 value of the scalar replacement is not guaranteed to
	 be valid through an abnormal edge.  */
      if (!(e->flags & EDGE_ABNORMAL))
	{
	  if (first_copy)
	    {
	      bsi_insert_on_edge (e, stmt);
	      first_copy = false;
	    }
	  else
	    bsi_insert_on_edge (e, unsave_expr_now (stmt));
	}
    }
}

/* Helper function to insert LIST before BSI, and set up line number info.  */

void
sra_insert_before (block_stmt_iterator *bsi, tree list)
{
  tree stmt = bsi_stmt (*bsi);

  if (EXPR_HAS_LOCATION (stmt))
    annotate_all_with_locus (&list, EXPR_LOCATION (stmt));
  bsi_insert_before (bsi, list, BSI_SAME_STMT);
}

/* Similarly, but insert after BSI.  Handles insertion onto edges as well.  */

void
sra_insert_after (block_stmt_iterator *bsi, tree list)
{
  tree stmt = bsi_stmt (*bsi);

  if (EXPR_HAS_LOCATION (stmt))
    annotate_all_with_locus (&list, EXPR_LOCATION (stmt));

  if (stmt_ends_bb_p (stmt))
    insert_edge_copies (list, bsi->bb);
  else
    bsi_insert_after (bsi, list, BSI_SAME_STMT);
}

/* Similarly, but replace the statement at BSI.  */

static void
sra_replace (block_stmt_iterator *bsi, tree list)
{
  sra_insert_before (bsi, list);
  bsi_remove (bsi);
  if (bsi_end_p (*bsi))
    *bsi = bsi_last (bsi->bb);
  else
    bsi_prev (bsi);
}

/* Scalarize a USE.  To recap, this is either a simple reference to ELT,
   if ELT is scalar, or some occurrence of ELT that requires a complete
   aggregate.  IS_OUTPUT is true if ELT is being modified.  */

static void
scalarize_use (struct sra_elt *elt, tree *expr_p, block_stmt_iterator *bsi,
	       bool is_output, bool use_all)
{
  tree list = NULL, stmt = bsi_stmt (*bsi);

  if (elt->replacement)
    {
      /* If we have a replacement, then updating the reference is as
	 simple as modifying the existing statement in place.  */
      if (is_output)
	mark_all_v_defs (stmt);
      *expr_p = elt->replacement;
      update_stmt (stmt);
    }
  else
    {
      /* Otherwise we need some copies.  If ELT is being read, then we want
	 to store all (modified) sub-elements back into the structure before
	 the reference takes place.  If ELT is being written, then we want to
	 load the changed values back into our shadow variables.  */
      /* ??? We don't check modified for reads, we just always write all of
	 the values.  We should be able to record the SSA number of the VOP
	 for which the values were last read.  If that number matches the
	 SSA number of the VOP in the current statement, then we needn't
	 emit an assignment.  This would also eliminate double writes when
	 a structure is passed as more than one argument to a function call.
	 This optimization would be most effective if sra_walk_function
	 processed the blocks in dominator order.  */

      generate_copy_inout (elt, is_output, generate_element_ref (elt), &list);
      if (list == NULL)
	return;
      mark_all_v_defs (list);
      if (is_output)
	sra_insert_after (bsi, list);
      else
	{
	  sra_insert_before (bsi, list);
	  if (use_all)
	    mark_no_warning (elt);
	}
    }
}
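
/* Rough example of the copy-in/copy-out case above (variable names are
   invented for illustration): if S is a candidate that still appears as
   a complete aggregate in "foo (s);", the generated list writes the live
   replacements back first,

     s.x = s$x;
     s.y = s$y;
     foo (s);

   whereas if S were the output of the statement, the copies would go in
   the other direction and be inserted after it.  */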

/* Scalarize a COPY.  To recap, this is an assignment statement between
   two scalarizable references, LHS_ELT and RHS_ELT.  */

static void
scalarize_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
		block_stmt_iterator *bsi)
{
  tree list, stmt;

  if (lhs_elt->replacement && rhs_elt->replacement)
    {
      /* If we have two scalar operands, modify the existing statement.  */
      stmt = bsi_stmt (*bsi);

      /* See the commentary in sra_walk_function concerning
	 RETURN_EXPR, and why we should never see one here.  */
      gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);

      TREE_OPERAND (stmt, 0) = lhs_elt->replacement;
      TREE_OPERAND (stmt, 1) = rhs_elt->replacement;
      update_stmt (stmt);
    }
  else if (lhs_elt->use_block_copy || rhs_elt->use_block_copy)
    {
      /* If either side requires a block copy, then sync the RHS back
	 to the original structure, leave the original assignment
	 statement (which will perform the block copy), then load the
	 LHS values out of its now-updated original structure.  */
      /* ??? Could perform a modified pair-wise element copy.  That
	 would at least allow those elements that are instantiated in
	 both structures to be optimized well.  */

      list = NULL;
      generate_copy_inout (rhs_elt, false,
			   generate_element_ref (rhs_elt), &list);
      if (list)
	{
	  mark_all_v_defs (list);
	  sra_insert_before (bsi, list);
	}

      list = NULL;
      generate_copy_inout (lhs_elt, true,
			   generate_element_ref (lhs_elt), &list);
      if (list)
	{
	  mark_all_v_defs (list);
	  sra_insert_after (bsi, list);
	}
    }
  else
    {
      /* Otherwise both sides must be fully instantiated.  In which
	 case perform pair-wise element assignments and replace the
	 original block copy statement.  */

      stmt = bsi_stmt (*bsi);
      mark_all_v_defs (stmt);

      list = NULL;
      generate_element_copy (lhs_elt, rhs_elt, &list);
      gcc_assert (list);
      mark_all_v_defs (list);
      sra_replace (bsi, list);
    }
}
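
/* A hedged sketch of the mixed case (names illustrative): if R is fully
   instantiated but L still requires a block copy, the RHS replacements
   are synced back before the assignment and any instantiated LHS
   elements are reloaded afterwards:

     r.x = r$x;
     r.y = r$y;
     l = r;          <-- original statement, kept
     l$x = l.x;      <-- only for elements of L that were instantiated

   When both sides are fully instantiated, the statement is instead
   replaced outright by the pair-wise copies from generate_element_copy.  */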

/* Scalarize an INIT.  To recap, this is an assignment to a scalarizable
   reference from some form of constructor: CONSTRUCTOR, COMPLEX_CST or
   COMPLEX_EXPR.  If RHS is NULL, it should be treated as an empty
   CONSTRUCTOR.  */

static void
scalarize_init (struct sra_elt *lhs_elt, tree rhs, block_stmt_iterator *bsi)
{
  bool result = true;
  tree list = NULL;

  /* Generate initialization statements for all members extant in the RHS.  */
  if (rhs)
    {
      /* Unshare the expression just in case this is from a decl's initial.  */
      rhs = unshare_expr (rhs);
      result = generate_element_init (lhs_elt, rhs, &list);
    }

  /* CONSTRUCTOR is defined such that any member not mentioned is assigned
     a zero value.  Initialize the rest of the instantiated elements.  */
  generate_element_zero (lhs_elt, &list);

  if (!result)
    {
      /* If we failed to convert the entire initializer, then we must
	 leave the structure assignment in place and must load values
	 from the structure into the slots for which we did not find
	 constants.  The easiest way to do this is to generate a complete
	 copy-out, and then follow that with the constant assignments
	 that we were able to build.  DCE will clean things up.  */
      tree list0 = NULL;
      generate_copy_inout (lhs_elt, true, generate_element_ref (lhs_elt),
			   &list0);
      append_to_statement_list (list, &list0);
      list = list0;
    }

  if (lhs_elt->use_block_copy || !result)
    {
      /* Since LHS is not fully instantiated, we must leave the structure
	 assignment in place.  Treating this case differently from a USE
	 exposes constants to later optimizations.  */
      if (list)
	{
	  mark_all_v_defs (list);
	  sra_insert_after (bsi, list);
	}
    }
  else
    {
      /* The LHS is fully instantiated.  The list of initializations
	 replaces the original structure assignment.  */
      gcc_assert (list);
      mark_all_v_defs (bsi_stmt (*bsi));
      mark_all_v_defs (list);
      sra_replace (bsi, list);
    }
}
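
/* Worked example, hedged (the temporaries shown are illustrative): for

     s = (struct pair) { .x = 1 };

   with S fully instantiated, the structure assignment is replaced by

     s$x = 1;
     s$y = 0;

   Had part of the initializer been unhandled, the original assignment
   would remain, followed by a full copy-out plus whatever constant
   assignments were built, with DCE expected to remove the slack.  */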

/* A subroutine of scalarize_ldst called via walk_tree.  Set TREE_THIS_NOTRAP
   on all INDIRECT_REFs.  */

static tree
mark_notrap (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
{
  tree t = *tp;

  if (TREE_CODE (t) == INDIRECT_REF)
    {
      TREE_THIS_NOTRAP (t) = 1;
      *walk_subtrees = 0;
    }
  else if (IS_TYPE_OR_DECL_P (t))
    *walk_subtrees = 0;

  return NULL;
}

/* Scalarize a LDST.  To recap, this is an assignment between one scalarizable
   reference ELT and one non-scalarizable reference OTHER.  IS_OUTPUT is true
   if ELT is on the left-hand side.  */

static void
scalarize_ldst (struct sra_elt *elt, tree other,
		block_stmt_iterator *bsi, bool is_output)
{
  /* Shouldn't have gotten called for a scalar.  */
  gcc_assert (!elt->replacement);

  if (elt->use_block_copy)
    {
      /* Since ELT is not fully instantiated, we have to leave the
	 block copy in place.  Treat this as a USE.  */
      scalarize_use (elt, NULL, bsi, is_output, false);
    }
  else
    {
      /* The interesting case is when ELT is fully instantiated.  In this
	 case we can have each element stored/loaded directly to/from the
	 corresponding slot in OTHER.  This avoids a block copy.  */

      tree list = NULL, stmt = bsi_stmt (*bsi);

      mark_all_v_defs (stmt);
      generate_copy_inout (elt, is_output, other, &list);
      mark_all_v_defs (list);
      gcc_assert (list);

      /* Preserve EH semantics.  */
      if (stmt_ends_bb_p (stmt))
	{
	  tree_stmt_iterator tsi;
	  tree first;

	  /* Extract the first statement from LIST.  */
	  tsi = tsi_start (list);
	  first = tsi_stmt (tsi);
	  tsi_delink (&tsi);

	  /* Replace the old statement with this new representative.  */
	  bsi_replace (bsi, first, true);

	  if (!tsi_end_p (tsi))
	    {
	      /* If any reference would trap, then they all would.  And more
		 to the point, the first would.  Therefore none of the rest
		 will trap since the first didn't.  Indicate this by
		 iterating over the remaining statements and setting
		 TREE_THIS_NOTRAP in all INDIRECT_REFs.  */
	      do
		{
		  walk_tree (tsi_stmt_ptr (tsi), mark_notrap, NULL, NULL);
		  tsi_next (&tsi);
		}
	      while (!tsi_end_p (tsi));

	      insert_edge_copies (list, bsi->bb);
	    }
	}
      else
	sra_replace (bsi, list);
    }
}
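
/* Sketch of the fully-instantiated load case (names illustrative): for

     s = *ptr;

   the block copy is replaced by direct element loads

     s$x = (*ptr).x;
     s$y = (*ptr).y;

   and when the original statement could throw, only the first load keeps
   its place at the end of the block; the remaining loads are marked
   TREE_THIS_NOTRAP and copied onto the outgoing edges.  */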

/* Generate initializations for all scalarizable parameters.  */

static void
scalarize_parms (void)
{
  tree list = NULL;
  unsigned i;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (needs_copy_in, 0, i, bi)
    {
      tree var = referenced_var (i);
      struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
      generate_copy_inout (elt, true, var, &list);
    }

  if (list)
    {
      insert_edge_copies (list, ENTRY_BLOCK_PTR);
      mark_all_v_defs (list);
    }
}
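
/* Illustrative copy-in for a scalarized parameter (names invented): for
   "int f (struct pair p)" where P is decomposed, the entry edge receives

     p$x = p.x;
     p$y = p.y;

   so the replacements hold the incoming values before any use of P in
   the body.  */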

/* Entry point to phase 4.  Update the function to match replacements.  */

static void
scalarize_function (void)
{
  static const struct sra_walk_fns fns = {
    scalarize_use, scalarize_copy, scalarize_init, scalarize_ldst, false
  };

  sra_walk_function (&fns);
  scalarize_parms ();
  bsi_commit_edge_inserts ();
}


/* Debug helper function.  Print ELT in a nice human-readable format.  */

static void
dump_sra_elt_name (FILE *f, struct sra_elt *elt)
{
  if (elt->parent && TREE_CODE (elt->parent->type) == COMPLEX_TYPE)
    {
      fputs (elt->element == integer_zero_node ? "__real__ " : "__imag__ ", f);
      dump_sra_elt_name (f, elt->parent);
    }
  else
    {
      if (elt->parent)
        dump_sra_elt_name (f, elt->parent);
      if (DECL_P (elt->element))
	{
	  if (TREE_CODE (elt->element) == FIELD_DECL)
	    fputc ('.', f);
	  print_generic_expr (f, elt->element, dump_flags);
	}
      else
	fprintf (f, "[" HOST_WIDE_INT_PRINT_DEC "]",
		 TREE_INT_CST_LOW (elt->element));
    }
}

/* Likewise, but callable from the debugger.  */

void
debug_sra_elt_name (struct sra_elt *elt)
{
  dump_sra_elt_name (stderr, elt);
  fputc ('\n', stderr);
}

/* Allocate the bitmaps that cache type decomposition and instantiation
   decisions, if this has not been done already.  */

void
sra_init_cache (void)
{
  if (sra_type_decomp_cache)
    return;

  sra_type_decomp_cache = BITMAP_ALLOC (NULL);
  sra_type_inst_cache = BITMAP_ALLOC (NULL);
}

/* Main entry point.  */

static void
tree_sra (void)
{
  /* Initialize local variables.  */
  gcc_obstack_init (&sra_obstack);
  sra_candidates = BITMAP_ALLOC (NULL);
  needs_copy_in = BITMAP_ALLOC (NULL);
  sra_init_cache ();
  sra_map = htab_create (101, sra_elt_hash, sra_elt_eq, NULL);

  /* Scan.  If we find anything, instantiate and scalarize.  */
  if (find_candidates_for_sra ())
    {
      scan_function ();
      decide_instantiations ();
      scalarize_function ();
    }

  /* Free allocated memory.  */
  htab_delete (sra_map);
  sra_map = NULL;
  BITMAP_FREE (sra_candidates);
  BITMAP_FREE (needs_copy_in);
  BITMAP_FREE (sra_type_decomp_cache);
  BITMAP_FREE (sra_type_inst_cache);
  obstack_free (&sra_obstack, NULL);
}

static bool
gate_sra (void)
{
  return flag_tree_sra != 0;
}

struct tree_opt_pass pass_sra =
{
  "sra",				/* name */
  gate_sra,				/* gate */
  tree_sra,				/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_TREE_SRA,				/* tv_id */
  PROP_cfg | PROP_ssa | PROP_alias,	/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_update_ssa
    | TODO_ggc_collect | TODO_verify_ssa,  /* todo_flags_finish */
  0					/* letter */
};