1 /* Predictive commoning.
2    Copyright (C) 2005-2018 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This file implements the predictive commoning optimization.  Predictive
21    commoning can be viewed as CSE around a loop, and with some improvements,
22    as generalized strength reduction-- i.e., reusing values computed in
23    earlier iterations of a loop in the later ones.  So far, the pass only
24    handles the most useful case, that is, reusing values of memory references.
25    If you think this is all just a special case of PRE, you are sort of right;
26    however, concentrating on loops is simpler, and makes it possible to
27    incorporate data dependence analysis to detect the opportunities, perform
28    loop unrolling to avoid copies together with renaming immediately,
29    and if needed, we could also take register pressure into account.
30 
31    Let us demonstrate what is done on an example:
32 
33    for (i = 0; i < 100; i++)
34      {
35        a[i+2] = a[i] + a[i+1];
36        b[10] = b[10] + i;
37        c[i] = c[99 - i];
38        d[i] = d[i + 1];
39      }
40 
41    1) We find data references in the loop, and split them to mutually
42       independent groups (i.e., we find components of a data dependence
43       graph).  We ignore read-read dependences whose distance is not constant.
44       (TODO -- we could also ignore antidependences).  In this example, we
45       find the following groups:
46 
47       a[i]{read}, a[i+1]{read}, a[i+2]{write}
48       b[10]{read}, b[10]{write}
49       c[99 - i]{read}, c[i]{write}
50       d[i + 1]{read}, d[i]{write}
51 
52    2) Inside each of the group, we verify several conditions:
53       a) all the references must differ in indices only, and the indices
54 	 must all have the same step
55       b) the references must dominate loop latch (and thus, they must be
56 	 ordered by dominance relation).
57       c) the distance of the indices must be a small multiple of the step
58       We are then able to compute the difference of the references (# of
59       iterations before they point to the same place as the first of them).
60       Also, in case there are writes in the loop, we split the groups into
61       chains whose head is the write whose values are used by the reads in
62       the same chain.  The chains are then processed independently,
63       making the further transformations simpler.  Also, the shorter chains
64       need the same number of registers, but may require lower unrolling
65       factor in order to get rid of the copies on the loop latch.
66 
67       In our example, we get the following chains (the chain for c is invalid).
68 
69       a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70       b[10]{read,+0}, b[10]{write,+0}
71       d[i + 1]{read,+0}, d[i]{write,+1}
72 
73    3) For each read, we determine the read or write whose value it reuses,
74       together with the distance of this reuse.  I.e. we take the last
75       reference before it with distance 0, or the last of the references
76       with the smallest positive distance to the read.  Then, we remove
77       the references that are not used in any of these chains, discard the
78       empty groups, and propagate all the links so that they point to the
79       single root reference of the chain (adjusting their distance
80       appropriately).  Some extra care needs to be taken for references with
81       step 0.  In our example (the numbers indicate the distance of the
82       reuse),
83 
84       a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85       b[10] --> (*) 1, b[10] (*)
86 
87    4) The chains are combined together if possible.  If the corresponding
88       elements of two chains are always combined together with the same
89       operator, we remember just the result of this combination, instead
90       of remembering the values separately.  We may need to perform
91       reassociation to enable combining, for example
92 
93       e[i] + f[i+1] + e[i+1] + f[i]
94 
95       can be reassociated as
96 
97       (e[i] + f[i]) + (e[i+1] + f[i+1])
98 
99       and we can combine the chains for e and f into one chain.
100 
101    5) For each root reference (end of the chain) R, let N be maximum distance
102       of a reference reusing its value.  Variables R0 up to RN are created,
103       together with phi nodes that transfer values from R1 .. RN to
104       R0 .. R(N-1).
105       Initial values are loaded to R0..R(N-1) (in case not all references
106       must necessarily be accessed and they may trap, we may fail here;
107       TODO sometimes, the loads could be guarded by a check for the number
108       of iterations).  Values loaded/stored in roots are also copied to
109       RN.  Other reads are replaced with the appropriate variable Ri.
110       Everything is put to SSA form.
111 
112       As a small improvement, if R0 is dead after the root (i.e., all uses of
113       the value with the maximum distance dominate the root), we can avoid
114       creating RN and use R0 instead of it.
115 
116       In our example, we get (only the parts concerning a and b are shown):
117       for (i = 0; i < 100; i++)
118 	{
119 	  f = phi (a[0], s);
120 	  s = phi (a[1], f);
121 	  x = phi (b[10], x);
122 
123 	  f = f + s;
124 	  a[i+2] = f;
125 	  x = x + i;
126 	  b[10] = x;
127 	}
128 
129    6) Factor F for unrolling is determined as the smallest common multiple of
130       (N + 1) for each root reference (N for references for that we avoided
131       creating RN).  If F and the loop is small enough, loop is unrolled F
132       times.  The stores to RN (R0) in the copies of the loop body are
133       periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134       be coalesced and the copies can be eliminated.
135 
136       TODO -- copy propagation and other optimizations may change the live
137       ranges of the temporary registers and prevent them from being coalesced;
138       this may increase the register pressure.
139 
140       In our case, F = 2 and the (main loop of the) result is
141 
142       for (i = 0; i < ...; i += 2)
143         {
144           f = phi (a[0], f);
145           s = phi (a[1], s);
146           x = phi (b[10], x);
147 
148           f = f + s;
149           a[i+2] = f;
150           x = x + i;
151           b[10] = x;
152 
153           s = s + f;
154           a[i+3] = s;
155           x = x + i;
156           b[10] = x;
157        }
158 
159    Apart from predictive commoning on Load-Load and Store-Load chains, we
160    also support Store-Store chains -- stores killed by other store can be
161    eliminated.  Given below example:
162 
163      for (i = 0; i < n; i++)
164        {
165 	 a[i] = 1;
166 	 a[i+2] = 2;
167        }
168 
169    It can be replaced with:
170 
171      t0 = a[0];
172      t1 = a[1];
173      for (i = 0; i < n; i++)
174        {
175 	 a[i] = 1;
176 	 t2 = 2;
177 	 t0 = t1;
178 	 t1 = t2;
179        }
180      a[n] = t0;
181      a[n+1] = t1;
182 
183    If the loop runs more than 1 iterations, it can be further simplified into:
184 
185      for (i = 0; i < n; i++)
186        {
187 	 a[i] = 1;
188        }
189      a[n] = 2;
190      a[n+1] = 2;
191 
192    The interesting part is this can be viewed either as general store motion
193    or general dead store elimination in either intra/inter-iterations way.
194 
195    With trivial effort, we also support load inside Store-Store chains if the
196    load is dominated by a store statement in the same iteration of loop.  You
197    can see this as a restricted Store-Mixed-Load-Store chain.
198 
199    TODO: For now, we don't support store-store chains in multi-exit loops.  We
200    force to not unroll in case of store-store chain even if other chains might
201    ask for unroll.
202 
203    Predictive commoning can be generalized for arbitrary computations (not
204    just memory loads), and also nontrivial transfer functions (e.g., replacing
205    i * i with ii_last + 2 * i + 1), to generalize strength reduction.  */
206 
207 #include "config.h"
208 #include "system.h"
209 #include "coretypes.h"
210 #include "backend.h"
211 #include "rtl.h"
212 #include "tree.h"
213 #include "gimple.h"
214 #include "predict.h"
215 #include "tree-pass.h"
216 #include "ssa.h"
217 #include "gimple-pretty-print.h"
218 #include "alias.h"
219 #include "fold-const.h"
220 #include "cfgloop.h"
221 #include "tree-eh.h"
222 #include "gimplify.h"
223 #include "gimple-iterator.h"
224 #include "gimplify-me.h"
225 #include "tree-ssa-loop-ivopts.h"
226 #include "tree-ssa-loop-manip.h"
227 #include "tree-ssa-loop-niter.h"
228 #include "tree-ssa-loop.h"
229 #include "tree-into-ssa.h"
230 #include "tree-dfa.h"
231 #include "tree-ssa.h"
232 #include "tree-data-ref.h"
233 #include "tree-scalar-evolution.h"
234 #include "params.h"
235 #include "tree-affine.h"
236 #include "builtins.h"
237 
238 /* The maximum number of iterations between the considered memory
239    references.  */
240 
241 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
242 
243 /* Data references (or phi nodes that carry data reference values across
244    loop iterations).  */
245 
246 typedef struct dref_d
247 {
248   /* The reference itself.  */
249   struct data_reference *ref;
250 
251   /* The statement in that the reference appears.  */
252   gimple *stmt;
253 
254   /* In case that STMT is a phi node, this field is set to the SSA name
255      defined by it in replace_phis_by_defined_names (in order to avoid
256      pointing to phi node that got reallocated in the meantime).  */
257   tree name_defined_by_phi;
258 
259   /* Distance of the reference from the root of the chain (in number of
260      iterations of the loop).  */
261   unsigned distance;
262 
263   /* Number of iterations offset from the first reference in the component.  */
264   widest_int offset;
265 
266   /* Number of the reference in a component, in dominance ordering.  */
267   unsigned pos;
268 
269   /* True if the memory reference is always accessed when the loop is
270      entered.  */
271   unsigned always_accessed : 1;
272 } *dref;
273 
274 
275 /* Type of the chain of the references.  */
276 
277 enum chain_type
278 {
279   /* The addresses of the references in the chain are constant.  */
280   CT_INVARIANT,
281 
282   /* There are only loads in the chain.  */
283   CT_LOAD,
284 
285   /* Root of the chain is store, the rest are loads.  */
286   CT_STORE_LOAD,
287 
288   /* There are only stores in the chain.  */
289   CT_STORE_STORE,
290 
291   /* A combination of two chains.  */
292   CT_COMBINATION
293 };
294 
295 /* Chains of data references.  */
296 
297 typedef struct chain
298 {
299   /* Type of the chain.  */
300   enum chain_type type;
301 
302   /* For combination chains, the operator and the two chains that are
303      combined, and the type of the result.  */
304   enum tree_code op;
305   tree rslt_type;
306   struct chain *ch1, *ch2;
307 
308   /* The references in the chain.  */
309   vec<dref> refs;
310 
311   /* The maximum distance of the reference in the chain from the root.  */
312   unsigned length;
313 
314   /* The variables used to copy the value throughout iterations.  */
315   vec<tree> vars;
316 
317   /* Initializers for the variables.  */
318   vec<tree> inits;
319 
320   /* Finalizers for the eliminated stores.  */
321   vec<tree> finis;
322 
323   /* gimple stmts intializing the initial variables of the chain.  */
324   gimple_seq init_seq;
325 
326   /* gimple stmts finalizing the eliminated stores of the chain.  */
327   gimple_seq fini_seq;
328 
329   /* True if there is a use of a variable with the maximal distance
330      that comes after the root in the loop.  */
331   unsigned has_max_use_after : 1;
332 
333   /* True if all the memory references in the chain are always accessed.  */
334   unsigned all_always_accessed : 1;
335 
336   /* True if this chain was combined together with some other chain.  */
337   unsigned combined : 1;
338 
339   /* True if this is store elimination chain and eliminated stores store
340      loop invariant value into memory.  */
341   unsigned inv_store_elimination : 1;
342 } *chain_p;
343 
344 
345 /* Describes the knowledge about the step of the memory references in
346    the component.  */
347 
348 enum ref_step_type
349 {
350   /* The step is zero.  */
351   RS_INVARIANT,
352 
353   /* The step is nonzero.  */
354   RS_NONZERO,
355 
356   /* The step may or may not be nonzero.  */
357   RS_ANY
358 };
359 
360 /* Components of the data dependence graph.  */
361 
362 struct component
363 {
364   /* The references in the component.  */
365   vec<dref> refs;
366 
367   /* What we know about the step of the references in the component.  */
368   enum ref_step_type comp_step;
369 
370   /* True if all references in component are stores and we try to do
371      intra/inter loop iteration dead store elimination.  */
372   bool eliminate_store_p;
373 
374   /* Next component in the list.  */
375   struct component *next;
376 };
377 
378 /* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */
379 
380 static bitmap looparound_phis;
381 
382 /* Cache used by tree_to_aff_combination_expand.  */
383 
384 static hash_map<tree, name_expansion *> *name_expansions;
385 
386 /* Dumps data reference REF to FILE.  */
387 
388 extern void dump_dref (FILE *, dref);
389 void
390 dump_dref (FILE *file, dref ref)
391 {
392   if (ref->ref)
393     {
394       fprintf (file, "    ");
395       print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
396       fprintf (file, " (id %u%s)\n", ref->pos,
397 	       DR_IS_READ (ref->ref) ? "" : ", write");
398 
399       fprintf (file, "      offset ");
400       print_decs (ref->offset, file);
401       fprintf (file, "\n");
402 
403       fprintf (file, "      distance %u\n", ref->distance);
404     }
405   else
406     {
407       if (gimple_code (ref->stmt) == GIMPLE_PHI)
408 	fprintf (file, "    looparound ref\n");
409       else
410 	fprintf (file, "    combination ref\n");
411       fprintf (file, "      in statement ");
412       print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
413       fprintf (file, "\n");
414       fprintf (file, "      distance %u\n", ref->distance);
415     }
416 
417 }
418 
419 /* Dumps CHAIN to FILE.  */
420 
421 extern void dump_chain (FILE *, chain_p);
422 void
423 dump_chain (FILE *file, chain_p chain)
424 {
425   dref a;
426   const char *chain_type;
427   unsigned i;
428   tree var;
429 
430   switch (chain->type)
431     {
432     case CT_INVARIANT:
433       chain_type = "Load motion";
434       break;
435 
436     case CT_LOAD:
437       chain_type = "Loads-only";
438       break;
439 
440     case CT_STORE_LOAD:
441       chain_type = "Store-loads";
442       break;
443 
444     case CT_STORE_STORE:
445       chain_type = "Store-stores";
446       break;
447 
448     case CT_COMBINATION:
449       chain_type = "Combination";
450       break;
451 
452     default:
453       gcc_unreachable ();
454     }
455 
456   fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
457 	   chain->combined ? " (combined)" : "");
458   if (chain->type != CT_INVARIANT)
459     fprintf (file, "  max distance %u%s\n", chain->length,
460 	     chain->has_max_use_after ? "" : ", may reuse first");
461 
462   if (chain->type == CT_COMBINATION)
463     {
464       fprintf (file, "  equal to %p %s %p in type ",
465 	       (void *) chain->ch1, op_symbol_code (chain->op),
466 	       (void *) chain->ch2);
467       print_generic_expr (file, chain->rslt_type, TDF_SLIM);
468       fprintf (file, "\n");
469     }
470 
471   if (chain->vars.exists ())
472     {
473       fprintf (file, "  vars");
474       FOR_EACH_VEC_ELT (chain->vars, i, var)
475 	{
476 	  fprintf (file, " ");
477 	  print_generic_expr (file, var, TDF_SLIM);
478 	}
479       fprintf (file, "\n");
480     }
481 
482   if (chain->inits.exists ())
483     {
484       fprintf (file, "  inits");
485       FOR_EACH_VEC_ELT (chain->inits, i, var)
486 	{
487 	  fprintf (file, " ");
488 	  print_generic_expr (file, var, TDF_SLIM);
489 	}
490       fprintf (file, "\n");
491     }
492 
493   fprintf (file, "  references:\n");
494   FOR_EACH_VEC_ELT (chain->refs, i, a)
495     dump_dref (file, a);
496 
497   fprintf (file, "\n");
498 }
499 
500 /* Dumps CHAINS to FILE.  */
501 
502 extern void dump_chains (FILE *, vec<chain_p> );
503 void
504 dump_chains (FILE *file, vec<chain_p> chains)
505 {
506   chain_p chain;
507   unsigned i;
508 
509   FOR_EACH_VEC_ELT (chains, i, chain)
510     dump_chain (file, chain);
511 }
512 
513 /* Dumps COMP to FILE.  */
514 
515 extern void dump_component (FILE *, struct component *);
516 void
517 dump_component (FILE *file, struct component *comp)
518 {
519   dref a;
520   unsigned i;
521 
522   fprintf (file, "Component%s:\n",
523 	   comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
524   FOR_EACH_VEC_ELT (comp->refs, i, a)
525     dump_dref (file, a);
526   fprintf (file, "\n");
527 }
528 
529 /* Dumps COMPS to FILE.  */
530 
531 extern void dump_components (FILE *, struct component *);
532 void
533 dump_components (FILE *file, struct component *comps)
534 {
535   struct component *comp;
536 
537   for (comp = comps; comp; comp = comp->next)
538     dump_component (file, comp);
539 }
540 
541 /* Frees a chain CHAIN.  */
542 
543 static void
544 release_chain (chain_p chain)
545 {
546   dref ref;
547   unsigned i;
548 
549   if (chain == NULL)
550     return;
551 
552   FOR_EACH_VEC_ELT (chain->refs, i, ref)
553     free (ref);
554 
555   chain->refs.release ();
556   chain->vars.release ();
557   chain->inits.release ();
558   if (chain->init_seq)
559     gimple_seq_discard (chain->init_seq);
560 
561   chain->finis.release ();
562   if (chain->fini_seq)
563     gimple_seq_discard (chain->fini_seq);
564 
565   free (chain);
566 }
567 
568 /* Frees CHAINS.  */
569 
570 static void
571 release_chains (vec<chain_p> chains)
572 {
573   unsigned i;
574   chain_p chain;
575 
576   FOR_EACH_VEC_ELT (chains, i, chain)
577     release_chain (chain);
578   chains.release ();
579 }
580 
581 /* Frees a component COMP.  */
582 
583 static void
584 release_component (struct component *comp)
585 {
586   comp->refs.release ();
587   free (comp);
588 }
589 
590 /* Frees list of components COMPS.  */
591 
592 static void
593 release_components (struct component *comps)
594 {
595   struct component *act, *next;
596 
597   for (act = comps; act; act = next)
598     {
599       next = act->next;
600       release_component (act);
601     }
602 }
603 
604 /* Finds a root of tree given by FATHERS containing A, and performs path
605    shortening.  */
606 
607 static unsigned
608 component_of (unsigned fathers[], unsigned a)
609 {
610   unsigned root, n;
611 
612   for (root = a; root != fathers[root]; root = fathers[root])
613     continue;
614 
615   for (; a != root; a = n)
616     {
617       n = fathers[a];
618       fathers[a] = root;
619     }
620 
621   return root;
622 }
623 
624 /* Join operation for DFU.  FATHERS gives the tree, SIZES are sizes of the
625    components, A and B are components to merge.  */
626 
627 static void
628 merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
629 {
630   unsigned ca = component_of (fathers, a);
631   unsigned cb = component_of (fathers, b);
632 
633   if (ca == cb)
634     return;
635 
636   if (sizes[ca] < sizes[cb])
637     {
638       sizes[cb] += sizes[ca];
639       fathers[ca] = cb;
640     }
641   else
642     {
643       sizes[ca] += sizes[cb];
644       fathers[cb] = ca;
645     }
646 }
647 
648 /* Returns true if A is a reference that is suitable for predictive commoning
649    in the innermost loop that contains it.  REF_STEP is set according to the
650    step of the reference A.  */
651 
652 static bool
653 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
654 {
655   tree ref = DR_REF (a), step = DR_STEP (a);
656 
657   if (!step
658       || TREE_THIS_VOLATILE (ref)
659       || !is_gimple_reg_type (TREE_TYPE (ref))
660       || tree_could_throw_p (ref))
661     return false;
662 
663   if (integer_zerop (step))
664     *ref_step = RS_INVARIANT;
665   else if (integer_nonzerop (step))
666     *ref_step = RS_NONZERO;
667   else
668     *ref_step = RS_ANY;
669 
670   return true;
671 }
672 
673 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET.  */
674 
675 static void
676 aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
677 {
678   tree type = TREE_TYPE (DR_OFFSET (dr));
679   aff_tree delta;
680 
681   tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
682 				  &name_expansions);
683   aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr)));
684   aff_combination_add (offset, &delta);
685 }
686 
687 /* Determines number of iterations of the innermost enclosing loop before B
688    refers to exactly the same location as A and stores it to OFF.  If A and
689    B do not have the same step, they never meet, or anything else fails,
690    returns false, otherwise returns true.  Both A and B are assumed to
691    satisfy suitable_reference_p.  */
692 
693 static bool
694 determine_offset (struct data_reference *a, struct data_reference *b,
695 		  poly_widest_int *off)
696 {
697   aff_tree diff, baseb, step;
698   tree typea, typeb;
699 
700   /* Check that both the references access the location in the same type.  */
701   typea = TREE_TYPE (DR_REF (a));
702   typeb = TREE_TYPE (DR_REF (b));
703   if (!useless_type_conversion_p (typeb, typea))
704     return false;
705 
706   /* Check whether the base address and the step of both references is the
707      same.  */
708   if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
709       || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
710     return false;
711 
712   if (integer_zerop (DR_STEP (a)))
713     {
714       /* If the references have loop invariant address, check that they access
715 	 exactly the same location.  */
716       *off = 0;
717       return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
718 	      && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
719     }
720 
721   /* Compare the offsets of the addresses, and check whether the difference
722      is a multiple of step.  */
723   aff_combination_dr_offset (a, &diff);
724   aff_combination_dr_offset (b, &baseb);
725   aff_combination_scale (&baseb, -1);
726   aff_combination_add (&diff, &baseb);
727 
728   tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
729 				  &step, &name_expansions);
730   return aff_combination_constant_multiple_p (&diff, &step, off);
731 }
732 
733 /* Returns the last basic block in LOOP for that we are sure that
734    it is executed whenever the loop is entered.  */
735 
736 static basic_block
737 last_always_executed_block (struct loop *loop)
738 {
739   unsigned i;
740   vec<edge> exits = get_loop_exit_edges (loop);
741   edge ex;
742   basic_block last = loop->latch;
743 
744   FOR_EACH_VEC_ELT (exits, i, ex)
745     last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
746   exits.release ();
747 
748   return last;
749 }
750 
751 /* Splits dependence graph on DATAREFS described by DEPENDS to components.  */
752 
753 static struct component *
754 split_data_refs_to_components (struct loop *loop,
755 			       vec<data_reference_p> datarefs,
756 			       vec<ddr_p> depends)
757 {
758   unsigned i, n = datarefs.length ();
759   unsigned ca, ia, ib, bad;
760   unsigned *comp_father = XNEWVEC (unsigned, n + 1);
761   unsigned *comp_size = XNEWVEC (unsigned, n + 1);
762   struct component **comps;
763   struct data_reference *dr, *dra, *drb;
764   struct data_dependence_relation *ddr;
765   struct component *comp_list = NULL, *comp;
766   dref dataref;
767   /* Don't do store elimination if loop has multiple exit edges.  */
768   bool eliminate_store_p = single_exit (loop) != NULL;
769   basic_block last_always_executed = last_always_executed_block (loop);
770 
771   FOR_EACH_VEC_ELT (datarefs, i, dr)
772     {
773       if (!DR_REF (dr))
774 	{
775 	  /* A fake reference for call or asm_expr that may clobber memory;
776 	     just fail.  */
777 	  goto end;
778 	}
779       /* predcom pass isn't prepared to handle calls with data references.  */
780       if (is_gimple_call (DR_STMT (dr)))
781 	goto end;
782       dr->aux = (void *) (size_t) i;
783       comp_father[i] = i;
784       comp_size[i] = 1;
785     }
786 
787   /* A component reserved for the "bad" data references.  */
788   comp_father[n] = n;
789   comp_size[n] = 1;
790 
791   FOR_EACH_VEC_ELT (datarefs, i, dr)
792     {
793       enum ref_step_type dummy;
794 
795       if (!suitable_reference_p (dr, &dummy))
796 	{
797 	  ia = (unsigned) (size_t) dr->aux;
798 	  merge_comps (comp_father, comp_size, n, ia);
799 	}
800     }
801 
802   FOR_EACH_VEC_ELT (depends, i, ddr)
803     {
804       poly_widest_int dummy_off;
805 
806       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
807 	continue;
808 
809       dra = DDR_A (ddr);
810       drb = DDR_B (ddr);
811 
812       /* Don't do store elimination if there is any unknown dependence for
813 	 any store data reference.  */
814       if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
815 	  && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
816 	      || DDR_NUM_DIST_VECTS (ddr) == 0))
817 	eliminate_store_p = false;
818 
819       ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
820       ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
821       if (ia == ib)
822 	continue;
823 
824       bad = component_of (comp_father, n);
825 
826       /* If both A and B are reads, we may ignore unsuitable dependences.  */
827       if (DR_IS_READ (dra) && DR_IS_READ (drb))
828 	{
829 	  if (ia == bad || ib == bad
830 	      || !determine_offset (dra, drb, &dummy_off))
831 	    continue;
832 	}
833       /* If A is read and B write or vice versa and there is unsuitable
834 	 dependence, instead of merging both components into a component
835 	 that will certainly not pass suitable_component_p, just put the
836 	 read into bad component, perhaps at least the write together with
837 	 all the other data refs in it's component will be optimizable.  */
838       else if (DR_IS_READ (dra) && ib != bad)
839 	{
840 	  if (ia == bad)
841 	    continue;
842 	  else if (!determine_offset (dra, drb, &dummy_off))
843 	    {
844 	      merge_comps (comp_father, comp_size, bad, ia);
845 	      continue;
846 	    }
847 	}
848       else if (DR_IS_READ (drb) && ia != bad)
849 	{
850 	  if (ib == bad)
851 	    continue;
852 	  else if (!determine_offset (dra, drb, &dummy_off))
853 	    {
854 	      merge_comps (comp_father, comp_size, bad, ib);
855 	      continue;
856 	    }
857 	}
858       else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)
859 	       && ia != bad && ib != bad
860 	       && !determine_offset (dra, drb, &dummy_off))
861 	{
862 	  merge_comps (comp_father, comp_size, bad, ia);
863 	  merge_comps (comp_father, comp_size, bad, ib);
864 	  continue;
865 	}
866 
867       merge_comps (comp_father, comp_size, ia, ib);
868     }
869 
870   if (eliminate_store_p)
871     {
872       tree niters = number_of_latch_executions (loop);
873 
874       /* Don't do store elimination if niters info is unknown because stores
875 	 in the last iteration can't be eliminated and we need to recover it
876 	 after loop.  */
877       eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know);
878     }
879 
880   comps = XCNEWVEC (struct component *, n);
881   bad = component_of (comp_father, n);
882   FOR_EACH_VEC_ELT (datarefs, i, dr)
883     {
884       ia = (unsigned) (size_t) dr->aux;
885       ca = component_of (comp_father, ia);
886       if (ca == bad)
887 	continue;
888 
889       comp = comps[ca];
890       if (!comp)
891 	{
892 	  comp = XCNEW (struct component);
893 	  comp->refs.create (comp_size[ca]);
894 	  comp->eliminate_store_p = eliminate_store_p;
895 	  comps[ca] = comp;
896 	}
897 
898       dataref = XCNEW (struct dref_d);
899       dataref->ref = dr;
900       dataref->stmt = DR_STMT (dr);
901       dataref->offset = 0;
902       dataref->distance = 0;
903 
904       dataref->always_accessed
905 	      = dominated_by_p (CDI_DOMINATORS, last_always_executed,
906 				gimple_bb (dataref->stmt));
907       dataref->pos = comp->refs.length ();
908       comp->refs.quick_push (dataref);
909     }
910 
911   for (i = 0; i < n; i++)
912     {
913       comp = comps[i];
914       if (comp)
915 	{
916 	  comp->next = comp_list;
917 	  comp_list = comp;
918 	}
919     }
920   free (comps);
921 
922 end:
923   free (comp_father);
924   free (comp_size);
925   return comp_list;
926 }
927 
928 /* Returns true if the component COMP satisfies the conditions
929    described in 2) at the beginning of this file.  LOOP is the current
930    loop.  */
931 
932 static bool
933 suitable_component_p (struct loop *loop, struct component *comp)
934 {
935   unsigned i;
936   dref a, first;
937   basic_block ba, bp = loop->header;
938   bool ok, has_write = false;
939 
940   FOR_EACH_VEC_ELT (comp->refs, i, a)
941     {
942       ba = gimple_bb (a->stmt);
943 
944       if (!just_once_each_iteration_p (loop, ba))
945 	return false;
946 
947       gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
948       bp = ba;
949 
950       if (DR_IS_WRITE (a->ref))
951 	has_write = true;
952     }
953 
954   first = comp->refs[0];
955   ok = suitable_reference_p (first->ref, &comp->comp_step);
956   gcc_assert (ok);
957   first->offset = 0;
958 
959   for (i = 1; comp->refs.iterate (i, &a); i++)
960     {
961       /* Polynomial offsets are no use, since we need to know the
962 	 gap between iteration numbers at compile time.  */
963       poly_widest_int offset;
964       if (!determine_offset (first->ref, a->ref, &offset)
965 	  || !offset.is_constant (&a->offset))
966 	return false;
967 
968       enum ref_step_type a_step;
969       gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
970 			   && a_step == comp->comp_step);
971     }
972 
973   /* If there is a write inside the component, we must know whether the
974      step is nonzero or not -- we would not otherwise be able to recognize
975      whether the value accessed by reads comes from the OFFSET-th iteration
976      or the previous one.  */
977   if (has_write && comp->comp_step == RS_ANY)
978     return false;
979 
980   return true;
981 }
982 
983 /* Check the conditions on references inside each of components COMPS,
984    and remove the unsuitable components from the list.  The new list
985    of components is returned.  The conditions are described in 2) at
986    the beginning of this file.  LOOP is the current loop.  */
987 
988 static struct component *
989 filter_suitable_components (struct loop *loop, struct component *comps)
990 {
991   struct component **comp, *act;
992 
993   for (comp = &comps; *comp; )
994     {
995       act = *comp;
996       if (suitable_component_p (loop, act))
997 	comp = &act->next;
998       else
999 	{
1000 	  dref ref;
1001 	  unsigned i;
1002 
1003 	  *comp = act->next;
1004 	  FOR_EACH_VEC_ELT (act->refs, i, ref)
1005 	    free (ref);
1006 	  release_component (act);
1007 	}
1008     }
1009 
1010   return comps;
1011 }
1012 
1013 /* Compares two drefs A and B by their offset and position.  Callback for
1014    qsort.  */
1015 
1016 static int
1017 order_drefs (const void *a, const void *b)
1018 {
1019   const dref *const da = (const dref *) a;
1020   const dref *const db = (const dref *) b;
1021   int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
1022 
1023   if (offcmp != 0)
1024     return offcmp;
1025 
1026   return (*da)->pos - (*db)->pos;
1027 }
1028 
1029 /* Compares two drefs A and B by their position.  Callback for qsort.  */
1030 
1031 static int
1032 order_drefs_by_pos (const void *a, const void *b)
1033 {
1034   const dref *const da = (const dref *) a;
1035   const dref *const db = (const dref *) b;
1036 
1037   return (*da)->pos - (*db)->pos;
1038 }
1039 
1040 /* Returns root of the CHAIN.  */
1041 
1042 static inline dref
1043 get_chain_root (chain_p chain)
1044 {
1045   return chain->refs[0];
1046 }
1047 
1048 /* Given CHAIN, returns the last write ref at DISTANCE, or NULL if it doesn't
1049    exist.  */
1050 
1051 static inline dref
1052 get_chain_last_write_at (chain_p chain, unsigned distance)
1053 {
1054   for (unsigned i = chain->refs.length (); i > 0; i--)
1055     if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1056 	&& distance == chain->refs[i - 1]->distance)
1057       return chain->refs[i - 1];
1058 
1059   return NULL;
1060 }
1061 
1062 /* Given CHAIN, returns the last write ref with the same distance before load
1063    at index LOAD_IDX, or NULL if it doesn't exist.  */
1064 
1065 static inline dref
1066 get_chain_last_write_before_load (chain_p chain, unsigned load_idx)
1067 {
1068   gcc_assert (load_idx < chain->refs.length ());
1069 
1070   unsigned distance = chain->refs[load_idx]->distance;
1071 
1072   for (unsigned i = load_idx; i > 0; i--)
1073     if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1074 	&& distance == chain->refs[i - 1]->distance)
1075       return chain->refs[i - 1];
1076 
1077   return NULL;
1078 }
1079 
1080 /* Adds REF to the chain CHAIN.  */
1081 
1082 static void
1083 add_ref_to_chain (chain_p chain, dref ref)
1084 {
1085   dref root = get_chain_root (chain);
1086 
1087   gcc_assert (wi::les_p (root->offset, ref->offset));
1088   widest_int dist = ref->offset - root->offset;
1089   gcc_assert (wi::fits_uhwi_p (dist));
1090 
1091   chain->refs.safe_push (ref);
1092 
1093   ref->distance = dist.to_uhwi ();
1094 
1095   if (ref->distance >= chain->length)
1096     {
1097       chain->length = ref->distance;
1098       chain->has_max_use_after = false;
1099     }
1100 
1101   /* Promote this chain to CT_STORE_STORE if it has multiple stores.  */
1102   if (DR_IS_WRITE (ref->ref))
1103     chain->type = CT_STORE_STORE;
1104 
1105   /* Don't set the flag for store-store chain since there is no use.  */
1106   if (chain->type != CT_STORE_STORE
1107       && ref->distance == chain->length
1108       && ref->pos > root->pos)
1109     chain->has_max_use_after = true;
1110 
1111   chain->all_always_accessed &= ref->always_accessed;
1112 }
1113 
1114 /* Returns the chain for invariant component COMP.  */
1115 
1116 static chain_p
1117 make_invariant_chain (struct component *comp)
1118 {
1119   chain_p chain = XCNEW (struct chain);
1120   unsigned i;
1121   dref ref;
1122 
1123   chain->type = CT_INVARIANT;
1124 
1125   chain->all_always_accessed = true;
1126 
1127   FOR_EACH_VEC_ELT (comp->refs, i, ref)
1128     {
1129       chain->refs.safe_push (ref);
1130       chain->all_always_accessed &= ref->always_accessed;
1131     }
1132 
1133   chain->inits = vNULL;
1134   chain->finis = vNULL;
1135 
1136   return chain;
1137 }
1138 
1139 /* Make a new chain of type TYPE rooted at REF.  */
1140 
1141 static chain_p
1142 make_rooted_chain (dref ref, enum chain_type type)
1143 {
1144   chain_p chain = XCNEW (struct chain);
1145 
1146   chain->type = type;
1147   chain->refs.safe_push (ref);
1148   chain->all_always_accessed = ref->always_accessed;
1149   ref->distance = 0;
1150 
1151   chain->inits = vNULL;
1152   chain->finis = vNULL;
1153 
1154   return chain;
1155 }
1156 
1157 /* Returns true if CHAIN is not trivial.  */
1158 
1159 static bool
1160 nontrivial_chain_p (chain_p chain)
1161 {
1162   return chain != NULL && chain->refs.length () > 1;
1163 }
1164 
1165 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1166    is no such name.  */
1167 
1168 static tree
1169 name_for_ref (dref ref)
1170 {
1171   tree name;
1172 
1173   if (is_gimple_assign (ref->stmt))
1174     {
1175       if (!ref->ref || DR_IS_READ (ref->ref))
1176 	name = gimple_assign_lhs (ref->stmt);
1177       else
1178 	name = gimple_assign_rhs1 (ref->stmt);
1179     }
1180   else
1181     name = PHI_RESULT (ref->stmt);
1182 
1183   return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1184 }
1185 
1186 /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1187    iterations of the innermost enclosing loop).  */
1188 
1189 static bool
1190 valid_initializer_p (struct data_reference *ref,
1191 		     unsigned distance, struct data_reference *root)
1192 {
1193   aff_tree diff, base, step;
1194   poly_widest_int off;
1195 
1196   /* Both REF and ROOT must be accessing the same object.  */
1197   if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
1198     return false;
1199 
1200   /* The initializer is defined outside of loop, hence its address must be
1201      invariant inside the loop.  */
1202   gcc_assert (integer_zerop (DR_STEP (ref)));
1203 
1204   /* If the address of the reference is invariant, initializer must access
1205      exactly the same location.  */
1206   if (integer_zerop (DR_STEP (root)))
1207     return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
1208 	    && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));
1209 
1210   /* Verify that this index of REF is equal to the root's index at
1211      -DISTANCE-th iteration.  */
1212   aff_combination_dr_offset (root, &diff);
1213   aff_combination_dr_offset (ref, &base);
1214   aff_combination_scale (&base, -1);
1215   aff_combination_add (&diff, &base);
1216 
1217   tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
1218 				  &step, &name_expansions);
1219   if (!aff_combination_constant_multiple_p (&diff, &step, &off))
1220     return false;
1221 
1222   if (maybe_ne (off, distance))
1223     return false;
1224 
1225   return true;
1226 }
1227 
1228 /* Finds looparound phi node of LOOP that copies the value of REF, and if its
1229    initial value is correct (equal to initial value of REF shifted by one
1230    iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
1231    is the root of the current chain.  */
1232 
1233 static gphi *
1234 find_looparound_phi (struct loop *loop, dref ref, dref root)
1235 {
1236   tree name, init, init_ref;
1237   gphi *phi = NULL;
1238   gimple *init_stmt;
1239   edge latch = loop_latch_edge (loop);
1240   struct data_reference init_dr;
1241   gphi_iterator psi;
1242 
1243   if (is_gimple_assign (ref->stmt))
1244     {
1245       if (DR_IS_READ (ref->ref))
1246 	name = gimple_assign_lhs (ref->stmt);
1247       else
1248 	name = gimple_assign_rhs1 (ref->stmt);
1249     }
1250   else
1251     name = PHI_RESULT (ref->stmt);
1252   if (!name)
1253     return NULL;
1254 
1255   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1256     {
1257       phi = psi.phi ();
1258       if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
1259 	break;
1260     }
1261 
1262   if (gsi_end_p (psi))
1263     return NULL;
1264 
1265   init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1266   if (TREE_CODE (init) != SSA_NAME)
1267     return NULL;
1268   init_stmt = SSA_NAME_DEF_STMT (init);
1269   if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
1270     return NULL;
1271   gcc_assert (gimple_assign_lhs (init_stmt) == init);
1272 
1273   init_ref = gimple_assign_rhs1 (init_stmt);
1274   if (!REFERENCE_CLASS_P (init_ref)
1275       && !DECL_P (init_ref))
1276     return NULL;
1277 
1278   /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1279      loop enclosing PHI).  */
1280   memset (&init_dr, 0, sizeof (struct data_reference));
1281   DR_REF (&init_dr) = init_ref;
1282   DR_STMT (&init_dr) = phi;
1283   if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop))
1284     return NULL;
1285 
1286   if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
1287     return NULL;
1288 
1289   return phi;
1290 }
1291 
1292 /* Adds a reference for the looparound copy of REF in PHI to CHAIN.  */
1293 
1294 static void
1295 insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
1296 {
1297   dref nw = XCNEW (struct dref_d), aref;
1298   unsigned i;
1299 
1300   nw->stmt = phi;
1301   nw->distance = ref->distance + 1;
1302   nw->always_accessed = 1;
1303 
1304   FOR_EACH_VEC_ELT (chain->refs, i, aref)
1305     if (aref->distance >= nw->distance)
1306       break;
1307   chain->refs.safe_insert (i, nw);
1308 
1309   if (nw->distance > chain->length)
1310     {
1311       chain->length = nw->distance;
1312       chain->has_max_use_after = false;
1313     }
1314 }
1315 
1316 /* For references in CHAIN that are copied around the LOOP (created previously
1317    by PRE, or by user), add the results of such copies to the chain.  This
1318    enables us to remove the copies by unrolling, and may need less registers
1319    (also, it may allow us to combine chains together).  */
1320 
1321 static void
1322 add_looparound_copies (struct loop *loop, chain_p chain)
1323 {
1324   unsigned i;
1325   dref ref, root = get_chain_root (chain);
1326   gphi *phi;
1327 
1328   if (chain->type == CT_STORE_STORE)
1329     return;
1330 
1331   FOR_EACH_VEC_ELT (chain->refs, i, ref)
1332     {
1333       phi = find_looparound_phi (loop, ref, root);
1334       if (!phi)
1335 	continue;
1336 
1337       bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1338       insert_looparound_copy (chain, ref, phi);
1339     }
1340 }
1341 
1342 /* Find roots of the values and determine distances in the component COMP.
1343    The references are redistributed into CHAINS.  LOOP is the current
1344    loop.  */
1345 
1346 static void
1347 determine_roots_comp (struct loop *loop,
1348 		      struct component *comp,
1349 		      vec<chain_p> *chains)
1350 {
1351   unsigned i;
1352   dref a;
1353   chain_p chain = NULL;
1354   widest_int last_ofs = 0;
1355   enum chain_type type;
1356 
1357   /* Invariants are handled specially.  */
1358   if (comp->comp_step == RS_INVARIANT)
1359     {
1360       chain = make_invariant_chain (comp);
1361       chains->safe_push (chain);
1362       return;
1363     }
1364 
1365   /* Trivial component.  */
1366   if (comp->refs.length () <= 1)
1367     {
1368       if (comp->refs.length () == 1)
1369 	{
1370 	  free (comp->refs[0]);
1371 	  comp->refs.truncate (0);
1372 	}
1373       return;
1374     }
1375 
1376   comp->refs.qsort (order_drefs);
1377 
1378   /* For Store-Store chain, we only support load if it is dominated by a
1379      store statement in the same iteration of loop.  */
1380   if (comp->eliminate_store_p)
1381     for (a = NULL, i = 0; i < comp->refs.length (); i++)
1382       {
1383 	if (DR_IS_WRITE (comp->refs[i]->ref))
1384 	  a = comp->refs[i];
1385 	else if (a == NULL || a->offset != comp->refs[i]->offset)
1386 	  {
1387 	    /* If there is load that is not dominated by a store in the
1388 	       same iteration of loop, clear the flag so no Store-Store
1389 	       chain is generated for this component.  */
1390 	    comp->eliminate_store_p = false;
1391 	    break;
1392 	  }
1393       }
1394 
1395   /* Determine roots and create chains for components.  */
1396   FOR_EACH_VEC_ELT (comp->refs, i, a)
1397     {
1398       if (!chain
1399 	  || (chain->type == CT_LOAD && DR_IS_WRITE (a->ref))
1400 	  || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref))
1401 	  || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
1402 	{
1403 	  if (nontrivial_chain_p (chain))
1404 	    {
1405 	      add_looparound_copies (loop, chain);
1406 	      chains->safe_push (chain);
1407 	    }
1408 	  else
1409 	    release_chain (chain);
1410 
1411 	  /* Determine type of the chain.  If the root reference is a load,
1412 	     this can only be a CT_LOAD chain; other chains are intialized
1413 	     to CT_STORE_LOAD and might be promoted to CT_STORE_STORE when
1414 	     new reference is added.  */
1415 	  type = DR_IS_READ (a->ref) ? CT_LOAD : CT_STORE_LOAD;
1416 	  chain = make_rooted_chain (a, type);
1417 	  last_ofs = a->offset;
1418 	  continue;
1419 	}
1420 
1421       add_ref_to_chain (chain, a);
1422     }
1423 
1424   if (nontrivial_chain_p (chain))
1425     {
1426       add_looparound_copies (loop, chain);
1427       chains->safe_push (chain);
1428     }
1429   else
1430     release_chain (chain);
1431 }
1432 
1433 /* Find roots of the values and determine distances in components COMPS, and
1434    separates the references to CHAINS.  LOOP is the current loop.  */
1435 
1436 static void
1437 determine_roots (struct loop *loop,
1438 		 struct component *comps, vec<chain_p> *chains)
1439 {
1440   struct component *comp;
1441 
1442   for (comp = comps; comp; comp = comp->next)
1443     determine_roots_comp (loop, comp, chains);
1444 }
1445 
1446 /* Replace the reference in statement STMT with temporary variable
1447    NEW_TREE.  If SET is true, NEW_TREE is instead initialized to the value of
1448    the reference in the statement.  IN_LHS is true if the reference
1449    is in the lhs of STMT, false if it is in rhs.  */
1450 
1451 static void
1452 replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
1453 {
1454   tree val;
1455   gassign *new_stmt;
1456   gimple_stmt_iterator bsi, psi;
1457 
1458   if (gimple_code (stmt) == GIMPLE_PHI)
1459     {
1460       gcc_assert (!in_lhs && !set);
1461 
1462       val = PHI_RESULT (stmt);
1463       bsi = gsi_after_labels (gimple_bb (stmt));
1464       psi = gsi_for_stmt (stmt);
1465       remove_phi_node (&psi, false);
1466 
1467       /* Turn the phi node into GIMPLE_ASSIGN.  */
1468       new_stmt = gimple_build_assign (val, new_tree);
1469       gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
1470       return;
1471     }
1472 
1473   /* Since the reference is of gimple_reg type, it should only
1474      appear as lhs or rhs of modify statement.  */
1475   gcc_assert (is_gimple_assign (stmt));
1476 
1477   bsi = gsi_for_stmt (stmt);
1478 
1479   /* If we do not need to initialize NEW_TREE, just replace the use of OLD.  */
1480   if (!set)
1481     {
1482       gcc_assert (!in_lhs);
1483       gimple_assign_set_rhs_from_tree (&bsi, new_tree);
1484       stmt = gsi_stmt (bsi);
1485       update_stmt (stmt);
1486       return;
1487     }
1488 
1489   if (in_lhs)
1490     {
1491       /* We have statement
1492 
1493 	 OLD = VAL
1494 
1495 	 If OLD is a memory reference, then VAL is gimple_val, and we transform
1496 	 this to
1497 
1498 	 OLD = VAL
1499 	 NEW = VAL
1500 
1501 	 Otherwise, we are replacing a combination chain,
1502 	 VAL is the expression that performs the combination, and OLD is an
1503 	 SSA name.  In this case, we transform the assignment to
1504 
1505 	 OLD = VAL
1506 	 NEW = OLD
1507 
1508 	 */
1509 
1510       val = gimple_assign_lhs (stmt);
1511       if (TREE_CODE (val) != SSA_NAME)
1512 	{
1513 	  val = gimple_assign_rhs1 (stmt);
1514 	  gcc_assert (gimple_assign_single_p (stmt));
1515 	  if (TREE_CLOBBER_P (val))
1516 	    val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
1517 	  else
1518 	    gcc_assert (gimple_assign_copy_p (stmt));
1519 	}
1520     }
1521   else
1522     {
1523       /* VAL = OLD
1524 
1525 	 is transformed to
1526 
1527 	 VAL = OLD
1528 	 NEW = VAL  */
1529 
1530       val = gimple_assign_lhs (stmt);
1531     }
1532 
1533   new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
1534   gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
1535 }
1536 
1537 /* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
1538    of the loop it was analyzed in.  Append init stmts to STMTS.  */
1539 
1540 static tree
1541 ref_at_iteration (data_reference_p dr, int iter,
1542 		  gimple_seq *stmts, tree niters = NULL_TREE)
1543 {
1544   tree off = DR_OFFSET (dr);
1545   tree coff = DR_INIT (dr);
1546   tree ref = DR_REF (dr);
1547   enum tree_code ref_code = ERROR_MARK;
1548   tree ref_type = NULL_TREE;
1549   tree ref_op1 = NULL_TREE;
1550   tree ref_op2 = NULL_TREE;
1551   tree new_offset;
1552 
1553   if (iter != 0)
1554     {
1555       new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter));
1556       if (TREE_CODE (new_offset) == INTEGER_CST)
1557 	coff = size_binop (PLUS_EXPR, coff, new_offset);
1558       else
1559 	off = size_binop (PLUS_EXPR, off, new_offset);
1560     }
1561 
1562   if (niters != NULL_TREE)
1563     {
1564       niters = fold_convert (ssizetype, niters);
1565       new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters);
1566       if (TREE_CODE (niters) == INTEGER_CST)
1567 	coff = size_binop (PLUS_EXPR, coff, new_offset);
1568       else
1569 	off = size_binop (PLUS_EXPR, off, new_offset);
1570     }
1571 
1572   /* While data-ref analysis punts on bit offsets it still handles
1573      bitfield accesses at byte boundaries.  Cope with that.  Note that
1574      if the bitfield object also starts at a byte-boundary we can simply
1575      replicate the COMPONENT_REF, but we have to subtract the component's
1576      byte-offset from the MEM_REF address first.
1577      Otherwise we simply build a BIT_FIELD_REF knowing that the bits
1578      start at offset zero.  */
1579   if (TREE_CODE (ref) == COMPONENT_REF
1580       && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
1581     {
1582       unsigned HOST_WIDE_INT boff;
1583       tree field = TREE_OPERAND (ref, 1);
1584       tree offset = component_ref_field_offset (ref);
1585       ref_type = TREE_TYPE (ref);
1586       boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
1587       /* This can occur in Ada.  See the comment in get_bit_range.  */
1588       if (boff % BITS_PER_UNIT != 0
1589 	  || !tree_fits_uhwi_p (offset))
1590 	{
1591 	  ref_code = BIT_FIELD_REF;
1592 	  ref_op1 = DECL_SIZE (field);
1593 	  ref_op2 = bitsize_zero_node;
1594 	}
1595       else
1596 	{
1597 	  boff >>= LOG2_BITS_PER_UNIT;
1598 	  boff += tree_to_uhwi (offset);
1599 	  coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
1600 	  ref_code = COMPONENT_REF;
1601 	  ref_op1 = field;
1602 	  ref_op2 = TREE_OPERAND (ref, 2);
1603 	  ref = TREE_OPERAND (ref, 0);
1604 	}
1605     }
1606   tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
1607   addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
1608 				 is_gimple_mem_ref_addr, NULL_TREE);
1609   tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
1610   tree type = build_aligned_type (TREE_TYPE (ref),
1611 				  get_object_alignment (ref));
1612   ref = build2 (MEM_REF, type, addr, alias_ptr);
1613   if (ref_type)
1614     ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
1615   return ref;
1616 }
1617 
1618 /* Get the initialization expression for the INDEX-th temporary variable
1619    of CHAIN.  */
1620 
1621 static tree
1622 get_init_expr (chain_p chain, unsigned index)
1623 {
1624   if (chain->type == CT_COMBINATION)
1625     {
1626       tree e1 = get_init_expr (chain->ch1, index);
1627       tree e2 = get_init_expr (chain->ch2, index);
1628 
1629       return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1630     }
1631   else
1632     return chain->inits[index];
1633 }
1634 
1635 /* Returns a new temporary variable used for the I-th variable carrying
1636    value of REF.  The variable's uid is marked in TMP_VARS.  */
1637 
1638 static tree
1639 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1640 {
1641   tree type = TREE_TYPE (ref);
1642   /* We never access the components of the temporary variable in predictive
1643      commoning.  */
1644   tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1645   bitmap_set_bit (tmp_vars, DECL_UID (var));
1646   return var;
1647 }
1648 
1649 /* Creates the variables for CHAIN, as well as phi nodes for them and
1650    initialization on entry to LOOP.  Uids of the newly created
1651    temporary variables are marked in TMP_VARS.  */
1652 
1653 static void
1654 initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
1655 {
1656   unsigned i;
1657   unsigned n = chain->length;
1658   dref root = get_chain_root (chain);
1659   bool reuse_first = !chain->has_max_use_after;
1660   tree ref, init, var, next;
1661   gphi *phi;
1662   gimple_seq stmts;
1663   edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1664 
1665   /* If N == 0, then all the references are within the single iteration.  And
1666      since this is an nonempty chain, reuse_first cannot be true.  */
1667   gcc_assert (n > 0 || !reuse_first);
1668 
1669   chain->vars.create (n + 1);
1670 
1671   if (chain->type == CT_COMBINATION)
1672     ref = gimple_assign_lhs (root->stmt);
1673   else
1674     ref = DR_REF (root->ref);
1675 
1676   for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1677     {
1678       var = predcom_tmp_var (ref, i, tmp_vars);
1679       chain->vars.quick_push (var);
1680     }
1681   if (reuse_first)
1682     chain->vars.quick_push (chain->vars[0]);
1683 
1684   FOR_EACH_VEC_ELT (chain->vars, i, var)
1685     chain->vars[i] = make_ssa_name (var);
1686 
1687   for (i = 0; i < n; i++)
1688     {
1689       var = chain->vars[i];
1690       next = chain->vars[i + 1];
1691       init = get_init_expr (chain, i);
1692 
1693       init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1694       if (stmts)
1695 	gsi_insert_seq_on_edge_immediate (entry, stmts);
1696 
1697       phi = create_phi_node (var, loop->header);
1698       add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1699       add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1700     }
1701 }
1702 
1703 /* For inter-iteration store elimination CHAIN in LOOP, returns true if
1704    all stores to be eliminated store loop invariant values into memory.
1705    In this case, we can use these invariant values directly after LOOP.  */
1706 
1707 static bool
1708 is_inv_store_elimination_chain (struct loop *loop, chain_p chain)
1709 {
1710   if (chain->length == 0 || chain->type != CT_STORE_STORE)
1711     return false;
1712 
1713   gcc_assert (!chain->has_max_use_after);
1714 
1715   /* If loop iterates for unknown times or fewer times than chain->lenght,
1716      we still need to setup root variable and propagate it with PHI node.  */
1717   tree niters = number_of_latch_executions (loop);
1718   if (TREE_CODE (niters) != INTEGER_CST
1719       || wi::leu_p (wi::to_wide (niters), chain->length))
1720     return false;
1721 
1722   /* Check stores in chain for elimination if they only store loop invariant
1723      values.  */
1724   for (unsigned i = 0; i < chain->length; i++)
1725     {
1726       dref a = get_chain_last_write_at (chain, i);
1727       if (a == NULL)
1728 	continue;
1729 
1730       gimple *def_stmt, *stmt = a->stmt;
1731       if (!gimple_assign_single_p (stmt))
1732 	return false;
1733 
1734       tree val = gimple_assign_rhs1 (stmt);
1735       if (TREE_CLOBBER_P (val))
1736 	return false;
1737 
1738       if (CONSTANT_CLASS_P (val))
1739 	continue;
1740 
1741       if (TREE_CODE (val) != SSA_NAME)
1742 	return false;
1743 
1744       def_stmt = SSA_NAME_DEF_STMT (val);
1745       if (gimple_nop_p (def_stmt))
1746 	continue;
1747 
1748       if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
1749 	return false;
1750     }
1751   return true;
1752 }
1753 
1754 /* Creates root variables for store elimination CHAIN in which stores for
1755    elimination only store loop invariant values.  In this case, we neither
1756    need to load root variables before loop nor propagate it with PHI nodes.  */
1757 
1758 static void
1759 initialize_root_vars_store_elim_1 (chain_p chain)
1760 {
1761   tree var;
1762   unsigned i, n = chain->length;
1763 
1764   chain->vars.create (n);
1765   chain->vars.safe_grow_cleared (n);
1766 
1767   /* Initialize root value for eliminated stores at each distance.  */
1768   for (i = 0; i < n; i++)
1769     {
1770       dref a = get_chain_last_write_at (chain, i);
1771       if (a == NULL)
1772 	continue;
1773 
1774       var = gimple_assign_rhs1 (a->stmt);
1775       chain->vars[a->distance] = var;
1776     }
1777 
1778   /* We don't propagate values with PHI nodes, so manually propagate value
1779      to bubble positions.  */
1780   var = chain->vars[0];
1781   for (i = 1; i < n; i++)
1782     {
1783       if (chain->vars[i] != NULL_TREE)
1784 	{
1785 	  var = chain->vars[i];
1786 	  continue;
1787 	}
1788       chain->vars[i] = var;
1789     }
1790 
1791   /* Revert the vector.  */
1792   for (i = 0; i < n / 2; i++)
1793     std::swap (chain->vars[i], chain->vars[n - i - 1]);
1794 }
1795 
1796 /* Creates root variables for store elimination CHAIN in which stores for
1797    elimination store loop variant values.  In this case, we may need to
1798    load root variables before LOOP and propagate it with PHI nodes.  Uids
1799    of the newly created root variables are marked in TMP_VARS.  */
1800 
1801 static void
1802 initialize_root_vars_store_elim_2 (struct loop *loop,
1803 				   chain_p chain, bitmap tmp_vars)
1804 {
1805   unsigned i, n = chain->length;
1806   tree ref, init, var, next, val, phi_result;
1807   gimple *stmt;
1808   gimple_seq stmts;
1809 
1810   chain->vars.create (n);
1811 
1812   ref = DR_REF (get_chain_root (chain)->ref);
1813   for (i = 0; i < n; i++)
1814     {
1815       var = predcom_tmp_var (ref, i, tmp_vars);
1816       chain->vars.quick_push (var);
1817     }
1818 
1819   FOR_EACH_VEC_ELT (chain->vars, i, var)
1820     chain->vars[i] = make_ssa_name (var);
1821 
1822   /* Root values are either rhs operand of stores to be eliminated, or
1823      loaded from memory before loop.  */
1824   auto_vec<tree> vtemps;
1825   vtemps.safe_grow_cleared (n);
1826   for (i = 0; i < n; i++)
1827     {
1828       init = get_init_expr (chain, i);
1829       if (init == NULL_TREE)
1830 	{
1831 	  /* Root value is rhs operand of the store to be eliminated if
1832 	     it isn't loaded from memory before loop.  */
1833 	  dref a = get_chain_last_write_at (chain, i);
1834 	  val = gimple_assign_rhs1 (a->stmt);
1835 	  if (TREE_CLOBBER_P (val))
1836 	    {
1837 	      val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var));
1838 	      gimple_assign_set_rhs1 (a->stmt, val);
1839 	    }
1840 
1841 	  vtemps[n - i - 1] = val;
1842 	}
1843       else
1844 	{
1845 	  edge latch = loop_latch_edge (loop);
1846 	  edge entry = loop_preheader_edge (loop);
1847 
1848 	  /* Root value is loaded from memory before loop, we also need
1849 	     to add PHI nodes to propagate the value across iterations.  */
1850 	  init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1851 	  if (stmts)
1852 	    gsi_insert_seq_on_edge_immediate (entry, stmts);
1853 
1854 	  next = chain->vars[n - i];
1855 	  phi_result = copy_ssa_name (next);
1856 	  gphi *phi = create_phi_node (phi_result, loop->header);
1857 	  add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1858 	  add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1859 	  vtemps[n - i - 1] = phi_result;
1860 	}
1861     }
1862 
1863   /* Find the insertion position.  */
1864   dref last = get_chain_root (chain);
1865   for (i = 0; i < chain->refs.length (); i++)
1866     {
1867       if (chain->refs[i]->pos > last->pos)
1868 	last = chain->refs[i];
1869     }
1870 
1871   gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt);
1872 
1873   /* Insert statements copying root value to root variable.  */
1874   for (i = 0; i < n; i++)
1875     {
1876       var = chain->vars[i];
1877       val = vtemps[i];
1878       stmt = gimple_build_assign (var, val);
1879       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1880     }
1881 }
1882 
1883 /* Generates stores for CHAIN's eliminated stores in LOOP's last
1884    (CHAIN->length - 1) iterations.  */
1885 
1886 static void
1887 finalize_eliminated_stores (struct loop *loop, chain_p chain)
1888 {
1889   unsigned i, n = chain->length;
1890 
1891   for (i = 0; i < n; i++)
1892     {
1893       tree var = chain->vars[i];
1894       tree fini = chain->finis[n - i - 1];
1895       gimple *stmt = gimple_build_assign (fini, var);
1896 
1897       gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt);
1898     }
1899 
1900   if (chain->fini_seq)
1901     {
1902       gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq);
1903       chain->fini_seq = NULL;
1904     }
1905 }
1906 
1907 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1908    initialization on entry to LOOP if necessary.  The ssa name for the variable
1909    is stored in VARS.  If WRITTEN is true, also a phi node to copy its value
1910    around the loop is created.  Uid of the newly created temporary variable
1911    is marked in TMP_VARS.  INITS is the list containing the (single)
1912    initializer.  */
1913 
1914 static void
1915 initialize_root_vars_lm (struct loop *loop, dref root, bool written,
1916 			 vec<tree> *vars, vec<tree> inits,
1917 			 bitmap tmp_vars)
1918 {
1919   unsigned i;
1920   tree ref = DR_REF (root->ref), init, var, next;
1921   gimple_seq stmts;
1922   gphi *phi;
1923   edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1924 
1925   /* Find the initializer for the variable, and check that it cannot
1926      trap.  */
1927   init = inits[0];
1928 
1929   vars->create (written ? 2 : 1);
1930   var = predcom_tmp_var (ref, 0, tmp_vars);
1931   vars->quick_push (var);
1932   if (written)
1933     vars->quick_push ((*vars)[0]);
1934 
1935   FOR_EACH_VEC_ELT (*vars, i, var)
1936     (*vars)[i] = make_ssa_name (var);
1937 
1938   var = (*vars)[0];
1939 
1940   init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1941   if (stmts)
1942     gsi_insert_seq_on_edge_immediate (entry, stmts);
1943 
1944   if (written)
1945     {
1946       next = (*vars)[1];
1947       phi = create_phi_node (var, loop->header);
1948       add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1949       add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1950     }
1951   else
1952     {
1953       gassign *init_stmt = gimple_build_assign (var, init);
1954       gsi_insert_on_edge_immediate (entry, init_stmt);
1955     }
1956 }
1957 
1958 
1959 /* Execute load motion for references in chain CHAIN.  Uids of the newly
1960    created temporary variables are marked in TMP_VARS.  */
1961 
1962 static void
1963 execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
1964 {
1965   auto_vec<tree> vars;
1966   dref a;
1967   unsigned n_writes = 0, ridx, i;
1968   tree var;
1969 
1970   gcc_assert (chain->type == CT_INVARIANT);
1971   gcc_assert (!chain->combined);
1972   FOR_EACH_VEC_ELT (chain->refs, i, a)
1973     if (DR_IS_WRITE (a->ref))
1974       n_writes++;
1975 
1976   /* If there are no reads in the loop, there is nothing to do.  */
1977   if (n_writes == chain->refs.length ())
1978     return;
1979 
1980   initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
1981 			   &vars, chain->inits, tmp_vars);
1982 
1983   ridx = 0;
1984   FOR_EACH_VEC_ELT (chain->refs, i, a)
1985     {
1986       bool is_read = DR_IS_READ (a->ref);
1987 
1988       if (DR_IS_WRITE (a->ref))
1989 	{
1990 	  n_writes--;
1991 	  if (n_writes)
1992 	    {
1993 	      var = vars[0];
1994 	      var = make_ssa_name (SSA_NAME_VAR (var));
1995 	      vars[0] = var;
1996 	    }
1997 	  else
1998 	    ridx = 1;
1999 	}
2000 
2001       replace_ref_with (a->stmt, vars[ridx],
2002 			!is_read, !is_read);
2003     }
2004 }
2005 
2006 /* Returns the single statement in that NAME is used, excepting
2007    the looparound phi nodes contained in one of the chains.  If there is no
2008    such statement, or more statements, NULL is returned.  */
2009 
2010 static gimple *
2011 single_nonlooparound_use (tree name)
2012 {
2013   use_operand_p use;
2014   imm_use_iterator it;
2015   gimple *stmt, *ret = NULL;
2016 
2017   FOR_EACH_IMM_USE_FAST (use, it, name)
2018     {
2019       stmt = USE_STMT (use);
2020 
2021       if (gimple_code (stmt) == GIMPLE_PHI)
2022 	{
2023 	  /* Ignore uses in looparound phi nodes.  Uses in other phi nodes
2024 	     could not be processed anyway, so just fail for them.  */
2025 	  if (bitmap_bit_p (looparound_phis,
2026 			    SSA_NAME_VERSION (PHI_RESULT (stmt))))
2027 	    continue;
2028 
2029 	  return NULL;
2030 	}
2031       else if (is_gimple_debug (stmt))
2032 	continue;
2033       else if (ret != NULL)
2034 	return NULL;
2035       else
2036 	ret = stmt;
2037     }
2038 
2039   return ret;
2040 }
2041 
2042 /* Remove statement STMT, as well as the chain of assignments in that it is
2043    used.  */
2044 
2045 static void
2046 remove_stmt (gimple *stmt)
2047 {
2048   tree name;
2049   gimple *next;
2050   gimple_stmt_iterator psi;
2051 
2052   if (gimple_code (stmt) == GIMPLE_PHI)
2053     {
2054       name = PHI_RESULT (stmt);
2055       next = single_nonlooparound_use (name);
2056       reset_debug_uses (stmt);
2057       psi = gsi_for_stmt (stmt);
2058       remove_phi_node (&psi, true);
2059 
2060       if (!next
2061 	  || !gimple_assign_ssa_name_copy_p (next)
2062 	  || gimple_assign_rhs1 (next) != name)
2063 	return;
2064 
2065       stmt = next;
2066     }
2067 
2068   while (1)
2069     {
2070       gimple_stmt_iterator bsi;
2071 
2072       bsi = gsi_for_stmt (stmt);
2073 
2074       name = gimple_assign_lhs (stmt);
2075       if (TREE_CODE (name) == SSA_NAME)
2076 	{
2077 	  next = single_nonlooparound_use (name);
2078 	  reset_debug_uses (stmt);
2079 	}
2080       else
2081 	{
2082 	  /* This is a store to be eliminated.  */
2083 	  gcc_assert (gimple_vdef (stmt) != NULL);
2084 	  next = NULL;
2085 	}
2086 
2087       unlink_stmt_vdef (stmt);
2088       gsi_remove (&bsi, true);
2089       release_defs (stmt);
2090 
2091       if (!next
2092 	  || !gimple_assign_ssa_name_copy_p (next)
2093 	  || gimple_assign_rhs1 (next) != name)
2094 	return;
2095 
2096       stmt = next;
2097     }
2098 }
2099 
2100 /* Perform the predictive commoning optimization for a chain CHAIN.
2101    Uids of the newly created temporary variables are marked in TMP_VARS.*/
2102 
2103 static void
2104 execute_pred_commoning_chain (struct loop *loop, chain_p chain,
2105 			      bitmap tmp_vars)
2106 {
2107   unsigned i;
2108   dref a;
2109   tree var;
2110   bool in_lhs;
2111 
2112   if (chain->combined)
2113     {
2114       /* For combined chains, just remove the statements that are used to
2115 	 compute the values of the expression (except for the root one).
2116 	 We delay this until after all chains are processed.  */
2117     }
2118   else if (chain->type == CT_STORE_STORE)
2119     {
2120       if (chain->length > 0)
2121 	{
2122 	  if (chain->inv_store_elimination)
2123 	    {
2124 	      /* If dead stores in this chain only store loop invariant
2125 		 values, we can simply record the invariant value and use
2126 		 it directly after loop.  */
2127 	      initialize_root_vars_store_elim_1 (chain);
2128 	    }
2129 	  else
2130 	    {
2131 	      /* If dead stores in this chain store loop variant values,
2132 		 we need to set up the variables by loading from memory
2133 		 before loop and propagating it with PHI nodes.  */
2134 	      initialize_root_vars_store_elim_2 (loop, chain, tmp_vars);
2135 	    }
2136 
2137 	  /* For inter-iteration store elimination chain, stores at each
2138 	     distance in loop's last (chain->length - 1) iterations can't
2139 	     be eliminated, because there is no following killing store.
2140 	     We need to generate these stores after loop.  */
2141 	  finalize_eliminated_stores (loop, chain);
2142 	}
2143 
2144       bool last_store_p = true;
2145       for (i = chain->refs.length (); i > 0; i--)
2146 	{
2147 	  a = chain->refs[i - 1];
2148 	  /* Preserve the last store of the chain.  Eliminate other stores
2149 	     which are killed by the last one.  */
2150 	  if (DR_IS_WRITE (a->ref))
2151 	    {
2152 	      if (last_store_p)
2153 		last_store_p = false;
2154 	      else
2155 		remove_stmt (a->stmt);
2156 
2157 	      continue;
2158 	    }
2159 
2160 	  /* Any load in Store-Store chain must be dominated by a previous
2161 	     store, we replace the load reference with rhs of the store.  */
2162 	  dref b = get_chain_last_write_before_load (chain, i - 1);
2163 	  gcc_assert (b != NULL);
2164 	  var = gimple_assign_rhs1 (b->stmt);
2165 	  replace_ref_with (a->stmt, var, false, false);
2166 	}
2167     }
2168   else
2169     {
2170       /* For non-combined chains, set up the variables that hold its value.  */
2171       initialize_root_vars (loop, chain, tmp_vars);
2172       a = get_chain_root (chain);
2173       in_lhs = (chain->type == CT_STORE_LOAD
2174 		|| chain->type == CT_COMBINATION);
2175       replace_ref_with (a->stmt, chain->vars[chain->length], true, in_lhs);
2176 
2177       /* Replace the uses of the original references by these variables.  */
2178       for (i = 1; chain->refs.iterate (i, &a); i++)
2179 	{
2180 	  var = chain->vars[chain->length - a->distance];
2181 	  replace_ref_with (a->stmt, var, false, false);
2182 	}
2183     }
2184 }
2185 
2186 /* Determines the unroll factor necessary to remove as many temporary variable
2187    copies as possible.  CHAINS is the list of chains that will be
2188    optimized.  */
2189 
2190 static unsigned
2191 determine_unroll_factor (vec<chain_p> chains)
2192 {
2193   chain_p chain;
2194   unsigned factor = 1, af, nfactor, i;
2195   unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
2196 
2197   FOR_EACH_VEC_ELT (chains, i, chain)
2198     {
2199       if (chain->type == CT_INVARIANT)
2200 	continue;
2201       /* For now we can't handle unrolling when eliminating stores.  */
2202       else if (chain->type == CT_STORE_STORE)
2203 	return 1;
2204 
2205       if (chain->combined)
2206 	{
2207 	  /* For combined chains, we can't handle unrolling if we replace
2208 	     looparound PHIs.  */
2209 	  dref a;
2210 	  unsigned j;
2211 	  for (j = 1; chain->refs.iterate (j, &a); j++)
2212 	    if (gimple_code (a->stmt) == GIMPLE_PHI)
2213 	      return 1;
2214 	  continue;
2215 	}
2216 
2217       /* The best unroll factor for this chain is equal to the number of
2218 	 temporary variables that we create for it.  */
2219       af = chain->length;
2220       if (chain->has_max_use_after)
2221 	af++;
2222 
2223       nfactor = factor * af / gcd (factor, af);
2224       if (nfactor <= max)
2225 	factor = nfactor;
2226     }
2227 
2228   return factor;
2229 }
2230 
2231 /* Perform the predictive commoning optimization for CHAINS.
2232    Uids of the newly created temporary variables are marked in TMP_VARS.  */
2233 
2234 static void
2235 execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
2236 			bitmap tmp_vars)
2237 {
2238   chain_p chain;
2239   unsigned i;
2240 
2241   FOR_EACH_VEC_ELT (chains, i, chain)
2242     {
2243       if (chain->type == CT_INVARIANT)
2244 	execute_load_motion (loop, chain, tmp_vars);
2245       else
2246 	execute_pred_commoning_chain (loop, chain, tmp_vars);
2247     }
2248 
2249   FOR_EACH_VEC_ELT (chains, i, chain)
2250     {
2251       if (chain->type == CT_INVARIANT)
2252 	;
2253       else if (chain->combined)
2254 	{
2255 	  /* For combined chains, just remove the statements that are used to
2256 	     compute the values of the expression (except for the root one).  */
2257 	  dref a;
2258 	  unsigned j;
2259 	  for (j = 1; chain->refs.iterate (j, &a); j++)
2260 	    remove_stmt (a->stmt);
2261 	}
2262     }
2263 
2264   update_ssa (TODO_update_ssa_only_virtuals);
2265 }
2266 
2267 /* For each reference in CHAINS, if its defining statement is
2268    phi node, record the ssa name that is defined by it.  */
2269 
2270 static void
2271 replace_phis_by_defined_names (vec<chain_p> chains)
2272 {
2273   chain_p chain;
2274   dref a;
2275   unsigned i, j;
2276 
2277   FOR_EACH_VEC_ELT (chains, i, chain)
2278     FOR_EACH_VEC_ELT (chain->refs, j, a)
2279       {
2280 	if (gimple_code (a->stmt) == GIMPLE_PHI)
2281 	  {
2282 	    a->name_defined_by_phi = PHI_RESULT (a->stmt);
2283 	    a->stmt = NULL;
2284 	  }
2285       }
2286 }
2287 
2288 /* For each reference in CHAINS, if name_defined_by_phi is not
2289    NULL, use it to set the stmt field.  */
2290 
2291 static void
2292 replace_names_by_phis (vec<chain_p> chains)
2293 {
2294   chain_p chain;
2295   dref a;
2296   unsigned i, j;
2297 
2298   FOR_EACH_VEC_ELT (chains, i, chain)
2299     FOR_EACH_VEC_ELT (chain->refs, j, a)
2300       if (a->stmt == NULL)
2301 	{
2302 	  a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
2303 	  gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
2304 	  a->name_defined_by_phi = NULL_TREE;
2305 	}
2306 }
2307 
2308 /* Wrapper over execute_pred_commoning, to pass it as a callback
2309    to tree_transform_and_unroll_loop.  */
2310 
2311 struct epcc_data
2312 {
2313   vec<chain_p> chains;
2314   bitmap tmp_vars;
2315 };
2316 
2317 static void
2318 execute_pred_commoning_cbck (struct loop *loop, void *data)
2319 {
2320   struct epcc_data *const dta = (struct epcc_data *) data;
2321 
2322   /* Restore phi nodes that were replaced by ssa names before
2323      tree_transform_and_unroll_loop (see detailed description in
2324      tree_predictive_commoning_loop).  */
2325   replace_names_by_phis (dta->chains);
2326   execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
2327 }
2328 
2329 /* Base NAME and all the names in the chain of phi nodes that use it
2330    on variable VAR.  The phi nodes are recognized by being in the copies of
2331    the header of the LOOP.  */
2332 
2333 static void
2334 base_names_in_chain_on (struct loop *loop, tree name, tree var)
2335 {
2336   gimple *stmt, *phi;
2337   imm_use_iterator iter;
2338 
2339   replace_ssa_name_symbol (name, var);
2340 
2341   while (1)
2342     {
2343       phi = NULL;
2344       FOR_EACH_IMM_USE_STMT (stmt, iter, name)
2345 	{
2346 	  if (gimple_code (stmt) == GIMPLE_PHI
2347 	      && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
2348 	    {
2349 	      phi = stmt;
2350 	      BREAK_FROM_IMM_USE_STMT (iter);
2351 	    }
2352 	}
2353       if (!phi)
2354 	return;
2355 
2356       name = PHI_RESULT (phi);
2357       replace_ssa_name_symbol (name, var);
2358     }
2359 }
2360 
2361 /* Given an unrolled LOOP after predictive commoning, remove the
2362    register copies arising from phi nodes by changing the base
2363    variables of SSA names.  TMP_VARS is the set of the temporary variables
2364    for those we want to perform this.  */
2365 
2366 static void
2367 eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
2368 {
2369   edge e;
2370   gphi *phi;
2371   gimple *stmt;
2372   tree name, use, var;
2373   gphi_iterator psi;
2374 
2375   e = loop_latch_edge (loop);
2376   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
2377     {
2378       phi = psi.phi ();
2379       name = PHI_RESULT (phi);
2380       var = SSA_NAME_VAR (name);
2381       if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
2382 	continue;
2383       use = PHI_ARG_DEF_FROM_EDGE (phi, e);
2384       gcc_assert (TREE_CODE (use) == SSA_NAME);
2385 
2386       /* Base all the ssa names in the ud and du chain of NAME on VAR.  */
2387       stmt = SSA_NAME_DEF_STMT (use);
2388       while (gimple_code (stmt) == GIMPLE_PHI
2389 	     /* In case we could not unroll the loop enough to eliminate
2390 		all copies, we may reach the loop header before the defining
2391 		statement (in that case, some register copies will be present
2392 		in loop latch in the final code, corresponding to the newly
2393 		created looparound phi nodes).  */
2394 	     && gimple_bb (stmt) != loop->header)
2395 	{
2396 	  gcc_assert (single_pred_p (gimple_bb (stmt)));
2397 	  use = PHI_ARG_DEF (stmt, 0);
2398 	  stmt = SSA_NAME_DEF_STMT (use);
2399 	}
2400 
2401       base_names_in_chain_on (loop, use, var);
2402     }
2403 }
2404 
2405 /* Returns true if CHAIN is suitable to be combined.  */
2406 
2407 static bool
2408 chain_can_be_combined_p (chain_p chain)
2409 {
2410   return (!chain->combined
2411 	  && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
2412 }
2413 
2414 /* Returns the modify statement that uses NAME.  Skips over assignment
2415    statements, NAME is replaced with the actual name used in the returned
2416    statement.  */
2417 
2418 static gimple *
2419 find_use_stmt (tree *name)
2420 {
2421   gimple *stmt;
2422   tree rhs, lhs;
2423 
2424   /* Skip over assignments.  */
2425   while (1)
2426     {
2427       stmt = single_nonlooparound_use (*name);
2428       if (!stmt)
2429 	return NULL;
2430 
2431       if (gimple_code (stmt) != GIMPLE_ASSIGN)
2432 	return NULL;
2433 
2434       lhs = gimple_assign_lhs (stmt);
2435       if (TREE_CODE (lhs) != SSA_NAME)
2436 	return NULL;
2437 
2438       if (gimple_assign_copy_p (stmt))
2439 	{
2440 	  rhs = gimple_assign_rhs1 (stmt);
2441 	  if (rhs != *name)
2442 	    return NULL;
2443 
2444 	  *name = lhs;
2445 	}
2446       else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
2447 	       == GIMPLE_BINARY_RHS)
2448 	return stmt;
2449       else
2450 	return NULL;
2451     }
2452 }
2453 
2454 /* Returns true if we may perform reassociation for operation CODE in TYPE.  */
2455 
2456 static bool
2457 may_reassociate_p (tree type, enum tree_code code)
2458 {
2459   if (FLOAT_TYPE_P (type)
2460       && !flag_unsafe_math_optimizations)
2461     return false;
2462 
2463   return (commutative_tree_code (code)
2464 	  && associative_tree_code (code));
2465 }
2466 
2467 /* If the operation used in STMT is associative and commutative, go through the
2468    tree of the same operations and returns its root.  Distance to the root
2469    is stored in DISTANCE.  */
2470 
2471 static gimple *
2472 find_associative_operation_root (gimple *stmt, unsigned *distance)
2473 {
2474   tree lhs;
2475   gimple *next;
2476   enum tree_code code = gimple_assign_rhs_code (stmt);
2477   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2478   unsigned dist = 0;
2479 
2480   if (!may_reassociate_p (type, code))
2481     return NULL;
2482 
2483   while (1)
2484     {
2485       lhs = gimple_assign_lhs (stmt);
2486       gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2487 
2488       next = find_use_stmt (&lhs);
2489       if (!next
2490 	  || gimple_assign_rhs_code (next) != code)
2491 	break;
2492 
2493       stmt = next;
2494       dist++;
2495     }
2496 
2497   if (distance)
2498     *distance = dist;
2499   return stmt;
2500 }
2501 
2502 /* Returns the common statement in that NAME1 and NAME2 have a use.  If there
2503    is no such statement, returns NULL_TREE.  In case the operation used on
2504    NAME1 and NAME2 is associative and commutative, returns the root of the
2505    tree formed by this operation instead of the statement that uses NAME1 or
2506    NAME2.  */
2507 
2508 static gimple *
2509 find_common_use_stmt (tree *name1, tree *name2)
2510 {
2511   gimple *stmt1, *stmt2;
2512 
2513   stmt1 = find_use_stmt (name1);
2514   if (!stmt1)
2515     return NULL;
2516 
2517   stmt2 = find_use_stmt (name2);
2518   if (!stmt2)
2519     return NULL;
2520 
2521   if (stmt1 == stmt2)
2522     return stmt1;
2523 
2524   stmt1 = find_associative_operation_root (stmt1, NULL);
2525   if (!stmt1)
2526     return NULL;
2527   stmt2 = find_associative_operation_root (stmt2, NULL);
2528   if (!stmt2)
2529     return NULL;
2530 
2531   return (stmt1 == stmt2 ? stmt1 : NULL);
2532 }
2533 
2534 /* Checks whether R1 and R2 are combined together using CODE, with the result
2535    in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2536    if it is true.  If CODE is ERROR_MARK, set these values instead.  */
2537 
2538 static bool
2539 combinable_refs_p (dref r1, dref r2,
2540 		   enum tree_code *code, bool *swap, tree *rslt_type)
2541 {
2542   enum tree_code acode;
2543   bool aswap;
2544   tree atype;
2545   tree name1, name2;
2546   gimple *stmt;
2547 
2548   name1 = name_for_ref (r1);
2549   name2 = name_for_ref (r2);
2550   gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2551 
2552   stmt = find_common_use_stmt (&name1, &name2);
2553 
2554   if (!stmt
2555       /* A simple post-dominance check - make sure the combination
2556          is executed under the same condition as the references.  */
2557       || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2558 	  && gimple_bb (stmt) != gimple_bb (r2->stmt)))
2559     return false;
2560 
2561   acode = gimple_assign_rhs_code (stmt);
2562   aswap = (!commutative_tree_code (acode)
2563 	   && gimple_assign_rhs1 (stmt) != name1);
2564   atype = TREE_TYPE (gimple_assign_lhs (stmt));
2565 
2566   if (*code == ERROR_MARK)
2567     {
2568       *code = acode;
2569       *swap = aswap;
2570       *rslt_type = atype;
2571       return true;
2572     }
2573 
2574   return (*code == acode
2575 	  && *swap == aswap
2576 	  && *rslt_type == atype);
2577 }
2578 
2579 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2580    an assignment of the remaining operand.  */
2581 
2582 static void
2583 remove_name_from_operation (gimple *stmt, tree op)
2584 {
2585   tree other_op;
2586   gimple_stmt_iterator si;
2587 
2588   gcc_assert (is_gimple_assign (stmt));
2589 
2590   if (gimple_assign_rhs1 (stmt) == op)
2591     other_op = gimple_assign_rhs2 (stmt);
2592   else
2593     other_op = gimple_assign_rhs1 (stmt);
2594 
2595   si = gsi_for_stmt (stmt);
2596   gimple_assign_set_rhs_from_tree (&si, other_op);
2597 
2598   /* We should not have reallocated STMT.  */
2599   gcc_assert (gsi_stmt (si) == stmt);
2600 
2601   update_stmt (stmt);
2602 }
2603 
2604 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2605    are combined in a single statement, and returns this statement.  */
2606 
2607 static gimple *
2608 reassociate_to_the_same_stmt (tree name1, tree name2)
2609 {
2610   gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
2611   gassign *new_stmt, *tmp_stmt;
2612   tree new_name, tmp_name, var, r1, r2;
2613   unsigned dist1, dist2;
2614   enum tree_code code;
2615   tree type = TREE_TYPE (name1);
2616   gimple_stmt_iterator bsi;
2617 
2618   stmt1 = find_use_stmt (&name1);
2619   stmt2 = find_use_stmt (&name2);
2620   root1 = find_associative_operation_root (stmt1, &dist1);
2621   root2 = find_associative_operation_root (stmt2, &dist2);
2622   code = gimple_assign_rhs_code (stmt1);
2623 
2624   gcc_assert (root1 && root2 && root1 == root2
2625 	      && code == gimple_assign_rhs_code (stmt2));
2626 
2627   /* Find the root of the nearest expression in that both NAME1 and NAME2
2628      are used.  */
2629   r1 = name1;
2630   s1 = stmt1;
2631   r2 = name2;
2632   s2 = stmt2;
2633 
2634   while (dist1 > dist2)
2635     {
2636       s1 = find_use_stmt (&r1);
2637       r1 = gimple_assign_lhs (s1);
2638       dist1--;
2639     }
2640   while (dist2 > dist1)
2641     {
2642       s2 = find_use_stmt (&r2);
2643       r2 = gimple_assign_lhs (s2);
2644       dist2--;
2645     }
2646 
2647   while (s1 != s2)
2648     {
2649       s1 = find_use_stmt (&r1);
2650       r1 = gimple_assign_lhs (s1);
2651       s2 = find_use_stmt (&r2);
2652       r2 = gimple_assign_lhs (s2);
2653     }
2654 
2655   /* Remove NAME1 and NAME2 from the statements in that they are used
2656      currently.  */
2657   remove_name_from_operation (stmt1, name1);
2658   remove_name_from_operation (stmt2, name2);
2659 
2660   /* Insert the new statement combining NAME1 and NAME2 before S1, and
2661      combine it with the rhs of S1.  */
2662   var = create_tmp_reg (type, "predreastmp");
2663   new_name = make_ssa_name (var);
2664   new_stmt = gimple_build_assign (new_name, code, name1, name2);
2665 
2666   var = create_tmp_reg (type, "predreastmp");
2667   tmp_name = make_ssa_name (var);
2668 
2669   /* Rhs of S1 may now be either a binary expression with operation
2670      CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2671      so that name1 or name2 was removed from it).  */
2672   tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
2673 				  gimple_assign_rhs1 (s1),
2674 				  gimple_assign_rhs2 (s1));
2675 
2676   bsi = gsi_for_stmt (s1);
2677   gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2678   s1 = gsi_stmt (bsi);
2679   update_stmt (s1);
2680 
2681   gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
2682   gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
2683 
2684   return new_stmt;
2685 }
2686 
2687 /* Returns the statement that combines references R1 and R2.  In case R1
2688    and R2 are not used in the same statement, but they are used with an
2689    associative and commutative operation in the same expression, reassociate
2690    the expression so that they are used in the same statement.  */
2691 
2692 static gimple *
2693 stmt_combining_refs (dref r1, dref r2)
2694 {
2695   gimple *stmt1, *stmt2;
2696   tree name1 = name_for_ref (r1);
2697   tree name2 = name_for_ref (r2);
2698 
2699   stmt1 = find_use_stmt (&name1);
2700   stmt2 = find_use_stmt (&name2);
2701   if (stmt1 == stmt2)
2702     return stmt1;
2703 
2704   return reassociate_to_the_same_stmt (name1, name2);
2705 }
2706 
2707 /* Tries to combine chains CH1 and CH2 together.  If this succeeds, the
2708    description of the new chain is returned, otherwise we return NULL.  */
2709 
2710 static chain_p
2711 combine_chains (chain_p ch1, chain_p ch2)
2712 {
2713   dref r1, r2, nw;
2714   enum tree_code op = ERROR_MARK;
2715   bool swap = false;
2716   chain_p new_chain;
2717   unsigned i;
2718   tree rslt_type = NULL_TREE;
2719 
2720   if (ch1 == ch2)
2721     return NULL;
2722   if (ch1->length != ch2->length)
2723     return NULL;
2724 
2725   if (ch1->refs.length () != ch2->refs.length ())
2726     return NULL;
2727 
2728   for (i = 0; (ch1->refs.iterate (i, &r1)
2729 	       && ch2->refs.iterate (i, &r2)); i++)
2730     {
2731       if (r1->distance != r2->distance)
2732 	return NULL;
2733 
2734       if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2735 	return NULL;
2736     }
2737 
2738   if (swap)
2739     std::swap (ch1, ch2);
2740 
2741   new_chain = XCNEW (struct chain);
2742   new_chain->type = CT_COMBINATION;
2743   new_chain->op = op;
2744   new_chain->ch1 = ch1;
2745   new_chain->ch2 = ch2;
2746   new_chain->rslt_type = rslt_type;
2747   new_chain->length = ch1->length;
2748 
2749   for (i = 0; (ch1->refs.iterate (i, &r1)
2750 	       && ch2->refs.iterate (i, &r2)); i++)
2751     {
2752       nw = XCNEW (struct dref_d);
2753       nw->stmt = stmt_combining_refs (r1, r2);
2754       nw->distance = r1->distance;
2755 
2756       new_chain->refs.safe_push (nw);
2757     }
2758 
2759   ch1->combined = true;
2760   ch2->combined = true;
2761   return new_chain;
2762 }
2763 
2764 /* Recursively update position information of all offspring chains to ROOT
2765    chain's position information.  */
2766 
2767 static void
2768 update_pos_for_combined_chains (chain_p root)
2769 {
2770   chain_p ch1 = root->ch1, ch2 = root->ch2;
2771   dref ref, ref1, ref2;
2772   for (unsigned j = 0; (root->refs.iterate (j, &ref)
2773 			&& ch1->refs.iterate (j, &ref1)
2774 			&& ch2->refs.iterate (j, &ref2)); ++j)
2775     ref1->pos = ref2->pos = ref->pos;
2776 
2777   if (ch1->type == CT_COMBINATION)
2778     update_pos_for_combined_chains (ch1);
2779   if (ch2->type == CT_COMBINATION)
2780     update_pos_for_combined_chains (ch2);
2781 }
2782 
2783 /* Returns true if statement S1 dominates statement S2.  */
2784 
2785 static bool
2786 pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
2787 {
2788   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
2789 
2790   if (!bb1 || s1 == s2)
2791     return true;
2792 
2793   if (bb1 == bb2)
2794     return gimple_uid (s1) < gimple_uid (s2);
2795 
2796   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
2797 }
2798 
2799 /* Try to combine the CHAINS in LOOP.  */
2800 
2801 static void
2802 try_combine_chains (struct loop *loop, vec<chain_p> *chains)
2803 {
2804   unsigned i, j;
2805   chain_p ch1, ch2, cch;
2806   auto_vec<chain_p> worklist;
2807   bool combined_p = false;
2808 
2809   FOR_EACH_VEC_ELT (*chains, i, ch1)
2810     if (chain_can_be_combined_p (ch1))
2811       worklist.safe_push (ch1);
2812 
2813   while (!worklist.is_empty ())
2814     {
2815       ch1 = worklist.pop ();
2816       if (!chain_can_be_combined_p (ch1))
2817 	continue;
2818 
2819       FOR_EACH_VEC_ELT (*chains, j, ch2)
2820 	{
2821 	  if (!chain_can_be_combined_p (ch2))
2822 	    continue;
2823 
2824 	  cch = combine_chains (ch1, ch2);
2825 	  if (cch)
2826 	    {
2827 	      worklist.safe_push (cch);
2828 	      chains->safe_push (cch);
2829 	      combined_p = true;
2830 	      break;
2831 	    }
2832 	}
2833     }
2834   if (!combined_p)
2835     return;
2836 
2837   /* Setup UID for all statements in dominance order.  */
2838   basic_block *bbs = get_loop_body_in_dom_order (loop);
2839   renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes);
2840   free (bbs);
2841 
2842   /* Re-association in combined chains may generate statements different to
2843      order of references of the original chain.  We need to keep references
2844      of combined chain in dominance order so that all uses will be inserted
2845      after definitions.  Note:
2846        A) This is necessary for all combined chains.
2847        B) This is only necessary for ZERO distance references because other
2848 	  references inherit value from loop carried PHIs.
2849 
2850      We first update position information for all combined chains.  */
2851   dref ref;
2852   for (i = 0; chains->iterate (i, &ch1); ++i)
2853     {
2854       if (ch1->type != CT_COMBINATION || ch1->combined)
2855 	continue;
2856 
2857       for (j = 0; ch1->refs.iterate (j, &ref); ++j)
2858 	ref->pos = gimple_uid (ref->stmt);
2859 
2860       update_pos_for_combined_chains (ch1);
2861     }
2862   /* Then sort references according to newly updated position information.  */
2863   for (i = 0; chains->iterate (i, &ch1); ++i)
2864     {
2865       if (ch1->type != CT_COMBINATION && !ch1->combined)
2866 	continue;
2867 
2868       /* Find the first reference with non-ZERO distance.  */
2869       if (ch1->length == 0)
2870 	j = ch1->refs.length();
2871       else
2872 	{
2873 	  for (j = 0; ch1->refs.iterate (j, &ref); ++j)
2874 	    if (ref->distance != 0)
2875 	      break;
2876 	}
2877 
2878       /* Sort all ZERO distance references by position.  */
2879       qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);
2880 
2881       if (ch1->combined)
2882 	continue;
2883 
2884       /* For ZERO length chain, has_max_use_after must be true since root
2885 	 combined stmt must dominates others.  */
2886       if (ch1->length == 0)
2887 	{
2888 	  ch1->has_max_use_after = true;
2889 	  continue;
2890 	}
2891       /* Check if there is use at max distance after root for combined chains
2892 	 and set flag accordingly.  */
2893       ch1->has_max_use_after = false;
2894       gimple *root_stmt = get_chain_root (ch1)->stmt;
2895       for (j = 1; ch1->refs.iterate (j, &ref); ++j)
2896 	{
2897 	  if (ref->distance == ch1->length
2898 	      && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt))
2899 	    {
2900 	      ch1->has_max_use_after = true;
2901 	      break;
2902 	    }
2903 	}
2904     }
2905 }
2906 
2907 /* Prepare initializers for store elimination CHAIN in LOOP.  Returns false
2908    if this is impossible because one of these initializers may trap, true
2909    otherwise.  */
2910 
2911 static bool
2912 prepare_initializers_chain_store_elim (struct loop *loop, chain_p chain)
2913 {
2914   unsigned i, n = chain->length;
2915 
2916   /* For now we can't eliminate stores if some of them are conditional
2917      executed.  */
2918   if (!chain->all_always_accessed)
2919     return false;
2920 
2921   /* Nothing to intialize for intra-iteration store elimination.  */
2922   if (n == 0 && chain->type == CT_STORE_STORE)
2923     return true;
2924 
2925   /* For store elimination chain, there is nothing to initialize if stores
2926      to be eliminated only store loop invariant values into memory.  */
2927   if (chain->type == CT_STORE_STORE
2928       && is_inv_store_elimination_chain (loop, chain))
2929     {
2930       chain->inv_store_elimination = true;
2931       return true;
2932     }
2933 
2934   chain->inits.create (n);
2935   chain->inits.safe_grow_cleared (n);
2936 
2937   /* For store eliminatin chain like below:
2938 
2939      for (i = 0; i < len; i++)
2940        {
2941 	 a[i] = 1;
2942 	 // a[i + 1] = ...
2943 	 a[i + 2] = 3;
2944        }
2945 
2946      store to a[i + 1] is missed in loop body, it acts like bubbles.  The
2947      content of a[i + 1] remain the same if the loop iterates fewer times
2948      than chain->length.  We need to set up root variables for such stores
2949      by loading from memory before loop.  Note we only need to load bubble
2950      elements because loop body is guaranteed to be executed at least once
2951      after loop's preheader edge.  */
2952   auto_vec<bool> bubbles;
2953   bubbles.safe_grow_cleared (n + 1);
2954   for (i = 0; i < chain->refs.length (); i++)
2955     bubbles[chain->refs[i]->distance] = true;
2956 
2957   struct data_reference *dr = get_chain_root (chain)->ref;
2958   for (i = 0; i < n; i++)
2959     {
2960       if (bubbles[i])
2961 	continue;
2962 
2963       gimple_seq stmts = NULL;
2964 
2965       tree init = ref_at_iteration (dr, (int) 0 - i, &stmts);
2966       if (stmts)
2967 	gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
2968 
2969       chain->inits[i] = init;
2970     }
2971 
2972   return true;
2973 }
2974 
2975 /* Prepare initializers for CHAIN in LOOP.  Returns false if this is
2976    impossible because one of these initializers may trap, true otherwise.  */
2977 
2978 static bool
2979 prepare_initializers_chain (struct loop *loop, chain_p chain)
2980 {
2981   unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
2982   struct data_reference *dr = get_chain_root (chain)->ref;
2983   tree init;
2984   dref laref;
2985   edge entry = loop_preheader_edge (loop);
2986 
2987   if (chain->type == CT_STORE_STORE)
2988     return prepare_initializers_chain_store_elim (loop, chain);
2989 
2990   /* Find the initializers for the variables, and check that they cannot
2991      trap.  */
2992   chain->inits.create (n);
2993   for (i = 0; i < n; i++)
2994     chain->inits.quick_push (NULL_TREE);
2995 
2996   /* If we have replaced some looparound phi nodes, use their initializers
2997      instead of creating our own.  */
2998   FOR_EACH_VEC_ELT (chain->refs, i, laref)
2999     {
3000       if (gimple_code (laref->stmt) != GIMPLE_PHI)
3001 	continue;
3002 
3003       gcc_assert (laref->distance > 0);
3004       chain->inits[n - laref->distance]
3005 	= PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
3006     }
3007 
3008   for (i = 0; i < n; i++)
3009     {
3010       gimple_seq stmts = NULL;
3011 
3012       if (chain->inits[i] != NULL_TREE)
3013 	continue;
3014 
3015       init = ref_at_iteration (dr, (int) i - n, &stmts);
3016       if (!chain->all_always_accessed && tree_could_trap_p (init))
3017 	{
3018 	  gimple_seq_discard (stmts);
3019 	  return false;
3020 	}
3021 
3022       if (stmts)
3023 	gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
3024 
3025       chain->inits[i] = init;
3026     }
3027 
3028   return true;
3029 }
3030 
3031 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
3032    be used because the initializers might trap.  */
3033 
3034 static void
3035 prepare_initializers (struct loop *loop, vec<chain_p> chains)
3036 {
3037   chain_p chain;
3038   unsigned i;
3039 
3040   for (i = 0; i < chains.length (); )
3041     {
3042       chain = chains[i];
3043       if (prepare_initializers_chain (loop, chain))
3044 	i++;
3045       else
3046 	{
3047 	  release_chain (chain);
3048 	  chains.unordered_remove (i);
3049 	}
3050     }
3051 }
3052 
3053 /* Generates finalizer memory references for CHAIN in LOOP.  Returns true
3054    if finalizer code for CHAIN can be generated, otherwise false.  */
3055 
3056 static bool
3057 prepare_finalizers_chain (struct loop *loop, chain_p chain)
3058 {
3059   unsigned i, n = chain->length;
3060   struct data_reference *dr = get_chain_root (chain)->ref;
3061   tree fini, niters = number_of_latch_executions (loop);
3062 
3063   /* For now we can't eliminate stores if some of them are conditional
3064      executed.  */
3065   if (!chain->all_always_accessed)
3066     return false;
3067 
3068   chain->finis.create (n);
3069   for (i = 0; i < n; i++)
3070     chain->finis.quick_push (NULL_TREE);
3071 
3072   /* We never use looparound phi node for store elimination chains.  */
3073 
3074   /* Find the finalizers for the variables, and check that they cannot
3075      trap.  */
3076   for (i = 0; i < n; i++)
3077     {
3078       gimple_seq stmts = NULL;
3079       gcc_assert (chain->finis[i] == NULL_TREE);
3080 
3081       if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME)
3082 	{
3083 	  niters = unshare_expr (niters);
3084 	  niters = force_gimple_operand (niters, &stmts, true, NULL);
3085 	  if (stmts)
3086 	    {
3087 	      gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3088 	      stmts = NULL;
3089 	    }
3090 	}
3091       fini = ref_at_iteration (dr, (int) 0 - i, &stmts, niters);
3092       if (stmts)
3093 	gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3094 
3095       chain->finis[i] = fini;
3096     }
3097 
3098   return true;
3099 }
3100 
3101 /* Generates finalizer memory reference for CHAINS in LOOP.  Returns true
3102    if finalizer code generation for CHAINS breaks loop closed ssa form.  */
3103 
3104 static bool
3105 prepare_finalizers (struct loop *loop, vec<chain_p> chains)
3106 {
3107   chain_p chain;
3108   unsigned i;
3109   bool loop_closed_ssa = false;
3110 
3111   for (i = 0; i < chains.length ();)
3112     {
3113       chain = chains[i];
3114 
3115       /* Finalizer is only necessary for inter-iteration store elimination
3116 	 chains.  */
3117       if (chain->length == 0 || chain->type != CT_STORE_STORE)
3118 	{
3119 	  i++;
3120 	  continue;
3121 	}
3122 
3123       if (prepare_finalizers_chain (loop, chain))
3124 	{
3125 	  i++;
3126 	  /* Be conservative, assume loop closed ssa form is corrupted
3127 	     by store-store chain.  Though it's not always the case if
3128 	     eliminated stores only store loop invariant values into
3129 	     memory.  */
3130 	  loop_closed_ssa = true;
3131 	}
3132       else
3133 	{
3134 	  release_chain (chain);
3135 	  chains.unordered_remove (i);
3136 	}
3137     }
3138   return loop_closed_ssa;
3139 }
3140 
3141 /* Insert all initializing gimple stmts into loop's entry edge.  */
3142 
3143 static void
3144 insert_init_seqs (struct loop *loop, vec<chain_p> chains)
3145 {
3146   unsigned i;
3147   edge entry = loop_preheader_edge (loop);
3148 
3149   for (i = 0; i < chains.length (); ++i)
3150     if (chains[i]->init_seq)
3151       {
3152 	gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq);
3153 	chains[i]->init_seq = NULL;
3154       }
3155 }
3156 
3157 /* Performs predictive commoning for LOOP.  Sets bit 1<<0 of return value
3158    if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa
3159    form was corrupted.  */
3160 
3161 static unsigned
3162 tree_predictive_commoning_loop (struct loop *loop)
3163 {
3164   vec<data_reference_p> datarefs;
3165   vec<ddr_p> dependences;
3166   struct component *components;
3167   vec<chain_p> chains = vNULL;
3168   unsigned unroll_factor;
3169   struct tree_niter_desc desc;
3170   bool unroll = false, loop_closed_ssa = false;
3171   edge exit;
3172 
3173   if (dump_file && (dump_flags & TDF_DETAILS))
3174     fprintf (dump_file, "Processing loop %d\n",  loop->num);
3175 
3176   /* Nothing for predicitive commoning if loop only iterates 1 time.  */
3177   if (get_max_loop_iterations_int (loop) == 0)
3178     {
3179       if (dump_file && (dump_flags & TDF_DETAILS))
3180 	fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");
3181 
3182       return 0;
3183     }
3184 
3185   /* Find the data references and split them into components according to their
3186      dependence relations.  */
3187   auto_vec<loop_p, 3> loop_nest;
3188   dependences.create (10);
3189   datarefs.create (10);
3190   if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
3191 					   &dependences))
3192     {
3193       if (dump_file && (dump_flags & TDF_DETAILS))
3194 	fprintf (dump_file, "Cannot analyze data dependencies\n");
3195       free_data_refs (datarefs);
3196       free_dependence_relations (dependences);
3197       return 0;
3198     }
3199 
3200   if (dump_file && (dump_flags & TDF_DETAILS))
3201     dump_data_dependence_relations (dump_file, dependences);
3202 
3203   components = split_data_refs_to_components (loop, datarefs, dependences);
3204   loop_nest.release ();
3205   free_dependence_relations (dependences);
3206   if (!components)
3207     {
3208       free_data_refs (datarefs);
3209       free_affine_expand_cache (&name_expansions);
3210       return 0;
3211     }
3212 
3213   if (dump_file && (dump_flags & TDF_DETAILS))
3214     {
3215       fprintf (dump_file, "Initial state:\n\n");
3216       dump_components (dump_file, components);
3217     }
3218 
3219   /* Find the suitable components and split them into chains.  */
3220   components = filter_suitable_components (loop, components);
3221 
3222   auto_bitmap tmp_vars;
3223   looparound_phis = BITMAP_ALLOC (NULL);
3224   determine_roots (loop, components, &chains);
3225   release_components (components);
3226 
3227   if (!chains.exists ())
3228     {
3229       if (dump_file && (dump_flags & TDF_DETAILS))
3230 	fprintf (dump_file,
3231 		 "Predictive commoning failed: no suitable chains\n");
3232       goto end;
3233     }
3234   prepare_initializers (loop, chains);
3235   loop_closed_ssa = prepare_finalizers (loop, chains);
3236 
3237   /* Try to combine the chains that are always worked with together.  */
3238   try_combine_chains (loop, &chains);
3239 
3240   insert_init_seqs (loop, chains);
3241 
3242   if (dump_file && (dump_flags & TDF_DETAILS))
3243     {
3244       fprintf (dump_file, "Before commoning:\n\n");
3245       dump_chains (dump_file, chains);
3246     }
3247 
3248   /* Determine the unroll factor, and if the loop should be unrolled, ensure
3249      that its number of iterations is divisible by the factor.  */
3250   unroll_factor = determine_unroll_factor (chains);
3251   scev_reset ();
3252   unroll = (unroll_factor > 1
3253 	    && can_unroll_loop_p (loop, unroll_factor, &desc));
3254   exit = single_dom_exit (loop);
3255 
3256   /* Execute the predictive commoning transformations, and possibly unroll the
3257      loop.  */
3258   if (unroll)
3259     {
3260       struct epcc_data dta;
3261 
3262       if (dump_file && (dump_flags & TDF_DETAILS))
3263 	fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
3264 
3265       dta.chains = chains;
3266       dta.tmp_vars = tmp_vars;
3267 
3268       update_ssa (TODO_update_ssa_only_virtuals);
3269 
3270       /* Cfg manipulations performed in tree_transform_and_unroll_loop before
3271 	 execute_pred_commoning_cbck is called may cause phi nodes to be
3272 	 reallocated, which is a problem since CHAINS may point to these
3273 	 statements.  To fix this, we store the ssa names defined by the
3274 	 phi nodes here instead of the phi nodes themselves, and restore
3275 	 the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
3276       replace_phis_by_defined_names (chains);
3277 
3278       tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
3279 				      execute_pred_commoning_cbck, &dta);
3280       eliminate_temp_copies (loop, tmp_vars);
3281     }
3282   else
3283     {
3284       if (dump_file && (dump_flags & TDF_DETAILS))
3285 	fprintf (dump_file,
3286 		 "Executing predictive commoning without unrolling.\n");
3287       execute_pred_commoning (loop, chains, tmp_vars);
3288     }
3289 
3290 end: ;
3291   release_chains (chains);
3292   free_data_refs (datarefs);
3293   BITMAP_FREE (looparound_phis);
3294 
3295   free_affine_expand_cache (&name_expansions);
3296 
3297   return (unroll ? 1 : 0) | (loop_closed_ssa ? 2 : 0);
3298 }
3299 
3300 /* Runs predictive commoning.  */
3301 
3302 unsigned
3303 tree_predictive_commoning (void)
3304 {
3305   struct loop *loop;
3306   unsigned ret = 0, changed = 0;
3307 
3308   initialize_original_copy_tables ();
3309   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
3310     if (optimize_loop_for_speed_p (loop))
3311       {
3312 	changed |= tree_predictive_commoning_loop (loop);
3313       }
3314   free_original_copy_tables ();
3315 
3316   if (changed > 0)
3317     {
3318       scev_reset ();
3319 
3320       if (changed > 1)
3321 	rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3322 
3323       ret = TODO_cleanup_cfg;
3324     }
3325 
3326   return ret;
3327 }
3328 
3329 /* Predictive commoning Pass.  */
3330 
3331 static unsigned
3332 run_tree_predictive_commoning (struct function *fun)
3333 {
3334   if (number_of_loops (fun) <= 1)
3335     return 0;
3336 
3337   return tree_predictive_commoning ();
3338 }
3339 
3340 namespace {
3341 
3342 const pass_data pass_data_predcom =
3343 {
3344   GIMPLE_PASS, /* type */
3345   "pcom", /* name */
3346   OPTGROUP_LOOP, /* optinfo_flags */
3347   TV_PREDCOM, /* tv_id */
3348   PROP_cfg, /* properties_required */
3349   0, /* properties_provided */
3350   0, /* properties_destroyed */
3351   0, /* todo_flags_start */
3352   TODO_update_ssa_only_virtuals, /* todo_flags_finish */
3353 };
3354 
3355 class pass_predcom : public gimple_opt_pass
3356 {
3357 public:
3358   pass_predcom (gcc::context *ctxt)
3359     : gimple_opt_pass (pass_data_predcom, ctxt)
3360   {}
3361 
3362   /* opt_pass methods: */
3363   virtual bool gate (function *) { return flag_predictive_commoning != 0; }
3364   virtual unsigned int execute (function *fun)
3365     {
3366       return run_tree_predictive_commoning (fun);
3367     }
3368 
3369 }; // class pass_predcom
3370 
3371 } // anon namespace
3372 
3373 gimple_opt_pass *
3374 make_pass_predcom (gcc::context *ctxt)
3375 {
3376   return new pass_predcom (ctxt);
3377 }
3378 
3379 
3380