1 /* Predictive commoning.
2    Copyright (C) 2005-2021 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This file implements the predictive commoning optimization.  Predictive
21    commoning can be viewed as CSE around a loop, and with some improvements,
22    as generalized strength reduction-- i.e., reusing values computed in
23    earlier iterations of a loop in the later ones.  So far, the pass only
24    handles the most useful case, that is, reusing values of memory references.
25    If you think this is all just a special case of PRE, you are sort of right;
26    however, concentrating on loops is simpler, and makes it possible to
27    incorporate data dependence analysis to detect the opportunities, perform
28    loop unrolling to avoid copies together with renaming immediately,
29    and if needed, we could also take register pressure into account.
30 
31    Let us demonstrate what is done on an example:
32 
33    for (i = 0; i < 100; i++)
34      {
35        a[i+2] = a[i] + a[i+1];
36        b[10] = b[10] + i;
37        c[i] = c[99 - i];
38        d[i] = d[i + 1];
39      }
40 
41    1) We find data references in the loop, and split them to mutually
42       independent groups (i.e., we find components of a data dependence
43       graph).  We ignore read-read dependences whose distance is not constant.
44       (TODO -- we could also ignore antidependences).  In this example, we
45       find the following groups:
46 
47       a[i]{read}, a[i+1]{read}, a[i+2]{write}
48       b[10]{read}, b[10]{write}
49       c[99 - i]{read}, c[i]{write}
50       d[i + 1]{read}, d[i]{write}
51 
52    2) Inside each of the group, we verify several conditions:
53       a) all the references must differ in indices only, and the indices
54 	 must all have the same step
55       b) the references must dominate loop latch (and thus, they must be
56 	 ordered by dominance relation).
57       c) the distance of the indices must be a small multiple of the step
58       We are then able to compute the difference of the references (# of
59       iterations before they point to the same place as the first of them).
60       Also, in case there are writes in the loop, we split the groups into
61       chains whose head is the write whose values are used by the reads in
62       the same chain.  The chains are then processed independently,
63       making the further transformations simpler.  Also, the shorter chains
64       need the same number of registers, but may require lower unrolling
65       factor in order to get rid of the copies on the loop latch.
66 
67       In our example, we get the following chains (the chain for c is invalid).
68 
69       a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70       b[10]{read,+0}, b[10]{write,+0}
71       d[i + 1]{read,+0}, d[i]{write,+1}
72 
73    3) For each read, we determine the read or write whose value it reuses,
74       together with the distance of this reuse.  I.e. we take the last
75       reference before it with distance 0, or the last of the references
76       with the smallest positive distance to the read.  Then, we remove
77       the references that are not used in any of these chains, discard the
78       empty groups, and propagate all the links so that they point to the
79       single root reference of the chain (adjusting their distance
80       appropriately).  Some extra care needs to be taken for references with
81       step 0.  In our example (the numbers indicate the distance of the
82       reuse),
83 
84       a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85       b[10] --> (*) 1, b[10] (*)
86 
87    4) The chains are combined together if possible.  If the corresponding
88       elements of two chains are always combined together with the same
89       operator, we remember just the result of this combination, instead
90       of remembering the values separately.  We may need to perform
91       reassociation to enable combining, for example
92 
93       e[i] + f[i+1] + e[i+1] + f[i]
94 
95       can be reassociated as
96 
97       (e[i] + f[i]) + (e[i+1] + f[i+1])
98 
99       and we can combine the chains for e and f into one chain.
100 
101    5) For each root reference (end of the chain) R, let N be maximum distance
102       of a reference reusing its value.  Variables R0 up to RN are created,
103       together with phi nodes that transfer values from R1 .. RN to
104       R0 .. R(N-1).
105       Initial values are loaded to R0..R(N-1) (in case not all references
106       must necessarily be accessed and they may trap, we may fail here;
107       TODO sometimes, the loads could be guarded by a check for the number
108       of iterations).  Values loaded/stored in roots are also copied to
109       RN.  Other reads are replaced with the appropriate variable Ri.
110       Everything is put to SSA form.
111 
112       As a small improvement, if R0 is dead after the root (i.e., all uses of
113       the value with the maximum distance dominate the root), we can avoid
114       creating RN and use R0 instead of it.
115 
116       In our example, we get (only the parts concerning a and b are shown):
117       for (i = 0; i < 100; i++)
118 	{
119 	  f = phi (a[0], s);
120 	  s = phi (a[1], f);
121 	  x = phi (b[10], x);
122 
123 	  f = f + s;
124 	  a[i+2] = f;
125 	  x = x + i;
126 	  b[10] = x;
127 	}
128 
129    6) Factor F for unrolling is determined as the smallest common multiple of
130       (N + 1) for each root reference (N for references for that we avoided
131       creating RN).  If F and the loop is small enough, loop is unrolled F
132       times.  The stores to RN (R0) in the copies of the loop body are
133       periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134       be coalesced and the copies can be eliminated.
135 
136       TODO -- copy propagation and other optimizations may change the live
137       ranges of the temporary registers and prevent them from being coalesced;
138       this may increase the register pressure.
139 
140       In our case, F = 2 and the (main loop of the) result is
141 
142       for (i = 0; i < ...; i += 2)
143         {
144           f = phi (a[0], f);
145           s = phi (a[1], s);
146           x = phi (b[10], x);
147 
148           f = f + s;
149           a[i+2] = f;
150           x = x + i;
151           b[10] = x;
152 
153           s = s + f;
154           a[i+3] = s;
155           x = x + i;
156           b[10] = x;
157        }
158 
159    Apart from predictive commoning on Load-Load and Store-Load chains, we
160    also support Store-Store chains -- stores killed by other store can be
161    eliminated.  Given below example:
162 
163      for (i = 0; i < n; i++)
164        {
165 	 a[i] = 1;
166 	 a[i+2] = 2;
167        }
168 
169    It can be replaced with:
170 
171      t0 = a[0];
172      t1 = a[1];
173      for (i = 0; i < n; i++)
174        {
175 	 a[i] = 1;
176 	 t2 = 2;
177 	 t0 = t1;
178 	 t1 = t2;
179        }
180      a[n] = t0;
181      a[n+1] = t1;
182 
183    If the loop runs more than 1 iterations, it can be further simplified into:
184 
185      for (i = 0; i < n; i++)
186        {
187 	 a[i] = 1;
188        }
189      a[n] = 2;
190      a[n+1] = 2;
191 
192    The interesting part is this can be viewed either as general store motion
193    or general dead store elimination in either intra/inter-iterations way.
194 
195    With trivial effort, we also support load inside Store-Store chains if the
196    load is dominated by a store statement in the same iteration of loop.  You
197    can see this as a restricted Store-Mixed-Load-Store chain.
198 
199    TODO: For now, we don't support store-store chains in multi-exit loops.  We
200    force to not unroll in case of store-store chain even if other chains might
201    ask for unroll.
202 
203    Predictive commoning can be generalized for arbitrary computations (not
204    just memory loads), and also nontrivial transfer functions (e.g., replacing
205    i * i with ii_last + 2 * i + 1), to generalize strength reduction.  */
206 
207 #include "config.h"
208 #include "system.h"
209 #include "coretypes.h"
210 #include "backend.h"
211 #include "rtl.h"
212 #include "tree.h"
213 #include "gimple.h"
214 #include "predict.h"
215 #include "tree-pass.h"
216 #include "ssa.h"
217 #include "gimple-pretty-print.h"
218 #include "alias.h"
219 #include "fold-const.h"
220 #include "cfgloop.h"
221 #include "tree-eh.h"
222 #include "gimplify.h"
223 #include "gimple-iterator.h"
224 #include "gimplify-me.h"
225 #include "tree-ssa-loop-ivopts.h"
226 #include "tree-ssa-loop-manip.h"
227 #include "tree-ssa-loop-niter.h"
228 #include "tree-ssa-loop.h"
229 #include "tree-into-ssa.h"
230 #include "tree-dfa.h"
231 #include "tree-ssa.h"
232 #include "tree-data-ref.h"
233 #include "tree-scalar-evolution.h"
234 #include "tree-affine.h"
235 #include "builtins.h"
236 
237 /* The maximum number of iterations between the considered memory
238    references.  */
239 
240 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
241 
242 /* Data references (or phi nodes that carry data reference values across
243    loop iterations).  */
244 
245 typedef class dref_d
246 {
247 public:
248   /* The reference itself.  */
249   struct data_reference *ref;
250 
251   /* The statement in that the reference appears.  */
252   gimple *stmt;
253 
254   /* In case that STMT is a phi node, this field is set to the SSA name
255      defined by it in replace_phis_by_defined_names (in order to avoid
256      pointing to phi node that got reallocated in the meantime).  */
257   tree name_defined_by_phi;
258 
259   /* Distance of the reference from the root of the chain (in number of
260      iterations of the loop).  */
261   unsigned distance;
262 
263   /* Number of iterations offset from the first reference in the component.  */
264   widest_int offset;
265 
266   /* Number of the reference in a component, in dominance ordering.  */
267   unsigned pos;
268 
269   /* True if the memory reference is always accessed when the loop is
270      entered.  */
271   unsigned always_accessed : 1;
272 } *dref;
273 
274 
275 /* Type of the chain of the references.  */
276 
277 enum chain_type
278 {
279   /* The addresses of the references in the chain are constant.  */
280   CT_INVARIANT,
281 
282   /* There are only loads in the chain.  */
283   CT_LOAD,
284 
285   /* Root of the chain is store, the rest are loads.  */
286   CT_STORE_LOAD,
287 
288   /* There are only stores in the chain.  */
289   CT_STORE_STORE,
290 
291   /* A combination of two chains.  */
292   CT_COMBINATION
293 };
294 
295 /* Chains of data references.  */
296 
297 typedef struct chain
298 {
299   /* Type of the chain.  */
300   enum chain_type type;
301 
302   /* For combination chains, the operator and the two chains that are
303      combined, and the type of the result.  */
304   enum tree_code op;
305   tree rslt_type;
306   struct chain *ch1, *ch2;
307 
308   /* The references in the chain.  */
309   vec<dref> refs;
310 
311   /* The maximum distance of the reference in the chain from the root.  */
312   unsigned length;
313 
314   /* The variables used to copy the value throughout iterations.  */
315   vec<tree> vars;
316 
317   /* Initializers for the variables.  */
318   vec<tree> inits;
319 
320   /* Finalizers for the eliminated stores.  */
321   vec<tree> finis;
322 
323   /* gimple stmts intializing the initial variables of the chain.  */
324   gimple_seq init_seq;
325 
326   /* gimple stmts finalizing the eliminated stores of the chain.  */
327   gimple_seq fini_seq;
328 
329   /* True if there is a use of a variable with the maximal distance
330      that comes after the root in the loop.  */
331   unsigned has_max_use_after : 1;
332 
333   /* True if all the memory references in the chain are always accessed.  */
334   unsigned all_always_accessed : 1;
335 
336   /* True if this chain was combined together with some other chain.  */
337   unsigned combined : 1;
338 
339   /* True if this is store elimination chain and eliminated stores store
340      loop invariant value into memory.  */
341   unsigned inv_store_elimination : 1;
342 } *chain_p;
343 
344 
345 /* Describes the knowledge about the step of the memory references in
346    the component.  */
347 
348 enum ref_step_type
349 {
350   /* The step is zero.  */
351   RS_INVARIANT,
352 
353   /* The step is nonzero.  */
354   RS_NONZERO,
355 
356   /* The step may or may not be nonzero.  */
357   RS_ANY
358 };
359 
360 /* Components of the data dependence graph.  */
361 
362 struct component
363 {
364   /* The references in the component.  */
365   vec<dref> refs;
366 
367   /* What we know about the step of the references in the component.  */
368   enum ref_step_type comp_step;
369 
370   /* True if all references in component are stores and we try to do
371      intra/inter loop iteration dead store elimination.  */
372   bool eliminate_store_p;
373 
374   /* Next component in the list.  */
375   struct component *next;
376 };
377 
378 /* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */
379 
380 static bitmap looparound_phis;
381 
382 /* Cache used by tree_to_aff_combination_expand.  */
383 
384 static hash_map<tree, name_expansion *> *name_expansions;
385 
386 /* Dumps data reference REF to FILE.  */
387 
388 extern void dump_dref (FILE *, dref);
389 void
dump_dref(FILE * file,dref ref)390 dump_dref (FILE *file, dref ref)
391 {
392   if (ref->ref)
393     {
394       fprintf (file, "    ");
395       print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
396       fprintf (file, " (id %u%s)\n", ref->pos,
397 	       DR_IS_READ (ref->ref) ? "" : ", write");
398 
399       fprintf (file, "      offset ");
400       print_decs (ref->offset, file);
401       fprintf (file, "\n");
402 
403       fprintf (file, "      distance %u\n", ref->distance);
404     }
405   else
406     {
407       if (gimple_code (ref->stmt) == GIMPLE_PHI)
408 	fprintf (file, "    looparound ref\n");
409       else
410 	fprintf (file, "    combination ref\n");
411       fprintf (file, "      in statement ");
412       print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
413       fprintf (file, "\n");
414       fprintf (file, "      distance %u\n", ref->distance);
415     }
416 
417 }
418 
419 /* Dumps CHAIN to FILE.  */
420 
421 extern void dump_chain (FILE *, chain_p);
422 void
dump_chain(FILE * file,chain_p chain)423 dump_chain (FILE *file, chain_p chain)
424 {
425   dref a;
426   const char *chain_type;
427   unsigned i;
428   tree var;
429 
430   switch (chain->type)
431     {
432     case CT_INVARIANT:
433       chain_type = "Load motion";
434       break;
435 
436     case CT_LOAD:
437       chain_type = "Loads-only";
438       break;
439 
440     case CT_STORE_LOAD:
441       chain_type = "Store-loads";
442       break;
443 
444     case CT_STORE_STORE:
445       chain_type = "Store-stores";
446       break;
447 
448     case CT_COMBINATION:
449       chain_type = "Combination";
450       break;
451 
452     default:
453       gcc_unreachable ();
454     }
455 
456   fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
457 	   chain->combined ? " (combined)" : "");
458   if (chain->type != CT_INVARIANT)
459     fprintf (file, "  max distance %u%s\n", chain->length,
460 	     chain->has_max_use_after ? "" : ", may reuse first");
461 
462   if (chain->type == CT_COMBINATION)
463     {
464       fprintf (file, "  equal to %p %s %p in type ",
465 	       (void *) chain->ch1, op_symbol_code (chain->op),
466 	       (void *) chain->ch2);
467       print_generic_expr (file, chain->rslt_type, TDF_SLIM);
468       fprintf (file, "\n");
469     }
470 
471   if (chain->vars.exists ())
472     {
473       fprintf (file, "  vars");
474       FOR_EACH_VEC_ELT (chain->vars, i, var)
475 	{
476 	  fprintf (file, " ");
477 	  print_generic_expr (file, var, TDF_SLIM);
478 	}
479       fprintf (file, "\n");
480     }
481 
482   if (chain->inits.exists ())
483     {
484       fprintf (file, "  inits");
485       FOR_EACH_VEC_ELT (chain->inits, i, var)
486 	{
487 	  fprintf (file, " ");
488 	  print_generic_expr (file, var, TDF_SLIM);
489 	}
490       fprintf (file, "\n");
491     }
492 
493   fprintf (file, "  references:\n");
494   FOR_EACH_VEC_ELT (chain->refs, i, a)
495     dump_dref (file, a);
496 
497   fprintf (file, "\n");
498 }
499 
500 /* Dumps CHAINS to FILE.  */
501 
502 extern void dump_chains (FILE *, vec<chain_p> );
503 void
dump_chains(FILE * file,vec<chain_p> chains)504 dump_chains (FILE *file, vec<chain_p> chains)
505 {
506   chain_p chain;
507   unsigned i;
508 
509   FOR_EACH_VEC_ELT (chains, i, chain)
510     dump_chain (file, chain);
511 }
512 
513 /* Dumps COMP to FILE.  */
514 
515 extern void dump_component (FILE *, struct component *);
516 void
dump_component(FILE * file,struct component * comp)517 dump_component (FILE *file, struct component *comp)
518 {
519   dref a;
520   unsigned i;
521 
522   fprintf (file, "Component%s:\n",
523 	   comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
524   FOR_EACH_VEC_ELT (comp->refs, i, a)
525     dump_dref (file, a);
526   fprintf (file, "\n");
527 }
528 
529 /* Dumps COMPS to FILE.  */
530 
531 extern void dump_components (FILE *, struct component *);
532 void
dump_components(FILE * file,struct component * comps)533 dump_components (FILE *file, struct component *comps)
534 {
535   struct component *comp;
536 
537   for (comp = comps; comp; comp = comp->next)
538     dump_component (file, comp);
539 }
540 
541 /* Frees a chain CHAIN.  */
542 
543 static void
release_chain(chain_p chain)544 release_chain (chain_p chain)
545 {
546   dref ref;
547   unsigned i;
548 
549   if (chain == NULL)
550     return;
551 
552   FOR_EACH_VEC_ELT (chain->refs, i, ref)
553     free (ref);
554 
555   chain->refs.release ();
556   chain->vars.release ();
557   chain->inits.release ();
558   if (chain->init_seq)
559     gimple_seq_discard (chain->init_seq);
560 
561   chain->finis.release ();
562   if (chain->fini_seq)
563     gimple_seq_discard (chain->fini_seq);
564 
565   free (chain);
566 }
567 
568 /* Frees CHAINS.  */
569 
570 static void
release_chains(vec<chain_p> chains)571 release_chains (vec<chain_p> chains)
572 {
573   unsigned i;
574   chain_p chain;
575 
576   FOR_EACH_VEC_ELT (chains, i, chain)
577     release_chain (chain);
578   chains.release ();
579 }
580 
581 /* Frees a component COMP.  */
582 
583 static void
release_component(struct component * comp)584 release_component (struct component *comp)
585 {
586   comp->refs.release ();
587   free (comp);
588 }
589 
590 /* Frees list of components COMPS.  */
591 
592 static void
release_components(struct component * comps)593 release_components (struct component *comps)
594 {
595   struct component *act, *next;
596 
597   for (act = comps; act; act = next)
598     {
599       next = act->next;
600       release_component (act);
601     }
602 }
603 
604 /* Finds a root of tree given by FATHERS containing A, and performs path
605    shortening.  */
606 
607 static unsigned
component_of(unsigned fathers[],unsigned a)608 component_of (unsigned fathers[], unsigned a)
609 {
610   unsigned root, n;
611 
612   for (root = a; root != fathers[root]; root = fathers[root])
613     continue;
614 
615   for (; a != root; a = n)
616     {
617       n = fathers[a];
618       fathers[a] = root;
619     }
620 
621   return root;
622 }
623 
624 /* Join operation for DFU.  FATHERS gives the tree, SIZES are sizes of the
625    components, A and B are components to merge.  */
626 
627 static void
merge_comps(unsigned fathers[],unsigned sizes[],unsigned a,unsigned b)628 merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
629 {
630   unsigned ca = component_of (fathers, a);
631   unsigned cb = component_of (fathers, b);
632 
633   if (ca == cb)
634     return;
635 
636   if (sizes[ca] < sizes[cb])
637     {
638       sizes[cb] += sizes[ca];
639       fathers[ca] = cb;
640     }
641   else
642     {
643       sizes[ca] += sizes[cb];
644       fathers[cb] = ca;
645     }
646 }
647 
648 /* Returns true if A is a reference that is suitable for predictive commoning
649    in the innermost loop that contains it.  REF_STEP is set according to the
650    step of the reference A.  */
651 
652 static bool
suitable_reference_p(struct data_reference * a,enum ref_step_type * ref_step)653 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
654 {
655   tree ref = DR_REF (a), step = DR_STEP (a);
656 
657   if (!step
658       || TREE_THIS_VOLATILE (ref)
659       || !is_gimple_reg_type (TREE_TYPE (ref))
660       || tree_could_throw_p (ref))
661     return false;
662 
663   if (integer_zerop (step))
664     *ref_step = RS_INVARIANT;
665   else if (integer_nonzerop (step))
666     *ref_step = RS_NONZERO;
667   else
668     *ref_step = RS_ANY;
669 
670   return true;
671 }
672 
673 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET.  */
674 
675 static void
aff_combination_dr_offset(struct data_reference * dr,aff_tree * offset)676 aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
677 {
678   tree type = TREE_TYPE (DR_OFFSET (dr));
679   aff_tree delta;
680 
681   tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
682 				  &name_expansions);
683   aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr)));
684   aff_combination_add (offset, &delta);
685 }
686 
687 /* Determines number of iterations of the innermost enclosing loop before B
688    refers to exactly the same location as A and stores it to OFF.  If A and
689    B do not have the same step, they never meet, or anything else fails,
690    returns false, otherwise returns true.  Both A and B are assumed to
691    satisfy suitable_reference_p.  */
692 
693 static bool
determine_offset(struct data_reference * a,struct data_reference * b,poly_widest_int * off)694 determine_offset (struct data_reference *a, struct data_reference *b,
695 		  poly_widest_int *off)
696 {
697   aff_tree diff, baseb, step;
698   tree typea, typeb;
699 
700   /* Check that both the references access the location in the same type.  */
701   typea = TREE_TYPE (DR_REF (a));
702   typeb = TREE_TYPE (DR_REF (b));
703   if (!useless_type_conversion_p (typeb, typea))
704     return false;
705 
706   /* Check whether the base address and the step of both references is the
707      same.  */
708   if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
709       || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
710     return false;
711 
712   if (integer_zerop (DR_STEP (a)))
713     {
714       /* If the references have loop invariant address, check that they access
715 	 exactly the same location.  */
716       *off = 0;
717       return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
718 	      && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
719     }
720 
721   /* Compare the offsets of the addresses, and check whether the difference
722      is a multiple of step.  */
723   aff_combination_dr_offset (a, &diff);
724   aff_combination_dr_offset (b, &baseb);
725   aff_combination_scale (&baseb, -1);
726   aff_combination_add (&diff, &baseb);
727 
728   tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
729 				  &step, &name_expansions);
730   return aff_combination_constant_multiple_p (&diff, &step, off);
731 }
732 
733 /* Returns the last basic block in LOOP for that we are sure that
734    it is executed whenever the loop is entered.  */
735 
736 static basic_block
last_always_executed_block(class loop * loop)737 last_always_executed_block (class loop *loop)
738 {
739   unsigned i;
740   auto_vec<edge> exits = get_loop_exit_edges (loop);
741   edge ex;
742   basic_block last = loop->latch;
743 
744   FOR_EACH_VEC_ELT (exits, i, ex)
745     last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
746 
747   return last;
748 }
749 
750 /* Splits dependence graph on DATAREFS described by DEPENDS to components.  */
751 
752 static struct component *
split_data_refs_to_components(class loop * loop,vec<data_reference_p> datarefs,vec<ddr_p> depends)753 split_data_refs_to_components (class loop *loop,
754 			       vec<data_reference_p> datarefs,
755 			       vec<ddr_p> depends)
756 {
757   unsigned i, n = datarefs.length ();
758   unsigned ca, ia, ib, bad;
759   unsigned *comp_father = XNEWVEC (unsigned, n + 1);
760   unsigned *comp_size = XNEWVEC (unsigned, n + 1);
761   struct component **comps;
762   struct data_reference *dr, *dra, *drb;
763   struct data_dependence_relation *ddr;
764   struct component *comp_list = NULL, *comp;
765   dref dataref;
766   /* Don't do store elimination if loop has multiple exit edges.  */
767   bool eliminate_store_p = single_exit (loop) != NULL;
768   basic_block last_always_executed = last_always_executed_block (loop);
769   auto_bitmap no_store_store_comps;
770 
771   FOR_EACH_VEC_ELT (datarefs, i, dr)
772     {
773       if (!DR_REF (dr))
774 	{
775 	  /* A fake reference for call or asm_expr that may clobber memory;
776 	     just fail.  */
777 	  goto end;
778 	}
779       /* predcom pass isn't prepared to handle calls with data references.  */
780       if (is_gimple_call (DR_STMT (dr)))
781 	goto end;
782       dr->aux = (void *) (size_t) i;
783       comp_father[i] = i;
784       comp_size[i] = 1;
785     }
786 
787   /* A component reserved for the "bad" data references.  */
788   comp_father[n] = n;
789   comp_size[n] = 1;
790 
791   FOR_EACH_VEC_ELT (datarefs, i, dr)
792     {
793       enum ref_step_type dummy;
794 
795       if (!suitable_reference_p (dr, &dummy))
796 	{
797 	  ia = (unsigned) (size_t) dr->aux;
798 	  merge_comps (comp_father, comp_size, n, ia);
799 	}
800     }
801 
802   FOR_EACH_VEC_ELT (depends, i, ddr)
803     {
804       poly_widest_int dummy_off;
805 
806       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
807 	continue;
808 
809       dra = DDR_A (ddr);
810       drb = DDR_B (ddr);
811 
812       /* Don't do store elimination if there is any unknown dependence for
813 	 any store data reference.  */
814       if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
815 	  && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
816 	      || DDR_NUM_DIST_VECTS (ddr) == 0))
817 	eliminate_store_p = false;
818 
819       ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
820       ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
821       if (ia == ib)
822 	continue;
823 
824       bad = component_of (comp_father, n);
825 
826       /* If both A and B are reads, we may ignore unsuitable dependences.  */
827       if (DR_IS_READ (dra) && DR_IS_READ (drb))
828 	{
829 	  if (ia == bad || ib == bad
830 	      || !determine_offset (dra, drb, &dummy_off))
831 	    continue;
832 	}
833       /* If A is read and B write or vice versa and there is unsuitable
834 	 dependence, instead of merging both components into a component
835 	 that will certainly not pass suitable_component_p, just put the
836 	 read into bad component, perhaps at least the write together with
837 	 all the other data refs in it's component will be optimizable.  */
838       else if (DR_IS_READ (dra) && ib != bad)
839 	{
840 	  if (ia == bad)
841 	    {
842 	      bitmap_set_bit (no_store_store_comps, ib);
843 	      continue;
844 	    }
845 	  else if (!determine_offset (dra, drb, &dummy_off))
846 	    {
847 	      bitmap_set_bit (no_store_store_comps, ib);
848 	      merge_comps (comp_father, comp_size, bad, ia);
849 	      continue;
850 	    }
851 	}
852       else if (DR_IS_READ (drb) && ia != bad)
853 	{
854 	  if (ib == bad)
855 	    {
856 	      bitmap_set_bit (no_store_store_comps, ia);
857 	      continue;
858 	    }
859 	  else if (!determine_offset (dra, drb, &dummy_off))
860 	    {
861 	      bitmap_set_bit (no_store_store_comps, ia);
862 	      merge_comps (comp_father, comp_size, bad, ib);
863 	      continue;
864 	    }
865 	}
866       else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)
867 	       && ia != bad && ib != bad
868 	       && !determine_offset (dra, drb, &dummy_off))
869 	{
870 	  merge_comps (comp_father, comp_size, bad, ia);
871 	  merge_comps (comp_father, comp_size, bad, ib);
872 	  continue;
873 	}
874 
875       merge_comps (comp_father, comp_size, ia, ib);
876     }
877 
878   if (eliminate_store_p)
879     {
880       tree niters = number_of_latch_executions (loop);
881 
882       /* Don't do store elimination if niters info is unknown because stores
883 	 in the last iteration can't be eliminated and we need to recover it
884 	 after loop.  */
885       eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know);
886     }
887 
888   comps = XCNEWVEC (struct component *, n);
889   bad = component_of (comp_father, n);
890   FOR_EACH_VEC_ELT (datarefs, i, dr)
891     {
892       ia = (unsigned) (size_t) dr->aux;
893       ca = component_of (comp_father, ia);
894       if (ca == bad)
895 	continue;
896 
897       comp = comps[ca];
898       if (!comp)
899 	{
900 	  comp = XCNEW (struct component);
901 	  comp->refs.create (comp_size[ca]);
902 	  comp->eliminate_store_p = eliminate_store_p;
903 	  comps[ca] = comp;
904 	}
905 
906       dataref = XCNEW (class dref_d);
907       dataref->ref = dr;
908       dataref->stmt = DR_STMT (dr);
909       dataref->offset = 0;
910       dataref->distance = 0;
911 
912       dataref->always_accessed
913 	      = dominated_by_p (CDI_DOMINATORS, last_always_executed,
914 				gimple_bb (dataref->stmt));
915       dataref->pos = comp->refs.length ();
916       comp->refs.quick_push (dataref);
917     }
918 
919   if (eliminate_store_p)
920     {
921       bitmap_iterator bi;
922       EXECUTE_IF_SET_IN_BITMAP (no_store_store_comps, 0, ia, bi)
923 	{
924 	  ca = component_of (comp_father, ia);
925 	  if (ca != bad)
926 	    comps[ca]->eliminate_store_p = false;
927 	}
928     }
929 
930   for (i = 0; i < n; i++)
931     {
932       comp = comps[i];
933       if (comp)
934 	{
935 	  comp->next = comp_list;
936 	  comp_list = comp;
937 	}
938     }
939   free (comps);
940 
941 end:
942   free (comp_father);
943   free (comp_size);
944   return comp_list;
945 }
946 
947 /* Returns true if the component COMP satisfies the conditions
948    described in 2) at the beginning of this file.  LOOP is the current
949    loop.  */
950 
951 static bool
suitable_component_p(class loop * loop,struct component * comp)952 suitable_component_p (class loop *loop, struct component *comp)
953 {
954   unsigned i;
955   dref a, first;
956   basic_block ba, bp = loop->header;
957   bool ok, has_write = false;
958 
959   FOR_EACH_VEC_ELT (comp->refs, i, a)
960     {
961       ba = gimple_bb (a->stmt);
962 
963       if (!just_once_each_iteration_p (loop, ba))
964 	return false;
965 
966       gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
967       bp = ba;
968 
969       if (DR_IS_WRITE (a->ref))
970 	has_write = true;
971     }
972 
973   first = comp->refs[0];
974   ok = suitable_reference_p (first->ref, &comp->comp_step);
975   gcc_assert (ok);
976   first->offset = 0;
977 
978   for (i = 1; comp->refs.iterate (i, &a); i++)
979     {
980       /* Polynomial offsets are no use, since we need to know the
981 	 gap between iteration numbers at compile time.  */
982       poly_widest_int offset;
983       if (!determine_offset (first->ref, a->ref, &offset)
984 	  || !offset.is_constant (&a->offset))
985 	return false;
986 
987       enum ref_step_type a_step;
988       gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
989 			   && a_step == comp->comp_step);
990     }
991 
992   /* If there is a write inside the component, we must know whether the
993      step is nonzero or not -- we would not otherwise be able to recognize
994      whether the value accessed by reads comes from the OFFSET-th iteration
995      or the previous one.  */
996   if (has_write && comp->comp_step == RS_ANY)
997     return false;
998 
999   return true;
1000 }
1001 
1002 /* Check the conditions on references inside each of components COMPS,
1003    and remove the unsuitable components from the list.  The new list
1004    of components is returned.  The conditions are described in 2) at
1005    the beginning of this file.  LOOP is the current loop.  */
1006 
1007 static struct component *
filter_suitable_components(class loop * loop,struct component * comps)1008 filter_suitable_components (class loop *loop, struct component *comps)
1009 {
1010   struct component **comp, *act;
1011 
1012   for (comp = &comps; *comp; )
1013     {
1014       act = *comp;
1015       if (suitable_component_p (loop, act))
1016 	comp = &act->next;
1017       else
1018 	{
1019 	  dref ref;
1020 	  unsigned i;
1021 
1022 	  *comp = act->next;
1023 	  FOR_EACH_VEC_ELT (act->refs, i, ref)
1024 	    free (ref);
1025 	  release_component (act);
1026 	}
1027     }
1028 
1029   return comps;
1030 }
1031 
1032 /* Compares two drefs A and B by their offset and position.  Callback for
1033    qsort.  */
1034 
1035 static int
order_drefs(const void * a,const void * b)1036 order_drefs (const void *a, const void *b)
1037 {
1038   const dref *const da = (const dref *) a;
1039   const dref *const db = (const dref *) b;
1040   int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
1041 
1042   if (offcmp != 0)
1043     return offcmp;
1044 
1045   return (*da)->pos - (*db)->pos;
1046 }
1047 
1048 /* Compares two drefs A and B by their position.  Callback for qsort.  */
1049 
1050 static int
order_drefs_by_pos(const void * a,const void * b)1051 order_drefs_by_pos (const void *a, const void *b)
1052 {
1053   const dref *const da = (const dref *) a;
1054   const dref *const db = (const dref *) b;
1055 
1056   return (*da)->pos - (*db)->pos;
1057 }
1058 
1059 /* Returns root of the CHAIN.  */
1060 
1061 static inline dref
get_chain_root(chain_p chain)1062 get_chain_root (chain_p chain)
1063 {
1064   return chain->refs[0];
1065 }
1066 
1067 /* Given CHAIN, returns the last write ref at DISTANCE, or NULL if it doesn't
1068    exist.  */
1069 
1070 static inline dref
get_chain_last_write_at(chain_p chain,unsigned distance)1071 get_chain_last_write_at (chain_p chain, unsigned distance)
1072 {
1073   for (unsigned i = chain->refs.length (); i > 0; i--)
1074     if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1075 	&& distance == chain->refs[i - 1]->distance)
1076       return chain->refs[i - 1];
1077 
1078   return NULL;
1079 }
1080 
1081 /* Given CHAIN, returns the last write ref with the same distance before load
1082    at index LOAD_IDX, or NULL if it doesn't exist.  */
1083 
1084 static inline dref
get_chain_last_write_before_load(chain_p chain,unsigned load_idx)1085 get_chain_last_write_before_load (chain_p chain, unsigned load_idx)
1086 {
1087   gcc_assert (load_idx < chain->refs.length ());
1088 
1089   unsigned distance = chain->refs[load_idx]->distance;
1090 
1091   for (unsigned i = load_idx; i > 0; i--)
1092     if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1093 	&& distance == chain->refs[i - 1]->distance)
1094       return chain->refs[i - 1];
1095 
1096   return NULL;
1097 }
1098 
1099 /* Adds REF to the chain CHAIN.  */
1100 
1101 static void
add_ref_to_chain(chain_p chain,dref ref)1102 add_ref_to_chain (chain_p chain, dref ref)
1103 {
1104   dref root = get_chain_root (chain);
1105 
1106   gcc_assert (wi::les_p (root->offset, ref->offset));
1107   widest_int dist = ref->offset - root->offset;
1108   gcc_assert (wi::fits_uhwi_p (dist));
1109 
1110   chain->refs.safe_push (ref);
1111 
1112   ref->distance = dist.to_uhwi ();
1113 
1114   if (ref->distance >= chain->length)
1115     {
1116       chain->length = ref->distance;
1117       chain->has_max_use_after = false;
1118     }
1119 
1120   /* Promote this chain to CT_STORE_STORE if it has multiple stores.  */
1121   if (DR_IS_WRITE (ref->ref))
1122     chain->type = CT_STORE_STORE;
1123 
1124   /* Don't set the flag for store-store chain since there is no use.  */
1125   if (chain->type != CT_STORE_STORE
1126       && ref->distance == chain->length
1127       && ref->pos > root->pos)
1128     chain->has_max_use_after = true;
1129 
1130   chain->all_always_accessed &= ref->always_accessed;
1131 }
1132 
1133 /* Returns the chain for invariant component COMP.  */
1134 
1135 static chain_p
make_invariant_chain(struct component * comp)1136 make_invariant_chain (struct component *comp)
1137 {
1138   chain_p chain = XCNEW (struct chain);
1139   unsigned i;
1140   dref ref;
1141 
1142   chain->type = CT_INVARIANT;
1143 
1144   chain->all_always_accessed = true;
1145 
1146   FOR_EACH_VEC_ELT (comp->refs, i, ref)
1147     {
1148       chain->refs.safe_push (ref);
1149       chain->all_always_accessed &= ref->always_accessed;
1150     }
1151 
1152   chain->inits = vNULL;
1153   chain->finis = vNULL;
1154 
1155   return chain;
1156 }
1157 
1158 /* Make a new chain of type TYPE rooted at REF.  */
1159 
1160 static chain_p
make_rooted_chain(dref ref,enum chain_type type)1161 make_rooted_chain (dref ref, enum chain_type type)
1162 {
1163   chain_p chain = XCNEW (struct chain);
1164 
1165   chain->type = type;
1166   chain->refs.safe_push (ref);
1167   chain->all_always_accessed = ref->always_accessed;
1168   ref->distance = 0;
1169 
1170   chain->inits = vNULL;
1171   chain->finis = vNULL;
1172 
1173   return chain;
1174 }
1175 
1176 /* Returns true if CHAIN is not trivial.  */
1177 
1178 static bool
nontrivial_chain_p(chain_p chain)1179 nontrivial_chain_p (chain_p chain)
1180 {
1181   return chain != NULL && chain->refs.length () > 1;
1182 }
1183 
1184 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1185    is no such name.  */
1186 
1187 static tree
name_for_ref(dref ref)1188 name_for_ref (dref ref)
1189 {
1190   tree name;
1191 
1192   if (is_gimple_assign (ref->stmt))
1193     {
1194       if (!ref->ref || DR_IS_READ (ref->ref))
1195 	name = gimple_assign_lhs (ref->stmt);
1196       else
1197 	name = gimple_assign_rhs1 (ref->stmt);
1198     }
1199   else
1200     name = PHI_RESULT (ref->stmt);
1201 
1202   return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1203 }
1204 
1205 /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1206    iterations of the innermost enclosing loop).  */
1207 
1208 static bool
valid_initializer_p(struct data_reference * ref,unsigned distance,struct data_reference * root)1209 valid_initializer_p (struct data_reference *ref,
1210 		     unsigned distance, struct data_reference *root)
1211 {
1212   aff_tree diff, base, step;
1213   poly_widest_int off;
1214 
1215   /* Both REF and ROOT must be accessing the same object.  */
1216   if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
1217     return false;
1218 
1219   /* The initializer is defined outside of loop, hence its address must be
1220      invariant inside the loop.  */
1221   gcc_assert (integer_zerop (DR_STEP (ref)));
1222 
1223   /* If the address of the reference is invariant, initializer must access
1224      exactly the same location.  */
1225   if (integer_zerop (DR_STEP (root)))
1226     return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
1227 	    && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));
1228 
1229   /* Verify that this index of REF is equal to the root's index at
1230      -DISTANCE-th iteration.  */
1231   aff_combination_dr_offset (root, &diff);
1232   aff_combination_dr_offset (ref, &base);
1233   aff_combination_scale (&base, -1);
1234   aff_combination_add (&diff, &base);
1235 
1236   tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
1237 				  &step, &name_expansions);
1238   if (!aff_combination_constant_multiple_p (&diff, &step, &off))
1239     return false;
1240 
1241   if (maybe_ne (off, distance))
1242     return false;
1243 
1244   return true;
1245 }
1246 
1247 /* Finds looparound phi node of LOOP that copies the value of REF, and if its
1248    initial value is correct (equal to initial value of REF shifted by one
1249    iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
1250    is the root of the current chain.  */
1251 
1252 static gphi *
find_looparound_phi(class loop * loop,dref ref,dref root)1253 find_looparound_phi (class loop *loop, dref ref, dref root)
1254 {
1255   tree name, init, init_ref;
1256   gphi *phi = NULL;
1257   gimple *init_stmt;
1258   edge latch = loop_latch_edge (loop);
1259   struct data_reference init_dr;
1260   gphi_iterator psi;
1261 
1262   if (is_gimple_assign (ref->stmt))
1263     {
1264       if (DR_IS_READ (ref->ref))
1265 	name = gimple_assign_lhs (ref->stmt);
1266       else
1267 	name = gimple_assign_rhs1 (ref->stmt);
1268     }
1269   else
1270     name = PHI_RESULT (ref->stmt);
1271   if (!name)
1272     return NULL;
1273 
1274   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1275     {
1276       phi = psi.phi ();
1277       if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
1278 	break;
1279     }
1280 
1281   if (gsi_end_p (psi))
1282     return NULL;
1283 
1284   init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1285   if (TREE_CODE (init) != SSA_NAME)
1286     return NULL;
1287   init_stmt = SSA_NAME_DEF_STMT (init);
1288   if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
1289     return NULL;
1290   gcc_assert (gimple_assign_lhs (init_stmt) == init);
1291 
1292   init_ref = gimple_assign_rhs1 (init_stmt);
1293   if (!REFERENCE_CLASS_P (init_ref)
1294       && !DECL_P (init_ref))
1295     return NULL;
1296 
1297   /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1298      loop enclosing PHI).  */
1299   memset (&init_dr, 0, sizeof (struct data_reference));
1300   DR_REF (&init_dr) = init_ref;
1301   DR_STMT (&init_dr) = phi;
1302   if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop,
1303 			     init_stmt))
1304     return NULL;
1305 
1306   if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
1307     return NULL;
1308 
1309   return phi;
1310 }
1311 
1312 /* Adds a reference for the looparound copy of REF in PHI to CHAIN.  */
1313 
1314 static void
insert_looparound_copy(chain_p chain,dref ref,gphi * phi)1315 insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
1316 {
1317   dref nw = XCNEW (class dref_d), aref;
1318   unsigned i;
1319 
1320   nw->stmt = phi;
1321   nw->distance = ref->distance + 1;
1322   nw->always_accessed = 1;
1323 
1324   FOR_EACH_VEC_ELT (chain->refs, i, aref)
1325     if (aref->distance >= nw->distance)
1326       break;
1327   chain->refs.safe_insert (i, nw);
1328 
1329   if (nw->distance > chain->length)
1330     {
1331       chain->length = nw->distance;
1332       chain->has_max_use_after = false;
1333     }
1334 }
1335 
1336 /* For references in CHAIN that are copied around the LOOP (created previously
1337    by PRE, or by user), add the results of such copies to the chain.  This
1338    enables us to remove the copies by unrolling, and may need less registers
1339    (also, it may allow us to combine chains together).  */
1340 
1341 static void
add_looparound_copies(class loop * loop,chain_p chain)1342 add_looparound_copies (class loop *loop, chain_p chain)
1343 {
1344   unsigned i;
1345   dref ref, root = get_chain_root (chain);
1346   gphi *phi;
1347 
1348   if (chain->type == CT_STORE_STORE)
1349     return;
1350 
1351   FOR_EACH_VEC_ELT (chain->refs, i, ref)
1352     {
1353       phi = find_looparound_phi (loop, ref, root);
1354       if (!phi)
1355 	continue;
1356 
1357       bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1358       insert_looparound_copy (chain, ref, phi);
1359     }
1360 }
1361 
1362 /* Find roots of the values and determine distances in the component COMP.
1363    The references are redistributed into CHAINS.  LOOP is the current
1364    loop.  */
1365 
1366 static void
determine_roots_comp(class loop * loop,struct component * comp,vec<chain_p> * chains)1367 determine_roots_comp (class loop *loop,
1368 		      struct component *comp,
1369 		      vec<chain_p> *chains)
1370 {
1371   unsigned i;
1372   dref a;
1373   chain_p chain = NULL;
1374   widest_int last_ofs = 0;
1375   enum chain_type type;
1376 
1377   /* Invariants are handled specially.  */
1378   if (comp->comp_step == RS_INVARIANT)
1379     {
1380       chain = make_invariant_chain (comp);
1381       chains->safe_push (chain);
1382       return;
1383     }
1384 
1385   /* Trivial component.  */
1386   if (comp->refs.length () <= 1)
1387     {
1388       if (comp->refs.length () == 1)
1389 	{
1390 	  free (comp->refs[0]);
1391 	  comp->refs.truncate (0);
1392 	}
1393       return;
1394     }
1395 
1396   comp->refs.qsort (order_drefs);
1397 
1398   /* For Store-Store chain, we only support load if it is dominated by a
1399      store statement in the same iteration of loop.  */
1400   if (comp->eliminate_store_p)
1401     for (a = NULL, i = 0; i < comp->refs.length (); i++)
1402       {
1403 	if (DR_IS_WRITE (comp->refs[i]->ref))
1404 	  a = comp->refs[i];
1405 	else if (a == NULL || a->offset != comp->refs[i]->offset)
1406 	  {
1407 	    /* If there is load that is not dominated by a store in the
1408 	       same iteration of loop, clear the flag so no Store-Store
1409 	       chain is generated for this component.  */
1410 	    comp->eliminate_store_p = false;
1411 	    break;
1412 	  }
1413       }
1414 
1415   /* Determine roots and create chains for components.  */
1416   FOR_EACH_VEC_ELT (comp->refs, i, a)
1417     {
1418       if (!chain
1419 	  || (chain->type == CT_LOAD && DR_IS_WRITE (a->ref))
1420 	  || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref))
1421 	  || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
1422 	{
1423 	  if (nontrivial_chain_p (chain))
1424 	    {
1425 	      add_looparound_copies (loop, chain);
1426 	      chains->safe_push (chain);
1427 	    }
1428 	  else
1429 	    release_chain (chain);
1430 
1431 	  /* Determine type of the chain.  If the root reference is a load,
1432 	     this can only be a CT_LOAD chain; other chains are intialized
1433 	     to CT_STORE_LOAD and might be promoted to CT_STORE_STORE when
1434 	     new reference is added.  */
1435 	  type = DR_IS_READ (a->ref) ? CT_LOAD : CT_STORE_LOAD;
1436 	  chain = make_rooted_chain (a, type);
1437 	  last_ofs = a->offset;
1438 	  continue;
1439 	}
1440 
1441       add_ref_to_chain (chain, a);
1442     }
1443 
1444   if (nontrivial_chain_p (chain))
1445     {
1446       add_looparound_copies (loop, chain);
1447       chains->safe_push (chain);
1448     }
1449   else
1450     release_chain (chain);
1451 }
1452 
1453 /* Find roots of the values and determine distances in components COMPS, and
1454    separates the references to CHAINS.  LOOP is the current loop.  */
1455 
1456 static void
determine_roots(class loop * loop,struct component * comps,vec<chain_p> * chains)1457 determine_roots (class loop *loop,
1458 		 struct component *comps, vec<chain_p> *chains)
1459 {
1460   struct component *comp;
1461 
1462   for (comp = comps; comp; comp = comp->next)
1463     determine_roots_comp (loop, comp, chains);
1464 }
1465 
1466 /* Replace the reference in statement STMT with temporary variable
1467    NEW_TREE.  If SET is true, NEW_TREE is instead initialized to the value of
1468    the reference in the statement.  IN_LHS is true if the reference
1469    is in the lhs of STMT, false if it is in rhs.  */
1470 
1471 static void
replace_ref_with(gimple * stmt,tree new_tree,bool set,bool in_lhs)1472 replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
1473 {
1474   tree val;
1475   gassign *new_stmt;
1476   gimple_stmt_iterator bsi, psi;
1477 
1478   if (gimple_code (stmt) == GIMPLE_PHI)
1479     {
1480       gcc_assert (!in_lhs && !set);
1481 
1482       val = PHI_RESULT (stmt);
1483       bsi = gsi_after_labels (gimple_bb (stmt));
1484       psi = gsi_for_stmt (stmt);
1485       remove_phi_node (&psi, false);
1486 
1487       /* Turn the phi node into GIMPLE_ASSIGN.  */
1488       new_stmt = gimple_build_assign (val, new_tree);
1489       gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
1490       return;
1491     }
1492 
1493   /* Since the reference is of gimple_reg type, it should only
1494      appear as lhs or rhs of modify statement.  */
1495   gcc_assert (is_gimple_assign (stmt));
1496 
1497   bsi = gsi_for_stmt (stmt);
1498 
1499   /* If we do not need to initialize NEW_TREE, just replace the use of OLD.  */
1500   if (!set)
1501     {
1502       gcc_assert (!in_lhs);
1503       gimple_assign_set_rhs_from_tree (&bsi, new_tree);
1504       stmt = gsi_stmt (bsi);
1505       update_stmt (stmt);
1506       return;
1507     }
1508 
1509   if (in_lhs)
1510     {
1511       /* We have statement
1512 
1513 	 OLD = VAL
1514 
1515 	 If OLD is a memory reference, then VAL is gimple_val, and we transform
1516 	 this to
1517 
1518 	 OLD = VAL
1519 	 NEW = VAL
1520 
1521 	 Otherwise, we are replacing a combination chain,
1522 	 VAL is the expression that performs the combination, and OLD is an
1523 	 SSA name.  In this case, we transform the assignment to
1524 
1525 	 OLD = VAL
1526 	 NEW = OLD
1527 
1528 	 */
1529 
1530       val = gimple_assign_lhs (stmt);
1531       if (TREE_CODE (val) != SSA_NAME)
1532 	{
1533 	  val = gimple_assign_rhs1 (stmt);
1534 	  gcc_assert (gimple_assign_single_p (stmt));
1535 	  if (TREE_CLOBBER_P (val))
1536 	    val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
1537 	  else
1538 	    gcc_assert (gimple_assign_copy_p (stmt));
1539 	}
1540     }
1541   else
1542     {
1543       /* VAL = OLD
1544 
1545 	 is transformed to
1546 
1547 	 VAL = OLD
1548 	 NEW = VAL  */
1549 
1550       val = gimple_assign_lhs (stmt);
1551     }
1552 
1553   new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
1554   gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
1555 }
1556 
1557 /* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
1558    of the loop it was analyzed in.  Append init stmts to STMTS.  */
1559 
1560 static tree
1561 ref_at_iteration (data_reference_p dr, int iter,
1562 		  gimple_seq *stmts, tree niters = NULL_TREE)
1563 {
1564   tree off = DR_OFFSET (dr);
1565   tree coff = DR_INIT (dr);
1566   tree ref = DR_REF (dr);
1567   enum tree_code ref_code = ERROR_MARK;
1568   tree ref_type = NULL_TREE;
1569   tree ref_op1 = NULL_TREE;
1570   tree ref_op2 = NULL_TREE;
1571   tree new_offset;
1572 
1573   if (iter != 0)
1574     {
1575       new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter));
1576       if (TREE_CODE (new_offset) == INTEGER_CST)
1577 	coff = size_binop (PLUS_EXPR, coff, new_offset);
1578       else
1579 	off = size_binop (PLUS_EXPR, off, new_offset);
1580     }
1581 
1582   if (niters != NULL_TREE)
1583     {
1584       niters = fold_convert (ssizetype, niters);
1585       new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters);
1586       if (TREE_CODE (niters) == INTEGER_CST)
1587 	coff = size_binop (PLUS_EXPR, coff, new_offset);
1588       else
1589 	off = size_binop (PLUS_EXPR, off, new_offset);
1590     }
1591 
1592   /* While data-ref analysis punts on bit offsets it still handles
1593      bitfield accesses at byte boundaries.  Cope with that.  Note that
1594      if the bitfield object also starts at a byte-boundary we can simply
1595      replicate the COMPONENT_REF, but we have to subtract the component's
1596      byte-offset from the MEM_REF address first.
1597      Otherwise we simply build a BIT_FIELD_REF knowing that the bits
1598      start at offset zero.  */
1599   if (TREE_CODE (ref) == COMPONENT_REF
1600       && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
1601     {
1602       unsigned HOST_WIDE_INT boff;
1603       tree field = TREE_OPERAND (ref, 1);
1604       tree offset = component_ref_field_offset (ref);
1605       ref_type = TREE_TYPE (ref);
1606       boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
1607       /* This can occur in Ada.  See the comment in get_bit_range.  */
1608       if (boff % BITS_PER_UNIT != 0
1609 	  || !tree_fits_uhwi_p (offset))
1610 	{
1611 	  ref_code = BIT_FIELD_REF;
1612 	  ref_op1 = DECL_SIZE (field);
1613 	  ref_op2 = bitsize_zero_node;
1614 	}
1615       else
1616 	{
1617 	  boff >>= LOG2_BITS_PER_UNIT;
1618 	  boff += tree_to_uhwi (offset);
1619 	  coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
1620 	  ref_code = COMPONENT_REF;
1621 	  ref_op1 = field;
1622 	  ref_op2 = TREE_OPERAND (ref, 2);
1623 	  ref = TREE_OPERAND (ref, 0);
1624 	}
1625     }
1626   tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
1627   addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
1628 				 is_gimple_mem_ref_addr, NULL_TREE);
1629   tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
1630   tree type = build_aligned_type (TREE_TYPE (ref),
1631 				  get_object_alignment (ref));
1632   ref = build2 (MEM_REF, type, addr, alias_ptr);
1633   if (ref_type)
1634     ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
1635   return ref;
1636 }
1637 
1638 /* Get the initialization expression for the INDEX-th temporary variable
1639    of CHAIN.  */
1640 
1641 static tree
get_init_expr(chain_p chain,unsigned index)1642 get_init_expr (chain_p chain, unsigned index)
1643 {
1644   if (chain->type == CT_COMBINATION)
1645     {
1646       tree e1 = get_init_expr (chain->ch1, index);
1647       tree e2 = get_init_expr (chain->ch2, index);
1648 
1649       return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1650     }
1651   else
1652     return chain->inits[index];
1653 }
1654 
1655 /* Returns a new temporary variable used for the I-th variable carrying
1656    value of REF.  The variable's uid is marked in TMP_VARS.  */
1657 
1658 static tree
predcom_tmp_var(tree ref,unsigned i,bitmap tmp_vars)1659 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1660 {
1661   tree type = TREE_TYPE (ref);
1662   /* We never access the components of the temporary variable in predictive
1663      commoning.  */
1664   tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1665   bitmap_set_bit (tmp_vars, DECL_UID (var));
1666   return var;
1667 }
1668 
1669 /* Creates the variables for CHAIN, as well as phi nodes for them and
1670    initialization on entry to LOOP.  Uids of the newly created
1671    temporary variables are marked in TMP_VARS.  */
1672 
1673 static void
initialize_root_vars(class loop * loop,chain_p chain,bitmap tmp_vars)1674 initialize_root_vars (class loop *loop, chain_p chain, bitmap tmp_vars)
1675 {
1676   unsigned i;
1677   unsigned n = chain->length;
1678   dref root = get_chain_root (chain);
1679   bool reuse_first = !chain->has_max_use_after;
1680   tree ref, init, var, next;
1681   gphi *phi;
1682   gimple_seq stmts;
1683   edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1684 
1685   /* If N == 0, then all the references are within the single iteration.  And
1686      since this is an nonempty chain, reuse_first cannot be true.  */
1687   gcc_assert (n > 0 || !reuse_first);
1688 
1689   chain->vars.create (n + 1);
1690 
1691   if (chain->type == CT_COMBINATION)
1692     ref = gimple_assign_lhs (root->stmt);
1693   else
1694     ref = DR_REF (root->ref);
1695 
1696   for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1697     {
1698       var = predcom_tmp_var (ref, i, tmp_vars);
1699       chain->vars.quick_push (var);
1700     }
1701   if (reuse_first)
1702     chain->vars.quick_push (chain->vars[0]);
1703 
1704   FOR_EACH_VEC_ELT (chain->vars, i, var)
1705     chain->vars[i] = make_ssa_name (var);
1706 
1707   for (i = 0; i < n; i++)
1708     {
1709       var = chain->vars[i];
1710       next = chain->vars[i + 1];
1711       init = get_init_expr (chain, i);
1712 
1713       init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1714       if (stmts)
1715 	gsi_insert_seq_on_edge_immediate (entry, stmts);
1716 
1717       phi = create_phi_node (var, loop->header);
1718       add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1719       add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1720     }
1721 }
1722 
1723 /* For inter-iteration store elimination CHAIN in LOOP, returns true if
1724    all stores to be eliminated store loop invariant values into memory.
1725    In this case, we can use these invariant values directly after LOOP.  */
1726 
1727 static bool
is_inv_store_elimination_chain(class loop * loop,chain_p chain)1728 is_inv_store_elimination_chain (class loop *loop, chain_p chain)
1729 {
1730   if (chain->length == 0 || chain->type != CT_STORE_STORE)
1731     return false;
1732 
1733   gcc_assert (!chain->has_max_use_after);
1734 
1735   /* If loop iterates for unknown times or fewer times than chain->length,
1736      we still need to setup root variable and propagate it with PHI node.  */
1737   tree niters = number_of_latch_executions (loop);
1738   if (TREE_CODE (niters) != INTEGER_CST
1739       || wi::leu_p (wi::to_wide (niters), chain->length))
1740     return false;
1741 
1742   /* Check stores in chain for elimination if they only store loop invariant
1743      values.  */
1744   for (unsigned i = 0; i < chain->length; i++)
1745     {
1746       dref a = get_chain_last_write_at (chain, i);
1747       if (a == NULL)
1748 	continue;
1749 
1750       gimple *def_stmt, *stmt = a->stmt;
1751       if (!gimple_assign_single_p (stmt))
1752 	return false;
1753 
1754       tree val = gimple_assign_rhs1 (stmt);
1755       if (TREE_CLOBBER_P (val))
1756 	return false;
1757 
1758       if (CONSTANT_CLASS_P (val))
1759 	continue;
1760 
1761       if (TREE_CODE (val) != SSA_NAME)
1762 	return false;
1763 
1764       def_stmt = SSA_NAME_DEF_STMT (val);
1765       if (gimple_nop_p (def_stmt))
1766 	continue;
1767 
1768       if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
1769 	return false;
1770     }
1771   return true;
1772 }
1773 
1774 /* Creates root variables for store elimination CHAIN in which stores for
1775    elimination only store loop invariant values.  In this case, we neither
1776    need to load root variables before loop nor propagate it with PHI nodes.  */
1777 
1778 static void
initialize_root_vars_store_elim_1(chain_p chain)1779 initialize_root_vars_store_elim_1 (chain_p chain)
1780 {
1781   tree var;
1782   unsigned i, n = chain->length;
1783 
1784   chain->vars.create (n);
1785   chain->vars.safe_grow_cleared (n, true);
1786 
1787   /* Initialize root value for eliminated stores at each distance.  */
1788   for (i = 0; i < n; i++)
1789     {
1790       dref a = get_chain_last_write_at (chain, i);
1791       if (a == NULL)
1792 	continue;
1793 
1794       var = gimple_assign_rhs1 (a->stmt);
1795       chain->vars[a->distance] = var;
1796     }
1797 
1798   /* We don't propagate values with PHI nodes, so manually propagate value
1799      to bubble positions.  */
1800   var = chain->vars[0];
1801   for (i = 1; i < n; i++)
1802     {
1803       if (chain->vars[i] != NULL_TREE)
1804 	{
1805 	  var = chain->vars[i];
1806 	  continue;
1807 	}
1808       chain->vars[i] = var;
1809     }
1810 
1811   /* Revert the vector.  */
1812   for (i = 0; i < n / 2; i++)
1813     std::swap (chain->vars[i], chain->vars[n - i - 1]);
1814 }
1815 
1816 /* Creates root variables for store elimination CHAIN in which stores for
1817    elimination store loop variant values.  In this case, we may need to
1818    load root variables before LOOP and propagate it with PHI nodes.  Uids
1819    of the newly created root variables are marked in TMP_VARS.  */
1820 
1821 static void
initialize_root_vars_store_elim_2(class loop * loop,chain_p chain,bitmap tmp_vars)1822 initialize_root_vars_store_elim_2 (class loop *loop,
1823 				   chain_p chain, bitmap tmp_vars)
1824 {
1825   unsigned i, n = chain->length;
1826   tree ref, init, var, next, val, phi_result;
1827   gimple *stmt;
1828   gimple_seq stmts;
1829 
1830   chain->vars.create (n);
1831 
1832   ref = DR_REF (get_chain_root (chain)->ref);
1833   for (i = 0; i < n; i++)
1834     {
1835       var = predcom_tmp_var (ref, i, tmp_vars);
1836       chain->vars.quick_push (var);
1837     }
1838 
1839   FOR_EACH_VEC_ELT (chain->vars, i, var)
1840     chain->vars[i] = make_ssa_name (var);
1841 
1842   /* Root values are either rhs operand of stores to be eliminated, or
1843      loaded from memory before loop.  */
1844   auto_vec<tree> vtemps;
1845   vtemps.safe_grow_cleared (n, true);
1846   for (i = 0; i < n; i++)
1847     {
1848       init = get_init_expr (chain, i);
1849       if (init == NULL_TREE)
1850 	{
1851 	  /* Root value is rhs operand of the store to be eliminated if
1852 	     it isn't loaded from memory before loop.  */
1853 	  dref a = get_chain_last_write_at (chain, i);
1854 	  val = gimple_assign_rhs1 (a->stmt);
1855 	  if (TREE_CLOBBER_P (val))
1856 	    {
1857 	      val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var));
1858 	      gimple_assign_set_rhs1 (a->stmt, val);
1859 	    }
1860 
1861 	  vtemps[n - i - 1] = val;
1862 	}
1863       else
1864 	{
1865 	  edge latch = loop_latch_edge (loop);
1866 	  edge entry = loop_preheader_edge (loop);
1867 
1868 	  /* Root value is loaded from memory before loop, we also need
1869 	     to add PHI nodes to propagate the value across iterations.  */
1870 	  init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1871 	  if (stmts)
1872 	    gsi_insert_seq_on_edge_immediate (entry, stmts);
1873 
1874 	  next = chain->vars[n - i];
1875 	  phi_result = copy_ssa_name (next);
1876 	  gphi *phi = create_phi_node (phi_result, loop->header);
1877 	  add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1878 	  add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1879 	  vtemps[n - i - 1] = phi_result;
1880 	}
1881     }
1882 
1883   /* Find the insertion position.  */
1884   dref last = get_chain_root (chain);
1885   for (i = 0; i < chain->refs.length (); i++)
1886     {
1887       if (chain->refs[i]->pos > last->pos)
1888 	last = chain->refs[i];
1889     }
1890 
1891   gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt);
1892 
1893   /* Insert statements copying root value to root variable.  */
1894   for (i = 0; i < n; i++)
1895     {
1896       var = chain->vars[i];
1897       val = vtemps[i];
1898       stmt = gimple_build_assign (var, val);
1899       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1900     }
1901 }
1902 
1903 /* Generates stores for CHAIN's eliminated stores in LOOP's last
1904    (CHAIN->length - 1) iterations.  */
1905 
1906 static void
finalize_eliminated_stores(class loop * loop,chain_p chain)1907 finalize_eliminated_stores (class loop *loop, chain_p chain)
1908 {
1909   unsigned i, n = chain->length;
1910 
1911   for (i = 0; i < n; i++)
1912     {
1913       tree var = chain->vars[i];
1914       tree fini = chain->finis[n - i - 1];
1915       gimple *stmt = gimple_build_assign (fini, var);
1916 
1917       gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt);
1918     }
1919 
1920   if (chain->fini_seq)
1921     {
1922       gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq);
1923       chain->fini_seq = NULL;
1924     }
1925 }
1926 
1927 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1928    initialization on entry to LOOP if necessary.  The ssa name for the variable
1929    is stored in VARS.  If WRITTEN is true, also a phi node to copy its value
1930    around the loop is created.  Uid of the newly created temporary variable
1931    is marked in TMP_VARS.  INITS is the list containing the (single)
1932    initializer.  */
1933 
1934 static void
initialize_root_vars_lm(class loop * loop,dref root,bool written,vec<tree> * vars,vec<tree> inits,bitmap tmp_vars)1935 initialize_root_vars_lm (class loop *loop, dref root, bool written,
1936 			 vec<tree> *vars, vec<tree> inits,
1937 			 bitmap tmp_vars)
1938 {
1939   unsigned i;
1940   tree ref = DR_REF (root->ref), init, var, next;
1941   gimple_seq stmts;
1942   gphi *phi;
1943   edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1944 
1945   /* Find the initializer for the variable, and check that it cannot
1946      trap.  */
1947   init = inits[0];
1948 
1949   vars->create (written ? 2 : 1);
1950   var = predcom_tmp_var (ref, 0, tmp_vars);
1951   vars->quick_push (var);
1952   if (written)
1953     vars->quick_push ((*vars)[0]);
1954 
1955   FOR_EACH_VEC_ELT (*vars, i, var)
1956     (*vars)[i] = make_ssa_name (var);
1957 
1958   var = (*vars)[0];
1959 
1960   init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1961   if (stmts)
1962     gsi_insert_seq_on_edge_immediate (entry, stmts);
1963 
1964   if (written)
1965     {
1966       next = (*vars)[1];
1967       phi = create_phi_node (var, loop->header);
1968       add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1969       add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1970     }
1971   else
1972     {
1973       gassign *init_stmt = gimple_build_assign (var, init);
1974       gsi_insert_on_edge_immediate (entry, init_stmt);
1975     }
1976 }
1977 
1978 
1979 /* Execute load motion for references in chain CHAIN.  Uids of the newly
1980    created temporary variables are marked in TMP_VARS.  */
1981 
1982 static void
execute_load_motion(class loop * loop,chain_p chain,bitmap tmp_vars)1983 execute_load_motion (class loop *loop, chain_p chain, bitmap tmp_vars)
1984 {
1985   auto_vec<tree> vars;
1986   dref a;
1987   unsigned n_writes = 0, ridx, i;
1988   tree var;
1989 
1990   gcc_assert (chain->type == CT_INVARIANT);
1991   gcc_assert (!chain->combined);
1992   FOR_EACH_VEC_ELT (chain->refs, i, a)
1993     if (DR_IS_WRITE (a->ref))
1994       n_writes++;
1995 
1996   /* If there are no reads in the loop, there is nothing to do.  */
1997   if (n_writes == chain->refs.length ())
1998     return;
1999 
2000   initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
2001 			   &vars, chain->inits, tmp_vars);
2002 
2003   ridx = 0;
2004   FOR_EACH_VEC_ELT (chain->refs, i, a)
2005     {
2006       bool is_read = DR_IS_READ (a->ref);
2007 
2008       if (DR_IS_WRITE (a->ref))
2009 	{
2010 	  n_writes--;
2011 	  if (n_writes)
2012 	    {
2013 	      var = vars[0];
2014 	      var = make_ssa_name (SSA_NAME_VAR (var));
2015 	      vars[0] = var;
2016 	    }
2017 	  else
2018 	    ridx = 1;
2019 	}
2020 
2021       replace_ref_with (a->stmt, vars[ridx],
2022 			!is_read, !is_read);
2023     }
2024 }
2025 
2026 /* Returns the single statement in that NAME is used, excepting
2027    the looparound phi nodes contained in one of the chains.  If there is no
2028    such statement, or more statements, NULL is returned.  */
2029 
2030 static gimple *
single_nonlooparound_use(tree name)2031 single_nonlooparound_use (tree name)
2032 {
2033   use_operand_p use;
2034   imm_use_iterator it;
2035   gimple *stmt, *ret = NULL;
2036 
2037   FOR_EACH_IMM_USE_FAST (use, it, name)
2038     {
2039       stmt = USE_STMT (use);
2040 
2041       if (gimple_code (stmt) == GIMPLE_PHI)
2042 	{
2043 	  /* Ignore uses in looparound phi nodes.  Uses in other phi nodes
2044 	     could not be processed anyway, so just fail for them.  */
2045 	  if (bitmap_bit_p (looparound_phis,
2046 			    SSA_NAME_VERSION (PHI_RESULT (stmt))))
2047 	    continue;
2048 
2049 	  return NULL;
2050 	}
2051       else if (is_gimple_debug (stmt))
2052 	continue;
2053       else if (ret != NULL)
2054 	return NULL;
2055       else
2056 	ret = stmt;
2057     }
2058 
2059   return ret;
2060 }
2061 
2062 /* Remove statement STMT, as well as the chain of assignments in that it is
2063    used.  */
2064 
2065 static void
remove_stmt(gimple * stmt)2066 remove_stmt (gimple *stmt)
2067 {
2068   tree name;
2069   gimple *next;
2070   gimple_stmt_iterator psi;
2071 
2072   if (gimple_code (stmt) == GIMPLE_PHI)
2073     {
2074       name = PHI_RESULT (stmt);
2075       next = single_nonlooparound_use (name);
2076       reset_debug_uses (stmt);
2077       psi = gsi_for_stmt (stmt);
2078       remove_phi_node (&psi, true);
2079 
2080       if (!next
2081 	  || !gimple_assign_ssa_name_copy_p (next)
2082 	  || gimple_assign_rhs1 (next) != name)
2083 	return;
2084 
2085       stmt = next;
2086     }
2087 
2088   while (1)
2089     {
2090       gimple_stmt_iterator bsi;
2091 
2092       bsi = gsi_for_stmt (stmt);
2093 
2094       name = gimple_assign_lhs (stmt);
2095       if (TREE_CODE (name) == SSA_NAME)
2096 	{
2097 	  next = single_nonlooparound_use (name);
2098 	  reset_debug_uses (stmt);
2099 	}
2100       else
2101 	{
2102 	  /* This is a store to be eliminated.  */
2103 	  gcc_assert (gimple_vdef (stmt) != NULL);
2104 	  next = NULL;
2105 	}
2106 
2107       unlink_stmt_vdef (stmt);
2108       gsi_remove (&bsi, true);
2109       release_defs (stmt);
2110 
2111       if (!next
2112 	  || !gimple_assign_ssa_name_copy_p (next)
2113 	  || gimple_assign_rhs1 (next) != name)
2114 	return;
2115 
2116       stmt = next;
2117     }
2118 }
2119 
2120 /* Perform the predictive commoning optimization for a chain CHAIN.
2121    Uids of the newly created temporary variables are marked in TMP_VARS.*/
2122 
2123 static void
execute_pred_commoning_chain(class loop * loop,chain_p chain,bitmap tmp_vars)2124 execute_pred_commoning_chain (class loop *loop, chain_p chain,
2125 			      bitmap tmp_vars)
2126 {
2127   unsigned i;
2128   dref a;
2129   tree var;
2130   bool in_lhs;
2131 
2132   if (chain->combined)
2133     {
2134       /* For combined chains, just remove the statements that are used to
2135 	 compute the values of the expression (except for the root one).
2136 	 We delay this until after all chains are processed.  */
2137     }
2138   else if (chain->type == CT_STORE_STORE)
2139     {
2140       if (chain->length > 0)
2141 	{
2142 	  if (chain->inv_store_elimination)
2143 	    {
2144 	      /* If dead stores in this chain only store loop invariant
2145 		 values, we can simply record the invariant value and use
2146 		 it directly after loop.  */
2147 	      initialize_root_vars_store_elim_1 (chain);
2148 	    }
2149 	  else
2150 	    {
2151 	      /* If dead stores in this chain store loop variant values,
2152 		 we need to set up the variables by loading from memory
2153 		 before loop and propagating it with PHI nodes.  */
2154 	      initialize_root_vars_store_elim_2 (loop, chain, tmp_vars);
2155 	    }
2156 
2157 	  /* For inter-iteration store elimination chain, stores at each
2158 	     distance in loop's last (chain->length - 1) iterations can't
2159 	     be eliminated, because there is no following killing store.
2160 	     We need to generate these stores after loop.  */
2161 	  finalize_eliminated_stores (loop, chain);
2162 	}
2163 
2164       bool last_store_p = true;
2165       for (i = chain->refs.length (); i > 0; i--)
2166 	{
2167 	  a = chain->refs[i - 1];
2168 	  /* Preserve the last store of the chain.  Eliminate other stores
2169 	     which are killed by the last one.  */
2170 	  if (DR_IS_WRITE (a->ref))
2171 	    {
2172 	      if (last_store_p)
2173 		last_store_p = false;
2174 	      else
2175 		remove_stmt (a->stmt);
2176 
2177 	      continue;
2178 	    }
2179 
2180 	  /* Any load in Store-Store chain must be dominated by a previous
2181 	     store, we replace the load reference with rhs of the store.  */
2182 	  dref b = get_chain_last_write_before_load (chain, i - 1);
2183 	  gcc_assert (b != NULL);
2184 	  var = gimple_assign_rhs1 (b->stmt);
2185 	  replace_ref_with (a->stmt, var, false, false);
2186 	}
2187     }
2188   else
2189     {
2190       /* For non-combined chains, set up the variables that hold its value.  */
2191       initialize_root_vars (loop, chain, tmp_vars);
2192       a = get_chain_root (chain);
2193       in_lhs = (chain->type == CT_STORE_LOAD
2194 		|| chain->type == CT_COMBINATION);
2195       replace_ref_with (a->stmt, chain->vars[chain->length], true, in_lhs);
2196 
2197       /* Replace the uses of the original references by these variables.  */
2198       for (i = 1; chain->refs.iterate (i, &a); i++)
2199 	{
2200 	  var = chain->vars[chain->length - a->distance];
2201 	  replace_ref_with (a->stmt, var, false, false);
2202 	}
2203     }
2204 }
2205 
2206 /* Determines the unroll factor necessary to remove as many temporary variable
2207    copies as possible.  CHAINS is the list of chains that will be
2208    optimized.  */
2209 
2210 static unsigned
determine_unroll_factor(vec<chain_p> chains)2211 determine_unroll_factor (vec<chain_p> chains)
2212 {
2213   chain_p chain;
2214   unsigned factor = 1, af, nfactor, i;
2215   unsigned max = param_max_unroll_times;
2216 
2217   FOR_EACH_VEC_ELT (chains, i, chain)
2218     {
2219       if (chain->type == CT_INVARIANT)
2220 	continue;
2221       /* For now we can't handle unrolling when eliminating stores.  */
2222       else if (chain->type == CT_STORE_STORE)
2223 	return 1;
2224 
2225       if (chain->combined)
2226 	{
2227 	  /* For combined chains, we can't handle unrolling if we replace
2228 	     looparound PHIs.  */
2229 	  dref a;
2230 	  unsigned j;
2231 	  for (j = 1; chain->refs.iterate (j, &a); j++)
2232 	    if (gimple_code (a->stmt) == GIMPLE_PHI)
2233 	      return 1;
2234 	  continue;
2235 	}
2236 
2237       /* The best unroll factor for this chain is equal to the number of
2238 	 temporary variables that we create for it.  */
2239       af = chain->length;
2240       if (chain->has_max_use_after)
2241 	af++;
2242 
2243       nfactor = factor * af / gcd (factor, af);
2244       if (nfactor <= max)
2245 	factor = nfactor;
2246     }
2247 
2248   return factor;
2249 }
2250 
2251 /* Perform the predictive commoning optimization for CHAINS.
2252    Uids of the newly created temporary variables are marked in TMP_VARS.  */
2253 
2254 static void
execute_pred_commoning(class loop * loop,vec<chain_p> chains,bitmap tmp_vars)2255 execute_pred_commoning (class loop *loop, vec<chain_p> chains,
2256 			bitmap tmp_vars)
2257 {
2258   chain_p chain;
2259   unsigned i;
2260 
2261   FOR_EACH_VEC_ELT (chains, i, chain)
2262     {
2263       if (chain->type == CT_INVARIANT)
2264 	execute_load_motion (loop, chain, tmp_vars);
2265       else
2266 	execute_pred_commoning_chain (loop, chain, tmp_vars);
2267     }
2268 
2269   FOR_EACH_VEC_ELT (chains, i, chain)
2270     {
2271       if (chain->type == CT_INVARIANT)
2272 	;
2273       else if (chain->combined)
2274 	{
2275 	  /* For combined chains, just remove the statements that are used to
2276 	     compute the values of the expression (except for the root one).  */
2277 	  dref a;
2278 	  unsigned j;
2279 	  for (j = 1; chain->refs.iterate (j, &a); j++)
2280 	    remove_stmt (a->stmt);
2281 	}
2282     }
2283 
2284   update_ssa (TODO_update_ssa_only_virtuals);
2285 }
2286 
2287 /* For each reference in CHAINS, if its defining statement is
2288    phi node, record the ssa name that is defined by it.  */
2289 
2290 static void
replace_phis_by_defined_names(vec<chain_p> chains)2291 replace_phis_by_defined_names (vec<chain_p> chains)
2292 {
2293   chain_p chain;
2294   dref a;
2295   unsigned i, j;
2296 
2297   FOR_EACH_VEC_ELT (chains, i, chain)
2298     FOR_EACH_VEC_ELT (chain->refs, j, a)
2299       {
2300 	if (gimple_code (a->stmt) == GIMPLE_PHI)
2301 	  {
2302 	    a->name_defined_by_phi = PHI_RESULT (a->stmt);
2303 	    a->stmt = NULL;
2304 	  }
2305       }
2306 }
2307 
2308 /* For each reference in CHAINS, if name_defined_by_phi is not
2309    NULL, use it to set the stmt field.  */
2310 
2311 static void
replace_names_by_phis(vec<chain_p> chains)2312 replace_names_by_phis (vec<chain_p> chains)
2313 {
2314   chain_p chain;
2315   dref a;
2316   unsigned i, j;
2317 
2318   FOR_EACH_VEC_ELT (chains, i, chain)
2319     FOR_EACH_VEC_ELT (chain->refs, j, a)
2320       if (a->stmt == NULL)
2321 	{
2322 	  a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
2323 	  gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
2324 	  a->name_defined_by_phi = NULL_TREE;
2325 	}
2326 }
2327 
2328 /* Wrapper over execute_pred_commoning, to pass it as a callback
2329    to tree_transform_and_unroll_loop.  */
2330 
2331 struct epcc_data
2332 {
2333   vec<chain_p> chains;
2334   bitmap tmp_vars;
2335 };
2336 
2337 static void
execute_pred_commoning_cbck(class loop * loop,void * data)2338 execute_pred_commoning_cbck (class loop *loop, void *data)
2339 {
2340   struct epcc_data *const dta = (struct epcc_data *) data;
2341 
2342   /* Restore phi nodes that were replaced by ssa names before
2343      tree_transform_and_unroll_loop (see detailed description in
2344      tree_predictive_commoning_loop).  */
2345   replace_names_by_phis (dta->chains);
2346   execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
2347 }
2348 
2349 /* Base NAME and all the names in the chain of phi nodes that use it
2350    on variable VAR.  The phi nodes are recognized by being in the copies of
2351    the header of the LOOP.  */
2352 
2353 static void
base_names_in_chain_on(class loop * loop,tree name,tree var)2354 base_names_in_chain_on (class loop *loop, tree name, tree var)
2355 {
2356   gimple *stmt, *phi;
2357   imm_use_iterator iter;
2358 
2359   replace_ssa_name_symbol (name, var);
2360 
2361   while (1)
2362     {
2363       phi = NULL;
2364       FOR_EACH_IMM_USE_STMT (stmt, iter, name)
2365 	{
2366 	  if (gimple_code (stmt) == GIMPLE_PHI
2367 	      && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
2368 	    {
2369 	      phi = stmt;
2370 	      break;
2371 	    }
2372 	}
2373       if (!phi)
2374 	return;
2375 
2376       name = PHI_RESULT (phi);
2377       replace_ssa_name_symbol (name, var);
2378     }
2379 }
2380 
2381 /* Given an unrolled LOOP after predictive commoning, remove the
2382    register copies arising from phi nodes by changing the base
2383    variables of SSA names.  TMP_VARS is the set of the temporary variables
2384    for those we want to perform this.  */
2385 
2386 static void
eliminate_temp_copies(class loop * loop,bitmap tmp_vars)2387 eliminate_temp_copies (class loop *loop, bitmap tmp_vars)
2388 {
2389   edge e;
2390   gphi *phi;
2391   gimple *stmt;
2392   tree name, use, var;
2393   gphi_iterator psi;
2394 
2395   e = loop_latch_edge (loop);
2396   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
2397     {
2398       phi = psi.phi ();
2399       name = PHI_RESULT (phi);
2400       var = SSA_NAME_VAR (name);
2401       if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
2402 	continue;
2403       use = PHI_ARG_DEF_FROM_EDGE (phi, e);
2404       gcc_assert (TREE_CODE (use) == SSA_NAME);
2405 
2406       /* Base all the ssa names in the ud and du chain of NAME on VAR.  */
2407       stmt = SSA_NAME_DEF_STMT (use);
2408       while (gimple_code (stmt) == GIMPLE_PHI
2409 	     /* In case we could not unroll the loop enough to eliminate
2410 		all copies, we may reach the loop header before the defining
2411 		statement (in that case, some register copies will be present
2412 		in loop latch in the final code, corresponding to the newly
2413 		created looparound phi nodes).  */
2414 	     && gimple_bb (stmt) != loop->header)
2415 	{
2416 	  gcc_assert (single_pred_p (gimple_bb (stmt)));
2417 	  use = PHI_ARG_DEF (stmt, 0);
2418 	  stmt = SSA_NAME_DEF_STMT (use);
2419 	}
2420 
2421       base_names_in_chain_on (loop, use, var);
2422     }
2423 }
2424 
2425 /* Returns true if CHAIN is suitable to be combined.  */
2426 
2427 static bool
chain_can_be_combined_p(chain_p chain)2428 chain_can_be_combined_p (chain_p chain)
2429 {
2430   return (!chain->combined
2431 	  && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
2432 }
2433 
2434 /* Returns the modify statement that uses NAME.  Skips over assignment
2435    statements, NAME is replaced with the actual name used in the returned
2436    statement.  */
2437 
2438 static gimple *
find_use_stmt(tree * name)2439 find_use_stmt (tree *name)
2440 {
2441   gimple *stmt;
2442   tree rhs, lhs;
2443 
2444   /* Skip over assignments.  */
2445   while (1)
2446     {
2447       stmt = single_nonlooparound_use (*name);
2448       if (!stmt)
2449 	return NULL;
2450 
2451       if (gimple_code (stmt) != GIMPLE_ASSIGN)
2452 	return NULL;
2453 
2454       lhs = gimple_assign_lhs (stmt);
2455       if (TREE_CODE (lhs) != SSA_NAME)
2456 	return NULL;
2457 
2458       if (gimple_assign_copy_p (stmt))
2459 	{
2460 	  rhs = gimple_assign_rhs1 (stmt);
2461 	  if (rhs != *name)
2462 	    return NULL;
2463 
2464 	  *name = lhs;
2465 	}
2466       else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
2467 	       == GIMPLE_BINARY_RHS)
2468 	return stmt;
2469       else
2470 	return NULL;
2471     }
2472 }
2473 
2474 /* Returns true if we may perform reassociation for operation CODE in TYPE.  */
2475 
2476 static bool
may_reassociate_p(tree type,enum tree_code code)2477 may_reassociate_p (tree type, enum tree_code code)
2478 {
2479   if (FLOAT_TYPE_P (type)
2480       && !flag_unsafe_math_optimizations)
2481     return false;
2482 
2483   return (commutative_tree_code (code)
2484 	  && associative_tree_code (code));
2485 }
2486 
2487 /* If the operation used in STMT is associative and commutative, go through the
2488    tree of the same operations and returns its root.  Distance to the root
2489    is stored in DISTANCE.  */
2490 
2491 static gimple *
find_associative_operation_root(gimple * stmt,unsigned * distance)2492 find_associative_operation_root (gimple *stmt, unsigned *distance)
2493 {
2494   tree lhs;
2495   gimple *next;
2496   enum tree_code code = gimple_assign_rhs_code (stmt);
2497   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2498   unsigned dist = 0;
2499 
2500   if (!may_reassociate_p (type, code))
2501     return NULL;
2502 
2503   while (1)
2504     {
2505       lhs = gimple_assign_lhs (stmt);
2506       gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2507 
2508       next = find_use_stmt (&lhs);
2509       if (!next
2510 	  || gimple_assign_rhs_code (next) != code)
2511 	break;
2512 
2513       stmt = next;
2514       dist++;
2515     }
2516 
2517   if (distance)
2518     *distance = dist;
2519   return stmt;
2520 }
2521 
2522 /* Returns the common statement in that NAME1 and NAME2 have a use.  If there
2523    is no such statement, returns NULL_TREE.  In case the operation used on
2524    NAME1 and NAME2 is associative and commutative, returns the root of the
2525    tree formed by this operation instead of the statement that uses NAME1 or
2526    NAME2.  */
2527 
2528 static gimple *
find_common_use_stmt(tree * name1,tree * name2)2529 find_common_use_stmt (tree *name1, tree *name2)
2530 {
2531   gimple *stmt1, *stmt2;
2532 
2533   stmt1 = find_use_stmt (name1);
2534   if (!stmt1)
2535     return NULL;
2536 
2537   stmt2 = find_use_stmt (name2);
2538   if (!stmt2)
2539     return NULL;
2540 
2541   if (stmt1 == stmt2)
2542     return stmt1;
2543 
2544   stmt1 = find_associative_operation_root (stmt1, NULL);
2545   if (!stmt1)
2546     return NULL;
2547   stmt2 = find_associative_operation_root (stmt2, NULL);
2548   if (!stmt2)
2549     return NULL;
2550 
2551   return (stmt1 == stmt2 ? stmt1 : NULL);
2552 }
2553 
2554 /* Checks whether R1 and R2 are combined together using CODE, with the result
2555    in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2556    if it is true.  If CODE is ERROR_MARK, set these values instead.  */
2557 
2558 static bool
combinable_refs_p(dref r1,dref r2,enum tree_code * code,bool * swap,tree * rslt_type)2559 combinable_refs_p (dref r1, dref r2,
2560 		   enum tree_code *code, bool *swap, tree *rslt_type)
2561 {
2562   enum tree_code acode;
2563   bool aswap;
2564   tree atype;
2565   tree name1, name2;
2566   gimple *stmt;
2567 
2568   name1 = name_for_ref (r1);
2569   name2 = name_for_ref (r2);
2570   gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2571 
2572   stmt = find_common_use_stmt (&name1, &name2);
2573 
2574   if (!stmt
2575       /* A simple post-dominance check - make sure the combination
2576          is executed under the same condition as the references.  */
2577       || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2578 	  && gimple_bb (stmt) != gimple_bb (r2->stmt)))
2579     return false;
2580 
2581   acode = gimple_assign_rhs_code (stmt);
2582   aswap = (!commutative_tree_code (acode)
2583 	   && gimple_assign_rhs1 (stmt) != name1);
2584   atype = TREE_TYPE (gimple_assign_lhs (stmt));
2585 
2586   if (*code == ERROR_MARK)
2587     {
2588       *code = acode;
2589       *swap = aswap;
2590       *rslt_type = atype;
2591       return true;
2592     }
2593 
2594   return (*code == acode
2595 	  && *swap == aswap
2596 	  && *rslt_type == atype);
2597 }
2598 
2599 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2600    an assignment of the remaining operand.  */
2601 
2602 static void
remove_name_from_operation(gimple * stmt,tree op)2603 remove_name_from_operation (gimple *stmt, tree op)
2604 {
2605   tree other_op;
2606   gimple_stmt_iterator si;
2607 
2608   gcc_assert (is_gimple_assign (stmt));
2609 
2610   if (gimple_assign_rhs1 (stmt) == op)
2611     other_op = gimple_assign_rhs2 (stmt);
2612   else
2613     other_op = gimple_assign_rhs1 (stmt);
2614 
2615   si = gsi_for_stmt (stmt);
2616   gimple_assign_set_rhs_from_tree (&si, other_op);
2617 
2618   /* We should not have reallocated STMT.  */
2619   gcc_assert (gsi_stmt (si) == stmt);
2620 
2621   update_stmt (stmt);
2622 }
2623 
2624 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2625    are combined in a single statement, and returns this statement.  */
2626 
2627 static gimple *
reassociate_to_the_same_stmt(tree name1,tree name2)2628 reassociate_to_the_same_stmt (tree name1, tree name2)
2629 {
2630   gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
2631   gassign *new_stmt, *tmp_stmt;
2632   tree new_name, tmp_name, var, r1, r2;
2633   unsigned dist1, dist2;
2634   enum tree_code code;
2635   tree type = TREE_TYPE (name1);
2636   gimple_stmt_iterator bsi;
2637 
2638   stmt1 = find_use_stmt (&name1);
2639   stmt2 = find_use_stmt (&name2);
2640   root1 = find_associative_operation_root (stmt1, &dist1);
2641   root2 = find_associative_operation_root (stmt2, &dist2);
2642   code = gimple_assign_rhs_code (stmt1);
2643 
2644   gcc_assert (root1 && root2 && root1 == root2
2645 	      && code == gimple_assign_rhs_code (stmt2));
2646 
2647   /* Find the root of the nearest expression in that both NAME1 and NAME2
2648      are used.  */
2649   r1 = name1;
2650   s1 = stmt1;
2651   r2 = name2;
2652   s2 = stmt2;
2653 
2654   while (dist1 > dist2)
2655     {
2656       s1 = find_use_stmt (&r1);
2657       r1 = gimple_assign_lhs (s1);
2658       dist1--;
2659     }
2660   while (dist2 > dist1)
2661     {
2662       s2 = find_use_stmt (&r2);
2663       r2 = gimple_assign_lhs (s2);
2664       dist2--;
2665     }
2666 
2667   while (s1 != s2)
2668     {
2669       s1 = find_use_stmt (&r1);
2670       r1 = gimple_assign_lhs (s1);
2671       s2 = find_use_stmt (&r2);
2672       r2 = gimple_assign_lhs (s2);
2673     }
2674 
2675   /* Remove NAME1 and NAME2 from the statements in that they are used
2676      currently.  */
2677   remove_name_from_operation (stmt1, name1);
2678   remove_name_from_operation (stmt2, name2);
2679 
2680   /* Insert the new statement combining NAME1 and NAME2 before S1, and
2681      combine it with the rhs of S1.  */
2682   var = create_tmp_reg (type, "predreastmp");
2683   new_name = make_ssa_name (var);
2684   new_stmt = gimple_build_assign (new_name, code, name1, name2);
2685 
2686   var = create_tmp_reg (type, "predreastmp");
2687   tmp_name = make_ssa_name (var);
2688 
2689   /* Rhs of S1 may now be either a binary expression with operation
2690      CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2691      so that name1 or name2 was removed from it).  */
2692   tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
2693 				  gimple_assign_rhs1 (s1),
2694 				  gimple_assign_rhs2 (s1));
2695 
2696   bsi = gsi_for_stmt (s1);
2697   gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2698   s1 = gsi_stmt (bsi);
2699   update_stmt (s1);
2700 
2701   gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
2702   gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
2703 
2704   return new_stmt;
2705 }
2706 
2707 /* Returns the statement that combines references R1 and R2.  In case R1
2708    and R2 are not used in the same statement, but they are used with an
2709    associative and commutative operation in the same expression, reassociate
2710    the expression so that they are used in the same statement.  */
2711 
2712 static gimple *
stmt_combining_refs(dref r1,dref r2)2713 stmt_combining_refs (dref r1, dref r2)
2714 {
2715   gimple *stmt1, *stmt2;
2716   tree name1 = name_for_ref (r1);
2717   tree name2 = name_for_ref (r2);
2718 
2719   stmt1 = find_use_stmt (&name1);
2720   stmt2 = find_use_stmt (&name2);
2721   if (stmt1 == stmt2)
2722     return stmt1;
2723 
2724   return reassociate_to_the_same_stmt (name1, name2);
2725 }
2726 
2727 /* Tries to combine chains CH1 and CH2 together.  If this succeeds, the
2728    description of the new chain is returned, otherwise we return NULL.  */
2729 
2730 static chain_p
combine_chains(chain_p ch1,chain_p ch2)2731 combine_chains (chain_p ch1, chain_p ch2)
2732 {
2733   dref r1, r2, nw;
2734   enum tree_code op = ERROR_MARK;
2735   bool swap = false;
2736   chain_p new_chain;
2737   unsigned i;
2738   tree rslt_type = NULL_TREE;
2739 
2740   if (ch1 == ch2)
2741     return NULL;
2742   if (ch1->length != ch2->length)
2743     return NULL;
2744 
2745   if (ch1->refs.length () != ch2->refs.length ())
2746     return NULL;
2747 
2748   for (i = 0; (ch1->refs.iterate (i, &r1)
2749 	       && ch2->refs.iterate (i, &r2)); i++)
2750     {
2751       if (r1->distance != r2->distance)
2752 	return NULL;
2753 
2754       if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2755 	return NULL;
2756     }
2757 
2758   if (swap)
2759     std::swap (ch1, ch2);
2760 
2761   new_chain = XCNEW (struct chain);
2762   new_chain->type = CT_COMBINATION;
2763   new_chain->op = op;
2764   new_chain->ch1 = ch1;
2765   new_chain->ch2 = ch2;
2766   new_chain->rslt_type = rslt_type;
2767   new_chain->length = ch1->length;
2768 
2769   for (i = 0; (ch1->refs.iterate (i, &r1)
2770 	       && ch2->refs.iterate (i, &r2)); i++)
2771     {
2772       nw = XCNEW (class dref_d);
2773       nw->stmt = stmt_combining_refs (r1, r2);
2774       nw->distance = r1->distance;
2775 
2776       new_chain->refs.safe_push (nw);
2777     }
2778 
2779   ch1->combined = true;
2780   ch2->combined = true;
2781   return new_chain;
2782 }
2783 
2784 /* Recursively update position information of all offspring chains to ROOT
2785    chain's position information.  */
2786 
2787 static void
update_pos_for_combined_chains(chain_p root)2788 update_pos_for_combined_chains (chain_p root)
2789 {
2790   chain_p ch1 = root->ch1, ch2 = root->ch2;
2791   dref ref, ref1, ref2;
2792   for (unsigned j = 0; (root->refs.iterate (j, &ref)
2793 			&& ch1->refs.iterate (j, &ref1)
2794 			&& ch2->refs.iterate (j, &ref2)); ++j)
2795     ref1->pos = ref2->pos = ref->pos;
2796 
2797   if (ch1->type == CT_COMBINATION)
2798     update_pos_for_combined_chains (ch1);
2799   if (ch2->type == CT_COMBINATION)
2800     update_pos_for_combined_chains (ch2);
2801 }
2802 
2803 /* Returns true if statement S1 dominates statement S2.  */
2804 
2805 static bool
pcom_stmt_dominates_stmt_p(gimple * s1,gimple * s2)2806 pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
2807 {
2808   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
2809 
2810   if (!bb1 || s1 == s2)
2811     return true;
2812 
2813   if (bb1 == bb2)
2814     return gimple_uid (s1) < gimple_uid (s2);
2815 
2816   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
2817 }
2818 
2819 /* Try to combine the CHAINS in LOOP.  */
2820 
2821 static void
try_combine_chains(class loop * loop,vec<chain_p> * chains)2822 try_combine_chains (class loop *loop, vec<chain_p> *chains)
2823 {
2824   unsigned i, j;
2825   chain_p ch1, ch2, cch;
2826   auto_vec<chain_p> worklist;
2827   bool combined_p = false;
2828 
2829   FOR_EACH_VEC_ELT (*chains, i, ch1)
2830     if (chain_can_be_combined_p (ch1))
2831       worklist.safe_push (ch1);
2832 
2833   while (!worklist.is_empty ())
2834     {
2835       ch1 = worklist.pop ();
2836       if (!chain_can_be_combined_p (ch1))
2837 	continue;
2838 
2839       FOR_EACH_VEC_ELT (*chains, j, ch2)
2840 	{
2841 	  if (!chain_can_be_combined_p (ch2))
2842 	    continue;
2843 
2844 	  cch = combine_chains (ch1, ch2);
2845 	  if (cch)
2846 	    {
2847 	      worklist.safe_push (cch);
2848 	      chains->safe_push (cch);
2849 	      combined_p = true;
2850 	      break;
2851 	    }
2852 	}
2853     }
2854   if (!combined_p)
2855     return;
2856 
2857   /* Setup UID for all statements in dominance order.  */
2858   basic_block *bbs = get_loop_body_in_dom_order (loop);
2859   renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes);
2860   free (bbs);
2861 
2862   /* Re-association in combined chains may generate statements different to
2863      order of references of the original chain.  We need to keep references
2864      of combined chain in dominance order so that all uses will be inserted
2865      after definitions.  Note:
2866        A) This is necessary for all combined chains.
2867        B) This is only necessary for ZERO distance references because other
2868 	  references inherit value from loop carried PHIs.
2869 
2870      We first update position information for all combined chains.  */
2871   dref ref;
2872   for (i = 0; chains->iterate (i, &ch1); ++i)
2873     {
2874       if (ch1->type != CT_COMBINATION || ch1->combined)
2875 	continue;
2876 
2877       for (j = 0; ch1->refs.iterate (j, &ref); ++j)
2878 	ref->pos = gimple_uid (ref->stmt);
2879 
2880       update_pos_for_combined_chains (ch1);
2881     }
2882   /* Then sort references according to newly updated position information.  */
2883   for (i = 0; chains->iterate (i, &ch1); ++i)
2884     {
2885       if (ch1->type != CT_COMBINATION && !ch1->combined)
2886 	continue;
2887 
2888       /* Find the first reference with non-ZERO distance.  */
2889       if (ch1->length == 0)
2890 	j = ch1->refs.length();
2891       else
2892 	{
2893 	  for (j = 0; ch1->refs.iterate (j, &ref); ++j)
2894 	    if (ref->distance != 0)
2895 	      break;
2896 	}
2897 
2898       /* Sort all ZERO distance references by position.  */
2899       qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);
2900 
2901       if (ch1->combined)
2902 	continue;
2903 
2904       /* For ZERO length chain, has_max_use_after must be true since root
2905 	 combined stmt must dominates others.  */
2906       if (ch1->length == 0)
2907 	{
2908 	  ch1->has_max_use_after = true;
2909 	  continue;
2910 	}
2911       /* Check if there is use at max distance after root for combined chains
2912 	 and set flag accordingly.  */
2913       ch1->has_max_use_after = false;
2914       gimple *root_stmt = get_chain_root (ch1)->stmt;
2915       for (j = 1; ch1->refs.iterate (j, &ref); ++j)
2916 	{
2917 	  if (ref->distance == ch1->length
2918 	      && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt))
2919 	    {
2920 	      ch1->has_max_use_after = true;
2921 	      break;
2922 	    }
2923 	}
2924     }
2925 }
2926 
2927 /* Prepare initializers for store elimination CHAIN in LOOP.  Returns false
2928    if this is impossible because one of these initializers may trap, true
2929    otherwise.  */
2930 
2931 static bool
prepare_initializers_chain_store_elim(class loop * loop,chain_p chain)2932 prepare_initializers_chain_store_elim (class loop *loop, chain_p chain)
2933 {
2934   unsigned i, n = chain->length;
2935 
2936   /* For now we can't eliminate stores if some of them are conditional
2937      executed.  */
2938   if (!chain->all_always_accessed)
2939     return false;
2940 
2941   /* Nothing to intialize for intra-iteration store elimination.  */
2942   if (n == 0 && chain->type == CT_STORE_STORE)
2943     return true;
2944 
2945   /* For store elimination chain, there is nothing to initialize if stores
2946      to be eliminated only store loop invariant values into memory.  */
2947   if (chain->type == CT_STORE_STORE
2948       && is_inv_store_elimination_chain (loop, chain))
2949     {
2950       chain->inv_store_elimination = true;
2951       return true;
2952     }
2953 
2954   chain->inits.create (n);
2955   chain->inits.safe_grow_cleared (n, true);
2956 
2957   /* For store eliminatin chain like below:
2958 
2959      for (i = 0; i < len; i++)
2960        {
2961 	 a[i] = 1;
2962 	 // a[i + 1] = ...
2963 	 a[i + 2] = 3;
2964        }
2965 
2966      store to a[i + 1] is missed in loop body, it acts like bubbles.  The
2967      content of a[i + 1] remain the same if the loop iterates fewer times
2968      than chain->length.  We need to set up root variables for such stores
2969      by loading from memory before loop.  Note we only need to load bubble
2970      elements because loop body is guaranteed to be executed at least once
2971      after loop's preheader edge.  */
2972   auto_vec<bool> bubbles;
2973   bubbles.safe_grow_cleared (n + 1, true);
2974   for (i = 0; i < chain->refs.length (); i++)
2975     bubbles[chain->refs[i]->distance] = true;
2976 
2977   struct data_reference *dr = get_chain_root (chain)->ref;
2978   for (i = 0; i < n; i++)
2979     {
2980       if (bubbles[i])
2981 	continue;
2982 
2983       gimple_seq stmts = NULL;
2984 
2985       tree init = ref_at_iteration (dr, (int) 0 - i, &stmts);
2986       if (stmts)
2987 	gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
2988 
2989       chain->inits[i] = init;
2990     }
2991 
2992   return true;
2993 }
2994 
2995 /* Prepare initializers for CHAIN in LOOP.  Returns false if this is
2996    impossible because one of these initializers may trap, true otherwise.  */
2997 
2998 static bool
prepare_initializers_chain(class loop * loop,chain_p chain)2999 prepare_initializers_chain (class loop *loop, chain_p chain)
3000 {
3001   unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
3002   struct data_reference *dr = get_chain_root (chain)->ref;
3003   tree init;
3004   dref laref;
3005   edge entry = loop_preheader_edge (loop);
3006 
3007   if (chain->type == CT_STORE_STORE)
3008     return prepare_initializers_chain_store_elim (loop, chain);
3009 
3010   /* Find the initializers for the variables, and check that they cannot
3011      trap.  */
3012   chain->inits.create (n);
3013   for (i = 0; i < n; i++)
3014     chain->inits.quick_push (NULL_TREE);
3015 
3016   /* If we have replaced some looparound phi nodes, use their initializers
3017      instead of creating our own.  */
3018   FOR_EACH_VEC_ELT (chain->refs, i, laref)
3019     {
3020       if (gimple_code (laref->stmt) != GIMPLE_PHI)
3021 	continue;
3022 
3023       gcc_assert (laref->distance > 0);
3024       chain->inits[n - laref->distance]
3025 	= PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
3026     }
3027 
3028   for (i = 0; i < n; i++)
3029     {
3030       gimple_seq stmts = NULL;
3031 
3032       if (chain->inits[i] != NULL_TREE)
3033 	continue;
3034 
3035       init = ref_at_iteration (dr, (int) i - n, &stmts);
3036       if (!chain->all_always_accessed && tree_could_trap_p (init))
3037 	{
3038 	  gimple_seq_discard (stmts);
3039 	  return false;
3040 	}
3041 
3042       if (stmts)
3043 	gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
3044 
3045       chain->inits[i] = init;
3046     }
3047 
3048   return true;
3049 }
3050 
3051 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
3052    be used because the initializers might trap.  */
3053 
3054 static void
prepare_initializers(class loop * loop,vec<chain_p> chains)3055 prepare_initializers (class loop *loop, vec<chain_p> chains)
3056 {
3057   chain_p chain;
3058   unsigned i;
3059 
3060   for (i = 0; i < chains.length (); )
3061     {
3062       chain = chains[i];
3063       if (prepare_initializers_chain (loop, chain))
3064 	i++;
3065       else
3066 	{
3067 	  release_chain (chain);
3068 	  chains.unordered_remove (i);
3069 	}
3070     }
3071 }
3072 
3073 /* Generates finalizer memory references for CHAIN in LOOP.  Returns true
3074    if finalizer code for CHAIN can be generated, otherwise false.  */
3075 
3076 static bool
prepare_finalizers_chain(class loop * loop,chain_p chain)3077 prepare_finalizers_chain (class loop *loop, chain_p chain)
3078 {
3079   unsigned i, n = chain->length;
3080   struct data_reference *dr = get_chain_root (chain)->ref;
3081   tree fini, niters = number_of_latch_executions (loop);
3082 
3083   /* For now we can't eliminate stores if some of them are conditional
3084      executed.  */
3085   if (!chain->all_always_accessed)
3086     return false;
3087 
3088   chain->finis.create (n);
3089   for (i = 0; i < n; i++)
3090     chain->finis.quick_push (NULL_TREE);
3091 
3092   /* We never use looparound phi node for store elimination chains.  */
3093 
3094   /* Find the finalizers for the variables, and check that they cannot
3095      trap.  */
3096   for (i = 0; i < n; i++)
3097     {
3098       gimple_seq stmts = NULL;
3099       gcc_assert (chain->finis[i] == NULL_TREE);
3100 
3101       if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME)
3102 	{
3103 	  niters = unshare_expr (niters);
3104 	  niters = force_gimple_operand (niters, &stmts, true, NULL);
3105 	  if (stmts)
3106 	    {
3107 	      gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3108 	      stmts = NULL;
3109 	    }
3110 	}
3111       fini = ref_at_iteration (dr, (int) 0 - i, &stmts, niters);
3112       if (stmts)
3113 	gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3114 
3115       chain->finis[i] = fini;
3116     }
3117 
3118   return true;
3119 }
3120 
3121 /* Generates finalizer memory reference for CHAINS in LOOP.  Returns true
3122    if finalizer code generation for CHAINS breaks loop closed ssa form.  */
3123 
3124 static bool
prepare_finalizers(class loop * loop,vec<chain_p> chains)3125 prepare_finalizers (class loop *loop, vec<chain_p> chains)
3126 {
3127   chain_p chain;
3128   unsigned i;
3129   bool loop_closed_ssa = false;
3130 
3131   for (i = 0; i < chains.length ();)
3132     {
3133       chain = chains[i];
3134 
3135       /* Finalizer is only necessary for inter-iteration store elimination
3136 	 chains.  */
3137       if (chain->length == 0 || chain->type != CT_STORE_STORE)
3138 	{
3139 	  i++;
3140 	  continue;
3141 	}
3142 
3143       if (prepare_finalizers_chain (loop, chain))
3144 	{
3145 	  i++;
3146 	  /* Be conservative, assume loop closed ssa form is corrupted
3147 	     by store-store chain.  Though it's not always the case if
3148 	     eliminated stores only store loop invariant values into
3149 	     memory.  */
3150 	  loop_closed_ssa = true;
3151 	}
3152       else
3153 	{
3154 	  release_chain (chain);
3155 	  chains.unordered_remove (i);
3156 	}
3157     }
3158   return loop_closed_ssa;
3159 }
3160 
3161 /* Insert all initializing gimple stmts into loop's entry edge.  */
3162 
3163 static void
insert_init_seqs(class loop * loop,vec<chain_p> chains)3164 insert_init_seqs (class loop *loop, vec<chain_p> chains)
3165 {
3166   unsigned i;
3167   edge entry = loop_preheader_edge (loop);
3168 
3169   for (i = 0; i < chains.length (); ++i)
3170     if (chains[i]->init_seq)
3171       {
3172 	gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq);
3173 	chains[i]->init_seq = NULL;
3174       }
3175 }
3176 
3177 /* Performs predictive commoning for LOOP.  Sets bit 1<<0 of return value
3178    if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa
3179    form was corrupted.  */
3180 
3181 static unsigned
tree_predictive_commoning_loop(class loop * loop)3182 tree_predictive_commoning_loop (class loop *loop)
3183 {
3184   vec<data_reference_p> datarefs;
3185   vec<ddr_p> dependences;
3186   struct component *components;
3187   vec<chain_p> chains = vNULL;
3188   unsigned unroll_factor;
3189   class tree_niter_desc desc;
3190   bool unroll = false, loop_closed_ssa = false;
3191   edge exit;
3192 
3193   if (dump_file && (dump_flags & TDF_DETAILS))
3194     fprintf (dump_file, "Processing loop %d\n",  loop->num);
3195 
3196   /* Nothing for predicitive commoning if loop only iterates 1 time.  */
3197   if (get_max_loop_iterations_int (loop) == 0)
3198     {
3199       if (dump_file && (dump_flags & TDF_DETAILS))
3200 	fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");
3201 
3202       return 0;
3203     }
3204 
3205   /* Find the data references and split them into components according to their
3206      dependence relations.  */
3207   auto_vec<loop_p, 3> loop_nest;
3208   dependences.create (10);
3209   datarefs.create (10);
3210   if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
3211 					   &dependences))
3212     {
3213       if (dump_file && (dump_flags & TDF_DETAILS))
3214 	fprintf (dump_file, "Cannot analyze data dependencies\n");
3215       free_data_refs (datarefs);
3216       free_dependence_relations (dependences);
3217       return 0;
3218     }
3219 
3220   if (dump_file && (dump_flags & TDF_DETAILS))
3221     dump_data_dependence_relations (dump_file, dependences);
3222 
3223   components = split_data_refs_to_components (loop, datarefs, dependences);
3224   loop_nest.release ();
3225   free_dependence_relations (dependences);
3226   if (!components)
3227     {
3228       free_data_refs (datarefs);
3229       free_affine_expand_cache (&name_expansions);
3230       return 0;
3231     }
3232 
3233   if (dump_file && (dump_flags & TDF_DETAILS))
3234     {
3235       fprintf (dump_file, "Initial state:\n\n");
3236       dump_components (dump_file, components);
3237     }
3238 
3239   /* Find the suitable components and split them into chains.  */
3240   components = filter_suitable_components (loop, components);
3241 
3242   auto_bitmap tmp_vars;
3243   looparound_phis = BITMAP_ALLOC (NULL);
3244   determine_roots (loop, components, &chains);
3245   release_components (components);
3246 
3247   if (!chains.exists ())
3248     {
3249       if (dump_file && (dump_flags & TDF_DETAILS))
3250 	fprintf (dump_file,
3251 		 "Predictive commoning failed: no suitable chains\n");
3252       goto end;
3253     }
3254   prepare_initializers (loop, chains);
3255   loop_closed_ssa = prepare_finalizers (loop, chains);
3256 
3257   /* Try to combine the chains that are always worked with together.  */
3258   try_combine_chains (loop, &chains);
3259 
3260   insert_init_seqs (loop, chains);
3261 
3262   if (dump_file && (dump_flags & TDF_DETAILS))
3263     {
3264       fprintf (dump_file, "Before commoning:\n\n");
3265       dump_chains (dump_file, chains);
3266     }
3267 
3268   /* Determine the unroll factor, and if the loop should be unrolled, ensure
3269      that its number of iterations is divisible by the factor.  */
3270   unroll_factor = determine_unroll_factor (chains);
3271   scev_reset ();
3272   unroll = (unroll_factor > 1
3273 	    && can_unroll_loop_p (loop, unroll_factor, &desc));
3274   exit = single_dom_exit (loop);
3275 
3276   /* Execute the predictive commoning transformations, and possibly unroll the
3277      loop.  */
3278   if (unroll)
3279     {
3280       struct epcc_data dta;
3281 
3282       if (dump_file && (dump_flags & TDF_DETAILS))
3283 	fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
3284 
3285       dta.chains = chains;
3286       dta.tmp_vars = tmp_vars;
3287 
3288       update_ssa (TODO_update_ssa_only_virtuals);
3289 
3290       /* Cfg manipulations performed in tree_transform_and_unroll_loop before
3291 	 execute_pred_commoning_cbck is called may cause phi nodes to be
3292 	 reallocated, which is a problem since CHAINS may point to these
3293 	 statements.  To fix this, we store the ssa names defined by the
3294 	 phi nodes here instead of the phi nodes themselves, and restore
3295 	 the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
3296       replace_phis_by_defined_names (chains);
3297 
3298       tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
3299 				      execute_pred_commoning_cbck, &dta);
3300       eliminate_temp_copies (loop, tmp_vars);
3301     }
3302   else
3303     {
3304       if (dump_file && (dump_flags & TDF_DETAILS))
3305 	fprintf (dump_file,
3306 		 "Executing predictive commoning without unrolling.\n");
3307       execute_pred_commoning (loop, chains, tmp_vars);
3308     }
3309 
3310 end: ;
3311   release_chains (chains);
3312   free_data_refs (datarefs);
3313   BITMAP_FREE (looparound_phis);
3314 
3315   free_affine_expand_cache (&name_expansions);
3316 
3317   return (unroll ? 1 : 0) | (loop_closed_ssa ? 2 : 0);
3318 }
3319 
3320 /* Runs predictive commoning.  */
3321 
3322 unsigned
tree_predictive_commoning(void)3323 tree_predictive_commoning (void)
3324 {
3325   class loop *loop;
3326   unsigned ret = 0, changed = 0;
3327 
3328   initialize_original_copy_tables ();
3329   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
3330     if (optimize_loop_for_speed_p (loop))
3331       {
3332 	changed |= tree_predictive_commoning_loop (loop);
3333       }
3334   free_original_copy_tables ();
3335 
3336   if (changed > 0)
3337     {
3338       scev_reset ();
3339 
3340       if (changed > 1)
3341 	rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3342 
3343       ret = TODO_cleanup_cfg;
3344     }
3345 
3346   return ret;
3347 }
3348 
3349 /* Predictive commoning Pass.  */
3350 
3351 static unsigned
run_tree_predictive_commoning(struct function * fun)3352 run_tree_predictive_commoning (struct function *fun)
3353 {
3354   if (number_of_loops (fun) <= 1)
3355     return 0;
3356 
3357   return tree_predictive_commoning ();
3358 }
3359 
3360 namespace {
3361 
3362 const pass_data pass_data_predcom =
3363 {
3364   GIMPLE_PASS, /* type */
3365   "pcom", /* name */
3366   OPTGROUP_LOOP, /* optinfo_flags */
3367   TV_PREDCOM, /* tv_id */
3368   PROP_cfg, /* properties_required */
3369   0, /* properties_provided */
3370   0, /* properties_destroyed */
3371   0, /* todo_flags_start */
3372   TODO_update_ssa_only_virtuals, /* todo_flags_finish */
3373 };
3374 
3375 class pass_predcom : public gimple_opt_pass
3376 {
3377 public:
pass_predcom(gcc::context * ctxt)3378   pass_predcom (gcc::context *ctxt)
3379     : gimple_opt_pass (pass_data_predcom, ctxt)
3380   {}
3381 
3382   /* opt_pass methods: */
gate(function *)3383   virtual bool gate (function *) { return flag_predictive_commoning != 0; }
execute(function * fun)3384   virtual unsigned int execute (function *fun)
3385     {
3386       return run_tree_predictive_commoning (fun);
3387     }
3388 
3389 }; // class pass_predcom
3390 
3391 } // anon namespace
3392 
3393 gimple_opt_pass *
make_pass_predcom(gcc::context * ctxt)3394 make_pass_predcom (gcc::context *ctxt)
3395 {
3396   return new pass_predcom (ctxt);
3397 }
3398 
3399 
3400