1 /* Predictive commoning.
2    Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This file implements the predictive commoning optimization.  Predictive
21    commoning can be viewed as CSE around a loop, and with some improvements,
22    as generalized strength reduction-- i.e., reusing values computed in
23    earlier iterations of a loop in the later ones.  So far, the pass only
24    handles the most useful case, that is, reusing values of memory references.
25    If you think this is all just a special case of PRE, you are sort of right;
26    however, concentrating on loops is simpler, and makes it possible to
27    incorporate data dependence analysis to detect the opportunities, perform
28    loop unrolling to avoid copies together with renaming immediately,
29    and if needed, we could also take register pressure into account.
30 
31    Let us demonstrate what is done on an example:
32 
33    for (i = 0; i < 100; i++)
34      {
35        a[i+2] = a[i] + a[i+1];
36        b[10] = b[10] + i;
37        c[i] = c[99 - i];
38        d[i] = d[i + 1];
39      }
40 
41    1) We find data references in the loop, and split them to mutually
42       independent groups (i.e., we find components of a data dependence
43       graph).  We ignore read-read dependences whose distance is not constant.
44       (TODO -- we could also ignore antidependences).  In this example, we
45       find the following groups:
46 
47       a[i]{read}, a[i+1]{read}, a[i+2]{write}
48       b[10]{read}, b[10]{write}
49       c[99 - i]{read}, c[i]{write}
50       d[i + 1]{read}, d[i]{write}
51 
52    2) Inside each of the group, we verify several conditions:
53       a) all the references must differ in indices only, and the indices
54 	 must all have the same step
55       b) the references must dominate loop latch (and thus, they must be
56 	 ordered by dominance relation).
57       c) the distance of the indices must be a small multiple of the step
58       We are then able to compute the difference of the references (# of
59       iterations before they point to the same place as the first of them).
60       Also, in case there are writes in the loop, we split the groups into
61       chains whose head is the write whose values are used by the reads in
62       the same chain.  The chains are then processed independently,
63       making the further transformations simpler.  Also, the shorter chains
64       need the same number of registers, but may require lower unrolling
65       factor in order to get rid of the copies on the loop latch.
66 
67       In our example, we get the following chains (the chain for c is invalid).
68 
69       a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70       b[10]{read,+0}, b[10]{write,+0}
71       d[i + 1]{read,+0}, d[i]{write,+1}
72 
73    3) For each read, we determine the read or write whose value it reuses,
74       together with the distance of this reuse.  I.e. we take the last
75       reference before it with distance 0, or the last of the references
76       with the smallest positive distance to the read.  Then, we remove
77       the references that are not used in any of these chains, discard the
78       empty groups, and propagate all the links so that they point to the
79       single root reference of the chain (adjusting their distance
80       appropriately).  Some extra care needs to be taken for references with
81       step 0.  In our example (the numbers indicate the distance of the
82       reuse),
83 
84       a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85       b[10] --> (*) 1, b[10] (*)
86 
87    4) The chains are combined together if possible.  If the corresponding
88       elements of two chains are always combined together with the same
89       operator, we remember just the result of this combination, instead
90       of remembering the values separately.  We may need to perform
91       reassociation to enable combining, for example
92 
93       e[i] + f[i+1] + e[i+1] + f[i]
94 
95       can be reassociated as
96 
97       (e[i] + f[i]) + (e[i+1] + f[i+1])
98 
99       and we can combine the chains for e and f into one chain.
100 
101    5) For each root reference (end of the chain) R, let N be maximum distance
102       of a reference reusing its value.  Variables R0 up to RN are created,
103       together with phi nodes that transfer values from R1 .. RN to
104       R0 .. R(N-1).
105       Initial values are loaded to R0..R(N-1) (in case not all references
106       must necessarily be accessed and they may trap, we may fail here;
107       TODO sometimes, the loads could be guarded by a check for the number
108       of iterations).  Values loaded/stored in roots are also copied to
109       RN.  Other reads are replaced with the appropriate variable Ri.
110       Everything is put to SSA form.
111 
112       As a small improvement, if R0 is dead after the root (i.e., all uses of
113       the value with the maximum distance dominate the root), we can avoid
114       creating RN and use R0 instead of it.
115 
116       In our example, we get (only the parts concerning a and b are shown):
117       for (i = 0; i < 100; i++)
118 	{
119 	  f = phi (a[0], s);
120 	  s = phi (a[1], f);
121 	  x = phi (b[10], x);
122 
123 	  f = f + s;
124 	  a[i+2] = f;
125 	  x = x + i;
126 	  b[10] = x;
127 	}
128 
129    6) Factor F for unrolling is determined as the smallest common multiple of
130       (N + 1) for each root reference (N for references for that we avoided
131       creating RN).  If F and the loop is small enough, loop is unrolled F
132       times.  The stores to RN (R0) in the copies of the loop body are
133       periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134       be coalesced and the copies can be eliminated.
135 
136       TODO -- copy propagation and other optimizations may change the live
137       ranges of the temporary registers and prevent them from being coalesced;
138       this may increase the register pressure.
139 
140       In our case, F = 2 and the (main loop of the) result is
141 
142       for (i = 0; i < ...; i += 2)
143         {
144           f = phi (a[0], f);
145           s = phi (a[1], s);
146           x = phi (b[10], x);
147 
148           f = f + s;
149           a[i+2] = f;
150           x = x + i;
151           b[10] = x;
152 
153           s = s + f;
154           a[i+3] = s;
155           x = x + i;
156           b[10] = x;
157        }
158 
159    TODO -- stores killing other stores can be taken into account, e.g.,
160    for (i = 0; i < n; i++)
161      {
162        a[i] = 1;
163        a[i+2] = 2;
164      }
165 
166    can be replaced with
167 
168    t0 = a[0];
169    t1 = a[1];
170    for (i = 0; i < n; i++)
171      {
172        a[i] = 1;
173        t2 = 2;
174        t0 = t1;
175        t1 = t2;
176      }
177    a[n] = t0;
178    a[n+1] = t1;
179 
180    The interesting part is that this would generalize store motion; still, since
181    sm is performed elsewhere, it does not seem that important.
182 
183    Predictive commoning can be generalized for arbitrary computations (not
184    just memory loads), and also nontrivial transfer functions (e.g., replacing
185    i * i with ii_last + 2 * i + 1), to generalize strength reduction.  */
186 
187 #include "config.h"
188 #include "system.h"
189 #include "coretypes.h"
190 #include "tm.h"
191 #include "tree.h"
192 #include "tm_p.h"
193 #include "cfgloop.h"
194 #include "basic-block.h"
195 #include "tree-ssa-alias.h"
196 #include "internal-fn.h"
197 #include "tree-eh.h"
198 #include "gimple-expr.h"
199 #include "is-a.h"
200 #include "gimple.h"
201 #include "gimplify.h"
202 #include "gimple-iterator.h"
203 #include "gimplify-me.h"
204 #include "gimple-ssa.h"
205 #include "tree-phinodes.h"
206 #include "ssa-iterators.h"
207 #include "stringpool.h"
208 #include "tree-ssanames.h"
209 #include "tree-ssa-loop-ivopts.h"
210 #include "tree-ssa-loop-manip.h"
211 #include "tree-ssa-loop-niter.h"
212 #include "tree-ssa-loop.h"
213 #include "tree-into-ssa.h"
214 #include "expr.h"
215 #include "tree-dfa.h"
216 #include "tree-ssa.h"
217 #include "tree-data-ref.h"
218 #include "tree-scalar-evolution.h"
219 #include "tree-chrec.h"
220 #include "params.h"
221 #include "gimple-pretty-print.h"
222 #include "tree-pass.h"
223 #include "tree-affine.h"
224 #include "tree-inline.h"
225 
/* The maximum number of iterations between the considered memory
   references.  Kept smaller on targets with few available registers in
   order to limit the register pressure created by the pass.  */

#define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
230 
/* Data references (or phi nodes that carry data reference values across
   loop iterations).  */

typedef struct dref_d
{
  /* The reference itself.  NULL for looparound phi nodes and for
     references created when chains are combined.  */
  struct data_reference *ref;

  /* The statement in that the reference appears.  */
  gimple stmt;

  /* In case that STMT is a phi node, this field is set to the SSA name
     defined by it in replace_phis_by_defined_names (in order to avoid
     pointing to phi node that got reallocated in the meantime).  */
  tree name_defined_by_phi;

  /* Distance of the reference from the root of the chain (in number of
     iterations of the loop).  */
  unsigned distance;

  /* Number of iterations offset from the first reference in the component.  */
  double_int offset;

  /* Number of the reference in a component, in dominance ordering.  */
  unsigned pos;

  /* True if the memory reference is always accessed when the loop is
     entered.  */
  unsigned always_accessed : 1;
} *dref;
261 
262 
/* Type of the chain of the references.  */

enum chain_type
{
  /* The addresses of the references in the chain are constant; such
     references can simply be hoisted (load motion).  */
  CT_INVARIANT,

  /* There are only loads in the chain.  */
  CT_LOAD,

  /* Root of the chain is store, the rest are loads.  */
  CT_STORE_LOAD,

  /* A combination of two chains (see step 4 in the file comment).  */
  CT_COMBINATION
};
279 
/* Chains of data references.  */

typedef struct chain
{
  /* Type of the chain.  */
  enum chain_type type;

  /* For combination chains, the operator and the two chains that are
     combined, and the type of the result.  */
  enum tree_code op;
  tree rslt_type;
  struct chain *ch1, *ch2;

  /* The references in the chain.  The root is the first element.  */
  vec<dref> refs;

  /* The maximum distance of the reference in the chain from the root.  */
  unsigned length;

  /* The variables used to copy the value throughout iterations
     (R0 .. RN in the file comment).  */
  vec<tree> vars;

  /* Initializers for the variables.  */
  vec<tree> inits;

  /* True if there is a use of a variable with the maximal distance
     that comes after the root in the loop.  */
  unsigned has_max_use_after : 1;

  /* True if all the memory references in the chain are always accessed.  */
  unsigned all_always_accessed : 1;

  /* True if this chain was combined together with some other chain.  */
  unsigned combined : 1;
} *chain_p;
315 
316 
/* Describes the knowledge about the step of the memory references in
   the component.  */

enum ref_step_type
{
  /* The step is zero.  */
  RS_INVARIANT,

  /* The step is nonzero.  */
  RS_NONZERO,

  /* The step may or may not be nonzero.  */
  RS_ANY
};
331 
/* Components of the data dependence graph.  */

struct component
{
  /* The references in the component.  */
  vec<dref> refs;

  /* What we know about the step of the references in the component.  */
  enum ref_step_type comp_step;

  /* Next component in the list.  */
  struct component *next;
};
345 
/* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */

static bitmap looparound_phis;

/* Cache used by tree_to_aff_combination_expand.  */

static struct pointer_map_t *name_expansions;
353 
/* Dumps data reference REF to FILE.  */

extern void dump_dref (FILE *, dref);
void
dump_dref (FILE *file, dref ref)
{
  if (ref->ref)
    {
      /* A real memory reference -- print the expression, its id and
	 read/write kind, its offset and its distance from the root.  */
      fprintf (file, "    ");
      print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
      fprintf (file, " (id %u%s)\n", ref->pos,
	       DR_IS_READ (ref->ref) ? "" : ", write");

      fprintf (file, "      offset ");
      dump_double_int (file, ref->offset, false);
      fprintf (file, "\n");

      fprintf (file, "      distance %u\n", ref->distance);
    }
  else
    {
      /* REF->REF is NULL for looparound phi nodes and for references
	 created when combining chains; tell them apart by the statement
	 kind.  */
      if (gimple_code (ref->stmt) == GIMPLE_PHI)
	fprintf (file, "    looparound ref\n");
      else
	fprintf (file, "    combination ref\n");
      fprintf (file, "      in statement ");
      print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
      fprintf (file, "\n");
      fprintf (file, "      distance %u\n", ref->distance);
    }

}
386 
/* Dumps CHAIN to FILE.  */

extern void dump_chain (FILE *, chain_p);
void
dump_chain (FILE *file, chain_p chain)
{
  dref a;
  const char *chain_type;
  unsigned i;
  tree var;

  /* Human-readable name for the kind of the chain.  */
  switch (chain->type)
    {
    case CT_INVARIANT:
      chain_type = "Load motion";
      break;

    case CT_LOAD:
      chain_type = "Loads-only";
      break;

    case CT_STORE_LOAD:
      chain_type = "Store-loads";
      break;

    case CT_COMBINATION:
      chain_type = "Combination";
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
	   chain->combined ? " (combined)" : "");
  if (chain->type != CT_INVARIANT)
    fprintf (file, "  max distance %u%s\n", chain->length,
	     chain->has_max_use_after ? "" : ", may reuse first");

  if (chain->type == CT_COMBINATION)
    {
      /* Show how the combination was built from its two sub-chains.  */
      fprintf (file, "  equal to %p %s %p in type ",
	       (void *) chain->ch1, op_symbol_code (chain->op),
	       (void *) chain->ch2);
      print_generic_expr (file, chain->rslt_type, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (chain->vars.exists ())
    {
      fprintf (file, "  vars");
      FOR_EACH_VEC_ELT (chain->vars, i, var)
	{
	  fprintf (file, " ");
	  print_generic_expr (file, var, TDF_SLIM);
	}
      fprintf (file, "\n");
    }

  if (chain->inits.exists ())
    {
      fprintf (file, "  inits");
      FOR_EACH_VEC_ELT (chain->inits, i, var)
	{
	  fprintf (file, " ");
	  print_generic_expr (file, var, TDF_SLIM);
	}
      fprintf (file, "\n");
    }

  fprintf (file, "  references:\n");
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    dump_dref (file, a);

  fprintf (file, "\n");
}
463 
464 /* Dumps CHAINS to FILE.  */
465 
466 extern void dump_chains (FILE *, vec<chain_p> );
467 void
dump_chains(FILE * file,vec<chain_p> chains)468 dump_chains (FILE *file, vec<chain_p> chains)
469 {
470   chain_p chain;
471   unsigned i;
472 
473   FOR_EACH_VEC_ELT (chains, i, chain)
474     dump_chain (file, chain);
475 }
476 
477 /* Dumps COMP to FILE.  */
478 
479 extern void dump_component (FILE *, struct component *);
480 void
dump_component(FILE * file,struct component * comp)481 dump_component (FILE *file, struct component *comp)
482 {
483   dref a;
484   unsigned i;
485 
486   fprintf (file, "Component%s:\n",
487 	   comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
488   FOR_EACH_VEC_ELT (comp->refs, i, a)
489     dump_dref (file, a);
490   fprintf (file, "\n");
491 }
492 
493 /* Dumps COMPS to FILE.  */
494 
495 extern void dump_components (FILE *, struct component *);
496 void
dump_components(FILE * file,struct component * comps)497 dump_components (FILE *file, struct component *comps)
498 {
499   struct component *comp;
500 
501   for (comp = comps; comp; comp = comp->next)
502     dump_component (file, comp);
503 }
504 
505 /* Frees a chain CHAIN.  */
506 
507 static void
release_chain(chain_p chain)508 release_chain (chain_p chain)
509 {
510   dref ref;
511   unsigned i;
512 
513   if (chain == NULL)
514     return;
515 
516   FOR_EACH_VEC_ELT (chain->refs, i, ref)
517     free (ref);
518 
519   chain->refs.release ();
520   chain->vars.release ();
521   chain->inits.release ();
522 
523   free (chain);
524 }
525 
526 /* Frees CHAINS.  */
527 
528 static void
release_chains(vec<chain_p> chains)529 release_chains (vec<chain_p> chains)
530 {
531   unsigned i;
532   chain_p chain;
533 
534   FOR_EACH_VEC_ELT (chains, i, chain)
535     release_chain (chain);
536   chains.release ();
537 }
538 
/* Frees a component COMP.  Note that the dref structures in COMP->refs
   are not freed here; callers that own them release them separately.  */

static void
release_component (struct component *comp)
{
  comp->refs.release ();
  free (comp);
}
547 
548 /* Frees list of components COMPS.  */
549 
550 static void
release_components(struct component * comps)551 release_components (struct component *comps)
552 {
553   struct component *act, *next;
554 
555   for (act = comps; act; act = next)
556     {
557       next = act->next;
558       release_component (act);
559     }
560 }
561 
/* Finds a root of tree given by FATHERS containing A, and performs path
   shortening.  */

static unsigned
component_of (unsigned fathers[], unsigned a)
{
  unsigned root = a;

  /* Walk up until we reach an element that is its own father.  */
  while (fathers[root] != root)
    root = fathers[root];

  /* Second pass: point every node on the path directly at the root, so
     that later lookups are cheap.  */
  while (a != root)
    {
      unsigned next = fathers[a];
      fathers[a] = root;
      a = next;
    }

  return root;
}
581 
582 /* Join operation for DFU.  FATHERS gives the tree, SIZES are sizes of the
583    components, A and B are components to merge.  */
584 
585 static void
merge_comps(unsigned fathers[],unsigned sizes[],unsigned a,unsigned b)586 merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
587 {
588   unsigned ca = component_of (fathers, a);
589   unsigned cb = component_of (fathers, b);
590 
591   if (ca == cb)
592     return;
593 
594   if (sizes[ca] < sizes[cb])
595     {
596       sizes[cb] += sizes[ca];
597       fathers[ca] = cb;
598     }
599   else
600     {
601       sizes[ca] += sizes[cb];
602       fathers[cb] = ca;
603     }
604 }
605 
606 /* Returns true if A is a reference that is suitable for predictive commoning
607    in the innermost loop that contains it.  REF_STEP is set according to the
608    step of the reference A.  */
609 
610 static bool
suitable_reference_p(struct data_reference * a,enum ref_step_type * ref_step)611 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
612 {
613   tree ref = DR_REF (a), step = DR_STEP (a);
614 
615   if (!step
616       || TREE_THIS_VOLATILE (ref)
617       || !is_gimple_reg_type (TREE_TYPE (ref))
618       || tree_could_throw_p (ref))
619     return false;
620 
621   if (integer_zerop (step))
622     *ref_step = RS_INVARIANT;
623   else if (integer_nonzerop (step))
624     *ref_step = RS_NONZERO;
625   else
626     *ref_step = RS_ANY;
627 
628   return true;
629 }
630 
/* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET.  */

static void
aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
{
  tree type = TREE_TYPE (DR_OFFSET (dr));
  aff_tree delta;

  /* Expand the symbolic offset part into an affine combination, using the
     file-global NAME_EXPANSIONS cache, then add the constant init part.  */
  tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
				  &name_expansions);
  aff_combination_const (&delta, type, tree_to_double_int (DR_INIT (dr)));
  aff_combination_add (offset, &delta);
}
644 
/* Determines number of iterations of the innermost enclosing loop before B
   refers to exactly the same location as A and stores it to OFF.  If A and
   B do not have the same step, they never meet, or anything else fails,
   returns false, otherwise returns true.  Both A and B are assumed to
   satisfy suitable_reference_p.  */

static bool
determine_offset (struct data_reference *a, struct data_reference *b,
		  double_int *off)
{
  aff_tree diff, baseb, step;
  tree typea, typeb;

  /* Check that both the references access the location in the same type.  */
  typea = TREE_TYPE (DR_REF (a));
  typeb = TREE_TYPE (DR_REF (b));
  if (!useless_type_conversion_p (typeb, typea))
    return false;

  /* Check whether the base address and the step of both references is the
     same.  */
  if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
      || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
    return false;

  if (integer_zerop (DR_STEP (a)))
    {
      /* If the references have loop invariant address, check that they access
	 exactly the same location.  */
      *off = double_int_zero;
      return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
	      && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
    }

  /* Compare the offsets of the addresses, and check whether the difference
     is a multiple of step.  Compute DIFF = offset(A) - offset(B) as an
     affine combination.  */
  aff_combination_dr_offset (a, &diff);
  aff_combination_dr_offset (b, &baseb);
  aff_combination_scale (&baseb, double_int_minus_one);
  aff_combination_add (&diff, &baseb);

  tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
				  &step, &name_expansions);
  return aff_combination_constant_multiple_p (&diff, &step, off);
}
690 
691 /* Returns the last basic block in LOOP for that we are sure that
692    it is executed whenever the loop is entered.  */
693 
694 static basic_block
last_always_executed_block(struct loop * loop)695 last_always_executed_block (struct loop *loop)
696 {
697   unsigned i;
698   vec<edge> exits = get_loop_exit_edges (loop);
699   edge ex;
700   basic_block last = loop->latch;
701 
702   FOR_EACH_VEC_ELT (exits, i, ex)
703     last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
704   exits.release ();
705 
706   return last;
707 }
708 
/* Splits dependence graph on DATAREFS described by DEPENDS to components.
   Returns the list of components (NULL on failure, e.g. when a fake
   clobbering reference is present).  */

static struct component *
split_data_refs_to_components (struct loop *loop,
			       vec<data_reference_p> datarefs,
			       vec<ddr_p> depends)
{
  unsigned i, n = datarefs.length ();
  unsigned ca, ia, ib, bad;
  /* Union-find structure over the n data references plus one extra slot
     (index N) for the "bad" component.  */
  unsigned *comp_father = XNEWVEC (unsigned, n + 1);
  unsigned *comp_size = XNEWVEC (unsigned, n + 1);
  struct component **comps;
  struct data_reference *dr, *dra, *drb;
  struct data_dependence_relation *ddr;
  struct component *comp_list = NULL, *comp;
  dref dataref;
  basic_block last_always_executed = last_always_executed_block (loop);

  /* Initialize each data reference as a singleton component; its index is
     stashed in DR->aux so that it can be recovered from the dependence
     relations below.  */
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      if (!DR_REF (dr))
	{
	  /* A fake reference for call or asm_expr that may clobber memory;
	     just fail.  */
	  goto end;
	}
      /* predcom pass isn't prepared to handle calls with data references.  */
      if (is_gimple_call (DR_STMT (dr)))
	goto end;
      dr->aux = (void *) (size_t) i;
      comp_father[i] = i;
      comp_size[i] = 1;
    }

  /* A component reserved for the "bad" data references.  */
  comp_father[n] = n;
  comp_size[n] = 1;

  /* Merge the references the pass cannot handle into the bad component.  */
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      enum ref_step_type dummy;

      if (!suitable_reference_p (dr, &dummy))
	{
	  ia = (unsigned) (size_t) dr->aux;
	  merge_comps (comp_father, comp_size, n, ia);
	}
    }

  /* Merge components connected by a data dependence.  */
  FOR_EACH_VEC_ELT (depends, i, ddr)
    {
      double_int dummy_off;

      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	continue;

      dra = DDR_A (ddr);
      drb = DDR_B (ddr);
      ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
      ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
      if (ia == ib)
	continue;

      bad = component_of (comp_father, n);

      /* If both A and B are reads, we may ignore unsuitable dependences.  */
      if (DR_IS_READ (dra) && DR_IS_READ (drb))
	{
	  if (ia == bad || ib == bad
	      || !determine_offset (dra, drb, &dummy_off))
	    continue;
	}
      /* If A is read and B write or vice versa and there is unsuitable
	 dependence, instead of merging both components into a component
	 that will certainly not pass suitable_component_p, just put the
	 read into bad component, perhaps at least the write together with
	 all the other data refs in its component will be optimizable.  */
      else if (DR_IS_READ (dra) && ib != bad)
	{
	  if (ia == bad)
	    continue;
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      merge_comps (comp_father, comp_size, bad, ia);
	      continue;
	    }
	}
      else if (DR_IS_READ (drb) && ia != bad)
	{
	  if (ib == bad)
	    continue;
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      merge_comps (comp_father, comp_size, bad, ib);
	      continue;
	    }
	}

      merge_comps (comp_father, comp_size, ia, ib);
    }

  /* Translate the union-find sets into the component list, skipping all
     references that ended up in the bad component.  */
  comps = XCNEWVEC (struct component *, n);
  bad = component_of (comp_father, n);
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      ia = (unsigned) (size_t) dr->aux;
      ca = component_of (comp_father, ia);
      if (ca == bad)
	continue;

      comp = comps[ca];
      if (!comp)
	{
	  comp = XCNEW (struct component);
	  /* COMP_SIZE bounds the number of refs in this component, so
	     quick_push below never reallocates.  */
	  comp->refs.create (comp_size[ca]);
	  comps[ca] = comp;
	}

      dataref = XCNEW (struct dref_d);
      dataref->ref = dr;
      dataref->stmt = DR_STMT (dr);
      dataref->offset = double_int_zero;
      dataref->distance = 0;

      dataref->always_accessed
	      = dominated_by_p (CDI_DOMINATORS, last_always_executed,
				gimple_bb (dataref->stmt));
      dataref->pos = comp->refs.length ();
      comp->refs.quick_push (dataref);
    }

  /* Chain the components into a list.  */
  for (i = 0; i < n; i++)
    {
      comp = comps[i];
      if (comp)
	{
	  comp->next = comp_list;
	  comp_list = comp;
	}
    }
  free (comps);

end:
  free (comp_father);
  free (comp_size);
  return comp_list;
}
856 
/* Returns true if the component COMP satisfies the conditions
   described in 2) at the beginning of this file.  LOOP is the current
   loop.  */

static bool
suitable_component_p (struct loop *loop, struct component *comp)
{
  unsigned i;
  dref a, first;
  basic_block ba, bp = loop->header;
  bool ok, has_write = false;

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      ba = gimple_bb (a->stmt);

      /* Each reference must be executed exactly once per iteration.  */
      if (!just_once_each_iteration_p (loop, ba))
	return false;

      /* The refs were pushed in dominance order, so each must be
	 dominated by its predecessor (and by the loop header).  */
      gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
      bp = ba;

      if (DR_IS_WRITE (a->ref))
	has_write = true;
    }

  first = comp->refs[0];
  ok = suitable_reference_p (first->ref, &comp->comp_step);
  gcc_assert (ok);
  first->offset = double_int_zero;

  /* Compute the offsets of all other references relative to the first
     one; fail if any offset cannot be determined.  */
  for (i = 1; comp->refs.iterate (i, &a); i++)
    {
      if (!determine_offset (first->ref, a->ref, &a->offset))
	return false;

#ifdef ENABLE_CHECKING
      {
	enum ref_step_type a_step;
	ok = suitable_reference_p (a->ref, &a_step);
	gcc_assert (ok && a_step == comp->comp_step);
      }
#endif
    }

  /* If there is a write inside the component, we must know whether the
     step is nonzero or not -- we would not otherwise be able to recognize
     whether the value accessed by reads comes from the OFFSET-th iteration
     or the previous one.  */
  if (has_write && comp->comp_step == RS_ANY)
    return false;

  return true;
}
911 
/* Check the conditions on references inside each of components COMPS,
   and remove the unsuitable components from the list.  The new list
   of components is returned.  The conditions are described in 2) at
   the beginning of this file.  LOOP is the current loop.  */

static struct component *
filter_suitable_components (struct loop *loop, struct component *comps)
{
  struct component **comp, *act;

  /* COMP always points to the link (head pointer or ->next field) through
     which the current component is reached, so unlinking is O(1).  */
  for (comp = &comps; *comp; )
    {
      act = *comp;
      if (suitable_component_p (loop, act))
	comp = &act->next;
      else
	{
	  dref ref;
	  unsigned i;

	  /* Unlink the component and free it together with its refs.  */
	  *comp = act->next;
	  FOR_EACH_VEC_ELT (act->refs, i, ref)
	    free (ref);
	  release_component (act);
	}
    }

  return comps;
}
941 
942 /* Compares two drefs A and B by their offset and position.  Callback for
943    qsort.  */
944 
945 static int
order_drefs(const void * a,const void * b)946 order_drefs (const void *a, const void *b)
947 {
948   const dref *const da = (const dref *) a;
949   const dref *const db = (const dref *) b;
950   int offcmp = (*da)->offset.scmp ((*db)->offset);
951 
952   if (offcmp != 0)
953     return offcmp;
954 
955   return (*da)->pos - (*db)->pos;
956 }
957 
/* Returns root of the CHAIN.  The root is the first reference in the
   chain's refs vector.  */

static inline dref
get_chain_root (chain_p chain)
{
  return chain->refs[0];
}
965 
/* Adds REF to the chain CHAIN.  REF is freed (not added) if its distance
   from the root reaches MAX_DISTANCE.  */

static void
add_ref_to_chain (chain_p chain, dref ref)
{
  dref root = get_chain_root (chain);
  double_int dist;

  /* Refs are added in the order of increasing offsets, so the root has
     the smallest offset.  */
  gcc_assert (root->offset.sle (ref->offset));
  dist = ref->offset - root->offset;
  /* Too distant refs would require too many temporary variables; drop
     them.  */
  if (double_int::from_uhwi (MAX_DISTANCE).ule (dist))
    {
      free (ref);
      return;
    }
  gcc_assert (dist.fits_uhwi ());

  chain->refs.safe_push (ref);

  ref->distance = dist.to_uhwi ();

  /* Update the chain length (maximum distance).  A new maximum resets
     HAS_MAX_USE_AFTER...  */
  if (ref->distance >= chain->length)
    {
      chain->length = ref->distance;
      chain->has_max_use_after = false;
    }

  /* ... which is then set if some reference at the maximum distance
     appears after the root in the loop body.  */
  if (ref->distance == chain->length
      && ref->pos > root->pos)
    chain->has_max_use_after = true;

  chain->all_always_accessed &= ref->always_accessed;
}
999 
1000 /* Returns the chain for invariant component COMP.  */
1001 
1002 static chain_p
make_invariant_chain(struct component * comp)1003 make_invariant_chain (struct component *comp)
1004 {
1005   chain_p chain = XCNEW (struct chain);
1006   unsigned i;
1007   dref ref;
1008 
1009   chain->type = CT_INVARIANT;
1010 
1011   chain->all_always_accessed = true;
1012 
1013   FOR_EACH_VEC_ELT (comp->refs, i, ref)
1014     {
1015       chain->refs.safe_push (ref);
1016       chain->all_always_accessed &= ref->always_accessed;
1017     }
1018 
1019   return chain;
1020 }
1021 
1022 /* Make a new chain rooted at REF.  */
1023 
1024 static chain_p
make_rooted_chain(dref ref)1025 make_rooted_chain (dref ref)
1026 {
1027   chain_p chain = XCNEW (struct chain);
1028 
1029   chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;
1030 
1031   chain->refs.safe_push (ref);
1032   chain->all_always_accessed = ref->always_accessed;
1033 
1034   ref->distance = 0;
1035 
1036   return chain;
1037 }
1038 
1039 /* Returns true if CHAIN is not trivial.  */
1040 
1041 static bool
nontrivial_chain_p(chain_p chain)1042 nontrivial_chain_p (chain_p chain)
1043 {
1044   return chain != NULL && chain->refs.length () > 1;
1045 }
1046 
1047 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1048    is no such name.  */
1049 
1050 static tree
name_for_ref(dref ref)1051 name_for_ref (dref ref)
1052 {
1053   tree name;
1054 
1055   if (is_gimple_assign (ref->stmt))
1056     {
1057       if (!ref->ref || DR_IS_READ (ref->ref))
1058 	name = gimple_assign_lhs (ref->stmt);
1059       else
1060 	name = gimple_assign_rhs1 (ref->stmt);
1061     }
1062   else
1063     name = PHI_RESULT (ref->stmt);
1064 
1065   return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1066 }
1067 
/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
   iterations of the innermost enclosing loop).  */

static bool
valid_initializer_p (struct data_reference *ref,
		     unsigned distance, struct data_reference *root)
{
  aff_tree diff, base, step;
  double_int off;

  /* Both REF and ROOT must be accessing the same object.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
    return false;

  /* The initializer is defined outside of loop, hence its address must be
     invariant inside the loop.  */
  gcc_assert (integer_zerop (DR_STEP (ref)));

  /* If the address of the reference is invariant, initializer must access
     exactly the same location.  */
  if (integer_zerop (DR_STEP (root)))
    return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
	    && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));

  /* Verify that this index of REF is equal to the root's index at
     -DISTANCE-th iteration.  Compute DIFF = offset (root) - offset (ref)
     as an affine combination.  */
  aff_combination_dr_offset (root, &diff);
  aff_combination_dr_offset (ref, &base);
  aff_combination_scale (&base, double_int_minus_one);
  aff_combination_add (&diff, &base);

  /* DIFF must be a constant multiple of the step of ROOT ...  */
  tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
				  &step, &name_expansions);
  if (!aff_combination_constant_multiple_p (&diff, &step, &off))
    return false;

  /* ... and the multiple must be exactly DISTANCE iterations.  */
  if (off != double_int::from_uhwi (distance))
    return false;

  return true;
}
1109 
/* Finds looparound phi node of LOOP that copies the value of REF, and if its
   initial value is correct (equal to initial value of REF shifted by one
   iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
   is the root of the current chain.  */

static gimple
find_looparound_phi (struct loop *loop, dref ref, dref root)
{
  tree name, init, init_ref;
  gimple phi = NULL, init_stmt;
  edge latch = loop_latch_edge (loop);
  struct data_reference init_dr;
  gimple_stmt_iterator psi;

  /* Determine the ssa name that holds the value of REF.  */
  if (is_gimple_assign (ref->stmt))
    {
      if (DR_IS_READ (ref->ref))
	name = gimple_assign_lhs (ref->stmt);
      else
	name = gimple_assign_rhs1 (ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);
  if (!name)
    return NULL;

  /* Look for a phi node in the loop header whose argument on the latch
     edge is NAME.  */
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
	break;
    }

  if (gsi_end_p (psi))
    return NULL;

  /* The value on the preheader edge must come from a simple assignment
     from memory (or a decl).  */
  init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  if (TREE_CODE (init) != SSA_NAME)
    return NULL;
  init_stmt = SSA_NAME_DEF_STMT (init);
  if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
    return NULL;
  gcc_assert (gimple_assign_lhs (init_stmt) == init);

  init_ref = gimple_assign_rhs1 (init_stmt);
  if (!REFERENCE_CLASS_P (init_ref)
      && !DECL_P (init_ref))
    return NULL;

  /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
     loop enclosing PHI).  */
  memset (&init_dr, 0, sizeof (struct data_reference));
  DR_REF (&init_dr) = init_ref;
  DR_STMT (&init_dr) = phi;
  if (!dr_analyze_innermost (&init_dr, loop))
    return NULL;

  /* The initializer must access the location of REF shifted by
     DISTANCE + 1 iterations.  */
  if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
    return NULL;

  return phi;
}
1172 
1173 /* Adds a reference for the looparound copy of REF in PHI to CHAIN.  */
1174 
1175 static void
insert_looparound_copy(chain_p chain,dref ref,gimple phi)1176 insert_looparound_copy (chain_p chain, dref ref, gimple phi)
1177 {
1178   dref nw = XCNEW (struct dref_d), aref;
1179   unsigned i;
1180 
1181   nw->stmt = phi;
1182   nw->distance = ref->distance + 1;
1183   nw->always_accessed = 1;
1184 
1185   FOR_EACH_VEC_ELT (chain->refs, i, aref)
1186     if (aref->distance >= nw->distance)
1187       break;
1188   chain->refs.safe_insert (i, nw);
1189 
1190   if (nw->distance > chain->length)
1191     {
1192       chain->length = nw->distance;
1193       chain->has_max_use_after = false;
1194     }
1195 }
1196 
1197 /* For references in CHAIN that are copied around the LOOP (created previously
1198    by PRE, or by user), add the results of such copies to the chain.  This
1199    enables us to remove the copies by unrolling, and may need less registers
1200    (also, it may allow us to combine chains together).  */
1201 
1202 static void
add_looparound_copies(struct loop * loop,chain_p chain)1203 add_looparound_copies (struct loop *loop, chain_p chain)
1204 {
1205   unsigned i;
1206   dref ref, root = get_chain_root (chain);
1207   gimple phi;
1208 
1209   FOR_EACH_VEC_ELT (chain->refs, i, ref)
1210     {
1211       phi = find_looparound_phi (loop, ref, root);
1212       if (!phi)
1213 	continue;
1214 
1215       bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1216       insert_looparound_copy (chain, ref, phi);
1217     }
1218 }
1219 
/* Find roots of the values and determine distances in the component COMP.
   The references are redistributed into CHAINS.  LOOP is the current
   loop.  */

static void
determine_roots_comp (struct loop *loop,
		      struct component *comp,
		      vec<chain_p> *chains)
{
  unsigned i;
  dref a;
  chain_p chain = NULL;
  double_int last_ofs = double_int_zero;

  /* Invariants are handled specially.  */
  if (comp->comp_step == RS_INVARIANT)
    {
      chain = make_invariant_chain (comp);
      chains->safe_push (chain);
      return;
    }

  /* Sort the references by offset (and position), so that each chain is
     rooted at the reference with the smallest offset.  */
  comp->refs.qsort (order_drefs);

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      /* A write, or a reference too far from the current root, starts a
	 new chain.  */
      if (!chain || DR_IS_WRITE (a->ref)
	  || double_int::from_uhwi (MAX_DISTANCE).ule (a->offset - last_ofs))
	{
	  /* Keep the finished chain only if it contains more than one
	     reference; otherwise it is useless, release it.  */
	  if (nontrivial_chain_p (chain))
	    {
	      add_looparound_copies (loop, chain);
	      chains->safe_push (chain);
	    }
	  else
	    release_chain (chain);
	  chain = make_rooted_chain (a);
	  last_ofs = a->offset;
	  continue;
	}

      add_ref_to_chain (chain, a);
    }

  /* Finish the last chain the same way.  */
  if (nontrivial_chain_p (chain))
    {
      add_looparound_copies (loop, chain);
      chains->safe_push (chain);
    }
  else
    release_chain (chain);
}
1272 
1273 /* Find roots of the values and determine distances in components COMPS, and
1274    separates the references to CHAINS.  LOOP is the current loop.  */
1275 
1276 static void
determine_roots(struct loop * loop,struct component * comps,vec<chain_p> * chains)1277 determine_roots (struct loop *loop,
1278 		 struct component *comps, vec<chain_p> *chains)
1279 {
1280   struct component *comp;
1281 
1282   for (comp = comps; comp; comp = comp->next)
1283     determine_roots_comp (loop, comp, chains);
1284 }
1285 
/* Replace the reference in statement STMT with temporary variable
   NEW_TREE.  If SET is true, NEW_TREE is instead initialized to the value of
   the reference in the statement.  IN_LHS is true if the reference
   is in the lhs of STMT, false if it is in rhs.  */

static void
replace_ref_with (gimple stmt, tree new_tree, bool set, bool in_lhs)
{
  tree val;
  gimple new_stmt;
  gimple_stmt_iterator bsi, psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      gcc_assert (!in_lhs && !set);

      val = PHI_RESULT (stmt);
      bsi = gsi_after_labels (gimple_bb (stmt));
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, false);

      /* Turn the phi node into GIMPLE_ASSIGN.  */
      new_stmt = gimple_build_assign (val, new_tree);
      gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
      return;
    }

  /* Since the reference is of gimple_reg type, it should only
     appear as lhs or rhs of modify statement.  */
  gcc_assert (is_gimple_assign (stmt));

  bsi = gsi_for_stmt (stmt);

  /* If we do not need to initialize NEW_TREE, just replace the use of OLD.  */
  if (!set)
    {
      gcc_assert (!in_lhs);
      gimple_assign_set_rhs_from_tree (&bsi, new_tree);
      stmt = gsi_stmt (bsi);
      update_stmt (stmt);
      return;
    }

  if (in_lhs)
    {
      /* We have statement

	 OLD = VAL

	 If OLD is a memory reference, then VAL is gimple_val, and we transform
	 this to

	 OLD = VAL
	 NEW = VAL

	 Otherwise, we are replacing a combination chain,
	 VAL is the expression that performs the combination, and OLD is an
	 SSA name.  In this case, we transform the assignment to

	 OLD = VAL
	 NEW = OLD

	 */

      val = gimple_assign_lhs (stmt);
      if (TREE_CODE (val) != SSA_NAME)
	{
	  val = gimple_assign_rhs1 (stmt);
	  gcc_assert (gimple_assign_single_p (stmt));
	  /* A clobber carries no value to copy; use the default definition
	     of the new variable instead.  */
	  if (TREE_CLOBBER_P (val))
	    val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
	  else
	    gcc_assert (gimple_assign_copy_p (stmt));
	}
    }
  else
    {
      /* VAL = OLD

	 is transformed to

	 VAL = OLD
	 NEW = VAL  */

      val = gimple_assign_lhs (stmt);
    }

  /* Insert the initialization of NEW_TREE right after STMT.  */
  new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
  gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
}
1376 
/* Returns a memory reference to DR in the ITER-th iteration of
   the loop it was analyzed in.  Append init stmts to STMTS.  */

static tree
ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts)
{
  tree off = DR_OFFSET (dr);
  tree coff = DR_INIT (dr);
  /* Shift the access by ITER times DR_STEP; fold the shift into the
     constant part if the step is constant, into the variable offset
     otherwise.  */
  if (iter == 0)
    ;
  else if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
    coff = size_binop (PLUS_EXPR, coff,
		       size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
  else
    off = size_binop (PLUS_EXPR, off,
		      size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
  tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
  addr = force_gimple_operand_1 (addr, stmts, is_gimple_mem_ref_addr,
				 NULL_TREE);
  tree alias_ptr = fold_convert (reference_alias_ptr_type (DR_REF (dr)), coff);
  /* While data-ref analysis punts on bit offsets it still handles
     bitfield accesses at byte boundaries.  Cope with that.  Note that
     we cannot simply re-apply the outer COMPONENT_REF because the
     byte-granular portion of it is already applied via DR_INIT and
     DR_OFFSET, so simply build a BIT_FIELD_REF knowing that the bits
     start at offset zero.  */
  if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
    {
      tree field = TREE_OPERAND (DR_REF (dr), 1);
      return build3 (BIT_FIELD_REF, TREE_TYPE (DR_REF (dr)),
		     build2 (MEM_REF, DECL_BIT_FIELD_TYPE (field),
			     addr, alias_ptr),
		     DECL_SIZE (field), bitsize_zero_node);
    }
  else
    return fold_build2 (MEM_REF, TREE_TYPE (DR_REF (dr)), addr, alias_ptr);
}
1415 
1416 /* Get the initialization expression for the INDEX-th temporary variable
1417    of CHAIN.  */
1418 
1419 static tree
get_init_expr(chain_p chain,unsigned index)1420 get_init_expr (chain_p chain, unsigned index)
1421 {
1422   if (chain->type == CT_COMBINATION)
1423     {
1424       tree e1 = get_init_expr (chain->ch1, index);
1425       tree e2 = get_init_expr (chain->ch2, index);
1426 
1427       return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1428     }
1429   else
1430     return chain->inits[index];
1431 }
1432 
1433 /* Returns a new temporary variable used for the I-th variable carrying
1434    value of REF.  The variable's uid is marked in TMP_VARS.  */
1435 
1436 static tree
predcom_tmp_var(tree ref,unsigned i,bitmap tmp_vars)1437 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1438 {
1439   tree type = TREE_TYPE (ref);
1440   /* We never access the components of the temporary variable in predictive
1441      commoning.  */
1442   tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1443   bitmap_set_bit (tmp_vars, DECL_UID (var));
1444   return var;
1445 }
1446 
/* Creates the variables for CHAIN, as well as phi nodes for them and
   initialization on entry to LOOP.  Uids of the newly created
   temporary variables are marked in TMP_VARS.  */

static void
initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  unsigned i;
  unsigned n = chain->length;
  dref root = get_chain_root (chain);
  bool reuse_first = !chain->has_max_use_after;
  tree ref, init, var, next;
  gimple phi;
  gimple_seq stmts;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* If N == 0, then all the references are within the single iteration.  And
     since this is an nonempty chain, reuse_first cannot be true.  */
  gcc_assert (n > 0 || !reuse_first);

  chain->vars.create (n + 1);

  /* Use the lhs of the root statement for combinations, the memory
     reference itself otherwise, to derive names and types.  */
  if (chain->type == CT_COMBINATION)
    ref = gimple_assign_lhs (root->stmt);
  else
    ref = DR_REF (root->ref);

  /* Create N + 1 variables; if the first can be reused as the last, only
     N are created and the first is pushed again.  */
  for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
    {
      var = predcom_tmp_var (ref, i, tmp_vars);
      chain->vars.quick_push (var);
    }
  if (reuse_first)
    chain->vars.quick_push (chain->vars[0]);

  FOR_EACH_VEC_ELT (chain->vars, i, var)
    chain->vars[i] = make_ssa_name (var, NULL);

  /* Build the phi nodes VAR_I = PHI (INIT_I (entry), VAR_{I+1} (latch)).  */
  for (i = 0; i < n; i++)
    {
      var = chain->vars[i];
      next = chain->vars[i + 1];
      init = get_init_expr (chain, i);

      init = force_gimple_operand (init, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (entry, stmts);

      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
}
1500 
1501 /* Create the variables and initialization statement for root of chain
1502    CHAIN.  Uids of the newly created temporary variables are marked
1503    in TMP_VARS.  */
1504 
1505 static void
initialize_root(struct loop * loop,chain_p chain,bitmap tmp_vars)1506 initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
1507 {
1508   dref root = get_chain_root (chain);
1509   bool in_lhs = (chain->type == CT_STORE_LOAD
1510 		 || chain->type == CT_COMBINATION);
1511 
1512   initialize_root_vars (loop, chain, tmp_vars);
1513   replace_ref_with (root->stmt,
1514 		    chain->vars[chain->length],
1515 		    true, in_lhs);
1516 }
1517 
/* Initializes a variable for load motion for ROOT and prepares phi nodes and
   initialization on entry to LOOP if necessary.  The ssa name for the variable
   is stored in VARS.  If WRITTEN is true, also a phi node to copy its value
   around the loop is created.  Uid of the newly created temporary variable
   is marked in TMP_VARS.  INITS is the list containing the (single)
   initializer.  */

static void
initialize_root_vars_lm (struct loop *loop, dref root, bool written,
			 vec<tree> *vars, vec<tree> inits,
			 bitmap tmp_vars)
{
  unsigned i;
  tree ref = DR_REF (root->ref), init, var, next;
  gimple_seq stmts;
  gimple phi;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* Find the initializer for the variable, and check that it cannot
     trap.  */
  init = inits[0];

  /* One variable suffices unless the location is written; then a second
     name carries the value around the loop.  */
  vars->create (written ? 2 : 1);
  var = predcom_tmp_var (ref, 0, tmp_vars);
  vars->quick_push (var);
  if (written)
    vars->quick_push ((*vars)[0]);

  FOR_EACH_VEC_ELT (*vars, i, var)
    (*vars)[i] = make_ssa_name (var, NULL);

  var = (*vars)[0];

  init = force_gimple_operand (init, &stmts, written, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (entry, stmts);

  if (written)
    {
      /* VAR = PHI (INIT (entry), NEXT (latch)).  */
      next = (*vars)[1];
      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
  else
    {
      /* No writes; just initialize VAR on the preheader edge.  */
      gimple init_stmt = gimple_build_assign (var, init);
      gsi_insert_on_edge_immediate (entry, init_stmt);
    }
}
1568 
1569 
/* Execute load motion for references in chain CHAIN.  Uids of the newly
   created temporary variables are marked in TMP_VARS.  */

static void
execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  auto_vec<tree> vars;
  dref a;
  unsigned n_writes = 0, ridx, i;
  tree var;

  gcc_assert (chain->type == CT_INVARIANT);
  gcc_assert (!chain->combined);
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    if (DR_IS_WRITE (a->ref))
      n_writes++;

  /* If there are no reads in the loop, there is nothing to do.  */
  if (n_writes == chain->refs.length ())
    return;

  initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
			   &vars, chain->inits, tmp_vars);

  /* References before the last write use VARS[0] (each intermediate write
     defining a fresh ssa name of it); after the last write, VARS[1] -- the
     name copied around the loop by the phi node -- is used instead.  */
  ridx = 0;
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    {
      bool is_read = DR_IS_READ (a->ref);

      if (DR_IS_WRITE (a->ref))
	{
	  n_writes--;
	  if (n_writes)
	    {
	      var = vars[0];
	      var = make_ssa_name (SSA_NAME_VAR (var), NULL);
	      vars[0] = var;
	    }
	  else
	    ridx = 1;
	}

      /* Reads are replaced by uses of the variable, writes by definitions
	 of it.  */
      replace_ref_with (a->stmt, vars[ridx],
			!is_read, !is_read);
    }
}
1616 
1617 /* Returns the single statement in that NAME is used, excepting
1618    the looparound phi nodes contained in one of the chains.  If there is no
1619    such statement, or more statements, NULL is returned.  */
1620 
1621 static gimple
single_nonlooparound_use(tree name)1622 single_nonlooparound_use (tree name)
1623 {
1624   use_operand_p use;
1625   imm_use_iterator it;
1626   gimple stmt, ret = NULL;
1627 
1628   FOR_EACH_IMM_USE_FAST (use, it, name)
1629     {
1630       stmt = USE_STMT (use);
1631 
1632       if (gimple_code (stmt) == GIMPLE_PHI)
1633 	{
1634 	  /* Ignore uses in looparound phi nodes.  Uses in other phi nodes
1635 	     could not be processed anyway, so just fail for them.  */
1636 	  if (bitmap_bit_p (looparound_phis,
1637 			    SSA_NAME_VERSION (PHI_RESULT (stmt))))
1638 	    continue;
1639 
1640 	  return NULL;
1641 	}
1642       else if (is_gimple_debug (stmt))
1643 	continue;
1644       else if (ret != NULL)
1645 	return NULL;
1646       else
1647 	ret = stmt;
1648     }
1649 
1650   return ret;
1651 }
1652 
/* Remove statement STMT, as well as the chain of assignments in that it is
   used.  */

static void
remove_stmt (gimple stmt)
{
  tree name;
  gimple next;
  gimple_stmt_iterator psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      name = PHI_RESULT (stmt);
      next = single_nonlooparound_use (name);
      reset_debug_uses (stmt);
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, true);

      /* Continue removal only through a single-use ssa name copy of the
	 phi result.  */
      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }

  /* Remove STMT and then follow the chain of single-use copies of its
     lhs, removing each in turn.  */
  while (1)
    {
      gimple_stmt_iterator bsi;

      bsi = gsi_for_stmt (stmt);

      name = gimple_assign_lhs (stmt);
      gcc_assert (TREE_CODE (name) == SSA_NAME);

      /* Find the continuation of the chain before removing the defining
	 statement.  */
      next = single_nonlooparound_use (name);
      reset_debug_uses (stmt);

      unlink_stmt_vdef (stmt);
      gsi_remove (&bsi, true);
      release_defs (stmt);

      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }
}
1703 
1704 /* Perform the predictive commoning optimization for a chain CHAIN.
1705    Uids of the newly created temporary variables are marked in TMP_VARS.*/
1706 
1707 static void
execute_pred_commoning_chain(struct loop * loop,chain_p chain,bitmap tmp_vars)1708 execute_pred_commoning_chain (struct loop *loop, chain_p chain,
1709 			     bitmap tmp_vars)
1710 {
1711   unsigned i;
1712   dref a;
1713   tree var;
1714 
1715   if (chain->combined)
1716     {
1717       /* For combined chains, just remove the statements that are used to
1718 	 compute the values of the expression (except for the root one).  */
1719       for (i = 1; chain->refs.iterate (i, &a); i++)
1720 	remove_stmt (a->stmt);
1721     }
1722   else
1723     {
1724       /* For non-combined chains, set up the variables that hold its value,
1725 	 and replace the uses of the original references by these
1726 	 variables.  */
1727       initialize_root (loop, chain, tmp_vars);
1728       for (i = 1; chain->refs.iterate (i, &a); i++)
1729 	{
1730 	  var = chain->vars[chain->length - a->distance];
1731 	  replace_ref_with (a->stmt, var, false, false);
1732 	}
1733     }
1734 }
1735 
1736 /* Determines the unroll factor necessary to remove as many temporary variable
1737    copies as possible.  CHAINS is the list of chains that will be
1738    optimized.  */
1739 
1740 static unsigned
determine_unroll_factor(vec<chain_p> chains)1741 determine_unroll_factor (vec<chain_p> chains)
1742 {
1743   chain_p chain;
1744   unsigned factor = 1, af, nfactor, i;
1745   unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1746 
1747   FOR_EACH_VEC_ELT (chains, i, chain)
1748     {
1749       if (chain->type == CT_INVARIANT || chain->combined)
1750 	continue;
1751 
1752       /* The best unroll factor for this chain is equal to the number of
1753 	 temporary variables that we create for it.  */
1754       af = chain->length;
1755       if (chain->has_max_use_after)
1756 	af++;
1757 
1758       nfactor = factor * af / gcd (factor, af);
1759       if (nfactor <= max)
1760 	factor = nfactor;
1761     }
1762 
1763   return factor;
1764 }
1765 
1766 /* Perform the predictive commoning optimization for CHAINS.
1767    Uids of the newly created temporary variables are marked in TMP_VARS.  */
1768 
1769 static void
execute_pred_commoning(struct loop * loop,vec<chain_p> chains,bitmap tmp_vars)1770 execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
1771 			bitmap tmp_vars)
1772 {
1773   chain_p chain;
1774   unsigned i;
1775 
1776   FOR_EACH_VEC_ELT (chains, i, chain)
1777     {
1778       if (chain->type == CT_INVARIANT)
1779 	execute_load_motion (loop, chain, tmp_vars);
1780       else
1781 	execute_pred_commoning_chain (loop, chain, tmp_vars);
1782     }
1783 
1784   update_ssa (TODO_update_ssa_only_virtuals);
1785 }
1786 
1787 /* For each reference in CHAINS, if its defining statement is
1788    phi node, record the ssa name that is defined by it.  */
1789 
1790 static void
replace_phis_by_defined_names(vec<chain_p> chains)1791 replace_phis_by_defined_names (vec<chain_p> chains)
1792 {
1793   chain_p chain;
1794   dref a;
1795   unsigned i, j;
1796 
1797   FOR_EACH_VEC_ELT (chains, i, chain)
1798     FOR_EACH_VEC_ELT (chain->refs, j, a)
1799       {
1800 	if (gimple_code (a->stmt) == GIMPLE_PHI)
1801 	  {
1802 	    a->name_defined_by_phi = PHI_RESULT (a->stmt);
1803 	    a->stmt = NULL;
1804 	  }
1805       }
1806 }
1807 
1808 /* For each reference in CHAINS, if name_defined_by_phi is not
1809    NULL, use it to set the stmt field.  */
1810 
1811 static void
replace_names_by_phis(vec<chain_p> chains)1812 replace_names_by_phis (vec<chain_p> chains)
1813 {
1814   chain_p chain;
1815   dref a;
1816   unsigned i, j;
1817 
1818   FOR_EACH_VEC_ELT (chains, i, chain)
1819     FOR_EACH_VEC_ELT (chain->refs, j, a)
1820       if (a->stmt == NULL)
1821 	{
1822 	  a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
1823 	  gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
1824 	  a->name_defined_by_phi = NULL_TREE;
1825 	}
1826 }
1827 
/* Wrapper over execute_pred_commoning, to pass it as a callback
   to tree_transform_and_unroll_loop.  */

struct epcc_data
{
  vec<chain_p> chains;	/* The chains to be transformed.  */
  bitmap tmp_vars;	/* Uids of the created temporary variables.  */
};
1836 
1837 static void
execute_pred_commoning_cbck(struct loop * loop,void * data)1838 execute_pred_commoning_cbck (struct loop *loop, void *data)
1839 {
1840   struct epcc_data *const dta = (struct epcc_data *) data;
1841 
1842   /* Restore phi nodes that were replaced by ssa names before
1843      tree_transform_and_unroll_loop (see detailed description in
1844      tree_predictive_commoning_loop).  */
1845   replace_names_by_phis (dta->chains);
1846   execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
1847 }
1848 
/* Base NAME and all the names in the chain of phi nodes that use it
   on variable VAR.  The phi nodes are recognized by being in the copies of
   the header of the LOOP.  */

static void
base_names_in_chain_on (struct loop *loop, tree name, tree var)
{
  gimple stmt, phi;
  imm_use_iterator iter;

  replace_ssa_name_symbol (name, var);

  /* Follow the chain of phi nodes using NAME inside LOOP, rebasing each
     result on VAR, until no further phi use is found.  */
  while (1)
    {
      phi = NULL;
      FOR_EACH_IMM_USE_STMT (stmt, iter, name)
	{
	  if (gimple_code (stmt) == GIMPLE_PHI
	      && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
	    {
	      phi = stmt;
	      BREAK_FROM_IMM_USE_STMT (iter);
	    }
	}
      if (!phi)
	return;

      name = PHI_RESULT (phi);
      replace_ssa_name_symbol (name, var);
    }
}
1880 
/* Given an unrolled LOOP after predictive commoning, remove the
   register copies arising from phi nodes by changing the base
   variables of SSA names.  TMP_VARS is the set of the temporary variables
   for those we want to perform this.  */

static void
eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
{
  edge e;
  gimple phi, stmt;
  tree name, use, var;
  gimple_stmt_iterator psi;

  e = loop_latch_edge (loop);
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      name = PHI_RESULT (phi);
      var = SSA_NAME_VAR (name);
      /* Only handle the phi nodes for our temporary variables.  */
      if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
	continue;
      use = PHI_ARG_DEF_FROM_EDGE (phi, e);
      gcc_assert (TREE_CODE (use) == SSA_NAME);

      /* Base all the ssa names in the ud and du chain of NAME on VAR.  */
      stmt = SSA_NAME_DEF_STMT (use);
      while (gimple_code (stmt) == GIMPLE_PHI
	     /* In case we could not unroll the loop enough to eliminate
		all copies, we may reach the loop header before the defining
		statement (in that case, some register copies will be present
		in loop latch in the final code, corresponding to the newly
		created looparound phi nodes).  */
	     && gimple_bb (stmt) != loop->header)
	{
	  gcc_assert (single_pred_p (gimple_bb (stmt)));
	  use = PHI_ARG_DEF (stmt, 0);
	  stmt = SSA_NAME_DEF_STMT (use);
	}

      base_names_in_chain_on (loop, use, var);
    }
}
1923 
1924 /* Returns true if CHAIN is suitable to be combined.  */
1925 
1926 static bool
chain_can_be_combined_p(chain_p chain)1927 chain_can_be_combined_p (chain_p chain)
1928 {
1929   return (!chain->combined
1930 	  && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
1931 }
1932 
1933 /* Returns the modify statement that uses NAME.  Skips over assignment
1934    statements, NAME is replaced with the actual name used in the returned
1935    statement.  */
1936 
1937 static gimple
find_use_stmt(tree * name)1938 find_use_stmt (tree *name)
1939 {
1940   gimple stmt;
1941   tree rhs, lhs;
1942 
1943   /* Skip over assignments.  */
1944   while (1)
1945     {
1946       stmt = single_nonlooparound_use (*name);
1947       if (!stmt)
1948 	return NULL;
1949 
1950       if (gimple_code (stmt) != GIMPLE_ASSIGN)
1951 	return NULL;
1952 
1953       lhs = gimple_assign_lhs (stmt);
1954       if (TREE_CODE (lhs) != SSA_NAME)
1955 	return NULL;
1956 
1957       if (gimple_assign_copy_p (stmt))
1958 	{
1959 	  rhs = gimple_assign_rhs1 (stmt);
1960 	  if (rhs != *name)
1961 	    return NULL;
1962 
1963 	  *name = lhs;
1964 	}
1965       else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1966 	       == GIMPLE_BINARY_RHS)
1967 	return stmt;
1968       else
1969 	return NULL;
1970     }
1971 }
1972 
1973 /* Returns true if we may perform reassociation for operation CODE in TYPE.  */
1974 
1975 static bool
may_reassociate_p(tree type,enum tree_code code)1976 may_reassociate_p (tree type, enum tree_code code)
1977 {
1978   if (FLOAT_TYPE_P (type)
1979       && !flag_unsafe_math_optimizations)
1980     return false;
1981 
1982   return (commutative_tree_code (code)
1983 	  && associative_tree_code (code));
1984 }
1985 
1986 /* If the operation used in STMT is associative and commutative, go through the
1987    tree of the same operations and returns its root.  Distance to the root
1988    is stored in DISTANCE.  */
1989 
1990 static gimple
find_associative_operation_root(gimple stmt,unsigned * distance)1991 find_associative_operation_root (gimple stmt, unsigned *distance)
1992 {
1993   tree lhs;
1994   gimple next;
1995   enum tree_code code = gimple_assign_rhs_code (stmt);
1996   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
1997   unsigned dist = 0;
1998 
1999   if (!may_reassociate_p (type, code))
2000     return NULL;
2001 
2002   while (1)
2003     {
2004       lhs = gimple_assign_lhs (stmt);
2005       gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2006 
2007       next = find_use_stmt (&lhs);
2008       if (!next
2009 	  || gimple_assign_rhs_code (next) != code)
2010 	break;
2011 
2012       stmt = next;
2013       dist++;
2014     }
2015 
2016   if (distance)
2017     *distance = dist;
2018   return stmt;
2019 }
2020 
2021 /* Returns the common statement in that NAME1 and NAME2 have a use.  If there
2022    is no such statement, returns NULL_TREE.  In case the operation used on
2023    NAME1 and NAME2 is associative and commutative, returns the root of the
2024    tree formed by this operation instead of the statement that uses NAME1 or
2025    NAME2.  */
2026 
2027 static gimple
find_common_use_stmt(tree * name1,tree * name2)2028 find_common_use_stmt (tree *name1, tree *name2)
2029 {
2030   gimple stmt1, stmt2;
2031 
2032   stmt1 = find_use_stmt (name1);
2033   if (!stmt1)
2034     return NULL;
2035 
2036   stmt2 = find_use_stmt (name2);
2037   if (!stmt2)
2038     return NULL;
2039 
2040   if (stmt1 == stmt2)
2041     return stmt1;
2042 
2043   stmt1 = find_associative_operation_root (stmt1, NULL);
2044   if (!stmt1)
2045     return NULL;
2046   stmt2 = find_associative_operation_root (stmt2, NULL);
2047   if (!stmt2)
2048     return NULL;
2049 
2050   return (stmt1 == stmt2 ? stmt1 : NULL);
2051 }
2052 
2053 /* Checks whether R1 and R2 are combined together using CODE, with the result
2054    in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2055    if it is true.  If CODE is ERROR_MARK, set these values instead.  */
2056 
2057 static bool
combinable_refs_p(dref r1,dref r2,enum tree_code * code,bool * swap,tree * rslt_type)2058 combinable_refs_p (dref r1, dref r2,
2059 		   enum tree_code *code, bool *swap, tree *rslt_type)
2060 {
2061   enum tree_code acode;
2062   bool aswap;
2063   tree atype;
2064   tree name1, name2;
2065   gimple stmt;
2066 
2067   name1 = name_for_ref (r1);
2068   name2 = name_for_ref (r2);
2069   gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2070 
2071   stmt = find_common_use_stmt (&name1, &name2);
2072 
2073   if (!stmt
2074       /* A simple post-dominance check - make sure the combination
2075          is executed under the same condition as the references.  */
2076       || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2077 	  && gimple_bb (stmt) != gimple_bb (r2->stmt)))
2078     return false;
2079 
2080   acode = gimple_assign_rhs_code (stmt);
2081   aswap = (!commutative_tree_code (acode)
2082 	   && gimple_assign_rhs1 (stmt) != name1);
2083   atype = TREE_TYPE (gimple_assign_lhs (stmt));
2084 
2085   if (*code == ERROR_MARK)
2086     {
2087       *code = acode;
2088       *swap = aswap;
2089       *rslt_type = atype;
2090       return true;
2091     }
2092 
2093   return (*code == acode
2094 	  && *swap == aswap
2095 	  && *rslt_type == atype);
2096 }
2097 
2098 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2099    an assignment of the remaining operand.  */
2100 
2101 static void
remove_name_from_operation(gimple stmt,tree op)2102 remove_name_from_operation (gimple stmt, tree op)
2103 {
2104   tree other_op;
2105   gimple_stmt_iterator si;
2106 
2107   gcc_assert (is_gimple_assign (stmt));
2108 
2109   if (gimple_assign_rhs1 (stmt) == op)
2110     other_op = gimple_assign_rhs2 (stmt);
2111   else
2112     other_op = gimple_assign_rhs1 (stmt);
2113 
2114   si = gsi_for_stmt (stmt);
2115   gimple_assign_set_rhs_from_tree (&si, other_op);
2116 
2117   /* We should not have reallocated STMT.  */
2118   gcc_assert (gsi_stmt (si) == stmt);
2119 
2120   update_stmt (stmt);
2121 }
2122 
/* Reassociates the expression in that NAME1 and NAME2 are used so that they
   are combined in a single statement, and returns this statement.  */

static gimple
reassociate_to_the_same_stmt (tree name1, tree name2)
{
  gimple stmt1, stmt2, root1, root2, s1, s2;
  gimple new_stmt, tmp_stmt;
  tree new_name, tmp_name, var, r1, r2;
  unsigned dist1, dist2;
  enum tree_code code;
  tree type = TREE_TYPE (name1);
  gimple_stmt_iterator bsi;

  /* Locate the statements consuming NAME1 and NAME2 and the root of the
     association tree each belongs to; the assert below checks that both
     lie in one and the same tree and use the same operation.  */
  stmt1 = find_use_stmt (&name1);
  stmt2 = find_use_stmt (&name2);
  root1 = find_associative_operation_root (stmt1, &dist1);
  root2 = find_associative_operation_root (stmt2, &dist2);
  code = gimple_assign_rhs_code (stmt1);

  gcc_assert (root1 && root2 && root1 == root2
	      && code == gimple_assign_rhs_code (stmt2));

  /* Find the root of the nearest expression in that both NAME1 and NAME2
     are used.  */
  r1 = name1;
  s1 = stmt1;
  r2 = name2;
  s2 = stmt2;

  /* First bring both walks to the same distance from the root ...  */
  while (dist1 > dist2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      dist1--;
    }
  while (dist2 > dist1)
    {
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
      dist2--;
    }

  /* ... then advance them in lockstep until they meet in S1 == S2.  */
  while (s1 != s2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
    }

  /* Remove NAME1 and NAME2 from the statements in that they are used
     currently.  */
  remove_name_from_operation (stmt1, name1);
  remove_name_from_operation (stmt2, name2);

  /* Insert the new statement combining NAME1 and NAME2 before S1, and
     combine it with the rhs of S1.  */
  var = create_tmp_reg (type, "predreastmp");
  new_name = make_ssa_name (var, NULL);
  new_stmt = gimple_build_assign_with_ops (code, new_name, name1, name2);

  var = create_tmp_reg (type, "predreastmp");
  tmp_name = make_ssa_name (var, NULL);

  /* Rhs of S1 may now be either a binary expression with operation
     CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
     so that name1 or name2 was removed from it).  */
  tmp_stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (s1),
					   tmp_name,
					   gimple_assign_rhs1 (s1),
					   gimple_assign_rhs2 (s1));

  /* Rewrite S1 as NEW_NAME CODE TMP_NAME and place the statements defining
     both of its new operands immediately before it.  */
  bsi = gsi_for_stmt (s1);
  gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
  s1 = gsi_stmt (bsi);
  update_stmt (s1);

  gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
  gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);

  return new_stmt;
}
2206 
2207 /* Returns the statement that combines references R1 and R2.  In case R1
2208    and R2 are not used in the same statement, but they are used with an
2209    associative and commutative operation in the same expression, reassociate
2210    the expression so that they are used in the same statement.  */
2211 
2212 static gimple
stmt_combining_refs(dref r1,dref r2)2213 stmt_combining_refs (dref r1, dref r2)
2214 {
2215   gimple stmt1, stmt2;
2216   tree name1 = name_for_ref (r1);
2217   tree name2 = name_for_ref (r2);
2218 
2219   stmt1 = find_use_stmt (&name1);
2220   stmt2 = find_use_stmt (&name2);
2221   if (stmt1 == stmt2)
2222     return stmt1;
2223 
2224   return reassociate_to_the_same_stmt (name1, name2);
2225 }
2226 
/* Tries to combine chains CH1 and CH2 together.  If this succeeds, the
   description of the new chain is returned, otherwise we return NULL.  */

static chain_p
combine_chains (chain_p ch1, chain_p ch2)
{
  dref r1, r2, nw;
  enum tree_code op = ERROR_MARK;
  bool swap = false;
  chain_p new_chain;
  unsigned i;
  gimple root_stmt;
  tree rslt_type = NULL_TREE;

  /* The chains must be distinct and of the same shape: equal length and
     equal number of references.  */
  if (ch1 == ch2)
    return NULL;
  if (ch1->length != ch2->length)
    return NULL;

  if (ch1->refs.length () != ch2->refs.length ())
    return NULL;

  /* Corresponding references must be at the same distance and must all be
     combined by one and the same operation; OP, SWAP and RSLT_TYPE are set
     by the first call of combinable_refs_p (OP starts as ERROR_MARK) and
     verified by the subsequent ones.  */
  for (i = 0; (ch1->refs.iterate (i, &r1)
	       && ch2->refs.iterate (i, &r2)); i++)
    {
      if (r1->distance != r2->distance)
	return NULL;

      if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
	return NULL;
    }

  /* Canonicalize the order so that CH1 supplies the first operand of the
     combining operation.  */
  if (swap)
    {
      chain_p tmp = ch1;
      ch1 = ch2;
      ch2 = tmp;
    }

  /* Build the combined chain description.  */
  new_chain = XCNEW (struct chain);
  new_chain->type = CT_COMBINATION;
  new_chain->op = op;
  new_chain->ch1 = ch1;
  new_chain->ch2 = ch2;
  new_chain->rslt_type = rslt_type;
  new_chain->length = ch1->length;

  /* Its references are the statements combining the corresponding
     references of CH1 and CH2.  */
  for (i = 0; (ch1->refs.iterate (i, &r1)
	       && ch2->refs.iterate (i, &r2)); i++)
    {
      nw = XCNEW (struct dref_d);
      nw->stmt = stmt_combining_refs (r1, r2);
      nw->distance = r1->distance;

      new_chain->refs.safe_push (nw);
    }

  /* Recompute HAS_MAX_USE_AFTER for the new chain: true if some reference
     at the maximum distance does not dominate the chain root.  */
  new_chain->has_max_use_after = false;
  root_stmt = get_chain_root (new_chain)->stmt;
  for (i = 1; new_chain->refs.iterate (i, &nw); i++)
    {
      if (nw->distance == new_chain->length
	  && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
	{
	  new_chain->has_max_use_after = true;
	  break;
	}
    }

  /* Mark the source chains so that they are not combined again.  */
  ch1->combined = true;
  ch2->combined = true;
  return new_chain;
}
2300 
2301 /* Try to combine the CHAINS.  */
2302 
2303 static void
try_combine_chains(vec<chain_p> * chains)2304 try_combine_chains (vec<chain_p> *chains)
2305 {
2306   unsigned i, j;
2307   chain_p ch1, ch2, cch;
2308   auto_vec<chain_p> worklist;
2309 
2310   FOR_EACH_VEC_ELT (*chains, i, ch1)
2311     if (chain_can_be_combined_p (ch1))
2312       worklist.safe_push (ch1);
2313 
2314   while (!worklist.is_empty ())
2315     {
2316       ch1 = worklist.pop ();
2317       if (!chain_can_be_combined_p (ch1))
2318 	continue;
2319 
2320       FOR_EACH_VEC_ELT (*chains, j, ch2)
2321 	{
2322 	  if (!chain_can_be_combined_p (ch2))
2323 	    continue;
2324 
2325 	  cch = combine_chains (ch1, ch2);
2326 	  if (cch)
2327 	    {
2328 	      worklist.safe_push (cch);
2329 	      chains->safe_push (cch);
2330 	      break;
2331 	    }
2332 	}
2333     }
2334 }
2335 
/* Prepare initializers for CHAIN in LOOP.  Returns false if this is
   impossible because one of these initializers may trap, true otherwise.  */

static bool
prepare_initializers_chain (struct loop *loop, chain_p chain)
{
  /* An invariant chain needs just one initial value; other chains need one
     per step of the reuse distance.  */
  unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
  struct data_reference *dr = get_chain_root (chain)->ref;
  tree init;
  gimple_seq stmts;
  dref laref;
  edge entry = loop_preheader_edge (loop);

  /* Find the initializers for the variables, and check that they cannot
     trap.  */
  chain->inits.create (n);
  for (i = 0; i < n; i++)
    chain->inits.quick_push (NULL_TREE);

  /* If we have replaced some looparound phi nodes, use their initializers
     instead of creating our own.  */
  FOR_EACH_VEC_ELT (chain->refs, i, laref)
    {
      if (gimple_code (laref->stmt) != GIMPLE_PHI)
	continue;

      gcc_assert (laref->distance > 0);
      chain->inits[n - laref->distance]
	= PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
    }

  /* Fill in the remaining slots with the value of the reference in the
     iterations before the loop (the iteration number I - N is negative
     here since I < N).  */
  for (i = 0; i < n; i++)
    {
      if (chain->inits[i] != NULL_TREE)
	continue;

      init = ref_at_iteration (dr, (int) i - n, &stmts);
      /* Unless the reference is accessed on every iteration, the computed
	 initializer could fault; give up on the whole chain then.  */
      if (!chain->all_always_accessed && tree_could_trap_p (init))
	return false;

      /* Emit any statements needed to compute INIT on the preheader
	 edge.  */
      if (stmts)
	gsi_insert_seq_on_edge_immediate (entry, stmts);

      chain->inits[i] = init;
    }

  return true;
}
2384 
2385 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2386    be used because the initializers might trap.  */
2387 
2388 static void
prepare_initializers(struct loop * loop,vec<chain_p> chains)2389 prepare_initializers (struct loop *loop, vec<chain_p> chains)
2390 {
2391   chain_p chain;
2392   unsigned i;
2393 
2394   for (i = 0; i < chains.length (); )
2395     {
2396       chain = chains[i];
2397       if (prepare_initializers_chain (loop, chain))
2398 	i++;
2399       else
2400 	{
2401 	  release_chain (chain);
2402 	  chains.unordered_remove (i);
2403 	}
2404     }
2405 }
2406 
/* Performs predictive commoning for LOOP.  Returns true if LOOP was
   unrolled.  */

static bool
tree_predictive_commoning_loop (struct loop *loop)
{
  vec<data_reference_p> datarefs;
  vec<ddr_p> dependences;
  struct component *components;
  vec<chain_p> chains = vNULL;
  unsigned unroll_factor;
  struct tree_niter_desc desc;
  bool unroll = false;
  edge exit;
  bitmap tmp_vars;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing loop %d\n",  loop->num);

  /* Find the data references and split them into components according to their
     dependence relations.  */
  auto_vec<loop_p, 3> loop_nest;
  dependences.create (10);
  datarefs.create (10);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependences))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Cannot analyze data dependencies\n");
      free_data_refs (datarefs);
      free_dependence_relations (dependences);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependences);

  components = split_data_refs_to_components (loop, datarefs, dependences);
  loop_nest.release ();
  free_dependence_relations (dependences);
  if (!components)
    {
      free_data_refs (datarefs);
      free_affine_expand_cache (&name_expansions);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial state:\n\n");
      dump_components (dump_file, components);
    }

  /* Find the suitable components and split them into chains.  */
  components = filter_suitable_components (loop, components);

  tmp_vars = BITMAP_ALLOC (NULL);
  looparound_phis = BITMAP_ALLOC (NULL);
  determine_roots (loop, components, &chains);
  release_components (components);

  if (!chains.exists ())
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Predictive commoning failed: no suitable chains\n");
      /* Jump to the common cleanup; the allocations above (TMP_VARS,
	 LOOPAROUND_PHIS, DATAREFS) are released there.  */
      goto end;
    }
  /* Compute the initial values of the reused quantities; chains whose
     initializers might trap are released here.  */
  prepare_initializers (loop, chains);

  /* Try to combine the chains that are always worked with together.  */
  try_combine_chains (&chains);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Before commoning:\n\n");
      dump_chains (dump_file, chains);
    }

  /* Determine the unroll factor, and if the loop should be unrolled, ensure
     that its number of iterations is divisible by the factor.  */
  unroll_factor = determine_unroll_factor (chains);
  scev_reset ();
  unroll = (unroll_factor > 1
	    && can_unroll_loop_p (loop, unroll_factor, &desc));
  exit = single_dom_exit (loop);

  /* Execute the predictive commoning transformations, and possibly unroll the
     loop.  */
  if (unroll)
    {
      struct epcc_data dta;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);

      dta.chains = chains;
      dta.tmp_vars = tmp_vars;

      update_ssa (TODO_update_ssa_only_virtuals);

      /* Cfg manipulations performed in tree_transform_and_unroll_loop before
	 execute_pred_commoning_cbck is called may cause phi nodes to be
	 reallocated, which is a problem since CHAINS may point to these
	 statements.  To fix this, we store the ssa names defined by the
	 phi nodes here instead of the phi nodes themselves, and restore
	 the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
      replace_phis_by_defined_names (chains);

      tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
				      execute_pred_commoning_cbck, &dta);
      eliminate_temp_copies (loop, tmp_vars);
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Executing predictive commoning without unrolling.\n");
      execute_pred_commoning (loop, chains, tmp_vars);
    }

end: ;
  /* Common cleanup for both the successful and the bail-out paths.  */
  release_chains (chains);
  free_data_refs (datarefs);
  BITMAP_FREE (tmp_vars);
  BITMAP_FREE (looparound_phis);

  free_affine_expand_cache (&name_expansions);

  return unroll;
}
2538 
2539 /* Runs predictive commoning.  */
2540 
2541 unsigned
tree_predictive_commoning(void)2542 tree_predictive_commoning (void)
2543 {
2544   bool unrolled = false;
2545   struct loop *loop;
2546   unsigned ret = 0;
2547 
2548   initialize_original_copy_tables ();
2549   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
2550     if (optimize_loop_for_speed_p (loop))
2551       {
2552 	unrolled |= tree_predictive_commoning_loop (loop);
2553       }
2554 
2555   if (unrolled)
2556     {
2557       scev_reset ();
2558       ret = TODO_cleanup_cfg;
2559     }
2560   free_original_copy_tables ();
2561 
2562   return ret;
2563 }
2564 
2565 /* Predictive commoning Pass.  */
2566 
2567 static unsigned
run_tree_predictive_commoning(void)2568 run_tree_predictive_commoning (void)
2569 {
2570   if (!current_loops)
2571     return 0;
2572 
2573   return tree_predictive_commoning ();
2574 }
2575 
/* Gate function: run the pass only when -fpredictive-commoning is
   enabled.  */

static bool
gate_tree_predictive_commoning (void)
{
  return flag_predictive_commoning != 0;
}
2581 
namespace {

/* Descriptor of the predictive commoning pass ("pcom" in dump files).  */

const pass_data pass_data_predcom =
{
  GIMPLE_PASS, /* type */
  "pcom", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_PREDCOM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

/* Pass-manager wrapper delegating to the gate and execute functions
   above.  */

class pass_predcom : public gimple_opt_pass
{
public:
  pass_predcom (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_predcom, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_tree_predictive_commoning (); }
  unsigned int execute () { return run_tree_predictive_commoning (); }

}; // class pass_predcom

} // anon namespace
2613 
/* Creates an instance of the predictive commoning pass for the pass
   manager.  */

gimple_opt_pass *
make_pass_predcom (gcc::context *ctxt)
{
  return new pass_predcom (ctxt);
}
2619 
2620 
2621