1 /* Predictive commoning.
2    Copyright (C) 2005-2016 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This file implements the predictive commoning optimization.  Predictive
21    commoning can be viewed as CSE around a loop, and with some improvements,
22    as generalized strength reduction-- i.e., reusing values computed in
23    earlier iterations of a loop in the later ones.  So far, the pass only
24    handles the most useful case, that is, reusing values of memory references.
25    If you think this is all just a special case of PRE, you are sort of right;
26    however, concentrating on loops is simpler, and makes it possible to
27    incorporate data dependence analysis to detect the opportunities, perform
28    loop unrolling to avoid copies together with renaming immediately,
29    and if needed, we could also take register pressure into account.
30 
31    Let us demonstrate what is done on an example:
32 
33    for (i = 0; i < 100; i++)
34      {
35        a[i+2] = a[i] + a[i+1];
36        b[10] = b[10] + i;
37        c[i] = c[99 - i];
38        d[i] = d[i + 1];
39      }
40 
41    1) We find data references in the loop, and split them to mutually
42       independent groups (i.e., we find components of a data dependence
43       graph).  We ignore read-read dependences whose distance is not constant.
44       (TODO -- we could also ignore antidependences).  In this example, we
45       find the following groups:
46 
47       a[i]{read}, a[i+1]{read}, a[i+2]{write}
48       b[10]{read}, b[10]{write}
49       c[99 - i]{read}, c[i]{write}
50       d[i + 1]{read}, d[i]{write}
51 
52    2) Inside each of the group, we verify several conditions:
53       a) all the references must differ in indices only, and the indices
54 	 must all have the same step
55       b) the references must dominate loop latch (and thus, they must be
56 	 ordered by dominance relation).
57       c) the distance of the indices must be a small multiple of the step
58       We are then able to compute the difference of the references (# of
59       iterations before they point to the same place as the first of them).
60       Also, in case there are writes in the loop, we split the groups into
61       chains whose head is the write whose values are used by the reads in
62       the same chain.  The chains are then processed independently,
63       making the further transformations simpler.  Also, the shorter chains
64       need the same number of registers, but may require lower unrolling
65       factor in order to get rid of the copies on the loop latch.
66 
67       In our example, we get the following chains (the chain for c is invalid).
68 
69       a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70       b[10]{read,+0}, b[10]{write,+0}
71       d[i + 1]{read,+0}, d[i]{write,+1}
72 
73    3) For each read, we determine the read or write whose value it reuses,
74       together with the distance of this reuse.  I.e. we take the last
75       reference before it with distance 0, or the last of the references
76       with the smallest positive distance to the read.  Then, we remove
77       the references that are not used in any of these chains, discard the
78       empty groups, and propagate all the links so that they point to the
79       single root reference of the chain (adjusting their distance
80       appropriately).  Some extra care needs to be taken for references with
81       step 0.  In our example (the numbers indicate the distance of the
82       reuse),
83 
84       a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85       b[10] --> (*) 1, b[10] (*)
86 
87    4) The chains are combined together if possible.  If the corresponding
88       elements of two chains are always combined together with the same
89       operator, we remember just the result of this combination, instead
90       of remembering the values separately.  We may need to perform
91       reassociation to enable combining, for example
92 
93       e[i] + f[i+1] + e[i+1] + f[i]
94 
95       can be reassociated as
96 
97       (e[i] + f[i]) + (e[i+1] + f[i+1])
98 
99       and we can combine the chains for e and f into one chain.
100 
101    5) For each root reference (end of the chain) R, let N be maximum distance
102       of a reference reusing its value.  Variables R0 up to RN are created,
103       together with phi nodes that transfer values from R1 .. RN to
104       R0 .. R(N-1).
105       Initial values are loaded to R0..R(N-1) (in case not all references
106       must necessarily be accessed and they may trap, we may fail here;
107       TODO sometimes, the loads could be guarded by a check for the number
108       of iterations).  Values loaded/stored in roots are also copied to
109       RN.  Other reads are replaced with the appropriate variable Ri.
110       Everything is put to SSA form.
111 
112       As a small improvement, if R0 is dead after the root (i.e., all uses of
113       the value with the maximum distance dominate the root), we can avoid
114       creating RN and use R0 instead of it.
115 
116       In our example, we get (only the parts concerning a and b are shown):
117       for (i = 0; i < 100; i++)
118 	{
119 	  f = phi (a[0], s);
120 	  s = phi (a[1], f);
121 	  x = phi (b[10], x);
122 
123 	  f = f + s;
124 	  a[i+2] = f;
125 	  x = x + i;
126 	  b[10] = x;
127 	}
128 
129    6) Factor F for unrolling is determined as the smallest common multiple of
130       (N + 1) for each root reference (N for references for that we avoided
131       creating RN).  If F and the loop is small enough, loop is unrolled F
132       times.  The stores to RN (R0) in the copies of the loop body are
133       periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134       be coalesced and the copies can be eliminated.
135 
136       TODO -- copy propagation and other optimizations may change the live
137       ranges of the temporary registers and prevent them from being coalesced;
138       this may increase the register pressure.
139 
140       In our case, F = 2 and the (main loop of the) result is
141 
142       for (i = 0; i < ...; i += 2)
143         {
144           f = phi (a[0], f);
145           s = phi (a[1], s);
146           x = phi (b[10], x);
147 
148           f = f + s;
149           a[i+2] = f;
150           x = x + i;
151           b[10] = x;
152 
153           s = s + f;
154           a[i+3] = s;
          x = x + (i + 1);
156           b[10] = x;
157        }
158 
159    TODO -- stores killing other stores can be taken into account, e.g.,
160    for (i = 0; i < n; i++)
161      {
162        a[i] = 1;
163        a[i+2] = 2;
164      }
165 
166    can be replaced with
167 
168    t0 = a[0];
169    t1 = a[1];
170    for (i = 0; i < n; i++)
171      {
172        a[i] = 1;
173        t2 = 2;
174        t0 = t1;
175        t1 = t2;
176      }
177    a[n] = t0;
178    a[n+1] = t1;
179 
180    The interesting part is that this would generalize store motion; still, since
181    sm is performed elsewhere, it does not seem that important.
182 
183    Predictive commoning can be generalized for arbitrary computations (not
184    just memory loads), and also nontrivial transfer functions (e.g., replacing
185    i * i with ii_last + 2 * i + 1), to generalize strength reduction.  */
186 
187 #include "config.h"
188 #include "system.h"
189 #include "coretypes.h"
190 #include "backend.h"
191 #include "rtl.h"
192 #include "tree.h"
193 #include "gimple.h"
194 #include "predict.h"
195 #include "tree-pass.h"
196 #include "ssa.h"
197 #include "gimple-pretty-print.h"
198 #include "alias.h"
199 #include "fold-const.h"
200 #include "cfgloop.h"
201 #include "tree-eh.h"
202 #include "gimplify.h"
203 #include "gimple-iterator.h"
204 #include "gimplify-me.h"
205 #include "tree-ssa-loop-ivopts.h"
206 #include "tree-ssa-loop-manip.h"
207 #include "tree-ssa-loop-niter.h"
208 #include "tree-ssa-loop.h"
209 #include "tree-into-ssa.h"
210 #include "tree-dfa.h"
211 #include "tree-ssa.h"
212 #include "tree-data-ref.h"
213 #include "tree-scalar-evolution.h"
214 #include "params.h"
215 #include "tree-affine.h"
216 #include "builtins.h"
217 
/* The maximum number of iterations between the considered memory
   references.  Each unit of distance costs one temporary register (see
   step 5 of the overview above), so on targets with few available
   registers we use a shorter maximum distance.  */

#define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
222 
/* Data references (or phi nodes that carry data reference values across
   loop iterations).  */

typedef struct dref_d
{
  /* The reference itself.  NULL for looparound phi nodes and for the
     artificial references of combination chains.  */
  struct data_reference *ref;

  /* The statement in that the reference appears.  */
  gimple *stmt;

  /* In case that STMT is a phi node, this field is set to the SSA name
     defined by it in replace_phis_by_defined_names (in order to avoid
     pointing to phi node that got reallocated in the meantime).  */
  tree name_defined_by_phi;

  /* Distance of the reference from the root of the chain (in number of
     iterations of the loop); zero for the root itself.  */
  unsigned distance;

  /* Number of iterations offset from the first reference in the component.  */
  widest_int offset;

  /* Number of the reference in a component, in dominance ordering.  */
  unsigned pos;

  /* True if the memory reference is always accessed when the loop is
     entered.  */
  unsigned always_accessed : 1;
} *dref;
253 
254 
/* Type of the chain of the references.  */

enum chain_type
{
  /* The addresses of the references in the chain are constant (step 0);
     they are only candidates for load motion.  */
  CT_INVARIANT,

  /* There are only loads in the chain.  */
  CT_LOAD,

  /* Root of the chain is store, the rest are loads.  */
  CT_STORE_LOAD,

  /* A combination of two chains (see step 4 of the overview above).  */
  CT_COMBINATION
};
271 
/* Chains of data references.  */

typedef struct chain
{
  /* Type of the chain.  */
  enum chain_type type;

  /* For combination chains, the operator and the two chains that are
     combined, and the type of the result.  */
  enum tree_code op;
  tree rslt_type;
  struct chain *ch1, *ch2;

  /* The references in the chain; the first one is the root.  */
  vec<dref> refs;

  /* The maximum distance of the reference in the chain from the root.  */
  unsigned length;

  /* The variables used to copy the value throughout iterations.  */
  vec<tree> vars;

  /* Initializers for the variables.  */
  vec<tree> inits;

  /* True if there is a use of a variable with the maximal distance
     that comes after the root in the loop.  */
  unsigned has_max_use_after : 1;

  /* True if all the memory references in the chain are always accessed.  */
  unsigned all_always_accessed : 1;

  /* True if this chain was combined together with some other chain.  */
  unsigned combined : 1;
} *chain_p;
307 
308 
/* Describes the knowledge about the step of the memory references in
   the component.  */

enum ref_step_type
{
  /* The step is known to be zero.  */
  RS_INVARIANT,

  /* The step is known to be nonzero.  */
  RS_NONZERO,

  /* The step may or may not be nonzero.  */
  RS_ANY
};
323 
/* Components of the data dependence graph (see step 1 of the overview
   at the top of this file).  */

struct component
{
  /* The references in the component.  */
  vec<dref> refs;

  /* What we know about the step of the references in the component.  */
  enum ref_step_type comp_step;

  /* Next component in the list.  */
  struct component *next;
};
337 
/* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */

static bitmap looparound_phis;

/* Cache used by tree_to_aff_combination_expand; shared by all the calls
   in this pass (see aff_combination_dr_offset and determine_offset).  */

static hash_map<tree, name_expansion *> *name_expansions;
345 
346 /* Dumps data reference REF to FILE.  */
347 
348 extern void dump_dref (FILE *, dref);
349 void
dump_dref(FILE * file,dref ref)350 dump_dref (FILE *file, dref ref)
351 {
352   if (ref->ref)
353     {
354       fprintf (file, "    ");
355       print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
356       fprintf (file, " (id %u%s)\n", ref->pos,
357 	       DR_IS_READ (ref->ref) ? "" : ", write");
358 
359       fprintf (file, "      offset ");
360       print_decs (ref->offset, file);
361       fprintf (file, "\n");
362 
363       fprintf (file, "      distance %u\n", ref->distance);
364     }
365   else
366     {
367       if (gimple_code (ref->stmt) == GIMPLE_PHI)
368 	fprintf (file, "    looparound ref\n");
369       else
370 	fprintf (file, "    combination ref\n");
371       fprintf (file, "      in statement ");
372       print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
373       fprintf (file, "\n");
374       fprintf (file, "      distance %u\n", ref->distance);
375     }
376 
377 }
378 
379 /* Dumps CHAIN to FILE.  */
380 
381 extern void dump_chain (FILE *, chain_p);
382 void
dump_chain(FILE * file,chain_p chain)383 dump_chain (FILE *file, chain_p chain)
384 {
385   dref a;
386   const char *chain_type;
387   unsigned i;
388   tree var;
389 
390   switch (chain->type)
391     {
392     case CT_INVARIANT:
393       chain_type = "Load motion";
394       break;
395 
396     case CT_LOAD:
397       chain_type = "Loads-only";
398       break;
399 
400     case CT_STORE_LOAD:
401       chain_type = "Store-loads";
402       break;
403 
404     case CT_COMBINATION:
405       chain_type = "Combination";
406       break;
407 
408     default:
409       gcc_unreachable ();
410     }
411 
412   fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
413 	   chain->combined ? " (combined)" : "");
414   if (chain->type != CT_INVARIANT)
415     fprintf (file, "  max distance %u%s\n", chain->length,
416 	     chain->has_max_use_after ? "" : ", may reuse first");
417 
418   if (chain->type == CT_COMBINATION)
419     {
420       fprintf (file, "  equal to %p %s %p in type ",
421 	       (void *) chain->ch1, op_symbol_code (chain->op),
422 	       (void *) chain->ch2);
423       print_generic_expr (file, chain->rslt_type, TDF_SLIM);
424       fprintf (file, "\n");
425     }
426 
427   if (chain->vars.exists ())
428     {
429       fprintf (file, "  vars");
430       FOR_EACH_VEC_ELT (chain->vars, i, var)
431 	{
432 	  fprintf (file, " ");
433 	  print_generic_expr (file, var, TDF_SLIM);
434 	}
435       fprintf (file, "\n");
436     }
437 
438   if (chain->inits.exists ())
439     {
440       fprintf (file, "  inits");
441       FOR_EACH_VEC_ELT (chain->inits, i, var)
442 	{
443 	  fprintf (file, " ");
444 	  print_generic_expr (file, var, TDF_SLIM);
445 	}
446       fprintf (file, "\n");
447     }
448 
449   fprintf (file, "  references:\n");
450   FOR_EACH_VEC_ELT (chain->refs, i, a)
451     dump_dref (file, a);
452 
453   fprintf (file, "\n");
454 }
455 
456 /* Dumps CHAINS to FILE.  */
457 
458 extern void dump_chains (FILE *, vec<chain_p> );
459 void
dump_chains(FILE * file,vec<chain_p> chains)460 dump_chains (FILE *file, vec<chain_p> chains)
461 {
462   chain_p chain;
463   unsigned i;
464 
465   FOR_EACH_VEC_ELT (chains, i, chain)
466     dump_chain (file, chain);
467 }
468 
469 /* Dumps COMP to FILE.  */
470 
471 extern void dump_component (FILE *, struct component *);
472 void
dump_component(FILE * file,struct component * comp)473 dump_component (FILE *file, struct component *comp)
474 {
475   dref a;
476   unsigned i;
477 
478   fprintf (file, "Component%s:\n",
479 	   comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
480   FOR_EACH_VEC_ELT (comp->refs, i, a)
481     dump_dref (file, a);
482   fprintf (file, "\n");
483 }
484 
485 /* Dumps COMPS to FILE.  */
486 
487 extern void dump_components (FILE *, struct component *);
488 void
dump_components(FILE * file,struct component * comps)489 dump_components (FILE *file, struct component *comps)
490 {
491   struct component *comp;
492 
493   for (comp = comps; comp; comp = comp->next)
494     dump_component (file, comp);
495 }
496 
497 /* Frees a chain CHAIN.  */
498 
499 static void
release_chain(chain_p chain)500 release_chain (chain_p chain)
501 {
502   dref ref;
503   unsigned i;
504 
505   if (chain == NULL)
506     return;
507 
508   FOR_EACH_VEC_ELT (chain->refs, i, ref)
509     free (ref);
510 
511   chain->refs.release ();
512   chain->vars.release ();
513   chain->inits.release ();
514 
515   free (chain);
516 }
517 
518 /* Frees CHAINS.  */
519 
520 static void
release_chains(vec<chain_p> chains)521 release_chains (vec<chain_p> chains)
522 {
523   unsigned i;
524   chain_p chain;
525 
526   FOR_EACH_VEC_ELT (chains, i, chain)
527     release_chain (chain);
528   chains.release ();
529 }
530 
531 /* Frees a component COMP.  */
532 
533 static void
release_component(struct component * comp)534 release_component (struct component *comp)
535 {
536   comp->refs.release ();
537   free (comp);
538 }
539 
540 /* Frees list of components COMPS.  */
541 
542 static void
release_components(struct component * comps)543 release_components (struct component *comps)
544 {
545   struct component *act, *next;
546 
547   for (act = comps; act; act = next)
548     {
549       next = act->next;
550       release_component (act);
551     }
552 }
553 
/* Finds a root of tree given by FATHERS containing A, and performs path
   shortening.  */

static unsigned
component_of (unsigned fathers[], unsigned a)
{
  unsigned root = a;

  /* Walk up to the root of the tree containing A.  */
  while (fathers[root] != root)
    root = fathers[root];

  /* Path compression: make every node on the path point directly
     to ROOT, so that subsequent lookups are cheap.  */
  while (a != root)
    {
      unsigned next = fathers[a];
      fathers[a] = root;
      a = next;
    }

  return root;
}
573 
574 /* Join operation for DFU.  FATHERS gives the tree, SIZES are sizes of the
575    components, A and B are components to merge.  */
576 
577 static void
merge_comps(unsigned fathers[],unsigned sizes[],unsigned a,unsigned b)578 merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
579 {
580   unsigned ca = component_of (fathers, a);
581   unsigned cb = component_of (fathers, b);
582 
583   if (ca == cb)
584     return;
585 
586   if (sizes[ca] < sizes[cb])
587     {
588       sizes[cb] += sizes[ca];
589       fathers[ca] = cb;
590     }
591   else
592     {
593       sizes[ca] += sizes[cb];
594       fathers[cb] = ca;
595     }
596 }
597 
598 /* Returns true if A is a reference that is suitable for predictive commoning
599    in the innermost loop that contains it.  REF_STEP is set according to the
600    step of the reference A.  */
601 
602 static bool
suitable_reference_p(struct data_reference * a,enum ref_step_type * ref_step)603 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
604 {
605   tree ref = DR_REF (a), step = DR_STEP (a);
606 
607   if (!step
608       || TREE_THIS_VOLATILE (ref)
609       || !is_gimple_reg_type (TREE_TYPE (ref))
610       || tree_could_throw_p (ref))
611     return false;
612 
613   if (integer_zerop (step))
614     *ref_step = RS_INVARIANT;
615   else if (integer_nonzerop (step))
616     *ref_step = RS_NONZERO;
617   else
618     *ref_step = RS_ANY;
619 
620   return true;
621 }
622 
623 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET.  */
624 
625 static void
aff_combination_dr_offset(struct data_reference * dr,aff_tree * offset)626 aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
627 {
628   tree type = TREE_TYPE (DR_OFFSET (dr));
629   aff_tree delta;
630 
631   tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
632 				  &name_expansions);
633   aff_combination_const (&delta, type, wi::to_widest (DR_INIT (dr)));
634   aff_combination_add (offset, &delta);
635 }
636 
/* Determines number of iterations of the innermost enclosing loop before B
   refers to exactly the same location as A and stores it to OFF.  If A and
   B do not have the same step, they never meet, or anything else fails,
   returns false, otherwise returns true.  Both A and B are assumed to
   satisfy suitable_reference_p.  */

static bool
determine_offset (struct data_reference *a, struct data_reference *b,
		  widest_int *off)
{
  aff_tree diff, baseb, step;
  tree typea, typeb;

  /* Check that both the references access the location in the same type.  */
  typea = TREE_TYPE (DR_REF (a));
  typeb = TREE_TYPE (DR_REF (b));
  if (!useless_type_conversion_p (typeb, typea))
    return false;

  /* Check whether the base address and the step of both references is the
     same.  */
  if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
      || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
    return false;

  if (integer_zerop (DR_STEP (a)))
    {
      /* If the references have loop invariant address, check that they access
	 exactly the same location.  */
      *off = 0;
      return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
	      && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
    }

  /* Compare the offsets of the addresses, and check whether the difference
     is a multiple of step.  After these two additions DIFF holds
     offset(A) - offset(B).  */
  aff_combination_dr_offset (a, &diff);
  aff_combination_dr_offset (b, &baseb);
  aff_combination_scale (&baseb, -1);
  aff_combination_add (&diff, &baseb);

  /* Succeed iff DIFF is a constant multiple of the (expanded) step, and
     store the multiple to *OFF.  */
  tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
				  &step, &name_expansions);
  return aff_combination_constant_multiple_p (&diff, &step, off);
}
682 
683 /* Returns the last basic block in LOOP for that we are sure that
684    it is executed whenever the loop is entered.  */
685 
686 static basic_block
last_always_executed_block(struct loop * loop)687 last_always_executed_block (struct loop *loop)
688 {
689   unsigned i;
690   vec<edge> exits = get_loop_exit_edges (loop);
691   edge ex;
692   basic_block last = loop->latch;
693 
694   FOR_EACH_VEC_ELT (exits, i, ex)
695     last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
696   exits.release ();
697 
698   return last;
699 }
700 
/* Splits dependence graph on DATAREFS described by DEPENDS to components.
   Returns the resulting list of components, or NULL on failure.  */

static struct component *
split_data_refs_to_components (struct loop *loop,
			       vec<data_reference_p> datarefs,
			       vec<ddr_p> depends)
{
  unsigned i, n = datarefs.length ();
  unsigned ca, ia, ib, bad;
  /* Union-find forest over the references; slot N is reserved for the
     distinguished component of unusable ("bad") references.  */
  unsigned *comp_father = XNEWVEC (unsigned, n + 1);
  unsigned *comp_size = XNEWVEC (unsigned, n + 1);
  struct component **comps;
  struct data_reference *dr, *dra, *drb;
  struct data_dependence_relation *ddr;
  struct component *comp_list = NULL, *comp;
  dref dataref;
  basic_block last_always_executed = last_always_executed_block (loop);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      if (!DR_REF (dr))
	{
	  /* A fake reference for call or asm_expr that may clobber memory;
	     just fail.  */
	  goto end;
	}
      /* predcom pass isn't prepared to handle calls with data references.  */
      if (is_gimple_call (DR_STMT (dr)))
	goto end;
      /* Remember the index of the reference in its AUX field, and make the
	 reference a singleton component.  */
      dr->aux = (void *) (size_t) i;
      comp_father[i] = i;
      comp_size[i] = 1;
    }

  /* A component reserved for the "bad" data references.  */
  comp_father[n] = n;
  comp_size[n] = 1;

  /* Move references that fail suitable_reference_p into the bad
     component.  */
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      enum ref_step_type dummy;

      if (!suitable_reference_p (dr, &dummy))
	{
	  ia = (unsigned) (size_t) dr->aux;
	  merge_comps (comp_father, comp_size, n, ia);
	}
    }

  /* Merge the components of references connected by possible
     dependences.  */
  FOR_EACH_VEC_ELT (depends, i, ddr)
    {
      widest_int dummy_off;

      /* References proved independent need not be merged.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	continue;

      dra = DDR_A (ddr);
      drb = DDR_B (ddr);
      ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
      ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
      if (ia == ib)
	continue;

      bad = component_of (comp_father, n);

      /* If both A and B are reads, we may ignore unsuitable dependences.  */
      if (DR_IS_READ (dra) && DR_IS_READ (drb))
	{
	  if (ia == bad || ib == bad
	      || !determine_offset (dra, drb, &dummy_off))
	    continue;
	}
      /* If A is read and B write or vice versa and there is unsuitable
	 dependence, instead of merging both components into a component
	 that will certainly not pass suitable_component_p, just put the
	 read into bad component, perhaps at least the write together with
	 all the other data refs in it's component will be optimizable.  */
      else if (DR_IS_READ (dra) && ib != bad)
	{
	  if (ia == bad)
	    continue;
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      merge_comps (comp_father, comp_size, bad, ia);
	      continue;
	    }
	}
      else if (DR_IS_READ (drb) && ia != bad)
	{
	  if (ib == bad)
	    continue;
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      merge_comps (comp_father, comp_size, bad, ib);
	      continue;
	    }
	}

      merge_comps (comp_father, comp_size, ia, ib);
    }

  /* Distribute the references to struct component objects, skipping
     everything that ended up in the bad component.  */
  comps = XCNEWVEC (struct component *, n);
  bad = component_of (comp_father, n);
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      ia = (unsigned) (size_t) dr->aux;
      ca = component_of (comp_father, ia);
      if (ca == bad)
	continue;

      comp = comps[ca];
      if (!comp)
	{
	  comp = XCNEW (struct component);
	  /* COMP_SIZE gives the exact number of refs in the component.  */
	  comp->refs.create (comp_size[ca]);
	  comps[ca] = comp;
	}

      dataref = XCNEW (struct dref_d);
      dataref->ref = dr;
      dataref->stmt = DR_STMT (dr);
      dataref->offset = 0;
      dataref->distance = 0;

      dataref->always_accessed
	      = dominated_by_p (CDI_DOMINATORS, last_always_executed,
				gimple_bb (dataref->stmt));
      /* POS records the dominance order within the component.  */
      dataref->pos = comp->refs.length ();
      comp->refs.quick_push (dataref);
    }

  /* Chain the non-empty components into the result list.  */
  for (i = 0; i < n; i++)
    {
      comp = comps[i];
      if (comp)
	{
	  comp->next = comp_list;
	  comp_list = comp;
	}
    }
  free (comps);

end:
  free (comp_father);
  free (comp_size);
  return comp_list;
}
848 
/* Returns true if the component COMP satisfies the conditions
   described in 2) at the beginning of this file.  LOOP is the current
   loop.  */

static bool
suitable_component_p (struct loop *loop, struct component *comp)
{
  unsigned i;
  dref a, first;
  basic_block ba, bp = loop->header;
  bool ok, has_write = false;

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      ba = gimple_bb (a->stmt);

      /* Condition 2b) -- the reference must be executed in each iteration
	 (its block must dominate the loop latch).  */
      if (!just_once_each_iteration_p (loop, ba))
	return false;

      /* The references are expected to come in dominance order; verify.  */
      gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
      bp = ba;

      if (DR_IS_WRITE (a->ref))
	has_write = true;
    }

  /* The first reference determines the step classification of the whole
     component; it already passed suitable_reference_p in
     split_data_refs_to_components.  */
  first = comp->refs[0];
  ok = suitable_reference_p (first->ref, &comp->comp_step);
  gcc_assert (ok);
  first->offset = 0;

  /* Conditions 2a) and 2c) -- compute the iteration offset of every other
     reference relative to the first one; fail if any is unknown.  */
  for (i = 1; comp->refs.iterate (i, &a); i++)
    {
      if (!determine_offset (first->ref, a->ref, &a->offset))
	return false;

      enum ref_step_type a_step;
      gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
			   && a_step == comp->comp_step);
    }

  /* If there is a write inside the component, we must know whether the
     step is nonzero or not -- we would not otherwise be able to recognize
     whether the value accessed by reads comes from the OFFSET-th iteration
     or the previous one.  */
  if (has_write && comp->comp_step == RS_ANY)
    return false;

  return true;
}
899 
900 /* Check the conditions on references inside each of components COMPS,
901    and remove the unsuitable components from the list.  The new list
902    of components is returned.  The conditions are described in 2) at
903    the beginning of this file.  LOOP is the current loop.  */
904 
905 static struct component *
filter_suitable_components(struct loop * loop,struct component * comps)906 filter_suitable_components (struct loop *loop, struct component *comps)
907 {
908   struct component **comp, *act;
909 
910   for (comp = &comps; *comp; )
911     {
912       act = *comp;
913       if (suitable_component_p (loop, act))
914 	comp = &act->next;
915       else
916 	{
917 	  dref ref;
918 	  unsigned i;
919 
920 	  *comp = act->next;
921 	  FOR_EACH_VEC_ELT (act->refs, i, ref)
922 	    free (ref);
923 	  release_component (act);
924 	}
925     }
926 
927   return comps;
928 }
929 
930 /* Compares two drefs A and B by their offset and position.  Callback for
931    qsort.  */
932 
933 static int
order_drefs(const void * a,const void * b)934 order_drefs (const void *a, const void *b)
935 {
936   const dref *const da = (const dref *) a;
937   const dref *const db = (const dref *) b;
938   int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
939 
940   if (offcmp != 0)
941     return offcmp;
942 
943   return (*da)->pos - (*db)->pos;
944 }
945 
946 /* Compares two drefs A and B by their position.  Callback for qsort.  */
947 
948 static int
order_drefs_by_pos(const void * a,const void * b)949 order_drefs_by_pos (const void *a, const void *b)
950 {
951   const dref *const da = (const dref *) a;
952   const dref *const db = (const dref *) b;
953 
954   return (*da)->pos - (*db)->pos;
955 }
956 
/* Returns root of the CHAIN.  The root is always the first reference
   (see make_rooted_chain).  */

static inline dref
get_chain_root (chain_p chain)
{
  return chain->refs[0];
}
964 
/* Adds REF to the chain CHAIN.  REF is freed (not added) if its distance
   from the root reaches MAX_DISTANCE, since each unit of distance costs
   a temporary register.  */

static void
add_ref_to_chain (chain_p chain, dref ref)
{
  dref root = get_chain_root (chain);

  /* The root must have the smallest offset in the chain.  */
  gcc_assert (wi::les_p (root->offset, ref->offset));
  widest_int dist = ref->offset - root->offset;
  if (wi::leu_p (MAX_DISTANCE, dist))
    {
      free (ref);
      return;
    }
  gcc_assert (wi::fits_uhwi_p (dist));

  chain->refs.safe_push (ref);

  ref->distance = dist.to_uhwi ();

  /* A new maximum distance invalidates HAS_MAX_USE_AFTER; whether it holds
     for the new maximum is decided just below.  */
  if (ref->distance >= chain->length)
    {
      chain->length = ref->distance;
      chain->has_max_use_after = false;
    }

  /* REF has the maximal distance and comes after the root in the loop.  */
  if (ref->distance == chain->length
      && ref->pos > root->pos)
    chain->has_max_use_after = true;

  chain->all_always_accessed &= ref->always_accessed;
}
997 
998 /* Returns the chain for invariant component COMP.  */
999 
1000 static chain_p
make_invariant_chain(struct component * comp)1001 make_invariant_chain (struct component *comp)
1002 {
1003   chain_p chain = XCNEW (struct chain);
1004   unsigned i;
1005   dref ref;
1006 
1007   chain->type = CT_INVARIANT;
1008 
1009   chain->all_always_accessed = true;
1010 
1011   FOR_EACH_VEC_ELT (comp->refs, i, ref)
1012     {
1013       chain->refs.safe_push (ref);
1014       chain->all_always_accessed &= ref->always_accessed;
1015     }
1016 
1017   return chain;
1018 }
1019 
1020 /* Make a new chain rooted at REF.  */
1021 
1022 static chain_p
make_rooted_chain(dref ref)1023 make_rooted_chain (dref ref)
1024 {
1025   chain_p chain = XCNEW (struct chain);
1026 
1027   chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;
1028 
1029   chain->refs.safe_push (ref);
1030   chain->all_always_accessed = ref->always_accessed;
1031 
1032   ref->distance = 0;
1033 
1034   return chain;
1035 }
1036 
1037 /* Returns true if CHAIN is not trivial.  */
1038 
1039 static bool
nontrivial_chain_p(chain_p chain)1040 nontrivial_chain_p (chain_p chain)
1041 {
1042   return chain != NULL && chain->refs.length () > 1;
1043 }
1044 
1045 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1046    is no such name.  */
1047 
1048 static tree
name_for_ref(dref ref)1049 name_for_ref (dref ref)
1050 {
1051   tree name;
1052 
1053   if (is_gimple_assign (ref->stmt))
1054     {
1055       if (!ref->ref || DR_IS_READ (ref->ref))
1056 	name = gimple_assign_lhs (ref->stmt);
1057       else
1058 	name = gimple_assign_rhs1 (ref->stmt);
1059     }
1060   else
1061     name = PHI_RESULT (ref->stmt);
1062 
1063   return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1064 }
1065 
/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
   iterations of the innermost enclosing loop).  */

static bool
valid_initializer_p (struct data_reference *ref,
		     unsigned distance, struct data_reference *root)
{
  aff_tree diff, base, step;
  widest_int off;

  /* Both REF and ROOT must be accessing the same object.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
    return false;

  /* The initializer is defined outside of loop, hence its address must be
     invariant inside the loop.  */
  gcc_assert (integer_zerop (DR_STEP (ref)));

  /* If the address of the reference is invariant, initializer must access
     exactly the same location.  */
  if (integer_zerop (DR_STEP (root)))
    return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
	    && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));

  /* Verify that this index of REF is equal to the root's index at
     -DISTANCE-th iteration.  Compute DIFF = offset(ROOT) - offset(REF)
     as an affine combination.  */
  aff_combination_dr_offset (root, &diff);
  aff_combination_dr_offset (ref, &base);
  aff_combination_scale (&base, -1);
  aff_combination_add (&diff, &base);

  /* The difference must be a constant multiple of the root's step, and
     that multiple must equal DISTANCE.  */
  tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
				  &step, &name_expansions);
  if (!aff_combination_constant_multiple_p (&diff, &step, &off))
    return false;

  if (off != distance)
    return false;

  return true;
}
1107 
/* Finds looparound phi node of LOOP that copies the value of REF, and if its
   initial value is correct (equal to initial value of REF shifted by one
   iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
   is the root of the current chain.  */

static gphi *
find_looparound_phi (struct loop *loop, dref ref, dref root)
{
  tree name, init, init_ref;
  gphi *phi = NULL;
  gimple *init_stmt;
  edge latch = loop_latch_edge (loop);
  struct data_reference init_dr;
  gphi_iterator psi;

  /* Determine the ssa name holding the value of REF: the lhs of a load,
     the stored rhs of a store, or the phi result.  */
  if (is_gimple_assign (ref->stmt))
    {
      if (DR_IS_READ (ref->ref))
	name = gimple_assign_lhs (ref->stmt);
      else
	name = gimple_assign_rhs1 (ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);
  if (!name)
    return NULL;

  /* Look for a phi in the loop header whose latch argument is NAME, i.e.,
     a phi that carries the value to the next iteration.  */
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
	break;
    }

  if (gsi_end_p (psi))
    return NULL;

  /* The phi's preheader argument must be loaded by a plain assignment from
     a memory reference or a declaration.  */
  init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  if (TREE_CODE (init) != SSA_NAME)
    return NULL;
  init_stmt = SSA_NAME_DEF_STMT (init);
  if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
    return NULL;
  gcc_assert (gimple_assign_lhs (init_stmt) == init);

  init_ref = gimple_assign_rhs1 (init_stmt);
  if (!REFERENCE_CLASS_P (init_ref)
      && !DECL_P (init_ref))
    return NULL;

  /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
     loop enclosing PHI).  */
  memset (&init_dr, 0, sizeof (struct data_reference));
  DR_REF (&init_dr) = init_ref;
  DR_STMT (&init_dr) = phi;
  if (!dr_analyze_innermost (&init_dr, loop))
    return NULL;

  /* The initializer must match the root shifted by one extra iteration,
     since the phi copy is available one iteration later than REF.  */
  if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
    return NULL;

  return phi;
}
1171 
/* Adds a reference for the looparound copy of REF in PHI to CHAIN.  */

static void
insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
{
  dref nw = XCNEW (struct dref_d), aref;
  unsigned i;

  nw->stmt = phi;
  /* The phi carries the value one iteration later than REF.  */
  nw->distance = ref->distance + 1;
  nw->always_accessed = 1;

  /* Keep CHAIN->refs sorted by distance: insert before the first reference
     with distance >= the new one.  */
  FOR_EACH_VEC_ELT (chain->refs, i, aref)
    if (aref->distance >= nw->distance)
      break;
  chain->refs.safe_insert (i, nw);

  /* Extend the chain if the copy is further than any existing reference;
     a phi use never counts as a use after the root.  */
  if (nw->distance > chain->length)
    {
      chain->length = nw->distance;
      chain->has_max_use_after = false;
    }
}
1195 
1196 /* For references in CHAIN that are copied around the LOOP (created previously
1197    by PRE, or by user), add the results of such copies to the chain.  This
1198    enables us to remove the copies by unrolling, and may need less registers
1199    (also, it may allow us to combine chains together).  */
1200 
1201 static void
add_looparound_copies(struct loop * loop,chain_p chain)1202 add_looparound_copies (struct loop *loop, chain_p chain)
1203 {
1204   unsigned i;
1205   dref ref, root = get_chain_root (chain);
1206   gphi *phi;
1207 
1208   FOR_EACH_VEC_ELT (chain->refs, i, ref)
1209     {
1210       phi = find_looparound_phi (loop, ref, root);
1211       if (!phi)
1212 	continue;
1213 
1214       bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1215       insert_looparound_copy (chain, ref, phi);
1216     }
1217 }
1218 
/* Find roots of the values and determine distances in the component COMP.
   The references are redistributed into CHAINS.  LOOP is the current
   loop.  */

static void
determine_roots_comp (struct loop *loop,
		      struct component *comp,
		      vec<chain_p> *chains)
{
  unsigned i;
  dref a;
  chain_p chain = NULL;
  widest_int last_ofs = 0;

  /* Invariants are handled specially.  */
  if (comp->comp_step == RS_INVARIANT)
    {
      chain = make_invariant_chain (comp);
      chains->safe_push (chain);
      return;
    }

  /* Sort by offset (then position) so that chains can be grown greedily
     from consecutive references.  */
  comp->refs.qsort (order_drefs);

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      /* Start a new chain at every write, and whenever the reference is
	 too far from the current chain's root to be useful.  */
      if (!chain || DR_IS_WRITE (a->ref)
	  || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
	{
	  /* Finish the previous chain: keep it only if it links more than
	     one reference.  */
	  if (nontrivial_chain_p (chain))
	    {
	      add_looparound_copies (loop, chain);
	      chains->safe_push (chain);
	    }
	  else
	    release_chain (chain);
	  chain = make_rooted_chain (a);
	  last_ofs = a->offset;
	  continue;
	}

      add_ref_to_chain (chain, a);
    }

  /* Finish the last open chain the same way.  */
  if (nontrivial_chain_p (chain))
    {
      add_looparound_copies (loop, chain);
      chains->safe_push (chain);
    }
  else
    release_chain (chain);
}
1271 
1272 /* Find roots of the values and determine distances in components COMPS, and
1273    separates the references to CHAINS.  LOOP is the current loop.  */
1274 
1275 static void
determine_roots(struct loop * loop,struct component * comps,vec<chain_p> * chains)1276 determine_roots (struct loop *loop,
1277 		 struct component *comps, vec<chain_p> *chains)
1278 {
1279   struct component *comp;
1280 
1281   for (comp = comps; comp; comp = comp->next)
1282     determine_roots_comp (loop, comp, chains);
1283 }
1284 
/* Replace the reference in statement STMT with temporary variable
   NEW_TREE.  If SET is true, NEW_TREE is instead initialized to the value of
   the reference in the statement.  IN_LHS is true if the reference
   is in the lhs of STMT, false if it is in rhs.  */

static void
replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
{
  tree val;
  gassign *new_stmt;
  gimple_stmt_iterator bsi, psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      /* Phi results are only replaced, never used as initializers.  */
      gcc_assert (!in_lhs && !set);

      val = PHI_RESULT (stmt);
      bsi = gsi_after_labels (gimple_bb (stmt));
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, false);

      /* Turn the phi node into GIMPLE_ASSIGN.  */
      new_stmt = gimple_build_assign (val, new_tree);
      gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
      return;
    }

  /* Since the reference is of gimple_reg type, it should only
     appear as lhs or rhs of modify statement.  */
  gcc_assert (is_gimple_assign (stmt));

  bsi = gsi_for_stmt (stmt);

  /* If we do not need to initialize NEW_TREE, just replace the use of OLD.  */
  if (!set)
    {
      gcc_assert (!in_lhs);
      gimple_assign_set_rhs_from_tree (&bsi, new_tree);
      /* The iterator may point to a new statement; refetch before
	 updating.  */
      stmt = gsi_stmt (bsi);
      update_stmt (stmt);
      return;
    }

  if (in_lhs)
    {
      /* We have statement

	 OLD = VAL

	 If OLD is a memory reference, then VAL is gimple_val, and we transform
	 this to

	 OLD = VAL
	 NEW = VAL

	 Otherwise, we are replacing a combination chain,
	 VAL is the expression that performs the combination, and OLD is an
	 SSA name.  In this case, we transform the assignment to

	 OLD = VAL
	 NEW = OLD

	 */

      val = gimple_assign_lhs (stmt);
      if (TREE_CODE (val) != SSA_NAME)
	{
	  val = gimple_assign_rhs1 (stmt);
	  gcc_assert (gimple_assign_single_p (stmt));
	  /* A clobber has no usable value; use the default definition of
	     the variable instead.  */
	  if (TREE_CLOBBER_P (val))
	    val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
	  else
	    gcc_assert (gimple_assign_copy_p (stmt));
	}
    }
  else
    {
      /* VAL = OLD

	 is transformed to

	 VAL = OLD
	 NEW = VAL  */

      val = gimple_assign_lhs (stmt);
    }

  /* Insert the copy into NEW_TREE right after STMT.  */
  new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
  gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
}
1375 
/* Returns a memory reference to DR in the ITER-th iteration of
   the loop it was analyzed in.  Append init stmts to STMTS.  */

static tree
ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts)
{
  tree off = DR_OFFSET (dr);
  tree coff = DR_INIT (dr);
  /* Shift the address by ITER steps: fold a constant step into the
     constant offset, a variable step into the variable offset.  */
  if (iter == 0)
    ;
  else if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
    coff = size_binop (PLUS_EXPR, coff,
		       size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
  else
    off = size_binop (PLUS_EXPR, off,
		      size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
  tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
  addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
				 is_gimple_mem_ref_addr, NULL_TREE);
  tree alias_ptr = fold_convert (reference_alias_ptr_type (DR_REF (dr)), coff);
  /* Preserve the alignment of the original reference on the new one.  */
  tree type = build_aligned_type (TREE_TYPE (DR_REF (dr)),
				  get_object_alignment (DR_REF (dr)));
  /* While data-ref analysis punts on bit offsets it still handles
     bitfield accesses at byte boundaries.  Cope with that.  Note that
     we cannot simply re-apply the outer COMPONENT_REF because the
     byte-granular portion of it is already applied via DR_INIT and
     DR_OFFSET, so simply build a BIT_FIELD_REF knowing that the bits
     start at offset zero.  */
  if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
    {
      tree field = TREE_OPERAND (DR_REF (dr), 1);
      return build3 (BIT_FIELD_REF, TREE_TYPE (DR_REF (dr)),
		     build2 (MEM_REF, type, addr, alias_ptr),
		     DECL_SIZE (field), bitsize_zero_node);
    }
  else
    return fold_build2 (MEM_REF, type, addr, alias_ptr);
}
1415 
1416 /* Get the initialization expression for the INDEX-th temporary variable
1417    of CHAIN.  */
1418 
1419 static tree
get_init_expr(chain_p chain,unsigned index)1420 get_init_expr (chain_p chain, unsigned index)
1421 {
1422   if (chain->type == CT_COMBINATION)
1423     {
1424       tree e1 = get_init_expr (chain->ch1, index);
1425       tree e2 = get_init_expr (chain->ch2, index);
1426 
1427       return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1428     }
1429   else
1430     return chain->inits[index];
1431 }
1432 
1433 /* Returns a new temporary variable used for the I-th variable carrying
1434    value of REF.  The variable's uid is marked in TMP_VARS.  */
1435 
1436 static tree
predcom_tmp_var(tree ref,unsigned i,bitmap tmp_vars)1437 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1438 {
1439   tree type = TREE_TYPE (ref);
1440   /* We never access the components of the temporary variable in predictive
1441      commoning.  */
1442   tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1443   bitmap_set_bit (tmp_vars, DECL_UID (var));
1444   return var;
1445 }
1446 
/* Creates the variables for CHAIN, as well as phi nodes for them and
   initialization on entry to LOOP.  Uids of the newly created
   temporary variables are marked in TMP_VARS.  */

static void
initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  unsigned i;
  unsigned n = chain->length;
  dref root = get_chain_root (chain);
  /* If no use at maximum distance occurs after the root, the last variable
     can be the same as the first one (the value wraps around).  */
  bool reuse_first = !chain->has_max_use_after;
  tree ref, init, var, next;
  gphi *phi;
  gimple_seq stmts;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* If N == 0, then all the references are within the single iteration.  And
     since this is an nonempty chain, reuse_first cannot be true.  */
  gcc_assert (n > 0 || !reuse_first);

  chain->vars.create (n + 1);

  /* The expression the temporaries are modeled after: the combination lhs
     for combination chains, the root's memory reference otherwise.  */
  if (chain->type == CT_COMBINATION)
    ref = gimple_assign_lhs (root->stmt);
  else
    ref = DR_REF (root->ref);

  /* Create N + 1 variables (or N when the first is reused as the last).  */
  for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
    {
      var = predcom_tmp_var (ref, i, tmp_vars);
      chain->vars.quick_push (var);
    }
  if (reuse_first)
    chain->vars.quick_push (chain->vars[0]);

  FOR_EACH_VEC_ELT (chain->vars, i, var)
    chain->vars[i] = make_ssa_name (var);

  /* For each variable but the last, create the phi node carrying its value
     around the loop: initialized on entry, copied from the next variable
     along the latch.  */
  for (i = 0; i < n; i++)
    {
      var = chain->vars[i];
      next = chain->vars[i + 1];
      init = get_init_expr (chain, i);

      init = force_gimple_operand (init, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (entry, stmts);

      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
}
1500 
1501 /* Create the variables and initialization statement for root of chain
1502    CHAIN.  Uids of the newly created temporary variables are marked
1503    in TMP_VARS.  */
1504 
1505 static void
initialize_root(struct loop * loop,chain_p chain,bitmap tmp_vars)1506 initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
1507 {
1508   dref root = get_chain_root (chain);
1509   bool in_lhs = (chain->type == CT_STORE_LOAD
1510 		 || chain->type == CT_COMBINATION);
1511 
1512   initialize_root_vars (loop, chain, tmp_vars);
1513   replace_ref_with (root->stmt,
1514 		    chain->vars[chain->length],
1515 		    true, in_lhs);
1516 }
1517 
/* Initializes a variable for load motion for ROOT and prepares phi nodes and
   initialization on entry to LOOP if necessary.  The ssa name for the variable
   is stored in VARS.  If WRITTEN is true, also a phi node to copy its value
   around the loop is created.  Uid of the newly created temporary variable
   is marked in TMP_VARS.  INITS is the list containing the (single)
   initializer.  */

static void
initialize_root_vars_lm (struct loop *loop, dref root, bool written,
			 vec<tree> *vars, vec<tree> inits,
			 bitmap tmp_vars)
{
  unsigned i;
  tree ref = DR_REF (root->ref), init, var, next;
  gimple_seq stmts;
  gphi *phi;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* Find the initializer for the variable, and check that it cannot
     trap.  */
  init = inits[0];

  /* One variable for the value; a second one for the latch copy if the
     location is also written in the loop.  */
  vars->create (written ? 2 : 1);
  var = predcom_tmp_var (ref, 0, tmp_vars);
  vars->quick_push (var);
  if (written)
    vars->quick_push ((*vars)[0]);

  FOR_EACH_VEC_ELT (*vars, i, var)
    (*vars)[i] = make_ssa_name (var);

  var = (*vars)[0];

  init = force_gimple_operand (init, &stmts, written, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (entry, stmts);

  if (written)
    {
      /* The value is updated in the loop: carry it around via a phi
	 node.  */
      next = (*vars)[1];
      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
  else
    {
      /* Read-only: a single initialization on the preheader suffices.  */
      gassign *init_stmt = gimple_build_assign (var, init);
      gsi_insert_on_edge_immediate (entry, init_stmt);
    }
}
1568 
1569 
1570 /* Execute load motion for references in chain CHAIN.  Uids of the newly
1571    created temporary variables are marked in TMP_VARS.  */
1572 
1573 static void
execute_load_motion(struct loop * loop,chain_p chain,bitmap tmp_vars)1574 execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
1575 {
1576   auto_vec<tree> vars;
1577   dref a;
1578   unsigned n_writes = 0, ridx, i;
1579   tree var;
1580 
1581   gcc_assert (chain->type == CT_INVARIANT);
1582   gcc_assert (!chain->combined);
1583   FOR_EACH_VEC_ELT (chain->refs, i, a)
1584     if (DR_IS_WRITE (a->ref))
1585       n_writes++;
1586 
1587   /* If there are no reads in the loop, there is nothing to do.  */
1588   if (n_writes == chain->refs.length ())
1589     return;
1590 
1591   initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
1592 			   &vars, chain->inits, tmp_vars);
1593 
1594   ridx = 0;
1595   FOR_EACH_VEC_ELT (chain->refs, i, a)
1596     {
1597       bool is_read = DR_IS_READ (a->ref);
1598 
1599       if (DR_IS_WRITE (a->ref))
1600 	{
1601 	  n_writes--;
1602 	  if (n_writes)
1603 	    {
1604 	      var = vars[0];
1605 	      var = make_ssa_name (SSA_NAME_VAR (var));
1606 	      vars[0] = var;
1607 	    }
1608 	  else
1609 	    ridx = 1;
1610 	}
1611 
1612       replace_ref_with (a->stmt, vars[ridx],
1613 			!is_read, !is_read);
1614     }
1615 }
1616 
/* Returns the single statement in that NAME is used, excepting
   the looparound phi nodes contained in one of the chains.  If there is no
   such statement, or more statements, NULL is returned.  */

static gimple *
single_nonlooparound_use (tree name)
{
  use_operand_p use;
  imm_use_iterator it;
  gimple *stmt, *ret = NULL;

  FOR_EACH_IMM_USE_FAST (use, it, name)
    {
      stmt = USE_STMT (use);

      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  /* Ignore uses in looparound phi nodes.  Uses in other phi nodes
	     could not be processed anyway, so just fail for them.  */
	  if (bitmap_bit_p (looparound_phis,
			    SSA_NAME_VERSION (PHI_RESULT (stmt))))
	    continue;

	  return NULL;
	}
      else if (is_gimple_debug (stmt))
	/* Debug statements do not count as uses.  */
	continue;
      else if (ret != NULL)
	/* Second real use found -- not a single use.  */
	return NULL;
      else
	ret = stmt;
    }

  return ret;
}
1652 
/* Remove statement STMT, as well as the chain of assignments in that it is
   used.  */

static void
remove_stmt (gimple *stmt)
{
  tree name;
  gimple *next;
  gimple_stmt_iterator psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      name = PHI_RESULT (stmt);
      /* Find the use before removing the phi, while NAME is still live.  */
      next = single_nonlooparound_use (name);
      reset_debug_uses (stmt);
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, true);

      /* Only follow into a plain ssa-name copy of NAME; anything else is
	 left alone.  */
      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }

  /* Walk down the chain of single-use copies, removing each statement.  */
  while (1)
    {
      gimple_stmt_iterator bsi;

      bsi = gsi_for_stmt (stmt);

      name = gimple_assign_lhs (stmt);
      gcc_assert (TREE_CODE (name) == SSA_NAME);

      /* Again, look up the use before releasing the definition.  */
      next = single_nonlooparound_use (name);
      reset_debug_uses (stmt);

      unlink_stmt_vdef (stmt);
      gsi_remove (&bsi, true);
      release_defs (stmt);

      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }
}
1703 
1704 /* Perform the predictive commoning optimization for a chain CHAIN.
1705    Uids of the newly created temporary variables are marked in TMP_VARS.*/
1706 
1707 static void
execute_pred_commoning_chain(struct loop * loop,chain_p chain,bitmap tmp_vars)1708 execute_pred_commoning_chain (struct loop *loop, chain_p chain,
1709 			     bitmap tmp_vars)
1710 {
1711   unsigned i;
1712   dref a;
1713   tree var;
1714 
1715   if (chain->combined)
1716     {
1717       /* For combined chains, just remove the statements that are used to
1718 	 compute the values of the expression (except for the root one).
1719 	 We delay this until after all chains are processed.  */
1720     }
1721   else
1722     {
1723       /* For non-combined chains, set up the variables that hold its value,
1724 	 and replace the uses of the original references by these
1725 	 variables.  */
1726       initialize_root (loop, chain, tmp_vars);
1727       for (i = 1; chain->refs.iterate (i, &a); i++)
1728 	{
1729 	  var = chain->vars[chain->length - a->distance];
1730 	  replace_ref_with (a->stmt, var, false, false);
1731 	}
1732     }
1733 }
1734 
/* Determines the unroll factor necessary to remove as many temporary variable
   copies as possible.  CHAINS is the list of chains that will be
   optimized.  */

static unsigned
determine_unroll_factor (vec<chain_p> chains)
{
  chain_p chain;
  unsigned factor = 1, af, nfactor, i;
  unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  FOR_EACH_VEC_ELT (chains, i, chain)
    {
      /* Invariant chains need no unrolling.  */
      if (chain->type == CT_INVARIANT)
	continue;

      if (chain->combined)
	{
	  /* For combined chains, we can't handle unrolling if we replace
	     looparound PHIs.  */
	  dref a;
	  unsigned j;
	  for (j = 1; chain->refs.iterate (j, &a); j++)
	    if (gimple_code (a->stmt) == GIMPLE_PHI)
	      return 1;
	  continue;
	}

      /* The best unroll factor for this chain is equal to the number of
	 temporary variables that we create for it.  */
      af = chain->length;
      if (chain->has_max_use_after)
	af++;

      /* Accumulate the least common multiple of the per-chain factors,
	 as long as it stays within the unrolling limit.  */
      nfactor = factor * af / gcd (factor, af);
      if (nfactor <= max)
	factor = nfactor;
    }

  return factor;
}
1776 
1777 /* Perform the predictive commoning optimization for CHAINS.
1778    Uids of the newly created temporary variables are marked in TMP_VARS.  */
1779 
1780 static void
execute_pred_commoning(struct loop * loop,vec<chain_p> chains,bitmap tmp_vars)1781 execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
1782 			bitmap tmp_vars)
1783 {
1784   chain_p chain;
1785   unsigned i;
1786 
1787   FOR_EACH_VEC_ELT (chains, i, chain)
1788     {
1789       if (chain->type == CT_INVARIANT)
1790 	execute_load_motion (loop, chain, tmp_vars);
1791       else
1792 	execute_pred_commoning_chain (loop, chain, tmp_vars);
1793     }
1794 
1795   FOR_EACH_VEC_ELT (chains, i, chain)
1796     {
1797       if (chain->type == CT_INVARIANT)
1798 	;
1799       else if (chain->combined)
1800 	{
1801 	  /* For combined chains, just remove the statements that are used to
1802 	     compute the values of the expression (except for the root one).  */
1803 	  dref a;
1804 	  unsigned j;
1805 	  for (j = 1; chain->refs.iterate (j, &a); j++)
1806 	    remove_stmt (a->stmt);
1807 	}
1808     }
1809 
1810   update_ssa (TODO_update_ssa_only_virtuals);
1811 }
1812 
1813 /* For each reference in CHAINS, if its defining statement is
1814    phi node, record the ssa name that is defined by it.  */
1815 
1816 static void
replace_phis_by_defined_names(vec<chain_p> chains)1817 replace_phis_by_defined_names (vec<chain_p> chains)
1818 {
1819   chain_p chain;
1820   dref a;
1821   unsigned i, j;
1822 
1823   FOR_EACH_VEC_ELT (chains, i, chain)
1824     FOR_EACH_VEC_ELT (chain->refs, j, a)
1825       {
1826 	if (gimple_code (a->stmt) == GIMPLE_PHI)
1827 	  {
1828 	    a->name_defined_by_phi = PHI_RESULT (a->stmt);
1829 	    a->stmt = NULL;
1830 	  }
1831       }
1832 }
1833 
1834 /* For each reference in CHAINS, if name_defined_by_phi is not
1835    NULL, use it to set the stmt field.  */
1836 
1837 static void
replace_names_by_phis(vec<chain_p> chains)1838 replace_names_by_phis (vec<chain_p> chains)
1839 {
1840   chain_p chain;
1841   dref a;
1842   unsigned i, j;
1843 
1844   FOR_EACH_VEC_ELT (chains, i, chain)
1845     FOR_EACH_VEC_ELT (chain->refs, j, a)
1846       if (a->stmt == NULL)
1847 	{
1848 	  a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
1849 	  gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
1850 	  a->name_defined_by_phi = NULL_TREE;
1851 	}
1852 }
1853 
/* Wrapper over execute_pred_commoning, to pass it as a callback
   to tree_transform_and_unroll_loop.  */

struct epcc_data
{
  /* Chains to be transformed in the unrolled loop.  */
  vec<chain_p> chains;
  /* Bitmap in which uids of new temporary variables are recorded.  */
  bitmap tmp_vars;
};

static void
execute_pred_commoning_cbck (struct loop *loop, void *data)
{
  struct epcc_data *const dta = (struct epcc_data *) data;

  /* Restore phi nodes that were replaced by ssa names before
     tree_transform_and_unroll_loop (see detailed description in
     tree_predictive_commoning_loop).  */
  replace_names_by_phis (dta->chains);
  execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
}
1874 
/* Base NAME and all the names in the chain of phi nodes that use it
   on variable VAR.  The phi nodes are recognized by being in the copies of
   the header of the LOOP.  */

static void
base_names_in_chain_on (struct loop *loop, tree name, tree var)
{
  gimple *stmt, *phi;
  imm_use_iterator iter;

  replace_ssa_name_symbol (name, var);

  /* Follow the du chain through phi nodes inside LOOP, rebasing each
     result on VAR, until no further such phi is found.  */
  while (1)
    {
      phi = NULL;
      FOR_EACH_IMM_USE_STMT (stmt, iter, name)
	{
	  if (gimple_code (stmt) == GIMPLE_PHI
	      && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
	    {
	      phi = stmt;
	      /* Must leave the immediate-use iteration before acting on
		 the statement.  */
	      BREAK_FROM_IMM_USE_STMT (iter);
	    }
	}
      if (!phi)
	return;

      name = PHI_RESULT (phi);
      replace_ssa_name_symbol (name, var);
    }
}
1906 
/* Given an unrolled LOOP after predictive commoning, remove the
   register copies arising from phi nodes by changing the base
   variables of SSA names.  TMP_VARS is the set of the temporary variables
   for those we want to perform this.  */

static void
eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
{
  edge e;
  gphi *phi;
  gimple *stmt;
  tree name, use, var;
  gphi_iterator psi;

  e = loop_latch_edge (loop);
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      name = PHI_RESULT (phi);
      var = SSA_NAME_VAR (name);
      /* Only handle phis for the temporaries we created.  */
      if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
	continue;
      use = PHI_ARG_DEF_FROM_EDGE (phi, e);
      gcc_assert (TREE_CODE (use) == SSA_NAME);

      /* Base all the ssa names in the ud and du chain of NAME on VAR.  */
      stmt = SSA_NAME_DEF_STMT (use);
      while (gimple_code (stmt) == GIMPLE_PHI
	     /* In case we could not unroll the loop enough to eliminate
		all copies, we may reach the loop header before the defining
		statement (in that case, some register copies will be present
		in loop latch in the final code, corresponding to the newly
		created looparound phi nodes).  */
	     && gimple_bb (stmt) != loop->header)
	{
	  gcc_assert (single_pred_p (gimple_bb (stmt)));
	  use = PHI_ARG_DEF (stmt, 0);
	  stmt = SSA_NAME_DEF_STMT (use);
	}

      base_names_in_chain_on (loop, use, var);
    }
}
1950 
1951 /* Returns true if CHAIN is suitable to be combined.  */
1952 
1953 static bool
chain_can_be_combined_p(chain_p chain)1954 chain_can_be_combined_p (chain_p chain)
1955 {
1956   return (!chain->combined
1957 	  && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
1958 }
1959 
/* Returns the modify statement that uses NAME.  Skips over assignment
   statements, NAME is replaced with the actual name used in the returned
   statement.  */

static gimple *
find_use_stmt (tree *name)
{
  gimple *stmt;
  tree rhs, lhs;

  /* Skip over assignments.  */
  while (1)
    {
      /* Only a single (non-looparound, non-debug) use can be followed.  */
      stmt = single_nonlooparound_use (*name);
      if (!stmt)
	return NULL;

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
	return NULL;

      lhs = gimple_assign_lhs (stmt);
      if (TREE_CODE (lhs) != SSA_NAME)
	return NULL;

      if (gimple_assign_copy_p (stmt))
	{
	  /* A plain copy: continue following the chain under the new
	     name.  */
	  rhs = gimple_assign_rhs1 (stmt);
	  if (rhs != *name)
	    return NULL;

	  *name = lhs;
	}
      else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
	       == GIMPLE_BINARY_RHS)
	/* Found the binary operation that consumes NAME.  */
	return stmt;
      else
	return NULL;
    }
}
1999 
2000 /* Returns true if we may perform reassociation for operation CODE in TYPE.  */
2001 
2002 static bool
may_reassociate_p(tree type,enum tree_code code)2003 may_reassociate_p (tree type, enum tree_code code)
2004 {
2005   if (FLOAT_TYPE_P (type)
2006       && !flag_unsafe_math_optimizations)
2007     return false;
2008 
2009   return (commutative_tree_code (code)
2010 	  && associative_tree_code (code));
2011 }
2012 
2013 /* If the operation used in STMT is associative and commutative, go through the
2014    tree of the same operations and returns its root.  Distance to the root
2015    is stored in DISTANCE.  */
2016 
2017 static gimple *
find_associative_operation_root(gimple * stmt,unsigned * distance)2018 find_associative_operation_root (gimple *stmt, unsigned *distance)
2019 {
2020   tree lhs;
2021   gimple *next;
2022   enum tree_code code = gimple_assign_rhs_code (stmt);
2023   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2024   unsigned dist = 0;
2025 
2026   if (!may_reassociate_p (type, code))
2027     return NULL;
2028 
2029   while (1)
2030     {
2031       lhs = gimple_assign_lhs (stmt);
2032       gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2033 
2034       next = find_use_stmt (&lhs);
2035       if (!next
2036 	  || gimple_assign_rhs_code (next) != code)
2037 	break;
2038 
2039       stmt = next;
2040       dist++;
2041     }
2042 
2043   if (distance)
2044     *distance = dist;
2045   return stmt;
2046 }
2047 
2048 /* Returns the common statement in that NAME1 and NAME2 have a use.  If there
2049    is no such statement, returns NULL_TREE.  In case the operation used on
2050    NAME1 and NAME2 is associative and commutative, returns the root of the
2051    tree formed by this operation instead of the statement that uses NAME1 or
2052    NAME2.  */
2053 
2054 static gimple *
find_common_use_stmt(tree * name1,tree * name2)2055 find_common_use_stmt (tree *name1, tree *name2)
2056 {
2057   gimple *stmt1, *stmt2;
2058 
2059   stmt1 = find_use_stmt (name1);
2060   if (!stmt1)
2061     return NULL;
2062 
2063   stmt2 = find_use_stmt (name2);
2064   if (!stmt2)
2065     return NULL;
2066 
2067   if (stmt1 == stmt2)
2068     return stmt1;
2069 
2070   stmt1 = find_associative_operation_root (stmt1, NULL);
2071   if (!stmt1)
2072     return NULL;
2073   stmt2 = find_associative_operation_root (stmt2, NULL);
2074   if (!stmt2)
2075     return NULL;
2076 
2077   return (stmt1 == stmt2 ? stmt1 : NULL);
2078 }
2079 
2080 /* Checks whether R1 and R2 are combined together using CODE, with the result
2081    in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2082    if it is true.  If CODE is ERROR_MARK, set these values instead.  */
2083 
static bool
combinable_refs_p (dref r1, dref r2,
		   enum tree_code *code, bool *swap, tree *rslt_type)
{
  enum tree_code acode;
  bool aswap;
  tree atype;
  tree name1, name2;
  gimple *stmt;

  name1 = name_for_ref (r1);
  name2 = name_for_ref (r2);
  gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);

  stmt = find_common_use_stmt (&name1, &name2);

  if (!stmt
      /* A simple post-dominance check - make sure the combination
         is executed under the same condition as the references.  */
      || (gimple_bb (stmt) != gimple_bb (r1->stmt)
	  && gimple_bb (stmt) != gimple_bb (r2->stmt)))
    return false;

  /* Extract how the two references are combined in STMT: the operation
     code, whether the operands appear swapped (only meaningful for
     noncommutative codes), and the result type.  */
  acode = gimple_assign_rhs_code (stmt);
  aswap = (!commutative_tree_code (acode)
	   && gimple_assign_rhs1 (stmt) != name1);
  atype = TREE_TYPE (gimple_assign_lhs (stmt));

  /* First call for a chain pair: record the combination in the in/out
     parameters and accept it.  */
  if (*code == ERROR_MARK)
    {
      *code = acode;
      *swap = aswap;
      *rslt_type = atype;
      return true;
    }

  /* Subsequent calls: accept only if this pair is combined in exactly the
     same way as the previously recorded one.  */
  return (*code == acode
	  && *swap == aswap
	  && *rslt_type == atype);
}
2124 
2125 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2126    an assignment of the remaining operand.  */
2127 
2128 static void
remove_name_from_operation(gimple * stmt,tree op)2129 remove_name_from_operation (gimple *stmt, tree op)
2130 {
2131   tree other_op;
2132   gimple_stmt_iterator si;
2133 
2134   gcc_assert (is_gimple_assign (stmt));
2135 
2136   if (gimple_assign_rhs1 (stmt) == op)
2137     other_op = gimple_assign_rhs2 (stmt);
2138   else
2139     other_op = gimple_assign_rhs1 (stmt);
2140 
2141   si = gsi_for_stmt (stmt);
2142   gimple_assign_set_rhs_from_tree (&si, other_op);
2143 
2144   /* We should not have reallocated STMT.  */
2145   gcc_assert (gsi_stmt (si) == stmt);
2146 
2147   update_stmt (stmt);
2148 }
2149 
2150 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2151    are combined in a single statement, and returns this statement.  */
2152 
static gimple *
reassociate_to_the_same_stmt (tree name1, tree name2)
{
  gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
  gassign *new_stmt, *tmp_stmt;
  tree new_name, tmp_name, var, r1, r2;
  unsigned dist1, dist2;
  enum tree_code code;
  tree type = TREE_TYPE (name1);
  gimple_stmt_iterator bsi;

  stmt1 = find_use_stmt (&name1);
  stmt2 = find_use_stmt (&name2);
  root1 = find_associative_operation_root (stmt1, &dist1);
  root2 = find_associative_operation_root (stmt2, &dist2);
  code = gimple_assign_rhs_code (stmt1);

  /* The caller guarantees both names feed the same reassociable operation
     tree, so the roots must exist, coincide, and use the same code.  */
  gcc_assert (root1 && root2 && root1 == root2
	      && code == gimple_assign_rhs_code (stmt2));

  /* Find the root of the nearest expression in that both NAME1 and NAME2
     are used.  */
  r1 = name1;
  s1 = stmt1;
  r2 = name2;
  s2 = stmt2;

  /* First level the two walks to the same distance from the root...  */
  while (dist1 > dist2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      dist1--;
    }
  while (dist2 > dist1)
    {
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
      dist2--;
    }

  /* ...then walk both upwards in lockstep until they meet.  */
  while (s1 != s2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
    }

  /* Remove NAME1 and NAME2 from the statements in that they are used
     currently.  */
  remove_name_from_operation (stmt1, name1);
  remove_name_from_operation (stmt2, name2);

  /* Insert the new statement combining NAME1 and NAME2 before S1, and
     combine it with the rhs of S1.  */
  var = create_tmp_reg (type, "predreastmp");
  new_name = make_ssa_name (var);
  new_stmt = gimple_build_assign (new_name, code, name1, name2);

  var = create_tmp_reg (type, "predreastmp");
  tmp_name = make_ssa_name (var);

  /* Rhs of S1 may now be either a binary expression with operation
     CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
     so that name1 or name2 was removed from it).  */
  tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
				  gimple_assign_rhs1 (s1),
				  gimple_assign_rhs2 (s1));

  /* Replace the rhs of S1 by NEW_NAME CODE TMP_NAME, where TMP_NAME
     recomputes S1's previous rhs.  */
  bsi = gsi_for_stmt (s1);
  gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
  s1 = gsi_stmt (bsi);
  update_stmt (s1);

  /* The definitions of NEW_NAME and TMP_NAME must precede their use
     in S1.  */
  gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
  gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);

  return new_stmt;
}
2232 
2233 /* Returns the statement that combines references R1 and R2.  In case R1
2234    and R2 are not used in the same statement, but they are used with an
2235    associative and commutative operation in the same expression, reassociate
2236    the expression so that they are used in the same statement.  */
2237 
2238 static gimple *
stmt_combining_refs(dref r1,dref r2)2239 stmt_combining_refs (dref r1, dref r2)
2240 {
2241   gimple *stmt1, *stmt2;
2242   tree name1 = name_for_ref (r1);
2243   tree name2 = name_for_ref (r2);
2244 
2245   stmt1 = find_use_stmt (&name1);
2246   stmt2 = find_use_stmt (&name2);
2247   if (stmt1 == stmt2)
2248     return stmt1;
2249 
2250   return reassociate_to_the_same_stmt (name1, name2);
2251 }
2252 
2253 /* Tries to combine chains CH1 and CH2 together.  If this succeeds, the
2254    description of the new chain is returned, otherwise we return NULL.  */
2255 
static chain_p
combine_chains (chain_p ch1, chain_p ch2)
{
  dref r1, r2, nw;
  enum tree_code op = ERROR_MARK;
  bool swap = false;
  chain_p new_chain;
  unsigned i;
  tree rslt_type = NULL_TREE;

  /* Chains can only be combined with a distinct chain of the same shape:
     same length and the same number of references.  */
  if (ch1 == ch2)
    return NULL;
  if (ch1->length != ch2->length)
    return NULL;

  if (ch1->refs.length () != ch2->refs.length ())
    return NULL;

  /* Every pair of corresponding references must be at the same distance
     and combined by one common operation (recorded in OP/SWAP/RSLT_TYPE
     on the first iteration, checked on the following ones).  */
  for (i = 0; (ch1->refs.iterate (i, &r1)
	       && ch2->refs.iterate (i, &r2)); i++)
    {
      if (r1->distance != r2->distance)
	return NULL;

      if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
	return NULL;
    }

  /* For a noncommutative operation used as R2 OP R1, swap the chains so
     that the combination is always CH1 OP CH2.  */
  if (swap)
    std::swap (ch1, ch2);

  /* Build the combination chain; its references are the statements that
     combine the corresponding reference pairs.  */
  new_chain = XCNEW (struct chain);
  new_chain->type = CT_COMBINATION;
  new_chain->op = op;
  new_chain->ch1 = ch1;
  new_chain->ch2 = ch2;
  new_chain->rslt_type = rslt_type;
  new_chain->length = ch1->length;

  for (i = 0; (ch1->refs.iterate (i, &r1)
	       && ch2->refs.iterate (i, &r2)); i++)
    {
      nw = XCNEW (struct dref_d);
      nw->stmt = stmt_combining_refs (r1, r2);
      nw->distance = r1->distance;

      new_chain->refs.safe_push (nw);
    }

  /* Mark the source chains as consumed, so they are not combined again.  */
  ch1->combined = true;
  ch2->combined = true;
  return new_chain;
}
2309 
2310 /* Recursively update position information of all offspring chains to ROOT
2311    chain's position information.  */
2312 
2313 static void
update_pos_for_combined_chains(chain_p root)2314 update_pos_for_combined_chains (chain_p root)
2315 {
2316   chain_p ch1 = root->ch1, ch2 = root->ch2;
2317   dref ref, ref1, ref2;
2318   for (unsigned j = 0; (root->refs.iterate (j, &ref)
2319 			&& ch1->refs.iterate (j, &ref1)
2320 			&& ch2->refs.iterate (j, &ref2)); ++j)
2321     ref1->pos = ref2->pos = ref->pos;
2322 
2323   if (ch1->type == CT_COMBINATION)
2324     update_pos_for_combined_chains (ch1);
2325   if (ch2->type == CT_COMBINATION)
2326     update_pos_for_combined_chains (ch2);
2327 }
2328 
2329 /* Returns true if statement S1 dominates statement S2.  */
2330 
2331 static bool
pcom_stmt_dominates_stmt_p(gimple * s1,gimple * s2)2332 pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
2333 {
2334   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
2335 
2336   if (!bb1 || s1 == s2)
2337     return true;
2338 
2339   if (bb1 == bb2)
2340     return gimple_uid (s1) < gimple_uid (s2);
2341 
2342   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
2343 }
2344 
2345 /* Try to combine the CHAINS in LOOP.  */
2346 
static void
try_combine_chains (struct loop *loop, vec<chain_p> *chains)
{
  unsigned i, j;
  chain_p ch1, ch2, cch;
  auto_vec<chain_p> worklist;
  bool combined_p = false;

  /* Seed the worklist with every chain that is a combination candidate.  */
  FOR_EACH_VEC_ELT (*chains, i, ch1)
    if (chain_can_be_combined_p (ch1))
      worklist.safe_push (ch1);

  /* Greedily combine chains; a successful combination is itself pushed
     back, so it may be combined further.  */
  while (!worklist.is_empty ())
    {
      ch1 = worklist.pop ();
      if (!chain_can_be_combined_p (ch1))
	continue;

      FOR_EACH_VEC_ELT (*chains, j, ch2)
	{
	  if (!chain_can_be_combined_p (ch2))
	    continue;

	  cch = combine_chains (ch1, ch2);
	  if (cch)
	    {
	      worklist.safe_push (cch);
	      chains->safe_push (cch);
	      combined_p = true;
	      break;
	    }
	}
    }
  if (!combined_p)
    return;

  /* Setup UID for all statements in dominance order.  */
  basic_block *bbs = get_loop_body (loop);
  renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes);
  free (bbs);

  /* Re-association in combined chains may generate statements different to
     order of references of the original chain.  We need to keep references
     of combined chain in dominance order so that all uses will be inserted
     after definitions.  Note:
       A) This is necessary for all combined chains.
       B) This is only necessary for ZERO distance references because other
	  references inherit value from loop carried PHIs.

     We first update position information for all combined chains.  */
  dref ref;
  for (i = 0; chains->iterate (i, &ch1); ++i)
    {
      /* Only the outermost (not further combined) combination chains carry
	 authoritative positions; they are propagated downwards.  */
      if (ch1->type != CT_COMBINATION || ch1->combined)
	continue;

      for (j = 0; ch1->refs.iterate (j, &ref); ++j)
	ref->pos = gimple_uid (ref->stmt);

      update_pos_for_combined_chains (ch1);
    }
  /* Then sort references according to newly updated position information.  */
  for (i = 0; chains->iterate (i, &ch1); ++i)
    {
      if (ch1->type != CT_COMBINATION && !ch1->combined)
	continue;

      /* Find the first reference with non-ZERO distance.  */
      if (ch1->length == 0)
	j = ch1->refs.length();
      else
	{
	  for (j = 0; ch1->refs.iterate (j, &ref); ++j)
	    if (ref->distance != 0)
	      break;
	}

      /* Sort all ZERO distance references by position.  */
      qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);

      /* The flags below are only maintained for root (uncombined) chains.  */
      if (ch1->combined)
	continue;

      /* For ZERO length chain, has_max_use_after must be true since root
	 combined stmt must dominates others.  */
      if (ch1->length == 0)
	{
	  ch1->has_max_use_after = true;
	  continue;
	}
      /* Check if there is use at max distance after root for combined chains
	 and set flag accordingly.  */
      ch1->has_max_use_after = false;
      gimple *root_stmt = get_chain_root (ch1)->stmt;
      for (j = 1; ch1->refs.iterate (j, &ref); ++j)
	{
	  if (ref->distance == ch1->length
	      && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt))
	    {
	      ch1->has_max_use_after = true;
	      break;
	    }
	}
    }
}
2452 
2453 /* Prepare initializers for CHAIN in LOOP.  Returns false if this is
2454    impossible because one of these initializers may trap, true otherwise.  */
2455 
static bool
prepare_initializers_chain (struct loop *loop, chain_p chain)
{
  /* An invariant chain needs a single initializer; otherwise one per
     distance step of the chain.  */
  unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
  struct data_reference *dr = get_chain_root (chain)->ref;
  tree init;
  dref laref;
  edge entry = loop_preheader_edge (loop);

  /* Find the initializers for the variables, and check that they cannot
     trap.  */
  chain->inits.create (n);
  for (i = 0; i < n; i++)
    chain->inits.quick_push (NULL_TREE);

  /* If we have replaced some looparound phi nodes, use their initializers
     instead of creating our own.  */
  FOR_EACH_VEC_ELT (chain->refs, i, laref)
    {
      if (gimple_code (laref->stmt) != GIMPLE_PHI)
	continue;

      gcc_assert (laref->distance > 0);
      chain->inits[n - laref->distance]
	= PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
    }

  /* Materialize the remaining initializers as loads of the reference at
     negative iterations, emitted on the preheader edge.  */
  for (i = 0; i < n; i++)
    {
      gimple_seq stmts = NULL;

      if (chain->inits[i] != NULL_TREE)
	continue;

      init = ref_at_iteration (dr, (int) i - n, &stmts);
      /* Unless the reference is accessed on every iteration, the
	 initializer would be executed unconditionally in the preheader,
	 so it must not trap.  */
      if (!chain->all_always_accessed && tree_could_trap_p (init))
	{
	  gimple_seq_discard (stmts);
	  return false;
	}

      if (stmts)
	gsi_insert_seq_on_edge_immediate (entry, stmts);

      chain->inits[i] = init;
    }

  return true;
}
2505 
2506 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2507    be used because the initializers might trap.  */
2508 
2509 static void
prepare_initializers(struct loop * loop,vec<chain_p> chains)2510 prepare_initializers (struct loop *loop, vec<chain_p> chains)
2511 {
2512   chain_p chain;
2513   unsigned i;
2514 
2515   for (i = 0; i < chains.length (); )
2516     {
2517       chain = chains[i];
2518       if (prepare_initializers_chain (loop, chain))
2519 	i++;
2520       else
2521 	{
2522 	  release_chain (chain);
2523 	  chains.unordered_remove (i);
2524 	}
2525     }
2526 }
2527 
2528 /* Performs predictive commoning for LOOP.  Returns true if LOOP was
2529    unrolled.  */
2530 
static bool
tree_predictive_commoning_loop (struct loop *loop)
{
  vec<data_reference_p> datarefs;
  vec<ddr_p> dependences;
  struct component *components;
  vec<chain_p> chains = vNULL;
  unsigned unroll_factor;
  struct tree_niter_desc desc;
  bool unroll = false;
  edge exit;
  bitmap tmp_vars;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing loop %d\n",  loop->num);

  /* Find the data references and split them into components according to their
     dependence relations.  */
  auto_vec<loop_p, 3> loop_nest;
  dependences.create (10);
  datarefs.create (10);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependences))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Cannot analyze data dependencies\n");
      free_data_refs (datarefs);
      free_dependence_relations (dependences);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependences);

  components = split_data_refs_to_components (loop, datarefs, dependences);
  loop_nest.release ();
  free_dependence_relations (dependences);
  if (!components)
    {
      free_data_refs (datarefs);
      free_affine_expand_cache (&name_expansions);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial state:\n\n");
      dump_components (dump_file, components);
    }

  /* Find the suitable components and split them into chains.  */
  components = filter_suitable_components (loop, components);

  tmp_vars = BITMAP_ALLOC (NULL);
  looparound_phis = BITMAP_ALLOC (NULL);
  determine_roots (loop, components, &chains);
  release_components (components);

  if (!chains.exists ())
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Predictive commoning failed: no suitable chains\n");
      goto end;
    }
  prepare_initializers (loop, chains);

  /* Try to combine the chains that are always worked with together.  */
  try_combine_chains (loop, &chains);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Before commoning:\n\n");
      dump_chains (dump_file, chains);
    }

  /* Determine the unroll factor, and if the loop should be unrolled, ensure
     that its number of iterations is divisible by the factor.  */
  unroll_factor = determine_unroll_factor (chains);
  scev_reset ();
  unroll = (unroll_factor > 1
	    && can_unroll_loop_p (loop, unroll_factor, &desc));
  exit = single_dom_exit (loop);

  /* Execute the predictive commoning transformations, and possibly unroll the
     loop.  */
  if (unroll)
    {
      struct epcc_data dta;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);

      dta.chains = chains;
      dta.tmp_vars = tmp_vars;

      update_ssa (TODO_update_ssa_only_virtuals);

      /* Cfg manipulations performed in tree_transform_and_unroll_loop before
	 execute_pred_commoning_cbck is called may cause phi nodes to be
	 reallocated, which is a problem since CHAINS may point to these
	 statements.  To fix this, we store the ssa names defined by the
	 phi nodes here instead of the phi nodes themselves, and restore
	 the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
      replace_phis_by_defined_names (chains);

      tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
				      execute_pred_commoning_cbck, &dta);
      eliminate_temp_copies (loop, tmp_vars);
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Executing predictive commoning without unrolling.\n");
      execute_pred_commoning (loop, chains, tmp_vars);
    }

  /* Common cleanup for both the successful and the bail-out paths.  */
end: ;
  release_chains (chains);
  free_data_refs (datarefs);
  BITMAP_FREE (tmp_vars);
  BITMAP_FREE (looparound_phis);

  free_affine_expand_cache (&name_expansions);

  return unroll;
}
2659 
2660 /* Runs predictive commoning.  */
2661 
2662 unsigned
tree_predictive_commoning(void)2663 tree_predictive_commoning (void)
2664 {
2665   bool unrolled = false;
2666   struct loop *loop;
2667   unsigned ret = 0;
2668 
2669   initialize_original_copy_tables ();
2670   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
2671     if (optimize_loop_for_speed_p (loop))
2672       {
2673 	unrolled |= tree_predictive_commoning_loop (loop);
2674       }
2675 
2676   if (unrolled)
2677     {
2678       scev_reset ();
2679       ret = TODO_cleanup_cfg;
2680     }
2681   free_original_copy_tables ();
2682 
2683   return ret;
2684 }
2685 
2686 /* Predictive commoning Pass.  */
2687 
2688 static unsigned
run_tree_predictive_commoning(struct function * fun)2689 run_tree_predictive_commoning (struct function *fun)
2690 {
2691   if (number_of_loops (fun) <= 1)
2692     return 0;
2693 
2694   return tree_predictive_commoning ();
2695 }
2696 
namespace {

/* Registration data for the predictive commoning pass ("pcom").  */
const pass_data pass_data_predcom =
{
  GIMPLE_PASS, /* type */
  "pcom", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_PREDCOM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

/* Pass wrapper: runs predictive commoning when -fpredictive-commoning
   is enabled.  */
class pass_predcom : public gimple_opt_pass
{
public:
  pass_predcom (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_predcom, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_predictive_commoning != 0; }
  virtual unsigned int execute (function *fun)
    {
      return run_tree_predictive_commoning (fun);
    }

}; // class pass_predcom

} // anon namespace
2729 
2730 gimple_opt_pass *
make_pass_predcom(gcc::context * ctxt)2731 make_pass_predcom (gcc::context *ctxt)
2732 {
2733   return new pass_predcom (ctxt);
2734 }
2735 
2736 
2737