1 /* Predictive commoning.
2 Copyright (C) 2005-2016 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
   as generalized strength reduction -- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
30
31 Let us demonstrate what is done on an example:
32
33 for (i = 0; i < 100; i++)
34 {
35 a[i+2] = a[i] + a[i+1];
36 b[10] = b[10] + i;
37 c[i] = c[99 - i];
38 d[i] = d[i + 1];
39 }
40
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
46
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
51
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
66
67 In our example, we get the following chains (the chain for c is invalid).
68
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
72
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
82 reuse),
83
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
86
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
92
93 e[i] + f[i+1] + e[i+1] + f[i]
94
95 can be reassociated as
96
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
98
99 and we can combine the chains for e and f into one chain.
100
101 5) For each root reference (end of the chain) R, let N be maximum distance
102 of a reference reusing its value. Variables R0 up to RN are created,
103 together with phi nodes that transfer values from R1 .. RN to
104 R0 .. R(N-1).
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
111
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
115
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
118 {
119 f = phi (a[0], s);
120 s = phi (a[1], f);
121 x = phi (b[10], x);
122
123 f = f + s;
124 a[i+2] = f;
125 x = x + i;
126 b[10] = x;
127 }
128
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
135
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
139
140 In our case, F = 2 and the (main loop of the) result is
141
142 for (i = 0; i < ...; i += 2)
143 {
144 f = phi (a[0], f);
145 s = phi (a[1], s);
146 x = phi (b[10], x);
147
148 f = f + s;
149 a[i+2] = f;
150 x = x + i;
151 b[10] = x;
152
153 s = s + f;
154 a[i+3] = s;
155 x = x + i;
156 b[10] = x;
157 }
158
159 TODO -- stores killing other stores can be taken into account, e.g.,
160 for (i = 0; i < n; i++)
161 {
162 a[i] = 1;
163 a[i+2] = 2;
164 }
165
166 can be replaced with
167
168 t0 = a[0];
169 t1 = a[1];
170 for (i = 0; i < n; i++)
171 {
172 a[i] = 1;
173 t2 = 2;
174 t0 = t1;
175 t1 = t2;
176 }
177 a[n] = t0;
178 a[n+1] = t1;
179
180 The interesting part is that this would generalize store motion; still, since
181 sm is performed elsewhere, it does not seem that important.
182
183 Predictive commoning can be generalized for arbitrary computations (not
184 just memory loads), and also nontrivial transfer functions (e.g., replacing
185 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
186
187 #include "config.h"
188 #include "system.h"
189 #include "coretypes.h"
190 #include "backend.h"
191 #include "rtl.h"
192 #include "tree.h"
193 #include "gimple.h"
194 #include "predict.h"
195 #include "tree-pass.h"
196 #include "ssa.h"
197 #include "gimple-pretty-print.h"
198 #include "alias.h"
199 #include "fold-const.h"
200 #include "cfgloop.h"
201 #include "tree-eh.h"
202 #include "gimplify.h"
203 #include "gimple-iterator.h"
204 #include "gimplify-me.h"
205 #include "tree-ssa-loop-ivopts.h"
206 #include "tree-ssa-loop-manip.h"
207 #include "tree-ssa-loop-niter.h"
208 #include "tree-ssa-loop.h"
209 #include "tree-into-ssa.h"
210 #include "tree-dfa.h"
211 #include "tree-ssa.h"
212 #include "tree-data-ref.h"
213 #include "tree-scalar-evolution.h"
214 #include "params.h"
215 #include "tree-affine.h"
216 #include "builtins.h"
217
218 /* The maximum number of iterations between the considered memory
219 references. */
220
221 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
222
/* Data references (or phi nodes that carry data reference values across
   loop iterations).  */

typedef struct dref_d
{
  /* The reference itself.  */
  struct data_reference *ref;

  /* The statement in that the reference appears.  */
  gimple *stmt;

  /* In case that STMT is a phi node, this field is set to the SSA name
     defined by it in replace_phis_by_defined_names (in order to avoid
     pointing to phi node that got reallocated in the meantime).  */
  tree name_defined_by_phi;

  /* Distance of the reference from the root of the chain (in number of
     iterations of the loop).  */
  unsigned distance;

  /* Number of iterations offset from the first reference in the component.  */
  widest_int offset;

  /* Number of the reference in a component, in dominance ordering
     (the order in which the references are pushed onto the component's
     refs vector in split_data_refs_to_components).  */
  unsigned pos;

  /* True if the memory reference is always accessed when the loop is
     entered.  */
  unsigned always_accessed : 1;
} *dref;
253
254
/* Type of the chain of the references.  */

enum chain_type
{
  /* The addresses of the references in the chain are constant; such a
     chain corresponds to load motion (dumped as "Load motion").  */
  CT_INVARIANT,

  /* There are only loads in the chain.  */
  CT_LOAD,

  /* Root of the chain is store, the rest are loads.  */
  CT_STORE_LOAD,

  /* A combination of two chains.  */
  CT_COMBINATION
};
271
/* Chains of data references.  */

typedef struct chain
{
  /* Type of the chain.  */
  enum chain_type type;

  /* For combination chains, the operator and the two chains that are
     combined, and the type of the result.  */
  enum tree_code op;
  tree rslt_type;
  struct chain *ch1, *ch2;

  /* The references in the chain.  Element 0 is the root of the chain
     (see get_chain_root).  */
  vec<dref> refs;

  /* The maximum distance of the reference in the chain from the root
     (in number of loop iterations).  */
  unsigned length;

  /* The variables used to copy the value throughout iterations (the
     R0 .. RN of the description at the top of this file).  */
  vec<tree> vars;

  /* Initializers for the variables.  */
  vec<tree> inits;

  /* True if there is a use of a variable with the maximal distance
     that comes after the root in the loop.  */
  unsigned has_max_use_after : 1;

  /* True if all the memory references in the chain are always accessed.  */
  unsigned all_always_accessed : 1;

  /* True if this chain was combined together with some other chain.  */
  unsigned combined : 1;
} *chain_p;
307
308
/* Describes the knowledge about the step of the memory references in
   the component.  */

enum ref_step_type
{
  /* The step is zero, i.e., the address is loop invariant.  */
  RS_INVARIANT,

  /* The step is known to be nonzero.  */
  RS_NONZERO,

  /* The step may or may not be nonzero.  */
  RS_ANY
};
323
/* Components of the data dependence graph.  */

struct component
{
  /* The references in the component.  */
  vec<dref> refs;

  /* What we know about the step of the references in the component.  */
  enum ref_step_type comp_step;

  /* Next component in the singly-linked list of components.  */
  struct component *next;
};
337
338 /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
339
340 static bitmap looparound_phis;
341
342 /* Cache used by tree_to_aff_combination_expand. */
343
344 static hash_map<tree, name_expansion *> *name_expansions;
345
346 /* Dumps data reference REF to FILE. */
347
348 extern void dump_dref (FILE *, dref);
349 void
dump_dref(FILE * file,dref ref)350 dump_dref (FILE *file, dref ref)
351 {
352 if (ref->ref)
353 {
354 fprintf (file, " ");
355 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
356 fprintf (file, " (id %u%s)\n", ref->pos,
357 DR_IS_READ (ref->ref) ? "" : ", write");
358
359 fprintf (file, " offset ");
360 print_decs (ref->offset, file);
361 fprintf (file, "\n");
362
363 fprintf (file, " distance %u\n", ref->distance);
364 }
365 else
366 {
367 if (gimple_code (ref->stmt) == GIMPLE_PHI)
368 fprintf (file, " looparound ref\n");
369 else
370 fprintf (file, " combination ref\n");
371 fprintf (file, " in statement ");
372 print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
373 fprintf (file, "\n");
374 fprintf (file, " distance %u\n", ref->distance);
375 }
376
377 }
378
379 /* Dumps CHAIN to FILE. */
380
381 extern void dump_chain (FILE *, chain_p);
382 void
dump_chain(FILE * file,chain_p chain)383 dump_chain (FILE *file, chain_p chain)
384 {
385 dref a;
386 const char *chain_type;
387 unsigned i;
388 tree var;
389
390 switch (chain->type)
391 {
392 case CT_INVARIANT:
393 chain_type = "Load motion";
394 break;
395
396 case CT_LOAD:
397 chain_type = "Loads-only";
398 break;
399
400 case CT_STORE_LOAD:
401 chain_type = "Store-loads";
402 break;
403
404 case CT_COMBINATION:
405 chain_type = "Combination";
406 break;
407
408 default:
409 gcc_unreachable ();
410 }
411
412 fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
413 chain->combined ? " (combined)" : "");
414 if (chain->type != CT_INVARIANT)
415 fprintf (file, " max distance %u%s\n", chain->length,
416 chain->has_max_use_after ? "" : ", may reuse first");
417
418 if (chain->type == CT_COMBINATION)
419 {
420 fprintf (file, " equal to %p %s %p in type ",
421 (void *) chain->ch1, op_symbol_code (chain->op),
422 (void *) chain->ch2);
423 print_generic_expr (file, chain->rslt_type, TDF_SLIM);
424 fprintf (file, "\n");
425 }
426
427 if (chain->vars.exists ())
428 {
429 fprintf (file, " vars");
430 FOR_EACH_VEC_ELT (chain->vars, i, var)
431 {
432 fprintf (file, " ");
433 print_generic_expr (file, var, TDF_SLIM);
434 }
435 fprintf (file, "\n");
436 }
437
438 if (chain->inits.exists ())
439 {
440 fprintf (file, " inits");
441 FOR_EACH_VEC_ELT (chain->inits, i, var)
442 {
443 fprintf (file, " ");
444 print_generic_expr (file, var, TDF_SLIM);
445 }
446 fprintf (file, "\n");
447 }
448
449 fprintf (file, " references:\n");
450 FOR_EACH_VEC_ELT (chain->refs, i, a)
451 dump_dref (file, a);
452
453 fprintf (file, "\n");
454 }
455
456 /* Dumps CHAINS to FILE. */
457
458 extern void dump_chains (FILE *, vec<chain_p> );
459 void
dump_chains(FILE * file,vec<chain_p> chains)460 dump_chains (FILE *file, vec<chain_p> chains)
461 {
462 chain_p chain;
463 unsigned i;
464
465 FOR_EACH_VEC_ELT (chains, i, chain)
466 dump_chain (file, chain);
467 }
468
469 /* Dumps COMP to FILE. */
470
471 extern void dump_component (FILE *, struct component *);
472 void
dump_component(FILE * file,struct component * comp)473 dump_component (FILE *file, struct component *comp)
474 {
475 dref a;
476 unsigned i;
477
478 fprintf (file, "Component%s:\n",
479 comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
480 FOR_EACH_VEC_ELT (comp->refs, i, a)
481 dump_dref (file, a);
482 fprintf (file, "\n");
483 }
484
485 /* Dumps COMPS to FILE. */
486
487 extern void dump_components (FILE *, struct component *);
488 void
dump_components(FILE * file,struct component * comps)489 dump_components (FILE *file, struct component *comps)
490 {
491 struct component *comp;
492
493 for (comp = comps; comp; comp = comp->next)
494 dump_component (file, comp);
495 }
496
497 /* Frees a chain CHAIN. */
498
499 static void
release_chain(chain_p chain)500 release_chain (chain_p chain)
501 {
502 dref ref;
503 unsigned i;
504
505 if (chain == NULL)
506 return;
507
508 FOR_EACH_VEC_ELT (chain->refs, i, ref)
509 free (ref);
510
511 chain->refs.release ();
512 chain->vars.release ();
513 chain->inits.release ();
514
515 free (chain);
516 }
517
518 /* Frees CHAINS. */
519
520 static void
release_chains(vec<chain_p> chains)521 release_chains (vec<chain_p> chains)
522 {
523 unsigned i;
524 chain_p chain;
525
526 FOR_EACH_VEC_ELT (chains, i, chain)
527 release_chain (chain);
528 chains.release ();
529 }
530
/* Frees a component COMP.  Note that the dref elements in COMP->refs are
   not freed here; callers that own them free them separately (see
   filter_suitable_components).  */

static void
release_component (struct component *comp)
{
  comp->refs.release ();
  free (comp);
}
539
540 /* Frees list of components COMPS. */
541
542 static void
release_components(struct component * comps)543 release_components (struct component *comps)
544 {
545 struct component *act, *next;
546
547 for (act = comps; act; act = next)
548 {
549 next = act->next;
550 release_component (act);
551 }
552 }
553
/* Finds a root of tree given by FATHERS containing A, and performs path
   shortening.  */

static unsigned
component_of (unsigned fathers[], unsigned a)
{
  unsigned root = a;

  /* Walk up to the representative of the equivalence class.  */
  while (fathers[root] != root)
    root = fathers[root];

  /* Path compression: point every node on the path directly at ROOT.  */
  while (a != root)
    {
      unsigned next = fathers[a];
      fathers[a] = root;
      a = next;
    }

  return root;
}
573
574 /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
575 components, A and B are components to merge. */
576
577 static void
merge_comps(unsigned fathers[],unsigned sizes[],unsigned a,unsigned b)578 merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
579 {
580 unsigned ca = component_of (fathers, a);
581 unsigned cb = component_of (fathers, b);
582
583 if (ca == cb)
584 return;
585
586 if (sizes[ca] < sizes[cb])
587 {
588 sizes[cb] += sizes[ca];
589 fathers[ca] = cb;
590 }
591 else
592 {
593 sizes[ca] += sizes[cb];
594 fathers[cb] = ca;
595 }
596 }
597
598 /* Returns true if A is a reference that is suitable for predictive commoning
599 in the innermost loop that contains it. REF_STEP is set according to the
600 step of the reference A. */
601
602 static bool
suitable_reference_p(struct data_reference * a,enum ref_step_type * ref_step)603 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
604 {
605 tree ref = DR_REF (a), step = DR_STEP (a);
606
607 if (!step
608 || TREE_THIS_VOLATILE (ref)
609 || !is_gimple_reg_type (TREE_TYPE (ref))
610 || tree_could_throw_p (ref))
611 return false;
612
613 if (integer_zerop (step))
614 *ref_step = RS_INVARIANT;
615 else if (integer_nonzerop (step))
616 *ref_step = RS_NONZERO;
617 else
618 *ref_step = RS_ANY;
619
620 return true;
621 }
622
623 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
624
625 static void
aff_combination_dr_offset(struct data_reference * dr,aff_tree * offset)626 aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
627 {
628 tree type = TREE_TYPE (DR_OFFSET (dr));
629 aff_tree delta;
630
631 tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
632 &name_expansions);
633 aff_combination_const (&delta, type, wi::to_widest (DR_INIT (dr)));
634 aff_combination_add (offset, &delta);
635 }
636
/* Determines number of iterations of the innermost enclosing loop before B
   refers to exactly the same location as A and stores it to OFF.  If A and
   B do not have the same step, they never meet, or anything else fails,
   returns false, otherwise returns true.  Both A and B are assumed to
   satisfy suitable_reference_p.  */

static bool
determine_offset (struct data_reference *a, struct data_reference *b,
		  widest_int *off)
{
  aff_tree diff, baseb, step;
  tree typea, typeb;

  /* Check that both the references access the location in the same type.  */
  typea = TREE_TYPE (DR_REF (a));
  typeb = TREE_TYPE (DR_REF (b));
  if (!useless_type_conversion_p (typeb, typea))
    return false;

  /* Check whether the base address and the step of both references is the
     same.  */
  if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
      || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
    return false;

  if (integer_zerop (DR_STEP (a)))
    {
      /* If the references have loop invariant address, check that they access
	 exactly the same location.  */
      *off = 0;
      return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
	      && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
    }

  /* Compare the offsets of the addresses, and check whether the difference
     is a multiple of step.  DIFF is computed as offset(A) - offset(B) in
     affine form.  */
  aff_combination_dr_offset (a, &diff);
  aff_combination_dr_offset (b, &baseb);
  aff_combination_scale (&baseb, -1);
  aff_combination_add (&diff, &baseb);

  /* OFF is set to the multiple by aff_combination_constant_multiple_p.  */
  tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
				  &step, &name_expansions);
  return aff_combination_constant_multiple_p (&diff, &step, off);
}
682
683 /* Returns the last basic block in LOOP for that we are sure that
684 it is executed whenever the loop is entered. */
685
686 static basic_block
last_always_executed_block(struct loop * loop)687 last_always_executed_block (struct loop *loop)
688 {
689 unsigned i;
690 vec<edge> exits = get_loop_exit_edges (loop);
691 edge ex;
692 basic_block last = loop->latch;
693
694 FOR_EACH_VEC_ELT (exits, i, ex)
695 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
696 exits.release ();
697
698 return last;
699 }
700
/* Splits dependence graph on DATAREFS described by DEPENDS to components.
   A union-find structure (COMP_FATHER/COMP_SIZE) over the dataref indices
   is used; index N is reserved for the "bad" component collecting the
   references that cannot be handled.  Returns the linked list of the
   resulting components, or NULL on failure.  */

static struct component *
split_data_refs_to_components (struct loop *loop,
			       vec<data_reference_p> datarefs,
			       vec<ddr_p> depends)
{
  unsigned i, n = datarefs.length ();
  unsigned ca, ia, ib, bad;
  unsigned *comp_father = XNEWVEC (unsigned, n + 1);
  unsigned *comp_size = XNEWVEC (unsigned, n + 1);
  struct component **comps;
  struct data_reference *dr, *dra, *drb;
  struct data_dependence_relation *ddr;
  struct component *comp_list = NULL, *comp;
  dref dataref;
  basic_block last_always_executed = last_always_executed_block (loop);

  /* Initially each data reference forms a singleton class of its own.  */
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      if (!DR_REF (dr))
	{
	  /* A fake reference for call or asm_expr that may clobber memory;
	     just fail.  */
	  goto end;
	}
      /* predcom pass isn't prepared to handle calls with data references.  */
      if (is_gimple_call (DR_STMT (dr)))
	goto end;
      /* Remember the index of DR in its AUX field for fast lookups below.  */
      dr->aux = (void *) (size_t) i;
      comp_father[i] = i;
      comp_size[i] = 1;
    }

  /* A component reserved for the "bad" data references.  */
  comp_father[n] = n;
  comp_size[n] = 1;

  /* Move every reference unsuitable for predictive commoning into the bad
     component.  */
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      enum ref_step_type dummy;

      if (!suitable_reference_p (dr, &dummy))
	{
	  ia = (unsigned) (size_t) dr->aux;
	  merge_comps (comp_father, comp_size, n, ia);
	}
    }

  /* Merge the classes connected by (possible) dependences.  */
  FOR_EACH_VEC_ELT (depends, i, ddr)
    {
      widest_int dummy_off;

      /* A dependence known not to exist may be ignored.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	continue;

      dra = DDR_A (ddr);
      drb = DDR_B (ddr);
      ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
      ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
      if (ia == ib)
	continue;

      bad = component_of (comp_father, n);

      /* If both A and B are reads, we may ignore unsuitable dependences.  */
      if (DR_IS_READ (dra) && DR_IS_READ (drb))
	{
	  if (ia == bad || ib == bad
	      || !determine_offset (dra, drb, &dummy_off))
	    continue;
	}
      /* If A is read and B write or vice versa and there is unsuitable
	 dependence, instead of merging both components into a component
	 that will certainly not pass suitable_component_p, just put the
	 read into bad component, perhaps at least the write together with
	 all the other data refs in its component will be optimizable.  */
      else if (DR_IS_READ (dra) && ib != bad)
	{
	  if (ia == bad)
	    continue;
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      merge_comps (comp_father, comp_size, bad, ia);
	      continue;
	    }
	}
      else if (DR_IS_READ (drb) && ia != bad)
	{
	  if (ib == bad)
	    continue;
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      merge_comps (comp_father, comp_size, bad, ib);
	      continue;
	    }
	}

      merge_comps (comp_father, comp_size, ia, ib);
    }

  /* Translate the union-find classes into the list of components, skipping
     everything that ended up in the bad component.  */
  comps = XCNEWVEC (struct component *, n);
  bad = component_of (comp_father, n);
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      ia = (unsigned) (size_t) dr->aux;
      ca = component_of (comp_father, ia);
      if (ca == bad)
	continue;

      comp = comps[ca];
      if (!comp)
	{
	  comp = XCNEW (struct component);
	  comp->refs.create (comp_size[ca]);
	  comps[ca] = comp;
	}

      dataref = XCNEW (struct dref_d);
      dataref->ref = dr;
      dataref->stmt = DR_STMT (dr);
      dataref->offset = 0;
      dataref->distance = 0;

      /* The reference is executed in every iteration iff its block
	 dominates the last always executed block of the loop.  */
      dataref->always_accessed
	= dominated_by_p (CDI_DOMINATORS, last_always_executed,
			  gimple_bb (dataref->stmt));
      /* POS records dominance order within the component.  */
      dataref->pos = comp->refs.length ();
      comp->refs.quick_push (dataref);
    }

  for (i = 0; i < n; i++)
    {
      comp = comps[i];
      if (comp)
	{
	  comp->next = comp_list;
	  comp_list = comp;
	}
    }
  free (comps);

end:
  free (comp_father);
  free (comp_size);
  return comp_list;
}
848
/* Returns true if the component COMP satisfies the conditions
   described in 2) at the beginning of this file.  LOOP is the current
   loop.  */

static bool
suitable_component_p (struct loop *loop, struct component *comp)
{
  unsigned i;
  dref a, first;
  basic_block ba, bp = loop->header;
  bool ok, has_write = false;

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      ba = gimple_bb (a->stmt);

      /* Condition 2b): each reference must be executed exactly once per
	 iteration (i.e., its block dominates the latch).  */
      if (!just_once_each_iteration_p (loop, ba))
	return false;

      /* The references are expected to come sorted in dominance order;
	 BP is the block of the previous reference.  */
      gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
      bp = ba;

      if (DR_IS_WRITE (a->ref))
	has_write = true;
    }

  /* The first reference determines the step classification of the whole
     component; it must be suitable since unsuitable references were moved
     to the bad component earlier.  */
  first = comp->refs[0];
  ok = suitable_reference_p (first->ref, &comp->comp_step);
  gcc_assert (ok);
  first->offset = 0;

  /* Conditions 2a) and 2c): every other reference must have a computable
     iteration distance from the first one (same step, difference a
     constant multiple of the step).  */
  for (i = 1; comp->refs.iterate (i, &a); i++)
    {
      if (!determine_offset (first->ref, a->ref, &a->offset))
	return false;

      enum ref_step_type a_step;
      gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
			   && a_step == comp->comp_step);
    }

  /* If there is a write inside the component, we must know whether the
     step is nonzero or not -- we would not otherwise be able to recognize
     whether the value accessed by reads comes from the OFFSET-th iteration
     or the previous one.  */
  if (has_write && comp->comp_step == RS_ANY)
    return false;

  return true;
}
899
900 /* Check the conditions on references inside each of components COMPS,
901 and remove the unsuitable components from the list. The new list
902 of components is returned. The conditions are described in 2) at
903 the beginning of this file. LOOP is the current loop. */
904
905 static struct component *
filter_suitable_components(struct loop * loop,struct component * comps)906 filter_suitable_components (struct loop *loop, struct component *comps)
907 {
908 struct component **comp, *act;
909
910 for (comp = &comps; *comp; )
911 {
912 act = *comp;
913 if (suitable_component_p (loop, act))
914 comp = &act->next;
915 else
916 {
917 dref ref;
918 unsigned i;
919
920 *comp = act->next;
921 FOR_EACH_VEC_ELT (act->refs, i, ref)
922 free (ref);
923 release_component (act);
924 }
925 }
926
927 return comps;
928 }
929
930 /* Compares two drefs A and B by their offset and position. Callback for
931 qsort. */
932
933 static int
order_drefs(const void * a,const void * b)934 order_drefs (const void *a, const void *b)
935 {
936 const dref *const da = (const dref *) a;
937 const dref *const db = (const dref *) b;
938 int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
939
940 if (offcmp != 0)
941 return offcmp;
942
943 return (*da)->pos - (*db)->pos;
944 }
945
946 /* Compares two drefs A and B by their position. Callback for qsort. */
947
948 static int
order_drefs_by_pos(const void * a,const void * b)949 order_drefs_by_pos (const void *a, const void *b)
950 {
951 const dref *const da = (const dref *) a;
952 const dref *const db = (const dref *) b;
953
954 return (*da)->pos - (*db)->pos;
955 }
956
/* Returns root of the CHAIN.  The root is always the first element of the
   refs vector.  */

static inline dref
get_chain_root (chain_p chain)
{
  return chain->refs[0];
}
964
/* Adds REF to the chain CHAIN.  REF is released instead if it is too far
   from the root (at least MAX_DISTANCE iterations).  */

static void
add_ref_to_chain (chain_p chain, dref ref)
{
  dref root = get_chain_root (chain);

  /* The root has the smallest offset in the chain.  */
  gcc_assert (wi::les_p (root->offset, ref->offset));
  widest_int dist = ref->offset - root->offset;
  /* Keeping a value alive for MAX_DISTANCE or more iterations would not
     pay off; drop such references.  */
  if (wi::leu_p (MAX_DISTANCE, dist))
    {
      free (ref);
      return;
    }
  gcc_assert (wi::fits_uhwi_p (dist));

  chain->refs.safe_push (ref);

  ref->distance = dist.to_uhwi ();

  /* A new maximum distance invalidates HAS_MAX_USE_AFTER; it is
     re-established by the test below.  */
  if (ref->distance >= chain->length)
    {
      chain->length = ref->distance;
      chain->has_max_use_after = false;
    }

  /* REF comes after the root in the loop body if its position (dominance
     order) is greater.  */
  if (ref->distance == chain->length
      && ref->pos > root->pos)
    chain->has_max_use_after = true;

  chain->all_always_accessed &= ref->always_accessed;
}
997
998 /* Returns the chain for invariant component COMP. */
999
1000 static chain_p
make_invariant_chain(struct component * comp)1001 make_invariant_chain (struct component *comp)
1002 {
1003 chain_p chain = XCNEW (struct chain);
1004 unsigned i;
1005 dref ref;
1006
1007 chain->type = CT_INVARIANT;
1008
1009 chain->all_always_accessed = true;
1010
1011 FOR_EACH_VEC_ELT (comp->refs, i, ref)
1012 {
1013 chain->refs.safe_push (ref);
1014 chain->all_always_accessed &= ref->always_accessed;
1015 }
1016
1017 return chain;
1018 }
1019
1020 /* Make a new chain rooted at REF. */
1021
1022 static chain_p
make_rooted_chain(dref ref)1023 make_rooted_chain (dref ref)
1024 {
1025 chain_p chain = XCNEW (struct chain);
1026
1027 chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;
1028
1029 chain->refs.safe_push (ref);
1030 chain->all_always_accessed = ref->always_accessed;
1031
1032 ref->distance = 0;
1033
1034 return chain;
1035 }
1036
1037 /* Returns true if CHAIN is not trivial. */
1038
1039 static bool
nontrivial_chain_p(chain_p chain)1040 nontrivial_chain_p (chain_p chain)
1041 {
1042 return chain != NULL && chain->refs.length () > 1;
1043 }
1044
1045 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1046 is no such name. */
1047
1048 static tree
name_for_ref(dref ref)1049 name_for_ref (dref ref)
1050 {
1051 tree name;
1052
1053 if (is_gimple_assign (ref->stmt))
1054 {
1055 if (!ref->ref || DR_IS_READ (ref->ref))
1056 name = gimple_assign_lhs (ref->stmt);
1057 else
1058 name = gimple_assign_rhs1 (ref->stmt);
1059 }
1060 else
1061 name = PHI_RESULT (ref->stmt);
1062
1063 return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1064 }
1065
/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
   iterations of the innermost enclosing loop).  */

static bool
valid_initializer_p (struct data_reference *ref,
		     unsigned distance, struct data_reference *root)
{
  aff_tree diff, base, step;
  widest_int off;

  /* Both REF and ROOT must be accessing the same object.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
    return false;

  /* The initializer is defined outside of loop, hence its address must be
     invariant inside the loop.  */
  gcc_assert (integer_zerop (DR_STEP (ref)));

  /* If the address of the reference is invariant, initializer must access
     exactly the same location.  */
  if (integer_zerop (DR_STEP (root)))
    return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
	    && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));

  /* Verify that this index of REF is equal to the root's index at
     -DISTANCE-th iteration.  DIFF is offset(ROOT) - offset(REF) in affine
     form; it must be exactly DISTANCE times the step of ROOT.  */
  aff_combination_dr_offset (root, &diff);
  aff_combination_dr_offset (ref, &base);
  aff_combination_scale (&base, -1);
  aff_combination_add (&diff, &base);

  tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
				  &step, &name_expansions);
  if (!aff_combination_constant_multiple_p (&diff, &step, &off))
    return false;

  /* The multiple must match the requested distance exactly.  */
  if (off != distance)
    return false;

  return true;
}
1107
/* Finds looparound phi node of LOOP that copies the value of REF, and if its
   initial value is correct (equal to initial value of REF shifted by one
   iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
   is the root of the current chain.  */

static gphi *
find_looparound_phi (struct loop *loop, dref ref, dref root)
{
  tree name, init, init_ref;
  gphi *phi = NULL;
  gimple *init_stmt;
  edge latch = loop_latch_edge (loop);
  struct data_reference init_dr;
  gphi_iterator psi;

  /* Determine the ssa name that holds the value of REF in this
     iteration.  */
  if (is_gimple_assign (ref->stmt))
    {
      if (DR_IS_READ (ref->ref))
	name = gimple_assign_lhs (ref->stmt);
      else
	name = gimple_assign_rhs1 (ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);
  if (!name)
    return NULL;

  /* Look for a phi node in the loop header that copies NAME around the
     loop, i.e., whose argument on the latch edge is NAME.  */
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
	break;
    }

  if (gsi_end_p (psi))
    return NULL;

  /* The initial value of the phi (on the preheader edge) must be loaded
     from a memory reference by a plain assignment ...  */
  init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  if (TREE_CODE (init) != SSA_NAME)
    return NULL;
  init_stmt = SSA_NAME_DEF_STMT (init);
  if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
    return NULL;
  gcc_assert (gimple_assign_lhs (init_stmt) == init);

  init_ref = gimple_assign_rhs1 (init_stmt);
  if (!REFERENCE_CLASS_P (init_ref)
      && !DECL_P (init_ref))
    return NULL;

  /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
     loop enclosing PHI).  */
  memset (&init_dr, 0, sizeof (struct data_reference));
  DR_REF (&init_dr) = init_ref;
  DR_STMT (&init_dr) = phi;
  if (!dr_analyze_innermost (&init_dr, loop))
    return NULL;

  /* ... and that reference must match ROOT shifted by one iteration more
     than REF's distance.  */
  if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
    return NULL;

  return phi;
}
1171
1172 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1173
1174 static void
insert_looparound_copy(chain_p chain,dref ref,gphi * phi)1175 insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
1176 {
1177 dref nw = XCNEW (struct dref_d), aref;
1178 unsigned i;
1179
1180 nw->stmt = phi;
1181 nw->distance = ref->distance + 1;
1182 nw->always_accessed = 1;
1183
1184 FOR_EACH_VEC_ELT (chain->refs, i, aref)
1185 if (aref->distance >= nw->distance)
1186 break;
1187 chain->refs.safe_insert (i, nw);
1188
1189 if (nw->distance > chain->length)
1190 {
1191 chain->length = nw->distance;
1192 chain->has_max_use_after = false;
1193 }
1194 }
1195
1196 /* For references in CHAIN that are copied around the LOOP (created previously
1197 by PRE, or by user), add the results of such copies to the chain. This
1198 enables us to remove the copies by unrolling, and may need less registers
1199 (also, it may allow us to combine chains together). */
1200
1201 static void
add_looparound_copies(struct loop * loop,chain_p chain)1202 add_looparound_copies (struct loop *loop, chain_p chain)
1203 {
1204 unsigned i;
1205 dref ref, root = get_chain_root (chain);
1206 gphi *phi;
1207
1208 FOR_EACH_VEC_ELT (chain->refs, i, ref)
1209 {
1210 phi = find_looparound_phi (loop, ref, root);
1211 if (!phi)
1212 continue;
1213
1214 bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1215 insert_looparound_copy (chain, ref, phi);
1216 }
1217 }
1218
1219 /* Find roots of the values and determine distances in the component COMP.
1220 The references are redistributed into CHAINS. LOOP is the current
1221 loop. */
1222
1223 static void
determine_roots_comp(struct loop * loop,struct component * comp,vec<chain_p> * chains)1224 determine_roots_comp (struct loop *loop,
1225 struct component *comp,
1226 vec<chain_p> *chains)
1227 {
1228 unsigned i;
1229 dref a;
1230 chain_p chain = NULL;
1231 widest_int last_ofs = 0;
1232
1233 /* Invariants are handled specially. */
1234 if (comp->comp_step == RS_INVARIANT)
1235 {
1236 chain = make_invariant_chain (comp);
1237 chains->safe_push (chain);
1238 return;
1239 }
1240
1241 comp->refs.qsort (order_drefs);
1242
1243 FOR_EACH_VEC_ELT (comp->refs, i, a)
1244 {
1245 if (!chain || DR_IS_WRITE (a->ref)
1246 || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
1247 {
1248 if (nontrivial_chain_p (chain))
1249 {
1250 add_looparound_copies (loop, chain);
1251 chains->safe_push (chain);
1252 }
1253 else
1254 release_chain (chain);
1255 chain = make_rooted_chain (a);
1256 last_ofs = a->offset;
1257 continue;
1258 }
1259
1260 add_ref_to_chain (chain, a);
1261 }
1262
1263 if (nontrivial_chain_p (chain))
1264 {
1265 add_looparound_copies (loop, chain);
1266 chains->safe_push (chain);
1267 }
1268 else
1269 release_chain (chain);
1270 }
1271
1272 /* Find roots of the values and determine distances in components COMPS, and
1273 separates the references to CHAINS. LOOP is the current loop. */
1274
1275 static void
determine_roots(struct loop * loop,struct component * comps,vec<chain_p> * chains)1276 determine_roots (struct loop *loop,
1277 struct component *comps, vec<chain_p> *chains)
1278 {
1279 struct component *comp;
1280
1281 for (comp = comps; comp; comp = comp->next)
1282 determine_roots_comp (loop, comp, chains);
1283 }
1284
/* Replace the reference in statement STMT with temporary variable
   NEW_TREE.  If SET is true, NEW_TREE is instead initialized to the value of
   the reference in the statement.  IN_LHS is true if the reference
   is in the lhs of STMT, false if it is in rhs.  */

static void
replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
{
  tree val;
  gassign *new_stmt;
  gimple_stmt_iterator bsi, psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      /* A phi can only be replaced by its value, not set from it.  */
      gcc_assert (!in_lhs && !set);

      val = PHI_RESULT (stmt);
      bsi = gsi_after_labels (gimple_bb (stmt));
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, false);

      /* Turn the phi node into GIMPLE_ASSIGN.  */
      new_stmt = gimple_build_assign (val, new_tree);
      gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
      return;
    }

  /* Since the reference is of gimple_reg type, it should only
     appear as lhs or rhs of modify statement.  */
  gcc_assert (is_gimple_assign (stmt));

  bsi = gsi_for_stmt (stmt);

  /* If we do not need to initialize NEW_TREE, just replace the use of OLD.  */
  if (!set)
    {
      gcc_assert (!in_lhs);
      gimple_assign_set_rhs_from_tree (&bsi, new_tree);
      /* The statement may have been reallocated; re-fetch it before
	 updating.  */
      stmt = gsi_stmt (bsi);
      update_stmt (stmt);
      return;
    }

  if (in_lhs)
    {
      /* We have statement

	 OLD = VAL

	 If OLD is a memory reference, then VAL is gimple_val, and we transform
	 this to

	 OLD = VAL
	 NEW = VAL

	 Otherwise, we are replacing a combination chain,
	 VAL is the expression that performs the combination, and OLD is an
	 SSA name.  In this case, we transform the assignment to

	 OLD = VAL
	 NEW = OLD

	 */

      val = gimple_assign_lhs (stmt);
      if (TREE_CODE (val) != SSA_NAME)
	{
	  val = gimple_assign_rhs1 (stmt);
	  gcc_assert (gimple_assign_single_p (stmt));
	  /* A clobber has no meaningful rhs value; use the default
	     definition of the variable instead.  */
	  if (TREE_CLOBBER_P (val))
	    val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
	  else
	    gcc_assert (gimple_assign_copy_p (stmt));
	}
    }
  else
    {
      /* VAL = OLD

	 is transformed to

	 VAL = OLD
	 NEW = VAL  */

      val = gimple_assign_lhs (stmt);
    }

  new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
  gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
}
1375
/* Returns a memory reference to DR in the ITER-th iteration of
   the loop it was analyzed in.  Append init stmts to STMTS.  */

static tree
ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts)
{
  tree off = DR_OFFSET (dr);
  tree coff = DR_INIT (dr);
  /* Add ITER * STEP to the constant part (COFF) if the step is constant,
     otherwise to the variable offset part (OFF).  */
  if (iter == 0)
    ;
  else if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
    coff = size_binop (PLUS_EXPR, coff,
		       size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
  else
    off = size_binop (PLUS_EXPR, off,
		      size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
  /* Gimplify the address so that it is a valid MEM_REF operand.  */
  tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
  addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
				 is_gimple_mem_ref_addr, NULL_TREE);
  /* The constant offset is folded into the MEM_REF's second operand, which
     also carries the alias pointer type.  */
  tree alias_ptr = fold_convert (reference_alias_ptr_type (DR_REF (dr)), coff);
  tree type = build_aligned_type (TREE_TYPE (DR_REF (dr)),
				  get_object_alignment (DR_REF (dr)));
  /* While data-ref analysis punts on bit offsets it still handles
     bitfield accesses at byte boundaries.  Cope with that.  Note that
     we cannot simply re-apply the outer COMPONENT_REF because the
     byte-granular portion of it is already applied via DR_INIT and
     DR_OFFSET, so simply build a BIT_FIELD_REF knowing that the bits
     start at offset zero.  */
  if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
    {
      tree field = TREE_OPERAND (DR_REF (dr), 1);
      return build3 (BIT_FIELD_REF, TREE_TYPE (DR_REF (dr)),
		     build2 (MEM_REF, type, addr, alias_ptr),
		     DECL_SIZE (field), bitsize_zero_node);
    }
  else
    return fold_build2 (MEM_REF, type, addr, alias_ptr);
}
1415
1416 /* Get the initialization expression for the INDEX-th temporary variable
1417 of CHAIN. */
1418
1419 static tree
get_init_expr(chain_p chain,unsigned index)1420 get_init_expr (chain_p chain, unsigned index)
1421 {
1422 if (chain->type == CT_COMBINATION)
1423 {
1424 tree e1 = get_init_expr (chain->ch1, index);
1425 tree e2 = get_init_expr (chain->ch2, index);
1426
1427 return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1428 }
1429 else
1430 return chain->inits[index];
1431 }
1432
1433 /* Returns a new temporary variable used for the I-th variable carrying
1434 value of REF. The variable's uid is marked in TMP_VARS. */
1435
1436 static tree
predcom_tmp_var(tree ref,unsigned i,bitmap tmp_vars)1437 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1438 {
1439 tree type = TREE_TYPE (ref);
1440 /* We never access the components of the temporary variable in predictive
1441 commoning. */
1442 tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1443 bitmap_set_bit (tmp_vars, DECL_UID (var));
1444 return var;
1445 }
1446
/* Creates the variables for CHAIN, as well as phi nodes for them and
   initialization on entry to LOOP.  Uids of the newly created
   temporary variables are marked in TMP_VARS.  */

static void
initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  unsigned i;
  unsigned n = chain->length;
  dref root = get_chain_root (chain);
  /* If there is no use with the maximum distance after the root in the last
     iteration, the last variable can be reused as the first one.  */
  bool reuse_first = !chain->has_max_use_after;
  tree ref, init, var, next;
  gphi *phi;
  gimple_seq stmts;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* If N == 0, then all the references are within the single iteration.  And
     since this is an nonempty chain, reuse_first cannot be true.  */
  gcc_assert (n > 0 || !reuse_first);

  chain->vars.create (n + 1);

  if (chain->type == CT_COMBINATION)
    ref = gimple_assign_lhs (root->stmt);
  else
    ref = DR_REF (root->ref);

  /* Create N + 1 variables (or N if the last one is reused).  */
  for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
    {
      var = predcom_tmp_var (ref, i, tmp_vars);
      chain->vars.quick_push (var);
    }
  if (reuse_first)
    chain->vars.quick_push (chain->vars[0]);

  /* Replace the variables by ssa names based on them.  */
  FOR_EACH_VEC_ELT (chain->vars, i, var)
    chain->vars[i] = make_ssa_name (var);

  /* Create the phi nodes: VARS[i] gets INIT (chain, i) on entry, and
     VARS[i + 1] from the previous iteration on the latch edge.  */
  for (i = 0; i < n; i++)
    {
      var = chain->vars[i];
      next = chain->vars[i + 1];
      init = get_init_expr (chain, i);

      init = force_gimple_operand (init, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (entry, stmts);

      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
}
1500
1501 /* Create the variables and initialization statement for root of chain
1502 CHAIN. Uids of the newly created temporary variables are marked
1503 in TMP_VARS. */
1504
1505 static void
initialize_root(struct loop * loop,chain_p chain,bitmap tmp_vars)1506 initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
1507 {
1508 dref root = get_chain_root (chain);
1509 bool in_lhs = (chain->type == CT_STORE_LOAD
1510 || chain->type == CT_COMBINATION);
1511
1512 initialize_root_vars (loop, chain, tmp_vars);
1513 replace_ref_with (root->stmt,
1514 chain->vars[chain->length],
1515 true, in_lhs);
1516 }
1517
/* Initializes a variable for load motion for ROOT and prepares phi nodes and
   initialization on entry to LOOP if necessary.  The ssa name for the variable
   is stored in VARS.  If WRITTEN is true, also a phi node to copy its value
   around the loop is created.  Uid of the newly created temporary variable
   is marked in TMP_VARS.  INITS is the list containing the (single)
   initializer.  */

static void
initialize_root_vars_lm (struct loop *loop, dref root, bool written,
			 vec<tree> *vars, vec<tree> inits,
			 bitmap tmp_vars)
{
  unsigned i;
  tree ref = DR_REF (root->ref), init, var, next;
  gimple_seq stmts;
  gphi *phi;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* Find the initializer for the variable, and check that it cannot
     trap.  */
  init = inits[0];

  /* If the location is also written in the loop, we need two names: one
     for the phi result and one for the value on the latch edge.  */
  vars->create (written ? 2 : 1);
  var = predcom_tmp_var (ref, 0, tmp_vars);
  vars->quick_push (var);
  if (written)
    vars->quick_push ((*vars)[0]);

  /* Replace the variables by ssa names based on them.  */
  FOR_EACH_VEC_ELT (*vars, i, var)
    (*vars)[i] = make_ssa_name (var);

  var = (*vars)[0];

  init = force_gimple_operand (init, &stmts, written, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (entry, stmts);

  if (written)
    {
      /* Copy the value around the loop through a phi node.  */
      next = (*vars)[1];
      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
  else
    {
      /* Read-only case: a single initialization on the preheader edge
	 suffices.  */
      gassign *init_stmt = gimple_build_assign (var, init);
      gsi_insert_on_edge_immediate (entry, init_stmt);
    }
}
1568
1569
/* Execute load motion for references in chain CHAIN.  Uids of the newly
   created temporary variables are marked in TMP_VARS.  */

static void
execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  auto_vec<tree> vars;
  dref a;
  unsigned n_writes = 0, ridx, i;
  tree var;

  gcc_assert (chain->type == CT_INVARIANT);
  gcc_assert (!chain->combined);
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    if (DR_IS_WRITE (a->ref))
      n_writes++;

  /* If there are no reads in the loop, there is nothing to do.  */
  if (n_writes == chain->refs.length ())
    return;

  initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
			   &vars, chain->inits, tmp_vars);

  /* RIDX indexes the name in VARS that the current reference should use;
     references after the last write use VARS[1] (the value copied around
     the loop).  */
  ridx = 0;
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    {
      bool is_read = DR_IS_READ (a->ref);

      if (DR_IS_WRITE (a->ref))
	{
	  n_writes--;
	  if (n_writes)
	    {
	      /* Each write except the last one defines a fresh ssa name of
		 the variable for the references that follow it.  */
	      var = vars[0];
	      var = make_ssa_name (SSA_NAME_VAR (var));
	      vars[0] = var;
	    }
	  else
	    ridx = 1;
	}

      /* A write sets the variable from the stored value; a read is
	 replaced by a use of the variable.  */
      replace_ref_with (a->stmt, vars[ridx],
			!is_read, !is_read);
    }
}
1616
1617 /* Returns the single statement in that NAME is used, excepting
1618 the looparound phi nodes contained in one of the chains. If there is no
1619 such statement, or more statements, NULL is returned. */
1620
1621 static gimple *
single_nonlooparound_use(tree name)1622 single_nonlooparound_use (tree name)
1623 {
1624 use_operand_p use;
1625 imm_use_iterator it;
1626 gimple *stmt, *ret = NULL;
1627
1628 FOR_EACH_IMM_USE_FAST (use, it, name)
1629 {
1630 stmt = USE_STMT (use);
1631
1632 if (gimple_code (stmt) == GIMPLE_PHI)
1633 {
1634 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
1635 could not be processed anyway, so just fail for them. */
1636 if (bitmap_bit_p (looparound_phis,
1637 SSA_NAME_VERSION (PHI_RESULT (stmt))))
1638 continue;
1639
1640 return NULL;
1641 }
1642 else if (is_gimple_debug (stmt))
1643 continue;
1644 else if (ret != NULL)
1645 return NULL;
1646 else
1647 ret = stmt;
1648 }
1649
1650 return ret;
1651 }
1652
/* Remove statement STMT, as well as the chain of assignments in that it is
   used.  */

static void
remove_stmt (gimple *stmt)
{
  tree name;
  gimple *next;
  gimple_stmt_iterator psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      /* Find the single copy fed by the phi result before removing the
	 phi itself.  */
      name = PHI_RESULT (stmt);
      next = single_nonlooparound_use (name);
      reset_debug_uses (stmt);
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, true);

      /* Only continue if the phi result was consumed by a plain ssa name
	 copy.  */
      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }

  /* Remove STMT and then keep following the chain of single-use ssa name
     copies it feeds, removing each of them as well.  */
  while (1)
    {
      gimple_stmt_iterator bsi;

      bsi = gsi_for_stmt (stmt);

      name = gimple_assign_lhs (stmt);
      gcc_assert (TREE_CODE (name) == SSA_NAME);

      /* Find the next statement in the chain before deleting this one.  */
      next = single_nonlooparound_use (name);
      reset_debug_uses (stmt);

      unlink_stmt_vdef (stmt);
      gsi_remove (&bsi, true);
      release_defs (stmt);

      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }
}
1703
1704 /* Perform the predictive commoning optimization for a chain CHAIN.
1705 Uids of the newly created temporary variables are marked in TMP_VARS.*/
1706
1707 static void
execute_pred_commoning_chain(struct loop * loop,chain_p chain,bitmap tmp_vars)1708 execute_pred_commoning_chain (struct loop *loop, chain_p chain,
1709 bitmap tmp_vars)
1710 {
1711 unsigned i;
1712 dref a;
1713 tree var;
1714
1715 if (chain->combined)
1716 {
1717 /* For combined chains, just remove the statements that are used to
1718 compute the values of the expression (except for the root one).
1719 We delay this until after all chains are processed. */
1720 }
1721 else
1722 {
1723 /* For non-combined chains, set up the variables that hold its value,
1724 and replace the uses of the original references by these
1725 variables. */
1726 initialize_root (loop, chain, tmp_vars);
1727 for (i = 1; chain->refs.iterate (i, &a); i++)
1728 {
1729 var = chain->vars[chain->length - a->distance];
1730 replace_ref_with (a->stmt, var, false, false);
1731 }
1732 }
1733 }
1734
1735 /* Determines the unroll factor necessary to remove as many temporary variable
1736 copies as possible. CHAINS is the list of chains that will be
1737 optimized. */
1738
1739 static unsigned
determine_unroll_factor(vec<chain_p> chains)1740 determine_unroll_factor (vec<chain_p> chains)
1741 {
1742 chain_p chain;
1743 unsigned factor = 1, af, nfactor, i;
1744 unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1745
1746 FOR_EACH_VEC_ELT (chains, i, chain)
1747 {
1748 if (chain->type == CT_INVARIANT)
1749 continue;
1750
1751 if (chain->combined)
1752 {
1753 /* For combined chains, we can't handle unrolling if we replace
1754 looparound PHIs. */
1755 dref a;
1756 unsigned j;
1757 for (j = 1; chain->refs.iterate (j, &a); j++)
1758 if (gimple_code (a->stmt) == GIMPLE_PHI)
1759 return 1;
1760 continue;
1761 }
1762
1763 /* The best unroll factor for this chain is equal to the number of
1764 temporary variables that we create for it. */
1765 af = chain->length;
1766 if (chain->has_max_use_after)
1767 af++;
1768
1769 nfactor = factor * af / gcd (factor, af);
1770 if (nfactor <= max)
1771 factor = nfactor;
1772 }
1773
1774 return factor;
1775 }
1776
1777 /* Perform the predictive commoning optimization for CHAINS.
1778 Uids of the newly created temporary variables are marked in TMP_VARS. */
1779
1780 static void
execute_pred_commoning(struct loop * loop,vec<chain_p> chains,bitmap tmp_vars)1781 execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
1782 bitmap tmp_vars)
1783 {
1784 chain_p chain;
1785 unsigned i;
1786
1787 FOR_EACH_VEC_ELT (chains, i, chain)
1788 {
1789 if (chain->type == CT_INVARIANT)
1790 execute_load_motion (loop, chain, tmp_vars);
1791 else
1792 execute_pred_commoning_chain (loop, chain, tmp_vars);
1793 }
1794
1795 FOR_EACH_VEC_ELT (chains, i, chain)
1796 {
1797 if (chain->type == CT_INVARIANT)
1798 ;
1799 else if (chain->combined)
1800 {
1801 /* For combined chains, just remove the statements that are used to
1802 compute the values of the expression (except for the root one). */
1803 dref a;
1804 unsigned j;
1805 for (j = 1; chain->refs.iterate (j, &a); j++)
1806 remove_stmt (a->stmt);
1807 }
1808 }
1809
1810 update_ssa (TODO_update_ssa_only_virtuals);
1811 }
1812
1813 /* For each reference in CHAINS, if its defining statement is
1814 phi node, record the ssa name that is defined by it. */
1815
1816 static void
replace_phis_by_defined_names(vec<chain_p> chains)1817 replace_phis_by_defined_names (vec<chain_p> chains)
1818 {
1819 chain_p chain;
1820 dref a;
1821 unsigned i, j;
1822
1823 FOR_EACH_VEC_ELT (chains, i, chain)
1824 FOR_EACH_VEC_ELT (chain->refs, j, a)
1825 {
1826 if (gimple_code (a->stmt) == GIMPLE_PHI)
1827 {
1828 a->name_defined_by_phi = PHI_RESULT (a->stmt);
1829 a->stmt = NULL;
1830 }
1831 }
1832 }
1833
1834 /* For each reference in CHAINS, if name_defined_by_phi is not
1835 NULL, use it to set the stmt field. */
1836
1837 static void
replace_names_by_phis(vec<chain_p> chains)1838 replace_names_by_phis (vec<chain_p> chains)
1839 {
1840 chain_p chain;
1841 dref a;
1842 unsigned i, j;
1843
1844 FOR_EACH_VEC_ELT (chains, i, chain)
1845 FOR_EACH_VEC_ELT (chain->refs, j, a)
1846 if (a->stmt == NULL)
1847 {
1848 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
1849 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
1850 a->name_defined_by_phi = NULL_TREE;
1851 }
1852 }
1853
/* Wrapper over execute_pred_commoning, to pass it as a callback
   to tree_transform_and_unroll_loop.  */

struct epcc_data
{
  vec<chain_p> chains;	/* The chains to transform.  */
  bitmap tmp_vars;	/* Uids of the created temporary variables.  */
};
1862
1863 static void
execute_pred_commoning_cbck(struct loop * loop,void * data)1864 execute_pred_commoning_cbck (struct loop *loop, void *data)
1865 {
1866 struct epcc_data *const dta = (struct epcc_data *) data;
1867
1868 /* Restore phi nodes that were replaced by ssa names before
1869 tree_transform_and_unroll_loop (see detailed description in
1870 tree_predictive_commoning_loop). */
1871 replace_names_by_phis (dta->chains);
1872 execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
1873 }
1874
/* Base NAME and all the names in the chain of phi nodes that use it
   on variable VAR.  The phi nodes are recognized by being in the copies of
   the header of the LOOP.  */

static void
base_names_in_chain_on (struct loop *loop, tree name, tree var)
{
  gimple *stmt, *phi;
  imm_use_iterator iter;

  replace_ssa_name_symbol (name, var);

  /* Follow the du chain through phi nodes inside LOOP, rebasing each phi
     result on VAR, until no such phi use is found.  */
  while (1)
    {
      phi = NULL;
      FOR_EACH_IMM_USE_STMT (stmt, iter, name)
	{
	  if (gimple_code (stmt) == GIMPLE_PHI
	      && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
	    {
	      phi = stmt;
	      /* The iterator must be terminated explicitly when leaving
		 the loop early.  */
	      BREAK_FROM_IMM_USE_STMT (iter);
	    }
	}
      if (!phi)
	return;

      name = PHI_RESULT (phi);
      replace_ssa_name_symbol (name, var);
    }
}
1906
/* Given an unrolled LOOP after predictive commoning, remove the
   register copies arising from phi nodes by changing the base
   variables of SSA names.  TMP_VARS is the set of the temporary variables
   for those we want to perform this.  */

static void
eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
{
  edge e;
  gphi *phi;
  gimple *stmt;
  tree name, use, var;
  gphi_iterator psi;

  e = loop_latch_edge (loop);
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      name = PHI_RESULT (phi);
      var = SSA_NAME_VAR (name);
      /* Only process phis of the temporaries created by this pass.  */
      if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
	continue;
      use = PHI_ARG_DEF_FROM_EDGE (phi, e);
      gcc_assert (TREE_CODE (use) == SSA_NAME);

      /* Base all the ssa names in the ud and du chain of NAME on VAR.  */
      stmt = SSA_NAME_DEF_STMT (use);
      while (gimple_code (stmt) == GIMPLE_PHI
	     /* In case we could not unroll the loop enough to eliminate
		all copies, we may reach the loop header before the defining
		statement (in that case, some register copies will be present
		in loop latch in the final code, corresponding to the newly
		created looparound phi nodes).  */
	     && gimple_bb (stmt) != loop->header)
	{
	  /* Phis outside the header occur only in unrolled loop copies,
	     whose blocks have a single predecessor.  */
	  gcc_assert (single_pred_p (gimple_bb (stmt)));
	  use = PHI_ARG_DEF (stmt, 0);
	  stmt = SSA_NAME_DEF_STMT (use);
	}

      base_names_in_chain_on (loop, use, var);
    }
}
1950
1951 /* Returns true if CHAIN is suitable to be combined. */
1952
1953 static bool
chain_can_be_combined_p(chain_p chain)1954 chain_can_be_combined_p (chain_p chain)
1955 {
1956 return (!chain->combined
1957 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
1958 }
1959
1960 /* Returns the modify statement that uses NAME. Skips over assignment
1961 statements, NAME is replaced with the actual name used in the returned
1962 statement. */
1963
1964 static gimple *
find_use_stmt(tree * name)1965 find_use_stmt (tree *name)
1966 {
1967 gimple *stmt;
1968 tree rhs, lhs;
1969
1970 /* Skip over assignments. */
1971 while (1)
1972 {
1973 stmt = single_nonlooparound_use (*name);
1974 if (!stmt)
1975 return NULL;
1976
1977 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1978 return NULL;
1979
1980 lhs = gimple_assign_lhs (stmt);
1981 if (TREE_CODE (lhs) != SSA_NAME)
1982 return NULL;
1983
1984 if (gimple_assign_copy_p (stmt))
1985 {
1986 rhs = gimple_assign_rhs1 (stmt);
1987 if (rhs != *name)
1988 return NULL;
1989
1990 *name = lhs;
1991 }
1992 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1993 == GIMPLE_BINARY_RHS)
1994 return stmt;
1995 else
1996 return NULL;
1997 }
1998 }
1999
2000 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
2001
2002 static bool
may_reassociate_p(tree type,enum tree_code code)2003 may_reassociate_p (tree type, enum tree_code code)
2004 {
2005 if (FLOAT_TYPE_P (type)
2006 && !flag_unsafe_math_optimizations)
2007 return false;
2008
2009 return (commutative_tree_code (code)
2010 && associative_tree_code (code));
2011 }
2012
2013 /* If the operation used in STMT is associative and commutative, go through the
2014 tree of the same operations and returns its root. Distance to the root
2015 is stored in DISTANCE. */
2016
2017 static gimple *
find_associative_operation_root(gimple * stmt,unsigned * distance)2018 find_associative_operation_root (gimple *stmt, unsigned *distance)
2019 {
2020 tree lhs;
2021 gimple *next;
2022 enum tree_code code = gimple_assign_rhs_code (stmt);
2023 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2024 unsigned dist = 0;
2025
2026 if (!may_reassociate_p (type, code))
2027 return NULL;
2028
2029 while (1)
2030 {
2031 lhs = gimple_assign_lhs (stmt);
2032 gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2033
2034 next = find_use_stmt (&lhs);
2035 if (!next
2036 || gimple_assign_rhs_code (next) != code)
2037 break;
2038
2039 stmt = next;
2040 dist++;
2041 }
2042
2043 if (distance)
2044 *distance = dist;
2045 return stmt;
2046 }
2047
2048 /* Returns the common statement in that NAME1 and NAME2 have a use. If there
2049 is no such statement, returns NULL_TREE. In case the operation used on
2050 NAME1 and NAME2 is associative and commutative, returns the root of the
2051 tree formed by this operation instead of the statement that uses NAME1 or
2052 NAME2. */
2053
2054 static gimple *
find_common_use_stmt(tree * name1,tree * name2)2055 find_common_use_stmt (tree *name1, tree *name2)
2056 {
2057 gimple *stmt1, *stmt2;
2058
2059 stmt1 = find_use_stmt (name1);
2060 if (!stmt1)
2061 return NULL;
2062
2063 stmt2 = find_use_stmt (name2);
2064 if (!stmt2)
2065 return NULL;
2066
2067 if (stmt1 == stmt2)
2068 return stmt1;
2069
2070 stmt1 = find_associative_operation_root (stmt1, NULL);
2071 if (!stmt1)
2072 return NULL;
2073 stmt2 = find_associative_operation_root (stmt2, NULL);
2074 if (!stmt2)
2075 return NULL;
2076
2077 return (stmt1 == stmt2 ? stmt1 : NULL);
2078 }
2079
/* Checks whether R1 and R2 are combined together using CODE, with the result
   in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
   if it is true.  If CODE is ERROR_MARK, set these values instead.  */

static bool
combinable_refs_p (dref r1, dref r2,
		   enum tree_code *code, bool *swap, tree *rslt_type)
{
  enum tree_code acode;
  bool aswap;
  tree atype;
  tree name1, name2;
  gimple *stmt;

  name1 = name_for_ref (r1);
  name2 = name_for_ref (r2);
  gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);

  /* Find the statement combining the values of both references.  */
  stmt = find_common_use_stmt (&name1, &name2);

  if (!stmt
      /* A simple post-dominance check - make sure the combination
	 is executed under the same condition as the references.  */
      || (gimple_bb (stmt) != gimple_bb (r1->stmt)
	  && gimple_bb (stmt) != gimple_bb (r2->stmt)))
    return false;

  acode = gimple_assign_rhs_code (stmt);
  /* Only track operand order for non-commutative operations.  */
  aswap = (!commutative_tree_code (acode)
	   && gimple_assign_rhs1 (stmt) != name1);
  atype = TREE_TYPE (gimple_assign_lhs (stmt));

  /* First pair of references: record the combination found.  */
  if (*code == ERROR_MARK)
    {
      *code = acode;
      *swap = aswap;
      *rslt_type = atype;
      return true;
    }

  /* Subsequent pairs must match the recorded combination exactly.  */
  return (*code == acode
	  && *swap == aswap
	  && *rslt_type == atype);
}
2124
2125 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2126 an assignment of the remaining operand. */
2127
2128 static void
remove_name_from_operation(gimple * stmt,tree op)2129 remove_name_from_operation (gimple *stmt, tree op)
2130 {
2131 tree other_op;
2132 gimple_stmt_iterator si;
2133
2134 gcc_assert (is_gimple_assign (stmt));
2135
2136 if (gimple_assign_rhs1 (stmt) == op)
2137 other_op = gimple_assign_rhs2 (stmt);
2138 else
2139 other_op = gimple_assign_rhs1 (stmt);
2140
2141 si = gsi_for_stmt (stmt);
2142 gimple_assign_set_rhs_from_tree (&si, other_op);
2143
2144 /* We should not have reallocated STMT. */
2145 gcc_assert (gsi_stmt (si) == stmt);
2146
2147 update_stmt (stmt);
2148 }
2149
2150 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2151 are combined in a single statement, and returns this statement. */
2152
static gimple *
reassociate_to_the_same_stmt (tree name1, tree name2)
{
  gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
  gassign *new_stmt, *tmp_stmt;
  tree new_name, tmp_name, var, r1, r2;
  unsigned dist1, dist2;
  enum tree_code code;
  tree type = TREE_TYPE (name1);
  gimple_stmt_iterator bsi;

  stmt1 = find_use_stmt (&name1);
  stmt2 = find_use_stmt (&name2);
  root1 = find_associative_operation_root (stmt1, &dist1);
  root2 = find_associative_operation_root (stmt2, &dist2);
  code = gimple_assign_rhs_code (stmt1);

  /* The caller guarantees both names feed the same associative
     expression tree with a single operation code.  */
  gcc_assert (root1 && root2 && root1 == root2
	      && code == gimple_assign_rhs_code (stmt2));

  /* Find the root of the nearest expression in that both NAME1 and NAME2
     are used.  */
  r1 = name1;
  s1 = stmt1;
  r2 = name2;
  s2 = stmt2;

  /* First walk the deeper of the two use chains upwards until both
     are at the same distance from the common root ...  */
  while (dist1 > dist2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      dist1--;
    }
  while (dist2 > dist1)
    {
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
      dist2--;
    }

  /* ... then advance both in lockstep until they meet in the nearest
     common statement S1 (== S2).  */
  while (s1 != s2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
    }

  /* Remove NAME1 and NAME2 from the statements in that they are used
     currently.  */
  remove_name_from_operation (stmt1, name1);
  remove_name_from_operation (stmt2, name2);

  /* Insert the new statement combining NAME1 and NAME2 before S1, and
     combine it with the rhs of S1.  */
  var = create_tmp_reg (type, "predreastmp");
  new_name = make_ssa_name (var);
  new_stmt = gimple_build_assign (new_name, code, name1, name2);

  var = create_tmp_reg (type, "predreastmp");
  tmp_name = make_ssa_name (var);

  /* Rhs of S1 may now be either a binary expression with operation
     CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
     so that name1 or name2 was removed from it).  */
  tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
				  gimple_assign_rhs1 (s1),
				  gimple_assign_rhs2 (s1));

  /* S1 becomes  NEW_NAME code TMP_NAME, where TMP_NAME holds S1's old
     rhs; both helper statements are placed just before it.  */
  bsi = gsi_for_stmt (s1);
  gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
  s1 = gsi_stmt (bsi);
  update_stmt (s1);

  gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
  gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);

  return new_stmt;
}
2232
2233 /* Returns the statement that combines references R1 and R2. In case R1
2234 and R2 are not used in the same statement, but they are used with an
2235 associative and commutative operation in the same expression, reassociate
2236 the expression so that they are used in the same statement. */
2237
2238 static gimple *
stmt_combining_refs(dref r1,dref r2)2239 stmt_combining_refs (dref r1, dref r2)
2240 {
2241 gimple *stmt1, *stmt2;
2242 tree name1 = name_for_ref (r1);
2243 tree name2 = name_for_ref (r2);
2244
2245 stmt1 = find_use_stmt (&name1);
2246 stmt2 = find_use_stmt (&name2);
2247 if (stmt1 == stmt2)
2248 return stmt1;
2249
2250 return reassociate_to_the_same_stmt (name1, name2);
2251 }
2252
2253 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2254 description of the new chain is returned, otherwise we return NULL. */
2255
2256 static chain_p
combine_chains(chain_p ch1,chain_p ch2)2257 combine_chains (chain_p ch1, chain_p ch2)
2258 {
2259 dref r1, r2, nw;
2260 enum tree_code op = ERROR_MARK;
2261 bool swap = false;
2262 chain_p new_chain;
2263 unsigned i;
2264 tree rslt_type = NULL_TREE;
2265
2266 if (ch1 == ch2)
2267 return NULL;
2268 if (ch1->length != ch2->length)
2269 return NULL;
2270
2271 if (ch1->refs.length () != ch2->refs.length ())
2272 return NULL;
2273
2274 for (i = 0; (ch1->refs.iterate (i, &r1)
2275 && ch2->refs.iterate (i, &r2)); i++)
2276 {
2277 if (r1->distance != r2->distance)
2278 return NULL;
2279
2280 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2281 return NULL;
2282 }
2283
2284 if (swap)
2285 std::swap (ch1, ch2);
2286
2287 new_chain = XCNEW (struct chain);
2288 new_chain->type = CT_COMBINATION;
2289 new_chain->op = op;
2290 new_chain->ch1 = ch1;
2291 new_chain->ch2 = ch2;
2292 new_chain->rslt_type = rslt_type;
2293 new_chain->length = ch1->length;
2294
2295 for (i = 0; (ch1->refs.iterate (i, &r1)
2296 && ch2->refs.iterate (i, &r2)); i++)
2297 {
2298 nw = XCNEW (struct dref_d);
2299 nw->stmt = stmt_combining_refs (r1, r2);
2300 nw->distance = r1->distance;
2301
2302 new_chain->refs.safe_push (nw);
2303 }
2304
2305 ch1->combined = true;
2306 ch2->combined = true;
2307 return new_chain;
2308 }
2309
2310 /* Recursively update position information of all offspring chains to ROOT
2311 chain's position information. */
2312
2313 static void
update_pos_for_combined_chains(chain_p root)2314 update_pos_for_combined_chains (chain_p root)
2315 {
2316 chain_p ch1 = root->ch1, ch2 = root->ch2;
2317 dref ref, ref1, ref2;
2318 for (unsigned j = 0; (root->refs.iterate (j, &ref)
2319 && ch1->refs.iterate (j, &ref1)
2320 && ch2->refs.iterate (j, &ref2)); ++j)
2321 ref1->pos = ref2->pos = ref->pos;
2322
2323 if (ch1->type == CT_COMBINATION)
2324 update_pos_for_combined_chains (ch1);
2325 if (ch2->type == CT_COMBINATION)
2326 update_pos_for_combined_chains (ch2);
2327 }
2328
2329 /* Returns true if statement S1 dominates statement S2. */
2330
2331 static bool
pcom_stmt_dominates_stmt_p(gimple * s1,gimple * s2)2332 pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
2333 {
2334 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
2335
2336 if (!bb1 || s1 == s2)
2337 return true;
2338
2339 if (bb1 == bb2)
2340 return gimple_uid (s1) < gimple_uid (s2);
2341
2342 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
2343 }
2344
2345 /* Try to combine the CHAINS in LOOP. */
2346
static void
try_combine_chains (struct loop *loop, vec<chain_p> *chains)
{
  unsigned i, j;
  chain_p ch1, ch2, cch;
  auto_vec<chain_p> worklist;
  bool combined_p = false;

  /* Seed the worklist with every chain that may participate in a
     combination.  */
  FOR_EACH_VEC_ELT (*chains, i, ch1)
    if (chain_can_be_combined_p (ch1))
      worklist.safe_push (ch1);

  /* Greedily combine pairs of chains; a successful combination is itself
     pushed back so it can be combined further.  */
  while (!worklist.is_empty ())
    {
      ch1 = worklist.pop ();
      if (!chain_can_be_combined_p (ch1))
	continue;

      FOR_EACH_VEC_ELT (*chains, j, ch2)
	{
	  if (!chain_can_be_combined_p (ch2))
	    continue;

	  cch = combine_chains (ch1, ch2);
	  if (cch)
	    {
	      worklist.safe_push (cch);
	      chains->safe_push (cch);
	      combined_p = true;
	      break;
	    }
	}
    }
  if (!combined_p)
    return;

  /* Setup UID for all statements in dominance order.  */
  basic_block *bbs = get_loop_body (loop);
  renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes);
  free (bbs);

  /* Re-association in combined chains may generate statements different to
     order of references of the original chain.  We need to keep references
     of combined chain in dominance order so that all uses will be inserted
     after definitions.  Note:
       A) This is necessary for all combined chains.
       B) This is only necessary for ZERO distance references because other
	  references inherit value from loop carried PHIs.

     We first update position information for all combined chains.  */
  dref ref;
  for (i = 0; chains->iterate (i, &ch1); ++i)
    {
      /* Only process root combinations; their position information is
	 propagated to offspring chains recursively below.  */
      if (ch1->type != CT_COMBINATION || ch1->combined)
	continue;

      for (j = 0; ch1->refs.iterate (j, &ref); ++j)
	ref->pos = gimple_uid (ref->stmt);

      update_pos_for_combined_chains (ch1);
    }
  /* Then sort references according to newly updated position information.  */
  for (i = 0; chains->iterate (i, &ch1); ++i)
    {
      if (ch1->type != CT_COMBINATION && !ch1->combined)
	continue;

      /* Find the first reference with non-ZERO distance.  */
      if (ch1->length == 0)
	j = ch1->refs.length();
      else
	{
	  for (j = 0; ch1->refs.iterate (j, &ref); ++j)
	    if (ref->distance != 0)
	      break;
	}

      /* Sort all ZERO distance references by position.  */
      qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);

      /* The remaining fixups apply only to root combinations.  */
      if (ch1->combined)
	continue;

      /* For ZERO length chain, has_max_use_after must be true since root
	 combined stmt must dominate others.  */
      if (ch1->length == 0)
	{
	  ch1->has_max_use_after = true;
	  continue;
	}
      /* Check if there is use at max distance after root for combined chains
	 and set flag accordingly.  */
      ch1->has_max_use_after = false;
      gimple *root_stmt = get_chain_root (ch1)->stmt;
      for (j = 1; ch1->refs.iterate (j, &ref); ++j)
	{
	  if (ref->distance == ch1->length
	      && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt))
	    {
	      ch1->has_max_use_after = true;
	      break;
	    }
	}
    }
}
2452
2453 /* Prepare initializers for CHAIN in LOOP. Returns false if this is
2454 impossible because one of these initializers may trap, true otherwise. */
2455
2456 static bool
prepare_initializers_chain(struct loop * loop,chain_p chain)2457 prepare_initializers_chain (struct loop *loop, chain_p chain)
2458 {
2459 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
2460 struct data_reference *dr = get_chain_root (chain)->ref;
2461 tree init;
2462 dref laref;
2463 edge entry = loop_preheader_edge (loop);
2464
2465 /* Find the initializers for the variables, and check that they cannot
2466 trap. */
2467 chain->inits.create (n);
2468 for (i = 0; i < n; i++)
2469 chain->inits.quick_push (NULL_TREE);
2470
2471 /* If we have replaced some looparound phi nodes, use their initializers
2472 instead of creating our own. */
2473 FOR_EACH_VEC_ELT (chain->refs, i, laref)
2474 {
2475 if (gimple_code (laref->stmt) != GIMPLE_PHI)
2476 continue;
2477
2478 gcc_assert (laref->distance > 0);
2479 chain->inits[n - laref->distance]
2480 = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
2481 }
2482
2483 for (i = 0; i < n; i++)
2484 {
2485 gimple_seq stmts = NULL;
2486
2487 if (chain->inits[i] != NULL_TREE)
2488 continue;
2489
2490 init = ref_at_iteration (dr, (int) i - n, &stmts);
2491 if (!chain->all_always_accessed && tree_could_trap_p (init))
2492 {
2493 gimple_seq_discard (stmts);
2494 return false;
2495 }
2496
2497 if (stmts)
2498 gsi_insert_seq_on_edge_immediate (entry, stmts);
2499
2500 chain->inits[i] = init;
2501 }
2502
2503 return true;
2504 }
2505
2506 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2507 be used because the initializers might trap. */
2508
2509 static void
prepare_initializers(struct loop * loop,vec<chain_p> chains)2510 prepare_initializers (struct loop *loop, vec<chain_p> chains)
2511 {
2512 chain_p chain;
2513 unsigned i;
2514
2515 for (i = 0; i < chains.length (); )
2516 {
2517 chain = chains[i];
2518 if (prepare_initializers_chain (loop, chain))
2519 i++;
2520 else
2521 {
2522 release_chain (chain);
2523 chains.unordered_remove (i);
2524 }
2525 }
2526 }
2527
2528 /* Performs predictive commoning for LOOP. Returns true if LOOP was
2529 unrolled. */
2530
static bool
tree_predictive_commoning_loop (struct loop *loop)
{
  vec<data_reference_p> datarefs;
  vec<ddr_p> dependences;
  struct component *components;
  vec<chain_p> chains = vNULL;
  unsigned unroll_factor;
  struct tree_niter_desc desc;
  bool unroll = false;
  edge exit;
  bitmap tmp_vars;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing loop %d\n",  loop->num);

  /* Find the data references and split them into components according to their
     dependence relations.  */
  auto_vec<loop_p, 3> loop_nest;
  dependences.create (10);
  datarefs.create (10);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependences))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Cannot analyze data dependencies\n");
      free_data_refs (datarefs);
      free_dependence_relations (dependences);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependences);

  components = split_data_refs_to_components (loop, datarefs, dependences);
  loop_nest.release ();
  free_dependence_relations (dependences);
  if (!components)
    {
      free_data_refs (datarefs);
      free_affine_expand_cache (&name_expansions);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial state:\n\n");
      dump_components (dump_file, components);
    }

  /* Find the suitable components and split them into chains.  */
  components = filter_suitable_components (loop, components);

  tmp_vars = BITMAP_ALLOC (NULL);
  looparound_phis = BITMAP_ALLOC (NULL);
  determine_roots (loop, components, &chains);
  release_components (components);

  if (!chains.exists ())
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Predictive commoning failed: no suitable chains\n");
      goto end;
    }
  prepare_initializers (loop, chains);

  /* Try to combine the chains that are always worked with together.  */
  try_combine_chains (loop, &chains);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Before commoning:\n\n");
      dump_chains (dump_file, chains);
    }

  /* Determine the unroll factor, and if the loop should be unrolled, ensure
     that its number of iterations is divisible by the factor.  */
  unroll_factor = determine_unroll_factor (chains);
  scev_reset ();
  unroll = (unroll_factor > 1
	    && can_unroll_loop_p (loop, unroll_factor, &desc));
  exit = single_dom_exit (loop);

  /* Execute the predictive commoning transformations, and possibly unroll the
     loop.  */
  if (unroll)
    {
      struct epcc_data dta;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);

      dta.chains = chains;
      dta.tmp_vars = tmp_vars;

      update_ssa (TODO_update_ssa_only_virtuals);

      /* Cfg manipulations performed in tree_transform_and_unroll_loop before
	 execute_pred_commoning_cbck is called may cause phi nodes to be
	 reallocated, which is a problem since CHAINS may point to these
	 statements.  To fix this, we store the ssa names defined by the
	 phi nodes here instead of the phi nodes themselves, and restore
	 the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
      replace_phis_by_defined_names (chains);

      tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
				      execute_pred_commoning_cbck, &dta);
      eliminate_temp_copies (loop, tmp_vars);
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Executing predictive commoning without unrolling.\n");
      execute_pred_commoning (loop, chains, tmp_vars);
    }

end: ;
  /* Common cleanup for all exits past chain creation.  */
  release_chains (chains);
  free_data_refs (datarefs);
  BITMAP_FREE (tmp_vars);
  BITMAP_FREE (looparound_phis);

  free_affine_expand_cache (&name_expansions);

  return unroll;
}
2659
2660 /* Runs predictive commoning. */
2661
2662 unsigned
tree_predictive_commoning(void)2663 tree_predictive_commoning (void)
2664 {
2665 bool unrolled = false;
2666 struct loop *loop;
2667 unsigned ret = 0;
2668
2669 initialize_original_copy_tables ();
2670 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
2671 if (optimize_loop_for_speed_p (loop))
2672 {
2673 unrolled |= tree_predictive_commoning_loop (loop);
2674 }
2675
2676 if (unrolled)
2677 {
2678 scev_reset ();
2679 ret = TODO_cleanup_cfg;
2680 }
2681 free_original_copy_tables ();
2682
2683 return ret;
2684 }
2685
2686 /* Predictive commoning Pass. */
2687
2688 static unsigned
run_tree_predictive_commoning(struct function * fun)2689 run_tree_predictive_commoning (struct function *fun)
2690 {
2691 if (number_of_loops (fun) <= 1)
2692 return 0;
2693
2694 return tree_predictive_commoning ();
2695 }
2696
namespace {

/* Pass registration data for the "pcom" GIMPLE pass.  */
const pass_data pass_data_predcom =
{
  GIMPLE_PASS, /* type */
  "pcom", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_PREDCOM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

/* The predictive commoning pass object; runs only when
   -fpredictive-commoning is enabled.  */
class pass_predcom : public gimple_opt_pass
{
public:
  pass_predcom (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_predcom, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_predictive_commoning != 0; }
  virtual unsigned int execute (function *fun)
    {
      return run_tree_predictive_commoning (fun);
    }

}; // class pass_predcom

} // anon namespace
2729
/* Factory used by the pass manager to instantiate the pass.  */

gimple_opt_pass *
make_pass_predcom (gcc::context *ctxt)
{
  return new pass_predcom (ctxt);
}
2735
2736
2737