138fd1498Szrj /* Predictive commoning.
238fd1498Szrj Copyright (C) 2005-2018 Free Software Foundation, Inc.
338fd1498Szrj
438fd1498Szrj This file is part of GCC.
538fd1498Szrj
638fd1498Szrj GCC is free software; you can redistribute it and/or modify it
738fd1498Szrj under the terms of the GNU General Public License as published by the
838fd1498Szrj Free Software Foundation; either version 3, or (at your option) any
938fd1498Szrj later version.
1038fd1498Szrj
1138fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT
1238fd1498Szrj ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1338fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
1438fd1498Szrj for more details.
1538fd1498Szrj
1638fd1498Szrj You should have received a copy of the GNU General Public License
1738fd1498Szrj along with GCC; see the file COPYING3. If not see
1838fd1498Szrj <http://www.gnu.org/licenses/>. */
1938fd1498Szrj
2038fd1498Szrj /* This file implements the predictive commoning optimization. Predictive
2138fd1498Szrj commoning can be viewed as CSE around a loop, and with some improvements,
2238fd1498Szrj as generalized strength reduction-- i.e., reusing values computed in
2338fd1498Szrj earlier iterations of a loop in the later ones. So far, the pass only
2438fd1498Szrj handles the most useful case, that is, reusing values of memory references.
2538fd1498Szrj If you think this is all just a special case of PRE, you are sort of right;
2638fd1498Szrj however, concentrating on loops is simpler, and makes it possible to
2738fd1498Szrj incorporate data dependence analysis to detect the opportunities, perform
2838fd1498Szrj loop unrolling to avoid copies together with renaming immediately,
2938fd1498Szrj and if needed, we could also take register pressure into account.
3038fd1498Szrj
3138fd1498Szrj Let us demonstrate what is done on an example:
3238fd1498Szrj
3338fd1498Szrj for (i = 0; i < 100; i++)
3438fd1498Szrj {
3538fd1498Szrj a[i+2] = a[i] + a[i+1];
3638fd1498Szrj b[10] = b[10] + i;
3738fd1498Szrj c[i] = c[99 - i];
3838fd1498Szrj d[i] = d[i + 1];
3938fd1498Szrj }
4038fd1498Szrj
4138fd1498Szrj 1) We find data references in the loop, and split them to mutually
4238fd1498Szrj independent groups (i.e., we find components of a data dependence
4338fd1498Szrj graph). We ignore read-read dependences whose distance is not constant.
4438fd1498Szrj (TODO -- we could also ignore antidependences). In this example, we
4538fd1498Szrj find the following groups:
4638fd1498Szrj
4738fd1498Szrj a[i]{read}, a[i+1]{read}, a[i+2]{write}
4838fd1498Szrj b[10]{read}, b[10]{write}
4938fd1498Szrj c[99 - i]{read}, c[i]{write}
5038fd1498Szrj d[i + 1]{read}, d[i]{write}
5138fd1498Szrj
5238fd1498Szrj 2) Inside each of the group, we verify several conditions:
5338fd1498Szrj a) all the references must differ in indices only, and the indices
5438fd1498Szrj must all have the same step
5538fd1498Szrj b) the references must dominate loop latch (and thus, they must be
5638fd1498Szrj ordered by dominance relation).
5738fd1498Szrj c) the distance of the indices must be a small multiple of the step
5838fd1498Szrj We are then able to compute the difference of the references (# of
5938fd1498Szrj iterations before they point to the same place as the first of them).
6038fd1498Szrj Also, in case there are writes in the loop, we split the groups into
6138fd1498Szrj chains whose head is the write whose values are used by the reads in
6238fd1498Szrj the same chain. The chains are then processed independently,
6338fd1498Szrj making the further transformations simpler. Also, the shorter chains
6438fd1498Szrj need the same number of registers, but may require lower unrolling
6538fd1498Szrj factor in order to get rid of the copies on the loop latch.
6638fd1498Szrj
6738fd1498Szrj In our example, we get the following chains (the chain for c is invalid).
6838fd1498Szrj
6938fd1498Szrj a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
7038fd1498Szrj b[10]{read,+0}, b[10]{write,+0}
7138fd1498Szrj d[i + 1]{read,+0}, d[i]{write,+1}
7238fd1498Szrj
7338fd1498Szrj 3) For each read, we determine the read or write whose value it reuses,
7438fd1498Szrj together with the distance of this reuse. I.e. we take the last
7538fd1498Szrj reference before it with distance 0, or the last of the references
7638fd1498Szrj with the smallest positive distance to the read. Then, we remove
7738fd1498Szrj the references that are not used in any of these chains, discard the
7838fd1498Szrj empty groups, and propagate all the links so that they point to the
7938fd1498Szrj single root reference of the chain (adjusting their distance
8038fd1498Szrj appropriately). Some extra care needs to be taken for references with
8138fd1498Szrj step 0. In our example (the numbers indicate the distance of the
8238fd1498Szrj reuse),
8338fd1498Szrj
8438fd1498Szrj a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
8538fd1498Szrj b[10] --> (*) 1, b[10] (*)
8638fd1498Szrj
8738fd1498Szrj 4) The chains are combined together if possible. If the corresponding
8838fd1498Szrj elements of two chains are always combined together with the same
8938fd1498Szrj operator, we remember just the result of this combination, instead
9038fd1498Szrj of remembering the values separately. We may need to perform
9138fd1498Szrj reassociation to enable combining, for example
9238fd1498Szrj
9338fd1498Szrj e[i] + f[i+1] + e[i+1] + f[i]
9438fd1498Szrj
9538fd1498Szrj can be reassociated as
9638fd1498Szrj
9738fd1498Szrj (e[i] + f[i]) + (e[i+1] + f[i+1])
9838fd1498Szrj
9938fd1498Szrj and we can combine the chains for e and f into one chain.
10038fd1498Szrj
10138fd1498Szrj 5) For each root reference (end of the chain) R, let N be maximum distance
10238fd1498Szrj of a reference reusing its value. Variables R0 up to RN are created,
10338fd1498Szrj together with phi nodes that transfer values from R1 .. RN to
10438fd1498Szrj R0 .. R(N-1).
10538fd1498Szrj Initial values are loaded to R0..R(N-1) (in case not all references
10638fd1498Szrj must necessarily be accessed and they may trap, we may fail here;
10738fd1498Szrj TODO sometimes, the loads could be guarded by a check for the number
10838fd1498Szrj of iterations). Values loaded/stored in roots are also copied to
10938fd1498Szrj RN. Other reads are replaced with the appropriate variable Ri.
11038fd1498Szrj Everything is put to SSA form.
11138fd1498Szrj
11238fd1498Szrj As a small improvement, if R0 is dead after the root (i.e., all uses of
11338fd1498Szrj the value with the maximum distance dominate the root), we can avoid
11438fd1498Szrj creating RN and use R0 instead of it.
11538fd1498Szrj
11638fd1498Szrj In our example, we get (only the parts concerning a and b are shown):
11738fd1498Szrj for (i = 0; i < 100; i++)
11838fd1498Szrj {
11938fd1498Szrj f = phi (a[0], s);
12038fd1498Szrj s = phi (a[1], f);
12138fd1498Szrj x = phi (b[10], x);
12238fd1498Szrj
12338fd1498Szrj f = f + s;
12438fd1498Szrj a[i+2] = f;
12538fd1498Szrj x = x + i;
12638fd1498Szrj b[10] = x;
12738fd1498Szrj }
12838fd1498Szrj
12938fd1498Szrj 6) Factor F for unrolling is determined as the smallest common multiple of
13038fd1498Szrj (N + 1) for each root reference (N for references for that we avoided
13138fd1498Szrj creating RN). If F and the loop is small enough, loop is unrolled F
13238fd1498Szrj times. The stores to RN (R0) in the copies of the loop body are
13338fd1498Szrj periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
13438fd1498Szrj be coalesced and the copies can be eliminated.
13538fd1498Szrj
13638fd1498Szrj TODO -- copy propagation and other optimizations may change the live
13738fd1498Szrj ranges of the temporary registers and prevent them from being coalesced;
13838fd1498Szrj this may increase the register pressure.
13938fd1498Szrj
14038fd1498Szrj In our case, F = 2 and the (main loop of the) result is
14138fd1498Szrj
14238fd1498Szrj for (i = 0; i < ...; i += 2)
14338fd1498Szrj {
14438fd1498Szrj f = phi (a[0], f);
14538fd1498Szrj s = phi (a[1], s);
14638fd1498Szrj x = phi (b[10], x);
14738fd1498Szrj
14838fd1498Szrj f = f + s;
14938fd1498Szrj a[i+2] = f;
15038fd1498Szrj x = x + i;
15138fd1498Szrj b[10] = x;
15238fd1498Szrj
15338fd1498Szrj s = s + f;
15438fd1498Szrj a[i+3] = s;
15538fd1498Szrj x = x + i;
15638fd1498Szrj b[10] = x;
15738fd1498Szrj }
15838fd1498Szrj
15938fd1498Szrj Apart from predictive commoning on Load-Load and Store-Load chains, we
16038fd1498Szrj also support Store-Store chains -- stores killed by other store can be
16138fd1498Szrj eliminated. Given below example:
16238fd1498Szrj
16338fd1498Szrj for (i = 0; i < n; i++)
16438fd1498Szrj {
16538fd1498Szrj a[i] = 1;
16638fd1498Szrj a[i+2] = 2;
16738fd1498Szrj }
16838fd1498Szrj
16938fd1498Szrj It can be replaced with:
17038fd1498Szrj
17138fd1498Szrj t0 = a[0];
17238fd1498Szrj t1 = a[1];
17338fd1498Szrj for (i = 0; i < n; i++)
17438fd1498Szrj {
17538fd1498Szrj a[i] = 1;
17638fd1498Szrj t2 = 2;
17738fd1498Szrj t0 = t1;
17838fd1498Szrj t1 = t2;
17938fd1498Szrj }
18038fd1498Szrj a[n] = t0;
18138fd1498Szrj a[n+1] = t1;
18238fd1498Szrj
18338fd1498Szrj If the loop runs more than 1 iterations, it can be further simplified into:
18438fd1498Szrj
18538fd1498Szrj for (i = 0; i < n; i++)
18638fd1498Szrj {
18738fd1498Szrj a[i] = 1;
18838fd1498Szrj }
18938fd1498Szrj a[n] = 2;
19038fd1498Szrj a[n+1] = 2;
19138fd1498Szrj
19238fd1498Szrj The interesting part is this can be viewed either as general store motion
19338fd1498Szrj or general dead store elimination in either intra/inter-iterations way.
19438fd1498Szrj
19538fd1498Szrj With trivial effort, we also support load inside Store-Store chains if the
19638fd1498Szrj load is dominated by a store statement in the same iteration of loop. You
19738fd1498Szrj can see this as a restricted Store-Mixed-Load-Store chain.
19838fd1498Szrj
19938fd1498Szrj TODO: For now, we don't support store-store chains in multi-exit loops. We
20038fd1498Szrj force to not unroll in case of store-store chain even if other chains might
20138fd1498Szrj ask for unroll.
20238fd1498Szrj
20338fd1498Szrj Predictive commoning can be generalized for arbitrary computations (not
20438fd1498Szrj just memory loads), and also nontrivial transfer functions (e.g., replacing
20538fd1498Szrj i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
20638fd1498Szrj
20738fd1498Szrj #include "config.h"
20838fd1498Szrj #include "system.h"
20938fd1498Szrj #include "coretypes.h"
21038fd1498Szrj #include "backend.h"
21138fd1498Szrj #include "rtl.h"
21238fd1498Szrj #include "tree.h"
21338fd1498Szrj #include "gimple.h"
21438fd1498Szrj #include "predict.h"
21538fd1498Szrj #include "tree-pass.h"
21638fd1498Szrj #include "ssa.h"
21738fd1498Szrj #include "gimple-pretty-print.h"
21838fd1498Szrj #include "alias.h"
21938fd1498Szrj #include "fold-const.h"
22038fd1498Szrj #include "cfgloop.h"
22138fd1498Szrj #include "tree-eh.h"
22238fd1498Szrj #include "gimplify.h"
22338fd1498Szrj #include "gimple-iterator.h"
22438fd1498Szrj #include "gimplify-me.h"
22538fd1498Szrj #include "tree-ssa-loop-ivopts.h"
22638fd1498Szrj #include "tree-ssa-loop-manip.h"
22738fd1498Szrj #include "tree-ssa-loop-niter.h"
22838fd1498Szrj #include "tree-ssa-loop.h"
22938fd1498Szrj #include "tree-into-ssa.h"
23038fd1498Szrj #include "tree-dfa.h"
23138fd1498Szrj #include "tree-ssa.h"
23238fd1498Szrj #include "tree-data-ref.h"
23338fd1498Szrj #include "tree-scalar-evolution.h"
23438fd1498Szrj #include "params.h"
23538fd1498Szrj #include "tree-affine.h"
23638fd1498Szrj #include "builtins.h"
23738fd1498Szrj
23838fd1498Szrj /* The maximum number of iterations between the considered memory
23938fd1498Szrj references. */
24038fd1498Szrj
24138fd1498Szrj #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
24238fd1498Szrj
24338fd1498Szrj /* Data references (or phi nodes that carry data reference values across
24438fd1498Szrj loop iterations). */
24538fd1498Szrj
24638fd1498Szrj typedef struct dref_d
24738fd1498Szrj {
24838fd1498Szrj /* The reference itself. */
24938fd1498Szrj struct data_reference *ref;
25038fd1498Szrj
25138fd1498Szrj /* The statement in that the reference appears. */
25238fd1498Szrj gimple *stmt;
25338fd1498Szrj
25438fd1498Szrj /* In case that STMT is a phi node, this field is set to the SSA name
25538fd1498Szrj defined by it in replace_phis_by_defined_names (in order to avoid
25638fd1498Szrj pointing to phi node that got reallocated in the meantime). */
25738fd1498Szrj tree name_defined_by_phi;
25838fd1498Szrj
25938fd1498Szrj /* Distance of the reference from the root of the chain (in number of
26038fd1498Szrj iterations of the loop). */
26138fd1498Szrj unsigned distance;
26238fd1498Szrj
26338fd1498Szrj /* Number of iterations offset from the first reference in the component. */
26438fd1498Szrj widest_int offset;
26538fd1498Szrj
26638fd1498Szrj /* Number of the reference in a component, in dominance ordering. */
26738fd1498Szrj unsigned pos;
26838fd1498Szrj
26938fd1498Szrj /* True if the memory reference is always accessed when the loop is
27038fd1498Szrj entered. */
27138fd1498Szrj unsigned always_accessed : 1;
27238fd1498Szrj } *dref;
27338fd1498Szrj
27438fd1498Szrj
27538fd1498Szrj /* Type of the chain of the references. */
27638fd1498Szrj
27738fd1498Szrj enum chain_type
27838fd1498Szrj {
27938fd1498Szrj /* The addresses of the references in the chain are constant. */
28038fd1498Szrj CT_INVARIANT,
28138fd1498Szrj
28238fd1498Szrj /* There are only loads in the chain. */
28338fd1498Szrj CT_LOAD,
28438fd1498Szrj
28538fd1498Szrj /* Root of the chain is store, the rest are loads. */
28638fd1498Szrj CT_STORE_LOAD,
28738fd1498Szrj
28838fd1498Szrj /* There are only stores in the chain. */
28938fd1498Szrj CT_STORE_STORE,
29038fd1498Szrj
29138fd1498Szrj /* A combination of two chains. */
29238fd1498Szrj CT_COMBINATION
29338fd1498Szrj };
29438fd1498Szrj
29538fd1498Szrj /* Chains of data references. */
29638fd1498Szrj
29738fd1498Szrj typedef struct chain
29838fd1498Szrj {
29938fd1498Szrj /* Type of the chain. */
30038fd1498Szrj enum chain_type type;
30138fd1498Szrj
30238fd1498Szrj /* For combination chains, the operator and the two chains that are
30338fd1498Szrj combined, and the type of the result. */
30438fd1498Szrj enum tree_code op;
30538fd1498Szrj tree rslt_type;
30638fd1498Szrj struct chain *ch1, *ch2;
30738fd1498Szrj
30838fd1498Szrj /* The references in the chain. */
30938fd1498Szrj vec<dref> refs;
31038fd1498Szrj
31138fd1498Szrj /* The maximum distance of the reference in the chain from the root. */
31238fd1498Szrj unsigned length;
31338fd1498Szrj
31438fd1498Szrj /* The variables used to copy the value throughout iterations. */
31538fd1498Szrj vec<tree> vars;
31638fd1498Szrj
31738fd1498Szrj /* Initializers for the variables. */
31838fd1498Szrj vec<tree> inits;
31938fd1498Szrj
32038fd1498Szrj /* Finalizers for the eliminated stores. */
32138fd1498Szrj vec<tree> finis;
32238fd1498Szrj
32338fd1498Szrj /* gimple stmts intializing the initial variables of the chain. */
32438fd1498Szrj gimple_seq init_seq;
32538fd1498Szrj
32638fd1498Szrj /* gimple stmts finalizing the eliminated stores of the chain. */
32738fd1498Szrj gimple_seq fini_seq;
32838fd1498Szrj
32938fd1498Szrj /* True if there is a use of a variable with the maximal distance
33038fd1498Szrj that comes after the root in the loop. */
33138fd1498Szrj unsigned has_max_use_after : 1;
33238fd1498Szrj
33338fd1498Szrj /* True if all the memory references in the chain are always accessed. */
33438fd1498Szrj unsigned all_always_accessed : 1;
33538fd1498Szrj
33638fd1498Szrj /* True if this chain was combined together with some other chain. */
33738fd1498Szrj unsigned combined : 1;
33838fd1498Szrj
33938fd1498Szrj /* True if this is store elimination chain and eliminated stores store
34038fd1498Szrj loop invariant value into memory. */
34138fd1498Szrj unsigned inv_store_elimination : 1;
34238fd1498Szrj } *chain_p;
34338fd1498Szrj
34438fd1498Szrj
34538fd1498Szrj /* Describes the knowledge about the step of the memory references in
34638fd1498Szrj the component. */
34738fd1498Szrj
34838fd1498Szrj enum ref_step_type
34938fd1498Szrj {
35038fd1498Szrj /* The step is zero. */
35138fd1498Szrj RS_INVARIANT,
35238fd1498Szrj
35338fd1498Szrj /* The step is nonzero. */
35438fd1498Szrj RS_NONZERO,
35538fd1498Szrj
35638fd1498Szrj /* The step may or may not be nonzero. */
35738fd1498Szrj RS_ANY
35838fd1498Szrj };
35938fd1498Szrj
36038fd1498Szrj /* Components of the data dependence graph. */
36138fd1498Szrj
36238fd1498Szrj struct component
36338fd1498Szrj {
36438fd1498Szrj /* The references in the component. */
36538fd1498Szrj vec<dref> refs;
36638fd1498Szrj
36738fd1498Szrj /* What we know about the step of the references in the component. */
36838fd1498Szrj enum ref_step_type comp_step;
36938fd1498Szrj
37038fd1498Szrj /* True if all references in component are stores and we try to do
37138fd1498Szrj intra/inter loop iteration dead store elimination. */
37238fd1498Szrj bool eliminate_store_p;
37338fd1498Szrj
37438fd1498Szrj /* Next component in the list. */
37538fd1498Szrj struct component *next;
37638fd1498Szrj };
37738fd1498Szrj
37838fd1498Szrj /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
37938fd1498Szrj
38038fd1498Szrj static bitmap looparound_phis;
38138fd1498Szrj
38238fd1498Szrj /* Cache used by tree_to_aff_combination_expand. */
38338fd1498Szrj
38438fd1498Szrj static hash_map<tree, name_expansion *> *name_expansions;
38538fd1498Szrj
38638fd1498Szrj /* Dumps data reference REF to FILE. */
38738fd1498Szrj
38838fd1498Szrj extern void dump_dref (FILE *, dref);
38938fd1498Szrj void
dump_dref(FILE * file,dref ref)39038fd1498Szrj dump_dref (FILE *file, dref ref)
39138fd1498Szrj {
39238fd1498Szrj if (ref->ref)
39338fd1498Szrj {
39438fd1498Szrj fprintf (file, " ");
39538fd1498Szrj print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
39638fd1498Szrj fprintf (file, " (id %u%s)\n", ref->pos,
39738fd1498Szrj DR_IS_READ (ref->ref) ? "" : ", write");
39838fd1498Szrj
39938fd1498Szrj fprintf (file, " offset ");
40038fd1498Szrj print_decs (ref->offset, file);
40138fd1498Szrj fprintf (file, "\n");
40238fd1498Szrj
40338fd1498Szrj fprintf (file, " distance %u\n", ref->distance);
40438fd1498Szrj }
40538fd1498Szrj else
40638fd1498Szrj {
40738fd1498Szrj if (gimple_code (ref->stmt) == GIMPLE_PHI)
40838fd1498Szrj fprintf (file, " looparound ref\n");
40938fd1498Szrj else
41038fd1498Szrj fprintf (file, " combination ref\n");
41138fd1498Szrj fprintf (file, " in statement ");
41238fd1498Szrj print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
41338fd1498Szrj fprintf (file, "\n");
41438fd1498Szrj fprintf (file, " distance %u\n", ref->distance);
41538fd1498Szrj }
41638fd1498Szrj
41738fd1498Szrj }
41838fd1498Szrj
41938fd1498Szrj /* Dumps CHAIN to FILE. */
42038fd1498Szrj
42138fd1498Szrj extern void dump_chain (FILE *, chain_p);
42238fd1498Szrj void
dump_chain(FILE * file,chain_p chain)42338fd1498Szrj dump_chain (FILE *file, chain_p chain)
42438fd1498Szrj {
42538fd1498Szrj dref a;
42638fd1498Szrj const char *chain_type;
42738fd1498Szrj unsigned i;
42838fd1498Szrj tree var;
42938fd1498Szrj
43038fd1498Szrj switch (chain->type)
43138fd1498Szrj {
43238fd1498Szrj case CT_INVARIANT:
43338fd1498Szrj chain_type = "Load motion";
43438fd1498Szrj break;
43538fd1498Szrj
43638fd1498Szrj case CT_LOAD:
43738fd1498Szrj chain_type = "Loads-only";
43838fd1498Szrj break;
43938fd1498Szrj
44038fd1498Szrj case CT_STORE_LOAD:
44138fd1498Szrj chain_type = "Store-loads";
44238fd1498Szrj break;
44338fd1498Szrj
44438fd1498Szrj case CT_STORE_STORE:
44538fd1498Szrj chain_type = "Store-stores";
44638fd1498Szrj break;
44738fd1498Szrj
44838fd1498Szrj case CT_COMBINATION:
44938fd1498Szrj chain_type = "Combination";
45038fd1498Szrj break;
45138fd1498Szrj
45238fd1498Szrj default:
45338fd1498Szrj gcc_unreachable ();
45438fd1498Szrj }
45538fd1498Szrj
45638fd1498Szrj fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
45738fd1498Szrj chain->combined ? " (combined)" : "");
45838fd1498Szrj if (chain->type != CT_INVARIANT)
45938fd1498Szrj fprintf (file, " max distance %u%s\n", chain->length,
46038fd1498Szrj chain->has_max_use_after ? "" : ", may reuse first");
46138fd1498Szrj
46238fd1498Szrj if (chain->type == CT_COMBINATION)
46338fd1498Szrj {
46438fd1498Szrj fprintf (file, " equal to %p %s %p in type ",
46538fd1498Szrj (void *) chain->ch1, op_symbol_code (chain->op),
46638fd1498Szrj (void *) chain->ch2);
46738fd1498Szrj print_generic_expr (file, chain->rslt_type, TDF_SLIM);
46838fd1498Szrj fprintf (file, "\n");
46938fd1498Szrj }
47038fd1498Szrj
47138fd1498Szrj if (chain->vars.exists ())
47238fd1498Szrj {
47338fd1498Szrj fprintf (file, " vars");
47438fd1498Szrj FOR_EACH_VEC_ELT (chain->vars, i, var)
47538fd1498Szrj {
47638fd1498Szrj fprintf (file, " ");
47738fd1498Szrj print_generic_expr (file, var, TDF_SLIM);
47838fd1498Szrj }
47938fd1498Szrj fprintf (file, "\n");
48038fd1498Szrj }
48138fd1498Szrj
48238fd1498Szrj if (chain->inits.exists ())
48338fd1498Szrj {
48438fd1498Szrj fprintf (file, " inits");
48538fd1498Szrj FOR_EACH_VEC_ELT (chain->inits, i, var)
48638fd1498Szrj {
48738fd1498Szrj fprintf (file, " ");
48838fd1498Szrj print_generic_expr (file, var, TDF_SLIM);
48938fd1498Szrj }
49038fd1498Szrj fprintf (file, "\n");
49138fd1498Szrj }
49238fd1498Szrj
49338fd1498Szrj fprintf (file, " references:\n");
49438fd1498Szrj FOR_EACH_VEC_ELT (chain->refs, i, a)
49538fd1498Szrj dump_dref (file, a);
49638fd1498Szrj
49738fd1498Szrj fprintf (file, "\n");
49838fd1498Szrj }
49938fd1498Szrj
50038fd1498Szrj /* Dumps CHAINS to FILE. */
50138fd1498Szrj
50238fd1498Szrj extern void dump_chains (FILE *, vec<chain_p> );
50338fd1498Szrj void
dump_chains(FILE * file,vec<chain_p> chains)50438fd1498Szrj dump_chains (FILE *file, vec<chain_p> chains)
50538fd1498Szrj {
50638fd1498Szrj chain_p chain;
50738fd1498Szrj unsigned i;
50838fd1498Szrj
50938fd1498Szrj FOR_EACH_VEC_ELT (chains, i, chain)
51038fd1498Szrj dump_chain (file, chain);
51138fd1498Szrj }
51238fd1498Szrj
51338fd1498Szrj /* Dumps COMP to FILE. */
51438fd1498Szrj
51538fd1498Szrj extern void dump_component (FILE *, struct component *);
51638fd1498Szrj void
dump_component(FILE * file,struct component * comp)51738fd1498Szrj dump_component (FILE *file, struct component *comp)
51838fd1498Szrj {
51938fd1498Szrj dref a;
52038fd1498Szrj unsigned i;
52138fd1498Szrj
52238fd1498Szrj fprintf (file, "Component%s:\n",
52338fd1498Szrj comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
52438fd1498Szrj FOR_EACH_VEC_ELT (comp->refs, i, a)
52538fd1498Szrj dump_dref (file, a);
52638fd1498Szrj fprintf (file, "\n");
52738fd1498Szrj }
52838fd1498Szrj
52938fd1498Szrj /* Dumps COMPS to FILE. */
53038fd1498Szrj
53138fd1498Szrj extern void dump_components (FILE *, struct component *);
53238fd1498Szrj void
dump_components(FILE * file,struct component * comps)53338fd1498Szrj dump_components (FILE *file, struct component *comps)
53438fd1498Szrj {
53538fd1498Szrj struct component *comp;
53638fd1498Szrj
53738fd1498Szrj for (comp = comps; comp; comp = comp->next)
53838fd1498Szrj dump_component (file, comp);
53938fd1498Szrj }
54038fd1498Szrj
54138fd1498Szrj /* Frees a chain CHAIN. */
54238fd1498Szrj
54338fd1498Szrj static void
release_chain(chain_p chain)54438fd1498Szrj release_chain (chain_p chain)
54538fd1498Szrj {
54638fd1498Szrj dref ref;
54738fd1498Szrj unsigned i;
54838fd1498Szrj
54938fd1498Szrj if (chain == NULL)
55038fd1498Szrj return;
55138fd1498Szrj
55238fd1498Szrj FOR_EACH_VEC_ELT (chain->refs, i, ref)
55338fd1498Szrj free (ref);
55438fd1498Szrj
55538fd1498Szrj chain->refs.release ();
55638fd1498Szrj chain->vars.release ();
55738fd1498Szrj chain->inits.release ();
55838fd1498Szrj if (chain->init_seq)
55938fd1498Szrj gimple_seq_discard (chain->init_seq);
56038fd1498Szrj
56138fd1498Szrj chain->finis.release ();
56238fd1498Szrj if (chain->fini_seq)
56338fd1498Szrj gimple_seq_discard (chain->fini_seq);
56438fd1498Szrj
56538fd1498Szrj free (chain);
56638fd1498Szrj }
56738fd1498Szrj
56838fd1498Szrj /* Frees CHAINS. */
56938fd1498Szrj
57038fd1498Szrj static void
release_chains(vec<chain_p> chains)57138fd1498Szrj release_chains (vec<chain_p> chains)
57238fd1498Szrj {
57338fd1498Szrj unsigned i;
57438fd1498Szrj chain_p chain;
57538fd1498Szrj
57638fd1498Szrj FOR_EACH_VEC_ELT (chains, i, chain)
57738fd1498Szrj release_chain (chain);
57838fd1498Szrj chains.release ();
57938fd1498Szrj }
58038fd1498Szrj
58138fd1498Szrj /* Frees a component COMP. */
58238fd1498Szrj
58338fd1498Szrj static void
release_component(struct component * comp)58438fd1498Szrj release_component (struct component *comp)
58538fd1498Szrj {
58638fd1498Szrj comp->refs.release ();
58738fd1498Szrj free (comp);
58838fd1498Szrj }
58938fd1498Szrj
59038fd1498Szrj /* Frees list of components COMPS. */
59138fd1498Szrj
59238fd1498Szrj static void
release_components(struct component * comps)59338fd1498Szrj release_components (struct component *comps)
59438fd1498Szrj {
59538fd1498Szrj struct component *act, *next;
59638fd1498Szrj
59738fd1498Szrj for (act = comps; act; act = next)
59838fd1498Szrj {
59938fd1498Szrj next = act->next;
60038fd1498Szrj release_component (act);
60138fd1498Szrj }
60238fd1498Szrj }
60338fd1498Szrj
60438fd1498Szrj /* Finds a root of tree given by FATHERS containing A, and performs path
60538fd1498Szrj shortening. */
60638fd1498Szrj
60738fd1498Szrj static unsigned
component_of(unsigned fathers[],unsigned a)60838fd1498Szrj component_of (unsigned fathers[], unsigned a)
60938fd1498Szrj {
61038fd1498Szrj unsigned root, n;
61138fd1498Szrj
61238fd1498Szrj for (root = a; root != fathers[root]; root = fathers[root])
61338fd1498Szrj continue;
61438fd1498Szrj
61538fd1498Szrj for (; a != root; a = n)
61638fd1498Szrj {
61738fd1498Szrj n = fathers[a];
61838fd1498Szrj fathers[a] = root;
61938fd1498Szrj }
62038fd1498Szrj
62138fd1498Szrj return root;
62238fd1498Szrj }
62338fd1498Szrj
62438fd1498Szrj /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
62538fd1498Szrj components, A and B are components to merge. */
62638fd1498Szrj
62738fd1498Szrj static void
merge_comps(unsigned fathers[],unsigned sizes[],unsigned a,unsigned b)62838fd1498Szrj merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
62938fd1498Szrj {
63038fd1498Szrj unsigned ca = component_of (fathers, a);
63138fd1498Szrj unsigned cb = component_of (fathers, b);
63238fd1498Szrj
63338fd1498Szrj if (ca == cb)
63438fd1498Szrj return;
63538fd1498Szrj
63638fd1498Szrj if (sizes[ca] < sizes[cb])
63738fd1498Szrj {
63838fd1498Szrj sizes[cb] += sizes[ca];
63938fd1498Szrj fathers[ca] = cb;
64038fd1498Szrj }
64138fd1498Szrj else
64238fd1498Szrj {
64338fd1498Szrj sizes[ca] += sizes[cb];
64438fd1498Szrj fathers[cb] = ca;
64538fd1498Szrj }
64638fd1498Szrj }
64738fd1498Szrj
64838fd1498Szrj /* Returns true if A is a reference that is suitable for predictive commoning
64938fd1498Szrj in the innermost loop that contains it. REF_STEP is set according to the
65038fd1498Szrj step of the reference A. */
65138fd1498Szrj
65238fd1498Szrj static bool
suitable_reference_p(struct data_reference * a,enum ref_step_type * ref_step)65338fd1498Szrj suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
65438fd1498Szrj {
65538fd1498Szrj tree ref = DR_REF (a), step = DR_STEP (a);
65638fd1498Szrj
65738fd1498Szrj if (!step
65838fd1498Szrj || TREE_THIS_VOLATILE (ref)
65938fd1498Szrj || !is_gimple_reg_type (TREE_TYPE (ref))
66038fd1498Szrj || tree_could_throw_p (ref))
66138fd1498Szrj return false;
66238fd1498Szrj
66338fd1498Szrj if (integer_zerop (step))
66438fd1498Szrj *ref_step = RS_INVARIANT;
66538fd1498Szrj else if (integer_nonzerop (step))
66638fd1498Szrj *ref_step = RS_NONZERO;
66738fd1498Szrj else
66838fd1498Szrj *ref_step = RS_ANY;
66938fd1498Szrj
67038fd1498Szrj return true;
67138fd1498Szrj }
67238fd1498Szrj
67338fd1498Szrj /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
67438fd1498Szrj
67538fd1498Szrj static void
aff_combination_dr_offset(struct data_reference * dr,aff_tree * offset)67638fd1498Szrj aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
67738fd1498Szrj {
67838fd1498Szrj tree type = TREE_TYPE (DR_OFFSET (dr));
67938fd1498Szrj aff_tree delta;
68038fd1498Szrj
68138fd1498Szrj tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
68238fd1498Szrj &name_expansions);
68338fd1498Szrj aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr)));
68438fd1498Szrj aff_combination_add (offset, &delta);
68538fd1498Szrj }
68638fd1498Szrj
68738fd1498Szrj /* Determines number of iterations of the innermost enclosing loop before B
68838fd1498Szrj refers to exactly the same location as A and stores it to OFF. If A and
68938fd1498Szrj B do not have the same step, they never meet, or anything else fails,
69038fd1498Szrj returns false, otherwise returns true. Both A and B are assumed to
69138fd1498Szrj satisfy suitable_reference_p. */
69238fd1498Szrj
69338fd1498Szrj static bool
determine_offset(struct data_reference * a,struct data_reference * b,poly_widest_int * off)69438fd1498Szrj determine_offset (struct data_reference *a, struct data_reference *b,
69538fd1498Szrj poly_widest_int *off)
69638fd1498Szrj {
69738fd1498Szrj aff_tree diff, baseb, step;
69838fd1498Szrj tree typea, typeb;
69938fd1498Szrj
70038fd1498Szrj /* Check that both the references access the location in the same type. */
70138fd1498Szrj typea = TREE_TYPE (DR_REF (a));
70238fd1498Szrj typeb = TREE_TYPE (DR_REF (b));
70338fd1498Szrj if (!useless_type_conversion_p (typeb, typea))
70438fd1498Szrj return false;
70538fd1498Szrj
70638fd1498Szrj /* Check whether the base address and the step of both references is the
70738fd1498Szrj same. */
70838fd1498Szrj if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
70938fd1498Szrj || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
71038fd1498Szrj return false;
71138fd1498Szrj
71238fd1498Szrj if (integer_zerop (DR_STEP (a)))
71338fd1498Szrj {
71438fd1498Szrj /* If the references have loop invariant address, check that they access
71538fd1498Szrj exactly the same location. */
71638fd1498Szrj *off = 0;
71738fd1498Szrj return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
71838fd1498Szrj && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
71938fd1498Szrj }
72038fd1498Szrj
72138fd1498Szrj /* Compare the offsets of the addresses, and check whether the difference
72238fd1498Szrj is a multiple of step. */
72338fd1498Szrj aff_combination_dr_offset (a, &diff);
72438fd1498Szrj aff_combination_dr_offset (b, &baseb);
72538fd1498Szrj aff_combination_scale (&baseb, -1);
72638fd1498Szrj aff_combination_add (&diff, &baseb);
72738fd1498Szrj
72838fd1498Szrj tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
72938fd1498Szrj &step, &name_expansions);
73038fd1498Szrj return aff_combination_constant_multiple_p (&diff, &step, off);
73138fd1498Szrj }
73238fd1498Szrj
73338fd1498Szrj /* Returns the last basic block in LOOP for that we are sure that
73438fd1498Szrj it is executed whenever the loop is entered. */
73538fd1498Szrj
73638fd1498Szrj static basic_block
last_always_executed_block(struct loop * loop)73738fd1498Szrj last_always_executed_block (struct loop *loop)
73838fd1498Szrj {
73938fd1498Szrj unsigned i;
74038fd1498Szrj vec<edge> exits = get_loop_exit_edges (loop);
74138fd1498Szrj edge ex;
74238fd1498Szrj basic_block last = loop->latch;
74338fd1498Szrj
74438fd1498Szrj FOR_EACH_VEC_ELT (exits, i, ex)
74538fd1498Szrj last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
74638fd1498Szrj exits.release ();
74738fd1498Szrj
74838fd1498Szrj return last;
74938fd1498Szrj }
75038fd1498Szrj
75138fd1498Szrj /* Splits dependence graph on DATAREFS described by DEPENDS to components. */
75238fd1498Szrj
75338fd1498Szrj static struct component *
split_data_refs_to_components(struct loop * loop,vec<data_reference_p> datarefs,vec<ddr_p> depends)75438fd1498Szrj split_data_refs_to_components (struct loop *loop,
75538fd1498Szrj vec<data_reference_p> datarefs,
75638fd1498Szrj vec<ddr_p> depends)
75738fd1498Szrj {
75838fd1498Szrj unsigned i, n = datarefs.length ();
75938fd1498Szrj unsigned ca, ia, ib, bad;
76038fd1498Szrj unsigned *comp_father = XNEWVEC (unsigned, n + 1);
76138fd1498Szrj unsigned *comp_size = XNEWVEC (unsigned, n + 1);
76238fd1498Szrj struct component **comps;
76338fd1498Szrj struct data_reference *dr, *dra, *drb;
76438fd1498Szrj struct data_dependence_relation *ddr;
76538fd1498Szrj struct component *comp_list = NULL, *comp;
76638fd1498Szrj dref dataref;
76738fd1498Szrj /* Don't do store elimination if loop has multiple exit edges. */
76838fd1498Szrj bool eliminate_store_p = single_exit (loop) != NULL;
76938fd1498Szrj basic_block last_always_executed = last_always_executed_block (loop);
77038fd1498Szrj
77138fd1498Szrj FOR_EACH_VEC_ELT (datarefs, i, dr)
77238fd1498Szrj {
77338fd1498Szrj if (!DR_REF (dr))
77438fd1498Szrj {
77538fd1498Szrj /* A fake reference for call or asm_expr that may clobber memory;
77638fd1498Szrj just fail. */
77738fd1498Szrj goto end;
77838fd1498Szrj }
77938fd1498Szrj /* predcom pass isn't prepared to handle calls with data references. */
78038fd1498Szrj if (is_gimple_call (DR_STMT (dr)))
78138fd1498Szrj goto end;
78238fd1498Szrj dr->aux = (void *) (size_t) i;
78338fd1498Szrj comp_father[i] = i;
78438fd1498Szrj comp_size[i] = 1;
78538fd1498Szrj }
78638fd1498Szrj
78738fd1498Szrj /* A component reserved for the "bad" data references. */
78838fd1498Szrj comp_father[n] = n;
78938fd1498Szrj comp_size[n] = 1;
79038fd1498Szrj
79138fd1498Szrj FOR_EACH_VEC_ELT (datarefs, i, dr)
79238fd1498Szrj {
79338fd1498Szrj enum ref_step_type dummy;
79438fd1498Szrj
79538fd1498Szrj if (!suitable_reference_p (dr, &dummy))
79638fd1498Szrj {
79738fd1498Szrj ia = (unsigned) (size_t) dr->aux;
79838fd1498Szrj merge_comps (comp_father, comp_size, n, ia);
79938fd1498Szrj }
80038fd1498Szrj }
80138fd1498Szrj
80238fd1498Szrj FOR_EACH_VEC_ELT (depends, i, ddr)
80338fd1498Szrj {
80438fd1498Szrj poly_widest_int dummy_off;
80538fd1498Szrj
80638fd1498Szrj if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
80738fd1498Szrj continue;
80838fd1498Szrj
80938fd1498Szrj dra = DDR_A (ddr);
81038fd1498Szrj drb = DDR_B (ddr);
81138fd1498Szrj
81238fd1498Szrj /* Don't do store elimination if there is any unknown dependence for
81338fd1498Szrj any store data reference. */
81438fd1498Szrj if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
81538fd1498Szrj && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
81638fd1498Szrj || DDR_NUM_DIST_VECTS (ddr) == 0))
81738fd1498Szrj eliminate_store_p = false;
81838fd1498Szrj
81938fd1498Szrj ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
82038fd1498Szrj ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
82138fd1498Szrj if (ia == ib)
82238fd1498Szrj continue;
82338fd1498Szrj
82438fd1498Szrj bad = component_of (comp_father, n);
82538fd1498Szrj
82638fd1498Szrj /* If both A and B are reads, we may ignore unsuitable dependences. */
82738fd1498Szrj if (DR_IS_READ (dra) && DR_IS_READ (drb))
82838fd1498Szrj {
82938fd1498Szrj if (ia == bad || ib == bad
83038fd1498Szrj || !determine_offset (dra, drb, &dummy_off))
83138fd1498Szrj continue;
83238fd1498Szrj }
83338fd1498Szrj /* If A is read and B write or vice versa and there is unsuitable
83438fd1498Szrj dependence, instead of merging both components into a component
83538fd1498Szrj that will certainly not pass suitable_component_p, just put the
83638fd1498Szrj read into bad component, perhaps at least the write together with
83738fd1498Szrj all the other data refs in it's component will be optimizable. */
83838fd1498Szrj else if (DR_IS_READ (dra) && ib != bad)
83938fd1498Szrj {
84038fd1498Szrj if (ia == bad)
84138fd1498Szrj continue;
84238fd1498Szrj else if (!determine_offset (dra, drb, &dummy_off))
84338fd1498Szrj {
84438fd1498Szrj merge_comps (comp_father, comp_size, bad, ia);
84538fd1498Szrj continue;
84638fd1498Szrj }
84738fd1498Szrj }
84838fd1498Szrj else if (DR_IS_READ (drb) && ia != bad)
84938fd1498Szrj {
85038fd1498Szrj if (ib == bad)
85138fd1498Szrj continue;
85238fd1498Szrj else if (!determine_offset (dra, drb, &dummy_off))
85338fd1498Szrj {
85438fd1498Szrj merge_comps (comp_father, comp_size, bad, ib);
85538fd1498Szrj continue;
85638fd1498Szrj }
85738fd1498Szrj }
85838fd1498Szrj else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)
85938fd1498Szrj && ia != bad && ib != bad
86038fd1498Szrj && !determine_offset (dra, drb, &dummy_off))
86138fd1498Szrj {
86238fd1498Szrj merge_comps (comp_father, comp_size, bad, ia);
86338fd1498Szrj merge_comps (comp_father, comp_size, bad, ib);
86438fd1498Szrj continue;
86538fd1498Szrj }
86638fd1498Szrj
86738fd1498Szrj merge_comps (comp_father, comp_size, ia, ib);
86838fd1498Szrj }
86938fd1498Szrj
87038fd1498Szrj if (eliminate_store_p)
87138fd1498Szrj {
87238fd1498Szrj tree niters = number_of_latch_executions (loop);
87338fd1498Szrj
87438fd1498Szrj /* Don't do store elimination if niters info is unknown because stores
87538fd1498Szrj in the last iteration can't be eliminated and we need to recover it
87638fd1498Szrj after loop. */
87738fd1498Szrj eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know);
87838fd1498Szrj }
87938fd1498Szrj
88038fd1498Szrj comps = XCNEWVEC (struct component *, n);
88138fd1498Szrj bad = component_of (comp_father, n);
88238fd1498Szrj FOR_EACH_VEC_ELT (datarefs, i, dr)
88338fd1498Szrj {
88438fd1498Szrj ia = (unsigned) (size_t) dr->aux;
88538fd1498Szrj ca = component_of (comp_father, ia);
88638fd1498Szrj if (ca == bad)
88738fd1498Szrj continue;
88838fd1498Szrj
88938fd1498Szrj comp = comps[ca];
89038fd1498Szrj if (!comp)
89138fd1498Szrj {
89238fd1498Szrj comp = XCNEW (struct component);
89338fd1498Szrj comp->refs.create (comp_size[ca]);
89438fd1498Szrj comp->eliminate_store_p = eliminate_store_p;
89538fd1498Szrj comps[ca] = comp;
89638fd1498Szrj }
89738fd1498Szrj
89838fd1498Szrj dataref = XCNEW (struct dref_d);
89938fd1498Szrj dataref->ref = dr;
90038fd1498Szrj dataref->stmt = DR_STMT (dr);
90138fd1498Szrj dataref->offset = 0;
90238fd1498Szrj dataref->distance = 0;
90338fd1498Szrj
90438fd1498Szrj dataref->always_accessed
90538fd1498Szrj = dominated_by_p (CDI_DOMINATORS, last_always_executed,
90638fd1498Szrj gimple_bb (dataref->stmt));
90738fd1498Szrj dataref->pos = comp->refs.length ();
90838fd1498Szrj comp->refs.quick_push (dataref);
90938fd1498Szrj }
91038fd1498Szrj
91138fd1498Szrj for (i = 0; i < n; i++)
91238fd1498Szrj {
91338fd1498Szrj comp = comps[i];
91438fd1498Szrj if (comp)
91538fd1498Szrj {
91638fd1498Szrj comp->next = comp_list;
91738fd1498Szrj comp_list = comp;
91838fd1498Szrj }
91938fd1498Szrj }
92038fd1498Szrj free (comps);
92138fd1498Szrj
92238fd1498Szrj end:
92338fd1498Szrj free (comp_father);
92438fd1498Szrj free (comp_size);
92538fd1498Szrj return comp_list;
92638fd1498Szrj }
92738fd1498Szrj
92838fd1498Szrj /* Returns true if the component COMP satisfies the conditions
92938fd1498Szrj described in 2) at the beginning of this file. LOOP is the current
93038fd1498Szrj loop. */
93138fd1498Szrj
93238fd1498Szrj static bool
suitable_component_p(struct loop * loop,struct component * comp)93338fd1498Szrj suitable_component_p (struct loop *loop, struct component *comp)
93438fd1498Szrj {
93538fd1498Szrj unsigned i;
93638fd1498Szrj dref a, first;
93738fd1498Szrj basic_block ba, bp = loop->header;
93838fd1498Szrj bool ok, has_write = false;
93938fd1498Szrj
94038fd1498Szrj FOR_EACH_VEC_ELT (comp->refs, i, a)
94138fd1498Szrj {
94238fd1498Szrj ba = gimple_bb (a->stmt);
94338fd1498Szrj
94438fd1498Szrj if (!just_once_each_iteration_p (loop, ba))
94538fd1498Szrj return false;
94638fd1498Szrj
94738fd1498Szrj gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
94838fd1498Szrj bp = ba;
94938fd1498Szrj
95038fd1498Szrj if (DR_IS_WRITE (a->ref))
95138fd1498Szrj has_write = true;
95238fd1498Szrj }
95338fd1498Szrj
95438fd1498Szrj first = comp->refs[0];
95538fd1498Szrj ok = suitable_reference_p (first->ref, &comp->comp_step);
95638fd1498Szrj gcc_assert (ok);
95738fd1498Szrj first->offset = 0;
95838fd1498Szrj
95938fd1498Szrj for (i = 1; comp->refs.iterate (i, &a); i++)
96038fd1498Szrj {
96138fd1498Szrj /* Polynomial offsets are no use, since we need to know the
96238fd1498Szrj gap between iteration numbers at compile time. */
96338fd1498Szrj poly_widest_int offset;
96438fd1498Szrj if (!determine_offset (first->ref, a->ref, &offset)
96538fd1498Szrj || !offset.is_constant (&a->offset))
96638fd1498Szrj return false;
96738fd1498Szrj
96838fd1498Szrj enum ref_step_type a_step;
96938fd1498Szrj gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
97038fd1498Szrj && a_step == comp->comp_step);
97138fd1498Szrj }
97238fd1498Szrj
97338fd1498Szrj /* If there is a write inside the component, we must know whether the
97438fd1498Szrj step is nonzero or not -- we would not otherwise be able to recognize
97538fd1498Szrj whether the value accessed by reads comes from the OFFSET-th iteration
97638fd1498Szrj or the previous one. */
97738fd1498Szrj if (has_write && comp->comp_step == RS_ANY)
97838fd1498Szrj return false;
97938fd1498Szrj
98038fd1498Szrj return true;
98138fd1498Szrj }
98238fd1498Szrj
98338fd1498Szrj /* Check the conditions on references inside each of components COMPS,
98438fd1498Szrj and remove the unsuitable components from the list. The new list
98538fd1498Szrj of components is returned. The conditions are described in 2) at
98638fd1498Szrj the beginning of this file. LOOP is the current loop. */
98738fd1498Szrj
98838fd1498Szrj static struct component *
filter_suitable_components(struct loop * loop,struct component * comps)98938fd1498Szrj filter_suitable_components (struct loop *loop, struct component *comps)
99038fd1498Szrj {
99138fd1498Szrj struct component **comp, *act;
99238fd1498Szrj
99338fd1498Szrj for (comp = &comps; *comp; )
99438fd1498Szrj {
99538fd1498Szrj act = *comp;
99638fd1498Szrj if (suitable_component_p (loop, act))
99738fd1498Szrj comp = &act->next;
99838fd1498Szrj else
99938fd1498Szrj {
100038fd1498Szrj dref ref;
100138fd1498Szrj unsigned i;
100238fd1498Szrj
100338fd1498Szrj *comp = act->next;
100438fd1498Szrj FOR_EACH_VEC_ELT (act->refs, i, ref)
100538fd1498Szrj free (ref);
100638fd1498Szrj release_component (act);
100738fd1498Szrj }
100838fd1498Szrj }
100938fd1498Szrj
101038fd1498Szrj return comps;
101138fd1498Szrj }
101238fd1498Szrj
101338fd1498Szrj /* Compares two drefs A and B by their offset and position. Callback for
101438fd1498Szrj qsort. */
101538fd1498Szrj
101638fd1498Szrj static int
order_drefs(const void * a,const void * b)101738fd1498Szrj order_drefs (const void *a, const void *b)
101838fd1498Szrj {
101938fd1498Szrj const dref *const da = (const dref *) a;
102038fd1498Szrj const dref *const db = (const dref *) b;
102138fd1498Szrj int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
102238fd1498Szrj
102338fd1498Szrj if (offcmp != 0)
102438fd1498Szrj return offcmp;
102538fd1498Szrj
102638fd1498Szrj return (*da)->pos - (*db)->pos;
102738fd1498Szrj }
102838fd1498Szrj
102938fd1498Szrj /* Compares two drefs A and B by their position. Callback for qsort. */
103038fd1498Szrj
103138fd1498Szrj static int
order_drefs_by_pos(const void * a,const void * b)103238fd1498Szrj order_drefs_by_pos (const void *a, const void *b)
103338fd1498Szrj {
103438fd1498Szrj const dref *const da = (const dref *) a;
103538fd1498Szrj const dref *const db = (const dref *) b;
103638fd1498Szrj
103738fd1498Szrj return (*da)->pos - (*db)->pos;
103838fd1498Szrj }
103938fd1498Szrj
104038fd1498Szrj /* Returns root of the CHAIN. */
104138fd1498Szrj
104238fd1498Szrj static inline dref
get_chain_root(chain_p chain)104338fd1498Szrj get_chain_root (chain_p chain)
104438fd1498Szrj {
104538fd1498Szrj return chain->refs[0];
104638fd1498Szrj }
104738fd1498Szrj
104838fd1498Szrj /* Given CHAIN, returns the last write ref at DISTANCE, or NULL if it doesn't
104938fd1498Szrj exist. */
105038fd1498Szrj
105138fd1498Szrj static inline dref
get_chain_last_write_at(chain_p chain,unsigned distance)105238fd1498Szrj get_chain_last_write_at (chain_p chain, unsigned distance)
105338fd1498Szrj {
105438fd1498Szrj for (unsigned i = chain->refs.length (); i > 0; i--)
105538fd1498Szrj if (DR_IS_WRITE (chain->refs[i - 1]->ref)
105638fd1498Szrj && distance == chain->refs[i - 1]->distance)
105738fd1498Szrj return chain->refs[i - 1];
105838fd1498Szrj
105938fd1498Szrj return NULL;
106038fd1498Szrj }
106138fd1498Szrj
106238fd1498Szrj /* Given CHAIN, returns the last write ref with the same distance before load
106338fd1498Szrj at index LOAD_IDX, or NULL if it doesn't exist. */
106438fd1498Szrj
106538fd1498Szrj static inline dref
get_chain_last_write_before_load(chain_p chain,unsigned load_idx)106638fd1498Szrj get_chain_last_write_before_load (chain_p chain, unsigned load_idx)
106738fd1498Szrj {
106838fd1498Szrj gcc_assert (load_idx < chain->refs.length ());
106938fd1498Szrj
107038fd1498Szrj unsigned distance = chain->refs[load_idx]->distance;
107138fd1498Szrj
107238fd1498Szrj for (unsigned i = load_idx; i > 0; i--)
107338fd1498Szrj if (DR_IS_WRITE (chain->refs[i - 1]->ref)
107438fd1498Szrj && distance == chain->refs[i - 1]->distance)
107538fd1498Szrj return chain->refs[i - 1];
107638fd1498Szrj
107738fd1498Szrj return NULL;
107838fd1498Szrj }
107938fd1498Szrj
108038fd1498Szrj /* Adds REF to the chain CHAIN. */
108138fd1498Szrj
108238fd1498Szrj static void
add_ref_to_chain(chain_p chain,dref ref)108338fd1498Szrj add_ref_to_chain (chain_p chain, dref ref)
108438fd1498Szrj {
108538fd1498Szrj dref root = get_chain_root (chain);
108638fd1498Szrj
108738fd1498Szrj gcc_assert (wi::les_p (root->offset, ref->offset));
108838fd1498Szrj widest_int dist = ref->offset - root->offset;
108938fd1498Szrj gcc_assert (wi::fits_uhwi_p (dist));
109038fd1498Szrj
109138fd1498Szrj chain->refs.safe_push (ref);
109238fd1498Szrj
109338fd1498Szrj ref->distance = dist.to_uhwi ();
109438fd1498Szrj
109538fd1498Szrj if (ref->distance >= chain->length)
109638fd1498Szrj {
109738fd1498Szrj chain->length = ref->distance;
109838fd1498Szrj chain->has_max_use_after = false;
109938fd1498Szrj }
110038fd1498Szrj
110138fd1498Szrj /* Promote this chain to CT_STORE_STORE if it has multiple stores. */
110238fd1498Szrj if (DR_IS_WRITE (ref->ref))
110338fd1498Szrj chain->type = CT_STORE_STORE;
110438fd1498Szrj
110538fd1498Szrj /* Don't set the flag for store-store chain since there is no use. */
110638fd1498Szrj if (chain->type != CT_STORE_STORE
110738fd1498Szrj && ref->distance == chain->length
110838fd1498Szrj && ref->pos > root->pos)
110938fd1498Szrj chain->has_max_use_after = true;
111038fd1498Szrj
111138fd1498Szrj chain->all_always_accessed &= ref->always_accessed;
111238fd1498Szrj }
111338fd1498Szrj
111438fd1498Szrj /* Returns the chain for invariant component COMP. */
111538fd1498Szrj
111638fd1498Szrj static chain_p
make_invariant_chain(struct component * comp)111738fd1498Szrj make_invariant_chain (struct component *comp)
111838fd1498Szrj {
111938fd1498Szrj chain_p chain = XCNEW (struct chain);
112038fd1498Szrj unsigned i;
112138fd1498Szrj dref ref;
112238fd1498Szrj
112338fd1498Szrj chain->type = CT_INVARIANT;
112438fd1498Szrj
112538fd1498Szrj chain->all_always_accessed = true;
112638fd1498Szrj
112738fd1498Szrj FOR_EACH_VEC_ELT (comp->refs, i, ref)
112838fd1498Szrj {
112938fd1498Szrj chain->refs.safe_push (ref);
113038fd1498Szrj chain->all_always_accessed &= ref->always_accessed;
113138fd1498Szrj }
113238fd1498Szrj
113338fd1498Szrj chain->inits = vNULL;
113438fd1498Szrj chain->finis = vNULL;
113538fd1498Szrj
113638fd1498Szrj return chain;
113738fd1498Szrj }
113838fd1498Szrj
113938fd1498Szrj /* Make a new chain of type TYPE rooted at REF. */
114038fd1498Szrj
114138fd1498Szrj static chain_p
make_rooted_chain(dref ref,enum chain_type type)114238fd1498Szrj make_rooted_chain (dref ref, enum chain_type type)
114338fd1498Szrj {
114438fd1498Szrj chain_p chain = XCNEW (struct chain);
114538fd1498Szrj
114638fd1498Szrj chain->type = type;
114738fd1498Szrj chain->refs.safe_push (ref);
114838fd1498Szrj chain->all_always_accessed = ref->always_accessed;
114938fd1498Szrj ref->distance = 0;
115038fd1498Szrj
115138fd1498Szrj chain->inits = vNULL;
115238fd1498Szrj chain->finis = vNULL;
115338fd1498Szrj
115438fd1498Szrj return chain;
115538fd1498Szrj }
115638fd1498Szrj
115738fd1498Szrj /* Returns true if CHAIN is not trivial. */
115838fd1498Szrj
115938fd1498Szrj static bool
nontrivial_chain_p(chain_p chain)116038fd1498Szrj nontrivial_chain_p (chain_p chain)
116138fd1498Szrj {
116238fd1498Szrj return chain != NULL && chain->refs.length () > 1;
116338fd1498Szrj }
116438fd1498Szrj
116538fd1498Szrj /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
116638fd1498Szrj is no such name. */
116738fd1498Szrj
116838fd1498Szrj static tree
name_for_ref(dref ref)116938fd1498Szrj name_for_ref (dref ref)
117038fd1498Szrj {
117138fd1498Szrj tree name;
117238fd1498Szrj
117338fd1498Szrj if (is_gimple_assign (ref->stmt))
117438fd1498Szrj {
117538fd1498Szrj if (!ref->ref || DR_IS_READ (ref->ref))
117638fd1498Szrj name = gimple_assign_lhs (ref->stmt);
117738fd1498Szrj else
117838fd1498Szrj name = gimple_assign_rhs1 (ref->stmt);
117938fd1498Szrj }
118038fd1498Szrj else
118138fd1498Szrj name = PHI_RESULT (ref->stmt);
118238fd1498Szrj
118338fd1498Szrj return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
118438fd1498Szrj }
118538fd1498Szrj
118638fd1498Szrj /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
118738fd1498Szrj iterations of the innermost enclosing loop). */
118838fd1498Szrj
118938fd1498Szrj static bool
valid_initializer_p(struct data_reference * ref,unsigned distance,struct data_reference * root)119038fd1498Szrj valid_initializer_p (struct data_reference *ref,
119138fd1498Szrj unsigned distance, struct data_reference *root)
119238fd1498Szrj {
119338fd1498Szrj aff_tree diff, base, step;
119438fd1498Szrj poly_widest_int off;
119538fd1498Szrj
119638fd1498Szrj /* Both REF and ROOT must be accessing the same object. */
119738fd1498Szrj if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
119838fd1498Szrj return false;
119938fd1498Szrj
120038fd1498Szrj /* The initializer is defined outside of loop, hence its address must be
120138fd1498Szrj invariant inside the loop. */
120238fd1498Szrj gcc_assert (integer_zerop (DR_STEP (ref)));
120338fd1498Szrj
120438fd1498Szrj /* If the address of the reference is invariant, initializer must access
120538fd1498Szrj exactly the same location. */
120638fd1498Szrj if (integer_zerop (DR_STEP (root)))
120738fd1498Szrj return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
120838fd1498Szrj && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));
120938fd1498Szrj
121038fd1498Szrj /* Verify that this index of REF is equal to the root's index at
121138fd1498Szrj -DISTANCE-th iteration. */
121238fd1498Szrj aff_combination_dr_offset (root, &diff);
121338fd1498Szrj aff_combination_dr_offset (ref, &base);
121438fd1498Szrj aff_combination_scale (&base, -1);
121538fd1498Szrj aff_combination_add (&diff, &base);
121638fd1498Szrj
121738fd1498Szrj tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
121838fd1498Szrj &step, &name_expansions);
121938fd1498Szrj if (!aff_combination_constant_multiple_p (&diff, &step, &off))
122038fd1498Szrj return false;
122138fd1498Szrj
122238fd1498Szrj if (maybe_ne (off, distance))
122338fd1498Szrj return false;
122438fd1498Szrj
122538fd1498Szrj return true;
122638fd1498Szrj }
122738fd1498Szrj
122838fd1498Szrj /* Finds looparound phi node of LOOP that copies the value of REF, and if its
122938fd1498Szrj initial value is correct (equal to initial value of REF shifted by one
123038fd1498Szrj iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
123138fd1498Szrj is the root of the current chain. */
123238fd1498Szrj
123338fd1498Szrj static gphi *
find_looparound_phi(struct loop * loop,dref ref,dref root)123438fd1498Szrj find_looparound_phi (struct loop *loop, dref ref, dref root)
123538fd1498Szrj {
123638fd1498Szrj tree name, init, init_ref;
123738fd1498Szrj gphi *phi = NULL;
123838fd1498Szrj gimple *init_stmt;
123938fd1498Szrj edge latch = loop_latch_edge (loop);
124038fd1498Szrj struct data_reference init_dr;
124138fd1498Szrj gphi_iterator psi;
124238fd1498Szrj
124338fd1498Szrj if (is_gimple_assign (ref->stmt))
124438fd1498Szrj {
124538fd1498Szrj if (DR_IS_READ (ref->ref))
124638fd1498Szrj name = gimple_assign_lhs (ref->stmt);
124738fd1498Szrj else
124838fd1498Szrj name = gimple_assign_rhs1 (ref->stmt);
124938fd1498Szrj }
125038fd1498Szrj else
125138fd1498Szrj name = PHI_RESULT (ref->stmt);
125238fd1498Szrj if (!name)
125338fd1498Szrj return NULL;
125438fd1498Szrj
125538fd1498Szrj for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
125638fd1498Szrj {
125738fd1498Szrj phi = psi.phi ();
125838fd1498Szrj if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
125938fd1498Szrj break;
126038fd1498Szrj }
126138fd1498Szrj
126238fd1498Szrj if (gsi_end_p (psi))
126338fd1498Szrj return NULL;
126438fd1498Szrj
126538fd1498Szrj init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
126638fd1498Szrj if (TREE_CODE (init) != SSA_NAME)
126738fd1498Szrj return NULL;
126838fd1498Szrj init_stmt = SSA_NAME_DEF_STMT (init);
126938fd1498Szrj if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
127038fd1498Szrj return NULL;
127138fd1498Szrj gcc_assert (gimple_assign_lhs (init_stmt) == init);
127238fd1498Szrj
127338fd1498Szrj init_ref = gimple_assign_rhs1 (init_stmt);
127438fd1498Szrj if (!REFERENCE_CLASS_P (init_ref)
127538fd1498Szrj && !DECL_P (init_ref))
127638fd1498Szrj return NULL;
127738fd1498Szrj
127838fd1498Szrj /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
127938fd1498Szrj loop enclosing PHI). */
128038fd1498Szrj memset (&init_dr, 0, sizeof (struct data_reference));
128138fd1498Szrj DR_REF (&init_dr) = init_ref;
128238fd1498Szrj DR_STMT (&init_dr) = phi;
128338fd1498Szrj if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop))
128438fd1498Szrj return NULL;
128538fd1498Szrj
128638fd1498Szrj if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
128738fd1498Szrj return NULL;
128838fd1498Szrj
128938fd1498Szrj return phi;
129038fd1498Szrj }
129138fd1498Szrj
129238fd1498Szrj /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
129338fd1498Szrj
129438fd1498Szrj static void
insert_looparound_copy(chain_p chain,dref ref,gphi * phi)129538fd1498Szrj insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
129638fd1498Szrj {
129738fd1498Szrj dref nw = XCNEW (struct dref_d), aref;
129838fd1498Szrj unsigned i;
129938fd1498Szrj
130038fd1498Szrj nw->stmt = phi;
130138fd1498Szrj nw->distance = ref->distance + 1;
130238fd1498Szrj nw->always_accessed = 1;
130338fd1498Szrj
130438fd1498Szrj FOR_EACH_VEC_ELT (chain->refs, i, aref)
130538fd1498Szrj if (aref->distance >= nw->distance)
130638fd1498Szrj break;
130738fd1498Szrj chain->refs.safe_insert (i, nw);
130838fd1498Szrj
130938fd1498Szrj if (nw->distance > chain->length)
131038fd1498Szrj {
131138fd1498Szrj chain->length = nw->distance;
131238fd1498Szrj chain->has_max_use_after = false;
131338fd1498Szrj }
131438fd1498Szrj }
131538fd1498Szrj
131638fd1498Szrj /* For references in CHAIN that are copied around the LOOP (created previously
131738fd1498Szrj by PRE, or by user), add the results of such copies to the chain. This
131838fd1498Szrj enables us to remove the copies by unrolling, and may need less registers
131938fd1498Szrj (also, it may allow us to combine chains together). */
132038fd1498Szrj
132138fd1498Szrj static void
add_looparound_copies(struct loop * loop,chain_p chain)132238fd1498Szrj add_looparound_copies (struct loop *loop, chain_p chain)
132338fd1498Szrj {
132438fd1498Szrj unsigned i;
132538fd1498Szrj dref ref, root = get_chain_root (chain);
132638fd1498Szrj gphi *phi;
132738fd1498Szrj
132838fd1498Szrj if (chain->type == CT_STORE_STORE)
132938fd1498Szrj return;
133038fd1498Szrj
133138fd1498Szrj FOR_EACH_VEC_ELT (chain->refs, i, ref)
133238fd1498Szrj {
133338fd1498Szrj phi = find_looparound_phi (loop, ref, root);
133438fd1498Szrj if (!phi)
133538fd1498Szrj continue;
133638fd1498Szrj
133738fd1498Szrj bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
133838fd1498Szrj insert_looparound_copy (chain, ref, phi);
133938fd1498Szrj }
134038fd1498Szrj }
134138fd1498Szrj
134238fd1498Szrj /* Find roots of the values and determine distances in the component COMP.
134338fd1498Szrj The references are redistributed into CHAINS. LOOP is the current
134438fd1498Szrj loop. */
134538fd1498Szrj
134638fd1498Szrj static void
determine_roots_comp(struct loop * loop,struct component * comp,vec<chain_p> * chains)134738fd1498Szrj determine_roots_comp (struct loop *loop,
134838fd1498Szrj struct component *comp,
134938fd1498Szrj vec<chain_p> *chains)
135038fd1498Szrj {
135138fd1498Szrj unsigned i;
135238fd1498Szrj dref a;
135338fd1498Szrj chain_p chain = NULL;
135438fd1498Szrj widest_int last_ofs = 0;
135538fd1498Szrj enum chain_type type;
135638fd1498Szrj
135738fd1498Szrj /* Invariants are handled specially. */
135838fd1498Szrj if (comp->comp_step == RS_INVARIANT)
135938fd1498Szrj {
136038fd1498Szrj chain = make_invariant_chain (comp);
136138fd1498Szrj chains->safe_push (chain);
136238fd1498Szrj return;
136338fd1498Szrj }
136438fd1498Szrj
136538fd1498Szrj /* Trivial component. */
136638fd1498Szrj if (comp->refs.length () <= 1)
136738fd1498Szrj {
136838fd1498Szrj if (comp->refs.length () == 1)
136938fd1498Szrj {
137038fd1498Szrj free (comp->refs[0]);
137138fd1498Szrj comp->refs.truncate (0);
137238fd1498Szrj }
137338fd1498Szrj return;
137438fd1498Szrj }
137538fd1498Szrj
137638fd1498Szrj comp->refs.qsort (order_drefs);
137738fd1498Szrj
137838fd1498Szrj /* For Store-Store chain, we only support load if it is dominated by a
137938fd1498Szrj store statement in the same iteration of loop. */
138038fd1498Szrj if (comp->eliminate_store_p)
138138fd1498Szrj for (a = NULL, i = 0; i < comp->refs.length (); i++)
138238fd1498Szrj {
138338fd1498Szrj if (DR_IS_WRITE (comp->refs[i]->ref))
138438fd1498Szrj a = comp->refs[i];
138538fd1498Szrj else if (a == NULL || a->offset != comp->refs[i]->offset)
138638fd1498Szrj {
138738fd1498Szrj /* If there is load that is not dominated by a store in the
138838fd1498Szrj same iteration of loop, clear the flag so no Store-Store
138938fd1498Szrj chain is generated for this component. */
139038fd1498Szrj comp->eliminate_store_p = false;
139138fd1498Szrj break;
139238fd1498Szrj }
139338fd1498Szrj }
139438fd1498Szrj
139538fd1498Szrj /* Determine roots and create chains for components. */
139638fd1498Szrj FOR_EACH_VEC_ELT (comp->refs, i, a)
139738fd1498Szrj {
139838fd1498Szrj if (!chain
139938fd1498Szrj || (chain->type == CT_LOAD && DR_IS_WRITE (a->ref))
140038fd1498Szrj || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref))
140138fd1498Szrj || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
140238fd1498Szrj {
140338fd1498Szrj if (nontrivial_chain_p (chain))
140438fd1498Szrj {
140538fd1498Szrj add_looparound_copies (loop, chain);
140638fd1498Szrj chains->safe_push (chain);
140738fd1498Szrj }
140838fd1498Szrj else
140938fd1498Szrj release_chain (chain);
141038fd1498Szrj
141138fd1498Szrj /* Determine type of the chain. If the root reference is a load,
141238fd1498Szrj this can only be a CT_LOAD chain; other chains are intialized
141338fd1498Szrj to CT_STORE_LOAD and might be promoted to CT_STORE_STORE when
141438fd1498Szrj new reference is added. */
141538fd1498Szrj type = DR_IS_READ (a->ref) ? CT_LOAD : CT_STORE_LOAD;
141638fd1498Szrj chain = make_rooted_chain (a, type);
141738fd1498Szrj last_ofs = a->offset;
141838fd1498Szrj continue;
141938fd1498Szrj }
142038fd1498Szrj
142138fd1498Szrj add_ref_to_chain (chain, a);
142238fd1498Szrj }
142338fd1498Szrj
142438fd1498Szrj if (nontrivial_chain_p (chain))
142538fd1498Szrj {
142638fd1498Szrj add_looparound_copies (loop, chain);
142738fd1498Szrj chains->safe_push (chain);
142838fd1498Szrj }
142938fd1498Szrj else
143038fd1498Szrj release_chain (chain);
143138fd1498Szrj }
143238fd1498Szrj
143338fd1498Szrj /* Find roots of the values and determine distances in components COMPS, and
143438fd1498Szrj separates the references to CHAINS. LOOP is the current loop. */
143538fd1498Szrj
143638fd1498Szrj static void
determine_roots(struct loop * loop,struct component * comps,vec<chain_p> * chains)143738fd1498Szrj determine_roots (struct loop *loop,
143838fd1498Szrj struct component *comps, vec<chain_p> *chains)
143938fd1498Szrj {
144038fd1498Szrj struct component *comp;
144138fd1498Szrj
144238fd1498Szrj for (comp = comps; comp; comp = comp->next)
144338fd1498Szrj determine_roots_comp (loop, comp, chains);
144438fd1498Szrj }
144538fd1498Szrj
144638fd1498Szrj /* Replace the reference in statement STMT with temporary variable
144738fd1498Szrj NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
144838fd1498Szrj the reference in the statement. IN_LHS is true if the reference
144938fd1498Szrj is in the lhs of STMT, false if it is in rhs. */
145038fd1498Szrj
145138fd1498Szrj static void
replace_ref_with(gimple * stmt,tree new_tree,bool set,bool in_lhs)145238fd1498Szrj replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
145338fd1498Szrj {
145438fd1498Szrj tree val;
145538fd1498Szrj gassign *new_stmt;
145638fd1498Szrj gimple_stmt_iterator bsi, psi;
145738fd1498Szrj
145838fd1498Szrj if (gimple_code (stmt) == GIMPLE_PHI)
145938fd1498Szrj {
146038fd1498Szrj gcc_assert (!in_lhs && !set);
146138fd1498Szrj
146238fd1498Szrj val = PHI_RESULT (stmt);
146338fd1498Szrj bsi = gsi_after_labels (gimple_bb (stmt));
146438fd1498Szrj psi = gsi_for_stmt (stmt);
146538fd1498Szrj remove_phi_node (&psi, false);
146638fd1498Szrj
146738fd1498Szrj /* Turn the phi node into GIMPLE_ASSIGN. */
146838fd1498Szrj new_stmt = gimple_build_assign (val, new_tree);
146938fd1498Szrj gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
147038fd1498Szrj return;
147138fd1498Szrj }
147238fd1498Szrj
147338fd1498Szrj /* Since the reference is of gimple_reg type, it should only
147438fd1498Szrj appear as lhs or rhs of modify statement. */
147538fd1498Szrj gcc_assert (is_gimple_assign (stmt));
147638fd1498Szrj
147738fd1498Szrj bsi = gsi_for_stmt (stmt);
147838fd1498Szrj
147938fd1498Szrj /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
148038fd1498Szrj if (!set)
148138fd1498Szrj {
148238fd1498Szrj gcc_assert (!in_lhs);
148338fd1498Szrj gimple_assign_set_rhs_from_tree (&bsi, new_tree);
148438fd1498Szrj stmt = gsi_stmt (bsi);
148538fd1498Szrj update_stmt (stmt);
148638fd1498Szrj return;
148738fd1498Szrj }
148838fd1498Szrj
148938fd1498Szrj if (in_lhs)
149038fd1498Szrj {
149138fd1498Szrj /* We have statement
149238fd1498Szrj
149338fd1498Szrj OLD = VAL
149438fd1498Szrj
149538fd1498Szrj If OLD is a memory reference, then VAL is gimple_val, and we transform
149638fd1498Szrj this to
149738fd1498Szrj
149838fd1498Szrj OLD = VAL
149938fd1498Szrj NEW = VAL
150038fd1498Szrj
150138fd1498Szrj Otherwise, we are replacing a combination chain,
150238fd1498Szrj VAL is the expression that performs the combination, and OLD is an
150338fd1498Szrj SSA name. In this case, we transform the assignment to
150438fd1498Szrj
150538fd1498Szrj OLD = VAL
150638fd1498Szrj NEW = OLD
150738fd1498Szrj
150838fd1498Szrj */
150938fd1498Szrj
151038fd1498Szrj val = gimple_assign_lhs (stmt);
151138fd1498Szrj if (TREE_CODE (val) != SSA_NAME)
151238fd1498Szrj {
151338fd1498Szrj val = gimple_assign_rhs1 (stmt);
151438fd1498Szrj gcc_assert (gimple_assign_single_p (stmt));
151538fd1498Szrj if (TREE_CLOBBER_P (val))
151638fd1498Szrj val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
151738fd1498Szrj else
151838fd1498Szrj gcc_assert (gimple_assign_copy_p (stmt));
151938fd1498Szrj }
152038fd1498Szrj }
152138fd1498Szrj else
152238fd1498Szrj {
152338fd1498Szrj /* VAL = OLD
152438fd1498Szrj
152538fd1498Szrj is transformed to
152638fd1498Szrj
152738fd1498Szrj VAL = OLD
152838fd1498Szrj NEW = VAL */
152938fd1498Szrj
153038fd1498Szrj val = gimple_assign_lhs (stmt);
153138fd1498Szrj }
153238fd1498Szrj
153338fd1498Szrj new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
153438fd1498Szrj gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
153538fd1498Szrj }
153638fd1498Szrj
153738fd1498Szrj /* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
153838fd1498Szrj of the loop it was analyzed in. Append init stmts to STMTS. */
153938fd1498Szrj
154038fd1498Szrj static tree
154138fd1498Szrj ref_at_iteration (data_reference_p dr, int iter,
154238fd1498Szrj gimple_seq *stmts, tree niters = NULL_TREE)
154338fd1498Szrj {
154438fd1498Szrj tree off = DR_OFFSET (dr);
154538fd1498Szrj tree coff = DR_INIT (dr);
154638fd1498Szrj tree ref = DR_REF (dr);
154738fd1498Szrj enum tree_code ref_code = ERROR_MARK;
154838fd1498Szrj tree ref_type = NULL_TREE;
154938fd1498Szrj tree ref_op1 = NULL_TREE;
155038fd1498Szrj tree ref_op2 = NULL_TREE;
155138fd1498Szrj tree new_offset;
155238fd1498Szrj
155338fd1498Szrj if (iter != 0)
155438fd1498Szrj {
155538fd1498Szrj new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter));
155638fd1498Szrj if (TREE_CODE (new_offset) == INTEGER_CST)
155738fd1498Szrj coff = size_binop (PLUS_EXPR, coff, new_offset);
155838fd1498Szrj else
155938fd1498Szrj off = size_binop (PLUS_EXPR, off, new_offset);
156038fd1498Szrj }
156138fd1498Szrj
156238fd1498Szrj if (niters != NULL_TREE)
156338fd1498Szrj {
156438fd1498Szrj niters = fold_convert (ssizetype, niters);
156538fd1498Szrj new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters);
156638fd1498Szrj if (TREE_CODE (niters) == INTEGER_CST)
156738fd1498Szrj coff = size_binop (PLUS_EXPR, coff, new_offset);
156838fd1498Szrj else
156938fd1498Szrj off = size_binop (PLUS_EXPR, off, new_offset);
157038fd1498Szrj }
157138fd1498Szrj
157238fd1498Szrj /* While data-ref analysis punts on bit offsets it still handles
157338fd1498Szrj bitfield accesses at byte boundaries. Cope with that. Note that
157438fd1498Szrj if the bitfield object also starts at a byte-boundary we can simply
157538fd1498Szrj replicate the COMPONENT_REF, but we have to subtract the component's
157638fd1498Szrj byte-offset from the MEM_REF address first.
157738fd1498Szrj Otherwise we simply build a BIT_FIELD_REF knowing that the bits
157838fd1498Szrj start at offset zero. */
157938fd1498Szrj if (TREE_CODE (ref) == COMPONENT_REF
158038fd1498Szrj && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
158138fd1498Szrj {
158238fd1498Szrj unsigned HOST_WIDE_INT boff;
158338fd1498Szrj tree field = TREE_OPERAND (ref, 1);
158438fd1498Szrj tree offset = component_ref_field_offset (ref);
158538fd1498Szrj ref_type = TREE_TYPE (ref);
158638fd1498Szrj boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
158738fd1498Szrj /* This can occur in Ada. See the comment in get_bit_range. */
158838fd1498Szrj if (boff % BITS_PER_UNIT != 0
158938fd1498Szrj || !tree_fits_uhwi_p (offset))
159038fd1498Szrj {
159138fd1498Szrj ref_code = BIT_FIELD_REF;
159238fd1498Szrj ref_op1 = DECL_SIZE (field);
159338fd1498Szrj ref_op2 = bitsize_zero_node;
159438fd1498Szrj }
159538fd1498Szrj else
159638fd1498Szrj {
159738fd1498Szrj boff >>= LOG2_BITS_PER_UNIT;
159838fd1498Szrj boff += tree_to_uhwi (offset);
159938fd1498Szrj coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
160038fd1498Szrj ref_code = COMPONENT_REF;
160138fd1498Szrj ref_op1 = field;
160238fd1498Szrj ref_op2 = TREE_OPERAND (ref, 2);
160338fd1498Szrj ref = TREE_OPERAND (ref, 0);
160438fd1498Szrj }
160538fd1498Szrj }
160638fd1498Szrj tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
160738fd1498Szrj addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
160838fd1498Szrj is_gimple_mem_ref_addr, NULL_TREE);
160938fd1498Szrj tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
161038fd1498Szrj tree type = build_aligned_type (TREE_TYPE (ref),
161138fd1498Szrj get_object_alignment (ref));
161238fd1498Szrj ref = build2 (MEM_REF, type, addr, alias_ptr);
161338fd1498Szrj if (ref_type)
161438fd1498Szrj ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
161538fd1498Szrj return ref;
161638fd1498Szrj }
161738fd1498Szrj
161838fd1498Szrj /* Get the initialization expression for the INDEX-th temporary variable
161938fd1498Szrj of CHAIN. */
162038fd1498Szrj
162138fd1498Szrj static tree
get_init_expr(chain_p chain,unsigned index)162238fd1498Szrj get_init_expr (chain_p chain, unsigned index)
162338fd1498Szrj {
162438fd1498Szrj if (chain->type == CT_COMBINATION)
162538fd1498Szrj {
162638fd1498Szrj tree e1 = get_init_expr (chain->ch1, index);
162738fd1498Szrj tree e2 = get_init_expr (chain->ch2, index);
162838fd1498Szrj
162938fd1498Szrj return fold_build2 (chain->op, chain->rslt_type, e1, e2);
163038fd1498Szrj }
163138fd1498Szrj else
163238fd1498Szrj return chain->inits[index];
163338fd1498Szrj }
163438fd1498Szrj
163538fd1498Szrj /* Returns a new temporary variable used for the I-th variable carrying
163638fd1498Szrj value of REF. The variable's uid is marked in TMP_VARS. */
163738fd1498Szrj
163838fd1498Szrj static tree
predcom_tmp_var(tree ref,unsigned i,bitmap tmp_vars)163938fd1498Szrj predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
164038fd1498Szrj {
164138fd1498Szrj tree type = TREE_TYPE (ref);
164238fd1498Szrj /* We never access the components of the temporary variable in predictive
164338fd1498Szrj commoning. */
164438fd1498Szrj tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
164538fd1498Szrj bitmap_set_bit (tmp_vars, DECL_UID (var));
164638fd1498Szrj return var;
164738fd1498Szrj }
164838fd1498Szrj
164938fd1498Szrj /* Creates the variables for CHAIN, as well as phi nodes for them and
165038fd1498Szrj initialization on entry to LOOP. Uids of the newly created
165138fd1498Szrj temporary variables are marked in TMP_VARS. */
165238fd1498Szrj
165338fd1498Szrj static void
initialize_root_vars(struct loop * loop,chain_p chain,bitmap tmp_vars)165438fd1498Szrj initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
165538fd1498Szrj {
165638fd1498Szrj unsigned i;
165738fd1498Szrj unsigned n = chain->length;
165838fd1498Szrj dref root = get_chain_root (chain);
165938fd1498Szrj bool reuse_first = !chain->has_max_use_after;
166038fd1498Szrj tree ref, init, var, next;
166138fd1498Szrj gphi *phi;
166238fd1498Szrj gimple_seq stmts;
166338fd1498Szrj edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
166438fd1498Szrj
166538fd1498Szrj /* If N == 0, then all the references are within the single iteration. And
166638fd1498Szrj since this is an nonempty chain, reuse_first cannot be true. */
166738fd1498Szrj gcc_assert (n > 0 || !reuse_first);
166838fd1498Szrj
166938fd1498Szrj chain->vars.create (n + 1);
167038fd1498Szrj
167138fd1498Szrj if (chain->type == CT_COMBINATION)
167238fd1498Szrj ref = gimple_assign_lhs (root->stmt);
167338fd1498Szrj else
167438fd1498Szrj ref = DR_REF (root->ref);
167538fd1498Szrj
167638fd1498Szrj for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
167738fd1498Szrj {
167838fd1498Szrj var = predcom_tmp_var (ref, i, tmp_vars);
167938fd1498Szrj chain->vars.quick_push (var);
168038fd1498Szrj }
168138fd1498Szrj if (reuse_first)
168238fd1498Szrj chain->vars.quick_push (chain->vars[0]);
168338fd1498Szrj
168438fd1498Szrj FOR_EACH_VEC_ELT (chain->vars, i, var)
168538fd1498Szrj chain->vars[i] = make_ssa_name (var);
168638fd1498Szrj
168738fd1498Szrj for (i = 0; i < n; i++)
168838fd1498Szrj {
168938fd1498Szrj var = chain->vars[i];
169038fd1498Szrj next = chain->vars[i + 1];
169138fd1498Szrj init = get_init_expr (chain, i);
169238fd1498Szrj
169338fd1498Szrj init = force_gimple_operand (init, &stmts, true, NULL_TREE);
169438fd1498Szrj if (stmts)
169538fd1498Szrj gsi_insert_seq_on_edge_immediate (entry, stmts);
169638fd1498Szrj
169738fd1498Szrj phi = create_phi_node (var, loop->header);
169838fd1498Szrj add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
169938fd1498Szrj add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
170038fd1498Szrj }
170138fd1498Szrj }
170238fd1498Szrj
170338fd1498Szrj /* For inter-iteration store elimination CHAIN in LOOP, returns true if
170438fd1498Szrj all stores to be eliminated store loop invariant values into memory.
170538fd1498Szrj In this case, we can use these invariant values directly after LOOP. */
170638fd1498Szrj
170738fd1498Szrj static bool
is_inv_store_elimination_chain(struct loop * loop,chain_p chain)170838fd1498Szrj is_inv_store_elimination_chain (struct loop *loop, chain_p chain)
170938fd1498Szrj {
171038fd1498Szrj if (chain->length == 0 || chain->type != CT_STORE_STORE)
171138fd1498Szrj return false;
171238fd1498Szrj
171338fd1498Szrj gcc_assert (!chain->has_max_use_after);
171438fd1498Szrj
171538fd1498Szrj /* If loop iterates for unknown times or fewer times than chain->lenght,
171638fd1498Szrj we still need to setup root variable and propagate it with PHI node. */
171738fd1498Szrj tree niters = number_of_latch_executions (loop);
171838fd1498Szrj if (TREE_CODE (niters) != INTEGER_CST
171938fd1498Szrj || wi::leu_p (wi::to_wide (niters), chain->length))
172038fd1498Szrj return false;
172138fd1498Szrj
172238fd1498Szrj /* Check stores in chain for elimination if they only store loop invariant
172338fd1498Szrj values. */
172438fd1498Szrj for (unsigned i = 0; i < chain->length; i++)
172538fd1498Szrj {
172638fd1498Szrj dref a = get_chain_last_write_at (chain, i);
172738fd1498Szrj if (a == NULL)
172838fd1498Szrj continue;
172938fd1498Szrj
173038fd1498Szrj gimple *def_stmt, *stmt = a->stmt;
173138fd1498Szrj if (!gimple_assign_single_p (stmt))
173238fd1498Szrj return false;
173338fd1498Szrj
173438fd1498Szrj tree val = gimple_assign_rhs1 (stmt);
173538fd1498Szrj if (TREE_CLOBBER_P (val))
173638fd1498Szrj return false;
173738fd1498Szrj
173838fd1498Szrj if (CONSTANT_CLASS_P (val))
173938fd1498Szrj continue;
174038fd1498Szrj
174138fd1498Szrj if (TREE_CODE (val) != SSA_NAME)
174238fd1498Szrj return false;
174338fd1498Szrj
174438fd1498Szrj def_stmt = SSA_NAME_DEF_STMT (val);
174538fd1498Szrj if (gimple_nop_p (def_stmt))
174638fd1498Szrj continue;
174738fd1498Szrj
174838fd1498Szrj if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
174938fd1498Szrj return false;
175038fd1498Szrj }
175138fd1498Szrj return true;
175238fd1498Szrj }
175338fd1498Szrj
175438fd1498Szrj /* Creates root variables for store elimination CHAIN in which stores for
175538fd1498Szrj elimination only store loop invariant values. In this case, we neither
175638fd1498Szrj need to load root variables before loop nor propagate it with PHI nodes. */
175738fd1498Szrj
175838fd1498Szrj static void
initialize_root_vars_store_elim_1(chain_p chain)175938fd1498Szrj initialize_root_vars_store_elim_1 (chain_p chain)
176038fd1498Szrj {
176138fd1498Szrj tree var;
176238fd1498Szrj unsigned i, n = chain->length;
176338fd1498Szrj
176438fd1498Szrj chain->vars.create (n);
176538fd1498Szrj chain->vars.safe_grow_cleared (n);
176638fd1498Szrj
176738fd1498Szrj /* Initialize root value for eliminated stores at each distance. */
176838fd1498Szrj for (i = 0; i < n; i++)
176938fd1498Szrj {
177038fd1498Szrj dref a = get_chain_last_write_at (chain, i);
177138fd1498Szrj if (a == NULL)
177238fd1498Szrj continue;
177338fd1498Szrj
177438fd1498Szrj var = gimple_assign_rhs1 (a->stmt);
177538fd1498Szrj chain->vars[a->distance] = var;
177638fd1498Szrj }
177738fd1498Szrj
177838fd1498Szrj /* We don't propagate values with PHI nodes, so manually propagate value
177938fd1498Szrj to bubble positions. */
178038fd1498Szrj var = chain->vars[0];
178138fd1498Szrj for (i = 1; i < n; i++)
178238fd1498Szrj {
178338fd1498Szrj if (chain->vars[i] != NULL_TREE)
178438fd1498Szrj {
178538fd1498Szrj var = chain->vars[i];
178638fd1498Szrj continue;
178738fd1498Szrj }
178838fd1498Szrj chain->vars[i] = var;
178938fd1498Szrj }
179038fd1498Szrj
179138fd1498Szrj /* Revert the vector. */
179238fd1498Szrj for (i = 0; i < n / 2; i++)
179338fd1498Szrj std::swap (chain->vars[i], chain->vars[n - i - 1]);
179438fd1498Szrj }
179538fd1498Szrj
179638fd1498Szrj /* Creates root variables for store elimination CHAIN in which stores for
179738fd1498Szrj elimination store loop variant values. In this case, we may need to
179838fd1498Szrj load root variables before LOOP and propagate it with PHI nodes. Uids
179938fd1498Szrj of the newly created root variables are marked in TMP_VARS. */
180038fd1498Szrj
180138fd1498Szrj static void
initialize_root_vars_store_elim_2(struct loop * loop,chain_p chain,bitmap tmp_vars)180238fd1498Szrj initialize_root_vars_store_elim_2 (struct loop *loop,
180338fd1498Szrj chain_p chain, bitmap tmp_vars)
180438fd1498Szrj {
180538fd1498Szrj unsigned i, n = chain->length;
180638fd1498Szrj tree ref, init, var, next, val, phi_result;
180738fd1498Szrj gimple *stmt;
180838fd1498Szrj gimple_seq stmts;
180938fd1498Szrj
181038fd1498Szrj chain->vars.create (n);
181138fd1498Szrj
181238fd1498Szrj ref = DR_REF (get_chain_root (chain)->ref);
181338fd1498Szrj for (i = 0; i < n; i++)
181438fd1498Szrj {
181538fd1498Szrj var = predcom_tmp_var (ref, i, tmp_vars);
181638fd1498Szrj chain->vars.quick_push (var);
181738fd1498Szrj }
181838fd1498Szrj
181938fd1498Szrj FOR_EACH_VEC_ELT (chain->vars, i, var)
182038fd1498Szrj chain->vars[i] = make_ssa_name (var);
182138fd1498Szrj
182238fd1498Szrj /* Root values are either rhs operand of stores to be eliminated, or
182338fd1498Szrj loaded from memory before loop. */
182438fd1498Szrj auto_vec<tree> vtemps;
182538fd1498Szrj vtemps.safe_grow_cleared (n);
182638fd1498Szrj for (i = 0; i < n; i++)
182738fd1498Szrj {
182838fd1498Szrj init = get_init_expr (chain, i);
182938fd1498Szrj if (init == NULL_TREE)
183038fd1498Szrj {
183138fd1498Szrj /* Root value is rhs operand of the store to be eliminated if
183238fd1498Szrj it isn't loaded from memory before loop. */
183338fd1498Szrj dref a = get_chain_last_write_at (chain, i);
183438fd1498Szrj val = gimple_assign_rhs1 (a->stmt);
183538fd1498Szrj if (TREE_CLOBBER_P (val))
183638fd1498Szrj {
183738fd1498Szrj val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var));
183838fd1498Szrj gimple_assign_set_rhs1 (a->stmt, val);
183938fd1498Szrj }
184038fd1498Szrj
184138fd1498Szrj vtemps[n - i - 1] = val;
184238fd1498Szrj }
184338fd1498Szrj else
184438fd1498Szrj {
184538fd1498Szrj edge latch = loop_latch_edge (loop);
184638fd1498Szrj edge entry = loop_preheader_edge (loop);
184738fd1498Szrj
184838fd1498Szrj /* Root value is loaded from memory before loop, we also need
184938fd1498Szrj to add PHI nodes to propagate the value across iterations. */
185038fd1498Szrj init = force_gimple_operand (init, &stmts, true, NULL_TREE);
185138fd1498Szrj if (stmts)
185238fd1498Szrj gsi_insert_seq_on_edge_immediate (entry, stmts);
185338fd1498Szrj
185438fd1498Szrj next = chain->vars[n - i];
185538fd1498Szrj phi_result = copy_ssa_name (next);
185638fd1498Szrj gphi *phi = create_phi_node (phi_result, loop->header);
185738fd1498Szrj add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
185838fd1498Szrj add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
185938fd1498Szrj vtemps[n - i - 1] = phi_result;
186038fd1498Szrj }
186138fd1498Szrj }
186238fd1498Szrj
186338fd1498Szrj /* Find the insertion position. */
186438fd1498Szrj dref last = get_chain_root (chain);
186538fd1498Szrj for (i = 0; i < chain->refs.length (); i++)
186638fd1498Szrj {
186738fd1498Szrj if (chain->refs[i]->pos > last->pos)
186838fd1498Szrj last = chain->refs[i];
186938fd1498Szrj }
187038fd1498Szrj
187138fd1498Szrj gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt);
187238fd1498Szrj
187338fd1498Szrj /* Insert statements copying root value to root variable. */
187438fd1498Szrj for (i = 0; i < n; i++)
187538fd1498Szrj {
187638fd1498Szrj var = chain->vars[i];
187738fd1498Szrj val = vtemps[i];
187838fd1498Szrj stmt = gimple_build_assign (var, val);
187938fd1498Szrj gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
188038fd1498Szrj }
188138fd1498Szrj }
188238fd1498Szrj
188338fd1498Szrj /* Generates stores for CHAIN's eliminated stores in LOOP's last
188438fd1498Szrj (CHAIN->length - 1) iterations. */
188538fd1498Szrj
188638fd1498Szrj static void
finalize_eliminated_stores(struct loop * loop,chain_p chain)188738fd1498Szrj finalize_eliminated_stores (struct loop *loop, chain_p chain)
188838fd1498Szrj {
188938fd1498Szrj unsigned i, n = chain->length;
189038fd1498Szrj
189138fd1498Szrj for (i = 0; i < n; i++)
189238fd1498Szrj {
189338fd1498Szrj tree var = chain->vars[i];
189438fd1498Szrj tree fini = chain->finis[n - i - 1];
189538fd1498Szrj gimple *stmt = gimple_build_assign (fini, var);
189638fd1498Szrj
189738fd1498Szrj gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt);
189838fd1498Szrj }
189938fd1498Szrj
190038fd1498Szrj if (chain->fini_seq)
190138fd1498Szrj {
190238fd1498Szrj gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq);
190338fd1498Szrj chain->fini_seq = NULL;
190438fd1498Szrj }
190538fd1498Szrj }
190638fd1498Szrj
190738fd1498Szrj /* Initializes a variable for load motion for ROOT and prepares phi nodes and
190838fd1498Szrj initialization on entry to LOOP if necessary. The ssa name for the variable
190938fd1498Szrj is stored in VARS. If WRITTEN is true, also a phi node to copy its value
191038fd1498Szrj around the loop is created. Uid of the newly created temporary variable
191138fd1498Szrj is marked in TMP_VARS. INITS is the list containing the (single)
191238fd1498Szrj initializer. */
191338fd1498Szrj
191438fd1498Szrj static void
initialize_root_vars_lm(struct loop * loop,dref root,bool written,vec<tree> * vars,vec<tree> inits,bitmap tmp_vars)191538fd1498Szrj initialize_root_vars_lm (struct loop *loop, dref root, bool written,
191638fd1498Szrj vec<tree> *vars, vec<tree> inits,
191738fd1498Szrj bitmap tmp_vars)
191838fd1498Szrj {
191938fd1498Szrj unsigned i;
192038fd1498Szrj tree ref = DR_REF (root->ref), init, var, next;
192138fd1498Szrj gimple_seq stmts;
192238fd1498Szrj gphi *phi;
192338fd1498Szrj edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
192438fd1498Szrj
192538fd1498Szrj /* Find the initializer for the variable, and check that it cannot
192638fd1498Szrj trap. */
192738fd1498Szrj init = inits[0];
192838fd1498Szrj
192938fd1498Szrj vars->create (written ? 2 : 1);
193038fd1498Szrj var = predcom_tmp_var (ref, 0, tmp_vars);
193138fd1498Szrj vars->quick_push (var);
193238fd1498Szrj if (written)
193338fd1498Szrj vars->quick_push ((*vars)[0]);
193438fd1498Szrj
193538fd1498Szrj FOR_EACH_VEC_ELT (*vars, i, var)
193638fd1498Szrj (*vars)[i] = make_ssa_name (var);
193738fd1498Szrj
193838fd1498Szrj var = (*vars)[0];
193938fd1498Szrj
194038fd1498Szrj init = force_gimple_operand (init, &stmts, written, NULL_TREE);
194138fd1498Szrj if (stmts)
194238fd1498Szrj gsi_insert_seq_on_edge_immediate (entry, stmts);
194338fd1498Szrj
194438fd1498Szrj if (written)
194538fd1498Szrj {
194638fd1498Szrj next = (*vars)[1];
194738fd1498Szrj phi = create_phi_node (var, loop->header);
194838fd1498Szrj add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
194938fd1498Szrj add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
195038fd1498Szrj }
195138fd1498Szrj else
195238fd1498Szrj {
195338fd1498Szrj gassign *init_stmt = gimple_build_assign (var, init);
195438fd1498Szrj gsi_insert_on_edge_immediate (entry, init_stmt);
195538fd1498Szrj }
195638fd1498Szrj }
195738fd1498Szrj
195838fd1498Szrj
195938fd1498Szrj /* Execute load motion for references in chain CHAIN. Uids of the newly
196038fd1498Szrj created temporary variables are marked in TMP_VARS. */
196138fd1498Szrj
196238fd1498Szrj static void
execute_load_motion(struct loop * loop,chain_p chain,bitmap tmp_vars)196338fd1498Szrj execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
196438fd1498Szrj {
196538fd1498Szrj auto_vec<tree> vars;
196638fd1498Szrj dref a;
196738fd1498Szrj unsigned n_writes = 0, ridx, i;
196838fd1498Szrj tree var;
196938fd1498Szrj
197038fd1498Szrj gcc_assert (chain->type == CT_INVARIANT);
197138fd1498Szrj gcc_assert (!chain->combined);
197238fd1498Szrj FOR_EACH_VEC_ELT (chain->refs, i, a)
197338fd1498Szrj if (DR_IS_WRITE (a->ref))
197438fd1498Szrj n_writes++;
197538fd1498Szrj
197638fd1498Szrj /* If there are no reads in the loop, there is nothing to do. */
197738fd1498Szrj if (n_writes == chain->refs.length ())
197838fd1498Szrj return;
197938fd1498Szrj
198038fd1498Szrj initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
198138fd1498Szrj &vars, chain->inits, tmp_vars);
198238fd1498Szrj
198338fd1498Szrj ridx = 0;
198438fd1498Szrj FOR_EACH_VEC_ELT (chain->refs, i, a)
198538fd1498Szrj {
198638fd1498Szrj bool is_read = DR_IS_READ (a->ref);
198738fd1498Szrj
198838fd1498Szrj if (DR_IS_WRITE (a->ref))
198938fd1498Szrj {
199038fd1498Szrj n_writes--;
199138fd1498Szrj if (n_writes)
199238fd1498Szrj {
199338fd1498Szrj var = vars[0];
199438fd1498Szrj var = make_ssa_name (SSA_NAME_VAR (var));
199538fd1498Szrj vars[0] = var;
199638fd1498Szrj }
199738fd1498Szrj else
199838fd1498Szrj ridx = 1;
199938fd1498Szrj }
200038fd1498Szrj
200138fd1498Szrj replace_ref_with (a->stmt, vars[ridx],
200238fd1498Szrj !is_read, !is_read);
200338fd1498Szrj }
200438fd1498Szrj }
200538fd1498Szrj
200638fd1498Szrj /* Returns the single statement in that NAME is used, excepting
200738fd1498Szrj the looparound phi nodes contained in one of the chains. If there is no
200838fd1498Szrj such statement, or more statements, NULL is returned. */
200938fd1498Szrj
201038fd1498Szrj static gimple *
single_nonlooparound_use(tree name)201138fd1498Szrj single_nonlooparound_use (tree name)
201238fd1498Szrj {
201338fd1498Szrj use_operand_p use;
201438fd1498Szrj imm_use_iterator it;
201538fd1498Szrj gimple *stmt, *ret = NULL;
201638fd1498Szrj
201738fd1498Szrj FOR_EACH_IMM_USE_FAST (use, it, name)
201838fd1498Szrj {
201938fd1498Szrj stmt = USE_STMT (use);
202038fd1498Szrj
202138fd1498Szrj if (gimple_code (stmt) == GIMPLE_PHI)
202238fd1498Szrj {
202338fd1498Szrj /* Ignore uses in looparound phi nodes. Uses in other phi nodes
202438fd1498Szrj could not be processed anyway, so just fail for them. */
202538fd1498Szrj if (bitmap_bit_p (looparound_phis,
202638fd1498Szrj SSA_NAME_VERSION (PHI_RESULT (stmt))))
202738fd1498Szrj continue;
202838fd1498Szrj
202938fd1498Szrj return NULL;
203038fd1498Szrj }
203138fd1498Szrj else if (is_gimple_debug (stmt))
203238fd1498Szrj continue;
203338fd1498Szrj else if (ret != NULL)
203438fd1498Szrj return NULL;
203538fd1498Szrj else
203638fd1498Szrj ret = stmt;
203738fd1498Szrj }
203838fd1498Szrj
203938fd1498Szrj return ret;
204038fd1498Szrj }
204138fd1498Szrj
204238fd1498Szrj /* Remove statement STMT, as well as the chain of assignments in that it is
204338fd1498Szrj used. */
204438fd1498Szrj
204538fd1498Szrj static void
remove_stmt(gimple * stmt)204638fd1498Szrj remove_stmt (gimple *stmt)
204738fd1498Szrj {
204838fd1498Szrj tree name;
204938fd1498Szrj gimple *next;
205038fd1498Szrj gimple_stmt_iterator psi;
205138fd1498Szrj
205238fd1498Szrj if (gimple_code (stmt) == GIMPLE_PHI)
205338fd1498Szrj {
205438fd1498Szrj name = PHI_RESULT (stmt);
205538fd1498Szrj next = single_nonlooparound_use (name);
205638fd1498Szrj reset_debug_uses (stmt);
205738fd1498Szrj psi = gsi_for_stmt (stmt);
205838fd1498Szrj remove_phi_node (&psi, true);
205938fd1498Szrj
206038fd1498Szrj if (!next
206138fd1498Szrj || !gimple_assign_ssa_name_copy_p (next)
206238fd1498Szrj || gimple_assign_rhs1 (next) != name)
206338fd1498Szrj return;
206438fd1498Szrj
206538fd1498Szrj stmt = next;
206638fd1498Szrj }
206738fd1498Szrj
206838fd1498Szrj while (1)
206938fd1498Szrj {
207038fd1498Szrj gimple_stmt_iterator bsi;
207138fd1498Szrj
207238fd1498Szrj bsi = gsi_for_stmt (stmt);
207338fd1498Szrj
207438fd1498Szrj name = gimple_assign_lhs (stmt);
207538fd1498Szrj if (TREE_CODE (name) == SSA_NAME)
207638fd1498Szrj {
207738fd1498Szrj next = single_nonlooparound_use (name);
207838fd1498Szrj reset_debug_uses (stmt);
207938fd1498Szrj }
208038fd1498Szrj else
208138fd1498Szrj {
208238fd1498Szrj /* This is a store to be eliminated. */
208338fd1498Szrj gcc_assert (gimple_vdef (stmt) != NULL);
208438fd1498Szrj next = NULL;
208538fd1498Szrj }
208638fd1498Szrj
208738fd1498Szrj unlink_stmt_vdef (stmt);
208838fd1498Szrj gsi_remove (&bsi, true);
208938fd1498Szrj release_defs (stmt);
209038fd1498Szrj
209138fd1498Szrj if (!next
209238fd1498Szrj || !gimple_assign_ssa_name_copy_p (next)
209338fd1498Szrj || gimple_assign_rhs1 (next) != name)
209438fd1498Szrj return;
209538fd1498Szrj
209638fd1498Szrj stmt = next;
209738fd1498Szrj }
209838fd1498Szrj }
209938fd1498Szrj
210038fd1498Szrj /* Perform the predictive commoning optimization for a chain CHAIN.
210138fd1498Szrj Uids of the newly created temporary variables are marked in TMP_VARS.*/
210238fd1498Szrj
210338fd1498Szrj static void
execute_pred_commoning_chain(struct loop * loop,chain_p chain,bitmap tmp_vars)210438fd1498Szrj execute_pred_commoning_chain (struct loop *loop, chain_p chain,
210538fd1498Szrj bitmap tmp_vars)
210638fd1498Szrj {
210738fd1498Szrj unsigned i;
210838fd1498Szrj dref a;
210938fd1498Szrj tree var;
211038fd1498Szrj bool in_lhs;
211138fd1498Szrj
211238fd1498Szrj if (chain->combined)
211338fd1498Szrj {
211438fd1498Szrj /* For combined chains, just remove the statements that are used to
211538fd1498Szrj compute the values of the expression (except for the root one).
211638fd1498Szrj We delay this until after all chains are processed. */
211738fd1498Szrj }
211838fd1498Szrj else if (chain->type == CT_STORE_STORE)
211938fd1498Szrj {
212038fd1498Szrj if (chain->length > 0)
212138fd1498Szrj {
212238fd1498Szrj if (chain->inv_store_elimination)
212338fd1498Szrj {
212438fd1498Szrj /* If dead stores in this chain only store loop invariant
212538fd1498Szrj values, we can simply record the invariant value and use
212638fd1498Szrj it directly after loop. */
212738fd1498Szrj initialize_root_vars_store_elim_1 (chain);
212838fd1498Szrj }
212938fd1498Szrj else
213038fd1498Szrj {
213138fd1498Szrj /* If dead stores in this chain store loop variant values,
213238fd1498Szrj we need to set up the variables by loading from memory
213338fd1498Szrj before loop and propagating it with PHI nodes. */
213438fd1498Szrj initialize_root_vars_store_elim_2 (loop, chain, tmp_vars);
213538fd1498Szrj }
213638fd1498Szrj
213738fd1498Szrj /* For inter-iteration store elimination chain, stores at each
213838fd1498Szrj distance in loop's last (chain->length - 1) iterations can't
213938fd1498Szrj be eliminated, because there is no following killing store.
214038fd1498Szrj We need to generate these stores after loop. */
214138fd1498Szrj finalize_eliminated_stores (loop, chain);
214238fd1498Szrj }
214338fd1498Szrj
214438fd1498Szrj bool last_store_p = true;
214538fd1498Szrj for (i = chain->refs.length (); i > 0; i--)
214638fd1498Szrj {
214738fd1498Szrj a = chain->refs[i - 1];
214838fd1498Szrj /* Preserve the last store of the chain. Eliminate other stores
214938fd1498Szrj which are killed by the last one. */
215038fd1498Szrj if (DR_IS_WRITE (a->ref))
215138fd1498Szrj {
215238fd1498Szrj if (last_store_p)
215338fd1498Szrj last_store_p = false;
215438fd1498Szrj else
215538fd1498Szrj remove_stmt (a->stmt);
215638fd1498Szrj
215738fd1498Szrj continue;
215838fd1498Szrj }
215938fd1498Szrj
216038fd1498Szrj /* Any load in Store-Store chain must be dominated by a previous
216138fd1498Szrj store, we replace the load reference with rhs of the store. */
216238fd1498Szrj dref b = get_chain_last_write_before_load (chain, i - 1);
216338fd1498Szrj gcc_assert (b != NULL);
216438fd1498Szrj var = gimple_assign_rhs1 (b->stmt);
216538fd1498Szrj replace_ref_with (a->stmt, var, false, false);
216638fd1498Szrj }
216738fd1498Szrj }
216838fd1498Szrj else
216938fd1498Szrj {
217038fd1498Szrj /* For non-combined chains, set up the variables that hold its value. */
217138fd1498Szrj initialize_root_vars (loop, chain, tmp_vars);
217238fd1498Szrj a = get_chain_root (chain);
217338fd1498Szrj in_lhs = (chain->type == CT_STORE_LOAD
217438fd1498Szrj || chain->type == CT_COMBINATION);
217538fd1498Szrj replace_ref_with (a->stmt, chain->vars[chain->length], true, in_lhs);
217638fd1498Szrj
217738fd1498Szrj /* Replace the uses of the original references by these variables. */
217838fd1498Szrj for (i = 1; chain->refs.iterate (i, &a); i++)
217938fd1498Szrj {
218038fd1498Szrj var = chain->vars[chain->length - a->distance];
218138fd1498Szrj replace_ref_with (a->stmt, var, false, false);
218238fd1498Szrj }
218338fd1498Szrj }
218438fd1498Szrj }
218538fd1498Szrj
218638fd1498Szrj /* Determines the unroll factor necessary to remove as many temporary variable
218738fd1498Szrj copies as possible. CHAINS is the list of chains that will be
218838fd1498Szrj optimized. */
218938fd1498Szrj
219038fd1498Szrj static unsigned
determine_unroll_factor(vec<chain_p> chains)219138fd1498Szrj determine_unroll_factor (vec<chain_p> chains)
219238fd1498Szrj {
219338fd1498Szrj chain_p chain;
219438fd1498Szrj unsigned factor = 1, af, nfactor, i;
219538fd1498Szrj unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
219638fd1498Szrj
219738fd1498Szrj FOR_EACH_VEC_ELT (chains, i, chain)
219838fd1498Szrj {
219938fd1498Szrj if (chain->type == CT_INVARIANT)
220038fd1498Szrj continue;
220138fd1498Szrj /* For now we can't handle unrolling when eliminating stores. */
220238fd1498Szrj else if (chain->type == CT_STORE_STORE)
220338fd1498Szrj return 1;
220438fd1498Szrj
220538fd1498Szrj if (chain->combined)
220638fd1498Szrj {
220738fd1498Szrj /* For combined chains, we can't handle unrolling if we replace
220838fd1498Szrj looparound PHIs. */
220938fd1498Szrj dref a;
221038fd1498Szrj unsigned j;
221138fd1498Szrj for (j = 1; chain->refs.iterate (j, &a); j++)
221238fd1498Szrj if (gimple_code (a->stmt) == GIMPLE_PHI)
221338fd1498Szrj return 1;
221438fd1498Szrj continue;
221538fd1498Szrj }
221638fd1498Szrj
221738fd1498Szrj /* The best unroll factor for this chain is equal to the number of
221838fd1498Szrj temporary variables that we create for it. */
221938fd1498Szrj af = chain->length;
222038fd1498Szrj if (chain->has_max_use_after)
222138fd1498Szrj af++;
222238fd1498Szrj
222338fd1498Szrj nfactor = factor * af / gcd (factor, af);
222438fd1498Szrj if (nfactor <= max)
222538fd1498Szrj factor = nfactor;
222638fd1498Szrj }
222738fd1498Szrj
222838fd1498Szrj return factor;
222938fd1498Szrj }
223038fd1498Szrj
223138fd1498Szrj /* Perform the predictive commoning optimization for CHAINS.
223238fd1498Szrj Uids of the newly created temporary variables are marked in TMP_VARS. */
223338fd1498Szrj
223438fd1498Szrj static void
execute_pred_commoning(struct loop * loop,vec<chain_p> chains,bitmap tmp_vars)223538fd1498Szrj execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
223638fd1498Szrj bitmap tmp_vars)
223738fd1498Szrj {
223838fd1498Szrj chain_p chain;
223938fd1498Szrj unsigned i;
224038fd1498Szrj
224138fd1498Szrj FOR_EACH_VEC_ELT (chains, i, chain)
224238fd1498Szrj {
224338fd1498Szrj if (chain->type == CT_INVARIANT)
224438fd1498Szrj execute_load_motion (loop, chain, tmp_vars);
224538fd1498Szrj else
224638fd1498Szrj execute_pred_commoning_chain (loop, chain, tmp_vars);
224738fd1498Szrj }
224838fd1498Szrj
224938fd1498Szrj FOR_EACH_VEC_ELT (chains, i, chain)
225038fd1498Szrj {
225138fd1498Szrj if (chain->type == CT_INVARIANT)
225238fd1498Szrj ;
225338fd1498Szrj else if (chain->combined)
225438fd1498Szrj {
225538fd1498Szrj /* For combined chains, just remove the statements that are used to
225638fd1498Szrj compute the values of the expression (except for the root one). */
225738fd1498Szrj dref a;
225838fd1498Szrj unsigned j;
225938fd1498Szrj for (j = 1; chain->refs.iterate (j, &a); j++)
226038fd1498Szrj remove_stmt (a->stmt);
226138fd1498Szrj }
226238fd1498Szrj }
226338fd1498Szrj
226438fd1498Szrj update_ssa (TODO_update_ssa_only_virtuals);
226538fd1498Szrj }
226638fd1498Szrj
226738fd1498Szrj /* For each reference in CHAINS, if its defining statement is
226838fd1498Szrj phi node, record the ssa name that is defined by it. */
226938fd1498Szrj
227038fd1498Szrj static void
replace_phis_by_defined_names(vec<chain_p> chains)227138fd1498Szrj replace_phis_by_defined_names (vec<chain_p> chains)
227238fd1498Szrj {
227338fd1498Szrj chain_p chain;
227438fd1498Szrj dref a;
227538fd1498Szrj unsigned i, j;
227638fd1498Szrj
227738fd1498Szrj FOR_EACH_VEC_ELT (chains, i, chain)
227838fd1498Szrj FOR_EACH_VEC_ELT (chain->refs, j, a)
227938fd1498Szrj {
228038fd1498Szrj if (gimple_code (a->stmt) == GIMPLE_PHI)
228138fd1498Szrj {
228238fd1498Szrj a->name_defined_by_phi = PHI_RESULT (a->stmt);
228338fd1498Szrj a->stmt = NULL;
228438fd1498Szrj }
228538fd1498Szrj }
228638fd1498Szrj }
228738fd1498Szrj
228838fd1498Szrj /* For each reference in CHAINS, if name_defined_by_phi is not
228938fd1498Szrj NULL, use it to set the stmt field. */
229038fd1498Szrj
229138fd1498Szrj static void
replace_names_by_phis(vec<chain_p> chains)229238fd1498Szrj replace_names_by_phis (vec<chain_p> chains)
229338fd1498Szrj {
229438fd1498Szrj chain_p chain;
229538fd1498Szrj dref a;
229638fd1498Szrj unsigned i, j;
229738fd1498Szrj
229838fd1498Szrj FOR_EACH_VEC_ELT (chains, i, chain)
229938fd1498Szrj FOR_EACH_VEC_ELT (chain->refs, j, a)
230038fd1498Szrj if (a->stmt == NULL)
230138fd1498Szrj {
230238fd1498Szrj a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
230338fd1498Szrj gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
230438fd1498Szrj a->name_defined_by_phi = NULL_TREE;
230538fd1498Szrj }
230638fd1498Szrj }
230738fd1498Szrj
230838fd1498Szrj /* Wrapper over execute_pred_commoning, to pass it as a callback
230938fd1498Szrj to tree_transform_and_unroll_loop. */
231038fd1498Szrj
231138fd1498Szrj struct epcc_data
231238fd1498Szrj {
231338fd1498Szrj vec<chain_p> chains;
231438fd1498Szrj bitmap tmp_vars;
231538fd1498Szrj };
231638fd1498Szrj
231738fd1498Szrj static void
execute_pred_commoning_cbck(struct loop * loop,void * data)231838fd1498Szrj execute_pred_commoning_cbck (struct loop *loop, void *data)
231938fd1498Szrj {
232038fd1498Szrj struct epcc_data *const dta = (struct epcc_data *) data;
232138fd1498Szrj
232238fd1498Szrj /* Restore phi nodes that were replaced by ssa names before
232338fd1498Szrj tree_transform_and_unroll_loop (see detailed description in
232438fd1498Szrj tree_predictive_commoning_loop). */
232538fd1498Szrj replace_names_by_phis (dta->chains);
232638fd1498Szrj execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
232738fd1498Szrj }
232838fd1498Szrj
232938fd1498Szrj /* Base NAME and all the names in the chain of phi nodes that use it
233038fd1498Szrj on variable VAR. The phi nodes are recognized by being in the copies of
233138fd1498Szrj the header of the LOOP. */
233238fd1498Szrj
233338fd1498Szrj static void
base_names_in_chain_on(struct loop * loop,tree name,tree var)233438fd1498Szrj base_names_in_chain_on (struct loop *loop, tree name, tree var)
233538fd1498Szrj {
233638fd1498Szrj gimple *stmt, *phi;
233738fd1498Szrj imm_use_iterator iter;
233838fd1498Szrj
233938fd1498Szrj replace_ssa_name_symbol (name, var);
234038fd1498Szrj
234138fd1498Szrj while (1)
234238fd1498Szrj {
234338fd1498Szrj phi = NULL;
234438fd1498Szrj FOR_EACH_IMM_USE_STMT (stmt, iter, name)
234538fd1498Szrj {
234638fd1498Szrj if (gimple_code (stmt) == GIMPLE_PHI
234738fd1498Szrj && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
234838fd1498Szrj {
234938fd1498Szrj phi = stmt;
235038fd1498Szrj BREAK_FROM_IMM_USE_STMT (iter);
235138fd1498Szrj }
235238fd1498Szrj }
235338fd1498Szrj if (!phi)
235438fd1498Szrj return;
235538fd1498Szrj
235638fd1498Szrj name = PHI_RESULT (phi);
235738fd1498Szrj replace_ssa_name_symbol (name, var);
235838fd1498Szrj }
235938fd1498Szrj }
236038fd1498Szrj
236138fd1498Szrj /* Given an unrolled LOOP after predictive commoning, remove the
236238fd1498Szrj register copies arising from phi nodes by changing the base
236338fd1498Szrj variables of SSA names. TMP_VARS is the set of the temporary variables
236438fd1498Szrj for those we want to perform this. */
236538fd1498Szrj
236638fd1498Szrj static void
eliminate_temp_copies(struct loop * loop,bitmap tmp_vars)236738fd1498Szrj eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
236838fd1498Szrj {
236938fd1498Szrj edge e;
237038fd1498Szrj gphi *phi;
237138fd1498Szrj gimple *stmt;
237238fd1498Szrj tree name, use, var;
237338fd1498Szrj gphi_iterator psi;
237438fd1498Szrj
237538fd1498Szrj e = loop_latch_edge (loop);
237638fd1498Szrj for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
237738fd1498Szrj {
237838fd1498Szrj phi = psi.phi ();
237938fd1498Szrj name = PHI_RESULT (phi);
238038fd1498Szrj var = SSA_NAME_VAR (name);
238138fd1498Szrj if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
238238fd1498Szrj continue;
238338fd1498Szrj use = PHI_ARG_DEF_FROM_EDGE (phi, e);
238438fd1498Szrj gcc_assert (TREE_CODE (use) == SSA_NAME);
238538fd1498Szrj
238638fd1498Szrj /* Base all the ssa names in the ud and du chain of NAME on VAR. */
238738fd1498Szrj stmt = SSA_NAME_DEF_STMT (use);
238838fd1498Szrj while (gimple_code (stmt) == GIMPLE_PHI
238938fd1498Szrj /* In case we could not unroll the loop enough to eliminate
239038fd1498Szrj all copies, we may reach the loop header before the defining
239138fd1498Szrj statement (in that case, some register copies will be present
239238fd1498Szrj in loop latch in the final code, corresponding to the newly
239338fd1498Szrj created looparound phi nodes). */
239438fd1498Szrj && gimple_bb (stmt) != loop->header)
239538fd1498Szrj {
239638fd1498Szrj gcc_assert (single_pred_p (gimple_bb (stmt)));
239738fd1498Szrj use = PHI_ARG_DEF (stmt, 0);
239838fd1498Szrj stmt = SSA_NAME_DEF_STMT (use);
239938fd1498Szrj }
240038fd1498Szrj
240138fd1498Szrj base_names_in_chain_on (loop, use, var);
240238fd1498Szrj }
240338fd1498Szrj }
240438fd1498Szrj
240538fd1498Szrj /* Returns true if CHAIN is suitable to be combined. */
240638fd1498Szrj
240738fd1498Szrj static bool
chain_can_be_combined_p(chain_p chain)240838fd1498Szrj chain_can_be_combined_p (chain_p chain)
240938fd1498Szrj {
241038fd1498Szrj return (!chain->combined
241138fd1498Szrj && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
241238fd1498Szrj }
241338fd1498Szrj
241438fd1498Szrj /* Returns the modify statement that uses NAME. Skips over assignment
241538fd1498Szrj statements, NAME is replaced with the actual name used in the returned
241638fd1498Szrj statement. */
241738fd1498Szrj
241838fd1498Szrj static gimple *
find_use_stmt(tree * name)241938fd1498Szrj find_use_stmt (tree *name)
242038fd1498Szrj {
242138fd1498Szrj gimple *stmt;
242238fd1498Szrj tree rhs, lhs;
242338fd1498Szrj
242438fd1498Szrj /* Skip over assignments. */
242538fd1498Szrj while (1)
242638fd1498Szrj {
242738fd1498Szrj stmt = single_nonlooparound_use (*name);
242838fd1498Szrj if (!stmt)
242938fd1498Szrj return NULL;
243038fd1498Szrj
243138fd1498Szrj if (gimple_code (stmt) != GIMPLE_ASSIGN)
243238fd1498Szrj return NULL;
243338fd1498Szrj
243438fd1498Szrj lhs = gimple_assign_lhs (stmt);
243538fd1498Szrj if (TREE_CODE (lhs) != SSA_NAME)
243638fd1498Szrj return NULL;
243738fd1498Szrj
243838fd1498Szrj if (gimple_assign_copy_p (stmt))
243938fd1498Szrj {
244038fd1498Szrj rhs = gimple_assign_rhs1 (stmt);
244138fd1498Szrj if (rhs != *name)
244238fd1498Szrj return NULL;
244338fd1498Szrj
244438fd1498Szrj *name = lhs;
244538fd1498Szrj }
244638fd1498Szrj else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
244738fd1498Szrj == GIMPLE_BINARY_RHS)
244838fd1498Szrj return stmt;
244938fd1498Szrj else
245038fd1498Szrj return NULL;
245138fd1498Szrj }
245238fd1498Szrj }
245338fd1498Szrj
245438fd1498Szrj /* Returns true if we may perform reassociation for operation CODE in TYPE. */
245538fd1498Szrj
245638fd1498Szrj static bool
may_reassociate_p(tree type,enum tree_code code)245738fd1498Szrj may_reassociate_p (tree type, enum tree_code code)
245838fd1498Szrj {
245938fd1498Szrj if (FLOAT_TYPE_P (type)
246038fd1498Szrj && !flag_unsafe_math_optimizations)
246138fd1498Szrj return false;
246238fd1498Szrj
246338fd1498Szrj return (commutative_tree_code (code)
246438fd1498Szrj && associative_tree_code (code));
246538fd1498Szrj }
246638fd1498Szrj
246738fd1498Szrj /* If the operation used in STMT is associative and commutative, go through the
246838fd1498Szrj tree of the same operations and returns its root. Distance to the root
246938fd1498Szrj is stored in DISTANCE. */
247038fd1498Szrj
247138fd1498Szrj static gimple *
find_associative_operation_root(gimple * stmt,unsigned * distance)247238fd1498Szrj find_associative_operation_root (gimple *stmt, unsigned *distance)
247338fd1498Szrj {
247438fd1498Szrj tree lhs;
247538fd1498Szrj gimple *next;
247638fd1498Szrj enum tree_code code = gimple_assign_rhs_code (stmt);
247738fd1498Szrj tree type = TREE_TYPE (gimple_assign_lhs (stmt));
247838fd1498Szrj unsigned dist = 0;
247938fd1498Szrj
248038fd1498Szrj if (!may_reassociate_p (type, code))
248138fd1498Szrj return NULL;
248238fd1498Szrj
248338fd1498Szrj while (1)
248438fd1498Szrj {
248538fd1498Szrj lhs = gimple_assign_lhs (stmt);
248638fd1498Szrj gcc_assert (TREE_CODE (lhs) == SSA_NAME);
248738fd1498Szrj
248838fd1498Szrj next = find_use_stmt (&lhs);
248938fd1498Szrj if (!next
249038fd1498Szrj || gimple_assign_rhs_code (next) != code)
249138fd1498Szrj break;
249238fd1498Szrj
249338fd1498Szrj stmt = next;
249438fd1498Szrj dist++;
249538fd1498Szrj }
249638fd1498Szrj
249738fd1498Szrj if (distance)
249838fd1498Szrj *distance = dist;
249938fd1498Szrj return stmt;
250038fd1498Szrj }
250138fd1498Szrj
250238fd1498Szrj /* Returns the common statement in that NAME1 and NAME2 have a use. If there
250338fd1498Szrj is no such statement, returns NULL_TREE. In case the operation used on
250438fd1498Szrj NAME1 and NAME2 is associative and commutative, returns the root of the
250538fd1498Szrj tree formed by this operation instead of the statement that uses NAME1 or
250638fd1498Szrj NAME2. */
250738fd1498Szrj
250838fd1498Szrj static gimple *
find_common_use_stmt(tree * name1,tree * name2)250938fd1498Szrj find_common_use_stmt (tree *name1, tree *name2)
251038fd1498Szrj {
251138fd1498Szrj gimple *stmt1, *stmt2;
251238fd1498Szrj
251338fd1498Szrj stmt1 = find_use_stmt (name1);
251438fd1498Szrj if (!stmt1)
251538fd1498Szrj return NULL;
251638fd1498Szrj
251738fd1498Szrj stmt2 = find_use_stmt (name2);
251838fd1498Szrj if (!stmt2)
251938fd1498Szrj return NULL;
252038fd1498Szrj
252138fd1498Szrj if (stmt1 == stmt2)
252238fd1498Szrj return stmt1;
252338fd1498Szrj
252438fd1498Szrj stmt1 = find_associative_operation_root (stmt1, NULL);
252538fd1498Szrj if (!stmt1)
252638fd1498Szrj return NULL;
252738fd1498Szrj stmt2 = find_associative_operation_root (stmt2, NULL);
252838fd1498Szrj if (!stmt2)
252938fd1498Szrj return NULL;
253038fd1498Szrj
253138fd1498Szrj return (stmt1 == stmt2 ? stmt1 : NULL);
253238fd1498Szrj }
253338fd1498Szrj
253438fd1498Szrj /* Checks whether R1 and R2 are combined together using CODE, with the result
253538fd1498Szrj in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
253638fd1498Szrj if it is true. If CODE is ERROR_MARK, set these values instead. */
253738fd1498Szrj
253838fd1498Szrj static bool
combinable_refs_p(dref r1,dref r2,enum tree_code * code,bool * swap,tree * rslt_type)253938fd1498Szrj combinable_refs_p (dref r1, dref r2,
254038fd1498Szrj enum tree_code *code, bool *swap, tree *rslt_type)
254138fd1498Szrj {
254238fd1498Szrj enum tree_code acode;
254338fd1498Szrj bool aswap;
254438fd1498Szrj tree atype;
254538fd1498Szrj tree name1, name2;
254638fd1498Szrj gimple *stmt;
254738fd1498Szrj
254838fd1498Szrj name1 = name_for_ref (r1);
254938fd1498Szrj name2 = name_for_ref (r2);
255038fd1498Szrj gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
255138fd1498Szrj
255238fd1498Szrj stmt = find_common_use_stmt (&name1, &name2);
255338fd1498Szrj
255438fd1498Szrj if (!stmt
255538fd1498Szrj /* A simple post-dominance check - make sure the combination
255638fd1498Szrj is executed under the same condition as the references. */
255738fd1498Szrj || (gimple_bb (stmt) != gimple_bb (r1->stmt)
255838fd1498Szrj && gimple_bb (stmt) != gimple_bb (r2->stmt)))
255938fd1498Szrj return false;
256038fd1498Szrj
256138fd1498Szrj acode = gimple_assign_rhs_code (stmt);
256238fd1498Szrj aswap = (!commutative_tree_code (acode)
256338fd1498Szrj && gimple_assign_rhs1 (stmt) != name1);
256438fd1498Szrj atype = TREE_TYPE (gimple_assign_lhs (stmt));
256538fd1498Szrj
256638fd1498Szrj if (*code == ERROR_MARK)
256738fd1498Szrj {
256838fd1498Szrj *code = acode;
256938fd1498Szrj *swap = aswap;
257038fd1498Szrj *rslt_type = atype;
257138fd1498Szrj return true;
257238fd1498Szrj }
257338fd1498Szrj
257438fd1498Szrj return (*code == acode
257538fd1498Szrj && *swap == aswap
257638fd1498Szrj && *rslt_type == atype);
257738fd1498Szrj }
257838fd1498Szrj
257938fd1498Szrj /* Remove OP from the operation on rhs of STMT, and replace STMT with
258038fd1498Szrj an assignment of the remaining operand. */
258138fd1498Szrj
258238fd1498Szrj static void
remove_name_from_operation(gimple * stmt,tree op)258338fd1498Szrj remove_name_from_operation (gimple *stmt, tree op)
258438fd1498Szrj {
258538fd1498Szrj tree other_op;
258638fd1498Szrj gimple_stmt_iterator si;
258738fd1498Szrj
258838fd1498Szrj gcc_assert (is_gimple_assign (stmt));
258938fd1498Szrj
259038fd1498Szrj if (gimple_assign_rhs1 (stmt) == op)
259138fd1498Szrj other_op = gimple_assign_rhs2 (stmt);
259238fd1498Szrj else
259338fd1498Szrj other_op = gimple_assign_rhs1 (stmt);
259438fd1498Szrj
259538fd1498Szrj si = gsi_for_stmt (stmt);
259638fd1498Szrj gimple_assign_set_rhs_from_tree (&si, other_op);
259738fd1498Szrj
259838fd1498Szrj /* We should not have reallocated STMT. */
259938fd1498Szrj gcc_assert (gsi_stmt (si) == stmt);
260038fd1498Szrj
260138fd1498Szrj update_stmt (stmt);
260238fd1498Szrj }
260338fd1498Szrj
260438fd1498Szrj /* Reassociates the expression in that NAME1 and NAME2 are used so that they
260538fd1498Szrj are combined in a single statement, and returns this statement. */
260638fd1498Szrj
260738fd1498Szrj static gimple *
reassociate_to_the_same_stmt(tree name1,tree name2)260838fd1498Szrj reassociate_to_the_same_stmt (tree name1, tree name2)
260938fd1498Szrj {
261038fd1498Szrj gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
261138fd1498Szrj gassign *new_stmt, *tmp_stmt;
261238fd1498Szrj tree new_name, tmp_name, var, r1, r2;
261338fd1498Szrj unsigned dist1, dist2;
261438fd1498Szrj enum tree_code code;
261538fd1498Szrj tree type = TREE_TYPE (name1);
261638fd1498Szrj gimple_stmt_iterator bsi;
261738fd1498Szrj
261838fd1498Szrj stmt1 = find_use_stmt (&name1);
261938fd1498Szrj stmt2 = find_use_stmt (&name2);
262038fd1498Szrj root1 = find_associative_operation_root (stmt1, &dist1);
262138fd1498Szrj root2 = find_associative_operation_root (stmt2, &dist2);
262238fd1498Szrj code = gimple_assign_rhs_code (stmt1);
262338fd1498Szrj
262438fd1498Szrj gcc_assert (root1 && root2 && root1 == root2
262538fd1498Szrj && code == gimple_assign_rhs_code (stmt2));
262638fd1498Szrj
262738fd1498Szrj /* Find the root of the nearest expression in that both NAME1 and NAME2
262838fd1498Szrj are used. */
262938fd1498Szrj r1 = name1;
263038fd1498Szrj s1 = stmt1;
263138fd1498Szrj r2 = name2;
263238fd1498Szrj s2 = stmt2;
263338fd1498Szrj
263438fd1498Szrj while (dist1 > dist2)
263538fd1498Szrj {
263638fd1498Szrj s1 = find_use_stmt (&r1);
263738fd1498Szrj r1 = gimple_assign_lhs (s1);
263838fd1498Szrj dist1--;
263938fd1498Szrj }
264038fd1498Szrj while (dist2 > dist1)
264138fd1498Szrj {
264238fd1498Szrj s2 = find_use_stmt (&r2);
264338fd1498Szrj r2 = gimple_assign_lhs (s2);
264438fd1498Szrj dist2--;
264538fd1498Szrj }
264638fd1498Szrj
264738fd1498Szrj while (s1 != s2)
264838fd1498Szrj {
264938fd1498Szrj s1 = find_use_stmt (&r1);
265038fd1498Szrj r1 = gimple_assign_lhs (s1);
265138fd1498Szrj s2 = find_use_stmt (&r2);
265238fd1498Szrj r2 = gimple_assign_lhs (s2);
265338fd1498Szrj }
265438fd1498Szrj
265538fd1498Szrj /* Remove NAME1 and NAME2 from the statements in that they are used
265638fd1498Szrj currently. */
265738fd1498Szrj remove_name_from_operation (stmt1, name1);
265838fd1498Szrj remove_name_from_operation (stmt2, name2);
265938fd1498Szrj
266038fd1498Szrj /* Insert the new statement combining NAME1 and NAME2 before S1, and
266138fd1498Szrj combine it with the rhs of S1. */
266238fd1498Szrj var = create_tmp_reg (type, "predreastmp");
266338fd1498Szrj new_name = make_ssa_name (var);
266438fd1498Szrj new_stmt = gimple_build_assign (new_name, code, name1, name2);
266538fd1498Szrj
266638fd1498Szrj var = create_tmp_reg (type, "predreastmp");
266738fd1498Szrj tmp_name = make_ssa_name (var);
266838fd1498Szrj
266938fd1498Szrj /* Rhs of S1 may now be either a binary expression with operation
267038fd1498Szrj CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
267138fd1498Szrj so that name1 or name2 was removed from it). */
267238fd1498Szrj tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
267338fd1498Szrj gimple_assign_rhs1 (s1),
267438fd1498Szrj gimple_assign_rhs2 (s1));
267538fd1498Szrj
267638fd1498Szrj bsi = gsi_for_stmt (s1);
267738fd1498Szrj gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
267838fd1498Szrj s1 = gsi_stmt (bsi);
267938fd1498Szrj update_stmt (s1);
268038fd1498Szrj
268138fd1498Szrj gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
268238fd1498Szrj gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
268338fd1498Szrj
268438fd1498Szrj return new_stmt;
268538fd1498Szrj }
268638fd1498Szrj
268738fd1498Szrj /* Returns the statement that combines references R1 and R2. In case R1
268838fd1498Szrj and R2 are not used in the same statement, but they are used with an
268938fd1498Szrj associative and commutative operation in the same expression, reassociate
269038fd1498Szrj the expression so that they are used in the same statement. */
269138fd1498Szrj
269238fd1498Szrj static gimple *
stmt_combining_refs(dref r1,dref r2)269338fd1498Szrj stmt_combining_refs (dref r1, dref r2)
269438fd1498Szrj {
269538fd1498Szrj gimple *stmt1, *stmt2;
269638fd1498Szrj tree name1 = name_for_ref (r1);
269738fd1498Szrj tree name2 = name_for_ref (r2);
269838fd1498Szrj
269938fd1498Szrj stmt1 = find_use_stmt (&name1);
270038fd1498Szrj stmt2 = find_use_stmt (&name2);
270138fd1498Szrj if (stmt1 == stmt2)
270238fd1498Szrj return stmt1;
270338fd1498Szrj
270438fd1498Szrj return reassociate_to_the_same_stmt (name1, name2);
270538fd1498Szrj }
270638fd1498Szrj
270738fd1498Szrj /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
270838fd1498Szrj description of the new chain is returned, otherwise we return NULL. */
270938fd1498Szrj
271038fd1498Szrj static chain_p
combine_chains(chain_p ch1,chain_p ch2)271138fd1498Szrj combine_chains (chain_p ch1, chain_p ch2)
271238fd1498Szrj {
271338fd1498Szrj dref r1, r2, nw;
271438fd1498Szrj enum tree_code op = ERROR_MARK;
271538fd1498Szrj bool swap = false;
271638fd1498Szrj chain_p new_chain;
271738fd1498Szrj unsigned i;
271838fd1498Szrj tree rslt_type = NULL_TREE;
271938fd1498Szrj
272038fd1498Szrj if (ch1 == ch2)
272138fd1498Szrj return NULL;
272238fd1498Szrj if (ch1->length != ch2->length)
272338fd1498Szrj return NULL;
272438fd1498Szrj
272538fd1498Szrj if (ch1->refs.length () != ch2->refs.length ())
272638fd1498Szrj return NULL;
272738fd1498Szrj
272838fd1498Szrj for (i = 0; (ch1->refs.iterate (i, &r1)
272938fd1498Szrj && ch2->refs.iterate (i, &r2)); i++)
273038fd1498Szrj {
273138fd1498Szrj if (r1->distance != r2->distance)
273238fd1498Szrj return NULL;
273338fd1498Szrj
273438fd1498Szrj if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
273538fd1498Szrj return NULL;
273638fd1498Szrj }
273738fd1498Szrj
273838fd1498Szrj if (swap)
273938fd1498Szrj std::swap (ch1, ch2);
274038fd1498Szrj
274138fd1498Szrj new_chain = XCNEW (struct chain);
274238fd1498Szrj new_chain->type = CT_COMBINATION;
274338fd1498Szrj new_chain->op = op;
274438fd1498Szrj new_chain->ch1 = ch1;
274538fd1498Szrj new_chain->ch2 = ch2;
274638fd1498Szrj new_chain->rslt_type = rslt_type;
274738fd1498Szrj new_chain->length = ch1->length;
274838fd1498Szrj
274938fd1498Szrj for (i = 0; (ch1->refs.iterate (i, &r1)
275038fd1498Szrj && ch2->refs.iterate (i, &r2)); i++)
275138fd1498Szrj {
275238fd1498Szrj nw = XCNEW (struct dref_d);
275338fd1498Szrj nw->stmt = stmt_combining_refs (r1, r2);
275438fd1498Szrj nw->distance = r1->distance;
275538fd1498Szrj
275638fd1498Szrj new_chain->refs.safe_push (nw);
275738fd1498Szrj }
275838fd1498Szrj
275938fd1498Szrj ch1->combined = true;
276038fd1498Szrj ch2->combined = true;
276138fd1498Szrj return new_chain;
276238fd1498Szrj }
276338fd1498Szrj
276438fd1498Szrj /* Recursively update position information of all offspring chains to ROOT
276538fd1498Szrj chain's position information. */
276638fd1498Szrj
276738fd1498Szrj static void
update_pos_for_combined_chains(chain_p root)276838fd1498Szrj update_pos_for_combined_chains (chain_p root)
276938fd1498Szrj {
277038fd1498Szrj chain_p ch1 = root->ch1, ch2 = root->ch2;
277138fd1498Szrj dref ref, ref1, ref2;
277238fd1498Szrj for (unsigned j = 0; (root->refs.iterate (j, &ref)
277338fd1498Szrj && ch1->refs.iterate (j, &ref1)
277438fd1498Szrj && ch2->refs.iterate (j, &ref2)); ++j)
277538fd1498Szrj ref1->pos = ref2->pos = ref->pos;
277638fd1498Szrj
277738fd1498Szrj if (ch1->type == CT_COMBINATION)
277838fd1498Szrj update_pos_for_combined_chains (ch1);
277938fd1498Szrj if (ch2->type == CT_COMBINATION)
278038fd1498Szrj update_pos_for_combined_chains (ch2);
278138fd1498Szrj }
278238fd1498Szrj
278338fd1498Szrj /* Returns true if statement S1 dominates statement S2. */
278438fd1498Szrj
278538fd1498Szrj static bool
pcom_stmt_dominates_stmt_p(gimple * s1,gimple * s2)278638fd1498Szrj pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
278738fd1498Szrj {
278838fd1498Szrj basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
278938fd1498Szrj
279038fd1498Szrj if (!bb1 || s1 == s2)
279138fd1498Szrj return true;
279238fd1498Szrj
279338fd1498Szrj if (bb1 == bb2)
279438fd1498Szrj return gimple_uid (s1) < gimple_uid (s2);
279538fd1498Szrj
279638fd1498Szrj return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
279738fd1498Szrj }
279838fd1498Szrj
279938fd1498Szrj /* Try to combine the CHAINS in LOOP. */
280038fd1498Szrj
280138fd1498Szrj static void
try_combine_chains(struct loop * loop,vec<chain_p> * chains)280238fd1498Szrj try_combine_chains (struct loop *loop, vec<chain_p> *chains)
280338fd1498Szrj {
280438fd1498Szrj unsigned i, j;
280538fd1498Szrj chain_p ch1, ch2, cch;
280638fd1498Szrj auto_vec<chain_p> worklist;
280738fd1498Szrj bool combined_p = false;
280838fd1498Szrj
280938fd1498Szrj FOR_EACH_VEC_ELT (*chains, i, ch1)
281038fd1498Szrj if (chain_can_be_combined_p (ch1))
281138fd1498Szrj worklist.safe_push (ch1);
281238fd1498Szrj
281338fd1498Szrj while (!worklist.is_empty ())
281438fd1498Szrj {
281538fd1498Szrj ch1 = worklist.pop ();
281638fd1498Szrj if (!chain_can_be_combined_p (ch1))
281738fd1498Szrj continue;
281838fd1498Szrj
281938fd1498Szrj FOR_EACH_VEC_ELT (*chains, j, ch2)
282038fd1498Szrj {
282138fd1498Szrj if (!chain_can_be_combined_p (ch2))
282238fd1498Szrj continue;
282338fd1498Szrj
282438fd1498Szrj cch = combine_chains (ch1, ch2);
282538fd1498Szrj if (cch)
282638fd1498Szrj {
282738fd1498Szrj worklist.safe_push (cch);
282838fd1498Szrj chains->safe_push (cch);
282938fd1498Szrj combined_p = true;
283038fd1498Szrj break;
283138fd1498Szrj }
283238fd1498Szrj }
283338fd1498Szrj }
283438fd1498Szrj if (!combined_p)
283538fd1498Szrj return;
283638fd1498Szrj
283738fd1498Szrj /* Setup UID for all statements in dominance order. */
2838*58e805e6Szrj basic_block *bbs = get_loop_body_in_dom_order (loop);
283938fd1498Szrj renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes);
284038fd1498Szrj free (bbs);
284138fd1498Szrj
284238fd1498Szrj /* Re-association in combined chains may generate statements different to
284338fd1498Szrj order of references of the original chain. We need to keep references
284438fd1498Szrj of combined chain in dominance order so that all uses will be inserted
284538fd1498Szrj after definitions. Note:
284638fd1498Szrj A) This is necessary for all combined chains.
284738fd1498Szrj B) This is only necessary for ZERO distance references because other
284838fd1498Szrj references inherit value from loop carried PHIs.
284938fd1498Szrj
285038fd1498Szrj We first update position information for all combined chains. */
285138fd1498Szrj dref ref;
285238fd1498Szrj for (i = 0; chains->iterate (i, &ch1); ++i)
285338fd1498Szrj {
285438fd1498Szrj if (ch1->type != CT_COMBINATION || ch1->combined)
285538fd1498Szrj continue;
285638fd1498Szrj
285738fd1498Szrj for (j = 0; ch1->refs.iterate (j, &ref); ++j)
285838fd1498Szrj ref->pos = gimple_uid (ref->stmt);
285938fd1498Szrj
286038fd1498Szrj update_pos_for_combined_chains (ch1);
286138fd1498Szrj }
286238fd1498Szrj /* Then sort references according to newly updated position information. */
286338fd1498Szrj for (i = 0; chains->iterate (i, &ch1); ++i)
286438fd1498Szrj {
286538fd1498Szrj if (ch1->type != CT_COMBINATION && !ch1->combined)
286638fd1498Szrj continue;
286738fd1498Szrj
286838fd1498Szrj /* Find the first reference with non-ZERO distance. */
286938fd1498Szrj if (ch1->length == 0)
287038fd1498Szrj j = ch1->refs.length();
287138fd1498Szrj else
287238fd1498Szrj {
287338fd1498Szrj for (j = 0; ch1->refs.iterate (j, &ref); ++j)
287438fd1498Szrj if (ref->distance != 0)
287538fd1498Szrj break;
287638fd1498Szrj }
287738fd1498Szrj
287838fd1498Szrj /* Sort all ZERO distance references by position. */
287938fd1498Szrj qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);
288038fd1498Szrj
288138fd1498Szrj if (ch1->combined)
288238fd1498Szrj continue;
288338fd1498Szrj
288438fd1498Szrj /* For ZERO length chain, has_max_use_after must be true since root
288538fd1498Szrj combined stmt must dominates others. */
288638fd1498Szrj if (ch1->length == 0)
288738fd1498Szrj {
288838fd1498Szrj ch1->has_max_use_after = true;
288938fd1498Szrj continue;
289038fd1498Szrj }
289138fd1498Szrj /* Check if there is use at max distance after root for combined chains
289238fd1498Szrj and set flag accordingly. */
289338fd1498Szrj ch1->has_max_use_after = false;
289438fd1498Szrj gimple *root_stmt = get_chain_root (ch1)->stmt;
289538fd1498Szrj for (j = 1; ch1->refs.iterate (j, &ref); ++j)
289638fd1498Szrj {
289738fd1498Szrj if (ref->distance == ch1->length
289838fd1498Szrj && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt))
289938fd1498Szrj {
290038fd1498Szrj ch1->has_max_use_after = true;
290138fd1498Szrj break;
290238fd1498Szrj }
290338fd1498Szrj }
290438fd1498Szrj }
290538fd1498Szrj }
290638fd1498Szrj
290738fd1498Szrj /* Prepare initializers for store elimination CHAIN in LOOP. Returns false
290838fd1498Szrj if this is impossible because one of these initializers may trap, true
290938fd1498Szrj otherwise. */
291038fd1498Szrj
291138fd1498Szrj static bool
prepare_initializers_chain_store_elim(struct loop * loop,chain_p chain)291238fd1498Szrj prepare_initializers_chain_store_elim (struct loop *loop, chain_p chain)
291338fd1498Szrj {
291438fd1498Szrj unsigned i, n = chain->length;
291538fd1498Szrj
291638fd1498Szrj /* For now we can't eliminate stores if some of them are conditional
291738fd1498Szrj executed. */
291838fd1498Szrj if (!chain->all_always_accessed)
291938fd1498Szrj return false;
292038fd1498Szrj
292138fd1498Szrj /* Nothing to intialize for intra-iteration store elimination. */
292238fd1498Szrj if (n == 0 && chain->type == CT_STORE_STORE)
292338fd1498Szrj return true;
292438fd1498Szrj
292538fd1498Szrj /* For store elimination chain, there is nothing to initialize if stores
292638fd1498Szrj to be eliminated only store loop invariant values into memory. */
292738fd1498Szrj if (chain->type == CT_STORE_STORE
292838fd1498Szrj && is_inv_store_elimination_chain (loop, chain))
292938fd1498Szrj {
293038fd1498Szrj chain->inv_store_elimination = true;
293138fd1498Szrj return true;
293238fd1498Szrj }
293338fd1498Szrj
293438fd1498Szrj chain->inits.create (n);
293538fd1498Szrj chain->inits.safe_grow_cleared (n);
293638fd1498Szrj
293738fd1498Szrj /* For store eliminatin chain like below:
293838fd1498Szrj
293938fd1498Szrj for (i = 0; i < len; i++)
294038fd1498Szrj {
294138fd1498Szrj a[i] = 1;
294238fd1498Szrj // a[i + 1] = ...
294338fd1498Szrj a[i + 2] = 3;
294438fd1498Szrj }
294538fd1498Szrj
294638fd1498Szrj store to a[i + 1] is missed in loop body, it acts like bubbles. The
294738fd1498Szrj content of a[i + 1] remain the same if the loop iterates fewer times
294838fd1498Szrj than chain->length. We need to set up root variables for such stores
294938fd1498Szrj by loading from memory before loop. Note we only need to load bubble
295038fd1498Szrj elements because loop body is guaranteed to be executed at least once
295138fd1498Szrj after loop's preheader edge. */
295238fd1498Szrj auto_vec<bool> bubbles;
295338fd1498Szrj bubbles.safe_grow_cleared (n + 1);
295438fd1498Szrj for (i = 0; i < chain->refs.length (); i++)
295538fd1498Szrj bubbles[chain->refs[i]->distance] = true;
295638fd1498Szrj
295738fd1498Szrj struct data_reference *dr = get_chain_root (chain)->ref;
295838fd1498Szrj for (i = 0; i < n; i++)
295938fd1498Szrj {
296038fd1498Szrj if (bubbles[i])
296138fd1498Szrj continue;
296238fd1498Szrj
296338fd1498Szrj gimple_seq stmts = NULL;
296438fd1498Szrj
296538fd1498Szrj tree init = ref_at_iteration (dr, (int) 0 - i, &stmts);
296638fd1498Szrj if (stmts)
296738fd1498Szrj gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
296838fd1498Szrj
296938fd1498Szrj chain->inits[i] = init;
297038fd1498Szrj }
297138fd1498Szrj
297238fd1498Szrj return true;
297338fd1498Szrj }
297438fd1498Szrj
297538fd1498Szrj /* Prepare initializers for CHAIN in LOOP. Returns false if this is
297638fd1498Szrj impossible because one of these initializers may trap, true otherwise. */
297738fd1498Szrj
297838fd1498Szrj static bool
prepare_initializers_chain(struct loop * loop,chain_p chain)297938fd1498Szrj prepare_initializers_chain (struct loop *loop, chain_p chain)
298038fd1498Szrj {
298138fd1498Szrj unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
298238fd1498Szrj struct data_reference *dr = get_chain_root (chain)->ref;
298338fd1498Szrj tree init;
298438fd1498Szrj dref laref;
298538fd1498Szrj edge entry = loop_preheader_edge (loop);
298638fd1498Szrj
298738fd1498Szrj if (chain->type == CT_STORE_STORE)
298838fd1498Szrj return prepare_initializers_chain_store_elim (loop, chain);
298938fd1498Szrj
299038fd1498Szrj /* Find the initializers for the variables, and check that they cannot
299138fd1498Szrj trap. */
299238fd1498Szrj chain->inits.create (n);
299338fd1498Szrj for (i = 0; i < n; i++)
299438fd1498Szrj chain->inits.quick_push (NULL_TREE);
299538fd1498Szrj
299638fd1498Szrj /* If we have replaced some looparound phi nodes, use their initializers
299738fd1498Szrj instead of creating our own. */
299838fd1498Szrj FOR_EACH_VEC_ELT (chain->refs, i, laref)
299938fd1498Szrj {
300038fd1498Szrj if (gimple_code (laref->stmt) != GIMPLE_PHI)
300138fd1498Szrj continue;
300238fd1498Szrj
300338fd1498Szrj gcc_assert (laref->distance > 0);
300438fd1498Szrj chain->inits[n - laref->distance]
300538fd1498Szrj = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
300638fd1498Szrj }
300738fd1498Szrj
300838fd1498Szrj for (i = 0; i < n; i++)
300938fd1498Szrj {
301038fd1498Szrj gimple_seq stmts = NULL;
301138fd1498Szrj
301238fd1498Szrj if (chain->inits[i] != NULL_TREE)
301338fd1498Szrj continue;
301438fd1498Szrj
301538fd1498Szrj init = ref_at_iteration (dr, (int) i - n, &stmts);
301638fd1498Szrj if (!chain->all_always_accessed && tree_could_trap_p (init))
301738fd1498Szrj {
301838fd1498Szrj gimple_seq_discard (stmts);
301938fd1498Szrj return false;
302038fd1498Szrj }
302138fd1498Szrj
302238fd1498Szrj if (stmts)
302338fd1498Szrj gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
302438fd1498Szrj
302538fd1498Szrj chain->inits[i] = init;
302638fd1498Szrj }
302738fd1498Szrj
302838fd1498Szrj return true;
302938fd1498Szrj }
303038fd1498Szrj
303138fd1498Szrj /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
303238fd1498Szrj be used because the initializers might trap. */
303338fd1498Szrj
303438fd1498Szrj static void
prepare_initializers(struct loop * loop,vec<chain_p> chains)303538fd1498Szrj prepare_initializers (struct loop *loop, vec<chain_p> chains)
303638fd1498Szrj {
303738fd1498Szrj chain_p chain;
303838fd1498Szrj unsigned i;
303938fd1498Szrj
304038fd1498Szrj for (i = 0; i < chains.length (); )
304138fd1498Szrj {
304238fd1498Szrj chain = chains[i];
304338fd1498Szrj if (prepare_initializers_chain (loop, chain))
304438fd1498Szrj i++;
304538fd1498Szrj else
304638fd1498Szrj {
304738fd1498Szrj release_chain (chain);
304838fd1498Szrj chains.unordered_remove (i);
304938fd1498Szrj }
305038fd1498Szrj }
305138fd1498Szrj }
305238fd1498Szrj
305338fd1498Szrj /* Generates finalizer memory references for CHAIN in LOOP. Returns true
305438fd1498Szrj if finalizer code for CHAIN can be generated, otherwise false. */
305538fd1498Szrj
305638fd1498Szrj static bool
prepare_finalizers_chain(struct loop * loop,chain_p chain)305738fd1498Szrj prepare_finalizers_chain (struct loop *loop, chain_p chain)
305838fd1498Szrj {
305938fd1498Szrj unsigned i, n = chain->length;
306038fd1498Szrj struct data_reference *dr = get_chain_root (chain)->ref;
306138fd1498Szrj tree fini, niters = number_of_latch_executions (loop);
306238fd1498Szrj
306338fd1498Szrj /* For now we can't eliminate stores if some of them are conditional
306438fd1498Szrj executed. */
306538fd1498Szrj if (!chain->all_always_accessed)
306638fd1498Szrj return false;
306738fd1498Szrj
306838fd1498Szrj chain->finis.create (n);
306938fd1498Szrj for (i = 0; i < n; i++)
307038fd1498Szrj chain->finis.quick_push (NULL_TREE);
307138fd1498Szrj
307238fd1498Szrj /* We never use looparound phi node for store elimination chains. */
307338fd1498Szrj
307438fd1498Szrj /* Find the finalizers for the variables, and check that they cannot
307538fd1498Szrj trap. */
307638fd1498Szrj for (i = 0; i < n; i++)
307738fd1498Szrj {
307838fd1498Szrj gimple_seq stmts = NULL;
307938fd1498Szrj gcc_assert (chain->finis[i] == NULL_TREE);
308038fd1498Szrj
308138fd1498Szrj if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME)
308238fd1498Szrj {
308338fd1498Szrj niters = unshare_expr (niters);
308438fd1498Szrj niters = force_gimple_operand (niters, &stmts, true, NULL);
308538fd1498Szrj if (stmts)
308638fd1498Szrj {
308738fd1498Szrj gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
308838fd1498Szrj stmts = NULL;
308938fd1498Szrj }
309038fd1498Szrj }
309138fd1498Szrj fini = ref_at_iteration (dr, (int) 0 - i, &stmts, niters);
309238fd1498Szrj if (stmts)
309338fd1498Szrj gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
309438fd1498Szrj
309538fd1498Szrj chain->finis[i] = fini;
309638fd1498Szrj }
309738fd1498Szrj
309838fd1498Szrj return true;
309938fd1498Szrj }
310038fd1498Szrj
310138fd1498Szrj /* Generates finalizer memory reference for CHAINS in LOOP. Returns true
310238fd1498Szrj if finalizer code generation for CHAINS breaks loop closed ssa form. */
310338fd1498Szrj
310438fd1498Szrj static bool
prepare_finalizers(struct loop * loop,vec<chain_p> chains)310538fd1498Szrj prepare_finalizers (struct loop *loop, vec<chain_p> chains)
310638fd1498Szrj {
310738fd1498Szrj chain_p chain;
310838fd1498Szrj unsigned i;
310938fd1498Szrj bool loop_closed_ssa = false;
311038fd1498Szrj
311138fd1498Szrj for (i = 0; i < chains.length ();)
311238fd1498Szrj {
311338fd1498Szrj chain = chains[i];
311438fd1498Szrj
311538fd1498Szrj /* Finalizer is only necessary for inter-iteration store elimination
311638fd1498Szrj chains. */
311738fd1498Szrj if (chain->length == 0 || chain->type != CT_STORE_STORE)
311838fd1498Szrj {
311938fd1498Szrj i++;
312038fd1498Szrj continue;
312138fd1498Szrj }
312238fd1498Szrj
312338fd1498Szrj if (prepare_finalizers_chain (loop, chain))
312438fd1498Szrj {
312538fd1498Szrj i++;
312638fd1498Szrj /* Be conservative, assume loop closed ssa form is corrupted
312738fd1498Szrj by store-store chain. Though it's not always the case if
312838fd1498Szrj eliminated stores only store loop invariant values into
312938fd1498Szrj memory. */
313038fd1498Szrj loop_closed_ssa = true;
313138fd1498Szrj }
313238fd1498Szrj else
313338fd1498Szrj {
313438fd1498Szrj release_chain (chain);
313538fd1498Szrj chains.unordered_remove (i);
313638fd1498Szrj }
313738fd1498Szrj }
313838fd1498Szrj return loop_closed_ssa;
313938fd1498Szrj }
314038fd1498Szrj
314138fd1498Szrj /* Insert all initializing gimple stmts into loop's entry edge. */
314238fd1498Szrj
314338fd1498Szrj static void
insert_init_seqs(struct loop * loop,vec<chain_p> chains)314438fd1498Szrj insert_init_seqs (struct loop *loop, vec<chain_p> chains)
314538fd1498Szrj {
314638fd1498Szrj unsigned i;
314738fd1498Szrj edge entry = loop_preheader_edge (loop);
314838fd1498Szrj
314938fd1498Szrj for (i = 0; i < chains.length (); ++i)
315038fd1498Szrj if (chains[i]->init_seq)
315138fd1498Szrj {
315238fd1498Szrj gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq);
315338fd1498Szrj chains[i]->init_seq = NULL;
315438fd1498Szrj }
315538fd1498Szrj }
315638fd1498Szrj
315738fd1498Szrj /* Performs predictive commoning for LOOP. Sets bit 1<<0 of return value
315838fd1498Szrj if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa
315938fd1498Szrj form was corrupted. */
316038fd1498Szrj
316138fd1498Szrj static unsigned
tree_predictive_commoning_loop(struct loop * loop)316238fd1498Szrj tree_predictive_commoning_loop (struct loop *loop)
316338fd1498Szrj {
316438fd1498Szrj vec<data_reference_p> datarefs;
316538fd1498Szrj vec<ddr_p> dependences;
316638fd1498Szrj struct component *components;
316738fd1498Szrj vec<chain_p> chains = vNULL;
316838fd1498Szrj unsigned unroll_factor;
316938fd1498Szrj struct tree_niter_desc desc;
317038fd1498Szrj bool unroll = false, loop_closed_ssa = false;
317138fd1498Szrj edge exit;
317238fd1498Szrj
317338fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
317438fd1498Szrj fprintf (dump_file, "Processing loop %d\n", loop->num);
317538fd1498Szrj
317638fd1498Szrj /* Nothing for predicitive commoning if loop only iterates 1 time. */
317738fd1498Szrj if (get_max_loop_iterations_int (loop) == 0)
317838fd1498Szrj {
317938fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
318038fd1498Szrj fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");
318138fd1498Szrj
318238fd1498Szrj return 0;
318338fd1498Szrj }
318438fd1498Szrj
318538fd1498Szrj /* Find the data references and split them into components according to their
318638fd1498Szrj dependence relations. */
318738fd1498Szrj auto_vec<loop_p, 3> loop_nest;
318838fd1498Szrj dependences.create (10);
318938fd1498Szrj datarefs.create (10);
319038fd1498Szrj if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
319138fd1498Szrj &dependences))
319238fd1498Szrj {
319338fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
319438fd1498Szrj fprintf (dump_file, "Cannot analyze data dependencies\n");
319538fd1498Szrj free_data_refs (datarefs);
319638fd1498Szrj free_dependence_relations (dependences);
319738fd1498Szrj return 0;
319838fd1498Szrj }
319938fd1498Szrj
320038fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
320138fd1498Szrj dump_data_dependence_relations (dump_file, dependences);
320238fd1498Szrj
320338fd1498Szrj components = split_data_refs_to_components (loop, datarefs, dependences);
320438fd1498Szrj loop_nest.release ();
320538fd1498Szrj free_dependence_relations (dependences);
320638fd1498Szrj if (!components)
320738fd1498Szrj {
320838fd1498Szrj free_data_refs (datarefs);
320938fd1498Szrj free_affine_expand_cache (&name_expansions);
321038fd1498Szrj return 0;
321138fd1498Szrj }
321238fd1498Szrj
321338fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
321438fd1498Szrj {
321538fd1498Szrj fprintf (dump_file, "Initial state:\n\n");
321638fd1498Szrj dump_components (dump_file, components);
321738fd1498Szrj }
321838fd1498Szrj
321938fd1498Szrj /* Find the suitable components and split them into chains. */
322038fd1498Szrj components = filter_suitable_components (loop, components);
322138fd1498Szrj
322238fd1498Szrj auto_bitmap tmp_vars;
322338fd1498Szrj looparound_phis = BITMAP_ALLOC (NULL);
322438fd1498Szrj determine_roots (loop, components, &chains);
322538fd1498Szrj release_components (components);
322638fd1498Szrj
322738fd1498Szrj if (!chains.exists ())
322838fd1498Szrj {
322938fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
323038fd1498Szrj fprintf (dump_file,
323138fd1498Szrj "Predictive commoning failed: no suitable chains\n");
323238fd1498Szrj goto end;
323338fd1498Szrj }
323438fd1498Szrj prepare_initializers (loop, chains);
323538fd1498Szrj loop_closed_ssa = prepare_finalizers (loop, chains);
323638fd1498Szrj
323738fd1498Szrj /* Try to combine the chains that are always worked with together. */
323838fd1498Szrj try_combine_chains (loop, &chains);
323938fd1498Szrj
324038fd1498Szrj insert_init_seqs (loop, chains);
324138fd1498Szrj
324238fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
324338fd1498Szrj {
324438fd1498Szrj fprintf (dump_file, "Before commoning:\n\n");
324538fd1498Szrj dump_chains (dump_file, chains);
324638fd1498Szrj }
324738fd1498Szrj
324838fd1498Szrj /* Determine the unroll factor, and if the loop should be unrolled, ensure
324938fd1498Szrj that its number of iterations is divisible by the factor. */
325038fd1498Szrj unroll_factor = determine_unroll_factor (chains);
325138fd1498Szrj scev_reset ();
325238fd1498Szrj unroll = (unroll_factor > 1
325338fd1498Szrj && can_unroll_loop_p (loop, unroll_factor, &desc));
325438fd1498Szrj exit = single_dom_exit (loop);
325538fd1498Szrj
325638fd1498Szrj /* Execute the predictive commoning transformations, and possibly unroll the
325738fd1498Szrj loop. */
325838fd1498Szrj if (unroll)
325938fd1498Szrj {
326038fd1498Szrj struct epcc_data dta;
326138fd1498Szrj
326238fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
326338fd1498Szrj fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
326438fd1498Szrj
326538fd1498Szrj dta.chains = chains;
326638fd1498Szrj dta.tmp_vars = tmp_vars;
326738fd1498Szrj
326838fd1498Szrj update_ssa (TODO_update_ssa_only_virtuals);
326938fd1498Szrj
327038fd1498Szrj /* Cfg manipulations performed in tree_transform_and_unroll_loop before
327138fd1498Szrj execute_pred_commoning_cbck is called may cause phi nodes to be
327238fd1498Szrj reallocated, which is a problem since CHAINS may point to these
327338fd1498Szrj statements. To fix this, we store the ssa names defined by the
327438fd1498Szrj phi nodes here instead of the phi nodes themselves, and restore
327538fd1498Szrj the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
327638fd1498Szrj replace_phis_by_defined_names (chains);
327738fd1498Szrj
327838fd1498Szrj tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
327938fd1498Szrj execute_pred_commoning_cbck, &dta);
328038fd1498Szrj eliminate_temp_copies (loop, tmp_vars);
328138fd1498Szrj }
328238fd1498Szrj else
328338fd1498Szrj {
328438fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
328538fd1498Szrj fprintf (dump_file,
328638fd1498Szrj "Executing predictive commoning without unrolling.\n");
328738fd1498Szrj execute_pred_commoning (loop, chains, tmp_vars);
328838fd1498Szrj }
328938fd1498Szrj
329038fd1498Szrj end: ;
329138fd1498Szrj release_chains (chains);
329238fd1498Szrj free_data_refs (datarefs);
329338fd1498Szrj BITMAP_FREE (looparound_phis);
329438fd1498Szrj
329538fd1498Szrj free_affine_expand_cache (&name_expansions);
329638fd1498Szrj
329738fd1498Szrj return (unroll ? 1 : 0) | (loop_closed_ssa ? 2 : 0);
329838fd1498Szrj }
329938fd1498Szrj
330038fd1498Szrj /* Runs predictive commoning. */
330138fd1498Szrj
330238fd1498Szrj unsigned
tree_predictive_commoning(void)330338fd1498Szrj tree_predictive_commoning (void)
330438fd1498Szrj {
330538fd1498Szrj struct loop *loop;
330638fd1498Szrj unsigned ret = 0, changed = 0;
330738fd1498Szrj
330838fd1498Szrj initialize_original_copy_tables ();
330938fd1498Szrj FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
331038fd1498Szrj if (optimize_loop_for_speed_p (loop))
331138fd1498Szrj {
331238fd1498Szrj changed |= tree_predictive_commoning_loop (loop);
331338fd1498Szrj }
331438fd1498Szrj free_original_copy_tables ();
331538fd1498Szrj
331638fd1498Szrj if (changed > 0)
331738fd1498Szrj {
331838fd1498Szrj scev_reset ();
331938fd1498Szrj
332038fd1498Szrj if (changed > 1)
332138fd1498Szrj rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
332238fd1498Szrj
332338fd1498Szrj ret = TODO_cleanup_cfg;
332438fd1498Szrj }
332538fd1498Szrj
332638fd1498Szrj return ret;
332738fd1498Szrj }
332838fd1498Szrj
332938fd1498Szrj /* Predictive commoning Pass. */
333038fd1498Szrj
333138fd1498Szrj static unsigned
run_tree_predictive_commoning(struct function * fun)333238fd1498Szrj run_tree_predictive_commoning (struct function *fun)
333338fd1498Szrj {
333438fd1498Szrj if (number_of_loops (fun) <= 1)
333538fd1498Szrj return 0;
333638fd1498Szrj
333738fd1498Szrj return tree_predictive_commoning ();
333838fd1498Szrj }
333938fd1498Szrj
334038fd1498Szrj namespace {
334138fd1498Szrj
334238fd1498Szrj const pass_data pass_data_predcom =
334338fd1498Szrj {
334438fd1498Szrj GIMPLE_PASS, /* type */
334538fd1498Szrj "pcom", /* name */
334638fd1498Szrj OPTGROUP_LOOP, /* optinfo_flags */
334738fd1498Szrj TV_PREDCOM, /* tv_id */
334838fd1498Szrj PROP_cfg, /* properties_required */
334938fd1498Szrj 0, /* properties_provided */
335038fd1498Szrj 0, /* properties_destroyed */
335138fd1498Szrj 0, /* todo_flags_start */
335238fd1498Szrj TODO_update_ssa_only_virtuals, /* todo_flags_finish */
335338fd1498Szrj };
335438fd1498Szrj
335538fd1498Szrj class pass_predcom : public gimple_opt_pass
335638fd1498Szrj {
335738fd1498Szrj public:
pass_predcom(gcc::context * ctxt)335838fd1498Szrj pass_predcom (gcc::context *ctxt)
335938fd1498Szrj : gimple_opt_pass (pass_data_predcom, ctxt)
336038fd1498Szrj {}
336138fd1498Szrj
336238fd1498Szrj /* opt_pass methods: */
gate(function *)336338fd1498Szrj virtual bool gate (function *) { return flag_predictive_commoning != 0; }
execute(function * fun)336438fd1498Szrj virtual unsigned int execute (function *fun)
336538fd1498Szrj {
336638fd1498Szrj return run_tree_predictive_commoning (fun);
336738fd1498Szrj }
336838fd1498Szrj
336938fd1498Szrj }; // class pass_predcom
337038fd1498Szrj
337138fd1498Szrj } // anon namespace
337238fd1498Szrj
337338fd1498Szrj gimple_opt_pass *
make_pass_predcom(gcc::context * ctxt)337438fd1498Szrj make_pass_predcom (gcc::context *ctxt)
337538fd1498Szrj {
337638fd1498Szrj return new pass_predcom (ctxt);
337738fd1498Szrj }
337838fd1498Szrj
337938fd1498Szrj
3380