1 /* Linear Loop transforms
2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3    Contributed by Daniel Berlin <dberlin@dberlin.org>.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING.  If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA.  */
21 
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 
31 #include "rtl.h"
32 #include "basic-block.h"
33 #include "diagnostic.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
36 #include "timevar.h"
37 #include "cfgloop.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "tree-chrec.h"
41 #include "tree-data-ref.h"
42 #include "tree-scalar-evolution.h"
43 #include "tree-pass.h"
44 #include "varray.h"
45 #include "lambda.h"
46 
47 /* Linear loop transforms include any composition of interchange,
48    scaling, skewing, and reversal.  They are used to change the
49    iteration order of loop nests in order to optimize data locality of
50    traversals, or remove dependences that prevent
51    parallelization/vectorization/etc.
52 
53    TODO: Determine reuse vectors/matrix and use it to determine optimal
54    transform matrix for locality purposes.
55    TODO: Completion of partial transforms.  */
56 
57 /* Gather statistics for loop interchange.  LOOP is the loop being
58    considered. The first loop in the considered loop nest is
59    FIRST_LOOP, and consequently, the index of the considered loop is
60    obtained by LOOP->DEPTH - FIRST_LOOP->DEPTH
61 
62    Initializes:
63    - DEPENDENCE_STEPS the sum of all the data dependence distances
64    carried by loop LOOP,
65 
66    - NB_DEPS_NOT_CARRIED_BY_LOOP the number of dependence relations
67    for which the loop LOOP is not carrying any dependence,
68 
69    - ACCESS_STRIDES the sum of all the strides in LOOP.
70 
71    Example: for the following loop,
72 
73    | loop_1 runs 1335 times
74    |   loop_2 runs 1335 times
75    |     A[{{0, +, 1}_1, +, 1335}_2]
76    |     B[{{0, +, 1}_1, +, 1335}_2]
77    |   endloop_2
78    |   A[{0, +, 1336}_1]
79    | endloop_1
80 
81    gather_interchange_stats (in loop_1) will return
82    DEPENDENCE_STEPS = 3002
83    NB_DEPS_NOT_CARRIED_BY_LOOP = 5
84    ACCESS_STRIDES = 10694
85 
86    gather_interchange_stats (in loop_2) will return
87    DEPENDENCE_STEPS = 3000
88    NB_DEPS_NOT_CARRIED_BY_LOOP = 7
89    ACCESS_STRIDES = 8010
90 */
91 
92 static void
gather_interchange_stats(varray_type dependence_relations,varray_type datarefs,struct loop * loop,struct loop * first_loop,unsigned int * dependence_steps,unsigned int * nb_deps_not_carried_by_loop,unsigned int * access_strides)93 gather_interchange_stats (varray_type dependence_relations,
94 			  varray_type datarefs,
95 			  struct loop *loop,
96 			  struct loop *first_loop,
97 			  unsigned int *dependence_steps,
98 			  unsigned int *nb_deps_not_carried_by_loop,
99 			  unsigned int *access_strides)
100 {
101   unsigned int i, j;
102 
103   *dependence_steps = 0;
104   *nb_deps_not_carried_by_loop = 0;
105   *access_strides = 0;
106 
107   for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
108     {
109       struct data_dependence_relation *ddr =
110 	(struct data_dependence_relation *)
111 	VARRAY_GENERIC_PTR (dependence_relations, i);
112 
113       /* If we don't know anything about this dependence, or the distance
114 	 vector is NULL, or there is no dependence, then there is no reuse of
115 	 data.  */
116       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
117 	  || DDR_ARE_DEPENDENT (ddr) == chrec_known
118 	  || DDR_NUM_DIST_VECTS (ddr) == 0)
119 	continue;
120 
121       for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
122 	{
123 	  int dist = DDR_DIST_VECT (ddr, j)[loop->depth - first_loop->depth];
124 
125 	  if (dist == 0)
126 	    (*nb_deps_not_carried_by_loop) += 1;
127 
128 	  else if (dist < 0)
129 	    (*dependence_steps) += -dist;
130 
131 	  else
132 	    (*dependence_steps) += dist;
133 	}
134     }
135 
136   /* Compute the access strides.  */
137   for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
138     {
139       unsigned int it;
140       struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
141       tree stmt = DR_STMT (dr);
142       struct loop *stmt_loop = loop_containing_stmt (stmt);
143       struct loop *inner_loop = first_loop->inner;
144 
145       if (inner_loop != stmt_loop
146 	  && !flow_loop_nested_p (inner_loop, stmt_loop))
147 	continue;
148       for (it = 0; it < DR_NUM_DIMENSIONS (dr); it++)
149 	{
150 	  tree chrec = DR_ACCESS_FN (dr, it);
151 	  tree tstride = evolution_part_in_loop_num
152 	    (chrec, loop->num);
153 
154 	  if (tstride == NULL_TREE
155 	      || TREE_CODE (tstride) != INTEGER_CST)
156 	    continue;
157 
158 	  (*access_strides) += int_cst_value (tstride);
159 	}
160     }
161 }
162 
163 /* Attempt to apply interchange transformations to TRANS to maximize the
164    spatial and temporal locality of the loop.
165    Returns the new transform matrix.  The smaller the reuse vector
166    distances in the inner loops, the fewer the cache misses.
167    FIRST_LOOP is the loop->num of the first loop in the analyzed loop
168    nest.  */
169 
170 
171 static lambda_trans_matrix
try_interchange_loops(lambda_trans_matrix trans,unsigned int depth,varray_type dependence_relations,varray_type datarefs,struct loop * first_loop)172 try_interchange_loops (lambda_trans_matrix trans,
173 		       unsigned int depth,
174 		       varray_type dependence_relations,
175 		       varray_type datarefs,
176 		       struct loop *first_loop)
177 {
178   struct loop *loop_i;
179   struct loop *loop_j;
180   unsigned int dependence_steps_i, dependence_steps_j;
181   unsigned int access_strides_i, access_strides_j;
182   unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j;
183   struct data_dependence_relation *ddr;
184 
185   /* When there is an unknown relation in the dependence_relations, we
186      know that it is no worth looking at this loop nest: give up.  */
187   ddr = (struct data_dependence_relation *)
188     VARRAY_GENERIC_PTR (dependence_relations, 0);
189   if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
190     return trans;
191 
192   /* LOOP_I is always the outer loop.  */
193   for (loop_j = first_loop->inner;
194        loop_j;
195        loop_j = loop_j->inner)
196     for (loop_i = first_loop;
197 	 loop_i->depth < loop_j->depth;
198 	 loop_i = loop_i->inner)
199       {
200 	gather_interchange_stats (dependence_relations, datarefs,
201 				  loop_i, first_loop,
202 				  &dependence_steps_i,
203 				  &nb_deps_not_carried_by_i,
204 				  &access_strides_i);
205 	gather_interchange_stats (dependence_relations, datarefs,
206 				  loop_j, first_loop,
207 				  &dependence_steps_j,
208 				  &nb_deps_not_carried_by_j,
209 				  &access_strides_j);
210 
211 	/* Heuristics for loop interchange profitability:
212 
213 	   1. (spatial locality) Inner loops should have smallest
214               dependence steps.
215 
216 	   2. (spatial locality) Inner loops should contain more
217 	   dependence relations not carried by the loop.
218 
219 	   3. (temporal locality) Inner loops should have smallest
220 	      array access strides.
221 	*/
222 	if (dependence_steps_i < dependence_steps_j
223 	    || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j
224 	    || access_strides_i < access_strides_j)
225 	  {
226 	    lambda_matrix_row_exchange (LTM_MATRIX (trans),
227 					loop_i->depth - first_loop->depth,
228 					loop_j->depth - first_loop->depth);
229 	    /* Validate the resulting matrix.  When the transformation
230 	       is not valid, reverse to the previous transformation.  */
231 	    if (!lambda_transform_legal_p (trans, depth, dependence_relations))
232 	      lambda_matrix_row_exchange (LTM_MATRIX (trans),
233 					  loop_i->depth - first_loop->depth,
234 					  loop_j->depth - first_loop->depth);
235 	  }
236       }
237 
238   return trans;
239 }
240 
241 /* Perform a set of linear transforms on LOOPS.  */
242 
243 void
linear_transform_loops(struct loops * loops)244 linear_transform_loops (struct loops *loops)
245 {
246   unsigned int i;
247   VEC(tree,heap) *oldivs = NULL;
248   VEC(tree,heap) *invariants = NULL;
249 
250   for (i = 1; i < loops->num; i++)
251     {
252       unsigned int depth = 0;
253       varray_type datarefs;
254       varray_type dependence_relations;
255       struct loop *loop_nest = loops->parray[i];
256       struct loop *temp;
257       lambda_loopnest before, after;
258       lambda_trans_matrix trans;
259       bool problem = false;
260       bool need_perfect_nest = false;
261       /* If it's not a loop nest, we don't want it.
262          We also don't handle sibling loops properly,
263          which are loops of the following form:
264          for (i = 0; i < 50; i++)
265            {
266              for (j = 0; j < 50; j++)
267                {
268 	        ...
269                }
270            for (j = 0; j < 50; j++)
271                {
272                 ...
273                }
274            } */
275       if (!loop_nest || !loop_nest->inner)
276 	continue;
277       VEC_truncate (tree, oldivs, 0);
278       VEC_truncate (tree, invariants, 0);
279       depth = 1;
280       for (temp = loop_nest->inner; temp; temp = temp->inner)
281 	{
282 	  /* If we have a sibling loop or multiple exit edges, jump ship.  */
283 	  if (temp->next || !temp->single_exit)
284 	    {
285 	      problem = true;
286 	      break;
287 	    }
288 	  depth ++;
289 	}
290       if (problem)
291 	continue;
292 
293       /* Analyze data references and dependence relations using scev.  */
294 
295       VARRAY_GENERIC_PTR_INIT (datarefs, 10, "datarefs");
296       VARRAY_GENERIC_PTR_INIT (dependence_relations, 10,
297 			       "dependence_relations");
298 
299 
300       compute_data_dependences_for_loop (loop_nest, true,
301 					 &datarefs, &dependence_relations);
302       if (dump_file && (dump_flags & TDF_DETAILS))
303 	{
304 	  unsigned int j;
305 	  for (j = 0; j < VARRAY_ACTIVE_SIZE (dependence_relations); j++)
306 	    {
307 	      struct data_dependence_relation *ddr =
308 		(struct data_dependence_relation *)
309 		VARRAY_GENERIC_PTR (dependence_relations, j);
310 
311 	      if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
312 		dump_data_dependence_relation (dump_file, ddr);
313 	    }
314 	  fprintf (dump_file, "\n\n");
315 	}
316       /* Build the transformation matrix.  */
317       trans = lambda_trans_matrix_new (depth, depth);
318       lambda_matrix_id (LTM_MATRIX (trans), depth);
319 
320       trans = try_interchange_loops (trans, depth, dependence_relations,
321 				     datarefs, loop_nest);
322 
323       if (lambda_trans_matrix_id_p (trans))
324 	{
325 	  if (dump_file)
326 	   fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n");
327 	  continue;
328 	}
329 
330       /* Check whether the transformation is legal.  */
331       if (!lambda_transform_legal_p (trans, depth, dependence_relations))
332 	{
333 	  if (dump_file)
334 	    fprintf (dump_file, "Can't transform loop, transform is illegal:\n");
335 	  continue;
336 	}
337       if (!perfect_nest_p (loop_nest))
338 	need_perfect_nest = true;
339       before = gcc_loopnest_to_lambda_loopnest (loops,
340 						loop_nest, &oldivs,
341 						&invariants,
342 						need_perfect_nest);
343       if (!before)
344 	continue;
345 
346       if (dump_file)
347 	{
348 	  fprintf (dump_file, "Before:\n");
349 	  print_lambda_loopnest (dump_file, before, 'i');
350 	}
351 
352       after = lambda_loopnest_transform (before, trans);
353       if (dump_file)
354 	{
355 	  fprintf (dump_file, "After:\n");
356 	  print_lambda_loopnest (dump_file, after, 'u');
357 	}
358       lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants,
359 				       after, trans);
360       if (dump_file)
361 	fprintf (dump_file, "Successfully transformed loop.\n");
362       free_dependence_relations (dependence_relations);
363       free_data_refs (datarefs);
364     }
365   VEC_free (tree, heap, oldivs);
366   VEC_free (tree, heap, invariants);
367   scev_reset ();
368   rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi);
369 }
370