xref: /openbsd/gnu/gcc/gcc/tree-loop-linear.c (revision 404b540a)
1 /* Linear Loop transforms
2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3    Contributed by Daniel Berlin <dberlin@dberlin.org>.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING.  If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA.  */
21 
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 
31 #include "rtl.h"
32 #include "basic-block.h"
33 #include "diagnostic.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
36 #include "timevar.h"
37 #include "cfgloop.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "tree-chrec.h"
41 #include "tree-data-ref.h"
42 #include "tree-scalar-evolution.h"
43 #include "tree-pass.h"
44 #include "lambda.h"
45 
46 /* Linear loop transforms include any composition of interchange,
47    scaling, skewing, and reversal.  They are used to change the
48    iteration order of loop nests in order to optimize data locality of
49    traversals, or remove dependences that prevent
50    parallelization/vectorization/etc.
51 
52    TODO: Determine reuse vectors/matrix and use it to determine optimal
53    transform matrix for locality purposes.
54    TODO: Completion of partial transforms.  */
55 
56 /* Gather statistics for loop interchange.  LOOP is the loop being
57    considered. The first loop in the considered loop nest is
58    FIRST_LOOP, and consequently, the index of the considered loop is
59    obtained by LOOP->DEPTH - FIRST_LOOP->DEPTH
60 
61    Initializes:
62    - DEPENDENCE_STEPS the sum of all the data dependence distances
63    carried by loop LOOP,
64 
65    - NB_DEPS_NOT_CARRIED_BY_LOOP the number of dependence relations
66    for which the loop LOOP is not carrying any dependence,
67 
68    - ACCESS_STRIDES the sum of all the strides in LOOP.
69 
70    Example: for the following loop,
71 
72    | loop_1 runs 1335 times
73    |   loop_2 runs 1335 times
74    |     A[{{0, +, 1}_1, +, 1335}_2]
75    |     B[{{0, +, 1}_1, +, 1335}_2]
76    |   endloop_2
77    |   A[{0, +, 1336}_1]
78    | endloop_1
79 
80    gather_interchange_stats (in loop_1) will return
81    DEPENDENCE_STEPS = 3002
82    NB_DEPS_NOT_CARRIED_BY_LOOP = 5
83    ACCESS_STRIDES = 10694
84 
85    gather_interchange_stats (in loop_2) will return
86    DEPENDENCE_STEPS = 3000
87    NB_DEPS_NOT_CARRIED_BY_LOOP = 7
88    ACCESS_STRIDES = 8010
89 */
90 
91 static void
gather_interchange_stats(VEC (ddr_p,heap)* dependence_relations,VEC (data_reference_p,heap)* datarefs,struct loop * loop,struct loop * first_loop,unsigned int * dependence_steps,unsigned int * nb_deps_not_carried_by_loop,unsigned int * access_strides)92 gather_interchange_stats (VEC (ddr_p, heap) *dependence_relations,
93 			  VEC (data_reference_p, heap) *datarefs,
94 			  struct loop *loop,
95 			  struct loop *first_loop,
96 			  unsigned int *dependence_steps,
97 			  unsigned int *nb_deps_not_carried_by_loop,
98 			  unsigned int *access_strides)
99 {
100   unsigned int i, j;
101   struct data_dependence_relation *ddr;
102   struct data_reference *dr;
103 
104   *dependence_steps = 0;
105   *nb_deps_not_carried_by_loop = 0;
106   *access_strides = 0;
107 
108   for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
109     {
110       /* If we don't know anything about this dependence, or the distance
111 	 vector is NULL, or there is no dependence, then there is no reuse of
112 	 data.  */
113       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
114 	  || DDR_ARE_DEPENDENT (ddr) == chrec_known
115 	  || DDR_NUM_DIST_VECTS (ddr) == 0)
116 	continue;
117 
118       for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
119 	{
120 	  int dist = DDR_DIST_VECT (ddr, j)[loop->depth - first_loop->depth];
121 
122 	  if (dist == 0)
123 	    (*nb_deps_not_carried_by_loop) += 1;
124 
125 	  else if (dist < 0)
126 	    (*dependence_steps) += -dist;
127 
128 	  else
129 	    (*dependence_steps) += dist;
130 	}
131     }
132 
133   /* Compute the access strides.  */
134   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
135     {
136       unsigned int it;
137       tree stmt = DR_STMT (dr);
138       struct loop *stmt_loop = loop_containing_stmt (stmt);
139       struct loop *inner_loop = first_loop->inner;
140 
141       if (inner_loop != stmt_loop
142 	  && !flow_loop_nested_p (inner_loop, stmt_loop))
143 	continue;
144       for (it = 0; it < DR_NUM_DIMENSIONS (dr); it++)
145 	{
146 	  tree chrec = DR_ACCESS_FN (dr, it);
147 	  tree tstride = evolution_part_in_loop_num
148 	    (chrec, loop->num);
149 
150 	  if (tstride == NULL_TREE
151 	      || TREE_CODE (tstride) != INTEGER_CST)
152 	    continue;
153 
154 	  (*access_strides) += int_cst_value (tstride);
155 	}
156     }
157 }
158 
159 /* Attempt to apply interchange transformations to TRANS to maximize the
160    spatial and temporal locality of the loop.
161    Returns the new transform matrix.  The smaller the reuse vector
162    distances in the inner loops, the fewer the cache misses.
163    FIRST_LOOP is the loop->num of the first loop in the analyzed loop
164    nest.  */
165 
166 
167 static lambda_trans_matrix
try_interchange_loops(lambda_trans_matrix trans,unsigned int depth,VEC (ddr_p,heap)* dependence_relations,VEC (data_reference_p,heap)* datarefs,struct loop * first_loop)168 try_interchange_loops (lambda_trans_matrix trans,
169 		       unsigned int depth,
170 		       VEC (ddr_p, heap) *dependence_relations,
171 		       VEC (data_reference_p, heap) *datarefs,
172 		       struct loop *first_loop)
173 {
174   struct loop *loop_i;
175   struct loop *loop_j;
176   unsigned int dependence_steps_i, dependence_steps_j;
177   unsigned int access_strides_i, access_strides_j;
178   unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j;
179   struct data_dependence_relation *ddr;
180 
181   if (VEC_length (ddr_p, dependence_relations) == 0)
182     return trans;
183 
184   /* When there is an unknown relation in the dependence_relations, we
185      know that it is no worth looking at this loop nest: give up.  */
186   ddr = VEC_index (ddr_p, dependence_relations, 0);
187   if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
188     return trans;
189 
190   /* LOOP_I is always the outer loop.  */
191   for (loop_j = first_loop->inner;
192        loop_j;
193        loop_j = loop_j->inner)
194     for (loop_i = first_loop;
195 	 loop_i->depth < loop_j->depth;
196 	 loop_i = loop_i->inner)
197       {
198 	gather_interchange_stats (dependence_relations, datarefs,
199 				  loop_i, first_loop,
200 				  &dependence_steps_i,
201 				  &nb_deps_not_carried_by_i,
202 				  &access_strides_i);
203 	gather_interchange_stats (dependence_relations, datarefs,
204 				  loop_j, first_loop,
205 				  &dependence_steps_j,
206 				  &nb_deps_not_carried_by_j,
207 				  &access_strides_j);
208 
209 	/* Heuristics for loop interchange profitability:
210 
211 	   1. (spatial locality) Inner loops should have smallest
212               dependence steps.
213 
214 	   2. (spatial locality) Inner loops should contain more
215 	   dependence relations not carried by the loop.
216 
217 	   3. (temporal locality) Inner loops should have smallest
218 	      array access strides.
219 	*/
220 	if (dependence_steps_i < dependence_steps_j
221 	    || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j
222 	    || access_strides_i < access_strides_j)
223 	  {
224 	    lambda_matrix_row_exchange (LTM_MATRIX (trans),
225 					loop_i->depth - first_loop->depth,
226 					loop_j->depth - first_loop->depth);
227 	    /* Validate the resulting matrix.  When the transformation
228 	       is not valid, reverse to the previous transformation.  */
229 	    if (!lambda_transform_legal_p (trans, depth, dependence_relations))
230 	      lambda_matrix_row_exchange (LTM_MATRIX (trans),
231 					  loop_i->depth - first_loop->depth,
232 					  loop_j->depth - first_loop->depth);
233 	  }
234       }
235 
236   return trans;
237 }
238 
239 /* Perform a set of linear transforms on LOOPS.  */
240 
241 void
linear_transform_loops(struct loops * loops)242 linear_transform_loops (struct loops *loops)
243 {
244   bool modified = false;
245   unsigned int i;
246   VEC(tree,heap) *oldivs = NULL;
247   VEC(tree,heap) *invariants = NULL;
248 
249   for (i = 1; i < loops->num; i++)
250     {
251       unsigned int depth = 0;
252       VEC (ddr_p, heap) *dependence_relations;
253       VEC (data_reference_p, heap) *datarefs;
254       struct loop *loop_nest = loops->parray[i];
255       struct loop *temp;
256       lambda_loopnest before, after;
257       lambda_trans_matrix trans;
258       bool problem = false;
259       /* If it's not a loop nest, we don't want it.
260          We also don't handle sibling loops properly,
261          which are loops of the following form:
262          for (i = 0; i < 50; i++)
263            {
264              for (j = 0; j < 50; j++)
265                {
266 	        ...
267                }
268              for (j = 0; j < 50; j++)
269                {
270                 ...
271                }
272            } */
273       if (!loop_nest || !loop_nest->inner || !loop_nest->single_exit)
274 	continue;
275       VEC_truncate (tree, oldivs, 0);
276       VEC_truncate (tree, invariants, 0);
277       depth = 1;
278       for (temp = loop_nest->inner; temp; temp = temp->inner)
279 	{
280 	  /* If we have a sibling loop or multiple exit edges, jump ship.  */
281 	  if (temp->next || !temp->single_exit)
282 	    {
283 	      problem = true;
284 	      break;
285 	    }
286 	  depth ++;
287 	}
288       if (problem)
289 	continue;
290 
291       /* Analyze data references and dependence relations using scev.  */
292 
293       datarefs = VEC_alloc (data_reference_p, heap, 10);
294       dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
295       compute_data_dependences_for_loop (loop_nest, true, &datarefs,
296 					 &dependence_relations);
297 
298       if (dump_file && (dump_flags & TDF_DETAILS))
299 	dump_ddrs (dump_file, dependence_relations);
300 
301       /* Build the transformation matrix.  */
302       trans = lambda_trans_matrix_new (depth, depth);
303       lambda_matrix_id (LTM_MATRIX (trans), depth);
304       trans = try_interchange_loops (trans, depth, dependence_relations,
305 				     datarefs, loop_nest);
306 
307       if (lambda_trans_matrix_id_p (trans))
308 	{
309 	  if (dump_file)
310 	   fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n");
311 	  goto free_and_continue;
312 	}
313 
314       /* Check whether the transformation is legal.  */
315       if (!lambda_transform_legal_p (trans, depth, dependence_relations))
316 	{
317 	  if (dump_file)
318 	    fprintf (dump_file, "Can't transform loop, transform is illegal:\n");
319 	  goto free_and_continue;
320 	}
321 
322       before = gcc_loopnest_to_lambda_loopnest (loops, loop_nest, &oldivs,
323 						&invariants);
324 
325       if (!before)
326 	goto free_and_continue;
327 
328       if (dump_file)
329 	{
330 	  fprintf (dump_file, "Before:\n");
331 	  print_lambda_loopnest (dump_file, before, 'i');
332 	}
333 
334       after = lambda_loopnest_transform (before, trans);
335 
336       if (dump_file)
337 	{
338 	  fprintf (dump_file, "After:\n");
339 	  print_lambda_loopnest (dump_file, after, 'u');
340 	}
341 
342       lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants,
343 				       after, trans);
344       modified = true;
345 
346       if (dump_file)
347 	fprintf (dump_file, "Successfully transformed loop.\n");
348 
349     free_and_continue:
350       free_dependence_relations (dependence_relations);
351       free_data_refs (datarefs);
352     }
353 
354   VEC_free (tree, heap, oldivs);
355   VEC_free (tree, heap, invariants);
356   scev_reset ();
357 
358   if (modified)
359     rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi);
360 }
361