1 /* Data references and dependences detectors.
2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* This pass walks a given loop structure searching for array
22    references.  The information about the array accesses is recorded
23    in DATA_REFERENCE structures.
24 
25    The basic test for determining the dependences is:
26    given two access functions chrec1 and chrec2 to a same array, and
27    x and y two vectors from the iteration domain, the same element of
28    the array is accessed twice at iterations x and y if and only if:
29    |             chrec1 (x) == chrec2 (y).
30 
31    The goals of this analysis are:
32 
33    - to determine the independence: the relation between two
34      independent accesses is qualified with the chrec_known (this
35      information allows a loop parallelization),
36 
37    - when two data references access the same data, to qualify the
38      dependence relation with classic dependence representations:
39 
40        - distance vectors
41        - direction vectors
42        - loop carried level dependence
43        - polyhedron dependence
44      or with the chains of recurrences based representation,
45 
46    - to define a knowledge base for storing the data dependence
47      information,
48 
49    - to define an interface to access this data.
50 
51 
52    Definitions:
53 
54    - subscript: given two array accesses a subscript is the tuple
55    composed of the access functions for a given dimension.  Example:
56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57    (f1, g1), (f2, g2), (f3, g3).
58 
59    - Diophantine equation: an equation whose coefficients and
60    solutions are integer constants, for example the equation
61    |   3*x + 2*y = 1
62    has an integer solution x = 1 and y = -1.
63 
64    References:
65 
66    - "Advanced Compilation for High Performance Computing" by Randy
67    Allen and Ken Kennedy.
68    http://citeseer.ist.psu.edu/goff91practical.html
69 
70    - "Loop Transformations for Restructuring Compilers - The Foundations"
71    by Utpal Banerjee.
72 
73 
74 */
75 
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "gimple-pretty-print.h"
80 #include "tree-flow.h"
81 #include "cfgloop.h"
82 #include "tree-data-ref.h"
83 #include "tree-scalar-evolution.h"
84 #include "dumpfile.h"
85 #include "langhooks.h"
86 #include "tree-affine.h"
87 #include "params.h"
88 
89 static struct datadep_stats
90 {
91   int num_dependence_tests;
92   int num_dependence_dependent;
93   int num_dependence_independent;
94   int num_dependence_undetermined;
95 
96   int num_subscript_tests;
97   int num_subscript_undetermined;
98   int num_same_subscript_function;
99 
100   int num_ziv;
101   int num_ziv_independent;
102   int num_ziv_dependent;
103   int num_ziv_unimplemented;
104 
105   int num_siv;
106   int num_siv_independent;
107   int num_siv_dependent;
108   int num_siv_unimplemented;
109 
110   int num_miv;
111   int num_miv_independent;
112   int num_miv_dependent;
113   int num_miv_unimplemented;
114 } dependence_stats;
115 
116 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
117 					   struct data_reference *,
118 					   struct data_reference *,
119 					   struct loop *);
120 /* Returns true iff A divides B.  */
121 
122 static inline bool
tree_fold_divides_p(const_tree a,const_tree b)123 tree_fold_divides_p (const_tree a, const_tree b)
124 {
125   gcc_assert (TREE_CODE (a) == INTEGER_CST);
126   gcc_assert (TREE_CODE (b) == INTEGER_CST);
127   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
128 }
129 
130 /* Returns true iff A divides B.  */
131 
132 static inline bool
int_divides_p(int a,int b)133 int_divides_p (int a, int b)
134 {
135   return ((b % a) == 0);
136 }
137 
138 
139 
140 /* Dump into FILE all the data references from DATAREFS.  */
141 
142 static void
dump_data_references(FILE * file,vec<data_reference_p> datarefs)143 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
144 {
145   unsigned int i;
146   struct data_reference *dr;
147 
148   FOR_EACH_VEC_ELT (datarefs, i, dr)
149     dump_data_reference (file, dr);
150 }
151 
152 /* Dump into STDERR all the data references from DATAREFS.  */
153 
154 DEBUG_FUNCTION void
debug_data_references(vec<data_reference_p> datarefs)155 debug_data_references (vec<data_reference_p> datarefs)
156 {
157   dump_data_references (stderr, datarefs);
158 }
159 
160 /* Print to STDERR the data_reference DR.  */
161 
162 DEBUG_FUNCTION void
debug_data_reference(struct data_reference * dr)163 debug_data_reference (struct data_reference *dr)
164 {
165   dump_data_reference (stderr, dr);
166 }
167 
168 /* Dump function for a DATA_REFERENCE structure.  */
169 
170 void
dump_data_reference(FILE * outf,struct data_reference * dr)171 dump_data_reference (FILE *outf,
172 		     struct data_reference *dr)
173 {
174   unsigned int i;
175 
176   fprintf (outf, "#(Data Ref: \n");
177   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
178   fprintf (outf, "#  stmt: ");
179   print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
180   fprintf (outf, "#  ref: ");
181   print_generic_stmt (outf, DR_REF (dr), 0);
182   fprintf (outf, "#  base_object: ");
183   print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
184 
185   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
186     {
187       fprintf (outf, "#  Access function %d: ", i);
188       print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
189     }
190   fprintf (outf, "#)\n");
191 }
192 
193 /* Dumps the affine function described by FN to the file OUTF.  */
194 
195 static void
dump_affine_function(FILE * outf,affine_fn fn)196 dump_affine_function (FILE *outf, affine_fn fn)
197 {
198   unsigned i;
199   tree coef;
200 
201   print_generic_expr (outf, fn[0], TDF_SLIM);
202   for (i = 1; fn.iterate (i, &coef); i++)
203     {
204       fprintf (outf, " + ");
205       print_generic_expr (outf, coef, TDF_SLIM);
206       fprintf (outf, " * x_%u", i);
207     }
208 }
209 
210 /* Dumps the conflict function CF to the file OUTF.  */
211 
212 static void
dump_conflict_function(FILE * outf,conflict_function * cf)213 dump_conflict_function (FILE *outf, conflict_function *cf)
214 {
215   unsigned i;
216 
217   if (cf->n == NO_DEPENDENCE)
218     fprintf (outf, "no dependence");
219   else if (cf->n == NOT_KNOWN)
220     fprintf (outf, "not known");
221   else
222     {
223       for (i = 0; i < cf->n; i++)
224 	{
225 	  if (i != 0)
226 	    fprintf (outf, " ");
227 	  fprintf (outf, "[");
228 	  dump_affine_function (outf, cf->fns[i]);
229 	  fprintf (outf, "]");
230 	}
231     }
232 }
233 
234 /* Dump function for a SUBSCRIPT structure.  */
235 
236 static void
dump_subscript(FILE * outf,struct subscript * subscript)237 dump_subscript (FILE *outf, struct subscript *subscript)
238 {
239   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
240 
241   fprintf (outf, "\n (subscript \n");
242   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
243   dump_conflict_function (outf, cf);
244   if (CF_NONTRIVIAL_P (cf))
245     {
246       tree last_iteration = SUB_LAST_CONFLICT (subscript);
247       fprintf (outf, "\n  last_conflict: ");
248       print_generic_expr (outf, last_iteration, 0);
249     }
250 
251   cf = SUB_CONFLICTS_IN_B (subscript);
252   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
253   dump_conflict_function (outf, cf);
254   if (CF_NONTRIVIAL_P (cf))
255     {
256       tree last_iteration = SUB_LAST_CONFLICT (subscript);
257       fprintf (outf, "\n  last_conflict: ");
258       print_generic_expr (outf, last_iteration, 0);
259     }
260 
261   fprintf (outf, "\n  (Subscript distance: ");
262   print_generic_expr (outf, SUB_DISTANCE (subscript), 0);
263   fprintf (outf, " ))\n");
264 }
265 
266 /* Print the classic direction vector DIRV to OUTF.  */
267 
268 static void
print_direction_vector(FILE * outf,lambda_vector dirv,int length)269 print_direction_vector (FILE *outf,
270 			lambda_vector dirv,
271 			int length)
272 {
273   int eq;
274 
275   for (eq = 0; eq < length; eq++)
276     {
277       enum data_dependence_direction dir = ((enum data_dependence_direction)
278 					    dirv[eq]);
279 
280       switch (dir)
281 	{
282 	case dir_positive:
283 	  fprintf (outf, "    +");
284 	  break;
285 	case dir_negative:
286 	  fprintf (outf, "    -");
287 	  break;
288 	case dir_equal:
289 	  fprintf (outf, "    =");
290 	  break;
291 	case dir_positive_or_equal:
292 	  fprintf (outf, "   +=");
293 	  break;
294 	case dir_positive_or_negative:
295 	  fprintf (outf, "   +-");
296 	  break;
297 	case dir_negative_or_equal:
298 	  fprintf (outf, "   -=");
299 	  break;
300 	case dir_star:
301 	  fprintf (outf, "    *");
302 	  break;
303 	default:
304 	  fprintf (outf, "indep");
305 	  break;
306 	}
307     }
308   fprintf (outf, "\n");
309 }
310 
311 /* Print a vector of direction vectors.  */
312 
313 static void
print_dir_vectors(FILE * outf,vec<lambda_vector> dir_vects,int length)314 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
315 		   int length)
316 {
317   unsigned j;
318   lambda_vector v;
319 
320   FOR_EACH_VEC_ELT (dir_vects, j, v)
321     print_direction_vector (outf, v, length);
322 }
323 
324 /* Print out a vector VEC of length N to OUTFILE.  */
325 
326 static inline void
print_lambda_vector(FILE * outfile,lambda_vector vector,int n)327 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
328 {
329   int i;
330 
331   for (i = 0; i < n; i++)
332     fprintf (outfile, "%3d ", vector[i]);
333   fprintf (outfile, "\n");
334 }
335 
336 /* Print a vector of distance vectors.  */
337 
338 static void
print_dist_vectors(FILE * outf,vec<lambda_vector> dist_vects,int length)339 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
340 		    int length)
341 {
342   unsigned j;
343   lambda_vector v;
344 
345   FOR_EACH_VEC_ELT (dist_vects, j, v)
346     print_lambda_vector (outf, v, length);
347 }
348 
349 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
350 
351 static void
dump_data_dependence_relation(FILE * outf,struct data_dependence_relation * ddr)352 dump_data_dependence_relation (FILE *outf,
353 			       struct data_dependence_relation *ddr)
354 {
355   struct data_reference *dra, *drb;
356 
357   fprintf (outf, "(Data Dep: \n");
358 
359   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
360     {
361       if (ddr)
362 	{
363 	  dra = DDR_A (ddr);
364 	  drb = DDR_B (ddr);
365 	  if (dra)
366 	    dump_data_reference (outf, dra);
367 	  else
368 	    fprintf (outf, "    (nil)\n");
369 	  if (drb)
370 	    dump_data_reference (outf, drb);
371 	  else
372 	    fprintf (outf, "    (nil)\n");
373 	}
374       fprintf (outf, "    (don't know)\n)\n");
375       return;
376     }
377 
378   dra = DDR_A (ddr);
379   drb = DDR_B (ddr);
380   dump_data_reference (outf, dra);
381   dump_data_reference (outf, drb);
382 
383   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
384     fprintf (outf, "    (no dependence)\n");
385 
386   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
387     {
388       unsigned int i;
389       struct loop *loopi;
390 
391       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
392 	{
393 	  fprintf (outf, "  access_fn_A: ");
394 	  print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
395 	  fprintf (outf, "  access_fn_B: ");
396 	  print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
397 	  dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
398 	}
399 
400       fprintf (outf, "  inner loop index: %d\n", DDR_INNER_LOOP (ddr));
401       fprintf (outf, "  loop nest: (");
402       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
403 	fprintf (outf, "%d ", loopi->num);
404       fprintf (outf, ")\n");
405 
406       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
407 	{
408 	  fprintf (outf, "  distance_vector: ");
409 	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
410 			       DDR_NB_LOOPS (ddr));
411 	}
412 
413       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
414 	{
415 	  fprintf (outf, "  direction_vector: ");
416 	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
417 				  DDR_NB_LOOPS (ddr));
418 	}
419     }
420 
421   fprintf (outf, ")\n");
422 }
423 
424 /* Debug version.  */
425 
426 DEBUG_FUNCTION void
debug_data_dependence_relation(struct data_dependence_relation * ddr)427 debug_data_dependence_relation (struct data_dependence_relation *ddr)
428 {
429   dump_data_dependence_relation (stderr, ddr);
430 }
431 
432 /* Dump into FILE all the dependence relations from DDRS.  */
433 
434 void
dump_data_dependence_relations(FILE * file,vec<ddr_p> ddrs)435 dump_data_dependence_relations (FILE *file,
436 				vec<ddr_p> ddrs)
437 {
438   unsigned int i;
439   struct data_dependence_relation *ddr;
440 
441   FOR_EACH_VEC_ELT (ddrs, i, ddr)
442     dump_data_dependence_relation (file, ddr);
443 }
444 
445 /* Dump to STDERR all the dependence relations from DDRS.  */
446 
447 DEBUG_FUNCTION void
debug_data_dependence_relations(vec<ddr_p> ddrs)448 debug_data_dependence_relations (vec<ddr_p> ddrs)
449 {
450   dump_data_dependence_relations (stderr, ddrs);
451 }
452 
453 /* Dumps the distance and direction vectors in FILE.  DDRS contains
454    the dependence relations, and VECT_SIZE is the size of the
455    dependence vectors, or in other words the number of loops in the
456    considered nest.  */
457 
458 static void
dump_dist_dir_vectors(FILE * file,vec<ddr_p> ddrs)459 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
460 {
461   unsigned int i, j;
462   struct data_dependence_relation *ddr;
463   lambda_vector v;
464 
465   FOR_EACH_VEC_ELT (ddrs, i, ddr)
466     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
467       {
468 	FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
469 	  {
470 	    fprintf (file, "DISTANCE_V (");
471 	    print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
472 	    fprintf (file, ")\n");
473 	  }
474 
475 	FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
476 	  {
477 	    fprintf (file, "DIRECTION_V (");
478 	    print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
479 	    fprintf (file, ")\n");
480 	  }
481       }
482 
483   fprintf (file, "\n\n");
484 }
485 
486 /* Dumps the data dependence relations DDRS in FILE.  */
487 
488 static void
dump_ddrs(FILE * file,vec<ddr_p> ddrs)489 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
490 {
491   unsigned int i;
492   struct data_dependence_relation *ddr;
493 
494   FOR_EACH_VEC_ELT (ddrs, i, ddr)
495     dump_data_dependence_relation (file, ddr);
496 
497   fprintf (file, "\n\n");
498 }
499 
500 DEBUG_FUNCTION void
debug_ddrs(vec<ddr_p> ddrs)501 debug_ddrs (vec<ddr_p> ddrs)
502 {
503   dump_ddrs (stderr, ddrs);
504 }
505 
506 /* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
507    (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
508    constant of type ssizetype, and returns true.  If we cannot do this
509    with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
510    is returned.  */
511 
512 static bool
split_constant_offset_1(tree type,tree op0,enum tree_code code,tree op1,tree * var,tree * off)513 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
514 			 tree *var, tree *off)
515 {
516   tree var0, var1;
517   tree off0, off1;
518   enum tree_code ocode = code;
519 
520   *var = NULL_TREE;
521   *off = NULL_TREE;
522 
523   switch (code)
524     {
525     case INTEGER_CST:
526       *var = build_int_cst (type, 0);
527       *off = fold_convert (ssizetype, op0);
528       return true;
529 
530     case POINTER_PLUS_EXPR:
531       ocode = PLUS_EXPR;
532       /* FALLTHROUGH */
533     case PLUS_EXPR:
534     case MINUS_EXPR:
535       split_constant_offset (op0, &var0, &off0);
536       split_constant_offset (op1, &var1, &off1);
537       *var = fold_build2 (code, type, var0, var1);
538       *off = size_binop (ocode, off0, off1);
539       return true;
540 
541     case MULT_EXPR:
542       if (TREE_CODE (op1) != INTEGER_CST)
543 	return false;
544 
545       split_constant_offset (op0, &var0, &off0);
546       *var = fold_build2 (MULT_EXPR, type, var0, op1);
547       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
548       return true;
549 
550     case ADDR_EXPR:
551       {
552 	tree base, poffset;
553 	HOST_WIDE_INT pbitsize, pbitpos;
554 	enum machine_mode pmode;
555 	int punsignedp, pvolatilep;
556 
557 	op0 = TREE_OPERAND (op0, 0);
558 	base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
559 				    &pmode, &punsignedp, &pvolatilep, false);
560 
561 	if (pbitpos % BITS_PER_UNIT != 0)
562 	  return false;
563 	base = build_fold_addr_expr (base);
564 	off0 = ssize_int (pbitpos / BITS_PER_UNIT);
565 
566 	if (poffset)
567 	  {
568 	    split_constant_offset (poffset, &poffset, &off1);
569 	    off0 = size_binop (PLUS_EXPR, off0, off1);
570 	    if (POINTER_TYPE_P (TREE_TYPE (base)))
571 	      base = fold_build_pointer_plus (base, poffset);
572 	    else
573 	      base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
574 				  fold_convert (TREE_TYPE (base), poffset));
575 	  }
576 
577 	var0 = fold_convert (type, base);
578 
579 	/* If variable length types are involved, punt, otherwise casts
580 	   might be converted into ARRAY_REFs in gimplify_conversion.
581 	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
582 	   possibly no longer appears in current GIMPLE, might resurface.
583 	   This perhaps could run
584 	   if (CONVERT_EXPR_P (var0))
585 	     {
586 	       gimplify_conversion (&var0);
587 	       // Attempt to fill in any within var0 found ARRAY_REF's
588 	       // element size from corresponding op embedded ARRAY_REF,
589 	       // if unsuccessful, just punt.
590 	     }  */
591 	while (POINTER_TYPE_P (type))
592 	  type = TREE_TYPE (type);
593 	if (int_size_in_bytes (type) < 0)
594 	  return false;
595 
596 	*var = var0;
597 	*off = off0;
598 	return true;
599       }
600 
601     case SSA_NAME:
602       {
603 	gimple def_stmt = SSA_NAME_DEF_STMT (op0);
604 	enum tree_code subcode;
605 
606 	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
607 	  return false;
608 
609 	var0 = gimple_assign_rhs1 (def_stmt);
610 	subcode = gimple_assign_rhs_code (def_stmt);
611 	var1 = gimple_assign_rhs2 (def_stmt);
612 
613 	return split_constant_offset_1 (type, var0, subcode, var1, var, off);
614       }
615     CASE_CONVERT:
616       {
617 	/* We must not introduce undefined overflow, and we must not change the value.
618 	   Hence we're okay if the inner type doesn't overflow to start with
619 	   (pointer or signed), the outer type also is an integer or pointer
620 	   and the outer precision is at least as large as the inner.  */
621 	tree itype = TREE_TYPE (op0);
622 	if ((POINTER_TYPE_P (itype)
623 	     || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
624 	    && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
625 	    && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
626 	  {
627 	    split_constant_offset (op0, &var0, off);
628 	    *var = fold_convert (type, var0);
629 	    return true;
630 	  }
631 	return false;
632       }
633 
634     default:
635       return false;
636     }
637 }
638 
639 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
640    will be ssizetype.  */
641 
642 void
split_constant_offset(tree exp,tree * var,tree * off)643 split_constant_offset (tree exp, tree *var, tree *off)
644 {
645   tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
646   enum tree_code code;
647 
648   *var = exp;
649   *off = ssize_int (0);
650   STRIP_NOPS (exp);
651 
652   if (tree_is_chrec (exp)
653       || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
654     return;
655 
656   otype = TREE_TYPE (exp);
657   code = TREE_CODE (exp);
658   extract_ops_from_tree (exp, &code, &op0, &op1);
659   if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
660     {
661       *var = fold_convert (type, e);
662       *off = o;
663     }
664 }
665 
666 /* Returns the address ADDR of an object in a canonical shape (without nop
667    casts, and with type of pointer to the object).  */
668 
669 static tree
canonicalize_base_object_address(tree addr)670 canonicalize_base_object_address (tree addr)
671 {
672   tree orig = addr;
673 
674   STRIP_NOPS (addr);
675 
676   /* The base address may be obtained by casting from integer, in that case
677      keep the cast.  */
678   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
679     return orig;
680 
681   if (TREE_CODE (addr) != ADDR_EXPR)
682     return addr;
683 
684   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
685 }
686 
687 /* Analyzes the behavior of the memory reference DR in the innermost loop or
688    basic block that contains it.  Returns true if analysis succeed or false
689    otherwise.  */
690 
691 bool
dr_analyze_innermost(struct data_reference * dr,struct loop * nest)692 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
693 {
694   gimple stmt = DR_STMT (dr);
695   struct loop *loop = loop_containing_stmt (stmt);
696   tree ref = DR_REF (dr);
697   HOST_WIDE_INT pbitsize, pbitpos;
698   tree base, poffset;
699   enum machine_mode pmode;
700   int punsignedp, pvolatilep;
701   affine_iv base_iv, offset_iv;
702   tree init, dinit, step;
703   bool in_loop = (loop && loop->num);
704 
705   if (dump_file && (dump_flags & TDF_DETAILS))
706     fprintf (dump_file, "analyze_innermost: ");
707 
708   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
709 			      &pmode, &punsignedp, &pvolatilep, false);
710   gcc_assert (base != NULL_TREE);
711 
712   if (pbitpos % BITS_PER_UNIT != 0)
713     {
714       if (dump_file && (dump_flags & TDF_DETAILS))
715 	fprintf (dump_file, "failed: bit offset alignment.\n");
716       return false;
717     }
718 
719   if (TREE_CODE (base) == MEM_REF)
720     {
721       if (!integer_zerop (TREE_OPERAND (base, 1)))
722 	{
723 	  double_int moff = mem_ref_offset (base);
724 	  tree mofft = double_int_to_tree (sizetype, moff);
725 	  if (!poffset)
726 	    poffset = mofft;
727 	  else
728 	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
729 	}
730       base = TREE_OPERAND (base, 0);
731     }
732   else
733     base = build_fold_addr_expr (base);
734 
735   if (in_loop)
736     {
737       if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
738                       nest ? true : false))
739         {
740           if (nest)
741             {
742               if (dump_file && (dump_flags & TDF_DETAILS))
743                 fprintf (dump_file, "failed: evolution of base is not"
744                                     " affine.\n");
745               return false;
746             }
747           else
748             {
749               base_iv.base = base;
750               base_iv.step = ssize_int (0);
751               base_iv.no_overflow = true;
752             }
753         }
754     }
755   else
756     {
757       base_iv.base = base;
758       base_iv.step = ssize_int (0);
759       base_iv.no_overflow = true;
760     }
761 
762   if (!poffset)
763     {
764       offset_iv.base = ssize_int (0);
765       offset_iv.step = ssize_int (0);
766     }
767   else
768     {
769       if (!in_loop)
770         {
771           offset_iv.base = poffset;
772           offset_iv.step = ssize_int (0);
773         }
774       else if (!simple_iv (loop, loop_containing_stmt (stmt),
775                            poffset, &offset_iv,
776 			   nest ? true : false))
777         {
778           if (nest)
779             {
780               if (dump_file && (dump_flags & TDF_DETAILS))
781                 fprintf (dump_file, "failed: evolution of offset is not"
782                                     " affine.\n");
783               return false;
784             }
785           else
786             {
787               offset_iv.base = poffset;
788               offset_iv.step = ssize_int (0);
789             }
790         }
791     }
792 
793   init = ssize_int (pbitpos / BITS_PER_UNIT);
794   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
795   init =  size_binop (PLUS_EXPR, init, dinit);
796   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
797   init =  size_binop (PLUS_EXPR, init, dinit);
798 
799   step = size_binop (PLUS_EXPR,
800 		     fold_convert (ssizetype, base_iv.step),
801 		     fold_convert (ssizetype, offset_iv.step));
802 
803   DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
804 
805   DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
806   DR_INIT (dr) = init;
807   DR_STEP (dr) = step;
808 
809   DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
810 
811   if (dump_file && (dump_flags & TDF_DETAILS))
812     fprintf (dump_file, "success.\n");
813 
814   return true;
815 }
816 
817 /* Determines the base object and the list of indices of memory reference
818    DR, analyzed in LOOP and instantiated in loop nest NEST.  */
819 
820 static void
dr_analyze_indices(struct data_reference * dr,loop_p nest,loop_p loop)821 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
822 {
823   vec<tree> access_fns = vNULL;
824   tree ref, op;
825   tree base, off, access_fn;
826   basic_block before_loop;
827 
828   /* If analyzing a basic-block there are no indices to analyze
829      and thus no access functions.  */
830   if (!nest)
831     {
832       DR_BASE_OBJECT (dr) = DR_REF (dr);
833       DR_ACCESS_FNS (dr).create (0);
834       return;
835     }
836 
837   ref = DR_REF (dr);
838   before_loop = block_before_loop (nest);
839 
840   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
841      into a two element array with a constant index.  The base is
842      then just the immediate underlying object.  */
843   if (TREE_CODE (ref) == REALPART_EXPR)
844     {
845       ref = TREE_OPERAND (ref, 0);
846       access_fns.safe_push (integer_zero_node);
847     }
848   else if (TREE_CODE (ref) == IMAGPART_EXPR)
849     {
850       ref = TREE_OPERAND (ref, 0);
851       access_fns.safe_push (integer_one_node);
852     }
853 
854   /* Analyze access functions of dimensions we know to be independent.  */
855   while (handled_component_p (ref))
856     {
857       if (TREE_CODE (ref) == ARRAY_REF)
858 	{
859 	  op = TREE_OPERAND (ref, 1);
860 	  access_fn = analyze_scalar_evolution (loop, op);
861 	  access_fn = instantiate_scev (before_loop, loop, access_fn);
862 	  access_fns.safe_push (access_fn);
863 	}
864       else if (TREE_CODE (ref) == COMPONENT_REF
865 	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
866 	{
867 	  /* For COMPONENT_REFs of records (but not unions!) use the
868 	     FIELD_DECL offset as constant access function so we can
869 	     disambiguate a[i].f1 and a[i].f2.  */
870 	  tree off = component_ref_field_offset (ref);
871 	  off = size_binop (PLUS_EXPR,
872 			    size_binop (MULT_EXPR,
873 					fold_convert (bitsizetype, off),
874 					bitsize_int (BITS_PER_UNIT)),
875 			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
876 	  access_fns.safe_push (off);
877 	}
878       else
879 	/* If we have an unhandled component we could not translate
880 	   to an access function stop analyzing.  We have determined
881 	   our base object in this case.  */
882 	break;
883 
884       ref = TREE_OPERAND (ref, 0);
885     }
886 
887   /* If the address operand of a MEM_REF base has an evolution in the
888      analyzed nest, add it as an additional independent access-function.  */
889   if (TREE_CODE (ref) == MEM_REF)
890     {
891       op = TREE_OPERAND (ref, 0);
892       access_fn = analyze_scalar_evolution (loop, op);
893       access_fn = instantiate_scev (before_loop, loop, access_fn);
894       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
895 	{
896 	  tree orig_type;
897 	  tree memoff = TREE_OPERAND (ref, 1);
898 	  base = initial_condition (access_fn);
899 	  orig_type = TREE_TYPE (base);
900 	  STRIP_USELESS_TYPE_CONVERSION (base);
901 	  split_constant_offset (base, &base, &off);
902 	  /* Fold the MEM_REF offset into the evolutions initial
903 	     value to make more bases comparable.  */
904 	  if (!integer_zerop (memoff))
905 	    {
906 	      off = size_binop (PLUS_EXPR, off,
907 				fold_convert (ssizetype, memoff));
908 	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
909 	    }
910 	  access_fn = chrec_replace_initial_condition
911 	      (access_fn, fold_convert (orig_type, off));
912 	  /* ???  This is still not a suitable base object for
913 	     dr_may_alias_p - the base object needs to be an
914 	     access that covers the object as whole.  With
915 	     an evolution in the pointer this cannot be
916 	     guaranteed.
917 	     As a band-aid, mark the access so we can special-case
918 	     it in dr_may_alias_p.  */
919 	  ref = fold_build2_loc (EXPR_LOCATION (ref),
920 				 MEM_REF, TREE_TYPE (ref),
921 				 base, memoff);
922 	  DR_UNCONSTRAINED_BASE (dr) = true;
923 	  access_fns.safe_push (access_fn);
924 	}
925     }
926   else if (DECL_P (ref))
927     {
928       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
929       ref = build2 (MEM_REF, TREE_TYPE (ref),
930 		    build_fold_addr_expr (ref),
931 		    build_int_cst (reference_alias_ptr_type (ref), 0));
932     }
933 
934   DR_BASE_OBJECT (dr) = ref;
935   DR_ACCESS_FNS (dr) = access_fns;
936 }
937 
938 /* Extracts the alias analysis information from the memory reference DR.  */
939 
940 static void
dr_analyze_alias(struct data_reference * dr)941 dr_analyze_alias (struct data_reference *dr)
942 {
943   tree ref = DR_REF (dr);
944   tree base = get_base_address (ref), addr;
945 
946   if (INDIRECT_REF_P (base)
947       || TREE_CODE (base) == MEM_REF)
948     {
949       addr = TREE_OPERAND (base, 0);
950       if (TREE_CODE (addr) == SSA_NAME)
951 	DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
952     }
953 }
954 
955 /* Frees data reference DR.  */
956 
957 void
free_data_ref(data_reference_p dr)958 free_data_ref (data_reference_p dr)
959 {
960   DR_ACCESS_FNS (dr).release ();
961   free (dr);
962 }
963 
964 /* Analyzes memory reference MEMREF accessed in STMT.  The reference
965    is read if IS_READ is true, write otherwise.  Returns the
966    data_reference description of MEMREF.  NEST is the outermost loop
967    in which the reference should be instantiated, LOOP is the loop in
968    which the data reference should be analyzed.  */
969 
970 struct data_reference *
create_data_ref(loop_p nest,loop_p loop,tree memref,gimple stmt,bool is_read)971 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
972 		 bool is_read)
973 {
974   struct data_reference *dr;
975 
976   if (dump_file && (dump_flags & TDF_DETAILS))
977     {
978       fprintf (dump_file, "Creating dr for ");
979       print_generic_expr (dump_file, memref, TDF_SLIM);
980       fprintf (dump_file, "\n");
981     }
982 
983   dr = XCNEW (struct data_reference);
984   DR_STMT (dr) = stmt;
985   DR_REF (dr) = memref;
986   DR_IS_READ (dr) = is_read;
987 
988   dr_analyze_innermost (dr, nest);
989   dr_analyze_indices (dr, nest, loop);
990   dr_analyze_alias (dr);
991 
992   if (dump_file && (dump_flags & TDF_DETAILS))
993     {
994       unsigned i;
995       fprintf (dump_file, "\tbase_address: ");
996       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
997       fprintf (dump_file, "\n\toffset from base address: ");
998       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
999       fprintf (dump_file, "\n\tconstant offset from base address: ");
1000       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1001       fprintf (dump_file, "\n\tstep: ");
1002       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1003       fprintf (dump_file, "\n\taligned to: ");
1004       print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1005       fprintf (dump_file, "\n\tbase_object: ");
1006       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1007       fprintf (dump_file, "\n");
1008       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1009 	{
1010 	  fprintf (dump_file, "\tAccess function %d: ", i);
1011 	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1012 	}
1013     }
1014 
1015   return dr;
1016 }
1017 
1018 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1019    expressions.  */
1020 static bool
dr_equal_offsets_p1(tree offset1,tree offset2)1021 dr_equal_offsets_p1 (tree offset1, tree offset2)
1022 {
1023   bool res;
1024 
1025   STRIP_NOPS (offset1);
1026   STRIP_NOPS (offset2);
1027 
1028   if (offset1 == offset2)
1029     return true;
1030 
1031   if (TREE_CODE (offset1) != TREE_CODE (offset2)
1032       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1033     return false;
1034 
1035   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1036                              TREE_OPERAND (offset2, 0));
1037 
1038   if (!res || !BINARY_CLASS_P (offset1))
1039     return res;
1040 
1041   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1042                              TREE_OPERAND (offset2, 1));
1043 
1044   return res;
1045 }
1046 
1047 /* Check if DRA and DRB have equal offsets.  */
1048 bool
dr_equal_offsets_p(struct data_reference * dra,struct data_reference * drb)1049 dr_equal_offsets_p (struct data_reference *dra,
1050                     struct data_reference *drb)
1051 {
1052   tree offset1, offset2;
1053 
1054   offset1 = DR_OFFSET (dra);
1055   offset2 = DR_OFFSET (drb);
1056 
1057   return dr_equal_offsets_p1 (offset1, offset2);
1058 }
1059 
1060 /* Returns true if FNA == FNB.  */
1061 
1062 static bool
affine_function_equal_p(affine_fn fna,affine_fn fnb)1063 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1064 {
1065   unsigned i, n = fna.length ();
1066 
1067   if (n != fnb.length ())
1068     return false;
1069 
1070   for (i = 0; i < n; i++)
1071     if (!operand_equal_p (fna[i], fnb[i], 0))
1072       return false;
1073 
1074   return true;
1075 }
1076 
1077 /* If all the functions in CF are the same, returns one of them,
1078    otherwise returns NULL.  */
1079 
1080 static affine_fn
common_affine_function(conflict_function * cf)1081 common_affine_function (conflict_function *cf)
1082 {
1083   unsigned i;
1084   affine_fn comm;
1085 
1086   if (!CF_NONTRIVIAL_P (cf))
1087     return affine_fn();
1088 
1089   comm = cf->fns[0];
1090 
1091   for (i = 1; i < cf->n; i++)
1092     if (!affine_function_equal_p (comm, cf->fns[i]))
1093       return affine_fn();
1094 
1095   return comm;
1096 }
1097 
1098 /* Returns the base of the affine function FN.  */
1099 
1100 static tree
affine_function_base(affine_fn fn)1101 affine_function_base (affine_fn fn)
1102 {
1103   return fn[0];
1104 }
1105 
1106 /* Returns true if FN is a constant.  */
1107 
1108 static bool
affine_function_constant_p(affine_fn fn)1109 affine_function_constant_p (affine_fn fn)
1110 {
1111   unsigned i;
1112   tree coef;
1113 
1114   for (i = 1; fn.iterate (i, &coef); i++)
1115     if (!integer_zerop (coef))
1116       return false;
1117 
1118   return true;
1119 }
1120 
1121 /* Returns true if FN is the zero constant function.  */
1122 
1123 static bool
affine_function_zero_p(affine_fn fn)1124 affine_function_zero_p (affine_fn fn)
1125 {
1126   return (integer_zerop (affine_function_base (fn))
1127 	  && affine_function_constant_p (fn));
1128 }
1129 
1130 /* Returns a signed integer type with the largest precision from TA
1131    and TB.  */
1132 
1133 static tree
signed_type_for_types(tree ta,tree tb)1134 signed_type_for_types (tree ta, tree tb)
1135 {
1136   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1137     return signed_type_for (ta);
1138   else
1139     return signed_type_for (tb);
1140 }
1141 
1142 /* Applies operation OP on affine functions FNA and FNB, and returns the
1143    result.  */
1144 
1145 static affine_fn
affine_fn_op(enum tree_code op,affine_fn fna,affine_fn fnb)1146 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1147 {
1148   unsigned i, n, m;
1149   affine_fn ret;
1150   tree coef;
1151 
1152   if (fnb.length () > fna.length ())
1153     {
1154       n = fna.length ();
1155       m = fnb.length ();
1156     }
1157   else
1158     {
1159       n = fnb.length ();
1160       m = fna.length ();
1161     }
1162 
1163   ret.create (m);
1164   for (i = 0; i < n; i++)
1165     {
1166       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1167 					 TREE_TYPE (fnb[i]));
1168       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1169     }
1170 
1171   for (; fna.iterate (i, &coef); i++)
1172     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1173 				 coef, integer_zero_node));
1174   for (; fnb.iterate (i, &coef); i++)
1175     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1176 				 integer_zero_node, coef));
1177 
1178   return ret;
1179 }
1180 
1181 /* Returns the sum of affine functions FNA and FNB.  */
1182 
1183 static affine_fn
affine_fn_plus(affine_fn fna,affine_fn fnb)1184 affine_fn_plus (affine_fn fna, affine_fn fnb)
1185 {
1186   return affine_fn_op (PLUS_EXPR, fna, fnb);
1187 }
1188 
1189 /* Returns the difference of affine functions FNA and FNB.  */
1190 
1191 static affine_fn
affine_fn_minus(affine_fn fna,affine_fn fnb)1192 affine_fn_minus (affine_fn fna, affine_fn fnb)
1193 {
1194   return affine_fn_op (MINUS_EXPR, fna, fnb);
1195 }
1196 
1197 /* Frees affine function FN.  */
1198 
1199 static void
affine_fn_free(affine_fn fn)1200 affine_fn_free (affine_fn fn)
1201 {
1202   fn.release ();
1203 }
1204 
1205 /* Determine for each subscript in the data dependence relation DDR
1206    the distance.  */
1207 
1208 static void
compute_subscript_distance(struct data_dependence_relation * ddr)1209 compute_subscript_distance (struct data_dependence_relation *ddr)
1210 {
1211   conflict_function *cf_a, *cf_b;
1212   affine_fn fn_a, fn_b, diff;
1213 
1214   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1215     {
1216       unsigned int i;
1217 
1218       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1219  	{
1220  	  struct subscript *subscript;
1221 
1222  	  subscript = DDR_SUBSCRIPT (ddr, i);
1223  	  cf_a = SUB_CONFLICTS_IN_A (subscript);
1224  	  cf_b = SUB_CONFLICTS_IN_B (subscript);
1225 
1226 	  fn_a = common_affine_function (cf_a);
1227 	  fn_b = common_affine_function (cf_b);
1228 	  if (!fn_a.exists () || !fn_b.exists ())
1229 	    {
1230 	      SUB_DISTANCE (subscript) = chrec_dont_know;
1231 	      return;
1232 	    }
1233 	  diff = affine_fn_minus (fn_a, fn_b);
1234 
1235  	  if (affine_function_constant_p (diff))
1236  	    SUB_DISTANCE (subscript) = affine_function_base (diff);
1237  	  else
1238  	    SUB_DISTANCE (subscript) = chrec_dont_know;
1239 
1240 	  affine_fn_free (diff);
1241  	}
1242     }
1243 }
1244 
1245 /* Returns the conflict function for "unknown".  */
1246 
1247 static conflict_function *
conflict_fn_not_known(void)1248 conflict_fn_not_known (void)
1249 {
1250   conflict_function *fn = XCNEW (conflict_function);
1251   fn->n = NOT_KNOWN;
1252 
1253   return fn;
1254 }
1255 
1256 /* Returns the conflict function for "independent".  */
1257 
1258 static conflict_function *
conflict_fn_no_dependence(void)1259 conflict_fn_no_dependence (void)
1260 {
1261   conflict_function *fn = XCNEW (conflict_function);
1262   fn->n = NO_DEPENDENCE;
1263 
1264   return fn;
1265 }
1266 
1267 /* Returns true if the address of OBJ is invariant in LOOP.  */
1268 
1269 static bool
object_address_invariant_in_loop_p(const struct loop * loop,const_tree obj)1270 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1271 {
1272   while (handled_component_p (obj))
1273     {
1274       if (TREE_CODE (obj) == ARRAY_REF)
1275 	{
1276 	  /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1277 	     need to check the stride and the lower bound of the reference.  */
1278 	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1279 						      loop->num)
1280 	      || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1281 							 loop->num))
1282 	    return false;
1283 	}
1284       else if (TREE_CODE (obj) == COMPONENT_REF)
1285 	{
1286 	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1287 						      loop->num))
1288 	    return false;
1289 	}
1290       obj = TREE_OPERAND (obj, 0);
1291     }
1292 
1293   if (!INDIRECT_REF_P (obj)
1294       && TREE_CODE (obj) != MEM_REF)
1295     return true;
1296 
1297   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1298 						  loop->num);
1299 }
1300 
1301 /* Returns false if we can prove that data references A and B do not alias,
1302    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
1303    considered.  */
1304 
1305 bool
dr_may_alias_p(const struct data_reference * a,const struct data_reference * b,bool loop_nest)1306 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1307 		bool loop_nest)
1308 {
1309   tree addr_a = DR_BASE_OBJECT (a);
1310   tree addr_b = DR_BASE_OBJECT (b);
1311 
1312   /* If we are not processing a loop nest but scalar code we
1313      do not need to care about possible cross-iteration dependences
1314      and thus can process the full original reference.  Do so,
1315      similar to how loop invariant motion applies extra offset-based
1316      disambiguation.  */
1317   if (!loop_nest)
1318     {
1319       aff_tree off1, off2;
1320       double_int size1, size2;
1321       get_inner_reference_aff (DR_REF (a), &off1, &size1);
1322       get_inner_reference_aff (DR_REF (b), &off2, &size2);
1323       aff_combination_scale (&off1, double_int_minus_one);
1324       aff_combination_add (&off2, &off1);
1325       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1326 	return false;
1327     }
1328 
1329   /* If we had an evolution in a MEM_REF BASE_OBJECT we do not know
1330      the size of the base-object.  So we cannot do any offset/overlap
1331      based analysis but have to rely on points-to information only.  */
1332   if (TREE_CODE (addr_a) == MEM_REF
1333       && DR_UNCONSTRAINED_BASE (a))
1334     {
1335       if (TREE_CODE (addr_b) == MEM_REF
1336 	  && DR_UNCONSTRAINED_BASE (b))
1337 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1338 				       TREE_OPERAND (addr_b, 0));
1339       else
1340 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1341 				       build_fold_addr_expr (addr_b));
1342     }
1343   else if (TREE_CODE (addr_b) == MEM_REF
1344 	   && DR_UNCONSTRAINED_BASE (b))
1345     return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1346 				   TREE_OPERAND (addr_b, 0));
1347 
1348   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1349      that is being subsetted in the loop nest.  */
1350   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1351     return refs_output_dependent_p (addr_a, addr_b);
1352   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1353     return refs_anti_dependent_p (addr_a, addr_b);
1354   return refs_may_alias_p (addr_a, addr_b);
1355 }
1356 
1357 /* Initialize a data dependence relation between data accesses A and
1358    B.  NB_LOOPS is the number of loops surrounding the references: the
1359    size of the classic distance/direction vectors.  */
1360 
1361 struct data_dependence_relation *
initialize_data_dependence_relation(struct data_reference * a,struct data_reference * b,vec<loop_p> loop_nest)1362 initialize_data_dependence_relation (struct data_reference *a,
1363 				     struct data_reference *b,
1364  				     vec<loop_p> loop_nest)
1365 {
1366   struct data_dependence_relation *res;
1367   unsigned int i;
1368 
1369   res = XNEW (struct data_dependence_relation);
1370   DDR_A (res) = a;
1371   DDR_B (res) = b;
1372   DDR_LOOP_NEST (res).create (0);
1373   DDR_REVERSED_P (res) = false;
1374   DDR_SUBSCRIPTS (res).create (0);
1375   DDR_DIR_VECTS (res).create (0);
1376   DDR_DIST_VECTS (res).create (0);
1377 
1378   if (a == NULL || b == NULL)
1379     {
1380       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1381       return res;
1382     }
1383 
1384   /* If the data references do not alias, then they are independent.  */
1385   if (!dr_may_alias_p (a, b, loop_nest.exists ()))
1386     {
1387       DDR_ARE_DEPENDENT (res) = chrec_known;
1388       return res;
1389     }
1390 
1391   /* The case where the references are exactly the same.  */
1392   if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1393     {
1394      if (loop_nest.exists ()
1395         && !object_address_invariant_in_loop_p (loop_nest[0],
1396        					        DR_BASE_OBJECT (a)))
1397       {
1398         DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1399         return res;
1400       }
1401       DDR_AFFINE_P (res) = true;
1402       DDR_ARE_DEPENDENT (res) = NULL_TREE;
1403       DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1404       DDR_LOOP_NEST (res) = loop_nest;
1405       DDR_INNER_LOOP (res) = 0;
1406       DDR_SELF_REFERENCE (res) = true;
1407       for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1408        {
1409          struct subscript *subscript;
1410 
1411          subscript = XNEW (struct subscript);
1412          SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1413          SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1414          SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1415          SUB_DISTANCE (subscript) = chrec_dont_know;
1416          DDR_SUBSCRIPTS (res).safe_push (subscript);
1417        }
1418       return res;
1419     }
1420 
1421   /* If the references do not access the same object, we do not know
1422      whether they alias or not.  */
1423   if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1424     {
1425       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1426       return res;
1427     }
1428 
1429   /* If the base of the object is not invariant in the loop nest, we cannot
1430      analyze it.  TODO -- in fact, it would suffice to record that there may
1431      be arbitrary dependences in the loops where the base object varies.  */
1432   if (loop_nest.exists ()
1433       && !object_address_invariant_in_loop_p (loop_nest[0],
1434      					      DR_BASE_OBJECT (a)))
1435     {
1436       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1437       return res;
1438     }
1439 
1440   /* If the number of dimensions of the access to not agree we can have
1441      a pointer access to a component of the array element type and an
1442      array access while the base-objects are still the same.  Punt.  */
1443   if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1444     {
1445       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1446       return res;
1447     }
1448 
1449   DDR_AFFINE_P (res) = true;
1450   DDR_ARE_DEPENDENT (res) = NULL_TREE;
1451   DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1452   DDR_LOOP_NEST (res) = loop_nest;
1453   DDR_INNER_LOOP (res) = 0;
1454   DDR_SELF_REFERENCE (res) = false;
1455 
1456   for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1457     {
1458       struct subscript *subscript;
1459 
1460       subscript = XNEW (struct subscript);
1461       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1462       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1463       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1464       SUB_DISTANCE (subscript) = chrec_dont_know;
1465       DDR_SUBSCRIPTS (res).safe_push (subscript);
1466     }
1467 
1468   return res;
1469 }
1470 
1471 /* Frees memory used by the conflict function F.  */
1472 
1473 static void
free_conflict_function(conflict_function * f)1474 free_conflict_function (conflict_function *f)
1475 {
1476   unsigned i;
1477 
1478   if (CF_NONTRIVIAL_P (f))
1479     {
1480       for (i = 0; i < f->n; i++)
1481 	affine_fn_free (f->fns[i]);
1482     }
1483   free (f);
1484 }
1485 
1486 /* Frees memory used by SUBSCRIPTS.  */
1487 
1488 static void
free_subscripts(vec<subscript_p> subscripts)1489 free_subscripts (vec<subscript_p> subscripts)
1490 {
1491   unsigned i;
1492   subscript_p s;
1493 
1494   FOR_EACH_VEC_ELT (subscripts, i, s)
1495     {
1496       free_conflict_function (s->conflicting_iterations_in_a);
1497       free_conflict_function (s->conflicting_iterations_in_b);
1498       free (s);
1499     }
1500   subscripts.release ();
1501 }
1502 
1503 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1504    description.  */
1505 
1506 static inline void
finalize_ddr_dependent(struct data_dependence_relation * ddr,tree chrec)1507 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1508 			tree chrec)
1509 {
1510   DDR_ARE_DEPENDENT (ddr) = chrec;
1511   free_subscripts (DDR_SUBSCRIPTS (ddr));
1512   DDR_SUBSCRIPTS (ddr).create (0);
1513 }
1514 
1515 /* The dependence relation DDR cannot be represented by a distance
1516    vector.  */
1517 
1518 static inline void
non_affine_dependence_relation(struct data_dependence_relation * ddr)1519 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1520 {
1521   if (dump_file && (dump_flags & TDF_DETAILS))
1522     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1523 
1524   DDR_AFFINE_P (ddr) = false;
1525 }
1526 
1527 
1528 
1529 /* This section contains the classic Banerjee tests.  */
1530 
1531 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1532    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
1533 
1534 static inline bool
ziv_subscript_p(const_tree chrec_a,const_tree chrec_b)1535 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1536 {
1537   return (evolution_function_is_constant_p (chrec_a)
1538 	  && evolution_function_is_constant_p (chrec_b));
1539 }
1540 
1541 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1542    variable, i.e., if the SIV (Single Index Variable) test is true.  */
1543 
1544 static bool
siv_subscript_p(const_tree chrec_a,const_tree chrec_b)1545 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1546 {
1547   if ((evolution_function_is_constant_p (chrec_a)
1548        && evolution_function_is_univariate_p (chrec_b))
1549       || (evolution_function_is_constant_p (chrec_b)
1550 	  && evolution_function_is_univariate_p (chrec_a)))
1551     return true;
1552 
1553   if (evolution_function_is_univariate_p (chrec_a)
1554       && evolution_function_is_univariate_p (chrec_b))
1555     {
1556       switch (TREE_CODE (chrec_a))
1557 	{
1558 	case POLYNOMIAL_CHREC:
1559 	  switch (TREE_CODE (chrec_b))
1560 	    {
1561 	    case POLYNOMIAL_CHREC:
1562 	      if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1563 		return false;
1564 
1565 	    default:
1566 	      return true;
1567 	    }
1568 
1569 	default:
1570 	  return true;
1571 	}
1572     }
1573 
1574   return false;
1575 }
1576 
1577 /* Creates a conflict function with N dimensions.  The affine functions
1578    in each dimension follow.  */
1579 
1580 static conflict_function *
conflict_fn(unsigned n,...)1581 conflict_fn (unsigned n, ...)
1582 {
1583   unsigned i;
1584   conflict_function *ret = XCNEW (conflict_function);
1585   va_list ap;
1586 
1587   gcc_assert (0 < n && n <= MAX_DIM);
1588   va_start(ap, n);
1589 
1590   ret->n = n;
1591   for (i = 0; i < n; i++)
1592     ret->fns[i] = va_arg (ap, affine_fn);
1593   va_end(ap);
1594 
1595   return ret;
1596 }
1597 
1598 /* Returns constant affine function with value CST.  */
1599 
1600 static affine_fn
affine_fn_cst(tree cst)1601 affine_fn_cst (tree cst)
1602 {
1603   affine_fn fn;
1604   fn.create (1);
1605   fn.quick_push (cst);
1606   return fn;
1607 }
1608 
1609 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
1610 
1611 static affine_fn
affine_fn_univar(tree cst,unsigned dim,tree coef)1612 affine_fn_univar (tree cst, unsigned dim, tree coef)
1613 {
1614   affine_fn fn;
1615   fn.create (dim + 1);
1616   unsigned i;
1617 
1618   gcc_assert (dim > 0);
1619   fn.quick_push (cst);
1620   for (i = 1; i < dim; i++)
1621     fn.quick_push (integer_zero_node);
1622   fn.quick_push (coef);
1623   return fn;
1624 }
1625 
1626 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
1627    *OVERLAPS_B are initialized to the functions that describe the
1628    relation between the elements accessed twice by CHREC_A and
1629    CHREC_B.  For k >= 0, the following property is verified:
1630 
1631    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
1632 
1633 static void
analyze_ziv_subscript(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)1634 analyze_ziv_subscript (tree chrec_a,
1635 		       tree chrec_b,
1636 		       conflict_function **overlaps_a,
1637 		       conflict_function **overlaps_b,
1638 		       tree *last_conflicts)
1639 {
1640   tree type, difference;
1641   dependence_stats.num_ziv++;
1642 
1643   if (dump_file && (dump_flags & TDF_DETAILS))
1644     fprintf (dump_file, "(analyze_ziv_subscript \n");
1645 
1646   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1647   chrec_a = chrec_convert (type, chrec_a, NULL);
1648   chrec_b = chrec_convert (type, chrec_b, NULL);
1649   difference = chrec_fold_minus (type, chrec_a, chrec_b);
1650 
1651   switch (TREE_CODE (difference))
1652     {
1653     case INTEGER_CST:
1654       if (integer_zerop (difference))
1655 	{
1656 	  /* The difference is equal to zero: the accessed index
1657 	     overlaps for each iteration in the loop.  */
1658 	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1659 	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1660 	  *last_conflicts = chrec_dont_know;
1661 	  dependence_stats.num_ziv_dependent++;
1662 	}
1663       else
1664 	{
1665 	  /* The accesses do not overlap.  */
1666 	  *overlaps_a = conflict_fn_no_dependence ();
1667 	  *overlaps_b = conflict_fn_no_dependence ();
1668 	  *last_conflicts = integer_zero_node;
1669 	  dependence_stats.num_ziv_independent++;
1670 	}
1671       break;
1672 
1673     default:
1674       /* We're not sure whether the indexes overlap.  For the moment,
1675 	 conservatively answer "don't know".  */
1676       if (dump_file && (dump_flags & TDF_DETAILS))
1677 	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1678 
1679       *overlaps_a = conflict_fn_not_known ();
1680       *overlaps_b = conflict_fn_not_known ();
1681       *last_conflicts = chrec_dont_know;
1682       dependence_stats.num_ziv_unimplemented++;
1683       break;
1684     }
1685 
1686   if (dump_file && (dump_flags & TDF_DETAILS))
1687     fprintf (dump_file, ")\n");
1688 }
1689 
1690 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1691    and only if it fits to the int type.  If this is not the case, or the
1692    bound  on the number of iterations of LOOP could not be derived, returns
1693    chrec_dont_know.  */
1694 
1695 static tree
max_stmt_executions_tree(struct loop * loop)1696 max_stmt_executions_tree (struct loop *loop)
1697 {
1698   double_int nit;
1699 
1700   if (!max_stmt_executions (loop, &nit))
1701     return chrec_dont_know;
1702 
1703   if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1704     return chrec_dont_know;
1705 
1706   return double_int_to_tree (unsigned_type_node, nit);
1707 }
1708 
1709 /* Determine whether the CHREC is always positive/negative.  If the expression
1710    cannot be statically analyzed, return false, otherwise set the answer into
1711    VALUE.  */
1712 
1713 static bool
chrec_is_positive(tree chrec,bool * value)1714 chrec_is_positive (tree chrec, bool *value)
1715 {
1716   bool value0, value1, value2;
1717   tree end_value, nb_iter;
1718 
1719   switch (TREE_CODE (chrec))
1720     {
1721     case POLYNOMIAL_CHREC:
1722       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
1723 	  || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
1724 	return false;
1725 
1726       /* FIXME -- overflows.  */
1727       if (value0 == value1)
1728 	{
1729 	  *value = value0;
1730 	  return true;
1731 	}
1732 
1733       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1734 	 and the proof consists in showing that the sign never
1735 	 changes during the execution of the loop, from 0 to
1736 	 loop->nb_iterations.  */
1737       if (!evolution_function_is_affine_p (chrec))
1738 	return false;
1739 
1740       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
1741       if (chrec_contains_undetermined (nb_iter))
1742 	return false;
1743 
1744 #if 0
1745       /* TODO -- If the test is after the exit, we may decrease the number of
1746 	 iterations by one.  */
1747       if (after_exit)
1748 	nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
1749 #endif
1750 
1751       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
1752 
1753       if (!chrec_is_positive (end_value, &value2))
1754 	return false;
1755 
1756       *value = value0;
1757       return value0 == value1;
1758 
1759     case INTEGER_CST:
1760       switch (tree_int_cst_sgn (chrec))
1761 	{
1762 	case -1:
1763 	  *value = false;
1764 	  break;
1765 	case 1:
1766 	  *value = true;
1767 	  break;
1768 	default:
1769 	  return false;
1770 	}
1771       return true;
1772 
1773     default:
1774       return false;
1775     }
1776 }
1777 
1778 
1779 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1780    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
1781    *OVERLAPS_B are initialized to the functions that describe the
1782    relation between the elements accessed twice by CHREC_A and
1783    CHREC_B.  For k >= 0, the following property is verified:
1784 
1785    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
1786 
1787 static void
analyze_siv_subscript_cst_affine(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)1788 analyze_siv_subscript_cst_affine (tree chrec_a,
1789 				  tree chrec_b,
1790 				  conflict_function **overlaps_a,
1791 				  conflict_function **overlaps_b,
1792 				  tree *last_conflicts)
1793 {
1794   bool value0, value1, value2;
1795   tree type, difference, tmp;
1796 
1797   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1798   chrec_a = chrec_convert (type, chrec_a, NULL);
1799   chrec_b = chrec_convert (type, chrec_b, NULL);
1800   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1801 
1802   /* Special case overlap in the first iteration.  */
1803   if (integer_zerop (difference))
1804     {
1805       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1806       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1807       *last_conflicts = integer_one_node;
1808       return;
1809     }
1810 
1811   if (!chrec_is_positive (initial_condition (difference), &value0))
1812     {
1813       if (dump_file && (dump_flags & TDF_DETAILS))
1814 	fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1815 
1816       dependence_stats.num_siv_unimplemented++;
1817       *overlaps_a = conflict_fn_not_known ();
1818       *overlaps_b = conflict_fn_not_known ();
1819       *last_conflicts = chrec_dont_know;
1820       return;
1821     }
1822   else
1823     {
1824       if (value0 == false)
1825 	{
1826 	  if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1827 	    {
1828 	      if (dump_file && (dump_flags & TDF_DETAILS))
1829 		fprintf (dump_file, "siv test failed: chrec not positive.\n");
1830 
1831 	      *overlaps_a = conflict_fn_not_known ();
1832 	      *overlaps_b = conflict_fn_not_known ();
1833 	      *last_conflicts = chrec_dont_know;
1834 	      dependence_stats.num_siv_unimplemented++;
1835 	      return;
1836 	    }
1837 	  else
1838 	    {
1839 	      if (value1 == true)
1840 		{
1841 		  /* Example:
1842 		     chrec_a = 12
1843 		     chrec_b = {10, +, 1}
1844 		  */
1845 
1846 		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1847 		    {
1848 		      HOST_WIDE_INT numiter;
1849 		      struct loop *loop = get_chrec_loop (chrec_b);
1850 
1851 		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1852 		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
1853 					 fold_build1 (ABS_EXPR, type, difference),
1854 					 CHREC_RIGHT (chrec_b));
1855 		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1856 		      *last_conflicts = integer_one_node;
1857 
1858 
1859 		      /* Perform weak-zero siv test to see if overlap is
1860 			 outside the loop bounds.  */
1861 		      numiter = max_stmt_executions_int (loop);
1862 
1863 		      if (numiter >= 0
1864 			  && compare_tree_int (tmp, numiter) > 0)
1865 			{
1866 			  free_conflict_function (*overlaps_a);
1867 			  free_conflict_function (*overlaps_b);
1868 			  *overlaps_a = conflict_fn_no_dependence ();
1869 			  *overlaps_b = conflict_fn_no_dependence ();
1870 			  *last_conflicts = integer_zero_node;
1871 			  dependence_stats.num_siv_independent++;
1872 			  return;
1873 			}
1874 		      dependence_stats.num_siv_dependent++;
1875 		      return;
1876 		    }
1877 
1878 		  /* When the step does not divide the difference, there are
1879 		     no overlaps.  */
1880 		  else
1881 		    {
1882 		      *overlaps_a = conflict_fn_no_dependence ();
1883 		      *overlaps_b = conflict_fn_no_dependence ();
1884 		      *last_conflicts = integer_zero_node;
1885 		      dependence_stats.num_siv_independent++;
1886 		      return;
1887 		    }
1888 		}
1889 
1890 	      else
1891 		{
1892 		  /* Example:
1893 		     chrec_a = 12
1894 		     chrec_b = {10, +, -1}
1895 
1896 		     In this case, chrec_a will not overlap with chrec_b.  */
1897 		  *overlaps_a = conflict_fn_no_dependence ();
1898 		  *overlaps_b = conflict_fn_no_dependence ();
1899 		  *last_conflicts = integer_zero_node;
1900 		  dependence_stats.num_siv_independent++;
1901 		  return;
1902 		}
1903 	    }
1904 	}
1905       else
1906 	{
1907 	  if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1908 	    {
1909 	      if (dump_file && (dump_flags & TDF_DETAILS))
1910 		fprintf (dump_file, "siv test failed: chrec not positive.\n");
1911 
1912 	      *overlaps_a = conflict_fn_not_known ();
1913 	      *overlaps_b = conflict_fn_not_known ();
1914 	      *last_conflicts = chrec_dont_know;
1915 	      dependence_stats.num_siv_unimplemented++;
1916 	      return;
1917 	    }
1918 	  else
1919 	    {
1920 	      if (value2 == false)
1921 		{
1922 		  /* Example:
1923 		     chrec_a = 3
1924 		     chrec_b = {10, +, -1}
1925 		  */
1926 		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1927 		    {
1928 		      HOST_WIDE_INT numiter;
1929 		      struct loop *loop = get_chrec_loop (chrec_b);
1930 
1931 		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1932 		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1933 					 CHREC_RIGHT (chrec_b));
1934 		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1935 		      *last_conflicts = integer_one_node;
1936 
1937 		      /* Perform weak-zero siv test to see if overlap is
1938 			 outside the loop bounds.  */
1939 		      numiter = max_stmt_executions_int (loop);
1940 
1941 		      if (numiter >= 0
1942 			  && compare_tree_int (tmp, numiter) > 0)
1943 			{
1944 			  free_conflict_function (*overlaps_a);
1945 			  free_conflict_function (*overlaps_b);
1946 			  *overlaps_a = conflict_fn_no_dependence ();
1947 			  *overlaps_b = conflict_fn_no_dependence ();
1948 			  *last_conflicts = integer_zero_node;
1949 			  dependence_stats.num_siv_independent++;
1950 			  return;
1951 			}
1952 		      dependence_stats.num_siv_dependent++;
1953 		      return;
1954 		    }
1955 
1956 		  /* When the step does not divide the difference, there
1957 		     are no overlaps.  */
1958 		  else
1959 		    {
1960 		      *overlaps_a = conflict_fn_no_dependence ();
1961 		      *overlaps_b = conflict_fn_no_dependence ();
1962 		      *last_conflicts = integer_zero_node;
1963 		      dependence_stats.num_siv_independent++;
1964 		      return;
1965 		    }
1966 		}
1967 	      else
1968 		{
1969 		  /* Example:
1970 		     chrec_a = 3
1971 		     chrec_b = {4, +, 1}
1972 
1973 		     In this case, chrec_a will not overlap with chrec_b.  */
1974 		  *overlaps_a = conflict_fn_no_dependence ();
1975 		  *overlaps_b = conflict_fn_no_dependence ();
1976 		  *last_conflicts = integer_zero_node;
1977 		  dependence_stats.num_siv_independent++;
1978 		  return;
1979 		}
1980 	    }
1981 	}
1982     }
1983 }
1984 
1985 /* Helper recursive function for initializing the matrix A.  Returns
1986    the initial value of CHREC.  */
1987 
1988 static tree
initialize_matrix_A(lambda_matrix A,tree chrec,unsigned index,int mult)1989 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1990 {
1991   gcc_assert (chrec);
1992 
1993   switch (TREE_CODE (chrec))
1994     {
1995     case POLYNOMIAL_CHREC:
1996       gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1997 
1998       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1999       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
2000 
2001     case PLUS_EXPR:
2002     case MULT_EXPR:
2003     case MINUS_EXPR:
2004       {
2005 	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2006 	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
2007 
2008 	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2009       }
2010 
2011     case NOP_EXPR:
2012       {
2013 	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2014 	return chrec_convert (chrec_type (chrec), op, NULL);
2015       }
2016 
2017     case BIT_NOT_EXPR:
2018       {
2019 	/* Handle ~X as -1 - X.  */
2020 	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2021 	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2022 			      build_int_cst (TREE_TYPE (chrec), -1), op);
2023       }
2024 
2025     case INTEGER_CST:
2026       return chrec;
2027 
2028     default:
2029       gcc_unreachable ();
2030       return NULL_TREE;
2031     }
2032 }
2033 
2034 #define FLOOR_DIV(x,y) ((x) / (y))
2035 
2036 /* Solves the special case of the Diophantine equation:
2037    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2038 
2039    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
2040    number of iterations that loops X and Y run.  The overlaps will be
2041    constructed as evolutions in dimension DIM.  */
2042 
2043 static void
compute_overlap_steps_for_affine_univar(int niter,int step_a,int step_b,affine_fn * overlaps_a,affine_fn * overlaps_b,tree * last_conflicts,int dim)2044 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
2045 					 affine_fn *overlaps_a,
2046 					 affine_fn *overlaps_b,
2047 					 tree *last_conflicts, int dim)
2048 {
2049   if (((step_a > 0 && step_b > 0)
2050        || (step_a < 0 && step_b < 0)))
2051     {
2052       int step_overlaps_a, step_overlaps_b;
2053       int gcd_steps_a_b, last_conflict, tau2;
2054 
2055       gcd_steps_a_b = gcd (step_a, step_b);
2056       step_overlaps_a = step_b / gcd_steps_a_b;
2057       step_overlaps_b = step_a / gcd_steps_a_b;
2058 
2059       if (niter > 0)
2060 	{
2061 	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
2062 	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2063 	  last_conflict = tau2;
2064 	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2065 	}
2066       else
2067 	*last_conflicts = chrec_dont_know;
2068 
2069       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2070 				      build_int_cst (NULL_TREE,
2071 						     step_overlaps_a));
2072       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2073 				      build_int_cst (NULL_TREE,
2074 						     step_overlaps_b));
2075     }
2076 
2077   else
2078     {
2079       *overlaps_a = affine_fn_cst (integer_zero_node);
2080       *overlaps_b = affine_fn_cst (integer_zero_node);
2081       *last_conflicts = integer_zero_node;
2082     }
2083 }
2084 
2085 /* Solves the special case of a Diophantine equation where CHREC_A is
2086    an affine bivariate function, and CHREC_B is an affine univariate
2087    function.  For example,
2088 
2089    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2090 
2091    has the following overlapping functions:
2092 
2093    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2094    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2095    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2096 
2097    FORNOW: This is a specialized implementation for a case occurring in
2098    a common benchmark.  Implement the general algorithm.  */
2099 
2100 static void
compute_overlap_steps_for_affine_1_2(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)2101 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2102 				      conflict_function **overlaps_a,
2103 				      conflict_function **overlaps_b,
2104 				      tree *last_conflicts)
2105 {
2106   bool xz_p, yz_p, xyz_p;
2107   int step_x, step_y, step_z;
2108   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2109   affine_fn overlaps_a_xz, overlaps_b_xz;
2110   affine_fn overlaps_a_yz, overlaps_b_yz;
2111   affine_fn overlaps_a_xyz, overlaps_b_xyz;
2112   affine_fn ova1, ova2, ovb;
2113   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2114 
2115   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2116   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2117   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2118 
2119   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
2120   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
2121   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
2122 
2123   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2124     {
2125       if (dump_file && (dump_flags & TDF_DETAILS))
2126 	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2127 
2128       *overlaps_a = conflict_fn_not_known ();
2129       *overlaps_b = conflict_fn_not_known ();
2130       *last_conflicts = chrec_dont_know;
2131       return;
2132     }
2133 
2134   niter = MIN (niter_x, niter_z);
2135   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2136 					   &overlaps_a_xz,
2137 					   &overlaps_b_xz,
2138 					   &last_conflicts_xz, 1);
2139   niter = MIN (niter_y, niter_z);
2140   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2141 					   &overlaps_a_yz,
2142 					   &overlaps_b_yz,
2143 					   &last_conflicts_yz, 2);
2144   niter = MIN (niter_x, niter_z);
2145   niter = MIN (niter_y, niter);
2146   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2147 					   &overlaps_a_xyz,
2148 					   &overlaps_b_xyz,
2149 					   &last_conflicts_xyz, 3);
2150 
2151   xz_p = !integer_zerop (last_conflicts_xz);
2152   yz_p = !integer_zerop (last_conflicts_yz);
2153   xyz_p = !integer_zerop (last_conflicts_xyz);
2154 
2155   if (xz_p || yz_p || xyz_p)
2156     {
2157       ova1 = affine_fn_cst (integer_zero_node);
2158       ova2 = affine_fn_cst (integer_zero_node);
2159       ovb = affine_fn_cst (integer_zero_node);
2160       if (xz_p)
2161 	{
2162 	  affine_fn t0 = ova1;
2163 	  affine_fn t2 = ovb;
2164 
2165 	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2166 	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
2167 	  affine_fn_free (t0);
2168 	  affine_fn_free (t2);
2169 	  *last_conflicts = last_conflicts_xz;
2170 	}
2171       if (yz_p)
2172 	{
2173 	  affine_fn t0 = ova2;
2174 	  affine_fn t2 = ovb;
2175 
2176 	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2177 	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
2178 	  affine_fn_free (t0);
2179 	  affine_fn_free (t2);
2180 	  *last_conflicts = last_conflicts_yz;
2181 	}
2182       if (xyz_p)
2183 	{
2184 	  affine_fn t0 = ova1;
2185 	  affine_fn t2 = ova2;
2186 	  affine_fn t4 = ovb;
2187 
2188 	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2189 	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2190 	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2191 	  affine_fn_free (t0);
2192 	  affine_fn_free (t2);
2193 	  affine_fn_free (t4);
2194 	  *last_conflicts = last_conflicts_xyz;
2195 	}
2196       *overlaps_a = conflict_fn (2, ova1, ova2);
2197       *overlaps_b = conflict_fn (1, ovb);
2198     }
2199   else
2200     {
2201       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2202       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2203       *last_conflicts = integer_zero_node;
2204     }
2205 
2206   affine_fn_free (overlaps_a_xz);
2207   affine_fn_free (overlaps_b_xz);
2208   affine_fn_free (overlaps_a_yz);
2209   affine_fn_free (overlaps_b_yz);
2210   affine_fn_free (overlaps_a_xyz);
2211   affine_fn_free (overlaps_b_xyz);
2212 }
2213 
2214 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
2215 
2216 static void
lambda_vector_copy(lambda_vector vec1,lambda_vector vec2,int size)2217 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2218 		    int size)
2219 {
2220   memcpy (vec2, vec1, size * sizeof (*vec1));
2221 }
2222 
2223 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
2224 
2225 static void
lambda_matrix_copy(lambda_matrix mat1,lambda_matrix mat2,int m,int n)2226 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2227 		    int m, int n)
2228 {
2229   int i;
2230 
2231   for (i = 0; i < m; i++)
2232     lambda_vector_copy (mat1[i], mat2[i], n);
2233 }
2234 
2235 /* Store the N x N identity matrix in MAT.  */
2236 
2237 static void
lambda_matrix_id(lambda_matrix mat,int size)2238 lambda_matrix_id (lambda_matrix mat, int size)
2239 {
2240   int i, j;
2241 
2242   for (i = 0; i < size; i++)
2243     for (j = 0; j < size; j++)
2244       mat[i][j] = (i == j) ? 1 : 0;
2245 }
2246 
2247 /* Return the first nonzero element of vector VEC1 between START and N.
2248    We must have START <= N.   Returns N if VEC1 is the zero vector.  */
2249 
2250 static int
lambda_vector_first_nz(lambda_vector vec1,int n,int start)2251 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2252 {
2253   int j = start;
2254   while (j < n && vec1[j] == 0)
2255     j++;
2256   return j;
2257 }
2258 
2259 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2260    R2 = R2 + CONST1 * R1.  */
2261 
2262 static void
lambda_matrix_row_add(lambda_matrix mat,int n,int r1,int r2,int const1)2263 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2264 {
2265   int i;
2266 
2267   if (const1 == 0)
2268     return;
2269 
2270   for (i = 0; i < n; i++)
2271     mat[r2][i] += const1 * mat[r1][i];
2272 }
2273 
2274 /* Swap rows R1 and R2 in matrix MAT.  */
2275 
2276 static void
lambda_matrix_row_exchange(lambda_matrix mat,int r1,int r2)2277 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2278 {
2279   lambda_vector row;
2280 
2281   row = mat[r1];
2282   mat[r1] = mat[r2];
2283   mat[r2] = row;
2284 }
2285 
2286 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2287    and store the result in VEC2.  */
2288 
2289 static void
lambda_vector_mult_const(lambda_vector vec1,lambda_vector vec2,int size,int const1)2290 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2291 			  int size, int const1)
2292 {
2293   int i;
2294 
2295   if (const1 == 0)
2296     lambda_vector_clear (vec2, size);
2297   else
2298     for (i = 0; i < size; i++)
2299       vec2[i] = const1 * vec1[i];
2300 }
2301 
2302 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
2303 
2304 static void
lambda_vector_negate(lambda_vector vec1,lambda_vector vec2,int size)2305 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2306 		      int size)
2307 {
2308   lambda_vector_mult_const (vec1, vec2, size, -1);
2309 }
2310 
2311 /* Negate row R1 of matrix MAT which has N columns.  */
2312 
2313 static void
lambda_matrix_row_negate(lambda_matrix mat,int n,int r1)2314 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2315 {
2316   lambda_vector_negate (mat[r1], mat[r1], n);
2317 }
2318 
2319 /* Return true if two vectors are equal.  */
2320 
2321 static bool
lambda_vector_equal(lambda_vector vec1,lambda_vector vec2,int size)2322 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2323 {
2324   int i;
2325   for (i = 0; i < size; i++)
2326     if (vec1[i] != vec2[i])
2327       return false;
2328   return true;
2329 }
2330 
2331 /* Given an M x N integer matrix A, this function determines an M x
2332    M unimodular matrix U, and an M x N echelon matrix S such that
2333    "U.A = S".  This decomposition is also known as "right Hermite".
2334 
2335    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2336    Restructuring Compilers" Utpal Banerjee.  */
2337 
2338 static void
lambda_matrix_right_hermite(lambda_matrix A,int m,int n,lambda_matrix S,lambda_matrix U)2339 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2340 			     lambda_matrix S, lambda_matrix U)
2341 {
2342   int i, j, i0 = 0;
2343 
2344   lambda_matrix_copy (A, S, m, n);
2345   lambda_matrix_id (U, m);
2346 
2347   for (j = 0; j < n; j++)
2348     {
2349       if (lambda_vector_first_nz (S[j], m, i0) < m)
2350 	{
2351 	  ++i0;
2352 	  for (i = m - 1; i >= i0; i--)
2353 	    {
2354 	      while (S[i][j] != 0)
2355 		{
2356 		  int sigma, factor, a, b;
2357 
2358 		  a = S[i-1][j];
2359 		  b = S[i][j];
2360 		  sigma = (a * b < 0) ? -1: 1;
2361 		  a = abs (a);
2362 		  b = abs (b);
2363 		  factor = sigma * (a / b);
2364 
2365 		  lambda_matrix_row_add (S, n, i, i-1, -factor);
2366 		  lambda_matrix_row_exchange (S, i, i-1);
2367 
2368 		  lambda_matrix_row_add (U, m, i, i-1, -factor);
2369 		  lambda_matrix_row_exchange (U, i, i-1);
2370 		}
2371 	    }
2372 	}
2373     }
2374 }
2375 
2376 /* Determines the overlapping elements due to accesses CHREC_A and
2377    CHREC_B, that are affine functions.  This function cannot handle
2378    symbolic evolution functions, ie. when initial conditions are
2379    parameters, because it uses lambda matrices of integers.  */
2380 
2381 static void
analyze_subscript_affine_affine(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)2382 analyze_subscript_affine_affine (tree chrec_a,
2383 				 tree chrec_b,
2384 				 conflict_function **overlaps_a,
2385 				 conflict_function **overlaps_b,
2386 				 tree *last_conflicts)
2387 {
2388   unsigned nb_vars_a, nb_vars_b, dim;
2389   HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2390   lambda_matrix A, U, S;
2391   struct obstack scratch_obstack;
2392 
2393   if (eq_evolutions_p (chrec_a, chrec_b))
2394     {
2395       /* The accessed index overlaps for each iteration in the
2396 	 loop.  */
2397       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2398       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2399       *last_conflicts = chrec_dont_know;
2400       return;
2401     }
2402   if (dump_file && (dump_flags & TDF_DETAILS))
2403     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2404 
2405   /* For determining the initial intersection, we have to solve a
2406      Diophantine equation.  This is the most time consuming part.
2407 
2408      For answering to the question: "Is there a dependence?" we have
2409      to prove that there exists a solution to the Diophantine
2410      equation, and that the solution is in the iteration domain,
2411      i.e. the solution is positive or zero, and that the solution
2412      happens before the upper bound loop.nb_iterations.  Otherwise
2413      there is no dependence.  This function outputs a description of
2414      the iterations that hold the intersections.  */
2415 
2416   nb_vars_a = nb_vars_in_chrec (chrec_a);
2417   nb_vars_b = nb_vars_in_chrec (chrec_b);
2418 
2419   gcc_obstack_init (&scratch_obstack);
2420 
2421   dim = nb_vars_a + nb_vars_b;
2422   U = lambda_matrix_new (dim, dim, &scratch_obstack);
2423   A = lambda_matrix_new (dim, 1, &scratch_obstack);
2424   S = lambda_matrix_new (dim, 1, &scratch_obstack);
2425 
2426   init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2427   init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2428   gamma = init_b - init_a;
2429 
2430   /* Don't do all the hard work of solving the Diophantine equation
2431      when we already know the solution: for example,
2432      | {3, +, 1}_1
2433      | {3, +, 4}_2
2434      | gamma = 3 - 3 = 0.
2435      Then the first overlap occurs during the first iterations:
2436      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2437   */
2438   if (gamma == 0)
2439     {
2440       if (nb_vars_a == 1 && nb_vars_b == 1)
2441 	{
2442 	  HOST_WIDE_INT step_a, step_b;
2443 	  HOST_WIDE_INT niter, niter_a, niter_b;
2444 	  affine_fn ova, ovb;
2445 
2446 	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
2447 	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
2448 	  niter = MIN (niter_a, niter_b);
2449 	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2450 	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2451 
2452 	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2453 						   &ova, &ovb,
2454 						   last_conflicts, 1);
2455 	  *overlaps_a = conflict_fn (1, ova);
2456 	  *overlaps_b = conflict_fn (1, ovb);
2457 	}
2458 
2459       else if (nb_vars_a == 2 && nb_vars_b == 1)
2460 	compute_overlap_steps_for_affine_1_2
2461 	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2462 
2463       else if (nb_vars_a == 1 && nb_vars_b == 2)
2464 	compute_overlap_steps_for_affine_1_2
2465 	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2466 
2467       else
2468 	{
2469 	  if (dump_file && (dump_flags & TDF_DETAILS))
2470 	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2471 	  *overlaps_a = conflict_fn_not_known ();
2472 	  *overlaps_b = conflict_fn_not_known ();
2473 	  *last_conflicts = chrec_dont_know;
2474 	}
2475       goto end_analyze_subs_aa;
2476     }
2477 
2478   /* U.A = S */
2479   lambda_matrix_right_hermite (A, dim, 1, S, U);
2480 
2481   if (S[0][0] < 0)
2482     {
2483       S[0][0] *= -1;
2484       lambda_matrix_row_negate (U, dim, 0);
2485     }
2486   gcd_alpha_beta = S[0][0];
2487 
2488   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2489      but that is a quite strange case.  Instead of ICEing, answer
2490      don't know.  */
2491   if (gcd_alpha_beta == 0)
2492     {
2493       *overlaps_a = conflict_fn_not_known ();
2494       *overlaps_b = conflict_fn_not_known ();
2495       *last_conflicts = chrec_dont_know;
2496       goto end_analyze_subs_aa;
2497     }
2498 
2499   /* The classic "gcd-test".  */
2500   if (!int_divides_p (gcd_alpha_beta, gamma))
2501     {
2502       /* The "gcd-test" has determined that there is no integer
2503 	 solution, i.e. there is no dependence.  */
2504       *overlaps_a = conflict_fn_no_dependence ();
2505       *overlaps_b = conflict_fn_no_dependence ();
2506       *last_conflicts = integer_zero_node;
2507     }
2508 
2509   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
2510   else if (nb_vars_a == 1 && nb_vars_b == 1)
2511     {
2512       /* Both functions should have the same evolution sign.  */
2513       if (((A[0][0] > 0 && -A[1][0] > 0)
2514 	   || (A[0][0] < 0 && -A[1][0] < 0)))
2515 	{
2516 	  /* The solutions are given by:
2517 	     |
2518 	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
2519 	     |                           [u21 u22]    [y0]
2520 
2521 	     For a given integer t.  Using the following variables,
2522 
2523 	     | i0 = u11 * gamma / gcd_alpha_beta
2524 	     | j0 = u12 * gamma / gcd_alpha_beta
2525 	     | i1 = u21
2526 	     | j1 = u22
2527 
2528 	     the solutions are:
2529 
2530 	     | x0 = i0 + i1 * t,
2531 	     | y0 = j0 + j1 * t.  */
2532       	  HOST_WIDE_INT i0, j0, i1, j1;
2533 
2534 	  i0 = U[0][0] * gamma / gcd_alpha_beta;
2535 	  j0 = U[0][1] * gamma / gcd_alpha_beta;
2536 	  i1 = U[1][0];
2537 	  j1 = U[1][1];
2538 
2539 	  if ((i1 == 0 && i0 < 0)
2540 	      || (j1 == 0 && j0 < 0))
2541 	    {
2542 	      /* There is no solution.
2543 		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2544 		 falls in here, but for the moment we don't look at the
2545 		 upper bound of the iteration domain.  */
2546 	      *overlaps_a = conflict_fn_no_dependence ();
2547 	      *overlaps_b = conflict_fn_no_dependence ();
2548 	      *last_conflicts = integer_zero_node;
2549 	      goto end_analyze_subs_aa;
2550 	    }
2551 
2552 	  if (i1 > 0 && j1 > 0)
2553 	    {
2554 	      HOST_WIDE_INT niter_a
2555 		= max_stmt_executions_int (get_chrec_loop (chrec_a));
2556 	      HOST_WIDE_INT niter_b
2557 		= max_stmt_executions_int (get_chrec_loop (chrec_b));
2558 	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2559 
2560 	      /* (X0, Y0) is a solution of the Diophantine equation:
2561 		 "chrec_a (X0) = chrec_b (Y0)".  */
2562 	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2563 					CEIL (-j0, j1));
2564 	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
2565 	      HOST_WIDE_INT y0 = j1 * tau1 + j0;
2566 
2567 	      /* (X1, Y1) is the smallest positive solution of the eq
2568 		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2569 		 first conflict occurs.  */
2570 	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2571 	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2572 	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2573 
2574 	      if (niter > 0)
2575 		{
2576 		  HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2577 					    FLOOR_DIV (niter - j0, j1));
2578 		  HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2579 
2580 		  /* If the overlap occurs outside of the bounds of the
2581 		     loop, there is no dependence.  */
2582 		  if (x1 >= niter || y1 >= niter)
2583 		    {
2584 		      *overlaps_a = conflict_fn_no_dependence ();
2585 		      *overlaps_b = conflict_fn_no_dependence ();
2586 		      *last_conflicts = integer_zero_node;
2587 		      goto end_analyze_subs_aa;
2588 		    }
2589 		  else
2590 		    *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2591 		}
2592 	      else
2593 		*last_conflicts = chrec_dont_know;
2594 
2595 	      *overlaps_a
2596 		= conflict_fn (1,
2597 			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
2598 						 1,
2599 						 build_int_cst (NULL_TREE, i1)));
2600 	      *overlaps_b
2601 		= conflict_fn (1,
2602 			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
2603 						 1,
2604 						 build_int_cst (NULL_TREE, j1)));
2605 	    }
2606 	  else
2607 	    {
2608 	      /* FIXME: For the moment, the upper bound of the
2609 		 iteration domain for i and j is not checked.  */
2610 	      if (dump_file && (dump_flags & TDF_DETAILS))
2611 		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2612 	      *overlaps_a = conflict_fn_not_known ();
2613 	      *overlaps_b = conflict_fn_not_known ();
2614 	      *last_conflicts = chrec_dont_know;
2615 	    }
2616 	}
2617       else
2618 	{
2619 	  if (dump_file && (dump_flags & TDF_DETAILS))
2620 	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2621 	  *overlaps_a = conflict_fn_not_known ();
2622 	  *overlaps_b = conflict_fn_not_known ();
2623 	  *last_conflicts = chrec_dont_know;
2624 	}
2625     }
2626   else
2627     {
2628       if (dump_file && (dump_flags & TDF_DETAILS))
2629 	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2630       *overlaps_a = conflict_fn_not_known ();
2631       *overlaps_b = conflict_fn_not_known ();
2632       *last_conflicts = chrec_dont_know;
2633     }
2634 
2635 end_analyze_subs_aa:
2636   obstack_free (&scratch_obstack, NULL);
2637   if (dump_file && (dump_flags & TDF_DETAILS))
2638     {
2639       fprintf (dump_file, "  (overlaps_a = ");
2640       dump_conflict_function (dump_file, *overlaps_a);
2641       fprintf (dump_file, ")\n  (overlaps_b = ");
2642       dump_conflict_function (dump_file, *overlaps_b);
2643       fprintf (dump_file, "))\n");
2644     }
2645 }
2646 
2647 /* Returns true when analyze_subscript_affine_affine can be used for
2648    determining the dependence relation between chrec_a and chrec_b,
2649    that contain symbols.  This function modifies chrec_a and chrec_b
2650    such that the analysis result is the same, and such that they don't
2651    contain symbols, and then can safely be passed to the analyzer.
2652 
2653    Example: The analysis of the following tuples of evolutions produce
2654    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2655    vs. {0, +, 1}_1
2656 
2657    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2658    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2659 */
2660 
2661 static bool
can_use_analyze_subscript_affine_affine(tree * chrec_a,tree * chrec_b)2662 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2663 {
2664   tree diff, type, left_a, left_b, right_b;
2665 
2666   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2667       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2668     /* FIXME: For the moment not handled.  Might be refined later.  */
2669     return false;
2670 
2671   type = chrec_type (*chrec_a);
2672   left_a = CHREC_LEFT (*chrec_a);
2673   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2674   diff = chrec_fold_minus (type, left_a, left_b);
2675 
2676   if (!evolution_function_is_constant_p (diff))
2677     return false;
2678 
2679   if (dump_file && (dump_flags & TDF_DETAILS))
2680     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2681 
2682   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2683 				     diff, CHREC_RIGHT (*chrec_a));
2684   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2685   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2686 				     build_int_cst (type, 0),
2687 				     right_b);
2688   return true;
2689 }
2690 
2691 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
2692    *OVERLAPS_B are initialized to the functions that describe the
2693    relation between the elements accessed twice by CHREC_A and
2694    CHREC_B.  For k >= 0, the following property is verified:
2695 
2696    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2697 
2698 static void
analyze_siv_subscript(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts,int loop_nest_num)2699 analyze_siv_subscript (tree chrec_a,
2700 		       tree chrec_b,
2701 		       conflict_function **overlaps_a,
2702 		       conflict_function **overlaps_b,
2703 		       tree *last_conflicts,
2704 		       int loop_nest_num)
2705 {
2706   dependence_stats.num_siv++;
2707 
2708   if (dump_file && (dump_flags & TDF_DETAILS))
2709     fprintf (dump_file, "(analyze_siv_subscript \n");
2710 
2711   if (evolution_function_is_constant_p (chrec_a)
2712       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2713     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2714 				      overlaps_a, overlaps_b, last_conflicts);
2715 
2716   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2717 	   && evolution_function_is_constant_p (chrec_b))
2718     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2719 				      overlaps_b, overlaps_a, last_conflicts);
2720 
2721   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2722 	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2723     {
2724       if (!chrec_contains_symbols (chrec_a)
2725 	  && !chrec_contains_symbols (chrec_b))
2726 	{
2727 	  analyze_subscript_affine_affine (chrec_a, chrec_b,
2728 					   overlaps_a, overlaps_b,
2729 					   last_conflicts);
2730 
2731 	  if (CF_NOT_KNOWN_P (*overlaps_a)
2732 	      || CF_NOT_KNOWN_P (*overlaps_b))
2733 	    dependence_stats.num_siv_unimplemented++;
2734 	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2735 		   || CF_NO_DEPENDENCE_P (*overlaps_b))
2736 	    dependence_stats.num_siv_independent++;
2737 	  else
2738 	    dependence_stats.num_siv_dependent++;
2739 	}
2740       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2741 							&chrec_b))
2742 	{
2743 	  analyze_subscript_affine_affine (chrec_a, chrec_b,
2744 					   overlaps_a, overlaps_b,
2745 					   last_conflicts);
2746 
2747 	  if (CF_NOT_KNOWN_P (*overlaps_a)
2748 	      || CF_NOT_KNOWN_P (*overlaps_b))
2749 	    dependence_stats.num_siv_unimplemented++;
2750 	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2751 		   || CF_NO_DEPENDENCE_P (*overlaps_b))
2752 	    dependence_stats.num_siv_independent++;
2753 	  else
2754 	    dependence_stats.num_siv_dependent++;
2755 	}
2756       else
2757 	goto siv_subscript_dontknow;
2758     }
2759 
2760   else
2761     {
2762     siv_subscript_dontknow:;
2763       if (dump_file && (dump_flags & TDF_DETAILS))
2764 	fprintf (dump_file, "  siv test failed: unimplemented");
2765       *overlaps_a = conflict_fn_not_known ();
2766       *overlaps_b = conflict_fn_not_known ();
2767       *last_conflicts = chrec_dont_know;
2768       dependence_stats.num_siv_unimplemented++;
2769     }
2770 
2771   if (dump_file && (dump_flags & TDF_DETAILS))
2772     fprintf (dump_file, ")\n");
2773 }
2774 
2775 /* Returns false if we can prove that the greatest common divisor of the steps
2776    of CHREC does not divide CST, false otherwise.  */
2777 
2778 static bool
gcd_of_steps_may_divide_p(const_tree chrec,const_tree cst)2779 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2780 {
2781   HOST_WIDE_INT cd = 0, val;
2782   tree step;
2783 
2784   if (!host_integerp (cst, 0))
2785     return true;
2786   val = tree_low_cst (cst, 0);
2787 
2788   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2789     {
2790       step = CHREC_RIGHT (chrec);
2791       if (!host_integerp (step, 0))
2792 	return true;
2793       cd = gcd (cd, tree_low_cst (step, 0));
2794       chrec = CHREC_LEFT (chrec);
2795     }
2796 
2797   return val % cd == 0;
2798 }
2799 
2800 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2801    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
2802    functions that describe the relation between the elements accessed
2803    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
2804    is verified:
2805 
2806    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2807 
2808 static void
analyze_miv_subscript(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts,struct loop * loop_nest)2809 analyze_miv_subscript (tree chrec_a,
2810 		       tree chrec_b,
2811 		       conflict_function **overlaps_a,
2812 		       conflict_function **overlaps_b,
2813 		       tree *last_conflicts,
2814 		       struct loop *loop_nest)
2815 {
2816   tree type, difference;
2817 
2818   dependence_stats.num_miv++;
2819   if (dump_file && (dump_flags & TDF_DETAILS))
2820     fprintf (dump_file, "(analyze_miv_subscript \n");
2821 
2822   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2823   chrec_a = chrec_convert (type, chrec_a, NULL);
2824   chrec_b = chrec_convert (type, chrec_b, NULL);
2825   difference = chrec_fold_minus (type, chrec_a, chrec_b);
2826 
2827   if (eq_evolutions_p (chrec_a, chrec_b))
2828     {
2829       /* Access functions are the same: all the elements are accessed
2830 	 in the same order.  */
2831       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2832       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2833       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2834       dependence_stats.num_miv_dependent++;
2835     }
2836 
2837   else if (evolution_function_is_constant_p (difference)
2838 	   /* For the moment, the following is verified:
2839 	      evolution_function_is_affine_multivariate_p (chrec_a,
2840 	      loop_nest->num) */
2841 	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
2842     {
2843       /* testsuite/.../ssa-chrec-33.c
2844 	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
2845 
2846 	 The difference is 1, and all the evolution steps are multiples
2847 	 of 2, consequently there are no overlapping elements.  */
2848       *overlaps_a = conflict_fn_no_dependence ();
2849       *overlaps_b = conflict_fn_no_dependence ();
2850       *last_conflicts = integer_zero_node;
2851       dependence_stats.num_miv_independent++;
2852     }
2853 
2854   else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2855 	   && !chrec_contains_symbols (chrec_a)
2856 	   && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2857 	   && !chrec_contains_symbols (chrec_b))
2858     {
2859       /* testsuite/.../ssa-chrec-35.c
2860 	 {0, +, 1}_2  vs.  {0, +, 1}_3
2861 	 the overlapping elements are respectively located at iterations:
2862 	 {0, +, 1}_x and {0, +, 1}_x,
2863 	 in other words, we have the equality:
2864 	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2865 
2866 	 Other examples:
2867 	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2868 	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2869 
2870 	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2871 	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2872       */
2873       analyze_subscript_affine_affine (chrec_a, chrec_b,
2874 				       overlaps_a, overlaps_b, last_conflicts);
2875 
2876       if (CF_NOT_KNOWN_P (*overlaps_a)
2877  	  || CF_NOT_KNOWN_P (*overlaps_b))
2878 	dependence_stats.num_miv_unimplemented++;
2879       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2880 	       || CF_NO_DEPENDENCE_P (*overlaps_b))
2881 	dependence_stats.num_miv_independent++;
2882       else
2883 	dependence_stats.num_miv_dependent++;
2884     }
2885 
2886   else
2887     {
2888       /* When the analysis is too difficult, answer "don't know".  */
2889       if (dump_file && (dump_flags & TDF_DETAILS))
2890 	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2891 
2892       *overlaps_a = conflict_fn_not_known ();
2893       *overlaps_b = conflict_fn_not_known ();
2894       *last_conflicts = chrec_dont_know;
2895       dependence_stats.num_miv_unimplemented++;
2896     }
2897 
2898   if (dump_file && (dump_flags & TDF_DETAILS))
2899     fprintf (dump_file, ")\n");
2900 }
2901 
2902 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2903    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
2904    OVERLAP_ITERATIONS_B are initialized with two functions that
2905    describe the iterations that contain conflicting elements.
2906 
2907    Remark: For an integer k >= 0, the following equality is true:
2908 
2909    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2910 */
2911 
2912 static void
analyze_overlapping_iterations(tree chrec_a,tree chrec_b,conflict_function ** overlap_iterations_a,conflict_function ** overlap_iterations_b,tree * last_conflicts,struct loop * loop_nest)2913 analyze_overlapping_iterations (tree chrec_a,
2914 				tree chrec_b,
2915 				conflict_function **overlap_iterations_a,
2916 				conflict_function **overlap_iterations_b,
2917 				tree *last_conflicts, struct loop *loop_nest)
2918 {
2919   unsigned int lnn = loop_nest->num;
2920 
2921   dependence_stats.num_subscript_tests++;
2922 
2923   if (dump_file && (dump_flags & TDF_DETAILS))
2924     {
2925       fprintf (dump_file, "(analyze_overlapping_iterations \n");
2926       fprintf (dump_file, "  (chrec_a = ");
2927       print_generic_expr (dump_file, chrec_a, 0);
2928       fprintf (dump_file, ")\n  (chrec_b = ");
2929       print_generic_expr (dump_file, chrec_b, 0);
2930       fprintf (dump_file, ")\n");
2931     }
2932 
2933   if (chrec_a == NULL_TREE
2934       || chrec_b == NULL_TREE
2935       || chrec_contains_undetermined (chrec_a)
2936       || chrec_contains_undetermined (chrec_b))
2937     {
2938       dependence_stats.num_subscript_undetermined++;
2939 
2940       *overlap_iterations_a = conflict_fn_not_known ();
2941       *overlap_iterations_b = conflict_fn_not_known ();
2942     }
2943 
2944   /* If they are the same chrec, and are affine, they overlap
2945      on every iteration.  */
2946   else if (eq_evolutions_p (chrec_a, chrec_b)
2947 	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2948 	       || operand_equal_p (chrec_a, chrec_b, 0)))
2949     {
2950       dependence_stats.num_same_subscript_function++;
2951       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2952       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2953       *last_conflicts = chrec_dont_know;
2954     }
2955 
2956   /* If they aren't the same, and aren't affine, we can't do anything
2957      yet.  */
2958   else if ((chrec_contains_symbols (chrec_a)
2959 	    || chrec_contains_symbols (chrec_b))
2960 	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2961 	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2962     {
2963       dependence_stats.num_subscript_undetermined++;
2964       *overlap_iterations_a = conflict_fn_not_known ();
2965       *overlap_iterations_b = conflict_fn_not_known ();
2966     }
2967 
2968   else if (ziv_subscript_p (chrec_a, chrec_b))
2969     analyze_ziv_subscript (chrec_a, chrec_b,
2970 			   overlap_iterations_a, overlap_iterations_b,
2971 			   last_conflicts);
2972 
2973   else if (siv_subscript_p (chrec_a, chrec_b))
2974     analyze_siv_subscript (chrec_a, chrec_b,
2975 			   overlap_iterations_a, overlap_iterations_b,
2976 			   last_conflicts, lnn);
2977 
2978   else
2979     analyze_miv_subscript (chrec_a, chrec_b,
2980 			   overlap_iterations_a, overlap_iterations_b,
2981 			   last_conflicts, loop_nest);
2982 
2983   if (dump_file && (dump_flags & TDF_DETAILS))
2984     {
2985       fprintf (dump_file, "  (overlap_iterations_a = ");
2986       dump_conflict_function (dump_file, *overlap_iterations_a);
2987       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
2988       dump_conflict_function (dump_file, *overlap_iterations_b);
2989       fprintf (dump_file, "))\n");
2990     }
2991 }
2992 
2993 /* Helper function for uniquely inserting distance vectors.  */
2994 
2995 static void
save_dist_v(struct data_dependence_relation * ddr,lambda_vector dist_v)2996 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2997 {
2998   unsigned i;
2999   lambda_vector v;
3000 
3001   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
3002     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3003       return;
3004 
3005   DDR_DIST_VECTS (ddr).safe_push (dist_v);
3006 }
3007 
3008 /* Helper function for uniquely inserting direction vectors.  */
3009 
3010 static void
save_dir_v(struct data_dependence_relation * ddr,lambda_vector dir_v)3011 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
3012 {
3013   unsigned i;
3014   lambda_vector v;
3015 
3016   FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
3017     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3018       return;
3019 
3020   DDR_DIR_VECTS (ddr).safe_push (dir_v);
3021 }
3022 
3023 /* Add a distance of 1 on all the loops outer than INDEX.  If we
3024    haven't yet determined a distance for this outer loop, push a new
3025    distance vector composed of the previous distance, and a distance
3026    of 1 for this outer loop.  Example:
3027 
3028    | loop_1
3029    |   loop_2
3030    |     A[10]
3031    |   endloop_2
3032    | endloop_1
3033 
3034    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
3035    save (0, 1), then we have to save (1, 0).  */
3036 
3037 static void
add_outer_distances(struct data_dependence_relation * ddr,lambda_vector dist_v,int index)3038 add_outer_distances (struct data_dependence_relation *ddr,
3039 		     lambda_vector dist_v, int index)
3040 {
3041   /* For each outer loop where init_v is not set, the accesses are
3042      in dependence of distance 1 in the loop.  */
3043   while (--index >= 0)
3044     {
3045       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3046       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3047       save_v[index] = 1;
3048       save_dist_v (ddr, save_v);
3049     }
3050 }
3051 
3052 /* Return false when fail to represent the data dependence as a
3053    distance vector.  INIT_B is set to true when a component has been
3054    added to the distance vector DIST_V.  INDEX_CARRY is then set to
3055    the index in DIST_V that carries the dependence.  */
3056 
3057 static bool
build_classic_dist_vector_1(struct data_dependence_relation * ddr,struct data_reference * ddr_a,struct data_reference * ddr_b,lambda_vector dist_v,bool * init_b,int * index_carry)3058 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3059 			     struct data_reference *ddr_a,
3060 			     struct data_reference *ddr_b,
3061 			     lambda_vector dist_v, bool *init_b,
3062 			     int *index_carry)
3063 {
3064   unsigned i;
3065   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3066 
3067   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3068     {
3069       tree access_fn_a, access_fn_b;
3070       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
3071 
3072       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3073 	{
3074 	  non_affine_dependence_relation (ddr);
3075 	  return false;
3076 	}
3077 
3078       access_fn_a = DR_ACCESS_FN (ddr_a, i);
3079       access_fn_b = DR_ACCESS_FN (ddr_b, i);
3080 
3081       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3082 	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3083 	{
3084 	  int dist, index;
3085 	  int var_a = CHREC_VARIABLE (access_fn_a);
3086 	  int var_b = CHREC_VARIABLE (access_fn_b);
3087 
3088 	  if (var_a != var_b
3089 	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3090 	    {
3091 	      non_affine_dependence_relation (ddr);
3092 	      return false;
3093 	    }
3094 
3095 	  dist = int_cst_value (SUB_DISTANCE (subscript));
3096 	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3097 	  *index_carry = MIN (index, *index_carry);
3098 
3099 	  /* This is the subscript coupling test.  If we have already
3100 	     recorded a distance for this loop (a distance coming from
3101 	     another subscript), it should be the same.  For example,
3102 	     in the following code, there is no dependence:
3103 
3104 	     | loop i = 0, N, 1
3105 	     |   T[i+1][i] = ...
3106 	     |   ... = T[i][i]
3107 	     | endloop
3108 	  */
3109 	  if (init_v[index] != 0 && dist_v[index] != dist)
3110 	    {
3111 	      finalize_ddr_dependent (ddr, chrec_known);
3112 	      return false;
3113 	    }
3114 
3115 	  dist_v[index] = dist;
3116 	  init_v[index] = 1;
3117 	  *init_b = true;
3118 	}
3119       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3120 	{
3121 	  /* This can be for example an affine vs. constant dependence
3122 	     (T[i] vs. T[3]) that is not an affine dependence and is
3123 	     not representable as a distance vector.  */
3124 	  non_affine_dependence_relation (ddr);
3125 	  return false;
3126 	}
3127     }
3128 
3129   return true;
3130 }
3131 
3132 /* Return true when the DDR contains only constant access functions.  */
3133 
3134 static bool
constant_access_functions(const struct data_dependence_relation * ddr)3135 constant_access_functions (const struct data_dependence_relation *ddr)
3136 {
3137   unsigned i;
3138 
3139   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3140     if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3141 	|| !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3142       return false;
3143 
3144   return true;
3145 }
3146 
3147 /* Helper function for the case where DDR_A and DDR_B are the same
3148    multivariate access function with a constant step.  For an example
3149    see pr34635-1.c.  */
3150 
3151 static void
add_multivariate_self_dist(struct data_dependence_relation * ddr,tree c_2)3152 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3153 {
3154   int x_1, x_2;
3155   tree c_1 = CHREC_LEFT (c_2);
3156   tree c_0 = CHREC_LEFT (c_1);
3157   lambda_vector dist_v;
3158   int v1, v2, cd;
3159 
3160   /* Polynomials with more than 2 variables are not handled yet.  When
3161      the evolution steps are parameters, it is not possible to
3162      represent the dependence using classical distance vectors.  */
3163   if (TREE_CODE (c_0) != INTEGER_CST
3164       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3165       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3166     {
3167       DDR_AFFINE_P (ddr) = false;
3168       return;
3169     }
3170 
3171   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3172   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3173 
3174   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
3175   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3176   v1 = int_cst_value (CHREC_RIGHT (c_1));
3177   v2 = int_cst_value (CHREC_RIGHT (c_2));
3178   cd = gcd (v1, v2);
3179   v1 /= cd;
3180   v2 /= cd;
3181 
3182   if (v2 < 0)
3183     {
3184       v2 = -v2;
3185       v1 = -v1;
3186     }
3187 
3188   dist_v[x_1] = v2;
3189   dist_v[x_2] = -v1;
3190   save_dist_v (ddr, dist_v);
3191 
3192   add_outer_distances (ddr, dist_v, x_1);
3193 }
3194 
3195 /* Helper function for the case where DDR_A and DDR_B are the same
3196    access functions.  */
3197 
3198 static void
add_other_self_distances(struct data_dependence_relation * ddr)3199 add_other_self_distances (struct data_dependence_relation *ddr)
3200 {
3201   lambda_vector dist_v;
3202   unsigned i;
3203   int index_carry = DDR_NB_LOOPS (ddr);
3204 
3205   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3206     {
3207       tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3208 
3209       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3210 	{
3211 	  if (!evolution_function_is_univariate_p (access_fun))
3212 	    {
3213 	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3214 		{
3215 		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3216 		  return;
3217 		}
3218 
3219 	      access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3220 
3221 	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3222 		add_multivariate_self_dist (ddr, access_fun);
3223 	      else
3224 		/* The evolution step is not constant: it varies in
3225 		   the outer loop, so this cannot be represented by a
3226 		   distance vector.  For example in pr34635.c the
3227 		   evolution is {0, +, {0, +, 4}_1}_2.  */
3228 		DDR_AFFINE_P (ddr) = false;
3229 
3230 	      return;
3231 	    }
3232 
3233 	  index_carry = MIN (index_carry,
3234 			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
3235 						 DDR_LOOP_NEST (ddr)));
3236 	}
3237     }
3238 
3239   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3240   add_outer_distances (ddr, dist_v, index_carry);
3241 }
3242 
3243 static void
insert_innermost_unit_dist_vector(struct data_dependence_relation * ddr)3244 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3245 {
3246   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3247 
3248   dist_v[DDR_INNER_LOOP (ddr)] = 1;
3249   save_dist_v (ddr, dist_v);
3250 }
3251 
3252 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
3253    is the case for example when access functions are the same and
3254    equal to a constant, as in:
3255 
3256    | loop_1
3257    |   A[3] = ...
3258    |   ... = A[3]
3259    | endloop_1
3260 
3261    in which case the distance vectors are (0) and (1).  */
3262 
3263 static void
add_distance_for_zero_overlaps(struct data_dependence_relation * ddr)3264 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3265 {
3266   unsigned i, j;
3267 
3268   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3269     {
3270       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3271       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3272       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3273 
3274       for (j = 0; j < ca->n; j++)
3275 	if (affine_function_zero_p (ca->fns[j]))
3276 	  {
3277 	    insert_innermost_unit_dist_vector (ddr);
3278 	    return;
3279 	  }
3280 
3281       for (j = 0; j < cb->n; j++)
3282 	if (affine_function_zero_p (cb->fns[j]))
3283 	  {
3284 	    insert_innermost_unit_dist_vector (ddr);
3285 	    return;
3286 	  }
3287     }
3288 }
3289 
3290 /* Compute the classic per loop distance vector.  DDR is the data
3291    dependence relation to build a vector from.  Return false when fail
3292    to represent the data dependence as a distance vector.  */
3293 
3294 static bool
build_classic_dist_vector(struct data_dependence_relation * ddr,struct loop * loop_nest)3295 build_classic_dist_vector (struct data_dependence_relation *ddr,
3296 			   struct loop *loop_nest)
3297 {
3298   bool init_b = false;
3299   int index_carry = DDR_NB_LOOPS (ddr);
3300   lambda_vector dist_v;
3301 
3302   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3303     return false;
3304 
3305   if (same_access_functions (ddr))
3306     {
3307       /* Save the 0 vector.  */
3308       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3309       save_dist_v (ddr, dist_v);
3310 
3311       if (constant_access_functions (ddr))
3312 	add_distance_for_zero_overlaps (ddr);
3313 
3314       if (DDR_NB_LOOPS (ddr) > 1)
3315 	add_other_self_distances (ddr);
3316 
3317       return true;
3318     }
3319 
3320   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3321   if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3322 				    dist_v, &init_b, &index_carry))
3323     return false;
3324 
3325   /* Save the distance vector if we initialized one.  */
3326   if (init_b)
3327     {
3328       /* Verify a basic constraint: classic distance vectors should
3329 	 always be lexicographically positive.
3330 
3331 	 Data references are collected in the order of execution of
3332 	 the program, thus for the following loop
3333 
3334 	 | for (i = 1; i < 100; i++)
3335 	 |   for (j = 1; j < 100; j++)
3336 	 |     {
3337 	 |       t = T[j+1][i-1];  // A
3338 	 |       T[j][i] = t + 2;  // B
3339 	 |     }
3340 
3341 	 references are collected following the direction of the wind:
3342 	 A then B.  The data dependence tests are performed also
3343 	 following this order, such that we're looking at the distance
3344 	 separating the elements accessed by A from the elements later
3345 	 accessed by B.  But in this example, the distance returned by
3346 	 test_dep (A, B) is lexicographically negative (-1, 1), that
3347 	 means that the access A occurs later than B with respect to
3348 	 the outer loop, ie. we're actually looking upwind.  In this
3349 	 case we solve test_dep (B, A) looking downwind to the
3350 	 lexicographically positive solution, that returns the
3351 	 distance vector (1, -1).  */
3352       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3353 	{
3354 	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3355 	  if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3356 					      loop_nest))
3357 	    return false;
3358 	  compute_subscript_distance (ddr);
3359 	  if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3360 					    save_v, &init_b, &index_carry))
3361 	    return false;
3362 	  save_dist_v (ddr, save_v);
3363 	  DDR_REVERSED_P (ddr) = true;
3364 
3365 	  /* In this case there is a dependence forward for all the
3366 	     outer loops:
3367 
3368 	     | for (k = 1; k < 100; k++)
3369 	     |  for (i = 1; i < 100; i++)
3370 	     |   for (j = 1; j < 100; j++)
3371 	     |     {
3372 	     |       t = T[j+1][i-1];  // A
3373 	     |       T[j][i] = t + 2;  // B
3374 	     |     }
3375 
3376 	     the vectors are:
3377 	     (0,  1, -1)
3378 	     (1,  1, -1)
3379 	     (1, -1,  1)
3380 	  */
3381 	  if (DDR_NB_LOOPS (ddr) > 1)
3382 	    {
3383  	      add_outer_distances (ddr, save_v, index_carry);
3384 	      add_outer_distances (ddr, dist_v, index_carry);
3385 	    }
3386 	}
3387       else
3388 	{
3389 	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3390 	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3391 
3392 	  if (DDR_NB_LOOPS (ddr) > 1)
3393 	    {
3394 	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3395 
3396 	      if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3397 						  DDR_A (ddr), loop_nest))
3398 		return false;
3399 	      compute_subscript_distance (ddr);
3400 	      if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3401 						opposite_v, &init_b,
3402 						&index_carry))
3403 		return false;
3404 
3405 	      save_dist_v (ddr, save_v);
3406 	      add_outer_distances (ddr, dist_v, index_carry);
3407 	      add_outer_distances (ddr, opposite_v, index_carry);
3408 	    }
3409 	  else
3410 	    save_dist_v (ddr, save_v);
3411 	}
3412     }
3413   else
3414     {
3415       /* There is a distance of 1 on all the outer loops: Example:
3416 	 there is a dependence of distance 1 on loop_1 for the array A.
3417 
3418 	 | loop_1
3419 	 |   A[5] = ...
3420 	 | endloop
3421       */
3422       add_outer_distances (ddr, dist_v,
3423 			   lambda_vector_first_nz (dist_v,
3424 						   DDR_NB_LOOPS (ddr), 0));
3425     }
3426 
3427   if (dump_file && (dump_flags & TDF_DETAILS))
3428     {
3429       unsigned i;
3430 
3431       fprintf (dump_file, "(build_classic_dist_vector\n");
3432       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3433 	{
3434 	  fprintf (dump_file, "  dist_vector = (");
3435 	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3436 			       DDR_NB_LOOPS (ddr));
3437 	  fprintf (dump_file, "  )\n");
3438 	}
3439       fprintf (dump_file, ")\n");
3440     }
3441 
3442   return true;
3443 }
3444 
3445 /* Return the direction for a given distance.
3446    FIXME: Computing dir this way is suboptimal, since dir can catch
3447    cases that dist is unable to represent.  */
3448 
3449 static inline enum data_dependence_direction
dir_from_dist(int dist)3450 dir_from_dist (int dist)
3451 {
3452   if (dist > 0)
3453     return dir_positive;
3454   else if (dist < 0)
3455     return dir_negative;
3456   else
3457     return dir_equal;
3458 }
3459 
3460 /* Compute the classic per loop direction vector.  DDR is the data
3461    dependence relation to build a vector from.  */
3462 
3463 static void
build_classic_dir_vector(struct data_dependence_relation * ddr)3464 build_classic_dir_vector (struct data_dependence_relation *ddr)
3465 {
3466   unsigned i, j;
3467   lambda_vector dist_v;
3468 
3469   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3470     {
3471       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3472 
3473       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3474 	dir_v[j] = dir_from_dist (dist_v[j]);
3475 
3476       save_dir_v (ddr, dir_v);
3477     }
3478 }
3479 
3480 /* Helper function.  Returns true when there is a dependence between
3481    data references DRA and DRB.  */
3482 
3483 static bool
subscript_dependence_tester_1(struct data_dependence_relation * ddr,struct data_reference * dra,struct data_reference * drb,struct loop * loop_nest)3484 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3485 			       struct data_reference *dra,
3486 			       struct data_reference *drb,
3487 			       struct loop *loop_nest)
3488 {
3489   unsigned int i;
3490   tree last_conflicts;
3491   struct subscript *subscript;
3492   tree res = NULL_TREE;
3493 
3494   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
3495     {
3496       conflict_function *overlaps_a, *overlaps_b;
3497 
3498       analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3499 				      DR_ACCESS_FN (drb, i),
3500 				      &overlaps_a, &overlaps_b,
3501 				      &last_conflicts, loop_nest);
3502 
3503       if (SUB_CONFLICTS_IN_A (subscript))
3504 	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3505       if (SUB_CONFLICTS_IN_B (subscript))
3506 	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3507 
3508       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3509       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3510       SUB_LAST_CONFLICT (subscript) = last_conflicts;
3511 
3512       /* If there is any undetermined conflict function we have to
3513          give a conservative answer in case we cannot prove that
3514 	 no dependence exists when analyzing another subscript.  */
3515       if (CF_NOT_KNOWN_P (overlaps_a)
3516  	  || CF_NOT_KNOWN_P (overlaps_b))
3517  	{
3518 	  res = chrec_dont_know;
3519 	  continue;
3520  	}
3521 
3522       /* When there is a subscript with no dependence we can stop.  */
3523       else if (CF_NO_DEPENDENCE_P (overlaps_a)
3524  	       || CF_NO_DEPENDENCE_P (overlaps_b))
3525  	{
3526 	  res = chrec_known;
3527 	  break;
3528  	}
3529     }
3530 
3531   if (res == NULL_TREE)
3532     return true;
3533 
3534   if (res == chrec_known)
3535     dependence_stats.num_dependence_independent++;
3536   else
3537     dependence_stats.num_dependence_undetermined++;
3538   finalize_ddr_dependent (ddr, res);
3539   return false;
3540 }
3541 
3542 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
3543 
3544 static void
subscript_dependence_tester(struct data_dependence_relation * ddr,struct loop * loop_nest)3545 subscript_dependence_tester (struct data_dependence_relation *ddr,
3546 			     struct loop *loop_nest)
3547 {
3548   if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3549     dependence_stats.num_dependence_dependent++;
3550 
3551   compute_subscript_distance (ddr);
3552   if (build_classic_dist_vector (ddr, loop_nest))
3553     build_classic_dir_vector (ddr);
3554 }
3555 
3556 /* Returns true when all the access functions of A are affine or
3557    constant with respect to LOOP_NEST.  */
3558 
3559 static bool
access_functions_are_affine_or_constant_p(const struct data_reference * a,const struct loop * loop_nest)3560 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3561 					   const struct loop *loop_nest)
3562 {
3563   unsigned int i;
3564   vec<tree> fns = DR_ACCESS_FNS (a);
3565   tree t;
3566 
3567   FOR_EACH_VEC_ELT (fns, i, t)
3568     if (!evolution_function_is_invariant_p (t, loop_nest->num)
3569 	&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3570       return false;
3571 
3572   return true;
3573 }
3574 
3575 /* Initializes an equation for an OMEGA problem using the information
3576    contained in the ACCESS_FUN.  Returns true when the operation
3577    succeeded.
3578 
3579    PB is the omega constraint system.
3580    EQ is the number of the equation to be initialized.
3581    OFFSET is used for shifting the variables names in the constraints:
3582    a constrain is composed of 2 * the number of variables surrounding
3583    dependence accesses.  OFFSET is set either to 0 for the first n variables,
3584    then it is set to n.
3585    ACCESS_FUN is expected to be an affine chrec.  */
3586 
3587 static bool
init_omega_eq_with_af(omega_pb pb,unsigned eq,unsigned int offset,tree access_fun,struct data_dependence_relation * ddr)3588 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3589 		       unsigned int offset, tree access_fun,
3590 		       struct data_dependence_relation *ddr)
3591 {
3592   switch (TREE_CODE (access_fun))
3593     {
3594     case POLYNOMIAL_CHREC:
3595       {
3596 	tree left = CHREC_LEFT (access_fun);
3597 	tree right = CHREC_RIGHT (access_fun);
3598 	int var = CHREC_VARIABLE (access_fun);
3599 	unsigned var_idx;
3600 
3601 	if (TREE_CODE (right) != INTEGER_CST)
3602 	  return false;
3603 
3604 	var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3605 	pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3606 
3607 	/* Compute the innermost loop index.  */
3608 	DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3609 
3610 	if (offset == 0)
3611 	  pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3612 	    += int_cst_value (right);
3613 
3614 	switch (TREE_CODE (left))
3615 	  {
3616 	  case POLYNOMIAL_CHREC:
3617 	    return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3618 
3619 	  case INTEGER_CST:
3620 	    pb->eqs[eq].coef[0] += int_cst_value (left);
3621 	    return true;
3622 
3623 	  default:
3624 	    return false;
3625 	  }
3626       }
3627 
3628     case INTEGER_CST:
3629       pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3630       return true;
3631 
3632     default:
3633       return false;
3634     }
3635 }
3636 
3637 /* As explained in the comments preceding init_omega_for_ddr, we have
3638    to set up a system for each loop level, setting outer loops
3639    variation to zero, and current loop variation to positive or zero.
3640    Save each lexico positive distance vector.  */
3641 
3642 static void
omega_extract_distance_vectors(omega_pb pb,struct data_dependence_relation * ddr)3643 omega_extract_distance_vectors (omega_pb pb,
3644 				struct data_dependence_relation *ddr)
3645 {
3646   int eq, geq;
3647   unsigned i, j;
3648   struct loop *loopi, *loopj;
3649   enum omega_result res;
3650 
3651   /* Set a new problem for each loop in the nest.  The basis is the
3652      problem that we have initialized until now.  On top of this we
3653      add new constraints.  */
3654   for (i = 0; i <= DDR_INNER_LOOP (ddr)
3655               && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3656     {
3657       int dist = 0;
3658       omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3659 					   DDR_NB_LOOPS (ddr));
3660 
3661       omega_copy_problem (copy, pb);
3662 
3663       /* For all the outer loops "loop_j", add "dj = 0".  */
3664       for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3665 	{
3666 	  eq = omega_add_zero_eq (copy, omega_black);
3667 	  copy->eqs[eq].coef[j + 1] = 1;
3668 	}
3669 
3670       /* For "loop_i", add "0 <= di".  */
3671       geq = omega_add_zero_geq (copy, omega_black);
3672       copy->geqs[geq].coef[i + 1] = 1;
3673 
3674       /* Reduce the constraint system, and test that the current
3675 	 problem is feasible.  */
3676       res = omega_simplify_problem (copy);
3677       if (res == omega_false
3678 	  || res == omega_unknown
3679 	  || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3680 	goto next_problem;
3681 
3682       for (eq = 0; eq < copy->num_subs; eq++)
3683 	if (copy->subs[eq].key == (int) i + 1)
3684 	  {
3685 	    dist = copy->subs[eq].coef[0];
3686 	    goto found_dist;
3687 	  }
3688 
3689       if (dist == 0)
3690 	{
3691 	  /* Reinitialize problem...  */
3692 	  omega_copy_problem (copy, pb);
3693 	  for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3694 	    {
3695 	      eq = omega_add_zero_eq (copy, omega_black);
3696 	      copy->eqs[eq].coef[j + 1] = 1;
3697 	    }
3698 
3699 	  /* ..., but this time "di = 1".  */
3700 	  eq = omega_add_zero_eq (copy, omega_black);
3701 	  copy->eqs[eq].coef[i + 1] = 1;
3702 	  copy->eqs[eq].coef[0] = -1;
3703 
3704 	  res = omega_simplify_problem (copy);
3705 	  if (res == omega_false
3706 	      || res == omega_unknown
3707 	      || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3708 	    goto next_problem;
3709 
3710 	  for (eq = 0; eq < copy->num_subs; eq++)
3711 	    if (copy->subs[eq].key == (int) i + 1)
3712 	      {
3713 		dist = copy->subs[eq].coef[0];
3714 		goto found_dist;
3715 	      }
3716 	}
3717 
3718     found_dist:;
3719       /* Save the lexicographically positive distance vector.  */
3720       if (dist >= 0)
3721 	{
3722 	  lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3723 	  lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3724 
3725 	  dist_v[i] = dist;
3726 
3727 	  for (eq = 0; eq < copy->num_subs; eq++)
3728 	    if (copy->subs[eq].key > 0)
3729 	      {
3730 		dist = copy->subs[eq].coef[0];
3731 		dist_v[copy->subs[eq].key - 1] = dist;
3732 	      }
3733 
3734 	  for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3735 	    dir_v[j] = dir_from_dist (dist_v[j]);
3736 
3737 	  save_dist_v (ddr, dist_v);
3738 	  save_dir_v (ddr, dir_v);
3739 	}
3740 
3741     next_problem:;
3742       omega_free_problem (copy);
3743     }
3744 }
3745 
3746 /* This is called for each subscript of a tuple of data references:
3747    insert an equality for representing the conflicts.  */
3748 
3749 static bool
omega_setup_subscript(tree access_fun_a,tree access_fun_b,struct data_dependence_relation * ddr,omega_pb pb,bool * maybe_dependent)3750 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3751 		       struct data_dependence_relation *ddr,
3752 		       omega_pb pb, bool *maybe_dependent)
3753 {
3754   int eq;
3755   tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3756 				     TREE_TYPE (access_fun_b));
3757   tree fun_a = chrec_convert (type, access_fun_a, NULL);
3758   tree fun_b = chrec_convert (type, access_fun_b, NULL);
3759   tree difference = chrec_fold_minus (type, fun_a, fun_b);
3760   tree minus_one;
3761 
3762   /* When the fun_a - fun_b is not constant, the dependence is not
3763      captured by the classic distance vector representation.  */
3764   if (TREE_CODE (difference) != INTEGER_CST)
3765     return false;
3766 
3767   /* ZIV test.  */
3768   if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3769     {
3770       /* There is no dependence.  */
3771       *maybe_dependent = false;
3772       return true;
3773     }
3774 
3775   minus_one = build_int_cst (type, -1);
3776   fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3777 
3778   eq = omega_add_zero_eq (pb, omega_black);
3779   if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3780       || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3781     /* There is probably a dependence, but the system of
3782        constraints cannot be built: answer "don't know".  */
3783     return false;
3784 
3785   /* GCD test.  */
3786   if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3787       && !int_divides_p (lambda_vector_gcd
3788 			 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3789 			  2 * DDR_NB_LOOPS (ddr)),
3790 			 pb->eqs[eq].coef[0]))
3791     {
3792       /* There is no dependence.  */
3793       *maybe_dependent = false;
3794       return true;
3795     }
3796 
3797   return true;
3798 }
3799 
3800 /* Helper function, same as init_omega_for_ddr but specialized for
3801    data references A and B.  */
3802 
3803 static bool
init_omega_for_ddr_1(struct data_reference * dra,struct data_reference * drb,struct data_dependence_relation * ddr,omega_pb pb,bool * maybe_dependent)3804 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3805 		      struct data_dependence_relation *ddr,
3806 		      omega_pb pb, bool *maybe_dependent)
3807 {
3808   unsigned i;
3809   int ineq;
3810   struct loop *loopi;
3811   unsigned nb_loops = DDR_NB_LOOPS (ddr);
3812 
3813   /* Insert an equality per subscript.  */
3814   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3815     {
3816       if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3817 				  ddr, pb, maybe_dependent))
3818 	return false;
3819       else if (*maybe_dependent == false)
3820 	{
3821 	  /* There is no dependence.  */
3822 	  DDR_ARE_DEPENDENT (ddr) = chrec_known;
3823 	  return true;
3824 	}
3825     }
3826 
3827   /* Insert inequalities: constraints corresponding to the iteration
3828      domain, i.e. the loops surrounding the references "loop_x" and
3829      the distance variables "dx".  The layout of the OMEGA
3830      representation is as follows:
3831      - coef[0] is the constant
3832      - coef[1..nb_loops] are the protected variables that will not be
3833      removed by the solver: the "dx"
3834      - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3835   */
3836   for (i = 0; i <= DDR_INNER_LOOP (ddr)
3837 	      && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3838     {
3839       HOST_WIDE_INT nbi = max_stmt_executions_int (loopi);
3840 
3841       /* 0 <= loop_x */
3842       ineq = omega_add_zero_geq (pb, omega_black);
3843       pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3844 
3845       /* 0 <= loop_x + dx */
3846       ineq = omega_add_zero_geq (pb, omega_black);
3847       pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3848       pb->geqs[ineq].coef[i + 1] = 1;
3849 
3850       if (nbi != -1)
3851 	{
3852 	  /* loop_x <= nb_iters */
3853 	  ineq = omega_add_zero_geq (pb, omega_black);
3854 	  pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3855 	  pb->geqs[ineq].coef[0] = nbi;
3856 
3857 	  /* loop_x + dx <= nb_iters */
3858 	  ineq = omega_add_zero_geq (pb, omega_black);
3859 	  pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3860 	  pb->geqs[ineq].coef[i + 1] = -1;
3861 	  pb->geqs[ineq].coef[0] = nbi;
3862 
3863 	  /* A step "dx" bigger than nb_iters is not feasible, so
3864 	     add "0 <= nb_iters + dx",  */
3865 	  ineq = omega_add_zero_geq (pb, omega_black);
3866 	  pb->geqs[ineq].coef[i + 1] = 1;
3867 	  pb->geqs[ineq].coef[0] = nbi;
3868 	  /* and "dx <= nb_iters".  */
3869 	  ineq = omega_add_zero_geq (pb, omega_black);
3870 	  pb->geqs[ineq].coef[i + 1] = -1;
3871 	  pb->geqs[ineq].coef[0] = nbi;
3872 	}
3873     }
3874 
3875   omega_extract_distance_vectors (pb, ddr);
3876 
3877   return true;
3878 }
3879 
3880 /* Sets up the Omega dependence problem for the data dependence
3881    relation DDR.  Returns false when the constraint system cannot be
3882    built, ie. when the test answers "don't know".  Returns true
3883    otherwise, and when independence has been proved (using one of the
3884    trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3885    set MAYBE_DEPENDENT to true.
3886 
3887    Example: for setting up the dependence system corresponding to the
3888    conflicting accesses
3889 
3890    | loop_i
3891    |   loop_j
3892    |     A[i, i+1] = ...
3893    |     ... A[2*j, 2*(i + j)]
3894    |   endloop_j
3895    | endloop_i
3896 
3897    the following constraints come from the iteration domain:
3898 
3899    0 <= i <= Ni
3900    0 <= i + di <= Ni
3901    0 <= j <= Nj
3902    0 <= j + dj <= Nj
3903 
3904    where di, dj are the distance variables.  The constraints
3905    representing the conflicting elements are:
3906 
3907    i = 2 * (j + dj)
3908    i + 1 = 2 * (i + di + j + dj)
3909 
3910    For asking that the resulting distance vector (di, dj) be
3911    lexicographically positive, we insert the constraint "di >= 0".  If
3912    "di = 0" in the solution, we fix that component to zero, and we
3913    look at the inner loops: we set a new problem where all the outer
3914    loop distances are zero, and fix this inner component to be
3915    positive.  When one of the components is positive, we save that
3916    distance, and set a new problem where the distance on this loop is
3917    zero, searching for other distances in the inner loops.  Here is
3918    the classic example that illustrates that we have to set for each
3919    inner loop a new problem:
3920 
3921    | loop_1
3922    |   loop_2
3923    |     A[10]
3924    |   endloop_2
3925    | endloop_1
3926 
3927    we have to save two distances (1, 0) and (0, 1).
3928 
3929    Given two array references, refA and refB, we have to set the
3930    dependence problem twice, refA vs. refB and refB vs. refA, and we
3931    cannot do a single test, as refB might occur before refA in the
3932    inner loops, and the contrary when considering outer loops: ex.
3933 
3934    | loop_0
3935    |   loop_1
3936    |     loop_2
3937    |       T[{1,+,1}_2][{1,+,1}_1]  // refA
3938    |       T[{2,+,1}_2][{0,+,1}_1]  // refB
3939    |     endloop_2
3940    |   endloop_1
3941    | endloop_0
3942 
3943    refB touches the elements in T before refA, and thus for the same
3944    loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3945    but for successive loop_0 iterations, we have (1, -1, 1)
3946 
3947    The Omega solver expects the distance variables ("di" in the
3948    previous example) to come first in the constraint system (as
3949    variables to be protected, or "safe" variables), the constraint
3950    system is built using the following layout:
3951 
3952    "cst | distance vars | index vars".
3953 */
3954 
3955 static bool
init_omega_for_ddr(struct data_dependence_relation * ddr,bool * maybe_dependent)3956 init_omega_for_ddr (struct data_dependence_relation *ddr,
3957 		    bool *maybe_dependent)
3958 {
3959   omega_pb pb;
3960   bool res = false;
3961 
3962   *maybe_dependent = true;
3963 
3964   if (same_access_functions (ddr))
3965     {
3966       unsigned j;
3967       lambda_vector dir_v;
3968 
3969       /* Save the 0 vector.  */
3970       save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3971       dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3972       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3973 	dir_v[j] = dir_equal;
3974       save_dir_v (ddr, dir_v);
3975 
3976       /* Save the dependences carried by outer loops.  */
3977       pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3978       res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3979 				  maybe_dependent);
3980       omega_free_problem (pb);
3981       return res;
3982     }
3983 
3984   /* Omega expects the protected variables (those that have to be kept
3985      after elimination) to appear first in the constraint system.
3986      These variables are the distance variables.  In the following
3987      initialization we declare NB_LOOPS safe variables, and the total
3988      number of variables for the constraint system is 2*NB_LOOPS.  */
3989   pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3990   res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3991 			      maybe_dependent);
3992   omega_free_problem (pb);
3993 
3994   /* Stop computation if not decidable, or no dependence.  */
3995   if (res == false || *maybe_dependent == false)
3996     return res;
3997 
3998   pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3999   res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
4000 			      maybe_dependent);
4001   omega_free_problem (pb);
4002 
4003   return res;
4004 }
4005 
4006 /* Return true when DDR contains the same information as that stored
4007    in DIR_VECTS and in DIST_VECTS, return false otherwise.   */
4008 
4009 static bool
ddr_consistent_p(FILE * file,struct data_dependence_relation * ddr,vec<lambda_vector> dist_vects,vec<lambda_vector> dir_vects)4010 ddr_consistent_p (FILE *file,
4011 		  struct data_dependence_relation *ddr,
4012 		  vec<lambda_vector> dist_vects,
4013 		  vec<lambda_vector> dir_vects)
4014 {
4015   unsigned int i, j;
4016 
4017   /* If dump_file is set, output there.  */
4018   if (dump_file && (dump_flags & TDF_DETAILS))
4019     file = dump_file;
4020 
4021   if (dist_vects.length () != DDR_NUM_DIST_VECTS (ddr))
4022     {
4023       lambda_vector b_dist_v;
4024       fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4025 	       dist_vects.length (),
4026 	       DDR_NUM_DIST_VECTS (ddr));
4027 
4028       fprintf (file, "Banerjee dist vectors:\n");
4029       FOR_EACH_VEC_ELT (dist_vects, i, b_dist_v)
4030 	print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
4031 
4032       fprintf (file, "Omega dist vectors:\n");
4033       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4034 	print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
4035 
4036       fprintf (file, "data dependence relation:\n");
4037       dump_data_dependence_relation (file, ddr);
4038 
4039       fprintf (file, ")\n");
4040       return false;
4041     }
4042 
4043   if (dir_vects.length () != DDR_NUM_DIR_VECTS (ddr))
4044     {
4045       fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4046 	       dir_vects.length (),
4047 	       DDR_NUM_DIR_VECTS (ddr));
4048       return false;
4049     }
4050 
4051   for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4052     {
4053       lambda_vector a_dist_v;
4054       lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
4055 
4056       /* Distance vectors are not ordered in the same way in the DDR
4057 	 and in the DIST_VECTS: search for a matching vector.  */
4058       FOR_EACH_VEC_ELT (dist_vects, j, a_dist_v)
4059 	if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
4060 	  break;
4061 
4062       if (j == dist_vects.length ())
4063 	{
4064 	  fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
4065 	  print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
4066 	  fprintf (file, "not found in Omega dist vectors:\n");
4067 	  print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4068 	  fprintf (file, "data dependence relation:\n");
4069 	  dump_data_dependence_relation (file, ddr);
4070 	  fprintf (file, ")\n");
4071 	}
4072     }
4073 
4074   for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4075     {
4076       lambda_vector a_dir_v;
4077       lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4078 
4079       /* Direction vectors are not ordered in the same way in the DDR
4080 	 and in the DIR_VECTS: search for a matching vector.  */
4081       FOR_EACH_VEC_ELT (dir_vects, j, a_dir_v)
4082 	if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4083 	  break;
4084 
4085       if (j == dist_vects.length ())
4086 	{
4087 	  fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4088 	  print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4089 	  fprintf (file, "not found in Omega dir vectors:\n");
4090 	  print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4091 	  fprintf (file, "data dependence relation:\n");
4092 	  dump_data_dependence_relation (file, ddr);
4093 	  fprintf (file, ")\n");
4094 	}
4095     }
4096 
4097   return true;
4098 }
4099 
4100 /* This computes the affine dependence relation between A and B with
4101    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
4102    independence between two accesses, while CHREC_DONT_KNOW is used
4103    for representing the unknown relation.
4104 
4105    Note that it is possible to stop the computation of the dependence
4106    relation the first time we detect a CHREC_KNOWN element for a given
4107    subscript.  */
4108 
4109 void
compute_affine_dependence(struct data_dependence_relation * ddr,struct loop * loop_nest)4110 compute_affine_dependence (struct data_dependence_relation *ddr,
4111 			   struct loop *loop_nest)
4112 {
4113   struct data_reference *dra = DDR_A (ddr);
4114   struct data_reference *drb = DDR_B (ddr);
4115 
4116   if (dump_file && (dump_flags & TDF_DETAILS))
4117     {
4118       fprintf (dump_file, "(compute_affine_dependence\n");
4119       fprintf (dump_file, "  stmt_a: ");
4120       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4121       fprintf (dump_file, "  stmt_b: ");
4122       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4123     }
4124 
4125   /* Analyze only when the dependence relation is not yet known.  */
4126   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4127     {
4128       dependence_stats.num_dependence_tests++;
4129 
4130       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4131 	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
4132 	{
4133 	  subscript_dependence_tester (ddr, loop_nest);
4134 
4135 	  if (flag_check_data_deps)
4136 	    {
4137 	      /* Dump the dependences from the first algorithm.  */
4138 	      if (dump_file && (dump_flags & TDF_DETAILS))
4139 		{
4140 		  fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4141 		  dump_data_dependence_relation (dump_file, ddr);
4142 		}
4143 
4144 	      if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4145 		{
4146 		  bool maybe_dependent;
4147 		  vec<lambda_vector> dir_vects, dist_vects;
4148 
4149 		  /* Save the result of the first DD analyzer.  */
4150 		  dist_vects = DDR_DIST_VECTS (ddr);
4151 		  dir_vects = DDR_DIR_VECTS (ddr);
4152 
4153 		  /* Reset the information.  */
4154 		  DDR_DIST_VECTS (ddr).create (0);
4155 		  DDR_DIR_VECTS (ddr).create (0);
4156 
4157 		  /* Compute the same information using Omega.  */
4158 		  if (!init_omega_for_ddr (ddr, &maybe_dependent))
4159 		    goto csys_dont_know;
4160 
4161 		  if (dump_file && (dump_flags & TDF_DETAILS))
4162 		    {
4163 		      fprintf (dump_file, "Omega Analyzer\n");
4164 		      dump_data_dependence_relation (dump_file, ddr);
4165 		    }
4166 
4167 		  /* Check that we get the same information.  */
4168 		  if (maybe_dependent)
4169 		    gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4170 						  dir_vects));
4171 		}
4172 	    }
4173 	}
4174 
4175       /* As a last case, if the dependence cannot be determined, or if
4176 	 the dependence is considered too difficult to determine, answer
4177 	 "don't know".  */
4178       else
4179 	{
4180 	csys_dont_know:;
4181 	  dependence_stats.num_dependence_undetermined++;
4182 
4183 	  if (dump_file && (dump_flags & TDF_DETAILS))
4184 	    {
4185 	      fprintf (dump_file, "Data ref a:\n");
4186 	      dump_data_reference (dump_file, dra);
4187 	      fprintf (dump_file, "Data ref b:\n");
4188 	      dump_data_reference (dump_file, drb);
4189 	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4190 	    }
4191 	  finalize_ddr_dependent (ddr, chrec_dont_know);
4192 	}
4193     }
4194 
4195   if (dump_file && (dump_flags & TDF_DETAILS))
4196     {
4197       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4198 	fprintf (dump_file, ") -> no dependence\n");
4199       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4200 	fprintf (dump_file, ") -> dependence analysis failed\n");
4201       else
4202 	fprintf (dump_file, ")\n");
4203     }
4204 }
4205 
4206 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4207    the data references in DATAREFS, in the LOOP_NEST.  When
4208    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4209    relations.  Return true when successful, i.e. data references number
4210    is small enough to be handled.  */
4211 
4212 bool
compute_all_dependences(vec<data_reference_p> datarefs,vec<ddr_p> * dependence_relations,vec<loop_p> loop_nest,bool compute_self_and_rr)4213 compute_all_dependences (vec<data_reference_p> datarefs,
4214 			 vec<ddr_p> *dependence_relations,
4215 			 vec<loop_p> loop_nest,
4216 			 bool compute_self_and_rr)
4217 {
4218   struct data_dependence_relation *ddr;
4219   struct data_reference *a, *b;
4220   unsigned int i, j;
4221 
4222   if ((int) datarefs.length ()
4223       > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4224     {
4225       struct data_dependence_relation *ddr;
4226 
4227       /* Insert a single relation into dependence_relations:
4228 	 chrec_dont_know.  */
4229       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4230       dependence_relations->safe_push (ddr);
4231       return false;
4232     }
4233 
4234   FOR_EACH_VEC_ELT (datarefs, i, a)
4235     for (j = i + 1; datarefs.iterate (j, &b); j++)
4236       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4237 	{
4238 	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
4239 	  dependence_relations->safe_push (ddr);
4240           if (loop_nest.exists ())
4241    	    compute_affine_dependence (ddr, loop_nest[0]);
4242 	}
4243 
4244   if (compute_self_and_rr)
4245     FOR_EACH_VEC_ELT (datarefs, i, a)
4246       {
4247 	ddr = initialize_data_dependence_relation (a, a, loop_nest);
4248 	dependence_relations->safe_push (ddr);
4249         if (loop_nest.exists ())
4250    	  compute_affine_dependence (ddr, loop_nest[0]);
4251       }
4252 
4253   return true;
4254 }
4255 
4256 /* Describes a location of a memory reference.  */
4257 
4258 typedef struct data_ref_loc_d
4259 {
4260     /* Position of the memory reference.  */
4261     tree *pos;
4262 
4263       /* True if the memory reference is read.  */
4264       bool is_read;
4265 } data_ref_loc;
4266 
4267 
4268 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
4269    true if STMT clobbers memory, false otherwise.  */
4270 
4271 static bool
get_references_in_stmt(gimple stmt,vec<data_ref_loc> * references)4272 get_references_in_stmt (gimple stmt, vec<data_ref_loc> *references)
4273 {
4274   bool clobbers_memory = false;
4275   data_ref_loc ref;
4276   tree *op0, *op1;
4277   enum gimple_code stmt_code = gimple_code (stmt);
4278 
4279   references->create (0);
4280 
4281   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4282      As we cannot model data-references to not spelled out
4283      accesses give up if they may occur.  */
4284   if ((stmt_code == GIMPLE_CALL
4285        && !(gimple_call_flags (stmt) & ECF_CONST))
4286       || (stmt_code == GIMPLE_ASM
4287 	  && (gimple_asm_volatile_p (stmt) || gimple_vuse (stmt))))
4288     clobbers_memory = true;
4289 
4290   if (!gimple_vuse (stmt))
4291     return clobbers_memory;
4292 
4293   if (stmt_code == GIMPLE_ASSIGN)
4294     {
4295       tree base;
4296       op0 = gimple_assign_lhs_ptr (stmt);
4297       op1 = gimple_assign_rhs1_ptr (stmt);
4298 
4299       if (DECL_P (*op1)
4300 	  || (REFERENCE_CLASS_P (*op1)
4301 	      && (base = get_base_address (*op1))
4302 	      && TREE_CODE (base) != SSA_NAME))
4303 	{
4304 	  ref.pos = op1;
4305 	  ref.is_read = true;
4306 	  references->safe_push (ref);
4307 	}
4308     }
4309   else if (stmt_code == GIMPLE_CALL)
4310     {
4311       unsigned i, n;
4312 
4313       op0 = gimple_call_lhs_ptr (stmt);
4314       n = gimple_call_num_args (stmt);
4315       for (i = 0; i < n; i++)
4316 	{
4317 	  op1 = gimple_call_arg_ptr (stmt, i);
4318 
4319 	  if (DECL_P (*op1)
4320 	      || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4321 	    {
4322 	      ref.pos = op1;
4323 	      ref.is_read = true;
4324 	      references->safe_push (ref);
4325 	    }
4326 	}
4327     }
4328   else
4329     return clobbers_memory;
4330 
4331   if (*op0
4332       && (DECL_P (*op0)
4333 	  || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4334     {
4335       ref.pos = op0;
4336       ref.is_read = false;
4337       references->safe_push (ref);
4338     }
4339   return clobbers_memory;
4340 }
4341 
4342 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
4343    reference, returns false, otherwise returns true.  NEST is the outermost
4344    loop of the loop nest in which the references should be analyzed.  */
4345 
4346 bool
find_data_references_in_stmt(struct loop * nest,gimple stmt,vec<data_reference_p> * datarefs)4347 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4348 			      vec<data_reference_p> *datarefs)
4349 {
4350   unsigned i;
4351   vec<data_ref_loc> references;
4352   data_ref_loc *ref;
4353   bool ret = true;
4354   data_reference_p dr;
4355 
4356   if (get_references_in_stmt (stmt, &references))
4357     {
4358       references.release ();
4359       return false;
4360     }
4361 
4362   FOR_EACH_VEC_ELT (references, i, ref)
4363     {
4364       dr = create_data_ref (nest, loop_containing_stmt (stmt),
4365 			    *ref->pos, stmt, ref->is_read);
4366       gcc_assert (dr != NULL);
4367       datarefs->safe_push (dr);
4368     }
4369   references.release ();
4370   return ret;
4371 }
4372 
4373 /* Stores the data references in STMT to DATAREFS.  If there is an
4374    unanalyzable reference, returns false, otherwise returns true.
4375    NEST is the outermost loop of the loop nest in which the references
4376    should be instantiated, LOOP is the loop in which the references
4377    should be analyzed.  */
4378 
4379 bool
graphite_find_data_references_in_stmt(loop_p nest,loop_p loop,gimple stmt,vec<data_reference_p> * datarefs)4380 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4381 				       vec<data_reference_p> *datarefs)
4382 {
4383   unsigned i;
4384   vec<data_ref_loc> references;
4385   data_ref_loc *ref;
4386   bool ret = true;
4387   data_reference_p dr;
4388 
4389   if (get_references_in_stmt (stmt, &references))
4390     {
4391       references.release ();
4392       return false;
4393     }
4394 
4395   FOR_EACH_VEC_ELT (references, i, ref)
4396     {
4397       dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4398       gcc_assert (dr != NULL);
4399       datarefs->safe_push (dr);
4400     }
4401 
4402   references.release ();
4403   return ret;
4404 }
4405 
4406 /* Search the data references in LOOP, and record the information into
4407    DATAREFS.  Returns chrec_dont_know when failing to analyze a
4408    difficult case, returns NULL_TREE otherwise.  */
4409 
4410 tree
find_data_references_in_bb(struct loop * loop,basic_block bb,vec<data_reference_p> * datarefs)4411 find_data_references_in_bb (struct loop *loop, basic_block bb,
4412                             vec<data_reference_p> *datarefs)
4413 {
4414   gimple_stmt_iterator bsi;
4415 
4416   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4417     {
4418       gimple stmt = gsi_stmt (bsi);
4419 
4420       if (!find_data_references_in_stmt (loop, stmt, datarefs))
4421         {
4422           struct data_reference *res;
4423           res = XCNEW (struct data_reference);
4424           datarefs->safe_push (res);
4425 
4426           return chrec_dont_know;
4427         }
4428     }
4429 
4430   return NULL_TREE;
4431 }
4432 
4433 /* Search the data references in LOOP, and record the information into
4434    DATAREFS.  Returns chrec_dont_know when failing to analyze a
4435    difficult case, returns NULL_TREE otherwise.
4436 
4437    TODO: This function should be made smarter so that it can handle address
4438    arithmetic as if they were array accesses, etc.  */
4439 
4440 static tree
find_data_references_in_loop(struct loop * loop,vec<data_reference_p> * datarefs)4441 find_data_references_in_loop (struct loop *loop,
4442 			      vec<data_reference_p> *datarefs)
4443 {
4444   basic_block bb, *bbs;
4445   unsigned int i;
4446 
4447   bbs = get_loop_body_in_dom_order (loop);
4448 
4449   for (i = 0; i < loop->num_nodes; i++)
4450     {
4451       bb = bbs[i];
4452 
4453       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4454         {
4455           free (bbs);
4456           return chrec_dont_know;
4457         }
4458     }
4459   free (bbs);
4460 
4461   return NULL_TREE;
4462 }
4463 
4464 /* Recursive helper function.  */
4465 
4466 static bool
find_loop_nest_1(struct loop * loop,vec<loop_p> * loop_nest)4467 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
4468 {
4469   /* Inner loops of the nest should not contain siblings.  Example:
4470      when there are two consecutive loops,
4471 
4472      | loop_0
4473      |   loop_1
4474      |     A[{0, +, 1}_1]
4475      |   endloop_1
4476      |   loop_2
4477      |     A[{0, +, 1}_2]
4478      |   endloop_2
4479      | endloop_0
4480 
4481      the dependence relation cannot be captured by the distance
4482      abstraction.  */
4483   if (loop->next)
4484     return false;
4485 
4486   loop_nest->safe_push (loop);
4487   if (loop->inner)
4488     return find_loop_nest_1 (loop->inner, loop_nest);
4489   return true;
4490 }
4491 
4492 /* Return false when the LOOP is not well nested.  Otherwise return
4493    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
4494    contain the loops from the outermost to the innermost, as they will
4495    appear in the classic distance vector.  */
4496 
4497 bool
find_loop_nest(struct loop * loop,vec<loop_p> * loop_nest)4498 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
4499 {
4500   loop_nest->safe_push (loop);
4501   if (loop->inner)
4502     return find_loop_nest_1 (loop->inner, loop_nest);
4503   return true;
4504 }
4505 
4506 /* Returns true when the data dependences have been computed, false otherwise.
4507    Given a loop nest LOOP, the following vectors are returned:
4508    DATAREFS is initialized to all the array elements contained in this loop,
4509    DEPENDENCE_RELATIONS contains the relations between the data references.
4510    Compute read-read and self relations if
4511    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
4512 
4513 bool
compute_data_dependences_for_loop(struct loop * loop,bool compute_self_and_read_read_dependences,vec<loop_p> * loop_nest,vec<data_reference_p> * datarefs,vec<ddr_p> * dependence_relations)4514 compute_data_dependences_for_loop (struct loop *loop,
4515 				   bool compute_self_and_read_read_dependences,
4516 				   vec<loop_p> *loop_nest,
4517 				   vec<data_reference_p> *datarefs,
4518 				   vec<ddr_p> *dependence_relations)
4519 {
4520   bool res = true;
4521 
4522   memset (&dependence_stats, 0, sizeof (dependence_stats));
4523 
4524   /* If the loop nest is not well formed, or one of the data references
4525      is not computable, give up without spending time to compute other
4526      dependences.  */
4527   if (!loop
4528       || !find_loop_nest (loop, loop_nest)
4529       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4530       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4531 				   compute_self_and_read_read_dependences))
4532     res = false;
4533 
4534   if (dump_file && (dump_flags & TDF_STATS))
4535     {
4536       fprintf (dump_file, "Dependence tester statistics:\n");
4537 
4538       fprintf (dump_file, "Number of dependence tests: %d\n",
4539 	       dependence_stats.num_dependence_tests);
4540       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4541 	       dependence_stats.num_dependence_dependent);
4542       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4543 	       dependence_stats.num_dependence_independent);
4544       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4545 	       dependence_stats.num_dependence_undetermined);
4546 
4547       fprintf (dump_file, "Number of subscript tests: %d\n",
4548 	       dependence_stats.num_subscript_tests);
4549       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4550 	       dependence_stats.num_subscript_undetermined);
4551       fprintf (dump_file, "Number of same subscript function: %d\n",
4552 	       dependence_stats.num_same_subscript_function);
4553 
4554       fprintf (dump_file, "Number of ziv tests: %d\n",
4555 	       dependence_stats.num_ziv);
4556       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4557 	       dependence_stats.num_ziv_dependent);
4558       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4559 	       dependence_stats.num_ziv_independent);
4560       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4561 	       dependence_stats.num_ziv_unimplemented);
4562 
4563       fprintf (dump_file, "Number of siv tests: %d\n",
4564 	       dependence_stats.num_siv);
4565       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4566 	       dependence_stats.num_siv_dependent);
4567       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4568 	       dependence_stats.num_siv_independent);
4569       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4570 	       dependence_stats.num_siv_unimplemented);
4571 
4572       fprintf (dump_file, "Number of miv tests: %d\n",
4573 	       dependence_stats.num_miv);
4574       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4575 	       dependence_stats.num_miv_dependent);
4576       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4577 	       dependence_stats.num_miv_independent);
4578       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4579 	       dependence_stats.num_miv_unimplemented);
4580     }
4581 
4582   return res;
4583 }
4584 
4585 /* Returns true when the data dependences for the basic block BB have been
4586    computed, false otherwise.
4587    DATAREFS is initialized to all the array elements contained in this basic
4588    block, DEPENDENCE_RELATIONS contains the relations between the data
4589    references. Compute read-read and self relations if
4590    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
4591 bool
compute_data_dependences_for_bb(basic_block bb,bool compute_self_and_read_read_dependences,vec<data_reference_p> * datarefs,vec<ddr_p> * dependence_relations)4592 compute_data_dependences_for_bb (basic_block bb,
4593                                  bool compute_self_and_read_read_dependences,
4594                                  vec<data_reference_p> *datarefs,
4595                                  vec<ddr_p> *dependence_relations)
4596 {
4597   if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4598     return false;
4599 
4600   return compute_all_dependences (*datarefs, dependence_relations, vNULL,
4601 				  compute_self_and_read_read_dependences);
4602 }
4603 
4604 /* Entry point (for testing only).  Analyze all the data references
4605    and the dependence relations in LOOP.
4606 
4607    The data references are computed first.
4608 
4609    A relation on these nodes is represented by a complete graph.  Some
4610    of the relations could be of no interest, thus the relations can be
4611    computed on demand.
4612 
4613    In the following function we compute all the relations.  This is
4614    just a first implementation that is here for:
4615    - for showing how to ask for the dependence relations,
4616    - for the debugging the whole dependence graph,
4617    - for the dejagnu testcases and maintenance.
4618 
4619    It is possible to ask only for a part of the graph, avoiding to
4620    compute the whole dependence graph.  The computed dependences are
4621    stored in a knowledge base (KB) such that later queries don't
4622    recompute the same information.  The implementation of this KB is
4623    transparent to the optimizer, and thus the KB can be changed with a
4624    more efficient implementation, or the KB could be disabled.  */
4625 static void
analyze_all_data_dependences(struct loop * loop)4626 analyze_all_data_dependences (struct loop *loop)
4627 {
4628   unsigned int i;
4629   int nb_data_refs = 10;
4630   vec<data_reference_p> datarefs;
4631   datarefs.create (nb_data_refs);
4632   vec<ddr_p> dependence_relations;
4633   dependence_relations.create (nb_data_refs * nb_data_refs);
4634   vec<loop_p> loop_nest;
4635   loop_nest.create (3);
4636 
4637   /* Compute DDs on the whole function.  */
4638   compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4639 				     &dependence_relations);
4640 
4641   if (dump_file)
4642     {
4643       dump_data_dependence_relations (dump_file, dependence_relations);
4644       fprintf (dump_file, "\n\n");
4645 
4646       if (dump_flags & TDF_DETAILS)
4647 	dump_dist_dir_vectors (dump_file, dependence_relations);
4648 
4649       if (dump_flags & TDF_STATS)
4650 	{
4651 	  unsigned nb_top_relations = 0;
4652 	  unsigned nb_bot_relations = 0;
4653 	  unsigned nb_chrec_relations = 0;
4654 	  struct data_dependence_relation *ddr;
4655 
4656 	  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4657 	    {
4658 	      if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4659 		nb_top_relations++;
4660 
4661 	      else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4662 		nb_bot_relations++;
4663 
4664 	      else
4665 		nb_chrec_relations++;
4666 	    }
4667 
4668 	  gather_stats_on_scev_database ();
4669 	}
4670     }
4671 
4672   loop_nest.release ();
4673   free_dependence_relations (dependence_relations);
4674   free_data_refs (datarefs);
4675 }
4676 
4677 /* Computes all the data dependences and check that the results of
4678    several analyzers are the same.  */
4679 
4680 void
tree_check_data_deps(void)4681 tree_check_data_deps (void)
4682 {
4683   loop_iterator li;
4684   struct loop *loop_nest;
4685 
4686   FOR_EACH_LOOP (li, loop_nest, 0)
4687     analyze_all_data_dependences (loop_nest);
4688 }
4689 
4690 /* Free the memory used by a data dependence relation DDR.  */
4691 
4692 void
free_dependence_relation(struct data_dependence_relation * ddr)4693 free_dependence_relation (struct data_dependence_relation *ddr)
4694 {
4695   if (ddr == NULL)
4696     return;
4697 
4698   if (DDR_SUBSCRIPTS (ddr).exists ())
4699     free_subscripts (DDR_SUBSCRIPTS (ddr));
4700   DDR_DIST_VECTS (ddr).release ();
4701   DDR_DIR_VECTS (ddr).release ();
4702 
4703   free (ddr);
4704 }
4705 
4706 /* Free the memory used by the data dependence relations from
4707    DEPENDENCE_RELATIONS.  */
4708 
4709 void
free_dependence_relations(vec<ddr_p> dependence_relations)4710 free_dependence_relations (vec<ddr_p> dependence_relations)
4711 {
4712   unsigned int i;
4713   struct data_dependence_relation *ddr;
4714 
4715   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4716     if (ddr)
4717       free_dependence_relation (ddr);
4718 
4719   dependence_relations.release ();
4720 }
4721 
4722 /* Free the memory used by the data references from DATAREFS.  */
4723 
4724 void
free_data_refs(vec<data_reference_p> datarefs)4725 free_data_refs (vec<data_reference_p> datarefs)
4726 {
4727   unsigned int i;
4728   struct data_reference *dr;
4729 
4730   FOR_EACH_VEC_ELT (datarefs, i, dr)
4731     free_data_ref (dr);
4732   datarefs.release ();
4733 }
4734 
4735 
4736 
4737 /* Dump vertex I in RDG to FILE.  */
4738 
4739 static void
dump_rdg_vertex(FILE * file,struct graph * rdg,int i)4740 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4741 {
4742   struct vertex *v = &(rdg->vertices[i]);
4743   struct graph_edge *e;
4744 
4745   fprintf (file, "(vertex %d: (%s%s) (in:", i,
4746 	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4747 	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4748 
4749   if (v->pred)
4750     for (e = v->pred; e; e = e->pred_next)
4751       fprintf (file, " %d", e->src);
4752 
4753   fprintf (file, ") (out:");
4754 
4755   if (v->succ)
4756     for (e = v->succ; e; e = e->succ_next)
4757       fprintf (file, " %d", e->dest);
4758 
4759   fprintf (file, ")\n");
4760   print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4761   fprintf (file, ")\n");
4762 }
4763 
4764 /* Call dump_rdg_vertex on stderr.  */
4765 
4766 DEBUG_FUNCTION void
debug_rdg_vertex(struct graph * rdg,int i)4767 debug_rdg_vertex (struct graph *rdg, int i)
4768 {
4769   dump_rdg_vertex (stderr, rdg, i);
4770 }
4771 
4772 /* Dump component C of RDG to FILE.  If DUMPED is non-null, set the
4773    dumped vertices to that bitmap.  */
4774 
4775 static void
dump_rdg_component(FILE * file,struct graph * rdg,int c,bitmap dumped)4776 dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4777 {
4778   int i;
4779 
4780   fprintf (file, "(%d\n", c);
4781 
4782   for (i = 0; i < rdg->n_vertices; i++)
4783     if (rdg->vertices[i].component == c)
4784       {
4785 	if (dumped)
4786 	  bitmap_set_bit (dumped, i);
4787 
4788 	dump_rdg_vertex (file, rdg, i);
4789       }
4790 
4791   fprintf (file, ")\n");
4792 }
4793 
4794 /* Call dump_rdg_vertex on stderr.  */
4795 
4796 DEBUG_FUNCTION void
debug_rdg_component(struct graph * rdg,int c)4797 debug_rdg_component (struct graph *rdg, int c)
4798 {
4799   dump_rdg_component (stderr, rdg, c, NULL);
4800 }
4801 
4802 /* Dump the reduced dependence graph RDG to FILE.  */
4803 
4804 void
dump_rdg(FILE * file,struct graph * rdg)4805 dump_rdg (FILE *file, struct graph *rdg)
4806 {
4807   int i;
4808   bitmap dumped = BITMAP_ALLOC (NULL);
4809 
4810   fprintf (file, "(rdg\n");
4811 
4812   for (i = 0; i < rdg->n_vertices; i++)
4813     if (!bitmap_bit_p (dumped, i))
4814       dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4815 
4816   fprintf (file, ")\n");
4817   BITMAP_FREE (dumped);
4818 }
4819 
4820 /* Call dump_rdg on stderr.  */
4821 
4822 DEBUG_FUNCTION void
debug_rdg(struct graph * rdg)4823 debug_rdg (struct graph *rdg)
4824 {
4825   dump_rdg (stderr, rdg);
4826 }
4827 
4828 static void
dot_rdg_1(FILE * file,struct graph * rdg)4829 dot_rdg_1 (FILE *file, struct graph *rdg)
4830 {
4831   int i;
4832 
4833   fprintf (file, "digraph RDG {\n");
4834 
4835   for (i = 0; i < rdg->n_vertices; i++)
4836     {
4837       struct vertex *v = &(rdg->vertices[i]);
4838       struct graph_edge *e;
4839 
4840       /* Highlight reads from memory.  */
4841       if (RDG_MEM_READS_STMT (rdg, i))
4842        fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4843 
4844       /* Highlight stores to memory.  */
4845       if (RDG_MEM_WRITE_STMT (rdg, i))
4846        fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4847 
4848       if (v->succ)
4849        for (e = v->succ; e; e = e->succ_next)
4850          switch (RDGE_TYPE (e))
4851            {
4852            case input_dd:
4853              fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4854              break;
4855 
4856            case output_dd:
4857              fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4858              break;
4859 
4860            case flow_dd:
4861              /* These are the most common dependences: don't print these. */
4862              fprintf (file, "%d -> %d \n", i, e->dest);
4863              break;
4864 
4865            case anti_dd:
4866              fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4867              break;
4868 
4869            default:
4870              gcc_unreachable ();
4871            }
4872     }
4873 
4874   fprintf (file, "}\n\n");
4875 }
4876 
4877 /* Display the Reduced Dependence Graph using dotty.  */
4878 extern void dot_rdg (struct graph *);
4879 
4880 DEBUG_FUNCTION void
dot_rdg(struct graph * rdg)4881 dot_rdg (struct graph *rdg)
4882 {
4883   /* When debugging, enable the following code.  This cannot be used
4884      in production compilers because it calls "system".  */
4885 #if 0
4886   FILE *file = fopen ("/tmp/rdg.dot", "w");
4887   gcc_assert (file != NULL);
4888 
4889   dot_rdg_1 (file, rdg);
4890   fclose (file);
4891 
4892   system ("dotty /tmp/rdg.dot &");
4893 #else
4894   dot_rdg_1 (stderr, rdg);
4895 #endif
4896 }
4897 
4898 /* Returns the index of STMT in RDG.  */
4899 
4900 int
rdg_vertex_for_stmt(struct graph * rdg ATTRIBUTE_UNUSED,gimple stmt)4901 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
4902 {
4903   int index = gimple_uid (stmt);
4904   gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
4905   return index;
4906 }
4907 
4908 /* Creates an edge in RDG for each distance vector from DDR.  The
4909    order that we keep track of in the RDG is the order in which
4910    statements have to be executed.  */
4911 
4912 static void
create_rdg_edge_for_ddr(struct graph * rdg,ddr_p ddr)4913 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4914 {
4915   struct graph_edge *e;
4916   int va, vb;
4917   data_reference_p dra = DDR_A (ddr);
4918   data_reference_p drb = DDR_B (ddr);
4919   unsigned level = ddr_dependence_level (ddr);
4920 
4921   /* For non scalar dependences, when the dependence is REVERSED,
4922      statement B has to be executed before statement A.  */
4923   if (level > 0
4924       && !DDR_REVERSED_P (ddr))
4925     {
4926       data_reference_p tmp = dra;
4927       dra = drb;
4928       drb = tmp;
4929     }
4930 
4931   va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4932   vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4933 
4934   if (va < 0 || vb < 0)
4935     return;
4936 
4937   e = add_edge (rdg, va, vb);
4938   e->data = XNEW (struct rdg_edge);
4939 
4940   RDGE_LEVEL (e) = level;
4941   RDGE_RELATION (e) = ddr;
4942 
4943   /* Determines the type of the data dependence.  */
4944   if (DR_IS_READ (dra) && DR_IS_READ (drb))
4945     RDGE_TYPE (e) = input_dd;
4946   else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4947     RDGE_TYPE (e) = output_dd;
4948   else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4949     RDGE_TYPE (e) = flow_dd;
4950   else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4951     RDGE_TYPE (e) = anti_dd;
4952 }
4953 
4954 /* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
4955    the index of DEF in RDG.  */
4956 
4957 static void
create_rdg_edges_for_scalar(struct graph * rdg,tree def,int idef)4958 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4959 {
4960   use_operand_p imm_use_p;
4961   imm_use_iterator iterator;
4962 
4963   FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4964     {
4965       struct graph_edge *e;
4966       int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4967 
4968       if (use < 0)
4969 	continue;
4970 
4971       e = add_edge (rdg, idef, use);
4972       e->data = XNEW (struct rdg_edge);
4973       RDGE_TYPE (e) = flow_dd;
4974       RDGE_RELATION (e) = NULL;
4975     }
4976 }
4977 
4978 /* Creates the edges of the reduced dependence graph RDG.  */
4979 
4980 static void
create_rdg_edges(struct graph * rdg,vec<ddr_p> ddrs)4981 create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs)
4982 {
4983   int i;
4984   struct data_dependence_relation *ddr;
4985   def_operand_p def_p;
4986   ssa_op_iter iter;
4987 
4988   FOR_EACH_VEC_ELT (ddrs, i, ddr)
4989     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4990       create_rdg_edge_for_ddr (rdg, ddr);
4991 
4992   for (i = 0; i < rdg->n_vertices; i++)
4993     FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4994 			      iter, SSA_OP_DEF)
4995       create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4996 }
4997 
4998 /* Build the vertices of the reduced dependence graph RDG.  */
4999 
5000 static void
create_rdg_vertices(struct graph * rdg,vec<gimple> stmts,loop_p loop)5001 create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop)
5002 {
5003   int i, j;
5004   gimple stmt;
5005 
5006   FOR_EACH_VEC_ELT (stmts, i, stmt)
5007     {
5008       vec<data_ref_loc> references;
5009       data_ref_loc *ref;
5010       struct vertex *v = &(rdg->vertices[i]);
5011 
5012       /* Record statement to vertex mapping.  */
5013       gimple_set_uid (stmt, i);
5014 
5015       v->data = XNEW (struct rdg_vertex);
5016       RDGV_STMT (v) = stmt;
5017       RDGV_DATAREFS (v).create (0);
5018       RDGV_HAS_MEM_WRITE (v) = false;
5019       RDGV_HAS_MEM_READS (v) = false;
5020       if (gimple_code (stmt) == GIMPLE_PHI)
5021 	continue;
5022 
5023       get_references_in_stmt (stmt, &references);
5024       FOR_EACH_VEC_ELT (references, j, ref)
5025 	{
5026 	  data_reference_p dr;
5027 	  if (!ref->is_read)
5028 	    RDGV_HAS_MEM_WRITE (v) = true;
5029 	  else
5030 	    RDGV_HAS_MEM_READS (v) = true;
5031 	  dr = create_data_ref (loop, loop_containing_stmt (stmt),
5032 				*ref->pos, stmt, ref->is_read);
5033 	  if (dr)
5034 	    RDGV_DATAREFS (v).safe_push (dr);
5035 	}
5036       references.release ();
5037     }
5038 }
5039 
5040 /* Initialize STMTS with all the statements of LOOP.  When
5041    INCLUDE_PHIS is true, include also the PHI nodes.  The order in
5042    which we discover statements is important as
5043    generate_loops_for_partition is using the same traversal for
5044    identifying statements. */
5045 
5046 static void
stmts_from_loop(struct loop * loop,vec<gimple> * stmts)5047 stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
5048 {
5049   unsigned int i;
5050   basic_block *bbs = get_loop_body_in_dom_order (loop);
5051 
5052   for (i = 0; i < loop->num_nodes; i++)
5053     {
5054       basic_block bb = bbs[i];
5055       gimple_stmt_iterator bsi;
5056       gimple stmt;
5057 
5058       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5059 	stmts->safe_push (gsi_stmt (bsi));
5060 
5061       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5062 	{
5063 	  stmt = gsi_stmt (bsi);
5064 	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
5065 	    stmts->safe_push (stmt);
5066 	}
5067     }
5068 
5069   free (bbs);
5070 }
5071 
5072 /* Returns true when all the dependences are computable.  */
5073 
5074 static bool
known_dependences_p(vec<ddr_p> dependence_relations)5075 known_dependences_p (vec<ddr_p> dependence_relations)
5076 {
5077   ddr_p ddr;
5078   unsigned int i;
5079 
5080   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5081     if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5082       return false;
5083 
5084   return true;
5085 }
5086 
5087 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5088    statement of the loop nest, and one edge per data dependence or
5089    scalar dependence.  */
5090 
5091 struct graph *
build_empty_rdg(int n_stmts)5092 build_empty_rdg (int n_stmts)
5093 {
5094   struct graph *rdg = new_graph (n_stmts);
5095   return rdg;
5096 }
5097 
5098 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5099    statement of the loop nest, and one edge per data dependence or
5100    scalar dependence.  */
5101 
5102 struct graph *
build_rdg(struct loop * loop,vec<loop_p> * loop_nest,vec<ddr_p> * dependence_relations,vec<data_reference_p> * datarefs)5103 build_rdg (struct loop *loop,
5104 	   vec<loop_p> *loop_nest,
5105 	   vec<ddr_p> *dependence_relations,
5106 	   vec<data_reference_p> *datarefs)
5107 {
5108   struct graph *rdg = NULL;
5109 
5110   if (compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5111 					 dependence_relations)
5112       && known_dependences_p (*dependence_relations))
5113     {
5114       vec<gimple> stmts;
5115       stmts.create (10);
5116       stmts_from_loop (loop, &stmts);
5117       rdg = build_empty_rdg (stmts.length ());
5118       create_rdg_vertices (rdg, stmts, loop);
5119       create_rdg_edges (rdg, *dependence_relations);
5120       stmts.release ();
5121     }
5122 
5123   return rdg;
5124 }
5125 
5126 /* Free the reduced dependence graph RDG.  */
5127 
5128 void
free_rdg(struct graph * rdg)5129 free_rdg (struct graph *rdg)
5130 {
5131   int i;
5132 
5133   for (i = 0; i < rdg->n_vertices; i++)
5134     {
5135       struct vertex *v = &(rdg->vertices[i]);
5136       struct graph_edge *e;
5137 
5138       for (e = v->succ; e; e = e->succ_next)
5139 	free (e->data);
5140 
5141       gimple_set_uid (RDGV_STMT (v), -1);
5142       free_data_refs (RDGV_DATAREFS (v));
5143       free (v->data);
5144     }
5145 
5146   free_graph (rdg);
5147 }
5148 
5149 /* Determines whether the statement from vertex V of the RDG has a
5150    definition used outside the loop that contains this statement.  */
5151 
5152 bool
rdg_defs_used_in_other_loops_p(struct graph * rdg,int v)5153 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5154 {
5155   gimple stmt = RDG_STMT (rdg, v);
5156   struct loop *loop = loop_containing_stmt (stmt);
5157   use_operand_p imm_use_p;
5158   imm_use_iterator iterator;
5159   ssa_op_iter it;
5160   def_operand_p def_p;
5161 
5162   if (!loop)
5163     return true;
5164 
5165   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5166     {
5167       FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5168 	{
5169 	  if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5170 	    return true;
5171 	}
5172     }
5173 
5174   return false;
5175 }
5176