1 /* Data references and dependences detectors.
2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* This pass walks a given loop structure searching for array
22    references.  The information about the array accesses is recorded
23    in DATA_REFERENCE structures.
24 
25    The basic test for determining the dependences is:
26    given two access functions chrec1 and chrec2 to a same array, and
27    x and y two vectors from the iteration domain, the same element of
28    the array is accessed twice at iterations x and y if and only if:
29    |             chrec1 (x) == chrec2 (y).
30 
31    The goals of this analysis are:
32 
33    - to determine the independence: the relation between two
34      independent accesses is qualified with the chrec_known (this
35      information allows a loop parallelization),
36 
37    - when two data references access the same data, to qualify the
38      dependence relation with classic dependence representations:
39 
40        - distance vectors
41        - direction vectors
42        - loop carried level dependence
43        - polyhedron dependence
44      or with the chains of recurrences based representation,
45 
46    - to define a knowledge base for storing the data dependence
47      information,
48 
49    - to define an interface to access this data.
50 
51 
52    Definitions:
53 
54    - subscript: given two array accesses a subscript is the tuple
55    composed of the access functions for a given dimension.  Example:
56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57    (f1, g1), (f2, g2), (f3, g3).
58 
59    - Diophantine equation: an equation whose coefficients and
60    solutions are integer constants, for example the equation
61    |   3*x + 2*y = 1
62    has an integer solution x = 1 and y = -1.
63 
64    References:
65 
66    - "Advanced Compilation for High Performance Computing" by Randy
67    Allen and Ken Kennedy.
68    http://citeseer.ist.psu.edu/goff91practical.html
69 
70    - "Loop Transformations for Restructuring Compilers - The Foundations"
71    by Utpal Banerjee.
72 
73 
74 */
75 
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "gimple-pretty-print.h"
80 #include "tree-flow.h"
81 #include "cfgloop.h"
82 #include "tree-data-ref.h"
83 #include "tree-scalar-evolution.h"
84 #include "dumpfile.h"
85 #include "langhooks.h"
86 #include "tree-affine.h"
87 #include "params.h"
88 
89 static struct datadep_stats
90 {
91   int num_dependence_tests;
92   int num_dependence_dependent;
93   int num_dependence_independent;
94   int num_dependence_undetermined;
95 
96   int num_subscript_tests;
97   int num_subscript_undetermined;
98   int num_same_subscript_function;
99 
100   int num_ziv;
101   int num_ziv_independent;
102   int num_ziv_dependent;
103   int num_ziv_unimplemented;
104 
105   int num_siv;
106   int num_siv_independent;
107   int num_siv_dependent;
108   int num_siv_unimplemented;
109 
110   int num_miv;
111   int num_miv_independent;
112   int num_miv_dependent;
113   int num_miv_unimplemented;
114 } dependence_stats;
115 
116 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
117 					   struct data_reference *,
118 					   struct data_reference *,
119 					   struct loop *);
120 /* Returns true iff A divides B.  */
121 
122 static inline bool
tree_fold_divides_p(const_tree a,const_tree b)123 tree_fold_divides_p (const_tree a, const_tree b)
124 {
125   gcc_assert (TREE_CODE (a) == INTEGER_CST);
126   gcc_assert (TREE_CODE (b) == INTEGER_CST);
127   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
128 }
129 
130 /* Returns true iff A divides B.  */
131 
132 static inline bool
int_divides_p(int a,int b)133 int_divides_p (int a, int b)
134 {
135   return ((b % a) == 0);
136 }
137 
138 
139 
140 /* Dump into FILE all the data references from DATAREFS.  */
141 
142 static void
dump_data_references(FILE * file,vec<data_reference_p> datarefs)143 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
144 {
145   unsigned int i;
146   struct data_reference *dr;
147 
148   FOR_EACH_VEC_ELT (datarefs, i, dr)
149     dump_data_reference (file, dr);
150 }
151 
152 /* Dump into STDERR all the data references from DATAREFS.  */
153 
154 DEBUG_FUNCTION void
debug_data_references(vec<data_reference_p> datarefs)155 debug_data_references (vec<data_reference_p> datarefs)
156 {
157   dump_data_references (stderr, datarefs);
158 }
159 
160 /* Print to STDERR the data_reference DR.  */
161 
162 DEBUG_FUNCTION void
debug_data_reference(struct data_reference * dr)163 debug_data_reference (struct data_reference *dr)
164 {
165   dump_data_reference (stderr, dr);
166 }
167 
168 /* Dump function for a DATA_REFERENCE structure.  */
169 
170 void
dump_data_reference(FILE * outf,struct data_reference * dr)171 dump_data_reference (FILE *outf,
172 		     struct data_reference *dr)
173 {
174   unsigned int i;
175 
176   fprintf (outf, "#(Data Ref: \n");
177   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
178   fprintf (outf, "#  stmt: ");
179   print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
180   fprintf (outf, "#  ref: ");
181   print_generic_stmt (outf, DR_REF (dr), 0);
182   fprintf (outf, "#  base_object: ");
183   print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
184 
185   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
186     {
187       fprintf (outf, "#  Access function %d: ", i);
188       print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
189     }
190   fprintf (outf, "#)\n");
191 }
192 
193 /* Dumps the affine function described by FN to the file OUTF.  */
194 
195 static void
dump_affine_function(FILE * outf,affine_fn fn)196 dump_affine_function (FILE *outf, affine_fn fn)
197 {
198   unsigned i;
199   tree coef;
200 
201   print_generic_expr (outf, fn[0], TDF_SLIM);
202   for (i = 1; fn.iterate (i, &coef); i++)
203     {
204       fprintf (outf, " + ");
205       print_generic_expr (outf, coef, TDF_SLIM);
206       fprintf (outf, " * x_%u", i);
207     }
208 }
209 
210 /* Dumps the conflict function CF to the file OUTF.  */
211 
212 static void
dump_conflict_function(FILE * outf,conflict_function * cf)213 dump_conflict_function (FILE *outf, conflict_function *cf)
214 {
215   unsigned i;
216 
217   if (cf->n == NO_DEPENDENCE)
218     fprintf (outf, "no dependence");
219   else if (cf->n == NOT_KNOWN)
220     fprintf (outf, "not known");
221   else
222     {
223       for (i = 0; i < cf->n; i++)
224 	{
225 	  if (i != 0)
226 	    fprintf (outf, " ");
227 	  fprintf (outf, "[");
228 	  dump_affine_function (outf, cf->fns[i]);
229 	  fprintf (outf, "]");
230 	}
231     }
232 }
233 
234 /* Dump function for a SUBSCRIPT structure.  */
235 
236 static void
dump_subscript(FILE * outf,struct subscript * subscript)237 dump_subscript (FILE *outf, struct subscript *subscript)
238 {
239   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
240 
241   fprintf (outf, "\n (subscript \n");
242   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
243   dump_conflict_function (outf, cf);
244   if (CF_NONTRIVIAL_P (cf))
245     {
246       tree last_iteration = SUB_LAST_CONFLICT (subscript);
247       fprintf (outf, "\n  last_conflict: ");
248       print_generic_expr (outf, last_iteration, 0);
249     }
250 
251   cf = SUB_CONFLICTS_IN_B (subscript);
252   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
253   dump_conflict_function (outf, cf);
254   if (CF_NONTRIVIAL_P (cf))
255     {
256       tree last_iteration = SUB_LAST_CONFLICT (subscript);
257       fprintf (outf, "\n  last_conflict: ");
258       print_generic_expr (outf, last_iteration, 0);
259     }
260 
261   fprintf (outf, "\n  (Subscript distance: ");
262   print_generic_expr (outf, SUB_DISTANCE (subscript), 0);
263   fprintf (outf, " ))\n");
264 }
265 
266 /* Print the classic direction vector DIRV to OUTF.  */
267 
268 static void
print_direction_vector(FILE * outf,lambda_vector dirv,int length)269 print_direction_vector (FILE *outf,
270 			lambda_vector dirv,
271 			int length)
272 {
273   int eq;
274 
275   for (eq = 0; eq < length; eq++)
276     {
277       enum data_dependence_direction dir = ((enum data_dependence_direction)
278 					    dirv[eq]);
279 
280       switch (dir)
281 	{
282 	case dir_positive:
283 	  fprintf (outf, "    +");
284 	  break;
285 	case dir_negative:
286 	  fprintf (outf, "    -");
287 	  break;
288 	case dir_equal:
289 	  fprintf (outf, "    =");
290 	  break;
291 	case dir_positive_or_equal:
292 	  fprintf (outf, "   +=");
293 	  break;
294 	case dir_positive_or_negative:
295 	  fprintf (outf, "   +-");
296 	  break;
297 	case dir_negative_or_equal:
298 	  fprintf (outf, "   -=");
299 	  break;
300 	case dir_star:
301 	  fprintf (outf, "    *");
302 	  break;
303 	default:
304 	  fprintf (outf, "indep");
305 	  break;
306 	}
307     }
308   fprintf (outf, "\n");
309 }
310 
311 /* Print a vector of direction vectors.  */
312 
313 static void
print_dir_vectors(FILE * outf,vec<lambda_vector> dir_vects,int length)314 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
315 		   int length)
316 {
317   unsigned j;
318   lambda_vector v;
319 
320   FOR_EACH_VEC_ELT (dir_vects, j, v)
321     print_direction_vector (outf, v, length);
322 }
323 
324 /* Print out a vector VEC of length N to OUTFILE.  */
325 
326 static inline void
print_lambda_vector(FILE * outfile,lambda_vector vector,int n)327 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
328 {
329   int i;
330 
331   for (i = 0; i < n; i++)
332     fprintf (outfile, "%3d ", vector[i]);
333   fprintf (outfile, "\n");
334 }
335 
336 /* Print a vector of distance vectors.  */
337 
338 static void
print_dist_vectors(FILE * outf,vec<lambda_vector> dist_vects,int length)339 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
340 		    int length)
341 {
342   unsigned j;
343   lambda_vector v;
344 
345   FOR_EACH_VEC_ELT (dist_vects, j, v)
346     print_lambda_vector (outf, v, length);
347 }
348 
349 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
350 
351 static void
dump_data_dependence_relation(FILE * outf,struct data_dependence_relation * ddr)352 dump_data_dependence_relation (FILE *outf,
353 			       struct data_dependence_relation *ddr)
354 {
355   struct data_reference *dra, *drb;
356 
357   fprintf (outf, "(Data Dep: \n");
358 
359   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
360     {
361       if (ddr)
362 	{
363 	  dra = DDR_A (ddr);
364 	  drb = DDR_B (ddr);
365 	  if (dra)
366 	    dump_data_reference (outf, dra);
367 	  else
368 	    fprintf (outf, "    (nil)\n");
369 	  if (drb)
370 	    dump_data_reference (outf, drb);
371 	  else
372 	    fprintf (outf, "    (nil)\n");
373 	}
374       fprintf (outf, "    (don't know)\n)\n");
375       return;
376     }
377 
378   dra = DDR_A (ddr);
379   drb = DDR_B (ddr);
380   dump_data_reference (outf, dra);
381   dump_data_reference (outf, drb);
382 
383   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
384     fprintf (outf, "    (no dependence)\n");
385 
386   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
387     {
388       unsigned int i;
389       struct loop *loopi;
390 
391       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
392 	{
393 	  fprintf (outf, "  access_fn_A: ");
394 	  print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
395 	  fprintf (outf, "  access_fn_B: ");
396 	  print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
397 	  dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
398 	}
399 
400       fprintf (outf, "  inner loop index: %d\n", DDR_INNER_LOOP (ddr));
401       fprintf (outf, "  loop nest: (");
402       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
403 	fprintf (outf, "%d ", loopi->num);
404       fprintf (outf, ")\n");
405 
406       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
407 	{
408 	  fprintf (outf, "  distance_vector: ");
409 	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
410 			       DDR_NB_LOOPS (ddr));
411 	}
412 
413       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
414 	{
415 	  fprintf (outf, "  direction_vector: ");
416 	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
417 				  DDR_NB_LOOPS (ddr));
418 	}
419     }
420 
421   fprintf (outf, ")\n");
422 }
423 
424 /* Debug version.  */
425 
426 DEBUG_FUNCTION void
debug_data_dependence_relation(struct data_dependence_relation * ddr)427 debug_data_dependence_relation (struct data_dependence_relation *ddr)
428 {
429   dump_data_dependence_relation (stderr, ddr);
430 }
431 
432 /* Dump into FILE all the dependence relations from DDRS.  */
433 
434 void
dump_data_dependence_relations(FILE * file,vec<ddr_p> ddrs)435 dump_data_dependence_relations (FILE *file,
436 				vec<ddr_p> ddrs)
437 {
438   unsigned int i;
439   struct data_dependence_relation *ddr;
440 
441   FOR_EACH_VEC_ELT (ddrs, i, ddr)
442     dump_data_dependence_relation (file, ddr);
443 }
444 
445 /* Dump to STDERR all the dependence relations from DDRS.  */
446 
447 DEBUG_FUNCTION void
debug_data_dependence_relations(vec<ddr_p> ddrs)448 debug_data_dependence_relations (vec<ddr_p> ddrs)
449 {
450   dump_data_dependence_relations (stderr, ddrs);
451 }
452 
453 /* Dumps the distance and direction vectors in FILE.  DDRS contains
454    the dependence relations, and VECT_SIZE is the size of the
455    dependence vectors, or in other words the number of loops in the
456    considered nest.  */
457 
458 static void
dump_dist_dir_vectors(FILE * file,vec<ddr_p> ddrs)459 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
460 {
461   unsigned int i, j;
462   struct data_dependence_relation *ddr;
463   lambda_vector v;
464 
465   FOR_EACH_VEC_ELT (ddrs, i, ddr)
466     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
467       {
468 	FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
469 	  {
470 	    fprintf (file, "DISTANCE_V (");
471 	    print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
472 	    fprintf (file, ")\n");
473 	  }
474 
475 	FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
476 	  {
477 	    fprintf (file, "DIRECTION_V (");
478 	    print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
479 	    fprintf (file, ")\n");
480 	  }
481       }
482 
483   fprintf (file, "\n\n");
484 }
485 
486 /* Dumps the data dependence relations DDRS in FILE.  */
487 
488 static void
dump_ddrs(FILE * file,vec<ddr_p> ddrs)489 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
490 {
491   unsigned int i;
492   struct data_dependence_relation *ddr;
493 
494   FOR_EACH_VEC_ELT (ddrs, i, ddr)
495     dump_data_dependence_relation (file, ddr);
496 
497   fprintf (file, "\n\n");
498 }
499 
500 DEBUG_FUNCTION void
debug_ddrs(vec<ddr_p> ddrs)501 debug_ddrs (vec<ddr_p> ddrs)
502 {
503   dump_ddrs (stderr, ddrs);
504 }
505 
506 /* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
507    (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
508    constant of type ssizetype, and returns true.  If we cannot do this
509    with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
510    is returned.  */
511 
512 static bool
split_constant_offset_1(tree type,tree op0,enum tree_code code,tree op1,tree * var,tree * off)513 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
514 			 tree *var, tree *off)
515 {
516   tree var0, var1;
517   tree off0, off1;
518   enum tree_code ocode = code;
519 
520   *var = NULL_TREE;
521   *off = NULL_TREE;
522 
523   switch (code)
524     {
525     case INTEGER_CST:
526       *var = build_int_cst (type, 0);
527       *off = fold_convert (ssizetype, op0);
528       return true;
529 
530     case POINTER_PLUS_EXPR:
531       ocode = PLUS_EXPR;
532       /* FALLTHROUGH */
533     case PLUS_EXPR:
534     case MINUS_EXPR:
535       split_constant_offset (op0, &var0, &off0);
536       split_constant_offset (op1, &var1, &off1);
537       *var = fold_build2 (code, type, var0, var1);
538       *off = size_binop (ocode, off0, off1);
539       return true;
540 
541     case MULT_EXPR:
542       if (TREE_CODE (op1) != INTEGER_CST)
543 	return false;
544 
545       split_constant_offset (op0, &var0, &off0);
546       *var = fold_build2 (MULT_EXPR, type, var0, op1);
547       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
548       return true;
549 
550     case ADDR_EXPR:
551       {
552 	tree base, poffset;
553 	HOST_WIDE_INT pbitsize, pbitpos;
554 	enum machine_mode pmode;
555 	int punsignedp, pvolatilep;
556 
557 	op0 = TREE_OPERAND (op0, 0);
558 	base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
559 				    &pmode, &punsignedp, &pvolatilep, false);
560 
561 	if (pbitpos % BITS_PER_UNIT != 0)
562 	  return false;
563 	base = build_fold_addr_expr (base);
564 	off0 = ssize_int (pbitpos / BITS_PER_UNIT);
565 
566 	if (poffset)
567 	  {
568 	    split_constant_offset (poffset, &poffset, &off1);
569 	    off0 = size_binop (PLUS_EXPR, off0, off1);
570 	    if (POINTER_TYPE_P (TREE_TYPE (base)))
571 	      base = fold_build_pointer_plus (base, poffset);
572 	    else
573 	      base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
574 				  fold_convert (TREE_TYPE (base), poffset));
575 	  }
576 
577 	var0 = fold_convert (type, base);
578 
579 	/* If variable length types are involved, punt, otherwise casts
580 	   might be converted into ARRAY_REFs in gimplify_conversion.
581 	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
582 	   possibly no longer appears in current GIMPLE, might resurface.
583 	   This perhaps could run
584 	   if (CONVERT_EXPR_P (var0))
585 	     {
586 	       gimplify_conversion (&var0);
587 	       // Attempt to fill in any within var0 found ARRAY_REF's
588 	       // element size from corresponding op embedded ARRAY_REF,
589 	       // if unsuccessful, just punt.
590 	     }  */
591 	while (POINTER_TYPE_P (type))
592 	  type = TREE_TYPE (type);
593 	if (int_size_in_bytes (type) < 0)
594 	  return false;
595 
596 	*var = var0;
597 	*off = off0;
598 	return true;
599       }
600 
601     case SSA_NAME:
602       {
603 	gimple def_stmt = SSA_NAME_DEF_STMT (op0);
604 	enum tree_code subcode;
605 
606 	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
607 	  return false;
608 
609 	var0 = gimple_assign_rhs1 (def_stmt);
610 	subcode = gimple_assign_rhs_code (def_stmt);
611 	var1 = gimple_assign_rhs2 (def_stmt);
612 
613 	return split_constant_offset_1 (type, var0, subcode, var1, var, off);
614       }
615     CASE_CONVERT:
616       {
617 	/* We must not introduce undefined overflow, and we must not change the value.
618 	   Hence we're okay if the inner type doesn't overflow to start with
619 	   (pointer or signed), the outer type also is an integer or pointer
620 	   and the outer precision is at least as large as the inner.  */
621 	tree itype = TREE_TYPE (op0);
622 	if ((POINTER_TYPE_P (itype)
623 	     || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
624 	    && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
625 	    && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
626 	  {
627 	    split_constant_offset (op0, &var0, off);
628 	    *var = fold_convert (type, var0);
629 	    return true;
630 	  }
631 	return false;
632       }
633 
634     default:
635       return false;
636     }
637 }
638 
639 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
640    will be ssizetype.  */
641 
642 void
split_constant_offset(tree exp,tree * var,tree * off)643 split_constant_offset (tree exp, tree *var, tree *off)
644 {
645   tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
646   enum tree_code code;
647 
648   *var = exp;
649   *off = ssize_int (0);
650   STRIP_NOPS (exp);
651 
652   if (tree_is_chrec (exp)
653       || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
654     return;
655 
656   otype = TREE_TYPE (exp);
657   code = TREE_CODE (exp);
658   extract_ops_from_tree (exp, &code, &op0, &op1);
659   if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
660     {
661       *var = fold_convert (type, e);
662       *off = o;
663     }
664 }
665 
666 /* Returns the address ADDR of an object in a canonical shape (without nop
667    casts, and with type of pointer to the object).  */
668 
669 static tree
canonicalize_base_object_address(tree addr)670 canonicalize_base_object_address (tree addr)
671 {
672   tree orig = addr;
673 
674   STRIP_NOPS (addr);
675 
676   /* The base address may be obtained by casting from integer, in that case
677      keep the cast.  */
678   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
679     return orig;
680 
681   if (TREE_CODE (addr) != ADDR_EXPR)
682     return addr;
683 
684   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
685 }
686 
687 /* Analyzes the behavior of the memory reference DR in the innermost loop or
688    basic block that contains it.  Returns true if analysis succeed or false
689    otherwise.  */
690 
691 bool
dr_analyze_innermost(struct data_reference * dr,struct loop * nest)692 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
693 {
694   gimple stmt = DR_STMT (dr);
695   struct loop *loop = loop_containing_stmt (stmt);
696   tree ref = DR_REF (dr);
697   HOST_WIDE_INT pbitsize, pbitpos;
698   tree base, poffset;
699   enum machine_mode pmode;
700   int punsignedp, pvolatilep;
701   affine_iv base_iv, offset_iv;
702   tree init, dinit, step;
703   bool in_loop = (loop && loop->num);
704 
705   if (dump_file && (dump_flags & TDF_DETAILS))
706     fprintf (dump_file, "analyze_innermost: ");
707 
708   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
709 			      &pmode, &punsignedp, &pvolatilep, false);
710   gcc_assert (base != NULL_TREE);
711 
712   if (pbitpos % BITS_PER_UNIT != 0)
713     {
714       if (dump_file && (dump_flags & TDF_DETAILS))
715 	fprintf (dump_file, "failed: bit offset alignment.\n");
716       return false;
717     }
718 
719   if (TREE_CODE (base) == MEM_REF)
720     {
721       if (!integer_zerop (TREE_OPERAND (base, 1)))
722 	{
723 	  double_int moff = mem_ref_offset (base);
724 	  tree mofft = double_int_to_tree (sizetype, moff);
725 	  if (!poffset)
726 	    poffset = mofft;
727 	  else
728 	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
729 	}
730       base = TREE_OPERAND (base, 0);
731     }
732   else
733     base = build_fold_addr_expr (base);
734 
735   if (in_loop)
736     {
737       if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
738                       nest ? true : false))
739         {
740           if (nest)
741             {
742               if (dump_file && (dump_flags & TDF_DETAILS))
743                 fprintf (dump_file, "failed: evolution of base is not"
744                                     " affine.\n");
745               return false;
746             }
747           else
748             {
749               base_iv.base = base;
750               base_iv.step = ssize_int (0);
751               base_iv.no_overflow = true;
752             }
753         }
754     }
755   else
756     {
757       base_iv.base = base;
758       base_iv.step = ssize_int (0);
759       base_iv.no_overflow = true;
760     }
761 
762   if (!poffset)
763     {
764       offset_iv.base = ssize_int (0);
765       offset_iv.step = ssize_int (0);
766     }
767   else
768     {
769       if (!in_loop)
770         {
771           offset_iv.base = poffset;
772           offset_iv.step = ssize_int (0);
773         }
774       else if (!simple_iv (loop, loop_containing_stmt (stmt),
775                            poffset, &offset_iv,
776 			   nest ? true : false))
777         {
778           if (nest)
779             {
780               if (dump_file && (dump_flags & TDF_DETAILS))
781                 fprintf (dump_file, "failed: evolution of offset is not"
782                                     " affine.\n");
783               return false;
784             }
785           else
786             {
787               offset_iv.base = poffset;
788               offset_iv.step = ssize_int (0);
789             }
790         }
791     }
792 
793   init = ssize_int (pbitpos / BITS_PER_UNIT);
794   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
795   init =  size_binop (PLUS_EXPR, init, dinit);
796   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
797   init =  size_binop (PLUS_EXPR, init, dinit);
798 
799   step = size_binop (PLUS_EXPR,
800 		     fold_convert (ssizetype, base_iv.step),
801 		     fold_convert (ssizetype, offset_iv.step));
802 
803   DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
804 
805   DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
806   DR_INIT (dr) = init;
807   DR_STEP (dr) = step;
808 
809   DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
810 
811   if (dump_file && (dump_flags & TDF_DETAILS))
812     fprintf (dump_file, "success.\n");
813 
814   return true;
815 }
816 
817 /* Determines the base object and the list of indices of memory reference
818    DR, analyzed in LOOP and instantiated in loop nest NEST.  */
819 
820 static void
dr_analyze_indices(struct data_reference * dr,loop_p nest,loop_p loop)821 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
822 {
823   vec<tree> access_fns = vNULL;
824   tree ref, op;
825   tree base, off, access_fn;
826   basic_block before_loop;
827 
828   /* If analyzing a basic-block there are no indices to analyze
829      and thus no access functions.  */
830   if (!nest)
831     {
832       DR_BASE_OBJECT (dr) = DR_REF (dr);
833       DR_ACCESS_FNS (dr).create (0);
834       return;
835     }
836 
837   ref = DR_REF (dr);
838   before_loop = block_before_loop (nest);
839 
840   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
841      into a two element array with a constant index.  The base is
842      then just the immediate underlying object.  */
843   if (TREE_CODE (ref) == REALPART_EXPR)
844     {
845       ref = TREE_OPERAND (ref, 0);
846       access_fns.safe_push (integer_zero_node);
847     }
848   else if (TREE_CODE (ref) == IMAGPART_EXPR)
849     {
850       ref = TREE_OPERAND (ref, 0);
851       access_fns.safe_push (integer_one_node);
852     }
853 
854   /* Analyze access functions of dimensions we know to be independent.  */
855   while (handled_component_p (ref))
856     {
857       if (TREE_CODE (ref) == ARRAY_REF)
858 	{
859 	  op = TREE_OPERAND (ref, 1);
860 	  access_fn = analyze_scalar_evolution (loop, op);
861 	  access_fn = instantiate_scev (before_loop, loop, access_fn);
862 	  access_fns.safe_push (access_fn);
863 	}
864       else if (TREE_CODE (ref) == COMPONENT_REF
865 	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
866 	{
867 	  /* For COMPONENT_REFs of records (but not unions!) use the
868 	     FIELD_DECL offset as constant access function so we can
869 	     disambiguate a[i].f1 and a[i].f2.  */
870 	  tree off = component_ref_field_offset (ref);
871 	  off = size_binop (PLUS_EXPR,
872 			    size_binop (MULT_EXPR,
873 					fold_convert (bitsizetype, off),
874 					bitsize_int (BITS_PER_UNIT)),
875 			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
876 	  access_fns.safe_push (off);
877 	}
878       else
879 	/* If we have an unhandled component we could not translate
880 	   to an access function stop analyzing.  We have determined
881 	   our base object in this case.  */
882 	break;
883 
884       ref = TREE_OPERAND (ref, 0);
885     }
886 
887   /* If the address operand of a MEM_REF base has an evolution in the
888      analyzed nest, add it as an additional independent access-function.  */
889   if (TREE_CODE (ref) == MEM_REF)
890     {
891       op = TREE_OPERAND (ref, 0);
892       access_fn = analyze_scalar_evolution (loop, op);
893       access_fn = instantiate_scev (before_loop, loop, access_fn);
894       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
895 	{
896 	  tree orig_type;
897 	  tree memoff = TREE_OPERAND (ref, 1);
898 	  base = initial_condition (access_fn);
899 	  orig_type = TREE_TYPE (base);
900 	  STRIP_USELESS_TYPE_CONVERSION (base);
901 	  split_constant_offset (base, &base, &off);
902 	  /* Fold the MEM_REF offset into the evolutions initial
903 	     value to make more bases comparable.  */
904 	  if (!integer_zerop (memoff))
905 	    {
906 	      off = size_binop (PLUS_EXPR, off,
907 				fold_convert (ssizetype, memoff));
908 	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
909 	    }
910 	  access_fn = chrec_replace_initial_condition
911 	      (access_fn, fold_convert (orig_type, off));
912 	  /* ???  This is still not a suitable base object for
913 	     dr_may_alias_p - the base object needs to be an
914 	     access that covers the object as whole.  With
915 	     an evolution in the pointer this cannot be
916 	     guaranteed.
917 	     As a band-aid, mark the access so we can special-case
918 	     it in dr_may_alias_p.  */
919 	  ref = fold_build2_loc (EXPR_LOCATION (ref),
920 				 MEM_REF, TREE_TYPE (ref),
921 				 base, memoff);
922 	  DR_UNCONSTRAINED_BASE (dr) = true;
923 	  access_fns.safe_push (access_fn);
924 	}
925     }
926   else if (DECL_P (ref))
927     {
928       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
929       ref = build2 (MEM_REF, TREE_TYPE (ref),
930 		    build_fold_addr_expr (ref),
931 		    build_int_cst (reference_alias_ptr_type (ref), 0));
932     }
933 
934   DR_BASE_OBJECT (dr) = ref;
935   DR_ACCESS_FNS (dr) = access_fns;
936 }
937 
938 /* Extracts the alias analysis information from the memory reference DR.  */
939 
940 static void
dr_analyze_alias(struct data_reference * dr)941 dr_analyze_alias (struct data_reference *dr)
942 {
943   tree ref = DR_REF (dr);
944   tree base = get_base_address (ref), addr;
945 
946   if (INDIRECT_REF_P (base)
947       || TREE_CODE (base) == MEM_REF)
948     {
949       addr = TREE_OPERAND (base, 0);
950       if (TREE_CODE (addr) == SSA_NAME)
951 	DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
952     }
953 }
954 
955 /* Frees data reference DR.  */
956 
957 void
free_data_ref(data_reference_p dr)958 free_data_ref (data_reference_p dr)
959 {
960   DR_ACCESS_FNS (dr).release ();
961   free (dr);
962 }
963 
964 /* Analyzes memory reference MEMREF accessed in STMT.  The reference
965    is read if IS_READ is true, write otherwise.  Returns the
966    data_reference description of MEMREF.  NEST is the outermost loop
967    in which the reference should be instantiated, LOOP is the loop in
968    which the data reference should be analyzed.  */
969 
970 struct data_reference *
create_data_ref(loop_p nest,loop_p loop,tree memref,gimple stmt,bool is_read)971 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
972 		 bool is_read)
973 {
974   struct data_reference *dr;
975 
976   if (dump_file && (dump_flags & TDF_DETAILS))
977     {
978       fprintf (dump_file, "Creating dr for ");
979       print_generic_expr (dump_file, memref, TDF_SLIM);
980       fprintf (dump_file, "\n");
981     }
982 
983   dr = XCNEW (struct data_reference);
984   DR_STMT (dr) = stmt;
985   DR_REF (dr) = memref;
986   DR_IS_READ (dr) = is_read;
987 
988   dr_analyze_innermost (dr, nest);
989   dr_analyze_indices (dr, nest, loop);
990   dr_analyze_alias (dr);
991 
992   if (dump_file && (dump_flags & TDF_DETAILS))
993     {
994       unsigned i;
995       fprintf (dump_file, "\tbase_address: ");
996       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
997       fprintf (dump_file, "\n\toffset from base address: ");
998       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
999       fprintf (dump_file, "\n\tconstant offset from base address: ");
1000       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1001       fprintf (dump_file, "\n\tstep: ");
1002       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1003       fprintf (dump_file, "\n\taligned to: ");
1004       print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1005       fprintf (dump_file, "\n\tbase_object: ");
1006       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1007       fprintf (dump_file, "\n");
1008       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1009 	{
1010 	  fprintf (dump_file, "\tAccess function %d: ", i);
1011 	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1012 	}
1013     }
1014 
1015   return dr;
1016 }
1017 
1018 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1019    expressions.  */
1020 static bool
dr_equal_offsets_p1(tree offset1,tree offset2)1021 dr_equal_offsets_p1 (tree offset1, tree offset2)
1022 {
1023   bool res;
1024 
1025   STRIP_NOPS (offset1);
1026   STRIP_NOPS (offset2);
1027 
1028   if (offset1 == offset2)
1029     return true;
1030 
1031   if (TREE_CODE (offset1) != TREE_CODE (offset2)
1032       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1033     return false;
1034 
1035   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1036                              TREE_OPERAND (offset2, 0));
1037 
1038   if (!res || !BINARY_CLASS_P (offset1))
1039     return res;
1040 
1041   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1042                              TREE_OPERAND (offset2, 1));
1043 
1044   return res;
1045 }
1046 
1047 /* Check if DRA and DRB have equal offsets.  */
1048 bool
dr_equal_offsets_p(struct data_reference * dra,struct data_reference * drb)1049 dr_equal_offsets_p (struct data_reference *dra,
1050                     struct data_reference *drb)
1051 {
1052   tree offset1, offset2;
1053 
1054   offset1 = DR_OFFSET (dra);
1055   offset2 = DR_OFFSET (drb);
1056 
1057   return dr_equal_offsets_p1 (offset1, offset2);
1058 }
1059 
1060 /* Returns true if FNA == FNB.  */
1061 
1062 static bool
affine_function_equal_p(affine_fn fna,affine_fn fnb)1063 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1064 {
1065   unsigned i, n = fna.length ();
1066 
1067   if (n != fnb.length ())
1068     return false;
1069 
1070   for (i = 0; i < n; i++)
1071     if (!operand_equal_p (fna[i], fnb[i], 0))
1072       return false;
1073 
1074   return true;
1075 }
1076 
1077 /* If all the functions in CF are the same, returns one of them,
1078    otherwise returns NULL.  */
1079 
1080 static affine_fn
common_affine_function(conflict_function * cf)1081 common_affine_function (conflict_function *cf)
1082 {
1083   unsigned i;
1084   affine_fn comm;
1085 
1086   if (!CF_NONTRIVIAL_P (cf))
1087     return affine_fn();
1088 
1089   comm = cf->fns[0];
1090 
1091   for (i = 1; i < cf->n; i++)
1092     if (!affine_function_equal_p (comm, cf->fns[i]))
1093       return affine_fn();
1094 
1095   return comm;
1096 }
1097 
1098 /* Returns the base of the affine function FN.  */
1099 
1100 static tree
affine_function_base(affine_fn fn)1101 affine_function_base (affine_fn fn)
1102 {
1103   return fn[0];
1104 }
1105 
1106 /* Returns true if FN is a constant.  */
1107 
1108 static bool
affine_function_constant_p(affine_fn fn)1109 affine_function_constant_p (affine_fn fn)
1110 {
1111   unsigned i;
1112   tree coef;
1113 
1114   for (i = 1; fn.iterate (i, &coef); i++)
1115     if (!integer_zerop (coef))
1116       return false;
1117 
1118   return true;
1119 }
1120 
1121 /* Returns true if FN is the zero constant function.  */
1122 
1123 static bool
affine_function_zero_p(affine_fn fn)1124 affine_function_zero_p (affine_fn fn)
1125 {
1126   return (integer_zerop (affine_function_base (fn))
1127 	  && affine_function_constant_p (fn));
1128 }
1129 
1130 /* Returns a signed integer type with the largest precision from TA
1131    and TB.  */
1132 
1133 static tree
signed_type_for_types(tree ta,tree tb)1134 signed_type_for_types (tree ta, tree tb)
1135 {
1136   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1137     return signed_type_for (ta);
1138   else
1139     return signed_type_for (tb);
1140 }
1141 
1142 /* Applies operation OP on affine functions FNA and FNB, and returns the
1143    result.  */
1144 
1145 static affine_fn
affine_fn_op(enum tree_code op,affine_fn fna,affine_fn fnb)1146 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1147 {
1148   unsigned i, n, m;
1149   affine_fn ret;
1150   tree coef;
1151 
1152   if (fnb.length () > fna.length ())
1153     {
1154       n = fna.length ();
1155       m = fnb.length ();
1156     }
1157   else
1158     {
1159       n = fnb.length ();
1160       m = fna.length ();
1161     }
1162 
1163   ret.create (m);
1164   for (i = 0; i < n; i++)
1165     {
1166       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1167 					 TREE_TYPE (fnb[i]));
1168       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1169     }
1170 
1171   for (; fna.iterate (i, &coef); i++)
1172     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1173 				 coef, integer_zero_node));
1174   for (; fnb.iterate (i, &coef); i++)
1175     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1176 				 integer_zero_node, coef));
1177 
1178   return ret;
1179 }
1180 
1181 /* Returns the sum of affine functions FNA and FNB.  */
1182 
1183 static affine_fn
affine_fn_plus(affine_fn fna,affine_fn fnb)1184 affine_fn_plus (affine_fn fna, affine_fn fnb)
1185 {
1186   return affine_fn_op (PLUS_EXPR, fna, fnb);
1187 }
1188 
1189 /* Returns the difference of affine functions FNA and FNB.  */
1190 
1191 static affine_fn
affine_fn_minus(affine_fn fna,affine_fn fnb)1192 affine_fn_minus (affine_fn fna, affine_fn fnb)
1193 {
1194   return affine_fn_op (MINUS_EXPR, fna, fnb);
1195 }
1196 
1197 /* Frees affine function FN.  */
1198 
1199 static void
affine_fn_free(affine_fn fn)1200 affine_fn_free (affine_fn fn)
1201 {
1202   fn.release ();
1203 }
1204 
1205 /* Determine for each subscript in the data dependence relation DDR
1206    the distance.  */
1207 
1208 static void
compute_subscript_distance(struct data_dependence_relation * ddr)1209 compute_subscript_distance (struct data_dependence_relation *ddr)
1210 {
1211   conflict_function *cf_a, *cf_b;
1212   affine_fn fn_a, fn_b, diff;
1213 
1214   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1215     {
1216       unsigned int i;
1217 
1218       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1219  	{
1220  	  struct subscript *subscript;
1221 
1222  	  subscript = DDR_SUBSCRIPT (ddr, i);
1223  	  cf_a = SUB_CONFLICTS_IN_A (subscript);
1224  	  cf_b = SUB_CONFLICTS_IN_B (subscript);
1225 
1226 	  fn_a = common_affine_function (cf_a);
1227 	  fn_b = common_affine_function (cf_b);
1228 	  if (!fn_a.exists () || !fn_b.exists ())
1229 	    {
1230 	      SUB_DISTANCE (subscript) = chrec_dont_know;
1231 	      return;
1232 	    }
1233 	  diff = affine_fn_minus (fn_a, fn_b);
1234 
1235  	  if (affine_function_constant_p (diff))
1236  	    SUB_DISTANCE (subscript) = affine_function_base (diff);
1237  	  else
1238  	    SUB_DISTANCE (subscript) = chrec_dont_know;
1239 
1240 	  affine_fn_free (diff);
1241  	}
1242     }
1243 }
1244 
1245 /* Returns the conflict function for "unknown".  */
1246 
1247 static conflict_function *
conflict_fn_not_known(void)1248 conflict_fn_not_known (void)
1249 {
1250   conflict_function *fn = XCNEW (conflict_function);
1251   fn->n = NOT_KNOWN;
1252 
1253   return fn;
1254 }
1255 
1256 /* Returns the conflict function for "independent".  */
1257 
1258 static conflict_function *
conflict_fn_no_dependence(void)1259 conflict_fn_no_dependence (void)
1260 {
1261   conflict_function *fn = XCNEW (conflict_function);
1262   fn->n = NO_DEPENDENCE;
1263 
1264   return fn;
1265 }
1266 
1267 /* Returns true if the address of OBJ is invariant in LOOP.  */
1268 
1269 static bool
object_address_invariant_in_loop_p(const struct loop * loop,const_tree obj)1270 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1271 {
1272   while (handled_component_p (obj))
1273     {
1274       if (TREE_CODE (obj) == ARRAY_REF)
1275 	{
1276 	  /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1277 	     need to check the stride and the lower bound of the reference.  */
1278 	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1279 						      loop->num)
1280 	      || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1281 							 loop->num))
1282 	    return false;
1283 	}
1284       else if (TREE_CODE (obj) == COMPONENT_REF)
1285 	{
1286 	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1287 						      loop->num))
1288 	    return false;
1289 	}
1290       obj = TREE_OPERAND (obj, 0);
1291     }
1292 
1293   if (!INDIRECT_REF_P (obj)
1294       && TREE_CODE (obj) != MEM_REF)
1295     return true;
1296 
1297   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1298 						  loop->num);
1299 }
1300 
1301 /* Returns false if we can prove that data references A and B do not alias,
1302    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
1303    considered.  */
1304 
1305 bool
dr_may_alias_p(const struct data_reference * a,const struct data_reference * b,bool loop_nest)1306 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1307 		bool loop_nest)
1308 {
1309   tree addr_a = DR_BASE_OBJECT (a);
1310   tree addr_b = DR_BASE_OBJECT (b);
1311 
1312   /* If we are not processing a loop nest but scalar code we
1313      do not need to care about possible cross-iteration dependences
1314      and thus can process the full original reference.  Do so,
1315      similar to how loop invariant motion applies extra offset-based
1316      disambiguation.  */
1317   if (!loop_nest)
1318     {
1319       aff_tree off1, off2;
1320       double_int size1, size2;
1321       get_inner_reference_aff (DR_REF (a), &off1, &size1);
1322       get_inner_reference_aff (DR_REF (b), &off2, &size2);
1323       aff_combination_scale (&off1, double_int_minus_one);
1324       aff_combination_add (&off2, &off1);
1325       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1326 	return false;
1327     }
1328 
1329   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
1330      do not know the size of the base-object.  So we cannot do any
1331      offset/overlap based analysis but have to rely on points-to
1332      information only.  */
1333   if (TREE_CODE (addr_a) == MEM_REF
1334       && (DR_UNCONSTRAINED_BASE (a)
1335 	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
1336     {
1337       /* For true dependences we can apply TBAA.  */
1338       if (flag_strict_aliasing
1339 	  && DR_IS_WRITE (a) && DR_IS_READ (b)
1340 	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
1341 				     get_alias_set (DR_REF (b))))
1342 	return false;
1343       if (TREE_CODE (addr_b) == MEM_REF)
1344 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1345 				       TREE_OPERAND (addr_b, 0));
1346       else
1347 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1348 				       build_fold_addr_expr (addr_b));
1349     }
1350   else if (TREE_CODE (addr_b) == MEM_REF
1351 	   && (DR_UNCONSTRAINED_BASE (b)
1352 	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
1353     {
1354       /* For true dependences we can apply TBAA.  */
1355       if (flag_strict_aliasing
1356 	  && DR_IS_WRITE (a) && DR_IS_READ (b)
1357 	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
1358 				     get_alias_set (DR_REF (b))))
1359 	return false;
1360       if (TREE_CODE (addr_a) == MEM_REF)
1361 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1362 				       TREE_OPERAND (addr_b, 0));
1363       else
1364 	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1365 				       TREE_OPERAND (addr_b, 0));
1366     }
1367 
1368   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1369      that is being subsetted in the loop nest.  */
1370   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1371     return refs_output_dependent_p (addr_a, addr_b);
1372   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1373     return refs_anti_dependent_p (addr_a, addr_b);
1374   return refs_may_alias_p (addr_a, addr_b);
1375 }
1376 
1377 /* Initialize a data dependence relation between data accesses A and
1378    B.  NB_LOOPS is the number of loops surrounding the references: the
1379    size of the classic distance/direction vectors.  */
1380 
1381 struct data_dependence_relation *
initialize_data_dependence_relation(struct data_reference * a,struct data_reference * b,vec<loop_p> loop_nest)1382 initialize_data_dependence_relation (struct data_reference *a,
1383 				     struct data_reference *b,
1384  				     vec<loop_p> loop_nest)
1385 {
1386   struct data_dependence_relation *res;
1387   unsigned int i;
1388 
1389   res = XNEW (struct data_dependence_relation);
1390   DDR_A (res) = a;
1391   DDR_B (res) = b;
1392   DDR_LOOP_NEST (res).create (0);
1393   DDR_REVERSED_P (res) = false;
1394   DDR_SUBSCRIPTS (res).create (0);
1395   DDR_DIR_VECTS (res).create (0);
1396   DDR_DIST_VECTS (res).create (0);
1397 
1398   if (a == NULL || b == NULL)
1399     {
1400       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1401       return res;
1402     }
1403 
1404   /* If the data references do not alias, then they are independent.  */
1405   if (!dr_may_alias_p (a, b, loop_nest.exists ()))
1406     {
1407       DDR_ARE_DEPENDENT (res) = chrec_known;
1408       return res;
1409     }
1410 
1411   /* The case where the references are exactly the same.  */
1412   if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1413     {
1414      if (loop_nest.exists ()
1415         && !object_address_invariant_in_loop_p (loop_nest[0],
1416        					        DR_BASE_OBJECT (a)))
1417       {
1418         DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1419         return res;
1420       }
1421       DDR_AFFINE_P (res) = true;
1422       DDR_ARE_DEPENDENT (res) = NULL_TREE;
1423       DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1424       DDR_LOOP_NEST (res) = loop_nest;
1425       DDR_INNER_LOOP (res) = 0;
1426       DDR_SELF_REFERENCE (res) = true;
1427       for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1428        {
1429          struct subscript *subscript;
1430 
1431          subscript = XNEW (struct subscript);
1432          SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1433          SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1434          SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1435          SUB_DISTANCE (subscript) = chrec_dont_know;
1436          DDR_SUBSCRIPTS (res).safe_push (subscript);
1437        }
1438       return res;
1439     }
1440 
1441   /* If the references do not access the same object, we do not know
1442      whether they alias or not.  */
1443   if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1444     {
1445       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1446       return res;
1447     }
1448 
1449   /* If the base of the object is not invariant in the loop nest, we cannot
1450      analyze it.  TODO -- in fact, it would suffice to record that there may
1451      be arbitrary dependences in the loops where the base object varies.  */
1452   if (loop_nest.exists ()
1453       && !object_address_invariant_in_loop_p (loop_nest[0],
1454      					      DR_BASE_OBJECT (a)))
1455     {
1456       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1457       return res;
1458     }
1459 
1460   /* If the number of dimensions of the access to not agree we can have
1461      a pointer access to a component of the array element type and an
1462      array access while the base-objects are still the same.  Punt.  */
1463   if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1464     {
1465       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1466       return res;
1467     }
1468 
1469   DDR_AFFINE_P (res) = true;
1470   DDR_ARE_DEPENDENT (res) = NULL_TREE;
1471   DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1472   DDR_LOOP_NEST (res) = loop_nest;
1473   DDR_INNER_LOOP (res) = 0;
1474   DDR_SELF_REFERENCE (res) = false;
1475 
1476   for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1477     {
1478       struct subscript *subscript;
1479 
1480       subscript = XNEW (struct subscript);
1481       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1482       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1483       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1484       SUB_DISTANCE (subscript) = chrec_dont_know;
1485       DDR_SUBSCRIPTS (res).safe_push (subscript);
1486     }
1487 
1488   return res;
1489 }
1490 
1491 /* Frees memory used by the conflict function F.  */
1492 
1493 static void
free_conflict_function(conflict_function * f)1494 free_conflict_function (conflict_function *f)
1495 {
1496   unsigned i;
1497 
1498   if (CF_NONTRIVIAL_P (f))
1499     {
1500       for (i = 0; i < f->n; i++)
1501 	affine_fn_free (f->fns[i]);
1502     }
1503   free (f);
1504 }
1505 
1506 /* Frees memory used by SUBSCRIPTS.  */
1507 
1508 static void
free_subscripts(vec<subscript_p> subscripts)1509 free_subscripts (vec<subscript_p> subscripts)
1510 {
1511   unsigned i;
1512   subscript_p s;
1513 
1514   FOR_EACH_VEC_ELT (subscripts, i, s)
1515     {
1516       free_conflict_function (s->conflicting_iterations_in_a);
1517       free_conflict_function (s->conflicting_iterations_in_b);
1518       free (s);
1519     }
1520   subscripts.release ();
1521 }
1522 
1523 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1524    description.  */
1525 
1526 static inline void
finalize_ddr_dependent(struct data_dependence_relation * ddr,tree chrec)1527 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1528 			tree chrec)
1529 {
1530   DDR_ARE_DEPENDENT (ddr) = chrec;
1531   free_subscripts (DDR_SUBSCRIPTS (ddr));
1532   DDR_SUBSCRIPTS (ddr).create (0);
1533 }
1534 
1535 /* The dependence relation DDR cannot be represented by a distance
1536    vector.  */
1537 
1538 static inline void
non_affine_dependence_relation(struct data_dependence_relation * ddr)1539 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1540 {
1541   if (dump_file && (dump_flags & TDF_DETAILS))
1542     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1543 
1544   DDR_AFFINE_P (ddr) = false;
1545 }
1546 
1547 
1548 
1549 /* This section contains the classic Banerjee tests.  */
1550 
1551 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1552    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
1553 
1554 static inline bool
ziv_subscript_p(const_tree chrec_a,const_tree chrec_b)1555 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1556 {
1557   return (evolution_function_is_constant_p (chrec_a)
1558 	  && evolution_function_is_constant_p (chrec_b));
1559 }
1560 
1561 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1562    variable, i.e., if the SIV (Single Index Variable) test is true.  */
1563 
1564 static bool
siv_subscript_p(const_tree chrec_a,const_tree chrec_b)1565 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1566 {
1567   if ((evolution_function_is_constant_p (chrec_a)
1568        && evolution_function_is_univariate_p (chrec_b))
1569       || (evolution_function_is_constant_p (chrec_b)
1570 	  && evolution_function_is_univariate_p (chrec_a)))
1571     return true;
1572 
1573   if (evolution_function_is_univariate_p (chrec_a)
1574       && evolution_function_is_univariate_p (chrec_b))
1575     {
1576       switch (TREE_CODE (chrec_a))
1577 	{
1578 	case POLYNOMIAL_CHREC:
1579 	  switch (TREE_CODE (chrec_b))
1580 	    {
1581 	    case POLYNOMIAL_CHREC:
1582 	      if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1583 		return false;
1584 
1585 	    default:
1586 	      return true;
1587 	    }
1588 
1589 	default:
1590 	  return true;
1591 	}
1592     }
1593 
1594   return false;
1595 }
1596 
1597 /* Creates a conflict function with N dimensions.  The affine functions
1598    in each dimension follow.  */
1599 
1600 static conflict_function *
conflict_fn(unsigned n,...)1601 conflict_fn (unsigned n, ...)
1602 {
1603   unsigned i;
1604   conflict_function *ret = XCNEW (conflict_function);
1605   va_list ap;
1606 
1607   gcc_assert (0 < n && n <= MAX_DIM);
1608   va_start(ap, n);
1609 
1610   ret->n = n;
1611   for (i = 0; i < n; i++)
1612     ret->fns[i] = va_arg (ap, affine_fn);
1613   va_end(ap);
1614 
1615   return ret;
1616 }
1617 
1618 /* Returns constant affine function with value CST.  */
1619 
1620 static affine_fn
affine_fn_cst(tree cst)1621 affine_fn_cst (tree cst)
1622 {
1623   affine_fn fn;
1624   fn.create (1);
1625   fn.quick_push (cst);
1626   return fn;
1627 }
1628 
1629 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
1630 
1631 static affine_fn
affine_fn_univar(tree cst,unsigned dim,tree coef)1632 affine_fn_univar (tree cst, unsigned dim, tree coef)
1633 {
1634   affine_fn fn;
1635   fn.create (dim + 1);
1636   unsigned i;
1637 
1638   gcc_assert (dim > 0);
1639   fn.quick_push (cst);
1640   for (i = 1; i < dim; i++)
1641     fn.quick_push (integer_zero_node);
1642   fn.quick_push (coef);
1643   return fn;
1644 }
1645 
1646 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
1647    *OVERLAPS_B are initialized to the functions that describe the
1648    relation between the elements accessed twice by CHREC_A and
1649    CHREC_B.  For k >= 0, the following property is verified:
1650 
1651    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
1652 
1653 static void
analyze_ziv_subscript(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)1654 analyze_ziv_subscript (tree chrec_a,
1655 		       tree chrec_b,
1656 		       conflict_function **overlaps_a,
1657 		       conflict_function **overlaps_b,
1658 		       tree *last_conflicts)
1659 {
1660   tree type, difference;
1661   dependence_stats.num_ziv++;
1662 
1663   if (dump_file && (dump_flags & TDF_DETAILS))
1664     fprintf (dump_file, "(analyze_ziv_subscript \n");
1665 
1666   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1667   chrec_a = chrec_convert (type, chrec_a, NULL);
1668   chrec_b = chrec_convert (type, chrec_b, NULL);
1669   difference = chrec_fold_minus (type, chrec_a, chrec_b);
1670 
1671   switch (TREE_CODE (difference))
1672     {
1673     case INTEGER_CST:
1674       if (integer_zerop (difference))
1675 	{
1676 	  /* The difference is equal to zero: the accessed index
1677 	     overlaps for each iteration in the loop.  */
1678 	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1679 	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1680 	  *last_conflicts = chrec_dont_know;
1681 	  dependence_stats.num_ziv_dependent++;
1682 	}
1683       else
1684 	{
1685 	  /* The accesses do not overlap.  */
1686 	  *overlaps_a = conflict_fn_no_dependence ();
1687 	  *overlaps_b = conflict_fn_no_dependence ();
1688 	  *last_conflicts = integer_zero_node;
1689 	  dependence_stats.num_ziv_independent++;
1690 	}
1691       break;
1692 
1693     default:
1694       /* We're not sure whether the indexes overlap.  For the moment,
1695 	 conservatively answer "don't know".  */
1696       if (dump_file && (dump_flags & TDF_DETAILS))
1697 	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1698 
1699       *overlaps_a = conflict_fn_not_known ();
1700       *overlaps_b = conflict_fn_not_known ();
1701       *last_conflicts = chrec_dont_know;
1702       dependence_stats.num_ziv_unimplemented++;
1703       break;
1704     }
1705 
1706   if (dump_file && (dump_flags & TDF_DETAILS))
1707     fprintf (dump_file, ")\n");
1708 }
1709 
1710 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1711    and only if it fits to the int type.  If this is not the case, or the
1712    bound  on the number of iterations of LOOP could not be derived, returns
1713    chrec_dont_know.  */
1714 
1715 static tree
max_stmt_executions_tree(struct loop * loop)1716 max_stmt_executions_tree (struct loop *loop)
1717 {
1718   double_int nit;
1719 
1720   if (!max_stmt_executions (loop, &nit))
1721     return chrec_dont_know;
1722 
1723   if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1724     return chrec_dont_know;
1725 
1726   return double_int_to_tree (unsigned_type_node, nit);
1727 }
1728 
1729 /* Determine whether the CHREC is always positive/negative.  If the expression
1730    cannot be statically analyzed, return false, otherwise set the answer into
1731    VALUE.  */
1732 
1733 static bool
chrec_is_positive(tree chrec,bool * value)1734 chrec_is_positive (tree chrec, bool *value)
1735 {
1736   bool value0, value1, value2;
1737   tree end_value, nb_iter;
1738 
1739   switch (TREE_CODE (chrec))
1740     {
1741     case POLYNOMIAL_CHREC:
1742       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
1743 	  || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
1744 	return false;
1745 
1746       /* FIXME -- overflows.  */
1747       if (value0 == value1)
1748 	{
1749 	  *value = value0;
1750 	  return true;
1751 	}
1752 
1753       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1754 	 and the proof consists in showing that the sign never
1755 	 changes during the execution of the loop, from 0 to
1756 	 loop->nb_iterations.  */
1757       if (!evolution_function_is_affine_p (chrec))
1758 	return false;
1759 
1760       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
1761       if (chrec_contains_undetermined (nb_iter))
1762 	return false;
1763 
1764 #if 0
1765       /* TODO -- If the test is after the exit, we may decrease the number of
1766 	 iterations by one.  */
1767       if (after_exit)
1768 	nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
1769 #endif
1770 
1771       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
1772 
1773       if (!chrec_is_positive (end_value, &value2))
1774 	return false;
1775 
1776       *value = value0;
1777       return value0 == value1;
1778 
1779     case INTEGER_CST:
1780       switch (tree_int_cst_sgn (chrec))
1781 	{
1782 	case -1:
1783 	  *value = false;
1784 	  break;
1785 	case 1:
1786 	  *value = true;
1787 	  break;
1788 	default:
1789 	  return false;
1790 	}
1791       return true;
1792 
1793     default:
1794       return false;
1795     }
1796 }
1797 
1798 
1799 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1800    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
1801    *OVERLAPS_B are initialized to the functions that describe the
1802    relation between the elements accessed twice by CHREC_A and
1803    CHREC_B.  For k >= 0, the following property is verified:
1804 
1805    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
1806 
1807 static void
analyze_siv_subscript_cst_affine(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)1808 analyze_siv_subscript_cst_affine (tree chrec_a,
1809 				  tree chrec_b,
1810 				  conflict_function **overlaps_a,
1811 				  conflict_function **overlaps_b,
1812 				  tree *last_conflicts)
1813 {
1814   bool value0, value1, value2;
1815   tree type, difference, tmp;
1816 
1817   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1818   chrec_a = chrec_convert (type, chrec_a, NULL);
1819   chrec_b = chrec_convert (type, chrec_b, NULL);
1820   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1821 
1822   /* Special case overlap in the first iteration.  */
1823   if (integer_zerop (difference))
1824     {
1825       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1826       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1827       *last_conflicts = integer_one_node;
1828       return;
1829     }
1830 
1831   if (!chrec_is_positive (initial_condition (difference), &value0))
1832     {
1833       if (dump_file && (dump_flags & TDF_DETAILS))
1834 	fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1835 
1836       dependence_stats.num_siv_unimplemented++;
1837       *overlaps_a = conflict_fn_not_known ();
1838       *overlaps_b = conflict_fn_not_known ();
1839       *last_conflicts = chrec_dont_know;
1840       return;
1841     }
1842   else
1843     {
1844       if (value0 == false)
1845 	{
1846 	  if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1847 	    {
1848 	      if (dump_file && (dump_flags & TDF_DETAILS))
1849 		fprintf (dump_file, "siv test failed: chrec not positive.\n");
1850 
1851 	      *overlaps_a = conflict_fn_not_known ();
1852 	      *overlaps_b = conflict_fn_not_known ();
1853 	      *last_conflicts = chrec_dont_know;
1854 	      dependence_stats.num_siv_unimplemented++;
1855 	      return;
1856 	    }
1857 	  else
1858 	    {
1859 	      if (value1 == true)
1860 		{
1861 		  /* Example:
1862 		     chrec_a = 12
1863 		     chrec_b = {10, +, 1}
1864 		  */
1865 
1866 		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1867 		    {
1868 		      HOST_WIDE_INT numiter;
1869 		      struct loop *loop = get_chrec_loop (chrec_b);
1870 
1871 		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1872 		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
1873 					 fold_build1 (ABS_EXPR, type, difference),
1874 					 CHREC_RIGHT (chrec_b));
1875 		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1876 		      *last_conflicts = integer_one_node;
1877 
1878 
1879 		      /* Perform weak-zero siv test to see if overlap is
1880 			 outside the loop bounds.  */
1881 		      numiter = max_stmt_executions_int (loop);
1882 
1883 		      if (numiter >= 0
1884 			  && compare_tree_int (tmp, numiter) > 0)
1885 			{
1886 			  free_conflict_function (*overlaps_a);
1887 			  free_conflict_function (*overlaps_b);
1888 			  *overlaps_a = conflict_fn_no_dependence ();
1889 			  *overlaps_b = conflict_fn_no_dependence ();
1890 			  *last_conflicts = integer_zero_node;
1891 			  dependence_stats.num_siv_independent++;
1892 			  return;
1893 			}
1894 		      dependence_stats.num_siv_dependent++;
1895 		      return;
1896 		    }
1897 
1898 		  /* When the step does not divide the difference, there are
1899 		     no overlaps.  */
1900 		  else
1901 		    {
1902 		      *overlaps_a = conflict_fn_no_dependence ();
1903 		      *overlaps_b = conflict_fn_no_dependence ();
1904 		      *last_conflicts = integer_zero_node;
1905 		      dependence_stats.num_siv_independent++;
1906 		      return;
1907 		    }
1908 		}
1909 
1910 	      else
1911 		{
1912 		  /* Example:
1913 		     chrec_a = 12
1914 		     chrec_b = {10, +, -1}
1915 
1916 		     In this case, chrec_a will not overlap with chrec_b.  */
1917 		  *overlaps_a = conflict_fn_no_dependence ();
1918 		  *overlaps_b = conflict_fn_no_dependence ();
1919 		  *last_conflicts = integer_zero_node;
1920 		  dependence_stats.num_siv_independent++;
1921 		  return;
1922 		}
1923 	    }
1924 	}
1925       else
1926 	{
1927 	  if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1928 	    {
1929 	      if (dump_file && (dump_flags & TDF_DETAILS))
1930 		fprintf (dump_file, "siv test failed: chrec not positive.\n");
1931 
1932 	      *overlaps_a = conflict_fn_not_known ();
1933 	      *overlaps_b = conflict_fn_not_known ();
1934 	      *last_conflicts = chrec_dont_know;
1935 	      dependence_stats.num_siv_unimplemented++;
1936 	      return;
1937 	    }
1938 	  else
1939 	    {
1940 	      if (value2 == false)
1941 		{
1942 		  /* Example:
1943 		     chrec_a = 3
1944 		     chrec_b = {10, +, -1}
1945 		  */
1946 		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1947 		    {
1948 		      HOST_WIDE_INT numiter;
1949 		      struct loop *loop = get_chrec_loop (chrec_b);
1950 
1951 		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1952 		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1953 					 CHREC_RIGHT (chrec_b));
1954 		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1955 		      *last_conflicts = integer_one_node;
1956 
1957 		      /* Perform weak-zero siv test to see if overlap is
1958 			 outside the loop bounds.  */
1959 		      numiter = max_stmt_executions_int (loop);
1960 
1961 		      if (numiter >= 0
1962 			  && compare_tree_int (tmp, numiter) > 0)
1963 			{
1964 			  free_conflict_function (*overlaps_a);
1965 			  free_conflict_function (*overlaps_b);
1966 			  *overlaps_a = conflict_fn_no_dependence ();
1967 			  *overlaps_b = conflict_fn_no_dependence ();
1968 			  *last_conflicts = integer_zero_node;
1969 			  dependence_stats.num_siv_independent++;
1970 			  return;
1971 			}
1972 		      dependence_stats.num_siv_dependent++;
1973 		      return;
1974 		    }
1975 
1976 		  /* When the step does not divide the difference, there
1977 		     are no overlaps.  */
1978 		  else
1979 		    {
1980 		      *overlaps_a = conflict_fn_no_dependence ();
1981 		      *overlaps_b = conflict_fn_no_dependence ();
1982 		      *last_conflicts = integer_zero_node;
1983 		      dependence_stats.num_siv_independent++;
1984 		      return;
1985 		    }
1986 		}
1987 	      else
1988 		{
1989 		  /* Example:
1990 		     chrec_a = 3
1991 		     chrec_b = {4, +, 1}
1992 
1993 		     In this case, chrec_a will not overlap with chrec_b.  */
1994 		  *overlaps_a = conflict_fn_no_dependence ();
1995 		  *overlaps_b = conflict_fn_no_dependence ();
1996 		  *last_conflicts = integer_zero_node;
1997 		  dependence_stats.num_siv_independent++;
1998 		  return;
1999 		}
2000 	    }
2001 	}
2002     }
2003 }
2004 
2005 /* Helper recursive function for initializing the matrix A.  Returns
2006    the initial value of CHREC.  */
2007 
2008 static tree
initialize_matrix_A(lambda_matrix A,tree chrec,unsigned index,int mult)2009 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
2010 {
2011   gcc_assert (chrec);
2012 
2013   switch (TREE_CODE (chrec))
2014     {
2015     case POLYNOMIAL_CHREC:
2016       gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
2017 
2018       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
2019       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
2020 
2021     case PLUS_EXPR:
2022     case MULT_EXPR:
2023     case MINUS_EXPR:
2024       {
2025 	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2026 	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
2027 
2028 	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2029       }
2030 
2031     case NOP_EXPR:
2032       {
2033 	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2034 	return chrec_convert (chrec_type (chrec), op, NULL);
2035       }
2036 
2037     case BIT_NOT_EXPR:
2038       {
2039 	/* Handle ~X as -1 - X.  */
2040 	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2041 	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2042 			      build_int_cst (TREE_TYPE (chrec), -1), op);
2043       }
2044 
2045     case INTEGER_CST:
2046       return chrec;
2047 
2048     default:
2049       gcc_unreachable ();
2050       return NULL_TREE;
2051     }
2052 }
2053 
2054 #define FLOOR_DIV(x,y) ((x) / (y))
2055 
2056 /* Solves the special case of the Diophantine equation:
2057    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2058 
2059    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
2060    number of iterations that loops X and Y run.  The overlaps will be
2061    constructed as evolutions in dimension DIM.  */
2062 
2063 static void
compute_overlap_steps_for_affine_univar(int niter,int step_a,int step_b,affine_fn * overlaps_a,affine_fn * overlaps_b,tree * last_conflicts,int dim)2064 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
2065 					 affine_fn *overlaps_a,
2066 					 affine_fn *overlaps_b,
2067 					 tree *last_conflicts, int dim)
2068 {
2069   if (((step_a > 0 && step_b > 0)
2070        || (step_a < 0 && step_b < 0)))
2071     {
2072       int step_overlaps_a, step_overlaps_b;
2073       int gcd_steps_a_b, last_conflict, tau2;
2074 
2075       gcd_steps_a_b = gcd (step_a, step_b);
2076       step_overlaps_a = step_b / gcd_steps_a_b;
2077       step_overlaps_b = step_a / gcd_steps_a_b;
2078 
2079       if (niter > 0)
2080 	{
2081 	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
2082 	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2083 	  last_conflict = tau2;
2084 	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2085 	}
2086       else
2087 	*last_conflicts = chrec_dont_know;
2088 
2089       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2090 				      build_int_cst (NULL_TREE,
2091 						     step_overlaps_a));
2092       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2093 				      build_int_cst (NULL_TREE,
2094 						     step_overlaps_b));
2095     }
2096 
2097   else
2098     {
2099       *overlaps_a = affine_fn_cst (integer_zero_node);
2100       *overlaps_b = affine_fn_cst (integer_zero_node);
2101       *last_conflicts = integer_zero_node;
2102     }
2103 }
2104 
2105 /* Solves the special case of a Diophantine equation where CHREC_A is
2106    an affine bivariate function, and CHREC_B is an affine univariate
2107    function.  For example,
2108 
2109    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2110 
2111    has the following overlapping functions:
2112 
2113    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2114    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2115    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2116 
2117    FORNOW: This is a specialized implementation for a case occurring in
2118    a common benchmark.  Implement the general algorithm.  */
2119 
2120 static void
compute_overlap_steps_for_affine_1_2(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)2121 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2122 				      conflict_function **overlaps_a,
2123 				      conflict_function **overlaps_b,
2124 				      tree *last_conflicts)
2125 {
2126   bool xz_p, yz_p, xyz_p;
2127   int step_x, step_y, step_z;
2128   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2129   affine_fn overlaps_a_xz, overlaps_b_xz;
2130   affine_fn overlaps_a_yz, overlaps_b_yz;
2131   affine_fn overlaps_a_xyz, overlaps_b_xyz;
2132   affine_fn ova1, ova2, ovb;
2133   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2134 
2135   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2136   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2137   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2138 
2139   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
2140   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
2141   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
2142 
2143   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2144     {
2145       if (dump_file && (dump_flags & TDF_DETAILS))
2146 	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2147 
2148       *overlaps_a = conflict_fn_not_known ();
2149       *overlaps_b = conflict_fn_not_known ();
2150       *last_conflicts = chrec_dont_know;
2151       return;
2152     }
2153 
2154   niter = MIN (niter_x, niter_z);
2155   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2156 					   &overlaps_a_xz,
2157 					   &overlaps_b_xz,
2158 					   &last_conflicts_xz, 1);
2159   niter = MIN (niter_y, niter_z);
2160   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2161 					   &overlaps_a_yz,
2162 					   &overlaps_b_yz,
2163 					   &last_conflicts_yz, 2);
2164   niter = MIN (niter_x, niter_z);
2165   niter = MIN (niter_y, niter);
2166   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2167 					   &overlaps_a_xyz,
2168 					   &overlaps_b_xyz,
2169 					   &last_conflicts_xyz, 3);
2170 
2171   xz_p = !integer_zerop (last_conflicts_xz);
2172   yz_p = !integer_zerop (last_conflicts_yz);
2173   xyz_p = !integer_zerop (last_conflicts_xyz);
2174 
2175   if (xz_p || yz_p || xyz_p)
2176     {
2177       ova1 = affine_fn_cst (integer_zero_node);
2178       ova2 = affine_fn_cst (integer_zero_node);
2179       ovb = affine_fn_cst (integer_zero_node);
2180       if (xz_p)
2181 	{
2182 	  affine_fn t0 = ova1;
2183 	  affine_fn t2 = ovb;
2184 
2185 	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2186 	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
2187 	  affine_fn_free (t0);
2188 	  affine_fn_free (t2);
2189 	  *last_conflicts = last_conflicts_xz;
2190 	}
2191       if (yz_p)
2192 	{
2193 	  affine_fn t0 = ova2;
2194 	  affine_fn t2 = ovb;
2195 
2196 	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2197 	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
2198 	  affine_fn_free (t0);
2199 	  affine_fn_free (t2);
2200 	  *last_conflicts = last_conflicts_yz;
2201 	}
2202       if (xyz_p)
2203 	{
2204 	  affine_fn t0 = ova1;
2205 	  affine_fn t2 = ova2;
2206 	  affine_fn t4 = ovb;
2207 
2208 	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2209 	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2210 	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2211 	  affine_fn_free (t0);
2212 	  affine_fn_free (t2);
2213 	  affine_fn_free (t4);
2214 	  *last_conflicts = last_conflicts_xyz;
2215 	}
2216       *overlaps_a = conflict_fn (2, ova1, ova2);
2217       *overlaps_b = conflict_fn (1, ovb);
2218     }
2219   else
2220     {
2221       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2222       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2223       *last_conflicts = integer_zero_node;
2224     }
2225 
2226   affine_fn_free (overlaps_a_xz);
2227   affine_fn_free (overlaps_b_xz);
2228   affine_fn_free (overlaps_a_yz);
2229   affine_fn_free (overlaps_b_yz);
2230   affine_fn_free (overlaps_a_xyz);
2231   affine_fn_free (overlaps_b_xyz);
2232 }
2233 
2234 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
2235 
2236 static void
lambda_vector_copy(lambda_vector vec1,lambda_vector vec2,int size)2237 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2238 		    int size)
2239 {
2240   memcpy (vec2, vec1, size * sizeof (*vec1));
2241 }
2242 
2243 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
2244 
2245 static void
lambda_matrix_copy(lambda_matrix mat1,lambda_matrix mat2,int m,int n)2246 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2247 		    int m, int n)
2248 {
2249   int i;
2250 
2251   for (i = 0; i < m; i++)
2252     lambda_vector_copy (mat1[i], mat2[i], n);
2253 }
2254 
2255 /* Store the N x N identity matrix in MAT.  */
2256 
2257 static void
lambda_matrix_id(lambda_matrix mat,int size)2258 lambda_matrix_id (lambda_matrix mat, int size)
2259 {
2260   int i, j;
2261 
2262   for (i = 0; i < size; i++)
2263     for (j = 0; j < size; j++)
2264       mat[i][j] = (i == j) ? 1 : 0;
2265 }
2266 
2267 /* Return the first nonzero element of vector VEC1 between START and N.
2268    We must have START <= N.   Returns N if VEC1 is the zero vector.  */
2269 
2270 static int
lambda_vector_first_nz(lambda_vector vec1,int n,int start)2271 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2272 {
2273   int j = start;
2274   while (j < n && vec1[j] == 0)
2275     j++;
2276   return j;
2277 }
2278 
2279 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2280    R2 = R2 + CONST1 * R1.  */
2281 
2282 static void
lambda_matrix_row_add(lambda_matrix mat,int n,int r1,int r2,int const1)2283 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2284 {
2285   int i;
2286 
2287   if (const1 == 0)
2288     return;
2289 
2290   for (i = 0; i < n; i++)
2291     mat[r2][i] += const1 * mat[r1][i];
2292 }
2293 
2294 /* Swap rows R1 and R2 in matrix MAT.  */
2295 
2296 static void
lambda_matrix_row_exchange(lambda_matrix mat,int r1,int r2)2297 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2298 {
2299   lambda_vector row;
2300 
2301   row = mat[r1];
2302   mat[r1] = mat[r2];
2303   mat[r2] = row;
2304 }
2305 
2306 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2307    and store the result in VEC2.  */
2308 
2309 static void
lambda_vector_mult_const(lambda_vector vec1,lambda_vector vec2,int size,int const1)2310 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2311 			  int size, int const1)
2312 {
2313   int i;
2314 
2315   if (const1 == 0)
2316     lambda_vector_clear (vec2, size);
2317   else
2318     for (i = 0; i < size; i++)
2319       vec2[i] = const1 * vec1[i];
2320 }
2321 
2322 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
2323 
2324 static void
lambda_vector_negate(lambda_vector vec1,lambda_vector vec2,int size)2325 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2326 		      int size)
2327 {
2328   lambda_vector_mult_const (vec1, vec2, size, -1);
2329 }
2330 
2331 /* Negate row R1 of matrix MAT which has N columns.  */
2332 
2333 static void
lambda_matrix_row_negate(lambda_matrix mat,int n,int r1)2334 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2335 {
2336   lambda_vector_negate (mat[r1], mat[r1], n);
2337 }
2338 
2339 /* Return true if two vectors are equal.  */
2340 
2341 static bool
lambda_vector_equal(lambda_vector vec1,lambda_vector vec2,int size)2342 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2343 {
2344   int i;
2345   for (i = 0; i < size; i++)
2346     if (vec1[i] != vec2[i])
2347       return false;
2348   return true;
2349 }
2350 
2351 /* Given an M x N integer matrix A, this function determines an M x
2352    M unimodular matrix U, and an M x N echelon matrix S such that
2353    "U.A = S".  This decomposition is also known as "right Hermite".
2354 
2355    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2356    Restructuring Compilers" Utpal Banerjee.  */
2357 
2358 static void
lambda_matrix_right_hermite(lambda_matrix A,int m,int n,lambda_matrix S,lambda_matrix U)2359 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2360 			     lambda_matrix S, lambda_matrix U)
2361 {
2362   int i, j, i0 = 0;
2363 
2364   lambda_matrix_copy (A, S, m, n);
2365   lambda_matrix_id (U, m);
2366 
2367   for (j = 0; j < n; j++)
2368     {
2369       if (lambda_vector_first_nz (S[j], m, i0) < m)
2370 	{
2371 	  ++i0;
2372 	  for (i = m - 1; i >= i0; i--)
2373 	    {
2374 	      while (S[i][j] != 0)
2375 		{
2376 		  int sigma, factor, a, b;
2377 
2378 		  a = S[i-1][j];
2379 		  b = S[i][j];
2380 		  sigma = (a * b < 0) ? -1: 1;
2381 		  a = abs (a);
2382 		  b = abs (b);
2383 		  factor = sigma * (a / b);
2384 
2385 		  lambda_matrix_row_add (S, n, i, i-1, -factor);
2386 		  lambda_matrix_row_exchange (S, i, i-1);
2387 
2388 		  lambda_matrix_row_add (U, m, i, i-1, -factor);
2389 		  lambda_matrix_row_exchange (U, i, i-1);
2390 		}
2391 	    }
2392 	}
2393     }
2394 }
2395 
2396 /* Determines the overlapping elements due to accesses CHREC_A and
2397    CHREC_B, that are affine functions.  This function cannot handle
2398    symbolic evolution functions, ie. when initial conditions are
2399    parameters, because it uses lambda matrices of integers.  */
2400 
2401 static void
analyze_subscript_affine_affine(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)2402 analyze_subscript_affine_affine (tree chrec_a,
2403 				 tree chrec_b,
2404 				 conflict_function **overlaps_a,
2405 				 conflict_function **overlaps_b,
2406 				 tree *last_conflicts)
2407 {
2408   unsigned nb_vars_a, nb_vars_b, dim;
2409   HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2410   lambda_matrix A, U, S;
2411   struct obstack scratch_obstack;
2412 
2413   if (eq_evolutions_p (chrec_a, chrec_b))
2414     {
2415       /* The accessed index overlaps for each iteration in the
2416 	 loop.  */
2417       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2418       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2419       *last_conflicts = chrec_dont_know;
2420       return;
2421     }
2422   if (dump_file && (dump_flags & TDF_DETAILS))
2423     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2424 
2425   /* For determining the initial intersection, we have to solve a
2426      Diophantine equation.  This is the most time consuming part.
2427 
2428      For answering to the question: "Is there a dependence?" we have
2429      to prove that there exists a solution to the Diophantine
2430      equation, and that the solution is in the iteration domain,
2431      i.e. the solution is positive or zero, and that the solution
2432      happens before the upper bound loop.nb_iterations.  Otherwise
2433      there is no dependence.  This function outputs a description of
2434      the iterations that hold the intersections.  */
2435 
2436   nb_vars_a = nb_vars_in_chrec (chrec_a);
2437   nb_vars_b = nb_vars_in_chrec (chrec_b);
2438 
2439   gcc_obstack_init (&scratch_obstack);
2440 
2441   dim = nb_vars_a + nb_vars_b;
2442   U = lambda_matrix_new (dim, dim, &scratch_obstack);
2443   A = lambda_matrix_new (dim, 1, &scratch_obstack);
2444   S = lambda_matrix_new (dim, 1, &scratch_obstack);
2445 
2446   init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2447   init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2448   gamma = init_b - init_a;
2449 
2450   /* Don't do all the hard work of solving the Diophantine equation
2451      when we already know the solution: for example,
2452      | {3, +, 1}_1
2453      | {3, +, 4}_2
2454      | gamma = 3 - 3 = 0.
2455      Then the first overlap occurs during the first iterations:
2456      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2457   */
2458   if (gamma == 0)
2459     {
2460       if (nb_vars_a == 1 && nb_vars_b == 1)
2461 	{
2462 	  HOST_WIDE_INT step_a, step_b;
2463 	  HOST_WIDE_INT niter, niter_a, niter_b;
2464 	  affine_fn ova, ovb;
2465 
2466 	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
2467 	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
2468 	  niter = MIN (niter_a, niter_b);
2469 	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2470 	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2471 
2472 	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2473 						   &ova, &ovb,
2474 						   last_conflicts, 1);
2475 	  *overlaps_a = conflict_fn (1, ova);
2476 	  *overlaps_b = conflict_fn (1, ovb);
2477 	}
2478 
2479       else if (nb_vars_a == 2 && nb_vars_b == 1)
2480 	compute_overlap_steps_for_affine_1_2
2481 	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2482 
2483       else if (nb_vars_a == 1 && nb_vars_b == 2)
2484 	compute_overlap_steps_for_affine_1_2
2485 	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2486 
2487       else
2488 	{
2489 	  if (dump_file && (dump_flags & TDF_DETAILS))
2490 	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2491 	  *overlaps_a = conflict_fn_not_known ();
2492 	  *overlaps_b = conflict_fn_not_known ();
2493 	  *last_conflicts = chrec_dont_know;
2494 	}
2495       goto end_analyze_subs_aa;
2496     }
2497 
2498   /* U.A = S */
2499   lambda_matrix_right_hermite (A, dim, 1, S, U);
2500 
2501   if (S[0][0] < 0)
2502     {
2503       S[0][0] *= -1;
2504       lambda_matrix_row_negate (U, dim, 0);
2505     }
2506   gcd_alpha_beta = S[0][0];
2507 
2508   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2509      but that is a quite strange case.  Instead of ICEing, answer
2510      don't know.  */
2511   if (gcd_alpha_beta == 0)
2512     {
2513       *overlaps_a = conflict_fn_not_known ();
2514       *overlaps_b = conflict_fn_not_known ();
2515       *last_conflicts = chrec_dont_know;
2516       goto end_analyze_subs_aa;
2517     }
2518 
2519   /* The classic "gcd-test".  */
2520   if (!int_divides_p (gcd_alpha_beta, gamma))
2521     {
2522       /* The "gcd-test" has determined that there is no integer
2523 	 solution, i.e. there is no dependence.  */
2524       *overlaps_a = conflict_fn_no_dependence ();
2525       *overlaps_b = conflict_fn_no_dependence ();
2526       *last_conflicts = integer_zero_node;
2527     }
2528 
2529   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
2530   else if (nb_vars_a == 1 && nb_vars_b == 1)
2531     {
2532       /* Both functions should have the same evolution sign.  */
2533       if (((A[0][0] > 0 && -A[1][0] > 0)
2534 	   || (A[0][0] < 0 && -A[1][0] < 0)))
2535 	{
2536 	  /* The solutions are given by:
2537 	     |
2538 	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
2539 	     |                           [u21 u22]    [y0]
2540 
2541 	     For a given integer t.  Using the following variables,
2542 
2543 	     | i0 = u11 * gamma / gcd_alpha_beta
2544 	     | j0 = u12 * gamma / gcd_alpha_beta
2545 	     | i1 = u21
2546 	     | j1 = u22
2547 
2548 	     the solutions are:
2549 
2550 	     | x0 = i0 + i1 * t,
2551 	     | y0 = j0 + j1 * t.  */
2552       	  HOST_WIDE_INT i0, j0, i1, j1;
2553 
2554 	  i0 = U[0][0] * gamma / gcd_alpha_beta;
2555 	  j0 = U[0][1] * gamma / gcd_alpha_beta;
2556 	  i1 = U[1][0];
2557 	  j1 = U[1][1];
2558 
2559 	  if ((i1 == 0 && i0 < 0)
2560 	      || (j1 == 0 && j0 < 0))
2561 	    {
2562 	      /* There is no solution.
2563 		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2564 		 falls in here, but for the moment we don't look at the
2565 		 upper bound of the iteration domain.  */
2566 	      *overlaps_a = conflict_fn_no_dependence ();
2567 	      *overlaps_b = conflict_fn_no_dependence ();
2568 	      *last_conflicts = integer_zero_node;
2569 	      goto end_analyze_subs_aa;
2570 	    }
2571 
2572 	  if (i1 > 0 && j1 > 0)
2573 	    {
2574 	      HOST_WIDE_INT niter_a
2575 		= max_stmt_executions_int (get_chrec_loop (chrec_a));
2576 	      HOST_WIDE_INT niter_b
2577 		= max_stmt_executions_int (get_chrec_loop (chrec_b));
2578 	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2579 
2580 	      /* (X0, Y0) is a solution of the Diophantine equation:
2581 		 "chrec_a (X0) = chrec_b (Y0)".  */
2582 	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2583 					CEIL (-j0, j1));
2584 	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
2585 	      HOST_WIDE_INT y0 = j1 * tau1 + j0;
2586 
2587 	      /* (X1, Y1) is the smallest positive solution of the eq
2588 		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2589 		 first conflict occurs.  */
2590 	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2591 	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2592 	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2593 
2594 	      if (niter > 0)
2595 		{
2596 		  HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2597 					    FLOOR_DIV (niter - j0, j1));
2598 		  HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2599 
2600 		  /* If the overlap occurs outside of the bounds of the
2601 		     loop, there is no dependence.  */
2602 		  if (x1 >= niter || y1 >= niter)
2603 		    {
2604 		      *overlaps_a = conflict_fn_no_dependence ();
2605 		      *overlaps_b = conflict_fn_no_dependence ();
2606 		      *last_conflicts = integer_zero_node;
2607 		      goto end_analyze_subs_aa;
2608 		    }
2609 		  else
2610 		    *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2611 		}
2612 	      else
2613 		*last_conflicts = chrec_dont_know;
2614 
2615 	      *overlaps_a
2616 		= conflict_fn (1,
2617 			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
2618 						 1,
2619 						 build_int_cst (NULL_TREE, i1)));
2620 	      *overlaps_b
2621 		= conflict_fn (1,
2622 			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
2623 						 1,
2624 						 build_int_cst (NULL_TREE, j1)));
2625 	    }
2626 	  else
2627 	    {
2628 	      /* FIXME: For the moment, the upper bound of the
2629 		 iteration domain for i and j is not checked.  */
2630 	      if (dump_file && (dump_flags & TDF_DETAILS))
2631 		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2632 	      *overlaps_a = conflict_fn_not_known ();
2633 	      *overlaps_b = conflict_fn_not_known ();
2634 	      *last_conflicts = chrec_dont_know;
2635 	    }
2636 	}
2637       else
2638 	{
2639 	  if (dump_file && (dump_flags & TDF_DETAILS))
2640 	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2641 	  *overlaps_a = conflict_fn_not_known ();
2642 	  *overlaps_b = conflict_fn_not_known ();
2643 	  *last_conflicts = chrec_dont_know;
2644 	}
2645     }
2646   else
2647     {
2648       if (dump_file && (dump_flags & TDF_DETAILS))
2649 	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2650       *overlaps_a = conflict_fn_not_known ();
2651       *overlaps_b = conflict_fn_not_known ();
2652       *last_conflicts = chrec_dont_know;
2653     }
2654 
2655 end_analyze_subs_aa:
2656   obstack_free (&scratch_obstack, NULL);
2657   if (dump_file && (dump_flags & TDF_DETAILS))
2658     {
2659       fprintf (dump_file, "  (overlaps_a = ");
2660       dump_conflict_function (dump_file, *overlaps_a);
2661       fprintf (dump_file, ")\n  (overlaps_b = ");
2662       dump_conflict_function (dump_file, *overlaps_b);
2663       fprintf (dump_file, "))\n");
2664     }
2665 }
2666 
2667 /* Returns true when analyze_subscript_affine_affine can be used for
2668    determining the dependence relation between chrec_a and chrec_b,
2669    that contain symbols.  This function modifies chrec_a and chrec_b
2670    such that the analysis result is the same, and such that they don't
2671    contain symbols, and then can safely be passed to the analyzer.
2672 
2673    Example: The analysis of the following tuples of evolutions produce
2674    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2675    vs. {0, +, 1}_1
2676 
2677    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2678    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2679 */
2680 
2681 static bool
can_use_analyze_subscript_affine_affine(tree * chrec_a,tree * chrec_b)2682 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2683 {
2684   tree diff, type, left_a, left_b, right_b;
2685 
2686   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2687       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2688     /* FIXME: For the moment not handled.  Might be refined later.  */
2689     return false;
2690 
2691   type = chrec_type (*chrec_a);
2692   left_a = CHREC_LEFT (*chrec_a);
2693   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2694   diff = chrec_fold_minus (type, left_a, left_b);
2695 
2696   if (!evolution_function_is_constant_p (diff))
2697     return false;
2698 
2699   if (dump_file && (dump_flags & TDF_DETAILS))
2700     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2701 
2702   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2703 				     diff, CHREC_RIGHT (*chrec_a));
2704   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2705   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2706 				     build_int_cst (type, 0),
2707 				     right_b);
2708   return true;
2709 }
2710 
2711 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
2712    *OVERLAPS_B are initialized to the functions that describe the
2713    relation between the elements accessed twice by CHREC_A and
2714    CHREC_B.  For k >= 0, the following property is verified:
2715 
2716    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2717 
2718 static void
analyze_siv_subscript(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts,int loop_nest_num)2719 analyze_siv_subscript (tree chrec_a,
2720 		       tree chrec_b,
2721 		       conflict_function **overlaps_a,
2722 		       conflict_function **overlaps_b,
2723 		       tree *last_conflicts,
2724 		       int loop_nest_num)
2725 {
2726   dependence_stats.num_siv++;
2727 
2728   if (dump_file && (dump_flags & TDF_DETAILS))
2729     fprintf (dump_file, "(analyze_siv_subscript \n");
2730 
2731   if (evolution_function_is_constant_p (chrec_a)
2732       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2733     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2734 				      overlaps_a, overlaps_b, last_conflicts);
2735 
2736   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2737 	   && evolution_function_is_constant_p (chrec_b))
2738     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2739 				      overlaps_b, overlaps_a, last_conflicts);
2740 
2741   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2742 	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2743     {
2744       if (!chrec_contains_symbols (chrec_a)
2745 	  && !chrec_contains_symbols (chrec_b))
2746 	{
2747 	  analyze_subscript_affine_affine (chrec_a, chrec_b,
2748 					   overlaps_a, overlaps_b,
2749 					   last_conflicts);
2750 
2751 	  if (CF_NOT_KNOWN_P (*overlaps_a)
2752 	      || CF_NOT_KNOWN_P (*overlaps_b))
2753 	    dependence_stats.num_siv_unimplemented++;
2754 	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2755 		   || CF_NO_DEPENDENCE_P (*overlaps_b))
2756 	    dependence_stats.num_siv_independent++;
2757 	  else
2758 	    dependence_stats.num_siv_dependent++;
2759 	}
2760       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2761 							&chrec_b))
2762 	{
2763 	  analyze_subscript_affine_affine (chrec_a, chrec_b,
2764 					   overlaps_a, overlaps_b,
2765 					   last_conflicts);
2766 
2767 	  if (CF_NOT_KNOWN_P (*overlaps_a)
2768 	      || CF_NOT_KNOWN_P (*overlaps_b))
2769 	    dependence_stats.num_siv_unimplemented++;
2770 	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2771 		   || CF_NO_DEPENDENCE_P (*overlaps_b))
2772 	    dependence_stats.num_siv_independent++;
2773 	  else
2774 	    dependence_stats.num_siv_dependent++;
2775 	}
2776       else
2777 	goto siv_subscript_dontknow;
2778     }
2779 
2780   else
2781     {
2782     siv_subscript_dontknow:;
2783       if (dump_file && (dump_flags & TDF_DETAILS))
2784 	fprintf (dump_file, "  siv test failed: unimplemented");
2785       *overlaps_a = conflict_fn_not_known ();
2786       *overlaps_b = conflict_fn_not_known ();
2787       *last_conflicts = chrec_dont_know;
2788       dependence_stats.num_siv_unimplemented++;
2789     }
2790 
2791   if (dump_file && (dump_flags & TDF_DETAILS))
2792     fprintf (dump_file, ")\n");
2793 }
2794 
2795 /* Returns false if we can prove that the greatest common divisor of the steps
2796    of CHREC does not divide CST, false otherwise.  */
2797 
2798 static bool
gcd_of_steps_may_divide_p(const_tree chrec,const_tree cst)2799 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2800 {
2801   HOST_WIDE_INT cd = 0, val;
2802   tree step;
2803 
2804   if (!host_integerp (cst, 0))
2805     return true;
2806   val = tree_low_cst (cst, 0);
2807 
2808   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2809     {
2810       step = CHREC_RIGHT (chrec);
2811       if (!host_integerp (step, 0))
2812 	return true;
2813       cd = gcd (cd, tree_low_cst (step, 0));
2814       chrec = CHREC_LEFT (chrec);
2815     }
2816 
2817   return val % cd == 0;
2818 }
2819 
2820 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2821    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
2822    functions that describe the relation between the elements accessed
2823    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
2824    is verified:
2825 
2826    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2827 
2828 static void
analyze_miv_subscript(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts,struct loop * loop_nest)2829 analyze_miv_subscript (tree chrec_a,
2830 		       tree chrec_b,
2831 		       conflict_function **overlaps_a,
2832 		       conflict_function **overlaps_b,
2833 		       tree *last_conflicts,
2834 		       struct loop *loop_nest)
2835 {
2836   tree type, difference;
2837 
2838   dependence_stats.num_miv++;
2839   if (dump_file && (dump_flags & TDF_DETAILS))
2840     fprintf (dump_file, "(analyze_miv_subscript \n");
2841 
2842   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2843   chrec_a = chrec_convert (type, chrec_a, NULL);
2844   chrec_b = chrec_convert (type, chrec_b, NULL);
2845   difference = chrec_fold_minus (type, chrec_a, chrec_b);
2846 
2847   if (eq_evolutions_p (chrec_a, chrec_b))
2848     {
2849       /* Access functions are the same: all the elements are accessed
2850 	 in the same order.  */
2851       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2852       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2853       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2854       dependence_stats.num_miv_dependent++;
2855     }
2856 
2857   else if (evolution_function_is_constant_p (difference)
2858 	   /* For the moment, the following is verified:
2859 	      evolution_function_is_affine_multivariate_p (chrec_a,
2860 	      loop_nest->num) */
2861 	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
2862     {
2863       /* testsuite/.../ssa-chrec-33.c
2864 	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
2865 
2866 	 The difference is 1, and all the evolution steps are multiples
2867 	 of 2, consequently there are no overlapping elements.  */
2868       *overlaps_a = conflict_fn_no_dependence ();
2869       *overlaps_b = conflict_fn_no_dependence ();
2870       *last_conflicts = integer_zero_node;
2871       dependence_stats.num_miv_independent++;
2872     }
2873 
2874   else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2875 	   && !chrec_contains_symbols (chrec_a)
2876 	   && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2877 	   && !chrec_contains_symbols (chrec_b))
2878     {
2879       /* testsuite/.../ssa-chrec-35.c
2880 	 {0, +, 1}_2  vs.  {0, +, 1}_3
2881 	 the overlapping elements are respectively located at iterations:
2882 	 {0, +, 1}_x and {0, +, 1}_x,
2883 	 in other words, we have the equality:
2884 	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2885 
2886 	 Other examples:
2887 	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2888 	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2889 
2890 	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2891 	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2892       */
2893       analyze_subscript_affine_affine (chrec_a, chrec_b,
2894 				       overlaps_a, overlaps_b, last_conflicts);
2895 
2896       if (CF_NOT_KNOWN_P (*overlaps_a)
2897  	  || CF_NOT_KNOWN_P (*overlaps_b))
2898 	dependence_stats.num_miv_unimplemented++;
2899       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2900 	       || CF_NO_DEPENDENCE_P (*overlaps_b))
2901 	dependence_stats.num_miv_independent++;
2902       else
2903 	dependence_stats.num_miv_dependent++;
2904     }
2905 
2906   else
2907     {
2908       /* When the analysis is too difficult, answer "don't know".  */
2909       if (dump_file && (dump_flags & TDF_DETAILS))
2910 	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2911 
2912       *overlaps_a = conflict_fn_not_known ();
2913       *overlaps_b = conflict_fn_not_known ();
2914       *last_conflicts = chrec_dont_know;
2915       dependence_stats.num_miv_unimplemented++;
2916     }
2917 
2918   if (dump_file && (dump_flags & TDF_DETAILS))
2919     fprintf (dump_file, ")\n");
2920 }
2921 
2922 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2923    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
2924    OVERLAP_ITERATIONS_B are initialized with two functions that
2925    describe the iterations that contain conflicting elements.
2926 
2927    Remark: For an integer k >= 0, the following equality is true:
2928 
2929    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2930 */
2931 
2932 static void
analyze_overlapping_iterations(tree chrec_a,tree chrec_b,conflict_function ** overlap_iterations_a,conflict_function ** overlap_iterations_b,tree * last_conflicts,struct loop * loop_nest)2933 analyze_overlapping_iterations (tree chrec_a,
2934 				tree chrec_b,
2935 				conflict_function **overlap_iterations_a,
2936 				conflict_function **overlap_iterations_b,
2937 				tree *last_conflicts, struct loop *loop_nest)
2938 {
2939   unsigned int lnn = loop_nest->num;
2940 
2941   dependence_stats.num_subscript_tests++;
2942 
2943   if (dump_file && (dump_flags & TDF_DETAILS))
2944     {
2945       fprintf (dump_file, "(analyze_overlapping_iterations \n");
2946       fprintf (dump_file, "  (chrec_a = ");
2947       print_generic_expr (dump_file, chrec_a, 0);
2948       fprintf (dump_file, ")\n  (chrec_b = ");
2949       print_generic_expr (dump_file, chrec_b, 0);
2950       fprintf (dump_file, ")\n");
2951     }
2952 
2953   if (chrec_a == NULL_TREE
2954       || chrec_b == NULL_TREE
2955       || chrec_contains_undetermined (chrec_a)
2956       || chrec_contains_undetermined (chrec_b))
2957     {
2958       dependence_stats.num_subscript_undetermined++;
2959 
2960       *overlap_iterations_a = conflict_fn_not_known ();
2961       *overlap_iterations_b = conflict_fn_not_known ();
2962     }
2963 
2964   /* If they are the same chrec, and are affine, they overlap
2965      on every iteration.  */
2966   else if (eq_evolutions_p (chrec_a, chrec_b)
2967 	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2968 	       || operand_equal_p (chrec_a, chrec_b, 0)))
2969     {
2970       dependence_stats.num_same_subscript_function++;
2971       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2972       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2973       *last_conflicts = chrec_dont_know;
2974     }
2975 
2976   /* If they aren't the same, and aren't affine, we can't do anything
2977      yet.  */
2978   else if ((chrec_contains_symbols (chrec_a)
2979 	    || chrec_contains_symbols (chrec_b))
2980 	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2981 	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2982     {
2983       dependence_stats.num_subscript_undetermined++;
2984       *overlap_iterations_a = conflict_fn_not_known ();
2985       *overlap_iterations_b = conflict_fn_not_known ();
2986     }
2987 
2988   else if (ziv_subscript_p (chrec_a, chrec_b))
2989     analyze_ziv_subscript (chrec_a, chrec_b,
2990 			   overlap_iterations_a, overlap_iterations_b,
2991 			   last_conflicts);
2992 
2993   else if (siv_subscript_p (chrec_a, chrec_b))
2994     analyze_siv_subscript (chrec_a, chrec_b,
2995 			   overlap_iterations_a, overlap_iterations_b,
2996 			   last_conflicts, lnn);
2997 
2998   else
2999     analyze_miv_subscript (chrec_a, chrec_b,
3000 			   overlap_iterations_a, overlap_iterations_b,
3001 			   last_conflicts, loop_nest);
3002 
3003   if (dump_file && (dump_flags & TDF_DETAILS))
3004     {
3005       fprintf (dump_file, "  (overlap_iterations_a = ");
3006       dump_conflict_function (dump_file, *overlap_iterations_a);
3007       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
3008       dump_conflict_function (dump_file, *overlap_iterations_b);
3009       fprintf (dump_file, "))\n");
3010     }
3011 }
3012 
3013 /* Helper function for uniquely inserting distance vectors.  */
3014 
3015 static void
save_dist_v(struct data_dependence_relation * ddr,lambda_vector dist_v)3016 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
3017 {
3018   unsigned i;
3019   lambda_vector v;
3020 
3021   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
3022     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3023       return;
3024 
3025   DDR_DIST_VECTS (ddr).safe_push (dist_v);
3026 }
3027 
3028 /* Helper function for uniquely inserting direction vectors.  */
3029 
3030 static void
save_dir_v(struct data_dependence_relation * ddr,lambda_vector dir_v)3031 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
3032 {
3033   unsigned i;
3034   lambda_vector v;
3035 
3036   FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
3037     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3038       return;
3039 
3040   DDR_DIR_VECTS (ddr).safe_push (dir_v);
3041 }
3042 
3043 /* Add a distance of 1 on all the loops outer than INDEX.  If we
3044    haven't yet determined a distance for this outer loop, push a new
3045    distance vector composed of the previous distance, and a distance
3046    of 1 for this outer loop.  Example:
3047 
3048    | loop_1
3049    |   loop_2
3050    |     A[10]
3051    |   endloop_2
3052    | endloop_1
3053 
3054    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
3055    save (0, 1), then we have to save (1, 0).  */
3056 
3057 static void
add_outer_distances(struct data_dependence_relation * ddr,lambda_vector dist_v,int index)3058 add_outer_distances (struct data_dependence_relation *ddr,
3059 		     lambda_vector dist_v, int index)
3060 {
3061   /* For each outer loop where init_v is not set, the accesses are
3062      in dependence of distance 1 in the loop.  */
3063   while (--index >= 0)
3064     {
3065       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3066       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3067       save_v[index] = 1;
3068       save_dist_v (ddr, save_v);
3069     }
3070 }
3071 
3072 /* Return false when fail to represent the data dependence as a
3073    distance vector.  INIT_B is set to true when a component has been
3074    added to the distance vector DIST_V.  INDEX_CARRY is then set to
3075    the index in DIST_V that carries the dependence.  */
3076 
3077 static bool
build_classic_dist_vector_1(struct data_dependence_relation * ddr,struct data_reference * ddr_a,struct data_reference * ddr_b,lambda_vector dist_v,bool * init_b,int * index_carry)3078 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3079 			     struct data_reference *ddr_a,
3080 			     struct data_reference *ddr_b,
3081 			     lambda_vector dist_v, bool *init_b,
3082 			     int *index_carry)
3083 {
3084   unsigned i;
3085   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3086 
3087   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3088     {
3089       tree access_fn_a, access_fn_b;
3090       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
3091 
3092       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3093 	{
3094 	  non_affine_dependence_relation (ddr);
3095 	  return false;
3096 	}
3097 
3098       access_fn_a = DR_ACCESS_FN (ddr_a, i);
3099       access_fn_b = DR_ACCESS_FN (ddr_b, i);
3100 
3101       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3102 	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3103 	{
3104 	  int dist, index;
3105 	  int var_a = CHREC_VARIABLE (access_fn_a);
3106 	  int var_b = CHREC_VARIABLE (access_fn_b);
3107 
3108 	  if (var_a != var_b
3109 	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3110 	    {
3111 	      non_affine_dependence_relation (ddr);
3112 	      return false;
3113 	    }
3114 
3115 	  dist = int_cst_value (SUB_DISTANCE (subscript));
3116 	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3117 	  *index_carry = MIN (index, *index_carry);
3118 
3119 	  /* This is the subscript coupling test.  If we have already
3120 	     recorded a distance for this loop (a distance coming from
3121 	     another subscript), it should be the same.  For example,
3122 	     in the following code, there is no dependence:
3123 
3124 	     | loop i = 0, N, 1
3125 	     |   T[i+1][i] = ...
3126 	     |   ... = T[i][i]
3127 	     | endloop
3128 	  */
3129 	  if (init_v[index] != 0 && dist_v[index] != dist)
3130 	    {
3131 	      finalize_ddr_dependent (ddr, chrec_known);
3132 	      return false;
3133 	    }
3134 
3135 	  dist_v[index] = dist;
3136 	  init_v[index] = 1;
3137 	  *init_b = true;
3138 	}
3139       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3140 	{
3141 	  /* This can be for example an affine vs. constant dependence
3142 	     (T[i] vs. T[3]) that is not an affine dependence and is
3143 	     not representable as a distance vector.  */
3144 	  non_affine_dependence_relation (ddr);
3145 	  return false;
3146 	}
3147     }
3148 
3149   return true;
3150 }
3151 
3152 /* Return true when the DDR contains only constant access functions.  */
3153 
3154 static bool
constant_access_functions(const struct data_dependence_relation * ddr)3155 constant_access_functions (const struct data_dependence_relation *ddr)
3156 {
3157   unsigned i;
3158 
3159   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3160     if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3161 	|| !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3162       return false;
3163 
3164   return true;
3165 }
3166 
3167 /* Helper function for the case where DDR_A and DDR_B are the same
3168    multivariate access function with a constant step.  For an example
3169    see pr34635-1.c.  */
3170 
3171 static void
add_multivariate_self_dist(struct data_dependence_relation * ddr,tree c_2)3172 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3173 {
3174   int x_1, x_2;
3175   tree c_1 = CHREC_LEFT (c_2);
3176   tree c_0 = CHREC_LEFT (c_1);
3177   lambda_vector dist_v;
3178   int v1, v2, cd;
3179 
3180   /* Polynomials with more than 2 variables are not handled yet.  When
3181      the evolution steps are parameters, it is not possible to
3182      represent the dependence using classical distance vectors.  */
3183   if (TREE_CODE (c_0) != INTEGER_CST
3184       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3185       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3186     {
3187       DDR_AFFINE_P (ddr) = false;
3188       return;
3189     }
3190 
3191   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3192   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3193 
3194   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
3195   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3196   v1 = int_cst_value (CHREC_RIGHT (c_1));
3197   v2 = int_cst_value (CHREC_RIGHT (c_2));
3198   cd = gcd (v1, v2);
3199   v1 /= cd;
3200   v2 /= cd;
3201 
3202   if (v2 < 0)
3203     {
3204       v2 = -v2;
3205       v1 = -v1;
3206     }
3207 
3208   dist_v[x_1] = v2;
3209   dist_v[x_2] = -v1;
3210   save_dist_v (ddr, dist_v);
3211 
3212   add_outer_distances (ddr, dist_v, x_1);
3213 }
3214 
3215 /* Helper function for the case where DDR_A and DDR_B are the same
3216    access functions.  */
3217 
3218 static void
add_other_self_distances(struct data_dependence_relation * ddr)3219 add_other_self_distances (struct data_dependence_relation *ddr)
3220 {
3221   lambda_vector dist_v;
3222   unsigned i;
3223   int index_carry = DDR_NB_LOOPS (ddr);
3224 
3225   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3226     {
3227       tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3228 
3229       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3230 	{
3231 	  if (!evolution_function_is_univariate_p (access_fun))
3232 	    {
3233 	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3234 		{
3235 		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3236 		  return;
3237 		}
3238 
3239 	      access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3240 
3241 	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3242 		add_multivariate_self_dist (ddr, access_fun);
3243 	      else
3244 		/* The evolution step is not constant: it varies in
3245 		   the outer loop, so this cannot be represented by a
3246 		   distance vector.  For example in pr34635.c the
3247 		   evolution is {0, +, {0, +, 4}_1}_2.  */
3248 		DDR_AFFINE_P (ddr) = false;
3249 
3250 	      return;
3251 	    }
3252 
3253 	  index_carry = MIN (index_carry,
3254 			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
3255 						 DDR_LOOP_NEST (ddr)));
3256 	}
3257     }
3258 
3259   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3260   add_outer_distances (ddr, dist_v, index_carry);
3261 }
3262 
3263 static void
insert_innermost_unit_dist_vector(struct data_dependence_relation * ddr)3264 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3265 {
3266   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3267 
3268   dist_v[DDR_INNER_LOOP (ddr)] = 1;
3269   save_dist_v (ddr, dist_v);
3270 }
3271 
3272 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
3273    is the case for example when access functions are the same and
3274    equal to a constant, as in:
3275 
3276    | loop_1
3277    |   A[3] = ...
3278    |   ... = A[3]
3279    | endloop_1
3280 
3281    in which case the distance vectors are (0) and (1).  */
3282 
3283 static void
add_distance_for_zero_overlaps(struct data_dependence_relation * ddr)3284 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3285 {
3286   unsigned i, j;
3287 
3288   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3289     {
3290       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3291       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3292       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3293 
3294       for (j = 0; j < ca->n; j++)
3295 	if (affine_function_zero_p (ca->fns[j]))
3296 	  {
3297 	    insert_innermost_unit_dist_vector (ddr);
3298 	    return;
3299 	  }
3300 
3301       for (j = 0; j < cb->n; j++)
3302 	if (affine_function_zero_p (cb->fns[j]))
3303 	  {
3304 	    insert_innermost_unit_dist_vector (ddr);
3305 	    return;
3306 	  }
3307     }
3308 }
3309 
3310 /* Compute the classic per loop distance vector.  DDR is the data
3311    dependence relation to build a vector from.  Return false when fail
3312    to represent the data dependence as a distance vector.  */
3313 
3314 static bool
build_classic_dist_vector(struct data_dependence_relation * ddr,struct loop * loop_nest)3315 build_classic_dist_vector (struct data_dependence_relation *ddr,
3316 			   struct loop *loop_nest)
3317 {
3318   bool init_b = false;
3319   int index_carry = DDR_NB_LOOPS (ddr);
3320   lambda_vector dist_v;
3321 
3322   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3323     return false;
3324 
3325   if (same_access_functions (ddr))
3326     {
3327       /* Save the 0 vector.  */
3328       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3329       save_dist_v (ddr, dist_v);
3330 
3331       if (constant_access_functions (ddr))
3332 	add_distance_for_zero_overlaps (ddr);
3333 
3334       if (DDR_NB_LOOPS (ddr) > 1)
3335 	add_other_self_distances (ddr);
3336 
3337       return true;
3338     }
3339 
3340   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3341   if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3342 				    dist_v, &init_b, &index_carry))
3343     return false;
3344 
3345   /* Save the distance vector if we initialized one.  */
3346   if (init_b)
3347     {
3348       /* Verify a basic constraint: classic distance vectors should
3349 	 always be lexicographically positive.
3350 
3351 	 Data references are collected in the order of execution of
3352 	 the program, thus for the following loop
3353 
3354 	 | for (i = 1; i < 100; i++)
3355 	 |   for (j = 1; j < 100; j++)
3356 	 |     {
3357 	 |       t = T[j+1][i-1];  // A
3358 	 |       T[j][i] = t + 2;  // B
3359 	 |     }
3360 
3361 	 references are collected following the direction of the wind:
3362 	 A then B.  The data dependence tests are performed also
3363 	 following this order, such that we're looking at the distance
3364 	 separating the elements accessed by A from the elements later
3365 	 accessed by B.  But in this example, the distance returned by
3366 	 test_dep (A, B) is lexicographically negative (-1, 1), that
3367 	 means that the access A occurs later than B with respect to
3368 	 the outer loop, ie. we're actually looking upwind.  In this
3369 	 case we solve test_dep (B, A) looking downwind to the
3370 	 lexicographically positive solution, that returns the
3371 	 distance vector (1, -1).  */
3372       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3373 	{
3374 	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3375 	  if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3376 					      loop_nest))
3377 	    return false;
3378 	  compute_subscript_distance (ddr);
3379 	  if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3380 					    save_v, &init_b, &index_carry))
3381 	    return false;
3382 	  save_dist_v (ddr, save_v);
3383 	  DDR_REVERSED_P (ddr) = true;
3384 
3385 	  /* In this case there is a dependence forward for all the
3386 	     outer loops:
3387 
3388 	     | for (k = 1; k < 100; k++)
3389 	     |  for (i = 1; i < 100; i++)
3390 	     |   for (j = 1; j < 100; j++)
3391 	     |     {
3392 	     |       t = T[j+1][i-1];  // A
3393 	     |       T[j][i] = t + 2;  // B
3394 	     |     }
3395 
3396 	     the vectors are:
3397 	     (0,  1, -1)
3398 	     (1,  1, -1)
3399 	     (1, -1,  1)
3400 	  */
3401 	  if (DDR_NB_LOOPS (ddr) > 1)
3402 	    {
3403  	      add_outer_distances (ddr, save_v, index_carry);
3404 	      add_outer_distances (ddr, dist_v, index_carry);
3405 	    }
3406 	}
3407       else
3408 	{
3409 	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3410 	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3411 
3412 	  if (DDR_NB_LOOPS (ddr) > 1)
3413 	    {
3414 	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3415 
3416 	      if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3417 						  DDR_A (ddr), loop_nest))
3418 		return false;
3419 	      compute_subscript_distance (ddr);
3420 	      if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3421 						opposite_v, &init_b,
3422 						&index_carry))
3423 		return false;
3424 
3425 	      save_dist_v (ddr, save_v);
3426 	      add_outer_distances (ddr, dist_v, index_carry);
3427 	      add_outer_distances (ddr, opposite_v, index_carry);
3428 	    }
3429 	  else
3430 	    save_dist_v (ddr, save_v);
3431 	}
3432     }
3433   else
3434     {
3435       /* There is a distance of 1 on all the outer loops: Example:
3436 	 there is a dependence of distance 1 on loop_1 for the array A.
3437 
3438 	 | loop_1
3439 	 |   A[5] = ...
3440 	 | endloop
3441       */
3442       add_outer_distances (ddr, dist_v,
3443 			   lambda_vector_first_nz (dist_v,
3444 						   DDR_NB_LOOPS (ddr), 0));
3445     }
3446 
3447   if (dump_file && (dump_flags & TDF_DETAILS))
3448     {
3449       unsigned i;
3450 
3451       fprintf (dump_file, "(build_classic_dist_vector\n");
3452       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3453 	{
3454 	  fprintf (dump_file, "  dist_vector = (");
3455 	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3456 			       DDR_NB_LOOPS (ddr));
3457 	  fprintf (dump_file, "  )\n");
3458 	}
3459       fprintf (dump_file, ")\n");
3460     }
3461 
3462   return true;
3463 }
3464 
3465 /* Return the direction for a given distance.
3466    FIXME: Computing dir this way is suboptimal, since dir can catch
3467    cases that dist is unable to represent.  */
3468 
3469 static inline enum data_dependence_direction
dir_from_dist(int dist)3470 dir_from_dist (int dist)
3471 {
3472   if (dist > 0)
3473     return dir_positive;
3474   else if (dist < 0)
3475     return dir_negative;
3476   else
3477     return dir_equal;
3478 }
3479 
3480 /* Compute the classic per loop direction vector.  DDR is the data
3481    dependence relation to build a vector from.  */
3482 
3483 static void
build_classic_dir_vector(struct data_dependence_relation * ddr)3484 build_classic_dir_vector (struct data_dependence_relation *ddr)
3485 {
3486   unsigned i, j;
3487   lambda_vector dist_v;
3488 
3489   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3490     {
3491       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3492 
3493       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3494 	dir_v[j] = dir_from_dist (dist_v[j]);
3495 
3496       save_dir_v (ddr, dir_v);
3497     }
3498 }
3499 
3500 /* Helper function.  Returns true when there is a dependence between
3501    data references DRA and DRB.  */
3502 
3503 static bool
subscript_dependence_tester_1(struct data_dependence_relation * ddr,struct data_reference * dra,struct data_reference * drb,struct loop * loop_nest)3504 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3505 			       struct data_reference *dra,
3506 			       struct data_reference *drb,
3507 			       struct loop *loop_nest)
3508 {
3509   unsigned int i;
3510   tree last_conflicts;
3511   struct subscript *subscript;
3512   tree res = NULL_TREE;
3513 
3514   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
3515     {
3516       conflict_function *overlaps_a, *overlaps_b;
3517 
3518       analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3519 				      DR_ACCESS_FN (drb, i),
3520 				      &overlaps_a, &overlaps_b,
3521 				      &last_conflicts, loop_nest);
3522 
3523       if (SUB_CONFLICTS_IN_A (subscript))
3524 	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3525       if (SUB_CONFLICTS_IN_B (subscript))
3526 	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3527 
3528       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3529       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3530       SUB_LAST_CONFLICT (subscript) = last_conflicts;
3531 
3532       /* If there is any undetermined conflict function we have to
3533          give a conservative answer in case we cannot prove that
3534 	 no dependence exists when analyzing another subscript.  */
3535       if (CF_NOT_KNOWN_P (overlaps_a)
3536  	  || CF_NOT_KNOWN_P (overlaps_b))
3537  	{
3538 	  res = chrec_dont_know;
3539 	  continue;
3540  	}
3541 
3542       /* When there is a subscript with no dependence we can stop.  */
3543       else if (CF_NO_DEPENDENCE_P (overlaps_a)
3544  	       || CF_NO_DEPENDENCE_P (overlaps_b))
3545  	{
3546 	  res = chrec_known;
3547 	  break;
3548  	}
3549     }
3550 
3551   if (res == NULL_TREE)
3552     return true;
3553 
3554   if (res == chrec_known)
3555     dependence_stats.num_dependence_independent++;
3556   else
3557     dependence_stats.num_dependence_undetermined++;
3558   finalize_ddr_dependent (ddr, res);
3559   return false;
3560 }
3561 
3562 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
3563 
3564 static void
subscript_dependence_tester(struct data_dependence_relation * ddr,struct loop * loop_nest)3565 subscript_dependence_tester (struct data_dependence_relation *ddr,
3566 			     struct loop *loop_nest)
3567 {
3568   if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3569     dependence_stats.num_dependence_dependent++;
3570 
3571   compute_subscript_distance (ddr);
3572   if (build_classic_dist_vector (ddr, loop_nest))
3573     build_classic_dir_vector (ddr);
3574 }
3575 
3576 /* Returns true when all the access functions of A are affine or
3577    constant with respect to LOOP_NEST.  */
3578 
3579 static bool
access_functions_are_affine_or_constant_p(const struct data_reference * a,const struct loop * loop_nest)3580 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3581 					   const struct loop *loop_nest)
3582 {
3583   unsigned int i;
3584   vec<tree> fns = DR_ACCESS_FNS (a);
3585   tree t;
3586 
3587   FOR_EACH_VEC_ELT (fns, i, t)
3588     if (!evolution_function_is_invariant_p (t, loop_nest->num)
3589 	&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3590       return false;
3591 
3592   return true;
3593 }
3594 
3595 /* Initializes an equation for an OMEGA problem using the information
3596    contained in the ACCESS_FUN.  Returns true when the operation
3597    succeeded.
3598 
3599    PB is the omega constraint system.
3600    EQ is the number of the equation to be initialized.
3601    OFFSET is used for shifting the variables names in the constraints:
3602    a constrain is composed of 2 * the number of variables surrounding
3603    dependence accesses.  OFFSET is set either to 0 for the first n variables,
3604    then it is set to n.
3605    ACCESS_FUN is expected to be an affine chrec.  */
3606 
3607 static bool
init_omega_eq_with_af(omega_pb pb,unsigned eq,unsigned int offset,tree access_fun,struct data_dependence_relation * ddr)3608 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3609 		       unsigned int offset, tree access_fun,
3610 		       struct data_dependence_relation *ddr)
3611 {
3612   switch (TREE_CODE (access_fun))
3613     {
3614     case POLYNOMIAL_CHREC:
3615       {
3616 	tree left = CHREC_LEFT (access_fun);
3617 	tree right = CHREC_RIGHT (access_fun);
3618 	int var = CHREC_VARIABLE (access_fun);
3619 	unsigned var_idx;
3620 
3621 	if (TREE_CODE (right) != INTEGER_CST)
3622 	  return false;
3623 
3624 	var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3625 	pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3626 
3627 	/* Compute the innermost loop index.  */
3628 	DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3629 
3630 	if (offset == 0)
3631 	  pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3632 	    += int_cst_value (right);
3633 
3634 	switch (TREE_CODE (left))
3635 	  {
3636 	  case POLYNOMIAL_CHREC:
3637 	    return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3638 
3639 	  case INTEGER_CST:
3640 	    pb->eqs[eq].coef[0] += int_cst_value (left);
3641 	    return true;
3642 
3643 	  default:
3644 	    return false;
3645 	  }
3646       }
3647 
3648     case INTEGER_CST:
3649       pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3650       return true;
3651 
3652     default:
3653       return false;
3654     }
3655 }
3656 
3657 /* As explained in the comments preceding init_omega_for_ddr, we have
3658    to set up a system for each loop level, setting outer loops
3659    variation to zero, and current loop variation to positive or zero.
3660    Save each lexico positive distance vector.  */
3661 
3662 static void
omega_extract_distance_vectors(omega_pb pb,struct data_dependence_relation * ddr)3663 omega_extract_distance_vectors (omega_pb pb,
3664 				struct data_dependence_relation *ddr)
3665 {
3666   int eq, geq;
3667   unsigned i, j;
3668   struct loop *loopi, *loopj;
3669   enum omega_result res;
3670 
3671   /* Set a new problem for each loop in the nest.  The basis is the
3672      problem that we have initialized until now.  On top of this we
3673      add new constraints.  */
3674   for (i = 0; i <= DDR_INNER_LOOP (ddr)
3675               && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3676     {
3677       int dist = 0;
3678       omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3679 					   DDR_NB_LOOPS (ddr));
3680 
3681       omega_copy_problem (copy, pb);
3682 
3683       /* For all the outer loops "loop_j", add "dj = 0".  */
3684       for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3685 	{
3686 	  eq = omega_add_zero_eq (copy, omega_black);
3687 	  copy->eqs[eq].coef[j + 1] = 1;
3688 	}
3689 
3690       /* For "loop_i", add "0 <= di".  */
3691       geq = omega_add_zero_geq (copy, omega_black);
3692       copy->geqs[geq].coef[i + 1] = 1;
3693 
3694       /* Reduce the constraint system, and test that the current
3695 	 problem is feasible.  */
3696       res = omega_simplify_problem (copy);
3697       if (res == omega_false
3698 	  || res == omega_unknown
3699 	  || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3700 	goto next_problem;
3701 
3702       for (eq = 0; eq < copy->num_subs; eq++)
3703 	if (copy->subs[eq].key == (int) i + 1)
3704 	  {
3705 	    dist = copy->subs[eq].coef[0];
3706 	    goto found_dist;
3707 	  }
3708 
3709       if (dist == 0)
3710 	{
3711 	  /* Reinitialize problem...  */
3712 	  omega_copy_problem (copy, pb);
3713 	  for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3714 	    {
3715 	      eq = omega_add_zero_eq (copy, omega_black);
3716 	      copy->eqs[eq].coef[j + 1] = 1;
3717 	    }
3718 
3719 	  /* ..., but this time "di = 1".  */
3720 	  eq = omega_add_zero_eq (copy, omega_black);
3721 	  copy->eqs[eq].coef[i + 1] = 1;
3722 	  copy->eqs[eq].coef[0] = -1;
3723 
3724 	  res = omega_simplify_problem (copy);
3725 	  if (res == omega_false
3726 	      || res == omega_unknown
3727 	      || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3728 	    goto next_problem;
3729 
3730 	  for (eq = 0; eq < copy->num_subs; eq++)
3731 	    if (copy->subs[eq].key == (int) i + 1)
3732 	      {
3733 		dist = copy->subs[eq].coef[0];
3734 		goto found_dist;
3735 	      }
3736 	}
3737 
3738     found_dist:;
3739       /* Save the lexicographically positive distance vector.  */
3740       if (dist >= 0)
3741 	{
3742 	  lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3743 	  lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3744 
3745 	  dist_v[i] = dist;
3746 
3747 	  for (eq = 0; eq < copy->num_subs; eq++)
3748 	    if (copy->subs[eq].key > 0)
3749 	      {
3750 		dist = copy->subs[eq].coef[0];
3751 		dist_v[copy->subs[eq].key - 1] = dist;
3752 	      }
3753 
3754 	  for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3755 	    dir_v[j] = dir_from_dist (dist_v[j]);
3756 
3757 	  save_dist_v (ddr, dist_v);
3758 	  save_dir_v (ddr, dir_v);
3759 	}
3760 
3761     next_problem:;
3762       omega_free_problem (copy);
3763     }
3764 }
3765 
3766 /* This is called for each subscript of a tuple of data references:
3767    insert an equality for representing the conflicts.  */
3768 
3769 static bool
omega_setup_subscript(tree access_fun_a,tree access_fun_b,struct data_dependence_relation * ddr,omega_pb pb,bool * maybe_dependent)3770 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3771 		       struct data_dependence_relation *ddr,
3772 		       omega_pb pb, bool *maybe_dependent)
3773 {
3774   int eq;
3775   tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3776 				     TREE_TYPE (access_fun_b));
3777   tree fun_a = chrec_convert (type, access_fun_a, NULL);
3778   tree fun_b = chrec_convert (type, access_fun_b, NULL);
3779   tree difference = chrec_fold_minus (type, fun_a, fun_b);
3780   tree minus_one;
3781 
3782   /* When the fun_a - fun_b is not constant, the dependence is not
3783      captured by the classic distance vector representation.  */
3784   if (TREE_CODE (difference) != INTEGER_CST)
3785     return false;
3786 
3787   /* ZIV test.  */
3788   if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3789     {
3790       /* There is no dependence.  */
3791       *maybe_dependent = false;
3792       return true;
3793     }
3794 
3795   minus_one = build_int_cst (type, -1);
3796   fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3797 
3798   eq = omega_add_zero_eq (pb, omega_black);
3799   if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3800       || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3801     /* There is probably a dependence, but the system of
3802        constraints cannot be built: answer "don't know".  */
3803     return false;
3804 
3805   /* GCD test.  */
3806   if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3807       && !int_divides_p (lambda_vector_gcd
3808 			 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3809 			  2 * DDR_NB_LOOPS (ddr)),
3810 			 pb->eqs[eq].coef[0]))
3811     {
3812       /* There is no dependence.  */
3813       *maybe_dependent = false;
3814       return true;
3815     }
3816 
3817   return true;
3818 }
3819 
3820 /* Helper function, same as init_omega_for_ddr but specialized for
3821    data references A and B.  */
3822 
3823 static bool
init_omega_for_ddr_1(struct data_reference * dra,struct data_reference * drb,struct data_dependence_relation * ddr,omega_pb pb,bool * maybe_dependent)3824 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3825 		      struct data_dependence_relation *ddr,
3826 		      omega_pb pb, bool *maybe_dependent)
3827 {
3828   unsigned i;
3829   int ineq;
3830   struct loop *loopi;
3831   unsigned nb_loops = DDR_NB_LOOPS (ddr);
3832 
3833   /* Insert an equality per subscript.  */
3834   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3835     {
3836       if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3837 				  ddr, pb, maybe_dependent))
3838 	return false;
3839       else if (*maybe_dependent == false)
3840 	{
3841 	  /* There is no dependence.  */
3842 	  DDR_ARE_DEPENDENT (ddr) = chrec_known;
3843 	  return true;
3844 	}
3845     }
3846 
3847   /* Insert inequalities: constraints corresponding to the iteration
3848      domain, i.e. the loops surrounding the references "loop_x" and
3849      the distance variables "dx".  The layout of the OMEGA
3850      representation is as follows:
3851      - coef[0] is the constant
3852      - coef[1..nb_loops] are the protected variables that will not be
3853      removed by the solver: the "dx"
3854      - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3855   */
3856   for (i = 0; i <= DDR_INNER_LOOP (ddr)
3857 	      && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3858     {
3859       HOST_WIDE_INT nbi = max_stmt_executions_int (loopi);
3860 
3861       /* 0 <= loop_x */
3862       ineq = omega_add_zero_geq (pb, omega_black);
3863       pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3864 
3865       /* 0 <= loop_x + dx */
3866       ineq = omega_add_zero_geq (pb, omega_black);
3867       pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3868       pb->geqs[ineq].coef[i + 1] = 1;
3869 
3870       if (nbi != -1)
3871 	{
3872 	  /* loop_x <= nb_iters */
3873 	  ineq = omega_add_zero_geq (pb, omega_black);
3874 	  pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3875 	  pb->geqs[ineq].coef[0] = nbi;
3876 
3877 	  /* loop_x + dx <= nb_iters */
3878 	  ineq = omega_add_zero_geq (pb, omega_black);
3879 	  pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3880 	  pb->geqs[ineq].coef[i + 1] = -1;
3881 	  pb->geqs[ineq].coef[0] = nbi;
3882 
3883 	  /* A step "dx" bigger than nb_iters is not feasible, so
3884 	     add "0 <= nb_iters + dx",  */
3885 	  ineq = omega_add_zero_geq (pb, omega_black);
3886 	  pb->geqs[ineq].coef[i + 1] = 1;
3887 	  pb->geqs[ineq].coef[0] = nbi;
3888 	  /* and "dx <= nb_iters".  */
3889 	  ineq = omega_add_zero_geq (pb, omega_black);
3890 	  pb->geqs[ineq].coef[i + 1] = -1;
3891 	  pb->geqs[ineq].coef[0] = nbi;
3892 	}
3893     }
3894 
3895   omega_extract_distance_vectors (pb, ddr);
3896 
3897   return true;
3898 }
3899 
3900 /* Sets up the Omega dependence problem for the data dependence
3901    relation DDR.  Returns false when the constraint system cannot be
3902    built, ie. when the test answers "don't know".  Returns true
3903    otherwise, and when independence has been proved (using one of the
3904    trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3905    set MAYBE_DEPENDENT to true.
3906 
3907    Example: for setting up the dependence system corresponding to the
3908    conflicting accesses
3909 
3910    | loop_i
3911    |   loop_j
3912    |     A[i, i+1] = ...
3913    |     ... A[2*j, 2*(i + j)]
3914    |   endloop_j
3915    | endloop_i
3916 
3917    the following constraints come from the iteration domain:
3918 
3919    0 <= i <= Ni
3920    0 <= i + di <= Ni
3921    0 <= j <= Nj
3922    0 <= j + dj <= Nj
3923 
3924    where di, dj are the distance variables.  The constraints
3925    representing the conflicting elements are:
3926 
3927    i = 2 * (j + dj)
3928    i + 1 = 2 * (i + di + j + dj)
3929 
3930    For asking that the resulting distance vector (di, dj) be
3931    lexicographically positive, we insert the constraint "di >= 0".  If
3932    "di = 0" in the solution, we fix that component to zero, and we
3933    look at the inner loops: we set a new problem where all the outer
3934    loop distances are zero, and fix this inner component to be
3935    positive.  When one of the components is positive, we save that
3936    distance, and set a new problem where the distance on this loop is
3937    zero, searching for other distances in the inner loops.  Here is
3938    the classic example that illustrates that we have to set for each
3939    inner loop a new problem:
3940 
3941    | loop_1
3942    |   loop_2
3943    |     A[10]
3944    |   endloop_2
3945    | endloop_1
3946 
3947    we have to save two distances (1, 0) and (0, 1).
3948 
3949    Given two array references, refA and refB, we have to set the
3950    dependence problem twice, refA vs. refB and refB vs. refA, and we
3951    cannot do a single test, as refB might occur before refA in the
3952    inner loops, and the contrary when considering outer loops: ex.
3953 
3954    | loop_0
3955    |   loop_1
3956    |     loop_2
3957    |       T[{1,+,1}_2][{1,+,1}_1]  // refA
3958    |       T[{2,+,1}_2][{0,+,1}_1]  // refB
3959    |     endloop_2
3960    |   endloop_1
3961    | endloop_0
3962 
3963    refB touches the elements in T before refA, and thus for the same
3964    loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3965    but for successive loop_0 iterations, we have (1, -1, 1)
3966 
3967    The Omega solver expects the distance variables ("di" in the
3968    previous example) to come first in the constraint system (as
3969    variables to be protected, or "safe" variables), the constraint
3970    system is built using the following layout:
3971 
3972    "cst | distance vars | index vars".
3973 */
3974 
3975 static bool
init_omega_for_ddr(struct data_dependence_relation * ddr,bool * maybe_dependent)3976 init_omega_for_ddr (struct data_dependence_relation *ddr,
3977 		    bool *maybe_dependent)
3978 {
3979   omega_pb pb;
3980   bool res = false;
3981 
3982   *maybe_dependent = true;
3983 
3984   if (same_access_functions (ddr))
3985     {
3986       unsigned j;
3987       lambda_vector dir_v;
3988 
3989       /* Save the 0 vector.  */
3990       save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3991       dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3992       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3993 	dir_v[j] = dir_equal;
3994       save_dir_v (ddr, dir_v);
3995 
3996       /* Save the dependences carried by outer loops.  */
3997       pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3998       res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3999 				  maybe_dependent);
4000       omega_free_problem (pb);
4001       return res;
4002     }
4003 
4004   /* Omega expects the protected variables (those that have to be kept
4005      after elimination) to appear first in the constraint system.
4006      These variables are the distance variables.  In the following
4007      initialization we declare NB_LOOPS safe variables, and the total
4008      number of variables for the constraint system is 2*NB_LOOPS.  */
4009   pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4010   res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4011 			      maybe_dependent);
4012   omega_free_problem (pb);
4013 
4014   /* Stop computation if not decidable, or no dependence.  */
4015   if (res == false || *maybe_dependent == false)
4016     return res;
4017 
4018   pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4019   res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
4020 			      maybe_dependent);
4021   omega_free_problem (pb);
4022 
4023   return res;
4024 }
4025 
4026 /* Return true when DDR contains the same information as that stored
4027    in DIR_VECTS and in DIST_VECTS, return false otherwise.   */
4028 
4029 static bool
ddr_consistent_p(FILE * file,struct data_dependence_relation * ddr,vec<lambda_vector> dist_vects,vec<lambda_vector> dir_vects)4030 ddr_consistent_p (FILE *file,
4031 		  struct data_dependence_relation *ddr,
4032 		  vec<lambda_vector> dist_vects,
4033 		  vec<lambda_vector> dir_vects)
4034 {
4035   unsigned int i, j;
4036 
4037   /* If dump_file is set, output there.  */
4038   if (dump_file && (dump_flags & TDF_DETAILS))
4039     file = dump_file;
4040 
4041   if (dist_vects.length () != DDR_NUM_DIST_VECTS (ddr))
4042     {
4043       lambda_vector b_dist_v;
4044       fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4045 	       dist_vects.length (),
4046 	       DDR_NUM_DIST_VECTS (ddr));
4047 
4048       fprintf (file, "Banerjee dist vectors:\n");
4049       FOR_EACH_VEC_ELT (dist_vects, i, b_dist_v)
4050 	print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
4051 
4052       fprintf (file, "Omega dist vectors:\n");
4053       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4054 	print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
4055 
4056       fprintf (file, "data dependence relation:\n");
4057       dump_data_dependence_relation (file, ddr);
4058 
4059       fprintf (file, ")\n");
4060       return false;
4061     }
4062 
4063   if (dir_vects.length () != DDR_NUM_DIR_VECTS (ddr))
4064     {
4065       fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4066 	       dir_vects.length (),
4067 	       DDR_NUM_DIR_VECTS (ddr));
4068       return false;
4069     }
4070 
4071   for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4072     {
4073       lambda_vector a_dist_v;
4074       lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
4075 
4076       /* Distance vectors are not ordered in the same way in the DDR
4077 	 and in the DIST_VECTS: search for a matching vector.  */
4078       FOR_EACH_VEC_ELT (dist_vects, j, a_dist_v)
4079 	if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
4080 	  break;
4081 
4082       if (j == dist_vects.length ())
4083 	{
4084 	  fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
4085 	  print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
4086 	  fprintf (file, "not found in Omega dist vectors:\n");
4087 	  print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4088 	  fprintf (file, "data dependence relation:\n");
4089 	  dump_data_dependence_relation (file, ddr);
4090 	  fprintf (file, ")\n");
4091 	}
4092     }
4093 
4094   for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4095     {
4096       lambda_vector a_dir_v;
4097       lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4098 
4099       /* Direction vectors are not ordered in the same way in the DDR
4100 	 and in the DIR_VECTS: search for a matching vector.  */
4101       FOR_EACH_VEC_ELT (dir_vects, j, a_dir_v)
4102 	if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4103 	  break;
4104 
4105       if (j == dist_vects.length ())
4106 	{
4107 	  fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4108 	  print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4109 	  fprintf (file, "not found in Omega dir vectors:\n");
4110 	  print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4111 	  fprintf (file, "data dependence relation:\n");
4112 	  dump_data_dependence_relation (file, ddr);
4113 	  fprintf (file, ")\n");
4114 	}
4115     }
4116 
4117   return true;
4118 }
4119 
4120 /* This computes the affine dependence relation between A and B with
4121    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
4122    independence between two accesses, while CHREC_DONT_KNOW is used
4123    for representing the unknown relation.
4124 
4125    Note that it is possible to stop the computation of the dependence
4126    relation the first time we detect a CHREC_KNOWN element for a given
4127    subscript.  */
4128 
4129 void
compute_affine_dependence(struct data_dependence_relation * ddr,struct loop * loop_nest)4130 compute_affine_dependence (struct data_dependence_relation *ddr,
4131 			   struct loop *loop_nest)
4132 {
4133   struct data_reference *dra = DDR_A (ddr);
4134   struct data_reference *drb = DDR_B (ddr);
4135 
4136   if (dump_file && (dump_flags & TDF_DETAILS))
4137     {
4138       fprintf (dump_file, "(compute_affine_dependence\n");
4139       fprintf (dump_file, "  stmt_a: ");
4140       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4141       fprintf (dump_file, "  stmt_b: ");
4142       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4143     }
4144 
4145   /* Analyze only when the dependence relation is not yet known.  */
4146   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4147     {
4148       dependence_stats.num_dependence_tests++;
4149 
4150       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4151 	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
4152 	{
4153 	  subscript_dependence_tester (ddr, loop_nest);
4154 
4155 	  if (flag_check_data_deps)
4156 	    {
4157 	      /* Dump the dependences from the first algorithm.  */
4158 	      if (dump_file && (dump_flags & TDF_DETAILS))
4159 		{
4160 		  fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4161 		  dump_data_dependence_relation (dump_file, ddr);
4162 		}
4163 
4164 	      if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4165 		{
4166 		  bool maybe_dependent;
4167 		  vec<lambda_vector> dir_vects, dist_vects;
4168 
4169 		  /* Save the result of the first DD analyzer.  */
4170 		  dist_vects = DDR_DIST_VECTS (ddr);
4171 		  dir_vects = DDR_DIR_VECTS (ddr);
4172 
4173 		  /* Reset the information.  */
4174 		  DDR_DIST_VECTS (ddr).create (0);
4175 		  DDR_DIR_VECTS (ddr).create (0);
4176 
4177 		  /* Compute the same information using Omega.  */
4178 		  if (!init_omega_for_ddr (ddr, &maybe_dependent))
4179 		    goto csys_dont_know;
4180 
4181 		  if (dump_file && (dump_flags & TDF_DETAILS))
4182 		    {
4183 		      fprintf (dump_file, "Omega Analyzer\n");
4184 		      dump_data_dependence_relation (dump_file, ddr);
4185 		    }
4186 
4187 		  /* Check that we get the same information.  */
4188 		  if (maybe_dependent)
4189 		    gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4190 						  dir_vects));
4191 		}
4192 	    }
4193 	}
4194 
4195       /* As a last case, if the dependence cannot be determined, or if
4196 	 the dependence is considered too difficult to determine, answer
4197 	 "don't know".  */
4198       else
4199 	{
4200 	csys_dont_know:;
4201 	  dependence_stats.num_dependence_undetermined++;
4202 
4203 	  if (dump_file && (dump_flags & TDF_DETAILS))
4204 	    {
4205 	      fprintf (dump_file, "Data ref a:\n");
4206 	      dump_data_reference (dump_file, dra);
4207 	      fprintf (dump_file, "Data ref b:\n");
4208 	      dump_data_reference (dump_file, drb);
4209 	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4210 	    }
4211 	  finalize_ddr_dependent (ddr, chrec_dont_know);
4212 	}
4213     }
4214 
4215   if (dump_file && (dump_flags & TDF_DETAILS))
4216     {
4217       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4218 	fprintf (dump_file, ") -> no dependence\n");
4219       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4220 	fprintf (dump_file, ") -> dependence analysis failed\n");
4221       else
4222 	fprintf (dump_file, ")\n");
4223     }
4224 }
4225 
4226 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4227    the data references in DATAREFS, in the LOOP_NEST.  When
4228    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4229    relations.  Return true when successful, i.e. data references number
4230    is small enough to be handled.  */
4231 
4232 bool
compute_all_dependences(vec<data_reference_p> datarefs,vec<ddr_p> * dependence_relations,vec<loop_p> loop_nest,bool compute_self_and_rr)4233 compute_all_dependences (vec<data_reference_p> datarefs,
4234 			 vec<ddr_p> *dependence_relations,
4235 			 vec<loop_p> loop_nest,
4236 			 bool compute_self_and_rr)
4237 {
4238   struct data_dependence_relation *ddr;
4239   struct data_reference *a, *b;
4240   unsigned int i, j;
4241 
4242   if ((int) datarefs.length ()
4243       > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4244     {
4245       struct data_dependence_relation *ddr;
4246 
4247       /* Insert a single relation into dependence_relations:
4248 	 chrec_dont_know.  */
4249       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4250       dependence_relations->safe_push (ddr);
4251       return false;
4252     }
4253 
4254   FOR_EACH_VEC_ELT (datarefs, i, a)
4255     for (j = i + 1; datarefs.iterate (j, &b); j++)
4256       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4257 	{
4258 	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
4259 	  dependence_relations->safe_push (ddr);
4260           if (loop_nest.exists ())
4261    	    compute_affine_dependence (ddr, loop_nest[0]);
4262 	}
4263 
4264   if (compute_self_and_rr)
4265     FOR_EACH_VEC_ELT (datarefs, i, a)
4266       {
4267 	ddr = initialize_data_dependence_relation (a, a, loop_nest);
4268 	dependence_relations->safe_push (ddr);
4269         if (loop_nest.exists ())
4270    	  compute_affine_dependence (ddr, loop_nest[0]);
4271       }
4272 
4273   return true;
4274 }
4275 
4276 /* Describes a location of a memory reference.  */
4277 
4278 typedef struct data_ref_loc_d
4279 {
4280     /* Position of the memory reference.  */
4281     tree *pos;
4282 
4283       /* True if the memory reference is read.  */
4284       bool is_read;
4285 } data_ref_loc;
4286 
4287 
4288 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
4289    true if STMT clobbers memory, false otherwise.  */
4290 
4291 static bool
get_references_in_stmt(gimple stmt,vec<data_ref_loc> * references)4292 get_references_in_stmt (gimple stmt, vec<data_ref_loc> *references)
4293 {
4294   bool clobbers_memory = false;
4295   data_ref_loc ref;
4296   tree *op0, *op1;
4297   enum gimple_code stmt_code = gimple_code (stmt);
4298 
4299   references->create (0);
4300 
4301   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4302      As we cannot model data-references to not spelled out
4303      accesses give up if they may occur.  */
4304   if ((stmt_code == GIMPLE_CALL
4305        && !(gimple_call_flags (stmt) & ECF_CONST))
4306       || (stmt_code == GIMPLE_ASM
4307 	  && (gimple_asm_volatile_p (stmt) || gimple_vuse (stmt))))
4308     clobbers_memory = true;
4309 
4310   if (!gimple_vuse (stmt))
4311     return clobbers_memory;
4312 
4313   if (stmt_code == GIMPLE_ASSIGN)
4314     {
4315       tree base;
4316       op0 = gimple_assign_lhs_ptr (stmt);
4317       op1 = gimple_assign_rhs1_ptr (stmt);
4318 
4319       if (DECL_P (*op1)
4320 	  || (REFERENCE_CLASS_P (*op1)
4321 	      && (base = get_base_address (*op1))
4322 	      && TREE_CODE (base) != SSA_NAME))
4323 	{
4324 	  ref.pos = op1;
4325 	  ref.is_read = true;
4326 	  references->safe_push (ref);
4327 	}
4328     }
4329   else if (stmt_code == GIMPLE_CALL)
4330     {
4331       unsigned i, n;
4332 
4333       op0 = gimple_call_lhs_ptr (stmt);
4334       n = gimple_call_num_args (stmt);
4335       for (i = 0; i < n; i++)
4336 	{
4337 	  op1 = gimple_call_arg_ptr (stmt, i);
4338 
4339 	  if (DECL_P (*op1)
4340 	      || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4341 	    {
4342 	      ref.pos = op1;
4343 	      ref.is_read = true;
4344 	      references->safe_push (ref);
4345 	    }
4346 	}
4347     }
4348   else
4349     return clobbers_memory;
4350 
4351   if (*op0
4352       && (DECL_P (*op0)
4353 	  || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4354     {
4355       ref.pos = op0;
4356       ref.is_read = false;
4357       references->safe_push (ref);
4358     }
4359   return clobbers_memory;
4360 }
4361 
4362 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
4363    reference, returns false, otherwise returns true.  NEST is the outermost
4364    loop of the loop nest in which the references should be analyzed.  */
4365 
4366 bool
find_data_references_in_stmt(struct loop * nest,gimple stmt,vec<data_reference_p> * datarefs)4367 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4368 			      vec<data_reference_p> *datarefs)
4369 {
4370   unsigned i;
4371   vec<data_ref_loc> references;
4372   data_ref_loc *ref;
4373   bool ret = true;
4374   data_reference_p dr;
4375 
4376   if (get_references_in_stmt (stmt, &references))
4377     {
4378       references.release ();
4379       return false;
4380     }
4381 
4382   FOR_EACH_VEC_ELT (references, i, ref)
4383     {
4384       dr = create_data_ref (nest, loop_containing_stmt (stmt),
4385 			    *ref->pos, stmt, ref->is_read);
4386       gcc_assert (dr != NULL);
4387       datarefs->safe_push (dr);
4388     }
4389   references.release ();
4390   return ret;
4391 }
4392 
4393 /* Stores the data references in STMT to DATAREFS.  If there is an
4394    unanalyzable reference, returns false, otherwise returns true.
4395    NEST is the outermost loop of the loop nest in which the references
4396    should be instantiated, LOOP is the loop in which the references
4397    should be analyzed.  */
4398 
4399 bool
graphite_find_data_references_in_stmt(loop_p nest,loop_p loop,gimple stmt,vec<data_reference_p> * datarefs)4400 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4401 				       vec<data_reference_p> *datarefs)
4402 {
4403   unsigned i;
4404   vec<data_ref_loc> references;
4405   data_ref_loc *ref;
4406   bool ret = true;
4407   data_reference_p dr;
4408 
4409   if (get_references_in_stmt (stmt, &references))
4410     {
4411       references.release ();
4412       return false;
4413     }
4414 
4415   FOR_EACH_VEC_ELT (references, i, ref)
4416     {
4417       dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4418       gcc_assert (dr != NULL);
4419       datarefs->safe_push (dr);
4420     }
4421 
4422   references.release ();
4423   return ret;
4424 }
4425 
4426 /* Search the data references in LOOP, and record the information into
4427    DATAREFS.  Returns chrec_dont_know when failing to analyze a
4428    difficult case, returns NULL_TREE otherwise.  */
4429 
4430 tree
find_data_references_in_bb(struct loop * loop,basic_block bb,vec<data_reference_p> * datarefs)4431 find_data_references_in_bb (struct loop *loop, basic_block bb,
4432                             vec<data_reference_p> *datarefs)
4433 {
4434   gimple_stmt_iterator bsi;
4435 
4436   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4437     {
4438       gimple stmt = gsi_stmt (bsi);
4439 
4440       if (!find_data_references_in_stmt (loop, stmt, datarefs))
4441         {
4442           struct data_reference *res;
4443           res = XCNEW (struct data_reference);
4444           datarefs->safe_push (res);
4445 
4446           return chrec_dont_know;
4447         }
4448     }
4449 
4450   return NULL_TREE;
4451 }
4452 
4453 /* Search the data references in LOOP, and record the information into
4454    DATAREFS.  Returns chrec_dont_know when failing to analyze a
4455    difficult case, returns NULL_TREE otherwise.
4456 
4457    TODO: This function should be made smarter so that it can handle address
4458    arithmetic as if they were array accesses, etc.  */
4459 
4460 static tree
find_data_references_in_loop(struct loop * loop,vec<data_reference_p> * datarefs)4461 find_data_references_in_loop (struct loop *loop,
4462 			      vec<data_reference_p> *datarefs)
4463 {
4464   basic_block bb, *bbs;
4465   unsigned int i;
4466 
4467   bbs = get_loop_body_in_dom_order (loop);
4468 
4469   for (i = 0; i < loop->num_nodes; i++)
4470     {
4471       bb = bbs[i];
4472 
4473       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4474         {
4475           free (bbs);
4476           return chrec_dont_know;
4477         }
4478     }
4479   free (bbs);
4480 
4481   return NULL_TREE;
4482 }
4483 
4484 /* Recursive helper function.  */
4485 
4486 static bool
find_loop_nest_1(struct loop * loop,vec<loop_p> * loop_nest)4487 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
4488 {
4489   /* Inner loops of the nest should not contain siblings.  Example:
4490      when there are two consecutive loops,
4491 
4492      | loop_0
4493      |   loop_1
4494      |     A[{0, +, 1}_1]
4495      |   endloop_1
4496      |   loop_2
4497      |     A[{0, +, 1}_2]
4498      |   endloop_2
4499      | endloop_0
4500 
4501      the dependence relation cannot be captured by the distance
4502      abstraction.  */
4503   if (loop->next)
4504     return false;
4505 
4506   loop_nest->safe_push (loop);
4507   if (loop->inner)
4508     return find_loop_nest_1 (loop->inner, loop_nest);
4509   return true;
4510 }
4511 
4512 /* Return false when the LOOP is not well nested.  Otherwise return
4513    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
4514    contain the loops from the outermost to the innermost, as they will
4515    appear in the classic distance vector.  */
4516 
4517 bool
find_loop_nest(struct loop * loop,vec<loop_p> * loop_nest)4518 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
4519 {
4520   loop_nest->safe_push (loop);
4521   if (loop->inner)
4522     return find_loop_nest_1 (loop->inner, loop_nest);
4523   return true;
4524 }
4525 
4526 /* Returns true when the data dependences have been computed, false otherwise.
4527    Given a loop nest LOOP, the following vectors are returned:
4528    DATAREFS is initialized to all the array elements contained in this loop,
4529    DEPENDENCE_RELATIONS contains the relations between the data references.
4530    Compute read-read and self relations if
4531    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
4532 
4533 bool
compute_data_dependences_for_loop(struct loop * loop,bool compute_self_and_read_read_dependences,vec<loop_p> * loop_nest,vec<data_reference_p> * datarefs,vec<ddr_p> * dependence_relations)4534 compute_data_dependences_for_loop (struct loop *loop,
4535 				   bool compute_self_and_read_read_dependences,
4536 				   vec<loop_p> *loop_nest,
4537 				   vec<data_reference_p> *datarefs,
4538 				   vec<ddr_p> *dependence_relations)
4539 {
4540   bool res = true;
4541 
4542   memset (&dependence_stats, 0, sizeof (dependence_stats));
4543 
4544   /* If the loop nest is not well formed, or one of the data references
4545      is not computable, give up without spending time to compute other
4546      dependences.  */
4547   if (!loop
4548       || !find_loop_nest (loop, loop_nest)
4549       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4550       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4551 				   compute_self_and_read_read_dependences))
4552     res = false;
4553 
4554   if (dump_file && (dump_flags & TDF_STATS))
4555     {
4556       fprintf (dump_file, "Dependence tester statistics:\n");
4557 
4558       fprintf (dump_file, "Number of dependence tests: %d\n",
4559 	       dependence_stats.num_dependence_tests);
4560       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4561 	       dependence_stats.num_dependence_dependent);
4562       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4563 	       dependence_stats.num_dependence_independent);
4564       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4565 	       dependence_stats.num_dependence_undetermined);
4566 
4567       fprintf (dump_file, "Number of subscript tests: %d\n",
4568 	       dependence_stats.num_subscript_tests);
4569       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4570 	       dependence_stats.num_subscript_undetermined);
4571       fprintf (dump_file, "Number of same subscript function: %d\n",
4572 	       dependence_stats.num_same_subscript_function);
4573 
4574       fprintf (dump_file, "Number of ziv tests: %d\n",
4575 	       dependence_stats.num_ziv);
4576       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4577 	       dependence_stats.num_ziv_dependent);
4578       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4579 	       dependence_stats.num_ziv_independent);
4580       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4581 	       dependence_stats.num_ziv_unimplemented);
4582 
4583       fprintf (dump_file, "Number of siv tests: %d\n",
4584 	       dependence_stats.num_siv);
4585       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4586 	       dependence_stats.num_siv_dependent);
4587       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4588 	       dependence_stats.num_siv_independent);
4589       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4590 	       dependence_stats.num_siv_unimplemented);
4591 
4592       fprintf (dump_file, "Number of miv tests: %d\n",
4593 	       dependence_stats.num_miv);
4594       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4595 	       dependence_stats.num_miv_dependent);
4596       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4597 	       dependence_stats.num_miv_independent);
4598       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4599 	       dependence_stats.num_miv_unimplemented);
4600     }
4601 
4602   return res;
4603 }
4604 
4605 /* Returns true when the data dependences for the basic block BB have been
4606    computed, false otherwise.
4607    DATAREFS is initialized to all the array elements contained in this basic
4608    block, DEPENDENCE_RELATIONS contains the relations between the data
4609    references. Compute read-read and self relations if
4610    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
4611 bool
compute_data_dependences_for_bb(basic_block bb,bool compute_self_and_read_read_dependences,vec<data_reference_p> * datarefs,vec<ddr_p> * dependence_relations)4612 compute_data_dependences_for_bb (basic_block bb,
4613                                  bool compute_self_and_read_read_dependences,
4614                                  vec<data_reference_p> *datarefs,
4615                                  vec<ddr_p> *dependence_relations)
4616 {
4617   if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4618     return false;
4619 
4620   return compute_all_dependences (*datarefs, dependence_relations, vNULL,
4621 				  compute_self_and_read_read_dependences);
4622 }
4623 
4624 /* Entry point (for testing only).  Analyze all the data references
4625    and the dependence relations in LOOP.
4626 
4627    The data references are computed first.
4628 
4629    A relation on these nodes is represented by a complete graph.  Some
4630    of the relations could be of no interest, thus the relations can be
4631    computed on demand.
4632 
4633    In the following function we compute all the relations.  This is
4634    just a first implementation that is here for:
4635    - for showing how to ask for the dependence relations,
4636    - for the debugging the whole dependence graph,
4637    - for the dejagnu testcases and maintenance.
4638 
4639    It is possible to ask only for a part of the graph, avoiding to
4640    compute the whole dependence graph.  The computed dependences are
4641    stored in a knowledge base (KB) such that later queries don't
4642    recompute the same information.  The implementation of this KB is
4643    transparent to the optimizer, and thus the KB can be changed with a
4644    more efficient implementation, or the KB could be disabled.  */
4645 static void
analyze_all_data_dependences(struct loop * loop)4646 analyze_all_data_dependences (struct loop *loop)
4647 {
4648   unsigned int i;
4649   int nb_data_refs = 10;
4650   vec<data_reference_p> datarefs;
4651   datarefs.create (nb_data_refs);
4652   vec<ddr_p> dependence_relations;
4653   dependence_relations.create (nb_data_refs * nb_data_refs);
4654   vec<loop_p> loop_nest;
4655   loop_nest.create (3);
4656 
4657   /* Compute DDs on the whole function.  */
4658   compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4659 				     &dependence_relations);
4660 
4661   if (dump_file)
4662     {
4663       dump_data_dependence_relations (dump_file, dependence_relations);
4664       fprintf (dump_file, "\n\n");
4665 
4666       if (dump_flags & TDF_DETAILS)
4667 	dump_dist_dir_vectors (dump_file, dependence_relations);
4668 
4669       if (dump_flags & TDF_STATS)
4670 	{
4671 	  unsigned nb_top_relations = 0;
4672 	  unsigned nb_bot_relations = 0;
4673 	  unsigned nb_chrec_relations = 0;
4674 	  struct data_dependence_relation *ddr;
4675 
4676 	  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4677 	    {
4678 	      if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4679 		nb_top_relations++;
4680 
4681 	      else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4682 		nb_bot_relations++;
4683 
4684 	      else
4685 		nb_chrec_relations++;
4686 	    }
4687 
4688 	  gather_stats_on_scev_database ();
4689 	}
4690     }
4691 
4692   loop_nest.release ();
4693   free_dependence_relations (dependence_relations);
4694   free_data_refs (datarefs);
4695 }
4696 
4697 /* Computes all the data dependences and check that the results of
4698    several analyzers are the same.  */
4699 
4700 void
tree_check_data_deps(void)4701 tree_check_data_deps (void)
4702 {
4703   loop_iterator li;
4704   struct loop *loop_nest;
4705 
4706   FOR_EACH_LOOP (li, loop_nest, 0)
4707     analyze_all_data_dependences (loop_nest);
4708 }
4709 
4710 /* Free the memory used by a data dependence relation DDR.  */
4711 
4712 void
free_dependence_relation(struct data_dependence_relation * ddr)4713 free_dependence_relation (struct data_dependence_relation *ddr)
4714 {
4715   if (ddr == NULL)
4716     return;
4717 
4718   if (DDR_SUBSCRIPTS (ddr).exists ())
4719     free_subscripts (DDR_SUBSCRIPTS (ddr));
4720   DDR_DIST_VECTS (ddr).release ();
4721   DDR_DIR_VECTS (ddr).release ();
4722 
4723   free (ddr);
4724 }
4725 
4726 /* Free the memory used by the data dependence relations from
4727    DEPENDENCE_RELATIONS.  */
4728 
4729 void
free_dependence_relations(vec<ddr_p> dependence_relations)4730 free_dependence_relations (vec<ddr_p> dependence_relations)
4731 {
4732   unsigned int i;
4733   struct data_dependence_relation *ddr;
4734 
4735   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4736     if (ddr)
4737       free_dependence_relation (ddr);
4738 
4739   dependence_relations.release ();
4740 }
4741 
4742 /* Free the memory used by the data references from DATAREFS.  */
4743 
4744 void
free_data_refs(vec<data_reference_p> datarefs)4745 free_data_refs (vec<data_reference_p> datarefs)
4746 {
4747   unsigned int i;
4748   struct data_reference *dr;
4749 
4750   FOR_EACH_VEC_ELT (datarefs, i, dr)
4751     free_data_ref (dr);
4752   datarefs.release ();
4753 }
4754 
4755 
4756 
4757 /* Dump vertex I in RDG to FILE.  */
4758 
4759 static void
dump_rdg_vertex(FILE * file,struct graph * rdg,int i)4760 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4761 {
4762   struct vertex *v = &(rdg->vertices[i]);
4763   struct graph_edge *e;
4764 
4765   fprintf (file, "(vertex %d: (%s%s) (in:", i,
4766 	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4767 	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4768 
4769   if (v->pred)
4770     for (e = v->pred; e; e = e->pred_next)
4771       fprintf (file, " %d", e->src);
4772 
4773   fprintf (file, ") (out:");
4774 
4775   if (v->succ)
4776     for (e = v->succ; e; e = e->succ_next)
4777       fprintf (file, " %d", e->dest);
4778 
4779   fprintf (file, ")\n");
4780   print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4781   fprintf (file, ")\n");
4782 }
4783 
4784 /* Call dump_rdg_vertex on stderr.  */
4785 
4786 DEBUG_FUNCTION void
debug_rdg_vertex(struct graph * rdg,int i)4787 debug_rdg_vertex (struct graph *rdg, int i)
4788 {
4789   dump_rdg_vertex (stderr, rdg, i);
4790 }
4791 
4792 /* Dump component C of RDG to FILE.  If DUMPED is non-null, set the
4793    dumped vertices to that bitmap.  */
4794 
4795 static void
dump_rdg_component(FILE * file,struct graph * rdg,int c,bitmap dumped)4796 dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4797 {
4798   int i;
4799 
4800   fprintf (file, "(%d\n", c);
4801 
4802   for (i = 0; i < rdg->n_vertices; i++)
4803     if (rdg->vertices[i].component == c)
4804       {
4805 	if (dumped)
4806 	  bitmap_set_bit (dumped, i);
4807 
4808 	dump_rdg_vertex (file, rdg, i);
4809       }
4810 
4811   fprintf (file, ")\n");
4812 }
4813 
4814 /* Call dump_rdg_vertex on stderr.  */
4815 
4816 DEBUG_FUNCTION void
debug_rdg_component(struct graph * rdg,int c)4817 debug_rdg_component (struct graph *rdg, int c)
4818 {
4819   dump_rdg_component (stderr, rdg, c, NULL);
4820 }
4821 
4822 /* Dump the reduced dependence graph RDG to FILE.  */
4823 
4824 void
dump_rdg(FILE * file,struct graph * rdg)4825 dump_rdg (FILE *file, struct graph *rdg)
4826 {
4827   int i;
4828   bitmap dumped = BITMAP_ALLOC (NULL);
4829 
4830   fprintf (file, "(rdg\n");
4831 
4832   for (i = 0; i < rdg->n_vertices; i++)
4833     if (!bitmap_bit_p (dumped, i))
4834       dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4835 
4836   fprintf (file, ")\n");
4837   BITMAP_FREE (dumped);
4838 }
4839 
4840 /* Call dump_rdg on stderr.  */
4841 
4842 DEBUG_FUNCTION void
debug_rdg(struct graph * rdg)4843 debug_rdg (struct graph *rdg)
4844 {
4845   dump_rdg (stderr, rdg);
4846 }
4847 
4848 static void
dot_rdg_1(FILE * file,struct graph * rdg)4849 dot_rdg_1 (FILE *file, struct graph *rdg)
4850 {
4851   int i;
4852 
4853   fprintf (file, "digraph RDG {\n");
4854 
4855   for (i = 0; i < rdg->n_vertices; i++)
4856     {
4857       struct vertex *v = &(rdg->vertices[i]);
4858       struct graph_edge *e;
4859 
4860       /* Highlight reads from memory.  */
4861       if (RDG_MEM_READS_STMT (rdg, i))
4862        fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4863 
4864       /* Highlight stores to memory.  */
4865       if (RDG_MEM_WRITE_STMT (rdg, i))
4866        fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4867 
4868       if (v->succ)
4869        for (e = v->succ; e; e = e->succ_next)
4870          switch (RDGE_TYPE (e))
4871            {
4872            case input_dd:
4873              fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4874              break;
4875 
4876            case output_dd:
4877              fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4878              break;
4879 
4880            case flow_dd:
4881              /* These are the most common dependences: don't print these. */
4882              fprintf (file, "%d -> %d \n", i, e->dest);
4883              break;
4884 
4885            case anti_dd:
4886              fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4887              break;
4888 
4889            default:
4890              gcc_unreachable ();
4891            }
4892     }
4893 
4894   fprintf (file, "}\n\n");
4895 }
4896 
4897 /* Display the Reduced Dependence Graph using dotty.  */
4898 extern void dot_rdg (struct graph *);
4899 
4900 DEBUG_FUNCTION void
dot_rdg(struct graph * rdg)4901 dot_rdg (struct graph *rdg)
4902 {
4903   /* When debugging, enable the following code.  This cannot be used
4904      in production compilers because it calls "system".  */
4905 #if 0
4906   FILE *file = fopen ("/tmp/rdg.dot", "w");
4907   gcc_assert (file != NULL);
4908 
4909   dot_rdg_1 (file, rdg);
4910   fclose (file);
4911 
4912   system ("dotty /tmp/rdg.dot &");
4913 #else
4914   dot_rdg_1 (stderr, rdg);
4915 #endif
4916 }
4917 
4918 /* Returns the index of STMT in RDG.  */
4919 
4920 int
rdg_vertex_for_stmt(struct graph * rdg ATTRIBUTE_UNUSED,gimple stmt)4921 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
4922 {
4923   int index = gimple_uid (stmt);
4924   gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
4925   return index;
4926 }
4927 
4928 /* Creates an edge in RDG for each distance vector from DDR.  The
4929    order that we keep track of in the RDG is the order in which
4930    statements have to be executed.  */
4931 
4932 static void
create_rdg_edge_for_ddr(struct graph * rdg,ddr_p ddr)4933 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4934 {
4935   struct graph_edge *e;
4936   int va, vb;
4937   data_reference_p dra = DDR_A (ddr);
4938   data_reference_p drb = DDR_B (ddr);
4939   unsigned level = ddr_dependence_level (ddr);
4940 
4941   /* For non scalar dependences, when the dependence is REVERSED,
4942      statement B has to be executed before statement A.  */
4943   if (level > 0
4944       && !DDR_REVERSED_P (ddr))
4945     {
4946       data_reference_p tmp = dra;
4947       dra = drb;
4948       drb = tmp;
4949     }
4950 
4951   va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4952   vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4953 
4954   if (va < 0 || vb < 0)
4955     return;
4956 
4957   e = add_edge (rdg, va, vb);
4958   e->data = XNEW (struct rdg_edge);
4959 
4960   RDGE_LEVEL (e) = level;
4961   RDGE_RELATION (e) = ddr;
4962 
4963   /* Determines the type of the data dependence.  */
4964   if (DR_IS_READ (dra) && DR_IS_READ (drb))
4965     RDGE_TYPE (e) = input_dd;
4966   else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4967     RDGE_TYPE (e) = output_dd;
4968   else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4969     RDGE_TYPE (e) = flow_dd;
4970   else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4971     RDGE_TYPE (e) = anti_dd;
4972 }
4973 
4974 /* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
4975    the index of DEF in RDG.  */
4976 
4977 static void
create_rdg_edges_for_scalar(struct graph * rdg,tree def,int idef)4978 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4979 {
4980   use_operand_p imm_use_p;
4981   imm_use_iterator iterator;
4982 
4983   FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4984     {
4985       struct graph_edge *e;
4986       int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4987 
4988       if (use < 0)
4989 	continue;
4990 
4991       e = add_edge (rdg, idef, use);
4992       e->data = XNEW (struct rdg_edge);
4993       RDGE_TYPE (e) = flow_dd;
4994       RDGE_RELATION (e) = NULL;
4995     }
4996 }
4997 
4998 /* Creates the edges of the reduced dependence graph RDG.  */
4999 
5000 static void
create_rdg_edges(struct graph * rdg,vec<ddr_p> ddrs)5001 create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs)
5002 {
5003   int i;
5004   struct data_dependence_relation *ddr;
5005   def_operand_p def_p;
5006   ssa_op_iter iter;
5007 
5008   FOR_EACH_VEC_ELT (ddrs, i, ddr)
5009     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5010       create_rdg_edge_for_ddr (rdg, ddr);
5011 
5012   for (i = 0; i < rdg->n_vertices; i++)
5013     FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
5014 			      iter, SSA_OP_DEF)
5015       create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
5016 }
5017 
5018 /* Build the vertices of the reduced dependence graph RDG.  */
5019 
5020 static void
create_rdg_vertices(struct graph * rdg,vec<gimple> stmts,loop_p loop)5021 create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop)
5022 {
5023   int i, j;
5024   gimple stmt;
5025 
5026   FOR_EACH_VEC_ELT (stmts, i, stmt)
5027     {
5028       vec<data_ref_loc> references;
5029       data_ref_loc *ref;
5030       struct vertex *v = &(rdg->vertices[i]);
5031 
5032       /* Record statement to vertex mapping.  */
5033       gimple_set_uid (stmt, i);
5034 
5035       v->data = XNEW (struct rdg_vertex);
5036       RDGV_STMT (v) = stmt;
5037       RDGV_DATAREFS (v).create (0);
5038       RDGV_HAS_MEM_WRITE (v) = false;
5039       RDGV_HAS_MEM_READS (v) = false;
5040       if (gimple_code (stmt) == GIMPLE_PHI)
5041 	continue;
5042 
5043       get_references_in_stmt (stmt, &references);
5044       FOR_EACH_VEC_ELT (references, j, ref)
5045 	{
5046 	  data_reference_p dr;
5047 	  if (!ref->is_read)
5048 	    RDGV_HAS_MEM_WRITE (v) = true;
5049 	  else
5050 	    RDGV_HAS_MEM_READS (v) = true;
5051 	  dr = create_data_ref (loop, loop_containing_stmt (stmt),
5052 				*ref->pos, stmt, ref->is_read);
5053 	  if (dr)
5054 	    RDGV_DATAREFS (v).safe_push (dr);
5055 	}
5056       references.release ();
5057     }
5058 }
5059 
5060 /* Initialize STMTS with all the statements of LOOP.  When
5061    INCLUDE_PHIS is true, include also the PHI nodes.  The order in
5062    which we discover statements is important as
5063    generate_loops_for_partition is using the same traversal for
5064    identifying statements. */
5065 
5066 static void
stmts_from_loop(struct loop * loop,vec<gimple> * stmts)5067 stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
5068 {
5069   unsigned int i;
5070   basic_block *bbs = get_loop_body_in_dom_order (loop);
5071 
5072   for (i = 0; i < loop->num_nodes; i++)
5073     {
5074       basic_block bb = bbs[i];
5075       gimple_stmt_iterator bsi;
5076       gimple stmt;
5077 
5078       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5079 	stmts->safe_push (gsi_stmt (bsi));
5080 
5081       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5082 	{
5083 	  stmt = gsi_stmt (bsi);
5084 	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
5085 	    stmts->safe_push (stmt);
5086 	}
5087     }
5088 
5089   free (bbs);
5090 }
5091 
5092 /* Returns true when all the dependences are computable.  */
5093 
5094 static bool
known_dependences_p(vec<ddr_p> dependence_relations)5095 known_dependences_p (vec<ddr_p> dependence_relations)
5096 {
5097   ddr_p ddr;
5098   unsigned int i;
5099 
5100   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5101     if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5102       return false;
5103 
5104   return true;
5105 }
5106 
5107 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5108    statement of the loop nest, and one edge per data dependence or
5109    scalar dependence.  */
5110 
5111 struct graph *
build_empty_rdg(int n_stmts)5112 build_empty_rdg (int n_stmts)
5113 {
5114   struct graph *rdg = new_graph (n_stmts);
5115   return rdg;
5116 }
5117 
5118 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5119    statement of the loop nest, and one edge per data dependence or
5120    scalar dependence.  */
5121 
5122 struct graph *
build_rdg(struct loop * loop,vec<loop_p> * loop_nest,vec<ddr_p> * dependence_relations,vec<data_reference_p> * datarefs)5123 build_rdg (struct loop *loop,
5124 	   vec<loop_p> *loop_nest,
5125 	   vec<ddr_p> *dependence_relations,
5126 	   vec<data_reference_p> *datarefs)
5127 {
5128   struct graph *rdg = NULL;
5129 
5130   if (compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5131 					 dependence_relations)
5132       && known_dependences_p (*dependence_relations))
5133     {
5134       vec<gimple> stmts;
5135       stmts.create (10);
5136       stmts_from_loop (loop, &stmts);
5137       rdg = build_empty_rdg (stmts.length ());
5138       create_rdg_vertices (rdg, stmts, loop);
5139       create_rdg_edges (rdg, *dependence_relations);
5140       stmts.release ();
5141     }
5142 
5143   return rdg;
5144 }
5145 
5146 /* Free the reduced dependence graph RDG.  */
5147 
5148 void
free_rdg(struct graph * rdg)5149 free_rdg (struct graph *rdg)
5150 {
5151   int i;
5152 
5153   for (i = 0; i < rdg->n_vertices; i++)
5154     {
5155       struct vertex *v = &(rdg->vertices[i]);
5156       struct graph_edge *e;
5157 
5158       for (e = v->succ; e; e = e->succ_next)
5159 	free (e->data);
5160 
5161       gimple_set_uid (RDGV_STMT (v), -1);
5162       free_data_refs (RDGV_DATAREFS (v));
5163       free (v->data);
5164     }
5165 
5166   free_graph (rdg);
5167 }
5168 
5169 /* Determines whether the statement from vertex V of the RDG has a
5170    definition used outside the loop that contains this statement.  */
5171 
5172 bool
rdg_defs_used_in_other_loops_p(struct graph * rdg,int v)5173 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5174 {
5175   gimple stmt = RDG_STMT (rdg, v);
5176   struct loop *loop = loop_containing_stmt (stmt);
5177   use_operand_p imm_use_p;
5178   imm_use_iterator iterator;
5179   ssa_op_iter it;
5180   def_operand_p def_p;
5181 
5182   if (!loop)
5183     return true;
5184 
5185   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5186     {
5187       FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5188 	{
5189 	  if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5190 	    return true;
5191 	}
5192     }
5193 
5194   return false;
5195 }
5196