1 /* Data references and dependences detectors.
2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* This pass walks a given loop structure searching for array
22    references.  The information about the array accesses is recorded
23    in DATA_REFERENCE structures.
24 
25    The basic test for determining the dependences is:
26    given two access functions chrec1 and chrec2 to a same array, and
27    x and y two vectors from the iteration domain, the same element of
28    the array is accessed twice at iterations x and y if and only if:
29    |             chrec1 (x) == chrec2 (y).
30 
31    The goals of this analysis are:
32 
33    - to determine the independence: the relation between two
34      independent accesses is qualified with the chrec_known (this
35      information allows a loop parallelization),
36 
37    - when two data references access the same data, to qualify the
38      dependence relation with classic dependence representations:
39 
40        - distance vectors
41        - direction vectors
42        - loop carried level dependence
43        - polyhedron dependence
44      or with the chains of recurrences based representation,
45 
46    - to define a knowledge base for storing the data dependence
47      information,
48 
49    - to define an interface to access this data.
50 
51 
52    Definitions:
53 
54    - subscript: given two array accesses a subscript is the tuple
55    composed of the access functions for a given dimension.  Example:
56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57    (f1, g1), (f2, g2), (f3, g3).
58 
59    - Diophantine equation: an equation whose coefficients and
60    solutions are integer constants, for example the equation
61    |   3*x + 2*y = 1
62    has an integer solution x = 1 and y = -1.
63 
64    References:
65 
66    - "Advanced Compilation for High Performance Computing" by Randy
67    Allen and Ken Kennedy.
68    http://citeseer.ist.psu.edu/goff91practical.html
69 
70    - "Loop Transformations for Restructuring Compilers - The Foundations"
71    by Utpal Banerjee.
72 
73 
74 */
75 
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "params.h"
97 #include "builtins.h"
98 #include "stringpool.h"
99 #include "tree-vrp.h"
100 #include "tree-ssanames.h"
101 #include "tree-eh.h"
102 
103 static struct datadep_stats
104 {
105   int num_dependence_tests;
106   int num_dependence_dependent;
107   int num_dependence_independent;
108   int num_dependence_undetermined;
109 
110   int num_subscript_tests;
111   int num_subscript_undetermined;
112   int num_same_subscript_function;
113 
114   int num_ziv;
115   int num_ziv_independent;
116   int num_ziv_dependent;
117   int num_ziv_unimplemented;
118 
119   int num_siv;
120   int num_siv_independent;
121   int num_siv_dependent;
122   int num_siv_unimplemented;
123 
124   int num_miv;
125   int num_miv_independent;
126   int num_miv_dependent;
127   int num_miv_unimplemented;
128 } dependence_stats;
129 
130 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
131 					   unsigned int, unsigned int,
132 					   struct loop *);
133 /* Returns true iff A divides B.  */
134 
135 static inline bool
tree_fold_divides_p(const_tree a,const_tree b)136 tree_fold_divides_p (const_tree a, const_tree b)
137 {
138   gcc_assert (TREE_CODE (a) == INTEGER_CST);
139   gcc_assert (TREE_CODE (b) == INTEGER_CST);
140   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
141 }
142 
143 /* Returns true iff A divides B.  */
144 
145 static inline bool
int_divides_p(int a,int b)146 int_divides_p (int a, int b)
147 {
148   return ((b % a) == 0);
149 }
150 
151 /* Return true if reference REF contains a union access.  */
152 
153 static bool
ref_contains_union_access_p(tree ref)154 ref_contains_union_access_p (tree ref)
155 {
156   while (handled_component_p (ref))
157     {
158       ref = TREE_OPERAND (ref, 0);
159       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
160 	  || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
161 	return true;
162     }
163   return false;
164 }
165 
166 
167 
168 /* Dump into FILE all the data references from DATAREFS.  */
169 
170 static void
dump_data_references(FILE * file,vec<data_reference_p> datarefs)171 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
172 {
173   unsigned int i;
174   struct data_reference *dr;
175 
176   FOR_EACH_VEC_ELT (datarefs, i, dr)
177     dump_data_reference (file, dr);
178 }
179 
180 /* Unified dump into FILE all the data references from DATAREFS.  */
181 
182 DEBUG_FUNCTION void
debug(vec<data_reference_p> & ref)183 debug (vec<data_reference_p> &ref)
184 {
185   dump_data_references (stderr, ref);
186 }
187 
188 DEBUG_FUNCTION void
debug(vec<data_reference_p> * ptr)189 debug (vec<data_reference_p> *ptr)
190 {
191   if (ptr)
192     debug (*ptr);
193   else
194     fprintf (stderr, "<nil>\n");
195 }
196 
197 
198 /* Dump into STDERR all the data references from DATAREFS.  */
199 
200 DEBUG_FUNCTION void
debug_data_references(vec<data_reference_p> datarefs)201 debug_data_references (vec<data_reference_p> datarefs)
202 {
203   dump_data_references (stderr, datarefs);
204 }
205 
206 /* Print to STDERR the data_reference DR.  */
207 
208 DEBUG_FUNCTION void
debug_data_reference(struct data_reference * dr)209 debug_data_reference (struct data_reference *dr)
210 {
211   dump_data_reference (stderr, dr);
212 }
213 
214 /* Dump function for a DATA_REFERENCE structure.  */
215 
216 void
dump_data_reference(FILE * outf,struct data_reference * dr)217 dump_data_reference (FILE *outf,
218 		     struct data_reference *dr)
219 {
220   unsigned int i;
221 
222   fprintf (outf, "#(Data Ref: \n");
223   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
224   fprintf (outf, "#  stmt: ");
225   print_gimple_stmt (outf, DR_STMT (dr), 0);
226   fprintf (outf, "#  ref: ");
227   print_generic_stmt (outf, DR_REF (dr));
228   fprintf (outf, "#  base_object: ");
229   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
230 
231   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
232     {
233       fprintf (outf, "#  Access function %d: ", i);
234       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
235     }
236   fprintf (outf, "#)\n");
237 }
238 
239 /* Unified dump function for a DATA_REFERENCE structure.  */
240 
241 DEBUG_FUNCTION void
debug(data_reference & ref)242 debug (data_reference &ref)
243 {
244   dump_data_reference (stderr, &ref);
245 }
246 
247 DEBUG_FUNCTION void
debug(data_reference * ptr)248 debug (data_reference *ptr)
249 {
250   if (ptr)
251     debug (*ptr);
252   else
253     fprintf (stderr, "<nil>\n");
254 }
255 
256 
257 /* Dumps the affine function described by FN to the file OUTF.  */
258 
259 DEBUG_FUNCTION void
dump_affine_function(FILE * outf,affine_fn fn)260 dump_affine_function (FILE *outf, affine_fn fn)
261 {
262   unsigned i;
263   tree coef;
264 
265   print_generic_expr (outf, fn[0], TDF_SLIM);
266   for (i = 1; fn.iterate (i, &coef); i++)
267     {
268       fprintf (outf, " + ");
269       print_generic_expr (outf, coef, TDF_SLIM);
270       fprintf (outf, " * x_%u", i);
271     }
272 }
273 
274 /* Dumps the conflict function CF to the file OUTF.  */
275 
276 DEBUG_FUNCTION void
dump_conflict_function(FILE * outf,conflict_function * cf)277 dump_conflict_function (FILE *outf, conflict_function *cf)
278 {
279   unsigned i;
280 
281   if (cf->n == NO_DEPENDENCE)
282     fprintf (outf, "no dependence");
283   else if (cf->n == NOT_KNOWN)
284     fprintf (outf, "not known");
285   else
286     {
287       for (i = 0; i < cf->n; i++)
288 	{
289 	  if (i != 0)
290 	    fprintf (outf, " ");
291 	  fprintf (outf, "[");
292 	  dump_affine_function (outf, cf->fns[i]);
293 	  fprintf (outf, "]");
294 	}
295     }
296 }
297 
298 /* Dump function for a SUBSCRIPT structure.  */
299 
300 DEBUG_FUNCTION void
dump_subscript(FILE * outf,struct subscript * subscript)301 dump_subscript (FILE *outf, struct subscript *subscript)
302 {
303   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
304 
305   fprintf (outf, "\n (subscript \n");
306   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
307   dump_conflict_function (outf, cf);
308   if (CF_NONTRIVIAL_P (cf))
309     {
310       tree last_iteration = SUB_LAST_CONFLICT (subscript);
311       fprintf (outf, "\n  last_conflict: ");
312       print_generic_expr (outf, last_iteration);
313     }
314 
315   cf = SUB_CONFLICTS_IN_B (subscript);
316   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
317   dump_conflict_function (outf, cf);
318   if (CF_NONTRIVIAL_P (cf))
319     {
320       tree last_iteration = SUB_LAST_CONFLICT (subscript);
321       fprintf (outf, "\n  last_conflict: ");
322       print_generic_expr (outf, last_iteration);
323     }
324 
325   fprintf (outf, "\n  (Subscript distance: ");
326   print_generic_expr (outf, SUB_DISTANCE (subscript));
327   fprintf (outf, " ))\n");
328 }
329 
330 /* Print the classic direction vector DIRV to OUTF.  */
331 
332 DEBUG_FUNCTION void
print_direction_vector(FILE * outf,lambda_vector dirv,int length)333 print_direction_vector (FILE *outf,
334 			lambda_vector dirv,
335 			int length)
336 {
337   int eq;
338 
339   for (eq = 0; eq < length; eq++)
340     {
341       enum data_dependence_direction dir = ((enum data_dependence_direction)
342 					    dirv[eq]);
343 
344       switch (dir)
345 	{
346 	case dir_positive:
347 	  fprintf (outf, "    +");
348 	  break;
349 	case dir_negative:
350 	  fprintf (outf, "    -");
351 	  break;
352 	case dir_equal:
353 	  fprintf (outf, "    =");
354 	  break;
355 	case dir_positive_or_equal:
356 	  fprintf (outf, "   +=");
357 	  break;
358 	case dir_positive_or_negative:
359 	  fprintf (outf, "   +-");
360 	  break;
361 	case dir_negative_or_equal:
362 	  fprintf (outf, "   -=");
363 	  break;
364 	case dir_star:
365 	  fprintf (outf, "    *");
366 	  break;
367 	default:
368 	  fprintf (outf, "indep");
369 	  break;
370 	}
371     }
372   fprintf (outf, "\n");
373 }
374 
375 /* Print a vector of direction vectors.  */
376 
377 DEBUG_FUNCTION void
print_dir_vectors(FILE * outf,vec<lambda_vector> dir_vects,int length)378 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
379 		   int length)
380 {
381   unsigned j;
382   lambda_vector v;
383 
384   FOR_EACH_VEC_ELT (dir_vects, j, v)
385     print_direction_vector (outf, v, length);
386 }
387 
388 /* Print out a vector VEC of length N to OUTFILE.  */
389 
390 DEBUG_FUNCTION void
print_lambda_vector(FILE * outfile,lambda_vector vector,int n)391 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
392 {
393   int i;
394 
395   for (i = 0; i < n; i++)
396     fprintf (outfile, "%3d ", vector[i]);
397   fprintf (outfile, "\n");
398 }
399 
400 /* Print a vector of distance vectors.  */
401 
402 DEBUG_FUNCTION void
print_dist_vectors(FILE * outf,vec<lambda_vector> dist_vects,int length)403 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
404 		    int length)
405 {
406   unsigned j;
407   lambda_vector v;
408 
409   FOR_EACH_VEC_ELT (dist_vects, j, v)
410     print_lambda_vector (outf, v, length);
411 }
412 
413 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
414 
415 DEBUG_FUNCTION void
dump_data_dependence_relation(FILE * outf,struct data_dependence_relation * ddr)416 dump_data_dependence_relation (FILE *outf,
417 			       struct data_dependence_relation *ddr)
418 {
419   struct data_reference *dra, *drb;
420 
421   fprintf (outf, "(Data Dep: \n");
422 
423   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
424     {
425       if (ddr)
426 	{
427 	  dra = DDR_A (ddr);
428 	  drb = DDR_B (ddr);
429 	  if (dra)
430 	    dump_data_reference (outf, dra);
431 	  else
432 	    fprintf (outf, "    (nil)\n");
433 	  if (drb)
434 	    dump_data_reference (outf, drb);
435 	  else
436 	    fprintf (outf, "    (nil)\n");
437 	}
438       fprintf (outf, "    (don't know)\n)\n");
439       return;
440     }
441 
442   dra = DDR_A (ddr);
443   drb = DDR_B (ddr);
444   dump_data_reference (outf, dra);
445   dump_data_reference (outf, drb);
446 
447   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
448     fprintf (outf, "    (no dependence)\n");
449 
450   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
451     {
452       unsigned int i;
453       struct loop *loopi;
454 
455       subscript *sub;
456       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
457 	{
458 	  fprintf (outf, "  access_fn_A: ");
459 	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
460 	  fprintf (outf, "  access_fn_B: ");
461 	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
462 	  dump_subscript (outf, sub);
463 	}
464 
465       fprintf (outf, "  inner loop index: %d\n", DDR_INNER_LOOP (ddr));
466       fprintf (outf, "  loop nest: (");
467       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
468 	fprintf (outf, "%d ", loopi->num);
469       fprintf (outf, ")\n");
470 
471       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
472 	{
473 	  fprintf (outf, "  distance_vector: ");
474 	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
475 			       DDR_NB_LOOPS (ddr));
476 	}
477 
478       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
479 	{
480 	  fprintf (outf, "  direction_vector: ");
481 	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
482 				  DDR_NB_LOOPS (ddr));
483 	}
484     }
485 
486   fprintf (outf, ")\n");
487 }
488 
489 /* Debug version.  */
490 
491 DEBUG_FUNCTION void
debug_data_dependence_relation(struct data_dependence_relation * ddr)492 debug_data_dependence_relation (struct data_dependence_relation *ddr)
493 {
494   dump_data_dependence_relation (stderr, ddr);
495 }
496 
497 /* Dump into FILE all the dependence relations from DDRS.  */
498 
499 DEBUG_FUNCTION void
dump_data_dependence_relations(FILE * file,vec<ddr_p> ddrs)500 dump_data_dependence_relations (FILE *file,
501 				vec<ddr_p> ddrs)
502 {
503   unsigned int i;
504   struct data_dependence_relation *ddr;
505 
506   FOR_EACH_VEC_ELT (ddrs, i, ddr)
507     dump_data_dependence_relation (file, ddr);
508 }
509 
510 DEBUG_FUNCTION void
debug(vec<ddr_p> & ref)511 debug (vec<ddr_p> &ref)
512 {
513   dump_data_dependence_relations (stderr, ref);
514 }
515 
516 DEBUG_FUNCTION void
debug(vec<ddr_p> * ptr)517 debug (vec<ddr_p> *ptr)
518 {
519   if (ptr)
520     debug (*ptr);
521   else
522     fprintf (stderr, "<nil>\n");
523 }
524 
525 
526 /* Dump to STDERR all the dependence relations from DDRS.  */
527 
528 DEBUG_FUNCTION void
debug_data_dependence_relations(vec<ddr_p> ddrs)529 debug_data_dependence_relations (vec<ddr_p> ddrs)
530 {
531   dump_data_dependence_relations (stderr, ddrs);
532 }
533 
534 /* Dumps the distance and direction vectors in FILE.  DDRS contains
535    the dependence relations, and VECT_SIZE is the size of the
536    dependence vectors, or in other words the number of loops in the
537    considered nest.  */
538 
539 DEBUG_FUNCTION void
dump_dist_dir_vectors(FILE * file,vec<ddr_p> ddrs)540 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
541 {
542   unsigned int i, j;
543   struct data_dependence_relation *ddr;
544   lambda_vector v;
545 
546   FOR_EACH_VEC_ELT (ddrs, i, ddr)
547     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
548       {
549 	FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
550 	  {
551 	    fprintf (file, "DISTANCE_V (");
552 	    print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
553 	    fprintf (file, ")\n");
554 	  }
555 
556 	FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
557 	  {
558 	    fprintf (file, "DIRECTION_V (");
559 	    print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
560 	    fprintf (file, ")\n");
561 	  }
562       }
563 
564   fprintf (file, "\n\n");
565 }
566 
567 /* Dumps the data dependence relations DDRS in FILE.  */
568 
569 DEBUG_FUNCTION void
dump_ddrs(FILE * file,vec<ddr_p> ddrs)570 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
571 {
572   unsigned int i;
573   struct data_dependence_relation *ddr;
574 
575   FOR_EACH_VEC_ELT (ddrs, i, ddr)
576     dump_data_dependence_relation (file, ddr);
577 
578   fprintf (file, "\n\n");
579 }
580 
581 DEBUG_FUNCTION void
debug_ddrs(vec<ddr_p> ddrs)582 debug_ddrs (vec<ddr_p> ddrs)
583 {
584   dump_ddrs (stderr, ddrs);
585 }
586 
587 /* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
588    (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
589    constant of type ssizetype, and returns true.  If we cannot do this
590    with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
591    is returned.  */
592 
593 static bool
split_constant_offset_1(tree type,tree op0,enum tree_code code,tree op1,tree * var,tree * off)594 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
595 			 tree *var, tree *off)
596 {
597   tree var0, var1;
598   tree off0, off1;
599   enum tree_code ocode = code;
600 
601   *var = NULL_TREE;
602   *off = NULL_TREE;
603 
604   switch (code)
605     {
606     case INTEGER_CST:
607       *var = build_int_cst (type, 0);
608       *off = fold_convert (ssizetype, op0);
609       return true;
610 
611     case POINTER_PLUS_EXPR:
612       ocode = PLUS_EXPR;
613       /* FALLTHROUGH */
614     case PLUS_EXPR:
615     case MINUS_EXPR:
616       split_constant_offset (op0, &var0, &off0);
617       split_constant_offset (op1, &var1, &off1);
618       *var = fold_build2 (code, type, var0, var1);
619       *off = size_binop (ocode, off0, off1);
620       return true;
621 
622     case MULT_EXPR:
623       if (TREE_CODE (op1) != INTEGER_CST)
624 	return false;
625 
626       split_constant_offset (op0, &var0, &off0);
627       *var = fold_build2 (MULT_EXPR, type, var0, op1);
628       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
629       return true;
630 
631     case ADDR_EXPR:
632       {
633 	tree base, poffset;
634 	poly_int64 pbitsize, pbitpos, pbytepos;
635 	machine_mode pmode;
636 	int punsignedp, preversep, pvolatilep;
637 
638 	op0 = TREE_OPERAND (op0, 0);
639 	base
640 	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
641 				 &punsignedp, &preversep, &pvolatilep);
642 
643 	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
644 	  return false;
645 	base = build_fold_addr_expr (base);
646 	off0 = ssize_int (pbytepos);
647 
648 	if (poffset)
649 	  {
650 	    split_constant_offset (poffset, &poffset, &off1);
651 	    off0 = size_binop (PLUS_EXPR, off0, off1);
652 	    if (POINTER_TYPE_P (TREE_TYPE (base)))
653 	      base = fold_build_pointer_plus (base, poffset);
654 	    else
655 	      base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
656 				  fold_convert (TREE_TYPE (base), poffset));
657 	  }
658 
659 	var0 = fold_convert (type, base);
660 
661 	/* If variable length types are involved, punt, otherwise casts
662 	   might be converted into ARRAY_REFs in gimplify_conversion.
663 	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
664 	   possibly no longer appears in current GIMPLE, might resurface.
665 	   This perhaps could run
666 	   if (CONVERT_EXPR_P (var0))
667 	     {
668 	       gimplify_conversion (&var0);
669 	       // Attempt to fill in any within var0 found ARRAY_REF's
670 	       // element size from corresponding op embedded ARRAY_REF,
671 	       // if unsuccessful, just punt.
672 	     }  */
673 	while (POINTER_TYPE_P (type))
674 	  type = TREE_TYPE (type);
675 	if (int_size_in_bytes (type) < 0)
676 	  return false;
677 
678 	*var = var0;
679 	*off = off0;
680 	return true;
681       }
682 
683     case SSA_NAME:
684       {
685 	if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
686 	  return false;
687 
688 	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
689 	enum tree_code subcode;
690 
691 	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
692 	  return false;
693 
694 	var0 = gimple_assign_rhs1 (def_stmt);
695 	subcode = gimple_assign_rhs_code (def_stmt);
696 	var1 = gimple_assign_rhs2 (def_stmt);
697 
698 	return split_constant_offset_1 (type, var0, subcode, var1, var, off);
699       }
700     CASE_CONVERT:
701       {
702 	/* We must not introduce undefined overflow, and we must not change the value.
703 	   Hence we're okay if the inner type doesn't overflow to start with
704 	   (pointer or signed), the outer type also is an integer or pointer
705 	   and the outer precision is at least as large as the inner.  */
706 	tree itype = TREE_TYPE (op0);
707 	if ((POINTER_TYPE_P (itype)
708 	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
709 	    && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
710 	    && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
711 	  {
712 	    if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
713 	      {
714 		/* Split the unconverted operand and try to prove that
715 		   wrapping isn't a problem.  */
716 		tree tmp_var, tmp_off;
717 		split_constant_offset (op0, &tmp_var, &tmp_off);
718 
719 		/* See whether we have an SSA_NAME whose range is known
720 		   to be [A, B].  */
721 		if (TREE_CODE (tmp_var) != SSA_NAME)
722 		  return false;
723 		wide_int var_min, var_max;
724 		value_range_type vr_type = get_range_info (tmp_var, &var_min,
725 							   &var_max);
726 		wide_int var_nonzero = get_nonzero_bits (tmp_var);
727 		signop sgn = TYPE_SIGN (itype);
728 		if (intersect_range_with_nonzero_bits (vr_type, &var_min,
729 						       &var_max, var_nonzero,
730 						       sgn) != VR_RANGE)
731 		  return false;
732 
733 		/* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
734 		   is known to be [A + TMP_OFF, B + TMP_OFF], with all
735 		   operations done in ITYPE.  The addition must overflow
736 		   at both ends of the range or at neither.  */
737 		bool overflow[2];
738 		unsigned int prec = TYPE_PRECISION (itype);
739 		wide_int woff = wi::to_wide (tmp_off, prec);
740 		wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
741 		wi::add (var_max, woff, sgn, &overflow[1]);
742 		if (overflow[0] != overflow[1])
743 		  return false;
744 
745 		/* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR.  */
746 		widest_int diff = (widest_int::from (op0_min, sgn)
747 				   - widest_int::from (var_min, sgn));
748 		var0 = tmp_var;
749 		*off = wide_int_to_tree (ssizetype, diff);
750 	      }
751 	    else
752 	      split_constant_offset (op0, &var0, off);
753 	    *var = fold_convert (type, var0);
754 	    return true;
755 	  }
756 	return false;
757       }
758 
759     default:
760       return false;
761     }
762 }
763 
764 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
765    will be ssizetype.  */
766 
767 void
split_constant_offset(tree exp,tree * var,tree * off)768 split_constant_offset (tree exp, tree *var, tree *off)
769 {
770   tree type = TREE_TYPE (exp), op0, op1, e, o;
771   enum tree_code code;
772 
773   *var = exp;
774   *off = ssize_int (0);
775 
776   if (tree_is_chrec (exp)
777       || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
778     return;
779 
780   code = TREE_CODE (exp);
781   extract_ops_from_tree (exp, &code, &op0, &op1);
782   if (split_constant_offset_1 (type, op0, code, op1, &e, &o))
783     {
784       *var = e;
785       *off = o;
786     }
787 }
788 
789 /* Returns the address ADDR of an object in a canonical shape (without nop
790    casts, and with type of pointer to the object).  */
791 
792 static tree
canonicalize_base_object_address(tree addr)793 canonicalize_base_object_address (tree addr)
794 {
795   tree orig = addr;
796 
797   STRIP_NOPS (addr);
798 
799   /* The base address may be obtained by casting from integer, in that case
800      keep the cast.  */
801   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
802     return orig;
803 
804   if (TREE_CODE (addr) != ADDR_EXPR)
805     return addr;
806 
807   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
808 }
809 
810 /* Analyze the behavior of memory reference REF.  There are two modes:
811 
812    - BB analysis.  In this case we simply split the address into base,
813      init and offset components, without reference to any containing loop.
814      The resulting base and offset are general expressions and they can
815      vary arbitrarily from one iteration of the containing loop to the next.
816      The step is always zero.
817 
818    - loop analysis.  In this case we analyze the reference both wrt LOOP
819      and on the basis that the reference occurs (is "used") in LOOP;
820      see the comment above analyze_scalar_evolution_in_loop for more
821      information about this distinction.  The base, init, offset and
822      step fields are all invariant in LOOP.
823 
824    Perform BB analysis if LOOP is null, or if LOOP is the function's
825    dummy outermost loop.  In other cases perform loop analysis.
826 
827    Return true if the analysis succeeded and store the results in DRB if so.
828    BB analysis can only fail for bitfield or reversed-storage accesses.  */
829 
830 bool
dr_analyze_innermost(innermost_loop_behavior * drb,tree ref,struct loop * loop)831 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
832 		      struct loop *loop)
833 {
834   poly_int64 pbitsize, pbitpos;
835   tree base, poffset;
836   machine_mode pmode;
837   int punsignedp, preversep, pvolatilep;
838   affine_iv base_iv, offset_iv;
839   tree init, dinit, step;
840   bool in_loop = (loop && loop->num);
841 
842   if (dump_file && (dump_flags & TDF_DETAILS))
843     fprintf (dump_file, "analyze_innermost: ");
844 
845   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
846 			      &punsignedp, &preversep, &pvolatilep);
847   gcc_assert (base != NULL_TREE);
848 
849   poly_int64 pbytepos;
850   if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
851     {
852       if (dump_file && (dump_flags & TDF_DETAILS))
853 	fprintf (dump_file, "failed: bit offset alignment.\n");
854       return false;
855     }
856 
857   if (preversep)
858     {
859       if (dump_file && (dump_flags & TDF_DETAILS))
860 	fprintf (dump_file, "failed: reverse storage order.\n");
861       return false;
862     }
863 
864   /* Calculate the alignment and misalignment for the inner reference.  */
865   unsigned int HOST_WIDE_INT bit_base_misalignment;
866   unsigned int bit_base_alignment;
867   get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
868 
869   /* There are no bitfield references remaining in BASE, so the values
870      we got back must be whole bytes.  */
871   gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
872 	      && bit_base_misalignment % BITS_PER_UNIT == 0);
873   unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
874   poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
875 
876   if (TREE_CODE (base) == MEM_REF)
877     {
878       if (!integer_zerop (TREE_OPERAND (base, 1)))
879 	{
880 	  /* Subtract MOFF from the base and add it to POFFSET instead.
881 	     Adjust the misalignment to reflect the amount we subtracted.  */
882 	  poly_offset_int moff = mem_ref_offset (base);
883 	  base_misalignment -= moff.force_shwi ();
884 	  tree mofft = wide_int_to_tree (sizetype, moff);
885 	  if (!poffset)
886 	    poffset = mofft;
887 	  else
888 	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
889 	}
890       base = TREE_OPERAND (base, 0);
891     }
892   else
893     base = build_fold_addr_expr (base);
894 
895   if (in_loop)
896     {
897       if (!simple_iv (loop, loop, base, &base_iv, true))
898         {
899 	  if (dump_file && (dump_flags & TDF_DETAILS))
900 	    fprintf (dump_file, "failed: evolution of base is not affine.\n");
901 	  return false;
902         }
903     }
904   else
905     {
906       base_iv.base = base;
907       base_iv.step = ssize_int (0);
908       base_iv.no_overflow = true;
909     }
910 
911   if (!poffset)
912     {
913       offset_iv.base = ssize_int (0);
914       offset_iv.step = ssize_int (0);
915     }
916   else
917     {
918       if (!in_loop)
919         {
920           offset_iv.base = poffset;
921           offset_iv.step = ssize_int (0);
922         }
923       else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
924         {
925 	  if (dump_file && (dump_flags & TDF_DETAILS))
926 	    fprintf (dump_file, "failed: evolution of offset is not affine.\n");
927 	  return false;
928         }
929     }
930 
931   init = ssize_int (pbytepos);
932 
933   /* Subtract any constant component from the base and add it to INIT instead.
934      Adjust the misalignment to reflect the amount we subtracted.  */
935   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
936   init = size_binop (PLUS_EXPR, init, dinit);
937   base_misalignment -= TREE_INT_CST_LOW (dinit);
938 
939   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
940   init = size_binop (PLUS_EXPR, init, dinit);
941 
942   step = size_binop (PLUS_EXPR,
943 		     fold_convert (ssizetype, base_iv.step),
944 		     fold_convert (ssizetype, offset_iv.step));
945 
946   base = canonicalize_base_object_address (base_iv.base);
947 
948   /* See if get_pointer_alignment can guarantee a higher alignment than
949      the one we calculated above.  */
950   unsigned int HOST_WIDE_INT alt_misalignment;
951   unsigned int alt_alignment;
952   get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
953 
954   /* As above, these values must be whole bytes.  */
955   gcc_assert (alt_alignment % BITS_PER_UNIT == 0
956 	      && alt_misalignment % BITS_PER_UNIT == 0);
957   alt_alignment /= BITS_PER_UNIT;
958   alt_misalignment /= BITS_PER_UNIT;
959 
960   if (base_alignment < alt_alignment)
961     {
962       base_alignment = alt_alignment;
963       base_misalignment = alt_misalignment;
964     }
965 
966   drb->base_address = base;
967   drb->offset = fold_convert (ssizetype, offset_iv.base);
968   drb->init = init;
969   drb->step = step;
970   if (known_misalignment (base_misalignment, base_alignment,
971 			  &drb->base_misalignment))
972     drb->base_alignment = base_alignment;
973   else
974     {
975       drb->base_alignment = known_alignment (base_misalignment);
976       drb->base_misalignment = 0;
977     }
978   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
979   drb->step_alignment = highest_pow2_factor (step);
980 
981   if (dump_file && (dump_flags & TDF_DETAILS))
982     fprintf (dump_file, "success.\n");
983 
984   return true;
985 }
986 
987 /* Return true if OP is a valid component reference for a DR access
988    function.  This accepts a subset of what handled_component_p accepts.  */
989 
990 static bool
access_fn_component_p(tree op)991 access_fn_component_p (tree op)
992 {
993   switch (TREE_CODE (op))
994     {
995     case REALPART_EXPR:
996     case IMAGPART_EXPR:
997     case ARRAY_REF:
998       return true;
999 
1000     case COMPONENT_REF:
1001       return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1002 
1003     default:
1004       return false;
1005     }
1006 }
1007 
1008 /* Determines the base object and the list of indices of memory reference
1009    DR, analyzed in LOOP and instantiated before NEST.  */
1010 
1011 static void
dr_analyze_indices(struct data_reference * dr,edge nest,loop_p loop)1012 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
1013 {
1014   vec<tree> access_fns = vNULL;
1015   tree ref, op;
1016   tree base, off, access_fn;
1017 
1018   /* If analyzing a basic-block there are no indices to analyze
1019      and thus no access functions.  */
1020   if (!nest)
1021     {
1022       DR_BASE_OBJECT (dr) = DR_REF (dr);
1023       DR_ACCESS_FNS (dr).create (0);
1024       return;
1025     }
1026 
1027   ref = DR_REF (dr);
1028 
1029   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1030      into a two element array with a constant index.  The base is
1031      then just the immediate underlying object.  */
1032   if (TREE_CODE (ref) == REALPART_EXPR)
1033     {
1034       ref = TREE_OPERAND (ref, 0);
1035       access_fns.safe_push (integer_zero_node);
1036     }
1037   else if (TREE_CODE (ref) == IMAGPART_EXPR)
1038     {
1039       ref = TREE_OPERAND (ref, 0);
1040       access_fns.safe_push (integer_one_node);
1041     }
1042 
1043   /* Analyze access functions of dimensions we know to be independent.
1044      The list of component references handled here should be kept in
1045      sync with access_fn_component_p.  */
1046   while (handled_component_p (ref))
1047     {
1048       if (TREE_CODE (ref) == ARRAY_REF)
1049 	{
1050 	  op = TREE_OPERAND (ref, 1);
1051 	  access_fn = analyze_scalar_evolution (loop, op);
1052 	  access_fn = instantiate_scev (nest, loop, access_fn);
1053 	  access_fns.safe_push (access_fn);
1054 	}
1055       else if (TREE_CODE (ref) == COMPONENT_REF
1056 	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1057 	{
1058 	  /* For COMPONENT_REFs of records (but not unions!) use the
1059 	     FIELD_DECL offset as constant access function so we can
1060 	     disambiguate a[i].f1 and a[i].f2.  */
1061 	  tree off = component_ref_field_offset (ref);
1062 	  off = size_binop (PLUS_EXPR,
1063 			    size_binop (MULT_EXPR,
1064 					fold_convert (bitsizetype, off),
1065 					bitsize_int (BITS_PER_UNIT)),
1066 			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1067 	  access_fns.safe_push (off);
1068 	}
1069       else
1070 	/* If we have an unhandled component we could not translate
1071 	   to an access function stop analyzing.  We have determined
1072 	   our base object in this case.  */
1073 	break;
1074 
1075       ref = TREE_OPERAND (ref, 0);
1076     }
1077 
1078   /* If the address operand of a MEM_REF base has an evolution in the
1079      analyzed nest, add it as an additional independent access-function.  */
1080   if (TREE_CODE (ref) == MEM_REF)
1081     {
1082       op = TREE_OPERAND (ref, 0);
1083       access_fn = analyze_scalar_evolution (loop, op);
1084       access_fn = instantiate_scev (nest, loop, access_fn);
1085       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1086 	{
1087 	  tree orig_type;
1088 	  tree memoff = TREE_OPERAND (ref, 1);
1089 	  base = initial_condition (access_fn);
1090 	  orig_type = TREE_TYPE (base);
1091 	  STRIP_USELESS_TYPE_CONVERSION (base);
1092 	  split_constant_offset (base, &base, &off);
1093 	  STRIP_USELESS_TYPE_CONVERSION (base);
1094 	  /* Fold the MEM_REF offset into the evolutions initial
1095 	     value to make more bases comparable.  */
1096 	  if (!integer_zerop (memoff))
1097 	    {
1098 	      off = size_binop (PLUS_EXPR, off,
1099 				fold_convert (ssizetype, memoff));
1100 	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
1101 	    }
1102 	  /* Adjust the offset so it is a multiple of the access type
1103 	     size and thus we separate bases that can possibly be used
1104 	     to produce partial overlaps (which the access_fn machinery
1105 	     cannot handle).  */
1106 	  wide_int rem;
1107 	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1108 	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1109 	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1110 	    rem = wi::mod_trunc
1111 	      (wi::to_wide (off),
1112 	       wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1113 	       SIGNED);
1114 	  else
1115 	    /* If we can't compute the remainder simply force the initial
1116 	       condition to zero.  */
1117 	    rem = wi::to_wide (off);
1118 	  off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1119 	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1120 	  /* And finally replace the initial condition.  */
1121 	  access_fn = chrec_replace_initial_condition
1122 	      (access_fn, fold_convert (orig_type, off));
1123 	  /* ???  This is still not a suitable base object for
1124 	     dr_may_alias_p - the base object needs to be an
1125 	     access that covers the object as whole.  With
1126 	     an evolution in the pointer this cannot be
1127 	     guaranteed.
1128 	     As a band-aid, mark the access so we can special-case
1129 	     it in dr_may_alias_p.  */
1130 	  tree old = ref;
1131 	  ref = fold_build2_loc (EXPR_LOCATION (ref),
1132 				 MEM_REF, TREE_TYPE (ref),
1133 				 base, memoff);
1134 	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1135 	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1136 	  DR_UNCONSTRAINED_BASE (dr) = true;
1137 	  access_fns.safe_push (access_fn);
1138 	}
1139     }
1140   else if (DECL_P (ref))
1141     {
1142       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1143       ref = build2 (MEM_REF, TREE_TYPE (ref),
1144 		    build_fold_addr_expr (ref),
1145 		    build_int_cst (reference_alias_ptr_type (ref), 0));
1146     }
1147 
1148   DR_BASE_OBJECT (dr) = ref;
1149   DR_ACCESS_FNS (dr) = access_fns;
1150 }
1151 
1152 /* Extracts the alias analysis information from the memory reference DR.  */
1153 
1154 static void
dr_analyze_alias(struct data_reference * dr)1155 dr_analyze_alias (struct data_reference *dr)
1156 {
1157   tree ref = DR_REF (dr);
1158   tree base = get_base_address (ref), addr;
1159 
1160   if (INDIRECT_REF_P (base)
1161       || TREE_CODE (base) == MEM_REF)
1162     {
1163       addr = TREE_OPERAND (base, 0);
1164       if (TREE_CODE (addr) == SSA_NAME)
1165 	DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1166     }
1167 }
1168 
1169 /* Frees data reference DR.  */
1170 
1171 void
free_data_ref(data_reference_p dr)1172 free_data_ref (data_reference_p dr)
1173 {
1174   DR_ACCESS_FNS (dr).release ();
1175   free (dr);
1176 }
1177 
1178 /* Analyze memory reference MEMREF, which is accessed in STMT.
1179    The reference is a read if IS_READ is true, otherwise it is a write.
1180    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1181    within STMT, i.e. that it might not occur even if STMT is executed
1182    and runs to completion.
1183 
1184    Return the data_reference description of MEMREF.  NEST is the outermost
1185    loop in which the reference should be instantiated, LOOP is the loop
1186    in which the data reference should be analyzed.  */
1187 
1188 struct data_reference *
create_data_ref(edge nest,loop_p loop,tree memref,gimple * stmt,bool is_read,bool is_conditional_in_stmt)1189 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1190 		 bool is_read, bool is_conditional_in_stmt)
1191 {
1192   struct data_reference *dr;
1193 
1194   if (dump_file && (dump_flags & TDF_DETAILS))
1195     {
1196       fprintf (dump_file, "Creating dr for ");
1197       print_generic_expr (dump_file, memref, TDF_SLIM);
1198       fprintf (dump_file, "\n");
1199     }
1200 
1201   dr = XCNEW (struct data_reference);
1202   DR_STMT (dr) = stmt;
1203   DR_REF (dr) = memref;
1204   DR_IS_READ (dr) = is_read;
1205   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1206 
1207   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1208 			nest != NULL ? loop : NULL);
1209   dr_analyze_indices (dr, nest, loop);
1210   dr_analyze_alias (dr);
1211 
1212   if (dump_file && (dump_flags & TDF_DETAILS))
1213     {
1214       unsigned i;
1215       fprintf (dump_file, "\tbase_address: ");
1216       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1217       fprintf (dump_file, "\n\toffset from base address: ");
1218       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1219       fprintf (dump_file, "\n\tconstant offset from base address: ");
1220       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1221       fprintf (dump_file, "\n\tstep: ");
1222       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1223       fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1224       fprintf (dump_file, "\n\tbase misalignment: %d",
1225 	       DR_BASE_MISALIGNMENT (dr));
1226       fprintf (dump_file, "\n\toffset alignment: %d",
1227 	       DR_OFFSET_ALIGNMENT (dr));
1228       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1229       fprintf (dump_file, "\n\tbase_object: ");
1230       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1231       fprintf (dump_file, "\n");
1232       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1233 	{
1234 	  fprintf (dump_file, "\tAccess function %d: ", i);
1235 	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1236 	}
1237     }
1238 
1239   return dr;
1240 }
1241 
1242 /*  A helper function computes order between two tree epxressions T1 and T2.
1243     This is used in comparator functions sorting objects based on the order
1244     of tree expressions.  The function returns -1, 0, or 1.  */
1245 
1246 int
data_ref_compare_tree(tree t1,tree t2)1247 data_ref_compare_tree (tree t1, tree t2)
1248 {
1249   int i, cmp;
1250   enum tree_code code;
1251   char tclass;
1252 
1253   if (t1 == t2)
1254     return 0;
1255   if (t1 == NULL)
1256     return -1;
1257   if (t2 == NULL)
1258     return 1;
1259 
1260   STRIP_USELESS_TYPE_CONVERSION (t1);
1261   STRIP_USELESS_TYPE_CONVERSION (t2);
1262   if (t1 == t2)
1263     return 0;
1264 
1265   if (TREE_CODE (t1) != TREE_CODE (t2)
1266       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1267     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1268 
1269   code = TREE_CODE (t1);
1270   switch (code)
1271     {
1272     case INTEGER_CST:
1273       return tree_int_cst_compare (t1, t2);
1274 
1275     case STRING_CST:
1276       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1277 	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1278       return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1279 		     TREE_STRING_LENGTH (t1));
1280 
1281     case SSA_NAME:
1282       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1283 	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1284       break;
1285 
1286     default:
1287       if (POLY_INT_CST_P (t1))
1288 	return compare_sizes_for_sort (wi::to_poly_widest (t1),
1289 				       wi::to_poly_widest (t2));
1290 
1291       tclass = TREE_CODE_CLASS (code);
1292 
1293       /* For decls, compare their UIDs.  */
1294       if (tclass == tcc_declaration)
1295 	{
1296 	  if (DECL_UID (t1) != DECL_UID (t2))
1297 	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1298 	  break;
1299 	}
1300       /* For expressions, compare their operands recursively.  */
1301       else if (IS_EXPR_CODE_CLASS (tclass))
1302 	{
1303 	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1304 	    {
1305 	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1306 					   TREE_OPERAND (t2, i));
1307 	      if (cmp != 0)
1308 		return cmp;
1309 	    }
1310 	}
1311       else
1312 	gcc_unreachable ();
1313     }
1314 
1315   return 0;
1316 }
1317 
1318 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1319    check.  */
1320 
1321 bool
runtime_alias_check_p(ddr_p ddr,struct loop * loop,bool speed_p)1322 runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
1323 {
1324   if (dump_enabled_p ())
1325     {
1326       dump_printf (MSG_NOTE, "consider run-time aliasing test between ");
1327       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
1328       dump_printf (MSG_NOTE,  " and ");
1329       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
1330       dump_printf (MSG_NOTE, "\n");
1331     }
1332 
1333   if (!speed_p)
1334     {
1335       if (dump_enabled_p ())
1336 	dump_printf (MSG_MISSED_OPTIMIZATION,
1337 		     "runtime alias check not supported when optimizing "
1338 		     "for size.\n");
1339       return false;
1340     }
1341 
1342   /* FORNOW: We don't support versioning with outer-loop in either
1343      vectorization or loop distribution.  */
1344   if (loop != NULL && loop->inner != NULL)
1345     {
1346       if (dump_enabled_p ())
1347 	dump_printf (MSG_MISSED_OPTIMIZATION,
1348 		     "runtime alias check not supported for outer loop.\n");
1349       return false;
1350     }
1351 
1352   return true;
1353 }
1354 
1355 /* Operator == between two dr_with_seg_len objects.
1356 
1357    This equality operator is used to make sure two data refs
1358    are the same one so that we will consider to combine the
1359    aliasing checks of those two pairs of data dependent data
1360    refs.  */
1361 
1362 static bool
1363 operator == (const dr_with_seg_len& d1,
1364 	     const dr_with_seg_len& d2)
1365 {
1366   return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1367 			   DR_BASE_ADDRESS (d2.dr), 0)
1368 	  && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1369 	  && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1370 	  && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1371 	  && known_eq (d1.access_size, d2.access_size)
1372 	  && d1.align == d2.align);
1373 }
1374 
1375 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1376    so that we can combine aliasing checks in one scan.  */
1377 
1378 static int
comp_dr_with_seg_len_pair(const void * pa_,const void * pb_)1379 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1380 {
1381   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1382   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1383   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1384   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1385 
1386   /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1387      if a and c have the same basic address snd step, and b and d have the same
1388      address and step.  Therefore, if any a&c or b&d don't have the same address
1389      and step, we don't care the order of those two pairs after sorting.  */
1390   int comp_res;
1391 
1392   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1393 					 DR_BASE_ADDRESS (b1.dr))) != 0)
1394     return comp_res;
1395   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1396 					 DR_BASE_ADDRESS (b2.dr))) != 0)
1397     return comp_res;
1398   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1399 					 DR_STEP (b1.dr))) != 0)
1400     return comp_res;
1401   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1402 					 DR_STEP (b2.dr))) != 0)
1403     return comp_res;
1404   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1405 					 DR_OFFSET (b1.dr))) != 0)
1406     return comp_res;
1407   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1408 					 DR_INIT (b1.dr))) != 0)
1409     return comp_res;
1410   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1411 					 DR_OFFSET (b2.dr))) != 0)
1412     return comp_res;
1413   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1414 					 DR_INIT (b2.dr))) != 0)
1415     return comp_res;
1416 
1417   return 0;
1418 }
1419 
1420 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1421    FACTOR is number of iterations that each data reference is accessed.
1422 
1423    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1424    we create an expression:
1425 
1426    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1427    || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1428 
1429    for aliasing checks.  However, in some cases we can decrease the number
1430    of checks by combining two checks into one.  For example, suppose we have
1431    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1432    condition is satisfied:
1433 
1434    load_ptr_0 < load_ptr_1  &&
1435    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1436 
1437    (this condition means, in each iteration of vectorized loop, the accessed
1438    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1439    load_ptr_1.)
1440 
1441    we then can use only the following expression to finish the alising checks
1442    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1443 
1444    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1445    || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1446 
1447    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1448    basic address.  */
1449 
1450 void
prune_runtime_alias_test_list(vec<dr_with_seg_len_pair_t> * alias_pairs,poly_uint64)1451 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1452 			       poly_uint64)
1453 {
1454   /* Sort the collected data ref pairs so that we can scan them once to
1455      combine all possible aliasing checks.  */
1456   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1457 
1458   /* Scan the sorted dr pairs and check if we can combine alias checks
1459      of two neighboring dr pairs.  */
1460   for (size_t i = 1; i < alias_pairs->length (); ++i)
1461     {
1462       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1463       dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1464 		      *dr_b1 = &(*alias_pairs)[i-1].second,
1465 		      *dr_a2 = &(*alias_pairs)[i].first,
1466 		      *dr_b2 = &(*alias_pairs)[i].second;
1467 
1468       /* Remove duplicate data ref pairs.  */
1469       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1470 	{
1471 	  if (dump_enabled_p ())
1472 	    {
1473 	      dump_printf (MSG_NOTE, "found equal ranges ");
1474 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1475 	      dump_printf (MSG_NOTE,  ", ");
1476 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1477 	      dump_printf (MSG_NOTE,  " and ");
1478 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1479 	      dump_printf (MSG_NOTE,  ", ");
1480 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1481 	      dump_printf (MSG_NOTE, "\n");
1482 	    }
1483 	  alias_pairs->ordered_remove (i--);
1484 	  continue;
1485 	}
1486 
1487       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1488 	{
1489 	  /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1490 	     and DR_A1 and DR_A2 are two consecutive memrefs.  */
1491 	  if (*dr_a1 == *dr_a2)
1492 	    {
1493 	      std::swap (dr_a1, dr_b1);
1494 	      std::swap (dr_a2, dr_b2);
1495 	    }
1496 
1497 	  poly_int64 init_a1, init_a2;
1498 	  /* Only consider cases in which the distance between the initial
1499 	     DR_A1 and the initial DR_A2 is known at compile time.  */
1500 	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1501 				DR_BASE_ADDRESS (dr_a2->dr), 0)
1502 	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1503 				   DR_OFFSET (dr_a2->dr), 0)
1504 	      || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1505 	      || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1506 	    continue;
1507 
1508 	  /* Don't combine if we can't tell which one comes first.  */
1509 	  if (!ordered_p (init_a1, init_a2))
1510 	    continue;
1511 
1512 	  /* Make sure dr_a1 starts left of dr_a2.  */
1513 	  if (maybe_gt (init_a1, init_a2))
1514 	    {
1515 	      std::swap (*dr_a1, *dr_a2);
1516 	      std::swap (init_a1, init_a2);
1517 	    }
1518 
1519 	  /* Work out what the segment length would be if we did combine
1520 	     DR_A1 and DR_A2:
1521 
1522 	     - If DR_A1 and DR_A2 have equal lengths, that length is
1523 	       also the combined length.
1524 
1525 	     - If DR_A1 and DR_A2 both have negative "lengths", the combined
1526 	       length is the lower bound on those lengths.
1527 
1528 	     - If DR_A1 and DR_A2 both have positive lengths, the combined
1529 	       length is the upper bound on those lengths.
1530 
1531 	     Other cases are unlikely to give a useful combination.
1532 
1533 	     The lengths both have sizetype, so the sign is taken from
1534 	     the step instead.  */
1535 	  if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0))
1536 	    {
1537 	      poly_uint64 seg_len_a1, seg_len_a2;
1538 	      if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1539 		  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1540 		continue;
1541 
1542 	      tree indicator_a = dr_direction_indicator (dr_a1->dr);
1543 	      if (TREE_CODE (indicator_a) != INTEGER_CST)
1544 		continue;
1545 
1546 	      tree indicator_b = dr_direction_indicator (dr_a2->dr);
1547 	      if (TREE_CODE (indicator_b) != INTEGER_CST)
1548 		continue;
1549 
1550 	      int sign_a = tree_int_cst_sgn (indicator_a);
1551 	      int sign_b = tree_int_cst_sgn (indicator_b);
1552 
1553 	      poly_uint64 new_seg_len;
1554 	      if (sign_a <= 0 && sign_b <= 0)
1555 		new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1556 	      else if (sign_a >= 0 && sign_b >= 0)
1557 		new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1558 	      else
1559 		continue;
1560 
1561 	      dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1562 					      new_seg_len);
1563 	      dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1564 	    }
1565 
1566 	  /* This is always positive due to the swap above.  */
1567 	  poly_uint64 diff = init_a2 - init_a1;
1568 
1569 	  /* The new check will start at DR_A1.  Make sure that its access
1570 	     size encompasses the initial DR_A2.  */
1571 	  if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1572 	    {
1573 	      dr_a1->access_size = upper_bound (dr_a1->access_size,
1574 						diff + dr_a2->access_size);
1575 	      unsigned int new_align = known_alignment (dr_a1->access_size);
1576 	      dr_a1->align = MIN (dr_a1->align, new_align);
1577 	    }
1578 	  if (dump_enabled_p ())
1579 	    {
1580 	      dump_printf (MSG_NOTE, "merging ranges for ");
1581 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1582 	      dump_printf (MSG_NOTE,  ", ");
1583 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1584 	      dump_printf (MSG_NOTE,  " and ");
1585 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1586 	      dump_printf (MSG_NOTE,  ", ");
1587 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1588 	      dump_printf (MSG_NOTE, "\n");
1589 	    }
1590 	  alias_pairs->ordered_remove (i);
1591 	  i--;
1592 	}
1593     }
1594 }
1595 
1596 /* Given LOOP's two data references and segment lengths described by DR_A
1597    and DR_B, create expression checking if the two addresses ranges intersect
1598    with each other based on index of the two addresses.  This can only be
1599    done if DR_A and DR_B referring to the same (array) object and the index
1600    is the only difference.  For example:
1601 
1602                        DR_A                           DR_B
1603       data-ref         arr[i]                         arr[j]
1604       base_object      arr                            arr
1605       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
1606 
1607    The addresses and their index are like:
1608 
1609         |<- ADDR_A    ->|          |<- ADDR_B    ->|
1610      ------------------------------------------------------->
1611         |   |   |   |   |          |   |   |   |   |
1612      ------------------------------------------------------->
1613         i_0 ...         i_0+4      j_0 ...         j_0+4
1614 
1615    We can create expression based on index rather than address:
1616 
1617      (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1618 
1619    Note evolution step of index needs to be considered in comparison.  */
1620 
1621 static bool
create_intersect_range_checks_index(struct loop * loop,tree * cond_expr,const dr_with_seg_len & dr_a,const dr_with_seg_len & dr_b)1622 create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
1623 				     const dr_with_seg_len& dr_a,
1624 				     const dr_with_seg_len& dr_b)
1625 {
1626   if (integer_zerop (DR_STEP (dr_a.dr))
1627       || integer_zerop (DR_STEP (dr_b.dr))
1628       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1629     return false;
1630 
1631   poly_uint64 seg_len1, seg_len2;
1632   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
1633       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
1634     return false;
1635 
1636   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1637     return false;
1638 
1639   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1640     return false;
1641 
1642   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1643     return false;
1644 
1645   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1646 
1647   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1648   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
1649   if (neg_step)
1650     {
1651       abs_step = -abs_step;
1652       seg_len1 = -seg_len1;
1653       seg_len2 = -seg_len2;
1654     }
1655   else
1656     {
1657       /* Include the access size in the length, so that we only have one
1658 	 tree addition below.  */
1659       seg_len1 += dr_a.access_size;
1660       seg_len2 += dr_b.access_size;
1661     }
1662 
1663   /* Infer the number of iterations with which the memory segment is accessed
1664      by DR.  In other words, alias is checked if memory segment accessed by
1665      DR_A in some iterations intersect with memory segment accessed by DR_B
1666      in the same amount iterations.
1667      Note segnment length is a linear function of number of iterations with
1668      DR_STEP as the coefficient.  */
1669   poly_uint64 niter_len1, niter_len2;
1670   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
1671       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
1672     return false;
1673 
1674   poly_uint64 niter_access1 = 0, niter_access2 = 0;
1675   if (neg_step)
1676     {
1677       /* Divide each access size by the byte step, rounding up.  */
1678       if (!can_div_trunc_p (dr_a.access_size - abs_step - 1,
1679 			    abs_step, &niter_access1)
1680 	  || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
1681 			       abs_step, &niter_access2))
1682 	return false;
1683     }
1684 
1685   unsigned int i;
1686   for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1687     {
1688       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1689       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1690       /* Two indices must be the same if they are not scev, or not scev wrto
1691 	 current loop being vecorized.  */
1692       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1693 	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
1694 	  || CHREC_VARIABLE (access1) != (unsigned)loop->num
1695 	  || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1696 	{
1697 	  if (operand_equal_p (access1, access2, 0))
1698 	    continue;
1699 
1700 	  return false;
1701 	}
1702       /* The two indices must have the same step.  */
1703       if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1704 	return false;
1705 
1706       tree idx_step = CHREC_RIGHT (access1);
1707       /* Index must have const step, otherwise DR_STEP won't be constant.  */
1708       gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1709       /* Index must evaluate in the same direction as DR.  */
1710       gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1711 
1712       tree min1 = CHREC_LEFT (access1);
1713       tree min2 = CHREC_LEFT (access2);
1714       if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1715 	return false;
1716 
1717       /* Ideally, alias can be checked against loop's control IV, but we
1718 	 need to prove linear mapping between control IV and reference
1719 	 index.  Although that should be true, we check against (array)
1720 	 index of data reference.  Like segment length, index length is
1721 	 linear function of the number of iterations with index_step as
1722 	 the coefficient, i.e, niter_len * idx_step.  */
1723       tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1724 				   build_int_cst (TREE_TYPE (min1),
1725 						  niter_len1));
1726       tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1727 				   build_int_cst (TREE_TYPE (min2),
1728 						  niter_len2));
1729       tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1730       tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1731       /* Adjust ranges for negative step.  */
1732       if (neg_step)
1733 	{
1734 	  /* IDX_LEN1 and IDX_LEN2 are negative in this case.  */
1735 	  std::swap (min1, max1);
1736 	  std::swap (min2, max2);
1737 
1738 	  /* As with the lengths just calculated, we've measured the access
1739 	     sizes in iterations, so multiply them by the index step.  */
1740 	  tree idx_access1
1741 	    = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1742 			   build_int_cst (TREE_TYPE (min1), niter_access1));
1743 	  tree idx_access2
1744 	    = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1745 			   build_int_cst (TREE_TYPE (min2), niter_access2));
1746 
1747 	  /* MINUS_EXPR because the above values are negative.  */
1748 	  max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
1749 	  max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
1750 	}
1751       tree part_cond_expr
1752 	= fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1753 	    fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1754 	    fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1755       if (*cond_expr)
1756 	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1757 				  *cond_expr, part_cond_expr);
1758       else
1759 	*cond_expr = part_cond_expr;
1760     }
1761   return true;
1762 }
1763 
1764 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
1765    every address ADDR accessed by D:
1766 
1767      *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
1768 
1769    In this case, every element accessed by D is aligned to at least
1770    ALIGN bytes.
1771 
1772    If ALIGN is zero then instead set *SEG_MAX_OUT so that:
1773 
1774      *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */
1775 
1776 static void
get_segment_min_max(const dr_with_seg_len & d,tree * seg_min_out,tree * seg_max_out,HOST_WIDE_INT align)1777 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
1778 		     tree *seg_max_out, HOST_WIDE_INT align)
1779 {
1780   /* Each access has the following pattern:
1781 
1782 	  <- |seg_len| ->
1783 	  <--- A: -ve step --->
1784 	  +-----+-------+-----+-------+-----+
1785 	  | n-1 | ,.... |  0  | ..... | n-1 |
1786 	  +-----+-------+-----+-------+-----+
1787 			<--- B: +ve step --->
1788 			<- |seg_len| ->
1789 			|
1790 		   base address
1791 
1792      where "n" is the number of scalar iterations covered by the segment.
1793      (This should be VF for a particular pair if we know that both steps
1794      are the same, otherwise it will be the full number of scalar loop
1795      iterations.)
1796 
1797      A is the range of bytes accessed when the step is negative,
1798      B is the range when the step is positive.
1799 
1800      If the access size is "access_size" bytes, the lowest addressed byte is:
1801 
1802 	 base + (step < 0 ? seg_len : 0)   [LB]
1803 
1804      and the highest addressed byte is always below:
1805 
1806 	 base + (step < 0 ? 0 : seg_len) + access_size   [UB]
1807 
1808      Thus:
1809 
1810 	 LB <= ADDR < UB
1811 
1812      If ALIGN is nonzero, all three values are aligned to at least ALIGN
1813      bytes, so:
1814 
1815 	 LB <= ADDR <= UB - ALIGN
1816 
1817      where "- ALIGN" folds naturally with the "+ access_size" and often
1818      cancels it out.
1819 
1820      We don't try to simplify LB and UB beyond this (e.g. by using
1821      MIN and MAX based on whether seg_len rather than the stride is
1822      negative) because it is possible for the absolute size of the
1823      segment to overflow the range of a ssize_t.
1824 
1825      Keeping the pointer_plus outside of the cond_expr should allow
1826      the cond_exprs to be shared with other alias checks.  */
1827   tree indicator = dr_direction_indicator (d.dr);
1828   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
1829 			       fold_convert (ssizetype, indicator),
1830 			       ssize_int (0));
1831   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
1832 					    DR_OFFSET (d.dr));
1833   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
1834   tree seg_len
1835     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
1836 
1837   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1838 				seg_len, size_zero_node);
1839   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1840 				size_zero_node, seg_len);
1841   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
1842 			   size_int (d.access_size - align));
1843 
1844   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
1845   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
1846 }
1847 
1848 /* Given two data references and segment lengths described by DR_A and DR_B,
1849    create expression checking if the two addresses ranges intersect with
1850    each other:
1851 
1852      ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1853      || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0))  */
1854 
1855 static void
create_intersect_range_checks(struct loop * loop,tree * cond_expr,const dr_with_seg_len & dr_a,const dr_with_seg_len & dr_b)1856 create_intersect_range_checks (struct loop *loop, tree *cond_expr,
1857 			       const dr_with_seg_len& dr_a,
1858 			       const dr_with_seg_len& dr_b)
1859 {
1860   *cond_expr = NULL_TREE;
1861   if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1862     return;
1863 
1864   unsigned HOST_WIDE_INT min_align;
1865   tree_code cmp_code;
1866   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
1867       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
1868     {
1869       /* In this case adding access_size to seg_len is likely to give
1870 	 a simple X * step, where X is either the number of scalar
1871 	 iterations or the vectorization factor.  We're better off
1872 	 keeping that, rather than subtracting an alignment from it.
1873 
1874 	 In this case the maximum values are exclusive and so there is
1875 	 no alias if the maximum of one segment equals the minimum
1876 	 of another.  */
1877       min_align = 0;
1878       cmp_code = LE_EXPR;
1879     }
1880   else
1881     {
1882       /* Calculate the minimum alignment shared by all four pointers,
1883 	 then arrange for this alignment to be subtracted from the
1884 	 exclusive maximum values to get inclusive maximum values.
1885 	 This "- min_align" is cumulative with a "+ access_size"
1886 	 in the calculation of the maximum values.  In the best
1887 	 (and common) case, the two cancel each other out, leaving
1888 	 us with an inclusive bound based only on seg_len.  In the
1889 	 worst case we're simply adding a smaller number than before.
1890 
1891 	 Because the maximum values are inclusive, there is an alias
1892 	 if the maximum value of one segment is equal to the minimum
1893 	 value of the other.  */
1894       min_align = MIN (dr_a.align, dr_b.align);
1895       cmp_code = LT_EXPR;
1896     }
1897 
1898   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
1899   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
1900   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
1901 
1902   *cond_expr
1903     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1904 	fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
1905 	fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
1906 }
1907 
1908 /* Create a conditional expression that represents the run-time checks for
1909    overlapping of address ranges represented by a list of data references
1910    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
1911    COND_EXPR is the conditional expression to be used in the if statement
1912    that controls which version of the loop gets executed at runtime.  */
1913 
1914 void
create_runtime_alias_checks(struct loop * loop,vec<dr_with_seg_len_pair_t> * alias_pairs,tree * cond_expr)1915 create_runtime_alias_checks (struct loop *loop,
1916 			     vec<dr_with_seg_len_pair_t> *alias_pairs,
1917 			     tree * cond_expr)
1918 {
1919   tree part_cond_expr;
1920 
1921   fold_defer_overflow_warnings ();
1922   for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1923     {
1924       const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1925       const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1926 
1927       if (dump_enabled_p ())
1928 	{
1929 	  dump_printf (MSG_NOTE, "create runtime check for data references ");
1930 	  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
1931 	  dump_printf (MSG_NOTE, " and ");
1932 	  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
1933 	  dump_printf (MSG_NOTE, "\n");
1934 	}
1935 
1936       /* Create condition expression for each pair data references.  */
1937       create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1938       if (*cond_expr)
1939 	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1940 				  *cond_expr, part_cond_expr);
1941       else
1942 	*cond_expr = part_cond_expr;
1943     }
1944   fold_undefer_and_ignore_overflow_warnings ();
1945 }
1946 
1947 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1948    expressions.  */
1949 static bool
dr_equal_offsets_p1(tree offset1,tree offset2)1950 dr_equal_offsets_p1 (tree offset1, tree offset2)
1951 {
1952   bool res;
1953 
1954   STRIP_NOPS (offset1);
1955   STRIP_NOPS (offset2);
1956 
1957   if (offset1 == offset2)
1958     return true;
1959 
1960   if (TREE_CODE (offset1) != TREE_CODE (offset2)
1961       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1962     return false;
1963 
1964   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1965                              TREE_OPERAND (offset2, 0));
1966 
1967   if (!res || !BINARY_CLASS_P (offset1))
1968     return res;
1969 
1970   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1971                              TREE_OPERAND (offset2, 1));
1972 
1973   return res;
1974 }
1975 
1976 /* Check if DRA and DRB have equal offsets.  */
1977 bool
dr_equal_offsets_p(struct data_reference * dra,struct data_reference * drb)1978 dr_equal_offsets_p (struct data_reference *dra,
1979                     struct data_reference *drb)
1980 {
1981   tree offset1, offset2;
1982 
1983   offset1 = DR_OFFSET (dra);
1984   offset2 = DR_OFFSET (drb);
1985 
1986   return dr_equal_offsets_p1 (offset1, offset2);
1987 }
1988 
1989 /* Returns true if FNA == FNB.  */
1990 
1991 static bool
affine_function_equal_p(affine_fn fna,affine_fn fnb)1992 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1993 {
1994   unsigned i, n = fna.length ();
1995 
1996   if (n != fnb.length ())
1997     return false;
1998 
1999   for (i = 0; i < n; i++)
2000     if (!operand_equal_p (fna[i], fnb[i], 0))
2001       return false;
2002 
2003   return true;
2004 }
2005 
2006 /* If all the functions in CF are the same, returns one of them,
2007    otherwise returns NULL.  */
2008 
2009 static affine_fn
common_affine_function(conflict_function * cf)2010 common_affine_function (conflict_function *cf)
2011 {
2012   unsigned i;
2013   affine_fn comm;
2014 
2015   if (!CF_NONTRIVIAL_P (cf))
2016     return affine_fn ();
2017 
2018   comm = cf->fns[0];
2019 
2020   for (i = 1; i < cf->n; i++)
2021     if (!affine_function_equal_p (comm, cf->fns[i]))
2022       return affine_fn ();
2023 
2024   return comm;
2025 }
2026 
2027 /* Returns the base of the affine function FN.  */
2028 
2029 static tree
affine_function_base(affine_fn fn)2030 affine_function_base (affine_fn fn)
2031 {
2032   return fn[0];
2033 }
2034 
2035 /* Returns true if FN is a constant.  */
2036 
2037 static bool
affine_function_constant_p(affine_fn fn)2038 affine_function_constant_p (affine_fn fn)
2039 {
2040   unsigned i;
2041   tree coef;
2042 
2043   for (i = 1; fn.iterate (i, &coef); i++)
2044     if (!integer_zerop (coef))
2045       return false;
2046 
2047   return true;
2048 }
2049 
2050 /* Returns true if FN is the zero constant function.  */
2051 
2052 static bool
affine_function_zero_p(affine_fn fn)2053 affine_function_zero_p (affine_fn fn)
2054 {
2055   return (integer_zerop (affine_function_base (fn))
2056 	  && affine_function_constant_p (fn));
2057 }
2058 
2059 /* Returns a signed integer type with the largest precision from TA
2060    and TB.  */
2061 
2062 static tree
signed_type_for_types(tree ta,tree tb)2063 signed_type_for_types (tree ta, tree tb)
2064 {
2065   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2066     return signed_type_for (ta);
2067   else
2068     return signed_type_for (tb);
2069 }
2070 
2071 /* Applies operation OP on affine functions FNA and FNB, and returns the
2072    result.  */
2073 
2074 static affine_fn
affine_fn_op(enum tree_code op,affine_fn fna,affine_fn fnb)2075 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2076 {
2077   unsigned i, n, m;
2078   affine_fn ret;
2079   tree coef;
2080 
2081   if (fnb.length () > fna.length ())
2082     {
2083       n = fna.length ();
2084       m = fnb.length ();
2085     }
2086   else
2087     {
2088       n = fnb.length ();
2089       m = fna.length ();
2090     }
2091 
2092   ret.create (m);
2093   for (i = 0; i < n; i++)
2094     {
2095       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2096 					 TREE_TYPE (fnb[i]));
2097       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2098     }
2099 
2100   for (; fna.iterate (i, &coef); i++)
2101     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2102 				 coef, integer_zero_node));
2103   for (; fnb.iterate (i, &coef); i++)
2104     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2105 				 integer_zero_node, coef));
2106 
2107   return ret;
2108 }
2109 
2110 /* Returns the sum of affine functions FNA and FNB.  */
2111 
2112 static affine_fn
affine_fn_plus(affine_fn fna,affine_fn fnb)2113 affine_fn_plus (affine_fn fna, affine_fn fnb)
2114 {
2115   return affine_fn_op (PLUS_EXPR, fna, fnb);
2116 }
2117 
2118 /* Returns the difference of affine functions FNA and FNB.  */
2119 
2120 static affine_fn
affine_fn_minus(affine_fn fna,affine_fn fnb)2121 affine_fn_minus (affine_fn fna, affine_fn fnb)
2122 {
2123   return affine_fn_op (MINUS_EXPR, fna, fnb);
2124 }
2125 
2126 /* Frees affine function FN.  */
2127 
2128 static void
affine_fn_free(affine_fn fn)2129 affine_fn_free (affine_fn fn)
2130 {
2131   fn.release ();
2132 }
2133 
2134 /* Determine for each subscript in the data dependence relation DDR
2135    the distance.  */
2136 
2137 static void
compute_subscript_distance(struct data_dependence_relation * ddr)2138 compute_subscript_distance (struct data_dependence_relation *ddr)
2139 {
2140   conflict_function *cf_a, *cf_b;
2141   affine_fn fn_a, fn_b, diff;
2142 
2143   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2144     {
2145       unsigned int i;
2146 
2147       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2148  	{
2149  	  struct subscript *subscript;
2150 
2151  	  subscript = DDR_SUBSCRIPT (ddr, i);
2152  	  cf_a = SUB_CONFLICTS_IN_A (subscript);
2153  	  cf_b = SUB_CONFLICTS_IN_B (subscript);
2154 
2155 	  fn_a = common_affine_function (cf_a);
2156 	  fn_b = common_affine_function (cf_b);
2157 	  if (!fn_a.exists () || !fn_b.exists ())
2158 	    {
2159 	      SUB_DISTANCE (subscript) = chrec_dont_know;
2160 	      return;
2161 	    }
2162 	  diff = affine_fn_minus (fn_a, fn_b);
2163 
2164  	  if (affine_function_constant_p (diff))
2165  	    SUB_DISTANCE (subscript) = affine_function_base (diff);
2166  	  else
2167  	    SUB_DISTANCE (subscript) = chrec_dont_know;
2168 
2169 	  affine_fn_free (diff);
2170  	}
2171     }
2172 }
2173 
2174 /* Returns the conflict function for "unknown".  */
2175 
2176 static conflict_function *
conflict_fn_not_known(void)2177 conflict_fn_not_known (void)
2178 {
2179   conflict_function *fn = XCNEW (conflict_function);
2180   fn->n = NOT_KNOWN;
2181 
2182   return fn;
2183 }
2184 
2185 /* Returns the conflict function for "independent".  */
2186 
2187 static conflict_function *
conflict_fn_no_dependence(void)2188 conflict_fn_no_dependence (void)
2189 {
2190   conflict_function *fn = XCNEW (conflict_function);
2191   fn->n = NO_DEPENDENCE;
2192 
2193   return fn;
2194 }
2195 
2196 /* Returns true if the address of OBJ is invariant in LOOP.  */
2197 
2198 static bool
object_address_invariant_in_loop_p(const struct loop * loop,const_tree obj)2199 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
2200 {
2201   while (handled_component_p (obj))
2202     {
2203       if (TREE_CODE (obj) == ARRAY_REF)
2204 	{
2205 	  for (int i = 1; i < 4; ++i)
2206 	    if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2207 							loop->num))
2208 	      return false;
2209 	}
2210       else if (TREE_CODE (obj) == COMPONENT_REF)
2211 	{
2212 	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2213 						      loop->num))
2214 	    return false;
2215 	}
2216       obj = TREE_OPERAND (obj, 0);
2217     }
2218 
2219   if (!INDIRECT_REF_P (obj)
2220       && TREE_CODE (obj) != MEM_REF)
2221     return true;
2222 
2223   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2224 						  loop->num);
2225 }
2226 
2227 /* Returns false if we can prove that data references A and B do not alias,
2228    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2229    considered.  */
2230 
2231 bool
dr_may_alias_p(const struct data_reference * a,const struct data_reference * b,struct loop * loop_nest)2232 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2233 		struct loop *loop_nest)
2234 {
2235   tree addr_a = DR_BASE_OBJECT (a);
2236   tree addr_b = DR_BASE_OBJECT (b);
2237 
2238   /* If we are not processing a loop nest but scalar code we
2239      do not need to care about possible cross-iteration dependences
2240      and thus can process the full original reference.  Do so,
2241      similar to how loop invariant motion applies extra offset-based
2242      disambiguation.  */
2243   if (!loop_nest)
2244     {
2245       aff_tree off1, off2;
2246       poly_widest_int size1, size2;
2247       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2248       get_inner_reference_aff (DR_REF (b), &off2, &size2);
2249       aff_combination_scale (&off1, -1);
2250       aff_combination_add (&off2, &off1);
2251       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2252 	return false;
2253     }
2254 
2255   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2256       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2257       /* For cross-iteration dependences the cliques must be valid for the
2258 	 whole loop, not just individual iterations.  */
2259       && (!loop_nest
2260 	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
2261 	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
2262       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2263       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2264     return false;
2265 
2266   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2267      do not know the size of the base-object.  So we cannot do any
2268      offset/overlap based analysis but have to rely on points-to
2269      information only.  */
2270   if (TREE_CODE (addr_a) == MEM_REF
2271       && (DR_UNCONSTRAINED_BASE (a)
2272 	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2273     {
2274       /* For true dependences we can apply TBAA.  */
2275       if (flag_strict_aliasing
2276 	  && DR_IS_WRITE (a) && DR_IS_READ (b)
2277 	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2278 				     get_alias_set (DR_REF (b))))
2279 	return false;
2280       if (TREE_CODE (addr_b) == MEM_REF)
2281 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2282 				       TREE_OPERAND (addr_b, 0));
2283       else
2284 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2285 				       build_fold_addr_expr (addr_b));
2286     }
2287   else if (TREE_CODE (addr_b) == MEM_REF
2288 	   && (DR_UNCONSTRAINED_BASE (b)
2289 	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2290     {
2291       /* For true dependences we can apply TBAA.  */
2292       if (flag_strict_aliasing
2293 	  && DR_IS_WRITE (a) && DR_IS_READ (b)
2294 	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2295 				     get_alias_set (DR_REF (b))))
2296 	return false;
2297       if (TREE_CODE (addr_a) == MEM_REF)
2298 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2299 				       TREE_OPERAND (addr_b, 0));
2300       else
2301 	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2302 				       TREE_OPERAND (addr_b, 0));
2303     }
2304 
2305   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2306      that is being subsetted in the loop nest.  */
2307   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2308     return refs_output_dependent_p (addr_a, addr_b);
2309   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2310     return refs_anti_dependent_p (addr_a, addr_b);
2311   return refs_may_alias_p (addr_a, addr_b);
2312 }
2313 
2314 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
2315    if it is meaningful to compare their associated access functions
2316    when checking for dependencies.  */
2317 
2318 static bool
access_fn_components_comparable_p(tree ref_a,tree ref_b)2319 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2320 {
2321   /* Allow pairs of component refs from the following sets:
2322 
2323        { REALPART_EXPR, IMAGPART_EXPR }
2324        { COMPONENT_REF }
2325        { ARRAY_REF }.  */
2326   tree_code code_a = TREE_CODE (ref_a);
2327   tree_code code_b = TREE_CODE (ref_b);
2328   if (code_a == IMAGPART_EXPR)
2329     code_a = REALPART_EXPR;
2330   if (code_b == IMAGPART_EXPR)
2331     code_b = REALPART_EXPR;
2332   if (code_a != code_b)
2333     return false;
2334 
2335   if (TREE_CODE (ref_a) == COMPONENT_REF)
2336     /* ??? We cannot simply use the type of operand #0 of the refs here as
2337        the Fortran compiler smuggles type punning into COMPONENT_REFs.
2338        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
2339     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2340 	    == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2341 
2342   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2343 			     TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2344 }
2345 
2346 /* Initialize a data dependence relation between data accesses A and
2347    B.  NB_LOOPS is the number of loops surrounding the references: the
2348    size of the classic distance/direction vectors.  */
2349 
2350 struct data_dependence_relation *
initialize_data_dependence_relation(struct data_reference * a,struct data_reference * b,vec<loop_p> loop_nest)2351 initialize_data_dependence_relation (struct data_reference *a,
2352 				     struct data_reference *b,
2353  				     vec<loop_p> loop_nest)
2354 {
2355   struct data_dependence_relation *res;
2356   unsigned int i;
2357 
2358   res = XCNEW (struct data_dependence_relation);
2359   DDR_A (res) = a;
2360   DDR_B (res) = b;
2361   DDR_LOOP_NEST (res).create (0);
2362   DDR_SUBSCRIPTS (res).create (0);
2363   DDR_DIR_VECTS (res).create (0);
2364   DDR_DIST_VECTS (res).create (0);
2365 
2366   if (a == NULL || b == NULL)
2367     {
2368       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2369       return res;
2370     }
2371 
2372   /* If the data references do not alias, then they are independent.  */
2373   if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
2374     {
2375       DDR_ARE_DEPENDENT (res) = chrec_known;
2376       return res;
2377     }
2378 
2379   unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
2380   unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
2381   if (num_dimensions_a == 0 || num_dimensions_b == 0)
2382     {
2383       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2384       return res;
2385     }
2386 
2387   /* For unconstrained bases, the root (highest-indexed) subscript
2388      describes a variation in the base of the original DR_REF rather
2389      than a component access.  We have no type that accurately describes
2390      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2391      applying this subscript) so limit the search to the last real
2392      component access.
2393 
2394      E.g. for:
2395 
2396 	void
2397 	f (int a[][8], int b[][8])
2398 	{
2399 	  for (int i = 0; i < 8; ++i)
2400 	    a[i * 2][0] = b[i][0];
2401 	}
2402 
2403      the a and b accesses have a single ARRAY_REF component reference [0]
2404      but have two subscripts.  */
2405   if (DR_UNCONSTRAINED_BASE (a))
2406     num_dimensions_a -= 1;
2407   if (DR_UNCONSTRAINED_BASE (b))
2408     num_dimensions_b -= 1;
2409 
2410   /* These structures describe sequences of component references in
2411      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
2412      specific access function.  */
2413   struct {
2414     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2415        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2416        indices.  In C notation, these are the indices of the rightmost
2417        component references; e.g. for a sequence .b.c.d, the start
2418        index is for .d.  */
2419     unsigned int start_a;
2420     unsigned int start_b;
2421 
2422     /* The sequence contains LENGTH consecutive access functions from
2423        each DR.  */
2424     unsigned int length;
2425 
2426     /* The enclosing objects for the A and B sequences respectively,
2427        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2428        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
2429     tree object_a;
2430     tree object_b;
2431   } full_seq = {}, struct_seq = {};
2432 
2433   /* Before each iteration of the loop:
2434 
2435      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2436      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
2437   unsigned int index_a = 0;
2438   unsigned int index_b = 0;
2439   tree ref_a = DR_REF (a);
2440   tree ref_b = DR_REF (b);
2441 
2442   /* Now walk the component references from the final DR_REFs back up to
2443      the enclosing base objects.  Each component reference corresponds
2444      to one access function in the DR, with access function 0 being for
2445      the final DR_REF and the highest-indexed access function being the
2446      one that is applied to the base of the DR.
2447 
2448      Look for a sequence of component references whose access functions
2449      are comparable (see access_fn_components_comparable_p).  If more
2450      than one such sequence exists, pick the one nearest the base
2451      (which is the leftmost sequence in C notation).  Store this sequence
2452      in FULL_SEQ.
2453 
2454      For example, if we have:
2455 
2456 	struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2457 
2458 	A: a[0][i].s.c.d
2459 	B: __real b[0][i].s.e[i].f
2460 
2461      (where d is the same type as the real component of f) then the access
2462      functions would be:
2463 
2464 			 0   1   2   3
2465 	A:              .d  .c  .s [i]
2466 
2467 		 0   1   2   3   4   5
2468 	B:  __real  .f [i]  .e  .s [i]
2469 
2470      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2471      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
2472      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
2473      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2474      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
2475      index foo[10] arrays, so is again comparable.  The sequence is
2476      therefore:
2477 
2478         A: [1, 3]  (i.e. [i].s.c)
2479         B: [3, 5]  (i.e. [i].s.e)
2480 
2481      Also look for sequences of component references whose access
2482      functions are comparable and whose enclosing objects have the same
2483      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
2484      example, STRUCT_SEQ would be:
2485 
2486         A: [1, 2]  (i.e. s.c)
2487         B: [3, 4]  (i.e. s.e)  */
2488   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
2489     {
2490       /* REF_A and REF_B must be one of the component access types
2491 	 allowed by dr_analyze_indices.  */
2492       gcc_checking_assert (access_fn_component_p (ref_a));
2493       gcc_checking_assert (access_fn_component_p (ref_b));
2494 
2495       /* Get the immediately-enclosing objects for REF_A and REF_B,
2496 	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2497 	 and DR_ACCESS_FN (B, INDEX_B).  */
2498       tree object_a = TREE_OPERAND (ref_a, 0);
2499       tree object_b = TREE_OPERAND (ref_b, 0);
2500 
2501       tree type_a = TREE_TYPE (object_a);
2502       tree type_b = TREE_TYPE (object_b);
2503       if (access_fn_components_comparable_p (ref_a, ref_b))
2504 	{
2505 	  /* This pair of component accesses is comparable for dependence
2506 	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2507 	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
2508 	  if (full_seq.start_a + full_seq.length != index_a
2509 	      || full_seq.start_b + full_seq.length != index_b)
2510 	    {
2511 	      /* The accesses don't extend the current sequence,
2512 		 so start a new one here.  */
2513 	      full_seq.start_a = index_a;
2514 	      full_seq.start_b = index_b;
2515 	      full_seq.length = 0;
2516 	    }
2517 
2518 	  /* Add this pair of references to the sequence.  */
2519 	  full_seq.length += 1;
2520 	  full_seq.object_a = object_a;
2521 	  full_seq.object_b = object_b;
2522 
2523 	  /* If the enclosing objects are structures (and thus have the
2524 	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
2525 	  if (TREE_CODE (type_a) == RECORD_TYPE)
2526 	    struct_seq = full_seq;
2527 
2528 	  /* Move to the next containing reference for both A and B.  */
2529 	  ref_a = object_a;
2530 	  ref_b = object_b;
2531 	  index_a += 1;
2532 	  index_b += 1;
2533 	  continue;
2534 	}
2535 
2536       /* Try to approach equal type sizes.  */
2537       if (!COMPLETE_TYPE_P (type_a)
2538 	  || !COMPLETE_TYPE_P (type_b)
2539 	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
2540 	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
2541 	break;
2542 
2543       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
2544       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
2545       if (size_a <= size_b)
2546 	{
2547 	  index_a += 1;
2548 	  ref_a = object_a;
2549 	}
2550       if (size_b <= size_a)
2551 	{
2552 	  index_b += 1;
2553 	  ref_b = object_b;
2554 	}
2555     }
2556 
2557   /* See whether FULL_SEQ ends at the base and whether the two bases
2558      are equal.  We do not care about TBAA or alignment info so we can
2559      use OEP_ADDRESS_OF to avoid false negatives.  */
2560   tree base_a = DR_BASE_OBJECT (a);
2561   tree base_b = DR_BASE_OBJECT (b);
2562   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
2563 		      && full_seq.start_b + full_seq.length == num_dimensions_b
2564 		      && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
2565 		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
2566 		      && types_compatible_p (TREE_TYPE (base_a),
2567 					     TREE_TYPE (base_b))
2568 		      && (!loop_nest.exists ()
2569 			  || (object_address_invariant_in_loop_p
2570 			      (loop_nest[0], base_a))));
2571 
2572   /* If the bases are the same, we can include the base variation too.
2573      E.g. the b accesses in:
2574 
2575        for (int i = 0; i < n; ++i)
2576          b[i + 4][0] = b[i][0];
2577 
2578      have a definite dependence distance of 4, while for:
2579 
2580        for (int i = 0; i < n; ++i)
2581          a[i + 4][0] = b[i][0];
2582 
2583      the dependence distance depends on the gap between a and b.
2584 
2585      If the bases are different then we can only rely on the sequence
2586      rooted at a structure access, since arrays are allowed to overlap
2587      arbitrarily and change shape arbitrarily.  E.g. we treat this as
2588      valid code:
2589 
2590        int a[256];
2591        ...
2592        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
2593 
2594      where two lvalues with the same int[4][3] type overlap, and where
2595      both lvalues are distinct from the object's declared type.  */
2596   if (same_base_p)
2597     {
2598       if (DR_UNCONSTRAINED_BASE (a))
2599 	full_seq.length += 1;
2600     }
2601   else
2602     full_seq = struct_seq;
2603 
2604   /* Punt if we didn't find a suitable sequence.  */
2605   if (full_seq.length == 0)
2606     {
2607       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2608       return res;
2609     }
2610 
2611   if (!same_base_p)
2612     {
2613       /* Partial overlap is possible for different bases when strict aliasing
2614 	 is not in effect.  It's also possible if either base involves a union
2615 	 access; e.g. for:
2616 
2617 	   struct s1 { int a[2]; };
2618 	   struct s2 { struct s1 b; int c; };
2619 	   struct s3 { int d; struct s1 e; };
2620 	   union u { struct s2 f; struct s3 g; } *p, *q;
2621 
2622 	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
2623 	 "p->g.e" (base "p->g") and might partially overlap the s1 at
2624 	 "q->g.e" (base "q->g").  */
2625       if (!flag_strict_aliasing
2626 	  || ref_contains_union_access_p (full_seq.object_a)
2627 	  || ref_contains_union_access_p (full_seq.object_b))
2628 	{
2629 	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2630 	  return res;
2631 	}
2632 
2633       DDR_COULD_BE_INDEPENDENT_P (res) = true;
2634       if (!loop_nest.exists ()
2635 	  || (object_address_invariant_in_loop_p (loop_nest[0],
2636 						  full_seq.object_a)
2637 	      && object_address_invariant_in_loop_p (loop_nest[0],
2638 						     full_seq.object_b)))
2639 	{
2640 	  DDR_OBJECT_A (res) = full_seq.object_a;
2641 	  DDR_OBJECT_B (res) = full_seq.object_b;
2642 	}
2643     }
2644 
2645   DDR_AFFINE_P (res) = true;
2646   DDR_ARE_DEPENDENT (res) = NULL_TREE;
2647   DDR_SUBSCRIPTS (res).create (full_seq.length);
2648   DDR_LOOP_NEST (res) = loop_nest;
2649   DDR_INNER_LOOP (res) = 0;
2650   DDR_SELF_REFERENCE (res) = false;
2651 
2652   for (i = 0; i < full_seq.length; ++i)
2653     {
2654       struct subscript *subscript;
2655 
2656       subscript = XNEW (struct subscript);
2657       SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
2658       SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
2659       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
2660       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
2661       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
2662       SUB_DISTANCE (subscript) = chrec_dont_know;
2663       DDR_SUBSCRIPTS (res).safe_push (subscript);
2664     }
2665 
2666   return res;
2667 }
2668 
2669 /* Frees memory used by the conflict function F.  */
2670 
2671 static void
free_conflict_function(conflict_function * f)2672 free_conflict_function (conflict_function *f)
2673 {
2674   unsigned i;
2675 
2676   if (CF_NONTRIVIAL_P (f))
2677     {
2678       for (i = 0; i < f->n; i++)
2679 	affine_fn_free (f->fns[i]);
2680     }
2681   free (f);
2682 }
2683 
2684 /* Frees memory used by SUBSCRIPTS.  */
2685 
2686 static void
free_subscripts(vec<subscript_p> subscripts)2687 free_subscripts (vec<subscript_p> subscripts)
2688 {
2689   unsigned i;
2690   subscript_p s;
2691 
2692   FOR_EACH_VEC_ELT (subscripts, i, s)
2693     {
2694       free_conflict_function (s->conflicting_iterations_in_a);
2695       free_conflict_function (s->conflicting_iterations_in_b);
2696       free (s);
2697     }
2698   subscripts.release ();
2699 }
2700 
2701 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2702    description.  */
2703 
2704 static inline void
finalize_ddr_dependent(struct data_dependence_relation * ddr,tree chrec)2705 finalize_ddr_dependent (struct data_dependence_relation *ddr,
2706 			tree chrec)
2707 {
2708   DDR_ARE_DEPENDENT (ddr) = chrec;
2709   free_subscripts (DDR_SUBSCRIPTS (ddr));
2710   DDR_SUBSCRIPTS (ddr).create (0);
2711 }
2712 
2713 /* The dependence relation DDR cannot be represented by a distance
2714    vector.  */
2715 
2716 static inline void
non_affine_dependence_relation(struct data_dependence_relation * ddr)2717 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2718 {
2719   if (dump_file && (dump_flags & TDF_DETAILS))
2720     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2721 
2722   DDR_AFFINE_P (ddr) = false;
2723 }
2724 
2725 
2726 
2727 /* This section contains the classic Banerjee tests.  */
2728 
2729 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2730    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
2731 
2732 static inline bool
ziv_subscript_p(const_tree chrec_a,const_tree chrec_b)2733 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2734 {
2735   return (evolution_function_is_constant_p (chrec_a)
2736 	  && evolution_function_is_constant_p (chrec_b));
2737 }
2738 
2739 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2740    variable, i.e., if the SIV (Single Index Variable) test is true.  */
2741 
2742 static bool
siv_subscript_p(const_tree chrec_a,const_tree chrec_b)2743 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2744 {
2745   if ((evolution_function_is_constant_p (chrec_a)
2746        && evolution_function_is_univariate_p (chrec_b))
2747       || (evolution_function_is_constant_p (chrec_b)
2748 	  && evolution_function_is_univariate_p (chrec_a)))
2749     return true;
2750 
2751   if (evolution_function_is_univariate_p (chrec_a)
2752       && evolution_function_is_univariate_p (chrec_b))
2753     {
2754       switch (TREE_CODE (chrec_a))
2755 	{
2756 	case POLYNOMIAL_CHREC:
2757 	  switch (TREE_CODE (chrec_b))
2758 	    {
2759 	    case POLYNOMIAL_CHREC:
2760 	      if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2761 		return false;
2762 	      /* FALLTHRU */
2763 
2764 	    default:
2765 	      return true;
2766 	    }
2767 
2768 	default:
2769 	  return true;
2770 	}
2771     }
2772 
2773   return false;
2774 }
2775 
2776 /* Creates a conflict function with N dimensions.  The affine functions
2777    in each dimension follow.  */
2778 
2779 static conflict_function *
conflict_fn(unsigned n,...)2780 conflict_fn (unsigned n, ...)
2781 {
2782   unsigned i;
2783   conflict_function *ret = XCNEW (conflict_function);
2784   va_list ap;
2785 
2786   gcc_assert (n > 0 && n <= MAX_DIM);
2787   va_start (ap, n);
2788 
2789   ret->n = n;
2790   for (i = 0; i < n; i++)
2791     ret->fns[i] = va_arg (ap, affine_fn);
2792   va_end (ap);
2793 
2794   return ret;
2795 }
2796 
2797 /* Returns constant affine function with value CST.  */
2798 
2799 static affine_fn
affine_fn_cst(tree cst)2800 affine_fn_cst (tree cst)
2801 {
2802   affine_fn fn;
2803   fn.create (1);
2804   fn.quick_push (cst);
2805   return fn;
2806 }
2807 
2808 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
2809 
2810 static affine_fn
affine_fn_univar(tree cst,unsigned dim,tree coef)2811 affine_fn_univar (tree cst, unsigned dim, tree coef)
2812 {
2813   affine_fn fn;
2814   fn.create (dim + 1);
2815   unsigned i;
2816 
2817   gcc_assert (dim > 0);
2818   fn.quick_push (cst);
2819   for (i = 1; i < dim; i++)
2820     fn.quick_push (integer_zero_node);
2821   fn.quick_push (coef);
2822   return fn;
2823 }
2824 
2825 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
2826    *OVERLAPS_B are initialized to the functions that describe the
2827    relation between the elements accessed twice by CHREC_A and
2828    CHREC_B.  For k >= 0, the following property is verified:
2829 
2830    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2831 
2832 static void
analyze_ziv_subscript(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)2833 analyze_ziv_subscript (tree chrec_a,
2834 		       tree chrec_b,
2835 		       conflict_function **overlaps_a,
2836 		       conflict_function **overlaps_b,
2837 		       tree *last_conflicts)
2838 {
2839   tree type, difference;
2840   dependence_stats.num_ziv++;
2841 
2842   if (dump_file && (dump_flags & TDF_DETAILS))
2843     fprintf (dump_file, "(analyze_ziv_subscript \n");
2844 
2845   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2846   chrec_a = chrec_convert (type, chrec_a, NULL);
2847   chrec_b = chrec_convert (type, chrec_b, NULL);
2848   difference = chrec_fold_minus (type, chrec_a, chrec_b);
2849 
2850   switch (TREE_CODE (difference))
2851     {
2852     case INTEGER_CST:
2853       if (integer_zerop (difference))
2854 	{
2855 	  /* The difference is equal to zero: the accessed index
2856 	     overlaps for each iteration in the loop.  */
2857 	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2858 	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2859 	  *last_conflicts = chrec_dont_know;
2860 	  dependence_stats.num_ziv_dependent++;
2861 	}
2862       else
2863 	{
2864 	  /* The accesses do not overlap.  */
2865 	  *overlaps_a = conflict_fn_no_dependence ();
2866 	  *overlaps_b = conflict_fn_no_dependence ();
2867 	  *last_conflicts = integer_zero_node;
2868 	  dependence_stats.num_ziv_independent++;
2869 	}
2870       break;
2871 
2872     default:
2873       /* We're not sure whether the indexes overlap.  For the moment,
2874 	 conservatively answer "don't know".  */
2875       if (dump_file && (dump_flags & TDF_DETAILS))
2876 	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
2877 
2878       *overlaps_a = conflict_fn_not_known ();
2879       *overlaps_b = conflict_fn_not_known ();
2880       *last_conflicts = chrec_dont_know;
2881       dependence_stats.num_ziv_unimplemented++;
2882       break;
2883     }
2884 
2885   if (dump_file && (dump_flags & TDF_DETAILS))
2886     fprintf (dump_file, ")\n");
2887 }
2888 
2889 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2890    and only if it fits to the int type.  If this is not the case, or the
2891    bound  on the number of iterations of LOOP could not be derived, returns
2892    chrec_dont_know.  */
2893 
2894 static tree
max_stmt_executions_tree(struct loop * loop)2895 max_stmt_executions_tree (struct loop *loop)
2896 {
2897   widest_int nit;
2898 
2899   if (!max_stmt_executions (loop, &nit))
2900     return chrec_dont_know;
2901 
2902   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2903     return chrec_dont_know;
2904 
2905   return wide_int_to_tree (unsigned_type_node, nit);
2906 }
2907 
2908 /* Determine whether the CHREC is always positive/negative.  If the expression
2909    cannot be statically analyzed, return false, otherwise set the answer into
2910    VALUE.  */
2911 
2912 static bool
chrec_is_positive(tree chrec,bool * value)2913 chrec_is_positive (tree chrec, bool *value)
2914 {
2915   bool value0, value1, value2;
2916   tree end_value, nb_iter;
2917 
2918   switch (TREE_CODE (chrec))
2919     {
2920     case POLYNOMIAL_CHREC:
2921       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2922 	  || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2923 	return false;
2924 
2925       /* FIXME -- overflows.  */
2926       if (value0 == value1)
2927 	{
2928 	  *value = value0;
2929 	  return true;
2930 	}
2931 
2932       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2933 	 and the proof consists in showing that the sign never
2934 	 changes during the execution of the loop, from 0 to
2935 	 loop->nb_iterations.  */
2936       if (!evolution_function_is_affine_p (chrec))
2937 	return false;
2938 
2939       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2940       if (chrec_contains_undetermined (nb_iter))
2941 	return false;
2942 
2943 #if 0
2944       /* TODO -- If the test is after the exit, we may decrease the number of
2945 	 iterations by one.  */
2946       if (after_exit)
2947 	nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2948 #endif
2949 
2950       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2951 
2952       if (!chrec_is_positive (end_value, &value2))
2953 	return false;
2954 
2955       *value = value0;
2956       return value0 == value1;
2957 
2958     case INTEGER_CST:
2959       switch (tree_int_cst_sgn (chrec))
2960 	{
2961 	case -1:
2962 	  *value = false;
2963 	  break;
2964 	case 1:
2965 	  *value = true;
2966 	  break;
2967 	default:
2968 	  return false;
2969 	}
2970       return true;
2971 
2972     default:
2973       return false;
2974     }
2975 }
2976 
2977 
2978 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
2979    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
2980    *OVERLAPS_B are initialized to the functions that describe the
2981    relation between the elements accessed twice by CHREC_A and
2982    CHREC_B.  For k >= 0, the following property is verified:
2983 
2984    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2985 
2986 static void
analyze_siv_subscript_cst_affine(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)2987 analyze_siv_subscript_cst_affine (tree chrec_a,
2988 				  tree chrec_b,
2989 				  conflict_function **overlaps_a,
2990 				  conflict_function **overlaps_b,
2991 				  tree *last_conflicts)
2992 {
2993   bool value0, value1, value2;
2994   tree type, difference, tmp;
2995 
2996   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2997   chrec_a = chrec_convert (type, chrec_a, NULL);
2998   chrec_b = chrec_convert (type, chrec_b, NULL);
2999   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3000 
3001   /* Special case overlap in the first iteration.  */
3002   if (integer_zerop (difference))
3003     {
3004       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3005       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3006       *last_conflicts = integer_one_node;
3007       return;
3008     }
3009 
3010   if (!chrec_is_positive (initial_condition (difference), &value0))
3011     {
3012       if (dump_file && (dump_flags & TDF_DETAILS))
3013 	fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3014 
3015       dependence_stats.num_siv_unimplemented++;
3016       *overlaps_a = conflict_fn_not_known ();
3017       *overlaps_b = conflict_fn_not_known ();
3018       *last_conflicts = chrec_dont_know;
3019       return;
3020     }
3021   else
3022     {
3023       if (value0 == false)
3024 	{
3025 	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3026 	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3027 	    {
3028 	      if (dump_file && (dump_flags & TDF_DETAILS))
3029 		fprintf (dump_file, "siv test failed: chrec not positive.\n");
3030 
3031 	      *overlaps_a = conflict_fn_not_known ();
3032 	      *overlaps_b = conflict_fn_not_known ();
3033 	      *last_conflicts = chrec_dont_know;
3034 	      dependence_stats.num_siv_unimplemented++;
3035 	      return;
3036 	    }
3037 	  else
3038 	    {
3039 	      if (value1 == true)
3040 		{
3041 		  /* Example:
3042 		     chrec_a = 12
3043 		     chrec_b = {10, +, 1}
3044 		  */
3045 
3046 		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3047 		    {
3048 		      HOST_WIDE_INT numiter;
3049 		      struct loop *loop = get_chrec_loop (chrec_b);
3050 
3051 		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3052 		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
3053 					 fold_build1 (ABS_EXPR, type, difference),
3054 					 CHREC_RIGHT (chrec_b));
3055 		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3056 		      *last_conflicts = integer_one_node;
3057 
3058 
3059 		      /* Perform weak-zero siv test to see if overlap is
3060 			 outside the loop bounds.  */
3061 		      numiter = max_stmt_executions_int (loop);
3062 
3063 		      if (numiter >= 0
3064 			  && compare_tree_int (tmp, numiter) > 0)
3065 			{
3066 			  free_conflict_function (*overlaps_a);
3067 			  free_conflict_function (*overlaps_b);
3068 			  *overlaps_a = conflict_fn_no_dependence ();
3069 			  *overlaps_b = conflict_fn_no_dependence ();
3070 			  *last_conflicts = integer_zero_node;
3071 			  dependence_stats.num_siv_independent++;
3072 			  return;
3073 			}
3074 		      dependence_stats.num_siv_dependent++;
3075 		      return;
3076 		    }
3077 
3078 		  /* When the step does not divide the difference, there are
3079 		     no overlaps.  */
3080 		  else
3081 		    {
3082 		      *overlaps_a = conflict_fn_no_dependence ();
3083 		      *overlaps_b = conflict_fn_no_dependence ();
3084 		      *last_conflicts = integer_zero_node;
3085 		      dependence_stats.num_siv_independent++;
3086 		      return;
3087 		    }
3088 		}
3089 
3090 	      else
3091 		{
3092 		  /* Example:
3093 		     chrec_a = 12
3094 		     chrec_b = {10, +, -1}
3095 
3096 		     In this case, chrec_a will not overlap with chrec_b.  */
3097 		  *overlaps_a = conflict_fn_no_dependence ();
3098 		  *overlaps_b = conflict_fn_no_dependence ();
3099 		  *last_conflicts = integer_zero_node;
3100 		  dependence_stats.num_siv_independent++;
3101 		  return;
3102 		}
3103 	    }
3104 	}
3105       else
3106 	{
3107 	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3108 	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3109 	    {
3110 	      if (dump_file && (dump_flags & TDF_DETAILS))
3111 		fprintf (dump_file, "siv test failed: chrec not positive.\n");
3112 
3113 	      *overlaps_a = conflict_fn_not_known ();
3114 	      *overlaps_b = conflict_fn_not_known ();
3115 	      *last_conflicts = chrec_dont_know;
3116 	      dependence_stats.num_siv_unimplemented++;
3117 	      return;
3118 	    }
3119 	  else
3120 	    {
3121 	      if (value2 == false)
3122 		{
3123 		  /* Example:
3124 		     chrec_a = 3
3125 		     chrec_b = {10, +, -1}
3126 		  */
3127 		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3128 		    {
3129 		      HOST_WIDE_INT numiter;
3130 		      struct loop *loop = get_chrec_loop (chrec_b);
3131 
3132 		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3133 		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3134 					 CHREC_RIGHT (chrec_b));
3135 		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3136 		      *last_conflicts = integer_one_node;
3137 
3138 		      /* Perform weak-zero siv test to see if overlap is
3139 			 outside the loop bounds.  */
3140 		      numiter = max_stmt_executions_int (loop);
3141 
3142 		      if (numiter >= 0
3143 			  && compare_tree_int (tmp, numiter) > 0)
3144 			{
3145 			  free_conflict_function (*overlaps_a);
3146 			  free_conflict_function (*overlaps_b);
3147 			  *overlaps_a = conflict_fn_no_dependence ();
3148 			  *overlaps_b = conflict_fn_no_dependence ();
3149 			  *last_conflicts = integer_zero_node;
3150 			  dependence_stats.num_siv_independent++;
3151 			  return;
3152 			}
3153 		      dependence_stats.num_siv_dependent++;
3154 		      return;
3155 		    }
3156 
3157 		  /* When the step does not divide the difference, there
3158 		     are no overlaps.  */
3159 		  else
3160 		    {
3161 		      *overlaps_a = conflict_fn_no_dependence ();
3162 		      *overlaps_b = conflict_fn_no_dependence ();
3163 		      *last_conflicts = integer_zero_node;
3164 		      dependence_stats.num_siv_independent++;
3165 		      return;
3166 		    }
3167 		}
3168 	      else
3169 		{
3170 		  /* Example:
3171 		     chrec_a = 3
3172 		     chrec_b = {4, +, 1}
3173 
3174 		     In this case, chrec_a will not overlap with chrec_b.  */
3175 		  *overlaps_a = conflict_fn_no_dependence ();
3176 		  *overlaps_b = conflict_fn_no_dependence ();
3177 		  *last_conflicts = integer_zero_node;
3178 		  dependence_stats.num_siv_independent++;
3179 		  return;
3180 		}
3181 	    }
3182 	}
3183     }
3184 }
3185 
3186 /* Helper recursive function for initializing the matrix A.  Returns
3187    the initial value of CHREC.  */
3188 
3189 static tree
initialize_matrix_A(lambda_matrix A,tree chrec,unsigned index,int mult)3190 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3191 {
3192   gcc_assert (chrec);
3193 
3194   switch (TREE_CODE (chrec))
3195     {
3196     case POLYNOMIAL_CHREC:
3197       if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
3198 	return chrec_dont_know;
3199       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3200       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3201 
3202     case PLUS_EXPR:
3203     case MULT_EXPR:
3204     case MINUS_EXPR:
3205       {
3206 	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3207 	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3208 
3209 	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3210       }
3211 
3212     CASE_CONVERT:
3213       {
3214 	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3215 	return chrec_convert (chrec_type (chrec), op, NULL);
3216       }
3217 
3218     case BIT_NOT_EXPR:
3219       {
3220 	/* Handle ~X as -1 - X.  */
3221 	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3222 	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3223 			      build_int_cst (TREE_TYPE (chrec), -1), op);
3224       }
3225 
3226     case INTEGER_CST:
3227       return chrec;
3228 
3229     default:
3230       gcc_unreachable ();
3231       return NULL_TREE;
3232     }
3233 }
3234 
3235 #define FLOOR_DIV(x,y) ((x) / (y))
3236 
3237 /* Solves the special case of the Diophantine equation:
3238    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3239 
3240    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
3241    number of iterations that loops X and Y run.  The overlaps will be
3242    constructed as evolutions in dimension DIM.  */
3243 
3244 static void
compute_overlap_steps_for_affine_univar(HOST_WIDE_INT niter,HOST_WIDE_INT step_a,HOST_WIDE_INT step_b,affine_fn * overlaps_a,affine_fn * overlaps_b,tree * last_conflicts,int dim)3245 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3246 					 HOST_WIDE_INT step_a,
3247 					 HOST_WIDE_INT step_b,
3248 					 affine_fn *overlaps_a,
3249 					 affine_fn *overlaps_b,
3250 					 tree *last_conflicts, int dim)
3251 {
3252   if (((step_a > 0 && step_b > 0)
3253        || (step_a < 0 && step_b < 0)))
3254     {
3255       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3256       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3257 
3258       gcd_steps_a_b = gcd (step_a, step_b);
3259       step_overlaps_a = step_b / gcd_steps_a_b;
3260       step_overlaps_b = step_a / gcd_steps_a_b;
3261 
3262       if (niter > 0)
3263 	{
3264 	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
3265 	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3266 	  last_conflict = tau2;
3267 	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3268 	}
3269       else
3270 	*last_conflicts = chrec_dont_know;
3271 
3272       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3273 				      build_int_cst (NULL_TREE,
3274 						     step_overlaps_a));
3275       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3276 				      build_int_cst (NULL_TREE,
3277 						     step_overlaps_b));
3278     }
3279 
3280   else
3281     {
3282       *overlaps_a = affine_fn_cst (integer_zero_node);
3283       *overlaps_b = affine_fn_cst (integer_zero_node);
3284       *last_conflicts = integer_zero_node;
3285     }
3286 }
3287 
3288 /* Solves the special case of a Diophantine equation where CHREC_A is
3289    an affine bivariate function, and CHREC_B is an affine univariate
3290    function.  For example,
3291 
3292    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
3293 
3294    has the following overlapping functions:
3295 
3296    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
3297    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
3298    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
3299 
3300    FORNOW: This is a specialized implementation for a case occurring in
3301    a common benchmark.  Implement the general algorithm.  */
3302 
3303 static void
compute_overlap_steps_for_affine_1_2(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)3304 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
3305 				      conflict_function **overlaps_a,
3306 				      conflict_function **overlaps_b,
3307 				      tree *last_conflicts)
3308 {
3309   bool xz_p, yz_p, xyz_p;
3310   HOST_WIDE_INT step_x, step_y, step_z;
3311   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
3312   affine_fn overlaps_a_xz, overlaps_b_xz;
3313   affine_fn overlaps_a_yz, overlaps_b_yz;
3314   affine_fn overlaps_a_xyz, overlaps_b_xyz;
3315   affine_fn ova1, ova2, ovb;
3316   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
3317 
3318   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
3319   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
3320   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
3321 
3322   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
3323   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
3324   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
3325 
3326   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
3327     {
3328       if (dump_file && (dump_flags & TDF_DETAILS))
3329 	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
3330 
3331       *overlaps_a = conflict_fn_not_known ();
3332       *overlaps_b = conflict_fn_not_known ();
3333       *last_conflicts = chrec_dont_know;
3334       return;
3335     }
3336 
3337   niter = MIN (niter_x, niter_z);
3338   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
3339 					   &overlaps_a_xz,
3340 					   &overlaps_b_xz,
3341 					   &last_conflicts_xz, 1);
3342   niter = MIN (niter_y, niter_z);
3343   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
3344 					   &overlaps_a_yz,
3345 					   &overlaps_b_yz,
3346 					   &last_conflicts_yz, 2);
3347   niter = MIN (niter_x, niter_z);
3348   niter = MIN (niter_y, niter);
3349   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
3350 					   &overlaps_a_xyz,
3351 					   &overlaps_b_xyz,
3352 					   &last_conflicts_xyz, 3);
3353 
3354   xz_p = !integer_zerop (last_conflicts_xz);
3355   yz_p = !integer_zerop (last_conflicts_yz);
3356   xyz_p = !integer_zerop (last_conflicts_xyz);
3357 
3358   if (xz_p || yz_p || xyz_p)
3359     {
3360       ova1 = affine_fn_cst (integer_zero_node);
3361       ova2 = affine_fn_cst (integer_zero_node);
3362       ovb = affine_fn_cst (integer_zero_node);
3363       if (xz_p)
3364 	{
3365 	  affine_fn t0 = ova1;
3366 	  affine_fn t2 = ovb;
3367 
3368 	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
3369 	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
3370 	  affine_fn_free (t0);
3371 	  affine_fn_free (t2);
3372 	  *last_conflicts = last_conflicts_xz;
3373 	}
3374       if (yz_p)
3375 	{
3376 	  affine_fn t0 = ova2;
3377 	  affine_fn t2 = ovb;
3378 
3379 	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
3380 	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
3381 	  affine_fn_free (t0);
3382 	  affine_fn_free (t2);
3383 	  *last_conflicts = last_conflicts_yz;
3384 	}
3385       if (xyz_p)
3386 	{
3387 	  affine_fn t0 = ova1;
3388 	  affine_fn t2 = ova2;
3389 	  affine_fn t4 = ovb;
3390 
3391 	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
3392 	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
3393 	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
3394 	  affine_fn_free (t0);
3395 	  affine_fn_free (t2);
3396 	  affine_fn_free (t4);
3397 	  *last_conflicts = last_conflicts_xyz;
3398 	}
3399       *overlaps_a = conflict_fn (2, ova1, ova2);
3400       *overlaps_b = conflict_fn (1, ovb);
3401     }
3402   else
3403     {
3404       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3405       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3406       *last_conflicts = integer_zero_node;
3407     }
3408 
3409   affine_fn_free (overlaps_a_xz);
3410   affine_fn_free (overlaps_b_xz);
3411   affine_fn_free (overlaps_a_yz);
3412   affine_fn_free (overlaps_b_yz);
3413   affine_fn_free (overlaps_a_xyz);
3414   affine_fn_free (overlaps_b_xyz);
3415 }
3416 
3417 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
3418 
3419 static void
lambda_vector_copy(lambda_vector vec1,lambda_vector vec2,int size)3420 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3421 		    int size)
3422 {
3423   memcpy (vec2, vec1, size * sizeof (*vec1));
3424 }
3425 
3426 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
3427 
3428 static void
lambda_matrix_copy(lambda_matrix mat1,lambda_matrix mat2,int m,int n)3429 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3430 		    int m, int n)
3431 {
3432   int i;
3433 
3434   for (i = 0; i < m; i++)
3435     lambda_vector_copy (mat1[i], mat2[i], n);
3436 }
3437 
3438 /* Store the N x N identity matrix in MAT.  */
3439 
3440 static void
lambda_matrix_id(lambda_matrix mat,int size)3441 lambda_matrix_id (lambda_matrix mat, int size)
3442 {
3443   int i, j;
3444 
3445   for (i = 0; i < size; i++)
3446     for (j = 0; j < size; j++)
3447       mat[i][j] = (i == j) ? 1 : 0;
3448 }
3449 
3450 /* Return the first nonzero element of vector VEC1 between START and N.
3451    We must have START <= N.   Returns N if VEC1 is the zero vector.  */
3452 
3453 static int
lambda_vector_first_nz(lambda_vector vec1,int n,int start)3454 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3455 {
3456   int j = start;
3457   while (j < n && vec1[j] == 0)
3458     j++;
3459   return j;
3460 }
3461 
3462 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3463    R2 = R2 + CONST1 * R1.  */
3464 
3465 static void
lambda_matrix_row_add(lambda_matrix mat,int n,int r1,int r2,int const1)3466 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
3467 {
3468   int i;
3469 
3470   if (const1 == 0)
3471     return;
3472 
3473   for (i = 0; i < n; i++)
3474     mat[r2][i] += const1 * mat[r1][i];
3475 }
3476 
3477 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3478    and store the result in VEC2.  */
3479 
3480 static void
lambda_vector_mult_const(lambda_vector vec1,lambda_vector vec2,int size,int const1)3481 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3482 			  int size, int const1)
3483 {
3484   int i;
3485 
3486   if (const1 == 0)
3487     lambda_vector_clear (vec2, size);
3488   else
3489     for (i = 0; i < size; i++)
3490       vec2[i] = const1 * vec1[i];
3491 }
3492 
3493 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
3494 
3495 static void
lambda_vector_negate(lambda_vector vec1,lambda_vector vec2,int size)3496 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3497 		      int size)
3498 {
3499   lambda_vector_mult_const (vec1, vec2, size, -1);
3500 }
3501 
3502 /* Negate row R1 of matrix MAT which has N columns.  */
3503 
3504 static void
lambda_matrix_row_negate(lambda_matrix mat,int n,int r1)3505 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3506 {
3507   lambda_vector_negate (mat[r1], mat[r1], n);
3508 }
3509 
3510 /* Return true if two vectors are equal.  */
3511 
3512 static bool
lambda_vector_equal(lambda_vector vec1,lambda_vector vec2,int size)3513 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3514 {
3515   int i;
3516   for (i = 0; i < size; i++)
3517     if (vec1[i] != vec2[i])
3518       return false;
3519   return true;
3520 }
3521 
3522 /* Given an M x N integer matrix A, this function determines an M x
3523    M unimodular matrix U, and an M x N echelon matrix S such that
3524    "U.A = S".  This decomposition is also known as "right Hermite".
3525 
3526    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3527    Restructuring Compilers" Utpal Banerjee.  */
3528 
3529 static void
lambda_matrix_right_hermite(lambda_matrix A,int m,int n,lambda_matrix S,lambda_matrix U)3530 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
3531 			     lambda_matrix S, lambda_matrix U)
3532 {
3533   int i, j, i0 = 0;
3534 
3535   lambda_matrix_copy (A, S, m, n);
3536   lambda_matrix_id (U, m);
3537 
3538   for (j = 0; j < n; j++)
3539     {
3540       if (lambda_vector_first_nz (S[j], m, i0) < m)
3541 	{
3542 	  ++i0;
3543 	  for (i = m - 1; i >= i0; i--)
3544 	    {
3545 	      while (S[i][j] != 0)
3546 		{
3547 		  int sigma, factor, a, b;
3548 
3549 		  a = S[i-1][j];
3550 		  b = S[i][j];
3551 		  sigma = (a * b < 0) ? -1: 1;
3552 		  a = abs (a);
3553 		  b = abs (b);
3554 		  factor = sigma * (a / b);
3555 
3556 		  lambda_matrix_row_add (S, n, i, i-1, -factor);
3557 		  std::swap (S[i], S[i-1]);
3558 
3559 		  lambda_matrix_row_add (U, m, i, i-1, -factor);
3560 		  std::swap (U[i], U[i-1]);
3561 		}
3562 	    }
3563 	}
3564     }
3565 }
3566 
3567 /* Determines the overlapping elements due to accesses CHREC_A and
3568    CHREC_B, that are affine functions.  This function cannot handle
3569    symbolic evolution functions, ie. when initial conditions are
3570    parameters, because it uses lambda matrices of integers.  */
3571 
3572 static void
analyze_subscript_affine_affine(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts)3573 analyze_subscript_affine_affine (tree chrec_a,
3574 				 tree chrec_b,
3575 				 conflict_function **overlaps_a,
3576 				 conflict_function **overlaps_b,
3577 				 tree *last_conflicts)
3578 {
3579   unsigned nb_vars_a, nb_vars_b, dim;
3580   HOST_WIDE_INT gamma, gcd_alpha_beta;
3581   lambda_matrix A, U, S;
3582   struct obstack scratch_obstack;
3583 
3584   if (eq_evolutions_p (chrec_a, chrec_b))
3585     {
3586       /* The accessed index overlaps for each iteration in the
3587 	 loop.  */
3588       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3589       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3590       *last_conflicts = chrec_dont_know;
3591       return;
3592     }
3593   if (dump_file && (dump_flags & TDF_DETAILS))
3594     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
3595 
3596   /* For determining the initial intersection, we have to solve a
3597      Diophantine equation.  This is the most time consuming part.
3598 
3599      For answering to the question: "Is there a dependence?" we have
3600      to prove that there exists a solution to the Diophantine
3601      equation, and that the solution is in the iteration domain,
3602      i.e. the solution is positive or zero, and that the solution
3603      happens before the upper bound loop.nb_iterations.  Otherwise
3604      there is no dependence.  This function outputs a description of
3605      the iterations that hold the intersections.  */
3606 
3607   nb_vars_a = nb_vars_in_chrec (chrec_a);
3608   nb_vars_b = nb_vars_in_chrec (chrec_b);
3609 
3610   gcc_obstack_init (&scratch_obstack);
3611 
3612   dim = nb_vars_a + nb_vars_b;
3613   U = lambda_matrix_new (dim, dim, &scratch_obstack);
3614   A = lambda_matrix_new (dim, 1, &scratch_obstack);
3615   S = lambda_matrix_new (dim, 1, &scratch_obstack);
3616 
3617   tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
3618   tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
3619   if (init_a == chrec_dont_know
3620       || init_b == chrec_dont_know)
3621     {
3622       if (dump_file && (dump_flags & TDF_DETAILS))
3623 	fprintf (dump_file, "affine-affine test failed: "
3624 		 "representation issue.\n");
3625       *overlaps_a = conflict_fn_not_known ();
3626       *overlaps_b = conflict_fn_not_known ();
3627       *last_conflicts = chrec_dont_know;
3628       goto end_analyze_subs_aa;
3629     }
3630   gamma = int_cst_value (init_b) - int_cst_value (init_a);
3631 
3632   /* Don't do all the hard work of solving the Diophantine equation
3633      when we already know the solution: for example,
3634      | {3, +, 1}_1
3635      | {3, +, 4}_2
3636      | gamma = 3 - 3 = 0.
3637      Then the first overlap occurs during the first iterations:
3638      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
3639   */
3640   if (gamma == 0)
3641     {
3642       if (nb_vars_a == 1 && nb_vars_b == 1)
3643 	{
3644 	  HOST_WIDE_INT step_a, step_b;
3645 	  HOST_WIDE_INT niter, niter_a, niter_b;
3646 	  affine_fn ova, ovb;
3647 
3648 	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
3649 	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
3650 	  niter = MIN (niter_a, niter_b);
3651 	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
3652 	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));
3653 
3654 	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
3655 						   &ova, &ovb,
3656 						   last_conflicts, 1);
3657 	  *overlaps_a = conflict_fn (1, ova);
3658 	  *overlaps_b = conflict_fn (1, ovb);
3659 	}
3660 
3661       else if (nb_vars_a == 2 && nb_vars_b == 1)
3662 	compute_overlap_steps_for_affine_1_2
3663 	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
3664 
3665       else if (nb_vars_a == 1 && nb_vars_b == 2)
3666 	compute_overlap_steps_for_affine_1_2
3667 	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
3668 
3669       else
3670 	{
3671 	  if (dump_file && (dump_flags & TDF_DETAILS))
3672 	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
3673 	  *overlaps_a = conflict_fn_not_known ();
3674 	  *overlaps_b = conflict_fn_not_known ();
3675 	  *last_conflicts = chrec_dont_know;
3676 	}
3677       goto end_analyze_subs_aa;
3678     }
3679 
3680   /* U.A = S */
3681   lambda_matrix_right_hermite (A, dim, 1, S, U);
3682 
3683   if (S[0][0] < 0)
3684     {
3685       S[0][0] *= -1;
3686       lambda_matrix_row_negate (U, dim, 0);
3687     }
3688   gcd_alpha_beta = S[0][0];
3689 
3690   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
3691      but that is a quite strange case.  Instead of ICEing, answer
3692      don't know.  */
3693   if (gcd_alpha_beta == 0)
3694     {
3695       *overlaps_a = conflict_fn_not_known ();
3696       *overlaps_b = conflict_fn_not_known ();
3697       *last_conflicts = chrec_dont_know;
3698       goto end_analyze_subs_aa;
3699     }
3700 
3701   /* The classic "gcd-test".  */
3702   if (!int_divides_p (gcd_alpha_beta, gamma))
3703     {
3704       /* The "gcd-test" has determined that there is no integer
3705 	 solution, i.e. there is no dependence.  */
3706       *overlaps_a = conflict_fn_no_dependence ();
3707       *overlaps_b = conflict_fn_no_dependence ();
3708       *last_conflicts = integer_zero_node;
3709     }
3710 
3711   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
3712   else if (nb_vars_a == 1 && nb_vars_b == 1)
3713     {
3714       /* Both functions should have the same evolution sign.  */
3715       if (((A[0][0] > 0 && -A[1][0] > 0)
3716 	   || (A[0][0] < 0 && -A[1][0] < 0)))
3717 	{
3718 	  /* The solutions are given by:
3719 	     |
3720 	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
3721 	     |                           [u21 u22]    [y0]
3722 
3723 	     For a given integer t.  Using the following variables,
3724 
3725 	     | i0 = u11 * gamma / gcd_alpha_beta
3726 	     | j0 = u12 * gamma / gcd_alpha_beta
3727 	     | i1 = u21
3728 	     | j1 = u22
3729 
3730 	     the solutions are:
3731 
3732 	     | x0 = i0 + i1 * t,
3733 	     | y0 = j0 + j1 * t.  */
3734       	  HOST_WIDE_INT i0, j0, i1, j1;
3735 
3736 	  i0 = U[0][0] * gamma / gcd_alpha_beta;
3737 	  j0 = U[0][1] * gamma / gcd_alpha_beta;
3738 	  i1 = U[1][0];
3739 	  j1 = U[1][1];
3740 
3741 	  if ((i1 == 0 && i0 < 0)
3742 	      || (j1 == 0 && j0 < 0))
3743 	    {
3744 	      /* There is no solution.
3745 		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
3746 		 falls in here, but for the moment we don't look at the
3747 		 upper bound of the iteration domain.  */
3748 	      *overlaps_a = conflict_fn_no_dependence ();
3749 	      *overlaps_b = conflict_fn_no_dependence ();
3750 	      *last_conflicts = integer_zero_node;
3751 	      goto end_analyze_subs_aa;
3752 	    }
3753 
3754 	  if (i1 > 0 && j1 > 0)
3755 	    {
3756 	      HOST_WIDE_INT niter_a
3757 		= max_stmt_executions_int (get_chrec_loop (chrec_a));
3758 	      HOST_WIDE_INT niter_b
3759 		= max_stmt_executions_int (get_chrec_loop (chrec_b));
3760 	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);
3761 
3762 	      /* (X0, Y0) is a solution of the Diophantine equation:
3763 		 "chrec_a (X0) = chrec_b (Y0)".  */
3764 	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
3765 					CEIL (-j0, j1));
3766 	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
3767 	      HOST_WIDE_INT y0 = j1 * tau1 + j0;
3768 
3769 	      /* (X1, Y1) is the smallest positive solution of the eq
3770 		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
3771 		 first conflict occurs.  */
3772 	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
3773 	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
3774 	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
3775 
3776 	      if (niter > 0)
3777 		{
3778 		  HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
3779 					    FLOOR_DIV (niter_b - j0, j1));
3780 		  HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
3781 
3782 		  /* If the overlap occurs outside of the bounds of the
3783 		     loop, there is no dependence.  */
3784 		  if (x1 >= niter_a || y1 >= niter_b)
3785 		    {
3786 		      *overlaps_a = conflict_fn_no_dependence ();
3787 		      *overlaps_b = conflict_fn_no_dependence ();
3788 		      *last_conflicts = integer_zero_node;
3789 		      goto end_analyze_subs_aa;
3790 		    }
3791 		  else
3792 		    *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3793 		}
3794 	      else
3795 		*last_conflicts = chrec_dont_know;
3796 
3797 	      *overlaps_a
3798 		= conflict_fn (1,
3799 			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
3800 						 1,
3801 						 build_int_cst (NULL_TREE, i1)));
3802 	      *overlaps_b
3803 		= conflict_fn (1,
3804 			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
3805 						 1,
3806 						 build_int_cst (NULL_TREE, j1)));
3807 	    }
3808 	  else
3809 	    {
3810 	      /* FIXME: For the moment, the upper bound of the
3811 		 iteration domain for i and j is not checked.  */
3812 	      if (dump_file && (dump_flags & TDF_DETAILS))
3813 		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3814 	      *overlaps_a = conflict_fn_not_known ();
3815 	      *overlaps_b = conflict_fn_not_known ();
3816 	      *last_conflicts = chrec_dont_know;
3817 	    }
3818 	}
3819       else
3820 	{
3821 	  if (dump_file && (dump_flags & TDF_DETAILS))
3822 	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3823 	  *overlaps_a = conflict_fn_not_known ();
3824 	  *overlaps_b = conflict_fn_not_known ();
3825 	  *last_conflicts = chrec_dont_know;
3826 	}
3827     }
3828   else
3829     {
3830       if (dump_file && (dump_flags & TDF_DETAILS))
3831 	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3832       *overlaps_a = conflict_fn_not_known ();
3833       *overlaps_b = conflict_fn_not_known ();
3834       *last_conflicts = chrec_dont_know;
3835     }
3836 
3837 end_analyze_subs_aa:
3838   obstack_free (&scratch_obstack, NULL);
3839   if (dump_file && (dump_flags & TDF_DETAILS))
3840     {
3841       fprintf (dump_file, "  (overlaps_a = ");
3842       dump_conflict_function (dump_file, *overlaps_a);
3843       fprintf (dump_file, ")\n  (overlaps_b = ");
3844       dump_conflict_function (dump_file, *overlaps_b);
3845       fprintf (dump_file, "))\n");
3846     }
3847 }
3848 
3849 /* Returns true when analyze_subscript_affine_affine can be used for
3850    determining the dependence relation between chrec_a and chrec_b,
3851    that contain symbols.  This function modifies chrec_a and chrec_b
3852    such that the analysis result is the same, and such that they don't
3853    contain symbols, and then can safely be passed to the analyzer.
3854 
3855    Example: The analysis of the following tuples of evolutions produce
3856    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3857    vs. {0, +, 1}_1
3858 
3859    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3860    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3861 */
3862 
3863 static bool
can_use_analyze_subscript_affine_affine(tree * chrec_a,tree * chrec_b)3864 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3865 {
3866   tree diff, type, left_a, left_b, right_b;
3867 
3868   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3869       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3870     /* FIXME: For the moment not handled.  Might be refined later.  */
3871     return false;
3872 
3873   type = chrec_type (*chrec_a);
3874   left_a = CHREC_LEFT (*chrec_a);
3875   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3876   diff = chrec_fold_minus (type, left_a, left_b);
3877 
3878   if (!evolution_function_is_constant_p (diff))
3879     return false;
3880 
3881   if (dump_file && (dump_flags & TDF_DETAILS))
3882     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
3883 
3884   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3885 				     diff, CHREC_RIGHT (*chrec_a));
3886   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3887   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3888 				     build_int_cst (type, 0),
3889 				     right_b);
3890   return true;
3891 }
3892 
3893 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
3894    *OVERLAPS_B are initialized to the functions that describe the
3895    relation between the elements accessed twice by CHREC_A and
3896    CHREC_B.  For k >= 0, the following property is verified:
3897 
3898    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3899 
3900 static void
analyze_siv_subscript(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts,int loop_nest_num)3901 analyze_siv_subscript (tree chrec_a,
3902 		       tree chrec_b,
3903 		       conflict_function **overlaps_a,
3904 		       conflict_function **overlaps_b,
3905 		       tree *last_conflicts,
3906 		       int loop_nest_num)
3907 {
3908   dependence_stats.num_siv++;
3909 
3910   if (dump_file && (dump_flags & TDF_DETAILS))
3911     fprintf (dump_file, "(analyze_siv_subscript \n");
3912 
3913   if (evolution_function_is_constant_p (chrec_a)
3914       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3915     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
3916 				      overlaps_a, overlaps_b, last_conflicts);
3917 
3918   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3919 	   && evolution_function_is_constant_p (chrec_b))
3920     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
3921 				      overlaps_b, overlaps_a, last_conflicts);
3922 
3923   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3924 	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3925     {
3926       if (!chrec_contains_symbols (chrec_a)
3927 	  && !chrec_contains_symbols (chrec_b))
3928 	{
3929 	  analyze_subscript_affine_affine (chrec_a, chrec_b,
3930 					   overlaps_a, overlaps_b,
3931 					   last_conflicts);
3932 
3933 	  if (CF_NOT_KNOWN_P (*overlaps_a)
3934 	      || CF_NOT_KNOWN_P (*overlaps_b))
3935 	    dependence_stats.num_siv_unimplemented++;
3936 	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3937 		   || CF_NO_DEPENDENCE_P (*overlaps_b))
3938 	    dependence_stats.num_siv_independent++;
3939 	  else
3940 	    dependence_stats.num_siv_dependent++;
3941 	}
3942       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
3943 							&chrec_b))
3944 	{
3945 	  analyze_subscript_affine_affine (chrec_a, chrec_b,
3946 					   overlaps_a, overlaps_b,
3947 					   last_conflicts);
3948 
3949 	  if (CF_NOT_KNOWN_P (*overlaps_a)
3950 	      || CF_NOT_KNOWN_P (*overlaps_b))
3951 	    dependence_stats.num_siv_unimplemented++;
3952 	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3953 		   || CF_NO_DEPENDENCE_P (*overlaps_b))
3954 	    dependence_stats.num_siv_independent++;
3955 	  else
3956 	    dependence_stats.num_siv_dependent++;
3957 	}
3958       else
3959 	goto siv_subscript_dontknow;
3960     }
3961 
3962   else
3963     {
3964     siv_subscript_dontknow:;
3965       if (dump_file && (dump_flags & TDF_DETAILS))
3966 	fprintf (dump_file, "  siv test failed: unimplemented");
3967       *overlaps_a = conflict_fn_not_known ();
3968       *overlaps_b = conflict_fn_not_known ();
3969       *last_conflicts = chrec_dont_know;
3970       dependence_stats.num_siv_unimplemented++;
3971     }
3972 
3973   if (dump_file && (dump_flags & TDF_DETAILS))
3974     fprintf (dump_file, ")\n");
3975 }
3976 
3977 /* Returns false if we can prove that the greatest common divisor of the steps
3978    of CHREC does not divide CST, false otherwise.  */
3979 
3980 static bool
gcd_of_steps_may_divide_p(const_tree chrec,const_tree cst)3981 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
3982 {
3983   HOST_WIDE_INT cd = 0, val;
3984   tree step;
3985 
3986   if (!tree_fits_shwi_p (cst))
3987     return true;
3988   val = tree_to_shwi (cst);
3989 
3990   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
3991     {
3992       step = CHREC_RIGHT (chrec);
3993       if (!tree_fits_shwi_p (step))
3994 	return true;
3995       cd = gcd (cd, tree_to_shwi (step));
3996       chrec = CHREC_LEFT (chrec);
3997     }
3998 
3999   return val % cd == 0;
4000 }
4001 
4002 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4003    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
4004    functions that describe the relation between the elements accessed
4005    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
4006    is verified:
4007 
4008    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4009 
4010 static void
analyze_miv_subscript(tree chrec_a,tree chrec_b,conflict_function ** overlaps_a,conflict_function ** overlaps_b,tree * last_conflicts,struct loop * loop_nest)4011 analyze_miv_subscript (tree chrec_a,
4012 		       tree chrec_b,
4013 		       conflict_function **overlaps_a,
4014 		       conflict_function **overlaps_b,
4015 		       tree *last_conflicts,
4016 		       struct loop *loop_nest)
4017 {
4018   tree type, difference;
4019 
4020   dependence_stats.num_miv++;
4021   if (dump_file && (dump_flags & TDF_DETAILS))
4022     fprintf (dump_file, "(analyze_miv_subscript \n");
4023 
4024   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4025   chrec_a = chrec_convert (type, chrec_a, NULL);
4026   chrec_b = chrec_convert (type, chrec_b, NULL);
4027   difference = chrec_fold_minus (type, chrec_a, chrec_b);
4028 
4029   if (eq_evolutions_p (chrec_a, chrec_b))
4030     {
4031       /* Access functions are the same: all the elements are accessed
4032 	 in the same order.  */
4033       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4034       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4035       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4036       dependence_stats.num_miv_dependent++;
4037     }
4038 
4039   else if (evolution_function_is_constant_p (difference)
4040 	   && evolution_function_is_affine_multivariate_p (chrec_a,
4041 							   loop_nest->num)
4042 	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
4043     {
4044       /* testsuite/.../ssa-chrec-33.c
4045 	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
4046 
4047 	 The difference is 1, and all the evolution steps are multiples
4048 	 of 2, consequently there are no overlapping elements.  */
4049       *overlaps_a = conflict_fn_no_dependence ();
4050       *overlaps_b = conflict_fn_no_dependence ();
4051       *last_conflicts = integer_zero_node;
4052       dependence_stats.num_miv_independent++;
4053     }
4054 
4055   else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
4056 	   && !chrec_contains_symbols (chrec_a, loop_nest)
4057 	   && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
4058 	   && !chrec_contains_symbols (chrec_b, loop_nest))
4059     {
4060       /* testsuite/.../ssa-chrec-35.c
4061 	 {0, +, 1}_2  vs.  {0, +, 1}_3
4062 	 the overlapping elements are respectively located at iterations:
4063 	 {0, +, 1}_x and {0, +, 1}_x,
4064 	 in other words, we have the equality:
4065 	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4066 
4067 	 Other examples:
4068 	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4069 	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4070 
4071 	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4072 	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4073       */
4074       analyze_subscript_affine_affine (chrec_a, chrec_b,
4075 				       overlaps_a, overlaps_b, last_conflicts);
4076 
4077       if (CF_NOT_KNOWN_P (*overlaps_a)
4078  	  || CF_NOT_KNOWN_P (*overlaps_b))
4079 	dependence_stats.num_miv_unimplemented++;
4080       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4081 	       || CF_NO_DEPENDENCE_P (*overlaps_b))
4082 	dependence_stats.num_miv_independent++;
4083       else
4084 	dependence_stats.num_miv_dependent++;
4085     }
4086 
4087   else
4088     {
4089       /* When the analysis is too difficult, answer "don't know".  */
4090       if (dump_file && (dump_flags & TDF_DETAILS))
4091 	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4092 
4093       *overlaps_a = conflict_fn_not_known ();
4094       *overlaps_b = conflict_fn_not_known ();
4095       *last_conflicts = chrec_dont_know;
4096       dependence_stats.num_miv_unimplemented++;
4097     }
4098 
4099   if (dump_file && (dump_flags & TDF_DETAILS))
4100     fprintf (dump_file, ")\n");
4101 }
4102 
4103 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4104    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4105    OVERLAP_ITERATIONS_B are initialized with two functions that
4106    describe the iterations that contain conflicting elements.
4107 
4108    Remark: For an integer k >= 0, the following equality is true:
4109 
4110    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4111 */
4112 
4113 static void
analyze_overlapping_iterations(tree chrec_a,tree chrec_b,conflict_function ** overlap_iterations_a,conflict_function ** overlap_iterations_b,tree * last_conflicts,struct loop * loop_nest)4114 analyze_overlapping_iterations (tree chrec_a,
4115 				tree chrec_b,
4116 				conflict_function **overlap_iterations_a,
4117 				conflict_function **overlap_iterations_b,
4118 				tree *last_conflicts, struct loop *loop_nest)
4119 {
4120   unsigned int lnn = loop_nest->num;
4121 
4122   dependence_stats.num_subscript_tests++;
4123 
4124   if (dump_file && (dump_flags & TDF_DETAILS))
4125     {
4126       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4127       fprintf (dump_file, "  (chrec_a = ");
4128       print_generic_expr (dump_file, chrec_a);
4129       fprintf (dump_file, ")\n  (chrec_b = ");
4130       print_generic_expr (dump_file, chrec_b);
4131       fprintf (dump_file, ")\n");
4132     }
4133 
4134   if (chrec_a == NULL_TREE
4135       || chrec_b == NULL_TREE
4136       || chrec_contains_undetermined (chrec_a)
4137       || chrec_contains_undetermined (chrec_b))
4138     {
4139       dependence_stats.num_subscript_undetermined++;
4140 
4141       *overlap_iterations_a = conflict_fn_not_known ();
4142       *overlap_iterations_b = conflict_fn_not_known ();
4143     }
4144 
4145   /* If they are the same chrec, and are affine, they overlap
4146      on every iteration.  */
4147   else if (eq_evolutions_p (chrec_a, chrec_b)
4148 	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4149 	       || operand_equal_p (chrec_a, chrec_b, 0)))
4150     {
4151       dependence_stats.num_same_subscript_function++;
4152       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4153       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4154       *last_conflicts = chrec_dont_know;
4155     }
4156 
4157   /* If they aren't the same, and aren't affine, we can't do anything
4158      yet.  */
4159   else if ((chrec_contains_symbols (chrec_a)
4160 	    || chrec_contains_symbols (chrec_b))
4161 	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4162 	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4163     {
4164       dependence_stats.num_subscript_undetermined++;
4165       *overlap_iterations_a = conflict_fn_not_known ();
4166       *overlap_iterations_b = conflict_fn_not_known ();
4167     }
4168 
4169   else if (ziv_subscript_p (chrec_a, chrec_b))
4170     analyze_ziv_subscript (chrec_a, chrec_b,
4171 			   overlap_iterations_a, overlap_iterations_b,
4172 			   last_conflicts);
4173 
4174   else if (siv_subscript_p (chrec_a, chrec_b))
4175     analyze_siv_subscript (chrec_a, chrec_b,
4176 			   overlap_iterations_a, overlap_iterations_b,
4177 			   last_conflicts, lnn);
4178 
4179   else
4180     analyze_miv_subscript (chrec_a, chrec_b,
4181 			   overlap_iterations_a, overlap_iterations_b,
4182 			   last_conflicts, loop_nest);
4183 
4184   if (dump_file && (dump_flags & TDF_DETAILS))
4185     {
4186       fprintf (dump_file, "  (overlap_iterations_a = ");
4187       dump_conflict_function (dump_file, *overlap_iterations_a);
4188       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
4189       dump_conflict_function (dump_file, *overlap_iterations_b);
4190       fprintf (dump_file, "))\n");
4191     }
4192 }
4193 
4194 /* Helper function for uniquely inserting distance vectors.  */
4195 
4196 static void
save_dist_v(struct data_dependence_relation * ddr,lambda_vector dist_v)4197 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4198 {
4199   unsigned i;
4200   lambda_vector v;
4201 
4202   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4203     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4204       return;
4205 
4206   DDR_DIST_VECTS (ddr).safe_push (dist_v);
4207 }
4208 
4209 /* Helper function for uniquely inserting direction vectors.  */
4210 
4211 static void
save_dir_v(struct data_dependence_relation * ddr,lambda_vector dir_v)4212 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4213 {
4214   unsigned i;
4215   lambda_vector v;
4216 
4217   FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4218     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4219       return;
4220 
4221   DDR_DIR_VECTS (ddr).safe_push (dir_v);
4222 }
4223 
4224 /* Add a distance of 1 on all the loops outer than INDEX.  If we
4225    haven't yet determined a distance for this outer loop, push a new
4226    distance vector composed of the previous distance, and a distance
4227    of 1 for this outer loop.  Example:
4228 
4229    | loop_1
4230    |   loop_2
4231    |     A[10]
4232    |   endloop_2
4233    | endloop_1
4234 
4235    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
4236    save (0, 1), then we have to save (1, 0).  */
4237 
4238 static void
add_outer_distances(struct data_dependence_relation * ddr,lambda_vector dist_v,int index)4239 add_outer_distances (struct data_dependence_relation *ddr,
4240 		     lambda_vector dist_v, int index)
4241 {
4242   /* For each outer loop where init_v is not set, the accesses are
4243      in dependence of distance 1 in the loop.  */
4244   while (--index >= 0)
4245     {
4246       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4247       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4248       save_v[index] = 1;
4249       save_dist_v (ddr, save_v);
4250     }
4251 }
4252 
4253 /* Return false when fail to represent the data dependence as a
4254    distance vector.  A_INDEX is the index of the first reference
4255    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4256    second reference.  INIT_B is set to true when a component has been
4257    added to the distance vector DIST_V.  INDEX_CARRY is then set to
4258    the index in DIST_V that carries the dependence.  */
4259 
4260 static bool
build_classic_dist_vector_1(struct data_dependence_relation * ddr,unsigned int a_index,unsigned int b_index,lambda_vector dist_v,bool * init_b,int * index_carry)4261 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4262 			     unsigned int a_index, unsigned int b_index,
4263 			     lambda_vector dist_v, bool *init_b,
4264 			     int *index_carry)
4265 {
4266   unsigned i;
4267   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4268   struct loop *loop = DDR_LOOP_NEST (ddr)[0];
4269 
4270   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4271     {
4272       tree access_fn_a, access_fn_b;
4273       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4274 
4275       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4276 	{
4277 	  non_affine_dependence_relation (ddr);
4278 	  return false;
4279 	}
4280 
4281       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4282       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
4283 
4284       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4285 	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4286 	{
4287 	  HOST_WIDE_INT dist;
4288 	  int index;
4289 	  int var_a = CHREC_VARIABLE (access_fn_a);
4290 	  int var_b = CHREC_VARIABLE (access_fn_b);
4291 
4292 	  if (var_a != var_b
4293 	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4294 	    {
4295 	      non_affine_dependence_relation (ddr);
4296 	      return false;
4297 	    }
4298 
4299 	  /* When data references are collected in a loop while data
4300 	     dependences are analyzed in loop nest nested in the loop, we
4301 	     would have more number of access functions than number of
4302 	     loops.  Skip access functions of loops not in the loop nest.
4303 
4304 	     See PR89725 for more information.  */
4305 	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
4306 	    continue;
4307 
4308 	  dist = int_cst_value (SUB_DISTANCE (subscript));
4309 	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4310 	  *index_carry = MIN (index, *index_carry);
4311 
4312 	  /* This is the subscript coupling test.  If we have already
4313 	     recorded a distance for this loop (a distance coming from
4314 	     another subscript), it should be the same.  For example,
4315 	     in the following code, there is no dependence:
4316 
4317 	     | loop i = 0, N, 1
4318 	     |   T[i+1][i] = ...
4319 	     |   ... = T[i][i]
4320 	     | endloop
4321 	  */
4322 	  if (init_v[index] != 0 && dist_v[index] != dist)
4323 	    {
4324 	      finalize_ddr_dependent (ddr, chrec_known);
4325 	      return false;
4326 	    }
4327 
4328 	  dist_v[index] = dist;
4329 	  init_v[index] = 1;
4330 	  *init_b = true;
4331 	}
4332       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4333 	{
4334 	  /* This can be for example an affine vs. constant dependence
4335 	     (T[i] vs. T[3]) that is not an affine dependence and is
4336 	     not representable as a distance vector.  */
4337 	  non_affine_dependence_relation (ddr);
4338 	  return false;
4339 	}
4340     }
4341 
4342   return true;
4343 }
4344 
4345 /* Return true when the DDR contains only constant access functions.  */
4346 
4347 static bool
constant_access_functions(const struct data_dependence_relation * ddr)4348 constant_access_functions (const struct data_dependence_relation *ddr)
4349 {
4350   unsigned i;
4351   subscript *sub;
4352 
4353   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4354     if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4355 	|| !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4356       return false;
4357 
4358   return true;
4359 }
4360 
4361 /* Helper function for the case where DDR_A and DDR_B are the same
4362    multivariate access function with a constant step.  For an example
4363    see pr34635-1.c.  */
4364 
4365 static void
add_multivariate_self_dist(struct data_dependence_relation * ddr,tree c_2)4366 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
4367 {
4368   int x_1, x_2;
4369   tree c_1 = CHREC_LEFT (c_2);
4370   tree c_0 = CHREC_LEFT (c_1);
4371   lambda_vector dist_v;
4372   HOST_WIDE_INT v1, v2, cd;
4373 
4374   /* Polynomials with more than 2 variables are not handled yet.  When
4375      the evolution steps are parameters, it is not possible to
4376      represent the dependence using classical distance vectors.  */
4377   if (TREE_CODE (c_0) != INTEGER_CST
4378       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4379       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4380     {
4381       DDR_AFFINE_P (ddr) = false;
4382       return;
4383     }
4384 
4385   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4386   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4387 
4388   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
4389   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4390   v1 = int_cst_value (CHREC_RIGHT (c_1));
4391   v2 = int_cst_value (CHREC_RIGHT (c_2));
4392   cd = gcd (v1, v2);
4393   v1 /= cd;
4394   v2 /= cd;
4395 
4396   if (v2 < 0)
4397     {
4398       v2 = -v2;
4399       v1 = -v1;
4400     }
4401 
4402   dist_v[x_1] = v2;
4403   dist_v[x_2] = -v1;
4404   save_dist_v (ddr, dist_v);
4405 
4406   add_outer_distances (ddr, dist_v, x_1);
4407 }
4408 
4409 /* Helper function for the case where DDR_A and DDR_B are the same
4410    access functions.  */
4411 
4412 static void
add_other_self_distances(struct data_dependence_relation * ddr)4413 add_other_self_distances (struct data_dependence_relation *ddr)
4414 {
4415   lambda_vector dist_v;
4416   unsigned i;
4417   int index_carry = DDR_NB_LOOPS (ddr);
4418   subscript *sub;
4419   struct loop *loop = DDR_LOOP_NEST (ddr)[0];
4420 
4421   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4422     {
4423       tree access_fun = SUB_ACCESS_FN (sub, 0);
4424 
4425       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
4426 	{
4427 	  if (!evolution_function_is_univariate_p (access_fun))
4428 	    {
4429 	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4430 		{
4431 		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4432 		  return;
4433 		}
4434 
4435 	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
4436 
4437 	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4438 		add_multivariate_self_dist (ddr, access_fun);
4439 	      else
4440 		/* The evolution step is not constant: it varies in
4441 		   the outer loop, so this cannot be represented by a
4442 		   distance vector.  For example in pr34635.c the
4443 		   evolution is {0, +, {0, +, 4}_1}_2.  */
4444 		DDR_AFFINE_P (ddr) = false;
4445 
4446 	      return;
4447 	    }
4448 
4449 	  /* When data references are collected in a loop while data
4450 	     dependences are analyzed in loop nest nested in the loop, we
4451 	     would have more number of access functions than number of
4452 	     loops.  Skip access functions of loops not in the loop nest.
4453 
4454 	     See PR89725 for more information.  */
4455 	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
4456 				  loop))
4457 	    continue;
4458 
4459 	  index_carry = MIN (index_carry,
4460 			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
4461 						 DDR_LOOP_NEST (ddr)));
4462 	}
4463     }
4464 
4465   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4466   add_outer_distances (ddr, dist_v, index_carry);
4467 }
4468 
4469 static void
insert_innermost_unit_dist_vector(struct data_dependence_relation * ddr)4470 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4471 {
4472   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4473 
4474   dist_v[DDR_INNER_LOOP (ddr)] = 1;
4475   save_dist_v (ddr, dist_v);
4476 }
4477 
4478 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
4479    is the case for example when access functions are the same and
4480    equal to a constant, as in:
4481 
4482    | loop_1
4483    |   A[3] = ...
4484    |   ... = A[3]
4485    | endloop_1
4486 
4487    in which case the distance vectors are (0) and (1).  */
4488 
4489 static void
add_distance_for_zero_overlaps(struct data_dependence_relation * ddr)4490 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4491 {
4492   unsigned i, j;
4493 
4494   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4495     {
4496       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4497       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4498       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4499 
4500       for (j = 0; j < ca->n; j++)
4501 	if (affine_function_zero_p (ca->fns[j]))
4502 	  {
4503 	    insert_innermost_unit_dist_vector (ddr);
4504 	    return;
4505 	  }
4506 
4507       for (j = 0; j < cb->n; j++)
4508 	if (affine_function_zero_p (cb->fns[j]))
4509 	  {
4510 	    insert_innermost_unit_dist_vector (ddr);
4511 	    return;
4512 	  }
4513     }
4514 }
4515 
4516 /* Return true when the DDR contains two data references that have the
4517    same access functions.  */
4518 
4519 static inline bool
same_access_functions(const struct data_dependence_relation * ddr)4520 same_access_functions (const struct data_dependence_relation *ddr)
4521 {
4522   unsigned i;
4523   subscript *sub;
4524 
4525   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4526     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4527 			  SUB_ACCESS_FN (sub, 1)))
4528       return false;
4529 
4530   return true;
4531 }
4532 
4533 /* Compute the classic per loop distance vector.  DDR is the data
4534    dependence relation to build a vector from.  Return false when fail
4535    to represent the data dependence as a distance vector.  */
4536 
4537 static bool
build_classic_dist_vector(struct data_dependence_relation * ddr,struct loop * loop_nest)4538 build_classic_dist_vector (struct data_dependence_relation *ddr,
4539 			   struct loop *loop_nest)
4540 {
4541   bool init_b = false;
4542   int index_carry = DDR_NB_LOOPS (ddr);
4543   lambda_vector dist_v;
4544 
4545   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4546     return false;
4547 
4548   if (same_access_functions (ddr))
4549     {
4550       /* Save the 0 vector.  */
4551       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4552       save_dist_v (ddr, dist_v);
4553 
4554       if (constant_access_functions (ddr))
4555 	add_distance_for_zero_overlaps (ddr);
4556 
4557       if (DDR_NB_LOOPS (ddr) > 1)
4558 	add_other_self_distances (ddr);
4559 
4560       return true;
4561     }
4562 
4563   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4564   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
4565     return false;
4566 
4567   /* Save the distance vector if we initialized one.  */
4568   if (init_b)
4569     {
4570       /* Verify a basic constraint: classic distance vectors should
4571 	 always be lexicographically positive.
4572 
4573 	 Data references are collected in the order of execution of
4574 	 the program, thus for the following loop
4575 
4576 	 | for (i = 1; i < 100; i++)
4577 	 |   for (j = 1; j < 100; j++)
4578 	 |     {
4579 	 |       t = T[j+1][i-1];  // A
4580 	 |       T[j][i] = t + 2;  // B
4581 	 |     }
4582 
4583 	 references are collected following the direction of the wind:
4584 	 A then B.  The data dependence tests are performed also
4585 	 following this order, such that we're looking at the distance
4586 	 separating the elements accessed by A from the elements later
4587 	 accessed by B.  But in this example, the distance returned by
4588 	 test_dep (A, B) is lexicographically negative (-1, 1), that
4589 	 means that the access A occurs later than B with respect to
4590 	 the outer loop, ie. we're actually looking upwind.  In this
4591 	 case we solve test_dep (B, A) looking downwind to the
4592 	 lexicographically positive solution, that returns the
4593 	 distance vector (1, -1).  */
4594       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
4595 	{
4596 	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4597 	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4598 	    return false;
4599 	  compute_subscript_distance (ddr);
4600 	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
4601 					    &index_carry))
4602 	    return false;
4603 	  save_dist_v (ddr, save_v);
4604 	  DDR_REVERSED_P (ddr) = true;
4605 
4606 	  /* In this case there is a dependence forward for all the
4607 	     outer loops:
4608 
4609 	     | for (k = 1; k < 100; k++)
4610 	     |  for (i = 1; i < 100; i++)
4611 	     |   for (j = 1; j < 100; j++)
4612 	     |     {
4613 	     |       t = T[j+1][i-1];  // A
4614 	     |       T[j][i] = t + 2;  // B
4615 	     |     }
4616 
4617 	     the vectors are:
4618 	     (0,  1, -1)
4619 	     (1,  1, -1)
4620 	     (1, -1,  1)
4621 	  */
4622 	  if (DDR_NB_LOOPS (ddr) > 1)
4623 	    {
4624  	      add_outer_distances (ddr, save_v, index_carry);
4625 	      add_outer_distances (ddr, dist_v, index_carry);
4626 	    }
4627 	}
4628       else
4629 	{
4630 	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4631 	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4632 
4633 	  if (DDR_NB_LOOPS (ddr) > 1)
4634 	    {
4635 	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4636 
4637 	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4638 		return false;
4639 	      compute_subscript_distance (ddr);
4640 	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
4641 						&index_carry))
4642 		return false;
4643 
4644 	      save_dist_v (ddr, save_v);
4645 	      add_outer_distances (ddr, dist_v, index_carry);
4646 	      add_outer_distances (ddr, opposite_v, index_carry);
4647 	    }
4648 	  else
4649 	    save_dist_v (ddr, save_v);
4650 	}
4651     }
4652   else
4653     {
4654       /* There is a distance of 1 on all the outer loops: Example:
4655 	 there is a dependence of distance 1 on loop_1 for the array A.
4656 
4657 	 | loop_1
4658 	 |   A[5] = ...
4659 	 | endloop
4660       */
4661       add_outer_distances (ddr, dist_v,
4662 			   lambda_vector_first_nz (dist_v,
4663 						   DDR_NB_LOOPS (ddr), 0));
4664     }
4665 
4666   if (dump_file && (dump_flags & TDF_DETAILS))
4667     {
4668       unsigned i;
4669 
4670       fprintf (dump_file, "(build_classic_dist_vector\n");
4671       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4672 	{
4673 	  fprintf (dump_file, "  dist_vector = (");
4674 	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
4675 			       DDR_NB_LOOPS (ddr));
4676 	  fprintf (dump_file, "  )\n");
4677 	}
4678       fprintf (dump_file, ")\n");
4679     }
4680 
4681   return true;
4682 }
4683 
4684 /* Return the direction for a given distance.
4685    FIXME: Computing dir this way is suboptimal, since dir can catch
4686    cases that dist is unable to represent.  */
4687 
4688 static inline enum data_dependence_direction
dir_from_dist(int dist)4689 dir_from_dist (int dist)
4690 {
4691   if (dist > 0)
4692     return dir_positive;
4693   else if (dist < 0)
4694     return dir_negative;
4695   else
4696     return dir_equal;
4697 }
4698 
4699 /* Compute the classic per loop direction vector.  DDR is the data
4700    dependence relation to build a vector from.  */
4701 
4702 static void
build_classic_dir_vector(struct data_dependence_relation * ddr)4703 build_classic_dir_vector (struct data_dependence_relation *ddr)
4704 {
4705   unsigned i, j;
4706   lambda_vector dist_v;
4707 
4708   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4709     {
4710       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4711 
4712       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4713 	dir_v[j] = dir_from_dist (dist_v[j]);
4714 
4715       save_dir_v (ddr, dir_v);
4716     }
4717 }
4718 
4719 /* Helper function.  Returns true when there is a dependence between the
4720    data references.  A_INDEX is the index of the first reference (0 for
4721    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
4722 
4723 static bool
subscript_dependence_tester_1(struct data_dependence_relation * ddr,unsigned int a_index,unsigned int b_index,struct loop * loop_nest)4724 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
4725 			       unsigned int a_index, unsigned int b_index,
4726 			       struct loop *loop_nest)
4727 {
4728   unsigned int i;
4729   tree last_conflicts;
4730   struct subscript *subscript;
4731   tree res = NULL_TREE;
4732 
4733   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
4734     {
4735       conflict_function *overlaps_a, *overlaps_b;
4736 
4737       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
4738 				      SUB_ACCESS_FN (subscript, b_index),
4739 				      &overlaps_a, &overlaps_b,
4740 				      &last_conflicts, loop_nest);
4741 
4742       if (SUB_CONFLICTS_IN_A (subscript))
4743 	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4744       if (SUB_CONFLICTS_IN_B (subscript))
4745 	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4746 
4747       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
4748       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
4749       SUB_LAST_CONFLICT (subscript) = last_conflicts;
4750 
4751       /* If there is any undetermined conflict function we have to
4752          give a conservative answer in case we cannot prove that
4753 	 no dependence exists when analyzing another subscript.  */
4754       if (CF_NOT_KNOWN_P (overlaps_a)
4755  	  || CF_NOT_KNOWN_P (overlaps_b))
4756  	{
4757 	  res = chrec_dont_know;
4758 	  continue;
4759  	}
4760 
4761       /* When there is a subscript with no dependence we can stop.  */
4762       else if (CF_NO_DEPENDENCE_P (overlaps_a)
4763  	       || CF_NO_DEPENDENCE_P (overlaps_b))
4764  	{
4765 	  res = chrec_known;
4766 	  break;
4767  	}
4768     }
4769 
4770   if (res == NULL_TREE)
4771     return true;
4772 
4773   if (res == chrec_known)
4774     dependence_stats.num_dependence_independent++;
4775   else
4776     dependence_stats.num_dependence_undetermined++;
4777   finalize_ddr_dependent (ddr, res);
4778   return false;
4779 }
4780 
4781 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
4782 
4783 static void
subscript_dependence_tester(struct data_dependence_relation * ddr,struct loop * loop_nest)4784 subscript_dependence_tester (struct data_dependence_relation *ddr,
4785 			     struct loop *loop_nest)
4786 {
4787   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
4788     dependence_stats.num_dependence_dependent++;
4789 
4790   compute_subscript_distance (ddr);
4791   if (build_classic_dist_vector (ddr, loop_nest))
4792     build_classic_dir_vector (ddr);
4793 }
4794 
4795 /* Returns true when all the access functions of A are affine or
4796    constant with respect to LOOP_NEST.  */
4797 
4798 static bool
access_functions_are_affine_or_constant_p(const struct data_reference * a,const struct loop * loop_nest)4799 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4800 					   const struct loop *loop_nest)
4801 {
4802   unsigned int i;
4803   vec<tree> fns = DR_ACCESS_FNS (a);
4804   tree t;
4805 
4806   FOR_EACH_VEC_ELT (fns, i, t)
4807     if (!evolution_function_is_invariant_p (t, loop_nest->num)
4808 	&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4809       return false;
4810 
4811   return true;
4812 }
4813 
4814 /* This computes the affine dependence relation between A and B with
4815    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
4816    independence between two accesses, while CHREC_DONT_KNOW is used
4817    for representing the unknown relation.
4818 
4819    Note that it is possible to stop the computation of the dependence
4820    relation the first time we detect a CHREC_KNOWN element for a given
4821    subscript.  */
4822 
4823 void
compute_affine_dependence(struct data_dependence_relation * ddr,struct loop * loop_nest)4824 compute_affine_dependence (struct data_dependence_relation *ddr,
4825 			   struct loop *loop_nest)
4826 {
4827   struct data_reference *dra = DDR_A (ddr);
4828   struct data_reference *drb = DDR_B (ddr);
4829 
4830   if (dump_file && (dump_flags & TDF_DETAILS))
4831     {
4832       fprintf (dump_file, "(compute_affine_dependence\n");
4833       fprintf (dump_file, "  stmt_a: ");
4834       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4835       fprintf (dump_file, "  stmt_b: ");
4836       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4837     }
4838 
4839   /* Analyze only when the dependence relation is not yet known.  */
4840   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4841     {
4842       dependence_stats.num_dependence_tests++;
4843 
4844       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4845 	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
4846 	subscript_dependence_tester (ddr, loop_nest);
4847 
4848       /* As a last case, if the dependence cannot be determined, or if
4849 	 the dependence is considered too difficult to determine, answer
4850 	 "don't know".  */
4851       else
4852 	{
4853 	  dependence_stats.num_dependence_undetermined++;
4854 
4855 	  if (dump_file && (dump_flags & TDF_DETAILS))
4856 	    {
4857 	      fprintf (dump_file, "Data ref a:\n");
4858 	      dump_data_reference (dump_file, dra);
4859 	      fprintf (dump_file, "Data ref b:\n");
4860 	      dump_data_reference (dump_file, drb);
4861 	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4862 	    }
4863 	  finalize_ddr_dependent (ddr, chrec_dont_know);
4864 	}
4865     }
4866 
4867   if (dump_file && (dump_flags & TDF_DETAILS))
4868     {
4869       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4870 	fprintf (dump_file, ") -> no dependence\n");
4871       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4872 	fprintf (dump_file, ") -> dependence analysis failed\n");
4873       else
4874 	fprintf (dump_file, ")\n");
4875     }
4876 }
4877 
4878 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4879    the data references in DATAREFS, in the LOOP_NEST.  When
4880    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4881    relations.  Return true when successful, i.e. data references number
4882    is small enough to be handled.  */
4883 
4884 bool
compute_all_dependences(vec<data_reference_p> datarefs,vec<ddr_p> * dependence_relations,vec<loop_p> loop_nest,bool compute_self_and_rr)4885 compute_all_dependences (vec<data_reference_p> datarefs,
4886 			 vec<ddr_p> *dependence_relations,
4887 			 vec<loop_p> loop_nest,
4888 			 bool compute_self_and_rr)
4889 {
4890   struct data_dependence_relation *ddr;
4891   struct data_reference *a, *b;
4892   unsigned int i, j;
4893 
4894   if ((int) datarefs.length ()
4895       > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4896     {
4897       struct data_dependence_relation *ddr;
4898 
4899       /* Insert a single relation into dependence_relations:
4900 	 chrec_dont_know.  */
4901       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4902       dependence_relations->safe_push (ddr);
4903       return false;
4904     }
4905 
4906   FOR_EACH_VEC_ELT (datarefs, i, a)
4907     for (j = i + 1; datarefs.iterate (j, &b); j++)
4908       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4909 	{
4910 	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
4911 	  dependence_relations->safe_push (ddr);
4912           if (loop_nest.exists ())
4913    	    compute_affine_dependence (ddr, loop_nest[0]);
4914 	}
4915 
4916   if (compute_self_and_rr)
4917     FOR_EACH_VEC_ELT (datarefs, i, a)
4918       {
4919 	ddr = initialize_data_dependence_relation (a, a, loop_nest);
4920 	dependence_relations->safe_push (ddr);
4921         if (loop_nest.exists ())
4922    	  compute_affine_dependence (ddr, loop_nest[0]);
4923       }
4924 
4925   return true;
4926 }
4927 
4928 /* Describes a location of a memory reference.  */
4929 
4930 struct data_ref_loc
4931 {
4932   /* The memory reference.  */
4933   tree ref;
4934 
4935   /* True if the memory reference is read.  */
4936   bool is_read;
4937 
4938   /* True if the data reference is conditional within the containing
4939      statement, i.e. if it might not occur even when the statement
4940      is executed and runs to completion.  */
4941   bool is_conditional_in_stmt;
4942 };
4943 
4944 
4945 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
4946    true if STMT clobbers memory, false otherwise.  */
4947 
4948 static bool
get_references_in_stmt(gimple * stmt,vec<data_ref_loc,va_heap> * references)4949 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
4950 {
4951   bool clobbers_memory = false;
4952   data_ref_loc ref;
4953   tree op0, op1;
4954   enum gimple_code stmt_code = gimple_code (stmt);
4955 
4956   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4957      As we cannot model data-references to not spelled out
4958      accesses give up if they may occur.  */
4959   if (stmt_code == GIMPLE_CALL
4960       && !(gimple_call_flags (stmt) & ECF_CONST))
4961     {
4962       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
4963       if (gimple_call_internal_p (stmt))
4964 	switch (gimple_call_internal_fn (stmt))
4965 	  {
4966 	  case IFN_GOMP_SIMD_LANE:
4967 	    {
4968 	      struct loop *loop = gimple_bb (stmt)->loop_father;
4969 	      tree uid = gimple_call_arg (stmt, 0);
4970 	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
4971 	      if (loop == NULL
4972 		  || loop->simduid != SSA_NAME_VAR (uid))
4973 		clobbers_memory = true;
4974 	      break;
4975 	    }
4976 	  case IFN_MASK_LOAD:
4977 	  case IFN_MASK_STORE:
4978 	    break;
4979 	  default:
4980 	    clobbers_memory = true;
4981 	    break;
4982 	  }
4983       else
4984 	clobbers_memory = true;
4985     }
4986   else if (stmt_code == GIMPLE_ASM
4987 	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
4988 	       || gimple_vuse (stmt)))
4989     clobbers_memory = true;
4990 
4991   if (!gimple_vuse (stmt))
4992     return clobbers_memory;
4993 
4994   if (stmt_code == GIMPLE_ASSIGN)
4995     {
4996       tree base;
4997       op0 = gimple_assign_lhs (stmt);
4998       op1 = gimple_assign_rhs1 (stmt);
4999 
5000       if (DECL_P (op1)
5001 	  || (REFERENCE_CLASS_P (op1)
5002 	      && (base = get_base_address (op1))
5003 	      && TREE_CODE (base) != SSA_NAME
5004 	      && !is_gimple_min_invariant (base)))
5005 	{
5006 	  ref.ref = op1;
5007 	  ref.is_read = true;
5008 	  ref.is_conditional_in_stmt = false;
5009 	  references->safe_push (ref);
5010 	}
5011     }
5012   else if (stmt_code == GIMPLE_CALL)
5013     {
5014       unsigned i, n;
5015       tree ptr, type;
5016       unsigned int align;
5017 
5018       ref.is_read = false;
5019       if (gimple_call_internal_p (stmt))
5020 	switch (gimple_call_internal_fn (stmt))
5021 	  {
5022 	  case IFN_MASK_LOAD:
5023 	    if (gimple_call_lhs (stmt) == NULL_TREE)
5024 	      break;
5025 	    ref.is_read = true;
5026 	    /* FALLTHRU */
5027 	  case IFN_MASK_STORE:
5028 	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5029 	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
5030 	    if (ref.is_read)
5031 	      type = TREE_TYPE (gimple_call_lhs (stmt));
5032 	    else
5033 	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
5034 	    if (TYPE_ALIGN (type) != align)
5035 	      type = build_aligned_type (type, align);
5036 	    ref.is_conditional_in_stmt = true;
5037 	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5038 				   ptr);
5039 	    references->safe_push (ref);
5040 	    return false;
5041 	  default:
5042 	    break;
5043 	  }
5044 
5045       op0 = gimple_call_lhs (stmt);
5046       n = gimple_call_num_args (stmt);
5047       for (i = 0; i < n; i++)
5048 	{
5049 	  op1 = gimple_call_arg (stmt, i);
5050 
5051 	  if (DECL_P (op1)
5052 	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5053 	    {
5054 	      ref.ref = op1;
5055 	      ref.is_read = true;
5056 	      ref.is_conditional_in_stmt = false;
5057 	      references->safe_push (ref);
5058 	    }
5059 	}
5060     }
5061   else
5062     return clobbers_memory;
5063 
5064   if (op0
5065       && (DECL_P (op0)
5066 	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5067     {
5068       ref.ref = op0;
5069       ref.is_read = false;
5070       ref.is_conditional_in_stmt = false;
5071       references->safe_push (ref);
5072     }
5073   return clobbers_memory;
5074 }
5075 
5076 
5077 /* Returns true if the loop-nest has any data reference.  */
5078 
5079 bool
loop_nest_has_data_refs(loop_p loop)5080 loop_nest_has_data_refs (loop_p loop)
5081 {
5082   basic_block *bbs = get_loop_body (loop);
5083   auto_vec<data_ref_loc, 3> references;
5084 
5085   for (unsigned i = 0; i < loop->num_nodes; i++)
5086     {
5087       basic_block bb = bbs[i];
5088       gimple_stmt_iterator bsi;
5089 
5090       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5091 	{
5092 	  gimple *stmt = gsi_stmt (bsi);
5093 	  get_references_in_stmt (stmt, &references);
5094 	  if (references.length ())
5095 	    {
5096 	      free (bbs);
5097 	      return true;
5098 	    }
5099 	}
5100     }
5101   free (bbs);
5102   return false;
5103 }
5104 
5105 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
5106    reference, returns false, otherwise returns true.  NEST is the outermost
5107    loop of the loop nest in which the references should be analyzed.  */
5108 
5109 bool
find_data_references_in_stmt(struct loop * nest,gimple * stmt,vec<data_reference_p> * datarefs)5110 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
5111 			      vec<data_reference_p> *datarefs)
5112 {
5113   unsigned i;
5114   auto_vec<data_ref_loc, 2> references;
5115   data_ref_loc *ref;
5116   bool ret = true;
5117   data_reference_p dr;
5118 
5119   if (get_references_in_stmt (stmt, &references))
5120     return false;
5121 
5122   FOR_EACH_VEC_ELT (references, i, ref)
5123     {
5124       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5125 			    loop_containing_stmt (stmt), ref->ref,
5126 			    stmt, ref->is_read, ref->is_conditional_in_stmt);
5127       gcc_assert (dr != NULL);
5128       datarefs->safe_push (dr);
5129     }
5130 
5131   return ret;
5132 }
5133 
5134 /* Stores the data references in STMT to DATAREFS.  If there is an
5135    unanalyzable reference, returns false, otherwise returns true.
5136    NEST is the outermost loop of the loop nest in which the references
5137    should be instantiated, LOOP is the loop in which the references
5138    should be analyzed.  */
5139 
5140 bool
graphite_find_data_references_in_stmt(edge nest,loop_p loop,gimple * stmt,vec<data_reference_p> * datarefs)5141 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5142 				       vec<data_reference_p> *datarefs)
5143 {
5144   unsigned i;
5145   auto_vec<data_ref_loc, 2> references;
5146   data_ref_loc *ref;
5147   bool ret = true;
5148   data_reference_p dr;
5149 
5150   if (get_references_in_stmt (stmt, &references))
5151     return false;
5152 
5153   FOR_EACH_VEC_ELT (references, i, ref)
5154     {
5155       dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5156 			    ref->is_conditional_in_stmt);
5157       gcc_assert (dr != NULL);
5158       datarefs->safe_push (dr);
5159     }
5160 
5161   return ret;
5162 }
5163 
5164 /* Search the data references in LOOP, and record the information into
5165    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5166    difficult case, returns NULL_TREE otherwise.  */
5167 
5168 tree
find_data_references_in_bb(struct loop * loop,basic_block bb,vec<data_reference_p> * datarefs)5169 find_data_references_in_bb (struct loop *loop, basic_block bb,
5170                             vec<data_reference_p> *datarefs)
5171 {
5172   gimple_stmt_iterator bsi;
5173 
5174   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5175     {
5176       gimple *stmt = gsi_stmt (bsi);
5177 
5178       if (!find_data_references_in_stmt (loop, stmt, datarefs))
5179         {
5180           struct data_reference *res;
5181           res = XCNEW (struct data_reference);
5182           datarefs->safe_push (res);
5183 
5184           return chrec_dont_know;
5185         }
5186     }
5187 
5188   return NULL_TREE;
5189 }
5190 
5191 /* Search the data references in LOOP, and record the information into
5192    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5193    difficult case, returns NULL_TREE otherwise.
5194 
5195    TODO: This function should be made smarter so that it can handle address
5196    arithmetic as if they were array accesses, etc.  */
5197 
5198 tree
find_data_references_in_loop(struct loop * loop,vec<data_reference_p> * datarefs)5199 find_data_references_in_loop (struct loop *loop,
5200 			      vec<data_reference_p> *datarefs)
5201 {
5202   basic_block bb, *bbs;
5203   unsigned int i;
5204 
5205   bbs = get_loop_body_in_dom_order (loop);
5206 
5207   for (i = 0; i < loop->num_nodes; i++)
5208     {
5209       bb = bbs[i];
5210 
5211       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5212         {
5213           free (bbs);
5214           return chrec_dont_know;
5215         }
5216     }
5217   free (bbs);
5218 
5219   return NULL_TREE;
5220 }
5221 
5222 /* Return the alignment in bytes that DRB is guaranteed to have at all
5223    times.  */
5224 
5225 unsigned int
dr_alignment(innermost_loop_behavior * drb)5226 dr_alignment (innermost_loop_behavior *drb)
5227 {
5228   /* Get the alignment of BASE_ADDRESS + INIT.  */
5229   unsigned int alignment = drb->base_alignment;
5230   unsigned int misalignment = (drb->base_misalignment
5231 			       + TREE_INT_CST_LOW (drb->init));
5232   if (misalignment != 0)
5233     alignment = MIN (alignment, misalignment & -misalignment);
5234 
5235   /* Cap it to the alignment of OFFSET.  */
5236   if (!integer_zerop (drb->offset))
5237     alignment = MIN (alignment, drb->offset_alignment);
5238 
5239   /* Cap it to the alignment of STEP.  */
5240   if (!integer_zerop (drb->step))
5241     alignment = MIN (alignment, drb->step_alignment);
5242 
5243   return alignment;
5244 }
5245 
5246 /* If BASE is a pointer-typed SSA name, try to find the object that it
5247    is based on.  Return this object X on success and store the alignment
5248    in bytes of BASE - &X in *ALIGNMENT_OUT.  */
5249 
5250 static tree
get_base_for_alignment_1(tree base,unsigned int * alignment_out)5251 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
5252 {
5253   if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
5254     return NULL_TREE;
5255 
5256   gimple *def = SSA_NAME_DEF_STMT (base);
5257   base = analyze_scalar_evolution (loop_containing_stmt (def), base);
5258 
5259   /* Peel chrecs and record the minimum alignment preserved by
5260      all steps.  */
5261   unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5262   while (TREE_CODE (base) == POLYNOMIAL_CHREC)
5263     {
5264       unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
5265       alignment = MIN (alignment, step_alignment);
5266       base = CHREC_LEFT (base);
5267     }
5268 
5269   /* Punt if the expression is too complicated to handle.  */
5270   if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
5271     return NULL_TREE;
5272 
5273   /* The only useful cases are those for which a dereference folds to something
5274      other than an INDIRECT_REF.  */
5275   tree ref_type = TREE_TYPE (TREE_TYPE (base));
5276   tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
5277   if (!ref)
5278     return NULL_TREE;
5279 
5280   /* Analyze the base to which the steps we peeled were applied.  */
5281   poly_int64 bitsize, bitpos, bytepos;
5282   machine_mode mode;
5283   int unsignedp, reversep, volatilep;
5284   tree offset;
5285   base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
5286 			      &unsignedp, &reversep, &volatilep);
5287   if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
5288     return NULL_TREE;
5289 
5290   /* Restrict the alignment to that guaranteed by the offsets.  */
5291   unsigned int bytepos_alignment = known_alignment (bytepos);
5292   if (bytepos_alignment != 0)
5293     alignment = MIN (alignment, bytepos_alignment);
5294   if (offset)
5295     {
5296       unsigned int offset_alignment = highest_pow2_factor (offset);
5297       alignment = MIN (alignment, offset_alignment);
5298     }
5299 
5300   *alignment_out = alignment;
5301   return base;
5302 }
5303 
5304 /* Return the object whose alignment would need to be changed in order
5305    to increase the alignment of ADDR.  Store the maximum achievable
5306    alignment in *MAX_ALIGNMENT.  */
5307 
5308 tree
get_base_for_alignment(tree addr,unsigned int * max_alignment)5309 get_base_for_alignment (tree addr, unsigned int *max_alignment)
5310 {
5311   tree base = get_base_for_alignment_1 (addr, max_alignment);
5312   if (base)
5313     return base;
5314 
5315   if (TREE_CODE (addr) == ADDR_EXPR)
5316     addr = TREE_OPERAND (addr, 0);
5317   *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5318   return addr;
5319 }
5320 
5321 /* Recursive helper function.  */
5322 
5323 static bool
find_loop_nest_1(struct loop * loop,vec<loop_p> * loop_nest)5324 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
5325 {
5326   /* Inner loops of the nest should not contain siblings.  Example:
5327      when there are two consecutive loops,
5328 
5329      | loop_0
5330      |   loop_1
5331      |     A[{0, +, 1}_1]
5332      |   endloop_1
5333      |   loop_2
5334      |     A[{0, +, 1}_2]
5335      |   endloop_2
5336      | endloop_0
5337 
5338      the dependence relation cannot be captured by the distance
5339      abstraction.  */
5340   if (loop->next)
5341     return false;
5342 
5343   loop_nest->safe_push (loop);
5344   if (loop->inner)
5345     return find_loop_nest_1 (loop->inner, loop_nest);
5346   return true;
5347 }
5348 
5349 /* Return false when the LOOP is not well nested.  Otherwise return
5350    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
5351    contain the loops from the outermost to the innermost, as they will
5352    appear in the classic distance vector.  */
5353 
5354 bool
find_loop_nest(struct loop * loop,vec<loop_p> * loop_nest)5355 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
5356 {
5357   loop_nest->safe_push (loop);
5358   if (loop->inner)
5359     return find_loop_nest_1 (loop->inner, loop_nest);
5360   return true;
5361 }
5362 
5363 /* Returns true when the data dependences have been computed, false otherwise.
5364    Given a loop nest LOOP, the following vectors are returned:
5365    DATAREFS is initialized to all the array elements contained in this loop,
5366    DEPENDENCE_RELATIONS contains the relations between the data references.
5367    Compute read-read and self relations if
5368    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
5369 
5370 bool
compute_data_dependences_for_loop(struct loop * loop,bool compute_self_and_read_read_dependences,vec<loop_p> * loop_nest,vec<data_reference_p> * datarefs,vec<ddr_p> * dependence_relations)5371 compute_data_dependences_for_loop (struct loop *loop,
5372 				   bool compute_self_and_read_read_dependences,
5373 				   vec<loop_p> *loop_nest,
5374 				   vec<data_reference_p> *datarefs,
5375 				   vec<ddr_p> *dependence_relations)
5376 {
5377   bool res = true;
5378 
5379   memset (&dependence_stats, 0, sizeof (dependence_stats));
5380 
5381   /* If the loop nest is not well formed, or one of the data references
5382      is not computable, give up without spending time to compute other
5383      dependences.  */
5384   if (!loop
5385       || !find_loop_nest (loop, loop_nest)
5386       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5387       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5388 				   compute_self_and_read_read_dependences))
5389     res = false;
5390 
5391   if (dump_file && (dump_flags & TDF_STATS))
5392     {
5393       fprintf (dump_file, "Dependence tester statistics:\n");
5394 
5395       fprintf (dump_file, "Number of dependence tests: %d\n",
5396 	       dependence_stats.num_dependence_tests);
5397       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5398 	       dependence_stats.num_dependence_dependent);
5399       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5400 	       dependence_stats.num_dependence_independent);
5401       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5402 	       dependence_stats.num_dependence_undetermined);
5403 
5404       fprintf (dump_file, "Number of subscript tests: %d\n",
5405 	       dependence_stats.num_subscript_tests);
5406       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5407 	       dependence_stats.num_subscript_undetermined);
5408       fprintf (dump_file, "Number of same subscript function: %d\n",
5409 	       dependence_stats.num_same_subscript_function);
5410 
5411       fprintf (dump_file, "Number of ziv tests: %d\n",
5412 	       dependence_stats.num_ziv);
5413       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5414 	       dependence_stats.num_ziv_dependent);
5415       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5416 	       dependence_stats.num_ziv_independent);
5417       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5418 	       dependence_stats.num_ziv_unimplemented);
5419 
5420       fprintf (dump_file, "Number of siv tests: %d\n",
5421 	       dependence_stats.num_siv);
5422       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5423 	       dependence_stats.num_siv_dependent);
5424       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5425 	       dependence_stats.num_siv_independent);
5426       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5427 	       dependence_stats.num_siv_unimplemented);
5428 
5429       fprintf (dump_file, "Number of miv tests: %d\n",
5430 	       dependence_stats.num_miv);
5431       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5432 	       dependence_stats.num_miv_dependent);
5433       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5434 	       dependence_stats.num_miv_independent);
5435       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5436 	       dependence_stats.num_miv_unimplemented);
5437     }
5438 
5439   return res;
5440 }
5441 
5442 /* Free the memory used by a data dependence relation DDR.  */
5443 
5444 void
free_dependence_relation(struct data_dependence_relation * ddr)5445 free_dependence_relation (struct data_dependence_relation *ddr)
5446 {
5447   if (ddr == NULL)
5448     return;
5449 
5450   if (DDR_SUBSCRIPTS (ddr).exists ())
5451     free_subscripts (DDR_SUBSCRIPTS (ddr));
5452   DDR_DIST_VECTS (ddr).release ();
5453   DDR_DIR_VECTS (ddr).release ();
5454 
5455   free (ddr);
5456 }
5457 
5458 /* Free the memory used by the data dependence relations from
5459    DEPENDENCE_RELATIONS.  */
5460 
5461 void
free_dependence_relations(vec<ddr_p> dependence_relations)5462 free_dependence_relations (vec<ddr_p> dependence_relations)
5463 {
5464   unsigned int i;
5465   struct data_dependence_relation *ddr;
5466 
5467   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5468     if (ddr)
5469       free_dependence_relation (ddr);
5470 
5471   dependence_relations.release ();
5472 }
5473 
5474 /* Free the memory used by the data references from DATAREFS.  */
5475 
5476 void
free_data_refs(vec<data_reference_p> datarefs)5477 free_data_refs (vec<data_reference_p> datarefs)
5478 {
5479   unsigned int i;
5480   struct data_reference *dr;
5481 
5482   FOR_EACH_VEC_ELT (datarefs, i, dr)
5483     free_data_ref (dr);
5484   datarefs.release ();
5485 }
5486 
5487 /* Common routine implementing both dr_direction_indicator and
5488    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
5489    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
5490    Return the step as the indicator otherwise.  */
5491 
5492 static tree
dr_step_indicator(struct data_reference * dr,int useful_min)5493 dr_step_indicator (struct data_reference *dr, int useful_min)
5494 {
5495   tree step = DR_STEP (dr);
5496   STRIP_NOPS (step);
5497   /* Look for cases where the step is scaled by a positive constant
5498      integer, which will often be the access size.  If the multiplication
5499      doesn't change the sign (due to overflow effects) then we can
5500      test the unscaled value instead.  */
5501   if (TREE_CODE (step) == MULT_EXPR
5502       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
5503       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
5504     {
5505       tree factor = TREE_OPERAND (step, 1);
5506       step = TREE_OPERAND (step, 0);
5507 
5508       /* Strip widening and truncating conversions as well as nops.  */
5509       if (CONVERT_EXPR_P (step)
5510 	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
5511 	step = TREE_OPERAND (step, 0);
5512       tree type = TREE_TYPE (step);
5513 
5514       /* Get the range of step values that would not cause overflow.  */
5515       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
5516 			 / wi::to_widest (factor));
5517       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
5518 			 / wi::to_widest (factor));
5519 
5520       /* Get the range of values that the unconverted step actually has.  */
5521       wide_int step_min, step_max;
5522       if (TREE_CODE (step) != SSA_NAME
5523 	  || get_range_info (step, &step_min, &step_max) != VR_RANGE)
5524 	{
5525 	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
5526 	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
5527 	}
5528 
5529       /* Check whether the unconverted step has an acceptable range.  */
5530       signop sgn = TYPE_SIGN (type);
5531       if (wi::les_p (minv, widest_int::from (step_min, sgn))
5532 	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
5533 	{
5534 	  if (wi::ge_p (step_min, useful_min, sgn))
5535 	    return ssize_int (useful_min);
5536 	  else if (wi::lt_p (step_max, 0, sgn))
5537 	    return ssize_int (-1);
5538 	  else
5539 	    return fold_convert (ssizetype, step);
5540 	}
5541     }
5542   return DR_STEP (dr);
5543 }
5544 
5545 /* Return a value that is negative iff DR has a negative step.  */
5546 
5547 tree
dr_direction_indicator(struct data_reference * dr)5548 dr_direction_indicator (struct data_reference *dr)
5549 {
5550   return dr_step_indicator (dr, 0);
5551 }
5552 
5553 /* Return a value that is zero iff DR has a zero step.  */
5554 
5555 tree
dr_zero_step_indicator(struct data_reference * dr)5556 dr_zero_step_indicator (struct data_reference *dr)
5557 {
5558   return dr_step_indicator (dr, 1);
5559 }
5560 
5561 /* Return true if DR is known to have a nonnegative (but possibly zero)
5562    step.  */
5563 
5564 bool
dr_known_forward_stride_p(struct data_reference * dr)5565 dr_known_forward_stride_p (struct data_reference *dr)
5566 {
5567   tree indicator = dr_direction_indicator (dr);
5568   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
5569 				   fold_convert (ssizetype, indicator),
5570 				   ssize_int (0));
5571   return neg_step_val && integer_zerop (neg_step_val);
5572 }
5573