1 /* Data references and dependences detectors.
2    Copyright (C) 2003-2021 Free Software Foundation, Inc.
3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* This pass walks a given loop structure searching for array
22    references.  The information about the array accesses is recorded
23    in DATA_REFERENCE structures.
24 
   The basic test for determining the dependences is:
   given two access functions chrec1 and chrec2 on the same array, and
   two vectors x and y from the iteration domain, the same element of
   the array is accessed twice at iterations x and y if and only if:
   |             chrec1 (x) == chrec2 (y).
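
   For example, if a loop writes A[2*i] and reads A[2*i + 1], this
   equation becomes
   |             2*x == 2*y + 1
   which has no integer solution, so no element is accessed twice and
   the two references are independent.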
30 
31    The goals of this analysis are:
32 
   - to determine independence: the relation between two
     independent accesses is qualified with chrec_known (this
     information allows loop parallelization),
36 
   - when two data references access the same data, to qualify the
     dependence relation with the classic dependence representations
     (see the example following this list):
39 
40        - distance vectors
41        - direction vectors
42        - loop carried level dependence
43        - polyhedron dependence
44      or with the chains of recurrences based representation,
45 
46    - to define a knowledge base for storing the data dependence
47      information,
48 
49    - to define an interface to access this data.
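
   For example (illustrating the classic representations above), in a
   loop over i the statement
   |   A[i+1] = A[i] + 1
   writes at iteration i the element that is read at iteration i+1: a
   loop-carried dependence with distance vector (1) and direction
   vector (+).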
50 
51 
52    Definitions:
53 
   - subscript: given two array accesses, a subscript is the tuple
   composed of their access functions for a given dimension.  Example:
56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57    (f1, g1), (f2, g2), (f3, g3).
58 
   - Diophantine equation: an equation with integer coefficients for
   which only integer solutions are sought, for example the equation
   |   3*x + 2*y = 1
   has the integer solution x = 1 and y = -1.
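
   Equations of this kind arise when two affine access functions are
   equated, and a classic result decides their solvability:
   a*x + b*y = c has an integer solution if and only if gcd (a, b)
   divides c.  For example
   |   2*x - 2*y = 1
   has no solution, since gcd (2, -2) = 2 does not divide 1.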
63 
64    References:
65 
66    - "Advanced Compilation for High Performance Computing" by Randy
67    Allen and Ken Kennedy.
68    http://citeseer.ist.psu.edu/goff91practical.html
69 
70    - "Loop Transformations for Restructuring Compilers - The Foundations"
71    by Utpal Banerjee.
72 
73 
74 */
75 
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "builtins.h"
97 #include "tree-eh.h"
98 #include "ssa.h"
99 #include "internal-fn.h"
100 #include "range-op.h"
101 #include "vr-values.h"
102 
103 static struct datadep_stats
104 {
105   int num_dependence_tests;
106   int num_dependence_dependent;
107   int num_dependence_independent;
108   int num_dependence_undetermined;
109 
110   int num_subscript_tests;
111   int num_subscript_undetermined;
112   int num_same_subscript_function;
113 
114   int num_ziv;
115   int num_ziv_independent;
116   int num_ziv_dependent;
117   int num_ziv_unimplemented;
118 
119   int num_siv;
120   int num_siv_independent;
121   int num_siv_dependent;
122   int num_siv_unimplemented;
123 
124   int num_miv;
125   int num_miv_independent;
126   int num_miv_dependent;
127   int num_miv_unimplemented;
128 } dependence_stats;
129 
130 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
131 					   unsigned int, unsigned int,
132 					   class loop *);
133 /* Returns true iff A divides B.  */
134 
135 static inline bool
tree_fold_divides_p (const_tree a, const_tree b)
137 {
138   gcc_assert (TREE_CODE (a) == INTEGER_CST);
139   gcc_assert (TREE_CODE (b) == INTEGER_CST);
140   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
141 }
142 
143 /* Returns true iff A divides B.  */
144 
145 static inline bool
int_divides_p (lambda_int a, lambda_int b)
147 {
148   return ((b % a) == 0);
149 }
150 
151 /* Return true if reference REF contains a union access.  */
152 
153 static bool
ref_contains_union_access_p (tree ref)
155 {
156   while (handled_component_p (ref))
157     {
158       ref = TREE_OPERAND (ref, 0);
159       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
160 	  || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
161 	return true;
162     }
163   return false;
164 }
165 
166 
167 
168 /* Dump into FILE all the data references from DATAREFS.  */
169 
170 static void
dump_data_references (FILE *file, vec&lt;data_reference_p&gt; datarefs)
172 {
173   unsigned int i;
174   struct data_reference *dr;
175 
176   FOR_EACH_VEC_ELT (datarefs, i, dr)
177     dump_data_reference (file, dr);
178 }
179 
/* Unified debug functions dumping a vector of data references to STDERR.  */
181 
182 DEBUG_FUNCTION void
debug (vec&lt;data_reference_p&gt; &amp;ref)
184 {
185   dump_data_references (stderr, ref);
186 }
187 
188 DEBUG_FUNCTION void
debug (vec&lt;data_reference_p&gt; *ptr)
190 {
191   if (ptr)
192     debug (*ptr);
193   else
194     fprintf (stderr, "<nil>\n");
195 }
196 
197 
/* Dump to STDERR all the data references from DATAREFS.  */
199 
200 DEBUG_FUNCTION void
debug_data_references (vec&lt;data_reference_p&gt; datarefs)
202 {
203   dump_data_references (stderr, datarefs);
204 }
205 
206 /* Print to STDERR the data_reference DR.  */
207 
208 DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
210 {
211   dump_data_reference (stderr, dr);
212 }
213 
214 /* Dump function for a DATA_REFERENCE structure.  */
215 
216 void
dump_data_reference (FILE *outf,
218 		     struct data_reference *dr)
219 {
220   unsigned int i;
221 
222   fprintf (outf, "#(Data Ref: \n");
223   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
224   fprintf (outf, "#  stmt: ");
225   print_gimple_stmt (outf, DR_STMT (dr), 0);
226   fprintf (outf, "#  ref: ");
227   print_generic_stmt (outf, DR_REF (dr));
228   fprintf (outf, "#  base_object: ");
229   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
230 
231   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
232     {
233       fprintf (outf, "#  Access function %d: ", i);
234       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
235     }
236   fprintf (outf, "#)\n");
237 }
238 
/* Unified debug functions dumping a DATA_REFERENCE structure to STDERR.  */
240 
241 DEBUG_FUNCTION void
debug(data_reference & ref)242 debug (data_reference &ref)
243 {
244   dump_data_reference (stderr, &ref);
245 }
246 
247 DEBUG_FUNCTION void
debug (data_reference *ptr)
249 {
250   if (ptr)
251     debug (*ptr);
252   else
253     fprintf (stderr, "<nil>\n");
254 }
255 
256 
257 /* Dumps the affine function described by FN to the file OUTF.  */
258 
259 DEBUG_FUNCTION void
dump_affine_function (FILE *outf, affine_fn fn)
261 {
262   unsigned i;
263   tree coef;
264 
265   print_generic_expr (outf, fn[0], TDF_SLIM);
266   for (i = 1; fn.iterate (i, &coef); i++)
267     {
268       fprintf (outf, " + ");
269       print_generic_expr (outf, coef, TDF_SLIM);
270       fprintf (outf, " * x_%u", i);
271     }
272 }
273 
274 /* Dumps the conflict function CF to the file OUTF.  */
275 
276 DEBUG_FUNCTION void
dump_conflict_function (FILE *outf, conflict_function *cf)
278 {
279   unsigned i;
280 
281   if (cf->n == NO_DEPENDENCE)
282     fprintf (outf, "no dependence");
283   else if (cf->n == NOT_KNOWN)
284     fprintf (outf, "not known");
285   else
286     {
287       for (i = 0; i < cf->n; i++)
288 	{
289 	  if (i != 0)
290 	    fprintf (outf, " ");
291 	  fprintf (outf, "[");
292 	  dump_affine_function (outf, cf->fns[i]);
293 	  fprintf (outf, "]");
294 	}
295     }
296 }
297 
298 /* Dump function for a SUBSCRIPT structure.  */
299 
300 DEBUG_FUNCTION void
dump_subscript (FILE *outf, struct subscript *subscript)
302 {
303   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
304 
305   fprintf (outf, "\n (subscript \n");
306   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
307   dump_conflict_function (outf, cf);
308   if (CF_NONTRIVIAL_P (cf))
309     {
310       tree last_iteration = SUB_LAST_CONFLICT (subscript);
311       fprintf (outf, "\n  last_conflict: ");
312       print_generic_expr (outf, last_iteration);
313     }
314 
315   cf = SUB_CONFLICTS_IN_B (subscript);
316   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
317   dump_conflict_function (outf, cf);
318   if (CF_NONTRIVIAL_P (cf))
319     {
320       tree last_iteration = SUB_LAST_CONFLICT (subscript);
321       fprintf (outf, "\n  last_conflict: ");
322       print_generic_expr (outf, last_iteration);
323     }
324 
325   fprintf (outf, "\n  (Subscript distance: ");
326   print_generic_expr (outf, SUB_DISTANCE (subscript));
327   fprintf (outf, " ))\n");
328 }
329 
330 /* Print the classic direction vector DIRV to OUTF.  */
331 
332 DEBUG_FUNCTION void
print_direction_vector (FILE *outf,
334 			lambda_vector dirv,
335 			int length)
336 {
337   int eq;
338 
339   for (eq = 0; eq < length; eq++)
340     {
341       enum data_dependence_direction dir = ((enum data_dependence_direction)
342 					    dirv[eq]);
343 
344       switch (dir)
345 	{
346 	case dir_positive:
347 	  fprintf (outf, "    +");
348 	  break;
349 	case dir_negative:
350 	  fprintf (outf, "    -");
351 	  break;
352 	case dir_equal:
353 	  fprintf (outf, "    =");
354 	  break;
355 	case dir_positive_or_equal:
356 	  fprintf (outf, "   +=");
357 	  break;
358 	case dir_positive_or_negative:
359 	  fprintf (outf, "   +-");
360 	  break;
361 	case dir_negative_or_equal:
362 	  fprintf (outf, "   -=");
363 	  break;
364 	case dir_star:
365 	  fprintf (outf, "    *");
366 	  break;
367 	default:
368 	  fprintf (outf, "indep");
369 	  break;
370 	}
371     }
372   fprintf (outf, "\n");
373 }
374 
375 /* Print a vector of direction vectors.  */
376 
377 DEBUG_FUNCTION void
print_dir_vectors (FILE *outf, vec&lt;lambda_vector&gt; dir_vects,
379 		   int length)
380 {
381   unsigned j;
382   lambda_vector v;
383 
384   FOR_EACH_VEC_ELT (dir_vects, j, v)
385     print_direction_vector (outf, v, length);
386 }
387 
/* Print out the vector VECTOR of length N to OUTFILE.  */
389 
390 DEBUG_FUNCTION void
print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
392 {
393   int i;
394 
395   for (i = 0; i < n; i++)
396     fprintf (outfile, "%3d ", (int)vector[i]);
397   fprintf (outfile, "\n");
398 }
399 
400 /* Print a vector of distance vectors.  */
401 
402 DEBUG_FUNCTION void
print_dist_vectors (FILE *outf, vec&lt;lambda_vector&gt; dist_vects,
404 		    int length)
405 {
406   unsigned j;
407   lambda_vector v;
408 
409   FOR_EACH_VEC_ELT (dist_vects, j, v)
410     print_lambda_vector (outf, v, length);
411 }
412 
413 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
414 
415 DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf,
417 			       struct data_dependence_relation *ddr)
418 {
419   struct data_reference *dra, *drb;
420 
421   fprintf (outf, "(Data Dep: \n");
422 
423   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
424     {
425       if (ddr)
426 	{
427 	  dra = DDR_A (ddr);
428 	  drb = DDR_B (ddr);
429 	  if (dra)
430 	    dump_data_reference (outf, dra);
431 	  else
432 	    fprintf (outf, "    (nil)\n");
433 	  if (drb)
434 	    dump_data_reference (outf, drb);
435 	  else
436 	    fprintf (outf, "    (nil)\n");
437 	}
438       fprintf (outf, "    (don't know)\n)\n");
439       return;
440     }
441 
442   dra = DDR_A (ddr);
443   drb = DDR_B (ddr);
444   dump_data_reference (outf, dra);
445   dump_data_reference (outf, drb);
446 
447   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
448     fprintf (outf, "    (no dependence)\n");
449 
450   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
451     {
452       unsigned int i;
453       class loop *loopi;
454 
455       subscript *sub;
456       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
457 	{
458 	  fprintf (outf, "  access_fn_A: ");
459 	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
460 	  fprintf (outf, "  access_fn_B: ");
461 	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
462 	  dump_subscript (outf, sub);
463 	}
464 
465       fprintf (outf, "  loop nest: (");
466       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
467 	fprintf (outf, "%d ", loopi->num);
468       fprintf (outf, ")\n");
469 
470       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
471 	{
472 	  fprintf (outf, "  distance_vector: ");
473 	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
474 			       DDR_NB_LOOPS (ddr));
475 	}
476 
477       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
478 	{
479 	  fprintf (outf, "  direction_vector: ");
480 	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
481 				  DDR_NB_LOOPS (ddr));
482 	}
483     }
484 
485   fprintf (outf, ")\n");
486 }
487 
488 /* Debug version.  */
489 
490 DEBUG_FUNCTION void
debug_data_dependence_relation (struct data_dependence_relation *ddr)
492 {
493   dump_data_dependence_relation (stderr, ddr);
494 }
495 
496 /* Dump into FILE all the dependence relations from DDRS.  */
497 
498 DEBUG_FUNCTION void
dump_data_dependence_relations (FILE *file,
500 				vec<ddr_p> ddrs)
501 {
502   unsigned int i;
503   struct data_dependence_relation *ddr;
504 
505   FOR_EACH_VEC_ELT (ddrs, i, ddr)
506     dump_data_dependence_relation (file, ddr);
507 }
508 
509 DEBUG_FUNCTION void
debug (vec&lt;ddr_p&gt; &amp;ref)
511 {
512   dump_data_dependence_relations (stderr, ref);
513 }
514 
515 DEBUG_FUNCTION void
debug (vec&lt;ddr_p&gt; *ptr)
517 {
518   if (ptr)
519     debug (*ptr);
520   else
521     fprintf (stderr, "<nil>\n");
522 }
523 
524 
525 /* Dump to STDERR all the dependence relations from DDRS.  */
526 
527 DEBUG_FUNCTION void
debug_data_dependence_relations (vec&lt;ddr_p&gt; ddrs)
529 {
530   dump_data_dependence_relations (stderr, ddrs);
531 }
532 
/* Dumps the distance and direction vectors in FILE.  DDRS contains
   the dependence relations; the size of each dependence vector is
   the number of loops in the considered nest (DDR_NB_LOOPS).  */
537 
538 DEBUG_FUNCTION void
dump_dist_dir_vectors (FILE *file, vec&lt;ddr_p&gt; ddrs)
540 {
541   unsigned int i, j;
542   struct data_dependence_relation *ddr;
543   lambda_vector v;
544 
545   FOR_EACH_VEC_ELT (ddrs, i, ddr)
546     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
547       {
548 	FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
549 	  {
550 	    fprintf (file, "DISTANCE_V (");
551 	    print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
552 	    fprintf (file, ")\n");
553 	  }
554 
555 	FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
556 	  {
557 	    fprintf (file, "DIRECTION_V (");
558 	    print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
559 	    fprintf (file, ")\n");
560 	  }
561       }
562 
563   fprintf (file, "\n\n");
564 }
565 
566 /* Dumps the data dependence relations DDRS in FILE.  */
567 
568 DEBUG_FUNCTION void
dump_ddrs (FILE *file, vec&lt;ddr_p&gt; ddrs)
570 {
571   unsigned int i;
572   struct data_dependence_relation *ddr;
573 
574   FOR_EACH_VEC_ELT (ddrs, i, ddr)
575     dump_data_dependence_relation (file, ddr);
576 
577   fprintf (file, "\n\n");
578 }
579 
580 DEBUG_FUNCTION void
debug_ddrs (vec&lt;ddr_p&gt; ddrs)
582 {
583   dump_ddrs (stderr, ddrs);
584 }
585 
586 /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
587    OP0 CODE OP1, where:
588 
589    - OP0 CODE OP1 has integral type TYPE
590    - the range of OP0 is given by OP0_RANGE and
591    - the range of OP1 is given by OP1_RANGE.
592 
593    Independently of RESULT_RANGE, try to compute:
594 
595      DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
596 	     - (sizetype) (OP0 CODE OP1)
597 
598    as a constant and subtract DELTA from the ssizetype constant in *OFF.
599    Return true on success, or false if DELTA is not known at compile time.
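
   For example, if TYPE is an 8-bit unsigned type, CODE is PLUS_EXPR and
   both OP0 and OP1 are known to be 200, then:

     DELTA = (200 + 200) - (sizetype) (unsigned char) (200 + 200)
	   = 400 - 144
	   = 256

   i.e. the addition overflows TYPE exactly once, so 256 is subtracted
   from the ssizetype constant in *OFF.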
600 
601    Truncation and sign changes are known to distribute over CODE, i.e.
602 
603      (itype) (A CODE B) == (itype) A CODE (itype) B
604 
605    for any integral type ITYPE whose precision is no greater than the
606    precision of A and B.  */
607 
608 static bool
compute_distributive_range (tree type, value_range &amp;op0_range,
610 			    tree_code code, value_range &op1_range,
611 			    tree *off, value_range *result_range)
612 {
613   gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
614   if (result_range)
615     {
616       range_operator *op = range_op_handler (code, type);
617       op->fold_range (*result_range, type, op0_range, op1_range);
618     }
619 
620   /* The distributive property guarantees that if TYPE is no narrower
621      than SIZETYPE,
622 
623        (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1
624 
625      and so we can treat DELTA as zero.  */
626   if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
627     return true;
628 
629   /* If overflow is undefined, we can assume that:
630 
631        X == (ssizetype) OP0 CODE (ssizetype) OP1
632 
633      is within the range of TYPE, i.e.:
634 
635        X == (ssizetype) (TYPE) X
636 
637      Distributing the (TYPE) truncation over X gives:
638 
639        X == (ssizetype) (OP0 CODE OP1)
640 
641      Casting both sides to sizetype and distributing the sizetype cast
642      over X gives:
643 
644        (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)
645 
646      and so we can treat DELTA as zero.  */
647   if (TYPE_OVERFLOW_UNDEFINED (type))
648     return true;
649 
650   /* Compute the range of:
651 
652        (ssizetype) OP0 CODE (ssizetype) OP1
653 
654      The distributive property guarantees that this has the same bitpattern as:
655 
656        (sizetype) OP0 CODE (sizetype) OP1
657 
658      but its range is more conducive to analysis.  */
659   range_cast (op0_range, ssizetype);
660   range_cast (op1_range, ssizetype);
661   value_range wide_range;
662   range_operator *op = range_op_handler (code, ssizetype);
663   bool saved_flag_wrapv = flag_wrapv;
664   flag_wrapv = 1;
665   op->fold_range (wide_range, ssizetype, op0_range, op1_range);
666   flag_wrapv = saved_flag_wrapv;
667   if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
668     return false;
669 
670   wide_int lb = wide_range.lower_bound ();
671   wide_int ub = wide_range.upper_bound ();
672 
673   /* Calculate the number of times that each end of the range overflows or
674      underflows TYPE.  We can only calculate DELTA if the numbers match.  */
675   unsigned int precision = TYPE_PRECISION (type);
676   if (!TYPE_UNSIGNED (type))
677     {
678       wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
679       lb -= type_min;
680       ub -= type_min;
681     }
682   wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
683   lb &= upper_bits;
684   ub &= upper_bits;
685   if (lb != ub)
686     return false;
687 
688   /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
689      negative values indicating underflow.  The low PRECISION bits of LB
690      are clear, so DELTA is therefore LB (== UB).  */
691   *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
692   return true;
693 }
694 
695 /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
696    given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
697    FROM_TYPE are integral types.  */
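/* For example: if OP is a 32-bit int whose range is known to be
   [0, 100], converting OP to a 16-bit short first does not change the
   eventual sizetype value, so such a conversion is a nop for offset
   purposes.  */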
698 
699 static bool
nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &amp;range)
701 {
702   gcc_assert (INTEGRAL_TYPE_P (to_type)
703 	      && INTEGRAL_TYPE_P (from_type)
704 	      && !TYPE_OVERFLOW_TRAPS (to_type)
705 	      && !TYPE_OVERFLOW_TRAPS (from_type));
706 
707   /* Converting to something no narrower than sizetype and then to sizetype
708      is equivalent to converting directly to sizetype.  */
709   if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
710     return true;
711 
712   /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
713   if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
714       && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
715     return true;
716 
717   /* For narrowing conversions, we could in principle test whether
718      the bits in FROM_TYPE but not in TO_TYPE have a fixed value
719      and apply a constant adjustment.
720 
721      For other conversions (which involve a sign change) we could
722      check that the signs are always equal, and apply a constant
723      adjustment if the signs are negative.
724 
725      However, both cases should be rare.  */
726   return range_fits_type_p (&range, TYPE_PRECISION (to_type),
727 			    TYPE_SIGN (to_type));
728 }
729 
730 static void
731 split_constant_offset (tree type, tree *var, tree *off,
732 		       value_range *result_range,
733 		       hash_map<tree, std::pair<tree, tree> > &cache,
734 		       unsigned *limit);
735 
736 /* Helper function for split_constant_offset.  If TYPE is a pointer type,
737    try to express OP0 CODE OP1 as:
738 
739      POINTER_PLUS <*VAR, (sizetype) *OFF>
740 
741    where:
742 
743    - *VAR has type TYPE
744    - *OFF is a constant of type ssizetype.
745 
746    If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
747 
748      *VAR + (sizetype) *OFF
749 
750    where:
751 
752    - *VAR has type sizetype
753    - *OFF is a constant of type ssizetype.
754 
755    In both cases, OP0 CODE OP1 has type TYPE.
756 
757    Return true on success.  A false return value indicates that we can't
758    do better than set *OFF to zero.
759 
760    When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
761    if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
762 
763    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
764    visited.  LIMIT counts down the number of SSA names that we are
765    allowed to process before giving up.  */
766 
767 static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
769 			 tree *var, tree *off, value_range *result_range,
770 			 hash_map<tree, std::pair<tree, tree> > &cache,
771 			 unsigned *limit)
772 {
773   tree var0, var1;
774   tree off0, off1;
775   value_range op0_range, op1_range;
776 
777   *var = NULL_TREE;
778   *off = NULL_TREE;
779 
780   switch (code)
781     {
782     case INTEGER_CST:
783       *var = size_int (0);
784       *off = fold_convert (ssizetype, op0);
785       if (result_range)
786 	result_range->set (op0, op0);
787       return true;
788 
789     case POINTER_PLUS_EXPR:
790       split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
791       split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
792       *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
793       *off = size_binop (PLUS_EXPR, off0, off1);
794       return true;
795 
796     case PLUS_EXPR:
797     case MINUS_EXPR:
798       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
799       split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
800       *off = size_binop (code, off0, off1);
801       if (!compute_distributive_range (type, op0_range, code, op1_range,
802 				       off, result_range))
803 	return false;
804       *var = fold_build2 (code, sizetype, var0, var1);
805       return true;
806 
807     case MULT_EXPR:
808       if (TREE_CODE (op1) != INTEGER_CST)
809 	return false;
810 
811       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
812       op1_range.set (op1, op1);
813       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
814       if (!compute_distributive_range (type, op0_range, code, op1_range,
815 				       off, result_range))
816 	return false;
817       *var = fold_build2 (MULT_EXPR, sizetype, var0,
818 			  fold_convert (sizetype, op1));
819       return true;
820 
821     case ADDR_EXPR:
822       {
823 	tree base, poffset;
824 	poly_int64 pbitsize, pbitpos, pbytepos;
825 	machine_mode pmode;
826 	int punsignedp, preversep, pvolatilep;
827 
828 	op0 = TREE_OPERAND (op0, 0);
829 	base
830 	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
831 				 &punsignedp, &preversep, &pvolatilep);
832 
833 	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
834 	  return false;
835 	base = build_fold_addr_expr (base);
836 	off0 = ssize_int (pbytepos);
837 
838 	if (poffset)
839 	  {
840 	    split_constant_offset (poffset, &poffset, &off1, nullptr,
841 				   cache, limit);
842 	    off0 = size_binop (PLUS_EXPR, off0, off1);
843 	    base = fold_build_pointer_plus (base, poffset);
844 	  }
845 
846 	var0 = fold_convert (type, base);
847 
	/* If variable-length types are involved, punt; otherwise casts
	   might be converted into ARRAY_REFs in gimplify_conversion.
	   To compute such an ARRAY_REF's element size, TYPE_SIZE_UNIT,
	   which possibly no longer appears in the current GIMPLE, might
	   resurface.  This could perhaps be handled as:
	   if (CONVERT_EXPR_P (var0))
	     {
	       gimplify_conversion (&var0);
	       // Attempt to fill in the element size of any ARRAY_REF
	       // found within var0 from the corresponding ARRAY_REF
	       // embedded in op; if unsuccessful, just punt.
	     }  */
860 	while (POINTER_TYPE_P (type))
861 	  type = TREE_TYPE (type);
862 	if (int_size_in_bytes (type) < 0)
863 	  return false;
864 
865 	*var = var0;
866 	*off = off0;
867 	return true;
868       }
869 
870     case SSA_NAME:
871       {
872 	if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
873 	  return false;
874 
875 	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
876 	enum tree_code subcode;
877 
878 	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
879 	  return false;
880 
881 	subcode = gimple_assign_rhs_code (def_stmt);
882 
883 	/* We are using a cache to avoid un-CSEing large amounts of code.  */
884 	bool use_cache = false;
885 	if (!has_single_use (op0)
886 	    && (subcode == POINTER_PLUS_EXPR
887 		|| subcode == PLUS_EXPR
888 		|| subcode == MINUS_EXPR
889 		|| subcode == MULT_EXPR
890 		|| subcode == ADDR_EXPR
891 		|| CONVERT_EXPR_CODE_P (subcode)))
892 	  {
893 	    use_cache = true;
894 	    bool existed;
895 	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
896 	    if (existed)
897 	      {
898 		if (integer_zerop (e.second))
899 		  return false;
900 		*var = e.first;
901 		*off = e.second;
902 		/* The caller sets the range in this case.  */
903 		return true;
904 	      }
905 	    e = std::make_pair (op0, ssize_int (0));
906 	  }
907 
908 	if (*limit == 0)
909 	  return false;
910 	--*limit;
911 
912 	var0 = gimple_assign_rhs1 (def_stmt);
913 	var1 = gimple_assign_rhs2 (def_stmt);
914 
915 	bool res = split_constant_offset_1 (type, var0, subcode, var1,
916 					    var, off, nullptr, cache, limit);
917 	if (res && use_cache)
918 	  *cache.get (op0) = std::make_pair (*var, *off);
919 	/* The caller sets the range in this case.  */
920 	return res;
921       }
922     CASE_CONVERT:
923       {
924 	/* We can only handle the following conversions:
925 
926 	   - Conversions from one pointer type to another pointer type.
927 
928 	   - Conversions from one non-trapping integral type to another
929 	     non-trapping integral type.  In this case, the recursive
930 	     call makes sure that:
931 
932 	       (sizetype) OP0
933 
934 	     can be expressed as a sizetype operation involving VAR and OFF,
935 	     and all we need to do is check whether:
936 
937 	       (sizetype) OP0 == (sizetype) (TYPE) OP0
938 
939 	   - Conversions from a non-trapping sizetype-size integral type to
940 	     a like-sized pointer type.  In this case, the recursive call
941 	     makes sure that:
942 
943 	       (sizetype) OP0 == *VAR + (sizetype) *OFF
944 
945 	     and we can convert that to:
946 
947 	       POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
948 
949 	   - Conversions from a sizetype-sized pointer type to a like-sized
950 	     non-trapping integral type.  In this case, the recursive call
951 	     makes sure that:
952 
953 	       OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
954 
955 	     where the POINTER_PLUS and *VAR have the same precision as
956 	     TYPE (and the same precision as sizetype).  Then:
957 
958 	       (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
959 	tree itype = TREE_TYPE (op0);
960 	if ((POINTER_TYPE_P (itype)
961 	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
962 	    && (POINTER_TYPE_P (type)
963 		|| (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
964 	    && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
965 		|| (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
966 		    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
967 	  {
968 	    if (POINTER_TYPE_P (type))
969 	      {
970 		split_constant_offset (op0, var, off, nullptr, cache, limit);
971 		*var = fold_convert (type, *var);
972 	      }
973 	    else if (POINTER_TYPE_P (itype))
974 	      {
975 		split_constant_offset (op0, var, off, nullptr, cache, limit);
976 		*var = fold_convert (sizetype, *var);
977 	      }
978 	    else
979 	      {
980 		split_constant_offset (op0, var, off, &op0_range,
981 				       cache, limit);
982 		if (!nop_conversion_for_offset_p (type, itype, op0_range))
983 		  return false;
984 		if (result_range)
985 		  {
986 		    *result_range = op0_range;
987 		    range_cast (*result_range, type);
988 		  }
989 	      }
990 	    return true;
991 	  }
992 	return false;
993       }
994 
995     default:
996       return false;
997     }
998 }
999 
1000 /* If EXP has pointer type, try to express it as:
1001 
1002      POINTER_PLUS <*VAR, (sizetype) *OFF>
1003 
1004    where:
1005 
1006    - *VAR has the same type as EXP
1007    - *OFF is a constant of type ssizetype.
1008 
1009    If EXP has an integral type, try to express (sizetype) EXP as:
1010 
1011      *VAR + (sizetype) *OFF
1012 
1013    where:
1014 
1015    - *VAR has type sizetype
1016    - *OFF is a constant of type ssizetype.
1017 
1018    If EXP_RANGE is nonnull, set it to the range of EXP.
1019 
1020    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
1021    visited.  LIMIT counts down the number of SSA names that we are
1022    allowed to process before giving up.  */
1023 
1024 static void
split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
1026 		       hash_map<tree, std::pair<tree, tree> > &cache,
1027 		       unsigned *limit)
1028 {
1029   tree type = TREE_TYPE (exp), op0, op1;
1030   enum tree_code code;
1031 
1032   code = TREE_CODE (exp);
1033   if (exp_range)
1034     {
1035       *exp_range = type;
1036       if (code == SSA_NAME)
1037 	{
1038 	  wide_int var_min, var_max;
1039 	  value_range_kind vr_kind = get_range_info (exp, &var_min, &var_max);
1040 	  wide_int var_nonzero = get_nonzero_bits (exp);
1041 	  vr_kind = intersect_range_with_nonzero_bits (vr_kind,
1042 						       &var_min, &var_max,
1043 						       var_nonzero,
1044 						       TYPE_SIGN (type));
1045 	  if (vr_kind == VR_RANGE)
1046 	    *exp_range = value_range (type, var_min, var_max);
1047 	}
1048     }
1049 
1050   if (!tree_is_chrec (exp)
1051       && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
1052     {
1053       extract_ops_from_tree (exp, &code, &op0, &op1);
1054       if (split_constant_offset_1 (type, op0, code, op1, var, off,
1055 				   exp_range, cache, limit))
1056 	return;
1057     }
1058 
1059   *var = exp;
1060   if (INTEGRAL_TYPE_P (type))
1061     *var = fold_convert (sizetype, *var);
1062   *off = ssize_int (0);
1063   if (exp_range && code != SSA_NAME)
1064     {
1065       wide_int var_min, var_max;
1066       if (determine_value_range (exp, &var_min, &var_max) == VR_RANGE)
1067 	*exp_range = value_range (type, var_min, var_max);
1068     }
1069 }
1070 
1071 /* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
1072    type as EXP while OFF has type ssizetype.  */
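/* For example, for the (hypothetical) GIMPLE fragment

     b_2 = a_1 + 4;
     c_3 = b_2 + 8;

   with int overflow undefined, splitting EXP = c_3 yields *VAR = a_1
   and *OFF = 12; the constant offsets are accumulated through the SSA
   definition chain.  */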
1073 
1074 void
split_constant_offset (tree exp, tree *var, tree *off)
1076 {
1077   unsigned limit = param_ssa_name_def_chain_limit;
1078   static hash_map<tree, std::pair<tree, tree> > *cache;
1079   if (!cache)
1080     cache = new hash_map<tree, std::pair<tree, tree> > (37);
1081   split_constant_offset (exp, var, off, nullptr, *cache, &limit);
1082   *var = fold_convert (TREE_TYPE (exp), *var);
1083   cache->empty ();
1084 }
1085 
/* Returns the address ADDR of an object in a canonical shape (without
   nop casts, and with the type of a pointer to the object).  */
1088 
1089 static tree
canonicalize_base_object_address (tree addr)
1091 {
1092   tree orig = addr;
1093 
1094   STRIP_NOPS (addr);
1095 
1096   /* The base address may be obtained by casting from integer, in that case
1097      keep the cast.  */
1098   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
1099     return orig;
1100 
1101   if (TREE_CODE (addr) != ADDR_EXPR)
1102     return addr;
1103 
1104   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
1105 }
1106 
1107 /* Analyze the behavior of memory reference REF within STMT.
1108    There are two modes:
1109 
1110    - BB analysis.  In this case we simply split the address into base,
1111      init and offset components, without reference to any containing loop.
1112      The resulting base and offset are general expressions and they can
1113      vary arbitrarily from one iteration of the containing loop to the next.
1114      The step is always zero.
1115 
1116    - loop analysis.  In this case we analyze the reference both wrt LOOP
1117      and on the basis that the reference occurs (is "used") in LOOP;
1118      see the comment above analyze_scalar_evolution_in_loop for more
1119      information about this distinction.  The base, init, offset and
1120      step fields are all invariant in LOOP.
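
     For example, analyzed in its containing loop, a reference a[i + 4]
     to an array of 4-byte elements, with i starting at 0 and stepping
     by 1, has base_address &a, offset 0, init 16 and step 4.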
1121 
1122    Perform BB analysis if LOOP is null, or if LOOP is the function's
1123    dummy outermost loop.  In other cases perform loop analysis.
1124 
1125    Return true if the analysis succeeded and store the results in DRB if so.
1126    BB analysis can only fail for bitfield or reversed-storage accesses.  */
1127 
1128 opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
1130 		      class loop *loop, const gimple *stmt)
1131 {
1132   poly_int64 pbitsize, pbitpos;
1133   tree base, poffset;
1134   machine_mode pmode;
1135   int punsignedp, preversep, pvolatilep;
1136   affine_iv base_iv, offset_iv;
1137   tree init, dinit, step;
1138   bool in_loop = (loop && loop->num);
1139 
1140   if (dump_file && (dump_flags & TDF_DETAILS))
1141     fprintf (dump_file, "analyze_innermost: ");
1142 
1143   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
1144 			      &punsignedp, &preversep, &pvolatilep);
1145   gcc_assert (base != NULL_TREE);
1146 
1147   poly_int64 pbytepos;
1148   if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
1149     return opt_result::failure_at (stmt,
1150 				   "failed: bit offset alignment.\n");
1151 
1152   if (preversep)
1153     return opt_result::failure_at (stmt,
1154 				   "failed: reverse storage order.\n");
1155 
1156   /* Calculate the alignment and misalignment for the inner reference.  */
1157   unsigned int HOST_WIDE_INT bit_base_misalignment;
1158   unsigned int bit_base_alignment;
1159   get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
1160 
1161   /* There are no bitfield references remaining in BASE, so the values
1162      we got back must be whole bytes.  */
1163   gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
1164 	      && bit_base_misalignment % BITS_PER_UNIT == 0);
1165   unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
1166   poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
1167 
1168   if (TREE_CODE (base) == MEM_REF)
1169     {
1170       if (!integer_zerop (TREE_OPERAND (base, 1)))
1171 	{
1172 	  /* Subtract MOFF from the base and add it to POFFSET instead.
1173 	     Adjust the misalignment to reflect the amount we subtracted.  */
1174 	  poly_offset_int moff = mem_ref_offset (base);
1175 	  base_misalignment -= moff.force_shwi ();
1176 	  tree mofft = wide_int_to_tree (sizetype, moff);
1177 	  if (!poffset)
1178 	    poffset = mofft;
1179 	  else
1180 	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
1181 	}
1182       base = TREE_OPERAND (base, 0);
1183     }
1184   else
1185     base = build_fold_addr_expr (base);
1186 
1187   if (in_loop)
1188     {
1189       if (!simple_iv (loop, loop, base, &base_iv, true))
1190 	return opt_result::failure_at
1191 	  (stmt, "failed: evolution of base is not affine.\n");
1192     }
1193   else
1194     {
1195       base_iv.base = base;
1196       base_iv.step = ssize_int (0);
1197       base_iv.no_overflow = true;
1198     }
1199 
1200   if (!poffset)
1201     {
1202       offset_iv.base = ssize_int (0);
1203       offset_iv.step = ssize_int (0);
1204     }
1205   else
1206     {
1207       if (!in_loop)
1208         {
1209           offset_iv.base = poffset;
1210           offset_iv.step = ssize_int (0);
1211         }
1212       else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
1213 	return opt_result::failure_at
1214 	  (stmt, "failed: evolution of offset is not affine.\n");
1215     }
1216 
1217   init = ssize_int (pbytepos);
1218 
1219   /* Subtract any constant component from the base and add it to INIT instead.
1220      Adjust the misalignment to reflect the amount we subtracted.  */
1221   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1222   init = size_binop (PLUS_EXPR, init, dinit);
1223   base_misalignment -= TREE_INT_CST_LOW (dinit);
1224 
1225   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1226   init = size_binop (PLUS_EXPR, init, dinit);
1227 
1228   step = size_binop (PLUS_EXPR,
1229 		     fold_convert (ssizetype, base_iv.step),
1230 		     fold_convert (ssizetype, offset_iv.step));
1231 
1232   base = canonicalize_base_object_address (base_iv.base);
1233 
1234   /* See if get_pointer_alignment can guarantee a higher alignment than
1235      the one we calculated above.  */
1236   unsigned int HOST_WIDE_INT alt_misalignment;
1237   unsigned int alt_alignment;
1238   get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1239 
1240   /* As above, these values must be whole bytes.  */
1241   gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1242 	      && alt_misalignment % BITS_PER_UNIT == 0);
1243   alt_alignment /= BITS_PER_UNIT;
1244   alt_misalignment /= BITS_PER_UNIT;
1245 
1246   if (base_alignment < alt_alignment)
1247     {
1248       base_alignment = alt_alignment;
1249       base_misalignment = alt_misalignment;
1250     }
1251 
1252   drb->base_address = base;
1253   drb->offset = fold_convert (ssizetype, offset_iv.base);
1254   drb->init = init;
1255   drb->step = step;
1256   if (known_misalignment (base_misalignment, base_alignment,
1257 			  &drb->base_misalignment))
1258     drb->base_alignment = base_alignment;
1259   else
1260     {
1261       drb->base_alignment = known_alignment (base_misalignment);
1262       drb->base_misalignment = 0;
1263     }
1264   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1265   drb->step_alignment = highest_pow2_factor (step);
1266 
1267   if (dump_file && (dump_flags & TDF_DETAILS))
1268     fprintf (dump_file, "success.\n");
1269 
1270   return opt_result::success ();
1271 }
1272 
1273 /* Return true if OP is a valid component reference for a DR access
1274    function.  This accepts a subset of what handled_component_p accepts.  */
1275 
1276 static bool
access_fn_component_p (tree op)
1278 {
1279   switch (TREE_CODE (op))
1280     {
1281     case REALPART_EXPR:
1282     case IMAGPART_EXPR:
1283     case ARRAY_REF:
1284       return true;
1285 
1286     case COMPONENT_REF:
1287       return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1288 
1289     default:
1290       return false;
1291     }
1292 }
1293 
/* Returns whether a component reference accepted by
   access_fn_component_p can have BASE as its base.  */
1296 
1297 static bool
base_supports_access_fn_components_p (tree base)
1299 {
1300   switch (TREE_CODE (TREE_TYPE (base)))
1301     {
1302     case COMPLEX_TYPE:
1303     case ARRAY_TYPE:
1304     case RECORD_TYPE:
1305       return true;
1306     default:
1307       return false;
1308     }
1309 }
1310 
1311 /* Determines the base object and the list of indices of memory reference
1312    DR, analyzed in LOOP and instantiated before NEST.  */
1313 
1314 static void
dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
1316 {
1317   vec<tree> access_fns = vNULL;
1318   tree ref, op;
1319   tree base, off, access_fn;
1320 
1321   /* If analyzing a basic-block there are no indices to analyze
1322      and thus no access functions.  */
1323   if (!nest)
1324     {
1325       DR_BASE_OBJECT (dr) = DR_REF (dr);
1326       DR_ACCESS_FNS (dr).create (0);
1327       return;
1328     }
1329 
1330   ref = DR_REF (dr);
1331 
1332   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two-element array with a constant index.  The base is
1334      then just the immediate underlying object.  */
1335   if (TREE_CODE (ref) == REALPART_EXPR)
1336     {
1337       ref = TREE_OPERAND (ref, 0);
1338       access_fns.safe_push (integer_zero_node);
1339     }
1340   else if (TREE_CODE (ref) == IMAGPART_EXPR)
1341     {
1342       ref = TREE_OPERAND (ref, 0);
1343       access_fns.safe_push (integer_one_node);
1344     }
1345 
1346   /* Analyze access functions of dimensions we know to be independent.
1347      The list of component references handled here should be kept in
1348      sync with access_fn_component_p.  */
1349   while (handled_component_p (ref))
1350     {
1351       if (TREE_CODE (ref) == ARRAY_REF)
1352 	{
1353 	  op = TREE_OPERAND (ref, 1);
1354 	  access_fn = analyze_scalar_evolution (loop, op);
1355 	  access_fn = instantiate_scev (nest, loop, access_fn);
1356 	  access_fns.safe_push (access_fn);
1357 	}
1358       else if (TREE_CODE (ref) == COMPONENT_REF
1359 	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1360 	{
1361 	  /* For COMPONENT_REFs of records (but not unions!) use the
1362 	     FIELD_DECL offset as constant access function so we can
1363 	     disambiguate a[i].f1 and a[i].f2.  */
1364 	  tree off = component_ref_field_offset (ref);
1365 	  off = size_binop (PLUS_EXPR,
1366 			    size_binop (MULT_EXPR,
1367 					fold_convert (bitsizetype, off),
1368 					bitsize_int (BITS_PER_UNIT)),
1369 			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1370 	  access_fns.safe_push (off);
1371 	}
1372       else
	/* If we have an unhandled component that we could not translate
	   to an access function, stop analyzing.  We have determined
	   our base object in this case.  */
1376 	break;
1377 
1378       ref = TREE_OPERAND (ref, 0);
1379     }
1380 
1381   /* If the address operand of a MEM_REF base has an evolution in the
1382      analyzed nest, add it as an additional independent access-function.  */
1383   if (TREE_CODE (ref) == MEM_REF)
1384     {
1385       op = TREE_OPERAND (ref, 0);
1386       access_fn = analyze_scalar_evolution (loop, op);
1387       access_fn = instantiate_scev (nest, loop, access_fn);
1388       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1389 	{
1390 	  tree orig_type;
1391 	  tree memoff = TREE_OPERAND (ref, 1);
1392 	  base = initial_condition (access_fn);
1393 	  orig_type = TREE_TYPE (base);
1394 	  STRIP_USELESS_TYPE_CONVERSION (base);
1395 	  split_constant_offset (base, &base, &off);
1396 	  STRIP_USELESS_TYPE_CONVERSION (base);
	  /* Fold the MEM_REF offset into the evolution's initial
	     value to make more bases comparable.  */
1399 	  if (!integer_zerop (memoff))
1400 	    {
1401 	      off = size_binop (PLUS_EXPR, off,
1402 				fold_convert (ssizetype, memoff));
1403 	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
1404 	    }
1405 	  /* Adjust the offset so it is a multiple of the access type
1406 	     size and thus we separate bases that can possibly be used
1407 	     to produce partial overlaps (which the access_fn machinery
1408 	     cannot handle).  */
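	  /* E.g. with a 4-byte access type, an offset of 6 is split into
	     a base offset of 4 plus a residual MEM_REF offset of 2.  */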
1409 	  wide_int rem;
1410 	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1411 	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1412 	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1413 	    rem = wi::mod_trunc
1414 	      (wi::to_wide (off),
1415 	       wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1416 	       SIGNED);
1417 	  else
1418 	    /* If we can't compute the remainder simply force the initial
1419 	       condition to zero.  */
1420 	    rem = wi::to_wide (off);
1421 	  off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1422 	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1423 	  /* And finally replace the initial condition.  */
1424 	  access_fn = chrec_replace_initial_condition
1425 	      (access_fn, fold_convert (orig_type, off));
1426 	  /* ???  This is still not a suitable base object for
1427 	     dr_may_alias_p - the base object needs to be an
	     access that covers the object as a whole.  With
1429 	     an evolution in the pointer this cannot be
1430 	     guaranteed.
1431 	     As a band-aid, mark the access so we can special-case
1432 	     it in dr_may_alias_p.  */
1433 	  tree old = ref;
1434 	  ref = fold_build2_loc (EXPR_LOCATION (ref),
1435 				 MEM_REF, TREE_TYPE (ref),
1436 				 base, memoff);
1437 	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1438 	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1439 	  DR_UNCONSTRAINED_BASE (dr) = true;
1440 	  access_fns.safe_push (access_fn);
1441 	}
1442     }
1443   else if (DECL_P (ref))
1444     {
1445       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1446       ref = build2 (MEM_REF, TREE_TYPE (ref),
1447 		    build_fold_addr_expr (ref),
1448 		    build_int_cst (reference_alias_ptr_type (ref), 0));
1449     }
1450 
1451   DR_BASE_OBJECT (dr) = ref;
1452   DR_ACCESS_FNS (dr) = access_fns;
1453 }
1454 
1455 /* Extracts the alias analysis information from the memory reference DR.  */
1456 
1457 static void
dr_analyze_alias (struct data_reference *dr)
1459 {
1460   tree ref = DR_REF (dr);
1461   tree base = get_base_address (ref), addr;
1462 
1463   if (INDIRECT_REF_P (base)
1464       || TREE_CODE (base) == MEM_REF)
1465     {
1466       addr = TREE_OPERAND (base, 0);
1467       if (TREE_CODE (addr) == SSA_NAME)
1468 	DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1469     }
1470 }
1471 
1472 /* Frees data reference DR.  */
1473 
1474 void
free_data_ref (data_reference_p dr)
1476 {
1477   DR_ACCESS_FNS (dr).release ();
1478   free (dr);
1479 }
1480 
1481 /* Analyze memory reference MEMREF, which is accessed in STMT.
1482    The reference is a read if IS_READ is true, otherwise it is a write.
1483    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1484    within STMT, i.e. that it might not occur even if STMT is executed
1485    and runs to completion.
1486 
1487    Return the data_reference description of MEMREF.  NEST is the outermost
1488    loop in which the reference should be instantiated, LOOP is the loop
1489    in which the data reference should be analyzed.  */
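/* For example (a sketch of one typical use, with hypothetical names):
   for a read reference REF in STMT, analyzed in LOOP relative to the
   loop nest rooted at OUTERMOST, a caller would do

     dr = create_data_ref (loop_preheader_edge (outermost), loop,
			   ref, stmt, true, false);

   while basic-block analysis passes a null NEST edge instead.  */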
1490 
1491 struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1493 		 bool is_read, bool is_conditional_in_stmt)
1494 {
1495   struct data_reference *dr;
1496 
1497   if (dump_file && (dump_flags & TDF_DETAILS))
1498     {
1499       fprintf (dump_file, "Creating dr for ");
1500       print_generic_expr (dump_file, memref, TDF_SLIM);
1501       fprintf (dump_file, "\n");
1502     }
1503 
1504   dr = XCNEW (struct data_reference);
1505   DR_STMT (dr) = stmt;
1506   DR_REF (dr) = memref;
1507   DR_IS_READ (dr) = is_read;
1508   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1509 
1510   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1511 			nest != NULL ? loop : NULL, stmt);
1512   dr_analyze_indices (dr, nest, loop);
1513   dr_analyze_alias (dr);
1514 
1515   if (dump_file && (dump_flags & TDF_DETAILS))
1516     {
1517       unsigned i;
1518       fprintf (dump_file, "\tbase_address: ");
1519       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1520       fprintf (dump_file, "\n\toffset from base address: ");
1521       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1522       fprintf (dump_file, "\n\tconstant offset from base address: ");
1523       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1524       fprintf (dump_file, "\n\tstep: ");
1525       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1526       fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1527       fprintf (dump_file, "\n\tbase misalignment: %d",
1528 	       DR_BASE_MISALIGNMENT (dr));
1529       fprintf (dump_file, "\n\toffset alignment: %d",
1530 	       DR_OFFSET_ALIGNMENT (dr));
1531       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1532       fprintf (dump_file, "\n\tbase_object: ");
1533       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1534       fprintf (dump_file, "\n");
1535       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1536 	{
1537 	  fprintf (dump_file, "\tAccess function %d: ", i);
1538 	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1539 	}
1540     }
1541 
1542   return dr;
1543 }
1544 
/*  A helper function that computes the order between two tree expressions
    T1 and T2.  This is used in comparator functions sorting objects based
    on the order of tree expressions.  The function returns -1, 0, or 1.  */
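/* For two INTEGER_CST arguments this reduces to tree_int_cst_compare,
   so e.g. comparing the constants 4 and 8 yields -1.  */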
1548 
1549 int
data_ref_compare_tree (tree t1, tree t2)
1551 {
1552   int i, cmp;
1553   enum tree_code code;
1554   char tclass;
1555 
1556   if (t1 == t2)
1557     return 0;
1558   if (t1 == NULL)
1559     return -1;
1560   if (t2 == NULL)
1561     return 1;
1562 
1563   STRIP_USELESS_TYPE_CONVERSION (t1);
1564   STRIP_USELESS_TYPE_CONVERSION (t2);
1565   if (t1 == t2)
1566     return 0;
1567 
1568   if (TREE_CODE (t1) != TREE_CODE (t2)
1569       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1570     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1571 
1572   code = TREE_CODE (t1);
1573   switch (code)
1574     {
1575     case INTEGER_CST:
1576       return tree_int_cst_compare (t1, t2);
1577 
1578     case STRING_CST:
1579       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1580 	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1581       return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1582 		     TREE_STRING_LENGTH (t1));
1583 
1584     case SSA_NAME:
1585       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1586 	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1587       break;
1588 
1589     default:
1590       if (POLY_INT_CST_P (t1))
1591 	return compare_sizes_for_sort (wi::to_poly_widest (t1),
1592 				       wi::to_poly_widest (t2));
1593 
1594       tclass = TREE_CODE_CLASS (code);
1595 
1596       /* For decls, compare their UIDs.  */
1597       if (tclass == tcc_declaration)
1598 	{
1599 	  if (DECL_UID (t1) != DECL_UID (t2))
1600 	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1601 	  break;
1602 	}
1603       /* For expressions, compare their operands recursively.  */
1604       else if (IS_EXPR_CODE_CLASS (tclass))
1605 	{
1606 	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1607 	    {
1608 	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1609 					   TREE_OPERAND (t2, i));
1610 	      if (cmp != 0)
1611 		return cmp;
1612 	    }
1613 	}
1614       else
1615 	gcc_unreachable ();
1616     }
1617 
1618   return 0;
1619 }
1620 
/* Return TRUE if it's possible to resolve data dependence DDR with a
   runtime alias check.  */
1623 
1624 opt_result
runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1626 {
1627   if (dump_enabled_p ())
1628     dump_printf (MSG_NOTE,
1629 		 "consider run-time aliasing test between %T and %T\n",
1630 		 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1631 
1632   if (!speed_p)
1633     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1634 				   "runtime alias check not supported when"
1635 				   " optimizing for size.\n");
1636 
1637   /* FORNOW: We don't support versioning with outer-loop in either
1638      vectorization or loop distribution.  */
1639   if (loop != NULL && loop->inner != NULL)
1640     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1641 				   "runtime alias check not supported for"
1642 				   " outer loop.\n");
1643 
1644   return opt_result::success ();
1645 }
1646 
1647 /* Operator == between two dr_with_seg_len objects.
1648 
1649    This equality operator is used to make sure two data refs
   are the same, so that we will consider combining the
   aliasing checks of those two pairs of data-dependent data
   refs.  */
1653 
1654 static bool
1655 operator == (const dr_with_seg_len& d1,
1656 	     const dr_with_seg_len& d2)
1657 {
1658   return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1659 			   DR_BASE_ADDRESS (d2.dr), 0)
1660 	  && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1661 	  && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1662 	  && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1663 	  && known_eq (d1.access_size, d2.access_size)
1664 	  && d1.align == d2.align);
1665 }
1666 
1667 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1668    so that we can combine aliasing checks in one scan.  */
1669 
1670 static int
1671 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1672 {
1673   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1674   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1675   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1676   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1677 
1678   /* For DR pairs (a, b) and (c, d), we only consider merging the alias
1679      checks if a and c have the same base address and step, and b and d
1680      have the same base address and step.  Otherwise the relative order
1681      of the two pairs after sorting does not matter.  */
1682   int comp_res;
1683 
1684   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1685 					 DR_BASE_ADDRESS (b1.dr))) != 0)
1686     return comp_res;
1687   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1688 					 DR_BASE_ADDRESS (b2.dr))) != 0)
1689     return comp_res;
1690   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1691 					 DR_STEP (b1.dr))) != 0)
1692     return comp_res;
1693   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1694 					 DR_STEP (b2.dr))) != 0)
1695     return comp_res;
1696   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1697 					 DR_OFFSET (b1.dr))) != 0)
1698     return comp_res;
1699   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1700 					 DR_INIT (b1.dr))) != 0)
1701     return comp_res;
1702   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1703 					 DR_OFFSET (b2.dr))) != 0)
1704     return comp_res;
1705   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1706 					 DR_INIT (b2.dr))) != 0)
1707     return comp_res;
1708 
1709   return 0;
1710 }
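
/* Illustration only, not part of GCC: a minimal standalone sketch of the
   lexicographic comparator pattern used above, reduced to a pair of ints
   for qsort.  All names are hypothetical; the block is guarded by #if 0
   so it is never compiled.  */
#if 0
#include <stdlib.h>

struct int_pair { int first, second; };

static int
comp_int_pair (const void *pa_, const void *pb_)
{
  const struct int_pair *pa = (const struct int_pair *) pa_;
  const struct int_pair *pb = (const struct int_pair *) pb_;
  /* Compare the keys in priority order, so that entries with equal keys
     end up adjacent after qsort, just as alias pairs with equal bases
     and steps end up adjacent above.  */
  if (pa->first != pb->first)
    return pa->first < pb->first ? -1 : 1;
  if (pa->second != pb->second)
    return pa->second < pb->second ? -1 : 1;
  return 0;
}
#endif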
1711 
1712 /* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */
1713 
1714 static void
1715 dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1716 {
1717   dump_printf (MSG_NOTE, "%sreference:      %T vs. %T\n", indent,
1718 	       DR_REF (alias_pair->first.dr),
1719 	       DR_REF (alias_pair->second.dr));
1720 
1721   dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1722 	       alias_pair->first.seg_len);
1723   if (!operand_equal_p (alias_pair->first.seg_len,
1724 			alias_pair->second.seg_len, 0))
1725     dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1726 
1727   dump_printf (MSG_NOTE, "\n%saccess size:    ", indent);
1728   dump_dec (MSG_NOTE, alias_pair->first.access_size);
1729   if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1730     {
1731       dump_printf (MSG_NOTE, " vs. ");
1732       dump_dec (MSG_NOTE, alias_pair->second.access_size);
1733     }
1734 
1735   dump_printf (MSG_NOTE, "\n%salignment:      %d", indent,
1736 	       alias_pair->first.align);
1737   if (alias_pair->first.align != alias_pair->second.align)
1738     dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1739 
1740   dump_printf (MSG_NOTE, "\n%sflags:         ", indent);
1741   if (alias_pair->flags & DR_ALIAS_RAW)
1742     dump_printf (MSG_NOTE, " RAW");
1743   if (alias_pair->flags & DR_ALIAS_WAR)
1744     dump_printf (MSG_NOTE, " WAR");
1745   if (alias_pair->flags & DR_ALIAS_WAW)
1746     dump_printf (MSG_NOTE, " WAW");
1747   if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1748     dump_printf (MSG_NOTE, " ARBITRARY");
1749   if (alias_pair->flags & DR_ALIAS_SWAPPED)
1750     dump_printf (MSG_NOTE, " SWAPPED");
1751   if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1752     dump_printf (MSG_NOTE, " UNSWAPPED");
1753   if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1754     dump_printf (MSG_NOTE, " MIXED_STEPS");
1755   if (alias_pair->flags == 0)
1756     dump_printf (MSG_NOTE, " <none>");
1757   dump_printf (MSG_NOTE, "\n");
1758 }
1759 
1760 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1761    FACTOR is the number of iterations in which each data reference is accessed.
1762 
1763    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1764    we create an expression:
1765 
1766    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1767    || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1768 
1769    for aliasing checks.  However, in some cases we can decrease the number
1770    of checks by combining two checks into one.  For example, suppose we have
1771    another pair of data refs store_ptr_0 & load_ptr_1; if the following
1772    condition is satisfied:
1773 
1774    load_ptr_0 < load_ptr_1  &&
1775    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1776 
1777    (this condition means that, in each iteration of the vectorized loop,
1778    the memory accessed through store_ptr_0 cannot lie between the memory
1779    accessed through load_ptr_0 and load_ptr_1.)
1780 
1781    we can then use only the following expression to finish the aliasing
1782    checks between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1783 
1784    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1785    || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1786 
1787    Note that we only consider cases in which load_ptr_0 and load_ptr_1
1788    have the same base address.  */
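
/* Illustration only, not part of GCC: a standalone sketch of the merged
   test described above, using hypothetical byte pointers and constant
   segment lengths.  Guarded by #if 0 so it is never compiled.  */
#if 0
/* Assumes the precondition from the comment above: load_0 < load_1 and
   load_1 - load_0 - load_len_0 < store_len, so the store segment cannot
   fit strictly between the two load segments.  One interval test then
   covers both of the original checks.  */
static int
merged_no_alias_p (const char *store, unsigned long store_len,
		   const char *load_0, unsigned long load_len_0,
		   const char *load_1, unsigned long load_len_1)
{
  (void) load_len_0;
  return (store + store_len <= load_0	    /* store ends before load_0 */
	  || load_1 + load_len_1 <= store); /* load_1 ends before store */
}
#endif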
1789 
1790 void
1791 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1792 			       poly_uint64)
1793 {
1794   if (alias_pairs->is_empty ())
1795     return;
1796 
1797   /* Canonicalize each pair so that the base components are ordered wrt
1798      data_ref_compare_tree.  This allows the loop below to merge more
1799      cases.  */
1800   unsigned int i;
1801   dr_with_seg_len_pair_t *alias_pair;
1802   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1803     {
1804       data_reference_p dr_a = alias_pair->first.dr;
1805       data_reference_p dr_b = alias_pair->second.dr;
1806       int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1807 					    DR_BASE_ADDRESS (dr_b));
1808       if (comp_res == 0)
1809 	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1810       if (comp_res == 0)
1811 	comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1812       if (comp_res > 0)
1813 	{
1814 	  std::swap (alias_pair->first, alias_pair->second);
1815 	  alias_pair->flags |= DR_ALIAS_SWAPPED;
1816 	}
1817       else
1818 	alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1819     }
1820 
1821   /* Sort the collected data ref pairs so that we can scan them once to
1822      combine all possible aliasing checks.  */
1823   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1824 
1825   /* Scan the sorted dr pairs and check if we can combine alias checks
1826      of two neighboring dr pairs.  */
1827   unsigned int last = 0;
1828   for (i = 1; i < alias_pairs->length (); ++i)
1829     {
1830       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1831       dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1832       dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1833 
1834       dr_with_seg_len *dr_a1 = &alias_pair1->first;
1835       dr_with_seg_len *dr_b1 = &alias_pair1->second;
1836       dr_with_seg_len *dr_a2 = &alias_pair2->first;
1837       dr_with_seg_len *dr_b2 = &alias_pair2->second;
1838 
1839       /* Remove duplicate data ref pairs.  */
1840       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1841 	{
1842 	  if (dump_enabled_p ())
1843 	    dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1844 			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1845 			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1846 	  alias_pair1->flags |= alias_pair2->flags;
1847 	  continue;
1848 	}
1849 
1850       /* Assume that we won't be able to merge the pairs, then correct
1851 	 if we do.  */
1852       last += 1;
1853       if (last != i)
1854 	(*alias_pairs)[last] = (*alias_pairs)[i];
1855 
1856       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1857 	{
1858 	  /* We consider the case in which DR_B1 and DR_B2 are the same
1859 	     memref, and DR_A1 and DR_A2 are two consecutive memrefs.  */
1860 	  if (*dr_a1 == *dr_a2)
1861 	    {
1862 	      std::swap (dr_a1, dr_b1);
1863 	      std::swap (dr_a2, dr_b2);
1864 	    }
1865 
1866 	  poly_int64 init_a1, init_a2;
1867 	  /* Only consider cases in which the distance between the initial
1868 	     DR_A1 and the initial DR_A2 is known at compile time.  */
1869 	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1870 				DR_BASE_ADDRESS (dr_a2->dr), 0)
1871 	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1872 				   DR_OFFSET (dr_a2->dr), 0)
1873 	      || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1874 	      || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1875 	    continue;
1876 
1877 	  /* Don't combine if we can't tell which one comes first.  */
1878 	  if (!ordered_p (init_a1, init_a2))
1879 	    continue;
1880 
1881 	  /* Work out what the segment length would be if we did combine
1882 	     DR_A1 and DR_A2:
1883 
1884 	     - If DR_A1 and DR_A2 have equal lengths, that length is
1885 	       also the combined length.
1886 
1887 	     - If DR_A1 and DR_A2 both have negative "lengths", the combined
1888 	       length is the lower bound on those lengths.
1889 
1890 	     - If DR_A1 and DR_A2 both have positive lengths, the combined
1891 	       length is the upper bound on those lengths.
1892 
1893 	     Other cases are unlikely to give a useful combination.
1894 
1895 	     The lengths both have sizetype, so the sign is taken from
1896 	     the step instead.  */
1897 	  poly_uint64 new_seg_len = 0;
1898 	  bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1899 						 dr_a2->seg_len, 0);
1900 	  if (new_seg_len_p)
1901 	    {
1902 	      poly_uint64 seg_len_a1, seg_len_a2;
1903 	      if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1904 		  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1905 		continue;
1906 
1907 	      tree indicator_a = dr_direction_indicator (dr_a1->dr);
1908 	      if (TREE_CODE (indicator_a) != INTEGER_CST)
1909 		continue;
1910 
1911 	      tree indicator_b = dr_direction_indicator (dr_a2->dr);
1912 	      if (TREE_CODE (indicator_b) != INTEGER_CST)
1913 		continue;
1914 
1915 	      int sign_a = tree_int_cst_sgn (indicator_a);
1916 	      int sign_b = tree_int_cst_sgn (indicator_b);
1917 
1918 	      if (sign_a <= 0 && sign_b <= 0)
1919 		new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1920 	      else if (sign_a >= 0 && sign_b >= 0)
1921 		new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1922 	      else
1923 		continue;
1924 	    }
1925 	  /* At this point we're committed to merging the refs.  */
1926 
1927 	  /* Make sure dr_a1 starts left of dr_a2.  */
1928 	  if (maybe_gt (init_a1, init_a2))
1929 	    {
1930 	      std::swap (*dr_a1, *dr_a2);
1931 	      std::swap (init_a1, init_a2);
1932 	    }
1933 
1934 	  /* The DR_Bs are equal, so only the DR_As can introduce
1935 	     mixed steps.  */
1936 	  if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1937 	    alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1938 
1939 	  if (new_seg_len_p)
1940 	    {
1941 	      dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1942 					      new_seg_len);
1943 	      dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1944 	    }
1945 
1946 	  /* This is always positive due to the swap above.  */
1947 	  poly_uint64 diff = init_a2 - init_a1;
1948 
1949 	  /* The new check will start at DR_A1.  Make sure that its access
1950 	     size encompasses the initial DR_A2.  */
1951 	  if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1952 	    {
1953 	      dr_a1->access_size = upper_bound (dr_a1->access_size,
1954 						diff + dr_a2->access_size);
1955 	      unsigned int new_align = known_alignment (dr_a1->access_size);
1956 	      dr_a1->align = MIN (dr_a1->align, new_align);
1957 	    }
1958 	  if (dump_enabled_p ())
1959 	    dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1960 			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1961 			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1962 	  alias_pair1->flags |= alias_pair2->flags;
1963 	  last -= 1;
1964 	}
1965     }
1966   alias_pairs->truncate (last + 1);
1967 
1968   /* Try to restore the original dr_with_seg_len order within each
1969      dr_with_seg_len_pair_t.  If we ended up combining swapped and
1970      unswapped pairs into the same check, we have to invalidate any
1971      RAW, WAR and WAW information for it.  */
1972   if (dump_enabled_p ())
1973     dump_printf (MSG_NOTE, "merged alias checks:\n");
1974   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1975     {
1976       unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1977       unsigned int swapped = (alias_pair->flags & swap_mask);
1978       if (swapped == DR_ALIAS_SWAPPED)
1979 	std::swap (alias_pair->first, alias_pair->second);
1980       else if (swapped != DR_ALIAS_UNSWAPPED)
1981 	alias_pair->flags |= DR_ALIAS_ARBITRARY;
1982       alias_pair->flags &= ~swap_mask;
1983       if (dump_enabled_p ())
1984 	dump_alias_pair (alias_pair, "  ");
1985     }
1986 }
1987 
1988 /* A subroutine of create_intersect_range_checks, with a subset of the
1989    same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1990    to optimize cases in which the references form a simple RAW, WAR or
1991    WAW dependence.  */
1992 
1993 static bool
1994 create_ifn_alias_checks (tree *cond_expr,
1995 			 const dr_with_seg_len_pair_t &alias_pair)
1996 {
1997   const dr_with_seg_len& dr_a = alias_pair.first;
1998   const dr_with_seg_len& dr_b = alias_pair.second;
1999 
2000   /* Check for cases in which:
2001 
2002      (a) we have a known RAW, WAR or WAW dependence
2003      (b) the accesses are well-ordered in both the original and new code
2004 	 (see the comment above the DR_ALIAS_* flags for details); and
2005      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
2006   if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
2007     return false;
2008 
2009   /* Make sure that both DRs access the same pattern of bytes,
2010      with a constant length and step.  */
2011   poly_uint64 seg_len;
2012   if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2013       || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2014       || maybe_ne (dr_a.access_size, dr_b.access_size)
2015       || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2016       || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2017     return false;
2018 
2019   unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2020   tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2021   tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2022 
2023   /* See whether the target supports what we want to do.  WAW checks are
2024      equivalent to WAR checks here.  */
2025   internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2026 		     ? IFN_CHECK_RAW_PTRS
2027 		     : IFN_CHECK_WAR_PTRS);
2028   unsigned int align = MIN (dr_a.align, dr_b.align);
2029   poly_uint64 full_length = seg_len + bytes;
2030   if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2031 					   full_length, align))
2032     {
2033       full_length = seg_len + dr_a.access_size;
2034       if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2035 					       full_length, align))
2036 	return false;
2037     }
2038 
2039   /* Commit to using this form of test.  */
2040   addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2041   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2042 
2043   addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2044   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2045 
2046   *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2047 					     ifn, boolean_type_node,
2048 					     4, addr_a, addr_b,
2049 					     size_int (full_length),
2050 					     size_int (align));
2051 
2052   if (dump_enabled_p ())
2053     {
2054       if (ifn == IFN_CHECK_RAW_PTRS)
2055 	dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2056       else
2057 	dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2058     }
2059   return true;
2060 }
2061 
2062 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2063    free of aliases, using a condition based on index values instead
2064    of a condition based on addresses.  Return true on success,
2065    storing the condition in *COND_EXPR.
2066 
2067    This can only be done if the two data references in ALIAS_PAIR access
2068    the same array object and the index is the only difference.  For example,
2069    if the two data references are DR_A and DR_B:
2070 
2071                        DR_A                           DR_B
2072       data-ref         arr[i]                         arr[j]
2073       base_object      arr                            arr
2074       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
2075 
2076    The addresses and their index are like:
2077 
2078         |<- ADDR_A    ->|          |<- ADDR_B    ->|
2079      ------------------------------------------------------->
2080         |   |   |   |   |          |   |   |   |   |
2081      ------------------------------------------------------->
2082         i_0 ...         i_0+4      j_0 ...         j_0+4
2083 
2084    We can create expression based on index rather than address:
2085 
2086      (unsigned) (i_0 - j_0 + 3) <= 6
2087 
2088    i.e. the indices are less than 4 apart.
2089 
2090    Note that the evolution step of the index needs to be considered in the comparison.  */
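
/* Illustration only, not part of GCC: the index-based test above for the
   arr[i] vs. arr[j] example, with hypothetical start indices i_0 and j_0
   and four accessed elements per segment.  Guarded by #if 0.  */
#if 0
static int
index_segments_overlap_p (long i_0, long j_0)
{
  /* The segments cover indices [i_0, i_0 + 3] and [j_0, j_0 + 3]; they
     overlap iff |i_0 - j_0| <= 3, which the unsigned bias trick below
     tests with a single comparison.  */
  return (unsigned long) (i_0 - j_0 + 3) <= 6;
}
#endif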
2091 
2092 static bool
2093 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2094 				     const dr_with_seg_len_pair_t &alias_pair)
2095 {
2096   const dr_with_seg_len &dr_a = alias_pair.first;
2097   const dr_with_seg_len &dr_b = alias_pair.second;
2098   if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2099       || integer_zerop (DR_STEP (dr_a.dr))
2100       || integer_zerop (DR_STEP (dr_b.dr))
2101       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2102     return false;
2103 
2104   poly_uint64 seg_len1, seg_len2;
2105   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2106       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2107     return false;
2108 
2109   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2110     return false;
2111 
2112   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2113     return false;
2114 
2115   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2116     return false;
2117 
2118   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2119 
2120   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2121   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2122   if (neg_step)
2123     {
2124       abs_step = -abs_step;
2125       seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2126       seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2127     }
2128 
2129   /* Infer the number of iterations with which the memory segment is
2130      accessed by DR.  In other words, aliasing is checked if the memory
2131      segment accessed by DR_A in some iterations intersects with the
2132      memory segment accessed by DR_B in the same number of iterations.
2133      Note that the segment length is a linear function of the number
2134      of iterations with DR_STEP as the coefficient.  */
2135   poly_uint64 niter_len1, niter_len2;
2136   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2137       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2138     return false;
2139 
2140   /* Divide each access size by the byte step, rounding up.  */
2141   poly_uint64 niter_access1, niter_access2;
2142   if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2143 			abs_step, &niter_access1)
2144       || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2145 			   abs_step, &niter_access2))
2146     return false;
2147 
2148   bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2149 
2150   int found = -1;
2151   for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2152     {
2153       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2154       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2155       /* The two indices must be the same if they are not SCEVs, or are
2156 	 not SCEVs wrt the current loop being vectorized.  */
2157       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2158 	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
2159 	  || CHREC_VARIABLE (access1) != (unsigned)loop->num
2160 	  || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2161 	{
2162 	  if (operand_equal_p (access1, access2, 0))
2163 	    continue;
2164 
2165 	  return false;
2166 	}
2167       if (found >= 0)
2168 	return false;
2169       found = i;
2170     }
2171 
2172   /* Ought not to happen in practice, since if all accesses are equal then the
2173      alias should be decidable at compile time.  */
2174   if (found < 0)
2175     return false;
2176 
2177   /* The two indices must have the same step.  */
2178   tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2179   tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2180   if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2181     return false;
2182 
2183   tree idx_step = CHREC_RIGHT (access1);
2184   /* Index must have const step, otherwise DR_STEP won't be constant.  */
2185   gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2186   /* Index must evaluate in the same direction as DR.  */
2187   gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2188 
2189   tree min1 = CHREC_LEFT (access1);
2190   tree min2 = CHREC_LEFT (access2);
2191   if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2192     return false;
2193 
2194   /* Ideally, aliasing could be checked against the loop's control IV,
2195      but we would need to prove a linear mapping between the control IV
2196      and the reference index.  Although that should be true, we instead
2197      check against the (array) index of the data reference.  Like the
2198      segment length, the index length is a linear function of the number
2199      of iterations with idx_step as the coefficient, i.e. niter_len * idx_step.  */
2200   offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2201 					      SIGNED);
2202   if (neg_step)
2203     abs_idx_step = -abs_idx_step;
2204   poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2205   poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2206   poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2207   poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2208 
2209   gcc_assert (known_ge (idx_len1, 0)
2210 	      && known_ge (idx_len2, 0)
2211 	      && known_ge (idx_access1, 0)
2212 	      && known_ge (idx_access2, 0));
2213 
2214   /* Each access has the following pattern, with lengths measured
2215      in units of INDEX:
2216 
2217 	  <-- idx_len -->
2218 	  <--- A: -ve step --->
2219 	  +-----+-------+-----+-------+-----+
2220 	  | n-1 | ..... |  0  | ..... | n-1 |
2221 	  +-----+-------+-----+-------+-----+
2222 			<--- B: +ve step --->
2223 			<-- idx_len -->
2224 			|
2225 		       min
2226 
2227      where "n" is the number of scalar iterations covered by the segment
2228      and where each access spans idx_access units.
2229 
2230      A is the range of bytes accessed when the step is negative,
2231      B is the range when the step is positive.
2232 
2233      When checking for general overlap, we need to test whether
2234      the range:
2235 
2236        [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2237 
2238      overlaps:
2239 
2240        [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2241 
2242      where:
2243 
2244 	low_offsetN = +ve step ? 0 : -idx_lenN;
2245        high_offsetN = +ve step ? idx_lenN : 0;
2246 
2247      This is equivalent to testing whether:
2248 
2249        min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2250        && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2251 
2252      Converting this into a single test, there is an overlap if:
2253 
2254        0 <= min2 - min1 + bias <= limit
2255 
2256      where  bias = high_offset2 + idx_access2 - 1 - low_offset1
2257 	   limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2258 		 + (high_offset2 - low_offset2 + idx_access2 - 1)
2259       i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2260 
2261      Combining the tests requires limit to be computable in an unsigned
2262      form of the index type; if it isn't, we fall back to the usual
2263      pointer-based checks.
2264 
2265      We can do better if DR_B is a write and if DR_A and DR_B are
2266      well-ordered in both the original and the new code (see the
2267      comment above the DR_ALIAS_* flags for details).  In this case
2268      we know that for each i in [0, n-1], the write performed by
2269      access i of DR_B occurs after access numbers j<=i of DR_A in
2270      both the original and the new code.  Any write or anti
2271      dependencies wrt those DR_A accesses are therefore maintained.
2272 
2273      We just need to make sure that each individual write in DR_B does not
2274      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2275      after the DR_B access in the original code but happen before it in
2276      the new code.
2277 
2278      We know the steps for both accesses are equal, so by induction, we
2279      just need to test whether the first write of DR_B overlaps a later
2280      access of DR_A.  In other words, we need to move min1 along by
2281      one iteration:
2282 
2283        min1' = min1 + idx_step
2284 
2285      and use the ranges:
2286 
2287        [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2288 
2289      and:
2290 
2291        [min2, min2 + idx_access2 - 1]
2292 
2293      where:
2294 
2295 	low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2296        high_offset1' = +ve step ? idx_len1 - |idx_step| : 0.  */
2297   if (waw_or_war_p)
2298     idx_len1 -= abs_idx_step;
2299 
2300   poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2301   if (!waw_or_war_p)
2302     limit += idx_len2;
2303 
2304   tree utype = unsigned_type_for (TREE_TYPE (min1));
2305   if (!wi::fits_to_tree_p (limit, utype))
2306     return false;
2307 
2308   poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2309   poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2310   poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2311   /* Equivalent to adding IDX_STEP to MIN1.  */
2312   if (waw_or_war_p)
2313     bias -= wi::to_offset (idx_step);
2314 
2315   tree subject = fold_build2 (MINUS_EXPR, utype,
2316 			      fold_convert (utype, min2),
2317 			      fold_convert (utype, min1));
2318   subject = fold_build2 (PLUS_EXPR, utype, subject,
2319 			 wide_int_to_tree (utype, bias));
2320   tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2321 				     wide_int_to_tree (utype, limit));
2322   if (*cond_expr)
2323     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2324 			      *cond_expr, part_cond_expr);
2325   else
2326     *cond_expr = part_cond_expr;
2327   if (dump_enabled_p ())
2328     {
2329       if (waw_or_war_p)
2330 	dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2331       else
2332 	dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2333     }
2334   return true;
2335 }
2336 
2337 /* A subroutine of create_intersect_range_checks, with a subset of the
2338    same arguments.  Try to optimize cases in which the second access
2339    is a write and in which some overlap is valid.  */
2340 
2341 static bool
2342 create_waw_or_war_checks (tree *cond_expr,
2343 			  const dr_with_seg_len_pair_t &alias_pair)
2344 {
2345   const dr_with_seg_len& dr_a = alias_pair.first;
2346   const dr_with_seg_len& dr_b = alias_pair.second;
2347 
2348   /* Check for cases in which:
2349 
2350      (a) DR_B is always a write;
2351      (b) the accesses are well-ordered in both the original and new code
2352 	 (see the comment above the DR_ALIAS_* flags for details); and
2353      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
2354   if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2355     return false;
2356 
2357   /* Check for equal (but possibly variable) steps.  */
2358   tree step = DR_STEP (dr_a.dr);
2359   if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2360     return false;
2361 
2362   /* Make sure that we can operate on sizetype without loss of precision.  */
2363   tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2364   if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2365     return false;
2366 
2367   /* All addresses involved are known to have a common alignment ALIGN.
2368      We can therefore subtract ALIGN from an exclusive endpoint to get
2369      an inclusive endpoint.  In the best (and common) case, ALIGN is the
2370      same as the access sizes of both DRs, and so subtracting ALIGN
2371      cancels out the addition of an access size.  */
2372   unsigned int align = MIN (dr_a.align, dr_b.align);
2373   poly_uint64 last_chunk_a = dr_a.access_size - align;
2374   poly_uint64 last_chunk_b = dr_b.access_size - align;
2375 
2376   /* Get a boolean expression that is true when the step is negative.  */
2377   tree indicator = dr_direction_indicator (dr_a.dr);
2378   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2379 			       fold_convert (ssizetype, indicator),
2380 			       ssize_int (0));
2381 
2382   /* Get lengths in sizetype.  */
2383   tree seg_len_a
2384     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2385   step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2386 
2387   /* Each access has the following pattern:
2388 
2389 	  <- |seg_len| ->
2390 	  <--- A: -ve step --->
2391 	  +-----+-------+-----+-------+-----+
2392 	  | n-1 | ..... |  0  | ..... | n-1 |
2393 	  +-----+-------+-----+-------+-----+
2394 			<--- B: +ve step --->
2395 			<- |seg_len| ->
2396 			|
2397 		   base address
2398 
2399      where "n" is the number of scalar iterations covered by the segment.
2400 
2401      A is the range of bytes accessed when the step is negative,
2402      B is the range when the step is positive.
2403 
2404      We know that DR_B is a write.  We also know (from checking that
2405      DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2406      the write performed by access i of DR_B occurs after access numbers
2407      j<=i of DR_A in both the original and the new code.  Any write or
2408      anti dependencies wrt those DR_A accesses are therefore maintained.
2409 
2410      We just need to make sure that each individual write in DR_B does not
2411      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2412      after the DR_B access in the original code but happen before it in
2413      the new code.
2414 
2415      We know the steps for both accesses are equal, so by induction, we
2416      just need to test whether the first write of DR_B overlaps a later
2417      access of DR_A.  In other words, we need to move addr_a along by
2418      one iteration:
2419 
2420        addr_a' = addr_a + step
2421 
2422      and check whether:
2423 
2424        [addr_b, addr_b + last_chunk_b]
2425 
2426      overlaps:
2427 
2428        [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2429 
2430      where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:
2431 
2432 	low_offset_a = +ve step ? 0 : seg_len_a - step
2433        high_offset_a = +ve step ? seg_len_a - step : 0
2434 
2435      This is equivalent to testing whether:
2436 
2437        addr_a' + low_offset_a <= addr_b + last_chunk_b
2438        && addr_b <= addr_a' + high_offset_a + last_chunk_a
2439 
2440      Converting this into a single test, there is an overlap if:
2441 
2442        0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2443 
2444      where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2445 
2446      If DR_A is performed, limit + |step| - last_chunk_b is known to be
2447      less than the size of the object underlying DR_A.  We also know
2448      that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2449      guaranteed at compile time.  There can therefore be no overflow if
2450      "limit" is calculated in an unsigned type with pointer precision.  */
2451   tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2452 					 DR_OFFSET (dr_a.dr));
2453   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2454 
2455   tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2456 					 DR_OFFSET (dr_b.dr));
2457   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2458 
2459   /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
2460   addr_a = fold_build_pointer_plus (addr_a, step);
2461   tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2462 					   seg_len_a, step);
2463   if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2464     seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2465 
2466   tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2467 				   seg_len_a_minus_step, size_zero_node);
2468   if (!CONSTANT_CLASS_P (low_offset_a))
2469     low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2470 
2471   /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2472      but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
2473   tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2474 				    low_offset_a);
2475 
2476   /* The amount added to addr_b - addr_a'.  */
2477   tree bias = fold_build2 (MINUS_EXPR, sizetype,
2478 			   size_int (last_chunk_b), low_offset_a);
2479 
2480   tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2481   limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2482 		       size_int (last_chunk_a + last_chunk_b));
2483 
2484   tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2485   subject = fold_build2 (PLUS_EXPR, sizetype,
2486 			 fold_convert (sizetype, subject), bias);
2487 
2488   *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2489   if (dump_enabled_p ())
2490     dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2491   return true;
2492 }
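
/* Illustration only, not part of GCC: the address-based WAR/WAW test
   above, specialized to a constant positive step so that low_offset_a
   is 0 and high_offset_a is seg_len_a - step.  Names are hypothetical;
   guarded by #if 0.  */
#if 0
static int
waw_or_war_overlap_p (const char *addr_a, const char *addr_b,
		      unsigned long seg_len_a, unsigned long step,
		      unsigned long last_chunk_a, unsigned long last_chunk_b)
{
  /* Advance ADDR_A by one iteration, as in the comment above.  */
  const char *addr_a1 = addr_a + step;
  unsigned long limit = (seg_len_a - step) + last_chunk_a + last_chunk_b;
  /* There is a problematic overlap iff
     0 <= addr_b + last_chunk_b - addr_a' <= limit; the condition the
     function above generates is the negation of this.  */
  return (unsigned long) (addr_b - addr_a1) + last_chunk_b <= limit;
}
#endif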
2493 
2494 /* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
2495    every address ADDR accessed by D:
2496 
2497      *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT
2498 
2499    In this case, every element accessed by D is aligned to at least
2500    ALIGN bytes.
2501 
2502    If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2503 
2504      *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT.  */
2505 
2506 static void
2507 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2508 		     tree *seg_max_out, HOST_WIDE_INT align)
2509 {
2510   /* Each access has the following pattern:
2511 
2512 	  <- |seg_len| ->
2513 	  <--- A: -ve step --->
2514 	  +-----+-------+-----+-------+-----+
2515 	  | n-1 | ..... |  0  | ..... | n-1 |
2516 	  +-----+-------+-----+-------+-----+
2517 			<--- B: +ve step --->
2518 			<- |seg_len| ->
2519 			|
2520 		   base address
2521 
2522      where "n" is the number of scalar iterations covered by the segment.
2523      (This should be VF for a particular pair if we know that both steps
2524      are the same, otherwise it will be the full number of scalar loop
2525      iterations.)
2526 
2527      A is the range of bytes accessed when the step is negative,
2528      B is the range when the step is positive.
2529 
2530      If the access size is "access_size" bytes, the lowest addressed byte is:
2531 
2532 	 base + (step < 0 ? seg_len : 0)   [LB]
2533 
2534      and the highest addressed byte is always below:
2535 
2536 	 base + (step < 0 ? 0 : seg_len) + access_size   [UB]
2537 
2538      Thus:
2539 
2540 	 LB <= ADDR < UB
2541 
2542      If ALIGN is nonzero, all three values are aligned to at least ALIGN
2543      bytes, so:
2544 
2545 	 LB <= ADDR <= UB - ALIGN
2546 
2547      where "- ALIGN" folds naturally with the "+ access_size" and often
2548      cancels it out.
2549 
2550      We don't try to simplify LB and UB beyond this (e.g. by using
2551      MIN and MAX based on whether seg_len rather than the stride is
2552      negative) because it is possible for the absolute size of the
2553      segment to overflow the range of a ssize_t.
2554 
2555      Keeping the pointer_plus outside of the cond_expr should allow
2556      the cond_exprs to be shared with other alias checks.  */
2557   tree indicator = dr_direction_indicator (d.dr);
2558   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2559 			       fold_convert (ssizetype, indicator),
2560 			       ssize_int (0));
2561   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2562 					    DR_OFFSET (d.dr));
2563   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2564   tree seg_len
2565     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2566 
2567   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2568 				seg_len, size_zero_node);
2569   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2570 				size_zero_node, seg_len);
2571   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2572 			   size_int (d.access_size - align));
2573 
2574   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2575   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2576 }
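
/* Illustration only, not part of GCC: the [LB, UB) bounds derived above,
   computed with plain pointer arithmetic for a hypothetical segment with
   ALIGN == 0 and the segment length given as an absolute value.
   Guarded by #if 0.  */
#if 0
static void
segment_bounds (const char *base, int neg_step, unsigned long abs_seg_len,
		unsigned long access_size,
		const char **seg_min_out, const char **seg_max_out)
{
  /* LB: the lowest addressed byte; UB: one past the highest.  */
  *seg_min_out = neg_step ? base - abs_seg_len : base;
  *seg_max_out = (neg_step ? base : base + abs_seg_len) + access_size;
}
#endif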
2577 
2578 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2579    storing the condition in *COND_EXPR.  The fallback is to generate
2580    a test that the two accesses do not overlap:
2581 
2582      end_a <= start_b || end_b <= start_a.  */
2583 
2584 static void
2585 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2586 			       const dr_with_seg_len_pair_t &alias_pair)
2587 {
2588   const dr_with_seg_len& dr_a = alias_pair.first;
2589   const dr_with_seg_len& dr_b = alias_pair.second;
2590   *cond_expr = NULL_TREE;
2591   if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2592     return;
2593 
2594   if (create_ifn_alias_checks (cond_expr, alias_pair))
2595     return;
2596 
2597   if (create_waw_or_war_checks (cond_expr, alias_pair))
2598     return;
2599 
2600   unsigned HOST_WIDE_INT min_align;
2601   tree_code cmp_code;
2602   /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2603      are equivalent.  This is just an optimization heuristic.  */
2604   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2605       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2606     {
2607       /* In this case adding access_size to seg_len is likely to give
2608 	 a simple X * step, where X is either the number of scalar
2609 	 iterations or the vectorization factor.  We're better off
2610 	 keeping that, rather than subtracting an alignment from it.
2611 
2612 	 In this case the maximum values are exclusive and so there is
2613 	 no alias if the maximum of one segment equals the minimum
2614 	 of another.  */
2615       min_align = 0;
2616       cmp_code = LE_EXPR;
2617     }
2618   else
2619     {
2620       /* Calculate the minimum alignment shared by all four pointers,
2621 	 then arrange for this alignment to be subtracted from the
2622 	 exclusive maximum values to get inclusive maximum values.
2623 	 This "- min_align" is cumulative with a "+ access_size"
2624 	 in the calculation of the maximum values.  In the best
2625 	 (and common) case, the two cancel each other out, leaving
2626 	 us with an inclusive bound based only on seg_len.  In the
2627 	 worst case we're simply adding a smaller number than before.
2628 
2629 	 Because the maximum values are inclusive, there is an alias
2630 	 if the maximum value of one segment is equal to the minimum
2631 	 value of the other.  */
2632       min_align = MIN (dr_a.align, dr_b.align);
2633       cmp_code = LT_EXPR;
2634     }
2635 
2636   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2637   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2638   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2639 
2640   *cond_expr
2641     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2642 	fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2643 	fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2644   if (dump_enabled_p ())
2645     dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2646 }
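
/* Illustration only, not part of GCC: the fallback overlap test above
   with exclusive segment maxima (min_align == 0, LE_EXPR), on
   hypothetical byte pointers.  Guarded by #if 0.  */
#if 0
static int
segments_do_not_overlap_p (const char *a_min, const char *a_max,
			   const char *b_min, const char *b_max)
{
  /* No alias if one segment ends at or before the other begins.  */
  return a_max <= b_min || b_max <= a_min;
}
#endif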
2647 
2648 /* Create a conditional expression that represents the run-time checks for
2649    overlapping of address ranges represented by a list of data references
2650    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
2651    COND_EXPR is the conditional expression to be used in the if statement
2652    that controls which version of the loop gets executed at runtime.  */
2653 
2654 void
2655 create_runtime_alias_checks (class loop *loop,
2656 			     vec<dr_with_seg_len_pair_t> *alias_pairs,
2657 			     tree * cond_expr)
2658 {
2659   tree part_cond_expr;
2660 
2661   fold_defer_overflow_warnings ();
2662   dr_with_seg_len_pair_t *alias_pair;
2663   unsigned int i;
2664   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
2665     {
2666       gcc_assert (alias_pair->flags);
2667       if (dump_enabled_p ())
2668 	dump_printf (MSG_NOTE,
2669 		     "create runtime check for data references %T and %T\n",
2670 		     DR_REF (alias_pair->first.dr),
2671 		     DR_REF (alias_pair->second.dr));
2672 
2673       /* Create the condition expression for each pair of data references.  */
2674       create_intersect_range_checks (loop, &part_cond_expr, *alias_pair);
2675       if (*cond_expr)
2676 	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2677 				  *cond_expr, part_cond_expr);
2678       else
2679 	*cond_expr = part_cond_expr;
2680     }
2681   fold_undefer_and_ignore_overflow_warnings ();
2682 }
2683 
2684 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2685    expressions.  */
2686 static bool
2687 dr_equal_offsets_p1 (tree offset1, tree offset2)
2688 {
2689   bool res;
2690 
2691   STRIP_NOPS (offset1);
2692   STRIP_NOPS (offset2);
2693 
2694   if (offset1 == offset2)
2695     return true;
2696 
2697   if (TREE_CODE (offset1) != TREE_CODE (offset2)
2698       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2699     return false;
2700 
2701   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2702                              TREE_OPERAND (offset2, 0));
2703 
2704   if (!res || !BINARY_CLASS_P (offset1))
2705     return res;
2706 
2707   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2708                              TREE_OPERAND (offset2, 1));
2709 
2710   return res;
2711 }
2712 
2713 /* Check if DRA and DRB have equal offsets.  */
2714 bool
2715 dr_equal_offsets_p (struct data_reference *dra,
2716                     struct data_reference *drb)
2717 {
2718   tree offset1, offset2;
2719 
2720   offset1 = DR_OFFSET (dra);
2721   offset2 = DR_OFFSET (drb);
2722 
2723   return dr_equal_offsets_p1 (offset1, offset2);
2724 }
2725 
2726 /* Returns true if FNA == FNB.  */
2727 
2728 static bool
2729 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2730 {
2731   unsigned i, n = fna.length ();
2732 
2733   if (n != fnb.length ())
2734     return false;
2735 
2736   for (i = 0; i < n; i++)
2737     if (!operand_equal_p (fna[i], fnb[i], 0))
2738       return false;
2739 
2740   return true;
2741 }
2742 
2743 /* If all the functions in CF are the same, returns one of them,
2744    otherwise returns NULL.  */
2745 
2746 static affine_fn
2747 common_affine_function (conflict_function *cf)
2748 {
2749   unsigned i;
2750   affine_fn comm;
2751 
2752   if (!CF_NONTRIVIAL_P (cf))
2753     return affine_fn ();
2754 
2755   comm = cf->fns[0];
2756 
2757   for (i = 1; i < cf->n; i++)
2758     if (!affine_function_equal_p (comm, cf->fns[i]))
2759       return affine_fn ();
2760 
2761   return comm;
2762 }
2763 
2764 /* Returns the base of the affine function FN.  */
2765 
2766 static tree
2767 affine_function_base (affine_fn fn)
2768 {
2769   return fn[0];
2770 }
2771 
2772 /* Returns true if FN is a constant.  */
2773 
2774 static bool
2775 affine_function_constant_p (affine_fn fn)
2776 {
2777   unsigned i;
2778   tree coef;
2779 
2780   for (i = 1; fn.iterate (i, &coef); i++)
2781     if (!integer_zerop (coef))
2782       return false;
2783 
2784   return true;
2785 }
2786 
2787 /* Returns true if FN is the zero constant function.  */
2788 
2789 static bool
2790 affine_function_zero_p (affine_fn fn)
2791 {
2792   return (integer_zerop (affine_function_base (fn))
2793 	  && affine_function_constant_p (fn));
2794 }
2795 
2796 /* Returns a signed integer type with the largest precision from TA
2797    and TB.  */
2798 
2799 static tree
2800 signed_type_for_types (tree ta, tree tb)
2801 {
2802   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2803     return signed_type_for (ta);
2804   else
2805     return signed_type_for (tb);
2806 }
2807 
2808 /* Applies operation OP on affine functions FNA and FNB, and returns the
2809    result.  */
2810 
2811 static affine_fn
2812 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2813 {
2814   unsigned i, n, m;
2815   affine_fn ret;
2816   tree coef;
2817 
2818   if (fnb.length () > fna.length ())
2819     {
2820       n = fna.length ();
2821       m = fnb.length ();
2822     }
2823   else
2824     {
2825       n = fnb.length ();
2826       m = fna.length ();
2827     }
2828 
2829   ret.create (m);
2830   for (i = 0; i < n; i++)
2831     {
2832       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2833 					 TREE_TYPE (fnb[i]));
2834       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2835     }
2836 
2837   for (; fna.iterate (i, &coef); i++)
2838     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2839 				 coef, integer_zero_node));
2840   for (; fnb.iterate (i, &coef); i++)
2841     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2842 				 integer_zero_node, coef));
2843 
2844   return ret;
2845 }
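
/* Illustration only, not part of GCC: the coefficient-vector view of
   affine functions used above, evaluated over plain integers.  A vector
   {b, c1, c2, ...} denotes b + c1*x1 + c2*x2 + ..., so the element-wise
   PLUS_EXPR/MINUS_EXPR in affine_fn_op is pointwise addition and
   subtraction of such functions.  Guarded by #if 0.  */
#if 0
static long
affine_eval (const long *coefs, unsigned int n, const long *xs)
{
  long val = coefs[0];		     /* base term, cf. affine_function_base */
  for (unsigned int i = 1; i < n; i++)
    val += coefs[i] * xs[i - 1];     /* coefficient times loop variable */
  return val;
}
#endif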
2846 
2847 /* Returns the sum of affine functions FNA and FNB.  */
2848 
2849 static affine_fn
2850 affine_fn_plus (affine_fn fna, affine_fn fnb)
2851 {
2852   return affine_fn_op (PLUS_EXPR, fna, fnb);
2853 }
2854 
2855 /* Returns the difference of affine functions FNA and FNB.  */
2856 
2857 static affine_fn
2858 affine_fn_minus (affine_fn fna, affine_fn fnb)
2859 {
2860   return affine_fn_op (MINUS_EXPR, fna, fnb);
2861 }
2862 
2863 /* Frees affine function FN.  */
2864 
2865 static void
2866 affine_fn_free (affine_fn fn)
2867 {
2868   fn.release ();
2869 }
2870 
2871 /* Determine the distance for each subscript in the data dependence
2872    relation DDR.  */
2873 
2874 static void
2875 compute_subscript_distance (struct data_dependence_relation *ddr)
2876 {
2877   conflict_function *cf_a, *cf_b;
2878   affine_fn fn_a, fn_b, diff;
2879 
2880   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2881     {
2882       unsigned int i;
2883 
2884       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2885  	{
2886  	  struct subscript *subscript;
2887 
2888  	  subscript = DDR_SUBSCRIPT (ddr, i);
2889  	  cf_a = SUB_CONFLICTS_IN_A (subscript);
2890  	  cf_b = SUB_CONFLICTS_IN_B (subscript);
2891 
2892 	  fn_a = common_affine_function (cf_a);
2893 	  fn_b = common_affine_function (cf_b);
2894 	  if (!fn_a.exists () || !fn_b.exists ())
2895 	    {
2896 	      SUB_DISTANCE (subscript) = chrec_dont_know;
2897 	      return;
2898 	    }
2899 	  diff = affine_fn_minus (fn_a, fn_b);
2900 
2901  	  if (affine_function_constant_p (diff))
2902  	    SUB_DISTANCE (subscript) = affine_function_base (diff);
2903  	  else
2904  	    SUB_DISTANCE (subscript) = chrec_dont_know;
2905 
2906 	  affine_fn_free (diff);
2907  	}
2908     }
2909 }
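
/* Illustration only, not part of GCC: the distance computed above for a
   classic pair like A[i + 2] vs. A[i].  Their affine functions are
   {2, 1} and {0, 1}; the difference {2, 0} is a constant function, so
   SUB_DISTANCE becomes its base, 2.  Guarded by #if 0.  */
#if 0
static long
subscript_distance_example (void)
{
  long fn_a[] = { 2, 1 };	/* A[i + 2]: base 2, coefficient 1.  */
  long fn_b[] = { 0, 1 };	/* A[i]:     base 0, coefficient 1.  */
  /* The coefficients match, so the difference is constant and the
     distance is the difference of the bases.  */
  return fn_a[0] - fn_b[0];
}
#endif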
2910 
2911 /* Returns the conflict function for "unknown".  */
2912 
2913 static conflict_function *
2914 conflict_fn_not_known (void)
2915 {
2916   conflict_function *fn = XCNEW (conflict_function);
2917   fn->n = NOT_KNOWN;
2918 
2919   return fn;
2920 }
2921 
2922 /* Returns the conflict function for "independent".  */
2923 
2924 static conflict_function *
2925 conflict_fn_no_dependence (void)
2926 {
2927   conflict_function *fn = XCNEW (conflict_function);
2928   fn->n = NO_DEPENDENCE;
2929 
2930   return fn;
2931 }
2932 
2933 /* Returns true if the address of OBJ is invariant in LOOP.  */
2934 
2935 static bool
2936 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2937 {
2938   while (handled_component_p (obj))
2939     {
2940       if (TREE_CODE (obj) == ARRAY_REF)
2941 	{
2942 	  for (int i = 1; i < 4; ++i)
2943 	    if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2944 							loop->num))
2945 	      return false;
2946 	}
2947       else if (TREE_CODE (obj) == COMPONENT_REF)
2948 	{
2949 	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2950 						      loop->num))
2951 	    return false;
2952 	}
2953       obj = TREE_OPERAND (obj, 0);
2954     }
2955 
2956   if (!INDIRECT_REF_P (obj)
2957       && TREE_CODE (obj) != MEM_REF)
2958     return true;
2959 
2960   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2961 						  loop->num);
2962 }
2963 
2964 /* Returns false if we can prove that data references A and B do not
2965    alias, true otherwise.  If LOOP_NEST is NULL no cross-iteration
2966    aliases are considered.  */
2967 
2968 bool
2969 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2970 		class loop *loop_nest)
2971 {
2972   tree addr_a = DR_BASE_OBJECT (a);
2973   tree addr_b = DR_BASE_OBJECT (b);
2974 
2975   /* If we are not processing a loop nest but scalar code we
2976      do not need to care about possible cross-iteration dependences
2977      and thus can process the full original reference.  Do so,
2978      similar to how loop invariant motion applies extra offset-based
2979      disambiguation.  */
2980   if (!loop_nest)
2981     {
2982       aff_tree off1, off2;
2983       poly_widest_int size1, size2;
2984       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2985       get_inner_reference_aff (DR_REF (b), &off2, &size2);
2986       aff_combination_scale (&off1, -1);
2987       aff_combination_add (&off2, &off1);
2988       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2989 	return false;
2990     }
2991 
2992   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2993       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2994       /* For cross-iteration dependences the cliques must be valid for the
2995 	 whole loop, not just individual iterations.  */
2996       && (!loop_nest
2997 	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
2998 	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
2999       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
3000       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
3001     return false;
3002 
3003   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
3004      do not know the size of the base-object.  So we cannot do any
3005      offset/overlap based analysis but have to rely on points-to
3006      information only.  */
3007   if (TREE_CODE (addr_a) == MEM_REF
3008       && (DR_UNCONSTRAINED_BASE (a)
3009 	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
3010     {
3011       /* For true dependences we can apply TBAA.  */
3012       if (flag_strict_aliasing
3013 	  && DR_IS_WRITE (a) && DR_IS_READ (b)
3014 	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3015 				     get_alias_set (DR_REF (b))))
3016 	return false;
3017       if (TREE_CODE (addr_b) == MEM_REF)
3018 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3019 				       TREE_OPERAND (addr_b, 0));
3020       else
3021 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3022 				       build_fold_addr_expr (addr_b));
3023     }
3024   else if (TREE_CODE (addr_b) == MEM_REF
3025 	   && (DR_UNCONSTRAINED_BASE (b)
3026 	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3027     {
3028       /* For true dependences we can apply TBAA.  */
3029       if (flag_strict_aliasing
3030 	  && DR_IS_WRITE (a) && DR_IS_READ (b)
3031 	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3032 				     get_alias_set (DR_REF (b))))
3033 	return false;
3034       if (TREE_CODE (addr_a) == MEM_REF)
3035 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3036 				       TREE_OPERAND (addr_b, 0));
3037       else
3038 	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3039 				       TREE_OPERAND (addr_b, 0));
3040     }
3041 
3042   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3043      that is being subsetted in the loop nest.  */
3044   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3045     return refs_output_dependent_p (addr_a, addr_b);
3046   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3047     return refs_anti_dependent_p (addr_a, addr_b);
3048   return refs_may_alias_p (addr_a, addr_b);
3049 }
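
/* Illustration only, not part of GCC: the kind of true dependence the
   TBAA check above can disambiguate.  With -fstrict-aliasing, the alias
   sets of int and float do not conflict, so a write through PI followed
   by a read through PF cannot alias and dr_may_alias_p may return
   false.  Hypothetical code, guarded by #if 0.  */
#if 0
void
tbaa_example (int *pi, float *pf)
{
  *pi = 1;		/* DR_A: write through an int pointer.  */
  float f = *pf;	/* DR_B: read through a float pointer.  */
  (void) f;
}
#endif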
3050 
3051 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
3052    if it is meaningful to compare their associated access functions
3053    when checking for dependencies.  */
3054 
3055 static bool
3056 access_fn_components_comparable_p (tree ref_a, tree ref_b)
3057 {
3058   /* Allow pairs of component refs from the following sets:
3059 
3060        { REALPART_EXPR, IMAGPART_EXPR }
3061        { COMPONENT_REF }
3062        { ARRAY_REF }.  */
3063   tree_code code_a = TREE_CODE (ref_a);
3064   tree_code code_b = TREE_CODE (ref_b);
3065   if (code_a == IMAGPART_EXPR)
3066     code_a = REALPART_EXPR;
3067   if (code_b == IMAGPART_EXPR)
3068     code_b = REALPART_EXPR;
3069   if (code_a != code_b)
3070     return false;
3071 
3072   if (TREE_CODE (ref_a) == COMPONENT_REF)
3073     /* ??? We cannot simply use the type of operand #0 of the refs here as
3074        the Fortran compiler smuggles type punning into COMPONENT_REFs.
3075        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
3076     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3077 	    == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3078 
3079   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3080 			     TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3081 }
3082 
3083 /* Initialize a data dependence relation between data accesses A and
3084    B.  LOOP_NEST is the vector of loops surrounding the references; its
3085    length gives the size of the classic distance/direction vectors.  */
3086 
3087 struct data_dependence_relation *
3088 initialize_data_dependence_relation (struct data_reference *a,
3089 				     struct data_reference *b,
3090  				     vec<loop_p> loop_nest)
3091 {
3092   struct data_dependence_relation *res;
3093   unsigned int i;
3094 
3095   res = XCNEW (struct data_dependence_relation);
3096   DDR_A (res) = a;
3097   DDR_B (res) = b;
3098   DDR_LOOP_NEST (res).create (0);
3099   DDR_SUBSCRIPTS (res).create (0);
3100   DDR_DIR_VECTS (res).create (0);
3101   DDR_DIST_VECTS (res).create (0);
3102 
3103   if (a == NULL || b == NULL)
3104     {
3105       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3106       return res;
3107     }
3108 
3109   /* If the data references do not alias, then they are independent.  */
3110   if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3111     {
3112       DDR_ARE_DEPENDENT (res) = chrec_known;
3113       return res;
3114     }
3115 
3116   unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
3117   unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
3118   if (num_dimensions_a == 0 || num_dimensions_b == 0)
3119     {
3120       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3121       return res;
3122     }
3123 
3124   /* For unconstrained bases, the root (highest-indexed) subscript
3125      describes a variation in the base of the original DR_REF rather
3126      than a component access.  We have no type that accurately describes
3127      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3128      applying this subscript) so limit the search to the last real
3129      component access.
3130 
3131      E.g. for:
3132 
3133 	void
3134 	f (int a[][8], int b[][8])
3135 	{
3136 	  for (int i = 0; i < 8; ++i)
3137 	    a[i * 2][0] = b[i][0];
3138 	}
3139 
3140      the a and b accesses have a single ARRAY_REF component reference [0]
3141      but have two subscripts.  */
3142   if (DR_UNCONSTRAINED_BASE (a))
3143     num_dimensions_a -= 1;
3144   if (DR_UNCONSTRAINED_BASE (b))
3145     num_dimensions_b -= 1;
3146 
3147   /* These structures describe sequences of component references in
3148      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
3149      specific access function.  */
3150   struct {
3151     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3152        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3153        indices.  In C notation, these are the indices of the rightmost
3154        component references; e.g. for a sequence .b.c.d, the start
3155        index is for .d.  */
3156     unsigned int start_a;
3157     unsigned int start_b;
3158 
3159     /* The sequence contains LENGTH consecutive access functions from
3160        each DR.  */
3161     unsigned int length;
3162 
3163     /* The enclosing objects for the A and B sequences respectively,
3164        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3165        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
3166     tree object_a;
3167     tree object_b;
3168   } full_seq = {}, struct_seq = {};
3169 
3170   /* Before each iteration of the loop:
3171 
3172      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3173      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
3174   unsigned int index_a = 0;
3175   unsigned int index_b = 0;
3176   tree ref_a = DR_REF (a);
3177   tree ref_b = DR_REF (b);
3178 
3179   /* Now walk the component references from the final DR_REFs back up to
3180      the enclosing base objects.  Each component reference corresponds
3181      to one access function in the DR, with access function 0 being for
3182      the final DR_REF and the highest-indexed access function being the
3183      one that is applied to the base of the DR.
3184 
3185      Look for a sequence of component references whose access functions
3186      are comparable (see access_fn_components_comparable_p).  If more
3187      than one such sequence exists, pick the one nearest the base
3188      (which is the leftmost sequence in C notation).  Store this sequence
3189      in FULL_SEQ.
3190 
3191      For example, if we have:
3192 
3193 	struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3194 
3195 	A: a[0][i].s.c.d
3196 	B: __real b[0][i].s.e[i].f
3197 
3198      (where d is the same type as the real component of f) then the access
3199      functions would be:
3200 
3201 			 0   1   2   3
3202 	A:              .d  .c  .s [i]
3203 
3204 		 0   1   2   3   4   5
3205 	B:  __real  .f [i]  .e  .s [i]
3206 
3207      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3208      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
3209      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
3210      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3211      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
3212      index foo[10] arrays, so is again comparable.  The sequence is
3213      therefore:
3214 
3215         A: [1, 3]  (i.e. [i].s.c)
3216         B: [3, 5]  (i.e. [i].s.e)
3217 
3218      Also look for sequences of component references whose access
3219      functions are comparable and whose enclosing objects have the same
3220      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
3221      example, STRUCT_SEQ would be:
3222 
3223         A: [1, 2]  (i.e. s.c)
3224         B: [3, 4]  (i.e. s.e)  */
3225   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3226     {
3227       /* REF_A and REF_B must be one of the component access types
3228 	 allowed by dr_analyze_indices.  */
3229       gcc_checking_assert (access_fn_component_p (ref_a));
3230       gcc_checking_assert (access_fn_component_p (ref_b));
3231 
3232       /* Get the immediately-enclosing objects for REF_A and REF_B,
3233 	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3234 	 and DR_ACCESS_FN (B, INDEX_B).  */
3235       tree object_a = TREE_OPERAND (ref_a, 0);
3236       tree object_b = TREE_OPERAND (ref_b, 0);
3237 
3238       tree type_a = TREE_TYPE (object_a);
3239       tree type_b = TREE_TYPE (object_b);
3240       if (access_fn_components_comparable_p (ref_a, ref_b))
3241 	{
3242 	  /* This pair of component accesses is comparable for dependence
3243 	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3244 	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
3245 	  if (full_seq.start_a + full_seq.length != index_a
3246 	      || full_seq.start_b + full_seq.length != index_b)
3247 	    {
3248 	      /* The accesses don't extend the current sequence,
3249 		 so start a new one here.  */
3250 	      full_seq.start_a = index_a;
3251 	      full_seq.start_b = index_b;
3252 	      full_seq.length = 0;
3253 	    }
3254 
3255 	  /* Add this pair of references to the sequence.  */
3256 	  full_seq.length += 1;
3257 	  full_seq.object_a = object_a;
3258 	  full_seq.object_b = object_b;
3259 
3260 	  /* If the enclosing objects are structures (and thus have the
3261 	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
3262 	  if (TREE_CODE (type_a) == RECORD_TYPE)
3263 	    struct_seq = full_seq;
3264 
3265 	  /* Move to the next containing reference for both A and B.  */
3266 	  ref_a = object_a;
3267 	  ref_b = object_b;
3268 	  index_a += 1;
3269 	  index_b += 1;
3270 	  continue;
3271 	}
3272 
3273       /* Advance the side with the smaller type to approach equal type sizes.  */
3274       if (!COMPLETE_TYPE_P (type_a)
3275 	  || !COMPLETE_TYPE_P (type_b)
3276 	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3277 	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3278 	break;
3279 
3280       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3281       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3282       if (size_a <= size_b)
3283 	{
3284 	  index_a += 1;
3285 	  ref_a = object_a;
3286 	}
3287       if (size_b <= size_a)
3288 	{
3289 	  index_b += 1;
3290 	  ref_b = object_b;
3291 	}
3292     }
3293 
3294   /* See whether FULL_SEQ ends at the base and whether the two bases
3295      are equal.  We do not care about TBAA or alignment info so we can
3296      use OEP_ADDRESS_OF to avoid false negatives.  */
3297   tree base_a = DR_BASE_OBJECT (a);
3298   tree base_b = DR_BASE_OBJECT (b);
3299   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3300 		      && full_seq.start_b + full_seq.length == num_dimensions_b
3301 		      && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
3302 		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3303 		      && (types_compatible_p (TREE_TYPE (base_a),
3304 					      TREE_TYPE (base_b))
3305 			  || (!base_supports_access_fn_components_p (base_a)
3306 			      && !base_supports_access_fn_components_p (base_b)
3307 			      && operand_equal_p
3308 				   (TYPE_SIZE (TREE_TYPE (base_a)),
3309 				    TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3310 		      && (!loop_nest.exists ()
3311 			  || (object_address_invariant_in_loop_p
3312 			      (loop_nest[0], base_a))));
3313 
3314   /* If the bases are the same, we can include the base variation too.
3315      E.g. the b accesses in:
3316 
3317        for (int i = 0; i < n; ++i)
3318          b[i + 4][0] = b[i][0];
3319 
3320      have a definite dependence distance of 4, while for:
3321 
3322        for (int i = 0; i < n; ++i)
3323          a[i + 4][0] = b[i][0];
3324 
3325      the dependence distance depends on the gap between a and b.
3326 
3327      If the bases are different then we can only rely on the sequence
3328      rooted at a structure access, since arrays are allowed to overlap
3329      arbitrarily and change shape arbitrarily.  E.g. we treat this as
3330      valid code:
3331 
3332        int a[256];
3333        ...
3334        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3335 
3336      where two lvalues with the same int[4][3] type overlap, and where
3337      both lvalues are distinct from the object's declared type.  */
3338   if (same_base_p)
3339     {
3340       if (DR_UNCONSTRAINED_BASE (a))
3341 	full_seq.length += 1;
3342     }
3343   else
3344     full_seq = struct_seq;
3345 
3346   /* Punt if we didn't find a suitable sequence.  */
3347   if (full_seq.length == 0)
3348     {
3349       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3350       return res;
3351     }
3352 
3353   if (!same_base_p)
3354     {
3355       /* Partial overlap is possible for different bases when strict aliasing
3356 	 is not in effect.  It's also possible if either base involves a union
3357 	 access; e.g. for:
3358 
3359 	   struct s1 { int a[2]; };
3360 	   struct s2 { struct s1 b; int c; };
3361 	   struct s3 { int d; struct s1 e; };
3362 	   union u { struct s2 f; struct s3 g; } *p, *q;
3363 
3364 	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3365 	 "p->g.e" (base "p->g") and might partially overlap the s1 at
3366 	 "q->g.e" (base "q->g").  */
3367       if (!flag_strict_aliasing
3368 	  || ref_contains_union_access_p (full_seq.object_a)
3369 	  || ref_contains_union_access_p (full_seq.object_b))
3370 	{
3371 	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3372 	  return res;
3373 	}
3374 
3375       DDR_COULD_BE_INDEPENDENT_P (res) = true;
3376       if (!loop_nest.exists ()
3377 	  || (object_address_invariant_in_loop_p (loop_nest[0],
3378 						  full_seq.object_a)
3379 	      && object_address_invariant_in_loop_p (loop_nest[0],
3380 						     full_seq.object_b)))
3381 	{
3382 	  DDR_OBJECT_A (res) = full_seq.object_a;
3383 	  DDR_OBJECT_B (res) = full_seq.object_b;
3384 	}
3385     }
3386 
3387   DDR_AFFINE_P (res) = true;
3388   DDR_ARE_DEPENDENT (res) = NULL_TREE;
3389   DDR_SUBSCRIPTS (res).create (full_seq.length);
3390   DDR_LOOP_NEST (res) = loop_nest;
3391   DDR_SELF_REFERENCE (res) = false;
3392 
3393   for (i = 0; i < full_seq.length; ++i)
3394     {
3395       struct subscript *subscript;
3396 
3397       subscript = XNEW (struct subscript);
3398       SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
3399       SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
3400       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3401       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3402       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3403       SUB_DISTANCE (subscript) = chrec_dont_know;
3404       DDR_SUBSCRIPTS (res).safe_push (subscript);
3405     }
3406 
3407   return res;
3408 }
3409 
3410 /* Frees memory used by the conflict function F.  */
3411 
3412 static void
3413 free_conflict_function (conflict_function *f)
3414 {
3415   unsigned i;
3416 
3417   if (CF_NONTRIVIAL_P (f))
3418     {
3419       for (i = 0; i < f->n; i++)
3420 	affine_fn_free (f->fns[i]);
3421     }
3422   free (f);
3423 }
3424 
3425 /* Frees memory used by SUBSCRIPTS.  */
3426 
3427 static void
3428 free_subscripts (vec<subscript_p> subscripts)
3429 {
3430   unsigned i;
3431   subscript_p s;
3432 
3433   FOR_EACH_VEC_ELT (subscripts, i, s)
3434     {
3435       free_conflict_function (s->conflicting_iterations_in_a);
3436       free_conflict_function (s->conflicting_iterations_in_b);
3437       free (s);
3438     }
3439   subscripts.release ();
3440 }
3441 
3442 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3443    description.  */
3444 
3445 static inline void
3446 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3447 			tree chrec)
3448 {
3449   DDR_ARE_DEPENDENT (ddr) = chrec;
3450   free_subscripts (DDR_SUBSCRIPTS (ddr));
3451   DDR_SUBSCRIPTS (ddr).create (0);
3452 }
3453 
3454 /* The dependence relation DDR cannot be represented by a distance
3455    vector.  */
3456 
3457 static inline void
3458 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3459 {
3460   if (dump_file && (dump_flags & TDF_DETAILS))
3461     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3462 
3463   DDR_AFFINE_P (ddr) = false;
3464 }
3465 
3466 
3467 
3468 /* This section contains the classic Banerjee tests.  */
3469 
3470 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3471    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
3472 
3473 static inline bool
3474 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3475 {
3476   return (evolution_function_is_constant_p (chrec_a)
3477 	  && evolution_function_is_constant_p (chrec_b));
3478 }
3479 
3480 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3481    variable, i.e., if the SIV (Single Index Variable) test is true.  */
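/* For example, (5, {0, +, 1}_1) passes the SIV test because one access
   function is constant and the other univariate, and ({0, +, 1}_1,
   {2, +, 2}_1) passes because both chrecs evolve in loop 1, whereas
   ({0, +, 1}_1, {0, +, 1}_2) fails because the two chrecs evolve in
   different loops.  */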
3482 
3483 static bool
3484 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3485 {
3486   if ((evolution_function_is_constant_p (chrec_a)
3487        && evolution_function_is_univariate_p (chrec_b))
3488       || (evolution_function_is_constant_p (chrec_b)
3489 	  && evolution_function_is_univariate_p (chrec_a)))
3490     return true;
3491 
3492   if (evolution_function_is_univariate_p (chrec_a)
3493       && evolution_function_is_univariate_p (chrec_b))
3494     {
3495       switch (TREE_CODE (chrec_a))
3496 	{
3497 	case POLYNOMIAL_CHREC:
3498 	  switch (TREE_CODE (chrec_b))
3499 	    {
3500 	    case POLYNOMIAL_CHREC:
3501 	      if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3502 		return false;
3503 	      /* FALLTHRU */
3504 
3505 	    default:
3506 	      return true;
3507 	    }
3508 
3509 	default:
3510 	  return true;
3511 	}
3512     }
3513 
3514   return false;
3515 }
3516 
3517 /* Creates a conflict function with N dimensions.  The affine functions
3518    in each dimension follow.  */
3519 
3520 static conflict_function *
3521 conflict_fn (unsigned n, ...)
3522 {
3523   unsigned i;
3524   conflict_function *ret = XCNEW (conflict_function);
3525   va_list ap;
3526 
3527   gcc_assert (n > 0 && n <= MAX_DIM);
3528   va_start (ap, n);
3529 
3530   ret->n = n;
3531   for (i = 0; i < n; i++)
3532     ret->fns[i] = va_arg (ap, affine_fn);
3533   va_end (ap);
3534 
3535   return ret;
3536 }
3537 
3538 /* Returns constant affine function with value CST.  */
3539 
3540 static affine_fn
3541 affine_fn_cst (tree cst)
3542 {
3543   affine_fn fn;
3544   fn.create (1);
3545   fn.quick_push (cst);
3546   return fn;
3547 }
3548 
3549 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
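/* E.g. with CST = 0, DIM = 2 and COEF = 3 this builds the coefficient
   vector (0, 0, 3), which represents 0 + 0 * x_1 + 3 * x_2.  */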
3550 
3551 static affine_fn
3552 affine_fn_univar (tree cst, unsigned dim, tree coef)
3553 {
3554   affine_fn fn;
3555   fn.create (dim + 1);
3556   unsigned i;
3557 
3558   gcc_assert (dim > 0);
3559   fn.quick_push (cst);
3560   for (i = 1; i < dim; i++)
3561     fn.quick_push (integer_zero_node);
3562   fn.quick_push (coef);
3563   return fn;
3564 }
3565 
3566 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
3567    *OVERLAPS_B are initialized to the functions that describe the
3568    relation between the elements accessed twice by CHREC_A and
3569    CHREC_B.  For k >= 0, the following property holds:
3570 
3571    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
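/* As a short illustration, for chrec_a = 3 and chrec_b = 3 the difference
   folds to 0 and the same element is accessed at every iteration, whereas
   for chrec_a = 3 and chrec_b = 5 the difference is the nonzero constant
   -2 and the accesses can never overlap.  */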
3572 
3573 static void
3574 analyze_ziv_subscript (tree chrec_a,
3575 		       tree chrec_b,
3576 		       conflict_function **overlaps_a,
3577 		       conflict_function **overlaps_b,
3578 		       tree *last_conflicts)
3579 {
3580   tree type, difference;
3581   dependence_stats.num_ziv++;
3582 
3583   if (dump_file && (dump_flags & TDF_DETAILS))
3584     fprintf (dump_file, "(analyze_ziv_subscript \n");
3585 
3586   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3587   chrec_a = chrec_convert (type, chrec_a, NULL);
3588   chrec_b = chrec_convert (type, chrec_b, NULL);
3589   difference = chrec_fold_minus (type, chrec_a, chrec_b);
3590 
3591   switch (TREE_CODE (difference))
3592     {
3593     case INTEGER_CST:
3594       if (integer_zerop (difference))
3595 	{
3596 	  /* The difference is equal to zero: the accessed index
3597 	     overlaps for each iteration in the loop.  */
3598 	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3599 	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3600 	  *last_conflicts = chrec_dont_know;
3601 	  dependence_stats.num_ziv_dependent++;
3602 	}
3603       else
3604 	{
3605 	  /* The accesses do not overlap.  */
3606 	  *overlaps_a = conflict_fn_no_dependence ();
3607 	  *overlaps_b = conflict_fn_no_dependence ();
3608 	  *last_conflicts = integer_zero_node;
3609 	  dependence_stats.num_ziv_independent++;
3610 	}
3611       break;
3612 
3613     default:
3614       /* We're not sure whether the indexes overlap.  For the moment,
3615 	 conservatively answer "don't know".  */
3616       if (dump_file && (dump_flags & TDF_DETAILS))
3617 	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3618 
3619       *overlaps_a = conflict_fn_not_known ();
3620       *overlaps_b = conflict_fn_not_known ();
3621       *last_conflicts = chrec_dont_know;
3622       dependence_stats.num_ziv_unimplemented++;
3623       break;
3624     }
3625 
3626   if (dump_file && (dump_flags & TDF_DETAILS))
3627     fprintf (dump_file, ")\n");
3628 }
3629 
3630 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3631    and only if it fits the unsigned int type.  If this is not the case, or
3632    the bound on the number of iterations of LOOP could not be derived, returns
3633    chrec_dont_know.  */
3634 
3635 static tree
3636 max_stmt_executions_tree (class loop *loop)
3637 {
3638   widest_int nit;
3639 
3640   if (!max_stmt_executions (loop, &nit))
3641     return chrec_dont_know;
3642 
3643   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3644     return chrec_dont_know;
3645 
3646   return wide_int_to_tree (unsigned_type_node, nit);
3647 }
3648 
3649 /* Determine whether the CHREC is always positive/negative.  If the expression
3650    cannot be statically analyzed, return false; otherwise store the answer
3651    in VALUE.  */
3652 
3653 static bool
3654 chrec_is_positive (tree chrec, bool *value)
3655 {
3656   bool value0, value1, value2;
3657   tree end_value, nb_iter;
3658 
3659   switch (TREE_CODE (chrec))
3660     {
3661     case POLYNOMIAL_CHREC:
3662       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3663 	  || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3664 	return false;
3665 
3666       /* FIXME -- overflows.  */
3667       if (value0 == value1)
3668 	{
3669 	  *value = value0;
3670 	  return true;
3671 	}
3672 
3673       /* Otherwise the chrec has the form "{-197, +, 2}_1", and the
3674 	 proof consists of showing that the sign never
3675 	 changes during the execution of the loop, from 0 to
3676 	 loop->nb_iterations.  */
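      /* E.g. for {-197, +, 2}_1 with, say, 100 latch executions, the end
	 value is -197 + 2 * 100 = 3: the initial sign (negative) differs
	 from the final sign (positive), so nothing can be concluded and
	 false is returned.  */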
3677       if (!evolution_function_is_affine_p (chrec))
3678 	return false;
3679 
3680       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3681       if (chrec_contains_undetermined (nb_iter))
3682 	return false;
3683 
3684 #if 0
3685       /* TODO -- If the test is after the exit, we may decrease the number of
3686 	 iterations by one.  */
3687       if (after_exit)
3688 	nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3689 #endif
3690 
3691       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3692 
3693       if (!chrec_is_positive (end_value, &value2))
3694 	return false;
3695 
3696       *value = value0;
3697       return value0 == value2;
3698 
3699     case INTEGER_CST:
3700       switch (tree_int_cst_sgn (chrec))
3701 	{
3702 	case -1:
3703 	  *value = false;
3704 	  break;
3705 	case 1:
3706 	  *value = true;
3707 	  break;
3708 	default:
3709 	  return false;
3710 	}
3711       return true;
3712 
3713     default:
3714       return false;
3715     }
3716 }
3717 
3718 
3719 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3720    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
3721    *OVERLAPS_B are initialized to the functions that describe the
3722    relation between the elements accessed twice by CHREC_A and
3723    CHREC_B.  For k >= 0, the following property holds:
3724 
3725    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3726 
3727 static void
3728 analyze_siv_subscript_cst_affine (tree chrec_a,
3729 				  tree chrec_b,
3730 				  conflict_function **overlaps_a,
3731 				  conflict_function **overlaps_b,
3732 				  tree *last_conflicts)
3733 {
3734   bool value0, value1, value2;
3735   tree type, difference, tmp;
3736 
3737   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3738   chrec_a = chrec_convert (type, chrec_a, NULL);
3739   chrec_b = chrec_convert (type, chrec_b, NULL);
3740   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3741 
3742   /* Special case overlap in the first iteration.  */
3743   if (integer_zerop (difference))
3744     {
3745       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3746       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3747       *last_conflicts = integer_one_node;
3748       return;
3749     }
3750 
3751   if (!chrec_is_positive (initial_condition (difference), &value0))
3752     {
3753       if (dump_file && (dump_flags & TDF_DETAILS))
3754 	fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3755 
3756       dependence_stats.num_siv_unimplemented++;
3757       *overlaps_a = conflict_fn_not_known ();
3758       *overlaps_b = conflict_fn_not_known ();
3759       *last_conflicts = chrec_dont_know;
3760       return;
3761     }
3762   else
3763     {
3764       if (value0 == false)
3765 	{
3766 	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3767 	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3768 	    {
3769 	      if (dump_file && (dump_flags & TDF_DETAILS))
3770 		fprintf (dump_file, "siv test failed: chrec not positive.\n");
3771 
3772 	      *overlaps_a = conflict_fn_not_known ();
3773 	      *overlaps_b = conflict_fn_not_known ();
3774 	      *last_conflicts = chrec_dont_know;
3775 	      dependence_stats.num_siv_unimplemented++;
3776 	      return;
3777 	    }
3778 	  else
3779 	    {
3780 	      if (value1 == true)
3781 		{
3782 		  /* Example:
3783 		     chrec_a = 12
3784 		     chrec_b = {10, +, 1}
3785 		  */
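		  /* Here difference = 10 - 12 = -2, and the step 1 divides
		     |-2| = 2, so the overlap happens at iteration 2 of
		     chrec_b: chrec_b (2) = 12 = chrec_a.  */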
3786 
3787 		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3788 		    {
3789 		      HOST_WIDE_INT numiter;
3790 		      class loop *loop = get_chrec_loop (chrec_b);
3791 
3792 		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3793 		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
3794 					 fold_build1 (ABS_EXPR, type, difference),
3795 					 CHREC_RIGHT (chrec_b));
3796 		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3797 		      *last_conflicts = integer_one_node;
3798 
3799 
3800 		      /* Perform weak-zero siv test to see if overlap is
3801 			 outside the loop bounds.  */
3802 		      numiter = max_stmt_executions_int (loop);
3803 
3804 		      if (numiter >= 0
3805 			  && compare_tree_int (tmp, numiter) > 0)
3806 			{
3807 			  free_conflict_function (*overlaps_a);
3808 			  free_conflict_function (*overlaps_b);
3809 			  *overlaps_a = conflict_fn_no_dependence ();
3810 			  *overlaps_b = conflict_fn_no_dependence ();
3811 			  *last_conflicts = integer_zero_node;
3812 			  dependence_stats.num_siv_independent++;
3813 			  return;
3814 			}
3815 		      dependence_stats.num_siv_dependent++;
3816 		      return;
3817 		    }
3818 
3819 		  /* When the step does not divide the difference, there are
3820 		     no overlaps.  */
3821 		  else
3822 		    {
3823 		      *overlaps_a = conflict_fn_no_dependence ();
3824 		      *overlaps_b = conflict_fn_no_dependence ();
3825 		      *last_conflicts = integer_zero_node;
3826 		      dependence_stats.num_siv_independent++;
3827 		      return;
3828 		    }
3829 		}
3830 
3831 	      else
3832 		{
3833 		  /* Example:
3834 		     chrec_a = 12
3835 		     chrec_b = {10, +, -1}
3836 
3837 		     In this case, chrec_a will not overlap with chrec_b.  */
3838 		  *overlaps_a = conflict_fn_no_dependence ();
3839 		  *overlaps_b = conflict_fn_no_dependence ();
3840 		  *last_conflicts = integer_zero_node;
3841 		  dependence_stats.num_siv_independent++;
3842 		  return;
3843 		}
3844 	    }
3845 	}
3846       else
3847 	{
3848 	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3849 	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3850 	    {
3851 	      if (dump_file && (dump_flags & TDF_DETAILS))
3852 		fprintf (dump_file, "siv test failed: chrec not positive.\n");
3853 
3854 	      *overlaps_a = conflict_fn_not_known ();
3855 	      *overlaps_b = conflict_fn_not_known ();
3856 	      *last_conflicts = chrec_dont_know;
3857 	      dependence_stats.num_siv_unimplemented++;
3858 	      return;
3859 	    }
3860 	  else
3861 	    {
3862 	      if (value2 == false)
3863 		{
3864 		  /* Example:
3865 		     chrec_a = 3
3866 		     chrec_b = {10, +, -1}
3867 		  */
3868 		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3869 		    {
3870 		      HOST_WIDE_INT numiter;
3871 		      class loop *loop = get_chrec_loop (chrec_b);
3872 
3873 		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3874 		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3875 					 CHREC_RIGHT (chrec_b));
3876 		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3877 		      *last_conflicts = integer_one_node;
3878 
3879 		      /* Perform weak-zero siv test to see if overlap is
3880 			 outside the loop bounds.  */
3881 		      numiter = max_stmt_executions_int (loop);
3882 
3883 		      if (numiter >= 0
3884 			  && compare_tree_int (tmp, numiter) > 0)
3885 			{
3886 			  free_conflict_function (*overlaps_a);
3887 			  free_conflict_function (*overlaps_b);
3888 			  *overlaps_a = conflict_fn_no_dependence ();
3889 			  *overlaps_b = conflict_fn_no_dependence ();
3890 			  *last_conflicts = integer_zero_node;
3891 			  dependence_stats.num_siv_independent++;
3892 			  return;
3893 			}
3894 		      dependence_stats.num_siv_dependent++;
3895 		      return;
3896 		    }
3897 
3898 		  /* When the step does not divide the difference, there
3899 		     are no overlaps.  */
3900 		  else
3901 		    {
3902 		      *overlaps_a = conflict_fn_no_dependence ();
3903 		      *overlaps_b = conflict_fn_no_dependence ();
3904 		      *last_conflicts = integer_zero_node;
3905 		      dependence_stats.num_siv_independent++;
3906 		      return;
3907 		    }
3908 		}
3909 	      else
3910 		{
3911 		  /* Example:
3912 		     chrec_a = 3
3913 		     chrec_b = {4, +, 1}
3914 
3915 		     In this case, chrec_a will not overlap with chrec_b.  */
3916 		  *overlaps_a = conflict_fn_no_dependence ();
3917 		  *overlaps_b = conflict_fn_no_dependence ();
3918 		  *last_conflicts = integer_zero_node;
3919 		  dependence_stats.num_siv_independent++;
3920 		  return;
3921 		}
3922 	    }
3923 	}
3924     }
3925 }
3926 
3927 /* Helper recursive function for initializing the matrix A.  Returns
3928    the initial value of CHREC.  */
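/* For instance, for the chrec {3, +, 4}_1 with INDEX = 0 and MULT = 1,
   this stores A[0][0] = 4 and returns the initial value 3; for the nested
   chrec {{3, +, 4}_1, +, 5}_2 it stores A[0][0] = 5 and A[1][0] = 4
   before returning 3.  */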
3929 
3930 static tree
3931 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3932 {
3933   gcc_assert (chrec);
3934 
3935   switch (TREE_CODE (chrec))
3936     {
3937     case POLYNOMIAL_CHREC:
3938       HOST_WIDE_INT chrec_right;
3939       if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
3940 	return chrec_dont_know;
3941       chrec_right = int_cst_value (CHREC_RIGHT (chrec));
3942       /* We want to be able to negate without overflow.  */
3943       if (chrec_right == HOST_WIDE_INT_MIN)
3944 	return chrec_dont_know;
3945       A[index][0] = mult * chrec_right;
3946       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3947 
3948     case PLUS_EXPR:
3949     case MULT_EXPR:
3950     case MINUS_EXPR:
3951       {
3952 	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3953 	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3954 
3955 	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3956       }
3957 
3958     CASE_CONVERT:
3959       {
3960 	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3961 	return chrec_convert (chrec_type (chrec), op, NULL);
3962       }
3963 
3964     case BIT_NOT_EXPR:
3965       {
3966 	/* Handle ~X as -1 - X.  */
3967 	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3968 	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3969 			      build_int_cst (TREE_TYPE (chrec), -1), op);
3970       }
3971 
3972     case INTEGER_CST:
3973       return chrec;
3974 
3975     default:
3976       gcc_unreachable ();
3977       return NULL_TREE;
3978     }
3979 }
3980 
3981 #define FLOOR_DIV(x,y) ((x) / (y))
3982 
3983 /* Solves the special case of the Diophantine equation:
3984    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3985 
3986    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
3987    number of iterations that loops X and Y run.  The overlaps will be
3988    constructed as evolutions in dimension DIM.  */
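/* For instance, with STEP_A = 6 and STEP_B = 9 the gcd is 3, giving
   OVERLAPS_A (t) = 3 * t and OVERLAPS_B (t) = 2 * t; both accesses then
   reach the common offset 6 * 3 * t = 9 * 2 * t = 18 * t.  */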
3989 
3990 static void
3991 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3992 					 HOST_WIDE_INT step_a,
3993 					 HOST_WIDE_INT step_b,
3994 					 affine_fn *overlaps_a,
3995 					 affine_fn *overlaps_b,
3996 					 tree *last_conflicts, int dim)
3997 {
3998   if (((step_a > 0 && step_b > 0)
3999        || (step_a < 0 && step_b < 0)))
4000     {
4001       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4002       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4003 
4004       gcd_steps_a_b = gcd (step_a, step_b);
4005       step_overlaps_a = step_b / gcd_steps_a_b;
4006       step_overlaps_b = step_a / gcd_steps_a_b;
4007 
4008       if (niter > 0)
4009 	{
4010 	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
4011 	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4012 	  last_conflict = tau2;
4013 	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4014 	}
4015       else
4016 	*last_conflicts = chrec_dont_know;
4017 
4018       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4019 				      build_int_cst (NULL_TREE,
4020 						     step_overlaps_a));
4021       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4022 				      build_int_cst (NULL_TREE,
4023 						     step_overlaps_b));
4024     }
4025 
4026   else
4027     {
4028       *overlaps_a = affine_fn_cst (integer_zero_node);
4029       *overlaps_b = affine_fn_cst (integer_zero_node);
4030       *last_conflicts = integer_zero_node;
4031     }
4032 }
4033 
4034 /* Solves the special case of a Diophantine equation where CHREC_A is
4035    an affine bivariate function, and CHREC_B is an affine univariate
4036    function.  For example,
4037 
4038    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4039 
4040    has the following overlapping functions:
4041 
4042    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4043    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4044    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4045 
4046    FORNOW: This is a specialized implementation for a case occurring in
4047    a common benchmark.  Implement the general algorithm.  */
4048 
4049 static void
4050 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4051 				      conflict_function **overlaps_a,
4052 				      conflict_function **overlaps_b,
4053 				      tree *last_conflicts)
4054 {
4055   bool xz_p, yz_p, xyz_p;
4056   HOST_WIDE_INT step_x, step_y, step_z;
4057   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4058   affine_fn overlaps_a_xz, overlaps_b_xz;
4059   affine_fn overlaps_a_yz, overlaps_b_yz;
4060   affine_fn overlaps_a_xyz, overlaps_b_xyz;
4061   affine_fn ova1, ova2, ovb;
4062   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4063 
4064   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4065   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4066   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4067 
4068   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4069   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4070   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4071 
4072   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4073     {
4074       if (dump_file && (dump_flags & TDF_DETAILS))
4075 	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4076 
4077       *overlaps_a = conflict_fn_not_known ();
4078       *overlaps_b = conflict_fn_not_known ();
4079       *last_conflicts = chrec_dont_know;
4080       return;
4081     }
4082 
4083   niter = MIN (niter_x, niter_z);
4084   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4085 					   &overlaps_a_xz,
4086 					   &overlaps_b_xz,
4087 					   &last_conflicts_xz, 1);
4088   niter = MIN (niter_y, niter_z);
4089   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4090 					   &overlaps_a_yz,
4091 					   &overlaps_b_yz,
4092 					   &last_conflicts_yz, 2);
4093   niter = MIN (niter_x, niter_z);
4094   niter = MIN (niter_y, niter);
4095   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4096 					   &overlaps_a_xyz,
4097 					   &overlaps_b_xyz,
4098 					   &last_conflicts_xyz, 3);
4099 
4100   xz_p = !integer_zerop (last_conflicts_xz);
4101   yz_p = !integer_zerop (last_conflicts_yz);
4102   xyz_p = !integer_zerop (last_conflicts_xyz);
4103 
4104   if (xz_p || yz_p || xyz_p)
4105     {
4106       ova1 = affine_fn_cst (integer_zero_node);
4107       ova2 = affine_fn_cst (integer_zero_node);
4108       ovb = affine_fn_cst (integer_zero_node);
4109       if (xz_p)
4110 	{
4111 	  affine_fn t0 = ova1;
4112 	  affine_fn t2 = ovb;
4113 
4114 	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4115 	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
4116 	  affine_fn_free (t0);
4117 	  affine_fn_free (t2);
4118 	  *last_conflicts = last_conflicts_xz;
4119 	}
4120       if (yz_p)
4121 	{
4122 	  affine_fn t0 = ova2;
4123 	  affine_fn t2 = ovb;
4124 
4125 	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4126 	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
4127 	  affine_fn_free (t0);
4128 	  affine_fn_free (t2);
4129 	  *last_conflicts = last_conflicts_yz;
4130 	}
4131       if (xyz_p)
4132 	{
4133 	  affine_fn t0 = ova1;
4134 	  affine_fn t2 = ova2;
4135 	  affine_fn t4 = ovb;
4136 
4137 	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4138 	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4139 	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4140 	  affine_fn_free (t0);
4141 	  affine_fn_free (t2);
4142 	  affine_fn_free (t4);
4143 	  *last_conflicts = last_conflicts_xyz;
4144 	}
4145       *overlaps_a = conflict_fn (2, ova1, ova2);
4146       *overlaps_b = conflict_fn (1, ovb);
4147     }
4148   else
4149     {
4150       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4151       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4152       *last_conflicts = integer_zero_node;
4153     }
4154 
4155   affine_fn_free (overlaps_a_xz);
4156   affine_fn_free (overlaps_b_xz);
4157   affine_fn_free (overlaps_a_yz);
4158   affine_fn_free (overlaps_b_yz);
4159   affine_fn_free (overlaps_a_xyz);
4160   affine_fn_free (overlaps_b_xyz);
4161 }
4162 
4163 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
4164 
4165 static void
4166 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4167 		    int size)
4168 {
4169   memcpy (vec2, vec1, size * sizeof (*vec1));
4170 }
4171 
4172 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
4173 
4174 static void
4175 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4176 		    int m, int n)
4177 {
4178   int i;
4179 
4180   for (i = 0; i < m; i++)
4181     lambda_vector_copy (mat1[i], mat2[i], n);
4182 }
4183 
4184 /* Store the SIZE x SIZE identity matrix in MAT.  */
4185 
4186 static void
4187 lambda_matrix_id (lambda_matrix mat, int size)
4188 {
4189   int i, j;
4190 
4191   for (i = 0; i < size; i++)
4192     for (j = 0; j < size; j++)
4193       mat[i][j] = (i == j) ? 1 : 0;
4194 }
4195 
4196 /* Return the index of the first nonzero element of vector VEC1 between
4197    START and N.  We must have START <= N.
4198    Returns N if VEC1 is the zero vector.  */
4199 
4200 static int
4201 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4202 {
4203   int j = start;
4204   while (j < n && vec1[j] == 0)
4205     j++;
4206   return j;
4207 }
4208 
4209 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4210    R2 = R2 + CONST1 * R1.  Returns false if the addition overflows.  */
4211 
4212 static bool
4213 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4214 		       lambda_int const1)
4215 {
4216   int i;
4217 
4218   if (const1 == 0)
4219     return true;
4220 
4221   for (i = 0; i < n; i++)
4222     {
4223       bool ovf;
4224       lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4225       if (ovf)
4226 	return false;
4227       lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4228       if (ovf || tem2 == HOST_WIDE_INT_MIN)
4229 	return false;
4230       mat[r2][i] = tem2;
4231     }
4232 
4233   return true;
4234 }
4235 
4236 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4237    and store the result in VEC2.  */
4238 
4239 static void
4240 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4241 			  int size, lambda_int const1)
4242 {
4243   int i;
4244 
4245   if (const1 == 0)
4246     lambda_vector_clear (vec2, size);
4247   else
4248     for (i = 0; i < size; i++)
4249       vec2[i] = const1 * vec1[i];
4250 }
4251 
4252 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
4253 
4254 static void
4255 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4256 		      int size)
4257 {
4258   lambda_vector_mult_const (vec1, vec2, size, -1);
4259 }
4260 
4261 /* Negate row R1 of matrix MAT which has N columns.  */
4262 
4263 static void
4264 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4265 {
4266   lambda_vector_negate (mat[r1], mat[r1], n);
4267 }
4268 
4269 /* Return true if two vectors are equal.  */
4270 
4271 static bool
4272 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4273 {
4274   int i;
4275   for (i = 0; i < size; i++)
4276     if (vec1[i] != vec2[i])
4277       return false;
4278   return true;
4279 }
4280 
4281 /* Given an M x N integer matrix A, this function determines an M x
4282    M unimodular matrix U, and an M x N echelon matrix S such that
4283    "U.A = S".  This decomposition is also known as "right Hermite".
4284 
4285    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4286    Restructuring Compilers" Utpal Banerjee.  */
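/* As a small worked example, for the 2 x 1 matrix A = (2 3)^T the loop
   below produces S = (1 0)^T and U = ((-1, 1), (3, -2)), and indeed
   U.A = S with det U = -1, so S[0][0] is the gcd of the entries of A.
   Returns false if one of the row operations overflows.  */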
4287 
4288 static bool
4289 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4290 			     lambda_matrix S, lambda_matrix U)
4291 {
4292   int i, j, i0 = 0;
4293 
4294   lambda_matrix_copy (A, S, m, n);
4295   lambda_matrix_id (U, m);
4296 
4297   for (j = 0; j < n; j++)
4298     {
4299       if (lambda_vector_first_nz (S[j], m, i0) < m)
4300 	{
4301 	  ++i0;
4302 	  for (i = m - 1; i >= i0; i--)
4303 	    {
4304 	      while (S[i][j] != 0)
4305 		{
4306 		  lambda_int factor, a, b;
4307 
4308 		  a = S[i-1][j];
4309 		  b = S[i][j];
4310 		  gcc_assert (a != HOST_WIDE_INT_MIN);
4311 		  factor = a / b;
4312 
4313 		  if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4314 		    return false;
4315 		  std::swap (S[i], S[i-1]);
4316 
4317 		  if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4318 		    return false;
4319 		  std::swap (U[i], U[i-1]);
4320 		}
4321 	    }
4322 	}
4323     }
4324 
4325   return true;
4326 }
4327 
4328 /* Determines the overlapping elements due to accesses CHREC_A and
4329    CHREC_B, which are affine functions.  This function cannot handle
4330    symbolic evolution functions, i.e. when the initial conditions are
4331    parameters, because it uses lambda matrices of integers.  */
4332 
4333 static void
4334 analyze_subscript_affine_affine (tree chrec_a,
4335 				 tree chrec_b,
4336 				 conflict_function **overlaps_a,
4337 				 conflict_function **overlaps_b,
4338 				 tree *last_conflicts)
4339 {
4340   unsigned nb_vars_a, nb_vars_b, dim;
4341   lambda_int gamma, gcd_alpha_beta;
4342   lambda_matrix A, U, S;
4343   struct obstack scratch_obstack;
4344 
4345   if (eq_evolutions_p (chrec_a, chrec_b))
4346     {
4347       /* The accessed index overlaps for each iteration in the
4348 	 loop.  */
4349       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4350       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4351       *last_conflicts = chrec_dont_know;
4352       return;
4353     }
4354   if (dump_file && (dump_flags & TDF_DETAILS))
4355     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4356 
4357   /* For determining the initial intersection, we have to solve a
4358      Diophantine equation.  This is the most time consuming part.
4359 
4360      To answer the question "Is there a dependence?" we have
4361      to prove that there exists a solution to the Diophantine
4362      equation, and that the solution is in the iteration domain,
4363      i.e. the solution is positive or zero, and that the solution
4364      happens before the upper bound loop.nb_iterations.  Otherwise
4365      there is no dependence.  This function outputs a description of
4366      the iterations that hold the intersections.  */
4367 
4368   nb_vars_a = nb_vars_in_chrec (chrec_a);
4369   nb_vars_b = nb_vars_in_chrec (chrec_b);
4370 
4371   gcc_obstack_init (&scratch_obstack);
4372 
4373   dim = nb_vars_a + nb_vars_b;
4374   U = lambda_matrix_new (dim, dim, &scratch_obstack);
4375   A = lambda_matrix_new (dim, 1, &scratch_obstack);
4376   S = lambda_matrix_new (dim, 1, &scratch_obstack);
4377 
4378   tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4379   tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4380   if (init_a == chrec_dont_know
4381       || init_b == chrec_dont_know)
4382     {
4383       if (dump_file && (dump_flags & TDF_DETAILS))
4384 	fprintf (dump_file, "affine-affine test failed: "
4385 		 "representation issue.\n");
4386       *overlaps_a = conflict_fn_not_known ();
4387       *overlaps_b = conflict_fn_not_known ();
4388       *last_conflicts = chrec_dont_know;
4389       goto end_analyze_subs_aa;
4390     }
4391   gamma = int_cst_value (init_b) - int_cst_value (init_a);
4392 
4393   /* Don't do all the hard work of solving the Diophantine equation
4394      when we already know the solution: for example,
4395      | {3, +, 1}_1
4396      | {3, +, 4}_2
4397      | gamma = 3 - 3 = 0.
4398      Then the first overlap occurs during the first iterations:
4399      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4400   */
4401   if (gamma == 0)
4402     {
4403       if (nb_vars_a == 1 && nb_vars_b == 1)
4404 	{
4405 	  HOST_WIDE_INT step_a, step_b;
4406 	  HOST_WIDE_INT niter, niter_a, niter_b;
4407 	  affine_fn ova, ovb;
4408 
4409 	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4410 	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4411 	  niter = MIN (niter_a, niter_b);
4412 	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4413 	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4414 
4415 	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4416 						   &ova, &ovb,
4417 						   last_conflicts, 1);
4418 	  *overlaps_a = conflict_fn (1, ova);
4419 	  *overlaps_b = conflict_fn (1, ovb);
4420 	}
4421 
4422       else if (nb_vars_a == 2 && nb_vars_b == 1)
4423 	compute_overlap_steps_for_affine_1_2
4424 	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4425 
4426       else if (nb_vars_a == 1 && nb_vars_b == 2)
4427 	compute_overlap_steps_for_affine_1_2
4428 	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4429 
4430       else
4431 	{
4432 	  if (dump_file && (dump_flags & TDF_DETAILS))
4433 	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4434 	  *overlaps_a = conflict_fn_not_known ();
4435 	  *overlaps_b = conflict_fn_not_known ();
4436 	  *last_conflicts = chrec_dont_know;
4437 	}
4438       goto end_analyze_subs_aa;
4439     }
4440 
4441   /* U.A = S */
4442   if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4443     {
4444       *overlaps_a = conflict_fn_not_known ();
4445       *overlaps_b = conflict_fn_not_known ();
4446       *last_conflicts = chrec_dont_know;
4447       goto end_analyze_subs_aa;
4448     }
4449 
4450   if (S[0][0] < 0)
4451     {
4452       S[0][0] *= -1;
4453       lambda_matrix_row_negate (U, dim, 0);
4454     }
4455   gcd_alpha_beta = S[0][0];
4456 
4457   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4458      but that is quite a strange case.  Instead of ICEing, answer
4459      don't know.  */
4460   if (gcd_alpha_beta == 0)
4461     {
4462       *overlaps_a = conflict_fn_not_known ();
4463       *overlaps_b = conflict_fn_not_known ();
4464       *last_conflicts = chrec_dont_know;
4465       goto end_analyze_subs_aa;
4466     }
4467 
4468   /* The classic "gcd-test".  */
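  /* For instance, {0, +, 4}_1 vs. {1, +, 6}_1 gives gamma = 1 and
     gcd_alpha_beta = gcd (4, 6) = 2; since 2 does not divide 1, the
     equation has no integer solution and the accesses are independent.  */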
4469   if (!int_divides_p (gcd_alpha_beta, gamma))
4470     {
4471       /* The "gcd-test" has determined that there is no integer
4472 	 solution, i.e. there is no dependence.  */
4473       *overlaps_a = conflict_fn_no_dependence ();
4474       *overlaps_b = conflict_fn_no_dependence ();
4475       *last_conflicts = integer_zero_node;
4476     }
4477 
4478   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
4479   else if (nb_vars_a == 1 && nb_vars_b == 1)
4480     {
4481       /* Both functions should have the same evolution sign.  */
4482       if (((A[0][0] > 0 && -A[1][0] > 0)
4483 	   || (A[0][0] < 0 && -A[1][0] < 0)))
4484 	{
4485 	  /* The solutions are given by:
4486 	     |
4487 	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
4488 	     |                           [u21 u22]    [y0]
4489 
4490 	     For a given integer t.  Using the following variables,
4491 
4492 	     | i0 = u11 * gamma / gcd_alpha_beta
4493 	     | j0 = u12 * gamma / gcd_alpha_beta
4494 	     | i1 = u21
4495 	     | j1 = u22
4496 
4497 	     the solutions are:
4498 
4499 	     | x0 = i0 + i1 * t,
4500 	     | y0 = j0 + j1 * t.  */
4501       	  HOST_WIDE_INT i0, j0, i1, j1;
4502 
4503 	  i0 = U[0][0] * gamma / gcd_alpha_beta;
4504 	  j0 = U[0][1] * gamma / gcd_alpha_beta;
4505 	  i1 = U[1][0];
4506 	  j1 = U[1][1];
4507 
4508 	  if ((i1 == 0 && i0 < 0)
4509 	      || (j1 == 0 && j0 < 0))
4510 	    {
4511 	      /* There is no solution.
4512 		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4513 		 falls in here, but for the moment we don't look at the
4514 		 upper bound of the iteration domain.  */
4515 	      *overlaps_a = conflict_fn_no_dependence ();
4516 	      *overlaps_b = conflict_fn_no_dependence ();
4517 	      *last_conflicts = integer_zero_node;
4518 	      goto end_analyze_subs_aa;
4519 	    }
4520 
4521 	  if (i1 > 0 && j1 > 0)
4522 	    {
4523 	      HOST_WIDE_INT niter_a
4524 		= max_stmt_executions_int (get_chrec_loop (chrec_a));
4525 	      HOST_WIDE_INT niter_b
4526 		= max_stmt_executions_int (get_chrec_loop (chrec_b));
4527 	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4528 
4529 	      /* (X0, Y0) is a solution of the Diophantine equation:
4530 		 "chrec_a (X0) = chrec_b (Y0)".  */
4531 	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4532 					CEIL (-j0, j1));
4533 	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
4534 	      HOST_WIDE_INT y0 = j1 * tau1 + j0;
4535 
4536 	      /* (X1, Y1) is the smallest positive solution of the eq
4537 		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4538 		 first conflict occurs.  */
4539 	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4540 	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4541 	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4542 
4543 	      if (niter > 0)
4544 		{
4545 		  /* If the overlap occurs outside of the bounds of the
4546 		     loop, there is no dependence.  */
4547 		  if (x1 >= niter_a || y1 >= niter_b)
4548 		    {
4549 		      *overlaps_a = conflict_fn_no_dependence ();
4550 		      *overlaps_b = conflict_fn_no_dependence ();
4551 		      *last_conflicts = integer_zero_node;
4552 		      goto end_analyze_subs_aa;
4553 		    }
4554 
4555 		  /* max stmt executions can get quite large, avoid
4556 		     overflows by using wide ints here.  */
4557 		  widest_int tau2
4558 		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4559 				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4560 		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4561 		  if (wi::min_precision (last_conflict, SIGNED)
4562 		      <= TYPE_PRECISION (integer_type_node))
4563 		    *last_conflicts
4564 		       = build_int_cst (integer_type_node,
4565 					last_conflict.to_shwi ());
4566 		  else
4567 		    *last_conflicts = chrec_dont_know;
4568 		}
4569 	      else
4570 		*last_conflicts = chrec_dont_know;
4571 
4572 	      *overlaps_a
4573 		= conflict_fn (1,
4574 			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
4575 						 1,
4576 						 build_int_cst (NULL_TREE, i1)));
4577 	      *overlaps_b
4578 		= conflict_fn (1,
4579 			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
4580 						 1,
4581 						 build_int_cst (NULL_TREE, j1)));
4582 	    }
4583 	  else
4584 	    {
4585 	      /* FIXME: For the moment, the upper bound of the
4586 		 iteration domain for i and j is not checked.  */
4587 	      if (dump_file && (dump_flags & TDF_DETAILS))
4588 		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4589 	      *overlaps_a = conflict_fn_not_known ();
4590 	      *overlaps_b = conflict_fn_not_known ();
4591 	      *last_conflicts = chrec_dont_know;
4592 	    }
4593 	}
4594       else
4595 	{
4596 	  if (dump_file && (dump_flags & TDF_DETAILS))
4597 	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4598 	  *overlaps_a = conflict_fn_not_known ();
4599 	  *overlaps_b = conflict_fn_not_known ();
4600 	  *last_conflicts = chrec_dont_know;
4601 	}
4602     }
4603   else
4604     {
4605       if (dump_file && (dump_flags & TDF_DETAILS))
4606 	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4607       *overlaps_a = conflict_fn_not_known ();
4608       *overlaps_b = conflict_fn_not_known ();
4609       *last_conflicts = chrec_dont_know;
4610     }
4611 
4612 end_analyze_subs_aa:
4613   obstack_free (&scratch_obstack, NULL);
4614   if (dump_file && (dump_flags & TDF_DETAILS))
4615     {
4616       fprintf (dump_file, "  (overlaps_a = ");
4617       dump_conflict_function (dump_file, *overlaps_a);
4618       fprintf (dump_file, ")\n  (overlaps_b = ");
4619       dump_conflict_function (dump_file, *overlaps_b);
4620       fprintf (dump_file, "))\n");
4621     }
4622 }
4623 
4624 /* Returns true when analyze_subscript_affine_affine can be used for
4625    determining the dependence relation between chrec_a and chrec_b,
4626    that contain symbols.  This function modifies chrec_a and chrec_b
4627    such that the analysis result is the same, and such that they don't
4628    contain symbols, and then can safely be passed to the analyzer.
4629 
4630    Example: The analysis of the following tuples of evolutions produces
4631    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4632    vs. {0, +, 1}_1
4633 
4634    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4635    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4636 */
4637 
4638 static bool
4639 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4640 {
4641   tree diff, type, left_a, left_b, right_b;
4642 
4643   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4644       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4645     /* FIXME: For the moment not handled.  Might be refined later.  */
4646     return false;
4647 
4648   type = chrec_type (*chrec_a);
4649   left_a = CHREC_LEFT (*chrec_a);
4650   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4651   diff = chrec_fold_minus (type, left_a, left_b);
4652 
4653   if (!evolution_function_is_constant_p (diff))
4654     return false;
4655 
4656   if (dump_file && (dump_flags & TDF_DETAILS))
4657     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4658 
4659   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4660 				     diff, CHREC_RIGHT (*chrec_a));
4661   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4662   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4663 				     build_int_cst (type, 0),
4664 				     right_b);
4665   return true;
4666 }

/* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
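
/* For instance (a hand-worked example, not taken from the testsuite):
   for the SIV pair

   | chrec_a = 3            (constant access function)
   | chrec_b = {0, +, 1}_1

   the only conflict happens when {0, +, 1}_1 evaluates to 3, so the
   expected result is *OVERLAPS_A = {0}, *OVERLAPS_B = {3}, and a
   single conflict: CHREC_A (0) = CHREC_B (3) = 3.  */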

static void
analyze_siv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       int loop_nest_num)
{
  dependence_stats.num_siv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_siv_subscript\n");

  if (evolution_function_is_constant_p (chrec_a)
      && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
				      overlaps_a, overlaps_b, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_constant_p (chrec_b))
    analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
				      overlaps_b, overlaps_a, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    {
      if (!chrec_contains_symbols (chrec_a)
	  && !chrec_contains_symbols (chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else if (can_use_analyze_subscript_affine_affine (&chrec_a,
							&chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else
	goto siv_subscript_dontknow;
    }

  else
    {
    siv_subscript_dontknow:;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  siv test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_siv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}

/* Returns false if we can prove that the greatest common divisor of the steps
   of CHREC does not divide CST, true otherwise.  */
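
/* A worked example (editor's illustration): for CHREC =
   {{a, +, 4}_1, +, 6}_2 the steps are 4 and 6, whose gcd is 2.  For
   CST = 3 we have 3 % 2 != 0, so the gcd provably does not divide CST
   and the function returns false; for CST = 8 it returns true.  */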

static bool
gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
{
  HOST_WIDE_INT cd = 0, val;
  tree step;

  if (!tree_fits_shwi_p (cst))
    return true;
  val = tree_to_shwi (cst);

  while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
    {
      step = CHREC_RIGHT (chrec);
      if (!tree_fits_shwi_p (step))
	return true;
      cd = gcd (cd, tree_to_shwi (step));
      chrec = CHREC_LEFT (chrec);
    }

  return val % cd == 0;
}

/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_miv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       class loop *loop_nest)
{
  tree type, difference;

  dependence_stats.num_miv++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_miv_subscript\n");

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* Access functions are the same: all the elements are accessed
	 in the same order.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
      dependence_stats.num_miv_dependent++;
    }

  else if (evolution_function_is_constant_p (difference)
	   && evolution_function_is_affine_multivariate_p (chrec_a,
							   loop_nest->num)
	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
    {
      /* testsuite/.../ssa-chrec-33.c
	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2

	 The difference is 1, and all the evolution steps are multiples
	 of 2, consequently there are no overlapping elements.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
      dependence_stats.num_miv_independent++;
    }

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
	   && !chrec_contains_symbols (chrec_a, loop_nest)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
	   && !chrec_contains_symbols (chrec_b, loop_nest))
    {
      /* testsuite/.../ssa-chrec-35.c
	 {0, +, 1}_2  vs.  {0, +, 1}_3
	 the overlapping elements are respectively located at iterations:
	 {0, +, 1}_x and {0, +, 1}_x,
	 in other words, we have the equality:
	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)

	 Other examples:
	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)

	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
      */
      analyze_subscript_affine_affine (chrec_a, chrec_b,
				       overlaps_a, overlaps_b, last_conflicts);

      if (CF_NOT_KNOWN_P (*overlaps_a)
	  || CF_NOT_KNOWN_P (*overlaps_b))
	dependence_stats.num_miv_unimplemented++;
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
	       || CF_NO_DEPENDENCE_P (*overlaps_b))
	dependence_stats.num_miv_independent++;
      else
	dependence_stats.num_miv_dependent++;
    }

  else
    {
      /* When the analysis is too difficult, answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_miv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}

/* Determines the iterations for which CHREC_A is equal to CHREC_B
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
   OVERLAP_ITERATIONS_B are initialized with two functions that
   describe the iterations that contain conflicting elements.

   Remark: For an integer k >= 0, the following equality is true:

   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
*/
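
/* As an illustration (hand computed, not from the testsuite), for

   | CHREC_A = {0, +, 4}_1 and CHREC_B = {8, +, 4}_1

   the equality 4*x = 8 + 4*y holds exactly when x = y + 2, so the
   expected result is OVERLAP_ITERATIONS_A = {2, +, 1} and
   OVERLAP_ITERATIONS_B = {0, +, 1}: indeed CHREC_A (2 + k)
   == CHREC_B (k) == 8 + 4*k for every k >= 0.  */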

static void
analyze_overlapping_iterations (tree chrec_a,
				tree chrec_b,
				conflict_function **overlap_iterations_a,
				conflict_function **overlap_iterations_b,
				tree *last_conflicts, class loop *loop_nest)
{
  unsigned int lnn = loop_nest->num;

  dependence_stats.num_subscript_tests++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(analyze_overlapping_iterations\n");
      fprintf (dump_file, "  (chrec_a = ");
      print_generic_expr (dump_file, chrec_a);
      fprintf (dump_file, ")\n  (chrec_b = ");
      print_generic_expr (dump_file, chrec_b);
      fprintf (dump_file, ")\n");
    }

  if (chrec_a == NULL_TREE
      || chrec_b == NULL_TREE
      || chrec_contains_undetermined (chrec_a)
      || chrec_contains_undetermined (chrec_b))
    {
      dependence_stats.num_subscript_undetermined++;

      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* If they are the same chrec, and are affine, they overlap
     on every iteration.  */
  else if (eq_evolutions_p (chrec_a, chrec_b)
	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || operand_equal_p (chrec_a, chrec_b, 0)))
    {
      dependence_stats.num_same_subscript_function++;
      *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
    }

  /* If they aren't the same, and aren't affine, we can't do anything
     yet.  */
  else if ((chrec_contains_symbols (chrec_a)
	    || chrec_contains_symbols (chrec_b))
	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
    {
      dependence_stats.num_subscript_undetermined++;
      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  else if (ziv_subscript_p (chrec_a, chrec_b))
    analyze_ziv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts);

  else if (siv_subscript_p (chrec_a, chrec_b))
    analyze_siv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, lnn);

  else
    analyze_miv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, loop_nest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlap_iterations_a = ");
      dump_conflict_function (dump_file, *overlap_iterations_a);
      fprintf (dump_file, ")\n  (overlap_iterations_b = ");
      dump_conflict_function (dump_file, *overlap_iterations_b);
      fprintf (dump_file, "))\n");
    }
}

/* Helper function for uniquely inserting distance vectors.  */

static void
save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
{
  unsigned i;
  lambda_vector v;

  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
    if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
      return;

  DDR_DIST_VECTS (ddr).safe_push (dist_v);
}

/* Helper function for uniquely inserting direction vectors.  */

static void
save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
{
  unsigned i;
  lambda_vector v;

  FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
    if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
      return;

  DDR_DIR_VECTS (ddr).safe_push (dir_v);
}

/* Add a distance of 1 on all the loops outer than INDEX.  If we
   haven't yet determined a distance for this outer loop, push a new
   distance vector composed of the previous distance, and a distance
   of 1 for this outer loop.  Example:

   | loop_1
   |   loop_2
   |     A[10]
   |   endloop_2
   | endloop_1

   Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
   save (0, 1), then we have to save (1, 0).  */

static void
add_outer_distances (struct data_dependence_relation *ddr,
		     lambda_vector dist_v, int index)
{
  /* For each outer loop where init_v is not set, the accesses are
     in dependence of distance 1 in the loop.  */
  while (--index >= 0)
    {
      lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
      save_v[index] = 1;
      save_dist_v (ddr, save_v);
    }
}

/* Return false when we fail to represent the data dependence as a
   distance vector.  A_INDEX is the index of the first reference
   (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
   second reference.  INIT_B is set to true when a component has been
   added to the distance vector DIST_V.  INDEX_CARRY is then set to
   the index in DIST_V that carries the dependence.  */
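
/* For instance (a hand-worked case): with a single subscript whose
   access functions are {0, +, 1}_1 and {1, +, 1}_1 (A[i] vs. A[i+1]),
   SUB_DISTANCE is the constant 1 (up to the sign convention used by
   compute_subscript_distance), so DIST_V becomes (1), *INIT_B is set
   to true, and *INDEX_CARRY is lowered to the index of loop_1 in the
   nest.  */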

static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
			     unsigned int a_index, unsigned int b_index,
			     lambda_vector dist_v, bool *init_b,
			     int *index_carry)
{
  unsigned i;
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      tree access_fn_a, access_fn_b;
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);

      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	{
	  non_affine_dependence_relation (ddr);
	  return false;
	}

      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
      access_fn_b = SUB_ACCESS_FN (subscript, b_index);

      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
	{
	  HOST_WIDE_INT dist;
	  int index;
	  int var_a = CHREC_VARIABLE (access_fn_a);
	  int var_b = CHREC_VARIABLE (access_fn_b);

	  if (var_a != var_b
	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	    {
	      non_affine_dependence_relation (ddr);
	      return false;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in a loop nest nested in that loop,
	     we would have more access functions than loops.  Skip the
	     access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
	    continue;

	  dist = int_cst_value (SUB_DISTANCE (subscript));
	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
	  *index_carry = MIN (index, *index_carry);

	  /* This is the subscript coupling test.  If we have already
	     recorded a distance for this loop (a distance coming from
	     another subscript), it should be the same.  For example,
	     in the following code, there is no dependence:

	     | loop i = 0, N, 1
	     |   T[i+1][i] = ...
	     |   ... = T[i][i]
	     | endloop
	  */
	  if (init_v[index] != 0 && dist_v[index] != dist)
	    {
	      finalize_ddr_dependent (ddr, chrec_known);
	      return false;
	    }

	  dist_v[index] = dist;
	  init_v[index] = 1;
	  *init_b = true;
	}
      else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
	{
	  /* This can be for example an affine vs. constant dependence
	     (T[i] vs. T[3]) that is not an affine dependence and is
	     not representable as a distance vector.  */
	  non_affine_dependence_relation (ddr);
	  return false;
	}
      else
	*init_b = true;
    }

  return true;
}

/* Return true when the DDR contains only access functions that are
   invariant with respect to loop number LNUM.  */

static bool
invariant_access_functions (const struct data_dependence_relation *ddr,
			    int lnum)
{
  unsigned i;
  subscript *sub;

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
	|| !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
      return false;

  return true;
}

/* Helper function for the case where DDR_A and DDR_B are the same
   multivariate access function with a constant step.  For an example
   see pr34635-1.c.  */
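
/* A short derivation of the example used below (editor's sketch): the
   access function {{0, +, v1}_1, +, v2}_2 evaluates to v1*i + v2*j at
   iteration vector (i, j), so a self conflict between two distinct
   iteration vectors requires v1*di + v2*dj = 0.  With v1 = 2 and
   v2 = 3 the smallest integer solution is (di, dj) = (3, -2), which
   is exactly the distance vector computed below.  */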

static void
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
{
  int x_1, x_2;
  tree c_1 = CHREC_LEFT (c_2);
  tree c_0 = CHREC_LEFT (c_1);
  lambda_vector dist_v;
  HOST_WIDE_INT v1, v2, cd;

  /* Polynomials with more than 2 variables are not handled yet.  When
     the evolution steps are parameters, it is not possible to
     represent the dependence using classical distance vectors.  */
  if (TREE_CODE (c_0) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
    {
      DDR_AFFINE_P (ddr) = false;
      return;
    }

  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));

  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  v1 = int_cst_value (CHREC_RIGHT (c_1));
  v2 = int_cst_value (CHREC_RIGHT (c_2));
  cd = gcd (v1, v2);
  v1 /= cd;
  v2 /= cd;

  if (v2 < 0)
    {
      v2 = -v2;
      v1 = -v1;
    }

  dist_v[x_1] = v2;
  dist_v[x_2] = -v1;
  save_dist_v (ddr, dist_v);

  add_outer_distances (ddr, dist_v, x_1);
}

/* Helper function for the case where DDR_A and DDR_B are the same
   access functions.  */

static void
add_other_self_distances (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v;
  unsigned i;
  int index_carry = DDR_NB_LOOPS (ddr);
  subscript *sub;
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    {
      tree access_fun = SUB_ACCESS_FN (sub, 0);

      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
	{
	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
	    {
	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
		{
		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
		  return;
		}

	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);

	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
		add_multivariate_self_dist (ddr, access_fun);
	      else
		/* The evolution step is not constant: it varies in
		   the outer loop, so this cannot be represented by a
		   distance vector.  For example in pr34635.c the
		   evolution is {0, +, {0, +, 4}_1}_2.  */
		DDR_AFFINE_P (ddr) = false;

	      return;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in a loop nest nested in that loop,
	     we would have more access functions than loops.  Skip the
	     access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
				  loop))
	    continue;

	  index_carry = MIN (index_carry,
			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
						 DDR_LOOP_NEST (ddr)));
	}
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  add_outer_distances (ddr, dist_v, index_carry);
}

static void
insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

  dist_v[0] = 1;
  save_dist_v (ddr, dist_v);
}

/* Adds a unit distance vector to DDR when there is a 0 overlap.  This
   is the case for example when access functions are the same and
   equal to a constant, as in:

   | loop_1
   |   A[3] = ...
   |   ... = A[3]
   | endloop_1

   in which case the distance vectors are (0) and (1).  */

static void
add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
{
  unsigned i, j;

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      subscript_p sub = DDR_SUBSCRIPT (ddr, i);
      conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
      conflict_function *cb = SUB_CONFLICTS_IN_B (sub);

      for (j = 0; j < ca->n; j++)
	if (affine_function_zero_p (ca->fns[j]))
	  {
	    insert_innermost_unit_dist_vector (ddr);
	    return;
	  }

      for (j = 0; j < cb->n; j++)
	if (affine_function_zero_p (cb->fns[j]))
	  {
	    insert_innermost_unit_dist_vector (ddr);
	    return;
	  }
    }
}

/* Return true when the DDR contains two data references that have the
   same access functions.  */

static inline bool
same_access_functions (const struct data_dependence_relation *ddr)
{
  unsigned i;
  subscript *sub;

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
			  SUB_ACCESS_FN (sub, 1)))
      return false;

  return true;
}

/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  Return false when we
   fail to represent the data dependence as a distance vector.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (invariant_access_functions (ddr, loop_nest->num))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |       t = T[j+1][i-1];  // A
	 |       T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), which
	 means that the access A occurs later than B with respect to
	 the outer loop, i.e. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
					    &index_carry))
	    return false;
	  save_dist_v (ddr, save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |   for (j = 1; j < 100; j++)
	     |     {
	     |       t = T[j+1][i-1];  // A
	     |       T[j][i] = t + 2;  // B
	     |     }

	     the vectors are:
	     (0,  1, -1)
	     (1,  1, -1)
	     (1, -1,  1)
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      add_outer_distances (ddr, save_v, index_carry);
	      add_outer_distances (ddr, dist_v, index_carry);
	    }
	}
      else
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
						&index_carry))
		return false;

	      save_dist_v (ddr, save_v);
	      add_outer_distances (ddr, dist_v, index_carry);
	      add_outer_distances (ddr, opposite_v, index_carry);
	    }
	  else
	    save_dist_v (ddr, save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |   A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   lambda_vector_first_nz (dist_v,
						   DDR_NB_LOOPS (ddr), 0));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (dump_file, "  dist_vector = (");
	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	  fprintf (dump_file, "  )\n");
	}
      fprintf (dump_file, ")\n");
    }

  return true;
}

/* Return the direction for a given distance.
   FIXME: Computing dir this way is suboptimal, since dir can catch
   cases that dist is unable to represent.  */
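
/* For instance, the distance vector (1, -1, 0) maps to the direction
   vector (dir_positive, dir_negative, dir_equal).  */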

static inline enum data_dependence_direction
dir_from_dist (int dist)
{
  if (dist > 0)
    return dir_positive;
  else if (dist < 0)
    return dir_negative;
  else
    return dir_equal;
}

/* Compute the classic per loop direction vector.  DDR is the data
   dependence relation to build a vector from.  */

static void
build_classic_dir_vector (struct data_dependence_relation *ddr)
{
  unsigned i, j;
  lambda_vector dist_v;

  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

      for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
	dir_v[j] = dir_from_dist (dist_v[j]);

      save_dir_v (ddr, dir_v);
    }
}

/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}

/* Computes the conflicting iterations in LOOP_NEST, and initializes DDR.  */

static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
			     class loop *loop_nest)
{
  if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    build_classic_dir_vector (ddr);
}

/* Returns true when all the access functions of A are affine or
   constant with respect to LOOP_NEST.  */

static bool
access_functions_are_affine_or_constant_p (const struct data_reference *a,
					   const class loop *loop_nest)
{
  unsigned int i;
  vec<tree> fns = DR_ACCESS_FNS (a);
  tree t;

  FOR_EACH_VEC_ELT (fns, i, t)
    if (!evolution_function_is_invariant_p (t, loop_nest->num)
	&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
      return false;

  return true;
}

/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */
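
/* For example (an illustrative case): for the references A[i][0] and
   A[i][1], the subscript (0, 1) can never overlap, so the whole
   relation is finalized to CHREC_KNOWN (independence) regardless of
   the result for the subscript (i, i).  */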

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  ref_a: ");
      print_generic_expr (dump_file, DR_REF (dra));
      fprintf (dump_file, ", stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  ref_b: ");
      print_generic_expr (dump_file, DR_REF (drb));
      fprintf (dump_file, ", stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}

/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
   the data references in DATAREFS, in the LOOP_NEST.  When
   COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
   relations.  Return true when successful, i.e. when the number of
   data references is small enough to be handled.  */
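
/* A minimal usage sketch (an editor's example, not from this file;
   DATAREFS and LOOP_NEST are assumed to have been filled in by
   find_data_references_in_loop and find_loop_nest):

   | vec<ddr_p> dependence_relations = vNULL;
   | if (compute_all_dependences (datarefs, &dependence_relations,
   |				  loop_nest, false))
   |   {
   |     unsigned i;
   |     ddr_p ddr;
   |     FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
   |       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
   |         ;  (the two references provably never overlap)
   |   }
   | free_dependence_relations (dependence_relations);  */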

bool
compute_all_dependences (vec<data_reference_p> datarefs,
			 vec<ddr_p> *dependence_relations,
			 vec<loop_p> loop_nest,
			 bool compute_self_and_rr)
{
  struct data_dependence_relation *ddr;
  struct data_reference *a, *b;
  unsigned int i, j;

  if ((int) datarefs.length ()
      > param_loop_max_datarefs_for_datadeps)
    {
      struct data_dependence_relation *ddr;

      /* Insert a single relation into dependence_relations:
	 chrec_dont_know.  */
      ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
      dependence_relations->safe_push (ddr);
      return false;
    }

  FOR_EACH_VEC_ELT (datarefs, i, a)
    for (j = i + 1; datarefs.iterate (j, &b); j++)
      if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
	{
	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
	  dependence_relations->safe_push (ddr);
	  if (loop_nest.exists ())
	    compute_affine_dependence (ddr, loop_nest[0]);
	}

  if (compute_self_and_rr)
    FOR_EACH_VEC_ELT (datarefs, i, a)
      {
	ddr = initialize_data_dependence_relation (a, a, loop_nest);
	dependence_relations->safe_push (ddr);
	if (loop_nest.exists ())
	  compute_affine_dependence (ddr, loop_nest[0]);
      }

  return true;
}

/* Describes a location of a memory reference.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};


/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data references for accesses that are not
     spelled out explicitly, give up if such accesses may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  default:
	    break;
	  }

      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}


/* Returns true if the loop nest has any data reference.  */

bool
loop_nest_has_data_refs (loop_p loop)
{
  basic_block *bbs = get_loop_body (loop);
  auto_vec<data_ref_loc, 3> references;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  get_references_in_stmt (stmt, &references);
	  if (references.length ())
	    {
	      free (bbs);
	      return true;
	    }
	}
    }
  free (bbs);
  return false;
}

/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns a failure result, otherwise returns
   success.  NEST is the outermost loop of the loop nest in which the
   references should be analyzed.  */

opt_result
find_data_references_in_stmt (class loop *nest, gimple *stmt,
			      vec<data_reference_p> *datarefs)
{
  unsigned i;
  auto_vec<data_ref_loc, 2> references;
  data_ref_loc *ref;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return opt_result::failure_at (stmt, "statement clobbers memory: %G",
				   stmt);

  FOR_EACH_VEC_ELT (references, i, ref)
    {
      dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
			    loop_containing_stmt (stmt), ref->ref,
			    stmt, ref->is_read, ref->is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return opt_result::success ();
}

/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns false, otherwise returns true.
   NEST is the outermost loop of the loop nest in which the references
   should be instantiated, LOOP is the loop in which the references
   should be analyzed.  */

bool
graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
				       vec<data_reference_p> *datarefs)
{
  unsigned i;
  auto_vec<data_ref_loc, 2> references;
  data_ref_loc *ref;
  bool ret = true;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return false;

  FOR_EACH_VEC_ELT (references, i, ref)
    {
      dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
			    ref->is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return ret;
}

/* Search the data references in basic block BB of LOOP, and record the
   information into DATAREFS.  Returns chrec_dont_know when failing to
   analyze a difficult case, returns NULL_TREE otherwise.  */

tree
find_data_references_in_bb (class loop *loop, basic_block bb,
			    vec<data_reference_p> *datarefs)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);

      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	{
	  struct data_reference *res;
	  res = XCNEW (struct data_reference);
	  datarefs->safe_push (res);

	  return chrec_dont_know;
	}
    }

  return NULL_TREE;
}

/* Search the data references in LOOP, and record the information into
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
   difficult case, returns NULL_TREE otherwise.

   TODO: This function should be made smarter so that it can handle
   address arithmetic as if it were an array access, etc.  */

tree
find_data_references_in_loop (class loop *loop,
			      vec<data_reference_p> *datarefs)
{
  basic_block bb, *bbs;
  unsigned int i;

  bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
	{
	  free (bbs);
	  return chrec_dont_know;
	}
    }
  free (bbs);

  return NULL_TREE;
}

/* Return the alignment in bytes that DRB is guaranteed to have at all
   times.  */
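
/* Worked example (hand computed): with BASE_ALIGNMENT = 16,
   BASE_MISALIGNMENT = 4 and INIT = 8, the misalignment is 12, and
   since 12 & -12 == 4 the result is capped to MIN (16, 4) = 4 bytes,
   before the OFFSET and STEP caps below are applied.  */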

unsigned int
dr_alignment (innermost_loop_behavior *drb)
{
  /* Get the alignment of BASE_ADDRESS + INIT.  */
  unsigned int alignment = drb->base_alignment;
  unsigned int misalignment = (drb->base_misalignment
			       + TREE_INT_CST_LOW (drb->init));
  if (misalignment != 0)
    alignment = MIN (alignment, misalignment & -misalignment);

  /* Cap it to the alignment of OFFSET.  */
  if (!integer_zerop (drb->offset))
    alignment = MIN (alignment, drb->offset_alignment);

  /* Cap it to the alignment of STEP.  */
  if (!integer_zerop (drb->step))
    alignment = MIN (alignment, drb->step_alignment);

  return alignment;
}

/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}

/* Return the object whose alignment would need to be changed in order
   to increase the alignment of ADDR.  Store the maximum achievable
   alignment in *MAX_ALIGNMENT.  */

tree
get_base_for_alignment (tree addr, unsigned int *max_alignment)
{
  tree base = get_base_for_alignment_1 (addr, max_alignment);
  if (base)
    return base;

  if (TREE_CODE (addr) == ADDR_EXPR)
    addr = TREE_OPERAND (addr, 0);
  *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  return addr;
}

/* Recursive helper function.  */

static bool
find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
{
  /* Inner loops of the nest should not contain siblings.  Example:
     when there are two consecutive loops,

     | loop_0
     |   loop_1
     |     A[{0, +, 1}_1]
     |   endloop_1
     |   loop_2
     |     A[{0, +, 1}_2]
     |   endloop_2
     | endloop_0

     the dependence relation cannot be captured by the distance
     abstraction.  */
  if (loop->next)
    return false;

  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}

/* Return false when the LOOP is not well nested.  Otherwise return
   true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
   contain the loops from the outermost to the innermost, as they will
   appear in the classic distance vector.  */

bool
find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
{
  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}

/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the data references contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
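
/* Typical call sequence (an editor's sketch; error handling elided):

   | auto_vec<loop_p, 3> loop_nest;
   | vec<data_reference_p> datarefs = vNULL;
   | vec<ddr_p> ddrs = vNULL;
   | if (compute_data_dependences_for_loop (loop, false, &loop_nest,
   |					    &datarefs, &ddrs))
   |   ... inspect the DDR_DIST_VECTS of the relations in ddrs ...
   | free_dependence_relations (ddrs);
   | free_data_refs (datarefs);

   Note that free_dependence_relations and free_data_refs release the
   vectors, so they must not be used afterwards.  */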

bool
compute_data_dependences_for_loop (class loop *loop,
				   bool compute_self_and_read_read_dependences,
				   vec<loop_p> *loop_nest,
				   vec<data_reference_p> *datarefs,
				   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
				   compute_self_and_read_read_dependences))
    res = false;

  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
	       dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
	       dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
	       dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
	       dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
	       dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
	       dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
	       dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
	       dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
	       dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
	       dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
	       dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
	       dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
	       dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
	       dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
	       dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
	       dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
	       dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
	       dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
	       dependence_stats.num_miv_unimplemented);
    }

  return res;
}
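
/* A minimal usage sketch (illustrative only; the variable names are
   assumptions, not part of this file).  The results are released with
   the helpers defined below.

   | vec<loop_p> nest = vNULL;
   | vec<data_reference_p> datarefs = vNULL;
   | vec<ddr_p> ddrs = vNULL;
   | if (compute_data_dependences_for_loop (loop, false, &nest,
   |                                        &datarefs, &ddrs))
   |   ...  inspect DDR_ARE_DEPENDENT (ddr) for each ddr in ddrs ...
   | free_dependence_relations (ddrs);
   | free_data_refs (datarefs);
   | nest.release ();

   Passing false for COMPUTE_SELF_AND_READ_READ_DEPENDENCES skips the
   self and read-read relations, which most consumers do not need.  */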

/* Free the memory used by a data dependence relation DDR.  */

void
free_dependence_relation (struct data_dependence_relation *ddr)
{
  if (ddr == NULL)
    return;

  if (DDR_SUBSCRIPTS (ddr).exists ())
    free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_DIST_VECTS (ddr).release ();
  DDR_DIR_VECTS (ddr).release ();

  free (ddr);
}

/* Free the memory used by the data dependence relations from
   DEPENDENCE_RELATIONS.  */

void
free_dependence_relations (vec<ddr_p> dependence_relations)
{
  unsigned int i;
  struct data_dependence_relation *ddr;

  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    if (ddr)
      free_dependence_relation (ddr);

  dependence_relations.release ();
}

/* Free the memory used by the data references from DATAREFS.  */

void
free_data_refs (vec<data_reference_p> datarefs)
{
  unsigned int i;
  struct data_reference *dr;

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    free_data_ref (dr);
  datarefs.release ();
}

/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has.  */
      wide_int step_min, step_max;
      if (TREE_CODE (step) != SSA_NAME
	  || get_range_info (step, &step_min, &step_max) != VR_RANGE)
	{
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}

      /* Check whether the unconverted step has an acceptable range.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
	{
	  if (wi::ge_p (step_min, useful_min, sgn))
	    return ssize_int (useful_min);
	  else if (wi::lt_p (step_max, 0, sgn))
	    return ssize_int (-1);
	  else
	    return fold_convert (ssizetype, step);
	}
    }
  return DR_STEP (dr);
}
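
/* A worked example with illustrative values (the SSA name n_1 is
   hypothetical): suppose DR_STEP (dr) is

   |   (ssizetype) ((int) n_1 * 4)

   and range information shows n_1 in [1, 100].  The factor 4 is a
   positive constant, scaling [1, 100] by 4 cannot overflow ssizetype,
   and the minimum 1 is >= USEFUL_MIN for both callers below, so
   dr_step_indicator returns ssize_int (USEFUL_MIN): the step is known
   to be positive even though its exact value is not.  */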

/* Return a value that is negative iff DR has a negative step.  */

tree
dr_direction_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 0);
}

/* Return a value that is zero iff DR has a zero step.  */

tree
dr_zero_step_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 1);
}

/* Return true if DR is known to have a nonnegative (but possibly zero)
   step.  */

bool
dr_known_forward_stride_p (struct data_reference *dr)
{
  tree indicator = dr_direction_indicator (dr);
  tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
				   fold_convert (ssizetype, indicator),
				   ssize_int (0));
  return neg_step_val && integer_zerop (neg_step_val);
}
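
/* A minimal usage sketch (hypothetical caller): guard a transformation
   that requires a forward (or zero) stride.

   | if (dr_known_forward_stride_p (dr))
   |   ...  DR is known never to step backwards ...

   The test is conservative: when the folded comparison does not
   simplify to a constant, the function returns false.  */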